fix: heal multiple buckets in bulk (#11029)

Makes server startup orders of magnitude faster with a large number of buckets.
Branch: master
Author: Harshavardhana, 4 years ago, committed by GitHub
Parent: 3514e89eb3
Commit: 9c53cc1b83
Changed files (11):
  1. buildscripts/verify-build.sh (32 lines changed)
  2. buildscripts/verify-healing.sh (42 lines changed)
  3. cmd/erasure-bucket.go (28 lines changed)
  4. cmd/erasure-server-sets.go (22 lines changed)
  5. cmd/erasure-sets.go (25 lines changed)
  6. cmd/gateway-unsupported.go (5 lines changed)
  7. cmd/iam.go (28 lines changed)
  8. cmd/metacache-set.go (3 lines changed)
  9. cmd/object-api-interface.go (1 line changed)
  10. cmd/server-main.go (16 lines changed)
  11. pkg/madmin/examples/heal-buckets-list.go (61 lines changed, file deleted)
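
The change itself is small: instead of creating or healing buckets one MakeBucketWithLocation call at a time during startup, the server now collects all bucket names and issues one bulk call (MakeMultipleBuckets, backed by MakeVolBulk on each disk). A minimal standalone sketch of why that is faster with many buckets; the disk type and its simulated round-trip delay are hypothetical stand-ins, not MinIO's internal StorageAPI:

// Toy illustration (not MinIO code): why creating buckets in bulk at startup
// is faster. The disk type and its fixed round-trip time are hypothetical.
package main

import (
    "context"
    "fmt"
    "time"
)

type disk struct{ rtt time.Duration } // simulated network round trip per call

// MakeVol simulates creating one bucket on a remote disk: one round trip.
func (d disk) MakeVol(ctx context.Context, bucket string) error {
    time.Sleep(d.rtt)
    return nil
}

// MakeVolBulk simulates creating many buckets in a single call: one round trip.
func (d disk) MakeVolBulk(ctx context.Context, buckets ...string) error {
    time.Sleep(d.rtt)
    return nil
}

func main() {
    ctx := context.Background()
    d := disk{rtt: 2 * time.Millisecond}

    buckets := make([]string, 500)
    for i := range buckets {
        buckets[i] = fmt.Sprintf("bucket-%03d", i)
    }

    // Old startup heal: one call per bucket per disk.
    start := time.Now()
    for _, b := range buckets {
        _ = d.MakeVol(ctx, b)
    }
    fmt.Println("per-bucket:", time.Since(start))

    // New startup heal: one call per disk, regardless of bucket count.
    start = time.Now()
    _ = d.MakeVolBulk(ctx, buckets...)
    fmt.Println("bulk:      ", time.Since(start))
}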

buildscripts/verify-build.sh

@@ -60,24 +60,24 @@ function start_minio_erasure_sets()
     sleep 15
 }
-function start_minio_zone_erasure_sets()
+function start_minio_pool_erasure_sets()
 {
     export MINIO_ACCESS_KEY=$ACCESS_KEY
     export MINIO_SECRET_KEY=$SECRET_KEY
-    "${MINIO[@]}" server --address=:9000 "http://127.0.0.1:9000${WORK_DIR}/zone-disk-sets{1...4}" "http://127.0.0.1:9001${WORK_DIR}/zone-disk-sets{5...8}" >"$WORK_DIR/zone-minio-9000.log" 2>&1 &
-    "${MINIO[@]}" server --address=:9001 "http://127.0.0.1:9000${WORK_DIR}/zone-disk-sets{1...4}" "http://127.0.0.1:9001${WORK_DIR}/zone-disk-sets{5...8}" >"$WORK_DIR/zone-minio-9001.log" 2>&1 &
+    "${MINIO[@]}" server --address=:9000 "http://127.0.0.1:9000${WORK_DIR}/pool-disk-sets{1...4}" "http://127.0.0.1:9001${WORK_DIR}/pool-disk-sets{5...8}" >"$WORK_DIR/pool-minio-9000.log" 2>&1 &
+    "${MINIO[@]}" server --address=:9001 "http://127.0.0.1:9000${WORK_DIR}/pool-disk-sets{1...4}" "http://127.0.0.1:9001${WORK_DIR}/pool-disk-sets{5...8}" >"$WORK_DIR/pool-minio-9001.log" 2>&1 &
     sleep 40
 }
-function start_minio_zone_erasure_sets_ipv6()
+function start_minio_pool_erasure_sets_ipv6()
 {
     export MINIO_ACCESS_KEY=$ACCESS_KEY
     export MINIO_SECRET_KEY=$SECRET_KEY
-    "${MINIO[@]}" server --address="[::1]:9000" "http://[::1]:9000${WORK_DIR}/zone-disk-sets{1...4}" "http://[::1]:9001${WORK_DIR}/zone-disk-sets{5...8}" >"$WORK_DIR/zone-minio-ipv6-9000.log" 2>&1 &
-    "${MINIO[@]}" server --address="[::1]:9001" "http://[::1]:9000${WORK_DIR}/zone-disk-sets{1...4}" "http://[::1]:9001${WORK_DIR}/zone-disk-sets{5...8}" >"$WORK_DIR/zone-minio-ipv6-9001.log" 2>&1 &
+    "${MINIO[@]}" server --address="[::1]:9000" "http://[::1]:9000${WORK_DIR}/pool-disk-sets{1...4}" "http://[::1]:9001${WORK_DIR}/pool-disk-sets{5...8}" >"$WORK_DIR/pool-minio-ipv6-9000.log" 2>&1 &
+    "${MINIO[@]}" server --address="[::1]:9001" "http://[::1]:9000${WORK_DIR}/pool-disk-sets{1...4}" "http://[::1]:9001${WORK_DIR}/pool-disk-sets{5...8}" >"$WORK_DIR/pool-minio-ipv6-9001.log" 2>&1 &
     sleep 40
 }
@@ -129,9 +129,9 @@ function run_test_erasure_sets() {
     return "$rv"
 }
-function run_test_zone_erasure_sets()
+function run_test_pool_erasure_sets()
 {
-    start_minio_zone_erasure_sets
+    start_minio_pool_erasure_sets
     (cd "$WORK_DIR" && "$FUNCTIONAL_TESTS")
     rv=$?
@@ -142,20 +142,20 @@ function run_test_zone_erasure_sets()
     if [ "$rv" -ne 0 ]; then
         for i in $(seq 0 1); do
             echo "server$i log:"
-            cat "$WORK_DIR/zone-minio-900$i.log"
+            cat "$WORK_DIR/pool-minio-900$i.log"
         done
     fi
     for i in $(seq 0 1); do
-        rm -f "$WORK_DIR/zone-minio-900$i.log"
+        rm -f "$WORK_DIR/pool-minio-900$i.log"
     done
     return "$rv"
 }
-function run_test_zone_erasure_sets_ipv6()
+function run_test_pool_erasure_sets_ipv6()
 {
-    start_minio_zone_erasure_sets_ipv6
+    start_minio_pool_erasure_sets_ipv6
     export SERVER_ENDPOINT="[::1]:9000"
@@ -168,12 +168,12 @@ function run_test_zone_erasure_sets_ipv6()
     if [ "$rv" -ne 0 ]; then
         for i in $(seq 0 1); do
             echo "server$i log:"
-            cat "$WORK_DIR/zone-minio-ipv6-900$i.log"
+            cat "$WORK_DIR/pool-minio-ipv6-900$i.log"
         done
     fi
     for i in $(seq 0 1); do
-        rm -f "$WORK_DIR/zone-minio-ipv6-900$i.log"
+        rm -f "$WORK_DIR/pool-minio-ipv6-900$i.log"
     done
     return "$rv"
@@ -293,14 +293,14 @@ function main()
     fi
     echo "Testing in Distributed Eraure expanded setup"
-    if ! run_test_zone_erasure_sets; then
+    if ! run_test_pool_erasure_sets; then
         echo "FAILED"
         purge "$WORK_DIR"
         exit 1
     fi
     echo "Testing in Distributed Erasure expanded setup with ipv6"
-    if ! run_test_zone_erasure_sets_ipv6; then
+    if ! run_test_pool_erasure_sets_ipv6; then
         echo "FAILED"
         purge "$WORK_DIR"
         exit 1

buildscripts/verify-healing.sh

@@ -29,32 +29,27 @@ MINIO_CONFIG_DIR="$WORK_DIR/.minio"
 MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server )
 function start_minio_3_node() {
-    declare -a minio_pids
-    declare -a ARGS
     export MINIO_ACCESS_KEY=minio
     export MINIO_SECRET_KEY=minio123
     export MINIO_ERASURE_SET_DRIVE_COUNT=6
     start_port=$(shuf -i 10000-65000 -n 1)
+    args=""
     for i in $(seq 1 3); do
-        ARGS+=("http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/6/")
+        args="$args http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/6/"
     done
-    "${MINIO[@]}" --address ":$[$start_port+1]" ${ARGS[@]} > "${WORK_DIR}/dist-minio-server1.log" 2>&1 &
-    minio_pids[0]=$!
-    disown "${minio_pids[0]}"
+    "${MINIO[@]}" --address ":$[$start_port+1]" $args > "${WORK_DIR}/dist-minio-server1.log" 2>&1 &
+    disown $!
-    "${MINIO[@]}" --address ":$[$start_port+2]" ${ARGS[@]} > "${WORK_DIR}/dist-minio-server2.log" 2>&1 &
-    minio_pids[1]=$!
-    disown "${minio_pids[1]}"
+    "${MINIO[@]}" --address ":$[$start_port+2]" $args > "${WORK_DIR}/dist-minio-server2.log" 2>&1 &
+    disown $!
-    "${MINIO[@]}" --address ":$[$start_port+3]" ${ARGS[@]} > "${WORK_DIR}/dist-minio-server3.log" 2>&1 &
-    minio_pids[2]=$!
-    disown "${minio_pids[2]}"
+    "${MINIO[@]}" --address ":$[$start_port+3]" $args > "${WORK_DIR}/dist-minio-server3.log" 2>&1 &
+    disown $!
     sleep "$1"
-    for pid in "${minio_pids[@]}"; do
-        if ! kill "$pid"; then
+    if [ "$(pgrep -c minio)" -ne 3 ]; then
         for i in $(seq 1 3); do
             echo "server$i log:"
             cat "${WORK_DIR}/dist-minio-server$i.log"
@@ -63,10 +58,23 @@ function start_minio_3_node() {
         purge "$WORK_DIR"
         exit 1
     fi
-        # forcibly killing, to proceed further properly.
-        kill -9 "$pid"
-        sleep 1 # wait 1sec per pid
+    if ! pkill minio; then
+        for i in $(seq 1 3); do
+            echo "server$i log:"
+            cat "${WORK_DIR}/dist-minio-server$i.log"
         done
+        echo "FAILED"
+        purge "$WORK_DIR"
+        exit 1
+    fi
+    sleep 1;
+    if pgrep minio; then
+        # forcibly killing, to proceed further properly.
+        if ! pkill -9 minio; then
+            echo "no minio process running anymore, proceed."
+        fi
+    fi
 }

cmd/erasure-bucket.go

@@ -31,6 +31,34 @@ var bucketOpIgnoredErrs = append(baseIgnoredErrs, errDiskAccessDenied, errUnform
 // list all errors that can be ignored in a bucket metadata operation.
 var bucketMetadataOpIgnoredErrs = append(bucketOpIgnoredErrs, errVolumeNotFound)
+
+// MakeMultipleBuckets - create a list of buckets
+func (er erasureObjects) MakeMultipleBuckets(ctx context.Context, buckets ...string) error {
+    storageDisks := er.getDisks()
+
+    g := errgroup.WithNErrs(len(storageDisks))
+
+    // Make a volume entry on all underlying storage disks.
+    for index := range storageDisks {
+        index := index
+        g.Go(func() error {
+            if storageDisks[index] != nil {
+                if err := storageDisks[index].MakeVolBulk(ctx, buckets...); err != nil {
+                    if !errors.Is(err, errVolumeExists) {
+                        logger.LogIf(ctx, err)
+                    }
+                    return err
+                }
+                return nil
+            }
+            return errDiskNotFound
+        }, index)
+    }
+
+    writeQuorum := getWriteQuorum(len(storageDisks))
+    err := reduceWriteQuorumErrs(ctx, g.Wait(), bucketOpIgnoredErrs, writeQuorum)
+    return toObjectErr(err)
+}
+
 /// Bucket operations
 // MakeBucket - make a bucket.
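
For reference, a simplified standalone sketch of the fan-out-and-quorum shape used by MakeMultipleBuckets above: a plain sync.WaitGroup and a hand-rolled success count stand in for MinIO's internal errgroup.WithNErrs and reduceWriteQuorumErrs, and makeVolBulk here is a hypothetical stub.

// Simplified sketch (not MinIO internals): fan out one bulk call per disk,
// keep one error slot per disk, and succeed if a write quorum of disks
// succeeded. makeVolBulk and errDiskOffline are hypothetical stand-ins.
package main

import (
    "context"
    "errors"
    "fmt"
    "sync"
)

var errDiskOffline = errors.New("disk offline")

// makeVolBulk pretends to create all buckets on one disk; disk 2 is "down".
func makeVolBulk(ctx context.Context, disk int, buckets ...string) error {
    if disk == 2 {
        return errDiskOffline
    }
    return nil
}

// createBulkWithQuorum mirrors the shape of MakeMultipleBuckets: one goroutine
// per disk, then the per-disk errors are reduced against a write quorum.
func createBulkWithQuorum(ctx context.Context, disks, writeQuorum int, buckets ...string) error {
    errs := make([]error, disks)
    var wg sync.WaitGroup
    for i := 0; i < disks; i++ {
        wg.Add(1)
        go func(i int) {
            defer wg.Done()
            errs[i] = makeVolBulk(ctx, i, buckets...)
        }(i)
    }
    wg.Wait()

    ok := 0
    for _, err := range errs {
        if err == nil {
            ok++
        }
    }
    if ok < writeQuorum {
        return fmt.Errorf("write quorum not met: only %d of %d disks succeeded", ok, disks)
    }
    return nil
}

func main() {
    // 4 disks, quorum of 3: one offline disk does not fail the operation.
    err := createBulkWithQuorum(context.Background(), 4, 3, "bucket-a", "bucket-b")
    fmt.Println("result:", err)
}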

cmd/erasure-server-sets.go

@@ -424,6 +424,28 @@ func (z *erasureServerPools) CrawlAndGetDataUsage(ctx context.Context, bf *bloom
     return firstErr
 }
+
+func (z *erasureServerPools) MakeMultipleBuckets(ctx context.Context, buckets ...string) error {
+    g := errgroup.WithNErrs(len(z.serverPools))
+
+    // Create buckets in parallel across all sets.
+    for index := range z.serverPools {
+        index := index
+        g.Go(func() error {
+            return z.serverPools[index].MakeMultipleBuckets(ctx, buckets...)
+        }, index)
+    }
+
+    errs := g.Wait()
+    // Return the first encountered error
+    for _, err := range errs {
+        if err != nil {
+            return err
+        }
+    }
+    return nil
+}
+
 // MakeBucketWithLocation - creates a new bucket across all serverPools simultaneously
 // even if one of the sets fail to create buckets, we proceed all the successful
 // operations.

cmd/erasure-sets.go

@@ -531,6 +531,31 @@ func (s *erasureSets) Shutdown(ctx context.Context) error {
     return nil
 }
+
+// MakeMultipleBuckets - make many buckets at once.
+func (s *erasureSets) MakeMultipleBuckets(ctx context.Context, buckets ...string) error {
+    g := errgroup.WithNErrs(len(s.sets))
+
+    // Create buckets in parallel across all sets.
+    for index := range s.sets {
+        index := index
+        g.Go(func() error {
+            return s.sets[index].MakeMultipleBuckets(ctx, buckets...)
+        }, index)
+    }
+
+    errs := g.Wait()
+    // Return the first encountered error
+    for _, err := range errs {
+        if err != nil {
+            return err
+        }
+    }
+
+    // Success.
+    return nil
+}
+
 // MakeBucketLocation - creates a new bucket across all sets simultaneously,
 // then return the first encountered error
 func (s *erasureSets) MakeBucketWithLocation(ctx context.Context, bucket string, opts BucketOptions) error {

cmd/gateway-unsupported.go

@@ -51,6 +51,11 @@ func (a GatewayUnsupported) SetDriveCount() int {
     return 0
 }
+
+// MakeMultipleBuckets is dummy stub for gateway.
+func (a GatewayUnsupported) MakeMultipleBuckets(ctx context.Context, buckets ...string) error {
+    return NotImplemented{}
+}
+
 // ListMultipartUploads lists all multipart uploads.
 func (a GatewayUnsupported) ListMultipartUploads(ctx context.Context, bucket string, prefix string, keyMarker string, uploadIDMarker string, delimiter string, maxUploads int) (lmi ListMultipartsInfo, err error) {
     return lmi, NotImplemented{}

cmd/iam.go

@@ -466,11 +466,10 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
     r := rand.New(rand.NewSource(time.Now().UnixNano()))
-    var err error
     for {
         // let one of the server acquire the lock, if not let them timeout.
         // which shall be retried again by this loop.
-        if err = txnLk.GetLock(retryCtx, iamLockTimeout); err != nil {
+        if err := txnLk.GetLock(retryCtx, iamLockTimeout); err != nil {
             logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. trying to acquire lock")
             time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
             continue
@@ -480,7 +479,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
             // **** WARNING ****
             // Migrating to encrypted backend on etcd should happen before initialization of
             // IAM sub-system, make sure that we do not move the above codeblock elsewhere.
-            if err = migrateIAMConfigsEtcdToEncrypted(ctx, globalEtcdClient); err != nil {
+            if err := migrateIAMConfigsEtcdToEncrypted(ctx, globalEtcdClient); err != nil {
                 txnLk.Unlock()
                 logger.LogIf(ctx, fmt.Errorf("Unable to decrypt an encrypted ETCD backend for IAM users and policies: %w", err))
                 logger.LogIf(ctx, errors.New("IAM sub-system is partially initialized, some users may not be available"))
@@ -494,7 +493,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
         }
         // Migrate IAM configuration, if necessary.
-        if err = sys.doIAMConfigMigration(ctx); err != nil {
+        if err := sys.doIAMConfigMigration(ctx); err != nil {
             txnLk.Unlock()
             if errors.Is(err, errDiskNotFound) ||
                 errors.Is(err, errConfigNotFound) ||
@@ -515,14 +514,27 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
         break
     }
-    err = sys.store.loadAll(ctx, sys)
-
-    // Invalidate the old cred always, even upon error to avoid any leakage.
-    globalOldCred = auth.Credentials{}
+    for {
+        if err := sys.store.loadAll(ctx, sys); err != nil {
+            if errors.Is(err, errDiskNotFound) ||
+                errors.Is(err, errConfigNotFound) ||
+                errors.Is(err, context.DeadlineExceeded) ||
+                errors.As(err, &rquorum) ||
+                errors.As(err, &wquorum) ||
+                isErrBucketNotFound(err) {
+                logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. possible cause (%v)", err)
+                time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
+                continue
+            }
             if err != nil {
                 logger.LogIf(ctx, fmt.Errorf("Unable to initialize IAM sub-system, some users may not be available %w", err))
             }
+        }
+        break
+    }
+
+    // Invalidate the old cred always, even upon error to avoid any leakage.
+    globalOldCred = auth.Credentials{}
     go sys.store.watch(ctx, sys)
 }
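
The new loadAll retry loop above follows a common wait-with-jitter pattern: transient errors (missing config, quorum not yet available, deadline exceeded) trigger a randomized sleep of up to five seconds and another attempt, while anything else is logged once and initialization proceeds. A standalone sketch of that pattern, with a hypothetical loadAll stub in place of sys.store.loadAll:

// Standalone sketch of the wait-and-retry loop above. loadAll is a
// hypothetical stub; the jitter of up to five seconds mirrors the real loop.
package main

import (
    "errors"
    "fmt"
    "math/rand"
    "time"
)

var errNotReady = errors.New("backend not ready")

// loadAll fails transiently for the first couple of attempts.
func loadAll(attempt int) error {
    if attempt < 3 {
        return errNotReady
    }
    return nil
}

func main() {
    r := rand.New(rand.NewSource(time.Now().UnixNano()))
    for attempt := 1; ; attempt++ {
        if err := loadAll(attempt); err != nil {
            if errors.Is(err, errNotReady) {
                // Transient: sleep a random 0-5s so peers do not retry in lockstep.
                fmt.Printf("attempt %d: %v, retrying\n", attempt, err)
                time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
                continue
            }
            // Non-transient: log once and carry on, as the IAM init does.
            fmt.Println("giving up:", err)
        }
        break
    }
    fmt.Println("init step done")
}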

cmd/metacache-set.go

@@ -614,9 +614,6 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
     if len(disks) < askDisks {
         err = InsufficientReadQuorum{}
-        if debugPrint {
-            console.Errorf("listPath: Insufficient disks, %d of %d needed are available", len(disks), askDisks)
-        }
         logger.LogIf(ctx, fmt.Errorf("listPath: Insufficient disks, %d of %d needed are available", len(disks), askDisks))
         cancel()
         return

cmd/object-api-interface.go

@@ -82,6 +82,7 @@ type ObjectLayer interface {
     StorageInfo(ctx context.Context, local bool) (StorageInfo, []error) // local queries only local disks
     // Bucket operations.
+    MakeMultipleBuckets(ctx context.Context, buckets ...string) error
     MakeBucketWithLocation(ctx context.Context, bucket string, opts BucketOptions) error
     GetBucketInfo(ctx context.Context, bucket string) (bucketInfo BucketInfo, err error)
     ListBuckets(ctx context.Context) (buckets []BucketInfo, err error)

cmd/server-main.go

@@ -304,18 +304,14 @@ func initAllSubsystems(ctx context.Context, newObject ObjectLayer) (err error) {
     if err != nil {
         return fmt.Errorf("Unable to list buckets to heal: %w", err)
     }
-    for _, bucket := range buckets {
-        if err = newObject.MakeBucketWithLocation(ctx, bucket.Name, BucketOptions{}); err != nil {
+    bucketNames := make([]string, len(buckets))
+    for i := range buckets {
+        bucketNames[i] = buckets[i].Name
+    }
+    if err = newObject.MakeMultipleBuckets(ctx, bucketNames...); err != nil {
         if errors.As(err, &wquorum) || errors.As(err, &rquorum) {
             // Return the error upwards for the caller to retry.
-            return fmt.Errorf("Unable to heal bucket: %w", err)
-        }
-        if _, ok := err.(BucketExists); !ok {
-            // ignore any other error and log for investigation.
-            logger.LogIf(ctx, err)
-            continue
-        }
-        // Bucket already exists, nothing that needs to be done.
+            return fmt.Errorf("Unable to heal buckets: %w", err)
         }
     }
 } else {

pkg/madmin/examples/heal-buckets-list.go (file deleted)

@@ -1,61 +0,0 @@
-// +build ignore
-
-package main
-
-/*
- * MinIO Cloud Storage, (C) 2017 MinIO, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-
-import (
-    "context"
-    "fmt"
-    "log"
-
-    "github.com/minio/minio/pkg/madmin"
-)
-
-func main() {
-    // Note: YOUR-ACCESSKEYID, YOUR-SECRETACCESSKEY are
-    // dummy values, please replace them with original values.
-
-    // API requests are secure (HTTPS) if secure=true and insecure (HTTP) otherwise.
-    // New returns an MinIO Admin client object.
-    madmClnt, err := madmin.New("your-minio.example.com:9000", "YOUR-ACCESSKEYID", "YOUR-SECRETACCESSKEY", true)
-    if err != nil {
-        log.Fatalln(err)
-    }
-
-    // List buckets that need healing
-    healBucketsList, err := madmClnt.ListBucketsHeal(context.Background())
-    if err != nil {
-        log.Fatalln(err)
-    }
-
-    for _, bucket := range healBucketsList {
-        if bucket.HealBucketInfo != nil {
-            switch healInfo := *bucket.HealBucketInfo; healInfo.Status {
-            case madmin.CanHeal:
-                fmt.Println(bucket.Name, " can be healed.")
-            case madmin.QuorumUnavailable:
-                fmt.Println(bucket.Name, " can't be healed until quorum is available.")
-            case madmin.Corrupted:
-                fmt.Println(bucket.Name, " can't be healed, not enough information.")
-            }
-        }
-        fmt.Println("bucket: ", bucket)
-    }
-}