From 71b97fd3ac95b95e12e5995d8ce900e1b1b348e8 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Tue, 13 Oct 2020 18:28:42 -0700
Subject: [PATCH] fix: connect disks pre-emptively during startup (#10669)

connect disks pre-emptively upon startup, to ensure that enough
disks are connected at startup rather than waiting for them.

we need to do this to avoid long wait times for the server to come
online when servers are brought up in a rolling-upgrade fashion
---
 cmd/erasure-bucket.go      |  3 ++-
 cmd/erasure-sets.go        | 10 +++++++++-
 cmd/erasure-zones.go       | 18 ------------------
 cmd/rest/client.go         | 13 +++++++------
 cmd/storage-rest-client.go |  4 ----
 5 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/cmd/erasure-bucket.go b/cmd/erasure-bucket.go
index f632c4ed4..08d45e559 100644
--- a/cmd/erasure-bucket.go
+++ b/cmd/erasure-bucket.go
@@ -18,6 +18,7 @@ package cmd
 
 import (
 	"context"
+	"errors"
 
 	"github.com/minio/minio-go/v7/pkg/s3utils"
 	"github.com/minio/minio/cmd/logger"
@@ -49,7 +50,7 @@ func (er erasureObjects) MakeBucketWithLocation(ctx context.Context, bucket stri
 		g.Go(func() error {
 			if storageDisks[index] != nil {
 				if err := storageDisks[index].MakeVol(ctx, bucket); err != nil {
-					if err != errVolumeExists {
+					if !errors.Is(err, errVolumeExists) {
 						logger.LogIf(ctx, err)
 					}
 					return err
diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go
index 7a60bb1cf..73ab63074 100644
--- a/cmd/erasure-sets.go
+++ b/cmd/erasure-sets.go
@@ -22,6 +22,7 @@ import (
 	"fmt"
 	"hash/crc32"
 	"io"
+	"math/rand"
 	"net/http"
 	"sort"
 	"sync"
@@ -235,7 +236,7 @@ func (s *erasureSets) connectDisks() {
 			disk.SetDiskID(format.Erasure.This)
 			if endpoint.IsLocal && disk.Healing() {
 				globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint())
-				logger.Info(fmt.Sprintf("Found the drive %s which needs healing, attempting to heal...", disk))
+				logger.Info(fmt.Sprintf("Found the drive %s that needs healing, attempting to heal...", disk))
 			}
 
 			s.erasureDisksMu.Lock()
@@ -261,6 +262,13 @@ func (s *erasureSets) connectDisks() {
 // endpoints by reconnecting them and making sure to place them into right position in
 // the set topology, this monitoring happens at a given monitoring interval.
 func (s *erasureSets) monitorAndConnectEndpoints(ctx context.Context, monitorInterval time.Duration) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+	time.Sleep(time.Duration(r.Float64() * float64(time.Second)))
+
+	// Pre-emptively connect the disks if possible.
+	s.connectDisks()
+
 	for {
 		select {
 		case <-ctx.Done():
diff --git a/cmd/erasure-zones.go b/cmd/erasure-zones.go
index 932b9e444..850456255 100644
--- a/cmd/erasure-zones.go
+++ b/cmd/erasure-zones.go
@@ -410,24 +410,6 @@ func (z *erasureZones) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter
 // even if one of the sets fail to create buckets, we proceed all the successful
 // operations.
 func (z *erasureZones) MakeBucketWithLocation(ctx context.Context, bucket string, opts BucketOptions) error {
-	if z.SingleZone() {
-		if err := z.zones[0].MakeBucketWithLocation(ctx, bucket, opts); err != nil {
-			return err
-		}
-
-		// If it doesn't exist we get a new, so ignore errors
-		meta := newBucketMetadata(bucket)
-		if opts.LockEnabled {
-			meta.VersioningConfigXML = enabledBucketVersioningConfig
-			meta.ObjectLockConfigXML = enabledBucketObjectLockConfig
-		}
-		if err := meta.Save(ctx, z); err != nil {
-			return toObjectErr(err, bucket)
-		}
-		globalBucketMetadataSys.Set(bucket, meta)
-		return nil
-	}
-
 	g := errgroup.WithNErrs(len(z.zones))
 
 	// Create buckets in parallel across all sets.
diff --git a/cmd/rest/client.go b/cmd/rest/client.go
index ef1aeeab7..bb8e3f006 100644
--- a/cmd/rest/client.go
+++ b/cmd/rest/client.go
@@ -21,6 +21,7 @@ import (
 	"errors"
 	"io"
 	"io/ioutil"
+	"math/rand"
 	"net/http"
 	"net/url"
 	"sync/atomic"
@@ -185,18 +186,18 @@ func (c *Client) MarkOffline() {
 	// Start goroutine that will attempt to reconnect.
 	// If server is already trying to reconnect this will have no effect.
 	if c.HealthCheckFn != nil && atomic.CompareAndSwapInt32(&c.connected, online, offline) {
-		go func(healthFunc func() bool) {
-			ticker := time.NewTicker(c.HealthCheckInterval)
-			defer ticker.Stop()
-			for range ticker.C {
+		r := rand.New(rand.NewSource(time.Now().UnixNano()))
+		go func() {
+			for {
 				if atomic.LoadInt32(&c.connected) == closed {
 					return
 				}
-				if healthFunc() {
+				if c.HealthCheckFn() {
 					atomic.CompareAndSwapInt32(&c.connected, offline, online)
 					return
 				}
+				time.Sleep(time.Duration(r.Float64() * float64(c.HealthCheckInterval)))
 			}
-		}(c.HealthCheckFn)
+		}()
 	}
 }
diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go
index 956dcc658..d8abd8884 100644
--- a/cmd/storage-rest-client.go
+++ b/cmd/storage-rest-client.go
@@ -121,9 +121,6 @@ type storageRESTClient struct {
 // permanently. The only way to restore the storage connection is at the xl-sets layer by xlsets.monitorAndConnectEndpoints()
 // after verifying format.json
 func (client *storageRESTClient) call(ctx context.Context, method string, values url.Values, body io.Reader, length int64) (io.ReadCloser, error) {
-	if !client.IsOnline() {
-		return nil, errDiskNotFound
-	}
 	if values == nil {
 		values = make(url.Values)
 	}
@@ -134,7 +131,6 @@ func (client *storageRESTClient) call(ctx context.Context, method string, values
 	}
 
 	err = toStorageErr(err)
-
 	return nil, err
 }
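
Note on the cmd/erasure-sets.go change: monitorAndConnectEndpoints now sleeps a random fraction of a second and calls connectDisks() once before entering its monitoring loop, so a fleet of servers restarted together (e.g. during a rolling upgrade) neither waits a full monitorInterval for its first connect nor dials in lockstep. Below is a minimal standalone sketch of that pattern; the connect callback and the main function are hypothetical stand-ins for s.connectDisks() and the server startup path, not MinIO's actual code:

```go
package main

import (
	"context"
	"fmt"
	"math/rand"
	"time"
)

// monitorAndConnect jitters startup by up to one second, connects once
// pre-emptively, then reconnects at every monitorInterval tick until the
// context is cancelled. connect stands in for s.connectDisks().
func monitorAndConnect(ctx context.Context, monitorInterval time.Duration, connect func()) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	// Random sub-second delay: peers restarted together spread out their first dial.
	time.Sleep(time.Duration(r.Float64() * float64(time.Second)))

	// Connect immediately instead of waiting a full interval.
	connect()

	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(monitorInterval):
			connect()
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	monitorAndConnect(ctx, time.Second, func() { fmt.Println("connecting disks...") })
}
```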
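Note on the cmd/rest/client.go change: MarkOffline drops the fixed time.Ticker in favor of sleeping a random fraction of HealthCheckInterval between probes, and the CompareAndSwapInt32 guard ensures at most one reconnect goroutine runs per client. A self-contained sketch of that state machine follows; the lower-cased type, the state-constant values, and the health-check stub are assumptions for illustration, not the package's actual definitions:

```go
package main

import (
	"fmt"
	"math/rand"
	"sync/atomic"
	"time"
)

// Connection states; the concrete values are assumed for this sketch.
const (
	closed  int32 = iota // client shut down permanently
	offline              // unreachable, a prober goroutine is running
	online               // healthy
)

type client struct {
	connected           int32
	healthCheckFn       func() bool
	healthCheckInterval time.Duration
}

// markOffline flips the client online->offline. The CAS guarantees at most
// one prober goroutine per client; the prober retries after sleeping a
// random fraction of the interval until the check passes or the client closes.
func (c *client) markOffline() {
	if c.healthCheckFn != nil && atomic.CompareAndSwapInt32(&c.connected, online, offline) {
		r := rand.New(rand.NewSource(time.Now().UnixNano()))
		go func() {
			for {
				if atomic.LoadInt32(&c.connected) == closed {
					return
				}
				if c.healthCheckFn() {
					atomic.CompareAndSwapInt32(&c.connected, offline, online)
					return
				}
				time.Sleep(time.Duration(r.Float64() * float64(c.healthCheckInterval)))
			}
		}()
	}
}

func main() {
	var probes int32
	c := &client{
		connected:           online,
		healthCheckInterval: 100 * time.Millisecond,
		// Succeed on the third probe, standing in for a real health endpoint.
		healthCheckFn: func() bool { return atomic.AddInt32(&probes, 1) >= 3 },
	}
	c.markOffline()
	time.Sleep(time.Second)
	fmt.Println("state:", atomic.LoadInt32(&c.connected), "probes:", atomic.LoadInt32(&probes))
}
```

Randomizing the probe spacing trades a bounded amount of extra reconnect latency for avoiding synchronized probe storms when many clients mark the same peer offline at once.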