fix: connect disks pre-emptively during startup (#10669)

connect disks pre-emptively upon startup, to ensure that
enough disks are connected at startup rather than waiting
for them.

we need to do this to avoid long wait times for the server
to be online when servers come up in a rolling upgrade
fashion.
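The core of the change in cmd/erasure-sets.go is a jittered one-shot connect before the periodic monitor loop. Below is a minimal standalone sketch of that pattern; connectDisks and monitorAndConnect are stand-ins for illustration, not the real MinIO API:

package main

import (
	"context"
	"math/rand"
	"time"
)

// connectDisks is a stand-in for the real reconnect logic, which would dial
// every configured endpoint and place the disks into the set topology.
func connectDisks() {}

// monitorAndConnect sleeps a random sub-second jitter (so servers restarted
// together in a rolling upgrade do not reconnect in lockstep), connects the
// disks once pre-emptively, and only then enters the periodic monitor loop.
func monitorAndConnect(ctx context.Context, monitorInterval time.Duration) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	time.Sleep(time.Duration(r.Float64() * float64(time.Second)))

	// Pre-emptively connect the disks if possible.
	connectDisks()

	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(monitorInterval):
			connectDisks()
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	monitorAndConnect(ctx, time.Second)
}
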
Branch: master
Author: Harshavardhana (committed by GitHub, 4 years ago)
Parent: 03991c5d41
Commit: 71b97fd3ac
5 changed files:

1. cmd/erasure-bucket.go (3 changed lines)
2. cmd/erasure-sets.go (10 changed lines)
3. cmd/erasure-zones.go (18 changed lines)
4. cmd/rest/client.go (13 changed lines)
5. cmd/storage-rest-client.go (4 changed lines)

cmd/erasure-bucket.go

@@ -18,6 +18,7 @@ package cmd

 import (
 	"context"
+	"errors"

 	"github.com/minio/minio-go/v7/pkg/s3utils"
 	"github.com/minio/minio/cmd/logger"
@@ -49,7 +50,7 @@ func (er erasureObjects) MakeBucketWithLocation(ctx context.Context, bucket stri
 		g.Go(func() error {
 			if storageDisks[index] != nil {
 				if err := storageDisks[index].MakeVol(ctx, bucket); err != nil {
-					if err != errVolumeExists {
+					if !errors.Is(err, errVolumeExists) {
 						logger.LogIf(ctx, err)
 					}
 					return err
cmd/erasure-sets.go

@@ -22,6 +22,7 @@ import (
 	"fmt"
 	"hash/crc32"
 	"io"
+	"math/rand"
 	"net/http"
 	"sort"
 	"sync"
@@ -235,7 +236,7 @@ func (s *erasureSets) connectDisks() {
 			disk.SetDiskID(format.Erasure.This)
 			if endpoint.IsLocal && disk.Healing() {
 				globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint())
-				logger.Info(fmt.Sprintf("Found the drive %s which needs healing, attempting to heal...", disk))
+				logger.Info(fmt.Sprintf("Found the drive %s that needs healing, attempting to heal...", disk))
 			}
 			s.erasureDisksMu.Lock()
@@ -261,6 +262,13 @@ func (s *erasureSets) connectDisks() {
 // endpoints by reconnecting them and making sure to place them into right position in
 // the set topology, this monitoring happens at a given monitoring interval.
 func (s *erasureSets) monitorAndConnectEndpoints(ctx context.Context, monitorInterval time.Duration) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	time.Sleep(time.Duration(r.Float64() * float64(time.Second)))
+
+	// Pre-emptively connect the disks if possible.
+	s.connectDisks()
+
 	for {
 		select {
 		case <-ctx.Done():

cmd/erasure-zones.go

@@ -410,24 +410,6 @@ func (z *erasureZones) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter
 // even if one of the sets fail to create buckets, we proceed all the successful
 // operations.
 func (z *erasureZones) MakeBucketWithLocation(ctx context.Context, bucket string, opts BucketOptions) error {
-	if z.SingleZone() {
-		if err := z.zones[0].MakeBucketWithLocation(ctx, bucket, opts); err != nil {
-			return err
-		}
-		// If it doesn't exist we get a new, so ignore errors
-		meta := newBucketMetadata(bucket)
-		if opts.LockEnabled {
-			meta.VersioningConfigXML = enabledBucketVersioningConfig
-			meta.ObjectLockConfigXML = enabledBucketObjectLockConfig
-		}
-		if err := meta.Save(ctx, z); err != nil {
-			return toObjectErr(err, bucket)
-		}
-		globalBucketMetadataSys.Set(bucket, meta)
-		return nil
-	}
 	g := errgroup.WithNErrs(len(z.zones))

 	// Create buckets in parallel across all sets.
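With the SingleZone short-circuit removed above, every MakeBucketWithLocation call takes the parallel per-zone path; a single-zone deployment simply fans out to one goroutine. A rough stand-in sketch of that fan-out, using golang.org/x/sync/errgroup and a hypothetical makeBucket helper rather than MinIO's internal errgroup package:

package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// makeBucket is a hypothetical per-zone bucket create.
func makeBucket(ctx context.Context, zone int, bucket string) error {
	fmt.Printf("zone %d: created bucket %q\n", zone, bucket)
	return nil
}

// makeBucketAllZones creates the bucket in every zone concurrently; with one
// zone this degenerates to a single goroutine, so no special case is needed.
func makeBucketAllZones(ctx context.Context, zones []int, bucket string) error {
	g, gctx := errgroup.WithContext(ctx)
	for _, z := range zones {
		z := z // capture loop variable for the goroutine
		g.Go(func() error {
			return makeBucket(gctx, z, bucket)
		})
	}
	return g.Wait()
}

func main() {
	if err := makeBucketAllZones(context.Background(), []int{0, 1}, "testbucket"); err != nil {
		fmt.Println("error:", err)
	}
}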

cmd/rest/client.go

@@ -21,6 +21,7 @@ import (
 	"errors"
 	"io"
 	"io/ioutil"
+	"math/rand"
 	"net/http"
 	"net/url"
 	"sync/atomic"
@@ -185,18 +186,18 @@ func (c *Client) MarkOffline() {
 	// Start goroutine that will attempt to reconnect.
 	// If server is already trying to reconnect this will have no effect.
 	if c.HealthCheckFn != nil && atomic.CompareAndSwapInt32(&c.connected, online, offline) {
-		go func(healthFunc func() bool) {
-			ticker := time.NewTicker(c.HealthCheckInterval)
-			defer ticker.Stop()
-			for range ticker.C {
+		r := rand.New(rand.NewSource(time.Now().UnixNano()))
+		go func() {
+			for {
 				if atomic.LoadInt32(&c.connected) == closed {
 					return
 				}
-				if healthFunc() {
+				if c.HealthCheckFn() {
 					atomic.CompareAndSwapInt32(&c.connected, offline, online)
 					return
 				}
+				time.Sleep(time.Duration(r.Float64() * float64(c.HealthCheckInterval)))
 			}
-		}(c.HealthCheckFn)
+		}()
 	}
 }
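The rest/client.go hunk replaces the fixed HealthCheckInterval ticker with a random sleep of up to one full interval between probes, so clients that were marked offline at the same moment spread their reconnect attempts out. The same loop can be exercised in isolation; the sketch below uses stand-in state constants and a fake probe rather than the real Client type:

package main

import (
	"fmt"
	"math/rand"
	"sync/atomic"
	"time"
)

const (
	offline int32 = 0
	online  int32 = 1
	closed  int32 = 2
)

// reconnect probes health with a randomized delay in (0, interval) between
// attempts instead of a fixed ticker, so many clients that went offline at
// the same moment do not all re-probe in lockstep.
func reconnect(state *int32, probe func() bool, interval time.Duration) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for {
		if atomic.LoadInt32(state) == closed {
			return
		}
		if probe() {
			atomic.CompareAndSwapInt32(state, offline, online)
			return
		}
		time.Sleep(time.Duration(r.Float64() * float64(interval)))
	}
}

func main() {
	var state int32 = offline
	attempts := 0
	// Pretend the peer becomes healthy on the third probe.
	probe := func() bool { attempts++; return attempts >= 3 }
	reconnect(&state, probe, 200*time.Millisecond)
	fmt.Println("state:", atomic.LoadInt32(&state), "attempts:", attempts)
}
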

cmd/storage-rest-client.go

@@ -121,9 +121,6 @@ type storageRESTClient struct {
 // permanently. The only way to restore the storage connection is at the xl-sets layer by xlsets.monitorAndConnectEndpoints()
 // after verifying format.json
 func (client *storageRESTClient) call(ctx context.Context, method string, values url.Values, body io.Reader, length int64) (io.ReadCloser, error) {
-	if !client.IsOnline() {
-		return nil, errDiskNotFound
-	}
 	if values == nil {
 		values = make(url.Values)
 	}
@@ -134,7 +131,6 @@ func (client *storageRESTClient) call(ctx context.Context, method string, values
 	}
 	err = toStorageErr(err)
 	return nil, err
 }
