diff --git a/cmd/admin-handlers_test.go b/cmd/admin-handlers_test.go index 262ce9273..f990c21d9 100644 --- a/cmd/admin-handlers_test.go +++ b/cmd/admin-handlers_test.go @@ -109,14 +109,14 @@ func initTestXLObjLayer() (ObjectLayer, []string, error) { return nil, nil, err } endpoints := mustGetNewEndpoints(xlDirs...) - format, err := waitForFormatXL(true, endpoints, 1, 1, 16, "") + storageDisks, format, err := waitForFormatXL(true, endpoints, 1, 1, 16, "") if err != nil { removeRoots(xlDirs) return nil, nil, err } globalPolicySys = NewPolicySys() - objLayer, err := newXLSets(endpoints, format, 1, 16) + objLayer, err := newXLSets(endpoints, storageDisks, format, 1, 16) if err != nil { return nil, nil, err } diff --git a/cmd/config-encrypted.go b/cmd/config-encrypted.go index 0a6fc77cc..53a71d7c0 100644 --- a/cmd/config-encrypted.go +++ b/cmd/config-encrypted.go @@ -21,7 +21,6 @@ import ( "context" "errors" "fmt" - "strings" "unicode/utf8" etcd "github.com/coreos/etcd/clientv3" @@ -48,12 +47,18 @@ func handleEncryptedConfigBackend(objAPI ObjectLayer, server bool) error { // of the object layer. retryTimerCh := newRetryTimerSimple(doneCh) var stop bool + + rquorum := InsufficientReadQuorum{} + wquorum := InsufficientWriteQuorum{} + bucketNotFound := BucketNotFound{} + for !stop { select { case <-retryTimerCh: if encrypted, err = checkBackendEncrypted(objAPI); err != nil { - if err == errDiskNotFound || - strings.Contains(err.Error(), InsufficientReadQuorum{}.Error()) { + if errors.Is(err, errDiskNotFound) || + errors.As(err, &rquorum) || + errors.As(err, &bucketNotFound) { logger.Info("Waiting for config backend to be encrypted..") continue } @@ -100,9 +105,10 @@ func handleEncryptedConfigBackend(objAPI ObjectLayer, server bool) error { case <-retryTimerCh: // Migrate IAM configuration if err = migrateConfigPrefixToEncrypted(objAPI, globalOldCred, encrypted); err != nil { - if err == errDiskNotFound || - strings.Contains(err.Error(), InsufficientReadQuorum{}.Error()) || - strings.Contains(err.Error(), InsufficientWriteQuorum{}.Error()) { + if errors.Is(err, errDiskNotFound) || + errors.As(err, &rquorum) || + errors.As(err, &wquorum) || + errors.As(err, &bucketNotFound) { logger.Info("Waiting for config backend to be encrypted..") continue } diff --git a/cmd/format-xl.go b/cmd/format-xl.go index 6ddd1b27c..b966f7ad4 100644 --- a/cmd/format-xl.go +++ b/cmd/format-xl.go @@ -312,7 +312,7 @@ func quorumUnformattedDisks(errs []error) bool { } // loadFormatXLAll - load all format config from all input disks in parallel. -func loadFormatXLAll(storageDisks []StorageAPI) ([]*formatXLV3, []error) { +func loadFormatXLAll(storageDisks []StorageAPI, heal bool) ([]*formatXLV3, []error) { // Initialize list of errors. g := errgroup.WithNErrs(len(storageDisks)) @@ -331,6 +331,11 @@ func loadFormatXLAll(storageDisks []StorageAPI) ([]*formatXLV3, []error) { return err } formats[index] = format + if !heal { + // If no healing required, make the disks valid and + // online. + storageDisks[index].SetDiskID(format.XL.This) + } return nil }, index) } @@ -339,7 +344,15 @@ func loadFormatXLAll(storageDisks []StorageAPI) ([]*formatXLV3, []error) { return formats, g.Wait() } -func saveFormatXL(disk StorageAPI, format interface{}) error { +func saveFormatXL(disk StorageAPI, format interface{}, diskID string) error { + if format == nil || disk == nil { + return errDiskNotFound + } + + if err := makeFormatXLMetaVolumes(disk); err != nil { + return err + } + // Marshal and write to disk. formatBytes, err := json.Marshal(format) if err != nil { @@ -357,7 +370,12 @@ func saveFormatXL(disk StorageAPI, format interface{}) error { } // Rename file `uuid.json` --> `format.json`. - return disk.RenameFile(minioMetaBucket, tmpFormat, minioMetaBucket, formatConfigFile) + if err = disk.RenameFile(minioMetaBucket, tmpFormat, minioMetaBucket, formatConfigFile); err != nil { + return err + } + + disk.SetDiskID(diskID) + return nil } var ignoredHiddenDirectories = []string{ @@ -475,7 +493,7 @@ func formatXLGetDeploymentID(refFormat *formatXLV3, formats []*formatXLV3) (stri func formatXLFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatXLV3) (err error) { // Attempt to load all `format.json` from all disks. var sErrs []error - formats, sErrs := loadFormatXLAll(storageDisks) + formats, sErrs := loadFormatXLAll(storageDisks, false) for i, sErr := range sErrs { if _, ok := formatCriticalErrors[sErr]; ok { return fmt.Errorf("Disk %s: %w", endpoints[i], sErr) @@ -519,25 +537,38 @@ func formatXLFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, ref func formatXLFixLocalDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatXLV3) error { // If this server was down when the deploymentID was updated // then we make sure that we update the local disks with the deploymentID. - for index, storageDisk := range storageDisks { - if endpoints[index].IsLocal && storageDisk != nil && storageDisk.IsOnline() { - format, err := loadFormatXL(storageDisk) - if err != nil { - // Disk can be offline etc. - // ignore the errors seen here. - continue - } - if format.ID != "" { - continue - } - if !reflect.DeepEqual(format.XL.Sets, refFormat.XL.Sets) { - continue - } - format.ID = refFormat.ID - if err := saveFormatXL(storageDisk, format); err != nil { - logger.LogIf(context.Background(), err) - return fmt.Errorf("Unable to save format.json, %w", err) + + // Initialize errs to collect errors inside go-routine. + g := errgroup.WithNErrs(len(storageDisks)) + + for index := range storageDisks { + index := index + g.Go(func() error { + if endpoints[index].IsLocal && storageDisks[index] != nil && storageDisks[index].IsOnline() { + format, err := loadFormatXL(storageDisks[index]) + if err != nil { + // Disk can be offline etc. + // ignore the errors seen here. + return nil + } + if format.ID != "" { + return nil + } + if !reflect.DeepEqual(format.XL.Sets, refFormat.XL.Sets) { + return nil + } + format.ID = refFormat.ID + if err := saveFormatXL(storageDisks[index], format, format.XL.This); err != nil { + logger.LogIf(context.Background(), err) + return fmt.Errorf("Unable to save format.json, %w", err) + } } + return nil + }, index) + } + for _, err := range g.Wait() { + if err != nil { + return err } } return nil @@ -670,13 +701,7 @@ func saveFormatXLAll(ctx context.Context, storageDisks []StorageAPI, formats []* for index := range storageDisks { index := index g.Go(func() error { - if formats[index] == nil || storageDisks[index] == nil { - return errDiskNotFound - } - if err := makeFormatXLMetaVolumes(storageDisks[index]); err != nil { - return err - } - return saveFormatXL(storageDisks[index], formats[index]) + return saveFormatXL(storageDisks[index], formats[index], formats[index].XL.This) }, index) } @@ -738,25 +763,36 @@ func formatXLV3ThisEmpty(formats []*formatXLV3) bool { // fixFormatXLV3 - fix format XL configuration on all disks. func fixFormatXLV3(storageDisks []StorageAPI, endpoints Endpoints, formats []*formatXLV3) error { - for i, format := range formats { - if format == nil || !endpoints[i].IsLocal { - continue - } - // NOTE: This code is specifically needed when migrating version - // V1 to V2 to V3, in a scenario such as this we only need to handle - // single sets since we never used to support multiple sets in releases - // with V1 format version. - if len(format.XL.Sets) > 1 { - continue - } - if format.XL.This == "" { - formats[i].XL.This = format.XL.Sets[0][i] - if err := saveFormatXL(storageDisks[i], formats[i]); err != nil { - return err + g := errgroup.WithNErrs(len(formats)) + for i := range formats { + i := i + g.Go(func() error { + if formats[i] == nil || !endpoints[i].IsLocal { + return nil + } + // NOTE: This code is specifically needed when migrating version + // V1 to V2 to V3, in a scenario such as this we only need to handle + // single sets since we never used to support multiple sets in releases + // with V1 format version. + if len(formats[i].XL.Sets) > 1 { + return nil } + if formats[i].XL.This == "" { + formats[i].XL.This = formats[i].XL.Sets[0][i] + if err := saveFormatXL(storageDisks[i], formats[i], formats[i].XL.This); err != nil { + return err + } + } + return nil + }, i) + } + for _, err := range g.Wait() { + if err != nil { + return err } } return nil + } // initFormatXL - save XL format configuration on all disks. @@ -827,6 +863,9 @@ func ecDrivesNoConfig(drivesPerSet int) int { // Make XL backend meta volumes. func makeFormatXLMetaVolumes(disk StorageAPI) error { + if disk == nil { + return errDiskNotFound + } // Attempt to create MinIO internal buckets. return disk.MakeVolBulk(minioMetaBucket, minioMetaTmpBucket, minioMetaMultipartBucket, dataUsageBucket) } diff --git a/cmd/format-xl_test.go b/cmd/format-xl_test.go index 0a5b0070e..10f5adde8 100644 --- a/cmd/format-xl_test.go +++ b/cmd/format-xl_test.go @@ -112,7 +112,7 @@ func TestFixFormatV3(t *testing.T) { t.Fatal(err) } - newFormats, errs := loadFormatXLAll(storageDisks) + newFormats, errs := loadFormatXLAll(storageDisks, false) for _, err := range errs { if err != nil && err != errUnformattedDisk { t.Fatal(err) diff --git a/cmd/naughty-disk_test.go b/cmd/naughty-disk_test.go index 47215262a..649a797ac 100644 --- a/cmd/naughty-disk_test.go +++ b/cmd/naughty-disk_test.go @@ -78,7 +78,12 @@ func (d *naughtyDisk) calcError() (err error) { return nil } +func (d *naughtyDisk) GetDiskID() (string, error) { + return d.disk.GetDiskID() +} + func (d *naughtyDisk) SetDiskID(id string) { + d.disk.SetDiskID(id) } func (d *naughtyDisk) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (info dataUsageCache, err error) { diff --git a/cmd/posix-diskid-check.go b/cmd/posix-diskid-check.go index 084c71985..5ce8dd6a5 100644 --- a/cmd/posix-diskid-check.go +++ b/cmd/posix-diskid-check.go @@ -32,7 +32,7 @@ func (p *posixDiskIDCheck) String() string { } func (p *posixDiskIDCheck) IsOnline() bool { - storedDiskID, err := p.storage.getDiskID() + storedDiskID, err := p.storage.GetDiskID() if err != nil { return false } @@ -51,6 +51,10 @@ func (p *posixDiskIDCheck) Close() error { return p.storage.Close() } +func (p *posixDiskIDCheck) GetDiskID() (string, error) { + return p.diskID, nil +} + func (p *posixDiskIDCheck) SetDiskID(id string) { p.diskID = id } @@ -61,7 +65,7 @@ func (p *posixDiskIDCheck) isDiskStale() bool { // or create format.json return false } - storedDiskID, err := p.storage.getDiskID() + storedDiskID, err := p.storage.GetDiskID() if err == nil && p.diskID == storedDiskID { return false } diff --git a/cmd/posix.go b/cmd/posix.go index 14ccb6ede..f46e4644d 100644 --- a/cmd/posix.go +++ b/cmd/posix.go @@ -424,7 +424,8 @@ func (s *posix) getVolDir(volume string) (string, error) { return volumeDir, nil } -func (s *posix) getDiskID() (string, error) { +// GetDiskID - returns the cached disk uuid +func (s *posix) GetDiskID() (string, error) { s.RLock() diskID := s.diskID fileInfo := s.formatFileInfo @@ -440,7 +441,7 @@ func (s *posix) getDiskID() (string, error) { defer s.Unlock() // If somebody else updated the disk ID and changed the time, return what they got. - if !s.formatLastCheck.Equal(lastCheck) { + if !lastCheck.IsZero() && !s.formatLastCheck.Equal(lastCheck) && diskID != "" { // Somebody else got the lock first. return diskID, nil } @@ -448,10 +449,13 @@ func (s *posix) getDiskID() (string, error) { fi, err := os.Stat(formatFile) if err != nil { // If the disk is still not initialized. - return "", err + if os.IsNotExist(err) { + return "", errUnformattedDisk + } + return "", errCorruptedFormat } - if xioutil.SameFile(fi, fileInfo) { + if xioutil.SameFile(fi, fileInfo) && diskID != "" { // If the file has not changed, just return the cached diskID information. s.formatLastCheck = time.Now() return diskID, nil @@ -459,12 +463,12 @@ func (s *posix) getDiskID() (string, error) { b, err := ioutil.ReadFile(formatFile) if err != nil { - return "", err + return "", errCorruptedFormat } format := &formatXLV3{} var json = jsoniter.ConfigCompatibleWithStandardLibrary if err = json.Unmarshal(b, &format); err != nil { - return "", err + return "", errCorruptedFormat } s.diskID = format.XL.This s.formatFileInfo = fi diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index aa5214ecb..848190c35 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -221,15 +221,14 @@ func IsServerResolvable(endpoint Endpoint) error { // connect to list of endpoints and load all XL disk formats, validate the formats are correct // and are in quorum, if no formats are found attempt to initialize all of them for the first // time. additionally make sure to close all the disks used in this attempt. -func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, zoneCount, setCount, drivesPerSet int, deploymentID string) (*formatXLV3, error) { +func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, zoneCount, setCount, drivesPerSet int, deploymentID string) ([]StorageAPI, *formatXLV3, error) { // Initialize all storage disks storageDisks, errs := initStorageDisksWithErrors(endpoints) - defer closeStorageDisks(storageDisks) for i, err := range errs { if err != nil { if err != errDiskNotFound { - return nil, fmt.Errorf("Disk %s: %w", endpoints[i], err) + return nil, nil, fmt.Errorf("Disk %s: %w", endpoints[i], err) } if retryCount >= 5 { logger.Info("Unable to connect to %s: %v\n", endpoints[i], IsServerResolvable(endpoints[i])) @@ -238,11 +237,11 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, } // Attempt to load all `format.json` from all disks. - formatConfigs, sErrs := loadFormatXLAll(storageDisks) + formatConfigs, sErrs := loadFormatXLAll(storageDisks, false) // Check if we have for i, sErr := range sErrs { if _, ok := formatCriticalErrors[sErr]; ok { - return nil, fmt.Errorf("Disk %s: %w", endpoints[i], sErr) + return nil, nil, fmt.Errorf("Disk %s: %w", endpoints[i], sErr) } // not critical error but still print the error, nonetheless, which is perhaps unhandled if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 { @@ -258,7 +257,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, // with expected XL format. For example if a user is // trying to pool FS backend into an XL set. if err := checkFormatXLValues(formatConfigs, drivesPerSet); err != nil { - return nil, err + return nil, nil, err } // All disks report unformatted we should initialized everyone. @@ -269,24 +268,24 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, // Initialize erasure code format on disks format, err := initFormatXL(context.Background(), storageDisks, setCount, drivesPerSet, deploymentID) if err != nil { - return nil, err + return nil, nil, err } // Assign globalDeploymentID on first run for the // minio server managing the first disk globalDeploymentID = format.ID - return format, nil + return storageDisks, format, nil } // Return error when quorum unformatted disks - indicating we are // waiting for first server to be online. if quorumUnformattedDisks(sErrs) && !firstDisk { - return nil, errNotFirstDisk + return nil, nil, errNotFirstDisk } // Return error when quorum unformatted disks but waiting for rest // of the servers to be online. if quorumUnformattedDisks(sErrs) && firstDisk { - return nil, errFirstDiskWait + return nil, nil, errFirstDiskWait } // Following function is added to fix a regressions which was introduced @@ -295,54 +294,54 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, // the disk UUID association. Below function is called to handle and fix // this regression, for more info refer https://github.com/minio/minio/issues/5667 if err := fixFormatXLV3(storageDisks, endpoints, formatConfigs); err != nil { - return nil, err + return nil, nil, err } // If any of the .This field is still empty, we return error. if formatXLV3ThisEmpty(formatConfigs) { - return nil, errXLV3ThisEmpty + return nil, nil, errXLV3ThisEmpty } format, err := getFormatXLInQuorum(formatConfigs) if err != nil { - return nil, err + return nil, nil, err } if format.ID == "" { // Not a first disk, wait until first disk fixes deploymentID if !firstDisk { - return nil, errNotFirstDisk + return nil, nil, errNotFirstDisk } if err = formatXLFixDeploymentID(endpoints, storageDisks, format); err != nil { - return nil, err + return nil, nil, err } } globalDeploymentID = format.ID if err = formatXLFixLocalDeploymentID(endpoints, storageDisks, format); err != nil { - return nil, err + return nil, nil, err } // The will always recreate some directories inside .minio.sys of // the local disk such as tmp, multipart and background-ops initXLMetaVolumesInLocalDisks(storageDisks, formatConfigs) - return format, nil + return storageDisks, format, nil } // Format disks before initialization of object layer. -func waitForFormatXL(firstDisk bool, endpoints Endpoints, zoneCount, setCount, drivesPerSet int, deploymentID string) (format *formatXLV3, err error) { +func waitForFormatXL(firstDisk bool, endpoints Endpoints, zoneCount, setCount, drivesPerSet int, deploymentID string) ([]StorageAPI, *formatXLV3, error) { if len(endpoints) == 0 || setCount == 0 || drivesPerSet == 0 { - return nil, errInvalidArgument + return nil, nil, errInvalidArgument } - if err = formatXLMigrateLocalEndpoints(endpoints); err != nil { - return nil, err + if err := formatXLMigrateLocalEndpoints(endpoints); err != nil { + return nil, nil, err } - if err = formatXLCleanupTmpLocalEndpoints(endpoints); err != nil { - return nil, err + if err := formatXLCleanupTmpLocalEndpoints(endpoints); err != nil { + return nil, nil, err } // prepare getElapsedTime() to calculate elapsed time since we started trying formatting disks. @@ -359,7 +358,7 @@ func waitForFormatXL(firstDisk bool, endpoints Endpoints, zoneCount, setCount, d for { select { case <-ticker.C: - format, err := connectLoadInitFormats(tries, firstDisk, endpoints, zoneCount, setCount, drivesPerSet, deploymentID) + storageDisks, format, err := connectLoadInitFormats(tries, firstDisk, endpoints, zoneCount, setCount, drivesPerSet, deploymentID) if err != nil { tries++ switch err { @@ -380,12 +379,12 @@ func waitForFormatXL(firstDisk bool, endpoints Endpoints, zoneCount, setCount, d continue default: // For all other unhandled errors we exit and fail. - return nil, err + return nil, nil, err } } - return format, nil + return storageDisks, format, nil case <-globalOSSignalCh: - return nil, fmt.Errorf("Initializing data volumes gracefully stopped") + return nil, nil, fmt.Errorf("Initializing data volumes gracefully stopped") } } } diff --git a/cmd/server-main.go b/cmd/server-main.go index bde114bd7..d746260a5 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -213,14 +213,15 @@ func initSafeMode(buckets []BucketInfo) (err error) { for { rquorum := InsufficientReadQuorum{} wquorum := InsufficientWriteQuorum{} - + bucketNotFound := BucketNotFound{} var err error select { case n := <-retryTimerCh: if err = initAllSubsystems(buckets, newObject); err != nil { if errors.Is(err, errDiskNotFound) || errors.As(err, &rquorum) || - errors.As(err, &wquorum) { + errors.As(err, &wquorum) || + errors.As(err, &bucketNotFound) { if n < 5 { logger.Info("Waiting for all sub-systems to be initialized..") } else { @@ -245,7 +246,6 @@ func initAllSubsystems(buckets []BucketInfo, newObject ObjectLayer) (err error) if err = globalConfigSys.Init(newObject); err != nil { return fmt.Errorf("Unable to initialize config system: %w", err) } - if globalEtcdClient != nil { // **** WARNING **** // Migrating to encrypted backend on etcd should happen before initialization of @@ -419,7 +419,6 @@ func serverMain(ctx *cli.Context) { globalObjectAPI = newObject globalObjLayerMutex.Unlock() - // Calls New() and initializes all sub-systems. newAllSubsystems() // Enable healing to heal drives if possible @@ -428,6 +427,9 @@ func serverMain(ctx *cli.Context) { initLocalDisksAutoHeal(GlobalContext, newObject) } + go startBackgroundOps(GlobalContext, newObject) + + // Calls New() and initializes all sub-systems. buckets, err := newObject.ListBuckets(GlobalContext) if err != nil { logger.Fatal(err, "Unable to list buckets") @@ -451,8 +453,6 @@ func serverMain(ctx *cli.Context) { initFederatorBackend(buckets, newObject) } - go startBackgroundOps(GlobalContext, newObject) - // Disable safe mode operation, after all initialization is over. globalObjLayerMutex.Lock() globalSafeMode = false diff --git a/cmd/storage-interface.go b/cmd/storage-interface.go index 61c4d880c..84fa2f166 100644 --- a/cmd/storage-interface.go +++ b/cmd/storage-interface.go @@ -30,6 +30,7 @@ type StorageAPI interface { IsOnline() bool // Returns true if disk is online. Hostname() string // Returns host name if remote host. Close() error + GetDiskID() (string, error) SetDiskID(id string) DiskInfo() (info DiskInfo, err error) diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go index c24ef3ba6..164d8e5a6 100644 --- a/cmd/storage-rest-client.go +++ b/cmd/storage-rest-client.go @@ -172,6 +172,10 @@ func (client *storageRESTClient) CrawlAndGetDataUsage(ctx context.Context, cache return newCache, newCache.deserialize(b) } +func (client *storageRESTClient) GetDiskID() (string, error) { + return client.diskID, nil +} + func (client *storageRESTClient) SetDiskID(id string) { client.diskID = id } diff --git a/cmd/storage-rest-server.go b/cmd/storage-rest-server.go index d2c921e40..317f27d85 100644 --- a/cmd/storage-rest-server.go +++ b/cmd/storage-rest-server.go @@ -110,7 +110,7 @@ func (s *storageRESTServer) IsValid(w http.ResponseWriter, r *http.Request) bool // or create format.json return true } - storedDiskID, err := s.storage.getDiskID() + storedDiskID, err := s.storage.GetDiskID() if err == nil && diskID == storedDiskID { // If format.json is available and request sent the right disk-id, we allow the request return true diff --git a/cmd/test-utils_test.go b/cmd/test-utils_test.go index 995d6e5e1..6200540f4 100644 --- a/cmd/test-utils_test.go +++ b/cmd/test-utils_test.go @@ -193,13 +193,13 @@ func prepareXLSets32() (ObjectLayer, []string, error) { endpoints := append(endpoints1, endpoints2...) fsDirs := append(fsDirs1, fsDirs2...) - format, err := waitForFormatXL(true, endpoints, 1, 2, 16, "") + storageDisks, format, err := waitForFormatXL(true, endpoints, 1, 2, 16, "") if err != nil { removeRoots(fsDirs) return nil, nil, err } - objAPI, err := newXLSets(endpoints, format, 2, 16) + objAPI, err := newXLSets(endpoints, storageDisks, format, 2, 16) if err != nil { return nil, nil, err } diff --git a/cmd/xl-sets.go b/cmd/xl-sets.go index 38147131d..41e32cbfc 100644 --- a/cmd/xl-sets.go +++ b/cmd/xl-sets.go @@ -45,17 +45,12 @@ type setsStorageAPI [][]StorageAPI // setsDsyncLockers is encapsulated type for Close() type setsDsyncLockers [][]dsync.NetLocker -func (s setsStorageAPI) Close() error { - for i := 0; i < len(s); i++ { - for j, disk := range s[i] { - if disk == nil { - continue - } - disk.Close() - s[i][j] = nil - } +func (s setsStorageAPI) Copy() [][]StorageAPI { + copyS := make(setsStorageAPI, len(s)) + for i, disks := range s { + copyS[i] = append(copyS[i], disks...) } - return nil + return copyS } // Information of a new disk connection @@ -174,6 +169,25 @@ func connectEndpoint(endpoint Endpoint) (StorageAPI, *formatXLV3, error) { return disk, format, nil } +// findDiskIndex - returns the i,j'th position of the input `diskID` against the reference +// format, after successful validation. +// - i'th position is the set index +// - j'th position is the disk index in the current set +func findDiskIndexByDiskID(refFormat *formatXLV3, diskID string) (int, int, error) { + if diskID == offlineDiskUUID { + return -1, -1, fmt.Errorf("diskID: %s is offline", diskID) + } + for i := 0; i < len(refFormat.XL.Sets); i++ { + for j := 0; j < len(refFormat.XL.Sets[0]); j++ { + if refFormat.XL.Sets[i][j] == diskID { + return i, j, nil + } + } + } + + return -1, -1, fmt.Errorf("diskID: %s not found", diskID) +} + // findDiskIndex - returns the i,j'th position of the input `format` against the reference // format, after successful validation. // - i'th position is the set index @@ -198,18 +212,6 @@ func findDiskIndex(refFormat, format *formatXLV3) (int, int, error) { return -1, -1, fmt.Errorf("diskID: %s not found", format.XL.This) } -// connectDisksWithQuorum is same as connectDisks but waits -// for quorum number of formatted disks to be online in any given sets. -func (s *xlSets) connectDisksWithQuorum() { - for { - s.connectDisks() - if s.getOnlineDisksCount() > len(s.endpoints)/2 { - return - } - time.Sleep(100 * time.Millisecond) - } -} - // connectDisks - attempt to connect all the endpoints, loads format // and re-arranges the disks in proper position. func (s *xlSets) connectDisks() { @@ -288,7 +290,7 @@ func (s *xlSets) GetDisks(setIndex int) func() []StorageAPI { const defaultMonitorConnectEndpointInterval = time.Second * 10 // Set to 10 secs. // Initialize new set of erasure coded sets. -func newXLSets(endpoints Endpoints, format *formatXLV3, setCount int, drivesPerSet int) (*xlSets, error) { +func newXLSets(endpoints Endpoints, storageDisks []StorageAPI, format *formatXLV3, setCount int, drivesPerSet int) (*xlSets, error) { endpointStrings := make([]string, len(endpoints)) for i, endpoint := range endpoints { if endpoint.IsLocal { @@ -322,13 +324,25 @@ func newXLSets(endpoints Endpoints, format *formatXLV3, setCount int, drivesPerS // setCount * drivesPerSet with each memory upto blockSizeV1. bp := bpool.NewBytePoolCap(setCount*drivesPerSet, blockSizeV1, blockSizeV1*2) - for i := 0; i < len(format.XL.Sets); i++ { + for i := 0; i < setCount; i++ { s.xlDisks[i] = make([]StorageAPI, drivesPerSet) s.xlLockers[i] = make([]dsync.NetLocker, drivesPerSet) var endpoints Endpoints for j := 0; j < drivesPerSet; j++ { - endpoints = append(endpoints, s.endpoints[i*s.drivesPerSet+j]) + endpoints = append(endpoints, s.endpoints[i*drivesPerSet+j]) + // Rely on endpoints list to initialize, init lockers and available disks. + s.xlLockers[i][j] = newLockAPI(s.endpoints[i*drivesPerSet+j]) + if storageDisks[i*drivesPerSet+j] == nil { + continue + } + if diskID, derr := storageDisks[i*drivesPerSet+j].GetDiskID(); derr == nil { + m, n, err := findDiskIndexByDiskID(format, diskID) + if err != nil { + continue + } + s.xlDisks[m][n] = storageDisks[i*drivesPerSet+j] + } } // Initialize xl objects for a given set. @@ -345,19 +359,8 @@ func newXLSets(endpoints Endpoints, format *formatXLV3, setCount int, drivesPerS GlobalMultipartCleanupInterval, GlobalMultipartExpiry, GlobalServiceDoneCh) } - // Rely on endpoints list to initialize, init lockers. - for i := 0; i < s.setCount; i++ { - for j := 0; j < s.drivesPerSet; j++ { - s.xlLockers[i][j] = newLockAPI(s.endpoints[i*s.drivesPerSet+j]) - } - } - - // Connect disks right away, but wait until we have `format.json` quorum. - s.connectDisksWithQuorum() - // Start the disk monitoring and connect routine. go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval) - go s.maintainMRFList() go s.healMRFRoutine() @@ -421,52 +424,40 @@ func (s *xlSets) StorageInfo(ctx context.Context, local bool) StorageInfo { return storageInfo } - storageDisks, dErrs := initStorageDisksWithErrors(s.endpoints) - defer closeStorageDisks(storageDisks) - - formats, sErrs := loadFormatXLAll(storageDisks) - - combineStorageErrors := func(diskErrs []error, storageErrs []error) []error { - for index, err := range diskErrs { - if err != nil { - storageErrs[index] = err - } - } - return storageErrs - } - - errs := combineStorageErrors(dErrs, sErrs) - drivesInfo := formatsToDrivesInfo(s.endpoints, formats, errs) + s.xlDisksMu.RLock() + storageDisks := s.xlDisks.Copy() + s.xlDisksMu.RUnlock() - // fill all the available/online endpoints - for k, drive := range drivesInfo { - if drive.UUID == "" { - continue - } - if formats[k] == nil { - continue - } - for i := range formats[k].XL.Sets { - for j, driveUUID := range formats[k].XL.Sets[i] { - if driveUUID == drive.UUID { - storageInfo.Backend.Sets[i][j] = drive + for i := 0; i < s.setCount; i++ { + for j := 0; j < s.drivesPerSet; j++ { + if storageDisks[i][j] == nil { + storageInfo.Backend.Sets[i][j] = madmin.DriveInfo{ + State: madmin.DriveStateOffline, + Endpoint: s.endpointStrings[i*s.drivesPerSet+j], } + continue } - } - } - // fill all the offline, missing endpoints as well. - for _, drive := range drivesInfo { - if drive.UUID == "" { - for i := range storageInfo.Backend.Sets { - for j := range storageInfo.Backend.Sets[i] { - if storageInfo.Backend.Sets[i][j].Endpoint == drive.Endpoint { - continue + diskID, err := storageDisks[i][j].GetDiskID() + if err != nil { + if err == errUnformattedDisk { + storageInfo.Backend.Sets[i][j] = madmin.DriveInfo{ + State: madmin.DriveStateUnformatted, + Endpoint: storageDisks[i][j].String(), + UUID: "", } - if storageInfo.Backend.Sets[i][j].Endpoint == "" { - storageInfo.Backend.Sets[i][j] = drive - break + } else { + storageInfo.Backend.Sets[i][j] = madmin.DriveInfo{ + State: madmin.DriveStateCorrupt, + Endpoint: storageDisks[i][j].String(), + UUID: "", } } + continue + } + storageInfo.Backend.Sets[i][j] = madmin.DriveInfo{ + State: madmin.DriveStateOk, + Endpoint: storageDisks[i][j].String(), + UUID: diskID, } } } @@ -1347,7 +1338,7 @@ func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) { } }(storageDisks) - formats, sErrs := loadFormatXLAll(storageDisks) + formats, sErrs := loadFormatXLAll(storageDisks, false) if err = checkFormatXLValues(formats, s.drivesPerSet); err != nil { return err } @@ -1372,14 +1363,33 @@ func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) { // with new format. s.disksConnectDoneCh <- struct{}{} - // Replace the new format. + // Replace with new reference format. s.format = refFormat // Close all existing disks and reconnect all the disks. s.xlDisksMu.Lock() - s.xlDisks.Close() + for _, disk := range storageDisks { + if disk == nil { + continue + } + + diskID, err := disk.GetDiskID() + if err != nil { + continue + } + + m, n, err := findDiskIndexByDiskID(refFormat, diskID) + if err != nil { + continue + } + + if s.xlDisks[m][n] != nil { + s.xlDisks[m][n].Close() + } + + s.xlDisks[m][n] = disk + } s.xlDisksMu.Unlock() - s.connectDisks() // Restart monitoring loop to monitor reformatted disks again. go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval) @@ -1460,7 +1470,7 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe markRootDisksAsDown(storageDisks) - formats, sErrs := loadFormatXLAll(storageDisks) + formats, sErrs := loadFormatXLAll(storageDisks, true) if err = checkFormatXLValues(formats, s.drivesPerSet); err != nil { return madmin.HealResultItem{}, err } @@ -1566,9 +1576,28 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe // Disconnect/relinquish all existing disks, lockers and reconnect the disks, lockers. s.xlDisksMu.Lock() - s.xlDisks.Close() + for _, disk := range storageDisks { + if disk == nil { + continue + } + + diskID, err := disk.GetDiskID() + if err != nil { + continue + } + + m, n, err := findDiskIndexByDiskID(refFormat, diskID) + if err != nil { + continue + } + + if s.xlDisks[m][n] != nil { + s.xlDisks[m][n].Close() + } + + s.xlDisks[m][n] = disk + } s.xlDisksMu.Unlock() - s.connectDisks() // Restart our monitoring loop to start monitoring newly formatted disks. go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval) diff --git a/cmd/xl-sets_test.go b/cmd/xl-sets_test.go index a07b22ed1..3552f8687 100644 --- a/cmd/xl-sets_test.go +++ b/cmd/xl-sets_test.go @@ -76,23 +76,23 @@ func TestNewXLSets(t *testing.T) { } endpoints := mustGetNewEndpoints(erasureDisks...) - _, err := waitForFormatXL(true, endpoints, 1, 0, 16, "") + _, _, err := waitForFormatXL(true, endpoints, 1, 0, 16, "") if err != errInvalidArgument { t.Fatalf("Expecting error, got %s", err) } - _, err = waitForFormatXL(true, nil, 1, 1, 16, "") + _, _, err = waitForFormatXL(true, nil, 1, 1, 16, "") if err != errInvalidArgument { t.Fatalf("Expecting error, got %s", err) } // Initializes all erasure disks - format, err := waitForFormatXL(true, endpoints, 1, 1, 16, "") + storageDisks, format, err := waitForFormatXL(true, endpoints, 1, 1, 16, "") if err != nil { t.Fatalf("Unable to format disks for erasure, %s", err) } - if _, err := newXLSets(endpoints, format, 1, 16); err != nil { + if _, err := newXLSets(endpoints, storageDisks, format, 1, 16); err != nil { t.Fatalf("Unable to initialize erasure") } } diff --git a/cmd/xl-v1.go b/cmd/xl-v1.go index a853c42ed..36718abcb 100644 --- a/cmd/xl-v1.go +++ b/cmd/xl-v1.go @@ -182,7 +182,7 @@ func (xl xlObjects) StorageInfo(ctx context.Context, local bool) StorageInfo { disks = xl.getDisks() } else { for i, d := range xl.getDisks() { - if endpoints[i].IsLocal { + if endpoints[i].IsLocal && d.Hostname() == "" { // Append this local disk since local flag is true disks = append(disks, d) } diff --git a/cmd/xl-zones.go b/cmd/xl-zones.go index 35ed23ef2..d6ae69a47 100644 --- a/cmd/xl-zones.go +++ b/cmd/xl-zones.go @@ -60,12 +60,13 @@ func newXLZones(endpointZones EndpointZones) (ObjectLayer, error) { deploymentID string err error - formats = make([]*formatXLV3, len(endpointZones)) - z = &xlZones{zones: make([]*xlSets, len(endpointZones))} + formats = make([]*formatXLV3, len(endpointZones)) + storageDisks = make([][]StorageAPI, len(endpointZones)) + z = &xlZones{zones: make([]*xlSets, len(endpointZones))} ) local := endpointZones.FirstLocal() for i, ep := range endpointZones { - formats[i], err = waitForFormatXL(local, ep.Endpoints, i+1, + storageDisks[i], formats[i], err = waitForFormatXL(local, ep.Endpoints, i+1, ep.SetCount, ep.DrivesPerSet, deploymentID) if err != nil { return nil, err @@ -73,7 +74,7 @@ func newXLZones(endpointZones EndpointZones) (ObjectLayer, error) { if deploymentID == "" { deploymentID = formats[i].ID } - z.zones[i], err = newXLSets(ep.Endpoints, formats[i], ep.SetCount, ep.DrivesPerSet) + z.zones[i], err = newXLSets(ep.Endpoints, storageDisks[i], formats[i], ep.SetCount, ep.DrivesPerSet) if err != nil { return nil, err } diff --git a/pkg/madmin/heal-commands.go b/pkg/madmin/heal-commands.go index 33d6bbe5c..47910122e 100644 --- a/pkg/madmin/heal-commands.go +++ b/pkg/madmin/heal-commands.go @@ -82,10 +82,11 @@ const ( // Drive state constants const ( - DriveStateOk string = "ok" - DriveStateOffline = "offline" - DriveStateCorrupt = "corrupt" - DriveStateMissing = "missing" + DriveStateOk string = "ok" + DriveStateOffline = "offline" + DriveStateCorrupt = "corrupt" + DriveStateMissing = "missing" + DriveStateUnformatted = "unformatted" // only returned by disk ) // HealDriveInfo - struct for an individual drive info item.