diff --git a/cmd/background-newdisks-heal-ops.go b/cmd/background-newdisks-heal-ops.go index 794a65d98..f4c35a072 100644 --- a/cmd/background-newdisks-heal-ops.go +++ b/cmd/background-newdisks-heal-ops.go @@ -73,6 +73,8 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) { } } + go monitorLocalDisksInconsistentAndHeal(ctx, z, bgSeq) + go monitorLocalDisksAndHeal(ctx, z, bgSeq) } @@ -96,6 +98,50 @@ func getLocalDisksToHeal() (disksToHeal Endpoints) { } +func getLocalDisksToHealInconsistent() (refFormats []*formatErasureV3, diskFormats [][]*formatErasureV3, disksToHeal [][]StorageAPI) { + disksToHeal = make([][]StorageAPI, len(globalEndpoints)) + diskFormats = make([][]*formatErasureV3, len(globalEndpoints)) + refFormats = make([]*formatErasureV3, len(globalEndpoints)) + for k, ep := range globalEndpoints { + disksToHeal[k] = make([]StorageAPI, len(ep.Endpoints)) + diskFormats[k] = make([]*formatErasureV3, len(ep.Endpoints)) + formats := make([]*formatErasureV3, len(ep.Endpoints)) + storageDisks, _ := initStorageDisksWithErrors(ep.Endpoints) + for i, disk := range storageDisks { + if disk != nil { + format, err := loadFormatErasure(disk) + if err != nil { + // any error we don't care proceed. + continue + } + formats[i] = format + } + } + refFormat, err := getFormatErasureInQuorum(formats) + if err != nil { + logger.LogIf(GlobalContext, fmt.Errorf("No erasured disks are in quorum or too many disks are offline - please investigate immediately")) + continue + } + // We have obtained reference format - check if disks are inconsistent + for i, format := range formats { + if format == nil { + continue + } + if err := formatErasureV3Check(refFormat, format); err != nil { + if errors.Is(err, errInconsistentDisk) { + // Found inconsistencies - check which disk it is. + if storageDisks[i] != nil && storageDisks[i].IsLocal() { + disksToHeal[k][i] = storageDisks[i] + } + } + } + } + refFormats[k] = refFormat + diskFormats[k] = formats + } + return refFormats, diskFormats, disksToHeal +} + func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) { // Run the background healer globalBackgroundHealRoutine = newHealRoutine() @@ -104,6 +150,36 @@ func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) { globalBackgroundHealState.LaunchNewHealSequence(newBgHealSequence()) } +// monitorLocalDisksInconsistentAndHeal - ensures that inconsistent +// disks are healed appropriately. +func monitorLocalDisksInconsistentAndHeal(ctx context.Context, z *erasureServerSets, bgSeq *healSequence) { + // Perform automatic disk healing when a disk is found to be inconsistent. + for { + select { + case <-ctx.Done(): + return + case <-time.After(defaultMonitorNewDiskInterval): + waitForLowHTTPReq(int32(globalEndpoints.NEndpoints()), time.Second) + + refFormats, diskFormats, localDisksHeal := getLocalDisksToHealInconsistent() + for k := range refFormats { + for j, disk := range localDisksHeal[k] { + if disk == nil { + continue + } + format := diskFormats[k][j].Clone() + format.Erasure.Sets = refFormats[k].Erasure.Sets + if err := saveFormatErasure(disk, format, true); err != nil { + logger.LogIf(ctx, fmt.Errorf("Unable fix inconsistent format for drive %s: %w", disk, err)) + continue + } + globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint()) + } + } + } + } +} + // monitorLocalDisksAndHeal - ensures that detected new disks are healed // 1. Only the concerned erasure set will be listed and healed // 2. Only the node hosting the disk is responsible to perform the heal @@ -149,7 +225,10 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerSets, bgSeq * } // Calculate the set index where the current endpoint belongs + z.serverSets[zoneIdx].erasureDisksMu.RLock() + // Protect reading reference format. setIndex, _, err := findDiskIndex(z.serverSets[zoneIdx].format, format) + z.serverSets[zoneIdx].erasureDisksMu.RUnlock() if err != nil { printEndpointError(endpoint, err, false) continue @@ -173,7 +252,7 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerSets, bgSeq * logger.Info("Healing disk '%s' on %s zone complete", disk, humanize.Ordinal(i+1)) if err := disk.DeleteFile(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix), - healingTrackerFilename); err != nil { + healingTrackerFilename); err != nil && !errors.Is(err, errFileNotFound) { logger.LogIf(ctx, err) continue } diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index b94f45145..61f274bdb 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -83,7 +83,8 @@ type erasureSets struct { setCount, setDriveCount int listTolerancePerSet int - disksConnectEvent chan diskConnectInfo + monitorContextCancel context.CancelFunc + disksConnectEvent chan diskConnectInfo // Distribution algorithm of choice. distributionAlgo string @@ -220,23 +221,18 @@ func (s *erasureSets) connectDisks() { } return } + if endpoint.IsLocal && disk.Healing() { + globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint()) + logger.Info(fmt.Sprintf("Found the drive %s that needs healing, attempting to heal...", disk)) + } s.erasureDisksMu.RLock() setIndex, diskIndex, err := findDiskIndex(s.format, format) s.erasureDisksMu.RUnlock() if err != nil { - if endpoint.IsLocal { - globalBackgroundHealState.pushHealLocalDisks(endpoint) - logger.Info(fmt.Sprintf("Found inconsistent drive %s with format.json, attempting to heal... (%s)", endpoint, err)) - } else { - printEndpointError(endpoint, err, false) - } + printEndpointError(endpoint, err, false) return } disk.SetDiskID(format.Erasure.This) - if endpoint.IsLocal && disk.Healing() { - globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint()) - logger.Info(fmt.Sprintf("Found the drive %s that needs healing, attempting to heal...", disk)) - } s.erasureDisksMu.Lock() if s.erasureDisks[setIndex][diskIndex] != nil { @@ -341,7 +337,7 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto listTolerancePerSet := 3 // By default this is off - if env.Get("MINIO_API_LIST_STRICT_QUORUM", config.EnableOff) == config.EnableOn { + if env.Get("MINIO_API_LIST_STRICT_QUORUM", config.EnableOn) == config.EnableOn { listTolerancePerSet = -1 } @@ -412,8 +408,11 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto GlobalStaleUploadsCleanupInterval, GlobalStaleUploadsExpiry) } + mctx, mctxCancel := context.WithCancel(ctx) + s.monitorContextCancel = mctxCancel + // Start the disk monitoring and connect routine. - go s.monitorAndConnectEndpoints(ctx, defaultMonitorConnectEndpointInterval) + go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval) go s.maintainMRFList() go s.healMRFRoutine() @@ -1155,6 +1154,8 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) return err } + s.monitorContextCancel() // turn-off disk monitoring and replace format. + s.erasureDisksMu.Lock() // Replace with new reference format. @@ -1186,6 +1187,11 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) s.erasureDisksMu.Unlock() + mctx, mctxCancel := context.WithCancel(GlobalContext) + s.monitorContextCancel = mctxCancel + + go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval) + return nil } @@ -1269,14 +1275,6 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H return res, err } - for i, format := range formats { - if format != nil { - if ferr := formatErasureV3Check(refFormat, format); ferr != nil { - sErrs[i] = errUnformattedDisk - } - } - } - // Prepare heal-result res = madmin.HealResultItem{ Type: madmin.HealItemMetadata, @@ -1297,14 +1295,12 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H } if countErrs(sErrs, errUnformattedDisk) == 0 { - // No unformatted disks found disks are either offline - // or online, no healing is required. return res, errNoHealRequired } // Mark all UUIDs which might be offline, use list // of formats to mark them appropriately. - markUUIDsOffline(refFormat, formats) + markUUIDsOffline(refFormat, formats, sErrs) // Initialize a new set of set formats which will be written to disk. newFormatSets := newHealFormatSets(refFormat, s.setCount, s.setDriveCount, formats, sErrs) @@ -1358,6 +1354,8 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H return madmin.HealResultItem{}, err } + s.monitorContextCancel() // turn-off disk monitoring and replace format. + s.erasureDisksMu.Lock() // Replace with new reference format. @@ -1388,6 +1386,10 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H } s.erasureDisksMu.Unlock() + + mctx, mctxCancel := context.WithCancel(GlobalContext) + s.monitorContextCancel = mctxCancel + go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval) } return res, nil diff --git a/cmd/format-erasure.go b/cmd/format-erasure.go index 6d7352195..3cd03c86c 100644 --- a/cmd/format-erasure.go +++ b/cmd/format-erasure.go @@ -131,13 +131,17 @@ func (f *formatErasureV3) Clone() *formatErasureV3 { } // Returns formatErasure.Erasure.Version -func newFormatErasureV3(numSets int, setLen int) *formatErasureV3 { +func newFormatErasureV3(numSets int, setLen int, distributionAlgo string) *formatErasureV3 { format := &formatErasureV3{} format.Version = formatMetaVersionV1 format.Format = formatBackendErasure format.ID = mustGetUUID() format.Erasure.Version = formatErasureVersionV3 - format.Erasure.DistributionAlgo = formatErasureVersionV3DistributionAlgo + if distributionAlgo == "" { + format.Erasure.DistributionAlgo = formatErasureVersionV3DistributionAlgo + } else { + format.Erasure.DistributionAlgo = distributionAlgo + } format.Erasure.Sets = make([][]string, numSets) for i := 0; i < numSets; i++ { @@ -645,8 +649,8 @@ func formatErasureV3Check(reference *formatErasureV3, format *formatErasureV3) e } for j := range reference.Erasure.Sets[i] { if reference.Erasure.Sets[i][j] != format.Erasure.Sets[i][j] { - return fmt.Errorf("UUID on positions %d:%d do not match with, expected %s got %s", - i, j, reference.Erasure.Sets[i][j], format.Erasure.Sets[i][j]) + return fmt.Errorf("UUID on positions %d:%d do not match with, expected %s got %s: (%w)", + i, j, reference.Erasure.Sets[i][j], format.Erasure.Sets[i][j], errInconsistentDisk) } } } @@ -824,8 +828,8 @@ func fixFormatErasureV3(storageDisks []StorageAPI, endpoints Endpoints, formats } // initFormatErasure - save Erasure format configuration on all disks. -func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, setDriveCount int, deploymentID string, sErrs []error) (*formatErasureV3, error) { - format := newFormatErasureV3(setCount, setDriveCount) +func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, setDriveCount int, distributionAlgo string, deploymentID string, sErrs []error) (*formatErasureV3, error) { + format := newFormatErasureV3(setCount, setDriveCount, distributionAlgo) formats := make([]*formatErasureV3, len(storageDisks)) wantAtMost := ecDrivesNoConfig(setDriveCount) @@ -942,12 +946,15 @@ func getOfflineUUIDs(refFormat *formatErasureV3, formats []*formatErasureV3) (of } // Mark all UUIDs that are offline. -func markUUIDsOffline(refFormat *formatErasureV3, formats []*formatErasureV3) { +func markUUIDsOffline(refFormat *formatErasureV3, formats []*formatErasureV3, errs []error) { offlineUUIDs := getOfflineUUIDs(refFormat, formats) for i, set := range refFormat.Erasure.Sets { + setDriveCount := len(set) for j := range set { for _, offlineUUID := range offlineUUIDs { - if refFormat.Erasure.Sets[i][j] == offlineUUID { + if refFormat.Erasure.Sets[i][j] == offlineUUID && + errors.Is(errs[i*setDriveCount+j], errUnformattedDisk) { + // Unformatted drive gets an offline disk UUID refFormat.Erasure.Sets[i][j] = offlineDiskUUID } } diff --git a/cmd/format-erasure_test.go b/cmd/format-erasure_test.go index 2d723b64b..30a8ed859 100644 --- a/cmd/format-erasure_test.go +++ b/cmd/format-erasure_test.go @@ -27,7 +27,7 @@ import ( // Test get offline/online uuids. func TestGetUUIDs(t *testing.T) { - fmtV2 := newFormatErasureV3(4, 16) + fmtV2 := newFormatErasureV3(4, 16, "CRCMOD") formats := make([]*formatErasureV3, 64) for i := 0; i < 4; i++ { @@ -61,7 +61,12 @@ func TestGetUUIDs(t *testing.T) { t.Errorf("Expected offline count '16', got '%d'", gotCount) } - markUUIDsOffline(fmtV2, formats) + var errs []error + for i := 0; i < 4*16; i++ { + errs = append(errs, errUnformattedDisk) + } + + markUUIDsOffline(fmtV2, formats, errs) gotCount = 0 for i := range fmtV2.Erasure.Sets { for j := range fmtV2.Erasure.Sets[i] { @@ -93,7 +98,7 @@ func TestFixFormatV3(t *testing.T) { } } - format := newFormatErasureV3(1, 8) + format := newFormatErasureV3(1, 8, "CRCMOD") formats := make([]*formatErasureV3, 8) for j := 0; j < 8; j++ { @@ -127,7 +132,7 @@ func TestFixFormatV3(t *testing.T) { // tests formatErasureV3ThisEmpty conditions. func TestFormatErasureEmpty(t *testing.T) { - format := newFormatErasureV3(1, 16) + format := newFormatErasureV3(1, 16, "CRCMOD") formats := make([]*formatErasureV3, 16) for j := 0; j < 16; j++ { @@ -326,7 +331,7 @@ func TestGetFormatErasureInQuorumCheck(t *testing.T) { setCount := 2 setDriveCount := 16 - format := newFormatErasureV3(setCount, setDriveCount) + format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD") formats := make([]*formatErasureV3, 32) for i := 0; i < setCount; i++ { @@ -392,7 +397,7 @@ func TestGetErasureID(t *testing.T) { setCount := 2 setDriveCount := 8 - format := newFormatErasureV3(setCount, setDriveCount) + format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD") formats := make([]*formatErasureV3, 16) for i := 0; i < setCount; i++ { @@ -447,7 +452,7 @@ func TestNewFormatSets(t *testing.T) { setCount := 2 setDriveCount := 16 - format := newFormatErasureV3(setCount, setDriveCount) + format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD") formats := make([]*formatErasureV3, 32) errs := make([]error, 32) diff --git a/cmd/global-heal.go b/cmd/global-heal.go index 34a135dfa..8a2cad8b3 100644 --- a/cmd/global-heal.go +++ b/cmd/global-heal.go @@ -120,6 +120,12 @@ func healErasureSet(ctx context.Context, setIndex int, buckets []BucketInfo, dis Name: pathJoin(minioMetaBucket, bucketConfigPrefix), }) // add metadata .minio.sys/ bucket prefixes to heal + // Try to pro-actively heal backend-encrypted file. + bgSeq.sourceCh <- healSource{ + bucket: minioMetaBucket, + object: backendEncryptedFile, + } + // Heal all buckets with all objects for _, bucket := range buckets { // Heal current bucket diff --git a/cmd/object-api-common.go b/cmd/object-api-common.go index 601cb85d2..d26bbefee 100644 --- a/cmd/object-api-common.go +++ b/cmd/object-api-common.go @@ -18,6 +18,7 @@ package cmd import ( "context" + "errors" "sync" "strings" @@ -77,7 +78,11 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string) if !HasSuffix(entryPath, SlashSeparator) { // Delete the file entry. err := storage.DeleteFile(ctx, volume, entryPath) - if err != errDiskNotFound && err != errUnformattedDisk { + if !IsErrIgnored(err, []error{ + errDiskNotFound, + errUnformattedDisk, + errFileNotFound, + }...) { logger.LogIf(ctx, err) } return err @@ -85,11 +90,15 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string) // If it's a directory, list and call delFunc() for each entry. entries, err := storage.ListDir(ctx, volume, entryPath, -1) - // If entryPath prefix never existed, safe to ignore. - if err == errFileNotFound { + // If entryPath prefix never existed, safe to ignore + if errors.Is(err, errFileNotFound) { return nil } else if err != nil { // For any other errors fail. - if err != errDiskNotFound && err != errUnformattedDisk { + if !IsErrIgnored(err, []error{ + errDiskNotFound, + errUnformattedDisk, + errFileNotFound, + }...) { logger.LogIf(ctx, err) } return err @@ -98,7 +107,11 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string) // Entry path is empty, just delete it. if len(entries) == 0 { err = storage.DeleteFile(ctx, volume, entryPath) - if err != errDiskNotFound && err != errUnformattedDisk { + if !IsErrIgnored(err, []error{ + errDiskNotFound, + errUnformattedDisk, + errFileNotFound, + }...) { logger.LogIf(ctx, err) } return err diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index f2e481e00..2368d787f 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -278,7 +278,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, humanize.Ordinal(zoneCount), setCount, setDriveCount) // Initialize erasure code format on disks - format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, deploymentID, sErrs) + format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, "", deploymentID, sErrs) if err != nil { return nil, nil, err } diff --git a/cmd/storage-errors.go b/cmd/storage-errors.go index 9e0cf5995..b93aadf28 100644 --- a/cmd/storage-errors.go +++ b/cmd/storage-errors.go @@ -27,6 +27,9 @@ var errCorruptedFormat = StorageErr("corrupted backend format, specified disk mo // errUnformattedDisk - unformatted disk found. var errUnformattedDisk = StorageErr("unformatted disk found") +// errInconsistentDisk - inconsistent disk found. +var errInconsistentDisk = StorageErr("inconsistent disk found") + // errUnsupporteDisk - when disk does not support O_DIRECT flag. var errUnsupportedDisk = StorageErr("disk does not support O_DIRECT")