From b16781846edfda08500a9a7f7653f1529b00460e Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 3 Aug 2020 18:17:48 -0700 Subject: [PATCH] allow server to start even with corrupted/faulty disks (#10175) --- cmd/config/errors.go | 6 ------ cmd/erasure-healing.go | 16 ++++++++++------ cmd/erasure-sets.go | 22 +--------------------- cmd/erasure.go | 15 ++++++++------- cmd/format-erasure.go | 29 ++++++----------------------- cmd/format-erasure_test.go | 5 +++-- cmd/prepare-storage.go | 6 +----- cmd/storage-errors.go | 4 ++-- cmd/xl-storage.go | 28 +++++++++++++++++++++++++++- 9 files changed, 58 insertions(+), 73 deletions(-) diff --git a/cmd/config/errors.go b/cmd/config/errors.go index 8dceba492..3ab86687e 100644 --- a/cmd/config/errors.go +++ b/cmd/config/errors.go @@ -187,12 +187,6 @@ Example 1: "", ) - ErrCorruptedBackend = newErrFn( - "Unable to use the specified backend, pre-existing content detected", - "Please ensure your disk mount does not have any pre-existing content", - "", - ) - ErrUnableToWriteInBackend = newErrFn( "Unable to write to the backend", "Please ensure MinIO binary has write permissions for the backend", diff --git a/cmd/erasure-healing.go b/cmd/erasure-healing.go index edba922ac..596487bfd 100644 --- a/cmd/erasure-healing.go +++ b/cmd/erasure-healing.go @@ -18,6 +18,7 @@ package cmd import ( "context" + "errors" "fmt" "io" "sync" @@ -197,10 +198,10 @@ func listAllBuckets(storageDisks []StorageAPI, healBuckets map[string]VolInfo) ( // Only heal on disks where we are sure that healing is needed. We can expand // this list as and when we figure out more errors can be added to this list safely. func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime time.Time) bool { - switch erErr { - case errFileNotFound, errFileVersionNotFound: + switch { + case errors.Is(erErr, errFileNotFound) || errors.Is(erErr, errFileVersionNotFound): return true - case errCorruptedFormat: + case errors.Is(erErr, errCorruptedFormat): return true } if erErr == nil { @@ -686,9 +687,9 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid // or when er.meta is not readable in read quorum disks. var notFoundErasureMeta, corruptedErasureMeta int for _, readErr := range errs { - if readErr == errFileNotFound || readErr == errFileVersionNotFound { + if errors.Is(readErr, errFileNotFound) || errors.Is(readErr, errFileVersionNotFound) { notFoundErasureMeta++ - } else if readErr == errCorruptedFormat { + } else if errors.Is(readErr, errCorruptedFormat) { corruptedErasureMeta++ } } @@ -699,7 +700,10 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid // double counting when both parts and er.meta // are not available. if errs[i] != dataErrs[i] { - if dataErrs[i] == errFileNotFound || dataErrs[i] == errFileVersionNotFound { + if IsErr(dataErrs[i], []error{ + errFileNotFound, + errFileVersionNotFound, + }...) { notFoundParts++ } } diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index bee3b11b8..9164a91c9 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -1199,21 +1199,11 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) } }(storageDisks) - formats, sErrs := loadFormatErasureAll(storageDisks, false) + formats, _ := loadFormatErasureAll(storageDisks, false) if err = checkFormatErasureValues(formats, s.drivesPerSet); err != nil { return err } - for index, sErr := range sErrs { - if sErr != nil { - // Look for acceptable heal errors, for any other - // errors we should simply quit and return. - if _, ok := formatHealErrors[sErr]; !ok { - return fmt.Errorf("Disk %s: %w", s.endpoints[index], sErr) - } - } - } - refFormat, err := getFormatErasureInQuorum(formats) if err != nil { return err @@ -1357,16 +1347,6 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H res.After.Drives[k] = madmin.HealDriveInfo(v) } - for index, sErr := range sErrs { - if sErr != nil { - // Look for acceptable heal errors, for any other - // errors we should simply quit and return. - if _, ok := formatHealErrors[sErr]; !ok { - return res, fmt.Errorf("Disk %s: %w", s.endpoints[index], sErr) - } - } - } - if countErrs(sErrs, errUnformattedDisk) == 0 { // No unformatted disks found disks are either offline // or online, no healing is required. diff --git a/cmd/erasure.go b/cmd/erasure.go index 2ab4a191b..ea91f88dc 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -18,6 +18,7 @@ package cmd import ( "context" + "errors" "fmt" "sort" "sync" @@ -89,18 +90,18 @@ func (d byDiskTotal) Less(i, j int) bool { func diskErrToDriveState(err error) (state string) { state = madmin.DriveStateUnknown - switch err { - case errDiskNotFound: + switch { + case errors.Is(err, errDiskNotFound): state = madmin.DriveStateOffline - case errCorruptedFormat: + case errors.Is(err, errCorruptedFormat): state = madmin.DriveStateCorrupt - case errUnformattedDisk: + case errors.Is(err, errUnformattedDisk): state = madmin.DriveStateUnformatted - case errDiskAccessDenied: + case errors.Is(err, errDiskAccessDenied): state = madmin.DriveStatePermission - case errFaultyDisk: + case errors.Is(err, errFaultyDisk): state = madmin.DriveStateFaulty - case nil: + case err == nil: state = madmin.DriveStateOk } return diff --git a/cmd/format-erasure.go b/cmd/format-erasure.go index 990cb047a..dd75c0f06 100644 --- a/cmd/format-erasure.go +++ b/cmd/format-erasure.go @@ -27,7 +27,6 @@ import ( "sync" humanize "github.com/dustin/go-humanize" - "github.com/minio/minio/cmd/config" "github.com/minio/minio/cmd/config/storageclass" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/color" @@ -58,18 +57,6 @@ const ( // Offline disk UUID represents an offline disk. const offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff" -// Healing is only supported for the list of errors mentioned here. -var formatHealErrors = map[error]struct{}{ - errUnformattedDisk: {}, - errDiskNotFound: {}, -} - -// List of errors considered critical for disk formatting. -var formatCriticalErrors = map[error]struct{}{ - errCorruptedFormat: {}, - errFaultyDisk: {}, -} - // Used to detect the version of "xl" format. type formatErasureVersionDetect struct { Erasure struct { @@ -415,7 +402,8 @@ func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) { } if !isHiddenDirectories(vols...) { // 'format.json' not found, but we found user data, reject such disks. - return nil, errCorruptedFormat + return nil, fmt.Errorf("some unexpected files '%v' found on %s: %w", + vols, disk, errCorruptedFormat) } // No other data found, its a fresh disk. return nil, errUnformattedDisk @@ -490,7 +478,8 @@ func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatE } else if deploymentID != format.ID { // DeploymentID found earlier doesn't match with the // current format.json's ID. - return "", errCorruptedFormat + return "", fmt.Errorf("Deployment IDs do not match expected %s, got %s: %w", + deploymentID, format.ID, errCorruptedFormat) } } } @@ -500,14 +489,7 @@ func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatE // formatErasureFixDeploymentID - Add deployment id if it is not present. func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) (err error) { // Attempt to load all `format.json` from all disks. - var sErrs []error - formats, sErrs := loadFormatErasureAll(storageDisks, false) - for i, sErr := range sErrs { - if _, ok := formatCriticalErrors[sErr]; ok { - return config.ErrCorruptedBackend(err).Hint(fmt.Sprintf("Clear any pre-existing content on %s", endpoints[i])) - } - } - + formats, _ := loadFormatErasureAll(storageDisks, false) for index := range formats { // If the Erasure sets do not match, set those formats to nil, // We do not have to update the ID on those format.json file. @@ -515,6 +497,7 @@ func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI formats[index] = nil } } + refFormat.ID, err = formatErasureGetDeploymentID(refFormat, formats) if err != nil { return err diff --git a/cmd/format-erasure_test.go b/cmd/format-erasure_test.go index 656d59a63..e64863aef 100644 --- a/cmd/format-erasure_test.go +++ b/cmd/format-erasure_test.go @@ -18,6 +18,7 @@ package cmd import ( "encoding/json" + "errors" "io/ioutil" "os" "reflect" @@ -436,8 +437,8 @@ func TestGetErasureID(t *testing.T) { } formats[2].ID = "bad-id" - if _, err = formatErasureGetDeploymentID(quorumFormat, formats); err != errCorruptedFormat { - t.Fatal("Unexpected Success") + if _, err = formatErasureGetDeploymentID(quorumFormat, formats); !errors.Is(err, errCorruptedFormat) { + t.Fatalf("Unexpect error %s", err) } } diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index 21aa2b68f..f128b4b7a 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -27,7 +27,6 @@ import ( "time" "github.com/dustin/go-humanize" - "github.com/minio/minio/cmd/config" xhttp "github.com/minio/minio/cmd/http" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/sync/errgroup" @@ -253,10 +252,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false) // Check if we have for i, sErr := range sErrs { - if _, ok := formatCriticalErrors[sErr]; ok { - return nil, nil, config.ErrCorruptedBackend(err).Hint(fmt.Sprintf("Clear any pre-existing content on %s", endpoints[i])) - } - // not critical error but still print the error, nonetheless, which is perhaps unhandled + // print the error, nonetheless, which is perhaps unhandled if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 { if sErr != nil { logger.Info("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr) diff --git a/cmd/storage-errors.go b/cmd/storage-errors.go index bcd38f4b0..4b989cf5d 100644 --- a/cmd/storage-errors.go +++ b/cmd/storage-errors.go @@ -19,10 +19,10 @@ package cmd import "os" // errUnexpected - unexpected error, requires manual intervention. -var errUnexpected = StorageErr("Unexpected error, please report this issue at https://github.com/minio/minio/issues") +var errUnexpected = StorageErr("unexpected error, please report this issue at https://github.com/minio/minio/issues") // errCorruptedFormat - corrupted backend format. -var errCorruptedFormat = StorageErr("corrupted backend format, please join https://slack.min.io for assistance") +var errCorruptedFormat = StorageErr("corrupted backend format, specified disk mount has unexpected previous content") // errUnformattedDisk - unformatted disk found. var errUnformattedDisk = StorageErr("unformatted disk found") diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 21c34f65c..9b0d58658 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -505,6 +505,7 @@ func (s *xlStorage) GetDiskID() (string, error) { // Somebody else got the lock first. return diskID, nil } + formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile) fi, err := os.Stat(formatFile) if err != nil { @@ -520,8 +521,12 @@ func (s *xlStorage) GetDiskID() (string, error) { } else if os.IsPermission(err) { return "", errDiskAccessDenied } - return "", err + logger.LogIf(GlobalContext, err) // log unexpected errors + return "", errCorruptedFormat + } else if os.IsPermission(err) { + return "", errDiskAccessDenied } + logger.LogIf(GlobalContext, err) // log unexpected errors return "", errCorruptedFormat } @@ -533,13 +538,34 @@ func (s *xlStorage) GetDiskID() (string, error) { b, err := ioutil.ReadFile(formatFile) if err != nil { + // If the disk is still not initialized. + if os.IsNotExist(err) { + _, err = os.Stat(s.diskPath) + if err == nil { + // Disk is present but missing `format.json` + return "", errUnformattedDisk + } + if os.IsNotExist(err) { + return "", errDiskNotFound + } else if os.IsPermission(err) { + return "", errDiskAccessDenied + } + logger.LogIf(GlobalContext, err) // log unexpected errors + return "", errCorruptedFormat + } else if os.IsPermission(err) { + return "", errDiskAccessDenied + } + logger.LogIf(GlobalContext, err) // log unexpected errors return "", errCorruptedFormat } + format := &formatErasureV3{} var json = jsoniter.ConfigCompatibleWithStandardLibrary if err = json.Unmarshal(b, &format); err != nil { + logger.LogIf(GlobalContext, err) // log unexpected errors return "", errCorruptedFormat } + s.diskID = format.Erasure.This s.formatFileInfo = fi s.formatLastCheck = time.Now()