allow server to start even with corrupted/faulty disks (#10175)

master
Harshavardhana 4 years ago committed by GitHub
parent 5ce82b45da
commit b16781846e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 6
      cmd/config/errors.go
  2. 16
      cmd/erasure-healing.go
  3. 22
      cmd/erasure-sets.go
  4. 15
      cmd/erasure.go
  5. 29
      cmd/format-erasure.go
  6. 5
      cmd/format-erasure_test.go
  7. 6
      cmd/prepare-storage.go
  8. 4
      cmd/storage-errors.go
  9. 28
      cmd/xl-storage.go

@ -187,12 +187,6 @@ Example 1:
"", "",
) )
ErrCorruptedBackend = newErrFn(
"Unable to use the specified backend, pre-existing content detected",
"Please ensure your disk mount does not have any pre-existing content",
"",
)
ErrUnableToWriteInBackend = newErrFn( ErrUnableToWriteInBackend = newErrFn(
"Unable to write to the backend", "Unable to write to the backend",
"Please ensure MinIO binary has write permissions for the backend", "Please ensure MinIO binary has write permissions for the backend",

@ -18,6 +18,7 @@ package cmd
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"io" "io"
"sync" "sync"
@ -197,10 +198,10 @@ func listAllBuckets(storageDisks []StorageAPI, healBuckets map[string]VolInfo) (
// Only heal on disks where we are sure that healing is needed. We can expand // Only heal on disks where we are sure that healing is needed. We can expand
// this list as and when we figure out more errors can be added to this list safely. // this list as and when we figure out more errors can be added to this list safely.
func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime time.Time) bool { func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime time.Time) bool {
switch erErr { switch {
case errFileNotFound, errFileVersionNotFound: case errors.Is(erErr, errFileNotFound) || errors.Is(erErr, errFileVersionNotFound):
return true return true
case errCorruptedFormat: case errors.Is(erErr, errCorruptedFormat):
return true return true
} }
if erErr == nil { if erErr == nil {
@ -686,9 +687,9 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
// or when er.meta is not readable in read quorum disks. // or when er.meta is not readable in read quorum disks.
var notFoundErasureMeta, corruptedErasureMeta int var notFoundErasureMeta, corruptedErasureMeta int
for _, readErr := range errs { for _, readErr := range errs {
if readErr == errFileNotFound || readErr == errFileVersionNotFound { if errors.Is(readErr, errFileNotFound) || errors.Is(readErr, errFileVersionNotFound) {
notFoundErasureMeta++ notFoundErasureMeta++
} else if readErr == errCorruptedFormat { } else if errors.Is(readErr, errCorruptedFormat) {
corruptedErasureMeta++ corruptedErasureMeta++
} }
} }
@ -699,7 +700,10 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
// double counting when both parts and er.meta // double counting when both parts and er.meta
// are not available. // are not available.
if errs[i] != dataErrs[i] { if errs[i] != dataErrs[i] {
if dataErrs[i] == errFileNotFound || dataErrs[i] == errFileVersionNotFound { if IsErr(dataErrs[i], []error{
errFileNotFound,
errFileVersionNotFound,
}...) {
notFoundParts++ notFoundParts++
} }
} }

@ -1199,21 +1199,11 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error)
} }
}(storageDisks) }(storageDisks)
formats, sErrs := loadFormatErasureAll(storageDisks, false) formats, _ := loadFormatErasureAll(storageDisks, false)
if err = checkFormatErasureValues(formats, s.drivesPerSet); err != nil { if err = checkFormatErasureValues(formats, s.drivesPerSet); err != nil {
return err return err
} }
for index, sErr := range sErrs {
if sErr != nil {
// Look for acceptable heal errors, for any other
// errors we should simply quit and return.
if _, ok := formatHealErrors[sErr]; !ok {
return fmt.Errorf("Disk %s: %w", s.endpoints[index], sErr)
}
}
}
refFormat, err := getFormatErasureInQuorum(formats) refFormat, err := getFormatErasureInQuorum(formats)
if err != nil { if err != nil {
return err return err
@ -1357,16 +1347,6 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
res.After.Drives[k] = madmin.HealDriveInfo(v) res.After.Drives[k] = madmin.HealDriveInfo(v)
} }
for index, sErr := range sErrs {
if sErr != nil {
// Look for acceptable heal errors, for any other
// errors we should simply quit and return.
if _, ok := formatHealErrors[sErr]; !ok {
return res, fmt.Errorf("Disk %s: %w", s.endpoints[index], sErr)
}
}
}
if countErrs(sErrs, errUnformattedDisk) == 0 { if countErrs(sErrs, errUnformattedDisk) == 0 {
// No unformatted disks found; disks are either offline // No unformatted disks found; disks are either offline
// or online, no healing is required. // or online, no healing is required.

@ -18,6 +18,7 @@ package cmd
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"sort" "sort"
"sync" "sync"
@ -89,18 +90,18 @@ func (d byDiskTotal) Less(i, j int) bool {
func diskErrToDriveState(err error) (state string) { func diskErrToDriveState(err error) (state string) {
state = madmin.DriveStateUnknown state = madmin.DriveStateUnknown
switch err { switch {
case errDiskNotFound: case errors.Is(err, errDiskNotFound):
state = madmin.DriveStateOffline state = madmin.DriveStateOffline
case errCorruptedFormat: case errors.Is(err, errCorruptedFormat):
state = madmin.DriveStateCorrupt state = madmin.DriveStateCorrupt
case errUnformattedDisk: case errors.Is(err, errUnformattedDisk):
state = madmin.DriveStateUnformatted state = madmin.DriveStateUnformatted
case errDiskAccessDenied: case errors.Is(err, errDiskAccessDenied):
state = madmin.DriveStatePermission state = madmin.DriveStatePermission
case errFaultyDisk: case errors.Is(err, errFaultyDisk):
state = madmin.DriveStateFaulty state = madmin.DriveStateFaulty
case nil: case err == nil:
state = madmin.DriveStateOk state = madmin.DriveStateOk
} }
return return

@ -27,7 +27,6 @@ import (
"sync" "sync"
humanize "github.com/dustin/go-humanize" humanize "github.com/dustin/go-humanize"
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/cmd/config/storageclass" "github.com/minio/minio/cmd/config/storageclass"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/color" "github.com/minio/minio/pkg/color"
@ -58,18 +57,6 @@ const (
// Offline disk UUID represents an offline disk. // Offline disk UUID represents an offline disk.
const offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff" const offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff"
// Healing is only supported for the list of errors mentioned here.
var formatHealErrors = map[error]struct{}{
errUnformattedDisk: {},
errDiskNotFound: {},
}
// List of errors considered critical for disk formatting.
var formatCriticalErrors = map[error]struct{}{
errCorruptedFormat: {},
errFaultyDisk: {},
}
// Used to detect the version of "xl" format. // Used to detect the version of "xl" format.
type formatErasureVersionDetect struct { type formatErasureVersionDetect struct {
Erasure struct { Erasure struct {
@ -415,7 +402,8 @@ func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) {
} }
if !isHiddenDirectories(vols...) { if !isHiddenDirectories(vols...) {
// 'format.json' not found, but we found user data, reject such disks. // 'format.json' not found, but we found user data, reject such disks.
return nil, errCorruptedFormat return nil, fmt.Errorf("some unexpected files '%v' found on %s: %w",
vols, disk, errCorruptedFormat)
} }
// No other data found, it's a fresh disk. // No other data found, it's a fresh disk.
return nil, errUnformattedDisk return nil, errUnformattedDisk
@ -490,7 +478,8 @@ func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatE
} else if deploymentID != format.ID { } else if deploymentID != format.ID {
// DeploymentID found earlier doesn't match with the // DeploymentID found earlier doesn't match with the
// current format.json's ID. // current format.json's ID.
return "", errCorruptedFormat return "", fmt.Errorf("Deployment IDs do not match expected %s, got %s: %w",
deploymentID, format.ID, errCorruptedFormat)
} }
} }
} }
@ -500,14 +489,7 @@ func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatE
// formatErasureFixDeploymentID - Add deployment id if it is not present. // formatErasureFixDeploymentID - Add deployment id if it is not present.
func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) (err error) { func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) (err error) {
// Attempt to load all `format.json` from all disks. // Attempt to load all `format.json` from all disks.
var sErrs []error formats, _ := loadFormatErasureAll(storageDisks, false)
formats, sErrs := loadFormatErasureAll(storageDisks, false)
for i, sErr := range sErrs {
if _, ok := formatCriticalErrors[sErr]; ok {
return config.ErrCorruptedBackend(err).Hint(fmt.Sprintf("Clear any pre-existing content on %s", endpoints[i]))
}
}
for index := range formats { for index := range formats {
// If the Erasure sets do not match, set those formats to nil, // If the Erasure sets do not match, set those formats to nil,
// We do not have to update the ID on those format.json files. // We do not have to update the ID on those format.json files.
@ -515,6 +497,7 @@ func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI
formats[index] = nil formats[index] = nil
} }
} }
refFormat.ID, err = formatErasureGetDeploymentID(refFormat, formats) refFormat.ID, err = formatErasureGetDeploymentID(refFormat, formats)
if err != nil { if err != nil {
return err return err

@ -18,6 +18,7 @@ package cmd
import ( import (
"encoding/json" "encoding/json"
"errors"
"io/ioutil" "io/ioutil"
"os" "os"
"reflect" "reflect"
@ -436,8 +437,8 @@ func TestGetErasureID(t *testing.T) {
} }
formats[2].ID = "bad-id" formats[2].ID = "bad-id"
if _, err = formatErasureGetDeploymentID(quorumFormat, formats); err != errCorruptedFormat { if _, err = formatErasureGetDeploymentID(quorumFormat, formats); !errors.Is(err, errCorruptedFormat) {
t.Fatal("Unexpected Success") t.Fatalf("Unexpect error %s", err)
} }
} }

@ -27,7 +27,6 @@ import (
"time" "time"
"github.com/dustin/go-humanize" "github.com/dustin/go-humanize"
"github.com/minio/minio/cmd/config"
xhttp "github.com/minio/minio/cmd/http" xhttp "github.com/minio/minio/cmd/http"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/sync/errgroup" "github.com/minio/minio/pkg/sync/errgroup"
@ -253,10 +252,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false) formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
// Check if we have any errors loading 'format.json' // Check if we have any errors loading 'format.json'
for i, sErr := range sErrs { for i, sErr := range sErrs {
if _, ok := formatCriticalErrors[sErr]; ok { // print the error, nonetheless, which is perhaps unhandled
return nil, nil, config.ErrCorruptedBackend(err).Hint(fmt.Sprintf("Clear any pre-existing content on %s", endpoints[i]))
}
// not critical error but still print the error, nonetheless, which is perhaps unhandled
if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 { if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 {
if sErr != nil { if sErr != nil {
logger.Info("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr) logger.Info("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr)

@ -19,10 +19,10 @@ package cmd
import "os" import "os"
// errUnexpected - unexpected error, requires manual intervention. // errUnexpected - unexpected error, requires manual intervention.
var errUnexpected = StorageErr("Unexpected error, please report this issue at https://github.com/minio/minio/issues") var errUnexpected = StorageErr("unexpected error, please report this issue at https://github.com/minio/minio/issues")
// errCorruptedFormat - corrupted backend format. // errCorruptedFormat - corrupted backend format.
var errCorruptedFormat = StorageErr("corrupted backend format, please join https://slack.min.io for assistance") var errCorruptedFormat = StorageErr("corrupted backend format, specified disk mount has unexpected previous content")
// errUnformattedDisk - unformatted disk found. // errUnformattedDisk - unformatted disk found.
var errUnformattedDisk = StorageErr("unformatted disk found") var errUnformattedDisk = StorageErr("unformatted disk found")

@ -505,6 +505,7 @@ func (s *xlStorage) GetDiskID() (string, error) {
// Somebody else got the lock first. // Somebody else got the lock first.
return diskID, nil return diskID, nil
} }
formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile) formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile)
fi, err := os.Stat(formatFile) fi, err := os.Stat(formatFile)
if err != nil { if err != nil {
@ -520,8 +521,12 @@ func (s *xlStorage) GetDiskID() (string, error) {
} else if os.IsPermission(err) { } else if os.IsPermission(err) {
return "", errDiskAccessDenied return "", errDiskAccessDenied
} }
return "", err logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat
} else if os.IsPermission(err) {
return "", errDiskAccessDenied
} }
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat return "", errCorruptedFormat
} }
@ -533,13 +538,34 @@ func (s *xlStorage) GetDiskID() (string, error) {
b, err := ioutil.ReadFile(formatFile) b, err := ioutil.ReadFile(formatFile)
if err != nil { if err != nil {
// If the disk is still not initialized.
if os.IsNotExist(err) {
_, err = os.Stat(s.diskPath)
if err == nil {
// Disk is present but missing `format.json`
return "", errUnformattedDisk
}
if os.IsNotExist(err) {
return "", errDiskNotFound
} else if os.IsPermission(err) {
return "", errDiskAccessDenied
}
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat
} else if os.IsPermission(err) {
return "", errDiskAccessDenied
}
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat return "", errCorruptedFormat
} }
format := &formatErasureV3{} format := &formatErasureV3{}
var json = jsoniter.ConfigCompatibleWithStandardLibrary var json = jsoniter.ConfigCompatibleWithStandardLibrary
if err = json.Unmarshal(b, &format); err != nil { if err = json.Unmarshal(b, &format); err != nil {
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat return "", errCorruptedFormat
} }
s.diskID = format.Erasure.This s.diskID = format.Erasure.This
s.formatFileInfo = fi s.formatFileInfo = fi
s.formatLastCheck = time.Now() s.formatLastCheck = time.Now()

Loading…
Cancel
Save