From 7f9498f43f68e6ed37b608eb648438404ddae8e2 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Fri, 18 Sep 2020 12:09:05 -0700 Subject: [PATCH] fix: ignore faulty drives and continue (#10511) drives might return different types of errors; handle them individually, and for some errors just log an error and continue --- cmd/bitrot-streaming.go | 4 ++-- cmd/bitrot-whole.go | 4 ++-- cmd/storage-rest-server.go | 35 +++++++++++++++++++++++------------ cmd/xl-storage.go | 22 ++++++++++++---------- cmd/xl-storage_test.go | 12 ++++++------ 5 files changed, 45 insertions(+), 32 deletions(-) diff --git a/cmd/bitrot-streaming.go b/cmd/bitrot-streaming.go index 7c0426000..ddeaf29c2 100644 --- a/cmd/bitrot-streaming.go +++ b/cmd/bitrot-streaming.go @@ -140,8 +140,8 @@ func (b *streamingBitrotReader) ReadAt(buf []byte, offset int64) (int, error) { b.h.Write(buf) if !bytes.Equal(b.h.Sum(nil), b.hashBytes) { - err := &errHashMismatch{fmt.Sprintf("Disk: %s - content hash does not match - expected %s, got %s", - b.disk, hex.EncodeToString(b.hashBytes), hex.EncodeToString(b.h.Sum(nil)))} + err := &errHashMismatch{fmt.Sprintf("Disk: %s -> %s/%s - content hash does not match - expected %s, got %s", + b.disk, b.volume, b.filePath, hex.EncodeToString(b.hashBytes), hex.EncodeToString(b.h.Sum(nil)))} logger.LogIf(GlobalContext, err) return 0, err } diff --git a/cmd/bitrot-whole.go b/cmd/bitrot-whole.go index d30265c36..e8907e2de 100644 --- a/cmd/bitrot-whole.go +++ b/cmd/bitrot-whole.go @@ -71,12 +71,12 @@ func (b *wholeBitrotReader) ReadAt(buf []byte, offset int64) (n int, err error) if b.buf == nil { b.buf = make([]byte, b.tillOffset-offset) if _, err := b.disk.ReadFile(context.TODO(), b.volume, b.filePath, offset, b.buf, b.verifier); err != nil { - logger.LogIf(GlobalContext, fmt.Errorf("Disk: %s returned %w", b.disk, err)) + logger.LogIf(GlobalContext, fmt.Errorf("Disk: %s -> %s/%s returned %w", b.disk, b.volume, b.filePath, err)) return 0, err } } if len(b.buf) < len(buf) { - 
logger.LogIf(GlobalContext, errLessData) + logger.LogIf(GlobalContext, fmt.Errorf("Disk: %s -> %s/%s returned %w", b.disk, b.volume, b.filePath, errLessData)) return 0, errLessData } n = copy(buf, b.buf) diff --git a/cmd/storage-rest-server.go b/cmd/storage-rest-server.go index 73dd70891..d2b9c9105 100644 --- a/cmd/storage-rest-server.go +++ b/cmd/storage-rest-server.go @@ -830,21 +830,32 @@ func registerStorageRESTHandlers(router *mux.Router, endpointZones EndpointZones } storage, err := newXLStorage(endpoint) if err != nil { - if err == errMinDiskSize { + switch err { + case errMinDiskSize: logger.Fatal(config.ErrUnableToWriteInBackend(err).Hint(err.Error()), "Unable to initialize backend") - } else if err == errUnsupportedDisk { - hint := fmt.Sprintf("'%s' does not support O_DIRECT flags, refusing to use", endpoint.Path) + case errUnsupportedDisk: + hint := fmt.Sprintf("'%s' does not support O_DIRECT flags, MinIO erasure coding requires filesystems with O_DIRECT support", endpoint.Path) logger.Fatal(config.ErrUnsupportedBackend(err).Hint(hint), "Unable to initialize backend") + case errDiskNotDir: + hint := fmt.Sprintf("'%s' MinIO erasure coding needs a directory", endpoint.Path) + logger.Fatal(config.ErrUnableToWriteInBackend(err).Hint(hint), "Unable to initialize backend") + case errFileAccessDenied: + // Show a descriptive error with a hint about how to fix it. 
+ var username string + if u, err := user.Current(); err == nil { + username = u.Username + } else { + username = "" + } + hint := fmt.Sprintf("Run the following command to add write permissions: `sudo chown -R %s %s && sudo chmod u+rxw %s`", username, endpoint.Path, endpoint.Path) + logger.Fatal(config.ErrUnableToWriteInBackend(err).Hint(hint), "Unable to initialize posix backend") + case errFaultyDisk: + logger.LogIf(GlobalContext, fmt.Errorf("disk is faulty at %s, please replace the drive", endpoint)) + case errDiskFull: + logger.LogIf(GlobalContext, fmt.Errorf("disk is already full at %s, incoming I/O will fail", endpoint)) + default: + logger.LogIf(GlobalContext, fmt.Errorf("disk returned an unexpected error at %s, please investigate", endpoint)) } - // Show a descriptive error with a hint about how to fix it. - var username string - if u, err := user.Current(); err == nil { - username = u.Username - } else { - username = "" - } - hint := fmt.Sprintf("Run the following command to add the convenient permissions: `sudo chown -R %s %s && sudo chmod u+rxw %s`", username, endpoint.Path, endpoint.Path) - logger.Fatal(config.ErrUnableToWriteInBackend(err).Hint(hint), "Unable to initialize posix backend") } server := &storageRESTServer{storage: storage} diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 1ec34332a..fea10963f 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -176,15 +176,6 @@ func getValidPath(path string, requireDirectIO bool) (string, error) { return path, errDiskNotDir } - di, err := getDiskInfo(path) - if err != nil { - return path, err - } - - if err = checkDiskMinTotal(di); err != nil { - return path, err - } - // check if backend is writable. 
var rnd [8]byte _, _ = rand.Read(rnd[:]) @@ -195,6 +186,7 @@ func getValidPath(path string, requireDirectIO bool) (string, error) { var file *os.File if requireDirectIO { + // only erasure coding needs direct-io support file, err = disk.OpenFileDirectIO(fn, os.O_CREATE|os.O_EXCL, 0666) } else { file, err = os.OpenFile(fn, os.O_CREATE|os.O_EXCL, 0666) @@ -204,12 +196,22 @@ func getValidPath(path string, requireDirectIO bool) (string, error) { // if direct i/o failed. if err != nil { if isSysErrInvalidArg(err) { + // O_DIRECT not supported return path, errUnsupportedDisk } - return path, err + return path, osErrToFileErr(err) } file.Close() + di, err := getDiskInfo(path) + if err != nil { + return path, err + } + + if err = checkDiskMinTotal(di); err != nil { + return path, err + } + return path, nil } diff --git a/cmd/xl-storage_test.go b/cmd/xl-storage_test.go index 85259926f..9bb227aef 100644 --- a/cmd/xl-storage_test.go +++ b/cmd/xl-storage_test.go @@ -453,7 +453,7 @@ func TestXLStorageMakeVol(t *testing.T) { // Initialize xlStorage storage layer for permission denied error. _, err = newLocalXLStorage(permDeniedDir) - if err != nil && !os.IsPermission(err) { + if err != nil && err != errFileAccessDenied { t.Fatalf("Unable to initialize xlStorage, %s", err) } @@ -552,7 +552,7 @@ func TestXLStorageDeleteVol(t *testing.T) { // Initialize xlStorage storage layer for permission denied error. _, err = newLocalXLStorage(permDeniedDir) - if err != nil && !os.IsPermission(err) { + if err != nil && err != errFileAccessDenied { t.Fatalf("Unable to initialize xlStorage, %s", err) } @@ -804,7 +804,7 @@ func TestXLStorageXlStorageListDir(t *testing.T) { // Initialize xlStorage storage layer for permission denied error. 
_, err = newLocalXLStorage(permDeniedDir) - if err != nil && !os.IsPermission(err) { + if err != nil && err != errFileAccessDenied { t.Fatalf("Unable to initialize xlStorage, %s", err) } @@ -928,7 +928,7 @@ func TestXLStorageDeleteFile(t *testing.T) { // Initialize xlStorage storage layer for permission denied error. _, err = newLocalXLStorage(permDeniedDir) - if err != nil && !os.IsPermission(err) { + if err != nil && err != errFileAccessDenied { t.Fatalf("Unable to initialize xlStorage, %s", err) } @@ -1126,7 +1126,7 @@ func TestXLStorageReadFile(t *testing.T) { // Initialize xlStorage storage layer for permission denied error. _, err = newLocalXLStorage(permDeniedDir) - if err != nil && !os.IsPermission(err) { + if err != nil && err != errFileAccessDenied { t.Fatalf("Unable to initialize xlStorage, %s", err) } @@ -1296,7 +1296,7 @@ func TestXLStorageAppendFile(t *testing.T) { var xlStoragePermStorage StorageAPI // Initialize xlStorage storage layer for permission denied error. _, err = newLocalXLStorage(permDeniedDir) - if err != nil && !os.IsPermission(err) { + if err != nil && err != errFileAccessDenied { t.Fatalf("Unable to initialize xlStorage, %s", err) }