diff --git a/cmd/xl-v1-healing-common.go b/cmd/xl-v1-healing-common.go
index 5a8ad2496..6d42904ab 100644
--- a/cmd/xl-v1-healing-common.go
+++ b/cmd/xl-v1-healing-common.go
@@ -158,17 +158,15 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
 //
 // - slice of errors about the state of data files on disk - can have
 //   a not-found error or a hash-mismatch error.
-//
-// - non-nil error if any of the disks failed unexpectedly (i.e. error
-//   other than file not found and not a checksum error).
 func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
-	object string) ([]StorageAPI, []error, error) {
+	object string) ([]StorageAPI, []error) {
 	availableDisks := make([]StorageAPI, len(onlineDisks))
 	buffer := []byte{}
 	dataErrs := make([]error, len(onlineDisks))
 
 	for i, onlineDisk := range onlineDisks {
 		if onlineDisk == nil {
+			dataErrs[i] = errDiskNotFound
 			continue
 		}
 
@@ -196,8 +194,8 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
 				break
 			case hErr != nil:
 				logger.LogIf(ctx, hErr)
-				// abort on unhandled errors
-				return nil, nil, hErr
+				dataErrs[i] = hErr
+				break
 			}
 		}
 
@@ -207,5 +205,5 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
 		}
 	}
 
-	return availableDisks, dataErrs, nil
+	return availableDisks, dataErrs
 }
diff --git a/cmd/xl-v1-healing-common_test.go b/cmd/xl-v1-healing-common_test.go
index 096a1f07d..983e27744 100644
--- a/cmd/xl-v1-healing-common_test.go
+++ b/cmd/xl-v1-healing-common_test.go
@@ -258,7 +258,7 @@ func TestListOnlineDisks(t *testing.T) {
 				i+1, test.expectedTime, modTime)
 		}
 
-		availableDisks, newErrs, _ := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
+		availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
 		test.errs = newErrs
 
 		if test._tamperBackend != noTamper {
@@ -318,10 +318,7 @@ func TestDisksWithAllParts(t *testing.T) {
 	}
 	errs = make([]error, len(xlDisks))
 
-	filteredDisks, errs, err := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
-	if err != nil {
-		t.Errorf("Unexpected error: %s", err)
-	}
+	filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
 
 	if len(filteredDisks) != len(xlDisks) {
 		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
@@ -353,10 +350,7 @@ func TestDisksWithAllParts(t *testing.T) {
 		t.Fatalf("Failed to read xl meta data %v", err)
 	}
 
-	filteredDisks, errs, err = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
-	if err != nil {
-		t.Errorf("Unexpected error: %s", err)
-	}
+	filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
 
 	if len(filteredDisks) != len(xlDisks) {
 		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
diff --git a/cmd/xl-v1-healing.go b/cmd/xl-v1-healing.go
index 513243b4b..e6049fe3a 100644
--- a/cmd/xl-v1-healing.go
+++ b/cmd/xl-v1-healing.go
@@ -295,7 +295,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 		// continue to return filled madmin.HealResultItem struct which includes info
 		// on what disks the file is available etc.
 		if reducedErr := reduceReadQuorumErrs(ctx, errs, nil, quorum); reducedErr != nil {
-			return result, toObjectErr(reducedErr, bucket, object)
+			return defaultHealResult(storageDisks, errs, bucket, object), toObjectErr(reducedErr, bucket, object)
 		}
 	}
 
@@ -304,10 +304,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
 
 	// List of disks having all parts as per latest xl.json.
-	availableDisks, dataErrs, aErr := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
-	if aErr != nil {
-		return result, toObjectErr(aErr, bucket, object)
-	}
+	availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
 
 	// Initialize heal result object
 	result = madmin.HealResultItem{
@@ -338,7 +335,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 			result.ObjectSize = partsMetadata[i].Stat.Size
 			result.ParityBlocks = partsMetadata[i].Erasure.ParityBlocks
 			result.DataBlocks = partsMetadata[i].Erasure.DataBlocks
-		case errs[i] == errDiskNotFound:
+		case errs[i] == errDiskNotFound, dataErrs[i] == errDiskNotFound:
 			driveState = madmin.DriveStateOffline
 		case errs[i] == errFileNotFound, errs[i] == errVolumeNotFound:
 			fallthrough
@@ -388,6 +385,10 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	// If less than read quorum number of disks have all the parts
 	// of the data, we can't reconstruct the erasure-coded data.
 	if numAvailableDisks < quorum {
+		// Default to most common configuration for erasure
+		// blocks upon returning quorum error.
+		result.ParityBlocks = len(storageDisks) / 2
+		result.DataBlocks = len(storageDisks) / 2
 		return result, toObjectErr(errXLReadQuorum, bucket, object)
 	}
 
@@ -512,7 +513,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	}
 	// Generate and write `xl.json` generated from other disks.
-	outDatedDisks, aErr = writeUniqueXLMetadata(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
+	outDatedDisks, aErr := writeUniqueXLMetadata(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
 		partsMetadata, diskCount(outDatedDisks))
 	if aErr != nil {
 		return result, toObjectErr(aErr, bucket, object)
 	}
@@ -602,6 +603,58 @@ func (xl xlObjects) healObjectDir(ctx context.Context, bucket, object string, dr
 	return hr, nil
 }
 
+// Populates default heal result item entries with possible values when we are returning prematurely.
+// This is to ensure that in any circumstance we are not returning empty arrays with wrong values.
+func defaultHealResult(storageDisks []StorageAPI, errs []error, bucket, object string) madmin.HealResultItem {
+	// Initialize heal result object
+	result := madmin.HealResultItem{
+		Type:      madmin.HealItemObject,
+		Bucket:    bucket,
+		Object:    object,
+		DiskCount: len(storageDisks),
+
+		// Initialize object size to -1, so we can detect if we are
+		// unable to reliably find the object size.
+		ObjectSize: -1,
+	}
+
+	for index, disk := range storageDisks {
+		if disk == nil {
+			result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
+				UUID:  "",
+				State: madmin.DriveStateOffline,
+			})
+			result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
+				UUID:  "",
+				State: madmin.DriveStateOffline,
+			})
+			continue
+		}
+		drive := disk.String()
+		driveState := madmin.DriveStateCorrupt
+		switch errs[index] {
+		case errFileNotFound, errVolumeNotFound:
+			driveState = madmin.DriveStateMissing
+		}
+		result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
+			UUID:     "",
+			Endpoint: drive,
+			State:    driveState,
+		})
+		result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
+			UUID:     "",
+			Endpoint: drive,
+			State:    driveState,
+		})
+	}
+
+	// Default to most common configuration for erasure blocks.
+	result.ParityBlocks = len(storageDisks) / 2
+	result.DataBlocks = len(storageDisks) / 2
+
+	return result
+}
+
 // HealObject - heal the given object.
 //
 // FIXME: If an object object was deleted and one disk was down,
@@ -624,19 +677,21 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
 		return xl.healObjectDir(healCtx, bucket, object, dryRun)
 	}
 
+	storageDisks := xl.getDisks()
+
 	// FIXME: Metadata is read again in the healObject() call below.
 	// Read metadata files from all the disks
-	partsMetadata, errs := readAllXLMetadata(healCtx, xl.getDisks(), bucket, object)
+	partsMetadata, errs := readAllXLMetadata(healCtx, storageDisks, bucket, object)
 	latestXLMeta, err := getLatestXLMeta(healCtx, partsMetadata, errs)
 	if err != nil {
-		return hr, toObjectErr(err, bucket, object)
+		return defaultHealResult(storageDisks, errs, bucket, object), toObjectErr(err, bucket, object)
 	}
 
 	// Lock the object before healing.
 	objectLock := xl.nsMutex.NewNSLock(bucket, object)
 	if lerr := objectLock.GetRLock(globalHealingTimeout); lerr != nil {
-		return hr, lerr
+		return defaultHealResult(storageDisks, errs, bucket, object), lerr
 	}
 	defer objectLock.RUnlock()