Always return a default heal item upon unexpected error (#6556)

Never return an empty heal result item, even upon error: populate
all the default values and, based on the errors encountered, make
sure to send the right result reply.
Branch: master
Authored by Harshavardhana 6 years ago, committed by Dee Koder
parent 274b35154c
commit 223967fd32
Changed files (3):
  1. cmd/xl-v1-healing-common.go (12 lines changed)
  2. cmd/xl-v1-healing-common_test.go (12 lines changed)
  3. cmd/xl-v1-healing.go (75 lines changed)
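At the heart of the change is the new disksWithAllParts contract shown in the first diff below: instead of aborting with a third, function-level error return, it records one error per disk and always returns. A minimal, self-contained sketch of that convention, where StorageAPI and errDiskNotFound are simplified stand-ins for the real cmd package definitions:

package main

import (
	"errors"
	"fmt"
)

// Simplified stand-ins for the cmd package types used in the diff below.
type StorageAPI interface{ String() string }

var errDiskNotFound = errors.New("disk not found")

// disksWithAllParts sketch: after this commit it returns per-disk
// errors instead of aborting with a function-level error.
func disksWithAllParts(onlineDisks []StorageAPI) ([]StorageAPI, []error) {
	availableDisks := make([]StorageAPI, len(onlineDisks))
	dataErrs := make([]error, len(onlineDisks))
	for i, disk := range onlineDisks {
		if disk == nil {
			// Recorded per disk, not returned as a fatal error.
			dataErrs[i] = errDiskNotFound
			continue
		}
		availableDisks[i] = disk
	}
	return availableDisks, dataErrs
}

func main() {
	disks := []StorageAPI{nil, nil, nil} // all offline
	_, dataErrs := disksWithAllParts(disks)
	fmt.Println(dataErrs) // every slot carries errDiskNotFound
}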

cmd/xl-v1-healing-common.go

@@ -158,17 +158,15 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
 //
 // - slice of errors about the state of data files on disk - can have
 //   a not-found error or a hash-mismatch error.
-//
-// - non-nil error if any of the disks failed unexpectedly (i.e. error
-//   other than file not found and not a checksum error).
 func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
-	object string) ([]StorageAPI, []error, error) {
+	object string) ([]StorageAPI, []error) {
 	availableDisks := make([]StorageAPI, len(onlineDisks))
 	buffer := []byte{}
 	dataErrs := make([]error, len(onlineDisks))
 	for i, onlineDisk := range onlineDisks {
 		if onlineDisk == nil {
+			dataErrs[i] = errDiskNotFound
 			continue
 		}
@@ -196,8 +194,8 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
 			break
 		case hErr != nil:
 			logger.LogIf(ctx, hErr)
-			// abort on unhandled errors
-			return nil, nil, hErr
+			dataErrs[i] = hErr
+			break
 		}
 	}
@@ -207,5 +205,5 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
 		}
 	}
-	return availableDisks, dataErrs, nil
+	return availableDisks, dataErrs
 }
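Note the second hunk above: an unexpected per-disk failure such as a bitrot verification error used to abort the whole scan (return nil, nil, hErr); now it is logged, kept in dataErrs[i], and the remaining disks are still examined, leaving the caller to decide how to react. A sketch of how a caller might bucket those per-disk errors afterwards; the classify helper is hypothetical, not part of this commit:

package main

import (
	"errors"
	"fmt"
)

var (
	errDiskNotFound = errors.New("disk not found")
	errFileNotFound = errors.New("file not found")
)

// classify is a hypothetical helper: with no function-level error left
// to check, callers inspect each slot of dataErrs individually.
func classify(dataErrs []error) (offline, missing, unexpected int) {
	for _, err := range dataErrs {
		switch err {
		case nil:
			// healthy disk, nothing to count
		case errDiskNotFound:
			offline++
		case errFileNotFound:
			missing++
		default:
			unexpected++
		}
	}
	return
}

func main() {
	dataErrs := []error{nil, errDiskNotFound, errors.New("hash mismatch")}
	fmt.Println(classify(dataErrs)) // 1 0 1
}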

cmd/xl-v1-healing-common_test.go

@@ -258,7 +258,7 @@ func TestListOnlineDisks(t *testing.T) {
 				i+1, test.expectedTime, modTime)
 		}
-		availableDisks, newErrs, _ := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
+		availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
 		test.errs = newErrs

 		if test._tamperBackend != noTamper {
@@ -318,10 +318,7 @@ func TestDisksWithAllParts(t *testing.T) {
 	}

 	errs = make([]error, len(xlDisks))
-	filteredDisks, errs, err := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
-	if err != nil {
-		t.Errorf("Unexpected error: %s", err)
-	}
+	filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)

 	if len(filteredDisks) != len(xlDisks) {
 		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
@@ -353,10 +350,7 @@ func TestDisksWithAllParts(t *testing.T) {
 		t.Fatalf("Failed to read xl meta data %v", err)
 	}

-	filteredDisks, errs, err = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
-	if err != nil {
-		t.Errorf("Unexpected error: %s", err)
-	}
+	filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)

 	if len(filteredDisks) != len(xlDisks) {
 		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))

cmd/xl-v1-healing.go

@@ -295,7 +295,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 		// continue to return filled madmin.HealResultItem struct which includes info
 		// on what disks the file is available etc.
 		if reducedErr := reduceReadQuorumErrs(ctx, errs, nil, quorum); reducedErr != nil {
-			return result, toObjectErr(reducedErr, bucket, object)
+			return defaultHealResult(storageDisks, errs, bucket, object), toObjectErr(reducedErr, bucket, object)
 		}
 	}
@@ -304,10 +304,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)

 	// List of disks having all parts as per latest xl.json.
-	availableDisks, dataErrs, aErr := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
-	if aErr != nil {
-		return result, toObjectErr(aErr, bucket, object)
-	}
+	availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)

 	// Initialize heal result object
 	result = madmin.HealResultItem{
@@ -338,7 +335,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 			result.ObjectSize = partsMetadata[i].Stat.Size
 			result.ParityBlocks = partsMetadata[i].Erasure.ParityBlocks
 			result.DataBlocks = partsMetadata[i].Erasure.DataBlocks
-		case errs[i] == errDiskNotFound:
+		case errs[i] == errDiskNotFound, dataErrs[i] == errDiskNotFound:
 			driveState = madmin.DriveStateOffline
 		case errs[i] == errFileNotFound, errs[i] == errVolumeNotFound:
 			fallthrough
@@ -388,6 +385,10 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	// If less than read quorum number of disks have all the parts
 	// of the data, we can't reconstruct the erasure-coded data.
 	if numAvailableDisks < quorum {
+		// Default to most common configuration for erasure
+		// blocks upon returning quorum error.
+		result.ParityBlocks = len(storageDisks) / 2
+		result.DataBlocks = len(storageDisks) / 2
		return result, toObjectErr(errXLReadQuorum, bucket, object)
 	}
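The len(storageDisks) / 2 fallback mirrors the most common XL erasure layout, an even data/parity split: with 16 disks the default heal result reports 8 data and 8 parity blocks. A trivial sketch of that arithmetic, with the disk count chosen arbitrarily for illustration:

package main

import "fmt"

func main() {
	// When read quorum is not met the real erasure configuration is
	// unknown, so the heal result falls back to an even split.
	storageDisks := 16 // example disk count, not from the commit
	fmt.Printf("default: %d data + %d parity blocks\n",
		storageDisks/2, storageDisks/2)
}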
@@ -512,7 +513,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	}

 	// Generate and write `xl.json` generated from other disks.
-	outDatedDisks, aErr = writeUniqueXLMetadata(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
+	outDatedDisks, aErr := writeUniqueXLMetadata(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
 		partsMetadata, diskCount(outDatedDisks))
 	if aErr != nil {
 		return result, toObjectErr(aErr, bucket, object)
@@ -602,6 +603,58 @@ func (xl xlObjects) healObjectDir(ctx context.Context, bucket, object string, dr
 	return hr, nil
 }

+// Populates default heal result item entries with possible values when we are returning prematurely.
+// This is to ensure that in any circumstance we are not returning empty arrays with wrong values.
+func defaultHealResult(storageDisks []StorageAPI, errs []error, bucket, object string) madmin.HealResultItem {
+	// Initialize heal result object
+	result := madmin.HealResultItem{
+		Type:      madmin.HealItemObject,
+		Bucket:    bucket,
+		Object:    object,
+		DiskCount: len(storageDisks),
+
+		// Initialize object size to -1, so we can detect if we are
+		// unable to reliably find the object size.
+		ObjectSize: -1,
+	}
+
+	for index, disk := range storageDisks {
+		if disk == nil {
+			result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
+				UUID:  "",
+				State: madmin.DriveStateOffline,
+			})
+			result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
+				UUID:  "",
+				State: madmin.DriveStateOffline,
+			})
+			continue
+		}
+		drive := disk.String()
+		driveState := madmin.DriveStateCorrupt
+		switch errs[index] {
+		case errFileNotFound, errVolumeNotFound:
+			driveState = madmin.DriveStateMissing
+		}
+		result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
+			UUID:     "",
+			Endpoint: drive,
+			State:    driveState,
+		})
+		result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
+			UUID:     "",
+			Endpoint: drive,
+			State:    driveState,
+		})
+	}
+
+	// Default to most common configuration for erasure blocks.
+	result.ParityBlocks = len(storageDisks) / 2
+	result.DataBlocks = len(storageDisks) / 2
+	return result
+}
+
 // HealObject - heal the given object.
 //
 // FIXME: If an object object was deleted and one disk was down,
@@ -624,19 +677,21 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
 		return xl.healObjectDir(healCtx, bucket, object, dryRun)
 	}

+	storageDisks := xl.getDisks()
+
 	// FIXME: Metadata is read again in the healObject() call below.
 	// Read metadata files from all the disks
-	partsMetadata, errs := readAllXLMetadata(healCtx, xl.getDisks(), bucket, object)
+	partsMetadata, errs := readAllXLMetadata(healCtx, storageDisks, bucket, object)
 	latestXLMeta, err := getLatestXLMeta(healCtx, partsMetadata, errs)
 	if err != nil {
-		return hr, toObjectErr(err, bucket, object)
+		return defaultHealResult(storageDisks, errs, bucket, object), toObjectErr(err, bucket, object)
 	}

 	// Lock the object before healing.
 	objectLock := xl.nsMutex.NewNSLock(bucket, object)
 	if lerr := objectLock.GetRLock(globalHealingTimeout); lerr != nil {
-		return hr, lerr
+		return defaultHealResult(storageDisks, errs, bucket, object), lerr
 	}
 	defer objectLock.RUnlock()
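Taken together, every early return in the heal path now goes through defaultHealResult, so the admin API always receives a fully populated madmin.HealResultItem. A simplified, self-contained sketch of that fallback shape; the types are reduced to the fields used here and the drive states are plain strings, whereas the real madmin structs carry more:

package main

import (
	"errors"
	"fmt"
)

// Reduced stand-ins for the real madmin types.
type HealDriveInfo struct {
	UUID, Endpoint, State string
}

type HealResultItem struct {
	Bucket, Object string
	DiskCount      int
	ObjectSize     int64
	ParityBlocks   int
	DataBlocks     int
	Before, After  struct{ Drives []HealDriveInfo }
}

var errFileNotFound = errors.New("file not found")

// defaultHealResult sketch: one drive entry per disk, never an empty slice.
func defaultHealResult(endpoints []string, errs []error, bucket, object string) HealResultItem {
	result := HealResultItem{
		Bucket:     bucket,
		Object:     object,
		DiskCount:  len(endpoints),
		ObjectSize: -1, // marker for "size could not be determined"
	}
	for i, ep := range endpoints {
		state := "corrupt"
		if errs[i] == errFileNotFound {
			state = "missing"
		}
		info := HealDriveInfo{Endpoint: ep, State: state}
		result.Before.Drives = append(result.Before.Drives, info)
		result.After.Drives = append(result.After.Drives, info)
	}
	// Even data/parity split as the default erasure configuration.
	result.ParityBlocks = len(endpoints) / 2
	result.DataBlocks = len(endpoints) / 2
	return result
}

func main() {
	res := defaultHealResult([]string{"disk1", "disk2"},
		[]error{errFileNotFound, nil}, "mybucket", "myobject")
	fmt.Printf("%+v\n", res)
}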
