heal: Check if all parts are available and valid (#3967)

In the algorithm to check if an object requires healing, in addition to
checking if all disks have xl.json present we should check if all parts
of the object are present and have valid blake2b checksums.

Also fixed a minor compilation error in heal-objects-list.go.
master
Krishnan Parthasarathi 8 years ago committed by Harshavardhana
parent 8c0ce2fee9
commit c27ece409b
  1. 39
      cmd/xl-v1-healing-common.go
  2. 2
      cmd/xl-v1-healing.go
  3. 6
      cmd/xl-v1-list-objects-heal.go
  4. 4
      pkg/madmin/examples/heal-objects-list.go

@ -141,19 +141,38 @@ func outDatedDisks(disks, latestDisks []StorageAPI, errs []error, partsMetadata
}
// Returns if the object should be healed.
func xlShouldHeal(partsMetadata []xlMetaV1, errs []error) bool {
modTime, _ := commonTime(listObjectModtimes(partsMetadata, errs))
for index := range partsMetadata {
if errs[index] == errDiskNotFound {
continue
}
if errs[index] != nil {
func xlShouldHeal(disks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket, object string) bool {
onlineDisks, _ := listOnlineDisks(disks, partsMetadata,
errs)
// Return true even if one of the disks have stale data.
for _, disk := range onlineDisks {
if disk == nil {
return true
}
if modTime != partsMetadata[index].Stat.ModTime {
}
// Check if all parts of an object are available and their
// checksums are valid.
availableDisks, _, err := disksWithAllParts(onlineDisks, partsMetadata,
errs, bucket, object)
if err != nil {
// Note: This error is due to failure of blake2b
// checksum computation of a part. It doesn't clearly
// indicate if the object needs healing. At this
// juncture healing could fail with the same
// error. So, we choose to return that there is no
// need to heal.
return false
}
// Return true even if one disk has xl.json or one or more
// parts missing.
for _, disk := range availableDisks {
if disk == nil {
return true
}
}
return false
}
@ -232,7 +251,8 @@ func disksWithAllParts(onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs
blakeBytes, hErr := hashSum(onlineDisk, bucket, partPath, hash)
if hErr == errFileNotFound {
errs[index] = errFileNotFound
continue
availableDisks[index] = nil
break
}
if hErr != nil && hErr != errFileNotFound {
@ -246,6 +266,7 @@ func disksWithAllParts(onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs
// healing.
if blakeSum != partChecksum {
errs[index] = errFileNotFound
availableDisks[index] = nil
break
}
availableDisks[index] = onlineDisk

@ -322,7 +322,7 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
return toObjectErr(reducedErr, bucket, object)
}
if !xlShouldHeal(partsMetadata, errs) {
if !xlShouldHeal(storageDisks, partsMetadata, errs, bucket, object) {
// There is nothing to heal.
return nil
}

@ -144,7 +144,7 @@ func (xl xlObjects) listObjectsHeal(bucket, prefix, marker, delimiter string, ma
objectLock := globalNSMutex.NewNSLock(bucket, objInfo.Name)
objectLock.RLock()
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, bucket, objInfo.Name)
if xlShouldHeal(partsMetadata, errs) {
if xlShouldHeal(xl.storageDisks, partsMetadata, errs, bucket, objInfo.Name) {
healStat := xlHealStat(xl, partsMetadata, errs)
result.Objects = append(result.Objects, ObjectInfo{
Name: objInfo.Name,
@ -377,7 +377,9 @@ func (xl xlObjects) listMultipartUploadsHeal(bucket, prefix, keyMarker,
uploadIDPath := filepath.Join(bucket, upload.Object, upload.UploadID)
partsMetadata, errs := readAllXLMetadata(xl.storageDisks,
minioMetaMultipartBucket, uploadIDPath)
if xlShouldHeal(partsMetadata, errs) {
if xlShouldHeal(xl.storageDisks, partsMetadata, errs,
minioMetaMultipartBucket, uploadIDPath) {
healUploadInfo := xlHealStat(xl, partsMetadata, errs)
upload.HealUploadInfo = &healUploadInfo
result.Uploads = append(result.Uploads, upload)

@ -61,8 +61,8 @@ func main() {
return
}
if object.HealInfo != nil {
switch healInfo := *object.HealInfo; healInfo.Status {
if object.HealObjectInfo != nil {
switch healInfo := *object.HealObjectInfo; healInfo.Status {
case madmin.CanHeal:
fmt.Println(object.Key, " can be healed.")
case madmin.QuorumUnavailable:

Loading…
Cancel
Save