|
|
|
@ -333,25 +333,24 @@ func quickHeal(storageDisks []StorageAPI, writeQuorum int, readQuorum int) error |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Heals only the corrupted/missing erasure blocks of an object.
|
|
|
|
|
func healObject(storageDisks []StorageAPI, bucket string, object string, quorum int) (int, int, error) { |
|
|
|
|
func healObject(storageDisks []StorageAPI, bucket, object string, quorum int) (int, int, error) { |
|
|
|
|
|
|
|
|
|
partsMetadata, errs := readAllXLMetadata(storageDisks, bucket, object) |
|
|
|
|
// readQuorum suffices for xl.json since we use monotonic
|
|
|
|
|
// system time to break the tie when a split-brain situation
|
|
|
|
|
// arises.
|
|
|
|
|
if reducedErr := reduceReadQuorumErrs(errs, nil, quorum); reducedErr != nil { |
|
|
|
|
return 0, 0, toObjectErr(reducedErr, bucket, object) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if !xlShouldHeal(storageDisks, partsMetadata, errs, bucket, object) { |
|
|
|
|
// There is nothing to heal.
|
|
|
|
|
return 0, 0, nil |
|
|
|
|
if rErr := reduceReadQuorumErrs(errs, nil, quorum); rErr != nil { |
|
|
|
|
return 0, 0, toObjectErr(rErr, bucket, object) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// List of disks having latest version of the object.
|
|
|
|
|
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs) |
|
|
|
|
|
|
|
|
|
// List of disks having all parts as per latest xl.json.
|
|
|
|
|
availableDisks, errs, aErr := disksWithAllParts(latestDisks, partsMetadata, errs, bucket, object) |
|
|
|
|
// List of disks having all parts as per latest xl.json - this
|
|
|
|
|
// does a full pass over the data and verifies all part files
|
|
|
|
|
// on disk
|
|
|
|
|
availableDisks, errs, aErr := disksWithAllParts(latestDisks, partsMetadata, errs, bucket, |
|
|
|
|
object) |
|
|
|
|
if aErr != nil { |
|
|
|
|
return 0, 0, toObjectErr(aErr, bucket, object) |
|
|
|
|
} |
|
|
|
@ -359,8 +358,7 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
// Number of disks which don't serve data.
|
|
|
|
|
numOfflineDisks := 0 |
|
|
|
|
for index, disk := range storageDisks { |
|
|
|
|
switch { |
|
|
|
|
case disk == nil, errs[index] == errDiskNotFound: |
|
|
|
|
if disk == nil || errs[index] == errDiskNotFound { |
|
|
|
|
numOfflineDisks++ |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -368,12 +366,16 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
// Number of disks which have all parts of the given object.
|
|
|
|
|
numAvailableDisks := 0 |
|
|
|
|
for _, disk := range availableDisks { |
|
|
|
|
switch { |
|
|
|
|
case disk != nil: |
|
|
|
|
if disk != nil { |
|
|
|
|
numAvailableDisks++ |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if numAvailableDisks == len(storageDisks) { |
|
|
|
|
// nothing to heal in this case
|
|
|
|
|
return 0, 0, nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// If less than read quorum number of disks have all the parts
|
|
|
|
|
// of the data, we can't reconstruct the erasure-coded data.
|
|
|
|
|
if numAvailableDisks < quorum { |
|
|
|
@ -381,8 +383,8 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// List of disks having outdated version of the object or missing object.
|
|
|
|
|
outDatedDisks := outDatedDisks(storageDisks, availableDisks, errs, partsMetadata, |
|
|
|
|
bucket, object) |
|
|
|
|
outDatedDisks := outDatedDisks(storageDisks, availableDisks, errs, partsMetadata, bucket, |
|
|
|
|
object) |
|
|
|
|
|
|
|
|
|
// Number of disks that had outdated content of the given
|
|
|
|
|
// object and are online to be healed.
|
|
|
|
@ -401,9 +403,10 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for index, disk := range outDatedDisks { |
|
|
|
|
// Before healing outdated disks, we need to remove xl.json
|
|
|
|
|
// and part files from "bucket/object/" so that
|
|
|
|
|
// rename(minioMetaBucket, "tmp/tmpuuid/", "bucket", "object/") succeeds.
|
|
|
|
|
// Before healing outdated disks, we need to remove
|
|
|
|
|
// xl.json and part files from "bucket/object/" so
|
|
|
|
|
// that rename(minioMetaBucket, "tmp/tmpuuid/",
|
|
|
|
|
// "bucket", "object/") succeeds.
|
|
|
|
|
if disk == nil { |
|
|
|
|
// Not an outdated disk.
|
|
|
|
|
continue |
|
|
|
@ -417,27 +420,15 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
continue |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Outdated object with the same name exists that needs to be deleted.
|
|
|
|
|
outDatedMeta := partsMetadata[index] |
|
|
|
|
// Consult valid metadata picked when there is no
|
|
|
|
|
// metadata available on this disk.
|
|
|
|
|
if isErr(errs[index], errFileNotFound) { |
|
|
|
|
outDatedMeta = latestMeta |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Delete all the parts. Ignore if parts are not found.
|
|
|
|
|
for _, part := range outDatedMeta.Parts { |
|
|
|
|
dErr := disk.DeleteFile(bucket, pathJoin(object, part.Name)) |
|
|
|
|
if dErr != nil && !isErr(dErr, errFileNotFound) { |
|
|
|
|
return 0, 0, toObjectErr(traceError(dErr), bucket, object) |
|
|
|
|
// List and delete the object directory, ignoring
|
|
|
|
|
// errors.
|
|
|
|
|
files, err := disk.ListDir(bucket, object) |
|
|
|
|
if err == nil { |
|
|
|
|
for _, entry := range files { |
|
|
|
|
_ = disk.DeleteFile(bucket, |
|
|
|
|
pathJoin(object, entry)) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Delete xl.json file. Ignore if xl.json not found.
|
|
|
|
|
dErr := disk.DeleteFile(bucket, pathJoin(object, xlMetaJSONFile)) |
|
|
|
|
if dErr != nil && !isErr(dErr, errFileNotFound) { |
|
|
|
|
return 0, 0, toObjectErr(traceError(dErr), bucket, object) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Reorder so that we have data disks first and parity disks next.
|
|
|
|
@ -445,16 +436,19 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
outDatedDisks = shuffleDisks(outDatedDisks, latestMeta.Erasure.Distribution) |
|
|
|
|
partsMetadata = shufflePartsMetadata(partsMetadata, latestMeta.Erasure.Distribution) |
|
|
|
|
|
|
|
|
|
// We write at temporary location and then rename to final location.
|
|
|
|
|
// We write at temporary location and then rename to final location.
|
|
|
|
|
tmpID := mustGetUUID() |
|
|
|
|
|
|
|
|
|
// Checksum of the part files. checkSumInfos[index] will contain checksums
|
|
|
|
|
// of all the part files in the outDatedDisks[index]
|
|
|
|
|
// Checksum of the part files. checkSumInfos[index] will
|
|
|
|
|
// contain checksums of all the part files in the
|
|
|
|
|
// outDatedDisks[index]
|
|
|
|
|
checksumInfos := make([][]ChecksumInfo, len(outDatedDisks)) |
|
|
|
|
|
|
|
|
|
// Heal each part. erasureHealFile() will write the healed part to
|
|
|
|
|
// .minio/tmp/uuid/ which needs to be renamed later to the final location.
|
|
|
|
|
storage, err := NewErasureStorage(latestDisks, latestMeta.Erasure.DataBlocks, latestMeta.Erasure.ParityBlocks) |
|
|
|
|
// Heal each part. erasureHealFile() will write the healed
|
|
|
|
|
// part to .minio/tmp/uuid/ which needs to be renamed later to
|
|
|
|
|
// the final location.
|
|
|
|
|
storage, err := NewErasureStorage(latestDisks, |
|
|
|
|
latestMeta.Erasure.DataBlocks, latestMeta.Erasure.ParityBlocks) |
|
|
|
|
if err != nil { |
|
|
|
|
return 0, 0, toObjectErr(err, bucket, object) |
|
|
|
|
} |
|
|
|
@ -472,14 +466,33 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
// Heal the part file.
|
|
|
|
|
file, hErr := storage.HealFile(outDatedDisks, bucket, pathJoin(object, partName), erasure.BlockSize, minioMetaTmpBucket, pathJoin(tmpID, partName), partSize, algorithm, checksums) |
|
|
|
|
file, hErr := storage.HealFile(outDatedDisks, bucket, pathJoin(object, partName), |
|
|
|
|
erasure.BlockSize, minioMetaTmpBucket, pathJoin(tmpID, partName), partSize, |
|
|
|
|
algorithm, checksums) |
|
|
|
|
if hErr != nil { |
|
|
|
|
return 0, 0, toObjectErr(hErr, bucket, object) |
|
|
|
|
} |
|
|
|
|
for i := range outDatedDisks { |
|
|
|
|
if outDatedDisks[i] != OfflineDisk { |
|
|
|
|
checksumInfos[i] = append(checksumInfos[i], ChecksumInfo{partName, file.Algorithm, file.Checksums[i]}) |
|
|
|
|
// outDatedDisks that had write errors should not be
|
|
|
|
|
// written to for remaining parts, so we nil them out.
|
|
|
|
|
for i, disk := range outDatedDisks { |
|
|
|
|
if disk == nil { |
|
|
|
|
continue |
|
|
|
|
} |
|
|
|
|
// A non-nil stale disk which did not receive
|
|
|
|
|
// a healed part checksum had a write error.
|
|
|
|
|
if file.Checksums[i] == nil { |
|
|
|
|
outDatedDisks[i] = nil |
|
|
|
|
numHealedDisks-- |
|
|
|
|
continue |
|
|
|
|
} |
|
|
|
|
// append part checksums
|
|
|
|
|
checksumInfos[i] = append(checksumInfos[i], |
|
|
|
|
ChecksumInfo{partName, file.Algorithm, file.Checksums[i]}) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// If every outdated disk had a write error, we give up.
|
|
|
|
|
if numHealedDisks == 0 { |
|
|
|
|
return 0, 0, fmt.Errorf("all disks without up-to-date data had write errors") |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -493,7 +506,8 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Generate `xl.json` from the other disks and write it.
|
|
|
|
|
outDatedDisks, aErr = writeUniqueXLMetadata(outDatedDisks, minioMetaTmpBucket, tmpID, partsMetadata, diskCount(outDatedDisks)) |
|
|
|
|
outDatedDisks, aErr = writeUniqueXLMetadata(outDatedDisks, minioMetaTmpBucket, tmpID, |
|
|
|
|
partsMetadata, diskCount(outDatedDisks)) |
|
|
|
|
if aErr != nil { |
|
|
|
|
return 0, 0, toObjectErr(aErr, bucket, object) |
|
|
|
|
} |
|
|
|
@ -503,13 +517,10 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum |
|
|
|
|
if disk == nil { |
|
|
|
|
continue |
|
|
|
|
} |
|
|
|
|
// Remove any lingering partial data from current namespace.
|
|
|
|
|
aErr = disk.DeleteFile(bucket, retainSlash(object)) |
|
|
|
|
if aErr != nil && aErr != errFileNotFound { |
|
|
|
|
return 0, 0, toObjectErr(traceError(aErr), bucket, object) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Attempt a rename now from healed data to final location.
|
|
|
|
|
aErr = disk.RenameFile(minioMetaTmpBucket, retainSlash(tmpID), bucket, retainSlash(object)) |
|
|
|
|
aErr = disk.RenameFile(minioMetaTmpBucket, retainSlash(tmpID), bucket, |
|
|
|
|
retainSlash(object)) |
|
|
|
|
if aErr != nil { |
|
|
|
|
return 0, 0, toObjectErr(traceError(aErr), bucket, object) |
|
|
|
|
} |
|
|
|
|