Only heal on disks where we are sure that healing is needed (#7148)

master
Krishna Srinivas 6 years ago committed by kannappanr
parent 2d9860e875
commit b18c0478e7
  1. 12
      cmd/storage-rest-client.go
  2. 40
      cmd/xl-v1-healing.go

@ -29,6 +29,9 @@ import (
"encoding/gob"
"encoding/hex"
"fmt"
"strings"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/cmd/rest"
xnet "github.com/minio/minio/pkg/net"
@ -101,6 +104,15 @@ func toStorageErr(err error) error {
case errServerTimeMismatch.Error():
return errServerTimeMismatch
}
if strings.Contains(err.Error(), "Bitrot verification mismatch") {
var expected string
var received string
fmt.Sscanf(err.Error(), "Bitrot verification mismatch - expected %s received %s", &expected, &received)
// Go's Sscanf %s scans "," that comes after the expected hash, hence remove it. Providing "," in the format string does not help.
expected = strings.TrimSuffix(expected, ",")
bitrotErr := hashMismatchError{expected, received}
return bitrotErr
}
return err
}

@ -22,6 +22,7 @@ import (
"io"
"path"
"sync"
"time"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/madmin"
@ -237,6 +238,30 @@ func listAllBuckets(storageDisks []StorageAPI) (buckets map[string]VolInfo,
return buckets, bucketsOcc, nil
}
// shouldHealObjectOnDisk reports whether the copy of an object on one disk
// should be healed.
//
// Only heal on disks where we are sure that healing is needed. We can expand
// this list as and when we figure out more errors can be added to this list safely.
//
// xlErr is the error from reading xl.json on the disk, dataErr is the error
// from checking the part.N files, meta is the metadata read from the disk and
// quorumModTime is the modification time the caller treats as authoritative.
//
// It returns true when:
//   - xlErr is errFileNotFound or errCorruptedFormat, or
//   - xlErr is nil and dataErr is errFileNotFound or a hashMismatchError, or
//   - xlErr is nil and meta.Stat.ModTime is a different instant than
//     quorumModTime.
//
// Any other xlErr yields false.
func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime time.Time) bool {
	switch xlErr {
	case errFileNotFound, errCorruptedFormat:
		// xl.json is missing or unusable on this disk.
		return true
	case nil:
		// xl.json was read fine but there may be a problem with the part.N files.
		if dataErr == errFileNotFound {
			return true
		}
		if _, ok := dataErr.(hashMismatchError); ok {
			return true
		}
		// Compare instants with Equal rather than ==/!=: the operators also
		// compare the Location pointer and monotonic clock reading, so two
		// values for the same instant could be reported as different and
		// trigger a needless heal.
		return !quorumModTime.Equal(meta.Stat.ModTime)
	}
	return false
}
// Heals an object by re-writing corrupt/missing erasure blocks.
func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
quorum int, dryRun bool) (result madmin.HealResultItem, err error) {
@ -305,17 +330,13 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
driveState = madmin.DriveStateCorrupt
}
// an online disk without valid data/metadata is
// outdated and can be healed.
if errs[i] != errDiskNotFound && v == nil {
outDatedDisks[i] = storageDisks[i]
disksToHealCount++
}
var drive string
if v == nil {
if errs[i] != errDiskNotFound {
drive = outDatedDisks[i].String()
if storageDisks[i] != nil {
drive = storageDisks[i].String()
}
if shouldHealObjectOnDisk(errs[i], dataErrs[i], partsMetadata[i], modTime) {
outDatedDisks[i] = storageDisks[i]
disksToHealCount++
result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
UUID: "",
Endpoint: drive,
@ -328,7 +349,6 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
})
continue
}
drive = v.String()
result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
UUID: "",
Endpoint: drive,

Loading…
Cancel
Save