Add normal/deep type of heal scanning (#7251)

Healing scan used to read all parts of every object to verify their
bitrot checksums. This commit adds a quicker healing scan mode that
only checks whether the parts are actually present on the disks.
master
Anis Elleuch 6 years ago committed by kannappanr
parent 233824bf92
commit facbd653ba
  1. 4
      cmd/admin-heal-ops.go
  2. 2
      cmd/fs-v1.go
  3. 4
      cmd/fs-v1_test.go
  4. 2
      cmd/gateway-unsupported.go
  5. 2
      cmd/object-api-interface.go
  6. 4
      cmd/xl-sets.go
  7. 52
      cmd/xl-v1-healing-common.go
  8. 8
      cmd/xl-v1-healing-common_test.go
  9. 9
      cmd/xl-v1-healing.go
  10. 6
      cmd/xl-v1-healing_test.go
  11. 5
      cmd/xl-v1-object_test.go
  12. 17
      pkg/madmin/heal-commands.go

@ -575,7 +575,7 @@ func (h *healSequence) healMinioSysMeta(metaPrefix string) func() error {
if h.isQuitting() {
return errHealStopSignalled
}
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove)
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
// Object might have been deleted by the time heal
// was attempted; we ignore this object and move on.
if isErrObjectNotFound(herr) {
@ -718,7 +718,7 @@ func (h *healSequence) healObject(bucket, object string) error {
return errServerNotInitialized
}
hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove)
hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
if isErrObjectNotFound(err) {
return nil
}

@ -1240,7 +1240,7 @@ func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealRe
}
// HealObject - no-op for fs. Valid only for XL.
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (
res madmin.HealResultItem, err error) {
logger.LogIf(ctx, NotImplemented{})
return res, NotImplemented{}

@ -22,6 +22,8 @@ import (
"os"
"path/filepath"
"testing"
"github.com/minio/minio/pkg/madmin"
)
// Tests for if parent directory is object
@ -390,7 +392,7 @@ func TestFSHealObject(t *testing.T) {
defer os.RemoveAll(disk)
obj := initFSObjects(disk, t)
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false)
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false, madmin.HealDeepScan)
if err == nil || !isSameType(err, NotImplemented{}) {
t.Fatalf("Heal Object should return NotImplemented error ")
}

@ -102,7 +102,7 @@ func (a GatewayUnsupported) ListBucketsHeal(ctx context.Context) (buckets []Buck
}
// HealObject - Not implemented stub
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (h madmin.HealResultItem, e error) {
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (h madmin.HealResultItem, e error) {
return h, NotImplemented{}
}

@ -88,7 +88,7 @@ type ObjectLayer interface {
ReloadFormat(ctx context.Context, dryRun bool) error
HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error)
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error)
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error)
ListBucketsHeal(ctx context.Context) (buckets []BucketInfo, err error)
HealObjects(ctx context.Context, bucket, prefix string, healObjectFn func(string, string) error) error

@ -1296,8 +1296,8 @@ func (s *xlSets) HealBucket(ctx context.Context, bucket string, dryRun, remove b
}
// HealObject - heals inconsistent object on a hashedSet based on object name.
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error) {
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove)
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error) {
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove, scanMode)
}
// Lists all buckets which need healing.

@ -22,6 +22,7 @@ import (
"time"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/madmin"
)
// commonTime returns a maximally occurring time from a list of times.
@ -158,7 +159,7 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
// - slice of errors about the state of data files on disk - can have
// a not-found error or a hash-mismatch error.
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
object string) ([]StorageAPI, []error) {
object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
availableDisks := make([]StorageAPI, len(onlineDisks))
dataErrs := make([]error, len(onlineDisks))
@ -168,27 +169,38 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
continue
}
erasureInfo := partsMetadata[i].Erasure
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
if err != nil {
dataErrs[i] = err
continue
}
// disk has a valid xl.json but may not have all the
// parts. This is considered an outdated disk, since
// it needs healing too.
for _, part := range partsMetadata[i].Parts {
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
switch scanMode {
case madmin.HealDeepScan:
erasureInfo := partsMetadata[i].Erasure
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
if err != nil {
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
logger.LogIf(ctx, err)
}
dataErrs[i] = err
break
continue
}
// disk has a valid xl.json but may not have all the
// parts. This is considered an outdated disk, since
// it needs healing too.
for _, part := range partsMetadata[i].Parts {
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
if err != nil {
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
logger.LogIf(ctx, err)
}
dataErrs[i] = err
break
}
}
case madmin.HealNormalScan:
for _, part := range partsMetadata[i].Parts {
_, err := onlineDisk.StatFile(bucket, pathJoin(object, part.Name))
if err != nil {
dataErrs[i] = err
break
}
}
}

@ -23,6 +23,8 @@ import (
"path/filepath"
"testing"
"time"
"github.com/minio/minio/pkg/madmin"
)
// validates functionality provided to find most common
@ -239,7 +241,7 @@ func TestListOnlineDisks(t *testing.T) {
i+1, test.expectedTime, modTime)
}
availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan)
test.errs = newErrs
if test._tamperBackend != noTamper {
@ -291,7 +293,7 @@ func TestDisksWithAllParts(t *testing.T) {
t.Fatalf("Failed to read xl meta data %v", err)
}
filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)
if len(filteredDisks) != len(xlDisks) {
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
@ -328,7 +330,7 @@ func TestDisksWithAllParts(t *testing.T) {
}
errs = make([]error, len(xlDisks))
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)
if len(filteredDisks) != len(xlDisks) {
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))

@ -208,7 +208,8 @@ func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime t
// Heals an object by re-writing corrupt/missing erasure blocks.
func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
quorum int, dryRun bool) (result madmin.HealResultItem, err error) {
quorum int, dryRun bool, scanMode madmin.HealScanMode) (result madmin.HealResultItem, err error) {
partsMetadata, errs := readAllXLMetadata(ctx, storageDisks, bucket, object)
errCount := 0
@ -232,7 +233,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
// List of disks having all parts as per latest xl.json.
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object, scanMode)
// Initialize heal result object
result = madmin.HealResultItem{
@ -621,7 +622,7 @@ func (xl xlObjects) isObjectDangling(metaArr []xlMetaV1, errs []error) (validMet
// FIXME: If an object was deleted and one disk was down,
// and later the disk comes back up again, heal on the object
// should delete it.
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool) (hr madmin.HealResultItem, err error) {
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool, scanMode madmin.HealScanMode) (hr madmin.HealResultItem, err error) {
// Create context that also contains information about the object and bucket.
// The top level handler might not have this information.
reqInfo := logger.GetReqInfo(ctx)
@ -670,5 +671,5 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
defer objectLock.RUnlock()
// Heal the object.
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun)
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun, scanMode)
}

@ -21,6 +21,8 @@ import (
"context"
"path/filepath"
"testing"
"github.com/minio/minio/pkg/madmin"
)
// Tests undoes and validates if the undoing completes successfully.
@ -114,7 +116,7 @@ func TestHealObjectXL(t *testing.T) {
t.Fatalf("Failed to delete a file - %v", err)
}
_, err = obj.HealObject(context.Background(), bucket, object, false, false)
_, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
if err != nil {
t.Fatalf("Failed to heal object - %v", err)
}
@ -130,7 +132,7 @@ func TestHealObjectXL(t *testing.T) {
}
// Try healing now, expect to receive an InsufficientReadQuorum error.
_, err = obj.HealObject(context.Background(), bucket, object, false, false)
_, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
// since majority of xl.jsons are not available, object quorum can't be read properly and error will be errXLReadQuorum
if _, ok := err.(InsufficientReadQuorum); !ok {
t.Errorf("Expected %v but received %v", InsufficientReadQuorum{}, err)

@ -28,6 +28,7 @@ import (
"time"
humanize "github.com/dustin/go-humanize"
"github.com/minio/minio/pkg/madmin"
)
func TestRepeatPutObjectPart(t *testing.T) {
@ -308,7 +309,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err)
}
_, err = xl.HealObject(context.Background(), bucket, object, false, false)
_, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
if err != nil {
t.Fatal(err)
}
@ -337,7 +338,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err)
}
_, err = xl.HealObject(context.Background(), bucket, object, false, false)
_, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
if err != nil {
t.Fatal(err)
}

@ -26,11 +26,22 @@ import (
"time"
)
// HealScanMode represents the type of healing scan, i.e. how thoroughly
// the parts of an object are inspected on each disk during healing.
type HealScanMode int
const (
// HealNormalScan checks if parts are present and not outdated.
// It is the zero value of HealScanMode (first iota constant), so an
// unset scan mode selects the normal scan.
HealNormalScan HealScanMode = iota
// HealDeepScan verifies the bitrot checksum of every part, which
// requires reading the part data.
HealDeepScan
)
// HealOpts - collection of options for a heal sequence
type HealOpts struct {
Recursive bool `json:"recursive"`
DryRun bool `json:"dryRun"`
Remove bool `json:"remove"`
Recursive bool `json:"recursive"`
DryRun bool `json:"dryRun"`
Remove bool `json:"remove"`
ScanMode HealScanMode `json:"scanMode"`
}
// HealStartSuccess - holds information about a successfully started

Loading…
Cancel
Save