Add normal/deep type of heal scanning (#7251)

Healing scan used to read all objects parts to check for bitrot
checksum. This commit will add a quicker way of healing scan
by only checking if parts are actually present in disks or not.
master
Anis Elleuch 6 years ago committed by kannappanr
parent 233824bf92
commit facbd653ba
  1. 4
      cmd/admin-heal-ops.go
  2. 2
      cmd/fs-v1.go
  3. 4
      cmd/fs-v1_test.go
  4. 2
      cmd/gateway-unsupported.go
  5. 2
      cmd/object-api-interface.go
  6. 4
      cmd/xl-sets.go
  7. 52
      cmd/xl-v1-healing-common.go
  8. 8
      cmd/xl-v1-healing-common_test.go
  9. 9
      cmd/xl-v1-healing.go
  10. 6
      cmd/xl-v1-healing_test.go
  11. 5
      cmd/xl-v1-object_test.go
  12. 17
      pkg/madmin/heal-commands.go

@ -575,7 +575,7 @@ func (h *healSequence) healMinioSysMeta(metaPrefix string) func() error {
if h.isQuitting() { if h.isQuitting() {
return errHealStopSignalled return errHealStopSignalled
} }
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove) res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
// Object might have been deleted, by the time heal // Object might have been deleted, by the time heal
// was attempted we ignore this object an move on. // was attempted we ignore this object an move on.
if isErrObjectNotFound(herr) { if isErrObjectNotFound(herr) {
@ -718,7 +718,7 @@ func (h *healSequence) healObject(bucket, object string) error {
return errServerNotInitialized return errServerNotInitialized
} }
hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove) hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
if isErrObjectNotFound(err) { if isErrObjectNotFound(err) {
return nil return nil
} }

@ -1240,7 +1240,7 @@ func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealRe
} }
// HealObject - no-op for fs. Valid only for XL. // HealObject - no-op for fs. Valid only for XL.
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) ( func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (
res madmin.HealResultItem, err error) { res madmin.HealResultItem, err error) {
logger.LogIf(ctx, NotImplemented{}) logger.LogIf(ctx, NotImplemented{})
return res, NotImplemented{} return res, NotImplemented{}

@ -22,6 +22,8 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"testing" "testing"
"github.com/minio/minio/pkg/madmin"
) )
// Tests for if parent directory is object // Tests for if parent directory is object
@ -390,7 +392,7 @@ func TestFSHealObject(t *testing.T) {
defer os.RemoveAll(disk) defer os.RemoveAll(disk)
obj := initFSObjects(disk, t) obj := initFSObjects(disk, t)
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false) _, err := obj.HealObject(context.Background(), "bucket", "object", false, false, madmin.HealDeepScan)
if err == nil || !isSameType(err, NotImplemented{}) { if err == nil || !isSameType(err, NotImplemented{}) {
t.Fatalf("Heal Object should return NotImplemented error ") t.Fatalf("Heal Object should return NotImplemented error ")
} }

@ -102,7 +102,7 @@ func (a GatewayUnsupported) ListBucketsHeal(ctx context.Context) (buckets []Buck
} }
// HealObject - Not implemented stub // HealObject - Not implemented stub
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (h madmin.HealResultItem, e error) { func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (h madmin.HealResultItem, e error) {
return h, NotImplemented{} return h, NotImplemented{}
} }

@ -88,7 +88,7 @@ type ObjectLayer interface {
ReloadFormat(ctx context.Context, dryRun bool) error ReloadFormat(ctx context.Context, dryRun bool) error
HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error) HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error)
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error)
ListBucketsHeal(ctx context.Context) (buckets []BucketInfo, err error) ListBucketsHeal(ctx context.Context) (buckets []BucketInfo, err error)
HealObjects(ctx context.Context, bucket, prefix string, healObjectFn func(string, string) error) error HealObjects(ctx context.Context, bucket, prefix string, healObjectFn func(string, string) error) error

@ -1296,8 +1296,8 @@ func (s *xlSets) HealBucket(ctx context.Context, bucket string, dryRun, remove b
} }
// HealObject - heals inconsistent object on a hashedSet based on object name. // HealObject - heals inconsistent object on a hashedSet based on object name.
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error) { func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error) {
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove) return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove, scanMode)
} }
// Lists all buckets which need healing. // Lists all buckets which need healing.

@ -22,6 +22,7 @@ import (
"time" "time"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/madmin"
) )
// commonTime returns a maximally occurring time from a list of time. // commonTime returns a maximally occurring time from a list of time.
@ -158,7 +159,7 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
// - slice of errors about the state of data files on disk - can have // - slice of errors about the state of data files on disk - can have
// a not-found error or a hash-mismatch error. // a not-found error or a hash-mismatch error.
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket, func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
object string) ([]StorageAPI, []error) { object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
availableDisks := make([]StorageAPI, len(onlineDisks)) availableDisks := make([]StorageAPI, len(onlineDisks))
dataErrs := make([]error, len(onlineDisks)) dataErrs := make([]error, len(onlineDisks))
@ -168,27 +169,38 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
continue continue
} }
erasureInfo := partsMetadata[i].Erasure switch scanMode {
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize) case madmin.HealDeepScan:
if err != nil { erasureInfo := partsMetadata[i].Erasure
dataErrs[i] = err erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
continue
}
// disk has a valid xl.json but may not have all the
// parts. This is considered an outdated disk, since
// it needs healing too.
for _, part := range partsMetadata[i].Parts {
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
if err != nil { if err != nil {
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
logger.LogIf(ctx, err)
}
dataErrs[i] = err dataErrs[i] = err
break continue
}
// disk has a valid xl.json but may not have all the
// parts. This is considered an outdated disk, since
// it needs healing too.
for _, part := range partsMetadata[i].Parts {
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
if err != nil {
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
logger.LogIf(ctx, err)
}
dataErrs[i] = err
break
}
}
case madmin.HealNormalScan:
for _, part := range partsMetadata[i].Parts {
_, err := onlineDisk.StatFile(bucket, pathJoin(object, part.Name))
if err != nil {
dataErrs[i] = err
break
}
} }
} }

@ -23,6 +23,8 @@ import (
"path/filepath" "path/filepath"
"testing" "testing"
"time" "time"
"github.com/minio/minio/pkg/madmin"
) )
// validates functionality provided to find most common // validates functionality provided to find most common
@ -239,7 +241,7 @@ func TestListOnlineDisks(t *testing.T) {
i+1, test.expectedTime, modTime) i+1, test.expectedTime, modTime)
} }
availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object) availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan)
test.errs = newErrs test.errs = newErrs
if test._tamperBackend != noTamper { if test._tamperBackend != noTamper {
@ -291,7 +293,7 @@ func TestDisksWithAllParts(t *testing.T) {
t.Fatalf("Failed to read xl meta data %v", err) t.Fatalf("Failed to read xl meta data %v", err)
} }
filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object) filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)
if len(filteredDisks) != len(xlDisks) { if len(filteredDisks) != len(xlDisks) {
t.Errorf("Unexpected number of disks: %d", len(filteredDisks)) t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
@ -328,7 +330,7 @@ func TestDisksWithAllParts(t *testing.T) {
} }
errs = make([]error, len(xlDisks)) errs = make([]error, len(xlDisks))
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object) filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)
if len(filteredDisks) != len(xlDisks) { if len(filteredDisks) != len(xlDisks) {
t.Errorf("Unexpected number of disks: %d", len(filteredDisks)) t.Errorf("Unexpected number of disks: %d", len(filteredDisks))

@ -208,7 +208,8 @@ func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime t
// Heals an object by re-writing corrupt/missing erasure blocks. // Heals an object by re-writing corrupt/missing erasure blocks.
func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string, func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
quorum int, dryRun bool) (result madmin.HealResultItem, err error) { quorum int, dryRun bool, scanMode madmin.HealScanMode) (result madmin.HealResultItem, err error) {
partsMetadata, errs := readAllXLMetadata(ctx, storageDisks, bucket, object) partsMetadata, errs := readAllXLMetadata(ctx, storageDisks, bucket, object)
errCount := 0 errCount := 0
@ -232,7 +233,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs) latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
// List of disks having all parts as per latest xl.json. // List of disks having all parts as per latest xl.json.
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object) availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object, scanMode)
// Initialize heal result object // Initialize heal result object
result = madmin.HealResultItem{ result = madmin.HealResultItem{
@ -621,7 +622,7 @@ func (xl xlObjects) isObjectDangling(metaArr []xlMetaV1, errs []error) (validMet
// FIXME: If an object object was deleted and one disk was down, // FIXME: If an object object was deleted and one disk was down,
// and later the disk comes back up again, heal on the object // and later the disk comes back up again, heal on the object
// should delete it. // should delete it.
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool) (hr madmin.HealResultItem, err error) { func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool, scanMode madmin.HealScanMode) (hr madmin.HealResultItem, err error) {
// Create context that also contains information about the object and bucket. // Create context that also contains information about the object and bucket.
// The top level handler might not have this information. // The top level handler might not have this information.
reqInfo := logger.GetReqInfo(ctx) reqInfo := logger.GetReqInfo(ctx)
@ -670,5 +671,5 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
defer objectLock.RUnlock() defer objectLock.RUnlock()
// Heal the object. // Heal the object.
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun) return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun, scanMode)
} }

@ -21,6 +21,8 @@ import (
"context" "context"
"path/filepath" "path/filepath"
"testing" "testing"
"github.com/minio/minio/pkg/madmin"
) )
// Tests undoes and validates if the undoing completes successfully. // Tests undoes and validates if the undoing completes successfully.
@ -114,7 +116,7 @@ func TestHealObjectXL(t *testing.T) {
t.Fatalf("Failed to delete a file - %v", err) t.Fatalf("Failed to delete a file - %v", err)
} }
_, err = obj.HealObject(context.Background(), bucket, object, false, false) _, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
if err != nil { if err != nil {
t.Fatalf("Failed to heal object - %v", err) t.Fatalf("Failed to heal object - %v", err)
} }
@ -130,7 +132,7 @@ func TestHealObjectXL(t *testing.T) {
} }
// Try healing now, expect to receive errDiskNotFound. // Try healing now, expect to receive errDiskNotFound.
_, err = obj.HealObject(context.Background(), bucket, object, false, false) _, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
// since majority of xl.jsons are not available, object quorum can't be read properly and error will be errXLReadQuorum // since majority of xl.jsons are not available, object quorum can't be read properly and error will be errXLReadQuorum
if _, ok := err.(InsufficientReadQuorum); !ok { if _, ok := err.(InsufficientReadQuorum); !ok {
t.Errorf("Expected %v but received %v", InsufficientReadQuorum{}, err) t.Errorf("Expected %v but received %v", InsufficientReadQuorum{}, err)

@ -28,6 +28,7 @@ import (
"time" "time"
humanize "github.com/dustin/go-humanize" humanize "github.com/dustin/go-humanize"
"github.com/minio/minio/pkg/madmin"
) )
func TestRepeatPutObjectPart(t *testing.T) { func TestRepeatPutObjectPart(t *testing.T) {
@ -308,7 +309,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
_, err = xl.HealObject(context.Background(), bucket, object, false, false) _, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -337,7 +338,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
_, err = xl.HealObject(context.Background(), bucket, object, false, false) _, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

@ -26,11 +26,22 @@ import (
"time" "time"
) )
// HealScanMode represents the type of healing scan
type HealScanMode int
const (
// HealNormalScan checks if parts are present and not outdated
HealNormalScan HealScanMode = iota
// HealDeepScan checks for parts bitrot checksums
HealDeepScan
)
// HealOpts - collection of options for a heal sequence // HealOpts - collection of options for a heal sequence
type HealOpts struct { type HealOpts struct {
Recursive bool `json:"recursive"` Recursive bool `json:"recursive"`
DryRun bool `json:"dryRun"` DryRun bool `json:"dryRun"`
Remove bool `json:"remove"` Remove bool `json:"remove"`
ScanMode HealScanMode `json:"scanMode"`
} }
// HealStartSuccess - holds information about a successfully started // HealStartSuccess - holds information about a successfully started

Loading…
Cancel
Save