fix: trigger heal if one of the parts are not found (#11358)

Previously we added a heal trigger when bit-rot checks
failed; now extend that to also trigger a heal when parts
are not found. This healing is only triggered if we can
successfully decode the object, i.e. read quorum is still
satisfied for the object.
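
In practice, the read path now distinguishes two recoverable failure modes: a missing part schedules a normal-scan heal, while a failed bitrot check schedules a deep-scan heal, and either one is scheduled only when the read itself still served every requested byte. The following is a simplified, self-contained sketch of that decision, using stand-in error values and a stub healObject rather than the actual MinIO internals (the real call sites appear in the diff below):

package main

import (
	"errors"
	"fmt"
)

// Stand-ins for the cmd package sentinels referenced by the patch (simplified here).
var (
	errFileNotFound = errors.New("file not found")
	errFileCorrupt  = errors.New("file corrupt")
)

// HealScanMode mirrors the constant ordering introduced in pkg/madmin/heal-commands.go.
type HealScanMode int

const (
	HealUnknownScan HealScanMode = iota
	HealNormalScan
	HealDeepScan
)

// healObject stands in for the background-heal queueing done in cmd/global-heal.go.
func healObject(bucket, object, versionID string, scan HealScanMode) {
	fmt.Printf("queue heal for %s/%s (version %q), scan mode %d\n", bucket, object, versionID, scan)
}

// maybeHeal captures the decision added to getObjectWithFileInfo: schedule a heal
// only when every requested byte was still served (read quorum held) but decoding
// reported a missing part (normal scan) or a corrupted part (deep scan).
func maybeHeal(written, partLength int64, err error, bucket, object, versionID string) {
	if err == nil || written != partLength {
		return // nothing to heal, or the read itself failed and is reported upstream
	}
	var scan HealScanMode
	switch {
	case errors.Is(err, errFileNotFound):
		scan = HealNormalScan
	case errors.Is(err, errFileCorrupt):
		scan = HealDeepScan
	}
	if scan != HealUnknownScan {
		healObject(bucket, object, versionID, scan)
	}
}

func main() {
	// All 1024 requested bytes were served, but a part was missing: normal-scan heal.
	maybeHeal(1024, 1024, errFileNotFound, "testbucket", "object", "")
	// All bytes served, but a bitrot check failed: deep-scan heal.
	maybeHeal(1024, 1024, errFileCorrupt, "testbucket", "object", "")
	// The read itself fell short: no heal is scheduled from this path.
	maybeHeal(512, 1024, errors.New("read quorum not met"), "testbucket", "object", "")
}

In the patch itself the call is wrapped in healOnce.Do and fired as a goroutine, so at most one background heal is queued per request.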
Branch: master
Author: Harshavardhana (committed by GitHub)
Parent: e9ac7b0fb7
Commit: e019f21bda
Changed files:
  1. cmd/bitrot-streaming.go (7 lines changed)
  2. cmd/erasure-decode.go (76 lines changed)
  3. cmd/erasure-decode_test.go (8 lines changed)
  4. cmd/erasure-lowlevel-heal.go (2 lines changed)
  5. cmd/erasure-object.go (43 lines changed)
  6. cmd/global-heal.go (6 lines changed)
  7. cmd/storage-rest-common.go (2 lines changed)
  8. pkg/madmin/heal-commands.go (6 lines changed)

cmd/bitrot-streaming.go
@@ -149,10 +149,9 @@ func (b *streamingBitrotReader) ReadAt(buf []byte, offset int64) (int, error) {
 	b.h.Write(buf)
 	if !bytes.Equal(b.h.Sum(nil), b.hashBytes) {
-		err := &errHashMismatch{fmt.Sprintf("Disk: %s -> %s/%s - content hash does not match - expected %s, got %s",
-			b.disk, b.volume, b.filePath, hex.EncodeToString(b.hashBytes), hex.EncodeToString(b.h.Sum(nil)))}
-		logger.LogIf(GlobalContext, err)
-		return 0, err
+		logger.LogIf(GlobalContext, fmt.Errorf("Disk: %s -> %s/%s - content hash does not match - expected %s, got %s",
+			b.disk, b.volume, b.filePath, hex.EncodeToString(b.hashBytes), hex.EncodeToString(b.h.Sum(nil))))
+		return 0, errFileCorrupt
 	}
 	b.currOffset += int64(len(buf))
 	return len(buf), nil

cmd/erasure-decode.go
@@ -26,8 +26,6 @@ import (
 	"github.com/minio/minio/cmd/logger"
 )
 
-var errHealRequired = errors.New("heal required")
-
 // Reads in parallel from readers.
 type parallelReader struct {
 	readers []io.ReaderAt
@@ -125,7 +123,8 @@ func (p *parallelReader) Read(dst [][]byte) ([][]byte, error) {
 		readTriggerCh <- true
 	}
 
-	healRequired := int32(0) // Atomic bool flag.
+	bitrotHeal := int32(0)       // Atomic bool flag.
+	missingPartsHeal := int32(0) // Atomic bool flag.
 	readerIndex := 0
 	var wg sync.WaitGroup
 	// if readTrigger is true, it implies next disk.ReadAt() should be tried
@@ -164,8 +163,10 @@ func (p *parallelReader) Read(dst [][]byte) ([][]byte, error) {
 			p.buf[bufIdx] = p.buf[bufIdx][:p.shardSize]
 			_, err := rr.ReadAt(p.buf[bufIdx], p.offset)
 			if err != nil {
-				if _, ok := err.(*errHashMismatch); ok {
-					atomic.StoreInt32(&healRequired, 1)
+				if errors.Is(err, errFileNotFound) {
+					atomic.StoreInt32(&missingPartsHeal, 1)
+				} else if errors.Is(err, errFileCorrupt) {
+					atomic.StoreInt32(&bitrotHeal, 1)
 				}
 
 				// This will be communicated upstream.
@@ -188,8 +189,10 @@ func (p *parallelReader) Read(dst [][]byte) ([][]byte, error) {
 	wg.Wait()
 	if p.canDecode(newBuf) {
 		p.offset += p.shardSize
-		if healRequired != 0 {
-			return newBuf, errHealRequired
+		if atomic.LoadInt32(&missingPartsHeal) == 1 {
+			return newBuf, errFileNotFound
+		} else if atomic.LoadInt32(&bitrotHeal) == 1 {
+			return newBuf, errFileCorrupt
 		}
 		return newBuf, nil
 	}
@@ -197,41 +200,20 @@ func (p *parallelReader) Read(dst [][]byte) ([][]byte, error) {
 	return nil, reduceReadQuorumErrs(context.Background(), p.errs, objectOpIgnoredErrs, p.dataBlocks)
 }
 
-type errDecodeHealRequired struct {
-	err error
-}
-
-func (err *errDecodeHealRequired) Error() string {
-	return err.err.Error()
-}
-
-func (err *errDecodeHealRequired) Unwrap() error {
-	return err.err
-}
-
 // Decode reads from readers, reconstructs data if needed and writes the data to the writer.
 // A set of preferred drives can be supplied. In that case they will be used and the data reconstructed.
-func (e Erasure) Decode(ctx context.Context, writer io.Writer, readers []io.ReaderAt, offset, length, totalLength int64, prefer []bool) error {
-	healRequired, err := e.decode(ctx, writer, readers, offset, length, totalLength, prefer)
-	if healRequired {
-		return &errDecodeHealRequired{err}
-	}
-
-	return err
-}
-
-// Decode reads from readers, reconstructs data if needed and writes the data to the writer.
-func (e Erasure) decode(ctx context.Context, writer io.Writer, readers []io.ReaderAt, offset, length, totalLength int64, prefer []bool) (bool, error) {
+func (e Erasure) Decode(ctx context.Context, writer io.Writer, readers []io.ReaderAt, offset, length, totalLength int64, prefer []bool) (written int64, derr error) {
 	if offset < 0 || length < 0 {
 		logger.LogIf(ctx, errInvalidArgument)
-		return false, errInvalidArgument
+		return -1, errInvalidArgument
 	}
 	if offset+length > totalLength {
 		logger.LogIf(ctx, errInvalidArgument)
-		return false, errInvalidArgument
+		return -1, errInvalidArgument
 	}
 	if length == 0 {
-		return false, nil
+		return 0, nil
 	}
 
 	reader := newParallelReader(readers, e, offset, totalLength)
@@ -242,7 +224,6 @@ func (e Erasure) decode(ctx context.Context, writer io.Writer, readers []io.Read
 	startBlock := offset / e.blockSize
 	endBlock := (offset + length) / e.blockSize
 
-	var healRequired bool
 	var bytesWritten int64
 	var bufs [][]byte
 	for block := startBlock; block <= endBlock; block++ {
@@ -264,32 +245,39 @@ func (e Erasure) decode(ctx context.Context, writer io.Writer, readers []io.Read
 		if blockLength == 0 {
 			break
 		}
 
 		var err error
 		bufs, err = reader.Read(bufs)
-		if err != nil {
-			if errors.Is(err, errHealRequired) {
-				// errHealRequired is only returned if there are be enough data for reconstruction.
-				healRequired = true
-			} else {
-				return healRequired, err
+		if len(bufs) > 0 {
+			// Set only if there is enough data for reconstruction,
+			// and only for expected errors; also set only once.
+			if errors.Is(err, errFileNotFound) || errors.Is(err, errFileCorrupt) {
+				if derr == nil {
+					derr = err
+				}
 			}
+		} else if err != nil {
+			// For all errors that cannot be reconstructed, fail the read operation.
+			return -1, err
 		}
 
 		if err = e.DecodeDataBlocks(bufs); err != nil {
 			logger.LogIf(ctx, err)
-			return healRequired, err
+			return -1, err
 		}
 
 		n, err := writeDataBlocks(ctx, writer, bufs, e.dataBlocks, blockOffset, blockLength)
 		if err != nil {
-			return healRequired, err
+			return -1, err
 		}
 
 		bytesWritten += n
 	}
 
 	if bytesWritten != length {
 		logger.LogIf(ctx, errLessData)
-		return healRequired, errLessData
+		return bytesWritten, errLessData
 	}
 
-	return healRequired, nil
+	return bytesWritten, derr
 }

cmd/erasure-decode_test.go
@@ -138,7 +138,7 @@ func TestErasureDecode(t *testing.T) {
 		}
 
 		writer := bytes.NewBuffer(nil)
-		err = erasure.Decode(context.Background(), writer, bitrotReaders, test.offset, test.length, test.data, nil)
+		_, err = erasure.Decode(context.Background(), writer, bitrotReaders, test.offset, test.length, test.data, nil)
 		closeBitrotReaders(bitrotReaders)
 		if err != nil && !test.shouldFail {
 			t.Errorf("Test %d: should pass but failed with: %v", i, err)
@@ -181,7 +181,7 @@ func TestErasureDecode(t *testing.T) {
 			bitrotReaders[0] = nil
 		}
 		writer.Reset()
-		err = erasure.Decode(context.Background(), writer, bitrotReaders, test.offset, test.length, test.data, nil)
+		_, err = erasure.Decode(context.Background(), writer, bitrotReaders, test.offset, test.length, test.data, nil)
 		closeBitrotReaders(bitrotReaders)
 		if err != nil && !test.shouldFailQuorum {
 			t.Errorf("Test %d: should pass but failed with: %v", i, err)
@@ -272,7 +272,7 @@ func TestErasureDecodeRandomOffsetLength(t *testing.T) {
 			tillOffset := erasure.ShardFileOffset(offset, readLen, length)
 			bitrotReaders[index] = newStreamingBitrotReader(disk, nil, "testbucket", "object", tillOffset, DefaultBitrotAlgorithm, erasure.ShardSize())
 		}
-		err = erasure.Decode(context.Background(), buf, bitrotReaders, offset, readLen, length, nil)
+		_, err = erasure.Decode(context.Background(), buf, bitrotReaders, offset, readLen, length, nil)
 		closeBitrotReaders(bitrotReaders)
 		if err != nil {
 			t.Fatal(err, offset, readLen)
@@ -334,7 +334,7 @@ func benchmarkErasureDecode(data, parity, dataDown, parityDown int, size int64,
 			tillOffset := erasure.ShardFileOffset(0, size, size)
 			bitrotReaders[index] = newStreamingBitrotReader(disk, nil, "testbucket", "object", tillOffset, DefaultBitrotAlgorithm, erasure.ShardSize())
 		}
-		if err = erasure.Decode(context.Background(), bytes.NewBuffer(content[:0]), bitrotReaders, 0, size, size, nil); err != nil {
+		if _, err = erasure.Decode(context.Background(), bytes.NewBuffer(content[:0]), bitrotReaders, 0, size, size, nil); err != nil {
 			panic(err)
 		}
 		closeBitrotReaders(bitrotReaders)

cmd/erasure-lowlevel-heal.go
@@ -28,7 +28,7 @@ import (
 func (e Erasure) Heal(ctx context.Context, readers []io.ReaderAt, writers []io.Writer, size int64) error {
 	r, w := io.Pipe()
 	go func() {
-		if err := e.Decode(ctx, w, readers, 0, size, size, nil); err != nil {
+		if _, err := e.Decode(ctx, w, readers, 0, size, size, nil); err != nil {
 			w.CloseWithError(err)
 			return
 		}

cmd/erasure-object.go
@@ -32,6 +32,7 @@ import (
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/bucket/lifecycle"
 	"github.com/minio/minio/pkg/bucket/replication"
+	"github.com/minio/minio/pkg/madmin"
 	"github.com/minio/minio/pkg/mimedb"
 	"github.com/minio/minio/pkg/sync/errgroup"
 )
@@ -315,17 +316,29 @@ func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, obje
 			// Prefer local disks
 			prefer[index] = disk.Hostname() == ""
 		}
-		err = erasure.Decode(ctx, writer, readers, partOffset, partLength, partSize, prefer)
+
+		written, err := erasure.Decode(ctx, writer, readers, partOffset, partLength, partSize, prefer)
 		// Note: we should not be defer'ing the following closeBitrotReaders() call as
 		// we are inside a for loop i.e if we use defer, we would accumulate a lot of open files by the time
 		// we return from this function.
 		closeBitrotReaders(readers)
 		if err != nil {
-			if decodeHealErr, ok := err.(*errDecodeHealRequired); ok {
-				healOnce.Do(func() {
-					go deepHealObject(bucket, object, fi.VersionID)
-				})
-				err = decodeHealErr.err
+			// If we have successfully written all the content that was asked
+			// by the client, but we still see an error - this would mean
+			// that we have some parts or data blocks missing or corrupted
+			// - attempt a heal to successfully heal them for future calls.
+			if written == partLength {
+				var scan madmin.HealScanMode
+				if errors.Is(err, errFileNotFound) {
+					scan = madmin.HealNormalScan
+				} else if errors.Is(err, errFileCorrupt) {
+					scan = madmin.HealDeepScan
+				}
+				if scan != madmin.HealUnknownScan {
+					healOnce.Do(func() {
+						go healObject(bucket, object, fi.VersionID, scan)
+					})
+				}
 			}
 			if err != nil {
 				return toObjectErr(err, bucket, object)
@@ -416,6 +429,24 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
 	if err != nil {
 		return fi, nil, nil, err
 	}
 
+	var missingBlocks int
+	for i, err := range errs {
+		if err != nil && errors.Is(err, errFileNotFound) {
+			missingBlocks++
+			continue
+		}
+
+		if metaArr[i].IsValid() && metaArr[i].ModTime.Equal(fi.ModTime) {
+			continue
+		}
+		missingBlocks++
+	}
+
+	// if missing metadata can be reconstructed, attempt to reconstruct.
+	if missingBlocks > 0 && missingBlocks < readQuorum {
+		go healObject(bucket, object, fi.VersionID, madmin.HealNormalScan)
+	}
+
 	return fi, metaArr, onlineDisks, nil
 }

cmd/global-heal.go
@@ -200,8 +200,8 @@ func healErasureSet(ctx context.Context, setIndex int, buckets []BucketInfo, dis
 	return nil
 }
 
-// deepHealObject heals given object path in deep to fix bitrot.
-func deepHealObject(bucket, object, versionID string) {
+// healObject heals the given object path with the requested scan mode.
+func healObject(bucket, object, versionID string, scan madmin.HealScanMode) {
 	// Get background heal sequence to send elements to heal
 	bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
 	if ok {
@@ -211,7 +211,7 @@ func deepHealObject(bucket, object, versionID string) {
 			versionID: versionID,
 			opts: &madmin.HealOpts{
 				Remove:   true, // if found dangling purge it.
-				ScanMode: madmin.HealDeepScan,
+				ScanMode: scan,
 			},
 		}
 	}

cmd/storage-rest-common.go
@@ -17,7 +17,7 @@
 package cmd
 
 const (
-	storageRESTVersion       = "v24" // Add more small file optimization
+	storageRESTVersion       = "v25" // Add more small file optimization
 	storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
 	storageRESTPrefix        = minioReservedBucketPath + "/storage"
 )

pkg/madmin/heal-commands.go
@@ -31,8 +31,12 @@ import (
 type HealScanMode int
 
 const (
+	// HealUnknownScan default is unknown
+	HealUnknownScan HealScanMode = iota
+
 	// HealNormalScan checks if parts are present and not outdated
-	HealNormalScan HealScanMode = iota
+	HealNormalScan
+
 	// HealDeepScan checks for parts bitrot checksums
 	HealDeepScan
 )
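
A small but consequential detail in the madmin change above: HealUnknownScan now takes the iota zero value, so a zero-valued HealScanMode no longer means a normal scan, and the getObject path relies on that by scheduling a heal only when scan != madmin.HealUnknownScan. Note that this also shifts the numeric values of HealNormalScan and HealDeepScan, which matters for anything that serializes the mode numerically. A minimal standalone illustration (mirroring, not reusing, the madmin constants):

package main

import "fmt"

// Mirrors the new constant ordering in pkg/madmin/heal-commands.go.
type HealScanMode int

const (
	HealUnknownScan HealScanMode = iota // 0: no scan mode decided yet
	HealNormalScan                      // 1: checks that parts are present and not outdated
	HealDeepScan                        // 2: verifies part bitrot checksums
)

func main() {
	var scan HealScanMode                // zero value
	fmt.Println(scan == HealUnknownScan) // true: an undecided mode never triggers a heal
}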
