fix: do not return an error for successfully deleted dangling objects (#10938)

Dangling objects removed by `mc admin heal -r` or by crawler
auto-heal would incorrectly return an error. This can interfere
with usage calculation, since the entry size would be returned
as `0`. Instead, upon successful removal, use the resulting
object size to calculate the final size for the object, and
avoid reporting this as an error in the log messages.

Also, do not initialize ObjectSize in healResultItem to '-1';
this caused crawler metrics to count 1 byte less for objects
that are missing their `xl.meta`.
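For context (not part of the commit), a minimal standalone sketch of why a sentinel ObjectSize of -1 skews aggregated usage: the crawler sums each heal result's size into a running total, so -1 silently subtracts a byte per affected object, while 0 keeps the total correct for purged dangling objects. The types and the summing loop below are illustrative only, not MinIO's actual crawler code:

package main

import "fmt"

type healResultItem struct {
	ObjectSize int64
}

// totalUsage adds up the sizes reported by heal results, the way a
// usage/metrics pass would.
func totalUsage(results []healResultItem) int64 {
	var total int64
	for _, r := range results {
		total += r.ObjectSize
	}
	return total
}

func main() {
	withSentinel := []healResultItem{{ObjectSize: 1024}, {ObjectSize: -1}} // old sentinel for "size unknown"
	withZero := []healResultItem{{ObjectSize: 1024}, {ObjectSize: 0}}      // purged dangling object
	fmt.Println(totalUsage(withSentinel), totalUsage(withZero))            // 1023 1024
}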
Branch: master
Author: Harshavardhana (committed by GitHub, 4 years ago)
Parent: 734d07a532
Commit: 519c0077a9
6 changed files (changed lines in parentheses):

1. cmd/erasure-healing.go (158)
2. cmd/erasure-object_test.go (5)
3. cmd/object-api-errors.go (35)
4. cmd/object-api-putobject_test.go (5)
5. cmd/rest/client.go (7)
6. pkg/madmin/heal-commands.go (1)

cmd/erasure-healing.go

@@ -218,10 +218,12 @@ func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime t
 // Heals an object by re-writing corrupt/missing erasure blocks.
 func (er erasureObjects) healObject(ctx context.Context, bucket string, object string,
-    partsMetadata []FileInfo, errs []error, latestFileInfo FileInfo,
-    dryRun bool, remove bool, scanMode madmin.HealScanMode) (result madmin.HealResultItem, err error) {
+    versionID string, partsMetadata []FileInfo, errs []error, lfi FileInfo, opts madmin.HealOpts) (result madmin.HealResultItem, err error) {
 
-    dataBlocks := latestFileInfo.Erasure.DataBlocks
+    dryRun := opts.DryRun
+    scanMode := opts.ScanMode
+
+    dataBlocks := lfi.Erasure.DataBlocks
 
     storageDisks := er.getDisks()
     storageEndpoints := er.getEndpoints()

@@ -241,10 +243,6 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
         DiskCount:    len(storageDisks),
         ParityBlocks: len(storageDisks) / 2,
         DataBlocks:   len(storageDisks) / 2,
-
-        // Initialize object size to -1, so we can detect if we are
-        // unable to reliably find the object size.
-        ObjectSize: -1,
     }
 
     // Loop to find number of disks with valid data, per-drive

@@ -305,28 +303,14 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
     }
 
     if isAllNotFound(errs) {
-        return defaultHealResult(latestFileInfo, storageDisks, storageEndpoints, errs, bucket, object), nil
+        // File is fully gone, fileInfo is empty.
+        return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object, versionID), nil
     }
 
     // If less than read quorum number of disks have all the parts
     // of the data, we can't reconstruct the erasure-coded data.
     if numAvailableDisks < dataBlocks {
-        // Check if er.meta, and corresponding parts are also missing.
-        if m, ok := isObjectDangling(partsMetadata, errs, dataErrs); ok {
-            writeQuorum := m.Erasure.DataBlocks + 1
-            if m.Erasure.DataBlocks == 0 {
-                writeQuorum = getWriteQuorum(len(storageDisks))
-            }
-            if !dryRun && remove {
-                if latestFileInfo.VersionID == "" {
-                    err = er.deleteObject(ctx, bucket, object, writeQuorum)
-                } else {
-                    err = er.deleteObjectVersion(ctx, bucket, object, writeQuorum, FileInfo{VersionID: latestFileInfo.VersionID})
-                }
-            }
-            return defaultHealResult(latestFileInfo, storageDisks, storageEndpoints, errs, bucket, object), err
-        }
-        return result, toObjectErr(errErasureReadQuorum, bucket, object)
+        return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, dataErrs, opts)
     }
 
     if disksToHealCount == 0 {

@@ -342,9 +326,9 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
     // Latest FileInfo for reference. If a valid metadata is not
     // present, it is as good as object not found.
-    latestMeta, pErr := pickValidFileInfo(ctx, partsMetadata, modTime, dataBlocks)
-    if pErr != nil {
-        return result, toObjectErr(pErr, bucket, object)
+    latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, dataBlocks)
+    if err != nil {
+        return result, toObjectErr(err, bucket, object, versionID)
     }
 
     defer ObjectPathUpdated(pathJoin(bucket, object))

@@ -460,7 +444,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
     outDatedDisks, err = writeUniqueFileInfo(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
         partsMetadata, diskCount(outDatedDisks))
     if err != nil {
-        return result, toObjectErr(err, bucket, object)
+        return result, toObjectErr(err, bucket, object, versionID)
     }
 
     // Rename from tmp location to the actual location.

@@ -573,20 +557,17 @@ func (er erasureObjects) healObjectDir(ctx context.Context, bucket, object strin
 // Populates default heal result item entries with possible values when we are returning prematurely.
 // This is to ensure that in any circumstance we are not returning empty arrays with wrong values.
-func defaultHealResult(latestFileInfo FileInfo, storageDisks []StorageAPI, storageEndpoints []string, errs []error, bucket, object string) madmin.HealResultItem {
+func defaultHealResult(lfi FileInfo, storageDisks []StorageAPI, storageEndpoints []string, errs []error, bucket, object, versionID string) madmin.HealResultItem {
     // Initialize heal result object
     result := madmin.HealResultItem{
         Type:      madmin.HealItemObject,
         Bucket:    bucket,
         Object:    object,
+        VersionID: versionID,
         DiskCount: len(storageDisks),
-
-        // Initialize object size to -1, so we can detect if we are
-        // unable to reliably find the object size.
-        ObjectSize: -1,
     }
-    if latestFileInfo.IsValid() {
-        result.ObjectSize = latestFileInfo.Size
+    if lfi.IsValid() {
+        result.ObjectSize = lfi.Size
     }
 
     for index, disk := range storageDisks {

@@ -620,13 +601,13 @@ func defaultHealResult(latestFileInfo FileInfo, storageDisks []StorageAPI, stora
         })
     }
 
-    if !latestFileInfo.IsValid() {
+    if !lfi.IsValid() {
         // Default to most common configuration for erasure blocks.
         result.ParityBlocks = getDefaultParityBlocks(len(storageDisks))
         result.DataBlocks = getDefaultDataBlocks(len(storageDisks))
     } else {
-        result.ParityBlocks = latestFileInfo.Erasure.ParityBlocks
-        result.DataBlocks = latestFileInfo.Erasure.DataBlocks
+        result.ParityBlocks = lfi.Erasure.ParityBlocks
+        result.DataBlocks = lfi.Erasure.DataBlocks
     }
 
     return result

@@ -692,6 +673,51 @@ func isObjectDirDangling(errs []error) (ok bool) {
     return found < notFound && found > 0
 }
 
+func (er erasureObjects) purgeObjectDangling(ctx context.Context, bucket, object, versionID string,
+    metaArr []FileInfo, errs []error, dataErrs []error, opts madmin.HealOpts) (madmin.HealResultItem, error) {
+
+    storageDisks := er.getDisks()
+    storageEndpoints := er.getEndpoints()
+
+    // Check if the object is dangling, if yes and user requested
+    // remove we simply delete it from namespace.
+    m, ok := isObjectDangling(metaArr, errs, dataErrs)
+    if ok {
+        writeQuorum := m.Erasure.DataBlocks
+        if m.Erasure.DataBlocks == 0 || m.Erasure.DataBlocks == m.Erasure.ParityBlocks {
+            writeQuorum = getWriteQuorum(len(storageDisks))
+        }
+        var err error
+        if !opts.DryRun && opts.Remove {
+            if versionID == "" {
+                err = er.deleteObject(ctx, bucket, object, writeQuorum)
+            } else {
+                err = er.deleteObjectVersion(ctx, bucket, object, writeQuorum, FileInfo{VersionID: versionID})
+            }
+
+            // If Delete was successful, make sure to return the appropriate error
+            // and heal result appropriate with delete's error messages
+            errs = make([]error, len(errs))
+            for i := range errs {
+                errs[i] = err
+            }
+
+            if err == nil {
+                // Dangling object successfully purged, size is '0'
+                m.Size = 0
+            }
+        }
+        return defaultHealResult(m, storageDisks, storageEndpoints, errs, bucket, object, versionID), toObjectErr(err, bucket, object, versionID)
+    }
+
+    readQuorum := m.Erasure.DataBlocks
+    if m.Erasure.DataBlocks == 0 || m.Erasure.DataBlocks == m.Erasure.ParityBlocks {
+        readQuorum = getReadQuorum(len(storageDisks))
+    }
+
+    err := toObjectErr(reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum), bucket, object, versionID)
+    return defaultHealResult(m, storageDisks, storageEndpoints, errs, bucket, object, versionID), err
+}
+
 // Object is considered dangling/corrupted if any only
 // if total disks - a combination of corrupted and missing
 // files is lesser than number of data blocks.

@@ -770,61 +796,15 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version
     partsMetadata, errs := readAllFileInfo(healCtx, storageDisks, bucket, object, versionID)
 
     if isAllNotFound(errs) {
-        // Nothing to do
-        return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object), nil
-    }
-
-    // Check if the object is dangling, if yes and user requested
-    // remove we simply delete it from namespace.
-    if m, ok := isObjectDangling(partsMetadata, errs, []error{}); ok {
-        writeQuorum := m.Erasure.DataBlocks + 1
-        if m.Erasure.DataBlocks == 0 {
-            writeQuorum = getWriteQuorum(len(storageDisks))
-        }
-        if !opts.DryRun && opts.Remove {
-            if versionID == "" {
-                er.deleteObject(healCtx, bucket, object, writeQuorum)
-            } else {
-                er.deleteObjectVersion(healCtx, bucket, object, writeQuorum, FileInfo{VersionID: versionID})
-            }
-        }
-        err = reduceReadQuorumErrs(ctx, errs, nil, writeQuorum-1)
-        return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object), toObjectErr(err, bucket, object)
+        // Nothing to do, file is already gone.
+        return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object, versionID), nil
     }
 
-    latestFileInfo, err := getLatestFileInfo(healCtx, partsMetadata, errs)
+    fi, err := getLatestFileInfo(healCtx, partsMetadata, errs)
     if err != nil {
-        return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object), toObjectErr(err, bucket, object)
-    }
-
-    errCount := 0
-    for _, err := range errs {
-        if err != nil {
-            errCount++
-        }
-    }
-
-    if errCount == len(errs) {
-        // Only if we get errors from all the disks we return error. Else we need to
-        // continue to return filled madmin.HealResultItem struct which includes info
-        // on what disks the file is available etc.
-        if err = reduceReadQuorumErrs(ctx, errs, nil, latestFileInfo.Erasure.DataBlocks); err != nil {
-            if m, ok := isObjectDangling(partsMetadata, errs, []error{}); ok {
-                writeQuorum := m.Erasure.DataBlocks + 1
-                if m.Erasure.DataBlocks == 0 {
-                    writeQuorum = getWriteQuorum(len(storageDisks))
-                }
-                if !opts.DryRun && opts.Remove {
-                    if versionID == "" {
-                        er.deleteObject(ctx, bucket, object, writeQuorum)
-                    } else {
-                        er.deleteObjectVersion(ctx, bucket, object, writeQuorum, FileInfo{VersionID: versionID})
-                    }
-                }
-            }
-            return defaultHealResult(latestFileInfo, storageDisks, storageEndpoints, errs, bucket, object), toObjectErr(err, bucket, object)
-        }
+        return er.purgeObjectDangling(healCtx, bucket, object, versionID, partsMetadata, errs, []error{}, opts)
     }
 
     // Heal the object.
-    return er.healObject(healCtx, bucket, object, partsMetadata, errs, latestFileInfo, opts.DryRun, opts.Remove, opts.ScanMode)
+    return er.healObject(healCtx, bucket, object, versionID, partsMetadata, errs, fi, opts)
 }
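A minimal standalone sketch of the quorum fallback used by purgeObjectDangling above: trust the dangling object's own data-block count, unless its metadata is empty or the data/parity split is ambiguous, in which case fall back to a pool-wide default. The names and the n/2+1 default below are assumptions for illustration, not MinIO's actual helpers:

package main

import "fmt"

type erasureInfo struct {
	DataBlocks   int
	ParityBlocks int
}

// writeQuorumFor mirrors the selection logic: use the object's own data-block
// count when it is usable, otherwise fall back to a default write quorum.
func writeQuorumFor(e erasureInfo, totalDisks int) int {
	if e.DataBlocks == 0 || e.DataBlocks == e.ParityBlocks {
		return totalDisks/2 + 1 // assumed default write quorum
	}
	return e.DataBlocks
}

func main() {
	fmt.Println(writeQuorumFor(erasureInfo{}, 8))                               // 5: no readable xl.meta
	fmt.Println(writeQuorumFor(erasureInfo{DataBlocks: 6, ParityBlocks: 2}, 8)) // 6: trust the metadata
	fmt.Println(writeQuorumFor(erasureInfo{DataBlocks: 4, ParityBlocks: 4}, 8)) // 5: ambiguous split
}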

cmd/erasure-object_test.go

@@ -19,6 +19,7 @@ package cmd
 import (
     "bytes"
     "context"
+    "errors"
     "io/ioutil"
     "os"
     "testing"

@@ -257,7 +258,7 @@ func TestErasureDeleteObjectDiskNotFound(t *testing.T) {
     z.serverSets[0].erasureDisksMu.Unlock()
     _, err = obj.DeleteObject(ctx, bucket, object, ObjectOptions{})
     // since majority of disks are not available, metaquorum is not achieved and hence errErasureWriteQuorum error
-    if err != toObjectErr(errErasureWriteQuorum, bucket, object) {
+    if !errors.Is(err, errErasureWriteQuorum) {
         t.Errorf("Expected deleteObject to fail with %v, but failed with %v", toObjectErr(errErasureWriteQuorum, bucket, object), err)
     }
 }

@@ -381,7 +382,7 @@ func TestPutObjectNoQuorum(t *testing.T) {
     z.serverSets[0].erasureDisksMu.Unlock()
     // Upload new content to same object "object"
     _, err = obj.PutObject(ctx, bucket, object, mustGetPutObjReader(t, bytes.NewReader([]byte("abcd")), int64(len("abcd")), "", ""), opts)
-    if err != toObjectErr(errErasureWriteQuorum, bucket, object) {
+    if !errors.Is(err, errErasureWriteQuorum) {
         t.Errorf("Expected putObject to fail with %v, but failed with %v", toObjectErr(errErasureWriteQuorum, bucket, object), err)
     }
 }

cmd/object-api-errors.go

@@ -130,9 +130,27 @@ func toObjectErr(err error, params ...string) error {
             }
         }
     case errErasureReadQuorum:
-        err = InsufficientReadQuorum{}
+        if len(params) == 1 {
+            err = InsufficientReadQuorum{
+                Bucket: params[0],
+            }
+        } else if len(params) >= 2 {
+            err = InsufficientReadQuorum{
+                Bucket: params[0],
+                Object: params[1],
+            }
+        }
     case errErasureWriteQuorum:
-        err = InsufficientWriteQuorum{}
+        if len(params) == 1 {
+            err = InsufficientWriteQuorum{
+                Bucket: params[0],
+            }
+        } else if len(params) >= 2 {
+            err = InsufficientWriteQuorum{
+                Bucket: params[0],
+                Object: params[1],
+            }
+        }
     case io.ErrUnexpectedEOF, io.ErrShortWrite:
         err = IncompleteBody{}
     case context.Canceled, context.DeadlineExceeded:

@@ -163,10 +181,10 @@ func (e SlowDown) Error() string {
 }
 
 // InsufficientReadQuorum storage cannot satisfy quorum for read operation.
-type InsufficientReadQuorum struct{}
+type InsufficientReadQuorum GenericError
 
 func (e InsufficientReadQuorum) Error() string {
-    return "Storage resources are insufficient for the read operation."
+    return "Storage resources are insufficient for the read operation " + e.Bucket + "/" + e.Object
 }
 
 // Unwrap the error.

@@ -175,10 +193,10 @@ func (e InsufficientReadQuorum) Unwrap() error {
 }
 
 // InsufficientWriteQuorum storage cannot satisfy quorum for write operation.
-type InsufficientWriteQuorum struct{}
+type InsufficientWriteQuorum GenericError
 
 func (e InsufficientWriteQuorum) Error() string {
-    return "Storage resources are insufficient for the write operation."
+    return "Storage resources are insufficient for the write operation " + e.Bucket + "/" + e.Object
 }
 
 // Unwrap the error.

@@ -194,6 +212,11 @@ type GenericError struct {
     Err error
 }
 
+// Unwrap the error to its underlying error.
+func (e GenericError) Unwrap() error {
+    return e.Err
+}
+
 // InvalidArgument incorrect input argument
 type InvalidArgument GenericError
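A standalone sketch of the unwrap chain the updated tests rely on via errors.Is. It mirrors the pattern rather than MinIO's exact types, and assumes (as the "// Unwrap the error." context lines above suggest) that InsufficientWriteQuorum keeps an Unwrap method returning the errErasureWriteQuorum sentinel, so the sentinel still matches after toObjectErr converts it into the typed error:

package main

import (
	"errors"
	"fmt"
)

var errErasureWriteQuorum = errors.New("erasure write quorum not met")

type insufficientWriteQuorum struct {
	Bucket, Object string
}

func (e insufficientWriteQuorum) Error() string {
	return "Storage resources are insufficient for the write operation " + e.Bucket + "/" + e.Object
}

// Unwrap returns the underlying sentinel so errors.Is can traverse to it.
func (e insufficientWriteQuorum) Unwrap() error { return errErasureWriteQuorum }

// toObjectErr converts the sentinel into a typed error carrying bucket/object,
// analogous to the change above.
func toObjectErr(err error, bucket, object string) error {
	if errors.Is(err, errErasureWriteQuorum) {
		return insufficientWriteQuorum{Bucket: bucket, Object: object}
	}
	return err
}

func main() {
	err := toObjectErr(errErasureWriteQuorum, "bucket", "object")
	fmt.Println(errors.Is(err, errErasureWriteQuorum)) // true
}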

cmd/object-api-putobject_test.go

@@ -21,6 +21,7 @@ import (
     "context"
     "crypto/md5"
     "encoding/hex"
+    "errors"
     "io/ioutil"
     "os"
     "path"

@@ -296,7 +297,7 @@ func testObjectAPIPutObjectDiskNotFound(obj ObjectLayer, instanceType string, di
             int64(len("mnop")),
             false,
             "",
-            InsufficientWriteQuorum{},
+            errErasureWriteQuorum,
         },
     }
 
     _, actualErr := obj.PutObject(context.Background(), testCase.bucketName, testCase.objName, mustGetPutObjReader(t, bytes.NewReader(testCase.inputData), testCase.intputDataSize, testCase.inputMeta["etag"], sha256sum), ObjectOptions{UserDefined: testCase.inputMeta})

@@ -305,7 +306,7 @@ func testObjectAPIPutObjectDiskNotFound(obj ObjectLayer, instanceType string, di
     }
     // Failed as expected, but does it fail for the expected reason.
     if actualErr != nil && !testCase.shouldPass {
-        if testCase.expectedError.Error() != actualErr.Error() {
+        if !errors.Is(actualErr, testCase.expectedError) {
             t.Errorf("Test %d: %s: Expected to fail with error \"%s\", but instead failed with error \"%s\" instead.", len(testCases)+1, instanceType, testCase.expectedError.Error(), actualErr.Error())
         }
     }

cmd/rest/client.go

@@ -19,6 +19,7 @@ package rest
 import (
     "context"
     "errors"
+    "fmt"
     "io"
     "io/ioutil"
     "math/rand"

@@ -118,7 +119,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
     resp, err := c.httpClient.Do(req)
     if err != nil {
         if c.HealthCheckFn != nil && xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
-            logger.Info("Marking %s temporary offline; caused by %v", c.url.String(), err)
+            logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
             c.MarkOffline()
         }
         return nil, &NetworkError{err}

@@ -141,7 +142,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
     // fully it should make sure to respond with '412'
     // instead, see cmd/storage-rest-server.go for ideas.
     if c.HealthCheckFn != nil && resp.StatusCode == http.StatusPreconditionFailed {
-        logger.Info("Marking %s temporary offline; caused by PreconditionFailed.", c.url.String())
+        logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by PreconditionFailed with disk ID mismatch", c.url.String()))
        c.MarkOffline()
     }
     defer xhttp.DrainBody(resp.Body)

@@ -149,7 +150,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
     b, err := ioutil.ReadAll(io.LimitReader(resp.Body, c.MaxErrResponseSize))
     if err != nil {
         if c.HealthCheckFn != nil && xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
-            logger.Info("Marking %s temporary offline; caused by %v", c.url.String(), err)
+            logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
             c.MarkOffline()
         }
         return nil, err

pkg/madmin/heal-commands.go

@@ -118,6 +118,7 @@ type HealResultItem struct {
     Type      HealItemType `json:"type"`
     Bucket    string       `json:"bucket"`
     Object    string       `json:"object"`
+    VersionID string       `json:"versionId"`
     Detail    string       `json:"detail"`
     ParityBlocks int `json:"parityBlocks,omitempty"`
     DataBlocks   int `json:"dataBlocks,omitempty"`
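For illustration only, a heal result item for a versioned object would now serialize with the extra field. Field names follow the struct tags above; the values are invented:

{
  "type": "object",
  "bucket": "mybucket",
  "object": "dir/object.bin",
  "versionId": "00000000-0000-0000-0000-000000000000",
  "detail": "",
  "parityBlocks": 2,
  "dataBlocks": 2
}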
