fix: background heal to call HealFormat only if needed (#9491)

In large setups this avoids unnecessary data transfer
across nodes and potential locks.

This PR also optimizes heal result channel, which should
be avoided for each queueHealTask as its expensive
to create/close channels for large number of objects.
master
Harshavardhana 4 years ago committed by GitHub
parent 5205c9591f
commit 71ce63f79c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 85
      cmd/admin-heal-ops.go
  2. 7
      cmd/background-heal-ops.go
  3. 7
      cmd/background-newdisks-heal-ops.go
  4. 3
      cmd/global-heal.go
  5. 14
      pkg/madmin/heal-commands.go

@ -308,8 +308,8 @@ func (ahs *allHealState) PopHealStatusJSON(path string,
// healSource denotes single entity and heal option. // healSource denotes single entity and heal option.
type healSource struct { type healSource struct {
path string // entity path (format, buckets, objects) to heal path string // entity path (format, buckets, objects) to heal
opts *madmin.HealOpts // optional heal option overrides default setting opts madmin.HealOpts // optional heal option overrides default setting
} }
// healSequence - state for each heal sequence initiated on the // healSequence - state for each heal sequence initiated on the
@ -321,9 +321,12 @@ type healSequence struct {
// path is just pathJoin(bucket, objPrefix) // path is just pathJoin(bucket, objPrefix)
path string path string
// List of entities (format, buckets, objects) to heal // A channel of entities (format, buckets, objects) to heal
sourceCh chan healSource sourceCh chan healSource
// A channel of entities with heal result
respCh chan healResult
// Report healing progress // Report healing progress
reportProgress bool reportProgress bool
@ -385,6 +388,7 @@ func newHealSequence(bucket, objPrefix, clientAddr string,
ctx := logger.SetReqInfo(GlobalContext, reqInfo) ctx := logger.SetReqInfo(GlobalContext, reqInfo)
return &healSequence{ return &healSequence{
respCh: make(chan healResult),
bucket: bucket, bucket: bucket,
objPrefix: objPrefix, objPrefix: objPrefix,
path: pathJoin(bucket, objPrefix), path: pathJoin(bucket, objPrefix),
@ -636,53 +640,58 @@ func (h *healSequence) healSequenceStart() {
} }
func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItemType) error { func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItemType) error {
var respCh = make(chan healResult)
defer close(respCh)
// Send heal request // Send heal request
task := healTask{ task := healTask{
path: source.path, path: source.path,
responseCh: respCh,
opts: h.settings, opts: h.settings,
responseCh: h.respCh,
} }
if source.opts != nil { if !source.opts.Equal(h.settings) {
task.opts = *source.opts task.opts = source.opts
} }
globalBackgroundHealRoutine.queueHealTask(task) globalBackgroundHealRoutine.queueHealTask(task)
// Wait for answer and push result to the client
res := <-respCh select {
if !h.reportProgress { case res := <-h.respCh:
h.mutex.Lock() if !h.reportProgress {
defer h.mutex.Unlock() h.mutex.Lock()
defer h.mutex.Unlock()
// Progress is not reported in case of background heal processing.
// Instead we increment relevant counter based on the heal result // Progress is not reported in case of background heal processing.
// for prometheus reporting. // Instead we increment relevant counter based on the heal result
if res.err != nil && !isErrObjectNotFound(res.err) { // for prometheus reporting.
for _, d := range res.result.After.Drives { if res.err != nil && !isErrObjectNotFound(res.err) {
// For failed items we report the endpoint and drive state for _, d := range res.result.After.Drives {
// This will help users take corrective actions for drives // For failed items we report the endpoint and drive state
h.healFailedItemsMap[d.Endpoint+","+d.State]++ // This will help users take corrective actions for drives
h.healFailedItemsMap[d.Endpoint+","+d.State]++
}
} else {
// Only object type reported for successful healing
h.healedItemsMap[res.result.Type]++
} }
} else {
// Only object type reported for successful healing
h.healedItemsMap[res.result.Type]++
}
return nil
}
res.result.Type = healType
if res.err != nil {
// Object might have been deleted, by the time heal
// was attempted, we should ignore this object and return success.
if isErrObjectNotFound(res.err) {
return nil return nil
} }
// Only report object error res.result.Type = healType
if healType != madmin.HealItemObject { if res.err != nil {
return res.err // Object might have been deleted, by the time heal
// was attempted, we should ignore this object and return success.
if isErrObjectNotFound(res.err) {
return nil
}
// Only report object error
if healType != madmin.HealItemObject {
return res.err
}
res.result.Detail = res.err.Error()
} }
res.result.Detail = res.err.Error() return h.pushHealResultItem(res.result)
case <-h.ctx.Done():
return nil
case <-h.traverseAndHealDoneCh:
return nil
} }
return h.pushHealResultItem(res.result)
} }
func (h *healSequence) healItemsFromSourceCh() error { func (h *healSequence) healItemsFromSourceCh() error {

@ -18,7 +18,6 @@ package cmd
import ( import (
"context" "context"
"path"
"time" "time"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
@ -89,10 +88,10 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
case bucket != "" && object != "": case bucket != "" && object != "":
res, err = objAPI.HealObject(ctx, bucket, object, task.opts) res, err = objAPI.HealObject(ctx, bucket, object, task.opts)
} }
ObjectPathUpdated(path.Join(bucket, object)) if task.path != slashSeparator && task.path != nopHeal {
if task.responseCh != nil { ObjectPathUpdated(task.path)
task.responseCh <- healResult{result: res, err: err}
} }
task.responseCh <- healResult{result: res, err: err}
case <-h.doneCh: case <-h.doneCh:
return return
case <-ctx.Done(): case <-ctx.Done():

@ -57,6 +57,7 @@ func monitorLocalDisksAndHeal(ctx context.Context, objAPI ObjectLayer) {
case <-time.After(defaultMonitorNewDiskInterval): case <-time.After(defaultMonitorNewDiskInterval):
// Attempt a heal as the server starts-up first. // Attempt a heal as the server starts-up first.
localDisksInZoneHeal := make([]Endpoints, len(z.zones)) localDisksInZoneHeal := make([]Endpoints, len(z.zones))
var healNewDisks bool
for i, ep := range globalEndpoints { for i, ep := range globalEndpoints {
localDisksToHeal := Endpoints{} localDisksToHeal := Endpoints{}
for _, endpoint := range ep.Endpoints { for _, endpoint := range ep.Endpoints {
@ -74,6 +75,12 @@ func monitorLocalDisksAndHeal(ctx context.Context, objAPI ObjectLayer) {
continue continue
} }
localDisksInZoneHeal[i] = localDisksToHeal localDisksInZoneHeal[i] = localDisksToHeal
healNewDisks = true
}
// Reformat disks only if needed.
if !healNewDisks {
continue
} }
// Reformat disks // Reformat disks

@ -48,6 +48,7 @@ func newBgHealSequence(numDisks int) *healSequence {
return &healSequence{ return &healSequence{
sourceCh: make(chan healSource), sourceCh: make(chan healSource),
respCh: make(chan healResult),
startTime: UTCNow(), startTime: UTCNow(),
clientToken: bgHealingUUID, clientToken: bgHealingUUID,
settings: hs, settings: hs,
@ -125,7 +126,7 @@ func deepHealObject(objectPath string) {
bgSeq.sourceCh <- healSource{ bgSeq.sourceCh <- healSource{
path: objectPath, path: objectPath,
opts: &madmin.HealOpts{ScanMode: madmin.HealDeepScan}, opts: madmin.HealOpts{ScanMode: madmin.HealDeepScan},
} }
} }

@ -45,6 +45,20 @@ type HealOpts struct {
ScanMode HealScanMode `json:"scanMode"` ScanMode HealScanMode `json:"scanMode"`
} }
// Equal returns true if no is same as o.
func (o HealOpts) Equal(no HealOpts) bool {
if o.Recursive != no.Recursive {
return false
}
if o.DryRun != no.DryRun {
return false
}
if o.Remove != no.Remove {
return false
}
return o.ScanMode == no.ScanMode
}
// HealStartSuccess - holds information about a successfully started // HealStartSuccess - holds information about a successfully started
// heal operation // heal operation
type HealStartSuccess struct { type HealStartSuccess struct {

Loading…
Cancel
Save