rootDisk on containers can have different device Id (#10259)

Use `/etc/hosts` instead of `/` when checking for a common
device ID: if the device is the same for `/etc/hosts` and
the --bind mount, the mount is detected as a root disk.

Bonus: enhance healthcheck logging by adding maintenance
tags to all messages.
master
Harshavardhana 4 years ago committed by GitHub
parent 038d91feaa
commit 30da442a85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 12
      cmd/erasure-zones.go
  2. 2
      pkg/disk/root_disk_unix.go

@ -22,6 +22,7 @@ import (
"io" "io"
"math/rand" "math/rand"
"net/http" "net/http"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -2063,6 +2064,8 @@ func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthRes
} }
} }
reqInfo := (&logger.ReqInfo{}).AppendTags("maintenance", strconv.FormatBool(opts.Maintenance))
for zoneIdx := range erasureSetUpCount { for zoneIdx := range erasureSetUpCount {
parityDrives := globalStorageClass.GetParityForSC(storageclass.STANDARD) parityDrives := globalStorageClass.GetParityForSC(storageclass.STANDARD)
diskCount := z.zones[zoneIdx].drivesPerSet diskCount := z.zones[zoneIdx].drivesPerSet
@ -2076,8 +2079,9 @@ func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthRes
} }
for setIdx := range erasureSetUpCount[zoneIdx] { for setIdx := range erasureSetUpCount[zoneIdx] {
if erasureSetUpCount[zoneIdx][setIdx] < writeQuorum { if erasureSetUpCount[zoneIdx][setIdx] < writeQuorum {
logger.LogIf(ctx, fmt.Errorf("Write quorum lost on zone: %d, set: %d, expected write quorum: %d", logger.LogIf(logger.SetReqInfo(ctx, reqInfo),
zoneIdx, setIdx, writeQuorum)) fmt.Errorf("Write quorum may be lost on zone: %d, set: %d, expected write quorum: %d",
zoneIdx, setIdx, writeQuorum))
return HealthResult{ return HealthResult{
Healthy: false, Healthy: false,
ZoneID: zoneIdx, ZoneID: zoneIdx,
@ -2101,14 +2105,14 @@ func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthRes
// is not taken down for maintenance // is not taken down for maintenance
aggHealStateResult, err := getAggregatedBackgroundHealState(ctx, true) aggHealStateResult, err := getAggregatedBackgroundHealState(ctx, true)
if err != nil { if err != nil {
logger.LogIf(ctx, fmt.Errorf("Unable to verify global heal status: %w", err)) logger.LogIf(logger.SetReqInfo(ctx, reqInfo), fmt.Errorf("Unable to verify global heal status: %w", err))
return HealthResult{ return HealthResult{
Healthy: false, Healthy: false,
} }
} }
if len(aggHealStateResult.HealDisks) > 0 { if len(aggHealStateResult.HealDisks) > 0 {
logger.LogIf(ctx, fmt.Errorf("Total drives to be healed %d", len(aggHealStateResult.HealDisks))) logger.LogIf(logger.SetReqInfo(ctx, reqInfo), fmt.Errorf("Total drives to be healed %d", len(aggHealStateResult.HealDisks)))
} }
healthy := len(aggHealStateResult.HealDisks) == 0 healthy := len(aggHealStateResult.HealDisks) == 0

@ -31,7 +31,7 @@ func IsRootDisk(diskPath string) (bool, error) {
if err != nil { if err != nil {
return false, err return false, err
} }
rootInfo, err := os.Stat("/") rootInfo, err := os.Stat("/etc/hosts")
if err != nil { if err != nil {
return false, err return false, err
} }

Loading…
Cancel
Save