From 8a291e1dc0b7b260859e03360018bd101a45aae8 Mon Sep 17 00:00:00 2001
From: Harshavardhana <harsha@minio.io>
Date: Wed, 2 Sep 2020 22:54:56 -0700
Subject: [PATCH] Cluster healthcheck improvements (#10408)

- do not fail the healthcheck if heal status
  was not obtained from one of the nodes,
  if many nodes fail then report this as a
  catastrophic error.
- add "x-minio-write-quorum" value to match
  the write tolerance supported by server.
- admin info now states if a drive is healing
  where madmin.Disk.Healing is set to true
  and madmin.Disk.State is "ok"
---
 cmd/admin-handlers.go       | 35 +++++++++++++++++++++++++++--------
 cmd/erasure-zones.go        | 31 ++++++++++++++++---------------
 cmd/erasure.go              |  3 ++-
 cmd/healthcheck-handler.go  |  4 +++-
 pkg/madmin/info-commands.go |  1 +
 5 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go
index 5273c5800..0940ef27f 100644
--- a/cmd/admin-handlers.go
+++ b/cmd/admin-handlers.go
@@ -799,14 +799,12 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) {
 	keepConnLive(w, r, respCh)
 }
 
-func getAggregatedBackgroundHealState(ctx context.Context, failOnErr bool) (madmin.BgHealState, error) {
+func getAggregatedBackgroundHealState(ctx context.Context) (madmin.BgHealState, error) {
 	var bgHealStates []madmin.BgHealState
 
 	localHealState, ok := getLocalBackgroundHealStatus()
 	if !ok {
-		if failOnErr {
-			return madmin.BgHealState{}, errServerNotInitialized
-		}
+		return madmin.BgHealState{}, errServerNotInitialized
 	}
 
 	// Get local heal status first
@@ -815,14 +813,16 @@ func getAggregatedBackgroundHealState(ctx context.Context, failOnErr bool) (madm
 	if globalIsDistErasure {
 		// Get heal status from other peers
 		peersHealStates, nerrs := globalNotificationSys.BackgroundHealStatus()
+		var errCount int
 		for _, nerr := range nerrs {
 			if nerr.Err != nil {
-				if failOnErr {
-					return madmin.BgHealState{}, nerr.Err
-				}
 				logger.LogIf(ctx, nerr.Err)
+				errCount++
 			}
 		}
+		if errCount == len(nerrs) {
+			return madmin.BgHealState{}, fmt.Errorf("all remote servers failed to report heal status, cluster is unhealthy")
+		}
 		bgHealStates = append(bgHealStates, peersHealStates...)
 	}
 
@@ -868,7 +868,12 @@ func (a adminAPIHandlers) BackgroundHealStatusHandler(w http.ResponseWriter, r *
 		return
 	}
 
-	aggregateHealStateResult, _ := getAggregatedBackgroundHealState(r.Context(), false)
+	aggregateHealStateResult, err := getAggregatedBackgroundHealState(r.Context())
+	if err != nil {
+		writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
+		return
+	}
+
 	if err := json.NewEncoder(w).Encode(aggregateHealStateResult); err != nil {
 		writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
 		return
@@ -1489,20 +1494,34 @@ func (a adminAPIHandlers) ServerInfoHandler(w http.ResponseWriter, r *http.Reque
 		Notifications: notifyTarget,
 	}
 
+	// Collect any disk healing.
+	healing, _ := getAggregatedBackgroundHealState(ctx)
+	healDisks := make(map[string]struct{}, len(healing.HealDisks))
+	for _, disk := range healing.HealDisks {
+		healDisks[disk] = struct{}{}
+	}
+
 	// find all disks which belong to each respective endpoints
 	for i := range servers {
 		for _, disk := range storageInfo.Disks {
 			if strings.Contains(disk.Endpoint, servers[i].Endpoint) {
+				if _, ok := healDisks[disk.Endpoint]; ok {
+					disk.Healing = true
+				}
 				servers[i].Disks = append(servers[i].Disks, disk)
 			}
 		}
 	}
+
 	// add all the disks local to this server.
 	for _, disk := range storageInfo.Disks {
 		if disk.DrivePath == "" && disk.Endpoint == "" {
 			continue
 		}
 		if disk.Endpoint == disk.DrivePath {
+			if _, ok := healDisks[disk.Endpoint]; ok {
+				disk.Healing = true
+			}
 			servers[len(servers)-1].Disks = append(servers[len(servers)-1].Disks, disk)
 		}
 	}
diff --git a/cmd/erasure-zones.go b/cmd/erasure-zones.go
index b4adadfdf..17d7461b5 100644
--- a/cmd/erasure-zones.go
+++ b/cmd/erasure-zones.go
@@ -2045,17 +2045,18 @@ func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthRes
 
 	reqInfo := (&logger.ReqInfo{}).AppendTags("maintenance", strconv.FormatBool(opts.Maintenance))
 
+	parityDrives := globalStorageClass.GetParityForSC(storageclass.STANDARD)
+	diskCount := z.SetDriveCount()
+	if parityDrives == 0 {
+		parityDrives = getDefaultParityBlocks(diskCount)
+	}
+	dataDrives := diskCount - parityDrives
+	writeQuorum := dataDrives
+	if dataDrives == parityDrives {
+		writeQuorum++
+	}
+
 	for zoneIdx := range erasureSetUpCount {
-		parityDrives := globalStorageClass.GetParityForSC(storageclass.STANDARD)
-		diskCount := z.zones[zoneIdx].setDriveCount
-		if parityDrives == 0 {
-			parityDrives = getDefaultParityBlocks(diskCount)
-		}
-		dataDrives := diskCount - parityDrives
-		writeQuorum := dataDrives
-		if dataDrives == parityDrives {
-			writeQuorum++
-		}
 		for setIdx := range erasureSetUpCount[zoneIdx] {
 			if erasureSetUpCount[zoneIdx][setIdx] < writeQuorum {
 				logger.LogIf(logger.SetReqInfo(ctx, reqInfo),
@@ -2075,14 +2076,15 @@ func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthRes
 	// to look at the healing side of the code.
 	if !opts.Maintenance {
 		return HealthResult{
-			Healthy: true,
+			Healthy:     true,
+			WriteQuorum: writeQuorum,
 		}
 	}
 
 	// check if local disks are being healed, if they are being healed
 	// we need to tell healthy status as 'false' so that this server
 	// is not taken down for maintenance
-	aggHealStateResult, err := getAggregatedBackgroundHealState(ctx, true)
+	aggHealStateResult, err := getAggregatedBackgroundHealState(ctx)
 	if err != nil {
 		logger.LogIf(logger.SetReqInfo(ctx, reqInfo), fmt.Errorf("Unable to verify global heal status: %w", err))
 		return HealthResult{
@@ -2094,11 +2096,10 @@ func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthRes
 		logger.LogIf(logger.SetReqInfo(ctx, reqInfo), fmt.Errorf("Total drives to be healed %d", len(aggHealStateResult.HealDisks)))
 	}
 
-	healthy := len(aggHealStateResult.HealDisks) == 0
-
 	return HealthResult{
-		Healthy:       healthy,
+		Healthy:       len(aggHealStateResult.HealDisks) == 0,
 		HealingDrives: len(aggHealStateResult.HealDisks),
+		WriteQuorum:   writeQuorum,
 	}
 }
 
diff --git a/cmd/erasure.go b/cmd/erasure.go
index 2847e4ddc..755ebcd52 100644
--- a/cmd/erasure.go
+++ b/cmd/erasure.go
@@ -266,10 +266,11 @@ func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []Buc
 	}
 
 	// Collect any disk healing.
-	healing, err := getAggregatedBackgroundHealState(ctx, true)
+	healing, err := getAggregatedBackgroundHealState(ctx)
 	if err != nil {
 		return err
 	}
+
 	healDisks := make(map[string]struct{}, len(healing.HealDisks))
 	for _, disk := range healing.HealDisks {
 		healDisks[disk] = struct{}{}
diff --git a/cmd/healthcheck-handler.go b/cmd/healthcheck-handler.go
index 749080bdd..795d1ee87 100644
--- a/cmd/healthcheck-handler.go
+++ b/cmd/healthcheck-handler.go
@@ -38,6 +38,9 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
 
 	opts := HealthOptions{Maintenance: r.URL.Query().Get("maintenance") == "true"}
 	result := objLayer.Health(ctx, opts)
+	if result.WriteQuorum > 0 {
+		w.Header().Set("X-Minio-Write-Quorum", strconv.Itoa(result.WriteQuorum))
+	}
 	if !result.Healthy {
 		// return how many drives are being healed if any
 		w.Header().Set("X-Minio-Healing-Drives", strconv.Itoa(result.HealingDrives))
@@ -51,7 +54,6 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
 		}
 		return
 	}
-
 	writeResponse(w, http.StatusOK, nil, mimeNone)
 }
 
diff --git a/pkg/madmin/info-commands.go b/pkg/madmin/info-commands.go
index c481dfb8c..d85018b90 100644
--- a/pkg/madmin/info-commands.go
+++ b/pkg/madmin/info-commands.go
@@ -272,6 +272,7 @@ type Disk struct {
 	Endpoint        string  `json:"endpoint,omitempty"`
 	RootDisk        bool    `json:"rootDisk,omitempty"`
 	DrivePath       string  `json:"path,omitempty"`
+	Healing         bool    `json:"healing,omitempty"`
 	State           string  `json:"state,omitempty"`
 	UUID            string  `json:"uuid,omitempty"`
 	Model           string  `json:"model,omitempty"`