diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go
index 913a20f4f..b9782a440 100644
--- a/cmd/erasure-sets.go
+++ b/cmd/erasure-sets.go
@@ -1633,9 +1633,10 @@ func (s *erasureSets) GetMetrics(ctx context.Context) (*Metrics, error) {
 	return &Metrics{}, NotImplemented{}
 }

-// IsReady - Returns true if atleast n/2 disks (read quorum) are online
-func (s *erasureSets) IsReady(_ context.Context) bool {
-	return false
+// Health shouldn't be called directly - will panic
+func (s *erasureSets) Health(ctx context.Context, _ HealthOptions) HealthResult {
+	logger.CriticalIf(ctx, NotImplemented{})
+	return HealthResult{}
 }

 // maintainMRFList gathers the list of successful partial uploads
diff --git a/cmd/erasure-zones.go b/cmd/erasure-zones.go
index 087067e29..70ed8e3ea 100644
--- a/cmd/erasure-zones.go
+++ b/cmd/erasure-zones.go
@@ -2007,29 +2007,49 @@ func (z *erasureZones) getZoneAndSet(id string) (int, int, error) {
 	return 0, 0, fmt.Errorf("DiskID(%s) %w", id, errDiskNotFound)
 }

-// IsReady - Returns true, when all the erasure sets are writable.
-func (z *erasureZones) IsReady(ctx context.Context) bool {
+// HealthOptions takes input options to return specific information
+type HealthOptions struct {
+	Maintenance bool
+}
+
+// HealthResult returns the current state of the system,
+// along with any additional heuristic information
+// that was queried
+type HealthResult struct {
+	Healthy       bool
+	ZoneID, SetID int
+	WriteQuorum   int
+}
+
+// Health - returns the current status of the object layer's health,
+// i.e. whether write access exists across sets; additionally it
+// can be used to query whether health would be lost if this node
+// were taken down by an external orchestrator.
+func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthResult {
 	erasureSetUpCount := make([][]int, len(z.zones))
 	for i := range z.zones {
 		erasureSetUpCount[i] = make([]int, len(z.zones[i].sets))
 	}

 	diskIDs := globalNotificationSys.GetLocalDiskIDs(ctx)
+	if !opts.Maintenance {
+		diskIDs = append(diskIDs, getLocalDiskIDs(z))
+	}

-	diskIDs = append(diskIDs, getLocalDiskIDs(z)...)
-
-	for _, id := range diskIDs {
-		zoneIdx, setIdx, err := z.getZoneAndSet(id)
-		if err != nil {
-			logger.LogIf(ctx, err)
-			continue
+	for _, localDiskIDs := range diskIDs {
+		for _, id := range localDiskIDs {
+			zoneIdx, setIdx, err := z.getZoneAndSet(id)
+			if err != nil {
+				logger.LogIf(ctx, err)
+				continue
+			}
+			erasureSetUpCount[zoneIdx][setIdx]++
 		}
-		erasureSetUpCount[zoneIdx][setIdx]++
 	}

 	for zoneIdx := range erasureSetUpCount {
 		parityDrives := globalStorageClass.GetParityForSC(storageclass.STANDARD)
-		diskCount := len(z.zones[zoneIdx].format.Erasure.Sets[0])
+		diskCount := z.zones[zoneIdx].drivesPerSet
 		if parityDrives == 0 {
 			parityDrives = getDefaultParityBlocks(diskCount)
 		}
@@ -2042,11 +2062,18 @@ func (z *erasureZones) IsReady(ctx context.Context) bool {
 			if erasureSetUpCount[zoneIdx][setIdx] < writeQuorum {
 				logger.LogIf(ctx, fmt.Errorf("Write quorum lost on zone: %d, set: %d, expected write quorum: %d",
 					zoneIdx, setIdx, writeQuorum))
-				return false
+				return HealthResult{
+					Healthy:     false,
+					ZoneID:      zoneIdx,
+					SetID:       setIdx,
+					WriteQuorum: writeQuorum,
+				}
 			}
 		}
 	}
-	return true
+	return HealthResult{
+		Healthy: true,
+	}
 }

 // PutObjectTags - replace or add tags to an existing object
diff --git a/cmd/erasure.go b/cmd/erasure.go
index f7a37d279..2ab4a191b 100644
--- a/cmd/erasure.go
+++ b/cmd/erasure.go
@@ -391,8 +391,8 @@ func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []Buc
 	return nil
 }

-// IsReady - shouldn't be called will panic.
-func (er erasureObjects) IsReady(ctx context.Context) bool {
+// Health shouldn't be called directly - will panic
+func (er erasureObjects) Health(ctx context.Context, _ HealthOptions) HealthResult {
 	logger.CriticalIf(ctx, NotImplemented{})
-	return true
+	return HealthResult{}
 }
diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go
index fe3b1773e..16255bc81 100644
--- a/cmd/fs-v1.go
+++ b/cmd/fs-v1.go
@@ -1557,11 +1557,12 @@ func (fs *FSObjects) IsTaggingSupported() bool {
 	return true
 }

-// IsReady - Check if the backend disk is ready to accept traffic.
-func (fs *FSObjects) IsReady(_ context.Context) bool {
+// Health returns health of the object layer
+func (fs *FSObjects) Health(ctx context.Context, opts HealthOptions) HealthResult {
 	if _, err := os.Stat(fs.fsPath); err != nil {
-		return false
+		return HealthResult{}
+	}
+	return HealthResult{
+		Healthy: newObjectLayerFn() != nil,
 	}
-
-	return newObjectLayerFn() != nil
 }
diff --git a/cmd/gateway-unsupported.go b/cmd/gateway-unsupported.go
index f626675f5..70680b58e 100644
--- a/cmd/gateway-unsupported.go
+++ b/cmd/gateway-unsupported.go
@@ -250,7 +250,7 @@ func (a GatewayUnsupported) IsCompressionSupported() bool {
 	return false
 }

-// IsReady - No Op.
-func (a GatewayUnsupported) IsReady(_ context.Context) bool {
-	return false
+// Health - No Op.
+func (a GatewayUnsupported) Health(_ context.Context, _ HealthOptions) HealthResult {
+	return HealthResult{}
 }
diff --git a/cmd/gateway/azure/gateway-azure.go b/cmd/gateway/azure/gateway-azure.go
index 574bf3c57..98768bf40 100644
--- a/cmd/gateway/azure/gateway-azure.go
+++ b/cmd/gateway/azure/gateway-azure.go
@@ -1436,8 +1436,3 @@ func (a *azureObjects) DeleteBucketPolicy(ctx context.Context, bucket string) er
 func (a *azureObjects) IsCompressionSupported() bool {
 	return false
 }
-
-// IsReady returns whether the layer is ready to take requests.
-func (a *azureObjects) IsReady(ctx context.Context) bool {
-	return minio.IsBackendOnline(ctx, a.httpClient, a.endpoint)
-}
diff --git a/cmd/gateway/gcs/gateway-gcs.go b/cmd/gateway/gcs/gateway-gcs.go
index 2b3885c3f..1bbb7a960 100644
--- a/cmd/gateway/gcs/gateway-gcs.go
+++ b/cmd/gateway/gcs/gateway-gcs.go
@@ -1508,8 +1508,3 @@ func (l *gcsGateway) DeleteBucketPolicy(ctx context.Context, bucket string) erro
 func (l *gcsGateway) IsCompressionSupported() bool {
 	return false
 }
-
-// IsReady returns whether the layer is ready to take requests.
-func (l *gcsGateway) IsReady(ctx context.Context) bool {
-	return minio.IsBackendOnline(ctx, l.httpClient, "https://storage.googleapis.com")
-}
diff --git a/cmd/gateway/hdfs/gateway-hdfs.go b/cmd/gateway/hdfs/gateway-hdfs.go
index 5e860b0b6..66924a19a 100644
--- a/cmd/gateway/hdfs/gateway-hdfs.go
+++ b/cmd/gateway/hdfs/gateway-hdfs.go
@@ -786,9 +786,3 @@ func (n *hdfsObjects) AbortMultipartUpload(ctx context.Context, bucket, object,
 	}
 	return hdfsToObjectErr(ctx, n.clnt.Remove(n.hdfsPathJoin(minioMetaTmpBucket, uploadID)), bucket, object, uploadID)
 }
-
-// IsReady returns whether the layer is ready to take requests.
-func (n *hdfsObjects) IsReady(ctx context.Context) bool {
-	si, _ := n.StorageInfo(ctx, false)
-	return si.Backend.GatewayOnline
-}
diff --git a/cmd/gateway/nas/gateway-nas.go b/cmd/gateway/nas/gateway-nas.go
index ed332b07a..eca3949cd 100644
--- a/cmd/gateway/nas/gateway-nas.go
+++ b/cmd/gateway/nas/gateway-nas.go
@@ -121,12 +121,6 @@ type nasObjects struct {
 	minio.ObjectLayer
 }

-// IsReady returns whether the layer is ready to take requests.
-func (n *nasObjects) IsReady(ctx context.Context) bool {
-	si, _ := n.StorageInfo(ctx, false)
-	return si.Backend.GatewayOnline
-}
-
 func (n *nasObjects) IsTaggingSupported() bool {
 	return true
 }
diff --git a/cmd/gateway/s3/gateway-s3.go b/cmd/gateway/s3/gateway-s3.go
index b5bd78f24..d947a2f81 100644
--- a/cmd/gateway/s3/gateway-s3.go
+++ b/cmd/gateway/s3/gateway-s3.go
@@ -755,11 +755,6 @@ func (l *s3Objects) IsEncryptionSupported() bool {
 	return minio.GlobalKMS != nil || len(minio.GlobalGatewaySSE) > 0
 }

-// IsReady returns whether the layer is ready to take requests.
-func (l *s3Objects) IsReady(ctx context.Context) bool {
-	return minio.IsBackendOnline(ctx, l.HTTPClient, l.Client.EndpointURL().String())
-}
-
 func (l *s3Objects) IsTaggingSupported() bool {
 	return true
 }
diff --git a/cmd/healthcheck-handler.go b/cmd/healthcheck-handler.go
index 859184653..8c3e5a9ef 100644
--- a/cmd/healthcheck-handler.go
+++ b/cmd/healthcheck-handler.go
@@ -35,8 +35,17 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
 	ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getReadyDeadline())
 	defer cancel()

-	if !objLayer.IsReady(ctx) {
-		writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
+	opts := HealthOptions{Maintenance: r.URL.Query().Get("maintenance") == "true"}
+	result := objLayer.Health(ctx, opts)
+	if !result.Healthy {
+		// For a maintenance call we are purposefully being asked whether
+		// this server can safely be taken down; return an appropriate
+		// error so that orchestrators know that it cannot.
+		if opts.Maintenance {
+			writeResponse(w, http.StatusPreconditionFailed, nil, mimeNone)
+		} else {
+			writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
+		}
 		return
 	}

diff --git a/cmd/notification.go b/cmd/notification.go
index 8f04ea8b1..5ad0d831a 100644
--- a/cmd/notification.go
+++ b/cmd/notification.go
@@ -1164,26 +1164,21 @@ func (sys *NotificationSys) ServerInfo() []madmin.ServerProperties {
 }

 // GetLocalDiskIDs - return disk ids of the local disks of the peers.
-func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) []string {
-	var diskIDs []string
-	var mu sync.Mutex
-
+func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) (localDiskIDs [][]string) {
+	localDiskIDs = make([][]string, len(sys.peerClients))
 	var wg sync.WaitGroup
-	for _, client := range sys.peerClients {
+	for idx, client := range sys.peerClients {
 		if client == nil {
 			continue
 		}
 		wg.Add(1)
-		go func(client *peerRESTClient) {
+		go func(idx int, client *peerRESTClient) {
 			defer wg.Done()
-			ids := client.GetLocalDiskIDs(ctx)
-			mu.Lock()
-			diskIDs = append(diskIDs, ids...)
-			mu.Unlock()
-		}(client)
+			localDiskIDs[idx] = client.GetLocalDiskIDs(ctx)
+		}(idx, client)
 	}
 	wg.Wait()
-	return diskIDs
+	return localDiskIDs
 }

 // NewNotificationSys - creates new notification system object.
diff --git a/cmd/object-api-interface.go b/cmd/object-api-interface.go
index e19ea5cab..8e4baf657 100644
--- a/cmd/object-api-interface.go
+++ b/cmd/object-api-interface.go
@@ -133,8 +133,8 @@ type ObjectLayer interface {
 	// Backend related metrics
 	GetMetrics(ctx context.Context) (*Metrics, error)

-	// Check Readiness
-	IsReady(ctx context.Context) bool
+	// Returns health of the backend
+	Health(ctx context.Context, opts HealthOptions) HealthResult

 	// ObjectTagging operations
 	PutObjectTags(context.Context, string, string, string, ObjectOptions) error
diff --git a/docs/metrics/healthcheck/README.md b/docs/metrics/healthcheck/README.md
index 658d28ff8..2028587ff 100644
--- a/docs/metrics/healthcheck/README.md
+++ b/docs/metrics/healthcheck/README.md
@@ -38,5 +38,35 @@ This probe always responds with '200 OK'. When readiness probe fails, Kubernetes
 ```

 ### Cluster probe
-This probe is not useful in almost all cases, this is meant for administrators to see if quorum is available in any given cluster. The reply is '200 OK' if cluster has quorum if not it returns '503 Service Unavailable'.
+
+```
+curl http://minio1:9001/minio/health/cluster
+HTTP/1.1 503 Service Unavailable
+Accept-Ranges: bytes
+Content-Length: 0
+Content-Security-Policy: block-all-mixed-content
+Server: MinIO/GOGET.GOGET
+Vary: Origin
+X-Amz-Bucket-Region: us-east-1
+X-Amz-Request-Id: 16239D6AB80EBECF
+X-Xss-Protection: 1; mode=block
+Date: Tue, 21 Jul 2020 00:36:14 GMT
+```
+
+#### Checking cluster health for maintenance
+You may query the cluster probe endpoint to check whether the node that received the request can be taken down for maintenance. If the server replies '412 Precondition Failed', taking this node down would mean losing HA; '200 OK' means you are okay to proceed.
+
+```
+curl http://minio1:9001/minio/health/cluster?maintenance=true
+HTTP/1.1 412 Precondition Failed
+Accept-Ranges: bytes
+Content-Length: 0
+Content-Security-Policy: block-all-mixed-content
+Server: MinIO/GOGET.GOGET
+Vary: Origin
+X-Amz-Bucket-Region: us-east-1
+X-Amz-Request-Id: 16239D63820C6E76
+X-Xss-Protection: 1; mode=block
+Date: Tue, 21 Jul 2020 00:35:43 GMT
+```
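The `?maintenance=true` probe added above is meant to be consumed by orchestrators before taking a node down. As a rough illustration of how a drain script or pre-stop hook might consume it, here is a minimal Go sketch; the endpoint `http://minio1:9001`, the timeout, and the exit-code handling are illustrative assumptions taken from the curl examples, not part of this patch. Only the 200 vs 412 status-code semantics come from `ClusterCheckHandler`.

```go
package main

import (
	"context"
	"fmt"
	"net/http"
	"os"
	"time"
)

// checkMaintenance asks the cluster probe whether this node can be
// taken down without losing write quorum. It returns true only on
// '200 OK'; '412 Precondition Failed' means the node must stay up.
func checkMaintenance(ctx context.Context, endpoint string) (bool, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet,
		endpoint+"/minio/health/cluster?maintenance=true", nil)
	if err != nil {
		return false, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
		return true, nil
	case http.StatusPreconditionFailed:
		return false, nil
	default:
		return false, fmt.Errorf("unexpected status %s", resp.Status)
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// "http://minio1:9001" mirrors the curl examples above and is an
	// assumption about the deployment, not something the patch defines.
	ok, err := checkMaintenance(ctx, "http://minio1:9001")
	if err != nil {
		fmt.Fprintln(os.Stderr, "cluster health check failed:", err)
		os.Exit(1)
	}
	if !ok {
		fmt.Fprintln(os.Stderr, "not safe to take this node down: write quorum would be lost")
		os.Exit(1)
	}
	fmt.Println("safe to proceed with maintenance")
}
```

In Kubernetes this kind of check would typically sit behind a preStop hook or an operator's drain logic, with the 412 path treated as "do not proceed".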