diff --git a/cmd/config/api/api.go b/cmd/config/api/api.go
index f485d77ca..3b054690e 100644
--- a/cmd/config/api/api.go
+++ b/cmd/config/api/api.go
@@ -31,17 +31,23 @@ import (
 const (
 	apiRequestsMax             = "requests_max"
 	apiRequestsDeadline        = "requests_deadline"
-	apiReadyDeadline           = "ready_deadline"
+	apiClusterDeadline         = "cluster_deadline"
 	apiCorsAllowOrigin         = "cors_allow_origin"
 	apiRemoteTransportDeadline = "remote_transport_deadline"
 
 	EnvAPIRequestsMax             = "MINIO_API_REQUESTS_MAX"
 	EnvAPIRequestsDeadline        = "MINIO_API_REQUESTS_DEADLINE"
-	EnvAPIReadyDeadline           = "MINIO_API_READY_DEADLINE"
+	EnvAPIClusterDeadline         = "MINIO_API_CLUSTER_DEADLINE"
 	EnvAPICorsAllowOrigin         = "MINIO_API_CORS_ALLOW_ORIGIN"
 	EnvAPIRemoteTransportDeadline = "MINIO_API_REMOTE_TRANSPORT_DEADLINE"
 )
 
+// Deprecated key and ENVs
+const (
+	apiReadyDeadline    = "ready_deadline"
+	EnvAPIReadyDeadline = "MINIO_API_READY_DEADLINE"
+)
+
 // DefaultKVS - default storage class config
 var (
 	DefaultKVS = config.KVS{
@@ -54,7 +60,7 @@ var (
 			Value: "10s",
 		},
 		config.KV{
-			Key:   apiReadyDeadline,
+			Key:   apiClusterDeadline,
 			Value: "10s",
 		},
 		config.KV{
@@ -72,7 +78,7 @@ var (
 type Config struct {
 	RequestsMax             int           `json:"requests_max"`
 	RequestsDeadline        time.Duration `json:"requests_deadline"`
-	ReadyDeadline           time.Duration `json:"ready_deadline"`
+	ClusterDeadline         time.Duration `json:"cluster_deadline"`
 	CorsAllowOrigin         []string      `json:"cors_allow_origin"`
 	RemoteTransportDeadline time.Duration `json:"remote_transport_deadline"`
 }
@@ -90,6 +96,9 @@ func (sCfg *Config) UnmarshalJSON(data []byte) error {
 
 // LookupConfig - lookup api config and override with valid environment settings if any.
 func LookupConfig(kvs config.KVS) (cfg Config, err error) {
+	// Remove the deprecated ready_deadline key if present, so CheckValidKeys below does not reject legacy configs.
+	kvs.Delete(apiReadyDeadline)
+
 	if err = config.CheckValidKeys(config.APISubSys, kvs, DefaultKVS); err != nil {
 		return cfg, err
 	}
@@ -109,7 +118,7 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
 		return cfg, err
 	}
 
-	readyDeadline, err := time.ParseDuration(env.Get(EnvAPIReadyDeadline, kvs.Get(apiReadyDeadline)))
+	clusterDeadline, err := time.ParseDuration(env.Get(EnvAPIClusterDeadline, kvs.Get(apiClusterDeadline)))
 	if err != nil {
 		return cfg, err
 	}
@@ -124,7 +133,7 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
 	return Config{
 		RequestsMax:             requestsMax,
 		RequestsDeadline:        requestsDeadline,
-		ReadyDeadline:           readyDeadline,
+		ClusterDeadline:         clusterDeadline,
 		CorsAllowOrigin:         corsAllowOrigin,
 		RemoteTransportDeadline: remoteTransportDeadline,
 	}, nil
diff --git a/cmd/config/config.go b/cmd/config/config.go
index ed4de76ec..efda46064 100644
--- a/cmd/config/config.go
+++ b/cmd/config/config.go
@@ -267,6 +267,16 @@ func (kvs KVS) Get(key string) string {
 	return ""
 }
 
+// Delete - deletes the key if present from the KV list.
+func (kvs *KVS) Delete(key string) {
+	for i, kv := range *kvs {
+		if kv.Key == key {
+			*kvs = append((*kvs)[:i], (*kvs)[i+1:]...)
+			return
+		}
+	}
+}
+
 // Lookup - lookup a key in a list of KVS
 func (kvs KVS) Lookup(key string) (string, bool) {
 	for _, kv := range kvs {
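For reference, the new `KVS.Delete` is the standard Go slice-splice idiom. Below is a minimal, self-contained sketch (simplified `KV`/`KVS` types standing in for the real `cmd/config` ones) showing how a deprecated `ready_deadline` entry would be stripped from a legacy config before validation:

```
package main

import "fmt"

// KV and KVS mirror the shapes used in cmd/config, simplified so this
// example is self-contained.
type KV struct {
	Key   string
	Value string
}

type KVS []KV

// Delete splices out the first entry matching key, mutating the slice in place.
func (kvs *KVS) Delete(key string) {
	for i, kv := range *kvs {
		if kv.Key == key {
			*kvs = append((*kvs)[:i], (*kvs)[i+1:]...)
			return
		}
	}
}

func main() {
	kvs := KVS{
		{Key: "ready_deadline", Value: "10s"}, // deprecated key from an old config
		{Key: "requests_max", Value: "0"},
	}
	kvs.Delete("ready_deadline") // drop it so key validation will not reject the legacy config
	fmt.Println(kvs)             // [{requests_max 0}]
}
```

Because `Delete` uses a pointer receiver, `LookupConfig` can strip the key in place before `CheckValidKeys` runs.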
diff --git a/cmd/handler-api.go b/cmd/handler-api.go
index 8b4ecc021..f0a6885fc 100644
--- a/cmd/handler-api.go
+++ b/cmd/handler-api.go
@@ -31,7 +31,7 @@ type apiConfig struct {
 	requestsDeadline time.Duration
 	requestsPool     chan struct{}
 
-	readyDeadline    time.Duration
+	clusterDeadline  time.Duration
 	corsAllowOrigins []string
 }
 
@@ -39,7 +39,7 @@ func (t *apiConfig) init(cfg api.Config, setDriveCount int) {
 	t.mu.Lock()
 	defer t.mu.Unlock()
 
-	t.readyDeadline = cfg.ReadyDeadline
+	t.clusterDeadline = cfg.ClusterDeadline
 	t.corsAllowOrigins = cfg.CorsAllowOrigin
 
 	var apiRequestsMaxPerNode int
@@ -74,15 +74,15 @@ func (t *apiConfig) getCorsAllowOrigins() []string {
 	return corsAllowOrigins
 }
 
-func (t *apiConfig) getReadyDeadline() time.Duration {
+func (t *apiConfig) getClusterDeadline() time.Duration {
 	t.mu.RLock()
 	defer t.mu.RUnlock()
 
-	if t.readyDeadline == 0 {
+	if t.clusterDeadline == 0 {
 		return 10 * time.Second
 	}
 
-	return t.readyDeadline
+	return t.clusterDeadline
 }
 
 func (t *apiConfig) getRequestsPool() (chan struct{}, <-chan time.Time) {
diff --git a/cmd/healthcheck-handler.go b/cmd/healthcheck-handler.go
index b99ed379e..8f1de552b 100644
--- a/cmd/healthcheck-handler.go
+++ b/cmd/healthcheck-handler.go
@@ -33,7 +33,7 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getReadyDeadline())
+	ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getClusterDeadline())
 	defer cancel()
 
 	opts := HealthOptions{Maintenance: r.URL.Query().Get("maintenance") == "true"}
diff --git a/docs/metrics/README.md b/docs/metrics/README.md
index b2dda9d2e..e537c9e62 100644
--- a/docs/metrics/README.md
+++ b/docs/metrics/README.md
@@ -4,10 +4,10 @@ MinIO server exposes monitoring data over endpoints. Monitoring tools can pick t
 
 ### Healthcheck Probe
 
-MinIO server has two healthcheck related un-authenticated endpoints, a liveness probe to indicate if server is working fine and a readiness probe to indicate if server is not accepting connections due to heavy load.
+MinIO server has two healthcheck related un-authenticated endpoints: a liveness probe to indicate if the server is responding, and a cluster probe to check if the server can be taken down for maintenance.
 
 - Liveness probe available at `/minio/health/live`
-- Readiness probe available at `/minio/health/ready`
+- Cluster probe available at `/minio/health/cluster`
 
 Read more on how to use these endpoints in [MinIO healthcheck guide](https://github.com/minio/minio/blob/master/docs/metrics/healthcheck/README.md).
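To make the new cluster probe semantics concrete, here is a hedged client-side sketch: it polls `/minio/health/cluster` and treats '503 Service Unavailable' as lost quorum. The host and port are assumptions for a local deployment; `X-Minio-Write-Quorum` is the response header introduced by this change:

```
package main

import (
	"fmt"
	"net/http"
	"time"
)

func main() {
	client := &http.Client{Timeout: 10 * time.Second}

	// Hypothetical local deployment; adjust the address for your cluster.
	resp, err := client.Get("http://localhost:9000/minio/health/cluster")
	if err != nil {
		fmt.Println("probe failed:", err)
		return
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
		// The write quorum header is only meaningful on a healthy reply.
		fmt.Println("cluster has quorum, write quorum:", resp.Header.Get("X-Minio-Write-Quorum"))
	case http.StatusServiceUnavailable:
		fmt.Println("cluster does not have quorum")
	default:
		fmt.Println("unexpected status:", resp.Status)
	}
}
```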
diff --git a/docs/metrics/healthcheck/README.md b/docs/metrics/healthcheck/README.md
index 6623daed8..3d2940aac 100644
--- a/docs/metrics/healthcheck/README.md
+++ b/docs/metrics/healthcheck/README.md
@@ -1,6 +1,6 @@
 ## MinIO Healthcheck
 
-MinIO server exposes three un-authenticated, healthcheck endpoints liveness probe, readiness probe and a cluster probe at `/minio/health/live`, `/minio/health/ready` and `/minio/health/cluster` respectively.
+MinIO server exposes two un-authenticated healthcheck endpoints, a liveness probe and a cluster probe, at `/minio/health/live` and `/minio/health/cluster` respectively.
 
 ### Liveness probe
 
@@ -12,31 +12,13 @@ This probe always responds with '200 OK'. When liveness probe fails, Kubernetes
       path: /minio/health/live
       port: 9000
       scheme: HTTP
-      initialDelaySeconds: 3
-      periodSeconds: 1
-      timeoutSeconds: 1
+      initialDelaySeconds: 120
+      periodSeconds: 15
+      timeoutSeconds: 10
       successThreshold: 1
       failureThreshold: 3
 ```
 
-### Readiness probe
-
-This probe always responds with '200 OK'. When readiness probe fails, Kubernetes like platforms *do not* forward traffic to a pod.
-
-```
-  readinessProbe:
-    httpGet:
-      path: /minio/health/ready
-      port: 9000
-      scheme: HTTP
-      initialDelaySeconds: 3
-      periodSeconds: 1
-      timeoutSeconds: 1
-      successThreshold: 1
-      failureThreshold: 3
-
-```
-
 ### Cluster probe
 
 This probe is not useful in almost all cases, this is meant for administrators to see if quorum is available in any given cluster. The reply is '200 OK' if cluster has quorum if not it returns '503 Service Unavailable'.
@@ -49,6 +31,7 @@ Content-Security-Policy: block-all-mixed-content
 Server: MinIO/GOGET.GOGET
 Vary: Origin
 X-Amz-Bucket-Region: us-east-1
+X-Minio-Write-Quorum: 3
 X-Amz-Request-Id: 16239D6AB80EBECF
 X-Xss-Protection: 1; mode=block
 Date: Tue, 21 Jul 2020 00:36:14 GMT
@@ -68,5 +51,6 @@ Vary: Origin
 X-Amz-Bucket-Region: us-east-1
 X-Amz-Request-Id: 16239D63820C6E76
 X-Xss-Protection: 1; mode=block
+X-Minio-Write-Quorum: 3
 Date: Tue, 21 Jul 2020 00:35:43 GMT
 ```
diff --git a/docs/orchestration/kubernetes/README.md b/docs/orchestration/kubernetes/README.md
index 1dd8284b7..2839531b8 100644
--- a/docs/orchestration/kubernetes/README.md
+++ b/docs/orchestration/kubernetes/README.md
@@ -12,9 +12,7 @@ There are multiple options to deploy MinIO on Kubernetes:
 
 ## Monitoring MinIO in Kubernetes
 
-MinIO server exposes un-authenticated readiness and liveness endpoints so Kubernetes can natively identify unhealthy MinIO containers. MinIO also exposes Prometheus compatible data on a different endpoint to enable Prometheus users to natively monitor their MinIO deployments.
-
-_Note_ : Readiness check is not allowed in distributed MinIO deployment. This is because Kubernetes doesn't allow any traffic to containers whose Readiness checks fail, and in a distributed setup, MinIO server can't respond to Readiness checks until all the nodes are reachable. So, Liveness checks are recommended native Kubernetes monitoring approach for distributed MinIO StatefulSets. Read more about Kubernetes recommendations for [container probes](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes).
+MinIO server exposes an un-authenticated liveness endpoint so Kubernetes can natively identify unhealthy MinIO containers. MinIO also exposes Prometheus compatible data on a different endpoint to enable Prometheus users to natively monitor their MinIO deployments.
 
 ## Explore Further
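A final note on configuration precedence: `LookupConfig` reads the deadline with `env.Get(EnvAPIClusterDeadline, kvs.Get(apiClusterDeadline))`, so the `MINIO_API_CLUSTER_DEADLINE` environment variable overrides the stored `cluster_deadline` value. Below is a standalone sketch of that lookup order using only the standard library; the `getEnv` helper is illustrative, not MinIO's internal `env` package:

```
package main

import (
	"fmt"
	"os"
	"time"
)

// getEnv mimics the precedence in LookupConfig: prefer the environment
// variable, fall back to the stored config value.
func getEnv(key, fallback string) string {
	if v, ok := os.LookupEnv(key); ok {
		return v
	}
	return fallback
}

func main() {
	// "10s" plays the role of kvs.Get(apiClusterDeadline), the configured default.
	raw := getEnv("MINIO_API_CLUSTER_DEADLINE", "10s")

	clusterDeadline, err := time.ParseDuration(raw)
	if err != nil {
		fmt.Println("invalid duration:", err)
		return
	}
	fmt.Println("cluster deadline:", clusterDeadline) // 10s unless the env var overrides it
}
```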