From 5a28ef0d479575a3209566001f093e95e37a4091 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sat, 10 Aug 2019 05:43:14 -0700 Subject: [PATCH] Bump readiness check up to 10000 go-routines (#8057) Most of our current workloads reach this value regularly, so it doesn't make sense to keep the 1000 go-routine limit. --- cmd/healthcheck-handler.go | 18 ++++++++++-------- docs/metrics/healthcheck/README.md | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cmd/healthcheck-handler.go b/cmd/healthcheck-handler.go index da3c8ea6a..8617aceaf 100644 --- a/cmd/healthcheck-handler.go +++ b/cmd/healthcheck-handler.go @@ -26,14 +26,16 @@ import ( ) const ( - minioHealthGoroutineThreshold = 1000 + minioHealthGoroutineThreshold = 10000 ) -// ReadinessCheckHandler -- checks if there are more than threshold number of goroutines running, -// returns service unavailable. -// Readiness probes are used to detect situations where application is under heavy load -// and temporarily unable to serve. In a orchestrated setup like Kubernetes, containers reporting -// that they are not ready do not receive traffic through Kubernetes Services. +// ReadinessCheckHandler -- checks if there are more than threshold +// number of goroutines running, returns service unavailable. +// +// Readiness probes are used to detect situations where application +// is under heavy load and temporarily unable to serve. In an orchestrated +// setup like Kubernetes, containers reporting that they are not ready do +// not receive traffic through Kubernetes Services. 
func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) { if err := goroutineCountCheck(minioHealthGoroutineThreshold); err != nil { writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) @@ -98,8 +100,8 @@ func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) { writeResponse(w, http.StatusOK, nil, mimeNone) } -// checks threshold against total number of go-routines in the system and throws error if -// more than threshold go-routines are running. +// checks threshold against total number of go-routines in the system and +// returns an error if more than threshold go-routines are running. func goroutineCountCheck(threshold int) error { count := runtime.NumGoroutine() if count > threshold { diff --git a/docs/metrics/healthcheck/README.md b/docs/metrics/healthcheck/README.md index 57e1f60c6..bfcd57b1b 100644 --- a/docs/metrics/healthcheck/README.md +++ b/docs/metrics/healthcheck/README.md @@ -14,7 +14,7 @@ When liveness probe fails, Kubernetes like platforms restart the container. This probe is used to identify situations where the server is not ready to accept requests yet. In most cases, such conditions recover in some time. -Internally, MinIO readiness probe handler checks for total go-routines. If the number of go-routines is less than 1000 (threshold), the server returns 200 OK, otherwise 503 Service Unavailable. +Internally, MinIO readiness probe handler checks for total go-routines. If the number of go-routines does not exceed 10000 (threshold), the server returns 200 OK, otherwise 503 Service Unavailable. Platforms like Kubernetes *do not* forward traffic to a pod until its readiness probe is successful.