diff --git a/cmd/generic-handlers.go b/cmd/generic-handlers.go
index f038ec001..696c5eeed 100644
--- a/cmd/generic-handlers.go
+++ b/cmd/generic-handlers.go
@@ -193,6 +193,18 @@ func guessIsBrowserReq(req *http.Request) bool {
 	return strings.Contains(req.Header.Get("User-Agent"), "Mozilla")
 }
 
+// guessIsHealthCheckReq - reports whether the incoming request is an anonymous
+// GET aimed at the liveness or the readiness healthcheck path.
+func guessIsHealthCheckReq(req *http.Request) bool {
+	if req == nil || getRequestAuthType(req) != authTypeAnonymous || req.Method != http.MethodGet {
+		return false
+	}
+	// Only the two healthcheck endpoints qualify.
+	probePath := req.URL.Path
+	return probePath == healthCheckPathPrefix+healthCheckLivenessPath ||
+		probePath == healthCheckPathPrefix+healthCheckReadinessPath
+}
+
 // guessIsRPCReq - returns true if the request is for an RPC endpoint.
 func guessIsRPCReq(req *http.Request) bool {
 	if req == nil {
@@ -263,7 +275,7 @@ func setReservedBucketHandler(h http.Handler) http.Handler {
 
 func (h minioReservedBucketHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 	switch {
-	case guessIsRPCReq(r), guessIsBrowserReq(r), isAdminReq(r):
+	case guessIsRPCReq(r), guessIsBrowserReq(r), guessIsHealthCheckReq(r), isAdminReq(r):
 		// Allow access to reserved buckets
 	default:
 		// For all other requests reject access to reserved
diff --git a/cmd/healthcheck-handler.go b/cmd/healthcheck-handler.go
new file mode 100644
index 000000000..52ee8f218
--- /dev/null
+++ b/cmd/healthcheck-handler.go
@@ -0,0 +1,69 @@
+/*
+ * Minio Cloud Storage, (C) 2018 Minio, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cmd
+
+import (
+	"fmt"
+	"net/http"
+	"runtime"
+)
+
+const (
+	minioHealthGoroutineThreshold = 1000
+)
+
+// ReadinessCheckHandler -- replies 503 Service Unavailable when the number of running
+// goroutines exceeds minioHealthGoroutineThreshold, and 200 OK otherwise.
+// A readiness probe signals that the application is under heavy load and temporarily
+// unable to serve. In an orchestrated setup like Kubernetes, containers reporting
+// that they are not ready do not receive traffic through Kubernetes Services.
+func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) {
+	statusCode := http.StatusOK
+	if goroutineCountCheck(minioHealthGoroutineThreshold) != nil {
+		statusCode = http.StatusServiceUnavailable
+	}
+	writeResponse(w, statusCode, nil, mimeNone)
+}
+
+// LivenessCheckHandler -- replies 200 OK when the object layer is initialized and an
+// internal ListBuckets call succeeds, and 503 Service Unavailable otherwise.
+// A failing liveness probe means the application (minio) has gone into a state
+// it can not recover from except by being restarted.
+func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) {
+	// Assume the server is unhealthy until every check below passes.
+	statusCode := http.StatusServiceUnavailable
+
+	// A nil object layer means the service is not initialized yet.
+	if layer := newObjectLayerFn(); layer != nil {
+		// A failing ListBuckets means the server is having issues.
+		if _, err := layer.ListBuckets(); err == nil {
+			statusCode = http.StatusOK
+		}
+	}
+
+	writeResponse(w, statusCode, nil, mimeNone)
+}
+
+// goroutineCountCheck compares the total number of go-routines in the system against
+// threshold and returns an error if more than threshold go-routines are running.
+func goroutineCountCheck(threshold int) error {
+	count := runtime.NumGoroutine()
+	if count > threshold {
+		return fmt.Errorf("too many goroutines (%d > %d)", count, threshold)
+	}
+	return nil
+}
diff --git a/cmd/healthcheck-handler_test.go b/cmd/healthcheck-handler_test.go
new file mode 100644
index 000000000..2def81403
--- /dev/null
+++ b/cmd/healthcheck-handler_test.go
@@ -0,0 +1,45 @@
+/*
+ * Minio Cloud Storage, (C) 2018 Minio, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cmd
+
+import (
+	"testing"
+	"time"
+)
+
+func TestGoroutineCountCheck(t *testing.T) {
+	tests := []struct {
+		threshold int
+		wantErr   bool
+	}{
+		{5000, false},
+		{5, true},
+		{6, true},
+	}
+	for _, tt := range tests {
+		// Make goroutines -- to make sure number of go-routines is higher than threshold.
+		// They sleep for seconds (a bare 5 would be 5ns) so they are still alive when counted.
+		if tt.threshold == 5 || tt.threshold == 6 {
+			for i := 0; i < 6; i++ {
+				go time.Sleep(5 * time.Second)
+			}
+		}
+		if err := goroutineCountCheck(tt.threshold); (err != nil) != tt.wantErr {
+			t.Errorf("goroutineCountCheck() error = %v, wantErr %v", err, tt.wantErr)
+		}
+	}
+}
diff --git a/cmd/healthcheck-router.go b/cmd/healthcheck-router.go
new file mode 100644
index 000000000..421dddec9
--- /dev/null
+++ b/cmd/healthcheck-router.go
@@ -0,0 +1,43 @@
+/*
+ * Minio Cloud Storage, (C) 2018 Minio, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cmd
+
+import (
+	"net/http"
+
+	router "github.com/gorilla/mux"
+)
+
+const (
+	healthCheckPath          = "/health"
+	healthCheckLivenessPath  = "/live"
+	healthCheckReadinessPath = "/ready"
+	healthCheckPathPrefix    = minioReservedBucketPath + healthCheckPath
+)
+
+// registerHealthCheckRouter - wires the liveness and readiness probe handlers into mux.
+func registerHealthCheckRouter(mux *router.Router) {
+	// All probes hang off a subrouter rooted at the healthcheck path prefix.
+	probeRouter := mux.NewRoute().PathPrefix(healthCheckPathPrefix).Subrouter()
+
+	// GET on the liveness path is served by LivenessCheckHandler.
+	livenessRoute := probeRouter.Methods(http.MethodGet).Path(healthCheckLivenessPath)
+	livenessRoute.HandlerFunc(LivenessCheckHandler)
+	// GET on the readiness path is served by ReadinessCheckHandler.
+	readinessRoute := probeRouter.Methods(http.MethodGet).Path(healthCheckReadinessPath)
+	readinessRoute.HandlerFunc(ReadinessCheckHandler)
+}
diff --git a/cmd/routers.go b/cmd/routers.go
index f575dd4a6..e9946892c 100644
--- a/cmd/routers.go
+++ b/cmd/routers.go
@@ -73,6 +73,9 @@ func configureServerHandler(endpoints EndpointList) (http.Handler, error) {
 	// Add Admin router.
 	registerAdminRouter(mux)
 
+	// Add healthcheck router
+	registerHealthCheckRouter(mux)
+
 	// Register web router when its enabled.
if globalIsBrowserEnabled {
 		if err := registerWebRouter(mux); err != nil {
diff --git a/dockerscripts/healthcheck.sh b/dockerscripts/healthcheck.sh
index 591e0ffe6..c0ccddecc 100755
--- a/dockerscripts/healthcheck.sh
+++ b/dockerscripts/healthcheck.sh
@@ -20,7 +20,7 @@ set -x
 _init () {
     scheme="http://"
     address="$(netstat -nplt 2>/dev/null | awk ' /(.*\/minio)/ { gsub(":::","127.0.0.1:",$4); print $4}')"
-    resource="/minio/index.html"
+    resource="/minio/health/live"
     start=$(stat -c "%Y" /proc/1)
 }
 
@@ -34,11 +34,11 @@ healthcheck_main () {
         exit 0
     else
         # Get the http response code
-        http_response=$(curl -H "User-Agent: Mozilla" -s -k -o /dev/null -I -w "%{http_code}" \
-                             ${scheme}${address}${resource})
+        # Use a plain GET (no -I, which sends HEAD): the health endpoints only answer GET.
+        http_response=$(curl -s -k -o /dev/null -w "%{http_code}" ${scheme}${address}${resource})
 
         # Get the http response body
-        http_response_body=$(curl -H "User-Agent: Mozilla" -k -s ${scheme}${address}${resource})
+        http_response_body=$(curl -k -s ${scheme}${address}${resource})
 
         # server returns response 403 and body "SSL required" if non-TLS
         # connection is attempted on a TLS-configured server. Change
@@ -46,14 +46,11 @@ healthcheck_main () {
         if [ "$http_response" = "403" ] && \
            [ "$http_response_body" = "SSL required" ]; then
             scheme="https://"
-            http_response=$(curl -H "User-Agent: Mozilla" -s -k -o /dev/null -I -w "%{http_code}" \
-                                 ${scheme}${address}${resource})
+            http_response=$(curl -s -k -o /dev/null -w "%{http_code}" ${scheme}${address}${resource})
         fi
 
-        # If http_repsonse is 200 - server is up. When MINIO_BROWSER is
-        # set to off, curl responds with 404. We assume that the server
-        # is up
-        [ "$http_response" = "200" ] || [ "$http_response" = "404" ]
+        # If http_response is 200 - server is up.
+        [ "$http_response" = "200" ]
     fi
 }
 
diff --git a/docs/healthcheck/README.md b/docs/healthcheck/README.md
new file mode 100644
index 000000000..0a956a3ab
--- /dev/null
+++ b/docs/healthcheck/README.md
@@ -0,0 +1,39 @@
+## Minio Healthcheck
+
+Minio server exposes two unauthenticated healthcheck endpoints - a liveness probe and a readiness probe - at `/minio/health/live` and `/minio/health/ready` respectively.
+
+### Liveness probe
+This probe is used to identify situations where the server is running but may not behave optimally, e.g. sluggish responses or a corrupt backend. Such problems can *only* be fixed by a restart.
+
+Internally, the Minio liveness probe handler does a ListBuckets call. If successful, the server returns 200 OK, otherwise 503 Service Unavailable.
+
+When the liveness probe fails, Kubernetes-like platforms restart the container.
+
+Sample configuration in a Kubernetes `yaml` file.
+
+```yaml
+livenessProbe:
+  httpGet:
+    path: /minio/health/live
+    port: 9000
+  initialDelaySeconds: 10
+  periodSeconds: 20
+```
+
+### Readiness probe
+This probe is used to identify situations where the server is not ready to accept requests yet. In most cases, such conditions recover after some time.
+
+Internally, the Minio readiness probe handler checks the total number of go-routines. If that number does not exceed the threshold of 1000, the server returns 200 OK, otherwise 503 Service Unavailable.
+
+Platforms like Kubernetes *do not* forward traffic to a pod until its readiness probe is successful.
+
+Sample configuration in a Kubernetes `yaml` file.
+
+```yaml
+readinessProbe:
+  httpGet:
+    path: /minio/health/ready
+    port: 9000
+  initialDelaySeconds: 10
+  periodSeconds: 20
+```
\ No newline at end of file