Add healthcheck endpoints (#5543)
This PR adds readiness and liveness endpoints to probe Minio server instance health. The endpoints are accessible without authentication, and the paths are /minio/health/live and /minio/health/ready for liveness and readiness respectively. The new healthcheck liveness endpoint is now used for the Docker healthcheck. Fixes #5357 Fixes #5514
master
parent
d90985b6d8
commit
10b01ac836
@ -0,0 +1,69 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2018 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package cmd |
||||
|
||||
import ( |
||||
"fmt" |
||||
"net/http" |
||||
"runtime" |
||||
) |
||||
|
||||
// minioHealthGoroutineThreshold is the goroutine count handed to
// goroutineCountCheck by the readiness probe; a count above this value makes
// the probe report the server as unavailable.
const minioHealthGoroutineThreshold = 1000
||||
|
||||
// ReadinessCheckHandler -- checks if there are more than threshold number of goroutines running,
|
||||
// returns service unavailable.
|
||||
// Readiness probes are used to detect situations where application is under heavy load
|
||||
// and temporarily unable to serve. In a orchestrated setup like Kubernetes, containers reporting
|
||||
// that they are not ready do not receive traffic through Kubernetes Services.
|
||||
func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) { |
||||
if err := goroutineCountCheck(minioHealthGoroutineThreshold); err != nil { |
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) |
||||
return |
||||
} |
||||
writeResponse(w, http.StatusOK, nil, mimeNone) |
||||
} |
||||
|
||||
// LivenessCheckHandler -- checks if server can ListBuckets internally. If not, server is
|
||||
// considered to have failed and needs to be restarted.
|
||||
// Liveness probes are used to detect situations where application (minio)
|
||||
// has gone into a state where it can not recover except by being restarted.
|
||||
func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) { |
||||
objLayer := newObjectLayerFn() |
||||
// Service not initialized yet
|
||||
if objLayer == nil { |
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) |
||||
return |
||||
} |
||||
// List buckets is unsuccessful, means server is having issues, send 503 service unavailable
|
||||
if _, err := objLayer.ListBuckets(); err != nil { |
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) |
||||
return |
||||
} |
||||
writeResponse(w, http.StatusOK, nil, mimeNone) |
||||
} |
||||
|
||||
// goroutineCountCheck compares the number of goroutines that currently exist,
// as reported by runtime.NumGoroutine, against threshold. It returns an error
// describing both numbers when the count is strictly greater than threshold,
// and nil otherwise.
func goroutineCountCheck(threshold int) error {
	if running := runtime.NumGoroutine(); running > threshold {
		return fmt.Errorf("too many goroutines (%d > %d)", running, threshold)
	}
	return nil
}
@ -0,0 +1,44 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2018 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package cmd |
||||
|
||||
import ( |
||||
"testing" |
||||
"time" |
||||
) |
||||
|
||||
func TestGoroutineCountCheck(t *testing.T) { |
||||
tests := []struct { |
||||
threshold int |
||||
wantErr bool |
||||
}{ |
||||
{5000, false}, |
||||
{5, true}, |
||||
{6, true}, |
||||
} |
||||
for _, tt := range tests { |
||||
// Make goroutines -- to make sure number of go-routines is higher than threshold
|
||||
if tt.threshold == 5 || tt.threshold == 6 { |
||||
for i := 0; i < 6; i++ { |
||||
go time.Sleep(5) |
||||
} |
||||
} |
||||
if err := goroutineCountCheck(tt.threshold); (err != nil) != tt.wantErr { |
||||
t.Errorf("goroutineCountCheck() error = %v, wantErr %v", err, tt.wantErr) |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,43 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2018 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package cmd |
||||
|
||||
import ( |
||||
"net/http" |
||||
|
||||
router "github.com/gorilla/mux" |
||||
) |
||||
|
||||
const ( |
||||
healthCheckPath = "/health" |
||||
healthCheckLivenessPath = "/live" |
||||
healthCheckReadinessPath = "/ready" |
||||
healthCheckPathPrefix = minioReservedBucketPath + healthCheckPath |
||||
) |
||||
|
||||
// registerHealthCheckRouter - add handler functions for liveness and readiness routes.
|
||||
func registerHealthCheckRouter(mux *router.Router) { |
||||
|
||||
// Healthcheck router
|
||||
healthRouter := mux.NewRoute().PathPrefix(healthCheckPathPrefix).Subrouter() |
||||
|
||||
// Liveness handler
|
||||
healthRouter.Methods(http.MethodGet).Path(healthCheckLivenessPath).HandlerFunc(LivenessCheckHandler) |
||||
|
||||
// Readiness handler
|
||||
healthRouter.Methods(http.MethodGet).Path(healthCheckReadinessPath).HandlerFunc(ReadinessCheckHandler) |
||||
} |
@ -0,0 +1,39 @@ |
||||
## Minio Healthcheck |
||||
|
||||
Minio server exposes two unauthenticated healthcheck endpoints, a liveness probe and a readiness probe, at `/minio/health/live` and `/minio/health/ready` respectively. |
||||
|
||||
### Liveness probe |
||||
This probe is used to identify situations where the server is running but may not behave optimally, e.g. sluggish responses or a corrupt backend. Such problems can *only* be fixed by a restart. |
||||
|
||||
Internally, Minio liveness probe handler does a ListBuckets call. If successful, the server returns 200 OK, otherwise 503 Service Unavailable. |
||||
|
||||
When the liveness probe fails, platforms like Kubernetes restart the container. |
||||
|
||||
Sample configuration in a Kubernetes `yaml` file. |
||||
|
||||
```yaml |
||||
livenessProbe: |
||||
httpGet: |
||||
path: /minio/health/live |
||||
port: 9000 |
||||
initialDelaySeconds: 10 |
||||
periodSeconds: 20 |
||||
``` |
||||
|
||||
### Readiness probe |
||||
This probe is used to identify situations where the server is not ready to accept requests yet. In most cases, such conditions recover in some time. |
||||
|
||||
Internally, the Minio readiness probe handler checks the total number of go-routines. If the number of go-routines does not exceed 1000 (threshold), the server returns 200 OK, otherwise 503 Service Unavailable. |
||||
|
||||
Platforms like Kubernetes *do not* forward traffic to a pod until its readiness probe is successful. |
||||
|
||||
Sample configuration in a Kubernetes `yaml` file. |
||||
|
||||
```yaml |
||||
readinessProbe: |
||||
httpGet: |
||||
path: /minio/health/ready |
||||
port: 9000 |
||||
initialDelaySeconds: 10 |
||||
periodSeconds: 20 |
||||
``` |
Loading…
Reference in new issue