Updated Prometheus metrics (#11141)

* Add metrics for nodes online and offline
* Add cluster capacity metrics
* Introduce v2 metrics
master
Ritesh H Shukla 3 years ago committed by GitHub
parent 3bda8f755c
commit b4add82bb6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 32
      cmd/admin-server-info.go
  2. 8
      cmd/disk-cache-stats.go
  3. 4
      cmd/erasure-server-pool.go
  4. 4
      cmd/fs-v1.go
  5. 2
      cmd/gateway-common.go
  6. 26
      cmd/gateway-metrics.go
  7. 4
      cmd/gateway-unsupported.go
  8. 4
      cmd/gateway/azure/gateway-azure.go
  9. 4
      cmd/gateway/gcs/gateway-gcs.go
  10. 4
      cmd/gateway/s3/gateway-s3.go
  11. 4
      cmd/generic-handlers.go
  12. 14
      cmd/http-stats.go
  13. 13
      cmd/metrics-router.go
  14. 1187
      cmd/metrics-v2.go
  15. 127
      cmd/metrics.go
  16. 54
      cmd/notification-summary.go
  17. 68
      cmd/notification.go
  18. 9
      cmd/object-api-interface.go
  19. 23
      cmd/peer-rest-client.go
  20. 1
      cmd/peer-rest-common.go
  21. 32
      cmd/peer-rest-server.go
  22. 11
      docs/metrics/README.md
  23. 202
      docs/metrics/prometheus/README.md
  24. 47
      docs/metrics/prometheus/list.md
  25. 3
      go.mod
  26. 6
      go.sum
  27. 28
      pkg/madmin/health.go

@ -69,3 +69,35 @@ func getLocalServerProperty(endpointServerPools EndpointServerPools, r *http.Req
Disks: storageInfo.Disks, Disks: storageInfo.Disks,
} }
} }
// getLocalDisks returns the madmin.Disk entries for all disks attached to
// local endpoints across every server pool, by initializing the local
// storage layers and querying their storage info.
func getLocalDisks(endpointServerPools EndpointServerPools) []madmin.Disk {
	var localEndpoints Endpoints
	for _, ep := range endpointServerPools {
		for _, endpoint := range ep.Endpoints {
			if endpoint.IsLocal {
				localEndpoints = append(localEndpoints, endpoint)
			}
		}
	}
	// NOTE: the previous implementation also probed remote endpoints with
	// isServerResolvable() and recorded an online/offline map that was never
	// read — that dead work (including network round-trips) is removed here.
	//
	// Errors are deliberately ignored below: disks that failed to initialize
	// are still reported, with whatever state getStorageInfo assigns them.
	localDisks, _ := initStorageDisksWithErrors(localEndpoints)
	defer closeStorageDisks(localDisks)
	storageInfo, _ := getStorageInfo(localDisks, localEndpoints.GetAllStrings())
	return storageInfo.Disks
}

@ -34,6 +34,14 @@ type CacheDiskStats struct {
Dir string Dir string
} }
// GetUsageLevelString gets the string representation for the usage level.
// A zero UsageState reads as "low"; any non-zero value reads as "high".
func (c *CacheDiskStats) GetUsageLevelString() (u string) {
	if atomic.LoadInt32(&c.UsageState) != 0 {
		return "high"
	}
	return "low"
}
// CacheStats - represents bytes served from cache, // CacheStats - represents bytes served from cache,
// cache hits and cache misses. // cache hits and cache misses.
type CacheStats struct { type CacheStats struct {

@ -1377,9 +1377,9 @@ func (z *erasureServerPools) HealObject(ctx context.Context, bucket, object, ver
} }
// GetMetrics - no op // GetMetrics - no op
func (z *erasureServerPools) GetMetrics(ctx context.Context) (*Metrics, error) { func (z *erasureServerPools) GetMetrics(ctx context.Context) (*BackendMetrics, error) {
logger.LogIf(ctx, NotImplemented{}) logger.LogIf(ctx, NotImplemented{})
return &Metrics{}, NotImplemented{} return &BackendMetrics{}, NotImplemented{}
} }
func (z *erasureServerPools) getZoneAndSet(id string) (int, int, error) { func (z *erasureServerPools) getZoneAndSet(id string) (int, int, error) {

@ -1554,9 +1554,9 @@ func (fs *FSObjects) HealObjects(ctx context.Context, bucket, prefix string, opt
} }
// GetMetrics - no op // GetMetrics - no op
func (fs *FSObjects) GetMetrics(ctx context.Context) (*Metrics, error) { func (fs *FSObjects) GetMetrics(ctx context.Context) (*BackendMetrics, error) {
logger.LogIf(ctx, NotImplemented{}) logger.LogIf(ctx, NotImplemented{})
return &Metrics{}, NotImplemented{} return &BackendMetrics{}, NotImplemented{}
} }
// ListObjectsV2 lists all blobs in bucket filtered by prefix // ListObjectsV2 lists all blobs in bucket filtered by prefix

@ -389,7 +389,7 @@ func shouldMeterRequest(req *http.Request) bool {
// MetricsTransport is a custom wrapper around Transport to track metrics // MetricsTransport is a custom wrapper around Transport to track metrics
type MetricsTransport struct { type MetricsTransport struct {
Transport *http.Transport Transport *http.Transport
Metrics *Metrics Metrics *BackendMetrics
} }
// RoundTrip implements the RoundTrip method for MetricsTransport // RoundTrip implements the RoundTrip method for MetricsTransport

@ -29,36 +29,28 @@ type RequestStats struct {
Post uint64 `json:"Post"` Post uint64 `json:"Post"`
} }
// Metrics - represents bytes served from backend
// only implemented for S3 Gateway
type Metrics struct {
bytesReceived uint64
bytesSent uint64
requestStats RequestStats
}
// IncBytesReceived - Increase total bytes received from gateway backend // IncBytesReceived - Increase total bytes received from gateway backend
func (s *Metrics) IncBytesReceived(n uint64) { func (s *BackendMetrics) IncBytesReceived(n uint64) {
atomic.AddUint64(&s.bytesReceived, n) atomic.AddUint64(&s.bytesReceived, n)
} }
// GetBytesReceived - Get total bytes received from gateway backend // GetBytesReceived - Get total bytes received from gateway backend
func (s *Metrics) GetBytesReceived() uint64 { func (s *BackendMetrics) GetBytesReceived() uint64 {
return atomic.LoadUint64(&s.bytesReceived) return atomic.LoadUint64(&s.bytesReceived)
} }
// IncBytesSent - Increase total bytes sent to gateway backend // IncBytesSent - Increase total bytes sent to gateway backend
func (s *Metrics) IncBytesSent(n uint64) { func (s *BackendMetrics) IncBytesSent(n uint64) {
atomic.AddUint64(&s.bytesSent, n) atomic.AddUint64(&s.bytesSent, n)
} }
// GetBytesSent - Get total bytes received from gateway backend // GetBytesSent - Get total bytes received from gateway backend
func (s *Metrics) GetBytesSent() uint64 { func (s *BackendMetrics) GetBytesSent() uint64 {
return atomic.LoadUint64(&s.bytesSent) return atomic.LoadUint64(&s.bytesSent)
} }
// IncRequests - Increase request count sent to gateway backend by 1 // IncRequests - Increase request count sent to gateway backend by 1
func (s *Metrics) IncRequests(method string) { func (s *BackendMetrics) IncRequests(method string) {
// Only increment for Head & Get requests, else no op // Only increment for Head & Get requests, else no op
if method == http.MethodGet { if method == http.MethodGet {
atomic.AddUint64(&s.requestStats.Get, 1) atomic.AddUint64(&s.requestStats.Get, 1)
@ -72,11 +64,11 @@ func (s *Metrics) IncRequests(method string) {
} }
// GetRequests - Get total number of Get & Headrequests sent to gateway backend // GetRequests - Get total number of Get & Headrequests sent to gateway backend
func (s *Metrics) GetRequests() RequestStats { func (s *BackendMetrics) GetRequests() RequestStats {
return s.requestStats return s.requestStats
} }
// NewMetrics - Prepare new Metrics structure // NewMetrics - Prepare new BackendMetrics structure
func NewMetrics() *Metrics { func NewMetrics() *BackendMetrics {
return &Metrics{} return &BackendMetrics{}
} }

@ -202,9 +202,9 @@ func (a GatewayUnsupported) CopyObject(ctx context.Context, srcBucket string, sr
} }
// GetMetrics - no op // GetMetrics - no op
func (a GatewayUnsupported) GetMetrics(ctx context.Context) (*Metrics, error) { func (a GatewayUnsupported) GetMetrics(ctx context.Context) (*BackendMetrics, error) {
logger.LogIf(ctx, NotImplemented{}) logger.LogIf(ctx, NotImplemented{})
return &Metrics{}, NotImplemented{} return &BackendMetrics{}, NotImplemented{}
} }
// PutObjectTags - not implemented. // PutObjectTags - not implemented.

@ -419,7 +419,7 @@ type azureObjects struct {
minio.GatewayUnsupported minio.GatewayUnsupported
endpoint *url.URL endpoint *url.URL
httpClient *http.Client httpClient *http.Client
metrics *minio.Metrics metrics *minio.BackendMetrics
client azblob.ServiceURL // Azure sdk client client azblob.ServiceURL // Azure sdk client
} }
@ -533,7 +533,7 @@ func parseAzurePart(metaPartFileName, prefix string) (partID int, err error) {
} }
// GetMetrics returns this gateway's metrics // GetMetrics returns this gateway's metrics
func (a *azureObjects) GetMetrics(ctx context.Context) (*minio.Metrics, error) { func (a *azureObjects) GetMetrics(ctx context.Context) (*minio.BackendMetrics, error) {
return a.metrics, nil return a.metrics, nil
} }

@ -341,7 +341,7 @@ type gcsGateway struct {
minio.GatewayUnsupported minio.GatewayUnsupported
client *storage.Client client *storage.Client
httpClient *http.Client httpClient *http.Client
metrics *minio.Metrics metrics *minio.BackendMetrics
projectID string projectID string
} }
@ -359,7 +359,7 @@ func gcsParseProjectID(credsFile string) (projectID string, err error) {
} }
// GetMetrics returns this gateway's metrics // GetMetrics returns this gateway's metrics
func (l *gcsGateway) GetMetrics(ctx context.Context) (*minio.Metrics, error) { func (l *gcsGateway) GetMetrics(ctx context.Context) (*minio.BackendMetrics, error) {
return l.metrics, nil return l.metrics, nil
} }

@ -259,11 +259,11 @@ type s3Objects struct {
minio.GatewayUnsupported minio.GatewayUnsupported
Client *miniogo.Core Client *miniogo.Core
HTTPClient *http.Client HTTPClient *http.Client
Metrics *minio.Metrics Metrics *minio.BackendMetrics
} }
// GetMetrics returns this gateway's metrics // GetMetrics returns this gateway's metrics
func (l *s3Objects) GetMetrics(ctx context.Context) (*minio.Metrics, error) { func (l *s3Objects) GetMetrics(ctx context.Context) (*minio.BackendMetrics, error) {
return l.Metrics, nil return l.Metrics, nil
} }

@ -228,7 +228,9 @@ func guessIsMetricsReq(req *http.Request) bool {
} }
aType := getRequestAuthType(req) aType := getRequestAuthType(req)
return (aType == authTypeAnonymous || aType == authTypeJWT) && return (aType == authTypeAnonymous || aType == authTypeJWT) &&
req.URL.Path == minioReservedBucketPath+prometheusMetricsPath req.URL.Path == minioReservedBucketPath+prometheusMetricsPathLegacy ||
req.URL.Path == minioReservedBucketPath+prometheusMetricsV2ClusterPath ||
req.URL.Path == minioReservedBucketPath+prometheusMetricsV2NodePath
} }
// guessIsRPCReq - returns true if the request is for an RPC endpoint. // guessIsRPCReq - returns true if the request is for an RPC endpoint.

@ -79,10 +79,10 @@ func (s *ConnStats) getS3OutputBytes() uint64 {
// Return connection stats (total input/output bytes and total s3 input/output bytes) // Return connection stats (total input/output bytes and total s3 input/output bytes)
func (s *ConnStats) toServerConnStats() ServerConnStats { func (s *ConnStats) toServerConnStats() ServerConnStats {
return ServerConnStats{ return ServerConnStats{
TotalInputBytes: s.getTotalInputBytes(), TotalInputBytes: s.getTotalInputBytes(), // Traffic including reserved bucket
TotalOutputBytes: s.getTotalOutputBytes(), TotalOutputBytes: s.getTotalOutputBytes(), // Traffic including reserved bucket
S3InputBytes: s.getS3InputBytes(), S3InputBytes: s.getS3InputBytes(), // Traffic for client buckets
S3OutputBytes: s.getS3OutputBytes(), S3OutputBytes: s.getS3OutputBytes(), // Traffic for client buckets
} }
} }
@ -163,9 +163,11 @@ func (st *HTTPStats) toServerHTTPStats() ServerHTTPStats {
// Update statistics from http request and response data // Update statistics from http request and response data
func (st *HTTPStats) updateStats(api string, r *http.Request, w *logger.ResponseWriter) { func (st *HTTPStats) updateStats(api string, r *http.Request, w *logger.ResponseWriter) {
// A successful request has a 2xx response code // A successful request has a 2xx response code
successReq := (w.StatusCode >= 200 && w.StatusCode < 300) successReq := w.StatusCode >= 200 && w.StatusCode < 300
if !strings.HasSuffix(r.URL.Path, prometheusMetricsPath) { if !strings.HasSuffix(r.URL.Path, prometheusMetricsPathLegacy) ||
!strings.HasSuffix(r.URL.Path, prometheusMetricsV2ClusterPath) ||
!strings.HasSuffix(r.URL.Path, prometheusMetricsV2NodePath) {
st.totalS3Requests.Inc(api) st.totalS3Requests.Inc(api)
if !successReq && w.StatusCode != 0 { if !successReq && w.StatusCode != 0 {
st.totalS3Errors.Inc(api) st.totalS3Errors.Inc(api)

@ -24,7 +24,9 @@ import (
) )
const ( const (
prometheusMetricsPath = "/prometheus/metrics" prometheusMetricsPathLegacy = "/prometheus/metrics"
prometheusMetricsV2ClusterPath = "/v2/metrics/cluster"
prometheusMetricsV2NodePath = "/v2/metrics/node"
) )
// Standard env prometheus auth type // Standard env prometheus auth type
@ -43,14 +45,17 @@ const (
func registerMetricsRouter(router *mux.Router) { func registerMetricsRouter(router *mux.Router) {
// metrics router // metrics router
metricsRouter := router.NewRoute().PathPrefix(minioReservedBucketPath).Subrouter() metricsRouter := router.NewRoute().PathPrefix(minioReservedBucketPath).Subrouter()
authType := strings.ToLower(os.Getenv(EnvPrometheusAuthType)) authType := strings.ToLower(os.Getenv(EnvPrometheusAuthType))
switch prometheusAuthType(authType) { switch prometheusAuthType(authType) {
case prometheusPublic: case prometheusPublic:
metricsRouter.Handle(prometheusMetricsPath, metricsHandler()) metricsRouter.Handle(prometheusMetricsPathLegacy, metricsHandler())
metricsRouter.Handle(prometheusMetricsV2ClusterPath, metricsServerHandler())
metricsRouter.Handle(prometheusMetricsV2NodePath, metricsNodeHandler())
case prometheusJWT: case prometheusJWT:
fallthrough fallthrough
default: default:
metricsRouter.Handle(prometheusMetricsPath, AuthMiddleware(metricsHandler())) metricsRouter.Handle(prometheusMetricsPathLegacy, AuthMiddleware(metricsHandler()))
metricsRouter.Handle(prometheusMetricsV2ClusterPath, AuthMiddleware(metricsServerHandler()))
metricsRouter.Handle(prometheusMetricsV2NodePath, AuthMiddleware(metricsNodeHandler()))
} }
} }

File diff suppressed because it is too large Load Diff

@ -51,6 +51,17 @@ var (
) )
) )
// Prometheus metric namespace prefixes, used as the first argument to
// prometheus.BuildFQName when constructing fully-qualified metric names.
const (
	healMetricsNamespace = "self_heal" // background self-heal metrics
	gatewayNamespace     = "gateway"   // gateway backend traffic/request metrics
	cacheNamespace       = "cache"     // disk cache hit/miss/usage metrics
	s3Namespace          = "s3"        // S3 API request/traffic metrics
	bucketNamespace      = "bucket"    // per-bucket usage metrics
	minioNamespace       = "minio"     // server/cluster level metrics
	diskNamespace        = "disk"      // per-disk storage metrics
	interNodeNamespace   = "internode" // node-to-node traffic metrics
)
func init() { func init() {
prometheus.MustRegister(httpRequestsDuration) prometheus.MustRegister(httpRequestsDuration)
prometheus.MustRegister(newMinioCollector()) prometheus.MustRegister(newMinioCollector())
@ -81,9 +92,10 @@ func (c *minioCollector) Describe(ch chan<- *prometheus.Desc) {
func (c *minioCollector) Collect(ch chan<- prometheus.Metric) { func (c *minioCollector) Collect(ch chan<- prometheus.Metric) {
// Expose MinIO's version information // Expose MinIO's version information
minioVersionInfo.WithLabelValues(Version, CommitID).Set(float64(1.0)) minioVersionInfo.WithLabelValues(Version, CommitID).Set(1.0)
storageMetricsPrometheus(ch) storageMetricsPrometheus(ch)
nodeHealthMetricsPrometheus(ch)
bucketUsageMetricsPrometheus(ch) bucketUsageMetricsPrometheus(ch)
networkMetricsPrometheus(ch) networkMetricsPrometheus(ch)
httpMetricsPrometheus(ch) httpMetricsPrometheus(ch)
@ -92,6 +104,26 @@ func (c *minioCollector) Collect(ch chan<- prometheus.Metric) {
healingMetricsPrometheus(ch) healingMetricsPrometheus(ch)
} }
// nodeHealthMetricsPrometheus exposes the number of online and offline
// MinIO peer nodes as gauge metrics on the given channel.
func nodeHealthMetricsPrometheus(ch chan<- prometheus.Metric) {
	nodesUp, nodesDown := GetPeerOnlineCount()
	// Both gauges share the same shape; emit them through one helper.
	emit := func(state, help string, value float64) {
		ch <- prometheus.MustNewConstMetric(
			prometheus.NewDesc(
				prometheus.BuildFQName(minioNamespace, "nodes", state),
				help,
				nil, nil),
			prometheus.GaugeValue,
			value,
		)
	}
	emit("online", "Total number of MinIO nodes online", float64(nodesUp))
	emit("offline", "Total number of MinIO nodes offline", float64(nodesDown))
}
// collects healing specific metrics for MinIO instance in Prometheus specific format // collects healing specific metrics for MinIO instance in Prometheus specific format
// and sends to given channel // and sends to given channel
func healingMetricsPrometheus(ch chan<- prometheus.Metric) { func healingMetricsPrometheus(ch chan<- prometheus.Metric) {
@ -102,7 +134,6 @@ func healingMetricsPrometheus(ch chan<- prometheus.Metric) {
if !exists { if !exists {
return return
} }
healMetricsNamespace := "self_heal"
var dur time.Duration var dur time.Duration
if !bgSeq.lastHealActivity.IsZero() { if !bgSeq.lastHealActivity.IsZero() {
@ -172,7 +203,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("gateway", globalGatewayName, "bytes_received"), prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "bytes_received"),
"Total number of bytes received by current MinIO Gateway "+globalGatewayName+" backend", "Total number of bytes received by current MinIO Gateway "+globalGatewayName+" backend",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -180,7 +211,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) {
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("gateway", globalGatewayName, "bytes_sent"), prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "bytes_sent"),
"Total number of bytes sent by current MinIO Gateway to "+globalGatewayName+" backend", "Total number of bytes sent by current MinIO Gateway to "+globalGatewayName+" backend",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -189,7 +220,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) {
s := m.GetRequests() s := m.GetRequests()
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("gateway", globalGatewayName, "requests"), prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"),
"Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway",
[]string{"method"}, nil), []string{"method"}, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -198,7 +229,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) {
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("gateway", globalGatewayName, "requests"), prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"),
"Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway",
[]string{"method"}, nil), []string{"method"}, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -207,7 +238,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) {
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("gateway", globalGatewayName, "requests"), prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"),
"Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway",
[]string{"method"}, nil), []string{"method"}, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -216,7 +247,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) {
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("gateway", globalGatewayName, "requests"), prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"),
"Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway",
[]string{"method"}, nil), []string{"method"}, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -236,7 +267,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("cache", "hits", "total"), prometheus.BuildFQName(cacheNamespace, "hits", "total"),
"Total number of disk cache hits in current MinIO instance", "Total number of disk cache hits in current MinIO instance",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -244,7 +275,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) {
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("cache", "misses", "total"), prometheus.BuildFQName(cacheNamespace, "misses", "total"),
"Total number of disk cache misses in current MinIO instance", "Total number of disk cache misses in current MinIO instance",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -252,7 +283,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) {
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("cache", "data", "served"), prometheus.BuildFQName(cacheNamespace, "data", "served"),
"Total number of bytes served from cache of current MinIO instance", "Total number of bytes served from cache of current MinIO instance",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -262,7 +293,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) {
// Cache disk usage percentage // Cache disk usage percentage
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("cache", "usage", "percent"), prometheus.BuildFQName(cacheNamespace, "usage", "percent"),
"Total percentage cache usage", "Total percentage cache usage",
[]string{"disk"}, nil), []string{"disk"}, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -271,7 +302,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) {
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("cache", "usage", "high"), prometheus.BuildFQName(cacheNamespace, "usage", "high"),
"Indicates cache usage is high or low, relative to current cache 'quota' settings", "Indicates cache usage is high or low, relative to current cache 'quota' settings",
[]string{"disk"}, nil), []string{"disk"}, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -309,7 +340,7 @@ func httpMetricsPrometheus(ch chan<- prometheus.Metric) {
for api, value := range httpStats.CurrentS3Requests.APIStats { for api, value := range httpStats.CurrentS3Requests.APIStats {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("s3", "requests", "current"), prometheus.BuildFQName(s3Namespace, "requests", "current"),
"Total number of running s3 requests in current MinIO server instance", "Total number of running s3 requests in current MinIO server instance",
[]string{"api"}, nil), []string{"api"}, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -321,7 +352,7 @@ func httpMetricsPrometheus(ch chan<- prometheus.Metric) {
for api, value := range httpStats.TotalS3Requests.APIStats { for api, value := range httpStats.TotalS3Requests.APIStats {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("s3", "requests", "total"), prometheus.BuildFQName(s3Namespace, "requests", "total"),
"Total number of s3 requests in current MinIO server instance", "Total number of s3 requests in current MinIO server instance",
[]string{"api"}, nil), []string{"api"}, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -333,7 +364,7 @@ func httpMetricsPrometheus(ch chan<- prometheus.Metric) {
for api, value := range httpStats.TotalS3Errors.APIStats { for api, value := range httpStats.TotalS3Errors.APIStats {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("s3", "errors", "total"), prometheus.BuildFQName(s3Namespace, "errors", "total"),
"Total number of s3 errors in current MinIO server instance", "Total number of s3 errors in current MinIO server instance",
[]string{"api"}, nil), []string{"api"}, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -351,7 +382,7 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) {
// Network Sent/Received Bytes (internode) // Network Sent/Received Bytes (internode)
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("internode", "tx", "bytes_total"), prometheus.BuildFQName(interNodeNamespace, "tx", "bytes_total"),
"Total number of bytes sent to the other peer nodes by current MinIO server instance", "Total number of bytes sent to the other peer nodes by current MinIO server instance",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -360,7 +391,7 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("internode", "rx", "bytes_total"), prometheus.BuildFQName(interNodeNamespace, "rx", "bytes_total"),
"Total number of internode bytes received by current MinIO server instance", "Total number of internode bytes received by current MinIO server instance",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -370,7 +401,7 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) {
// Network Sent/Received Bytes (Outbound) // Network Sent/Received Bytes (Outbound)
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("s3", "tx", "bytes_total"), prometheus.BuildFQName(s3Namespace, "tx", "bytes_total"),
"Total number of s3 bytes sent by current MinIO server instance", "Total number of s3 bytes sent by current MinIO server instance",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -379,7 +410,7 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("s3", "rx", "bytes_total"), prometheus.BuildFQName(s3Namespace, "rx", "bytes_total"),
"Total number of s3 bytes received by current MinIO server instance", "Total number of s3 bytes received by current MinIO server instance",
nil, nil), nil, nil),
prometheus.CounterValue, prometheus.CounterValue,
@ -414,7 +445,7 @@ func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) {
// Total space used by bucket // Total space used by bucket
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("bucket", "usage", "size"), prometheus.BuildFQName(bucketNamespace, "usage", "size"),
"Total bucket size", "Total bucket size",
[]string{"bucket"}, nil), []string{"bucket"}, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -423,7 +454,7 @@ func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) {
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("bucket", "objects", "count"), prometheus.BuildFQName(bucketNamespace, "objects", "count"),
"Total number of objects in a bucket", "Total number of objects in a bucket",
[]string{"bucket"}, nil), []string{"bucket"}, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -469,7 +500,7 @@ func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) {
for k, v := range usageInfo.ObjectSizesHistogram { for k, v := range usageInfo.ObjectSizesHistogram {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("bucket", "objects", "histogram"), prometheus.BuildFQName(bucketNamespace, "objects", "histogram"),
"Total number of objects of different sizes in a bucket", "Total number of objects of different sizes in a bucket",
[]string{"bucket", "object_size"}, nil), []string{"bucket", "object_size"}, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -497,10 +528,50 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) {
onlineDisks, offlineDisks := getOnlineOfflineDisksStats(server.Disks) onlineDisks, offlineDisks := getOnlineOfflineDisksStats(server.Disks)
totalDisks := offlineDisks.Merge(onlineDisks) totalDisks := offlineDisks.Merge(onlineDisks)
// Report total capacity
ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc(
prometheus.BuildFQName(minioNamespace, "capacity_raw", "total"),
"Total capacity online in the cluster",
nil, nil),
prometheus.GaugeValue,
float64(GetTotalCapacity(GlobalContext)),
)
// Report total capacity free
ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc(
prometheus.BuildFQName(minioNamespace, "capacity_raw_free", "total"),
"Total free capacity online in the cluster",
nil, nil),
prometheus.GaugeValue,
float64(GetTotalCapacityFree(GlobalContext)),
)
s, _ := objLayer.StorageInfo(GlobalContext)
// Report total usable capacity
ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc(
prometheus.BuildFQName(minioNamespace, "capacity_usable", "total"),
"Total usable capacity online in the cluster",
nil, nil),
prometheus.GaugeValue,
GetTotalUsableCapacity(GlobalContext, s),
)
// Report total usable capacity free
ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc(
prometheus.BuildFQName(minioNamespace, "capacity_usable_free", "total"),
"Total free usable capacity online in the cluster",
nil, nil),
prometheus.GaugeValue,
GetTotalUsableCapacityFree(GlobalContext, s),
)
// MinIO Offline Disks per node // MinIO Offline Disks per node
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("minio", "disks", "offline"), prometheus.BuildFQName(minioNamespace, "disks", "offline"),
"Total number of offline disks in current MinIO server instance", "Total number of offline disks in current MinIO server instance",
nil, nil), nil, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -510,7 +581,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) {
// MinIO Total Disks per node // MinIO Total Disks per node
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("minio", "disks", "total"), prometheus.BuildFQName(minioNamespace, "disks", "total"),
"Total number of disks for current MinIO server instance", "Total number of disks for current MinIO server instance",
nil, nil), nil, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -521,7 +592,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) {
// Total disk usage by the disk // Total disk usage by the disk
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("disk", "storage", "used"), prometheus.BuildFQName(diskNamespace, "storage", "used"),
"Total disk storage used on the disk", "Total disk storage used on the disk",
[]string{"disk"}, nil), []string{"disk"}, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -532,7 +603,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) {
// Total available space in the disk // Total available space in the disk
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("disk", "storage", "available"), prometheus.BuildFQName(diskNamespace, "storage", "available"),
"Total available space left on the disk", "Total available space left on the disk",
[]string{"disk"}, nil), []string{"disk"}, nil),
prometheus.GaugeValue, prometheus.GaugeValue,
@ -543,7 +614,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) {
// Total storage space of the disk // Total storage space of the disk
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
prometheus.NewDesc( prometheus.NewDesc(
prometheus.BuildFQName("disk", "storage", "total"), prometheus.BuildFQName(diskNamespace, "storage", "total"),
"Total space on the disk", "Total space on the disk",
[]string{"disk"}, nil), []string{"disk"}, nil),
prometheus.GaugeValue, prometheus.GaugeValue,

@ -0,0 +1,54 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package cmd
import (
"context"
)
// GetTotalCapacity gets the total capacity in the cluster.
// It sums the raw disk capacity reported by every node's hardware info.
func GetTotalCapacity(ctx context.Context) (capacity uint64) {
	for _, node := range globalNotificationSys.DiskHwInfo(ctx) {
		capacity += node.GetTotalCapacity()
	}
	return
}
// GetTotalUsableCapacity gets the total usable capacity in the cluster.
// Usable capacity is the raw capacity scaled by the data fraction of an
// erasure stripe (data shards / total shards).
func GetTotalUsableCapacity(ctx context.Context, s StorageInfo) float64 {
	raw := GetTotalCapacity(ctx)
	totalShards := s.Backend.StandardSCData + s.Backend.StandardSCParity
	if totalShards == 0 {
		// Guard against NaN when backend erasure info is not populated.
		return 0
	}
	ratio := float64(s.Backend.StandardSCData) / float64(totalShards)
	return float64(raw) * ratio
}
// GetTotalCapacityFree gets the total capacity free in the cluster.
// It sums the free disk capacity reported by every node's hardware info.
func GetTotalCapacityFree(ctx context.Context) (capacity uint64) {
	for _, node := range globalNotificationSys.DiskHwInfo(ctx) {
		capacity += node.GetTotalFreeCapacity()
	}
	return
}
// GetTotalUsableCapacityFree gets the total usable capacity free in the
// cluster. Usable capacity is the raw free capacity scaled by the data
// fraction of an erasure stripe (data shards / total shards).
func GetTotalUsableCapacityFree(ctx context.Context, s StorageInfo) float64 {
	raw := GetTotalCapacityFree(ctx)
	totalShards := s.Backend.StandardSCData + s.Backend.StandardSCParity
	if totalShards == 0 {
		// Guard against NaN when backend erasure info is not populated.
		return 0
	}
	ratio := float64(s.Backend.StandardSCData) / float64(totalShards)
	return float64(raw) * ratio
}

@ -51,8 +51,8 @@ type NotificationSys struct {
targetResCh chan event.TargetIDResult targetResCh chan event.TargetIDResult
bucketRulesMap map[string]event.RulesMap bucketRulesMap map[string]event.RulesMap
bucketRemoteTargetRulesMap map[string]map[event.TargetID]event.RulesMap bucketRemoteTargetRulesMap map[string]map[event.TargetID]event.RulesMap
peerClients []*peerRESTClient peerClients []*peerRESTClient // Excludes self
allPeerClients []*peerRESTClient allPeerClients []*peerRESTClient // Includes nil client for self
} }
// GetARNList - returns available ARNs. // GetARNList - returns available ARNs.
@ -1294,6 +1294,21 @@ func NewNotificationSys(endpoints EndpointServerPools) *NotificationSys {
} }
} }
// GetPeerOnlineCount gets the count of online and offline nodes.
func GetPeerOnlineCount() (nodesOnline, nodesOffline int) {
	nodesOnline = 1 // The local node is always counted as online.
	for _, srv := range globalNotificationSys.ServerInfo() {
		if srv.State == "ok" {
			nodesOnline++
		} else {
			nodesOffline++
		}
	}
	return
}
type eventArgs struct { type eventArgs struct {
EventName event.Name EventName event.Name
BucketName string BucketName string
@ -1428,3 +1443,52 @@ func (sys *NotificationSys) GetBandwidthReports(ctx context.Context, buckets ...
} }
return consolidatedReport return consolidatedReport
} }
// GetClusterMetrics - gets the cluster metrics from all nodes excluding self.
// It streams metrics from every reachable peer into the returned channel,
// which is closed once all peer streams end or ctx is canceled.
func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) chan Metric {
	g := errgroup.WithNErrs(len(sys.peerClients))
	peerChannels := make([]<-chan Metric, len(sys.peerClients))
	for index := range sys.peerClients {
		if sys.peerClients[index] == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			peerChannels[index], err = sys.peerClients[index].GetPeerMetrics(ctx)
			return err
		}, index)
	}

	ch := make(chan Metric)
	var wg sync.WaitGroup
	for index, err := range g.Wait() {
		if sys.peerClients[index] == nil {
			// No request was issued for the nil (self) client; skipping
			// avoids a nil dereference below and a forwarder parked on
			// a nil channel.
			continue
		}
		if err != nil {
			reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
				sys.peerClients[index].host.String())
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), err,
				sys.peerClients[index].host.String())
			continue
		}
		wg.Add(1)
		// Forward this peer's metrics into the merged channel.
		go func(ctx context.Context, peerChannel <-chan Metric, wg *sync.WaitGroup) {
			defer wg.Done()
			for {
				select {
				case m, ok := <-peerChannel:
					if !ok {
						return
					}
					select {
					case ch <- m:
					case <-ctx.Done():
						// Consumer went away; don't block forever.
						return
					}
				case <-ctx.Done():
					return
				}
			}
		}(ctx, peerChannels[index], &wg)
	}
	// Close the merged channel once every forwarder has finished.
	go func(wg *sync.WaitGroup, ch chan Metric) {
		wg.Wait()
		close(ch)
	}(&wg, ch)
	return ch
}

@ -72,6 +72,13 @@ const (
writeLock writeLock
) )
// BackendMetrics - represents bytes served from backend
//
// Traffic and request counters for the (gateway) backend. Fields are
// unexported; they are read and updated by other code in this package.
type BackendMetrics struct {
	bytesReceived uint64       // total bytes received from the backend
	bytesSent     uint64       // total bytes sent to the backend
	requestStats  RequestStats // per-API request counters -- NOTE(review): see RequestStats definition for breakdown
}
// ObjectLayer implements primitives for object API layer. // ObjectLayer implements primitives for object API layer.
type ObjectLayer interface { type ObjectLayer interface {
SetDriveCount() int // Only implemented by erasure layer SetDriveCount() int // Only implemented by erasure layer
@ -143,7 +150,7 @@ type ObjectLayer interface {
IsCompressionSupported() bool IsCompressionSupported() bool
// Backend related metrics // Backend related metrics
GetMetrics(ctx context.Context) (*Metrics, error) GetMetrics(ctx context.Context) (*BackendMetrics, error)
// Returns health of the backend // Returns health of the backend
Health(ctx context.Context, opts HealthOptions) HealthResult Health(ctx context.Context, opts HealthOptions) HealthResult

@ -749,7 +749,7 @@ func (client *peerRESTClient) doListen(listenCh chan interface{}, doneCh <-chan
dec := gob.NewDecoder(respBody) dec := gob.NewDecoder(respBody)
for { for {
var ev event.Event var ev event.Event
if err = dec.Decode(&ev); err != nil { if err := dec.Decode(&ev); err != nil {
return return
} }
if len(ev.EventVersion) > 0 { if len(ev.EventVersion) > 0 {
@ -906,3 +906,24 @@ func (client *peerRESTClient) MonitorBandwidth(ctx context.Context, buckets []st
err = dec.Decode(&bandwidthReport) err = dec.Decode(&bandwidthReport)
return &bandwidthReport, err return &bandwidthReport, err
} }
// GetPeerMetrics - fetches the gob-encoded metric stream from a remote node.
// The returned channel is closed when the stream ends, decoding fails, or
// ctx is canceled; the response body is always drained before closing.
func (client *peerRESTClient) GetPeerMetrics(ctx context.Context) (<-chan Metric, error) {
	respBody, err := client.callWithContext(ctx, peerRESTMethodGetPeerMetrics, nil, nil, -1)
	if err != nil {
		return nil, err
	}
	dec := gob.NewDecoder(respBody)
	ch := make(chan Metric)
	go func(ch chan<- Metric) {
		defer func() {
			// Release the response body so the connection can be reused,
			// then signal end-of-stream to the receiver.
			http.DrainBody(respBody)
			close(ch)
		}()
		for {
			var metric Metric
			if err := dec.Decode(&metric); err != nil {
				// io.EOF here marks the normal end of the stream.
				return
			}
			select {
			case ch <- metric:
			case <-ctx.Done():
				// Receiver stopped consuming; don't leak this goroutine.
				return
			}
		}
	}(ch)
	return ch, nil
}

@ -58,6 +58,7 @@ const (
peerRESTMethodGetBandwidth = "/bandwidth" peerRESTMethodGetBandwidth = "/bandwidth"
peerRESTMethodGetMetacacheListing = "/getmetacache" peerRESTMethodGetMetacacheListing = "/getmetacache"
peerRESTMethodUpdateMetacacheListing = "/updatemetacache" peerRESTMethodUpdateMetacacheListing = "/updatemetacache"
peerRESTMethodGetPeerMetrics = "/peermetrics"
) )
const ( const (

@ -801,7 +801,7 @@ func (s *peerRESTServer) SignalServiceHandler(w http.ResponseWriter, r *http.Req
// ListenHandler sends http trace messages back to peer rest client // ListenHandler sends http trace messages back to peer rest client
func (s *peerRESTServer) ListenHandler(w http.ResponseWriter, r *http.Request) { func (s *peerRESTServer) ListenHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) { if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request")) s.writeErrorResponse(w, errors.New("invalid request"))
return return
} }
@ -809,7 +809,7 @@ func (s *peerRESTServer) ListenHandler(w http.ResponseWriter, r *http.Request) {
var prefix string var prefix string
if len(values[peerRESTListenPrefix]) > 1 { if len(values[peerRESTListenPrefix]) > 1 {
s.writeErrorResponse(w, errors.New("Invalid request")) s.writeErrorResponse(w, errors.New("invalid request"))
return return
} }
@ -824,7 +824,7 @@ func (s *peerRESTServer) ListenHandler(w http.ResponseWriter, r *http.Request) {
var suffix string var suffix string
if len(values[peerRESTListenSuffix]) > 1 { if len(values[peerRESTListenSuffix]) > 1 {
s.writeErrorResponse(w, errors.New("Invalid request")) s.writeErrorResponse(w, errors.New("invalid request"))
return return
} }
@ -1004,7 +1004,7 @@ func (s *peerRESTServer) IsValid(w http.ResponseWriter, r *http.Request) bool {
// GetBandwidth gets the bandwidth for the buckets requested. // GetBandwidth gets the bandwidth for the buckets requested.
func (s *peerRESTServer) GetBandwidth(w http.ResponseWriter, r *http.Request) { func (s *peerRESTServer) GetBandwidth(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) { if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request")) s.writeErrorResponse(w, errors.New("invalid request"))
return return
} }
bucketsString := r.URL.Query().Get("buckets") bucketsString := r.URL.Query().Get("buckets")
@ -1025,6 +1025,29 @@ func (s *peerRESTServer) GetBandwidth(w http.ResponseWriter, r *http.Request) {
w.(http.Flusher).Flush() w.(http.Flusher).Flush()
} }
// GetPeerMetrics gets the metrics to be federated across peers.
// Metrics are streamed to the caller as a sequence of gob-encoded
// Metric values after an initial 200 response.
func (s *peerRESTServer) GetPeerMetrics(w http.ResponseWriter, r *http.Request) {
	if !s.IsValid(w, r) {
		s.writeErrorResponse(w, errors.New("invalid request"))
		return // must stop here; previously the handler kept streaming after the error
	}

	w.WriteHeader(http.StatusOK)
	w.(http.Flusher).Flush()

	enc := gob.NewEncoder(w)
	ch := ReportMetrics(r.Context(), GetGeneratorsForPeer)
	for m := range ch {
		if err := enc.Encode(m); err != nil {
			// Headers and part of the stream are already sent; the error
			// response is best-effort only.
			s.writeErrorResponse(w, errors.New("Encoding metric failed: "+err.Error()))
			return
		}
	}
	w.(http.Flusher).Flush()
}
// registerPeerRESTHandlers - register peer rest router. // registerPeerRESTHandlers - register peer rest router.
func registerPeerRESTHandlers(router *mux.Router) { func registerPeerRESTHandlers(router *mux.Router) {
server := &peerRESTServer{} server := &peerRESTServer{}
@ -1064,4 +1087,5 @@ func registerPeerRESTHandlers(router *mux.Router) {
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetBandwidth).HandlerFunc(httpTraceHdrs(server.GetBandwidth)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetBandwidth).HandlerFunc(httpTraceHdrs(server.GetBandwidth))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetMetacacheListing).HandlerFunc(httpTraceHdrs(server.GetMetacacheListingHandler)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetMetacacheListing).HandlerFunc(httpTraceHdrs(server.GetMetacacheListingHandler))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodUpdateMetacacheListing).HandlerFunc(httpTraceHdrs(server.UpdateMetacacheListingHandler)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodUpdateMetacacheListing).HandlerFunc(httpTraceHdrs(server.UpdateMetacacheListingHandler))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetPeerMetrics).HandlerFunc(httpTraceHdrs(server.GetPeerMetrics))
} }

@ -13,8 +13,15 @@ Read more on how to use these endpoints in [MinIO healthcheck guide](https://git
### Prometheus Probe ### Prometheus Probe
MinIO server exposes Prometheus compatible data on a single endpoint. By default, the endpoint is authenticated. MinIO allows reading metrics for the entire cluster from any single node. The cluster wide metrics can be read at
`<Address for MinIO Service>/minio/prometheus/cluster`.
- Prometheus data available at `/minio/prometheus/metrics` The additional node specific metrics which include go metrics or process metrics are exposed at
`<Address for MinIO Node>/minio/prometheus/node`.
To use this endpoint, setup Prometheus to scrape data from this endpoint. Read more on how to configure and use Prometheus to monitor MinIO server in [How to monitor MinIO server with Prometheus](https://github.com/minio/minio/blob/master/docs/metrics/prometheus/README.md). To use this endpoint, setup Prometheus to scrape data from this endpoint. Read more on how to configure and use Prometheus to monitor MinIO server in [How to monitor MinIO server with Prometheus](https://github.com/minio/minio/blob/master/docs/metrics/prometheus/README.md).
**Deprecated metrics monitoring**
- Prometheus' data available at `/minio/prometheus/metrics` is deprecated

@ -1,8 +1,13 @@
# How to monitor MinIO server with Prometheus [![Slack](https://slack.min.io/slack?type=svg)](https://slack.min.io) # How to monitor MinIO server with Prometheus [![Slack](https://slack.min.io/slack?type=svg)](https://slack.min.io)
[Prometheus](https://prometheus.io) is a cloud-native monitoring platform, built originally at SoundCloud. Prometheus offers a multi-dimensional data model with time series data identified by metric name and key/value pairs. The data collection happens via a pull model over HTTP/HTTPS. Targets to pull data from are discovered via service discovery or static configuration. [Prometheus](https://prometheus.io) is a cloud-native monitoring platform.
MinIO exports Prometheus compatible data by default as an authorized endpoint at `/minio/prometheus/metrics`. Users looking to monitor their MinIO instances can point Prometheus configuration to scrape data from this endpoint. Prometheus offers a multi-dimensional data model with time series data identified by metric name and key/value pairs.
The data collection happens via a pull model over HTTP/HTTPS.
MinIO exports Prometheus compatible data by default as an authorized endpoint at `/minio/prometheus/metrics/cluster`.
Users looking to monitor their MinIO instances can point Prometheus configuration to scrape data from this endpoint.
This document explains how to setup Prometheus and configure it to scrape data from MinIO servers. This document explains how to setup Prometheus and configure it to scrape data from MinIO servers.
@ -20,7 +25,8 @@ This document explains how to setup Prometheus and configure it to scrape data f
- [List of metrics exposed by MinIO](#list-of-metrics-exposed-by-minio) - [List of metrics exposed by MinIO](#list-of-metrics-exposed-by-minio)
## Prerequisites ## Prerequisites
To get started with MinIO, refer [MinIO QuickStart Document](https://docs.min.io/docs/minio-quickstart-guide). Follow below steps to get started with MinIO monitoring using Prometheus. To get started with MinIO, refer [MinIO QuickStart Document](https://docs.min.io/docs/minio-quickstart-guide).
Follow below steps to get started with MinIO monitoring using Prometheus.
### 1. Download Prometheus ### 1. Download Prometheus
@ -68,7 +74,7 @@ The command will generate the `scrape_configs` section of the prometheus.yml as
scrape_configs: scrape_configs:
- job_name: minio-job - job_name: minio-job
bearer_token: <secret> bearer_token: <secret>
metrics_path: /minio/prometheus/metrics metrics_path: /minio/v2/metrics/cluster
scheme: http scheme: http
static_configs: static_configs:
- targets: ['localhost:9000'] - targets: ['localhost:9000']
@ -77,16 +83,26 @@ scrape_configs:
#### 3.2 Public Prometheus config #### 3.2 Public Prometheus config
If Prometheus endpoint authentication type is set to `public`. Following prometheus config is sufficient to start scraping metrics data from MinIO. If Prometheus endpoint authentication type is set to `public`. Following prometheus config is sufficient to start scraping metrics data from MinIO.
This can be collected from any server once per collection.
##### Cluster
```yaml ```yaml
scrape_configs: scrape_configs:
- job_name: minio-job - job_name: minio-job
metrics_path: /minio/prometheus/metrics metrics_path: /minio/v2/metrics/cluster
scheme: http
static_configs:
- targets: ['localhost:9000']
```
##### Node
Optionally you can also collect per node metrics. This needs to be done on a per server instance.
```yaml
scrape_configs:
- job_name: minio-job
metrics_path: /minio/v2/metrics/node
scheme: http scheme: http
static_configs: static_configs:
- targets: ['localhost:9000'] - targets: ['localhost:9000']
``` ```
### 4. Update `scrape_configs` section in prometheus.yml ### 4. Update `scrape_configs` section in prometheus.yml
To authorize every scrape request, copy and paste the generated `scrape_configs` section in the prometheus.yml and restart the Prometheus service. To authorize every scrape request, copy and paste the generated `scrape_configs` section in the prometheus.yml and restart the Prometheus service.
@ -103,172 +119,16 @@ Here `prometheus.yml` is the name of configuration file. You can now see MinIO m
### 6. Configure Grafana ### 6. Configure Grafana
After Prometheus is configured, you can use Grafana to visualize MinIO metrics. Refer the [document here to setup Grafana with MinIO prometheus metrics](https://github.com/minio/minio/blob/master/docs/metrics/prometheus/grafana/README.md). After Prometheus is configured, you can use Grafana to visualize MinIO metrics.
Refer the [document here to setup Grafana with MinIO prometheus metrics](https://github.com/minio/minio/blob/master/docs/metrics/prometheus/grafana/README.md).
## List of metrics exposed by MinIO ## List of metrics exposed by MinIO
MinIO server exposes the following metrics on `/minio/prometheus/metrics` endpoint. All of these can be accessed via Prometheus dashboard. The full list of exposed metrics along with their definition is available in the demo server at https://play.min.io:9000/minio/prometheus/metrics MinIO server exposes the following metrics on `/minio/prometheus/metrics/cluster` endpoint.
All of these can be accessed via Prometheus dashboard.
These are the new set of metrics which will be in effect after `RELEASE.2019-10-16*`. Some of the key changes in this update are listed below. A sample list of exposed metrics along with their definition is available in the demo server at
- Metrics are bound the respective nodes and is not cluster-wide. Each and every node in a cluster will expose its own metrics. `curl https://play.min.io:9000/minio/prometheus/metrics/cluster`
- Additional metrics to cover the s3 and internode traffic statistics were added.
- Metrics that records the http statistics and latencies are labeled to their respective APIs (putobject,getobject etc).
- Disk usage metrics are distributed and labeled to the respective disk paths.
For more details, please check the `Migration guide for the new set of metrics`.
The list of metrics and its definition are as follows. (NOTE: instance here is one MinIO node)
> NOTES:
> 1. Instance here is one MinIO node.
> 2. `s3 requests` exclude internode requests.
### Default set of information
| name | description |
|:------------|:--------------------------------|
| `go_` | all standard go runtime metrics |
| `process_` | all process level metrics |
| `promhttp_` | all prometheus scrape metrics |
### MinIO node specific information
| name | description |
|:---------------------------|:-------------------------------------------------------------------------------|
| `minio_version_info` | Current MinIO version with its commit-id |
| `minio_disks_offline` | Total number of offline disks on current MinIO instance |
| `minio_disks_total` | Total number of disks on current MinIO instance |
### Disk metrics are labeled by 'disk' which indentifies each disk
| name | description |
|:---------------------------|:-------------------------------------------------------------------------------|
| `disk_storage_total` | Total size of the disk |
| `disk_storage_used` | Total disk space used per disk |
| `disk_storage_available` | Total available disk space per disk |
### S3 API metrics are labeled by 'api' which identifies different S3 API requests
| name | description |
|:---------------------------|:-------------------------------------------------------------------------------|
| `s3_requests_total` | Total number of s3 requests in current MinIO instance |
| `s3_errors_total` | Total number of errors in s3 requests in current MinIO instance |
| `s3_requests_current` | Total number of active s3 requests in current MinIO instance |
| `s3_rx_bytes_total` | Total number of s3 bytes received by current MinIO server instance |
| `s3_tx_bytes_total` | Total number of s3 bytes sent by current MinIO server instance |
| `s3_ttfb_seconds` | Histogram that holds the latency information of the requests |
#### Internode metrics only available in a distributed setup
| name | description |
|:---------------------------|:-------------------------------------------------------------------------------|
| `internode_rx_bytes_total` | Total number of internode bytes received by current MinIO server instance |
| `internode_tx_bytes_total` | Total number of bytes sent to the other nodes by current MinIO server instance |
Apart from above metrics, MinIO also exposes below mode specific metrics
### Bucket usage specific metrics
All metrics are labeled by `bucket`, each metric is displayed per bucket. `buckets_objects_histogram` is additionally labeled by `object_size` string which is represented by any of the following values
- *LESS_THAN_1024_B*
- *BETWEEN_1024_B_AND_1_MB*
- *BETWEEN_1_MB_AND_10_MB*
- *BETWEEN_10_MB_AND_64_MB*
- *BETWEEN_64_MB_AND_128_MB*
- *BETWEEN_128_MB_AND_512_MB*
- *GREATER_THAN_512_MB*
Units defintions:
- 1 MB = 1024 KB
- 1 KB = 1024 B
| name | description |
|:------------------------------------|:----------------------------------------------------|
| `bucket_usage_size` | Total size of the bucket |
| `bucket_objects_count` | Total number of objects in a bucket |
| `bucket_objects_histogram` | Total number of objects filtered by different sizes |
| `bucket_replication_pending_size` | Total capacity not replicated |
| `bucket_replication_failed_size` | Total capacity failed to replicate at least once |
| `bucket_replication_successful_size`| Total capacity successfully replicated |
| `bucket_replication_received_size` | Total capacity received as replicated objects |
### Cache specific metrics
MinIO Gateway instances enabled with Disk-Caching expose caching related metrics.
#### Global cache metrics
| name | description |
|:---------------------|:--------------------------------------------------|
| `cache_hits_total` | Total number of cache hits |
| `cache_misses_total` | Total number of cache misses |
| `cache_data_served` | Total number of bytes served from cache |
#### Per disk cache metrics
| name | description |
|:-----------------------|:---------------------------------------------------------------------------------|
| `cache_usage_size` | Total cache usage in bytes |
| `cache_total_capacity` | Total size of cache disk |
| `cache_usage_percent` | Total percentage cache usage |
| `cache_usage_state` | Indicates cache usage is high or low, relative to current cache 'quota' settings |
`cache_usage_state` holds only two states
- '1' indicates high disk usage
- '0' indicates low disk usage
### Gateway specific metrics
MinIO Gateway instance exposes metrics related to Gateway communication with the cloud backend (S3, Azure & GCS Gateway).
`<gateway_type>` changes based on the gateway in use can be 's3', 'gcs' or 'azure'. Other metrics are labeled with `method` that identifies HTTP GET, HEAD, PUT and POST requests to the backend.
| name | description |
|:----------------------------------------|:---------------------------------------------------------------------------|
| `gateway_<gateway_type>_requests` | Total number of requests made to the gateway backend |
| `gateway_<gateway_type>_bytes_sent` | Total number of bytes sent to cloud backend (in PUT & POST Requests) |
| `gateway_<gateway_type>_bytes_received` | Total number of bytes received from cloud backend (in GET & HEAD Requests) |
Note that this is currently only support for Azure, S3 and GCS Gateway.
### MinIO self-healing metrics - `self_heal_*`
MinIO exposes self-healing related metrics for erasure-code deployments _only_. These metrics are _not_ available on Gateway or Single Node, Single Drive deployments. Note that these metrics will be exposed _only_ when there is a relevant event happening on MinIO server.
| name | description |
|:-------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `self_heal_time_since_last_activity` | Time elapsed since last self-healing related activity |
| `self_heal_objects_scanned` | Number of objects scanned by self-healing thread in its current run. This will reset when a fresh self-healing run starts. This is labeled with the object type scanned |
| `self_heal_objects_healed` | Number of objects healing by self-healing thread in its current run. This will reset when a fresh self-healing run starts. This is labeled with the object type scanned |
| `self_heal_objects_heal_failed` | Number of objects for which self-healing failed in its current run. This will reset when a fresh self-healing run starts. This is labeled with disk status and its endpoint |
## Migration guide for the new set of metrics
This migration guide applies for older releases or any releases before `RELEASE.2019-10-23*`
### MinIO disk level metrics - `disk_*`
The migrations include
- `minio_total_disks` to `minio_disks_total`
- `minio_offline_disks` to `minio_disks_offline`
### MinIO disk level metrics - `disk_storage_*`
These metrics have one label.
- `disk`: Holds the disk path
The migrations include
- `minio_disk_storage_used_bytes` to `disk_storage_used`
- `minio_disk_storage_available_bytes` to `disk_storage_available`
- `minio_disk_storage_total_bytes` to `disk_storage_total`
### MinIO network level metrics
These metrics are detailed to cover the s3 and internode network statistics.
The migrations include
- `minio_network_sent_bytes_total` to `s3_tx_bytes_total` and `internode_tx_bytes_total`
- `minio_network_received_bytes_total` to `s3_rx_bytes_total` and `internode_rx_bytes_total`
Some of the additional metrics added were ### List of metrics reported
- `s3_requests_total` [The list of metrics reported can be here](https://github.com/minio/minio/blob/master/docs/metrics/prometheus/list.md)
- `s3_errors_total`
- `s3_ttfb_seconds`

@ -0,0 +1,47 @@
# List of metrics reported cluster wide
Each metric includes a label for the server that calculated the metric.
Each metric has a label for the server that generated the metric.
These metrics can be from any MinIO server once per collection.
| Name | Description |
|:-----------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------|
|`minio_bucket_objects_size_distribution` |Distribution of object sizes in the bucket, includes label for the bucket name. |
|`minio_bucket_replication_failed_bytes` |Total number of bytes failed at least once to replicate. |
|`minio_bucket_replication_pending_bytes` |Total bytes pending to replicate. |
|`minio_bucket_replication_received_bytes` |Total number of bytes replicated to this bucket from another source bucket. |
|`minio_bucket_replication_sent_bytes` |Total number of bytes replicated to the target bucket. |
|`minio_bucket_usage_object_total` |Total number of objects |
|`minio_bucket_usage_total_bytes` |Total bucket size in bytes |
|`minio_cluster_capacity_raw_free_bytes` |Total free capacity online in the cluster. |
|`minio_cluster_capacity_raw_total_bytes` |Total capacity online in the cluster. |
|`minio_cluster_capacity_usable_free_bytes` |Total free usable capacity online in the cluster. |
|`minio_cluster_capacity_usable_total_bytes` |Total usable capacity online in the cluster. |
|`minio_cluster_disk_offline_total` |Total disks offline. |
|`minio_cluster_disk_online_total` |Total disks online. |
|`minio_cluster_nodes_offline_total` |Total number of MinIO nodes offline. |
|`minio_cluster_nodes_online_total` |Total number of MinIO nodes online. |
|`minio_heal_objects_error_total` |Objects for which healing failed in current self healing run |
|`minio_heal_objects_heal_total` |Objects healed in current self healing run |
|`minio_heal_objects_total` |Objects scanned in current self healing run |
|`minio_heal_time_last_activity_nano_seconds` |Time elapsed (in nano seconds) since last self healing activity. This is set to -1 until initial self heal activity |
|`minio_inter_node_traffic_received_bytes` |Total number of bytes received from other peer nodes. |
|`minio_inter_node_traffic_sent_bytes` |Total number of bytes sent to the other peer nodes. |
|`minio_node_disk_free_bytes` |Total storage available on a disk. |
|`minio_node_disk_total_bytes` |Total storage on a disk. |
|`minio_node_disk_used_bytes` |Total storage used on a disk. |
|`minio_s3_requests_error_total` |Total number S3 requests with errors |
|`minio_s3_requests_inflight_total` |Total number of S3 requests currently in flight. |
|`minio_s3_requests_total` |Total number S3 requests |
|`minio_s3_time_ttfb_seconds_distribution`      |Distribution of the time to first byte across API calls.                                                                     |
|`minio_s3_traffic_received_bytes` |Total number of s3 bytes received. |
|`minio_s3_traffic_sent_bytes` |Total number of s3 bytes sent |
|`minio_cache_hits_total` |Total number of disk cache hits |
|`minio_cache_missed_total` |Total number of disk cache misses |
|`minio_cache_sent_bytes` |Total number of bytes served from cache |
|`minio_cache_total_bytes` |Total size of cache disk in bytes |
|`minio_cache_usage_info` |Total percentage cache usage, value of 1 indicates high and 0 low, label level is set as well |
|`minio_cache_used_bytes` |Current cache usage in bytes |
|`minio_software_commit_info` |Git commit hash for the MinIO release. |
|`minio_software_version_info` |MinIO Release tag for the server |

@ -65,6 +65,9 @@ require (
github.com/pierrec/lz4 v2.5.2+incompatible github.com/pierrec/lz4 v2.5.2+incompatible
github.com/pkg/errors v0.9.1 github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.8.0 github.com/prometheus/client_golang v1.8.0
github.com/quasilyte/go-ruleguard v0.2.1 // indirect
github.com/quasilyte/go-ruleguard/dsl/fluent v0.0.0-20201222093424-5d7e62a465d3 // indirect
github.com/prometheus/client_model v0.2.0
github.com/rjeczalik/notify v0.9.2 github.com/rjeczalik/notify v0.9.2
github.com/rs/cors v1.7.0 github.com/rs/cors v1.7.0
github.com/secure-io/sio-go v0.3.0 github.com/secure-io/sio-go v0.3.0

@ -529,6 +529,9 @@ github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+Gx
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.2.0 h1:wH4vA7pcjKuZzjF7lM8awk4fnuJO6idemZXoKnULUx4= github.com/prometheus/procfs v0.2.0 h1:wH4vA7pcjKuZzjF7lM8awk4fnuJO6idemZXoKnULUx4=
github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/quasilyte/go-ruleguard v0.2.1 h1:56eRm0daAyny9UhJnmtJW/UyLZQusukBAB8oT8AHKHo=
github.com/quasilyte/go-ruleguard v0.2.1/go.mod h1:hN2rVc/uS4bQhQKTio2XaSJSafJwqBUWWwtssT3cQmc=
github.com/quasilyte/go-ruleguard/dsl/fluent v0.0.0-20201222093424-5d7e62a465d3/go.mod h1:P7JlQWFT7jDcFZMtUPQbtGzzzxva3rBn6oIF+LPwFcM=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhDYGoxY8uLVpewe1GDZ2vu2Tr/vTdVAkFQ= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhDYGoxY8uLVpewe1GDZ2vu2Tr/vTdVAkFQ=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 h1:MkV+77GLUNo5oJ0jf870itWm3D0Sjh7+Za9gazKc5LQ= github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 h1:MkV+77GLUNo5oJ0jf870itWm3D0Sjh7+Za9gazKc5LQ=
@ -619,6 +622,7 @@ github.com/xdg/stringprep v1.0.0 h1:d9X0esnoa3dFsV0FG35rAT0RIhYFlPq7MiP+DW89La0=
github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.3.3 h1:MUGmc65QhB3pIlaQ5bB4LwqSj6GIonVJXpZiaKNyaKk=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
@ -711,6 +715,7 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -782,6 +787,7 @@ golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtn
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191216052735-49a3e744a425/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200812195022-5ae4c3c160a0/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20201105001634-bc3cf281b174/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20210115202250-e0d201561e39 h1:BTs2GMGSMWpgtCpv1CE7vkJTv7XcHdcLLnAMu7UbgTY=
golang.org/x/tools v0.0.0-20210115202250-e0d201561e39/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=

@ -158,8 +158,8 @@ type PerfInfo struct {
// ServerDrivesInfo - Drive info about all drives in a single MinIO node
type ServerDrivesInfo struct {
	Addr     string          `json:"addr"`
	Serial   []DrivePerfInfo `json:"serial,omitempty"`   // Drive perf info collected one drive at a time
	Parallel []DrivePerfInfo `json:"parallel,omitempty"` // Drive perf info collected in parallel
	Error    string          `json:"error,omitempty"`
}
@ -316,3 +316,27 @@ func (adm *AdminClient) ServerHealthInfo(ctx context.Context, healthDataTypes []
return respChan return respChan
} }
// GetTotalCapacity gets the total capacity a server holds, computed as
// the sum of the Total field over every usage entry in s.Usage.
func (s *ServerDiskHwInfo) GetTotalCapacity() uint64 {
	var total uint64
	for i := range s.Usage {
		total += s.Usage[i].Total
	}
	return total
}
// GetTotalFreeCapacity gets the total capacity that is free, computed as
// the sum of the Free field over every usage entry in s.Usage.
func (s *ServerDiskHwInfo) GetTotalFreeCapacity() uint64 {
	var free uint64
	for i := range s.Usage {
		free += s.Usage[i].Free
	}
	return free
}
// GetTotalUsedCapacity gets the total capacity used, computed as the
// sum of the Used field over every usage entry in s.Usage.
func (s *ServerDiskHwInfo) GetTotalUsedCapacity() uint64 {
	var used uint64
	for i := range s.Usage {
		used += s.Usage[i].Used
	}
	return used
}

Loading…
Cancel
Save