From 53aaa5d2a5f0eb83c840ba63592d00eda671a2a1 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Wed, 27 May 2020 06:45:43 -0700
Subject: [PATCH] Export bucket usage counts as part of bucket metrics (#9710)

Bonus fixes in quota enforcement to use the new data structure,
and to use timedValue to cache a value and reload it automatically,
which avoids one more global variable.

---
 cmd/admin-handlers-users.go       |   4 +-
 cmd/admin-handlers.go             |   2 +-
 cmd/bucket-quota.go               |  94 ++++++++++++++---------
 cmd/data-usage-cache.go           |  33 ++++----
 cmd/data-usage.go                 |  16 ++++
 cmd/globals.go                    |   1 -
 cmd/metrics.go                    |  64 +++++++++++++++-
 cmd/object-api-datatypes.go       |  47 ++++++++----
 docs/metrics/prometheus/README.md | 123 ++++++++++++++++++++----------
 9 files changed, 275 insertions(+), 109 deletions(-)

diff --git a/cmd/admin-handlers-users.go b/cmd/admin-handlers-users.go
index 39671cc7b..3aff70741 100644
--- a/cmd/admin-handlers-users.go
+++ b/cmd/admin-handlers-users.go
@@ -654,8 +654,8 @@ func (a adminAPIHandlers) AccountUsageInfoHandler(w http.ResponseWriter, r *http
 		if rd || wr {
 			var size uint64
 			// Fetch the data usage of the current bucket
-			if !dataUsageInfo.LastUpdate.IsZero() && len(dataUsageInfo.BucketsSizes) > 0 {
-				size = dataUsageInfo.BucketsSizes[bucket.Name]
+			if !dataUsageInfo.LastUpdate.IsZero() {
+				size = dataUsageInfo.BucketsUsage[bucket.Name].Size
 			}
 			acctInfo.Buckets = append(acctInfo.Buckets, madmin.BucketUsageInfo{
 				Name: bucket.Name,
diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go
index fefd0eed5..0d1d78a1b 100644
--- a/cmd/admin-handlers.go
+++ b/cmd/admin-handlers.go
@@ -1329,7 +1329,7 @@ func (a adminAPIHandlers) ServerInfoHandler(w http.ResponseWriter, r *http.Reque
 	dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI)
 	if err == nil {
 		buckets = madmin.Buckets{Count: dataUsageInfo.BucketsCount}
-		objects = madmin.Objects{Count: dataUsageInfo.ObjectsCount}
+		objects = madmin.Objects{Count: dataUsageInfo.ObjectsTotalCount}
 		usage = madmin.Usage{Size: dataUsageInfo.ObjectsTotalSize}
 	}
 
diff --git a/cmd/bucket-quota.go b/cmd/bucket-quota.go
index e15e6e750..4fdcf6d3d 100644
--- a/cmd/bucket-quota.go
+++ b/cmd/bucket-quota.go
@@ -20,7 +20,6 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"sync"
 	"time"
 
 	"github.com/minio/minio/cmd/config"
@@ -31,7 +30,9 @@ import (
 )
 
 // BucketQuotaSys - map of bucket and quota configuration.
-type BucketQuotaSys struct{}
+type BucketQuotaSys struct {
+	bucketStorageCache timedValue
+}
 
 // Get - Get quota configuration.
 func (sys *BucketQuotaSys) Get(bucketName string) (*madmin.BucketQuota, error) {
@@ -63,49 +64,60 @@ func parseBucketQuota(bucket string, data []byte) (quotaCfg *madmin.BucketQuota,
 	return
 }
 
-type bucketStorageCache struct {
-	bucketsSizes map[string]uint64
-	lastUpdate   time.Time
-	mu           sync.Mutex
-}
+func (sys *BucketQuotaSys) check(ctx context.Context, bucket string, size int64) error {
+	objAPI := newObjectLayerWithoutSafeModeFn()
+	if objAPI == nil {
+		return errServerNotInitialized
+	}
+
+	q, err := sys.Get(bucket)
+	if err != nil {
+		return nil
+	}
+
+	if q.Type == madmin.FIFOQuota {
+		return nil
+	}
 
-func (b *bucketStorageCache) check(ctx context.Context, q *madmin.BucketQuota, bucket string, size int64) error {
 	if q.Quota == 0 {
 		// No quota set return quickly.
 		return nil
 	}
 
-	b.mu.Lock()
-	defer b.mu.Unlock()
-	if time.Since(b.lastUpdate) > 10*time.Second {
-		dui, err := loadDataUsageFromBackend(ctx, newObjectLayerWithoutSafeModeFn())
-		if err != nil {
-			return err
+	sys.bucketStorageCache.Once.Do(func() {
+		sys.bucketStorageCache.TTL = 10 * time.Second
+		sys.bucketStorageCache.Update = func() (interface{}, error) {
+			return loadDataUsageFromBackend(ctx, objAPI)
 		}
-		b.lastUpdate = time.Now()
-		b.bucketsSizes = dui.BucketsSizes
+	})
+
+	v, err := sys.bucketStorageCache.Get()
+	if err != nil {
+		return err
+	}
+
+	dui := v.(DataUsageInfo)
+
+	bui, ok := dui.BucketsUsage[bucket]
+	if !ok {
+		// bucket not found, cannot enforce quota;
+		// the call will fail later anyway.
+		return nil
 	}
-	currUsage := b.bucketsSizes[bucket]
-	if (currUsage + uint64(size)) > q.Quota {
+
+	if (bui.Size + uint64(size)) > q.Quota {
 		return BucketQuotaExceeded{Bucket: bucket}
 	}
+
 	return nil
 }
 
+
 func enforceBucketQuota(ctx context.Context, bucket string, size int64) error {
 	if size < 0 {
 		return nil
 	}
 
-	q, err := globalBucketQuotaSys.Get(bucket)
-	if err != nil {
-		return nil
-	}
-
-	if q.Type == madmin.FIFOQuota {
-		return nil
-	}
-
-	return globalBucketStorageCache.check(ctx, q, bucket, size)
+	return globalBucketQuotaSys.check(ctx, bucket, size)
 }
 
 const (
@@ -139,29 +151,40 @@ func enforceFIFOQuota(ctx context.Context, objectAPI ObjectLayer) error {
 	if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOff {
 		return nil
 	}
+
 	buckets, err := objectAPI.ListBuckets(ctx)
 	if err != nil {
 		return err
 	}
+
+	dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI)
+	if err != nil {
+		return err
+	}
+
 	for _, binfo := range buckets {
 		bucket := binfo.Name
+
+		bui, ok := dataUsageInfo.BucketsUsage[bucket]
+		if !ok {
+			// bucket doesn't exist anymore, or we
+			// do not have any information to proceed.
+			continue
+		}
+
 		// Check if the current bucket has quota restrictions, if not skip it
 		cfg, err := globalBucketQuotaSys.Get(bucket)
 		if err != nil {
 			continue
 		}
+
 		if cfg.Type != madmin.FIFOQuota {
 			continue
 		}
 
-		dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI)
-		if err != nil {
-			return err
-		}
-
 		var toFree uint64
-		if dataUsageInfo.BucketsSizes[bucket] > cfg.Quota && cfg.Quota > 0 {
-			toFree = dataUsageInfo.BucketsSizes[bucket] - cfg.Quota
+		if bui.Size > cfg.Quota && cfg.Quota > 0 {
+			toFree = bui.Size - cfg.Quota
 		}
 
 		if toFree == 0 {
@@ -175,6 +198,7 @@ func enforceFIFOQuota(ctx context.Context, objectAPI ObjectLayer) error {
 		if err := objectAPI.Walk(ctx, bucket, "", objInfoCh); err != nil {
 			return err
 		}
+
 		// reuse the fileScorer used by disk cache to score entries by
 		// ModTime to find the oldest objects in bucket to delete. In
 		// the context of bucket quota enforcement - number of hits are
diff --git a/cmd/data-usage-cache.go b/cmd/data-usage-cache.go
index 679f41bde..832353e36 100644
--- a/cmd/data-usage-cache.go
+++ b/cmd/data-usage-cache.go
@@ -116,16 +116,16 @@ func (d *dataUsageCache) find(path string) *dataUsageEntry {
 func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo {
 	e := d.find(path)
 	if e == nil {
-		return DataUsageInfo{LastUpdate: UTCNow()}
+		// No entry found, return empty.
+		return DataUsageInfo{}
 	}
 	flat := d.flatten(*e)
 	return DataUsageInfo{
-		LastUpdate:            d.Info.LastUpdate,
-		ObjectsCount:          flat.Objects,
-		ObjectsTotalSize:      uint64(flat.Size),
-		ObjectsSizesHistogram: flat.ObjSizes.asMap(),
-		BucketsCount:          uint64(len(e.Children)),
-		BucketsSizes:          d.pathSizes(buckets),
+		LastUpdate:        d.Info.LastUpdate,
+		ObjectsTotalCount: flat.Objects,
+		ObjectsTotalSize:  uint64(flat.Size),
+		BucketsCount:      uint64(len(e.Children)),
+		BucketsUsage:      d.bucketsUsageInfo(buckets),
 	}
 }
 
@@ -232,25 +232,30 @@ func (h *sizeHistogram) add(size int64) {
 	}
 }
 
-// asMap returns the map as a map[string]uint64.
-func (h *sizeHistogram) asMap() map[string]uint64 {
-	res := make(map[string]uint64, 7)
+// toMap converts the histogram to a map[string]uint64.
+func (h *sizeHistogram) toMap() map[string]uint64 {
+	res := make(map[string]uint64, dataUsageBucketLen)
 	for i, count := range h {
 		res[ObjectsHistogramIntervals[i].name] = count
 	}
 	return res
 }
 
-// pathSizes returns the path sizes as a map.
-func (d *dataUsageCache) pathSizes(buckets []BucketInfo) map[string]uint64 {
-	var dst = make(map[string]uint64, len(buckets))
+// bucketsUsageInfo returns the buckets usage info as a map, keyed
+// by bucket name.
+func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]BucketUsageInfo {
+	var dst = make(map[string]BucketUsageInfo, len(buckets))
 	for _, bucket := range buckets {
 		e := d.find(bucket.Name)
 		if e == nil {
 			continue
 		}
 		flat := d.flatten(*e)
-		dst[bucket.Name] = uint64(flat.Size)
+		dst[bucket.Name] = BucketUsageInfo{
+			Size:                 uint64(flat.Size),
+			ObjectsCount:         uint64(flat.Objects),
+			ObjectSizesHistogram: flat.ObjSizes.toMap(),
+		}
 	}
 	return dst
 }
diff --git a/cmd/data-usage.go b/cmd/data-usage.go
index dcbf85dad..6b0e753ca 100644
--- a/cmd/data-usage.go
+++ b/cmd/data-usage.go
@@ -154,6 +154,22 @@ func loadDataUsageFromBackend(ctx context.Context, objAPI ObjectLayer) (DataUsag
 		return DataUsageInfo{}, err
 	}
 
+	// For forward compatibility reasons: data persisted by older
+	// releases carries only BucketSizes, populate BucketsUsage from it.
+	if len(dataUsageInfo.BucketsUsage) == 0 {
+		dataUsageInfo.BucketsUsage = make(map[string]BucketUsageInfo, len(dataUsageInfo.BucketSizes))
+		for bucket, size := range dataUsageInfo.BucketSizes {
+			dataUsageInfo.BucketsUsage[bucket] = BucketUsageInfo{Size: size}
+		}
+	}
+
+	// For backward compatibility reasons: keep the deprecated
+	// BucketSizes field populated for older callers that still read it.
+	if len(dataUsageInfo.BucketSizes) == 0 {
+		dataUsageInfo.BucketSizes = make(map[string]uint64, len(dataUsageInfo.BucketsUsage))
+		for bucket, bui := range dataUsageInfo.BucketsUsage {
+			dataUsageInfo.BucketSizes[bucket] = bui.Size
+		}
+	}
+
 	return dataUsageInfo, nil
 }
 
diff --git a/cmd/globals.go b/cmd/globals.go
index c9d6a390c..9ebf212f4 100644
--- a/cmd/globals.go
+++ b/cmd/globals.go
@@ -215,7 +215,6 @@ var (
 	globalBucketObjectLockSys *BucketObjectLockSys
 
 	globalBucketQuotaSys *BucketQuotaSys
-	globalBucketStorageCache bucketStorageCache
 
 	// Disk cache drives
 	globalCacheConfig cache.Config
diff --git a/cmd/metrics.go b/cmd/metrics.go
index 6aa0775a5..8284cf953 100644
--- a/cmd/metrics.go
+++ b/cmd/metrics.go
@@ -1,5 +1,5 @@
 /*
- * MinIO Cloud Storage, (C) 2018 MinIO, Inc.
+ * MinIO Cloud Storage, (C) 2018-2020 MinIO, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,9 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/minio/minio/cmd/config"
 	"github.com/minio/minio/cmd/logger"
+	"github.com/minio/minio/pkg/env"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )
@@ -84,6 +86,7 @@ func (c *minioCollector) Collect(ch chan<- prometheus.Metric) {
 	minioVersionInfo.WithLabelValues(Version, CommitID).Set(float64(1.0))
 
 	storageMetricsPrometheus(ch)
+	bucketUsageMetricsPrometheus(ch)
 	networkMetricsPrometheus(ch)
 	httpMetricsPrometheus(ch)
 	cacheMetricsPrometheus(ch)
@@ -345,6 +348,65 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) {
 	)
 }
 
+// Populates Prometheus with bucket usage metrics; these metrics
+// are only collected when the data usage crawler is enabled.
+func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) {
+	objLayer := newObjectLayerWithoutSafeModeFn()
+	// Service not initialized yet
+	if objLayer == nil {
+		return
+	}
+
+	// Crawler disabled, nothing to do.
+	if env.Get(envDataUsageCrawlConf, config.EnableOn) != config.EnableOn {
+		return
+	}
+
+	dataUsageInfo, err := loadDataUsageFromBackend(GlobalContext, objLayer)
+	if err != nil {
+		return
+	}
+
+	// Data usage has not captured any data yet.
+	if dataUsageInfo.LastUpdate.IsZero() {
+		return
+	}
+
+	for bucket, usageInfo := range dataUsageInfo.BucketsUsage {
+		// Total space used by bucket
+		ch <- prometheus.MustNewConstMetric(
+			prometheus.NewDesc(
+				prometheus.BuildFQName("bucket", "usage", "size"),
+				"Total bucket size",
+				[]string{"bucket"}, nil),
+			prometheus.GaugeValue,
+			float64(usageInfo.Size),
+			bucket,
+		)
+		// Total number of objects in bucket
+		ch <- prometheus.MustNewConstMetric(
+			prometheus.NewDesc(
+				prometheus.BuildFQName("bucket", "objects", "count"),
+				"Total number of objects in a bucket",
+				[]string{"bucket"}, nil),
+			prometheus.GaugeValue,
+			float64(usageInfo.ObjectsCount),
+			bucket,
+		)
+		// Object size distribution within the bucket
+		for k, v := range usageInfo.ObjectSizesHistogram {
+			ch <- prometheus.MustNewConstMetric(
+				prometheus.NewDesc(
+					prometheus.BuildFQName("bucket", "objects", "histogram"),
+					"Total number of objects of different sizes in a bucket",
+					[]string{"bucket", "object_size"}, nil),
+				prometheus.GaugeValue,
+				float64(v),
+				bucket,
+				k,
+			)
+		}
+	}
+}
+
 // collects storage metrics for MinIO server in Prometheus specific format
 // and sends to given channel
 func storageMetricsPrometheus(ch chan<- prometheus.Metric) {
diff --git a/cmd/object-api-datatypes.go b/cmd/object-api-datatypes.go
index 14f24ba6d..27d1f2bc9 100644
--- a/cmd/object-api-datatypes.go
+++ b/cmd/object-api-datatypes.go
@@ -21,6 +21,7 @@ import (
 	"math"
 	"time"
 
+	humanize "github.com/dustin/go-humanize"
 	"github.com/minio/minio/pkg/hash"
 	"github.com/minio/minio/pkg/madmin"
 )
@@ -86,13 +87,23 @@ const (
 
 // ObjectsHistogramIntervals is the list of all intervals
 // of object sizes to be included in objects histogram.
 var ObjectsHistogramIntervals = []objectHistogramInterval{
-	{"LESS_THAN_1024_B", -1, 1024 - 1},
-	{"BETWEEN_1024_B_AND_1_MB", 1024, 1024*1024 - 1},
-	{"BETWEEN_1_MB_AND_10_MB", 1024 * 1024, 1024*1024*10 - 1},
-	{"BETWEEN_10_MB_AND_64_MB", 1024 * 1024 * 10, 1024*1024*64 - 1},
-	{"BETWEEN_64_MB_AND_128_MB", 1024 * 1024 * 64, 1024*1024*128 - 1},
-	{"BETWEEN_128_MB_AND_512_MB", 1024 * 1024 * 128, 1024*1024*512 - 1},
-	{"GREATER_THAN_512_MB", 1024 * 1024 * 512, math.MaxInt64},
+	{"LESS_THAN_1024_B", 0, humanize.KiByte - 1},
+	{"BETWEEN_1024_B_AND_1_MB", humanize.KiByte, humanize.MiByte - 1},
+	{"BETWEEN_1_MB_AND_10_MB", humanize.MiByte, humanize.MiByte*10 - 1},
+	{"BETWEEN_10_MB_AND_64_MB", humanize.MiByte * 10, humanize.MiByte*64 - 1},
+	{"BETWEEN_64_MB_AND_128_MB", humanize.MiByte * 64, humanize.MiByte*128 - 1},
+	{"BETWEEN_128_MB_AND_512_MB", humanize.MiByte * 128, humanize.MiByte*512 - 1},
+	{"GREATER_THAN_512_MB", humanize.MiByte * 512, math.MaxInt64},
+}
+
+// BucketUsageInfo - bucket usage info provides
+// - total size of the bucket
+// - total objects in a bucket
+// - object size histogram per bucket
+type BucketUsageInfo struct {
+	Size                 uint64            `json:"size"`
+	ObjectsCount         uint64            `json:"objectsCount"`
+	ObjectSizesHistogram map[string]uint64 `json:"objectsSizesHistogram"`
 }
 
 // DataUsageInfo represents data usage stats of the underlying Object API
@@ -101,17 +112,23 @@ type DataUsageInfo struct {
 	// This does not indicate a full scan.
 	LastUpdate time.Time `json:"lastUpdate"`
 
-	ObjectsCount uint64 `json:"objectsCount"`
-
-	// Objects total size
-	ObjectsTotalSize uint64 `json:"objectsTotalSize"`
+	// Objects total count across all buckets
+	ObjectsTotalCount uint64 `json:"objectsCount"`
 
-	// ObjectsSizesHistogram contains information on objects across all buckets.
-	// See ObjectsHistogramIntervals.
-	ObjectsSizesHistogram map[string]uint64 `json:"objectsSizesHistogram"`
+	// Objects total size across all buckets
+	ObjectsTotalSize uint64 `json:"objectsTotalSize"`
 
+	// Total number of buckets in this cluster
 	BucketsCount uint64 `json:"bucketsCount"`
 
-	// BucketsSizes is "bucket name" -> size.
-	BucketsSizes map[string]uint64 `json:"bucketsSizes"`
+
+	// Buckets usage info provides the following information across all buckets
+	// - total size of the bucket
+	// - total objects in a bucket
+	// - object size histogram per bucket
+	BucketsUsage map[string]BucketUsageInfo `json:"bucketsUsageInfo"`
+
+	// Deprecated, kept here for backward compatibility.
+	BucketSizes map[string]uint64 `json:"bucketsSizes"`
 }
 
 // BucketInfo - represents bucket metadata.
diff --git a/docs/metrics/prometheus/README.md b/docs/metrics/prometheus/README.md
index fcbdb4346..5f1af5bcb 100644
--- a/docs/metrics/prometheus/README.md
+++ b/docs/metrics/prometheus/README.md
@@ -110,7 +110,7 @@ These are the new set of metrics which will be in effect after `RELEASE.2019-10-
 - Metrics that records the http statistics and latencies are labeled to their respective APIs (putobject,getobject etc).
 - Disk usage metrics are distributed and labeled to the respective disk paths.
 
-For more details, please check the `Migration guide for the new set of metrics`
+For more details, please check the `Migration guide for the new set of metrics`.
 
 The list of metrics and its definition are as follows. (NOTE: instance here is one MinIO node)
 
@@ -118,42 +118,83 @@ The list of metrics and its definition are as follows. (NOTE: instance here is o
 
 > 1. Instance here is one MinIO node.
 > 2. `s3 requests` exclude internode requests.
-- standard go runtime metrics prefixed by `go_`
-- process level metrics prefixed with `process_`
-- prometheus scrape metrics prefixed with `promhttp_`
-
-- `disk_storage_used` : Disk space used by the disk.
-- `disk_storage_available`: Available disk space left on the disk.
-- `disk_storage_total`: Total disk space on the disk.
-- `minio_disks_offline`: Total number of offline disks in current MinIO instance.
-- `minio_disks_total`: Total number of disks in current MinIO instance.
-- `s3_requests_total`: Total number of s3 requests in current MinIO instance.
-- `s3_errors_total`: Total number of errors in s3 requests in current MinIO instance.
-- `s3_requests_current`: Total number of active s3 requests in current MinIO instance.
-- `internode_rx_bytes_total`: Total number of internode bytes received by current MinIO server instance.
-- `internode_tx_bytes_total`: Total number of bytes sent to the other nodes by current MinIO server instance.
-- `s3_rx_bytes_total`: Total number of s3 bytes received by current MinIO server instance.
-- `s3_tx_bytes_total`: Total number of s3 bytes sent by current MinIO server instance.
-- `minio_version_info`: Current MinIO version with commit-id.
-- `s3_ttfb_seconds`: Histogram that holds the latency information of the requests.
+### Default set of information
+| name        | description                     |
+|:------------|:--------------------------------|
+| `go_`       | all standard go runtime metrics |
+| `process_`  | all process level metrics       |
+| `promhttp_` | all prometheus scrape metrics   |
+
+### MinIO node specific information
+| name                  | description                                              |
+|:----------------------|:---------------------------------------------------------|
+| `minio_version_info`  | Current MinIO version with its commit-id                 |
+| `minio_disks_offline` | Total number of offline disks on current MinIO instance  |
+| `minio_disks_total`   | Total number of disks on current MinIO instance          |
+
+### Disk metrics are labeled by 'disk' which identifies each disk
+| name                      | description                         |
+|:--------------------------|:------------------------------------|
+| `disk_storage_total`      | Total size of the disk              |
+| `disk_storage_used`       | Total disk space used per disk      |
+| `disk_storage_available`  | Total available disk space per disk |
+
+### S3 API metrics are labeled by 'api' which identifies different S3 API requests
+| name                  | description                                                          |
+|:----------------------|:---------------------------------------------------------------------|
+| `s3_requests_total`   | Total number of s3 requests in current MinIO instance                |
+| `s3_errors_total`     | Total number of errors in s3 requests in current MinIO instance      |
+| `s3_requests_current` | Total number of active s3 requests in current MinIO instance         |
+| `s3_rx_bytes_total`   | Total number of s3 bytes received by current MinIO server instance   |
+| `s3_tx_bytes_total`   | Total number of s3 bytes sent by current MinIO server instance       |
+| `s3_ttfb_seconds`     | Histogram that holds the latency information of the requests         |
+
+#### Internode metrics only available in a distributed setup
+| name                       | description                                                                      |
+|:---------------------------|:---------------------------------------------------------------------------------|
+| `internode_rx_bytes_total` | Total number of internode bytes received by current MinIO server instance       |
+| `internode_tx_bytes_total` | Total number of bytes sent to the other nodes by current MinIO server instance  |
 
 Apart from the above metrics, MinIO also exposes the following mode-specific metrics
 
+### Bucket usage specific metrics
+All metrics are labeled by `bucket`; each metric is reported per bucket. `bucket_objects_histogram` is additionally labeled by an `object_size` string, which takes one of the following values
+
+- *LESS_THAN_1024_B*
+- *BETWEEN_1024_B_AND_1_MB*
+- *BETWEEN_1_MB_AND_10_MB*
+- *BETWEEN_10_MB_AND_64_MB*
+- *BETWEEN_64_MB_AND_128_MB*
+- *BETWEEN_128_MB_AND_512_MB*
+- *GREATER_THAN_512_MB*
+
+| name                       | description                                          |
+|:---------------------------|:-----------------------------------------------------|
+| `bucket_usage_size`        | Total size of the bucket                             |
+| `bucket_objects_count`     | Total number of objects in a bucket                  |
+| `bucket_objects_histogram` | Total number of objects filtered by different sizes  |
+
 ### Cache specific metrics
 
 MinIO Gateway instances enabled with Disk-Caching expose caching related metrics.
 
-- `cache_data_served`: Total number of bytes served from cache.
-- `cache_hits_total`: Total number of cache hits.
-- `cache_misses_total`: Total number of cache misses.
+| name                 | description                               |
+|:---------------------|:-------------------------------------------|
+| `cache_data_served`  | Total number of bytes served from cache   |
+| `cache_hits_total`   | Total number of cache hits                |
+| `cache_misses_total` | Total number of cache misses              |
 
 ### Gateway & Cache specific metrics
 
 MinIO Gateway instance exposes metrics related to Gateway communication with the cloud backend (S3, Azure & GCS Gateway).
 
-- `gateway_<gateway_type>_requests`: Total number of requests made to cloud backend. This metrics has a label `method` that identifies GET, HEAD, PUT and POST Requests.
-- `gateway_<gateway_type>_bytes_sent`: Total number of bytes sent to cloud backend (in PUT & POST Requests).
-- `gateway_<gateway_type>_bytes_received`: Total number of bytes received from cloud backend (in GET & HEAD Requests).
+`<gateway_type>` changes based on the gateway in use, and can be 's3', 'gcs' or 'azure'. These metrics are labeled with `method`, which identifies HTTP GET, HEAD, PUT and POST requests to the backend.
+
+| name                                      | description                                                                  |
+|:------------------------------------------|:------------------------------------------------------------------------------|
+| `gateway_<gateway_type>_requests`        | Total number of requests made to the gateway backend                         |
+| `gateway_<gateway_type>_bytes_sent`      | Total number of bytes sent to cloud backend (in PUT & POST Requests)         |
+| `gateway_<gateway_type>_bytes_received`  | Total number of bytes received from cloud backend (in GET & HEAD Requests)   |
 
 Note that this is currently only supported for Azure, S3 and GCS Gateway.
 
@@ -161,10 +202,12 @@ Note that this is currently only supported for Azure, S3 and GCS Gateway.
 
 MinIO exposes self-healing related metrics for erasure-code deployments _only_. These metrics are _not_ available on Gateway or Single Node, Single Drive deployments. Note that these metrics will be exposed _only_ when there is a relevant event happening on MinIO server.
 
-- `self_heal_time_since_last_activity`: Time elapsed since last self-healing related activity.
-- `self_heal_objects_scanned`: Number of objects scanned by self-healing thread in its current run. This will reset when a fresh self-healing run starts. This is labeled with the object type scanned.
-- `self_heal_objects_healed`: Number of objects healing by self-healing thread in its current run. This will reset when a fresh self-healing run starts. This is labeled with the object type scanned.
-- `self_heal_objects_heal_failed`: Number of objects for which self-healing failed in its current run. This will reset when a fresh self-healing run starts.
This is labeled with disk status and its endpoint.
+| name                                  | description                                                                                                                                                                   |
+|:--------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `self_heal_time_since_last_activity`  | Time elapsed since last self-healing related activity                                                                                                                        |
+| `self_heal_objects_scanned`           | Number of objects scanned by the self-healing thread in its current run. This will reset when a fresh self-healing run starts. This is labeled with the object type scanned      |
+| `self_heal_objects_healed`            | Number of objects healed by the self-healing thread in its current run. This will reset when a fresh self-healing run starts. This is labeled with the object type scanned       |
+| `self_heal_objects_heal_failed`       | Number of objects for which self-healing failed in its current run. This will reset when a fresh self-healing run starts. This is labeled with disk status and its endpoint      |
 
 ## Migration guide for the new set of metrics
 
@@ -174,20 +217,20 @@ This migration guide applies for older releases or any releases before `RELEASE.
 
 The migrations include
 
-  - `minio_total_disks` to `minio_disks_total`
-  - `minio_offline_disks` to `minio_disks_offline`
+- `minio_total_disks` to `minio_disks_total`
+- `minio_offline_disks` to `minio_disks_offline`
 
 ### MinIO disk level metrics - `disk_storage_*`
 
 These metrics have one label.
 
-  - `disk`: Holds the disk path
+- `disk`: Holds the disk path
 
 The migrations include
 
-  - `minio_disk_storage_used_bytes` to `disk_storage_used`
-  - `minio_disk_storage_available_bytes` to `disk_storage_available`
-  - `minio_disk_storage_total_bytes` to `disk_storage_total`
+- `minio_disk_storage_used_bytes` to `disk_storage_used`
+- `minio_disk_storage_available_bytes` to `disk_storage_available`
+- `minio_disk_storage_total_bytes` to `disk_storage_total`
 
 ### MinIO network level metrics
 
 These metrics are detailed to cover the s3 and internode network statistics.
 
 The migrations include
 
-  - `minio_network_sent_bytes_total` to `s3_tx_bytes_total` and `internode_tx_bytes_total`
-  - `minio_network_received_bytes_total` to `s3_rx_bytes_total` and `internode_rx_bytes_total`
+- `minio_network_sent_bytes_total` to `s3_tx_bytes_total` and `internode_tx_bytes_total`
+- `minio_network_received_bytes_total` to `s3_rx_bytes_total` and `internode_rx_bytes_total`
 
 Some of the additional metrics added were
 
-  - `s3_requests_total`
-  - `s3_errors_total`
-  - `s3_ttfb_seconds`
+- `s3_requests_total`
+- `s3_errors_total`
+- `s3_ttfb_seconds`
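
A note on the `timedValue` used by the new quota check: the call sites in `cmd/bucket-quota.go` above rely only on its `Once`, `TTL`, `Update` and `Get` members. The following is a minimal, hypothetical sketch of such a TTL-cached value, not MinIO's actual implementation, included only to make the caching behavior easier to review:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// timedValueSketch is an illustrative stand-in for the timedValue used
// in cmd/bucket-quota.go: it caches the result of Update() and
// transparently recomputes it once TTL has elapsed.
type timedValueSketch struct {
	// Update returns a fresh value; called at most once per TTL window.
	Update func() (interface{}, error)
	// TTL is how long a fetched value stays valid.
	TTL time.Duration
	// Once lets callers initialize TTL/Update lazily, mirroring
	// sys.bucketStorageCache.Once.Do(...) in the patch.
	Once sync.Once

	mu         sync.Mutex
	value      interface{}
	lastUpdate time.Time
}

// Get returns the cached value, refreshing it via Update when the
// cached copy is older than TTL or when nothing is cached yet.
func (t *timedValueSketch) Get() (interface{}, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.value != nil && time.Since(t.lastUpdate) < t.TTL {
		return t.value, nil
	}
	v, err := t.Update()
	if err != nil {
		return nil, err
	}
	t.value = v
	t.lastUpdate = time.Now()
	return v, nil
}

func main() {
	var cache timedValueSketch
	cache.Once.Do(func() {
		cache.TTL = 10 * time.Second
		cache.Update = func() (interface{}, error) {
			// Stand-in for loadDataUsageFromBackend(ctx, objAPI).
			return fmt.Sprintf("loaded at %s", time.Now().Format(time.RFC3339)), nil
		}
	})

	for i := 0; i < 2; i++ {
		v, err := cache.Get()
		if err != nil {
			fmt.Println("error:", err)
			return
		}
		fmt.Println(v.(string)) // the second call is served from the cache
	}
}
```

Wrapping the `TTL`/`Update` assignment in `Once.Do` keeps initialization lazy and race-free while letting every caller share one cached `DataUsageInfo`, which is what replaces the old `globalBucketStorageCache` global.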
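The two compatibility loops added to `loadDataUsageFromBackend` can also be exercised in isolation. The sketch below uses stand-alone local types carrying only the two relevant fields (tags match the patch, everything else omitted) to show how an old-format payload with only `bucketsSizes` is upgraded to the new `bucketsUsageInfo` shape:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Local copies of the two fields relevant to the compatibility shim in
// cmd/data-usage.go; JSON tags match the DataUsageInfo struct above.
type bucketUsageInfo struct {
	Size uint64 `json:"size"`
}

type dataUsageInfo struct {
	BucketsUsage map[string]bucketUsageInfo `json:"bucketsUsageInfo"`
	BucketSizes  map[string]uint64          `json:"bucketsSizes"`
}

func main() {
	// Old-format payload: only "bucketsSizes" is present.
	old := []byte(`{"bucketsSizes":{"photos":1048576}}`)

	var dui dataUsageInfo
	if err := json.Unmarshal(old, &dui); err != nil {
		panic(err)
	}

	// Same shim as the patch: synthesize BucketsUsage from BucketSizes.
	if len(dui.BucketsUsage) == 0 {
		dui.BucketsUsage = make(map[string]bucketUsageInfo, len(dui.BucketSizes))
		for bucket, size := range dui.BucketSizes {
			dui.BucketsUsage[bucket] = bucketUsageInfo{Size: size}
		}
	}

	fmt.Printf("photos bucket size: %d bytes\n", dui.BucketsUsage["photos"].Size)
}
```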
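To verify the new bucket metrics end to end, one can scrape and parse the exposition output. The sketch below assumes MinIO's usual Prometheus path (`/minio/prometheus/metrics`, as documented earlier in this README) and a server on `localhost:9000`; adjust both for your deployment. It uses the text parser from `prometheus/common/expfmt`:

```go
package main

import (
	"fmt"
	"net/http"

	"github.com/prometheus/common/expfmt"
)

func main() {
	// Assumed endpoint; change host/port to match your deployment.
	resp, err := http.Get("http://localhost:9000/minio/prometheus/metrics")
	if err != nil {
		fmt.Println("scrape failed:", err)
		return
	}
	defer resp.Body.Close()

	var parser expfmt.TextParser
	families, err := parser.TextToMetricFamilies(resp.Body)
	if err != nil {
		fmt.Println("parse failed:", err)
		return
	}

	// bucket_usage_size is one gauge per bucket, labeled by "bucket".
	if mf, ok := families["bucket_usage_size"]; ok {
		for _, m := range mf.GetMetric() {
			var bucket string
			for _, lp := range m.GetLabel() {
				if lp.GetName() == "bucket" {
					bucket = lp.GetValue()
				}
			}
			fmt.Printf("bucket=%s size=%v bytes\n", bucket, m.GetGauge().GetValue())
		}
	}
}
```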