From 4a564336feb2ef68385d355faaf7080cf88b7fd1 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Tue, 8 Dec 2020 09:23:35 -0800 Subject: [PATCH] Revert "Add metrics for nodes online and offline (#11050)" This reverts commit f60bbdf86b8d54444cf7383c28bb74a224034290. --- cmd/metrics.go | 85 ++++++++++--------------------- cmd/notification.go | 19 +------ cmd/rest/client.go | 13 +---- docs/metrics/prometheus/README.md | 2 - 4 files changed, 31 insertions(+), 88 deletions(-) diff --git a/cmd/metrics.go b/cmd/metrics.go index 26fae65e3..21d82fef5 100644 --- a/cmd/metrics.go +++ b/cmd/metrics.go @@ -53,17 +53,6 @@ var ( ) ) -const ( - healMetricsNamespace = "self_heal" - gatewayNamespace = "gateway" - cacheNamespace = "cache" - s3Namespace = "s3" - bucketNamespace = "bucket" - minioNamespace = "minio" - diskNamespace = "disk" - interNodeNamespace = "internode" -) - func init() { prometheus.MustRegister(httpRequestsDuration) prometheus.MustRegister(newMinioCollector()) @@ -97,7 +86,6 @@ func (c *minioCollector) Collect(ch chan<- prometheus.Metric) { minioVersionInfo.WithLabelValues(Version, CommitID).Set(float64(1.0)) storageMetricsPrometheus(ch) - nodeHealthMetricsPrometheus(ch) bucketUsageMetricsPrometheus(ch) networkMetricsPrometheus(ch) httpMetricsPrometheus(ch) @@ -106,26 +94,6 @@ func (c *minioCollector) Collect(ch chan<- prometheus.Metric) { healingMetricsPrometheus(ch) } -func nodeHealthMetricsPrometheus(ch chan<- prometheus.Metric) { - nodesUp, nodesDown := GetPeerOnlineCount() - ch <- prometheus.MustNewConstMetric( - prometheus.NewDesc( - prometheus.BuildFQName(minioNamespace, "nodes", "online"), - "Total number of MinIO nodes online", - nil, nil), - prometheus.GaugeValue, - float64(nodesUp), - ) - ch <- prometheus.MustNewConstMetric( - prometheus.NewDesc( - prometheus.BuildFQName(minioNamespace, "nodes", "offline"), - "Total number of MinIO nodes offline", - nil, nil), - prometheus.GaugeValue, - float64(nodesDown), - ) -} - // collects healing specific metrics for MinIO instance in Prometheus specific format // and sends to given channel func healingMetricsPrometheus(ch chan<- prometheus.Metric) { @@ -136,6 +104,7 @@ func healingMetricsPrometheus(ch chan<- prometheus.Metric) { if !exists { return } + healMetricsNamespace := "self_heal" var dur time.Duration if !bgSeq.lastHealActivity.IsZero() { @@ -205,7 +174,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "bytes_received"), + prometheus.BuildFQName("gateway", globalGatewayName, "bytes_received"), "Total number of bytes received by current MinIO Gateway "+globalGatewayName+" backend", nil, nil), prometheus.CounterValue, @@ -213,7 +182,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) { ) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "bytes_sent"), + prometheus.BuildFQName("gateway", globalGatewayName, "bytes_sent"), "Total number of bytes sent by current MinIO Gateway to "+globalGatewayName+" backend", nil, nil), prometheus.CounterValue, @@ -222,7 +191,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) { s := m.GetRequests() ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"), + prometheus.BuildFQName("gateway", globalGatewayName, "requests"), "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", []string{"method"}, nil), prometheus.CounterValue, @@ -231,7 +200,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) { ) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"), + prometheus.BuildFQName("gateway", globalGatewayName, "requests"), "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", []string{"method"}, nil), prometheus.CounterValue, @@ -240,7 +209,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) { ) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"), + prometheus.BuildFQName("gateway", globalGatewayName, "requests"), "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", []string{"method"}, nil), prometheus.CounterValue, @@ -249,7 +218,7 @@ func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) { ) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"), + prometheus.BuildFQName("gateway", globalGatewayName, "requests"), "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", []string{"method"}, nil), prometheus.CounterValue, @@ -269,7 +238,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(cacheNamespace, "hits", "total"), + prometheus.BuildFQName("cache", "hits", "total"), "Total number of disk cache hits in current MinIO instance", nil, nil), prometheus.CounterValue, @@ -277,7 +246,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) { ) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(cacheNamespace, "misses", "total"), + prometheus.BuildFQName("cache", "misses", "total"), "Total number of disk cache misses in current MinIO instance", nil, nil), prometheus.CounterValue, @@ -285,7 +254,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) { ) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(cacheNamespace, "data", "served"), + prometheus.BuildFQName("cache", "data", "served"), "Total number of bytes served from cache of current MinIO instance", nil, nil), prometheus.CounterValue, @@ -295,7 +264,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) { // Cache disk usage percentage ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(cacheNamespace, "usage", "percent"), + prometheus.BuildFQName("cache", "usage", "percent"), "Total percentage cache usage", []string{"disk"}, nil), prometheus.GaugeValue, @@ -304,7 +273,7 @@ func cacheMetricsPrometheus(ch chan<- prometheus.Metric) { ) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(cacheNamespace, "usage", "high"), + prometheus.BuildFQName("cache", "usage", "high"), "Indicates cache usage is high or low, relative to current cache 'quota' settings", []string{"disk"}, nil), prometheus.GaugeValue, @@ -342,7 +311,7 @@ func httpMetricsPrometheus(ch chan<- prometheus.Metric) { for api, value := range httpStats.CurrentS3Requests.APIStats { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(s3Namespace, "requests", "current"), + prometheus.BuildFQName("s3", "requests", "current"), "Total number of running s3 requests in current MinIO server instance", []string{"api"}, nil), prometheus.CounterValue, @@ -354,7 +323,7 @@ func httpMetricsPrometheus(ch chan<- prometheus.Metric) { for api, value := range httpStats.TotalS3Requests.APIStats { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(s3Namespace, "requests", "total"), + prometheus.BuildFQName("s3", "requests", "total"), "Total number of s3 requests in current MinIO server instance", []string{"api"}, nil), prometheus.CounterValue, @@ -366,7 +335,7 @@ func httpMetricsPrometheus(ch chan<- prometheus.Metric) { for api, value := range httpStats.TotalS3Errors.APIStats { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(s3Namespace, "errors", "total"), + prometheus.BuildFQName("s3", "errors", "total"), "Total number of s3 errors in current MinIO server instance", []string{"api"}, nil), prometheus.CounterValue, @@ -384,7 +353,7 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) { // Network Sent/Received Bytes (internode) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(interNodeNamespace, "tx", "bytes_total"), + prometheus.BuildFQName("internode", "tx", "bytes_total"), "Total number of bytes sent to the other peer nodes by current MinIO server instance", nil, nil), prometheus.CounterValue, @@ -393,7 +362,7 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(interNodeNamespace, "rx", "bytes_total"), + prometheus.BuildFQName("internode", "rx", "bytes_total"), "Total number of internode bytes received by current MinIO server instance", nil, nil), prometheus.CounterValue, @@ -403,7 +372,7 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) { // Network Sent/Received Bytes (Outbound) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(s3Namespace, "tx", "bytes_total"), + prometheus.BuildFQName("s3", "tx", "bytes_total"), "Total number of s3 bytes sent by current MinIO server instance", nil, nil), prometheus.CounterValue, @@ -412,7 +381,7 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(s3Namespace, "rx", "bytes_total"), + prometheus.BuildFQName("s3", "rx", "bytes_total"), "Total number of s3 bytes received by current MinIO server instance", nil, nil), prometheus.CounterValue, @@ -452,7 +421,7 @@ func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) { // Total space used by bucket ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(bucketNamespace, "usage", "size"), + prometheus.BuildFQName("bucket", "usage", "size"), "Total bucket size", []string{"bucket"}, nil), prometheus.GaugeValue, @@ -461,7 +430,7 @@ func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) { ) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(bucketNamespace, "objects", "count"), + prometheus.BuildFQName("bucket", "objects", "count"), "Total number of objects in a bucket", []string{"bucket"}, nil), prometheus.GaugeValue, @@ -507,7 +476,7 @@ func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) { for k, v := range usageInfo.ObjectSizesHistogram { ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(bucketNamespace, "objects", "histogram"), + prometheus.BuildFQName("bucket", "objects", "histogram"), "Total number of objects of different sizes in a bucket", []string{"bucket", "object_size"}, nil), prometheus.GaugeValue, @@ -538,7 +507,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) { // MinIO Offline Disks per node ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(minioNamespace, "disks", "offline"), + prometheus.BuildFQName("minio", "disks", "offline"), "Total number of offline disks in current MinIO server instance", nil, nil), prometheus.GaugeValue, @@ -548,7 +517,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) { // MinIO Total Disks per node ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(minioNamespace, "disks", "total"), + prometheus.BuildFQName("minio", "disks", "total"), "Total number of disks for current MinIO server instance", nil, nil), prometheus.GaugeValue, @@ -559,7 +528,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) { // Total disk usage by the disk ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(diskNamespace, "storage", "used"), + prometheus.BuildFQName("disk", "storage", "used"), "Total disk storage used on the disk", []string{"disk"}, nil), prometheus.GaugeValue, @@ -570,7 +539,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) { // Total available space in the disk ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(diskNamespace, "storage", "available"), + prometheus.BuildFQName("disk", "storage", "available"), "Total available space left on the disk", []string{"disk"}, nil), prometheus.GaugeValue, @@ -581,7 +550,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) { // Total storage space of the disk ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( - prometheus.BuildFQName(diskNamespace, "storage", "total"), + prometheus.BuildFQName("disk", "storage", "total"), "Total space on the disk", []string{"disk"}, nil), prometheus.GaugeValue, diff --git a/cmd/notification.go b/cmd/notification.go index 9222868a8..528831b68 100644 --- a/cmd/notification.go +++ b/cmd/notification.go @@ -51,8 +51,8 @@ type NotificationSys struct { targetResCh chan event.TargetIDResult bucketRulesMap map[string]event.RulesMap bucketRemoteTargetRulesMap map[string]map[event.TargetID]event.RulesMap - peerClients []*peerRESTClient // Excludes self - allPeerClients []*peerRESTClient // Includes nil client for self + peerClients []*peerRESTClient + allPeerClients []*peerRESTClient } // GetARNList - returns available ARNs. @@ -1288,21 +1288,6 @@ func NewNotificationSys(endpoints EndpointServerPools) *NotificationSys { } } -// GetPeerOnlineCount gets the count of online and offline nodes. -func GetPeerOnlineCount() (nodesOnline, nodesOffline int) { - nodesOnline = 1 - nodesOffline = 0 - servers := globalNotificationSys.ServerInfo() - for _, s := range servers { - if s.State == "online" { - nodesOnline++ - continue - } - nodesOffline++ - } - return -} - type eventArgs struct { EventName event.Name BucketName string diff --git a/cmd/rest/client.go b/cmd/rest/client.go index ce0589bec..410ef011e 100644 --- a/cmd/rest/client.go +++ b/cmd/rest/client.go @@ -200,22 +200,13 @@ func (c *Client) MarkOffline() { if atomic.LoadInt32(&c.connected) == closed { return } - if c.CheckOnlineStatus() { + if c.HealthCheckFn() { atomic.CompareAndSwapInt32(&c.connected, offline, online) logger.Info("Client %s online", c.url.String()) + return } time.Sleep(time.Duration(r.Float64() * float64(c.HealthCheckInterval))) } }() } } - -// CheckOnlineStatus checks if a client is online. -func (c *Client) CheckOnlineStatus() bool { - if c.HealthCheckFn != nil { - if c.HealthCheckFn() { - return true - } - } - return false -} diff --git a/docs/metrics/prometheus/README.md b/docs/metrics/prometheus/README.md index b9015bca0..d8ff49033 100644 --- a/docs/metrics/prometheus/README.md +++ b/docs/metrics/prometheus/README.md @@ -136,8 +136,6 @@ The list of metrics and its definition are as follows. (NOTE: instance here is o | `minio_version_info` | Current MinIO version with its commit-id | | `minio_disks_offline` | Total number of offline disks on current MinIO instance | | `minio_disks_total` | Total number of disks on current MinIO instance | -| `minio_nodes_online` | Total number of MinIO nodes online | -| `minio_nodes_offline` | Total number of MinIO nodes offline | ### Disk metrics are labeled by 'disk' which indentifies each disk | name | description |