From c4848f9b4fbd365daa3285990d74cb9aa89d0267 Mon Sep 17 00:00:00 2001 From: Ritesh H Shukla Date: Mon, 1 Feb 2021 23:02:18 -0800 Subject: [PATCH] Add process start time to cluster metrics. (#11405) --- cmd/metrics-v2.go | 32 +++++++++++++++++++++++++++++--- docs/metrics/prometheus/list.md | 1 + 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index b08fc5fb4..027e3f937 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -53,18 +53,19 @@ const ( capacityRawSubsystem MetricSubsystem = "capacity_raw" capacityUsableSubsystem MetricSubsystem = "capacity_usable" diskSubsystem MetricSubsystem = "disk" + fileDescriptorSubsystem MetricSubsystem = "file_descriptor" goRoutines MetricSubsystem = "go_routine" + ioSubsystem MetricSubsystem = "io" nodesSubsystem MetricSubsystem = "nodes" objectsSubsystem MetricSubsystem = "objects" - fileDescriptorSubsystem MetricSubsystem = "file_descriptor" - ioSubsystem MetricSubsystem = "io" + processSubsystem MetricSubsystem = "process" replicationSubsystem MetricSubsystem = "replication" requestsSubsystem MetricSubsystem = "requests" timeSubsystem MetricSubsystem = "time" trafficSubsystem MetricSubsystem = "traffic" + softwareSubsystem MetricSubsystem = "software" sysCallSubsystem MetricSubsystem = "syscall" usageSubsystem MetricSubsystem = "usage" - softwareSubsystem MetricSubsystem = "software" ) // MetricName are the individual names for the metric. 
@@ -107,6 +108,7 @@ const (
 	ttfbDistribution = "ttbf_seconds_distribution"
 
 	lastActivityTime = "last_activity_nano_seconds"
+	startTime        = "starttime_seconds"
 )
 
 const (
@@ -631,6 +633,15 @@ func getMinIOGORoutineCountMD() MetricDescription {
 		Type:      gaugeMetric,
 	}
 }
+func getMinIOProcessStartTimeMD() MetricDescription {
+	return MetricDescription{
+		Namespace: nodeMetricNamespace,
+		Subsystem: processSubsystem,
+		Name:      startTime,
+		Help:      "Start time for MinIO process per node in seconds.",
+		Type:      gaugeMetric,
+	}
+}
 func getMinioProcMetrics() MetricsGroup {
 	return MetricsGroup{
 		Metrics: []Metric{},
@@ -656,6 +667,16 @@ func getMinioProcMetrics() MetricsGroup {
 				logger.LogOnceIf(ctx, err, ioSubsystem)
 				return
 			}
+			stat, err := p.Stat()
+			if err != nil {
+				logger.LogOnceIf(ctx, err, processSubsystem)
+				return
+			}
+			procStartTime, err := stat.StartTime()
+			if err != nil {
+				logger.LogOnceIf(ctx, err, startTime)
+				return
+			}
 
 			metrics.Metrics = append(metrics.Metrics,
 				Metric{
@@ -698,6 +719,11 @@ func getMinioProcMetrics() MetricsGroup {
 					Description: getMinioProcessIOWriteCachedBytesMD(),
 					Value:       float64(io.WChar),
 				})
+			metrics.Metrics = append(metrics.Metrics,
+				Metric{
+					Description: getMinIOProcessStartTimeMD(),
+					Value:       procStartTime,
+				})
 		},
 	}
 }
diff --git a/docs/metrics/prometheus/list.md b/docs/metrics/prometheus/list.md
index da9bf793b..4fafae5fe 100644
--- a/docs/metrics/prometheus/list.md
+++ b/docs/metrics/prometheus/list.md
@@ -37,6 +37,7 @@ These metrics can be from any MinIO server once per collection.
|`minio_node_io_read_bytes` |Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes | |`minio_node_io_wchar_bytes` |Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar | |`minio_node_io_write_bytes` |Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes | +|`minio_node_process_starttime_seconds` |Start time for MinIO process per node in seconds. | |`minio_node_syscall_read_total` |Total read SysCalls to the kernel. /proc/[pid]/io syscr | |`minio_node_syscall_write_total` |Total write SysCalls to the kernel. /proc/[pid]/io syscw | |`minio_s3_requests_error_total` |Total number S3 requests with errors |