Add Historic CPU and memory stats (#7136)

Collect historic cpu and mem stats. Also, use actual values
instead of formatted strings while returning to the client. The string
formatting prevents values from being processed by the server or
by the client without parsing them.

This change will allow the values to be processed (e.g. to
compute a rolling average over the lifetime of the minio server)
and offloads the formatting to the client.
master
Sidhartha Mani 6 years ago committed by Nitish Tiwari
parent d0015b4d66
commit 34e7259f95
  1. 14
      cmd/admin-handlers.go
  2. 16
      cmd/endpoint.go
  3. 70
      pkg/cpu/cpu.go
  4. 8
      pkg/madmin/API.md
  5. 14
      pkg/madmin/info-commands.go
  6. 37
      pkg/mem/mem.go

@ -313,18 +313,20 @@ type ServerDrivesPerfInfo struct {
// of one minio node. It also reports any errors if encountered
// while trying to reach this server.
type ServerCPULoadInfo struct {
Addr string `json:"addr"`
Error string `json:"error,omitempty"`
Load []cpu.Load `json:"load"`
Addr string `json:"addr"`
Error string `json:"error,omitempty"`
Load []cpu.Load `json:"load"`
HistoricLoad []cpu.Load `json:"historicLoad"`
}
// ServerMemUsageInfo holds information about memory utilization
// of one minio node. It also reports any errors if encountered
// while trying to reach this server.
type ServerMemUsageInfo struct {
Addr string `json:"addr"`
Error string `json:"error,omitempty"`
Usage []mem.Usage `json:"usage"`
Addr string `json:"addr"`
Error string `json:"error,omitempty"`
Usage []mem.Usage `json:"usage"`
HistoricUsage []mem.Usage `json:"historicUsage"`
}
// PerfInfoHandler - GET /minio/admin/v1/performance?perfType={perfType}

@ -204,6 +204,7 @@ func (endpoints EndpointList) GetString(i int) string {
// local endpoints from given list of endpoints
func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
var memUsages []mem.Usage
var historicUsages []mem.Usage
var addr string
scratchSpace := map[string]bool{}
for _, endpoint := range endpoints {
@ -215,12 +216,15 @@ func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
addr = GetLocalPeer(endpoints)
memUsage := mem.GetUsage()
memUsages = append(memUsages, memUsage)
historicUsage := mem.GetHistoricUsage()
historicUsages = append(historicUsages, historicUsage)
scratchSpace[endpoint.Host] = true
}
}
return ServerMemUsageInfo{
Addr: addr,
Usage: memUsages,
Addr: addr,
Usage: memUsages,
HistoricUsage: historicUsages,
}
}
@ -228,6 +232,7 @@ func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
// local endpoints from given list of endpoints
func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo {
var cpuLoads []cpu.Load
var historicLoads []cpu.Load
var addr string
scratchSpace := map[string]bool{}
for _, endpoint := range endpoints {
@ -239,12 +244,15 @@ func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo {
addr = GetLocalPeer(endpoints)
cpuLoad := cpu.GetLoad()
cpuLoads = append(cpuLoads, cpuLoad)
historicLoad := cpu.GetHistoricLoad()
historicLoads = append(historicLoads, historicLoad)
scratchSpace[endpoint.Host] = true
}
}
return ServerCPULoadInfo{
Addr: addr,
Load: cpuLoads,
Addr: addr,
Load: cpuLoads,
HistoricLoad: historicLoads,
}
}

@ -17,11 +17,44 @@
package cpu
import (
"fmt"
"sync"
"time"
)
// rollingAvg holds the rolling average of the cpu load on the minio
// server over its lifetime
var rollingAvg *Load
// cpuLoadMeasureInterval is the interval of time between two
// measurements of CPU load
const cpuLoadMeasureInterval = 5 * time.Second
// init seeds the package-level rollingAvg and starts a background goroutine
// that samples GetLoad once every cpuLoadMeasureInterval for as long as the
// process runs, folding each sample into rollingAvg:
//   - Max is the largest per-sample Max seen so far,
//   - Min is the smallest per-sample Min seen so far,
//   - Avg is the arithmetic mean of every per-sample Avg.
//
// NOTE(review): the goroutine writes rollingAvg without synchronization while
// GetHistoricLoad copies it; guard both sides with a mutex if a race-free
// snapshot is required.
func init() {
	rollingAvg = &Load{}
	var rollingSum float64
	var cycles float64
	go func() {
		for {
			time.Sleep(cpuLoadMeasureInterval)
			cycles++
			currLoad := GetLoad()
			// The first sample seeds the extremes. Treating "== 0" as the
			// "unset" marker would throw away a genuine 0% minimum on the
			// following cycle and let Min drift upwards again.
			if cycles == 1 || currLoad.Max > rollingAvg.Max {
				rollingAvg.Max = currLoad.Max
			}
			if cycles == 1 || currLoad.Min < rollingAvg.Min {
				rollingAvg.Min = currLoad.Min
			}
			rollingSum += currLoad.Avg
			rollingAvg.Avg = rollingSum / cycles
		}
	}()
}
const (
// cpuLoadWindow is the interval of time for which the
// cpu utilization is measured
@ -37,15 +70,34 @@ const (
// Load holds CPU utilization % measured in three intervals of 200ms each
type Load struct {
Avg string `json:"avg"`
Max string `json:"max"`
Min string `json:"min"`
Error string `json:"error,omitempty"`
Avg float64 `json:"avg"`
Max float64 `json:"max"`
Min float64 `json:"min"`
Error string `json:"error,omitempty"`
}
type counter struct{}
// GetLoad returns the CPU utilization % of the current process
// GetHistoricLoad reports the CPU load statistics accumulated so far for the
// current process. The value is returned as a copy of the package-level
// rollingAvg.
func GetHistoricLoad() Load {
	snapshot := *rollingAvg
	return snapshot
}
// GetLoad returns the CPU utilization of the current process
// This function works by calculating the amount of cpu clock
// cycles the current process used in a given time window
//
// This corresponds to the CPU utilization calculation done by
// tools like top. Here, we use clock_gettime with the
// CLOCK_PROCESS_CPUTIME_ID parameter to obtain the total number of
// clock ticks used by the process so far. Then we sleep for
// 200ms and obtain the total number of clock ticks again. The
// difference between the two counts provides us the number of
// clock ticks used by the process in the 200ms interval.
//
// The ratio of clock ticks used (measured in nanoseconds) to number
// of nanoseconds in 200 milliseconds provides us the CPU usage
// for the process currently
func GetLoad() Load {
vals := make(chan time.Duration, 3)
wg := sync.WaitGroup{}
@ -83,9 +135,9 @@ func GetLoad() Load {
close(vals)
avg := sum / 3
return Load{
Avg: fmt.Sprintf("%.2f%%", toFixed4(float64(avg)/float64(200*time.Millisecond))*100),
Max: fmt.Sprintf("%.2f%%", toFixed4(float64(max)/float64(200*time.Millisecond))*100),
Min: fmt.Sprintf("%.2f%%", toFixed4(float64(min)/float64(200*time.Millisecond))*100),
Avg: toFixed4(float64(avg)/float64(200*time.Millisecond)) * 100,
Max: toFixed4(float64(max)/float64(200*time.Millisecond)) * 100,
Min: toFixed4(float64(min)/float64(200*time.Millisecond)) * 100,
Error: "",
}
}

@ -235,9 +235,9 @@ Fetches CPU utilization for all cluster nodes. Returned value is in Bytes.
| Param | Type | Description |
|-------|------|-------------|
|`cpu.Load.Avg` | _string_ | The average utilization % of the CPU measured in a 200ms interval |
|`cpu.Load.Min` | _string_ | The minimum utilization % of the CPU measured in a 200ms interval |
|`cpu.Load.Max` | _string_ | The maximum utilization % of the CPU measured in a 200ms interval |
|`cpu.Load.Avg` | _float64_ | The average utilization of the CPU measured in a 200ms interval |
|`cpu.Load.Min` | _float64_ | The minimum utilization of the CPU measured in a 200ms interval |
|`cpu.Load.Max` | _float64_ | The maximum utilization of the CPU measured in a 200ms interval |
|`cpu.Load.Error` | _string_ | Error (if any) encountered while accessing the CPU info |
<a name="ServerMemUsageInfo"></a>
@ -253,7 +253,7 @@ Fetches Mem utilization for all cluster nodes. Returned value is in Bytes.
| Param | Type | Description |
|-------|------|-------------|
|`mem.Usage.Mem` | _string_ | The total number of bytes obtained from the OS |
|`mem.Usage.Mem` | _uint64_ | The total number of bytes obtained from the OS |
|`mem.Usage.Error` | _string_ | Error (if any) encountered while accessing the memory info |
## 6. Heal operations

@ -199,9 +199,10 @@ func (adm *AdminClient) ServerDrivesPerfInfo() ([]ServerDrivesPerfInfo, error) {
// ServerCPULoadInfo holds information about address and cpu load of
// a single server node
type ServerCPULoadInfo struct {
Addr string `json:"addr"`
Error string `json:"error,omitempty"`
Load []cpu.Load `json:"load"`
Addr string `json:"addr"`
Error string `json:"error,omitempty"`
Load []cpu.Load `json:"load"`
HistoricLoad []cpu.Load `json:"historicLoad"`
}
// ServerCPULoadInfo - Returns cpu utilization information
@ -242,9 +243,10 @@ func (adm *AdminClient) ServerCPULoadInfo() ([]ServerCPULoadInfo, error) {
// ServerMemUsageInfo holds information about address and memory utilization of
// a single server node
type ServerMemUsageInfo struct {
Addr string `json:"addr"`
Error string `json:"error,omitempty"`
Usage []mem.Usage `json:"usage"`
Addr string `json:"addr"`
Error string `json:"error,omitempty"`
Usage []mem.Usage `json:"usage"`
HistoricUsage []mem.Usage `json:"historicUsage"`
}
// ServerMemUsageInfo - Returns mem utilization information

@ -18,22 +18,51 @@ package mem
import (
"runtime"
humanize "github.com/dustin/go-humanize"
"time"
)
// historicUsage holds the rolling average of memory used by
// minio server
var historicUsage *Usage
// memUsageMeasureInterval is the window of time between
// two measurements of memory usage
const memUsageMeasureInterval = 5 * time.Second
// triggers the collection of historic stats about the memory
// utilized by minio server
func init() {
historicUsage = &Usage{}
var cycles uint64
go func() {
for {
time.Sleep(memUsageMeasureInterval)
currUsage := GetUsage()
currSum := cycles * historicUsage.Mem
cycles = cycles + 1
historicUsage.Mem = (currSum + currUsage.Mem) / cycles
}
}()
}
// Usage holds memory utilization information as a raw byte count
type Usage struct {
Mem string `json:"mem"`
Mem uint64 `json:"mem"`
Error string `json:"error,omitempty"`
}
// GetHistoricUsage reports the historic average of the memory used by the
// current process. The value is returned as a copy of the package-level
// historicUsage.
func GetHistoricUsage() Usage {
	snapshot := *historicUsage
	return snapshot
}
// GetUsage measures the total memory provisioned for the current process
// from the OS
func GetUsage() Usage {
memStats := new(runtime.MemStats)
runtime.ReadMemStats(memStats)
return Usage{
Mem: humanize.IBytes(memStats.Sys),
Mem: memStats.Sys,
}
}

Loading…
Cancel
Save