From 3bf67668b6ae2933b45d601df72f5bade4e79166 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sun, 2 Apr 2017 10:46:16 -0700 Subject: [PATCH] sys/stats: return cgroup mem limit, fall back to sysinfo() (#4002) This is necessary in certain environments where cgroup is used to limit memory usage of a container or a particular process. GetStats() is used by the caching module to figure out the optimal cacheable size in memory; with cgroup limits, what sysinfo reports might not be the right value set for a given process. Fixes #4001 --- pkg/cgroup/linux.go | 177 +++++++++++++++++++++++++++++++++++++++ pkg/cgroup/linux_test.go | 140 +++++++++++++++++++++++++++++++ pkg/cgroup/others.go | 19 +++++ pkg/sys/stats_linux.go | 65 ++++++++++++-- pkg/sys/stats_test.go | 2 +- 5 files changed, 395 insertions(+), 8 deletions(-) create mode 100644 pkg/cgroup/linux.go create mode 100644 pkg/cgroup/linux_test.go create mode 100644 pkg/cgroup/others.go diff --git a/pkg/cgroup/linux.go b/pkg/cgroup/linux.go new file mode 100644 index 000000000..34854bbd8 --- /dev/null +++ b/pkg/cgroup/linux.go @@ -0,0 +1,177 @@ +// +build linux + +/* + * Minio Cloud Storage, (C) 2017 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Package cgroup implements parsing for all the cgroup +// categories and functionality in a simple way.
+package cgroup + +import ( + "bufio" + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" +) + +// DO NOT EDIT following constants are chosen defaults for any kernel +// after 3.x, please open a github issue https://github.com/minio/minio/issues +// and discuss first if you wish to change this. +const ( + // Default string for looking for kernel memory param. + memoryLimitKernelParam = "memory.limit_in_bytes" + + // Points to sys path memory path. + cgroupMemSysPath = "/sys/fs/cgroup/memory" + + // Default docker prefix. + dockerPrefixName = "/docker/" + + // Proc controller group path. + cgroupFileTemplate = "/proc/%d/cgroup" +) + +// CGEntries - represents all the entries in a process cgroup file +// at /proc/<pid>/cgroup as key value pairs. +type CGEntries map[string]string + +// GetEntries reads and parses all the cgroup entries for a given process. +func GetEntries(pid int) (CGEntries, error) { + r, err := os.Open(fmt.Sprintf(cgroupFileTemplate, pid)) + if err != nil { + return nil, err + } + defer r.Close() + return parseProcCGroup(r) +} + +// parseProcCGroup - cgroups are always in the following +// format once enabled. You need to know the pid of the +// application you are looking for so that the +// following parsing logic only parses the file located +// at /proc/<pid>/cgroup. +// +// CGROUP entries id, component and path are always in +// the following format. ``ID:COMPONENT:PATH`` +// +// Following code block parses this information and +// returns CGEntries which is a parsed map of all +// the line by line entries from /proc/<pid>/cgroup. +func parseProcCGroup(r io.Reader) (CGEntries, error) { + var cgEntries = CGEntries{} + + // Start reading cgroup categories line by line + // and process them into the CGEntries map.
+ scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := scanner.Text() + + tokens := strings.SplitN(line, ":", 3) + if len(tokens) < 3 { + continue + } + + name, path := tokens[1], tokens[2] + for _, token := range strings.Split(name, ",") { + name = strings.TrimPrefix(token, "name=") + cgEntries[name] = path + } + } + + // Return upon any error while reading the cgroup categories. + if err := scanner.Err(); err != nil { + return nil, err + } + + return cgEntries, nil +} + +// Fetch value of the cgroup kernel param from the cgroup manager, +// if cgroup manager is configured we should just rely on `cgm` cli +// to fetch all the values for us. +func getManagerKernValue(cname, path, kernParam string) (limit uint64, err error) { + + cmd := exec.Command("cgm", "getvalue", cname, path, kernParam) + var out bytes.Buffer + cmd.Stdout = &out + if err = cmd.Run(); err != nil { + return 0, err + } + + // Parse the cgm output. + limit, err = strconv.ParseUint(strings.TrimSpace(out.String()), 10, 64) + return limit, err +} + +// Get cgroup memory limit file path. +func getMemoryLimitFilePath(cgPath string) string { + path := cgroupMemSysPath + + // Docker generates weird cgroup paths that don't + // really exist on the file system. + // + // For example on regular Linux OS : + // `/user.slice/user-1000.slice/session-1.scope` + // + // But they exist as a bind mount on Docker and + // are not accessible : `/docker/<container-id>` + // + // We will just ignore the cgroup path if it starts with + // `/docker/` and fall back to : + // `/sys/fs/cgroup/memory/memory.limit_in_bytes` + if !strings.HasPrefix(cgPath, dockerPrefixName) { + path = filepath.Join(path, cgPath) + } + + // Final path. + return filepath.Join(path, memoryLimitKernelParam) +} + +// GetMemoryLimit - Fetches cgroup memory limit by querying the +// cgroup manager (`cgm`) first, if that fails then fall back to +// a file path at '/sys/fs/cgroup/memory'.
+func GetMemoryLimit(pid int) (limit uint64, err error) { + var cg CGEntries + cg, err = GetEntries(pid) + if err != nil { + return 0, err + } + + path := cg["memory"] + + limit, err = getManagerKernValue("memory", path, memoryLimitKernelParam) + if err != nil { + + // `cgm` can fail for any reason, on some systems cgm + // might not be installed. We fall back to using the sysfs + // path instead to look up memory limits. + var b []byte + b, err = ioutil.ReadFile(getMemoryLimitFilePath(path)) + if err != nil { + return 0, err + } + + limit, err = strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64) + } + + return limit, err +} diff --git a/pkg/cgroup/linux_test.go b/pkg/cgroup/linux_test.go new file mode 100644 index 000000000..02b817a50 --- /dev/null +++ b/pkg/cgroup/linux_test.go @@ -0,0 +1,140 @@ +// +build linux + +/* + * Minio Cloud Storage, (C) 2017 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cgroup + +import ( + "io/ioutil" + "os" + "testing" +) + +// Testing parsing correctness for various process cgroup files.
+func TestProcCGroup(t *testing.T) { + tmpPath, err := ioutil.TempFile("", "cgroup") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmpPath.Name()) + + cgroup := ` +11:memory:/user.slice +10:blkio:/user.slice +9:hugetlb:/ +8:net_cls,net_prio:/ +7:perf_event:/ +6:pids:/user.slice/user-1000.slice +5:devices:/user.slice +4:cpuset:/ +3:cpu,cpuacct:/user.slice +2:freezer:/ +1:name=systemd:/user.slice/user-1000.slice/session-1.scope +` + _, err = tmpPath.WriteString(cgroup) + if err != nil { + t.Fatal(err) + } + + // Seek back to read from the beginning. + tmpPath.Seek(0, 0) + + cg, err := parseProcCGroup(tmpPath) + if err != nil { + t.Fatal(err) + } + + path := cg["memory"] + if len(path) == 0 { + t.Fatal("Path component cannot be empty") + } + + if path != "/user.slice" { + t.Fatalf("Expected memory cgroup path %s, got %s", "/user.slice", path) + } + + path = cg["systemd"] + if path != "/user.slice/user-1000.slice/session-1.scope" { + t.Fatalf("Expected systemd cgroup path %s, got %s", "/user.slice/user-1000.slice/session-1.scope", path) + } + + // Mixed cgroups with different group names. + cgroup = ` +11:memory:/newtest/newtest +10:blkio:/user.slice +9:hugetlb:/ +8:net_cls,net_prio:/ +7:perf_event:/ +6:pids:/user.slice/user-1000.slice +5:devices:/user.slice +4:cpuset:/ +3:cpu,cpuacct:/newtest/newtest +2:freezer:/ +1:name=systemd:/user.slice/user-1000.slice/session-1.scope +` + + // Seek back to overwrite from the beginning. + tmpPath.Seek(0, 0) + + _, err = tmpPath.WriteString(cgroup) + if err != nil { + t.Fatal(err) + } + + // Seek back to read from the beginning. + tmpPath.Seek(0, 0) + + cg, err = parseProcCGroup(tmpPath) + if err != nil { + t.Fatal(err) + } + + path = cg["memory"] + if path != "/newtest/newtest" { + t.Fatalf("Expected memory cgroup path %s, got %s", "/newtest/newtest", path) + } + + path = cg["systemd"] + if path != "/user.slice/user-1000.slice/session-1.scope" { + t.Fatalf("Expected systemd cgroup path %s, got %s", "/user.slice/user-1000.slice/session-1.scope", path) + } + +} + +// Tests cgroup memory limit path construction.
+func TestMemoryLimitPath(t *testing.T) { + testCases := []struct { + cgroupPath string + expectedPath string + }{ + { // Regular cgroup path is expected under the memory sysfs root. + cgroupPath: "/user.slice", + expectedPath: "/sys/fs/cgroup/memory/user.slice/memory.limit_in_bytes", + }, + { // Docker cgroup path is expected to be ignored in favor of the memory sysfs root. + cgroupPath: "/docker/testing", + expectedPath: "/sys/fs/cgroup/memory/memory.limit_in_bytes", + }, + } + + for i, testCase := range testCases { + actualPath := getMemoryLimitFilePath(testCase.cgroupPath) + if actualPath != testCase.expectedPath { + t.Fatalf("Test: %d: Expected: %s, got %s", i+1, testCase.expectedPath, actualPath) + } + } +} diff --git a/pkg/cgroup/others.go b/pkg/cgroup/others.go new file mode 100644 index 000000000..d36d4d7f5 --- /dev/null +++ b/pkg/cgroup/others.go @@ -0,0 +1,19 @@ +// +build !linux + +/* + * Minio Cloud Storage, (C) 2017 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cgroup diff --git a/pkg/sys/stats_linux.go b/pkg/sys/stats_linux.go index 6cd5c6002..dbb91bb12 100644 --- a/pkg/sys/stats_linux.go +++ b/pkg/sys/stats_linux.go @@ -6,7 +6,7 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - *shouldP + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -18,14 +18,65 @@ package sys -import "syscall" +import ( + "os" + "syscall" -// GetStats - return system statistics.
-func GetStats() (stats Stats, err error) { + "github.com/minio/minio/pkg/cgroup" +) + +// Get the final system memory limit chosen by the user. +// By default without any configuration on a vanilla Linux +// system you would see physical RAM limit. If cgroup +// is configured at some point in time this function +// would return the memory limit chosen for the given pid. +func getMemoryLimit() (sysLimit uint64, err error) { + if sysLimit, err = getSysinfoMemoryLimit(); err != nil { + // Physical memory info is not accessible, just exit here. + return 0, err + } + + // Following code is deliberately ignoring the error. + cGroupLimit, gerr := cgroup.GetMemoryLimit(os.Getpid()) + if gerr != nil { + // Upon error just return system limit. + return sysLimit, nil + } + + // cgroup limit not exceeding the system limit means + // user wants to limit the memory usage further, + // treat cgroup limit as the system limit. + if cGroupLimit <= sysLimit { + sysLimit = cGroupLimit + } + + // Final system limit. + return sysLimit, nil + +} + +// Get physical RAM size of the node. +func getSysinfoMemoryLimit() (limit uint64, err error) { var si syscall.Sysinfo_t - if err = syscall.Sysinfo(&si); err == nil { - stats.TotalRAM = uint64(si.Totalram) + if err = syscall.Sysinfo(&si); err != nil { + return 0, err + } + + // Total RAM is the product of unit size and total ram, both converted + // to uint64 since Totalram is not uint64 on every Linux architecture. + limit = uint64(si.Unit) * uint64(si.Totalram) + return limit, nil +} + +// GetStats - return system statistics, currently only +// supported value is TotalRAM. +func GetStats() (stats Stats, err error) { + var limit uint64 + limit, err = getMemoryLimit() + if err != nil { + return Stats{}, err } - return stats, err + stats.TotalRAM = limit + return stats, nil } diff --git a/pkg/sys/stats_test.go b/pkg/sys/stats_test.go index e2666639e..178bd9d1e 100644 --- a/pkg/sys/stats_test.go +++ b/pkg/sys/stats_test.go @@ -1,5 +1,5 @@ /* - * Minio Cloud Storage, (C) 2016,2017 Minio, Inc.
+ * Minio Cloud Storage, (C) 2016, 2017 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.