From 957ecb1b64dfce7bb140c4e8f3ec4b0dc5f6987a Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Thu, 23 Apr 2020 12:26:13 -0700 Subject: [PATCH] use optimal memory while purging cache (#9426) Re-implement the cache purging routine to avoid using ioutil.ReadDir, which can lead to high allocations when there are cache directories with lots of content, or when the cache is installed in memory-constrained environments. Instead, rely on a callback function that uses no more than 8KiB of memory per cycle. For the precursor to this change refer to #9425; original issue pointed out by Caleb Case --- cmd/disk-cache-backend.go | 45 ++++++++++++++++-------- cmd/posix-list-dir_other.go | 40 ++++++++++++++++++++- cmd/posix-list-dir_unix.go | 55 ++++++++++++++++++++++++++++- cmd/posix-list-dir_windows.go | 65 ++++++++++++++++++++++++++++++++++- 4 files changed, 188 insertions(+), 17 deletions(-) diff --git a/cmd/disk-cache-backend.go b/cmd/disk-cache-backend.go index 2061bb8e0..475a36bd0 100644 --- a/cmd/disk-cache-backend.go +++ b/cmd/disk-cache-backend.go @@ -21,10 +21,10 @@ import ( "context" "crypto/rand" "encoding/hex" + "errors" "fmt" "io" "io/ioutil" - "log" "net/http" "os" "strings" @@ -228,6 +228,10 @@ func (c *diskCache) toClear() uint64 { return bytesToClear(int64(di.Total), int64(di.Free), uint64(c.quotaPct), uint64(c.lowWatermark)) } +var ( + errDoneForNow = errors.New("done for now") +) + // Purge cache entries that were not accessed. func (c *diskCache) purge(ctx context.Context) { if c.diskUsageLow() { @@ -263,25 +267,24 @@ func (c *diskCache) purge(ctx context.Context) { } return fm } - objDirs, err := ioutil.ReadDir(c.dir) - if err != nil { - log.Fatal(err) - } - for _, obj := range objDirs { - if obj.Name() == minioMetaBucket { - continue + filterFn := func(name string, typ os.FileMode) error { + if name == minioMetaBucket { + // Proceed to next file.
+ return nil } - cacheDir := pathJoin(c.dir, obj.Name()) + cacheDir := pathJoin(c.dir, name) meta, _, numHits, err := c.statCachedMeta(ctx, cacheDir) if err != nil { // delete any partially filled cache entry left behind. removeAll(cacheDir) - continue + // Proceed to next file. + return nil } + // stat all cached file ranges and cacheDataFile. - cachedFiles := fiStatFn(meta.Ranges, cacheDataFile, pathJoin(c.dir, obj.Name())) + cachedFiles := fiStatFn(meta.Ranges, cacheDataFile, pathJoin(c.dir, name)) objInfo := meta.ToObjectInfo("", "") cc := cacheControlOpts(objInfo) for fname, fi := range cachedFiles { @@ -291,9 +294,11 @@ func (c *diskCache) purge(ctx context.Context) { logger.LogIf(ctx, err) } scorer.adjustSaveBytes(-fi.Size()) + // break early if sufficient disk space reclaimed. if c.diskUsageLow() { - return + // disk usage is already low, return errDoneForNow to stop filtering. + return errDoneForNow } } continue @@ -305,12 +310,24 @@ func (c *diskCache) purge(ctx context.Context) { if err != nil || (fi.ModTime().Before(expiry) && len(cachedFiles) == 0) { removeAll(cacheDir) scorer.adjustSaveBytes(-fi.Size()) - continue + // Proceed to next file. + return nil } + + // if disk usage is already low, return errDoneForNow to stop filtering. if c.diskUsageLow() { - return + return errDoneForNow } + + // Proceed to next file. + return nil + } + + if err := readDirFilterFn(c.dir, filterFn); err != nil { + logger.LogIf(ctx, err) + return } + for _, path := range scorer.fileNames() { removeAll(path) slashIdx := strings.LastIndex(path, SlashSeparator) diff --git a/cmd/posix-list-dir_other.go b/cmd/posix-list-dir_other.go index 32b05a062..5bdfaa1e6 100644 --- a/cmd/posix-list-dir_other.go +++ b/cmd/posix-list-dir_other.go @@ -1,7 +1,7 @@ // +build plan9 solaris /* - * MinIO Cloud Storage, (C) 2016, 2017, 2018 MinIO, Inc. + * MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,6 +32,44 @@ func readDir(dirPath string) (entries []string, err error) { return readDirN(dirPath, -1) } +// readDirFilterFn applies the filter function to each entry at dirPath, it doesn't recurse into +// the directory itself. Stops early with nil once filter returns errDoneForNow; other filter errors are ignored. +func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error { + d, err := os.Open(dirPath) + if err != nil { + // File is really not found. + if os.IsNotExist(err) { + return errFileNotFound + } + + // File path cannot be verified since one of the parents is a file. + if strings.Contains(err.Error(), "not a directory") { + return errFileNotFound + } + return err + } + defer d.Close() + + maxEntries := 1000 + for { + // Read up to max number of entries. + fis, err := d.Readdir(maxEntries) + if err != nil { + if err == io.EOF { + break + } + return err + } + for _, fi := range fis { + if err = filter(fi.Name(), fi.Mode()); err == errDoneForNow { + // filter asked to stop early, this is not an error. + return nil + } + } + } + return nil +} + // Return N entries at the directory dirPath. If count is -1, return all entries func readDirN(dirPath string, count int) (entries []string, err error) { d, err := os.Open(dirPath) diff --git a/cmd/posix-list-dir_unix.go b/cmd/posix-list-dir_unix.go index aebd0c7ab..c4d101f98 100644 --- a/cmd/posix-list-dir_unix.go +++ b/cmd/posix-list-dir_unix.go @@ -1,7 +1,7 @@ // +build linux,!appengine darwin freebsd netbsd openbsd /* - * MinIO Cloud Storage, (C) 2016, 2017, 2018 MinIO, Inc. + * MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -75,6 +75,59 @@ func readDir(dirPath string) (entries []string, err error) { return readDirN(dirPath, -1) } +// readDirFilterFn applies the filter function to each entry at dirPath, it doesn't recurse into +// the directory itself. Stops early with nil once filter returns errDoneForNow; other filter errors are ignored. +func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error { + fd, err := syscall.Open(dirPath, 0, 0) + if err != nil { + if os.IsNotExist(err) || isSysErrNotDir(err) { + return errFileNotFound + } + if os.IsPermission(err) { + return errFileAccessDenied + } + return err + } + defer syscall.Close(fd) + + buf := make([]byte, blockSize) // stack-allocated; doesn't escape + boff := 0 // starting read position in buf + nbuf := 0 // end valid data in buf + + for { + if boff >= nbuf { + boff = 0 + nbuf, err = syscall.ReadDirent(fd, buf) + if err != nil { + if isSysErrNotDir(err) { + return errFileNotFound + } + return err + } + if nbuf <= 0 { + break + } + } + consumed, name, typ, err := parseDirEnt(buf[boff:nbuf]) + if err != nil { + return err + } + boff += consumed + if name == "" || name == "." || name == ".." { + continue + } + if typ&os.ModeSymlink == os.ModeSymlink { + continue + } + if err = filter(name, typ); err == errDoneForNow { + // filter asked to stop early, this is not an error. + return nil + } + } + + return err +} + // Return count entries at the directory dirPath and all entries // if count is set to -1 func readDirN(dirPath string, count int) (entries []string, err error) { diff --git a/cmd/posix-list-dir_windows.go b/cmd/posix-list-dir_windows.go index f04a0fe89..158221ff9 100644 --- a/cmd/posix-list-dir_windows.go +++ b/cmd/posix-list-dir_windows.go @@ -1,7 +1,7 @@ // +build windows /* - * MinIO Cloud Storage, (C) 2016, 2017, 2018 MinIO, Inc. + * MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -29,6 +29,69 @@ func readDir(dirPath string) (entries []string, err error) { return readDirN(dirPath, -1) } +// readDirFilterFn applies the filter function to each entry at dirPath, it doesn't recurse into +// the directory itself. Stops early with nil once filter returns errDoneForNow; after the last entry it returns err, which holds the result of the most recent filter call (if any). +func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error { + d, err := os.Open(dirPath) + if err != nil { + // File is really not found. + if os.IsNotExist(err) { + return errFileNotFound + } + + // File path cannot be verified since one of the parents is a file. + if strings.Contains(err.Error(), "not a directory") { + return errFileNotFound + } + return err + } + defer d.Close() + + st, err := d.Stat() + if err != nil { + return err + } + // Not a directory return error. + if !st.IsDir() { + return errFileAccessDenied + } + + data := &syscall.Win32finddata{} + + for { + e := syscall.FindNextFile(syscall.Handle(d.Fd()), data) + if e != nil { + if e == syscall.ERROR_NO_MORE_FILES { + break + } else { + err = &os.PathError{ + Op: "FindNextFile", + Path: dirPath, + Err: e, + } + return err + } + } + name := syscall.UTF16ToString(data.FileName[0:]) + if name == "" || name == "." || name == ".." { // Useless names + continue + } + if data.FileAttributes&syscall.FILE_ATTRIBUTE_REPARSE_POINT != 0 { + continue + } + var typ os.FileMode = 0 // regular file + if data.FileAttributes&syscall.FILE_ATTRIBUTE_DIRECTORY != 0 { + typ = os.ModeDir + } + if err = filter(name, typ); err == errDoneForNow { + // filter asked to stop early, this is not an error. + return nil + } + } + + return err +} + // Return N entries at the directory dirPath. If count is -1, return all entries func readDirN(dirPath string, count int) (entries []string, err error) { d, err := os.Open(dirPath)