fix: reduce crawler memory usage by orders of magnitude (#11556)

Currently the crawler waits for an entire readdir call to
return before it processes usage, lifecycle, replication
and healing. Instead, we should pass the applicator all
the way down to avoid building any special stack for all
the contents of a single directory.

With this change there is:

- no need to remember the entire list of entries per directory
  before applying the required functions
- no need to wait for entire readdir() call to finish before
  applying the required functions
master
Harshavardhana 4 years ago committed by GitHub
parent e07918abe3
commit 289e1d8b2a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      cmd/data-scanner.go
  2. 9
      cmd/disk-cache-backend.go
  3. 38
      cmd/fastwalk.go
  4. 2
      cmd/metacache-server-pool.go
  5. 13
      cmd/os-readdir_other.go
  6. 16
      cmd/os-readdir_unix.go
  7. 15
      cmd/os-readdir_windows.go
  8. 9
      cmd/storage-errors.go

@ -407,19 +407,19 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
if f.dataUsageCrawlDebug { if f.dataUsageCrawlDebug {
console.Debugf(scannerLogPrefix+" no bucket (%s,%s)\n", f.root, entName) console.Debugf(scannerLogPrefix+" no bucket (%s,%s)\n", f.root, entName)
} }
return nil return errDoneForNow
} }
if isReservedOrInvalidBucket(bucket, false) { if isReservedOrInvalidBucket(bucket, false) {
if f.dataUsageCrawlDebug { if f.dataUsageCrawlDebug {
console.Debugf(scannerLogPrefix+" invalid bucket: %v, entry: %v\n", bucket, entName) console.Debugf(scannerLogPrefix+" invalid bucket: %v, entry: %v\n", bucket, entName)
} }
return nil return errDoneForNow
} }
select { select {
case <-done: case <-done:
return ctx.Err() return errDoneForNow
default: default:
} }
@ -682,7 +682,7 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
addDir = func(entName string, typ os.FileMode) error { addDir = func(entName string, typ os.FileMode) error {
select { select {
case <-done: case <-done:
return ctx.Err() return errDoneForNow
default: default:
} }

@ -23,7 +23,6 @@ import (
"crypto/rand" "crypto/rand"
"encoding/base64" "encoding/base64"
"encoding/hex" "encoding/hex"
"errors"
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
@ -269,10 +268,6 @@ func (c *diskCache) toClear() uint64 {
return bytesToClear(int64(di.Total), int64(di.Free), uint64(c.quotaPct), uint64(c.lowWatermark), uint64(c.highWatermark)) return bytesToClear(int64(di.Total), int64(di.Free), uint64(c.quotaPct), uint64(c.lowWatermark), uint64(c.highWatermark))
} }
var (
errDoneForNow = errors.New("done for now")
)
func (c *diskCache) purgeWait(ctx context.Context) { func (c *diskCache) purgeWait(ctx context.Context) {
for { for {
select { select {
@ -382,7 +377,7 @@ func (c *diskCache) purge(ctx context.Context) {
return nil return nil
} }
if err := readDirFilterFn(c.dir, filterFn); err != nil { if err := readDirFn(c.dir, filterFn); err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
return return
} }
@ -1025,7 +1020,7 @@ func (c *diskCache) scanCacheWritebackFailures(ctx context.Context) {
return nil return nil
} }
if err := readDirFilterFn(c.dir, filterFn); err != nil { if err := readDirFn(c.dir, filterFn); err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
return return
} }

@ -1,38 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code is imported from "golang.org/x/tools/internal/fastwalk",
// only fastwalk.go is imported since we already implement readDir()
// with some little tweaks.
package cmd
import (
"errors"
"os"
"strings"
)
var errSkipFile = errors.New("fastwalk: skip this file")
// readDirFn reads the entries of dirName via readDir and calls fn once per
// entry, passing the entry name and a mode that has os.ModeDir set when the
// name ends with SlashSeparator (and is zero otherwise).
//
// The complete listing is fetched before fn is called for the first entry.
// A directory that does not exist is not treated as an error: nil is returned
// and fn is never invoked. Any other readDir error, or the first non-nil
// error returned by fn, is returned unchanged and stops the iteration.
func readDirFn(dirName string, fn func(entName string, typ os.FileMode) error) error {
fis, err := readDir(dirName)
if err != nil {
// A missing directory is handled the same way as an empty one.
if osIsNotExist(err) || err == errFileNotFound {
return nil
}
return err
}
for _, fi := range fis {
// The only type information derived here is the trailing-separator
// check on the entry name; everything else is reported with mode 0.
var mode os.FileMode
if strings.HasSuffix(fi, SlashSeparator) {
mode |= os.ModeDir
}
// Every non-nil error from fn aborts the walk and is handed back to the caller.
if err = fn(fi, mode); err != nil {
return err
}
}
return nil
}

@ -33,7 +33,7 @@ import (
func renameAllBucketMetacache(epPath string) error { func renameAllBucketMetacache(epPath string) error {
// Rename all previous `.minio.sys/buckets/<bucketname>/.metacache` to // Rename all previous `.minio.sys/buckets/<bucketname>/.metacache` to
// to `.minio.sys/tmp/` for deletion. // to `.minio.sys/tmp/` for deletion.
return readDirFilterFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error { return readDirFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error {
if typ == os.ModeDir { if typ == os.ModeDir {
tmpMetacacheOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID()) tmpMetacacheOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID())
if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)), if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)),

@ -29,11 +29,15 @@ func readDir(dirPath string) (entries []string, err error) {
return readDirN(dirPath, -1) return readDirN(dirPath, -1)
} }
// readDir applies the filter function on each entries at dirPath, doesn't recurse into // readDirFn applies the fn() function on each entries at dirPath, doesn't recurse into
// the directory itself. // the directory itself, if the dirPath doesn't exist this function doesn't return
func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error { // an error.
func readDirFn(dirPath string, filter func(name string, typ os.FileMode) error) error {
d, err := os.Open(dirPath) d, err := os.Open(dirPath)
if err != nil { if err != nil {
if osErrToFileErr(err) == errFileNotFound {
return nil
}
return osErrToFileErr(err) return osErrToFileErr(err)
} }
defer d.Close() defer d.Close()
@ -46,6 +50,9 @@ func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) e
if err == io.EOF { if err == io.EOF {
break break
} }
if osErrToFileErr(err) == errFileNotFound {
return nil
}
return osErrToFileErr(err) return osErrToFileErr(err)
} }
for _, fi := range fis { for _, fi := range fis {

@ -84,11 +84,15 @@ func readDir(dirPath string) (entries []string, err error) {
return readDirN(dirPath, -1) return readDirN(dirPath, -1)
} }
// readDir applies the filter function on each entries at dirPath, doesn't recurse into // readDirFn applies the fn() function on each entries at dirPath, doesn't recurse into
// the directory itself. // the directory itself, if the dirPath doesn't exist this function doesn't return
func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error { // an error.
func readDirFn(dirPath string, fn func(name string, typ os.FileMode) error) error {
f, err := os.Open(dirPath) f, err := os.Open(dirPath)
if err != nil { if err != nil {
if osErrToFileErr(err) == errFileNotFound {
return nil
}
return osErrToFileErr(err) return osErrToFileErr(err)
} }
defer f.Close() defer f.Close()
@ -103,7 +107,7 @@ func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) e
nbuf, err = syscall.ReadDirent(int(f.Fd()), buf) nbuf, err = syscall.ReadDirent(int(f.Fd()), buf)
if err != nil { if err != nil {
if isSysErrNotDir(err) { if isSysErrNotDir(err) {
return errFileNotFound return nil
} }
return err return err
} }
@ -122,8 +126,8 @@ func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) e
if typ&os.ModeSymlink == os.ModeSymlink { if typ&os.ModeSymlink == os.ModeSymlink {
continue continue
} }
if err = filter(string(name), typ); err == errDoneForNow { if err = fn(string(name), typ); err == errDoneForNow {
// filtering requested to return by caller. // fn() requested to return by caller.
return nil return nil
} }
} }

@ -29,11 +29,15 @@ func readDir(dirPath string) (entries []string, err error) {
return readDirN(dirPath, -1) return readDirN(dirPath, -1)
} }
// readDir applies the filter function on each entries at dirPath, doesn't recurse into // readDirFn applies the fn() function on each entries at dirPath, doesn't recurse into
// the directory itself. // the directory itself, if the dirPath doesn't exist this function doesn't return
func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error { // an error.
func readDirFn(dirPath string, filter func(name string, typ os.FileMode) error) error {
f, err := os.Open(dirPath) f, err := os.Open(dirPath)
if err != nil { if err != nil {
if osErrToFileErr(err) == errFileNotFound {
return nil
}
return osErrToFileErr(err) return osErrToFileErr(err)
} }
defer f.Close() defer f.Close()
@ -45,6 +49,9 @@ func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) e
if e == syscall.ERROR_NO_MORE_FILES { if e == syscall.ERROR_NO_MORE_FILES {
break break
} else { } else {
if isSysErrPathNotFound(e) {
return nil
}
return osErrToFileErr(&os.PathError{ return osErrToFileErr(&os.PathError{
Op: "FindNextFile", Op: "FindNextFile",
Path: dirPath, Path: dirPath,
@ -69,7 +76,7 @@ func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) e
} }
} }
return err return nil
} }
// Return N entries at the directory dirPath. If count is -1, return all entries // Return N entries at the directory dirPath. If count is -1, return all entries

@ -16,6 +16,8 @@
package cmd package cmd
import "errors"
// errUnexpected - unexpected error, requires manual intervention. // errUnexpected - unexpected error, requires manual intervention.
var errUnexpected = StorageErr("unexpected error, please report this issue at https://github.com/minio/minio/issues") var errUnexpected = StorageErr("unexpected error, please report this issue at https://github.com/minio/minio/issues")
@ -104,6 +106,13 @@ var errLessData = StorageErr("less data available than what was requested")
// errMoreData = returned when more data was sent by the caller than what it was supposed to. // errMoreData = returned when more data was sent by the caller than what it was supposed to.
var errMoreData = StorageErr("more data was sent than what was advertised") var errMoreData = StorageErr("more data was sent than what was advertised")
// indicates readDirFn to return without further applying the fn()
var errDoneForNow = errors.New("done for now")
// errSkipFile returned by the fn() for readDirFn() when it needs
// to proceed to next entry.
var errSkipFile = errors.New("skip this file")
// StorageErr represents error generated by xlStorage call. // StorageErr represents error generated by xlStorage call.
type StorageErr string type StorageErr string

Loading…
Cancel
Save