From 03991c5d4190ac06af4d59c0e94440e02cd847ba Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 13 Oct 2020 13:45:08 -0700 Subject: [PATCH] crawler: Remove waitForLowActiveIO (#10667) Only use dynamic delays for the crawler. Even though the max wait was 1 second, the number of waits could severely impact crawler speed. Instead of relying on a global metric, we use the stateless local delays to keep the crawler running at a speed more adjusted to current conditions. The only case where we keep waitForLowActiveIO is before bitrot checks, when enabled. --- cmd/data-crawler.go | 22 ++++++++++------------ cmd/data-usage_test.go | 14 +++++++------- cmd/fs-v1.go | 2 +- cmd/xl-storage.go | 2 +- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/cmd/data-crawler.go b/cmd/data-crawler.go index a25d92752..d07696ea0 100644 --- a/cmd/data-crawler.go +++ b/cmd/data-crawler.go @@ -135,12 +135,11 @@ type cachedFolder struct { } type folderScanner struct { - root string - getSize getSizeFn - oldCache dataUsageCache - newCache dataUsageCache - withFilter *bloomFilter - waitForLowActiveIO func() + root string + getSize getSizeFn + oldCache dataUsageCache + newCache dataUsageCache + withFilter *bloomFilter dataUsageCrawlMult float64 dataUsageCrawlDebug bool @@ -155,7 +154,7 @@ type folderScanner struct { // The returned cache will always be valid, but may not be updated from the existing. // Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler. // If the supplied context is canceled the function will return at the first chance. 
-func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) { +func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) { t := UTCNow() logPrefix := color.Green("data-usage: ") @@ -183,7 +182,6 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize: getSize, oldCache: cache, newCache: dataUsageCache{Info: cache.Info}, - waitForLowActiveIO: waitForLowActiveIO, newFolders: nil, existingFolders: nil, dataUsageCrawlMult: delayMult, @@ -376,7 +374,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo } } } - f.waitForLowActiveIO() sleepDuration(dataCrawlSleepPerFolder, f.dataUsageCrawlMult) cache := dataUsageEntry{} @@ -424,7 +421,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo } return nil } - f.waitForLowActiveIO() // Dynamic time delay. t := UTCNow() @@ -484,7 +480,9 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo // If that doesn't bring it back we remove the folder and assume it was deleted. // This means that the next run will not look for it. for k := range existing { - f.waitForLowActiveIO() + // Dynamic time delay. 
+ t := UTCNow() + bucket, prefix := path2BucketObject(k) if f.dataUsageCrawlDebug { logger.Info(color.Green("folder-scanner:")+" checking disappeared folder: %v/%v", bucket, prefix) @@ -498,6 +496,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo versionID: versionID, }, madmin.HealItemObject) }) + sleepDuration(time.Since(t), f.dataUsageCrawlMult) if f.dataUsageCrawlDebug && err != nil { logger.Info(color.Green("healObjects:")+" checking returned value %v", err) @@ -535,7 +534,6 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder) default: } - f.waitForLowActiveIO() if typ&os.ModeDir != 0 { dirStack = append(dirStack, entName) err := readDirFn(path.Join(dirStack...), addDir) diff --git a/cmd/data-usage_test.go b/cmd/data-usage_test.go index dc03560b7..680672950 100644 --- a/cmd/data-usage_test.go +++ b/cmd/data-usage_test.go @@ -62,7 +62,7 @@ func TestDataUsageUpdate(t *testing.T) { return 0, nil } - got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize) + got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize) if err != nil { t.Fatal(err) } @@ -183,7 +183,7 @@ func TestDataUsageUpdate(t *testing.T) { }, } createUsageTestFiles(t, base, bucket, files) - got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize) + got, err = crawlDataFolder(context.Background(), base, got, getSize) if err != nil { t.Fatal(err) } @@ -268,7 +268,7 @@ func TestDataUsageUpdate(t *testing.T) { } // Changed dir must be picked up in this many cycles. 
for i := 0; i < dataUsageUpdateDirCycles; i++ { - got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize) + got, err = crawlDataFolder(context.Background(), base, got, getSize) if err != nil { t.Fatal(err) } @@ -355,7 +355,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) { } return 0, nil } - got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, func() {}, getSize) + got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize) if err != nil { t.Fatal(err) } @@ -465,7 +465,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) { }, } createUsageTestFiles(t, base, "", files) - got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize) + got, err = crawlDataFolder(context.Background(), base, got, getSize) if err != nil { t.Fatal(err) } @@ -548,7 +548,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) { } // Changed dir must be picked up in this many cycles. for i := 0; i < dataUsageUpdateDirCycles; i++ { - got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize) + got, err = crawlDataFolder(context.Background(), base, got, getSize) if err != nil { t.Fatal(err) } @@ -652,7 +652,7 @@ func TestDataUsageCacheSerialize(t *testing.T) { } return 0, nil } - want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize) + want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize) if err != nil { t.Fatal(err) } diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index 15350811d..e09a3ebbc 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -327,7 +327,7 @@ func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataU } // Load bucket info. 
- cache, err = crawlDataFolder(ctx, fs.fsPath, cache, fs.waitForLowActiveIO, func(item crawlItem) (int64, error) { + cache, err = crawlDataFolder(ctx, fs.fsPath, cache, func(item crawlItem) (int64, error) { bucket, object := item.bucket, item.objectPath() fsMetaBytes, err := ioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile)) if err != nil && !os.IsNotExist(err) { diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 57fb4af83..30b243af3 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -372,7 +372,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac } opts := globalCrawlerConfig - dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, s.waitForLowActiveIO, func(item crawlItem) (int64, error) { + dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, func(item crawlItem) (int64, error) { // Look for `xl.meta/xl.json' at the leaf. if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) && !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {