From 6e0575a53d6ccf2a7a33538ea94569ee9eb71828 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 25 May 2020 11:32:53 -0700 Subject: [PATCH] Revert "Disable crawler in FS/NAS gateway mode (#9695)" (#9702) This reverts commit eba423bb9dcbdd33429d8500401104053da7e636. Additionally also address the FS crawler to properly calculate the sizes for encrypted/compressed content. --- cmd/data-usage.go | 8 ++--- cmd/fs-v1.go | 71 ++++++++++++++++++++++++++++++++++++- cmd/gateway-main.go | 2 -- docs/bucket/quota/README.md | 13 +++---- docs/config/README.md | 4 ++- 5 files changed, 79 insertions(+), 19 deletions(-) diff --git a/cmd/data-usage.go b/cmd/data-usage.go index 5ef3d67b9..dcbf85dad 100644 --- a/cmd/data-usage.go +++ b/cmd/data-usage.go @@ -55,12 +55,8 @@ const ( // initDataUsageStats will start the crawler unless disabled. func initDataUsageStats(ctx context.Context, objAPI ObjectLayer) { - // data usage stats are only available erasure - // coded mode - if globalIsXL || globalIsDistXL { - if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn { - go runDataUsageInfo(ctx, objAPI) - } + if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn { + go runDataUsageInfo(ctx, objAPI) } } diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index 7b2d45df3..8dd8b3165 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -19,6 +19,7 @@ package cmd import ( "bytes" "context" + "encoding/json" "fmt" "io" "io/ioutil" @@ -30,6 +31,7 @@ import ( "strings" "sync" "sync/atomic" + "time" jsoniter "github.com/json-iterator/go" "github.com/minio/minio-go/v6/pkg/s3utils" @@ -37,6 +39,7 @@ import ( "github.com/minio/minio/cmd/config" xhttp "github.com/minio/minio/cmd/http" "github.com/minio/minio/cmd/logger" + "github.com/minio/minio/pkg/color" "github.com/minio/minio/pkg/lock" "github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/mimedb" @@ -226,9 +229,75 @@ func (fs *FSObjects) StorageInfo(ctx context.Context, _ bool) StorageInfo { return storageInfo } +func 
(fs *FSObjects) waitForLowActiveIO() { + for atomic.LoadInt64(&fs.activeIOCount) >= fs.maxActiveIOCount { + time.Sleep(lowActiveIOWaitTick) + } +} + // CrawlAndGetDataUsage returns data usage stats of the current FS deployment func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error { - return NotImplemented{} + // Load bucket totals + var oldCache dataUsageCache + err := oldCache.load(ctx, fs, dataUsageCacheName) + if err != nil { + return err + } + if oldCache.Info.Name == "" { + oldCache.Info.Name = dataUsageRoot + } + buckets, err := fs.ListBuckets(ctx) + if err != nil { + return err + } + oldCache.Info.BloomFilter = nil + if bf != nil { + oldCache.Info.BloomFilter = bf.bytes() + } + + if false && intDataUpdateTracker.debug { + b, _ := json.MarshalIndent(bf, "", " ") + logger.Info("Bloom filter: %v", string(b)) + } + cache, err := updateUsage(ctx, fs.fsPath, oldCache, fs.waitForLowActiveIO, func(item Item) (int64, error) { + bucket, object := path2BucketObject(strings.TrimPrefix(item.Path, fs.fsPath)) + fsMetaBytes, err := ioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile)) + if err != nil && !os.IsNotExist(err) { + return 0, errSkipFile + } + // Get file size, symlinks which cannot be + // followed are automatically filtered by fastwalk. + fi, err := os.Stat(item.Path) + if err != nil { + return 0, errSkipFile + } + if len(fsMetaBytes) > 0 { + fsMeta := newFSMetaV1() + var json = jsoniter.ConfigCompatibleWithStandardLibrary + if err = json.Unmarshal(fsMetaBytes, &fsMeta); err != nil { + return 0, errSkipFile + } + return fsMeta.ToObjectInfo(bucket, object, fi).GetActualSize() + } + return fi.Size(), nil + + }) + cache.Info.BloomFilter = nil + + // Even if there was an error, the new cache may have better info. 
+ if cache.Info.LastUpdate.After(oldCache.Info.LastUpdate) { + if intDataUpdateTracker.debug { + logger.Info(color.Green("CrawlAndGetDataUsage:")+" Saving cache with %d entries", len(cache.Cache)) + } + logger.LogIf(ctx, cache.save(ctx, fs, dataUsageCacheName)) + updates <- cache.dui(dataUsageRoot, buckets) + } else { + if intDataUpdateTracker.debug { + logger.Info(color.Green("CrawlAndGetDataUsage:")+" Cache not updated, %d entries", len(cache.Cache)) + } + } + + return err } /// Bucket operations diff --git a/cmd/gateway-main.go b/cmd/gateway-main.go index 22f586a85..a8f5d075b 100644 --- a/cmd/gateway-main.go +++ b/cmd/gateway-main.go @@ -183,8 +183,6 @@ func StartGateway(ctx *cli.Context, gw Gateway) { // Enable IAM admin APIs if etcd is enabled, if not just enable basic // operations such as profiling, server info etc. - // - // quota opts are disabled in gateway mode. registerAdminRouter(router, enableConfigOps, enableIAMOps) // Add healthcheck router diff --git a/docs/bucket/quota/README.md b/docs/bucket/quota/README.md index ddcee0acf..ffd1ba900 100644 --- a/docs/bucket/quota/README.md +++ b/docs/bucket/quota/README.md @@ -1,32 +1,27 @@ # Bucket Quota Configuration Quickstart Guide [![Slack](https://slack.min.io/slack?type=svg)](https://slack.min.io) [![Docker Pulls](https://img.shields.io/docker/pulls/minio/minio.svg?maxAge=604800)](https://hub.docker.com/r/minio/minio/) - ![quota](bucketquota.png) - Buckets can be configured to have one of two types of quota configuration - FIFO and Hard quota. - `Hard` quota disallows writes to the bucket after configured quota limit is reached. +- `FIFO` quota automatically deletes oldest content until bucket usage falls within configured limit while permitting writes. -- `FIFO` quota automatically deletes oldest content until bucket usage falls within configured limit while permitting writes. +> NOTE: Bucket quotas are not supported under Gateway deployments. ## 1. 
Prerequisites - Install MinIO - [MinIO Quickstart Guide](https://docs.min.io/docs/minio-quickstart-guide). - [Use `mc` with MinIO Server](https://docs.min.io/docs/minio-client-quickstart-guide) - - ## 2. Set bucket quota configuration -1. Set a hard quota of 1GB for a bucket `mybucket` on MinIO -object storage: +1. Set a hard quota of 1GB for a bucket `mybucket` on MinIO object storage: ```sh $ mc admin bucket quota myminio/mybucket --hard 1gb ``` -2. Set FIFO quota of 5GB for a bucket "mybucket" on MinIO to allow automatic deletion of -older content to ensure bucket usage remains within 5GB +2. Set FIFO quota of 5GB for a bucket "mybucket" on MinIO to allow automatic deletion of older content to ensure bucket usage remains within 5GB ```sh $ mc admin bucket quota myminio/mybucket --fifo 5gb diff --git a/docs/config/README.md b/docs/config/README.md index 54a05aad4..6152a5b89 100644 --- a/docs/config/README.md +++ b/docs/config/README.md @@ -257,7 +257,9 @@ This behavior is consistent across all keys, each key self documents itself with ## Environment only settings (not in config) #### Usage crawler -Data usage crawler is enabled by default on erasure coded and distributed erasure coded deployments. +> NOTE: Data usage crawler is not supported under Gateway deployments. + +Data usage crawler is enabled by default; the following ENVs allow for a more staggered delay in usage calculation. The crawler adapts to the system speed and completely pauses when the system is under load. It is possible to adjust the speed of the crawler and thereby the latency of updates being reflected. The delays between each operation of the crawl can be adjusted by the `MINIO_DISK_USAGE_CRAWL_DELAY` environment variable. By default the value is `10`. This means the crawler will sleep *10x* the time each operation takes.