From eba423bb9dcbdd33429d8500401104053da7e636 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Mon, 25 May 2020 00:17:52 -0700
Subject: [PATCH] Disable crawler in FS/NAS gateway mode (#9695)

No one really uses FS for large-scale accounting usage, nor do we crawl
in NAS gateway mode. It is worthwhile to simply disable this feature, as
it is not useful for anyone.

Bonus: disable bucket quota ops as well in FS and gateway mode.
---
 cmd/admin-handlers-users.go |  1 +
 cmd/admin-handlers_test.go  |  2 +-
 cmd/admin-router.go         | 20 +++++++------
 cmd/data-usage.go           |  8 ++++--
 cmd/fs-v1.go                | 57 +------------------------------------
 cmd/gateway-main.go         |  5 ++--
 cmd/routers.go              |  4 +--
 cmd/server-main.go          |  5 +---
 cmd/test-utils_test.go      |  2 +-
 docs/config/README.md       |  2 +-
 10 files changed, 29 insertions(+), 77 deletions(-)

diff --git a/cmd/admin-handlers-users.go b/cmd/admin-handlers-users.go
index 8e9c5dc90..39671cc7b 100644
--- a/cmd/admin-handlers-users.go
+++ b/cmd/admin-handlers-users.go
@@ -636,6 +636,7 @@ func (a adminAPIHandlers) AccountUsageInfoHandler(w http.ResponseWriter, r *http
     // Load the latest calculated data usage
     dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI)
     if err != nil {
+        // log the error, continue with the accounting response
         logger.LogIf(ctx, err)
     }
 
diff --git a/cmd/admin-handlers_test.go b/cmd/admin-handlers_test.go
index 0b9589f69..615b3ddd5 100644
--- a/cmd/admin-handlers_test.go
+++ b/cmd/admin-handlers_test.go
@@ -74,7 +74,7 @@ func prepareAdminXLTestBed(ctx context.Context) (*adminXLTestBed, error) {
 
     // Setup admin mgmt REST API handlers.
     adminRouter := mux.NewRouter()
-    registerAdminRouter(adminRouter, true, true, false)
+    registerAdminRouter(adminRouter, true, true)
 
     return &adminXLTestBed{
         xlDirs: xlDirs,
diff --git a/cmd/admin-router.go b/cmd/admin-router.go
index 73699dcd1..f3c877339 100644
--- a/cmd/admin-router.go
+++ b/cmd/admin-router.go
@@ -20,6 +20,8 @@ import (
     "net/http"
 
     "github.com/gorilla/mux"
+    "github.com/minio/minio/cmd/config"
+    "github.com/minio/minio/pkg/env"
     "github.com/minio/minio/pkg/madmin"
 )
 
@@ -35,7 +37,7 @@ const (
 type adminAPIHandlers struct{}
 
 // registerAdminRouter - Add handler functions for each service REST API routes.
-func registerAdminRouter(router *mux.Router, enableConfigOps, enableIAMOps, enableBucketQuotaOps bool) {
+func registerAdminRouter(router *mux.Router, enableConfigOps, enableIAMOps bool) {
 
     adminAPI := adminAPIHandlers{}
     // Admin router
@@ -170,13 +172,15 @@ func registerAdminRouter(router *mux.Router, enableConfigOps, enableIAMOps, enab
     }
 
     // Quota operations
-    if enableConfigOps && enableBucketQuotaOps {
-        // GetBucketQuotaConfig
-        adminRouter.Methods(http.MethodGet).Path(adminVersion+"/get-bucket-quota").HandlerFunc(
-            httpTraceHdrs(adminAPI.GetBucketQuotaConfigHandler)).Queries("bucket", "{bucket:.*}")
-        // PutBucketQuotaConfig
-        adminRouter.Methods(http.MethodPut).Path(adminVersion+"/set-bucket-quota").HandlerFunc(
-            httpTraceHdrs(adminAPI.PutBucketQuotaConfigHandler)).Queries("bucket", "{bucket:.*}")
+    if globalIsXL || globalIsDistXL {
+        if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn {
+            // GetBucketQuotaConfig
+            adminRouter.Methods(http.MethodGet).Path(adminVersion+"/get-bucket-quota").HandlerFunc(
+                httpTraceHdrs(adminAPI.GetBucketQuotaConfigHandler)).Queries("bucket", "{bucket:.*}")
+            // PutBucketQuotaConfig
+            adminRouter.Methods(http.MethodPut).Path(adminVersion+"/set-bucket-quota").HandlerFunc(
+                httpTraceHdrs(adminAPI.PutBucketQuotaConfigHandler)).Queries("bucket", "{bucket:.*}")
+        }
     }
 
     // -- Top APIs --
diff --git a/cmd/data-usage.go b/cmd/data-usage.go
index dcbf85dad..5ef3d67b9 100644
--- a/cmd/data-usage.go
+++ b/cmd/data-usage.go
@@ -55,8 +55,12 @@ const (
 
 // initDataUsageStats will start the crawler unless disabled.
 func initDataUsageStats(ctx context.Context, objAPI ObjectLayer) {
-    if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn {
-        go runDataUsageInfo(ctx, objAPI)
+    // data usage stats are only available in erasure
+    // coded mode
+    if globalIsXL || globalIsDistXL {
+        if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn {
+            go runDataUsageInfo(ctx, objAPI)
+        }
     }
 }
 
diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go
index 61be1b13b..7b2d45df3 100644
--- a/cmd/fs-v1.go
+++ b/cmd/fs-v1.go
@@ -19,7 +19,6 @@ package cmd
 import (
     "bytes"
     "context"
-    "encoding/json"
     "fmt"
     "io"
     "io/ioutil"
@@ -31,7 +30,6 @@ import (
     "strings"
     "sync"
     "sync/atomic"
-    "time"
 
     jsoniter "github.com/json-iterator/go"
     "github.com/minio/minio-go/v6/pkg/s3utils"
@@ -39,7 +37,6 @@ import (
     "github.com/minio/minio/cmd/config"
     xhttp "github.com/minio/minio/cmd/http"
     "github.com/minio/minio/cmd/logger"
-    "github.com/minio/minio/pkg/color"
     "github.com/minio/minio/pkg/lock"
     "github.com/minio/minio/pkg/madmin"
     "github.com/minio/minio/pkg/mimedb"
@@ -229,61 +226,9 @@ func (fs *FSObjects) StorageInfo(ctx context.Context, _ bool) StorageInfo {
     return storageInfo
 }
 
-func (fs *FSObjects) waitForLowActiveIO() {
-    for atomic.LoadInt64(&fs.activeIOCount) >= fs.maxActiveIOCount {
-        time.Sleep(lowActiveIOWaitTick)
-    }
-}
-
 // CrawlAndGetDataUsage returns data usage stats of the current FS deployment
 func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
-    // Load bucket totals
-    var oldCache dataUsageCache
-    err := oldCache.load(ctx, fs, dataUsageCacheName)
-    if err != nil {
-        return err
-    }
-    if oldCache.Info.Name == "" {
-        oldCache.Info.Name = dataUsageRoot
-    }
-    buckets, err := fs.ListBuckets(ctx)
-    if err != nil {
-        return err
-    }
-    oldCache.Info.BloomFilter = nil
-    if bf != nil {
-        oldCache.Info.BloomFilter = bf.bytes()
-    }
-
-    if false && intDataUpdateTracker.debug {
-        b, _ := json.MarshalIndent(bf, "", "  ")
-        logger.Info("Bloom filter: %v", string(b))
-    }
-    cache, err := updateUsage(ctx, fs.fsPath, oldCache, fs.waitForLowActiveIO, func(item Item) (int64, error) {
-        // Get file size, symlinks which cannot be
-        // followed are automatically filtered by fastwalk.
-        fi, err := os.Stat(item.Path)
-        if err != nil {
-            return 0, errSkipFile
-        }
-        return fi.Size(), nil
-    })
-    cache.Info.BloomFilter = nil
-
-    // Even if there was an error, the new cache may have better info.
-    if cache.Info.LastUpdate.After(oldCache.Info.LastUpdate) {
-        if intDataUpdateTracker.debug {
-            logger.Info(color.Green("CrawlAndGetDataUsage:")+" Saving cache with %d entries", len(cache.Cache))
-        }
-        logger.LogIf(ctx, cache.save(ctx, fs, dataUsageCacheName))
-        updates <- cache.dui(dataUsageRoot, buckets)
-    } else {
-        if intDataUpdateTracker.debug {
-            logger.Info(color.Green("CrawlAndGetDataUsage:")+" Cache not updated, %d entries", len(cache.Cache))
-        }
-    }
-
-    return err
+    return NotImplemented{}
 }
 
 /// Bucket operations
diff --git a/cmd/gateway-main.go b/cmd/gateway-main.go
index a4dde5c78..22f586a85 100644
--- a/cmd/gateway-main.go
+++ b/cmd/gateway-main.go
@@ -180,11 +180,12 @@ func StartGateway(ctx *cli.Context, gw Gateway) {
     }
 
     enableIAMOps := globalEtcdClient != nil
-    enableBucketQuotaOps := env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn
 
     // Enable IAM admin APIs if etcd is enabled, if not just enable basic
     // operations such as profiling, server info etc.
-    registerAdminRouter(router, enableConfigOps, enableIAMOps, enableBucketQuotaOps)
+    //
+    // quota ops are disabled in gateway mode.
+    registerAdminRouter(router, enableConfigOps, enableIAMOps)
 
     // Add healthcheck router
     registerHealthCheckRouter(router)
diff --git a/cmd/routers.go b/cmd/routers.go
index c12df2d1f..7e4237024 100644
--- a/cmd/routers.go
+++ b/cmd/routers.go
@@ -81,7 +81,7 @@ var globalHandlers = []HandlerFunc{
 }
 
 // configureServer handler returns final handler for the http server.
-func configureServerHandler(endpointZones EndpointZones, enableBucketQuotaOps bool) (http.Handler, error) {
+func configureServerHandler(endpointZones EndpointZones) (http.Handler, error) {
     // Initialize router. `SkipClean(true)` stops gorilla/mux from
     // normalizing URL path minio/minio#3256
     router := mux.NewRouter().SkipClean(true).UseEncodedPath()
@@ -95,7 +95,7 @@ func configureServerHandler(endpointZones EndpointZones, enableBucketQuotaOps bo
     registerSTSRouter(router)
 
     // Add Admin router, all APIs are enabled in server mode.
-    registerAdminRouter(router, true, true, enableBucketQuotaOps)
+    registerAdminRouter(router, true, true)
 
     // Add healthcheck router
     registerHealthCheckRouter(router)
diff --git a/cmd/server-main.go b/cmd/server-main.go
index 1c67f8f39..815769c13 100644
--- a/cmd/server-main.go
+++ b/cmd/server-main.go
@@ -24,7 +24,6 @@ import (
     "io"
     "log"
     "net"
-    "net/http"
     "os"
     "os/signal"
     "strings"
@@ -441,9 +440,7 @@ func serverMain(ctx *cli.Context) {
     }
 
     // Configure server.
-    var handler http.Handler
-    enableBucketQuotaOps := env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn
-    handler, err = configureServerHandler(globalEndpoints, enableBucketQuotaOps)
+    handler, err := configureServerHandler(globalEndpoints)
     if err != nil {
         logger.Fatal(config.ErrUnexpectedError(err), "Unable to configure one of server's RPC services")
     }
diff --git a/cmd/test-utils_test.go b/cmd/test-utils_test.go
index cf570d3ba..e44bb0900 100644
--- a/cmd/test-utils_test.go
+++ b/cmd/test-utils_test.go
@@ -311,7 +311,7 @@ func UnstartedTestServer(t TestErrHandler, instanceType string) TestServer {
     testServer.AccessKey = credentials.AccessKey
     testServer.SecretKey = credentials.SecretKey
 
-    httpHandler, err := configureServerHandler(testServer.Disks, false)
+    httpHandler, err := configureServerHandler(testServer.Disks)
     if err != nil {
         t.Fatalf("Failed to configure one of the RPC services %s", err)
     }
diff --git a/docs/config/README.md b/docs/config/README.md
index 973c5b26f..54a05aad4 100644
--- a/docs/config/README.md
+++ b/docs/config/README.md
@@ -257,7 +257,7 @@ This behavior is consistent across all keys, each key self documents itself with
 ## Environment only settings (not in config)
 
 #### Usage crawler
 
-Data usage crawler is enabled by default, following ENVs allow for more staggered delay in terms of usage calculation.
+Data usage crawler is enabled by default on erasure coded and distributed erasure coded deployments.
 
 The crawler adapts to the system speed and completely pauses when the system is under load. It is possible to adjust the speed of the crawler and thereby the latency of updates being reflected. The delays between each operation of the crawl can be adjusted by the `MINIO_DISK_USAGE_CRAWL_DELAY` environment variable. By default the value is `10`. This means the crawler will sleep *10x* the time each operation takes.
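
As a usage illustration for the documentation change above, here is a minimal sketch of tuning the crawl delay on an erasure coded deployment. Only `MINIO_DISK_USAGE_CRAWL_DELAY` and its sleep-multiplier semantics come from the README text; the server command line and data paths are illustrative placeholders.

```sh
# Crawl more gently: each crawl operation is followed by a sleep of
# roughly 30x the time that operation took (the documented default is 10).
export MINIO_DISK_USAGE_CRAWL_DELAY=30
minio server /mnt/data{1...4}
```

In FS and gateway deployments this setting has no effect, since the crawler (and with it the bucket quota admin APIs) is disabled entirely by this patch.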