From f14f60a487cb42da2d57f3d64faebc1b942fd3c7 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Tue, 21 Jan 2020 14:07:49 -0800 Subject: [PATCH] fix: Avoid double usage calculation on every restart (#8856) On every restart of the server, usage was being calculated which is not useful instead wait for sufficient time to start the crawling routine. This PR also avoids lots of double allocations through strings, optimizes usage of string builders and also avoids crawling through symbolic links. Fixes #8844 --- cmd/admin-handlers.go | 4 +- cmd/background-heal-ops.go | 2 +- cmd/data-usage.go | 157 +++++++++++++++++++++++++++--------- cmd/fastwalk.go | 57 ++++++------- cmd/fs-v1.go | 94 ++++----------------- cmd/gateway-unsupported.go | 6 ++ cmd/generic-handlers.go | 6 +- cmd/handler-utils.go | 17 ---- cmd/object-api-interface.go | 1 + cmd/object-handlers.go | 4 +- cmd/posix-errors.go | 5 ++ cmd/posix-list-dir_unix.go | 7 +- cmd/posix.go | 90 +++++---------------- cmd/utils.go | 42 ++++------ cmd/utils_test.go | 57 ++++++------- cmd/web-handlers.go | 2 +- cmd/xl-sets.go | 4 + cmd/xl-v1.go | 7 +- cmd/xl-zones.go | 4 +- 19 files changed, 250 insertions(+), 316 deletions(-) diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go index 27f73bbcf..beb470ab6 100644 --- a/cmd/admin-handlers.go +++ b/cmd/admin-handlers.go @@ -302,13 +302,13 @@ func (a adminAPIHandlers) DataUsageInfoHandler(w http.ResponseWriter, r *http.Re dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI) if err != nil { - writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrInternalError), r.URL) + writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL) return } dataUsageInfoJSON, err := json.Marshal(dataUsageInfo) if err != nil { - writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrInternalError), r.URL) + writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL) return } diff --git a/cmd/background-heal-ops.go b/cmd/background-heal-ops.go index 04ff997bc..946530e0c 100644 --- a/cmd/background-heal-ops.go +++ b/cmd/background-heal-ops.go @@ -75,7 +75,7 @@ func (h *healRoutine) run() { var res madmin.HealResultItem var err error - bucket, object := urlPath2BucketObjectName(task.path) + bucket, object := path2BucketObject(task.path) switch { case bucket == "" && object == "": res, err = bgHealDiskFormat(ctx, task.opts) diff --git a/cmd/data-usage.go b/cmd/data-usage.go index b7beb4284..8240eb187 100644 --- a/cmd/data-usage.go +++ b/cmd/data-usage.go @@ -20,6 +20,8 @@ import ( "bytes" "context" "encoding/json" + "os" + "path/filepath" "time" jsoniter "github.com/json-iterator/go" @@ -48,40 +50,36 @@ func runDataUsageInfoUpdateRoutine() { break } - ctx := context.Background() - - switch v := objAPI.(type) { - case *xlZones: - runDataUsageInfoForXLZones(ctx, v, GlobalServiceDoneCh) - case *FSObjects: - runDataUsageInfoForFS(ctx, v, GlobalServiceDoneCh) - default: - return - } + runDataUsageInfo(context.Background(), objAPI, GlobalServiceDoneCh) } -func runDataUsageInfoForFS(ctx context.Context, fsObj *FSObjects, endCh <-chan struct{}) { - t := time.NewTicker(dataUsageCrawlInterval) - defer t.Stop() - for { - // Get data usage info of the FS Object - usageInfo := fsObj.crawlAndGetDataUsageInfo(ctx, endCh) - // Save the data usage in the disk - err := storeDataUsageInBackend(ctx, fsObj, usageInfo) - if err != nil { - logger.LogIf(ctx, err) - } - select { - case <-endCh: - return - // Wait until the next crawl interval - case <-t.C: - } +// timeToNextCrawl returns the duration until next crawl should occur +// this is validated by verifying the LastUpdate time. +func timeToCrawl(ctx context.Context, objAPI ObjectLayer) time.Duration { + dataUsageInfo, err := loadDataUsageFromBackend(ctx, objAPI) + if err != nil { + // Upon an error wait for like 10 + // seconds to start the crawler. + return 10 * time.Second + } + // File indeed doesn't exist when LastUpdate is zero + // so we have never crawled, start crawl right away. + if dataUsageInfo.LastUpdate.IsZero() { + return 1 * time.Second } + waitDuration := dataUsageInfo.LastUpdate.Sub(UTCNow()) + if waitDuration > dataUsageCrawlInterval { + // Waited long enough start crawl in a 1 second + return 1 * time.Second + } + // No crawling needed, ask the routine to wait until + // the daily interval 12hrs - delta between last update + // with current time. + return dataUsageCrawlInterval - waitDuration } -func runDataUsageInfoForXLZones(ctx context.Context, z *xlZones, endCh <-chan struct{}) { - locker := z.NewNSLock(ctx, minioMetaBucket, "leader-data-usage-info") +func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer, endCh <-chan struct{}) { + locker := objAPI.NewNSLock(ctx, minioMetaBucket, "leader-data-usage-info") for { err := locker.GetLock(newDynamicTimeout(time.Millisecond, time.Millisecond)) if err != nil { @@ -93,19 +91,17 @@ func runDataUsageInfoForXLZones(ctx context.Context, z *xlZones, endCh <-chan st break } - t := time.NewTicker(dataUsageCrawlInterval) - defer t.Stop() for { - usageInfo := z.crawlAndGetDataUsage(ctx, endCh) - err := storeDataUsageInBackend(ctx, z, usageInfo) - if err != nil { - logger.LogIf(ctx, err) - } + wait := timeToCrawl(ctx, objAPI) select { case <-endCh: locker.Unlock() return - case <-t.C: + case <-time.NewTimer(wait).C: + // Crawl only when no previous crawl has occurred, + // or its been too long since last crawl. + err := storeDataUsageInBackend(ctx, objAPI, objAPI.CrawlAndGetDataUsage(ctx, endCh)) + logger.LogIf(ctx, err) } } } @@ -131,7 +127,10 @@ func loadDataUsageFromBackend(ctx context.Context, objAPI ObjectLayer) (DataUsag err := objAPI.GetObject(ctx, minioMetaBackgroundOpsBucket, dataUsageObjName, 0, -1, &dataUsageInfoJSON, "", ObjectOptions{}) if err != nil { - return DataUsageInfo{}, nil + if isErrObjectNotFound(err) { + return DataUsageInfo{}, nil + } + return DataUsageInfo{}, toObjectErr(err, minioMetaBackgroundOpsBucket, dataUsageObjName) } var dataUsageInfo DataUsageInfo @@ -143,3 +142,85 @@ func loadDataUsageFromBackend(ctx context.Context, objAPI ObjectLayer) (DataUsag return dataUsageInfo, nil } + +// Item represents each file while walking. +type Item struct { + Path string + Typ os.FileMode +} + +type getSizeFn func(item Item) (int64, error) +type activeIOFn func() error + +func updateUsage(basePath string, endCh <-chan struct{}, waitForLowActiveIO activeIOFn, getSize getSizeFn) DataUsageInfo { + var dataUsageInfo = DataUsageInfo{ + BucketsSizes: make(map[string]uint64), + ObjectsSizesHistogram: make(map[string]uint64), + } + + itemCh := make(chan Item) + skipCh := make(chan error) + defer close(skipCh) + + go func() { + defer close(itemCh) + fastWalk(basePath, func(path string, typ os.FileMode) error { + if err := waitForLowActiveIO(); err != nil { + return filepath.SkipDir + } + + select { + case <-endCh: + return filepath.SkipDir + case itemCh <- Item{path, typ}: + } + return <-skipCh + }) + }() + + for { + select { + case <-endCh: + return dataUsageInfo + case item, ok := <-itemCh: + if !ok { + return dataUsageInfo + } + + bucket, entry := path2BucketObjectWithBasePath(basePath, item.Path) + if bucket == "" { + skipCh <- nil + continue + } + + if isReservedOrInvalidBucket(bucket, false) { + skipCh <- filepath.SkipDir + continue + } + + if entry == "" && item.Typ&os.ModeDir != 0 { + dataUsageInfo.BucketsCount++ + dataUsageInfo.BucketsSizes[bucket] = 0 + skipCh <- nil + continue + } + + if item.Typ&os.ModeDir != 0 { + skipCh <- nil + continue + } + + size, err := getSize(item) + if err != nil { + skipCh <- errSkipFile + continue + } + + dataUsageInfo.ObjectsCount++ + dataUsageInfo.ObjectsTotalSize += uint64(size) + dataUsageInfo.BucketsSizes[bucket] += uint64(size) + dataUsageInfo.ObjectsSizesHistogram[objSizeToHistoInterval(uint64(size))]++ + skipCh <- nil + } + } +} diff --git a/cmd/fastwalk.go b/cmd/fastwalk.go index 8470b4e12..2da818ff8 100644 --- a/cmd/fastwalk.go +++ b/cmd/fastwalk.go @@ -17,14 +17,7 @@ import ( "sync" ) -// ErrTraverseLink is used as a return value from WalkFuncs to indicate that the -// symlink named in the call may be traversed. -var ErrTraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory") - -// ErrSkipFiles is a used as a return value from WalkFuncs to indicate that the -// callback should not be called for any other files in the current directory. -// Child directories will still be traversed. -var ErrSkipFiles = errors.New("fastwalk: skip remaining files in directory") +var errSkipFile = errors.New("fastwalk: skip this file") // Walk is a faster implementation of filepath.Walk. // @@ -161,25 +154,32 @@ func (w *walker) enqueue(it walkItem) { } } +var stringsBuilderPool = sync.Pool{ + New: func() interface{} { + return &strings.Builder{} + }, +} + func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { - joined := dirName + string(os.PathSeparator) + baseName + builder := stringsBuilderPool.Get().(*strings.Builder) + defer func() { + builder.Reset() + stringsBuilderPool.Put(builder) + }() + + builder.WriteString(dirName) + if !strings.HasSuffix(dirName, SlashSeparator) { + builder.WriteString(SlashSeparator) + } + builder.WriteString(baseName) if typ == os.ModeDir { - w.enqueue(walkItem{dir: joined}) + w.enqueue(walkItem{dir: builder.String()}) return nil } - err := w.fn(joined, typ) - if typ == os.ModeSymlink { - if err == ErrTraverseLink { - // Set callbackDone so we don't call it twice for both the - // symlink-as-symlink and the symlink-as-directory later: - w.enqueue(walkItem{dir: joined, callbackDone: true}) - return nil - } - if err == filepath.SkipDir { - // Permit SkipDir on symlinks too. - return nil - } + err := w.fn(builder.String(), typ) + if err == filepath.SkipDir || err == errSkipFile { + return nil } return err } @@ -189,22 +189,13 @@ func readDirFn(dirName string, fn func(dirName, entName string, typ os.FileMode) if err != nil { return err } - skipFiles := false for _, fi := range fis { var mode os.FileMode if strings.HasSuffix(fi, SlashSeparator) { mode |= os.ModeDir } - if mode == 0 && skipFiles { - continue - } - - if err := fn(dirName, fi, mode); err != nil { - if err == ErrSkipFiles { - skipFiles = true - continue - } + if err = fn(dirName, fi, mode); err != nil { return err } } @@ -214,7 +205,7 @@ func readDirFn(dirName string, fn func(dirName, entName string, typ os.FileMode) func (w *walker) walk(root string, runUserCallback bool) error { if runUserCallback { err := w.fn(root, os.ModeDir) - if err == filepath.SkipDir { + if err == filepath.SkipDir || err == errSkipFile { return nil } if err != nil { diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index 6428799d8..1daf966a7 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -27,7 +27,6 @@ import ( "os" "os/user" "path" - "path/filepath" "sort" "strings" "sync" @@ -111,8 +110,7 @@ func initMetaVolumeFS(fsPath, fsUUID string) error { return err } - metaStatsPath := pathJoin(fsPath, minioMetaBackgroundOpsBucket, fsUUID) - if err := os.MkdirAll(metaStatsPath, 0777); err != nil { + if err := os.MkdirAll(pathJoin(fsPath, minioMetaBackgroundOpsBucket), 0777); err != nil { return err } @@ -229,90 +227,30 @@ func (fs *FSObjects) StorageInfo(ctx context.Context) StorageInfo { } func (fs *FSObjects) waitForLowActiveIO() error { - t := time.NewTicker(lowActiveIOWaitTick) - defer t.Stop() - for { - if atomic.LoadInt64(&fs.activeIOCount) >= fs.maxActiveIOCount { - select { - case <-GlobalServiceDoneCh: - return errors.New("forced exit") - case <-t.C: - continue - } + for atomic.LoadInt64(&fs.activeIOCount) >= fs.maxActiveIOCount { + select { + case <-GlobalServiceDoneCh: + return errors.New("forced exit") + case <-time.NewTimer(lowActiveIOWaitTick).C: + continue } - break } return nil } -// crawlAndGetDataUsageInfo returns data usage stats of the current FS deployment -func (fs *FSObjects) crawlAndGetDataUsageInfo(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { - - var dataUsageInfoMu sync.Mutex - var dataUsageInfo = DataUsageInfo{ - BucketsSizes: make(map[string]uint64), - ObjectsSizesHistogram: make(map[string]uint64), - } - - walkFn := func(origPath string, typ os.FileMode) error { - - select { - case <-GlobalServiceDoneCh: - return filepath.SkipDir - default: - } - - if err := fs.waitForLowActiveIO(); err != nil { - return filepath.SkipDir - } - - path := strings.TrimPrefix(origPath, fs.fsPath) - path = strings.TrimPrefix(path, SlashSeparator) - - splits := splitN(path, SlashSeparator, 2) - bucket := splits[0] - prefix := splits[1] - - if bucket == "" { - return nil - } - - if isReservedOrInvalidBucket(bucket, false) { - return filepath.SkipDir - } - - if prefix == "" { - dataUsageInfoMu.Lock() - dataUsageInfo.BucketsCount++ - dataUsageInfo.BucketsSizes[bucket] = 0 - dataUsageInfoMu.Unlock() - return nil - } - - if typ&os.ModeDir != 0 { - return nil - } - - // Get file size - fi, err := os.Stat(origPath) +// CrawlAndGetDataUsage returns data usage stats of the current FS deployment +func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { + dataUsageInfo := updateUsage(fs.fsPath, endCh, fs.waitForLowActiveIO, func(item Item) (int64, error) { + // Get file size, symlinks which cannot bex + // followed are automatically filtered by fastwalk. + fi, err := os.Stat(item.Path) if err != nil { - return nil + return 0, errSkipFile } - size := fi.Size() - - dataUsageInfoMu.Lock() - dataUsageInfo.ObjectsCount++ - dataUsageInfo.ObjectsTotalSize += uint64(size) - dataUsageInfo.BucketsSizes[bucket] += uint64(size) - dataUsageInfo.ObjectsSizesHistogram[objSizeToHistoInterval(uint64(size))]++ - dataUsageInfoMu.Unlock() - - return nil - } - - fastWalk(fs.fsPath, walkFn) + return fi.Size(), nil + }) dataUsageInfo.LastUpdate = UTCNow() atomic.StoreUint64(&fs.totalUsed, dataUsageInfo.ObjectsTotalSize) diff --git a/cmd/gateway-unsupported.go b/cmd/gateway-unsupported.go index fc80784e4..35a4d4251 100644 --- a/cmd/gateway-unsupported.go +++ b/cmd/gateway-unsupported.go @@ -46,6 +46,12 @@ func NewGatewayLayerWithLocker(gwLayer ObjectLayer) ObjectLayer { // GatewayUnsupported list of unsupported call stubs for gateway. type GatewayUnsupported struct{} +// CrawlAndGetDataUsage - crawl is not implemented for gateway +func (a GatewayUnsupported) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { + logger.CriticalIf(ctx, errors.New("not implemented")) + return DataUsageInfo{} +} + // NewNSLock is a dummy stub for gateway. func (a GatewayUnsupported) NewNSLock(ctx context.Context, bucket string, object string) RWLocker { logger.CriticalIf(ctx, errors.New("not implemented")) diff --git a/cmd/generic-handlers.go b/cmd/generic-handlers.go index 66649fe46..2d367489b 100644 --- a/cmd/generic-handlers.go +++ b/cmd/generic-handlers.go @@ -631,11 +631,11 @@ func (f bucketForwardingHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques switch r.Method { case http.MethodPut: if getRequestAuthType(r) == authTypeJWT { - bucket, _ = urlPath2BucketObjectName(strings.TrimPrefix(r.URL.Path, minioReservedBucketPath+"/upload")) + bucket, _ = path2BucketObjectWithBasePath(minioReservedBucketPath+"/upload", r.URL.Path) } case http.MethodGet: if t := r.URL.Query().Get("token"); t != "" { - bucket, _ = urlPath2BucketObjectName(strings.TrimPrefix(r.URL.Path, minioReservedBucketPath+"/download")) + bucket, _ = path2BucketObjectWithBasePath(minioReservedBucketPath+"/download", r.URL.Path) } else if getRequestAuthType(r) != authTypeJWT && !strings.HasPrefix(r.URL.Path, minioReservedBucketPath) { bucket, _ = request2BucketObjectName(r) } @@ -687,7 +687,7 @@ func (f bucketForwardingHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques // requests have target bucket and object in URI and source details are in // header fields if r.Method == http.MethodPut && r.Header.Get(xhttp.AmzCopySource) != "" { - bucket, object = urlPath2BucketObjectName(r.Header.Get(xhttp.AmzCopySource)) + bucket, object = path2BucketObject(r.Header.Get(xhttp.AmzCopySource)) if bucket == "" || object == "" { f.handler.ServeHTTP(w, r) return diff --git a/cmd/handler-utils.go b/cmd/handler-utils.go index 4bcd7e0cf..945e77340 100644 --- a/cmd/handler-utils.go +++ b/cmd/handler-utils.go @@ -98,23 +98,6 @@ func isDirectiveReplace(value string) bool { return value == replaceDirective } -// Splits an incoming path into bucket and object components. -func path2BucketAndObject(path string) (bucket, object string) { - // Skip the first element if it is '/', split the rest. - path = strings.TrimPrefix(path, SlashSeparator) - pathComponents := strings.SplitN(path, SlashSeparator, 2) - - // Save the bucket and object extracted from path. - switch len(pathComponents) { - case 1: - bucket = pathComponents[0] - case 2: - bucket = pathComponents[0] - object = pathComponents[1] - } - return bucket, object -} - // userMetadataKeyPrefixes contains the prefixes of used-defined metadata keys. // All values stored with a key starting with one of the following prefixes // must be extracted from the header. diff --git a/cmd/object-api-interface.go b/cmd/object-api-interface.go index 3ae2fbebc..d218fba30 100644 --- a/cmd/object-api-interface.go +++ b/cmd/object-api-interface.go @@ -57,6 +57,7 @@ type ObjectLayer interface { // Storage operations. Shutdown(context.Context) error + CrawlAndGetDataUsage(context.Context, <-chan struct{}) DataUsageInfo StorageInfo(context.Context) StorageInfo // Bucket operations. diff --git a/cmd/object-handlers.go b/cmd/object-handlers.go index d49c1e8c7..a31cc7b04 100644 --- a/cmd/object-handlers.go +++ b/cmd/object-handlers.go @@ -743,7 +743,7 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re } } - srcBucket, srcObject := path2BucketAndObject(cpSrcPath) + srcBucket, srcObject := path2BucketObject(cpSrcPath) // If source object is empty or bucket is empty, reply back invalid copy source. if srcObject == "" || srcBucket == "" { writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrInvalidCopySource), r.URL, guessIsBrowserReq(r)) @@ -1578,7 +1578,7 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt } } - srcBucket, srcObject := path2BucketAndObject(cpSrcPath) + srcBucket, srcObject := path2BucketObject(cpSrcPath) // If source object is empty or bucket is empty, reply back invalid copy source. if srcObject == "" || srcBucket == "" { writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrInvalidCopySource), r.URL, guessIsBrowserReq(r)) diff --git a/cmd/posix-errors.go b/cmd/posix-errors.go index e68cf8bb8..d4f6e76af 100644 --- a/cmd/posix-errors.go +++ b/cmd/posix-errors.go @@ -58,6 +58,11 @@ func isSysErrTooLong(err error) bool { return errors.Is(err, syscall.ENAMETOOLONG) } +// Check if the given error corresponds to the ELOOP (too many symlinks). +func isSysErrTooManySymlinks(err error) bool { + return errors.Is(err, syscall.ELOOP) +} + // Check if the given error corresponds to ENOTEMPTY for unix // and ERROR_DIR_NOT_EMPTY for windows (directory not empty). func isSysErrNotEmpty(err error) bool { diff --git a/cmd/posix-list-dir_unix.go b/cmd/posix-list-dir_unix.go index c3bec5f44..aebd0c7ab 100644 --- a/cmd/posix-list-dir_unix.go +++ b/cmd/posix-list-dir_unix.go @@ -122,8 +122,11 @@ func readDirN(dirPath string, count int) (entries []string, err error) { if typ == unexpectedFileMode || typ&os.ModeSymlink == os.ModeSymlink { fi, err := os.Stat(pathJoin(dirPath, name)) if err != nil { - // It got deleted in the meantime. - if os.IsNotExist(err) { + // It got deleted in the meantime, not found + // or returns too many symlinks ignore this + // file/directory. + if os.IsNotExist(err) || isSysErrPathNotFound(err) || + isSysErrTooManySymlinks(err) { continue } return nil, err diff --git a/cmd/posix.go b/cmd/posix.go index 80f34b0c1..9e24c0555 100644 --- a/cmd/posix.go +++ b/cmd/posix.go @@ -334,91 +334,39 @@ func isQuitting(endCh chan struct{}) bool { } func (s *posix) waitForLowActiveIO() error { - t := time.NewTicker(lowActiveIOWaitTick) - defer t.Stop() - for { - if atomic.LoadInt32(&s.activeIOCount) >= s.maxActiveIOCount { - select { - case <-GlobalServiceDoneCh: - return errors.New("forced exit") - case <-t.C: - continue - } + for atomic.LoadInt32(&s.activeIOCount) >= s.maxActiveIOCount { + select { + case <-GlobalServiceDoneCh: + return errors.New("forced exit") + case <-time.NewTimer(lowActiveIOWaitTick).C: + continue } - break } return nil } func (s *posix) CrawlAndGetDataUsage(endCh <-chan struct{}) (DataUsageInfo, error) { - - var dataUsageInfoMu sync.Mutex - var dataUsageInfo = DataUsageInfo{ - BucketsSizes: make(map[string]uint64), - ObjectsSizesHistogram: make(map[string]uint64), - } - - walkFn := func(origPath string, typ os.FileMode) error { - - select { - case <-GlobalServiceDoneCh: - return filepath.SkipDir - default: + dataUsageInfo := updateUsage(s.diskPath, endCh, s.waitForLowActiveIO, func(item Item) (int64, error) { + // Look for `xl.json' at the leaf. + if !strings.HasSuffix(item.Path, SlashSeparator+xlMetaJSONFile) { + // if no xl.json found, skip the file. + return 0, errSkipFile } - if err := s.waitForLowActiveIO(); err != nil { - return filepath.SkipDir - } - - path := strings.TrimPrefix(origPath, s.diskPath) - path = strings.TrimPrefix(path, SlashSeparator) - - splits := splitN(path, SlashSeparator, 2) - - bucket := splits[0] - prefix := splits[1] - - if bucket == "" { - return nil - } - - if isReservedOrInvalidBucket(bucket, false) { - return nil - } - - if prefix == "" { - dataUsageInfoMu.Lock() - dataUsageInfo.BucketsCount++ - dataUsageInfo.BucketsSizes[bucket] = 0 - dataUsageInfoMu.Unlock() - return nil + xlMetaBuf, err := ioutil.ReadFile(item.Path) + if err != nil { + return 0, errSkipFile } - if strings.HasSuffix(prefix, SlashSeparator+xlMetaJSONFile) { - xlMetaBuf, err := ioutil.ReadFile(origPath) - if err != nil { - return nil - } - meta, err := xlMetaV1UnmarshalJSON(context.Background(), xlMetaBuf) - if err != nil { - return nil - } - - dataUsageInfoMu.Lock() - dataUsageInfo.ObjectsCount++ - dataUsageInfo.ObjectsTotalSize += uint64(meta.Stat.Size) - dataUsageInfo.BucketsSizes[bucket] += uint64(meta.Stat.Size) - dataUsageInfo.ObjectsSizesHistogram[objSizeToHistoInterval(uint64(meta.Stat.Size))]++ - dataUsageInfoMu.Unlock() + meta, err := xlMetaV1UnmarshalJSON(context.Background(), xlMetaBuf) + if err != nil { + return 0, errSkipFile } - return nil - } - - fastWalk(s.diskPath, walkFn) + return meta.Stat.Size, nil + }) dataUsageInfo.LastUpdate = UTCNow() - atomic.StoreUint64(&s.totalUsed, dataUsageInfo.ObjectsTotalSize) return dataUsageInfo, nil } diff --git a/cmd/utils.go b/cmd/utils.go index cec439ebe..59fdbb627 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -67,28 +67,24 @@ func request2BucketObjectName(r *http.Request) (bucketName, objectName string) { if err != nil { logger.CriticalIf(context.Background(), err) } - return urlPath2BucketObjectName(path) -} - -// Convert url path into bucket and object name. -func urlPath2BucketObjectName(path string) (bucketName, objectName string) { - if path == "" || path == SlashSeparator { - return "", "" - } - // Trim any preceding slash separator. - urlPath := strings.TrimPrefix(path, SlashSeparator) + return path2BucketObject(path) +} - // Split urlpath using slash separator into a given number of - // expected tokens. - tokens := strings.SplitN(urlPath, SlashSeparator, 2) - bucketName = tokens[0] - if len(tokens) == 2 { - objectName = tokens[1] +// path2BucketObjectWithBasePath returns bucket and prefix, if any, +// of a 'path'. basePath is trimmed from the front of the 'path'. +func path2BucketObjectWithBasePath(basePath, path string) (bucket, prefix string) { + path = strings.TrimPrefix(path, basePath) + path = strings.TrimPrefix(path, SlashSeparator) + m := strings.Index(path, SlashSeparator) + if m < 0 { + return path, "" } + return path[:m], path[m+len(SlashSeparator):] +} - // Success. - return bucketName, objectName +func path2BucketObject(s string) (bucket, prefix string) { + return path2BucketObjectWithBasePath("", s) } // URI scheme constants. @@ -553,16 +549,6 @@ func getMinioMode() string { return mode } -func splitN(str, delim string, num int) []string { - stdSplit := strings.SplitN(str, delim, num) - retSplit := make([]string, num) - for i := 0; i < len(stdSplit); i++ { - retSplit[i] = stdSplit[i] - } - - return retSplit -} - func iamPolicyClaimName() string { return globalOpenIDConfig.ClaimPrefix + globalOpenIDConfig.ClaimName } diff --git a/cmd/utils_test.go b/cmd/utils_test.go index f3cbdb1d1..39ed623d7 100644 --- a/cmd/utils_test.go +++ b/cmd/utils_test.go @@ -107,85 +107,74 @@ func TestMaxPartID(t *testing.T) { } } -// Tests extracting bucket and objectname from various types of URL paths. -func TestURL2BucketObjectName(t *testing.T) { +// Tests extracting bucket and objectname from various types of paths. +func TestPath2BucketObjectName(t *testing.T) { testCases := []struct { - u *url.URL + path string bucket, object string }{ // Test case 1 normal case. { - u: &url.URL{ - Path: "/bucket/object", - }, + path: "/bucket/object", bucket: "bucket", object: "object", }, // Test case 2 where url only has separator. { - u: &url.URL{ - Path: SlashSeparator, - }, + path: SlashSeparator, bucket: "", object: "", }, // Test case 3 only bucket is present. { - u: &url.URL{ - Path: "/bucket", - }, + path: "/bucket", bucket: "bucket", object: "", }, // Test case 4 many separators and object is a directory. { - u: &url.URL{ - Path: "/bucket/object/1/", - }, + path: "/bucket/object/1/", bucket: "bucket", object: "object/1/", }, // Test case 5 object has many trailing separators. { - u: &url.URL{ - Path: "/bucket/object/1///", - }, + path: "/bucket/object/1///", bucket: "bucket", object: "object/1///", }, // Test case 6 object has only trailing separators. { - u: &url.URL{ - Path: "/bucket/object///////", - }, + path: "/bucket/object///////", bucket: "bucket", object: "object///////", }, // Test case 7 object has preceding separators. { - u: &url.URL{ - Path: "/bucket////object////", - }, + path: "/bucket////object////", bucket: "bucket", object: "///object////", }, - // Test case 9 url path is empty. + // Test case 8 url path is empty. { - u: &url.URL{}, + path: "", bucket: "", object: "", }, } // Validate all test cases. - for i, testCase := range testCases { - bucketName, objectName := urlPath2BucketObjectName(testCase.u.Path) - if bucketName != testCase.bucket { - t.Errorf("Test %d: failed expected bucket name \"%s\", got \"%s\"", i+1, testCase.bucket, bucketName) - } - if objectName != testCase.object { - t.Errorf("Test %d: failed expected bucket name \"%s\", got \"%s\"", i+1, testCase.object, objectName) - } + for _, testCase := range testCases { + testCase := testCase + t.Run("", func(t *testing.T) { + bucketName, objectName := path2BucketObject(testCase.path) + if bucketName != testCase.bucket { + t.Errorf("failed expected bucket name \"%s\", got \"%s\"", testCase.bucket, bucketName) + } + if objectName != testCase.object { + t.Errorf("failed expected bucket name \"%s\", got \"%s\"", testCase.object, objectName) + } + }) } } diff --git a/cmd/web-handlers.go b/cmd/web-handlers.go index 506ac0df0..bb6f33505 100644 --- a/cmd/web-handlers.go +++ b/cmd/web-handlers.go @@ -1723,7 +1723,7 @@ func (web *webAPIHandlers) ListAllBucketPolicies(r *http.Request, args *ListAllB reply.UIVersion = browser.UIVersion for prefix, policy := range miniogopolicy.GetPolicies(policyInfo.Statements, args.BucketName, "") { - bucketName, objectPrefix := urlPath2BucketObjectName(prefix) + bucketName, objectPrefix := path2BucketObject(prefix) objectPrefix = strings.TrimSuffix(objectPrefix, "*") reply.Policies = append(reply.Policies, BucketAccessPolicy{ Bucket: bucketName, diff --git a/cmd/xl-sets.go b/cmd/xl-sets.go index 90c0ded45..ae8cafda9 100644 --- a/cmd/xl-sets.go +++ b/cmd/xl-sets.go @@ -435,6 +435,10 @@ func (s *xlSets) StorageInfo(ctx context.Context) StorageInfo { return storageInfo } +func (s *xlSets) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { + return DataUsageInfo{} +} + // Shutdown shutsdown all erasure coded sets in parallel // returns error upon first error. func (s *xlSets) Shutdown(ctx context.Context) error { diff --git a/cmd/xl-v1.go b/cmd/xl-v1.go index 7b3e89bf2..972b08997 100644 --- a/cmd/xl-v1.go +++ b/cmd/xl-v1.go @@ -192,15 +192,14 @@ func (xl xlObjects) StorageInfo(ctx context.Context) StorageInfo { return getStorageInfo(xl.getDisks()) } -// GetMetrics - no op +// GetMetrics - is not implemented and shouldn't be called. func (xl xlObjects) GetMetrics(ctx context.Context) (*Metrics, error) { logger.LogIf(ctx, NotImplemented{}) return &Metrics{}, NotImplemented{} } -// crawlAndGetDataUsage picks three random disks to crawl and get data usage -func (xl xlObjects) crawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { - +// CrawlAndGetDataUsage picks three random disks to crawl and get data usage +func (xl xlObjects) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { var randomDisks []StorageAPI for _, d := range xl.getLoadBalancedDisks() { if d == nil || !d.IsOnline() { diff --git a/cmd/xl-zones.go b/cmd/xl-zones.go index db913b936..8df299210 100644 --- a/cmd/xl-zones.go +++ b/cmd/xl-zones.go @@ -212,7 +212,7 @@ func (z *xlZones) StorageInfo(ctx context.Context) StorageInfo { return storageInfo } -func (z *xlZones) crawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { +func (z *xlZones) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { var aggDataUsageInfo = struct { sync.Mutex DataUsageInfo @@ -227,7 +227,7 @@ func (z *xlZones) crawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{ wg.Add(1) go func(xl *xlObjects) { defer wg.Done() - info := xl.crawlAndGetDataUsage(ctx, endCh) + info := xl.CrawlAndGetDataUsage(ctx, endCh) aggDataUsageInfo.Lock() aggDataUsageInfo.ObjectsCount += info.ObjectsCount