From b3c56b53fb98db364d05c21020652f27c6e9f825 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Thu, 11 Feb 2021 10:22:03 -0800
Subject: [PATCH] fix: metacache should only rename entries during cleanup (#11503)

To avoid large delays in metacache cleanup, use rename instead of
recursive delete calls; renames are much cheaper. Move the content to
minioMetaTmpBucket and clean up that folder once every 24hrs instead.

If the new cache can replace an existing one, we should let it do so,
since that cache is being saved anyway. This avoids piling up thousands
of metacache entries for the same listing calls, entries that do not
need to be stored on disk.
---
 cmd/bucket-listobjects-handlers.go | 14 ---------
 cmd/erasure-multipart.go | 48 ++++++++++++++++++++++++++----
 cmd/erasure-server-pool.go | 26 ++++++++--------
 cmd/metacache-bucket.go | 11 ++++---
 cmd/metacache-entries.go | 19 ++++++++----
 cmd/metacache-manager.go | 5 ++--
 cmd/metacache-server-pool.go | 20 +++++++++++++
 cmd/metacache.go | 9 +++---
 cmd/prepare-storage.go | 12 ++------
 cmd/storage-datatypes.go | 14 ++++-----
 10 files changed, 110 insertions(+), 68 deletions(-)

diff --git a/cmd/bucket-listobjects-handlers.go b/cmd/bucket-listobjects-handlers.go
index f6be47b8a..80cd57d1b 100644
--- a/cmd/bucket-listobjects-handlers.go
+++ b/cmd/bucket-listobjects-handlers.go
@@ -26,7 +26,6 @@ import (
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/bucket/policy"
-	"github.com/minio/minio/pkg/handlers"
 	"github.com/minio/minio/pkg/sync/errgroup"
 )
@@ -295,10 +294,6 @@ func proxyRequestByNodeIndex(ctx context.Context, w http.ResponseWriter, r *http
 	return proxyRequest(ctx, w, r, ep)
 }
-func proxyRequestByStringHash(ctx context.Context, w http.ResponseWriter, r *http.Request, str string) (success bool) {
-	return proxyRequestByNodeIndex(ctx, w, r, crcHashMod(str, len(globalProxyEndpoints)))
-}
-
 // ListObjectsV1Handler - GET Bucket (List Objects) Version 1.
 // --------------------------
 // This implementation of the GET operation returns some or all (up to 10000)
@@ -337,15 +332,6 @@ func (api objectAPIHandlers) ListObjectsV1Handler(w http.ResponseWriter, r *http
 		return
 	}
-	// Forward the request using Source IP or bucket
-	forwardStr := handlers.GetSourceIPFromHeaders(r)
-	if forwardStr == "" {
-		forwardStr = bucket
-	}
-	if proxyRequestByStringHash(ctx, w, r, forwardStr) {
-		return
-	}
-
 	listObjects := objectAPI.ListObjects
 	// Inititate a list objects operation based on the input params.
diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go
index 9856f8e2b..c260e0633 100644
--- a/cmd/erasure-multipart.go
+++ b/cmd/erasure-multipart.go
@@ -24,6 +24,7 @@ import (
 	"sort"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 	"github.com/minio/minio-go/v7/pkg/set"
@@ -91,12 +92,47 @@ func (er erasureObjects) removeObjectPart(bucket, object, uploadID, dataDir stri
 // Clean-up the old multipart uploads. Should be run in a Go routine.
 func (er erasureObjects) cleanupStaleUploads(ctx context.Context, expiry time.Duration) {
 	// run multiple cleanup's local to this server.
+ var wg sync.WaitGroup for _, disk := range er.getLoadBalancedLocalDisks() { if disk != nil { - er.cleanupStaleUploadsOnDisk(ctx, disk, expiry) - return + wg.Add(1) + go func(disk StorageAPI) { + defer wg.Done() + er.cleanupStaleUploadsOnDisk(ctx, disk, expiry) + }(disk) } } + wg.Wait() +} + +func (er erasureObjects) renameAll(ctx context.Context, bucket, prefix string) { + var wg sync.WaitGroup + for _, disk := range er.getDisks() { + if disk == nil { + continue + } + wg.Add(1) + go func(disk StorageAPI) { + defer wg.Done() + disk.RenameFile(ctx, bucket, prefix, minioMetaTmpBucket, mustGetUUID()) + }(disk) + } + wg.Wait() +} + +func (er erasureObjects) deleteAll(ctx context.Context, bucket, prefix string) { + var wg sync.WaitGroup + for _, disk := range er.getDisks() { + if disk == nil { + continue + } + wg.Add(1) + go func(disk StorageAPI) { + defer wg.Done() + disk.Delete(ctx, bucket, prefix, true) + }(disk) + } + wg.Wait() } // Remove the old multipart uploads on the given disk. @@ -118,7 +154,7 @@ func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk Sto continue } if now.Sub(fi.ModTime) > expiry { - er.deleteObject(ctx, minioMetaMultipartBucket, uploadIDPath, fi.Erasure.DataBlocks+1) + er.renameAll(ctx, minioMetaMultipartBucket, uploadIDPath) } } } @@ -127,12 +163,12 @@ func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk Sto return } for _, tmpDir := range tmpDirs { - fi, err := disk.ReadVersion(ctx, minioMetaTmpBucket, tmpDir, "", false) + vi, err := disk.StatVol(ctx, pathJoin(minioMetaTmpBucket, tmpDir)) if err != nil { continue } - if now.Sub(fi.ModTime) > expiry { - er.deleteObject(ctx, minioMetaTmpBucket, tmpDir, fi.Erasure.DataBlocks+1) + if now.Sub(vi.Created) > expiry { + er.deleteAll(ctx, minioMetaTmpBucket, tmpDir) } } } diff --git a/cmd/erasure-server-pool.go b/cmd/erasure-server-pool.go index 5deb6450a..13eab0336 100644 --- a/cmd/erasure-server-pool.go +++ b/cmd/erasure-server-pool.go @@ -1121,22 +1121,24 @@ func (z *erasureServerPools) DeleteBucket(ctx context.Context, bucket string, fo // data is not distributed across sets. // Errors are logged but individual disk failures are not returned. func (z *erasureServerPools) deleteAll(ctx context.Context, bucket, prefix string) { - var wg sync.WaitGroup for _, servers := range z.serverPools { for _, set := range servers.sets { - for _, disk := range set.getDisks() { - if disk == nil { - continue - } - wg.Add(1) - go func(disk StorageAPI) { - defer wg.Done() - disk.Delete(ctx, bucket, prefix, true) - }(disk) - } + set.deleteAll(ctx, bucket, prefix) + } + } +} + +// renameAll will rename bucket+prefix unconditionally across all disks to +// minioMetaTmpBucket + unique uuid, +// Note that set distribution is ignored so it should only be used in cases where +// data is not distributed across sets. Errors are logged but individual +// disk failures are not returned. +func (z *erasureServerPools) renameAll(ctx context.Context, bucket, prefix string) { + for _, servers := range z.serverPools { + for _, set := range servers.sets { + set.renameAll(ctx, bucket, prefix) } } - wg.Wait() } // This function is used to undo a successful DeleteBucket operation. 
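
Why rename instead of delete? On a local filesystem a directory rename is a single metadata operation, while a recursive delete has to walk and unlink every entry, which is exactly the work that was causing large delays in metacache cleanup. The following is a minimal, standard-library sketch of that park-then-purge idea; trashDir, parkForDeletion and purgeTrash are illustrative names, and the trash path merely stands in for minioMetaTmpBucket and its existing 24hr cleanup cycle, so treat it as a sketch of the pattern rather than the MinIO implementation.

```go
package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"
	"time"
)

// trashDir plays the role of minioMetaTmpBucket: renamed directories are
// parked here and removed later, off the request path.
const trashDir = "/tmp/metacache-trash"

// parkForDeletion renames dir into trashDir under a unique name. The rename
// is cheap compared to a recursive delete, but it only works when source and
// destination live on the same filesystem.
func parkForDeletion(dir string) error {
	if err := os.MkdirAll(trashDir, 0o755); err != nil {
		return err
	}
	dst := filepath.Join(trashDir, fmt.Sprintf("%d", time.Now().UnixNano()))
	return os.Rename(dir, dst)
}

// purgeTrash performs the expensive recursive deletes, intended to run on a
// slow schedule (the patch piggybacks on the existing 24hr tmp cleanup).
func purgeTrash() {
	entries, err := os.ReadDir(trashDir)
	if err != nil {
		return
	}
	for _, e := range entries {
		if err := os.RemoveAll(filepath.Join(trashDir, e.Name())); err != nil {
			log.Printf("purge %s: %v", e.Name(), err)
		}
	}
}

func main() {
	// Park a (hypothetical) metacache directory instead of deleting it inline.
	if err := parkForDeletion("/tmp/some-bucket/.metacache"); err != nil {
		log.Println("park:", err)
	}
	purgeTrash() // in the real system this runs periodically, not immediately
}
```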
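
The cleanupStaleUploads change above also fixes a subtle issue: the old loop returned after the first non-nil disk, so only one local drive was ever cleaned. The new shape, one goroutine per disk joined by a sync.WaitGroup, is the same fan-out pattern used by the new renameAll and deleteAll helpers. Here is a self-contained sketch with a made-up Disk interface and fakeDisk type rather than MinIO's StorageAPI:

```go
package main

import (
	"fmt"
	"sync"
)

// Disk is a stand-in for a per-drive API such as MinIO's StorageAPI.
type Disk interface {
	Cleanup() error
	Name() string
}

// cleanupAll runs Cleanup on every available disk concurrently and waits for
// all of them, instead of stopping after the first usable drive.
func cleanupAll(disks []Disk) {
	var wg sync.WaitGroup
	for _, disk := range disks {
		if disk == nil {
			continue // skip offline drives
		}
		wg.Add(1)
		go func(d Disk) { // pass the loop variable explicitly (pre-Go 1.22 capture rules)
			defer wg.Done()
			if err := d.Cleanup(); err != nil {
				fmt.Printf("cleanup on %s failed: %v\n", d.Name(), err)
			}
		}(disk)
	}
	wg.Wait()
}

type fakeDisk string

func (f fakeDisk) Cleanup() error { return nil }
func (f fakeDisk) Name() string   { return string(f) }

func main() {
	cleanupAll([]Disk{fakeDisk("/mnt/disk1"), nil, fakeDisk("/mnt/disk2")})
}
```
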
diff --git a/cmd/metacache-bucket.go b/cmd/metacache-bucket.go index 779c74ac3..77fc55b42 100644 --- a/cmd/metacache-bucket.go +++ b/cmd/metacache-bucket.go @@ -64,7 +64,7 @@ func newBucketMetacache(bucket string, cleanup bool) *bucketMetacache { ez, ok := objAPI.(*erasureServerPools) if ok { ctx := context.Background() - ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(bucket, slashSeparator)) + ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(bucket, slashSeparator)) } } return &bucketMetacache{ @@ -292,7 +292,7 @@ func (b *bucketMetacache) cleanup() { caches, rootIdx := b.cloneCaches() for id, cache := range caches { - if b.transient && time.Since(cache.lastUpdate) > 15*time.Minute && time.Since(cache.lastHandout) > 15*time.Minute { + if b.transient && time.Since(cache.lastUpdate) > 10*time.Minute && time.Since(cache.lastHandout) > 10*time.Minute { // Keep transient caches only for 15 minutes. remove[id] = struct{}{} continue @@ -361,7 +361,7 @@ func (b *bucketMetacache) cleanup() { }) // Keep first metacacheMaxEntries... for _, cache := range remainCaches[metacacheMaxEntries:] { - if time.Since(cache.lastHandout) > time.Hour { + if time.Since(cache.lastHandout) > 30*time.Minute { remove[cache.id] = struct{}{} } } @@ -409,7 +409,6 @@ func (b *bucketMetacache) updateCacheEntry(update metacache) (metacache, error) defer b.mu.Unlock() existing, ok := b.caches[update.id] if !ok { - logger.Info("updateCacheEntry: bucket %s list id %v not found", b.bucket, update.id) return update, errFileNotFound } existing.update(update) @@ -465,7 +464,7 @@ func (b *bucketMetacache) deleteAll() { b.updated = true if !b.transient { // Delete all. - ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(b.bucket, slashSeparator)) + ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(b.bucket, slashSeparator)) b.caches = make(map[string]metacache, 10) b.cachesRoot = make(map[string][]string, 10) return @@ -477,7 +476,7 @@ func (b *bucketMetacache) deleteAll() { wg.Add(1) go func(cache metacache) { defer wg.Done() - ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(cache.bucket, cache.id)) + ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(cache.bucket, cache.id)) }(b.caches[id]) } wg.Wait() diff --git a/cmd/metacache-entries.go b/cmd/metacache-entries.go index a8dccea4b..c1b525ec5 100644 --- a/cmd/metacache-entries.go +++ b/cmd/metacache-entries.go @@ -330,16 +330,23 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft } fiv, err := entry.fileInfoVersions(bucket) + if err != nil { + continue + } + + fiVersions := fiv.Versions if afterV != "" { - // Forward first entry to specified version - fiv.forwardPastVersion(afterV) + vidMarkerIdx := fiv.findVersionIndex(afterV) + if vidMarkerIdx >= 0 { + fiVersions = fiVersions[vidMarkerIdx+1:] + } afterV = "" } - if err == nil { - for _, version := range fiv.Versions { - versions = append(versions, version.ToObjectInfo(bucket, entry.name)) - } + + for _, version := range fiVersions { + versions = append(versions, version.ToObjectInfo(bucket, entry.name)) } + continue } diff --git a/cmd/metacache-manager.go b/cmd/metacache-manager.go index 5e48b3972..480b06570 100644 --- a/cmd/metacache-manager.go +++ b/cmd/metacache-manager.go @@ -92,7 +92,6 @@ func (m *metacacheManager) initManager() { } m.mu.Unlock() } - m.getTransient().deleteAll() }() } @@ -124,11 +123,11 @@ func (m *metacacheManager) updateCacheEntry(update metacache) (metacache, error) } b, ok := m.buckets[update.bucket] + m.mu.RUnlock() if ok { - 
m.mu.RUnlock() return b.updateCacheEntry(update) } - m.mu.RUnlock() + // We should have either a trashed bucket or this return metacache{}, errVolumeNotFound } diff --git a/cmd/metacache-server-pool.go b/cmd/metacache-server-pool.go index 4193176fd..a29330e2b 100644 --- a/cmd/metacache-server-pool.go +++ b/cmd/metacache-server-pool.go @@ -19,7 +19,9 @@ package cmd import ( "context" "errors" + "fmt" "io" + "os" "path" "strings" "sync" @@ -28,6 +30,24 @@ import ( "github.com/minio/minio/cmd/logger" ) +func renameAllBucketMetacache(epPath string) error { + // Rename all previous `.minio.sys/buckets//.metacache` to + // to `.minio.sys/tmp/` for deletion. + return readDirFilterFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error { + if typ == os.ModeDir { + tmpMetacacheOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID()) + if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)), + tmpMetacacheOld); err != nil && err != errFileNotFound { + return fmt.Errorf("unable to rename (%s -> %s) %w", + pathJoin(epPath, minioMetaBucket+metacachePrefixForID(minioMetaBucket, slashSeparator)), + tmpMetacacheOld, + osErrToFileErr(err)) + } + } + return nil + }) +} + // listPath will return the requested entries. // If no more entries are in the listing io.EOF is returned, // otherwise nil or an unexpected error is returned. diff --git a/cmd/metacache.go b/cmd/metacache.go index c33b1862a..71c4d4413 100644 --- a/cmd/metacache.go +++ b/cmd/metacache.go @@ -123,7 +123,7 @@ func (m *metacache) matches(o *listPathOptions, extend time.Duration) bool { } if time.Since(m.lastUpdate) > metacacheMaxRunningAge+extend { // Cache ended within bloom cycle, but we can extend the life. - o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, extend+metacacheMaxRunningAge) + o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, metacacheMaxRunningAge+extend) return false } } @@ -151,8 +151,8 @@ func (m *metacache) worthKeeping(currentCycle uint64) bool { // Cycle is too old to be valuable. return false case cache.status == scanStateError || cache.status == scanStateNone: - // Remove failed listings after 10 minutes. - return time.Since(cache.lastUpdate) < 10*time.Minute + // Remove failed listings after 5 minutes. + return time.Since(cache.lastUpdate) < 5*time.Minute } return true } @@ -170,8 +170,9 @@ func (m *metacache) canBeReplacedBy(other *metacache) bool { if m.status == scanStateStarted && time.Since(m.lastUpdate) < metacacheMaxRunningAge { return false } + // Keep it around a bit longer. - if time.Since(m.lastHandout) < time.Hour || time.Since(m.lastUpdate) < metacacheMaxRunningAge { + if time.Since(m.lastHandout) < 30*time.Minute || time.Since(m.lastUpdate) < metacacheMaxRunningAge { return false } diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index c93e05555..abdf027cc 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -125,16 +125,8 @@ func formatErasureCleanupTmpLocalEndpoints(endpoints Endpoints) error { osErrToFileErr(err)) } - // Move .minio.sys/buckets/.minio.sys/metacache transient list cache - // folder to speed up startup routines. 
-		tmpMetacacheOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID())
-		if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(minioMetaBucket, "")),
-			tmpMetacacheOld); err != nil && err != errFileNotFound {
-			return fmt.Errorf("unable to rename (%s -> %s) %w",
-				pathJoin(epPath, minioMetaBucket+metacachePrefixForID(minioMetaBucket, "")),
-				tmpMetacacheOld,
-				osErrToFileErr(err))
-		}
+		// Renames and schedules all bucket metacaches for purging.
+		renameAllBucketMetacache(epPath)
 		// Removal of tmp-old folder is backgrounded completely.
 		go removeAll(pathJoin(epPath, minioMetaTmpBucket+"-old"))
diff --git a/cmd/storage-datatypes.go b/cmd/storage-datatypes.go
index 6eda4710e..26ac7afaf 100644
--- a/cmd/storage-datatypes.go
+++ b/cmd/storage-datatypes.go
@@ -85,18 +85,18 @@ type FileInfoVersions struct {
 	Versions []FileInfo
 }
-// forwardPastVersion will truncate the result to only contain versions after 'v'.
-// If v is empty or the version isn't found no changes will be made.
-func (f *FileInfoVersions) forwardPastVersion(v string) {
-	if v == "" {
-		return
+// findVersionIndex will return the version index where the version
+// was found. Returns -1 if not found.
+func (f *FileInfoVersions) findVersionIndex(v string) int {
+	if f == nil || v == "" {
+		return -1
 	}
 	for i, ver := range f.Versions {
 		if ver.VersionID == v {
-			f.Versions = f.Versions[i+1:]
-			return
+			return i
 		}
 	}
+	return -1
 }
 // FileInfo - represents file stat information.
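
The storage-datatypes.go hunk above replaces the mutating forwardPastVersion helper with a read-only findVersionIndex, and the caller in metacache-entries.go now re-slices past the version-id marker itself. A small standalone sketch of that caller-side pattern, using a trimmed-down version struct instead of MinIO's FileInfoVersions, shows the intent: the slice is never modified in place, only advanced past the marker.

```go
package main

import "fmt"

// version is a stripped-down stand-in for MinIO's per-object version entries.
type version struct {
	VersionID string
}

// findVersionIndex mirrors the new helper: return the position of v in
// versions, or -1 when v is empty or absent.
func findVersionIndex(versions []version, v string) int {
	if v == "" {
		return -1
	}
	for i, ver := range versions {
		if ver.VersionID == v {
			return i
		}
	}
	return -1
}

func main() {
	versions := []version{{"v1"}, {"v2"}, {"v3"}, {"v4"}}

	// Caller-side: resume listing strictly after the version-id marker, as
	// metacache-entries.go now does with fiVersions[vidMarkerIdx+1:].
	remaining := versions
	if idx := findVersionIndex(versions, "v2"); idx >= 0 {
		remaining = versions[idx+1:]
	}
	fmt.Println(remaining) // prints [{v3} {v4}]
}
```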
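
On the startup side, renameAllBucketMetacache generalizes the old single-prefix rename in prepare-storage.go: every .metacache directory under .minio.sys/buckets/<bucket>/ is parked in the tmp-old area whose removal is already backgrounded. A rough standard-library approximation of that walk follows; metaBucketDir, tmpOldDir and the directory layout are assumptions standing in for readDirFilterFn, minioMetaBucket and the real path helpers, so read it as a sketch of the idea, not the actual routine.

```go
package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"
	"time"
)

const (
	metaBucketDir = ".minio.sys/buckets" // assumed stand-in for the buckets metadata layout
	tmpOldDir     = ".minio.sys/tmp-old" // assumed stand-in for minioMetaTmpBucket + "-old"
)

// renameBucketMetacacheSketch walks each bucket directory under epPath and
// renames its ".metacache" folder into the tmp-old area, where a background
// removeAll can purge it later.
func renameBucketMetacacheSketch(epPath string) error {
	buckets, err := os.ReadDir(filepath.Join(epPath, metaBucketDir))
	if err != nil {
		if os.IsNotExist(err) {
			return nil // nothing to migrate
		}
		return err
	}
	for _, b := range buckets {
		if !b.IsDir() {
			continue
		}
		src := filepath.Join(epPath, metaBucketDir, b.Name(), ".metacache")
		dst := filepath.Join(epPath, tmpOldDir, fmt.Sprintf("%d-%s", time.Now().UnixNano(), b.Name()))
		if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
			return err
		}
		if err := os.Rename(src, dst); err != nil && !os.IsNotExist(err) {
			return fmt.Errorf("unable to rename (%s -> %s): %w", src, dst, err)
		}
	}
	return nil
}

func main() {
	if err := renameBucketMetacacheSketch("/data/disk1"); err != nil {
		log.Fatal(err)
	}
	// As in the patch, deleting tmp-old can be fully backgrounded; it is done
	// synchronously here only so the example finishes its work before exiting.
	if err := os.RemoveAll(filepath.Join("/data/disk1", tmpOldDir)); err != nil {
		log.Println(err)
	}
}
```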