fix: metacache should only rename entries during cleanup (#11503)

To avoid large delays in metacache cleanup, use rename
instead of recursive delete calls; renames are cheaper.
Move the content to minioMetaTmpBucket and then clean up
this folder once in 24hrs instead.

If the new cache can replace an existing one, we should
let it replace since that is currently being saved anyways,
this avoids pile up of 1000's of metacache entries for
same listing calls that are not necessary to be stored
on disk.
master
Harshavardhana 4 years ago committed by GitHub
parent 0ef3e359d8
commit b3c56b53fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 14
      cmd/bucket-listobjects-handlers.go
  2. 46
      cmd/erasure-multipart.go
  3. 22
      cmd/erasure-server-pool.go
  4. 11
      cmd/metacache-bucket.go
  5. 17
      cmd/metacache-entries.go
  6. 5
      cmd/metacache-manager.go
  7. 20
      cmd/metacache-server-pool.go
  8. 9
      cmd/metacache.go
  9. 12
      cmd/prepare-storage.go
  10. 14
      cmd/storage-datatypes.go

@ -26,7 +26,6 @@ import (
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/bucket/policy" "github.com/minio/minio/pkg/bucket/policy"
"github.com/minio/minio/pkg/handlers"
"github.com/minio/minio/pkg/sync/errgroup" "github.com/minio/minio/pkg/sync/errgroup"
) )
@ -295,10 +294,6 @@ func proxyRequestByNodeIndex(ctx context.Context, w http.ResponseWriter, r *http
return proxyRequest(ctx, w, r, ep) return proxyRequest(ctx, w, r, ep)
} }
func proxyRequestByStringHash(ctx context.Context, w http.ResponseWriter, r *http.Request, str string) (success bool) {
return proxyRequestByNodeIndex(ctx, w, r, crcHashMod(str, len(globalProxyEndpoints)))
}
// ListObjectsV1Handler - GET Bucket (List Objects) Version 1. // ListObjectsV1Handler - GET Bucket (List Objects) Version 1.
// -------------------------- // --------------------------
// This implementation of the GET operation returns some or all (up to 10000) // This implementation of the GET operation returns some or all (up to 10000)
@ -337,15 +332,6 @@ func (api objectAPIHandlers) ListObjectsV1Handler(w http.ResponseWriter, r *http
return return
} }
// Forward the request using Source IP or bucket
forwardStr := handlers.GetSourceIPFromHeaders(r)
if forwardStr == "" {
forwardStr = bucket
}
if proxyRequestByStringHash(ctx, w, r, forwardStr) {
return
}
listObjects := objectAPI.ListObjects listObjects := objectAPI.ListObjects
// Inititate a list objects operation based on the input params. // Inititate a list objects operation based on the input params.

@ -24,6 +24,7 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"sync"
"time" "time"
"github.com/minio/minio-go/v7/pkg/set" "github.com/minio/minio-go/v7/pkg/set"
@ -91,12 +92,47 @@ func (er erasureObjects) removeObjectPart(bucket, object, uploadID, dataDir stri
// Clean-up the old multipart uploads. Should be run in a Go routine. // Clean-up the old multipart uploads. Should be run in a Go routine.
func (er erasureObjects) cleanupStaleUploads(ctx context.Context, expiry time.Duration) { func (er erasureObjects) cleanupStaleUploads(ctx context.Context, expiry time.Duration) {
// run multiple cleanup's local to this server. // run multiple cleanup's local to this server.
var wg sync.WaitGroup
for _, disk := range er.getLoadBalancedLocalDisks() { for _, disk := range er.getLoadBalancedLocalDisks() {
if disk != nil { if disk != nil {
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
er.cleanupStaleUploadsOnDisk(ctx, disk, expiry) er.cleanupStaleUploadsOnDisk(ctx, disk, expiry)
return }(disk)
}
}
wg.Wait()
}
func (er erasureObjects) renameAll(ctx context.Context, bucket, prefix string) {
var wg sync.WaitGroup
for _, disk := range er.getDisks() {
if disk == nil {
continue
} }
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
disk.RenameFile(ctx, bucket, prefix, minioMetaTmpBucket, mustGetUUID())
}(disk)
} }
wg.Wait()
}
func (er erasureObjects) deleteAll(ctx context.Context, bucket, prefix string) {
var wg sync.WaitGroup
for _, disk := range er.getDisks() {
if disk == nil {
continue
}
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
disk.Delete(ctx, bucket, prefix, true)
}(disk)
}
wg.Wait()
} }
// Remove the old multipart uploads on the given disk. // Remove the old multipart uploads on the given disk.
@ -118,7 +154,7 @@ func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk Sto
continue continue
} }
if now.Sub(fi.ModTime) > expiry { if now.Sub(fi.ModTime) > expiry {
er.deleteObject(ctx, minioMetaMultipartBucket, uploadIDPath, fi.Erasure.DataBlocks+1) er.renameAll(ctx, minioMetaMultipartBucket, uploadIDPath)
} }
} }
} }
@ -127,12 +163,12 @@ func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk Sto
return return
} }
for _, tmpDir := range tmpDirs { for _, tmpDir := range tmpDirs {
fi, err := disk.ReadVersion(ctx, minioMetaTmpBucket, tmpDir, "", false) vi, err := disk.StatVol(ctx, pathJoin(minioMetaTmpBucket, tmpDir))
if err != nil { if err != nil {
continue continue
} }
if now.Sub(fi.ModTime) > expiry { if now.Sub(vi.Created) > expiry {
er.deleteObject(ctx, minioMetaTmpBucket, tmpDir, fi.Erasure.DataBlocks+1) er.deleteAll(ctx, minioMetaTmpBucket, tmpDir)
} }
} }
} }

@ -1121,22 +1121,24 @@ func (z *erasureServerPools) DeleteBucket(ctx context.Context, bucket string, fo
// data is not distributed across sets. // data is not distributed across sets.
// Errors are logged but individual disk failures are not returned. // Errors are logged but individual disk failures are not returned.
func (z *erasureServerPools) deleteAll(ctx context.Context, bucket, prefix string) { func (z *erasureServerPools) deleteAll(ctx context.Context, bucket, prefix string) {
var wg sync.WaitGroup
for _, servers := range z.serverPools { for _, servers := range z.serverPools {
for _, set := range servers.sets { for _, set := range servers.sets {
for _, disk := range set.getDisks() { set.deleteAll(ctx, bucket, prefix)
if disk == nil {
continue
} }
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
disk.Delete(ctx, bucket, prefix, true)
}(disk)
} }
}
// renameAll will rename bucket+prefix unconditionally across all disks to
// minioMetaTmpBucket + unique uuid,
// Note that set distribution is ignored so it should only be used in cases where
// data is not distributed across sets. Errors are logged but individual
// disk failures are not returned.
func (z *erasureServerPools) renameAll(ctx context.Context, bucket, prefix string) {
for _, servers := range z.serverPools {
for _, set := range servers.sets {
set.renameAll(ctx, bucket, prefix)
} }
} }
wg.Wait()
} }
// This function is used to undo a successful DeleteBucket operation. // This function is used to undo a successful DeleteBucket operation.

@ -64,7 +64,7 @@ func newBucketMetacache(bucket string, cleanup bool) *bucketMetacache {
ez, ok := objAPI.(*erasureServerPools) ez, ok := objAPI.(*erasureServerPools)
if ok { if ok {
ctx := context.Background() ctx := context.Background()
ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(bucket, slashSeparator)) ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(bucket, slashSeparator))
} }
} }
return &bucketMetacache{ return &bucketMetacache{
@ -292,7 +292,7 @@ func (b *bucketMetacache) cleanup() {
caches, rootIdx := b.cloneCaches() caches, rootIdx := b.cloneCaches()
for id, cache := range caches { for id, cache := range caches {
if b.transient && time.Since(cache.lastUpdate) > 15*time.Minute && time.Since(cache.lastHandout) > 15*time.Minute { if b.transient && time.Since(cache.lastUpdate) > 10*time.Minute && time.Since(cache.lastHandout) > 10*time.Minute {
// Keep transient caches only for 15 minutes. // Keep transient caches only for 10 minutes.
remove[id] = struct{}{} remove[id] = struct{}{}
continue continue
@ -361,7 +361,7 @@ func (b *bucketMetacache) cleanup() {
}) })
// Keep first metacacheMaxEntries... // Keep first metacacheMaxEntries...
for _, cache := range remainCaches[metacacheMaxEntries:] { for _, cache := range remainCaches[metacacheMaxEntries:] {
if time.Since(cache.lastHandout) > time.Hour { if time.Since(cache.lastHandout) > 30*time.Minute {
remove[cache.id] = struct{}{} remove[cache.id] = struct{}{}
} }
} }
@ -409,7 +409,6 @@ func (b *bucketMetacache) updateCacheEntry(update metacache) (metacache, error)
defer b.mu.Unlock() defer b.mu.Unlock()
existing, ok := b.caches[update.id] existing, ok := b.caches[update.id]
if !ok { if !ok {
logger.Info("updateCacheEntry: bucket %s list id %v not found", b.bucket, update.id)
return update, errFileNotFound return update, errFileNotFound
} }
existing.update(update) existing.update(update)
@ -465,7 +464,7 @@ func (b *bucketMetacache) deleteAll() {
b.updated = true b.updated = true
if !b.transient { if !b.transient {
// Delete all. // Delete all.
ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(b.bucket, slashSeparator)) ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(b.bucket, slashSeparator))
b.caches = make(map[string]metacache, 10) b.caches = make(map[string]metacache, 10)
b.cachesRoot = make(map[string][]string, 10) b.cachesRoot = make(map[string][]string, 10)
return return
@ -477,7 +476,7 @@ func (b *bucketMetacache) deleteAll() {
wg.Add(1) wg.Add(1)
go func(cache metacache) { go func(cache metacache) {
defer wg.Done() defer wg.Done()
ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(cache.bucket, cache.id)) ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(cache.bucket, cache.id))
}(b.caches[id]) }(b.caches[id])
} }
wg.Wait() wg.Wait()

@ -330,16 +330,23 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft
} }
fiv, err := entry.fileInfoVersions(bucket) fiv, err := entry.fileInfoVersions(bucket)
if err != nil {
continue
}
fiVersions := fiv.Versions
if afterV != "" { if afterV != "" {
// Forward first entry to specified version vidMarkerIdx := fiv.findVersionIndex(afterV)
fiv.forwardPastVersion(afterV) if vidMarkerIdx >= 0 {
fiVersions = fiVersions[vidMarkerIdx+1:]
}
afterV = "" afterV = ""
} }
if err == nil {
for _, version := range fiv.Versions { for _, version := range fiVersions {
versions = append(versions, version.ToObjectInfo(bucket, entry.name)) versions = append(versions, version.ToObjectInfo(bucket, entry.name))
} }
}
continue continue
} }

@ -92,7 +92,6 @@ func (m *metacacheManager) initManager() {
} }
m.mu.Unlock() m.mu.Unlock()
} }
m.getTransient().deleteAll()
}() }()
} }
@ -124,11 +123,11 @@ func (m *metacacheManager) updateCacheEntry(update metacache) (metacache, error)
} }
b, ok := m.buckets[update.bucket] b, ok := m.buckets[update.bucket]
if ok {
m.mu.RUnlock() m.mu.RUnlock()
if ok {
return b.updateCacheEntry(update) return b.updateCacheEntry(update)
} }
m.mu.RUnlock()
// We should have either a trashed bucket or this // We should have either a trashed bucket or this
return metacache{}, errVolumeNotFound return metacache{}, errVolumeNotFound
} }

@ -19,7 +19,9 @@ package cmd
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"io" "io"
"os"
"path" "path"
"strings" "strings"
"sync" "sync"
@ -28,6 +30,24 @@ import (
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
) )
func renameAllBucketMetacache(epPath string) error {
// Rename all previous `.minio.sys/buckets/<bucketname>/.metacache` to
// to `.minio.sys/tmp/` for deletion.
return readDirFilterFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error {
if typ == os.ModeDir {
tmpMetacacheOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID())
if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)),
tmpMetacacheOld); err != nil && err != errFileNotFound {
return fmt.Errorf("unable to rename (%s -> %s) %w",
pathJoin(epPath, minioMetaBucket+metacachePrefixForID(minioMetaBucket, slashSeparator)),
tmpMetacacheOld,
osErrToFileErr(err))
}
}
return nil
})
}
// listPath will return the requested entries. // listPath will return the requested entries.
// If no more entries are in the listing io.EOF is returned, // If no more entries are in the listing io.EOF is returned,
// otherwise nil or an unexpected error is returned. // otherwise nil or an unexpected error is returned.

@ -123,7 +123,7 @@ func (m *metacache) matches(o *listPathOptions, extend time.Duration) bool {
} }
if time.Since(m.lastUpdate) > metacacheMaxRunningAge+extend { if time.Since(m.lastUpdate) > metacacheMaxRunningAge+extend {
// Cache ended within bloom cycle, but we can extend the life. // Cache ended within bloom cycle, but we can extend the life.
o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, extend+metacacheMaxRunningAge) o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, metacacheMaxRunningAge+extend)
return false return false
} }
} }
@ -151,8 +151,8 @@ func (m *metacache) worthKeeping(currentCycle uint64) bool {
// Cycle is too old to be valuable. // Cycle is too old to be valuable.
return false return false
case cache.status == scanStateError || cache.status == scanStateNone: case cache.status == scanStateError || cache.status == scanStateNone:
// Remove failed listings after 10 minutes. // Remove failed listings after 5 minutes.
return time.Since(cache.lastUpdate) < 10*time.Minute return time.Since(cache.lastUpdate) < 5*time.Minute
} }
return true return true
} }
@ -170,8 +170,9 @@ func (m *metacache) canBeReplacedBy(other *metacache) bool {
if m.status == scanStateStarted && time.Since(m.lastUpdate) < metacacheMaxRunningAge { if m.status == scanStateStarted && time.Since(m.lastUpdate) < metacacheMaxRunningAge {
return false return false
} }
// Keep it around a bit longer. // Keep it around a bit longer.
if time.Since(m.lastHandout) < time.Hour || time.Since(m.lastUpdate) < metacacheMaxRunningAge { if time.Since(m.lastHandout) < 30*time.Minute || time.Since(m.lastUpdate) < metacacheMaxRunningAge {
return false return false
} }

@ -125,16 +125,8 @@ func formatErasureCleanupTmpLocalEndpoints(endpoints Endpoints) error {
osErrToFileErr(err)) osErrToFileErr(err))
} }
// Move .minio.sys/buckets/.minio.sys/metacache transient list cache // Renames and schedules for purging all bucket metacache.
// folder to speed up startup routines. renameAllBucketMetacache(epPath)
tmpMetacacheOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID())
if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(minioMetaBucket, "")),
tmpMetacacheOld); err != nil && err != errFileNotFound {
return fmt.Errorf("unable to rename (%s -> %s) %w",
pathJoin(epPath, minioMetaBucket+metacachePrefixForID(minioMetaBucket, "")),
tmpMetacacheOld,
osErrToFileErr(err))
}
// Removal of tmp-old folder is backgrounded completely. // Removal of tmp-old folder is backgrounded completely.
go removeAll(pathJoin(epPath, minioMetaTmpBucket+"-old")) go removeAll(pathJoin(epPath, minioMetaTmpBucket+"-old"))

@ -85,18 +85,18 @@ type FileInfoVersions struct {
Versions []FileInfo Versions []FileInfo
} }
// forwardPastVersion will truncate the result to only contain versions after 'v'. // findVersionIndex will return the version index where the version
// If v is empty or the version isn't found no changes will be made. // was found. Returns -1 if not found.
func (f *FileInfoVersions) forwardPastVersion(v string) { func (f *FileInfoVersions) findVersionIndex(v string) int {
if v == "" { if f == nil || v == "" {
return return -1
} }
for i, ver := range f.Versions { for i, ver := range f.Versions {
if ver.VersionID == v { if ver.VersionID == v {
f.Versions = f.Versions[i+1:] return i
return
} }
} }
return -1
} }
// FileInfo - represents file stat information. // FileInfo - represents file stat information.

Loading…
Cancel
Save