From 1cf322b7d466e7d8532db8f9fb05f7dbd2adc972 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Fri, 18 Sep 2020 11:15:54 -0700
Subject: [PATCH] change leader locker only for crawler (#10509)

---
 cmd/data-crawler.go   | 18 +++++++++++++++++-
 cmd/global-heal.go    |  6 ------
 cmd/server-main.go    | 18 +-----------------
 pkg/dsync/drwmutex.go |  4 ++++
 4 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/cmd/data-crawler.go b/cmd/data-crawler.go
index 10a6eead3..09b1f52c3 100644
--- a/cmd/data-crawler.go
+++ b/cmd/data-crawler.go
@@ -50,10 +50,14 @@ const (
 	healFolderIncludeProb = 32  // Include a clean folder one in n cycles.
 	healObjectSelectProb  = 512 // Overall probability of a file being scanned; one in n.
+	// sleep for an hour after a lock timeout
+	// before retrying to acquire lock again.
+	dataCrawlerLeaderLockTimeoutSleepInterval = time.Hour
 )
 
 var (
-	globalCrawlerConfig crawler.Config
+	globalCrawlerConfig          crawler.Config
+	dataCrawlerLeaderLockTimeout = newDynamicTimeout(1*time.Minute, 30*time.Second)
 )
 
 // initDataCrawler will start the crawler unless disabled.
@@ -67,6 +71,18 @@ func initDataCrawler(ctx context.Context, objAPI ObjectLayer) {
 // The function will block until the context is canceled.
 // There should only ever be one crawler running per cluster.
 func runDataCrawler(ctx context.Context, objAPI ObjectLayer) {
+	// Make sure only 1 crawler is running on the cluster.
+	locker := objAPI.NewNSLock(ctx, minioMetaBucket, "runDataCrawler.lock")
+	for {
+		err := locker.GetLock(dataCrawlerLeaderLockTimeout)
+		if err != nil {
+			time.Sleep(dataCrawlerLeaderLockTimeoutSleepInterval)
+			continue
+		}
+		break
+		// No unlock for "leader" lock.
+	}
+
 	// Load current bloom cycle
 	nextBloomCycle := intDataUpdateTracker.current() + 1
 	var buf bytes.Buffer
diff --git a/cmd/global-heal.go b/cmd/global-heal.go
index 987ac5d68..20034117e 100644
--- a/cmd/global-heal.go
+++ b/cmd/global-heal.go
@@ -26,17 +26,11 @@ import (
 
 const (
 	bgHealingUUID = "0000-0000-0000-0000"
-	// sleep for an hour after a lock timeout
-	// before retrying to acquire lock again.
-	leaderLockTimeoutSleepInterval = time.Hour
 )
 
-var leaderLockTimeout = newDynamicTimeout(1*time.Minute, 30*time.Second)
-
 // NewBgHealSequence creates a background healing sequence
 // operation which crawls all objects and heal them.
 func newBgHealSequence() *healSequence {
-
 	reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
 	ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))
 
diff --git a/cmd/server-main.go b/cmd/server-main.go
index 61d014ebf..f944f0778 100644
--- a/cmd/server-main.go
+++ b/cmd/server-main.go
@@ -366,22 +366,6 @@ func initAllSubsystems(ctx context.Context, newObject ObjectLayer) (err error) {
 	return nil
 }
 
-func startBackgroundOps(ctx context.Context, objAPI ObjectLayer) {
-	// Make sure only 1 crawler is running on the cluster.
-	locker := objAPI.NewNSLock(ctx, minioMetaBucket, "leader")
-	for {
-		err := locker.GetLock(leaderLockTimeout)
-		if err != nil {
-			time.Sleep(leaderLockTimeoutSleepInterval)
-			continue
-		}
-		break
-		// No unlock for "leader" lock.
-	}
-
-	initDataCrawler(ctx, objAPI)
-}
-
 // serverMain handler called for 'minio server' command.
 func serverMain(ctx *cli.Context) {
 	signal.Notify(globalOSSignalCh, os.Interrupt, syscall.SIGTERM)
@@ -503,7 +487,7 @@ func serverMain(ctx *cli.Context) {
 	globalObjectAPI = newObject
 	globalObjLayerMutex.Unlock()
 
-	go startBackgroundOps(GlobalContext, newObject)
+	go initDataCrawler(GlobalContext, newObject)
 
 	logger.FatalIf(initSafeMode(GlobalContext, newObject), "Unable to initialize server switching into safe-mode")
 
diff --git a/pkg/dsync/drwmutex.go b/pkg/dsync/drwmutex.go
index 0bbf008a0..580d66f30 100644
--- a/pkg/dsync/drwmutex.go
+++ b/pkg/dsync/drwmutex.go
@@ -154,7 +154,9 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadL
 		}
 	}
 
+	log("lockBlocking %s/%s for %#v: lockType readLock(%t), additional opts: %#v\n", id, source, dm.Names, isReadLock, opts)
 	retryCtx, cancel := context.WithTimeout(ctx, opts.Timeout)
+
 	defer cancel()
 
 	for {
@@ -163,6 +165,8 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadL
 		select {
 		case <-retryCtx.Done():
+			log("lockBlocking canceled %s/%s for %#v: lockType readLock(%t), additional opts: %#v\n", id, source, dm.Names, isReadLock, opts)
+
 			// Caller context canceled or we timedout,
 			// return false anyways for both situations.
 			return false
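The heart of this patch is a leader-election idiom: take a cluster-wide namespace
lock, never release it, and let whoever wins become the sole crawler. Losers sleep
for an hour between attempts instead of hammering the lock endpoint. Below is a
minimal, self-contained sketch of that acquire-and-hold loop, assuming a toy
in-process locker in place of MinIO's dsync-backed NSLock (nsLocker and
becomeLeader are illustrative names, not part of the patch):

package main

import (
	"errors"
	"fmt"
	"sync"
	"time"
)

// nsLocker is a toy stand-in for MinIO's dsync-backed NSLock; it only
// coordinates goroutines within one process, not servers across a cluster.
type nsLocker struct {
	mu   sync.Mutex
	held bool
}

var errLockTimeout = errors.New("lock acquisition timed out")

// GetLock mirrors the shape of the real locker's GetLock: acquire the
// lock within the timeout or give up with an error.
func (l *nsLocker) GetLock(timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		l.mu.Lock()
		if !l.held {
			l.held = true
			l.mu.Unlock()
			return nil
		}
		l.mu.Unlock()
		time.Sleep(10 * time.Millisecond)
	}
	return errLockTimeout
}

// becomeLeader follows the loop the patch adds to runDataCrawler: retry
// after every timeout, and once the lock is won, never release it, so at
// most one "crawler" ever gets past this point at a time.
func becomeLeader(l *nsLocker, timeout, sleepOnFailure time.Duration) {
	for {
		if err := l.GetLock(timeout); err != nil {
			time.Sleep(sleepOnFailure) // someone else is leading; back off
			continue
		}
		break // leader now; intentionally no Unlock
	}
}

func main() {
	locker := &nsLocker{}
	becomeLeader(locker, 100*time.Millisecond, time.Second)
	fmt.Println("this process now holds the crawler leader lock")
}

Because the lock is held for the life of the process, leadership moves only when
the holder dies and the server-side lock lapses; the "No unlock" comment in the
patch marks that deliberate choice.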
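On the pkg/dsync side the patch only adds logging and a defer cancel() for the
retry context, but the control flow it instruments is worth spelling out:
lockBlocking bounds its retries with context.WithTimeout and treats a fired
context as a failed acquisition. A reduced sketch of that shape follows, with
tryAcquire as a hypothetical stand-in for dsync's quorum lock call:

package main

import (
	"context"
	"fmt"
	"math/rand"
	"time"
)

// tryAcquire is a hypothetical stand-in for dsync's quorum lock attempt;
// the real code asks every locker node and counts successful responses.
func tryAcquire() bool {
	return rand.Intn(4) == 0 // succeeds roughly one attempt in four
}

// lockBlocking retries until it wins the lock or the timeout-bounded
// context fires, mirroring the control flow of DRWMutex.lockBlocking.
func lockBlocking(ctx context.Context, timeout time.Duration) bool {
	retryCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel() // the line this patch adds: always release the timer

	for {
		if tryAcquire() {
			return true
		}
		select {
		case <-retryCtx.Done():
			// Caller context canceled or we timed out: report failure.
			return false
		case <-time.After(50 * time.Millisecond):
			// brief pause before the next attempt
		}
	}
}

func main() {
	fmt.Println("locked:", lockBlocking(context.Background(), time.Second))
}

The defer cancel() matters because context.WithTimeout starts a timer that is
not reclaimed until the deadline passes or cancel runs; GetLock is called in a
retry loop by the crawler, so each failed attempt must clean up its own timer.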