diff --git a/cmd/namespace-lock.go b/cmd/namespace-lock.go index 968014c98..df1772e58 100644 --- a/cmd/namespace-lock.go +++ b/cmd/namespace-lock.go @@ -28,6 +28,7 @@ import ( "fmt" "time" + "github.com/minio/minio/cmd/config/storageclass" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/dsync" "github.com/minio/minio/pkg/lsync" @@ -147,7 +148,18 @@ func (di *distLockInstance) GetLock(timeout *dynamicTimeout) (timedOutErr error) lockSource := getSource(2) start := UTCNow() - if !di.rwMutex.GetLock(di.ctx, di.opsID, lockSource, timeout.Timeout()) { + // Lockers default to standard storage class always, why because + // we always dictate storage tolerance in terms of standard + // storage class be it number of drives or a multiplicative + // of number of nodes, defaulting lockers to this value + // simply means that locking is always similar in behavior + // and effect with erasure coded drive tolerance. + tolerance := globalStorageClass.GetParityForSC(storageclass.STANDARD) + + if !di.rwMutex.GetLock(di.ctx, di.opsID, lockSource, dsync.Options{ + Timeout: timeout.Timeout(), + Tolerance: tolerance, + }) { timeout.LogFailure() return OperationTimedOut{} } @@ -164,7 +176,14 @@ func (di *distLockInstance) Unlock() { func (di *distLockInstance) GetRLock(timeout *dynamicTimeout) (timedOutErr error) { lockSource := getSource(2) start := UTCNow() - if !di.rwMutex.GetRLock(di.ctx, di.opsID, lockSource, timeout.Timeout()) { + + // Lockers default to standard storage class always. + tolerance := globalStorageClass.GetParityForSC(storageclass.STANDARD) + + if !di.rwMutex.GetRLock(di.ctx, di.opsID, lockSource, dsync.Options{ + Timeout: timeout.Timeout(), + Tolerance: tolerance, + }) { timeout.LogFailure() return OperationTimedOut{} } diff --git a/cmd/server-main.go b/cmd/server-main.go index 60078d382..431258782 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -229,6 +229,8 @@ func initSafeMode(ctx context.Context, newObject ObjectLayer) (err error) { initAutoHeal(ctx, newObject) } + timeout := newDynamicTimeout(3*time.Second, 3*time.Second) + // **** WARNING **** // Migrating to encrypted backend should happen before initialization of any // sub-systems, make sure that we do not move the above codeblock elsewhere. @@ -244,7 +246,7 @@ func initSafeMode(ctx context.Context, newObject ObjectLayer) (err error) { for range retry.NewTimer(retryCtx) { // let one of the server acquire the lock, if not let them timeout. // which shall be retried again by this loop. - if err = txnLk.GetLock(newDynamicTimeout(3*time.Second, 3*time.Second)); err != nil { + if err = txnLk.GetLock(timeout); err != nil { logger.Info("Waiting for all MinIO sub-systems to be initialized.. trying to acquire lock") continue } diff --git a/go.mod b/go.mod index 33dd4d184..5eb800cd7 100644 --- a/go.mod +++ b/go.mod @@ -83,6 +83,7 @@ require ( golang.org/x/crypto v0.0.0-20200709230013-948cd5f35899 golang.org/x/net v0.0.0-20200707034311-ab3426394381 golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae + golang.org/x/tools v0.0.0-20200814172026-c4923e618c08 // indirect google.golang.org/api v0.5.0 gopkg.in/jcmturner/gokrb5.v7 v7.3.0 gopkg.in/ldap.v3 v3.0.3 diff --git a/go.sum b/go.sum index 5b676d7b0..3905686f0 100644 --- a/go.sum +++ b/go.sum @@ -447,6 +447,7 @@ github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= go.etcd.io/etcd/v3 v3.3.0-rc.0.0.20200707003333-58bb8ae09f8e h1:HZQLoe71Q24wVyDrGBRcVuogx32U+cPlcm/WoSLUI6c= @@ -487,6 +488,8 @@ golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -500,6 +503,7 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191112182307-2180aed22343/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -512,6 +516,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 h1:qwRHBd0NqMbJxfbotnDhm2ByMI1Shq4Y6oRJo21SGJA= +golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -560,6 +566,8 @@ golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200425043458-8463f397d07c h1:iHhCR0b26amDCiiO+kBguKZom9aMF+NrFxh9zeKR/XU= golang.org/x/tools v0.0.0-20200425043458-8463f397d07c/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200814172026-c4923e618c08 h1:sfBQLM20fzeXhOixVQirwEbuW4PGStP773EXQpsBB6E= +golang.org/x/tools v0.0.0-20200814172026-c4923e618c08/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= diff --git a/pkg/dsync/drwmutex.go b/pkg/dsync/drwmutex.go index 15bbec525..8ef1f1747 100644 --- a/pkg/dsync/drwmutex.go +++ b/pkg/dsync/drwmutex.go @@ -86,7 +86,15 @@ func NewDRWMutex(clnt *Dsync, names ...string) *DRWMutex { func (dm *DRWMutex) Lock(id, source string) { isReadLock := false - dm.lockBlocking(context.Background(), drwMutexInfinite, id, source, isReadLock) + dm.lockBlocking(context.Background(), id, source, isReadLock, Options{ + Timeout: drwMutexInfinite, + }) +} + +// Options lock options. +type Options struct { + Timeout time.Duration + Tolerance int } // GetLock tries to get a write lock on dm before the timeout elapses. @@ -94,10 +102,10 @@ func (dm *DRWMutex) Lock(id, source string) { // If the lock is already in use, the calling go routine // blocks until either the mutex becomes available and return success or // more time has passed than the timeout value and return false. -func (dm *DRWMutex) GetLock(ctx context.Context, id, source string, timeout time.Duration) (locked bool) { +func (dm *DRWMutex) GetLock(ctx context.Context, id, source string, opts Options) (locked bool) { isReadLock := false - return dm.lockBlocking(ctx, timeout, id, source, isReadLock) + return dm.lockBlocking(ctx, id, source, isReadLock, opts) } // RLock holds a read lock on dm. @@ -107,7 +115,9 @@ func (dm *DRWMutex) GetLock(ctx context.Context, id, source string, timeout time func (dm *DRWMutex) RLock(id, source string) { isReadLock := true - dm.lockBlocking(context.Background(), drwMutexInfinite, id, source, isReadLock) + dm.lockBlocking(context.Background(), id, source, isReadLock, Options{ + Timeout: drwMutexInfinite, + }) } // GetRLock tries to get a read lock on dm before the timeout elapses. @@ -116,10 +126,10 @@ func (dm *DRWMutex) RLock(id, source string) { // Otherwise the calling go routine blocks until either the mutex becomes // available and return success or more time has passed than the timeout // value and return false. -func (dm *DRWMutex) GetRLock(ctx context.Context, id, source string, timeout time.Duration) (locked bool) { +func (dm *DRWMutex) GetRLock(ctx context.Context, id, source string, opts Options) (locked bool) { isReadLock := true - return dm.lockBlocking(ctx, timeout, id, source, isReadLock) + return dm.lockBlocking(ctx, id, source, isReadLock, opts) } // lockBlocking will try to acquire either a read or a write lock @@ -127,10 +137,10 @@ func (dm *DRWMutex) GetRLock(ctx context.Context, id, source string, timeout tim // The function will loop using a built-in timing randomized back-off // algorithm until either the lock is acquired successfully or more // time has elapsed than the timeout value. -func (dm *DRWMutex) lockBlocking(ctx context.Context, timeout time.Duration, id, source string, isReadLock bool) (locked bool) { +func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadLock bool, opts Options) (locked bool) { restClnts := dm.clnt.GetLockersFn() - retryCtx, cancel := context.WithTimeout(ctx, timeout) + retryCtx, cancel := context.WithTimeout(ctx, opts.Timeout) defer cancel() @@ -140,7 +150,7 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, timeout time.Duration, id, locks := make([]string, len(restClnts)) // Try to acquire the lock. - locked = lock(retryCtx, dm.clnt, &locks, id, source, isReadLock, dm.Names...) + locked = lock(retryCtx, dm.clnt, &locks, id, source, isReadLock, opts.Tolerance, dm.Names...) if !locked { continue } @@ -167,10 +177,29 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, timeout time.Duration, id, } // lock tries to acquire the distributed lock, returning true or false. -func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNames ...string) bool { +func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, isReadLock bool, tolerance int, lockNames ...string) bool { restClnts := ds.GetLockersFn() + // Tolerance is not set, defaults to half of the locker clients. + if tolerance == 0 { + tolerance = len(restClnts) / 2 + } + + // Quorum is effectively = total clients subtracted with tolerance limit + quorum := len(restClnts) - tolerance + if !isReadLock { + // In situations for write locks, as a special case + // to avoid split brains we make sure to acquire + // quorum + 1 when tolerance is exactly half of the + // total locker clients. + if quorum == tolerance { + quorum++ + } + } + + tolerance = len(restClnts) - quorum + // Create buffered channel of size equal to total number of nodes. ch := make(chan Granted, len(restClnts)) defer close(ch) @@ -217,7 +246,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is }(index, isReadLock, c) } - quorum := false + quorumMet := false wg.Add(1) go func(isReadLock bool) { @@ -232,9 +261,6 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is done := false timeout := time.After(DRWMutexAcquireTimeout) - dquorumReads := (len(restClnts) + 1) / 2 - dquorum := dquorumReads + 1 - for ; i < len(restClnts); i++ { // Loop until we acquired all locks select { @@ -244,8 +270,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is (*locks)[grant.index] = grant.lockUID } else { locksFailed++ - if !isReadLock && locksFailed > len(restClnts)-dquorum || - isReadLock && locksFailed > len(restClnts)-dquorumReads { + if locksFailed > tolerance { // We know that we are not going to get the lock anymore, // so exit out and release any locks that did get acquired done = true @@ -258,7 +283,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is done = true // timeout happened, maybe one of the nodes is slow, count // number of locks to check whether we have quorum or not - if !quorumMet(locks, isReadLock, dquorum, dquorumReads) { + if !checkQuorumMet(locks, quorum) { log("Quorum not met after timeout\n") releaseAll(ds, locks, isReadLock, restClnts, lockNames...) } else { @@ -272,7 +297,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is } // Count locks in order to determine whether we have quorum or not - quorum = quorumMet(locks, isReadLock, dquorum, dquorumReads) + quorumMet = checkQuorumMet(locks, quorum) // Signal that we have the quorum wg.Done() @@ -292,12 +317,11 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is wg.Wait() - return quorum + return quorumMet } -// quorumMet determines whether we have acquired the required quorum of underlying locks or not -func quorumMet(locks *[]string, isReadLock bool, quorum, quorumReads int) bool { - +// checkQuorumMet determines whether we have acquired the required quorum of underlying locks or not +func checkQuorumMet(locks *[]string, quorum int) bool { count := 0 for _, uid := range *locks { if isLocked(uid) { @@ -305,14 +329,7 @@ func quorumMet(locks *[]string, isReadLock bool, quorum, quorumReads int) bool { } } - var metQuorum bool - if isReadLock { - metQuorum = count >= quorumReads - } else { - metQuorum = count >= quorum - } - - return metQuorum + return count >= quorum } // releaseAll releases all locks that are marked as locked diff --git a/pkg/dsync/drwmutex_test.go b/pkg/dsync/drwmutex_test.go index 422b966aa..7a20bd451 100644 --- a/pkg/dsync/drwmutex_test.go +++ b/pkg/dsync/drwmutex_test.go @@ -36,12 +36,12 @@ func testSimpleWriteLock(t *testing.T, duration time.Duration) (locked bool) { drwm := NewDRWMutex(ds, "simplelock") - if !drwm.GetRLock(context.Background(), id, source, time.Second) { + if !drwm.GetRLock(context.Background(), id, source, Options{Timeout: time.Second}) { panic("Failed to acquire read lock") } // fmt.Println("1st read lock acquired, waiting...") - if !drwm.GetRLock(context.Background(), id, source, time.Second) { + if !drwm.GetRLock(context.Background(), id, source, Options{Timeout: time.Second}) { panic("Failed to acquire read lock") } // fmt.Println("2nd read lock acquired, waiting...") @@ -59,7 +59,7 @@ func testSimpleWriteLock(t *testing.T, duration time.Duration) (locked bool) { }() // fmt.Println("Trying to acquire write lock, waiting...") - locked = drwm.GetLock(context.Background(), id, source, duration) + locked = drwm.GetLock(context.Background(), id, source, Options{Timeout: duration}) if locked { // fmt.Println("Write lock acquired, waiting...") time.Sleep(time.Second) @@ -93,7 +93,7 @@ func testDualWriteLock(t *testing.T, duration time.Duration) (locked bool) { drwm := NewDRWMutex(ds, "duallock") // fmt.Println("Getting initial write lock") - if !drwm.GetLock(context.Background(), id, source, time.Second) { + if !drwm.GetLock(context.Background(), id, source, Options{Timeout: time.Second}) { panic("Failed to acquire initial write lock") } @@ -104,7 +104,7 @@ func testDualWriteLock(t *testing.T, duration time.Duration) (locked bool) { }() // fmt.Println("Trying to acquire 2nd write lock, waiting...") - locked = drwm.GetLock(context.Background(), id, source, duration) + locked = drwm.GetLock(context.Background(), id, source, Options{Timeout: duration}) if locked { // fmt.Println("2nd write lock acquired, waiting...") time.Sleep(time.Second) @@ -139,7 +139,7 @@ func TestDualWriteLockTimedOut(t *testing.T) { // Borrowed from rwmutex_test.go func parallelReader(ctx context.Context, m *DRWMutex, clocked, cunlock, cdone chan bool) { - if m.GetRLock(ctx, id, source, time.Second) { + if m.GetRLock(ctx, id, source, Options{Timeout: time.Second}) { clocked <- true <-cunlock m.RUnlock() @@ -182,7 +182,7 @@ func TestParallelReaders(t *testing.T) { // Borrowed from rwmutex_test.go func reader(rwm *DRWMutex, numIterations int, activity *int32, cdone chan bool) { for i := 0; i < numIterations; i++ { - if rwm.GetRLock(context.Background(), id, source, time.Second) { + if rwm.GetRLock(context.Background(), id, source, Options{Timeout: time.Second}) { n := atomic.AddInt32(activity, 1) if n < 1 || n >= 10000 { panic(fmt.Sprintf("wlock(%d)\n", n)) @@ -199,7 +199,7 @@ func reader(rwm *DRWMutex, numIterations int, activity *int32, cdone chan bool) // Borrowed from rwmutex_test.go func writer(rwm *DRWMutex, numIterations int, activity *int32, cdone chan bool) { for i := 0; i < numIterations; i++ { - if rwm.GetLock(context.Background(), id, source, time.Second) { + if rwm.GetLock(context.Background(), id, source, Options{Timeout: time.Second}) { n := atomic.AddInt32(activity, 10000) if n != 10000 { panic(fmt.Sprintf("wlock(%d)\n", n)) diff --git a/pkg/dsync/dsync_test.go b/pkg/dsync/dsync_test.go index a5788d5b8..7d61b4f6c 100644 --- a/pkg/dsync/dsync_test.go +++ b/pkg/dsync/dsync_test.go @@ -63,7 +63,7 @@ func TestMain(m *testing.M) { rand.Seed(time.Now().UTC().UnixNano()) - nodes := make([]string, 4) // list of node IP addrs or hostname with ports. + nodes := make([]string, 5) // list of node IP addrs or hostname with ports. for i := range nodes { nodes[i] = fmt.Sprintf("127.0.0.1:%d", i+12345) }