From b730bd1396f0803612b3cc523b474aa049c18b57 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Thu, 14 May 2020 23:59:07 -0700
Subject: [PATCH] fix: possible race in FS local lockMap (#9598)

---
 cmd/fs-v1.go               | 21 -------------
 cmd/namespace-lock.go      | 60 ++++++++++++++++----------------------
 cmd/namespace-lock_test.go |  2 +-
 cmd/server_test.go         |  2 +-
 cmd/test-utils_test.go     | 35 ++++++++++------------
 5 files changed, 42 insertions(+), 78 deletions(-)

diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go
index e482f991b..1d047f12e 100644
--- a/cmd/fs-v1.go
+++ b/cmd/fs-v1.go
@@ -320,12 +320,6 @@ func (fs *FSObjects) MakeBucketWithLocation(ctx context.Context, bucket, locatio
 		return NotImplemented{}
 	}
 
-	bucketLock := fs.NewNSLock(ctx, bucket, "")
-	if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
-		return err
-	}
-	defer bucketLock.Unlock()
-
 	// Verify if bucket is valid.
 	if s3utils.CheckValidBucketNameStrict(bucket) != nil {
 		return BucketNameInvalid{Bucket: bucket}
@@ -356,12 +350,6 @@ func (fs *FSObjects) MakeBucketWithLocation(ctx context.Context, bucket, locatio
 
 // GetBucketInfo - fetch bucket metadata info.
 func (fs *FSObjects) GetBucketInfo(ctx context.Context, bucket string) (bi BucketInfo, e error) {
-	bucketLock := fs.NewNSLock(ctx, bucket, "")
-	if e := bucketLock.GetRLock(globalObjectTimeout); e != nil {
-		return bi, e
-	}
-	defer bucketLock.RUnlock()
-
 	atomic.AddInt64(&fs.activeIOCount, 1)
 	defer func() {
 		atomic.AddInt64(&fs.activeIOCount, -1)
@@ -438,15 +426,6 @@ func (fs *FSObjects) ListBuckets(ctx context.Context) ([]BucketInfo, error) {
 // DeleteBucket - delete a bucket and all the metadata associated
 // with the bucket including pending multipart, object metadata.
 func (fs *FSObjects) DeleteBucket(ctx context.Context, bucket string, forceDelete bool) error {
-	if !forceDelete {
-		bucketLock := fs.NewNSLock(ctx, bucket, "")
-		if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
-			logger.LogIf(ctx, err)
-			return err
-		}
-		defer bucketLock.Unlock()
-	}
-
 	atomic.AddInt64(&fs.activeIOCount, 1)
 	defer func() {
 		atomic.AddInt64(&fs.activeIOCount, -1)
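[Reviewer note, not part of the patch: the deletions above drop the per-bucket namespace locks from the FS backend's MakeBucketWithLocation, GetBucketInfo, and DeleteBucket. This appears safe because these operations bottom out in single directory syscalls that the operating system already serializes, so the in-process lock added nothing but contention. A minimal, self-contained sketch of that property; the paths and the makeBucket helper are hypothetical illustrations, not MinIO code:

package main

import (
	"errors"
	"fmt"
	"os"
)

// Two concurrent "make bucket" calls cannot both succeed: os.Mkdir is a
// single atomic syscall, so the loser observes os.ErrExist instead of
// racing on shared in-process state.
func makeBucket(path string) error {
	if err := os.Mkdir(path, 0o755); err != nil {
		if errors.Is(err, os.ErrExist) {
			return fmt.Errorf("bucket already exists: %w", err)
		}
		return err
	}
	return nil
}

func main() {
	fmt.Println(makeBucket("/tmp/bucket-demo")) // <nil> on first call
	fmt.Println(makeBucket("/tmp/bucket-demo")) // bucket already exists
}

The same reasoning would cover GetBucketInfo, which is a read-only stat of the bucket directory.]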
diff --git a/cmd/namespace-lock.go b/cmd/namespace-lock.go
index 6b4544033..3194d76ac 100644
--- a/cmd/namespace-lock.go
+++ b/cmd/namespace-lock.go
@@ -18,18 +18,17 @@ package cmd
 
 import (
 	"context"
-	"errors"
 	pathutil "path"
 	"runtime"
 	"sort"
 	"strings"
 	"sync"
+	"sync/atomic"
 
 	"fmt"
 	"time"
 
 	"github.com/minio/lsync"
-	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/dsync"
 )
 
@@ -58,8 +57,8 @@ func newNSLock(isDistXL bool) *nsLockMap {
 
 // nsLock - provides primitives for locking critical namespace regions.
 type nsLock struct {
+	ref uint32
 	*lsync.LRWMutex
-	ref      uint
 }
 
 // nsLockMap - namespace lock map, provides primitives to Lock,
@@ -68,7 +67,7 @@ type nsLockMap struct {
 	// Indicates if namespace is part of a distributed setup.
 	isDistXL     bool
 	lockMap      map[string]*nsLock
-	lockMapMutex sync.RWMutex
+	lockMapMutex sync.Mutex
 }
 
 // Lock the namespace resource.
@@ -78,17 +77,16 @@ func (n *nsLockMap) lock(ctx context.Context, volume string, path string, lockSo
 	resource := pathJoin(volume, path)
 
 	n.lockMapMutex.Lock()
-	nsLk, found := n.lockMap[resource]
-	if !found {
-		nsLk = &nsLock{
+	if _, found := n.lockMap[resource]; !found {
+		n.lockMap[resource] = &nsLock{
 			LRWMutex: lsync.NewLRWMutex(ctx),
 			ref:      1,
 		}
-		n.lockMap[resource] = nsLk
 	} else {
 		// Update ref count here to avoid multiple races.
-		nsLk.ref++
+		atomic.AddUint32(&n.lockMap[resource].ref, 1)
 	}
+	nsLk = n.lockMap[resource]
 	n.lockMapMutex.Unlock()
 
 	// Locking here will block (until timeout).
@@ -101,13 +99,12 @@ func (n *nsLockMap) lock(ctx context.Context, volume string, path string, lockSo
 	if !locked {
 		// We failed to get the lock
 		// Decrement ref count since we failed to get the lock
-		n.lockMapMutex.Lock()
-		nsLk.ref--
-		if nsLk.ref == 0 {
+		if atomic.AddUint32(&nsLk.ref, ^uint32(0)) == 0 {
 			// Remove from the map if there are no more references.
+			n.lockMapMutex.Lock()
 			delete(n.lockMap, resource)
+			n.lockMapMutex.Unlock()
 		}
-		n.lockMapMutex.Unlock()
 	}
 	return
 }
@@ -115,28 +112,21 @@ func (n *nsLockMap) lock(ctx context.Context, volume string, path string, lockSo
 // Unlock the namespace resource.
 func (n *nsLockMap) unlock(volume string, path string, readLock bool) {
 	resource := pathJoin(volume, path)
-	n.lockMapMutex.RLock()
-	nsLk, found := n.lockMap[resource]
-	n.lockMapMutex.RUnlock()
-	if !found {
+
+	n.lockMapMutex.Lock()
+	defer n.lockMapMutex.Unlock()
+	if _, found := n.lockMap[resource]; !found {
 		return
 	}
 	if readLock {
-		nsLk.RUnlock()
+		n.lockMap[resource].RUnlock()
 	} else {
-		nsLk.Unlock()
+		n.lockMap[resource].Unlock()
 	}
-	n.lockMapMutex.Lock()
-	if nsLk.ref == 0 {
-		logger.LogIf(GlobalContext, errors.New("Namespace reference count cannot be 0"))
-	} else {
-		nsLk.ref--
-		if nsLk.ref == 0 {
-			// Remove from the map if there are no more references.
-			delete(n.lockMap, resource)
-		}
+	if atomic.AddUint32(&n.lockMap[resource].ref, ^uint32(0)) == 0 {
+		// Remove from the map if there are no more references.
+		delete(n.lockMap, resource)
 	}
-	n.lockMapMutex.Unlock()
 }
 
 // dsync's distributed lock instance.
@@ -147,7 +137,7 @@ type distLockInstance struct {
 
 // Lock - block until write lock is taken or timeout has occurred.
 func (di *distLockInstance) GetLock(timeout *dynamicTimeout) (timedOutErr error) {
-	lockSource := getSource()
+	lockSource := getSource(2)
 	start := UTCNow()
 
 	if !di.rwMutex.GetLock(di.opsID, lockSource, timeout.Timeout()) {
@@ -165,7 +155,7 @@ func (di *distLockInstance) Unlock() {
 
 // RLock - block until read lock is taken or timeout has occurred.
 func (di *distLockInstance) GetRLock(timeout *dynamicTimeout) (timedOutErr error) {
-	lockSource := getSource()
+	lockSource := getSource(2)
 	start := UTCNow()
 	if !di.rwMutex.GetRLock(di.opsID, lockSource, timeout.Timeout()) {
 		timeout.LogFailure()
@@ -206,7 +196,7 @@ func (n *nsLockMap) NewNSLock(ctx context.Context, lockersFn func() []dsync.NetL
 
 // Lock - block until write lock is taken or timeout has occurred.
 func (li *localLockInstance) GetLock(timeout *dynamicTimeout) (timedOutErr error) {
-	lockSource := getSource()
+	lockSource := getSource(2)
 	start := UTCNow()
 	readLock := false
 	var success []int
@@ -234,7 +224,7 @@ func (li *localLockInstance) Unlock() {
 
 // RLock - block until read lock is taken or timeout has occurred.
 func (li *localLockInstance) GetRLock(timeout *dynamicTimeout) (timedOutErr error) {
-	lockSource := getSource()
+	lockSource := getSource(2)
 	start := UTCNow()
 	readLock := true
 	var success []int
@@ -260,9 +250,9 @@ func (li *localLockInstance) RUnlock() {
 	}
 }
 
-func getSource() string {
+func getSource(n int) string {
 	var funcName string
-	pc, filename, lineNum, ok := runtime.Caller(2)
+	pc, filename, lineNum, ok := runtime.Caller(n)
 	if ok {
 		filename = pathutil.Base(filename)
 		funcName = strings.TrimPrefix(runtime.FuncForPC(pc).Name(),
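[Reviewer note, not part of the patch: the core of the fix is above. The old unlock looked the entry up under an RLock, released it, unlocked the LRW mutex, and only then re-acquired the map mutex to decrement the count, so the count and the map could drift out of sync; that window is the race in the title, and dropping the defensive "reference count cannot be 0" log is what removes the errors and logger imports. The new code holds a single sync.Mutex across unlock and touches ref only through sync/atomic. The decrement idiom atomic.AddUint32(&ref, ^uint32(0)) works because unsigned arithmetic wraps around: adding 0xFFFFFFFF is the same as subtracting 1. A self-contained sketch of the same refcounted lock-map pattern, with simplified hypothetical names (entry, lockMap) standing in for nsLock/nsLockMap, and a plain sync.Mutex standing in for lsync.LRWMutex:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// entry pairs a per-resource mutex with a reference count, so the map
// entry can be deleted once the last interested goroutine is done.
type entry struct {
	ref uint32
	mu  sync.Mutex
}

type lockMap struct {
	mu sync.Mutex // guards m; entry.ref is only touched atomically
	m  map[string]*entry
}

func (l *lockMap) lock(resource string) {
	l.mu.Lock()
	e, ok := l.m[resource]
	if !ok {
		e = &entry{ref: 1}
		l.m[resource] = e
	} else {
		// Increment atomically, mirroring the patched lock().
		atomic.AddUint32(&e.ref, 1)
	}
	l.mu.Unlock()

	e.mu.Lock() // block here until the resource lock is held
}

func (l *lockMap) unlock(resource string) {
	l.mu.Lock()
	defer l.mu.Unlock()
	e, ok := l.m[resource]
	if !ok {
		return
	}
	e.mu.Unlock()
	// Adding ^uint32(0) wraps around to ref-1 without a data race.
	if atomic.AddUint32(&e.ref, ^uint32(0)) == 0 {
		delete(l.m, resource) // last reference gone, drop the entry
	}
}

func main() {
	l := &lockMap{m: map[string]*entry{}}
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			l.lock("bucket/object")
			l.unlock("bucket/object")
		}()
	}
	wg.Wait()
	fmt.Println("entries left:", len(l.m)) // 0: the map cleaned itself up
}

Running a sketch like this under `go test -race` (or `go run -race`) is a reasonable way to convince yourself the increment/decrement pairing is sound.]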
diff --git a/cmd/namespace-lock_test.go b/cmd/namespace-lock_test.go
index 2c4f34b02..0b198e056 100644
--- a/cmd/namespace-lock_test.go
+++ b/cmd/namespace-lock_test.go
@@ -27,7 +27,7 @@ import (
 // position will cause the line number to change and the test to FAIL
 // Tests getSource().
 func TestGetSource(t *testing.T) {
-	currentSource := func() string { return getSource() }
+	currentSource := func() string { return getSource(2) }
 	gotSource := currentSource()
 	// Hard coded line number, 31, in the "expectedSource" value
 	expectedSource := "[namespace-lock_test.go:31:TestGetSource()]"
diff --git a/cmd/server_test.go b/cmd/server_test.go
index fd2523b38..4e785b049 100644
--- a/cmd/server_test.go
+++ b/cmd/server_test.go
@@ -58,7 +58,7 @@ type check struct {
 // Assert - checks if gotValue is same as expectedValue, if not fails the test.
 func (c *check) Assert(gotValue interface{}, expectedValue interface{}) {
 	if !reflect.DeepEqual(gotValue, expectedValue) {
-		c.Fatalf("Test %s:%s expected %v, got %v", getSource(), c.testType, expectedValue, gotValue)
+		c.Fatalf("Test %s:%s expected %v, got %v", getSource(2), c.testType, expectedValue, gotValue)
 	}
 }
 
diff --git a/cmd/test-utils_test.go b/cmd/test-utils_test.go
index 936aab7d3..8bb7fb1b8 100644
--- a/cmd/test-utils_test.go
+++ b/cmd/test-utils_test.go
@@ -57,6 +57,7 @@ import (
 	"github.com/minio/minio-go/v6/pkg/s3utils"
 	"github.com/minio/minio-go/v6/pkg/signer"
 	"github.com/minio/minio/cmd/config"
+	"github.com/minio/minio/cmd/crypto"
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/auth"
 	"github.com/minio/minio/pkg/bucket/policy"
@@ -65,6 +66,18 @@ import (
 
 // Tests should initNSLock only once.
 func init() {
+	// disable ENVs which interfere with tests.
+	for _, env := range []string{
+		crypto.EnvAutoEncryptionLegacy,
+		crypto.EnvKMSAutoEncryption,
+		config.EnvAccessKey,
+		config.EnvAccessKeyOld,
+		config.EnvSecretKey,
+		config.EnvSecretKeyOld,
+	} {
+		os.Unsetenv(env)
+	}
+
 	// Set as non-distributed.
 	globalIsDistXL = false
 
@@ -342,27 +355,9 @@ func UnstartedTestServer(t TestErrHandler, instanceType string) TestServer {
 	globalMinioPort = port
 	globalMinioAddr = getEndpointsLocalAddr(testServer.Disks)
 
-	globalConfigSys = NewConfigSys()
-
-	globalIAMSys = NewIAMSys()
-	globalIAMSys.Init(ctx, objLayer)
-
-	buckets, err := objLayer.ListBuckets(ctx)
-	if err != nil {
-		t.Fatalf("Unable to list buckets on backend %s", err)
-	}
-
-	globalPolicySys = NewPolicySys()
-	globalPolicySys.Init(buckets, objLayer)
-
-	globalNotificationSys = NewNotificationSys(testServer.Disks)
-	globalNotificationSys.Init(buckets, objLayer)
-
-	globalLifecycleSys = NewLifecycleSys()
-	globalLifecycleSys.Init(buckets, objLayer)
+	newAllSubsystems()
 
-	globalBucketSSEConfigSys = NewBucketSSEConfigSys()
-	globalBucketSSEConfigSys.Init(buckets, objLayer)
+	initAllSubsystems(objLayer)
 
 	return testServer
 }
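[Reviewer note, not part of the patch: getSource previously hard-coded runtime.Caller(2), which only reported the right frame when it was wrapped exactly once. Threading the skip depth through as a parameter lets each call site say how far up the stack its caller sits; every call site in this patch passes 2, i.e. "the caller of the function that invoked getSource". A standalone illustration of how the skip argument selects a stack frame; the wrapper function and output format are hypothetical simplifications of the real helper:

package main

import (
	"fmt"
	pathutil "path"
	"runtime"
	"strings"
)

// getSource mirrors the patched helper: n is the number of stack frames
// to skip, counted from inside getSource itself (1 = getSource's caller,
// 2 = that caller's caller, and so on).
func getSource(n int) string {
	pc, filename, lineNum, ok := runtime.Caller(n)
	if !ok {
		return "[unknown]"
	}
	filename = pathutil.Base(filename)
	funcName := strings.TrimPrefix(runtime.FuncForPC(pc).Name(), "main.")
	return fmt.Sprintf("[%s:%d:%s()]", filename, lineNum, funcName)
}

// wrapper adds one frame, so it skips one extra to report its own caller.
func wrapper() string { return getSource(2) }

func main() {
	fmt.Println(getSource(1)) // reports this line in main
	fmt.Println(wrapper())    // also reports main, not wrapper
}

This is why TestGetSource in the hunk above passes 2 from inside its closure: frame 1 is the anonymous function, frame 2 is the test body whose line number the assertion hard-codes.]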