Support MinIO to be deployed on more than 32 nodes (#8492)

This PR moves locking from a single global entity to a more
localized, set-level entity, so that locks are held only on
the resources being written to within a given collection of
disks rather than at a global level.

In the process, this PR also removes the top-level limit of
32 nodes, allowing an unlimited number of nodes. This is a
precursor change before bringing in bucket expansion.
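To illustrate the direction of the change, here is a minimal Go sketch of set-scoped locking. It is not code from this PR: the type names (setLocker, sets, newNSLock) and the crc32-based set selection are made up for illustration, whereas MinIO itself obtains locks through the object layer's NewNSLock method and, in distributed mode, coordinates them only across the nodes of the owning erasure set via pkg/dsync.

package main

import (
	"fmt"
	"hash/crc32"
	"sync"
)

// rwLocker is a cut-down stand-in for an RWLocker-style interface.
type rwLocker interface {
	Lock()
	Unlock()
}

// setLocker guards the namespace of a single erasure set (illustrative only;
// a real deployment would use a distributed lock across that set's nodes).
type setLocker struct {
	mu sync.Mutex
}

func (s *setLocker) Lock()   { s.mu.Lock() }
func (s *setLocker) Unlock() { s.mu.Unlock() }

// sets models an object layer split into several erasure sets,
// each with its own locker instead of one process-wide mutex.
type sets struct {
	lockers []*setLocker
}

// newNSLock hands back the locker of whichever set the object maps to,
// so writes to objects in different sets never contend with each other.
func (s *sets) newNSLock(bucket, object string) rwLocker {
	idx := crc32.ChecksumIEEE([]byte(bucket+"/"+object)) % uint32(len(s.lockers))
	return s.lockers[idx]
}

func main() {
	s := &sets{lockers: []*setLocker{{}, {}, {}, {}}}

	// The lock below is scoped to one set only; other sets stay untouched.
	l := s.newNSLock("photos", "a.jpg")
	l.Lock()
	defer l.Unlock()

	fmt.Println("holding a lock scoped to a single set")
}

Because every lock is scoped to the lockers of one set, adding more sets (and therefore more than 32 nodes) no longer funnels all writers through a single global lock map.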
Branch: master
Author: Harshavardhana (committed by kannappanr)
Parent: 069b8ee8ff
Commit: e9b2bf00ad
Changed files (58), with the per-file change count in parentheses:

  1. .travis.yml (2)
  2. CREDITS (28)
  3. cmd/admin-handlers.go (29)
  4. cmd/admin-handlers_test.go (45)
  5. cmd/config-encrypted.go (2)
  6. cmd/config.go (2)
  7. cmd/daily-lifecycle-ops.go (2)
  8. cmd/disk-cache.go (31)
  9. cmd/endpoint-ellipses.go (5)
  10. cmd/endpoint.go (6)
  11. cmd/format-xl.go (16)
  12. cmd/fs-v1-multipart.go (2)
  13. cmd/fs-v1.go (26)
  14. cmd/gateway-main.go (2)
  15. cmd/gateway-unsupported.go (7)
  16. cmd/global-heal.go (5)
  17. cmd/iam.go (2)
  18. cmd/local-locker.go (126)
  19. cmd/lock-rest-client.go (73)
  20. cmd/lock-rest-client_test.go (17)
  21. cmd/lock-rest-server-common.go (73)
  22. cmd/lock-rest-server-common_test.go (124)
  23. cmd/lock-rest-server.go (143)
  24. cmd/namespace-lock.go (92)
  25. cmd/namespace-lock_test.go (89)
  26. cmd/notification.go (7)
  27. cmd/object-api-common.go (2)
  28. cmd/object-api-interface.go (3)
  29. cmd/peer-rest-client.go (8)
  30. cmd/peer-rest-server.go (8)
  31. cmd/prepare-storage.go (8)
  32. cmd/rest/client.go (27)
  33. cmd/routers.go (2)
  34. cmd/server-main.go (22)
  35. cmd/storage-rest-client.go (27)
  36. cmd/storage-rest_test.go (6)
  37. cmd/test-utils_test.go (19)
  38. cmd/utils.go (54)
  39. cmd/xl-sets.go (128)
  40. cmd/xl-v1-bucket.go (11)
  41. cmd/xl-v1-healing.go (2)
  42. cmd/xl-v1-multipart.go (15)
  43. cmd/xl-v1-object.go (12)
  44. cmd/xl-v1.go (16)
  45. docs/minio-limits.md (5)
  46. docs/zh_CN/minio-limits.md (4)
  47. go.mod (1)
  48. pkg/dsync/.gitignore (3)
  49. pkg/dsync/drwmutex.go (440)
  50. pkg/dsync/drwmutex_test.go (383)
  51. pkg/dsync/dsync-server_test.go (103)
  52. pkg/dsync/dsync.go (60)
  53. pkg/dsync/dsync_private_test.go (58)
  54. pkg/dsync/dsync_test.go (361)
  55. pkg/dsync/retry.go (142)
  56. pkg/dsync/retry_test.go (82)
  57. pkg/dsync/rpc-client-impl_test.go (111)
  58. pkg/dsync/rpc-client-interface.go (59)

@@ -46,7 +46,7 @@ matrix:
       go: 1.13.x
     script:
       - go build --ldflags="$(go run buildscripts/gen-ldflags.go)" -o %GOPATH%\bin\minio.exe
-      - go test -v --timeout 20m ./...
+      - for d in $(go list ./... | grep -v browser); do go test -v --timeout 20m "$d"; done
     before_script:
       # Add an IPv6 config - see the corresponding Travis issue

@ -10910,6 +10910,34 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================================
github.com/klauspost/readahead
https://github.com/klauspost/readahead
----------------------------------------------------------------
The MIT License (MIT)
Copyright (c) 2015 Klaus Post
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================================
github.com/klauspost/reedsolomon

@@ -476,7 +476,13 @@ func (a adminAPIHandlers) PerfInfoHandler(w http.ResponseWriter, r *http.Request
 }
 
 func newLockEntry(l lockRequesterInfo, resource, server string) *madmin.LockEntry {
-	entry := &madmin.LockEntry{Timestamp: l.Timestamp, Resource: resource, ServerList: []string{server}, Owner: l.Node, Source: l.Source, ID: l.UID}
+	entry := &madmin.LockEntry{
+		Timestamp:  l.Timestamp,
+		Resource:   resource,
+		ServerList: []string{server},
+		Source:     l.Source,
+		ID:         l.UID,
+	}
 	if l.Writer {
 		entry.Type = "Write"
 	} else {
@@ -491,12 +497,14 @@ func topLockEntries(peerLocks []*PeerLocks) madmin.LockEntries {
 		if peerLock == nil {
 			continue
 		}
-		for k, v := range peerLock.Locks {
-			for _, lockReqInfo := range v {
-				if val, ok := entryMap[lockReqInfo.UID]; ok {
-					val.ServerList = append(val.ServerList, peerLock.Addr)
-				} else {
-					entryMap[lockReqInfo.UID] = newLockEntry(lockReqInfo, k, peerLock.Addr)
+		for _, locks := range peerLock.Locks {
+			for k, v := range locks {
+				for _, lockReqInfo := range v {
+					if val, ok := entryMap[lockReqInfo.UID]; ok {
+						val.ServerList = append(val.ServerList, peerLock.Addr)
+					} else {
+						entryMap[lockReqInfo.UID] = newLockEntry(lockReqInfo, k, peerLock.Addr)
+					}
 				}
 			}
 		}
@@ -531,10 +539,13 @@ func (a adminAPIHandlers) TopLocksHandler(w http.ResponseWriter, r *http.Request
 	peerLocks := globalNotificationSys.GetLocks(ctx)
 	// Once we have received all the locks currently used from peers
 	// add the local peer locks list as well.
-	localLocks := globalLockServer.ll.DupLockMap()
+	var getRespLocks GetLocksResp
+	for _, llocker := range globalLockServers {
+		getRespLocks = append(getRespLocks, llocker.DupLockMap())
+	}
 	peerLocks = append(peerLocks, &PeerLocks{
 		Addr:  getHostName(r),
-		Locks: localLocks,
+		Locks: getRespLocks,
 	})
 
 	topLocks := topLockEntries(peerLocks)

@ -27,7 +27,6 @@ import (
"net/url" "net/url"
"sync" "sync"
"testing" "testing"
"time"
"github.com/gorilla/mux" "github.com/gorilla/mux"
"github.com/minio/minio/pkg/auth" "github.com/minio/minio/pkg/auth"
@ -68,10 +67,6 @@ func prepareAdminXLTestBed() (*adminXLTestBed, error) {
// code backend. // code backend.
globalIsXL = true globalIsXL = true
// initialize NSLock.
isDistXL := false
initNSLock(isDistXL)
// Init global heal state // Init global heal state
if globalIsXL { if globalIsXL {
globalAllHealState = initHealState() globalAllHealState = initHealState()
@ -345,46 +340,6 @@ func TestToAdminAPIErrCode(t *testing.T) {
} }
} }
func TestTopLockEntries(t *testing.T) {
t1 := UTCNow()
t2 := UTCNow().Add(10 * time.Second)
peerLocks := []*PeerLocks{
{
Addr: "1",
Locks: map[string][]lockRequesterInfo{
"1": {
{false, "node2", "ep2", "2", t2, t2, ""},
{true, "node1", "ep1", "1", t1, t1, ""},
},
"2": {
{false, "node2", "ep2", "2", t2, t2, ""},
{true, "node1", "ep1", "1", t1, t1, ""},
},
},
},
{
Addr: "2",
Locks: map[string][]lockRequesterInfo{
"1": {
{false, "node2", "ep2", "2", t2, t2, ""},
{true, "node1", "ep1", "1", t1, t1, ""},
},
"2": {
{false, "node2", "ep2", "2", t2, t2, ""},
{true, "node1", "ep1", "1", t1, t1, ""},
},
},
},
}
les := topLockEntries(peerLocks)
if len(les) != 2 {
t.Fatalf("Did not get 2 results")
}
if les[0].Timestamp.After(les[1].Timestamp) {
t.Fatalf("Got wrong sorted value")
}
}
func TestExtractHealInitParams(t *testing.T) { func TestExtractHealInitParams(t *testing.T) {
mkParams := func(clientToken string, forceStart, forceStop bool) url.Values { mkParams := func(clientToken string, forceStart, forceStop bool) url.Values {
v := url.Values{} v := url.Values{}

@@ -52,7 +52,7 @@ func handleEncryptedConfigBackend(objAPI ObjectLayer, server bool) error {
 	// at a given time, this big transaction lock ensures this
 	// appropriately. This is also true for rotation of encrypted
 	// content.
-	objLock := globalNSMutex.NewNSLock(context.Background(), minioMetaBucket, transactionConfigPrefix)
+	objLock := objAPI.NewNSLock(context.Background(), minioMetaBucket, transactionConfigPrefix)
 	if err := objLock.GetLock(globalOperationTimeout); err != nil {
 		return err
 	}

@@ -294,7 +294,7 @@ func initConfig(objAPI ObjectLayer) error {
 	// all the config as necessary, this is to ensure that
 	// redundant locks are not held for each migration - this allows
 	// for a more predictable behavior while debugging.
-	objLock := globalNSMutex.NewNSLock(context.Background(), minioMetaBucket, transactionConfigPrefix)
+	objLock := objAPI.NewNSLock(context.Background(), minioMetaBucket, transactionConfigPrefix)
 	if err := objLock.GetLock(globalOperationTimeout); err != nil {
 		return err
 	}

@@ -108,7 +108,7 @@ var lifecycleTimeout = newDynamicTimeout(60*time.Second, time.Second)
 
 func lifecycleRound(ctx context.Context, objAPI ObjectLayer) error {
 	// Lock to avoid concurrent lifecycle ops from other nodes
-	sweepLock := globalNSMutex.NewNSLock(ctx, "system", "daily-lifecycle-ops")
+	sweepLock := objAPI.NewNSLock(ctx, "system", "daily-lifecycle-ops")
 	if err := sweepLock.GetLock(lifecycleTimeout); err != nil {
 		return err
 	}

@ -1,3 +1,19 @@
/*
* MinIO Cloud Storage, (C) 2019 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd package cmd
import ( import (
@ -49,8 +65,6 @@ type cacheObjects struct {
cache []*diskCache cache []*diskCache
// file path patterns to exclude from cache // file path patterns to exclude from cache
exclude []string exclude []string
// to manage cache namespace locks
nsMutex *nsLockMap
// if true migration is in progress from v1 to v2 // if true migration is in progress from v1 to v2
migrating bool migrating bool
@ -58,6 +72,7 @@ type cacheObjects struct {
migMutex sync.Mutex migMutex sync.Mutex
// Object functions pointing to the corresponding functions of backend implementation. // Object functions pointing to the corresponding functions of backend implementation.
NewNSLockFn func(ctx context.Context, bucket, object string) RWLocker
GetObjectNInfoFn func(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, lockType LockType, opts ObjectOptions) (gr *GetObjectReader, err error) GetObjectNInfoFn func(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, lockType LockType, opts ObjectOptions) (gr *GetObjectReader, err error)
GetObjectInfoFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error) GetObjectInfoFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error)
DeleteObjectFn func(ctx context.Context, bucket, object string) error DeleteObjectFn func(ctx context.Context, bucket, object string) error
@ -66,7 +81,7 @@ type cacheObjects struct {
} }
func (c *cacheObjects) delete(ctx context.Context, dcache *diskCache, bucket, object string) (err error) { func (c *cacheObjects) delete(ctx context.Context, dcache *diskCache, bucket, object string) (err error) {
cLock := c.nsMutex.NewNSLock(ctx, bucket, object) cLock := c.NewNSLockFn(ctx, bucket, object)
if err := cLock.GetLock(globalObjectTimeout); err != nil { if err := cLock.GetLock(globalObjectTimeout); err != nil {
return err return err
} }
@ -75,7 +90,7 @@ func (c *cacheObjects) delete(ctx context.Context, dcache *diskCache, bucket, ob
} }
func (c *cacheObjects) put(ctx context.Context, dcache *diskCache, bucket, object string, data io.Reader, size int64, opts ObjectOptions) error { func (c *cacheObjects) put(ctx context.Context, dcache *diskCache, bucket, object string, data io.Reader, size int64, opts ObjectOptions) error {
cLock := c.nsMutex.NewNSLock(ctx, bucket, object) cLock := c.NewNSLockFn(ctx, bucket, object)
if err := cLock.GetLock(globalObjectTimeout); err != nil { if err := cLock.GetLock(globalObjectTimeout); err != nil {
return err return err
} }
@ -84,7 +99,7 @@ func (c *cacheObjects) put(ctx context.Context, dcache *diskCache, bucket, objec
} }
func (c *cacheObjects) get(ctx context.Context, dcache *diskCache, bucket, object string, rs *HTTPRangeSpec, h http.Header, opts ObjectOptions) (gr *GetObjectReader, err error) { func (c *cacheObjects) get(ctx context.Context, dcache *diskCache, bucket, object string, rs *HTTPRangeSpec, h http.Header, opts ObjectOptions) (gr *GetObjectReader, err error) {
cLock := c.nsMutex.NewNSLock(ctx, bucket, object) cLock := c.NewNSLockFn(ctx, bucket, object)
if err := cLock.GetRLock(globalObjectTimeout); err != nil { if err := cLock.GetRLock(globalObjectTimeout); err != nil {
return nil, err return nil, err
} }
@ -94,7 +109,7 @@ func (c *cacheObjects) get(ctx context.Context, dcache *diskCache, bucket, objec
} }
func (c *cacheObjects) stat(ctx context.Context, dcache *diskCache, bucket, object string) (oi ObjectInfo, err error) { func (c *cacheObjects) stat(ctx context.Context, dcache *diskCache, bucket, object string) (oi ObjectInfo, err error) {
cLock := c.nsMutex.NewNSLock(ctx, bucket, object) cLock := c.NewNSLockFn(ctx, bucket, object)
if err := cLock.GetRLock(globalObjectTimeout); err != nil { if err := cLock.GetRLock(globalObjectTimeout); err != nil {
return oi, err return oi, err
} }
@ -549,9 +564,11 @@ func newServerCacheObjects(ctx context.Context, config cache.Config) (CacheObjec
c := &cacheObjects{ c := &cacheObjects{
cache: cache, cache: cache,
exclude: config.Exclude, exclude: config.Exclude,
nsMutex: newNSLock(false),
migrating: migrateSw, migrating: migrateSw,
migMutex: sync.Mutex{}, migMutex: sync.Mutex{},
NewNSLockFn: func(ctx context.Context, bucket, object string) RWLocker {
return globalObjectAPI.NewNSLock(ctx, bucket, object)
},
GetObjectInfoFn: func(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) { GetObjectInfoFn: func(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) {
return newObjectLayerFn().GetObjectInfo(ctx, bucket, object, opts) return newObjectLayerFn().GetObjectInfo(ctx, bucket, object, opts)
}, },

@@ -30,11 +30,6 @@ import (
 // This file implements and supports ellipses pattern for
 // `minio server` command line arguments.
 
-// Maximum number of unique args supported on the command line.
-const (
-	serverCommandLineArgsMax = 32
-)
-
 // Endpoint set represents parsed ellipses values, also provides
 // methods to get the sets of endpoints.
 type endpointSet struct {

@@ -577,12 +577,6 @@ func CreateEndpoints(serverAddr string, args ...[]string) (string, EndpointList,
 		uniqueArgs.Add(endpoint.Host)
 	}
 
-	// Error out if we have more than serverCommandLineArgsMax unique servers.
-	if len(uniqueArgs.ToSlice()) > serverCommandLineArgsMax {
-		err := fmt.Errorf("Unsupported number of endpoints (%s), total number of servers cannot be more than %d", endpoints, serverCommandLineArgsMax)
-		return serverAddr, endpoints, setupType, err
-	}
-
 	// Error out if we have less than 2 unique servers.
 	if len(uniqueArgs.ToSlice()) < 2 && setupType == DistXLSetupType {
 		err := fmt.Errorf("Unsupported number of endpoints (%s), minimum number of servers cannot be less than 2 in distributed setup", endpoints)

@@ -471,15 +471,7 @@ func formatXLGetDeploymentID(refFormat *formatXLV3, formats []*formatXLV3) (stri
 }
 
 // formatXLFixDeploymentID - Add deployment id if it is not present.
-func formatXLFixDeploymentID(ctx context.Context, endpoints EndpointList, storageDisks []StorageAPI, refFormat *formatXLV3) (err error) {
-	// Acquire lock on format.json
-	mutex := newNSLock(globalIsDistXL)
-	formatLock := mutex.NewNSLock(ctx, minioMetaBucket, formatConfigFile)
-	if err = formatLock.GetLock(globalHealingTimeout); err != nil {
-		return err
-	}
-	defer formatLock.Unlock()
-
+func formatXLFixDeploymentID(endpoints EndpointList, storageDisks []StorageAPI, refFormat *formatXLV3) (err error) {
 	// Attempt to load all `format.json` from all disks.
 	var sErrs []error
 	formats, sErrs := loadFormatXLAll(storageDisks)
@@ -518,12 +510,12 @@
 	}
 
 	// Deployment ID needs to be set on all the disks.
 	// Save `format.json` across all disks.
-	return saveFormatXLAll(ctx, storageDisks, formats)
+	return saveFormatXLAll(context.Background(), storageDisks, formats)
 }
 
 // Update only the valid local disks which have not been updated before.
-func formatXLFixLocalDeploymentID(ctx context.Context, endpoints EndpointList, storageDisks []StorageAPI, refFormat *formatXLV3) error {
+func formatXLFixLocalDeploymentID(endpoints EndpointList, storageDisks []StorageAPI, refFormat *formatXLV3) error {
 	// If this server was down when the deploymentID was updated
 	// then we make sure that we update the local disks with the deploymentID.
 	for index, storageDisk := range storageDisks {
@@ -542,7 +534,7 @@
 		}
 		format.ID = refFormat.ID
 		if err := saveFormatXL(storageDisk, format); err != nil {
-			logger.LogIf(ctx, err)
+			logger.LogIf(context.Background(), err)
 			return fmt.Errorf("Unable to save format.json, %s", err)
 		}
 	}

@@ -646,7 +646,7 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
 	}
 
 	// Hold write lock on the object.
-	destLock := fs.nsMutex.NewNSLock(ctx, bucket, object)
+	destLock := fs.NewNSLock(ctx, bucket, object)
 	if err = destLock.GetLock(globalObjectTimeout); err != nil {
 		return oi, err
 	}

@@ -165,6 +165,12 @@ func NewFSObjectLayer(fsPath string) (ObjectLayer, error) {
 	return fs, nil
 }
 
+// NewNSLock - initialize a new namespace RWLocker instance.
+func (fs *FSObjects) NewNSLock(ctx context.Context, bucket string, object string) RWLocker {
+	// lockers are explicitly 'nil' for FS mode since there are only local lockers
+	return fs.nsMutex.NewNSLock(ctx, nil, bucket, object)
+}
+
 // Shutdown - should be called when process shuts down.
 func (fs *FSObjects) Shutdown(ctx context.Context) error {
 	fs.fsFormatRlk.Close()
@ -290,7 +296,7 @@ func (fs *FSObjects) statBucketDir(ctx context.Context, bucket string) (os.FileI
// MakeBucketWithLocation - create a new bucket, returns if it // MakeBucketWithLocation - create a new bucket, returns if it
// already exists. // already exists.
func (fs *FSObjects) MakeBucketWithLocation(ctx context.Context, bucket, location string) error { func (fs *FSObjects) MakeBucketWithLocation(ctx context.Context, bucket, location string) error {
bucketLock := fs.nsMutex.NewNSLock(ctx, bucket, "") bucketLock := fs.NewNSLock(ctx, bucket, "")
if err := bucketLock.GetLock(globalObjectTimeout); err != nil { if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
return err return err
} }
@ -313,7 +319,7 @@ func (fs *FSObjects) MakeBucketWithLocation(ctx context.Context, bucket, locatio
// GetBucketInfo - fetch bucket metadata info. // GetBucketInfo - fetch bucket metadata info.
func (fs *FSObjects) GetBucketInfo(ctx context.Context, bucket string) (bi BucketInfo, e error) { func (fs *FSObjects) GetBucketInfo(ctx context.Context, bucket string) (bi BucketInfo, e error) {
bucketLock := fs.nsMutex.NewNSLock(ctx, bucket, "") bucketLock := fs.NewNSLock(ctx, bucket, "")
if e := bucketLock.GetRLock(globalObjectTimeout); e != nil { if e := bucketLock.GetRLock(globalObjectTimeout); e != nil {
return bi, e return bi, e
} }
@ -376,7 +382,7 @@ func (fs *FSObjects) ListBuckets(ctx context.Context) ([]BucketInfo, error) {
// DeleteBucket - delete a bucket and all the metadata associated // DeleteBucket - delete a bucket and all the metadata associated
// with the bucket including pending multipart, object metadata. // with the bucket including pending multipart, object metadata.
func (fs *FSObjects) DeleteBucket(ctx context.Context, bucket string) error { func (fs *FSObjects) DeleteBucket(ctx context.Context, bucket string) error {
bucketLock := fs.nsMutex.NewNSLock(ctx, bucket, "") bucketLock := fs.NewNSLock(ctx, bucket, "")
if err := bucketLock.GetLock(globalObjectTimeout); err != nil { if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
return err return err
@ -412,7 +418,7 @@ func (fs *FSObjects) DeleteBucket(ctx context.Context, bucket string) error {
func (fs *FSObjects) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (oi ObjectInfo, e error) { func (fs *FSObjects) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (oi ObjectInfo, e error) {
cpSrcDstSame := isStringEqual(pathJoin(srcBucket, srcObject), pathJoin(dstBucket, dstObject)) cpSrcDstSame := isStringEqual(pathJoin(srcBucket, srcObject), pathJoin(dstBucket, dstObject))
if !cpSrcDstSame { if !cpSrcDstSame {
objectDWLock := fs.nsMutex.NewNSLock(ctx, dstBucket, dstObject) objectDWLock := fs.NewNSLock(ctx, dstBucket, dstObject)
if err := objectDWLock.GetLock(globalObjectTimeout); err != nil { if err := objectDWLock.GetLock(globalObjectTimeout); err != nil {
return oi, err return oi, err
} }
@ -484,7 +490,7 @@ func (fs *FSObjects) GetObjectNInfo(ctx context.Context, bucket, object string,
if lockType != noLock { if lockType != noLock {
// Lock the object before reading. // Lock the object before reading.
lock := fs.nsMutex.NewNSLock(ctx, bucket, object) lock := fs.NewNSLock(ctx, bucket, object)
switch lockType { switch lockType {
case writeLock: case writeLock:
if err = lock.GetLock(globalObjectTimeout); err != nil { if err = lock.GetLock(globalObjectTimeout); err != nil {
@ -571,7 +577,7 @@ func (fs *FSObjects) GetObject(ctx context.Context, bucket, object string, offse
} }
// Lock the object before reading. // Lock the object before reading.
objectLock := fs.nsMutex.NewNSLock(ctx, bucket, object) objectLock := fs.NewNSLock(ctx, bucket, object)
if err := objectLock.GetRLock(globalObjectTimeout); err != nil { if err := objectLock.GetRLock(globalObjectTimeout); err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
return err return err
@ -739,7 +745,7 @@ func (fs *FSObjects) getObjectInfo(ctx context.Context, bucket, object string) (
// getObjectInfoWithLock - reads object metadata and replies back ObjectInfo. // getObjectInfoWithLock - reads object metadata and replies back ObjectInfo.
func (fs *FSObjects) getObjectInfoWithLock(ctx context.Context, bucket, object string) (oi ObjectInfo, e error) { func (fs *FSObjects) getObjectInfoWithLock(ctx context.Context, bucket, object string) (oi ObjectInfo, e error) {
// Lock the object before reading. // Lock the object before reading.
objectLock := fs.nsMutex.NewNSLock(ctx, bucket, object) objectLock := fs.NewNSLock(ctx, bucket, object)
if err := objectLock.GetRLock(globalObjectTimeout); err != nil { if err := objectLock.GetRLock(globalObjectTimeout); err != nil {
return oi, err return oi, err
} }
@ -764,7 +770,7 @@ func (fs *FSObjects) getObjectInfoWithLock(ctx context.Context, bucket, object s
func (fs *FSObjects) GetObjectInfo(ctx context.Context, bucket, object string, opts ObjectOptions) (oi ObjectInfo, e error) { func (fs *FSObjects) GetObjectInfo(ctx context.Context, bucket, object string, opts ObjectOptions) (oi ObjectInfo, e error) {
oi, err := fs.getObjectInfoWithLock(ctx, bucket, object) oi, err := fs.getObjectInfoWithLock(ctx, bucket, object)
if err == errCorruptedFormat || err == io.EOF { if err == errCorruptedFormat || err == io.EOF {
objectLock := fs.nsMutex.NewNSLock(ctx, bucket, object) objectLock := fs.NewNSLock(ctx, bucket, object)
if err = objectLock.GetLock(globalObjectTimeout); err != nil { if err = objectLock.GetLock(globalObjectTimeout); err != nil {
return oi, toObjectErr(err, bucket, object) return oi, toObjectErr(err, bucket, object)
} }
@ -810,7 +816,7 @@ func (fs *FSObjects) PutObject(ctx context.Context, bucket string, object string
return ObjectInfo{}, err return ObjectInfo{}, err
} }
// Lock the object. // Lock the object.
objectLock := fs.nsMutex.NewNSLock(ctx, bucket, object) objectLock := fs.NewNSLock(ctx, bucket, object)
if err := objectLock.GetLock(globalObjectTimeout); err != nil { if err := objectLock.GetLock(globalObjectTimeout); err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
return objInfo, err return objInfo, err
@ -965,7 +971,7 @@ func (fs *FSObjects) DeleteObjects(ctx context.Context, bucket string, objects [
// and there are no rollbacks supported. // and there are no rollbacks supported.
func (fs *FSObjects) DeleteObject(ctx context.Context, bucket, object string) error { func (fs *FSObjects) DeleteObject(ctx context.Context, bucket, object string) error {
// Acquire a write lock before deleting the object. // Acquire a write lock before deleting the object.
objectLock := fs.nsMutex.NewNSLock(ctx, bucket, object) objectLock := fs.NewNSLock(ctx, bucket, object)
if err := objectLock.GetLock(globalOperationTimeout); err != nil { if err := objectLock.GetLock(globalOperationTimeout); err != nil {
return err return err
} }

@@ -142,8 +142,6 @@ func StartGateway(ctx *cli.Context, gw Gateway) {
 	// Set system resources to maximum.
 	logger.LogIf(context.Background(), setMaxResources())
 
-	initNSLock(false) // Enable local namespace lock.
-
 	// Set when gateway is enabled
 	globalIsGateway = true

@@ -18,6 +18,7 @@ package cmd
 
 import (
 	"context"
+	"errors"
 
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/lifecycle"
@@ -28,6 +29,12 @@ import (
 // GatewayUnsupported list of unsupported call stubs for gateway.
 type GatewayUnsupported struct{}
 
+// NewNSLock is a dummy stub for gateway.
+func (a GatewayUnsupported) NewNSLock(ctx context.Context, bucket string, object string) RWLocker {
+	logger.CriticalIf(ctx, errors.New("not implemented"))
+	return nil
+}
+
 // ListMultipartUploads lists all multipart uploads.
 func (a GatewayUnsupported) ListMultipartUploads(ctx context.Context, bucket string, prefix string, keyMarker string, uploadIDMarker string, delimiter string, maxUploads int) (lmi ListMultipartsInfo, err error) {
 	return lmi, NotImplemented{}

@@ -83,7 +83,8 @@ func healErasureSet(ctx context.Context, setIndex int, xlObj *xlObjects) error {
 	// Hold a lock for healing the erasure set
 	zeroDuration := time.Millisecond
 	zeroDynamicTimeout := newDynamicTimeout(zeroDuration, zeroDuration)
-	erasureSetHealLock := globalNSMutex.NewNSLock(ctx, "system", fmt.Sprintf("erasure-set-heal-%d", setIndex))
+	erasureSetHealLock := xlObj.nsMutex.NewNSLock(ctx, xlObj.getLockers(),
+		"system", fmt.Sprintf("erasure-set-heal-%d", setIndex))
 	if err := erasureSetHealLock.GetLock(zeroDynamicTimeout); err != nil {
 		return err
 	}
@@ -126,7 +127,7 @@ func execLeaderTasks(sets *xlSets) {
 	ctx := context.Background()
 
 	// Hold a lock so only one server performs auto-healing
-	leaderLock := globalNSMutex.NewNSLock(ctx, minioMetaBucket, "leader")
+	leaderLock := sets.NewNSLock(ctx, minioMetaBucket, "leader")
 	for {
 		err := leaderLock.GetLock(leaderLockTimeout)
 		if err == nil {

@@ -338,7 +338,7 @@ func (sys *IAMSys) Load() error {
 func (sys *IAMSys) doIAMConfigMigration(objAPI ObjectLayer) error {
 	// Take IAM configuration migration lock
 	lockPath := iamConfigPrefix + "/migration.lock"
-	objLock := globalNSMutex.NewNSLock(context.Background(), minioMetaBucket, lockPath)
+	objLock := objAPI.NewNSLock(context.Background(), minioMetaBucket, lockPath)
 	if err := objLock.GetLock(globalOperationTimeout); err != nil {
 		return err
 	}

@ -17,22 +17,21 @@
package cmd package cmd
import ( import (
"errors"
"fmt" "fmt"
"sync" "sync"
"time" "time"
"github.com/minio/dsync/v2" "github.com/minio/minio/pkg/dsync"
) )
// lockRequesterInfo stores various info from the client for each lock that is requested. // lockRequesterInfo stores various info from the client for each lock that is requested.
type lockRequesterInfo struct { type lockRequesterInfo struct {
Writer bool // Bool whether write or read lock. Writer bool // Bool whether write or read lock.
Node string // Network address of client claiming lock. UID string // UID to uniquely identify request of client.
ServiceEndpoint string // RPC path of client claiming lock. Timestamp time.Time // Timestamp set at the time of initialization.
UID string // UID to uniquely identify request of client. TimeLastCheck time.Time // Timestamp for last check of validity of lock.
Timestamp time.Time // Timestamp set at the time of initialization. Source string // Contains line, function and filename reqesting the lock.
TimeLastCheck time.Time // Timestamp for last check of validity of lock.
Source string // Contains line, function and filename reqesting the lock.
} }
// isWriteLock returns whether the lock is a write or read lock. // isWriteLock returns whether the lock is a write or read lock.
@ -40,20 +39,41 @@ func isWriteLock(lri []lockRequesterInfo) bool {
return len(lri) == 1 && lri[0].Writer return len(lri) == 1 && lri[0].Writer
} }
// localLocker implements Dsync.NetLocker type errorLocker struct{}
type localLocker struct {
mutex sync.Mutex func (d *errorLocker) String() string {
serviceEndpoint string return ""
serverAddr string }
lockMap map[string][]lockRequesterInfo
func (d *errorLocker) Lock(args dsync.LockArgs) (reply bool, err error) {
return false, errors.New("unable to lock")
}
func (d *errorLocker) Unlock(args dsync.LockArgs) (reply bool, err error) {
return false, errors.New("unable to unlock")
}
func (d *errorLocker) RLock(args dsync.LockArgs) (reply bool, err error) {
return false, errors.New("unable to rlock")
}
func (d *errorLocker) RUnlock(args dsync.LockArgs) (reply bool, err error) {
return false, errors.New("unable to runlock")
}
func (d *errorLocker) Close() error {
return nil
} }
func (l *localLocker) ServerAddr() string { // localLocker implements Dsync.NetLocker
return l.serverAddr type localLocker struct {
mutex sync.Mutex
endpoint Endpoint
lockMap map[string][]lockRequesterInfo
} }
func (l *localLocker) ServiceEndpoint() string { func (l *localLocker) String() string {
return l.serviceEndpoint return l.endpoint.String()
} }
func (l *localLocker) Lock(args dsync.LockArgs) (reply bool, err error) { func (l *localLocker) Lock(args dsync.LockArgs) (reply bool, err error) {
@ -63,13 +83,11 @@ func (l *localLocker) Lock(args dsync.LockArgs) (reply bool, err error) {
if !isLockTaken { // No locks held on the given name, so claim write lock if !isLockTaken { // No locks held on the given name, so claim write lock
l.lockMap[args.Resource] = []lockRequesterInfo{ l.lockMap[args.Resource] = []lockRequesterInfo{
{ {
Writer: true, Writer: true,
Node: args.ServerAddr, Source: args.Source,
ServiceEndpoint: args.ServiceEndpoint, UID: args.UID,
Source: args.Source, Timestamp: UTCNow(),
UID: args.UID, TimeLastCheck: UTCNow(),
Timestamp: UTCNow(),
TimeLastCheck: UTCNow(),
}, },
} }
} }
@ -96,17 +114,38 @@ func (l *localLocker) Unlock(args dsync.LockArgs) (reply bool, err error) {
} }
// removeEntry based on the uid of the lock message, removes a single entry from the
// lockRequesterInfo array or the whole array from the map (in case of a write lock
// or last read lock)
func (l *localLocker) removeEntry(name, uid string, lri *[]lockRequesterInfo) bool {
// Find correct entry to remove based on uid.
for index, entry := range *lri {
if entry.UID == uid {
if len(*lri) == 1 {
// Remove the write lock.
delete(l.lockMap, name)
} else {
// Remove the appropriate read lock.
*lri = append((*lri)[:index], (*lri)[index+1:]...)
l.lockMap[name] = *lri
}
return true
}
}
// None found return false, perhaps entry removed in previous run.
return false
}
func (l *localLocker) RLock(args dsync.LockArgs) (reply bool, err error) { func (l *localLocker) RLock(args dsync.LockArgs) (reply bool, err error) {
l.mutex.Lock() l.mutex.Lock()
defer l.mutex.Unlock() defer l.mutex.Unlock()
lrInfo := lockRequesterInfo{ lrInfo := lockRequesterInfo{
Writer: false, Writer: false,
Node: args.ServerAddr, Source: args.Source,
ServiceEndpoint: args.ServiceEndpoint, UID: args.UID,
Source: args.Source, Timestamp: UTCNow(),
UID: args.UID, TimeLastCheck: UTCNow(),
Timestamp: UTCNow(),
TimeLastCheck: UTCNow(),
} }
if lri, ok := l.lockMap[args.Resource]; ok { if lri, ok := l.lockMap[args.Resource]; ok {
if reply = !isWriteLock(lri); reply { if reply = !isWriteLock(lri); reply {
@ -139,18 +178,6 @@ func (l *localLocker) RUnlock(args dsync.LockArgs) (reply bool, err error) {
return reply, nil return reply, nil
} }
func (l *localLocker) ForceUnlock(args dsync.LockArgs) (reply bool, err error) {
l.mutex.Lock()
defer l.mutex.Unlock()
if len(args.UID) != 0 {
return false, fmt.Errorf("ForceUnlock called with non-empty UID: %s", args.UID)
}
// Only clear lock when it is taken
// Remove the lock (irrespective of write or read lock)
delete(l.lockMap, args.Resource)
return true, nil
}
func (l *localLocker) DupLockMap() map[string][]lockRequesterInfo { func (l *localLocker) DupLockMap() map[string][]lockRequesterInfo {
l.mutex.Lock() l.mutex.Lock()
defer l.mutex.Unlock() defer l.mutex.Unlock()
@ -161,3 +188,14 @@ func (l *localLocker) DupLockMap() map[string][]lockRequesterInfo {
} }
return lockCopy return lockCopy
} }
func (l *localLocker) Close() error {
return nil
}
func newLocker(endpoint Endpoint) *localLocker {
return &localLocker{
endpoint: endpoint,
lockMap: make(map[string][]lockRequesterInfo),
}
}

@ -26,18 +26,16 @@ import (
"net/url" "net/url"
"github.com/minio/dsync/v2"
"github.com/minio/minio/cmd/http" "github.com/minio/minio/cmd/http"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/cmd/rest" "github.com/minio/minio/cmd/rest"
xnet "github.com/minio/minio/pkg/net" "github.com/minio/minio/pkg/dsync"
) )
// lockRESTClient is authenticable lock REST client // lockRESTClient is authenticable lock REST client
type lockRESTClient struct { type lockRESTClient struct {
host *xnet.Host
restClient *rest.Client restClient *rest.Client
serverURL *url.URL endpoint Endpoint
connected int32 connected int32
} }
@ -49,20 +47,13 @@ func toLockError(err error) error {
switch err.Error() { switch err.Error() {
case errLockConflict.Error(): case errLockConflict.Error():
return errLockConflict return errLockConflict
case errLockNotExpired.Error():
return errLockNotExpired
} }
return err return err
} }
// ServerAddr - dsync.NetLocker interface compatible method. // String stringer *dsync.NetLocker* interface compatible method.
func (client *lockRESTClient) ServerAddr() string { func (client *lockRESTClient) String() string {
return client.serverURL.Host return client.endpoint.String()
}
// ServiceEndpoint - dsync.NetLocker interface compatible method.
func (client *lockRESTClient) ServiceEndpoint() string {
return client.serverURL.Path
} }
// Wrapper to restClient.Call to handle network errors, in case of network error the connection is marked disconnected // Wrapper to restClient.Call to handle network errors, in case of network error the connection is marked disconnected
@ -83,22 +74,17 @@ func (client *lockRESTClient) call(method string, values url.Values, body io.Rea
} }
if isNetworkError(err) { if isNetworkError(err) {
time.AfterFunc(defaultRetryUnit*5, func() { time.AfterFunc(defaultRetryUnit*3, func() {
// After 5 seconds, take this lock client // After 3 seconds, take this lock client online for a retry.
// online for a retry.
atomic.StoreInt32(&client.connected, 1) atomic.StoreInt32(&client.connected, 1)
}) })
atomic.StoreInt32(&client.connected, 0) atomic.StoreInt32(&client.connected, 0)
} }
return nil, toLockError(err) return nil, toLockError(err)
} }
// Stringer provides a canonicalized representation of node.
func (client *lockRESTClient) String() string {
return client.host.String()
}
// IsOnline - returns whether REST client failed to connect or not. // IsOnline - returns whether REST client failed to connect or not.
func (client *lockRESTClient) IsOnline() bool { func (client *lockRESTClient) IsOnline() bool {
return atomic.LoadInt32(&client.connected) == 1 return atomic.LoadInt32(&client.connected) == 1
@ -117,15 +103,13 @@ func (client *lockRESTClient) restCall(call string, args dsync.LockArgs) (reply
values.Set(lockRESTUID, args.UID) values.Set(lockRESTUID, args.UID)
values.Set(lockRESTSource, args.Source) values.Set(lockRESTSource, args.Source)
values.Set(lockRESTResource, args.Resource) values.Set(lockRESTResource, args.Resource)
values.Set(lockRESTServerAddr, args.ServerAddr)
values.Set(lockRESTServerEndpoint, args.ServiceEndpoint)
respBody, err := client.call(call, values, nil, -1) respBody, err := client.call(call, values, nil, -1)
defer http.DrainBody(respBody) defer http.DrainBody(respBody)
switch err { switch err {
case nil: case nil:
return true, nil return true, nil
case errLockConflict, errLockNotExpired: case errLockConflict:
return false, nil return false, nil
default: default:
return false, err return false, err
@ -152,45 +136,42 @@ func (client *lockRESTClient) Unlock(args dsync.LockArgs) (reply bool, err error
return client.restCall(lockRESTMethodUnlock, args) return client.restCall(lockRESTMethodUnlock, args)
} }
// ForceUnlock calls force unlock RPC. func closeLockers(lockers []dsync.NetLocker) {
func (client *lockRESTClient) ForceUnlock(args dsync.LockArgs) (reply bool, err error) { for _, locker := range lockers {
return client.restCall(lockRESTMethodForceUnlock, args) locker.Close()
}
} }
// Expired calls expired RPC. func newLockAPI(endpoint Endpoint) dsync.NetLocker {
func (client *lockRESTClient) Expired(args dsync.LockArgs) (reply bool, err error) { if endpoint.IsLocal {
return client.restCall(lockRESTMethodExpired, args) return globalLockServers[endpoint]
}
return newlockRESTClient(endpoint)
} }
// Returns a lock rest client. // Returns a lock rest client.
func newlockRESTClient(peer *xnet.Host) *lockRESTClient { func newlockRESTClient(endpoint Endpoint) *lockRESTClient {
scheme := "http"
if globalIsSSL {
scheme = "https"
}
serverURL := &url.URL{ serverURL := &url.URL{
Scheme: scheme, Scheme: endpoint.Scheme,
Host: peer.String(), Host: endpoint.Host,
Path: lockRESTPath, Path: pathJoin(lockRESTPrefix, endpoint.Path, lockRESTVersion),
} }
var tlsConfig *tls.Config var tlsConfig *tls.Config
if globalIsSSL { if globalIsSSL {
tlsConfig = &tls.Config{ tlsConfig = &tls.Config{
ServerName: peer.Name, ServerName: endpoint.Hostname(),
RootCAs: globalRootCAs, RootCAs: globalRootCAs,
NextProtos: []string{"http/1.1"}, // Force http1.1 NextProtos: []string{"http/1.1"}, // Force http1.1
} }
} }
restClient, err := rest.NewClient(serverURL, tlsConfig, rest.DefaultRESTTimeout, newAuthToken) trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout, rest.DefaultRESTTimeout)
restClient, err := rest.NewClient(serverURL, trFn, newAuthToken)
if err != nil { if err != nil {
logger.LogIf(context.Background(), err) logger.LogIf(context.Background(), err)
return &lockRESTClient{serverURL: serverURL, host: peer, restClient: restClient, connected: 0} return &lockRESTClient{endpoint: endpoint, restClient: restClient, connected: 0}
} }
return &lockRESTClient{serverURL: serverURL, host: peer, restClient: restClient, connected: 1} return &lockRESTClient{endpoint: endpoint, restClient: restClient, connected: 1}
} }

@ -19,17 +19,16 @@ package cmd
import ( import (
"testing" "testing"
"github.com/minio/dsync/v2" "github.com/minio/minio/pkg/dsync"
xnet "github.com/minio/minio/pkg/net"
) )
// Tests lock rpc client. // Tests lock rpc client.
func TestLockRESTlient(t *testing.T) { func TestLockRESTlient(t *testing.T) {
host, err := xnet.ParseHost("localhost:9000") endpoint, err := NewEndpoint("http://localhost:9000")
if err != nil { if err != nil {
t.Fatalf("unexpected error %v", err) t.Fatalf("unexpected error %v", err)
} }
lkClient := newlockRESTClient(host) lkClient := newlockRESTClient(endpoint)
if lkClient.connected == 0 { if lkClient.connected == 0 {
t.Fatalf("unexpected error. connection failed") t.Fatalf("unexpected error. connection failed")
} }
@ -54,14 +53,4 @@ func TestLockRESTlient(t *testing.T) {
if err == nil { if err == nil {
t.Fatal("Expected for Unlock to fail") t.Fatal("Expected for Unlock to fail")
} }
_, err = lkClient.ForceUnlock(dsync.LockArgs{})
if err == nil {
t.Fatal("Expected for ForceUnlock to fail")
}
_, err = lkClient.Expired(dsync.LockArgs{})
if err == nil {
t.Fatal("Expected for Expired to fail")
}
} }

@ -18,23 +18,19 @@ package cmd
import ( import (
"errors" "errors"
"time"
) )
const ( const (
lockRESTVersion = "v2" lockRESTVersion = "v2"
lockRESTVersionPrefix = SlashSeparator + "v2" lockRESTVersionPrefix = SlashSeparator + "v2"
lockRESTPrefix = minioReservedBucketPath + "/lock" lockRESTPrefix = minioReservedBucketPath + "/lock"
lockRESTPath = lockRESTPrefix + lockRESTVersionPrefix
) )
const ( const (
lockRESTMethodLock = "/lock" lockRESTMethodLock = "/lock"
lockRESTMethodRLock = "/rlock" lockRESTMethodRLock = "/rlock"
lockRESTMethodUnlock = "/unlock" lockRESTMethodUnlock = "/unlock"
lockRESTMethodRUnlock = "/runlock" lockRESTMethodRUnlock = "/runlock"
lockRESTMethodForceUnlock = "/forceunlock"
lockRESTMethodExpired = "/expired"
// Unique ID of lock/unlock request. // Unique ID of lock/unlock request.
lockRESTUID = "uid" lockRESTUID = "uid"
@ -43,67 +39,6 @@ const (
lockRESTSource = "source" lockRESTSource = "source"
// Resource contains a entity to be locked/unlocked. // Resource contains a entity to be locked/unlocked.
lockRESTResource = "resource" lockRESTResource = "resource"
// ServerAddr contains the address of the server who requested lock/unlock of the above resource.
lockRESTServerAddr = "serverAddr"
// ServiceEndpoint contains the network path of above server to do lock/unlock.
lockRESTServerEndpoint = "serverEndpoint"
) )
// nameLockRequesterInfoPair is a helper type for lock maintenance
type nameLockRequesterInfoPair struct {
name string
lri lockRequesterInfo
}
var errLockConflict = errors.New("lock conflict") var errLockConflict = errors.New("lock conflict")
var errLockNotExpired = errors.New("lock not expired")
// Similar to removeEntry but only removes an entry only if the lock entry exists in map.
func (l *localLocker) removeEntryIfExists(nlrip nameLockRequesterInfoPair) {
// Check if entry is still in map (could have been removed altogether by 'concurrent' (R)Unlock of last entry)
if lri, ok := l.lockMap[nlrip.name]; ok {
// Even if the entry exists, it may not be the same entry which was
// considered as expired, so we simply an attempt to remove it if its
// not possible there is nothing we need to do.
l.removeEntry(nlrip.name, nlrip.lri.UID, &lri)
}
}
// removeEntry based on the uid of the lock message, removes a single entry from the
// lockRequesterInfo array or the whole array from the map (in case of a write lock
// or last read lock)
func (l *localLocker) removeEntry(name, uid string, lri *[]lockRequesterInfo) bool {
// Find correct entry to remove based on uid.
for index, entry := range *lri {
if entry.UID == uid {
if len(*lri) == 1 {
// Remove the write lock.
delete(l.lockMap, name)
} else {
// Remove the appropriate read lock.
*lri = append((*lri)[:index], (*lri)[index+1:]...)
l.lockMap[name] = *lri
}
return true
}
}
// None found return false, perhaps entry removed in previous run.
return false
}
// getLongLivedLocks returns locks that are older than a certain time and
// have not been 'checked' for validity too soon enough
func getLongLivedLocks(m map[string][]lockRequesterInfo, interval time.Duration) []nameLockRequesterInfoPair {
rslt := []nameLockRequesterInfoPair{}
for name, lriArray := range m {
for idx := range lriArray {
// Check whether enough time has gone by since last check
if time.Since(lriArray[idx].TimeLastCheck) >= interval {
rslt = append(rslt, nameLockRequesterInfoPair{name: name, lri: lriArray[idx]})
lriArray[idx].TimeLastCheck = UTCNow()
}
}
}
return rslt
}

@ -21,7 +21,6 @@ import (
"reflect" "reflect"
"sync" "sync"
"testing" "testing"
"time"
) )
// Helper function to create a lock server for testing // Helper function to create a lock server for testing
@ -36,9 +35,8 @@ func createLockTestServer(t *testing.T) (string, *lockRESTServer, string) {
locker := &lockRESTServer{ locker := &lockRESTServer{
ll: &localLocker{ ll: &localLocker{
mutex: sync.Mutex{}, mutex: sync.Mutex{},
serviceEndpoint: "rpc-path", lockMap: make(map[string][]lockRequesterInfo),
lockMap: make(map[string][]lockRequesterInfo),
}, },
} }
creds := globalActiveCred creds := globalActiveCred
@ -49,63 +47,22 @@ func createLockTestServer(t *testing.T) (string, *lockRESTServer, string) {
return fsDir, locker, token return fsDir, locker, token
} }
// Test function to remove lock entries from map only in case they still exist based on name & uid combination
func TestLockRpcServerRemoveEntryIfExists(t *testing.T) {
testPath, locker, _ := createLockTestServer(t)
defer os.RemoveAll(testPath)
lri := lockRequesterInfo{
Writer: false,
Node: "host",
ServiceEndpoint: "rpc-path",
UID: "0123-4567",
Timestamp: UTCNow(),
TimeLastCheck: UTCNow(),
}
nlrip := nameLockRequesterInfoPair{name: "name", lri: lri}
// first test by simulating item has already been deleted
locker.ll.removeEntryIfExists(nlrip)
{
gotLri := locker.ll.lockMap["name"]
expectedLri := []lockRequesterInfo(nil)
if !reflect.DeepEqual(expectedLri, gotLri) {
t.Errorf("Expected %#v, got %#v", expectedLri, gotLri)
}
}
// then test normal deletion
locker.ll.lockMap["name"] = []lockRequesterInfo{lri} // add item
locker.ll.removeEntryIfExists(nlrip)
{
gotLri := locker.ll.lockMap["name"]
expectedLri := []lockRequesterInfo(nil)
if !reflect.DeepEqual(expectedLri, gotLri) {
t.Errorf("Expected %#v, got %#v", expectedLri, gotLri)
}
}
}
// Test function to remove lock entries from map based on name & uid combination // Test function to remove lock entries from map based on name & uid combination
func TestLockRpcServerRemoveEntry(t *testing.T) { func TestLockRpcServerRemoveEntry(t *testing.T) {
testPath, locker, _ := createLockTestServer(t) testPath, locker, _ := createLockTestServer(t)
defer os.RemoveAll(testPath) defer os.RemoveAll(testPath)
lockRequesterInfo1 := lockRequesterInfo{ lockRequesterInfo1 := lockRequesterInfo{
Writer: true, Writer: true,
Node: "host", UID: "0123-4567",
ServiceEndpoint: "rpc-path", Timestamp: UTCNow(),
UID: "0123-4567", TimeLastCheck: UTCNow(),
Timestamp: UTCNow(),
TimeLastCheck: UTCNow(),
} }
lockRequesterInfo2 := lockRequesterInfo{ lockRequesterInfo2 := lockRequesterInfo{
Writer: true, Writer: true,
Node: "host", UID: "89ab-cdef",
ServiceEndpoint: "rpc-path", Timestamp: UTCNow(),
UID: "89ab-cdef", TimeLastCheck: UTCNow(),
Timestamp: UTCNow(),
TimeLastCheck: UTCNow(),
} }
locker.ll.lockMap["name"] = []lockRequesterInfo{ locker.ll.lockMap["name"] = []lockRequesterInfo{
@ -140,64 +97,3 @@ func TestLockRpcServerRemoveEntry(t *testing.T) {
} }
} }
} }
// Tests function returning long lived locks.
func TestLockRpcServerGetLongLivedLocks(t *testing.T) {
ut := UTCNow()
// Collection of test cases for verifying returning valid long lived locks.
testCases := []struct {
lockMap map[string][]lockRequesterInfo
lockInterval time.Duration
expectedNSLR []nameLockRequesterInfoPair
}{
// Testcase - 1 validates long lived locks, returns empty list.
{
lockMap: map[string][]lockRequesterInfo{
"test": {{
Writer: true,
Node: "10.1.10.21",
ServiceEndpoint: "/lock/mnt/disk1",
UID: "10000112",
Timestamp: ut,
TimeLastCheck: ut,
}},
},
lockInterval: 1 * time.Minute,
expectedNSLR: []nameLockRequesterInfoPair{},
},
// Testcase - 2 validates long lived locks, returns at least one list.
{
lockMap: map[string][]lockRequesterInfo{
"test": {{
Writer: true,
Node: "10.1.10.21",
ServiceEndpoint: "/lock/mnt/disk1",
UID: "10000112",
Timestamp: ut,
TimeLastCheck: ut.Add(-2 * time.Minute),
}},
},
lockInterval: 1 * time.Minute,
expectedNSLR: []nameLockRequesterInfoPair{
{
name: "test",
lri: lockRequesterInfo{
Writer: true,
Node: "10.1.10.21",
ServiceEndpoint: "/lock/mnt/disk1",
UID: "10000112",
Timestamp: ut,
TimeLastCheck: ut.Add(-2 * time.Minute),
},
},
},
},
}
// Validates all test cases here.
for i, testCase := range testCases {
nsLR := getLongLivedLocks(testCase.lockMap, testCase.lockInterval)
if !reflect.DeepEqual(testCase.expectedNSLR, nsLR) {
t.Errorf("Test %d: Expected %#v, got %#v", i+1, testCase.expectedNSLR, nsLR)
}
}
}

@ -17,16 +17,13 @@
package cmd package cmd
import ( import (
"context"
"errors" "errors"
"math/rand"
"net/http" "net/http"
"path"
"time" "time"
"github.com/gorilla/mux" "github.com/gorilla/mux"
"github.com/minio/dsync/v2" "github.com/minio/minio/pkg/dsync"
"github.com/minio/minio/cmd/logger"
xnet "github.com/minio/minio/pkg/net"
) )
const ( const (
@ -58,11 +55,9 @@ func (l *lockRESTServer) IsValid(w http.ResponseWriter, r *http.Request) bool {
func getLockArgs(r *http.Request) dsync.LockArgs { func getLockArgs(r *http.Request) dsync.LockArgs {
return dsync.LockArgs{ return dsync.LockArgs{
UID: r.URL.Query().Get(lockRESTUID), UID: r.URL.Query().Get(lockRESTUID),
Source: r.URL.Query().Get(lockRESTSource), Source: r.URL.Query().Get(lockRESTSource),
Resource: r.URL.Query().Get(lockRESTResource), Resource: r.URL.Query().Get(lockRESTResource),
ServerAddr: r.URL.Query().Get(lockRESTServerAddr),
ServiceEndpoint: r.URL.Query().Get(lockRESTServerEndpoint),
} }
} }
@ -132,130 +127,28 @@ func (l *lockRESTServer) RUnlockHandler(w http.ResponseWriter, r *http.Request)
 	}
 }
 
-// ForceUnlockHandler - force releases the acquired lock.
-func (l *lockRESTServer) ForceUnlockHandler(w http.ResponseWriter, r *http.Request) {
-	if !l.IsValid(w, r) {
-		l.writeErrorResponse(w, errors.New("Invalid request"))
-		return
-	}
-
-	// Ignore the ForceUnlock() "reply" return value because if err == nil, "reply" is always true
-	// Consequently, if err != nil, reply is always false
-	if _, err := l.ll.ForceUnlock(getLockArgs(r)); err != nil {
-		l.writeErrorResponse(w, err)
-		return
-	}
-}
-
-// ExpiredHandler - query expired lock status.
-func (l *lockRESTServer) ExpiredHandler(w http.ResponseWriter, r *http.Request) {
-	if !l.IsValid(w, r) {
-		l.writeErrorResponse(w, errors.New("Invalid request"))
-		return
-	}
-
-	lockArgs := getLockArgs(r)
-
-	l.ll.mutex.Lock()
-	defer l.ll.mutex.Unlock()
-	// Lock found, proceed to verify if belongs to given uid.
-	if lri, ok := l.ll.lockMap[lockArgs.Resource]; ok {
-		// Check whether uid is still active
-		for _, entry := range lri {
-			if entry.UID == lockArgs.UID {
-				l.writeErrorResponse(w, errLockNotExpired)
-				return
-			}
-		}
-	}
-}
-
-// lockMaintenance loops over locks that have been active for some time and checks back
-// with the original server whether it is still alive or not
-//
-// Following logic inside ignores the errors generated for Dsync.Active operation.
-// - server at client down
-// - some network error (and server is up normally)
-//
-// We will ignore the error, and we will retry later to get a resolve on this lock
-func (l *lockRESTServer) lockMaintenance(interval time.Duration) {
-	l.ll.mutex.Lock()
-	// Get list of long lived locks to check for staleness.
-	nlripLongLived := getLongLivedLocks(l.ll.lockMap, interval)
-	l.ll.mutex.Unlock()
-
-	// Validate if long lived locks are indeed clean.
-	for _, nlrip := range nlripLongLived {
-		// Initialize client based on the long live locks.
-		host, err := xnet.ParseHost(nlrip.lri.Node)
-		if err != nil {
-			logger.LogIf(context.Background(), err)
-			continue
-		}
-
-		c := newlockRESTClient(host)
-		if !c.IsOnline() {
-			continue
-		}
-
-		// Call back to original server verify whether the lock is still active (based on name & uid)
-		expired, err := c.Expired(dsync.LockArgs{
-			UID:      nlrip.lri.UID,
-			Resource: nlrip.name,
-		})
-		if err != nil {
-			continue
-		}
-
-		// For successful response, verify if lock was indeed active or stale.
-		if expired {
-			// The lock is no longer active at server that originated
-			// the lock, attempt to remove the lock.
-			l.ll.mutex.Lock()
-			l.ll.removeEntryIfExists(nlrip) // Purge the stale entry if it exists.
-			l.ll.mutex.Unlock()
-		}
-
-		// Close the connection regardless of the call response.
-		c.Close()
-	}
-}
-
-// Start lock maintenance from all lock servers.
-func startLockMaintenance(lkSrv *lockRESTServer) {
-	// Initialize a new ticker with a minute between each ticks.
-	ticker := time.NewTicker(lockMaintenanceInterval)
-
-	// Stop the timer upon service closure and cleanup the go-routine.
-	defer ticker.Stop()
-
-	// Start with random sleep time, so as to avoid "synchronous checks" between servers
-	time.Sleep(time.Duration(rand.Float64() * float64(lockMaintenanceInterval)))
-	for {
-		// Verifies every minute for locks held more than 2 minutes.
-		select {
-		case <-GlobalServiceDoneCh:
-			return
-		case <-ticker.C:
-			lkSrv.lockMaintenance(lockValidityCheckInterval)
-		}
-	}
-}
-
-// registerLockRESTHandlers - register lock rest router.
-func registerLockRESTHandlers(router *mux.Router) {
-	subrouter := router.PathPrefix(lockRESTPrefix).Subrouter()
-	queries := restQueries(lockRESTUID, lockRESTSource, lockRESTResource, lockRESTServerAddr, lockRESTServerEndpoint)
-
-	subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodLock).HandlerFunc(httpTraceHdrs(globalLockServer.LockHandler)).Queries(queries...)
-	subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodRLock).HandlerFunc(httpTraceHdrs(globalLockServer.RLockHandler)).Queries(queries...)
-	subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodUnlock).HandlerFunc(httpTraceHdrs(globalLockServer.UnlockHandler)).Queries(queries...)
-	subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodRUnlock).HandlerFunc(httpTraceHdrs(globalLockServer.RUnlockHandler)).Queries(queries...)
-	subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodForceUnlock).HandlerFunc(httpTraceHdrs(globalLockServer.ForceUnlockHandler)).Queries(queries...)
-	subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodExpired).HandlerFunc(httpTraceAll(globalLockServer.ExpiredHandler)).Queries(queries...)
-
-	// If none of the routes match add default error handler routes
-	router.NotFoundHandler = http.HandlerFunc(httpTraceAll(errorResponseHandler))
-	router.MethodNotAllowedHandler = http.HandlerFunc(httpTraceAll(errorResponseHandler))
-
-	// Start lock maintenance from all lock servers.
-	go startLockMaintenance(globalLockServer)
-}
+
+// registerLockRESTHandlers - register lock rest router.
+func registerLockRESTHandlers(router *mux.Router, endpoints EndpointList) {
+	queries := restQueries(lockRESTUID, lockRESTSource, lockRESTResource)
+	for _, endpoint := range endpoints {
+		if !endpoint.IsLocal {
+			continue
+		}
+
+		lockServer := &lockRESTServer{
+			ll: newLocker(endpoint),
+		}
+
+		subrouter := router.PathPrefix(path.Join(lockRESTPrefix, endpoint.Path)).Subrouter()
+		subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodLock).HandlerFunc(httpTraceHdrs(lockServer.LockHandler)).Queries(queries...)
+		subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodRLock).HandlerFunc(httpTraceHdrs(lockServer.RLockHandler)).Queries(queries...)
+		subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodUnlock).HandlerFunc(httpTraceHdrs(lockServer.UnlockHandler)).Queries(queries...)
+		subrouter.Methods(http.MethodPost).Path(lockRESTVersionPrefix + lockRESTMethodRUnlock).HandlerFunc(httpTraceHdrs(lockServer.RUnlockHandler)).Queries(queries...)
+
+		globalLockServers[endpoint] = lockServer.ll
+	}
+
+	// If none of the routes match add default error handler routes
+	router.NotFoundHandler = http.HandlerFunc(httpTraceAll(errorResponseHandler))
+	router.MethodNotAllowedHandler = http.HandlerFunc(httpTraceAll(errorResponseHandler))
+}

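Note: the following is a minimal sketch, not part of the change above, showing how a lock route is now scoped per endpoint; it assumes it lives inside the cmd package next to the constants used in the diff, and lockRoutePath is a hypothetical helper name used only for illustration.

    // lockRoutePath is a hypothetical helper: every local endpoint gets its own
    // subrouter and localLocker instead of one process-wide lock server.
    func lockRoutePath(endpoint Endpoint) string {
        // e.g. path.Join(lockRESTPrefix, endpoint.Path) followed by the
        // versioned method path, mirroring the Subrouter registration above.
        return path.Join(lockRESTPrefix, endpoint.Path) + lockRESTVersionPrefix + lockRESTMethodLock
    }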
@ -27,21 +27,13 @@ import (
 	"fmt"
 	"time"
 
-	"github.com/minio/dsync/v2"
 	"github.com/minio/lsync"
-	"github.com/minio/minio-go/v6/pkg/set"
 	"github.com/minio/minio/cmd/logger"
-	xnet "github.com/minio/minio/pkg/net"
+	"github.com/minio/minio/pkg/dsync"
 )
 
-// Global name space lock.
-var globalNSMutex *nsLockMap
-
-// Global lock server one per server.
-var globalLockServer *lockRESTServer
-
-// Instance of dsync for distributed clients.
-var globalDsync *dsync.Dsync
+// local lock servers
+var globalLockServers = make(map[Endpoint]*localLocker)
 
 // RWLocker - locker interface to introduce GetRLock, RUnlock.
 type RWLocker interface {
@ -51,45 +43,6 @@ type RWLocker interface {
RUnlock() RUnlock()
} }
// Initialize distributed locking only in case of distributed setup.
// Returns lock clients and the node index for the current server.
func newDsyncNodes(endpoints EndpointList) (clnts []dsync.NetLocker, myNode int, err error) {
myNode = -1
seenHosts := set.NewStringSet()
for _, endpoint := range endpoints {
if seenHosts.Contains(endpoint.Host) {
continue
}
seenHosts.Add(endpoint.Host)
var locker dsync.NetLocker
if endpoint.IsLocal {
myNode = len(clnts)
globalLockServer = &lockRESTServer{
ll: &localLocker{
serverAddr: endpoint.Host,
serviceEndpoint: lockRESTPrefix,
lockMap: make(map[string][]lockRequesterInfo),
},
}
locker = globalLockServer.ll
} else {
var host *xnet.Host
host, err = xnet.ParseHost(endpoint.Host)
locker = newlockRESTClient(host)
}
clnts = append(clnts, locker)
}
if myNode == -1 {
return clnts, myNode, errors.New("no endpoint pointing to the local machine is found")
}
return clnts, myNode, err
}
// newNSLock - return a new name space lock map. // newNSLock - return a new name space lock map.
func newNSLock(isDistXL bool) *nsLockMap { func newNSLock(isDistXL bool) *nsLockMap {
nsMutex := nsLockMap{ nsMutex := nsLockMap{
@ -102,11 +55,6 @@ func newNSLock(isDistXL bool) *nsLockMap {
return &nsMutex return &nsMutex
} }
// initNSLock - initialize name space lock map.
func initNSLock(isDistXL bool) {
globalNSMutex = newNSLock(isDistXL)
}
// nsParam - carries name space resource. // nsParam - carries name space resource.
type nsParam struct { type nsParam struct {
volume string volume string
@ -224,32 +172,6 @@ func (n *nsLockMap) RUnlock(volume, path, opsID string) {
n.unlock(volume, path, readLock) n.unlock(volume, path, readLock)
} }
// ForceUnlock - forcefully unlock a lock based on name.
func (n *nsLockMap) ForceUnlock(volume, path string) {
n.lockMapMutex.Lock()
defer n.lockMapMutex.Unlock()
// Clarification on operation:
// - In case of FS or XL we call ForceUnlock on the local globalNSMutex
// (since there is only a single server) which will cause the 'stuck'
// mutex to be removed from the map. Existing operations for this
// will continue to be blocked (and timeout). New operations on this
// resource will use a new mutex and proceed normally.
//
// - In case of Distributed setup (using dsync), there is no need to call
// ForceUnlock on the server where the lock was acquired and is presumably
// 'stuck'. Instead dsync.ForceUnlock() will release the underlying locks
// that participated in granting the lock. Any pending dsync locks that
// are blocking can now proceed as normal and any new locks will also
// participate normally.
if n.isDistXL { // For distributed mode, broadcast ForceUnlock message.
dsync.NewDRWMutex(context.Background(), pathJoin(volume, path), globalDsync).ForceUnlock()
}
// Remove lock from the map.
delete(n.lockMap, nsParam{volume, path})
}
// dsync's distributed lock instance. // dsync's distributed lock instance.
type distLockInstance struct { type distLockInstance struct {
rwMutex *dsync.DRWMutex rwMutex *dsync.DRWMutex
@ -301,10 +223,14 @@ type localLockInstance struct {
 // NewNSLock - returns a lock instance for a given volume and
 // path. The returned lockInstance object encapsulates the nsLockMap,
 // volume, path and operation ID.
-func (n *nsLockMap) NewNSLock(ctx context.Context, volume, path string) RWLocker {
+func (n *nsLockMap) NewNSLock(ctx context.Context, lockers []dsync.NetLocker, volume, path string) RWLocker {
 	opsID := mustGetUUID()
 	if n.isDistXL {
-		return &distLockInstance{dsync.NewDRWMutex(ctx, pathJoin(volume, path), globalDsync), volume, path, opsID}
+		sync, err := dsync.New(lockers)
+		if err != nil {
+			logger.CriticalIf(ctx, err)
+		}
+		return &distLockInstance{dsync.NewDRWMutex(ctx, pathJoin(volume, path), sync), volume, path, opsID}
 	}
 	return &localLockInstance{ctx, n, volume, path, opsID}
 }

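A rough sketch of what the distributed branch of NewNSLock above does, assuming `lockers` holds one dsync.NetLocker per drive of a set (as supplied by the xl-sets callers later in this diff) and that ctx is in scope:

    ds, err := dsync.New(lockers)
    if err != nil {
        logger.CriticalIf(ctx, err)
    }
    // The DRWMutex is scoped to this set's lockers only; it is wrapped by
    // distLockInstance, which exposes GetLock/GetRLock/Unlock/RUnlock.
    lk := dsync.NewDRWMutex(ctx, pathJoin("my-bucket", "my-object"), ds)
    _ = lk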
@ -17,7 +17,6 @@
package cmd package cmd
import ( import (
"context"
"testing" "testing"
"time" "time"
) )
@ -32,7 +31,7 @@ func TestGetSource(t *testing.T) {
currentSource := func() string { return getSource() } currentSource := func() string { return getSource() }
gotSource := currentSource() gotSource := currentSource()
// Hard coded line number, 32, in the "expectedSource" value // Hard coded line number, 32, in the "expectedSource" value
-	expectedSource := "[namespace-lock_test.go:33:TestGetSource()]"
+	expectedSource := "[namespace-lock_test.go:32:TestGetSource()]"
if gotSource != expectedSource { if gotSource != expectedSource {
t.Errorf("expected : %s, got : %s", expectedSource, gotSource) t.Errorf("expected : %s, got : %s", expectedSource, gotSource)
} }
@ -41,7 +40,8 @@ func TestGetSource(t *testing.T) {
// Tests functionality provided by namespace lock. // Tests functionality provided by namespace lock.
func TestNamespaceLockTest(t *testing.T) { func TestNamespaceLockTest(t *testing.T) {
isDistXL := false isDistXL := false
-	initNSLock(isDistXL)
+	nsMutex := newNSLock(isDistXL)
// List of test cases. // List of test cases.
testCases := []struct { testCases := []struct {
lk func(s1, s2, s3 string, t time.Duration) bool lk func(s1, s2, s3 string, t time.Duration) bool
@ -53,22 +53,22 @@ func TestNamespaceLockTest(t *testing.T) {
shouldPass bool shouldPass bool
}{ }{
{ {
lk: globalNSMutex.Lock, lk: nsMutex.Lock,
unlk: globalNSMutex.Unlock, unlk: nsMutex.Unlock,
lockedRefCount: 1, lockedRefCount: 1,
unlockedRefCount: 0, unlockedRefCount: 0,
shouldPass: true, shouldPass: true,
}, },
{ {
rlk: globalNSMutex.RLock, rlk: nsMutex.RLock,
runlk: globalNSMutex.RUnlock, runlk: nsMutex.RUnlock,
lockedRefCount: 4, lockedRefCount: 4,
unlockedRefCount: 2, unlockedRefCount: 2,
shouldPass: true, shouldPass: true,
}, },
{ {
rlk: globalNSMutex.RLock, rlk: nsMutex.RLock,
runlk: globalNSMutex.RUnlock, runlk: nsMutex.RUnlock,
lockedRefCount: 1, lockedRefCount: 1,
unlockedRefCount: 0, unlockedRefCount: 0,
shouldPass: true, shouldPass: true,
@ -82,7 +82,7 @@ func TestNamespaceLockTest(t *testing.T) {
if !testCase.lk("a", "b", "c", 60*time.Second) { // lock once. if !testCase.lk("a", "b", "c", 60*time.Second) { // lock once.
t.Fatalf("Failed to acquire lock") t.Fatalf("Failed to acquire lock")
} }
nsLk, ok := globalNSMutex.lockMap[nsParam{"a", "b"}] nsLk, ok := nsMutex.lockMap[nsParam{"a", "b"}]
if !ok && testCase.shouldPass { if !ok && testCase.shouldPass {
t.Errorf("Lock in map missing.") t.Errorf("Lock in map missing.")
} }
@ -94,7 +94,7 @@ func TestNamespaceLockTest(t *testing.T) {
if testCase.unlockedRefCount != nsLk.ref && testCase.shouldPass { if testCase.unlockedRefCount != nsLk.ref && testCase.shouldPass {
t.Errorf("Test %d fails, expected to pass. Wanted ref count is %d, got %d", 1, testCase.unlockedRefCount, nsLk.ref) t.Errorf("Test %d fails, expected to pass. Wanted ref count is %d, got %d", 1, testCase.unlockedRefCount, nsLk.ref)
} }
_, ok = globalNSMutex.lockMap[nsParam{"a", "b"}] _, ok = nsMutex.lockMap[nsParam{"a", "b"}]
if ok && !testCase.shouldPass { if ok && !testCase.shouldPass {
t.Errorf("Lock map found after unlock.") t.Errorf("Lock map found after unlock.")
} }
@ -113,7 +113,7 @@ func TestNamespaceLockTest(t *testing.T) {
if !testCase.rlk("a", "b", "c", 60*time.Second) { // lock fourth time. if !testCase.rlk("a", "b", "c", 60*time.Second) { // lock fourth time.
t.Fatalf("Failed to acquire fourth read lock") t.Fatalf("Failed to acquire fourth read lock")
} }
nsLk, ok = globalNSMutex.lockMap[nsParam{"a", "b"}] nsLk, ok = nsMutex.lockMap[nsParam{"a", "b"}]
if !ok && testCase.shouldPass { if !ok && testCase.shouldPass {
t.Errorf("Lock in map missing.") t.Errorf("Lock in map missing.")
} }
@ -127,7 +127,7 @@ func TestNamespaceLockTest(t *testing.T) {
if testCase.unlockedRefCount != nsLk.ref && testCase.shouldPass { if testCase.unlockedRefCount != nsLk.ref && testCase.shouldPass {
t.Errorf("Test %d fails, expected to pass. Wanted ref count is %d, got %d", 2, testCase.unlockedRefCount, nsLk.ref) t.Errorf("Test %d fails, expected to pass. Wanted ref count is %d, got %d", 2, testCase.unlockedRefCount, nsLk.ref)
} }
_, ok = globalNSMutex.lockMap[nsParam{"a", "b"}] _, ok = nsMutex.lockMap[nsParam{"a", "b"}]
if !ok && testCase.shouldPass { if !ok && testCase.shouldPass {
t.Errorf("Lock map not found.") t.Errorf("Lock map not found.")
} }
@ -138,7 +138,7 @@ func TestNamespaceLockTest(t *testing.T) {
t.Fatalf("Failed to acquire read lock") t.Fatalf("Failed to acquire read lock")
} }
nsLk, ok = globalNSMutex.lockMap[nsParam{"a", "c"}] nsLk, ok = nsMutex.lockMap[nsParam{"a", "c"}]
if !ok && testCase.shouldPass { if !ok && testCase.shouldPass {
t.Errorf("Lock in map missing.") t.Errorf("Lock in map missing.")
} }
@ -150,7 +150,7 @@ func TestNamespaceLockTest(t *testing.T) {
if testCase.unlockedRefCount != nsLk.ref && testCase.shouldPass { if testCase.unlockedRefCount != nsLk.ref && testCase.shouldPass {
t.Errorf("Test %d fails, expected to pass. Wanted ref count is %d, got %d", 3, testCase.unlockedRefCount, nsLk.ref) t.Errorf("Test %d fails, expected to pass. Wanted ref count is %d, got %d", 3, testCase.unlockedRefCount, nsLk.ref)
} }
_, ok = globalNSMutex.lockMap[nsParam{"a", "c"}] _, ok = nsMutex.lockMap[nsParam{"a", "c"}]
if ok && !testCase.shouldPass { if ok && !testCase.shouldPass {
t.Errorf("Lock map not found.") t.Errorf("Lock map not found.")
} }
@ -158,83 +158,44 @@ func TestNamespaceLockTest(t *testing.T) {
func TestNamespaceLockTimedOut(t *testing.T) { func TestNamespaceLockTimedOut(t *testing.T) {
isDistXL := false isDistXL := false
-	initNSLock(isDistXL)
+	nsMutex := newNSLock(isDistXL)
// Get write lock // Get write lock
if !globalNSMutex.Lock("my-bucket", "my-object", "abc", 60*time.Second) { if !nsMutex.Lock("my-bucket", "my-object", "abc", 60*time.Second) {
t.Fatalf("Failed to acquire lock") t.Fatalf("Failed to acquire lock")
} }
// Second attempt for write lock on same resource should time out // Second attempt for write lock on same resource should time out
locked := globalNSMutex.Lock("my-bucket", "my-object", "def", 1*time.Second) locked := nsMutex.Lock("my-bucket", "my-object", "def", 1*time.Second)
if locked { if locked {
t.Fatalf("Should not have acquired lock") t.Fatalf("Should not have acquired lock")
} }
// Read lock on same resource should also time out // Read lock on same resource should also time out
locked = globalNSMutex.RLock("my-bucket", "my-object", "def", 1*time.Second) locked = nsMutex.RLock("my-bucket", "my-object", "def", 1*time.Second)
if locked { if locked {
t.Fatalf("Should not have acquired read lock while write lock is active") t.Fatalf("Should not have acquired read lock while write lock is active")
} }
// Release write lock // Release write lock
globalNSMutex.Unlock("my-bucket", "my-object", "abc") nsMutex.Unlock("my-bucket", "my-object", "abc")
// Get read lock // Get read lock
if !globalNSMutex.RLock("my-bucket", "my-object", "ghi", 60*time.Second) { if !nsMutex.RLock("my-bucket", "my-object", "ghi", 60*time.Second) {
t.Fatalf("Failed to acquire read lock") t.Fatalf("Failed to acquire read lock")
} }
// Write lock on same resource should time out // Write lock on same resource should time out
locked = globalNSMutex.Lock("my-bucket", "my-object", "klm", 1*time.Second) locked = nsMutex.Lock("my-bucket", "my-object", "klm", 1*time.Second)
if locked { if locked {
t.Fatalf("Should not have acquired lock") t.Fatalf("Should not have acquired lock")
} }
// 2nd read lock should be just fine // 2nd read lock should be just fine
if !globalNSMutex.RLock("my-bucket", "my-object", "nop", 60*time.Second) { if !nsMutex.RLock("my-bucket", "my-object", "nop", 60*time.Second) {
t.Fatalf("Failed to acquire second read lock") t.Fatalf("Failed to acquire second read lock")
} }
// Release both read locks // Release both read locks
globalNSMutex.RUnlock("my-bucket", "my-object", "ghi") nsMutex.RUnlock("my-bucket", "my-object", "ghi")
globalNSMutex.RUnlock("my-bucket", "my-object", "nop") nsMutex.RUnlock("my-bucket", "my-object", "nop")
}
// Tests functionality to forcefully unlock locks.
func TestNamespaceForceUnlockTest(t *testing.T) {
isDistXL := false
initNSLock(isDistXL)
// Create lock.
lock := globalNSMutex.NewNSLock(context.Background(), "bucket", "object")
if lock.GetLock(newDynamicTimeout(60*time.Second, time.Second)) != nil {
t.Fatalf("Failed to get lock")
}
// Forcefully unlock lock.
globalNSMutex.ForceUnlock("bucket", "object")
ch := make(chan struct{}, 1)
go func() {
// Try to claim lock again.
anotherLock := globalNSMutex.NewNSLock(context.Background(), "bucket", "object")
if anotherLock.GetLock(newDynamicTimeout(60*time.Second, time.Second)) != nil {
t.Errorf("Failed to get lock")
return
}
// And signal success.
ch <- struct{}{}
}()
select {
case <-ch:
// Signaled so all is fine.
break
case <-time.After(100 * time.Millisecond):
// In case we hit the time out, the lock has not been cleared.
t.Errorf("Lock not cleared.")
}
// Clean up lock.
globalNSMutex.ForceUnlock("bucket", "object")
} }

@ -466,7 +466,6 @@ func (sys *NotificationSys) ServerInfo(ctx context.Context) []ServerInfo {
// GetLocks - makes GetLocks RPC call on all peers. // GetLocks - makes GetLocks RPC call on all peers.
func (sys *NotificationSys) GetLocks(ctx context.Context) []*PeerLocks { func (sys *NotificationSys) GetLocks(ctx context.Context) []*PeerLocks {
locksResp := make([]*PeerLocks, len(sys.peerClients)) locksResp := make([]*PeerLocks, len(sys.peerClients))
g := errgroup.WithNErrs(len(sys.peerClients)) g := errgroup.WithNErrs(len(sys.peerClients))
for index, client := range sys.peerClients { for index, client := range sys.peerClients {
@ -703,7 +702,7 @@ func (sys *NotificationSys) initListeners(ctx context.Context, objAPI ObjectLaye
 	// As object layer's GetObject() and PutObject() take respective lock on minioMetaBucket
 	// and configFile, take a transaction lock to avoid data race between readConfig()
 	// and saveConfig().
-	objLock := globalNSMutex.NewNSLock(ctx, minioMetaBucket, transactionConfigFile)
+	objLock := objAPI.NewNSLock(ctx, minioMetaBucket, transactionConfigFile)
 	if err := objLock.GetRLock(globalOperationTimeout); err != nil {
 		return err
 	}
@ -1344,7 +1343,7 @@ func SaveListener(objAPI ObjectLayer, bucketName string, eventNames []event.Name
 	// As object layer's GetObject() and PutObject() take respective lock on minioMetaBucket
 	// and configFile, take a transaction lock to avoid data race between readConfig()
 	// and saveConfig().
-	objLock := globalNSMutex.NewNSLock(ctx, minioMetaBucket, transactionConfigFile)
+	objLock := objAPI.NewNSLock(ctx, minioMetaBucket, transactionConfigFile)
 	if err := objLock.GetLock(globalOperationTimeout); err != nil {
 		return err
 	}
@ -1395,7 +1394,7 @@ func RemoveListener(objAPI ObjectLayer, bucketName string, targetID event.Target
 	// As object layer's GetObject() and PutObject() take respective lock on minioMetaBucket
 	// and configFile, take a transaction lock to avoid data race between readConfig()
 	// and saveConfig().
-	objLock := globalNSMutex.NewNSLock(ctx, minioMetaBucket, transactionConfigFile)
+	objLock := objAPI.NewNSLock(ctx, minioMetaBucket, transactionConfigFile)
 	if err := objLock.GetLock(globalOperationTimeout); err != nil {
 		return err
 	}

@ -106,7 +106,7 @@ func newStorageAPI(endpoint Endpoint) (storage StorageAPI, err error) {
 		return &posixDiskIDCheck{storage: storage}, nil
 	}
-	return newStorageRESTClient(endpoint)
+	return newStorageRESTClient(endpoint), nil
 }
 
 // Cleanup a directory recursively.

@ -48,6 +48,9 @@ const (
// ObjectLayer implements primitives for object API layer. // ObjectLayer implements primitives for object API layer.
type ObjectLayer interface { type ObjectLayer interface {
// Locking operations on object.
NewNSLock(ctx context.Context, bucket string, object string) RWLocker
// Storage operations. // Storage operations.
Shutdown(context.Context) error Shutdown(context.Context) error
StorageInfo(context.Context) StorageInfo StorageInfo(context.Context) StorageInfo

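Sketch only: with NewNSLock now part of ObjectLayer, callers such as the notification code above take namespace locks through the object layer instead of a global nsLockMap; objAPI and ctx are assumed to be in scope.

    objLock := objAPI.NewNSLock(ctx, minioMetaBucket, transactionConfigFile)
    if err := objLock.GetLock(globalOperationTimeout); err != nil {
        return err
    }
    defer objLock.Unlock()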
@ -100,7 +100,7 @@ func (client *peerRESTClient) Close() error {
 }
 
 // GetLocksResp stores various info from the client for each lock that is requested.
-type GetLocksResp map[string][]lockRequesterInfo
+type GetLocksResp []map[string][]lockRequesterInfo
 
 // NetReadPerfInfo - fetch network read performance information for a remote node.
 func (client *peerRESTClient) NetReadPerfInfo(size int64) (info ServerNetReadPerfInfo, err error) {
@ -759,10 +759,10 @@ func newPeerRESTClient(peer *xnet.Host) (*peerRESTClient, error) {
 		}
 	}
 
-	restClient, err := rest.NewClient(serverURL, tlsConfig, rest.DefaultRESTTimeout, newAuthToken)
+	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout, rest.DefaultRESTTimeout)
+	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken)
 	if err != nil {
-		return &peerRESTClient{host: peer, restClient: restClient, connected: 0}, err
+		return nil, err
 	}
 
 	return &peerRESTClient{host: peer, restClient: restClient, connected: 1}, nil

@ -148,8 +148,12 @@ func (s *peerRESTServer) GetLocksHandler(w http.ResponseWriter, r *http.Request)
 	}
 
 	ctx := newContext(r, w, "GetLocks")
-	locks := globalLockServer.ll.DupLockMap()
-	logger.LogIf(ctx, gob.NewEncoder(w).Encode(locks))
+
+	var llockers []map[string][]lockRequesterInfo
+	for _, llocker := range globalLockServers {
+		llockers = append(llockers, llocker.DupLockMap())
+	}
+	logger.LogIf(ctx, gob.NewEncoder(w).Encode(llockers))
 
 	w.(http.Flusher).Flush()

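A hedged sketch of the matching client-side decode (the exact method lives in peer-rest-client.go and is not shown in this hunk); respBody is assumed to be the peer's response body. The payload is now one lock map per local lock server on the peer:

    var resp GetLocksResp
    if err := gob.NewDecoder(respBody).Decode(&resp); err != nil {
        return nil, err
    }
    for _, lockMap := range resp {
        for resource, requesters := range lockMap {
            fmt.Println(resource, "held by", len(requesters), "requesters")
        }
    }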
@ -268,14 +268,18 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints EndpointLi
 	}
 
 	if format.ID == "" {
-		if err = formatXLFixDeploymentID(context.Background(), endpoints, storageDisks, format); err != nil {
+		// Not a first disk, wait until first disk fixes deploymentID
+		if !firstDisk {
+			return nil, errNotFirstDisk
+		}
+		if err = formatXLFixDeploymentID(endpoints, storageDisks, format); err != nil {
 			return nil, err
 		}
 	}
 
 	globalDeploymentID = format.ID
-	if err = formatXLFixLocalDeploymentID(context.Background(), endpoints, storageDisks, format); err != nil {
+	if err = formatXLFixLocalDeploymentID(endpoints, storageDisks, format); err != nil {
 		return nil, err
 	}
 	return format, nil

@ -18,11 +18,9 @@ package rest
import ( import (
"context" "context"
"crypto/tls"
"errors" "errors"
"io" "io"
"io/ioutil" "io/ioutil"
"net"
"net/http" "net/http"
"net/url" "net/url"
"time" "time"
@ -102,32 +100,11 @@ func (c *Client) Close() {
} }
} }
func newCustomDialContext(timeout time.Duration) func(ctx context.Context, network, addr string) (net.Conn, error) {
return func(ctx context.Context, network, addr string) (net.Conn, error) {
dialer := &net.Dialer{
Timeout: timeout,
KeepAlive: timeout,
DualStack: true,
}
return dialer.DialContext(ctx, network, addr)
}
}
 // NewClient - returns new REST client.
-func NewClient(url *url.URL, tlsConfig *tls.Config, timeout time.Duration, newAuthToken func() string) (*Client, error) {
+func NewClient(url *url.URL, newCustomTransport func() *http.Transport, newAuthToken func() string) (*Client, error) {
 	// Transport is exactly same as Go default in https://golang.org/pkg/net/http/#RoundTripper
 	// except custom DialContext and TLSClientConfig.
-	tr := &http.Transport{
-		Proxy:                 http.ProxyFromEnvironment,
-		DialContext:           newCustomDialContext(timeout),
-		MaxIdleConnsPerHost:   256,
-		IdleConnTimeout:       60 * time.Second,
-		TLSHandshakeTimeout:   30 * time.Second,
-		ExpectContinueTimeout: 10 * time.Second,
-		TLSClientConfig:       tlsConfig,
-		DisableCompression:    true,
-	}
+	tr := newCustomTransport()
 	return &Client{
 		httpClient:          &http.Client{Transport: tr},
 		httpIdleConnsCloser: tr.CloseIdleConnections,

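Sketch of the new call shape (serverURL, tlsConfig and newAuthToken assumed in scope): callers now hand NewClient a transport factory instead of a tls.Config plus a timeout, exactly as the peer and storage REST clients in this diff do.

    trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout, rest.DefaultRESTTimeout)
    restClient, err := rest.NewClient(serverURL, trFn, newAuthToken)
    if err != nil {
        return nil, err
    }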
@ -31,7 +31,7 @@ func registerDistXLRouters(router *mux.Router, endpoints EndpointList) {
 	registerPeerRESTHandlers(router)
 
 	// Register distributed namespace lock.
-	registerLockRESTHandlers(router)
+	registerLockRESTHandlers(router, endpoints)
 }

@ -27,7 +27,6 @@ import (
"syscall" "syscall"
"github.com/minio/cli" "github.com/minio/cli"
"github.com/minio/dsync/v2"
"github.com/minio/minio/cmd/config" "github.com/minio/minio/cmd/config"
xhttp "github.com/minio/minio/cmd/http" xhttp "github.com/minio/minio/cmd/http"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
@ -144,12 +143,6 @@ func serverHandleCmdArgs(ctx *cli.Context) {
var setupType SetupType var setupType SetupType
var err error var err error
if len(ctx.Args()) > serverCommandLineArgsMax {
uErr := config.ErrInvalidErasureEndpoints(nil).Msg(fmt.Sprintf("Invalid total number of endpoints (%d) passed, supported upto 32 unique arguments",
len(ctx.Args())))
logger.FatalIf(uErr, "Unable to validate passed endpoints")
}
endpoints := strings.Fields(env.Get(config.EnvEndpoints, "")) endpoints := strings.Fields(env.Get(config.EnvEndpoints, ""))
if len(endpoints) > 0 { if len(endpoints) > 0 {
globalMinioAddr, globalEndpoints, setupType, globalXLSetCount, globalXLSetDriveCount, err = createServerEndpoints(globalCLIContext.Addr, endpoints...) globalMinioAddr, globalEndpoints, setupType, globalXLSetCount, globalXLSetDriveCount, err = createServerEndpoints(globalCLIContext.Addr, endpoints...)
@ -329,21 +322,6 @@ func serverMain(ctx *cli.Context) {
// Set system resources to maximum. // Set system resources to maximum.
logger.LogIf(context.Background(), setMaxResources()) logger.LogIf(context.Background(), setMaxResources())
// Set nodes for dsync for distributed setup.
if globalIsDistXL {
clnts, myNode, err := newDsyncNodes(globalEndpoints)
if err != nil {
logger.Fatal(err, "Unable to initialize distributed locking on %s", globalEndpoints)
}
globalDsync, err = dsync.New(clnts, myNode)
if err != nil {
logger.Fatal(err, "Unable to initialize distributed locking on %s", globalEndpoints)
}
}
// Initialize name space lock.
initNSLock(globalIsDistXL)
if globalIsXL { if globalIsXL {
// Init global heal state // Init global heal state
globalAllHealState = initHealState() globalAllHealState = initHealState()

@ -19,6 +19,7 @@ package cmd
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"context"
"crypto/tls" "crypto/tls"
"encoding/gob" "encoding/gob"
"encoding/hex" "encoding/hex"
@ -30,6 +31,7 @@ import (
"sync/atomic" "sync/atomic"
"github.com/minio/minio/cmd/http" "github.com/minio/minio/cmd/http"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/cmd/rest" "github.com/minio/minio/cmd/rest"
xnet "github.com/minio/minio/pkg/net" xnet "github.com/minio/minio/pkg/net"
) )
@ -446,19 +448,9 @@ func (client *storageRESTClient) Close() error {
 }
 
 // Returns a storage rest client.
-func newStorageRESTClient(endpoint Endpoint) (*storageRESTClient, error) {
-	host, err := xnet.ParseHost(endpoint.Host)
-	if err != nil {
-		return nil, err
-	}
-
-	scheme := "http"
-	if globalIsSSL {
-		scheme = "https"
-	}
-
+func newStorageRESTClient(endpoint Endpoint) *storageRESTClient {
 	serverURL := &url.URL{
-		Scheme: scheme,
+		Scheme: endpoint.Scheme,
 		Host:   endpoint.Host,
 		Path:   path.Join(storageRESTPrefix, endpoint.Path, storageRESTVersion),
 	}
@ -466,16 +458,17 @@ func newStorageRESTClient(endpoint Endpoint) (*storageRESTClient, error) {
 	var tlsConfig *tls.Config
 	if globalIsSSL {
 		tlsConfig = &tls.Config{
-			ServerName: host.Name,
+			ServerName: endpoint.Hostname(),
 			RootCAs:    globalRootCAs,
 			NextProtos: []string{"http/1.1"}, // Force http1.1
 		}
 	}
 
-	restClient, err := rest.NewClient(serverURL, tlsConfig, rest.DefaultRESTTimeout, newAuthToken)
+	trFn := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout, rest.DefaultRESTTimeout)
+	restClient, err := rest.NewClient(serverURL, trFn, newAuthToken)
 	if err != nil {
-		return nil, err
+		logger.LogIf(context.Background(), err)
+		return &storageRESTClient{endpoint: endpoint, restClient: restClient, connected: 0}
 	}
-	client := &storageRESTClient{endpoint: endpoint, restClient: restClient, connected: 1}
-	return client, nil
+	return &storageRESTClient{endpoint: endpoint, restClient: restClient, connected: 1}
 }

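Hedged sketch: construction of a storage REST client can no longer fail, so callers simply build it and let connection state drive behaviour; IsOnline is the StorageAPI check this PR uses elsewhere (for example in xl-sets.go) for the same purpose.

    disk := newStorageRESTClient(endpoint)
    if !disk.IsOnline() {
        // leave reconnection to the endpoint monitoring loop; do not fail startup
        return
    }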
@ -514,11 +514,7 @@ func newStorageRESTHTTPServerClient(t *testing.T) (*httptest.Server, *storageRES
 	}
 	registerStorageRESTHandlers(router, EndpointList{endpoint})
 
-	restClient, err := newStorageRESTClient(endpoint)
-	if err != nil {
-		t.Fatalf("newStorageRESTClient failed for %v, with error %s", endpoint, err)
-	}
+	restClient := newStorageRESTClient(endpoint)
 
 	prevGlobalServerConfig := globalServerConfig
 	globalServerConfig = newServerConfig()

@ -60,6 +60,7 @@ import (
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/auth" "github.com/minio/minio/pkg/auth"
"github.com/minio/minio/pkg/bpool" "github.com/minio/minio/pkg/bpool"
"github.com/minio/minio/pkg/dsync"
"github.com/minio/minio/pkg/hash" "github.com/minio/minio/pkg/hash"
"github.com/minio/minio/pkg/policy" "github.com/minio/minio/pkg/policy"
) )
@ -69,9 +70,6 @@ func init() {
// Set as non-distributed. // Set as non-distributed.
globalIsDistXL = false globalIsDistXL = false
// Initialize name space lock.
initNSLock(globalIsDistXL)
// Disable printing console messages during tests. // Disable printing console messages during tests.
color.Output = ioutil.Discard color.Output = ioutil.Discard
@ -451,13 +449,6 @@ func resetGlobalConfig() {
globalServerConfigMu.Unlock() globalServerConfigMu.Unlock()
} }
// reset global NSLock.
func resetGlobalNSLock() {
if globalNSMutex != nil {
globalNSMutex = nil
}
}
func resetGlobalEndpoints() { func resetGlobalEndpoints() {
globalEndpoints = EndpointList{} globalEndpoints = EndpointList{}
} }
@ -497,8 +488,6 @@ func resetTestGlobals() {
resetGlobalConfigPath() resetGlobalConfigPath()
// Reset Global server config. // Reset Global server config.
resetGlobalConfig() resetGlobalConfig()
// Reset global NSLock.
resetGlobalNSLock()
// Reset global endpoints. // Reset global endpoints.
resetGlobalEndpoints() resetGlobalEndpoints()
// Reset global isXL flag. // Reset global isXL flag.
@ -1629,6 +1618,9 @@ func newTestObjectLayer(endpoints EndpointList) (newObject ObjectLayer, err erro
xl.getDisks = func() []StorageAPI { xl.getDisks = func() []StorageAPI {
return xl.storageDisks return xl.storageDisks
} }
xl.getLockers = func() []dsync.NetLocker {
return nil
}
globalConfigSys = NewConfigSys() globalConfigSys = NewConfigSys()
@ -1918,9 +1910,6 @@ func ExecObjectLayerAPITest(t *testing.T, objAPITest objAPITestType, endpoints [
// this is to make sure that the tests are not affected by modified value. // this is to make sure that the tests are not affected by modified value.
resetTestGlobals() resetTestGlobals()
// initialize NSLock.
initNSLock(false)
objLayer, fsDir, err := prepareFS() objLayer, fsDir, err := prepareFS()
if err != nil { if err != nil {
t.Fatalf("Initialization of object layer failed for single node setup: %s", err) t.Fatalf("Initialization of object layer failed for single node setup: %s", err)

@ -332,25 +332,49 @@ func ToS3ETag(etag string) string {
return etag return etag
} }
type dialContext func(ctx context.Context, network, address string) (net.Conn, error)
func newCustomDialContext(dialTimeout, dialKeepAlive time.Duration) dialContext {
return func(ctx context.Context, network, addr string) (net.Conn, error) {
dialer := &net.Dialer{
Timeout: dialTimeout,
KeepAlive: dialKeepAlive,
DualStack: true,
}
return dialer.DialContext(ctx, network, addr)
}
}
func newCustomHTTPTransport(tlsConfig *tls.Config, dialTimeout, dialKeepAlive time.Duration) func() *http.Transport {
// For more details about various values used here refer
// https://golang.org/pkg/net/http/#Transport documentation
tr := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: newCustomDialContext(dialTimeout, dialKeepAlive),
MaxIdleConnsPerHost: 256,
IdleConnTimeout: 60 * time.Second,
TLSHandshakeTimeout: 30 * time.Second,
ExpectContinueTimeout: 10 * time.Second,
TLSClientConfig: tlsConfig,
		// Go net/http automatically unzips if content-type is
		// gzip; disable this feature, as we are always interested
		// in the raw stream.
DisableCompression: true,
}
return func() *http.Transport {
return tr
}
}
 // NewCustomHTTPTransport returns a new http configuration
 // used while communicating with the cloud backends.
 // This sets the value for MaxIdleConnsPerHost from 2 (go default)
-// to 100.
+// to 256.
 func NewCustomHTTPTransport() *http.Transport {
-	return &http.Transport{
-		Proxy: http.ProxyFromEnvironment,
-		DialContext: (&net.Dialer{
-			Timeout:   defaultDialTimeout,
-			KeepAlive: defaultDialKeepAlive,
-		}).DialContext,
-		MaxIdleConns:          1024,
-		MaxIdleConnsPerHost:   1024,
-		IdleConnTimeout:       30 * time.Second,
-		TLSHandshakeTimeout:   10 * time.Second,
-		ExpectContinueTimeout: 1 * time.Second,
-		TLSClientConfig:       &tls.Config{RootCAs: globalRootCAs},
-		DisableCompression:    true,
-	}
+	return newCustomHTTPTransport(&tls.Config{
+		RootCAs: globalRootCAs,
+	}, defaultDialTimeout, defaultDialKeepAlive)()
 }
// Load the json (typically from disk file). // Load the json (typically from disk file).

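A minimal usage sketch, assuming tlsConfig is prepared by the caller: the factory builds the *http.Transport once and the returned closure hands back that same instance, so clients created from one factory share a single connection pool.

    trFn := newCustomHTTPTransport(tlsConfig, defaultDialTimeout, defaultDialKeepAlive)
    client := &http.Client{Transport: trFn()}
    resp, err := client.Get("https://server.example:9000/") // example URL only
    if err == nil {
        resp.Body.Close()
    }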
@ -29,6 +29,7 @@ import (
"github.com/minio/minio/cmd/config/storageclass" "github.com/minio/minio/cmd/config/storageclass"
xhttp "github.com/minio/minio/cmd/http" xhttp "github.com/minio/minio/cmd/http"
"github.com/minio/minio/pkg/bpool" "github.com/minio/minio/pkg/bpool"
"github.com/minio/minio/pkg/dsync"
"github.com/minio/minio/pkg/lifecycle" "github.com/minio/minio/pkg/lifecycle"
"github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/madmin"
"github.com/minio/minio/pkg/policy" "github.com/minio/minio/pkg/policy"
@ -38,6 +39,9 @@ import (
// setsStorageAPI is encapsulated type for Close() // setsStorageAPI is encapsulated type for Close()
type setsStorageAPI [][]StorageAPI type setsStorageAPI [][]StorageAPI
// setsDsyncLockers is encapsulated type for Close()
type setsDsyncLockers [][]dsync.NetLocker
func (s setsStorageAPI) Close() error { func (s setsStorageAPI) Close() error {
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
for j, disk := range s[i] { for j, disk := range s[i] {
@ -51,6 +55,18 @@ func (s setsStorageAPI) Close() error {
return nil return nil
} }
func (s setsDsyncLockers) Close() error {
for i := 0; i < len(s); i++ {
for _, locker := range s[i] {
if locker == nil {
continue
}
locker.Close()
}
}
return nil
}
// xlSets implements ObjectLayer combining a static list of erasure coded // xlSets implements ObjectLayer combining a static list of erasure coded
// object sets. NOTE: There is no dynamic scaling allowed or intended in // object sets. NOTE: There is no dynamic scaling allowed or intended in
// current design. // current design.
@ -66,6 +82,12 @@ type xlSets struct {
// Re-ordered list of disks per set. // Re-ordered list of disks per set.
xlDisks setsStorageAPI xlDisks setsStorageAPI
// Distributed locker clients.
xlLockers setsDsyncLockers
// Lockers map holds dsync lockers for each endpoint
lockersMap map[Endpoint]dsync.NetLocker
// List of endpoints provided on the command line. // List of endpoints provided on the command line.
endpoints EndpointList endpoints EndpointList
@ -101,7 +123,11 @@ func (s *xlSets) isConnected(endpoint Endpoint) bool {
 			if s.xlDisks[i][j].String() != endpointStr {
 				continue
 			}
-			return s.xlDisks[i][j].IsOnline()
+			if s.xlDisks[i][j].IsOnline() {
+				return true
+			}
+			s.xlLockers[i][j].Close()
+			return false
 		}
 	}
 	return false
@ -147,10 +173,9 @@ func findDiskIndex(refFormat, format *formatXLV3) (int, int, error) {
return -1, -1, fmt.Errorf("diskID: %s not found", format.XL.This) return -1, -1, fmt.Errorf("diskID: %s not found", format.XL.This)
} }
-// connectDisksWithQuorum is same as connectDisks but waits
-// for quorum number of formatted disks to be online in
-// any given sets.
-func (s *xlSets) connectDisksWithQuorum() {
+// connectDisksAndLockersWithQuorum is same as connectDisksAndLockers but waits
+// for quorum number of formatted disks to be online in any given sets.
+func (s *xlSets) connectDisksAndLockersWithQuorum() {
var onlineDisks int var onlineDisks int
for onlineDisks < len(s.endpoints)/2 { for onlineDisks < len(s.endpoints)/2 {
for _, endpoint := range s.endpoints { for _, endpoint := range s.endpoints {
@ -171,6 +196,7 @@ func (s *xlSets) connectDisksWithQuorum() {
} }
disk.SetDiskID(format.XL.This) disk.SetDiskID(format.XL.This)
s.xlDisks[i][j] = disk s.xlDisks[i][j] = disk
s.xlLockers[i][j] = s.lockersMap[endpoint]
onlineDisks++ onlineDisks++
} }
// Sleep for a while - so that we don't go into // Sleep for a while - so that we don't go into
@ -179,9 +205,9 @@ func (s *xlSets) connectDisksWithQuorum() {
} }
} }
-// connectDisks - attempt to connect all the endpoints, loads format
+// connectDisksAndLockers - attempt to connect all the endpoints, loads format
 // and re-arranges the disks in proper position.
-func (s *xlSets) connectDisks() {
+func (s *xlSets) connectDisksAndLockers() {
for _, endpoint := range s.endpoints { for _, endpoint := range s.endpoints {
if s.isConnected(endpoint) { if s.isConnected(endpoint) {
continue continue
@ -201,6 +227,7 @@ func (s *xlSets) connectDisks() {
disk.SetDiskID(format.XL.This) disk.SetDiskID(format.XL.This)
s.xlDisksMu.Lock() s.xlDisksMu.Lock()
s.xlDisks[i][j] = disk s.xlDisks[i][j] = disk
s.xlLockers[i][j] = s.lockersMap[endpoint]
s.xlDisksMu.Unlock() s.xlDisksMu.Unlock()
} }
} }
@ -220,8 +247,24 @@ func (s *xlSets) monitorAndConnectEndpoints(monitorInterval time.Duration) {
 		case <-s.disksConnectDoneCh:
 			return
 		case <-ticker.C:
-			s.connectDisks()
+			s.connectDisksAndLockers()
 		}
 	}
 }
+
+func (s *xlSets) GetLockers(setIndex int) func() []dsync.NetLocker {
+	return func() []dsync.NetLocker {
+		s.xlDisksMu.Lock()
+		defer s.xlDisksMu.Unlock()
+		lockers := make([]dsync.NetLocker, s.drivesPerSet)
+		copy(lockers, s.xlLockers[setIndex])
+		for i, lk := range lockers {
+			// Add error lockers for unavailable locker.
+			if lk == nil {
+				lockers[i] = &errorLocker{}
+			}
+		}
+		return lockers
+	}
+}
@ -241,10 +284,17 @@ const defaultMonitorConnectEndpointInterval = time.Second * 10 // Set to 10 secs
 // Initialize new set of erasure coded sets.
 func newXLSets(endpoints EndpointList, format *formatXLV3, setCount int, drivesPerSet int) (ObjectLayer, error) {
+	lockersMap := make(map[Endpoint]dsync.NetLocker)
+	for _, endpoint := range endpoints {
+		lockersMap[endpoint] = newLockAPI(endpoint)
+	}
+
 	// Initialize the XL sets instance.
 	s := &xlSets{
 		sets:         make([]*xlObjects, setCount),
 		xlDisks:      make([][]StorageAPI, setCount),
+		xlLockers:    make([][]dsync.NetLocker, setCount),
+		lockersMap:   lockersMap,
 		endpoints:    endpoints,
 		setCount:     setCount,
 		drivesPerSet: drivesPerSet,
@ -262,18 +312,21 @@ func newXLSets(endpoints EndpointList, format *formatXLV3, setCount int, drivesP
 	for i := 0; i < len(format.XL.Sets); i++ {
 		s.xlDisks[i] = make([]StorageAPI, drivesPerSet)
+		s.xlLockers[i] = make([]dsync.NetLocker, drivesPerSet)
 
 		// Initialize xl objects for a given set.
 		s.sets[i] = &xlObjects{
-			getDisks: s.GetDisks(i),
-			nsMutex:  mutex,
-			bp:       bp,
+			getDisks:   s.GetDisks(i),
+			getLockers: s.GetLockers(i),
+			nsMutex:    mutex,
+			bp:         bp,
 		}
-		go s.sets[i].cleanupStaleMultipartUploads(context.Background(), GlobalMultipartCleanupInterval, GlobalMultipartExpiry, GlobalServiceDoneCh)
+		go s.sets[i].cleanupStaleMultipartUploads(context.Background(),
+			GlobalMultipartCleanupInterval, GlobalMultipartExpiry, GlobalServiceDoneCh)
 	}
 
 	// Connect disks right away, but wait until we have `format.json` quorum.
-	s.connectDisksWithQuorum()
+	s.connectDisksAndLockersWithQuorum()
 
 	// Start the disk monitoring and connect routine.
 	go s.monitorAndConnectEndpoints(defaultMonitorConnectEndpointInterval)
@ -281,6 +334,11 @@ func newXLSets(endpoints EndpointList, format *formatXLV3, setCount int, drivesP
return s, nil return s, nil
} }
// NewNSLock - initialize a new namespace RWLocker instance.
func (s *xlSets) NewNSLock(ctx context.Context, bucket string, object string) RWLocker {
return s.getHashedSet(object).NewNSLock(ctx, bucket, object)
}
// StorageInfo - combines output of StorageInfo across all erasure coded object sets. // StorageInfo - combines output of StorageInfo across all erasure coded object sets.
func (s *xlSets) StorageInfo(ctx context.Context) StorageInfo { func (s *xlSets) StorageInfo(ctx context.Context) StorageInfo {
var storageInfo StorageInfo var storageInfo StorageInfo
@ -406,6 +464,13 @@ func (s *xlSets) Shutdown(ctx context.Context) error {
// even if one of the sets fail to create buckets, we proceed to undo a // even if one of the sets fail to create buckets, we proceed to undo a
// successful operation. // successful operation.
func (s *xlSets) MakeBucketWithLocation(ctx context.Context, bucket, location string) error { func (s *xlSets) MakeBucketWithLocation(ctx context.Context, bucket, location string) error {
set := s.getHashedSet(bucket)
bucketLock := set.nsMutex.NewNSLock(ctx, set.getLockers(), bucket, "")
if err := bucketLock.GetLock(globalOperationTimeout); err != nil {
return err
}
defer bucketLock.Unlock()
g := errgroup.WithNErrs(len(s.sets)) g := errgroup.WithNErrs(len(s.sets))
// Create buckets in parallel across all sets. // Create buckets in parallel across all sets.
@ -484,6 +549,13 @@ func (s *xlSets) getHashedSet(input string) (set *xlObjects) {
// GetBucketInfo - returns bucket info from one of the erasure coded set. // GetBucketInfo - returns bucket info from one of the erasure coded set.
func (s *xlSets) GetBucketInfo(ctx context.Context, bucket string) (bucketInfo BucketInfo, err error) { func (s *xlSets) GetBucketInfo(ctx context.Context, bucket string) (bucketInfo BucketInfo, err error) {
set := s.getHashedSet(bucket)
bucketLock := set.nsMutex.NewNSLock(ctx, set.getLockers(), bucket, "")
if err = bucketLock.GetRLock(globalOperationTimeout); err != nil {
return bucketInfo, err
}
defer bucketLock.RUnlock()
return s.getHashedSet(bucket).GetBucketInfo(ctx, bucket) return s.getHashedSet(bucket).GetBucketInfo(ctx, bucket)
} }
@ -563,6 +635,13 @@ func (s *xlSets) IsCompressionSupported() bool {
// even if one of the sets fail to delete buckets, we proceed to // even if one of the sets fail to delete buckets, we proceed to
// undo a successful operation. // undo a successful operation.
func (s *xlSets) DeleteBucket(ctx context.Context, bucket string) error { func (s *xlSets) DeleteBucket(ctx context.Context, bucket string) error {
set := s.getHashedSet(bucket)
bucketLock := set.nsMutex.NewNSLock(ctx, set.getLockers(), bucket, "")
if err := bucketLock.GetLock(globalOperationTimeout); err != nil {
return err
}
defer bucketLock.Unlock()
g := errgroup.WithNErrs(len(s.sets)) g := errgroup.WithNErrs(len(s.sets))
// Delete buckets in parallel across all sets. // Delete buckets in parallel across all sets.
@ -709,7 +788,7 @@ func (s *xlSets) CopyObject(ctx context.Context, srcBucket, srcObject, destBucke
} }
if !cpSrcDstSame { if !cpSrcDstSame {
-		objectDWLock := destSet.nsMutex.NewNSLock(ctx, destBucket, destObject)
+		objectDWLock := destSet.nsMutex.NewNSLock(ctx, destSet.getLockers(), destBucket, destObject)
if err := objectDWLock.GetLock(globalObjectTimeout); err != nil { if err := objectDWLock.GetLock(globalObjectTimeout); err != nil {
return objInfo, err return objInfo, err
} }
@ -1239,7 +1318,8 @@ func formatsToDrivesInfo(endpoints EndpointList, formats []*formatXLV3, sErrs []
// healing in a distributed setup. // healing in a distributed setup.
func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) { func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) {
// Acquire lock on format.json // Acquire lock on format.json
-	formatLock := s.getHashedSet(formatConfigFile).nsMutex.NewNSLock(ctx, minioMetaBucket, formatConfigFile)
+	set := s.getHashedSet(formatConfigFile)
+	formatLock := set.nsMutex.NewNSLock(ctx, set.getLockers(), minioMetaBucket, formatConfigFile)
 	if err = formatLock.GetRLock(globalHealingTimeout); err != nil {
 		return err
 	}
return err return err
} }
@ -1285,9 +1365,10 @@ func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) {
// Replace the new format. // Replace the new format.
s.format = refFormat s.format = refFormat
-	// Close all existing disks and reconnect all the disks.
+	// Close all existing disks, lockers and reconnect all the disks/lockers.
 	s.xlDisks.Close()
-	s.connectDisks()
+	s.xlLockers.Close()
+	s.connectDisksAndLockers()
// Restart monitoring loop to monitor reformatted disks again. // Restart monitoring loop to monitor reformatted disks again.
go s.monitorAndConnectEndpoints(defaultMonitorConnectEndpointInterval) go s.monitorAndConnectEndpoints(defaultMonitorConnectEndpointInterval)
@ -1356,7 +1437,8 @@ func markRootDisksAsDown(storageDisks []StorageAPI) {
// coded data in it. // coded data in it.
func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealResultItem, err error) { func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealResultItem, err error) {
// Acquire lock on format.json // Acquire lock on format.json
-	formatLock := s.getHashedSet(formatConfigFile).nsMutex.NewNSLock(ctx, minioMetaBucket, formatConfigFile)
+	set := s.getHashedSet(formatConfigFile)
+	formatLock := set.nsMutex.NewNSLock(ctx, set.getLockers(), minioMetaBucket, formatConfigFile)
 	if err = formatLock.GetLock(globalHealingTimeout); err != nil {
 		return madmin.HealResultItem{}, err
 	}
return madmin.HealResultItem{}, err return madmin.HealResultItem{}, err
} }
@ -1492,9 +1574,10 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe
// Replace with new reference format. // Replace with new reference format.
s.format = refFormat s.format = refFormat
-	// Disconnect/relinquish all existing disks and reconnect the disks.
+	// Disconnect/relinquish all existing disks, lockers and reconnect the disks, lockers.
 	s.xlDisks.Close()
-	s.connectDisks()
+	s.xlLockers.Close()
+	s.connectDisksAndLockers()
// Restart our monitoring loop to start monitoring newly formatted disks. // Restart our monitoring loop to start monitoring newly formatted disks.
go s.monitorAndConnectEndpoints(defaultMonitorConnectEndpointInterval) go s.monitorAndConnectEndpoints(defaultMonitorConnectEndpointInterval)
@ -1505,8 +1588,9 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe
 // HealBucket - heals inconsistent buckets and bucket metadata on all sets.
 func (s *xlSets) HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (result madmin.HealResultItem, err error) {
-	bucketLock := globalNSMutex.NewNSLock(ctx, bucket, "")
-	if err := bucketLock.GetLock(globalHealingTimeout); err != nil {
+	set := s.getHashedSet(bucket)
+	bucketLock := set.nsMutex.NewNSLock(ctx, set.getLockers(), bucket, "")
+	if err = bucketLock.GetLock(globalOperationTimeout); err != nil {
 		return result, err
 	}
 	defer bucketLock.Unlock()

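A short sketch of the wiring this file establishes, with placeholder bucket/object names: the set that an object hashes to supplies both its disks and its lockers, so a namespace lock is held only on that set's drives rather than cluster-wide.

    set := s.getHashedSet("my-object")
    objLock := set.nsMutex.NewNSLock(ctx, set.getLockers(), "my-bucket", "my-object")
    if err := objLock.GetLock(globalObjectTimeout); err != nil {
        return err
    }
    defer objLock.Unlock()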
@ -140,11 +140,6 @@ func (xl xlObjects) getBucketInfo(ctx context.Context, bucketName string) (bucke
// GetBucketInfo - returns BucketInfo for a bucket. // GetBucketInfo - returns BucketInfo for a bucket.
func (xl xlObjects) GetBucketInfo(ctx context.Context, bucket string) (bi BucketInfo, e error) { func (xl xlObjects) GetBucketInfo(ctx context.Context, bucket string) (bi BucketInfo, e error) {
bucketLock := xl.nsMutex.NewNSLock(ctx, bucket, "")
if e := bucketLock.GetRLock(globalObjectTimeout); e != nil {
return bi, e
}
defer bucketLock.RUnlock()
bucketInfo, err := xl.getBucketInfo(ctx, bucket) bucketInfo, err := xl.getBucketInfo(ctx, bucket)
if err != nil { if err != nil {
return bi, toObjectErr(err, bucket) return bi, toObjectErr(err, bucket)
@ -226,12 +221,6 @@ func deleteDanglingBucket(ctx context.Context, storageDisks []StorageAPI, dErrs
// DeleteBucket - deletes a bucket. // DeleteBucket - deletes a bucket.
func (xl xlObjects) DeleteBucket(ctx context.Context, bucket string) error { func (xl xlObjects) DeleteBucket(ctx context.Context, bucket string) error {
bucketLock := xl.nsMutex.NewNSLock(ctx, bucket, "")
if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
return err
}
defer bucketLock.Unlock()
// Collect if all disks report volume not found. // Collect if all disks report volume not found.
storageDisks := xl.getDisks() storageDisks := xl.getDisks()

@ -681,7 +681,7 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
 	// Lock the object before healing. Use read lock since healing
 	// will only regenerate parts & xl.json of outdated disks.
-	objectLock := xl.nsMutex.NewNSLock(ctx, bucket, object)
+	objectLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
 	if lerr := objectLock.GetRLock(globalHealingTimeout); lerr != nil {
 		return madmin.HealResultItem{}, lerr
 	}

@ -264,7 +264,7 @@ func (xl xlObjects) NewMultipartUpload(ctx context.Context, bucket, object strin
func (xl xlObjects) CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error) { func (xl xlObjects) CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error) {
// Hold read locks on source object only if we are // Hold read locks on source object only if we are
// going to read data from source object. // going to read data from source object.
-	objectSRLock := xl.nsMutex.NewNSLock(ctx, srcBucket, srcObject)
+	objectSRLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), srcBucket, srcObject)
if err := objectSRLock.GetRLock(globalObjectTimeout); err != nil { if err := objectSRLock.GetRLock(globalObjectTimeout); err != nil {
return pi, err return pi, err
} }
@ -306,7 +306,7 @@ func (xl xlObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID
uploadIDLockPath := xl.getUploadIDLockPath(bucket, object, uploadID) uploadIDLockPath := xl.getUploadIDLockPath(bucket, object, uploadID)
// pre-check upload id lock. // pre-check upload id lock.
-	preUploadIDLock := xl.nsMutex.NewNSLock(ctx, minioMetaMultipartBucket, uploadIDLockPath)
+	preUploadIDLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), minioMetaMultipartBucket, uploadIDLockPath)
if err := preUploadIDLock.GetRLock(globalOperationTimeout); err != nil { if err := preUploadIDLock.GetRLock(globalOperationTimeout); err != nil {
return pi, err return pi, err
} }
@ -404,7 +404,7 @@ func (xl xlObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID
} }
// post-upload check (write) lock // post-upload check (write) lock
-	postUploadIDLock := xl.nsMutex.NewNSLock(ctx, minioMetaMultipartBucket, uploadIDLockPath)
+	postUploadIDLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), minioMetaMultipartBucket, uploadIDLockPath)
if err = postUploadIDLock.GetLock(globalOperationTimeout); err != nil { if err = postUploadIDLock.GetLock(globalOperationTimeout); err != nil {
return pi, err return pi, err
} }
@ -499,7 +499,8 @@ func (xl xlObjects) ListObjectParts(ctx context.Context, bucket, object, uploadI
} }
 	// Hold lock so that there is no competing
 	// abort-multipart-upload or complete-multipart-upload.
-	uploadIDLock := xl.nsMutex.NewNSLock(ctx, minioMetaMultipartBucket,
+	uploadIDLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(),
+		minioMetaMultipartBucket,
 		xl.getUploadIDLockPath(bucket, object, uploadID))
 	if err := uploadIDLock.GetLock(globalListingTimeout); err != nil {
 		return result, err
@ -603,7 +604,7 @@ func (xl xlObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
return oi, err return oi, err
} }
// Hold write lock on the object. // Hold write lock on the object.
-	destLock := xl.nsMutex.NewNSLock(ctx, bucket, object)
+	destLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
if err := destLock.GetLock(globalObjectTimeout); err != nil { if err := destLock.GetLock(globalObjectTimeout); err != nil {
return oi, err return oi, err
} }
@ -618,7 +619,7 @@ func (xl xlObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
// //
// 2) no one does a parallel complete-multipart-upload on this // 2) no one does a parallel complete-multipart-upload on this
// multipart upload // multipart upload
-	uploadIDLock := xl.nsMutex.NewNSLock(ctx, minioMetaMultipartBucket, uploadIDLockPath)
+	uploadIDLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), minioMetaMultipartBucket, uploadIDLockPath)
if err := uploadIDLock.GetLock(globalOperationTimeout); err != nil { if err := uploadIDLock.GetLock(globalOperationTimeout); err != nil {
return oi, err return oi, err
} }
@ -824,7 +825,7 @@ func (xl xlObjects) AbortMultipartUpload(ctx context.Context, bucket, object, up
uploadIDLockPath := xl.getUploadIDLockPath(bucket, object, uploadID) uploadIDLockPath := xl.getUploadIDLockPath(bucket, object, uploadID)
// Hold lock so that there is no competing // Hold lock so that there is no competing
// complete-multipart-upload or put-object-part. // complete-multipart-upload or put-object-part.
-	uploadIDLock := xl.nsMutex.NewNSLock(ctx, minioMetaMultipartBucket, uploadIDLockPath)
+	uploadIDLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), minioMetaMultipartBucket, uploadIDLockPath)
if err := uploadIDLock.GetLock(globalOperationTimeout); err != nil { if err := uploadIDLock.GetLock(globalOperationTimeout); err != nil {
return err return err
} }

@@ -127,7 +127,7 @@ func (xl xlObjects) GetObjectNInfo(ctx context.Context, bucket, object string, r
 	// Acquire lock
 	if lockType != noLock {
-		lock := xl.nsMutex.NewNSLock(ctx, bucket, object)
+		lock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
 		switch lockType {
 		case writeLock:
 			if err = lock.GetLock(globalObjectTimeout); err != nil {
@@ -190,7 +190,7 @@ func (xl xlObjects) GetObjectNInfo(ctx context.Context, bucket, object string, r
 // length indicates the total length of the object.
 func (xl xlObjects) GetObject(ctx context.Context, bucket, object string, startOffset int64, length int64, writer io.Writer, etag string, opts ObjectOptions) error {
 	// Lock the object before reading.
-	objectLock := xl.nsMutex.NewNSLock(ctx, bucket, object)
+	objectLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
 	if err := objectLock.GetRLock(globalObjectTimeout); err != nil {
 		return err
 	}
@@ -369,7 +369,7 @@ func (xl xlObjects) getObjectInfoDir(ctx context.Context, bucket, object string)
 // GetObjectInfo - reads object metadata and replies back ObjectInfo.
 func (xl xlObjects) GetObjectInfo(ctx context.Context, bucket, object string, opts ObjectOptions) (oi ObjectInfo, e error) {
 	// Lock the object before reading.
-	objectLock := xl.nsMutex.NewNSLock(ctx, bucket, object)
+	objectLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
 	if err := objectLock.GetRLock(globalObjectTimeout); err != nil {
 		return oi, err
 	}
@@ -498,7 +498,7 @@ func (xl xlObjects) PutObject(ctx context.Context, bucket string, object string,
 	}
 	// Lock the object.
-	objectLock := xl.nsMutex.NewNSLock(ctx, bucket, object)
+	objectLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
 	if err := objectLock.GetLock(globalObjectTimeout); err != nil {
 		return objInfo, err
 	}
@@ -851,7 +851,7 @@ func (xl xlObjects) deleteObjects(ctx context.Context, bucket string, objects []
 			continue
 		}
 		// Acquire a write lock before deleting the object.
-		objectLocks[i] = xl.nsMutex.NewNSLock(ctx, bucket, object)
+		objectLocks[i] = xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
 		if errs[i] = objectLocks[i].GetLock(globalOperationTimeout); errs[i] != nil {
 			continue
 		}
@@ -954,7 +954,7 @@ func (xl xlObjects) DeleteObjects(ctx context.Context, bucket string, objects []
 // response to the client request.
 func (xl xlObjects) DeleteObject(ctx context.Context, bucket, object string) (err error) {
 	// Acquire a write lock before deleting the object.
-	objectLock := xl.nsMutex.NewNSLock(ctx, bucket, object)
+	objectLock := xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
 	if perr := objectLock.GetLock(globalOperationTimeout); perr != nil {
 		return perr
 	}

@@ -23,6 +23,7 @@ import (
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/bpool"
+	"github.com/minio/minio/pkg/dsync"
 	"github.com/minio/minio/pkg/madmin"
 	xnet "github.com/minio/minio/pkg/net"
 	"github.com/minio/minio/pkg/sync/errgroup"
@@ -39,12 +40,15 @@ var OfflineDisk StorageAPI // zero value is nil
 // xlObjects - Implements XL object layer.
 type xlObjects struct {
-	// name space mutex for object layer.
-	nsMutex *nsLockMap
 	// getDisks returns list of storageAPIs.
 	getDisks func() []StorageAPI
+	// getLockers returns list of remote and local lockers.
+	getLockers func() []dsync.NetLocker
+	// Locker mutex map.
+	nsMutex *nsLockMap
 	// Byte pools used for temporary i/o buffers.
 	bp *bpool.BytePoolCap
@@ -55,10 +59,16 @@ type xlObjects struct {
 	listPool *TreeWalkPool
 }
+// NewNSLock - initialize a new namespace RWLocker instance.
+func (xl xlObjects) NewNSLock(ctx context.Context, bucket string, object string) RWLocker {
+	return xl.nsMutex.NewNSLock(ctx, xl.getLockers(), bucket, object)
+}
 // Shutdown function for object storage interface.
 func (xl xlObjects) Shutdown(ctx context.Context) error {
 	// Add any object layer shutdown activities here.
 	closeStorageDisks(xl.getDisks())
+	closeLockers(xl.getLockers())
 	return nil
 }

@@ -4,8 +4,8 @@
 |Item|Specification|
 |:---|:---|
-|Maximum number of servers per cluster| 32|
-|Maximum number of Federated clusters | Unlimited|
+|Maximum number of servers per cluster| Unlimited|
+|Maximum number of federated clusters | Unlimited|
 |Minimum number of servers| 02|
 |Maximum number of drives per server| Unlimited|
 |Read quorum| N/2|
@@ -39,7 +39,6 @@ We found the following APIs to be redundant or less useful outside of AWS S3. If
 - BucketACL (Use [bucket policies](https://docs.min.io/docs/minio-client-complete-guide#policy) instead)
 - BucketCORS (CORS enabled by default on all buckets for all HTTP verbs)
-- BucketLifecycle (Not required for MinIO erasure coded backend)
 - BucketReplication (Use [`mc mirror`](https://docs.min.io/docs/minio-client-complete-guide#mirror) instead)
 - BucketVersions, BucketVersioning (Use [`s3git`](https://github.com/s3git/s3git))
 - BucketWebsite (Use [`caddy`](https://github.com/mholt/caddy) or [`nginx`](https://www.nginx.com/resources/wiki/))

@@ -5,8 +5,8 @@
 |项目|参数|
 |:---|:---|
-|最大驱动器数量|16|
-|最小驱动器数量|4|
+|最大驱动器数量|Unlimited|
+|最小驱动器数量|Unlimited|
 |读仲裁|N / 2|
 |写仲裁|N / 2+1 |

@@ -38,7 +38,6 @@ require (
 	github.com/lib/pq v1.0.0
 	github.com/miekg/dns v1.1.8
 	github.com/minio/cli v1.22.0
-	github.com/minio/dsync/v2 v2.0.0
 	github.com/minio/gokrb5/v7 v7.2.5
 	github.com/minio/hdfs/v3 v3.0.1
 	github.com/minio/highwayhash v1.0.0

@@ -0,0 +1,3 @@
dsync.test
coverage.txt
*.out

@@ -0,0 +1,440 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dsync
import (
"context"
"fmt"
golog "log"
"math/rand"
"os"
"path"
"runtime"
"sync"
"time"
)
// Indicator if logging is enabled.
var dsyncLog bool
func init() {
// Check for MINIO_DSYNC_TRACE env variable, if set logging will be enabled for failed REST operations.
dsyncLog = os.Getenv("MINIO_DSYNC_TRACE") == "1"
rand.Seed(time.Now().UnixNano())
}
func log(msg ...interface{}) {
if dsyncLog {
golog.Println(msg...)
}
}
// DRWMutexAcquireTimeout - tolerance limit to wait for lock acquisition before giving up.
const DRWMutexAcquireTimeout = 1 * time.Second // 1 second.
const drwMutexInfinite = time.Duration(1<<63 - 1)
// A DRWMutex is a distributed mutual exclusion lock.
type DRWMutex struct {
Name string
writeLocks []string // Array of nodes that granted a write lock
readersLocks [][]string // Array of array of nodes that granted reader locks
m sync.Mutex // Mutex to prevent multiple simultaneous locks from this node
clnt *Dsync
ctx context.Context
}
// Granted - represents a structure of a granted lock.
type Granted struct {
index int
lockUID string // Locked if set with UID string, unlocked if empty
}
func (g *Granted) isLocked() bool {
return isLocked(g.lockUID)
}
func isLocked(uid string) bool {
return len(uid) > 0
}
// NewDRWMutex - initializes a new dsync RW mutex.
func NewDRWMutex(ctx context.Context, name string, clnt *Dsync) *DRWMutex {
return &DRWMutex{
Name: name,
writeLocks: make([]string, clnt.dNodeCount),
clnt: clnt,
ctx: ctx,
}
}
// Lock holds a write lock on dm.
//
// If the lock is already in use, the calling go routine
// blocks until the mutex is available.
func (dm *DRWMutex) Lock(id, source string) {
isReadLock := false
dm.lockBlocking(drwMutexInfinite, id, source, isReadLock)
}
// GetLock tries to get a write lock on dm before the timeout elapses.
//
// If the lock is already in use, the calling go routine
// blocks until either the mutex becomes available and return success or
// more time has passed than the timeout value and return false.
func (dm *DRWMutex) GetLock(id, source string, timeout time.Duration) (locked bool) {
isReadLock := false
return dm.lockBlocking(timeout, id, source, isReadLock)
}
// RLock holds a read lock on dm.
//
// If one or more read locks are already in use, it will grant another lock.
// Otherwise the calling go routine blocks until the mutex is available.
func (dm *DRWMutex) RLock(id, source string) {
isReadLock := true
dm.lockBlocking(drwMutexInfinite, id, source, isReadLock)
}
// GetRLock tries to get a read lock on dm before the timeout elapses.
//
// If one or more read locks are already in use, it will grant another lock.
// Otherwise the calling go routine blocks until either the mutex becomes
// available and return success or more time has passed than the timeout
// value and return false.
func (dm *DRWMutex) GetRLock(id, source string, timeout time.Duration) (locked bool) {
isReadLock := true
return dm.lockBlocking(timeout, id, source, isReadLock)
}
// lockBlocking will try to acquire either a read or a write lock
//
// The function will loop using a built-in timing randomized back-off
// algorithm until either the lock is acquired successfully or more
// time has elapsed than the timeout value.
func (dm *DRWMutex) lockBlocking(timeout time.Duration, id, source string, isReadLock bool) (locked bool) {
doneCh, start := make(chan struct{}), time.Now().UTC()
defer close(doneCh)
// Use incremental back-off algorithm for repeated attempts to acquire the lock
for range newRetryTimerSimple(doneCh) {
select {
case <-dm.ctx.Done():
return
default:
}
// Create temp array on stack.
locks := make([]string, dm.clnt.dNodeCount)
// Try to acquire the lock.
success := lock(dm.clnt, &locks, dm.Name, id, source, isReadLock)
if success {
dm.m.Lock()
// If success, copy array to object
if isReadLock {
// Append new array of strings at the end
dm.readersLocks = append(dm.readersLocks, make([]string, dm.clnt.dNodeCount))
// and copy stack array into last spot
copy(dm.readersLocks[len(dm.readersLocks)-1], locks[:])
} else {
copy(dm.writeLocks, locks[:])
}
dm.m.Unlock()
return true
}
if time.Now().UTC().Sub(start) >= timeout { // Are we past the timeout?
break
}
// Failed to acquire the lock on this attempt, incrementally wait
// for a longer back-off time and try again afterwards.
}
return false
}
// lock tries to acquire the distributed lock, returning true or false.
func lock(ds *Dsync, locks *[]string, lockName, id, source string, isReadLock bool) bool {
// Create buffered channel of size equal to total number of nodes.
ch := make(chan Granted, ds.dNodeCount)
defer close(ch)
var wg sync.WaitGroup
for index, c := range ds.restClnts {
wg.Add(1)
// broadcast lock request to all nodes
go func(index int, isReadLock bool, c NetLocker) {
defer wg.Done()
args := LockArgs{
UID: id,
Resource: lockName,
Source: source,
}
var locked bool
var err error
if isReadLock {
if locked, err = c.RLock(args); err != nil {
log("Unable to call RLock", err)
}
} else {
if locked, err = c.Lock(args); err != nil {
log("Unable to call Lock", err)
}
}
g := Granted{index: index}
if locked {
g.lockUID = args.UID
}
ch <- g
}(index, isReadLock, c)
}
quorum := false
wg.Add(1)
go func(isReadLock bool) {
// Wait until we have either
//
// a) received all lock responses
// b) received too many 'non-'locks for quorum to be still possible
// c) time out
//
i, locksFailed := 0, 0
done := false
timeout := time.After(DRWMutexAcquireTimeout)
for ; i < ds.dNodeCount; i++ { // Loop until we acquired all locks
select {
case grant := <-ch:
if grant.isLocked() {
// Mark that this node has acquired the lock
(*locks)[grant.index] = grant.lockUID
} else {
locksFailed++
if !isReadLock && locksFailed > ds.dNodeCount-ds.dquorum ||
isReadLock && locksFailed > ds.dNodeCount-ds.dquorumReads {
// We know that we are not going to get the lock anymore,
// so exit out and release any locks that did get acquired
done = true
// Increment the number of grants received from the buffered channel.
i++
releaseAll(ds, locks, lockName, isReadLock)
}
}
case <-timeout:
done = true
// timeout happened, maybe one of the nodes is slow, count
// number of locks to check whether we have quorum or not
if !quorumMet(locks, isReadLock, ds.dquorum, ds.dquorumReads) {
releaseAll(ds, locks, lockName, isReadLock)
}
}
if done {
break
}
}
// Count locks in order to determine whether we have quorum or not
quorum = quorumMet(locks, isReadLock, ds.dquorum, ds.dquorumReads)
// Signal that we have the quorum
wg.Done()
// Wait for the other responses and immediately release the locks
// (do not add them to the locks array because the DRWMutex could
// already have been unlocked again by the original calling thread)
for ; i < ds.dNodeCount; i++ {
grantToBeReleased := <-ch
if grantToBeReleased.isLocked() {
// release lock
sendRelease(ds, ds.restClnts[grantToBeReleased.index], lockName, grantToBeReleased.lockUID, isReadLock)
}
}
}(isReadLock)
wg.Wait()
return quorum
}
// quorumMet determines whether we have acquired the required quorum of underlying locks or not
func quorumMet(locks *[]string, isReadLock bool, quorum, quorumReads int) bool {
count := 0
for _, uid := range *locks {
if isLocked(uid) {
count++
}
}
var metQuorum bool
if isReadLock {
metQuorum = count >= quorumReads
} else {
metQuorum = count >= quorum
}
return metQuorum
}
// releaseAll releases all locks that are marked as locked
func releaseAll(ds *Dsync, locks *[]string, lockName string, isReadLock bool) {
for lock := 0; lock < ds.dNodeCount; lock++ {
if isLocked((*locks)[lock]) {
sendRelease(ds, ds.restClnts[lock], lockName, (*locks)[lock], isReadLock)
(*locks)[lock] = ""
}
}
}
// Unlock unlocks the write lock.
//
// It is a run-time error if dm is not locked on entry to Unlock.
func (dm *DRWMutex) Unlock() {
// create temp array on stack
locks := make([]string, dm.clnt.dNodeCount)
{
dm.m.Lock()
defer dm.m.Unlock()
// Check if minimally a single bool is set in the writeLocks array
lockFound := false
for _, uid := range dm.writeLocks {
if isLocked(uid) {
lockFound = true
break
}
}
if !lockFound {
panic("Trying to Unlock() while no Lock() is active")
}
// Copy write locks to stack array
copy(locks, dm.writeLocks[:])
// Clear write locks array
dm.writeLocks = make([]string, dm.clnt.dNodeCount)
}
isReadLock := false
unlock(dm.clnt, locks, dm.Name, isReadLock)
}
// RUnlock releases a read lock held on dm.
//
// It is a run-time error if dm is not locked on entry to RUnlock.
func (dm *DRWMutex) RUnlock() {
// create temp array on stack
locks := make([]string, dm.clnt.dNodeCount)
{
dm.m.Lock()
defer dm.m.Unlock()
if len(dm.readersLocks) == 0 {
panic("Trying to RUnlock() while no RLock() is active")
}
// Copy out first element to release it first (FIFO)
copy(locks, dm.readersLocks[0][:])
// Drop first element from array
dm.readersLocks = dm.readersLocks[1:]
}
isReadLock := true
unlock(dm.clnt, locks, dm.Name, isReadLock)
}
func unlock(ds *Dsync, locks []string, name string, isReadLock bool) {
// We don't need to synchronously wait until we have released all the locks (or the quorum)
// (a subsequent lock will retry automatically in case it would fail to get quorum)
for index, c := range ds.restClnts {
if isLocked(locks[index]) {
// broadcast lock release to all nodes that granted the lock
sendRelease(ds, c, name, locks[index], isReadLock)
}
}
}
// sendRelease sends a release message to a node that previously granted a lock
func sendRelease(ds *Dsync, c NetLocker, name, uid string, isReadLock bool) {
args := LockArgs{
UID: uid,
Resource: name,
}
if isReadLock {
if _, err := c.RUnlock(args); err != nil {
log("Unable to call RUnlock", err)
}
} else {
if _, err := c.Unlock(args); err != nil {
log("Unable to call Unlock", err)
}
}
}
// DRLocker returns a sync.Locker interface that implements
// the Lock and Unlock methods by calling drw.RLock and drw.RUnlock.
func (dm *DRWMutex) DRLocker() sync.Locker {
return (*drlocker)(dm)
}
type drlocker DRWMutex
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
func randString(n int) string {
b := make([]rune, n)
for i := range b {
b[i] = letterRunes[rand.Intn(len(letterRunes))]
}
return string(b)
}
func getSource() string {
var funcName string
pc, filename, lineNum, ok := runtime.Caller(2)
if ok {
filename = path.Base(filename)
funcName = runtime.FuncForPC(pc).Name()
} else {
filename = "<unknown>"
lineNum = 0
}
return fmt.Sprintf("[%s:%d:%s()]", filename, lineNum, funcName)
}
func (dr *drlocker) Lock() { (*DRWMutex)(dr).RLock(randString(16), getSource()) }
func (dr *drlocker) Unlock() { (*DRWMutex)(dr).RUnlock() }
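For readers new to this package, the following is a minimal, self-contained sketch of how the DRWMutex above is driven end to end. The in-process locker type (inMemLocker) is hypothetical and exists only for this example; in MinIO itself the NetLocker clients are the lock REST clients the server wires in, not in-memory maps.

package main

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/minio/minio/pkg/dsync"
)

// inMemLocker is a toy NetLocker kept in process memory: -1 marks a write
// lock, positive values count read locks. It only illustrates the interface.
type inMemLocker struct {
	mu    sync.Mutex
	locks map[string]int64
}

func (l *inMemLocker) Lock(args dsync.LockArgs) (bool, error) {
	l.mu.Lock()
	defer l.mu.Unlock()
	if _, held := l.locks[args.Resource]; held {
		return false, nil // already locked in some mode
	}
	l.locks[args.Resource] = -1
	return true, nil
}

func (l *inMemLocker) Unlock(args dsync.LockArgs) (bool, error) {
	l.mu.Lock()
	defer l.mu.Unlock()
	delete(l.locks, args.Resource)
	return true, nil
}

func (l *inMemLocker) RLock(args dsync.LockArgs) (bool, error) {
	l.mu.Lock()
	defer l.mu.Unlock()
	if l.locks[args.Resource] < 0 {
		return false, nil // a writer holds it
	}
	l.locks[args.Resource]++
	return true, nil
}

func (l *inMemLocker) RUnlock(args dsync.LockArgs) (bool, error) {
	l.mu.Lock()
	defer l.mu.Unlock()
	if n := l.locks[args.Resource]; n > 1 {
		l.locks[args.Resource] = n - 1
	} else {
		delete(l.locks, args.Resource)
	}
	return true, nil
}

func (l *inMemLocker) String() string { return "in-mem" }
func (l *inMemLocker) Close() error   { return nil }

func main() {
	// Two lockers stand in for two lock endpoints; dsync.New accepts 2 to 32.
	clnts := []dsync.NetLocker{
		&inMemLocker{locks: make(map[string]int64)},
		&inMemLocker{locks: make(map[string]int64)},
	}
	ds, err := dsync.New(clnts)
	if err != nil {
		panic(err)
	}

	// Acquire a write lock on a resource, bounded by a timeout, then release.
	dm := dsync.NewDRWMutex(context.Background(), "bucket/object", ds)
	if !dm.GetLock("uid-1234", "example.go:1:main()", 3*time.Second) {
		fmt.Println("could not acquire write lock within timeout")
		return
	}
	fmt.Println("write lock acquired, quorum met")
	dm.Unlock()
}

With responsive lockers, GetLock returns within roughly the requested timeout: each round of broadcast requests is bounded by DRWMutexAcquireTimeout and failed rounds are retried with the jittered back-off from retry.go.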

@@ -0,0 +1,383 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// GOMAXPROCS=10 go test
package dsync_test
import (
"context"
"fmt"
"runtime"
"sync"
"sync/atomic"
"testing"
"time"
. "github.com/minio/minio/pkg/dsync"
)
const (
id = "1234-5678"
source = "main.go"
)
func testSimpleWriteLock(t *testing.T, duration time.Duration) (locked bool) {
drwm := NewDRWMutex(context.Background(), "simplelock", ds)
if !drwm.GetRLock(id, source, time.Second) {
panic("Failed to acquire read lock")
}
// fmt.Println("1st read lock acquired, waiting...")
if !drwm.GetRLock(id, source, time.Second) {
panic("Failed to acquire read lock")
}
// fmt.Println("2nd read lock acquired, waiting...")
go func() {
time.Sleep(2 * time.Second)
drwm.RUnlock()
// fmt.Println("1st read lock released, waiting...")
}()
go func() {
time.Sleep(3 * time.Second)
drwm.RUnlock()
// fmt.Println("2nd read lock released, waiting...")
}()
// fmt.Println("Trying to acquire write lock, waiting...")
locked = drwm.GetLock(id, source, duration)
if locked {
// fmt.Println("Write lock acquired, waiting...")
time.Sleep(time.Second)
drwm.Unlock()
}
// fmt.Println("Write lock failed due to timeout")
return
}
func TestSimpleWriteLockAcquired(t *testing.T) {
locked := testSimpleWriteLock(t, 5*time.Second)
expected := true
if locked != expected {
t.Errorf("TestSimpleWriteLockAcquired(): \nexpected %#v\ngot %#v", expected, locked)
}
}
func TestSimpleWriteLockTimedOut(t *testing.T) {
locked := testSimpleWriteLock(t, time.Second)
expected := false
if locked != expected {
t.Errorf("TestSimpleWriteLockTimedOut(): \nexpected %#v\ngot %#v", expected, locked)
}
}
func testDualWriteLock(t *testing.T, duration time.Duration) (locked bool) {
drwm := NewDRWMutex(context.Background(), "duallock", ds)
// fmt.Println("Getting initial write lock")
if !drwm.GetLock(id, source, time.Second) {
panic("Failed to acquire initial write lock")
}
go func() {
time.Sleep(2 * time.Second)
drwm.Unlock()
// fmt.Println("Initial write lock released, waiting...")
}()
// fmt.Println("Trying to acquire 2nd write lock, waiting...")
locked = drwm.GetLock(id, source, duration)
if locked {
// fmt.Println("2nd write lock acquired, waiting...")
time.Sleep(time.Second)
drwm.Unlock()
}
// fmt.Println("2nd write lock failed due to timeout")
return
}
func TestDualWriteLockAcquired(t *testing.T) {
locked := testDualWriteLock(t, 5*time.Second)
expected := true
if locked != expected {
t.Errorf("TestDualWriteLockAcquired(): \nexpected %#v\ngot %#v", expected, locked)
}
}
func TestDualWriteLockTimedOut(t *testing.T) {
locked := testDualWriteLock(t, time.Second)
expected := false
if locked != expected {
t.Errorf("TestDualWriteLockTimedOut(): \nexpected %#v\ngot %#v", expected, locked)
}
}
// Test cases below are copied 1 to 1 from sync/rwmutex_test.go (adapted to use DRWMutex)
// Borrowed from rwmutex_test.go
func parallelReader(m *DRWMutex, clocked, cunlock, cdone chan bool) {
if m.GetRLock(id, source, time.Second) {
clocked <- true
<-cunlock
m.RUnlock()
cdone <- true
}
}
// Borrowed from rwmutex_test.go
func doTestParallelReaders(numReaders, gomaxprocs int) {
runtime.GOMAXPROCS(gomaxprocs)
m := NewDRWMutex(context.Background(), "test-parallel", ds)
clocked := make(chan bool)
cunlock := make(chan bool)
cdone := make(chan bool)
for i := 0; i < numReaders; i++ {
go parallelReader(m, clocked, cunlock, cdone)
}
// Wait for all parallel RLock()s to succeed.
for i := 0; i < numReaders; i++ {
<-clocked
}
for i := 0; i < numReaders; i++ {
cunlock <- true
}
// Wait for the goroutines to finish.
for i := 0; i < numReaders; i++ {
<-cdone
}
}
// Borrowed from rwmutex_test.go
func TestParallelReaders(t *testing.T) {
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1))
doTestParallelReaders(1, 4)
doTestParallelReaders(3, 4)
doTestParallelReaders(4, 2)
}
// Borrowed from rwmutex_test.go
func reader(rwm *DRWMutex, numIterations int, activity *int32, cdone chan bool) {
for i := 0; i < numIterations; i++ {
if rwm.GetRLock(id, source, time.Second) {
n := atomic.AddInt32(activity, 1)
if n < 1 || n >= 10000 {
panic(fmt.Sprintf("wlock(%d)\n", n))
}
for i := 0; i < 100; i++ {
}
atomic.AddInt32(activity, -1)
rwm.RUnlock()
}
}
cdone <- true
}
// Borrowed from rwmutex_test.go
func writer(rwm *DRWMutex, numIterations int, activity *int32, cdone chan bool) {
for i := 0; i < numIterations; i++ {
if rwm.GetLock(id, source, time.Second) {
n := atomic.AddInt32(activity, 10000)
if n != 10000 {
panic(fmt.Sprintf("wlock(%d)\n", n))
}
for i := 0; i < 100; i++ {
}
atomic.AddInt32(activity, -10000)
rwm.Unlock()
}
}
cdone <- true
}
// Borrowed from rwmutex_test.go
func HammerRWMutex(gomaxprocs, numReaders, numIterations int) {
runtime.GOMAXPROCS(gomaxprocs)
// Number of active readers + 10000 * number of active writers.
var activity int32
rwm := NewDRWMutex(context.Background(), "test", ds)
cdone := make(chan bool)
go writer(rwm, numIterations, &activity, cdone)
var i int
for i = 0; i < numReaders/2; i++ {
go reader(rwm, numIterations, &activity, cdone)
}
go writer(rwm, numIterations, &activity, cdone)
for ; i < numReaders; i++ {
go reader(rwm, numIterations, &activity, cdone)
}
// Wait for the 2 writers and all readers to finish.
for i := 0; i < 2+numReaders; i++ {
<-cdone
}
}
// Borrowed from rwmutex_test.go
func TestRWMutex(t *testing.T) {
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1))
n := 1000
if testing.Short() {
n = 5
}
HammerRWMutex(1, 1, n)
HammerRWMutex(1, 3, n)
HammerRWMutex(1, 10, n)
HammerRWMutex(4, 1, n)
HammerRWMutex(4, 3, n)
HammerRWMutex(4, 10, n)
HammerRWMutex(10, 1, n)
HammerRWMutex(10, 3, n)
HammerRWMutex(10, 10, n)
HammerRWMutex(10, 5, n)
}
// Borrowed from rwmutex_test.go
func TestDRLocker(t *testing.T) {
wl := NewDRWMutex(context.Background(), "test", ds)
var rl sync.Locker
wlocked := make(chan bool, 1)
rlocked := make(chan bool, 1)
rl = wl.DRLocker()
n := 10
go func() {
for i := 0; i < n; i++ {
rl.Lock()
rl.Lock()
rlocked <- true
wl.Lock(id, source)
wlocked <- true
}
}()
for i := 0; i < n; i++ {
<-rlocked
rl.Unlock()
select {
case <-wlocked:
t.Fatal("RLocker() didn't read-lock it")
default:
}
rl.Unlock()
<-wlocked
select {
case <-rlocked:
t.Fatal("RLocker() didn't respect the write lock")
default:
}
wl.Unlock()
}
}
// Borrowed from rwmutex_test.go
func TestUnlockPanic(t *testing.T) {
defer func() {
if recover() == nil {
t.Fatalf("unlock of unlocked RWMutex did not panic")
}
}()
mu := NewDRWMutex(context.Background(), "test", ds)
mu.Unlock()
}
// Borrowed from rwmutex_test.go
func TestUnlockPanic2(t *testing.T) {
defer func() {
if recover() == nil {
t.Fatalf("unlock of unlocked RWMutex did not panic")
}
}()
mu := NewDRWMutex(context.Background(), "test-unlock-panic-2", ds)
mu.RLock(id, source)
mu.Unlock()
}
// Borrowed from rwmutex_test.go
func TestRUnlockPanic(t *testing.T) {
defer func() {
if recover() == nil {
t.Fatalf("read unlock of unlocked RWMutex did not panic")
}
}()
mu := NewDRWMutex(context.Background(), "test", ds)
mu.RUnlock()
}
// Borrowed from rwmutex_test.go
func TestRUnlockPanic2(t *testing.T) {
defer func() {
if recover() == nil {
t.Fatalf("read unlock of unlocked RWMutex did not panic")
}
}()
mu := NewDRWMutex(context.Background(), "test-runlock-panic-2", ds)
mu.Lock(id, source)
mu.RUnlock()
}
// Borrowed from rwmutex_test.go
func benchmarkRWMutex(b *testing.B, localWork, writeRatio int) {
rwm := NewDRWMutex(context.Background(), "test", ds)
b.RunParallel(func(pb *testing.PB) {
foo := 0
for pb.Next() {
foo++
if foo%writeRatio == 0 {
rwm.Lock(id, source)
rwm.Unlock()
} else {
rwm.RLock(id, source)
for i := 0; i != localWork; i += 1 {
foo *= 2
foo /= 2
}
rwm.RUnlock()
}
}
_ = foo
})
}
// Borrowed from rwmutex_test.go
func BenchmarkRWMutexWrite100(b *testing.B) {
benchmarkRWMutex(b, 0, 100)
}
// Borrowed from rwmutex_test.go
func BenchmarkRWMutexWrite10(b *testing.B) {
benchmarkRWMutex(b, 0, 10)
}
// Borrowed from rwmutex_test.go
func BenchmarkRWMutexWorkWrite100(b *testing.B) {
benchmarkRWMutex(b, 100, 100)
}
// Borrowed from rwmutex_test.go
func BenchmarkRWMutexWorkWrite10(b *testing.B) {
benchmarkRWMutex(b, 100, 10)
}

@@ -0,0 +1,103 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dsync_test
import (
"fmt"
"sync"
. "github.com/minio/minio/pkg/dsync"
)
const WriteLock = -1
type lockServer struct {
mutex sync.Mutex
// Map of locks, with negative value indicating (exclusive) write lock
// and positive values indicating number of read locks
lockMap map[string]int64
}
func (l *lockServer) Lock(args *LockArgs, reply *bool) error {
l.mutex.Lock()
defer l.mutex.Unlock()
if _, *reply = l.lockMap[args.Resource]; !*reply {
l.lockMap[args.Resource] = WriteLock // No locks held on the given name, so claim write lock
}
*reply = !*reply // Negate *reply to return true when lock is granted or false otherwise
return nil
}
func (l *lockServer) Unlock(args *LockArgs, reply *bool) error {
l.mutex.Lock()
defer l.mutex.Unlock()
var locksHeld int64
if locksHeld, *reply = l.lockMap[args.Resource]; !*reply { // No lock is held on the given name
return fmt.Errorf("Unlock attempted on an unlocked entity: %s", args.Resource)
}
if *reply = locksHeld == WriteLock; !*reply { // Unless it is a write lock
return fmt.Errorf("Unlock attempted on a read locked entity: %s (%d read locks active)", args.Resource, locksHeld)
}
delete(l.lockMap, args.Resource) // Remove the write lock
return nil
}
const ReadLock = 1
func (l *lockServer) RLock(args *LockArgs, reply *bool) error {
l.mutex.Lock()
defer l.mutex.Unlock()
var locksHeld int64
if locksHeld, *reply = l.lockMap[args.Resource]; !*reply {
l.lockMap[args.Resource] = ReadLock // No locks held on the given name, so claim (first) read lock
*reply = true
} else {
if *reply = locksHeld != WriteLock; *reply { // Unless there is a write lock
l.lockMap[args.Resource] = locksHeld + ReadLock // Grant another read lock
}
}
return nil
}
func (l *lockServer) RUnlock(args *LockArgs, reply *bool) error {
l.mutex.Lock()
defer l.mutex.Unlock()
var locksHeld int64
if locksHeld, *reply = l.lockMap[args.Resource]; !*reply { // No lock is held on the given name
return fmt.Errorf("RUnlock attempted on an unlocked entity: %s", args.Resource)
}
if *reply = locksHeld != WriteLock; !*reply { // A write-lock is held, cannot release a read lock
return fmt.Errorf("RUnlock attempted on a write locked entity: %s", args.Resource)
}
if locksHeld > ReadLock {
l.lockMap[args.Resource] = locksHeld - ReadLock // Remove one of the read locks held
} else {
delete(l.lockMap, args.Resource) // Remove the (last) read lock
}
return nil
}
func (l *lockServer) ForceUnlock(args *LockArgs, reply *bool) error {
l.mutex.Lock()
defer l.mutex.Unlock()
if len(args.UID) != 0 {
return fmt.Errorf("ForceUnlock called with non-empty UID: %s", args.UID)
}
delete(l.lockMap, args.Resource) // Remove the lock (irrespective of write or read lock)
*reply = true
return nil
}

@@ -0,0 +1,60 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dsync
import (
"errors"
"math"
)
// Dsync represents dsync client object which is initialized with
// authenticated clients, used to initiate lock REST calls.
type Dsync struct {
// Number of nodes participating in the distributed locking.
dNodeCount int
// List of rest client objects, one per lock server.
restClnts []NetLocker
// Simple majority based quorum, set to dNodeCount/2+1
dquorum int
// Simple quorum for read operations, set to dNodeCount/2
dquorumReads int
}
// New - initializes a new dsync object with input restClnts.
func New(restClnts []NetLocker) (*Dsync, error) {
if len(restClnts) < 2 {
return nil, errors.New("Dsync is not designed for less than 2 nodes")
} else if len(restClnts) > 32 {
return nil, errors.New("Dsync is not designed for more than 32 nodes")
}
ds := &Dsync{}
ds.dNodeCount = len(restClnts)
// With odd number of nodes, write and read quorum is basically the same
ds.dquorum = int(ds.dNodeCount/2) + 1
ds.dquorumReads = int(math.Ceil(float64(ds.dNodeCount) / 2.0))
// Initialize node name and rest path for each NetLocker object.
ds.restClnts = make([]NetLocker, ds.dNodeCount)
copy(ds.restClnts, restClnts)
return ds, nil
}
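To make the quorum arithmetic in New concrete, here is a small standalone snippet that mirrors the two formulas above (it is not part of the package):

package main

import (
	"fmt"
	"math"
)

func main() {
	// Same expressions as dsync.New: write quorum n/2+1, read quorum ceil(n/2).
	for _, n := range []int{2, 3, 4, 8, 16, 32} {
		writeQuorum := n/2 + 1
		readQuorum := int(math.Ceil(float64(n) / 2.0))
		fmt.Printf("nodes=%2d  write-quorum=%2d  read-quorum=%2d\n", n, writeQuorum, readQuorum)
	}
}

For odd node counts the two quorums coincide, which is exactly what the 3-client case in TestNew further down asserts.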

@@ -0,0 +1,58 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// GOMAXPROCS=10 go test
package dsync
import "testing"
// Tests dsync.New
func TestNew(t *testing.T) {
nclnts := make([]NetLocker, 33)
if _, err := New(nclnts); err == nil {
t.Fatal("Should have failed")
}
nclnts = make([]NetLocker, 1)
if _, err := New(nclnts); err == nil {
t.Fatal("Should have failed")
}
nclnts = make([]NetLocker, 2)
nds, err := New(nclnts)
if err != nil {
t.Fatal("Should pass", err)
}
if nds.dquorumReads != 1 {
t.Fatalf("Unexpected read quorum values expected 1, got %d", nds.dquorumReads)
}
if nds.dquorum != 2 {
t.Fatalf("Unexpected quorum values expected 2, got %d", nds.dquorum)
}
nclnts = make([]NetLocker, 3)
nds, err = New(nclnts)
if err != nil {
t.Fatal("Should pass", err)
}
if nds.dquorumReads != nds.dquorum {
t.Fatalf("Unexpected quorum values for odd nodes we expect read %d and write %d quorum to be same", nds.dquorumReads, nds.dquorum)
}
}

@@ -0,0 +1,361 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// GOMAXPROCS=10 go test
package dsync_test
import (
"context"
"fmt"
"log"
"math/rand"
"net"
"net/http"
"net/rpc"
"os"
"strconv"
"sync"
"testing"
"time"
. "github.com/minio/minio/pkg/dsync"
)
var ds *Dsync
var rpcPaths []string // list of rpc paths where lock server is serving.
func startRPCServers(nodes []string) {
for i := range nodes {
server := rpc.NewServer()
server.RegisterName("Dsync", &lockServer{
mutex: sync.Mutex{},
lockMap: make(map[string]int64),
})
// For some reason the registration paths need to be different (even for different server objs)
server.HandleHTTP(rpcPaths[i], fmt.Sprintf("%s-debug", rpcPaths[i]))
l, e := net.Listen("tcp", ":"+strconv.Itoa(i+12345))
if e != nil {
log.Fatal("listen error:", e)
}
go http.Serve(l, nil)
}
// Let servers start
time.Sleep(10 * time.Millisecond)
}
// TestMain initializes the testing framework
func TestMain(m *testing.M) {
const rpcPath = "/dsync"
rand.Seed(time.Now().UTC().UnixNano())
nodes := make([]string, 4) // list of node IP addrs or hostname with ports.
for i := range nodes {
nodes[i] = fmt.Sprintf("127.0.0.1:%d", i+12345)
}
for i := range nodes {
rpcPaths = append(rpcPaths, rpcPath+"-"+strconv.Itoa(i))
}
// Initialize net/rpc clients for dsync.
var clnts []NetLocker
for i := 0; i < len(nodes); i++ {
clnts = append(clnts, newClient(nodes[i], rpcPaths[i]))
}
var err error
ds, err = New(clnts)
if err != nil {
log.Fatalf("set nodes failed with %v", err)
}
startRPCServers(nodes)
os.Exit(m.Run())
}
func TestSimpleLock(t *testing.T) {
dm := NewDRWMutex(context.Background(), "test", ds)
dm.Lock(id, source)
// fmt.Println("Lock acquired, waiting...")
time.Sleep(2500 * time.Millisecond)
dm.Unlock()
}
func TestSimpleLockUnlockMultipleTimes(t *testing.T) {
dm := NewDRWMutex(context.Background(), "test", ds)
dm.Lock(id, source)
time.Sleep(time.Duration(10+(rand.Float32()*50)) * time.Millisecond)
dm.Unlock()
dm.Lock(id, source)
time.Sleep(time.Duration(10+(rand.Float32()*50)) * time.Millisecond)
dm.Unlock()
dm.Lock(id, source)
time.Sleep(time.Duration(10+(rand.Float32()*50)) * time.Millisecond)
dm.Unlock()
dm.Lock(id, source)
time.Sleep(time.Duration(10+(rand.Float32()*50)) * time.Millisecond)
dm.Unlock()
dm.Lock(id, source)
time.Sleep(time.Duration(10+(rand.Float32()*50)) * time.Millisecond)
dm.Unlock()
}
// Test two locks for same resource, one succeeds, one fails (after timeout)
func TestTwoSimultaneousLocksForSameResource(t *testing.T) {
dm1st := NewDRWMutex(context.Background(), "aap", ds)
dm2nd := NewDRWMutex(context.Background(), "aap", ds)
dm1st.Lock(id, source)
// Release lock after 10 seconds
go func() {
time.Sleep(10 * time.Second)
// fmt.Println("Unlocking dm1")
dm1st.Unlock()
}()
dm2nd.Lock(id, source)
// fmt.Printf("2nd lock obtained after 1st lock is released\n")
time.Sleep(2500 * time.Millisecond)
dm2nd.Unlock()
}
// Test three locks for same resource, one succeeds, one fails (after timeout)
func TestThreeSimultaneousLocksForSameResource(t *testing.T) {
dm1st := NewDRWMutex(context.Background(), "aap", ds)
dm2nd := NewDRWMutex(context.Background(), "aap", ds)
dm3rd := NewDRWMutex(context.Background(), "aap", ds)
dm1st.Lock(id, source)
// Release lock after 10 seconds
go func() {
time.Sleep(10 * time.Second)
// fmt.Println("Unlocking dm1")
dm1st.Unlock()
}()
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
dm2nd.Lock(id, source)
// Release lock after 10 seconds
go func() {
time.Sleep(2500 * time.Millisecond)
// fmt.Println("Unlocking dm2")
dm2nd.Unlock()
}()
dm3rd.Lock(id, source)
// fmt.Printf("3rd lock obtained after 1st & 2nd locks are released\n")
time.Sleep(2500 * time.Millisecond)
dm3rd.Unlock()
}()
go func() {
defer wg.Done()
dm3rd.Lock(id, source)
// Release lock after 10 seconds
go func() {
time.Sleep(2500 * time.Millisecond)
// fmt.Println("Unlocking dm3")
dm3rd.Unlock()
}()
dm2nd.Lock(id, source)
// fmt.Printf("2nd lock obtained after 1st & 3rd locks are released\n")
time.Sleep(2500 * time.Millisecond)
dm2nd.Unlock()
}()
wg.Wait()
}
// Test two locks for different resources, both succeed
func TestTwoSimultaneousLocksForDifferentResources(t *testing.T) {
dm1 := NewDRWMutex(context.Background(), "aap", ds)
dm2 := NewDRWMutex(context.Background(), "noot", ds)
dm1.Lock(id, source)
dm2.Lock(id, source)
// fmt.Println("Both locks acquired, waiting...")
time.Sleep(2500 * time.Millisecond)
dm1.Unlock()
dm2.Unlock()
time.Sleep(10 * time.Millisecond)
}
// Borrowed from mutex_test.go
func HammerMutex(m *DRWMutex, loops int, cdone chan bool) {
for i := 0; i < loops; i++ {
m.Lock(id, source)
m.Unlock()
}
cdone <- true
}
// Borrowed from mutex_test.go
func TestMutex(t *testing.T) {
c := make(chan bool)
m := NewDRWMutex(context.Background(), "test", ds)
for i := 0; i < 10; i++ {
go HammerMutex(m, 1000, c)
}
for i := 0; i < 10; i++ {
<-c
}
}
func BenchmarkMutexUncontended(b *testing.B) {
type PaddedMutex struct {
DRWMutex
pad [128]uint8
}
b.RunParallel(func(pb *testing.PB) {
var mu PaddedMutex
for pb.Next() {
mu.Lock(id, source)
mu.Unlock()
}
})
}
func benchmarkMutex(b *testing.B, slack, work bool) {
mu := NewDRWMutex(context.Background(), "", ds)
if slack {
b.SetParallelism(10)
}
b.RunParallel(func(pb *testing.PB) {
foo := 0
for pb.Next() {
mu.Lock(id, source)
mu.Unlock()
if work {
for i := 0; i < 100; i++ {
foo *= 2
foo /= 2
}
}
}
_ = foo
})
}
func BenchmarkMutex(b *testing.B) {
benchmarkMutex(b, false, false)
}
func BenchmarkMutexSlack(b *testing.B) {
benchmarkMutex(b, true, false)
}
func BenchmarkMutexWork(b *testing.B) {
benchmarkMutex(b, false, true)
}
func BenchmarkMutexWorkSlack(b *testing.B) {
benchmarkMutex(b, true, true)
}
func BenchmarkMutexNoSpin(b *testing.B) {
// This benchmark models a situation where spinning in the mutex should be
// non-profitable and allows to confirm that spinning does not do harm.
// To achieve this we create excess of goroutines most of which do local work.
// These goroutines yield during local work, so that switching from
// a blocked goroutine to other goroutines is profitable.
// As a matter of fact, this benchmark still triggers some spinning in the mutex.
m := NewDRWMutex(context.Background(), "", ds)
var acc0, acc1 uint64
b.SetParallelism(4)
b.RunParallel(func(pb *testing.PB) {
c := make(chan bool)
var data [4 << 10]uint64
for i := 0; pb.Next(); i++ {
if i%4 == 0 {
m.Lock(id, source)
acc0 -= 100
acc1 += 100
m.Unlock()
} else {
for i := 0; i < len(data); i += 4 {
data[i]++
}
// Elaborate way to say runtime.Gosched
// that does not put the goroutine onto global runq.
go func() {
c <- true
}()
<-c
}
}
})
}
func BenchmarkMutexSpin(b *testing.B) {
// This benchmark models a situation where spinning in the mutex should be
// profitable. To achieve this we create a goroutine per-proc.
// These goroutines access considerable amount of local data so that
// unnecessary rescheduling is penalized by cache misses.
m := NewDRWMutex(context.Background(), "", ds)
var acc0, acc1 uint64
b.RunParallel(func(pb *testing.PB) {
var data [16 << 10]uint64
for i := 0; pb.Next(); i++ {
m.Lock(id, source)
acc0 -= 100
acc1 += 100
m.Unlock()
for i := 0; i < len(data); i += 4 {
data[i]++
}
}
})
}

@@ -0,0 +1,142 @@
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dsync
import (
"math/rand"
"sync"
"time"
)
// lockedRandSource provides protected rand source, implements rand.Source interface.
type lockedRandSource struct {
lk sync.Mutex
src rand.Source
}
// Int63 returns a non-negative pseudo-random 63-bit integer as an
// int64.
func (r *lockedRandSource) Int63() (n int64) {
r.lk.Lock()
n = r.src.Int63()
r.lk.Unlock()
return
}
// Seed uses the provided seed value to initialize the generator to a
// deterministic state.
func (r *lockedRandSource) Seed(seed int64) {
r.lk.Lock()
r.src.Seed(seed)
r.lk.Unlock()
}
// MaxJitter will randomize over the full exponential backoff time
const MaxJitter = 1.0
// NoJitter disables the use of jitter for randomizing the
// exponential backoff time
const NoJitter = 0.0
// Global random source for fetching random values.
var globalRandomSource = rand.New(&lockedRandSource{
src: rand.NewSource(time.Now().UTC().UnixNano()),
})
// newRetryTimerJitter creates a timer with exponentially increasing delays
// until the maximum retry attempts are reached. - this function is a fully
// configurable version, meant for only advanced use cases. For the most part
// one should use newRetryTimerSimple and newRetryTimer.
func newRetryTimerWithJitter(unit time.Duration, cap time.Duration, jitter float64, doneCh <-chan struct{}) <-chan int {
attemptCh := make(chan int)
// normalize jitter to the range [0, 1.0]
if jitter < NoJitter {
jitter = NoJitter
}
if jitter > MaxJitter {
jitter = MaxJitter
}
// computes the exponential backoff duration according to
// https://www.awsarchitectureblog.com/2015/03/backoff.html
exponentialBackoffWait := func(attempt int) time.Duration {
// 1<<uint(attempt) below could overflow, so limit the value of attempt
maxAttempt := 30
if attempt > maxAttempt {
attempt = maxAttempt
}
//sleep = random_between(0, min(cap, base * 2 ** attempt))
sleep := unit * time.Duration(1<<uint(attempt))
if sleep > cap {
sleep = cap
}
if jitter != NoJitter {
sleep -= time.Duration(globalRandomSource.Float64() * float64(sleep) * jitter)
}
return sleep
}
go func() {
defer close(attemptCh)
nextBackoff := 0
// Channel used to signal after the expiry of backoff wait seconds.
var timer *time.Timer
for {
select { // Attempts starts.
case attemptCh <- nextBackoff:
nextBackoff++
case <-doneCh:
// Stop the routine.
return
}
timer = time.NewTimer(exponentialBackoffWait(nextBackoff))
// wait till next backoff time or till doneCh gets a message.
select {
case <-timer.C:
case <-doneCh:
// stop the timer and return.
timer.Stop()
return
}
}
}()
// Start reading..
return attemptCh
}
// Default retry constants.
const (
defaultRetryUnit = time.Second // 1 second.
defaultRetryCap = 1 * time.Second // 1 second.
)
// newRetryTimer creates a timer with exponentially increasing delays
// until the maximum retry attempts are reached. - this function provides
// resulting retry values to be of maximum jitter.
func newRetryTimer(unit time.Duration, cap time.Duration, doneCh <-chan struct{}) <-chan int {
return newRetryTimerWithJitter(unit, cap, MaxJitter, doneCh)
}
// newRetryTimerSimple creates a timer with exponentially increasing delays
// until the maximum retry attempts are reached. - this function is a
// simpler version with all default values.
func newRetryTimerSimple(doneCh <-chan struct{}) <-chan int {
return newRetryTimerWithJitter(defaultRetryUnit, defaultRetryCap, MaxJitter, doneCh)
}
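The wait computed above follows the capped exponential back-off with full jitter described in the referenced article. The standalone sketch below reproduces the same computation outside the package; all names are local to the example and are not exported by dsync.

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// backoffWait mirrors exponentialBackoffWait: unit * 2^attempt, capped at
// maxWait, then reduced by a random jitter fraction of the result.
func backoffWait(attempt int, unit, maxWait time.Duration, jitter float64) time.Duration {
	if attempt > 30 { // keep 1<<attempt from overflowing
		attempt = 30
	}
	sleep := unit * time.Duration(1<<uint(attempt))
	if sleep > maxWait {
		sleep = maxWait
	}
	if jitter > 0 {
		sleep -= time.Duration(rand.Float64() * float64(sleep) * jitter)
	}
	return sleep
}

func main() {
	for attempt := 0; attempt < 5; attempt++ {
		fmt.Printf("attempt %d -> wait %v\n", attempt, backoffWait(attempt, time.Second, 30*time.Second, 1.0))
	}
}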

@@ -0,0 +1,82 @@
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dsync
import (
"testing"
"time"
)
// Tests for retry timer.
func TestRetryTimerSimple(t *testing.T) {
doneCh := make(chan struct{})
attemptCh := newRetryTimerSimple(doneCh)
i := <-attemptCh
if i != 0 {
close(doneCh)
t.Fatalf("Invalid attempt counter returned should be 0, found %d instead", i)
}
i = <-attemptCh
if i <= 0 {
close(doneCh)
t.Fatalf("Invalid attempt counter returned should be greater than 0, found %d instead", i)
}
close(doneCh)
_, ok := <-attemptCh
if ok {
t.Fatal("Attempt counter should be closed")
}
}
// Test retry time with no jitter.
func TestRetryTimerWithNoJitter(t *testing.T) {
doneCh := make(chan struct{})
// No jitter
attemptCh := newRetryTimerWithJitter(time.Millisecond, 5*time.Millisecond, NoJitter, doneCh)
i := <-attemptCh
if i != 0 {
close(doneCh)
t.Fatalf("Invalid attempt counter returned should be 0, found %d instead", i)
}
// Loop through the maximum possible attempt.
for i = range attemptCh {
if i == 30 {
close(doneCh)
}
}
_, ok := <-attemptCh
if ok {
t.Fatal("Attempt counter should be closed")
}
}
// Test retry time with Jitter greater than MaxJitter.
func TestRetryTimerWithJitter(t *testing.T) {
doneCh := make(chan struct{})
// Jitter will be set back to 1.0
attemptCh := newRetryTimerWithJitter(time.Second, 30*time.Second, 2.0, doneCh)
i := <-attemptCh
if i != 0 {
close(doneCh)
t.Fatalf("Invalid attempt counter returned should be 0, found %d instead", i)
}
close(doneCh)
_, ok := <-attemptCh
if ok {
t.Fatal("Attempt counter should be closed")
}
}

@@ -0,0 +1,111 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dsync_test
import (
"net/rpc"
"sync"
. "github.com/minio/minio/pkg/dsync"
)
// ReconnectRPCClient is a wrapper type for rpc.Client which provides reconnect on first failure.
type ReconnectRPCClient struct {
mutex sync.Mutex
rpc *rpc.Client
addr string
endpoint string
}
// newClient constructs a ReconnectRPCClient object with addr and endpoint initialized.
// It _doesn't_ connect to the remote endpoint. See Call method to see when the
// connect happens.
func newClient(addr, endpoint string) NetLocker {
return &ReconnectRPCClient{
addr: addr,
endpoint: endpoint,
}
}
// Close closes the underlying socket file descriptor.
func (rpcClient *ReconnectRPCClient) Close() error {
rpcClient.mutex.Lock()
defer rpcClient.mutex.Unlock()
// If rpc client has not connected yet there is nothing to close.
if rpcClient.rpc == nil {
return nil
}
// Reset rpcClient.rpc to allow for subsequent calls to use a new
// (socket) connection.
clnt := rpcClient.rpc
rpcClient.rpc = nil
return clnt.Close()
}
// Call makes a RPC call to the remote endpoint using the default codec, namely encoding/gob.
func (rpcClient *ReconnectRPCClient) Call(serviceMethod string, args interface{}, reply interface{}) (err error) {
rpcClient.mutex.Lock()
defer rpcClient.mutex.Unlock()
dialCall := func() error {
// If the rpc.Client is nil, we attempt to (re)connect with the remote endpoint.
if rpcClient.rpc == nil {
clnt, derr := rpc.DialHTTPPath("tcp", rpcClient.addr, rpcClient.endpoint)
if derr != nil {
return derr
}
rpcClient.rpc = clnt
}
// If the RPC fails due to a network-related error, then we reset
// rpc.Client for a subsequent reconnect.
return rpcClient.rpc.Call(serviceMethod, args, reply)
}
if err = dialCall(); err == rpc.ErrShutdown {
rpcClient.rpc.Close()
rpcClient.rpc = nil
err = dialCall()
}
return err
}
func (rpcClient *ReconnectRPCClient) RLock(args LockArgs) (status bool, err error) {
err = rpcClient.Call("Dsync.RLock", &args, &status)
return status, err
}
func (rpcClient *ReconnectRPCClient) Lock(args LockArgs) (status bool, err error) {
err = rpcClient.Call("Dsync.Lock", &args, &status)
return status, err
}
func (rpcClient *ReconnectRPCClient) RUnlock(args LockArgs) (status bool, err error) {
err = rpcClient.Call("Dsync.RUnlock", &args, &status)
return status, err
}
func (rpcClient *ReconnectRPCClient) Unlock(args LockArgs) (status bool, err error) {
err = rpcClient.Call("Dsync.Unlock", &args, &status)
return status, err
}
func (rpcClient *ReconnectRPCClient) ForceUnlock(args LockArgs) (status bool, err error) {
err = rpcClient.Call("Dsync.ForceUnlock", &args, &status)
return status, err
}
func (rpcClient *ReconnectRPCClient) String() string {
return "http://" + rpcClient.addr + "/" + rpcClient.endpoint
}

@@ -0,0 +1,59 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dsync
// LockArgs is minimal required values for any dsync compatible lock operation.
type LockArgs struct {
// Unique ID of lock/unlock request.
UID string
// Resource contains a entity to be locked/unlocked.
Resource string
// Source contains the line number, function and file name of the code
// on the client node that requested the lock.
Source string
}
// NetLocker is dsync compatible locker interface.
type NetLocker interface {
// Do read lock for given LockArgs. It should return
// * a boolean to indicate success/failure of the operation
// * an error on failure of lock request operation.
RLock(args LockArgs) (bool, error)
// Do write lock for given LockArgs. It should return
// * a boolean to indicate success/failure of the operation
// * an error on failure of lock request operation.
Lock(args LockArgs) (bool, error)
// Do read unlock for given LockArgs. It should return
// * a boolean to indicate success/failure of the operation
// * an error on failure of unlock request operation.
RUnlock(args LockArgs) (bool, error)
// Do write unlock for given LockArgs. It should return
// * a boolean to indicate success/failure of the operation
// * an error on failure of unlock request operation.
Unlock(args LockArgs) (bool, error)
// Returns underlying endpoint of this lock client instance.
String() string
// Close closes any underlying connection to the service endpoint
Close() error
}