From 1f9abbee4d3b9e26b1762874a4e5779cff22cfa1 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Tue, 29 Sep 2020 15:18:34 -0700
Subject: [PATCH] make sure to release locks upon timeout (#10596)

fixes #10418
---
 cmd/bootstrap-peer-server.go |  2 +-
 cmd/endpoint.go              |  2 +-
 cmd/lock-rest-client.go      |  3 +--
 cmd/peer-rest-client.go      |  2 +-
 cmd/rest/client.go           |  4 +--
 cmd/storage-rest-client.go   |  2 +-
 cmd/utils.go                 |  1 -
 pkg/dsync/drwmutex.go        | 49 ++++++++++++++++++++----------------
 8 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/cmd/bootstrap-peer-server.go b/cmd/bootstrap-peer-server.go
index 096e6b340..1064a6405 100644
--- a/cmd/bootstrap-peer-server.go
+++ b/cmd/bootstrap-peer-server.go
@@ -233,7 +233,7 @@ func newBootstrapRESTClient(endpoint Endpoint) *bootstrapRESTClient {
         }
     }

-    trFn := newInternodeHTTPTransport(tlsConfig, rest.DefaultRESTTimeout)
+    trFn := newInternodeHTTPTransport(tlsConfig, rest.DefaultTimeout)
     restClient := rest.NewClient(serverURL, trFn, newAuthToken)
     restClient.HealthCheckFn = func() bool {
         ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout)
diff --git a/cmd/endpoint.go b/cmd/endpoint.go
index 96d1a6116..925a87c63 100644
--- a/cmd/endpoint.go
+++ b/cmd/endpoint.go
@@ -770,7 +770,7 @@ func GetProxyEndpoints(endpointZones EndpointZones) ([]ProxyEndpoint, error) {
             }
         }

-        tr := newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout)()
+        tr := newCustomHTTPTransport(tlsConfig, rest.DefaultTimeout)()
         // Allow more requests to be in flight with higher response header timeout.
         tr.ResponseHeaderTimeout = 30 * time.Minute
         tr.MaxIdleConns = 64
diff --git a/cmd/lock-rest-client.go b/cmd/lock-rest-client.go
index 72832ce54..f8c45d9db 100644
--- a/cmd/lock-rest-client.go
+++ b/cmd/lock-rest-client.go
@@ -23,7 +23,6 @@ import (
     "errors"
     "io"
     "net/url"
-    "time"

     "github.com/minio/minio/cmd/http"
     xhttp "github.com/minio/minio/cmd/http"
@@ -154,7 +153,7 @@ func newlockRESTClient(endpoint Endpoint) *lockRESTClient {
         }
     }

-    trFn := newInternodeHTTPTransport(tlsConfig, 10*time.Second)
+    trFn := newInternodeHTTPTransport(tlsConfig, rest.DefaultTimeout)
     restClient := rest.NewClient(serverURL, trFn, newAuthToken)
     restClient.HealthCheckFn = func() bool {
         ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout)
diff --git a/cmd/peer-rest-client.go b/cmd/peer-rest-client.go
index 1ce849fbe..3c098bb39 100644
--- a/cmd/peer-rest-client.go
+++ b/cmd/peer-rest-client.go
@@ -883,7 +883,7 @@ func newPeerRESTClient(peer *xnet.Host) *peerRESTClient {
         }
     }

-    trFn := newInternodeHTTPTransport(tlsConfig, 10*time.Second)
+    trFn := newInternodeHTTPTransport(tlsConfig, rest.DefaultTimeout)
     restClient := rest.NewClient(serverURL, trFn, newAuthToken)

     // Construct a new health function.
diff --git a/cmd/rest/client.go b/cmd/rest/client.go
index 1e6a17c64..1c1c9dac8 100644
--- a/cmd/rest/client.go
+++ b/cmd/rest/client.go
@@ -30,8 +30,8 @@ import (
     xnet "github.com/minio/minio/pkg/net"
 )

-// DefaultRESTTimeout - default RPC timeout is 15 seconds.
-const DefaultRESTTimeout = 15 * time.Second
+// DefaultTimeout - default REST timeout is 10 seconds.
+const DefaultTimeout = 10 * time.Second

 const (
     offline = iota
diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go
index 2ca8a9314..956dcc658 100644
--- a/cmd/storage-rest-client.go
+++ b/cmd/storage-rest-client.go
@@ -680,7 +680,7 @@ func newStorageRESTClient(endpoint Endpoint) *storageRESTClient {
         }
     }

-    trFn := newInternodeHTTPTransport(tlsConfig, rest.DefaultRESTTimeout)
+    trFn := newInternodeHTTPTransport(tlsConfig, rest.DefaultTimeout)
     restClient := rest.NewClient(serverURL, trFn, newAuthToken)
     restClient.HealthCheckFn = func() bool {
         ctx, cancel := context.WithTimeout(GlobalContext, restClient.HealthCheckTimeout)
diff --git a/cmd/utils.go b/cmd/utils.go
index 7b7d14006..21cb4bc70 100644
--- a/cmd/utils.go
+++ b/cmd/utils.go
@@ -497,7 +497,6 @@ func newCustomHTTPTransport(tlsConfig *tls.Config, dialTimeout time.Duration) fu
         Proxy:                 http.ProxyFromEnvironment,
         DialContext:           xhttp.NewCustomDialContext(dialTimeout),
         MaxIdleConnsPerHost:   16,
-        MaxIdleConns:          16,
         IdleConnTimeout:       1 * time.Minute,
         ResponseHeaderTimeout: 3 * time.Minute, // Set conservative timeouts for MinIO internode.
         TLSHandshakeTimeout:   10 * time.Second,
diff --git a/pkg/dsync/drwmutex.go b/pkg/dsync/drwmutex.go
index 054630332..972e8e56d 100644
--- a/pkg/dsync/drwmutex.go
+++ b/pkg/dsync/drwmutex.go
@@ -142,7 +142,7 @@ const (
 // algorithm until either the lock is acquired successfully or more
 // time has elapsed than the timeout value.
 func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadLock bool, opts Options) (locked bool) {
-    restClnts, _ := dm.clnt.GetLockers()
+    restClnts, owner := dm.clnt.GetLockers()

     r := rand.New(rand.NewSource(time.Now().UnixNano()))

@@ -154,6 +154,26 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadL

     defer cancel()

+    // Tolerance is not set, defaults to half of the locker clients.
+    tolerance := opts.Tolerance
+    if tolerance == 0 {
+        tolerance = len(restClnts) / 2
+    }
+
+    // Quorum is effectively = total clients subtracted with tolerance limit
+    quorum := len(restClnts) - tolerance
+    if !isReadLock {
+        // In situations for write locks, as a special case
+        // to avoid split brains we make sure to acquire
+        // quorum + 1 when tolerance is exactly half of the
+        // total locker clients.
+        if quorum == tolerance {
+            quorum++
+        }
+    }
+
+    tolerance = len(restClnts) - quorum
+
     for {
         select {
         case <-retryCtx.Done():
@@ -161,10 +181,14 @@

             // Caller context canceled or we timedout,
             // return false anyways for both situations.
+
+            // make sure to unlock any successful locks, since caller has timedout or canceled the request.
+            releaseAll(dm.clnt, quorum, owner, &locks, isReadLock, restClnts, dm.Names...)
+
             return false
         default:
             // Try to acquire the lock.
-            if locked = lock(retryCtx, dm.clnt, &locks, id, source, isReadLock, opts.Tolerance, dm.Names...); locked {
+            if locked = lock(retryCtx, dm.clnt, &locks, id, source, isReadLock, tolerance, quorum, dm.Names...); locked {
                 dm.m.Lock()

                 // If success, copy array to object
@@ -186,32 +210,13 @@
 }

 // lock tries to acquire the distributed lock, returning true or false.
-func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, isReadLock bool, tolerance int, lockNames ...string) bool {
+func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, isReadLock bool, tolerance, quorum int, lockNames ...string) bool {
     for i := range *locks {
         (*locks)[i] = ""
     }

     restClnts, owner := ds.GetLockers()

-    // Tolerance is not set, defaults to half of the locker clients.
-    if tolerance == 0 {
-        tolerance = len(restClnts) / 2
-    }
-
-    // Quorum is effectively = total clients subtracted with tolerance limit
-    quorum := len(restClnts) - tolerance
-    if !isReadLock {
-        // In situations for write locks, as a special case
-        // to avoid split brains we make sure to acquire
-        // quorum + 1 when tolerance is exactly half of the
-        // total locker clients.
-        if quorum == tolerance {
-            quorum++
-        }
-    }
-
-    tolerance = len(restClnts) - quorum
-
     // Create buffered channel of size equal to total number of nodes.
     ch := make(chan Granted, len(restClnts))
     defer close(ch)
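
Note (illustrative, not part of the patch): the tolerance/quorum arithmetic that this change hoists out of lock() and into lockBlocking() can be exercised on its own. The standalone sketch below only mirrors the logic added in the pkg/dsync/drwmutex.go hunks above; the helper name quorumFor is made up for illustration and is not part of dsync.

package main

import "fmt"

// quorumFor is an illustrative stand-in, not dsync's API. It mirrors the
// arithmetic added to lockBlocking() above: tolerance defaults to half of
// the locker clients, and a write lock needs one extra grant whenever
// quorum would otherwise equal tolerance, so two disjoint halves of the
// cluster can never both hold the same write lock.
func quorumFor(lockers, tolerance int, isReadLock bool) (quorum, effectiveTolerance int) {
    if tolerance == 0 {
        tolerance = lockers / 2
    }
    quorum = lockers - tolerance
    if !isReadLock && quorum == tolerance {
        quorum++ // avoid split brain on an even split
    }
    return quorum, lockers - quorum
}

func main() {
    fmt.Println(quorumFor(4, 0, false)) // write lock: needs 3 grants, tolerates 1 failure
    fmt.Println(quorumFor(4, 0, true))  // read lock:  needs 2 grants, tolerates 2 failures
}

With this computation done once in lockBlocking(), the retryCtx.Done() branch can pass the same quorum to releaseAll(), so a caller that times out or cancels gives back any grants it had already collected instead of leaving stale entries on the lockers, which is the behavior change that fixes #10418.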