expire locks if lockers are offline (#10749)

Lockers currently might leave stale locks behind, waiting in unknown
ways on downed lockers. Each lock now records the quorum it was granted
with, so lock maintenance can expire locks whose surviving holders have
dropped below that quorum.

The lock validity check interval is now frequent enough to safely
clean up stale locks.
Harshavardhana authored 4 years ago, committed by GitHub
parent 6a8c62f9fd
commit d9db7f3308
9 changed files (changed lines in parentheses):

1. cmd/admin-handlers.go (14)
2. cmd/erasure.go (4)
3. cmd/local-locker.go (4)
4. cmd/lock-rest-client.go (2)
5. cmd/lock-rest-server-common.go (6)
6. cmd/lock-rest-server.go (23)
7. pkg/dsync/drwmutex.go (1)
8. pkg/dsync/rpc-client-interface.go (3)
9. pkg/madmin/top-commands.go (3)
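Taken together, the hunks below thread a lock's quorum from the dsync client that acquires it, over the lock REST API, into each lock server's bookkeeping, so that lock maintenance can expire a lock against the quorum it was granted with instead of recomputing one after the fact. A minimal sketch of that data flow, using simplified stand-in types rather than the actual MinIO declarations:

```go
package main

import "fmt"

// LockArgs mirrors the new dsync.LockArgs field: the client records the
// quorum a lock needs at the time it requests it.
type LockArgs struct {
	UID    string
	Quorum int
}

// lockRequesterInfo mirrors the server-side record: the granted quorum is
// saved next to the requester info for later staleness checks.
type lockRequesterInfo struct {
	UID    string
	Quorum int
}

// grant is a simplified stand-in for localLocker.Lock: it persists the
// quorum that arrived with the request.
func grant(args LockArgs) lockRequesterInfo {
	return lockRequesterInfo{UID: args.UID, Quorum: args.Quorum}
}

func main() {
	lri := grant(LockArgs{UID: "uid-1", Quorum: 3})
	// Lock maintenance later compares surviving holders against lri.Quorum.
	fmt.Println(lri.Quorum) // 3
}
```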

cmd/admin-handlers.go

@@ -348,7 +348,7 @@ func (a adminAPIHandlers) DataUsageInfoHandler(w http.ResponseWriter, r *http.Re
 	writeSuccessResponseJSON(w, dataUsageInfoJSON)
 }
 
-func lriToLockEntry(l lockRequesterInfo, resource, server string, rquorum, wquorum int) *madmin.LockEntry {
+func lriToLockEntry(l lockRequesterInfo, resource, server string) *madmin.LockEntry {
 	entry := &madmin.LockEntry{
 		Timestamp: l.Timestamp,
 		Resource:  resource,
@@ -356,18 +356,17 @@ func lriToLockEntry(l lockRequesterInfo, resource, server string, rquorum, wquor
 		Source: l.Source,
 		Owner:  l.Owner,
 		ID:     l.UID,
+		Quorum: l.Quorum,
 	}
 	if l.Writer {
 		entry.Type = "WRITE"
-		entry.Quorum = wquorum
 	} else {
 		entry.Type = "READ"
-		entry.Quorum = rquorum
 	}
 	return entry
 }
 
-func topLockEntries(peerLocks []*PeerLocks, rquorum, wquorum int, stale bool) madmin.LockEntries {
+func topLockEntries(peerLocks []*PeerLocks, stale bool) madmin.LockEntries {
 	entryMap := make(map[string]*madmin.LockEntry)
 	for _, peerLock := range peerLocks {
 		if peerLock == nil {
@@ -379,7 +378,7 @@ func topLockEntries(peerLocks []*PeerLocks, rquorum, wquorum int, stale bool) ma
 		if val, ok := entryMap[lockReqInfo.UID]; ok {
 			val.ServerList = append(val.ServerList, peerLock.Addr)
 		} else {
-			entryMap[lockReqInfo.UID] = lriToLockEntry(lockReqInfo, k, peerLock.Addr, rquorum, wquorum)
+			entryMap[lockReqInfo.UID] = lriToLockEntry(lockReqInfo, k, peerLock.Addr)
 		}
 	}
 }
@@ -429,10 +428,7 @@ func (a adminAPIHandlers) TopLocksHandler(w http.ResponseWriter, r *http.Request
 	peerLocks := globalNotificationSys.GetLocks(ctx, r)
 
-	rquorum := getReadQuorum(objectAPI.SetDriveCount())
-	wquorum := getWriteQuorum(objectAPI.SetDriveCount())
-
-	topLocks := topLockEntries(peerLocks, rquorum, wquorum, stale)
+	topLocks := topLockEntries(peerLocks, stale)
 
 	// Marshal API response upto requested count.
 	if len(topLocks) > count && count > 0 {

cmd/erasure.go

@@ -178,7 +178,9 @@ func getDisksInfo(disks []StorageAPI, endpoints []string) (disksInfo []madmin.Di
 		}
 	}
 
-	if len(disksInfo) == rootDiskCount {
+	// Count offline disks as well to ensure consistent
+	// reportability of offline drives on local setups.
+	if len(disksInfo) == (rootDiskCount + offlineDisks.Sum()) {
 		// Success.
 		return disksInfo, errs, onlineDisks, offlineDisks
 	}

cmd/local-locker.go

@@ -35,6 +35,8 @@ type lockRequesterInfo struct {
 	// Owner represents the UUID of the owner who originally requested the lock
 	// useful in expiry.
 	Owner string
+	// Quorum represents the quorum required for this lock to be active.
+	Quorum int
 }
 
 // isWriteLock returns whether the lock is a write or read lock.
@@ -96,6 +98,7 @@ func (l *localLocker) Lock(ctx context.Context, args dsync.LockArgs) (reply boo
 			UID:           args.UID,
 			Timestamp:     UTCNow(),
 			TimeLastCheck: UTCNow(),
+			Quorum:        args.Quorum,
 		},
 	}
 }
@@ -153,6 +156,7 @@ func (l *localLocker) RLock(ctx context.Context, args dsync.LockArgs) (reply boo
 		UID:           args.UID,
 		Timestamp:     UTCNow(),
 		TimeLastCheck: UTCNow(),
+		Quorum:        args.Quorum,
 	}
 	resource := args.Resources[0]
 	if lri, ok := l.lockMap[resource]; ok {

cmd/lock-rest-client.go

@@ -23,6 +23,7 @@ import (
 	"errors"
 	"io"
 	"net/url"
+	"strconv"
 
 	"github.com/minio/minio/cmd/http"
 	xhttp "github.com/minio/minio/cmd/http"
@@ -93,6 +94,7 @@ func (client *lockRESTClient) restCall(ctx context.Context, call string, args ds
 	values.Set(lockRESTUID, args.UID)
 	values.Set(lockRESTOwner, args.Owner)
 	values.Set(lockRESTSource, args.Source)
+	values.Set(lockRESTQuorum, strconv.Itoa(args.Quorum))
 
 	var buffer bytes.Buffer
 	for _, resource := range args.Resources {
 		buffer.WriteString(resource)
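Because the quorum travels as a plain query parameter, the client stringifies it with strconv.Itoa and getLockArgs on the server parses it back with strconv.Atoi. A self-contained round-trip sketch of that encoding, using only the standard library (not MinIO code):

```go
package main

import (
	"fmt"
	"net/url"
	"strconv"
)

func main() {
	// Client side: mirror of values.Set(lockRESTQuorum, strconv.Itoa(args.Quorum)).
	values := url.Values{}
	values.Set("quorum", strconv.Itoa(3))

	// Server side: mirror of getLockArgs parsing r.URL.Query().Get(lockRESTQuorum).
	quorum, err := strconv.Atoi(values.Get("quorum"))
	if err != nil {
		panic(err)
	}
	fmt.Println(quorum) // 3
}
```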

cmd/lock-rest-server-common.go

@@ -21,7 +21,7 @@ import (
 )
 
 const (
-	lockRESTVersion       = "v3"
+	lockRESTVersion       = "v4" // Add Quorum query param
 	lockRESTVersionPrefix = SlashSeparator + lockRESTVersion
 	lockRESTPrefix        = minioReservedBucketPath + "/lock"
 )
@@ -43,6 +43,10 @@ const (
 	// Source contains the line number, function and file name of the code
 	// on the client node that requested the lock.
 	lockRESTSource = "source"
+
+	// Quorum value to be saved along with lock requester info, useful
+	// in verifying stale locks.
+	lockRESTQuorum = "quorum"
 )
 
 var (

cmd/lock-rest-server.go

@@ -24,6 +24,7 @@ import (
 	"net/http"
 	"path"
 	"sort"
+	"strconv"
 	"time"
 
 	"github.com/gorilla/mux"
@@ -32,10 +33,10 @@ import (
 const (
 	// Lock maintenance interval.
-	lockMaintenanceInterval = 30 * time.Second
+	lockMaintenanceInterval = 15 * time.Second
 
 	// Lock validity check interval.
-	lockValidityCheckInterval = 30 * time.Second
+	lockValidityCheckInterval = 5 * time.Second
 )
 
 // To abstract a node over network.
@@ -63,10 +64,16 @@ func (l *lockRESTServer) IsValid(w http.ResponseWriter, r *http.Request) bool {
 }
 
 func getLockArgs(r *http.Request) (args dsync.LockArgs, err error) {
+	quorum, err := strconv.Atoi(r.URL.Query().Get(lockRESTQuorum))
+	if err != nil {
+		return args, err
+	}
+
 	args = dsync.LockArgs{
 		Owner:  r.URL.Query().Get(lockRESTOwner),
 		UID:    r.URL.Query().Get(lockRESTUID),
 		Source: r.URL.Query().Get(lockRESTSource),
+		Quorum: quorum,
 	}
 
 	var resources []string
@@ -277,12 +284,8 @@ func lockMaintenance(ctx context.Context, interval time.Duration) error {
 	for lendpoint, nlrips := range getLongLivedLocks(interval) {
 		nlripsMap := make(map[string]nlock, len(nlrips))
 		for _, nlrip := range nlrips {
-			// Locks are only held on first zone, make sure that
-			// we only look for ownership of locks from endpoints
-			// on first zone.
 			for _, c := range allLockersFn() {
 				if !c.IsOnline() || c == nil {
-					updateNlocks(nlripsMap, nlrip.name, nlrip.lri.Writer)
 					continue
 				}
@@ -306,14 +309,8 @@ func lockMaintenance(ctx context.Context, interval time.Duration) error {
 			}
 		}
 
-		// Read locks we assume quorum for be N/2 success
-		quorum := getReadQuorum(objAPI.SetDriveCount())
-		if nlrip.lri.Writer {
-			quorum = getWriteQuorum(objAPI.SetDriveCount())
-		}
-
 		// less than the quorum, we have locks expired.
-		if nlripsMap[nlrip.name].locks < quorum {
+		if nlripsMap[nlrip.name].locks < nlrip.lri.Quorum {
 			// The lock is no longer active at server that originated
 			// the lock, attempt to remove the lock.
 			globalLockServers[lendpoint].mutex.Lock()
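The net effect of the two maintenance hunks above: offline lockers no longer count toward a lock's holder tally, and the expiry threshold is the quorum recorded at grant time rather than a freshly computed one. A simplified sketch of that rule, with hypothetical names (lockHolder, isStale) standing in for the real bookkeeping:

```go
package main

import "fmt"

// lockHolder is a hypothetical stand-in for a remote locker.
type lockHolder struct {
	online   bool
	holdsUID map[string]bool // lock UIDs this locker still knows about
}

// isStale counts only online lockers that still hold the lock; offline
// lockers no longer count, so a lock whose holders went down falls below
// its recorded quorum and becomes eligible for removal.
func isStale(holders []lockHolder, uid string, quorum int) bool {
	held := 0
	for _, h := range holders {
		if h.online && h.holdsUID[uid] {
			held++
		}
	}
	return held < quorum
}

func main() {
	holders := []lockHolder{
		{online: true, holdsUID: map[string]bool{"uid-1": true}},
		{online: false, holdsUID: map[string]bool{"uid-1": true}}, // downed locker
		{online: true, holdsUID: map[string]bool{}},
	}
	// A lock granted with quorum 2 has only one online holder left,
	// so it is treated as stale and would be expired.
	fmt.Println(isStale(holders, "uid-1", 2)) // true
}
```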

pkg/dsync/drwmutex.go

@@ -241,6 +241,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is
 			UID:       id,
 			Resources: lockNames,
 			Source:    source,
+			Quorum:    quorum,
 		}
 
 		var locked bool
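The quorum written into LockArgs here is computed by the dsync caller before the lock is attempted. As a rough illustration only (the exact dsync computation may differ), the split mirrors the getReadQuorum/getWriteQuorum helpers the old maintenance code relied on: half the lockers for a read lock, a strict majority for a write lock.

```go
package main

import "fmt"

// lockQuorum is a hypothetical helper illustrating the read/write split;
// the removed maintenance code assumed N/2 success for read locks and a
// write quorum for write locks.
func lockQuorum(n int, isReadLock bool) int {
	if isReadLock {
		return n / 2 // read locks: half the lockers suffice
	}
	return n/2 + 1 // write locks: strict majority
}

func main() {
	// With 4 lockers: a read lock needs 2, a write lock needs 3.
	fmt.Println(lockQuorum(4, true), lockQuorum(4, false)) // 2 3
}
```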

pkg/dsync/rpc-client-interface.go

@@ -33,6 +33,9 @@ type LockArgs struct {
 	// Owner represents unique ID for this instance, an owner who originally requested
 	// the locked resource, useful primarily in figuring out stale locks.
 	Owner string
+
+	// Quorum represents the expected quorum for this lock type.
+	Quorum int
 }
 
 // NetLocker is dsync compatible locker interface.

pkg/madmin/top-commands.go

@@ -38,7 +38,8 @@ type LockEntry struct {
 	ServerList []string `json:"serverlist"` // List of servers participating in the lock.
 	Owner      string   `json:"owner"`      // Owner UUID indicates server owns the lock.
 	ID         string   `json:"id"`         // UID to uniquely identify request of client.
-	Quorum     int      `json:"quorum"`     // represents quorum number of servers required to hold this lock, used to look for stale locks.
+	// Represents quorum number of servers required to hold this lock, used to look for stale locks.
+	Quorum int `json:"quorum"`
 }
 
 // LockEntries - To sort the locks
