|
|
|
@@ -17,6 +17,7 @@
 package cmd
 
 import (
+	"context"
 	"errors"
 	"math/rand"
 	"net/http"
@@ -29,7 +30,7 @@ import (
 
 const (
 	// Lock maintenance interval.
-	lockMaintenanceInterval = 30 * time.Second
+	lockMaintenanceInterval = 1 * time.Minute
 
 	// Lock validity check interval.
 	lockValidityCheckInterval = 2 * time.Minute
@@ -179,6 +180,8 @@ func getLongLivedLocks(interval time.Duration) map[Endpoint][]nameLockRequesterI
 	return nlripMap
 }
 
+var lockMaintenanceTimeout = newDynamicTimeout(60*time.Second, time.Second)
+
 // lockMaintenance loops over locks that have been active for some time and checks back
 // with the original server whether it is still alive or not
 //
@@ -187,7 +190,14 @@ func getLongLivedLocks(interval time.Duration) map[Endpoint][]nameLockRequesterI
 // - some network error (and server is up normally)
 //
 // We will ignore the error, and we will retry later to get a resolve on this lock
-func lockMaintenance(interval time.Duration) {
+func lockMaintenance(ctx context.Context, interval time.Duration, objAPI ObjectLayer) error {
+	// Lock to avoid concurrent lock maintenance loops
+	maintenanceLock := objAPI.NewNSLock(ctx, "system", "lock-maintenance-ops")
+	if err := maintenanceLock.GetLock(lockMaintenanceTimeout); err != nil {
+		return err
+	}
+	defer maintenanceLock.Unlock()
+
 	// Validate if long lived locks are indeed clean.
 	// Get list of long lived locks to check for staleness.
 	for lendpoint, nlrips := range getLongLivedLocks(interval) {
@@ -203,7 +213,8 @@ func lockMaintenance(interval time.Duration) {
 				continue
 			}
 
-			// Call back to original server verify whether the lock is still active (based on name & uid)
+			// Call back to original server verify whether the lock is
+			// still active (based on name & uid)
 			expired, err := c.Expired(dsync.LockArgs{
 				UID:      nlrip.lri.UID,
 				Resource: nlrip.name,
@@ -230,15 +241,31 @@
 			}
 		}
 	}
+
+	return nil
 }
 
 // Start lock maintenance from all lock servers.
 func startLockMaintenance() {
+	var objAPI ObjectLayer
+	var ctx = context.Background()
+
+	// Wait until the object API is ready
+	for {
+		objAPI = newObjectLayerWithoutSafeModeFn()
+		if objAPI == nil {
+			time.Sleep(time.Second)
+			continue
+		}
+		break
+	}
+
 	// Initialize a new ticker with a minute between each ticks.
 	ticker := time.NewTicker(lockMaintenanceInterval)
 	// Stop the timer upon service closure and cleanup the go-routine.
 	defer ticker.Stop()
 
+	r := rand.New(rand.NewSource(UTCNow().UnixNano()))
 	for {
 		// Verifies every minute for locks held more than 2 minutes.
 		select {
@@ -247,10 +274,13 @@ func startLockMaintenance() {
 		case <-ticker.C:
 			// Start with random sleep time, so as to avoid
 			// "synchronous checks" between servers
-			r := rand.New(rand.NewSource(UTCNow().UnixNano()))
 			duration := time.Duration(r.Float64() * float64(lockMaintenanceInterval))
 			time.Sleep(duration)
-			lockMaintenance(lockValidityCheckInterval)
+			if err := lockMaintenance(ctx, lockValidityCheckInterval, objAPI); err != nil {
+				// Sleep right after an error.
+				duration := time.Duration(r.Float64() * float64(lockMaintenanceInterval))
+				time.Sleep(duration)
+			}
 		}
 	}
 }
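
For reference, the loop shape this patch converges on can be sketched as a standalone program. The sketch below is an approximation and not part of the patch: maintenanceInterval, runMaintenance, and the done channel are illustrative stand-ins for lockMaintenanceInterval, lockMaintenance, and the server's shutdown signal. It shows the two behaviours the hunks above rely on: a random sleep after every tick so servers do not probe the lock owners in lockstep, and one extra randomized back-off whenever a maintenance pass returns an error.

// Standalone sketch with assumed names; not MinIO code.
package main

import (
	"errors"
	"math/rand"
	"time"
)

// maintenanceInterval stands in for lockMaintenanceInterval.
const maintenanceInterval = time.Minute

// runMaintenance stands in for the patched lockMaintenance call; here it
// only simulates an occasional failure.
func runMaintenance(r *rand.Rand) error {
	if r.Intn(4) == 0 {
		return errors.New("maintenance lock not acquired")
	}
	return nil
}

// maintenanceLoop mirrors the shape of startLockMaintenance after the patch:
// jitter after each tick, plus one more randomized back-off on error.
func maintenanceLoop(done <-chan struct{}) {
	ticker := time.NewTicker(maintenanceInterval)
	defer ticker.Stop()

	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for {
		select {
		case <-done:
			return
		case <-ticker.C:
			// Random sleep so servers that started together do not
			// run their checks at the same instant.
			time.Sleep(time.Duration(r.Float64() * float64(maintenanceInterval)))
			if err := runMaintenance(r); err != nil {
				// Back off once more after a failed pass.
				time.Sleep(time.Duration(r.Float64() * float64(maintenanceInterval)))
			}
		}
	}
}

func main() {
	done := make(chan struct{})
	go maintenanceLoop(done)
	time.Sleep(3 * time.Minute) // let the loop run a few ticks in this demo
	close(done)
}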
|
|
|
|