@ -30,210 +30,275 @@ const (
// Attempt to retry only this many number of times before
// giving up on the remote disk entirely after initialization.
globalStorageRetryThreshold = 1
// Interval to check health status of a node whether it has
// come back up online
globalStorageHealthCheckInterval = 5 * time . Minute
)
// retryToStorageErr normalizes transport-level "disk not found" errors so
// that callers of the storage API see the same error value for network
// disks as for any other disk.
//
// errDiskNotFoundFromNetError and errDiskNotFoundFromRPCShutdown are both
// mapped to errDiskNotFound; every other error (including nil) is returned
// unchanged.
func retryToStorageErr(err error) error {
	switch err {
	case errDiskNotFoundFromNetError, errDiskNotFoundFromRPCShutdown:
		return errDiskNotFound
	default:
		return err
	}
}
// retryStorage is an instance of StorageAPI which
// additionally verifies upon network shutdown if the
// underlying storage is available and is really
// formatted. After the initialization phase it will
// also cache when the underlying storage is offline
// to prevent needless calls and recheck the health of
// underlying storage in regular intervals.
//
// NOTE(review): offline and offlineTimestamp are plain fields and no
// lock is declared in this struct — confirm that a single retryStorage
// is never used from several goroutines at once.
type retryStorage struct {
// Underlying storage that every call is forwarded to.
remoteStorage StorageAPI
// Compared against globalStorageRetryThreshold in reInit to detect the
// post-initialization phase; presumably also the number of reconnect
// attempts — the retry loop is not visible here, confirm.
maxRetryAttempts int
// Presumably the base unit of the retry backoff — usage not visible
// here, confirm.
retryUnit time . Duration
// Presumably the upper bound of the retry backoff — usage not visible
// here, confirm.
retryCap time . Duration
offline bool // Mark whether node is offline
// Time at which the node was last marked offline or last re-checked
// by IsOffline.
offlineTimestamp time . Time // Last timestamp of checking status of node
}
// String representation of remoteStorage.
func ( f retryStorage ) String ( ) string {
func ( f * retryStorage ) String ( ) string {
return f . remoteStorage . String ( )
}
// Reconncts to underlying remote storage.
func ( f retryStorage ) Init ( ) ( err error ) {
return f . remoteStorage . Init ( )
// Reconne cts to underlying remote storage.
func ( f * retryStorage ) Init ( ) ( err error ) {
return retryToStorageErr ( f . remoteStorage . Init ( ) )
}
// Closes the underlying remote storage connection.
func ( f retryStorage ) Close ( ) ( err error ) {
return f . remoteStorage . Close ( )
func ( f * retryStorage ) Close ( ) ( err error ) {
return retryToStorageErr ( f . remoteStorage . Close ( ) )
}
// IsOffline reports whether the underlying remote storage is currently
// marked offline.
//
// While the storage is marked offline, a reconnect is attempted at most
// once per globalStorageHealthCheckInterval: when that much time has
// passed since offlineTimestamp, the timestamp is reset and reInit is
// called; if reInit succeeds the offline mark is cleared.
func (f *retryStorage) IsOffline() bool {
	// Online nodes need no health check.
	if !f.offline {
		return false
	}

	// Still inside the current interval: keep reporting offline without
	// touching the network.
	sinceLastCheck := UTCNow().Sub(f.offlineTimestamp)
	if sinceLastCheck < globalStorageHealthCheckInterval {
		return true
	}

	// Start a new interval before probing so that a failed probe is not
	// repeated until the interval has elapsed again.
	f.offlineTimestamp = UTCNow()
	if reconnectErr := f.reInit(nil); reconnectErr == nil {
		// Connection has been re-established, mark node as back online.
		f.offline = false
	}
	return f.offline
}
// DiskInfo - a retryable implementation of disk info.
func ( f retryStorage ) DiskInfo ( ) ( info disk . Info , err error ) {
func ( f * retryStorage ) DiskInfo ( ) ( info disk . Info , err error ) {
if f . IsOffline ( ) {
return info , errDiskNotFound
}
info , err = f . remoteStorage . DiskInfo ( )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . DiskInfo ( )
}
if f . reInitUponDiskNotFound ( err ) {
info , err = f . remoteStorage . DiskInfo ( )
return info , retryToStorageErr ( err )
}
return info , err
return info , retryToStorageErr ( err )
}
// MakeVol - a retryable implementation of creating a volume.
func ( f retryStorage ) MakeVol ( volume string ) ( err error ) {
func ( f * retryStorage ) MakeVol ( volume string ) ( err error ) {
if f . IsOffline ( ) {
return errDiskNotFound
}
err = f . remoteStorage . MakeVol ( volume )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . MakeVol ( volume )
}
if f . reInitUponDiskNotFound ( err ) {
return retryToStorageErr ( f . remoteStorage . MakeVol ( volume ) )
}
return err
return retryToStorageErr ( err )
}
// ListVols - a retryable implementation of listing all the volumes.
func ( f retryStorage ) ListVols ( ) ( vols [ ] VolInfo , err error ) {
func ( f * retryStorage ) ListVols ( ) ( vols [ ] VolInfo , err error ) {
if f . IsOffline ( ) {
return vols , errDiskNotFound
}
vols , err = f . remoteStorage . ListVols ( )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . ListVols ( )
}
if f . reInitUponDiskNotFound ( err ) {
vols , err = f . remoteStorage . ListVols ( )
return vols , retryToStorageErr ( err )
}
return vols , err
return vols , retryToStorageErr ( err )
}
// StatVol - a retryable implementation of stating a volume.
func ( f retryStorage ) StatVol ( volume string ) ( vol VolInfo , err error ) {
func ( f * retryStorage ) StatVol ( volume string ) ( vol VolInfo , err error ) {
if f . IsOffline ( ) {
return vol , errDiskNotFound
}
vol , err = f . remoteStorage . StatVol ( volume )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . StatVol ( volume )
}
if f . reInitUponDiskNotFound ( err ) {
vol , err = f . remoteStorage . StatVol ( volume )
return vol , retryToStorageErr ( err )
}
return vol , err
return vol , retryToStorageErr ( err )
}
// DeleteVol - a retryable implementation of deleting a volume.
func ( f retryStorage ) DeleteVol ( volume string ) ( err error ) {
func ( f * retryStorage ) DeleteVol ( volume string ) ( err error ) {
if f . IsOffline ( ) {
return errDiskNotFound
}
err = f . remoteStorage . DeleteVol ( volume )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . DeleteVol ( volume )
}
if f . reInitUponDiskNotFound ( err ) {
return retryToStorageErr ( f . remoteStorage . DeleteVol ( volume ) )
}
return err
return retryToStorageErr ( err )
}
// PrepareFile - a retryable implementation of preparing a file.
func ( f retryStorage ) PrepareFile ( volume , path string , length int64 ) ( err error ) {
func ( f * retryStorage ) PrepareFile ( volume , path string , length int64 ) ( err error ) {
if f . IsOffline ( ) {
return errDiskNotFound
}
err = f . remoteStorage . PrepareFile ( volume , path , length )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . PrepareFile ( volume , path , length )
}
if f . reInitUponDiskNotFound ( err ) {
return retryToStorageErr ( f . remoteStorage . PrepareFile ( volume , path , length ) )
}
return err
return retryToStorageErr ( err )
}
// AppendFile - a retryable implementation of append to a file.
func ( f retryStorage ) AppendFile ( volume , path string , buffer [ ] byte ) ( err error ) {
func ( f * retryStorage ) AppendFile ( volume , path string , buffer [ ] byte ) ( err error ) {
if f . IsOffline ( ) {
return errDiskNotFound
}
err = f . remoteStorage . AppendFile ( volume , path , buffer )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . AppendFile ( volume , path , buffer )
}
if f . reInitUponDiskNotFound ( err ) {
return retryToStorageErr ( f . remoteStorage . AppendFile ( volume , path , buffer ) )
}
return err
return retryToStorageErr ( err )
}
// StatFile - a retryable implementation of stating a file.
func ( f retryStorage ) StatFile ( volume , path string ) ( fileInfo FileInfo , err error ) {
func ( f * retryStorage ) StatFile ( volume , path string ) ( fileInfo FileInfo , err error ) {
if f . IsOffline ( ) {
return fileInfo , errDiskNotFound
}
fileInfo , err = f . remoteStorage . StatFile ( volume , path )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . StatFile ( volume , path )
}
if f . reInitUponDiskNotFound ( err ) {
fileInfo , err = f . remoteStorage . StatFile ( volume , path )
return fileInfo , retryToStorageErr ( err )
}
return fileInfo , err
return fileInfo , retryToStorageErr ( err )
}
// ReadAll - a retryable implementation of reading all the content from a file.
func ( f retryStorage ) ReadAll ( volume , path string ) ( buf [ ] byte , err error ) {
func ( f * retryStorage ) ReadAll ( volume , path string ) ( buf [ ] byte , err error ) {
if f . IsOffline ( ) {
return buf , errDiskNotFound
}
buf , err = f . remoteStorage . ReadAll ( volume , path )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . ReadAll ( volume , path )
}
if f . reInitUponDiskNotFound ( err ) {
buf , err = f . remoteStorage . ReadAll ( volume , path )
return buf , retryToStorageErr ( err )
}
return buf , err
return buf , retryToStorageErr ( err )
}
// ReadFile - a retryable implementation of reading at offset from a file.
func ( f retryStorage ) ReadFile ( volume , path string , offset int64 , buffer [ ] byte ) ( m int64 , err error ) {
func ( f * retryStorage ) ReadFile ( volume , path string , offset int64 , buffer [ ] byte ) ( m int64 , err error ) {
if f . IsOffline ( ) {
return m , errDiskNotFound
}
m , err = f . remoteStorage . ReadFile ( volume , path , offset , buffer )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . ReadFile ( volume , path , offset , buffer )
}
if f . reInitUponDiskNotFound ( err ) {
m , err = f . remoteStorage . ReadFile ( volume , path , offset , buffer )
return m , retryToStorageErr ( err )
}
return m , err
return m , retryToStorageErr ( err )
}
// ReadFileWithVerify - a retryable implementation of reading at
// offset from a file with verification.
func ( f retryStorage ) ReadFileWithVerify ( volume , path string , offset int64 , buffer [ ] byte ,
func ( f * retryStorage ) ReadFileWithVerify ( volume , path string , offset int64 , buffer [ ] byte ,
algo HashAlgo , expectedHash string ) ( m int64 , err error ) {
if f . IsOffline ( ) {
return m , errDiskNotFound
}
m , err = f . remoteStorage . ReadFileWithVerify ( volume , path , offset , buffer ,
algo , expectedHash )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . ReadFileWithVerify ( volume , path ,
offset , buffer , algo , expectedHash )
}
if f . reInitUponDiskNotFound ( err ) {
m , err = f . remoteStorage . ReadFileWithVerify ( volume , path ,
offset , buffer , algo , expectedHash )
return m , retryToStorageErr ( err )
}
return m , err
return m , retryToStorageErr ( err )
}
// ListDir - a retryable implementation of listing directory entries.
func ( f retryStorage ) ListDir ( volume , path string ) ( entries [ ] string , err error ) {
func ( f * retryStorage ) ListDir ( volume , path string ) ( entries [ ] string , err error ) {
if f . IsOffline ( ) {
return entries , errDiskNotFound
}
entries , err = f . remoteStorage . ListDir ( volume , path )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . ListDir ( volume , path )
}
if f . reInitUponDiskNotFound ( err ) {
entries , err = f . remoteStorage . ListDir ( volume , path )
return entries , retryToStorageErr ( err )
}
return entries , err
return entries , retryToStorageErr ( err )
}
// DeleteFile - a retryable implementation of deleting a file.
func ( f retryStorage ) DeleteFile ( volume , path string ) ( err error ) {
func ( f * retryStorage ) DeleteFile ( volume , path string ) ( err error ) {
if f . IsOffline ( ) {
return errDiskNotFound
}
err = f . remoteStorage . DeleteFile ( volume , path )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . DeleteFile ( volume , path )
}
if f . reInitUponDiskNotFound ( err ) {
return retryToStorageErr ( f . remoteStorage . DeleteFile ( volume , path ) )
}
return err
return retryToStorageErr ( err )
}
// RenameFile - a retryable implementation of renaming a file.
func ( f retryStorage ) RenameFile ( srcVolume , srcPath , dstVolume , dstPath string ) ( err error ) {
func ( f * retryStorage ) RenameFile ( srcVolume , srcPath , dstVolume , dstPath string ) ( err error ) {
if f . IsOffline ( ) {
return errDiskNotFound
}
err = f . remoteStorage . RenameFile ( srcVolume , srcPath , dstVolume , dstPath )
if err == errDiskNotFound {
err = f . reInit ( )
if err == nil {
return f . remoteStorage . RenameFile ( srcVolume , srcPath , dstVolume , dstPath )
}
if f . reInitUponDiskNotFound ( err ) {
return retryToStorageErr ( f . remoteStorage . RenameFile ( srcVolume , srcPath , dstVolume , dstPath ) )
}
return err
return retryToStorageErr ( err )
}
// reInitUponDiskNotFound tries to re-establish the connection when err is
// one of the disk-not-found errors (errDiskNotFound,
// errDiskNotFoundFromNetError or errDiskNotFoundFromRPCShutdown).
//
// It returns true only when err was such an error and reInit succeeded,
// i.e. when the caller may retry the failed operation. For any other
// error (including nil) no reconnect is attempted and false is returned.
func (f *retryStorage) reInitUponDiskNotFound(err error) bool {
	switch err {
	case errDiskNotFound, errDiskNotFoundFromNetError, errDiskNotFoundFromRPCShutdown:
		reconnectErr := f.reInit(err)
		return reconnectErr == nil
	}
	return false
}
// Connect and attempt to load the format from a disconnected node,
// attempts three times before giving up.
func ( f retryStorage ) reInit ( ) ( err error ) {
func ( f * retryStorage ) reInit ( e error ) ( err error ) {
// Only after initialization and minimum of one interval
// has passed (to prevent marking a node as offline right
// after initialization), check whether node has gone offline
if f . maxRetryAttempts == globalStorageRetryThreshold &&
UTCNow ( ) . Sub ( f . offlineTimestamp ) >= globalStorageHealthCheckInterval {
if e == errDiskNotFoundFromNetError { // Make node offline due to network error
f . offline = true // Marking node offline
f . offlineTimestamp = UTCNow ( )
return errDiskNotFound
}
// Continue for other errors like RPC shutdown (and retry connection below)
}
// Close the underlying connection.
f . remoteStorage . Close ( ) // Error here is purposefully ignored.