XL: Bring in support for object versions written during writeQuorum. (#1762)
Erasure is initialized as needed depending on the quorum and onlineDisks. This way we can manage the quorum at the object layer.
master
parent
cae4782973
commit
553fdb9211
@ -0,0 +1,180 @@ |
||||
package main |
||||
|
||||
import ( |
||||
"path" |
||||
"sync" |
||||
) |
||||
|
||||
// highestInt returns the largest value found in intSlice, but never less
// than 1: the running maximum starts at 1, so an empty slice, or a slice
// holding only values below 1, yields 1.
func highestInt(intSlice []int64) (highestInteger int64) {
	highestInteger = 1
	for i := 0; i < len(intSlice); i++ {
		if candidate := intSlice[i]; candidate > highestInteger {
			highestInteger = candidate
		}
	}
	return highestInteger
}
||||
|
||||
// Extracts objects versions from xlMetaV1 slice and returns version slice.
|
||||
func listObjectVersions(partsMetadata []xlMetaV1, errs []error) (versions []int64) { |
||||
versions = make([]int64, len(partsMetadata)) |
||||
for index, metadata := range partsMetadata { |
||||
if errs[index] == nil { |
||||
versions[index] = metadata.Stat.Version |
||||
} else { |
||||
versions[index] = -1 |
||||
} |
||||
} |
||||
return versions |
||||
} |
||||
|
||||
// Reads all `xl.json` metadata as a xlMetaV1 slice.
|
||||
// Returns error slice indicating the failed metadata reads.
|
||||
func (xl xlObjects) readAllXLMetadata(bucket, object string) ([]xlMetaV1, []error) { |
||||
errs := make([]error, len(xl.storageDisks)) |
||||
metadataArray := make([]xlMetaV1, len(xl.storageDisks)) |
||||
xlMetaPath := path.Join(object, xlMetaJSONFile) |
||||
var wg = &sync.WaitGroup{} |
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
offset := int64(0) |
||||
metadataReader, err := disk.ReadFile(bucket, xlMetaPath, offset) |
||||
if err != nil { |
||||
errs[index] = err |
||||
return |
||||
} |
||||
defer metadataReader.Close() |
||||
|
||||
_, err = metadataArray[index].ReadFrom(metadataReader) |
||||
if err != nil { |
||||
// Unable to parse xl.json, set error.
|
||||
errs[index] = err |
||||
return |
||||
} |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all the routines to finish.
|
||||
wg.Wait() |
||||
|
||||
// Return all the metadata.
|
||||
return metadataArray, errs |
||||
} |
||||
|
||||
// error based on total errors and read quorum.
|
||||
func (xl xlObjects) reduceError(errs []error) error { |
||||
fileNotFoundCount := 0 |
||||
longNameCount := 0 |
||||
diskNotFoundCount := 0 |
||||
volumeNotFoundCount := 0 |
||||
diskAccessDeniedCount := 0 |
||||
for _, err := range errs { |
||||
if err == errFileNotFound { |
||||
fileNotFoundCount++ |
||||
} else if err == errFileNameTooLong { |
||||
longNameCount++ |
||||
} else if err == errDiskNotFound { |
||||
diskNotFoundCount++ |
||||
} else if err == errVolumeAccessDenied { |
||||
diskAccessDeniedCount++ |
||||
} else if err == errVolumeNotFound { |
||||
volumeNotFoundCount++ |
||||
} |
||||
} |
||||
// If we have errors with 'file not found' greater than
|
||||
// readQuorum, return as errFileNotFound.
|
||||
// else if we have errors with 'volume not found'
|
||||
// greater than readQuorum, return as errVolumeNotFound.
|
||||
if fileNotFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return errFileNotFound |
||||
} else if longNameCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return errFileNameTooLong |
||||
} else if volumeNotFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return errVolumeNotFound |
||||
} |
||||
// If we have errors with disk not found equal to the
|
||||
// number of disks, return as errDiskNotFound.
|
||||
if diskNotFoundCount == len(xl.storageDisks) { |
||||
return errDiskNotFound |
||||
} else if diskNotFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
// If we have errors with 'disk not found'
|
||||
// greater than readQuorum, return as errFileNotFound.
|
||||
return errFileNotFound |
||||
} |
||||
// If we have errors with disk not found equal to the
|
||||
// number of disks, return as errDiskNotFound.
|
||||
if diskAccessDeniedCount == len(xl.storageDisks) { |
||||
return errVolumeAccessDenied |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// Similar to 'len(slice)' but returns the actualelements count
|
||||
// skipping the unallocated elements.
|
||||
func diskCount(disks []StorageAPI) int { |
||||
diskCount := 0 |
||||
for _, disk := range disks { |
||||
if disk == nil { |
||||
continue |
||||
} |
||||
diskCount++ |
||||
} |
||||
return diskCount |
||||
} |
||||
|
||||
func (xl xlObjects) shouldHeal(onlineDisks []StorageAPI) (heal bool) { |
||||
onlineDiskCount := diskCount(onlineDisks) |
||||
// If online disks count is lesser than configured disks, most
|
||||
// probably we need to heal the file, additionally verify if the
|
||||
// count is lesser than readQuorum, if not we throw an error.
|
||||
if onlineDiskCount < len(xl.storageDisks) { |
||||
// Online disks lesser than total storage disks, needs to be
|
||||
// healed. unless we do not have readQuorum.
|
||||
heal = true |
||||
// Verify if online disks count are lesser than readQuorum
|
||||
// threshold, return an error.
|
||||
if onlineDiskCount < xl.readQuorum { |
||||
errorIf(errReadQuorum, "Unable to establish read quorum, disks are offline.") |
||||
return false |
||||
} |
||||
} |
||||
return heal |
||||
} |
||||
|
||||
// Returns slice of online disks needed.
|
||||
// - slice returing readable disks.
|
||||
// - xlMetaV1
|
||||
// - bool value indicating if healing is needed.
|
||||
// - error if any.
|
||||
func (xl xlObjects) listOnlineDisks(bucket, object string) (onlineDisks []StorageAPI, version int64, err error) { |
||||
onlineDisks = make([]StorageAPI, len(xl.storageDisks)) |
||||
partsMetadata, errs := xl.readAllXLMetadata(bucket, object) |
||||
if err = xl.reduceError(errs); err != nil { |
||||
if err == errFileNotFound { |
||||
// For file not found, treat as if disks are available
|
||||
// return all the configured ones.
|
||||
onlineDisks = xl.storageDisks |
||||
return onlineDisks, 1, nil |
||||
} |
||||
return nil, 0, err |
||||
} |
||||
highestVersion := int64(0) |
||||
// List all the file versions from partsMetadata list.
|
||||
versions := listObjectVersions(partsMetadata, errs) |
||||
|
||||
// Get highest object version.
|
||||
highestVersion = highestInt(versions) |
||||
|
||||
// Pick online disks with version set to highestVersion.
|
||||
for index, version := range versions { |
||||
if version == highestVersion { |
||||
onlineDisks[index] = xl.storageDisks[index] |
||||
} else { |
||||
onlineDisks[index] = nil |
||||
} |
||||
} |
||||
return onlineDisks, highestVersion, nil |
||||
} |
@ -0,0 +1,85 @@ |
||||
package main |
||||
|
||||
import ( |
||||
"path" |
||||
"sync" |
||||
) |
||||
|
||||
// This function does the following check, suppose
|
||||
// object is "a/b/c/d", stat makes sure that objects ""a/b/c""
|
||||
// "a/b" and "a" do not exist.
|
||||
func (xl xlObjects) parentDirIsObject(bucket, parent string) bool { |
||||
var isParentDirObject func(string) bool |
||||
isParentDirObject = func(p string) bool { |
||||
if p == "." { |
||||
return false |
||||
} |
||||
if xl.isObject(bucket, p) { |
||||
// If there is already a file at prefix "p" return error.
|
||||
return true |
||||
} |
||||
// Check if there is a file as one of the parent paths.
|
||||
return isParentDirObject(path.Dir(p)) |
||||
} |
||||
return isParentDirObject(parent) |
||||
} |
||||
|
||||
func (xl xlObjects) isObject(bucket, prefix string) bool { |
||||
// Create errs and volInfo slices of storageDisks size.
|
||||
var errs = make([]error, len(xl.storageDisks)) |
||||
|
||||
// Allocate a new waitgroup.
|
||||
var wg = &sync.WaitGroup{} |
||||
for index, disk := range xl.storageDisks { |
||||
wg.Add(1) |
||||
// Stat file on all the disks in a routine.
|
||||
go func(index int, disk StorageAPI) { |
||||
defer wg.Done() |
||||
_, err := disk.StatFile(bucket, path.Join(prefix, xlMetaJSONFile)) |
||||
if err != nil { |
||||
errs[index] = err |
||||
return |
||||
} |
||||
errs[index] = nil |
||||
}(index, disk) |
||||
} |
||||
|
||||
// Wait for all the Stat operations to finish.
|
||||
wg.Wait() |
||||
|
||||
var errFileNotFoundCount int |
||||
for _, err := range errs { |
||||
if err != nil { |
||||
if err == errFileNotFound { |
||||
errFileNotFoundCount++ |
||||
// If we have errors with file not found greater than allowed read
|
||||
// quorum we return err as errFileNotFound.
|
||||
if errFileNotFoundCount > len(xl.storageDisks)-xl.readQuorum { |
||||
return false |
||||
} |
||||
continue |
||||
} |
||||
errorIf(err, "Unable to access file "+path.Join(bucket, prefix)) |
||||
return false |
||||
} |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// statPart - stat a part file.
|
||||
func (xl xlObjects) statPart(bucket, objectPart string) (fileInfo FileInfo, err error) { |
||||
// Count for errors encountered.
|
||||
var xlJSONErrCount = 0 |
||||
|
||||
// Return the first success entry based on the selected random disk.
|
||||
for xlJSONErrCount < len(xl.storageDisks) { |
||||
// Choose a random disk on each attempt, do not hit the same disk all the time.
|
||||
disk := xl.getRandomDisk() // Pick a random disk.
|
||||
fileInfo, err = disk.StatFile(bucket, objectPart) |
||||
if err == nil { |
||||
return fileInfo, nil |
||||
} |
||||
xlJSONErrCount++ // Update error count.
|
||||
} |
||||
return FileInfo{}, err |
||||
} |
Loading…
Reference in new issue