@ -65,38 +65,6 @@ func isSuccessDataBlocks(enBlocks [][]byte, dataBlocks int) bool {
return successDataBlocksCount >= dataBlocks
return successDataBlocksCount >= dataBlocks
}
}
// Return ordered partsMetadata depeinding on distribution.
func getOrderedPartsMetadata ( distribution [ ] int , partsMetadata [ ] xlMetaV1 ) ( orderedPartsMetadata [ ] xlMetaV1 ) {
orderedPartsMetadata = make ( [ ] xlMetaV1 , len ( partsMetadata ) )
for index := range partsMetadata {
blockIndex := distribution [ index ]
orderedPartsMetadata [ blockIndex - 1 ] = partsMetadata [ index ]
}
return orderedPartsMetadata
}
// getOrderedDisks - get ordered disks from erasure distribution.
// returns ordered slice of disks from their actual distribution.
func getOrderedDisks ( distribution [ ] int , disks [ ] StorageAPI ) ( orderedDisks [ ] StorageAPI ) {
orderedDisks = make ( [ ] StorageAPI , len ( disks ) )
// From disks gets ordered disks.
for index := range disks {
blockIndex := distribution [ index ]
orderedDisks [ blockIndex - 1 ] = disks [ index ]
}
return orderedDisks
}
// Return ordered CheckSums depending on the distribution.
func getOrderedCheckSums ( distribution [ ] int , blockCheckSums [ ] checkSumInfo ) ( orderedBlockCheckSums [ ] checkSumInfo ) {
orderedBlockCheckSums = make ( [ ] checkSumInfo , len ( blockCheckSums ) )
for index := range blockCheckSums {
blockIndex := distribution [ index ]
orderedBlockCheckSums [ blockIndex - 1 ] = blockCheckSums [ index ]
}
return orderedBlockCheckSums
}
// Return readable disks slice from which we can read parallelly.
// Return readable disks slice from which we can read parallelly.
func getReadDisks ( orderedDisks [ ] StorageAPI , index int , dataBlocks int ) ( readDisks [ ] StorageAPI , nextIndex int , err error ) {
func getReadDisks ( orderedDisks [ ] StorageAPI , index int , dataBlocks int ) ( readDisks [ ] StorageAPI , nextIndex int , err error ) {
readDisks = make ( [ ] StorageAPI , len ( orderedDisks ) )
readDisks = make ( [ ] StorageAPI , len ( orderedDisks ) )
@ -192,27 +160,16 @@ func parallelRead(volume, path string, readDisks []StorageAPI, orderedDisks []St
// are decoded into a data block. Data block is trimmed for given offset and length,
// are decoded into a data block. Data block is trimmed for given offset and length,
// then written to given writer. This function also supports bit-rot detection by
// then written to given writer. This function also supports bit-rot detection by
// verifying checksum of individual block's checksum.
// verifying checksum of individual block's checksum.
func erasureReadFile ( writer io . Writer , disks [ ] StorageAPI , volume string , path string , partName string , eInfos [ ] erasureInfo , offset int64 , length int64 , totalLength int64 ) ( int64 , error ) {
func erasureReadFile ( writer io . Writer , disks [ ] StorageAPI , volume string , path string , offset int64 , length int64 , totalLength int64 , blockSize int64 , dataBlocks int , parityBlocks int , checkSums [ ] string ) ( int64 , error ) {
// Offset and length cannot be negative.
// Offset and length cannot be negative.
if offset < 0 || length < 0 {
if offset < 0 || length < 0 {
return 0 , errUnexpected
return 0 , errUnexpected
}
}
// Pick one erasure info.
eInfo := pickValidErasureInfo ( eInfos )
// Gather previously calculated block checksums.
blockCheckSums := metaPartBlockChecksums ( disks , eInfos , partName )
// []orderedDisks will have first eInfo.DataBlocks disks as data
// disks and rest will be parity.
orderedDisks := getOrderedDisks ( eInfo . Distribution , disks )
orderedBlockCheckSums := getOrderedCheckSums ( eInfo . Distribution , blockCheckSums )
// bitRotVerify verifies if the file on a particular disk doesn't have bitrot
// bitRotVerify verifies if the file on a particular disk doesn't have bitrot
// by verifying the hash of the contents of the file.
// by verifying the hash of the contents of the file.
bitRotVerify := func ( ) func ( diskIndex int ) bool {
bitRotVerify := func ( ) func ( diskIndex int ) bool {
verified := make ( [ ] bool , len ( or deredD isks) )
verified := make ( [ ] bool , len ( disks ) )
// Return closure so that we have reference to []verified and
// Return closure so that we have reference to []verified and
// not recalculate the hash on it every time the function is
// not recalculate the hash on it every time the function is
// called for the same disk.
// called for the same disk.
@ -222,7 +179,7 @@ func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path s
return true
return true
}
}
// Is this a valid block?
// Is this a valid block?
isValid := isValidBlock ( or deredD isks[ diskIndex ] , volume , path , orderedBlo ckC heckSums[ diskIndex ] )
isValid := isValidBlock ( disks [ diskIndex ] , volume , path , checkSums [ diskIndex ] )
verified [ diskIndex ] = isValid
verified [ diskIndex ] = isValid
return isValid
return isValid
}
}
@ -235,32 +192,32 @@ func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path s
// chunkSize is calculated such that chunkSize*DataBlocks accommodates BlockSize bytes.
// chunkSize is calculated such that chunkSize*DataBlocks accommodates BlockSize bytes.
// So chunkSize*DataBlocks can be slightly larger than BlockSize if BlockSize is not divisible by
// So chunkSize*DataBlocks can be slightly larger than BlockSize if BlockSize is not divisible by
// DataBlocks. The extra space will have 0-padding.
// DataBlocks. The extra space will have 0-padding.
chunkSize := getEncodedBlockLen ( eInfo . BlockSize , eInfo . D ataBlocks)
chunkSize := getEncodedBlockLen ( blockSize , d ataBlocks)
// Get start and end block, also bytes to be skipped based on the input offset.
// Get start and end block, also bytes to be skipped based on the input offset.
startBlock , endBlock , bytesToSkip := getBlockInfo ( offset , totalLength , eInfo . B lockSize)
startBlock , endBlock , bytesToSkip := getBlockInfo ( offset , totalLength , b lockSize)
// For each block, read chunk from each disk. If we are able to read all the data disks then we don't
// For each block, read chunk from each disk. If we are able to read all the data disks then we don't
// need to read parity disks. If one of the data disk is missing we need to read DataBlocks+1 number
// need to read parity disks. If one of the data disk is missing we need to read DataBlocks+1 number
// of disks. Once read, we Reconstruct() missing data if needed and write it to the given writer.
// of disks. Once read, we Reconstruct() missing data if needed and write it to the given writer.
for block := startBlock ; bytesWritten < length ; block ++ {
for block := startBlock ; bytesWritten < length ; block ++ {
// Each element of enBlocks holds curChunkSize'd amount of data read from its corresponding disk.
// Each element of enBlocks holds curChunkSize'd amount of data read from its corresponding disk.
enBlocks := make ( [ ] [ ] byte , len ( or deredD isks) )
enBlocks := make ( [ ] [ ] byte , len ( disks ) )
// enBlocks data can have 0-padding hence we need to figure the exact number
// enBlocks data can have 0-padding hence we need to figure the exact number
// of bytes we want to read from enBlocks.
// of bytes we want to read from enBlocks.
blockSize := eInfo . B lockSize
blockSize := b lockSize
// curChunkSize is chunkSize until end block.
// curChunkSize is chunkSize until end block.
curChunkSize := chunkSize
curChunkSize := chunkSize
// We have endBlock, verify if we need to have padding.
// We have endBlock, verify if we need to have padding.
if block == endBlock && ( totalLength % eInfo . B lockSize != 0 ) {
if block == endBlock && ( totalLength % b lockSize != 0 ) {
// If this is the last block and size of the block is < BlockSize.
// If this is the last block and size of the block is < BlockSize.
curChunkSize = getEncodedBlockLen ( totalLength % eInfo . BlockSize , eInfo . D ataBlocks)
curChunkSize = getEncodedBlockLen ( totalLength % blockSize , d ataBlocks)
// For the last block, the block size can be less than BlockSize.
// For the last block, the block size can be less than BlockSize.
blockSize = totalLength % eInfo . B lockSize
blockSize = totalLength % b lockSize
}
}
// Block offset.
// Block offset.
@ -277,25 +234,29 @@ func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path s
// readDisks - disks from which we need to read in parallel.
// readDisks - disks from which we need to read in parallel.
var readDisks [ ] StorageAPI
var readDisks [ ] StorageAPI
var err error
var err error
readDisks , nextIndex , err = getReadDisks ( orderedDisks , nextIndex , eInfo . DataBlocks )
// get readable disks slice from which we can read parallelly.
readDisks , nextIndex , err = getReadDisks ( disks , nextIndex , dataBlocks )
if err != nil {
if err != nil {
return bytesWritten , err
return bytesWritten , err
}
}
parallelRead ( volume , path , readDisks , orderedDisks , enBlocks , blockOffset , curChunkSize , bitRotVerify )
// Issue a parallel read across the disks specified in readDisks.
if isSuccessDecodeBlocks ( enBlocks , eInfo . DataBlocks ) {
parallelRead ( volume , path , readDisks , disks , enBlocks , blockOffset , curChunkSize , bitRotVerify )
if isSuccessDecodeBlocks ( enBlocks , dataBlocks ) {
// If enough blocks are available to do rs.Reconstruct()
// If enough blocks are available to do rs.Reconstruct()
break
break
}
}
if nextIndex == len ( or deredD isks) {
if nextIndex == len ( disks ) {
// No more disks to read from.
// No more disks to read from.
return bytesWritten , errXLReadQuorum
return bytesWritten , errXLReadQuorum
}
}
// We do not have enough enough data blocks to reconstruct the data
// hence continue the for-loop till we have enough data blocks.
}
}
// If we have all the data blocks no need to decode, continue to write.
// If we have all the data blocks no need to decode, continue to write.
if ! isSuccessDataBlocks ( enBlocks , eInfo . D ataBlocks) {
if ! isSuccessDataBlocks ( enBlocks , d ataBlocks) {
// Reconstruct the missing data blocks.
// Reconstruct the missing data blocks.
if err := decodeData ( enBlocks , eInfo . D ataBlocks, eInfo . P arityBlocks) ; err != nil {
if err := decodeData ( enBlocks , d ataBlocks, p arityBlocks) ; err != nil {
return bytesWritten , err
return bytesWritten , err
}
}
}
}
@ -314,7 +275,7 @@ func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path s
}
}
// Write data blocks.
// Write data blocks.
n , err := writeDataBlocks ( writer , enBlocks , eInfo . D ataBlocks, outOffset , outSize )
n , err := writeDataBlocks ( writer , enBlocks , d ataBlocks, outOffset , outSize )
if err != nil {
if err != nil {
return bytesWritten , err
return bytesWritten , err
}
}
@ -337,34 +298,21 @@ func (e erasureInfo) PartObjectChecksum(partName string) checkSumInfo {
return checkSumInfo { }
return checkSumInfo { }
}
}
// xlMetaPartBlockChecksums - get block checksums for a given part.
func metaPartBlockChecksums ( disks [ ] StorageAPI , eInfos [ ] erasureInfo , partName string ) ( blockCheckSums [ ] checkSumInfo ) {
for index := range disks {
if eInfos [ index ] . IsValid ( ) {
// Save the read checksums for a given part.
blockCheckSums = append ( blockCheckSums , eInfos [ index ] . PartObjectChecksum ( partName ) )
} else {
blockCheckSums = append ( blockCheckSums , checkSumInfo { } )
}
}
return blockCheckSums
}
// isValidBlock - calculates the checksum hash for the block and
// isValidBlock - calculates the checksum hash for the block and
// validates if its correct returns true for valid cases, false otherwise.
// validates if its correct returns true for valid cases, false otherwise.
func isValidBlock ( disk StorageAPI , volume , path string , blockCheckSum checkSumInfo ) ( ok bool ) {
func isValidBlock ( disk StorageAPI , volume , path string , checksum string ) ( ok bool ) {
// Disk is not available, not a valid block.
// Disk is not available, not a valid block.
if disk == nil {
if disk == nil {
return false
return false
}
}
// Read everything for a given block and calculate hash.
// Read everything for a given block and calculate hash.
hashWriter := newHash ( blockCheckSum . Algorithm )
hashWriter := newHash ( "blake2b" )
hashBytes , err := hashSum ( disk , volume , path , hashWriter )
hashBytes , err := hashSum ( disk , volume , path , hashWriter )
if err != nil {
if err != nil {
errorIf ( err , "Unable to calculate checksum %s/%s" , volume , path )
errorIf ( err , "Unable to calculate checksum %s/%s" , volume , path )
return false
return false
}
}
return hex . EncodeToString ( hashBytes ) == blockCheckSum . Hash
return hex . EncodeToString ( hashBytes ) == checksum
}
}
// decodeData - decode encoded blocks.
// decodeData - decode encoded blocks.