XL: Fix GetObject erasure decode issues. (#1793)

Branch: master
Author: Harshavardhana, 9 years ago
parent 5e8de786b3
commit a4a0ea605b
12 changed files:

  1. erasure-readfile.go (157)
  2. erasure-utils.go (26)
  3. format-config-v1.go (2)
  4. fs-v1-metadata.go (2)
  5. fs-v1-multipart.go (4)
  6. fs-v1.go (10)
  7. object-common.go (5)
  8. xl-v1-healing.go (2)
  9. xl-v1-metadata.go (9)
  10. xl-v1-multipart-common.go (2)
  11. xl-v1-multipart.go (8)
  12. xl-v1-object.go (51)

erasure-readfile.go
@@ -16,114 +16,83 @@
 package main

-import (
-	"bytes"
-	"errors"
-	"io"
-)
+import "errors"

 // ReadFile - decoded erasure coded file.
-func (e erasure) ReadFile(volume, path string, startOffset int64, totalSize int64) (io.Reader, error) {
-	var totalLeft = totalSize
-	var bufWriter = new(bytes.Buffer)
-	for totalLeft > 0 {
-		// Figure out the right blockSize as it was encoded before.
-		var curBlockSize int64
-		if erasureBlockSize < totalLeft {
-			curBlockSize = erasureBlockSize
-		} else {
-			curBlockSize = totalLeft
-		}
-		// Calculate the current encoded block size.
-		curEncBlockSize := getEncodedBlockLen(curBlockSize, e.DataBlocks)
-		// Allocate encoded blocks up to storage disks.
-		enBlocks := make([][]byte, len(e.storageDisks))
-		// Counter to keep success data blocks.
-		var successDataBlocksCount = 0
-		var noReconstruct bool // Set for no reconstruction.
-		// Read from all the disks.
-		for index, disk := range e.storageDisks {
-			blockIndex := e.distribution[index] - 1
-			// Initialize shard slice and fill the data from each parts.
-			enBlocks[blockIndex] = make([]byte, curEncBlockSize)
-			if disk == nil {
-				enBlocks[blockIndex] = nil
-			} else {
-				var offset = int64(0)
-				// Read the necessary blocks.
-				_, err := disk.ReadFile(volume, path, offset, enBlocks[blockIndex])
-				if err != nil {
-					enBlocks[blockIndex] = nil
-				}
-			}
-			// Verify if we have successfully read all the data blocks.
-			if blockIndex < e.DataBlocks && enBlocks[blockIndex] != nil {
-				successDataBlocksCount++
-				// Set when we have all the data blocks and no
-				// reconstruction is needed, so that we can avoid
-				// erasure reconstruction.
-				noReconstruct = successDataBlocksCount == e.DataBlocks
-				if noReconstruct {
-					// Break out we have read all the data blocks.
-					break
-				}
-			}
-		}
-		// Check blocks if they are all zero in length, we have corruption return error.
-		if checkBlockSize(enBlocks) == 0 {
-			return nil, errDataCorrupt
-		}
-		// Verify if reconstruction is needed, proceed with reconstruction.
-		if !noReconstruct {
-			err := e.ReedSolomon.Reconstruct(enBlocks)
-			if err != nil {
-				return nil, err
-			}
-			// Verify reconstructed blocks (parity).
-			ok, err := e.ReedSolomon.Verify(enBlocks)
-			if err != nil {
-				return nil, err
-			}
-			if !ok {
-				// Blocks cannot be reconstructed, corrupted data.
-				err = errors.New("Verification failed after reconstruction, data likely corrupted.")
-				return nil, err
-			}
-		}
-		// Get data blocks from encoded blocks.
-		dataBlocks := getDataBlocks(enBlocks, e.DataBlocks, int(curBlockSize))
-		// Verify if the offset is right for the block, if not move to
-		// the next block.
-		if startOffset > 0 {
-			startOffset = startOffset - int64(len(dataBlocks))
-			// Start offset is greater than or equal to zero, skip the dataBlocks.
-			if startOffset >= 0 {
-				totalLeft = totalLeft - erasureBlockSize
-				continue
-			}
-			// Now get back the remaining offset if startOffset is negative.
-			startOffset = startOffset + int64(len(dataBlocks))
-		}
-		// Copy data blocks.
-		_, err := bufWriter.Write(dataBlocks[startOffset:])
-		if err != nil {
-			return nil, err
-		}
-		// Reset dataBlocks to relenquish memory.
-		dataBlocks = nil
-		// Save what's left after reading erasureBlockSize.
-		totalLeft = totalLeft - erasureBlockSize
-	}
-	return bufWriter, nil
+func (e erasure) ReadFile(volume, path string, bufferOffset int64, startOffset int64, buffer []byte) (int64, error) {
+	// Calculate the current encoded block size.
+	curEncBlockSize := getEncodedBlockLen(int64(len(buffer)), e.DataBlocks)
+	offsetEncOffset := getEncodedBlockLen(startOffset, e.DataBlocks)
+
+	// Allocate encoded blocks up to storage disks.
+	enBlocks := make([][]byte, len(e.storageDisks))
+
+	// Counter to keep success data blocks.
+	var successDataBlocksCount = 0
+	var noReconstruct bool // Set for no reconstruction.
+
+	// Read from all the disks.
+	for index, disk := range e.storageDisks {
+		blockIndex := e.distribution[index] - 1
+		// Initialize shard slice and fill the data from each parts.
+		enBlocks[blockIndex] = make([]byte, curEncBlockSize)
+		if disk == nil {
+			enBlocks[blockIndex] = nil
+			continue
+		}
+		// Read the necessary blocks.
+		_, err := disk.ReadFile(volume, path, offsetEncOffset, enBlocks[blockIndex])
+		if err != nil {
+			enBlocks[blockIndex] = nil
+		}
+		// Verify if we have successfully read all the data blocks.
+		if blockIndex < e.DataBlocks && enBlocks[blockIndex] != nil {
+			successDataBlocksCount++
+			// Set when we have all the data blocks and no
+			// reconstruction is needed, so that we can avoid
+			// erasure reconstruction.
+			noReconstruct = successDataBlocksCount == e.DataBlocks
+			if noReconstruct {
+				// Break out we have read all the data blocks.
+				break
+			}
+		}
+	}
+
+	// Check blocks if they are all zero in length, we have corruption return error.
+	if checkBlockSize(enBlocks) == 0 {
+		return 0, errDataCorrupt
+	}
+
+	// Verify if reconstruction is needed, proceed with reconstruction.
+	if !noReconstruct {
+		err := e.ReedSolomon.Reconstruct(enBlocks)
+		if err != nil {
+			return 0, err
+		}
+		// Verify reconstructed blocks (parity).
+		ok, err := e.ReedSolomon.Verify(enBlocks)
+		if err != nil {
+			return 0, err
+		}
+		if !ok {
+			// Blocks cannot be reconstructed, corrupted data.
+			err = errors.New("Verification failed after reconstruction, data likely corrupted.")
+			return 0, err
+		}
+	}
+
+	// Get data blocks from encoded blocks.
+	dataBlocks, err := getDataBlocks(enBlocks, e.DataBlocks, len(buffer))
+	if err != nil {
+		return 0, err
+	}
+
+	// Copy data blocks.
+	copy(buffer, dataBlocks[bufferOffset:])
+
+	// Relenquish memory.
+	dataBlocks = nil
+
+	return int64(len(buffer)), nil
 }
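The key change above: ReadFile no longer loops over the whole file building a bytes.Buffer; it decodes exactly one caller-sized block per call, so the caller controls blocking, memory, and offsets. It leans on getEncodedBlockLen to translate byte lengths into per-shard lengths; that helper is not shown in this diff, so the following is a minimal sketch assuming it rounds up to ceil(inputLen / dataBlocks):

    package main

    import "fmt"

    // getEncodedBlockLen (assumed): bytes each data shard holds for an
    // input of inputLen bytes split across dataBlocks shards, rounded up.
    func getEncodedBlockLen(inputLen int64, dataBlocks int) int64 {
        return (inputLen + int64(dataBlocks) - 1) / int64(dataBlocks)
    }

    func main() {
        const blockSizeV1 = 10 * 1024 * 1024 // 10MiB, as defined in object-common.go below.
        dataBlocks := 8

        // Shard size for a full block: ceil(10MiB / 8) = 1310720 bytes.
        fmt.Println(getEncodedBlockLen(blockSizeV1, dataBlocks))

        // Shard-level offset for a 3MiB start offset: ceil(3MiB / 8).
        fmt.Println(getEncodedBlockLen(3*1024*1024, dataBlocks))
    }

Under that assumption, with 8 data blocks and the new 10MiB block size each per-disk read is about 1.25MiB, which is why ReadFile converts startOffset into the shard-level offsetEncOffset before calling disk.ReadFile.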

erasure-utils.go
@@ -16,13 +16,31 @@
 package main

+import "github.com/klauspost/reedsolomon"
+
 // getDataBlocks - fetches the data block only part of the input encoded blocks.
-func getDataBlocks(enBlocks [][]byte, dataBlocks int, curBlockSize int) (data []byte) {
-	for _, block := range enBlocks[:dataBlocks] {
+func getDataBlocks(enBlocks [][]byte, dataBlocks int, curBlockSize int) (data []byte, err error) {
+	if len(enBlocks) < dataBlocks {
+		return nil, reedsolomon.ErrTooFewShards
+	}
+	size := 0
+	blocks := enBlocks[:dataBlocks]
+	for _, block := range blocks {
+		size += len(block)
+	}
+	if size < curBlockSize {
+		return nil, reedsolomon.ErrShortData
+	}
+	write := curBlockSize
+	for _, block := range blocks {
+		if write < len(block) {
+			data = append(data, block[:write]...)
+			return data, nil
+		}
 		data = append(data, block...)
+		write -= len(block)
 	}
-	data = data[:curBlockSize]
-	return data
+	return data, nil
 }

 // checkBlockSize return the size of a single block.
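getDataBlocks now validates before it copies: the old `data[:curBlockSize]` slice would panic if the shards came back short, whereas the new version reports reedsolomon.ErrTooFewShards or reedsolomon.ErrShortData and trims to curBlockSize while appending. A standalone reproduction of the new trimming loop on toy shards, with local error values standing in for the reedsolomon ones:

    package main

    import (
        "errors"
        "fmt"
    )

    // Stand-ins for reedsolomon.ErrTooFewShards / reedsolomon.ErrShortData,
    // which the real function returns.
    var (
        errTooFewShards = errors.New("too few shards")
        errShortData    = errors.New("short data")
    )

    // getDataBlocks, as rewritten above: concatenate only the data shards
    // and stop once curBlockSize bytes have been written.
    func getDataBlocks(enBlocks [][]byte, dataBlocks int, curBlockSize int) (data []byte, err error) {
        if len(enBlocks) < dataBlocks {
            return nil, errTooFewShards
        }
        size := 0
        blocks := enBlocks[:dataBlocks]
        for _, block := range blocks {
            size += len(block)
        }
        if size < curBlockSize {
            return nil, errShortData
        }
        write := curBlockSize
        for _, block := range blocks {
            if write < len(block) {
                // Only part of this shard belongs to the original block;
                // the rest is encoding padding.
                data = append(data, block[:write]...)
                return data, nil
            }
            data = append(data, block...)
            write -= len(block)
        }
        return data, nil
    }

    func main() {
        // Two 4-byte data shards (second one padded) plus one parity shard;
        // the original block was only 6 bytes, so the padding is dropped.
        shards := [][]byte{[]byte("abcd"), []byte("ef\x00\x00"), []byte("pppp")}
        data, err := getDataBlocks(shards, 2, 6)
        fmt.Printf("%q %v\n", data, err) // "abcdef" <nil>
    }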

format-config-v1.go
@@ -115,7 +115,7 @@ func reorderDisks(bootstrapDisks []StorageAPI, formatConfigs []*formatConfigV1)

 // loadFormat - load format from disk.
 func loadFormat(disk StorageAPI) (format *formatConfigV1, err error) {
-	buffer := make([]byte, blockSize)
+	buffer := make([]byte, blockSizeV1)
 	offset := int64(0)
 	var n int64
 	n, err = disk.ReadFile(minioMetaBucket, formatConfigFile, offset, buffer)

fs-v1-metadata.go
@@ -57,7 +57,7 @@ func (m *fsMetaV1) AddObjectPart(partNumber int, partName string, partETag strin

 // readFSMetadata - returns the object metadata `fs.json` content.
 func (fs fsObjects) readFSMetadata(bucket, object string) (fsMeta fsMetaV1, err error) {
-	buffer := make([]byte, blockSize)
+	buffer := make([]byte, blockSizeV1)
 	n, err := fs.storage.ReadFile(bucket, path.Join(object, fsMetaJSONFile), int64(0), buffer)
 	if err != nil {
 		return fsMetaV1{}, err

fs-v1-multipart.go
@@ -305,7 +305,7 @@ func (fs fsObjects) putObjectPartCommon(bucket string, object string, uploadID s
 	// Initialize md5 writer.
 	md5Writer := md5.New()

-	var buf = make([]byte, blockSize)
+	var buf = make([]byte, blockSizeV1)
 	for {
 		n, err := io.ReadFull(data, buf)
 		if err == io.EOF {
@@ -476,7 +476,7 @@ func (fs fsObjects) CompleteMultipartUpload(bucket string, object string, upload
 	}

 	tempObj := path.Join(tmpMetaPrefix, uploadID, "object1")
-	var buffer = make([]byte, blockSize)
+	var buffer = make([]byte, blockSizeV1)

 	// Loop through all parts, validate them and then commit to disk.
 	for i, part := range parts {

fs-v1.go
@@ -160,8 +160,8 @@ func (fs fsObjects) GetObject(bucket, object string, startOffset int64, length i
 	for totalLeft > 0 {
 		// Figure out the right blockSize as it was encoded before.
 		var curBlockSize int64
-		if blockSize < totalLeft {
-			curBlockSize = blockSize
+		if blockSizeV1 < totalLeft {
+			curBlockSize = blockSizeV1
 		} else {
 			curBlockSize = totalLeft
 		}
@@ -212,10 +212,6 @@ func (fs fsObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) {
 	}, nil
 }

-const (
-	blockSize = 4 * 1024 * 1024 // 4MiB.
-)
-
 // PutObject - create an object.
 func (fs fsObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
 	// Verify if bucket is valid.
@@ -245,7 +241,7 @@ func (fs fsObjects) PutObject(bucket string, object string, size int64, data io.
 		}
 	} else {
 		// Allocate buffer.
-		buf := make([]byte, blockSize)
+		buf := make([]byte, blockSizeV1)
 		for {
 			n, rErr := data.Read(buf)
 			if rErr == io.EOF {

object-common.go
@@ -21,6 +21,11 @@ import (
 	"sync"
 )

+const (
+	// Block size used for all internal operations version 1.
+	blockSizeV1 = 10 * 1024 * 1024 // 10MiB.
+)
+
 // Common initialization needed for both object layers.
 func initObjectLayer(storageDisks ...StorageAPI) error {
 	// This happens for the first time, but keep this here since this

xl-v1-healing.go
@@ -42,7 +42,7 @@ func (xl xlObjects) readAllXLMetadata(bucket, object string) ([]xlMetaV1, []erro
 		go func(index int, disk StorageAPI) {
 			defer wg.Done()
 			offset := int64(0)
-			var buffer = make([]byte, blockSize)
+			var buffer = make([]byte, blockSizeV1)
 			n, err := disk.ReadFile(bucket, xlMetaPath, offset, buffer)
 			if err != nil {
 				errs[index] = err

xl-v1-metadata.go
@@ -25,9 +25,8 @@ import (
 	"time"
 )

-// Erasure block size.
 const (
-	erasureBlockSize = 4 * 1024 * 1024 // 4MiB.
+	// Erasure related constants.
 	erasureAlgorithmKlauspost = "klauspost/reedsolomon/vandermonde"
 	erasureAlgorithmISAL      = "isa-l/reedsolomon/cauchy"
 )
@@ -140,8 +139,8 @@ func (xl xlObjects) readXLMetadata(bucket, object string) (xlMeta xlMetaV1, err
 	// Count for errors encountered.
 	var xlJSONErrCount = 0

-	// Allocate 4MiB buffer.
-	buffer := make([]byte, blockSize)
+	// Allocate 10MiB buffer.
+	buffer := make([]byte, blockSizeV1)

 	// Return the first successful lookup from a random list of disks.
 	for xlJSONErrCount < len(xl.storageDisks) {
@@ -168,7 +167,7 @@ func newXLMetaV1(dataBlocks, parityBlocks int) (xlMeta xlMetaV1) {
 	xlMeta.Erasure.Algorithm = erasureAlgorithmKlauspost
 	xlMeta.Erasure.DataBlocks = dataBlocks
 	xlMeta.Erasure.ParityBlocks = parityBlocks
-	xlMeta.Erasure.BlockSize = erasureBlockSize
+	xlMeta.Erasure.BlockSize = blockSizeV1
 	xlMeta.Erasure.Distribution = randErasureDistribution(dataBlocks + parityBlocks)
 	return xlMeta
 }

xl-v1-multipart-common.go
@@ -81,7 +81,7 @@ func readUploadsJSON(bucket, object string, storageDisks ...StorageAPI) (uploadI
 		// Read `uploads.json` in a routine.
 		go func(index int, disk StorageAPI) {
 			defer wg.Done()
-			var buffer = make([]byte, blockSize)
+			var buffer = make([]byte, blockSizeV1) // Allocate blockSized buffer.
 			n, rErr := disk.ReadFile(minioMetaBucket, uploadJSONPath, int64(0), buffer)
 			if rErr != nil {
 				errs[index] = rErr

xl-v1-multipart.go
@@ -146,7 +146,10 @@ func (xl xlObjects) putObjectPartCommon(bucket string, object string, uploadID s
 	// Initialize md5 writer.
 	md5Writer := md5.New()

-	buf := make([]byte, blockSize)
+	// Allocate blocksized buffer for reading.
+	buf := make([]byte, blockSizeV1)
+
+	// Read until io.EOF, fill the allocated buf.
 	for {
 		var n int
 		n, err = io.ReadFull(data, buf)
@@ -167,6 +170,8 @@ func (xl xlObjects) putObjectPartCommon(bucket string, object string, uploadID s
 			return "", toObjectErr(errUnexpected, bucket, object)
 		}
 	}
+
+	// Calculate new md5sum.
 	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
 	if md5Hex != "" {
 		if newMD5Hex != md5Hex {
@@ -174,6 +179,7 @@ func (xl xlObjects) putObjectPartCommon(bucket string, object string, uploadID s
 		}
 	}

+	// Rename temporary part file to its final location.
 	partPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
 	err = xl.renameObject(minioMetaBucket, tmpPartPath, minioMetaBucket, partPath)
 	if err != nil {
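The io.ReadFull loop annotated above is the standard Go pattern for filling fixed-size blocks while hashing: io.EOF means a clean stop on a block boundary, io.ErrUnexpectedEOF marks a final short block, and any other error is fatal. A self-contained sketch of that pattern, decoupled from the XL storage layer:

    package main

    import (
        "crypto/md5"
        "encoding/hex"
        "fmt"
        "io"
        "strings"
    )

    func main() {
        data := strings.NewReader("example payload to be read block by block")
        md5Writer := md5.New()

        buf := make([]byte, 16) // Tiny block, standing in for blockSizeV1 (10MiB).
        for {
            n, err := io.ReadFull(data, buf)
            if err == io.EOF {
                break // Clean stop on a block boundary: no bytes left.
            }
            if err != nil && err != io.ErrUnexpectedEOF {
                panic(err) // Real read error; the XL code returns toObjectErr here.
            }
            md5Writer.Write(buf[:n]) // Hash (and, in XL, erasure-encode) the block.
            if err == io.ErrUnexpectedEOF {
                break // Final short block consumed.
            }
        }
        fmt.Println(hex.EncodeToString(md5Writer.Sum(nil)))
    }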

xl-v1-object.go
@@ -1,7 +1,6 @@
 package main

 import (
-	"bytes"
 	"crypto/md5"
 	"encoding/hex"
 	"io"
@@ -51,27 +50,42 @@ func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length i
 	if err != nil {
 		return toObjectErr(err, bucket, object)
 	}
-	totalLeft := length
 	for ; partIndex < len(xlMeta.Parts); partIndex++ {
 		part := xlMeta.Parts[partIndex]
-		var buffer io.Reader
-		buffer, err = erasure.ReadFile(bucket, pathJoin(object, part.Name), partOffset, part.Size)
-		if err != nil {
-			return err
-		}
-		if int64(buffer.(*bytes.Buffer).Len()) > totalLeft {
-			if _, err := io.CopyN(writer, buffer, totalLeft); err != nil {
+		totalLeft := part.Size
+		beginOffset := int64(0)
+		for totalLeft > 0 {
+			var curBlockSize int64
+			if xlMeta.Erasure.BlockSize < totalLeft {
+				curBlockSize = xlMeta.Erasure.BlockSize
+			} else {
+				curBlockSize = totalLeft
+			}
+			var buffer = make([]byte, curBlockSize)
+			var n int64
+			n, err = erasure.ReadFile(bucket, pathJoin(object, part.Name), partOffset, beginOffset, buffer)
+			if err != nil {
 				return err
 			}
-			return nil
-		}
-		n, err := io.Copy(writer, buffer)
-		if err != nil {
-			return err
+			if length > int64(len(buffer)) {
+				var m int
+				m, err = writer.Write(buffer)
+				if err != nil {
+					return err
+				}
+				length -= int64(m)
+			} else {
+				_, err = writer.Write(buffer[:length])
+				if err != nil {
+					return err
+				}
+				return nil
+			}
+			totalLeft -= partOffset + n
+			beginOffset += n
+			// Reset part offset to 0 to read rest of the parts from the beginning.
+			partOffset = 0
 		}
-		totalLeft -= n
-		// Reset part offset to 0 to read rest of the parts from the beginning.
-		partOffset = 0
 	}
 	return nil
 }
@@ -222,7 +236,8 @@ func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.
 	// Initialize md5 writer.
 	md5Writer := md5.New()

-	buf := make([]byte, blockSize)
+	// Allocated blockSized buffer for reading.
+	buf := make([]byte, blockSizeV1)
 	for {
 		var n int
 		n, err = io.ReadFull(data, buf)
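With erasure.ReadFile now filling caller-supplied buffers, GetObject drives it block by block per part, shrinking the last buffer to the remaining part size and counting the requested length down across parts. A simplified sketch of that loop shape, with a hypothetical readBlock standing in for the erasure read and a plain byte slice standing in for a part:

    package main

    import (
        "bytes"
        "fmt"
        "io"
    )

    const blockSize = 8 // Stand-in for xlMeta.Erasure.BlockSize (10MiB in the real code).

    // readBlock is a hypothetical stand-in for erasure.ReadFile: it fills
    // buffer from part at beginOffset and reports the bytes produced.
    func readBlock(part []byte, beginOffset int64, buffer []byte) int64 {
        return int64(copy(buffer, part[beginOffset:]))
    }

    // streamPart writes up to length bytes of part to writer, block by
    // block, mirroring the shape of the new GetObject inner loop.
    func streamPart(writer io.Writer, part []byte, length int64) (int64, error) {
        totalLeft := int64(len(part))
        beginOffset := int64(0)
        for totalLeft > 0 {
            curBlockSize := int64(blockSize)
            if totalLeft < curBlockSize {
                curBlockSize = totalLeft // Last block of the part is short.
            }
            buffer := make([]byte, curBlockSize)
            n := readBlock(part, beginOffset, buffer)
            if length <= int64(len(buffer)) {
                // Caller asked for less than this block: write the head and stop.
                _, err := writer.Write(buffer[:length])
                return 0, err
            }
            m, err := writer.Write(buffer)
            if err != nil {
                return length, err
            }
            length -= int64(m)
            totalLeft -= n
            beginOffset += n
        }
        return length, nil // Bytes still owed from subsequent parts.
    }

    func main() {
        var out bytes.Buffer
        left, _ := streamPart(&out, []byte("0123456789abcdefXYZ"), 25)
        fmt.Printf("%q, still need %d bytes from the next part\n", out.String(), left)
    }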
