From fa9b1f341b35fc4e205bdbd4a49705beb88439dd Mon Sep 17 00:00:00 2001 From: "Anand Babu (AB) Periasamy" Date: Tue, 24 Mar 2015 18:44:21 -0700 Subject: [PATCH] erasure encode janitor duty --- pkg/encoding/erasure/ctypes.go | 2 +- pkg/encoding/erasure/erasure_decode.go | 52 +++++----- pkg/encoding/erasure/erasure_encode.go | 125 ++++++++++++++----------- pkg/storage/donut/erasure.go | 4 +- 4 files changed, 98 insertions(+), 85 deletions(-) diff --git a/pkg/encoding/erasure/ctypes.go b/pkg/encoding/erasure/ctypes.go index d6c0240c0..a421be26b 100644 --- a/pkg/encoding/erasure/ctypes.go +++ b/pkg/encoding/erasure/ctypes.go @@ -24,7 +24,7 @@ import ( ) // Integer to Int conversion -func int2cInt(src_err_list []int) *C.int32_t { +func int2CInt(src_err_list []int) *C.int32_t { var sizeErrInt = int(unsafe.Sizeof(src_err_list[0])) switch sizeInt { case sizeErrInt: diff --git a/pkg/encoding/erasure/erasure_decode.go b/pkg/encoding/erasure/erasure_decode.go index a4de0fdf6..b6c242051 100644 --- a/pkg/encoding/erasure/erasure_decode.go +++ b/pkg/encoding/erasure/erasure_decode.go @@ -32,47 +32,47 @@ import ( // // Decoded data is exactly similar in length and content as the original data. func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) { - var decode_matrix *C.uint8_t - var decode_tbls *C.uint8_t - var decode_index *C.uint32_t + var decodeMatrix *C.uint8_t + var decodeTbls *C.uint8_t + var decodeIndex *C.uint32_t var source, target **C.uint8_t - k := e.params.K - m := e.params.M + k := int(e.params.K) + m := int(e.params.M) n := k + m - if len(chunks) != int(n) { + if len(chunks) != n { return nil, errors.New(fmt.Sprintf("chunks length must be %d", n)) } - chunk_size := GetEncodedChunkLen(length, uint8(k)) + chunkLen := GetEncodedBlockLen(length, uint8(k)) - error_index := make([]int, n+1) - var err_count int = 0 + errorIndex := make([]int, n+1) + var errCount int = 0 for i := range chunks { // Check of chunks are really null if chunks[i] == nil || len(chunks[i]) == 0 { - error_index[err_count] = i - err_count++ + errorIndex[errCount] = i + errCount++ } } - error_index[err_count] = -1 - err_count++ + errorIndex[errCount] = -1 + errCount++ // Too many missing chunks, cannot be more than parity `m` - if err_count-1 > int(n-k) { + if errCount-1 > int(n-k) { return nil, errors.New("too many erasures requested, can't decode") } - error_index_ptr := int2cInt(error_index[:err_count]) + errorIndex_ptr := int2CInt(errorIndex[:errCount]) for i := range chunks { if chunks[i] == nil || len(chunks[i]) == 0 { - chunks[i] = make([]byte, chunk_size) + chunks[i] = make([]byte, chunkLen) } } - C.minio_init_decoder(error_index_ptr, C.int(k), C.int(n), C.int(err_count-1), - e.encode_matrix, &decode_matrix, &decode_tbls, &decode_index) + C.minio_init_decoder(errorIndex_ptr, C.int(k), C.int(n), C.int(errCount-1), + e.encodeMatrix, &decodeMatrix, &decodeTbls, &decodeIndex) pointers := make([]*byte, n) for i := range chunks { @@ -81,24 +81,24 @@ func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) { data := (**C.uint8_t)(unsafe.Pointer(&pointers[0])) - ret := C.minio_get_source_target(C.int(err_count-1), C.int(k), C.int(m), error_index_ptr, - decode_index, data, &source, &target) + ret := C.minio_get_source_target(C.int(errCount-1), C.int(k), C.int(m), errorIndex_ptr, + decodeIndex, data, &source, &target) if int(ret) == -1 { return nil, errors.New("Decoding source target failed") } - C.ec_encode_data(C.int(chunk_size), C.int(k), C.int(err_count-1), decode_tbls, + C.ec_encode_data(C.int(chunkLen), C.int(k), C.int(errCount-1), decodeTbls, source, target) - recovered_output := make([]byte, 0, chunk_size*int(k)) + recoveredOutput := make([]byte, 0, chunkLen*int(k)) for i := 0; i < int(k); i++ { - recovered_output = append(recovered_output, chunks[i]...) + recoveredOutput = append(recoveredOutput, chunks[i]...) } // TODO cache this if necessary - e.decode_matrix = decode_matrix - e.decode_tbls = decode_tbls + e.decodeMatrix = decodeMatrix + e.decodeTbls = decodeTbls - return recovered_output[:length], nil + return recoveredOutput[:length], nil } diff --git a/pkg/encoding/erasure/erasure_encode.go b/pkg/encoding/erasure/erasure_encode.go index 65f1313e1..f1a1df5e7 100644 --- a/pkg/encoding/erasure/erasure_encode.go +++ b/pkg/encoding/erasure/erasure_encode.go @@ -26,7 +26,7 @@ import ( "unsafe" ) -type Technique int +type Technique uint8 const ( Vandermonde Technique = iota @@ -52,10 +52,10 @@ type EncoderParams struct { // Encoder is an object used to encode and decode data. type Encoder struct { params *EncoderParams - encode_matrix, - encode_tbls, - decode_matrix, - decode_tbls *C.uint8_t + encodeMatrix, + encodeTbls, + decodeMatrix, + decodeTbls *C.uint8_t } // ParseEncoderParams creates an EncoderParams object. @@ -94,30 +94,30 @@ func ParseEncoderParams(k, m uint8, technique Technique) (*EncoderParams, error) // NewEncoder creates an encoder object with a given set of parameters. func NewEncoder(ep *EncoderParams) *Encoder { - var encode_matrix *C.uint8_t - var encode_tbls *C.uint8_t + var k = C.int(ep.K) + var m = C.int(ep.M) - k := C.int(ep.K) - m := C.int(ep.M) + var encodeMatrix *C.uint8_t + var encodeTbls *C.uint8_t - C.minio_init_encoder(C.int(ep.Technique), k, m, &encode_matrix, - &encode_tbls) + C.minio_init_encoder(C.int(ep.Technique), k, m, &encodeMatrix, + &encodeTbls) return &Encoder{ - params: ep, - encode_matrix: encode_matrix, - encode_tbls: encode_tbls, - decode_matrix: nil, - decode_tbls: nil, + params: ep, + encodeMatrix: encodeMatrix, + encodeTbls: encodeTbls, + decodeMatrix: nil, + decodeTbls: nil, } } -func GetEncodedLen(inputLen int, k, m uint8) (outputLen int) { - outputLen = GetEncodedChunkLen(inputLen, k) * int(k+m) +func GetEncodedBlocksLen(inputLen int, k, m uint8) (outputLen int) { + outputLen = GetEncodedBlockLen(inputLen, k) * int(k+m) return outputLen } -func GetEncodedChunkLen(inputLen int, k uint8) (outputChunkLen int) { +func GetEncodedBlockLen(inputLen int, k uint8) (encodedOutputLen int) { alignment := int(k) * SIMDAlign remainder := inputLen % alignment @@ -125,55 +125,66 @@ func GetEncodedChunkLen(inputLen int, k uint8) (outputChunkLen int) { if remainder != 0 { paddedInputLen = inputLen + (alignment - remainder) } - outputChunkLen = paddedInputLen / int(k) - return outputChunkLen + encodedOutputLen = paddedInputLen / int(k) + return encodedOutputLen } -// Encode encodes a block of data. The input is the original data. The output -// is a 2 tuple containing (k + m) chunks of erasure encoded data and the -// length of the original object. -func (e *Encoder) Encode(input []byte) ([][]byte, error) { - inputLen := len(input) - k := C.int(e.params.K) - m := C.int(e.params.M) - n := k + m - - chunkLen := GetEncodedChunkLen(inputLen, e.params.K) - encodedDataLen := chunkLen * int(k) - paddedDataLen := int(encodedDataLen) - inputLen - - if paddedDataLen > 0 { - s := make([]byte, paddedDataLen) +// Encode erasure codes a block of data in "k" data blocks and "m" parity blocks. +// Output is [k+m][]blocks of data and parity slices. +func (e *Encoder) Encode(inputData []byte) (encodedBlocks [][]byte, err error) { + k := int(e.params.K) // "k" data blocks + m := int(e.params.M) // "m" parity blocks + n := k + m // "n" total encoded blocks + + // Length of a single encoded chunk. + // Total number of encoded chunks = "k" data + "m" parity blocks + encodedBlockLen := GetEncodedBlockLen(len(inputData), uint8(k)) + + // Length of total number of "k" data chunks + encodedDataBlocksLen := encodedBlockLen * k + + // Length of extra padding required for the data blocks. + encodedDataBlocksPadLen := encodedDataBlocksLen - len(inputData) + + // Extend inputData buffer to accommodate coded data blocks if necesssary + if encodedDataBlocksPadLen > 0 { + padding := make([]byte, encodedDataBlocksPadLen) // Expand with new padded blocks to the byte array - input = append(input, s...) + inputData = append(inputData, padding...) } - encodedParityLen := chunkLen * int(e.params.M) - c := make([]byte, encodedParityLen) - input = append(input, c...) + // Extend inputData buffer to accommodate coded parity blocks + if true { // create a temporary scope to trigger garbage collect + encodedParityBlocksLen := encodedBlockLen * m + parityBlocks := make([]byte, encodedParityBlocksLen) + inputData = append(inputData, parityBlocks...) + } - // encodedOutLen := encodedDataLen + encodedParityLen + // Allocate memory to the "encoded blocks" return buffer + encodedBlocks = make([][]byte, n) // Return buffer - // Allocate chunks - chunks := make([][]byte, k+m) - pointers := make([]*byte, k+m) + // Nessary to bridge Go to the C world. C requires 2D arry of pointers to + // byte array. "encodedBlocks" is a 2D slice. + pointersToEncodedBlock := make([]*byte, n) // Pointers to encoded blocks. - var i int - // Add data blocks to chunks - for i = 0; i < int(k); i++ { - chunks[i] = input[i*chunkLen : (i+1)*chunkLen] - pointers[i] = &chunks[i][0] + // Copy data block slices to encoded block buffer + for i := 0; i < k; i++ { + encodedBlocks[i] = inputData[i*encodedBlockLen : (i+1)*encodedBlockLen] + pointersToEncodedBlock[i] = &encodedBlocks[i][0] } - for i = int(k); i < int(n); i++ { - chunks[i] = make([]byte, chunkLen) - pointers[i] = &chunks[i][0] + // Copy erasure block slices to encoded block buffer + for i := k; i < n; i++ { + encodedBlocks[i] = make([]byte, encodedBlockLen) + pointersToEncodedBlock[i] = &encodedBlocks[i][0] } - data := (**C.uint8_t)(unsafe.Pointer(&pointers[:k][0])) - coding := (**C.uint8_t)(unsafe.Pointer(&pointers[k:][0])) + // Erasure code the data into K data blocks and M parity + // blocks. Only the parity blocks are filled. Data blocks remain + // intact. + C.ec_encode_data(C.int(encodedBlockLen), C.int(k), C.int(m), e.encodeTbls, + (**C.uint8_t)(unsafe.Pointer(&pointersToEncodedBlock[:k][0])), // Pointers to data blocks + (**C.uint8_t)(unsafe.Pointer(&pointersToEncodedBlock[k:][0]))) // Pointers to parity blocks - C.ec_encode_data(C.int(chunkLen), k, m, e.encode_tbls, data, - coding) - return chunks, nil + return encodedBlocks, nil } diff --git a/pkg/storage/donut/erasure.go b/pkg/storage/donut/erasure.go index 7404a3672..b8552e601 100644 --- a/pkg/storage/donut/erasure.go +++ b/pkg/storage/donut/erasure.go @@ -75,7 +75,9 @@ func erasureReader(readers []io.ReadCloser, donutMetadata map[string]string, wri if blockSize < totalLeft { curBlockSize = blockSize } - curChunkSize := erasure.GetEncodedChunkLen(curBlockSize, uint8(k)) + + curChunkSize := erasure.GetEncodedBlockLen(curBlockSize, uint8(k)) + encodedBytes := make([][]byte, 16) for i, reader := range readers { var bytesBuffer bytes.Buffer