From 922e6bac0b0db0743f7157e7b0b6c4eb75356087 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Thu, 27 Nov 2014 19:43:54 -0800 Subject: [PATCH] Erasure restructuring --- pkgs/erasure/Makefile | 9 +-- pkgs/erasure/cauchy_test.go | 21 ++++-- pkgs/erasure/ctypes.go | 8 +-- pkgs/erasure/{matrix_decode.c => decode.c} | 41 ++++++----- pkgs/erasure/decode.go | 45 ++++++------ pkgs/erasure/decode.h | 25 +++++++ pkgs/erasure/encode.c | 75 +++++++++++++++++++ pkgs/erasure/encode.go | 84 +++++----------------- pkgs/erasure/{matrix_decode.h => encode.h} | 15 ++-- pkgs/erasure/vandermonde_test.go | 22 +++--- 10 files changed, 203 insertions(+), 142 deletions(-) rename pkgs/erasure/{matrix_decode.c => decode.c} (72%) create mode 100644 pkgs/erasure/decode.h create mode 100644 pkgs/erasure/encode.c rename pkgs/erasure/{matrix_decode.h => encode.h} (65%) diff --git a/pkgs/erasure/Makefile b/pkgs/erasure/Makefile index e650b1ba8..0786bdb5c 100644 --- a/pkgs/erasure/Makefile +++ b/pkgs/erasure/Makefile @@ -1,13 +1,8 @@ all: build test .PHONY: all -test: cauchy vandermonde - -cauchy: - @godep go test -test.run="TestCauchy*" - -vandermonde: - @godep go test -test.run="TestVanderMonde*" +test: + @godep go test isal/isal-l.so: @$(MAKE) --quiet -C isal lib diff --git a/pkgs/erasure/cauchy_test.go b/pkgs/erasure/cauchy_test.go index 42ac5df36..23de8bbf2 100644 --- a/pkgs/erasure/cauchy_test.go +++ b/pkgs/erasure/cauchy_test.go @@ -17,6 +17,7 @@ package erasure import ( + "bytes" . "gopkg.in/check.v1" "testing" ) @@ -27,15 +28,19 @@ var _ = Suite(&MySuite{}) func Test(t *testing.T) { TestingT(t) } -func (s *MySuite) TestCachyEncode(c *C) { - ep, _ := ValidateParams(10, 5, 8, CAUCHY) - data := make([]byte, 1000) - _, length := Encode(data, ep) +func (s *MySuite) TestCauchyEncode(c *C) { + ep, _ := ParseEncoderParams(10, 5, CAUCHY) + + data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") + + chunks, length := Encode(data, ep) + + c.Logf("chunks length: %d;\nlength: %d\n", len(chunks), length) c.Assert(length, Equals, len(data)) } func (s *MySuite) TestCauchyDecode(c *C) { - ep, _ := ValidateParams(10, 5, 8, CAUCHY) + ep, _ := ParseEncoderParams(10, 5, CAUCHY) data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") @@ -49,7 +54,9 @@ func (s *MySuite) TestCauchyDecode(c *C) { chunks[13] = nil recovered_data, err := Decode(chunks, ep, length) - c.Assert(err, Not(IsNil)) + c.Assert(err, IsNil) - c.Assert(recovered_data, DeepEquals, data) + if !bytes.Equal(data, recovered_data) { + c.Fatalf("Recovered data mismatches with original data") + } } diff --git a/pkgs/erasure/ctypes.go b/pkgs/erasure/ctypes.go index a95816791..c0061707e 100644 --- a/pkgs/erasure/ctypes.go +++ b/pkgs/erasure/ctypes.go @@ -23,10 +23,10 @@ import ( ) // Integer to Int conversion -func Int2cInt(src_err_list []int) *C.int { - SrcErrInt := int(unsafe.Sizeof(src_err_list[0])) - switch SrcErrInt { - case SizeInt: +func int2cInt(src_err_list []int) *C.int { + var SrcErrInt = int(unsafe.Sizeof(src_err_list[0])) + switch SizeInt { + case SrcErrInt: return (*C.int)(unsafe.Pointer(&src_err_list[0])) case SizeInt8: Int8Array := make([]int8, len(src_err_list)) diff --git a/pkgs/erasure/matrix_decode.c b/pkgs/erasure/decode.c similarity index 72% rename from pkgs/erasure/matrix_decode.c rename to pkgs/erasure/decode.c index ad0526b7e..a5153e404 100644 --- a/pkgs/erasure/matrix_decode.c +++ b/pkgs/erasure/decode.c @@ -16,13 +16,13 @@ #include #include +#include #include -#include "matrix_decode.h" +#include "decode.h" static int src_in_err (int r, int *src_err_list) { - int i; for (i = 0; src_err_list[i] != -1; i++) { if (src_err_list[i] == r) { @@ -37,25 +37,25 @@ static int src_in_err (int r, int *src_err_list) Generate decode matrix during the decoding phase */ -int gf_gen_decode_matrix (int *src_err_list, - unsigned char *encode_matrix, - unsigned char *decode_matrix, - int k, int n, int errs, - size_t matrix_size) +int minio_init_decoder (int *src_err_list, + unsigned char *encode_matrix, + unsigned char **decode_matrix, + unsigned char **decode_tbls, + int k, int n, int errs) { int i, j, r, s, l, z; - unsigned char *input_matrix = NULL; - unsigned char *inverse_matrix = NULL; + unsigned char input_matrix[k * n]; + unsigned char inverse_matrix[k * n]; + unsigned char *tmp_decode_matrix; + unsigned char *tmp_decode_tbls; - input_matrix = malloc(k * n); - if (!input_matrix) { + tmp_decode_matrix = (unsigned char *) malloc (k * n); + if (!tmp_decode_matrix) return -1; - } - inverse_matrix = malloc(matrix_size); - if (!inverse_matrix) { + tmp_decode_tbls = (unsigned char *) malloc (k * n * 32); + if (!tmp_decode_tbls) return -1; - } for (i = 0, r = 0; i < k; i++, r++) { while (src_in_err(r, src_err_list)) @@ -74,7 +74,7 @@ int gf_gen_decode_matrix (int *src_err_list, if (src_err_list[l] < k) { // decoding matrix elements for data chunks for (j = 0; j < k; j++) { - decode_matrix[k * l + j] = + tmp_decode_matrix[k * l + j] = inverse_matrix[k * src_err_list[l] + j]; } @@ -88,11 +88,14 @@ int gf_gen_decode_matrix (int *src_err_list, encode_matrix[k * src_err_list[l] + j]); } - decode_matrix[k * l + i] = s; + tmp_decode_matrix[k * l + i] = s; } } } - free(input_matrix); - free(inverse_matrix); + + ec_init_tables(k, errs, tmp_decode_matrix, tmp_decode_tbls); + + *decode_matrix = tmp_decode_matrix; + *decode_tbls = tmp_decode_tbls; return 0; } diff --git a/pkgs/erasure/decode.go b/pkgs/erasure/decode.go index 46a973874..5616568a0 100644 --- a/pkgs/erasure/decode.go +++ b/pkgs/erasure/decode.go @@ -15,7 +15,6 @@ */ // +build linux -// amd64 package erasure @@ -25,7 +24,8 @@ package erasure // #include // #include // -// #include "matrix_decode.h" +// #include "decode.h" +// #include "encode.h" import "C" import ( "errors" @@ -37,17 +37,14 @@ func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) { var decode_matrix *C.uchar var decode_tbls *C.uchar - var matrix_size C.size_t - var decode_tbls_size C.size_t - k := int(e.p.k) - n := int(e.p.n) + n := int(e.p.k + e.p.m) if len(chunks) != n { return nil, errors.New(fmt.Sprintf("chunks length must be %d", n)) } - var chunk_size int = e.CalcChunkSize(length) + chunk_size := int(C.calc_chunk_size(e.k, C.uint(length))) src_err_list := make([]int, n+1) var err_count int = 0 @@ -67,7 +64,7 @@ func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) { return nil, errors.New("too many erasures requested, can't decode") } - src_err_list_ptr := Int2cInt(src_err_list[:err_count]) + src_err_list_ptr := int2cInt(src_err_list[:err_count]) for i := range chunks { if chunks[i] == nil || len(chunks[i]) == 0 { @@ -75,36 +72,42 @@ func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) { } } - matrix_size = C.size_t(k * n) - decode_matrix = (*C.uchar)(unsafe.Pointer(C.malloc(matrix_size))) - defer C.free(unsafe.Pointer(decode_matrix)) + C.minio_init_decoder(src_err_list_ptr, e.encode_matrix, + &decode_matrix, &decode_tbls, + e.k, e.k+e.m, C.int(err_count-1)) - decode_tbls_size = C.size_t(k * n * 32) - decode_tbls = (*C.uchar)(unsafe.Pointer(C.malloc(decode_tbls_size))) + defer C.free(unsafe.Pointer(decode_matrix)) defer C.free(unsafe.Pointer(decode_tbls)) - C.gf_gen_decode_matrix(src_err_list_ptr, e.encode_matrix, - decode_matrix, e.k, e.k+e.m, C.int(err_count-1), matrix_size) - - C.ec_init_tables(e.k, C.int(err_count-1), decode_matrix, decode_tbls) - e.decode_matrix = decode_matrix - e.decode_tbls = decode_tbls - pointers := make([]*byte, n) for i := range chunks { pointers[i] = &chunks[i][0] } + /* + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + var i int + for i = 0; i < e.p.k; i++ { + recov[i] = buffs[decode_index[i]] + } + */ + data := (**C.uchar)(unsafe.Pointer(&pointers[:k][0])) coding := (**C.uchar)(unsafe.Pointer(&pointers[k:][0])) - C.ec_encode_data(C.int(matrix_size), e.k, C.int(err_count-1), e.decode_tbls, + C.ec_encode_data(C.int(chunk_size), e.k, C.int(err_count-1), decode_tbls, data, coding) recovered_output := make([]byte, 0, chunk_size*k) for i := 0; i < k; i++ { recovered_output = append(recovered_output, chunks[i]...) } + + e.decode_matrix = decode_matrix + e.decode_tbls = decode_tbls + return recovered_output[:length], nil } diff --git a/pkgs/erasure/decode.h b/pkgs/erasure/decode.h new file mode 100644 index 000000000..4c3844a78 --- /dev/null +++ b/pkgs/erasure/decode.h @@ -0,0 +1,25 @@ +/* + * Mini Object Storage, (C) 2014 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DECODE_H__ +#define __DECODE_H__ + +int minio_init_decoder (int *src_err_list, + unsigned char *encoding_matrix, + unsigned char **decode_matrix, + unsigned char **decode_tbls, + int k, int n, int errs); +#endif /* __DECODE_H__ */ diff --git a/pkgs/erasure/encode.c b/pkgs/erasure/encode.c new file mode 100644 index 000000000..756f82da2 --- /dev/null +++ b/pkgs/erasure/encode.c @@ -0,0 +1,75 @@ +/* + * Mini Object Storage, (C) 2014 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include "encode.h" + +void minio_init_encoder (int technique, int k, int m, + unsigned char **encode_matrix, + unsigned char **encode_tbls) +{ + size_t encode_matrix_size; + size_t encode_tbls_size; + unsigned char *tmp_matrix, *tmp_tbls; + + encode_matrix_size = k * (k + m); + encode_tbls_size = k * (k + m) * 32; + + tmp_matrix = (unsigned char *) malloc (encode_matrix_size); + tmp_tbls = (unsigned char *) malloc (encode_tbls_size); + + if (technique == 0) { + /* + Commonly used method for choosing coefficients in erasure + encoding but does not guarantee invertable for every sub + matrix. For large k it is possible to find cases where the + decode matrix chosen from sources and parity not in erasure + are not invertable. Users may want to adjust for k > 5. + -- Intel + */ + gf_gen_rs_matrix (tmp_matrix, k + m, k); + } else if (technique == 1) { + gf_gen_cauchy1_matrix (tmp_matrix, k + m, k); + } + + ec_init_tables(k, m, &tmp_matrix[k * k], tmp_tbls); + + *encode_matrix = tmp_matrix; + *encode_tbls = tmp_tbls; +} + +unsigned int calc_chunk_size (int k, unsigned int split_len) +{ + int alignment; + int remainder; + int padded_len; + + alignment = k * SIMD_ALIGN; + remainder = split_len % alignment; + + padded_len = split_len; + if (remainder) { + padded_len = split_len + (alignment - remainder); + } + return padded_len / k; +} +/* +void minio_encode (int k, int m, ) +{ +*/ diff --git a/pkgs/erasure/encode.go b/pkgs/erasure/encode.go index 299d6aba6..e60497b47 100644 --- a/pkgs/erasure/encode.go +++ b/pkgs/erasure/encode.go @@ -15,7 +15,6 @@ */ // +build linux -// amd64 package erasure @@ -25,10 +24,11 @@ package erasure // #include // #include // -// #include "cpufeatures.h" +// #include "encode.h" import "C" import ( "errors" + //"fmt" "unsafe" ) @@ -38,24 +38,20 @@ const ( ) const ( - K = 10 - M = 3 - ALIGN = 32 + K = 10 + M = 3 ) type EncoderParams struct { k, m, - w, - n, technique int // cauchy or vandermonde matrix (RS) } type Encoder struct { p *EncoderParams k, - m, - w C.int + m C.int encode_matrix, encode_tbls, decode_matrix, @@ -63,7 +59,7 @@ type Encoder struct { } // Parameter validation -func ValidateParams(k, m, w, technique int) (*EncoderParams, error) { +func ParseEncoderParams(k, m, technique int) (*EncoderParams, error) { if k < 1 { return nil, errors.New("k cannot be zero") } @@ -76,14 +72,6 @@ func ValidateParams(k, m, w, technique int) (*EncoderParams, error) { return nil, errors.New("(k + m) cannot be bigger than Galois field GF(2^8) - 1") } - if 1< 5. - // -- Intel - C.gf_gen_rs_matrix(encode_matrix, n, k) - } else if ep.technique == CAUCHY { - C.gf_gen_cauchy1_matrix(encode_matrix, n, k) - } - - C.ec_init_tables(k, m, encode_matrix, encode_tbls) - return &Encoder{ p: ep, k: k, m: m, - w: w, encode_matrix: encode_matrix, encode_tbls: encode_tbls, decode_matrix: nil, @@ -148,27 +110,15 @@ func NewEncoder(ep *EncoderParams) *Encoder { } } -func (e *Encoder) CalcChunkSize(block_len int) int { - var alignment int = ALIGN - var remainder = block_len % alignment - var chunk_size int - - chunk_size = block_len - if remainder > 0 { - chunk_size = block_len + (alignment - remainder) - } - - return chunk_size / e.p.k -} - func (e *Encoder) Encode(block []byte) ([][]byte, int) { var block_len = len(block) - chunk_size := e.CalcChunkSize(block_len) - padded_len := chunk_size * e.p.k + chunk_size := int(C.calc_chunk_size(e.k, C.uint(block_len))) + chunk_len := chunk_size * e.p.k + pad_len := chunk_len - block_len - if (padded_len - block_len) > 0 { - s := make([]byte, (padded_len - block_len)) + if pad_len > 0 { + s := make([]byte, pad_len) // Expand with new padded blocks to the byte array block = append(block, s...) } @@ -178,8 +128,8 @@ func (e *Encoder) Encode(block []byte) ([][]byte, int) { block = append(block, c...) // Allocate chunks - chunks := make([][]byte, e.p.n) - pointers := make([]*byte, e.p.n) + chunks := make([][]byte, e.p.k+e.p.m) + pointers := make([]*byte, e.p.k+e.p.m) var i int // Add data blocks to chunks @@ -188,7 +138,7 @@ func (e *Encoder) Encode(block []byte) ([][]byte, int) { pointers[i] = &chunks[i][0] } - for i = e.p.k; i < e.p.n; i++ { + for i = e.p.k; i < (e.p.k + e.p.m); i++ { chunks[i] = make([]byte, chunk_size) pointers[i] = &chunks[i][0] } diff --git a/pkgs/erasure/matrix_decode.h b/pkgs/erasure/encode.h similarity index 65% rename from pkgs/erasure/matrix_decode.h rename to pkgs/erasure/encode.h index f15081b9c..aded47dc1 100644 --- a/pkgs/erasure/matrix_decode.h +++ b/pkgs/erasure/encode.h @@ -14,11 +14,12 @@ * limitations under the License. */ -#ifndef __MATRIX_DECODE_H__ -#define __MATRIX_DECODE_H__ +#ifndef __ENCODE_H__ +#define __ENCODE_H__ -int gf_gen_decode_matrix (int *src_err_list, - unsigned char *encoding_matrix, - unsigned char *decode_matrix, int k, int n, - int errs, size_t matrix_size); -#endif /* __MATRIX_DECODE_H__ */ +#define SIMD_ALIGN 32 +void minio_init_encoder (int technique, int k, int m, + unsigned char **encode_matrix, + unsigned char **encode_tbls); +unsigned int calc_chunk_size (int k, unsigned int split_len); +#endif /* __ENCODE_H__ */ diff --git a/pkgs/erasure/vandermonde_test.go b/pkgs/erasure/vandermonde_test.go index 4a67f0164..ece8c7524 100644 --- a/pkgs/erasure/vandermonde_test.go +++ b/pkgs/erasure/vandermonde_test.go @@ -17,27 +17,27 @@ package erasure import ( + "bytes" . "gopkg.in/check.v1" ) func (s *MySuite) TestVanderMondeEncode(c *C) { - ep, _ := ValidateParams(10, 5, 8, VANDERMONDE) - p := NewEncoder(ep) + ep, _ := ParseEncoderParams(10, 5, VANDERMONDE) - data := make([]byte, 1000) - chunks, length := p.Encode(data) + data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") + + chunks, length := Encode(data, ep) c.Logf("chunks length: %d;\nlength: %d\n", len(chunks), length) c.Assert(length, Equals, len(data)) } func (s *MySuite) TestVanderMondeDecode(c *C) { - ep, _ := ValidateParams(10, 5, 8, VANDERMONDE) - p := NewEncoder(ep) + ep, _ := ParseEncoderParams(10, 5, VANDERMONDE) data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") - chunks, length := p.Encode(data) + chunks, length := Encode(data, ep) c.Logf("chunks length: %d", len(chunks)) c.Logf("length: %d", length) c.Assert(length, Equals, len(data)) @@ -48,8 +48,10 @@ func (s *MySuite) TestVanderMondeDecode(c *C) { chunks[9] = nil chunks[13] = nil - recovered_data, err := p.Decode(chunks, length) - c.Assert(err, Not(IsNil)) + recovered_data, err := Decode(chunks, ep, length) + c.Assert(err, IsNil) - c.Assert(recovered_data, DeepEquals, data) + if !bytes.Equal(recovered_data, data) { + c.Fatalf("Recovered data mismatches with original data") + } }