Make erasure matrix type non-optional; choose it automatically

Remove the option of providing a Technique, along with the error handling
based on it, and choose a matrix type automatically from the number of data blocks.

Intel recommends using Cauchy because its matrices are consistently invertible.
While Vandermonde is faster, we should default to Cauchy for a large
number of data blocks.
master
Harshavardhana 9 years ago
parent cf0e1a156b
commit d5ce2f6944
  1. 9
      pkg/donut/bucket.go
  2. 1
      pkg/donut/definitions.go
  3. 24
      pkg/donut/encoder.go
  4. 16
      pkg/donut/errors.go
  5. 7
      pkg/erasure/doc.go
  6. 0
      pkg/erasure/ec.h
  7. 2
      pkg/erasure/ec_minio_common.h
  8. 2
      pkg/erasure/ec_minio_decode.c
  9. 10
      pkg/erasure/ec_minio_encode.c
  10. 2
      pkg/erasure/erasure_decode.go
  11. 34
      pkg/erasure/erasure_encode.go
  12. 39
      pkg/erasure/erasure_test.go
  13. 66
      pkg/erasure/vandermonde_test.go

@ -288,7 +288,6 @@ func (b bucket) WriteObject(objectName string, objectData io.Reader, size int64,
objMetadata.ChunkCount = chunkCount objMetadata.ChunkCount = chunkCount
objMetadata.DataDisks = k objMetadata.DataDisks = k
objMetadata.ParityDisks = m objMetadata.ParityDisks = m
objMetadata.ErasureTechnique = "Cauchy"
objMetadata.Size = int64(totalLength) objMetadata.Size = int64(totalLength)
} }
objMetadata.Bucket = b.getBucketName() objMetadata.Bucket = b.getBucketName()
@ -431,7 +430,7 @@ func (b bucket) getDataAndParity(totalWriters int) (k uint8, m uint8, err *probe
// writeObjectData - // writeObjectData -
func (b bucket) writeObjectData(k, m uint8, writers []io.WriteCloser, objectData io.Reader, size int64, hashWriter io.Writer) (int, int, *probe.Error) { func (b bucket) writeObjectData(k, m uint8, writers []io.WriteCloser, objectData io.Reader, size int64, hashWriter io.Writer) (int, int, *probe.Error) {
encoder, err := newEncoder(k, m, "Cauchy") encoder, err := newEncoder(k, m)
if err != nil { if err != nil {
return 0, 0, err.Trace() return 0, 0, err.Trace()
} }
@ -503,11 +502,7 @@ func (b bucket) readObjectData(objectName string, writer *io.PipeWriter, objMeta
mwriter := io.MultiWriter(writer, hasher, sum512hasher) mwriter := io.MultiWriter(writer, hasher, sum512hasher)
switch len(readers) > 1 { switch len(readers) > 1 {
case true: case true:
if objMetadata.ErasureTechnique == "" { encoder, err := newEncoder(objMetadata.DataDisks, objMetadata.ParityDisks)
writer.CloseWithError(probe.WrapError(probe.NewError(MissingErasureTechnique{})))
return
}
encoder, err := newEncoder(objMetadata.DataDisks, objMetadata.ParityDisks, objMetadata.ErasureTechnique)
if err != nil { if err != nil {
writer.CloseWithError(probe.WrapError(err)) writer.CloseWithError(probe.WrapError(err))
return return

@ -32,7 +32,6 @@ type ObjectMetadata struct {
// erasure // erasure
DataDisks uint8 `json:"sys.erasureK"` DataDisks uint8 `json:"sys.erasureK"`
ParityDisks uint8 `json:"sys.erasureM"` ParityDisks uint8 `json:"sys.erasureM"`
ErasureTechnique string `json:"sys.erasureTechnique"`
BlockSize int `json:"sys.blockSize"` BlockSize int `json:"sys.blockSize"`
ChunkCount int `json:"sys.chunkCount"` ChunkCount int `json:"sys.chunkCount"`

@ -25,40 +25,20 @@ import (
type encoder struct { type encoder struct {
encoder *encoding.Erasure encoder *encoding.Erasure
k, m uint8 k, m uint8
technique encoding.Technique
}
// getErasureTechnique - convert technique string into Technique type
func getErasureTechnique(technique string) (encoding.Technique, *probe.Error) {
switch true {
case technique == "Cauchy":
return encoding.Cauchy, nil
case technique == "Vandermonde":
return encoding.Cauchy, nil
default:
return encoding.None, probe.NewError(InvalidErasureTechnique{Technique: technique})
}
} }
// newEncoder - instantiate a new encoder // newEncoder - instantiate a new encoder
func newEncoder(k, m uint8, technique string) (encoder, *probe.Error) { func newEncoder(k, m uint8) (encoder, *probe.Error) {
e := encoder{} e := encoder{}
t, err := getErasureTechnique(technique) params, err := encoding.ValidateParams(k, m)
if err != nil {
return encoder{}, err.Trace()
}
{
params, err := encoding.ValidateParams(k, m, t)
if err != nil { if err != nil {
return encoder{}, probe.NewError(err) return encoder{}, probe.NewError(err)
} }
e.encoder = encoding.NewErasure(params) e.encoder = encoding.NewErasure(params)
e.k = k e.k = k
e.m = m e.m = m
e.technique = t
return e, nil return e, nil
} }
}
// TODO - think again if this is needed // TODO - think again if this is needed
// GetEncodedBlockLen - wrapper around erasure function with the same name // GetEncodedBlockLen - wrapper around erasure function with the same name

@ -132,22 +132,6 @@ func (e MissingPOSTPolicy) Error() string {
return "Missing POST policy in multipart form" return "Missing POST policy in multipart form"
} }
// MissingErasureTechnique is the error value used when no erasure technique
// has been supplied.
type MissingErasureTechnique struct{}

// Error implements the error interface and returns a fixed message.
func (MissingErasureTechnique) Error() string {
	const msg = "Missing erasure technique"
	return msg
}
// InvalidErasureTechnique is the error value used when an erasure technique
// name is not recognised. Technique holds the offending name.
type InvalidErasureTechnique struct {
	Technique string
}

// Error implements the error interface; the message is a fixed prefix
// followed by the rejected technique name.
func (e InvalidErasureTechnique) Error() string {
	const prefix = "Invalid erasure technique: "
	msg := prefix + e.Technique
	return msg
}
// InternalError - generic internal error // InternalError - generic internal error
type InternalError struct { type InternalError struct {
} }

@ -39,10 +39,9 @@
// //
// But here are the few points to keep in mind // But here are the few points to keep in mind
// //
// Techniques: // Matrix Type:
// - Vandermonde is most commonly used method for choosing coefficients in erasure // - Vandermonde is most commonly used method for choosing coefficients in erasure
// encoding but does not guarantee invertable for every sub matrix. // encoding but does not guarantee invertable for every sub matrix.
// Users may want to adjust for k > 5. (k is data blocks)
// - Whereas Cauchy is our recommended method for choosing coefficients in erasure coding. // - Whereas Cauchy is our recommended method for choosing coefficients in erasure coding.
// Since any sub-matrix of a Cauchy matrix is invertable. // Since any sub-matrix of a Cauchy matrix is invertable.
// //
@ -53,14 +52,14 @@
// //
// Creating and using an encoder // Creating and using an encoder
// var bytes []byte // var bytes []byte
// params := erasure.ValidateParams(10, 5, erasure.Cauchy) // params := erasure.ValidateParams(10, 5)
// encoder := erasure.NewErasure(params) // encoder := erasure.NewErasure(params)
// encodedData, length := encoder.Encode(bytes) // encodedData, length := encoder.Encode(bytes)
// //
// Creating and using a decoder // Creating and using a decoder
// var encodedData [][]byte // var encodedData [][]byte
// var length int // var length int
// params := erasure.ValidateParams(10, 5, erasure.Cauchy) // params := erasure.ValidateParams(10, 5)
// encoder := erasure.NewErasure(params) // encoder := erasure.NewErasure(params)
// originalData, err := encoder.Decode(encodedData, length) // originalData, err := encoder.Decode(encodedData, length)
// //

@ -19,7 +19,7 @@
#include <stdint.h> #include <stdint.h>
int32_t minio_init_encoder (int technique, int k, int m, int32_t minio_init_encoder (int k, int m,
unsigned char **encode_matrix, unsigned char **encode_matrix,
unsigned char **encode_tbls); unsigned char **encode_tbls);

@ -18,7 +18,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "ec_isal-l.h" #include "ec.h"
#include "ec_minio_common.h" #include "ec_minio_common.h"
static static

@ -17,16 +17,14 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include "ec_isal-l.h" #include "ec.h"
#include "ec_minio_common.h" #include "ec_minio_common.h"
/* /*
Generate encode matrix during the encoding phase Generate encode matrix during the encoding phase
*/ */
int32_t minio_init_encoder (int technique, int k, int m, int32_t minio_init_encoder (int k, int m, unsigned char **encode_matrix, unsigned char **encode_tbls)
unsigned char **encode_matrix,
unsigned char **encode_tbls)
{ {
unsigned char *tmp_matrix; unsigned char *tmp_matrix;
unsigned char *tmp_tbls; unsigned char *tmp_tbls;
@ -34,7 +32,7 @@ int32_t minio_init_encoder (int technique, int k, int m,
tmp_matrix = (unsigned char *) malloc (k * (k + m)); tmp_matrix = (unsigned char *) malloc (k * (k + m));
tmp_tbls = (unsigned char *) malloc (k * (k + m) * 32); tmp_tbls = (unsigned char *) malloc (k * (k + m) * 32);
if (technique == 0) { if (k < 5) {
/* /*
Commonly used method for choosing coefficients in erasure Commonly used method for choosing coefficients in erasure
encoding but does not guarantee invertable for every sub encoding but does not guarantee invertable for every sub
@ -44,7 +42,7 @@ int32_t minio_init_encoder (int technique, int k, int m,
-- Intel -- Intel
*/ */
gf_gen_rs_matrix (tmp_matrix, k + m, k); gf_gen_rs_matrix (tmp_matrix, k + m, k);
} else if (technique == 1) { } else {
gf_gen_cauchy1_matrix (tmp_matrix, k + m, k); gf_gen_cauchy1_matrix (tmp_matrix, k + m, k);
} }

@ -18,7 +18,7 @@ package erasure
// #cgo CFLAGS: -O0 // #cgo CFLAGS: -O0
// #include <stdlib.h> // #include <stdlib.h>
// #include "ec_isal-l.h" // #include "ec.h"
// #include "ec_minio_common.h" // #include "ec_minio_common.h"
import "C" import "C"
import ( import (

@ -17,7 +17,7 @@
package erasure package erasure
// #include <stdlib.h> // #include <stdlib.h>
// #include "ec_isal-l.h" // #include "ec.h"
// #include "ec_minio_common.h" // #include "ec_minio_common.h"
import "C" import "C"
import ( import (
@ -25,22 +25,6 @@ import (
"unsafe" "unsafe"
) )
// Technique identifies the kind of coding matrix used in encoding.
type Technique uint8
// Supported matrix types. The values are assigned by iota, so Vandermonde
// is 0, Cauchy is 1 and None is 2.
const (
// Vandermonde selects a Vandermonde coding matrix.
Vandermonde Technique = iota
// Cauchy selects a Cauchy coding matrix.
Cauchy
// None is neither of the two matrix types; ValidateParams rejects it.
None
)
// Default number of data blocks (K) and parity blocks (M).
const (
// K is the default data block count.
K = 10
// M is the default parity block count.
M = 3
)
// Block alignment // Block alignment
const ( const (
SIMDAlign = 32 SIMDAlign = 32
@ -50,7 +34,6 @@ const (
type Params struct { type Params struct {
K uint8 K uint8
M uint8 M uint8
Technique Technique // cauchy or vandermonde matrix (RS)
} }
// Erasure is an object used to encode and decode data. // Erasure is an object used to encode and decode data.
@ -66,7 +49,7 @@ type Erasure struct {
// k and m represent the matrix size, which corresponds to the protection level // k and m represent the matrix size, which corresponds to the protection level
// technique is the matrix type. Valid inputs are Cauchy (recommended) or Vandermonde. // technique is the matrix type. Valid inputs are Cauchy (recommended) or Vandermonde.
// //
func ValidateParams(k, m uint8, technique Technique) (*Params, error) { func ValidateParams(k, m uint8) (*Params, error) {
if k < 1 { if k < 1 {
return nil, errors.New("k cannot be zero") return nil, errors.New("k cannot be zero")
} }
@ -79,19 +62,9 @@ func ValidateParams(k, m uint8, technique Technique) (*Params, error) {
return nil, errors.New("(k + m) cannot be bigger than Galois field GF(2^8) - 1") return nil, errors.New("(k + m) cannot be bigger than Galois field GF(2^8) - 1")
} }
switch technique {
case Vandermonde:
break
case Cauchy:
break
default:
return nil, errors.New("Technique can be either vandermonde or cauchy")
}
return &Params{ return &Params{
K: k, K: k,
M: m, M: m,
Technique: technique,
}, nil }, nil
} }
@ -103,8 +76,7 @@ func NewErasure(ep *Params) *Erasure {
var encodeMatrix *C.uchar var encodeMatrix *C.uchar
var encodeTbls *C.uchar var encodeTbls *C.uchar
C.minio_init_encoder(C.int(ep.Technique), k, m, &encodeMatrix, C.minio_init_encoder(k, m, &encodeMatrix, &encodeTbls)
&encodeTbls)
return &Erasure{ return &Erasure{
params: ep, params: ep,

@ -34,8 +34,15 @@ const (
m = 5 m = 5
) )
func (s *MySuite) TestCauchyEncodeDecodeFailure(c *C) { func corruptChunks(chunks [][]byte, errorIndex []int) [][]byte {
ep, _ := ValidateParams(k, m, Cauchy) for _, err := range errorIndex {
chunks[err] = nil
}
return chunks
}
func (s *MySuite) TestEncodeDecodeFailure(c *C) {
ep, _ := ValidateParams(k, m)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
@ -50,8 +57,8 @@ func (s *MySuite) TestCauchyEncodeDecodeFailure(c *C) {
c.Assert(err, Not(IsNil)) c.Assert(err, Not(IsNil))
} }
func (s *MySuite) TestCauchyEncodeDecodeSuccess(c *C) { func (s *MySuite) TestEncodeDecodeSuccess(c *C) {
ep, _ := ValidateParams(k, m, Cauchy) ep, _ := ValidateParams(k, m)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
@ -69,3 +76,27 @@ func (s *MySuite) TestCauchyEncodeDecodeSuccess(c *C) {
c.Fatalf("Recovered data mismatches with original data") c.Fatalf("Recovered data mismatches with original data")
} }
} }
func (s *MySuite) TestEncodeDecodeSuccessBuffer(c *C) {
ep, _ := ValidateParams(k, m)
tmpBuffer := new(bytes.Buffer)
for i := 0; i < 1024*1024; i++ {
tmpBuffer.Write([]byte("Hello world, hello world"))
}
e := NewErasure(ep)
chunks, err := e.Encode(tmpBuffer.Bytes())
c.Assert(err, IsNil)
errorIndex := []int{0, 3, 5, 9, 13}
chunks = corruptChunks(chunks, errorIndex)
recoveredData, err := e.Decode(chunks, len(tmpBuffer.Bytes()))
c.Assert(err, IsNil)
if !bytes.Equal(tmpBuffer.Bytes(), recoveredData) {
c.Fatalf("Recovered data mismatches with original data")
}
}

@ -1,66 +0,0 @@
/*
* Minio Cloud Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package erasure
import (
"bytes"
. "gopkg.in/check.v1"
)
// corruptChunks simulates lost blocks: every position listed in errorIndex is
// set to nil in chunks. The slice is modified in place and also returned.
func corruptChunks(chunks [][]byte, errorIndex []int) [][]byte {
	for i := 0; i < len(errorIndex); i++ {
		lost := errorIndex[i]
		chunks[lost] = nil
	}
	return chunks
}
// TestVanderMondeEncodeDecodeFailure encodes a sample text with a Vandermonde
// matrix, drops six of the resulting chunks and expects Decode to report an
// error.
func (s *MySuite) TestVanderMondeEncodeDecodeFailure(c *C) {
	// Check the validation result instead of discarding it: on failure
	// ValidateParams returns nil params, which must not reach NewErasure.
	ep, err := ValidateParams(k, m, Vandermonde)
	c.Assert(err, IsNil)

	data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")

	e := NewErasure(ep)
	chunks, err := e.Encode(data)
	c.Assert(err, IsNil)

	// Six chunks are removed; decoding is expected to fail.
	errorIndex := []int{0, 3, 5, 9, 11, 13}
	chunks = corruptChunks(chunks, errorIndex)

	_, err = e.Decode(chunks, len(data))
	c.Assert(err, Not(IsNil))
}
// TestVanderMondeEncodeDecodeSuccess encodes a sample text with a Vandermonde
// matrix, drops five of the resulting chunks and expects Decode to return the
// original bytes.
func (s *MySuite) TestVanderMondeEncodeDecodeSuccess(c *C) {
	// Check the validation result instead of discarding it: on failure
	// ValidateParams returns nil params, which must not reach NewErasure.
	ep, err := ValidateParams(k, m, Vandermonde)
	c.Assert(err, IsNil)

	data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")

	e := NewErasure(ep)
	chunks, err := e.Encode(data)
	c.Assert(err, IsNil)

	// Five chunks are removed; decoding is expected to succeed.
	errorIndex := []int{0, 3, 5, 9, 13}
	chunks = corruptChunks(chunks, errorIndex)

	recoveredData, err := e.Decode(chunks, len(data))
	c.Assert(err, IsNil)

	if !bytes.Equal(recoveredData, data) {
		c.Fatalf("Recovered data mismatches with original data")
	}
}
Loading…
Cancel
Save