From 12cff1be5887c0aa3790dfd5569d1a05e29a0ad7 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sat, 21 Feb 2015 11:25:09 -0800 Subject: [PATCH] Implement sha1 as intel optimized set --- pkg/utils/crypto/sha1/TODO | 0 pkg/utils/crypto/sha1/sha1.go | 171 ++++++++++++++++++++--------- pkg/utils/crypto/sha1/sha1_gen.go | 49 +++++++++ pkg/utils/crypto/sha1/sha1_test.go | 128 ++++++++++++++++++--- pkg/utils/crypto/sha1/sha1block.go | 98 +++++++++++++++++ 5 files changed, 381 insertions(+), 65 deletions(-) delete mode 100644 pkg/utils/crypto/sha1/TODO create mode 100644 pkg/utils/crypto/sha1/sha1_gen.go create mode 100644 pkg/utils/crypto/sha1/sha1block.go diff --git a/pkg/utils/crypto/sha1/TODO b/pkg/utils/crypto/sha1/TODO deleted file mode 100644 index e69de29bb..000000000 diff --git a/pkg/utils/crypto/sha1/sha1.go b/pkg/utils/crypto/sha1/sha1.go index b379f8eaa..866dc4a38 100644 --- a/pkg/utils/crypto/sha1/sha1.go +++ b/pkg/utils/crypto/sha1/sha1.go @@ -1,83 +1,150 @@ -/* - * Mini Object Storage, (C) 2014 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - +// Package sha1 implements the SHA1 hash algorithm as defined in RFC 3174. package sha1 -// #include -// #include -// void sha1_transform(int32_t *hash, const char* input, size_t num_blocks); -import "C" import ( - gosha1 "crypto/sha1" + "hash" "io" + + "github.com/minio-io/minio/pkg/utils/cpu" ) -/* +// The size of a SHA1 checksum in bytes. +const Size = 20 + +// The blocksize of SHA1 in bytes. +const BlockSize = 64 + const ( - SHA1_BLOCKSIZE = 64 - SHA1_DIGESTSIZE = 20 + chunk = 64 + init0 = 0x67452301 + init1 = 0xEFCDAB89 + init2 = 0x98BADCFE + init3 = 0x10325476 + init4 = 0xC3D2E1F0 ) +// digest represents the partial evaluation of a checksum. +type digest struct { + h [5]uint32 + x [chunk]byte + nx int + len uint64 +} -func Sha1(buffer []byte) ([]int32, error) { - if cpu.HasAVX2() { - var shbuf []int32 - var cbuffer *C.char +func (d *digest) Reset() { + d.h[0] = init0 + d.h[1] = init1 + d.h[2] = init2 + d.h[3] = init3 + d.h[4] = init4 + d.nx = 0 + d.len = 0 +} - shbuf = make([]int32, SHA1_DIGESTSIZE) - var length = len(buffer) - if length == 0 { - return []int32{0}, errors.New("Invalid input") - } +// New returns a new hash.Hash computing the SHA1 checksum. +func New() hash.Hash { + d := new(digest) + d.Reset() + return d +} - rem := length % SHA1_BLOCKSIZE - padded_len := length +func block(dig *digest, p []byte) { + switch true { + case cpu.HasAVX2() == true: + blockAVX2(dig, p) + default: + blockGeneric(dig, p) + } +} - if rem > 0 { - padded_len = length + (SHA1_BLOCKSIZE - rem) - } +func (d *digest) Size() int { return Size } - rounds := padded_len / SHA1_BLOCKSIZE - pad := padded_len - length - if pad > 0 { - s := make([]byte, pad) - // Expand with new padded blocks to the byte array - buffer = append(buffer, s...) +func (d *digest) BlockSize() int { return BlockSize } + +func (d *digest) Write(p []byte) (nn int, err error) { + nn = len(p) + d.len += uint64(nn) + if d.nx > 0 { + n := copy(d.x[d.nx:], p) + d.nx += n + if d.nx == chunk { + block(d, d.x[:]) + d.nx = 0 } + p = p[n:] + } + if len(p) >= chunk { + n := len(p) &^ (chunk - 1) + block(d, p[:n]) + p = p[n:] + } + if len(p) > 0 { + d.nx = copy(d.x[:], p) + } + return +} + +func (d0 *digest) Sum(in []byte) []byte { + // Make a copy of d0 so that caller can keep writing and summing. + d := *d0 + hash := d.checkSum() + return append(in, hash[:]...) +} + +func (d *digest) checkSum() [Size]byte { + len := d.len + // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. + var tmp [64]byte + tmp[0] = 0x80 + if len%64 < 56 { + d.Write(tmp[0 : 56-len%64]) + } else { + d.Write(tmp[0 : 64+56-len%64]) + } + + // Length in bits. + len <<= 3 + for i := uint(0); i < 8; i++ { + tmp[i] = byte(len >> (56 - 8*i)) + } + d.Write(tmp[0:8]) - cshbuf := (*C.int32_t)(unsafe.Pointer(&shbuf[0])) - cbuffer = (*C.char)(unsafe.Pointer(&buffer[0])) - C.sha1_transform(cshbuf, cbuffer, C.size_t(rounds)) + if d.nx != 0 { + panic("d.nx != 0") + } - return 0, nil + var digest [Size]byte + for i, s := range d.h { + digest[i*4] = byte(s >> 24) + digest[i*4+1] = byte(s >> 16) + digest[i*4+2] = byte(s >> 8) + digest[i*4+3] = byte(s) } + + return digest } -*/ + +// Convenience functions + +func Sum1(data []byte) [Size]byte { + var d digest + d.Reset() + d.Write(data) + return d.checkSum() +} + func Sum(reader io.Reader) ([]byte, error) { - hash := gosha1.New() + h := New() var err error for err == nil { length := 0 byteBuffer := make([]byte, 1024*1024) length, err = reader.Read(byteBuffer) byteBuffer = byteBuffer[0:length] - hash.Write(byteBuffer) + h.Write(byteBuffer) } if err != io.EOF { return nil, err } - return hash.Sum(nil), nil + return h.Sum(nil), nil } diff --git a/pkg/utils/crypto/sha1/sha1_gen.go b/pkg/utils/crypto/sha1/sha1_gen.go new file mode 100644 index 000000000..ecd1c842c --- /dev/null +++ b/pkg/utils/crypto/sha1/sha1_gen.go @@ -0,0 +1,49 @@ +// +build ignore + +package main + +import ( + "crypto/sha1" + "encoding/hex" + "fmt" + "io" + "os" + "time" + + sha1intel "github.com/minio-io/minio/pkg/utils/crypto/sha1" +) + +func Sum(reader io.Reader) ([]byte, error) { + k := sha1.New() + var err error + for err == nil { + length := 0 + byteBuffer := make([]byte, 1024*1024) + length, err = reader.Read(byteBuffer) + byteBuffer = byteBuffer[0:length] + k.Write(byteBuffer) + } + if err != io.EOF { + return nil, err + } + return k.Sum(nil), nil +} + +func main() { + fmt.Println("-- start") + + file1, _ := os.Open("filename1") + defer file1.Close() + stark := time.Now() + sum, _ := Sum(file1) + endk := time.Since(stark) + + file2, _ := os.Open("filename2") + defer file2.Close() + starth := time.Now() + sumAVX2, _ := sha1intel.Sum(file2) + endh := time.Since(starth) + + fmt.Println("std(", endk, ")", "avx2(", endh, ")") + fmt.Println(hex.EncodeToString(sum), hex.EncodeToString(sumAVX2)) +} diff --git a/pkg/utils/crypto/sha1/sha1_test.go b/pkg/utils/crypto/sha1/sha1_test.go index 6e5a472a6..325b25d72 100644 --- a/pkg/utils/crypto/sha1/sha1_test.go +++ b/pkg/utils/crypto/sha1/sha1_test.go @@ -1,23 +1,125 @@ +// SHA1 hash algorithm. See RFC 3174. + package sha1 import ( - "bytes" - "encoding/hex" + "crypto/rand" + "fmt" + "io" "testing" - - . "gopkg.in/check.v1" ) -func Test(t *testing.T) { TestingT(t) } +type sha1Test struct { + out string + in string +} + +var golden = []sha1Test{ + {"da39a3ee5e6b4b0d3255bfef95601890afd80709", ""}, + {"86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "a"}, + {"da23614e02469a0d7c7bd1bdab5c9c474b1904dc", "ab"}, + {"a9993e364706816aba3e25717850c26c9cd0d89d", "abc"}, + {"81fe8bfe87576c3ecb22426f8e57847382917acf", "abcd"}, + {"03de6c570bfe24bfc328ccd7ca46b76eadaf4334", "abcde"}, + {"1f8ac10f23c5b5bc1167bda84b833e5c057a77d2", "abcdef"}, + {"2fb5e13419fc89246865e7a324f476ec624e8740", "abcdefg"}, + {"425af12a0743502b322e93a015bcf868e324d56a", "abcdefgh"}, + {"c63b19f1e4c8b5f76b25c49b8b87f57d8e4872a1", "abcdefghi"}, + {"d68c19a0a345b7eab78d5e11e991c026ec60db63", "abcdefghij"}, + {"ebf81ddcbe5bf13aaabdc4d65354fdf2044f38a7", "Discard medicine more than two years old."}, + {"e5dea09392dd886ca63531aaa00571dc07554bb6", "He who has a shady past knows that nice guys finish last."}, + {"45988f7234467b94e3e9494434c96ee3609d8f8f", "I wouldn't marry him with a ten foot pole."}, + {"55dee037eb7460d5a692d1ce11330b260e40c988", "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, + {"b7bc5fb91080c7de6b582ea281f8a396d7c0aee8", "The days of the digital watch are numbered. -Tom Stoppard"}, + {"c3aed9358f7c77f523afe86135f06b95b3999797", "Nepal premier won't resign."}, + {"6e29d302bf6e3a5e4305ff318d983197d6906bb9", "For every action there is an equal and opposite government program."}, + {"597f6a540010f94c15d71806a99a2c8710e747bd", "His money is twice tainted: 'taint yours and 'taint mine."}, + {"6859733b2590a8a091cecf50086febc5ceef1e80", "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, + {"514b2630ec089b8aee18795fc0cf1f4860cdacad", "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, + {"c5ca0d4a7b6676fc7aa72caa41cc3d5df567ed69", "size: a.out: bad magic"}, + {"74c51fa9a04eadc8c1bbeaa7fc442f834b90a00a", "The major problem is with sendmail. -Mark Horton"}, + {"0b4c4ce5f52c3ad2821852a8dc00217fa18b8b66", "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, + {"3ae7937dd790315beb0f48330e8642237c61550a", "If the enemy is within range, then so are you."}, + {"410a2b296df92b9a47412b13281df8f830a9f44b", "It's well we cannot hear the screams/That we create in others' dreams."}, + {"841e7c85ca1adcddbdd0187f1289acb5c642f7f5", "You remind me of a TV show, but that's all right: I watch it anyway."}, + {"163173b825d03b952601376b25212df66763e1db", "C is as portable as Stonehedge!!"}, + {"32b0377f2687eb88e22106f133c586ab314d5279", "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"}, + {"0885aaf99b569542fd165fa44e322718f4a984e0", "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, + {"6627d6904d71420b0bf3886ab629623538689f45", "How can you write a big system without C++? -Paul Glick"}, +} + +func TestGolden(t *testing.T) { + for i := 0; i < len(golden); i++ { + g := golden[i] + s := fmt.Sprintf("%x", Sum1([]byte(g.in))) + if s != g.out { + t.Fatalf("Sum function: sha1(%s) = %s want %s", g.in, s, g.out) + } + c := New() + for j := 0; j < 3; j++ { + if j < 2 { + io.WriteString(c, g.in) + } else { + io.WriteString(c, g.in[0:len(g.in)/2]) + c.Sum(nil) + io.WriteString(c, g.in[len(g.in)/2:]) + } + s := fmt.Sprintf("%x", c.Sum(nil)) + if s != g.out { + t.Fatalf("sha1[%d](%s) = %s want %s", j, g.in, s, g.out) + } + c.Reset() + } + } +} + +func TestSize(t *testing.T) { + c := New() + if got := c.Size(); got != Size { + t.Errorf("Size = %d; want %d", got, Size) + } +} + +func TestBlockSize(t *testing.T) { + c := New() + if got := c.BlockSize(); got != BlockSize { + t.Errorf("BlockSize = %d; want %d", got, BlockSize) + } +} -type MySuite struct{} +// Tests that blockGeneric (pure Go) and block (in assembly for amd64, 386, arm) match. +func TestBlockGeneric(t *testing.T) { + gen, asm := New().(*digest), New().(*digest) + buf := make([]byte, BlockSize*20) // arbitrary factor + rand.Read(buf) + blockGeneric(gen, buf) + block(asm, buf) + if *gen != *asm { + t.Error("block and blockGeneric resulted in different states") + } +} -var _ = Suite(&MySuite{}) +var bench = New() +var buf = make([]byte, 8192) + +func benchmarkSize(b *testing.B, size int) { + b.SetBytes(int64(size)) + sum := make([]byte, bench.Size()) + for i := 0; i < b.N; i++ { + bench.Reset() + bench.Write(buf[:size]) + bench.Sum(sum[:0]) + } +} + +func BenchmarkHash8Bytes(b *testing.B) { + benchmarkSize(b, 8) +} + +func BenchmarkHash1K(b *testing.B) { + benchmarkSize(b, 1024) +} -func (s *MySuite) TestStreamingSha1(c *C) { - testString := []byte("Test string") - expectedHash, _ := hex.DecodeString("18af819125b70879d36378431c4e8d9bfa6a2599") - hash, err := Sum(bytes.NewBuffer(testString)) - c.Assert(err, IsNil) - c.Assert(bytes.Equal(expectedHash, hash), Equals, true) +func BenchmarkHash8K(b *testing.B) { + benchmarkSize(b, 8192) } diff --git a/pkg/utils/crypto/sha1/sha1block.go b/pkg/utils/crypto/sha1/sha1block.go new file mode 100644 index 000000000..1d29c2624 --- /dev/null +++ b/pkg/utils/crypto/sha1/sha1block.go @@ -0,0 +1,98 @@ +// +build amd64 + +package sha1 + +// #cgo CFLAGS: -DHAS_AVX2 +// #include +// #include +// void sha1_transform(int32_t *hash, const char* input, size_t num_blocks); +import "C" +import "unsafe" + +const ( + _K0 = 0x5A827999 + _K1 = 0x6ED9EBA1 + _K2 = 0x8F1BBCDC + _K3 = 0xCA62C1D6 +) + +func blockAVX2(dig *digest, p []byte) { + C.sha1_transform((*C.int32_t)(unsafe.Pointer(&dig.h[0])), (*C.char)(unsafe.Pointer(&p[0])), (C.size_t)(len(p)/chunk)) +} + +// blockGeneric is a portable, pure Go version of the SHA1 block step. +// It's used by sha1block_generic.go and tests. +func blockGeneric(dig *digest, p []byte) { + var w [16]uint32 + + h0, h1, h2, h3, h4 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] + for len(p) >= chunk { + // Can interlace the computation of w with the + // rounds below if needed for speed. + for i := 0; i < 16; i++ { + j := i * 4 + w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) + } + + a, b, c, d, e := h0, h1, h2, h3, h4 + + // Each of the four 20-iteration rounds + // differs only in the computation of f and + // the choice of K (_K0, _K1, etc). + i := 0 + for ; i < 16; i++ { + f := b&c | (^b)&d + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K0 + a, b, c, d, e = t, a, b30, c, d + } + for ; i < 20; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + + f := b&c | (^b)&d + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K0 + a, b, c, d, e = t, a, b30, c, d + } + for ; i < 40; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + f := b ^ c ^ d + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K1 + a, b, c, d, e = t, a, b30, c, d + } + for ; i < 60; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + f := ((b | c) & d) | (b & c) + + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K2 + a, b, c, d, e = t, a, b30, c, d + } + for ; i < 80; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + f := b ^ c ^ d + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K3 + a, b, c, d, e = t, a, b30, c, d + } + + h0 += a + h1 += b + h2 += c + h3 += d + h4 += e + + p = p[chunk:] + } + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] = h0, h1, h2, h3, h4 +}