add HighwayHash256 support (#5359)
This change adds the HighwayHash256 PRF as a bitrot protection / detection algorithm. Since HighwayHash256 requires a 256-bit key, we generate a random key from the first 100 decimals of π (see nothing-up-my-sleeve numbers). This key is fixed forever and tied to the HighwayHash256 bitrot algorithm. Fixes #5358 master
parent
2760409656
commit
7f99cc9768
@ -0,0 +1,21 @@ |
||||
MIT License |
||||
|
||||
Copyright (c) 2017 Minio Inc. |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
of this software and associated documentation files (the "Software"), to deal |
||||
in the Software without restriction, including without limitation the rights |
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
copies of the Software, and to permit persons to whom the Software is |
||||
furnished to do so, subject to the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included in all |
||||
copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
SOFTWARE. |
@ -0,0 +1,83 @@ |
||||
[![Godoc Reference](https://godoc.org/github.com/minio/highwayhash?status.svg)](https://godoc.org/github.com/minio/highwayhash) |
||||
[![Build Status](https://travis-ci.org/minio/highwayhash.svg?branch=master)](https://travis-ci.org/minio/highwayhash) |
||||
|
||||
## HighwayHash |
||||
|
||||
[HighwayHash](https://github.com/google/highwayhash) is a pseudo-random-function (PRF) developed by Jyrki Alakuijala, Bill Cox and Jan Wassenberg (Google research). HighwayHash takes a 256 bit key and computes 64, 128 or 256 bit hash values of given messages. |
||||
|
||||
It can be used to prevent hash-flooding attacks or authenticate short-lived messages. Additionally it can be used as a fingerprinting function. HighwayHash is not a general purpose cryptographic hash function (such as Blake2b, SHA-3 or SHA-2) and should not be used if strong collision resistance is required. |
||||
|
||||
This repository contains a native Go version and optimized assembly implementations on both Intel and ARM platforms. |
||||
|
||||
### High performance |
||||
|
||||
HighwayHash is a SIMD hash function that is approximately 5x faster than [SipHash](https://www.131002.net/siphash/siphash.pdf), which is itself a fast and 'cryptographically strong' pseudo-random function designed by Aumasson and Bernstein. |
||||
|
||||
HighwayHash uses a new way of mixing inputs with AVX2 multiply and permute instructions. The multiplications are 32x32 bit giving 64 bits-wide results and are therefore infeasible to reverse. Additionally permuting equalizes the distribution of the resulting bytes. The algorithm outputs digests ranging from 64 bits up to 256 bits at no extra cost. |
||||
|
||||
### Stable |
||||
|
||||
All three output sizes of HighwayHash have been declared [stable](https://github.com/google/highwayhash/#versioning-and-stability) as of January 2018. This means that the hash results for any given input message are guaranteed not to change. |
||||
|
||||
### Installation |
||||
|
||||
Install: `go get -u github.com/minio/highwayhash` |
||||
|
||||
### Intel Performance |
||||
|
||||
Below are the single core results on an Intel Core i7 (3.1 GHz) for 256 bit outputs: |
||||
|
||||
``` |
||||
BenchmarkSum256_16 204.17 MB/s |
||||
BenchmarkSum256_64 1040.63 MB/s |
||||
BenchmarkSum256_1K 8653.30 MB/s |
||||
BenchmarkSum256_8K 13476.07 MB/s |
||||
BenchmarkSum256_1M 14928.71 MB/s |
||||
BenchmarkSum256_5M 14180.04 MB/s |
||||
BenchmarkSum256_10M 12458.65 MB/s |
||||
BenchmarkSum256_25M 11927.25 MB/s |
||||
``` |
||||
|
||||
So for moderately sized messages it tops out at about 15 GB/sec. Also for small messages (1K) the performance is already at approximately 60% of the maximum throughput. |
||||
|
||||
### ARM Performance |
||||
|
||||
On an 8 core 1.2 GHz ARM Cortex-A53 (running Debian 8.0 Jessie with Go 1.7.4) the following results were obtained: |
||||
|
||||
Platform/CPU | Write 64 | Write 1024 | Write 8192 |
||||
----------------- | ---------------- | ----------------- | ----------------- |
||||
ARM64 NEON | 384 MB/s | 955 MB/s | 1053 MB/s |
||||
|
||||
*Note: For now just the (main) update loop is implemented in assembly, so for small messages there is still considerable overhead due to initialization and finalization.* |
||||
|
||||
### Performance compared to other hashing techniques |
||||
|
||||
On a Skylake CPU (3.0 GHz Xeon Platinum 8124M) the table below shows how HighwayHash compares to other hashing techniques for 5 MB messages (single core performance, all Golang implementations, see [benchmark](https://github.com/fwessels/HashCompare/blob/master/benchmarks_test.go)). |
||||
|
||||
``` |
||||
BenchmarkHighwayHash 11986.98 MB/s |
||||
BenchmarkSHA256_AVX512 3552.74 MB/s |
||||
BenchmarkBlake2b 972.38 MB/s |
||||
BenchmarkSHA1 950.64 MB/s |
||||
BenchmarkMD5 684.18 MB/s |
||||
BenchmarkSHA512 562.04 MB/s |
||||
BenchmarkSHA256 383.07 MB/s |
||||
``` |
||||
|
||||
*Note: the AVX512 version of SHA256 uses the [multi-buffer crypto library](https://github.com/intel/intel-ipsec-mb) technique as developed by Intel, more details can be found in [sha256-simd](https://github.com/minio/sha256-simd/).* |
||||
|
||||
### Qualitative assessment |
||||
|
||||
We have performed a 'qualitative' assessment of how HighwayHash compares to Blake2b in terms of the distribution of the checksums for varying numbers of messages. It shows that HighwayHash behaves similarly according to the following graph: |
||||
|
||||
![Hash Comparison Overview](https://s3.amazonaws.com/s3git-assets/hash-comparison-final.png) |
||||
|
||||
More information can be found in [HashCompare](https://github.com/fwessels/HashCompare). |
||||
|
||||
### Requirements |
||||
|
||||
All Go versions >= 1.7 are supported. Notice that the amd64 AVX2 implementation is only available with Go 1.8 and newer. |
||||
|
||||
### Contributing |
||||
|
||||
Contributions are welcome, please send PRs for any enhancements. |
@ -0,0 +1,225 @@ |
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Package highwayhash implements the pseudo-random-function (PRF) HighwayHash.
|
||||
// HighwayHash is a fast hash function designed to defend against hash-flooding attacks
|
||||
// or to authenticate short-lived messages.
|
||||
//
|
||||
// HighwayHash is not a general purpose cryptographic hash function and does not
|
||||
// provide (strong) collision resistance.
|
||||
package highwayhash |
||||
|
||||
import ( |
||||
"encoding/binary" |
||||
"errors" |
||||
"hash" |
||||
) |
||||
|
||||
// Checksum lengths, in bytes, of the three HighwayHash output widths.
// Size is also the required key length and the block size of the hash.
const (
	Size    = 32 // HighwayHash-256
	Size128 = 16 // HighwayHash-128
	Size64  = 8  // HighwayHash-64
)

// errKeySize is returned by the New* constructors, and used as the panic
// value by the Sum* helpers, when the key is not exactly Size bytes long.
var errKeySize = errors.New("highwayhash: invalid key size")
||||
|
||||
// New returns a hash.Hash computing the HighwayHash-256 checksum.
|
||||
// It returns a non-nil error if the key is not 32 bytes long.
|
||||
func New(key []byte) (hash.Hash, error) { |
||||
if len(key) != Size { |
||||
return nil, errKeySize |
||||
} |
||||
h := &digest{size: Size} |
||||
copy(h.key[:], key) |
||||
h.Reset() |
||||
return h, nil |
||||
} |
||||
|
||||
// New128 returns a hash.Hash computing the HighwayHash-128 checksum.
|
||||
// It returns a non-nil error if the key is not 32 bytes long.
|
||||
func New128(key []byte) (hash.Hash, error) { |
||||
if len(key) != Size { |
||||
return nil, errKeySize |
||||
} |
||||
h := &digest{size: Size128} |
||||
copy(h.key[:], key) |
||||
h.Reset() |
||||
return h, nil |
||||
} |
||||
|
||||
// New64 returns a hash.Hash computing the HighwayHash-64 checksum.
|
||||
// It returns a non-nil error if the key is not 32 bytes long.
|
||||
func New64(key []byte) (hash.Hash64, error) { |
||||
if len(key) != Size { |
||||
return nil, errKeySize |
||||
} |
||||
h := new(digest64) |
||||
h.size = Size64 |
||||
copy(h.key[:], key) |
||||
h.Reset() |
||||
return h, nil |
||||
} |
||||
|
||||
// Sum computes the HighwayHash-256 checksum of data.
|
||||
// It panics if the key is not 32 bytes long.
|
||||
func Sum(data, key []byte) [Size]byte { |
||||
if len(key) != Size { |
||||
panic(errKeySize) |
||||
} |
||||
var state [16]uint64 |
||||
initialize(&state, key) |
||||
if n := len(data) & (^(Size - 1)); n > 0 { |
||||
update(&state, data[:n]) |
||||
data = data[n:] |
||||
} |
||||
if len(data) > 0 { |
||||
var block [Size]byte |
||||
offset := copy(block[:], data) |
||||
hashBuffer(&state, &block, offset) |
||||
} |
||||
var hash [Size]byte |
||||
finalize(hash[:], &state) |
||||
return hash |
||||
} |
||||
|
||||
// Sum128 computes the HighwayHash-128 checksum of data.
|
||||
// It panics if the key is not 32 bytes long.
|
||||
func Sum128(data, key []byte) [Size128]byte { |
||||
if len(key) != Size { |
||||
panic(errKeySize) |
||||
} |
||||
var state [16]uint64 |
||||
initialize(&state, key) |
||||
if n := len(data) & (^(Size - 1)); n > 0 { |
||||
update(&state, data[:n]) |
||||
data = data[n:] |
||||
} |
||||
if len(data) > 0 { |
||||
var block [Size]byte |
||||
offset := copy(block[:], data) |
||||
hashBuffer(&state, &block, offset) |
||||
} |
||||
var hash [Size128]byte |
||||
finalize(hash[:], &state) |
||||
return hash |
||||
} |
||||
|
||||
// Sum64 computes the HighwayHash-64 checksum of data.
|
||||
// It panics if the key is not 32 bytes long.
|
||||
func Sum64(data, key []byte) uint64 { |
||||
if len(key) != Size { |
||||
panic(errKeySize) |
||||
} |
||||
var state [16]uint64 |
||||
initialize(&state, key) |
||||
if n := len(data) & (^(Size - 1)); n > 0 { |
||||
update(&state, data[:n]) |
||||
data = data[n:] |
||||
} |
||||
if len(data) > 0 { |
||||
var block [Size]byte |
||||
offset := copy(block[:], data) |
||||
hashBuffer(&state, &block, offset) |
||||
} |
||||
var hash [Size64]byte |
||||
finalize(hash[:], &state) |
||||
return binary.LittleEndian.Uint64(hash[:]) |
||||
} |
||||
|
||||
type digest64 struct{ digest } |
||||
|
||||
func (d *digest64) Sum64() uint64 { |
||||
state := d.state |
||||
if d.offset > 0 { |
||||
hashBuffer(&state, &d.buffer, d.offset) |
||||
} |
||||
var hash [8]byte |
||||
finalize(hash[:], &state) |
||||
return binary.LittleEndian.Uint64(hash[:]) |
||||
} |
||||
|
||||
type digest struct { |
||||
state [16]uint64 // v0 | v1 | mul0 | mul1
|
||||
|
||||
key, buffer [Size]byte |
||||
offset int |
||||
|
||||
size int |
||||
} |
||||
|
||||
func (d *digest) Size() int { return d.size } |
||||
|
||||
func (d *digest) BlockSize() int { return Size } |
||||
|
||||
func (d *digest) Reset() { |
||||
initialize(&d.state, d.key[:]) |
||||
d.offset = 0 |
||||
} |
||||
|
||||
func (d *digest) Write(p []byte) (n int, err error) { |
||||
n = len(p) |
||||
if d.offset > 0 { |
||||
remaining := Size - d.offset |
||||
if n < remaining { |
||||
d.offset += copy(d.buffer[d.offset:], p) |
||||
return |
||||
} |
||||
copy(d.buffer[d.offset:], p[:remaining]) |
||||
update(&d.state, d.buffer[:]) |
||||
p = p[remaining:] |
||||
d.offset = 0 |
||||
} |
||||
if nn := len(p) & (^(Size - 1)); nn > 0 { |
||||
update(&d.state, p[:nn]) |
||||
p = p[nn:] |
||||
} |
||||
if len(p) > 0 { |
||||
d.offset = copy(d.buffer[d.offset:], p) |
||||
} |
||||
return |
||||
} |
||||
|
||||
func (d *digest) Sum(b []byte) []byte { |
||||
state := d.state |
||||
if d.offset > 0 { |
||||
hashBuffer(&state, &d.buffer, d.offset) |
||||
} |
||||
var hash [Size]byte |
||||
finalize(hash[:d.size], &state) |
||||
return append(b, hash[:d.size]...) |
||||
} |
||||
|
||||
func hashBuffer(state *[16]uint64, buffer *[32]byte, offset int) { |
||||
var block [Size]byte |
||||
mod32 := (uint64(offset) << 32) + uint64(offset) |
||||
for i := range state[:4] { |
||||
state[i] += mod32 |
||||
} |
||||
for i := range state[4:8] { |
||||
t0 := uint32(state[i+4]) |
||||
t0 = (t0 << uint(offset)) | (t0 >> uint(32-offset)) |
||||
|
||||
t1 := uint32(state[i+4] >> 32) |
||||
t1 = (t1 << uint(offset)) | (t1 >> uint(32-offset)) |
||||
|
||||
state[i+4] = (uint64(t1) << 32) | uint64(t0) |
||||
} |
||||
|
||||
mod4 := offset & 3 |
||||
remain := offset - mod4 |
||||
|
||||
copy(block[:], buffer[:remain]) |
||||
if offset >= 16 { |
||||
copy(block[28:], buffer[offset-4:]) |
||||
} else if mod4 != 0 { |
||||
last := uint32(buffer[remain]) |
||||
last += uint32(buffer[remain+mod4>>1]) << 8 |
||||
last += uint32(buffer[offset-1]) << 16 |
||||
binary.LittleEndian.PutUint32(block[16:], last) |
||||
} |
||||
update(state, block[:]) |
||||
} |
@ -0,0 +1,68 @@ |
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build go1.8
|
||||
// +build amd64 !gccgo !appengine !nacl
|
||||
|
||||
package highwayhash |
||||
|
||||
var ( |
||||
useSSE4 = supportsSSE4() |
||||
useAVX2 = supportsAVX2() |
||||
useNEON = false |
||||
) |
||||
|
||||
// supportsSSE4 reports whether CPUID advertises SSE4 support.
// It has no Go body; the implementation is in the package's amd64 assembly.
//go:noescape
|
||||
func supportsSSE4() bool |
||||
|
||||
// supportsAVX2 reports the AVX2 flag read from the Go runtime.
// It has no Go body; the implementation is in the package's AVX2 assembly.
//go:noescape
|
||||
func supportsAVX2() bool |
||||
|
||||
// initializeSSE4 derives the initial state from a 32-byte key (SSE4 assembly).
//go:noescape
|
||||
func initializeSSE4(state *[16]uint64, key []byte) |
||||
|
||||
// initializeAVX2 derives the initial state from a 32-byte key (AVX2 assembly).
//go:noescape
|
||||
func initializeAVX2(state *[16]uint64, key []byte) |
||||
|
||||
// updateSSE4 absorbs msg into state in 32-byte blocks (SSE4 assembly).
// Callers in this package pass a length that is a multiple of 32.
//go:noescape
|
||||
func updateSSE4(state *[16]uint64, msg []byte) |
||||
|
||||
// updateAVX2 absorbs msg into state in 32-byte blocks (AVX2 assembly).
// Callers in this package pass a length that is a multiple of 32.
//go:noescape
|
||||
func updateAVX2(state *[16]uint64, msg []byte) |
||||
|
||||
// finalizeSSE4 runs the final rounds and writes the checksum to out; len(out)
// (8, 16 or otherwise 32) selects the output width. It also overwrites *state.
//go:noescape
|
||||
func finalizeSSE4(out []byte, state *[16]uint64) |
||||
|
||||
// finalizeAVX2 runs the final rounds and writes the checksum to out; len(out)
// (8, 16 or otherwise 32) selects the output width. It also overwrites *state.
//go:noescape
|
||||
func finalizeAVX2(out []byte, state *[16]uint64) |
||||
|
||||
func initialize(state *[16]uint64, key []byte) { |
||||
if useAVX2 { |
||||
initializeAVX2(state, key) |
||||
} else if useSSE4 { |
||||
initializeSSE4(state, key) |
||||
} else { |
||||
initializeGeneric(state, key) |
||||
} |
||||
} |
||||
|
||||
func update(state *[16]uint64, msg []byte) { |
||||
if useAVX2 { |
||||
updateAVX2(state, msg) |
||||
} else if useSSE4 { |
||||
updateSSE4(state, msg) |
||||
} else { |
||||
updateGeneric(state, msg) |
||||
} |
||||
} |
||||
|
||||
func finalize(out []byte, state *[16]uint64) { |
||||
if useAVX2 { |
||||
finalizeAVX2(out, state) |
||||
} else if useSSE4 { |
||||
finalizeSSE4(out, state) |
||||
} else { |
||||
finalizeGeneric(out, state) |
||||
} |
||||
} |
@ -0,0 +1,255 @@ |
||||
// Copyright (c) 2017 Minio Inc. All rights reserved. |
||||
// Use of this source code is governed by a license that can be |
||||
// found in the LICENSE file. |
||||
|
||||
// +build go1.8
|
||||
// +build amd64 !gccgo !appengine !nacl |
||||
|
||||
#include "textflag.h" |
||||
|
||||
|
||||
// consAVX2 is a 64-byte read-only table of initialization constants.
// initializeAVX2 loads bytes 0-31 and 32-63 as two 256-bit words, stores them
// as the mul0 and mul1 parts of the state, and XORs them with the key to form
// v0 and v1.
DATA ·consAVX2<>+0x00(SB)/8, $0xdbe6d5d5fe4cce2f |
||||
DATA ·consAVX2<>+0x08(SB)/8, $0xa4093822299f31d0 |
||||
DATA ·consAVX2<>+0x10(SB)/8, $0x13198a2e03707344 |
||||
DATA ·consAVX2<>+0x18(SB)/8, $0x243f6a8885a308d3 |
||||
DATA ·consAVX2<>+0x20(SB)/8, $0x3bd39e10cb0ef593 |
||||
DATA ·consAVX2<>+0x28(SB)/8, $0xc0acf169b5f18a8c |
||||
DATA ·consAVX2<>+0x30(SB)/8, $0xbe5466cf34e90c6c |
||||
DATA ·consAVX2<>+0x38(SB)/8, $0x452821e638d01377 |
||||
GLOBL ·consAVX2<>(SB), (NOPTR+RODATA), $64 |
||||
|
||||
// zipperMergeAVX2 is the 32-byte VPSHUFB control mask used by UPDATE (held in
// Y5). The same 16-byte pattern is stored twice, once per 128-bit half.
DATA ·zipperMergeAVX2<>+0x00(SB)/8, $0xf010e05020c03 |
||||
DATA ·zipperMergeAVX2<>+0x08(SB)/8, $0x70806090d0a040b |
||||
DATA ·zipperMergeAVX2<>+0x10(SB)/8, $0xf010e05020c03 |
||||
DATA ·zipperMergeAVX2<>+0x18(SB)/8, $0x70806090d0a040b |
||||
GLOBL ·zipperMergeAVX2<>(SB), (NOPTR+RODATA), $32 |
||||
|
||||
// REDUCE_MOD folds the 256-bit value x3:x2:x1:x0 (general purpose registers,
// x0 least significant) into the 128-bit value y1:y0:
//
//   x3    &= 0x3FFFFFFFFFFFFFFF          (top two bits dropped)
//   y1:y0  = (x1:x0) ^ ((x3:x2) << 1) ^ ((x3:x2) << 2)
//
// with the shifts taken over the 128-bit pair. x2, x3, tmp0 and tmp1 are
// clobbered; x0 and x1 are only read.
#define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \ |
||||
MOVQ $0x3FFFFFFFFFFFFFFF, tmp0 \ |
||||
ANDQ tmp0, x3 \ |
||||
MOVQ x2, y0 \ |
||||
MOVQ x3, y1 \ |
||||
\ |
||||
MOVQ x2, tmp0 \ |
||||
MOVQ x3, tmp1 \ |
||||
SHLQ $1, tmp1 \ |
||||
SHRQ $63, tmp0 \ |
||||
MOVQ tmp1, x3 \ |
||||
ORQ tmp0, x3 \ |
||||
\ |
||||
SHLQ $1, x2 \ |
||||
\ |
||||
MOVQ y0, tmp0 \ |
||||
MOVQ y1, tmp1 \ |
||||
SHLQ $2, tmp1 \ |
||||
SHRQ $62, tmp0 \ |
||||
MOVQ tmp1, y1 \ |
||||
ORQ tmp0, y1 \ |
||||
\ |
||||
SHLQ $2, y0 \ |
||||
\ |
||||
XORQ x0, y0 \ |
||||
XORQ x2, y0 \ |
||||
XORQ x1, y1 \ |
||||
XORQ x3, y1 |
||||
|
||||
// UPDATE performs one HighwayHash round on a 32-byte packet held in msg.
// Register roles (as loaded by updateAVX2/finalizeAVX2): Y1 = v0, Y2 = v1,
// Y3 = mul0, Y4 = mul1, Y5 = zipper-merge shuffle mask, Y0 = scratch.
// Per 64-bit lane:
//
//   v1   += msg + mul0
//   mul0 ^= (v0 >> 32) * low32(v1)
//   v0   += mul1
//   mul1 ^= (v1 >> 32) * low32(v0)
//   v0   += shuffle(v1)
//   v1   += shuffle(v0)
//
// VPMULUDQ is emitted as raw bytes; the trailing comments give the intended
// instruction.
#define UPDATE(msg) \ |
||||
VPADDQ msg, Y2, Y2 \ |
||||
VPADDQ Y3, Y2, Y2 \ |
||||
\ |
||||
VPSRLQ $32, Y1, Y0 \ |
||||
BYTE $0xC5; BYTE $0xFD; BYTE $0xF4; BYTE $0xC2 \ // VPMULUDQ Y2, Y0, Y0
|
||||
VPXOR Y0, Y3, Y3 \ |
||||
\ |
||||
VPADDQ Y4, Y1, Y1 \ |
||||
\ |
||||
VPSRLQ $32, Y2, Y0 \ |
||||
BYTE $0xC5; BYTE $0xFD; BYTE $0xF4; BYTE $0xC1 \ // VPMULUDQ Y1, Y0, Y0
|
||||
VPXOR Y0, Y4, Y4 \ |
||||
\ |
||||
VPSHUFB Y5, Y2, Y0 \ |
||||
VPADDQ Y0, Y1, Y1 \ |
||||
\ |
||||
VPSHUFB Y5, Y1, Y0 \ |
||||
VPADDQ Y0, Y2, Y2 |
||||
|
||||
// initializeAVX2 builds the initial 128-byte state from a 32-byte key:
//   v0   = key ^ consAVX2[0:32]
//   v1   = (key with the 32-bit halves of every 64-bit lane swapped) ^ consAVX2[32:64]
//   mul0 = consAVX2[0:32]
//   mul1 = consAVX2[32:64]
// It reads 32 bytes from key without checking its length; the Go callers
// validate the key size beforehand.
// func initializeAVX2(state *[16]uint64, key []byte) |
||||
TEXT ·initializeAVX2(SB), 4, $0-32 |
||||
MOVQ state+0(FP), AX |
||||
MOVQ key_base+8(FP), BX |
||||
MOVQ $·consAVX2<>(SB), CX |
||||
|
||||
// Y1 = key, Y2 = key with adjacent 32-bit words swapped (shuffle imm 177).
VMOVDQU 0(BX), Y1 |
||||
VPSHUFD $177, Y1, Y2 |
||||
|
||||
VMOVDQU 0(CX), Y3 |
||||
VMOVDQU 32(CX), Y4 |
||||
|
||||
VPXOR Y3, Y1, Y1 |
||||
VPXOR Y4, Y2, Y2 |
||||
|
||||
// Store v0 | v1 | mul0 | mul1.
VMOVDQU Y1, 0(AX) |
||||
VMOVDQU Y2, 32(AX) |
||||
VMOVDQU Y3, 64(AX) |
||||
VMOVDQU Y4, 96(AX) |
||||
VZEROUPPER |
||||
RET |
||||
|
||||
// updateAVX2 absorbs msg into state, one 32-byte block per UPDATE round.
// It returns without touching state when len(msg) < 32.
// NOTE(review): the loop exits only once the remaining length reaches zero or
// the subtraction borrows, so a length that is not a multiple of 32 makes it
// read one block past the end of msg. The Go callers visible in this package
// always pass a multiple of 32.
// func updateAVX2(state *[16]uint64, msg []byte) |
||||
TEXT ·updateAVX2(SB), 4, $0-32 |
||||
MOVQ state+0(FP), AX |
||||
MOVQ msg_base+8(FP), BX |
||||
MOVQ msg_len+16(FP), CX |
||||
|
||||
CMPQ CX, $32 |
||||
JB DONE |
||||
|
||||
// Load v0, v1, mul0, mul1 into Y1..Y4 and the shuffle mask into Y5.
VMOVDQU 0(AX), Y1 |
||||
VMOVDQU 32(AX), Y2 |
||||
VMOVDQU 64(AX), Y3 |
||||
VMOVDQU 96(AX), Y4 |
||||
|
||||
VMOVDQU ·zipperMergeAVX2<>(SB), Y5 |
||||
|
||||
LOOP: |
||||
VMOVDQU 0(BX), Y0 |
||||
UPDATE(Y0) |
||||
|
||||
ADDQ $32, BX |
||||
SUBQ $32, CX |
||||
JA LOOP |
||||
|
||||
// Write the updated state back.
VMOVDQU Y1, 0(AX) |
||||
VMOVDQU Y2, 32(AX) |
||||
VMOVDQU Y3, 64(AX) |
||||
VMOVDQU Y4, 96(AX) |
||||
VZEROUPPER |
||||
|
||||
DONE: |
||||
RET |
||||
|
||||
// finalizeAVX2 runs the finalization rounds and writes the checksum to out.
// len(out) selects the variant: 8 -> 64-bit (4 rounds), 16 -> 128-bit
// (6 rounds), anything else -> 256-bit (10 rounds, 32 bytes written).
// Each round feeds UPDATE a permutation of v0: its 128-bit halves swapped
// (VPERM2I128 $1) and the 32-bit halves of every 64-bit lane swapped
// (VPSHUFD $177).
// The finalized words are stored back through state before the output is
// computed, so *state is overwritten; the Go callers pass a scratch copy.
// func finalizeAVX2(out []byte, state *[16]uint64) |
||||
TEXT ·finalizeAVX2(SB), 4, $0-32 |
||||
MOVQ state+24(FP), AX |
||||
MOVQ out_base+0(FP), BX |
||||
MOVQ out_len+8(FP), CX |
||||
|
||||
VMOVDQU 0(AX), Y1 |
||||
VMOVDQU 32(AX), Y2 |
||||
VMOVDQU 64(AX), Y3 |
||||
VMOVDQU 96(AX), Y4 |
||||
|
||||
VMOVDQU ·zipperMergeAVX2<>(SB), Y5 |
||||
|
||||
// Rounds 1-4 (all output widths).
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
CMPQ CX, $8 |
||||
JE skipUpdate // Just 4 rounds for 64-bit checksum |
||||
|
||||
// Rounds 5-6 (128-bit and 256-bit).
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
CMPQ CX, $16 |
||||
JE skipUpdate // 6 rounds for 128-bit checksum |
||||
|
||||
// Rounds 7-10 (256-bit only).
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
VPERM2I128 $1, Y1, Y1, Y0 |
||||
VPSHUFD $177, Y0, Y0 |
||||
UPDATE(Y0) |
||||
|
||||
skipUpdate: |
||||
// Spill the state to memory; the scalar code below reads it as uint64 words
// (indices 0-3 = v0, 4-7 = v1, 8-11 = mul0, 12-15 = mul1).
VMOVDQU Y1, 0(AX) |
||||
VMOVDQU Y2, 32(AX) |
||||
VMOVDQU Y3, 64(AX) |
||||
VMOVDQU Y4, 96(AX) |
||||
VZEROUPPER |
||||
|
||||
CMPQ CX, $8 |
||||
JE hash64 |
||||
CMPQ CX, $16 |
||||
JE hash128 |
||||
|
||||
// 256-bit checksum |
||||
// out[0:16] = REDUCE_MOD(v0[0]+mul0[0], v0[1]+mul0[1], v1[0]+mul1[0], v1[1]+mul1[1])
MOVQ 0*8(AX), R8 |
||||
MOVQ 1*8(AX), R9 |
||||
MOVQ 4*8(AX), R10 |
||||
MOVQ 5*8(AX), R11 |
||||
ADDQ 8*8(AX), R8 |
||||
ADDQ 9*8(AX), R9 |
||||
ADDQ 12*8(AX), R10 |
||||
ADDQ 13*8(AX), R11 |
||||
|
||||
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15) |
||||
MOVQ R14, 0(BX) |
||||
MOVQ R15, 8(BX) |
||||
|
||||
// out[16:32] = the same reduction over lanes 2 and 3.
MOVQ 2*8(AX), R8 |
||||
MOVQ 3*8(AX), R9 |
||||
MOVQ 6*8(AX), R10 |
||||
MOVQ 7*8(AX), R11 |
||||
ADDQ 10*8(AX), R8 |
||||
ADDQ 11*8(AX), R9 |
||||
ADDQ 14*8(AX), R10 |
||||
ADDQ 15*8(AX), R11 |
||||
|
||||
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15) |
||||
MOVQ R14, 16(BX) |
||||
MOVQ R15, 24(BX) |
||||
RET |
||||
|
||||
// 128-bit checksum: out[0:8]  = v0[0] + v1[2] + mul0[0] + mul1[2],
//                   out[8:16] = v0[1] + v1[3] + mul0[1] + mul1[3].
hash128: |
||||
MOVQ 0*8(AX), R8 |
||||
MOVQ 1*8(AX), R9 |
||||
ADDQ 6*8(AX), R8 |
||||
ADDQ 7*8(AX), R9 |
||||
ADDQ 8*8(AX), R8 |
||||
ADDQ 9*8(AX), R9 |
||||
ADDQ 14*8(AX), R8 |
||||
ADDQ 15*8(AX), R9 |
||||
MOVQ R8, 0(BX) |
||||
MOVQ R9, 8(BX) |
||||
RET |
||||
|
||||
// 64-bit checksum: out[0:8] = v0[0] + v1[0] + mul0[0] + mul1[0].
hash64: |
||||
MOVQ 0*8(AX), DX |
||||
ADDQ 4*8(AX), DX |
||||
ADDQ 8*8(AX), DX |
||||
ADDQ 12*8(AX), DX |
||||
MOVQ DX, 0(BX) |
||||
RET |
||||
|
||||
// supportsAVX2 returns the low byte of the Go runtime's support_avx2 variable.
// NOTE(review): this depends on a runtime-internal symbol and loads 8 bytes
// from it although only the low byte is returned - confirm the symbol exists
// (and what width it has) in every Go version this file is built with.
// func supportsAVX2() bool |
||||
TEXT ·supportsAVX2(SB), 4, $0-1 |
||||
MOVQ runtime·support_avx2(SB), AX |
||||
MOVB AX, ret+0(FP) |
||||
RET |
@ -0,0 +1,50 @@ |
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build !go1.8
|
||||
// +build amd64 !gccgo !appengine !nacl
|
||||
|
||||
package highwayhash |
||||
|
||||
var ( |
||||
useSSE4 = supportsSSE4() |
||||
useAVX2 = false |
||||
useNEON = false |
||||
) |
||||
|
||||
// supportsSSE4 reports whether CPUID advertises SSE4 support.
// It has no Go body; the implementation is in the package's amd64 assembly.
//go:noescape
|
||||
func supportsSSE4() bool |
||||
|
||||
// initializeSSE4 derives the initial state from a 32-byte key (SSE4 assembly).
//go:noescape
|
||||
func initializeSSE4(state *[16]uint64, key []byte) |
||||
|
||||
// updateSSE4 absorbs msg into state in 32-byte blocks (SSE4 assembly).
// Callers in this package pass a length that is a multiple of 32.
//go:noescape
|
||||
func updateSSE4(state *[16]uint64, msg []byte) |
||||
|
||||
// finalizeSSE4 runs the final rounds and writes the checksum to out; len(out)
// (8, 16 or otherwise 32) selects the output width. It also overwrites *state.
//go:noescape
|
||||
func finalizeSSE4(out []byte, state *[16]uint64) |
||||
|
||||
func initialize(state *[16]uint64, key []byte) { |
||||
if useSSE4 { |
||||
initializeSSE4(state, key) |
||||
} else { |
||||
initializeGeneric(state, key) |
||||
} |
||||
} |
||||
|
||||
func update(state *[16]uint64, msg []byte) { |
||||
if useSSE4 { |
||||
updateSSE4(state, msg) |
||||
} else { |
||||
updateGeneric(state, msg) |
||||
} |
||||
} |
||||
|
||||
func finalize(out []byte, state *[16]uint64) { |
||||
if useSSE4 { |
||||
finalizeSSE4(out, state) |
||||
} else { |
||||
finalizeGeneric(out, state) |
||||
} |
||||
} |
@ -0,0 +1,303 @@ |
||||
// Copyright (c) 2017 Minio Inc. All rights reserved. |
||||
// Use of this source code is governed by a license that can be |
||||
// found in the LICENSE file. |
||||
|
||||
// +build amd64 !gccgo !appengine !nacl |
||||
|
||||
#include "textflag.h" |
||||
|
||||
// cons is a 64-byte read-only table of initialization constants.
// initializeSSE4 loads it as four 128-bit words, stores them as the mul0
// (bytes 0-31) and mul1 (bytes 32-63) parts of the state, and XORs them with
// the key to form v0 and v1.
DATA ·cons<>+0x00(SB)/8, $0xdbe6d5d5fe4cce2f |
||||
DATA ·cons<>+0x08(SB)/8, $0xa4093822299f31d0 |
||||
DATA ·cons<>+0x10(SB)/8, $0x13198a2e03707344 |
||||
DATA ·cons<>+0x18(SB)/8, $0x243f6a8885a308d3 |
||||
DATA ·cons<>+0x20(SB)/8, $0x3bd39e10cb0ef593 |
||||
DATA ·cons<>+0x28(SB)/8, $0xc0acf169b5f18a8c |
||||
DATA ·cons<>+0x30(SB)/8, $0xbe5466cf34e90c6c |
||||
DATA ·cons<>+0x38(SB)/8, $0x452821e638d01377 |
||||
GLOBL ·cons<>(SB), (NOPTR+RODATA), $64 |
||||
|
||||
// zipperMerge is the 16-byte PSHUFB control mask used by UPDATE (held in t2).
DATA ·zipperMerge<>+0x00(SB)/8, $0xf010e05020c03 |
||||
DATA ·zipperMerge<>+0x08(SB)/8, $0x70806090d0a040b |
||||
GLOBL ·zipperMerge<>(SB), (NOPTR+RODATA), $16 |
||||
|
||||
// Register aliases. Each 256-bit state word is split over two XMM registers
// (low 128 bits first), matching the load order in updateSSE4/finalizeSSE4:
//   v00/v01 = v0, v10/v11 = v1, m00/m01 = mul0, m10/m11 = mul1.
#define v00 X0 |
||||
#define v01 X1 |
||||
#define v10 X2 |
||||
#define v11 X3 |
||||
#define m00 X4 |
||||
#define m01 X5 |
||||
#define m10 X6 |
||||
#define m11 X7 |
||||
|
||||
// t0 and t1 are scratch registers; t2 holds the zipperMerge shuffle mask.
#define t0 X8 |
||||
#define t1 X9 |
||||
#define t2 X10 |
||||
|
||||
// REDUCE_MOD folds the 256-bit value x3:x2:x1:x0 (general purpose registers,
// x0 least significant) into the 128-bit value y1:y0:
//
//   x3    &= 0x3FFFFFFFFFFFFFFF          (top two bits dropped)
//   y1:y0  = (x1:x0) ^ ((x3:x2) << 1) ^ ((x3:x2) << 2)
//
// with the shifts taken over the 128-bit pair. x2, x3, tmp0 and tmp1 are
// clobbered; x0 and x1 are only read.
#define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \ |
||||
MOVQ $0x3FFFFFFFFFFFFFFF, tmp0 \ |
||||
ANDQ tmp0, x3 \ |
||||
MOVQ x2, y0 \ |
||||
MOVQ x3, y1 \ |
||||
\ |
||||
MOVQ x2, tmp0 \ |
||||
MOVQ x3, tmp1 \ |
||||
SHLQ $1, tmp1 \ |
||||
SHRQ $63, tmp0 \ |
||||
MOVQ tmp1, x3 \ |
||||
ORQ tmp0, x3 \ |
||||
\ |
||||
SHLQ $1, x2 \ |
||||
\ |
||||
MOVQ y0, tmp0 \ |
||||
MOVQ y1, tmp1 \ |
||||
SHLQ $2, tmp1 \ |
||||
SHRQ $62, tmp0 \ |
||||
MOVQ tmp1, y1 \ |
||||
ORQ tmp0, y1 \ |
||||
\ |
||||
SHLQ $2, y0 \ |
||||
\ |
||||
XORQ x0, y0 \ |
||||
XORQ x2, y0 \ |
||||
XORQ x1, y1 \ |
||||
XORQ x3, y1 |
||||
|
||||
// UPDATE performs one HighwayHash round on a 32-byte packet whose low and
// high 128-bit halves are in msg0 and msg1. Per 64-bit lane:
//
//   v1   += msg + mul0
//   mul0 ^= (v0 >> 32) * low32(v1)
//   v0   += mul1
//   mul1 ^= (v1 >> 32) * low32(v0)
//   v0   += shuffle(v1)
//   v1   += shuffle(v0)
//
// msg0/msg1 may be t0/t1: both are consumed by the first four instructions,
// before t0 and t1 are reused as scratch. t2 must hold the zipperMerge mask.
#define UPDATE(msg0, msg1) \ |
||||
PADDQ msg0, v10 \ |
||||
PADDQ m00, v10 \ |
||||
PADDQ msg1, v11 \ |
||||
PADDQ m01, v11 \ |
||||
\ |
||||
MOVO v00, t0 \ |
||||
MOVO v01, t1 \ |
||||
PSRLQ $32, t0 \ |
||||
PSRLQ $32, t1 \ |
||||
PMULULQ v10, t0 \ |
||||
PMULULQ v11, t1 \ |
||||
PXOR t0, m00 \ |
||||
PXOR t1, m01 \ |
||||
\ |
||||
PADDQ m10, v00 \ |
||||
PADDQ m11, v01 \ |
||||
\ |
||||
MOVO v10, t0 \ |
||||
MOVO v11, t1 \ |
||||
PSRLQ $32, t0 \ |
||||
PSRLQ $32, t1 \ |
||||
PMULULQ v00, t0 \ |
||||
PMULULQ v01, t1 \ |
||||
PXOR t0, m10 \ |
||||
PXOR t1, m11 \ |
||||
\ |
||||
MOVO v10, t0 \ |
||||
PSHUFB t2, t0 \ |
||||
MOVO v11, t1 \ |
||||
PSHUFB t2, t1 \ |
||||
PADDQ t0, v00 \ |
||||
PADDQ t1, v01 \ |
||||
\ |
||||
MOVO v00, t0 \ |
||||
PSHUFB t2, t0 \ |
||||
MOVO v01, t1 \ |
||||
PSHUFB t2, t1 \ |
||||
PADDQ t0, v10 \ |
||||
PADDQ t1, v11 |
||||
|
||||
// initializeSSE4 builds the initial 128-byte state from a 32-byte key:
//   v0   = key ^ cons[0:32]
//   v1   = (key with the 32-bit halves of every 64-bit lane swapped) ^ cons[32:64]
//   mul0 = cons[0:32]
//   mul1 = cons[32:64]
// It reads 32 bytes from key without checking its length; the Go callers
// validate the key size beforehand.
// func initializeSSE4(state *[16]uint64, key []byte) |
||||
TEXT ·initializeSSE4(SB), 4, $0-32 |
||||
MOVQ state+0(FP), AX |
||||
MOVQ key_base+8(FP), BX |
||||
MOVQ $·cons<>(SB), CX |
||||
|
||||
MOVOU 0(BX), v00 |
||||
MOVOU 16(BX), v01 |
||||
|
||||
// v1 starts as the key with adjacent 32-bit words swapped (shuffle imm 177).
PSHUFD $177, v00, v10 |
||||
PSHUFD $177, v01, v11 |
||||
|
||||
MOVOU 0(CX), m00 |
||||
MOVOU 16(CX), m01 |
||||
MOVOU 32(CX), m10 |
||||
MOVOU 48(CX), m11 |
||||
|
||||
PXOR m00, v00 |
||||
PXOR m01, v01 |
||||
PXOR m10, v10 |
||||
PXOR m11, v11 |
||||
|
||||
// Store v0 | v1 | mul0 | mul1.
MOVOU v00, 0(AX) |
||||
MOVOU v01, 16(AX) |
||||
MOVOU v10, 32(AX) |
||||
MOVOU v11, 48(AX) |
||||
MOVOU m00, 64(AX) |
||||
MOVOU m01, 80(AX) |
||||
MOVOU m10, 96(AX) |
||||
MOVOU m11, 112(AX) |
||||
RET |
||||
|
||||
// updateSSE4 absorbs msg into state, one 32-byte block per UPDATE round.
// It returns without touching state when len(msg) < 32.
// NOTE(review): the loop exits only once the remaining length reaches zero or
// the subtraction borrows, so a length that is not a multiple of 32 makes it
// read one block past the end of msg. The Go callers visible in this package
// always pass a multiple of 32.
// func updateSSE4(state *[16]uint64, msg []byte) |
||||
TEXT ·updateSSE4(SB), 4, $0-32 |
||||
MOVQ state+0(FP), AX |
||||
MOVQ msg_base+8(FP), BX |
||||
MOVQ msg_len+16(FP), CX |
||||
|
||||
CMPQ CX, $32 |
||||
JB DONE |
||||
|
||||
// Load the whole state into X0..X7 and the shuffle mask into t2.
MOVOU 0(AX), v00 |
||||
MOVOU 16(AX), v01 |
||||
MOVOU 32(AX), v10 |
||||
MOVOU 48(AX), v11 |
||||
MOVOU 64(AX), m00 |
||||
MOVOU 80(AX), m01 |
||||
MOVOU 96(AX), m10 |
||||
MOVOU 112(AX), m11 |
||||
|
||||
MOVOU ·zipperMerge<>(SB), t2 |
||||
|
||||
LOOP: |
||||
MOVOU 0(BX), t0 |
||||
MOVOU 16(BX), t1 |
||||
|
||||
UPDATE(t0, t1) |
||||
|
||||
ADDQ $32, BX |
||||
SUBQ $32, CX |
||||
JA LOOP |
||||
|
||||
// Write the updated state back.
MOVOU v00, 0(AX) |
||||
MOVOU v01, 16(AX) |
||||
MOVOU v10, 32(AX) |
||||
MOVOU v11, 48(AX) |
||||
MOVOU m00, 64(AX) |
||||
MOVOU m01, 80(AX) |
||||
MOVOU m10, 96(AX) |
||||
MOVOU m11, 112(AX) |
||||
|
||||
DONE: |
||||
RET |
||||
|
||||
// finalizeSSE4 runs the finalization rounds and writes the checksum to out.
// len(out) selects the variant: 8 -> 64-bit (4 rounds), 16 -> 128-bit
// (6 rounds), anything else -> 256-bit (10 rounds, 32 bytes written).
// Each round feeds UPDATE a permutation of v0: its 128-bit halves swapped
// (v01 is passed as the low half, v00 as the high half) and the 32-bit halves
// of every 64-bit lane swapped (PSHUFD $177).
// The finalized words are stored back through state before the output is
// computed, so *state is overwritten; the Go callers pass a scratch copy.
// func finalizeSSE4(out []byte, state *[16]uint64) |
||||
TEXT ·finalizeSSE4(SB), 4, $0-32 |
||||
MOVQ state+24(FP), AX |
||||
MOVQ out_base+0(FP), BX |
||||
MOVQ out_len+8(FP), CX |
||||
|
||||
MOVOU 0(AX), v00 |
||||
MOVOU 16(AX), v01 |
||||
MOVOU 32(AX), v10 |
||||
MOVOU 48(AX), v11 |
||||
MOVOU 64(AX), m00 |
||||
MOVOU 80(AX), m01 |
||||
MOVOU 96(AX), m10 |
||||
MOVOU 112(AX), m11 |
||||
|
||||
MOVOU ·zipperMerge<>(SB), t2 |
||||
|
||||
// Rounds 1-4 (all output widths).
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
CMPQ CX, $8 |
||||
JE skipUpdate // Just 4 rounds for 64-bit checksum |
||||
|
||||
// Rounds 5-6 (128-bit and 256-bit).
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
CMPQ CX, $16 |
||||
JE skipUpdate // 6 rounds for 128-bit checksum |
||||
|
||||
// Rounds 7-10 (256-bit only).
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
PSHUFD $177, v01, t0 |
||||
PSHUFD $177, v00, t1 |
||||
UPDATE(t0, t1) |
||||
|
||||
skipUpdate: |
||||
// Write the finalized state back through the state pointer.
MOVOU v00, 0(AX) |
||||
MOVOU v01, 16(AX) |
||||
MOVOU v10, 32(AX) |
||||
MOVOU v11, 48(AX) |
||||
MOVOU m00, 64(AX) |
||||
MOVOU m01, 80(AX) |
||||
MOVOU m10, 96(AX) |
||||
MOVOU m11, 112(AX) |
||||
|
||||
CMPQ CX, $8 |
||||
JE hash64 |
||||
CMPQ CX, $16 |
||||
JE hash128 |
||||
|
||||
// 256-bit checksum |
||||
// Form v0+mul0 and v1+mul1 lane-wise, then reduce each 256-bit half
// (low halves -> out[0:16], high halves -> out[16:32]).
PADDQ v00, m00 |
||||
PADDQ v10, m10 |
||||
PADDQ v01, m01 |
||||
PADDQ v11, m11 |
||||
|
||||
MOVQ m00, R8 |
||||
PEXTRQ $1, m00, R9 |
||||
MOVQ m10, R10 |
||||
PEXTRQ $1, m10, R11 |
||||
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15) |
||||
MOVQ R14, 0(BX) |
||||
MOVQ R15, 8(BX) |
||||
|
||||
MOVQ m01, R8 |
||||
PEXTRQ $1, m01, R9 |
||||
MOVQ m11, R10 |
||||
PEXTRQ $1, m11, R11 |
||||
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15) |
||||
MOVQ R14, 16(BX) |
||||
MOVQ R15, 24(BX) |
||||
RET |
||||
|
||||
// 128-bit checksum: out[0:16] = v0.lo + v1.hi + mul0.lo + mul1.hi (lane-wise).
hash128: |
||||
PADDQ v00, v11 |
||||
PADDQ m00, m11 |
||||
PADDQ v11, m11 |
||||
MOVOU m11, 0(BX) |
||||
RET |
||||
|
||||
// 64-bit checksum: out[0:8] = low lane of v0.lo + v1.lo + mul0.lo + mul1.lo.
hash64: |
||||
PADDQ v00, v10 |
||||
PADDQ m00, m10 |
||||
PADDQ v10, m10 |
||||
MOVQ m10, DX |
||||
MOVQ DX, 0(BX) |
||||
RET |
||||
|
||||
// supportsSSE4 executes CPUID with AX = 1 and returns bit 19 of CX as a bool.
// func supportsSSE4() bool |
||||
TEXT ·supportsSSE4(SB), 4, $0-1 |
||||
MOVL $1, AX |
||||
CPUID |
||||
SHRL $19, CX // Bit 19 indicates SSE4 support |
||||
ANDL $1, CX // CX != 0 if support SSE4 |
||||
MOVB CX, ret+0(FP) |
||||
RET |
@ -0,0 +1,32 @@ |
||||
//+build !noasm
|
||||
|
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package highwayhash |
||||
|
||||
// useSSE4 and useAVX2 are always false in the arm64 build.
var useSSE4, useAVX2 = false, false

// useNEON selects the arm64 assembly update routine; it is enabled
// unconditionally in this build.
var useNEON = true
||||
|
||||
// updateArm64 absorbs msg into state in 32-byte blocks.
// It has no Go body; the implementation is in the package's arm64 assembly.
//go:noescape
|
||||
func updateArm64(state *[16]uint64, msg []byte) |
||||
|
||||
func initialize(state *[16]uint64, key []byte) { |
||||
initializeGeneric(state, key) |
||||
} |
||||
|
||||
func update(state *[16]uint64, msg []byte) { |
||||
if useNEON { |
||||
updateArm64(state, msg) |
||||
} else { |
||||
updateGeneric(state, msg) |
||||
} |
||||
} |
||||
|
||||
func finalize(out []byte, state *[16]uint64) { |
||||
finalizeGeneric(out, state) |
||||
} |
@ -0,0 +1,116 @@ |
||||
//+build !noasm !appengine |
||||
|
||||
// |
||||
// Minio Cloud Storage, (C) 2017 Minio, Inc. |
||||
// |
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License. |
||||
// You may obtain a copy of the License at |
||||
// |
||||
// http://www.apache.org/licenses/LICENSE-2.0 |
||||
// |
||||
// Unless required by applicable law or agreed to in writing, software |
||||
// distributed under the License is distributed on an "AS IS" BASIS, |
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
// See the License for the specific language governing permissions and |
||||
// limitations under the License. |
||||
// |
||||
|
||||
// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to |
||||
// the opcodes of their Plan9 equivalents |
||||
|
||||
// func updateArm64(state *[16]uint64, msg []byte)
//
// Mixes every complete 32-byte block of msg into the 128-byte state.
// Returns immediately, leaving state untouched, when len(msg) < 32.
TEXT ·updateArm64(SB), 7, $0
	MOVD state+0(FP), R0    // R0 = pointer to the state
	MOVD msg_base+8(FP), R1 // R1 = pointer to the message bytes
	MOVD msg_len+16(FP), R2 // R2 = message length in bytes

	// R2 holds "bytes remaining minus 32"; negative means no full block.
	SUBS $32, R2
	BMI  done

	// Register allocation for the state:
	//   v0 = v0.lo     v1 = v0.hi
	//   v2 = v1.lo     v3 = v1.hi
	//   v4 = mul0.lo   v5 = mul0.hi
	//   v6 = mul1.lo   v7 = mul1.hi
	// TBL index tables live in v28 (zipper merge), v29 and v30 (multiply
	// operand selection).
	MOVD $·constants(SB), R3
	WORD $0x4c40607c // ld1 {v28.16b-v30.16b}, [x3]

	// Load the whole state. The first load post-increments x0 by 64, so
	// x0 is rewound afterwards to be reused by the final stores.
	WORD $0x4cdf2c00 // ld1 {v0.2d-v3.2d}, [x0], #64
	WORD $0x4c402c04 // ld1 {v4.2d-v7.2d}, [x0]
	SUBS $64, R0

blockLoop:
	// Fetch the next 32 message bytes and advance the message pointer.
	WORD $0x4cdfa83a // ld1 {v26.4s-v27.4s}, [x1], #32

	// v1 += message
	WORD $0x4efa8442 // add v2.2d, v2.2d, v26.2d
	WORD $0x4efb8463 // add v3.2d, v3.2d, v27.2d

	// v1 += mul0
	WORD $0x4ee48442 // add v2.2d, v2.2d, v4.2d
	WORD $0x4ee58463 // add v3.2d, v3.2d, v5.2d

	// Gather the 32-bit operands and compute the first pair of products.
	WORD $0x4e1d200a // tbl v10.16b,{v0.16b,v1.16b},v29.16b
	WORD $0x4e1e204b // tbl v11.16b,{v2.16b,v3.16b},v30.16b
	WORD $0x2eaac16c // umull v12.2d, v11.2s, v10.2s
	WORD $0x6eaac16d // umull2 v13.2d, v11.4s, v10.4s

	// v0 += mul1
	WORD $0x4ee68400 // add v0.2d, v0.2d, v6.2d
	WORD $0x4ee78421 // add v1.2d, v1.2d, v7.2d

	// Gather the operands for the second pair of products.
	WORD $0x4e1d204f // tbl v15.16b,{v2.16b,v3.16b},v29.16b
	WORD $0x4e1e200e // tbl v14.16b,{v0.16b,v1.16b},v30.16b

	// mul0 ^= first pair of products
	WORD $0x6e2c1c84 // eor v4.16b,v4.16b,v12.16b
	WORD $0x6e2d1ca5 // eor v5.16b,v5.16b,v13.16b

	// Second pair of products.
	WORD $0x2eaec1f0 // umull v16.2d, v15.2s, v14.2s
	WORD $0x6eaec1f1 // umull2 v17.2d, v15.4s, v14.4s

	// v0 += zipper-merge of v1
	WORD $0x4e1c0052 // tbl v18.16b,{v2.16b},v28.16b
	WORD $0x4ef28400 // add v0.2d, v0.2d, v18.2d
	WORD $0x4e1c0073 // tbl v19.16b,{v3.16b},v28.16b
	WORD $0x4ef38421 // add v1.2d, v1.2d, v19.2d

	// v1 += zipper-merge of v0
	WORD $0x4e1c0014 // tbl v20.16b,{v0.16b},v28.16b
	WORD $0x4ef48442 // add v2.2d, v2.2d, v20.2d
	WORD $0x4e1c0035 // tbl v21.16b,{v1.16b},v28.16b
	WORD $0x4ef58463 // add v3.2d, v3.2d, v21.2d

	// mul1 ^= second pair of products
	WORD $0x6e301cc6 // eor v6.16b,v6.16b,v16.16b
	WORD $0x6e311ce7 // eor v7.16b,v7.16b,v17.16b

	// Continue while another complete 32-byte block remains.
	SUBS $32, R2
	BPL  blockLoop

	// Write the updated state back.
	WORD $0x4c9f2c00 // st1 {v0.2d-v3.2d}, [x0], #64
	WORD $0x4c002c04 // st1 {v4.2d-v7.2d}, [x0]

done:
	RET
||||
|
||||
// Byte-index tables for the TBL instructions in updateArm64. The 48 bytes
// are loaded as three consecutive 16-byte vectors: v28, v29 and v30.

// v28: byte permutation implementing the zipper merge.
DATA ·constants+0x0(SB)/8, $0x000f010e05020c03
DATA ·constants+0x8(SB)/8, $0x070806090d0a040b

// v29: first multiply operand - selects bytes 4-7 of each 64-bit lane of a
// two-register table.
DATA ·constants+0x10(SB)/8, $0x0f0e0d0c07060504
DATA ·constants+0x18(SB)/8, $0x1f1e1d1c17161514

// v30: second multiply operand - selects bytes 0-3 of each 64-bit lane of a
// two-register table.
DATA ·constants+0x20(SB)/8, $0x0b0a090803020100
DATA ·constants+0x28(SB)/8, $0x1b1a191813121110

GLOBL ·constants(SB), 8, $48
@ -0,0 +1,161 @@ |
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package highwayhash |
||||
|
||||
import ( |
||||
"encoding/binary" |
||||
) |
||||
|
||||
// Start offsets of the four 4-word groups stored back to back inside the
// [16]uint64 hash state.
const (
	v0   = 4 * iota // words 0-3
	v1              // words 4-7
	mul0            // words 8-11
	mul1            // words 12-15
)
||||
|
||||
// Fixed initial values of the hash state. initializeGeneric copies init0
// and init1 into the mul0 and mul1 groups and XORs them with the key to
// form the v0 and v1 groups.
var (
	init0 = [4]uint64{
		0xdbe6d5d5fe4cce2f,
		0xa4093822299f31d0,
		0x13198a2e03707344,
		0x243f6a8885a308d3,
	}
	init1 = [4]uint64{
		0x3bd39e10cb0ef593,
		0xc0acf169b5f18a8c,
		0xbe5466cf34e90c6c,
		0x452821e638d01377,
	}
)
||||
|
||||
func initializeGeneric(state *[16]uint64, k []byte) { |
||||
var key [4]uint64 |
||||
|
||||
key[0] = binary.LittleEndian.Uint64(k[0:]) |
||||
key[1] = binary.LittleEndian.Uint64(k[8:]) |
||||
key[2] = binary.LittleEndian.Uint64(k[16:]) |
||||
key[3] = binary.LittleEndian.Uint64(k[24:]) |
||||
|
||||
copy(state[mul0:], init0[:]) |
||||
copy(state[mul1:], init1[:]) |
||||
|
||||
for i, k := range key { |
||||
state[v0+i] = init0[i] ^ k |
||||
} |
||||
|
||||
key[0] = key[0]>>32 | key[0]<<32 |
||||
key[1] = key[1]>>32 | key[1]<<32 |
||||
key[2] = key[2]>>32 | key[2]<<32 |
||||
key[3] = key[3]>>32 | key[3]<<32 |
||||
|
||||
for i, k := range key { |
||||
state[v1+i] = init1[i] ^ k |
||||
} |
||||
} |
||||
|
||||
func updateGeneric(state *[16]uint64, msg []byte) { |
||||
for len(msg) > 0 { |
||||
// add message
|
||||
state[v1+0] += binary.LittleEndian.Uint64(msg) |
||||
state[v1+1] += binary.LittleEndian.Uint64(msg[8:]) |
||||
state[v1+2] += binary.LittleEndian.Uint64(msg[16:]) |
||||
state[v1+3] += binary.LittleEndian.Uint64(msg[24:]) |
||||
|
||||
// v1 += mul0
|
||||
state[v1+0] += state[mul0+0] |
||||
state[v1+1] += state[mul0+1] |
||||
state[v1+2] += state[mul0+2] |
||||
state[v1+3] += state[mul0+3] |
||||
|
||||
state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32) |
||||
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32) |
||||
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32) |
||||
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32) |
||||
|
||||
// v0 += mul1
|
||||
state[v0+0] += state[mul1+0] |
||||
state[v0+1] += state[mul1+1] |
||||
state[v0+2] += state[mul1+2] |
||||
state[v0+3] += state[mul1+3] |
||||
|
||||
state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32) |
||||
state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32) |
||||
state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32) |
||||
state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32) |
||||
|
||||
zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1]) |
||||
zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3]) |
||||
|
||||
zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1]) |
||||
zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3]) |
||||
msg = msg[32:] |
||||
} |
||||
} |
||||
|
||||
func finalizeGeneric(out []byte, state *[16]uint64) { |
||||
var perm [4]uint64 |
||||
var tmp [32]byte |
||||
runs := 4 |
||||
if len(out) == 16 { |
||||
runs = 6 |
||||
} else if len(out) == 32 { |
||||
runs = 10 |
||||
} |
||||
for i := 0; i < runs; i++ { |
||||
perm[0] = state[v0+2]>>32 | state[v0+2]<<32 |
||||
perm[1] = state[v0+3]>>32 | state[v0+3]<<32 |
||||
perm[2] = state[v0+0]>>32 | state[v0+0]<<32 |
||||
perm[3] = state[v0+1]>>32 | state[v0+1]<<32 |
||||
|
||||
binary.LittleEndian.PutUint64(tmp[0:], perm[0]) |
||||
binary.LittleEndian.PutUint64(tmp[8:], perm[1]) |
||||
binary.LittleEndian.PutUint64(tmp[16:], perm[2]) |
||||
binary.LittleEndian.PutUint64(tmp[24:], perm[3]) |
||||
|
||||
update(state, tmp[:]) |
||||
} |
||||
|
||||
switch len(out) { |
||||
case 8: |
||||
binary.LittleEndian.PutUint64(out, state[v0+0]+state[v1+0]+state[mul0+0]+state[mul1+0]) |
||||
case 16: |
||||
binary.LittleEndian.PutUint64(out, state[v0+0]+state[v1+2]+state[mul0+0]+state[mul1+2]) |
||||
binary.LittleEndian.PutUint64(out[8:], state[v0+1]+state[v1+3]+state[mul0+1]+state[mul1+3]) |
||||
case 32: |
||||
h0, h1 := reduceMod(state[v0+0]+state[mul0+0], state[v0+1]+state[mul0+1], state[v1+0]+state[mul1+0], state[v1+1]+state[mul1+1]) |
||||
binary.LittleEndian.PutUint64(out[0:], h0) |
||||
binary.LittleEndian.PutUint64(out[8:], h1) |
||||
|
||||
h0, h1 = reduceMod(state[v0+2]+state[mul0+2], state[v0+3]+state[mul0+3], state[v1+2]+state[mul1+2], state[v1+3]+state[mul1+3]) |
||||
binary.LittleEndian.PutUint64(out[16:], h0) |
||||
binary.LittleEndian.PutUint64(out[24:], h1) |
||||
} |
||||
} |
||||
|
||||
// zipperMerge permutes the 16 bytes of the pair (v0, v1) and adds the two
// resulting 64-bit words to *d0 and *d1 (with wrap-around).
//
// Writing x[i] for byte i of x (byte 0 is the least significant), the
// word added to *d0 has, from byte 0 to byte 7:
//
//	v0[3] v1[4] v0[2] v0[5] v1[6] v0[1] v1[7] v0[0]
//
// and the word added to *d1 has:
//
//	v1[3] v0[4] v1[2] v1[5] v1[1] v0[6] v1[0] v0[7]
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
	const b = 0xFF // selects a single byte after shifting it to position 0

	lo := (v0>>24)&b |
		((v1>>32)&b)<<8 |
		((v0>>16)&b)<<16 |
		((v0>>40)&b)<<24 |
		((v1>>48)&b)<<32 |
		((v0>>8)&b)<<40 |
		(v1>>56)<<48 |
		v0<<56
	*d0 += lo

	hi := (v1>>24)&b |
		((v0>>32)&b)<<8 |
		((v1>>16)&b)<<16 |
		((v1>>40)&b)<<24 |
		((v1>>8)&b)<<32 |
		((v0>>48)&b)<<40 |
		(v1&b)<<48 |
		(v0>>56)<<56
	*d1 += hi
}
||||
|
||||
// reduceMod reduces v = [v0, v1, v2, v3] modulo the irreducible polynomial
// x^128 + x^2 + x and returns the 128-bit result as (r0, r1), low word
// first.
//
// The upper half [v2, v3], with the top two bits of v3 cleared, is shifted
// left by one and by two bits; both shifted values are XORed into the lower
// half [v0, v1].
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {
	lo, hi := v2, v3&0x3FFFFFFFFFFFFFFF

	// 128-bit left shifts of [lo, hi] by 1 and by 2.
	shl1Lo, shl1Hi := lo<<1, hi<<1|lo>>63
	shl2Lo, shl2Hi := lo<<2, hi<<2|lo>>62

	return v0 ^ shl1Lo ^ shl2Lo, v1 ^ shl1Hi ^ shl2Hi
}
@ -0,0 +1,26 @@ |
||||
// Copyright (c) 2017 Minio Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build !amd64
|
||||
// +build !arm64
|
||||
|
||||
package highwayhash |
||||
|
||||
// Flags describing which accelerated update implementations are in use.
// All of them are disabled in this file, which only calls the portable
// code.
var (
	useSSE4 = false // no SSE4 path
	useAVX2 = false // no AVX2 path
	useNEON = false // no NEON path
)
||||
|
||||
func initialize(state *[16]uint64, k []byte) { |
||||
initializeGeneric(state, k) |
||||
} |
||||
|
||||
func update(state *[16]uint64, msg []byte) { |
||||
updateGeneric(state, msg) |
||||
} |
||||
|
||||
func finalize(out []byte, state *[16]uint64) { |
||||
finalizeGeneric(out, state) |
||||
} |
Loading…
Reference in new issue