diff --git a/vendor/github.com/klauspost/reedsolomon/README.md b/vendor/github.com/klauspost/reedsolomon/README.md index dc4cfd909..c53188ec8 100644 --- a/vendor/github.com/klauspost/reedsolomon/README.md +++ b/vendor/github.com/klauspost/reedsolomon/README.md @@ -24,6 +24,10 @@ go get -u github.com/klauspost/reedsolomon # Changes +## December 18, 2018 + +Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform. + ## November 18, 2017 Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU. @@ -259,6 +263,18 @@ By exploiting NEON instructions the performance for ARM has been accelerated. Be | 10 | 2 | 20% | 188 | 1738 | 925% | | 10 | 4 | 40% | 96 | 839 | 877% | +# Performance on ppc64le + +The performance for ppc64le has been accelerated. This gives roughly a 10x performance improvement on this architecture as can been seen below: + +``` +benchmark old MB/s new MB/s speedup +BenchmarkGalois128K-160 948.87 8878.85 9.36x +BenchmarkGalois1M-160 968.85 9041.92 9.33x +BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x +BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x +``` + # asm2plan9s [asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents. @@ -266,8 +282,10 @@ By exploiting NEON instructions the performance for ARM has been accelerated. Be # Links * [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/). * [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze. +* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation. * [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package. * [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance. +* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation. * [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests. * [rsraid](https://github.com/goayame/rsraid). A similar library written in Go. Slower, but supports more shards. * [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations. diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go index aeebdbb7f..2c719f59c 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go @@ -1,5 +1,6 @@ //+build !noasm //+build !appengine +//+build !gccgo // Copyright 2015, Klaus Post, see LICENSE for details. diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s index 8a294c17c..b768028a9 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s +++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s @@ -1,4 +1,4 @@ -//+build !noasm !appengine +//+build !noasm !appengine !gccgo // Copyright 2015, Klaus Post, see LICENSE for details. diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go index a9e533f51..7af56fae0 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go @@ -1,5 +1,6 @@ //+build !noasm //+build !appengine +//+build !gccgo // Copyright 2015, Klaus Post, see LICENSE for details. // Copyright 2017, Minio, Inc. diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s index b18e2587b..0c43f6d63 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s +++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s @@ -1,4 +1,4 @@ -//+build !noasm !appengine +//+build !noasm !appengine !gccgo // Copyright 2015, Klaus Post, see LICENSE for details. // Copyright 2017, Minio, Inc. diff --git a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go index ebde7be6a..81d5597e1 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go @@ -1,5 +1,6 @@ -//+build !amd64 noasm appengine -//+build !arm64 noasm appengine +//+build !amd64 noasm appengine gccgo +//+build !arm64 noasm appengine gccgo +//+build !ppc64le noasm appengine gccgo // Copyright 2015, Klaus Post, see LICENSE for details. diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go new file mode 100644 index 000000000..9033279cd --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go @@ -0,0 +1,67 @@ +//+build !noasm +//+build !appengine +//+build !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. +// Copyright 2018, Minio, Inc. + +package reedsolomon + +//go:noescape +func galMulPpc(low, high, in, out []byte) + +//go:noescape +func galMulPpcXor(low, high, in, out []byte) + +// This is what the assembler routines do in blocks of 16 bytes: +/* +func galMulPpc(low, high, in, out []byte) { + for n, input := range in { + l := input & 0xf + h := input >> 4 + out[n] = low[l] ^ high[h] + } +} +func galMulPpcXor(low, high, in, out []byte) { + for n, input := range in { + l := input & 0xf + h := input >> 4 + out[n] ^= low[l] ^ high[h] + } +} +*/ + +func galMulSlice(c byte, in, out []byte, ssse3, avx2 bool) { + done := (len(in) >> 4) << 4 + if done > 0 { + galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out) + } + remain := len(in) - done + if remain > 0 { + mt := mulTable[c] + for i := done; i < len(in); i++ { + out[i] = mt[in[i]] + } + } +} + +func galMulSliceXor(c byte, in, out []byte, ssse3, avx2 bool) { + done := (len(in) >> 4) << 4 + if done > 0 { + galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out) + } + remain := len(in) - done + if remain > 0 { + mt := mulTable[c] + for i := done; i < len(in); i++ { + out[i] ^= mt[in[i]] + } + } +} + +// slice galois add +func sliceXor(in, out []byte, sse2 bool) { + for n, input := range in { + out[n] ^= input + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s new file mode 100644 index 000000000..960087c37 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s @@ -0,0 +1,126 @@ +//+build !noasm !appengine !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. +// Copyright 2018, Minio, Inc. + +#include "textflag.h" + +#define LOW R3 +#define HIGH R4 +#define IN R5 +#define LEN R6 +#define OUT R7 +#define CONSTANTS R8 +#define OFFSET R9 +#define OFFSET1 R10 +#define OFFSET2 R11 + +#define X6 VS34 +#define X6_ V2 +#define X7 VS35 +#define X7_ V3 +#define MSG VS36 +#define MSG_ V4 +#define MSG_HI VS37 +#define MSG_HI_ V5 +#define RESULT VS38 +#define RESULT_ V6 +#define ROTATE VS39 +#define ROTATE_ V7 +#define MASK VS40 +#define MASK_ V8 +#define FLIP VS41 +#define FLIP_ V9 + + +// func galMulPpc(low, high, in, out []byte) +TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96 + MOVD low+0(FP), LOW + MOVD high+24(FP), HIGH + MOVD in+48(FP), IN + MOVD in_len+56(FP), LEN + MOVD out+72(FP), OUT + + MOVD $16, OFFSET1 + MOVD $32, OFFSET2 + + MOVD $·constants(SB), CONSTANTS + LXVD2X (CONSTANTS)(R0), ROTATE + LXVD2X (CONSTANTS)(OFFSET1), MASK + LXVD2X (CONSTANTS)(OFFSET2), FLIP + + LXVD2X (LOW)(R0), X6 + LXVD2X (HIGH)(R0), X7 + VPERM X6_, V31, FLIP_, X6_ + VPERM X7_, V31, FLIP_, X7_ + + MOVD $0, OFFSET + +loop: + LXVD2X (IN)(OFFSET), MSG + + VSRB MSG_, ROTATE_, MSG_HI_ + VAND MSG_, MASK_, MSG_ + VPERM X6_, V31, MSG_, MSG_ + VPERM X7_, V31, MSG_HI_, MSG_HI_ + + VXOR MSG_, MSG_HI_, MSG_ + + STXVD2X MSG, (OUT)(OFFSET) + + ADD $16, OFFSET, OFFSET + CMP LEN, OFFSET + BGT loop + RET + + +// func galMulPpcXorlow, high, in, out []byte) +TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96 + MOVD low+0(FP), LOW + MOVD high+24(FP), HIGH + MOVD in+48(FP), IN + MOVD in_len+56(FP), LEN + MOVD out+72(FP), OUT + + MOVD $16, OFFSET1 + MOVD $32, OFFSET2 + + MOVD $·constants(SB), CONSTANTS + LXVD2X (CONSTANTS)(R0), ROTATE + LXVD2X (CONSTANTS)(OFFSET1), MASK + LXVD2X (CONSTANTS)(OFFSET2), FLIP + + LXVD2X (LOW)(R0), X6 + LXVD2X (HIGH)(R0), X7 + VPERM X6_, V31, FLIP_, X6_ + VPERM X7_, V31, FLIP_, X7_ + + MOVD $0, OFFSET + +loopXor: + LXVD2X (IN)(OFFSET), MSG + LXVD2X (OUT)(OFFSET), RESULT + + VSRB MSG_, ROTATE_, MSG_HI_ + VAND MSG_, MASK_, MSG_ + VPERM X6_, V31, MSG_, MSG_ + VPERM X7_, V31, MSG_HI_, MSG_HI_ + + VXOR MSG_, MSG_HI_, MSG_ + VXOR MSG_, RESULT_, RESULT_ + + STXVD2X RESULT, (OUT)(OFFSET) + + ADD $16, OFFSET, OFFSET + CMP LEN, OFFSET + BGT loopXor + RET + +DATA ·constants+0x0(SB)/8, $0x0404040404040404 +DATA ·constants+0x8(SB)/8, $0x0404040404040404 +DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f +DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f +DATA ·constants+0x20(SB)/8, $0x0706050403020100 +DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908 + +GLOBL ·constants(SB), 8, $48 diff --git a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go index 213d0b4e4..706c4377c 100644 --- a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go +++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go @@ -471,12 +471,12 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu wg.Add(1) go func(start, stop int) { for c := 0; c < r.DataShards; c++ { - in := inputs[c] + in := inputs[c][start:stop] for iRow := 0; iRow < outputCount; iRow++ { if c == 0 { - galMulSlice(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2) + galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2) } else { - galMulSliceXor(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2) + galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2) } } } diff --git a/vendor/github.com/minio/highwayhash/README.md b/vendor/github.com/minio/highwayhash/README.md index 2cd7d2cd2..18e179c6b 100644 --- a/vendor/github.com/minio/highwayhash/README.md +++ b/vendor/github.com/minio/highwayhash/README.md @@ -7,7 +7,7 @@ It can be used to prevent hash-flooding attacks or authenticate short-lived messages. Additionally it can be used as a fingerprinting function. HighwayHash is not a general purpose cryptographic hash function (such as Blake2b, SHA-3 or SHA-2) and should not be used if strong collision resistance is required. -This repository contains a native Go version and optimized assembly implementations on both Intel and ARM platforms. +This repository contains a native Go version and optimized assembly implementations for Intel, ARM and ppc64le architectures. ### High performance @@ -50,6 +50,17 @@ ARM64 NEON | 384 MB/s | 955 MB/s | 1053 MB/s *Note: For now just the (main) update loop is implemented in assembly, so for small messages there is still considerable overhead due to initialization and finalization.* +### ppc64le Performance + +The ppc64le accelerated version is roughly 10x faster compared to the non-optimized version: + +``` +benchmark old MB/s new MB/s speedup +BenchmarkWrite_8K 531.19 5566.41 10.48x +BenchmarkSum64_8K 518.86 4971.88 9.58x +BenchmarkSum256_8K 502.45 4474.20 8.90x +``` + ### Performance compared to other hashing techniques On a Skylake CPU (3.0 GHz Xeon Platinum 8124M) the table below shows how HighwayHash compares to other hashing techniques for 5 MB messages (single core performance, all Golang implementations, see [benchmark](https://github.com/fwessels/HashCompare/blob/master/benchmarks_test.go)). diff --git a/vendor/github.com/minio/highwayhash/highwayhashAVX2_amd64.go b/vendor/github.com/minio/highwayhash/highwayhashAVX2_amd64.go index 0e201998e..d2b03d75a 100644 --- a/vendor/github.com/minio/highwayhash/highwayhashAVX2_amd64.go +++ b/vendor/github.com/minio/highwayhash/highwayhashAVX2_amd64.go @@ -13,6 +13,7 @@ var ( useSSE4 = cpu.X86.HasSSE41 useAVX2 = cpu.X86.HasAVX2 useNEON = false + useVMX = false ) //go:noescape diff --git a/vendor/github.com/minio/highwayhash/highwayhash_amd64.go b/vendor/github.com/minio/highwayhash/highwayhash_amd64.go index 8c520e76b..703635b2d 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_amd64.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_amd64.go @@ -13,6 +13,7 @@ var ( useSSE4 = cpu.X86.HasSSE41 useAVX2 = false useNEON = false + useVMX = false ) //go:noescape diff --git a/vendor/github.com/minio/highwayhash/highwayhash_arm64.go b/vendor/github.com/minio/highwayhash/highwayhash_arm64.go index 79831372c..387e46d71 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_arm64.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_arm64.go @@ -10,6 +10,7 @@ var ( useSSE4 = false useAVX2 = false useNEON = true + useVMX = false ) //go:noescape diff --git a/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go b/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go new file mode 100644 index 000000000..9a8a1259c --- /dev/null +++ b/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go @@ -0,0 +1,33 @@ +//+build !noasm + +// Copyright (c) 2017 Minio Inc. All rights reserved. +// Use of this source code is governed by a license that can be +// found in the LICENSE file. + +package highwayhash + +var ( + useSSE4 = false + useAVX2 = false + useNEON = false + useVMX = true +) + +//go:noescape +func updatePpc64Le(state *[16]uint64, msg []byte) + +func initialize(state *[16]uint64, key []byte) { + initializeGeneric(state, key) +} + +func update(state *[16]uint64, msg []byte) { + if useVMX { + updatePpc64Le(state, msg) + } else { + updateGeneric(state, msg) + } +} + +func finalize(out []byte, state *[16]uint64) { + finalizeGeneric(out, state) +} diff --git a/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.s b/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.s new file mode 100644 index 000000000..8a512c0f8 --- /dev/null +++ b/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.s @@ -0,0 +1,183 @@ +//+build !noasm !appengine + +// +// Minio Cloud Storage, (C) 2018 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "textflag.h" + +// Definition of registers +#define V0_LO VS32 +#define V0_LO_ V0 +#define V0_HI VS33 +#define V0_HI_ V1 +#define V1_LO VS34 +#define V1_LO_ V2 +#define V1_HI VS35 +#define V1_HI_ V3 +#define MUL0_LO VS36 +#define MUL0_LO_ V4 +#define MUL0_HI VS37 +#define MUL0_HI_ V5 +#define MUL1_LO VS38 +#define MUL1_LO_ V6 +#define MUL1_HI VS39 +#define MUL1_HI_ V7 + +// Message +#define MSG_LO VS40 +#define MSG_LO_ V8 +#define MSG_HI VS41 + +// Constants +#define ROTATE VS42 +#define ROTATE_ V10 +#define MASK VS43 +#define MASK_ V11 + +// Temps +#define TEMP1 VS44 +#define TEMP1_ V12 +#define TEMP2 VS45 +#define TEMP2_ V13 +#define TEMP3 VS46 +#define TEMP3_ V14 +#define TEMP4_ V15 +#define TEMP5_ V16 +#define TEMP6_ V17 +#define TEMP7_ V18 + +// Regular registers +#define STATE R3 +#define MSG_BASE R4 +#define MSG_LEN R5 +#define CONSTANTS R6 +#define P1 R7 +#define P2 R8 +#define P3 R9 +#define P4 R10 +#define P5 R11 +#define P6 R12 +#define P7 R14 // avoid using R13 + +TEXT ·updatePpc64Le(SB), NOFRAME|NOSPLIT, $0-32 + MOVD state+0(FP), STATE + MOVD msg_base+8(FP), MSG_BASE + MOVD msg_len+16(FP), MSG_LEN // length of message + + // Sanity check for length + CMPU MSG_LEN, $31 + BLE complete + + // Setup offsets + MOVD $16, P1 + MOVD $32, P2 + MOVD $48, P3 + MOVD $64, P4 + MOVD $80, P5 + MOVD $96, P6 + MOVD $112, P7 + + // Load state + LXVD2X (STATE)(R0), V0_LO + LXVD2X (STATE)(P1), V0_HI + LXVD2X (STATE)(P2), V1_LO + LXVD2X (STATE)(P3), V1_HI + LXVD2X (STATE)(P4), MUL0_LO + LXVD2X (STATE)(P5), MUL0_HI + LXVD2X (STATE)(P6), MUL1_LO + LXVD2X (STATE)(P7), MUL1_HI + XXPERMDI V0_LO, V0_LO, $2, V0_LO + XXPERMDI V0_HI, V0_HI, $2, V0_HI + XXPERMDI V1_LO, V1_LO, $2, V1_LO + XXPERMDI V1_HI, V1_HI, $2, V1_HI + XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO + XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI + XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO + XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI + + // Load constants table pointer + MOVD $·constants(SB), CONSTANTS + LXVD2X (CONSTANTS)(R0), ROTATE + LXVD2X (CONSTANTS)(P1), MASK + XXLNAND MASK, MASK, MASK + +loop: + // Main highwayhash update loop + LXVD2X (MSG_BASE)(R0), MSG_LO + VADDUDM V0_LO_, MUL1_LO_, TEMP1_ + VRLD V0_LO_, ROTATE_, TEMP2_ + VADDUDM MUL1_HI_, V0_HI_, TEMP3_ + LXVD2X (MSG_BASE)(P1), MSG_HI + ADD $32, MSG_BASE, MSG_BASE + XXPERMDI MSG_LO, MSG_LO, $2, MSG_LO + XXPERMDI MSG_HI, MSG_HI, $2, V0_LO + VADDUDM MSG_LO_, MUL0_LO_, MSG_LO_ + VADDUDM V0_LO_, MUL0_HI_, V0_LO_ + VADDUDM MSG_LO_, V1_LO_, V1_LO_ + VSRD V0_HI_, ROTATE_, MSG_LO_ + VADDUDM V0_LO_, V1_HI_, V1_HI_ + VPERM V1_LO_, V1_LO_, MASK_, V0_LO_ + VMULOUW V1_LO_, TEMP2_, TEMP2_ + VPERM V1_HI_, V1_HI_, MASK_, TEMP7_ + VADDUDM V0_LO_, TEMP1_, V0_LO_ + VMULOUW V1_HI_, MSG_LO_, MSG_LO_ + VADDUDM TEMP7_, TEMP3_, V0_HI_ + VPERM V0_LO_, V0_LO_, MASK_, TEMP6_ + VRLD V1_LO_, ROTATE_, TEMP4_ + VSRD V1_HI_, ROTATE_, TEMP5_ + VPERM V0_HI_, V0_HI_, MASK_, TEMP7_ + XXLXOR MUL0_LO, TEMP2, MUL0_LO + VMULOUW TEMP1_, TEMP4_, TEMP1_ + VMULOUW TEMP3_, TEMP5_, TEMP3_ + XXLXOR MUL0_HI, MSG_LO, MUL0_HI + XXLXOR MUL1_LO, TEMP1, MUL1_LO + XXLXOR MUL1_HI, TEMP3, MUL1_HI + VADDUDM TEMP6_, V1_LO_, V1_LO_ + VADDUDM TEMP7_, V1_HI_, V1_HI_ + + SUB $32, MSG_LEN, MSG_LEN + CMPU MSG_LEN, $32 + BGE loop + + // Save state + XXPERMDI V0_LO, V0_LO, $2, V0_LO + XXPERMDI V0_HI, V0_HI, $2, V0_HI + XXPERMDI V1_LO, V1_LO, $2, V1_LO + XXPERMDI V1_HI, V1_HI, $2, V1_HI + XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO + XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI + XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO + XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI + STXVD2X V0_LO, (STATE)(R0) + STXVD2X V0_HI, (STATE)(P1) + STXVD2X V1_LO, (STATE)(P2) + STXVD2X V1_HI, (STATE)(P3) + STXVD2X MUL0_LO, (STATE)(P4) + STXVD2X MUL0_HI, (STATE)(P5) + STXVD2X MUL1_LO, (STATE)(P6) + STXVD2X MUL1_HI, (STATE)(P7) + +complete: + RET + + +// Constants table +DATA ·constants+0x0(SB)/8, $0x0000000000000020 +DATA ·constants+0x8(SB)/8, $0x0000000000000020 +DATA ·constants+0x10(SB)/8, $0x070806090d0a040b // zipper merge constant +DATA ·constants+0x18(SB)/8, $0x000f010e05020c03 // zipper merge constant + +GLOBL ·constants(SB), 8, $32 diff --git a/vendor/github.com/minio/highwayhash/highwayhash_ref.go b/vendor/github.com/minio/highwayhash/highwayhash_ref.go index 9214bdb27..fddac4b5f 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_ref.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_ref.go @@ -4,6 +4,7 @@ // +build !amd64 // +build !arm64 +// +build !ppc64le package highwayhash @@ -11,6 +12,7 @@ var ( useSSE4 = false useAVX2 = false useNEON = false + useVMX = false ) func initialize(state *[16]uint64, k []byte) { diff --git a/vendor/vendor.json b/vendor/vendor.json index e5e6e19c1..5d435c9af 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -563,10 +563,10 @@ "revisionTime": "2017-10-07T12:43:06Z" }, { - "checksumSHA1": "ehsrWipiGIWqa4To8TmelIx06vI=", + "checksumSHA1": "KiQa3vguztElzJkoqeIGHlfLFJA=", "path": "github.com/klauspost/reedsolomon", - "revision": "0b30fa71cc8e4e9010c9aba6d0320e2e5b163b29", - "revisionTime": "2017-12-19T13:34:37Z" + "revision": "8885f3a1c73882e6f11b766242c69a1eb8f44b28", + "revisionTime": "2018-12-18T19:39:59Z" }, { "checksumSHA1": "xxLSo5tKtXc7jGrR70yoEfza8Cw=", @@ -634,10 +634,10 @@ "revisionTime": "2018-01-23T12:12:34Z" }, { - "checksumSHA1": "2Fu1GmLwDo6FFdahjnlWnPkwJTE=", + "checksumSHA1": "CD2MtlgA8h0z6hYJHURS5eOmZ1k=", "path": "github.com/minio/highwayhash", - "revision": "85fc8a2dacad36a6beb2865793cd81363a496696", - "revisionTime": "2018-05-01T08:09:13Z" + "revision": "93ed73d641695483ab4438817457b6586ee5765c", + "revisionTime": "2018-12-20T01:13:08Z" }, { "checksumSHA1": "7/Hdd23/j4/yt4BXa+h0kqz1yjw=",