Merge with Intel ISAL changes from github.com/minio-io/isal

- These changes bring in a much-needed Mac OS X port of the
    Intel ISAL library
  - At the current stage the Mac OS X part of the code is
    considered beta
  - pkg/cpu now supports Mac OS X
  - pkg/checksum/crc32c is still WIP; the rest of the packages
    have been validated
master
Harshavardhana 10 years ago
parent c82d2b95d7
commit f347a1e590
  1. 5
      Makefile
  2. 6
      pkg/checksum/crc32c/crc32c_amd64.S
  3. 33
      pkg/cpu/cpu_amd64.S
  4. 7
      pkg/erasure/Makefile
  5. 2
      pkg/erasure/decode.c
  6. 21
      pkg/erasure/isal/.gitignore
  7. 2
      pkg/erasure/isal/Makefile
  8. 4
      pkg/erasure/isal/include/erasure/types.h
  9. 10
      pkg/erasure/isal/make.inc
  10. 130
      pkg/erasure/isal/src/ec-multibinary.asm
  11. 39
      pkg/erasure/isal/src/gf-2vect-dot-prod-avx.asm
  12. 41
      pkg/erasure/isal/src/gf-2vect-dot-prod-avx2.asm
  13. 39
      pkg/erasure/isal/src/gf-2vect-dot-prod-sse.asm
  14. 40
      pkg/erasure/isal/src/gf-3vect-dot-prod-avx.asm
  15. 42
      pkg/erasure/isal/src/gf-3vect-dot-prod-avx2.asm
  16. 40
      pkg/erasure/isal/src/gf-3vect-dot-prod-sse.asm
  17. 46
      pkg/erasure/isal/src/gf-4vect-dot-prod-avx.asm
  18. 48
      pkg/erasure/isal/src/gf-4vect-dot-prod-avx2.asm
  19. 46
      pkg/erasure/isal/src/gf-4vect-dot-prod-sse.asm
  20. 40
      pkg/erasure/isal/src/gf-5vect-dot-prod-avx.asm
  21. 48
      pkg/erasure/isal/src/gf-5vect-dot-prod-avx2.asm
  22. 46
      pkg/erasure/isal/src/gf-5vect-dot-prod-sse.asm
  23. 46
      pkg/erasure/isal/src/gf-6vect-dot-prod-avx.asm
  24. 42
      pkg/erasure/isal/src/gf-6vect-dot-prod-avx2.asm
  25. 46
      pkg/erasure/isal/src/gf-6vect-dot-prod-sse.asm
  26. 31
      pkg/erasure/isal/src/gf-vect-dot-prod-avx.asm
  27. 33
      pkg/erasure/isal/src/gf-vect-dot-prod-avx2.asm
  28. 30
      pkg/erasure/isal/src/gf-vect-dot-prod-sse.asm
  29. 26
      pkg/erasure/isal/src/gf-vect-mul-avx.asm
  30. 26
      pkg/erasure/isal/src/gf-vect-mul-sse.asm
  31. 2
      pkg/utils/execpipe_test.go

@ -14,9 +14,6 @@ build-erasure:
@$(MAKE) $(MAKE_OPTIONS) -C pkg/erasure/isal lib
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/erasure
build-signify:
@$(MAKE) $(MAKE_OPTIONS) -C pkg/signify
build-cpu:
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/cpu
@ -53,7 +50,7 @@ build-storage-append:
build-storage-encoded:
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/storage/encodedstorage
cover: build-erasure build-signify build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512
cover: build-erasure build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/gateway
install: build-erasure

@ -168,7 +168,7 @@ continue_block:
## branch into array
lea jump_table(%rip), bufp
movzxw (bufp, %rax, 2), len
movzwq (bufp, %rax, 2), len
offset=crc_array-jump_table
lea offset(bufp, len, 1), bufp
jmp *bufp
@ -194,18 +194,22 @@ full_block:
crc_array:
i=128
.rept 128-1
#if !defined(__clang__)
.altmacro
LABEL crc_ %i
.noaltmacro
#endif
crc32q -i*8(block_0), crc_init
crc32q -i*8(block_1), crc1
crc32q -i*8(block_2), crc2
i=(i-1)
.endr
#if !defined(__clang__)
.altmacro
LABEL crc_ %i
.noaltmacro
#endif
crc32q -i*8(block_0), crc_init
crc32q -i*8(block_1), crc1
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet

@ -14,9 +14,18 @@
* limitations under the License.
*/
/*
 * Names under which the CPU feature probes are exported.  Apple targets
 * use the underscore-prefixed spelling; every other target uses the
 * plain names.
 */
#if !defined(__APPLE__)
#define HAS_SSE has_sse41
#define HAS_AVX has_avx
#define HAS_AVX2 has_avx2
#else /* __APPLE__ */
#define HAS_SSE _has_sse41
#define HAS_AVX _has_avx
#define HAS_AVX2 _has_avx2
#endif
.file "cpufeatures.c"
.text
.type cpuid, @function
cpuid:
.LFB2:
.cfi_startproc
@ -56,10 +65,8 @@ cpuid:
ret
.cfi_endproc
.LFE2:
.size cpuid, .-cpuid
.globl has_sse41
.type has_sse41, @function
has_sse41:
.globl HAS_SSE
HAS_SSE:
.LFB3:
.cfi_startproc
pushq %rbp
@ -82,10 +89,8 @@ has_sse41:
ret
.cfi_endproc
.LFE3:
.size has_sse41, .-has_sse41
.globl has_avx
.type has_avx, @function
has_avx:
.globl HAS_AVX
HAS_AVX:
.LFB4:
.cfi_startproc
pushq %rbp
@ -108,10 +113,8 @@ has_avx:
ret
.cfi_endproc
.LFE4:
.size has_avx, .-has_avx
.globl has_avx2
.type has_avx2, @function
has_avx2:
.globl HAS_AVX2
HAS_AVX2:
.LFB5:
.cfi_startproc
pushq %rbp
@ -133,7 +136,3 @@ has_avx2:
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE5:
.size has_avx2, .-has_avx2
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
.section .note.GNU-stack,"",@progbits

@ -1,12 +1,17 @@
# Default goal: build the package, then run its tests.
all: build test

# These goals are commands, not files; declaring them phony keeps a stray
# file named "build" or "test" from silently disabling the target.
.PHONY: all build test

# Kernel name of the build host, used to pick the ISA-L build flavour.
SYSTEM_NAME := $(shell uname -s)

# The tests need the ISA-L archive as well, so depend on it explicitly
# rather than relying on "build" having run first (matters under make -j).
test: isal/isal-l.a
	@godep go test -race -coverprofile=cover.out

# ISA-L static library; on Darwin the sub-make is told to use its osx settings.
isal/isal-l.a:
ifeq ($(SYSTEM_NAME), Darwin)
	@$(MAKE) -C isal arch=osx lib
else
	@$(MAKE) -C isal lib
endif

build: isal/isal-l.a
	@godep go build

@ -60,6 +60,8 @@ int32_t minio_get_source_target (int errs, int k, int m,
*source = tmp_source;
*target = tmp_target;
return 0;
}
/*

@ -1,3 +1,22 @@
*.o
*.a
*.so
*.so
*~
*.dSYM
erasure-code-base-test
erasure-code-sse-test
erasure-code-test
gf-2vect-dot-prod-sse-test
gf-3vect-dot-prod-sse-test
gf-4vect-dot-prod-sse-test
gf-5vect-dot-prod-sse-test
gf-6vect-dot-prod-sse-test
gf-inverse-test
gf-vect-dot-prod-avx-test
gf-vect-dot-prod-base-test
gf-vect-dot-prod-sse-test
gf-vect-dot-prod-test
gf-vect-mul-avx-test
gf-vect-mul-base-test
gf-vect-mul-sse-test
gf-vect-mul-test

@ -30,7 +30,7 @@
units = src
default: slib
default: lib
include $(foreach unit,$(units), $(unit)/Makefile)

@ -41,7 +41,7 @@
extern "C" {
#endif
#ifndef __unix__
#if !defined(__unix__) && !defined(__APPLE__)
#ifdef __MINGW32__
# include <_mingw.h>
#endif
@ -59,7 +59,7 @@ typedef unsigned char UINT8;
#endif
#ifdef __unix__
#if defined(__unix__) || defined(__APPLE__)
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline
#else

@ -63,6 +63,11 @@ ASFLAGS_win64 = -f win64
CFLAGS_icl = -Qstd=c99
ARFLAGS_win64 = -out:$@
# arch=osx build options
ASFLAGS_osx = -f macho64
ARFLAGS_osx = -r $@
STRIP_gcc = strip -d $@
# arch=mingw build options
ASFLAGS_mingw = -f win64
ARFLAGS_mingw = cr $@
@ -101,9 +106,13 @@ ifeq ($(arch),win64)
lib_name := $(basename $(lib_name)).lib
endif
lsrcwin64 = $(lsrc)
lsrcosx = $(lsrc)
unit_testswin64 = $(unit_tests)
unit_testsosx = $(unit_tests)
exampleswin64 = $(examples)
examplesosx = $(examples)
perf_testswin64 = $(perf_tests)
perf_testsosx = $(perf_tests)
# Build and run unit tests, performance tests, etc.
all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests))
@ -199,7 +208,6 @@ perf_report:
@echo Summary:
-grep runtime $(rpt_name)
clean:
@echo Cleaning up
@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name)

@ -33,6 +33,42 @@
%define WRT_OPT
%endif
;;; Upper-case aliases for every symbol this file imports or exports.
;;; When assembling for macho64 each alias expands to the
;;; underscore-prefixed name; for every other output format it expands
;;; to the plain name.
%ifidn __OUTPUT_FORMAT__, macho64
;;; macho64: underscore-prefixed names
%define EC_ENCODE_DATA_SSE _ec_encode_data_sse
%define EC_ENCODE_DATA_AVX _ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%define GF_VECT_MUL_BASE _gf_vect_mul_base
%define EC_ENCODE_DATA_BASE _ec_encode_data_base
%define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base
%define EC_ENCODE_DATA _ec_encode_data
%define GF_VECT_MUL _gf_vect_mul
%define GF_VECT_DOT_PROD _gf_vect_dot_prod
%else
;;; all other output formats: plain names
%define EC_ENCODE_DATA_SSE ec_encode_data_sse
%define EC_ENCODE_DATA_AVX ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%define GF_VECT_MUL_BASE gf_vect_mul_base
%define EC_ENCODE_DATA_BASE ec_encode_data_base
%define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base
%define EC_ENCODE_DATA ec_encode_data
%define GF_VECT_MUL gf_vect_mul
%define GF_VECT_DOT_PROD gf_vect_dot_prod
%endif
%ifidn __OUTPUT_FORMAT__, elf32
[bits 32]
@ -51,19 +87,19 @@ default rel
%define wrd_sz qword
%define arg1 rsi
extern ec_encode_data_sse
extern ec_encode_data_avx
extern ec_encode_data_avx2
extern gf_vect_mul_sse
extern gf_vect_mul_avx
extern gf_vect_dot_prod_sse
extern gf_vect_dot_prod_avx
extern gf_vect_dot_prod_avx2
extern EC_ENCODE_DATA_SSE
extern EC_ENCODE_DATA_AVX
extern EC_ENCODE_DATA_AVX2
extern GF_VECT_MUL_SSE
extern GF_VECT_MUL_AVX
extern GF_VECT_DOT_PROD_SSE
extern GF_VECT_DOT_PROD_AVX
extern GF_VECT_DOT_PROD_AVX2
%endif
extern gf_vect_mul_base
extern ec_encode_data_base
extern gf_vect_dot_prod_base
extern GF_VECT_MUL_BASE
extern EC_ENCODE_DATA_BASE
extern GF_VECT_DOT_PROD_BASE
section .data
;;; *_mbinit are initial values for *_dispatched; is updated on first call.
@ -82,33 +118,33 @@ section .text
;;;;
; ec_encode_data multibinary function
;;;;
global ec_encode_data:function
global EC_ENCODE_DATA:function
ec_encode_data_mbinit:
call ec_encode_data_dispatch_init
ec_encode_data:
EC_ENCODE_DATA:
jmp wrd_sz [ec_encode_data_dispatched]
ec_encode_data_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [ec_encode_data_base]
lea arg1, [EC_ENCODE_DATA_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [ec_encode_data_base WRT_OPT] ; Default
lea arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
lea rbx, [ec_encode_data_sse WRT_OPT]
lea rbx, [EC_ENCODE_DATA_SSE WRT_OPT] ; SSE variant; the cmovne below installs it when SSE4.1 is reported
test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [ec_encode_data_avx WRT_OPT]
lea rbx, [EC_ENCODE_DATA_AVX WRT_OPT]
jne _done_ec_encode_data_init
mov rsi, rbx
@ -118,7 +154,7 @@ ec_encode_data_dispatch_init:
mov eax, 7
cpuid
test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [ec_encode_data_avx2 WRT_OPT]
lea rbx, [EC_ENCODE_DATA_AVX2 WRT_OPT]
cmovne rsi, rbx
;; Does it have xmm and ymm support
@ -127,7 +163,7 @@ ec_encode_data_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_ec_encode_data_init
lea rsi, [ec_encode_data_sse WRT_OPT]
lea rsi, [EC_ENCODE_DATA_SSE WRT_OPT]
_done_ec_encode_data_init:
pop rdx
@ -142,30 +178,30 @@ _done_ec_encode_data_init:
;;;;
; gf_vect_mul multibinary function
;;;;
global gf_vect_mul:function
global GF_VECT_MUL:function
gf_vect_mul_mbinit:
call gf_vect_mul_dispatch_init
gf_vect_mul:
GF_VECT_MUL:
jmp wrd_sz [gf_vect_mul_dispatched]
gf_vect_mul_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [gf_vect_mul_base]
lea arg1, [GF_VECT_MUL_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [gf_vect_mul_base WRT_OPT] ; Default
lea arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
test ecx, FLAG_CPUID1_ECX_SSE4_2
lea rbx, [gf_vect_mul_sse WRT_OPT]
je _done_gf_vect_mul_dispatch_init
mov arg1, rbx
lea rbx, [GF_VECT_MUL_SSE WRT_OPT]
je _done_gf_vect_mul_dispatch_init
mov arg1, rbx
;; Try for AVX
and ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
@ -178,49 +214,49 @@ gf_vect_mul_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
jne _done_gf_vect_mul_dispatch_init
lea arg1, [gf_vect_mul_avx WRT_OPT]
lea arg1, [GF_VECT_MUL_AVX WRT_OPT]
_done_gf_vect_mul_dispatch_init:
pop rdx
pop rcx
pop rbx
pop rax
%endif ;; END 32-bit check
mov [gf_vect_mul_dispatched], arg1
pop arg1
ret
pop rdx
pop rcx
pop rbx
pop rax
%endif ;; END 32-bit check
mov [gf_vect_mul_dispatched], arg1
pop arg1
ret
;;;;
; gf_vect_dot_prod multibinary function
;;;;
global gf_vect_dot_prod:function
global GF_VECT_DOT_PROD:function
gf_vect_dot_prod_mbinit:
call gf_vect_dot_prod_dispatch_init
gf_vect_dot_prod:
GF_VECT_DOT_PROD:
jmp wrd_sz [gf_vect_dot_prod_dispatched]
gf_vect_dot_prod_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [gf_vect_dot_prod_base]
lea arg1, [GF_VECT_DOT_PROD_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default
lea arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
lea rbx, [gf_vect_dot_prod_sse WRT_OPT]
lea rbx, [GF_VECT_DOT_PROD_SSE WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [gf_vect_dot_prod_avx WRT_OPT]
lea rbx, [GF_VECT_DOT_PROD_AVX WRT_OPT]
jne _done_gf_vect_dot_prod_init
mov rsi, rbx
@ -230,7 +266,7 @@ gf_vect_dot_prod_dispatch_init:
mov eax, 7
cpuid
test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [gf_vect_dot_prod_avx2 WRT_OPT]
lea rbx, [GF_VECT_DOT_PROD_AVX2 WRT_OPT]
cmovne rsi, rbx
;; Does it have xmm and ymm support
@ -238,8 +274,8 @@ gf_vect_dot_prod_dispatch_init:
xgetbv
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_gf_vect_dot_prod_init
lea rsi, [gf_vect_dot_prod_sse WRT_OPT]
je _done_gf_vect_dot_prod_init
lea rsi, [GF_VECT_DOT_PROD_SSE WRT_OPT]
_done_gf_vect_dot_prod_init:
pop rdx
@ -261,6 +297,6 @@ global %1_slver
%endmacro
;;; func core, ver, snum
slversion ec_encode_data, 00, 02, 0133
slversion gf_vect_mul, 00, 02, 0134
slversion gf_vect_dot_prod, 00, 01, 0138
slversion EC_ENCODE_DATA, 00, 02, 0133
slversion GF_VECT_MUL, 00, 02, 0134
slversion GF_VECT_DOT_PROD, 00, 01, 0138

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx
%else
%define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -58,6 +63,31 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12 (tmp4) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9 ; same register as arg5
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -143,9 +173,8 @@ section .text
%define xp2 xmm3
align 16
global gf_2vect_dot_prod_avx:function
func(gf_2vect_dot_prod_avx)
global GF_2VECT_DOT_PROD_AVX:function
func(GF_2VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -231,4 +260,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_2vect_dot_prod_avx, 02, 03, 0191
slversion GF_2VECT_DOT_PROD_AVX, 02, 03, 0191

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2
%else
%define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -60,6 +65,33 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12 (tmp4) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d ; 32-bit view of tmp
%define tmp.b r11b ; 8-bit view of tmp
%define tmp2 r10
%define tmp3 r9 ; same register as arg5
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -149,9 +181,8 @@ section .text
%define xp2 ymm3
align 16
global gf_2vect_dot_prod_avx2:function
func(gf_2vect_dot_prod_avx2)
global GF_2VECT_DOT_PROD_AVX2:function
func(GF_2VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@ -243,4 +274,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_2vect_dot_prod_avx2, 04, 03, 0196
slversion GF_2VECT_DOT_PROD_AVX2, 04, 03, 0196

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse
%else
%define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -58,6 +63,31 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12 (tmp4) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9 ; same register as arg5
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -144,9 +174,8 @@ section .text
%define xp2 xmm3
align 16
global gf_2vect_dot_prod_sse:function
func(gf_2vect_dot_prod_sse)
global GF_2VECT_DOT_PROD_SSE:function
func(GF_2VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -233,4 +262,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_2vect_dot_prod_sse, 00, 02, 0062
slversion GF_2VECT_DOT_PROD_SSE, 00, 02, 0062

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx
%else
%define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -60,6 +65,33 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12 and r13 (tmp4, tmp3) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
;;; pops mirror the pushes in reverse order
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -157,8 +189,8 @@ section .text
%define xp3 xmm4
align 16
global gf_3vect_dot_prod_avx:function
func(gf_3vect_dot_prod_avx)
global GF_3VECT_DOT_PROD_AVX:function
func(GF_3VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -255,4 +287,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_3vect_dot_prod_avx, 02, 03, 0192
slversion GF_3VECT_DOT_PROD_AVX, 02, 03, 0192

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2
%else
%define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -62,6 +67,35 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12 and r13 (tmp4, tmp3) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d ; 32-bit view of tmp
%define tmp.b r11b ; 8-bit view of tmp
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
;;; pops mirror the pushes in reverse order
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -162,8 +196,8 @@ section .text
%define xp3 ymm4
align 16
global gf_3vect_dot_prod_avx2:function
func(gf_3vect_dot_prod_avx2)
global GF_3VECT_DOT_PROD_AVX2:function
func(GF_3VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@ -268,4 +302,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_3vect_dot_prod_avx2, 04, 03, 0197
slversion GF_3VECT_DOT_PROD_AVX2, 04, 03, 0197

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse
%else
%define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -60,6 +65,33 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12 and r13 (tmp4, tmp3) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
;;; pops mirror the pushes in reverse order
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -157,8 +189,8 @@ section .text
%define xp3 xmm4
align 16
global gf_3vect_dot_prod_sse:function
func(gf_3vect_dot_prod_sse)
global GF_3VECT_DOT_PROD_SSE:function
func(GF_3VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -256,4 +288,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_3vect_dot_prod_sse, 00, 03, 0063
slversion GF_3VECT_DOT_PROD_SSE, 00, 03, 0063

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx
%else
%define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -66,6 +71,39 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12-r15 (tmp3..tmp6) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
;;; pops mirror the pushes in reverse order
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -182,8 +220,8 @@ section .text
%define xp4 xmm5
align 16
global gf_4vect_dot_prod_avx:function
func(gf_4vect_dot_prod_avx)
global GF_4VECT_DOT_PROD_AVX:function
func(GF_4VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -293,4 +331,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_4vect_dot_prod_avx, 00, 02, 0064
slversion GF_4VECT_DOT_PROD_AVX, 00, 02, 0064

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2
%else
%define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -68,6 +73,41 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12-r15 (tmp3..tmp6) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d ; 32-bit view of tmp
%define tmp.b r11b ; 8-bit view of tmp
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
;;; pops mirror the pushes in reverse order
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -187,8 +227,8 @@ section .text
%define xp4 ymm5
align 16
global gf_4vect_dot_prod_avx2:function
func(gf_4vect_dot_prod_avx2)
global GF_4VECT_DOT_PROD_AVX2:function
func(GF_4VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@ -302,4 +342,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_4vect_dot_prod_avx2, 04, 03, 0064
slversion GF_4VECT_DOT_PROD_AVX2, 04, 03, 0064

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse
%else
%define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -66,6 +71,39 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12-r15 (tmp3..tmp6) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
;;; pops mirror the pushes in reverse order
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -182,8 +220,8 @@ section .text
%define xp4 xmm5
align 16
global gf_4vect_dot_prod_sse:function
func(gf_4vect_dot_prod_sse)
global GF_4VECT_DOT_PROD_SSE:function
func(GF_4VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -293,4 +331,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_4vect_dot_prod_sse, 00, 03, 0064
slversion GF_4VECT_DOT_PROD_SSE, 00, 03, 0064

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx
%else
%define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -66,6 +71,39 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12-r15 (tmp3..tmp6) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
;;; pops mirror the pushes in reverse order
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2
%else
%define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -68,6 +73,41 @@
%endmacro
%endif
;;; macho64 output: register aliases for the arguments and scratch
;;; registers, the func() label helper, and the FUNC_SAVE/FUNC_RESTORE
;;; pair that preserves r12-r15 (tmp3..tmp6) around the function body.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d ; 32-bit view of tmp
%define tmp.b r11b ; 8-bit view of tmp
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3 ; log2(PS)
%define func(x) x: ; func(name) expands to a plain label
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
;;; pops mirror the pushes in reverse order
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -189,8 +229,8 @@ section .text
%define xp5 ymm6
align 16
global gf_5vect_dot_prod_avx2:function
func(gf_5vect_dot_prod_avx2)
global GF_5VECT_DOT_PROD_AVX2:function
func(GF_5VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@ -320,4 +360,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_5vect_dot_prod_avx2, 04, 03, 0199
slversion GF_5VECT_DOT_PROD_AVX2, 04, 03, 0199

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse
%else
%define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -66,6 +71,39 @@
%endmacro
%endif
;;; Register aliases and prologue/epilogue macros used when assembling
;;; for macho64.  arg0-arg5 are bound to rdi, rsi, rdx, rcx, r8, r9,
;;; i.e. the System V AMD64 integer-argument register order.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
; tmp3-tmp6 live in r12-r15, which FUNC_SAVE/FUNC_RESTORE preserve
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
; PS = 8 and LOG_PS = 3 (8 == 1 << 3); presumably pointer size in bytes
; and its log2 -- confirm against the code that uses them
%define PS 8
%define LOG_PS 3
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Prologue: push the four registers backing tmp3-tmp6
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Epilogue: pop in the exact reverse order of FUNC_SAVE
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -184,8 +222,8 @@ section .text
%define xp5 xmm6
align 16
global gf_5vect_dot_prod_sse:function
func(gf_5vect_dot_prod_sse)
global GF_5VECT_DOT_PROD_SSE:function
func(GF_5VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -309,4 +347,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_5vect_dot_prod_sse, 00, 03, 0065
slversion GF_5VECT_DOT_PROD_SSE, 00, 03, 0065

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx
%else
%define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -66,6 +71,39 @@
%endmacro
%endif
;;; Register aliases and prologue/epilogue macros used when assembling
;;; for macho64.  arg0-arg5 are bound to rdi, rsi, rdx, rcx, r8, r9,
;;; i.e. the System V AMD64 integer-argument register order.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
; tmp3-tmp6 live in r12-r15, which FUNC_SAVE/FUNC_RESTORE preserve
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
; PS = 8 and LOG_PS = 3 (8 == 1 << 3); presumably pointer size in bytes
; and its log2 -- confirm against the code that uses them
%define PS 8
%define LOG_PS 3
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Prologue: push the four registers backing tmp3-tmp6
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Epilogue: pop in the exact reverse order of FUNC_SAVE
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -182,8 +220,8 @@ section .text
%define xp6 xmm7
align 16
global gf_6vect_dot_prod_avx:function
func(gf_6vect_dot_prod_avx)
global GF_6VECT_DOT_PROD_AVX:function
func(GF_6VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -320,4 +358,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_6vect_dot_prod_avx, 02, 03, 0195
slversion GF_6VECT_DOT_PROD_AVX, 02, 03, 0195

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2
%else
%define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -68,6 +73,41 @@
%endmacro
%endif
;;; Register aliases and prologue/epilogue macros used when assembling
;;; for macho64.  arg0-arg5 are bound to rdi, rsi, rdx, rcx, r8, r9,
;;; i.e. the System V AMD64 integer-argument register order.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; tmp.w and tmp.b are the 32-bit and 8-bit views of tmp (r11)
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
; tmp3-tmp6 live in r12-r15, which FUNC_SAVE/FUNC_RESTORE preserve
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
; PS = 8 and LOG_PS = 3 (8 == 1 << 3); presumably pointer size in bytes
; and its log2 -- confirm against the code that uses them
%define PS 8
%define LOG_PS 3
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Prologue: push the four registers backing tmp3-tmp6
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Epilogue: pop in the exact reverse order of FUNC_SAVE
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse
%else
%define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -66,6 +71,39 @@
%endmacro
%endif
;;; Register aliases and prologue/epilogue macros used when assembling
;;; for macho64.  arg0-arg5 are bound to rdi, rsi, rdx, rcx, r8, r9,
;;; i.e. the System V AMD64 integer-argument register order.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
; tmp3-tmp6 live in r12-r15, which FUNC_SAVE/FUNC_RESTORE preserve
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
; PS = 8 and LOG_PS = 3 (8 == 1 << 3); presumably pointer size in bytes
; and its log2 -- confirm against the code that uses them
%define PS 8
%define LOG_PS 3
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Prologue: push the four registers backing tmp3-tmp6
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Epilogue: pop in the exact reverse order of FUNC_SAVE
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -182,8 +220,8 @@ section .text
%define xp6 xmm7
align 16
global gf_6vect_dot_prod_sse:function
func(gf_6vect_dot_prod_sse)
global GF_6VECT_DOT_PROD_SSE:function
func(GF_6VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -320,4 +358,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_6vect_dot_prod_sse, 00, 03, 0066
slversion GF_6VECT_DOT_PROD_SSE, 00, 03, 0066

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%else
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -51,6 +56,24 @@
%define FUNC_RESTORE
%endif
;;; Register aliases used when assembling for macho64.  arg0-arg5 are
;;; bound to rdi, rsi, rdx, rcx, r8, r9, i.e. the System V AMD64
;;; integer-argument register order.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
; NOTE(review): tmp3 is bound to the same register as arg5 (r9); this is
; only safe if arg5 is not needed once tmp3 is written -- confirm
%define tmp3 r9
%define return rax
; PS = 8; presumably pointer size in bytes -- confirm against its users
%define PS 8
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Both hooks expand to nothing: no temporary above is bound to r12-r15,
; so this block pushes and pops no registers
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -121,8 +144,8 @@ section .text
%define xp xmm2
align 16
global gf_vect_dot_prod_avx:function
func(gf_vect_dot_prod_avx)
global GF_VECT_DOT_PROD_AVX:function
func(GF_VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -195,4 +218,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_dot_prod_avx, 02, 03, 0061
slversion GF_VECT_DOT_PROD_AVX, 02, 03, 0061

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%else
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -53,6 +58,26 @@
%define FUNC_RESTORE
%endif
;;; Register aliases used when assembling for macho64.  arg0-arg5 are
;;; bound to rdi, rsi, rdx, rcx, r8, r9, i.e. the System V AMD64
;;; integer-argument register order.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; tmp.w and tmp.b are the 32-bit and 8-bit views of tmp (r11)
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
; NOTE(review): tmp3 is bound to the same register as arg5 (r9); this is
; only safe if arg5 is not needed once tmp3 is written -- confirm
%define tmp3 r9
%define return rax
; PS = 8; presumably pointer size in bytes -- confirm against its users
%define PS 8
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Both hooks expand to nothing: no temporary above is bound to r12-r15,
; so this block pushes and pops no registers
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -126,8 +151,8 @@ section .text
%define xp ymm2
align 16
global gf_vect_dot_prod_avx2:function
func(gf_vect_dot_prod_avx2)
global GF_VECT_DOT_PROD_AVX2:function
func(GF_VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@ -200,4 +225,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_dot_prod_avx2, 04, 03, 0190
slversion GF_VECT_DOT_PROD_AVX2, 04, 03, 0190

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%else
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -50,6 +55,23 @@
%define FUNC_RESTORE
%endif
;;; Register aliases used when assembling for macho64.  arg0-arg4 are
;;; bound to rdi, rsi, rdx, rcx, r8, i.e. the first five System V AMD64
;;; integer-argument registers.  No arg5 is bound in this block.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define return rax
; PS = 8; presumably pointer size in bytes -- confirm against its users
%define PS 8
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Both hooks expand to nothing: no temporary above is bound to r12-r15,
; so this block pushes and pops no registers
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -121,8 +143,8 @@ section .text
%define xp xmm2
align 16
global gf_vect_dot_prod_sse:function
func(gf_vect_dot_prod_sse)
global GF_VECT_DOT_PROD_SSE:function
func(GF_VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@ -192,4 +214,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_dot_prod_sse, 00, 03, 0060
slversion GF_VECT_DOT_PROD_SSE, 00, 03, 0060

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%else
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -46,6 +51,19 @@
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64
; macho64 branch of the output-format chain: arg0-arg5 are bound to
; rdi, rsi, rdx, rcx, r8, r9, i.e. the System V AMD64 integer-argument
; register order.
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Both hooks expand to nothing: this branch pushes and pops no registers
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -111,8 +129,8 @@ section .text
%define xtmp2c xmm7
align 16
global gf_vect_mul_avx:function
func(gf_vect_mul_avx)
global GF_VECT_MUL_AVX:function
func(GF_VECT_MUL_AVX)
FUNC_SAVE
mov pos, 0
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
@ -169,4 +187,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_mul_avx, 01, 02, 0036
slversion GF_VECT_MUL_AVX, 01, 02, 0036

@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
;;; Pick the exported symbol name for this routine from the output format:
;;; macho64 gets a leading underscore, every other format the plain name.
;;; NOTE(review): presumably this matches the C symbol naming of the
;;; Mac OS X toolchain so C callers can link against it -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%else
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@ -46,6 +51,19 @@
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64
; macho64 branch of the output-format chain: arg0-arg5 are bound to
; rdi, rsi, rdx, rcx, r8, r9, i.e. the System V AMD64 integer-argument
; register order.
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
; func(x) expands to a plain label definition "x:"
%define func(x) x:
; Both hooks expand to nothing: this branch pushes and pops no registers
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@ -112,8 +130,8 @@ section .text
align 16
global gf_vect_mul_sse:function
func(gf_vect_mul_sse)
global GF_VECT_MUL_SSE:function
func(GF_VECT_MUL_SSE)
FUNC_SAVE
mov pos, 0
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
@ -175,4 +193,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_mul_sse, 00, 02, 0034
slversion GF_VECT_MUL_SSE, 00, 02, 0034

@ -23,7 +23,7 @@ func (s *MySuite) TestPiping(c *C) {
// Run the command on each directory
for _, dir := range dirs {
// find $DIR -type f # Find all files
ls := exec.Command("ls", dir, "-l")
ls := exec.Command("ls", "-l", dir)
// | sort -t. -k2 # Sort by file extension
sort := exec.Command("sort", "-t.", "-k2")

Loading…
Cancel
Save