Simplify erasure package for OSX

master
Harshavardhana 9 years ago
parent 90a247b336
commit d0f945f8e7
  1. 2
      Makefile
  2. 7
      main.go
  3. 174
      pkg/erasure/ec_multibinary.asm
  4. 11
      pkg/erasure/ec_reg_sizes.asm
  5. 78
      pkg/erasure/erasure_yasm_darwin.go
  6. 13
      pkg/erasure/gf_2vect_dot_prod_avx.asm
  7. 13
      pkg/erasure/gf_2vect_dot_prod_avx2.asm
  8. 13
      pkg/erasure/gf_2vect_dot_prod_sse.asm
  9. 14
      pkg/erasure/gf_2vect_mad_avx.asm
  10. 13
      pkg/erasure/gf_2vect_mad_avx2.asm
  11. 13
      pkg/erasure/gf_2vect_mad_sse.asm
  12. 14
      pkg/erasure/gf_3vect_dot_prod_avx.asm
  13. 12
      pkg/erasure/gf_3vect_dot_prod_avx2.asm
  14. 12
      pkg/erasure/gf_3vect_dot_prod_sse.asm
  15. 13
      pkg/erasure/gf_3vect_mad_avx.asm
  16. 13
      pkg/erasure/gf_3vect_mad_avx2.asm
  17. 13
      pkg/erasure/gf_3vect_mad_sse.asm
  18. 12
      pkg/erasure/gf_4vect_dot_prod_avx.asm
  19. 12
      pkg/erasure/gf_4vect_dot_prod_avx2.asm
  20. 12
      pkg/erasure/gf_4vect_dot_prod_sse.asm
  21. 13
      pkg/erasure/gf_4vect_mad_avx.asm
  22. 15
      pkg/erasure/gf_4vect_mad_avx2.asm
  23. 13
      pkg/erasure/gf_4vect_mad_sse.asm
  24. 12
      pkg/erasure/gf_5vect_dot_prod_avx.asm
  25. 12
      pkg/erasure/gf_5vect_dot_prod_avx2.asm
  26. 12
      pkg/erasure/gf_5vect_dot_prod_sse.asm
  27. 12
      pkg/erasure/gf_5vect_mad_avx.asm
  28. 12
      pkg/erasure/gf_5vect_mad_avx2.asm
  29. 12
      pkg/erasure/gf_5vect_mad_sse.asm
  30. 12
      pkg/erasure/gf_6vect_dot_prod_avx.asm
  31. 12
      pkg/erasure/gf_6vect_dot_prod_avx2.asm
  32. 12
      pkg/erasure/gf_6vect_dot_prod_sse.asm
  33. 12
      pkg/erasure/gf_6vect_mad_avx.asm
  34. 12
      pkg/erasure/gf_6vect_mad_avx2.asm
  35. 12
      pkg/erasure/gf_6vect_mad_sse.asm
  36. 12
      pkg/erasure/gf_vect_dot_prod_avx.asm
  37. 12
      pkg/erasure/gf_vect_dot_prod_avx2.asm
  38. 12
      pkg/erasure/gf_vect_dot_prod_sse.asm
  39. 13
      pkg/erasure/gf_vect_mad_avx.asm
  40. 13
      pkg/erasure/gf_vect_mad_avx2.asm
  41. 13
      pkg/erasure/gf_vect_mad_sse.asm
  42. 12
      pkg/erasure/gf_vect_mul_avx.asm
  43. 12
      pkg/erasure/gf_vect_mul_sse.asm

@ -61,5 +61,5 @@ install: gomake-all
clean: clean:
@echo "Cleaning up all the generated files:" @echo "Cleaning up all the generated files:"
@rm -fv cover.out @rm -fv cover.out
@rm -fv pkg/utils/split/TESTPREFIX.*
@rm -fv minio @rm -fv minio
@rm -fv pkg/erasure/*.syso

@ -118,14 +118,11 @@ GLOBAL FLAGS:
{{range .Flags}}{{.}} {{range .Flags}}{{.}}
{{end}}{{end}} {{end}}{{end}}
VERSION: VERSION:
` + getFormattedVersion() +
` + getFormattedVersion() + `{{range $key, $value := ExtraInfo}}
`
{{range $key, $value := ExtraInfo}}
{{$key}}: {{$key}}:
{{$value}} {{$value}}
{{end}} {{end}}
` `
app.CommandNotFound = func(ctx *cli.Context, command string) { app.CommandNotFound = func(ctx *cli.Context, command string) {
Fatalf("Command not found: ‘%s’\n", command) Fatalf("Command not found: ‘%s’\n", command)

@ -33,64 +33,6 @@
%define WRT_OPT %define WRT_OPT
%endif %endif
%ifidn __OUTPUT_FORMAT__, macho64
%define EC_ENCODE_DATA_SSE _ec_encode_data_sse
%define EC_ENCODE_DATA_AVX _ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2
%define EC_ENCODE_DATA_BASE _ec_encode_data_base
%define EC_ENCODE_DATA _ec_encode_data
%define EC_ENCODE_DATA_UPDATE_BASE _ec_encode_data_update_base
%define EC_ENCODE_DATA_UPDATE_SSE _ec_encode_data_update_sse
%define EC_ENCODE_DATA_UPDATE_AVX _ec_encode_data_update_avx
%define EC_ENCODE_DATA_UPDATE_AVX2 _ec_encode_data_update_avx2
%define GF_VECT_MAD_BASE _gf_vect_mad_base
%define GF_VECT_MAD_SSE _gf_vect_mad_sse
%define GF_VECT_MAD_AVX _gf_vect_mad_avx
%define GF_VECT_MAD_AVX2 _gf_vect_mad_avx2
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%define GF_VECT_MUL_BASE _gf_vect_mul_base
%define GF_VECT_MUL _gf_vect_mul
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base
%define GF_VECT_DOT_PROD _gf_vect_dot_prod
%else
%define EC_ENCODE_DATA_SSE ec_encode_data_sse
%define EC_ENCODE_DATA_AVX ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2
%define EC_ENCODE_DATA_BASE ec_encode_data_base
%define EC_ENCODE_DATA ec_encode_data
%define EC_ENCODE_DATA_UPDATE_BASE ec_encode_data_update_base
%define EC_ENCODE_DATA_UPDATE_SSE ec_encode_data_update_sse
%define EC_ENCODE_DATA_UPDATE_AVX ec_encode_data_update_avx
%define EC_ENCODE_DATA_UPDATE_AVX2 ec_encode_data_update_avx2
%define GF_VECT_MAD_BASE gf_vect_mad_base
%define GF_VECT_MAD_SSE gf_vect_mad_sse
%define GF_VECT_MAD_AVX gf_vect_mad_avx
%define GF_VECT_MAD_AVX2 gf_vect_mad_avx2
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%define GF_VECT_MUL_BASE gf_vect_mul_base
%define GF_VECT_MUL gf_vect_mul
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base
%define GF_VECT_DOT_PROD gf_vect_dot_prod
%endif
%include "ec_reg_sizes.asm" %include "ec_reg_sizes.asm"
%ifidn __OUTPUT_FORMAT__, elf32 %ifidn __OUTPUT_FORMAT__, elf32
@ -119,29 +61,29 @@
%define arg5 rdx %define arg5 rdx
extern EC_ENCODE_DATA_UPDATE_SSE extern ec_encode_data_update_sse
extern EC_ENCODE_DATA_UPDATE_AVX extern ec_encode_data_update_avx
extern EC_ENCODE_DATA_UPDATE_AVX2 extern ec_encode_data_update_avx2
extern GF_VECT_MUL_SSE extern gf_vect_mul_sse
extern GF_VECT_MUL_AVX extern gf_vect_mul_avx
extern GF_VECT_MAD_SSE extern gf_vect_mad_sse
extern GF_VECT_MAD_AVX extern gf_vect_mad_avx
extern GF_VECT_MAD_AVX2 extern gf_vect_mad_avx2
%endif %endif
extern GF_VECT_MUL_BASE extern gf_vect_mul_base
extern EC_ENCODE_DATA_BASE extern ec_encode_data_base
extern EC_ENCODE_DATA_UPDATE_BASE extern ec_encode_data_update_base
extern GF_VECT_DOT_PROD_BASE extern gf_vect_dot_prod_base
extern GF_VECT_MAD_BASE extern gf_vect_mad_base
extern GF_VECT_DOT_PROD_SSE extern gf_vect_dot_prod_sse
extern GF_VECT_DOT_PROD_AVX extern gf_vect_dot_prod_avx
extern GF_VECT_DOT_PROD_AVX2 extern gf_vect_dot_prod_avx2
extern EC_ENCODE_DATA_SSE extern ec_encode_data_sse
extern EC_ENCODE_DATA_AVX extern ec_encode_data_avx
extern EC_ENCODE_DATA_AVX2 extern ec_encode_data_avx2
section .data section .data
@ -167,11 +109,11 @@ section .text
;;;; ;;;;
; ec_encode_data multibinary function ; ec_encode_data multibinary function
;;;; ;;;;
global EC_ENCODE_DATA:function global ec_encode_data:function
ec_encode_data_mbinit: ec_encode_data_mbinit:
call ec_encode_data_dispatch_init call ec_encode_data_dispatch_init
EC_ENCODE_DATA: ec_encode_data:
jmp wrd_sz [ec_encode_data_dispatched] jmp wrd_sz [ec_encode_data_dispatched]
ec_encode_data_dispatch_init: ec_encode_data_dispatch_init:
@ -180,17 +122,17 @@ ec_encode_data_dispatch_init:
push arg3 push arg3
push arg4 push arg4
push arg5 push arg5
lea arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default lea arg1, [ec_encode_data_base WRT_OPT] ; Default
mov eax, 1 mov eax, 1
cpuid cpuid
lea arg3, [EC_ENCODE_DATA_SSE WRT_OPT] lea arg3, [ec_encode_data_sse WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1 test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, arg3 cmovne arg1, arg3
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea arg3, [EC_ENCODE_DATA_AVX WRT_OPT] lea arg3, [ec_encode_data_avx WRT_OPT]
jne _done_ec_encode_data_init jne _done_ec_encode_data_init
mov arg1, arg3 mov arg1, arg3
@ -200,7 +142,7 @@ ec_encode_data_dispatch_init:
mov eax, 7 mov eax, 7
cpuid cpuid
test ebx, FLAG_CPUID1_EBX_AVX2 test ebx, FLAG_CPUID1_EBX_AVX2
lea arg3, [EC_ENCODE_DATA_AVX2 WRT_OPT] lea arg3, [ec_encode_data_avx2 WRT_OPT]
cmovne arg1, arg3 cmovne arg1, arg3
;; Does it have xmm and ymm support ;; Does it have xmm and ymm support
xor ecx, ecx xor ecx, ecx
@ -208,7 +150,7 @@ ec_encode_data_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_ec_encode_data_init je _done_ec_encode_data_init
lea arg1, [EC_ENCODE_DATA_SSE WRT_OPT] lea arg1, [ec_encode_data_sse WRT_OPT]
_done_ec_encode_data_init: _done_ec_encode_data_init:
pop arg5 pop arg5
@ -222,28 +164,28 @@ _done_ec_encode_data_init:
;;;; ;;;;
; gf_vect_mul multibinary function ; gf_vect_mul multibinary function
;;;; ;;;;
global GF_VECT_MUL:function global gf_vect_mul:function
gf_vect_mul_mbinit: gf_vect_mul_mbinit:
call gf_vect_mul_dispatch_init call gf_vect_mul_dispatch_init
GF_VECT_MUL: gf_vect_mul:
jmp wrd_sz [gf_vect_mul_dispatched] jmp wrd_sz [gf_vect_mul_dispatched]
gf_vect_mul_dispatch_init: gf_vect_mul_dispatch_init:
push arg1 push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check %ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [GF_VECT_MUL_BASE] lea arg1, [gf_vect_mul_base]
%else %else
push rax push rax
push rbx push rbx
push rcx push rcx
push rdx push rdx
lea arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default lea arg1, [gf_vect_mul_base WRT_OPT] ; Default
mov eax, 1 mov eax, 1
cpuid cpuid
test ecx, FLAG_CPUID1_ECX_SSE4_2 test ecx, FLAG_CPUID1_ECX_SSE4_2
lea rbx, [GF_VECT_MUL_SSE WRT_OPT] lea rbx, [gf_vect_mul_sse WRT_OPT]
je _done_gf_vect_mul_dispatch_init je _done_gf_vect_mul_dispatch_init
mov arg1, rbx mov arg1, rbx
@ -258,7 +200,7 @@ gf_vect_mul_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM cmp eax, FLAG_XGETBV_EAX_XMM_YMM
jne _done_gf_vect_mul_dispatch_init jne _done_gf_vect_mul_dispatch_init
lea arg1, [GF_VECT_MUL_AVX WRT_OPT] lea arg1, [gf_vect_mul_avx WRT_OPT]
_done_gf_vect_mul_dispatch_init: _done_gf_vect_mul_dispatch_init:
pop rdx pop rdx
@ -273,11 +215,11 @@ _done_gf_vect_mul_dispatch_init:
;;;; ;;;;
; ec_encode_data_update multibinary function ; ec_encode_data_update multibinary function
;;;; ;;;;
global EC_ENCODE_DATA_UPDATE:function global ec_encode_data_update:function
ec_encode_data_update_mbinit: ec_encode_data_update_mbinit:
call ec_encode_data_update_dispatch_init call ec_encode_data_update_dispatch_init
EC_ENCODE_DATA_UPDATE: ec_encode_data_update:
jmp wrd_sz [ec_encode_data_update_dispatched] jmp wrd_sz [ec_encode_data_update_dispatched]
ec_encode_data_update_dispatch_init: ec_encode_data_update_dispatch_init:
@ -289,17 +231,17 @@ ec_encode_data_update_dispatch_init:
push rbx push rbx
push rcx push rcx
push rdx push rdx
lea arg1, [EC_ENCODE_DATA_UPDATE_BASE WRT_OPT] ; Default lea arg1, [ec_encode_data_update_base WRT_OPT] ; Default
mov eax, 1 mov eax, 1
cpuid cpuid
lea rbx, [EC_ENCODE_DATA_UPDATE_SSE WRT_OPT] lea rbx, [ec_encode_data_update_sse WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1 test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [EC_ENCODE_DATA_UPDATE_AVX WRT_OPT] lea rbx, [ec_encode_data_update_avx WRT_OPT]
jne _done_ec_encode_data_update_init jne _done_ec_encode_data_update_init
mov rsi, rbx mov rsi, rbx
@ -309,7 +251,7 @@ ec_encode_data_update_dispatch_init:
mov eax, 7 mov eax, 7
cpuid cpuid
test ebx, FLAG_CPUID1_EBX_AVX2 test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [EC_ENCODE_DATA_UPDATE_AVX2 WRT_OPT] lea rbx, [ec_encode_data_update_avx2 WRT_OPT]
cmovne rsi, rbx cmovne rsi, rbx
;; Does it have xmm and ymm support ;; Does it have xmm and ymm support
@ -318,7 +260,7 @@ ec_encode_data_update_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_ec_encode_data_update_init je _done_ec_encode_data_update_init
lea rsi, [EC_ENCODE_DATA_UPDATE_SSE WRT_OPT] lea rsi, [ec_encode_data_update_sse WRT_OPT]
_done_ec_encode_data_update_init: _done_ec_encode_data_update_init:
pop rdx pop rdx
@ -333,11 +275,11 @@ _done_ec_encode_data_update_init:
;;;; ;;;;
; gf_vect_dot_prod multibinary function ; gf_vect_dot_prod multibinary function
;;;; ;;;;
global GF_VECT_DOT_PROD:function global gf_vect_dot_prod:function
gf_vect_dot_prod_mbinit: gf_vect_dot_prod_mbinit:
call gf_vect_dot_prod_dispatch_init call gf_vect_dot_prod_dispatch_init
GF_VECT_DOT_PROD: gf_vect_dot_prod:
jmp wrd_sz [gf_vect_dot_prod_dispatched] jmp wrd_sz [gf_vect_dot_prod_dispatched]
gf_vect_dot_prod_dispatch_init: gf_vect_dot_prod_dispatch_init:
@ -346,17 +288,17 @@ gf_vect_dot_prod_dispatch_init:
push arg3 push arg3
push arg4 push arg4
push arg5 push arg5
lea arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default lea arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default
mov eax, 1 mov eax, 1
cpuid cpuid
lea arg3, [GF_VECT_DOT_PROD_SSE WRT_OPT] lea arg3, [gf_vect_dot_prod_sse WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1 test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, arg3 cmovne arg1, arg3
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea arg3, [GF_VECT_DOT_PROD_AVX WRT_OPT] lea arg3, [gf_vect_dot_prod_avx WRT_OPT]
jne _done_gf_vect_dot_prod_init jne _done_gf_vect_dot_prod_init
mov arg1, arg3 mov arg1, arg3
@ -366,7 +308,7 @@ gf_vect_dot_prod_dispatch_init:
mov eax, 7 mov eax, 7
cpuid cpuid
test ebx, FLAG_CPUID1_EBX_AVX2 test ebx, FLAG_CPUID1_EBX_AVX2
lea arg3, [GF_VECT_DOT_PROD_AVX2 WRT_OPT] lea arg3, [gf_vect_dot_prod_avx2 WRT_OPT]
cmovne arg1, arg3 cmovne arg1, arg3
;; Does it have xmm and ymm support ;; Does it have xmm and ymm support
xor ecx, ecx xor ecx, ecx
@ -374,7 +316,7 @@ gf_vect_dot_prod_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_gf_vect_dot_prod_init je _done_gf_vect_dot_prod_init
lea arg1, [GF_VECT_DOT_PROD_SSE WRT_OPT] lea arg1, [gf_vect_dot_prod_sse WRT_OPT]
_done_gf_vect_dot_prod_init: _done_gf_vect_dot_prod_init:
pop arg5 pop arg5
@ -388,11 +330,11 @@ _done_gf_vect_dot_prod_init:
;;;; ;;;;
; gf_vect_mad multibinary function ; gf_vect_mad multibinary function
;;;; ;;;;
global GF_VECT_MAD:function global gf_vect_mad:function
gf_vect_mad_mbinit: gf_vect_mad_mbinit:
call gf_vect_mad_dispatch_init call gf_vect_mad_dispatch_init
GF_VECT_MAD: gf_vect_mad:
jmp wrd_sz [gf_vect_mad_dispatched] jmp wrd_sz [gf_vect_mad_dispatched]
gf_vect_mad_dispatch_init: gf_vect_mad_dispatch_init:
@ -404,17 +346,17 @@ gf_vect_mad_dispatch_init:
push rbx push rbx
push rcx push rcx
push rdx push rdx
lea arg1, [GF_VECT_MAD_BASE WRT_OPT] ; Default lea arg1, [gf_vect_mad_base WRT_OPT] ; Default
mov eax, 1 mov eax, 1
cpuid cpuid
lea rbx, [GF_VECT_MAD_SSE WRT_OPT] lea rbx, [gf_vect_mad_sse WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1 test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [GF_VECT_MAD_AVX WRT_OPT] lea rbx, [gf_vect_mad_avx WRT_OPT]
jne _done_gf_vect_mad_init jne _done_gf_vect_mad_init
mov rsi, rbx mov rsi, rbx
@ -424,7 +366,7 @@ gf_vect_mad_dispatch_init:
mov eax, 7 mov eax, 7
cpuid cpuid
test ebx, FLAG_CPUID1_EBX_AVX2 test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [GF_VECT_MAD_AVX2 WRT_OPT] lea rbx, [gf_vect_mad_avx2 WRT_OPT]
cmovne rsi, rbx cmovne rsi, rbx
;; Does it have xmm and ymm support ;; Does it have xmm and ymm support
@ -433,7 +375,7 @@ gf_vect_mad_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_gf_vect_mad_init je _done_gf_vect_mad_init
lea rsi, [GF_VECT_MAD_SSE WRT_OPT] lea rsi, [gf_vect_mad_sse WRT_OPT]
_done_gf_vect_mad_init: _done_gf_vect_mad_init:
pop rdx pop rdx
@ -455,8 +397,8 @@ global %1_slver
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion EC_ENCODE_DATA, 00, 03, 0133 slversion ec_encode_data, 00, 03, 0133
slversion GF_VECT_MUL, 00, 02, 0134 slversion gf_vect_mul, 00, 02, 0134
slversion EC_ENCODE_DATA_UPDATE, 00, 02, 0212 slversion ec_encode_data_update, 00, 02, 0212
slversion GF_VECT_DOT_PROD, 00, 02, 0138 slversion gf_vect_dot_prod, 00, 02, 0138
slversion GF_VECT_MAD, 00, 01, 0213 slversion gf_vect_mad, 00, 01, 0213

@ -94,3 +94,14 @@
%define BYTE(reg) reg %+ b %define BYTE(reg) reg %+ b
%define XWORD(reg) reg %+ x %define XWORD(reg) reg %+ x
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
section .text
%endif
%ifidn __OUTPUT_FORMAT__,elf64
section .note.GNU-stack noalloc noexec nowrite progbits
section .text
%endif

@ -2,42 +2,42 @@
package erasure package erasure
//go:generate yasm -f macho64 ec_multibinary.asm -o ec_multibinary.syso //go:generate yasm -f macho64 --prefix=_ ec_multibinary.asm -o ec_multibinary.syso
//go:generate yasm -f macho64 gf_2vect_mad_avx2.asm -o gf_2vect_mad_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_2vect_mad_avx2.asm -o gf_2vect_mad_avx2.syso
//go:generate yasm -f macho64 gf_2vect_mad_avx.asm -o gf_2vect_mad_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_2vect_mad_avx.asm -o gf_2vect_mad_avx.syso
//go:generate yasm -f macho64 gf_2vect_mad_sse.asm -o gf_2vect_mad_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_2vect_mad_sse.asm -o gf_2vect_mad_sse.syso
//go:generate yasm -f macho64 gf_3vect_mad_avx2.asm -o gf_3vect_mad_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_3vect_mad_avx2.asm -o gf_3vect_mad_avx2.syso
//go:generate yasm -f macho64 gf_3vect_mad_avx.asm -o gf_3vect_mad_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_3vect_mad_avx.asm -o gf_3vect_mad_avx.syso
//go:generate yasm -f macho64 gf_3vect_mad_sse.asm -o gf_3vect_mad_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_3vect_mad_sse.asm -o gf_3vect_mad_sse.syso
//go:generate yasm -f macho64 gf_4vect_mad_avx2.asm -o gf_4vect_mad_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_4vect_mad_avx2.asm -o gf_4vect_mad_avx2.syso
//go:generate yasm -f macho64 gf_4vect_mad_avx.asm -o gf_4vect_mad_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_4vect_mad_avx.asm -o gf_4vect_mad_avx.syso
//go:generate yasm -f macho64 gf_4vect_mad_sse.asm -o gf_4vect_mad_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_4vect_mad_sse.asm -o gf_4vect_mad_sse.syso
//go:generate yasm -f macho64 gf_5vect_mad_avx2.asm -o gf_5vect_mad_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_5vect_mad_avx2.asm -o gf_5vect_mad_avx2.syso
//go:generate yasm -f macho64 gf_5vect_mad_avx.asm -o gf_5vect_mad_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_5vect_mad_avx.asm -o gf_5vect_mad_avx.syso
//go:generate yasm -f macho64 gf_5vect_mad_sse.asm -o gf_5vect_mad_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_5vect_mad_sse.asm -o gf_5vect_mad_sse.syso
//go:generate yasm -f macho64 gf_6vect_mad_avx2.asm -o gf_6vect_mad_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_6vect_mad_avx2.asm -o gf_6vect_mad_avx2.syso
//go:generate yasm -f macho64 gf_6vect_mad_avx.asm -o gf_6vect_mad_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_6vect_mad_avx.asm -o gf_6vect_mad_avx.syso
//go:generate yasm -f macho64 gf_6vect_mad_sse.asm -o gf_6vect_mad_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_6vect_mad_sse.asm -o gf_6vect_mad_sse.syso
//go:generate yasm -f macho64 gf_vect_mad_avx2.asm -o gf_vect_mad_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_vect_mad_avx2.asm -o gf_vect_mad_avx2.syso
//go:generate yasm -f macho64 gf_vect_mad_avx.asm -o gf_vect_mad_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_vect_mad_avx.asm -o gf_vect_mad_avx.syso
//go:generate yasm -f macho64 gf_vect_mad_sse.asm -o gf_vect_mad_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_vect_mad_sse.asm -o gf_vect_mad_sse.syso
//go:generate yasm -f macho64 gf_2vect_dot_prod_avx2.asm -o gf_2vect_dot_prod_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_2vect_dot_prod_avx2.asm -o gf_2vect_dot_prod_avx2.syso
//go:generate yasm -f macho64 gf_2vect_dot_prod_avx.asm -o gf_2vect_dot_prod_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_2vect_dot_prod_avx.asm -o gf_2vect_dot_prod_avx.syso
//go:generate yasm -f macho64 gf_2vect_dot_prod_sse.asm -o gf_2vect_dot_prod_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_2vect_dot_prod_sse.asm -o gf_2vect_dot_prod_sse.syso
//go:generate yasm -f macho64 gf_3vect_dot_prod_avx2.asm -o gf_3vect_dot_prod_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_3vect_dot_prod_avx2.asm -o gf_3vect_dot_prod_avx2.syso
//go:generate yasm -f macho64 gf_3vect_dot_prod_avx.asm -o gf_3vect_dot_prod_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_3vect_dot_prod_avx.asm -o gf_3vect_dot_prod_avx.syso
//go:generate yasm -f macho64 gf_3vect_dot_prod_sse.asm -o gf_3vect_dot_prod_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_3vect_dot_prod_sse.asm -o gf_3vect_dot_prod_sse.syso
//go:generate yasm -f macho64 gf_4vect_dot_prod_avx2.asm -o gf_4vect_dot_prod_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_4vect_dot_prod_avx2.asm -o gf_4vect_dot_prod_avx2.syso
//go:generate yasm -f macho64 gf_4vect_dot_prod_avx.asm -o gf_4vect_dot_prod_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_4vect_dot_prod_avx.asm -o gf_4vect_dot_prod_avx.syso
//go:generate yasm -f macho64 gf_4vect_dot_prod_sse.asm -o gf_4vect_dot_prod_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_4vect_dot_prod_sse.asm -o gf_4vect_dot_prod_sse.syso
//go:generate yasm -f macho64 gf_5vect_dot_prod_avx2.asm -o gf_5vect_dot_prod_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_5vect_dot_prod_avx2.asm -o gf_5vect_dot_prod_avx2.syso
//go:generate yasm -f macho64 gf_5vect_dot_prod_avx.asm -o gf_5vect_dot_prod_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_5vect_dot_prod_avx.asm -o gf_5vect_dot_prod_avx.syso
//go:generate yasm -f macho64 gf_5vect_dot_prod_sse.asm -o gf_5vect_dot_prod_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_5vect_dot_prod_sse.asm -o gf_5vect_dot_prod_sse.syso
//go:generate yasm -f macho64 gf_6vect_dot_prod_avx2.asm -o gf_6vect_dot_prod_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_6vect_dot_prod_avx2.asm -o gf_6vect_dot_prod_avx2.syso
//go:generate yasm -f macho64 gf_6vect_dot_prod_avx.asm -o gf_6vect_dot_prod_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_6vect_dot_prod_avx.asm -o gf_6vect_dot_prod_avx.syso
//go:generate yasm -f macho64 gf_6vect_dot_prod_sse.asm -o gf_6vect_dot_prod_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_6vect_dot_prod_sse.asm -o gf_6vect_dot_prod_sse.syso
//go:generate yasm -f macho64 gf_vect_dot_prod_avx2.asm -o gf_vect_dot_prod_avx2.syso //go:generate yasm -f macho64 --prefix=_ gf_vect_dot_prod_avx2.asm -o gf_vect_dot_prod_avx2.syso
//go:generate yasm -f macho64 gf_vect_dot_prod_avx.asm -o gf_vect_dot_prod_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_vect_dot_prod_avx.asm -o gf_vect_dot_prod_avx.syso
//go:generate yasm -f macho64 gf_vect_dot_prod_sse.asm -o gf_vect_dot_prod_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_vect_dot_prod_sse.asm -o gf_vect_dot_prod_sse.syso
//go:generate yasm -f macho64 gf_vect_mul_avx.asm -o gf_vect_mul_avx.syso //go:generate yasm -f macho64 --prefix=_ gf_vect_mul_avx.asm -o gf_vect_mul_avx.syso
//go:generate yasm -f macho64 gf_vect_mul_sse.asm -o gf_vect_mul_sse.syso //go:generate yasm -f macho64 --prefix=_ gf_vect_mul_sse.asm -o gf_vect_mul_sse.syso

@ -31,12 +31,6 @@
;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); ;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx
%else
%define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -274,9 +268,8 @@ section .text
%endif %endif
align 16 align 16
global GF_2VECT_DOT_PROD_AVX:function global gf_2vect_dot_prod_avx:function
func(gf_2vect_dot_prod_avx)
func(GF_2VECT_DOT_PROD_AVX)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 16 sub len, 16
@ -378,4 +371,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_2VECT_DOT_PROD_AVX, 02, 04, 0191 slversion gf_2vect_dot_prod_avx, 02, 04, 0191

@ -31,12 +31,6 @@
;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); ;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2
%else
%define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -282,9 +276,8 @@ section .text
%endif %endif
align 16 align 16
global GF_2VECT_DOT_PROD_AVX2:function global gf_2vect_dot_prod_avx2:function
func(gf_2vect_dot_prod_avx2)
func(GF_2VECT_DOT_PROD_AVX2)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 32 sub len, 32
@ -395,4 +388,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_2VECT_DOT_PROD_AVX2, 04, 04, 0196 slversion gf_2vect_dot_prod_avx2, 04, 04, 0196

@ -31,12 +31,6 @@
;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); ;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse
%else
%define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -274,9 +268,8 @@ section .text
%endif %endif
align 16 align 16
global GF_2VECT_DOT_PROD_SSE:function global gf_2vect_dot_prod_sse:function
func(gf_2vect_dot_prod_sse)
func(GF_2VECT_DOT_PROD_SSE)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 16 sub len, 16
@ -380,4 +373,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_2VECT_DOT_PROD_SSE, 00, 03, 0062 slversion gf_2vect_dot_prod_sse, 00, 03, 0062

@ -31,12 +31,6 @@
;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest); ;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_MAD_AVX _gf_2vect_mad_avx
%else
%define GF_2VECT_MAD_AVX gf_2vect_mad_avx
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -104,6 +98,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -175,9 +170,8 @@ section .text
align 16 align 16
global GF_2VECT_MAD_AVX:function global gf_2vect_mad_avx:function
func(gf_2vect_mad_avx)
func(GF_2VECT_MAD_AVX)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -261,4 +255,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_2VECT_MAD_AVX, 02, 00, 0204 slversion gf_2vect_mad_avx, 02, 00, 0204

@ -31,12 +31,6 @@
;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); ;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_MAD_AVX2 _gf_2vect_mad_avx2
%else
%define GF_2VECT_MAD_AVX2 gf_2vect_mad_avx2
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -188,9 +182,8 @@ section .text
%define xtmpd2 ymm9 %define xtmpd2 ymm9
align 16 align 16
global GF_2VECT_MAD_AVX2:function global gf_2vect_mad_avx2:function
func(gf_2vect_mad_avx2)
func(GF_2VECT_MAD_AVX2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -277,4 +270,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_2VECT_MAD_AVX2, 04, 00, 0205 slversion gf_2vect_mad_avx2, 04, 00, 0205

@ -31,12 +31,6 @@
;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest); ;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_MAD_SSE _gf_2vect_mad_sse
%else
%define GF_2VECT_MAD_SSE gf_2vect_mad_sse
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -104,6 +98,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -174,8 +169,8 @@ section .text
align 16 align 16
global GF_2VECT_MAD_SSE:function global gf_2vect_mad_sse:function
func(GF_2VECT_MAD_SSE) func(gf_2vect_mad_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -264,4 +259,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_2VECT_MAD_SSE, 00, 00, 0203 slversion gf_2vect_mad_sse, 00, 00, 0203

@ -31,12 +31,6 @@
;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); ;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx
%else
%define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -297,9 +291,9 @@ section .text
%endif %endif
align 16 align 16
global GF_3VECT_DOT_PROD_AVX:function global gf_3vect_dot_prod_avx:function
func(GF_3VECT_DOT_PROD_AVX) func(gf_3vect_dot_prod_avx)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 16 sub len, 16
SSTR len_m, len SSTR len_m, len
@ -418,4 +412,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_3VECT_DOT_PROD_AVX, 02, 04, 0192 slversion gf_3vect_dot_prod_avx, 02, 04, 0192

@ -31,12 +31,6 @@
;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); ;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2
%else
%define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -305,8 +299,8 @@ section .text
%endif %endif
align 16 align 16
global GF_3VECT_DOT_PROD_AVX2:function global gf_3vect_dot_prod_avx2:function
func(GF_3VECT_DOT_PROD_AVX2) func(gf_3vect_dot_prod_avx2)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 32 sub len, 32
@ -438,4 +432,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_3VECT_DOT_PROD_AVX2, 04, 04, 0197 slversion gf_3vect_dot_prod_avx2, 04, 04, 0197

@ -31,12 +31,6 @@
;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); ;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse
%else
%define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -297,8 +291,8 @@ section .text
%endif %endif
align 16 align 16
global GF_3VECT_DOT_PROD_SSE:function global gf_3vect_dot_prod_sse:function
func(GF_3VECT_DOT_PROD_SSE) func(gf_3vect_dot_prod_sse)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 16 sub len, 16
@ -419,4 +413,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_3VECT_DOT_PROD_SSE, 00, 05, 0063 slversion gf_3vect_dot_prod_sse, 00, 05, 0063

@ -31,12 +31,6 @@
;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest); ;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_MAD_AVX _gf_3vect_mad_avx
%else
%define GF_3VECT_MAD_AVX gf_3vect_mad_avx
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -104,6 +98,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -177,8 +172,8 @@ section .text
%define xd3 xtmph1 %define xd3 xtmph1
align 16 align 16
global GF_3VECT_MAD_AVX:function global gf_3vect_mad_avx:function
func(GF_3VECT_MAD_AVX) func(gf_3vect_mad_avx)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -312,4 +307,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_3VECT_MAD_AVX, 02, 00, 0207 slversion gf_3vect_mad_avx, 02, 00, 0207

@ -31,12 +31,6 @@
;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); ;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_MAD_AVX2 _gf_3vect_mad_avx2
%else
%define GF_3VECT_MAD_AVX2 gf_3vect_mad_avx2
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -110,6 +104,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -187,8 +182,8 @@ section .text
%define xd3 ymm10 %define xd3 ymm10
align 16 align 16
global GF_3VECT_MAD_AVX2:function global gf_3vect_mad_avx2:function
func(GF_3VECT_MAD_AVX2) func(gf_3vect_mad_avx2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -344,4 +339,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_3VECT_MAD_AVX2, 04, 00, 0208 slversion gf_3vect_mad_avx2, 04, 00, 0208

@ -31,12 +31,6 @@
;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest); ;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_MAD_SSE _gf_3vect_mad_sse
%else
%define GF_3VECT_MAD_SSE gf_3vect_mad_sse
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -104,6 +98,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -176,8 +171,8 @@ section .text
%define xd3 xtmph1 %define xd3 xtmph1
align 16 align 16
global GF_3VECT_MAD_SSE:function global gf_3vect_mad_sse:function
func(GF_3VECT_MAD_SSE) func(gf_3vect_mad_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -323,4 +318,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_3VECT_MAD_SSE, 00, 00, 0206 slversion gf_3vect_mad_sse, 00, 00, 0206

@ -31,12 +31,6 @@
;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); ;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx
%else
%define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -334,8 +328,8 @@ section .text
%define xp4 xmm5 %define xp4 xmm5
%endif %endif
align 16 align 16
global GF_4VECT_DOT_PROD_AVX:function global gf_4vect_dot_prod_avx:function
func(GF_4VECT_DOT_PROD_AVX) func(gf_4vect_dot_prod_avx)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 16 sub len, 16
@ -486,4 +480,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_4VECT_DOT_PROD_AVX, 02, 04, 0193 slversion gf_4vect_dot_prod_avx, 02, 04, 0193

@ -31,12 +31,6 @@
;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); ;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2
%else
%define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -344,8 +338,8 @@ section .text
%define xp4 ymm5 %define xp4 ymm5
%endif %endif
align 16 align 16
global GF_4VECT_DOT_PROD_AVX2:function global gf_4vect_dot_prod_avx2:function
func(GF_4VECT_DOT_PROD_AVX2) func(gf_4vect_dot_prod_avx2)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 32 sub len, 32
@ -507,4 +501,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_4VECT_DOT_PROD_AVX2, 04, 04, 0198 slversion gf_4vect_dot_prod_avx2, 04, 04, 0198

@ -31,12 +31,6 @@
;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); ;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse
%else
%define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -334,8 +328,8 @@ section .text
%define xp4 xmm5 %define xp4 xmm5
%endif %endif
align 16 align 16
global GF_4VECT_DOT_PROD_SSE:function global gf_4vect_dot_prod_sse:function
func(GF_4VECT_DOT_PROD_SSE) func(gf_4vect_dot_prod_sse)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 16 sub len, 16
@ -488,4 +482,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_4VECT_DOT_PROD_SSE, 00, 05, 0064 slversion gf_4vect_dot_prod_sse, 00, 05, 0064

@ -31,12 +31,6 @@
;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest); ;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_MAD_AVX _gf_4vect_mad_avx
%else
%define GF_4VECT_MAD_AVX gf_4vect_mad_avx
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -114,6 +108,7 @@
%macro FUNC_RESTORE 0 %macro FUNC_RESTORE 0
pop r12 pop r12
%endmacro %endmacro
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -194,8 +189,8 @@ section .text
%define xd4 xtmpl1 %define xd4 xtmpl1
align 16 align 16
global GF_4VECT_MAD_AVX:function global gf_4vect_mad_avx:function
func(GF_4VECT_MAD_AVX) func(gf_4vect_mad_avx)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -367,4 +362,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_4VECT_MAD_AVX, 02, 00, 020a slversion gf_4vect_mad_avx, 02, 00, 020a

@ -30,13 +30,7 @@
;;; ;;;
;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); ;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_MAD_AVX2 _gf_4vect_mad_avx2
%else
%define GF_4VECT_MAD_AVX2 gf_4vect_mad_avx2
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -108,6 +102,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -186,8 +181,8 @@ section .text
%define xd4 ymm10 %define xd4 ymm10
align 16 align 16
global GF_4VECT_MAD_AVX2:function global gf_4vect_mad_avx2:function
func(GF_4VECT_MAD_AVX2) func(gf_4vect_mad_avx2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -368,4 +363,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_4VECT_MAD_AVX2, 04, 00, 020b slversion gf_4vect_mad_avx2, 04, 00, 020b

@ -31,12 +31,6 @@
;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest); ;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_MAD_SSE _gf_4vect_mad_sse
%else
%define GF_4VECT_MAD_SSE gf_4vect_mad_sse
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -114,6 +108,7 @@
%macro FUNC_RESTORE 0 %macro FUNC_RESTORE 0
pop r12 pop r12
%endmacro %endmacro
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -193,8 +188,8 @@ section .text
%define xd4 xtmpl1 %define xd4 xtmpl1
align 16 align 16
global GF_4VECT_MAD_SSE:function global gf_4vect_mad_sse:function
func(GF_4VECT_MAD_SSE) func(gf_4vect_mad_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -372,4 +367,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_4VECT_MAD_SSE, 00, 00, 0209 slversion gf_4vect_mad_sse, 00, 00, 0209

@ -31,12 +31,6 @@
;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); ;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx
%else
%define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -221,8 +215,8 @@ section .text
%define xp5 xmm6 %define xp5 xmm6
align 16 align 16
global GF_5VECT_DOT_PROD_AVX:function global gf_5vect_dot_prod_avx:function
func(GF_5VECT_DOT_PROD_AVX) func(gf_5vect_dot_prod_avx)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -345,4 +339,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_5VECT_DOT_PROD_AVX, 02, 03, 0194 slversion gf_5vect_dot_prod_avx, 02, 03, 0194

@ -31,12 +31,6 @@
;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); ;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2
%else
%define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -228,8 +222,8 @@ section .text
%define xp5 ymm6 %define xp5 ymm6
align 16 align 16
global GF_5VECT_DOT_PROD_AVX2:function global gf_5vect_dot_prod_avx2:function
func(GF_5VECT_DOT_PROD_AVX2) func(gf_5vect_dot_prod_avx2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -359,4 +353,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_5VECT_DOT_PROD_AVX2, 04, 03, 0199 slversion gf_5vect_dot_prod_avx2, 04, 03, 0199

@ -31,12 +31,6 @@
;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); ;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse
%else
%define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -221,8 +215,8 @@ section .text
%define xp5 xmm14 %define xp5 xmm14
align 16 align 16
global GF_5VECT_DOT_PROD_SSE:function global gf_5vect_dot_prod_sse:function
func(GF_5VECT_DOT_PROD_SSE) func(gf_5vect_dot_prod_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -346,4 +340,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_5VECT_DOT_PROD_SSE, 00, 04, 0065 slversion gf_5vect_dot_prod_sse, 00, 04, 0065

@ -31,12 +31,6 @@
;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest); ;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_MAD_AVX _gf_5vect_mad_avx
%else
%define GF_5VECT_MAD_AVX gf_5vect_mad_avx
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -206,8 +200,8 @@ section .text
align 16 align 16
global GF_5VECT_MAD_AVX:function global gf_5vect_mad_avx:function
func(GF_5VECT_MAD_AVX) func(gf_5vect_mad_avx)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -398,4 +392,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_5VECT_MAD_AVX, 02, 00, 020d slversion gf_5vect_mad_avx, 02, 00, 020d

@ -31,12 +31,6 @@
;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); ;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_MAD_AVX2 _gf_5vect_mad_avx2
%else
%define GF_5VECT_MAD_AVX2 gf_5vect_mad_avx2
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -188,8 +182,8 @@ section .text
%define xd5 ymm9 %define xd5 ymm9
align 16 align 16
global GF_5VECT_MAD_AVX2:function global gf_5vect_mad_avx2:function
func(GF_5VECT_MAD_AVX2) func(gf_5vect_mad_avx2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -390,4 +384,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_5VECT_MAD_AVX2, 04, 00, 020e slversion gf_5vect_mad_avx2, 04, 00, 020e

@ -31,12 +31,6 @@
;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest); ;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_MAD_SSE _gf_5vect_mad_sse
%else
%define GF_5VECT_MAD_SSE gf_5vect_mad_sse
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -205,8 +199,8 @@ section .text
align 16 align 16
global GF_5VECT_MAD_SSE:function global gf_5vect_mad_sse:function
func(GF_5VECT_MAD_SSE) func(gf_5vect_mad_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -406,4 +400,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_5VECT_MAD_SSE, 00, 00, 020c slversion gf_5vect_mad_sse, 00, 00, 020c

@ -31,12 +31,6 @@
;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); ;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx
%else
%define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -219,8 +213,8 @@ section .text
%define xp6 xmm7 %define xp6 xmm7
align 16 align 16
global GF_6VECT_DOT_PROD_AVX:function global gf_6vect_dot_prod_avx:function
func(GF_6VECT_DOT_PROD_AVX) func(gf_6vect_dot_prod_avx)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -357,4 +351,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_6VECT_DOT_PROD_AVX, 02, 03, 0195 slversion gf_6vect_dot_prod_avx, 02, 03, 0195

@ -31,12 +31,6 @@
;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); ;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2
%else
%define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -226,8 +220,8 @@ section .text
%define xp6 ymm7 %define xp6 ymm7
align 16 align 16
global GF_6VECT_DOT_PROD_AVX2:function global gf_6vect_dot_prod_avx2:function
func(GF_6VECT_DOT_PROD_AVX2) func(gf_6vect_dot_prod_avx2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -370,4 +364,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_6VECT_DOT_PROD_AVX2, 04, 03, 019a slversion gf_6vect_dot_prod_avx2, 04, 03, 019a

@ -31,12 +31,6 @@
;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); ;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse
%else
%define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -219,8 +213,8 @@ section .text
%define xp6 xmm13 %define xp6 xmm13
align 16 align 16
global GF_6VECT_DOT_PROD_SSE:function global gf_6vect_dot_prod_sse:function
func(GF_6VECT_DOT_PROD_SSE) func(gf_6vect_dot_prod_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -357,4 +351,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_6VECT_DOT_PROD_SSE, 00, 04, 0066 slversion gf_6vect_dot_prod_sse, 00, 04, 0066

@ -31,12 +31,6 @@
;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest); ;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_MAD_AVX _gf_6vect_mad_avx
%else
%define GF_6VECT_MAD_AVX gf_6vect_mad_avx
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -215,8 +209,8 @@ section .text
align 16 align 16
global GF_6VECT_MAD_AVX:function global gf_6vect_mad_avx:function
func(GF_6VECT_MAD_AVX) func(gf_6vect_mad_avx)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -430,4 +424,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_6VECT_MAD_AVX, 02, 00, 0210 slversion gf_6vect_mad_avx, 02, 00, 0210

@ -31,12 +31,6 @@
;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); ;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_MAD_AVX2 _gf_6vect_mad_avx2
%else
%define GF_6VECT_MAD_AVX2 gf_6vect_mad_avx2
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -204,8 +198,8 @@ section .text
%define xd6 xd1 %define xd6 xd1
align 16 align 16
global GF_6VECT_MAD_AVX2:function global gf_6vect_mad_avx2:function
func(GF_6VECT_MAD_AVX2) func(gf_6vect_mad_avx2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -432,4 +426,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_6VECT_MAD_AVX2, 04, 00, 0211 slversion gf_6vect_mad_avx2, 04, 00, 0211

@ -31,12 +31,6 @@
;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest); ;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_MAD_SSE _gf_6vect_mad_sse
%else
%define GF_6VECT_MAD_SSE gf_6vect_mad_sse
%endif
%define PS 8 %define PS 8
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
@ -217,8 +211,8 @@ section .text
align 16 align 16
global GF_6VECT_MAD_SSE:function global gf_6vect_mad_sse:function
func(GF_6VECT_MAD_SSE) func(gf_6vect_mad_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -443,4 +437,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_6VECT_MAD_SSE, 00, 00, 020f slversion gf_6vect_mad_sse, 00, 00, 020f

@ -31,12 +31,6 @@
;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest); ;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%else
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -218,8 +212,8 @@ section .text
%define xp xmm2 %define xp xmm2
align 16 align 16
global GF_VECT_DOT_PROD_AVX:function global gf_vect_dot_prod_avx:function
func(GF_VECT_DOT_PROD_AVX) func(gf_vect_dot_prod_avx)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 16 sub len, 16
@ -300,4 +294,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_VECT_DOT_PROD_AVX, 02, 04, 0061 slversion gf_vect_dot_prod_avx, 02, 04, 0061

@ -31,12 +31,6 @@
;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest); ;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%else
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -229,8 +223,8 @@ section .text
%define xp ymm2 %define xp ymm2
align 16 align 16
global GF_VECT_DOT_PROD_AVX2:function global gf_vect_dot_prod_avx2:function
func(GF_VECT_DOT_PROD_AVX2) func(gf_vect_dot_prod_avx2)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 32 sub len, 32
@ -312,4 +306,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_VECT_DOT_PROD_AVX2, 04, 04, 0190 slversion gf_vect_dot_prod_avx2, 04, 04, 0190

@ -31,12 +31,6 @@
;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest); ;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%else
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -218,8 +212,8 @@ section .text
%define xp xmm2 %define xp xmm2
align 16 align 16
global GF_VECT_DOT_PROD_SSE:function global gf_vect_dot_prod_sse:function
func(GF_VECT_DOT_PROD_SSE) func(gf_vect_dot_prod_sse)
FUNC_SAVE FUNC_SAVE
SLDR len, len_m SLDR len, len_m
sub len, 16 sub len, 16
@ -300,4 +294,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_VECT_DOT_PROD_SSE, 00, 04, 0060 slversion gf_vect_dot_prod_sse, 00, 04, 0060

@ -31,12 +31,6 @@
;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest); ;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MAD_AVX _gf_vect_mad_avx
%else
%define GF_VECT_MAD_AVX gf_vect_mad_avx
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg0.w ecx %define arg0.w ecx
@ -89,6 +83,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -150,8 +145,8 @@ section .text
%define xtmpd xmm5 %define xtmpd xmm5
align 16 align 16
global GF_VECT_MAD_AVX:function global gf_vect_mad_avx:function
func(GF_VECT_MAD_AVX) func(gf_vect_mad_avx)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -220,4 +215,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_VECT_MAD_AVX, 02, 00, 0201 slversion gf_vect_mad_avx, 02, 00, 0201

@ -31,12 +31,6 @@
;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); ;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MAD_AVX2 _gf_vect_mad_avx2
%else
%define GF_VECT_MAD_AVX2 gf_vect_mad_avx2
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg0.w ecx %define arg0.w ecx
@ -95,6 +89,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -161,8 +156,8 @@ section .text
%define xtmpd ymm5 %define xtmpd ymm5
align 16 align 16
global GF_VECT_MAD_AVX2:function global gf_vect_mad_avx2:function
func(GF_VECT_MAD_AVX2) func(gf_vect_mad_avx2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -230,4 +225,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_VECT_MAD_AVX2, 04, 00, 0202 slversion gf_vect_mad_avx2, 04, 00, 0202

@ -31,12 +31,6 @@
;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest); ;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MAD_SSE _gf_vect_mad_sse
%else
%define GF_VECT_MAD_SSE gf_vect_mad_sse
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg0.w ecx %define arg0.w ecx
@ -89,6 +83,7 @@
%define func(x) x: %define func(x) x:
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64 %elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi %define arg0 rdi
%define arg0.w edi %define arg0.w edi
@ -150,8 +145,8 @@ section .text
align 16 align 16
global GF_VECT_MAD_SSE:function global gf_vect_mad_sse:function
func(GF_VECT_MAD_SSE) func(gf_vect_mad_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -221,4 +216,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_VECT_MAD_SSE, 00, 00, 0200 slversion gf_vect_mad_sse, 00, 00, 0200

@ -31,12 +31,6 @@
;;; gf_vect_mul_avx(len, mul_array, src, dest) ;;; gf_vect_mul_avx(len, mul_array, src, dest)
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%else
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -128,8 +122,8 @@ section .text
%define xtmp2c xmm7 %define xtmp2c xmm7
align 16 align 16
global GF_VECT_MUL_AVX:function global gf_vect_mul_avx:function
func(GF_VECT_MUL_AVX) func(gf_vect_mul_avx)
FUNC_SAVE FUNC_SAVE
mov pos, 0 mov pos, 0
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
@ -186,4 +180,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_VECT_MUL_AVX, 01, 02, 0036 slversion gf_vect_mul_avx, 01, 02, 0036

@ -31,12 +31,6 @@
;;; gf_vect_mul_sse(len, mul_array, src, dest) ;;; gf_vect_mul_sse(len, mul_array, src, dest)
;;; ;;;
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%else
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
%define arg1 rsi %define arg1 rsi
@ -129,8 +123,8 @@ section .text
align 16 align 16
global GF_VECT_MUL_SSE:function global gf_vect_mul_sse:function
func(GF_VECT_MUL_SSE) func(gf_vect_mul_sse)
FUNC_SAVE FUNC_SAVE
mov pos, 0 mov pos, 0
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
@ -192,4 +186,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion GF_VECT_MUL_SSE, 00, 02, 0034 slversion gf_vect_mul_sse, 00, 02, 0034

Loading…
Cancel
Save