|
|
@ -33,64 +33,6 @@ |
|
|
|
%define WRT_OPT |
|
|
|
%define WRT_OPT |
|
|
|
%endif |
|
|
|
%endif |
|
|
|
|
|
|
|
|
|
|
|
%ifidn __OUTPUT_FORMAT__, macho64 |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_SSE _ec_encode_data_sse |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_AVX _ec_encode_data_avx |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2 |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_BASE _ec_encode_data_base |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA _ec_encode_data |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_UPDATE_BASE _ec_encode_data_update_base |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_UPDATE_SSE _ec_encode_data_update_sse |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_UPDATE_AVX _ec_encode_data_update_avx |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_UPDATE_AVX2 _ec_encode_data_update_avx2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%define GF_VECT_MAD_BASE _gf_vect_mad_base |
|
|
|
|
|
|
|
%define GF_VECT_MAD_SSE _gf_vect_mad_sse |
|
|
|
|
|
|
|
%define GF_VECT_MAD_AVX _gf_vect_mad_avx |
|
|
|
|
|
|
|
%define GF_VECT_MAD_AVX2 _gf_vect_mad_avx2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%define GF_VECT_MUL_SSE _gf_vect_mul_sse |
|
|
|
|
|
|
|
%define GF_VECT_MUL_AVX _gf_vect_mul_avx |
|
|
|
|
|
|
|
%define GF_VECT_MUL_BASE _gf_vect_mul_base |
|
|
|
|
|
|
|
%define GF_VECT_MUL _gf_vect_mul |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse |
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx |
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2 |
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base |
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD _gf_vect_dot_prod |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%else |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_SSE ec_encode_data_sse |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_AVX ec_encode_data_avx |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2 |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_BASE ec_encode_data_base |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA ec_encode_data |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_UPDATE_BASE ec_encode_data_update_base |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_UPDATE_SSE ec_encode_data_update_sse |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_UPDATE_AVX ec_encode_data_update_avx |
|
|
|
|
|
|
|
%define EC_ENCODE_DATA_UPDATE_AVX2 ec_encode_data_update_avx2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%define GF_VECT_MAD_BASE gf_vect_mad_base |
|
|
|
|
|
|
|
%define GF_VECT_MAD_SSE gf_vect_mad_sse |
|
|
|
|
|
|
|
%define GF_VECT_MAD_AVX gf_vect_mad_avx |
|
|
|
|
|
|
|
%define GF_VECT_MAD_AVX2 gf_vect_mad_avx2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%define GF_VECT_MUL_SSE gf_vect_mul_sse |
|
|
|
|
|
|
|
%define GF_VECT_MUL_AVX gf_vect_mul_avx |
|
|
|
|
|
|
|
%define GF_VECT_MUL_BASE gf_vect_mul_base |
|
|
|
|
|
|
|
%define GF_VECT_MUL gf_vect_mul |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse |
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx |
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2 |
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base |
|
|
|
|
|
|
|
%define GF_VECT_DOT_PROD gf_vect_dot_prod |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%include "ec_reg_sizes.asm" |
|
|
|
%include "ec_reg_sizes.asm" |
|
|
|
|
|
|
|
|
|
|
|
%ifidn __OUTPUT_FORMAT__, elf32 |
|
|
|
%ifidn __OUTPUT_FORMAT__, elf32 |
|
|
@ -119,29 +61,29 @@ |
|
|
|
%define arg5 rdx |
|
|
|
%define arg5 rdx |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
extern EC_ENCODE_DATA_UPDATE_SSE |
|
|
|
extern ec_encode_data_update_sse |
|
|
|
extern EC_ENCODE_DATA_UPDATE_AVX |
|
|
|
extern ec_encode_data_update_avx |
|
|
|
extern EC_ENCODE_DATA_UPDATE_AVX2 |
|
|
|
extern ec_encode_data_update_avx2 |
|
|
|
extern GF_VECT_MUL_SSE |
|
|
|
extern gf_vect_mul_sse |
|
|
|
extern GF_VECT_MUL_AVX |
|
|
|
extern gf_vect_mul_avx |
|
|
|
|
|
|
|
|
|
|
|
extern GF_VECT_MAD_SSE |
|
|
|
extern gf_vect_mad_sse |
|
|
|
extern GF_VECT_MAD_AVX |
|
|
|
extern gf_vect_mad_avx |
|
|
|
extern GF_VECT_MAD_AVX2 |
|
|
|
extern gf_vect_mad_avx2 |
|
|
|
%endif |
|
|
|
%endif |
|
|
|
|
|
|
|
|
|
|
|
extern GF_VECT_MUL_BASE |
|
|
|
extern gf_vect_mul_base |
|
|
|
extern EC_ENCODE_DATA_BASE |
|
|
|
extern ec_encode_data_base |
|
|
|
extern EC_ENCODE_DATA_UPDATE_BASE |
|
|
|
extern ec_encode_data_update_base |
|
|
|
extern GF_VECT_DOT_PROD_BASE |
|
|
|
extern gf_vect_dot_prod_base |
|
|
|
extern GF_VECT_MAD_BASE |
|
|
|
extern gf_vect_mad_base |
|
|
|
|
|
|
|
|
|
|
|
extern GF_VECT_DOT_PROD_SSE |
|
|
|
extern gf_vect_dot_prod_sse |
|
|
|
extern GF_VECT_DOT_PROD_AVX |
|
|
|
extern gf_vect_dot_prod_avx |
|
|
|
extern GF_VECT_DOT_PROD_AVX2 |
|
|
|
extern gf_vect_dot_prod_avx2 |
|
|
|
extern EC_ENCODE_DATA_SSE |
|
|
|
extern ec_encode_data_sse |
|
|
|
extern EC_ENCODE_DATA_AVX |
|
|
|
extern ec_encode_data_avx |
|
|
|
extern EC_ENCODE_DATA_AVX2 |
|
|
|
extern ec_encode_data_avx2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
section .data |
|
|
|
section .data |
|
|
@ -167,11 +109,11 @@ section .text |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
; ec_encode_data multibinary function |
|
|
|
; ec_encode_data multibinary function |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
global EC_ENCODE_DATA:function |
|
|
|
global ec_encode_data:function |
|
|
|
ec_encode_data_mbinit: |
|
|
|
ec_encode_data_mbinit: |
|
|
|
call ec_encode_data_dispatch_init |
|
|
|
call ec_encode_data_dispatch_init |
|
|
|
|
|
|
|
|
|
|
|
EC_ENCODE_DATA: |
|
|
|
ec_encode_data: |
|
|
|
jmp wrd_sz [ec_encode_data_dispatched] |
|
|
|
jmp wrd_sz [ec_encode_data_dispatched] |
|
|
|
|
|
|
|
|
|
|
|
ec_encode_data_dispatch_init: |
|
|
|
ec_encode_data_dispatch_init: |
|
|
@ -180,17 +122,17 @@ ec_encode_data_dispatch_init: |
|
|
|
push arg3 |
|
|
|
push arg3 |
|
|
|
push arg4 |
|
|
|
push arg4 |
|
|
|
push arg5 |
|
|
|
push arg5 |
|
|
|
lea arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default |
|
|
|
lea arg1, [ec_encode_data_base WRT_OPT] ; Default |
|
|
|
|
|
|
|
|
|
|
|
mov eax, 1 |
|
|
|
mov eax, 1 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
lea arg3, [EC_ENCODE_DATA_SSE WRT_OPT] |
|
|
|
lea arg3, [ec_encode_data_sse WRT_OPT] |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_1 |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_1 |
|
|
|
cmovne arg1, arg3 |
|
|
|
cmovne arg1, arg3 |
|
|
|
|
|
|
|
|
|
|
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
lea arg3, [EC_ENCODE_DATA_AVX WRT_OPT] |
|
|
|
lea arg3, [ec_encode_data_avx WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
jne _done_ec_encode_data_init |
|
|
|
jne _done_ec_encode_data_init |
|
|
|
mov arg1, arg3 |
|
|
|
mov arg1, arg3 |
|
|
@ -200,7 +142,7 @@ ec_encode_data_dispatch_init: |
|
|
|
mov eax, 7 |
|
|
|
mov eax, 7 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
test ebx, FLAG_CPUID1_EBX_AVX2 |
|
|
|
test ebx, FLAG_CPUID1_EBX_AVX2 |
|
|
|
lea arg3, [EC_ENCODE_DATA_AVX2 WRT_OPT] |
|
|
|
lea arg3, [ec_encode_data_avx2 WRT_OPT] |
|
|
|
cmovne arg1, arg3 |
|
|
|
cmovne arg1, arg3 |
|
|
|
;; Does it have xmm and ymm support |
|
|
|
;; Does it have xmm and ymm support |
|
|
|
xor ecx, ecx |
|
|
|
xor ecx, ecx |
|
|
@ -208,7 +150,7 @@ ec_encode_data_dispatch_init: |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
je _done_ec_encode_data_init |
|
|
|
je _done_ec_encode_data_init |
|
|
|
lea arg1, [EC_ENCODE_DATA_SSE WRT_OPT] |
|
|
|
lea arg1, [ec_encode_data_sse WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
_done_ec_encode_data_init: |
|
|
|
_done_ec_encode_data_init: |
|
|
|
pop arg5 |
|
|
|
pop arg5 |
|
|
@ -222,28 +164,28 @@ _done_ec_encode_data_init: |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
; gf_vect_mul multibinary function |
|
|
|
; gf_vect_mul multibinary function |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
global GF_VECT_MUL:function |
|
|
|
global gf_vect_mul:function |
|
|
|
gf_vect_mul_mbinit: |
|
|
|
gf_vect_mul_mbinit: |
|
|
|
call gf_vect_mul_dispatch_init |
|
|
|
call gf_vect_mul_dispatch_init |
|
|
|
|
|
|
|
|
|
|
|
GF_VECT_MUL: |
|
|
|
gf_vect_mul: |
|
|
|
jmp wrd_sz [gf_vect_mul_dispatched] |
|
|
|
jmp wrd_sz [gf_vect_mul_dispatched] |
|
|
|
|
|
|
|
|
|
|
|
gf_vect_mul_dispatch_init: |
|
|
|
gf_vect_mul_dispatch_init: |
|
|
|
push arg1 |
|
|
|
push arg1 |
|
|
|
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check |
|
|
|
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check |
|
|
|
lea arg1, [GF_VECT_MUL_BASE] |
|
|
|
lea arg1, [gf_vect_mul_base] |
|
|
|
%else |
|
|
|
%else |
|
|
|
push rax |
|
|
|
push rax |
|
|
|
push rbx |
|
|
|
push rbx |
|
|
|
push rcx |
|
|
|
push rcx |
|
|
|
push rdx |
|
|
|
push rdx |
|
|
|
lea arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default |
|
|
|
lea arg1, [gf_vect_mul_base WRT_OPT] ; Default |
|
|
|
|
|
|
|
|
|
|
|
mov eax, 1 |
|
|
|
mov eax, 1 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_2 |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_2 |
|
|
|
lea rbx, [GF_VECT_MUL_SSE WRT_OPT] |
|
|
|
lea rbx, [gf_vect_mul_sse WRT_OPT] |
|
|
|
je _done_gf_vect_mul_dispatch_init |
|
|
|
je _done_gf_vect_mul_dispatch_init |
|
|
|
mov arg1, rbx |
|
|
|
mov arg1, rbx |
|
|
|
|
|
|
|
|
|
|
@ -258,7 +200,7 @@ gf_vect_mul_dispatch_init: |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
jne _done_gf_vect_mul_dispatch_init |
|
|
|
jne _done_gf_vect_mul_dispatch_init |
|
|
|
lea arg1, [GF_VECT_MUL_AVX WRT_OPT] |
|
|
|
lea arg1, [gf_vect_mul_avx WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
_done_gf_vect_mul_dispatch_init: |
|
|
|
_done_gf_vect_mul_dispatch_init: |
|
|
|
pop rdx |
|
|
|
pop rdx |
|
|
@ -273,11 +215,11 @@ _done_gf_vect_mul_dispatch_init: |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
; ec_encode_data_update multibinary function |
|
|
|
; ec_encode_data_update multibinary function |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
global EC_ENCODE_DATA_UPDATE:function |
|
|
|
global ec_encode_data_update:function |
|
|
|
ec_encode_data_update_mbinit: |
|
|
|
ec_encode_data_update_mbinit: |
|
|
|
call ec_encode_data_update_dispatch_init |
|
|
|
call ec_encode_data_update_dispatch_init |
|
|
|
|
|
|
|
|
|
|
|
EC_ENCODE_DATA_UPDATE: |
|
|
|
ec_encode_data_update: |
|
|
|
jmp wrd_sz [ec_encode_data_update_dispatched] |
|
|
|
jmp wrd_sz [ec_encode_data_update_dispatched] |
|
|
|
|
|
|
|
|
|
|
|
ec_encode_data_update_dispatch_init: |
|
|
|
ec_encode_data_update_dispatch_init: |
|
|
@ -289,17 +231,17 @@ ec_encode_data_update_dispatch_init: |
|
|
|
push rbx |
|
|
|
push rbx |
|
|
|
push rcx |
|
|
|
push rcx |
|
|
|
push rdx |
|
|
|
push rdx |
|
|
|
lea arg1, [EC_ENCODE_DATA_UPDATE_BASE WRT_OPT] ; Default |
|
|
|
lea arg1, [ec_encode_data_update_base WRT_OPT] ; Default |
|
|
|
|
|
|
|
|
|
|
|
mov eax, 1 |
|
|
|
mov eax, 1 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
lea rbx, [EC_ENCODE_DATA_UPDATE_SSE WRT_OPT] |
|
|
|
lea rbx, [ec_encode_data_update_sse WRT_OPT] |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_1 |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_1 |
|
|
|
cmovne arg1, rbx |
|
|
|
cmovne arg1, rbx |
|
|
|
|
|
|
|
|
|
|
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
lea rbx, [EC_ENCODE_DATA_UPDATE_AVX WRT_OPT] |
|
|
|
lea rbx, [ec_encode_data_update_avx WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
jne _done_ec_encode_data_update_init |
|
|
|
jne _done_ec_encode_data_update_init |
|
|
|
mov rsi, rbx |
|
|
|
mov rsi, rbx |
|
|
@ -309,7 +251,7 @@ ec_encode_data_update_dispatch_init: |
|
|
|
mov eax, 7 |
|
|
|
mov eax, 7 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
test ebx, FLAG_CPUID1_EBX_AVX2 |
|
|
|
test ebx, FLAG_CPUID1_EBX_AVX2 |
|
|
|
lea rbx, [EC_ENCODE_DATA_UPDATE_AVX2 WRT_OPT] |
|
|
|
lea rbx, [ec_encode_data_update_avx2 WRT_OPT] |
|
|
|
cmovne rsi, rbx |
|
|
|
cmovne rsi, rbx |
|
|
|
|
|
|
|
|
|
|
|
;; Does it have xmm and ymm support |
|
|
|
;; Does it have xmm and ymm support |
|
|
@ -318,7 +260,7 @@ ec_encode_data_update_dispatch_init: |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
je _done_ec_encode_data_update_init |
|
|
|
je _done_ec_encode_data_update_init |
|
|
|
lea rsi, [EC_ENCODE_DATA_UPDATE_SSE WRT_OPT] |
|
|
|
lea rsi, [ec_encode_data_update_sse WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
_done_ec_encode_data_update_init: |
|
|
|
_done_ec_encode_data_update_init: |
|
|
|
pop rdx |
|
|
|
pop rdx |
|
|
@ -333,11 +275,11 @@ _done_ec_encode_data_update_init: |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
; gf_vect_dot_prod multibinary function |
|
|
|
; gf_vect_dot_prod multibinary function |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
global GF_VECT_DOT_PROD:function |
|
|
|
global gf_vect_dot_prod:function |
|
|
|
gf_vect_dot_prod_mbinit: |
|
|
|
gf_vect_dot_prod_mbinit: |
|
|
|
call gf_vect_dot_prod_dispatch_init |
|
|
|
call gf_vect_dot_prod_dispatch_init |
|
|
|
|
|
|
|
|
|
|
|
GF_VECT_DOT_PROD: |
|
|
|
gf_vect_dot_prod: |
|
|
|
jmp wrd_sz [gf_vect_dot_prod_dispatched] |
|
|
|
jmp wrd_sz [gf_vect_dot_prod_dispatched] |
|
|
|
|
|
|
|
|
|
|
|
gf_vect_dot_prod_dispatch_init: |
|
|
|
gf_vect_dot_prod_dispatch_init: |
|
|
@ -346,17 +288,17 @@ gf_vect_dot_prod_dispatch_init: |
|
|
|
push arg3 |
|
|
|
push arg3 |
|
|
|
push arg4 |
|
|
|
push arg4 |
|
|
|
push arg5 |
|
|
|
push arg5 |
|
|
|
lea arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default |
|
|
|
lea arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default |
|
|
|
|
|
|
|
|
|
|
|
mov eax, 1 |
|
|
|
mov eax, 1 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
lea arg3, [GF_VECT_DOT_PROD_SSE WRT_OPT] |
|
|
|
lea arg3, [gf_vect_dot_prod_sse WRT_OPT] |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_1 |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_1 |
|
|
|
cmovne arg1, arg3 |
|
|
|
cmovne arg1, arg3 |
|
|
|
|
|
|
|
|
|
|
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
lea arg3, [GF_VECT_DOT_PROD_AVX WRT_OPT] |
|
|
|
lea arg3, [gf_vect_dot_prod_avx WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
jne _done_gf_vect_dot_prod_init |
|
|
|
jne _done_gf_vect_dot_prod_init |
|
|
|
mov arg1, arg3 |
|
|
|
mov arg1, arg3 |
|
|
@ -366,7 +308,7 @@ gf_vect_dot_prod_dispatch_init: |
|
|
|
mov eax, 7 |
|
|
|
mov eax, 7 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
test ebx, FLAG_CPUID1_EBX_AVX2 |
|
|
|
test ebx, FLAG_CPUID1_EBX_AVX2 |
|
|
|
lea arg3, [GF_VECT_DOT_PROD_AVX2 WRT_OPT] |
|
|
|
lea arg3, [gf_vect_dot_prod_avx2 WRT_OPT] |
|
|
|
cmovne arg1, arg3 |
|
|
|
cmovne arg1, arg3 |
|
|
|
;; Does it have xmm and ymm support |
|
|
|
;; Does it have xmm and ymm support |
|
|
|
xor ecx, ecx |
|
|
|
xor ecx, ecx |
|
|
@ -374,7 +316,7 @@ gf_vect_dot_prod_dispatch_init: |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
je _done_gf_vect_dot_prod_init |
|
|
|
je _done_gf_vect_dot_prod_init |
|
|
|
lea arg1, [GF_VECT_DOT_PROD_SSE WRT_OPT] |
|
|
|
lea arg1, [gf_vect_dot_prod_sse WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
_done_gf_vect_dot_prod_init: |
|
|
|
_done_gf_vect_dot_prod_init: |
|
|
|
pop arg5 |
|
|
|
pop arg5 |
|
|
@ -388,11 +330,11 @@ _done_gf_vect_dot_prod_init: |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
; gf_vect_mad multibinary function |
|
|
|
; gf_vect_mad multibinary function |
|
|
|
;;;; |
|
|
|
;;;; |
|
|
|
global GF_VECT_MAD:function |
|
|
|
global gf_vect_mad:function |
|
|
|
gf_vect_mad_mbinit: |
|
|
|
gf_vect_mad_mbinit: |
|
|
|
call gf_vect_mad_dispatch_init |
|
|
|
call gf_vect_mad_dispatch_init |
|
|
|
|
|
|
|
|
|
|
|
GF_VECT_MAD: |
|
|
|
gf_vect_mad: |
|
|
|
jmp wrd_sz [gf_vect_mad_dispatched] |
|
|
|
jmp wrd_sz [gf_vect_mad_dispatched] |
|
|
|
|
|
|
|
|
|
|
|
gf_vect_mad_dispatch_init: |
|
|
|
gf_vect_mad_dispatch_init: |
|
|
@ -404,17 +346,17 @@ gf_vect_mad_dispatch_init: |
|
|
|
push rbx |
|
|
|
push rbx |
|
|
|
push rcx |
|
|
|
push rcx |
|
|
|
push rdx |
|
|
|
push rdx |
|
|
|
lea arg1, [GF_VECT_MAD_BASE WRT_OPT] ; Default |
|
|
|
lea arg1, [gf_vect_mad_base WRT_OPT] ; Default |
|
|
|
|
|
|
|
|
|
|
|
mov eax, 1 |
|
|
|
mov eax, 1 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
lea rbx, [GF_VECT_MAD_SSE WRT_OPT] |
|
|
|
lea rbx, [gf_vect_mad_sse WRT_OPT] |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_1 |
|
|
|
test ecx, FLAG_CPUID1_ECX_SSE4_1 |
|
|
|
cmovne arg1, rbx |
|
|
|
cmovne arg1, rbx |
|
|
|
|
|
|
|
|
|
|
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) |
|
|
|
lea rbx, [GF_VECT_MAD_AVX WRT_OPT] |
|
|
|
lea rbx, [gf_vect_mad_avx WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
jne _done_gf_vect_mad_init |
|
|
|
jne _done_gf_vect_mad_init |
|
|
|
mov rsi, rbx |
|
|
|
mov rsi, rbx |
|
|
@ -424,7 +366,7 @@ gf_vect_mad_dispatch_init: |
|
|
|
mov eax, 7 |
|
|
|
mov eax, 7 |
|
|
|
cpuid |
|
|
|
cpuid |
|
|
|
test ebx, FLAG_CPUID1_EBX_AVX2 |
|
|
|
test ebx, FLAG_CPUID1_EBX_AVX2 |
|
|
|
lea rbx, [GF_VECT_MAD_AVX2 WRT_OPT] |
|
|
|
lea rbx, [gf_vect_mad_avx2 WRT_OPT] |
|
|
|
cmovne rsi, rbx |
|
|
|
cmovne rsi, rbx |
|
|
|
|
|
|
|
|
|
|
|
;; Does it have xmm and ymm support |
|
|
|
;; Does it have xmm and ymm support |
|
|
@ -433,7 +375,7 @@ gf_vect_mad_dispatch_init: |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
and eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM |
|
|
|
je _done_gf_vect_mad_init |
|
|
|
je _done_gf_vect_mad_init |
|
|
|
lea rsi, [GF_VECT_MAD_SSE WRT_OPT] |
|
|
|
lea rsi, [gf_vect_mad_sse WRT_OPT] |
|
|
|
|
|
|
|
|
|
|
|
_done_gf_vect_mad_init: |
|
|
|
_done_gf_vect_mad_init: |
|
|
|
pop rdx |
|
|
|
pop rdx |
|
|
@ -455,8 +397,8 @@ global %1_slver |
|
|
|
%endmacro |
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
|
|
;;; func core, ver, snum |
|
|
|
;;; func core, ver, snum |
|
|
|
slversion EC_ENCODE_DATA, 00, 03, 0133 |
|
|
|
slversion ec_encode_data, 00, 03, 0133 |
|
|
|
slversion GF_VECT_MUL, 00, 02, 0134 |
|
|
|
slversion gf_vect_mul, 00, 02, 0134 |
|
|
|
slversion EC_ENCODE_DATA_UPDATE, 00, 02, 0212 |
|
|
|
slversion ec_encode_data_update, 00, 02, 0212 |
|
|
|
slversion GF_VECT_DOT_PROD, 00, 02, 0138 |
|
|
|
slversion gf_vect_dot_prod, 00, 02, 0138 |
|
|
|
slversion GF_VECT_MAD, 00, 01, 0213 |
|
|
|
slversion gf_vect_mad, 00, 01, 0213 |
|
|
|