Merge pull request #76 from harshavardhana/pr_out_fast_crc_implementations_ported_from_intel_s_efforts
commit
a2e0a41f7e
@ -0,0 +1,18 @@ |
|||||||
|
// +build linux,amd64
|
||||||
|
|
||||||
|
package cpu |
||||||
|
|
||||||
|
// #include "cpu.h"
|
||||||
|
import "C" |
||||||
|
|
||||||
|
func HasSSE41() int { |
||||||
|
return int(C.has_sse41()) |
||||||
|
} |
||||||
|
|
||||||
|
func HasAVX() int { |
||||||
|
return int(C.has_avx()) |
||||||
|
} |
||||||
|
|
||||||
|
func HasAVX2() int { |
||||||
|
return int(C.has_avx2()) |
||||||
|
} |
@ -0,0 +1,24 @@ |
|||||||
|
/*
|
||||||
|
* Mini Object Storage, (C) 2014 Minio, Inc. |
||||||
|
* |
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
* you may not use this file except in compliance with the License. |
||||||
|
* You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
/* Include guard for the CPU feature-detection helpers. */
#ifndef __CPU_H__ |
||||||
|
#define __CPU_H__ |
||||||
|
|
||||||
|
/* Returns 1 if CPUID leaf 1 reports SSE4.1 (ECX bit 19), otherwise 0. */
int has_sse41 (void); |
||||||
|
/* Returns 1 if CPUID leaf 1 reports AVX (ECX bit 28), otherwise 0. */
int has_avx (void); |
||||||
|
/* Returns 1 if CPUID leaf 7 reports AVX2 (EBX bit 5), otherwise 0. */
int has_avx2 (void); |
||||||
|
|
||||||
|
#endif /* __CPU_H__ */ |
@ -0,0 +1,139 @@ |
|||||||
|
/* |
||||||
|
* Mini Object Storage, (C) 2014 Minio, Inc. |
||||||
|
* |
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License") ;
|
||||||
|
* you may not use this file except in compliance with the License. |
||||||
|
* You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
.file "cpufeatures.c" |
||||||
|
.text |
||||||
|
.type cpuid, @function
|
||||||
|
cpuid: |
||||||
|
.LFB2: |
||||||
|
.cfi_startproc |
||||||
|
pushq %rbp |
||||||
|
.cfi_def_cfa_offset 16
|
||||||
|
.cfi_offset 6, -16 |
||||||
|
movq %rsp, %rbp |
||||||
|
.cfi_def_cfa_register 6
|
||||||
|
pushq %rbx |
||||||
|
.cfi_offset 3, -24 |
||||||
|
movq %rdi, -16(%rbp) |
||||||
|
movl %esi, -20(%rbp) |
||||||
|
movq -16(%rbp), %rax |
||||||
|
leaq 4(%rax), %r10 |
||||||
|
movq -16(%rbp), %rax |
||||||
|
leaq 8(%rax), %r9 |
||||||
|
movq -16(%rbp), %rax |
||||||
|
leaq 12(%rax), %r8 |
||||||
|
movl -20(%rbp), %eax |
||||||
|
movl $0, %edx |
||||||
|
movl %edx, %ecx |
||||||
|
#APP |
||||||
|
# 21 "cpufeatures.c" 1 |
||||||
|
cpuid |
||||||
|
# 0 "" 2 |
||||||
|
#NO_APP |
||||||
|
movl %ebx, %esi |
||||||
|
movl %eax, %edi |
||||||
|
movq -16(%rbp), %rax |
||||||
|
movl %edi, (%rax) |
||||||
|
movl %esi, (%r10) |
||||||
|
movl %ecx, (%r9) |
||||||
|
movl %edx, (%r8) |
||||||
|
popq %rbx |
||||||
|
popq %rbp |
||||||
|
.cfi_def_cfa 7, 8 |
||||||
|
ret |
||||||
|
.cfi_endproc |
||||||
|
.LFE2: |
||||||
|
.size cpuid, .-cpuid |
||||||
|
.globl has_sse41
|
||||||
|
.type has_sse41, @function
|
||||||
|
has_sse41: |
||||||
|
.LFB3: |
||||||
|
.cfi_startproc |
||||||
|
pushq %rbp |
||||||
|
.cfi_def_cfa_offset 16
|
||||||
|
.cfi_offset 6, -16 |
||||||
|
movq %rsp, %rbp |
||||||
|
.cfi_def_cfa_register 6
|
||||||
|
subq $16, %rsp |
||||||
|
leaq -16(%rbp), %rax |
||||||
|
movl $1, %esi |
||||||
|
movq %rax, %rdi |
||||||
|
call cpuid |
||||||
|
movl -8(%rbp), %eax |
||||||
|
andl $524288, %eax |
||||||
|
testl %eax, %eax |
||||||
|
setne %al |
||||||
|
movzbl %al, %eax |
||||||
|
leave |
||||||
|
.cfi_def_cfa 7, 8 |
||||||
|
ret |
||||||
|
.cfi_endproc |
||||||
|
.LFE3: |
||||||
|
.size has_sse41, .-has_sse41 |
||||||
|
.globl has_avx
|
||||||
|
.type has_avx, @function
|
||||||
|
has_avx: |
||||||
|
.LFB4: |
||||||
|
.cfi_startproc |
||||||
|
pushq %rbp |
||||||
|
.cfi_def_cfa_offset 16
|
||||||
|
.cfi_offset 6, -16 |
||||||
|
movq %rsp, %rbp |
||||||
|
.cfi_def_cfa_register 6
|
||||||
|
subq $16, %rsp |
||||||
|
leaq -16(%rbp), %rax |
||||||
|
movl $1, %esi |
||||||
|
movq %rax, %rdi |
||||||
|
call cpuid |
||||||
|
movl -8(%rbp), %eax |
||||||
|
andl $268435456, %eax |
||||||
|
testl %eax, %eax |
||||||
|
setne %al |
||||||
|
movzbl %al, %eax |
||||||
|
leave |
||||||
|
.cfi_def_cfa 7, 8 |
||||||
|
ret |
||||||
|
.cfi_endproc |
||||||
|
.LFE4: |
||||||
|
.size has_avx, .-has_avx |
||||||
|
.globl has_avx2
|
||||||
|
.type has_avx2, @function
|
||||||
|
has_avx2: |
||||||
|
.LFB5: |
||||||
|
.cfi_startproc |
||||||
|
pushq %rbp |
||||||
|
.cfi_def_cfa_offset 16
|
||||||
|
.cfi_offset 6, -16 |
||||||
|
movq %rsp, %rbp |
||||||
|
.cfi_def_cfa_register 6
|
||||||
|
subq $16, %rsp |
||||||
|
leaq -16(%rbp), %rax |
||||||
|
movl $7, %esi |
||||||
|
movq %rax, %rdi |
||||||
|
call cpuid |
||||||
|
movl -12(%rbp), %eax |
||||||
|
andl $32, %eax |
||||||
|
testl %eax, %eax |
||||||
|
setne %al |
||||||
|
movzbl %al, %eax |
||||||
|
leave |
||||||
|
.cfi_def_cfa 7, 8 |
||||||
|
ret |
||||||
|
.cfi_endproc |
||||||
|
.LFE5: |
||||||
|
.size has_avx2, .-has_avx2 |
||||||
|
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2" |
||||||
|
.section .note.GNU-stack,"",@progbits
|
@ -0,0 +1,27 @@ |
|||||||
|
package cpu |
||||||
|
|
||||||
|
import ( |
||||||
|
. "gopkg.in/check.v1" |
||||||
|
"testing" |
||||||
|
) |
||||||
|
|
||||||
|
// Test hooks the gocheck suites registered in this package into "go test".
func Test(t *testing.T) { TestingT(t) } |
||||||
|
|
||||||
|
// MySuite groups the CPU feature-detection tests.
type MySuite struct{} |
||||||
|
|
||||||
|
// Register MySuite with gocheck so that TestingT runs its Test* methods.
var _ = Suite(&MySuite{}) |
||||||
|
|
||||||
|
func (s *MySuite) TestHasSSE41(c *C) { |
||||||
|
var bool = HasSSE41() |
||||||
|
c.Check(bool, Equals, 1) |
||||||
|
} |
||||||
|
|
||||||
|
func (s *MySuite) TestHasAVX(c *C) { |
||||||
|
var bool = HasAVX() |
||||||
|
c.Check(bool, Equals, 1) |
||||||
|
} |
||||||
|
|
||||||
|
func (s *MySuite) TestHasAVX2(c *C) { |
||||||
|
var bool = HasAVX2() |
||||||
|
c.Check(bool, Equals, 0) |
||||||
|
} |
@ -0,0 +1,23 @@ |
|||||||
|
// +build linux,amd64
|
||||||
|
|
||||||
|
package crc32c |
||||||
|
|
||||||
|
// #include "crc32c.h"
|
||||||
|
import "C" |
||||||
|
import ( |
||||||
|
"errors" |
||||||
|
"unsafe" |
||||||
|
) |
||||||
|
|
||||||
|
func Crc32c(buffer []byte) (uint32, error) { |
||||||
|
var length = len(buffer) |
||||||
|
if length == 0 { |
||||||
|
return 0, errors.New("Invalid input") |
||||||
|
} |
||||||
|
|
||||||
|
var cbuf *C.uint8_t |
||||||
|
cbuf = (*C.uint8_t)(unsafe.Pointer(&buffer[0])) |
||||||
|
crc := C.crc32c_pcl(cbuf, C.int32_t(length), C.uint32_t(0)) |
||||||
|
|
||||||
|
return uint32(crc), nil |
||||||
|
} |
@ -0,0 +1,3 @@ |
|||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
/*
 * crc32c_pcl: compute the CRC32C of the len bytes starting at buf,
 * continuing from the CRC value prev_crc, and return the updated CRC.
 * Implemented in assembly.
 */
uint32_t crc32c_pcl(uint8_t *buf, int32_t len, uint32_t prev_crc);
@ -0,0 +1,732 @@ |
|||||||
|
/* |
||||||
|
* Implement fast CRC32C with PCLMULQDQ instructions. (x86_64) |
||||||
|
* |
||||||
|
* The white papers on CRC32C calculations with PCLMULQDQ instruction can be |
||||||
|
* downloaded from: |
||||||
|
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf |
||||||
|
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf |
||||||
|
* |
||||||
|
* Copyright (C) 2012 Intel Corporation. |
||||||
|
* |
||||||
|
* Authors: |
||||||
|
* Wajdi Feghali <wajdi.k.feghali@intel.com>
|
||||||
|
* James Guilford <james.guilford@intel.com>
|
||||||
|
* David Cote <david.m.cote@intel.com>
|
||||||
|
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||||
|
* |
||||||
|
* This software is available to you under a choice of one of two |
||||||
|
* licenses. You may choose to be licensed under the terms of the GNU |
||||||
|
* General Public License (GPL) Version 2, available from the file |
||||||
|
* COPYING in the main directory of this source tree, or the |
||||||
|
* OpenIB.org BSD license below: |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or |
||||||
|
* without modification, are permitted provided that the following |
||||||
|
* conditions are met: |
||||||
|
* |
||||||
|
* - Redistributions of source code must retain the above |
||||||
|
* copyright notice, this list of conditions and the following |
||||||
|
* disclaimer. |
||||||
|
* |
||||||
|
* - Redistributions in binary form must reproduce the above |
||||||
|
* copyright notice, this list of conditions and the following |
||||||
|
* disclaimer in the documentation and/or other materials |
||||||
|
* provided with the distribution. |
||||||
|
* |
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
||||||
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
||||||
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||||||
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||||
|
* SOFTWARE. |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef ASM_NL |
||||||
|
#define ASM_NL ;
|
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef __ALIGN |
||||||
|
#define __ALIGN .align 4,0x90 |
||||||
|
#endif |
||||||
|
|
||||||
|
#define ALIGN __ALIGN |
||||||
|
|
||||||
|
#ifndef ENTRY |
||||||
|
#define ENTRY(name) \ |
||||||
|
.globl name ASM_NL \ |
||||||
|
ALIGN ASM_NL \ |
||||||
|
name: |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef END |
||||||
|
#define END(name) \ |
||||||
|
.size name, .-name |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifndef ENDPROC |
||||||
|
#define ENDPROC(name) \ |
||||||
|
.type name, @function ASM_NL \
|
||||||
|
END(name) |
||||||
|
#endif |
||||||
|
|
||||||
|
#define NUM_INVALID 100 |
||||||
|
|
||||||
|
#define TYPE_R32 0 |
||||||
|
#define TYPE_R64 1 |
||||||
|
#define TYPE_XMM 2 |
||||||
|
#define TYPE_INVALID 100 |
||||||
|
|
||||||
|
.macro R32_NUM opd r32 |
||||||
|
\opd = NUM_INVALID |
||||||
|
.ifc \r32,%eax |
||||||
|
\opd = 0 |
||||||
|
.endif |
||||||
|
.ifc \r32,%ecx |
||||||
|
\opd = 1 |
||||||
|
.endif |
||||||
|
.ifc \r32,%edx |
||||||
|
\opd = 2 |
||||||
|
.endif |
||||||
|
.ifc \r32,%ebx |
||||||
|
\opd = 3 |
||||||
|
.endif |
||||||
|
.ifc \r32,%esp |
||||||
|
\opd = 4 |
||||||
|
.endif |
||||||
|
.ifc \r32,%ebp |
||||||
|
\opd = 5 |
||||||
|
.endif |
||||||
|
.ifc \r32,%esi |
||||||
|
\opd = 6 |
||||||
|
.endif |
||||||
|
.ifc \r32,%edi |
||||||
|
\opd = 7 |
||||||
|
.endif |
||||||
|
#ifdef X86_64 |
||||||
|
.ifc \r32,%r8d |
||||||
|
\opd = 8 |
||||||
|
.endif |
||||||
|
.ifc \r32,%r9d |
||||||
|
\opd = 9 |
||||||
|
.endif |
||||||
|
.ifc \r32,%r10d |
||||||
|
\opd = 10 |
||||||
|
.endif |
||||||
|
.ifc \r32,%r11d |
||||||
|
\opd = 11 |
||||||
|
.endif |
||||||
|
.ifc \r32,%r12d |
||||||
|
\opd = 12 |
||||||
|
.endif |
||||||
|
.ifc \r32,%r13d |
||||||
|
\opd = 13 |
||||||
|
.endif |
||||||
|
.ifc \r32,%r14d |
||||||
|
\opd = 14 |
||||||
|
.endif |
||||||
|
.ifc \r32,%r15d |
||||||
|
\opd = 15 |
||||||
|
.endif |
||||||
|
#endif |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro R64_NUM opd r64 |
||||||
|
\opd = NUM_INVALID |
||||||
|
#ifdef X86_64 |
||||||
|
.ifc \r64,%rax |
||||||
|
\opd = 0 |
||||||
|
.endif |
||||||
|
.ifc \r64,%rcx |
||||||
|
\opd = 1 |
||||||
|
.endif |
||||||
|
.ifc \r64,%rdx |
||||||
|
\opd = 2 |
||||||
|
.endif |
||||||
|
.ifc \r64,%rbx |
||||||
|
\opd = 3 |
||||||
|
.endif |
||||||
|
.ifc \r64,%rsp |
||||||
|
\opd = 4 |
||||||
|
.endif |
||||||
|
.ifc \r64,%rbp |
||||||
|
\opd = 5 |
||||||
|
.endif |
||||||
|
.ifc \r64,%rsi |
||||||
|
\opd = 6 |
||||||
|
.endif |
||||||
|
.ifc \r64,%rdi |
||||||
|
\opd = 7 |
||||||
|
.endif |
||||||
|
.ifc \r64,%r8 |
||||||
|
\opd = 8 |
||||||
|
.endif |
||||||
|
.ifc \r64,%r9 |
||||||
|
\opd = 9 |
||||||
|
.endif |
||||||
|
.ifc \r64,%r10 |
||||||
|
\opd = 10 |
||||||
|
.endif |
||||||
|
.ifc \r64,%r11 |
||||||
|
\opd = 11 |
||||||
|
.endif |
||||||
|
.ifc \r64,%r12 |
||||||
|
\opd = 12 |
||||||
|
.endif |
||||||
|
.ifc \r64,%r13 |
||||||
|
\opd = 13 |
||||||
|
.endif |
||||||
|
.ifc \r64,%r14 |
||||||
|
\opd = 14 |
||||||
|
.endif |
||||||
|
.ifc \r64,%r15 |
||||||
|
\opd = 15 |
||||||
|
.endif |
||||||
|
#endif |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro XMM_NUM opd xmm |
||||||
|
\opd = NUM_INVALID |
||||||
|
.ifc \xmm,%xmm0 |
||||||
|
\opd = 0 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm1 |
||||||
|
\opd = 1 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm2 |
||||||
|
\opd = 2 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm3 |
||||||
|
\opd = 3 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm4 |
||||||
|
\opd = 4 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm5 |
||||||
|
\opd = 5 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm6 |
||||||
|
\opd = 6 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm7 |
||||||
|
\opd = 7 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm8 |
||||||
|
\opd = 8 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm9 |
||||||
|
\opd = 9 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm10 |
||||||
|
\opd = 10 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm11 |
||||||
|
\opd = 11 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm12 |
||||||
|
\opd = 12 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm13 |
||||||
|
\opd = 13 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm14 |
||||||
|
\opd = 14 |
||||||
|
.endif |
||||||
|
.ifc \xmm,%xmm15 |
||||||
|
\opd = 15 |
||||||
|
.endif |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro TYPE type reg |
||||||
|
R32_NUM reg_type_r32 \reg |
||||||
|
R64_NUM reg_type_r64 \reg |
||||||
|
XMM_NUM reg_type_xmm \reg |
||||||
|
.if reg_type_r64 <> NUM_INVALID |
||||||
|
\type = TYPE_R64 |
||||||
|
.elseif reg_type_r32 <> NUM_INVALID |
||||||
|
\type = TYPE_R32 |
||||||
|
.elseif reg_type_xmm <> NUM_INVALID |
||||||
|
\type = TYPE_XMM |
||||||
|
.else |
||||||
|
\type = TYPE_INVALID |
||||||
|
.endif |
||||||
|
.endm |
||||||
|
|
||||||
|
/* PFX_OPD_SIZE: emit the single prefix byte 0x66 ahead of a hand-encoded instruction. */
.macro PFX_OPD_SIZE
|
||||||
|
.byte 0x66
|
||||||
|
.endm |
||||||
|
|
||||||
|
/*
 * PFX_REX opd1 opd2 [W]: emit a REX prefix byte when one is needed.
 *
 * opd1 and opd2 are register numbers (0-15) and W defaults to 0. A byte is
 * emitted only if either register number has bit 3 set or W is non-zero.
 * The byte is 0x40 with bit 0 taken from bit 3 of opd1, bit 2 taken from
 * bit 3 of opd2, and bit 3 taken from W.
 */
.macro PFX_REX opd1 opd2 W=0 |
||||||
|
.if ((\opd1 | \opd2) & 8) || \W |
||||||
|
.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) |
||||||
|
.endif |
||||||
|
.endm |
||||||
|
|
||||||
|
/*
 * MODRM mod opd1 opd2: emit one ModRM byte.
 *
 * mod supplies the high bits as given by the caller (the macros in this file
 * pass 0xc0), the low three bits of opd1 go into bits 0-2, and the low three
 * bits of opd2 go into bits 3-5.
 */
.macro MODRM mod opd1 opd2 |
||||||
|
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro PSHUFB_XMM xmm1 xmm2 |
||||||
|
XMM_NUM pshufb_opd1 \xmm1 |
||||||
|
XMM_NUM pshufb_opd2 \xmm2 |
||||||
|
PFX_OPD_SIZE |
||||||
|
PFX_REX pshufb_opd1 pshufb_opd2 |
||||||
|
.byte 0x0f, 0x38, 0x00 |
||||||
|
MODRM 0xc0 pshufb_opd1 pshufb_opd2 |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro PCLMULQDQ imm8 xmm1 xmm2 |
||||||
|
XMM_NUM clmul_opd1 \xmm1 |
||||||
|
XMM_NUM clmul_opd2 \xmm2 |
||||||
|
PFX_OPD_SIZE |
||||||
|
PFX_REX clmul_opd1 clmul_opd2 |
||||||
|
.byte 0x0f, 0x3a, 0x44 |
||||||
|
MODRM 0xc0 clmul_opd1 clmul_opd2 |
||||||
|
.byte \imm8 |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro PEXTRD imm8 xmm gpr |
||||||
|
R32_NUM extrd_opd1 \gpr |
||||||
|
XMM_NUM extrd_opd2 \xmm |
||||||
|
PFX_OPD_SIZE |
||||||
|
PFX_REX extrd_opd1 extrd_opd2 |
||||||
|
.byte 0x0f, 0x3a, 0x16 |
||||||
|
MODRM 0xc0 extrd_opd1 extrd_opd2 |
||||||
|
.byte \imm8 |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro MOVQ_R64_XMM opd1 opd2 |
||||||
|
TYPE movq_r64_xmm_opd1_type \opd1 |
||||||
|
.if movq_r64_xmm_opd1_type == TYPE_XMM |
||||||
|
XMM_NUM movq_r64_xmm_opd1 \opd1 |
||||||
|
R64_NUM movq_r64_xmm_opd2 \opd2 |
||||||
|
.else |
||||||
|
R64_NUM movq_r64_xmm_opd1 \opd1 |
||||||
|
XMM_NUM movq_r64_xmm_opd2 \opd2 |
||||||
|
.endif |
||||||
|
PFX_OPD_SIZE |
||||||
|
PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 |
||||||
|
.if movq_r64_xmm_opd1_type == TYPE_XMM |
||||||
|
.byte 0x0f, 0x7e |
||||||
|
.else |
||||||
|
.byte 0x0f, 0x6e |
||||||
|
.endif |
||||||
|
MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 |
||||||
|
.endm |
||||||
|
|
||||||
|
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction |
||||||
|
|
||||||
|
.macro LABEL prefix n |
||||||
|
\prefix\n\(): |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro JMPTBL_ENTRY i |
||||||
|
.word crc_\i - crc_array |
||||||
|
.endm |
||||||
|
|
||||||
|
.macro JNC_LESS_THAN j |
||||||
|
jnc less_than_\j |
||||||
|
.endm |
||||||
|
|
||||||
|
# Define threshold where buffers are considered "small" and routed to more |
||||||
|
# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so |
||||||
|
# SMALL_SIZE can be no larger than 255. |
||||||
|
|
||||||
|
#define SMALL_SIZE 200 |
||||||
|
|
||||||
|
.if (SMALL_SIZE > 255) |
||||||
|
.error "SMALL_ SIZE must be < 256" |
||||||
|
.endif |
||||||
|
|
||||||
|
# unsigned int crc32c_pcl(u8 *buffer, int len, unsigned int crc_init);
|
||||||
|
|
||||||
|
.text |
||||||
|
ENTRY(crc32c_pcl) |
||||||
|
#define bufp %rdi |
||||||
|
#define bufp_dw %edi |
||||||
|
#define bufp_w %di |
||||||
|
#define bufp_b %dil |
||||||
|
#define bufptmp %rcx |
||||||
|
#define block_0 %rcx |
||||||
|
#define block_1 %rdx |
||||||
|
#define block_2 %r11 |
||||||
|
#define len %rsi |
||||||
|
#define len_dw %esi |
||||||
|
#define len_w %si |
||||||
|
#define len_b %sil |
||||||
|
#define crc_init_arg %rdx |
||||||
|
#define tmp %rbx |
||||||
|
#define crc_init %r8 |
||||||
|
#define crc_init_dw %r8d |
||||||
|
#define crc1 %r9 |
||||||
|
#define crc2 %r10 |
||||||
|
|
||||||
|
pushq %rbx |
||||||
|
pushq %rdi |
||||||
|
pushq %rsi |
||||||
|
|
||||||
|
## Move crc_init for Linux to a different register |
||||||
|
mov crc_init_arg, crc_init |
||||||
|
|
||||||
|
################################################################ |
||||||
|
## 1) ALIGN: |
||||||
|
################################################################ |
||||||
|
|
||||||
|
mov bufp, bufptmp # rdi = *buf |
||||||
|
neg bufp |
||||||
|
and $7, bufp # calculate the unalignment amount of |
||||||
|
# the address |
||||||
|
je proc_block # Skip if aligned |
||||||
|
|
||||||
|
## If len is less than 8 and we're unaligned, we need to jump |
||||||
|
## to special code to avoid reading beyond the end of the buffer |
||||||
|
cmp $8, len |
||||||
|
jae do_align |
||||||
|
# less_than_8 expects length in upper 3 bits of len_dw |
||||||
|
# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] |
||||||
|
shl $32-3+1, len_dw |
||||||
|
jmp less_than_8_post_shl1 |
||||||
|
|
||||||
|
do_align: |
||||||
|
#### Calculate CRC of unaligned bytes of the buffer (if any) |
||||||
|
movq (bufptmp), tmp # load a quadward from the buffer |
||||||
|
add bufp, bufptmp # align buffer pointer for quadword |
||||||
|
# processing |
||||||
|
sub bufp, len # update buffer length |
||||||
|
align_loop: |
||||||
|
crc32b %bl, crc_init_dw # compute crc32 of 1-byte |
||||||
|
shr $8, tmp # get next byte |
||||||
|
dec bufp |
||||||
|
jne align_loop |
||||||
|
|
||||||
|
proc_block: |
||||||
|
|
||||||
|
################################################################ |
||||||
|
## 2) PROCESS BLOCKS: |
||||||
|
################################################################ |
||||||
|
|
||||||
|
## compute num of bytes to be processed |
||||||
|
movq len, tmp # save num bytes in tmp |
||||||
|
|
||||||
|
cmpq $128*24, len |
||||||
|
jae full_block |
||||||
|
|
||||||
|
continue_block: |
||||||
|
cmpq $SMALL_SIZE, len |
||||||
|
jb small |
||||||
|
|
||||||
|
## len < 128*24 |
||||||
|
movq $2731, %rax # 2731 = ceil(2^16 / 24) |
||||||
|
mul len_dw |
||||||
|
shrq $16, %rax |
||||||
|
|
||||||
|
## eax contains floor(bytes / 24) = num 24-byte chunks to do |
||||||
|
|
||||||
|
## process rax 24-byte chunks (128 >= rax >= 0) |
||||||
|
|
||||||
|
## compute end address of each block |
||||||
|
## block 0 (base addr + RAX * 8) |
||||||
|
## block 1 (base addr + RAX * 16) |
||||||
|
## block 2 (base addr + RAX * 24) |
||||||
|
lea (bufptmp, %rax, 8), block_0 |
||||||
|
lea (block_0, %rax, 8), block_1 |
||||||
|
lea (block_1, %rax, 8), block_2 |
||||||
|
|
||||||
|
xor crc1, crc1 |
||||||
|
xor crc2, crc2 |
||||||
|
|
||||||
|
## branch into array |
||||||
|
lea jump_table(%rip), bufp |
||||||
|
movzxw (bufp, %rax, 2), len |
||||||
|
offset=crc_array-jump_table |
||||||
|
lea offset(bufp, len, 1), bufp |
||||||
|
jmp *bufp |
||||||
|
|
||||||
|
################################################################ |
||||||
|
## 2a) PROCESS FULL BLOCKS: |
||||||
|
################################################################ |
||||||
|
full_block: |
||||||
|
movq $128,%rax |
||||||
|
lea 128*8*2(block_0), block_1 |
||||||
|
lea 128*8*3(block_0), block_2 |
||||||
|
add $128*8*1, block_0 |
||||||
|
|
||||||
|
xor crc1,crc1 |
||||||
|
xor crc2,crc2 |
||||||
|
|
||||||
|
# Fall through into top of crc array (crc_128) |
||||||
|
|
||||||
|
################################################################ |
||||||
|
## 3) CRC Array: |
||||||
|
################################################################ |
||||||
|
|
||||||
|
crc_array: |
||||||
|
i=128 |
||||||
|
.rept 128-1 |
||||||
|
.altmacro |
||||||
|
LABEL crc_ %i |
||||||
|
.noaltmacro |
||||||
|
crc32q -i*8(block_0), crc_init |
||||||
|
crc32q -i*8(block_1), crc1 |
||||||
|
crc32q -i*8(block_2), crc2 |
||||||
|
i=(i-1) |
||||||
|
.endr |
||||||
|
|
||||||
|
.altmacro |
||||||
|
LABEL crc_ %i |
||||||
|
.noaltmacro |
||||||
|
crc32q -i*8(block_0), crc_init |
||||||
|
crc32q -i*8(block_1), crc1 |
||||||
|
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
|
||||||
|
|
||||||
|
mov block_2, block_0 |
||||||
|
|
||||||
|
################################################################ |
||||||
|
## 4) Combine three results: |
||||||
|
################################################################ |
||||||
|
|
||||||
|
lea (K_table-8)(%rip), bufp # first entry is for idx 1 |
||||||
|
shlq $3, %rax # rax *= 8 |
||||||
|
pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2 |
||||||
|
leal (%eax,%eax,2), %eax # rax *= 3 (total *24) |
||||||
|
subq %rax, tmp # tmp -= rax*24 |
||||||
|
|
||||||
|
movq crc_init, %xmm1 # CRC for block 1 |
||||||
|
PCLMULQDQ 0x00, %xmm0, %xmm1 # Multiply by K2 |
||||||
|
|
||||||
|
movq crc1, %xmm2 # CRC for block 2 |
||||||
|
PCLMULQDQ 0x10, %xmm0, %xmm2 # Multiply by K1 |
||||||
|
|
||||||
|
pxor %xmm2,%xmm1 |
||||||
|
movq %xmm1, %rax |
||||||
|
xor -i*8(block_2), %rax |
||||||
|
mov crc2, crc_init |
||||||
|
crc32 %rax, crc_init |
||||||
|
|
||||||
|
################################################################ |
||||||
|
## 5) Check for end: |
||||||
|
################################################################ |
||||||
|
|
||||||
|
LABEL crc_ 0 |
||||||
|
mov tmp, len |
||||||
|
cmp $128*24, tmp |
||||||
|
jae full_block |
||||||
|
cmp $24, tmp |
||||||
|
jae continue_block |
||||||
|
|
||||||
|
less_than_24: |
||||||
|
shl $32-4, len_dw # less_than_16 expects length |
||||||
|
# in upper 4 bits of len_dw |
||||||
|
jnc less_than_16 |
||||||
|
crc32q (bufptmp), crc_init |
||||||
|
crc32q 8(bufptmp), crc_init |
||||||
|
jz do_return |
||||||
|
add $16, bufptmp |
||||||
|
# len is less than 8 if we got here |
||||||
|
# less_than_8 expects length in upper 3 bits of len_dw |
||||||
|
# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] |
||||||
|
shl $2, len_dw |
||||||
|
jmp less_than_8_post_shl1 |
||||||
|
|
||||||
|
####################################################################### |
||||||
|
## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) |
||||||
|
####################################################################### |
||||||
|
small: |
||||||
|
shl $32-8, len_dw # Prepare len_dw for less_than_256 |
||||||
|
j=256 |
||||||
|
.rept 5 # j = {256, 128, 64, 32, 16} |
||||||
|
.altmacro |
||||||
|
LABEL less_than_ %j # less_than_j: Length should be in |
||||||
|
# upper lg(j) bits of len_dw |
||||||
|
j=(j/2) |
||||||
|
shl $1, len_dw # Get next MSB |
||||||
|
JNC_LESS_THAN %j |
||||||
|
.noaltmacro |
||||||
|
i=0 |
||||||
|
.rept (j/8) |
||||||
|
crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data |
||||||
|
i=i+8 |
||||||
|
.endr |
||||||
|
jz do_return # Return if remaining length is zero |
||||||
|
add $j, bufptmp # Advance buf |
||||||
|
.endr |
||||||
|
|
||||||
|
less_than_8: # Length should be stored in |
||||||
|
# upper 3 bits of len_dw |
||||||
|
shl $1, len_dw |
||||||
|
less_than_8_post_shl1: |
||||||
|
jnc less_than_4 |
||||||
|
crc32l (bufptmp), crc_init_dw # CRC of 4 bytes |
||||||
|
jz do_return # return if remaining data is zero |
||||||
|
add $4, bufptmp |
||||||
|
less_than_4: # Length should be stored in |
||||||
|
# upper 2 bits of len_dw |
||||||
|
shl $1, len_dw |
||||||
|
jnc less_than_2 |
||||||
|
crc32w (bufptmp), crc_init_dw # CRC of 2 bytes |
||||||
|
jz do_return # return if remaining data is zero |
||||||
|
add $2, bufptmp |
||||||
|
less_than_2: # Length should be stored in the MSB |
||||||
|
# of len_dw |
||||||
|
shl $1, len_dw |
||||||
|
jnc less_than_1 |
||||||
|
crc32b (bufptmp), crc_init_dw # CRC of 1 byte |
||||||
|
less_than_1: # Length should be zero |
||||||
|
do_return: |
||||||
|
movq crc_init, %rax |
||||||
|
popq %rsi |
||||||
|
popq %rdi |
||||||
|
popq %rbx |
||||||
|
ret |
||||||
|
|
||||||
|
################################################################ |
||||||
|
## jump table Table is 129 entries x 2 bytes each |
||||||
|
################################################################ |
||||||
|
.align 4
|
||||||
|
jump_table: |
||||||
|
i=0 |
||||||
|
.rept 129
|
||||||
|
.altmacro |
||||||
|
JMPTBL_ENTRY %i |
||||||
|
.noaltmacro |
||||||
|
i=i+1 |
||||||
|
.endr |
||||||
|
|
||||||
|
ENDPROC(crc32c_pcl) |
||||||
|
|
||||||
|
################################################################ |
||||||
|
## PCLMULQDQ tables |
||||||
|
## Table is 128 entries x 2 words (8 bytes) each |
||||||
|
################################################################ |
||||||
|
/*
 * Place the PCLMULQDQ constant table in the standard read-only data
 * section (the section name was misspelled as ".rotata"), aligned to
 * 8 bytes because each table entry is 8 bytes wide.
 */
.section .rodata, "a", %progbits
.align 8
|
||||||
|
K_table: |
||||||
|
.long 0x493c7d27, 0x00000001 |
||||||
|
.long 0xba4fc28e, 0x493c7d27 |
||||||
|
.long 0xddc0152b, 0xf20c0dfe |
||||||
|
.long 0x9e4addf8, 0xba4fc28e |
||||||
|
.long 0x39d3b296, 0x3da6d0cb |
||||||
|
.long 0x0715ce53, 0xddc0152b |
||||||
|
.long 0x47db8317, 0x1c291d04 |
||||||
|
.long 0x0d3b6092, 0x9e4addf8 |
||||||
|
.long 0xc96cfdc0, 0x740eef02 |
||||||
|
.long 0x878a92a7, 0x39d3b296 |
||||||
|
.long 0xdaece73e, 0x083a6eec |
||||||
|
.long 0xab7aff2a, 0x0715ce53 |
||||||
|
.long 0x2162d385, 0xc49f4f67 |
||||||
|
.long 0x83348832, 0x47db8317 |
||||||
|
.long 0x299847d5, 0x2ad91c30 |
||||||
|
.long 0xb9e02b86, 0x0d3b6092 |
||||||
|
.long 0x18b33a4e, 0x6992cea2 |
||||||
|
.long 0xb6dd949b, 0xc96cfdc0 |
||||||
|
.long 0x78d9ccb7, 0x7e908048 |
||||||
|
.long 0xbac2fd7b, 0x878a92a7 |
||||||
|
.long 0xa60ce07b, 0x1b3d8f29 |
||||||
|
.long 0xce7f39f4, 0xdaece73e |
||||||
|
.long 0x61d82e56, 0xf1d0f55e |
||||||
|
.long 0xd270f1a2, 0xab7aff2a |
||||||
|
.long 0xc619809d, 0xa87ab8a8 |
||||||
|
.long 0x2b3cac5d, 0x2162d385 |
||||||
|
.long 0x65863b64, 0x8462d800 |
||||||
|
.long 0x1b03397f, 0x83348832 |
||||||
|
.long 0xebb883bd, 0x71d111a8 |
||||||
|
.long 0xb3e32c28, 0x299847d5 |
||||||
|
.long 0x064f7f26, 0xffd852c6 |
||||||
|
.long 0xdd7e3b0c, 0xb9e02b86 |
||||||
|
.long 0xf285651c, 0xdcb17aa4 |
||||||
|
.long 0x10746f3c, 0x18b33a4e |
||||||
|
.long 0xc7a68855, 0xf37c5aee |
||||||
|
.long 0x271d9844, 0xb6dd949b |
||||||
|
.long 0x8e766a0c, 0x6051d5a2 |
||||||
|
.long 0x93a5f730, 0x78d9ccb7 |
||||||
|
.long 0x6cb08e5c, 0x18b0d4ff |
||||||
|
.long 0x6b749fb2, 0xbac2fd7b |
||||||
|
.long 0x1393e203, 0x21f3d99c |
||||||
|
.long 0xcec3662e, 0xa60ce07b |
||||||
|
.long 0x96c515bb, 0x8f158014 |
||||||
|
.long 0xe6fc4e6a, 0xce7f39f4 |
||||||
|
.long 0x8227bb8a, 0xa00457f7 |
||||||
|
.long 0xb0cd4768, 0x61d82e56 |
||||||
|
.long 0x39c7ff35, 0x8d6d2c43 |
||||||
|
.long 0xd7a4825c, 0xd270f1a2 |
||||||
|
.long 0x0ab3844b, 0x00ac29cf |
||||||
|
.long 0x0167d312, 0xc619809d |
||||||
|
.long 0xf6076544, 0xe9adf796 |
||||||
|
.long 0x26f6a60a, 0x2b3cac5d |
||||||
|
.long 0xa741c1bf, 0x96638b34 |
||||||
|
.long 0x98d8d9cb, 0x65863b64 |
||||||
|
.long 0x49c3cc9c, 0xe0e9f351 |
||||||
|
.long 0x68bce87a, 0x1b03397f |
||||||
|
.long 0x57a3d037, 0x9af01f2d |
||||||
|
.long 0x6956fc3b, 0xebb883bd |
||||||
|
.long 0x42d98888, 0x2cff42cf |
||||||
|
.long 0x3771e98f, 0xb3e32c28 |
||||||
|
.long 0xb42ae3d9, 0x88f25a3a |
||||||
|
.long 0x2178513a, 0x064f7f26 |
||||||
|
.long 0xe0ac139e, 0x4e36f0b0 |
||||||
|
.long 0x170076fa, 0xdd7e3b0c |
||||||
|
.long 0x444dd413, 0xbd6f81f8 |
||||||
|
.long 0x6f345e45, 0xf285651c |
||||||
|
.long 0x41d17b64, 0x91c9bd4b |
||||||
|
.long 0xff0dba97, 0x10746f3c |
||||||
|
.long 0xa2b73df1, 0x885f087b |
||||||
|
.long 0xf872e54c, 0xc7a68855 |
||||||
|
.long 0x1e41e9fc, 0x4c144932 |
||||||
|
.long 0x86d8e4d2, 0x271d9844 |
||||||
|
.long 0x651bd98b, 0x52148f02 |
||||||
|
.long 0x5bb8f1bc, 0x8e766a0c |
||||||
|
.long 0xa90fd27a, 0xa3c6f37a |
||||||
|
.long 0xb3af077a, 0x93a5f730 |
||||||
|
.long 0x4984d782, 0xd7c0557f |
||||||
|
.long 0xca6ef3ac, 0x6cb08e5c |
||||||
|
.long 0x234e0b26, 0x63ded06a |
||||||
|
.long 0xdd66cbbb, 0x6b749fb2 |
||||||
|
.long 0x4597456a, 0x4d56973c |
||||||
|
.long 0xe9e28eb4, 0x1393e203 |
||||||
|
.long 0x7b3ff57a, 0x9669c9df |
||||||
|
.long 0xc9c8b782, 0xcec3662e |
||||||
|
.long 0x3f70cc6f, 0xe417f38a |
||||||
|
.long 0x93e106a4, 0x96c515bb |
||||||
|
.long 0x62ec6c6d, 0x4b9e0f71 |
||||||
|
.long 0xd813b325, 0xe6fc4e6a |
||||||
|
.long 0x0df04680, 0xd104b8fc |
||||||
|
.long 0x2342001e, 0x8227bb8a |
||||||
|
.long 0x0a2a8d7e, 0x5b397730 |
||||||
|
.long 0x6d9a4957, 0xb0cd4768 |
||||||
|
.long 0xe8b6368b, 0xe78eb416 |
||||||
|
.long 0xd2c3ed1a, 0x39c7ff35 |
||||||
|
.long 0x995a5724, 0x61ff0e01 |
||||||
|
.long 0x9ef68d35, 0xd7a4825c |
||||||
|
.long 0x0c139b31, 0x8d96551c |
||||||
|
.long 0xf2271e60, 0x0ab3844b |
||||||
|
.long 0x0b0bf8ca, 0x0bf80dd2 |
||||||
|
.long 0x2664fd8b, 0x0167d312 |
||||||
|
.long 0xed64812d, 0x8821abed |
||||||
|
.long 0x02ee03b2, 0xf6076544 |
||||||
|
.long 0x8604ae0f, 0x6a45d2b2 |
||||||
|
.long 0x363bd6b3, 0x26f6a60a |
||||||
|
.long 0x135c83fd, 0xd8d26619 |
||||||
|
.long 0x5fabe670, 0xa741c1bf |
||||||
|
.long 0x35ec3279, 0xde87806c |
||||||
|
.long 0x00bcf5f6, 0x98d8d9cb |
||||||
|
.long 0x8ae00689, 0x14338754 |
||||||
|
.long 0x17f27698, 0x49c3cc9c |
||||||
|
.long 0x58ca5f00, 0x5bd2011f |
||||||
|
.long 0xaa7c7ad5, 0x68bce87a |
||||||
|
.long 0xb5cfca28, 0xdd07448e |
||||||
|
.long 0xded288f8, 0x57a3d037 |
||||||
|
.long 0x59f229bc, 0xdde8f5b9 |
||||||
|
.long 0x6d390dec, 0x6956fc3b |
||||||
|
.long 0x37170390, 0xa3e3e02c |
||||||
|
.long 0x6353c1cc, 0x42d98888 |
||||||
|
.long 0xc4584f5c, 0xd73c7bea |
||||||
|
.long 0xf48642e9, 0x3771e98f |
||||||
|
.long 0x531377e2, 0x80ff0093 |
||||||
|
.long 0xdd35bc8d, 0xb42ae3d9 |
||||||
|
.long 0xb25b29f2, 0x8fe4c34d |
||||||
|
.long 0x9a5ede41, 0x2178513a |
||||||
|
.long 0xa563905d, 0xdf99fc11 |
||||||
|
.long 0x45cddf4e, 0xe0ac139e |
||||||
|
.long 0xacfa3103, 0x6c23e841 |
||||||
|
.long 0xa51b6135, 0x170076fa |
@ -0,0 +1,24 @@ |
|||||||
|
package crc32c |
||||||
|
|
||||||
|
import ( |
||||||
|
. "gopkg.in/check.v1" |
||||||
|
"testing" |
||||||
|
) |
||||||
|
|
||||||
|
// Test hooks the gocheck suites registered in this package into "go test".
func Test(t *testing.T) { TestingT(t) } |
||||||
|
|
||||||
|
// MySuite groups the CRC32C tests.
type MySuite struct{} |
||||||
|
|
||||||
|
// Register MySuite with gocheck so that TestingT runs its Test* methods.
var _ = Suite(&MySuite{}) |
||||||
|
|
||||||
|
func (s *MySuite) TestCrc32c(c *C) { |
||||||
|
data_1 := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry") |
||||||
|
crc, err := Crc32c(data_1) |
||||||
|
c.Assert(err, IsNil) |
||||||
|
|
||||||
|
data_2 := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry") |
||||||
|
newcrc, newerr := Crc32c(data_2) |
||||||
|
c.Assert(newerr, IsNil) |
||||||
|
|
||||||
|
c.Assert(crc, Equals, newcrc) |
||||||
|
} |
Loading…
Reference in new issue