parent
75dddfb2ae
commit
e10934a88e
@ -1,202 +0,0 @@ |
||||
|
||||
Apache License |
||||
Version 2.0, January 2004 |
||||
http://www.apache.org/licenses/ |
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
||||
|
||||
1. Definitions. |
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction, |
||||
and distribution as defined by Sections 1 through 9 of this document. |
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by |
||||
the copyright owner that is granting the License. |
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all |
||||
other entities that control, are controlled by, or are under common |
||||
control with that entity. For the purposes of this definition, |
||||
"control" means (i) the power, direct or indirect, to cause the |
||||
direction or management of such entity, whether by contract or |
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the |
||||
outstanding shares, or (iii) beneficial ownership of such entity. |
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity |
||||
exercising permissions granted by this License. |
||||
|
||||
"Source" form shall mean the preferred form for making modifications, |
||||
including but not limited to software source code, documentation |
||||
source, and configuration files. |
||||
|
||||
"Object" form shall mean any form resulting from mechanical |
||||
transformation or translation of a Source form, including but |
||||
not limited to compiled object code, generated documentation, |
||||
and conversions to other media types. |
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or |
||||
Object form, made available under the License, as indicated by a |
||||
copyright notice that is included in or attached to the work |
||||
(an example is provided in the Appendix below). |
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object |
||||
form, that is based on (or derived from) the Work and for which the |
||||
editorial revisions, annotations, elaborations, or other modifications |
||||
represent, as a whole, an original work of authorship. For the purposes |
||||
of this License, Derivative Works shall not include works that remain |
||||
separable from, or merely link (or bind by name) to the interfaces of, |
||||
the Work and Derivative Works thereof. |
||||
|
||||
"Contribution" shall mean any work of authorship, including |
||||
the original version of the Work and any modifications or additions |
||||
to that Work or Derivative Works thereof, that is intentionally |
||||
submitted to Licensor for inclusion in the Work by the copyright owner |
||||
or by an individual or Legal Entity authorized to submit on behalf of |
||||
the copyright owner. For the purposes of this definition, "submitted" |
||||
means any form of electronic, verbal, or written communication sent |
||||
to the Licensor or its representatives, including but not limited to |
||||
communication on electronic mailing lists, source code control systems, |
||||
and issue tracking systems that are managed by, or on behalf of, the |
||||
Licensor for the purpose of discussing and improving the Work, but |
||||
excluding communication that is conspicuously marked or otherwise |
||||
designated in writing by the copyright owner as "Not a Contribution." |
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity |
||||
on behalf of whom a Contribution has been received by Licensor and |
||||
subsequently incorporated within the Work. |
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of |
||||
this License, each Contributor hereby grants to You a perpetual, |
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||
copyright license to reproduce, prepare Derivative Works of, |
||||
publicly display, publicly perform, sublicense, and distribute the |
||||
Work and such Derivative Works in Source or Object form. |
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of |
||||
this License, each Contributor hereby grants to You a perpetual, |
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||
(except as stated in this section) patent license to make, have made, |
||||
use, offer to sell, sell, import, and otherwise transfer the Work, |
||||
where such license applies only to those patent claims licensable |
||||
by such Contributor that are necessarily infringed by their |
||||
Contribution(s) alone or by combination of their Contribution(s) |
||||
with the Work to which such Contribution(s) was submitted. If You |
||||
institute patent litigation against any entity (including a |
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work |
||||
or a Contribution incorporated within the Work constitutes direct |
||||
or contributory patent infringement, then any patent licenses |
||||
granted to You under this License for that Work shall terminate |
||||
as of the date such litigation is filed. |
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the |
||||
Work or Derivative Works thereof in any medium, with or without |
||||
modifications, and in Source or Object form, provided that You |
||||
meet the following conditions: |
||||
|
||||
(a) You must give any other recipients of the Work or |
||||
Derivative Works a copy of this License; and |
||||
|
||||
(b) You must cause any modified files to carry prominent notices |
||||
stating that You changed the files; and |
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works |
||||
that You distribute, all copyright, patent, trademark, and |
||||
attribution notices from the Source form of the Work, |
||||
excluding those notices that do not pertain to any part of |
||||
the Derivative Works; and |
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its |
||||
distribution, then any Derivative Works that You distribute must |
||||
include a readable copy of the attribution notices contained |
||||
within such NOTICE file, excluding those notices that do not |
||||
pertain to any part of the Derivative Works, in at least one |
||||
of the following places: within a NOTICE text file distributed |
||||
as part of the Derivative Works; within the Source form or |
||||
documentation, if provided along with the Derivative Works; or, |
||||
within a display generated by the Derivative Works, if and |
||||
wherever such third-party notices normally appear. The contents |
||||
of the NOTICE file are for informational purposes only and |
||||
do not modify the License. You may add Your own attribution |
||||
notices within Derivative Works that You distribute, alongside |
||||
or as an addendum to the NOTICE text from the Work, provided |
||||
that such additional attribution notices cannot be construed |
||||
as modifying the License. |
||||
|
||||
You may add Your own copyright statement to Your modifications and |
||||
may provide additional or different license terms and conditions |
||||
for use, reproduction, or distribution of Your modifications, or |
||||
for any such Derivative Works as a whole, provided Your use, |
||||
reproduction, and distribution of the Work otherwise complies with |
||||
the conditions stated in this License. |
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise, |
||||
any Contribution intentionally submitted for inclusion in the Work |
||||
by You to the Licensor shall be under the terms and conditions of |
||||
this License, without any additional terms or conditions. |
||||
Notwithstanding the above, nothing herein shall supersede or modify |
||||
the terms of any separate license agreement you may have executed |
||||
with Licensor regarding such Contributions. |
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade |
||||
names, trademarks, service marks, or product names of the Licensor, |
||||
except as required for reasonable and customary use in describing the |
||||
origin of the Work and reproducing the content of the NOTICE file. |
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or |
||||
agreed to in writing, Licensor provides the Work (and each |
||||
Contributor provides its Contributions) on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
||||
implied, including, without limitation, any warranties or conditions |
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
||||
PARTICULAR PURPOSE. You are solely responsible for determining the |
||||
appropriateness of using or redistributing the Work and assume any |
||||
risks associated with Your exercise of permissions under this License. |
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory, |
||||
whether in tort (including negligence), contract, or otherwise, |
||||
unless required by applicable law (such as deliberate and grossly |
||||
negligent acts) or agreed to in writing, shall any Contributor be |
||||
liable to You for damages, including any direct, indirect, special, |
||||
incidental, or consequential damages of any character arising as a |
||||
result of this License or out of the use or inability to use the |
||||
Work (including but not limited to damages for loss of goodwill, |
||||
work stoppage, computer failure or malfunction, or any and all |
||||
other commercial damages or losses), even if such Contributor |
||||
has been advised of the possibility of such damages. |
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing |
||||
the Work or Derivative Works thereof, You may choose to offer, |
||||
and charge a fee for, acceptance of support, warranty, indemnity, |
||||
or other liability obligations and/or rights consistent with this |
||||
License. However, in accepting such obligations, You may act only |
||||
on Your own behalf and on Your sole responsibility, not on behalf |
||||
of any other Contributor, and only if You agree to indemnify, |
||||
defend, and hold each Contributor harmless for any liability |
||||
incurred by, or claims asserted against, such Contributor by reason |
||||
of your accepting any such warranty or additional liability. |
||||
|
||||
END OF TERMS AND CONDITIONS |
||||
|
||||
APPENDIX: How to apply the Apache License to your work. |
||||
|
||||
To apply the Apache License to your work, attach the following |
||||
boilerplate notice, with the fields enclosed by brackets "[]" |
||||
replaced with your own identifying information. (Don't include |
||||
the brackets!) The text should be enclosed in the appropriate |
||||
comment syntax for the file format. We also recommend that a |
||||
file or class name and description of purpose be included on the |
||||
same "printed page" as the copyright notice for easier |
||||
identification within third-party archives. |
||||
|
||||
Copyright [yyyy] [name of copyright owner] |
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); |
||||
you may not use this file except in compliance with the License. |
||||
You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
@ -1,686 +0,0 @@ |
||||
######################################################################## |
||||
# Implement fast SHA-512 with AVX instructions. (x86_64) |
||||
# |
||||
# Copyright (C) 2013 Intel Corporation. |
||||
# |
||||
# Authors: |
||||
# James Guilford <james.guilford@intel.com>
|
||||
# Kirk Yap <kirk.s.yap@intel.com>
|
||||
# David Cote <david.m.cote@intel.com>
|
||||
# Tim Chen <tim.c.chen@linux.intel.com>
|
||||
# |
||||
# This software is available to you under a choice of one of two |
||||
# licenses. You may choose to be licensed under the terms of the GNU |
||||
# General Public License (GPL) Version 2, available from the file |
||||
# COPYING in the main directory of this source tree, or the |
||||
# OpenIB.org BSD license below: |
||||
# |
||||
# Redistribution and use in source and binary forms, with or |
||||
# without modification, are permitted provided that the following |
||||
# conditions are met: |
||||
# |
||||
# - Redistributions of source code must retain the above |
||||
# copyright notice, this list of conditions and the following |
||||
# disclaimer. |
||||
# |
||||
# - Redistributions in binary form must reproduce the above |
||||
# copyright notice, this list of conditions and the following |
||||
# disclaimer in the documentation and/or other materials |
||||
# provided with the distribution. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
# |
||||
######################################################################## |
||||
# |
||||
# This code is described in an Intel White-Paper: |
||||
# "Fast SHA-512 Implementations on Intel Architecture Processors" |
||||
# |
||||
# To find it, surf to http://www.intel.com/p/en_US/embedded |
||||
# and search for that title. |
||||
# |
||||
######################################################################## |
||||
# Using this part of Minio codebase under the license |
||||
# Apache License Version 2.0 with modifications |
||||
## |
||||
|
||||
#ifdef HAS_AVX |
||||
# ENTRY: make the symbol global, align it (.align 4 with 0x90 as fill value) |
# and emit its label. Skipped when an ENTRY macro is already defined. |
#ifndef ENTRY |
||||
#define ENTRY(name) \ |
||||
.globl name ; \
|
||||
.align 4,0x90 ; \
|
||||
name: |
||||
#endif |
||||
|
||||
# END: record the size of the symbol as the distance from its label to here. |
# Skipped when an END macro is already defined. |
#ifndef END |
||||
#define END(name) \ |
||||
.size name, .-name |
||||
#endif |
||||
|
||||
# ENDPROC: mark the symbol as a function and then record its size via END. |
# Skipped when an ENDPROC macro is already defined. |
#ifndef ENDPROC |
||||
#define ENDPROC(name) \ |
||||
.type name, @function ; \
|
||||
END(name) |
||||
#endif |
||||
|
||||
# Sentinel stored by the register-number lookup macros when a name is not recognised. |
#define NUM_INVALID 100 |
||||
|
||||
# Operand classes reported by the TYPE macro. |
#define TYPE_R32 0 |
||||
#define TYPE_R64 1 |
||||
#define TYPE_XMM 2 |
||||
#define TYPE_INVALID 100 |
||||
|
||||
# R32_NUM opd r32: set the assembler symbol opd to the register number of the |
# 32-bit register r32 (%eax=0 ... %edi=7; %r8d-%r15d=8-15 only when X86_64 is |
# defined). The symbol keeps the invalid-number sentinel when nothing matches. |
.macro R32_NUM opd r32 |
||||
\opd = NUM_INVALID |
||||
.ifc \r32,%eax |
||||
\opd = 0 |
||||
.endif |
||||
.ifc \r32,%ecx |
||||
\opd = 1 |
||||
.endif |
||||
.ifc \r32,%edx |
||||
\opd = 2 |
||||
.endif |
||||
.ifc \r32,%ebx |
||||
\opd = 3 |
||||
.endif |
||||
.ifc \r32,%esp |
||||
\opd = 4 |
||||
.endif |
||||
.ifc \r32,%ebp |
||||
\opd = 5 |
||||
.endif |
||||
.ifc \r32,%esi |
||||
\opd = 6 |
||||
.endif |
||||
.ifc \r32,%edi |
||||
\opd = 7 |
||||
.endif |
||||
#ifdef X86_64 |
||||
.ifc \r32,%r8d |
||||
\opd = 8 |
||||
.endif |
||||
.ifc \r32,%r9d |
||||
\opd = 9 |
||||
.endif |
||||
.ifc \r32,%r10d |
||||
\opd = 10 |
||||
.endif |
||||
.ifc \r32,%r11d |
||||
\opd = 11 |
||||
.endif |
||||
.ifc \r32,%r12d |
||||
\opd = 12 |
||||
.endif |
||||
.ifc \r32,%r13d |
||||
\opd = 13 |
||||
.endif |
||||
.ifc \r32,%r14d |
||||
\opd = 14 |
||||
.endif |
||||
.ifc \r32,%r15d |
||||
\opd = 15 |
||||
.endif |
||||
#endif |
||||
.endm |
||||
|
||||
# R64_NUM opd r64: set the assembler symbol opd to the register number of the |
# 64-bit register r64 (%rax=0 ... %r15=15). Every comparison is compiled only |
# when X86_64 is defined; otherwise opd always keeps the invalid-number sentinel. |
.macro R64_NUM opd r64 |
||||
\opd = NUM_INVALID |
||||
#ifdef X86_64 |
||||
.ifc \r64,%rax |
||||
\opd = 0 |
||||
.endif |
||||
.ifc \r64,%rcx |
||||
\opd = 1 |
||||
.endif |
||||
.ifc \r64,%rdx |
||||
\opd = 2 |
||||
.endif |
||||
.ifc \r64,%rbx |
||||
\opd = 3 |
||||
.endif |
||||
.ifc \r64,%rsp |
||||
\opd = 4 |
||||
.endif |
||||
.ifc \r64,%rbp |
||||
\opd = 5 |
||||
.endif |
||||
.ifc \r64,%rsi |
||||
\opd = 6 |
||||
.endif |
||||
.ifc \r64,%rdi |
||||
\opd = 7 |
||||
.endif |
||||
.ifc \r64,%r8 |
||||
\opd = 8 |
||||
.endif |
||||
.ifc \r64,%r9 |
||||
\opd = 9 |
||||
.endif |
||||
.ifc \r64,%r10 |
||||
\opd = 10 |
||||
.endif |
||||
.ifc \r64,%r11 |
||||
\opd = 11 |
||||
.endif |
||||
.ifc \r64,%r12 |
||||
\opd = 12 |
||||
.endif |
||||
.ifc \r64,%r13 |
||||
\opd = 13 |
||||
.endif |
||||
.ifc \r64,%r14 |
||||
\opd = 14 |
||||
.endif |
||||
.ifc \r64,%r15 |
||||
\opd = 15 |
||||
.endif |
||||
#endif |
||||
.endm |
||||
|
||||
# XMM_NUM opd xmm: set the assembler symbol opd to the index (0-15) of the XMM |
# register xmm. The symbol keeps the invalid-number sentinel when nothing matches. |
.macro XMM_NUM opd xmm |
||||
\opd = NUM_INVALID |
||||
.ifc \xmm,%xmm0 |
||||
\opd = 0 |
||||
.endif |
||||
.ifc \xmm,%xmm1 |
||||
\opd = 1 |
||||
.endif |
||||
.ifc \xmm,%xmm2 |
||||
\opd = 2 |
||||
.endif |
||||
.ifc \xmm,%xmm3 |
||||
\opd = 3 |
||||
.endif |
||||
.ifc \xmm,%xmm4 |
||||
\opd = 4 |
||||
.endif |
||||
.ifc \xmm,%xmm5 |
||||
\opd = 5 |
||||
.endif |
||||
.ifc \xmm,%xmm6 |
||||
\opd = 6 |
||||
.endif |
||||
.ifc \xmm,%xmm7 |
||||
\opd = 7 |
||||
.endif |
||||
.ifc \xmm,%xmm8 |
||||
\opd = 8 |
||||
.endif |
||||
.ifc \xmm,%xmm9 |
||||
\opd = 9 |
||||
.endif |
||||
.ifc \xmm,%xmm10 |
||||
\opd = 10 |
||||
.endif |
||||
.ifc \xmm,%xmm11 |
||||
\opd = 11 |
||||
.endif |
||||
.ifc \xmm,%xmm12 |
||||
\opd = 12 |
||||
.endif |
||||
.ifc \xmm,%xmm13 |
||||
\opd = 13 |
||||
.endif |
||||
.ifc \xmm,%xmm14 |
||||
\opd = 14 |
||||
.endif |
||||
.ifc \xmm,%xmm15 |
||||
\opd = 15 |
||||
.endif |
||||
.endm |
||||
|
||||
# TYPE type reg: classify the register name reg by running all three number |
# lookups on it. The symbol type receives the 64-bit, 32-bit or XMM class |
# (tested in that order), or the invalid class when no lookup recognised reg. |
.macro TYPE type reg |
||||
R32_NUM reg_type_r32 \reg |
||||
R64_NUM reg_type_r64 \reg |
||||
XMM_NUM reg_type_xmm \reg |
||||
.if reg_type_r64 <> NUM_INVALID |
||||
\type = TYPE_R64 |
||||
.elseif reg_type_r32 <> NUM_INVALID |
||||
\type = TYPE_R32 |
||||
.elseif reg_type_xmm <> NUM_INVALID |
||||
\type = TYPE_XMM |
||||
.else |
||||
\type = TYPE_INVALID |
||||
.endif |
||||
.endm |
||||
|
||||
# PFX_OPD_SIZE: emit the single prefix byte 0x66 that the hand-encoded |
# instructions below place in front of their opcode bytes. |
.macro PFX_OPD_SIZE
|
||||
.byte 0x66
|
||||
.endm |
||||
|
||||
# PFX_REX opd1 opd2 W: emit a REX prefix byte, but only when one of the two |
# register numbers has bit 3 set or W is non-zero. Bit 3 of opd1 is moved to |
# bit 0 of the prefix, bit 3 of opd2 to bit 2, and W to bit 3; 0x40 is the base. |
.macro PFX_REX opd1 opd2 W=0 |
||||
.if ((\opd1 | \opd2) & 8) || \W |
||||
.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) |
||||
.endif |
||||
.endm |
||||
|
||||
# MODRM mod opd1 opd2: emit one ModRM byte. mod is OR-ed in unchanged, the low |
# three bits of opd1 fill bits 0-2 and the low three bits of opd2 fill bits 3-5. |
.macro MODRM mod opd1 opd2 |
||||
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) |
||||
.endm |
||||
|
||||
# PSHUFB_XMM xmm1 xmm2: hand-encode a register-to-register pshufb as raw bytes: |
# 0x66 prefix, optional REX, opcode 0x0f 0x38 0x00, then a ModRM byte with |
# mod=0xc0, xmm1 in bits 0-2 and xmm2 in bits 3-5. |
.macro PSHUFB_XMM xmm1 xmm2 |
||||
XMM_NUM pshufb_opd1 \xmm1 |
||||
XMM_NUM pshufb_opd2 \xmm2 |
||||
PFX_OPD_SIZE |
||||
PFX_REX pshufb_opd1 pshufb_opd2 |
||||
.byte 0x0f, 0x38, 0x00 |
||||
MODRM 0xc0 pshufb_opd1 pshufb_opd2 |
||||
.endm |
||||
|
||||
# PCLMULQDQ imm8 xmm1 xmm2: hand-encode a register-to-register pclmulqdq as raw |
# bytes: 0x66 prefix, optional REX, opcode 0x0f 0x3a 0x44, a ModRM byte with |
# mod=0xc0 (xmm1 in bits 0-2, xmm2 in bits 3-5), then the immediate byte imm8. |
.macro PCLMULQDQ imm8 xmm1 xmm2 |
||||
XMM_NUM clmul_opd1 \xmm1 |
||||
XMM_NUM clmul_opd2 \xmm2 |
||||
PFX_OPD_SIZE |
||||
PFX_REX clmul_opd1 clmul_opd2 |
||||
.byte 0x0f, 0x3a, 0x44 |
||||
MODRM 0xc0 clmul_opd1 clmul_opd2 |
||||
.byte \imm8 |
||||
.endm |
||||
|
||||
# PEXTRD imm8 xmm gpr: hand-encode pextrd with a 32-bit register destination as |
# raw bytes: 0x66 prefix, optional REX, opcode 0x0f 0x3a 0x16, a ModRM byte with |
# mod=0xc0 (gpr in bits 0-2, xmm in bits 3-5), then the immediate byte imm8. |
.macro PEXTRD imm8 xmm gpr |
||||
R32_NUM extrd_opd1 \gpr |
||||
XMM_NUM extrd_opd2 \xmm |
||||
PFX_OPD_SIZE |
||||
PFX_REX extrd_opd1 extrd_opd2 |
||||
.byte 0x0f, 0x3a, 0x16 |
||||
MODRM 0xc0 extrd_opd1 extrd_opd2 |
||||
.byte \imm8 |
||||
.endm |
||||
|
||||
# MOVQ_R64_XMM opd1 opd2: hand-encode a 64-bit movq between a general-purpose |
# register and an XMM register, in either direction. The class of opd1 picks |
# the opcode: 0x0f 0x7e when opd1 is an XMM register, 0x0f 0x6e otherwise. |
# The REX prefix is always emitted because W is passed as 1. |
# NOTE(review): opd1 always lands in ModRM bits 0-2 and opd2 in bits 3-5. For |
# the XMM-first form (opcode 0x7e) the instruction set reference appears to |
# expect the XMM register in bits 3-5 instead, so that form may only encode |
# correctly when both register numbers are equal. Confirm before relying on it. |
.macro MOVQ_R64_XMM opd1 opd2 |
||||
TYPE movq_r64_xmm_opd1_type \opd1 |
||||
.if movq_r64_xmm_opd1_type == TYPE_XMM |
||||
XMM_NUM movq_r64_xmm_opd1 \opd1 |
||||
R64_NUM movq_r64_xmm_opd2 \opd2 |
||||
.else |
||||
R64_NUM movq_r64_xmm_opd1 \opd1 |
||||
XMM_NUM movq_r64_xmm_opd2 \opd2 |
||||
.endif |
||||
PFX_OPD_SIZE |
||||
PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 |
||||
.if movq_r64_xmm_opd1_type == TYPE_XMM |
||||
.byte 0x0f, 0x7e |
||||
.else |
||||
.byte 0x0f, 0x6e |
||||
.endif |
||||
MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 |
||||
.endm |
||||
|
||||
.text |
||||
|
||||
# Virtual Registers |
# Assembler symbols bound to physical registers. The a_64..h_64 bindings are |
# re-assigned after every round by the RotateState macro. |
||||
# ARG1 |
||||
msg = %rdi |
||||
# ARG2 |
||||
digest = %rsi |
||||
# ARG3 |
||||
msglen = %rdx |
||||
# Scratch registers used inside a round |
T1 = %rcx |
||||
T2 = %r8 |
||||
# Working variables a..h of the hash state. %r12-%r15 and %rbx are saved and |
# restored by sha512_transform_avx around their use. |
a_64 = %r9 |
||||
b_64 = %r10 |
||||
c_64 = %r11 |
||||
d_64 = %r12 |
||||
e_64 = %r13 |
||||
f_64 = %r14 |
||||
g_64 = %r15 |
||||
h_64 = %rbx |
||||
# Scratch register for the rotate/xor chains |
tmp0 = %rax |
||||
|
||||
# Local variables (stack frame) |
||||
|
||||
# Message Schedule |
# (80 qwords, one per round) |
||||
W_SIZE = 80*8 |
||||
# W[t] + K[t] | W[t+1] + K[t+1] |
||||
WK_SIZE = 2*8 |
||||
# One qword holding the stack pointer value from before the frame was set up |
RSPSAVE_SIZE = 1*8 |
||||
# Five qwords: save area for %rbx and %r12-%r15 |
GPRSAVE_SIZE = 5*8 |
||||
|
||||
# Byte offsets of each area from the aligned %rsp, laid out back to back |
frame_W = 0 |
||||
frame_WK = frame_W + W_SIZE |
||||
frame_RSPSAVE = frame_WK + WK_SIZE |
||||
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE |
||||
frame_size = frame_GPRSAVE + GPRSAVE_SIZE |
||||
|
||||
# Useful QWORD "arrays" for simpler memory references |
||||
# MSG, DIGEST, K_t, W_t are arrays |
||||
# WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even |
||||
|
||||
# Input message (arg1) |
||||
#define MSG(i) 8*i(msg) |
||||
|
||||
# Output Digest (arg2) |
||||
#define DIGEST(i) 8*i(digest) |
||||
|
||||
# SHA Constants (static mem) |
||||
#define K_t(i) 8*i+K512(%rip) |
||||
|
||||
# Message Schedule (stack frame) |
||||
#define W_t(i) 8*i+frame_W(%rsp) |
||||
|
||||
# W[t]+K[t] (stack frame) |
||||
#define WK_2(i) 8*((i%2))+frame_WK(%rsp) |
||||
|
||||
# RotateState: advance the working variables by one round without moving any |
# data. Only the assembler symbols a_64..h_64 are re-bound, so no instruction |
# is emitted; the register that was h becomes the new a. |
.macro RotateState
|
||||
# Rotate symbols a..h right |
||||
TMP = h_64 |
||||
h_64 = g_64 |
||||
g_64 = f_64 |
||||
f_64 = e_64 |
||||
e_64 = d_64 |
||||
d_64 = c_64 |
||||
c_64 = b_64 |
||||
b_64 = a_64 |
||||
a_64 = TMP |
||||
.endm |
||||
|
||||
# RORQ p1 p2: rotate the 64-bit register p1 right by p2 bits. Implemented as a |
# double-precision left shift of p1 into itself by 64-p2, which gives the same |
# result as a right rotate by p2. |
.macro RORQ p1 p2 |
||||
# shld is faster than ror on Sandybridge |
||||
shld $(64-\p2), \p1, \p1 |
||||
.endm |
||||
|
||||
# SHA512_Round rnd: one SHA-512 round on the working variables a_64..h_64. |
# W[t]+K[t] is read from the WK_2 slot selected by rnd. T1, T2 and tmp0 are |
# clobbered. The new e is accumulated in the register bound to d_64 and the |
# new a in the register bound to h_64; RotateState then re-binds the symbols. |
# The number after each RORQ is the cumulative rotation reached at that point. |
.macro SHA512_Round rnd |
||||
# Compute one round; rnd selects the WK_2 slot holding W[t]+K[t] |
||||
mov f_64, T1 # T1 = f |
||||
mov e_64, tmp0 # tmp = e |
||||
xor g_64, T1 # T1 = f ^ g |
||||
RORQ tmp0, 23 # 41 # tmp = e ror 23 |
||||
and e_64, T1 # T1 = (f ^ g) & e |
||||
xor e_64, tmp0 # tmp = (e ror 23) ^ e |
||||
xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g) |
||||
idx = \rnd |
||||
add WK_2(idx), T1 # W[t] + K[t] from message scheduler |
||||
RORQ tmp0, 4 # 18 # tmp = ((e ror 23) ^ e) ror 4 |
||||
xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e |
||||
mov a_64, T2 # T2 = a |
||||
add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h |
||||
RORQ tmp0, 14 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) |
||||
add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h + S1(e) |
||||
mov a_64, tmp0 # tmp = a |
||||
xor c_64, T2 # T2 = a ^ c |
||||
and c_64, tmp0 # tmp = a & c |
||||
and b_64, T2 # T2 = (a ^ c) & b |
||||
xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) |
||||
mov a_64, tmp0 # tmp = a |
||||
RORQ tmp0, 5 # 39 # tmp = a ror 5 |
||||
xor a_64, tmp0 # tmp = (a ror 5) ^ a |
||||
add T1, d_64 # e(next_state) = d + T1 |
||||
RORQ tmp0, 6 # 34 # tmp = ((a ror 5) ^ a) ror 6 |
||||
xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a |
||||
lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c) |
||||
RORQ tmp0, 28 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) |
||||
add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) + S0(a) |
||||
RotateState |
||||
.endm |
||||
|
||||
# SHA512_2Sched_2Round_avx rnd: two rounds interleaved with the computation of |
# two new message schedule qwords. Clobbers T1, T2, tmp0 and %xmm0-%xmm9. |
.macro SHA512_2Sched_2Round_avx rnd |
||||
# Compute rounds t-2 and t-1 |
||||
# Compute message schedule QWORDS t and t+1 |
||||
|
||||
# Two rounds are computed based on the values for K[t-2]+W[t-2] and |
||||
# K[t-1]+W[t-1] which were previously stored at WK_2 by the message |
||||
# scheduler. |
||||
# The two new schedule QWORDS are stored at [W_t(t)] and [W_t(t+1)]. |
||||
# They are then added to their respective SHA512 constants at |
||||
# [K_t(t)] and [K_t(t+1)] and stored at dqword [WK_2(t)] |
||||
# For brevity, the comments following vectored instructions only refer to |
||||
# the first of a pair of QWORDS. |
||||
# Eg. XMM4=W[t-2] really means XMM4={W[t-2]|W[t-1]} |
||||
# The computation of the message schedule and the rounds are tightly |
||||
# stitched to take advantage of instruction-level parallelism. |
||||
|
||||
idx = \rnd - 2 |
||||
vmovdqa W_t(idx), %xmm4 # XMM4 = W[t-2] |
||||
idx = \rnd - 15 |
||||
vmovdqu W_t(idx), %xmm5 # XMM5 = W[t-15] |
||||
mov f_64, T1 |
||||
vpsrlq $61, %xmm4, %xmm0 # XMM0 = W[t-2]>>61 |
||||
mov e_64, tmp0 |
||||
vpsrlq $1, %xmm5, %xmm6 # XMM6 = W[t-15]>>1 |
||||
xor g_64, T1 |
||||
RORQ tmp0, 23 # 41 |
||||
vpsrlq $19, %xmm4, %xmm1 # XMM1 = W[t-2]>>19 |
||||
and e_64, T1 |
||||
xor e_64, tmp0 |
||||
vpxor %xmm1, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 |
||||
xor g_64, T1 |
||||
idx = \rnd |
||||
add WK_2(idx), T1# |
||||
vpsrlq $8, %xmm5, %xmm7 # XMM7 = W[t-15]>>8 |
||||
RORQ tmp0, 4 # 18 |
||||
vpsrlq $6, %xmm4, %xmm2 # XMM2 = W[t-2]>>6 |
||||
xor e_64, tmp0 |
||||
mov a_64, T2 |
||||
add h_64, T1 |
||||
vpxor %xmm7, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 |
||||
RORQ tmp0, 14 # 14 |
||||
add tmp0, T1 |
||||
vpsrlq $7, %xmm5, %xmm8 # XMM8 = W[t-15]>>7 |
||||
mov a_64, tmp0 |
||||
xor c_64, T2 |
||||
vpsllq $(64-61), %xmm4, %xmm3 # XMM3 = W[t-2]<<3 |
||||
and c_64, tmp0 |
||||
and b_64, T2 |
||||
vpxor %xmm3, %xmm2, %xmm2 # XMM2 = W[t-2]>>6 ^ W[t-2]<<3 |
||||
xor tmp0, T2 |
||||
mov a_64, tmp0 |
||||
vpsllq $(64-1), %xmm5, %xmm9 # XMM9 = W[t-15]<<63 |
||||
RORQ tmp0, 5 # 39 |
||||
vpxor %xmm9, %xmm8, %xmm8 # XMM8 = W[t-15]>>7 ^ W[t-15]<<63 |
||||
xor a_64, tmp0 |
||||
add T1, d_64 |
||||
RORQ tmp0, 6 # 34 |
||||
xor a_64, tmp0 |
||||
vpxor %xmm8, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 ^ |
||||
# W[t-15]>>7 ^ W[t-15]<<63 |
||||
lea (T1, T2), h_64 |
||||
RORQ tmp0, 28 # 28 |
||||
vpsllq $(64-19), %xmm4, %xmm4 # XMM4 = W[t-2]<<45 |
||||
add tmp0, h_64 |
||||
RotateState |
||||
vpxor %xmm4, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 ^ |
||||
# W[t-2]<<45 |
||||
mov f_64, T1 |
||||
vpxor %xmm2, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) |
||||
mov e_64, tmp0 |
||||
xor g_64, T1 |
||||
idx = \rnd - 16 |
||||
vpaddq W_t(idx), %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] |
||||
idx = \rnd - 7 |
||||
vmovdqu W_t(idx), %xmm1 # XMM1 = W[t-7] |
||||
RORQ tmp0, 23 # 41 |
||||
and e_64, T1 |
||||
xor e_64, tmp0 |
||||
xor g_64, T1 |
||||
vpsllq $(64-8), %xmm5, %xmm5 # XMM5 = W[t-15]<<56 |
||||
idx = \rnd + 1 |
||||
add WK_2(idx), T1 |
||||
vpxor %xmm5, %xmm6, %xmm6 # XMM6 = s0(W[t-15]) |
||||
RORQ tmp0, 4 # 18 |
||||
vpaddq %xmm6, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] + s0(W[t-15]) |
||||
xor e_64, tmp0 |
||||
vpaddq %xmm1, %xmm0, %xmm0 # XMM0 = W[t] = s1(W[t-2]) + W[t-7] + |
||||
# s0(W[t-15]) + W[t-16] |
||||
mov a_64, T2 |
||||
add h_64, T1 |
||||
RORQ tmp0, 14 # 14 |
||||
add tmp0, T1 |
||||
idx = \rnd |
||||
vmovdqa %xmm0, W_t(idx) # Store W[t] |
||||
vpaddq K_t(idx), %xmm0, %xmm0 # Compute W[t]+K[t] |
||||
vmovdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds |
||||
mov a_64, tmp0 |
||||
xor c_64, T2 |
||||
and c_64, tmp0 |
||||
and b_64, T2 |
||||
xor tmp0, T2 |
||||
mov a_64, tmp0 |
||||
RORQ tmp0, 5 # 39 |
||||
xor a_64, tmp0 |
||||
add T1, d_64 |
||||
RORQ tmp0, 6 # 34 |
||||
xor a_64, tmp0 |
||||
lea (T1, T2), h_64 |
||||
RORQ tmp0, 28 # 28 |
||||
add tmp0, h_64 |
||||
RotateState |
||||
.endm |
||||
|
||||
######################################################################## |
||||
# void sha512_transform_avx(const void* M, void* D, u64 L) |
||||
# Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
||||
# The size of the message pointed to by M must be an integer multiple of SHA512 |
||||
# message blocks. |
||||
# L is the message length in SHA512 blocks |
||||
######################################################################## |
||||
# Entry point. msg, digest and msglen arrive in %rdi, %rsi and %rdx. |
# Returns immediately when msglen is zero; otherwise processes msglen blocks |
# of 16 qwords each, updating the eight digest qwords in place. |
ENTRY(sha512_transform_avx) |
||||
cmp $0, msglen |
||||
je nowork |
||||
|
||||
# Allocate Stack Space |
# (%rax keeps the incoming %rsp; the new %rsp is rounded down to 32 bytes) |
||||
mov %rsp, %rax |
||||
sub $frame_size, %rsp |
||||
and $~(0x20 - 1), %rsp |
||||
mov %rax, frame_RSPSAVE(%rsp) |
||||
|
||||
# Save GPRs |
||||
mov %rbx, frame_GPRSAVE(%rsp) |
||||
mov %r12, frame_GPRSAVE +8*1(%rsp) |
||||
mov %r13, frame_GPRSAVE +8*2(%rsp) |
||||
mov %r14, frame_GPRSAVE +8*3(%rsp) |
||||
mov %r15, frame_GPRSAVE +8*4(%rsp) |
||||
|
||||
# Loop head: one iteration per message block |
updateblock: |
||||
|
||||
# Load state variables |
||||
mov DIGEST(0), a_64 |
||||
mov DIGEST(1), b_64 |
||||
mov DIGEST(2), c_64 |
||||
mov DIGEST(3), d_64 |
||||
mov DIGEST(4), e_64 |
||||
mov DIGEST(5), f_64 |
||||
mov DIGEST(6), g_64 |
||||
mov DIGEST(7), h_64 |
||||
|
||||
# The block below is unrolled at assembly time; t is an assembler symbol |
t = 0 |
||||
.rept 80/2 + 1 |
||||
# (80 rounds) / (2 rounds/iteration) + (1 iteration) |
||||
# +1 iteration because the scheduler leads hashing by 1 iteration |
||||
.if t < 2 |
||||
# BSWAP 2 QWORDS |
# (the mask loaded into %xmm1 here is reused by the t < 16 branch) |
||||
vmovdqa XMM_QWORD_BSWAP(%rip), %xmm1 |
||||
vmovdqu MSG(t), %xmm0 |
||||
vpshufb %xmm1, %xmm0, %xmm0 # BSWAP |
||||
vmovdqa %xmm0, W_t(t) # Store Scheduled Pair |
||||
vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t] |
||||
vmovdqa %xmm0, WK_2(t) # Store into WK for rounds |
||||
.elseif t < 16 |
||||
# BSWAP 2 QWORDS; Compute 2 Rounds |
||||
vmovdqu MSG(t), %xmm0 |
||||
vpshufb %xmm1, %xmm0, %xmm0 # BSWAP |
||||
SHA512_Round t-2 # Round t-2 |
||||
vmovdqa %xmm0, W_t(t) # Store Scheduled Pair |
||||
vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t] |
||||
SHA512_Round t-1 # Round t-1 |
||||
vmovdqa %xmm0, WK_2(t)# Store W[t]+K[t] into WK |
||||
.elseif t < 79 |
||||
# Schedule 2 QWORDS; Compute 2 Rounds |
||||
SHA512_2Sched_2Round_avx t |
||||
.else |
||||
# Compute 2 Rounds |
||||
SHA512_Round t-2 |
||||
SHA512_Round t-1 |
||||
.endif |
||||
t = t+2 |
||||
.endr |
||||
|
||||
# Update digest |
||||
add a_64, DIGEST(0) |
||||
add b_64, DIGEST(1) |
||||
add c_64, DIGEST(2) |
||||
add d_64, DIGEST(3) |
||||
add e_64, DIGEST(4) |
||||
add f_64, DIGEST(5) |
||||
add g_64, DIGEST(6) |
||||
add h_64, DIGEST(7) |
||||
|
||||
# Advance to next message block |
# (16 qwords per block; loop until the block counter reaches zero) |
||||
add $16*8, msg |
||||
dec msglen |
||||
jnz updateblock |
||||
|
||||
# Restore GPRs |
||||
mov frame_GPRSAVE(%rsp), %rbx |
||||
mov frame_GPRSAVE +8*1(%rsp), %r12 |
||||
mov frame_GPRSAVE +8*2(%rsp), %r13 |
||||
mov frame_GPRSAVE +8*3(%rsp), %r14 |
||||
mov frame_GPRSAVE +8*4(%rsp), %r15 |
||||
|
||||
# Restore Stack Pointer |
||||
mov frame_RSPSAVE(%rsp), %rsp |
||||
|
||||
# Common exit, also reached directly when there is nothing to hash |
nowork: |
||||
ret |
||||
ENDPROC(sha512_transform_avx) |
||||
|
||||
######################################################################## |
||||
### Binary Data |
||||
|
||||
.data |
||||
|
||||
.align 16
|
||||
|
||||
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. |
||||
# Byte i of the result is taken from source byte 7-i within each 8-byte half, |
# so both qwords of the register have their byte order reversed independently. |
XMM_QWORD_BSWAP: |
||||
.octa 0x08090a0b0c0d0e0f0001020304050607
|
||||
|
||||
# K[t] used in SHA512 hashing |
||||
# 80 qword round constants, two per line, indexed through the K_t accessor. |
K512: |
||||
.quad 0x428a2f98d728ae22,0x7137449123ef65cd |
||||
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc |
||||
.quad 0x3956c25bf348b538,0x59f111f1b605d019 |
||||
.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 |
||||
.quad 0xd807aa98a3030242,0x12835b0145706fbe |
||||
.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 |
||||
.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 |
||||
.quad 0x9bdc06a725c71235,0xc19bf174cf692694 |
||||
.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 |
||||
.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 |
||||
.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 |
||||
.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 |
||||
.quad 0x983e5152ee66dfab,0xa831c66d2db43210 |
||||
.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 |
||||
.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 |
||||
.quad 0x06ca6351e003826f,0x142929670a0e6e70 |
||||
.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 |
||||
.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df |
||||
.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 |
||||
.quad 0x81c2c92e47edaee6,0x92722c851482353b |
||||
.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 |
||||
.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 |
||||
.quad 0xd192e819d6ef5218,0xd69906245565a910 |
||||
.quad 0xf40e35855771202a,0x106aa07032bbd1b8 |
||||
.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 |
||||
.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 |
||||
.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb |
||||
.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 |
||||
.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 |
||||
.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec |
||||
.quad 0x90befffa23631e28,0xa4506cebde82bde9 |
||||
.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b |
||||
.quad 0xca273eceea26619c,0xd186b8c721c0c207 |
||||
.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 |
||||
.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 |
||||
.quad 0x113f9804bef90dae,0x1b710b35131c471b |
||||
.quad 0x28db77f523047d84,0x32caab7b40c72493 |
||||
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c |
||||
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a |
||||
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 |
||||
#endif |
File diff suppressed because it is too large
Load Diff
@ -1,686 +0,0 @@ |
||||
######################################################################## |
||||
# Implement fast SHA-512 with SSSE3 instructions. (x86_64) |
||||
# |
||||
# Copyright (C) 2013 Intel Corporation. |
||||
# |
||||
# Authors: |
||||
# James Guilford <james.guilford@intel.com>
|
||||
# Kirk Yap <kirk.s.yap@intel.com>
|
||||
# David Cote <david.m.cote@intel.com>
|
||||
# Tim Chen <tim.c.chen@linux.intel.com>
|
||||
# |
||||
# This software is available to you under a choice of one of two |
||||
# licenses. You may choose to be licensed under the terms of the GNU |
||||
# General Public License (GPL) Version 2, available from the file |
||||
# COPYING in the main directory of this source tree, or the |
||||
# OpenIB.org BSD license below: |
||||
# |
||||
# Redistribution and use in source and binary forms, with or |
||||
# without modification, are permitted provided that the following |
||||
# conditions are met: |
||||
# |
||||
# - Redistributions of source code must retain the above |
||||
# copyright notice, this list of conditions and the following |
||||
# disclaimer. |
||||
# |
||||
# - Redistributions in binary form must reproduce the above |
||||
# copyright notice, this list of conditions and the following |
||||
# disclaimer in the documentation and/or other materials |
||||
# provided with the distribution. |
||||
# |
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
||||
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
||||
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
# SOFTWARE. |
||||
# |
||||
######################################################################## |
||||
# |
||||
# This code is described in an Intel White-Paper: |
||||
# "Fast SHA-512 Implementations on Intel Architecture Processors" |
||||
# |
||||
# To find it, surf to http://www.intel.com/p/en_US/embedded |
||||
# and search for that title. |
||||
# |
||||
######################################################################## |
||||
# Using this part of Minio codebase under the license |
||||
# Apache License Version 2.0 with modifications |
||||
## |
||||
|
||||
#ifdef HAS_SSE41 |
||||
#ifndef ENTRY |
||||
#define ENTRY(name) \ |
||||
.globl name ; \
|
||||
.align 4,0x90 ; \
|
||||
name: |
||||
#endif |
||||
|
||||
#ifndef END |
||||
#define END(name) \ |
||||
.size name, .-name |
||||
#endif |
||||
|
||||
#ifndef ENDPROC |
||||
#define ENDPROC(name) \ |
||||
.type name, @function ; \
|
||||
END(name) |
||||
#endif |
||||
|
||||
#define NUM_INVALID 100 |
||||
|
||||
#define TYPE_R32 0 |
||||
#define TYPE_R64 1 |
||||
#define TYPE_XMM 2 |
||||
#define TYPE_INVALID 100 |
||||
|
||||
.macro R32_NUM opd r32 |
||||
\opd = NUM_INVALID |
||||
.ifc \r32,%eax |
||||
\opd = 0 |
||||
.endif |
||||
.ifc \r32,%ecx |
||||
\opd = 1 |
||||
.endif |
||||
.ifc \r32,%edx |
||||
\opd = 2 |
||||
.endif |
||||
.ifc \r32,%ebx |
||||
\opd = 3 |
||||
.endif |
||||
.ifc \r32,%esp |
||||
\opd = 4 |
||||
.endif |
||||
.ifc \r32,%ebp |
||||
\opd = 5 |
||||
.endif |
||||
.ifc \r32,%esi |
||||
\opd = 6 |
||||
.endif |
||||
.ifc \r32,%edi |
||||
\opd = 7 |
||||
.endif |
||||
#ifdef X86_64 |
||||
.ifc \r32,%r8d |
||||
\opd = 8 |
||||
.endif |
||||
.ifc \r32,%r9d |
||||
\opd = 9 |
||||
.endif |
||||
.ifc \r32,%r10d |
||||
\opd = 10 |
||||
.endif |
||||
.ifc \r32,%r11d |
||||
\opd = 11 |
||||
.endif |
||||
.ifc \r32,%r12d |
||||
\opd = 12 |
||||
.endif |
||||
.ifc \r32,%r13d |
||||
\opd = 13 |
||||
.endif |
||||
.ifc \r32,%r14d |
||||
\opd = 14 |
||||
.endif |
||||
.ifc \r32,%r15d |
||||
\opd = 15 |
||||
.endif |
||||
#endif |
||||
.endm |
||||
|
||||
.macro R64_NUM opd r64 |
||||
\opd = NUM_INVALID |
||||
#ifdef X86_64 |
||||
.ifc \r64,%rax |
||||
\opd = 0 |
||||
.endif |
||||
.ifc \r64,%rcx |
||||
\opd = 1 |
||||
.endif |
||||
.ifc \r64,%rdx |
||||
\opd = 2 |
||||
.endif |
||||
.ifc \r64,%rbx |
||||
\opd = 3 |
||||
.endif |
||||
.ifc \r64,%rsp |
||||
\opd = 4 |
||||
.endif |
||||
.ifc \r64,%rbp |
||||
\opd = 5 |
||||
.endif |
||||
.ifc \r64,%rsi |
||||
\opd = 6 |
||||
.endif |
||||
.ifc \r64,%rdi |
||||
\opd = 7 |
||||
.endif |
||||
.ifc \r64,%r8 |
||||
\opd = 8 |
||||
.endif |
||||
.ifc \r64,%r9 |
||||
\opd = 9 |
||||
.endif |
||||
.ifc \r64,%r10 |
||||
\opd = 10 |
||||
.endif |
||||
.ifc \r64,%r11 |
||||
\opd = 11 |
||||
.endif |
||||
.ifc \r64,%r12 |
||||
\opd = 12 |
||||
.endif |
||||
.ifc \r64,%r13 |
||||
\opd = 13 |
||||
.endif |
||||
.ifc \r64,%r14 |
||||
\opd = 14 |
||||
.endif |
||||
.ifc \r64,%r15 |
||||
\opd = 15 |
||||
.endif |
||||
#endif |
||||
.endm |
||||
|
||||
.macro XMM_NUM opd xmm |
||||
\opd = NUM_INVALID |
||||
.ifc \xmm,%xmm0 |
||||
\opd = 0 |
||||
.endif |
||||
.ifc \xmm,%xmm1 |
||||
\opd = 1 |
||||
.endif |
||||
.ifc \xmm,%xmm2 |
||||
\opd = 2 |
||||
.endif |
||||
.ifc \xmm,%xmm3 |
||||
\opd = 3 |
||||
.endif |
||||
.ifc \xmm,%xmm4 |
||||
\opd = 4 |
||||
.endif |
||||
.ifc \xmm,%xmm5 |
||||
\opd = 5 |
||||
.endif |
||||
.ifc \xmm,%xmm6 |
||||
\opd = 6 |
||||
.endif |
||||
.ifc \xmm,%xmm7 |
||||
\opd = 7 |
||||
.endif |
||||
.ifc \xmm,%xmm8 |
||||
\opd = 8 |
||||
.endif |
||||
.ifc \xmm,%xmm9 |
||||
\opd = 9 |
||||
.endif |
||||
.ifc \xmm,%xmm10 |
||||
\opd = 10 |
||||
.endif |
||||
.ifc \xmm,%xmm11 |
||||
\opd = 11 |
||||
.endif |
||||
.ifc \xmm,%xmm12 |
||||
\opd = 12 |
||||
.endif |
||||
.ifc \xmm,%xmm13 |
||||
\opd = 13 |
||||
.endif |
||||
.ifc \xmm,%xmm14 |
||||
\opd = 14 |
||||
.endif |
||||
.ifc \xmm,%xmm15 |
||||
\opd = 15 |
||||
.endif |
||||
.endm |
||||
|
||||
# TYPE type reg
# Classifies the register operand reg and stores the result in the
# assembler symbol named by type. The operand is run through R32_NUM,
# R64_NUM and XMM_NUM; whichever lookup yields something other than
# NUM_INVALID decides the class, tested in the order 64-bit GPR, 32-bit
# GPR, XMM. When none of the lookups matches, type is set to TYPE_INVALID.
# Side effect: the scratch symbols reg_type_r32, reg_type_r64 and
# reg_type_xmm are overwritten on every expansion.
.macro TYPE type reg |
||||
R32_NUM reg_type_r32 \reg |
||||
R64_NUM reg_type_r64 \reg |
||||
XMM_NUM reg_type_xmm \reg |
||||
.if reg_type_r64 <> NUM_INVALID |
||||
\type = TYPE_R64 |
||||
.elseif reg_type_r32 <> NUM_INVALID |
||||
\type = TYPE_R32 |
||||
.elseif reg_type_xmm <> NUM_INVALID |
||||
\type = TYPE_XMM |
||||
.else |
||||
\type = TYPE_INVALID |
||||
.endif |
||||
.endm |
||||
|
||||
# PFX_OPD_SIZE emits the single prefix byte 0x66. The instruction
# encoder macros in this file expand it ahead of their opcode bytes.
.macro PFX_OPD_SIZE
|
||||
.byte 0x66
|
||||
.endm |
||||
|
||||
# PFX_REX opd1 opd2 W=0
# Emits a REX prefix byte, and only when one is needed: when either
# register number has bit 3 set (register 8 or above) or when W is
# nonzero. Layout of the emitted byte on top of the 0x40 base:
#   bit 0 <- bit 3 of opd1 (opd1 is the operand MODRM puts in bits 0-2)
#   bit 2 <- bit 3 of opd2 (opd2 is the operand MODRM puts in bits 3-5)
#   bit 3 <- W
.macro PFX_REX opd1 opd2 W=0 |
||||
.if ((\opd1 | \opd2) & 8) || \W |
||||
.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) |
||||
.endif |
||||
.endm |
||||
|
||||
# MODRM mod opd1 opd2
# Emits one ModRM byte. mod is OR-ed in unchanged (every caller in this
# file passes 0xc0), the low three bits of opd1 land in bits 0-2 and the
# low three bits of opd2 land in bits 3-5. Bit 3 of each register number
# is not encoded here; PFX_REX carries it.
.macro MODRM mod opd1 opd2 |
||||
.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) |
||||
.endm |
||||
|
||||
# PSHUFB_XMM xmm1 xmm2
# Hand-assembles a register-to-register PSHUFB as raw bytes:
# 0x66, optional REX, 0x0f 0x38 0x00, ModRM (mod bits 0xc0).
# xmm1 is encoded in ModRM bits 0-2 and xmm2 in ModRM bits 3-5.
# Both operands are looked up with XMM_NUM; a non-XMM operand resolves
# to NUM_INVALID there and is encoded anyway, no diagnostic is raised.
.macro PSHUFB_XMM xmm1 xmm2 |
||||
XMM_NUM pshufb_opd1 \xmm1 |
||||
XMM_NUM pshufb_opd2 \xmm2 |
||||
PFX_OPD_SIZE |
||||
PFX_REX pshufb_opd1 pshufb_opd2 |
||||
.byte 0x0f, 0x38, 0x00 |
||||
MODRM 0xc0 pshufb_opd1 pshufb_opd2 |
||||
.endm |
||||
|
||||
# PCLMULQDQ imm8 xmm1 xmm2
# Hand-assembles a register-to-register PCLMULQDQ as raw bytes:
# 0x66, optional REX, 0x0f 0x3a 0x44, ModRM (mod bits 0xc0), imm8.
# xmm1 is encoded in ModRM bits 0-2 and xmm2 in ModRM bits 3-5; imm8 is
# appended unchanged as the final byte.
.macro PCLMULQDQ imm8 xmm1 xmm2 |
||||
XMM_NUM clmul_opd1 \xmm1 |
||||
XMM_NUM clmul_opd2 \xmm2 |
||||
PFX_OPD_SIZE |
||||
PFX_REX clmul_opd1 clmul_opd2 |
||||
.byte 0x0f, 0x3a, 0x44 |
||||
MODRM 0xc0 clmul_opd1 clmul_opd2 |
||||
.byte \imm8 |
||||
.endm |
||||
|
||||
# PEXTRD imm8 xmm gpr
# Hand-assembles a register form of PEXTRD as raw bytes:
# 0x66, optional REX, 0x0f 0x3a 0x16, ModRM (mod bits 0xc0), imm8.
# gpr is looked up as a 32-bit register (R32_NUM) and encoded in ModRM
# bits 0-2; xmm is looked up with XMM_NUM and encoded in ModRM bits 3-5.
# imm8 is appended unchanged as the final byte.
.macro PEXTRD imm8 xmm gpr |
||||
R32_NUM extrd_opd1 \gpr |
||||
XMM_NUM extrd_opd2 \xmm |
||||
PFX_OPD_SIZE |
||||
PFX_REX extrd_opd1 extrd_opd2 |
||||
.byte 0x0f, 0x3a, 0x16 |
||||
MODRM 0xc0 extrd_opd1 extrd_opd2 |
||||
.byte \imm8 |
||||
.endm |
||||
|
||||
# MOVQ_R64_XMM opd1 opd2
# Hand-assembles a 64-bit MOVQ between a general-purpose register and an
# XMM register as raw bytes: 0x66, REX with the W bit forced on, a
# two-byte opcode, ModRM (mod bits 0xc0).
# The direction is chosen from the class of opd1 (see TYPE):
#   opd1 is XMM  -> opcode 0x0f 0x7e, opd2 is looked up as a 64-bit GPR
#   otherwise    -> opcode 0x0f 0x6e, opd1 is looked up as a 64-bit GPR
#                   and opd2 as XMM
# In both directions the number of opd1 goes into ModRM bits 0-2 and the
# number of opd2 into ModRM bits 3-5.
# NOTE(review): for the 0x0f 0x7e direction this puts the XMM number in
# the low field and the GPR number in the middle field, the reverse of
# the 0x0f 0x6e direction. Confirm against the instruction encoding
# before relying on the XMM-to-GPR form.
.macro MOVQ_R64_XMM opd1 opd2 |
||||
TYPE movq_r64_xmm_opd1_type \opd1 |
||||
.if movq_r64_xmm_opd1_type == TYPE_XMM |
||||
XMM_NUM movq_r64_xmm_opd1 \opd1 |
||||
R64_NUM movq_r64_xmm_opd2 \opd2 |
||||
.else |
||||
R64_NUM movq_r64_xmm_opd1 \opd1 |
||||
XMM_NUM movq_r64_xmm_opd2 \opd2 |
||||
.endif |
||||
PFX_OPD_SIZE |
||||
PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 |
||||
.if movq_r64_xmm_opd1_type == TYPE_XMM |
||||
.byte 0x0f, 0x7e |
||||
.else |
||||
.byte 0x0f, 0x6e |
||||
.endif |
||||
MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 |
||||
.endm |
||||
|
||||
.text |
||||
|
||||
# Virtual Registers |
||||
# ARG1 |
||||
msg = %rdi |
||||
# ARG2 |
||||
digest = %rsi |
||||
# ARG3 |
||||
msglen = %rdx |
||||
T1 = %rcx |
||||
T2 = %r8 |
||||
a_64 = %r9 |
||||
b_64 = %r10 |
||||
c_64 = %r11 |
||||
d_64 = %r12 |
||||
e_64 = %r13 |
||||
f_64 = %r14 |
||||
g_64 = %r15 |
||||
h_64 = %rbx |
||||
tmp0 = %rax |
||||
|
||||
# Local variables (stack frame) |
||||
|
||||
W_SIZE = 80*8 |
||||
WK_SIZE = 2*8 |
||||
RSPSAVE_SIZE = 1*8 |
||||
GPRSAVE_SIZE = 5*8 |
||||
|
||||
frame_W = 0 |
||||
frame_WK = frame_W + W_SIZE |
||||
frame_RSPSAVE = frame_WK + WK_SIZE |
||||
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE |
||||
frame_size = frame_GPRSAVE + GPRSAVE_SIZE |
||||
|
||||
# Useful QWORD "arrays" for simpler memory references |
||||
# MSG, DIGEST, K_t, W_t are arrays |
||||
# WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even |
||||
|
||||
# Input message (arg1) |
||||
#define MSG(i) 8*i(msg) |
||||
|
||||
# Output Digest (arg2) |
||||
#define DIGEST(i) 8*i(digest) |
||||
|
||||
# SHA Constants (static mem) |
||||
#define K_t(i) 8*i+K512(%rip) |
||||
|
||||
# Message Schedule (stack frame) |
||||
#define W_t(i) 8*i+frame_W(%rsp) |
||||
|
||||
# W[t]+K[t] (stack frame) |
||||
#define WK_2(i) 8*((i%2))+frame_WK(%rsp) |
||||
|
||||
# RotateState emits no instructions. It only re-binds the assembler
# symbols a_64..h_64 so that each name takes over the register of its
# predecessor: the register that was h_64 becomes a_64, the old a_64
# becomes b_64, and so on. Code expanded after this macro therefore
# addresses the next round state without any register-to-register moves.
# Eight expansions restore the original binding.
.macro RotateState
|
||||
# Rotate symbols a..h right |
||||
TMP = h_64 |
||||
h_64 = g_64 |
||||
g_64 = f_64 |
||||
f_64 = e_64 |
||||
e_64 = d_64 |
||||
d_64 = c_64 |
||||
c_64 = b_64 |
||||
b_64 = a_64 |
||||
a_64 = TMP |
||||
.endm |
||||
|
||||
# SHA512_Round rnd
# Expands to one SHA-512 round on the working variables a_64..h_64.
# Input:   rnd selects which of the two WK_2 stack slots holds the
#          precomputed W[t]+K[t] for this round (slot index is rnd modulo 2).
# Scratch: T1, T2 and tmp0 are overwritten; no XMM register is touched.
# Result:  d_64 receives d + T1 (the next e) and h_64 receives
#          T1 + Maj(a,b,c) + S0(a) (the next a); RotateState then renames
#          the symbols so the following round sees them as e and a.
# The bare number after each ror is the total rotation that the term
# entering at that step has accumulated once the chain is complete
# (23+4+14 = 41, 4+14 = 18, 14 for S1; 5+6+28 = 39, 6+28 = 34, 28 for S0).
.macro SHA512_Round rnd |
||||
|
||||
# Compute Round t |
||||
mov f_64, T1 # T1 = f |
||||
mov e_64, tmp0 # tmp = e |
||||
xor g_64, T1 # T1 = f ^ g |
||||
ror $23, tmp0 # 41 # tmp = e ror 23 |
||||
and e_64, T1 # T1 = (f ^ g) & e |
||||
xor e_64, tmp0 # tmp = (e ror 23) ^ e |
||||
xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g) |
||||
idx = \rnd |
||||
add WK_2(idx), T1 # W[t] + K[t] from message scheduler |
||||
ror $4, tmp0 # 18 # tmp = ((e ror 23) ^ e) ror 4 |
||||
xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e |
||||
mov a_64, T2 # T2 = a |
||||
add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h |
||||
ror $14, tmp0 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) |
||||
add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h + S1(e) |
||||
mov a_64, tmp0 # tmp = a |
||||
xor c_64, T2 # T2 = a ^ c |
||||
and c_64, tmp0 # tmp = a & c |
||||
and b_64, T2 # T2 = (a ^ c) & b |
||||
xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) |
||||
mov a_64, tmp0 # tmp = a |
||||
ror $5, tmp0 # 39 # tmp = a ror 5 |
||||
xor a_64, tmp0 # tmp = (a ror 5) ^ a |
||||
add T1, d_64 # e(next_state) = d + T1 |
||||
ror $6, tmp0 # 34 # tmp = ((a ror 5) ^ a) ror 6 |
||||
xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a |
||||
lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c) |
||||
ror $28, tmp0 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) |
||||
add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) + S0(a) |
||||
RotateState |
||||
.endm |
||||
|
||||
.macro SHA512_2Sched_2Round_sse rnd |
||||
|
||||
# Compute rounds t-2 and t-1 |
||||
# Compute message schedule QWORDS t and t+1 |
||||
|
||||
# Two rounds are computed based on the values for K[t-2]+W[t-2] and |
||||
# K[t-1]+W[t-1] which were previously stored at WK_2 by the message |
||||
# scheduler. |
||||
# The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)]. |
||||
# They are then added to their respective SHA512 constants at |
||||
# [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] |
||||
# For brevity, the comments following vectored instructions only refer to |
||||
# the first of a pair of QWORDS. |
||||
# Eg. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]} |
||||
# The computation of the message schedule and the rounds are tightly |
||||
# stitched to take advantage of instruction-level parallelism. |
||||
# For clarity, integer instructions (for the rounds calculation) are indented |
||||
# by one tab. Vectored instructions (for the message scheduler) are indented |
||||
# by two tabs. |
||||
|
||||
mov f_64, T1 |
||||
idx = \rnd -2 |
||||
movdqa W_t(idx), %xmm2 # XMM2 = W[t-2] |
||||
xor g_64, T1 |
||||
and e_64, T1 |
||||
movdqa %xmm2, %xmm0 # XMM0 = W[t-2] |
||||
xor g_64, T1 |
||||
idx = \rnd |
||||
add WK_2(idx), T1 |
||||
idx = \rnd - 15 |
||||
movdqu W_t(idx), %xmm5 # XMM5 = W[t-15] |
||||
mov e_64, tmp0 |
||||
ror $23, tmp0 # 41 |
||||
movdqa %xmm5, %xmm3 # XMM3 = W[t-15] |
||||
xor e_64, tmp0 |
||||
ror $4, tmp0 # 18 |
||||
psrlq $61-19, %xmm0 # XMM0 = W[t-2] >> 42 |
||||
xor e_64, tmp0 |
||||
ror $14, tmp0 # 14 |
||||
psrlq $(8-7), %xmm3 # XMM3 = W[t-15] >> 1 |
||||
add tmp0, T1 |
||||
add h_64, T1 |
||||
pxor %xmm2, %xmm0 # XMM0 = (W[t-2] >> 42) ^ W[t-2] |
||||
mov a_64, T2 |
||||
xor c_64, T2 |
||||
pxor %xmm5, %xmm3 # XMM3 = (W[t-15] >> 1) ^ W[t-15] |
||||
and b_64, T2 |
||||
mov a_64, tmp0 |
||||
psrlq $(19-6), %xmm0 # XMM0 = ((W[t-2]>>42)^W[t-2])>>13 |
||||
and c_64, tmp0 |
||||
xor tmp0, T2 |
||||
psrlq $(7-1), %xmm3 # XMM3 = ((W[t-15]>>1)^W[t-15])>>6 |
||||
mov a_64, tmp0 |
||||
ror $5, tmp0 # 39 |
||||
pxor %xmm2, %xmm0 # XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2] |
||||
xor a_64, tmp0 |
||||
ror $6, tmp0 # 34 |
||||
pxor %xmm5, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15] |
||||
xor a_64, tmp0 |
||||
ror $28, tmp0 # 28 |
||||
psrlq $6, %xmm0 # XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6 |
||||
add tmp0, T2 |
||||
add T1, d_64 |
||||
psrlq $1, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]>>1 |
||||
lea (T1, T2), h_64 |
||||
RotateState |
||||
movdqa %xmm2, %xmm1 # XMM1 = W[t-2] |
||||
mov f_64, T1 |
||||
xor g_64, T1 |
||||
movdqa %xmm5, %xmm4 # XMM4 = W[t-15] |
||||
and e_64, T1 |
||||
xor g_64, T1 |
||||
psllq $(64-19)-(64-61) , %xmm1 # XMM1 = W[t-2] << 42 |
||||
idx = \rnd + 1 |
||||
add WK_2(idx), T1 |
||||
mov e_64, tmp0 |
||||
psllq $(64-1)-(64-8), %xmm4 # XMM4 = W[t-15] << 7 |
||||
ror $23, tmp0 # 41 |
||||
xor e_64, tmp0 |
||||
pxor %xmm2, %xmm1 # XMM1 = (W[t-2] << 42)^W[t-2] |
||||
ror $4, tmp0 # 18 |
||||
xor e_64, tmp0 |
||||
pxor %xmm5, %xmm4 # XMM4 = (W[t-15]<<7)^W[t-15] |
||||
ror $14, tmp0 # 14 |
||||
add tmp0, T1 |
||||
psllq $(64-61), %xmm1 # XMM1 = ((W[t-2] << 42)^W[t-2])<<3 |
||||
add h_64, T1 |
||||
mov a_64, T2 |
||||
psllq $(64-8), %xmm4 # XMM4 = ((W[t-15]<<7)^W[t-15])<<56 |
||||
xor c_64, T2 |
||||
and b_64, T2 |
||||
pxor %xmm1, %xmm0 # XMM0 = s1(W[t-2]) |
||||
mov a_64, tmp0 |
||||
and c_64, tmp0 |
||||
idx = \rnd - 7 |
||||
movdqu W_t(idx), %xmm1 # XMM1 = W[t-7] |
||||
xor tmp0, T2 |
||||
pxor %xmm4, %xmm3 # XMM3 = s0(W[t-15]) |
||||
mov a_64, tmp0 |
||||
paddq %xmm3, %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) |
||||
ror $5, tmp0 # 39 |
||||
idx =\rnd-16 |
||||
paddq W_t(idx), %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16] |
||||
xor a_64, tmp0 |
||||
paddq %xmm1, %xmm0 # XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] |
||||
ror $6, tmp0 # 34 |
||||
movdqa %xmm0, W_t(\rnd) # Store scheduled qwords |
||||
xor a_64, tmp0 |
||||
paddq K_t(\rnd), %xmm0 # Compute W[t]+K[t] |
||||
ror $28, tmp0 # 28 |
||||
idx = \rnd |
||||
movdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds |
||||
add tmp0, T2 |
||||
add T1, d_64 |
||||
lea (T1, T2), h_64 |
||||
RotateState |
||||
.endm |
||||
|
||||
######################################################################## |
||||
# void sha512_transform_ssse3(const void* M, void* D, u64 L) |
||||
# Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
||||
# The size of the message pointed to by M must be an integer multiple of SHA512 |
||||
# message blocks. |
||||
# L is the message length in SHA512 blocks. |
||||
######################################################################## |
||||
# sha512_transform_ssse3: entry point of the SSSE3 SHA-512 block function.
# Register interface, as bound by the virtual register symbols:
#   msg    (%rdi) - pointer to the input; advanced by 128 bytes per block
#   digest (%rsi) - pointer to eight 64-bit state words, updated in place
#   msglen (%rdx) - number of blocks to process; zero returns immediately
# Clobbers %rax, %rcx, %r8-%r11 and %xmm0-%xmm5 (see the round and
# scheduler macros); %rbx and %r12-%r15 are saved in the frame and restored.
# Stack frame, relative to the realigned %rsp: W (message schedule),
# WK (two W[t]+K[t] slots), the saved original %rsp, the saved registers.
ENTRY(sha512_transform_ssse3) |
||||
|
||||
# Zero blocks requested: return before any frame is set up.
cmp $0, msglen |
||||
je nowork |
||||
|
||||
# Allocate Stack Space |
||||
# The caller %rsp is parked in %rax and then stored inside the frame,
# because the alignment mask below makes the adjustment irreversible.
mov %rsp, %rax |
||||
sub $frame_size, %rsp |
||||
and $~(0x20 - 1), %rsp |
||||
mov %rax, frame_RSPSAVE(%rsp) |
||||
|
||||
# Save GPRs |
||||
# These five registers back the working-variable symbols d_64..h_64.
mov %rbx, frame_GPRSAVE(%rsp) |
||||
mov %r12, frame_GPRSAVE +8*1(%rsp) |
||||
mov %r13, frame_GPRSAVE +8*2(%rsp) |
||||
mov %r14, frame_GPRSAVE +8*3(%rsp) |
||||
mov %r15, frame_GPRSAVE +8*4(%rsp) |
||||
|
||||
updateblock: |
||||
|
||||
# Load state variables |
||||
mov DIGEST(0), a_64 |
||||
mov DIGEST(1), b_64 |
||||
mov DIGEST(2), c_64 |
||||
mov DIGEST(3), d_64 |
||||
mov DIGEST(4), e_64 |
||||
mov DIGEST(5), f_64 |
||||
mov DIGEST(6), g_64 |
||||
mov DIGEST(7), h_64 |
||||
|
||||
# The loop below is unrolled at assembly time; t is an assembler symbol
# that steps through 0, 2, ..., 80 and selects the code emitted per step.
t = 0 |
||||
.rept 80/2 + 1 |
||||
# (80 rounds) / (2 rounds/iteration) + (1 iteration) |
||||
# +1 iteration because the scheduler leads hashing by 1 iteration |
||||
.if t < 2 |
||||
# BSWAP 2 QWORDS |
||||
movdqa XMM_QWORD_BSWAP(%rip), %xmm1 |
||||
movdqu MSG(t), %xmm0 |
||||
pshufb %xmm1, %xmm0 # BSWAP |
||||
movdqa %xmm0, W_t(t) # Store Scheduled Pair |
||||
paddq K_t(t), %xmm0 # Compute W[t]+K[t] |
||||
movdqa %xmm0, WK_2(t) # Store into WK for rounds |
||||
.elseif t < 16 |
||||
# BSWAP 2 QWORDS, Compute 2 Rounds |
||||
# xmm1 still holds the byte-swap mask loaded in the t < 2 step;
# SHA512_Round uses no XMM register, so the mask survives the rounds.
movdqu MSG(t), %xmm0 |
||||
pshufb %xmm1, %xmm0 # BSWAP |
||||
SHA512_Round t-2 # Round t-2 |
||||
movdqa %xmm0, W_t(t) # Store Scheduled Pair |
||||
paddq K_t(t), %xmm0 # Compute W[t]+K[t] |
||||
SHA512_Round t-1 # Round t-1 |
||||
movdqa %xmm0, WK_2(t) # Store W[t]+K[t] into WK |
||||
.elseif t < 79 |
||||
# Schedule 2 QWORDS, Compute 2 Rounds |
||||
SHA512_2Sched_2Round_sse t |
||||
.else |
||||
# Compute 2 Rounds |
||||
SHA512_Round t-2 |
||||
SHA512_Round t-1 |
||||
.endif |
||||
t = t+2 |
||||
.endr |
||||
|
||||
# Update digest |
||||
# RotateState was expanded once per round, eighty times in total, which
# is ten full cycles of the eight names; a_64..h_64 therefore name the
# same registers here as when the state was loaded at updateblock.
add a_64, DIGEST(0) |
||||
add b_64, DIGEST(1) |
||||
add c_64, DIGEST(2) |
||||
add d_64, DIGEST(3) |
||||
add e_64, DIGEST(4) |
||||
add f_64, DIGEST(5) |
||||
add g_64, DIGEST(6) |
||||
add h_64, DIGEST(7) |
||||
|
||||
# Advance to next message block |
||||
add $16*8, msg |
||||
dec msglen |
||||
jnz updateblock |
||||
|
||||
# Restore GPRs |
||||
mov frame_GPRSAVE(%rsp), %rbx |
||||
mov frame_GPRSAVE +8*1(%rsp), %r12 |
||||
mov frame_GPRSAVE +8*2(%rsp), %r13 |
||||
mov frame_GPRSAVE +8*3(%rsp), %r14 |
||||
mov frame_GPRSAVE +8*4(%rsp), %r15 |
||||
|
||||
# Restore Stack Pointer |
||||
mov frame_RSPSAVE(%rsp), %rsp |
||||
|
||||
nowork: |
||||
ret |
||||
ENDPROC(sha512_transform_ssse3) |
||||
|
||||
######################################################################## |
||||
### Binary Data |
||||
|
||||
.data |
||||
|
||||
.align 16
|
||||
|
||||
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. |
||||
XMM_QWORD_BSWAP: |
||||
.octa 0x08090a0b0c0d0e0f0001020304050607
|
||||
|
||||
# K[t] used in SHA512 hashing |
||||
K512: |
||||
.quad 0x428a2f98d728ae22,0x7137449123ef65cd |
||||
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc |
||||
.quad 0x3956c25bf348b538,0x59f111f1b605d019 |
||||
.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 |
||||
.quad 0xd807aa98a3030242,0x12835b0145706fbe |
||||
.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 |
||||
.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 |
||||
.quad 0x9bdc06a725c71235,0xc19bf174cf692694 |
||||
.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 |
||||
.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 |
||||
.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 |
||||
.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 |
||||
.quad 0x983e5152ee66dfab,0xa831c66d2db43210 |
||||
.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 |
||||
.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 |
||||
.quad 0x06ca6351e003826f,0x142929670a0e6e70 |
||||
.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 |
||||
.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df |
||||
.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 |
||||
.quad 0x81c2c92e47edaee6,0x92722c851482353b |
||||
.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 |
||||
.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 |
||||
.quad 0xd192e819d6ef5218,0xd69906245565a910 |
||||
.quad 0xf40e35855771202a,0x106aa07032bbd1b8 |
||||
.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 |
||||
.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 |
||||
.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb |
||||
.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 |
||||
.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 |
||||
.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec |
||||
.quad 0x90befffa23631e28,0xa4506cebde82bde9 |
||||
.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b |
||||
.quad 0xca273eceea26619c,0xd186b8c721c0c207 |
||||
.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 |
||||
.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 |
||||
.quad 0x113f9804bef90dae,0x1b710b35131c471b |
||||
.quad 0x28db77f523047d84,0x32caab7b40c72493 |
||||
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c |
||||
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a |
||||
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 |
||||
#endif |
@ -1,41 +0,0 @@ |
||||
// +build freebsd darwin windows 386 arm !cgo
|
||||
|
||||
/* |
||||
* Minio Cloud Storage, (C) 2014-2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package sha512 |
||||
|
||||
import ( |
||||
"hash" |
||||
|
||||
"crypto/sha512" |
||||
) |
||||
|
||||
const (
	// Size is the length, in bytes, of a SHA512 checksum.
	Size = 64

	// BlockSize is the block length, in bytes, that SHA512 operates on.
	BlockSize = 128
)
||||
|
||||
// New returns a hash.Hash that computes SHA512 checksums by delegating
// to the standard library implementation.
func New() hash.Hash {
	h := sha512.New()
	return h
}
||||
|
||||
// Sum512 - single caller sha512 helper
|
||||
func Sum512(data []byte) [Size]byte { |
||||
return sha512.Sum512(data) |
||||
} |
@ -1,166 +0,0 @@ |
||||
// +build linux,amd64,cgo
|
||||
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file of
|
||||
// Golang project:
|
||||
// https://github.com/golang/go/blob/master/LICENSE
|
||||
|
||||
// Using this part of Minio codebase under the license
|
||||
// Apache License Version 2.0 with modifications
|
||||
|
||||
// Package sha512 implements the SHA512 hash algorithms as defined
|
||||
// in FIPS 180-2.
|
||||
package sha512 |
||||
|
||||
import ( |
||||
"hash" |
||||
|
||||
"github.com/klauspost/cpuid" |
||||
) |
||||
|
||||
const (
	// Size is the length, in bytes, of a SHA512 checksum.
	Size = 64

	// BlockSize is the block length, in bytes, that SHA512 operates on.
	BlockSize = 128
)
||||
|
||||
const (
	// chunk is the number of message bytes consumed per compression step.
	chunk = 128

	// init0 through init7 are the values Reset loads into the eight
	// 64-bit words of the hash state.
	init0 = 0x6a09e667f3bcc908
	init1 = 0xbb67ae8584caa73b
	init2 = 0x3c6ef372fe94f82b
	init3 = 0xa54ff53a5f1d36f1
	init4 = 0x510e527fade682d1
	init5 = 0x9b05688c2b3e6c1f
	init6 = 0x1f83d9abfb41bd6b
	init7 = 0x5be0cd19137e2179
)
||||
|
||||
// digest represents the partial evaluation of a checksum.
|
||||
type digest struct { |
||||
h [8]uint64 |
||||
x [chunk]byte |
||||
nx int |
||||
len uint64 |
||||
} |
||||
|
||||
func block(dig *digest, p []byte) { |
||||
switch true { |
||||
case cpuid.CPU.AVX2(): |
||||
blockAVX2(dig, p) |
||||
case cpuid.CPU.AVX(): |
||||
blockAVX(dig, p) |
||||
case cpuid.CPU.SSSE3(): |
||||
blockSSE(dig, p) |
||||
default: |
||||
blockGeneric(dig, p) |
||||
} |
||||
} |
||||
|
||||
// Reset digest to its default value
|
||||
func (d *digest) Reset() { |
||||
d.h[0] = init0 |
||||
d.h[1] = init1 |
||||
d.h[2] = init2 |
||||
d.h[3] = init3 |
||||
d.h[4] = init4 |
||||
d.h[5] = init5 |
||||
d.h[6] = init6 |
||||
d.h[7] = init7 |
||||
d.nx = 0 |
||||
d.len = 0 |
||||
} |
||||
|
||||
// New returns a new hash.Hash computing the SHA512 checksum.
|
||||
func New() hash.Hash { |
||||
d := new(digest) |
||||
d.Reset() |
||||
return d |
||||
} |
||||
|
||||
// Sum512 - single caller sha512 helper
|
||||
func Sum512(data []byte) [Size]byte { |
||||
var d digest |
||||
d.Reset() |
||||
d.Write(data) |
||||
return d.checkSum() |
||||
} |
||||
|
||||
// Return output array byte size
|
||||
func (d *digest) Size() int { return Size } |
||||
|
||||
// Return blockSize
|
||||
func (d *digest) BlockSize() int { return BlockSize } |
||||
|
||||
// Write blocks
|
||||
func (d *digest) Write(p []byte) (nn int, err error) { |
||||
nn = len(p) |
||||
d.len += uint64(nn) |
||||
if d.nx > 0 { |
||||
n := copy(d.x[d.nx:], p) |
||||
d.nx += n |
||||
if d.nx == chunk { |
||||
block(d, d.x[:]) |
||||
d.nx = 0 |
||||
} |
||||
p = p[n:] |
||||
} |
||||
if len(p) >= chunk { |
||||
n := len(p) &^ (chunk - 1) |
||||
block(d, p[:n]) |
||||
p = p[n:] |
||||
} |
||||
if len(p) > 0 { |
||||
d.nx = copy(d.x[:], p) |
||||
} |
||||
return |
||||
} |
||||
|
||||
// Calculate sha512
|
||||
func (d *digest) Sum(in []byte) []byte { |
||||
// Make a copy of d0 so that caller can keep writing and summing.
|
||||
d0 := *d |
||||
hash := d0.checkSum() |
||||
return append(in, hash[:]...) |
||||
} |
||||
|
||||
// internal checksum calculation, returns [Size]byte
|
||||
func (d *digest) checkSum() [Size]byte { |
||||
// Padding. Add a 1 bit and 0 bits until 112 bytes mod 128.
|
||||
len := d.len |
||||
var tmp [128]byte |
||||
tmp[0] = 0x80 |
||||
if len%128 < 112 { |
||||
d.Write(tmp[0 : 112-len%128]) |
||||
} else { |
||||
d.Write(tmp[0 : 128+112-len%128]) |
||||
} |
||||
|
||||
// Length in bits.
|
||||
len <<= 3 |
||||
for i := uint(0); i < 16; i++ { |
||||
tmp[i] = byte(len >> (120 - 8*i)) |
||||
} |
||||
d.Write(tmp[0:16]) |
||||
|
||||
if d.nx != 0 { |
||||
panic("d.nx != 0") |
||||
} |
||||
|
||||
h := d.h[:] |
||||
|
||||
var digest [Size]byte |
||||
for i, s := range h { |
||||
digest[i*8] = byte(s >> 56) |
||||
digest[i*8+1] = byte(s >> 48) |
||||
digest[i*8+2] = byte(s >> 40) |
||||
digest[i*8+3] = byte(s >> 32) |
||||
digest[i*8+4] = byte(s >> 24) |
||||
digest[i*8+5] = byte(s >> 16) |
||||
digest[i*8+6] = byte(s >> 8) |
||||
digest[i*8+7] = byte(s) |
||||
} |
||||
|
||||
return digest |
||||
} |
@ -1,141 +0,0 @@ |
||||
/* |
||||
* Minio Cloud Storage, (C) 2014-2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file of
|
||||
// Golang project:
|
||||
// https://github.com/golang/go/blob/master/LICENSE
|
||||
|
||||
// Using this part of Minio codebase under the license
|
||||
// Apache License Version 2.0 with modifications
|
||||
|
||||
// SHA512 hash algorithm. See FIPS 180-2.
|
||||
|
||||
package sha512 |
||||
|
||||
import ( |
||||
"fmt" |
||||
"io" |
||||
"testing" |
||||
) |
||||
|
||||
// sha512Test pairs an input string with its expected SHA512 checksum.
type sha512Test struct {
	// out is the expected checksum as lowercase hex.
	out string
	// in is the message to hash.
	in string
}
||||
|
||||
var golden = []sha512Test{ |
||||
{"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", ""}, |
||||
{"1f40fc92da241694750979ee6cf582f2d5d7d28e18335de05abc54d0560e0f5302860c652bf08d560252aa5e74210546f369fbbbce8c12cfc7957b2652fe9a75", "a"}, |
||||
{"2d408a0717ec188158278a796c689044361dc6fdde28d6f04973b80896e1823975cdbf12eb63f9e0591328ee235d80e9b5bf1aa6a44f4617ff3caf6400eb172d", "ab"}, |
||||
{"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f", "abc"}, |
||||
{"d8022f2060ad6efd297ab73dcc5355c9b214054b0d1776a136a669d26a7d3b14f73aa0d0ebff19ee333368f0164b6419a96da49e3e481753e7e96b716bdccb6f", "abcd"}, |
||||
{"878ae65a92e86cac011a570d4c30a7eaec442b85ce8eca0c2952b5e3cc0628c2e79d889ad4d5c7c626986d452dd86374b6ffaa7cd8b67665bef2289a5c70b0a1", "abcde"}, |
||||
{"e32ef19623e8ed9d267f657a81944b3d07adbb768518068e88435745564e8d4150a0a703be2a7d88b61e3d390c2bb97e2d4c311fdc69d6b1267f05f59aa920e7", "abcdef"}, |
||||
{"d716a4188569b68ab1b6dfac178e570114cdf0ea3a1cc0e31486c3e41241bc6a76424e8c37ab26f096fc85ef9886c8cb634187f4fddff645fb099f1ff54c6b8c", "abcdefg"}, |
||||
{"a3a8c81bc97c2560010d7389bc88aac974a104e0e2381220c6e084c4dccd1d2d17d4f86db31c2a851dc80e6681d74733c55dcd03dd96f6062cdda12a291ae6ce", "abcdefgh"}, |
||||
{"f22d51d25292ca1d0f68f69aedc7897019308cc9db46efb75a03dd494fc7f126c010e8ade6a00a0c1a5f1b75d81e0ed5a93ce98dc9b833db7839247b1d9c24fe", "abcdefghi"}, |
||||
{"ef6b97321f34b1fea2169a7db9e1960b471aa13302a988087357c520be957ca119c3ba68e6b4982c019ec89de3865ccf6a3cda1fe11e59f98d99f1502c8b9745", "abcdefghij"}, |
||||
{"2210d99af9c8bdecda1b4beff822136753d8342505ddce37f1314e2cdbb488c6016bdaa9bd2ffa513dd5de2e4b50f031393d8ab61f773b0e0130d7381e0f8a1d", "Discard medicine more than two years old."}, |
||||
{"a687a8985b4d8d0a24f115fe272255c6afaf3909225838546159c1ed685c211a203796ae8ecc4c81a5b6315919b3a64f10713da07e341fcdbb08541bf03066ce", "He who has a shady past knows that nice guys finish last."}, |
||||
{"8ddb0392e818b7d585ab22769a50df660d9f6d559cca3afc5691b8ca91b8451374e42bcdabd64589ed7c91d85f626596228a5c8572677eb98bc6b624befb7af8", "I wouldn't marry him with a ten foot pole."}, |
||||
{"26ed8f6ca7f8d44b6a8a54ae39640fa8ad5c673f70ee9ce074ba4ef0d483eea00bab2f61d8695d6b34df9c6c48ae36246362200ed820448bdc03a720366a87c6", "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, |
||||
{"e5a14bf044be69615aade89afcf1ab0389d5fc302a884d403579d1386a2400c089b0dbb387ed0f463f9ee342f8244d5a38cfbc0e819da9529fbff78368c9a982", "The days of the digital watch are numbered. -Tom Stoppard"}, |
||||
{"420a1faa48919e14651bed45725abe0f7a58e0f099424c4e5a49194946e38b46c1f8034b18ef169b2e31050d1648e0b982386595f7df47da4b6fd18e55333015", "Nepal premier won't resign."}, |
||||
{"d926a863beadb20134db07683535c72007b0e695045876254f341ddcccde132a908c5af57baa6a6a9c63e6649bba0c213dc05fadcf9abccea09f23dcfb637fbe", "For every action there is an equal and opposite government program."}, |
||||
{"9a98dd9bb67d0da7bf83da5313dff4fd60a4bac0094f1b05633690ffa7f6d61de9a1d4f8617937d560833a9aaa9ccafe3fd24db418d0e728833545cadd3ad92d", "His money is twice tainted: 'taint yours and 'taint mine."}, |
||||
{"d7fde2d2351efade52f4211d3746a0780a26eec3df9b2ed575368a8a1c09ec452402293a8ea4eceb5a4f60064ea29b13cdd86918cd7a4faf366160b009804107", "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, |
||||
{"b0f35ffa2697359c33a56f5c0cf715c7aeed96da9905ca2698acadb08fbc9e669bf566b6bd5d61a3e86dc22999bcc9f2224e33d1d4f32a228cf9d0349e2db518", "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, |
||||
{"3d2e5f91778c9e66f7e061293aaa8a8fc742dd3b2e4f483772464b1144189b49273e610e5cccd7a81a19ca1fa70f16b10f1a100a4d8c1372336be8484c64b311", "size: a.out: bad magic"}, |
||||
{"b2f68ff58ac015efb1c94c908b0d8c2bf06f491e4de8e6302c49016f7f8a33eac3e959856c7fddbc464de618701338a4b46f76dbfaf9a1e5262b5f40639771c7", "The major problem is with sendmail. -Mark Horton"}, |
||||
{"d8c92db5fdf52cf8215e4df3b4909d29203ff4d00e9ad0b64a6a4e04dec5e74f62e7c35c7fb881bd5de95442123df8f57a489b0ae616bd326f84d10021121c57", "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, |
||||
{"19a9f8dc0a233e464e8566ad3ca9b91e459a7b8c4780985b015776e1bf239a19bc233d0556343e2b0a9bc220900b4ebf4f8bdf89ff8efeaf79602d6849e6f72e", "If the enemy is within range, then so are you."}, |
||||
{"00b4c41f307bde87301cdc5b5ab1ae9a592e8ecbb2021dd7bc4b34e2ace60741cc362560bec566ba35178595a91932b8d5357e2c9cec92d393b0fa7831852476", "It's well we cannot hear the screams/That we create in others' dreams."}, |
||||
{"91eccc3d5375fd026e4d6787874b1dce201cecd8a27dbded5065728cb2d09c58a3d467bb1faf353bf7ba567e005245d5321b55bc344f7c07b91cb6f26c959be7", "You remind me of a TV show, but that's all right: I watch it anyway."}, |
||||
{"fabbbe22180f1f137cfdc9556d2570e775d1ae02a597ded43a72a40f9b485d500043b7be128fb9fcd982b83159a0d99aa855a9e7cc4240c00dc01a9bdf8218d7", "C is as portable as Stonehedge!!"}, |
||||
{"2ecdec235c1fa4fc2a154d8fba1dddb8a72a1ad73838b51d792331d143f8b96a9f6fcb0f34d7caa351fe6d88771c4f105040e0392f06e0621689d33b2f3ba92e", "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"}, |
||||
{"7ad681f6f96f82f7abfa7ecc0334e8fa16d3dc1cdc45b60b7af43fe4075d2357c0c1d60e98350f1afb1f2fe7a4d7cd2ad55b88e458e06b73c40b437331f5dab4", "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, |
||||
{"833f9248ab4a3b9e5131f745fda1ffd2dd435b30e965957e78291c7ab73605fd1912b0794e5c233ab0a12d205a39778d19b83515d6a47003f19cdee51d98c7e0", "How can you write a big system without C++? -Paul Glick"}, |
||||
} |
||||
|
||||
func TestGolden(t *testing.T) { |
||||
for i := 0; i < len(golden); i++ { |
||||
g := golden[i] |
||||
s := fmt.Sprintf("%x", Sum512([]byte(g.in))) |
||||
if s != g.out { |
||||
t.Fatalf("Sum512 function: sha512(%s) = %s want %s", g.in, s, g.out) |
||||
} |
||||
c := New() |
||||
for j := 0; j < 3; j++ { |
||||
if j < 2 { |
||||
io.WriteString(c, g.in) |
||||
} else { |
||||
io.WriteString(c, g.in[0:len(g.in)/2]) |
||||
c.Sum(nil) |
||||
io.WriteString(c, g.in[len(g.in)/2:]) |
||||
} |
||||
s := fmt.Sprintf("%x", c.Sum(nil)) |
||||
if s != g.out { |
||||
t.Fatalf("sha512[%d](%s) = %s want %s", j, g.in, s, g.out) |
||||
} |
||||
c.Reset() |
||||
} |
||||
} |
||||
} |
||||
|
||||
func TestSize(t *testing.T) { |
||||
c := New() |
||||
if got := c.Size(); got != Size { |
||||
t.Errorf("Size = %d; want %d", got, Size) |
||||
} |
||||
} |
||||
|
||||
func TestBlockSize(t *testing.T) { |
||||
c := New() |
||||
if got := c.BlockSize(); got != BlockSize { |
||||
t.Errorf("BlockSize = %d; want %d", got, BlockSize) |
||||
} |
||||
} |
||||
|
||||
var bench = New() |
||||
var buf = make([]byte, 1024*1024) |
||||
|
||||
func benchmarkSize(b *testing.B, size int) { |
||||
b.SetBytes(int64(size)) |
||||
sum := make([]byte, bench.Size()) |
||||
for i := 0; i < b.N; i++ { |
||||
bench.Reset() |
||||
bench.Write(buf[:size]) |
||||
bench.Sum(sum[:0]) |
||||
} |
||||
} |
||||
|
||||
func BenchmarkHash8Bytes(b *testing.B) { |
||||
benchmarkSize(b, 8) |
||||
} |
||||
|
||||
func BenchmarkHash1K(b *testing.B) { |
||||
benchmarkSize(b, 1024) |
||||
} |
||||
|
||||
func BenchmarkHash8K(b *testing.B) { |
||||
benchmarkSize(b, 8192) |
||||
} |
||||
|
||||
func BenchmarkHash1M(b *testing.B) { |
||||
benchmarkSize(b, 1024*1024) |
||||
} |
@ -1,181 +0,0 @@ |
||||
// +build linux,amd64,cgo
|
||||
|
||||
/* |
||||
* Minio Cloud Storage, (C) 2014-2016 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
// Software block transform is provided by The Go Authors:
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file of
|
||||
// Golang project:
|
||||
// https://github.com/golang/go/blob/master/LICENSE
|
||||
|
||||
package sha512 |
||||
|
||||
// #cgo CFLAGS: -DHAS_SSE41 -DHAS_AVX -DHAS_AVX2
|
||||
// #include <stdint.h>
|
||||
// void sha512_transform_ssse3 (const void* M, void* D, uint64_t L);
|
||||
// void sha512_transform_avx (const void* M, void* D, uint64_t L);
|
||||
// void sha512_transform_rorx (const void* M, void* D, uint64_t L);
|
||||
import "C" |
||||
import ( |
||||
"unsafe" |
||||
) |
||||
|
||||
func blockSSE(dig *digest, p []byte) { |
||||
C.sha512_transform_ssse3(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) |
||||
} |
||||
|
||||
func blockAVX(dig *digest, p []byte) { |
||||
C.sha512_transform_avx(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) |
||||
} |
||||
|
||||
func blockAVX2(dig *digest, p []byte) { |
||||
C.sha512_transform_rorx(unsafe.Pointer(&p[0]), unsafe.Pointer(&dig.h[0]), (C.uint64_t)(len(p)/chunk)) |
||||
} |
||||
|
||||
func blockGeneric(dig *digest, p []byte) { |
||||
var w [80]uint64 |
||||
h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] |
||||
for len(p) >= chunk { |
||||
for i := 0; i < 16; i++ { |
||||
j := i * 8 |
||||
w[i] = uint64(p[j])<<56 | uint64(p[j+1])<<48 | uint64(p[j+2])<<40 | uint64(p[j+3])<<32 | |
||||
uint64(p[j+4])<<24 | uint64(p[j+5])<<16 | uint64(p[j+6])<<8 | uint64(p[j+7]) |
||||
} |
||||
for i := 16; i < 80; i++ { |
||||
v1 := w[i-2] |
||||
t1 := (v1>>19 | v1<<(64-19)) ^ (v1>>61 | v1<<(64-61)) ^ (v1 >> 6) |
||||
v2 := w[i-15] |
||||
t2 := (v2>>1 | v2<<(64-1)) ^ (v2>>8 | v2<<(64-8)) ^ (v2 >> 7) |
||||
|
||||
w[i] = t1 + w[i-7] + t2 + w[i-16] |
||||
} |
||||
|
||||
a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7 |
||||
|
||||
for i := 0; i < 80; i++ { |
||||
t1 := h + ((e>>14 | e<<(64-14)) ^ (e>>18 | e<<(64-18)) ^ (e>>41 | e<<(64-41))) + ((e & f) ^ (^e & g)) + _K[i] + w[i] |
||||
|
||||
t2 := ((a>>28 | a<<(64-28)) ^ (a>>34 | a<<(64-34)) ^ (a>>39 | a<<(64-39))) + ((a & b) ^ (a & c) ^ (b & c)) |
||||
|
||||
h = g |
||||
g = f |
||||
f = e |
||||
e = d + t1 |
||||
d = c |
||||
c = b |
||||
b = a |
||||
a = t1 + t2 |
||||
} |
||||
|
||||
h0 += a |
||||
h1 += b |
||||
h2 += c |
||||
h3 += d |
||||
h4 += e |
||||
h5 += f |
||||
h6 += g |
||||
h7 += h |
||||
|
||||
p = p[chunk:] |
||||
} |
||||
|
||||
dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 |
||||
} |
||||
|
||||
// _K holds the 80 round constants used by blockGeneric: entry i is added into
// the first temporary of round i.
var _K = []uint64{
	0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
	0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
	0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
	0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
	0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
	0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
	0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
	0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
	0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
	0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
	0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
	0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
	0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
	0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
	0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
	0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
	0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
	0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
	0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
	0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
}
@ -0,0 +1,23 @@ |
||||
Go implementation of the BLAKE2b collision-resistant cryptographic hash function |
||||
created by Jean-Philippe Aumasson, Samuel Neves, Zooko Wilcox-O'Hearn, and |
||||
Christian Winnerlein (https://blake2.net). |
||||
|
||||
INSTALLATION |
||||
|
||||
$ go get github.com/dchest/blake2b |
||||
|
||||
|
||||
DOCUMENTATION |
||||
|
||||
See http://godoc.org/github.com/dchest/blake2b |
||||
|
||||
|
||||
PUBLIC DOMAIN DEDICATION |
||||
|
||||
Written in 2012 by Dmitry Chestnykh. |
||||
|
||||
To the extent possible under law, the author has dedicated all copyright |
||||
and related and neighboring rights to this software to the public domain |
||||
worldwide. This software is distributed without any warranty. |
||||
http://creativecommons.org/publicdomain/zero/1.0/ |
||||
|
@ -0,0 +1,299 @@ |
||||
// Written in 2012 by Dmitry Chestnykh.
|
||||
//
|
||||
// To the extent possible under law, the author have dedicated all copyright
|
||||
// and related and neighboring rights to this software to the public domain
|
||||
// worldwide. This software is distributed without any warranty.
|
||||
// http://creativecommons.org/publicdomain/zero/1.0/
|
||||
|
||||
// Package blake2b implements BLAKE2b cryptographic hash function.
|
||||
package blake2b |
||||
|
||||
import ( |
||||
"encoding/binary" |
||||
"errors" |
||||
"hash" |
||||
) |
||||
|
||||
// Size limits of the algorithm, all in bytes.
const (
	// BlockSize is the block size of the algorithm.
	BlockSize = 128
	// Size is the maximum digest size.
	Size = 64
	// SaltSize is the maximum salt size.
	SaltSize = 16
	// PersonSize is the maximum personalization string size.
	PersonSize = 16
	// KeySize is the maximum size of a key.
	KeySize = 64
)
||||
|
||||
type digest struct { |
||||
h [8]uint64 // current chain value
|
||||
t [2]uint64 // message bytes counter
|
||||
f [2]uint64 // finalization flags
|
||||
x [BlockSize]byte // buffer for data not yet compressed
|
||||
nx int // number of bytes in buffer
|
||||
|
||||
ih [8]uint64 // initial chain value (after config)
|
||||
paddedKey [BlockSize]byte // copy of key, padded with zeros
|
||||
isKeyed bool // indicates whether hash was keyed
|
||||
size uint8 // digest size in bytes
|
||||
isLastNode bool // indicates processing of the last node in tree hashing
|
||||
} |
||||
|
||||
// iv holds the eight initialization values. initialize XORs them with the
// parameter block to form the starting chain value.
var iv = [8]uint64{
	0x6a09e667f3bcc908,
	0xbb67ae8584caa73b,
	0x3c6ef372fe94f82b,
	0xa54ff53a5f1d36f1,
	0x510e527fade682d1,
	0x9b05688c2b3e6c1f,
	0x1f83d9abfb41bd6b,
	0x5be0cd19137e2179,
}
||||
|
||||
// Config is used to configure hash function parameters and keying.
|
||||
// All parameters are optional.
|
||||
type Config struct { |
||||
Size uint8 // digest size (if zero, default size of 64 bytes is used)
|
||||
Key []byte // key for prefix-MAC
|
||||
Salt []byte // salt (if < 16 bytes, padded with zeros)
|
||||
Person []byte // personalization (if < 16 bytes, padded with zeros)
|
||||
Tree *Tree // parameters for tree hashing
|
||||
} |
||||
|
||||
// Tree holds the parameters used for tree hashing.
type Tree struct {
	// Fanout is the fanout.
	Fanout uint8
	// MaxDepth is the maximal depth.
	MaxDepth uint8
	// LeafSize is the leaf maximal byte length (0 for unlimited).
	LeafSize uint32
	// NodeOffset is the node offset (0 for first, leftmost or leaf).
	NodeOffset uint64
	// NodeDepth is the node depth (0 for leaves).
	NodeDepth uint8
	// InnerHashSize is the inner hash byte length.
	InnerHashSize uint8
	// IsLastNode indicates processing of the last node of layer.
	IsLastNode bool
}
||||
|
||||
var ( |
||||
defaultConfig = &Config{Size: Size} |
||||
config256 = &Config{Size: 32} |
||||
) |
||||
|
||||
func verifyConfig(c *Config) error { |
||||
if c.Size > Size { |
||||
return errors.New("digest size is too large") |
||||
} |
||||
if len(c.Key) > KeySize { |
||||
return errors.New("key is too large") |
||||
} |
||||
if len(c.Salt) > SaltSize { |
||||
// Smaller salt is okay: it will be padded with zeros.
|
||||
return errors.New("salt is too large") |
||||
} |
||||
if len(c.Person) > PersonSize { |
||||
// Smaller personalization is okay: it will be padded with zeros.
|
||||
return errors.New("personalization is too large") |
||||
} |
||||
if c.Tree != nil { |
||||
if c.Tree.Fanout == 1 { |
||||
return errors.New("fanout of 1 is not allowed in tree mode") |
||||
} |
||||
if c.Tree.MaxDepth < 2 { |
||||
return errors.New("incorrect tree depth") |
||||
} |
||||
if c.Tree.InnerHashSize < 1 || c.Tree.InnerHashSize > Size { |
||||
return errors.New("incorrect tree inner hash size") |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// New returns a new hash.Hash configured with the given Config.
|
||||
// Config can be nil, in which case the default one is used, calculating 64-byte digest.
|
||||
// Returns non-nil error if Config contains invalid parameters.
|
||||
func New(c *Config) (hash.Hash, error) { |
||||
if c == nil { |
||||
c = defaultConfig |
||||
} else { |
||||
if c.Size == 0 { |
||||
// Set default size if it's zero.
|
||||
c.Size = Size |
||||
} |
||||
if err := verifyConfig(c); err != nil { |
||||
return nil, err |
||||
} |
||||
} |
||||
d := new(digest) |
||||
d.initialize(c) |
||||
return d, nil |
||||
} |
||||
|
||||
// initialize initializes digest with the given
|
||||
// config, which must be non-nil and verified.
|
||||
func (d *digest) initialize(c *Config) { |
||||
// Create parameter block.
|
||||
var p [BlockSize]byte |
||||
p[0] = c.Size |
||||
p[1] = uint8(len(c.Key)) |
||||
if c.Salt != nil { |
||||
copy(p[32:], c.Salt) |
||||
} |
||||
if c.Person != nil { |
||||
copy(p[48:], c.Person) |
||||
} |
||||
if c.Tree != nil { |
||||
p[2] = c.Tree.Fanout |
||||
p[3] = c.Tree.MaxDepth |
||||
binary.LittleEndian.PutUint32(p[4:], c.Tree.LeafSize) |
||||
binary.LittleEndian.PutUint64(p[8:], c.Tree.NodeOffset) |
||||
p[16] = c.Tree.NodeDepth |
||||
p[17] = c.Tree.InnerHashSize |
||||
} else { |
||||
p[2] = 1 |
||||
p[3] = 1 |
||||
} |
||||
// Initialize.
|
||||
d.size = c.Size |
||||
for i := 0; i < 8; i++ { |
||||
d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(p[i*8:]) |
||||
} |
||||
if c.Tree != nil && c.Tree.IsLastNode { |
||||
d.isLastNode = true |
||||
} |
||||
// Process key.
|
||||
if c.Key != nil { |
||||
copy(d.paddedKey[:], c.Key) |
||||
d.Write(d.paddedKey[:]) |
||||
d.isKeyed = true |
||||
} |
||||
// Save a copy of initialized state.
|
||||
copy(d.ih[:], d.h[:]) |
||||
} |
||||
|
||||
// New512 returns a new hash.Hash computing the BLAKE2b 64-byte checksum.
|
||||
func New512() hash.Hash { |
||||
d := new(digest) |
||||
d.initialize(defaultConfig) |
||||
return d |
||||
} |
||||
|
||||
// New256 returns a new hash.Hash computing the BLAKE2b 32-byte checksum.
|
||||
func New256() hash.Hash { |
||||
d := new(digest) |
||||
d.initialize(config256) |
||||
return d |
||||
} |
||||
|
||||
// NewMAC returns a new hash.Hash computing BLAKE2b prefix-
|
||||
// Message Authentication Code of the given size in bytes
|
||||
// (up to 64) with the given key (up to 64 bytes in length).
|
||||
func NewMAC(outBytes uint8, key []byte) hash.Hash { |
||||
d, err := New(&Config{Size: outBytes, Key: key}) |
||||
if err != nil { |
||||
panic(err.Error()) |
||||
} |
||||
return d |
||||
} |
||||
|
||||
// Reset resets the state of digest to the initial state
|
||||
// after configuration and keying.
|
||||
func (d *digest) Reset() { |
||||
copy(d.h[:], d.ih[:]) |
||||
d.t[0] = 0 |
||||
d.t[1] = 0 |
||||
d.f[0] = 0 |
||||
d.f[1] = 0 |
||||
d.nx = 0 |
||||
if d.isKeyed { |
||||
d.Write(d.paddedKey[:]) |
||||
} |
||||
} |
||||
|
||||
// Size returns the digest size in bytes.
|
||||
func (d *digest) Size() int { return int(d.size) } |
||||
|
||||
// BlockSize returns the algorithm block size in bytes.
|
||||
func (d *digest) BlockSize() int { return BlockSize } |
||||
|
||||
func (d *digest) Write(p []byte) (nn int, err error) { |
||||
nn = len(p) |
||||
left := BlockSize - d.nx |
||||
if len(p) > left { |
||||
// Process buffer.
|
||||
copy(d.x[d.nx:], p[:left]) |
||||
p = p[left:] |
||||
blocks(d, d.x[:]) |
||||
d.nx = 0 |
||||
} |
||||
// Process full blocks except for the last one.
|
||||
if len(p) > BlockSize { |
||||
n := len(p) &^ (BlockSize - 1) |
||||
if n == len(p) { |
||||
n -= BlockSize |
||||
} |
||||
blocks(d, p[:n]) |
||||
p = p[n:] |
||||
} |
||||
// Fill buffer.
|
||||
d.nx += copy(d.x[d.nx:], p) |
||||
return |
||||
} |
||||
|
||||
// Sum returns the calculated checksum.
|
||||
func (d0 *digest) Sum(in []byte) []byte { |
||||
// Make a copy of d0 so that caller can keep writing and summing.
|
||||
d := *d0 |
||||
hash := d.checkSum() |
||||
return append(in, hash[:d.size]...) |
||||
} |
||||
|
||||
func (d *digest) checkSum() [Size]byte { |
||||
// Do not create unnecessary copies of the key.
|
||||
if d.isKeyed { |
||||
for i := 0; i < len(d.paddedKey); i++ { |
||||
d.paddedKey[i] = 0 |
||||
} |
||||
} |
||||
|
||||
dec := BlockSize - uint64(d.nx) |
||||
if d.t[0] < dec { |
||||
d.t[1]-- |
||||
} |
||||
d.t[0] -= dec |
||||
|
||||
// Pad buffer with zeros.
|
||||
for i := d.nx; i < len(d.x); i++ { |
||||
d.x[i] = 0 |
||||
} |
||||
// Set last block flag.
|
||||
d.f[0] = 0xffffffffffffffff |
||||
if d.isLastNode { |
||||
d.f[1] = 0xffffffffffffffff |
||||
} |
||||
// Compress last block.
|
||||
blocks(d, d.x[:]) |
||||
|
||||
var out [Size]byte |
||||
j := 0 |
||||
for _, s := range d.h[:(d.size-1)/8+1] { |
||||
out[j+0] = byte(s >> 0) |
||||
out[j+1] = byte(s >> 8) |
||||
out[j+2] = byte(s >> 16) |
||||
out[j+3] = byte(s >> 24) |
||||
out[j+4] = byte(s >> 32) |
||||
out[j+5] = byte(s >> 40) |
||||
out[j+6] = byte(s >> 48) |
||||
out[j+7] = byte(s >> 56) |
||||
j += 8 |
||||
} |
||||
return out |
||||
} |
||||
|
||||
// Sum512 returns a 64-byte BLAKE2b hash of data.
|
||||
func Sum512(data []byte) [64]byte { |
||||
var d digest |
||||
d.initialize(defaultConfig) |
||||
d.Write(data) |
||||
return d.checkSum() |
||||
} |
||||
|
||||
// Sum256 returns a 32-byte BLAKE2b hash of data.
|
||||
func Sum256(data []byte) (out [32]byte) { |
||||
var d digest |
||||
d.initialize(config256) |
||||
d.Write(data) |
||||
sum := d.checkSum() |
||||
copy(out[:], sum[:32]) |
||||
return |
||||
} |
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue