mirror of
https://github.com/Ed94/raddebugger.git
synced 2026-06-12 23:31:38 -07:00
update third_party/martins_hash with latest code
This commit is contained in:
committed by
Ryan Fleury
parent
4bc6240ac0
commit
6589cbe374
Vendored
+4
-4
@@ -46,9 +46,9 @@ static inline void md5_finish(md5_ctx* ctx, uint8_t digest[MD5_DIGEST_SIZE]);
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# define MD5_GET32LE(ptr) *((const _UNALIGNED uint32_t*)(ptr))
|
||||
# define MD5_SET32LE(ptr,x) *((_UNALIGNED uint32_t*)(ptr)) = (x)
|
||||
# define MD5_SET64LE(ptr,x) *((_UNALIGNED uint64_t*)(ptr)) = (x)
|
||||
# define MD5_GET32LE(ptr) *((const __unaligned uint32_t*)(ptr))
|
||||
# define MD5_SET32LE(ptr,x) *((__unaligned uint32_t*)(ptr)) = (x)
|
||||
# define MD5_SET64LE(ptr,x) *((__unaligned uint64_t*)(ptr)) = (x)
|
||||
#else
|
||||
# define MD5_GET32LE(ptr) \
|
||||
( \
|
||||
@@ -431,5 +431,5 @@ void md5_finish(md5_ctx* ctx, uint8_t digest[MD5_DIGEST_SIZE])
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang diagnostic pop
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
Vendored
+232
-17
@@ -50,9 +50,9 @@ static inline void sha1_finish(sha1_ctx* ctx, uint8_t digest[SHA1_DIGEST_SIZE]);
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# include <stdlib.h>
|
||||
# define SHA1_GET32BE(ptr) _byteswap_ulong( *((const _UNALIGNED uint32_t*)(ptr)) )
|
||||
# define SHA1_SET32BE(ptr,x) *((_UNALIGNED uint32_t*)(ptr)) = _byteswap_ulong(x)
|
||||
# define SHA1_SET64BE(ptr,x) *((_UNALIGNED uint64_t*)(ptr)) = _byteswap_uint64(x)
|
||||
# define SHA1_GET32BE(ptr) _byteswap_ulong( *((const __unaligned uint32_t*)(ptr)) )
|
||||
# define SHA1_SET32BE(ptr,x) *((__unaligned uint32_t*)(ptr)) = _byteswap_ulong(x)
|
||||
# define SHA1_SET64BE(ptr,x) *((__unaligned uint64_t*)(ptr)) = _byteswap_uint64(x)
|
||||
#else
|
||||
# define SHA1_GET32BE(ptr) \
|
||||
( \
|
||||
@@ -137,36 +137,86 @@ static inline int sha1_cpuid(void)
|
||||
SHA1_TARGET("ssse3,sha")
|
||||
static void sha1_process_shani(uint32_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
const __m128i* buffer = (const __m128i*)block;
|
||||
// in SHA1 each round has two parts:
|
||||
// 1) calculate message schedule dwords in w[i]
|
||||
// 2) do round functions to update a/b/c/d/e state values using w[i]
|
||||
|
||||
// for performing two operations in one:
|
||||
// 1) dwords need to be loaded as big-endian
|
||||
// 2) order of dwords need to be reversed for sha instructions: [0,1,2,3] -> [3,2,1,0]
|
||||
const __m128i bswap = _mm_setr_epi8(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
|
||||
// w[i] in first 16 rounds is just loaded from block bytes, as 32-bit big-endian load
|
||||
|
||||
// for next rounds it is done as:
|
||||
// w[i] = ROL(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16])
|
||||
// where ROL(x) = 32-bit rotate left by 1
|
||||
|
||||
// this means it is possible to keep just the last 16 of w's in circular buffer
|
||||
// and every new w calculated will need to update 1 to 3 previous w's
|
||||
|
||||
// unrolling round calculations by 4 we get:
|
||||
// w[i+0] = ROL(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16])
|
||||
// w[i+1] = ROL(w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15])
|
||||
// w[i+2] = ROL(w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14])
|
||||
// w[i+3] = ROL(w[i+0] ^ w[i-5] ^ w[i-11] ^ w[i-13])
|
||||
|
||||
// now if you store 4 w[..] values in 128-bit SSE register, then
|
||||
// W(i) = ROL( r0 ^ r1 ^ r2 ^ r3 )
|
||||
// with caveat that r0 lane 3 depends on W(i) lane 0
|
||||
|
||||
// [3] [2] [1] [0] // lanes
|
||||
// r0 = [ special, w[i-1], w[i-2], w[i-3] ]
|
||||
// r1 = [ w[i-5], w[i-6], w[i-7], w[i-8] ]
|
||||
// r2 = [ w[i-11], w[i-12], w[i-13], w[i-14] ]
|
||||
// r3 = [ w[i-13], w[i-14], w[i-15], w[i-16] ]
|
||||
|
||||
// in each 4-round i'th step it is possible to incrementally update new W(..) value when
|
||||
// keeping W(i) values in 4 xmm element circular buffer
|
||||
|
||||
// rounds i>0: W(i-1) = r2 ^ r3 = _mm_sha1msg1_epu32(W(i-1), W(i))
|
||||
// rounds i>1: W(i-2) = W(i-2) ^ r1 = _mm_xor_si128 (W(i-2), W(i))
|
||||
// rounds i>2: W(i-3) = ROL(W(i-3) ^ r0) = _mm_sha1msg2_epu32(W(i-3), W(i))
|
||||
// then the new W(i) can be used in round function calculations
|
||||
// _mm_sha1msg2_epu32 correctly handles r0 lane 3 dependency on W(i) lane 0
|
||||
|
||||
// to perform round functions on two SIMD registers with state as:
|
||||
// abcd = [a,b,c,d]
|
||||
// e0 = [e,0,0,0]
|
||||
// use the following code to get next abcd/e0 state 4 rounds at a time:
|
||||
|
||||
// tmp = _mm_sha1nexte_epu32(e0, W(i)) // rotates e0 and adds message dwords
|
||||
// abcd_next = _mm_sha1rnds4_epu32(abcd, tmp, Fn) // with Fn = 0..3 round function selection
|
||||
// e0_next = abcd
|
||||
|
||||
// sha1nexte is not needed on first round, just regular add32(e0, W(i)) should be used
|
||||
// after last round need to do extra rotation, which sha1nexte takes care when adding to last_e0
|
||||
|
||||
#define W(i) w[(i)%4]
|
||||
|
||||
// 4 wide round calculations
|
||||
#define QROUND(i) do { \
|
||||
/* first four rounds loads input message */ \
|
||||
/* first 4 rounds load input block */ \
|
||||
if (i < 4) W(i) = _mm_shuffle_epi8(_mm_loadu_si128(&buffer[i]), bswap); \
|
||||
/* update previous message dwords for next rounds */ \
|
||||
/* update message schedule */ \
|
||||
if (i > 0 && i < 17) W(i-1) = _mm_sha1msg1_epu32(W(i-1), W(i)); \
|
||||
if (i > 1 && i < 18) W(i-2) = _mm_xor_si128(W(i-2), W(i)); \
|
||||
if (i > 1 && i < 18) W(i-2) = _mm_xor_si128 (W(i-2), W(i)); \
|
||||
if (i > 2 && i < 19) W(i-3) = _mm_sha1msg2_epu32(W(i-3), W(i)); \
|
||||
/* calculate E from message dwords */ \
|
||||
if (i == 0) tmp = _mm_add_epi32(e0, W(i)); \
|
||||
/* calculate E plus message schedule */ \
|
||||
if (i == 0) tmp = _mm_add_epi32 (e0, W(i)); \
|
||||
if (i != 0) tmp = _mm_sha1nexte_epu32(e0, W(i)); \
|
||||
/* round function */ \
|
||||
/* 4 round functions */ \
|
||||
e0 = abcd; \
|
||||
abcd = _mm_sha1rnds4_epu32(abcd, tmp, (i/5)%4); \
|
||||
abcd = _mm_sha1rnds4_epu32(abcd, tmp, i/5); \
|
||||
} while(0)
|
||||
|
||||
const __m128i* buffer = (const __m128i*)block;
|
||||
|
||||
// for performing two operations in one:
|
||||
// 1) dwords need to be loaded as big-endian
|
||||
// 2) order of dwords need to be reversed for sha1 instructions: [0,1,2,3] -> [3,2,1,0]
|
||||
const __m128i bswap = _mm_setr_epi8(15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0);
|
||||
|
||||
// load initial state
|
||||
__m128i abcd = _mm_loadu_si128((const __m128i*)state); // [d,c,b,a]
|
||||
__m128i e0 = _mm_loadu_si32(&state[4]); // [0,0,0,e]
|
||||
|
||||
// change dword order
|
||||
// flip dword order, to what sha1 instructions use
|
||||
abcd = _mm_shuffle_epi32(abcd, _MM_SHUFFLE(0,1,2,3)); // [a,b,c,d] where a is in the top lane
|
||||
e0 = _mm_slli_si128(e0, 12); // [e,0,0,0] where e is in top lane
|
||||
|
||||
@@ -183,16 +233,19 @@ static void sha1_process_shani(uint32_t* state, const uint8_t* block, size_t cou
|
||||
QROUND(2);
|
||||
QROUND(3);
|
||||
QROUND(4);
|
||||
|
||||
QROUND(5);
|
||||
QROUND(6);
|
||||
QROUND(7);
|
||||
QROUND(8);
|
||||
QROUND(9);
|
||||
|
||||
QROUND(10);
|
||||
QROUND(11);
|
||||
QROUND(12);
|
||||
QROUND(13);
|
||||
QROUND(14);
|
||||
|
||||
QROUND(15);
|
||||
QROUND(16);
|
||||
QROUND(17);
|
||||
@@ -221,6 +274,159 @@ static void sha1_process_shani(uint32_t* state, const uint8_t* block, size_t cou
|
||||
|
||||
#endif // defined(__x86_64__) || defined(_M_AMD64)
|
||||
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
|
||||
#if defined(__clang__)
|
||||
# define SHA1_TARGET __attribute__((target("sha2")))
|
||||
#elif defined(__GNUC__)
|
||||
# define SHA1_TARGET __attribute__((target("+sha2")))
|
||||
#elif defined(_MSC_VER)
|
||||
# define SHA1_TARGET
|
||||
#endif
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
# include <windows.h>
|
||||
#elif defined(__linux__)
|
||||
# include <sys/auxv.h>
|
||||
# include <asm/hwcap.h>
|
||||
#elif defined(__APPLE__)
|
||||
# include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
#define SHA1_CPUID_INIT (1 << 0)
|
||||
#define SHA1_CPUID_ARM64 (1 << 1)
|
||||
|
||||
static inline int sha1_cpuid(void)
|
||||
{
|
||||
#if defined(__ARM_FEATURE_CRYPTO) || defined(__ARM_FEATURE_SHA2)
|
||||
int result = SHA1_CPUID_ARM64;
|
||||
#else
|
||||
static int cpuid;
|
||||
|
||||
int result = cpuid;
|
||||
if (result == 0)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
int has_arm64 = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
|
||||
#elif defined(__linux__)
|
||||
unsigned long hwcap = getauxval(AT_HWCAP);
|
||||
int has_arm64 = hwcap & HWCAP_SHA1;
|
||||
#elif defined(__APPLE__)
|
||||
int value = 0;
|
||||
size_t valuelen = sizeof(value);
|
||||
int has_arm64 = sysctlbyname("hw.optional.arm.FEAT_SHA1", &value, &valuelen, NULL, 0) == 0 && value != 0;
|
||||
#else
|
||||
#error unknown platform
|
||||
#endif
|
||||
result |= SHA1_CPUID_INIT;
|
||||
if (has_arm64)
|
||||
{
|
||||
result |= SHA1_CPUID_ARM64;
|
||||
}
|
||||
|
||||
cpuid = result;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(SHA1_CPUID_MASK)
|
||||
result &= SHA1_CPUID_MASK;
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
SHA1_TARGET
|
||||
static void sha1_process_arm64(uint32_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
// code here is similar to x64 shani implementation
|
||||
|
||||
// message array is 16 element circular buffer
|
||||
// each iteration updates 4 rounds at the same time
|
||||
|
||||
#define W(i) w[(i)%4]
|
||||
|
||||
#define QROUND(i,F,k) do { \
|
||||
/* update message schedule */ \
|
||||
if (i >= 4) W(i) = vsha1su0q_u32(W(i), W(i-3), W(i-2)); \
|
||||
if (i >= 4) W(i) = vsha1su1q_u32(W(i), W(i-1)); \
|
||||
/* add round constant */ \
|
||||
uint32x4_t tmp = vaddq_u32(W(i), k); \
|
||||
/* 4 round functions */ \
|
||||
uint32_t x = e0; \
|
||||
e0 = vsha1h_u32(vgetq_lane_u32(abcd, 0)); \
|
||||
abcd = F(abcd, x, tmp); \
|
||||
} while (0)
|
||||
|
||||
const uint32x4_t k0 = vdupq_n_u32(0x5a827999);
|
||||
const uint32x4_t k1 = vdupq_n_u32(0x6ed9eba1);
|
||||
const uint32x4_t k2 = vdupq_n_u32(0x8f1bbcdc);
|
||||
const uint32x4_t k3 = vdupq_n_u32(0xca62c1d6);
|
||||
|
||||
// load state - a,b,c,d,e
|
||||
uint32x4_t abcd = vld1q_u32(state);
|
||||
uint32_t e0 = state[4];
|
||||
|
||||
do
|
||||
{
|
||||
// remember current state
|
||||
uint32x4_t last_abcd = abcd;
|
||||
uint32_t last_e0 = e0;
|
||||
|
||||
// load 64-byte block and advance pointer to next block
|
||||
uint8x16x4_t msg = vld1q_u8_x4(block);
|
||||
block += SHA1_BLOCK_SIZE;
|
||||
|
||||
uint32x4_t w[4];
|
||||
|
||||
// for first 16 w's reverse the byte order in each 32-bit lane
|
||||
W(0) = vreinterpretq_u32_u8(vrev32q_u8(msg.val[0]));
|
||||
W(1) = vreinterpretq_u32_u8(vrev32q_u8(msg.val[1]));
|
||||
W(2) = vreinterpretq_u32_u8(vrev32q_u8(msg.val[2]));
|
||||
W(3) = vreinterpretq_u32_u8(vrev32q_u8(msg.val[3]));
|
||||
|
||||
QROUND( 0, vsha1cq_u32, k0);
|
||||
QROUND( 1, vsha1cq_u32, k0);
|
||||
QROUND( 2, vsha1cq_u32, k0);
|
||||
QROUND( 3, vsha1cq_u32, k0);
|
||||
QROUND( 4, vsha1cq_u32, k0);
|
||||
|
||||
QROUND( 5, vsha1pq_u32, k1);
|
||||
QROUND( 6, vsha1pq_u32, k1);
|
||||
QROUND( 7, vsha1pq_u32, k1);
|
||||
QROUND( 8, vsha1pq_u32, k1);
|
||||
QROUND( 9, vsha1pq_u32, k1);
|
||||
|
||||
QROUND(10, vsha1mq_u32, k2);
|
||||
QROUND(11, vsha1mq_u32, k2);
|
||||
QROUND(12, vsha1mq_u32, k2);
|
||||
QROUND(13, vsha1mq_u32, k2);
|
||||
QROUND(14, vsha1mq_u32, k2);
|
||||
|
||||
QROUND(15, vsha1pq_u32, k3);
|
||||
QROUND(16, vsha1pq_u32, k3);
|
||||
QROUND(17, vsha1pq_u32, k3);
|
||||
QROUND(18, vsha1pq_u32, k3);
|
||||
QROUND(19, vsha1pq_u32, k3);
|
||||
|
||||
// update next state
|
||||
abcd = vaddq_u32(abcd, last_abcd);
|
||||
e0 += last_e0;
|
||||
}
|
||||
while (--count);
|
||||
|
||||
// save state
|
||||
vst1q_u32(state, abcd);
|
||||
state[4] = e0;
|
||||
|
||||
#undef QROUND
|
||||
#undef W
|
||||
}
|
||||
|
||||
#endif // defined(__aarch64__) || defined(_M_ARM64)
|
||||
|
||||
static void sha1_process(uint32_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
#if defined(__x86_64__) || defined(_M_AMD64)
|
||||
@@ -232,12 +438,21 @@ static void sha1_process(uint32_t* state, const uint8_t* block, size_t count)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
int cpuid = sha1_cpuid();
|
||||
if (cpuid & SHA1_CPUID_ARM64)
|
||||
{
|
||||
sha1_process_arm64(state, block, count);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define F1(x,y,z) (0x5a827999 + ((x & (y ^ z)) ^ z))
|
||||
#define F2(x,y,z) (0x6ed9eba1 + (x ^ y ^ z))
|
||||
#define F3(x,y,z) (0x8f1bbcdc + ((x & y) | (z & (x | y))))
|
||||
#define F4(x,y,z) (0xca62c1d6 + (x ^ y ^ z))
|
||||
|
||||
#define W(i) w[(i+16)%16]
|
||||
#define W(i) w[(i)%16]
|
||||
|
||||
#define ROUND(i,a,b,c,d,e,F) do \
|
||||
{ \
|
||||
|
||||
+288
-108
@@ -58,9 +58,9 @@ static inline void sha224_finish(sha224_ctx* ctx, uint8_t digest[SHA224_DIGEST_S
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# include <stdlib.h>
|
||||
# define SHA256_GET32BE(ptr) _byteswap_ulong( *((const _UNALIGNED uint32_t*)(ptr)) )
|
||||
# define SHA256_SET32BE(ptr,x) *((_UNALIGNED uint32_t*)(ptr)) = _byteswap_ulong(x)
|
||||
# define SHA256_SET64BE(ptr,x) *((_UNALIGNED uint64_t*)(ptr)) = _byteswap_uint64(x)
|
||||
# define SHA256_GET32BE(ptr) _byteswap_ulong( *((const __unaligned uint32_t*)(ptr)) )
|
||||
# define SHA256_SET32BE(ptr,x) *((__unaligned uint32_t*)(ptr)) = _byteswap_ulong(x)
|
||||
# define SHA256_SET64BE(ptr,x) *((__unaligned uint64_t*)(ptr)) = _byteswap_uint64(x)
|
||||
#else
|
||||
# define SHA256_GET32BE(ptr) \
|
||||
( \
|
||||
@@ -91,6 +91,26 @@ static inline void sha224_finish(sha224_ctx* ctx, uint8_t digest[SHA224_DIGEST_S
|
||||
while (0)
|
||||
#endif
|
||||
|
||||
static const uint32_t SHA256_K[64] =
|
||||
{
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
|
||||
};
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_AMD64)
|
||||
|
||||
#include <tmmintrin.h> // SSSE3
|
||||
@@ -145,47 +165,64 @@ static inline int sha256_cpuid(void)
|
||||
SHA256_TARGET("ssse3,sha")
|
||||
static void sha256_process_shani(uint32_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
const __m128i* buffer = (const __m128i*)block;
|
||||
// similar way how sha1 works in with shani
|
||||
|
||||
// to byteswap when doing big-ending load for message dwords
|
||||
const __m128i bswap = _mm_setr_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12);
|
||||
// first 16 rounds loads message schedule dwords as 32-bit big endian values
|
||||
|
||||
static const uint32_t K[16][4] =
|
||||
{
|
||||
{ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 },
|
||||
{ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 },
|
||||
{ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 },
|
||||
{ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 },
|
||||
{ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc },
|
||||
{ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da },
|
||||
{ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 },
|
||||
{ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 },
|
||||
{ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 },
|
||||
{ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 },
|
||||
{ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 },
|
||||
{ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 },
|
||||
{ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 },
|
||||
{ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 },
|
||||
{ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 },
|
||||
{ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 },
|
||||
};
|
||||
// for next rounds message schedule is prepared as:
|
||||
// w[i] = SSig1(w[i-2]) + w[i-7] + SSig0(w[i-15]) + w[i-16]
|
||||
|
||||
// unrolled by 4:
|
||||
// w[i+0] = SSig1(w[i-2]) + w[i-7] + SSig0(w[i-15]) + w[i-16]
|
||||
// w[i+1] = SSig1(w[i-1]) + w[i-6] + SSig0(w[i-14]) + w[i-15]
|
||||
// w[i+2] = SSig1(w[i+0]) + w[i-5] + SSig0(w[i-13]) + w[i-14]
|
||||
// w[i+3] = SSig1(w[i+1]) + w[i-4] + SSig0(w[i-12]) + w[i-13]
|
||||
|
||||
// there is tricky dependency for lanes 2 and 3 on result of lanes 0 and 1, but sha256msg2 op takes care of that
|
||||
|
||||
// by storing W[i] word in 128-bit simd register, the message schedule becomes:
|
||||
// W(i) = SSig1(r0) + r1 + SSig0(r2) + r3
|
||||
// where + is 32-bit lane addition
|
||||
|
||||
// [3] [2] [1] [0] // lanes
|
||||
// r0 = [ special, special, w[i-1], w[i-2] ]
|
||||
// r1 = [ w[i-4], w[i-5], w[i-6], w[i-7] ]
|
||||
// r2 = [ w[i-12], w[i-13], w[i-14], w[i-15] ]
|
||||
// r3 = [ w[i-13], w[i-14], w[i-15], w[i-16] ]
|
||||
|
||||
// rN's can be calculated from previous W(..) values:
|
||||
// r0 from W(i)
|
||||
// r1 from _mm_alignr_epi8(W(i), W(i-1), 4)
|
||||
// r2 from W(i-1) and W(i)
|
||||
// r3 from W(i-1)
|
||||
|
||||
// rounds i>2: W(i-3) = _mm_sha256msg2_epu32(_mm_add_epi32( W(i-3), _mm_alignr_epi8(W(i), W(i-1), 4) ), W(i))
|
||||
// rounds i>0: W(i-1) = _mm_sha256msg1_epu32(W(i-1), W(i))
|
||||
|
||||
// round functions are done with _mm_sha256rnds2_epu32 which performs it for 2 rounds
|
||||
// thus repeat it two times, as input use W(i) + K(i) - message schedule added with sha256 constants
|
||||
|
||||
#define W(i) w[(i)%4]
|
||||
|
||||
// 4 wide round calculations
|
||||
#define QROUND(i) do { \
|
||||
/* first four rounds loads input message */ \
|
||||
/* first 4 rounds load input block */ \
|
||||
if (i < 4) W(i) = _mm_shuffle_epi8(_mm_loadu_si128(&buffer[i]), bswap); \
|
||||
/* add round constant */ \
|
||||
tmp = _mm_add_epi32(W(i), _mm_loadu_si128((const __m128i*)K[i])); \
|
||||
/* update previous message dwords for next rounds */ \
|
||||
/* update message schedule */ \
|
||||
if (i > 2 && i < 15) W(i-3) = _mm_sha256msg2_epu32(_mm_add_epi32(W(i-3), _mm_alignr_epi8(W(i), W(i-1), 4)), W(i)); \
|
||||
if (i > 0 && i < 13) W(i-1) = _mm_sha256msg1_epu32(W(i-1), W(i)); \
|
||||
/* round functions */ \
|
||||
/* add round constants */ \
|
||||
__m128i tmp = _mm_add_epi32(W(i), _mm_loadu_si128((const __m128i*)&SHA256_K[4*i])); \
|
||||
/* 4 round functions */ \
|
||||
state1 = _mm_sha256rnds2_epu32(state1, state0, tmp); \
|
||||
state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(tmp, _MM_SHUFFLE(0,0,3,2))); \
|
||||
} while(0)
|
||||
|
||||
|
||||
const __m128i* buffer = (const __m128i*)block;
|
||||
|
||||
// to byteswap when doing big-ending load for message dwords
|
||||
const __m128i bswap = _mm_setr_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12);
|
||||
|
||||
// load initial state
|
||||
__m128i abcd = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)&state[0]), _MM_SHUFFLE(0,1,2,3)); // [a,b,c,d]
|
||||
__m128i efgh = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)&state[4]), _MM_SHUFFLE(0,1,2,3)); // [e,f,g,h]
|
||||
@@ -200,18 +237,18 @@ static void sha256_process_shani(uint32_t* state, const uint8_t* block, size_t c
|
||||
__m128i last0 = state0;
|
||||
__m128i last1 = state1;
|
||||
|
||||
__m128i tmp, w[4];
|
||||
__m128i w[4];
|
||||
|
||||
QROUND(0);
|
||||
QROUND(1);
|
||||
QROUND(2);
|
||||
QROUND(3);
|
||||
QROUND(4);
|
||||
QROUND(5);
|
||||
QROUND(6);
|
||||
QROUND(7);
|
||||
QROUND(8);
|
||||
QROUND(9);
|
||||
QROUND( 0);
|
||||
QROUND( 1);
|
||||
QROUND( 2);
|
||||
QROUND( 3);
|
||||
QROUND( 4);
|
||||
QROUND( 5);
|
||||
QROUND( 6);
|
||||
QROUND( 7);
|
||||
QROUND( 8);
|
||||
QROUND( 9);
|
||||
QROUND(10);
|
||||
QROUND(11);
|
||||
QROUND(12);
|
||||
@@ -241,6 +278,140 @@ static void sha256_process_shani(uint32_t* state, const uint8_t* block, size_t c
|
||||
|
||||
#endif // defined(__x86_64__) || defined(_M_AMD64)
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
|
||||
#if defined(__clang__)
|
||||
# define SHA256_TARGET __attribute__((target("sha2")))
|
||||
#elif defined(__GNUC__)
|
||||
# define SHA256_TARGET __attribute__((target("+sha2")))
|
||||
#elif defined(_MSC_VER)
|
||||
# define SHA256_TARGET
|
||||
#endif
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
# include <windows.h>
|
||||
#elif defined(__linux__)
|
||||
# include <sys/auxv.h>
|
||||
# include <asm/hwcap.h>
|
||||
#elif defined(__APPLE__)
|
||||
# include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
#define SHA256_CPUID_INIT (1 << 0)
|
||||
#define SHA256_CPUID_ARM64 (1 << 1)
|
||||
|
||||
static inline int sha256_cpuid(void)
|
||||
{
|
||||
#if defined(__ARM_FEATURE_CRYPTO) || defined(__ARM_FEATURE_SHA2)
|
||||
int result = SHA256_CPUID_ARM64;
|
||||
#else
|
||||
static int cpuid;
|
||||
|
||||
int result = cpuid;
|
||||
if (result == 0)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
int has_arm64 = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
|
||||
#elif defined(__linux__)
|
||||
unsigned long hwcap = getauxval(AT_HWCAP);
|
||||
int has_arm64 = hwcap & HWCAP_SHA2;
|
||||
#elif defined(__APPLE__)
|
||||
int value = 0;
|
||||
size_t valuelen = sizeof(value);
|
||||
int has_arm64 = sysctlbyname("hw.optional.arm.FEAT_SHA256", &value, &valuelen, NULL, 0) == 0 && value != 0;
|
||||
#else
|
||||
#error unknown platform
|
||||
#endif
|
||||
result |= SHA256_CPUID_INIT;
|
||||
if (has_arm64)
|
||||
{
|
||||
result |= SHA256_CPUID_ARM64;
|
||||
}
|
||||
|
||||
cpuid = result;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(SHA256_CPUID_MASK)
|
||||
result &= SHA256_CPUID_MASK;
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
SHA256_TARGET
|
||||
static void sha256_process_arm64(uint32_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
// code here is similar to x64 shani implementation
|
||||
|
||||
#define W(i) w[(i)%4]
|
||||
|
||||
#define QROUND(i) do { \
|
||||
/* load 16 round constants */ \
|
||||
if ((i % 4) == 0) rk = vld1q_u32_x4(&SHA256_K[4*i]); \
|
||||
/* first 4 rounds reverse byte order in each 32-bit lane of input block */ \
|
||||
if (i < 4) W(i) = vreinterpretq_u32_u8(vrev32q_u8(msg.val[i])); \
|
||||
/* update message schedule */ \
|
||||
if (i >= 4) W(i) = vsha256su0q_u32(W(i), W(i-3)); \
|
||||
if (i >= 4) W(i) = vsha256su1q_u32(W(i), W(i-2), W(i-1)); \
|
||||
/* add round constants */ \
|
||||
uint32x4_t tmp = vaddq_u32(W(i), rk.val[i%4]); \
|
||||
/* 4 round functions */ \
|
||||
uint32x4_t x = vstate.val[0]; \
|
||||
vstate.val[0] = vsha256hq_u32(vstate.val[0], vstate.val[1], tmp); \
|
||||
vstate.val[1] = vsha256h2q_u32(vstate.val[1], x, tmp); \
|
||||
} while (0)
|
||||
|
||||
// load initial state
|
||||
uint32x4x2_t vstate = vld1q_u32_x2(state);
|
||||
|
||||
do
|
||||
{
|
||||
// remember current state
|
||||
uint32x4x2_t vlast = vstate;
|
||||
|
||||
// load 64-byte block
|
||||
uint8x16x4_t msg = vld1q_u8_x4(block);
|
||||
|
||||
uint32x4x4_t rk;
|
||||
uint32x4_t w[4];
|
||||
|
||||
QROUND( 0);
|
||||
QROUND( 1);
|
||||
QROUND( 2);
|
||||
QROUND( 3);
|
||||
QROUND( 4);
|
||||
QROUND( 5);
|
||||
QROUND( 6);
|
||||
QROUND( 7);
|
||||
QROUND( 8);
|
||||
QROUND( 9);
|
||||
QROUND(10);
|
||||
QROUND(11);
|
||||
QROUND(12);
|
||||
QROUND(13);
|
||||
QROUND(14);
|
||||
QROUND(15);
|
||||
|
||||
// update next state
|
||||
vstate.val[0] = vaddq_u32(vstate.val[0], vlast.val[0]);
|
||||
vstate.val[1] = vaddq_u32(vstate.val[1], vlast.val[1]);
|
||||
|
||||
block += SHA256_BLOCK_SIZE;
|
||||
}
|
||||
while (--count);
|
||||
|
||||
// save the new state
|
||||
vst1q_u32_x2(state, vstate);
|
||||
|
||||
#undef QROUND
|
||||
#undef W
|
||||
}
|
||||
|
||||
#endif // defined(__aarch64__) || defined(_M_ARM64)
|
||||
|
||||
static void sha256_process(uint32_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
#if defined(__x86_64__) || defined(_M_AMD64)
|
||||
@@ -252,6 +423,15 @@ static void sha256_process(uint32_t* state, const uint8_t* block, size_t count)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
int cpuid = sha256_cpuid();
|
||||
if (cpuid & SHA256_CPUID_ARM64)
|
||||
{
|
||||
sha256_process_arm64(state, block, count);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define Ch(x,y,z) ((x & (y ^ z)) ^ z)
|
||||
#define Maj(x,y,z) ((x & y) | (z & (x | y)))
|
||||
|
||||
@@ -262,13 +442,13 @@ static void sha256_process(uint32_t* state, const uint8_t* block, size_t count)
|
||||
|
||||
#define W(i) w[(i+16)%16]
|
||||
|
||||
#define ROUND(i,a,b,c,d,e,f,g,h,K) do \
|
||||
#define ROUND(i,a,b,c,d,e,f,g,h) do \
|
||||
{ \
|
||||
uint32_t w0; \
|
||||
if (i < 16) W(i) = w0 = SHA256_GET32BE(block + i*sizeof(uint32_t)); \
|
||||
if (i >= 16) W(i) = w0 = SSig1(W(i-2)) + W(i-7) + SSig0(W(i-15)) + W(i-16); \
|
||||
\
|
||||
uint32_t t1 = h + BSig1(e) + Ch(e,f,g) + K + w0; \
|
||||
uint32_t t1 = h + BSig1(e) + Ch(e,f,g) + SHA256_K[i] + w0; \
|
||||
uint32_t t2 = BSig0(a) + Maj(a,b,c); \
|
||||
d += t1; \
|
||||
h = t1 + t2; \
|
||||
@@ -287,70 +467,70 @@ static void sha256_process(uint32_t* state, const uint8_t* block, size_t count)
|
||||
|
||||
uint32_t w[16];
|
||||
|
||||
ROUND( 0, a, b, c, d, e, f, g, h, 0x428a2f98);
|
||||
ROUND( 1, h, a, b, c, d, e, f, g, 0x71374491);
|
||||
ROUND( 2, g, h, a, b, c, d, e, f, 0xb5c0fbcf);
|
||||
ROUND( 3, f, g, h, a, b, c, d, e, 0xe9b5dba5);
|
||||
ROUND( 4, e, f, g, h, a, b, c, d, 0x3956c25b);
|
||||
ROUND( 5, d, e, f, g, h, a, b, c, 0x59f111f1);
|
||||
ROUND( 6, c, d, e, f, g, h, a, b, 0x923f82a4);
|
||||
ROUND( 7, b, c, d, e, f, g, h, a, 0xab1c5ed5);
|
||||
ROUND( 8, a, b, c, d, e, f, g, h, 0xd807aa98);
|
||||
ROUND( 9, h, a, b, c, d, e, f, g, 0x12835b01);
|
||||
ROUND(10, g, h, a, b, c, d, e, f, 0x243185be);
|
||||
ROUND(11, f, g, h, a, b, c, d, e, 0x550c7dc3);
|
||||
ROUND(12, e, f, g, h, a, b, c, d, 0x72be5d74);
|
||||
ROUND(13, d, e, f, g, h, a, b, c, 0x80deb1fe);
|
||||
ROUND(14, c, d, e, f, g, h, a, b, 0x9bdc06a7);
|
||||
ROUND(15, b, c, d, e, f, g, h, a, 0xc19bf174);
|
||||
ROUND(16, a, b, c, d, e, f, g, h, 0xe49b69c1);
|
||||
ROUND(17, h, a, b, c, d, e, f, g, 0xefbe4786);
|
||||
ROUND(18, g, h, a, b, c, d, e, f, 0x0fc19dc6);
|
||||
ROUND(19, f, g, h, a, b, c, d, e, 0x240ca1cc);
|
||||
ROUND(20, e, f, g, h, a, b, c, d, 0x2de92c6f);
|
||||
ROUND(21, d, e, f, g, h, a, b, c, 0x4a7484aa);
|
||||
ROUND(22, c, d, e, f, g, h, a, b, 0x5cb0a9dc);
|
||||
ROUND(23, b, c, d, e, f, g, h, a, 0x76f988da);
|
||||
ROUND(24, a, b, c, d, e, f, g, h, 0x983e5152);
|
||||
ROUND(25, h, a, b, c, d, e, f, g, 0xa831c66d);
|
||||
ROUND(26, g, h, a, b, c, d, e, f, 0xb00327c8);
|
||||
ROUND(27, f, g, h, a, b, c, d, e, 0xbf597fc7);
|
||||
ROUND(28, e, f, g, h, a, b, c, d, 0xc6e00bf3);
|
||||
ROUND(29, d, e, f, g, h, a, b, c, 0xd5a79147);
|
||||
ROUND(30, c, d, e, f, g, h, a, b, 0x06ca6351);
|
||||
ROUND(31, b, c, d, e, f, g, h, a, 0x14292967);
|
||||
ROUND(32, a, b, c, d, e, f, g, h, 0x27b70a85);
|
||||
ROUND(33, h, a, b, c, d, e, f, g, 0x2e1b2138);
|
||||
ROUND(34, g, h, a, b, c, d, e, f, 0x4d2c6dfc);
|
||||
ROUND(35, f, g, h, a, b, c, d, e, 0x53380d13);
|
||||
ROUND(36, e, f, g, h, a, b, c, d, 0x650a7354);
|
||||
ROUND(37, d, e, f, g, h, a, b, c, 0x766a0abb);
|
||||
ROUND(38, c, d, e, f, g, h, a, b, 0x81c2c92e);
|
||||
ROUND(39, b, c, d, e, f, g, h, a, 0x92722c85);
|
||||
ROUND(40, a, b, c, d, e, f, g, h, 0xa2bfe8a1);
|
||||
ROUND(41, h, a, b, c, d, e, f, g, 0xa81a664b);
|
||||
ROUND(42, g, h, a, b, c, d, e, f, 0xc24b8b70);
|
||||
ROUND(43, f, g, h, a, b, c, d, e, 0xc76c51a3);
|
||||
ROUND(44, e, f, g, h, a, b, c, d, 0xd192e819);
|
||||
ROUND(45, d, e, f, g, h, a, b, c, 0xd6990624);
|
||||
ROUND(46, c, d, e, f, g, h, a, b, 0xf40e3585);
|
||||
ROUND(47, b, c, d, e, f, g, h, a, 0x106aa070);
|
||||
ROUND(48, a, b, c, d, e, f, g, h, 0x19a4c116);
|
||||
ROUND(49, h, a, b, c, d, e, f, g, 0x1e376c08);
|
||||
ROUND(50, g, h, a, b, c, d, e, f, 0x2748774c);
|
||||
ROUND(51, f, g, h, a, b, c, d, e, 0x34b0bcb5);
|
||||
ROUND(52, e, f, g, h, a, b, c, d, 0x391c0cb3);
|
||||
ROUND(53, d, e, f, g, h, a, b, c, 0x4ed8aa4a);
|
||||
ROUND(54, c, d, e, f, g, h, a, b, 0x5b9cca4f);
|
||||
ROUND(55, b, c, d, e, f, g, h, a, 0x682e6ff3);
|
||||
ROUND(56, a, b, c, d, e, f, g, h, 0x748f82ee);
|
||||
ROUND(57, h, a, b, c, d, e, f, g, 0x78a5636f);
|
||||
ROUND(58, g, h, a, b, c, d, e, f, 0x84c87814);
|
||||
ROUND(59, f, g, h, a, b, c, d, e, 0x8cc70208);
|
||||
ROUND(60, e, f, g, h, a, b, c, d, 0x90befffa);
|
||||
ROUND(61, d, e, f, g, h, a, b, c, 0xa4506ceb);
|
||||
ROUND(62, c, d, e, f, g, h, a, b, 0xbef9a3f7);
|
||||
ROUND(63, b, c, d, e, f, g, h, a, 0xc67178f2);
|
||||
ROUND( 0, a, b, c, d, e, f, g, h);
|
||||
ROUND( 1, h, a, b, c, d, e, f, g);
|
||||
ROUND( 2, g, h, a, b, c, d, e, f);
|
||||
ROUND( 3, f, g, h, a, b, c, d, e);
|
||||
ROUND( 4, e, f, g, h, a, b, c, d);
|
||||
ROUND( 5, d, e, f, g, h, a, b, c);
|
||||
ROUND( 6, c, d, e, f, g, h, a, b);
|
||||
ROUND( 7, b, c, d, e, f, g, h, a);
|
||||
ROUND( 8, a, b, c, d, e, f, g, h);
|
||||
ROUND( 9, h, a, b, c, d, e, f, g);
|
||||
ROUND(10, g, h, a, b, c, d, e, f);
|
||||
ROUND(11, f, g, h, a, b, c, d, e);
|
||||
ROUND(12, e, f, g, h, a, b, c, d);
|
||||
ROUND(13, d, e, f, g, h, a, b, c);
|
||||
ROUND(14, c, d, e, f, g, h, a, b);
|
||||
ROUND(15, b, c, d, e, f, g, h, a);
|
||||
ROUND(16, a, b, c, d, e, f, g, h);
|
||||
ROUND(17, h, a, b, c, d, e, f, g);
|
||||
ROUND(18, g, h, a, b, c, d, e, f);
|
||||
ROUND(19, f, g, h, a, b, c, d, e);
|
||||
ROUND(20, e, f, g, h, a, b, c, d);
|
||||
ROUND(21, d, e, f, g, h, a, b, c);
|
||||
ROUND(22, c, d, e, f, g, h, a, b);
|
||||
ROUND(23, b, c, d, e, f, g, h, a);
|
||||
ROUND(24, a, b, c, d, e, f, g, h);
|
||||
ROUND(25, h, a, b, c, d, e, f, g);
|
||||
ROUND(26, g, h, a, b, c, d, e, f);
|
||||
ROUND(27, f, g, h, a, b, c, d, e);
|
||||
ROUND(28, e, f, g, h, a, b, c, d);
|
||||
ROUND(29, d, e, f, g, h, a, b, c);
|
||||
ROUND(30, c, d, e, f, g, h, a, b);
|
||||
ROUND(31, b, c, d, e, f, g, h, a);
|
||||
ROUND(32, a, b, c, d, e, f, g, h);
|
||||
ROUND(33, h, a, b, c, d, e, f, g);
|
||||
ROUND(34, g, h, a, b, c, d, e, f);
|
||||
ROUND(35, f, g, h, a, b, c, d, e);
|
||||
ROUND(36, e, f, g, h, a, b, c, d);
|
||||
ROUND(37, d, e, f, g, h, a, b, c);
|
||||
ROUND(38, c, d, e, f, g, h, a, b);
|
||||
ROUND(39, b, c, d, e, f, g, h, a);
|
||||
ROUND(40, a, b, c, d, e, f, g, h);
|
||||
ROUND(41, h, a, b, c, d, e, f, g);
|
||||
ROUND(42, g, h, a, b, c, d, e, f);
|
||||
ROUND(43, f, g, h, a, b, c, d, e);
|
||||
ROUND(44, e, f, g, h, a, b, c, d);
|
||||
ROUND(45, d, e, f, g, h, a, b, c);
|
||||
ROUND(46, c, d, e, f, g, h, a, b);
|
||||
ROUND(47, b, c, d, e, f, g, h, a);
|
||||
ROUND(48, a, b, c, d, e, f, g, h);
|
||||
ROUND(49, h, a, b, c, d, e, f, g);
|
||||
ROUND(50, g, h, a, b, c, d, e, f);
|
||||
ROUND(51, f, g, h, a, b, c, d, e);
|
||||
ROUND(52, e, f, g, h, a, b, c, d);
|
||||
ROUND(53, d, e, f, g, h, a, b, c);
|
||||
ROUND(54, c, d, e, f, g, h, a, b);
|
||||
ROUND(55, b, c, d, e, f, g, h, a);
|
||||
ROUND(56, a, b, c, d, e, f, g, h);
|
||||
ROUND(57, h, a, b, c, d, e, f, g);
|
||||
ROUND(58, g, h, a, b, c, d, e, f);
|
||||
ROUND(59, f, g, h, a, b, c, d, e);
|
||||
ROUND(60, e, f, g, h, a, b, c, d);
|
||||
ROUND(61, d, e, f, g, h, a, b, c);
|
||||
ROUND(62, c, d, e, f, g, h, a, b);
|
||||
ROUND(63, b, c, d, e, f, g, h, a);
|
||||
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
|
||||
+318
-121
@@ -58,8 +58,8 @@ static inline void sha384_finish(sha384_ctx* ctx, uint8_t digest[SHA384_DIGEST_S
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# include <stdlib.h>
|
||||
# define SHA512_GET64BE(ptr) _byteswap_uint64( *((const _UNALIGNED uint64_t*)(ptr)) )
|
||||
# define SHA512_SET64BE(ptr,x) *((_UNALIGNED uint64_t*)(ptr)) = _byteswap_uint64(x)
|
||||
# define SHA512_GET64BE(ptr) _byteswap_uint64( *((const __unaligned uint64_t*)(ptr)) )
|
||||
# define SHA512_SET64BE(ptr,x) *((__unaligned uint64_t*)(ptr)) = _byteswap_uint64(x)
|
||||
#else
|
||||
# define SHA512_GET64BE(ptr) \
|
||||
( \
|
||||
@@ -86,9 +86,33 @@ static inline void sha384_finish(sha384_ctx* ctx, uint8_t digest[SHA384_DIGEST_S
|
||||
while (0)
|
||||
#endif
|
||||
|
||||
static const uint64_t SHA512_K[80] =
|
||||
{
|
||||
0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
|
||||
0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
|
||||
0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
|
||||
0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
|
||||
0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
|
||||
0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
|
||||
0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
|
||||
0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
|
||||
0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
|
||||
0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
|
||||
0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
|
||||
0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
|
||||
0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
|
||||
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
|
||||
0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
|
||||
0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
|
||||
0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
|
||||
0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
|
||||
0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
|
||||
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
|
||||
};
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_AMD64)
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <immintrin.h> // AVX2 + SHA512
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
# include <cpuid.h>
|
||||
@@ -117,7 +141,7 @@ static inline int sha512_cpuid(void)
|
||||
{
|
||||
int info[4];
|
||||
|
||||
SHA256_CPUID(1, info);
|
||||
SHA512_CPUID(1, info);
|
||||
int has_xsave = info[2] & (1 << 26);
|
||||
|
||||
int has_ymm = 0;
|
||||
@@ -127,13 +151,13 @@ static inline int sha512_cpuid(void)
|
||||
has_ymm = xcr0 & (1 << 2);
|
||||
}
|
||||
|
||||
SHA256_CPUID_EX(7, 0, info);
|
||||
SHA512_CPUID_EX(7, 0, info);
|
||||
int has_avx2 = info[1] & (1 << 5);
|
||||
|
||||
SHA256_CPUID_EX(7, 1, info);
|
||||
SHA512_CPUID_EX(7, 1, info);
|
||||
int has_sha512 = info[0] & (1 << 0);
|
||||
|
||||
result |= SHA256_CPUID_INIT;
|
||||
result |= SHA512_CPUID_INIT;
|
||||
if (has_ymm && has_avx2 && has_sha512)
|
||||
{
|
||||
result |= SHA512_CPUID_VSHA512;
|
||||
@@ -152,51 +176,32 @@ static inline int sha512_cpuid(void)
|
||||
SHA512_TARGET("avx2,sha512")
|
||||
static void sha512_process_vsha512(uint64_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
const __m256i* buffer = (const __m256i*)block;
|
||||
// pretty much same way how sha256 works, only with avx2 registers and 64-bit additions
|
||||
// state is kept as two 256-bit ymm registers (8 qwords)
|
||||
|
||||
// to byteswap when doing big-ending load for message qwords
|
||||
const __m256i bswap = _mm256_broadcastsi128_si256(_mm_setr_epi8(7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8));
|
||||
|
||||
static const uint64_t K[20][4] =
|
||||
{
|
||||
{ 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc },
|
||||
{ 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118 },
|
||||
{ 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2 },
|
||||
{ 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694 },
|
||||
{ 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65 },
|
||||
{ 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5 },
|
||||
{ 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4 },
|
||||
{ 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70 },
|
||||
{ 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df },
|
||||
{ 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b },
|
||||
{ 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30 },
|
||||
{ 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8 },
|
||||
{ 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8 },
|
||||
{ 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3 },
|
||||
{ 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec },
|
||||
{ 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b },
|
||||
{ 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178 },
|
||||
{ 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b },
|
||||
{ 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c },
|
||||
{ 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 },
|
||||
};
|
||||
// message qwords are loaded as 64-bit big-endian values
|
||||
|
||||
#define W(i) w[(i)%4]
|
||||
|
||||
// 4 wide round calculations
|
||||
#define QROUND(i) do { \
|
||||
/* first four rounds loads input message */ \
|
||||
/* first 4 rounds load input block */ \
|
||||
if (i < 4) W(i) = _mm256_shuffle_epi8(_mm256_loadu_si256(&buffer[i]), bswap); \
|
||||
/* add round constant */ \
|
||||
tmp = _mm256_add_epi64(W(i), _mm256_loadu_si256((const __m256i*)K[i])); \
|
||||
/* update previous message qwords for next rounds */ \
|
||||
/* update message schedule */ \
|
||||
if (i > 2 && i < 19) W(i-3) = _mm256_sha512msg2_epi64(_mm256_add_epi64(W(i-3), _mm256_permute4x64_epi64(_mm256_blend_epi32(W(i-1), W(i), 3), _MM_SHUFFLE(0,3,2,1))), W(i)); \
|
||||
if (i > 0 && i < 17) W(i-1) = _mm256_sha512msg1_epi64(W(i-1), _mm256_castsi256_si128(W(i))); \
|
||||
/* add round constants */ \
|
||||
__m256i tmp = _mm256_add_epi64(W(i), _mm256_loadu_si256((const __m256i*)&SHA512_K[4*i])); \
|
||||
/* round functions */ \
|
||||
state1 = _mm256_sha512rnds2_epi64(state1, state0, _mm256_castsi256_si128(tmp)); \
|
||||
state0 = _mm256_sha512rnds2_epi64(state0, state1, _mm256_extracti128_si256(tmp, 1)); \
|
||||
} while(0)
|
||||
|
||||
const __m256i* buffer = (const __m256i*)block;
|
||||
|
||||
// to byteswap when doing big-ending load for message qwords
|
||||
const __m256i bswap = _mm256_broadcastsi128_si256(_mm_setr_epi8(7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8));
|
||||
|
||||
// load initial state
|
||||
__m256i abcd = _mm256_permute4x64_epi64(_mm256_loadu_si256((const __m256i*)&state[0]), _MM_SHUFFLE(0,1,2,3)); // [a,b,c,d]
|
||||
__m256i efgh = _mm256_permute4x64_epi64(_mm256_loadu_si256((const __m256i*)&state[4]), _MM_SHUFFLE(0,1,2,3)); // [e,f,g,h]
|
||||
@@ -211,7 +216,7 @@ static void sha512_process_vsha512(uint64_t* state, const uint8_t* block, size_t
|
||||
__m256i last0 = state0;
|
||||
__m256i last1 = state1;
|
||||
|
||||
__m256i tmp, w[4];
|
||||
__m256i w[4];
|
||||
|
||||
QROUND(0);
|
||||
QROUND(1);
|
||||
@@ -256,6 +261,189 @@ static void sha512_process_vsha512(uint64_t* state, const uint8_t* block, size_t
|
||||
|
||||
#endif // defined(__x86_64__) || defined(_M_AMD64)
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
|
||||
#if defined(__clang__)
|
||||
# define SHA512_TARGET __attribute__((target("sha3")))
|
||||
#elif defined(__GNUC__)
|
||||
# define SHA512_TARGET __attribute__((target("+sha3")))
|
||||
#elif defined(_MSC_VER)
|
||||
# define SHA512_TARGET
|
||||
#endif
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
# include <windows.h>
|
||||
# pragma comment (lib, "advapi32")
|
||||
#elif defined(__linux__)
|
||||
# include <sys/auxv.h>
|
||||
# include <asm/hwcap.h>
|
||||
#elif defined(__APPLE__)
|
||||
# include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
#define SHA512_CPUID_INIT (1 << 0)
|
||||
#define SHA512_CPUID_ARM64 (1 << 1)
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#endif
|
||||
|
||||
static inline int sha512_cpuid(void)
|
||||
{
|
||||
#if defined(__ARM_FEATURE_SHA512)
|
||||
int result = SHA512_CPUID_ARM64;
|
||||
#else
|
||||
static int cpuid;
|
||||
|
||||
int result = cpuid;
|
||||
if (result == 0)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
// no sha512 bit in IsProcessorFeaturePresent function :(
|
||||
uint64_t bits;
|
||||
DWORD bitsize = sizeof(bits);
|
||||
RegGetValueA(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "CP 4030", RRF_RT_QWORD | RRF_ZEROONFAILURE, NULL, &bits, &bitsize);
|
||||
// bits from ID_AA64ISAR0_EL1
|
||||
int has_arm64 = ((bits >> 15) & 0xf) == 0x2;
|
||||
#elif defined(__linux__)
|
||||
unsigned long hwcap = getauxval(AT_HWCAP);
|
||||
int has_arm64 = hwcap & HWCAP_SHA512;
|
||||
#elif defined(__APPLE__)
|
||||
int value = 0;
|
||||
size_t valuelen = sizeof(value);
|
||||
int has_arm64 = sysctlbyname("hw.optional.arm.FEAT_SHA512", &value, &valuelen, NULL, 0) == 0 && value != 0;
|
||||
#else
|
||||
#error unknown platform
|
||||
#endif
|
||||
result |= SHA512_CPUID_INIT;
|
||||
if (has_arm64)
|
||||
{
|
||||
result |= SHA512_CPUID_ARM64;
|
||||
}
|
||||
|
||||
cpuid = result;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(SHA512_CPUID_MASK)
|
||||
result &= SHA512_CPUID_MASK;
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
SHA512_TARGET
|
||||
static void sha512_process_arm64(uint64_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
#define W(i) w[(i)%8]
|
||||
#define S(i) vstate.val[3-(i)%4]
|
||||
|
||||
#define DROUND(i) do { \
|
||||
/* load 8 round constants */ \
|
||||
if ((i % 4) == 0) rk = vld1q_u64_x4(&SHA512_K[2*i]); \
|
||||
/* first 8 rounds reverse byte order in each 64-bit lane of input block */ \
|
||||
if (i < 8) W(i) = vreinterpretq_u64_u8(vrev64q_u8(msg[(i/4)%2].val[i%4])); \
|
||||
/* update message schedule for next rounds */ \
|
||||
if (i >= 8) W(i) = vsha512su1q_u64(vsha512su0q_u64(W(i), W(i-7)), W(i-1), vextq_u64(W(i-4), W(i-3), 1)); \
|
||||
/* add round constants */ \
|
||||
uint64x2_t tmp = vaddq_u64(W(i), rk.val[i%4]); \
|
||||
/* 2 round functions */ \
|
||||
uint64x2_t x0 = vaddq_u64(vextq_u64(tmp, tmp, 1), S(i+0)); \
|
||||
uint64x2_t x1 = vsha512hq_u64(x0, vextq_u64(S(i+1), S(i+0), 1), vextq_u64(S(i+2), S(i+1), 1)); \
|
||||
S(i+0) = vsha512h2q_u64(x1, S(i+2), S(i+3)); \
|
||||
S(i+2) = vaddq_u64(S(i+2), x1); \
|
||||
} while (0)
|
||||
|
||||
// load initial state
|
||||
uint64x2x4_t vstate = vld1q_u64_x4(state);
|
||||
|
||||
do
|
||||
{
|
||||
// remember current state
|
||||
uint64x2x4_t vlast = vstate;
|
||||
|
||||
// load 128-byte block
|
||||
uint8x16x4_t msg[2] =
|
||||
{
|
||||
vld1q_u8_x4(block + 0 * 16),
|
||||
vld1q_u8_x4(block + 4 * 16),
|
||||
};
|
||||
|
||||
uint64x2x4_t rk;
|
||||
uint64x2_t w[8];
|
||||
|
||||
DROUND( 0);
|
||||
DROUND( 1);
|
||||
DROUND( 2);
|
||||
DROUND( 3);
|
||||
|
||||
DROUND( 4);
|
||||
DROUND( 5);
|
||||
DROUND( 6);
|
||||
DROUND( 7);
|
||||
|
||||
DROUND( 8);
|
||||
DROUND( 9);
|
||||
DROUND(10);
|
||||
DROUND(11);
|
||||
|
||||
DROUND(12);
|
||||
DROUND(13);
|
||||
DROUND(14);
|
||||
DROUND(15);
|
||||
|
||||
DROUND(16);
|
||||
DROUND(17);
|
||||
DROUND(18);
|
||||
DROUND(19);
|
||||
|
||||
DROUND(20);
|
||||
DROUND(21);
|
||||
DROUND(22);
|
||||
DROUND(23);
|
||||
|
||||
DROUND(24);
|
||||
DROUND(25);
|
||||
DROUND(26);
|
||||
DROUND(27);
|
||||
|
||||
DROUND(28);
|
||||
DROUND(29);
|
||||
DROUND(30);
|
||||
DROUND(31);
|
||||
|
||||
DROUND(32);
|
||||
DROUND(33);
|
||||
DROUND(34);
|
||||
DROUND(35);
|
||||
|
||||
DROUND(36);
|
||||
DROUND(37);
|
||||
DROUND(38);
|
||||
DROUND(39);
|
||||
|
||||
// update next state
|
||||
vstate.val[0] = vaddq_u64(vstate.val[0], vlast.val[0]);
|
||||
vstate.val[1] = vaddq_u64(vstate.val[1], vlast.val[1]);
|
||||
vstate.val[2] = vaddq_u64(vstate.val[2], vlast.val[2]);
|
||||
vstate.val[3] = vaddq_u64(vstate.val[3], vlast.val[3]);
|
||||
|
||||
block += SHA512_BLOCK_SIZE;
|
||||
}
|
||||
while (--count);
|
||||
|
||||
// save the new state
|
||||
vst1q_u64_x4(state, vstate);
|
||||
|
||||
#undef DROUND
|
||||
#undef S
|
||||
#undef W
|
||||
}
|
||||
|
||||
#endif // defined(__aarch64__) || defined(_M_ARM64)
|
||||
|
||||
static void sha512_process(uint64_t* state, const uint8_t* block, size_t count)
|
||||
{
|
||||
#if defined(__x86_64__) || defined(_M_AMD64)
|
||||
@@ -267,6 +455,15 @@ static void sha512_process(uint64_t* state, const uint8_t* block, size_t count)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
int cpuid = sha512_cpuid();
|
||||
if (cpuid & SHA512_CPUID_ARM64)
|
||||
{
|
||||
sha512_process_arm64(state, block, count);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define Ch(x,y,z) ((x & (y ^ z)) ^ z)
|
||||
#define Maj(x,y,z) ((x & y) | (z & (x | y)))
|
||||
|
||||
@@ -277,13 +474,13 @@ static void sha512_process(uint64_t* state, const uint8_t* block, size_t count)
|
||||
|
||||
#define W(i) w[(i+16)%16]
|
||||
|
||||
#define ROUND(i,a,b,c,d,e,f,g,h,K) do \
|
||||
#define ROUND(i,a,b,c,d,e,f,g,h) do \
|
||||
{ \
|
||||
uint64_t w0; \
|
||||
if (i < 16) W(i) = w0 = SHA512_GET64BE(block + i*sizeof(uint64_t)); \
|
||||
if (i >= 16) W(i) = w0 = SSig1(W(i-2)) + W(i-7) + SSig0(W(i-15)) + W(i-16); \
|
||||
\
|
||||
uint64_t t1 = h + BSig1(e) + Ch(e,f,g) + K + w0; \
|
||||
uint64_t t1 = h + BSig1(e) + Ch(e,f,g) + SHA512_K[i] + w0; \
|
||||
uint64_t t2 = BSig0(a) + Maj(a,b,c); \
|
||||
d += t1; \
|
||||
h = t1 + t2; \
|
||||
@@ -302,86 +499,86 @@ static void sha512_process(uint64_t* state, const uint8_t* block, size_t count)
|
||||
|
||||
uint64_t w[16];
|
||||
|
||||
ROUND( 0, a, b, c, d, e, f, g, h, 0x428a2f98d728ae22);
|
||||
ROUND( 1, h, a, b, c, d, e, f, g, 0x7137449123ef65cd);
|
||||
ROUND( 2, g, h, a, b, c, d, e, f, 0xb5c0fbcfec4d3b2f);
|
||||
ROUND( 3, f, g, h, a, b, c, d, e, 0xe9b5dba58189dbbc);
|
||||
ROUND( 4, e, f, g, h, a, b, c, d, 0x3956c25bf348b538);
|
||||
ROUND( 5, d, e, f, g, h, a, b, c, 0x59f111f1b605d019);
|
||||
ROUND( 6, c, d, e, f, g, h, a, b, 0x923f82a4af194f9b);
|
||||
ROUND( 7, b, c, d, e, f, g, h, a, 0xab1c5ed5da6d8118);
|
||||
ROUND( 8, a, b, c, d, e, f, g, h, 0xd807aa98a3030242);
|
||||
ROUND( 9, h, a, b, c, d, e, f, g, 0x12835b0145706fbe);
|
||||
ROUND(10, g, h, a, b, c, d, e, f, 0x243185be4ee4b28c);
|
||||
ROUND(11, f, g, h, a, b, c, d, e, 0x550c7dc3d5ffb4e2);
|
||||
ROUND(12, e, f, g, h, a, b, c, d, 0x72be5d74f27b896f);
|
||||
ROUND(13, d, e, f, g, h, a, b, c, 0x80deb1fe3b1696b1);
|
||||
ROUND(14, c, d, e, f, g, h, a, b, 0x9bdc06a725c71235);
|
||||
ROUND(15, b, c, d, e, f, g, h, a, 0xc19bf174cf692694);
|
||||
ROUND(16, a, b, c, d, e, f, g, h, 0xe49b69c19ef14ad2);
|
||||
ROUND(17, h, a, b, c, d, e, f, g, 0xefbe4786384f25e3);
|
||||
ROUND(18, g, h, a, b, c, d, e, f, 0x0fc19dc68b8cd5b5);
|
||||
ROUND(19, f, g, h, a, b, c, d, e, 0x240ca1cc77ac9c65);
|
||||
ROUND(20, e, f, g, h, a, b, c, d, 0x2de92c6f592b0275);
|
||||
ROUND(21, d, e, f, g, h, a, b, c, 0x4a7484aa6ea6e483);
|
||||
ROUND(22, c, d, e, f, g, h, a, b, 0x5cb0a9dcbd41fbd4);
|
||||
ROUND(23, b, c, d, e, f, g, h, a, 0x76f988da831153b5);
|
||||
ROUND(24, a, b, c, d, e, f, g, h, 0x983e5152ee66dfab);
|
||||
ROUND(25, h, a, b, c, d, e, f, g, 0xa831c66d2db43210);
|
||||
ROUND(26, g, h, a, b, c, d, e, f, 0xb00327c898fb213f);
|
||||
ROUND(27, f, g, h, a, b, c, d, e, 0xbf597fc7beef0ee4);
|
||||
ROUND(28, e, f, g, h, a, b, c, d, 0xc6e00bf33da88fc2);
|
||||
ROUND(29, d, e, f, g, h, a, b, c, 0xd5a79147930aa725);
|
||||
ROUND(30, c, d, e, f, g, h, a, b, 0x06ca6351e003826f);
|
||||
ROUND(31, b, c, d, e, f, g, h, a, 0x142929670a0e6e70);
|
||||
ROUND(32, a, b, c, d, e, f, g, h, 0x27b70a8546d22ffc);
|
||||
ROUND(33, h, a, b, c, d, e, f, g, 0x2e1b21385c26c926);
|
||||
ROUND(34, g, h, a, b, c, d, e, f, 0x4d2c6dfc5ac42aed);
|
||||
ROUND(35, f, g, h, a, b, c, d, e, 0x53380d139d95b3df);
|
||||
ROUND(36, e, f, g, h, a, b, c, d, 0x650a73548baf63de);
|
||||
ROUND(37, d, e, f, g, h, a, b, c, 0x766a0abb3c77b2a8);
|
||||
ROUND(38, c, d, e, f, g, h, a, b, 0x81c2c92e47edaee6);
|
||||
ROUND(39, b, c, d, e, f, g, h, a, 0x92722c851482353b);
|
||||
ROUND(40, a, b, c, d, e, f, g, h, 0xa2bfe8a14cf10364);
|
||||
ROUND(41, h, a, b, c, d, e, f, g, 0xa81a664bbc423001);
|
||||
ROUND(42, g, h, a, b, c, d, e, f, 0xc24b8b70d0f89791);
|
||||
ROUND(43, f, g, h, a, b, c, d, e, 0xc76c51a30654be30);
|
||||
ROUND(44, e, f, g, h, a, b, c, d, 0xd192e819d6ef5218);
|
||||
ROUND(45, d, e, f, g, h, a, b, c, 0xd69906245565a910);
|
||||
ROUND(46, c, d, e, f, g, h, a, b, 0xf40e35855771202a);
|
||||
ROUND(47, b, c, d, e, f, g, h, a, 0x106aa07032bbd1b8);
|
||||
ROUND(48, a, b, c, d, e, f, g, h, 0x19a4c116b8d2d0c8);
|
||||
ROUND(49, h, a, b, c, d, e, f, g, 0x1e376c085141ab53);
|
||||
ROUND(50, g, h, a, b, c, d, e, f, 0x2748774cdf8eeb99);
|
||||
ROUND(51, f, g, h, a, b, c, d, e, 0x34b0bcb5e19b48a8);
|
||||
ROUND(52, e, f, g, h, a, b, c, d, 0x391c0cb3c5c95a63);
|
||||
ROUND(53, d, e, f, g, h, a, b, c, 0x4ed8aa4ae3418acb);
|
||||
ROUND(54, c, d, e, f, g, h, a, b, 0x5b9cca4f7763e373);
|
||||
ROUND(55, b, c, d, e, f, g, h, a, 0x682e6ff3d6b2b8a3);
|
||||
ROUND(56, a, b, c, d, e, f, g, h, 0x748f82ee5defb2fc);
|
||||
ROUND(57, h, a, b, c, d, e, f, g, 0x78a5636f43172f60);
|
||||
ROUND(58, g, h, a, b, c, d, e, f, 0x84c87814a1f0ab72);
|
||||
ROUND(59, f, g, h, a, b, c, d, e, 0x8cc702081a6439ec);
|
||||
ROUND(60, e, f, g, h, a, b, c, d, 0x90befffa23631e28);
|
||||
ROUND(61, d, e, f, g, h, a, b, c, 0xa4506cebde82bde9);
|
||||
ROUND(62, c, d, e, f, g, h, a, b, 0xbef9a3f7b2c67915);
|
||||
ROUND(63, b, c, d, e, f, g, h, a, 0xc67178f2e372532b);
|
||||
ROUND(64, a, b, c, d, e, f, g, h, 0xca273eceea26619c);
|
||||
ROUND(65, h, a, b, c, d, e, f, g, 0xd186b8c721c0c207);
|
||||
ROUND(66, g, h, a, b, c, d, e, f, 0xeada7dd6cde0eb1e);
|
||||
ROUND(67, f, g, h, a, b, c, d, e, 0xf57d4f7fee6ed178);
|
||||
ROUND(68, e, f, g, h, a, b, c, d, 0x06f067aa72176fba);
|
||||
ROUND(69, d, e, f, g, h, a, b, c, 0x0a637dc5a2c898a6);
|
||||
ROUND(70, c, d, e, f, g, h, a, b, 0x113f9804bef90dae);
|
||||
ROUND(71, b, c, d, e, f, g, h, a, 0x1b710b35131c471b);
|
||||
ROUND(72, a, b, c, d, e, f, g, h, 0x28db77f523047d84);
|
||||
ROUND(73, h, a, b, c, d, e, f, g, 0x32caab7b40c72493);
|
||||
ROUND(74, g, h, a, b, c, d, e, f, 0x3c9ebe0a15c9bebc);
|
||||
ROUND(75, f, g, h, a, b, c, d, e, 0x431d67c49c100d4c);
|
||||
ROUND(76, e, f, g, h, a, b, c, d, 0x4cc5d4becb3e42b6);
|
||||
ROUND(77, d, e, f, g, h, a, b, c, 0x597f299cfc657e2a);
|
||||
ROUND(78, c, d, e, f, g, h, a, b, 0x5fcb6fab3ad6faec);
|
||||
ROUND(79, b, c, d, e, f, g, h, a, 0x6c44198c4a475817);
|
||||
ROUND( 0, a, b, c, d, e, f, g, h);
|
||||
ROUND( 1, h, a, b, c, d, e, f, g);
|
||||
ROUND( 2, g, h, a, b, c, d, e, f);
|
||||
ROUND( 3, f, g, h, a, b, c, d, e);
|
||||
ROUND( 4, e, f, g, h, a, b, c, d);
|
||||
ROUND( 5, d, e, f, g, h, a, b, c);
|
||||
ROUND( 6, c, d, e, f, g, h, a, b);
|
||||
ROUND( 7, b, c, d, e, f, g, h, a);
|
||||
ROUND( 8, a, b, c, d, e, f, g, h);
|
||||
ROUND( 9, h, a, b, c, d, e, f, g);
|
||||
ROUND(10, g, h, a, b, c, d, e, f);
|
||||
ROUND(11, f, g, h, a, b, c, d, e);
|
||||
ROUND(12, e, f, g, h, a, b, c, d);
|
||||
ROUND(13, d, e, f, g, h, a, b, c);
|
||||
ROUND(14, c, d, e, f, g, h, a, b);
|
||||
ROUND(15, b, c, d, e, f, g, h, a);
|
||||
ROUND(16, a, b, c, d, e, f, g, h);
|
||||
ROUND(17, h, a, b, c, d, e, f, g);
|
||||
ROUND(18, g, h, a, b, c, d, e, f);
|
||||
ROUND(19, f, g, h, a, b, c, d, e);
|
||||
ROUND(20, e, f, g, h, a, b, c, d);
|
||||
ROUND(21, d, e, f, g, h, a, b, c);
|
||||
ROUND(22, c, d, e, f, g, h, a, b);
|
||||
ROUND(23, b, c, d, e, f, g, h, a);
|
||||
ROUND(24, a, b, c, d, e, f, g, h);
|
||||
ROUND(25, h, a, b, c, d, e, f, g);
|
||||
ROUND(26, g, h, a, b, c, d, e, f);
|
||||
ROUND(27, f, g, h, a, b, c, d, e);
|
||||
ROUND(28, e, f, g, h, a, b, c, d);
|
||||
ROUND(29, d, e, f, g, h, a, b, c);
|
||||
ROUND(30, c, d, e, f, g, h, a, b);
|
||||
ROUND(31, b, c, d, e, f, g, h, a);
|
||||
ROUND(32, a, b, c, d, e, f, g, h);
|
||||
ROUND(33, h, a, b, c, d, e, f, g);
|
||||
ROUND(34, g, h, a, b, c, d, e, f);
|
||||
ROUND(35, f, g, h, a, b, c, d, e);
|
||||
ROUND(36, e, f, g, h, a, b, c, d);
|
||||
ROUND(37, d, e, f, g, h, a, b, c);
|
||||
ROUND(38, c, d, e, f, g, h, a, b);
|
||||
ROUND(39, b, c, d, e, f, g, h, a);
|
||||
ROUND(40, a, b, c, d, e, f, g, h);
|
||||
ROUND(41, h, a, b, c, d, e, f, g);
|
||||
ROUND(42, g, h, a, b, c, d, e, f);
|
||||
ROUND(43, f, g, h, a, b, c, d, e);
|
||||
ROUND(44, e, f, g, h, a, b, c, d);
|
||||
ROUND(45, d, e, f, g, h, a, b, c);
|
||||
ROUND(46, c, d, e, f, g, h, a, b);
|
||||
ROUND(47, b, c, d, e, f, g, h, a);
|
||||
ROUND(48, a, b, c, d, e, f, g, h);
|
||||
ROUND(49, h, a, b, c, d, e, f, g);
|
||||
ROUND(50, g, h, a, b, c, d, e, f);
|
||||
ROUND(51, f, g, h, a, b, c, d, e);
|
||||
ROUND(52, e, f, g, h, a, b, c, d);
|
||||
ROUND(53, d, e, f, g, h, a, b, c);
|
||||
ROUND(54, c, d, e, f, g, h, a, b);
|
||||
ROUND(55, b, c, d, e, f, g, h, a);
|
||||
ROUND(56, a, b, c, d, e, f, g, h);
|
||||
ROUND(57, h, a, b, c, d, e, f, g);
|
||||
ROUND(58, g, h, a, b, c, d, e, f);
|
||||
ROUND(59, f, g, h, a, b, c, d, e);
|
||||
ROUND(60, e, f, g, h, a, b, c, d);
|
||||
ROUND(61, d, e, f, g, h, a, b, c);
|
||||
ROUND(62, c, d, e, f, g, h, a, b);
|
||||
ROUND(63, b, c, d, e, f, g, h, a);
|
||||
ROUND(64, a, b, c, d, e, f, g, h);
|
||||
ROUND(65, h, a, b, c, d, e, f, g);
|
||||
ROUND(66, g, h, a, b, c, d, e, f);
|
||||
ROUND(67, f, g, h, a, b, c, d, e);
|
||||
ROUND(68, e, f, g, h, a, b, c, d);
|
||||
ROUND(69, d, e, f, g, h, a, b, c);
|
||||
ROUND(70, c, d, e, f, g, h, a, b);
|
||||
ROUND(71, b, c, d, e, f, g, h, a);
|
||||
ROUND(72, a, b, c, d, e, f, g, h);
|
||||
ROUND(73, h, a, b, c, d, e, f, g);
|
||||
ROUND(74, g, h, a, b, c, d, e, f);
|
||||
ROUND(75, f, g, h, a, b, c, d, e);
|
||||
ROUND(76, e, f, g, h, a, b, c, d);
|
||||
ROUND(77, d, e, f, g, h, a, b, c);
|
||||
ROUND(78, c, d, e, f, g, h, a, b);
|
||||
ROUND(79, b, c, d, e, f, g, h, a);
|
||||
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
|
||||
Reference in New Issue
Block a user