switch to martins hash

This commit is contained in:
Ryan Fleury
2025-10-08 12:58:59 -07:00
parent 7eb64dbd13
commit 96477c65dd
11 changed files with 1913 additions and 590 deletions
+31
View File
@@ -84,6 +84,37 @@
# define C_LINKAGE
#endif
////////////////////////////////
//~ rjf: Optimization Settings
// OPTIMIZE_BEGIN / OPTIMIZE_END bracket a region of code that should be compiled
// with optimizations. Each expands to one or more _Pragma operators selected by
// the COMPILER_* macros, and to nothing on compilers that are not recognized.
// NOTE(review): only the GCC form saves and restores state (push_options /
// pop_options). The MSVC and Clang OPTIMIZE_END forms emit an "off" pragma, so
// whatever follows the region in the same translation unit is compiled under
// that pragma -- confirm this is the intended behavior for optimized builds.
#if COMPILER_MSVC
# define OPTIMIZE_BEGIN _Pragma("optimize(\"\", on)")
# define OPTIMIZE_END _Pragma("optimize(\"\", off)")
#elif COMPILER_CLANG
# define OPTIMIZE_BEGIN _Pragma("clang optimize on")
# define OPTIMIZE_END _Pragma("clang optimize off")
#elif COMPILER_GCC
# define OPTIMIZE_BEGIN _Pragma("GCC push_options") _Pragma("GCC optimize(\"O2\")")
# define OPTIMIZE_END _Pragma("GCC pop_options")
#else
# define OPTIMIZE_BEGIN
# define OPTIMIZE_END
#endif
// NO_OPTIMIZE_BEGIN / NO_OPTIMIZE_END bracket a region of code that should be
// compiled without optimizations. They are only active when BUILD_DEBUG is zero;
// when BUILD_DEBUG is nonzero (or the compiler is not recognized) both expand to
// nothing.
#if COMPILER_MSVC && !BUILD_DEBUG
# define NO_OPTIMIZE_BEGIN _Pragma("optimize(\"\", off)")
# define NO_OPTIMIZE_END _Pragma("optimize(\"\", on)")
#elif COMPILER_CLANG && !BUILD_DEBUG
# define NO_OPTIMIZE_BEGIN _Pragma("clang optimize off")
# define NO_OPTIMIZE_END _Pragma("clang optimize on")
#elif COMPILER_GCC && !BUILD_DEBUG
# define NO_OPTIMIZE_BEGIN _Pragma("GCC push_options") _Pragma("GCC optimize(\"O0\")")
# define NO_OPTIMIZE_END _Pragma("GCC pop_options")
#else
# define NO_OPTIMIZE_BEGIN
# define NO_OPTIMIZE_END
#endif
////////////////////////////////
//~ rjf: Versions
+16 -19
View File
@@ -4,37 +4,34 @@
////////////////////////////////
//~ rjf: MD5
#if !defined(MD5_API)
# define MD5_API static
# include "third_party/md5/md5.c"
# include "third_party/md5/md5.h"
#endif
#include "third_party/martins_hash/md5.h"
internal MD5
md5_from_data(String8 data)
{
MD5_CTX ctx = {0};
MD5_Init(&ctx);
MD5_Update(&ctx, (void*)data.str, data.size);
md5_ctx ctx = {0};
md5_init(&ctx);
md5_update(&ctx, (void*)data.str, data.size);
MD5 result = {0};
MD5_Final(result.u8, &ctx);
md5_finish(&ctx, result.u8);
return result;
}
////////////////////////////////
//~ rjf: SHA1
//~ rjf: SHA
#include "third_party/tomcrypt_hash/tomcrypt_hash.h"
#include "third_party/martins_hash/sha1.h"
#include "third_party/martins_hash/sha256.h"
internal SHA1
sha1_from_data(String8 data)
{
SHA1 result = {0};
{
SHA1State state = {0};
sha1_init(&state);
sha1_process(&state, data.str, data.size);
sha1_done(&state, result.u8);
sha1_ctx ctx = {0};
sha1_init(&ctx);
sha1_update(&ctx, data.str, data.size);
sha1_finish(&ctx, result.u8);
}
return result;
}
@@ -44,10 +41,10 @@ sha256_from_data(String8 data)
{
SHA256 result = {0};
{
SHA256State state = {0};
sha256_init(&state);
sha256_process(&state, data.str, data.size);
sha256_done(&state, result.u8);
sha256_ctx ctx = {0};
sha256_init(&ctx);
sha256_update(&ctx, data.str, data.size);
sha256_finish(&ctx, result.u8);
}
return result;
}
+2 -1
View File
@@ -6136,13 +6136,14 @@ internal C_Key
ctrl_key_from_process_vaddr_range(CTRL_Handle process, Rng1U64 vaddr_range, B32 zero_terminated, B32 wait_for_fresh, U64 endt_us, B32 *out_is_stale)
{
ProfBeginFunction();
#pragma pack(push, 1)
struct
{
CTRL_Handle process;
Rng1U64 vaddr_range;
B32 zero_terminated;
B32 _padding_;
} key_data = {process, vaddr_range, zero_terminated};
#pragma pack(pop)
String8 key = str8_struct(&key_data);
Access *access = access_open();
AC_Artifact artifact = ac_artifact_from_key(access, key, ctrl_memory_artifact_create, ctrl_memory_artifact_destroy, endt_us,
+4 -3
View File
@@ -5779,10 +5779,11 @@ rd_store_view_loading_info(B32 is_loading, U64 progress_u64, U64 progress_u64_ta
{
RD_Cfg *view = rd_cfg_from_id(rd_regs()->view);
RD_ViewState *view_state = rd_view_state_from_cfg(view);
B32 loading_state_is_new = (is_loading && view_state->loading_t_target != (F32)!!is_loading);
view_state->loading_t_target = (F32)!!is_loading;
view_state->loading_progress_v = progress_u64;
view_state->loading_progress_v_target = progress_u64_target;
if(view_state->last_frame_index_built+1 < rd_state->frame_index)
if(loading_state_is_new || view_state->last_frame_index_built+1 < rd_state->frame_index)
{
view_state->loading_t = view_state->loading_t_target;
}
@@ -5983,7 +5984,7 @@ rd_window_state_from_os_handle(OS_Handle os)
}
#if COMPILER_MSVC && !BUILD_DEBUG
#pragma optimize("", off)
NO_OPTIMIZE_BEGIN
#endif
internal void
@@ -9954,7 +9955,7 @@ rd_window_frame(void)
}
#if COMPILER_MSVC && !BUILD_DEBUG
#pragma optimize("", on)
NO_OPTIMIZE_END
#endif
////////////////////////////////
+2
View File
@@ -4024,12 +4024,14 @@ RD_VIEW_UI_FUNCTION_DEF(bitmap)
for EachIndex(rewind_idx, C_KEY_HASH_HISTORY_COUNT)
{
U128 hash = c_hash_from_key(texture_key, rewind_idx);
#pragma pack(push, 1)
struct
{
U128 hash;
RD_BitmapTopology top;
}
key_data = {hash, topology};
#pragma pack(pop)
String8 key = str8_struct(&key_data);
AC_Artifact artifact = ac_artifact_from_key(access, key, rd_bitmap_artifact_create, rd_bitmap_artifact_destroy, 0);
R_Handle texture_candidate = {0};
+2
View File
@@ -2250,11 +2250,13 @@ txt_artifact_destroy(AC_Artifact artifact)
internal TXT_TextInfo
txt_text_info_from_hash_lang(Access *access, U128 hash, TXT_LangKind lang)
{
#pragma pack(push, 1)
struct
{
U128 hash;
TXT_LangKind lang;
} key = {hash, lang};
#pragma pack(pop)
String8 key_string = str8_struct(&key);
AC_Artifact artifact = ac_artifact_from_key(access, key_string, txt_artifact_create, txt_artifact_destroy, 0, .flags = AC_Flag_Wide);
TXT_Artifact *txt_artifact = (TXT_Artifact *)artifact.u64[0];
+435
View File
@@ -0,0 +1,435 @@
#pragma once
// https://www.rfc-editor.org/rfc/rfc1321.html
#include <stddef.h>
#include <stdint.h>
//
// interface
//
#define MD5_DIGEST_SIZE 16
#define MD5_BLOCK_SIZE 64
// Streaming MD5 context: set up by md5_init, advanced by md5_update, and
// consumed by md5_finish.
typedef struct {
uint8_t buffer[MD5_BLOCK_SIZE]; // bytes of the trailing partial block not yet compressed
uint64_t count; // total number of message bytes passed to md5_update so far
uint32_t state[4]; // running hash words (loaded as a, b, c, d by the block functions)
} md5_ctx;
static inline void md5_init(md5_ctx* ctx);
static inline void md5_update(md5_ctx* ctx, const void* data, size_t size);
static inline void md5_finish(md5_ctx* ctx, uint8_t digest[MD5_DIGEST_SIZE]);
//
// implementation
//
#include <string.h> // memcpy, memset
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wcast-align"
# pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
# pragma clang diagnostic ignored "-Wlanguage-extension-token"
# pragma clang diagnostic ignored "-Wdeclaration-after-statement"
#endif
#if defined(__clang__)
# define MD5_ROL32(x,n) __builtin_rotateleft32(x, n)
#elif defined(_MSC_VER)
# include <stdlib.h>
# define MD5_ROL32(x,n) _rotl(x, n)
#else
# define MD5_ROL32(x,n) ( ((x) << (n)) | ((x) >> (32-(n))) )
#endif
// Little-endian load/store helpers for byte pointers of any alignment.
//
//   MD5_GET32LE(ptr)    - reads 4 bytes at ptr as a little-endian 32-bit value
//   MD5_SET32LE(ptr, x) - writes the low 32 bits of x to ptr, little-endian
//   MD5_SET64LE(ptr, x) - writes the 64-bit x to ptr, little-endian
//
// `ptr` is expected to point at uint8_t. The MSVC variants rely on unaligned
// direct access; every other compiler gets the portable byte-wise form.
#if defined(_MSC_VER)
#  define MD5_GET32LE(ptr) *((const _UNALIGNED uint32_t*)(ptr))
#  define MD5_SET32LE(ptr,x) *((_UNALIGNED uint32_t*)(ptr)) = (x)
#  define MD5_SET64LE(ptr,x) *((_UNALIGNED uint64_t*)(ptr)) = (x)
#else
// Each byte is widened to uint32_t before it is shifted: a uint8_t operand is
// otherwise promoted to (signed) int, and shifting a byte >= 0x80 left by 24
// overflows int, which is undefined behavior and produces a negative value
// that sign-extends when the result is stored in a wider type.
#  define MD5_GET32LE(ptr)              \
    (                                   \
        ((uint32_t)(ptr)[0] <<  0) |    \
        ((uint32_t)(ptr)[1] <<  8) |    \
        ((uint32_t)(ptr)[2] << 16) |    \
        ((uint32_t)(ptr)[3] << 24)      \
    )
#  define MD5_SET32LE(ptr, x) do        \
    {                                   \
        (ptr)[0] = (uint8_t)((x) >>  0); \
        (ptr)[1] = (uint8_t)((x) >>  8); \
        (ptr)[2] = (uint8_t)((x) >> 16); \
        (ptr)[3] = (uint8_t)((x) >> 24); \
    }                                   \
    while (0)
#  define MD5_SET64LE(ptr, x) do        \
    {                                   \
        (ptr)[0] = (uint8_t)((x) >>  0); \
        (ptr)[1] = (uint8_t)((x) >>  8); \
        (ptr)[2] = (uint8_t)((x) >> 16); \
        (ptr)[3] = (uint8_t)((x) >> 24); \
        (ptr)[4] = (uint8_t)((x) >> 32); \
        (ptr)[5] = (uint8_t)((x) >> 40); \
        (ptr)[6] = (uint8_t)((x) >> 48); \
        (ptr)[7] = (uint8_t)((x) >> 56); \
    }                                   \
    while (0)
#endif
// MD5_COMPILER_BARRIER forces clang to do better codegen without spilling registers to stack too much
#if defined(__clang__) || defined(__GNUC__)
# define MD5_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
#else
# define MD5_COMPILER_BARRIER()
#endif
#if defined(__x86_64__) || defined(_M_AMD64)
#if defined(__clang__) || defined(__GNUC__)
# include <cpuid.h>
# define MD5_TARGET(str) __attribute__((target(str)))
# define MD5_CPUID_EX(x, y, info) __cpuid_count(x, y, info[0], info[1], info[2], info[3])
# define MD5_ANDN_U32(x,y) (~(x) & (y))
#else
# include <intrin.h>
# define MD5_TARGET(str)
# define MD5_CPUID_EX(x, y, info) __cpuidex(info, x, y)
# define MD5_ANDN_U32(x,y) _andn_u32(x,y)
#endif
#if defined(__clang__)
# define MD5_RORX_U32(x,n) __builtin_rotateright32(x, n)
#elif defined(_MSC_VER)
# define MD5_RORX_U32(x,n) _rorx_u32(x,n)
#else
# define MD5_RORX_U32(x,n) ( ((x) >> (n)) | ((x) << (32-(n))) )
#endif
#define MD5_CPUID_INIT (1 << 0)
#define MD5_CPUID_BMI2 (1 << 1)
// Reports which optional MD5 code paths this CPU supports, as a bitmask of
// MD5_CPUID_* flags. The CPUID query (leaf 7, subleaf 0) runs only while the
// cached value is still zero; MD5_CPUID_INIT is always set afterwards so the
// cache is never zero again. If MD5_CPUID_MASK is defined, the returned value
// (but not the cache) is masked with it.
static inline int md5_cpuid(void)
{
    static int cached_features;

    int features = cached_features;
    if (features == 0)
    {
        int regs[4];
        MD5_CPUID_EX(7, 0, regs);

        // both the BMI (bit 3) and BMI2 (bit 8) flags must be present
        const int required_bits = (1 << 3) | (1 << 8);

        features = MD5_CPUID_INIT;
        if ((regs[1] & required_bits) == required_bits)
        {
            features |= MD5_CPUID_BMI2;
        }

        cached_features = features;
    }

#if defined(MD5_CPUID_MASK)
    features &= MD5_CPUID_MASK;
#endif

    return features;
}
// MD5 block function compiled for BMI/BMI2 targets (see MD5_TARGET).
//
// state - the four running hash words; read on entry, written back on exit
// block - start of the input; `count` consecutive MD5_BLOCK_SIZE-byte blocks are read
// count - number of blocks to process; must be at least 1, because the do/while
//         loop decrements before it tests
//
// md5_process only calls this after md5_cpuid() reports MD5_CPUID_BMI2.
// The helper macros below are local to this function and #undef'd at the end.
MD5_TARGET("bmi,bmi2,tune=znver1")
static void md5_process_bmi2(uint32_t* state, const uint8_t* block, size_t count)
{
// "tune=znver1" allows clang to use LEA with [reg+reg+imm] operand which helps performance on modern CPUs
// -1 in I will get folded together with constant k
#define F(x,y,z) (x & y) + MD5_ANDN_U32(x, z)
#define G(x,y,z) (x & z) + MD5_ANDN_U32(z, y)
#define H(x,y,z) (x ^ y ^ z)
#define I(x,y,z) 0 - 1 - (y ^ MD5_ANDN_U32(x, z))
// X(i) loads the i-th little-endian 32-bit word of the current block
#define X(i) MD5_GET32LE(block + i*sizeof(uint32_t))
// one MD5 step; the left rotate by r is expressed as a right rotate by 32-r
#define ROUND(F, a, b, c, d, x, k, r) do { \
a += (k) + F(b, c, d) + (x); \
a = MD5_RORX_U32(a, 32-r) + b; \
} while (0)
// QROUND_* perform four consecutive steps with the a/b/c/d roles rotated
#define QROUND_F(x0, x1, x2, x3, k0, k1, k2, k3) do { \
ROUND(F, a, b, c, d, X(x0), k0, 7); \
ROUND(F, d, a, b, c, X(x1), k1, 12); \
ROUND(F, c, d, a, b, X(x2), k2, 17); \
ROUND(F, b, c, d, a, X(x3), k3, 22); \
} while (0)
#define QROUND_G(x0, x1, x2, x3, k0, k1, k2, k3) do { \
ROUND(G, a, b, c, d, X(x0), k0, 5); \
ROUND(G, d, a, b, c, X(x1), k1, 9); \
ROUND(G, c, d, a, b, X(x2), k2, 14); \
ROUND(G, b, c, d, a, X(x3), k3, 20); \
} while (0)
#define QROUND_H(x0, x1, x2, x3, k0, k1, k2, k3) do { \
ROUND(H, a, b, c, d, X(x0), k0, 4); \
ROUND(H, d, a, b, c, X(x1), k1, 11); \
ROUND(H, c, d, a, b, X(x2), k2, 16); \
ROUND(H, b, c, d, a, X(x3), k3, 23); \
} while (0)
#define QROUND_I(x0, x1, x2, x3, k0, k1, k2, k3) do { \
ROUND(I, a, b, c, d, X(x0), k0, 6); \
ROUND(I, d, a, b, c, X(x1), k1, 10); \
ROUND(I, c, d, a, b, X(x2), k2, 15); \
ROUND(I, b, c, d, a, X(x3), k3, 21); \
} while (0)
uint32_t a = state[0];
uint32_t b = state[1];
uint32_t c = state[2];
uint32_t d = state[3];
do
{
// remember the state at the start of this block; it is added back at the end
uint32_t last_a = a;
uint32_t last_b = b;
uint32_t last_c = c;
uint32_t last_d = d;
// 16 steps using F
QROUND_F( 0, 1, 2, 3, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee);
QROUND_F( 4, 5, 6, 7, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501);
QROUND_F( 8, 9, 10, 11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be);
QROUND_F(12, 13, 14, 15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821);
MD5_COMPILER_BARRIER();
// 16 steps using G
QROUND_G( 1, 6, 11, 0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa);
QROUND_G( 5, 10, 15, 4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8);
QROUND_G( 9, 14, 3, 8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed);
QROUND_G(13, 2, 7, 12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a);
MD5_COMPILER_BARRIER();
// 16 steps using H
QROUND_H( 5, 8, 11, 14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c);
QROUND_H( 1, 4, 7, 10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70);
QROUND_H(13, 0, 3, 6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05);
QROUND_H( 9, 12, 15, 2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665);
MD5_COMPILER_BARRIER();
// 16 steps using I
QROUND_I( 0, 7, 14, 5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039);
QROUND_I(12, 3, 10, 1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1);
QROUND_I( 8, 15, 6, 13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1);
QROUND_I( 4, 11, 2, 9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391);
MD5_COMPILER_BARRIER();
a += last_a;
b += last_b;
c += last_c;
d += last_d;
block += MD5_BLOCK_SIZE;
}
while (--count);
state[0] = a;
state[1] = b;
state[2] = c;
state[3] = d;
#undef QROUND_F
#undef QROUND_G
#undef QROUND_H
#undef QROUND_I
#undef ROUND
#undef X
#undef F
#undef G
#undef H
#undef I
}
#endif // defined(__x86_64__) || defined(_M_AMD64)
// MD5 block function: folds `count` consecutive MD5_BLOCK_SIZE-byte blocks,
// starting at `block`, into the four words of `state`.
//
// On x86-64 the work is handed to md5_process_bmi2 when md5_cpuid() reports
// MD5_CPUID_BMI2; otherwise the portable implementation below runs.
// `count` must be at least 1, because the do/while loop decrements before it
// tests. The helper macros are local to this function and #undef'd at the end.
static void md5_process(uint32_t* state, const uint8_t* block, size_t count)
{
#if defined(__x86_64__) || defined(_M_AMD64)
int cpuid = md5_cpuid();
if (cpuid & MD5_CPUID_BMI2)
{
md5_process_bmi2(state, block, count);
return;
}
#endif
// F function uses 3 operations instead of 4 when "bit select" instruction is not available
// (x & y) | (~x & z) == (z ^ (x & (y ^ z)))
// G function uses + instead of | for better ILP
// #define F(x,y,z) ((x & y) | (~x & z))
#define F(x,y,z) (z ^ (x & (y ^ z)))
#define G(x,y,z) (x & z) + (y & ~z)
#define H(x,y,z) (x ^ y ^ z)
#define I(x,y,z) (y ^ (x | ~z))
// X(i) loads the i-th little-endian 32-bit word of the current block
#define X(i) MD5_GET32LE(block + i*sizeof(uint32_t))
// one MD5 step: mix in the message word and constant, rotate left by r, add b
#define ROUND(F, a, b, c, d, x, k, r) do { \
a += F(b, c, d) + (x) + (k); \
a = MD5_ROL32(a, r) + b; \
} while (0)
// QROUND_* perform four consecutive steps with the a/b/c/d roles rotated
#define QROUND_F(x0, x1, x2, x3, k0, k1, k2, k3) do { \
ROUND(F, a, b, c, d, X(x0), k0, 7); \
ROUND(F, d, a, b, c, X(x1), k1, 12); \
ROUND(F, c, d, a, b, X(x2), k2, 17); \
ROUND(F, b, c, d, a, X(x3), k3, 22); \
} while (0)
#define QROUND_G(x0, x1, x2, x3, k0, k1, k2, k3) do { \
ROUND(G, a, b, c, d, X(x0), k0, 5); \
ROUND(G, d, a, b, c, X(x1), k1, 9); \
ROUND(G, c, d, a, b, X(x2), k2, 14); \
ROUND(G, b, c, d, a, X(x3), k3, 20); \
} while (0)
#define QROUND_H(x0, x1, x2, x3, k0, k1, k2, k3) do { \
ROUND(H, a, b, c, d, X(x0), k0, 4); \
ROUND(H, d, a, b, c, X(x1), k1, 11); \
ROUND(H, c, d, a, b, X(x2), k2, 16); \
ROUND(H, b, c, d, a, X(x3), k3, 23); \
} while (0)
#define QROUND_I(x0, x1, x2, x3, k0, k1, k2, k3) do { \
ROUND(I, a, b, c, d, X(x0), k0, 6); \
ROUND(I, d, a, b, c, X(x1), k1, 10); \
ROUND(I, c, d, a, b, X(x2), k2, 15); \
ROUND(I, b, c, d, a, X(x3), k3, 21); \
} while (0)
uint32_t a = state[0];
uint32_t b = state[1];
uint32_t c = state[2];
uint32_t d = state[3];
do
{
// remember the state at the start of this block; it is added back at the end
uint32_t last_a = a;
uint32_t last_b = b;
uint32_t last_c = c;
uint32_t last_d = d;
// 16 steps using F
QROUND_F( 0, 1, 2, 3, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee);
QROUND_F( 4, 5, 6, 7, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501);
QROUND_F( 8, 9, 10, 11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be);
QROUND_F(12, 13, 14, 15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821);
MD5_COMPILER_BARRIER();
// 16 steps using G
QROUND_G( 1, 6, 11, 0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa);
QROUND_G( 5, 10, 15, 4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8);
QROUND_G( 9, 14, 3, 8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed);
QROUND_G(13, 2, 7, 12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a);
MD5_COMPILER_BARRIER();
// 16 steps using H
QROUND_H( 5, 8, 11, 14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c);
QROUND_H( 1, 4, 7, 10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70);
QROUND_H(13, 0, 3, 6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05);
QROUND_H( 9, 12, 15, 2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665);
MD5_COMPILER_BARRIER();
// 16 steps using I
QROUND_I( 0, 7, 14, 5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039);
QROUND_I(12, 3, 10, 1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1);
QROUND_I( 8, 15, 6, 13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1);
QROUND_I( 4, 11, 2, 9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391);
MD5_COMPILER_BARRIER();
a += last_a;
b += last_b;
c += last_c;
d += last_d;
block += MD5_BLOCK_SIZE;
}
while (--count);
state[0] = a;
state[1] = b;
state[2] = c;
state[3] = d;
#undef QROUND_F
#undef QROUND_G
#undef QROUND_H
#undef QROUND_I
#undef ROUND
#undef X
#undef F
#undef G
#undef H
#undef I
}
// Prepares `ctx` for a new MD5 computation: no bytes consumed yet and the
// four state words set to their starting constants. ctx->buffer is left
// untouched.
void md5_init(md5_ctx* ctx)
{
    static const uint32_t initial_state[4] =
    {
        0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476,
    };

    for (size_t word = 0; word < 4; word++)
    {
        ctx->state[word] = initial_state[word];
    }
    ctx->count = 0;
}
// Feeds `size` bytes starting at `data` into the running MD5 computation.
//
// ctx  - context previously set up by md5_init
// data - bytes to hash; never read when `size` is 0, so it may be NULL then
// size - number of bytes available at `data`
//
// Complete MD5_BLOCK_SIZE-byte blocks are compressed right away; whatever is
// left over (fewer than MD5_BLOCK_SIZE bytes) is kept in ctx->buffer for the
// next call or for md5_finish.
void md5_update(md5_ctx* ctx, const void* data, size_t size)
{
    // An empty update changes nothing. Returning here also keeps a NULL `data`
    // (e.g. an empty string view) away from memcpy, where a null pointer is
    // undefined behavior even for a zero-byte copy.
    if (size == 0)
    {
        return;
    }

    const uint8_t* buffer = (const uint8_t*)data;

    // bytes already sitting in ctx->buffer from earlier calls
    size_t pending = ctx->count % MD5_BLOCK_SIZE;
    ctx->count += size;

    // first complete the buffered partial block, if the new data can fill it
    size_t available = MD5_BLOCK_SIZE - pending;
    if (pending && size >= available)
    {
        memcpy(ctx->buffer + pending, buffer, available);
        md5_process(ctx->state, ctx->buffer, 1);
        buffer += available;
        size -= available;
        pending = 0;
    }

    // then compress whole blocks directly from the caller's memory
    size_t count = size / MD5_BLOCK_SIZE;
    if (count)
    {
        md5_process(ctx->state, buffer, count);
        buffer += count * MD5_BLOCK_SIZE;
        size -= count * MD5_BLOCK_SIZE;
    }

    // finally stash the tail after any bytes that are still pending
    memcpy(ctx->buffer + pending, buffer, size);
}
// Completes the MD5 computation and writes the MD5_DIGEST_SIZE-byte result to
// `digest`.
//
// The buffered tail is terminated with a 0x80 byte (written into ctx->buffer),
// zero-filled, and closed with the message length in bits as a little-endian
// 64-bit value. That takes one block when the tail leaves room for the length
// field and two blocks otherwise. ctx->state is updated by the final
// compression, and its four words are emitted little-endian.
void md5_finish(md5_ctx* ctx, uint8_t digest[MD5_DIGEST_SIZE])
{
    const uint64_t total_bits = ctx->count * 8;
    size_t tail = ctx->count % MD5_BLOCK_SIZE;

    // does the length field still fit behind the tail in a single block?
    size_t block_count;
    if (tail < MD5_BLOCK_SIZE - sizeof(total_bits))
    {
        block_count = 1;
    }
    else
    {
        block_count = 2;
    }

    // terminator byte directly after the message bytes
    ctx->buffer[tail] = 0x80;
    tail += 1;

    // assemble the padded block(s) in a scratch area large enough for two
    uint8_t scratch[2 * MD5_BLOCK_SIZE];
    memcpy(scratch, ctx->buffer, MD5_BLOCK_SIZE);
    memset(scratch + tail, 0, MD5_BLOCK_SIZE);

    uint8_t* length_field = scratch + block_count * MD5_BLOCK_SIZE - sizeof(total_bits);
    MD5_SET64LE(length_field, total_bits);

    md5_process(ctx->state, scratch, block_count);

    // serialize the state words into the caller's digest buffer
    uint8_t* out = digest;
    for (size_t word = 0; word < 4; word++)
    {
        MD5_SET32LE(out, ctx->state[word]);
        out += sizeof(uint32_t);
    }
}
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
+441
View File
@@ -0,0 +1,441 @@
#pragma once
// https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
#include <stddef.h>
#include <stdint.h>
//
// interface
//
#define SHA1_DIGEST_SIZE 20
#define SHA1_BLOCK_SIZE 64
// Streaming SHA-1 context: set up by sha1_init, advanced by sha1_update, and
// consumed by sha1_finish.
typedef struct {
uint8_t buffer[SHA1_BLOCK_SIZE]; // bytes of the trailing partial block not yet compressed
uint64_t count; // total number of message bytes passed to sha1_update so far
uint32_t state[5]; // running hash words (loaded as a, b, c, d, e by the block functions)
} sha1_ctx;
static inline void sha1_init(sha1_ctx* ctx);
static inline void sha1_update(sha1_ctx* ctx, const void* data, size_t size);
static inline void sha1_finish(sha1_ctx* ctx, uint8_t digest[SHA1_DIGEST_SIZE]);
//
// implementation
//
#include <string.h> // memcpy, memset
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wcast-align"
# pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
# pragma clang diagnostic ignored "-Wlanguage-extension-token"
# pragma clang diagnostic ignored "-Wdeclaration-after-statement"
#elif defined(_MSC_VER)
# pragma warning (push)
# pragma warning (disable : 4127)
#endif
#if defined(__clang__)
# define SHA1_ROL32(x,n) __builtin_rotateleft32(x, n)
#elif defined(_MSC_VER)
# include <stdlib.h>
# define SHA1_ROL32(x,n) _rotl(x, n)
#else
# define SHA1_ROL32(x,n) ( ((x) << (n)) | ((x) >> (32-(n))) )
#endif
// Big-endian load/store helpers for byte pointers of any alignment.
//
//   SHA1_GET32BE(ptr)    - reads 4 bytes at ptr as a big-endian 32-bit value
//   SHA1_SET32BE(ptr, x) - writes the low 32 bits of x to ptr, big-endian
//   SHA1_SET64BE(ptr, x) - writes the 64-bit x to ptr, big-endian
//
// `ptr` is expected to point at uint8_t. The MSVC variants use an unaligned
// direct access plus a byte swap; every other compiler gets the portable
// byte-wise form.
#if defined(_MSC_VER)
#  include <stdlib.h>
#  define SHA1_GET32BE(ptr) _byteswap_ulong( *((const _UNALIGNED uint32_t*)(ptr)) )
#  define SHA1_SET32BE(ptr,x) *((_UNALIGNED uint32_t*)(ptr)) = _byteswap_ulong(x)
#  define SHA1_SET64BE(ptr,x) *((_UNALIGNED uint64_t*)(ptr)) = _byteswap_uint64(x)
#else
// Each byte is widened to uint32_t before it is shifted: a uint8_t operand is
// otherwise promoted to (signed) int, and shifting a byte >= 0x80 left by 24
// overflows int, which is undefined behavior and produces a negative value
// that sign-extends when the result is stored in a wider type.
#  define SHA1_GET32BE(ptr)             \
    (                                   \
        ((uint32_t)(ptr)[0] << 24) |    \
        ((uint32_t)(ptr)[1] << 16) |    \
        ((uint32_t)(ptr)[2] <<  8) |    \
        ((uint32_t)(ptr)[3] <<  0)      \
    )
#  define SHA1_SET32BE(ptr, x) do       \
    {                                   \
        (ptr)[0] = (uint8_t)((x) >> 24); \
        (ptr)[1] = (uint8_t)((x) >> 16); \
        (ptr)[2] = (uint8_t)((x) >>  8); \
        (ptr)[3] = (uint8_t)((x) >>  0); \
    }                                   \
    while (0)
#  define SHA1_SET64BE(ptr, x) do       \
    {                                   \
        (ptr)[0] = (uint8_t)((x) >> 56); \
        (ptr)[1] = (uint8_t)((x) >> 48); \
        (ptr)[2] = (uint8_t)((x) >> 40); \
        (ptr)[3] = (uint8_t)((x) >> 32); \
        (ptr)[4] = (uint8_t)((x) >> 24); \
        (ptr)[5] = (uint8_t)((x) >> 16); \
        (ptr)[6] = (uint8_t)((x) >>  8); \
        (ptr)[7] = (uint8_t)((x) >>  0); \
    }                                   \
    while (0)
#endif
#if defined(__x86_64__) || defined(_M_AMD64)
#include <tmmintrin.h> // SSSE3
#include <immintrin.h> // SHANI
#if defined(__clang__) || defined(__GNUC__)
# include <cpuid.h>
# define SHA1_TARGET(str) __attribute__((target(str)))
# define SHA1_CPUID(x, info) __cpuid(x, info[0], info[1], info[2], info[3])
# define SHA1_CPUID_EX(x, y, info) __cpuid_count(x, y, info[0], info[1], info[2], info[3])
#else
# include <intrin.h>
# define SHA1_TARGET(str)
# define SHA1_CPUID(x, info) __cpuid(info, x)
# define SHA1_CPUID_EX(x, y, info) __cpuidex(info, x, y)
#endif
#define SHA1_CPUID_INIT (1 << 0)
#define SHA1_CPUID_SHANI (1 << 1)
// Reports which optional SHA-1 code paths this CPU supports, as a bitmask of
// SHA1_CPUID_* flags. The CPUID queries run only while the cached value is
// still zero; SHA1_CPUID_INIT is always set afterwards so the cache is never
// zero again. If SHA1_CPUID_MASK is defined, the returned value (but not the
// cache) is masked with it.
static inline int sha1_cpuid(void)
{
    static int cpuid;

    int result = cpuid;
    if (result == 0)
    {
        // info[0..3] receive EAX, EBX, ECX, EDX in that order
        int info[4];

        // leaf 1: SSSE3 is reported in ECX bit 9, i.e. info[2]
        // (bit 9 of EDX / info[3] is the unrelated APIC flag)
        SHA1_CPUID(1, info);
        int has_ssse3 = info[2] & (1 << 9);

        // leaf 7, subleaf 0: the SHA extensions are reported in EBX bit 29
        SHA1_CPUID_EX(7, 0, info);
        int has_shani = info[1] & (1 << 29);

        result |= SHA1_CPUID_INIT;
        if (has_ssse3 && has_shani)
        {
            result |= SHA1_CPUID_SHANI;
        }

        cpuid = result;
    }

#if defined(SHA1_CPUID_MASK)
    result &= SHA1_CPUID_MASK;
#endif

    return result;
}
// SHA-1 block function built on the x86 SHA and SSSE3 intrinsics (see SHA1_TARGET).
//
// state - the five running hash words; read on entry, written back on exit
// block - start of the input; `count` consecutive SHA1_BLOCK_SIZE-byte blocks are read
// count - number of blocks to process; must be at least 1, because the do/while
//         loop decrements before it tests
//
// sha1_process only calls this after sha1_cpuid() reports SHA1_CPUID_SHANI.
// W and QROUND are local helper macros, #undef'd at the end.
SHA1_TARGET("ssse3,sha")
static void sha1_process_shani(uint32_t* state, const uint8_t* block, size_t count)
{
const __m128i* buffer = (const __m128i*)block;
// for performing two operations in one:
// 1) dwords need to be loaded as big-endian
// 2) order of dwords need to be reversed for sha instructions: [0,1,2,3] -> [3,2,1,0]
const __m128i bswap = _mm_setr_epi8(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
// w[] is a 4-entry ring of message vectors, indexed modulo 4
#define W(i) w[(i)%4]
// 4 wide round calculations
#define QROUND(i) do { \
/* first four rounds loads input message */ \
if (i < 4) W(i) = _mm_shuffle_epi8(_mm_loadu_si128(&buffer[i]), bswap); \
/* update previous message dwords for next rounds */ \
if (i > 0 && i < 17) W(i-1) = _mm_sha1msg1_epu32(W(i-1), W(i)); \
if (i > 1 && i < 18) W(i-2) = _mm_xor_si128(W(i-2), W(i)); \
if (i > 2 && i < 19) W(i-3) = _mm_sha1msg2_epu32(W(i-3), W(i)); \
/* calculate E from message dwords */ \
if (i == 0) tmp = _mm_add_epi32(e0, W(i)); \
if (i != 0) tmp = _mm_sha1nexte_epu32(e0, W(i)); \
/* round function */ \
e0 = abcd; \
abcd = _mm_sha1rnds4_epu32(abcd, tmp, (i/5)%4); \
} while(0)
// load initial state
__m128i abcd = _mm_loadu_si128((const __m128i*)state); // [d,c,b,a]
__m128i e0 = _mm_loadu_si32(&state[4]); // [0,0,0,e]
// change dword order
abcd = _mm_shuffle_epi32(abcd, _MM_SHUFFLE(0,1,2,3)); // [a,b,c,d] where a is in the top lane
e0 = _mm_slli_si128(e0, 12); // [e,0,0,0] where e is in top lane
do
{
// remember current state
__m128i last_abcd = abcd;
__m128i last_e0 = e0;
__m128i tmp, w[4];
// 20 groups of 4 rounds each
QROUND(0);
QROUND(1);
QROUND(2);
QROUND(3);
QROUND(4);
QROUND(5);
QROUND(6);
QROUND(7);
QROUND(8);
QROUND(9);
QROUND(10);
QROUND(11);
QROUND(12);
QROUND(13);
QROUND(14);
QROUND(15);
QROUND(16);
QROUND(17);
QROUND(18);
QROUND(19);
// update next state
abcd = _mm_add_epi32(abcd, last_abcd);
e0 = _mm_sha1nexte_epu32(e0, last_e0);
// advance by one block (four 16-byte vectors)
buffer += 4;
}
while (--count);
// restore dword order
abcd = _mm_shuffle_epi32(abcd, _MM_SHUFFLE(0,1,2,3));
e0 = _mm_shuffle_epi32(e0, _MM_SHUFFLE(0,1,2,3));
// save the new state
_mm_storeu_si128((__m128i*)state, abcd);
_mm_storeu_si32(&state[4], e0);
#undef QROUND
#undef W
}
#endif // defined(__x86_64__) || defined(_M_AMD64)
// SHA-1 block function: folds `count` consecutive SHA1_BLOCK_SIZE-byte blocks,
// starting at `block`, into the five words of `state`.
//
// On x86-64 the work is handed to sha1_process_shani when sha1_cpuid() reports
// SHA1_CPUID_SHANI; otherwise the portable implementation below runs.
// `count` must be at least 1, because the do/while loop decrements before it
// tests. The helper macros are local to this function and #undef'd at the end.
static void sha1_process(uint32_t* state, const uint8_t* block, size_t count)
{
#if defined(__x86_64__) || defined(_M_AMD64)
int cpuid = sha1_cpuid();
if (cpuid & SHA1_CPUID_SHANI)
{
sha1_process_shani(state, block, count);
return;
}
#endif
// each F macro already includes the additive constant used by its rounds
#define F1(x,y,z) (0x5a827999 + ((x & (y ^ z)) ^ z))
#define F2(x,y,z) (0x6ed9eba1 + (x ^ y ^ z))
#define F3(x,y,z) (0x8f1bbcdc + ((x & y) | (z & (x | y))))
#define F4(x,y,z) (0xca62c1d6 + (x ^ y ^ z))
// w[] is a 16-entry ring of message words; +16 keeps the index non-negative for W(i-16)
#define W(i) w[(i+16)%16]
// one round: rounds 0..15 load a big-endian message word from the block,
// later rounds derive it from earlier words in the ring
#define ROUND(i,a,b,c,d,e,F) do \
{ \
uint32_t w0; \
if (i < 16) W(i) = w0 = SHA1_GET32BE(block + i*sizeof(uint32_t)); \
if (i >= 16) W(i) = w0 = SHA1_ROL32(W(i-3) ^ W(i-8) ^ W(i-14) ^ W(i-16), 1); \
\
e += SHA1_ROL32(a,5) + F(b,c,d) + w0; \
b = SHA1_ROL32(b,30); \
} while (0)
uint32_t a = state[0];
uint32_t b = state[1];
uint32_t c = state[2];
uint32_t d = state[3];
uint32_t e = state[4];
do
{
// remember the state at the start of this block; it is added back at the end
uint32_t last_a = a;
uint32_t last_b = b;
uint32_t last_c = c;
uint32_t last_d = d;
uint32_t last_e = e;
uint32_t w[16];
// rounds 0..19 use F1; the variable roles rotate by one position every round
ROUND( 0, a, b, c, d, e, F1);
ROUND( 1, e, a, b, c, d, F1);
ROUND( 2, d, e, a, b, c, F1);
ROUND( 3, c, d, e, a, b, F1);
ROUND( 4, b, c, d, e, a, F1);
ROUND( 5, a, b, c, d, e, F1);
ROUND( 6, e, a, b, c, d, F1);
ROUND( 7, d, e, a, b, c, F1);
ROUND( 8, c, d, e, a, b, F1);
ROUND( 9, b, c, d, e, a, F1);
ROUND(10, a, b, c, d, e, F1);
ROUND(11, e, a, b, c, d, F1);
ROUND(12, d, e, a, b, c, F1);
ROUND(13, c, d, e, a, b, F1);
ROUND(14, b, c, d, e, a, F1);
ROUND(15, a, b, c, d, e, F1);
ROUND(16, e, a, b, c, d, F1);
ROUND(17, d, e, a, b, c, F1);
ROUND(18, c, d, e, a, b, F1);
ROUND(19, b, c, d, e, a, F1);
// rounds 20..39 use F2
ROUND(20, a, b, c, d, e, F2);
ROUND(21, e, a, b, c, d, F2);
ROUND(22, d, e, a, b, c, F2);
ROUND(23, c, d, e, a, b, F2);
ROUND(24, b, c, d, e, a, F2);
ROUND(25, a, b, c, d, e, F2);
ROUND(26, e, a, b, c, d, F2);
ROUND(27, d, e, a, b, c, F2);
ROUND(28, c, d, e, a, b, F2);
ROUND(29, b, c, d, e, a, F2);
ROUND(30, a, b, c, d, e, F2);
ROUND(31, e, a, b, c, d, F2);
ROUND(32, d, e, a, b, c, F2);
ROUND(33, c, d, e, a, b, F2);
ROUND(34, b, c, d, e, a, F2);
ROUND(35, a, b, c, d, e, F2);
ROUND(36, e, a, b, c, d, F2);
ROUND(37, d, e, a, b, c, F2);
ROUND(38, c, d, e, a, b, F2);
ROUND(39, b, c, d, e, a, F2);
// rounds 40..59 use F3
ROUND(40, a, b, c, d, e, F3);
ROUND(41, e, a, b, c, d, F3);
ROUND(42, d, e, a, b, c, F3);
ROUND(43, c, d, e, a, b, F3);
ROUND(44, b, c, d, e, a, F3);
ROUND(45, a, b, c, d, e, F3);
ROUND(46, e, a, b, c, d, F3);
ROUND(47, d, e, a, b, c, F3);
ROUND(48, c, d, e, a, b, F3);
ROUND(49, b, c, d, e, a, F3);
ROUND(50, a, b, c, d, e, F3);
ROUND(51, e, a, b, c, d, F3);
ROUND(52, d, e, a, b, c, F3);
ROUND(53, c, d, e, a, b, F3);
ROUND(54, b, c, d, e, a, F3);
ROUND(55, a, b, c, d, e, F3);
ROUND(56, e, a, b, c, d, F3);
ROUND(57, d, e, a, b, c, F3);
ROUND(58, c, d, e, a, b, F3);
ROUND(59, b, c, d, e, a, F3);
// rounds 60..79 use F4
ROUND(60, a, b, c, d, e, F4);
ROUND(61, e, a, b, c, d, F4);
ROUND(62, d, e, a, b, c, F4);
ROUND(63, c, d, e, a, b, F4);
ROUND(64, b, c, d, e, a, F4);
ROUND(65, a, b, c, d, e, F4);
ROUND(66, e, a, b, c, d, F4);
ROUND(67, d, e, a, b, c, F4);
ROUND(68, c, d, e, a, b, F4);
ROUND(69, b, c, d, e, a, F4);
ROUND(70, a, b, c, d, e, F4);
ROUND(71, e, a, b, c, d, F4);
ROUND(72, d, e, a, b, c, F4);
ROUND(73, c, d, e, a, b, F4);
ROUND(74, b, c, d, e, a, F4);
ROUND(75, a, b, c, d, e, F4);
ROUND(76, e, a, b, c, d, F4);
ROUND(77, d, e, a, b, c, F4);
ROUND(78, c, d, e, a, b, F4);
ROUND(79, b, c, d, e, a, F4);
a += last_a;
b += last_b;
c += last_c;
d += last_d;
e += last_e;
block += SHA1_BLOCK_SIZE;
}
while (--count);
state[0] = a;
state[1] = b;
state[2] = c;
state[3] = d;
state[4] = e;
#undef ROUND
#undef W
#undef F1
#undef F2
#undef F3
#undef F4
}
// Prepares `ctx` for a new SHA-1 computation: no bytes consumed yet and the
// five state words set to their starting constants. ctx->buffer is left
// untouched.
void sha1_init(sha1_ctx* ctx)
{
    static const uint32_t initial_state[5] =
    {
        0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0,
    };

    for (size_t word = 0; word < 5; word++)
    {
        ctx->state[word] = initial_state[word];
    }
    ctx->count = 0;
}
// Feeds `size` bytes starting at `data` into the running SHA-1 computation.
//
// ctx  - context previously set up by sha1_init
// data - bytes to hash; never read when `size` is 0, so it may be NULL then
// size - number of bytes available at `data`
//
// Complete SHA1_BLOCK_SIZE-byte blocks are compressed right away; whatever is
// left over (fewer than SHA1_BLOCK_SIZE bytes) is kept in ctx->buffer for the
// next call or for sha1_finish.
void sha1_update(sha1_ctx* ctx, const void* data, size_t size)
{
    // An empty update changes nothing. Returning here also keeps a NULL `data`
    // (e.g. an empty string view) away from memcpy, where a null pointer is
    // undefined behavior even for a zero-byte copy.
    if (size == 0)
    {
        return;
    }

    const uint8_t* buffer = (const uint8_t*)data;

    // bytes already sitting in ctx->buffer from earlier calls
    size_t pending = ctx->count % SHA1_BLOCK_SIZE;
    ctx->count += size;

    // first complete the buffered partial block, if the new data can fill it
    size_t available = SHA1_BLOCK_SIZE - pending;
    if (pending && size >= available)
    {
        memcpy(ctx->buffer + pending, buffer, available);
        sha1_process(ctx->state, ctx->buffer, 1);
        buffer += available;
        size -= available;
        pending = 0;
    }

    // then compress whole blocks directly from the caller's memory
    size_t count = size / SHA1_BLOCK_SIZE;
    if (count)
    {
        sha1_process(ctx->state, buffer, count);
        buffer += count * SHA1_BLOCK_SIZE;
        size -= count * SHA1_BLOCK_SIZE;
    }

    // finally stash the tail after any bytes that are still pending
    memcpy(ctx->buffer + pending, buffer, size);
}
// Completes the SHA-1 computation and writes the SHA1_DIGEST_SIZE-byte result
// to `digest`.
//
// The buffered tail is terminated with a 0x80 byte (written into ctx->buffer),
// zero-filled, and closed with the message length in bits as a big-endian
// 64-bit value. That takes one block when the tail leaves room for the length
// field and two blocks otherwise. ctx->state is updated by the final
// compression, and its five words are emitted big-endian.
void sha1_finish(sha1_ctx* ctx, uint8_t digest[SHA1_DIGEST_SIZE])
{
    const uint64_t total_bits = ctx->count * 8;
    size_t tail = ctx->count % SHA1_BLOCK_SIZE;

    // does the length field still fit behind the tail in a single block?
    size_t block_count;
    if (tail < SHA1_BLOCK_SIZE - sizeof(total_bits))
    {
        block_count = 1;
    }
    else
    {
        block_count = 2;
    }

    // terminator byte directly after the message bytes
    ctx->buffer[tail] = 0x80;
    tail += 1;

    // assemble the padded block(s) in a scratch area large enough for two
    uint8_t scratch[2 * SHA1_BLOCK_SIZE];
    memcpy(scratch, ctx->buffer, SHA1_BLOCK_SIZE);
    memset(scratch + tail, 0, SHA1_BLOCK_SIZE);

    uint8_t* length_field = scratch + block_count * SHA1_BLOCK_SIZE - sizeof(total_bits);
    SHA1_SET64BE(length_field, total_bits);

    sha1_process(ctx->state, scratch, block_count);

    // serialize the state words into the caller's digest buffer
    uint8_t* out = digest;
    for (size_t word = 0; word < 5; word++)
    {
        SHA1_SET32BE(out, ctx->state[word]);
        out += sizeof(uint32_t);
    }
}
#if defined(__clang__)
# pragma clang diagnostic pop
#elif defined(_MSC_VER)
# pragma warning (pop)
#endif
+472
View File
@@ -0,0 +1,472 @@
#pragma once
// https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
// https://www.rfc-editor.org/rfc/rfc6234
#include <stddef.h>
#include <stdint.h>
//
// interface
//
#define SHA224_DIGEST_SIZE 28
#define SHA256_DIGEST_SIZE 32
#define SHA256_BLOCK_SIZE 64
// Streaming SHA-256 context passed to sha256_init / sha256_update / sha256_finish.
// NOTE(review): the functions that read and write `buffer` and `count` are not
// visible in this part of the file; the field notes below assume they are used
// the same way as in the sibling md5/sha1 contexts -- confirm.
typedef struct {
uint8_t buffer[SHA256_BLOCK_SIZE]; // presumably the trailing partial block not yet compressed
uint64_t count; // presumably the total number of message bytes fed in so far
uint32_t state[8]; // running hash words (loaded as a..h by sha256_process_shani)
} sha256_ctx;
typedef sha256_ctx sha224_ctx;
static inline void sha256_init(sha256_ctx* ctx);
static inline void sha256_update(sha256_ctx* ctx, const void* data, size_t size);
static inline void sha256_finish(sha256_ctx* ctx, uint8_t digest[SHA256_DIGEST_SIZE]);
static inline void sha224_init(sha224_ctx* ctx);
static inline void sha224_update(sha224_ctx* ctx, const void* data, size_t size);
static inline void sha224_finish(sha224_ctx* ctx, uint8_t digest[SHA224_DIGEST_SIZE]);
//
// implementation
//
#include <string.h> // memcpy, memset
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wcast-align"
# pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
# pragma clang diagnostic ignored "-Wlanguage-extension-token"
# pragma clang diagnostic ignored "-Wdeclaration-after-statement"
#elif defined(_MSC_VER)
# pragma warning (push)
# pragma warning (disable : 4127)
#endif
#if defined(__clang__)
# define SHA256_ROR32(x,n) __builtin_rotateright32(x, n)
#elif defined(_MSC_VER)
# include <stdlib.h>
# define SHA256_ROR32(x,n) _rotr(x, n)
#else
# define SHA256_ROR32(x,n) ( ((x) >> (n)) | ((x) << (32-(n))) )
#endif
// Big-endian load/store helpers for byte pointers of any alignment.
//
//   SHA256_GET32BE(ptr)    - reads 4 bytes at ptr as a big-endian 32-bit value
//   SHA256_SET32BE(ptr, x) - writes the low 32 bits of x to ptr, big-endian
//   SHA256_SET64BE(ptr, x) - writes the 64-bit x to ptr, big-endian
//
// `ptr` is expected to point at uint8_t. The MSVC variants use an unaligned
// direct access plus a byte swap; every other compiler gets the portable
// byte-wise form.
#if defined(_MSC_VER)
#  include <stdlib.h>
#  define SHA256_GET32BE(ptr) _byteswap_ulong( *((const _UNALIGNED uint32_t*)(ptr)) )
#  define SHA256_SET32BE(ptr,x) *((_UNALIGNED uint32_t*)(ptr)) = _byteswap_ulong(x)
#  define SHA256_SET64BE(ptr,x) *((_UNALIGNED uint64_t*)(ptr)) = _byteswap_uint64(x)
#else
// Each byte is widened to uint32_t before it is shifted: a uint8_t operand is
// otherwise promoted to (signed) int, and shifting a byte >= 0x80 left by 24
// overflows int, which is undefined behavior and produces a negative value
// that sign-extends when the result is stored in a wider type.
#  define SHA256_GET32BE(ptr)           \
    (                                   \
        ((uint32_t)(ptr)[0] << 24) |    \
        ((uint32_t)(ptr)[1] << 16) |    \
        ((uint32_t)(ptr)[2] <<  8) |    \
        ((uint32_t)(ptr)[3] <<  0)      \
    )
#  define SHA256_SET32BE(ptr, x) do     \
    {                                   \
        (ptr)[0] = (uint8_t)((x) >> 24); \
        (ptr)[1] = (uint8_t)((x) >> 16); \
        (ptr)[2] = (uint8_t)((x) >>  8); \
        (ptr)[3] = (uint8_t)((x) >>  0); \
    }                                   \
    while (0)
#  define SHA256_SET64BE(ptr, x) do     \
    {                                   \
        (ptr)[0] = (uint8_t)((x) >> 56); \
        (ptr)[1] = (uint8_t)((x) >> 48); \
        (ptr)[2] = (uint8_t)((x) >> 40); \
        (ptr)[3] = (uint8_t)((x) >> 32); \
        (ptr)[4] = (uint8_t)((x) >> 24); \
        (ptr)[5] = (uint8_t)((x) >> 16); \
        (ptr)[6] = (uint8_t)((x) >>  8); \
        (ptr)[7] = (uint8_t)((x) >>  0); \
    }                                   \
    while (0)
#endif
#if defined(__x86_64__) || defined(_M_AMD64)
#include <tmmintrin.h> // SSSE3
#include <immintrin.h> // SHANI
#if defined(__clang__) || defined(__GNUC__)
# include <cpuid.h>
# define SHA256_TARGET(str) __attribute__((target(str)))
# define SHA256_CPUID(x, info) __cpuid(x, info[0], info[1], info[2], info[3])
# define SHA256_CPUID_EX(x, y, info) __cpuid_count(x, y, info[0], info[1], info[2], info[3])
#else
# include <intrin.h>
# define SHA256_TARGET(str)
# define SHA256_CPUID(x, info) __cpuid(info, x)
# define SHA256_CPUID_EX(x, y, info) __cpuidex(info, x, y)
#endif
#define SHA256_CPUID_INIT (1 << 0)
#define SHA256_CPUID_SHANI (1 << 1)
// Reports which optional SHA-256 code paths this CPU supports, as a bitmask of
// SHA256_CPUID_* flags. The CPUID queries run only while the cached value is
// still zero; SHA256_CPUID_INIT is always set afterwards so the cache is never
// zero again. If SHA256_CPUID_MASK is defined, the returned value (but not the
// cache) is masked with it.
static inline int sha256_cpuid(void)
{
    static int cpuid;

    int result = cpuid;
    if (result == 0)
    {
        // info[0..3] receive EAX, EBX, ECX, EDX in that order
        int info[4];

        // leaf 1: SSSE3 is reported in ECX bit 9, i.e. info[2]
        // (bit 9 of EDX / info[3] is the unrelated APIC flag)
        SHA256_CPUID(1, info);
        int has_ssse3 = info[2] & (1 << 9);

        // leaf 7, subleaf 0: the SHA extensions are reported in EBX bit 29
        SHA256_CPUID_EX(7, 0, info);
        int has_shani = info[1] & (1 << 29);

        result |= SHA256_CPUID_INIT;
        if (has_ssse3 && has_shani)
        {
            result |= SHA256_CPUID_SHANI;
        }

        cpuid = result;
    }

#if defined(SHA256_CPUID_MASK)
    result &= SHA256_CPUID_MASK;
#endif

    return result;
}
// SHA-256 block function built on the x86 SHA and SSSE3 intrinsics (see SHA256_TARGET).
//
// state - the eight running hash words; read on entry, written back on exit
// block - start of the input; `count` consecutive 64-byte blocks are read
//         (four 16-byte vectors per iteration)
// count - number of blocks to process; must be at least 1, because the do/while
//         loop decrements before it tests
//
// W and QROUND are local helper macros, #undef'd at the end.
SHA256_TARGET("ssse3,sha")
static void sha256_process_shani(uint32_t* state, const uint8_t* block, size_t count)
{
const __m128i* buffer = (const __m128i*)block;
// to byteswap when doing big-endian load for message dwords
const __m128i bswap = _mm_setr_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12);
// round constants, grouped four per row so that one row feeds one QROUND
static const uint32_t K[16][4] =
{
{ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 },
{ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 },
{ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 },
{ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 },
{ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc },
{ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da },
{ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 },
{ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 },
{ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 },
{ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 },
{ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 },
{ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 },
{ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 },
{ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 },
{ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 },
{ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 },
};
// w[] is a 4-entry ring of message vectors, indexed modulo 4
#define W(i) w[(i)%4]
// 4 wide round calculations
#define QROUND(i) do { \
/* first four rounds loads input message */ \
if (i < 4) W(i) = _mm_shuffle_epi8(_mm_loadu_si128(&buffer[i]), bswap); \
/* add round constant */ \
tmp = _mm_add_epi32(W(i), _mm_loadu_si128((const __m128i*)K[i])); \
/* update previous message dwords for next rounds */ \
if (i > 2 && i < 15) W(i-3) = _mm_sha256msg2_epu32(_mm_add_epi32(W(i-3), _mm_alignr_epi8(W(i), W(i-1), 4)), W(i)); \
if (i > 0 && i < 13) W(i-1) = _mm_sha256msg1_epu32(W(i-1), W(i)); \
/* round functions */ \
state1 = _mm_sha256rnds2_epu32(state1, state0, tmp); \
state0 = _mm_sha256rnds2_epu32(state0, state1, _mm_shuffle_epi32(tmp, _MM_SHUFFLE(0,0,3,2))); \
} while(0)
// load initial state
__m128i abcd = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)&state[0]), _MM_SHUFFLE(0,1,2,3)); // [a,b,c,d]
__m128i efgh = _mm_shuffle_epi32(_mm_loadu_si128((const __m128i*)&state[4]), _MM_SHUFFLE(0,1,2,3)); // [e,f,g,h]
// dword order for sha256rnds2 instruction
__m128i state0 = _mm_unpackhi_epi64(efgh, abcd); // [a,b,e,f]
__m128i state1 = _mm_unpacklo_epi64(efgh, abcd); // [c,d,g,h]
do
{
// remember current state
__m128i last0 = state0;
__m128i last1 = state1;
__m128i tmp, w[4];
// 16 groups of 4 rounds each
QROUND(0);
QROUND(1);
QROUND(2);
QROUND(3);
QROUND(4);
QROUND(5);
QROUND(6);
QROUND(7);
QROUND(8);
QROUND(9);
QROUND(10);
QROUND(11);
QROUND(12);
QROUND(13);
QROUND(14);
QROUND(15);
// update next state
state0 = _mm_add_epi32(state0, last0);
state1 = _mm_add_epi32(state1, last1);
// advance by one block (four 16-byte vectors)
buffer += 4;
}
while (--count);
// restore dword order
abcd = _mm_unpackhi_epi64(state1, state0);
efgh = _mm_unpacklo_epi64(state1, state0);
// save the new state
_mm_storeu_si128((__m128i*)&state[0], _mm_shuffle_epi32(abcd, _MM_SHUFFLE(0,1,2,3)));
_mm_storeu_si128((__m128i*)&state[4], _mm_shuffle_epi32(efgh, _MM_SHUFFLE(0,1,2,3)));
#undef QROUND
#undef W
}
#endif // defined(__x86_64__) || defined(_M_AMD64)
// Compresses `count` consecutive blocks of SHA256_BLOCK_SIZE bytes, starting at
// `block`, into the eight-word `state`.
// On x86-64 the work is handed to sha256_process_shani() when sha256_cpuid()
// reports SHA256_CPUID_SHANI; otherwise the portable, fully unrolled 64-round
// implementation below runs.
// The block loop is do/while, so `count` must be at least 1.
static void sha256_process(uint32_t* state, const uint8_t* block, size_t count)
{
#if defined(__x86_64__) || defined(_M_AMD64)
int cpuid = sha256_cpuid();
if (cpuid & SHA256_CPUID_SHANI)
{
sha256_process_shani(state, block, count);
return;
}
#endif
// round helper functions; all are #undef'd again at the end of this function
#define Ch(x,y,z) ((x & (y ^ z)) ^ z)
#define Maj(x,y,z) ((x & y) | (z & (x | y)))
#define BSig0(x) (SHA256_ROR32(x, 2) ^ SHA256_ROR32(x, 13) ^ SHA256_ROR32(x, 22))
#define BSig1(x) (SHA256_ROR32(x, 6) ^ SHA256_ROR32(x, 11) ^ SHA256_ROR32(x, 25))
#define SSig0(x) (SHA256_ROR32(x, 7) ^ SHA256_ROR32(x, 18) ^ (x >> 3))
#define SSig1(x) (SHA256_ROR32(x, 17) ^ SHA256_ROR32(x, 19) ^ (x >> 10))
// 16-entry circular message schedule; the +16 keeps the index non-negative
// even where W(i-16) is expanded for small i (in branches that are never taken)
#define W(i) w[(i+16)%16]
// One round. Rounds 0..15 read a big-endian word from the block, later rounds
// derive it from the schedule. Instead of moving values between variables, each
// ROUND invocation below is passed the working variables rotated by one position.
#define ROUND(i,a,b,c,d,e,f,g,h,K) do \
{ \
uint32_t w0; \
if (i < 16) W(i) = w0 = SHA256_GET32BE(block + i*sizeof(uint32_t)); \
if (i >= 16) W(i) = w0 = SSig1(W(i-2)) + W(i-7) + SSig0(W(i-15)) + W(i-16); \
\
uint32_t t1 = h + BSig1(e) + Ch(e,f,g) + K + w0; \
uint32_t t2 = BSig0(a) + Maj(a,b,c); \
d += t1; \
h = t1 + t2; \
} while (0)
do
{
// working variables start from the current chaining state
uint32_t a = state[0];
uint32_t b = state[1];
uint32_t c = state[2];
uint32_t d = state[3];
uint32_t e = state[4];
uint32_t f = state[5];
uint32_t g = state[6];
uint32_t h = state[7];
uint32_t w[16];
ROUND( 0, a, b, c, d, e, f, g, h, 0x428a2f98);
ROUND( 1, h, a, b, c, d, e, f, g, 0x71374491);
ROUND( 2, g, h, a, b, c, d, e, f, 0xb5c0fbcf);
ROUND( 3, f, g, h, a, b, c, d, e, 0xe9b5dba5);
ROUND( 4, e, f, g, h, a, b, c, d, 0x3956c25b);
ROUND( 5, d, e, f, g, h, a, b, c, 0x59f111f1);
ROUND( 6, c, d, e, f, g, h, a, b, 0x923f82a4);
ROUND( 7, b, c, d, e, f, g, h, a, 0xab1c5ed5);
ROUND( 8, a, b, c, d, e, f, g, h, 0xd807aa98);
ROUND( 9, h, a, b, c, d, e, f, g, 0x12835b01);
ROUND(10, g, h, a, b, c, d, e, f, 0x243185be);
ROUND(11, f, g, h, a, b, c, d, e, 0x550c7dc3);
ROUND(12, e, f, g, h, a, b, c, d, 0x72be5d74);
ROUND(13, d, e, f, g, h, a, b, c, 0x80deb1fe);
ROUND(14, c, d, e, f, g, h, a, b, 0x9bdc06a7);
ROUND(15, b, c, d, e, f, g, h, a, 0xc19bf174);
ROUND(16, a, b, c, d, e, f, g, h, 0xe49b69c1);
ROUND(17, h, a, b, c, d, e, f, g, 0xefbe4786);
ROUND(18, g, h, a, b, c, d, e, f, 0x0fc19dc6);
ROUND(19, f, g, h, a, b, c, d, e, 0x240ca1cc);
ROUND(20, e, f, g, h, a, b, c, d, 0x2de92c6f);
ROUND(21, d, e, f, g, h, a, b, c, 0x4a7484aa);
ROUND(22, c, d, e, f, g, h, a, b, 0x5cb0a9dc);
ROUND(23, b, c, d, e, f, g, h, a, 0x76f988da);
ROUND(24, a, b, c, d, e, f, g, h, 0x983e5152);
ROUND(25, h, a, b, c, d, e, f, g, 0xa831c66d);
ROUND(26, g, h, a, b, c, d, e, f, 0xb00327c8);
ROUND(27, f, g, h, a, b, c, d, e, 0xbf597fc7);
ROUND(28, e, f, g, h, a, b, c, d, 0xc6e00bf3);
ROUND(29, d, e, f, g, h, a, b, c, 0xd5a79147);
ROUND(30, c, d, e, f, g, h, a, b, 0x06ca6351);
ROUND(31, b, c, d, e, f, g, h, a, 0x14292967);
ROUND(32, a, b, c, d, e, f, g, h, 0x27b70a85);
ROUND(33, h, a, b, c, d, e, f, g, 0x2e1b2138);
ROUND(34, g, h, a, b, c, d, e, f, 0x4d2c6dfc);
ROUND(35, f, g, h, a, b, c, d, e, 0x53380d13);
ROUND(36, e, f, g, h, a, b, c, d, 0x650a7354);
ROUND(37, d, e, f, g, h, a, b, c, 0x766a0abb);
ROUND(38, c, d, e, f, g, h, a, b, 0x81c2c92e);
ROUND(39, b, c, d, e, f, g, h, a, 0x92722c85);
ROUND(40, a, b, c, d, e, f, g, h, 0xa2bfe8a1);
ROUND(41, h, a, b, c, d, e, f, g, 0xa81a664b);
ROUND(42, g, h, a, b, c, d, e, f, 0xc24b8b70);
ROUND(43, f, g, h, a, b, c, d, e, 0xc76c51a3);
ROUND(44, e, f, g, h, a, b, c, d, 0xd192e819);
ROUND(45, d, e, f, g, h, a, b, c, 0xd6990624);
ROUND(46, c, d, e, f, g, h, a, b, 0xf40e3585);
ROUND(47, b, c, d, e, f, g, h, a, 0x106aa070);
ROUND(48, a, b, c, d, e, f, g, h, 0x19a4c116);
ROUND(49, h, a, b, c, d, e, f, g, 0x1e376c08);
ROUND(50, g, h, a, b, c, d, e, f, 0x2748774c);
ROUND(51, f, g, h, a, b, c, d, e, 0x34b0bcb5);
ROUND(52, e, f, g, h, a, b, c, d, 0x391c0cb3);
ROUND(53, d, e, f, g, h, a, b, c, 0x4ed8aa4a);
ROUND(54, c, d, e, f, g, h, a, b, 0x5b9cca4f);
ROUND(55, b, c, d, e, f, g, h, a, 0x682e6ff3);
ROUND(56, a, b, c, d, e, f, g, h, 0x748f82ee);
ROUND(57, h, a, b, c, d, e, f, g, 0x78a5636f);
ROUND(58, g, h, a, b, c, d, e, f, 0x84c87814);
ROUND(59, f, g, h, a, b, c, d, e, 0x8cc70208);
ROUND(60, e, f, g, h, a, b, c, d, 0x90befffa);
ROUND(61, d, e, f, g, h, a, b, c, 0xa4506ceb);
ROUND(62, c, d, e, f, g, h, a, b, 0xbef9a3f7);
ROUND(63, b, c, d, e, f, g, h, a, 0xc67178f2);
// fold the working variables back into the chaining state
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
state[5] += f;
state[6] += g;
state[7] += h;
block += SHA256_BLOCK_SIZE;
}
while (--count);
#undef ROUND
#undef W
#undef Ch
#undef Maj
#undef BSig0
#undef BSig1
#undef SSig0
#undef SSig1
}
// Resets `ctx` for a new SHA-256 computation: zero bytes consumed and the
// chaining state set to the SHA-256 initial hash values.
void sha256_init(sha256_ctx* ctx)
{
    static const uint32_t initial_state[8] =
    {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    };

    for (size_t word = 0; word < 8; word++)
    {
        ctx->state[word] = initial_state[word];
    }
    ctx->count = 0;
}
// Feeds `size` bytes from `data` into the running SHA-256 computation.
//
// Whole blocks are compressed straight from the caller's memory; bytes that do
// not fill a block are parked in ctx->buffer until a later call completes it.
// ctx->count tracks the total number of bytes seen so far.
void sha256_update(sha256_ctx* ctx, const void* data, size_t size)
{
    // An empty update changes nothing. Returning early also keeps a null `data`
    // (a common representation of an empty span) away from memcpy, where passing
    // a null pointer is undefined behavior even for a zero length.
    if (size == 0)
    {
        return;
    }

    const uint8_t* buffer = (const uint8_t*)data;

    // bytes already parked in ctx->buffer from earlier calls
    size_t pending = ctx->count % SHA256_BLOCK_SIZE;
    ctx->count += size;

    // top up and flush the partially filled block first, if the input can complete it
    size_t available = SHA256_BLOCK_SIZE - pending;
    if (pending && size >= available)
    {
        memcpy(ctx->buffer + pending, buffer, available);
        sha256_process(ctx->state, ctx->buffer, 1);
        buffer += available;
        size -= available;
        pending = 0;
    }

    // compress all remaining whole blocks directly from the input
    size_t count = size / SHA256_BLOCK_SIZE;
    if (count)
    {
        sha256_process(ctx->state, buffer, count);
        buffer += count * SHA256_BLOCK_SIZE;
        size -= count * SHA256_BLOCK_SIZE;
    }

    // keep the tail for the next update/finish
    memcpy(ctx->buffer + pending, buffer, size);
}
// Completes the hash: appends the 0x80 marker, zero padding and the 64-bit
// big-endian message length in bits, compresses the final one or two blocks,
// and writes the eight state words to `digest` in big-endian order.
// Note that the 0x80 marker is written into ctx->buffer itself.
void sha256_finish(sha256_ctx* ctx, uint8_t digest[SHA256_DIGEST_SIZE])
{
    const uint64_t total_bytes = ctx->count;
    const uint64_t total_bits = total_bytes * 8;

    size_t used = total_bytes % SHA256_BLOCK_SIZE;

    // a second block is needed when marker + length no longer fit behind the pending bytes
    size_t block_count = 2;
    if (used < SHA256_BLOCK_SIZE - sizeof(total_bits))
    {
        block_count = 1;
    }

    ctx->buffer[used] = 0x80;
    used += 1;

    uint8_t tail[2 * SHA256_BLOCK_SIZE];
    memcpy(tail, ctx->buffer, SHA256_BLOCK_SIZE);
    memset(tail + used, 0, SHA256_BLOCK_SIZE);

    // the bit length occupies the last 8 bytes of the final block
    uint8_t* length_field = tail + block_count * SHA256_BLOCK_SIZE - sizeof(total_bits);
    SHA256_SET64BE(length_field, total_bits);

    sha256_process(ctx->state, tail, block_count);

    uint8_t* out = digest;
    for (size_t word = 0; word < 8; word++)
    {
        SHA256_SET32BE(out, ctx->state[word]);
        out += sizeof(uint32_t);
    }
}
// Resets `ctx` for a new SHA-224 computation: zero bytes consumed and the
// chaining state set to the SHA-224 initial hash values.
void sha224_init(sha224_ctx* ctx)
{
    static const uint32_t initial_state[8] =
    {
        0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
        0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
    };

    for (size_t word = 0; word < 8; word++)
    {
        ctx->state[word] = initial_state[word];
    }
    ctx->count = 0;
}
// Feeds `size` bytes from `data` into a SHA-224 computation.
// Input absorption is delegated unchanged to sha256_update().
void sha224_update(sha224_ctx* ctx, const void* data, size_t size)
{
sha256_update(ctx, data, size);
}
// Completes a SHA-224 computation: runs the SHA-256 finalization into a
// scratch buffer and copies its first SHA224_DIGEST_SIZE bytes to `digest`.
void sha224_finish(sha224_ctx* ctx, uint8_t digest[SHA224_DIGEST_SIZE])
{
    uint8_t full_digest[SHA256_DIGEST_SIZE];
    sha256_finish(ctx, full_digest);

    for (size_t i = 0; i < SHA224_DIGEST_SIZE; i++)
    {
        digest[i] = full_digest[i];
    }
}
#if defined(__clang__)
# pragma clang diagnostic pop
#elif defined(_MSC_VER)
# pragma warning (pop)
#endif
+508
View File
@@ -0,0 +1,508 @@
#pragma once
// https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
// https://www.rfc-editor.org/rfc/rfc6234
#include <stddef.h>
#include <stdint.h>
//
// interface
//
#define SHA384_DIGEST_SIZE 48
#define SHA512_DIGEST_SIZE 64
#define SHA512_BLOCK_SIZE 128
// Streaming state shared by SHA-512 and SHA-384.
typedef struct {
// bytes of the current, not yet complete, block
uint8_t buffer[SHA512_BLOCK_SIZE];
// total number of bytes fed so far, as a 128-bit value:
// count[0] holds the low 64 bits, count[1] the high 64 bits
uint64_t count[2];
// the eight 64-bit chaining words
uint64_t state[8];
} sha512_ctx;
// SHA-384 reuses the same context layout
typedef sha512_ctx sha384_ctx;
// Usage: call *_init once, *_update any number of times, then *_finish to
// write the digest (SHA512_DIGEST_SIZE or SHA384_DIGEST_SIZE bytes).
static inline void sha512_init(sha512_ctx* ctx);
static inline void sha512_update(sha512_ctx* ctx, const void* data, size_t size);
static inline void sha512_finish(sha512_ctx* ctx, uint8_t digest[SHA512_DIGEST_SIZE]);
static inline void sha384_init(sha384_ctx* ctx);
static inline void sha384_update(sha384_ctx* ctx, const void* data, size_t size);
static inline void sha384_finish(sha384_ctx* ctx, uint8_t digest[SHA384_DIGEST_SIZE]);
//
// implementation
//
#include <string.h> // memcpy, memset
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wcast-align"
# pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
# pragma clang diagnostic ignored "-Wlanguage-extension-token"
# pragma clang diagnostic ignored "-Wdeclaration-after-statement"
#elif defined(_MSC_VER)
# pragma warning (push)
# pragma warning (disable : 4127)
#endif
// SHA512_ROR64(x,n): rotate the 64-bit value x right by n bits.
// The generic fallback shifts by (64-n), so it is only valid for 0 < n < 64.
#if defined(__clang__)
# define SHA512_ROR64(x,n) __builtin_rotateright64(x, n)
#elif defined(_MSC_VER)
# include <stdlib.h>
# define SHA512_ROR64(x,n) _rotr64(x, n)
#else
# define SHA512_ROR64(x,n) ( ((x) >> (n)) | ((x) << (64-(n))) )
#endif
// SHA512_GET64BE(ptr) / SHA512_SET64BE(ptr,x): read / write a 64-bit big-endian
// value at a byte pointer. The MSVC variant byte-swaps an unaligned native
// access; the generic variant assembles the value byte by byte and evaluates
// its arguments several times, so pass expressions without side effects.
#if defined(_MSC_VER)
# include <stdlib.h>
# define SHA512_GET64BE(ptr) _byteswap_uint64( *((const _UNALIGNED uint64_t*)(ptr)) )
# define SHA512_SET64BE(ptr,x) *((_UNALIGNED uint64_t*)(ptr)) = _byteswap_uint64(x)
#else
# define SHA512_GET64BE(ptr) \
( \
((uint64_t)((ptr)[0]) << 56) | \
((uint64_t)((ptr)[1]) << 48) | \
((uint64_t)((ptr)[2]) << 40) | \
((uint64_t)((ptr)[3]) << 32) | \
((uint64_t)((ptr)[4]) << 24) | \
((uint64_t)((ptr)[5]) << 16) | \
((uint64_t)((ptr)[6]) << 8) | \
((uint64_t)((ptr)[7]) << 0) \
)
# define SHA512_SET64BE(ptr, x) do \
{ \
(ptr)[0] = (uint8_t)((x) >> 56); \
(ptr)[1] = (uint8_t)((x) >> 48); \
(ptr)[2] = (uint8_t)((x) >> 40); \
(ptr)[3] = (uint8_t)((x) >> 32); \
(ptr)[4] = (uint8_t)((x) >> 24); \
(ptr)[5] = (uint8_t)((x) >> 16); \
(ptr)[6] = (uint8_t)((x) >> 8); \
(ptr)[7] = (uint8_t)((x) >> 0); \
} \
while (0)
#endif
#if defined(__x86_64__) || defined(_M_AMD64)
#include <immintrin.h>
#if defined(__clang__) || defined(__GNUC__)
# include <cpuid.h>
# define SHA512_TARGET(str) __attribute__((target(str)))
# define SHA512_CPUID(x, info) __cpuid(x, info[0], info[1], info[2], info[3])
# define SHA512_CPUID_EX(x, y, info) __cpuid_count(x, y, info[0], info[1], info[2], info[3])
# define SHA512_XGETBV(x) __builtin_ia32_xgetbv(x)
#else
# include <intrin.h>
# define SHA512_TARGET(str)
# define SHA512_CPUID(x, info) __cpuid(info, x)
# define SHA512_CPUID_EX(x, y, info) __cpuidex(info, x, y)
# define SHA512_XGETBV(x) _xgetbv(x)
#endif
// Feature flags returned by sha512_cpuid(). SHA512_CPUID_INIT is always set once
// detection has run, so a cached value of 0 means "not probed yet".
#define SHA512_CPUID_INIT (1 << 0)
#define SHA512_CPUID_VSHA512 (1 << 1)

// Probes CPUID/XCR0 for everything sha512_process_vsha512() needs (OS-enabled
// YMM state, AVX2, and the SHA512 extension) and caches the answer in a
// function-local static.
//
// Returns a bitmask of SHA512_CPUID_* flags. If SHA512_CPUID_MASK is defined,
// the returned value (not the cached one) is additionally AND-ed with it.
//
// Only this header's own SHA512_* helper macros are used, so the header does
// not depend on sha256.h having been included first.
SHA512_TARGET("xsave")
static inline int sha512_cpuid(void)
{
    static int cpuid;

    int result = cpuid;
    if (result == 0)
    {
        // info[] receives EAX, EBX, ECX, EDX in that order
        int info[4];

        // Leaf 1, ECX bit 27 (OSXSAVE) tells us the OS has enabled XGETBV.
        // Bit 26 (XSAVE) only says the CPU supports it; executing XGETBV
        // without OSXSAVE raises an invalid-opcode fault.
        SHA512_CPUID(1, info);
        int has_osxsave = info[2] & (1 << 27);

        // XCR0 bit 2: the OS saves/restores the upper halves of the YMM registers
        int has_ymm = 0;
        if (has_osxsave)
        {
            uint64_t xcr0 = SHA512_XGETBV(0);
            has_ymm = (xcr0 & (1 << 2)) != 0;
        }

        // AVX2: leaf 7 / subleaf 0, EBX bit 5
        SHA512_CPUID_EX(7, 0, info);
        int has_avx2 = info[1] & (1 << 5);

        // SHA512: leaf 7 / subleaf 1, EAX bit 0
        SHA512_CPUID_EX(7, 1, info);
        int has_sha512 = info[0] & (1 << 0);

        result |= SHA512_CPUID_INIT;
        if (has_ymm && has_avx2 && has_sha512)
        {
            result |= SHA512_CPUID_VSHA512;
        }

        cpuid = result;
    }

#if defined(SHA512_CPUID_MASK)
    result &= SHA512_CPUID_MASK;
#endif

    return result;
}
// Compresses `count` consecutive message blocks into the eight-word `state`
// using the AVX2 + SHA512 intrinsics (_mm256_sha512rnds2_epi64,
// _mm256_sha512msg1_epi64, _mm256_sha512msg2_epi64).
// Each loop iteration consumes four 32-byte vectors (128 bytes) from `block`.
// The block loop is do/while, so `count` must be at least 1.
SHA512_TARGET("avx2,sha512")
static void sha512_process_vsha512(uint64_t* state, const uint8_t* block, size_t count)
{
const __m256i* buffer = (const __m256i*)block;
// to byteswap when doing big-endian load for message qwords
const __m256i bswap = _mm256_broadcastsi128_si256(_mm_setr_epi8(7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8));
// round constants, four per row: row i is added to the message words in QROUND(i)
static const uint64_t K[20][4] =
{
{ 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc },
{ 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118 },
{ 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2 },
{ 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694 },
{ 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65 },
{ 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5 },
{ 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4 },
{ 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70 },
{ 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df },
{ 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b },
{ 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30 },
{ 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8 },
{ 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8 },
{ 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3 },
{ 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec },
{ 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b },
{ 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178 },
{ 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b },
{ 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c },
{ 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 },
};
// the message schedule lives in a rolling window of four vectors
#define W(i) w[(i)%4]
// 4 wide round calculations
#define QROUND(i) do { \
/* first four rounds loads input message */ \
if (i < 4) W(i) = _mm256_shuffle_epi8(_mm256_loadu_si256(&buffer[i]), bswap); \
/* add round constant */ \
tmp = _mm256_add_epi64(W(i), _mm256_loadu_si256((const __m256i*)K[i])); \
/* update previous message qwords for next rounds */ \
if (i > 2 && i < 19) W(i-3) = _mm256_sha512msg2_epi64(_mm256_add_epi64(W(i-3), _mm256_permute4x64_epi64(_mm256_blend_epi32(W(i-1), W(i), 3), _MM_SHUFFLE(0,3,2,1))), W(i)); \
if (i > 0 && i < 17) W(i-1) = _mm256_sha512msg1_epi64(W(i-1), _mm256_castsi256_si128(W(i))); \
/* round functions */ \
state1 = _mm256_sha512rnds2_epi64(state1, state0, _mm256_castsi256_si128(tmp)); \
state0 = _mm256_sha512rnds2_epi64(state0, state1, _mm256_extracti128_si256(tmp, 1)); \
} while(0)
// load initial state
__m256i abcd = _mm256_permute4x64_epi64(_mm256_loadu_si256((const __m256i*)&state[0]), _MM_SHUFFLE(0,1,2,3)); // [a,b,c,d]
__m256i efgh = _mm256_permute4x64_epi64(_mm256_loadu_si256((const __m256i*)&state[4]), _MM_SHUFFLE(0,1,2,3)); // [e,f,g,h]
// qword order for vsha512rnds2 instruction
__m256i state0 = _mm256_permute2x128_si256(efgh, abcd, (3 << 4) | 1); // [a,b,e,f]
__m256i state1 = _mm256_permute2x128_si256(efgh, abcd, (2 << 4) | 0); // [c,d,g,h]
do
{
// remember current state
__m256i last0 = state0;
__m256i last1 = state1;
__m256i tmp, w[4];
// 20 quad-rounds = 80 rounds per block
QROUND(0);
QROUND(1);
QROUND(2);
QROUND(3);
QROUND(4);
QROUND(5);
QROUND(6);
QROUND(7);
QROUND(8);
QROUND(9);
QROUND(10);
QROUND(11);
QROUND(12);
QROUND(13);
QROUND(14);
QROUND(15);
QROUND(16);
QROUND(17);
QROUND(18);
QROUND(19);
// update next state
state0 = _mm256_add_epi64(state0, last0);
state1 = _mm256_add_epi64(state1, last1);
// advance by one block (4 x 32 bytes)
buffer += 4;
}
while (--count);
// restore qword order
abcd = _mm256_permute2x128_si256(state1, state0, (3 << 4) | 1);
efgh = _mm256_permute2x128_si256(state1, state0, (2 << 4) | 0);
// save the new state
_mm256_storeu_si256((__m256i*)&state[0], _mm256_permute4x64_epi64(abcd, _MM_SHUFFLE(0,1,2,3)));
_mm256_storeu_si256((__m256i*)&state[4], _mm256_permute4x64_epi64(efgh, _MM_SHUFFLE(0,1,2,3)));
#undef QROUND
#undef W
}
#endif // defined(__x86_64__) || defined(_M_AMD64)
// Compresses `count` consecutive blocks of SHA512_BLOCK_SIZE bytes, starting at
// `block`, into the eight-word `state`.
// On x86-64 the work is handed to sha512_process_vsha512() when sha512_cpuid()
// reports SHA512_CPUID_VSHA512; otherwise the portable, fully unrolled 80-round
// implementation below runs.
// The block loop is do/while, so `count` must be at least 1.
static void sha512_process(uint64_t* state, const uint8_t* block, size_t count)
{
#if defined(__x86_64__) || defined(_M_AMD64)
int cpuid = sha512_cpuid();
if (cpuid & SHA512_CPUID_VSHA512)
{
sha512_process_vsha512(state, block, count);
return;
}
#endif
// round helper functions; all are #undef'd again at the end of this function
#define Ch(x,y,z) ((x & (y ^ z)) ^ z)
#define Maj(x,y,z) ((x & y) | (z & (x | y)))
#define BSig0(x) (SHA512_ROR64(x, 28) ^ SHA512_ROR64(x, 34) ^ SHA512_ROR64(x, 39))
#define BSig1(x) (SHA512_ROR64(x, 14) ^ SHA512_ROR64(x, 18) ^ SHA512_ROR64(x, 41))
#define SSig0(x) (SHA512_ROR64(x, 1) ^ SHA512_ROR64(x, 8) ^ (x >> 7))
#define SSig1(x) (SHA512_ROR64(x, 19) ^ SHA512_ROR64(x, 61) ^ (x >> 6))
// 16-entry circular message schedule; the +16 keeps the index non-negative
// even where W(i-16) is expanded for small i (in branches that are never taken)
#define W(i) w[(i+16)%16]
// One round. Rounds 0..15 read a big-endian qword from the block, later rounds
// derive it from the schedule. Instead of moving values between variables, each
// ROUND invocation below is passed the working variables rotated by one position.
#define ROUND(i,a,b,c,d,e,f,g,h,K) do \
{ \
uint64_t w0; \
if (i < 16) W(i) = w0 = SHA512_GET64BE(block + i*sizeof(uint64_t)); \
if (i >= 16) W(i) = w0 = SSig1(W(i-2)) + W(i-7) + SSig0(W(i-15)) + W(i-16); \
\
uint64_t t1 = h + BSig1(e) + Ch(e,f,g) + K + w0; \
uint64_t t2 = BSig0(a) + Maj(a,b,c); \
d += t1; \
h = t1 + t2; \
} while (0)
do
{
// working variables start from the current chaining state
uint64_t a = state[0];
uint64_t b = state[1];
uint64_t c = state[2];
uint64_t d = state[3];
uint64_t e = state[4];
uint64_t f = state[5];
uint64_t g = state[6];
uint64_t h = state[7];
uint64_t w[16];
ROUND( 0, a, b, c, d, e, f, g, h, 0x428a2f98d728ae22);
ROUND( 1, h, a, b, c, d, e, f, g, 0x7137449123ef65cd);
ROUND( 2, g, h, a, b, c, d, e, f, 0xb5c0fbcfec4d3b2f);
ROUND( 3, f, g, h, a, b, c, d, e, 0xe9b5dba58189dbbc);
ROUND( 4, e, f, g, h, a, b, c, d, 0x3956c25bf348b538);
ROUND( 5, d, e, f, g, h, a, b, c, 0x59f111f1b605d019);
ROUND( 6, c, d, e, f, g, h, a, b, 0x923f82a4af194f9b);
ROUND( 7, b, c, d, e, f, g, h, a, 0xab1c5ed5da6d8118);
ROUND( 8, a, b, c, d, e, f, g, h, 0xd807aa98a3030242);
ROUND( 9, h, a, b, c, d, e, f, g, 0x12835b0145706fbe);
ROUND(10, g, h, a, b, c, d, e, f, 0x243185be4ee4b28c);
ROUND(11, f, g, h, a, b, c, d, e, 0x550c7dc3d5ffb4e2);
ROUND(12, e, f, g, h, a, b, c, d, 0x72be5d74f27b896f);
ROUND(13, d, e, f, g, h, a, b, c, 0x80deb1fe3b1696b1);
ROUND(14, c, d, e, f, g, h, a, b, 0x9bdc06a725c71235);
ROUND(15, b, c, d, e, f, g, h, a, 0xc19bf174cf692694);
ROUND(16, a, b, c, d, e, f, g, h, 0xe49b69c19ef14ad2);
ROUND(17, h, a, b, c, d, e, f, g, 0xefbe4786384f25e3);
ROUND(18, g, h, a, b, c, d, e, f, 0x0fc19dc68b8cd5b5);
ROUND(19, f, g, h, a, b, c, d, e, 0x240ca1cc77ac9c65);
ROUND(20, e, f, g, h, a, b, c, d, 0x2de92c6f592b0275);
ROUND(21, d, e, f, g, h, a, b, c, 0x4a7484aa6ea6e483);
ROUND(22, c, d, e, f, g, h, a, b, 0x5cb0a9dcbd41fbd4);
ROUND(23, b, c, d, e, f, g, h, a, 0x76f988da831153b5);
ROUND(24, a, b, c, d, e, f, g, h, 0x983e5152ee66dfab);
ROUND(25, h, a, b, c, d, e, f, g, 0xa831c66d2db43210);
ROUND(26, g, h, a, b, c, d, e, f, 0xb00327c898fb213f);
ROUND(27, f, g, h, a, b, c, d, e, 0xbf597fc7beef0ee4);
ROUND(28, e, f, g, h, a, b, c, d, 0xc6e00bf33da88fc2);
ROUND(29, d, e, f, g, h, a, b, c, 0xd5a79147930aa725);
ROUND(30, c, d, e, f, g, h, a, b, 0x06ca6351e003826f);
ROUND(31, b, c, d, e, f, g, h, a, 0x142929670a0e6e70);
ROUND(32, a, b, c, d, e, f, g, h, 0x27b70a8546d22ffc);
ROUND(33, h, a, b, c, d, e, f, g, 0x2e1b21385c26c926);
ROUND(34, g, h, a, b, c, d, e, f, 0x4d2c6dfc5ac42aed);
ROUND(35, f, g, h, a, b, c, d, e, 0x53380d139d95b3df);
ROUND(36, e, f, g, h, a, b, c, d, 0x650a73548baf63de);
ROUND(37, d, e, f, g, h, a, b, c, 0x766a0abb3c77b2a8);
ROUND(38, c, d, e, f, g, h, a, b, 0x81c2c92e47edaee6);
ROUND(39, b, c, d, e, f, g, h, a, 0x92722c851482353b);
ROUND(40, a, b, c, d, e, f, g, h, 0xa2bfe8a14cf10364);
ROUND(41, h, a, b, c, d, e, f, g, 0xa81a664bbc423001);
ROUND(42, g, h, a, b, c, d, e, f, 0xc24b8b70d0f89791);
ROUND(43, f, g, h, a, b, c, d, e, 0xc76c51a30654be30);
ROUND(44, e, f, g, h, a, b, c, d, 0xd192e819d6ef5218);
ROUND(45, d, e, f, g, h, a, b, c, 0xd69906245565a910);
ROUND(46, c, d, e, f, g, h, a, b, 0xf40e35855771202a);
ROUND(47, b, c, d, e, f, g, h, a, 0x106aa07032bbd1b8);
ROUND(48, a, b, c, d, e, f, g, h, 0x19a4c116b8d2d0c8);
ROUND(49, h, a, b, c, d, e, f, g, 0x1e376c085141ab53);
ROUND(50, g, h, a, b, c, d, e, f, 0x2748774cdf8eeb99);
ROUND(51, f, g, h, a, b, c, d, e, 0x34b0bcb5e19b48a8);
ROUND(52, e, f, g, h, a, b, c, d, 0x391c0cb3c5c95a63);
ROUND(53, d, e, f, g, h, a, b, c, 0x4ed8aa4ae3418acb);
ROUND(54, c, d, e, f, g, h, a, b, 0x5b9cca4f7763e373);
ROUND(55, b, c, d, e, f, g, h, a, 0x682e6ff3d6b2b8a3);
ROUND(56, a, b, c, d, e, f, g, h, 0x748f82ee5defb2fc);
ROUND(57, h, a, b, c, d, e, f, g, 0x78a5636f43172f60);
ROUND(58, g, h, a, b, c, d, e, f, 0x84c87814a1f0ab72);
ROUND(59, f, g, h, a, b, c, d, e, 0x8cc702081a6439ec);
ROUND(60, e, f, g, h, a, b, c, d, 0x90befffa23631e28);
ROUND(61, d, e, f, g, h, a, b, c, 0xa4506cebde82bde9);
ROUND(62, c, d, e, f, g, h, a, b, 0xbef9a3f7b2c67915);
ROUND(63, b, c, d, e, f, g, h, a, 0xc67178f2e372532b);
ROUND(64, a, b, c, d, e, f, g, h, 0xca273eceea26619c);
ROUND(65, h, a, b, c, d, e, f, g, 0xd186b8c721c0c207);
ROUND(66, g, h, a, b, c, d, e, f, 0xeada7dd6cde0eb1e);
ROUND(67, f, g, h, a, b, c, d, e, 0xf57d4f7fee6ed178);
ROUND(68, e, f, g, h, a, b, c, d, 0x06f067aa72176fba);
ROUND(69, d, e, f, g, h, a, b, c, 0x0a637dc5a2c898a6);
ROUND(70, c, d, e, f, g, h, a, b, 0x113f9804bef90dae);
ROUND(71, b, c, d, e, f, g, h, a, 0x1b710b35131c471b);
ROUND(72, a, b, c, d, e, f, g, h, 0x28db77f523047d84);
ROUND(73, h, a, b, c, d, e, f, g, 0x32caab7b40c72493);
ROUND(74, g, h, a, b, c, d, e, f, 0x3c9ebe0a15c9bebc);
ROUND(75, f, g, h, a, b, c, d, e, 0x431d67c49c100d4c);
ROUND(76, e, f, g, h, a, b, c, d, 0x4cc5d4becb3e42b6);
ROUND(77, d, e, f, g, h, a, b, c, 0x597f299cfc657e2a);
ROUND(78, c, d, e, f, g, h, a, b, 0x5fcb6fab3ad6faec);
ROUND(79, b, c, d, e, f, g, h, a, 0x6c44198c4a475817);
// fold the working variables back into the chaining state
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
state[5] += f;
state[6] += g;
state[7] += h;
block += SHA512_BLOCK_SIZE;
}
while (--count);
#undef ROUND
#undef W
#undef Ch
#undef Maj
#undef BSig0
#undef BSig1
#undef SSig0
#undef SSig1
}
// Resets `ctx` for a new SHA-512 computation: zero bytes consumed and the
// chaining state set to the SHA-512 initial hash values.
void sha512_init(sha512_ctx* ctx)
{
    static const uint64_t initial_state[8] =
    {
        0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
        0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
        0x510e527fade682d1, 0x9b05688c2b3e6c1f,
        0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
    };

    for (size_t word = 0; word < 8; word++)
    {
        ctx->state[word] = initial_state[word];
    }
    ctx->count[1] = 0;
    ctx->count[0] = 0;
}
// Feeds `size` bytes from `data` into the running SHA-512 computation.
//
// Whole blocks are compressed straight from the caller's memory; bytes that do
// not fill a block are parked in ctx->buffer until a later call completes it.
// ctx->count tracks the total number of bytes seen so far as a 128-bit value.
void sha512_update(sha512_ctx* ctx, const void* data, size_t size)
{
    // An empty update changes nothing. Returning early also keeps a null `data`
    // (a common representation of an empty span) away from memcpy, where passing
    // a null pointer is undefined behavior even for a zero length.
    if (size == 0)
    {
        return;
    }

    const uint8_t* buffer = (const uint8_t*)data;

    // bytes already parked in ctx->buffer from earlier calls
    size_t pending = ctx->count[0] % SHA512_BLOCK_SIZE;

    // 128-bit add: if the low word wrapped, the sum is smaller than the addend
    ctx->count[0] += size;
    ctx->count[1] += size > ctx->count[0];

    // top up and flush the partially filled block first, if the input can complete it
    size_t available = SHA512_BLOCK_SIZE - pending;
    if (pending && size >= available)
    {
        memcpy(ctx->buffer + pending, buffer, available);
        sha512_process(ctx->state, ctx->buffer, 1);
        buffer += available;
        size -= available;
        pending = 0;
    }

    // compress all remaining whole blocks directly from the input
    size_t count = size / SHA512_BLOCK_SIZE;
    if (count)
    {
        sha512_process(ctx->state, buffer, count);
        buffer += count * SHA512_BLOCK_SIZE;
        size -= count * SHA512_BLOCK_SIZE;
    }

    // keep the tail for the next update/finish
    memcpy(ctx->buffer + pending, buffer, size);
}
// Completes the hash: appends the 0x80 marker, zero padding and the 128-bit
// big-endian message length in bits, compresses the final one or two blocks,
// and writes the eight state words to `digest` in big-endian order.
// Note that the 0x80 marker is written into ctx->buffer itself.
void sha512_finish(sha512_ctx* ctx, uint8_t digest[SHA512_DIGEST_SIZE])
{
    const uint64_t bytes_lo = ctx->count[0];
    const uint64_t bytes_hi = ctx->count[1];

    // byte count -> bit count, carrying the top three bits of the low word
    const uint64_t bits_lo = bytes_lo << 3;
    const uint64_t bits_hi = (bytes_hi << 3) | (bytes_lo >> 61);

    size_t used = bytes_lo % SHA512_BLOCK_SIZE;

    // a second block is needed when marker + length no longer fit behind the pending bytes
    size_t block_count = 2;
    if (used < SHA512_BLOCK_SIZE - 2 * sizeof(uint64_t))
    {
        block_count = 1;
    }

    ctx->buffer[used] = 0x80;
    used += 1;

    uint8_t tail[2 * SHA512_BLOCK_SIZE];
    memcpy(tail, ctx->buffer, SHA512_BLOCK_SIZE);
    memset(tail + used, 0, SHA512_BLOCK_SIZE);

    // the bit length occupies the last 16 bytes of the final block, high word first
    uint8_t* length_field = tail + block_count * SHA512_BLOCK_SIZE - 2 * sizeof(uint64_t);
    SHA512_SET64BE(length_field, bits_hi);
    SHA512_SET64BE(length_field + sizeof(uint64_t), bits_lo);

    sha512_process(ctx->state, tail, block_count);

    uint8_t* out = digest;
    for (size_t word = 0; word < 8; word++)
    {
        SHA512_SET64BE(out, ctx->state[word]);
        out += sizeof(uint64_t);
    }
}
// Resets `ctx` for a new SHA-384 computation: zero bytes consumed and the
// chaining state set to the SHA-384 initial hash values.
void sha384_init(sha384_ctx* ctx)
{
    static const uint64_t initial_state[8] =
    {
        0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
        0x9159015a3070dd17, 0x152fecd8f70e5939,
        0x67332667ffc00b31, 0x8eb44a8768581511,
        0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4,
    };

    for (size_t word = 0; word < 8; word++)
    {
        ctx->state[word] = initial_state[word];
    }
    ctx->count[1] = 0;
    ctx->count[0] = 0;
}
// Feeds `size` bytes from `data` into a SHA-384 computation.
// Input absorption is delegated unchanged to sha512_update().
void sha384_update(sha512_ctx* ctx, const void* data, size_t size)
{
sha512_update(ctx, data, size);
}
// Completes a SHA-384 computation: runs the SHA-512 finalization into a
// scratch buffer and copies its first SHA384_DIGEST_SIZE bytes to `digest`.
void sha384_finish(sha384_ctx* ctx, uint8_t digest[SHA384_DIGEST_SIZE])
{
    uint8_t full_digest[SHA512_DIGEST_SIZE];
    sha512_finish(ctx, full_digest);

    for (size_t i = 0; i < SHA384_DIGEST_SIZE; i++)
    {
        digest[i] = full_digest[i];
    }
}
#if defined(__clang__)
# pragma clang diagnostic pop
#elif defined(_MSC_VER)
# pragma warning (pop)
#endif
-567
View File
@@ -1,567 +0,0 @@
// This is a collection of code originally sourced from LibTomCrypt, located at
// https://github.com/libtom/libtomcrypt, released under the following license:
//
// ---
//
// The LibTom license
//
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <http://unlicense.org/>
//
// ---
//
// The code has been narrowed down and slightly modified, to include only the
// things that the RAD Debugger project needs, and to work with the project's
// build structure cleanly.
#ifndef TOMCRYPT_HASH_H
#define TOMCRYPT_HASH_H
////////////////////////////////
//~ rjf: Common Helpers
// Success code returned by the functions in this header; their failure paths return 0.
#define CRYPT_OK 1
// LOAD32H(x, y): x = 32-bit big-endian value read from the four bytes at y
#define LOAD32H(x, y) \
do { x = ((U32)((y)[0] & 255)<<24) | \
((U32)((y)[1] & 255)<<16) | \
((U32)((y)[2] & 255)<<8) | \
((U32)((y)[3] & 255)); } while(0)
// STORE32H(x, y): write the 32-bit value x to the four bytes at y, most significant byte first
#define STORE32H(x, y) \
do { (y)[0] = (unsigned char)(((x)>>24)&255); (y)[1] = (unsigned char)(((x)>>16)&255); \
(y)[2] = (unsigned char)(((x)>>8)&255); (y)[3] = (unsigned char)((x)&255); } while(0)
// STORE64H(x, y): write the 64-bit value x to the eight bytes at y, most significant byte first
#define STORE64H(x, y) \
do { (y)[0] = (unsigned char)(((x)>>56)&255); (y)[1] = (unsigned char)(((x)>>48)&255); \
(y)[2] = (unsigned char)(((x)>>40)&255); (y)[3] = (unsigned char)(((x)>>32)&255); \
(y)[4] = (unsigned char)(((x)>>24)&255); (y)[5] = (unsigned char)(((x)>>16)&255); \
(y)[6] = (unsigned char)(((x)>>8)&255); (y)[7] = (unsigned char)((x)&255); } while(0)
// LTC_TMPVAR(n): builds the identifier LTC_<n>_<line number> by token pasting
#define LTC_TMPVAR__(n, l) n ## l
#define LTC_TMPVAR_(n, l) LTC_TMPVAR__(n, l)
#define LTC_TMPVAR(n) LTC_TMPVAR_(LTC_ ## n ## _, __LINE__)
// 32-bit rotate left / right by y (taken modulo 32).
// The ROLc/RORc variants expand to exactly the same expressions as ROL/ROR here.
#define ROL(x, y) ( (((U32)(x)<<(U32)((y)&31)) | (((U32)(x)&0xFFFFFFFFUL)>>(U32)((32-((y)&31))&31))) & 0xFFFFFFFFUL)
#define ROR(x, y) ( ((((U32)(x)&0xFFFFFFFFUL)>>(U32)((y)&31)) | ((U32)(x)<<(U32)((32-((y)&31))&31))) & 0xFFFFFFFFUL)
#define ROLc(x, y) ( (((U32)(x)<<(U32)((y)&31)) | (((U32)(x)&0xFFFFFFFFUL)>>(U32)((32-((y)&31))&31))) & 0xFFFFFFFFUL)
#define RORc(x, y) ( ((((U32)(x)&0xFFFFFFFFUL)>>(U32)((y)&31)) | ((U32)(x)<<(U32)((32-((y)&31))&31))) & 0xFFFFFFFFUL)
// smaller of x and y; each argument may be evaluated twice
#define MIN(x, y) ( ((x)<(y))?(x):(y) )
////////////////////////////////
//~ rjf: SHA256
// Streaming SHA-256 state used by sha256_init / sha256_process / sha256_done.
typedef struct SHA256State SHA256State;
struct SHA256State
{
// message length in bits accounted for so far (grows by 8 * 64 per compressed block)
U64 length;
// state: the eight chaining words; curlen: number of bytes currently held in buf
U32 state[8], curlen;
// holding area for one partially filled 64-byte block
U8 buf[64];
};
/* Various logical functions */
// Ch / Maj: bitwise "choose" and "majority" of three words
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
#define Maj(x,y,z) (((x | y) & z) | (x & y))
// S: 32-bit rotate right by n; R: logical shift right by n
#define S(x, n) RORc((x),(n))
#define R(x, n) (((x)&0xFFFFFFFFUL)>>(n))
// Sigma0/Sigma1 are applied to the working variables in each round;
// Gamma0/Gamma1 are applied when expanding the message schedule
#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
/* compress 512-bits */
// Runs the 64-round SHA-256 compression over the 64 bytes at `buf` and adds the
// result into state->state. Always returns CRYPT_OK.
static int s_sha256_compress(SHA256State *state, const unsigned char *buf)
{
// S: working copy of the chaining words; W: the fully expanded 64-word message schedule
U32 S[8], W[64], t0, t1;
int i;
/* copy state into S */
for (i = 0; i < 8; i++) {
S[i] = state->state[i];
}
/* load the 512-bit block as sixteen big-endian words into W[0..15] */
for (i = 0; i < 16; i++) {
LOAD32H(W[i], buf + (4*i));
}
/* fill W[16..63] */
for (i = 16; i < 64; i++) {
W[i] = Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16];
}
/* Compress */
// One round: i is the round index, ki the round constant. Instead of moving
// values between variables, each invocation below is passed the S[] entries
// rotated by one position. The macro expands to bare statements (no do/while).
#define RND(a,b,c,d,e,f,g,h,i,ki) \
t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i]; \
t1 = Sigma0(a) + Maj(a, b, c); \
d += t0; \
h = t0 + t1;
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],1,0x71374491);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],2,0xb5c0fbcf);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],3,0xe9b5dba5);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],4,0x3956c25b);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],5,0x59f111f1);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],6,0x923f82a4);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],7,0xab1c5ed5);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],8,0xd807aa98);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],9,0x12835b01);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],10,0x243185be);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],11,0x550c7dc3);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],12,0x72be5d74);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],13,0x80deb1fe);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],14,0x9bdc06a7);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],15,0xc19bf174);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],16,0xe49b69c1);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],17,0xefbe4786);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],18,0x0fc19dc6);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],19,0x240ca1cc);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],20,0x2de92c6f);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],21,0x4a7484aa);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],22,0x5cb0a9dc);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],23,0x76f988da);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],24,0x983e5152);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],25,0xa831c66d);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],26,0xb00327c8);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],27,0xbf597fc7);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],28,0xc6e00bf3);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],29,0xd5a79147);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],30,0x06ca6351);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],31,0x14292967);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],32,0x27b70a85);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],33,0x2e1b2138);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],34,0x4d2c6dfc);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],35,0x53380d13);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],36,0x650a7354);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],37,0x766a0abb);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],38,0x81c2c92e);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],39,0x92722c85);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],40,0xa2bfe8a1);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],41,0xa81a664b);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],42,0xc24b8b70);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],43,0xc76c51a3);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],44,0xd192e819);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],45,0xd6990624);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],46,0xf40e3585);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],47,0x106aa070);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],48,0x19a4c116);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],49,0x1e376c08);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],50,0x2748774c);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],51,0x34b0bcb5);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],52,0x391c0cb3);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],53,0x4ed8aa4a);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],54,0x5b9cca4f);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],55,0x682e6ff3);
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],56,0x748f82ee);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],57,0x78a5636f);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],58,0x84c87814);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],59,0x8cc70208);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],60,0x90befffa);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],61,0xa4506ceb);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],62,0xbef9a3f7);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],63,0xc67178f2);
#undef RND
/* feedback */
for (i = 0; i < 8; i++) {
state->state[i] = state->state[i] + S[i];
}
return CRYPT_OK;
}
/**
  Reset a SHA-256 state so a new message can be hashed.
  Loads the eight starting state words and clears both the running
  length and the count of buffered bytes.
  @param state   The hash state you wish to initialize
  @return CRYPT_OK (this function has no failure path)
*/
int sha256_init(SHA256State *state)
{
  /* starting values for state->state[0..7], in order */
  static const unsigned long initial_words[8] =
  {
    0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
    0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL,
  };
  int word_idx;
  for(word_idx = 0; word_idx < 8; word_idx += 1)
  {
    state->state[word_idx] = initial_words[word_idx];
  }
  /* nothing hashed yet, nothing buffered yet */
  state->length = 0;
  state->curlen = 0;
  return CRYPT_OK;
}
/**
  Process a block of memory through the hash.
  While nothing is buffered, whole 64-byte blocks are compressed directly
  from `in`; all other bytes are staged in state->buf, which is compressed
  as soon as it holds a full block.
  @param state   The hash state
  @param in      The data to hash
  @param inlen   The length of the data (octets)
  @return CRYPT_OK if successful; the literal 0 if the state is invalid or
          the message bit length would overflow; otherwise whatever error
          s_sha256_compress reported
  NOTE(review): the two rejection paths return a literal 0. If CRYPT_OK is
  also 0, callers cannot tell a rejection from success -- confirm.
*/
int sha256_process(SHA256State *state, const unsigned char *in, unsigned long inlen)
{
  unsigned long n;
  unsigned long long inbits;
  int err;
  int block_size = 64;
  if(state->curlen > sizeof(state->buf))
  {
    return 0; // CRYPT_INVALID_ARG
  }
  /* Measure the input in bits with 64-bit arithmetic. `unsigned long` is
   * only 32 bits wide on some targets (e.g. 64-bit Windows); multiplying by
   * 8 in that type wraps for any input of 512 MiB or more, which made the
   * overflow guard reject perfectly valid buffers.
   */
  if((unsigned long long)inlen > (~0ULL >> 3))
  {
    return 0; // CRYPT_HASH_OVERFLOW
  }
  inbits = (unsigned long long)inlen * 8;
  /* assumes state->length is a 64-bit counter -- TODO confirm */
  if((state->length + inbits) < state->length)
  {
    return 0; // CRYPT_HASH_OVERFLOW
  }
  while(inlen > 0)
  {
    if(state->curlen == 0 && inlen >= block_size)
    {
      /* fast path: nothing buffered, so compress straight from the input */
      if((err = s_sha256_compress(state, in)) != CRYPT_OK)
      {
        return err;
      }
      state->length += block_size * 8;
      in += block_size;
      inlen -= block_size;
    }
    else
    {
      /* slow path: top up the staging buffer with as much as fits */
      n = MIN(inlen, (block_size - state->curlen));
      MemoryCopy(state->buf + state->curlen, in, (size_t)n);
      state->curlen += n;
      in += n;
      inlen -= n;
      if(state->curlen == block_size)
      {
        if((err = s_sha256_compress(state, state->buf)) != CRYPT_OK)
        {
          return err;
        }
        state->length += 8*block_size;
        state->curlen = 0;
      }
    }
  }
  return CRYPT_OK;
}
/**
  Finish the hash and write out the digest.
  Appends the 0x80 marker byte, zero padding and the 64-bit message length
  to whatever is still buffered, compresses the final block(s), then stores
  the eight state words to `out`.
  @param state   The hash state
  @param out     [out] The destination of the hash (32 bytes)
  @return CRYPT_OK if successful; the literal 0 if the buffered byte count
          is out of range
*/
int sha256_done(SHA256State *state, unsigned char *out)
{
  int word_idx;
  if(state->curlen >= sizeof(state->buf))
  {
    return 0; // CRYPT_INVALID_ARG
  }

  /* account for the bytes that are still sitting in the buffer */
  state->length += state->curlen * 8;

  /* append the '1' bit */
  state->buf[state->curlen] = (unsigned char)0x80;
  state->curlen += 1;

  /* no room left for the 8 length bytes: zero-fill this block, compress it,
   * and put the length in a fresh block instead
   */
  if(state->curlen > 56)
  {
    for(; state->curlen < 64; state->curlen += 1)
    {
      state->buf[state->curlen] = (unsigned char)0;
    }
    s_sha256_compress(state, state->buf);
    state->curlen = 0;
  }

  /* zero-fill up to byte 56, where the length field starts */
  for(; state->curlen < 56; state->curlen += 1)
  {
    state->buf[state->curlen] = (unsigned char)0;
  }

  /* store length and compress the last block */
  STORE64H(state->length, state->buf+56);
  s_sha256_compress(state, state->buf);

  /* copy output, 4 bytes per state word */
  for(word_idx = 0; word_idx < 8; word_idx += 1)
  {
    STORE32H(state->state[word_idx], out+(4*word_idx));
  }
  return CRYPT_OK;
}
#undef Ch
#undef Maj
#undef S
#undef R
#undef Sigma0
#undef Sigma1
#undef Gamma0
#undef Gamma1
////////////////////////////////
//~ rjf: SHA1
/* Streaming state for one in-progress SHA-1 computation. */
typedef struct SHA1State SHA1State;
struct SHA1State
{
  U64 length;            /* message length compressed so far, in bits */
  U32 state[5];          /* the five state words updated by each compress */
  U32 curlen;            /* number of bytes currently staged in `buf` */
  unsigned char buf[64]; /* staging area for one partial 64-byte block */
};
/* Bitwise mixing functions used by the FF0..FF3 round macros in
 * s_sha1_compress. Every argument is parenthesized so the macros stay
 * correct when handed compound expressions such as `a|b`. Arguments can be
 * evaluated more than once, so pass side-effect-free expressions only.
 */
#define F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))           /* per bit: x ? y : z */
#define F1(x,y,z) ((x) ^ (y) ^ (z))                     /* per bit: parity */
#define F2(x,y,z) (((x) & (y)) | ((z) & ((x) | (y))))   /* per bit: majority */
#define F3(x,y,z) ((x) ^ (y) ^ (z))                     /* same formula as F1 */
/**
  Run the SHA-1 compression step over one 64-byte block.
  @param state   The hash state; its five state words are updated in place
  @param buf     The 64 bytes to compress
  @return CRYPT_OK (this function has no failure path)
*/
static int s_sha1_compress(SHA1State *state, const unsigned char *buf)
{
  U32 a,b,c,d,e,W[80],i;
#ifdef LTC_SMALL_CODE
  /* scratch word for rotating a..e in the compact loops below; without this
   * declaration the LTC_SMALL_CODE build does not compile
   */
  U32 t;
#endif
  /* load the 512-bit block into W[0..15] */
  for (i = 0; i < 16; i++) {
    LOAD32H(W[i], buf + (4*i));
  }
  /* copy state */
  a = state->state[0];
  b = state->state[1];
  c = state->state[2];
  d = state->state[3];
  e = state->state[4];
  /* expand the 16 loaded words to the 80 words consumed by the rounds */
  for (i = 16; i < 80; i++) {
    W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
  }
  /* compress */
  /* one step per macro; FF0..FF3 differ only in mixing function and constant */
#define FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[i] + 0x5a827999UL); b = ROLc(b, 30);
#define FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[i] + 0x6ed9eba1UL); b = ROLc(b, 30);
#define FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[i] + 0x8f1bbcdcUL); b = ROLc(b, 30);
#define FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[i] + 0xca62c1d6UL); b = ROLc(b, 30);
#ifdef LTC_SMALL_CODE
  /* compact form: one step per iteration, then rotate the five variables */
  for (i = 0; i < 20; ) {
    FF0(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
  }
  for (; i < 40; ) {
    FF1(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
  }
  for (; i < 60; ) {
    FF2(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
  }
  for (; i < 80; ) {
    FF3(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
  }
#else
  /* unrolled form: five steps per iteration, rotating the argument order
   * instead of moving values between variables
   */
  /* round one */
  for (i = 0; i < 20; ) {
    FF0(a,b,c,d,e,i++);
    FF0(e,a,b,c,d,i++);
    FF0(d,e,a,b,c,i++);
    FF0(c,d,e,a,b,i++);
    FF0(b,c,d,e,a,i++);
  }
  /* round two */
  for (; i < 40; ) {
    FF1(a,b,c,d,e,i++);
    FF1(e,a,b,c,d,i++);
    FF1(d,e,a,b,c,i++);
    FF1(c,d,e,a,b,i++);
    FF1(b,c,d,e,a,i++);
  }
  /* round three */
  for (; i < 60; ) {
    FF2(a,b,c,d,e,i++);
    FF2(e,a,b,c,d,i++);
    FF2(d,e,a,b,c,i++);
    FF2(c,d,e,a,b,i++);
    FF2(b,c,d,e,a,i++);
  }
  /* round four */
  for (; i < 80; ) {
    FF3(a,b,c,d,e,i++);
    FF3(e,a,b,c,d,i++);
    FF3(d,e,a,b,c,i++);
    FF3(c,d,e,a,b,i++);
    FF3(b,c,d,e,a,i++);
  }
#endif
#undef FF0
#undef FF1
#undef FF2
#undef FF3
  /* store: fold the working variables back into the state */
  state->state[0] = state->state[0] + a;
  state->state[1] = state->state[1] + b;
  state->state[2] = state->state[2] + c;
  state->state[3] = state->state[3] + d;
  state->state[4] = state->state[4] + e;
  return CRYPT_OK;
}
/**
  Reset a SHA-1 state so a new message can be hashed.
  Loads the five starting state words and clears both the count of
  buffered bytes and the running length.
  @param state   The hash state you wish to initialize
  @return CRYPT_OK (this function has no failure path)
*/
int sha1_init(SHA1State *state)
{
  /* starting values for state->state[0..4], in order */
  static const unsigned long initial_words[5] =
  {
    0x67452301UL, 0xefcdab89UL, 0x98badcfeUL, 0x10325476UL, 0xc3d2e1f0UL,
  };
  int word_idx;
  for(word_idx = 0; word_idx < 5; word_idx += 1)
  {
    state->state[word_idx] = initial_words[word_idx];
  }
  /* nothing buffered yet, nothing hashed yet */
  state->curlen = 0;
  state->length = 0;
  return CRYPT_OK;
}
/**
  Process a block of memory through the hash.
  While nothing is buffered, whole 64-byte blocks are compressed directly
  from `in`; all other bytes are staged in state->buf, which is compressed
  as soon as it holds a full block.
  @param state   The hash state
  @param in      The data to hash
  @param inlen   The length of the data (octets)
  @return CRYPT_OK if successful; the literal 0 if the state is invalid or
          the message bit length would overflow; otherwise whatever error
          s_sha1_compress reported
  NOTE(review): the two rejection paths return a literal 0. If CRYPT_OK is
  also 0, callers cannot tell a rejection from success -- confirm.
*/
// HASH_PROCESS(sha1_process, s_sha1_compress, sha1, 64)
int sha1_process(SHA1State *state, const unsigned char *in, unsigned long inlen)
{
  unsigned long n;
  unsigned long long inbits;
  int err;
  int block_size = 64;
  if(state->curlen > sizeof(state->buf))
  {
    return 0; // CRYPT_INVALID_ARG
  }
  /* Measure the input in bits with 64-bit arithmetic. `unsigned long` is
   * only 32 bits wide on some targets (e.g. 64-bit Windows); multiplying by
   * 8 in that type wraps for any input of 512 MiB or more, which made the
   * overflow guard reject perfectly valid buffers.
   */
  if((unsigned long long)inlen > (~0ULL >> 3))
  {
    return 0; // CRYPT_HASH_OVERFLOW
  }
  inbits = (unsigned long long)inlen * 8;
  if((state->length + inbits) < state->length)
  {
    return 0; // CRYPT_HASH_OVERFLOW
  }
  while(inlen > 0)
  {
    if(state->curlen == 0 && inlen >= block_size)
    {
      /* fast path: nothing buffered, so compress straight from the input */
      if((err = s_sha1_compress(state, in)) != CRYPT_OK)
      {
        return err;
      }
      state->length += block_size * 8;
      in += block_size;
      inlen -= block_size;
    }
    else
    {
      /* slow path: top up the staging buffer with as much as fits */
      n = MIN(inlen, (block_size - state->curlen));
      MemoryCopy(state->buf + state->curlen, in, (size_t)n);
      state->curlen += n;
      in += n;
      inlen -= n;
      if(state->curlen == block_size)
      {
        if((err = s_sha1_compress(state, state->buf)) != CRYPT_OK)
        {
          return err;
        }
        state->length += 8*block_size;
        state->curlen = 0;
      }
    }
  }
  return CRYPT_OK;
}
/**
  Finish the hash and write out the digest.
  Appends the 0x80 marker byte, zero padding and the 64-bit message length
  to whatever is still buffered, compresses the final block(s), then stores
  the five state words to `out`.
  @param state   The hash state
  @param out     [out] The destination of the hash (20 bytes)
  @return CRYPT_OK if successful; the literal 0 if the buffered byte count
          is out of range
*/
int sha1_done(SHA1State *state, unsigned char *out)
{
  int word_idx;
  if(state->curlen >= sizeof(state->buf))
  {
    return 0; // CRYPT_INVALID_ARG;
  }

  /* account for the bytes that are still sitting in the buffer */
  state->length += state->curlen * 8;

  /* append the '1' bit */
  state->buf[state->curlen] = (unsigned char)0x80;
  state->curlen += 1;

  /* no room left for the 8 length bytes: zero-fill this block, compress it,
   * and put the length in a fresh block instead
   */
  if(state->curlen > 56)
  {
    for(; state->curlen < 64; state->curlen += 1)
    {
      state->buf[state->curlen] = (unsigned char)0;
    }
    s_sha1_compress(state, state->buf);
    state->curlen = 0;
  }

  /* zero-fill up to byte 56, where the length field starts */
  for(; state->curlen < 56; state->curlen += 1)
  {
    state->buf[state->curlen] = (unsigned char)0;
  }

  /* store length and compress the last block */
  STORE64H(state->length, state->buf+56);
  s_sha1_compress(state, state->buf);

  /* copy output, 4 bytes per state word */
  for(word_idx = 0; word_idx < 5; word_idx += 1)
  {
    STORE32H(state->state[word_idx], out+(4*word_idx));
  }
  return CRYPT_OK;
}
#undef F0
#undef F1
#undef F2
#undef F3
#undef FF0
#undef FF1
#undef FF2
#undef FF3
#endif // TOMCRYPT_HASH_H