mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-22 21:54:59 -07:00
4ef7ed1cbd
This helps performance with SSE (somewhat) and AVX-512 (quite a bit), but not AVX2 for some reason.
1031 lines
36 KiB
Odin
1031 lines
36 KiB
Odin
/*
|
|
An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
|
|
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
|
|
|
Made available under Odin's BSD-3 license, based on the original C code.
|
|
|
|
List of contributors:
|
|
Jeroen van Rijn: Initial implementation.
|
|
*/
|
|
|
|
package xxhash
|
|
|
|
import "base:intrinsics"
|
|
|
|
/*
|
|
*************************************************************************
|
|
* XXH3
|
|
* New generation hash designed for speed on small keys and vectorization
|
|
*************************************************************************
|
|
* One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
|
|
* remaining a true 64-bit/128-bit hash function.
|
|
* ==========================================
|
|
* XXH3 default settings
|
|
* ==========================================
|
|
*/
|
|
|
|
/*
|
|
Custom secrets have a default length of 192, but can be set to a different size.
|
|
The minimum secret size is 136 bytes. It must also be a multiple of 64.
|
|
*/
|
|
// Byte length of the built-in secret: the `XXH_SECRET_DEFAULT_SIZE` #config value (192 when not overridden), raised to at least XXH3_SECRET_SIZE_MIN.
XXH_SECRET_DEFAULT_SIZE :: max(XXH3_SECRET_SIZE_MIN, #config(XXH_SECRET_DEFAULT_SIZE, 192))
|
|
// Compile-time check: the default secret size must be a multiple of 64 bytes.
#assert(XXH_SECRET_DEFAULT_SIZE % 64 == 0)
|
|
|
|
/*
	Built-in secret used whenever the caller does not supply one.
	XXH_SECRET_DEFAULT_SIZE bytes, listed 16 per row; the trailing comment is the row's starting byte offset.
*/
XXH3_kSecret := [XXH_SECRET_DEFAULT_SIZE]u8{
	0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, /*   0 */
	0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, /*  16 */
	0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, /*  32 */
	0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, /*  48 */
	0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, /*  64 */
	0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, /*  80 */
	0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, /*  96 */
	0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, /* 112 */
	0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, /* 128 */
	0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, /* 144 */
	0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, /* 160 */
	0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, /* 176 */
}
|
|
/*
|
|
Do not change this constant.
|
|
*/
|
|
// Smallest secret length, in bytes, accepted by the hashing procedures (they assert `len(secret) >= XXH3_SECRET_SIZE_MIN`).
XXH3_SECRET_SIZE_MIN :: 136
|
|
// Compile-time check on the built-in secret's length.
// NOTE(review): this pins the length to exactly 192, so overriding XXH_SECRET_DEFAULT_SIZE via #config with any other value fails here.
#assert(len(XXH3_kSecret) == 192 && len(XXH3_kSecret) > XXH3_SECRET_SIZE_MIN)
|
|
|
|
// Alignment, in bytes, that the scalar accumulate/scramble procedures assert for the accumulator slice's data pointer.
XXH_ACC_ALIGN :: 8 /* scalar */
|
|
// The `XXH_MAX_WIDTH` #config value (default 512) divided by 64.
// NOTE(review): presumably a vector width in bits converted to a count of 64-bit lanes; confirm against where it is consumed.
XXH_MAX_WIDTH :: #config(XXH_MAX_WIDTH, 512) / 64
|
|
|
|
/*
|
|
This is the optimal update size for incremental hashing.
|
|
*/
|
|
// Size in bytes of the `buffer` field in XXH3_state.
XXH3_INTERNAL_BUFFER_SIZE :: 256
|
|
|
|
/*
|
|
Streaming state.
|
|
|
|
IMPORTANT: This structure has a strict alignment requirement of 64 bytes!
|
|
Default allocators will align it correctly if created via `new`, as will
|
|
placing this struct on the stack, but if using a custom allocator make sure
|
|
that it handles the alignment correctly!
|
|
*/
|
|
/*
	Streaming state, aligned to 64 bytes.
	Field order and sizes determine the offsets verified by the #asserts below; do not reorder.
*/
XXH3_state :: struct #align(64) {
	acc:               [8]u64,
	custom_secret:     [XXH_SECRET_DEFAULT_SIZE]u8,
	buffer:            [XXH3_INTERNAL_BUFFER_SIZE]u8,
	buffered_size:     u32,
	reserved32:        u32,
	stripes_so_far:    uint,
	total_length:      u64,
	stripes_per_block: uint,
	secret_limit:      uint,
	seed:              u64,
	reserved64:        u64,
	external_secret:   []u8,
}
// The three large arrays must each start on a 64-byte boundary.
#assert(offset_of(XXH3_state, acc)           % 64 == 0)
#assert(offset_of(XXH3_state, custom_secret) % 64 == 0)
#assert(offset_of(XXH3_state, buffer)        % 64 == 0)
|
|
|
|
/************************************************************************
|
|
* XXH3 128-bit variant
|
|
************************************************************************/
|
|
|
|
/*
|
|
Stored in little endian order, although the fields themselves are in native endianness.
|
|
*/
|
|
// 128-bit unsigned integer type used for intermediate and final 128-bit hash values.
xxh_u128 :: u128
|
|
// Result type of the public XXH3 128-bit hashing procedures.
XXH3_128_hash :: u128
|
|
|
|
/*
	Overlay of a 128-bit hash value and its two 64-bit halves.
	Both members share the same storage (`#raw_union`), so writing `low`/`high` updates `h` and vice versa.
*/
XXH128_hash_t :: struct #raw_union {
	using raw: struct {
		low:  XXH64_hash, /*!< `value & 0xFFFFFFFFFFFFFFFF` */
		high: XXH64_hash, /*!< `value >> 64` */
	},
	h: xxh_u128,
}
// The overlay must be exactly as large as the 128-bit integer it aliases.
#assert(size_of(xxh_u128) == size_of(XXH128_hash_t))
|
|
|
|
// Byte-array representation of a 128-bit hash; `digest` has the same size as XXH128_hash_t.
XXH128_canonical :: struct {
	digest: [size_of(XXH128_hash_t)]u8,
}
|
|
|
|
/*
|
|
Calculates a 64x64 -> 128-bit multiply, then XOR-folds the result down to 64 bits.
The reason for the separate function is to prevent passing too many structs
around by value. Note that this port marks the procedure `#force_inline`.
|
|
|
|
@param lhs, rhs The 64-bit integers to multiply
|
|
@return The low 64 bits of the product XOR'd by the high 64 bits.
|
|
*/
|
|
@(optimization_mode="favor_size")
XXH_mul_64_to_128_fold_64 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u64) {
	// Full-width product, then fold the two 64-bit halves together with XOR.
	product := u128(lhs) * u128(rhs)
	lo_half := u64(product & 0xFFFFFFFFFFFFFFFF)
	hi_half := u64(product >> 64)
	res = lo_half ~ hi_half
	return
}
|
|
|
|
// Returns `v` XORed with itself shifted right by `shift` bits.
@(optimization_mode="favor_size")
XXH_xorshift_64 :: #force_inline proc(v: xxh_u64, #any_int shift: uint) -> (res: xxh_u64) {
	shifted := v >> shift
	res = v ~ shifted
	return
}
|
|
|
|
/*
|
|
This is a fast avalanche stage, suitable when input bits are already partially mixed
|
|
*/
|
|
@(optimization_mode="favor_size")
XXH3_avalanche :: #force_inline proc(h64: xxh_u64) -> (res: xxh_u64) {
	// xorshift by 37, multiply, xorshift by 32.
	scrambled := XXH_xorshift_64(h64, 37) * 0x165667919E3779F9
	return XXH_xorshift_64(scrambled, 32)
}
|
|
|
|
/*
|
|
This is a stronger avalanche, inspired by Pelle Evensen's rrmxmx,
preferable when the input has not been previously mixed.
|
|
*/
|
|
@(optimization_mode="favor_size")
XXH3_rrmxmx :: #force_inline proc(h64, length: xxh_u64) -> (res: xxh_u64) {
	/* this mix is inspired by Pelle Evensen's rrmxmx */
	MULTIPLIER :: 0x9FB21C651E98DF25

	h := h64
	h  = h ~ (XXH_rotl64(h, 49) ~ XXH_rotl64(h, 24)) /* rotate-rotate */
	h *= MULTIPLIER
	h  = h ~ ((h >> 35) + length)                    /* xorshift, with the length mixed in */
	h *= MULTIPLIER
	res = XXH_xorshift_64(h, 28)
	return
}
|
|
|
|
/*
|
|
==========================================
|
|
XXH3 128 bits (a.k.a XXH128)
|
|
==========================================
|
|
XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
|
|
even without counting the significantly larger output size.
|
|
|
|
For example, extra steps are taken to avoid the seed-dependent collisions
|
|
in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
|
|
|
|
This strength naturally comes at the cost of some speed, especially on short
|
|
lengths. Note that longer hashes are about as fast as the 64-bit version
|
|
due to it using only a slight modification of the 64-bit loop.
|
|
|
|
XXH128 is also more oriented towards 64-bit machines. It is still extremely
|
|
fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
|
|
*/
|
|
|
|
/*
	128-bit hash of a 1 to 3 byte input.

	The input bytes and the length are packed into one 32-bit word; the low half of the
	result is keyed with secret[0..8) + seed, the high half with secret[8..16) - seed,
	and each half is finished with XXH64_avalanche.

	The length and nil checks mirror XXH3_len_1to3_64b: the body indexes `input` and
	`secret` under #no_bounds_check, so a violated contract would otherwise read out of bounds.
*/
@(optimization_mode="favor_size")
XXH3_len_1to3_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	/* A doubled version of 1to3_64b with different constants. */
	length := len(input)
	assert(input != nil)
	assert(1 <= length && length <= 3)
	assert(secret != nil)
	/*
	 * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
	 * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
	 * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
	 */
	#no_bounds_check {
		c1 := input[          0]
		c2 := input[length >> 1]
		c3 := input[length  - 1]
		combinedl := (u32(c1) << 16) | (u32(c2) << 24) | (u32(c3) << 0) | (u32(length) << 8)
		combinedh := XXH_rotl32(byte_swap(combinedl), 13)
		bitflipl  := u64(XXH32_read32(secret[0:]) ~ XXH32_read32(secret[4: ])) + seed
		bitfliph  := u64(XXH32_read32(secret[8:]) ~ XXH32_read32(secret[12:])) - seed
		keyed_lo  := u64(combinedl) ~ bitflipl
		keyed_hi  := u64(combinedh) ~ bitfliph

		return xxh_u128(XXH64_avalanche(keyed_lo)) | xxh_u128(XXH64_avalanche(keyed_hi)) << 64
	}
}
|
|
|
|
/*
	128-bit hash of a 4 to 8 byte input.

	Reads the first and last 4 bytes (they overlap for lengths below 8), keys them with
	secret[16..32) and a seed whose upper half is XORed with the byte-swapped lower half,
	then mixes through a 64x64 -> 128-bit multiply.

	The length and nil checks mirror XXH3_len_4to8_64b: the body indexes `input` and
	`secret` under #no_bounds_check, so a violated contract would otherwise read out of bounds.
*/
@(optimization_mode="favor_size")
XXH3_len_4to8_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	length := len(input)
	assert(input != nil)
	assert(4 <= length && length <= 8)
	assert(secret != nil)
	seed   := seed

	seed ~= u64(byte_swap(u32(seed))) << 32
	#no_bounds_check {
		input_lo := u64(XXH32_read32(input[0:]))
		input_hi := u64(XXH32_read32(input[length - 4:]))
		input_64 := u64(input_lo) + u64(input_hi) << 32
		bitflip  := (XXH64_read64(secret[16:]) ~ XXH64_read64(secret[24:])) + seed
		keyed    := input_64 ~ bitflip

		/* Shift len to the left to ensure it is even, this avoids even multiplies. */
		m128 := XXH128_hash_t{
			h = u128(keyed) * (XXH_PRIME64_1 + u128(length) << 2),
		}
		m128.high += (m128.low  << 1)
		m128.low  ~= (m128.high >> 3)

		m128.low   = XXH_xorshift_64(m128.low, 35)
		m128.low  *= 0x9FB21C651E98DF25
		m128.low   = XXH_xorshift_64(m128.low, 28)
		m128.high  = XXH3_avalanche(m128.high)

		return m128.h
	}
}
|
|
|
|
/*
	128-bit hash of a 9 to 16 byte input.

	Reads the first and last 8 bytes (they overlap for lengths below 16), keys them with
	secret[32..64) and the seed, and mixes through two wide multiplies before the final avalanche.

	The length and nil checks mirror XXH3_len_9to16_64b: the body indexes `input` and
	`secret` under #no_bounds_check, so a violated contract would otherwise read out of bounds.
*/
@(optimization_mode="favor_size")
XXH3_len_9to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	length := len(input)
	assert(input != nil)
	assert(9 <= length && length <= 16)
	assert(secret != nil)

	#no_bounds_check {
		bitflipl := (XXH64_read64(secret[32:]) ~ XXH64_read64(secret[40:])) - seed
		bitfliph := (XXH64_read64(secret[48:]) ~ XXH64_read64(secret[56:])) + seed
		input_lo := XXH64_read64(input[0:])
		input_hi := XXH64_read64(input[length - 8:])
		m128     := XXH128_hash_t{
			h = u128(input_lo ~ input_hi ~ bitflipl) * XXH_PRIME64_1,
		}
		/*
		 * Put len in the middle of m128 to ensure that the length gets mixed to
		 * both the low and high bits in the 128x64 multiply below.
		 */
		m128.low += u64(length - 1) << 54
		input_hi ~= bitfliph
		/*
		 * Add the high 32 bits of input_hi to the high 32 bits of m128, then
		 * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
		 * the high 64 bits of m128.
		 */
		m128.high += input_hi + u64(u32(input_hi)) * u64(XXH_PRIME32_2 - 1)

		/* m128 ^= XXH_swap64(m128 >> 64); */
		m128.low  ~= byte_swap(m128.high)
		{   /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
			h128 := XXH128_hash_t{
				h = u128(m128.low) * XXH_PRIME64_2,
			}
			h128.high += m128.high * XXH_PRIME64_2
			h128.low   = XXH3_avalanche(h128.low)
			h128.high  = XXH3_avalanche(h128.high)
			return h128.h
		}
	}
}
|
|
|
|
/*
|
|
Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
|
|
*/
|
|
// Dispatches inputs of 0 to 16 bytes to the matching fixed-range 128-bit routine.
// An empty input is hashed from the seed and secret[64..96) alone.
@(optimization_mode="favor_size")
XXH3_len_0to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	n := len(input)

	if n > 8 {
		return XXH3_len_9to16_128b(input, secret, seed)
	}
	if n >= 4 {
		return XXH3_len_4to8_128b(input, secret, seed)
	}
	if n > 0 {
		return XXH3_len_1to3_128b(input, secret, seed)
	}

	/* Empty input. */
	#no_bounds_check {
		flip_lo := XXH64_read64(secret[64:]) ~ XXH64_read64(secret[72:])
		flip_hi := XXH64_read64(secret[80:]) ~ XXH64_read64(secret[88:])
		lo := xxh_u128(XXH64_avalanche(seed ~ flip_lo))
		hi := xxh_u128(XXH64_avalanche(seed ~ flip_hi))
		return lo | (hi << 64)
	}
}
|
|
|
|
/*
|
|
A bit slower than XXH3_mix16B, but handles multiply by zero better.
|
|
*/
|
|
// Mixes two 16-byte inputs and 32 bytes of secret into a 128-bit accumulator.
// Each half is advanced with XXH3_mix16B of one input and then XORed with the sum of the other input's two 64-bit words.
@(optimization_mode="favor_size")
XXH128_mix32B :: #force_inline proc(acc: xxh_u128, input_1: []u8, input_2: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	state := XXH128_hash_t{h = acc}
	#no_bounds_check {
		state.low   = state.low + XXH3_mix16B(input_1, secret[0:], seed)
		state.low   = state.low ~ (XXH64_read64(input_2[0:]) + XXH64_read64(input_2[8:]))
		state.high  = state.high + XXH3_mix16B(input_2, secret[16:], seed)
		state.high  = state.high ~ (XXH64_read64(input_1) + XXH64_read64(input_1[8:]))
	}
	res = state.h
	return
}
|
|
|
|
/*
	128-bit hash of a 17 to 128 byte input.

	Runs one to four XXH128_mix32B rounds, each pairing 16 bytes counted from the start
	with 16 bytes counted from the end, then folds the two accumulator halves into the result.

	The asserts mirror XXH3_len_17to128_64b: slicing happens under #no_bounds_check, so a
	length outside the range or a short secret would otherwise read out of bounds.
*/
@(optimization_mode="favor_size")
XXH3_len_17to128_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
	length := len(input)
	assert(16 < length && length <= 128)

	acc := XXH128_hash_t{}
	acc.low = xxh_u64(length) * XXH_PRIME64_1

	#no_bounds_check {
		/* Longer inputs fall through every shorter case, working from the outside in. */
		switch {
		case length > 96:
			acc.h = XXH128_mix32B(acc.h, input[48:], input[length - 64:], secret[96:], seed)
			fallthrough
		case length > 64:
			acc.h = XXH128_mix32B(acc.h, input[32:], input[length - 48:], secret[64:], seed)
			fallthrough
		case length > 32:
			acc.h = XXH128_mix32B(acc.h, input[16:], input[length - 32:], secret[32:], seed)
			fallthrough
		case:
			acc.h = XXH128_mix32B(acc.h, input, input[length - 16:], secret, seed)
		}
	}

	h128 := XXH128_hash_t{}
	h128.low  = acc.low + acc.high
	h128.high = (acc.low * XXH_PRIME64_1) + (acc.high * XXH_PRIME64_4) + ((u64(length) - seed) * XXH_PRIME64_2)
	h128.low  = XXH3_avalanche(h128.low)
	/* Two's-complement negation of the avalanched high half. */
	h128.high = u64(i64(0) - i64(XXH3_avalanche(h128.high)))
	return h128.h
}
|
|
|
|
/*
	128-bit hash of a 129 to 240 byte input.

	Four XXH128_mix32B rounds over the first 128 bytes, an avalanche of both halves, the
	remaining whole 32-byte rounds keyed from XXH3_MIDSIZE_STARTOFFSET, and one round over
	the last 32 bytes with the negated seed.

	The asserts mirror XXH3_len_129to240_64b: slicing happens under #no_bounds_check, so a
	length outside the range or a short secret would otherwise read out of bounds.
*/
@(optimization_mode="favor_size")
XXH3_len_129to240_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
	length := len(input)
	assert(128 < length && length <= XXH3_MIDSIZE_MAX)

	#no_bounds_check {
		acc := XXH128_hash_t{}
		acc.low = u64(length) * XXH_PRIME64_1

		nbRounds := length / 32

		i: int
		for i = 0; i < 4; i += 1 {
			acc.h = XXH128_mix32B(acc.h,
				input [32 * i:],
				input [32 * i + 16:],
				secret[32 * i:],
				seed)
		}
		acc.low  = XXH3_avalanche(acc.low)
		acc.high = XXH3_avalanche(acc.high)

		for i = 4; i < nbRounds; i += 1 {
			acc.h = XXH128_mix32B(acc.h,
				input[32 * i:], input[32 * i + 16:],
				secret[XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)):],
				seed)
		}
		/* last bytes */
		acc.h = XXH128_mix32B(acc.h,
			input[length - 16:],
			input[length - 32:],
			secret[XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16:],
			u64(i64(0) - i64(seed)))

		h128 := XXH128_hash_t{}
		h128.low  = acc.low + acc.high
		/* Wrapping 64-bit arithmetic; same value as summing in u128 and truncating. */
		h128.high = (acc.low * XXH_PRIME64_1) + (acc.high * XXH_PRIME64_4) + ((u64(length) - seed) * XXH_PRIME64_2)
		h128.low  = XXH3_avalanche(h128.low)
		/* Two's-complement negation of the avalanched high half. */
		h128.high = u64(i64(0) - i64(XXH3_avalanche(h128.high)))
		return h128.h
	}
}
|
|
|
|
// Initial values of the XXH_ACC_NB accumulator lanes used by the long-input hash.
XXH3_INIT_ACC :: [XXH_ACC_NB]xxh_u64{
	XXH_PRIME32_3, /* lane 0 */
	XXH_PRIME64_1, /* lane 1 */
	XXH_PRIME64_2, /* lane 2 */
	XXH_PRIME64_3, /* lane 3 */
	XXH_PRIME64_4, /* lane 4 */
	XXH_PRIME32_2, /* lane 5 */
	XXH_PRIME64_5, /* lane 6 */
	XXH_PRIME32_1, /* lane 7 */
}
|
|
|
|
// Byte offset into the secret (from the start for the low half, mirrored from the end for the high half) passed to XXH3_mergeAccs.
XXH_SECRET_MERGEACCS_START :: 11
|
|
|
|
/*
	Long-input 128-bit hash core.
	Runs the accumulation loop over `input` with the supplied accumulate/scramble procedures,
	then merges the accumulators twice (near the start and near the end of `secret`) to form
	the low and high halves of the result.
*/
XXH3_hashLong_128b_internal :: #force_inline proc(
	input:      []u8,
	secret:     []u8,
	f_acc512:   XXH3_accumulate_512_f,
	f_scramble: XXH3_scramble_accumulator_f) -> (res: XXH3_128_hash) {

	lanes := XXH3_INIT_ACC
	#assert(size_of(lanes) == 64)

	XXH3_hashLong_internal_loop(lanes[:], input, secret, f_acc512, f_scramble)

	/* converge into final hash */
	total := u64(len(input))

	merged := XXH128_hash_t{}
	merged.low  = XXH3_mergeAccs(
		lanes[:],
		secret[XXH_SECRET_MERGEACCS_START:],
		total * XXH_PRIME64_1,
	)
	merged.high = XXH3_mergeAccs(
		lanes[:],
		secret[len(secret) - size_of(lanes) - XXH_SECRET_MERGEACCS_START:],
		~(total * XXH_PRIME64_2),
	)
	res = merged.h
	return
}
|
|
|
|
/*
|
|
* It's important for performance that XXH3_hashLong is not inlined.
|
|
*/
|
|
// Long-input hash with the built-in secret. `seed` and `secret` are accepted only to match XXH3_hashLong128_f and are ignored.
XXH3_hashLong_128b_default :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
	res = XXH3_hashLong_128b_internal(
		input,
		XXH3_kSecret[:],
		XXH3_accumulate_512,
		XXH3_scramble_accumulator,
	)
	return
}
|
|
|
|
/*
|
|
* It's important for performance that XXH3_hashLong is not inlined.
|
|
*/
|
|
// Long-input hash with a caller-supplied secret. `seed` is accepted only to match XXH3_hashLong128_f and is ignored.
XXH3_hashLong_128b_withSecret :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
	res = XXH3_hashLong_128b_internal(
		input,
		secret,
		XXH3_accumulate_512,
		XXH3_scramble_accumulator,
	)
	return
}
|
|
|
|
/*
	Long-input hash keyed by a seed.
	A zero seed uses the built-in secret directly; any other seed first derives a
	seed-specific secret with `f_initSec`. The `secret` parameter is not read.
*/
XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
	input: []u8, seed: xxh_u64, secret: []u8,
	f_acc512:   XXH3_accumulate_512_f,
	f_scramble: XXH3_scramble_accumulator_f,
	f_initSec:  XXH3_init_custom_secret_f) -> (res: XXH3_128_hash) {

	if seed != 0 {
		derived: [XXH_SECRET_DEFAULT_SIZE]u8
		f_initSec(derived[:], seed)
		return XXH3_hashLong_128b_internal(input, derived[:], f_acc512, f_scramble)
	}
	return XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
}
|
|
|
|
/*
|
|
* It's important for performance that XXH3_hashLong is not inlined.
|
|
*/
|
|
// Long-input hash keyed by `seed`, bound to the default accumulate, scramble and secret-initialisation procedures.
XXH3_hashLong_128b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
	res = XXH3_hashLong_128b_withSeed_internal(
		input, seed, secret,
		XXH3_accumulate_512,
		XXH3_scramble_accumulator,
		XXH3_init_custom_secret,
	)
	return
}
|
|
|
|
// Signature of the long-input 128-bit hashing procedures that XXH3_128bits_internal calls for inputs longer than XXH3_MIDSIZE_MAX.
XXH3_hashLong128_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash)
|
|
|
|
/*
	Selects the 128-bit routine by input length:
	0..16, 17..128, 129..XXH3_MIDSIZE_MAX, and `f_hl128` for anything longer.
*/
@(optimization_mode="favor_size")
XXH3_128bits_internal :: #force_inline proc(
	input: []u8, seed: xxh_u64, secret: []u8, f_hl128: XXH3_hashLong128_f) -> (res: XXH3_128_hash) {

	/*
	 * If an action is to be taken if `secret` conditions are not respected,
	 * it should be done here.
	 * For now, it's a contract pre-condition.
	 */
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)

	n := len(input)
	if n <= 16 {
		return XXH3_len_0to16_128b(input, secret, seed)
	}
	if n <= 128 {
		return XXH3_len_17to128_128b(input, secret, seed)
	}
	if n <= XXH3_MIDSIZE_MAX {
		return XXH3_len_129to240_128b(input, secret, seed)
	}
	return f_hl128(input, seed, secret)
}
|
|
|
|
/* === Public XXH128 API === */
|
|
// 128-bit XXH3 hash of `input` with seed 0 and the built-in secret.
@(optimization_mode="favor_size")
XXH3_128_default :: proc(input: []u8) -> (hash: XXH3_128_hash) {
	hash = XXH3_128bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_128b_default)
	return
}
|
|
|
|
// 128-bit XXH3 hash of `input` keyed by `seed`, starting from the built-in secret.
@(optimization_mode="favor_size")
XXH3_128_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: XXH3_128_hash) {
	hash = XXH3_128bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_128b_withSeed)
	return
}
|
|
|
|
// 128-bit XXH3 hash of `input` with a caller-supplied secret and seed 0.
// The secret must be at least XXH3_SECRET_SIZE_MIN bytes long (asserted by XXH3_128bits_internal).
@(optimization_mode="favor_size")
XXH3_128_with_secret :: proc(input: []u8, secret: []u8) -> (hash: XXH3_128_hash) {
	hash = XXH3_128bits_internal(input, 0, secret, XXH3_hashLong_128b_withSecret)
	return
}
|
|
// Overloaded entry point: resolves to the default, seeded, or custom-secret variant based on the arguments.
XXH3_128 :: proc { XXH3_128_default, XXH3_128_with_seed, XXH3_128_with_secret }
|
|
|
|
/*
|
|
==========================================
|
|
Short keys
|
|
==========================================
|
|
One of the shortcomings of XXH32 and XXH64 was that their performance was
|
|
sub-optimal on short lengths. It used an iterative algorithm which strongly
|
|
favored lengths that were a multiple of 4 or 8.
|
|
|
|
Instead of iterating over individual inputs, we use a set of single shot
|
|
functions which piece together a range of lengths and operate in constant time.
|
|
Additionally, the number of multiplies has been significantly reduced. This
|
|
reduces latency, especially when emulating 64-bit multiplies on 32-bit.
|
|
|
|
Depending on the platform, this may or may not be faster than XXH32, but it
|
|
is almost guaranteed to be faster than XXH64.
|
|
*/
|
|
|
|
/*
|
|
At very short lengths, there isn't enough input to fully hide secrets, or use the entire secret.
|
|
|
|
There is also only a limited amount of mixing we can do before significantly impacting performance.
|
|
|
|
Therefore, we use different sections of the secret and always mix two secret samples with an XOR.
|
|
This should have no effect on performance on the seedless or withSeed variants because everything
|
|
_should_ be constant folded by modern compilers.
|
|
|
|
The XOR mixing hides individual parts of the secret and increases entropy.
|
|
This adds an extra layer of strength for custom secrets.
|
|
*/
|
|
// 64-bit hash of a 1 to 3 byte input: packs the bytes and the length into one 32-bit word,
// keys it with secret[0..8) plus the seed, and finishes with XXH64_avalanche.
@(optimization_mode="favor_size")
XXH3_len_1to3_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	length := u32(len(input))
	assert(input != nil)
	assert(1 <= length && length <= 3)
	assert(secret != nil)
	/*
		len = 1: combined = { input[0], 0x01, input[0], input[0] }
		len = 2: combined = { input[1], 0x02, input[0], input[1] }
		len = 3: combined = { input[2], 0x03, input[0], input[1] }
	*/
	#no_bounds_check {
		first  := u32(input[0])
		middle := u32(input[length >> 1])
		last   := u32(input[length - 1])

		packed     := (first << 16) | (middle << 24) | (last << 0) | (length << 8)
		secret_mix := u64(XXH32_read32(secret)) ~ u64(XXH32_read32(secret[4:]))
		res = XXH64_avalanche(u64(packed) ~ (secret_mix + seed))
		return
	}
}
|
|
|
|
// 64-bit hash of a 4 to 8 byte input: combines the first and last 4 bytes (overlapping for lengths below 8),
// keys them with secret[8..24) minus the adjusted seed, and finishes with XXH3_rrmxmx.
@(optimization_mode="favor_size")
XXH3_len_4to8_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	length := u32(len(input))
	assert(input != nil)
	assert(4 <= length && length <= 8)
	assert(secret != nil)

	/* XOR the byte-swapped low 32 bits of the seed into its high 32 bits. */
	adjusted_seed := seed ~ (u64(byte_swap(u32(seed))) << 32)

	#no_bounds_check {
		head    := XXH32_read32(input)
		tail    := XXH32_read32(input[length - 4:])
		bitflip := (XXH64_read64(secret[8:]) ~ XXH64_read64(secret[16:])) - adjusted_seed
		packed  := u64(tail) + (u64(head) << 32)
		res = XXH3_rrmxmx(packed ~ bitflip, u64(length))
		return
	}
}
|
|
|
|
// 64-bit hash of a 9 to 16 byte input: keys the first and last 8 bytes (overlapping for lengths below 16)
// with secret[24..56) and the seed, then folds them together with a 128-bit multiply.
@(optimization_mode="favor_size")
XXH3_len_9to16_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	length := u64(len(input))
	assert(input != nil)
	assert(9 <= length && length <= 16)
	assert(secret != nil)
	#no_bounds_check {
		flip_lo := (XXH64_read64(secret[24:]) ~ XXH64_read64(secret[32:])) + seed
		flip_hi := (XXH64_read64(secret[40:]) ~ XXH64_read64(secret[48:])) - seed
		lo      := XXH64_read64(input)              ~ flip_lo
		hi      := XXH64_read64(input[length - 8:]) ~ flip_hi
		folded  := XXH_mul_64_to_128_fold_64(lo, hi)
		res = XXH3_avalanche(length + byte_swap(lo) + hi + folded)
		return
	}
}
|
|
|
|
/*
	Dispatches inputs of 0 to 16 bytes to the matching fixed-range 64-bit routine.

	An empty input is hashed from the seed and secret[56..72) alone and never reads `input`,
	so an empty slice with a nil data pointer is accepted here, matching XXH3_len_0to16_128b.
	The non-empty routines still assert `input != nil` themselves.
*/
@(optimization_mode="favor_size")
XXH3_len_0to16_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	length := u64(len(input))
	/* Only a non-empty input needs backing memory. */
	assert(input != nil || length == 0)
	assert(length <= 16)
	#no_bounds_check {
		switch {
		case length > 8:  return #force_inline XXH3_len_9to16_64b(input, secret, seed)
		case length >= 4: return #force_inline XXH3_len_4to8_64b (input, secret, seed)
		case length > 0:  return #force_inline XXH3_len_1to3_64b (input, secret, seed)
		case:
			return #force_inline XXH64_avalanche(seed ~ (XXH64_read64(secret[56:]) ~ XXH64_read64(secret[64:])))
		}
	}
}
|
|
|
|
/*
|
|
DISCLAIMER: There are known *seed-dependent* multicollisions here due to
|
|
multiplication by zero, affecting hashes of lengths 17 to 240.
|
|
|
|
However, they are very unlikely.
|
|
|
|
Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
|
|
unseeded non-cryptographic hashes, it does not attempt to defend itself
|
|
against specially crafted inputs, only random inputs.
|
|
|
|
Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
|
|
cancelling out the secret is taken an arbitrary number of times (addressed
|
|
in XXH3_accumulate_512), this collision is very unlikely with random inputs
|
|
and/or proper seeding:
|
|
|
|
This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
|
|
function that is only called up to 16 times per hash with up to 240 bytes of
|
|
input.
|
|
|
|
This is not too bad for a non-cryptographic hash function, especially with
|
|
only 64 bit outputs.
|
|
|
|
The 128-bit variant (which trades some speed for strength) is NOT affected
|
|
by this, although it is always a good idea to use a proper seed if you care
|
|
about strength.
|
|
*/
|
|
// Mixes 16 input bytes with 16 secret bytes: each 64-bit input word is XORed with a
// seed-adjusted secret word, and the two results are multiplied and folded to 64 bits.
@(optimization_mode="favor_size")
XXH3_mix16B :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	data_lo := XXH64_read64(input[0:])
	data_hi := XXH64_read64(input[8:])

	key_lo := XXH64_read64(secret[0:]) + seed
	key_hi := XXH64_read64(secret[8:]) - seed

	res = XXH_mul_64_to_128_fold_64(data_lo ~ key_lo, data_hi ~ key_hi)
	return
}
|
|
|
|
/* For mid range keys, XXH3 uses a Mum-hash variant. */
|
|
// 64-bit hash of a 17 to 128 byte input: one to four pairs of XXH3_mix16B rounds, each pairing
// 16 bytes counted from the start with 16 bytes counted from the end, followed by XXH3_avalanche.
@(optimization_mode="favor_size")
XXH3_len_17to128_64b :: proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
	length := len(input)
	assert(16 < length && length <= 128)

	#no_bounds_check {
		sum := u64(length) * XXH_PRIME64_1

		/* Each threshold adds one more pair, working from the outside in. */
		if length > 96 {
			sum += XXH3_mix16B(input[48:],          secret[96:],  seed)
			sum += XXH3_mix16B(input[length - 64:], secret[112:], seed)
		}
		if length > 64 {
			sum += XXH3_mix16B(input[32:],          secret[64:],  seed)
			sum += XXH3_mix16B(input[length - 48:], secret[80:],  seed)
		}
		if length > 32 {
			sum += XXH3_mix16B(input[16:],          secret[32:],  seed)
			sum += XXH3_mix16B(input[length - 32:], secret[48:],  seed)
		}
		sum += XXH3_mix16B(input[0:],           secret[0:],  seed)
		sum += XXH3_mix16B(input[length - 16:], secret[16:], seed)

		res = XXH3_avalanche(sum)
		return
	}
}
|
|
|
|
// Largest input length, in bytes, handled by the mid-size (129..240) routines; longer inputs take the long-hash path.
XXH3_MIDSIZE_MAX :: 240
|
|
// Secret byte offset at which the mid-size routines start keying the rounds that follow the first 128 input bytes.
XXH3_MIDSIZE_STARTOFFSET :: 3
|
|
// Distance back from XXH3_SECRET_SIZE_MIN of the secret bytes used for the final "last bytes" round of the mid-size routines.
XXH3_MIDSIZE_LASTOFFSET :: 17
|
|
|
|
// 64-bit hash of a 129 to 240 byte input: eight XXH3_mix16B rounds over the first 128 bytes, an avalanche,
// the remaining whole 16-byte rounds keyed from XXH3_MIDSIZE_STARTOFFSET, and one round over the last 16 bytes.
@(optimization_mode="favor_size")
XXH3_len_129to240_64b :: proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
	length := len(input)
	assert(128 < length && length <= XXH3_MIDSIZE_MAX)

	#no_bounds_check {
		sum    := u64(length) * XXH_PRIME64_1
		rounds := length / 16

		for r in 0..<8 {
			sum += XXH3_mix16B(input[16 * r:], secret[16 * r:], seed)
		}

		sum = XXH3_avalanche(sum)
		assert(rounds >= 8)

		for r in 8..<rounds {
			sum += XXH3_mix16B(input[16 * r:], secret[(16 * (r - 8)) + XXH3_MIDSIZE_STARTOFFSET:], seed)
		}
		/* last bytes */
		sum += XXH3_mix16B(input[length - 16:], secret[XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET:], seed)

		res = XXH3_avalanche(sum)
		return
	}
}
|
|
|
|
/* ======= Long Keys ======= */
|
|
|
|
// Number of input bytes consumed by one accumulate step.
XXH_STRIPE_LEN :: 64
|
|
// XXH3_accumulate advances the secret by this many bytes for each stripe.
XXH_SECRET_CONSUME_RATE :: 8 /* nb of secret bytes consumed at each accumulation */
|
|
// Number of 64-bit accumulator lanes: one per 8 bytes of a stripe.
XXH_ACC_NB :: (XXH_STRIPE_LEN / size_of(xxh_u64))
|
|
// Secret offset constant for the final accumulation; its use is outside this part of the file.
XXH_SECRET_LASTACC_START :: 7 /* not aligned on 8, last secret is different from acc & scrambler */
|
|
|
|
// Copies the 8 bytes of the little-endian value `v64` to the start of `dst`.
// The copy goes through the raw data pointer, so `dst` must have room for 8 bytes; nothing here checks its length.
@(optimization_mode="favor_size")
XXH_writeLE64 :: #force_inline proc(dst: []u8, v64: u64le) {
	/* Addressable copy of the parameter so its bytes can be copied out. */
	value := v64
	mem_copy(raw_data(dst), &value, size_of(value))
}
|
|
|
|
/*
|
|
* XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
|
|
*
|
|
* It is a hardened version of UMAC, based off of FARSH's implementation.
|
|
*
|
|
* This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
|
|
* implementations, and it is ridiculously fast.
|
|
*
|
|
* We harden it by mixing the original input to the accumulators as well as the product.
|
|
*
|
|
* This means that in the (relatively likely) case of a multiply by zero, the
|
|
* original input is preserved.
|
|
*
|
|
* On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
|
|
* cross-pollination, as otherwise the upper and lower halves would be
|
|
* essentially independent.
|
|
*
|
|
* This doesn't matter on 64-bit hashes since they all get merged together in
|
|
* the end, so we skip the extra step.
|
|
*
|
|
* Both XXH3_64bits and XXH3_128bits use this subroutine.
|
|
*/
|
|
|
|
// Signature of a procedure that folds one stripe of `input` into `acc` using `secret`.
XXH3_accumulate_512_f :: #type proc(acc: []xxh_u64, input: []u8, secret: []u8)
|
|
// Signature of a procedure that scrambles the accumulator lanes in place using `secret`.
XXH3_scramble_accumulator_f :: #type proc(acc: []xxh_u64, secret: []u8)
|
|
// Signature of a procedure that fills `custom_secret` with a secret derived from `seed64`.
XXH3_init_custom_secret_f :: #type proc(custom_secret: []u8, seed64: xxh_u64)
|
|
|
|
/* scalar variants - universal */
|
|
// Scalar accumulate step: for each of the XXH_ACC_NB lanes, adds the raw input word to the
// neighbouring lane and adds the product of the two 32-bit halves of (input ~ secret) to the lane itself.
@(optimization_mode="favor_size")
XXH3_accumulate_512_scalar :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8) {
	/* `acc` is presumed aligned; `input` and `secret` have no alignment restriction. */
	assert(uintptr(raw_data(acc)) & uintptr(XXH_ACC_ALIGN - 1) == 0)

	#no_bounds_check for lane in uint(0)..<XXH_ACC_NB {
		byte_off := 8 * lane
		data_val := XXH64_read64(input[byte_off:])
		data_key := data_val ~ XXH64_read64(secret[byte_off:])

		acc[lane ~ 1] += data_val /* swap adjacent lanes */
		acc[lane]     += u64(u32(data_key)) * u64(data_key >> 32)
	}
}
|
|
|
|
// Scalar scramble step: each lane is xorshifted by 47, XORed with its secret word, and multiplied by XXH_PRIME32_1.
@(optimization_mode="favor_size")
XXH3_scramble_accumulator_scalar :: #force_inline proc(acc: []xxh_u64, secret: []u8) {
	/* `acc` is presumed aligned; `secret` has no alignment restriction. */
	assert(uintptr(raw_data(acc)) & uintptr(XXH_ACC_ALIGN - 1) == 0)

	#no_bounds_check for lane in uint(0)..<XXH_ACC_NB {
		key   := XXH64_read64(secret[8 * lane:])
		mixed := XXH_xorshift_64(acc[lane], 47) ~ key
		acc[lane] = mixed * u64(XXH_PRIME32_1)
	}
}
|
|
|
|
/*
	Derives a seed-specific secret from the built-in one (scalar version).

	For every 16-byte group of XXH3_kSecret, the first 64-bit word has `seed64` added and
	the second has it subtracted; both are written to `custom_secret` in little-endian order.

	`custom_secret` must hold at least XXH_SECRET_DEFAULT_SIZE bytes. This is asserted
	because the writes below run under #no_bounds_check and would otherwise overrun a short slice.
*/
@(optimization_mode="favor_size")
XXH3_init_custom_secret_scalar :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64) {
	#assert((XXH_SECRET_DEFAULT_SIZE & 15) == 0)
	assert(len(custom_secret) >= XXH_SECRET_DEFAULT_SIZE)

	nbRounds := XXH_SECRET_DEFAULT_SIZE / 16
	#no_bounds_check for i := 0; i < nbRounds; i += 1 {
		lo := XXH64_read64(XXH3_kSecret[16 * i:    ]) + seed64
		hi := XXH64_read64(XXH3_kSecret[16 * i + 8:]) - seed64
		XXH_writeLE64(custom_secret[16 * i:    ], u64le(lo))
		XXH_writeLE64(custom_secret[16 * i + 8:], u64le(hi))
	}
}
|
|
|
|
/* generalized SIMD variants */
|
|
// Vector accumulate step processing `W` 64-bit lanes per iteration (W must be 2, 4 or 8).
// Per lane it performs the same update as the scalar version: add the input word of the
// neighbouring lane, then add the product of the two 32-bit halves of (input ~ secret).
XXH3_accumulate_512_simd_generic :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8, $W: uint) {
	u32xW :: #simd[W]u32
	u64xW :: #simd[W]u64

	#no_bounds_check for chunk in uint(0)..<XXH_ACC_NB/W {
		data_val := XXH64_read64_simd(input[8 * W * chunk:], W)
		sec      := XXH64_read64_simd(secret[8 * W * chunk:], W)
		data_key := data_val ~ sec

		// Swap adjacent lanes
		when W == 2 {
			data_val = swizzle(data_val, 1, 0)
		} else when W == 4 {
			data_val = swizzle(data_val, 1, 0, 3, 2)
		} else when W == 8 {
			data_val = swizzle(data_val, 1, 0, 3, 2, 5, 4, 7, 6)
		} else {
			#panic("Unsupported vector size!")
		}

		lanes := XXH64_read64_simd(acc[W * chunk:], W)
		lanes += data_val
		// Low 32 bits (truncate, then widen again) times high 32 bits of every key lane.
		lanes += u64xW(u32xW(data_key)) * intrinsics.simd_shr(data_key, 32)
		XXH64_write64_simd(acc[W * chunk:], lanes)
	}
}
|
|
|
|
// Vector scramble step processing `W` 64-bit lanes per iteration:
// each lane is xorshifted by 47, XORed with its secret word, and multiplied by XXH_PRIME32_1.
XXH3_scramble_accumulator_simd_generic :: #force_inline proc(acc: []xxh_u64, secret: []u8, $W: uint) {
	u64xW :: #simd[W]u64
	#no_bounds_check for chunk in uint(0)..<XXH_ACC_NB/W {
		key_lanes := XXH64_read64_simd(secret[8 * W * chunk:], W)
		acc_lanes := XXH64_read64_simd(acc[W * chunk:], W)

		acc_lanes ~= intrinsics.simd_shr(acc_lanes, 47)
		acc_lanes ~= key_lanes
		acc_lanes *= XXH_PRIME32_1

		XXH64_write64_simd(acc[W * chunk:], acc_lanes)
	}
}
|
|
|
|
/*
	Derives a seed-specific secret from the built-in one, `W` 64-bit lanes at a time.

	A vector holding `seed64` in every lane has its odd lanes negated, so adding it to a
	block of XXH3_kSecret adds the seed to even words and subtracts it from odd words.

	`custom_secret` must hold at least XXH_SECRET_DEFAULT_SIZE bytes. This is asserted
	because the writes below run under #no_bounds_check and would otherwise overrun a short slice.
*/
XXH3_init_custom_secret_simd_generic :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64, $W: uint) {
	u64xW :: #simd[W]u64

	assert(len(custom_secret) >= XXH_SECRET_DEFAULT_SIZE)

	seedVec := u64xW(seed64)
	for i in 0..<W/2 {
		/* Negate every odd lane. */
		j := 2*i + 1
		seedVec = intrinsics.simd_replace(seedVec, j, -intrinsics.simd_extract(seedVec, j))
	}

	nbRounds := XXH_SECRET_DEFAULT_SIZE / 8 / W
	#no_bounds_check for i in uint(0)..<nbRounds {
		block := XXH64_read64_simd(XXH3_kSecret[8 * W * i:], W)
		block += seedVec
		XXH64_write64_simd(custom_secret[8 * W * i:], block)
	}
}
|
|
|
|
/*
	Accumulates one stripe, picking the implementation at compile time:
	the scalar version when `XXH_NATIVE_WIDTH` is 1 or less, otherwise the
	generic SIMD version instantiated with `XXH_NATIVE_WIDTH` lanes.
*/
XXH3_accumulate_512 :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8) {
	when XXH_NATIVE_WIDTH <= 1 {
		XXH3_accumulate_512_scalar(acc, input, secret)
	} else {
		XXH3_accumulate_512_simd_generic(acc, input, secret, XXH_NATIVE_WIDTH)
	}
}
|
|
|
|
/*
	Scrambles the accumulator, picking the implementation at compile time:
	the scalar version when `XXH_NATIVE_WIDTH` is 1 or less, otherwise the
	generic SIMD version instantiated with `XXH_NATIVE_WIDTH` lanes.
*/
XXH3_scramble_accumulator :: #force_inline proc(acc: []xxh_u64, secret: []u8) {
	when XXH_NATIVE_WIDTH <= 1 {
		XXH3_scramble_accumulator_scalar(acc, secret)
	} else {
		XXH3_scramble_accumulator_simd_generic(acc, secret, XXH_NATIVE_WIDTH)
	}
}
|
|
|
|
/*
	Fills `custom_secret` from `seed64`, picking the implementation at compile time:
	the scalar version when `XXH_NATIVE_WIDTH` is 1 or less, otherwise the
	generic SIMD version instantiated with `XXH_NATIVE_WIDTH` lanes.
*/
XXH3_init_custom_secret :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64) {
	when XXH_NATIVE_WIDTH <= 1 {
		XXH3_init_custom_secret_scalar(custom_secret, seed64)
	} else {
		XXH3_init_custom_secret_simd_generic(custom_secret, seed64, XXH_NATIVE_WIDTH)
	}
}
|
|
|
|
// Second argument handed to `prefetch` together with the current stripe pointer
// in `XXH3_accumulate`. NOTE(review): presumably a look-ahead distance in bytes;
// confirm against the definition of `prefetch`.
XXH_PREFETCH_DIST :: 320
|
|
|
|
/*
	XXH3_accumulate()

	Calls `f_acc512` once per stripe, for `nbStripes` consecutive stripes.
	Stripe `n` starts at `n * XXH_STRIPE_LEN` in `input` and is paired with the
	secret starting at `n * XXH_SECRET_CONSUME_RATE`.

	Unless `XXH_DISABLE_PREFETCH` is set, `prefetch` is called with a pointer to the
	current stripe and `XXH_PREFETCH_DIST` before the stripe is accumulated.

	Assumption: nbStripes will not overflow the secret size.
	Bounds checks are disabled here, so the caller is responsible for that.
*/
@(optimization_mode="favor_size")
XXH3_accumulate :: #force_inline proc(
	acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint, f_acc512: XXH3_accumulate_512_f) {

	#no_bounds_check for stripe in 0..<nbStripes {
		input_off  := stripe * XXH_STRIPE_LEN
		secret_off := stripe * XXH_SECRET_CONSUME_RATE

		when !XXH_DISABLE_PREFETCH {
			prefetch(&input[input_off], XXH_PREFETCH_DIST)
		}

		f_acc512(acc, input[input_off:], secret[secret_off:])
	}
}
|
|
|
|
/*
	Main loop of the long-input hash.

	The input is consumed in blocks of `stripes_per_block` stripes, where
	`stripes_per_block` is derived from the secret length. After each full block the
	accumulator is scrambled with the last `XXH_STRIPE_LEN` bytes of the secret.
	The remaining whole stripes of the final, partial block are then accumulated,
	and finally the last `XXH_STRIPE_LEN` bytes of the input are accumulated with a
	secret window that ends `XXH_SECRET_LASTACC_START` bytes before the scramble window.

	All slicing is done without bounds checks. The arithmetic below uses
	`len(input) - 1` and `len(secret) - XXH_STRIPE_LEN` in unsigned math, so the
	caller must pass inputs and secrets long enough for those not to wrap.
*/
@(optimization_mode="favor_size")
XXH3_hashLong_internal_loop :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8,
	f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) {

	input_len  := uint(len(input))
	secret_len := uint(len(secret))

	stripes_per_block := (secret_len - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE
	block_len         := XXH_STRIPE_LEN * stripes_per_block
	full_blocks       := (input_len - 1) / block_len

	#no_bounds_check {
		/* full blocks, each followed by a scramble */
		for block in 0..<full_blocks {
			XXH3_accumulate(acc, input[block * block_len:], secret, stripes_per_block, f_acc512)
			f_scramble(acc, secret[secret_len - XXH_STRIPE_LEN:])
		}

		/* last partial block */
		tail_off     := full_blocks * block_len
		tail_stripes := (input_len - 1 - tail_off) / XXH_STRIPE_LEN
		XXH3_accumulate(acc, input[tail_off:], secret, tail_stripes, f_acc512)

		/* last stripe */
		last_stripe := input[input_len - XXH_STRIPE_LEN:]
		last_secret := secret[secret_len - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START:]
		f_acc512(acc, last_stripe, last_secret)
	}
}
|
|
|
|
/*
	Mixes two accumulator lanes with 16 bytes of secret:
	`acc[0]` is xor-ed with the first 64-bit word of `secret`, `acc[1]` with the
	second, and the two values are combined by `XXH_mul_64_to_128_fold_64`.
*/
@(optimization_mode="favor_size")
XXH3_mix2Accs :: #force_inline proc(acc: []xxh_u64, secret: []u8) -> (res: xxh_u64) {
	first  := acc[0] ~ XXH64_read64(secret)
	second := acc[1] ~ XXH64_read64(secret[8:])

	res = XXH_mul_64_to_128_fold_64(first, second)
	return
}
|
|
|
|
/*
	Folds the accumulator into a single 64-bit value.

	Starting from `start`, adds the `XXH3_mix2Accs` result of each of the four
	accumulator pairs (pair `p` uses `acc[2*p:]` and `secret[16*p:]`), then
	passes the sum through `XXH3_avalanche`.
*/
@(optimization_mode="favor_size")
XXH3_mergeAccs :: #force_inline proc(acc: []xxh_u64, secret: []u8, start: xxh_u64) -> (res: xxh_u64) {
	sum := start

	#no_bounds_check for pair in 0..<4 {
		lanes := acc[2 * pair:]
		key   := secret[16 * pair:]
		sum += XXH3_mix2Accs(lanes, key)
	}

	res = XXH3_avalanche(sum)
	return
}
|
|
|
|
/*
	Long-input 64-bit hash core.

	Initialises the accumulator from `XXH3_INIT_ACC`, runs
	`XXH3_hashLong_internal_loop` over `input` with the supplied accumulate and
	scramble procedures, then merges the accumulator into the final hash using the
	secret starting at byte `XXH_SECRET_MERGEACCS_START` and a start value of
	`len(input) * XXH_PRIME64_1`.
*/
@(optimization_mode="favor_size")
XXH3_hashLong_64b_internal :: #force_inline proc(input: []u8, secret: []u8,
	f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) -> (hash: xxh_u64) {

	/* do not align on 8, so that the secret is different from the accumulator */
	XXH_SECRET_MERGEACCS_START :: 11

	acc: [XXH_ACC_NB]xxh_u64 = XXH3_INIT_ACC
	#assert(size_of(acc) == 64)

	lanes := acc[:]
	XXH3_hashLong_internal_loop(lanes, input, secret, f_acc512, f_scramble)

	/* converge into final hash */
	assert(len(secret) >= size_of(acc) + XXH_SECRET_MERGEACCS_START)

	merge_secret := secret[XXH_SECRET_MERGEACCS_START:]
	merge_start  := xxh_u64(len(input)) * XXH_PRIME64_1

	hash = XXH3_mergeAccs(lanes, merge_secret, merge_start)
	return
}
|
|
|
|
/*
	Long-input hash using the caller-supplied `secret`.
	`seed64` is accepted but not used.

	It's important for performance that XXH3_hashLong is not inlined.
*/
XXH3_hashLong_64b_withSecret :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
	hash = XXH3_hashLong_64b_internal(
		input,
		secret,
		XXH3_accumulate_512,
		XXH3_scramble_accumulator,
	)
	return
}
|
|
|
|
/*
	Long-input hash using the built-in `XXH3_kSecret`.
	Both `seed64` and `secret` are accepted but not used.

	It's important for performance that XXH3_hashLong is not inlined.
	Because it is not inlined, the compiler may fail to notice that, in some
	scenarios, the `secret` argument is actually a compile time constant.
	This variant passes `XXH3_kSecret` directly so the compiler can detect that
	and streamline the generated code for better performance.
*/
XXH3_hashLong_64b_default :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
	hash = XXH3_hashLong_64b_internal(
		input,
		XXH3_kSecret[:],
		XXH3_accumulate_512,
		XXH3_scramble_accumulator,
	)
	return
}
|
|
|
|
/*
	Long-input hash with a seed.

	A non-zero `seed` makes `f_init_sec` fill a stack buffer of
	`XXH_SECRET_DEFAULT_SIZE` bytes, which is then used as the secret.
	A zero `seed` skips that step and hashes with `XXH3_kSecret` directly.
*/
XXH3_hashLong_64b_withSeed_internal :: #force_inline proc(
	input:      []u8,
	seed:       xxh_u64,
	f_acc512:   XXH3_accumulate_512_f,
	f_scramble: XXH3_scramble_accumulator_f,
	f_init_sec: XXH3_init_custom_secret_f,
) -> (hash: xxh_u64) {
	if seed != 0 {
		seeded_secret: [XXH_SECRET_DEFAULT_SIZE]u8
		f_init_sec(seeded_secret[:], seed)
		return XXH3_hashLong_64b_internal(input, seeded_secret[:], f_acc512, f_scramble)
	}

	// No seed: the default secret can be used as-is.
	return XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
}
|
|
|
|
/*
	XXH3_hashLong_64b_withSeed():
	Generates a custom key by altering the default XXH3_kSecret with the seed,
	then uses that key for long mode hashing. `secret` is accepted but not used.

	Deriving the key is decently fast but still costs a little time,
	so avoid it whenever possible (typically when seed == 0).

	It's important for performance that XXH3_hashLong is not inlined. Not sure
	why (uop cache maybe?), but the difference is large and easily measurable.
*/
XXH3_hashLong_64b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
	hash = XXH3_hashLong_64b_withSeed_internal(
		input,
		seed,
		XXH3_accumulate_512,
		XXH3_scramble_accumulator,
		XXH3_init_custom_secret,
	)
	return
}
|
|
|
|
|
|
// Signature shared by the long-input 64-bit hash procedures
// (`XXH3_hashLong_64b_default`, `..._withSeed`, `..._withSecret`), so that
// `XXH3_64bits_internal` can receive any of them as its `f_hashLong` argument.
XXH3_hashLong64_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: xxh_u64)
|
|
|
|
/*
	Dispatches a 64-bit XXH3 hash by input length:
	- up to 16 bytes:               `XXH3_len_0to16_64b`
	- up to 128 bytes:              `XXH3_len_17to128_64b`
	- up to `XXH3_MIDSIZE_MAX`:     `XXH3_len_129to240_64b`
	- anything longer:              `f_hashLong`

	`len(secret) >= XXH3_SECRET_SIZE_MIN` is a contract pre-condition. It is only
	asserted here; if any other action should be taken when it is violated, this is
	the place to do it. Note that the signature offers no room to return an error.
*/
@(optimization_mode="favor_size")
XXH3_64bits_internal :: #force_inline proc(input: []u8, seed: xxh_u64, secret: []u8, f_hashLong: XXH3_hashLong64_f) -> (hash: xxh_u64) {
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)

	n := len(input)

	if n <= 16 {
		return XXH3_len_0to16_64b(input, secret, seed)
	}
	if n <= 128 {
		return XXH3_len_17to128_64b(input, secret, seed)
	}
	if n <= XXH3_MIDSIZE_MAX {
		return XXH3_len_129to240_64b(input, secret, seed)
	}
	return f_hashLong(input, seed, secret)
}
|
|
|
|
/* === Public entry point === */
|
|
// 64-bit XXH3 hash of `input` with seed 0 and the built-in `XXH3_kSecret`.
@(optimization_mode="favor_size")
XXH3_64_default :: proc(input: []u8) -> (hash: xxh_u64) {
	hash = XXH3_64bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_64b_default)
	return
}
|
|
|
|
// 64-bit XXH3 hash of `input` with a caller-supplied `seed` and the built-in `XXH3_kSecret`.
@(optimization_mode="favor_size")
XXH3_64_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: xxh_u64) {
	hash = XXH3_64bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_64b_withSeed)
	return
}
|
|
|
|
// 64-bit XXH3 hash of `input` with seed 0 and a caller-supplied `secret`.
// `XXH3_64bits_internal` asserts that `secret` holds at least `XXH3_SECRET_SIZE_MIN` bytes.
@(optimization_mode="favor_size")
XXH3_64_with_secret :: proc(input, secret: []u8) -> (hash: xxh_u64) {
	hash = XXH3_64bits_internal(input, 0, secret, XXH3_hashLong_64b_withSecret)
	return
}
|
|
|
|
// Procedure group for the 64-bit XXH3 hash. The overload is chosen from the
// arguments: `(input)`, `(input, seed)` or `(input, secret)`.
XXH3_64 :: proc { XXH3_64_default, XXH3_64_with_seed, XXH3_64_with_secret }
|