mirror of
https://github.com/odin-lang/Odin.git
synced 2026-06-18 16:21:11 +00:00
This change was made in order to allow things produced with Odin and using Odin's core library, to not require the LICENSE to also be distributed alongside the binary form.
1031 lines
36 KiB
Odin
1031 lines
36 KiB
Odin
package xxhash
|
|
|
|
/*
|
|
An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
|
|
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
|
|
|
Made available under Odin's license, based on the original C code.
|
|
|
|
List of contributors:
|
|
Jeroen van Rijn: Initial implementation.
|
|
*/
|
|
|
|
import "base:intrinsics"
|
|
|
|
/*
|
|
*************************************************************************
|
|
* XXH3
|
|
* New generation hash designed for speed on small keys and vectorization
|
|
*************************************************************************
|
|
* One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
|
|
* remaining a true 64-bit/128-bit hash function.
|
|
* ==========================================
|
|
* XXH3 default settings
|
|
* ==========================================
|
|
*/
|
|
|
|
/*
	Length, in bytes, of the built-in secret (and of `XXH3_state.custom_secret`).

	The value comes from `#config(XXH_SECRET_DEFAULT_SIZE, 192)` and is raised to
	`XXH3_SECRET_SIZE_MIN` (136) if configured smaller. It must be a multiple of 64,
	which is enforced at compile time below.
*/
XXH_SECRET_DEFAULT_SIZE :: max(XXH3_SECRET_SIZE_MIN, #config(XXH_SECRET_DEFAULT_SIZE, 192))
#assert(XXH_SECRET_DEFAULT_SIZE % 64 == 0)
|
|
|
|
// Built-in secret used when the caller supplies none; also the base from which
// seeded secrets are derived (see XXH3_init_custom_secret_*). 16 bytes per row.
XXH3_kSecret := [XXH_SECRET_DEFAULT_SIZE]u8{
	0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, // 0x00
	0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, // 0x10
	0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, // 0x20
	0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, // 0x30
	0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, // 0x40
	0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, // 0x50
	0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, // 0x60
	0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, // 0x70
	0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, // 0x80
	0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, // 0x90
	0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, // 0xa0
	0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, // 0xb0
}
|
|
/*
	Smallest secret length, in bytes, that the XXH3 procedures in this file accept
	(they assert `len(secret) >= XXH3_SECRET_SIZE_MIN`).

	Do not change this constant.
*/
XXH3_SECRET_SIZE_MIN :: 136
#assert(len(XXH3_kSecret) == 192 && len(XXH3_kSecret) > XXH3_SECRET_SIZE_MIN)
|
|
|
|
// Alignment, in bytes, that the scalar accumulate/scramble procedures assert on the accumulator slice.
XXH_ACC_ALIGN :: 8 /* scalar */

// `#config(XXH_MAX_WIDTH, 512)` is given in bits; dividing by 64 expresses it in 64-bit lanes.
XXH_MAX_WIDTH :: #config(XXH_MAX_WIDTH, 512) / 64
|
|
|
|
/*
|
|
This is the optimal update size for incremental hashing.
|
|
*/
|
|
// Also the byte length of `XXH3_state.buffer`.
XXH3_INTERNAL_BUFFER_SIZE :: 256
|
|
|
|
/*
|
|
Streaming state.
|
|
|
|
IMPORTANT: This structure has a strict alignment requirement of 64 bytes!
|
|
Default allocators will align it correctly if created via `new`, as will
|
|
placing this struct on the stack, but if using a custom allocator make sure
|
|
that it handles the alignment correctly!
|
|
*/
|
|
// Streaming hash state. Declared with 64-byte alignment; the compile-time check
// below verifies that `acc`, `custom_secret` and `buffer` each start on a 64-byte boundary.
XXH3_state :: struct #align(64) {
	acc:               [8]u64,                        // accumulator lanes
	custom_secret:     [XXH_SECRET_DEFAULT_SIZE]u8,   // in-state secret storage
	buffer:            [XXH3_INTERNAL_BUFFER_SIZE]u8, // pending input bytes
	buffered_size:     u32,
	reserved32:        u32,
	stripes_so_far:    uint,
	total_length:      u64,
	stripes_per_block: uint,
	secret_limit:      uint,
	seed:              u64,
	reserved64:        u64,
	external_secret:   []u8,                          // NOTE(review): presumably a caller-owned secret; confirm in the streaming code
}
#assert(offset_of(XXH3_state, acc)           % 64 == 0 &&
        offset_of(XXH3_state, custom_secret) % 64 == 0 &&
        offset_of(XXH3_state, buffer)        % 64 == 0)
|
|
|
|
/************************************************************************
|
|
* XXH3 128-bit variant
|
|
************************************************************************/
|
|
|
|
/*
|
|
Stored in little endian order, although the fields themselves are in native endianness.
|
|
*/
|
|
// 128-bit working integer used by the XXH3 128-bit code paths.
xxh_u128      :: u128
// Result type of the public 128-bit hash procedures.
XXH3_128_hash :: u128
|
|
|
|
// Overlay of a 128-bit value (`h`) with its two 64-bit halves (`low`, `high`),
// so the halves can be updated independently and the result read back as one u128.
// NOTE(review): `low` sitting at offset 0 matches `h & 0xFFFF_FFFF_FFFF_FFFF` only on
// little-endian targets; confirm before relying on this on a big-endian platform.
XXH128_hash_t :: struct #raw_union {
	using raw: struct {
		low:  XXH64_hash, /*!< `value & 0xFFFFFFFFFFFFFFFF` */
		high: XXH64_hash, /*!< `value >> 64` */
	},
	h: xxh_u128,
}
#assert(size_of(xxh_u128) == size_of(XXH128_hash_t))
|
|
|
|
// Byte-array form of a 128-bit hash; `digest` has exactly `size_of(XXH128_hash_t)` bytes.
XXH128_canonical :: struct {
	digest: [size_of(XXH128_hash_t)]u8,
}
|
|
|
|
/*
	Multiplies two 64-bit integers into a full 128-bit product and folds it back
	to 64 bits.

	Kept as its own procedure so callers do not have to pass 128-bit structs around
	by value.

	@param lhs, rhs The 64-bit integers to multiply
	@return The low 64 bits of the product XOR'd by the high 64 bits.
*/
@(optimization_mode="favor_size")
XXH_mul_64_to_128_fold_64 :: #force_inline proc(lhs, rhs: xxh_u64) -> (res: xxh_u64) {
	product := u128(lhs) * u128(rhs)
	lo := u64(product & 0xFFFFFFFFFFFFFFFF)
	hi := u64(product >> 64)
	return lo ~ hi
}
|
|
|
|
// Returns `v` XOR'd with itself shifted right by `shift` bits.
@(optimization_mode="favor_size")
XXH_xorshift_64 :: #force_inline proc(v: xxh_u64, #any_int shift: uint) -> (res: xxh_u64) {
	res = v >> shift
	res ~= v
	return
}
|
|
|
|
/*
	Fast avalanche stage, suitable when the input bits are already partially mixed:
	xorshift by 37, multiply by a fixed odd constant, xorshift by 32.
*/
@(optimization_mode="favor_size")
XXH3_avalanche :: #force_inline proc(h64: xxh_u64) -> (res: xxh_u64) {
	mixed := XXH_xorshift_64(h64, 37) * 0x165667919E3779F9
	return XXH_xorshift_64(mixed, 32)
}
|
|
|
|
/*
	Stronger avalanche, inspired by Pelle Evensen's rrmxmx; preferable when the
	input has not been mixed beforehand. `length` is folded in between the two
	multiplications.
*/
@(optimization_mode="favor_size")
XXH3_rrmxmx :: #force_inline proc(h64, length: xxh_u64) -> (res: xxh_u64) {
	MULTIPLIER :: 0x9FB21C651E98DF25

	x := h64 ~ XXH_rotl64(h64, 49) ~ XXH_rotl64(h64, 24)
	x *= MULTIPLIER
	x  = x ~ ((x >> 35) + length)
	x *= MULTIPLIER
	return XXH_xorshift_64(x, 28)
}
|
|
|
|
/*
|
|
==========================================
|
|
XXH3 128 bits (a.k.a XXH128)
|
|
==========================================
|
|
XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
|
|
even without counting the significantly larger output size.
|
|
|
|
For example, extra steps are taken to avoid the seed-dependent collisions
|
|
in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
|
|
|
|
This strength naturally comes at the cost of some speed, especially on short
|
|
lengths. Note that longer hashes are about as fast as the 64-bit version
|
|
due to it using only a slight modification of the 64-bit loop.
|
|
|
|
XXH128 is also more oriented towards 64-bit machines. It is still extremely
|
|
fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
|
|
*/
|
|
|
|
/*
	128-bit hash of a 1..3 byte input; a doubled version of the 64-bit 1to3 routine
	with different constants. The caller must guarantee `1 <= len(input) <= 3`
	(bounds checks are disabled here).

	The low packed word holds, from least to most significant byte:
	  len = 1: { input[0], 0x01, input[0], input[0] }
	  len = 2: { input[1], 0x02, input[0], input[1] }
	  len = 3: { input[2], 0x03, input[0], input[1] }
*/
@(optimization_mode="favor_size")
XXH3_len_1to3_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	n := len(input)
	#no_bounds_check {
		first := u32(input[0])
		mid   := u32(input[n >> 1])
		last  := u32(input[n - 1])

		packed_lo := last | (u32(n) << 8) | (first << 16) | (mid << 24)
		packed_hi := XXH_rotl32(byte_swap(packed_lo), 13)

		// Secret bytes 0..7 key the low half (+seed), bytes 8..15 the high half (-seed).
		flip_lo := u64(XXH32_read32(secret[0:]) ~ XXH32_read32(secret[4: ])) + seed
		flip_hi := u64(XXH32_read32(secret[8:]) ~ XXH32_read32(secret[12:])) - seed

		lo := XXH64_avalanche(u64(packed_lo) ~ flip_lo)
		hi := XXH64_avalanche(u64(packed_hi) ~ flip_hi)
		return xxh_u128(lo) | (xxh_u128(hi) << 64)
	}
}
|
|
|
|
/*
	128-bit hash of a 4..8 byte input. The caller must guarantee `4 <= len(input) <= 8`
	(bounds checks are disabled here). The first and last four bytes are read (they
	overlap for lengths below 8) and combined into one 64-bit word.
*/
@(optimization_mode="favor_size")
XXH3_len_4to8_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	n := len(input)

	// Spread the byte-swapped low 32 bits of the seed into its upper half.
	mixed_seed := seed ~ (u64(byte_swap(u32(seed))) << 32)

	#no_bounds_check {
		head  := u64(XXH32_read32(input[0:]))
		tail  := u64(XXH32_read32(input[n - 4:]))
		word  := head + (tail << 32)
		flip  := (XXH64_read64(secret[16:]) ~ XXH64_read64(secret[24:])) + mixed_seed
		keyed := word ~ flip

		/* Shift len to the left to ensure it is even, this avoids even multiplies. */
		product := XXH128_hash_t{
			h = u128(keyed) * (XXH_PRIME64_1 + (u128(n) << 2)),
		}
		product.high += product.low  << 1
		product.low  ~= product.high >> 3

		product.low   = XXH_xorshift_64(product.low, 35) * 0x9FB21C651E98DF25
		product.low   = XXH_xorshift_64(product.low, 28)
		product.high  = XXH3_avalanche(product.high)

		return product.h
	}
}
|
|
|
|
/*
	128-bit hash of a 9..16 byte input. The caller must guarantee `9 <= len(input) <= 16`
	(bounds checks are disabled here). The first and last eight bytes are read; they
	overlap for lengths below 16.
*/
@(optimization_mode="favor_size")
XXH3_len_9to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	n := len(input)

	#no_bounds_check {
		flip_lo := (XXH64_read64(secret[32:]) ~ XXH64_read64(secret[40:])) - seed
		flip_hi := (XXH64_read64(secret[48:]) ~ XXH64_read64(secret[56:])) + seed
		head    := XXH64_read64(input[0:])
		tail    := XXH64_read64(input[n - 8:])

		mid := XXH128_hash_t{
			h = u128(head ~ tail ~ flip_lo) * XXH_PRIME64_1,
		}

		// Put the length in the middle of `mid` so it gets mixed into both the
		// low and high bits by the 128x64 multiply below.
		mid.low += u64(n - 1) << 54

		keyed_tail := tail ~ flip_hi

		// Adds keyed_tail plus (low 32 bits of keyed_tail) * (XXH_PRIME32_2 - 1)
		// to the high 64 bits of `mid`.
		mid.high += keyed_tail + u64(u32(keyed_tail)) * u64(XXH_PRIME32_2 - 1)

		/* m128 ^= XXH_swap64(m128 >> 64); */
		mid.low ~= byte_swap(mid.high)

		/* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
		out := XXH128_hash_t{
			h = u128(mid.low) * XXH_PRIME64_2,
		}
		out.high += mid.high * XXH_PRIME64_2
		out.low   = XXH3_avalanche(out.low)
		out.high  = XXH3_avalanche(out.high)
		return out.h
	}
}
|
|
|
|
/*
	128-bit hash for inputs of 0..16 bytes: dispatches on length to the 9..16,
	4..8 and 1..3 routines; an empty input is hashed from the seed and secret alone.

	Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
*/
@(optimization_mode="favor_size")
XXH3_len_0to16_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	n := len(input)

	if n > 8 {
		return XXH3_len_9to16_128b(input, secret, seed)
	}
	if n >= 4 {
		return XXH3_len_4to8_128b(input, secret, seed)
	}
	if n > 0 {
		return XXH3_len_1to3_128b(input, secret, seed)
	}

	// Empty input: `input` is never read.
	#no_bounds_check {
		flip_lo := XXH64_read64(secret[64:]) ~ XXH64_read64(secret[72:])
		flip_hi := XXH64_read64(secret[80:]) ~ XXH64_read64(secret[88:])
		lo := XXH64_avalanche(seed ~ flip_lo)
		hi := XXH64_avalanche(seed ~ flip_hi)
		return xxh_u128(lo) | (xxh_u128(hi) << 64)
	}
}
|
|
|
|
/*
	Mixes two 16-byte inputs into a 128-bit accumulator and returns the new value.
	A bit slower than XXH3_mix16B, but handles multiply by zero better: each half
	is additionally XOR'd with the raw words of the *other* input.

	`input_1` / `input_2` must each hold at least 16 bytes and `secret` at least 32.
*/
@(optimization_mode="favor_size")
XXH128_mix32B :: #force_inline proc(acc: xxh_u128, input_1: []u8, input_2: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	mixed := XXH128_hash_t{
		h = acc,
	}
	#no_bounds_check {
		low_mix   := XXH3_mix16B(input_1, secret[0:], seed)
		low_fold  := XXH64_read64(input_2[0:]) + XXH64_read64(input_2[8:])
		mixed.low  = (mixed.low + low_mix) ~ low_fold

		high_mix   := XXH3_mix16B(input_2, secret[16:], seed)
		high_fold  := XXH64_read64(input_1) + XXH64_read64(input_1[8:])
		mixed.high  = (mixed.high + high_mix) ~ high_fold

		return mixed.h
	}
}
|
|
|
|
/*
	128-bit hash for inputs of 17..128 bytes. Up to four 32-byte mixing rounds are
	applied, each pairing 16 bytes taken from the front of the input with 16 bytes
	taken from the back; longer inputs run the outer pairs first. The caller must
	guarantee the length range (bounds checks are disabled on the slices).
*/
@(optimization_mode="favor_size")
XXH3_len_17to128_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	n := len(input)

	acc := XXH128_hash_t{}
	acc.low = xxh_u64(n) * XXH_PRIME64_1

	#no_bounds_check {
		// Nested guards preserve the round order: outermost pair first, innermost last.
		if n > 32 {
			if n > 64 {
				if n > 96 {
					acc.h = XXH128_mix32B(acc.h, input[48:], input[n - 64:], secret[96:], seed)
				}
				acc.h = XXH128_mix32B(acc.h, input[32:], input[n - 48:], secret[64:], seed)
			}
			acc.h = XXH128_mix32B(acc.h, input[16:], input[n - 32:], secret[32:], seed)
		}
		acc.h = XXH128_mix32B(acc.h, input, input[n - 16:], secret, seed)
	}

	out := XXH128_hash_t{}
	out.low  = acc.low + acc.high
	out.high = (acc.low * XXH_PRIME64_1) + (acc.high * XXH_PRIME64_4) + ((u64(n) - seed) * XXH_PRIME64_2)
	out.low  = XXH3_avalanche(out.low)
	// The high half is the two's-complement negation of its avalanche.
	out.high = u64(i64(0) - i64(XXH3_avalanche(out.high)))
	return out.h
}
|
|
|
|
/*
	128-bit hash for inputs of 129..XXH3_MIDSIZE_MAX (240) bytes. The caller must
	guarantee the length range (bounds checks are disabled).

	Processing order:
	  1. four 32-byte rounds over the first 128 bytes, keyed from the secret start;
	  2. avalanche of both accumulator halves;
	  3. the remaining whole 32-byte rounds, keyed from XXH3_MIDSIZE_STARTOFFSET;
	  4. one round over the last 32 bytes with the negated seed.
*/
@(optimization_mode="favor_size")
XXH3_len_129to240_128b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u128) {
	n := len(input)

	#no_bounds_check {
		acc := XXH128_hash_t{}
		acc.low = u64(n) * XXH_PRIME64_1

		rounds := n / 32

		for round in 0..<4 {
			off := 32 * round
			acc.h = XXH128_mix32B(acc.h, input[off:], input[off + 16:], secret[off:], seed)
		}
		acc.low  = XXH3_avalanche(acc.low)
		acc.high = XXH3_avalanche(acc.high)

		for round in 4..<rounds {
			off := 32 * round
			acc.h = XXH128_mix32B(acc.h,
			                      input[off:], input[off + 16:],
			                      secret[XXH3_MIDSIZE_STARTOFFSET + (32 * (round - 4)):],
			                      seed)
		}

		/* last bytes */
		acc.h = XXH128_mix32B(acc.h,
		                      input[n - 16:],
		                      input[n - 32:],
		                      secret[XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16:],
		                      u64(i64(0) - i64(seed)))

		out := XXH128_hash_t{}
		out.low  = acc.low + acc.high
		// Wrapping 64-bit sum; equal to truncating the 128-bit sum of the same three terms.
		out.high = (acc.low * XXH_PRIME64_1) + (acc.high * XXH_PRIME64_4) + ((u64(n) - seed) * XXH_PRIME64_2)
		out.low  = XXH3_avalanche(out.low)
		out.high = u64(i64(0) - i64(XXH3_avalanche(out.high)))
		return out.h
	}
}
|
|
|
|
// Initial values of the XXH_ACC_NB accumulator lanes used by the long-input loop.
XXH3_INIT_ACC :: [XXH_ACC_NB]xxh_u64{
	XXH_PRIME32_3, // lane 0
	XXH_PRIME64_1, // lane 1
	XXH_PRIME64_2, // lane 2
	XXH_PRIME64_3, // lane 3
	XXH_PRIME64_4, // lane 4
	XXH_PRIME32_2, // lane 5
	XXH_PRIME64_5, // lane 6
	XXH_PRIME32_1, // lane 7
}
|
|
|
|
// Byte offset into the secret used when merging accumulators: the low half reads from
// this offset, the high half from the same distance before the last `size_of(acc)` bytes.
XXH_SECRET_MERGEACCS_START :: 11
|
|
|
|
/*
	Core of the long-input 128-bit hash: runs the accumulate/scramble loop over
	`input` with `secret`, then merges the accumulators twice (with different
	secret offsets and start values) to produce the low and high halves.

	`f_acc512` / `f_scramble` select the accumulate and scramble implementations.
*/
XXH3_hashLong_128b_internal :: #force_inline proc(
	input:      []u8,
	secret:     []u8,
	f_acc512:   XXH3_accumulate_512_f,
	f_scramble: XXH3_scramble_accumulator_f) -> (res: XXH3_128_hash) {

	acc := XXH3_INIT_ACC
	#assert(size_of(acc) == 64)

	XXH3_hashLong_internal_loop(acc[:], input, secret, f_acc512, f_scramble)

	/* converge into final hash */
	total      := u64(len(input))
	hi_key_off := len(secret) - size_of(acc) - XXH_SECRET_MERGEACCS_START

	merged := XXH128_hash_t{}
	merged.low  = XXH3_mergeAccs(acc[:], secret[XXH_SECRET_MERGEACCS_START:], total * XXH_PRIME64_1)
	merged.high = XXH3_mergeAccs(acc[:], secret[hi_key_off:], ~(total * XXH_PRIME64_2))
	return merged.h
}
|
|
|
|
/*
	Long-input back end for the unseeded hash: always uses the built-in secret;
	`seed` and `secret` are ignored and exist only to match XXH3_hashLong128_f.

	It's important for performance that XXH3_hashLong is not inlined.
*/
XXH3_hashLong_128b_default :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
	res = XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
	return
}
|
|
|
|
/*
	Long-input back end for the custom-secret hash: uses the caller's `secret`;
	`seed` is ignored and exists only to match XXH3_hashLong128_f.

	It's important for performance that XXH3_hashLong is not inlined.
*/
XXH3_hashLong_128b_withSecret :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
	res = XXH3_hashLong_128b_internal(input, secret, XXH3_accumulate_512, XXH3_scramble_accumulator)
	return
}
|
|
|
|
/*
	Seeded long-input hash. A zero seed hashes with the built-in secret directly;
	any other seed first derives a temporary secret on the stack via `f_initSec`.
	The `secret` parameter is not used.
*/
XXH3_hashLong_128b_withSeed_internal :: #force_inline proc(
	input: []u8, seed: xxh_u64, secret: []u8,
	f_acc512:   XXH3_accumulate_512_f,
	f_scramble: XXH3_scramble_accumulator_f,
	f_initSec:  XXH3_init_custom_secret_f) -> (res: XXH3_128_hash) {

	if seed != 0 {
		derived := [XXH_SECRET_DEFAULT_SIZE]u8{}
		f_initSec(derived[:], seed)
		return XXH3_hashLong_128b_internal(input, derived[:], f_acc512, f_scramble)
	}
	return XXH3_hashLong_128b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
}
|
|
|
|
/*
	Long-input back end for the seeded hash; binds the default accumulate,
	scramble and secret-derivation procedures.

	It's important for performance that XXH3_hashLong is not inlined.
*/
XXH3_hashLong_128b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash) {
	res = XXH3_hashLong_128b_withSeed_internal(
		input, seed, secret,
		XXH3_accumulate_512, XXH3_scramble_accumulator, XXH3_init_custom_secret,
	)
	return
}
|
|
|
|
// Common signature of the long-input 128-bit back ends (default / withSecret / withSeed),
// so XXH3_128bits_internal can be handed any of them.
XXH3_hashLong128_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: XXH3_128_hash)
|
|
|
|
/*
	Length dispatcher for the 128-bit hash:
	  0..16 bytes              -> XXH3_len_0to16_128b
	  17..128 bytes            -> XXH3_len_17to128_128b
	  129..XXH3_MIDSIZE_MAX    -> XXH3_len_129to240_128b
	  anything longer          -> `f_hl128`

	`secret` must be at least XXH3_SECRET_SIZE_MIN bytes long (asserted).
*/
@(optimization_mode="favor_size")
XXH3_128bits_internal :: #force_inline proc(
	input: []u8, seed: xxh_u64, secret: []u8, f_hl128: XXH3_hashLong128_f) -> (res: XXH3_128_hash) {

	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
	/*
		If an action is to be taken if `secret` conditions are not respected,
		it should be done here.
		For now, it's a contract pre-condition.
		Adding a check and a branch here would cost performance at every hash.
	*/
	length := len(input)

	if length <= 16 {
		return XXH3_len_0to16_128b(input, secret, seed)
	} else if length <= 128 {
		return XXH3_len_17to128_128b(input, secret, seed)
	} else if length <= XXH3_MIDSIZE_MAX {
		return XXH3_len_129to240_128b(input, secret, seed)
	}
	return f_hl128(input, seed, secret)
}
|
|
|
|
/* === Public XXH128 API === */
|
|
// 128-bit XXH3 of `input` with seed 0 and the built-in secret.
@(optimization_mode="favor_size")
XXH3_128_default :: proc(input: []u8) -> (hash: XXH3_128_hash) {
	hash = XXH3_128bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_128b_default)
	return
}
|
|
|
|
// 128-bit XXH3 of `input` using `seed` together with the built-in secret.
@(optimization_mode="favor_size")
XXH3_128_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: XXH3_128_hash) {
	hash = XXH3_128bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_128b_withSeed)
	return
}
|
|
|
|
// 128-bit XXH3 of `input` with seed 0 and a caller-supplied `secret`
// (at least XXH3_SECRET_SIZE_MIN bytes; asserted by XXH3_128bits_internal).
@(optimization_mode="favor_size")
XXH3_128_with_secret :: proc(input: []u8, secret: []u8) -> (hash: XXH3_128_hash) {
	hash = XXH3_128bits_internal(input, 0, secret, XXH3_hashLong_128b_withSecret)
	return
}
|
|
// Overload set: resolves by argument types to the default, seeded, or custom-secret variant.
XXH3_128 :: proc { XXH3_128_default, XXH3_128_with_seed, XXH3_128_with_secret }
|
|
|
|
/*
|
|
==========================================
|
|
Short keys
|
|
==========================================
|
|
One of the shortcomings of XXH32 and XXH64 was that their performance was
|
|
sub-optimal on short lengths. It used an iterative algorithm which strongly
|
|
favored lengths that were a multiple of 4 or 8.
|
|
|
|
Instead of iterating over individual inputs, we use a set of single shot
|
|
functions which piece together a range of lengths and operate in constant time.
|
|
Additionally, the number of multiplies has been significantly reduced. This
|
|
reduces latency, especially when emulating 64-bit multiplies on 32-bit.
|
|
|
|
Depending on the platform, this may or may not be faster than XXH32, but it
|
|
is almost guaranteed to be faster than XXH64.
|
|
*/
|
|
|
|
/*
|
|
At very short lengths, there isn't enough input to fully hide secrets, or use the entire secret.
|
|
|
|
There is also only a limited amount of mixing we can do before significantly impacting performance.
|
|
|
|
Therefore, we use different sections of the secret and always mix two secret samples with an XOR.
|
|
This should have no effect on performance on the seedless or withSeed variants because everything
|
|
_should_ be constant folded by modern compilers.
|
|
|
|
The XOR mixing hides individual parts of the secret and increases entropy.
|
|
This adds an extra layer of strength for custom secrets.
|
|
*/
|
|
/*
	64-bit hash of a 1..3 byte input (asserted).

	The packed word holds, from least to most significant byte:
	  len = 1: { input[0], 0x01, input[0], input[0] }
	  len = 2: { input[1], 0x02, input[0], input[1] }
	  len = 3: { input[2], 0x03, input[0], input[1] }
*/
@(optimization_mode="favor_size")
XXH3_len_1to3_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	length := u32(len(input))
	assert(input != nil)
	assert(1 <= length && length <= 3)
	assert(secret != nil)

	#no_bounds_check {
		lead := u32(input[0])
		mid  := u32(input[length >> 1])
		tail := u32(input[length - 1])

		packed := tail | (length << 8) | (lead << 16) | (mid << 24)
		key    := (u64(XXH32_read32(secret)) ~ u64(XXH32_read32(secret[4:]))) + seed
		return XXH64_avalanche(u64(packed) ~ key)
	}
}
|
|
|
|
/*
	64-bit hash of a 4..8 byte input (asserted). The first and last four bytes are
	read (overlapping for lengths below 8), combined into one word, keyed with the
	secret and finished with XXH3_rrmxmx.
*/
@(optimization_mode="favor_size")
XXH3_len_4to8_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	length := u32(len(input))
	assert(input != nil)
	assert(4 <= length && length <= 8)
	assert(secret != nil)

	// Spread the byte-swapped low 32 bits of the seed into its upper half.
	mixed_seed := seed ~ (u64(byte_swap(u32(seed))) << 32)

	#no_bounds_check {
		head := XXH32_read32(input)
		tail := XXH32_read32(input[length - 4:])
		key  := (XXH64_read64(secret[8:]) ~ XXH64_read64(secret[16:])) - mixed_seed
		word := (u64(head) << 32) + u64(tail)
		return XXH3_rrmxmx(word ~ key, u64(length))
	}
}
|
|
|
|
/*
	64-bit hash of a 9..16 byte input (asserted). The first and last eight bytes
	are read (overlapping for lengths below 16), each keyed with a different part
	of the secret, then combined with a folded 64x64 multiply.
*/
@(optimization_mode="favor_size")
XXH3_len_9to16_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	length := u64(len(input))
	assert(input != nil)
	assert(9 <= length && length <= 16)
	assert(secret != nil)

	#no_bounds_check {
		key_lo := (XXH64_read64(secret[24:]) ~ XXH64_read64(secret[32:])) + seed
		key_hi := (XXH64_read64(secret[40:]) ~ XXH64_read64(secret[48:])) - seed
		lo     := XXH64_read64(input) ~ key_lo
		hi     := XXH64_read64(input[length - 8:]) ~ key_hi
		folded := XXH_mul_64_to_128_fold_64(lo, hi)
		return XXH3_avalanche(length + byte_swap(lo) + hi + folded)
	}
}
|
|
|
|
/*
	64-bit hash for inputs of 0..16 bytes: dispatches on length to the 9..16,
	4..8 and 1..3 routines; an empty input is hashed from the seed and secret alone.

	`input` is deliberately NOT asserted to be non-nil here: the zero-length path
	never reads it, so a nil/empty slice is a valid argument (as it is for
	XXH3_len_0to16_128b). The non-empty routines still assert `input != nil`
	themselves.

	Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
*/
@(optimization_mode="favor_size")
XXH3_len_0to16_64b :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	length := u64(len(input))
	assert(length <= 16)
	#no_bounds_check {
		switch {
		case length >  8: return #force_inline XXH3_len_9to16_64b(input, secret, seed)
		case length >= 4: return #force_inline XXH3_len_4to8_64b (input, secret, seed)
		case length >  0: return #force_inline XXH3_len_1to3_64b (input, secret, seed)
		case:
			// Empty input: only the seed and secret bytes 56..71 contribute.
			return #force_inline XXH64_avalanche(seed ~ (XXH64_read64(secret[56:]) ~ XXH64_read64(secret[64:])))
		}
	}
}
|
|
|
|
/*
|
|
DISCLAIMER: There are known *seed-dependent* multicollisions here due to
|
|
multiplication by zero, affecting hashes of lengths 17 to 240.
|
|
|
|
However, they are very unlikely.
|
|
|
|
Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
|
|
unseeded non-cryptographic hashes, it does not attempt to defend itself
|
|
against specially crafted inputs, only random inputs.
|
|
|
|
Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
|
|
cancelling out the secret is taken an arbitrary number of times (addressed
|
|
in XXH3_accumulate_512), this collision is very unlikely with random inputs
|
|
and/or proper seeding:
|
|
|
|
This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
|
|
function that is only called up to 16 times per hash with up to 240 bytes of
|
|
input.
|
|
|
|
This is not too bad for a non-cryptographic hash function, especially with
|
|
only 64 bit outputs.
|
|
|
|
The 128-bit variant (which trades some speed for strength) is NOT affected
|
|
by this, although it is always a good idea to use a proper seed if you care
|
|
about strength.
|
|
*/
|
|
// Mixes 16 input bytes with 16 secret bytes: each 64-bit input word is XOR'd with a
// secret word offset by +seed (low) or -seed (high), and the two results are combined
// by a folded 64x64->128 multiply.
@(optimization_mode="favor_size")
XXH3_mix16B :: #force_inline proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	word_lo := XXH64_read64(input[0:])
	word_hi := XXH64_read64(input[8:])

	key_lo := XXH64_read64(secret[0:]) + seed
	key_hi := XXH64_read64(secret[8:]) - seed

	return XXH_mul_64_to_128_fold_64(word_lo ~ key_lo, word_hi ~ key_hi)
}
|
|
|
|
/*
	For mid range keys, XXH3 uses a Mum-hash variant.

	64-bit hash for inputs of 17..128 bytes (asserted). 16-byte chunks are taken
	in pairs, one from the front and one from the back of the input; longer inputs
	mix the outer pairs first.
*/
@(optimization_mode="favor_size")
XXH3_len_17to128_64b :: proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
	length := len(input)
	assert(16 < length && length <= 128)

	#no_bounds_check {
		acc := u64(length) * XXH_PRIME64_1

		if length > 32 {
			if length > 64 {
				if length > 96 {
					acc += XXH3_mix16B(input[48:],          secret[96:],  seed)
					acc += XXH3_mix16B(input[length - 64:], secret[112:], seed)
				}
				acc += XXH3_mix16B(input[32:],          secret[64:], seed)
				acc += XXH3_mix16B(input[length - 48:], secret[80:], seed)
			}
			acc += XXH3_mix16B(input[16:],          secret[32:], seed)
			acc += XXH3_mix16B(input[length - 32:], secret[48:], seed)
		}
		acc += XXH3_mix16B(input[0:],           secret[0:],  seed)
		acc += XXH3_mix16B(input[length - 16:], secret[16:], seed)

		return XXH3_avalanche(acc)
	}
}
|
|
|
|
// Largest input length, in bytes, handled by the 129..240 mid-size routines.
XXH3_MIDSIZE_MAX         :: 240
// Secret offset at which the mid-size rounds after the first 128 bytes begin.
XXH3_MIDSIZE_STARTOFFSET :: 3
// Distance from XXH3_SECRET_SIZE_MIN at which the final mid-size round reads the secret.
XXH3_MIDSIZE_LASTOFFSET  :: 17
|
|
|
|
/*
	64-bit hash for inputs of 129..XXH3_MIDSIZE_MAX bytes (asserted).

	The first eight 16-byte chunks are mixed against the start of the secret and
	avalanched; the remaining whole chunks are mixed against the secret starting
	at XXH3_MIDSIZE_STARTOFFSET; finally the last 16 input bytes are mixed in.
*/
@(optimization_mode="favor_size")
XXH3_len_129to240_64b :: proc(input: []u8, secret: []u8, seed: xxh_u64) -> (res: xxh_u64) {
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)
	length := len(input)
	assert(128 < length && length <= XXH3_MIDSIZE_MAX)

	#no_bounds_check {
		acc      := u64(length) * XXH_PRIME64_1
		nbRounds := length / 16

		for round in 0..<8 {
			off := 16 * round
			acc += XXH3_mix16B(input[off:], secret[off:], seed)
		}

		acc = XXH3_avalanche(acc)
		assert(nbRounds >= 8)

		for round in 8..<nbRounds {
			key_off := (16 * (round - 8)) + XXH3_MIDSIZE_STARTOFFSET
			acc += XXH3_mix16B(input[16 * round:], secret[key_off:], seed)
		}

		/* last bytes */
		acc += XXH3_mix16B(input[length - 16:], secret[XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET:], seed)
		return XXH3_avalanche(acc)
	}
}
|
|
|
|
/* ======= Long Keys ======= */
|
|
|
|
// Bytes of input consumed by one accumulate step.
XXH_STRIPE_LEN           :: 64
// Number of secret bytes consumed at each accumulation.
XXH_SECRET_CONSUME_RATE  :: 8
// Number of 64-bit accumulator lanes in one stripe.
XXH_ACC_NB               :: (XXH_STRIPE_LEN / size_of(xxh_u64))
// Not aligned on 8: the last secret is different from the acc & scrambler ones.
XXH_SECRET_LASTACC_START :: 7
|
|
|
|
// Copies the 8 bytes of the little-endian value `v64` to the start of `dst`.
// `dst` must have room for at least 8 bytes; no length check is made here.
@(optimization_mode="favor_size")
XXH_writeLE64 :: #force_inline proc(dst: []u8, v64: u64le) {
	le := v64 // addressable copy of the parameter
	mem_copy(raw_data(dst), &le, size_of(le))
}
|
|
|
|
/*
|
|
* XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
|
|
*
|
|
* It is a hardened version of UMAC, based off of FARSH's implementation.
|
|
*
|
|
* This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
|
|
* implementations, and it is ridiculously fast.
|
|
*
|
|
* We harden it by mixing the original input to the accumulators as well as the product.
|
|
*
|
|
* This means that in the (relatively likely) case of a multiply by zero, the
|
|
* original input is preserved.
|
|
*
|
|
* On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
|
|
* cross-pollination, as otherwise the upper and lower halves would be
|
|
* essentially independent.
|
|
*
|
|
* This doesn't matter on 64-bit hashes since they all get merged together in
|
|
* the end, so we skip the extra step.
|
|
*
|
|
* Both XXH3_64bits and XXH3_128bits use this subroutine.
|
|
*/
|
|
|
|
// Procedure types through which the long-input code receives its implementation.

// Accumulates one stripe of `input` into `acc` using `secret`.
XXH3_accumulate_512_f       :: #type proc(acc: []xxh_u64, input: []u8, secret: []u8)
// Scrambles `acc` in place using `secret`.
XXH3_scramble_accumulator_f :: #type proc(acc: []xxh_u64, secret: []u8)
// Fills `custom_secret` with a secret derived from `seed64`.
XXH3_init_custom_secret_f   :: #type proc(custom_secret: []u8, seed64: xxh_u64)
|
|
|
|
/* scalar variants - universal */

/*
	Accumulates one 64-byte stripe, one 64-bit lane at a time. For lane i:
	the raw input word is added to the *neighbouring* lane (i ~ 1), and the
	product of the low and high 32-bit halves of (input ~ secret) is added to lane i.

	`acc` must be aligned to XXH_ACC_ALIGN (asserted); `input` and `secret` have
	no alignment restriction.
*/
@(optimization_mode="favor_size")
XXH3_accumulate_512_scalar :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8) {
	assert(uintptr(raw_data(acc)) & uintptr(XXH_ACC_ALIGN - 1) == 0)

	#no_bounds_check for lane in uint(0)..<XXH_ACC_NB {
		word  := XXH64_read64(input[8 * lane:])
		key   := XXH64_read64(secret[8 * lane:])
		keyed := word ~ key

		acc[lane ~ 1] += word /* swap adjacent lanes */
		acc[lane]     += u64(u32(keyed)) * u64(keyed >> 32)
	}
}
|
|
|
|
/*
	Scrambles every accumulator lane in place: xorshift by 47, XOR with the
	matching 64-bit secret word, multiply by XXH_PRIME32_1.

	`acc` must be aligned to XXH_ACC_ALIGN (asserted); `secret` has no alignment
	restriction.
*/
@(optimization_mode="favor_size")
XXH3_scramble_accumulator_scalar :: #force_inline proc(acc: []xxh_u64, secret: []u8) {
	assert(uintptr(raw_data(acc)) & uintptr(XXH_ACC_ALIGN - 1) == 0)

	#no_bounds_check for lane in uint(0)..<XXH_ACC_NB {
		key   := XXH64_read64(secret[8 * lane:])
		value := XXH_xorshift_64(acc[lane], 47)
		value ~= key
		value *= u64(XXH_PRIME32_1)
		acc[lane] = value
	}
}
|
|
|
|
/*
	Derives a seeded secret from the built-in one, 16 bytes per round: the first
	64-bit word of each pair has `seed64` added, the second has it subtracted.
	Results are written little-endian into `custom_secret`, which must hold at
	least XXH_SECRET_DEFAULT_SIZE bytes (bounds checks are disabled).
*/
@(optimization_mode="favor_size")
XXH3_init_custom_secret_scalar :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64) {
	#assert((XXH_SECRET_DEFAULT_SIZE & 15) == 0)

	ROUNDS :: XXH_SECRET_DEFAULT_SIZE / 16

	#no_bounds_check for round in 0..<ROUNDS {
		off := 16 * round
		lo  := XXH64_read64(XXH3_kSecret[off:])     + seed64
		hi  := XXH64_read64(XXH3_kSecret[off + 8:]) - seed64
		XXH_writeLE64(custom_secret[off:],     u64le(lo))
		XXH_writeLE64(custom_secret[off + 8:], u64le(hi))
	}
}
|
|
|
|
/* generalized SIMD variants */

/*
	Vector form of the stripe accumulate, processing `W` 64-bit lanes per step
	(`W` is a compile-time constant; only 2, 4 and 8 are supported).

	Per step: input and secret vectors are loaded and XOR'd; the input vector has
	adjacent lanes swapped before being added to the accumulators; then the product
	of the low 32 bits and the high 32 bits of each keyed lane is added.
*/
XXH3_accumulate_512_simd_generic :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8, $W: uint) {
	u32xW :: #simd[W]u32
	u64xW :: #simd[W]u64

	#no_bounds_check for step in uint(0)..<XXH_ACC_NB/W {
		words := XXH64_read64_simd(input[8 * W * step:], W)
		keys  := XXH64_read64_simd(secret[8 * W * step:], W)
		keyed := words ~ keys

		// Swap adjacent lanes
		when W == 2 {
			words = swizzle(words, 1, 0)
		} else when W == 4 {
			words = swizzle(words, 1, 0, 3, 2)
		} else when W == 8 {
			words = swizzle(words, 1, 0, 3, 2, 5, 4, 7, 6)
		} else {
			#panic("Unsupported vector size!")
		}

		lanes := XXH64_read64_simd(acc[W * step:], W)
		lanes += words
		lanes += u64xW(u32xW(keyed)) * intrinsics.simd_shr(keyed, 32)
		XXH64_write64_simd(acc[W * step:], lanes)
	}
}
|
|
|
|
/*
	Vector form of the accumulator scramble, `W` 64-bit lanes per step:
	xorshift by 47, XOR with the secret, multiply by XXH_PRIME32_1, store back.
*/
XXH3_scramble_accumulator_simd_generic :: #force_inline proc(acc: []xxh_u64, secret: []u8, $W: uint) {
	u64xW :: #simd[W]u64

	#no_bounds_check for step in uint(0)..<XXH_ACC_NB/W {
		keys  := XXH64_read64_simd(secret[8 * W * step:], W)
		lanes := XXH64_read64_simd(acc[W * step:], W)
		lanes ~= intrinsics.simd_shr(lanes, 47)
		lanes ~= keys
		lanes *= XXH_PRIME32_1
		XXH64_write64_simd(acc[W * step:], lanes)
	}
}
|
|
|
|
/*
	Vector form of the seeded-secret derivation, `W` 64-bit lanes per step.

	A seed vector is built with `seed64` in every lane and the odd lanes negated,
	so that adding it to the built-in secret adds the seed to even words and
	subtracts it from odd words. `custom_secret` must hold at least
	XXH_SECRET_DEFAULT_SIZE bytes (bounds checks are disabled).
*/
XXH3_init_custom_secret_simd_generic :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64, $W: uint) {
	u64xW :: #simd[W]u64

	seed_lanes := u64xW(seed64)
	for pair in 0..<W/2 {
		odd := 2*pair + 1
		seed_lanes = intrinsics.simd_replace(seed_lanes, odd, -intrinsics.simd_extract(seed_lanes, odd))
	}

	steps := XXH_SECRET_DEFAULT_SIZE / 8 / W
	#no_bounds_check for step in uint(0)..<steps {
		chunk := XXH64_read64_simd(XXH3_kSecret[8 * W * step:], W)
		chunk += seed_lanes
		XXH64_write64_simd(custom_secret[8 * W * step:], chunk)
	}
}
|
|
|
|
/*
	Accumulation step entry point.
	Picks the scalar implementation when `XXH_NATIVE_WIDTH` is 1 or less,
	and the generic SIMD implementation with that width otherwise.
*/
XXH3_accumulate_512 :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8) {
	when XXH_NATIVE_WIDTH <= 1 {
		XXH3_accumulate_512_scalar(acc, input, secret)
	} else {
		XXH3_accumulate_512_simd_generic(acc, input, secret, XXH_NATIVE_WIDTH)
	}
}
|
|
|
|
/*
	Scramble step entry point.
	Picks the scalar implementation when `XXH_NATIVE_WIDTH` is 1 or less,
	and the generic SIMD implementation with that width otherwise.
*/
XXH3_scramble_accumulator :: #force_inline proc(acc: []xxh_u64, secret: []u8) {
	when XXH_NATIVE_WIDTH <= 1 {
		XXH3_scramble_accumulator_scalar(acc, secret)
	} else {
		XXH3_scramble_accumulator_simd_generic(acc, secret, XXH_NATIVE_WIDTH)
	}
}
|
|
|
|
/*
	Seeded-secret initialisation entry point.
	Picks the scalar implementation when `XXH_NATIVE_WIDTH` is 1 or less,
	and the generic SIMD implementation with that width otherwise.
*/
XXH3_init_custom_secret :: #force_inline proc(custom_secret: []u8, seed64: xxh_u64) {
	when XXH_NATIVE_WIDTH <= 1 {
		XXH3_init_custom_secret_scalar(custom_secret, seed64)
	} else {
		XXH3_init_custom_secret_simd_generic(custom_secret, seed64, XXH_NATIVE_WIDTH)
	}
}
|
|
|
|
// Second argument handed to `prefetch` in `XXH3_accumulate`.
// NOTE(review): presumably the look-ahead distance in bytes - confirm against `prefetch`.
XXH_PREFETCH_DIST :: 320
|
|
|
|
/*
	XXH3_accumulate()

	Calls `f_acc512` once per stripe, for `nbStripes` stripes.
	Stripe `n` starts at `input[n * XXH_STRIPE_LEN]` and is paired with the secret
	starting at `secret[n * XXH_SECRET_CONSUME_RATE]`.
	Unless `XXH_DISABLE_PREFETCH` is set, a prefetch is issued for each stripe first.

	Assumption: `nbStripes` will not overflow the secret size (slicing is not bounds checked).
*/
@(optimization_mode="favor_size")
XXH3_accumulate :: #force_inline proc(
	acc: []xxh_u64, input: []u8, secret: []u8, nbStripes: uint, f_acc512: XXH3_accumulate_512_f) {

	#no_bounds_check for stripe in uint(0)..<nbStripes {
		input_off := stripe * XXH_STRIPE_LEN

		when !XXH_DISABLE_PREFETCH {
			prefetch(&input[input_off], XXH_PREFETCH_DIST)
		}

		f_acc512(acc, input[input_off:], secret[stripe * XXH_SECRET_CONSUME_RATE:])
	}
}
|
|
|
|
/*
	Main loop for long inputs. Feeds `input` into `acc` in three phases:

	1. Whole blocks. A block is `(len(secret) - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE` stripes;
	   after each whole block the accumulators are scrambled with the last `XXH_STRIPE_LEN`
	   bytes of the secret.
	2. The whole stripes of the trailing partial block.
	3. The final `XXH_STRIPE_LEN` bytes of the input, using the secret at
	   `len(secret) - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START`.

	All sizes are `uint` and slicing is not bounds checked, so the input must be at least
	`XXH_STRIPE_LEN` bytes long and the secret longer than `XXH_STRIPE_LEN`.
*/
@(optimization_mode="favor_size")
XXH3_hashLong_internal_loop :: #force_inline proc(acc: []xxh_u64, input: []u8, secret: []u8,
	f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) {

	input_len  := uint(len(input))
	secret_len := uint(len(secret))

	stripes_in_block := (secret_len - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE
	bytes_in_block   := XXH_STRIPE_LEN * stripes_in_block
	whole_blocks     := (input_len - 1) / bytes_in_block

	/* whole blocks */
	#no_bounds_check for block in uint(0)..<whole_blocks {
		XXH3_accumulate(acc, input[block * bytes_in_block:], secret, stripes_in_block, f_acc512)
		f_scramble(acc, secret[secret_len - XXH_STRIPE_LEN:])
	}

	#no_bounds_check {
		/* last partial block */
		consumed       := bytes_in_block * whole_blocks
		tail_stripes   := ((input_len - 1) - consumed) / XXH_STRIPE_LEN
		XXH3_accumulate(acc, input[whole_blocks * bytes_in_block:], secret, tail_stripes, f_acc512)

		/* last stripe */
		last_stripe := input[input_len - XXH_STRIPE_LEN:]
		last_secret := secret[secret_len - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START:]
		f_acc512(acc, last_stripe, last_secret)
	}
}
|
|
|
|
/*
	Mixes two accumulators with 16 bytes of secret:
	`acc[0]` is XORed with the first 64-bit word of `secret`, `acc[1]` with the second,
	and the pair is passed to `XXH_mul_64_to_128_fold_64`.
*/
@(optimization_mode="favor_size")
XXH3_mix2Accs :: #force_inline proc(acc: []xxh_u64, secret: []u8) -> (res: xxh_u64) {
	first  := acc[0] ~ XXH64_read64(secret)
	second := acc[1] ~ XXH64_read64(secret[8:])
	res = XXH_mul_64_to_128_fold_64(first, second)
	return
}
|
|
|
|
/*
	Folds the accumulators into a single 64-bit value.

	Starting from `start`, adds `XXH3_mix2Accs` of four accumulator pairs
	(`acc[0..1]`, `acc[2..3]`, `acc[4..5]`, `acc[6..7]`), each with its own 16 bytes of `secret`,
	then returns `XXH3_avalanche` of the sum.
*/
@(optimization_mode="favor_size")
XXH3_mergeAccs :: #force_inline proc(acc: []xxh_u64, secret: []u8, start: xxh_u64) -> (res: xxh_u64) {
	res = start
	#no_bounds_check for pair in 0..<4 {
		res += XXH3_mix2Accs(acc[2 * pair:], secret[16 * pair:])
	}
	return XXH3_avalanche(res)
}
|
|
|
|
/*
	Long-input 64-bit hash core.

	Initialises the accumulators from `XXH3_INIT_ACC`, runs `XXH3_hashLong_internal_loop`
	over `input`, then merges the accumulators with the secret starting at byte
	`XXH_SECRET_MERGEACCS_START`, seeded with `len(input) * XXH_PRIME64_1`.

	Asserts that `secret` is long enough for the merge step.
*/
@(optimization_mode="favor_size")
XXH3_hashLong_64b_internal :: #force_inline proc(input: []u8, secret: []u8,
	f_acc512: XXH3_accumulate_512_f, f_scramble: XXH3_scramble_accumulator_f) -> (hash: xxh_u64) {

	/* do not align on 8, so that the secret is different from the accumulator */
	XXH_SECRET_MERGEACCS_START :: 11

	acc: [XXH_ACC_NB]xxh_u64 = XXH3_INIT_ACC
	#assert(size_of(acc) == 64)

	XXH3_hashLong_internal_loop(acc[:], input, secret, f_acc512, f_scramble)

	/* converge into final hash */
	assert(len(secret) >= size_of(acc) + XXH_SECRET_MERGEACCS_START)

	merge_secret := secret[XXH_SECRET_MERGEACCS_START:]
	merge_start  := xxh_u64(len(input)) * XXH_PRIME64_1
	return XXH3_mergeAccs(acc[:], merge_secret, merge_start)
}
|
|
|
|
/*
	Long-input hash using the caller-supplied `secret`. `seed64` is not used.

	It's important for performance that XXH3_hashLong is not inlined.
*/
XXH3_hashLong_64b_withSecret :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
	hash = XXH3_hashLong_64b_internal(input, secret, XXH3_accumulate_512, XXH3_scramble_accumulator)
	return
}
|
|
|
|
/*
	Long-input hash using the built-in `XXH3_kSecret`. Both `seed64` and `secret` are ignored.

	It's important for performance that XXH3_hashLong is not inlined.
	Because the procedure is not inlined, the compiler may fail to notice that the `secret`
	it is given is, in some scenarios, a compile-time constant.
	This variant names `XXH3_kSecret` directly so the compiler can detect that
	and streamline the generated code for better performance.
*/
XXH3_hashLong_64b_default :: #force_no_inline proc(input: []u8, seed64: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
	hash = XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], XXH3_accumulate_512, XXH3_scramble_accumulator)
	return
}
|
|
|
|
/*
	Long-input hash with a seed.

	A non-zero seed builds a temporary secret of `XXH_SECRET_DEFAULT_SIZE` bytes through
	`f_init_sec` and hashes with it. A zero seed hashes with `XXH3_kSecret` directly,
	skipping the secret derivation.
*/
XXH3_hashLong_64b_withSeed_internal :: #force_inline proc(
	input:      []u8,
	seed:       xxh_u64,
	f_acc512:   XXH3_accumulate_512_f,
	f_scramble: XXH3_scramble_accumulator_f,
	f_init_sec: XXH3_init_custom_secret_f,
) -> (hash: xxh_u64) {
	if seed != 0 {
		seeded_secret: [XXH_SECRET_DEFAULT_SIZE]u8
		f_init_sec(seeded_secret[:], seed)
		return XXH3_hashLong_64b_internal(input, seeded_secret[:], f_acc512, f_scramble)
	}

	return XXH3_hashLong_64b_internal(input, XXH3_kSecret[:], f_acc512, f_scramble)
}
|
|
|
|
/*
	XXH3_hashLong_64b_withSeed():
	Derives a custom key by altering the default `XXH3_kSecret` with the seed,
	then uses that key for long mode hashing. The `secret` parameter is not used.

	Deriving the key is reasonably fast but still costs some time,
	so avoid it whenever possible (typically when seed == 0).

	It's important for performance that XXH3_hashLong is not inlined. Not sure
	why (uop cache maybe?), but the difference is large and easily measurable.
*/
XXH3_hashLong_64b_withSeed :: #force_no_inline proc(input: []u8, seed: xxh_u64, secret: []u8) -> (hash: xxh_u64) {
	hash = XXH3_hashLong_64b_withSeed_internal(input, seed, XXH3_accumulate_512, XXH3_scramble_accumulator, XXH3_init_custom_secret)
	return
}
|
|
|
|
|
|
// Signature of the long-input hashers handed to `XXH3_64bits_internal` as `f_hashLong`.
XXH3_hashLong64_f :: #type proc(input: []u8, seed: xxh_u64, secret: []u8) -> (res: xxh_u64)
|
|
|
|
/*
	Dispatches to the hashing routine matching the input length:
	up to 16 bytes, 17 to 128 bytes, 129 to `XXH3_MIDSIZE_MAX` bytes,
	and `f_hashLong` for anything longer.

	`len(secret) >= XXH3_SECRET_SIZE_MIN` is a contract pre-condition, checked only by an assert.
	If an action is ever to be taken when that condition is not respected, it belongs here.
	Adding a check and a branch would cost performance at every hash, and the
	signature offers no room to return an error.
*/
@(optimization_mode="favor_size")
XXH3_64bits_internal :: #force_inline proc(input: []u8, seed: xxh_u64, secret: []u8, f_hashLong: XXH3_hashLong64_f) -> (hash: xxh_u64) {
	assert(len(secret) >= XXH3_SECRET_SIZE_MIN)

	n := len(input)
	if n <= 16 {
		return XXH3_len_0to16_64b(input, secret, seed)
	}
	if n <= 128 {
		return XXH3_len_17to128_64b(input, secret, seed)
	}
	if n <= XXH3_MIDSIZE_MAX {
		return XXH3_len_129to240_64b(input, secret, seed)
	}
	return f_hashLong(input, seed, secret)
}
|
|
|
|
/* === Public entry point === */
|
|
// 64-bit XXH3 hash of `input` with seed 0 and the built-in `XXH3_kSecret`.
@(optimization_mode="favor_size")
XXH3_64_default :: proc(input: []u8) -> (hash: xxh_u64) {
	hash = XXH3_64bits_internal(input, 0, XXH3_kSecret[:], XXH3_hashLong_64b_default)
	return
}
|
|
|
|
// 64-bit XXH3 hash of `input` with the given `seed` and the built-in `XXH3_kSecret`.
@(optimization_mode="favor_size")
XXH3_64_with_seed :: proc(input: []u8, seed: xxh_u64) -> (hash: xxh_u64) {
	hash = XXH3_64bits_internal(input, seed, XXH3_kSecret[:], XXH3_hashLong_64b_withSeed)
	return
}
|
|
|
|
// 64-bit XXH3 hash of `input` with seed 0 and a caller-supplied `secret`.
// `secret` must be at least `XXH3_SECRET_SIZE_MIN` bytes long (asserted in `XXH3_64bits_internal`).
@(optimization_mode="favor_size")
XXH3_64_with_secret :: proc(input, secret: []u8) -> (hash: xxh_u64) {
	hash = XXH3_64bits_internal(input, 0, secret, XXH3_hashLong_64b_withSecret)
	return
}
|
|
|
|
// Overload set over the default, seeded and custom-secret 64-bit XXH3 entry points.
XXH3_64 :: proc { XXH3_64_default, XXH3_64_with_seed, XXH3_64_with_secret }
|