diff --git a/core/hash/xxhash/common.odin b/core/hash/xxhash/common.odin
new file mode 100644
index 000000000..93a4230c4
--- /dev/null
+++ b/core/hash/xxhash/common.odin
@@ -0,0 +1,78 @@
+/*
+	An implementation of Yann Collet's [xxhash Fast Hash Algorithm](https://cyan4973.github.io/xxHash/).
+	Copyright 2021 Jeroen van Rijn.
+
+	Made available under Odin's BSD-3 license, based on the original C code.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+package xxhash
+
+import "core:intrinsics"
+import "core:runtime"
+mem_copy :: runtime.mem_copy
+
+/*
+	Version definition
+*/
+XXH_VERSION_MAJOR   :: 0
+XXH_VERSION_MINOR   :: 8
+XXH_VERSION_RELEASE :: 1
+XXH_VERSION_NUMBER  :: XXH_VERSION_MAJOR * 100 * 100 + XXH_VERSION_MINOR * 100 + XXH_VERSION_RELEASE
+
+/*
+	0 - Use `mem_copy`, for platforms where unaligned reads are a problem
+	2 - Direct cast, for platforms where unaligned reads are allowed (default)
+*/
+XXH_FORCE_MEMORY_ACCESS :: #config(XXH_FORCE_MEMORY_ACCESS, 2)
+
+/*
+	`false` - Use this on platforms where unaligned reads are fast
+	`true`  - Use this on platforms where unaligned reads are slow
+*/
+XXH_FORCE_ALIGN_CHECK :: #config(XXH_FORCE_ALIGN_CHECK, false)
+
+Alignment :: enum {
+	Aligned,
+	Unaligned,
+}
+
+Error :: enum {
+	Okay = 0,
+	Error,
+}
+
+@(optimization_mode="speed")
+XXH_rotl32 :: #force_inline proc(x, r: u32) -> (res: u32) {
+	return ((x << r) | (x >> (32 - r)))
+}
+
+@(optimization_mode="speed")
+XXH_rotl64 :: #force_inline proc(x, r: u64) -> (res: u64) {
+	return ((x << r) | (x >> (64 - r)))
+}
+
+@(optimization_mode="speed")
+XXH32_read32 :: #force_inline proc(buf: []u8, alignment: Alignment) -> (res: u32) {
+	if XXH_FORCE_MEMORY_ACCESS == 2 || alignment == .Aligned {
+		#no_bounds_check b := (^u32le)(&buf[0])^
+		return u32(b)
+	} else {
+		b: u32le
+		mem_copy(&b, raw_data(buf[:]), 4)
+		return u32(b)
+	}
+}
+
+@(optimization_mode="speed")
+XXH64_read64 :: #force_inline proc(buf: []u8, alignment: Alignment) -> (res: u64) {
+	if XXH_FORCE_MEMORY_ACCESS == 2 || alignment == .Aligned {
+		#no_bounds_check b := (^u64le)(&buf[0])^
+		return u64(b)
+	} else {
+		b: u64le
+		mem_copy(&b, raw_data(buf[:]), 8)
+		return u64(b)
+	}
+}
\ No newline at end of file
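As a quick sanity check of the version encoding above: major 0, minor 8, release 1 pack into 0 * 10_000 + 8 * 100 + 1 = 801. A minimal sketch of querying it from user code follows; the demo package name is hypothetical and not part of this patch:

package xxhash_version_demo

import "core:fmt"
import "core:hash/xxhash"

main :: proc() {
	// 0 * 10_000 + 8 * 100 + 1 == 801 for version 0.8.1.
	fmt.println("xxhash version number:", xxhash.XXH_VERSION_NUMBER)
}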
*/ +} + +XXH32_canonical :: struct { + digest: [4]u8, +} + +XXH_PRIME32_1 :: 0x9E3779B1 /*!< 0b10011110001101110111100110110001 */ +XXH_PRIME32_2 :: 0x85EBCA77 /*!< 0b10000101111010111100101001110111 */ +XXH_PRIME32_3 :: 0xC2B2AE3D /*!< 0b11000010101100101010111000111101 */ +XXH_PRIME32_4 :: 0x27D4EB2F /*!< 0b00100111110101001110101100101111 */ +XXH_PRIME32_5 :: 0x165667B1 /*!< 0b00010110010101100110011110110001 */ + +@(optimization_mode="speed") +XXH32_round :: #force_inline proc(seed, input: XXH32_hash) -> (res: XXH32_hash) { + seed := seed + + seed += input * XXH_PRIME32_2 + seed = XXH_rotl32(seed, 13) + seed *= XXH_PRIME32_1 + return seed +} + +/* + Mix all bits +*/ +@(optimization_mode="speed") +XXH32_avalanche :: #force_inline proc(h32: u32) -> (res: u32) { + h32 := h32 + + h32 ~= h32 >> 15 + h32 *= XXH_PRIME32_2 + h32 ~= h32 >> 13 + h32 *= XXH_PRIME32_3 + h32 ~= h32 >> 16 + return h32 +} + +@(optimization_mode="speed") +XXH32_finalize :: #force_inline proc(h32: u32, buf: []u8, alignment: Alignment) -> (res: u32) { + process_1 :: #force_inline proc(h32: u32, buf: []u8) -> (h32_res: u32, buf_res: []u8) { + #no_bounds_check b := u32(buf[0]) + h32_res = h32 + b * XXH_PRIME32_5 + h32_res = XXH_rotl32(h32_res, 11) * XXH_PRIME32_1 + #no_bounds_check return h32_res, buf[1:] + } + + process_4 :: #force_inline proc(h32: u32, buf: []u8, alignment: Alignment) -> (h32_res: u32, buf_res: []u8) { + b := XXH32_read32(buf, alignment) + h32_res = h32 + b * XXH_PRIME32_3 + h32_res = XXH_rotl32(h32_res, 17) * XXH_PRIME32_4 + #no_bounds_check return h32_res, buf[4:] + } + + buf := buf + h32 := h32 + + switch len(buf) & 15 { + case 12: + h32, buf = process_4(h32, buf, alignment) + fallthrough + case 8: + h32, buf = process_4(h32, buf, alignment) + fallthrough + case 4: + h32, _ = process_4(h32, buf, alignment) + return XXH32_avalanche(h32) + + case 13: + h32, buf = process_4(h32, buf, alignment) + fallthrough + case 9: + h32, buf = process_4(h32, buf, alignment) + fallthrough + case 5: + h32, buf = process_4(h32, buf, alignment) + h32, buf = process_1(h32, buf) + return XXH32_avalanche(h32) + + case 14: + h32, buf = process_4(h32, buf, alignment) + fallthrough + case 10: + h32, buf = process_4(h32, buf, alignment) + fallthrough + case 6: + h32, buf = process_4(h32, buf, alignment) + h32, buf = process_1(h32, buf) + h32, buf = process_1(h32, buf) + return XXH32_avalanche(h32) + + case 15: + h32, buf = process_4(h32, buf, alignment) + fallthrough + case 11: + h32, buf = process_4(h32, buf, alignment) + fallthrough + case 7: + h32, buf = process_4(h32, buf, alignment) + fallthrough + + case 3: + h32, buf = process_1(h32, buf) + fallthrough + case 2: + h32, buf = process_1(h32, buf) + fallthrough + case 1: + h32, buf = process_1(h32, buf) + fallthrough + case 0: + return XXH32_avalanche(h32) + } + unreachable() +} + +@(optimization_mode="speed") +XXH32_endian_align :: #force_inline proc(input: []u8, seed := XXH32_DEFAULT_SEED, alignment: Alignment) -> (res: XXH32_hash) { + buf := input + length := len(input) + + if length >= 16 { + v1 := seed + XXH_PRIME32_1 + XXH_PRIME32_2 + v2 := seed + XXH_PRIME32_2 + v3 := seed + 0 + v4 := seed - XXH_PRIME32_1 + + for len(buf) >= 15 { + #no_bounds_check v1 = XXH32_round(v1, XXH32_read32(buf, alignment)); buf = buf[4:] + #no_bounds_check v2 = XXH32_round(v2, XXH32_read32(buf, alignment)); buf = buf[4:] + #no_bounds_check v3 = XXH32_round(v3, XXH32_read32(buf, alignment)); buf = buf[4:] + #no_bounds_check v4 = XXH32_round(v4, XXH32_read32(buf, alignment)); buf = buf[4:] + 
+
+@(optimization_mode="speed")
+XXH32_endian_align :: #force_inline proc(input: []u8, seed := XXH32_DEFAULT_SEED, alignment: Alignment) -> (res: XXH32_hash) {
+	buf := input
+	length := len(input)
+
+	if length >= 16 {
+		v1 := seed + XXH_PRIME32_1 + XXH_PRIME32_2
+		v2 := seed + XXH_PRIME32_2
+		v3 := seed + 0
+		v4 := seed - XXH_PRIME32_1
+
+		for len(buf) >= 16 { // Process 16-byte stripes: 4 bytes for each of the 4 lanes.
+			#no_bounds_check v1 = XXH32_round(v1, XXH32_read32(buf, alignment)); buf = buf[4:]
+			#no_bounds_check v2 = XXH32_round(v2, XXH32_read32(buf, alignment)); buf = buf[4:]
+			#no_bounds_check v3 = XXH32_round(v3, XXH32_read32(buf, alignment)); buf = buf[4:]
+			#no_bounds_check v4 = XXH32_round(v4, XXH32_read32(buf, alignment)); buf = buf[4:]
+		}
+
+		res = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18)
+	} else {
+		res = seed + XXH_PRIME32_5
+	}
+
+	res += u32(length)
+	return XXH32_finalize(res, buf, alignment)
+}
+
+XXH32 :: proc(input: []u8, seed := XXH32_DEFAULT_SEED) -> (digest: XXH32_hash) {
+	when false {
+		/*
+			Simple version, good for code maintenance, but unfortunately slow for small inputs.
+		*/
+		state: XXH32_state
+		XXH32_reset_state(&state, seed)
+		XXH32_update(&state, input)
+		return XXH32_digest(&state)
+	} else {
+		when XXH_FORCE_ALIGN_CHECK {
+			if uintptr(raw_data(input)) & uintptr(3) == 0 {
+				/*
+					Input is 4-bytes aligned, leverage the speed benefit.
+				*/
+				return XXH32_endian_align(input, seed, .Aligned)
+			}
+		}
+		return XXH32_endian_align(input, seed, .Unaligned)
+	}
+}
+
+/*
+	****** Hash streaming ******
+*/
+XXH32_create_state :: proc(allocator := context.allocator) -> (res: ^XXH32_state, err: Error) {
+	state := new(XXH32_state, allocator)
+	return state, nil if state != nil else .Error
+}
+
+XXH32_destroy_state :: proc(state: ^XXH32_state, allocator := context.allocator) -> (err: Error) {
+	free(state, allocator)
+	return nil
+}
+
+XXH32_copy_state :: proc(dest, src: ^XXH32_state) {
+	assert(dest != nil && src != nil)
+	mem_copy(dest, src, size_of(XXH32_state))
+}
+
+XXH32_reset_state :: proc(state_ptr: ^XXH32_state, seed := XXH32_DEFAULT_SEED) -> (err: Error) {
+	state := XXH32_state{}
+
+	state.v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2
+	state.v2 = seed + XXH_PRIME32_2
+	state.v3 = seed + 0
+	state.v4 = seed - XXH_PRIME32_1
+	/*
+		Do not write into reserved, planned to be removed in a future version.
+	*/
+	mem_copy(state_ptr, &state, size_of(state) - size_of(state.reserved))
+	return nil
+}
+
+XXH32_update :: proc(state: ^XXH32_state, input: []u8) -> (err: Error) {
+	buf := input
+	length := len(buf)
+
+	state.total_len_32 += XXH32_hash(length)
+	state.large_len |= 1 if length >= 16 || state.total_len_32 >= 16 else 0
+
+	if state.memsize + u32(length) < 16 { /* Fill in tmp buffer */
+		ptr := uintptr(raw_data(state.mem32[:])) + uintptr(state.memsize)
+		mem_copy(rawptr(ptr), raw_data(input), int(length))
+		state.memsize += XXH32_hash(length)
+		return nil
+	}
+
+	if state.memsize > 0 { /* Some data left from previous update */
+		ptr := uintptr(raw_data(state.mem32[:])) + uintptr(state.memsize)
+		mem_copy(rawptr(ptr), raw_data(input), int(16 - state.memsize))
+		{
+			#no_bounds_check state.v1 = XXH32_round(state.v1, state.mem32[0])
+			#no_bounds_check state.v2 = XXH32_round(state.v2, state.mem32[1])
+			#no_bounds_check state.v3 = XXH32_round(state.v3, state.mem32[2])
+			#no_bounds_check state.v4 = XXH32_round(state.v4, state.mem32[3])
+		}
+		buf = buf[16 - state.memsize:]
+		state.memsize = 0
+	}
+
+	if len(buf) >= 16 {
+		v1 := state.v1
+		v2 := state.v2
+		v3 := state.v3
+		v4 := state.v4
+
+		for len(buf) >= 16 { // Process 16-byte stripes, same as the one-shot path.
+			#no_bounds_check v1 = XXH32_round(v1, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
+			#no_bounds_check v2 = XXH32_round(v2, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
+			#no_bounds_check v3 = XXH32_round(v3, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
+			#no_bounds_check v4 = XXH32_round(v4, XXH32_read32(buf, .Unaligned)); buf = buf[4:]
+		}
+
+		state.v1 = v1
+		state.v2 = v2
+		state.v3 = v3
+		state.v4 = v4
+	}
+
+	length = len(buf)
+	if length > 0 {
+		mem_copy(raw_data(state.mem32[:]), raw_data(buf[:]), int(length))
+		state.memsize = u32(length)
+	}
+	return nil
+}
+
+XXH32_digest :: proc(state: ^XXH32_state) -> (res: XXH32_hash) {
+	if state.large_len > 0 {
+		res = XXH_rotl32(state.v1, 1) + XXH_rotl32(state.v2, 7) + XXH_rotl32(state.v3, 12) + XXH_rotl32(state.v4, 18)
+	} else {
+		res = state.v3 /* == seed */ + XXH_PRIME32_5
+	}
+
+	res += state.total_len_32
+
+	buf := (^[16]u8)(&state.mem32)^
+	// `buf` is the local copy we actually read from; 4-byte alignment suffices for u32 loads.
+	alignment: Alignment = .Aligned if uintptr(&buf) & 3 == 0 else .Unaligned
+	return XXH32_finalize(res, buf[:state.memsize], alignment)
+}
+
+/*
+	****** Canonical representation ******
+
+	The default return values from XXH functions are unsigned 32 and 64 bit integers.
+
+	The canonical representation uses big endian convention,
+	the same convention as human-readable numbers (large digits first).
+
+	This way, hash values can be written into a file or buffer, remaining
+	comparable across different systems.
+
+	The following functions allow transformation of hash values to and from their
+	canonical format.
+*/
+XXH32_canonical_from_hash :: proc(hash: XXH32_hash) -> (canonical: XXH32_canonical) {
+	#assert(size_of(XXH32_canonical) == size_of(XXH32_hash))
+	h := u32be(hash)
+	mem_copy(&canonical, &h, size_of(canonical))
+	return
+}
+
+XXH32_hash_from_canonical :: proc(canonical: ^XXH32_canonical) -> (hash: XXH32_hash) {
+	h := (^u32be)(&canonical.digest)^
+	return XXH32_hash(h)
+}
\ No newline at end of file
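To show how the one-shot, streaming, and canonical APIs from this file fit together, here is a minimal usage sketch. It mirrors the reference path in the `when false` block above; the demo package name is hypothetical and the byte values are arbitrary:

package xxhash32_usage_demo

import "core:fmt"
import "core:hash/xxhash"

main :: proc() {
	data := []u8{1, 2, 3, 4, 5}

	// One-shot.
	h := xxhash.XXH32(data)

	// Streaming: hashing the same bytes in two chunks must match the one-shot result.
	state, err := xxhash.XXH32_create_state()
	assert(err == .Okay)
	defer xxhash.XXH32_destroy_state(state)

	xxhash.XXH32_reset_state(state) // Seeds the lanes; a freshly allocated state is not yet usable.
	xxhash.XXH32_update(state, data[:2])
	xxhash.XXH32_update(state, data[2:])
	assert(xxhash.XXH32_digest(state) == h)

	// Canonical (big-endian) round trip.
	c := xxhash.XXH32_canonical_from_hash(h)
	assert(xxhash.XXH32_hash_from_canonical(&c) == h)

	fmt.printf("XXH32 = %08x\n", h)
}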
*/ +} + +XXH64_canonical :: struct { + digest: [8]u8, +} + +XXH_PRIME64_1 :: 0x9E3779B185EBCA87 /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */ +XXH_PRIME64_2 :: 0xC2B2AE3D27D4EB4F /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */ +XXH_PRIME64_3 :: 0x165667B19E3779F9 /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */ +XXH_PRIME64_4 :: 0x85EBCA77C2B2AE63 /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */ +XXH_PRIME64_5 :: 0x27D4EB2F165667C5 /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */ + +@(optimization_mode="speed") +XXH64_round :: proc(acc, input: xxh_u64) -> (res: xxh_u64) { + acc := acc + + acc += input * XXH_PRIME64_2 + acc = XXH_rotl64(acc, 31) + acc *= XXH_PRIME64_1 + return acc +} + +@(optimization_mode="speed") +XXH64_mergeRound :: proc(acc, val: xxh_u64) -> (res: xxh_u64) { + res = acc ~ XXH64_round(0, val) + res = res * XXH_PRIME64_1 + XXH_PRIME64_4 + return res +} + +@(optimization_mode="speed") +XXH64_avalanche :: proc(h64: xxh_u64) -> (res: xxh_u64) { + res = h64 + res ~= res >> 33 + res *= XXH_PRIME64_2 + res ~= res >> 29 + res *= XXH_PRIME64_3 + res ~= res >> 32 + return res +} + +@(optimization_mode="speed") +XXH64_finalize :: proc(h64: xxh_u64, buf: []u8, alignment: Alignment) -> (res: xxh_u64) { + buf := buf + length := len(buf) & 31 + res = h64 + + for length >= 8 { + b := XXH64_read64(buf, alignment) + k1 := XXH64_round(0, b) + #no_bounds_check buf = buf[8:] + res ~= k1 + res = XXH_rotl64(res, 27) * XXH_PRIME64_1 + XXH_PRIME64_4 + length -= 8 + } + + if length >= 4 { + res ~= xxh_u64(XXH32_read32(buf, alignment)) * XXH_PRIME64_1 + #no_bounds_check buf = buf[4:] + res = XXH_rotl64(res, 23) * XXH_PRIME64_2 + XXH_PRIME64_3 + length -= 4 + } + + for length > 0 { + #no_bounds_check b := xxh_u64(buf[0]) + buf = buf[1:] + res ~= b * XXH_PRIME64_5 + res = XXH_rotl64(res, 11) * XXH_PRIME64_1 + length -= 1 + } + return XXH64_avalanche(res) +} + +@(optimization_mode="speed") +XXH64_endian_align :: proc(input: []u8, seed := XXH64_DEFAULT_SEED, alignment := Alignment.Unaligned) -> (res: xxh_u64) { + buf := input + length := len(buf) + + if length >= 32 { + v1 := seed + XXH_PRIME64_1 + XXH_PRIME64_2 + v2 := seed + XXH_PRIME64_2 + v3 := seed + 0 + v4 := seed - XXH_PRIME64_1 + + for len(buf) >= 32 { + v1 = XXH64_round(v1, XXH64_read64(buf, alignment)); buf = buf[8:] + v2 = XXH64_round(v2, XXH64_read64(buf, alignment)); buf = buf[8:] + v3 = XXH64_round(v3, XXH64_read64(buf, alignment)); buf = buf[8:] + v4 = XXH64_round(v4, XXH64_read64(buf, alignment)); buf = buf[8:] + } + + res = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18) + res = XXH64_mergeRound(res, v1) + res = XXH64_mergeRound(res, v2) + res = XXH64_mergeRound(res, v3) + res = XXH64_mergeRound(res, v4) + } else { + res = seed + XXH_PRIME64_5 + } + res += xxh_u64(length) + + return XXH64_finalize(res, buf, alignment) +} + +XXH64 :: proc(input: []u8, seed := XXH64_DEFAULT_SEED) -> (digest: XXH64_hash) { + when false { + /* + Simple version, good for code maintenance, but unfortunately slow for small inputs. + */ + state: XXH64_state + XXH64_reset_state(&state, seed) + buf := input + for len(buf) > 0 { + l := min(65536, len(buf)) + XXH64_update(&state, buf[:l]) + buf = buf[l:] + } + return XXH64_digest(&state) + } else { + when XXH_FORCE_ALIGN_CHECK { + if uintptr(raw_data(input)) & uintptr(7) == 0 { + /* + Input is 8-bytes aligned, leverage the speed benefit. 
+
+XXH64 :: proc(input: []u8, seed := XXH64_DEFAULT_SEED) -> (digest: XXH64_hash) {
+	when false {
+		/*
+			Simple version, good for code maintenance, but unfortunately slow for small inputs.
+		*/
+		state: XXH64_state
+		XXH64_reset_state(&state, seed)
+		buf := input
+		for len(buf) > 0 {
+			l := min(65536, len(buf))
+			XXH64_update(&state, buf[:l])
+			buf = buf[l:]
+		}
+		return XXH64_digest(&state)
+	} else {
+		when XXH_FORCE_ALIGN_CHECK {
+			if uintptr(raw_data(input)) & uintptr(7) == 0 {
+				/*
+					Input is 8-bytes aligned, leverage the speed benefit.
+				*/
+				return XXH64_endian_align(input, seed, .Aligned)
+			}
+		}
+		return XXH64_endian_align(input, seed, .Unaligned)
+	}
+}
+
+/*
+	****** Hash Streaming ******
+*/
+XXH64_create_state :: proc(allocator := context.allocator) -> (res: ^XXH64_state, err: Error) {
+	state := new(XXH64_state, allocator)
+	return state, nil if state != nil else .Error
+}
+
+XXH64_destroy_state :: proc(state: ^XXH64_state, allocator := context.allocator) -> (err: Error) {
+	free(state, allocator)
+	return nil
+}
+
+XXH64_copy_state :: proc(dest, src: ^XXH64_state) {
+	assert(dest != nil && src != nil)
+	mem_copy(dest, src, size_of(XXH64_state))
+}
+
+XXH64_reset_state :: proc(state_ptr: ^XXH64_state, seed := XXH64_DEFAULT_SEED) -> (err: Error) {
+	state := XXH64_state{}
+
+	state.v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2
+	state.v2 = seed + XXH_PRIME64_2
+	state.v3 = seed + 0
+	state.v4 = seed - XXH_PRIME64_1
+	/*
+		Do not write into reserved64, it might be removed in a future version.
+	*/
+	mem_copy(state_ptr, &state, size_of(state) - size_of(state.reserved64))
+	return nil
+}
+
+@(optimization_mode="speed")
+XXH64_update :: proc(state: ^XXH64_state, input: []u8) -> (err: Error) {
+	buf := input
+	length := len(buf)
+
+	state.total_len += u64(length)
+
+	if state.memsize + u32(length) < 32 { /* fill in tmp buffer */
+		ptr := uintptr(raw_data(state.mem64[:])) + uintptr(state.memsize)
+		mem_copy(rawptr(ptr), raw_data(input), int(length))
+		state.memsize += u32(length)
+		return nil
+	}
+
+	if state.memsize > 0 { /* tmp buffer is full */
+		ptr := uintptr(raw_data(state.mem64[:])) + uintptr(state.memsize)
+		mem_copy(rawptr(ptr), raw_data(input), int(32 - state.memsize))
+		{
+			#no_bounds_check state.v1 = XXH64_round(state.v1, state.mem64[0])
+			#no_bounds_check state.v2 = XXH64_round(state.v2, state.mem64[1])
+			#no_bounds_check state.v3 = XXH64_round(state.v3, state.mem64[2])
+			#no_bounds_check state.v4 = XXH64_round(state.v4, state.mem64[3])
+		}
+		buf = buf[32 - state.memsize:]
+		state.memsize = 0
+	}
+
+	if len(buf) >= 32 {
+		v1 := state.v1
+		v2 := state.v2
+		v3 := state.v3
+		v4 := state.v4
+
+		for len(buf) >= 32 {
+			#no_bounds_check v1 = XXH64_round(v1, XXH64_read64(buf, .Unaligned)); buf = buf[8:]
+			#no_bounds_check v2 = XXH64_round(v2, XXH64_read64(buf, .Unaligned)); buf = buf[8:]
+			#no_bounds_check v3 = XXH64_round(v3, XXH64_read64(buf, .Unaligned)); buf = buf[8:]
+			#no_bounds_check v4 = XXH64_round(v4, XXH64_read64(buf, .Unaligned)); buf = buf[8:]
+		}
+
+		state.v1 = v1
+		state.v2 = v2
+		state.v3 = v3
+		state.v4 = v4
+	}
+
+	length = len(buf)
+	if length > 0 {
+		mem_copy(raw_data(state.mem64[:]), raw_data(buf[:]), int(length))
+		state.memsize = u32(length)
+	}
+	return nil
+}
+
+@(optimization_mode="speed")
+XXH64_digest :: proc(state: ^XXH64_state) -> (res: XXH64_hash) {
+	if state.total_len >= 32 {
+		v1 := state.v1
+		v2 := state.v2
+		v3 := state.v3
+		v4 := state.v4
+
+		res = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18)
+		res = XXH64_mergeRound(res, v1)
+		res = XXH64_mergeRound(res, v2)
+		res = XXH64_mergeRound(res, v3)
+		res = XXH64_mergeRound(res, v4)
+	} else {
+		res = state.v3 /* == seed */ + XXH_PRIME64_5
+	}
+	res += XXH64_hash(state.total_len)
+
+	buf := (^[32]u8)(&state.mem64)^
+	// `buf` is the local copy we actually read from; 8-byte alignment suffices for u64 loads.
+	alignment: Alignment = .Aligned if uintptr(&buf) & 7 == 0 else .Unaligned
+	return XXH64_finalize(res, buf[:state.memsize], alignment)
+}
+
+/*
+	****** Canonical representation ******
+
+	The default return values from XXH functions are unsigned 32 and 64 bit integers.
+
+	The canonical representation uses big endian convention,
+	the same convention as human-readable numbers (large digits first).
+
+	This way, hash values can be written into a file or buffer, remaining
+	comparable across different systems.
+
+	The following functions allow transformation of hash values to and from their
+	canonical format.
+*/
+XXH64_canonical_from_hash :: proc(hash: XXH64_hash) -> (canonical: XXH64_canonical) {
+	#assert(size_of(XXH64_canonical) == size_of(XXH64_hash))
+	h := u64be(hash)
+	mem_copy(&canonical, &h, size_of(canonical))
+	return
+}
+
+XXH64_hash_from_canonical :: proc(canonical: ^XXH64_canonical) -> (hash: XXH64_hash) {
+	h := (^u64be)(&canonical.digest)^
+	return XXH64_hash(h)
+}
\ No newline at end of file
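A short sketch of the canonical round trip described in the comment block above, checking that the most significant byte indeed lands first; the demo package name is hypothetical and the input bytes are arbitrary:

package xxhash64_canonical_demo

import "core:fmt"
import "core:hash/xxhash"

main :: proc() {
	input := []u8{'O', 'd', 'i', 'n'}
	h := xxhash.XXH64(input)

	// Canonical form is big-endian: the most significant byte of the digest
	// comes first, independent of host endianness.
	c := xxhash.XXH64_canonical_from_hash(h)
	assert(c.digest[0] == u8(h >> 56))
	assert(xxhash.XXH64_hash_from_canonical(&c) == h)

	fmt.printf("XXH64 = %16x, canonical bytes = %v\n", h, c.digest)
}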
diff --git a/core/time/perf.odin b/core/time/perf.odin
index 5146ad543..f8d2765bb 100644
--- a/core/time/perf.odin
+++ b/core/time/perf.odin
@@ -1,5 +1,7 @@
 package time
 
+import "core:mem"
+
 Tick :: struct {
 	_nsec: i64, // relative amount
 }
@@ -37,3 +39,59 @@ SCOPED_TICK_DURATION :: proc(d: ^Duration) -> Tick {
 _tick_duration_end :: proc(d: ^Duration, t: Tick) {
 	d^ = tick_since(t)
 }
+
+/*
+	Benchmark helpers
+*/
+
+Benchmark_Error :: enum {
+	Okay = 0,
+	Allocation_Error,
+}
+
+Benchmark_Options :: struct {
+	setup:    #type proc(options: ^Benchmark_Options, allocator: mem.Allocator) -> (err: Benchmark_Error),
+	bench:    #type proc(options: ^Benchmark_Options, allocator: mem.Allocator) -> (err: Benchmark_Error),
+	teardown: #type proc(options: ^Benchmark_Options, allocator: mem.Allocator) -> (err: Benchmark_Error),
+
+	rounds: int,
+	bytes:  int,
+	input:  []u8,
+
+	count:     int,
+	processed: int,
+	output:    []u8, // Unused for hash benchmarks
+	hash:      u128,
+
+	/*
+		Performance
+	*/
+	duration:             Duration,
+	rounds_per_second:    f64,
+	megabytes_per_second: f64,
+}
+
+benchmark :: proc(options: ^Benchmark_Options, allocator := context.allocator) -> (err: Benchmark_Error) {
+	assert(options != nil)
+	assert(options.bench != nil)
+
+	if options.setup != nil {
+		options->setup(allocator) or_return
+	}
+
+	diff: Duration
+	{
+		SCOPED_TICK_DURATION(&diff)
+		options->bench(allocator) or_return
+	}
+	options.duration = diff
+
+	times_per_second := f64(Second) / f64(diff)
+	options.rounds_per_second    = times_per_second * f64(options.count)
+	options.megabytes_per_second = f64(options.processed) / f64(1024 * 1024) * times_per_second
+
+	if options.teardown != nil {
+		options->teardown(allocator) or_return
+	}
+	return
+}
\ No newline at end of file
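A minimal sketch of driving the new harness with a throwaway workload, to illustrate the `setup`/`bench`/`teardown` contract; all `demo_*` names are hypothetical and the workload is arbitrary. Each `bench` is responsible for filling in `count` and `processed`, which the harness turns into rounds/s and MiB/s:

package benchmark_demo

import "core:fmt"
import "core:mem"
import "core:time"

// A trivial benchmark body: XOR over the input buffer.
demo_bench :: proc(options: ^time.Benchmark_Options, allocator: mem.Allocator) -> (err: time.Benchmark_Error) {
	acc: u8
	for _ in 0..<options.rounds {
		for b in options.input {
			acc ~= b
		}
	}
	options.count     = options.rounds
	options.processed = options.rounds * options.bytes
	options.hash      = u128(acc)
	return nil
}

demo_setup :: proc(options: ^time.Benchmark_Options, allocator: mem.Allocator) -> (err: time.Benchmark_Error) {
	options.input = make([]u8, options.bytes, allocator)
	return nil if len(options.input) == options.bytes else .Allocation_Error
}

demo_teardown :: proc(options: ^time.Benchmark_Options, allocator: mem.Allocator) -> (err: time.Benchmark_Error) {
	delete(options.input)
	return nil
}

main :: proc() {
	options := &time.Benchmark_Options{
		rounds   = 1_000,
		bytes    = 4_096,
		setup    = demo_setup,
		bench    = demo_bench,
		teardown = demo_teardown,
	}
	err := time.benchmark(options, context.allocator)
	assert(err == nil)
	fmt.printf("%5.3f rounds/s, %5.3f MiB/s\n", options.rounds_per_second, options.megabytes_per_second)
}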
diff --git a/core/time/time_windows.odin b/core/time/time_windows.odin
index 18a8c4046..6d4648b12 100644
--- a/core/time/time_windows.odin
+++ b/core/time/time_windows.odin
@@ -24,7 +24,8 @@ _tick_now :: proc() -> Tick {
 		return q * num + r * num / den
 	}
 
-	@thread_local qpc_frequency: win32.LARGE_INTEGER
+	// @thread_local qpc_frequency: win32.LARGE_INTEGER
+	qpc_frequency: win32.LARGE_INTEGER
 
 	if qpc_frequency == 0 {
 		win32.QueryPerformanceFrequency(&qpc_frequency)
diff --git a/tests/core/Makefile b/tests/core/Makefile
index 65af21204..5bcb924b1 100644
--- a/tests/core/Makefile
+++ b/tests/core/Makefile
@@ -1,7 +1,7 @@
 ODIN=../../odin
 PYTHON=$(shell which python3)
 
-all: download_test_assets image_test compress_test strings_test
+all: download_test_assets image_test compress_test strings_test hash_test
 
 download_test_assets:
 	$(PYTHON) download_assets.py
@@ -14,3 +14,6 @@ compress_test:
 
 strings_test:
 	$(ODIN) run strings/test_core_strings.odin
+
+hash_test:
+	$(ODIN) run hash/test_core_hash.odin -o:size -no-bounds-check
\ No newline at end of file
diff --git a/tests/core/build.bat b/tests/core/build.bat
index d298bfc3e..d7f7de902 100644
--- a/tests/core/build.bat
+++ b/tests/core/build.bat
@@ -16,3 +16,8 @@ echo ---
 echo Running core:strings tests
 echo ---
 %PATH_TO_ODIN% run strings %COMMON%
+
+echo ---
+echo Running core:hash tests
+echo ---
+%PATH_TO_ODIN% run hash %COMMON% -o:size
\ No newline at end of file
diff --git a/tests/core/hash/test_core_hash.odin b/tests/core/hash/test_core_hash.odin
new file mode 100644
index 000000000..44c7ef044
--- /dev/null
+++ b/tests/core/hash/test_core_hash.odin
@@ -0,0 +1,131 @@
+package test_core_hash
+
+import "core:hash/xxhash"
+import "core:time"
+import "core:testing"
+import "core:fmt"
+
+TEST_count := 0
+TEST_fail  := 0
+
+when ODIN_TEST {
+	expect :: testing.expect
+	log    :: testing.log
+} else {
+	expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
+		fmt.printf("[%v] ", loc)
+		TEST_count += 1
+		if !condition {
+			TEST_fail += 1
+			fmt.println(" FAIL:", message)
+			return
+		}
+		fmt.println(" PASS")
+	}
+	log :: proc(t: ^testing.T, v: any, loc := #caller_location) {
+		fmt.printf("[%v] ", loc)
+		fmt.printf("log: %v\n", v)
+	}
+}
+
+main :: proc() {
+	t := testing.T{}
+	test_benchmark_runner(&t)
+	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
+}
+
+/*
+	Benchmarks
+*/
+
+setup_xxhash :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	assert(options != nil)
+
+	options.input = make([]u8, options.bytes, allocator)
+	return nil if len(options.input) == options.bytes else .Allocation_Error
+}
+
+teardown_xxhash :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	assert(options != nil)
+
+	delete(options.input)
+	return nil
+}
+
+benchmark_xxhash32 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	buf := options.input
+
+	h: u32
+	for _ in 0..<options.rounds {
+		h = xxhash.XXH32(buf)
+	}
+	options.count     = options.rounds
+	options.processed = options.rounds * options.bytes
+	options.hash      = u128(h)
+	return nil
+}
+
+benchmark_xxhash64 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
+	buf := options.input
+
+	h: u64
+	for _ in 0..<options.rounds {
+		h = xxhash.XXH64(buf)
+	}
+	options.count     = options.rounds
+	options.processed = options.rounds * options.bytes
+	options.hash      = u128(h)
+	return nil
+}
+
+benchmark_print :: proc(name: string, options: ^time.Benchmark_Options) {
+	fmt.printf("\t[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n",
+		name,
+		options.rounds,
+		options.processed,
+		time.duration_nanoseconds(options.duration),
+		options.rounds_per_second,
+		options.megabytes_per_second,
+	)
+}
+
+@test
+test_benchmark_runner :: proc(t: ^testing.T) {
+	fmt.println("Starting benchmarks:")
+
+	name := "xxhash32 100 zero bytes"
+	options := &time.Benchmark_Options{
+		rounds   = 1_000,
+		bytes    = 100,
+		setup    = setup_xxhash,
+		bench    = benchmark_xxhash32,
+		teardown = teardown_xxhash,
+	}
+
+	err := time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	expect(t, options.hash == 0x85f6413c, name)
+	benchmark_print(name, options)
+
+	name = "xxhash32 1 MiB zero bytes"
+	options.bytes = 1_048_576
+	err = time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	expect(t, options.hash == 0x9430f97f, name)
+	benchmark_print(name, options)
+
+	name = "xxhash64 100 zero bytes"
+	options.bytes = 100
+	options.bench = benchmark_xxhash64
+	err = time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	expect(t, options.hash == 0x17bb1103c92c502f, name)
+	benchmark_print(name, options)
+
+	name = "xxhash64 1 MiB zero bytes"
+	options.bytes = 1_048_576
+	err = time.benchmark(options, context.allocator)
+	expect(t, err == nil, name)
+	expect(t, options.hash == 0x87d2a1b6e1163ef1, name)
+	benchmark_print(name, options)
+}
\ No newline at end of file
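The expected values asserted above can also be spot-checked outside the harness; a minimal sketch using the same 100-zero-byte vectors from the test (hypothetical demo package):

package xxhash_vector_check

import "core:fmt"
import "core:hash/xxhash"

main :: proc() {
	// Spot-check the 100-zero-byte vectors asserted by the test above.
	buf := make([]u8, 100) // make() zero-initializes.
	defer delete(buf)
	fmt.printf("XXH32(zeros[100]) = %08x (expect 85f6413c)\n", xxhash.XXH32(buf))
	fmt.printf("XXH64(zeros[100]) = %16x (expect 17bb1103c92c502f)\n", xxhash.XXH64(buf))
}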