From 3edb3d8d8c16a24371bb401fe0d69ec93e667b27 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 13 Nov 2022 23:24:08 +0000 Subject: [PATCH] Simplify the handling of the hashing calls for `map`s --- core/runtime/dynamic_map_internal.odin | 96 ++++++-------------------- src/check_type.cpp | 20 ++---- src/llvm_backend.cpp | 14 +--- 3 files changed, 30 insertions(+), 100 deletions(-) diff --git a/core/runtime/dynamic_map_internal.odin b/core/runtime/dynamic_map_internal.odin index e943a3e24..ac09c3e2b 100644 --- a/core/runtime/dynamic_map_internal.odin +++ b/core/runtime/dynamic_map_internal.odin @@ -732,92 +732,42 @@ __dynamic_map_reserve :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Ma +// NOTE: the default hashing algorithm derives from fnv64a, with some minor modifications to work for `map` type: +// +// * Convert a `0` result to `1` +// * "empty entry" +// * Prevent the top bit from being set +// * "deleted entry" +// +// Both of these modification are necessary for the implementation of the `map` INITIAL_HASH_SEED :: 0xcbf29ce484222325 HASH_MASK :: 1 << (8*size_of(uintptr) - 1) -1 -_fnv64a :: proc "contextless" (data: []byte, seed: u64 = INITIAL_HASH_SEED) -> u64 { - h: u64 = seed - for b in data { - h = (h ~ u64(b)) * 0x100000001b3 - } - h &= HASH_MASK - return h | u64(h == 0) -} - -default_hash :: #force_inline proc "contextless" (data: []byte) -> uintptr { - return uintptr(_fnv64a(data)) -} -default_hash_string :: #force_inline proc "contextless" (s: string) -> uintptr { - return default_hash(transmute([]byte)(s)) -} -default_hash_ptr :: #force_inline proc "contextless" (data: rawptr, size: int) -> uintptr { - s := Raw_Slice{data, size} - return default_hash(transmute([]byte)(s)) -} - -@(private) -_default_hasher_const :: #force_inline proc "contextless" (data: rawptr, seed: uintptr, $N: uint) -> uintptr where N <= 16 { - h := u64(seed) + 0xcbf29ce484222325 - p := uintptr(data) - #unroll for _ in 0.. uintptr { - h := u64(seed) + 0xcbf29ce484222325 - p := uintptr(data) +default_hasher :: #force_inline proc "contextless" (data: rawptr, seed: uintptr, N: int) -> uintptr { + h := u64(seed) + INITIAL_HASH_SEED + p := ([^]byte)(data) for _ in 0.. uintptr { return #force_inline _default_hasher_const(data, seed, 1) } -default_hasher2 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 2) } -default_hasher3 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 3) } -default_hasher4 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 4) } -default_hasher5 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 5) } -default_hasher6 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 6) } -default_hasher7 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 7) } -default_hasher8 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 8) } -default_hasher9 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 9) } -default_hasher10 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 10) } -default_hasher11 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 11) } -default_hasher12 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 12) } -default_hasher13 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 13) } -default_hasher14 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 14) } -default_hasher15 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 15) } -default_hasher16 :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { return #force_inline _default_hasher_const(data, seed, 16) } - default_hasher_string :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { - h := u64(seed) + 0xcbf29ce484222325 - str := (^[]byte)(data)^ - for b in str { - h = (h ~ u64(b)) * 0x100000001b3 - } - h &= HASH_MASK - return uintptr(h) | uintptr(h == 0) + str := (^[]byte)(data) + return default_hasher(raw_data(str^), seed, len(str)) } default_hasher_cstring :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { - h := u64(seed) + 0xcbf29ce484222325 - ptr := (^uintptr)(data)^ - for (^byte)(ptr)^ != 0 { - b := (^byte)(ptr)^ - h = (h ~ u64(b)) * 0x100000001b3 - ptr += 1 + h := u64(seed) + INITIAL_HASH_SEED + if ptr := (^[^]byte)(data)^; ptr != nil { + for ptr[0] != 0 { + h = (h ~ u64(ptr[0])) * 0x100000001b3 + ptr = ptr[1:] + } } h &= HASH_MASK - return uintptr(h) | uintptr(h == 0) + return uintptr(h) | uintptr(uintptr(h) == 0) } diff --git a/src/check_type.cpp b/src/check_type.cpp index 5d7f8d7b5..4d94fce6c 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -2221,35 +2221,27 @@ void add_map_key_type_dependencies(CheckerContext *ctx, Type *key) { } if (is_type_simple_compare(key)) { - i64 sz = type_size_of(key); - if (1 <= sz && sz <= 16) { - char buf[20] = {}; - gb_snprintf(buf, 20, "default_hasher%d", cast(i32)sz); - add_package_dependency(ctx, "runtime", buf); - return; - } else { - add_package_dependency(ctx, "runtime", "default_hasher_n"); - return; - } + add_package_dependency(ctx, "runtime", "default_hasher"); + return; } if (key->kind == Type_Struct) { - add_package_dependency(ctx, "runtime", "default_hasher_n"); + add_package_dependency(ctx, "runtime", "default_hasher"); for_array(i, key->Struct.fields) { Entity *field = key->Struct.fields[i]; add_map_key_type_dependencies(ctx, field->type); } } else if (key->kind == Type_Union) { - add_package_dependency(ctx, "runtime", "default_hasher_n"); + add_package_dependency(ctx, "runtime", "default_hasher"); for_array(i, key->Union.variants) { Type *v = key->Union.variants[i]; add_map_key_type_dependencies(ctx, v); } } else if (key->kind == Type_EnumeratedArray) { - add_package_dependency(ctx, "runtime", "default_hasher_n"); + add_package_dependency(ctx, "runtime", "default_hasher"); add_map_key_type_dependencies(ctx, key->EnumeratedArray.elem); } else if (key->kind == Type_Array) { - add_package_dependency(ctx, "runtime", "default_hasher_n"); + add_package_dependency(ctx, "runtime", "default_hasher"); add_map_key_type_dependencies(ctx, key->Array.elem); } } diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 5b9db7f2b..2ee292880 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -282,23 +282,11 @@ lbValue lb_equal_proc_for_type(lbModule *m, Type *type) { lbValue lb_simple_compare_hash(lbProcedure *p, Type *type, lbValue data, lbValue seed) { GB_ASSERT_MSG(is_type_simple_compare(type), "%s", type_to_string(type)); - i64 sz = type_size_of(type); - - if (1 <= sz && sz <= 16) { - char name[32] = {}; - gb_snprintf(name, 32, "default_hasher%d", cast(i32)sz); - - auto args = array_make(permanent_allocator(), 2); - args[0] = data; - args[1] = seed; - return lb_emit_runtime_call(p, name, args); - } - auto args = array_make(permanent_allocator(), 3); args[0] = data; args[1] = seed; args[2] = lb_const_int(p->module, t_int, type_size_of(type)); - return lb_emit_runtime_call(p, "default_hasher_n", args); + return lb_emit_runtime_call(p, "default_hasher", args); } void lb_add_callsite_force_inline(lbProcedure *p, lbValue ret_value) {