From a9936d15708030b146cfb1fff13d7d52dcb2f789 Mon Sep 17 00:00:00 2001 From: jason Date: Tue, 16 May 2023 23:49:55 -0400 Subject: [PATCH] implement random map seed --- core/runtime/dynamic_map_internal.odin | 42 +++++++++++++++++++++----- src/llvm_backend.cpp | 27 ++++++++++++++--- src/llvm_backend.hpp | 2 +- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/core/runtime/dynamic_map_internal.odin b/core/runtime/dynamic_map_internal.odin index b0d6cefc4..188d6343e 100644 --- a/core/runtime/dynamic_map_internal.odin +++ b/core/runtime/dynamic_map_internal.odin @@ -251,6 +251,26 @@ map_hash_is_valid :: #force_inline proc "contextless" (hash: Map_Hash) -> bool { return (hash != 0) & (hash & TOMBSTONE_MASK == 0) } +@(require_results) +map_seed :: #force_inline proc "contextless" (m: Raw_Map) -> uintptr { + return map_seed_from_map_data(map_data(m)) +} + +// splitmix for uintptr +@(require_results) +map_seed_from_map_data :: #force_inline proc "contextless" (data: uintptr) -> uintptr { + when size_of(uintptr) == size_of(u64) { + mix := data + 0x9e3779b97f4a7c15 + mix = (mix ~ (mix >> 30)) * 0xbf58476d1ce4e5b9 + mix = (mix ~ (mix >> 27)) * 0x94d049bb133111eb + return mix ~ (mix >> 31) + } else { + mix := data + 0x9e3779b9 + mix = (mix ~ (mix >> 16)) * 0x21f0aaad + mix = (mix ~ (mix >> 15)) * 0x735a2d97 + return mix ~ (mix >> 15) + } +} // Computes the desired position in the array. This is just index % capacity, // but a procedure as there's some math involved here to recover the capacity. @@ -542,6 +562,7 @@ map_reserve_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_ } k := map_cell_index_dynamic(ks, info.ks, i) v := map_cell_index_dynamic(vs, info.vs, i) + hash = info.key_hasher(rawptr(k), map_seed(resized)) _ = map_insert_hash_dynamic(&resized, info, hash, k, v) // Only need to do this comparison on each actually added pair, so do not // fold it into the for loop comparator as a micro-optimization. 
@@ -620,7 +641,7 @@ map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, if map_len(m) == 0 { return 0, false } - h := info.key_hasher(rawptr(k), 0) + h := info.key_hasher(rawptr(k), map_seed(m)) p := map_desired_position(m, h) d := uintptr(0) c := (uintptr(1) << map_log2_cap(m)) - 1 @@ -643,7 +664,7 @@ map_exists_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, if map_len(m) == 0 { return false } - h := info.key_hasher(rawptr(k), 0) + h := info.key_hasher(rawptr(k), map_seed(m)) p := map_desired_position(m, h) d := uintptr(0) c := (uintptr(1) << map_log2_cap(m)) - 1 @@ -749,7 +770,7 @@ map_get :: proc "contextless" (m: $T/map[$K]$V, key: K) -> (stored_key: K, store info := intrinsics.type_map_info(T) key := key - h := info.key_hasher(&key, 0) + h := info.key_hasher(&key, map_seed(rm)) pos := map_desired_position(rm, h) distance := uintptr(0) mask := (uintptr(1) << map_log2_cap(rm)) - 1 @@ -800,15 +821,15 @@ __dynamic_map_get :: proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: } // IMPORTANT: USED WITHIN THE COMPILER -__dynamic_map_check_grow :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> Allocator_Error { +__dynamic_map_check_grow :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (err: Allocator_Error, has_grown: bool) { if m.len >= map_resize_threshold(m^) { - return map_grow_dynamic(m, info, loc) + return map_grow_dynamic(m, info, loc), true } - return nil + return nil, false } __dynamic_map_set_without_hash :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, key, value: rawptr, loc := #caller_location) -> rawptr { - return __dynamic_map_set(m, info, info.key_hasher(key, 0), key, value, loc) + return __dynamic_map_set(m, info, info.key_hasher(key, map_seed(m^)), key, value, loc) } @@ -819,9 +840,14 @@ __dynamic_map_set :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_In return found } - if 
__dynamic_map_check_grow(m, info, loc) != nil { + hash := hash + err, has_grown := __dynamic_map_check_grow(m, info, loc) + if err != nil { return nil } + if has_grown { + hash = info.key_hasher(key, map_seed(m^)) + } result := map_insert_hash_dynamic(m, info, hash, uintptr(key), uintptr(value)) m.len += 1 diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 4d8e13f0f..f28b1f62d 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -720,6 +720,7 @@ gb_internal lbValue lb_map_set_proc_for_type(lbModule *m, Type *type) { lbBlock *check_grow_block = lb_create_block(p, "check-grow"); lbBlock *grow_fail_block = lb_create_block(p, "grow-fail"); lbBlock *insert_block = lb_create_block(p, "insert"); + lbBlock *rehash_block = lb_create_block(p, "rehash"); lb_emit_if(p, lb_emit_comp_against_nil(p, Token_NotEq, found_ptr), found_block, check_grow_block); lb_start_block(p, found_block); @@ -737,12 +738,19 @@ gb_internal lbValue lb_map_set_proc_for_type(lbModule *m, Type *type) { args[0] = lb_emit_conv(p, map_ptr, t_rawptr); args[1] = map_info; args[2] = lb_emit_load(p, location_ptr); - lbValue grow_err = lb_emit_runtime_call(p, "__dynamic_map_check_grow", args); + lbValue grow_err_and_has_grown = lb_emit_runtime_call(p, "__dynamic_map_check_grow", args); + lbValue grow_err = lb_emit_struct_ev(p, grow_err_and_has_grown, 0); + lbValue has_grown = lb_emit_struct_ev(p, grow_err_and_has_grown, 1); lb_emit_if(p, lb_emit_comp_against_nil(p, Token_NotEq, grow_err), grow_fail_block, insert_block); lb_start_block(p, grow_fail_block); LLVMBuildRet(p->builder, LLVMConstNull(lb_type(m, t_rawptr))); + + lb_emit_if(p, has_grown, grow_fail_block, rehash_block); + lb_start_block(p, rehash_block); + lbValue key = lb_emit_load(p, key_ptr); + hash = lb_gen_map_key_hash(p, map_ptr, key, nullptr); } lb_start_block(p, insert_block); @@ -916,7 +924,7 @@ gb_internal lbValue lb_const_hash(lbModule *m, lbValue key, Type *key_type) { return hashed_key; } -gb_internal lbValue 
lb_gen_map_key_hash(lbProcedure *p, lbValue key, Type *key_type, lbValue *key_ptr_) { +gb_internal lbValue lb_gen_map_key_hash(lbProcedure *p, lbValue const &map_ptr, lbValue key, lbValue *key_ptr_) { TEMPORARY_ALLOCATOR_GUARD(); lbValue key_ptr = lb_address_from_load_or_generate_local(p, key); @@ -924,13 +932,22 @@ gb_internal lbValue lb_gen_map_key_hash(lbProcedure *p, lbValue key, Type *key_t if (key_ptr_) *key_ptr_ = key_ptr; + Type* key_type = base_type(type_deref(map_ptr.type))->Map.key; + lbValue hashed_key = lb_const_hash(p->module, key, key_type); if (hashed_key.value == nullptr) { lbValue hasher = lb_hasher_proc_for_type(p->module, key_type); + lbValue seed = {}; + { + auto args = array_make(temporary_allocator(), 1); + args[0] = lb_map_data_uintptr(p, lb_emit_load(p, map_ptr)); + seed = lb_emit_runtime_call(p, "map_seed_from_map_data", args); + } + auto args = array_make(temporary_allocator(), 2); args[0] = key_ptr; - args[1] = lb_const_int(p->module, t_uintptr, 0); + args[1] = seed; hashed_key = lb_emit_call(p, hasher, args); } @@ -945,7 +962,7 @@ gb_internal lbValue lb_internal_dynamic_map_get_ptr(lbProcedure *p, lbValue cons lbValue ptr = {}; lbValue key_ptr = {}; - lbValue hash = lb_gen_map_key_hash(p, key, map_type->Map.key, &key_ptr); + lbValue hash = lb_gen_map_key_hash(p, map_ptr, key, &key_ptr); if (build_context.dynamic_map_calls) { auto args = array_make(temporary_allocator(), 4); @@ -976,7 +993,7 @@ gb_internal void lb_internal_dynamic_map_set(lbProcedure *p, lbValue const &map_ GB_ASSERT(map_type->kind == Type_Map); lbValue key_ptr = {}; - lbValue hash = lb_gen_map_key_hash(p, map_key, map_type->Map.key, &key_ptr); + lbValue hash = lb_gen_map_key_hash(p, map_ptr, map_key, &key_ptr); lbValue v = lb_emit_conv(p, map_value, map_type->Map.value); lbValue value_ptr = lb_address_from_load_or_generate_local(p, v); diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 964195223..1ddd4fed7 100644 --- a/src/llvm_backend.hpp +++ 
b/src/llvm_backend.hpp @@ -477,7 +477,7 @@ gb_internal String lb_get_const_string(lbModule *m, lbValue value); gb_internal lbValue lb_generate_local_array(lbProcedure *p, Type *elem_type, i64 count, bool zero_init=true); gb_internal lbValue lb_generate_global_array(lbModule *m, Type *elem_type, i64 count, String prefix, i64 id); -gb_internal lbValue lb_gen_map_key_hash(lbProcedure *p, lbValue key, Type *key_type, lbValue *key_ptr_); +gb_internal lbValue lb_gen_map_key_hash(lbProcedure *p, lbValue const &map_ptr, lbValue key, lbValue *key_ptr_); gb_internal lbValue lb_gen_map_cell_info_ptr(lbModule *m, Type *type); gb_internal lbValue lb_gen_map_info_ptr(lbModule *m, Type *map_type);