diff --git a/core/runtime/dynamic_map_internal.odin b/core/runtime/dynamic_map_internal.odin
index 7e453b4b8..b9b10dd40 100644
--- a/core/runtime/dynamic_map_internal.odin
+++ b/core/runtime/dynamic_map_internal.odin
@@ -137,6 +137,49 @@ map_cell_index_dynamic_const :: proc "contextless" (base: uintptr, #no_alias inf
 	return base + (cell_index * size_of_cell) + (data_index * size_of_type)
 }
 
+// We always round the capacity to a power of two so this becomes [16]Foo, which
+// works out to [4]Cell(Foo).
+//
+// The following compile-time procedure indexes such a [N]Cell(T) structure as
+// if it were a flat array accounting for the internal padding introduced by the
+// Cell structure.
+map_cell_index_static :: #force_inline proc "contextless" (cells: [^]Map_Cell($T), index: uintptr) -> ^T #no_bounds_check {
+	N :: size_of(Map_Cell(T){}.data) / size_of(T) when size_of(T) > 0 else 1
+
+	#assert(N <= MAP_CACHE_LINE_SIZE)
+
+	// No padding case, can treat as a regular array of []T.
+	when size_of(Map_Cell(T)) == size_of([N]T) {
+		return &([^]T)(cells)[index]
+	}
+
+	// Likely case, N is a power of two because T is a power of two.
+	when (N & (N - 1)) == 0 {
+		// Compute the integer log 2 of N, this is the shift amount to index the
+		// correct cell. Odin's intrinsics.count_leading_zeros does not produce a
+		// constant, hence this approach. We only need to check up to N = 64.
+		// SHIFT must satisfy (1 << SHIFT) == N: 1 -> 0, 2 -> 1, 4 -> 2, ... 64 -> 6.
+		SHIFT :: 0 when N < 2 else
+		         1 when N < 4 else
+		         2 when N < 8 else
+		         3 when N < 16 else
+		         4 when N < 32 else
+		         5 when N < 64 else 6
+		#assert(SHIFT <= MAP_CACHE_LINE_LOG2)
+		// Unique case, no need to index data here since only one element.
+		when N == 1 {
+			return &cells[index >> SHIFT].data[0]
+		} else {
+			return &cells[index >> SHIFT].data[index & (N - 1)]
+		}
+	}
+
+	// Least likely (and worst case), we pay for a division operation but we
+	// assume the compiler does not actually generate a division. N will be in the
+	// range [1, CACHE_LINE_SIZE) and not a power of two.
+	return &cells[index / N].data[index % N]
+}
+
 // len() for map
 map_len :: #force_inline proc "contextless" (m: Raw_Map) -> int {
 	return m.len
@@ -721,27 +764,78 @@ map_clear_dynamic :: #force_inline proc "contextless" (#no_alias m: ^Raw_Map, #n
 }
 
 
+// Returns typed pointers to the key cells, value cells and hashes of `m`.
+// The value cells start where `cap(m)` keys end, and the hashes start where
+// `cap(m)` values end.
+map_kvh_data_static :: #force_inline proc "contextless" (m: $T/map[$K]$V) -> ([^]Map_Cell(K), [^]Map_Cell(V), [^]Map_Hash) {
+	H :: Map_Hash
+	capacity := uintptr(cap(m))
+	ks := ([^]Map_Cell(K))(map_data(transmute(Raw_Map)m))
+	vs := ([^]Map_Cell(V))(map_cell_index_static(ks, capacity))
+	hs := ([^]Map_Cell(H))(map_cell_index_static(vs, capacity))
+	return ks, vs, ([^]Map_Hash)(hs)
+}
+
+
+// Statically typed lookup of `key` in `m`. Probes from the key's desired
+// position and returns copies of the stored key and value with `ok = true`;
+// every other exit leaves the results zeroed with `ok = false`.
+map_get :: proc "contextless" (m: $T/map[$K]$V, key: K) -> (stored_key: K, stored_value: V, ok: bool) {
+	rm := transmute(Raw_Map)m
+	if rm.len == 0 {
+		return
+	}
+	info := intrinsics.type_map_info(T)
+	key := key
+
+	h := info.key_hasher(&key, 0)
+	pos := map_desired_position(rm, h)
+	distance := uintptr(0)
+	mask := (uintptr(1) << map_log2_cap(rm)) - 1
+	ks, vs, hs := map_kvh_data_static(m)
+	for {
+		element_hash := hs[pos]
+		if map_hash_is_empty(element_hash) {
+			return
+		} else if distance > map_probe_distance(rm, element_hash, pos) {
+			return
+		} else if element_hash == h {
+			element_key := map_cell_index_static(ks, pos)
+			if info.key_equal(&key, rawptr(element_key)) {
+				element_value := map_cell_index_static(vs, pos)
+				stored_key = (^K)(element_key)^
+				stored_value = (^V)(element_value)^
+				ok = true
+				return
+			}
+
+		}
+		pos = (pos + 1) & mask
+		distance += 1
+	}
+}
+
 __dynamic_map_get :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, key: rawptr) -> (ptr: rawptr) {
 	if m.len == 0 {
 		return nil
 	}
 
 	h := info.key_hasher(key, 0)
-	p := map_desired_position(m, h)
-	d := uintptr(0)
-	c := (uintptr(1) << map_log2_cap(m)) - 1
+	pos := map_desired_position(m, h)
+	distance := uintptr(0)
+	mask := (uintptr(1) << map_log2_cap(m)) - 1
 	ks, vs, hs, _, _ := map_kvh_data_dynamic(m, info)
 	for {
-		element_hash := hs[p]
+		element_hash := hs[pos]
 		if map_hash_is_empty(element_hash) {
 			return nil
-		} else if d > map_probe_distance(m, element_hash, p) {
+		} else if distance > map_probe_distance(m, element_hash, pos) {
 			return nil
-		} else if element_hash == h && info.key_equal(key, rawptr(map_cell_index_dynamic(ks, info.ks, p))) {
-			return rawptr(map_cell_index_dynamic(vs, info.vs, p))
+		} else if element_hash == h && info.key_equal(key, rawptr(map_cell_index_dynamic(ks, info.ks, pos))) {
+			return rawptr(map_cell_index_dynamic(vs, info.vs, pos))
 		}
-		p = (p + 1) & c
-		d += 1
+		pos = (pos + 1) & mask
+		distance += 1
 	}
 }
 
diff --git a/src/build_settings.cpp b/src/build_settings.cpp
index 8067d1d01..1cd2899c4 100644
--- a/src/build_settings.cpp
+++ b/src/build_settings.cpp
@@ -307,6 +307,8 @@ struct BuildContext {
 
 	bool disallow_rtti;
 
+	bool use_static_map_calls;
+
 	RelocMode reloc_mode;
 	bool disable_red_zone;
 
diff --git a/src/check_expr.cpp b/src/check_expr.cpp
index c2753e979..045b22ca2 100644
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -3244,7 +3244,12 @@ void check_binary_expr(CheckerContext *c, Operand *x, Ast *node, Type *type_hint
 				check_assignment(c, x, yt->Map.key, str_lit("map 'not_in'"));
 			}
 
-			add_package_dependency(c, "runtime", "__dynamic_map_get");
+			if (build_context.use_static_map_calls) {
+				add_package_dependency(c, "runtime", "map_desired_position");
+				add_package_dependency(c, "runtime", "map_probe_distance");
+			} else {
+				add_package_dependency(c, "runtime", "__dynamic_map_get");
+			}
 		} else if (is_type_bit_set(rhs_type)) {
 			Type *yt = base_type(rhs_type);
 
@@ -8992,8 +8997,14 @@ ExprKind check_index_expr(CheckerContext *c, Operand *o, Ast *node, Type *type_h
 		o->type = t->Map.value;
 		o->expr = node;
 
-		add_package_dependency(c, "runtime", "__dynamic_map_get");
+
 		add_package_dependency(c, "runtime", "__dynamic_map_set");
+		if (build_context.use_static_map_calls) {
+			add_package_dependency(c, "runtime", "map_desired_position");
+			add_package_dependency(c, "runtime", "map_probe_distance");
+		} else {
+			add_package_dependency(c, "runtime", "__dynamic_map_get");
+		}
 		return Expr_Expr;
 	}
 
diff --git a/src/checker.cpp b/src/checker.cpp
index 75a6da6fa..d48b37b26 100644
--- a/src/checker.cpp
+++ b/src/checker.cpp
@@ -926,6 +926,10 @@ void init_universal(void) {
 
 	Type *hasher_args[2] = {t_rawptr, t_uintptr};
 	t_hasher_proc = alloc_type_proc_from_types(hasher_args, 2, t_uintptr, false, ProcCC_Contextless);
+
+	Type *map_get_args[2] = {/*map*/t_rawptr, /*key*/t_rawptr};
+	t_map_get_proc = alloc_type_proc_from_types(map_get_args, 2, t_rawptr, false, ProcCC_Contextless);
+
 }
 
 // Constants
diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp
index 629daf1c9..2b95c5b2f 100644
--- a/src/llvm_backend.cpp
+++ b/src/llvm_backend.cpp
@@ -157,8 +157,8 @@ lbValue lb_get_equal_proc_for_type(lbModule *m, Type *type) {
 
 	static u32 proc_index = 0;
 
-	char buf[16] = {};
-	isize n = gb_snprintf(buf, 16, "__$equal%u", ++proc_index);
+	char buf[32] = {};
+	isize n = gb_snprintf(buf, 32, "__$equal%u", ++proc_index);
 	char *str = gb_alloc_str_len(permanent_allocator(), buf, n-1);
 	String proc_name = make_string_c(str);
 
@@ -280,8 +280,8 @@ lbValue lb_simple_compare_hash(lbProcedure *p, Type *type, lbValue data, lbValue
 
 	i64 sz = type_size_of(type);
 	if (1 <= sz && sz <= 16) {
-		char name[20] = {};
-		gb_snprintf(name, 20, "default_hasher%d", cast(i32)sz);
+		char name[32] = {};
+		gb_snprintf(name, 32, "default_hasher%d", cast(i32)sz);
 
 		auto args = array_make<lbValue>(permanent_allocator(), 2);
 		args[0] = data;
@@ -310,8 +310,8 @@ lbValue lb_get_hasher_proc_for_type(lbModule *m, Type *type) {
 
 	static u32 proc_index = 0;
 
-	char buf[16] = {};
-	isize n = gb_snprintf(buf, 16, "__$hasher%u", ++proc_index);
+	char buf[32] = {};
+	isize n = gb_snprintf(buf, 32, "__$hasher%u", ++proc_index);
 	char *str = gb_alloc_str_len(permanent_allocator(), buf, n-1);
 	String proc_name = make_string_c(str);
 
@@ -454,6 +454,146 @@ lbValue lb_get_hasher_proc_for_type(lbModule *m, Type *type) {
 }
 
 
+// Returns the per-module cached `map_get` procedure specialised for the map `type`, generating it
+// on first use. The generated procedure takes (map: rawptr, key: rawptr) and returns a rawptr to the
+// stored value, or nil when the key is not found.
+// NOTE(review): unlike runtime.__dynamic_map_get there is no `len == 0` early-out before `hs` is
+// loaded - confirm that an empty/unallocated map can never reach the probe loop.
+lbValue lb_get_map_get_proc_for_type(lbModule *m, Type *type) {
+	GB_ASSERT(build_context.use_static_map_calls);
+	type = base_type(type);
+	GB_ASSERT(type->kind == Type_Map);
+
+
+	lbProcedure **found = map_get(&m->map_get_procs, type);
+	if (found) {
+		GB_ASSERT(*found != nullptr);
+		return {(*found)->value, (*found)->type};
+	}
+	static u32 proc_index = 0;
+
+	char buf[32] = {};
+	isize n = gb_snprintf(buf, 32, "__$map_get%u", ++proc_index);
+	char *str = gb_alloc_str_len(permanent_allocator(), buf, n-1);
+	String proc_name = make_string_c(str);
+
+	lbProcedure *p = lb_create_dummy_procedure(m, proc_name, t_map_get_proc);
+	map_set(&m->map_get_procs, type, p);
+	lb_begin_procedure_body(p);
+	defer (lb_end_procedure_body(p));
+
+	LLVMValueRef x = LLVMGetParam(p->value, 0);
+	LLVMValueRef y = LLVMGetParam(p->value, 1);
+	lbValue map_ptr = {x, t_rawptr};
+	lbValue key_ptr = {y, t_rawptr};
+
+	LLVMAttributeRef nonnull_attr = lb_create_enum_attribute(m->ctx, "nonnull");
+	LLVMAttributeRef noalias_attr = lb_create_enum_attribute(m->ctx, "noalias");
+	LLVMAddAttributeAtIndex(p->value, 1+0, nonnull_attr);
+	LLVMAddAttributeAtIndex(p->value, 1+0, noalias_attr);
+	LLVMAddAttributeAtIndex(p->value, 1+1, nonnull_attr);
+	LLVMAddAttributeAtIndex(p->value, 1+1, noalias_attr);
+
+	map_ptr = lb_emit_conv(p, map_ptr, t_raw_map_ptr);
+	lbValue map = lb_emit_load(p, map_ptr);
+
+	key_ptr = lb_emit_conv(p, key_ptr, alloc_type_pointer(type->Map.key));
+	lbValue key = lb_emit_load(p, key_ptr);
+
+	lbValue h = lb_gen_map_key_hash(p, key, type->Map.key, nullptr);
+	lbAddr pos = lb_add_local_generated(p, t_uintptr, false);
+	lbAddr distance = lb_add_local_generated(p, t_uintptr, true);
+	lbValue capacity = lb_map_cap(p, map);
+	lbValue mask = lb_emit_conv(p, lb_emit_arith(p, Token_Sub, capacity, lb_const_int(m, t_int, 1), t_int), t_uintptr);
+
+	{
+		auto args = array_make<lbValue>(heap_allocator(), 2);
+		args[0] = map;
+		args[1] = h;
+		lb_addr_store(p, pos, lb_emit_runtime_call(p, "map_desired_position", args));
+	}
+	lbValue zero_uintptr = lb_const_int(m, t_uintptr, 0);
+	lbValue one_uintptr = lb_const_int(m, t_uintptr, 1);
+
+	lbValue ks = lb_map_data_uintptr(p, map);
+	lbValue vs = lb_map_cell_index_static(p, type->Map.key, ks, capacity);
+	lbValue hs = lb_map_cell_index_static(p, type->Map.value, vs, capacity);
+
+	ks = lb_emit_conv(p, ks, alloc_type_pointer(type->Map.key));
+	vs = lb_emit_conv(p, vs, alloc_type_pointer(type->Map.value));
+	hs = lb_emit_conv(p, hs, alloc_type_pointer(t_uintptr));
+
+	// lbValue res =
+	// LLVMBuildRet(p->builder, res.value);
+
+	lbBlock *loop = lb_create_block(p, "loop");
+	lbBlock *probe_block = lb_create_block(p, "probe");
+	lbBlock *increment_block = lb_create_block(p, "increment");
+	lbBlock *hash_compare_block = lb_create_block(p, "hash_compare");
+	lbBlock *key_compare_block = lb_create_block(p, "key_compare");
+	lbBlock *value_block = lb_create_block(p, "value");
+	lbBlock *nil_block = lb_create_block(p, "nil");
+
+	lb_emit_jump(p, loop);
+	lb_start_block(p, loop);
+
+	lbValue element_hash = lb_emit_load(p, lb_emit_ptr_offset(p, hs, lb_addr_load(p, pos)));
+	{
+		// if element_hash == 0 { return nil }
+		lb_emit_if(p, lb_emit_comp(p, Token_CmpEq, element_hash, zero_uintptr), nil_block, probe_block);
+	}
+
+	lb_start_block(p, probe_block);
+	{
+		auto args = array_make<lbValue>(heap_allocator(), 3);
+		args[0] = map;
+		args[1] = element_hash;
+		args[2] = lb_addr_load(p, pos);
+		lbValue probe_distance = lb_emit_runtime_call(p, "map_probe_distance", args);
+		lbValue cond = lb_emit_comp(p, Token_Gt, lb_addr_load(p, distance), probe_distance);
+		lb_emit_if(p, cond, nil_block, hash_compare_block);
+	}
+
+	lb_start_block(p, hash_compare_block);
+	{
+		lb_emit_if(p, lb_emit_comp(p, Token_CmpEq, element_hash, h), key_compare_block, increment_block);
+	}
+
+	lb_start_block(p, key_compare_block);
+	{
+		lbValue element_key = lb_map_cell_index_static(p, type->Map.key, ks, lb_addr_load(p, pos));
+		element_key = lb_emit_conv(p, element_key, ks.type);
+		lbValue cond = lb_emit_comp(p, Token_CmpEq, lb_emit_load(p, element_key), key);
+		lb_emit_if(p, cond, value_block, increment_block);
+	}
+
+	lb_start_block(p, value_block);
+	{
+		lbValue element_value = lb_map_cell_index_static(p, type->Map.value, vs, lb_addr_load(p, pos));
+		element_value = lb_emit_conv(p, element_value, t_rawptr);
+		LLVMBuildRet(p->builder, element_value.value);
+	}
+
+	lb_start_block(p, increment_block);
+	{
+		lbValue pp = lb_addr_load(p, pos);
+		pp = lb_emit_arith(p, Token_Add, pp, one_uintptr, t_uintptr);
+		pp = lb_emit_arith(p, Token_And, pp, mask, t_uintptr);
+		lb_addr_store(p, pos, pp);
+		lb_emit_increment(p, distance.addr);
+	}
+	lb_emit_jump(p, loop);
+
+	lb_start_block(p, nil_block);
+	{
+		lbValue res = lb_const_nil(m, t_rawptr);
+		LLVMBuildRet(p->builder, res.value);
+	}
+
+
+	return {p->value, p->type};
+}
+
 lbValue lb_generate_anonymous_proc_lit(lbModule *m, String const &prefix_name, Ast *expr, lbProcedure *parent) {
 	lbProcedure **found = map_get(&m->gen->anonymous_proc_lits, expr);
 	if (found) {
@@ -626,16 +766,27 @@ lbValue lb_internal_dynamic_map_get_ptr(lbProcedure *p, lbValue const &map, lbVa
 	Type *map_type = base_type(map.type);
 	GB_ASSERT(map_type->kind == Type_Map);
 
+	lbValue ptr = {};
+
 	lbValue key_ptr = lb_address_from_load_or_generate_local(p, key);
 	key_ptr = lb_emit_conv(p, key_ptr, t_rawptr);
 
 
-	auto args = array_make<lbValue>(permanent_allocator(), 3);
-	args[0] = lb_emit_transmute(p, map, t_raw_map);
-	args[1] = lb_gen_map_info_ptr(p->module, map_type);
-	args[2] = key_ptr;
+	if (build_context.use_static_map_calls) {
+		lbValue map_get_proc = lb_get_map_get_proc_for_type(p->module, map_type);
 
-	lbValue ptr = lb_emit_runtime_call(p, "__dynamic_map_get", args);
+		auto args = array_make<lbValue>(permanent_allocator(), 2);
+		args[0] = lb_address_from_load_or_generate_local(p, map);
+		args[1] = key_ptr;
+		ptr = lb_emit_call(p, map_get_proc, args);
+	} else {
+		auto args = array_make<lbValue>(permanent_allocator(), 3);
+		args[0] = lb_emit_transmute(p, map, t_raw_map);
+		args[1] = lb_gen_map_info_ptr(p->module, map_type);
+		args[2] = key_ptr;
+
+		ptr = lb_emit_runtime_call(p, "__dynamic_map_get", args);
+	}
 
 	return lb_emit_conv(p, ptr, alloc_type_pointer(map_type->Map.value));
 }
@@ -1206,6 +1357,10 @@ WORKER_TASK_PROC(lb_llvm_function_pass_worker_proc) {
 		lbProcedure *p = m->hasher_procs.entries[i].value;
 		lb_run_function_pass_manager(default_function_pass_manager, p);
 	}
+	for_array(i, m->map_get_procs.entries) {
+		lbProcedure *p = m->map_get_procs.entries[i].value;
+		lb_run_function_pass_manager(default_function_pass_manager, p);
+	}
 
 	return 0;
 }
diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp
index 6c7c2e392..f9fe6cff0 100644
--- a/src/llvm_backend.hpp
+++ b/src/llvm_backend.hpp
@@ -144,6 +144,7 @@ struct lbModule {
 
 	PtrMap<Type *, lbProcedure *> equal_procs;
 	PtrMap<Type *, lbProcedure *> hasher_procs;
+	PtrMap<Type *, lbProcedure *> map_get_procs;
 
 	u32 nested_type_name_guid;
 
diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp
index e1a926255..859542fb5 100644
--- a/src/llvm_backend_general.cpp
+++ b/src/llvm_backend_general.cpp
@@ -67,6 +67,7 @@ void lb_init_module(lbModule *m, Checker *c) {
 	map_init(&m->function_type_map, a);
 	map_init(&m->equal_procs, a);
 	map_init(&m->hasher_procs, a);
+	map_init(&m->map_get_procs, a);
 	array_init(&m->procedures_to_generate, a, 0, 1024);
 	array_init(&m->missing_procedures_to_check, a, 0, 16);
 	map_init(&m->debug_values, a);
diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp
index 3e4846f02..6b83068ce 100644
--- a/src/llvm_backend_stmt.cpp
+++ b/src/llvm_backend_stmt.cpp
@@ -371,24 +371,30 @@ void lb_build_range_indexed(lbProcedure *p, lbValue expr, Type *val_type, lbValu
 }
 
 lbValue lb_map_cell_index_static(lbProcedure *p, Type *type, lbValue cells_ptr, lbValue index) {
-	i64 size, N;
-	i64 sz = type_size_of(type);
-	map_cell_size_and_len(type, &size, &N);
+	i64 size, len;
+	i64 elem_sz = type_size_of(type);
+	map_cell_size_and_len(type, &size, &len);
 
-	index = lb_emit_conv(p, index, t_uint);
+	index = lb_emit_conv(p, index, t_uintptr);
 
-	if (size == N*sz) {
+	if (size == len*elem_sz) {
 		lbValue elems_ptr = lb_emit_conv(p, cells_ptr, alloc_type_pointer(type));
 		return lb_emit_ptr_offset(p, elems_ptr, index);
 	}
 
 	// TOOD(bill): N power of two optimization to use >> and &
 
-	lbValue N_const = lb_const_int(p->module, index.type, N);
-	lbValue cell_index = lb_emit_arith(p, Token_Quo, index, N_const, index.type);
-	lbValue data_index = lb_emit_arith(p, Token_Mod, index, N_const, index.type);
-	lbValue cell = lb_emit_ptr_offset(p, cells_ptr, cell_index);
-	lbValue elems_ptr = lb_emit_conv(p, cell, alloc_type_pointer(type));
+	lbValue size_const = lb_const_int(p->module, t_uintptr, size);
+	lbValue len_const = lb_const_int(p->module, t_uintptr, len);
+	lbValue cell_index = lb_emit_arith(p, Token_Quo, index, len_const, t_uintptr);
+	lbValue data_index = lb_emit_arith(p, Token_Mod, index, len_const, t_uintptr);
+
+	lbValue elems_ptr = lb_emit_conv(p, cells_ptr, t_uintptr);
+	lbValue cell_offset = lb_emit_arith(p, Token_Mul, size_const, cell_index, t_uintptr);
+	elems_ptr = lb_emit_arith(p, Token_Add, elems_ptr, cell_offset, t_uintptr);
+
+	elems_ptr = lb_emit_conv(p, elems_ptr, alloc_type_pointer(type));
+
 	return lb_emit_ptr_offset(p, elems_ptr, data_index);
 }
 
diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp
index a3493f864..6d69021ce 100644
--- a/src/llvm_backend_utility.cpp
+++ b/src/llvm_backend_utility.cpp
@@ -1440,7 +1440,7 @@ lbValue lb_map_len(lbProcedure *p, lbValue value) {
 }
 
 lbValue lb_map_cap(lbProcedure *p, lbValue value) {
-	GB_ASSERT(is_type_map(value.type));
+	GB_ASSERT_MSG(is_type_map(value.type) || are_types_identical(value.type, t_raw_map), "%s", type_to_string(value.type));
 
 	lbValue zero = lb_const_int(p->module, t_uintptr, 0);
 	lbValue one = lb_const_int(p->module, t_uintptr, 1);
@@ -1454,7 +1454,7 @@ lbValue lb_map_cap(lbProcedure *p, lbValue value) {
 }
 
 lbValue lb_map_data_uintptr(lbProcedure *p, lbValue value) {
-	GB_ASSERT(is_type_map(value.type));
+	GB_ASSERT_MSG(is_type_map(value.type) || are_types_identical(value.type, t_raw_map), "%s", type_to_string(value.type));
 	lbValue data = lb_emit_struct_ev(p, value, 0);
 	u64 mask_value = 0;
 	if (build_context.word_size == 4) {
diff --git a/src/main.cpp b/src/main.cpp
index b75137613..3b0a599db 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -638,6 +638,7 @@ enum BuildFlagKind {
 	BuildFlag_StrictStyleInitOnly,
 	BuildFlag_ForeignErrorProcedures,
 	BuildFlag_DisallowRTTI,
+	BuildFlag_UseStaticMapCalls,
 
 	BuildFlag_Compact,
 	BuildFlag_GlobalDefinitions,
@@ -814,6 +815,8 @@ bool parse_build_flags(Array<String> args) {
 
 	add_flag(&build_flags, BuildFlag_DisallowRTTI, str_lit("disallow-rtti"), BuildFlagParam_None, Command__does_check);
 
+	add_flag(&build_flags, BuildFlag_UseStaticMapCalls, str_lit("use-static-map-calls"), BuildFlagParam_None, Command__does_check);
+
 	add_flag(&build_flags, BuildFlag_Compact, str_lit("compact"), BuildFlagParam_None, Command_query);
 	add_flag(&build_flags, BuildFlag_GlobalDefinitions, str_lit("global-definitions"), BuildFlagParam_None, Command_query);
 
@@ -1414,6 +1417,9 @@ bool parse_build_flags(Array<String> args) {
 						case BuildFlag_DisallowRTTI:
 							build_context.disallow_rtti = true;
 							break;
+						case BuildFlag_UseStaticMapCalls:
+							build_context.use_static_map_calls = true;
+							break;
 						case BuildFlag_DefaultToNilAllocator:
 							build_context.ODIN_DEFAULT_TO_NIL_ALLOCATOR = true;
 							break;
diff --git a/src/types.cpp b/src/types.cpp
index ab82e87b8..74b192010 100644
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -693,6 +693,7 @@ gb_global Type *t_raw_map_ptr = nullptr;
 
 gb_global Type *t_equal_proc = nullptr;
 gb_global Type *t_hasher_proc = nullptr;
+gb_global Type *t_map_get_proc = nullptr;
 
 gb_global Type *t_objc_object = nullptr;
 gb_global Type *t_objc_selector = nullptr;