diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 2047aaf75..ce79aab9f 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -2072,6 +2072,7 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { if info.generated_struct == nil { return } + /* entries := &m.entries gs := runtime.type_info_base(info.generated_struct).variant.(runtime.Type_Info_Struct) ed := runtime.type_info_base(gs.types[1]).variant.(runtime.Type_Info_Dynamic_Array) @@ -2106,6 +2107,7 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { value := data + entry_type.offsets[3] // value: Value fmt_arg(fi, any{rawptr(value), info.value.id}, 'v') } + */ } case runtime.Type_Info_Struct: diff --git a/core/mem/alloc.odin b/core/mem/alloc.odin index 551906bed..2d11b523f 100644 --- a/core/mem/alloc.odin +++ b/core/mem/alloc.odin @@ -112,22 +112,21 @@ query_info :: proc(pointer: rawptr, allocator: Allocator, loc := #caller_locatio -delete_string :: proc(str: string, allocator := context.allocator, loc := #caller_location) { - free(raw_data(str), allocator, loc) +delete_string :: proc(str: string, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + return free(raw_data(str), allocator, loc) } -delete_cstring :: proc(str: cstring, allocator := context.allocator, loc := #caller_location) { - free((^byte)(str), allocator, loc) +delete_cstring :: proc(str: cstring, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + return free((^byte)(str), allocator, loc) } -delete_dynamic_array :: proc(array: $T/[dynamic]$E, loc := #caller_location) { - free(raw_data(array), array.allocator, loc) +delete_dynamic_array :: proc(array: $T/[dynamic]$E, loc := #caller_location) -> Allocator_Error { + return free(raw_data(array), array.allocator, loc) } -delete_slice :: proc(array: $T/[]$E, allocator := context.allocator, loc := #caller_location) { - free(raw_data(array), allocator, loc) +delete_slice :: proc(array: $T/[]$E, allocator := context.allocator, loc := 
#caller_location) -> Allocator_Error { + return free(raw_data(array), allocator, loc) } -delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) { +delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error { raw := transmute(Raw_Map)m - delete_slice(raw.hashes, raw.entries.allocator, loc) - free(raw.entries.data, raw.entries.allocator, loc) + return runtime.map_free(raw, loc) } diff --git a/core/reflect/reflect.odin b/core/reflect/reflect.odin index 896fc4473..8aab77399 100644 --- a/core/reflect/reflect.odin +++ b/core/reflect/reflect.odin @@ -273,7 +273,7 @@ length :: proc(val: any) -> int { return (^runtime.Raw_Dynamic_Array)(val.data).len case Type_Info_Map: - return (^runtime.Raw_Map)(val.data).entries.len + return runtime.map_len((^runtime.Raw_Map)(val.data)^) case Type_Info_String: if a.is_cstring { @@ -305,7 +305,7 @@ capacity :: proc(val: any) -> int { return (^runtime.Raw_Dynamic_Array)(val.data).cap case Type_Info_Map: - return (^runtime.Raw_Map)(val.data).entries.cap + return runtime.map_cap((^runtime.Raw_Map)(val.data)^) } return 0 } diff --git a/core/runtime/core.odin b/core/runtime/core.odin index 385a03b71..ce3aa239b 100644 --- a/core/runtime/core.odin +++ b/core/runtime/core.odin @@ -394,9 +394,32 @@ Raw_Dynamic_Array :: struct { allocator: Allocator, } +// The raw, type-erased representation of a map. +// +// 32-bytes on 64-bit +// 16-bytes on 32-bit Raw_Map :: struct { - hashes: []Map_Index, - entries: Raw_Dynamic_Array, + // A single allocation spanning all keys, values, and hashes. + // { + // k: Map_Cell(K) * (capacity / ks_per_cell) + // v: Map_Cell(V) * (capacity / vs_per_cell) + // h: Map_Cell(H) * (capacity / hs_per_cell) + // } + // + // The data is allocated assuming 64-byte alignment, meaning the address is + // always a multiple of 64. This means we have 6 bits of zeros in the pointer + // to store the capacity. We can store a value as large as 2^6-1 or 63 in + // there. 
This conveniently is the maximum log2 capacity we can have for a map + // as Odin uses signed integers to represent capacity. + // + // Since the hashes are backed by Map_Hash, which is just a 64-bit unsigned + // integer, the cell structure for hashes is unnecessary because 64/8 is 8 and + // requires no padding, meaning it can be indexed as a regular array of + // Map_Hash directly, though for consistency sake it's written as if it were + // an array of Map_Cell(Map_Hash). + data: uintptr, // 8-bytes on 64-bits, 4-bytes on 32-bits + len: uintptr, // 8-bytes on 64-bits, 4-bytes on 32-bits + allocator: Allocator, // 16-bytes on 64-bits, 8-bytes on 32-bits } Raw_Any :: struct { diff --git a/core/runtime/core_builtin.odin b/core/runtime/core_builtin.odin index b0f4cb25c..80a9f2944 100644 --- a/core/runtime/core_builtin.odin +++ b/core/runtime/core_builtin.odin @@ -159,20 +159,7 @@ delete_slice :: proc(array: $T/[]$E, allocator := context.allocator, loc := #cal } @builtin delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error { - Entry :: struct { - hash: uintptr, - next: int, - key: K, - value: V, - } - - raw := transmute(Raw_Map)m - err := delete_slice(raw.hashes, raw.entries.allocator, loc) - err1 := mem_free_with_size(raw.entries.data, raw.entries.cap*size_of(Entry), raw.entries.allocator, loc) - if err == nil { - err = err1 - } - return err + return map_free(transmute(Raw_Map)m, loc) } @@ -285,19 +272,13 @@ clear_map :: proc "contextless" (m: ^$T/map[$K]$V) { if m == nil { return } - raw_map := (^Raw_Map)(m) - entries := (^Raw_Dynamic_Array)(&raw_map.entries) - entries.len = 0 - for _, i in raw_map.hashes { - raw_map.hashes[i] = MAP_SENTINEL - } + map_clear_dynamic((^Raw_Map)(m), map_info(K, V)) } @builtin reserve_map :: proc(m: ^$T/map[$K]$V, capacity: int, loc := #caller_location) { if m != nil { - h := __get_map_header_table(T) - __dynamic_map_reserve(m, h, uint(capacity), loc) + __dynamic_map_reserve((^Raw_Map)(m), map_info(K, V), 
uint(capacity), loc) } } @@ -325,15 +306,9 @@ shrink_map :: proc(m: ^$T/map[$K]$V, new_cap := -1, loc := #caller_location) -> delete_key :: proc(m: ^$T/map[$K]$V, key: K) -> (deleted_key: K, deleted_value: V) { if m != nil { key := key - h := __get_map_header(m) - fr := __map_find(h, &key) - if fr.entry_index != MAP_SENTINEL { - entry := __dynamic_map_get_entry(h, fr.entry_index) - deleted_key = (^K)(uintptr(entry)+h.key_offset)^ - deleted_value = (^V)(uintptr(entry)+h.value_offset)^ - - __dynamic_map_erase(h, fr) - } + info := map_info(K, V) + _ = map_erase_dynamic((^Raw_Map)(m), info, uintptr(&key)) + // TODO(bill) old key and value } return } diff --git a/core/runtime/dynamic_map_internal.odin b/core/runtime/dynamic_map_internal.odin index abe58fc5a..07c1fbe7e 100644 --- a/core/runtime/dynamic_map_internal.odin +++ b/core/runtime/dynamic_map_internal.odin @@ -3,150 +3,727 @@ package runtime import "core:intrinsics" _ :: intrinsics -INITIAL_MAP_CAP :: 16 +// High performance, cache-friendly, open-addressed Robin Hood hashing hash map +// data structure with various optimizations for Odin. +// +// Copyright 2022 (c) Dale Weiler +// +// The core of the hash map data structure is the Raw_Map struct which is a +// type-erased representation of the map. This type-erased representation is +// used in two ways: static and dynamic. When static type information is known, +// the procedures suffixed with _static should be used instead of _dynamic. The +// static procedures are optimized since they have type information. Hashing of +// keys, comparison of keys, and data lookup are all optimized. When type +// information is not known, the procedures suffixed with _dynamic should be +// used. The representation of the map is the same for both static and dynamic, +// and procedures of each can be mixed and matched. The purpose of the dynamic +// representation is to enable reflection and runtime manipulation of the map. 
+// The dynamic procedures all take an additional Map_Info structure parameter
+// which carries runtime values describing the size, alignment, and offset of
+// various traits of a given key and value type pair. The Map_Info value can
+// be created by calling map_info(K, V) with the key and value typeids.
+//
+// This map implementation makes extensive use of uintptr for representing
+// sizes, lengths, capacities, masks, pointers, offsets, and addresses to avoid
+// expensive sign extension and masking that would be generated if types were
+// cast all over. The only place regular ints show up is in the cap() and
+// len() implementations.
+//
+// To make this map cache-friendly it uses a novel strategy to ensure keys and
+// values of the map are always cache-line aligned and that no single key or
+// value of any type ever straddles a cache-line. This cache efficiency makes
+// for quick lookups because the linear-probe always addresses data in a cache
+// friendly way. This is enabled through the use of a special meta-type called
+// a Map_Cell which packs as many values of a given type into a local array adding
+// internal padding to round to MAP_CACHE_LINE_SIZE. One other benefit to storing
+// the internal data in this manner is false sharing no longer occurs when using
+// a map, enabling efficient concurrent access of the map data structure with
+// minimal locking if desired.
 
-// Temporary data structure for comparing hashes and keys
-Map_Hash :: struct {
-	hash:    uintptr,
-	key_ptr: rawptr, // address of Map_Entry_Header.key
+// With Robin Hood hashing a maximum load factor of 75% is ideal.
+// The value is a percentage (see the #assert below).
+MAP_LOAD_FACTOR :: 75
+
+// Minimum log2 capacity.
+MAP_MIN_LOG2_CAPACITY :: 6 // 64 elements
+
+// MAP_LOAD_FACTOR is a percentage and has to be less than 100% though.
+#assert(MAP_LOAD_FACTOR < 100)
+
+// This is safe to change. The log2 size of a cache-line. At minimum it has to
+// be six though. Higher cache line sizes are permitted.
+MAP_CACHE_LINE_LOG2 :: 6
+
+// The size of a cache-line.
+MAP_CACHE_LINE_SIZE :: 1 << MAP_CACHE_LINE_LOG2
+
+// The minimum cache-line size allowed by this implementation is 64 bytes since
+// we need 6 bits in the base pointer to store the integer log2 capacity, which
+// at maximum is 63. Odin uses signed integers to represent length and capacity,
+// so only 63 bits are needed in the maximum case.
+#assert(MAP_CACHE_LINE_SIZE >= 64)
+
+// Map_Cell type that packs multiple T in such a way to ensure that each T stays
+// aligned by align_of(T) and such that align_of(Map_Cell(T)) % MAP_CACHE_LINE_SIZE == 0
+//
+// This means a value of type T will never straddle a cache-line.
+//
+// When multiple Ts can fit in a single cache-line the data array will have more
+// than one element. When it cannot, the data array will have one element and
+// an array of Map_Cell(T) will be padded to stay a multiple of MAP_CACHE_LINE_SIZE.
+//
+// We rely on the type system to do all the arithmetic and padding for us here.
+//
+// The usual array[index] indexing for []T backed by a []Map_Cell(T) becomes a bit
+// more involved as there now may be internal padding. The indexing now becomes
+//
+//	N :: len(Map_Cell(T){}.data)
+//	i := index / N
+//	j := index % N
+//	cell[i].data[j]
+//
+// However, since len(Map_Cell(T){}.data) is a compile-time constant, there are some
+// optimizations we can do to eliminate the need for any divisions as N will
+// be bounded by [1, MAP_CACHE_LINE_SIZE] (a one-byte T yields the upper bound).
+//
+// In the optimal case a cell carries no internal padding (size_of(T) divides
+// MAP_CACHE_LINE_SIZE evenly, or len(Map_Cell(T){}.data) = 1 with an exact fit),
+// so the cell array can be treated as a regular array of T, which is the case
+// for hashes.
+Map_Cell :: struct($T: typeid) #align MAP_CACHE_LINE_SIZE {
+	data: [MAP_CACHE_LINE_SIZE / size_of(T) when size_of(T) < MAP_CACHE_LINE_SIZE else 1]T,
 }
 
-__get_map_key_hash :: #force_inline proc "contextless" (k: ^$K) -> uintptr {
-	hasher := intrinsics.type_hasher_proc(K)
-	return hasher(k, 0)
+// Type-erased description of a Map_Cell(T): the size and layout traits needed
+// to index into an array of cells at runtime without knowing T.
+//
+// 32-bytes on 64-bit
+// 16-bytes on 32-bit
+Map_Cell_Info :: struct {
+	size_of_type:      uintptr, // 8-bytes on 64-bit, 4-bytes on 32-bits
+	align_of_type:     uintptr, // 8-bytes on 64-bit, 4-bytes on 32-bits
+	size_of_cell:      uintptr, // 8-bytes on 64-bit, 4-bytes on 32-bits
+	elements_per_cell: uintptr, // 8-bytes on 64-bit, 4-bytes on 32-bits
 }
 
-__get_map_entry_key_ptr :: #force_inline proc "contextless" (h: Map_Header_Table, entry: ^Map_Entry_Header) -> rawptr {
-	return rawptr(uintptr(entry) + h.key_offset)
-}
-
-Map_Index :: distinct uint
-MAP_SENTINEL :: ~Map_Index(0)
-
-Map_Find_Result :: struct {
-	hash_index:  Map_Index,
-	entry_prev:  Map_Index,
-	entry_index: Map_Index,
-}
-
-Map_Entry_Header :: struct {
-	hash: uintptr,
-	next: Map_Index,
-/*
-	key:   Key_Value,
-	value: Value_Type,
-*/
-}
-
-Map_Header_Table :: struct {
-	equal: Equal_Proc,
-
-	entry_size:  int,
-	entry_align: int,
-
-	key_offset:  uintptr,
-	key_size:    int,
-
-	value_offset: uintptr,
-	value_size:   int,
-}
-
-Map_Header :: struct {
-	m: ^Raw_Map,
-	using table: Map_Header_Table,
-}
-
-// USED INTERNALLY BY THE COMPILER
-__dynamic_map_get :: proc "contextless" (m: rawptr, table: Map_Header_Table, key_hash: uintptr, key_ptr: rawptr) -> rawptr {
-	if m != nil {
-		h := Map_Header{(^Raw_Map)(m), table}
-		index := __dynamic_map_find(h, key_hash, key_ptr).entry_index
-		if index != MAP_SENTINEL {
-			data := uintptr(__dynamic_map_get_entry(h, index))
-			return rawptr(data + h.value_offset)
-		}
+// Runtime cell indexing driven by a Map_Cell_Info value: returns the address of
+// element `index` within the array of cells that starts at `base`.
+map_cell_index_dynamic :: #force_inline proc "contextless" (base: uintptr, info: ^Map_Cell_Info, index: uintptr) -> uintptr {
+	// Power-of-two element counts up to 64 are special-cased so that splitting
+	// the index into (cell, slot) is a shift and a mask rather than an integer
+	// division; every other count falls back to division and modulo.
+	per_cell := info.elements_per_cell
+	cell, slot: uintptr
+	switch per_cell {
+	case 1:
+		// One element per cell: the cell array is a plain strided array.
+		return base + (index * info.size_of_cell)
+	case 2:
+		cell, slot = index >> 1, index & 1
+	case 4:
+		cell, slot = index >> 2, index & 3
+	case 8:
+		cell, slot = index >> 3, index & 7
+	case 16:
+		cell, slot = index >> 4, index & 15
+	case 32:
+		cell, slot = index >> 5, index & 31
+	case 64:
+		cell, slot = index >> 6, index & 63
+	case:
+		cell, slot = index / per_cell, index % per_cell
 	}
+	return base + (cell * info.size_of_cell) + (slot * info.size_of_type)
+}
+
+// Same as above procedure but with compile-time constant index.
+map_cell_index_dynamic_const :: proc "contextless" (base: uintptr, #no_alias info: ^Map_Cell_Info, $INDEX: uintptr) -> uintptr {
+	// Split the constant index into a whole-cell part and a within-cell part,
+	// then scale each by its stride.
+	per_cell    := uintptr(info.elements_per_cell)
+	cell_offset := (INDEX / per_cell) * uintptr(info.size_of_cell)
+	data_offset := (INDEX % per_cell) * uintptr(info.size_of_type)
+	return base + cell_offset + data_offset
+}
+
+// len() for map
+map_len :: #force_inline proc "contextless" (m: Raw_Map) -> int {
+	length := int(m.len)
+	return length
+}
+
+// cap() for map
+map_cap :: #force_inline proc "contextless" (m: Raw_Map) -> int {
+	// The lower six bits of the data uintptr hold the integer log2 of the
+	// capacity (at most 2^6-1, or 63), which is enough because the capacity is
+	// always a power of two and Odin represents length and capacity as signed
+	// integers. A map without backing storage reports a capacity of zero.
+	if m.data == 0 {
+		return 0
+	}
+	return 1 << map_log2_cap(m)
+}
+
+// Number of entries allowed at the given log2 capacity before the load factor
+// is exceeded. The load factor is not configurable; it is applied as a fixed
+// point percentage to avoid floating point operations. The division can be
+// optimized out by multiplying by the multiplicative inverse of 100.
+map_load_factor :: #force_inline proc "contextless" (log2_capacity: uintptr) -> uintptr {
+	capacity := uintptr(1) << log2_capacity
+	return (capacity * MAP_LOAD_FACTOR) / 100
+}
+
+// map_load_factor of the map's current capacity, as an int.
+map_resize_threshold :: #force_inline proc "contextless" (m: Raw_Map) -> int {
+	threshold := map_load_factor(map_log2_cap(m))
+	return int(threshold)
+}
+
+// The data stores the log2 capacity in the lower six bits. This is primarily
+// used in the implementation rather than map_cap since the check for data == 0
+// isn't necessary in the implementation. cap() on the other hand needs to work
+// when called on an empty map.
+map_log2_cap :: #force_inline proc "contextless" (m: Raw_Map) -> uintptr {
+	return m.data & (64 - 1)
+}
+
+// Canonicalize the data by removing the tagged capacity stored in the lower six
+// bits of the data uintptr.
+map_data :: #force_inline proc "contextless" (m: Raw_Map) -> uintptr {
+	return m.data & ~uintptr(64 - 1)
+}
+
+
+
+Map_Hash :: uintptr
+
+// __get_map_key_hash :: #force_inline proc "contextless" (k: ^$K) -> uintptr {
+// 	hasher := intrinsics.type_hasher_proc(K)
+// 	return hasher(k, 0)
+// }
+
+// __get_map_entry_key_ptr :: #force_inline proc "contextless" (h: Map_Header_Table, entry: ^Map_Entry_Header) -> rawptr {
+// 	return rawptr(uintptr(entry) + h.key_offset)
+// }
+
+
+// Procedure to check if a slot is empty for a given hash. This is represented
+// by the zero value to make the zero value useful. This is a procedure just
+// for prose reasons.
+map_hash_is_empty :: #force_inline proc "contextless" (hash: Map_Hash) -> bool {
+	return hash == 0
+}
+
+// Reports whether the hash marks a deleted slot (a tombstone).
+map_hash_is_deleted :: #force_inline proc "contextless" (hash: Map_Hash) -> bool {
+	// The MSB indicates a tombstone
+	return (hash >> ((size_of(Map_Hash) * 8) - 1)) != 0
+}
+
+// Computes the desired position in the array. This is just index % capacity,
+// but a procedure as there's some math involved here to recover the capacity.
+map_desired_position :: #force_inline proc "contextless" (m: Raw_Map, hash: Map_Hash) -> uintptr {
+	// We do not use map_cap since we know the capacity will not be zero here.
+	capacity := uintptr(1) << map_log2_cap(m)
+	return uintptr(hash & Map_Hash(capacity - 1))
+}
+
+// Distance (in slots, wrapping around the table) between `slot` and the
+// position the given hash would ideally occupy.
+map_probe_distance :: #force_inline proc "contextless" (m: Raw_Map, hash: Map_Hash, slot: uintptr) -> uintptr {
+	// We do not use map_cap since we know the capacity will not be zero here.
+	capacity := uintptr(1) << map_log2_cap(m)
+	return (slot + capacity - map_desired_position(m, hash)) & (capacity - 1)
+}
+
+// When working with the type-erased structure at runtime we need information
+// about the map to make working with it possible. This info structure stores
+// that.
+//
+// The Odin compiler should generate this for __get_map_header.
+//
+// 80-bytes on 64-bit
+// 40-bytes on 32-bit
+Map_Info :: struct {
+	ks:   Map_Cell_Info,                                                 // 32-bytes on 64-bit, 16-bytes on 32-bit
+	vs:   Map_Cell_Info,                                                 // 32-bytes on 64-bit, 16-bytes on 32-bit
+	hash: proc "contextless" (key: rawptr, seed: Map_Hash) -> Map_Hash, // 8-bytes on 64-bit, 4-bytes on 32-bit
+	cmp:  proc "contextless" (lhs, rhs: rawptr) -> bool,                 // 8-bytes on 64-bit, 4-bytes on 32-bit
+}
+
+
+// The Map_Info structure is basically a pseudo-table of information for a given K and V pair.
+map_info :: #force_inline proc "contextless" ($K: typeid, $V: typeid) -> ^Map_Info where intrinsics.type_is_comparable(K) {
+	@static INFO := Map_Info {
+		Map_Cell_Info {
+			size_of(K),
+			align_of(K),
+			size_of(Map_Cell(K)),
+			len(Map_Cell(K){}.data),
+		},
+		Map_Cell_Info {
+			size_of(V),
+			align_of(V),
+			size_of(Map_Cell(V)),
+			len(Map_Cell(V){}.data),
+		},
+		proc "contextless" (ptr: rawptr, seed: uintptr) -> Map_Hash { return intrinsics.type_hasher_proc(K)(ptr, seed) } ,
+		proc "contextless" (a, b: rawptr) -> bool { return intrinsics.type_equal_proc(K)(a, b) },
+	}
+	return &INFO
+}
+
+
+// Splits the single backing allocation into its regions: keys (ks), values
+// (vs), hashes (hs), followed by scratch keys (sk) and scratch values (sv).
+// The map must have backing storage; the capacity is read from the tag bits.
+map_kvh_data_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info) -> (ks: uintptr, vs: uintptr, hs: [^]Map_Hash, sk: uintptr, sv: uintptr) {
+	@static INFO_HS := Map_Cell_Info {
+		size_of(Map_Hash),
+		align_of(Map_Hash),
+		size_of(Map_Cell(Map_Hash)),
+		len(Map_Cell(Map_Hash){}.data),
+	}
+
+	capacity := uintptr(1) << map_log2_cap(m)
+	ks   = map_data(m)
+	vs   = map_cell_index_dynamic(ks,  &info.ks, capacity) // Skip past ks to get start of vs
+	hs_ := map_cell_index_dynamic(vs,  &info.vs, capacity) // Skip past vs to get start of hs
+	sk   = map_cell_index_dynamic(hs_, &INFO_HS, capacity) // Skip past hs to get start of sk
+	// Need to skip past two elements in the scratch key space to get to the start
+	// of the scratch value space, of which there's only two elements as well.
+	sv = map_cell_index_dynamic_const(sk, &info.ks, 2)
+
+	hs = ([^]Map_Hash)(hs_)
+	return
+}
+
+
+// The only procedure which needs access to the context is the one which allocates the map.
+//
+// Allocates backing storage for 1 << log2_capacity entries and returns a
+// cleared Raw_Map whose data pointer is tagged with log2_capacity. A
+// log2_capacity of zero yields a map without storage that only records the
+// allocator; a log2_capacity of 64 or more yields .Out_Of_Memory.
+map_alloc_dynamic :: proc(info: ^Map_Info, log2_capacity: uintptr, allocator := context.allocator) -> (result: Raw_Map, err: Allocator_Error) {
+	if log2_capacity == 0 {
+		// Empty map, but set the allocator.
+		return { 0, 0, allocator }, nil
+	}
+
+	if log2_capacity >= 64 {
+		// Overflowed: the capacity tag only has six bits, so 63 is the largest
+		// log2 capacity that can be represented.
+		return {}, .Out_Of_Memory
+	}
+
+	capacity := uintptr(1) << log2_capacity
+
+	@static INFO_HS := Map_Cell_Info {
+		size_of(Map_Hash),
+		align_of(Map_Hash),
+		size_of(Map_Cell(Map_Hash)),
+		len(Map_Cell(Map_Hash){}.data),
+	}
+
+	round :: #force_inline proc "contextless" (value: uintptr) -> uintptr {
+		return (value + MAP_CACHE_LINE_SIZE - 1) & ~uintptr(MAP_CACHE_LINE_SIZE - 1)
+	}
+
+	size := uintptr(0)
+	size = round(map_cell_index_dynamic(size, &info.ks, capacity))
+	size = round(map_cell_index_dynamic(size, &info.vs, capacity))
+	size = round(map_cell_index_dynamic(size, &INFO_HS, capacity))
+	// map_kvh_data_dynamic hands out scratch regions (sk, sv) located after the
+	// hashes, each holding two elements, and map_add_hash_dynamic writes to
+	// them. They have to be part of the allocation or those writes land past
+	// the end of the buffer.
+	size = round(map_cell_index_dynamic(size, &info.ks, 2)) // Two scratch keys
+	size = round(map_cell_index_dynamic(size, &info.vs, 2)) // Two scratch values
+
+	data := mem_alloc(int(size), MAP_CACHE_LINE_SIZE, allocator) or_return
+	data_ptr := uintptr(raw_data(data))
+
+	result = {
+		// Tagged pointer representation for capacity. This relies on the
+		// allocation honouring the requested MAP_CACHE_LINE_SIZE alignment so
+		// that the low six bits of data_ptr are zero.
+		data_ptr | log2_capacity,
+		0,
+		allocator,
+	}
+
+	map_clear_dynamic(&result, info)
+
+	return
+}
+
+// When the type information is known we should use map_insert_hash_static for
+// better performance. This procedure has to stack allocate storage to store
+// local keys during the Robin Hood hashing technique where elements are swapped
+// in the backing arrays to reduce variance.
+// This swapping can only be done with
+// memcpy since there is no type information.
+//
+// This procedure returns the address of the just inserted value.
+@(optimization_mode="size")
+map_insert_hash_dynamic :: proc(m: Raw_Map, info: ^Map_Info, h: Map_Hash, k, v: uintptr) -> (result: uintptr) {
+	info_ks := &info.ks
+	info_vs := &info.vs
+
+	// Storage to exchange when reducing variance.
+	k_storage := intrinsics.alloca(info_ks.size_of_type, MAP_CACHE_LINE_SIZE)
+	v_storage := intrinsics.alloca(info_vs.size_of_type, MAP_CACHE_LINE_SIZE)
+	intrinsics.mem_copy_non_overlapping(rawptr(k_storage), rawptr(k), info_ks.size_of_type)
+	intrinsics.mem_copy_non_overlapping(rawptr(v_storage), rawptr(v), info_vs.size_of_type)
+	h := h
+
+	p := map_desired_position(m, h)
+	d := uintptr(0)
+	c := (uintptr(1) << map_log2_cap(m)) - 1 // Wrap-around mask (capacity is a power of two)
+
+	ks, vs, hs, _, _ := map_kvh_data_dynamic(m, info)
+
+	for {
+		hp := &hs[p]
+		element_hash := hp^
+
+		if map_hash_is_empty(element_hash) {
+			k_dst := map_cell_index_dynamic(ks, info_ks, p)
+			v_dst := map_cell_index_dynamic(vs, info_vs, p)
+			intrinsics.mem_copy_non_overlapping(rawptr(k_dst), k_storage, info_ks.size_of_type)
+			intrinsics.mem_copy_non_overlapping(rawptr(v_dst), v_storage, info_vs.size_of_type)
+			hp^ = h
+			// If a swap already happened, the caller's value lives where that
+			// first swap placed it, not in this slot.
+			return result if result != 0 else v_dst
+		}
+
+		if pd := map_probe_distance(m, element_hash, p); pd < d {
+			if map_hash_is_deleted(element_hash) {
+				k_dst := map_cell_index_dynamic(ks, info_ks, p)
+				v_dst := map_cell_index_dynamic(vs, info_vs, p)
+				intrinsics.mem_copy_non_overlapping(rawptr(k_dst), k_storage, info_ks.size_of_type)
+				intrinsics.mem_copy_non_overlapping(rawptr(v_dst), v_storage, info_vs.size_of_type)
+				hp^ = h
+				return result if result != 0 else v_dst
+			}
+
+			// The first swap is where the caller's value ends up; remember it.
+			if result == 0 {
+				result = map_cell_index_dynamic(vs, info_vs, p)
+			}
+
+			swap :: #force_inline proc "contextless" (lhs, rhs, size: uintptr) {
+				tmp := intrinsics.alloca(size, MAP_CACHE_LINE_SIZE)
+				intrinsics.mem_copy_non_overlapping(&tmp[0], rawptr(lhs), size)
+				intrinsics.mem_copy_non_overlapping(rawptr(lhs), rawptr(rhs), size)
+				intrinsics.mem_copy_non_overlapping(rawptr(rhs), &tmp[0], size)
+			}
+
+			// Exchange to reduce variance.
+			swap(uintptr(k_storage), map_cell_index_dynamic(ks, info_ks, p), info_ks.size_of_type)
+			swap(uintptr(v_storage), map_cell_index_dynamic(vs, info_vs, p), info_vs.size_of_type)
+			hp^, h = h, hp^
+
+			d = pd
+		}
+
+		p = (p + 1) & c
+		d += 1
+	}
+}
+
+// Inserts the pair (ik, iv) with hash h without returning the address of the
+// inserted value. Unlike map_insert_hash_dynamic it uses the scratch key and
+// value regions returned by map_kvh_data_dynamic (sk, sv) as temporary storage
+// instead of stack allocating.
+@(optimization_mode="speed")
+map_add_hash_dynamic :: proc(m: Raw_Map, #no_alias info: ^Map_Info, h: Map_Hash, ik: uintptr, iv: uintptr) {
+	info_ks := &info.ks
+	info_vs := &info.vs
+
+	capacity := uintptr(1) << map_log2_cap(m)
+	p := map_desired_position(m, h)
+	d := uintptr(0)
+	c := capacity - 1 // Wrap-around mask (capacity is a power of two)
+
+	ks, vs, hs, sk, sv := map_kvh_data_dynamic(m, info)
+
+	// Avoid redundant loads of these values
+	size_of_k := info_ks.size_of_type
+	size_of_v := info_vs.size_of_type
+
+	// Use sk and sv scratch storage space for dynamic k and v storage here.
+	//
+	// Simulate the following at runtime
+	// 	k = ik
+	// 	v = iv
+	// 	h = h
+	k := map_cell_index_dynamic_const(sk, info_ks, 0)
+	v := map_cell_index_dynamic_const(sv, info_vs, 0)
+	intrinsics.mem_copy_non_overlapping(rawptr(k), rawptr(ik), size_of_k)
+	intrinsics.mem_copy_non_overlapping(rawptr(v), rawptr(iv), size_of_v)
+	h := h
+
+	// Temporary k and v dynamic storage for swap below
+	tk := map_cell_index_dynamic_const(sk, info_ks, 1)
+	tv := map_cell_index_dynamic_const(sv, info_vs, 1)
+
+	for {
+		hp := &hs[p]
+		element_hash := hp^
+
+		if map_hash_is_empty(element_hash) {
+			k_dst := map_cell_index_dynamic(ks, info_ks, p)
+			v_dst := map_cell_index_dynamic(vs, info_vs, p)
+			intrinsics.mem_copy_non_overlapping(rawptr(k_dst), rawptr(k), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(v_dst), rawptr(v), size_of_v)
+			hp^ = h
+			return
+		}
+
+		if pd := map_probe_distance(m, element_hash, p); pd < d {
+			if map_hash_is_deleted(element_hash) {
+				k_dst := map_cell_index_dynamic(ks, info_ks, p)
+				v_dst := map_cell_index_dynamic(vs, info_vs, p)
+				intrinsics.mem_copy_non_overlapping(rawptr(k_dst), rawptr(k), size_of_k)
+				intrinsics.mem_copy_non_overlapping(rawptr(v_dst), rawptr(v), size_of_v)
+				hp^ = h
+				return
+			}
+
+			// Keys are indexed with the key cell layout and values with the
+			// value cell layout; mixing them up addresses the wrong element
+			// whenever the two layouts differ.
+			kp := map_cell_index_dynamic(ks, info_ks, p)
+			vp := map_cell_index_dynamic(vs, info_vs, p)
+
+			// Simulate the following at runtime with dynamic storage
+			//
+			// 	kp^, k = k, kp^
+			// 	vp^, v = v, vp^
+			// 	hp^, h = h, hp^
+			intrinsics.mem_copy_non_overlapping(rawptr(tk), rawptr(kp), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(tv), rawptr(vp), size_of_v)
+			intrinsics.mem_copy_non_overlapping(rawptr(kp), rawptr(k), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(vp), rawptr(v), size_of_v)
+			intrinsics.mem_copy_non_overlapping(rawptr(k), rawptr(tk), size_of_k)
+			intrinsics.mem_copy_non_overlapping(rawptr(v), rawptr(tv), size_of_v)
+			hp^, h = h, hp^
+
+			d = pd
+		}
+
+		p = (p + 1) & c
+		d += 1
+	}
+}
+
+@(optimization_mode="size")
+map_grow_dynamic :: proc(#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info) -> Allocator_Error {
+	// Doubles the capacity of the map, or gives a map without storage its
+	// initial MAP_MIN_LOG2_CAPACITY allocation. Live entries are re-inserted
+	// into the new storage; tombstones are dropped. m.len is left untouched.
+	allocator := m.allocator
+
+	log2_capacity := map_log2_cap(m^)
+
+	if m.data == 0 {
+		n := map_alloc_dynamic(info, MAP_MIN_LOG2_CAPACITY, allocator) or_return
+		m.data = n.data
+		return nil
+	}
+
+	resized := map_alloc_dynamic(info, log2_capacity + 1, allocator) or_return
+
+	capacity := uintptr(1) << log2_capacity
+
+	ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
+
+	// Cache these loads to avoid hitting them in the for loop.
+	info_ks := &info.ks
+	info_vs := &info.vs
+
+	n := map_len(m^)
+	for i := uintptr(0); i < capacity; i += 1 {
+		hash := hs[i]
+		if map_hash_is_empty(hash) do continue
+		if map_hash_is_deleted(hash) do continue
+		k := map_cell_index_dynamic(ks, info_ks, i)
+		v := map_cell_index_dynamic(vs, info_vs, i)
+		map_insert_hash_dynamic(resized, info, hash, k, v)
+		// Only need to do this comparison on each actually added pair, so do not
+		// fold it into the for loop comparator as a micro-optimization.
+		n -= 1
+		if n == 0 do break
+	}
+
+	// ks is the untagged base address of the old allocation.
+	mem_free(rawptr(ks), allocator)
+
+	m.data = resized.data // The tagged pointer carries the new log2 capacity
 	return nil
 }
 
-// USED INTERNALLY BY THE COMPILER
-__dynamic_map_set :: proc "odin" (m: rawptr, table: Map_Header_Table, key_hash: uintptr, key_ptr: rawptr, value: rawptr, loc := #caller_location) -> ^Map_Entry_Header #no_bounds_check {
-	add_entry :: proc "odin" (h: Map_Header, key_hash: uintptr, key_ptr: rawptr, loc := #caller_location) -> Map_Index {
-		prev := Map_Index(h.m.entries.len)
-		c := Map_Index(__dynamic_array_append_nothing(&h.m.entries, h.entry_size, h.entry_align, loc))
-		if c != prev {
-			end := __dynamic_map_get_entry(h, c-1)
-			end.hash = key_hash
-			mem_copy(rawptr(uintptr(end) + h.key_offset), key_ptr, h.key_size)
-			end.next = MAP_SENTINEL
-		}
-		return prev
+
+// Ensures the map can address at least new_capacity slots. The new capacity is
+// new_capacity rounded up to a power of two; a map without storage gets at
+// least MAP_MIN_LOG2_CAPACITY. Does nothing when the current capacity already
+// suffices. Live entries are re-inserted; m.len is left untouched.
+@(optimization_mode="size")
+map_reserve_dynamic :: proc(#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uintptr) -> Allocator_Error {
+	allocator := m.allocator
+
+	log2_capacity := map_log2_cap(m^)
+	capacity := uintptr(1) << log2_capacity
+
+	if capacity >= new_capacity {
+		return nil
+	}
+	// ceiling nearest power of two: bit width of uintptr minus the leading zeros
+	// of new_capacity-1. size_of yields bytes, hence the multiplication by 8.
+	// new_capacity is at least 2 here, so new_capacity-1 is never zero.
+	log2_new_capacity := size_of(uintptr)*8 - intrinsics.count_leading_zeros(new_capacity-1)
+
+	if m.data == 0 {
+		// Honour the requested capacity, but never start below the minimum.
+		n := map_alloc_dynamic(info, max(log2_new_capacity, MAP_MIN_LOG2_CAPACITY), allocator) or_return
+		m.data = n.data
+		return nil
 	}
 
-	h := Map_Header{(^Raw_Map)(m), table}
+	resized := map_alloc_dynamic(info, log2_new_capacity, allocator) or_return
 
-	index := MAP_SENTINEL
-	if len(h.m.hashes) == 0 {
-		__dynamic_map_reserve(m, table, INITIAL_MAP_CAP, loc)
-		__dynamic_map_grow(h, loc)
+	ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
+
+	// Cache these loads to avoid hitting them in the for loop.
+	info_ks := &info.ks
+	info_vs := &info.vs
+
+	n := map_len(m^)
+	for i := uintptr(0); i < capacity; i += 1 {
+		hash := hs[i]
+		if map_hash_is_empty(hash) do continue
+		if map_hash_is_deleted(hash) do continue
+		k := map_cell_index_dynamic(ks, info_ks, i)
+		v := map_cell_index_dynamic(vs, info_vs, i)
+		map_insert_hash_dynamic(resized, info, hash, k, v)
+		// Only need to do this comparison on each actually added pair, so do not
+		// fold it into the for loop comparator as a micro-optimization.
+		n -= 1
+		if n == 0 do break
 	}
 
-	fr := __dynamic_map_find(h, key_hash, key_ptr)
-	if fr.entry_index != MAP_SENTINEL {
-		index = fr.entry_index
-	} else {
-		index = add_entry(h, key_hash, key_ptr, loc)
-		if fr.entry_prev != MAP_SENTINEL {
-			entry := __dynamic_map_get_entry(h, fr.entry_prev)
-			entry.next = index
-		} else if fr.hash_index != MAP_SENTINEL {
-			h.m.hashes[fr.hash_index] = index
-		} else {
-			return nil
-		}
-	}
+	mem_free(rawptr(ks), allocator)
 
-	e := __dynamic_map_get_entry(h, index)
-	e.hash = key_hash
+	m.data = resized.data // The tagged pointer carries the new log2 capacity
 
-	key := rawptr(uintptr(e) + h.key_offset)
-	val := rawptr(uintptr(e) + h.value_offset)
-
-	mem_copy(key, key_ptr, h.key_size)
-	mem_copy(val, value, h.value_size)
-
-	if __dynamic_map_full(h) {
-		__dynamic_map_grow(h, loc)
-	}
-
-	return __dynamic_map_get_entry(h, index)
+	return nil
 }
 
-// USED INTERNALLY BY THE COMPILER
-__dynamic_map_reserve :: proc "odin" (m: rawptr, table: Map_Header_Table, cap: uint, loc := #caller_location) {
-	h := Map_Header{(^Raw_Map)(m), table}
-	c := context
-	if h.m.entries.allocator.procedure != nil {
-		c.allocator = h.m.entries.allocator
+// Halves the capacity of the map when the current entries still fit within the
+// load factor of the smaller table; otherwise does nothing. Live entries are
+// re-inserted; m.len is left untouched.
+@(optimization_mode="size")
+map_shrink_dynamic :: proc(#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info) -> Allocator_Error {
+	allocator := m.allocator
+
+	// A map without backing storage has nothing to shrink. Check this first so
+	// that log2_capacity - 1 below never wraps around for a capacity tag of zero.
+	if m.data == 0 do return nil
+
+	// Cannot shrink the capacity if the number of items in the map would reach
+	// the resize threshold of the next smaller log2 capacity. That is the shrunk
+	// map needs to be within the max load factor.
+	log2_capacity := map_log2_cap(m^)
+	if m.len >= map_load_factor(log2_capacity - 1) do return nil
+
+	shrunk := map_alloc_dynamic(info, log2_capacity - 1, allocator) or_return
+
+	capacity := uintptr(1) << log2_capacity
+
+	ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
+
+	info_ks := &info.ks
+	info_vs := &info.vs
+
+	n := map_len(m^)
+	for i := uintptr(0); i < capacity; i += 1 {
+		hash := hs[i]
+		if map_hash_is_empty(hash) do continue
+		if map_hash_is_deleted(hash) do continue
+
+		k := map_cell_index_dynamic(ks, info_ks, i)
+		v := map_cell_index_dynamic(vs, info_vs, i)
+
+		map_insert_hash_dynamic(shrunk, info, hash, k, v)
+
+		// Only need to do this comparison on each actually added pair, so do not
+		// fold it into the for loop comparator as a micro-optimization.
+		n -= 1
+		if n == 0 do break
 	}
-	context = c
 
-	cap := cap
-	cap = ceil_to_pow2(cap)
+	// Same release call as map_grow_dynamic and map_reserve_dynamic.
+	mem_free(rawptr(ks), allocator)
 
-	__dynamic_array_reserve(&h.m.entries, h.entry_size, h.entry_align, int(cap), loc)
+	m.data = shrunk.data // The tagged pointer carries the new log2 capacity
 
-	if h.m.entries.len*2 < len(h.m.hashes) {
-		return
-	}
-	if __slice_resize(&h.m.hashes, int(cap*2), h.m.entries.allocator, loc) {
-		__dynamic_map_reset_entries(h, loc)
+	return nil
+}
+
+// Single procedure for static and dynamic paths.
+@(require_results) +map_free :: proc(m: Raw_Map, loc := #caller_location) -> Allocator_Error { + return mem_free(rawptr(map_data(m)), m.allocator, loc) +} + +@(optimization_mode="speed") +map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (index: uintptr, ok: bool) { + if map_len(m) == 0 do return 0, false + h := info.hash(rawptr(k), 0) + p := map_desired_position(m, h) + d := uintptr(0) + c := (uintptr(1) << map_log2_cap(m)) - 1 + ks, _, hs, _, _ := map_kvh_data_dynamic(m, info) + info_ks := &info.ks + for { + element_hash := hs[p] + if map_hash_is_empty(element_hash) { + return 0, false + } else if d > map_probe_distance(m, element_hash, p) { + return 0, false + } else if element_hash == h && info.cmp(rawptr(k), rawptr(map_cell_index_dynamic(ks, info_ks, p))) { + return p, true + } + p = (p + 1) & c + d += 1 } } + + + +@(optimization_mode="speed") +map_insert_dynamic :: proc(#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k, v: uintptr) -> (value: uintptr, err: Allocator_Error) { + if map_len(m^) + 1 >= map_resize_threshold(m^) { + map_grow_dynamic(m, info) or_return + } + hashed := info.hash(rawptr(k), 0) + result := map_insert_hash_dynamic(m^, info, hashed, k, v) + m.len += 1 + return result, nil +} + +// Same as map_insert_dynamic but does not return address to the inserted element. 
+@(optimization_mode="speed") +map_add_dynamic :: proc(#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k, v: uintptr) -> Allocator_Error { + if map_len(m^) + 1 >= map_resize_threshold(m^) { + map_grow_dynamic(m, info) or_return + } + map_add_hash_dynamic(m^, info, info.hash(rawptr(k), 0), k, v) + m.len += 1 + return nil +} + +map_erase_dynamic :: #force_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> bool { + when size_of(Map_Hash) == 4 do MASK :: Map_Hash(0x8000_0000) + when size_of(Map_Hash) == 8 do MASK :: Map_Hash(0x8000_0000_0000_0000) + when size_of(Map_Hash) == 16 do MASK :: Map_Hash(0x8000_0000_0000_0000_0000_0000_0000_0000) + index := map_lookup_dynamic(m^, info, k) or_return + _, _, hs, _, _ := map_kvh_data_dynamic(m^, info) + hs[index] |= MASK + m.len -= 1 + return true +} + +map_clear_dynamic :: #force_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info) { + if m.data == 0 do return + _, _, hs, _, _ := map_kvh_data_dynamic(m^, info) + intrinsics.mem_zero(rawptr(hs), map_cap(m^) * size_of(Map_Hash)) + m.len = 0 +} + + +// TODO(bill): Change signature to not be a `rawptr` +__dynamic_map_get :: proc "contextless" (m: rawptr, #no_alias info: ^Map_Info, key: rawptr) -> rawptr { + rm := (^Raw_Map)(m)^ + index, ok := map_lookup_dynamic(rm, info, uintptr(key)) + if !ok { + return nil + } + _ = index + // TODO(bill) + return nil +} + +__dynamic_map_set :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, key, value: rawptr, loc := #caller_location) -> rawptr { + // value, _ := map_insert_dynamic(m, info, uintptr(key), uintptr(value)) + // return rawptr(value) + // TODO(bill) + return nil +} + +__dynamic_map_reserve :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uint, loc := #caller_location) { + map_reserve_dynamic(m, info, uintptr(new_capacity)) +} + + + + INITIAL_HASH_SEED :: 0xcbf29ce484222325 _fnv64a :: proc "contextless" (data: []byte, seed: 
u64 = INITIAL_HASH_SEED) -> u64 { @@ -154,7 +731,7 @@ _fnv64a :: proc "contextless" (data: []byte, seed: u64 = INITIAL_HASH_SEED) -> u for b in data { h = (h ~ u64(b)) * 0x100000001b3 } - return h + return h | u64(h == 0) } default_hash :: #force_inline proc "contextless" (data: []byte) -> uintptr { @@ -177,7 +754,7 @@ _default_hasher_const :: #force_inline proc "contextless" (data: rawptr, seed: u h = (h ~ b) * 0x100000001b3 p += 1 } - return uintptr(h) + return uintptr(h) | uintptr(h == 0) } default_hasher_n :: #force_inline proc "contextless" (data: rawptr, seed: uintptr, N: int) -> uintptr { @@ -188,7 +765,7 @@ default_hasher_n :: #force_inline proc "contextless" (data: rawptr, seed: uintpt h = (h ~ b) * 0x100000001b3 p += 1 } - return uintptr(h) + return uintptr(h) | uintptr(h == 0) } // NOTE(bill): There are loads of predefined ones to improve optimizations for small types @@ -216,7 +793,7 @@ default_hasher_string :: proc "contextless" (data: rawptr, seed: uintptr) -> uin for b in str { h = (h ~ u64(b)) * 0x100000001b3 } - return uintptr(h) + return uintptr(h) | uintptr(h == 0) } default_hasher_cstring :: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr { h := u64(seed) + 0xcbf29ce484222325 @@ -226,203 +803,5 @@ default_hasher_cstring :: proc "contextless" (data: rawptr, seed: uintptr) -> ui h = (h ~ u64(b)) * 0x100000001b3 ptr += 1 } - return uintptr(h) -} - - -__get_map_header :: proc "contextless" (m: ^$T/map[$K]$V) -> (header: Map_Header) { - header.m = (^Raw_Map)(m) - header.table = #force_inline __get_map_header_table(T) - return -} - -__get_map_header_runtime :: proc "contextless" (m: ^Raw_Map, ti: Type_Info_Map) -> (header: Map_Header) { - header.m = m - header.table = #force_inline __get_map_header_table_runtime(ti) - return -} - -__get_map_header_table :: proc "contextless" ($T: typeid/map[$K]$V) -> (header: Map_Header_Table) { - Entry :: struct { - hash: uintptr, - next: Map_Index, - key: K, - value: V, - } - - header.equal = 
intrinsics.type_equal_proc(K) - - header.entry_size = size_of(Entry) - header.entry_align = align_of(Entry) - - header.key_offset = offset_of(Entry, key) - header.key_size = size_of(K) - - header.value_offset = offset_of(Entry, value) - header.value_size = size_of(V) - - return -} - -__get_map_header_table_runtime :: proc "contextless" (ti: Type_Info_Map) -> (header: Map_Header) { - header.equal = ti.key_equal - - entries := ti.generated_struct.variant.(Type_Info_Struct).types[1] - entry := entries.variant.(Type_Info_Dynamic_Array).elem - e := entry.variant.(Type_Info_Struct) - - header.entry_size = entry.size - header.entry_align = entry.align - - header.key_offset = e.offsets[2] - header.key_size = e.types[2].size - - header.value_offset = e.offsets[3] - header.value_size = e.types[3].size - - return -} - - - -__slice_resize :: proc "odin" (array_: ^$T/[]$E, new_count: int, allocator: Allocator, loc := #caller_location) -> bool { - array := (^Raw_Slice)(array_) - - if new_count < array.len { - return true - } - - old_size := array.len*size_of(T) - new_size := new_count*size_of(T) - - new_data, err := mem_resize(array.data, old_size, new_size, align_of(T), allocator, loc) - if err != nil { - return false - } - if new_data != nil || size_of(E) == 0 { - array.data = raw_data(new_data) - array.len = new_count - return true - } - return false -} - -__dynamic_map_reset_entries :: proc "contextless" (h: Map_Header, loc := #caller_location) { - for i in 0.. 
(did_shrink: bool) { - c := context - if h.m.entries.allocator.procedure != nil { - c.allocator = h.m.entries.allocator - } - context = c - - return __dynamic_array_shrink(&h.m.entries, h.entry_size, h.entry_align, cap, loc) -} - - -@(private="file") -ceil_to_pow2 :: proc "contextless" (n: uint) -> uint { - if n <= 2 { - return n - } - n := n - n -= 1 - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - when size_of(int) == 8 { - n |= n >> 32 - } - n += 1 - return n -} - -__dynamic_map_grow :: proc "odin" (h: Map_Header, loc := #caller_location) { - new_count := max(uint(h.m.entries.cap) * 2, INITIAL_MAP_CAP) - // Rehash through Reserve - __dynamic_map_reserve(h.m, h.table, new_count, loc) -} - -__dynamic_map_full :: #force_inline proc "contextless" (h: Map_Header) -> bool { - return int(0.75 * f64(len(h.m.hashes))) <= h.m.entries.len -} - -__dynamic_map_find_from_entry :: proc "contextless" (h: Map_Header, e: ^Map_Entry_Header) -> Map_Find_Result #no_bounds_check { - key_ptr := __get_map_entry_key_ptr(h, e) - return __dynamic_map_find(h, e.hash, key_ptr) - -} - -__dynamic_map_find :: proc "contextless" (h: Map_Header, key_hash: uintptr, key_ptr: rawptr) -> Map_Find_Result #no_bounds_check { - fr := Map_Find_Result{MAP_SENTINEL, MAP_SENTINEL, MAP_SENTINEL} - if n := uintptr(len(h.m.hashes)); n != 0 { - fr.hash_index = Map_Index(key_hash & (n-1)) - fr.entry_index = h.m.hashes[fr.hash_index] - for fr.entry_index != MAP_SENTINEL { - entry := __dynamic_map_get_entry(h, fr.entry_index) - entry_key_ptr := __get_map_entry_key_ptr(h, entry) - if entry.hash == key_hash && h.equal(entry_key_ptr, key_ptr) { - return fr - } - - fr.entry_prev = fr.entry_index - fr.entry_index = entry.next - } - } - return fr -} - -// Utility procedure used by other runtime procedures -__map_find :: proc "contextless" (h: Map_Header, key_ptr: ^$K) -> Map_Find_Result #no_bounds_check { - hash := __get_map_key_hash(key_ptr) - return #force_inline __dynamic_map_find(h, hash, 
key_ptr) -} - -__dynamic_map_get_entry :: #force_inline proc "contextless" (h: Map_Header, index: Map_Index) -> ^Map_Entry_Header { - return (^Map_Entry_Header)(uintptr(h.m.entries.data) + uintptr(index*Map_Index(h.entry_size))) -} - -__dynamic_map_erase :: proc "contextless" (h: Map_Header, fr: Map_Find_Result) #no_bounds_check { - if fr.entry_prev != MAP_SENTINEL { - prev := __dynamic_map_get_entry(h, fr.entry_prev) - curr := __dynamic_map_get_entry(h, fr.entry_index) - prev.next = curr.next - } else { - h.m.hashes[fr.hash_index] = __dynamic_map_get_entry(h, fr.entry_index).next - } - last_index := Map_Index(h.m.entries.len-1) - if fr.entry_index != last_index { - old := __dynamic_map_get_entry(h, fr.entry_index) - end := __dynamic_map_get_entry(h, last_index) - mem_copy(old, end, h.entry_size) - - last := __dynamic_map_find_from_entry(h, old) - if last.entry_prev != MAP_SENTINEL { - e := __dynamic_map_get_entry(h, last.entry_prev) - e.next = fr.entry_index - } else { - h.m.hashes[last.hash_index] = fr.entry_index - } - } - - h.m.entries.len -= 1 + return uintptr(h) | uintptr(h == 0) } diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 9e48fd8ad..043b98173 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -1364,7 +1364,6 @@ bool is_polymorphic_type_assignable(CheckerContext *c, Type *poly, Type *source, bool key = is_polymorphic_type_assignable(c, poly->Map.key, source->Map.key, true, modify_type); bool value = is_polymorphic_type_assignable(c, poly->Map.value, source->Map.value, true, modify_type); if (key || value) { - poly->Map.entry_type = nullptr; poly->Map.internal_type = nullptr; poly->Map.lookup_result_type = nullptr; init_map_internal_types(poly); diff --git a/src/check_type.cpp b/src/check_type.cpp index 2ffe04342..ea1c90a66 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -2176,40 +2176,9 @@ Type *make_optional_ok_type(Type *value, bool typed) { return t; } -void init_map_entry_type(Type *type) { - GB_ASSERT(type->kind == 
Type_Map); - if (type->Map.entry_type != nullptr) return; - - // NOTE(bill): The preload types may have not been set yet - GB_ASSERT(t_map_hash != nullptr); - - /* - struct { - hash: uintptr, - next: int, - key: Key, - value: Value, - } - */ - Scope *s = create_scope(nullptr, builtin_pkg->scope); - - auto fields = slice_make(permanent_allocator(), 4); - fields[0] = alloc_entity_field(s, make_token_ident(str_lit("hash")), t_uintptr, false, 0, EntityState_Resolved); - fields[1] = alloc_entity_field(s, make_token_ident(str_lit("next")), t_int, false, 1, EntityState_Resolved); - fields[2] = alloc_entity_field(s, make_token_ident(str_lit("key")), type->Map.key, false, 2, EntityState_Resolved); - fields[3] = alloc_entity_field(s, make_token_ident(str_lit("value")), type->Map.value, false, 3, EntityState_Resolved); - - Type *entry_type = alloc_type_struct(); - entry_type->Struct.fields = fields; - entry_type->Struct.tags = gb_alloc_array(permanent_allocator(), String, fields.count); - - type_set_offsets(entry_type); - type->Map.entry_type = entry_type; -} - void init_map_internal_types(Type *type) { GB_ASSERT(type->kind == Type_Map); - init_map_entry_type(type); + GB_ASSERT(t_allocator != nullptr); if (type->Map.internal_type != nullptr) return; Type *key = type->Map.key; @@ -2221,19 +2190,17 @@ void init_map_internal_types(Type *type) { /* struct { - hashes: []int; - entries: [dynamic]EntryType; + data: uintptr, + size: uintptr, + allocator: runtime.Allocator, } */ Scope *s = create_scope(nullptr, builtin_pkg->scope); - Type *hashes_type = alloc_type_slice(t_int); - Type *entries_type = alloc_type_dynamic_array(type->Map.entry_type); - - - auto fields = slice_make(permanent_allocator(), 2); - fields[0] = alloc_entity_field(s, make_token_ident(str_lit("hashes")), hashes_type, false, 0, EntityState_Resolved); - fields[1] = alloc_entity_field(s, make_token_ident(str_lit("entries")), entries_type, false, 1, EntityState_Resolved); + auto fields = 
slice_make(permanent_allocator(), 3); + fields[0] = alloc_entity_field(s, make_token_ident(str_lit("data")), t_uintptr, false, 0, EntityState_Resolved); + fields[1] = alloc_entity_field(s, make_token_ident(str_lit("size")), t_uintptr, false, 1, EntityState_Resolved); + fields[2] = alloc_entity_field(s, make_token_ident(str_lit("allocator")), t_allocator, false, 2, EntityState_Resolved); generated_struct_type->Struct.fields = fields; type_set_offsets(generated_struct_type); diff --git a/src/checker.cpp b/src/checker.cpp index dd81e2a48..d5d2c6026 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -2842,12 +2842,12 @@ void init_core_source_code_location(Checker *c) { } void init_core_map_type(Checker *c) { - if (t_map_hash != nullptr) { + if (t_map_info != nullptr) { return; } - t_map_hash = find_core_type(c, str_lit("Map_Hash")); - t_map_header = find_core_type(c, str_lit("Map_Header")); - t_map_header_table = find_core_type(c, str_lit("Map_Header_Table")); + t_map_info = find_core_type(c, str_lit("Map_Info")); + t_map_cell_info = find_core_type(c, str_lit("Map_Cell_Info")); + init_mem_allocator(c); } void init_preload(Checker *c) { diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 1d2c00700..40b861d40 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -298,7 +298,7 @@ lbValue lb_simple_compare_hash(lbProcedure *p, Type *type, lbValue data, lbValue lbValue lb_get_hasher_proc_for_type(lbModule *m, Type *type) { type = core_type(type); - GB_ASSERT(is_type_valid_for_keys(type)); + GB_ASSERT_MSG(is_type_valid_for_keys(type), "%s", type_to_string(type)); Type *pt = alloc_type_pointer(type); @@ -500,51 +500,67 @@ lbValue lb_generate_anonymous_proc_lit(lbModule *m, String const &prefix_name, A return value; } -lbValue lb_gen_map_header_table_internal(lbProcedure *p, Type *map_type) { +// IMPORTANT NOTE(bill): This must match the definition in dynamic_map_internal.odin +enum : i64 { + MAP_CACHE_LINE_LOG2 = 6, + MAP_CACHE_LINE_SIZE = 1 << 
MAP_CACHE_LINE_LOG2 +}; +GB_STATIC_ASSERT(MAP_CACHE_LINE_SIZE >= 64); +void lb_map_cell_size_and_len(Type *type, i64 *size_, i64 *len_) { + i64 elem_sz = type_size_of(type); + + i64 len = 1; + if (0 < elem_sz && elem_sz < MAP_CACHE_LINE_SIZE) { + len = MAP_CACHE_LINE_SIZE / elem_sz; + } + i64 size = align_formula(elem_sz * len, MAP_CACHE_LINE_SIZE); + if (size_) *size_ = size; + if (len_) *len_ = len; +} + +LLVMValueRef lb_gen_map_cell_info(lbModule *m, Type *type) { + i64 size = 0, len = 0; + lb_map_cell_size_and_len(type, &size, &len); + + LLVMValueRef const_values[4] = {}; + const_values[0] = lb_const_int(m, t_uintptr, type_size_of(type)).value; + const_values[1] = lb_const_int(m, t_uintptr, type_align_of(type)).value; + const_values[2] = lb_const_int(m, t_uintptr, size).value; + const_values[3] = lb_const_int(m, t_uintptr, len).value; + return llvm_const_named_struct(m, t_map_cell_info, const_values, gb_count_of(const_values)); + +} +lbValue lb_gen_map_info_ptr(lbProcedure *p, Type *map_type) { lbModule *m = p->module; map_type = base_type(map_type); GB_ASSERT(map_type->kind == Type_Map); - lbAddr *found = map_get(&m->map_header_table_map, map_type); + lbAddr *found = map_get(&m->map_info_map, map_type); if (found) { - return lb_addr_load(p, *found); + return lb_addr_get_ptr(p, *found); } - GB_ASSERT(map_type->Map.entry_type->kind == Type_Struct); - i64 entry_size = type_size_of (map_type->Map.entry_type); - i64 entry_align = type_align_of (map_type->Map.entry_type); + GB_ASSERT(t_map_info != nullptr); + GB_ASSERT(t_map_cell_info != nullptr); - i64 key_offset = type_offset_of(map_type->Map.entry_type, 2); - i64 key_size = type_size_of (map_type->Map.key); + LLVMValueRef key_cell_info = lb_gen_map_cell_info(m, map_type->Map.key); + LLVMValueRef value_cell_info = lb_gen_map_cell_info(m, map_type->Map.value); - i64 value_offset = type_offset_of(map_type->Map.entry_type, 3); - i64 value_size = type_size_of (map_type->Map.value); + LLVMValueRef const_values[4] = {}; 
+ const_values[0] = key_cell_info; + const_values[1] = value_cell_info; + const_values[2] = lb_get_hasher_proc_for_type(m, map_type->Map.key).value; + const_values[3] = lb_get_equal_proc_for_type(m, map_type->Map.key).value; - Type *key_type = map_type->Map.key; - Type *val_type = map_type->Map.value; - gb_unused(val_type); + LLVMValueRef llvm_res = llvm_const_named_struct(m, t_map_info, const_values, gb_count_of(const_values)); + lbValue res = {llvm_res, t_map_info}; - Type *st = base_type(t_map_header_table); - GB_ASSERT(st->Struct.fields.count == 7); - - LLVMValueRef const_values[7] = {}; - const_values[0] = lb_get_equal_proc_for_type(m, key_type) .value; - const_values[1] = lb_const_int(m, t_int, entry_size) .value; - const_values[2] = lb_const_int(m, t_int, entry_align) .value; - const_values[3] = lb_const_int(m, t_uintptr, key_offset) .value; - const_values[4] = lb_const_int(m, t_int, key_size) .value; - const_values[5] = lb_const_int(m, t_uintptr, value_offset).value; - const_values[6] = lb_const_int(m, t_int, value_size) .value; - - LLVMValueRef llvm_res = llvm_const_named_struct(m, t_map_header_table, const_values, gb_count_of(const_values)); - lbValue res = {llvm_res, t_map_header_table}; - - lbAddr addr = lb_add_global_generated(m, t_map_header_table, res, nullptr); + lbAddr addr = lb_add_global_generated(m, t_map_info, res, nullptr); lb_make_global_private_const(addr); - map_set(&m->map_header_table_map, map_type, addr); - return lb_addr_load(p, addr); + map_set(&m->map_info_map, map_type, addr); + return lb_addr_get_ptr(p, addr); } lbValue lb_const_hash(lbModule *m, lbValue key, Type *key_type) { @@ -616,12 +632,13 @@ lbValue lb_gen_map_key_hash(lbProcedure *p, lbValue key, Type *key_type, lbValue lbValue lb_internal_dynamic_map_get_ptr(lbProcedure *p, lbValue const &map_ptr, lbValue const &key) { Type *map_type = base_type(type_deref(map_ptr.type)); - lbValue key_ptr = {}; - auto args = array_make(permanent_allocator(), 4); + lbValue key_ptr = 
lb_address_from_load_or_generate_local(p, key); + key_ptr = lb_emit_conv(p, key_ptr, t_rawptr); + + auto args = array_make(permanent_allocator(), 3); args[0] = lb_emit_conv(p, map_ptr, t_rawptr); - args[1] = lb_gen_map_header_table_internal(p, map_type); - args[2] = lb_gen_map_key_hash(p, key, map_type->Map.key, &key_ptr); - args[3] = key_ptr; + args[1] = lb_gen_map_info_ptr(p, map_type); + args[2] = key_ptr; lbValue ptr = lb_emit_runtime_call(p, "__dynamic_map_get", args); @@ -633,20 +650,19 @@ void lb_insert_dynamic_map_key_and_value(lbProcedure *p, lbValue const &map_ptr, map_type = base_type(map_type); GB_ASSERT(map_type->kind == Type_Map); - lbValue key_ptr = {}; - lbValue key_hash = lb_gen_map_key_hash(p, map_key, map_type->Map.key, &key_ptr); + lbValue key_ptr = lb_address_from_load_or_generate_local(p, map_key); + key_ptr = lb_emit_conv(p, key_ptr, t_rawptr); lbValue v = lb_emit_conv(p, map_value, map_type->Map.value); lbAddr value_addr = lb_add_local_generated(p, v.type, false); lb_addr_store(p, value_addr, v); - auto args = array_make(permanent_allocator(), 6); + auto args = array_make(permanent_allocator(), 5); args[0] = lb_emit_conv(p, map_ptr, t_rawptr); - args[1] = lb_gen_map_header_table_internal(p, map_type); - args[2] = key_hash; - args[3] = key_ptr; - args[4] = lb_emit_conv(p, value_addr.addr, t_rawptr); - args[5] = lb_emit_source_code_location_as_global(p, node); + args[1] = lb_gen_map_info_ptr(p, map_type); + args[2] = key_ptr; + args[3] = lb_emit_conv(p, value_addr.addr, t_rawptr); + args[4] = lb_emit_source_code_location_as_global(p, node); lb_emit_runtime_call(p, "__dynamic_map_set", args); } @@ -660,8 +676,8 @@ void lb_dynamic_map_reserve(lbProcedure *p, lbValue const &map_ptr, isize const auto args = array_make(permanent_allocator(), 4); args[0] = lb_emit_conv(p, map_ptr, t_rawptr); - args[1] = lb_gen_map_header_table_internal(p, type_deref(map_ptr.type)); - args[2] = lb_const_int(p->module, t_int, capacity); + args[1] = 
lb_gen_map_info_ptr(p, type_deref(map_ptr.type)); + args[2] = lb_const_int(p->module, t_uint, capacity); args[3] = lb_emit_source_code_location_as_global(p, proc_name, pos); lb_emit_runtime_call(p, "__dynamic_map_reserve", args); } diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index a8ff1571c..b797f28f9 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -160,7 +160,7 @@ struct lbModule { StringMap objc_classes; StringMap objc_selectors; - PtrMap map_header_table_map; + PtrMap map_info_map; }; struct lbGenerator { diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 87f8afa05..69b1fce20 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -75,7 +75,7 @@ void lb_init_module(lbModule *m, Checker *c) { string_map_init(&m->objc_classes, a); string_map_init(&m->objc_selectors, a); - map_init(&m->map_header_table_map, a, 0); + map_init(&m->map_info_map, a, 0); } @@ -1939,27 +1939,13 @@ LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { defer (m->internal_type_level += 1); unsigned field_count = cast(unsigned)(internal_type->Struct.fields.count); - GB_ASSERT(field_count == 2); - LLVMTypeRef *fields = gb_alloc_array(temporary_allocator(), LLVMTypeRef, field_count); + GB_ASSERT(field_count == 3); - LLVMTypeRef entries_fields[] = { - lb_type(m, t_rawptr), // data - lb_type(m, t_int), // len - lb_type(m, t_int), // cap + LLVMTypeRef fields[3] = { + lb_type(m, t_uintptr), // data + lb_type(m, t_uintptr), // len lb_type(m, t_allocator), // allocator }; - - fields[0] = lb_type(m, internal_type->Struct.fields[0]->type); - fields[1] = LLVMStructTypeInContext(ctx, entries_fields, gb_count_of(entries_fields), false); - - { // Add this to simplify things - lbStructFieldRemapping entries_field_remapping = {}; - slice_init(&entries_field_remapping, permanent_allocator(), gb_count_of(entries_fields)); - for_array(i, entries_field_remapping) { - entries_field_remapping[i] = cast(i32)i; - } - 
map_set(&m->struct_field_remapping, cast(void *)fields[1], entries_field_remapping); - } return LLVMStructTypeInContext(ctx, fields, field_count, false); } diff --git a/src/types.cpp b/src/types.cpp index b9f2b375f..220d1a6ab 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -226,7 +226,6 @@ struct TypeProc { TYPE_KIND(Map, struct { \ Type *key; \ Type *value; \ - Type *entry_type; \ Type *internal_type; \ Type *lookup_result_type; \ }) \ @@ -685,9 +684,8 @@ gb_global Type *t_allocator_error = nullptr; gb_global Type *t_source_code_location = nullptr; gb_global Type *t_source_code_location_ptr = nullptr; -gb_global Type *t_map_hash = nullptr; -gb_global Type *t_map_header = nullptr; -gb_global Type *t_map_header_table = nullptr; +gb_global Type *t_map_info = nullptr; +gb_global Type *t_map_cell_info = nullptr; gb_global Type *t_equal_proc = nullptr; @@ -3330,8 +3328,6 @@ Selection lookup_field_with_selection(Type *type_, String field_name, bool is_ty } } } else if (type->kind == Type_DynamicArray) { - // IMPORTANT TODO(bill): Should these members be available to should I only allow them with - // `Raw_Dynamic_Array` type? GB_ASSERT(t_allocator != nullptr); String allocator_str = str_lit("allocator"); gb_local_persist Entity *entity__allocator = alloc_entity_field(nullptr, make_token_ident(allocator_str), t_allocator, false, 3); @@ -3342,15 +3338,12 @@ Selection lookup_field_with_selection(Type *type_, String field_name, bool is_ty return sel; } } else if (type->kind == Type_Map) { - // IMPORTANT TODO(bill): Should these members be available to should I only allow them with - // `Raw_Map` type? 
GB_ASSERT(t_allocator != nullptr); String allocator_str = str_lit("allocator"); - gb_local_persist Entity *entity__allocator = alloc_entity_field(nullptr, make_token_ident(allocator_str), t_allocator, false, 3); + gb_local_persist Entity *entity__allocator = alloc_entity_field(nullptr, make_token_ident(allocator_str), t_allocator, false, 2); if (field_name == allocator_str) { - selection_add_index(&sel, 1); - selection_add_index(&sel, 3); + selection_add_index(&sel, 2); sel.entity = entity__allocator; return sel; } @@ -3795,11 +3788,12 @@ i64 type_size_of_internal(Type *t, TypePath *path) { case Type_Map: /* struct { - hashes: []int, // 2 words - entries: [dynamic]Entry_Type, // 5 words + data: uintptr, // 1 word + size: uintptr, // 1 word + allocator: runtime.Allocator, // 2 words } */ - return (2 + (3 + 2))*build_context.word_size; + return (1 + 1 + 2)*build_context.word_size; case Type_Tuple: { i64 count, align, size;