Change typeid definition to be based around the canonical type hash

`typeid` used to be a fancy index with extra metadata stored on it. Now it is a direct hash of the type.

This is safe to do in practice since any possible collisions are checked at compile time AND the chances of having a 1% collision are around 1 in 600K (see the Birthday Paradox).

Therefore accessing a `^Type_Info` is now a hash table lookup with linear probing. The table is twice the size necessary, to prevent too much probing due to an overly dense hash table.
This commit is contained in:
gingerBill
2025-02-20 14:10:45 +00:00
parent c25ac939d4
commit 5489a88983
7 changed files with 65 additions and 112 deletions

View File

@@ -239,47 +239,6 @@ Type_Info :: struct {
},
}
// NOTE(bill): This must match the compiler's
Typeid_Kind :: enum u8 {
Invalid,
Integer,
Rune,
Float,
Complex,
Quaternion,
String,
Boolean,
Any,
Type_Id,
Pointer,
Multi_Pointer,
Procedure,
Array,
Enumerated_Array,
Dynamic_Array,
Slice,
Tuple,
Struct,
Union,
Enum,
Map,
Bit_Set,
Simd_Vector,
Matrix,
Soa_Pointer,
Bit_Field,
}
#assert(len(Typeid_Kind) < 32)
Typeid_Bit_Field :: bit_field uintptr {
index: uintptr | 8*size_of(uintptr) - 8,
kind: Typeid_Kind | 5, // Typeid_Kind
named: bool | 1,
special: bool | 1, // signed, cstring, etc
reserved: bool | 1,
}
#assert(size_of(Typeid_Bit_Field) == size_of(uintptr))
// NOTE(bill): only the ones that are needed (not all types)
// This will be set by the compiler
type_table: []^Type_Info
@@ -686,13 +645,16 @@ type_info_core :: proc "contextless" (info: ^Type_Info) -> ^Type_Info {
type_info_base_without_enum :: type_info_core
__type_info_of :: proc "contextless" (id: typeid) -> ^Type_Info #no_bounds_check {
MASK :: 1<<(8*size_of(typeid) - 8) - 1
data := transmute(uintptr)id
n := int(data & MASK)
if n < 0 || n >= len(type_table) {
n = 0
n := u64(len(type_table))
i := transmute(u64)id % n
for k in 0..<n {
ptr := type_table[i]
if ptr != nil && ptr.id == id {
return ptr
}
i = i+1 if i+1 < n else 0
}
return type_table[n]
return type_table[0]
}
when !ODIN_NO_RTTI {

BIN
odin.rdi

Binary file not shown.

View File

@@ -6740,30 +6740,43 @@ gb_internal void check_parsed_files(Checker *c) {
}
array_sort(c->info.type_info_types, type_info_pair_cmp);
array_init(&c->info.type_info_types_hash_map, heap_allocator(), c->info.type_info_types.count*2 + 1);
map_reserve(&c->info.minimum_dependency_type_info_index_map, c->info.type_info_types.count);
for_array(i, c->info.type_info_types) {
auto const &tt = c->info.type_info_types[i];
bool exists = map_set_if_not_previously_exists(&c->info.minimum_dependency_type_info_index_map, tt.hash, i);
if (!exists) {
continue;
isize hash_map_len = c->info.type_info_types_hash_map.count;
for (auto const &tt : c->info.type_info_types) {
isize index = tt.hash % hash_map_len;
// NOTE(bill): no need for a sanity check since there
// will always be enough space for the entries
for (;;) {
if (index == 0 || c->info.type_info_types_hash_map[index].hash != 0) {
index = (index+1) % hash_map_len;
continue;
}
break;
}
for (auto const &entry : c->info.minimum_dependency_type_info_index_map) {
if (entry.key != tt.hash) {
continue;
c->info.type_info_types_hash_map[index] = tt;
bool exists = map_set_if_not_previously_exists(&c->info.minimum_dependency_type_info_index_map, tt.hash, index);
if (exists) {
for (auto const &entry : c->info.minimum_dependency_type_info_index_map) {
if (entry.key != tt.hash) {
continue;
}
auto const &other = c->info.type_info_types[entry.value];
if (are_types_identical_unique_tuples(tt.type, other.type)) {
continue;
}
gbString t = temp_canonical_string(tt.type);
gbString o = temp_canonical_string(other.type);
GB_PANIC("%s (%s) %llu vs %s (%s) %llu",
type_to_string(tt.type, false), t, cast(unsigned long long)tt.hash,
type_to_string(other.type, false), o, cast(unsigned long long)other.hash);
}
auto const &other = c->info.type_info_types[entry.value];
if (are_types_identical_unique_tuples(tt.type, other.type)) {
continue;
}
gbString t = temp_canonical_string(tt.type);
gbString o = temp_canonical_string(other.type);
GB_PANIC("%s (%s) %llu vs %s (%s) %llu",
type_to_string(tt.type, false), t, cast(unsigned long long)tt.hash,
type_to_string(other.type, false), o, cast(unsigned long long)other.hash);
}
}
GB_ASSERT(c->info.minimum_dependency_type_info_index_map.count <= c->info.type_info_types.count);
}

View File

@@ -432,6 +432,7 @@ struct CheckerInfo {
PtrMap</*type info hash*/u64, /*min dep index*/isize> minimum_dependency_type_info_index_map;
TypeSet min_dep_type_info_set;
Array<TypeInfoPair> type_info_types; // sorted after filled
Array<TypeInfoPair> type_info_types_hash_map; // 2 * type_info_types.count
Array<Entity *> testing_procedures;

View File

@@ -3154,9 +3154,10 @@ gb_internal bool lb_generate_code(lbGenerator *gen) {
lbModule *m = default_module;
{ // Add type info data
GB_ASSERT_MSG(info->minimum_dependency_type_info_index_map.count == info->type_info_types.count, "%tu vs %tu", info->minimum_dependency_type_info_index_map.count, info->type_info_types.count);
// GB_ASSERT_MSG(info->minimum_dependency_type_info_index_map.count == info->type_info_types.count, "%tu vs %tu", info->minimum_dependency_type_info_index_map.count, info->type_info_types.count);
isize max_type_info_count = info->minimum_dependency_type_info_index_map.count+1;
// isize max_type_info_count = info->minimum_dependency_type_info_index_map.count+1;
isize max_type_info_count = info->type_info_types_hash_map.count;
Type *t = alloc_type_array(t_type_info_ptr, max_type_info_count);
// IMPORTANT NOTE(bill): As LLVM does not have a union type, an array of unions cannot be initialized

View File

@@ -2,13 +2,13 @@
gb_internal isize lb_type_info_index(CheckerInfo *info, TypeInfoPair pair, bool err_on_not_found=true) {
isize index = type_info_index(info, pair, err_on_not_found);
if (index >= 0) {
return index+1;
return index;
}
if (err_on_not_found) {
gb_printf_err("NOT FOUND lb_type_info_index:\n\t%s\n\t@ index %td\n\tmax count: %u\nFound:\n", type_to_string(pair.type), index, info->minimum_dependency_type_info_index_map.count);
for (auto const &entry : info->minimum_dependency_type_info_index_map) {
isize type_info_index = entry.key;
gb_printf_err("\t%s\n", type_to_string(info->type_info_types[type_info_index].type));
gb_printf_err("\t%s\n", type_to_string(info->type_info_types_hash_map[type_info_index].type));
}
GB_PANIC("NOT FOUND");
}
@@ -73,37 +73,8 @@ gb_internal lbValue lb_typeid(lbModule *m, Type *type) {
type = default_type(type);
u64 id = cast(u64)lb_type_info_index(m->info, type);
GB_ASSERT(id >= 0);
u64 kind = lb_typeid_kind(m, type, id);
u64 named = is_type_named(type) && type->kind != Type_Basic;
u64 special = 0;
u64 reserved = 0;
if (is_type_cstring(type)) {
special = 1;
} else if (is_type_integer(type) && !is_type_unsigned(type)) {
special = 1;
}
u64 data = 0;
if (build_context.ptr_size == 4) {
GB_ASSERT(id <= (1u<<24u));
data |= (id &~ (1u<<24)) << 0u; // index
data |= (kind &~ (1u<<5)) << 24u; // kind
data |= (named &~ (1u<<1)) << 29u; // named
data |= (special &~ (1u<<1)) << 30u; // special
data |= (reserved &~ (1u<<1)) << 31u; // reserved
} else {
GB_ASSERT(build_context.ptr_size == 8);
GB_ASSERT(id <= (1ull<<56u));
data |= (id &~ (1ull<<56)) << 0ul; // index
data |= (kind &~ (1ull<<5)) << 56ull; // kind
data |= (named &~ (1ull<<1)) << 61ull; // named
data |= (special &~ (1ull<<1)) << 62ull; // special
data |= (reserved &~ (1ull<<1)) << 63ull; // reserved
}
u64 data = type_hash_canonical_type(type);
GB_ASSERT(data != 0);
lbValue res = {};
res.value = LLVMConstInt(lb_type(m, t_typeid), data, false);
@@ -279,8 +250,8 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ
LLVMTypeRef *modified_types = lb_setup_modified_types_for_type_info(m, global_type_info_data_entity_count);
defer (gb_free(heap_allocator(), modified_types));
for_array(type_info_type_index, info->type_info_types) {
auto const &tt = info->type_info_types[type_info_type_index];
for_array(type_info_type_index, info->type_info_types_hash_map) {
auto const &tt = info->type_info_types_hash_map[type_info_type_index];
Type *t = tt.type;
if (t == nullptr || t == t_invalid) {
continue;
@@ -343,8 +314,8 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ
return giant_const_values[index];
};
for_array(type_info_type_index, info->type_info_types) {
Type *t = info->type_info_types[type_info_type_index].type;
for_array(type_info_type_index, info->type_info_types_hash_map) {
Type *t = info->type_info_types_hash_map[type_info_type_index].type;
if (t == nullptr || t == t_invalid) {
continue;
}
@@ -1072,7 +1043,12 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ
LLVMSetInitializer(giant_const_values[entry_index], LLVMConstNamedStruct(stype, small_const_values, variant_index+1));
}
for (isize i = 0; i < global_type_info_data_entity_count; i++) {
giant_const_values[i] = LLVMConstPointerCast(giant_const_values[i], lb_type(m, t_type_info_ptr));
auto *ptr = &giant_const_values[i];
if (*ptr != nullptr) {
*ptr = LLVMConstPointerCast(*ptr, lb_type(m, t_type_info_ptr));
} else {
*ptr = LLVMConstNull(lb_type(m, t_type_info_ptr));
}
}

View File

@@ -503,9 +503,9 @@ gb_global Type basic_types[] = {
{Type_Basic, {Basic_rawptr, BasicFlag_Pointer, -1, STR_LIT("rawptr")}},
{Type_Basic, {Basic_string, BasicFlag_String, -1, STR_LIT("string")}},
{Type_Basic, {Basic_cstring, BasicFlag_String, -1, STR_LIT("cstring")}},
{Type_Basic, {Basic_any, 0, -1, STR_LIT("any")}},
{Type_Basic, {Basic_any, 0, 16, STR_LIT("any")}},
{Type_Basic, {Basic_typeid, 0, -1, STR_LIT("typeid")}},
{Type_Basic, {Basic_typeid, 0, 8, STR_LIT("typeid")}},
// Endian
{Type_Basic, {Basic_i16le, BasicFlag_Integer | BasicFlag_EndianLittle, 2, STR_LIT("i16le")}},
@@ -3700,7 +3700,7 @@ gb_internal i64 type_size_of(Type *t) {
switch (t->Basic.kind) {
case Basic_string: size = 2*build_context.int_size; break;
case Basic_cstring: size = build_context.ptr_size; break;
case Basic_any: size = 2*build_context.ptr_size; break;
case Basic_any: size = 16; break;
case Basic_typeid: size = build_context.ptr_size; break;
case Basic_int: case Basic_uint:
@@ -3763,7 +3763,7 @@ gb_internal i64 type_align_of_internal(Type *t, TypePath *path) {
switch (t->Basic.kind) {
case Basic_string: return build_context.int_size;
case Basic_cstring: return build_context.ptr_size;
case Basic_any: return build_context.ptr_size;
case Basic_any: return 8;
case Basic_typeid: return build_context.ptr_size;
case Basic_int: case Basic_uint:
@@ -4014,7 +4014,7 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
switch (kind) {
case Basic_string: return 2*build_context.int_size;
case Basic_cstring: return build_context.ptr_size;
case Basic_any: return 2*build_context.ptr_size;
case Basic_any: return 16;
case Basic_typeid: return build_context.ptr_size;
case Basic_int: case Basic_uint:
@@ -4251,7 +4251,7 @@ gb_internal i64 type_offset_of(Type *t, i64 index, Type **field_type_) {
return 0; // data
case 1:
if (field_type_) *field_type_ = t_typeid;
return build_context.ptr_size; // id
return 8; // id
}
}
break;
@@ -4322,8 +4322,8 @@ gb_internal i64 type_offset_of_from_selection(Type *type, Selection sel) {
}
} else if (t->Basic.kind == Basic_any) {
switch (index) {
case 0: t = t_type_info_ptr; break;
case 1: t = t_rawptr; break;
case 0: t = t_rawptr; break;
case 1: t = t_typeid; break;
}
}
break;