Implement a new StringInterner

This commit is contained in:
gingerBill
2026-03-17 09:18:39 +00:00
parent 8f2fd4f886
commit a8c6ea7c8a
6 changed files with 338 additions and 61 deletions

View File

@@ -366,6 +366,7 @@ gb_global bool global_module_path_set = false;
#include "string_set.cpp"
#include "priority_queue.cpp"
#include "thread_pool.cpp"
#include "string_interner.cpp"
gb_internal String obfuscate_string(String const &s, char const *prefix) {
@@ -388,48 +389,6 @@ gb_internal i32 obfuscate_i32(i32 i) {
}
// Legacy intern-table entry: a length-prefixed, NUL-terminated string record
// that chains to the next entry sharing the same hash key.
struct StringIntern {
	StringIntern *next; // next entry in the same hash-collision chain
	isize len;          // length of `str`, excluding the trailing NUL
	char str[1];        // over-allocated: the record actually holds len+1 bytes
};
PtrMap<uintptr, StringIntern *> string_intern_map = {}; // Key: u64
gb_global Arena string_intern_arena = {}; // owns every StringIntern record
// Return a canonical, NUL-terminated copy of `text` (length `len`).
// The first request allocates the record on the intern arena; every later
// request with the same bytes returns the same pointer.
gb_internal char const *string_intern(char const *text, isize len) {
	u64 h = gb_fnv64a(text, len);
	// 0 is not a usable map key, so remap it to 1.
	uintptr key = cast(uintptr)(h != 0 ? h : 1);

	StringIntern **chain = map_get(&string_intern_map, key);
	StringIntern *head = chain ? *chain : nullptr;
	for (StringIntern *entry = head; entry != nullptr; entry = entry->next) {
		if (entry->len != len) {
			continue;
		}
		if (gb_strncmp(entry->str, (char *)text, len) == 0) {
			return entry->str;
		}
	}

	// Not seen before: copy the bytes into a fresh record and prepend it to
	// the collision chain for this key.
	isize record_size = gb_offset_of(StringIntern, str) + len + 1;
	StringIntern *entry = cast(StringIntern *)arena_alloc(&string_intern_arena, record_size, gb_align_of(StringIntern));
	entry->len = len;
	entry->next = head;
	gb_memmove(entry->str, text, len);
	entry->str[len] = 0;
	map_set(&string_intern_map, key, entry);
	return entry->str;
}
// Convenience overload: intern the bytes of a `String`.
gb_internal char const *string_intern(String const &string) {
	char const *text = cast(char const *)string.text;
	return string_intern(text, string.len);
}
// One-time setup for the legacy intern table: allocate the backing hash map.
gb_internal void init_string_interner(void) {
	map_init(&string_intern_map);
}
gb_internal i32 next_pow2(i32 n) {
if (n <= 0) {
return 0;

View File

@@ -40,6 +40,7 @@ struct MemoryBlock {
u8 * base;
isize size;
isize used;
isize committed;
};
struct Arena {
@@ -48,13 +49,14 @@ struct Arena {
// BlockingMutex mutex;
isize temp_count;
Thread * parent_thread;
bool custom_arena;
};
enum { DEFAULT_MINIMUM_BLOCK_SIZE = 8ll*1024ll*1024ll };
gb_global isize DEFAULT_PAGE_SIZE = 4096;
gb_internal MemoryBlock *virtual_memory_alloc(isize size);
gb_internal MemoryBlock *virtual_memory_alloc(isize size, bool commit);
gb_internal void virtual_memory_dealloc(MemoryBlock *block);
gb_internal void *arena_alloc(Arena *arena, isize min_size, isize alignment);
gb_internal void arena_free_all(Arena *arena);
@@ -82,7 +84,7 @@ gb_internal void thread_init_arenas(Thread *t) {
gb_internal void *arena_alloc(Arena *arena, isize min_size, isize alignment) {
GB_ASSERT(gb_is_power_of_two(alignment));
GB_ASSERT(arena->parent_thread == get_current_thread());
GB_ASSERT(arena->custom_arena || arena->parent_thread == get_current_thread());
isize size = 0;
if (arena->curr_block != nullptr) {
@@ -95,7 +97,7 @@ gb_internal void *arena_alloc(Arena *arena, isize min_size, isize alignment) {
isize block_size = gb_max(size, arena->minimum_block_size);
MemoryBlock *new_block = virtual_memory_alloc(block_size);
MemoryBlock *new_block = virtual_memory_alloc(block_size, true);
new_block->prev = arena->curr_block;
arena->curr_block = new_block;
}
@@ -113,6 +115,62 @@ gb_internal void *arena_alloc(Arena *arena, isize min_size, isize alignment) {
return ptr;
}
gb_internal void *platform_virtual_memory_alloc_internal(isize total_size, bool commit);
gb_internal bool platform_virtual_memory_commit_internal(void *data, isize commit_amount);
// A virtual-memory-backed linear allocator: one large address-space
// reservation committed in fixed-size blocks as the `used` cursor advances.
// It never chains blocks, so every returned pointer stays stable for the
// arena's lifetime.
struct StaticArena {
	u8 * data;               // base of the reservation
	isize used;              // bytes handed out so far
	isize committed;         // bytes currently backed by committed pages
	isize reserved;          // total reserved address space
	isize commit_block_size; // granularity at which pages are committed
};
enum {STATIC_ARENA_DEFAULT_COMMIT_BLOCK_SIZE = 8<<20}; // 8 MiB
// Initialize `arena` with `reserve_size` bytes of reserved (uncommitted)
// address space; pages are committed lazily in `commit_block_size` chunks.
// Both sizes must be powers of two and the block size must fit the reserve.
gb_internal void static_arena_init(StaticArena *arena, isize reserve_size, isize commit_block_size) {
	GB_ASSERT(gb_is_power_of_two(reserve_size));
	GB_ASSERT(gb_is_power_of_two(commit_block_size));
	GB_ASSERT(commit_block_size <= reserve_size);
	arena->data = cast(u8 *)platform_virtual_memory_alloc_internal(reserve_size, false);
	// Robustness: do not rely on the caller having zero-initialized the
	// struct — stale cursors here would corrupt the first allocation.
	arena->used = 0;
	arena->committed = 0;
	arena->reserved = reserve_size;
	arena->commit_block_size = commit_block_size;
}
// Commit enough whole blocks to cover `amount` more bytes, clamped to the
// remaining reservation. The caller (static_arena_alloc) asserts afterwards
// that the commit actually sufficed.
gb_internal void static_arena_commit_memory(StaticArena *arena, isize amount) {
	isize block = arena->commit_block_size;
	isize rounded = ((amount + block - 1) / block) * block; // round up to whole blocks
	isize available = arena->reserved - arena->committed;
	if (rounded > available) {
		rounded = available;
	}
	platform_virtual_memory_commit_internal(arena->data + arena->committed, rounded);
	arena->committed += rounded;
}
// Bump-allocate `size` bytes (rounded up to `alignment`) from the arena,
// committing more pages on demand. Aborts when the reservation is exhausted.
gb_internal void *static_arena_alloc(StaticArena *arena, isize size, isize alignment) {
	GB_ASSERT(gb_is_power_of_two(alignment));
	isize padded = align_formula_isize(size, alignment);
	u8 *ptr = cast(u8 *)align_formula_ptr(arena->data + arena->used, alignment);
	u8 *one_past_end = ptr + padded;
	isize new_used = one_past_end - arena->data;
	if (new_used > arena->committed) {
		// Commit the shortfall before touching the memory.
		static_arena_commit_memory(arena, new_used - arena->committed);
	}
	GB_ASSERT_MSG(new_used <= arena->committed, "out of memory for the static arena");
	arena->used = new_used;
	return ptr;
}
template <typename T>
gb_internal T *arena_alloc_item(Arena *arena) {
@@ -138,10 +196,13 @@ struct PlatformMemoryBlock {
gb_global std::atomic<isize> global_platform_memory_total_usage;
gb_global PlatformMemoryBlock global_platform_memory_block_sentinel;
gb_internal PlatformMemoryBlock *platform_virtual_memory_alloc(isize total_size);
gb_internal PlatformMemoryBlock *platform_virtual_memory_alloc(isize total_size, bool commit);
gb_internal void platform_virtual_memory_free(PlatformMemoryBlock *block);
gb_internal void platform_virtual_memory_protect(void *memory, isize size);
gb_internal void *platform_virtual_memory_alloc_internal(isize total_size, bool commit);
gb_internal bool platform_virtual_memory_commit_internal(void *data, isize commit_amount);
#if defined(GB_SYSTEM_WINDOWS)
gb_internal void platform_virtual_memory_init(void) {
global_platform_memory_block_sentinel.prev = &global_platform_memory_block_sentinel;
@@ -153,14 +214,20 @@ gb_internal void platform_virtual_memory_protect(void *memory, isize size);
GB_ASSERT(gb_is_power_of_two(DEFAULT_PAGE_SIZE));
}
gb_internal PlatformMemoryBlock *platform_virtual_memory_alloc(isize total_size) {
PlatformMemoryBlock *pmblock = (PlatformMemoryBlock *)VirtualAlloc(0, total_size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
if (pmblock == nullptr) {
// Windows: reserve `total_size` bytes of address space, optionally committing
// it immediately. Returns the base address; asserts fatally on failure.
gb_internal void *platform_virtual_memory_alloc_internal(isize total_size, bool commit) {
	// MEM_RESERVE only claims address space; MEM_COMMIT also backs it with pages.
	DWORD flags = commit ? MEM_RESERVE|MEM_COMMIT : MEM_RESERVE;
	void *mem = VirtualAlloc(0, total_size, flags, PAGE_READWRITE);
	if (mem == nullptr) {
		gb_printf_err("Out of Virtual memory, oh no...\n");
		gb_printf_err("Requested: %lld bytes\n", cast(long long)total_size);
		gb_printf_err("Total Usage: %lld bytes\n", cast(long long)global_platform_memory_total_usage);
		// Fixed: the previous body also asserted on `pmblock`, a variable that
		// does not exist in this function (left over from the old overload).
		GB_ASSERT_MSG(mem != nullptr, "Out of Virtual Memory, oh no...");
	}
	return mem;
}
// Windows: reserve (and optionally commit) a platform memory block and record
// it in the global usage counter.
gb_internal PlatformMemoryBlock *platform_virtual_memory_alloc(isize total_size, bool commit) {
	PlatformMemoryBlock *pmblock = cast(PlatformMemoryBlock *)platform_virtual_memory_alloc_internal(total_size, commit);
	global_platform_memory_total_usage.fetch_add(total_size);
	return pmblock;
}
@@ -173,6 +240,16 @@ gb_internal void platform_virtual_memory_protect(void *memory, isize size);
BOOL is_protected = VirtualProtect(memory, size, PAGE_NOACCESS, &old_protect);
GB_ASSERT(is_protected);
}
// Windows: commit `commit_amount` bytes inside a range previously reserved
// with MEM_RESERVE. Returns false on failure (after a fatal assert).
gb_internal bool platform_virtual_memory_commit_internal(void *data, isize commit_amount) {
	void *res = VirtualAlloc(data, commit_amount, MEM_COMMIT, PAGE_READWRITE);
	if (res == nullptr) {
		// Fixed: the previous body both GB_PANIC'd and then asserted, making
		// the assert and the `return false` unreachable. Keep one fatal check.
		GB_ASSERT_MSG(res != nullptr, "Out of Virtual Memory, oh no...");
		return false;
	}
	return true;
}
#else
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
@@ -185,9 +262,20 @@ gb_internal void platform_virtual_memory_protect(void *memory, isize size);
DEFAULT_PAGE_SIZE = gb_max(DEFAULT_PAGE_SIZE, cast(isize)sysconf(_SC_PAGE_SIZE));
GB_ASSERT(gb_is_power_of_two(DEFAULT_PAGE_SIZE));
}
// POSIX: reserve `total_size` bytes via anonymous mmap. The `commit` flag is
// unused here because anonymous mappings are committed on first touch by the
// kernel. Asserts fatally on failure; returns nullptr if asserts are compiled out.
gb_internal void *platform_virtual_memory_alloc_internal(isize total_size, bool commit) {
	void *mem = mmap(nullptr, total_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	// Fixed: mmap signals failure with MAP_FAILED ((void *)-1), NOT nullptr,
	// so the old `mem == nullptr` check could never fire.
	if (mem == MAP_FAILED) {
		gb_printf_err("Out of Virtual memory, oh no...\n");
		gb_printf_err("Requested: %lld bytes\n", cast(long long)total_size);
		gb_printf_err("Total Usage: %lld bytes\n", cast(long long)global_platform_memory_total_usage);
		GB_ASSERT_MSG(mem != MAP_FAILED, "Out of Virtual Memory, oh no...");
		mem = nullptr; // normalize so callers can test against nullptr
	}
	return mem;
}
gb_internal PlatformMemoryBlock *platform_virtual_memory_alloc(isize total_size) {
PlatformMemoryBlock *pmblock = (PlatformMemoryBlock *)mmap(nullptr, total_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
gb_internal PlatformMemoryBlock *platform_virtual_memory_alloc(isize total_size, bool commit) {
PlatformMemoryBlock *pmblock = cast(PlatformMemoryBlock *)platform_virtual_memory_alloc_internal(total_size, commit);
if (pmblock == nullptr) {
gb_printf_err("Out of Virtual memory, oh no...\n");
gb_printf_err("Requested: %lld bytes\n", cast(long long)total_size);
@@ -197,6 +285,9 @@ gb_internal void platform_virtual_memory_protect(void *memory, isize size);
global_platform_memory_total_usage.fetch_add(total_size);
return pmblock;
}
// Reserve address space without committing pages up front; committing happens
// later via platform_virtual_memory_commit_internal.
gb_internal PlatformMemoryBlock *platform_virtual_memory_alloc_uncommited(isize total_size) {
	// Fixed: the old body called the removed single-argument overload and so
	// did not actually request uncommitted memory; pass commit=false.
	return platform_virtual_memory_alloc(total_size, false);
}
gb_internal void platform_virtual_memory_free(PlatformMemoryBlock *block) {
isize size = block->total_size;
global_platform_memory_total_usage.fetch_sub(size);
@@ -206,9 +297,19 @@ gb_internal void platform_virtual_memory_protect(void *memory, isize size);
int err = mprotect(memory, size, PROT_NONE);
GB_ASSERT(err == 0);
}
// POSIX: mark `commit_amount` bytes of a reservation read/write. Anonymous
// mmap already maps PROT_READ|PROT_WRITE, so this mprotect is effectively a
// no-op there — NOTE(review): presumably kept for symmetry with the Windows
// path / future PROT_NONE reservations; confirm with the Arena code.
gb_internal bool platform_virtual_memory_commit_internal(void *data, isize commit_amount) {
	// Fixed: the statement was missing its terminating semicolon, and the body
	// both GB_PANIC'd and asserted (making everything after unreachable).
	int err = mprotect(data, commit_amount, PROT_READ | PROT_WRITE);
	if (err != 0) {
		GB_ASSERT_MSG(err == 0, "Out of Virtual Memory, oh no...");
		return false;
	}
	return true;
}
#endif
gb_internal MemoryBlock *virtual_memory_alloc(isize size) {
gb_internal MemoryBlock *virtual_memory_alloc(isize size, bool commit) {
isize const page_size = DEFAULT_PAGE_SIZE;
isize total_size = size + gb_size_of(PlatformMemoryBlock);
@@ -224,7 +325,7 @@ gb_internal MemoryBlock *virtual_memory_alloc(isize size) {
do_protection = true;
}
PlatformMemoryBlock *pmblock = platform_virtual_memory_alloc(total_size);
PlatformMemoryBlock *pmblock = platform_virtual_memory_alloc(total_size, commit);
GB_ASSERT_MSG(pmblock != nullptr, "Out of Virtual Memory, oh no...");
pmblock->block.base = cast(u8 *)pmblock + base_offset;

View File

@@ -1086,7 +1086,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
break;
}
char const *key = string_intern(name);
char const *key = string_intern_cstring(name);
if (map_get(&build_context.defined_values, key) != nullptr) {
gb_printf_err("Defined constant '%.*s' already exists\n", LIT(name));
@@ -3588,7 +3588,7 @@ int main(int arg_count, char const **arg_ptr) {
MAIN_TIME_SECTION("initialization");
init_string_interner();
g_string_interner = string_interner_create();
init_global_error_collector();
init_keyword_hash_table();
init_terminal();

View File

@@ -773,8 +773,9 @@ gb_internal Ast *ast_matrix_index_expr(AstFile *f, Ast *expr, Token open, Token
// Allocate an Ast_Ident node for `token`, caching both its string hash and its
// interned-string handle so later comparisons avoid re-hashing.
gb_internal Ast *ast_ident(AstFile *f, Token token) {
	Ast *result = alloc_ast_node(f, Ast_Ident);
	// Fixed: `token` and `hash` were each assigned twice (old and new diff
	// lines merged); a single assignment of each suffices.
	result->Ident.token = token;
	result->Ident.hash = string_hash(token.string);
	result->Ident.interned = string_interner_insert(token.string);
	return result;
}

View File

@@ -424,9 +424,10 @@ struct AstSplitArgs {
#define AST_KINDS \
AST_KIND(Ident, "identifier", struct { \
Token token; \
std::atomic<Entity *> entity; \
u32 hash; \
Token token; \
std::atomic<Entity *> entity; \
u32 hash; \
InternedString interned; \
}) \
AST_KIND(Implicit, "implicit", Token) \
AST_KIND(Uninit, "uninitialized value", Token) \

src/string_interner.cpp (new file, 215 lines)

View File
@@ -0,0 +1,215 @@
#define STRING_INTERNER_CELL_WIDTH 8                        // (hash, handle) pairs per hash cell
#define STRING_INTERNER_MUTEX_STRIPE_COUNT 1024             // must stay a power of two (mask below)
#define STRING_INTERNER_MUTEX_STRIPE_MASK (STRING_INTERNER_MUTEX_STRIPE_COUNT - 1)
#define STRING_INTERNER_THREAD_LOCAL_SIZE (1024 * 1024 * 2) // 2 MiB per-thread bump block
#define STRING_INTERN_CACHE_LINE (2*GB_CACHE_LINE_SIZE)     // alignment for the Padded*/cell types
// A 32-bit handle to an interned string: the byte offset of the string record
// ([u32 length][bytes][NUL]) from the StringInterner base pointer.
// A value of 0 means "empty / not interned".
struct InternedString {
	u32 value;
	bool operator==(InternedString other) const {
		return this->value == other.value;
	}
	String load() const;              // resolve through the global interner
	char const *load_cstring() const; // NUL-terminated view of the same record
};
// One bucket of the interner's hash table: a fixed-width cell of hash/handle
// pairs plus an append-only overflow chain. Over-aligned so concurrent access
// to neighboring cells does not share cache lines.
struct alignas(STRING_INTERN_CACHE_LINE) StringInternCell {
	std::atomic<u64> hashes [STRING_INTERNER_CELL_WIDTH]; // 0 marks an empty slot
	InternedString offsets[STRING_INTERNER_CELL_WIDTH];   // valid once the paired hash is published
	std::atomic<StringInternCell *> next;                 // overflow cell, set once under the stripe lock
};
// Mutex padded out to its own cache line(s) — presumably to avoid false
// sharing between adjacent stripes; confirm against GB_CACHE_LINE_SIZE usage.
struct alignas(STRING_INTERN_CACHE_LINE) PaddedMutex {
	BlockingMutex m;
};
// Atomic counter padded the same way.
struct alignas(STRING_INTERN_CACHE_LINE) PaddedI64 {
	std::atomic<i64> value;
};
// Concurrent string interner: a fixed-size table of StringInternCell buckets,
// striped mutexes serializing writers per bucket, and a StaticArena providing
// all storage (so handles can be u32 offsets from the interner base).
struct StringInterner {
	StringInternCell *cells; // cell_mask+1 first-level cells
	u64 cell_mask;           // table size minus one (power-of-two table)
	PaddedMutex mutexes[STRING_INTERNER_MUTEX_STRIPE_COUNT]; // per-stripe writer locks
	StaticArena arena;       // backing storage for the interner and all records
	PaddedMutex arena_mutex; // guards `arena` when threads refill local blocks
	bool track_count;        // when true, `count` is maintained on insert
	PaddedI64 count;         // number of distinct interned strings (if tracked)
};
// Public interner API.
gb_internal StringInterner *string_interner_create();
gb_internal InternedString string_interner_insert(String str, u32 hash=0); // hash==0 means "compute it"
gb_internal String string_interner_load(InternedString interned);
gb_global StringInterner *g_string_interner; // assigned once at startup (see main)
// Per-thread bump allocator carved out of the interner's shared StaticArena;
// lets threads allocate string records without taking the arena lock on
// every insert.
struct StringInternerThreadLocalArena {
	u8 *data;   // current 2 MiB block (nullptr until first use)
	u64 cursor; // bytes used within the current block
};
gb_thread_local gb_global StringInternerThreadLocalArena g_interner_arena;
gb_internal void string_interner_thread_local_arena_init(StringInternerThreadLocalArena *tl_arena);
gb_internal void *string_interner_thread_local_arena_alloc(StringInternerThreadLocalArena *tl_arena, isize size, isize alignment);
// Build an interner: reserve 1 GiB of address space, then carve the interner
// header and the first-level cell table out of that reservation. Because the
// header is the arena's first allocation, u32 offsets from the interner base
// can address every record in the reservation.
gb_internal StringInterner *string_interner_create() {
	StaticArena backing = {};
	static_arena_init(&backing, 1<<30, STATIC_ARENA_DEFAULT_COMMIT_BLOCK_SIZE);

	StringInterner *interner = cast(StringInterner *)static_arena_alloc(&backing, gb_size_of(StringInterner), STRING_INTERN_CACHE_LINE);
	interner->arena = backing; // `used` already accounts for the header itself

	u64 cell_count = 1llu << 17llu; // 131072 first-level cells
	interner->cell_mask = cell_count - 1;
	interner->cells = cast(StringInternCell *)static_arena_alloc(&interner->arena, cell_count * gb_size_of(StringInternCell), STRING_INTERN_CACHE_LINE);
	interner->track_count = false;
	return interner;
}
// Resolve an interned handle into a String view. A zero handle is the empty
// string; otherwise the handle is a byte offset from the interner base to a
// record laid out as [u32 length][bytes][NUL].
gb_internal String string_interner_load(InternedString interned) {
	if (interned.value == 0) {
		return {};
	}
	u8 *record = cast(u8 *)g_string_interner + interned.value;
	u32 length = *cast(u32 *)record;
	String result = { record + 4, length };
	return result;
}
// Resolve an interned handle into a NUL-terminated C string; "" for the zero
// handle. The record's payload is always NUL-terminated, so no copy is needed.
gb_internal char const *string_interner_load_cstring(InternedString interned) {
	if (interned.value == 0) {
		return "";
	}
	u8 *record = cast(u8 *)g_string_interner + interned.value;
	// Skip the u32 length prefix to reach the NUL-terminated payload.
	return cast(char const *)(record + 4);
}
// Member-function sugar over the free functions (always the global interner).
String InternedString::load() const {
	return string_interner_load(*this);
}
char const *InternedString::load_cstring() const {
	return string_interner_load_cstring(*this);
}
// Intern `str`, returning its stable handle. Pass hash==0 to have it computed.
//
// Two phases: (1) a lock-free read pass over the bucket's cell chain — the
// common "already interned" case takes no lock; (2) on miss, take the bucket's
// striped mutex, rescan from the last cell seen (new entries only ever land in
// the chain's tail cell or in freshly appended cells, so resuming there and
// following `next` cannot miss a concurrent insert), then publish the record.
gb_internal InternedString string_interner_insert(String str, u32 hash) {
	StringInterner* interner = g_string_interner;
	if (str.len == 0) {
		return {}; // the zero handle denotes the empty string
	}
	if (hash == 0) {
		hash = string_hash(str);
	}
	// NOTE(review): slots use hash==0 as the "empty" sentinel below; a string
	// whose computed hash is 0 could alias an empty slot — confirm string_hash
	// never returns 0, or that this case is acceptable.
	u64 cell_idx = hash & interner->cell_mask;
	StringInternCell *cell = &interner->cells[cell_idx];
	// Phase 1: lock-free lookup. acquire loads pair with the release stores
	// in the publish path so a matching hash implies a visible offset/record.
	while (true) {
		StringInternCell *next = cell->next.load(std::memory_order_acquire);
		for (i32 i = 0; i < STRING_INTERNER_CELL_WIDTH; i += 1) {
			if (cell->hashes[i].load(std::memory_order_acquire) == hash) {
				String to_compare = string_interner_load(cell->offsets[i]);
				if (to_compare == str) {
					return cell->offsets[i];
				}
			}
		}
		if (next == nullptr) {
			break; // `cell` is now the tail of the chain
		}
		cell = next;
	}
	// Phase 2: serialize writers for this bucket via a striped mutex
	// (same cell_idx always maps to the same stripe).
	u64 mutex_cell = cell_idx & STRING_INTERNER_MUTEX_STRIPE_MASK;
	PaddedMutex* m = &interner->mutexes[mutex_cell];
	MUTEX_GUARD(&m->m);
	// Rescan under the lock in case another writer interned `str` between
	// phase 1 and lock acquisition; track the tail cell for insertion.
	StringInternCell *load_cell = nullptr;
	while (cell) {
		for (i32 i = 0; i < STRING_INTERNER_CELL_WIDTH; i += 1) {
			if (cell->hashes[i].load(std::memory_order_relaxed) == hash) {
				// string check
				String to_compare = string_interner_load(cell->offsets[i]);
				if (to_compare == str) {
					return cell->offsets[i];
				}
			}
		}
		load_cell = cell;
		cell = cell->next.load(std::memory_order_relaxed);
	}
	// Copy the string into a [u32 length][bytes][NUL] record from the
	// thread-local arena (backed by the interner's 1 GiB reservation, so the
	// offset from the interner base fits in a u32).
	u64 data_to_allocate = 4 + str.len + 1;
	u8 *data = cast(u8 *)string_interner_thread_local_arena_alloc(&g_interner_arena, data_to_allocate, 8);
	u32 str_len = cast(u32)str.len;
	gb_memcopy(data, &str_len, 4);
	gb_memcopy(&data[4], str.text, str_len);
	data[4+str_len] = 0;
	InternedString offset = { cast(u32)(cast(u8 *)data - cast(u8 *)interner) };
	// Publish into the first empty slot of the tail cell: write the offset
	// first, then release-store the hash so lock-free readers that observe the
	// hash also observe a valid offset.
	for (i32 i = 0; i < STRING_INTERNER_CELL_WIDTH; i += 1) {
		if (load_cell->hashes[i].load(std::memory_order_relaxed) == 0) {
			load_cell->offsets[i] = offset;
			load_cell->hashes[i].store(hash, std::memory_order_release);
			if (interner->track_count) {
				interner->count.value.fetch_add(1, std::memory_order_relaxed);
			}
			return offset;
		}
	}
	// Tail cell full: append a fresh overflow cell. Fill it before the
	// release-store of `next` makes it reachable by lock-free readers.
	StringInternCell *new_cell = cast(StringInternCell *)string_interner_thread_local_arena_alloc(&g_interner_arena, gb_size_of(StringInternCell), STRING_INTERN_CACHE_LINE);
	new_cell->offsets[0] = offset;
	new_cell->hashes[0].store(hash, std::memory_order_relaxed);
	load_cell->next.store(new_cell, std::memory_order_release);
	if (interner->track_count) {
		interner->count.value.fetch_add(1, std::memory_order_relaxed);
	}
	return offset;
}
// Intern `str` and return its canonical NUL-terminated C string.
gb_internal char const *string_intern_cstring(String str) {
	return string_interner_load_cstring(string_interner_insert(str, 0));
}
// Intern `str` and return the canonical String view backed by interner storage.
gb_internal String string_intern_string(String str) {
	return string_interner_load(string_interner_insert(str, 0));
}
// Reset a thread-local arena to "no block": the cursor is parked at the end
// so the very first allocation fetches a fresh block from the shared arena.
gb_internal void string_interner_thread_local_arena_init(StringInternerThreadLocalArena *tl_arena) {
	tl_arena->data = nullptr;
	tl_arena->cursor = STRING_INTERNER_THREAD_LOCAL_SIZE;
}
// Bump-allocate from the thread's local block, refilling the block from the
// interner's shared StaticArena (under its lock) when exhausted.
gb_internal void *string_interner_thread_local_arena_alloc(StringInternerThreadLocalArena *tl_arena, isize size, isize alignment) {
	// Fixed: a request that can never fit in a fresh block previously caused
	// unbounded recursion through the refill path below.
	GB_ASSERT_MSG(size + alignment <= STRING_INTERNER_THREAD_LOCAL_SIZE,
	              "string interner allocation too large for a thread-local block");
	if (tl_arena->data == nullptr) {
		// Lazy init: force the refill branch on first use of this thread.
		tl_arena->cursor = STRING_INTERNER_THREAD_LOCAL_SIZE;
	}
	isize new_head = align_formula_isize(tl_arena->cursor, alignment);
	isize cursor = new_head + size;
	if (cursor > STRING_INTERNER_THREAD_LOCAL_SIZE) {
		// Current block exhausted: grab a fresh block from the shared arena.
		// The old block's tail is intentionally abandoned (records already
		// handed out remain valid inside the arena's reservation).
		mutex_lock(&g_string_interner->arena_mutex.m);
		tl_arena->data = cast(u8 *)static_arena_alloc(&g_string_interner->arena, STRING_INTERNER_THREAD_LOCAL_SIZE, 4096);
		tl_arena->cursor = 0;
		mutex_unlock(&g_string_interner->arena_mutex.m);
		return string_interner_thread_local_arena_alloc(tl_arena, size, alignment);
	}
	u8 *return_head = tl_arena->data + new_head;
	tl_arena->cursor = cursor;
	return return_head;
}