From 620bf162a048fdf29fdfcedc12abae79cffeedf4 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 2 Aug 2025 12:32:18 +0100 Subject: [PATCH] Cache const `string16` in LLVM --- src/check_builtin.cpp | 13 +++++-- src/common.cpp | 1 + src/llvm_backend.hpp | 3 +- src/llvm_backend_const.cpp | 2 +- src/llvm_backend_expr.cpp | 6 --- src/llvm_backend_general.cpp | 71 +++++++++++++++++++++++++++++++++++- 6 files changed, 83 insertions(+), 13 deletions(-) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 66ea0cfbd..da5eb8977 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2329,10 +2329,15 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As if (operand->mode == Addressing_Constant) { mode = Addressing_Constant; - GB_ASSERT_MSG(!is_type_string16(op_type), "TODO(bill): constant utf-16 string len"); - - String str = operand->value.value_string; - value = exact_value_i64(str.len); + if (operand->value.kind == ExactValue_String) { + String str = operand->value.value_string; + value = exact_value_i64(str.len); + } else if (operand->value.kind == ExactValue_String16) { + String16 str = operand->value.value_string16; + value = exact_value_i64(str.len); + } else { + GB_PANIC("Unhandled value kind: %d", operand->value.kind); + } type = t_untyped_integer; } else { mode = Addressing_Value; diff --git a/src/common.cpp b/src/common.cpp index b3761fc36..53848cacf 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -350,6 +350,7 @@ gb_global bool global_module_path_set = false; #include "ptr_map.cpp" #include "ptr_set.cpp" #include "string_map.cpp" +#include "string16_map.cpp" #include "string_set.cpp" #include "priority_queue.cpp" #include "thread_pool.cpp" diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index fef6e754d..648e8a732 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -173,7 +173,8 @@ struct lbModule { PtrMap procedure_values; Array missing_procedures_to_check; - StringMap const_strings; + 
StringMap const_strings; + String16Map const_string16s; PtrMap function_type_map; diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index 8c05ed4a2..cba0000cd 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -594,7 +594,7 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, lb return res; } else if (value.kind == ExactValue_String16) { GB_ASSERT(is_type_slice(type)); - GB_PANIC("TODO(bill): UTF-16 String"); + res.value = lb_find_or_add_entity_string16_slice_with_type(m, value.value_string16, original_type).value; return res; }else { ast_node(cl, CompoundLit, value.value_compound); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 3463b6083..8ad6a5a1c 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -2526,12 +2526,6 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { if (is_type_untyped(src)) { - if (is_type_string(src) && is_type_string16(dst)) { - GB_PANIC("TODO(bill): UTF-16 string"); - lbAddr result = lb_add_local_generated(p, t, false); - lb_addr_store(p, result, value); - return lb_addr_load(p, result); - } if (is_type_string(src) && is_type_string(dst)) { lbAddr result = lb_add_local_generated(p, t, false); lb_addr_store(p, result, value); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 9ef1c23c0..064d0ef39 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -85,6 +85,7 @@ gb_internal void lb_init_module(lbModule *m, Checker *c) { string_map_init(&m->members); string_map_init(&m->procedures); string_map_init(&m->const_strings); + string16_map_init(&m->const_string16s); map_init(&m->function_type_map); string_map_init(&m->gen_procs); if (USE_SEPARATE_MODULES) { @@ -2716,7 +2717,18 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string_ptr(lbModule *m, String co } gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String16 const &str, 
bool custom_link_section) {
-	// TODO(bill): caching for UTF-16 strings
+	String16HashKey key = {};
+	LLVMValueRef *found = nullptr;
+
+	if (!custom_link_section) {
+		key = string_hash_string(str);
+		found = string16_map_get(&m->const_string16s, key);
+	}
+	if (found != nullptr) {
+		return *found;
+	}
+
+
 
 	LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)};
 
@@ -2749,6 +2761,9 @@ gb_internal LLVMValueRef lb_find_or_add_entity_string16_ptr(lbModule *m, String1
 	LLVMSetAlignment(global_data, 1);
 
 	LLVMValueRef ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2);
+	if (!custom_link_section) {
+		string16_map_set(&m->const_string16s, key, ptr);
+	}
 	return ptr;
 }
 
@@ -2812,6 +2827,60 @@ gb_internal lbValue lb_find_or_add_entity_string_byte_slice_with_type(lbModule *
 	return res;
 }
 
+gb_internal lbValue lb_find_or_add_entity_string16_slice_with_type(lbModule *m, String16 const &str, Type *slice_type) { // constant `slice_type` value over a zero-terminated u16 global holding `str`
+	GB_ASSERT(is_type_slice(slice_type));
+	LLVMValueRef indices[2] = {llvm_zero(m), llvm_zero(m)};
+	LLVMValueRef data = nullptr;
+	{
+		LLVMTypeRef llvm_u16 = LLVMInt16TypeInContext(m->ctx);
+		// Build one i16 constant per element of `str.text`, followed by a single zero terminator.
+		TEMPORARY_ALLOCATOR_GUARD();
+
+		LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, str.len+1);
+
+		for (isize i = 0; i < str.len; i++) {
+			values[i] = LLVMConstInt(llvm_u16, str.text[i], false);
+		}
+		values[str.len] = LLVMConstInt(llvm_u16, 0, false);
+
+		data = LLVMConstArray(llvm_u16, values, cast(unsigned)(str.len+1));
+	}
+	// Every call adds a new global; its name embeds the module name and a value taken from the module's array counter.
+	u32 id = m->global_array_index.fetch_add(1);
+	gbString name = gb_string_make(temporary_allocator(), "csba$");
+	name = gb_string_appendc(name, m->module_name);
+	name = gb_string_append_fmt(name, "$%x", id);
+
+	LLVMTypeRef type = LLVMTypeOf(data);
+	LLVMValueRef global_data = LLVMAddGlobal(m->mod, type, name);
+	LLVMSetInitializer(global_data, data);
+	lb_make_global_private_const(global_data);
+	LLVMSetAlignment(global_data, 2); // elements are 16-bit: an alignment of 1 would allow the array to sit on an odd address
+	// `data_len` becomes the slice length; it starts as the u16 count and excludes the terminator. An empty string yields a null data pointer.
+	i64 data_len = str.len;
+	LLVMValueRef ptr = nullptr;
+	if (data_len != 0) {
+		ptr = LLVMConstInBoundsGEP2(type, global_data, indices, 2);
+	} else {
+		ptr = LLVMConstNull(lb_type(m, t_u8_ptr));
+	}
+	if (!is_type_u16_slice(slice_type)) {
+		Type *bt = base_type(slice_type);
+		Type *elem = bt->Slice.elem;
+		i64 sz = type_size_of(elem);
+		GB_ASSERT(sz > 0);
+		ptr = LLVMConstPointerCast(ptr, lb_type(m, alloc_type_pointer(elem)));
+		data_len = (data_len * 2) / sz; // str.len counts 2-byte units, not bytes: convert to a byte size before dividing by the element size
+	}
+
+	LLVMValueRef len = LLVMConstInt(lb_type(m, t_int), data_len, true);
+	LLVMValueRef values[2] = {ptr, len};
+
+	lbValue res = {};
+	res.value = llvm_const_named_struct(m, slice_type, values, 2);
+	res.type = slice_type;
+	return res;
+}
 
 
 gb_internal lbValue lb_find_ident(lbProcedure *p, lbModule *m, Entity *e, Ast *expr) {