From af3184adc96cef59fff986ea6400caa6dbdb56ae Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 5 Aug 2025 15:12:54 +0100 Subject: [PATCH] Change `is_utf16` field to `encoding` and use an enum --- base/runtime/core.odin | 7 ++++++- base/runtime/print.odin | 5 +++-- core/encoding/cbor/tags.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 +- core/encoding/json/unmarshal.odin | 2 +- core/flags/internal_rtti.odin | 2 +- core/reflect/types.odin | 5 +++-- src/checker.cpp | 3 +++ src/llvm_backend_type.cpp | 16 ++++++++++++---- src/types.cpp | 2 ++ 10 files changed, 33 insertions(+), 13 deletions(-) diff --git a/base/runtime/core.odin b/base/runtime/core.odin index fe40427ff..478a3d307 100644 --- a/base/runtime/core.odin +++ b/base/runtime/core.odin @@ -61,6 +61,11 @@ Type_Info_Struct_Soa_Kind :: enum u8 { Dynamic = 3, } +Type_Info_String_Encoding_Kind :: enum u8 { + UTF_8 = 0, + UTF_16 = 1, +} + // Variant Types Type_Info_Named :: struct { name: string, @@ -73,7 +78,7 @@ Type_Info_Rune :: struct {} Type_Info_Float :: struct {endianness: Platform_Endianness} Type_Info_Complex :: struct {} Type_Info_Quaternion :: struct {} -Type_Info_String :: struct {is_cstring: bool, is_utf16: bool} +Type_Info_String :: struct {is_cstring: bool, encoding: Type_Info_String_Encoding_Kind} Type_Info_Boolean :: struct {} Type_Info_Any :: struct {} Type_Info_Type_Id :: struct {} diff --git a/base/runtime/print.odin b/base/runtime/print.odin index 85ed49445..2cfb6661b 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -297,8 +297,9 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { print_byte('c') } print_string("string") - if info.is_utf16 { - print_string("16") + switch info.encoding { + case .UTF_8: /**/ + case .UTF_16: print_string("16") } case Type_Info_Boolean: switch ti.id { diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index e0e69cbf5..ae1664dfc 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -298,7 +298,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, #partial switch t in ti.variant { case reflect.Type_Info_String: - assert(!t.is_utf16) + assert(t.encoding == .UTF_8) if t.is_cstring { length := base64.decoded_len(bytes) builder := strings.builder_make(0, length+1) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 2840429f5..043b2ec60 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -335,7 +335,7 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: - assert(!t.is_utf16) + assert(t.encoding == .UTF_8) bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index 51e7e3b81..0b65adaac 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -571,7 +571,7 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm #partial switch tk in t.key.variant { case runtime.Type_Info_String: - assert(!tk.is_utf16) + assert(tk.encoding == .UTF_8) key_ptr = rawptr(&key) key_cstr: cstring diff --git a/core/flags/internal_rtti.odin b/core/flags/internal_rtti.odin index 58224cc87..a1b050597 100644 --- a/core/flags/internal_rtti.odin +++ b/core/flags/internal_rtti.odin @@ -127,7 +127,7 @@ parse_and_set_pointer_by_base_type :: proc(ptr: rawptr, str: string, type_info: } case runtime.Type_Info_String: - assert(!specific_type_info.is_utf16) + assert(specific_type_info.encoding == .UTF_8) if specific_type_info.is_cstring { cstr_ptr := (^cstring)(ptr) diff --git a/core/reflect/types.odin b/core/reflect/types.odin index 2351408cc..98b7b368f 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -514,8 +514,9 @@ write_type_writer :: #force_no_inline proc(w: io.Writer, ti: ^Type_Info, n_writt io.write_byte(w, 'c', &n) or_return } io.write_string(w, "string", &n) or_return - if info.is_utf16 { - io.write_string(w, "16", &n) or_return + switch info.encoding { + case .UTF_8: /**/ + case .UTF_16: io.write_string(w, "16", &n) or_return } case Type_Info_Boolean: switch ti.id { diff --git a/src/checker.cpp b/src/checker.cpp index e9fa792f3..e72061f56 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -3101,6 +3101,9 @@ gb_internal void init_core_type_info(Checker *c) { GB_ASSERT(tis->fields.count == 5); + Entity *type_info_string_encoding_kind = find_core_entity(c, str_lit("Type_Info_String_Encoding_Kind")); + t_type_info_string_encoding_kind = type_info_string_encoding_kind->type; + Entity *type_info_variant = tis->fields[4]; Type *tiv_type = type_info_variant->type; GB_ASSERT(is_type_union(tiv_type)); diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index a91d77fe5..d1e7c0559 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -525,7 +525,15 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ break; case Basic_string: - tag_type = t_type_info_string; + { + tag_type = t_type_info_string; + LLVMValueRef vals[2] = { + lb_const_bool(m, t_bool, false).value, + lb_const_int(m, t_type_info_string_encoding_kind, 0).value, + }; + + variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); + } break; case Basic_cstring: @@ -533,7 +541,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ tag_type = t_type_info_string; LLVMValueRef vals[2] = { lb_const_bool(m, t_bool, true).value, - lb_const_bool(m, t_bool, false).value, + lb_const_int(m, t_type_info_string_encoding_kind, 0).value, }; variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); @@ -545,7 +553,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ tag_type = t_type_info_string; LLVMValueRef vals[2] = { lb_const_bool(m, t_bool, false).value, - lb_const_bool(m, t_bool, true).value, + lb_const_int(m, t_type_info_string_encoding_kind, 1).value, }; variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); @@ -558,7 +566,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ tag_type = t_type_info_string; LLVMValueRef vals[2] = { lb_const_bool(m, t_bool, true).value, - lb_const_bool(m, t_bool, true).value, + lb_const_int(m, t_type_info_string_encoding_kind, 1).value, }; variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); diff --git a/src/types.cpp b/src/types.cpp index 51d170f2b..c465714db 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -661,6 +661,8 @@ gb_global Type *t_type_info_enum_value = nullptr; gb_global Type *t_type_info_ptr = nullptr; gb_global Type *t_type_info_enum_value_ptr = nullptr; +gb_global Type *t_type_info_string_encoding_kind = nullptr; + gb_global Type *t_type_info_named = nullptr; gb_global Type *t_type_info_integer = nullptr; gb_global Type *t_type_info_rune = nullptr;