diff --git a/base/runtime/core.odin b/base/runtime/core.odin index baecb4146..fe40427ff 100644 --- a/base/runtime/core.odin +++ b/base/runtime/core.odin @@ -73,7 +73,7 @@ Type_Info_Rune :: struct {} Type_Info_Float :: struct {endianness: Platform_Endianness} Type_Info_Complex :: struct {} Type_Info_Quaternion :: struct {} -Type_Info_String :: struct {is_cstring: bool} +Type_Info_String :: struct {is_cstring: bool, is_utf16: bool} Type_Info_Boolean :: struct {} Type_Info_Any :: struct {} Type_Info_Type_Id :: struct {} @@ -397,6 +397,11 @@ Raw_String :: struct { len: int, } +Raw_String16 :: struct { + data: [^]u16, + len: int, +} + Raw_Slice :: struct { data: rawptr, len: int, @@ -450,6 +455,12 @@ Raw_Cstring :: struct { } #assert(size_of(Raw_Cstring) == size_of(cstring)) +Raw_Cstring16 :: struct { + data: [^]u16, +} +#assert(size_of(Raw_Cstring16) == size_of(cstring16)) + + Raw_Soa_Pointer :: struct { data: rawptr, index: int, diff --git a/base/runtime/core_builtin.odin b/base/runtime/core_builtin.odin index e2ba14f3a..09118998c 100644 --- a/base/runtime/core_builtin.odin +++ b/base/runtime/core_builtin.odin @@ -86,11 +86,26 @@ copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int } return n } + +// `copy_from_string16` is a built-in procedure that copies elements from a source string `src` to a destination slice `dst`. +// The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum +// of len(src) and len(dst). +// +// Prefer the procedure group `copy`. +@builtin +copy_from_string16 :: proc "contextless" (dst: $T/[]$E/u16, src: $S/string16) -> int { + n := min(len(dst), len(src)) + if n > 0 { + intrinsics.mem_copy(raw_data(dst), raw_data(src), n*size_of(u16)) + } + return n +} + // `copy` is a built-in procedure that copies elements from a source slice/string `src` to a destination slice `dst`. // The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum // of len(src) and len(dst). @builtin -copy :: proc{copy_slice, copy_from_string} +copy :: proc{copy_slice, copy_from_string, copy_from_string16} @@ -285,6 +300,15 @@ delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error } +@builtin +delete_string16 :: proc(str: string16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + return mem_free_with_size(raw_data(str), len(str)*size_of(u16), allocator, loc) +} +@builtin +delete_cstring16 :: proc(str: cstring16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error { + return mem_free((^u16)(str), allocator, loc) +} + // `delete` will try to free the underlying data of the passed built-in data structure (string, cstring, dynamic array, slice, or map), with the given `allocator` if the allocator supports this operation. // // Note: Prefer `delete` over the specific `delete_*` procedures where possible. @@ -297,6 +321,8 @@ delete :: proc{ delete_map, delete_soa_slice, delete_soa_dynamic_array, + delete_string16, + delete_cstring16, } diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin index 907b187f1..660af58ab 100644 --- a/base/runtime/internal.odin +++ b/base/runtime/internal.odin @@ -493,12 +493,40 @@ string_cmp :: proc "contextless" (a, b: string) -> int { return ret } + +string16_eq :: proc "contextless" (lhs, rhs: string16) -> bool { + x := transmute(Raw_String16)lhs + y := transmute(Raw_String16)rhs + if x.len != y.len { + return false + } + return #force_inline memory_equal(x.data, y.data, x.len*size_of(u16)) +} + +string16_cmp :: proc "contextless" (a, b: string16) -> int { + x := transmute(Raw_String16)a + y := transmute(Raw_String16)b + + ret := memory_compare(x.data, y.data, min(x.len, y.len)*size_of(u16)) + if ret == 0 && x.len != y.len { + return -1 if x.len < y.len else +1 + } + return ret +} + string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) } string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 } string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 } string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 } string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 } +string16_ne :: #force_inline proc "contextless" (a, b: string16) -> bool { return !string16_eq(a, b) } +string16_lt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) < 0 } +string16_gt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) > 0 } +string16_le :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) <= 0 } +string16_ge :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) >= 0 } + + cstring_len :: proc "contextless" (s: cstring) -> int { p0 := uintptr((^byte)(s)) p := p0 @@ -508,6 +536,16 @@ cstring_len :: proc "contextless" (s: cstring) -> int { return int(p - p0) } +cstring16_len :: proc "contextless" (s: cstring16) -> int { + p := ([^]u16)(s) + n := 0 + for p != nil && p[0] != 0 { + p = p[1:] + n += 1 + } + return n +} + cstring_to_string :: proc "contextless" (s: cstring) -> string { if s == nil { return "" @@ -517,6 +555,15 @@ cstring_to_string :: proc "contextless" (s: cstring) -> string { return transmute(string)Raw_String{ptr, n} } +cstring16_to_string16 :: proc "contextless" (s: cstring16) -> string16 { + if s == nil { + return "" + } + ptr := (^u16)(s) + n := cstring16_len(s) + return transmute(string16)Raw_String16{ptr, n} +} + cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool { x := ([^]byte)(lhs) @@ -559,6 +606,46 @@ cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 } cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 } +cstring16_eq :: proc "contextless" (lhs, rhs: cstring16) -> bool { + x := ([^]u16)(lhs) + y := ([^]u16)(rhs) + if x == y { + return true + } + if (x == nil) ~ (y == nil) { + return false + } + xn := cstring16_len(lhs) + yn := cstring16_len(rhs) + if xn != yn { + return false + } + return #force_inline memory_equal(x, y, xn*size_of(u16)) +} + +cstring16_cmp :: proc "contextless" (lhs, rhs: cstring16) -> int { + x := ([^]u16)(lhs) + y := ([^]u16)(rhs) + if x == y { + return 0 + } + if (x == nil) ~ (y == nil) { + return -1 if x == nil else +1 + } + xn := cstring16_len(lhs) + yn := cstring16_len(rhs) + ret := memory_compare(x, y, min(xn, yn)*size_of(u16)) + if ret == 0 && xn != yn { + return -1 if xn < yn else +1 + } + return ret +} + +cstring16_ne :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return !cstring16_eq(a, b) } +cstring16_lt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) < 0 } +cstring16_gt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) > 0 } +cstring16_le :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) <= 0 } +cstring16_ge :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) >= 0 } complex32_eq :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) == real(b) && imag(a) == imag(b) } complex32_ne :: #force_inline proc "contextless" (a, b: complex32) -> bool { return real(a) != real(b) || imag(a) != imag(b) } diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 0f6470cca..7fe6287d4 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1551,6 +1551,79 @@ fmt_string :: proc(fi: ^Info, s: string, verb: rune) { fmt_cstring :: proc(fi: ^Info, s: cstring, verb: rune) { fmt_string(fi, string(s), verb) } + +// Formats a string UTF-16 with a specific format. +// +// Inputs: +// - fi: Pointer to the Info struct containing format settings. +// - s: The string to format. +// - verb: The format specifier character (e.g. 's', 'v', 'q', 'x', 'X'). +// +fmt_string16 :: proc(fi: ^Info, s: string16, verb: rune) { + s, verb := s, verb + if ol, ok := fi.optional_len.?; ok { + s = s[:clamp(ol, 0, len(s))] + } + if !fi.in_bad && fi.record_level > 0 && verb == 'v' { + verb = 'q' + } + + switch verb { + case 's', 'v': + if fi.width_set { + if fi.width > len(s) { + if fi.minus { + io.write_string16(fi.writer, s, &fi.n) + } + + for _ in 0.. 0 && space { + io.write_byte(fi.writer, ' ', &fi.n) + } + char_set := __DIGITS_UPPER + if verb == 'x' { + char_set = __DIGITS_LOWER + } + _fmt_int(fi, u64(s[i]), 16, false, bit_size=16, digits=char_set) + } + + case: + fmt_bad_verb(fi, verb) + } +} +// Formats a C-style UTF-16 string with a specific format. +// +// Inputs: +// - fi: Pointer to the Info struct containing format settings. +// - s: The C-style string to format. +// - verb: The format specifier character (Ref fmt_string). +// +fmt_cstring16 :: proc(fi: ^Info, s: cstring16, verb: rune) { + fmt_string16(fi, string16(s), verb) +} + // Formats a raw pointer with a specific format. // // Inputs: @@ -3210,6 +3283,9 @@ fmt_arg :: proc(fi: ^Info, arg: any, verb: rune) { case string: fmt_string(fi, a, verb) case cstring: fmt_cstring(fi, a, verb) + case string16: fmt_string16(fi, a, verb) + case cstring16: fmt_cstring16(fi, a, verb) + case typeid: reflect.write_typeid(fi.writer, a, &fi.n) case i16le: fmt_int(fi, u64(a), true, 16, verb) diff --git a/core/io/io.odin b/core/io/io.odin index c2b44cbdb..5431519bf 100644 --- a/core/io/io.odin +++ b/core/io/io.odin @@ -5,6 +5,7 @@ package io import "base:intrinsics" import "core:unicode/utf8" +import "core:unicode/utf16" // Seek whence values Seek_From :: enum { @@ -314,6 +315,29 @@ write_string :: proc(s: Writer, str: string, n_written: ^int = nil) -> (n: int, return write(s, transmute([]byte)str, n_written) } +// write_string16 writes the contents of the string16 s to w reencoded as utf-8 +write_string16 :: proc(s: Writer, str: string16, n_written: ^int = nil) -> (n: int, err: Error) { + for i := 0; i < len(str); i += 1 { + r := rune(utf16.REPLACEMENT_CHAR) + + switch c := str[i]; { + case c < utf16._surr1, utf16._surr3 <= c: + r = rune(c) + case utf16._surr1 <= c && c < utf16._surr2 && i+1 < len(str) && + utf16._surr2 <= str[i+1] && str[i+1] < utf16._surr3: + r = utf16.decode_surrogate_pair(rune(c), rune(str[i+1])) + i += 1 + } + + w, err := write_rune(s, r, n_written) + n += w + if err != nil { + return + } + } + return +} + // write_rune writes a UTF-8 encoded rune to w. write_rune :: proc(s: Writer, r: rune, n_written: ^int = nil) -> (size: int, err: Error) { defer if err == nil && n_written != nil { diff --git a/core/io/util.odin b/core/io/util.odin index fa98e007b..72983523a 100644 --- a/core/io/util.odin +++ b/core/io/util.odin @@ -264,6 +264,33 @@ write_quoted_string :: proc(w: Writer, str: string, quote: byte = '"', n_written return } +write_quoted_string16 :: proc(w: Writer, str: string16, quote: byte = '"', n_written: ^int = nil, for_json := false) -> (n: int, err: Error) { + defer if n_written != nil { + n_written^ += n + } + write_byte(w, quote, &n) or_return + for width, s := 0, str; len(s) > 0; s = s[width:] { + r := rune(s[0]) + width = 1 + if r >= utf8.RUNE_SELF { + r, width = utf16.decode_rune_in_string(s) + } + if width == 1 && r == utf8.RUNE_ERROR { + write_byte(w, '\\', &n) or_return + write_byte(w, 'x', &n) or_return + write_byte(w, DIGITS_LOWER[s[0]>>4], &n) or_return + write_byte(w, DIGITS_LOWER[s[0]&0xf], &n) or_return + continue + } + + n_wrapper(write_escaped_rune(w, r, quote, false, nil, for_json), &n) or_return + + } + write_byte(w, quote, &n) or_return + return +} + + // writer append a quoted rune into the byte buffer, return the written size write_quoted_rune :: proc(w: Writer, r: rune) -> (n: int) { _write_byte :: #force_inline proc(w: Writer, c: byte) -> int { diff --git a/core/unicode/utf16/utf16.odin b/core/unicode/utf16/utf16.odin index e2bcf7f68..9a8cfe438 100644 --- a/core/unicode/utf16/utf16.odin +++ b/core/unicode/utf16/utf16.odin @@ -106,6 +106,26 @@ decode :: proc(d: []rune, s: []u16) -> (n: int) { return } +decode_rune_in_string :: proc(s: string16) -> (r: rune, width: int) { + r = rune(REPLACEMENT_CHAR) + n := len(s) + if n < 1 { + return + } + width = 1 + + + switch c := s[0]; { + case c < _surr1, _surr3 <= c: + r = rune(c) + case _surr1 <= c && c < _surr2 && 1 < len(s) && + _surr2 <= s[1] && s[1] < _surr3: + r = decode_surrogate_pair(rune(c), rune(s[1])) + width += 1 + } + return +} + rune_count :: proc(s: []u16) -> (n: int) { for i := 0; i < len(s); i += 1 { c := s[i] diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 46a4f9ae5..40bbe41e5 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -1089,7 +1089,7 @@ gb_internal String internal_odin_root_dir(void) { text = gb_alloc_array(permanent_allocator(), wchar_t, len+1); GetModuleFileNameW(nullptr, text, cast(int)len); - path = string16_to_string(heap_allocator(), make_string16(text, len)); + path = string16_to_string(heap_allocator(), make_string16(cast(u16 *)text, len)); for (i = path.len-1; i >= 0; i--) { u8 c = path[i]; @@ -1387,14 +1387,14 @@ gb_internal String path_to_fullpath(gbAllocator a, String s, bool *ok_) { mutex_lock(&fullpath_mutex); - len = GetFullPathNameW(&string16[0], 0, nullptr, nullptr); + len = GetFullPathNameW(cast(wchar_t *)&string16[0], 0, nullptr, nullptr); if (len != 0) { wchar_t *text = gb_alloc_array(permanent_allocator(), wchar_t, len+1); - GetFullPathNameW(&string16[0], len, text, nullptr); + GetFullPathNameW(cast(wchar_t *)&string16[0], len, text, nullptr); mutex_unlock(&fullpath_mutex); text[len] = 0; - result = string16_to_string(a, make_string16(text, len)); + result = string16_to_string(a, make_string16(cast(u16 *)text, len)); result = string_trim_whitespace(result); // Replace Windows style separators diff --git a/src/cached.cpp b/src/cached.cpp index efdadce7b..61b5d01b4 100644 --- a/src/cached.cpp +++ b/src/cached.cpp @@ -231,7 +231,7 @@ Array cache_gather_envs() { wchar_t *curr_string = strings; while (curr_string && *curr_string) { - String16 wstr = make_string16_c(curr_string); + String16 wstr = make_string16_c(cast(u16 *)curr_string); curr_string += wstr.len+1; String str = string16_to_string(temporary_allocator(), wstr); if (string_starts_with(str, str_lit("CURR_DATE_TIME="))) { diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 974224ed2..d36cf4520 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2327,6 +2327,9 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As if (is_type_string(op_type) && id == BuiltinProc_len) { if (operand->mode == Addressing_Constant) { mode = Addressing_Constant; + + GB_ASSERT_MSG(!is_type_string16(op_type), "TODO(bill): constant utf-16 string len"); + String str = operand->value.value_string; value = exact_value_i64(str.len); type = t_untyped_integer; @@ -2334,6 +2337,8 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As mode = Addressing_Value; if (is_type_cstring(op_type)) { add_package_dependency(c, "runtime", "cstring_len"); + } else if (is_type_cstring16(op_type)) { + add_package_dependency(c, "runtime", "cstring16_len"); } } } else if (is_type_array(op_type)) { @@ -4683,7 +4688,9 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As break; case Type_Basic: if (t->Basic.kind == Basic_string) { - operand->type = alloc_type_multi_pointer(t_u8); + operand->type = t_u8_multi_ptr; + } else if (t->Basic.kind == Basic_string16) { + operand->type = t_u16_multi_ptr; } break; case Type_Pointer: diff --git a/src/check_decl.cpp b/src/check_decl.cpp index dd4c09e85..af46ee40e 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -815,6 +815,12 @@ gb_internal bool signature_parameter_similar_enough(Type *x, Type *y) { if (sig_compare(is_type_cstring, is_type_u8_multi_ptr, x, y)) { return true; } + if (sig_compare(is_type_cstring16, is_type_u16_ptr, x, y)) { + return true; + } + if (sig_compare(is_type_cstring16, is_type_u16_multi_ptr, x, y)) { + return true; + } if (sig_compare(is_type_uintptr, is_type_rawptr, x, y)) { return true; diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 6723a7580..57073e22f 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2862,6 +2862,14 @@ gb_internal void add_comparison_procedures_for_fields(CheckerContext *c, Type *t add_package_dependency(c, "runtime", "string_eq"); add_package_dependency(c, "runtime", "string_ne"); break; + case Basic_cstring16: + add_package_dependency(c, "runtime", "cstring16_eq"); + add_package_dependency(c, "runtime", "cstring16_ne"); + break; + case Basic_string16: + add_package_dependency(c, "runtime", "string16_eq"); + add_package_dependency(c, "runtime", "string16_ne"); + break; } break; case Type_Struct: @@ -3035,6 +3043,24 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper case Token_LtEq: add_package_dependency(c, "runtime", "cstring_le"); break; case Token_GtEq: add_package_dependency(c, "runtime", "cstring_gt"); break; } + } else if (is_type_cstring16(x->type) && is_type_cstring16(y->type)) { + switch (op) { + case Token_CmpEq: add_package_dependency(c, "runtime", "cstring16_eq"); break; + case Token_NotEq: add_package_dependency(c, "runtime", "cstring16_ne"); break; + case Token_Lt: add_package_dependency(c, "runtime", "cstring16_lt"); break; + case Token_Gt: add_package_dependency(c, "runtime", "cstring16_gt"); break; + case Token_LtEq: add_package_dependency(c, "runtime", "cstring16_le"); break; + case Token_GtEq: add_package_dependency(c, "runtime", "cstring16_gt"); break; + } + } else if (is_type_string16(x->type) || is_type_string16(y->type)) { + switch (op) { + case Token_CmpEq: add_package_dependency(c, "runtime", "string16_eq"); break; + case Token_NotEq: add_package_dependency(c, "runtime", "string16_ne"); break; + case Token_Lt: add_package_dependency(c, "runtime", "string16_lt"); break; + case Token_Gt: add_package_dependency(c, "runtime", "string16_gt"); break; + case Token_LtEq: add_package_dependency(c, "runtime", "string16_le"); break; + case Token_GtEq: add_package_dependency(c, "runtime", "string16_gt"); break; + } } else if (is_type_string(x->type) || is_type_string(y->type)) { switch (op) { case Token_CmpEq: add_package_dependency(c, "runtime", "string_eq"); break; @@ -3340,6 +3366,11 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type return true; } + // []u16 <-> string16 (not cstring16) + if (is_type_u16_slice(src) && (is_type_string16(dst) && !is_type_cstring16(dst))) { + return true; + } + // cstring -> string if (are_types_identical(src, t_cstring) && are_types_identical(dst, t_string)) { if (operand->mode != Addressing_Constant) { @@ -3347,6 +3378,14 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type } return true; } + // cstring16 -> string16 + if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) { + if (operand->mode != Addressing_Constant) { + add_package_dependency(c, "runtime", "cstring16_to_string16"); + } + return true; + } + // cstring -> ^u8 if (are_types_identical(src, t_cstring) && is_type_u8_ptr(dst)) { return !is_constant; @@ -3372,6 +3411,34 @@ gb_internal bool check_is_castable_to(CheckerContext *c, Operand *operand, Type if (is_type_rawptr(src) && are_types_identical(dst, t_cstring)) { return !is_constant; } + + // cstring -> ^u16 + if (are_types_identical(src, t_cstring16) && is_type_u16_ptr(dst)) { + return !is_constant; + } + // cstring -> [^]u16 + if (are_types_identical(src, t_cstring16) && is_type_u16_multi_ptr(dst)) { + return !is_constant; + } + // cstring -> rawptr + if (are_types_identical(src, t_cstring16) && is_type_rawptr(dst)) { + return !is_constant; + } + + + // ^u16 -> cstring16 + if (is_type_u16_ptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // [^]u16 -> cstring + if (is_type_u16_multi_ptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // rawptr -> cstring16 + if (is_type_rawptr(src) && are_types_identical(dst, t_cstring16)) { + return !is_constant; + } + // proc <-> proc if (is_type_proc(src) && is_type_proc(dst)) { if (is_type_polymorphic(dst)) { @@ -4558,6 +4625,8 @@ gb_internal void convert_to_typed(CheckerContext *c, Operand *operand, Type *tar // target_type = t_untyped_nil; } else if (is_type_cstring(target_type)) { // target_type = t_untyped_nil; + } else if (is_type_cstring16(target_type)) { + // target_type = t_untyped_nil; } else if (!type_has_nil(target_type)) { operand->mode = Addressing_Invalid; convert_untyped_error(c, operand, target_type); @@ -8226,6 +8295,7 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 case Type_Basic: if (t->Basic.kind == Basic_string) { if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String); *max_count = o->value.value_string.len; } if (o->mode != Addressing_Constant) { @@ -8233,6 +8303,16 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 } o->type = t_u8; return true; + } else if (t->Basic.kind == Basic_string16) { + if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String16); + *max_count = o->value.value_string16.len; + } + if (o->mode != Addressing_Constant) { + o->mode = Addressing_Value; + } + o->type = t_u16; + return true; } else if (t->Basic.kind == Basic_UntypedString) { if (o->mode == Addressing_Constant) { *max_count = o->value.value_string.len; @@ -10879,9 +10959,17 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node, if (t->Basic.kind == Basic_string || t->Basic.kind == Basic_UntypedString) { valid = true; if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String); max_count = o->value.value_string.len; } o->type = type_deref(o->type); + } else if (t->Basic.kind == Basic_string16) { + valid = true; + if (o->mode == Addressing_Constant) { + GB_ASSERT(o->value.kind == ExactValue_String16); + max_count = o->value.value_string16.len; + } + o->type = type_deref(o->type); } break; diff --git a/src/checker.cpp b/src/checker.cpp index a1d8f98d7..20da5b19b 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1363,13 +1363,15 @@ gb_internal void init_universal(void) { } - t_u8_ptr = alloc_type_pointer(t_u8); - t_u8_multi_ptr = alloc_type_multi_pointer(t_u8); - t_int_ptr = alloc_type_pointer(t_int); - t_i64_ptr = alloc_type_pointer(t_i64); - t_f64_ptr = alloc_type_pointer(t_f64); - t_u8_slice = alloc_type_slice(t_u8); - t_string_slice = alloc_type_slice(t_string); + t_u8_ptr = alloc_type_pointer(t_u8); + t_u8_multi_ptr = alloc_type_multi_pointer(t_u8); + t_u16_ptr = alloc_type_pointer(t_u16); + t_u16_multi_ptr = alloc_type_multi_pointer(t_u16); + t_int_ptr = alloc_type_pointer(t_int); + t_i64_ptr = alloc_type_pointer(t_i64); + t_f64_ptr = alloc_type_pointer(t_f64); + t_u8_slice = alloc_type_slice(t_u8); + t_string_slice = alloc_type_slice(t_string); // intrinsics types for objective-c stuff { diff --git a/src/common.cpp b/src/common.cpp index ad1e5a851..b3761fc36 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -669,7 +669,7 @@ gb_internal gb_inline f64 gb_sqrt(f64 x) { gb_internal wchar_t **command_line_to_wargv(wchar_t *cmd_line, int *_argc) { u32 i, j; - u32 len = cast(u32)string16_len(cmd_line); + u32 len = cast(u32)string16_len(cast(u16 *)cmd_line); i = ((len+2)/2)*gb_size_of(void *) + gb_size_of(void *); wchar_t **argv = cast(wchar_t **)GlobalAlloc(GMEM_FIXED, i + (len+2)*gb_size_of(wchar_t)); diff --git a/src/exact_value.cpp b/src/exact_value.cpp index 37751c8f1..f2aed84c2 100644 --- a/src/exact_value.cpp +++ b/src/exact_value.cpp @@ -29,6 +29,7 @@ enum ExactValueKind { ExactValue_Compound = 8, ExactValue_Procedure = 9, ExactValue_Typeid = 10, + ExactValue_String16 = 11, ExactValue_Count, }; @@ -46,6 +47,7 @@ struct ExactValue { Ast * value_compound; Ast * value_procedure; Type * value_typeid; + String16 value_string16; }; }; @@ -66,6 +68,9 @@ gb_internal uintptr hash_exact_value(ExactValue v) { case ExactValue_String: res = gb_fnv32a(v.value_string.text, v.value_string.len); break; + case ExactValue_String16: + res = gb_fnv32a(v.value_string.text, v.value_string.len*gb_size_of(u16)); + break; case ExactValue_Integer: { u32 key = gb_fnv32a(v.value_integer.dp, gb_size_of(*v.value_integer.dp) * v.value_integer.used); @@ -118,6 +123,11 @@ gb_internal ExactValue exact_value_string(String string) { result.value_string = string; return result; } +gb_internal ExactValue exact_value_string16(String16 string) { + ExactValue result = {ExactValue_String16}; + result.value_string16 = string; + return result; +} gb_internal ExactValue exact_value_i64(i64 i) { ExactValue result = {ExactValue_Integer}; @@ -656,6 +666,7 @@ gb_internal i32 exact_value_order(ExactValue const &v) { return 0; case ExactValue_Bool: case ExactValue_String: + case ExactValue_String16: return 1; case ExactValue_Integer: return 2; @@ -689,6 +700,7 @@ gb_internal void match_exact_values(ExactValue *x, ExactValue *y) { case ExactValue_Bool: case ExactValue_String: + case ExactValue_String16: case ExactValue_Quaternion: case ExactValue_Pointer: case ExactValue_Compound: @@ -891,7 +903,18 @@ gb_internal ExactValue exact_binary_operator_value(TokenKind op, ExactValue x, E gb_memmove(data, sx.text, sx.len); gb_memmove(data+sx.len, sy.text, sy.len); return exact_value_string(make_string(data, len)); - break; + } + case ExactValue_String16: { + if (op != Token_Add) goto error; + + // NOTE(bill): How do you minimize this over allocation? + String sx = x.value_string; + String sy = y.value_string; + isize len = sx.len+sy.len; + u16 *data = gb_alloc_array(permanent_allocator(), u16, len); + gb_memmove(data, sx.text, sx.len*gb_size_of(u16)); + gb_memmove(data+sx.len, sy.text, sy.len*gb_size_of(u16)); + return exact_value_string16(make_string16(data, len)); } } @@ -994,6 +1017,19 @@ gb_internal bool compare_exact_values(TokenKind op, ExactValue x, ExactValue y) } break; } + case ExactValue_String16: { + String16 a = x.value_string16; + String16 b = y.value_string16; + switch (op) { + case Token_CmpEq: return a == b; + case Token_NotEq: return a != b; + case Token_Lt: return a < b; + case Token_LtEq: return a <= b; + case Token_Gt: return a > b; + case Token_GtEq: return a >= b; + } + break; + } case ExactValue_Pointer: { switch (op) { @@ -1050,6 +1086,20 @@ gb_internal gbString write_exact_value_to_string(gbString str, ExactValue const gb_free(heap_allocator(), s.text); return str; } + case ExactValue_String16: { + String s = quote_to_ascii(heap_allocator(), v.value_string16); + string_limit = gb_max(string_limit, 36); + if (s.len <= string_limit) { + str = gb_string_append_length(str, s.text, s.len); + } else { + isize n = string_limit/5; + str = gb_string_append_length(str, s.text, n); + str = gb_string_append_fmt(str, "\"..%lld chars..\"", s.len-(2*n)); + str = gb_string_append_length(str, s.text+s.len-n, n); + } + gb_free(heap_allocator(), s.text); + return str; + } case ExactValue_Integer: { String s = big_int_to_string(heap_allocator(), &v.value_integer); str = gb_string_append_length(str, s.text, s.len); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 74aea82f1..fbf0dea11 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -1656,6 +1656,8 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { res.type = t; res.value = llvm_cstring(m, str); return res; + } else if (src->kind == Type_Basic && src->Basic.kind == Basic_string16 && dst->Basic.kind == Basic_cstring16) { + GB_PANIC("TODO(bill): UTF-16 string"); } // if (is_type_float(dst)) { // return value; @@ -1795,6 +1797,38 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { } + + if (is_type_cstring16(src) && is_type_u16_ptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_u16_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_u8_multi_ptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_cstring16(src) && is_type_rawptr(dst)) { + return lb_emit_transmute(p, value, dst); + } + if (is_type_rawptr(src) && is_type_cstring16(dst)) { + return lb_emit_transmute(p, value, dst); + } + + if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) { + TEMPORARY_ALLOCATOR_GUARD(); + + lbValue c = lb_emit_conv(p, value, t_cstring16); + auto args = array_make(temporary_allocator(), 1); + args[0] = c; + lbValue s = lb_emit_runtime_call(p, "cstring16_to_string16", args); + return lb_emit_conv(p, s, dst); + } + + + // integer -> boolean if (is_type_integer(src) && is_type_boolean(dst)) { lbValue res = {}; @@ -2296,6 +2330,14 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return res; } + // []u16 <-> string16 + if (is_type_u16_slice(src) && is_type_string16(dst)) { + return lb_emit_transmute(p, value, t); + } + if (is_type_string16(src) && is_type_u16_slice(dst)) { + return lb_emit_transmute(p, value, t); + } + // []byte/[]u8 <-> string if (is_type_u8_slice(src) && is_type_string(dst)) { return lb_emit_transmute(p, value, t); @@ -2304,6 +2346,7 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return lb_emit_transmute(p, value, t); } + if (is_type_array_like(dst)) { Type *elem = base_array_type(dst); isize index_count = cast(isize)get_array_type_count(dst); @@ -2483,6 +2526,12 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { if (is_type_untyped(src)) { + if (is_type_string(src) && is_type_string16(dst)) { + GB_PANIC("TODO(bill): UTF-16 string"); + lbAddr result = lb_add_local_generated(p, t, false); + lb_addr_store(p, result, value); + return lb_addr_load(p, result); + } if (is_type_string(src) && is_type_string(dst)) { lbAddr result = lb_add_local_generated(p, t, false); lb_addr_store(p, result, value); @@ -3056,6 +3105,13 @@ gb_internal lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, res.value = LLVMBuildIsNotNull(p->builder, x.value, ""); } return res; + case Basic_cstring16: + if (op_kind == Token_CmpEq) { + res.value = LLVMBuildIsNull(p->builder, x.value, ""); + } else if (op_kind == Token_NotEq) { + res.value = LLVMBuildIsNotNull(p->builder, x.value, ""); + } + return res; case Basic_any: { // TODO(bill): is this correct behaviour for nil comparison for any? @@ -4432,6 +4488,22 @@ gb_internal lbAddr lb_build_addr_slice_expr(lbProcedure *p, Ast *expr) { } case Type_Basic: { + if (is_type_string16(type)) { + GB_ASSERT_MSG(are_types_identical(type, t_string16), "got %s", type_to_string(type)); + lbValue len = lb_string_len(p, base); + if (high.value == nullptr) high = len; + + if (!no_indices) { + lb_emit_slice_bounds_check(p, se->open, low, high, len, se->low != nullptr); + } + + lbValue elem = lb_emit_ptr_offset(p, lb_string_elem(p, base), low); + lbValue new_len = lb_emit_arith(p, Token_Sub, high, low, t_int); + + lbAddr str = lb_add_local_generated(p, t_string16, false); + lb_fill_string(p, str, elem, new_len); + return str; + } GB_ASSERT_MSG(are_types_identical(type, t_string), "got %s", type_to_string(type)); lbValue len = lb_string_len(p, base); if (high.value == nullptr) high = len; diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 3ce0c725f..d9771a75b 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1812,6 +1812,37 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { return type; } case Basic_cstring: return LLVMPointerType(LLVMInt8TypeInContext(ctx), 0); + + + case Basic_string16: + { + char const *name = "..string16"; + LLVMTypeRef type = LLVMGetTypeByName(m->mod, name); + if (type != nullptr) { + return type; + } + type = LLVMStructCreateNamed(ctx, name); + + if (build_context.metrics.ptr_size < build_context.metrics.int_size) { + GB_ASSERT(build_context.metrics.ptr_size == 4); + GB_ASSERT(build_context.metrics.int_size == 8); + LLVMTypeRef fields[3] = { + LLVMPointerType(lb_type(m, t_u16), 0), + lb_type(m, t_i32), + lb_type(m, t_int), + }; + LLVMStructSetBody(type, fields, 3, false); + } else { + LLVMTypeRef fields[2] = { + LLVMPointerType(lb_type(m, t_u16), 0), + lb_type(m, t_int), + }; + LLVMStructSetBody(type, fields, 2, false); + } + return type; + } + case Basic_cstring16: return LLVMPointerType(LLVMInt16TypeInContext(ctx), 0); + case Basic_any: { char const *name = "..any"; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index e63c92f6f..8f306b771 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2289,6 +2289,10 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } if (is_type_cstring(t)) { return lb_cstring_len(p, v); + } else if (is_type_cstring16(t)) { + return lb_cstring16_len(p, v); + } else if (is_type_string16(t)) { + return lb_string_len(p, v); } else if (is_type_string(t)) { return lb_string_len(p, v); } else if (is_type_array(t)) { @@ -2728,6 +2732,11 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu res = lb_emit_conv(p, res, tv.type); } else if (t->Basic.kind == Basic_cstring) { res = lb_emit_conv(p, x, tv.type); + } else if (t->Basic.kind == Basic_string16) { + res = lb_string_elem(p, x); + res = lb_emit_conv(p, res, tv.type); + } else if (t->Basic.kind == Basic_cstring16) { + res = lb_emit_conv(p, x, tv.type); } break; case Type_Pointer: diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 43c5f0b40..a91d77fe5 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -531,7 +531,33 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ case Basic_cstring: { tag_type = t_type_info_string; - LLVMValueRef vals[1] = { + LLVMValueRef vals[2] = { + lb_const_bool(m, t_bool, true).value, + lb_const_bool(m, t_bool, false).value, + }; + + variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); + } + break; + + case Basic_string16: + { + tag_type = t_type_info_string; + LLVMValueRef vals[2] = { + lb_const_bool(m, t_bool, false).value, + lb_const_bool(m, t_bool, true).value, + }; + + variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); + } + break; + + + case Basic_cstring16: + { + tag_type = t_type_info_string; + LLVMValueRef vals[2] = { + lb_const_bool(m, t_bool, true).value, lb_const_bool(m, t_bool, true).value, }; diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 521553147..d4117b7ff 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1626,11 +1626,17 @@ gb_internal void lb_fill_string(lbProcedure *p, lbAddr const &string, lbValue ba gb_internal lbValue lb_string_elem(lbProcedure *p, lbValue string) { Type *t = base_type(string.type); + if (t->kind == Type_Basic && t->Basic.kind == Basic_string16) { + return lb_emit_struct_ev(p, string, 0); + } GB_ASSERT(t->kind == Type_Basic && t->Basic.kind == Basic_string); return lb_emit_struct_ev(p, string, 0); } gb_internal lbValue lb_string_len(lbProcedure *p, lbValue string) { Type *t = base_type(string.type); + if (t->kind == Type_Basic && t->Basic.kind == Basic_string16) { + return lb_emit_struct_ev(p, string, 1); + } GB_ASSERT_MSG(t->kind == Type_Basic && t->Basic.kind == Basic_string, "%s", type_to_string(t)); return lb_emit_struct_ev(p, string, 1); } @@ -1641,6 +1647,12 @@ gb_internal lbValue lb_cstring_len(lbProcedure *p, lbValue value) { args[0] = lb_emit_conv(p, value, t_cstring); return lb_emit_runtime_call(p, "cstring_len", args); } +gb_internal lbValue lb_cstring16_len(lbProcedure *p, lbValue value) { + GB_ASSERT(is_type_cstring16(value.type)); + auto args = array_make(permanent_allocator(), 1); + args[0] = lb_emit_conv(p, value, t_cstring16); + return lb_emit_runtime_call(p, "cstring16_len", args); +} gb_internal lbValue lb_array_elem(lbProcedure *p, lbValue array_ptr) { diff --git a/src/main.cpp b/src/main.cpp index 112d1208a..5a43e3c02 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -142,9 +142,9 @@ gb_internal i32 system_exec_command_line_app_internal(bool exit_on_err, char con } wcmd = string_to_string16(permanent_allocator(), make_string(cast(u8 *)cmd_line, cmd_len-1)); - if (CreateProcessW(nullptr, wcmd.text, - nullptr, nullptr, true, 0, nullptr, nullptr, - &start_info, &pi)) { + if (CreateProcessW(nullptr, cast(wchar_t *)wcmd.text, + nullptr, nullptr, true, 0, nullptr, nullptr, + &start_info, &pi)) { WaitForSingleObject(pi.hProcess, INFINITE); GetExitCodeProcess(pi.hProcess, cast(DWORD *)&exit_code); @@ -232,7 +232,7 @@ gb_internal Array setup_args(int argc, char const **argv) { wchar_t **wargv = command_line_to_wargv(GetCommandLineW(), &wargc); auto args = array_make(a, 0, wargc); for (isize i = 0; i < wargc; i++) { - wchar_t *warg = wargv[i]; + u16 *warg = cast(u16 *)wargv[i]; isize wlen = string16_len(warg); String16 wstr = make_string16(warg, wlen); String arg = string16_to_string(a, wstr); diff --git a/src/microsoft_craziness.h b/src/microsoft_craziness.h index b0fd22a23..933607a2a 100644 --- a/src/microsoft_craziness.h +++ b/src/microsoft_craziness.h @@ -59,7 +59,7 @@ struct Find_Result { }; gb_internal String mc_wstring_to_string(wchar_t const *str) { - return string16_to_string(mc_allocator, make_string16_c(str)); + return string16_to_string(mc_allocator, make_string16_c(cast(u16 *)str)); } gb_internal String16 mc_string_to_wstring(String str) { @@ -103,7 +103,7 @@ gb_internal HANDLE mc_find_first(String wildcard, MC_Find_Data *find_data) { String16 wildcard_wide = mc_string_to_wstring(wildcard); defer (mc_free(wildcard_wide)); - HANDLE handle = FindFirstFileW(wildcard_wide.text, &_find_data); + HANDLE handle = FindFirstFileW(cast(wchar_t *)wildcard_wide.text, &_find_data); if (handle == INVALID_HANDLE_VALUE) return INVALID_HANDLE_VALUE; find_data->file_attributes = _find_data.dwFileAttributes; diff --git a/src/path.cpp b/src/path.cpp index d5e982088..2b97a04df 100644 --- a/src/path.cpp +++ b/src/path.cpp @@ -130,7 +130,7 @@ gb_internal String directory_from_path(String const &s) { String16 wstr = string_to_string16(a, path); defer (gb_free(a, wstr.text)); - i32 attribs = GetFileAttributesW(wstr.text); + i32 attribs = GetFileAttributesW(cast(wchar_t *)wstr.text); if (attribs < 0) return false; return (attribs & FILE_ATTRIBUTE_DIRECTORY) != 0; @@ -360,7 +360,7 @@ gb_internal ReadDirectoryError read_directory(String path, Array *fi) defer (gb_free(a, wstr.text)); WIN32_FIND_DATAW file_data = {}; - HANDLE find_file = FindFirstFileW(wstr.text, &file_data); + HANDLE find_file = FindFirstFileW(cast(wchar_t *)wstr.text, &file_data); if (find_file == INVALID_HANDLE_VALUE) { return ReadDirectory_Unknown; } @@ -372,7 +372,7 @@ gb_internal ReadDirectoryError read_directory(String path, Array *fi) wchar_t *filename_w = file_data.cFileName; u64 size = cast(u64)file_data.nFileSizeLow; size |= (cast(u64)file_data.nFileSizeHigh) << 32; - String name = string16_to_string(a, make_string16_c(filename_w)); + String name = string16_to_string(a, make_string16_c(cast(u16 *)filename_w)); if (name == "." || name == "..") { gb_free(a, name.text); continue; @@ -494,7 +494,7 @@ gb_internal bool write_directory(String path) { #else gb_internal bool write_directory(String path) { String16 wstr = string_to_string16(heap_allocator(), path); - LPCWSTR wdirectory_name = wstr.text; + LPCWSTR wdirectory_name = cast(wchar_t *)wstr.text; HANDLE directory = CreateFileW(wdirectory_name, GENERIC_WRITE, diff --git a/src/string.cpp b/src/string.cpp index ae8d066b1..8405938f4 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -26,15 +26,14 @@ struct String_Iterator { // NOTE(bill): String16 is only used for Windows due to its file directories struct String16 { - wchar_t *text; - isize len; - wchar_t const &operator[](isize i) const { + u16 * text; + isize len; + u16 const &operator[](isize i) const { GB_ASSERT_MSG(0 <= i && i < len, "[%td]", i); return text[i]; } }; - gb_internal gb_inline String make_string(u8 const *text, isize len) { String s; s.text = cast(u8 *)text; @@ -45,19 +44,19 @@ gb_internal gb_inline String make_string(u8 const *text, isize len) { return s; } - -gb_internal gb_inline String16 make_string16(wchar_t const *text, isize len) { +gb_internal gb_inline String16 make_string16(u16 const *text, isize len) { String16 s; - s.text = cast(wchar_t *)text; + s.text = cast(u16 *)text; s.len = len; return s; } -gb_internal isize string16_len(wchar_t const *s) { + +gb_internal isize string16_len(u16 const *s) { if (s == nullptr) { return 0; } - wchar_t const *p = s; + u16 const *p = s; while (*p) { p++; } @@ -69,7 +68,7 @@ gb_internal gb_inline String make_string_c(char const *text) { return make_string(cast(u8 *)cast(void *)text, gb_strlen(text)); } -gb_internal gb_inline String16 make_string16_c(wchar_t const *text) { +gb_internal gb_inline String16 make_string16_c(u16 const *text) { return make_string16(text, string16_len(text)); } @@ -145,6 +144,27 @@ gb_internal int string_compare(String const &a, String const &b) { return res; } + +gb_internal int string16_compare(String16 const &a, String16 const &b) { + if (a.text == b.text) { + return cast(int)(a.len - b.len); + } + if (a.text == nullptr) { + return -1; + } + if (b.text == nullptr) { + return +1; + } + + uintptr n = gb_min(a.len, b.len); + int res = memcmp(a.text, b.text, n*gb_size_of(u16)); + if (res == 0) { + res = cast(int)(a.len - b.len); + } + return res; +} + + gb_internal isize string_index_byte(String const &s, u8 x) { for (isize i = 0; i < s.len; i++) { if (s.text[i] == x) { @@ -182,6 +202,26 @@ template gb_internal bool operator >= (String const &a, char const (&b template <> bool operator == (String const &a, char const (&b)[1]) { return a.len == 0; } template <> bool operator != (String const &a, char const (&b)[1]) { return a.len != 0; } + +gb_internal gb_inline bool str_eq(String16 const &a, String16 const &b) { + if (a.len != b.len) return false; + if (a.len == 0) return true; + return memcmp(a.text, b.text, a.len) == 0; +} +gb_internal gb_inline bool str_ne(String16 const &a, String16 const &b) { return !str_eq(a, b); } +gb_internal gb_inline bool str_lt(String16 const &a, String16 const &b) { return string16_compare(a, b) < 0; } +gb_internal gb_inline bool str_gt(String16 const &a, String16 const &b) { return string16_compare(a, b) > 0; } +gb_internal gb_inline bool str_le(String16 const &a, String16 const &b) { return string16_compare(a, b) <= 0; } +gb_internal gb_inline bool str_ge(String16 const &a, String16 const &b) { return string16_compare(a, b) >= 0; } + +gb_internal gb_inline bool operator == (String16 const &a, String16 const &b) { return str_eq(a, b); } +gb_internal gb_inline bool operator != (String16 const &a, String16 const &b) { return str_ne(a, b); } +gb_internal gb_inline bool operator < (String16 const &a, String16 const &b) { return str_lt(a, b); } +gb_internal gb_inline bool operator > (String16 const &a, String16 const &b) { return str_gt(a, b); } +gb_internal gb_inline bool operator <= (String16 const &a, String16 const &b) { return str_le(a, b); } +gb_internal gb_inline bool operator >= (String16 const &a, String16 const &b) { return str_ge(a, b); } + + gb_internal gb_inline bool string_starts_with(String const &s, String const &prefix) { if (prefix.len > s.len) { return false; @@ -614,7 +654,7 @@ gb_internal String normalize_path(gbAllocator a, String const &path, String cons // TODO(bill): Make this non-windows specific gb_internal String16 string_to_string16(gbAllocator a, String s) { int len, len1; - wchar_t *text; + u16 *text; if (s.len < 1) { return make_string16(nullptr, 0); @@ -625,9 +665,9 @@ gb_internal String16 string_to_string16(gbAllocator a, String s) { return make_string16(nullptr, 0); } - text = gb_alloc_array(a, wchar_t, len+1); + text = gb_alloc_array(a, u16, len+1); - len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, text, cast(int)len); + len1 = convert_multibyte_to_widechar(cast(char *)s.text, cast(int)s.len, cast(wchar_t *)text, cast(int)len); if (len1 == 0) { gb_free(a, text); return make_string16(nullptr, 0); @@ -646,7 +686,7 @@ gb_internal String string16_to_string(gbAllocator a, String16 s) { return make_string(nullptr, 0); } - len = convert_widechar_to_multibyte(s.text, cast(int)s.len, nullptr, 0); + len = convert_widechar_to_multibyte(cast(wchar_t *)s.text, cast(int)s.len, nullptr, 0); if (len == 0) { return make_string(nullptr, 0); } @@ -654,7 +694,7 @@ gb_internal String string16_to_string(gbAllocator a, String16 s) { text = gb_alloc_array(a, u8, len+1); - len1 = convert_widechar_to_multibyte(s.text, cast(int)s.len, cast(char *)text, cast(int)len); + len1 = convert_widechar_to_multibyte(cast(wchar_t *)s.text, cast(int)s.len, cast(char *)text, cast(int)len); if (len1 == 0) { gb_free(a, text); return make_string(nullptr, 0); @@ -674,9 +714,9 @@ gb_internal String temporary_directory(gbAllocator allocator) { return String{0}; } DWORD len = gb_max(MAX_PATH, n); - wchar_t *b = gb_alloc_array(heap_allocator(), wchar_t, len+1); + u16 *b = gb_alloc_array(heap_allocator(), u16, len+1); defer (gb_free(heap_allocator(), b)); - n = GetTempPathW(len, b); + n = GetTempPathW(len, cast(wchar_t *)b); if (n == 3 && b[1] == ':' && b[2] == '\\') { } else if (n > 0 && b[n-1] == '\\') { @@ -791,6 +831,104 @@ gb_internal String quote_to_ascii(gbAllocator a, String str, u8 quote='"') { return res; } +gb_internal Rune decode_surrogate_pair(u16 r1, u16 r2) { + static Rune const _surr1 = 0xd800; + static Rune const _surr2 = 0xdc00; + static Rune const _surr3 = 0xe000; + static Rune const _surr_self = 0x10000; + + if (_surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3) { + return (((r1-_surr1)<<10) | (r2 - _surr2)) + _surr_self; + } + return GB_RUNE_INVALID; +} + +gb_internal String quote_to_ascii(gbAllocator a, String16 str, u8 quote='"') { + static Rune const _surr1 = 0xd800; + static Rune const _surr2 = 0xdc00; + static Rune const _surr3 = 0xe000; + static Rune const _surr_self = 0x10000; + + u16 *s = cast(u16 *)str.text; + isize n = str.len; + auto buf = array_make(a, 0, n*2); + array_add(&buf, quote); + for (isize width = 0; n > 0; s += width, n -= width) { + Rune r = cast(Rune)s[0]; + width = 1; + if (r < _surr1 || _surr3 <= r) { + r = cast(Rune)r; + } else if (_surr1 <= r && r < _surr2) { + if (n>1) { + r = decode_surrogate_pair(s[0], s[1]); + if (r != GB_RUNE_INVALID) { + width = 2; + } + } else { + r = GB_RUNE_INVALID; + } + } + if (width == 1 && r == GB_RUNE_INVALID) { + array_add(&buf, cast(u8)'\\'); + array_add(&buf, cast(u8)'x'); + array_add(&buf, cast(u8)lower_hex[s[0]>>4]); + array_add(&buf, cast(u8)lower_hex[s[0]&0xf]); + continue; + } + + if (r == quote || r == '\\') { + array_add(&buf, cast(u8)'\\'); + array_add(&buf, u8(r)); + continue; + } + if (r < 0x80 && is_printable(r)) { + array_add(&buf, u8(r)); + continue; + } + switch (r) { + case '\a': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + default: + if (r < ' ') { + u8 b = cast(u8)r; + array_add(&buf, cast(u8)'\\'); + array_add(&buf, cast(u8)'x'); + array_add(&buf, cast(u8)lower_hex[b>>4]); + array_add(&buf, cast(u8)lower_hex[b&0xf]); + } + if (r > GB_RUNE_MAX) { + r = 0XFFFD; + } + if (r < 0x10000) { + array_add(&buf, cast(u8)'\\'); + array_add(&buf, cast(u8)'u'); + for (isize i = 12; i >= 0; i -= 4) { + array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]); + } + } else { + array_add(&buf, cast(u8)'\\'); + array_add(&buf, cast(u8)'U'); + for (isize i = 28; i >= 0; i -= 4) { + array_add(&buf, cast(u8)lower_hex[(r>>i)&0xf]); + } + } + } + } + + + + array_add(&buf, quote); + String res = {}; + res.text = buf.data; + res.len = buf.count; + return res; +} + diff --git a/src/types.cpp b/src/types.cpp index 2e696810d..ad576c8af 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -41,8 +41,13 @@ enum BasicKind { Basic_uint, Basic_uintptr, Basic_rawptr, - Basic_string, // ^u8 + int - Basic_cstring, // ^u8 + + Basic_string, // [^]u8 + int + Basic_cstring, // [^]u8 + + Basic_string16, // [^]u16 + int + Basic_cstring16, // [^]u16 + int + Basic_any, // rawptr + ^Type_Info Basic_typeid, @@ -500,8 +505,14 @@ gb_global Type basic_types[] = { {Type_Basic, {Basic_uintptr, BasicFlag_Integer | BasicFlag_Unsigned, -1, STR_LIT("uintptr")}}, {Type_Basic, {Basic_rawptr, BasicFlag_Pointer, -1, STR_LIT("rawptr")}}, + {Type_Basic, {Basic_string, BasicFlag_String, -1, STR_LIT("string")}}, {Type_Basic, {Basic_cstring, BasicFlag_String, -1, STR_LIT("cstring")}}, + + {Type_Basic, {Basic_string16, BasicFlag_String, -1, STR_LIT("string16")}}, + {Type_Basic, {Basic_cstring16, BasicFlag_String, -1, STR_LIT("cstring16")}}, + + {Type_Basic, {Basic_any, 0, 16, STR_LIT("any")}}, {Type_Basic, {Basic_typeid, 0, 8, STR_LIT("typeid")}}, @@ -591,8 +602,12 @@ gb_global Type *t_uint = &basic_types[Basic_uint]; gb_global Type *t_uintptr = &basic_types[Basic_uintptr]; gb_global Type *t_rawptr = &basic_types[Basic_rawptr]; + gb_global Type *t_string = &basic_types[Basic_string]; gb_global Type *t_cstring = &basic_types[Basic_cstring]; +gb_global Type *t_string16 = &basic_types[Basic_string16]; +gb_global Type *t_cstring16 = &basic_types[Basic_cstring16]; + gb_global Type *t_any = &basic_types[Basic_any]; gb_global Type *t_typeid = &basic_types[Basic_typeid]; @@ -630,6 +645,8 @@ gb_global Type *t_untyped_uninit = &basic_types[Basic_UntypedUninit]; gb_global Type *t_u8_ptr = nullptr; gb_global Type *t_u8_multi_ptr = nullptr; +gb_global Type *t_u16_ptr = nullptr; +gb_global Type *t_u16_multi_ptr = nullptr; gb_global Type *t_int_ptr = nullptr; gb_global Type *t_i64_ptr = nullptr; gb_global Type *t_f64_ptr = nullptr; @@ -1292,6 +1309,14 @@ gb_internal bool is_type_string(Type *t) { } return false; } +gb_internal bool is_type_string16(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Basic) { + return t->Basic.kind == Basic_string16; + } + return false; +} gb_internal bool is_type_cstring(Type *t) { t = base_type(t); if (t == nullptr) { return false; } @@ -1300,6 +1325,14 @@ gb_internal bool is_type_cstring(Type *t) { } return false; } +gb_internal bool is_type_cstring16(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Basic) { + return t->Basic.kind == Basic_cstring16; + } + return false; +} gb_internal bool is_type_typed(Type *t) { t = base_type(t); if (t == nullptr) { return false; } @@ -1429,6 +1462,12 @@ gb_internal bool is_type_u8(Type *t) { } return false; } +gb_internal bool is_type_u16(Type *t) { + if (t->kind == Type_Basic) { + return t->Basic.kind == Basic_u16; + } + return false; +} gb_internal bool is_type_array(Type *t) { t = base_type(t); if (t == nullptr) { return false; } @@ -1690,6 +1729,39 @@ gb_internal bool is_type_rune_array(Type *t) { return false; } +gb_internal bool is_type_u16_slice(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Slice) { + return is_type_u16(t->Slice.elem); + } + return false; +} +gb_internal bool is_type_u16_array(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Array) { + return is_type_u16(t->Array.elem); + } + return false; +} +gb_internal bool is_type_u16_ptr(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_Pointer) { + return is_type_u16(t->Slice.elem); + } + return false; +} +gb_internal bool is_type_u16_multi_ptr(Type *t) { + t = base_type(t); + if (t == nullptr) { return false; } + if (t->kind == Type_MultiPointer) { + return is_type_u16(t->Slice.elem); + } + return false; +} + gb_internal bool is_type_array_like(Type *t) { return is_type_array(t) || is_type_enumerated_array(t); @@ -2109,7 +2181,7 @@ gb_internal bool is_type_indexable(Type *t) { Type *bt = base_type(t); switch (bt->kind) { case Type_Basic: - return bt->Basic.kind == Basic_string; + return bt->Basic.kind == Basic_string || bt->Basic.kind == Basic_string16; case Type_Array: case Type_Slice: case Type_DynamicArray: @@ -2129,7 +2201,7 @@ gb_internal bool is_type_sliceable(Type *t) { Type *bt = base_type(t); switch (bt->kind) { case Type_Basic: - return bt->Basic.kind == Basic_string; + return bt->Basic.kind == Basic_string || bt->Basic.kind == Basic_string16; case Type_Array: case Type_Slice: case Type_DynamicArray: @@ -2376,6 +2448,7 @@ gb_internal bool type_has_nil(Type *t) { case Basic_any: return true; case Basic_cstring: + case Basic_cstring16: return true; case Basic_typeid: return true; @@ -2443,8 +2516,9 @@ gb_internal bool is_type_comparable(Type *t) { case Basic_rune: return true; case Basic_string: - return true; case Basic_cstring: + case Basic_string16: + case Basic_cstring16: return true; case Basic_typeid: return true; @@ -3774,10 +3848,12 @@ gb_internal i64 type_size_of(Type *t) { if (t->kind == Type_Basic) { GB_ASSERT_MSG(is_type_typed(t), "%s", type_to_string(t)); switch (t->Basic.kind) { - case Basic_string: size = 2*build_context.int_size; break; - case Basic_cstring: size = build_context.ptr_size; break; - case Basic_any: size = 16; break; - case Basic_typeid: size = 8; break; + case Basic_string: size = 2*build_context.int_size; break; + case Basic_cstring: size = build_context.ptr_size; break; + case Basic_string16: size = 2*build_context.int_size; break; + case Basic_cstring16: size = build_context.ptr_size; break; + case Basic_any: size = 16; break; + case Basic_typeid: size = 8; break; case Basic_int: case Basic_uint: size = build_context.int_size; @@ -3837,10 +3913,12 @@ gb_internal i64 type_align_of_internal(Type *t, TypePath *path) { case Type_Basic: { GB_ASSERT(is_type_typed(t)); switch (t->Basic.kind) { - case Basic_string: return build_context.int_size; - case Basic_cstring: return build_context.ptr_size; - case Basic_any: return 8; - case Basic_typeid: return 8; + case Basic_string: return build_context.int_size; + case Basic_cstring: return build_context.ptr_size; + case Basic_string16: return build_context.int_size; + case Basic_cstring16: return build_context.ptr_size; + case Basic_any: return 8; + case Basic_typeid: return 8; case Basic_int: case Basic_uint: return build_context.int_size; @@ -4088,10 +4166,12 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) { return size; } switch (kind) { - case Basic_string: return 2*build_context.int_size; - case Basic_cstring: return build_context.ptr_size; - case Basic_any: return 16; - case Basic_typeid: return 8; + case Basic_string: return 2*build_context.int_size; + case Basic_cstring: return build_context.ptr_size; + case Basic_string16: return 2*build_context.int_size; + case Basic_cstring16: return build_context.ptr_size; + case Basic_any: return 16; + case Basic_typeid: return 8; case Basic_int: case Basic_uint: return build_context.int_size; @@ -4320,6 +4400,15 @@ gb_internal i64 type_offset_of(Type *t, i64 index, Type **field_type_) { if (field_type_) *field_type_ = t_int; return build_context.int_size; // len } + } else if (t->Basic.kind == Basic_string16) { + switch (index) { + case 0: + if (field_type_) *field_type_ = t_u16_ptr; + return 0; // data + case 1: + if (field_type_) *field_type_ = t_int; + return build_context.int_size; // len + } } else if (t->Basic.kind == Basic_any) { switch (index) { case 0: @@ -4396,6 +4485,11 @@ gb_internal i64 type_offset_of_from_selection(Type *type, Selection sel) { case 0: t = t_rawptr; break; case 1: t = t_int; break; } + } else if (t->Basic.kind == Basic_string16) { + switch (index) { + case 0: t = t_rawptr; break; + case 1: t = t_int; break; + } } else if (t->Basic.kind == Basic_any) { switch (index) { case 0: t = t_rawptr; break; @@ -4637,6 +4731,11 @@ gb_internal Type *type_internal_index(Type *t, isize index) { GB_ASSERT(index == 0 || index == 1); return index == 0 ? t_u8_ptr : t_int; } + case Basic_string16: + { + GB_ASSERT(index == 0 || index == 1); + return index == 0 ? t_u16_ptr : t_int; + } case Basic_any: { GB_ASSERT(index == 0 || index == 1);