Add string16 and cstring16 (UTF-16 based strings)

This commit is contained in:
gingerBill
2025-08-02 11:00:15 +01:00
parent 710203eadb
commit 2561427dd3
25 changed files with 873 additions and 62 deletions

View File

@@ -1656,6 +1656,8 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
res.type = t;
res.value = llvm_cstring(m, str);
return res;
} else if (src->kind == Type_Basic && src->Basic.kind == Basic_string16 && dst->Basic.kind == Basic_cstring16) {
GB_PANIC("TODO(bill): UTF-16 string");
}
// if (is_type_float(dst)) {
// return value;
@@ -1795,6 +1797,38 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
}
if (is_type_cstring16(src) && is_type_u16_ptr(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (is_type_u16_ptr(src) && is_type_cstring16(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (is_type_cstring16(src) && is_type_u16_multi_ptr(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (is_type_u8_multi_ptr(src) && is_type_cstring16(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (is_type_cstring16(src) && is_type_rawptr(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (is_type_rawptr(src) && is_type_cstring16(dst)) {
return lb_emit_transmute(p, value, dst);
}
if (are_types_identical(src, t_cstring16) && are_types_identical(dst, t_string16)) {
TEMPORARY_ALLOCATOR_GUARD();
lbValue c = lb_emit_conv(p, value, t_cstring16);
auto args = array_make<lbValue>(temporary_allocator(), 1);
args[0] = c;
lbValue s = lb_emit_runtime_call(p, "cstring16_to_string16", args);
return lb_emit_conv(p, s, dst);
}
// integer -> boolean
if (is_type_integer(src) && is_type_boolean(dst)) {
lbValue res = {};
@@ -2296,6 +2330,14 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
return res;
}
// []u16 <-> string16
if (is_type_u16_slice(src) && is_type_string16(dst)) {
return lb_emit_transmute(p, value, t);
}
if (is_type_string16(src) && is_type_u16_slice(dst)) {
return lb_emit_transmute(p, value, t);
}
// []byte/[]u8 <-> string
if (is_type_u8_slice(src) && is_type_string(dst)) {
return lb_emit_transmute(p, value, t);
@@ -2304,6 +2346,7 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
return lb_emit_transmute(p, value, t);
}
if (is_type_array_like(dst)) {
Type *elem = base_array_type(dst);
isize index_count = cast(isize)get_array_type_count(dst);
@@ -2483,6 +2526,12 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
if (is_type_untyped(src)) {
if (is_type_string(src) && is_type_string16(dst)) {
GB_PANIC("TODO(bill): UTF-16 string");
lbAddr result = lb_add_local_generated(p, t, false);
lb_addr_store(p, result, value);
return lb_addr_load(p, result);
}
if (is_type_string(src) && is_type_string(dst)) {
lbAddr result = lb_add_local_generated(p, t, false);
lb_addr_store(p, result, value);
@@ -3056,6 +3105,13 @@ gb_internal lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind,
res.value = LLVMBuildIsNotNull(p->builder, x.value, "");
}
return res;
case Basic_cstring16:
if (op_kind == Token_CmpEq) {
res.value = LLVMBuildIsNull(p->builder, x.value, "");
} else if (op_kind == Token_NotEq) {
res.value = LLVMBuildIsNotNull(p->builder, x.value, "");
}
return res;
case Basic_any:
{
// TODO(bill): is this correct behaviour for nil comparison for any?
@@ -4432,6 +4488,22 @@ gb_internal lbAddr lb_build_addr_slice_expr(lbProcedure *p, Ast *expr) {
}
case Type_Basic: {
if (is_type_string16(type)) {
GB_ASSERT_MSG(are_types_identical(type, t_string16), "got %s", type_to_string(type));
lbValue len = lb_string_len(p, base);
if (high.value == nullptr) high = len;
if (!no_indices) {
lb_emit_slice_bounds_check(p, se->open, low, high, len, se->low != nullptr);
}
lbValue elem = lb_emit_ptr_offset(p, lb_string_elem(p, base), low);
lbValue new_len = lb_emit_arith(p, Token_Sub, high, low, t_int);
lbAddr str = lb_add_local_generated(p, t_string16, false);
lb_fill_string(p, str, elem, new_len);
return str;
}
GB_ASSERT_MSG(are_types_identical(type, t_string), "got %s", type_to_string(type));
lbValue len = lb_string_len(p, base);
if (high.value == nullptr) high = len;