From d3ea334e7ab2897bbc948acc57aa9ba073304215 Mon Sep 17 00:00:00 2001
From: gingerBill
Date: Wed, 28 Feb 2018 11:20:11 +0000
Subject: [PATCH] `cstring`

---
Review notes (text between the `---` line and the first `diff --git` line is
neither part of the commit message nor part of the diff):

* src/types.cpp, is_type_u8_ptr: reads t->Pointer.elem. The access is guarded
  by t->kind == Type_Pointer, so t->Slice.elem was the wrong variant.
* examples/demo.odin: the note now says cast(string)cstring is O(N). The
  cstring -> string conversion is lowered to __cstring_to_string, which calls
  __cstring_len and walks the bytes up to the terminating 0.
* src/ir.cpp: both array_make calls carry an explicit <irValue *> element
  type (presumably lost when this patch was extracted -- confirm against the
  declaration of array_make).
* NOTE(review): check_is_castable_to accepts ^u8 -> cstring, but the
  ir_emit_conv hunk below only lowers cstring -> ^u8 and cstring -> string.
  Confirm the reverse cast is handled elsewhere in ir_emit_conv.
* Indentation and column alignment were reconstructed from a
  whitespace-collapsed copy; use `git apply --ignore-whitespace` if context
  lines do not match.

 core/_preload.odin | 32 +++++++++++++++++++++++---------
 core/fmt.odin      | 10 +++++++++-
 examples/demo.odin | 18 ++++++++++++++++++
 src/check_expr.cpp | 16 ++++++++++++++--
 src/ir.cpp         | 29 ++++++++++++++++++++++++++++-
 src/ir_print.cpp   | 15 +++++++++++++++
 src/types.cpp      | 35 ++++++++++++++++++++++++++++-------
 7 files changed, 135 insertions(+), 20 deletions(-)

diff --git a/core/_preload.odin b/core/_preload.odin
index 03a29b5b7..2e29ecaa8 100644
--- a/core/_preload.odin
+++ b/core/_preload.odin
@@ -41,15 +41,15 @@ Type_Info_Enum_Value :: union {
 };
 
 // Variant Types
-Type_Info_Named   :: struct {name: string, base: ^Type_Info};
-Type_Info_Integer :: struct {signed: bool};
-Type_Info_Rune    :: struct{};
-Type_Info_Float   :: struct{};
-Type_Info_Complex :: struct{};
-Type_Info_String  :: struct{};
-Type_Info_Boolean :: struct{};
-Type_Info_Any     :: struct{};
-Type_Info_Pointer :: struct {
+Type_Info_Named     :: struct {name: string, base: ^Type_Info};
+Type_Info_Integer   :: struct {signed: bool};
+Type_Info_Rune      :: struct {};
+Type_Info_Float     :: struct {};
+Type_Info_Complex   :: struct {};
+Type_Info_String    :: struct {is_cstring: bool};
+Type_Info_Boolean   :: struct {};
+Type_Info_Any       :: struct {};
+Type_Info_Pointer   :: struct {
 	elem: ^Type_Info // nil -> rawptr
 };
 Type_Info_Procedure :: struct {
@@ -863,6 +863,20 @@ __string_gt :: inline proc "contextless" (a, b: string) -> bool { return __strin
 __string_le :: inline proc "contextless" (a, b: string) -> bool { return __string_cmp(a, b) <= 0; }
 __string_ge :: inline proc "contextless" (a, b: string) -> bool { return __string_cmp(a, b) >= 0; }
 
+__cstring_len :: proc "contextless" (s: cstring) -> int {
+	n := 0;
+	for p := (^byte)(s); p != nil && p^ != 0; p += 1 {
+		n += 1;
+	}
+	return n;
+}
+
+__cstring_to_string :: proc "contextless" (s: cstring) -> string {
+	ptr := (^byte)(s);
+	n := __cstring_len(s);
+	return transmute(string)raw.String{ptr, n};
+}
+
 __complex64_eq :: inline proc "contextless" (a, b: complex64) -> bool { return real(a) == real(b) && imag(a) == imag(b); }
 __complex64_ne :: inline proc "contextless" (a, b: complex64) -> bool { return real(a) != real(b) || imag(a) != imag(b); }
 
diff --git a/core/fmt.odin b/core/fmt.odin
index 201cdd36b..50c083e9b 100644
--- a/core/fmt.odin
+++ b/core/fmt.odin
@@ -178,7 +178,11 @@ write_type :: proc(buf: ^String_Buffer, ti: ^Type_Info) {
 		write_string(buf, "complex");
 		write_i64(buf, i64(8*ti.size), 10);
 	case Type_Info_String:
-		write_string(buf, "string");
+		if info.is_cstring {
+			write_string(buf, "cstring");
+		} else {
+			write_string(buf, "string");
+		}
 	case Type_Info_Boolean:
 		a := any{type_info = ti};
 		switch _ in a {
@@ -599,6 +603,9 @@ fmt_string :: proc(fi: ^Fmt_Info, s: string, verb: rune) {
 		fmt_bad_verb(fi, verb);
 	}
 }
+fmt_cstring :: proc(fi: ^Fmt_Info, s: cstring, verb: rune) {
+	fmt_string(fi, string(s), verb);
+}
 
 fmt_pointer :: proc(fi: ^Fmt_Info, p: rawptr, verb: rune) {
 	switch verb {
@@ -974,6 +981,7 @@ fmt_arg :: proc(fi: ^Fmt_Info, arg: any, verb: rune) {
 	case uintptr: fmt_int(fi, u64(a), false, 8*size_of(uintptr), verb);
 
 	case string:  fmt_string(fi, a, verb);
+	case cstring: fmt_cstring(fi, a, verb);
 
 	case: fmt_value(fi, arg, verb);
 	}
diff --git a/examples/demo.odin b/examples/demo.odin
index 466eb625e..3be85725b 100644
--- a/examples/demo.odin
+++ b/examples/demo.odin
@@ -761,6 +761,23 @@ complete_switch :: proc() {
 }
 
 
+cstring_example :: proc() {
+	W :: "Hellope";
+	X :: cstring(W);
+	Y :: string(X);
+
+	w := W;
+	x: cstring = X;
+	y: string = Y;
+	z := string(x);
+	fmt.println(x, y, z);
+	fmt.println(len(x), len(y), len(z));
+	fmt.println(len(W), len(X), len(Y));
+	// IMPORTANT NOTE for cstring variables
+	// len(cstring) is O(N)
+	// cast(string)cstring is O(N)
+}
+
 main :: proc() {
 	when true {
 		general_stuff();
@@ -774,5 +791,6 @@ main :: proc() {
 		enum_export();
 		explicit_procedure_overloading();
 		complete_switch();
+		cstring_example();
 	}
 }
diff --git a/src/check_expr.cpp b/src/check_expr.cpp
index ec6f95b14..7ad75ac48 100644
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -1786,7 +1786,19 @@ bool check_is_castable_to(Checker *c, Operand *operand, Type *y) {
 			return true;
 		// }
 	}
+	// cstring -> string
+	if (src == t_cstring && dst == t_string) {
+		return true;
+	}
 
+	// cstring -> ^u8
+	if (src == t_cstring && is_type_u8_ptr(dst)) {
+		return true;
+	}
+	// ^u8 -> cstring
+	if (is_type_u8_ptr(src) && dst == t_cstring) {
+		return true;
+	}
 	// proc <-> proc
 	if (is_type_proc(src) && is_type_proc(dst)) {
 		return true;
@@ -5005,7 +5017,7 @@ bool check_set_index_data(Operand *o, Type *type, bool indirection, i64 *max_cou
 
 	switch (t->kind) {
 	case Type_Basic:
-		if (is_type_string(t)) {
+		if (t->Basic.kind == Basic_string) {
 			if (o->mode == Addressing_Constant) {
 				*max_count = o->value.value_string.len;
 			}
@@ -5904,7 +5916,7 @@ ExprKind check_expr_base_internal(Checker *c, Operand *o, AstNode *node, Type *t
 		Type *t = base_type(type_deref(o->type));
 		switch (t->kind) {
 		case Type_Basic:
-			if (is_type_string(t)) {
+			if (t->Basic.kind == Basic_string) {
 				valid = true;
 				if (o->mode == Addressing_Constant) {
 					max_count = o->value.value_string.len;
diff --git a/src/ir.cpp b/src/ir.cpp
index a1b3ca8ee..6ab3c9ef7 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -2890,6 +2890,14 @@ irValue *ir_string_len(irProcedure *proc, irValue *string) {
 	return ir_emit_struct_ev(proc, string, 1);
 }
 
+irValue *ir_cstring_len(irProcedure *proc, irValue *value) {
+	GB_ASSERT(is_type_cstring(ir_type(value)));
+	auto args = array_make<irValue *>(proc->module->allocator, 1);
+	args[0] = ir_emit_conv(proc, value, t_cstring);
+	return ir_emit_global_call(proc, "__cstring_len", args);
+}
+
+
 
 void ir_fill_slice(irProcedure *proc, irValue *slice_ptr, irValue *data, irValue *len) {
 	Type *t = ir_type(slice_ptr);
@@ -3122,6 +3130,18 @@ irValue *ir_emit_conv(irProcedure *proc, irValue *value, Type *t) {
 		return ir_emit(proc, ir_instr_conv(proc, irConv_zext, b, t_llvm_bool, t));
 	}
 
+	if (src == t_cstring && is_type_u8_ptr(dst)) {
+		return ir_emit_bitcast(proc, value, dst);
+	}
+
+	if (src == t_cstring && dst == t_string) {
+		irValue *c = ir_emit_conv(proc, value, t_cstring);
+		auto args = array_make<irValue *>(proc->module->allocator, 1);
+		args[0] = c;
+		irValue *s = ir_emit_global_call(proc, "__cstring_to_string", args);
+		return ir_emit_conv(proc, s, dst);
+	}
+
 
 	// integer -> boolean
 	if (is_type_integer(src) && is_type_boolean(dst)) {
@@ -4171,7 +4191,9 @@ irValue *ir_build_builtin_proc(irProcedure *proc, AstNode *expr, TypeAndValue tv
 			v = ir_emit_load(proc, v);
 			t = type_deref(t);
 		}
-		if (is_type_string(t)) {
+		if (is_type_cstring(t)) {
+			return ir_cstring_len(proc, v);
+		} else if (is_type_string(t)) {
 			return ir_string_len(proc, v);
 		} else if (is_type_array(t)) {
 			GB_PANIC("Array lengths are constant");
@@ -7902,6 +7924,11 @@ void ir_setup_type_info_data(irProcedure *proc) { // NOTE(bill): Setup type_info
 				tag = ir_emit_conv(proc, variant_ptr, t_type_info_string_ptr);
 				break;
 
+			case Basic_cstring:
+				tag = ir_emit_conv(proc, variant_ptr, t_type_info_string_ptr);
+				ir_emit_store(proc, ir_emit_struct_ep(proc, tag, 0), v_true); // is_cstring
+				break;
+
 			case Basic_any:
 				tag = ir_emit_conv(proc, variant_ptr, t_type_info_any_ptr);
 				break;
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index eaf7e0477..632d9c0e1 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -340,6 +340,7 @@ void ir_print_type(irFileBuffer *f, irModule *m, Type *t, bool in_struct) {
 		case Basic_any:    ir_write_str_lit(f, "%..any");    return;
 		case Basic_rawptr: ir_write_str_lit(f, "%..rawptr"); return;
 		case Basic_string: ir_write_str_lit(f, "%..string"); return;
+		case Basic_cstring: ir_write_str_lit(f, "i8*");      return;
 		}
 		break;
 
@@ -551,11 +552,25 @@ void ir_print_exact_value(irFileBuffer *f, irModule *m, ExactValue value, Type *
 			ir_write_str_lit(f, "zeroinitializer");
 			break;
 		}
+		Type *t = core_type(type);
 		if (!is_type_string(type)) {
 			GB_ASSERT(is_type_array(type));
 			ir_write_str_lit(f, "c\"");
 			ir_print_escape_string(f, str, false, false);
 			ir_write_str_lit(f, "\\00\"");
+		} else if (t == t_cstring) {
+			// HACK NOTE(bill): This is a hack but it works because strings are created at the very end
+			// of the .ll file
+			irValue *str_array = ir_add_global_string_array(m, str);
+			ir_write_str_lit(f, "getelementptr inbounds (");
+			ir_print_type(f, m, str_array->Global.entity->type);
+			ir_write_str_lit(f, ", ");
+			ir_print_type(f, m, str_array->Global.entity->type);
+			ir_write_str_lit(f, "* ");
+			ir_print_encoded_global(f, str_array->Global.entity->token.string, false);
+			ir_write_str_lit(f, ", ");
+			ir_print_type(f, m, t_int);
+			ir_write_str_lit(f, " 0, i32 0)");
 		} else {
 			// HACK NOTE(bill): This is a hack but it works because strings are created at the very end
 			// of the .ll file
diff --git a/src/types.cpp b/src/types.cpp
index 239a530d1..c0254729f 100644
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -34,8 +34,9 @@ enum BasicKind {
 	Basic_uint,
 	Basic_uintptr,
 	Basic_rawptr,
-	Basic_string, // ^u8 + int
-	Basic_any,    // rawptr + ^Type_Info
+	Basic_string,  // ^u8 + int
+	Basic_cstring, // ^u8
+	Basic_any,     // rawptr + ^Type_Info
 
 	Basic_UntypedBool,
 	Basic_UntypedInteger,
@@ -277,6 +278,7 @@ gb_global Type basic_types[] = {
 
 	{Type_Basic, {Basic_rawptr,            BasicFlag_Pointer,                         -1, STR_LIT("rawptr")}},
 	{Type_Basic, {Basic_string,            BasicFlag_String,                          -1, STR_LIT("string")}},
+	{Type_Basic, {Basic_cstring,           BasicFlag_String,                          -1, STR_LIT("cstring")}},
 	{Type_Basic, {Basic_any,               0,                                         -1, STR_LIT("any")}},
 
 	{Type_Basic, {Basic_UntypedBool,       BasicFlag_Boolean    | BasicFlag_Untyped,   0, STR_LIT("untyped bool")}},
@@ -322,6 +324,7 @@ gb_global Type *t_uintptr = &basic_types[Basic_uintptr];
 
 gb_global Type *t_rawptr          = &basic_types[Basic_rawptr];
 gb_global Type *t_string          = &basic_types[Basic_string];
+gb_global Type *t_cstring         = &basic_types[Basic_cstring];
 gb_global Type *t_any             = &basic_types[Basic_any];
 
 gb_global Type *t_untyped_bool    = &basic_types[Basic_UntypedBool];
@@ -690,6 +693,13 @@ bool is_type_string(Type *t) {
 	}
 	return false;
 }
+bool is_type_cstring(Type *t) {
+	t = base_type(t);
+	if (t->kind == Type_Basic) {
+		return t->Basic.kind == Basic_cstring;
+	}
+	return false;
+}
 bool is_type_typed(Type *t) {
 	t = base_type(t);
 	if (t == nullptr) {
@@ -802,6 +812,13 @@ bool is_type_u8_slice(Type *t) {
 	}
 	return false;
 }
+bool is_type_u8_ptr(Type *t) {
+	t = base_type(t);
+	if (t->kind == Type_Pointer) {
+		return is_type_u8(t->Pointer.elem);
+	}
+	return false;
+}
 bool is_type_proc(Type *t) {
 	t = base_type(t);
 	return t->kind == Type_Proc;
@@ -933,7 +950,7 @@ bool is_type_indexable(Type *t) {
 	Type *bt = base_type(t);
 	switch (bt->kind) {
 	case Type_Basic:
-		return is_type_string(bt);
+		return bt->Basic.kind == Basic_string;
 	case Type_Array:
 	case Type_Slice:
 	case Type_DynamicArray:
@@ -1101,6 +1118,8 @@ bool is_type_comparable(Type *t) {
 			return false;
 		case Basic_rune:
 			return true;
+		case Basic_cstring:
+			return false;
 		}
 		return true;
 	case Type_Pointer:
@@ -1849,8 +1868,9 @@ i64 type_align_of_internal(gbAllocator allocator, Type *t, TypePath *path) {
 	case Type_Basic: {
 		GB_ASSERT(is_type_typed(t));
 		switch (t->Basic.kind) {
-		case Basic_string: return build_context.word_size;
-		case Basic_any:    return build_context.word_size;
+		case Basic_string:  return build_context.word_size;
+		case Basic_cstring: return build_context.word_size;
+		case Basic_any:     return build_context.word_size;
 
 		case Basic_int: case Basic_uint: case Basic_uintptr: case Basic_rawptr:
 			return build_context.word_size;
@@ -2048,8 +2068,9 @@ i64 type_size_of_internal(gbAllocator allocator, Type *t, TypePath *path) {
 			return size;
 		}
 		switch (kind) {
-		case Basic_string: return 2*build_context.word_size;
-		case Basic_any:    return 2*build_context.word_size;
+		case Basic_string:  return 2*build_context.word_size;
+		case Basic_cstring: return build_context.word_size;
+		case Basic_any:     return 2*build_context.word_size;
 
 		case Basic_int: case Basic_uint: case Basic_uintptr: case Basic_rawptr:
 			return build_context.word_size;