diff --git a/examples/basic.odin b/examples/basic.odin index 63d30d602..897b1a093 100644 --- a/examples/basic.odin +++ b/examples/basic.odin @@ -14,7 +14,53 @@ string_byte_reverse :: proc(s: string) { } } -print_int :: proc(i, base: int) { +encode_rune :: proc(buf : []u8, r : rune) -> int { + i := cast(u32)r; + mask : u8 : 0x3f; + if i <= 1<<7-1 { + buf[0] = cast(u8)r; + return 1; + } + if i <= 1<<11-1 { + buf[0] = 0xc0 | cast(u8)(r>>6); + buf[1] = 0x80 | cast(u8)(r)&mask; + return 2; + } + + // Invalid or Surrogate range + if i > 0x0010ffff || + (i >= 0xd800 && i <= 0xdfff) { + r = 0xfffd; + + buf[0] = 0xe0 | cast(u8)(r>>12); + buf[1] = 0x80 | cast(u8)(r>>6)&mask; + buf[2] = 0x80 | cast(u8)(r)&mask; + return 3; + } + + if i <= 1<<16-1 { + buf[0] = 0xe0 | cast(u8)(r>>12); + buf[1] = 0x80 | cast(u8)(r>>6)&mask; + buf[2] = 0x80 | cast(u8)(r)&mask; + return 3; + } + + buf[0] = 0xf0 | cast(u8)(r>>18); + buf[1] = 0x80 | cast(u8)(r>>12)&mask; + buf[2] = 0x80 | cast(u8)(r>>6)&mask; + buf[3] = 0x80 | cast(u8)(r)&mask; + return 4; +} + +print_rune :: proc(r : rune) { + buf : [4]u8; + n := encode_rune(buf[:], r); + str := cast(string)buf[:n]; + + print_string(str); +} + +print_int :: proc(i, base : int) { NUM_TO_CHAR_TABLE :: "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz@$"; buf: [21]u8; diff --git a/examples/main.ll b/examples/main.ll index 9b6ed5d16..828ffafa2 100644 --- a/examples/main.ll +++ b/examples/main.ll @@ -1,145 +1,19 @@ -%-string = type {i8*, i64} ; Basic_string +%.string = type {i8*, i64} ; Basic_string -%-rawptr = type i8* ; Basic_rawptr +%.rawptr = type i8* ; Basic_rawptr define void @main() { "entry - 0": - %0 = alloca i64, align 8 ; a - store i64 zeroinitializer, i64* %0 - %1 = getelementptr inbounds [6 x i8], [6 x i8]* @.str0, i64 0, i64 0 - %2 = getelementptr i8, i8* %1, i64 1 - %3 = load i8, i8* %2 - %4 = zext i8 %3 to i64 - store i64 %4, i64* %0 - %5 = load i64, i64* %0 - call void @print_int(i64 %5, i64 10) - %6 = getelementptr 
inbounds [1 x i8], [1 x i8]* @.str1, i64 0, i64 0 - %7 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %7 - %8 = getelementptr inbounds %-string, %-string* %7, i64 0, i32 0 - %9 = getelementptr inbounds %-string, %-string* %7, i64 0, i32 1 - store i8* %6, i8** %8 - store i64 1, i64* %9 - %10 = load %-string, %-string* %7 - call void @print_string(%-string %10) - %11 = getelementptr inbounds [23 x i8], [23 x i8]* @.str2, i64 0, i64 0 - %12 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %12 - %13 = getelementptr inbounds %-string, %-string* %12, i64 0, i32 0 - %14 = getelementptr inbounds %-string, %-string* %12, i64 0, i32 1 - store i8* %11, i8** %13 - store i64 23, i64* %14 - %15 = load %-string, %-string* %12 - call void @print_string(%-string %15) - %16 = getelementptr inbounds [21 x i8], [21 x i8]* @.str3, i64 0, i64 0 - %17 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %17 - %18 = getelementptr inbounds %-string, %-string* %17, i64 0, i32 0 - %19 = getelementptr inbounds %-string, %-string* %17, i64 0, i32 1 - store i8* %16, i8** %18 - store i64 21, i64* %19 - %20 = load %-string, %-string* %17 - call void @print_string(%-string %20) - %21 = getelementptr inbounds [22 x i8], [22 x i8]* @.str4, i64 0, i64 0 - %22 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %22 - %23 = getelementptr inbounds %-string, %-string* %22, i64 0, i32 0 - %24 = getelementptr inbounds %-string, %-string* %22, i64 0, i32 1 - store i8* %21, i8** %23 - store i64 22, i64* %24 - %25 = load %-string, %-string* %22 - call void @print_string(%-string %25) - %26 = getelementptr inbounds [23 x i8], [23 x i8]* @.str5, i64 0, i64 0 - %27 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %27 - %28 = getelementptr inbounds %-string, %-string* %27, i64 0, i32 0 - %29 = getelementptr inbounds %-string, %-string* %27, i64 0, i32 1 - store i8* %26, i8** %28 - store i64 23, i64* %29 - %30 
= load %-string, %-string* %27 - call void @print_string(%-string %30) - %31 = getelementptr inbounds [20 x i8], [20 x i8]* @.str6, i64 0, i64 0 - %32 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %32 - %33 = getelementptr inbounds %-string, %-string* %32, i64 0, i32 0 - %34 = getelementptr inbounds %-string, %-string* %32, i64 0, i32 1 - store i8* %31, i8** %33 - store i64 20, i64* %34 - %35 = load %-string, %-string* %32 - call void @print_string(%-string %35) - %36 = getelementptr inbounds [37 x i8], [37 x i8]* @.str7, i64 0, i64 0 - %37 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %37 - %38 = getelementptr inbounds %-string, %-string* %37, i64 0, i32 0 - %39 = getelementptr inbounds %-string, %-string* %37, i64 0, i32 1 - store i8* %36, i8** %38 - store i64 37, i64* %39 - %40 = load %-string, %-string* %37 - call void @print_string(%-string %40) - %41 = getelementptr inbounds [21 x i8], [21 x i8]* @.str8, i64 0, i64 0 - %42 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %42 - %43 = getelementptr inbounds %-string, %-string* %42, i64 0, i32 0 - %44 = getelementptr inbounds %-string, %-string* %42, i64 0, i32 1 - store i8* %41, i8** %43 - store i64 21, i64* %44 - %45 = load %-string, %-string* %42 - call void @print_string(%-string %45) - %46 = getelementptr inbounds [33 x i8], [33 x i8]* @.str9, i64 0, i64 0 - %47 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %47 - %48 = getelementptr inbounds %-string, %-string* %47, i64 0, i32 0 - %49 = getelementptr inbounds %-string, %-string* %47, i64 0, i32 1 - store i8* %46, i8** %48 - store i64 33, i64* %49 - %50 = load %-string, %-string* %47 - call void @print_string(%-string %50) - %51 = getelementptr inbounds [29 x i8], [29 x i8]* @.stra, i64 0, i64 0 - %52 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %52 - %53 = getelementptr inbounds %-string, %-string* %52, i64 0, i32 0 - %54 = 
getelementptr inbounds %-string, %-string* %52, i64 0, i32 1 - store i8* %51, i8** %53 - store i64 29, i64* %54 - %55 = load %-string, %-string* %52 - call void @print_string(%-string %55) - %56 = getelementptr inbounds [24 x i8], [24 x i8]* @.strb, i64 0, i64 0 - %57 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %57 - %58 = getelementptr inbounds %-string, %-string* %57, i64 0, i32 0 - %59 = getelementptr inbounds %-string, %-string* %57, i64 0, i32 1 - store i8* %56, i8** %58 - store i64 24, i64* %59 - %60 = load %-string, %-string* %57 - call void @print_string(%-string %60) - %61 = getelementptr inbounds [42 x i8], [42 x i8]* @.strc, i64 0, i64 0 - %62 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %62 - %63 = getelementptr inbounds %-string, %-string* %62, i64 0, i32 0 - %64 = getelementptr inbounds %-string, %-string* %62, i64 0, i32 1 - store i8* %61, i8** %63 - store i64 42, i64* %64 - %65 = load %-string, %-string* %62 - call void @print_string(%-string %65) - %66 = getelementptr inbounds [21 x i8], [21 x i8]* @.strd, i64 0, i64 0 - %67 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %67 - %68 = getelementptr inbounds %-string, %-string* %67, i64 0, i32 0 - %69 = getelementptr inbounds %-string, %-string* %67, i64 0, i32 1 - store i8* %66, i8** %68 - store i64 21, i64* %69 - %70 = load %-string, %-string* %67 - call void @print_string(%-string %70) ret void } declare i32 @putchar(i32 %c) ; foreign procedure -define void @print_string(%-string %s) { +define void @print_string(%.string %s) { "entry - 0": - %0 = alloca %-string, align 8 ; s - store %-string zeroinitializer, %-string* %0 - store %-string %s, %-string* %0 + %0 = alloca %.string, align 8 ; s + store %.string zeroinitializer, %.string* %0 + store %.string %s, %.string* %0 %1 = alloca i64, align 8 ; i store i64 zeroinitializer, i64* %1 store i64 0, i64* %1 @@ -148,7 +22,7 @@ define void @print_string(%-string %s) { "for.body - 
1": %2 = alloca i32, align 4 ; c store i32 zeroinitializer, i32* %2 - %3 = getelementptr inbounds %-string, %-string* %0, i64 0, i32 0 + %3 = getelementptr inbounds %.string, %.string* %0, i64 0, i32 0 %4 = load i8*, i8** %3 %5 = load i64, i64* %1 %6 = getelementptr i8, i8* %4, i64 %5 @@ -161,7 +35,7 @@ define void @print_string(%-string %s) { "for.loop - 2": %11 = load i64, i64* %1 - %12 = getelementptr inbounds %-string, %-string* %0, i64 0, i32 1 + %12 = getelementptr inbounds %.string, %.string* %0, i64 0, i32 1 %13 = load i64, i64* %12 %14 = icmp slt i64 %11, %13 br i1 %14, label %"for.body - 1", label %"for.done - 4" @@ -176,14 +50,14 @@ define void @print_string(%-string %s) { ret void } -define void @string_byte_reverse(%-string %s) { +define void @string_byte_reverse(%.string %s) { "entry - 0": - %0 = alloca %-string, align 8 ; s - store %-string zeroinitializer, %-string* %0 - store %-string %s, %-string* %0 + %0 = alloca %.string, align 8 ; s + store %.string zeroinitializer, %.string* %0 + store %.string %s, %.string* %0 %1 = alloca i64, align 8 ; n store i64 zeroinitializer, i64* %1 - %2 = getelementptr inbounds %-string, %-string* %0, i64 0, i32 1 + %2 = getelementptr inbounds %.string, %.string* %0, i64 0, i32 1 %3 = load i64, i64* %2 store i64 %3, i64* %1 %4 = alloca i64, align 8 ; i @@ -192,18 +66,18 @@ define void @string_byte_reverse(%-string %s) { br label %"for.loop - 2" "for.body - 1": - %5 = getelementptr inbounds %-string, %-string* %0, i64 0, i32 0 + %5 = getelementptr inbounds %.string, %.string* %0, i64 0, i32 0 %6 = load i8*, i8** %5 %7 = load i64, i64* %4 %8 = getelementptr i8, i8* %6, i64 %7 - %9 = getelementptr inbounds %-string, %-string* %0, i64 0, i32 0 + %9 = getelementptr inbounds %.string, %.string* %0, i64 0, i32 0 %10 = load i8*, i8** %9 %11 = load i64, i64* %4 %12 = load i64, i64* %1 %13 = sub i64 %12, 1 %14 = sub i64 %13, %11 %15 = getelementptr i8, i8* %10, i64 %14 - %16 = getelementptr inbounds %-string, %-string* %0, i64 
0, i32 0 + %16 = getelementptr inbounds %.string, %.string* %0, i64 0, i32 0 %17 = load i8*, i8** %16 %18 = load i64, i64* %4 %19 = load i64, i64* %1 @@ -211,7 +85,7 @@ define void @string_byte_reverse(%-string %s) { %21 = sub i64 %20, %18 %22 = getelementptr i8, i8* %17, i64 %21 %23 = load i8, i8* %22 - %24 = getelementptr inbounds %-string, %-string* %0, i64 0, i32 0 + %24 = getelementptr inbounds %.string, %.string* %0, i64 0, i32 0 %25 = load i8*, i8** %24 %26 = load i64, i64* %4 %27 = getelementptr i8, i8* %25, i64 %26 @@ -237,6 +111,231 @@ define void @string_byte_reverse(%-string %s) { ret void } +define i64 @encode_rune({i8*, i64, i64} %buf, i32 %r) { +"entry - 0": + %0 = alloca {i8*, i64, i64}, align 8 ; buf + store {i8*, i64, i64} zeroinitializer, {i8*, i64, i64}* %0 + store {i8*, i64, i64} %buf, {i8*, i64, i64}* %0 + %1 = alloca i32, align 4 ; r + store i32 zeroinitializer, i32* %1 + store i32 %r, i32* %1 + %2 = alloca i32, align 4 ; i + store i32 zeroinitializer, i32* %2 + %3 = load i32, i32* %1 + store i32 %3, i32* %2 + %4 = load i32, i32* %2 + %5 = icmp ule i32 %4, 127 + br i1 %5, label %"if.then - 1", label %"if.done - 2" + +"if.then - 1": + %6 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %7 = load i8*, i8** %6 + %8 = getelementptr i8, i8* %7, i64 0 + %9 = load i32, i32* %1 + %10 = trunc i32 %9 to i8 + store i8 %10, i8* %8 + ret i64 1 + +"if.done - 2": + %11 = load i32, i32* %2 + %12 = icmp ule i32 %11, 2047 + br i1 %12, label %"if.then - 3", label %"if.done - 4" + +"if.then - 3": + %13 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %14 = load i8*, i8** %13 + %15 = getelementptr i8, i8* %14, i64 0 + %16 = load i32, i32* %1 + %17 = lshr i32 %16, 6 + %18 = trunc i32 %17 to i8 + %19 = or i8 192, %18 + store i8 %19, i8* %15 + %20 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %21 = load i8*, i8** %20 + %22 = getelementptr i8, i8* %21, i64 1 + %23 = load i32, 
i32* %1 + %24 = trunc i32 %23 to i8 + %25 = and i8 %24, 63 + %26 = or i8 128, %25 + store i8 %26, i8* %22 + ret i64 2 + +"if.done - 4": + %27 = load i32, i32* %2 + %28 = icmp ugt i32 %27, 1114111 + br i1 %28, label %"if.then - 5", label %"cmp-or - 6" + +"if.then - 5": + store i32 65533, i32* %1 + %29 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %30 = load i8*, i8** %29 + %31 = getelementptr i8, i8* %30, i64 0 + %32 = load i32, i32* %1 + %33 = lshr i32 %32, 12 + %34 = trunc i32 %33 to i8 + %35 = or i8 224, %34 + store i8 %35, i8* %31 + %36 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %37 = load i8*, i8** %36 + %38 = getelementptr i8, i8* %37, i64 1 + %39 = load i32, i32* %1 + %40 = lshr i32 %39, 6 + %41 = trunc i32 %40 to i8 + %42 = and i8 %41, 63 + %43 = or i8 128, %42 + store i8 %43, i8* %38 + %44 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %45 = load i8*, i8** %44 + %46 = getelementptr i8, i8* %45, i64 2 + %47 = load i32, i32* %1 + %48 = trunc i32 %47 to i8 + %49 = and i8 %48, 63 + %50 = or i8 128, %49 + store i8 %50, i8* %46 + ret i64 3 + +"cmp-or - 6": + %51 = load i32, i32* %2 + %52 = icmp uge i32 %51, 55296 + br i1 %52, label %"cmp-and - 7", label %"if.done - 8" + +"cmp-and - 7": + %53 = load i32, i32* %2 + %54 = icmp ule i32 %53, 57343 + br i1 %54, label %"if.then - 5", label %"if.done - 8" + +"if.done - 8": + %55 = load i32, i32* %2 + %56 = icmp ule i32 %55, 65535 + br i1 %56, label %"if.then - 9", label %"if.done - 10" + +"if.then - 9": + %57 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %58 = load i8*, i8** %57 + %59 = getelementptr i8, i8* %58, i64 0 + %60 = load i32, i32* %1 + %61 = lshr i32 %60, 12 + %62 = trunc i32 %61 to i8 + %63 = or i8 224, %62 + store i8 %63, i8* %59 + %64 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %65 = load i8*, i8** %64 + %66 = getelementptr i8, i8* %65, i64 1 + 
%67 = load i32, i32* %1 + %68 = lshr i32 %67, 6 + %69 = trunc i32 %68 to i8 + %70 = and i8 %69, 63 + %71 = or i8 128, %70 + store i8 %71, i8* %66 + %72 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %73 = load i8*, i8** %72 + %74 = getelementptr i8, i8* %73, i64 2 + %75 = load i32, i32* %1 + %76 = trunc i32 %75 to i8 + %77 = and i8 %76, 63 + %78 = or i8 128, %77 + store i8 %78, i8* %74 + ret i64 3 + +"if.done - 10": + %79 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %80 = load i8*, i8** %79 + %81 = getelementptr i8, i8* %80, i64 0 + %82 = load i32, i32* %1 + %83 = lshr i32 %82, 18 + %84 = trunc i32 %83 to i8 + %85 = or i8 240, %84 + store i8 %85, i8* %81 + %86 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %87 = load i8*, i8** %86 + %88 = getelementptr i8, i8* %87, i64 1 + %89 = load i32, i32* %1 + %90 = lshr i32 %89, 12 + %91 = trunc i32 %90 to i8 + %92 = and i8 %91, 63 + %93 = or i8 128, %92 + store i8 %93, i8* %88 + %94 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %95 = load i8*, i8** %94 + %96 = getelementptr i8, i8* %95, i64 2 + %97 = load i32, i32* %1 + %98 = lshr i32 %97, 6 + %99 = trunc i32 %98 to i8 + %100 = and i8 %99, 63 + %101 = or i8 128, %100 + store i8 %101, i8* %96 + %102 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %0, i64 0, i32 0 + %103 = load i8*, i8** %102 + %104 = getelementptr i8, i8* %103, i64 3 + %105 = load i32, i32* %1 + %106 = trunc i32 %105 to i8 + %107 = and i8 %106, 63 + %108 = or i8 128, %107 + store i8 %108, i8* %104 + ret i64 4 +} + +define void @print_rune(i32 %r) { +"entry - 0": + %0 = alloca i32, align 4 ; r + store i32 zeroinitializer, i32* %0 + store i32 %r, i32* %0 + %1 = alloca [4 x i8], align 1 ; buf + store [4 x i8] zeroinitializer, [4 x i8]* %1 + %2 = alloca i64, align 8 ; n + store i64 zeroinitializer, i64* %2 + %3 = sub i64 4, 0 + %4 = sub i64 4, 0 + %5 = getelementptr inbounds [4 
x i8], [4 x i8]* %1, i64 0, i64 0 + %6 = getelementptr i8, i8* %5, i64 0 + %7 = alloca {i8*, i64, i64}, align 8 + store {i8*, i64, i64} zeroinitializer, {i8*, i64, i64}* %7 + %8 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %7, i64 0, i32 0 + store i8* %6, i8** %8 + %9 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %7, i64 0, i32 1 + store i64 %3, i64* %9 + %10 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %7, i64 0, i32 2 + store i64 %4, i64* %10 + %11 = load {i8*, i64, i64}, {i8*, i64, i64}* %7 + %12 = load i32, i32* %0 + %13 = call i64 @encode_rune({i8*, i64, i64} %11, i32 %12) + store i64 %13, i64* %2 + %14 = alloca %.string, align 8 ; str + store %.string zeroinitializer, %.string* %14 + %15 = load i64, i64* %2 + %16 = sub i64 %15, 0 + %17 = sub i64 4, 0 + %18 = getelementptr inbounds [4 x i8], [4 x i8]* %1, i64 0, i64 0 + %19 = getelementptr i8, i8* %18, i64 0 + %20 = alloca {i8*, i64, i64}, align 8 + store {i8*, i64, i64} zeroinitializer, {i8*, i64, i64}* %20 + %21 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %20, i64 0, i32 0 + store i8* %19, i8** %21 + %22 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %20, i64 0, i32 1 + store i64 %16, i64* %22 + %23 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %20, i64 0, i32 2 + store i64 %17, i64* %23 + %24 = load {i8*, i64, i64}, {i8*, i64, i64}* %20 + %25 = alloca {i8*, i64, i64}, align 8 + store {i8*, i64, i64} zeroinitializer, {i8*, i64, i64}* %25 + store {i8*, i64, i64} %24, {i8*, i64, i64}* %25 + %26 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %25, i64 0, i32 0 + %27 = load i8*, i8** %26 + %28 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %25, i64 0, i32 1 + %29 = load i64, i64* %28 + %30 = alloca %.string, align 8 + store %.string zeroinitializer, %.string* %30 + %31 = getelementptr inbounds %.string, %.string* %30, i64 0, i32 0 + %32 = getelementptr inbounds %.string, %.string* %30, i64 0, i32 1 + store i8* 
%27, i8** %31 + store i64 %29, i64* %32 + %33 = load %.string, %.string* %30 + store %.string %33, %.string* %14 + %34 = load %.string, %.string* %14 + call void @print_string(%.string %34) + ret void +} + define void @print_int(i64 %i, i64 %base) { "entry - 0": %0 = alloca i64, align 8 ; i @@ -285,7 +384,7 @@ define void @print_int(i64 %i, i64 %base) { "for.body - 5": %16 = alloca i8, align 1 ; c store i8 zeroinitializer, i8* %16 - %17 = getelementptr inbounds [64 x i8], [64 x i8]* @.stre, i64 0, i64 0 + %17 = getelementptr inbounds [64 x i8], [64 x i8]* @.str0, i64 0, i64 0 %18 = load i64, i64* %1 %19 = load i64, i64* %0 %20 = srem i64 %19, %18 @@ -329,8 +428,8 @@ define void @print_int(i64 %i, i64 %base) { br label %"if.done - 10" "if.done - 10": - %40 = alloca %-string, align 8 ; str - store %-string zeroinitializer, %-string* %40 + %40 = alloca %.string, align 8 ; str + store %.string zeroinitializer, %.string* %40 %41 = load i64, i64* %3 %42 = sub i64 %41, 0 %43 = sub i64 21, 0 @@ -352,33 +451,19 @@ define void @print_int(i64 %i, i64 %base) { %53 = load i8*, i8** %52 %54 = getelementptr inbounds {i8*, i64, i64}, {i8*, i64, i64}* %51, i64 0, i32 1 %55 = load i64, i64* %54 - %56 = alloca %-string, align 8 - store %-string zeroinitializer, %-string* %56 - %57 = getelementptr inbounds %-string, %-string* %56, i64 0, i32 0 - %58 = getelementptr inbounds %-string, %-string* %56, i64 0, i32 1 + %56 = alloca %.string, align 8 + store %.string zeroinitializer, %.string* %56 + %57 = getelementptr inbounds %.string, %.string* %56, i64 0, i32 0 + %58 = getelementptr inbounds %.string, %.string* %56, i64 0, i32 1 store i8* %53, i8** %57 store i64 %55, i64* %58 - %59 = load %-string, %-string* %56 - store %-string %59, %-string* %40 - %60 = load %-string, %-string* %40 - call void @string_byte_reverse(%-string %60) - %61 = load %-string, %-string* %40 - call void @print_string(%-string %61) + %59 = load %.string, %.string* %56 + store %.string %59, %.string* %40 + %60 = 
load %.string, %.string* %40 + call void @string_byte_reverse(%.string %60) + %61 = load %.string, %.string* %40 + call void @print_string(%.string %61) ret void } -@.str0 = global [6 x i8] c"Hello\0A" -@.str1 = global [1 x i8] c"\0A" -@.str2 = global [23 x i8] c"Chinese\20-\20\E4\BD\A0\E5\A5\BD\E4\B8\96\E7\95\8C\0A" -@.str3 = global [21 x i8] c"Dutch\20-\20Hello\20wereld\0A" -@.str4 = global [22 x i8] c"English\20-\20Hello\20world\0A" -@.str5 = global [23 x i8] c"French\20-\20Bonjour\20monde\0A" -@.str6 = global [20 x i8] c"German\20-\20Hallo\20Welt\0A" -@.str7 = global [37 x i8] c"Greek\20-\20\CE\B3\CE\B5\CE\B9\CE\AC\20\CF\83\CE\BF\CF\85\20\CE\BA\CF\8C\CF\83\CE\BC\CE\BF\CF\82\0A" -@.str8 = global [21 x i8] c"Italian\20-\20Ciao\20mondo\0A" -@.str9 = global [33 x i8] c"Japanese\20-\20\E3\81\93\E3\82\93\E3\81\AB\E3\81\A1\E3\81\AF\E4\B8\96\E7\95\8C\0A" -@.stra = global [29 x i8] c"Korean\20-\20\EC\97\AC\EB\B3\B4\EC\84\B8\EC\9A\94\20\EC\84\B8\EA\B3\84\0A" -@.strb = global [24 x i8] c"Portuguese\20-\20Ol\C3\A1\20mundo\0A" -@.strc = global [42 x i8] c"Russian\20-\20\D0\97\D0\B4\D1\80\D0\B0\D0\B2\D1\81\D1\82\D0\B2\D1\83\D0\BB\D1\82\D0\B5\20\D0\BC\D0\B8\D1\80\0A" -@.strd = global [21 x i8] c"Spanish\20-\20Hola\20mundo\0A" -@.stre = global [64 x i8] c"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\40$" +@.str0 = global [64 x i8] c"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\40$" diff --git a/examples/main.odin b/examples/main.odin index c7b4240ca..d42ea8022 100644 --- a/examples/main.odin +++ b/examples/main.odin @@ -1,20 +1,21 @@ import "basic" -main :: proc() { - a := cast(int)"Hello\n"[1]; - print_int(a, 10); - print_string("\n"); +TWO_HEARTS :: '💕'; - print_string("Chinese - 你好世界\n"); - print_string("Dutch - Hello wereld\n"); - print_string("English - Hello world\n"); - print_string("French - Bonjour monde\n"); - print_string("German - Hallo Welt\n"); - print_string("Greek - γειά σου κόσμος\n"); - print_string("Italian - Ciao 
mondo\n"); - print_string("Japanese - こんにちは世界\n"); - print_string("Korean - 여보세요 세계\n"); +main :: proc() { +/* + print_string("Chinese - 你好世界\n"); + print_string("Dutch - Hello wereld\n"); + print_string("English - Hello world\n"); + print_string("French - Bonjour monde\n"); + print_string("German - Hallo Welt\n"); + print_string("Greek - γειά σου κόσμος\n"); + print_string("Italian - Ciao mondo\n"); + print_string("Japanese - こんにちは世界\n"); + print_string("Korean - 여보세요 세계\n"); print_string("Portuguese - Olá mundo\n"); - print_string("Russian - Здравствулте мир\n"); - print_string("Spanish - Hola mundo\n"); + print_string("Russian - Здравствулте мир\n"); + print_string("Spanish - Hola mundo\n"); +*/ } + diff --git a/src/checker/expr.cpp b/src/checker/expr.cpp index c413eff39..4c1e1f937 100644 --- a/src/checker/expr.cpp +++ b/src/checker/expr.cpp @@ -274,6 +274,22 @@ Type *check_type_expr_extra(Checker *c, AstNode *e, Type *named_type) { } case_end; + case_ast_node(vt, VectorType, e); + Type *elem = check_type(c, vt->elem); + Type *be = get_base_type(elem); + if (!is_type_vector(be) && + !(is_type_boolean(be) || is_type_numeric(be))) { + err_str = type_to_string(elem); + error(&c->error_collector, ast_node_token(vt->elem), "Vector element type must be a boolean, numerical, or vector. 
Got `%s`", err_str); + break; + } else { + i64 count = check_array_count(c, vt->count); + Type *t = make_type_vector(c->allocator, elem, count); + set_base_type(named_type, t); + return t; + } + case_end; + case_ast_node(st, StructType, e); Type *t = make_type_structure(c->allocator); set_base_type(named_type, t); @@ -366,6 +382,22 @@ Type *check_type(Checker *c, AstNode *e, Type *named_type) { goto end; case_end; + + case_ast_node(vt, VectorType, e); + Type *elem = check_type(c, vt->elem); + Type *be = get_base_type(elem); + i64 count = check_array_count(c, vt->count); + if (!is_type_vector(be) && + !(is_type_boolean(be) || is_type_numeric(be))) { + err_str = type_to_string(elem); + error(&c->error_collector, ast_node_token(vt->elem), "Vector element type must be a boolean, numerical, or vector. Got `%s`", err_str); + } else { + } + type = make_type_vector(c->allocator, elem, count); + set_base_type(named_type, type); + goto end; + case_end; + case_ast_node(st, StructType, e); type = make_type_structure(c->allocator); set_base_type(named_type, type); @@ -404,25 +436,26 @@ end: b32 check_unary_op(Checker *c, Operand *o, Token op) { // TODO(bill): Handle errors correctly + Type *type = get_base_type(base_vector_type(o->type)); gbString str = NULL; defer (gb_string_free(str)); switch (op.kind) { case Token_Add: case Token_Sub: - if (!is_type_numeric(o->type)) { + if (!is_type_numeric(type)) { str = expr_to_string(o->expr); error(&c->error_collector, op, "Operator `%.*s` is not allowed with `%s`", LIT(op.string), str); } break; case Token_Xor: - if (!is_type_integer(o->type)) { + if (!is_type_integer(type)) { error(&c->error_collector, op, "Operator `%.*s` is only allowed with integers", LIT(op.string)); } break; case Token_Not: - if (!is_type_boolean(o->type)) { + if (!is_type_boolean(type)) { str = expr_to_string(o->expr); error(&c->error_collector, op, "Operator `%.*s` is only allowed on boolean expression", LIT(op.string)); } @@ -438,6 +471,7 @@ b32 
check_unary_op(Checker *c, Operand *o, Token op) { b32 check_binary_op(Checker *c, Operand *o, Token op) { // TODO(bill): Handle errors correctly + Type *type = get_base_type(base_vector_type(o->type)); switch (op.kind) { case Token_Add: case Token_Sub: @@ -448,7 +482,7 @@ b32 check_binary_op(Checker *c, Operand *o, Token op) { case Token_SubEq: case Token_MulEq: case Token_QuoEq: - if (!is_type_numeric(o->type)) { + if (!is_type_numeric(type)) { error(&c->error_collector, op, "Operator `%.*s` is only allowed with numeric expressions", LIT(op.string)); return false; } @@ -465,7 +499,7 @@ b32 check_binary_op(Checker *c, Operand *o, Token op) { case Token_OrEq: case Token_XorEq: case Token_AndNotEq: - if (!is_type_integer(o->type)) { + if (!is_type_integer(type)) { error(&c->error_collector, op, "Operator `%.*s` is only allowed with integers", LIT(op.string)); return false; } @@ -476,7 +510,7 @@ b32 check_binary_op(Checker *c, Operand *o, Token op) { case Token_CmpAndEq: case Token_CmpOrEq: - if (!is_type_boolean(o->type)) { + if (!is_type_boolean(type)) { error(&c->error_collector, op, "Operator `%.*s` is only allowed with boolean expressions", LIT(op.string)); return false; } @@ -671,7 +705,107 @@ void check_comparison(Checker *c, Operand *x, Operand *y, Token op) { update_expr_type(c, y->expr, default_type(y->type), true); } + if (is_type_vector(x->type)) { + Type *vec_bool = NULL; + do { + } while (is_type_vector(x->type->vector.elem)); + } x->type = t_untyped_bool; + +} + +void check_shift(Checker *c, Operand *x, Operand *y, AstNode *node) { + GB_ASSERT(node->kind == AstNode_BinaryExpr); + ast_node(be, BinaryExpr, node); + + + ExactValue x_val = {}; + if (x->mode == Addressing_Constant) { + x_val = exact_value_to_integer(x->value); + } + + b32 x_is_untyped = is_type_untyped(x->type); + if (!(is_type_integer(x->type) || (x_is_untyped && x_val.kind == ExactValue_Integer))) { + gbString err_str = expr_to_string(x->expr); + defer (gb_string_free(err_str)); + 
error(&c->error_collector, ast_node_token(node), + "Shifted operand `%s` must be an integer", err_str); + x->mode = Addressing_Invalid; + return; + } + + if (is_type_unsigned(y->type)) { + + } else if (is_type_untyped(y->type)) { + convert_to_typed(c, y, t_untyped_integer); + if (y->mode == Addressing_Invalid) { + x->mode = Addressing_Invalid; + return; + } + } else { + gbString err_str = expr_to_string(y->expr); + defer (gb_string_free(err_str)); + error(&c->error_collector, ast_node_token(node), + "Shift amount `%s` must be an unsigned integer", err_str); + x->mode = Addressing_Invalid; + return; + } + + + if (x->mode == Addressing_Constant) { + if (y->mode == Addressing_Constant) { + ExactValue y_val = exact_value_to_integer(y->value); + if (y_val.kind != ExactValue_Integer) { + gbString err_str = expr_to_string(y->expr); + defer (gb_string_free(err_str)); + error(&c->error_collector, ast_node_token(node), + "Shift amount `%s` must be an unsigned integer", err_str); + x->mode = Addressing_Invalid; + return; + } + + u64 amount = cast(u64)y_val.value_integer; + if (amount > 1074) { + gbString err_str = expr_to_string(y->expr); + defer (gb_string_free(err_str)); + error(&c->error_collector, ast_node_token(node), + "Shift amount too large: `%s`", err_str); + x->mode = Addressing_Invalid; + return; + } + + if (!is_type_integer(x->type)) { + // NOTE(bill): It could be an untyped float but still representable + // as an integer + x->type = t_untyped_integer; + } + + x->value = exact_value_shift(be->op, x_val, make_exact_value_integer(amount)); + + if (is_type_typed(x->type)) { + check_is_expressible(c, x, get_base_type(x->type)); + } + return; + } + + if (x_is_untyped) { + ExpressionInfo *info = map_get(&c->info.untyped, hash_pointer(x->expr)); + if (info != NULL) { + info->is_lhs = true; + } + x->mode = Addressing_Value; + return; + } + } + + if (y->mode == Addressing_Constant && y->value.value_integer < 0) { + gbString err_str = expr_to_string(y->expr); + defer 
(gb_string_free(err_str)); + error(&c->error_collector, ast_node_token(node), + "Shift amount cannot be negative: `%s`", err_str); + } + + x->mode = Addressing_Value; } void check_binary_expr(Checker *c, Operand *x, AstNode *node) { @@ -684,13 +818,20 @@ void check_binary_expr(Checker *c, Operand *x, AstNode *node) { check_expr(c, x, be->left); check_expr(c, y, be->right); - if (x->mode == Addressing_Invalid) return; + if (x->mode == Addressing_Invalid) { + return; + } if (y->mode == Addressing_Invalid) { x->mode = Addressing_Invalid; x->expr = y->expr; return; } + if (token_is_shift(be->op)) { + check_shift(c, x, y, node); + return; + } + convert_to_typed(c, x, y->type); if (x->mode == Addressing_Invalid) return; convert_to_typed(c, y, x->type); @@ -791,8 +932,12 @@ void update_expr_type(Checker *c, AstNode *e, Type *type, b32 final) { if (found->value.kind != ExactValue_Invalid) break; if (!token_is_comparison(be->op)) { - update_expr_type(c, be->left, type, final); - update_expr_type(c, be->right, type, final); + if (token_is_shift(be->op)) { + update_expr_type(c, be->left, type, final); + } else { + update_expr_type(c, be->left, type, final); + update_expr_type(c, be->right, type, final); + } } case_end; } @@ -1891,6 +2036,7 @@ ExpressionKind check__expr_base(Checker *c, Operand *o, AstNode *node, Type *typ case AstNode_ProcType: case AstNode_PointerType: case AstNode_ArrayType: + case AstNode_VectorType: case AstNode_StructType: o->mode = Addressing_Type; o->type = check_type(c, node); @@ -2123,6 +2269,13 @@ gbString write_expr_to_string(gbString str, AstNode *node) { str = write_expr_to_string(str, at->elem); case_end; + case_ast_node(vt, VectorType, node); + str = gb_string_appendc(str, "{"); + str = write_expr_to_string(str, vt->count); + str = gb_string_appendc(str, "}"); + str = write_expr_to_string(str, vt->elem); + case_end; + case_ast_node(ce, CallExpr, node); str = write_expr_to_string(str, ce->proc); str = gb_string_appendc(str, "("); diff --git 
a/src/checker/type.cpp b/src/checker/type.cpp index 1b0977eec..0af46bd59 100644 --- a/src/checker/type.cpp +++ b/src/checker/type.cpp @@ -7,12 +7,16 @@ enum BasicKind { Basic_i16, Basic_i32, Basic_i64, + Basic_i128, Basic_u8, Basic_u16, Basic_u32, Basic_u64, + Basic_u128, + Basic_f16, Basic_f32, Basic_f64, + Basic_f128, Basic_int, Basic_uint, Basic_rawptr, @@ -56,6 +60,7 @@ struct BasicType { TYPE_KIND(Invalid), \ TYPE_KIND(Basic), \ TYPE_KIND(Array), \ + TYPE_KIND(Vector), \ TYPE_KIND(Slice), \ TYPE_KIND(Structure), \ TYPE_KIND(Pointer), \ @@ -85,6 +90,10 @@ struct Type { Type *elem; i64 count; } array; + struct { + Type *elem; + i64 count; + } vector; struct { Type *elem; } slice; @@ -159,6 +168,13 @@ Type *make_type_array(gbAllocator a, Type *elem, i64 count) { return t; } +Type *make_type_vector(gbAllocator a, Type *elem, i64 count) { + Type *t = alloc_type(a, Type_Vector); + t->vector.elem = elem; + t->vector.count = count; + return t; +} + Type *make_type_slice(gbAllocator a, Type *elem) { Type *t = alloc_type(a, Type_Slice); t->array.elem = elem; @@ -223,12 +239,16 @@ gb_global Type basic_types[] = { {Type_Basic, {Basic_i16, BasicFlag_Integer, STR_LIT("i16")}}, {Type_Basic, {Basic_i32, BasicFlag_Integer, STR_LIT("i32")}}, {Type_Basic, {Basic_i64, BasicFlag_Integer, STR_LIT("i64")}}, + {Type_Basic, {Basic_i128, BasicFlag_Integer, STR_LIT("i128")}}, {Type_Basic, {Basic_u8, BasicFlag_Integer | BasicFlag_Unsigned, STR_LIT("u8")}}, {Type_Basic, {Basic_u16, BasicFlag_Integer | BasicFlag_Unsigned, STR_LIT("u16")}}, {Type_Basic, {Basic_u32, BasicFlag_Integer | BasicFlag_Unsigned, STR_LIT("u32")}}, {Type_Basic, {Basic_u64, BasicFlag_Integer | BasicFlag_Unsigned, STR_LIT("u64")}}, + {Type_Basic, {Basic_u128, BasicFlag_Integer | BasicFlag_Unsigned, STR_LIT("u128")}}, + {Type_Basic, {Basic_f16, BasicFlag_Float, STR_LIT("f16")}}, {Type_Basic, {Basic_f32, BasicFlag_Float, STR_LIT("f32")}}, {Type_Basic, {Basic_f64, BasicFlag_Float, STR_LIT("f64")}}, + {Type_Basic, 
{Basic_f128, BasicFlag_Float, STR_LIT("f128")}}, {Type_Basic, {Basic_int, BasicFlag_Integer, STR_LIT("int")}}, {Type_Basic, {Basic_uint, BasicFlag_Integer | BasicFlag_Unsigned, STR_LIT("uint")}}, {Type_Basic, {Basic_rawptr, BasicFlag_Pointer, STR_LIT("rawptr")}}, @@ -256,8 +276,10 @@ gb_global Type *t_u8 = &basic_types[Basic_u8]; gb_global Type *t_u16 = &basic_types[Basic_u16]; gb_global Type *t_u32 = &basic_types[Basic_u32]; gb_global Type *t_u64 = &basic_types[Basic_u64]; +gb_global Type *t_f16 = &basic_types[Basic_f16]; gb_global Type *t_f32 = &basic_types[Basic_f32]; gb_global Type *t_f64 = &basic_types[Basic_f64]; +gb_global Type *t_f128 = &basic_types[Basic_f128]; gb_global Type *t_int = &basic_types[Basic_int]; gb_global Type *t_uint = &basic_types[Basic_uint]; gb_global Type *t_rawptr = &basic_types[Basic_rawptr]; @@ -295,6 +317,8 @@ b32 is_type_unsigned(Type *t) { b32 is_type_numeric(Type *t) { if (t->kind == Type_Basic) return (t->basic.flags & BasicFlag_Numeric) != 0; + if (t->kind == Type_Vector) + return is_type_numeric(t->vector.elem); return false; } b32 is_type_string(Type *t) { @@ -351,13 +375,22 @@ b32 is_type_u8(Type *t) { b32 is_type_slice(Type *t) { return t->kind == Type_Slice; } - - b32 is_type_u8_slice(Type *t) { if (t->kind == Type_Slice) return is_type_u8(t->slice.elem); return false; } +b32 is_type_vector(Type *t) { + return t->kind == Type_Vector; +} +Type *base_vector_type(Type *t) { + while (is_type_vector(t)) { + t = t->vector.elem; + } + return t; +} + + b32 is_type_comparable(Type *t) { t = get_base_type(t); @@ -375,6 +408,8 @@ b32 is_type_comparable(Type *t) { } break; case Type_Array: return is_type_comparable(t->array.elem); + case Type_Vector: + return is_type_comparable(t->vector.elem); } return false; } @@ -399,6 +434,11 @@ b32 are_types_identical(Type *x, Type *y) { return (x->array.count == y->array.count) && are_types_identical(x->array.elem, y->array.elem); break; + case Type_Vector: + if (y->kind == Type_Vector) + return 
(x->vector.count == y->vector.count) && are_types_identical(x->vector.elem, y->vector.elem); + break; + case Type_Structure: if (y->kind == Type_Structure) { if (x->structure.field_count == y->structure.field_count) { @@ -482,18 +522,22 @@ struct BaseTypeSizes { // TODO(bill): Change gb_global i64 basic_type_sizes[] = { - 0, // Basic_Invalid - 1, // Basic_bool // TODO(bill): What size should this be? And should I have different booleans? - 1, // Basic_i8 - 2, // Basic_i16 - 4, // Basic_i32 - 8, // Basic_i64 - 1, // Basic_u8 - 2, // Basic_u16 - 4, // Basic_u32 - 8, // Basic_u64 - 4, // Basic_f32 - 8, // Basic_f64 + 0, // Basic_Invalid + 1, // Basic_bool // TODO(bill): What size should this be? And should I have different booleans? + 1, // Basic_i8 + 2, // Basic_i16 + 4, // Basic_i32 + 8, // Basic_i64 + 16, // Basic_i128 + 1, // Basic_u8 + 2, // Basic_u16 + 4, // Basic_u32 + 8, // Basic_u64 + 16, // Basic_u128 + 2, // Basic_f16 + 4, // Basic_f32 + 8, // Basic_f64 + 16, // Basic_f128 }; @@ -512,6 +556,12 @@ i64 type_align_of(BaseTypeSizes s, gbAllocator allocator, Type *t) { switch (t->kind) { case Type_Array: return type_align_of(s, allocator, t->array.elem); + case Type_Vector: { + i64 size = type_size_of(s, allocator, t->vector.elem); + // TODO(bill): Type_Vector type_align_of + return gb_clamp(size, 1, 2*s.max_align); + } break; + case Type_Structure: { i64 max = 1; for (isize i = 0; i < t->structure.field_count; i++) { @@ -575,6 +625,18 @@ i64 type_size_of(BaseTypeSizes s, gbAllocator allocator, Type *t) { return alignment*(count-1) + size; } break; + case Type_Vector: { + i64 count = t->vector.count; + if (count == 0) + return 0; + count = next_pow2(count); + i64 align = type_align_of(s, allocator, t->vector.elem); + i64 size = type_size_of(s, allocator, t->vector.elem); + i64 alignment = align_formula(size, align); + return alignment*(count-1) + size; + } break; + + case Type_Slice: // ptr + len + cap return 3 * s.word_size; @@ -617,6 +679,11 @@ gbString 
write_type_to_string(gbString str, Type *type) { str = write_type_to_string(str, type->array.elem); break; + case Type_Vector: + str = gb_string_appendc(str, gb_bprintf("{%td}", type->vector.count)); + str = write_type_to_string(str, type->vector.elem); + break; + case Type_Slice: str = gb_string_appendc(str, "[]"); str = write_type_to_string(str, type->array.elem); diff --git a/src/codegen/print_llvm.cpp b/src/codegen/print_llvm.cpp index 5ae423f41..55b03d5ca 100644 --- a/src/codegen/print_llvm.cpp +++ b/src/codegen/print_llvm.cpp @@ -96,14 +96,18 @@ void ssa_print_type(gbFile *f, BaseTypeSizes s, Type *t) { case Basic_i16: ssa_fprintf(f, "i16"); break; case Basic_i32: ssa_fprintf(f, "i32"); break; case Basic_i64: ssa_fprintf(f, "i64"); break; + case Basic_i128: ssa_fprintf(f, "i128"); break; case Basic_u8: ssa_fprintf(f, "i8"); break; case Basic_u16: ssa_fprintf(f, "i16"); break; case Basic_u32: ssa_fprintf(f, "i32"); break; case Basic_u64: ssa_fprintf(f, "i64"); break; + case Basic_u128: ssa_fprintf(f, "i128"); break; /* LLVM integer types are signless: u128 is printed as i128, like u8..u64 above */ + case Basic_f16: ssa_fprintf(f, "half"); break; case Basic_f32: ssa_fprintf(f, "float"); break; case Basic_f64: ssa_fprintf(f, "double"); break; - case Basic_rawptr: ssa_fprintf(f, "%%-rawptr"); break; - case Basic_string: ssa_fprintf(f, "%%-string"); break; + case Basic_f128: ssa_fprintf(f, "fp128"); break; + case Basic_rawptr: ssa_fprintf(f, "%%.rawptr"); break; + case Basic_string: ssa_fprintf(f, "%%.string"); break; case Basic_uint: ssa_fprintf(f, "i%lld", word_bits); break; case Basic_int: ssa_fprintf(f, "i%lld", word_bits); break; } @@ -113,6 +117,12 @@ void ssa_print_type(gbFile *f, BaseTypeSizes s, Type *t) { ssa_print_type(f, s, t->array.elem); ssa_fprintf(f, "]"); break; + case Type_Vector: { + // TODO(bill): actually do correctly + ssa_fprintf(f, "<%lld x ", t->vector.count); + ssa_print_type(f, s, t->vector.elem); + ssa_fprintf(f, ">"); + } break; case Type_Slice: ssa_fprintf(f, "{"); ssa_print_type(f, s, t->slice.elem); @@ -366,11
+376,15 @@ void ssa_print_instr(gbFile *f, ssaModule *m, ssaValue *value) { case ssaInstr_BinaryOp: { auto *bo = &value->instr.binary_op; Type *type = ssa_value_type(bo->left); + Type *elem_type = type; + while (elem_type->kind == Type_Vector) { + elem_type = elem_type->vector.elem; + } ssa_fprintf(f, "%%%d = ", value->id); if (gb_is_between(bo->op.kind, Token__ComparisonBegin+1, Token__ComparisonEnd-1)) { - if (is_type_float(type)) { + if (is_type_float(elem_type)) { ssa_fprintf(f, "fcmp "); switch (bo->op.kind) { case Token_CmpEq: ssa_fprintf(f, "oeq"); break; @@ -384,7 +398,7 @@ void ssa_print_instr(gbFile *f, ssaModule *m, ssaValue *value) { ssa_fprintf(f, "icmp "); if (bo->op.kind != Token_CmpEq && bo->op.kind != Token_NotEq) { - if (is_type_unsigned(type)) { + if (is_type_unsigned(elem_type)) { ssa_fprintf(f, "u"); } else { ssa_fprintf(f, "s"); @@ -400,24 +414,25 @@ void ssa_print_instr(gbFile *f, ssaModule *m, ssaValue *value) { } } } else { - if (is_type_float(type)) + if (is_type_float(elem_type)) ssa_fprintf(f, "f"); switch (bo->op.kind) { case Token_Add: ssa_fprintf(f, "add"); break; case Token_Sub: ssa_fprintf(f, "sub"); break; case Token_And: ssa_fprintf(f, "and"); break; - case Token_Or: ssa_fprintf(f, "or"); break; + case Token_Or: ssa_fprintf(f, "or"); break; case Token_Xor: ssa_fprintf(f, "xor"); break; + case Token_Shl: ssa_fprintf(f, "shl"); break; + case Token_Shr: ssa_fprintf(f, is_type_unsigned(elem_type) ? "lshr" : "ashr"); break; /* signed operands need an arithmetic (sign-extending) right shift; lshr would zero-fill */ + case Token_Mul: ssa_fprintf(f, "mul"); break; case Token_AndNot: GB_PANIC("Token_AndNot Should never be called"); - case Token_Mul: ssa_fprintf(f, "mul"); break; - default: { - if (!is_type_float(type)) { - if (is_type_unsigned(type)) ssa_fprintf(f, "u"); - else ssa_fprintf(f, "s"); + if (!is_type_float(elem_type)) { + if (is_type_unsigned(elem_type)) ssa_fprintf(f, "u"); + else ssa_fprintf(f, "s"); } switch (bo->op.kind) { @@ -483,12 +498,12 @@ void ssa_print_llvm_ir(gbFile *f, ssaModule *m) { ssa_fprintf(f, "target datalayout = %.*s\n",
LIT(m->layout)); } - ssa_print_encoded_local(f, make_string("-string")); + ssa_print_encoded_local(f, make_string(".string")); ssa_fprintf(f, " = type {i8*, "); ssa_print_type(f, m->sizes, t_int); ssa_fprintf(f, "} ; Basic_string\n\n"); - ssa_print_encoded_local(f, make_string("-rawptr")); + ssa_print_encoded_local(f, make_string(".rawptr")); ssa_fprintf(f, " = type i8*"); ssa_fprintf(f, " ; Basic_rawptr\n\n"); diff --git a/src/codegen/ssa.cpp b/src/codegen/ssa.cpp index 310e1c423..57887d3f1 100644 --- a/src/codegen/ssa.cpp +++ b/src/codegen/ssa.cpp @@ -284,8 +284,15 @@ Type *ssa_instr_type(ssaInstr *instr) { return instr->binary_op.type; case ssaInstr_Conv: return instr->conv.to; - case ssaInstr_Call: - return instr->call.type; + case ssaInstr_Call: { + Type *pt = instr->call.type; + GB_ASSERT(pt->kind == Type_Proc); + auto *tuple = &pt->proc.results->tuple; + if (tuple->variable_count != 1) + return pt->proc.results; + else + return tuple->variables[0]->type; + } } return NULL; } @@ -560,12 +567,28 @@ b32 ssa_is_blank_ident(AstNode *node) { } +ssaInstr *ssa_get_last_instr(ssaBlock *block) { + isize len = gb_array_count(block->instrs); + if (len > 0) { + ssaValue *v = block->instrs[len-1]; + GB_ASSERT(v->kind == ssaValue_Instr); + return &v->instr; + } + return NULL; + +} ssaValue *ssa_emit(ssaProcedure *proc, ssaValue *instr) { ssaBlock *b = proc->curr_block; instr->instr.parent = b; if (b) { - gb_array_append(b->instrs, instr); + ssaInstr *i = ssa_get_last_instr(b); + if (i && (i->kind == ssaInstr_Ret || i->kind == ssaInstr_Unreachable)) { + // NOTE(bill): any instruction in the current block after a `ret` + // or an `unreachable`, is never executed + } else { + gb_array_append(b->instrs, instr); + } } return instr; } @@ -688,11 +711,13 @@ void ssa_emit_if(ssaProcedure *proc, ssaValue *cond, ssaBlock *true_block, ssaBl ssaBlock *ssa__make_block(ssaProcedure *proc, AstNode *node, String label) { Scope *scope = NULL; - Scope **found = 
map_get(&proc->module->info->scopes, hash_pointer(node)); - if (found) { - scope = *found; - } else { - GB_PANIC("Block scope not found"); + if (node != NULL) { + Scope **found = map_get(&proc->module->info->scopes, hash_pointer(node)); + if (found) { + scope = *found; + } else { + GB_PANIC("Block scope not found for %.*s", LIT(ast_node_strings[node->kind])); + } } ssaValue *block = ssa_make_value_block(proc, node, scope, label); @@ -1021,6 +1046,8 @@ ssaValue *ssa_emit_conv(ssaProcedure *proc, ssaValue *value, Type *t) { Type *src = get_base_type(src_type); Type *dst = get_base_type(t); + if (are_types_identical(t, src_type)) + return value; if (value->kind == ssaValue_Constant) { if (dst->kind == Type_Basic) @@ -1035,6 +1062,12 @@ ssaValue *ssa_emit_conv(ssaProcedure *proc, ssaValue *value, Type *t) { if (dz >= sz) { kind = ssaConv_zext; } + + if (sz == dz) { + // NOTE(bill): In LLVM, all integers are signed and rely upon 2's compliment + return value; + } + return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst)); } @@ -1117,7 +1150,7 @@ ssaValue *ssa_build_single_expr(ssaProcedure *proc, AstNode *expr, TypeAndValue case_ast_node(i, Ident, expr); Entity *e = *map_get(&proc->module->info->uses, hash_pointer(expr)); if (e->kind == Entity_Builtin) { - GB_PANIC("TODO(bill): Entity_Builtin"); + GB_PANIC("TODO(bill): ssa_build_single_expr Entity_Builtin"); return NULL; } @@ -1181,6 +1214,8 @@ ssaValue *ssa_build_single_expr(ssaProcedure *proc, AstNode *expr, TypeAndValue case Token_Or: case Token_Xor: case Token_AndNot: + case Token_Shl: + case Token_Shr: return ssa_emit_arith(proc, be->op, ssa_build_expr(proc, be->left), ssa_build_expr(proc, be->right), diff --git a/src/common.cpp b/src/common.cpp index d1905f1e8..2a0cd8371 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -20,6 +20,17 @@ gb_inline u64 hash_pointer(void *ptr) { return p; } +i64 next_pow2(i64 n) { + n--; + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n 
|= n >> 32; + n++; + return n; +} #define gb_for_array(index_, array_) for (isize index_ = 0; index_ < gb_array_count(array_); index_++) diff --git a/src/exact_value.cpp b/src/exact_value.cpp index f9a069c73..d62779462 100644 --- a/src/exact_value.cpp +++ b/src/exact_value.cpp @@ -272,6 +272,8 @@ ExactValue exact_binary_operator_value(Token op, ExactValue x, ExactValue y) { case Token_Or: c = a | b; break; case Token_Xor: c = a ^ b; break; case Token_AndNot: c = a&(~b); break; + case Token_Shl: c = a << b; break; + case Token_Shr: c = a >> b; break; default: goto error; } return make_exact_value_integer(c); @@ -296,10 +298,11 @@ error: return error_value; } -gb_inline ExactValue exact_value_add(ExactValue x, ExactValue y) { Token op = {Token_Add}; return exact_binary_operator_value(op, x, y); } -gb_inline ExactValue exact_value_sub(ExactValue x, ExactValue y) { Token op = {Token_Sub}; return exact_binary_operator_value(op, x, y); } -gb_inline ExactValue exact_value_mul(ExactValue x, ExactValue y) { Token op = {Token_Mul}; return exact_binary_operator_value(op, x, y); } -gb_inline ExactValue exact_value_quo(ExactValue x, ExactValue y) { Token op = {Token_Quo}; return exact_binary_operator_value(op, x, y); } +gb_inline ExactValue exact_value_add(ExactValue x, ExactValue y) { Token op = {Token_Add}; return exact_binary_operator_value(op, x, y); } +gb_inline ExactValue exact_value_sub(ExactValue x, ExactValue y) { Token op = {Token_Sub}; return exact_binary_operator_value(op, x, y); } +gb_inline ExactValue exact_value_mul(ExactValue x, ExactValue y) { Token op = {Token_Mul}; return exact_binary_operator_value(op, x, y); } +gb_inline ExactValue exact_value_quo(ExactValue x, ExactValue y) { Token op = {Token_Quo}; return exact_binary_operator_value(op, x, y); } +gb_inline ExactValue exact_value_shift(Token op, ExactValue x, ExactValue y) { return exact_binary_operator_value(op, x, y); } i32 cmp_f64(f64 a, f64 b) { diff --git a/src/parser.cpp b/src/parser.cpp index 
ae2162059..43d3cf196 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -197,6 +197,11 @@ AST_NODE_KIND(_TypeBegin, struct{}) \ AstNode *count; \ AstNode *elem; \ }) \ + AST_NODE_KIND(VectorType, struct { \ + Token token; \ + AstNode *count; \ + AstNode *elem; \ + }) \ AST_NODE_KIND(StructType, struct { \ Token token; \ AstNode *field_list; \ @@ -341,6 +346,8 @@ Token ast_node_token(AstNode *node) { return node->PointerType.token; case AstNode_ArrayType: return node->ArrayType.token; + case AstNode_VectorType: + return node->VectorType.token; case AstNode_StructType: return node->StructType.token; } @@ -725,6 +732,14 @@ gb_inline AstNode *make_array_type(AstFile *f, Token token, AstNode *count, AstN return result; } +gb_inline AstNode *make_vector_type(AstFile *f, Token token, AstNode *count, AstNode *elem) { + AstNode *result = make_node(f, AstNode_VectorType); + result->VectorType.token = token; + result->VectorType.count = count; + result->VectorType.elem = elem; + return result; +} + gb_inline AstNode *make_struct_type(AstFile *f, Token token, AstNode *field_list, isize field_count) { AstNode *result = make_node(f, AstNode_StructType); result->StructType.token = token; @@ -1031,6 +1046,7 @@ b32 is_literal_type(AstNode *node) { case AstNode_BadExpr: case AstNode_Ident: case AstNode_ArrayType: + case AstNode_VectorType: case AstNode_StructType: return true; } @@ -1275,6 +1291,8 @@ AstNode *parse_simple_stmt(AstFile *f) { case Token_AndEq: case Token_OrEq: case Token_XorEq: + case Token_ShlEq: + case Token_ShrEq: case Token_AndNotEq: case Token_CmpAndEq: case Token_CmpOrEq: @@ -1458,6 +1476,15 @@ AstNode *parse_identifier_or_type(AstFile *f) { return make_array_type(f, token, count_expr, parse_type(f)); } + case Token_OpenBrace: { + f->expr_level++; + Token token = expect_token(f, Token_OpenBrace); + AstNode *count_expr = parse_expr(f, false); + expect_token(f, Token_CloseBrace); + f->expr_level--; + return make_vector_type(f, token, count_expr, parse_type(f)); + 
} + case Token_struct: { Token token = expect_token(f, Token_struct); Token open, close; diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index e34fba9fa..79a4ed755 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -52,6 +52,8 @@ TOKEN_KIND(Token__OperatorBegin, "_OperatorBegin"), \ TOKEN_KIND(Token_Or, "|"), \ TOKEN_KIND(Token_Xor, "~"), \ TOKEN_KIND(Token_AndNot, "&~"), \ + TOKEN_KIND(Token_Shl, "<<"), \ + TOKEN_KIND(Token_Shr, ">>"), \ TOKEN_KIND(Token__AssignOpBegin, "_AssignOpBegin"), \ TOKEN_KIND(Token_AddEq, "+="), \ TOKEN_KIND(Token_SubEq, "-="), \ @@ -62,6 +64,8 @@ TOKEN_KIND(Token__AssignOpBegin, "_AssignOpBegin"), \ TOKEN_KIND(Token_OrEq, "|="), \ TOKEN_KIND(Token_XorEq, "~="), \ TOKEN_KIND(Token_AndNotEq, "&~="), \ + TOKEN_KIND(Token_ShlEq, "<<="), \ + TOKEN_KIND(Token_ShrEq, ">>="), \ TOKEN_KIND(Token__AssignOpEnd, "_AssignOpEnd"), \ TOKEN_KIND(Token_Increment, "++"), \ TOKEN_KIND(Token_Decrement, "--"), \ @@ -217,6 +221,8 @@ i32 token_precedence(Token t) { case Token_Mod: case Token_And: case Token_AndNot: + case Token_Shl: + case Token_Shr: return 5; } @@ -236,6 +242,9 @@ gb_inline b32 token_is_keyword(Token t) { gb_inline b32 token_is_comparison(Token t) { return gb_is_between(t.kind, Token__ComparisonBegin+1, Token__ComparisonEnd-1); } +gb_inline b32 token_is_shift(Token t) { + return t.kind == Token_Shl || t.kind == Token_Shr; +} gb_inline void print_token(Token t) { gb_printf("%.*s\n", LIT(t.string)); } @@ -561,7 +570,7 @@ b32 scan_escape(Tokenizer *t, Rune quote) { return true; } -gb_inline TokenKind token_type_variant2(Tokenizer *t, TokenKind a, TokenKind b) { +gb_inline TokenKind token_kind_variant2(Tokenizer *t, TokenKind a, TokenKind b) { if (t->curr_rune == '=') { advance_to_next_rune(t); return b; @@ -570,7 +579,7 @@ gb_inline TokenKind token_type_variant2(Tokenizer *t, TokenKind a, TokenKind b) } -gb_inline TokenKind token_type_variant3(Tokenizer *t, TokenKind a, TokenKind b, Rune ch_c, TokenKind c) { +gb_inline TokenKind 
token_kind_variant3(Tokenizer *t, TokenKind a, TokenKind b, Rune ch_c, TokenKind c) { if (t->curr_rune == '=') { advance_to_next_rune(t); return b; @@ -582,7 +591,7 @@ gb_inline TokenKind token_type_variant3(Tokenizer *t, TokenKind a, TokenKind b, return a; } -gb_inline TokenKind token_type_variant4(Tokenizer *t, TokenKind a, TokenKind b, Rune ch_c, TokenKind c, Rune ch_d, TokenKind d) { +gb_inline TokenKind token_kind_variant4(Tokenizer *t, TokenKind a, TokenKind b, Rune ch_c, TokenKind c, Rune ch_d, TokenKind d) { if (t->curr_rune == '=') { advance_to_next_rune(t); return b; @@ -596,6 +605,22 @@ gb_inline TokenKind token_type_variant4(Tokenizer *t, TokenKind a, TokenKind b, return a; } + +gb_inline TokenKind token_kind_dub_eq(Tokenizer *t, Rune sing_rune, TokenKind sing, TokenKind sing_eq, TokenKind dub, TokenKind dub_eq) { + if (t->curr_rune == '=') { + advance_to_next_rune(t); + return sing_eq; + } else if (t->curr_rune == sing_rune) { + advance_to_next_rune(t); + if (t->curr_rune == '=') { + advance_to_next_rune(t); + return dub_eq; + } + return dub; + } + return sing; +} + Token tokenizer_get_token(Tokenizer *t) { Token token = {}; Rune curr_rune; @@ -736,16 +761,25 @@ Token tokenizer_get_token(Tokenizer *t) { case '}': token.kind = Token_CloseBrace; break; case ':': token.kind = Token_Colon; break; - case '*': token.kind = token_type_variant2(t, Token_Mul, Token_MulEq); break; - case '/': token.kind = token_type_variant2(t, Token_Quo, Token_QuoEq); break; - case '%': token.kind = token_type_variant2(t, Token_Mod, Token_ModEq); break; - case '=': token.kind = token_type_variant2(t, Token_Eq, Token_CmpEq); break; - case '~': token.kind = token_type_variant2(t, Token_Xor, Token_XorEq); break; - case '!': token.kind = token_type_variant2(t, Token_Not, Token_NotEq); break; - case '>': token.kind = token_type_variant2(t, Token_Gt, Token_GtEq); break; - case '<': token.kind = token_type_variant3(t, Token_Lt, Token_LtEq, '-', Token_ArrowLeft); break; - case '+': 
token.kind = token_type_variant3(t, Token_Add, Token_AddEq, '+', Token_Increment); break; - case '-': token.kind = token_type_variant4(t, Token_Sub, Token_SubEq, '-', Token_Decrement, '>', Token_ArrowRight); break; + case '*': token.kind = token_kind_variant2(t, Token_Mul, Token_MulEq); break; + case '/': token.kind = token_kind_variant2(t, Token_Quo, Token_QuoEq); break; + case '%': token.kind = token_kind_variant2(t, Token_Mod, Token_ModEq); break; + case '=': token.kind = token_kind_variant2(t, Token_Eq, Token_CmpEq); break; + case '~': token.kind = token_kind_variant2(t, Token_Xor, Token_XorEq); break; + case '!': token.kind = token_kind_variant2(t, Token_Not, Token_NotEq); break; + case '+': token.kind = token_kind_variant3(t, Token_Add, Token_AddEq, '+', Token_Increment); break; + case '-': token.kind = token_kind_variant4(t, Token_Sub, Token_SubEq, '-', Token_Decrement, '>', Token_ArrowRight); break; + + case '<': + if (t->curr_rune == '-') { + advance_to_next_rune(t); /* consume the '-' so "<-" is a single token, as token_type_variant3 did before */ token.kind = Token_ArrowLeft; + } else { + token.kind = token_kind_dub_eq(t, '<', Token_Lt, Token_LtEq, Token_Shl, Token_ShlEq); + } + break; + case '>': + token.kind = token_kind_dub_eq(t, '>', Token_Gt, Token_GtEq, Token_Shr, Token_ShrEq); + break; case '&': token.kind = Token_And; @@ -757,25 +791,18 @@ Token tokenizer_get_token(Tokenizer *t) { advance_to_next_rune(t); } } else { - token.kind = token_type_variant3(t, Token_And, Token_AndEq, '&', Token_CmpAnd); - if (t->curr_rune == '=') { - token.kind = Token_CmpAndEq; - advance_to_next_rune(t); - } + token.kind = token_kind_dub_eq(t, '&', Token_And, Token_AndEq, Token_CmpAnd, Token_CmpAndEq); } break; - case '|': - token.kind = token_type_variant3(t, Token_Or, Token_OrEq, '|', Token_CmpOr); - if (t->curr_rune == '=') { - token.kind = Token_CmpOrEq; - advance_to_next_rune(t); - } - break; + case '|': token.kind = token_kind_dub_eq(t, '|', Token_Or, Token_OrEq, Token_CmpOr, Token_CmpOrEq); break; default: - if (curr_rune != GB_RUNE_BOM) - tokenizer_err(t, "Illegal
character: %c (%d) ", cast(char)curr_rune, curr_rune); + if (curr_rune != GB_RUNE_BOM) { + u8 str[4] = {}; + int len = cast(int)gb_utf8_encode_rune(str, curr_rune); + tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune); + } token.kind = Token_Invalid; break; }