From 4c655865e5d9af83a98c137609b01972f4e51beb Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 18 Oct 2021 16:52:19 +0100 Subject: [PATCH 01/47] Begin work on matrix type --- core/fmt/fmt.odin | 2 + core/reflect/reflect.odin | 6 +- core/reflect/types.odin | 14 +++++ core/runtime/core.odin | 9 +++ core/runtime/print.odin | 8 +++ src/check_expr.cpp | 52 +++++++++++++++- src/check_type.cpp | 68 +++++++++++++++++++++ src/checker.cpp | 2 + src/llvm_backend_general.cpp | 18 ++++++ src/llvm_backend_type.cpp | 19 ++++++ src/parser.cpp | 54 ++++++++++++++++- src/parser.hpp | 7 +++ src/parser_pos.cpp | 4 ++ src/types.cpp | 113 +++++++++++++++++++++++++++++++++-- 14 files changed, 367 insertions(+), 9 deletions(-) diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 77b848315..cee00da23 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1953,6 +1953,8 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { } } + case runtime.Type_Info_Matrix: + io.write_string(fi.writer, "[]") } } diff --git a/core/reflect/reflect.odin b/core/reflect/reflect.odin index f509ffe1b..7f64d0974 100644 --- a/core/reflect/reflect.odin +++ b/core/reflect/reflect.odin @@ -33,6 +33,7 @@ Type_Info_Bit_Set :: runtime.Type_Info_Bit_Set Type_Info_Simd_Vector :: runtime.Type_Info_Simd_Vector Type_Info_Relative_Pointer :: runtime.Type_Info_Relative_Pointer Type_Info_Relative_Slice :: runtime.Type_Info_Relative_Slice +Type_Info_Matrix :: runtime.Type_Info_Matrix Type_Info_Enum_Value :: runtime.Type_Info_Enum_Value @@ -66,6 +67,7 @@ Type_Kind :: enum { Simd_Vector, Relative_Pointer, Relative_Slice, + Matrix, } @@ -99,6 +101,7 @@ type_kind :: proc(T: typeid) -> Type_Kind { case Type_Info_Simd_Vector: return .Simd_Vector case Type_Info_Relative_Pointer: return .Relative_Pointer case Type_Info_Relative_Slice: return .Relative_Slice + case Type_Info_Matrix: return .Matrix } } @@ -1401,7 +1404,8 @@ equal :: proc(a, b: any, including_indirect_array_recursion := false, recursion_ Type_Info_Bit_Set, 
Type_Info_Enum, Type_Info_Simd_Vector, - Type_Info_Relative_Pointer: + Type_Info_Relative_Pointer, + Type_Info_Matrix: return mem.compare_byte_ptrs((^byte)(a.data), (^byte)(b.data), t.size) == 0 case Type_Info_String: diff --git a/core/reflect/types.odin b/core/reflect/types.odin index d0a96a088..cf79abb07 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -164,6 +164,12 @@ are_types_identical :: proc(a, b: ^Type_Info) -> bool { case Type_Info_Relative_Slice: y := b.variant.(Type_Info_Relative_Slice) or_return return x.base_integer == y.base_integer && x.slice == y.slice + + case Type_Info_Matrix: + y := b.variant.(Type_Info_Matrix) or_return + if x.row_count != y.row_count { return false } + if x.column_count != y.column_count { return false } + return are_types_identical(x.elem, y.elem) } return false @@ -584,6 +590,14 @@ write_type_writer :: proc(w: io.Writer, ti: ^Type_Info, n_written: ^int = nil) - write_type(w, info.base_integer, &n) or_return io.write_string(w, ") ", &n) or_return write_type(w, info.slice, &n) or_return + + case Type_Info_Matrix: + io.write_string(w, "[", &n) or_return + io.write_i64(w, i64(info.row_count), 10, &n) or_return + io.write_string(w, "; ", &n) or_return + io.write_i64(w, i64(info.column_count), 10, &n) or_return + io.write_string(w, "]", &n) or_return + write_type(w, info.elem, &n) or_return } return diff --git a/core/runtime/core.odin b/core/runtime/core.odin index 36a88a8b5..611b4002c 100644 --- a/core/runtime/core.odin +++ b/core/runtime/core.odin @@ -162,6 +162,13 @@ Type_Info_Relative_Slice :: struct { slice: ^Type_Info, base_integer: ^Type_Info, } +Type_Info_Matrix :: struct { + elem: ^Type_Info, + elem_size: int, + stride: int, // bytes + row_count: int, + column_count: int, +} Type_Info_Flag :: enum u8 { Comparable = 0, @@ -202,6 +209,7 @@ Type_Info :: struct { Type_Info_Simd_Vector, Type_Info_Relative_Pointer, Type_Info_Relative_Slice, + Type_Info_Matrix, }, } @@ -233,6 +241,7 @@ Typeid_Kind :: enum u8 
{ Simd_Vector, Relative_Pointer, Relative_Slice, + Matrix, } #assert(len(Typeid_Kind) < 32) diff --git a/core/runtime/print.odin b/core/runtime/print.odin index 3ccd4ef90..f32ac0831 100644 --- a/core/runtime/print.odin +++ b/core/runtime/print.odin @@ -370,5 +370,13 @@ print_type :: proc "contextless" (ti: ^Type_Info) { print_type(info.base_integer) print_string(") ") print_type(info.slice) + + case Type_Info_Matrix: + print_string("[") + print_u64(u64(info.row_count)) + print_string("; ") + print_u64(u64(info.column_count)) + print_string("]") + print_type(info.elem) } } diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 513144f11..85f2eeb23 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -6207,6 +6207,17 @@ bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 *max_count, } o->type = t->EnumeratedArray.elem; return true; + + case Type_Matrix: + *max_count = t->Matrix.column_count; + if (indirection) { + o->mode = Addressing_Variable; + } else if (o->mode != Addressing_Variable && + o->mode != Addressing_Constant) { + o->mode = Addressing_Value; + } + o->type = alloc_type_array(t->Matrix.elem, t->Matrix.row_count); + return true; case Type_Slice: o->type = t->Slice.elem; @@ -6505,6 +6516,11 @@ void check_promote_optional_ok(CheckerContext *c, Operand *x, Type **val_type_, } +void check_matrix_index_expr(CheckerContext *c, Operand *o, Ast *node, Type *type_hint) { + error(node, "TODO: matrix index expressions"); +} + + ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type *type_hint) { u32 prev_state_flags = c->state_flags; defer (c->state_flags = prev_state_flags); @@ -8202,6 +8218,8 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type // Okay } else if (is_type_relative_slice(t)) { // Okay + } else if (is_type_matrix(t)) { + // Okay } else { valid = false; } @@ -8266,10 +8284,14 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type } } } + + 
if (type_hint != nullptr && is_type_matrix(t)) { + // TODO(bill): allow matrix columns to be assignable to other types which are the same internally + // if a type hint exists + } + case_end; - - case_ast_node(se, SliceExpr, node); check_expr(c, o, se->expr); node->viral_state_flags |= se->expr->viral_state_flags; @@ -8442,7 +8464,12 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type } case_end; - + + case_ast_node(mie, MatrixIndexExpr, node); + check_matrix_index_expr(c, o, node, type_hint); + o->expr = node; + return Expr_Expr; + case_end; case_ast_node(ce, CallExpr, node); return check_call_expr(c, o, node, ce->proc, ce->args, ce->inlining, type_hint); @@ -8952,6 +8979,15 @@ gbString write_expr_to_string(gbString str, Ast *node, bool shorthand) { str = gb_string_append_rune(str, ']'); case_end; + case_ast_node(mie, MatrixIndexExpr, node); + str = write_expr_to_string(str, mie->expr, shorthand); + str = gb_string_append_rune(str, '['); + str = write_expr_to_string(str, mie->row_index, shorthand); + str = gb_string_appendc(str, "; "); + str = write_expr_to_string(str, mie->column_index, shorthand); + str = gb_string_append_rune(str, ']'); + case_end; + case_ast_node(e, Ellipsis, node); str = gb_string_appendc(str, ".."); str = write_expr_to_string(str, e->expr, shorthand); @@ -9023,6 +9059,16 @@ gbString write_expr_to_string(gbString str, Ast *node, bool shorthand) { str = gb_string_append_rune(str, ']'); str = write_expr_to_string(str, mt->value, shorthand); case_end; + + case_ast_node(mt, MatrixType, node); + str = gb_string_append_rune(str, '['); + str = write_expr_to_string(str, mt->row_count, shorthand); + str = gb_string_appendc(str, "; "); + str = write_expr_to_string(str, mt->column_count, shorthand); + str = gb_string_append_rune(str, ']'); + str = write_expr_to_string(str, mt->elem, shorthand); + case_end; + case_ast_node(f, Field, node); if (f->flags&FieldFlag_using) { diff --git a/src/check_type.cpp b/src/check_type.cpp 
index 0d5c0f977..e752f192d 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -2200,6 +2200,63 @@ void check_map_type(CheckerContext *ctx, Type *type, Ast *node) { // error(node, "'map' types are not yet implemented"); } +void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) { + ast_node(mt, MatrixType, node); + + Operand row = {}; + Operand column = {}; + + i64 row_count = check_array_count(ctx, &row, mt->row_count); + i64 column_count = check_array_count(ctx, &column, mt->column_count); + + Type *elem = check_type_expr(ctx, mt->elem, nullptr); + + Type *generic_row = nullptr; + Type *generic_column = nullptr; + + if (row.mode == Addressing_Type && row.type->kind == Type_Generic) { + generic_row = row.type; + } + + if (column.mode == Addressing_Type && column.type->kind == Type_Generic) { + generic_column = column.type; + } + + if (row_count < MIN_MATRIX_ELEMENT_COUNT && generic_row == nullptr) { + gbString s = expr_to_string(row.expr); + error(row.expr, "Invalid matrix row count, expected %d+ rows, got %s", MIN_MATRIX_ELEMENT_COUNT, s); + gb_string_free(s); + } + + if (column_count < MIN_MATRIX_ELEMENT_COUNT && generic_column == nullptr) { + gbString s = expr_to_string(column.expr); + error(column.expr, "Invalid matrix column count, expected %d+ rows, got %s", MIN_MATRIX_ELEMENT_COUNT, s); + gb_string_free(s); + } + + if (row_count*column_count > MAX_MATRIX_ELEMENT_COUNT) { + i64 element_count = row_count*column_count; + error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MAX_MATRIX_ELEMENT_COUNT, cast(long long)element_count); + } + + if (is_type_integer(elem)) { + // okay + } else if (is_type_float(elem)) { + // okay + } else if (is_type_complex(elem)) { + // okay + } else { + gbString s = type_to_string(elem); + error(column.expr, "Matrix elements types are limited to integers, floats, and complex, got %s", s); + gb_string_free(s); + } + + *type = alloc_type_matrix(elem, row_count, column_count, 
generic_row, generic_column); + + return; +} + + Type *make_soa_struct_internal(CheckerContext *ctx, Ast *array_typ_expr, Ast *elem_expr, Type *elem, i64 count, Type *generic_type, StructSoaKind soa_kind) { Type *bt_elem = base_type(elem); @@ -2785,6 +2842,17 @@ bool check_type_internal(CheckerContext *ctx, Ast *e, Type **type, Type *named_t return true; } case_end; + + + case_ast_node(mt, MatrixType, e); + bool ips = ctx->in_polymorphic_specialization; + defer (ctx->in_polymorphic_specialization = ips); + ctx->in_polymorphic_specialization = false; + + check_matrix_type(ctx, type, e); + set_base_type(named_type, *type); + return true; + case_end; } *type = t_invalid; diff --git a/src/checker.cpp b/src/checker.cpp index d3c0080de..8711fdc0c 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -2458,6 +2458,7 @@ void init_core_type_info(Checker *c) { t_type_info_simd_vector = find_core_type(c, str_lit("Type_Info_Simd_Vector")); t_type_info_relative_pointer = find_core_type(c, str_lit("Type_Info_Relative_Pointer")); t_type_info_relative_slice = find_core_type(c, str_lit("Type_Info_Relative_Slice")); + t_type_info_matrix = find_core_type(c, str_lit("Type_Info_Matrix")); t_type_info_named_ptr = alloc_type_pointer(t_type_info_named); t_type_info_integer_ptr = alloc_type_pointer(t_type_info_integer); @@ -2485,6 +2486,7 @@ void init_core_type_info(Checker *c) { t_type_info_simd_vector_ptr = alloc_type_pointer(t_type_info_simd_vector); t_type_info_relative_pointer_ptr = alloc_type_pointer(t_type_info_relative_pointer); t_type_info_relative_slice_ptr = alloc_type_pointer(t_type_info_relative_slice); + t_type_info_matrix_ptr = alloc_type_pointer(t_type_info_matrix); } void init_mem_allocator(Checker *c) { diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 094275429..ee8f220ef 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1930,6 +1930,24 @@ LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { fields[1] = 
base_integer; return LLVMStructTypeInContext(ctx, fields, field_count, false); } + + case Type_Matrix: + { + i64 size = type_size_of(type); + i64 elem_size = type_size_of(type->Matrix.elem); + GB_ASSERT(elem_size > 0); + i64 elem_count = size/elem_size; + GB_ASSERT(elem_count > 0); + + m->internal_type_level -= 1; + + LLVMTypeRef elem = lb_type(m, type->Matrix.elem); + LLVMTypeRef t = LLVMArrayType(elem, cast(unsigned)elem_count); + + m->internal_type_level += 1; + return t; + } + } GB_PANIC("Invalid type %s", type_to_string(type)); diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index e90bb6f16..82e20bf60 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -42,6 +42,7 @@ lbValue lb_typeid(lbModule *m, Type *type) { case Type_Pointer: kind = Typeid_Pointer; break; case Type_MultiPointer: kind = Typeid_Multi_Pointer; break; case Type_Array: kind = Typeid_Array; break; + case Type_Matrix: kind = Typeid_Matrix; break; case Type_EnumeratedArray: kind = Typeid_Enumerated_Array; break; case Type_Slice: kind = Typeid_Slice; break; case Type_DynamicArray: kind = Typeid_Dynamic_Array; break; @@ -868,7 +869,25 @@ void lb_setup_type_info_data(lbProcedure *p) { // NOTE(bill): Setup type_info da lb_emit_store(p, tag, res); } break; + case Type_Matrix: + { + tag = lb_const_ptr_cast(m, variant_ptr, t_type_info_matrix_ptr); + i64 ez = type_size_of(t->Matrix.elem); + LLVMValueRef vals[5] = { + lb_get_type_info_ptr(m, t->Matrix.elem).value, + lb_const_int(m, t_int, ez).value, + lb_const_int(m, t_int, matrix_type_stride(t)).value, + lb_const_int(m, t_int, t->Matrix.row_count).value, + lb_const_int(m, t_int, t->Matrix.column_count).value, + }; + + lbValue res = {}; + res.type = type_deref(tag.type); + res.value = llvm_const_named_struct(m, res.type, vals, gb_count_of(vals)); + lb_emit_store(p, tag, res); + } + break; } diff --git a/src/parser.cpp b/src/parser.cpp index 716986b5d..499bd337b 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ 
-159,6 +159,11 @@ Ast *clone_ast(Ast *node) { n->IndexExpr.expr = clone_ast(n->IndexExpr.expr); n->IndexExpr.index = clone_ast(n->IndexExpr.index); break; + case Ast_MatrixIndexExpr: + n->MatrixIndexExpr.expr = clone_ast(n->MatrixIndexExpr.expr); + n->MatrixIndexExpr.row_index = clone_ast(n->MatrixIndexExpr.row_index); + n->MatrixIndexExpr.column_index = clone_ast(n->MatrixIndexExpr.column_index); + break; case Ast_DerefExpr: n->DerefExpr.expr = clone_ast(n->DerefExpr.expr); break; @@ -371,6 +376,11 @@ Ast *clone_ast(Ast *node) { n->MapType.key = clone_ast(n->MapType.key); n->MapType.value = clone_ast(n->MapType.value); break; + case Ast_MatrixType: + n->MatrixType.row_count = clone_ast(n->MatrixType.row_count); + n->MatrixType.column_count = clone_ast(n->MatrixType.column_count); + n->MatrixType.elem = clone_ast(n->MatrixType.elem); + break; } return n; @@ -574,6 +584,15 @@ Ast *ast_deref_expr(AstFile *f, Ast *expr, Token op) { } +Ast *ast_matrix_index_expr(AstFile *f, Ast *expr, Token open, Token close, Token interval, Ast *row, Ast *column) { + Ast *result = alloc_ast_node(f, Ast_MatrixIndexExpr); + result->MatrixIndexExpr.expr = expr; + result->MatrixIndexExpr.row_index = row; + result->MatrixIndexExpr.column_index = column; + result->MatrixIndexExpr.open = open; + result->MatrixIndexExpr.close = close; + return result; +} Ast *ast_ident(AstFile *f, Token token) { @@ -1066,6 +1085,14 @@ Ast *ast_map_type(AstFile *f, Token token, Ast *key, Ast *value) { return result; } +Ast *ast_matrix_type(AstFile *f, Token token, Ast *row_count, Ast *column_count, Ast *elem) { + Ast *result = alloc_ast_node(f, Ast_MatrixType); + result->MatrixType.token = token; + result->MatrixType.row_count = row_count; + result->MatrixType.column_count = column_count; + result->MatrixType.elem = elem; + return result; +} Ast *ast_foreign_block_decl(AstFile *f, Token token, Ast *foreign_library, Ast *body, CommentGroup *docs) { @@ -2214,6 +2241,19 @@ Ast *parse_operand(AstFile *f, bool lhs) 
{ count_expr = parse_expr(f, false); f->expr_level--; } + if (allow_token(f, Token_Semicolon)) { + Ast *row_count = count_expr; + Ast *column_count = nullptr; + + f->expr_level++; + column_count = parse_expr(f, false); + f->expr_level--; + + expect_token(f, Token_CloseBracket); + + return ast_matrix_type(f, token, row_count, column_count, parse_type(f)); + } + expect_token(f, Token_CloseBracket); return ast_array_type(f, token, count_expr, parse_type(f)); } break; @@ -2676,6 +2716,11 @@ Ast *parse_atom_expr(AstFile *f, Ast *operand, bool lhs) { case Token_RangeHalf: syntax_error(f->curr_token, "Expected a colon, not a range"); /* fallthrough */ + case Token_Semicolon: // matrix index + if (f->curr_token.kind == Token_Semicolon && f->curr_token.string == "\n") { + syntax_error(f->curr_token, "Expected a ';', not a newline"); + } + /* fallthrough */ case Token_Colon: interval = advance_token(f); is_interval = true; @@ -2691,7 +2736,14 @@ Ast *parse_atom_expr(AstFile *f, Ast *operand, bool lhs) { close = expect_token(f, Token_CloseBracket); if (is_interval) { - operand = ast_slice_expr(f, operand, open, close, interval, indices[0], indices[1]); + if (interval.kind == Token_Semicolon) { + if (indices[0] == nullptr || indices[1] == nullptr) { + syntax_error(open, "Matrix index expressions require both row and column indices"); + } + operand = ast_matrix_index_expr(f, operand, open, close, interval, indices[0], indices[1]); + } else { + operand = ast_slice_expr(f, operand, open, close, interval, indices[0], indices[1]); + } } else { operand = ast_index_expr(f, operand, indices[0], open, close); } diff --git a/src/parser.hpp b/src/parser.hpp index f1779bdbc..b58047dfd 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -407,6 +407,7 @@ AST_KIND(_ExprBegin, "", bool) \ bool is_align_stack; \ InlineAsmDialectKind dialect; \ }) \ + AST_KIND(MatrixIndexExpr, "matrix index expression", struct { Ast *expr, *row_index, *column_index; Token open, close; }) \ AST_KIND(_ExprEnd, 
"", bool) \ AST_KIND(_StmtBegin, "", bool) \ AST_KIND(BadStmt, "bad statement", struct { Token begin, end; }) \ @@ -657,6 +658,12 @@ AST_KIND(_TypeBegin, "", bool) \ Ast *key; \ Ast *value; \ }) \ + AST_KIND(MatrixType, "matrix type", struct { \ + Token token; \ + Ast *row_count; \ + Ast *column_count; \ + Ast *elem; \ + }) \ AST_KIND(_TypeEnd, "", bool) enum AstKind { diff --git a/src/parser_pos.cpp b/src/parser_pos.cpp index 22d12621d..6ef0db215 100644 --- a/src/parser_pos.cpp +++ b/src/parser_pos.cpp @@ -35,6 +35,7 @@ Token ast_token(Ast *node) { } return node->ImplicitSelectorExpr.token; case Ast_IndexExpr: return node->IndexExpr.open; + case Ast_MatrixIndexExpr: return node->MatrixIndexExpr.open; case Ast_SliceExpr: return node->SliceExpr.open; case Ast_Ellipsis: return node->Ellipsis.token; case Ast_FieldValue: return node->FieldValue.eq; @@ -103,6 +104,7 @@ Token ast_token(Ast *node) { case Ast_EnumType: return node->EnumType.token; case Ast_BitSetType: return node->BitSetType.token; case Ast_MapType: return node->MapType.token; + case Ast_MatrixType: return node->MatrixType.token; } return empty_token; @@ -168,6 +170,7 @@ Token ast_end_token(Ast *node) { } return node->ImplicitSelectorExpr.token; case Ast_IndexExpr: return node->IndexExpr.close; + case Ast_MatrixIndexExpr: return node->MatrixIndexExpr.close; case Ast_SliceExpr: return node->SliceExpr.close; case Ast_Ellipsis: if (node->Ellipsis.expr) { @@ -345,6 +348,7 @@ Token ast_end_token(Ast *node) { } return ast_end_token(node->BitSetType.elem); case Ast_MapType: return ast_end_token(node->MapType.value); + case Ast_MatrixType: return ast_end_token(node->MatrixType.elem); } return empty_token; diff --git a/src/types.cpp b/src/types.cpp index a808b54fb..0313ade60 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -270,6 +270,13 @@ struct TypeProc { TYPE_KIND(RelativeSlice, struct { \ Type *slice_type; \ Type *base_integer; \ + }) \ + TYPE_KIND(Matrix, struct { \ + Type *elem; \ + i64 row_count; \ + i64 
column_count; \ + Type *generic_row_count; \ + Type *generic_column_count; \ }) @@ -341,6 +348,7 @@ enum Typeid_Kind : u8 { Typeid_Simd_Vector, Typeid_Relative_Pointer, Typeid_Relative_Slice, + Typeid_Matrix, }; // IMPORTANT NOTE(bill): This must match the same as the in core.odin @@ -349,6 +357,13 @@ enum TypeInfoFlag : u32 { TypeInfoFlag_Simple_Compare = 1<<1, }; + +enum : int { + MIN_MATRIX_ELEMENT_COUNT = 1, + MAX_MATRIX_ELEMENT_COUNT = 16, +}; + + bool is_type_comparable(Type *t); bool is_type_simple_compare(Type *t); @@ -622,6 +637,7 @@ gb_global Type *t_type_info_bit_set = nullptr; gb_global Type *t_type_info_simd_vector = nullptr; gb_global Type *t_type_info_relative_pointer = nullptr; gb_global Type *t_type_info_relative_slice = nullptr; +gb_global Type *t_type_info_matrix = nullptr; gb_global Type *t_type_info_named_ptr = nullptr; gb_global Type *t_type_info_integer_ptr = nullptr; @@ -649,6 +665,7 @@ gb_global Type *t_type_info_bit_set_ptr = nullptr; gb_global Type *t_type_info_simd_vector_ptr = nullptr; gb_global Type *t_type_info_relative_pointer_ptr = nullptr; gb_global Type *t_type_info_relative_slice_ptr = nullptr; +gb_global Type *t_type_info_matrix_ptr = nullptr; gb_global Type *t_allocator = nullptr; gb_global Type *t_allocator_ptr = nullptr; @@ -804,6 +821,24 @@ Type *alloc_type_array(Type *elem, i64 count, Type *generic_count = nullptr) { return t; } +Type *alloc_type_matrix(Type *elem, i64 row_count, i64 column_count, Type *generic_row_count = nullptr, Type *generic_column_count = nullptr) { + if (generic_row_count != nullptr || generic_column_count != nullptr) { + Type *t = alloc_type(Type_Matrix); + t->Matrix.elem = elem; + t->Matrix.row_count = row_count; + t->Matrix.column_count = column_count; + t->Matrix.generic_row_count = generic_row_count; + t->Matrix.generic_column_count = generic_column_count; + return t; + } + Type *t = alloc_type(Type_Matrix); + t->Matrix.elem = elem; + t->Matrix.row_count = row_count; + t->Matrix.column_count = 
column_count; + return t; +} + + Type *alloc_type_enumerated_array(Type *elem, Type *index, ExactValue const *min_value, ExactValue const *max_value, TokenKind op) { Type *t = alloc_type(Type_EnumeratedArray); t->EnumeratedArray.elem = elem; @@ -1208,6 +1243,20 @@ bool is_type_enumerated_array(Type *t) { t = base_type(t); return t->kind == Type_EnumeratedArray; } +bool is_type_matrix(Type *t) { + t = base_type(t); + return t->kind == Type_Matrix; +} + +i64 matrix_type_stride(Type *t) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + i64 align = type_align_of(t); + i64 elem_size = type_size_of(t->Matrix.elem); + i64 stride = align_formula(elem_size*t->Matrix.row_count, align); + return stride; +} + bool is_type_dynamic_array(Type *t) { t = base_type(t); return t->kind == Type_DynamicArray; @@ -1241,6 +1290,8 @@ Type *base_array_type(Type *t) { return bt->EnumeratedArray.elem; } else if (is_type_simd_vector(bt)) { return bt->SimdVector.elem; + } else if (is_type_matrix(bt)) { + return bt->Matrix.elem; } return t; } @@ -1315,11 +1366,16 @@ i64 get_array_type_count(Type *t) { Type *core_array_type(Type *t) { for (;;) { t = base_array_type(t); - if (t->kind != Type_Array && t->kind != Type_EnumeratedArray && t->kind != Type_SimdVector) { + switch (t->kind) { + case Type_Array: + case Type_EnumeratedArray: + case Type_SimdVector: + case Type_Matrix: break; + default: + return t; } } - return t; } @@ -1934,6 +1990,8 @@ bool is_type_comparable(Type *t) { return is_type_comparable(t->Array.elem); case Type_Proc: return true; + case Type_Matrix: + return is_type_comparable(t->Matrix.elem); case Type_BitSet: return true; @@ -1995,6 +2053,9 @@ bool is_type_simple_compare(Type *t) { case Type_Proc: case Type_BitSet: return true; + + case Type_Matrix: + return is_type_simple_compare(t->Matrix.elem); case Type_Struct: for_array(i, t->Struct.fields) { @@ -2107,6 +2168,14 @@ bool are_types_identical(Type *x, Type *y) { return (x->Array.count == y->Array.count) && 
are_types_identical(x->Array.elem, y->Array.elem); } break; + + case Type_Matrix: + if (y->kind == Type_Matrix) { + return x->Matrix.row_count == y->Matrix.row_count && + x->Matrix.column_count == y->Matrix.column_count && + are_types_identical(x->Matrix.elem, y->Matrix.elem); + } + break; case Type_DynamicArray: if (y->kind == Type_DynamicArray) { @@ -2982,7 +3051,7 @@ i64 type_align_of_internal(Type *t, TypePath *path) { if (path->failure) { return FAILURE_ALIGNMENT; } - i64 align = type_align_of_internal(t->Array.elem, path); + i64 align = type_align_of_internal(elem, path); if (pop) type_path_pop(path); return align; } @@ -2993,7 +3062,7 @@ i64 type_align_of_internal(Type *t, TypePath *path) { if (path->failure) { return FAILURE_ALIGNMENT; } - i64 align = type_align_of_internal(t->EnumeratedArray.elem, path); + i64 align = type_align_of_internal(elem, path); if (pop) type_path_pop(path); return align; } @@ -3102,6 +3171,22 @@ i64 type_align_of_internal(Type *t, TypePath *path) { // IMPORTANT TODO(bill): Figure out the alignment of vector types return gb_clamp(next_pow2(type_size_of_internal(t, path)), 1, build_context.max_align); } + + case Type_Matrix: { + Type *elem = t->Matrix.elem; + i64 row_count = t->Matrix.row_count; + // i64 column_count = t->Matrix.column_count; + bool pop = type_path_push(path, elem); + if (path->failure) { + return FAILURE_ALIGNMENT; + } + i64 elem_align = type_align_of_internal(elem, path); + if (pop) type_path_pop(path); + + i64 align = gb_clamp(elem_align * row_count, elem_align, build_context.max_align); + + return align; + } case Type_RelativePointer: return type_align_of_internal(t->RelativePointer.base_integer, path); @@ -3369,6 +3454,26 @@ i64 type_size_of_internal(Type *t, TypePath *path) { Type *elem = t->SimdVector.elem; return count * type_size_of_internal(elem, path); } + + case Type_Matrix: { + Type *elem = t->Matrix.elem; + i64 row_count = t->Matrix.row_count; + i64 column_count = t->Matrix.column_count; + bool pop = 
type_path_push(path, elem); + if (path->failure) { + return FAILURE_SIZE; + } + i64 elem_size = type_size_of_internal(elem, path); + if (pop) type_path_pop(path); + i64 align = type_align_of(t); + + /* + [3; 4]f32 -> [4]{x, y, z, _: f32} // extra padding for alignment reasons + */ + + i64 size = align_formula(elem_size * row_count, align) * column_count; + return size; + } case Type_RelativePointer: return type_size_of_internal(t->RelativePointer.base_integer, path); From ba331024af2f5074125442e91dda6c8e63324c8f Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 18 Oct 2021 18:16:52 +0100 Subject: [PATCH 02/47] Very basic matrix support in backend --- core/fmt/fmt.odin | 35 +++++++- src/check_expr.cpp | 153 ++++++++++++++++++++++++++++++++++- src/checker.cpp | 8 ++ src/llvm_backend.hpp | 4 + src/llvm_backend_const.cpp | 28 +++++++ src/llvm_backend_expr.cpp | 78 ++++++++++++++++++ src/llvm_backend_utility.cpp | 35 ++++++++ src/types.cpp | 31 ++++++- 8 files changed, 364 insertions(+), 8 deletions(-) diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index cee00da23..804a29cab 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1954,7 +1954,40 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { } case runtime.Type_Info_Matrix: - io.write_string(fi.writer, "[]") + reflect.write_type(fi.writer, type_info_of(v.id)) + io.write_byte(fi.writer, '{') + defer io.write_byte(fi.writer, '}') + + fi.indent += 1; defer fi.indent -= 1 + + if fi.hash { + io.write_byte(fi.writer, '\n') + // TODO(bill): Should this render it like in written form? e.g. tranposed + for col in 0.. 0 { io.write_string(fi.writer, ", ") } + + offset := row*info.elem_size + col*info.stride + + data := uintptr(v.data) + uintptr(offset) + fmt_arg(fi, any{rawptr(data), info.elem.id}, verb) + } + io.write_string(fi.writer, ";\n") + } + } else { + for col in 0.. 0 { io.write_string(fi.writer, "; ") } + for row in 0.. 
0 { io.write_string(fi.writer, ", ") } + + offset := row*info.elem_size + col*info.stride + + data := uintptr(v.data) + uintptr(offset) + fmt_arg(fi, any{rawptr(data), info.elem.id}, verb) + } + } + } } } diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 85f2eeb23..9c12802d7 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -1400,8 +1400,9 @@ bool check_unary_op(CheckerContext *c, Operand *o, Token op) { } bool check_binary_op(CheckerContext *c, Operand *o, Token op) { + Type *main_type = o->type; // TODO(bill): Handle errors correctly - Type *type = base_type(core_array_type(o->type)); + Type *type = base_type(core_array_type(main_type)); Type *ct = core_type(type); switch (op.kind) { case Token_Sub: @@ -1414,10 +1415,15 @@ bool check_binary_op(CheckerContext *c, Operand *o, Token op) { } break; - case Token_Mul: case Token_Quo: - case Token_MulEq: case Token_QuoEq: + if (is_type_matrix(main_type)) { + error(op, "Operator '%.*s' is only allowed with matrix types", LIT(op.string)); + return false; + } + /*fallthrough*/ + case Token_Mul: + case Token_MulEq: case Token_AddEq: if (is_type_bit_set(type)) { return true; @@ -1458,6 +1464,10 @@ bool check_binary_op(CheckerContext *c, Operand *o, Token op) { case Token_ModMod: case Token_ModEq: case Token_ModModEq: + if (is_type_matrix(main_type)) { + error(op, "Operator '%.*s' is only allowed with matrix types", LIT(op.string)); + return false; + } if (!is_type_integer(type)) { error(op, "Operator '%.*s' is only allowed with integers", LIT(op.string)); return false; @@ -2671,6 +2681,114 @@ bool can_use_other_type_as_type_hint(bool use_lhs_as_type_hint, Type *other_type } +void check_binary_matrix(CheckerContext *c, Token const &op, Operand *x, Operand *y, Type *type_hint, bool use_lhs_as_type_hint) { + if (!check_binary_op(c, x, op)) { + x->mode = Addressing_Invalid; + return; + } + + if (is_type_matrix(x->type)) { + Type *xt = base_type(x->type); + Type *yt = base_type(y->type); + GB_ASSERT(xt->kind == 
Type_Matrix); + if (op.kind == Token_Mul) { + if (yt->kind == Type_Matrix) { + if (!are_types_identical(xt->Matrix.elem, yt->Matrix.elem)) { + goto matrix_error; + } + + if (xt->Matrix.column_count != yt->Matrix.row_count) { + goto matrix_error; + } + x->mode = Addressing_Value; + x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, yt->Matrix.column_count); + goto matrix_success; + } else if (yt->kind == Type_Array) { + if (!are_types_identical(xt->Matrix.elem, yt->Array.elem)) { + goto matrix_error; + } + + if (xt->Matrix.column_count != yt->Array.count) { + goto matrix_error; + } + + // Treat arrays as column vectors + x->mode = Addressing_Value; + x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, 1); + goto matrix_success; + } + } + if (!are_types_identical(xt, yt)) { + goto matrix_error; + } + x->mode = Addressing_Value; + x->type = xt; + goto matrix_success; + } else { + Type *xt = base_type(x->type); + Type *yt = base_type(y->type); + GB_ASSERT(is_type_matrix(yt)); + GB_ASSERT(!is_type_matrix(xt)); + + if (op.kind == Token_Mul) { + // NOTE(bill): no need to handle the matrix case here since it should be handled above + if (xt->kind == Type_Array) { + if (!are_types_identical(yt->Matrix.elem, xt->Array.elem)) { + goto matrix_error; + } + + if (xt->Array.count != yt->Matrix.row_count) { + goto matrix_error; + } + + // Treat arrays as row vectors + x->mode = Addressing_Value; + x->type = alloc_type_matrix(yt->Matrix.elem, 1, yt->Matrix.column_count); + goto matrix_success; + } + } + if (!are_types_identical(xt, yt)) { + goto matrix_error; + } + x->mode = Addressing_Value; + x->type = xt; + goto matrix_success; + } + +matrix_success: + if (type_hint != nullptr) { + Type *th = base_type(type_hint); + if (are_types_identical(th, x->type)) { + x->type = type_hint; + } else if (x->type->kind == Type_Matrix && th->kind == Type_Array) { + Type *xt = x->type; + if (!are_types_identical(xt->Matrix.elem, th->Array.elem)) { + // ignore + }
else if (xt->Matrix.row_count == 1 && xt->Matrix.column_count == th->Array.count) { + x->type = type_hint; + } else if (xt->Matrix.column_count == 1 && xt->Matrix.row_count == th->Array.count) { + x->type = type_hint; + } + } + } + return; + + +matrix_error: + gbString xt = type_to_string(x->type); + gbString yt = type_to_string(y->type); + gbString expr_str = expr_to_string(x->expr); + error(op, "Mismatched types in binary matrix expression '%s' for operator '%.*s' : '%s' vs '%s'", expr_str, LIT(op.string), xt, yt); + gb_string_free(expr_str); + gb_string_free(yt); + gb_string_free(xt); + x->type = t_invalid; + x->mode = Addressing_Invalid; + return; + +} + + void check_binary_expr(CheckerContext *c, Operand *x, Ast *node, Type *type_hint, bool use_lhs_as_type_hint=false) { GB_ASSERT(node->kind == Ast_BinaryExpr); Operand y_ = {}, *y = &y_; @@ -2874,6 +2992,12 @@ void check_binary_expr(CheckerContext *c, Operand *x, Ast *node, Type *type_hint x->type = y->type; return; } + if (is_type_matrix(x->type) || is_type_matrix(y->type)) { + check_binary_matrix(c, op, x, y, type_hint, use_lhs_as_type_hint); + return; + } + + if (!are_types_identical(x->type, y->type)) { if (x->type != t_invalid && y->type != t_invalid) { @@ -3258,6 +3382,29 @@ void convert_to_typed(CheckerContext *c, Operand *operand, Type *target_type) { break; } + + case Type_Matrix: { + Type *elem = base_array_type(t); + if (check_is_assignable_to(c, operand, elem)) { + if (t->Matrix.row_count != t->Matrix.column_count) { + operand->mode = Addressing_Invalid; + begin_error_block(); + defer (end_error_block()); + + convert_untyped_error(c, operand, target_type); + error_line("\tNote: Only a square matrix types can be initialized with a scalar value\n"); + return; + } else { + operand->mode = Addressing_Value; + } + } else { + operand->mode = Addressing_Invalid; + convert_untyped_error(c, operand, target_type); + return; + } + break; + } + case Type_Union: if (!is_operand_nil(*operand) && 
!is_operand_undef(*operand)) { diff --git a/src/checker.cpp b/src/checker.cpp index 8711fdc0c..c0e6d47c0 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1659,6 +1659,10 @@ void add_type_info_type_internal(CheckerContext *c, Type *t) { add_type_info_type_internal(c, bt->RelativeSlice.slice_type); add_type_info_type_internal(c, bt->RelativeSlice.base_integer); break; + + case Type_Matrix: + add_type_info_type_internal(c, bt->Matrix.elem); + break; default: GB_PANIC("Unhandled type: %*.s %d", LIT(type_strings[bt->kind]), bt->kind); @@ -1870,6 +1874,10 @@ void add_min_dep_type_info(Checker *c, Type *t) { add_min_dep_type_info(c, bt->RelativeSlice.slice_type); add_min_dep_type_info(c, bt->RelativeSlice.base_integer); break; + + case Type_Matrix: + add_min_dep_type_info(c, bt->Matrix.elem); + break; default: GB_PANIC("Unhandled type: %*.s", LIT(type_strings[bt->kind])); diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index ffb81f0e4..73ddad797 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -333,6 +333,10 @@ lbValue lb_emit_array_ep(lbProcedure *p, lbValue s, lbValue index); lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection sel); lbValue lb_emit_deep_field_ev(lbProcedure *p, lbValue e, Selection sel); +lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isize column); +lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isize column); + + lbValue lb_emit_arith(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type); lbValue lb_emit_byte_swap(lbProcedure *p, lbValue value, Type *end_type); void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block); diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index 68050e0ce..4cfcecdc3 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -512,6 +512,34 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc res.value = llvm_const_array(lb_type(m, elem), elems, 
cast(unsigned)count); return res; + } else if (is_type_matrix(type) && + value.kind != ExactValue_Invalid && + value.kind != ExactValue_Compound) { + i64 row = type->Matrix.row_count; + i64 column = type->Matrix.column_count; + GB_ASSERT(row == column); + + Type *elem = type->Matrix.elem; + + lbValue single_elem = lb_const_value(m, elem, value, allow_local); + single_elem.value = llvm_const_cast(single_elem.value, lb_type(m, elem)); + + i64 stride_bytes = matrix_type_stride(type); + i64 stride_elems = stride_bytes/type_size_of(elem); + + i64 total_elem_count = matrix_type_total_elems(type); + LLVMValueRef *elems = gb_alloc_array(permanent_allocator(), LLVMValueRef, cast(isize)total_elem_count); + for (i64 i = 0; i < row; i++) { + elems[i*stride_elems + i] = single_elem.value; + } + for (i64 i = 0; i < total_elem_count; i++) { + if (elems[i] == nullptr) { + elems[i] = LLVMConstNull(lb_type(m, elem)); + } + } + + res.value = LLVMConstArray(lb_type(m, elem), elems, cast(unsigned)total_elem_count); + return res; } switch (value.kind) { diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 3056952f6..6b7d90ec0 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -477,10 +477,72 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r } +lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type) { + GB_ASSERT(is_type_matrix(lhs.type) || is_type_matrix(rhs.type)); + + Type *xt = base_type(lhs.type); + Type *yt = base_type(rhs.type); + + if (op == Token_Mul) { + if (xt->kind == Type_Matrix) { + if (yt->kind == Type_Matrix) { + GB_ASSERT(is_type_matrix(type)); + GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count); + GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem)); + + Type *elem = xt->Matrix.elem; + + lbAddr res = lb_add_local_generated(p, type, true); + for (i64 i = 0; i < xt->Matrix.row_count; i++) { + for (i64 j = 0; j < 
yt->Matrix.column_count; j++) { + for (i64 k = 0; k < xt->Matrix.column_count; k++) { + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + + lbValue a = lb_emit_matrix_ev(p, lhs, i, k); + lbValue b = lb_emit_matrix_ev(p, rhs, k, j); + lbValue c = lb_emit_arith(p, op, a, b, elem); + lbValue d = lb_emit_load(p, dst); + lbValue e = lb_emit_arith(p, Token_Add, d, c, elem); + lb_emit_store(p, dst, e); + + } + } + } + + return lb_addr_load(p, res); + } + } + + } else { + GB_ASSERT(are_types_identical(xt, yt)); + GB_ASSERT(xt->kind == Type_Matrix); + // element-wise arithmetic + // pretend it is an array + lbValue array_lhs = lhs; + lbValue array_rhs = rhs; + Type *array_type = alloc_type_array(xt->Matrix.elem, matrix_type_total_elems(xt)); + GB_ASSERT(type_size_of(array_type) == type_size_of(type)); + + array_lhs.type = array_type; + array_rhs.type = array_type; + + lbValue array = lb_emit_arith_array(p, op, array_lhs, array_rhs, type); + array.type = type; + return array; + } + + GB_PANIC("TODO: lb_emit_arith_matrix"); + + return {}; +} + + lbValue lb_emit_arith(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type) { if (is_type_array_like(lhs.type) || is_type_array_like(rhs.type)) { return lb_emit_arith_array(p, op, lhs, rhs, type); + } else if (is_type_matrix(lhs.type) || is_type_matrix(rhs.type)) { + return lb_emit_arith_matrix(p, op, lhs, rhs, type); } else if (is_type_complex(type)) { lhs = lb_emit_conv(p, lhs, type); rhs = lb_emit_conv(p, rhs, type); @@ -1417,6 +1479,22 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { } return lb_addr_load(p, v); } + + if (is_type_matrix(dst) && !is_type_matrix(src)) { + GB_ASSERT(dst->Matrix.row_count == dst->Matrix.column_count); + + Type *elem = base_array_type(dst); + lbValue e = lb_emit_conv(p, value, elem); + lbAddr v = lb_add_local_generated(p, t, false); + for (i64 i = 0; i < dst->Matrix.row_count; i++) { + isize j = cast(isize)i; + lbValue ptr = lb_emit_matrix_epi(p, v.addr, j, j); + 
lb_emit_store(p, ptr, e); + } + + + return lb_addr_load(p, v); + } if (is_type_any(dst)) { if (is_type_untyped_nil(src)) { diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 0531c62bb..1b41be2a3 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1221,6 +1221,41 @@ lbValue lb_emit_ptr_offset(lbProcedure *p, lbValue ptr, lbValue index) { return res; } +lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isize column) { + Type *t = s.type; + GB_ASSERT(is_type_pointer(t)); + Type *st = base_type(type_deref(t)); + GB_ASSERT_MSG(is_type_matrix(st), "%s", type_to_string(st)); + + Type *ptr = base_array_type(st); + + isize index = row*column; + GB_ASSERT(0 <= index); + + LLVMValueRef indices[2] = { + LLVMConstInt(lb_type(p->module, t_int), 0, false), + LLVMConstInt(lb_type(p->module, t_int), cast(unsigned)index, false), + }; + + lbValue res = {}; + if (lb_is_const(s)) { + res.value = LLVMConstGEP(s.value, indices, gb_count_of(indices)); + } else { + res.value = LLVMBuildGEP(p->builder, s.value, indices, gb_count_of(indices), ""); + } + res.type = alloc_type_pointer(ptr); + return res; +} + +lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isize column) { + Type *st = base_type(s.type); + GB_ASSERT_MSG(is_type_matrix(st), "%s", type_to_string(st)); + + lbValue value = lb_address_from_load_or_generate_local(p, s); + lbValue ptr = lb_emit_matrix_epi(p, value, row, column); + return lb_emit_load(p, ptr); +} + void lb_fill_slice(lbProcedure *p, lbAddr const &slice, lbValue base_elem, lbValue len) { Type *t = lb_addr_type(slice); diff --git a/src/types.cpp b/src/types.cpp index 0313ade60..fd9b20c91 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1257,6 +1257,22 @@ i64 matrix_type_stride(Type *t) { return stride; } +i64 matrix_type_stride_in_elems(Type *t) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + i64 stride = matrix_type_stride(t); + return stride/gb_max(1, 
type_size_of(t->Matrix.elem)); +} + + +i64 matrix_type_total_elems(Type *t) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + i64 size = type_size_of(t); + i64 elem_size = type_size_of(t->Matrix.elem); + return size/gb_max(elem_size, 1); +} + bool is_type_dynamic_array(Type *t) { t = base_type(t); return t->kind == Type_DynamicArray; @@ -3174,17 +3190,17 @@ i64 type_align_of_internal(Type *t, TypePath *path) { case Type_Matrix: { Type *elem = t->Matrix.elem; - i64 row_count = t->Matrix.row_count; - // i64 column_count = t->Matrix.column_count; + i64 row_count = gb_max(t->Matrix.row_count, 1); + bool pop = type_path_push(path, elem); if (path->failure) { return FAILURE_ALIGNMENT; } + // elem align is used here rather than size as it make a little more sense i64 elem_align = type_align_of_internal(elem, path); if (pop) type_path_pop(path); - i64 align = gb_clamp(elem_align * row_count, elem_align, build_context.max_align); - + i64 align = gb_min(next_pow2(elem_align * row_count), build_context.max_align); return align; } @@ -3935,6 +3951,13 @@ gbString write_type_to_string(gbString str, Type *type) { str = gb_string_append_fmt(str, ") "); str = write_type_to_string(str, type->RelativeSlice.slice_type); break; + + case Type_Matrix: + str = gb_string_appendc(str, gb_bprintf("[%d", cast(int)type->Matrix.row_count)); + str = gb_string_appendc(str, "; "); + str = gb_string_appendc(str, gb_bprintf("%d]", cast(int)type->Matrix.column_count)); + str = write_type_to_string(str, type->Matrix.elem); + break; } return str; From 35111b39b88bb12d61e1dc67ed0161109be3f865 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 18 Oct 2021 18:57:21 +0100 Subject: [PATCH 03/47] Try to support the matrix multiplication LLVM intrinsics --- src/llvm_backend_expr.cpp | 273 ++++++++++++++++++++++++++++++++++---- 1 file changed, 249 insertions(+), 24 deletions(-) diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 6b7d90ec0..2e2d45991 100644 --- 
a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -476,6 +476,254 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r } } +bool lb_matrix_elem_simple(Type *t) { + Type *mt = base_type(t); + GB_ASSERT(mt->kind == Type_Matrix); + + Type *elem = core_type(mt->Matrix.elem); + if (is_type_complex(elem)) { + return false; + } + + if (is_type_different_to_arch_endianness(elem)) { + return false; + } + + if (elem->kind == Type_Basic) { + switch (elem->Basic.kind) { + case Basic_f16: + case Basic_f16le: + case Basic_f16be: + // TODO(bill): determine when this is fine + return false; + } + } + + return true; +} + +LLVMValueRef llvm_matrix_column_major_load(lbProcedure *p, lbValue lhs) { + lbModule *m = p->module; + + Type *mt = base_type(lhs.type); + GB_ASSERT(mt->kind == Type_Matrix); + GB_ASSERT(lb_matrix_elem_simple(mt)); + + unsigned total_elem_count = cast(unsigned)matrix_type_total_elems(mt); + + Type *elem = mt->Matrix.elem; + LLVMTypeRef elem_type = lb_type(m, elem); + + LLVMTypeRef vector_type = LLVMVectorType(elem_type, total_elem_count); + LLVMTypeRef types[] = {vector_type}; + + char const *name = "llvm.matrix.column.major.load"; + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); + + lbValue ptr = lb_address_from_load_or_generate_local(p, lhs); + ptr = lb_emit_matrix_epi(p, ptr, 0, 0); + + LLVMValueRef values[5] = {}; + values[0] = ptr.value; + values[1] = lb_const_int(m, t_u64, 8*matrix_type_stride(mt)).value; // bit width + values[2] = LLVMConstNull(lb_type(m, t_llvm_bool)); + values[3] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; + values[4] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; + + return LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); +} +LLVMValueRef llvm_matrix_column_major_load_from_ptr(lbProcedure *p, lbValue ptr) { + 
lbModule *m = p->module; + + Type *mt = base_type(type_deref(ptr.type)); + GB_ASSERT(mt->kind == Type_Matrix); + GB_ASSERT(lb_matrix_elem_simple(mt)); + + unsigned total_elem_count = cast(unsigned)matrix_type_total_elems(mt); + + Type *elem = mt->Matrix.elem; + LLVMTypeRef elem_type = lb_type(m, elem); + + LLVMTypeRef vector_type = LLVMVectorType(elem_type, total_elem_count); + LLVMTypeRef types[] = {vector_type}; + + char const *name = "llvm.matrix.column.major.load"; + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); + + LLVMValueRef values[5] = {}; + values[0] = lb_emit_matrix_epi(p, ptr, 0, 0).value; + values[1] = lb_const_int(m, t_u64, 8*matrix_type_stride(mt)).value; // bit width + values[2] = LLVMConstNull(lb_type(m, t_llvm_bool)); + values[3] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; + values[4] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; + + return LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); +} + +void llvm_matrix_column_major_store(lbProcedure *p, lbAddr addr, LLVMValueRef vector_value) { + lbModule *m = p->module; + + Type *mt = base_type(lb_addr_type(addr)); + GB_ASSERT(mt->kind == Type_Matrix); + GB_ASSERT(lb_matrix_elem_simple(mt)); + + unsigned total_elem_count = cast(unsigned)matrix_type_total_elems(mt); + + Type *elem = mt->Matrix.elem; + LLVMTypeRef elem_type = lb_type(m, elem); + + LLVMTypeRef vector_type = LLVMVectorType(elem_type, total_elem_count); + LLVMTypeRef types[] = {vector_type}; + + char const *name = "llvm.matrix.column.major.store"; + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); + + lbValue ptr = lb_addr_get_ptr(p, addr); + ptr = lb_emit_matrix_epi(p, ptr, 0, 0); + + 
GB_ASSERT(LLVMTypeOf(vector_value) == vector_type); + unsigned vector_size = LLVMGetVectorSize(vector_type); + GB_ASSERT((mt->Matrix.row_count*mt->Matrix.column_count) == cast(i64)vector_size); + + LLVMValueRef values[6] = {}; + values[0] = vector_value; + values[1] = ptr.value; + values[2] = lb_const_int(m, t_u64, 8*matrix_type_stride(mt)).value; // bit width + values[3] = LLVMConstNull(lb_type(m, t_llvm_bool)); + values[4] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; + values[5] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; + + LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); +} + +void llvm_matrix_column_major_store_to_raw_ptr(lbProcedure *p, Type *mt, lbValue ptr, LLVMValueRef vector_value) { + lbModule *m = p->module; + + mt = base_type(mt); + GB_ASSERT(mt->kind == Type_Matrix); + GB_ASSERT(lb_matrix_elem_simple(mt)); + + unsigned total_elem_count = cast(unsigned)matrix_type_total_elems(mt); + + Type *elem = mt->Matrix.elem; + LLVMTypeRef elem_type = lb_type(m, elem); + + LLVMTypeRef vector_type = LLVMVectorType(elem_type, total_elem_count); + LLVMTypeRef types[] = {vector_type}; + + char const *name = "llvm.matrix.column.major.store"; + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); + + GB_ASSERT(LLVMTypeOf(vector_value) == vector_type); + unsigned vector_size = LLVMGetVectorSize(vector_type); + GB_ASSERT((mt->Matrix.row_count*mt->Matrix.column_count) == cast(i64)vector_size); + + LLVMValueRef values[6] = {}; + values[0] = vector_value; + values[1] = ptr.value; + values[2] = lb_const_int(m, t_u64, 8*matrix_type_stride(mt)).value; // bit width + values[3] = LLVMConstNull(lb_type(m, t_llvm_bool)); + values[4] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; + values[5] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; + + LLVMBuildCall(p->builder, ip, values, 
gb_count_of(values), ""); +} + +LLVMValueRef llvm_matrix_multiply(lbProcedure *p, LLVMValueRef a, LLVMValueRef b, i64 outer_rows, i64 inner, i64 outer_columns) { + lbModule *m = p->module; + + LLVMTypeRef a_type = LLVMTypeOf(a); + LLVMTypeRef b_type = LLVMTypeOf(b); + + GB_ASSERT(LLVMGetElementType(a_type) == LLVMGetElementType(b_type)); + + LLVMTypeRef elem_type = LLVMGetElementType(a_type); + + LLVMTypeRef res_vector_type = LLVMVectorType(elem_type, cast(unsigned)(outer_rows*outer_columns)); + LLVMTypeRef types[] = {res_vector_type, a_type, b_type}; + + char const *name = "llvm.matrix.multiply"; + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); + + LLVMValueRef values[5] = {}; + values[0] = a; + values[1] = b; + values[2] = lb_const_int(m, t_u32, outer_rows).value; + values[3] = lb_const_int(m, t_u32, inner).value; + values[4] = lb_const_int(m, t_u32, outer_columns).value; + + return LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); +} + + +lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { + Type *xt = base_type(lhs.type); + Type *yt = base_type(rhs.type); + + GB_ASSERT(is_type_matrix(type)); + GB_ASSERT(is_type_matrix(xt)); + GB_ASSERT(is_type_matrix(yt)); + GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count); + GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem)); + + if (!lb_matrix_elem_simple(xt)) { + goto slow_form; + } + + if (false) { + // TODO(bill): LLVM ERROR: Do not know how to split the result of this operator! 
+ lbAddr res = lb_add_local_generated(p, type, true); + + lbValue res_ptr = lb_addr_get_ptr(p, res); + res_ptr = lb_emit_matrix_epi(p, res_ptr, 0, 0); + + lbValue lhs_ptr = lb_address_from_load_or_generate_local(p, lhs); + lbValue rhs_ptr = lb_address_from_load_or_generate_local(p, rhs); + LLVMValueRef a = llvm_matrix_column_major_load_from_ptr(p, lhs_ptr); + LLVMValueRef b = llvm_matrix_column_major_load_from_ptr(p, rhs_ptr); + LLVMValueRef c = llvm_matrix_multiply(p, a, b, xt->Matrix.row_count, xt->Matrix.column_count, yt->Matrix.column_count); + + llvm_matrix_column_major_store_to_raw_ptr(p, type, res_ptr, c); + + return lb_addr_load(p, res); + } + +slow_form: + { + Type *elem = xt->Matrix.elem; + + lbAddr res = lb_add_local_generated(p, type, true); + + for (i64 i = 0; i < xt->Matrix.row_count; i++) { + for (i64 j = 0; j < yt->Matrix.column_count; j++) { + for (i64 k = 0; k < xt->Matrix.column_count; k++) { + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + + lbValue a = lb_emit_matrix_ev(p, lhs, i, k); + lbValue b = lb_emit_matrix_ev(p, rhs, k, j); + lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem); + lbValue d = lb_emit_load(p, dst); + lbValue e = lb_emit_arith(p, Token_Add, d, c, elem); + lb_emit_store(p, dst, e); + + } + } + } + + return lb_addr_load(p, res); + } +} + lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type) { GB_ASSERT(is_type_matrix(lhs.type) || is_type_matrix(rhs.type)); @@ -486,30 +734,7 @@ lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue if (op == Token_Mul) { if (xt->kind == Type_Matrix) { if (yt->kind == Type_Matrix) { - GB_ASSERT(is_type_matrix(type)); - GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count); - GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem)); - - Type *elem = xt->Matrix.elem; - - lbAddr res = lb_add_local_generated(p, type, true); - for (i64 i = 0; i < xt->Matrix.row_count; i++) { - for (i64 j = 0; j < 
yt->Matrix.column_count; j++) { - for (i64 k = 0; k < xt->Matrix.column_count; k++) { - lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); - - lbValue a = lb_emit_matrix_ev(p, lhs, i, k); - lbValue b = lb_emit_matrix_ev(p, rhs, k, j); - lbValue c = lb_emit_arith(p, op, a, b, elem); - lbValue d = lb_emit_load(p, dst); - lbValue e = lb_emit_arith(p, Token_Add, d, c, elem); - lb_emit_store(p, dst, e); - - } - } - } - - return lb_addr_load(p, res); + return lb_emit_matrix_mul(p, lhs, rhs, type); } } From 243e2e2b8a7566087375178a66b25b5d9ac9a356 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 19 Oct 2021 11:24:26 +0100 Subject: [PATCH 04/47] Basic support for matrix*vector, vector*matrix operations --- core/fmt/fmt.odin | 16 +-- src/check_expr.cpp | 30 +++-- src/llvm_backend.cpp | 57 ++++++++-- src/llvm_backend_expr.cpp | 213 +++++++++++++++++++---------------- src/llvm_backend_general.cpp | 8 +- src/llvm_backend_utility.cpp | 10 +- src/types.cpp | 3 + 7 files changed, 207 insertions(+), 130 deletions(-) diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 804a29cab..46b1fc14c 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1960,13 +1960,13 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { fi.indent += 1; defer fi.indent -= 1 - if fi.hash { + if fi.hash { io.write_byte(fi.writer, '\n') // TODO(bill): Should this render it like in written form? e.g. tranposed - for col in 0.. 0 { io.write_string(fi.writer, ", ") } + for col in 0.. 0 { io.write_string(fi.writer, ", ") } offset := row*info.elem_size + col*info.stride @@ -1976,10 +1976,10 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { io.write_string(fi.writer, ";\n") } } else { - for col in 0.. 0 { io.write_string(fi.writer, "; ") } - for row in 0.. 0 { io.write_string(fi.writer, ", ") } + for row in 0.. 0 { io.write_string(fi.writer, ", ") } + for col in 0.. 
0 { io.write_string(fi.writer, "; ") } offset := row*info.elem_size + col*info.stride diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 9c12802d7..1ca5b895d 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2686,10 +2686,11 @@ void check_binary_matrix(CheckerContext *c, Token const &op, Operand *x, Operand x->mode = Addressing_Invalid; return; } + + Type *xt = base_type(x->type); + Type *yt = base_type(y->type); if (is_type_matrix(x->type)) { - Type *xt = base_type(x->type); - Type *yt = base_type(y->type); GB_ASSERT(xt->kind == Type_Matrix); if (op.kind == Token_Mul) { if (yt->kind == Type_Matrix) { @@ -2714,7 +2715,11 @@ void check_binary_matrix(CheckerContext *c, Token const &op, Operand *x, Operand // Treat arrays as column vectors x->mode = Addressing_Value; - x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, 1); + if (type_hint == nullptr && xt->Matrix.row_count == yt->Array.count) { + x->type = y->type; + } else { + x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, 1); + } goto matrix_success; } } @@ -2725,8 +2730,6 @@ void check_binary_matrix(CheckerContext *c, Token const &op, Operand *x, Operand x->type = xt; goto matrix_success; } else { - Type *xt = base_type(x->type); - Type *yt = base_type(y->type); GB_ASSERT(is_type_matrix(yt)); GB_ASSERT(!is_type_matrix(xt)); @@ -2743,7 +2746,11 @@ void check_binary_matrix(CheckerContext *c, Token const &op, Operand *x, Operand // Treat arrays as row vectors x->mode = Addressing_Value; - x->type = alloc_type_matrix(xt->Matrix.elem, 1, xt->Matrix.column_count); + if (type_hint == nullptr && yt->Matrix.column_count == xt->Array.count) { + x->type = x->type; + } else { + x->type = alloc_type_matrix(yt->Matrix.elem, 1, yt->Matrix.column_count); + } goto matrix_success; } } @@ -2775,13 +2782,13 @@ matrix_success: matrix_error: - gbString xt = type_to_string(x->type); - gbString yt = type_to_string(y->type); + gbString xts = type_to_string(x->type); + gbString yts = 
type_to_string(y->type); gbString expr_str = expr_to_string(x->expr); - error(op, "Mismatched types in binary matrix expression '%s' for operator '%.*s' : '%s' vs '%s'", expr_str, LIT(op.string), xt, yt); + error(op, "Mismatched types in binary matrix expression '%s' for operator '%.*s' : '%s' vs '%s'", expr_str, LIT(op.string), xts, yts); gb_string_free(expr_str); - gb_string_free(yt); - gb_string_free(xt); + gb_string_free(yts); + gb_string_free(xts); x->type = t_invalid; x->mode = Addressing_Invalid; return; @@ -2994,6 +3001,7 @@ void check_binary_expr(CheckerContext *c, Operand *x, Ast *node, Type *type_hint } if (is_type_matrix(x->type) || is_type_matrix(y->type)) { check_binary_matrix(c, op, x, y, type_hint, use_lhs_as_type_hint); + x->expr = node; return; } diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index a72ddc646..a853a6224 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -1135,13 +1135,46 @@ void lb_generate_code(lbGenerator *gen) { auto *min_dep_set = &info->minimum_dependency_set; - LLVMInitializeAllTargetInfos(); - LLVMInitializeAllTargets(); - LLVMInitializeAllTargetMCs(); - LLVMInitializeAllAsmPrinters(); - LLVMInitializeAllAsmParsers(); - LLVMInitializeAllDisassemblers(); - LLVMInitializeNativeTarget(); + switch (build_context.metrics.arch) { + case TargetArch_amd64: + case TargetArch_386: + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86Target(); + LLVMInitializeX86TargetMC(); + LLVMInitializeX86AsmPrinter(); + LLVMInitializeX86AsmParser(); + LLVMInitializeX86Disassembler(); + break; + case TargetArch_arm64: + LLVMInitializeAArch64TargetInfo(); + LLVMInitializeAArch64Target(); + LLVMInitializeAArch64TargetMC(); + LLVMInitializeAArch64AsmPrinter(); + LLVMInitializeAArch64AsmParser(); + LLVMInitializeAArch64Disassembler(); + break; + case TargetArch_wasm32: + LLVMInitializeWebAssemblyTargetInfo(); + LLVMInitializeWebAssemblyTarget(); + LLVMInitializeWebAssemblyTargetMC(); + LLVMInitializeWebAssemblyAsmPrinter(); + 
LLVMInitializeWebAssemblyAsmParser(); + LLVMInitializeWebAssemblyDisassembler(); + break; + default: + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargets(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllAsmPrinters(); + LLVMInitializeAllAsmParsers(); + LLVMInitializeAllDisassemblers(); + break; + } + + + if (build_context.microarch == "native") { + LLVMInitializeNativeTarget(); + } char const *target_triple = alloc_cstring(permanent_allocator(), build_context.metrics.target_triplet); for_array(i, gen->modules.entries) { @@ -1174,6 +1207,14 @@ void lb_generate_code(lbGenerator *gen) { if (gb_strcmp(llvm_cpu, host_cpu_name) == 0) { llvm_features = LLVMGetHostCPUFeatures(); } + } else if (build_context.metrics.arch == TargetArch_amd64) { + // NOTE(bill): x86-64-v2 is more than enough for everyone + // + // x86-64: CMOV, CMPXCHG8B, FPU, FXSR, MMX, FXSR, SCE, SSE, SSE2 + // x86-64-v2: (close to Nehalem) CMPXCHG16B, LAHF-SAHF, POPCNT, SSE3, SSE4.1, SSE4.2, SSSE3 + // x86-64-v3: (close to Haswell) AVX, AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE, XSAVE + // x86-64-v4: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL + llvm_cpu = "x86-64-v2"; } // GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target)); @@ -1640,6 +1681,7 @@ void lb_generate_code(lbGenerator *gen) { code_gen_file_type = LLVMAssemblyFile; } + for_array(j, gen->modules.entries) { lbModule *m = gen->modules.entries[j].value; if (LLVMVerifyModule(m->mod, LLVMReturnStatusAction, &llvm_error)) { @@ -1684,7 +1726,6 @@ void lb_generate_code(lbGenerator *gen) { } } - TIME_SECTION("LLVM Add Foreign Library Paths"); for_array(j, gen->modules.entries) { diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 2e2d45991..ed98c6845 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -509,12 +509,16 @@ LLVMValueRef llvm_matrix_column_major_load(lbProcedure *p, lbValue lhs) { GB_ASSERT(mt->kind == Type_Matrix); GB_ASSERT(lb_matrix_elem_simple(mt)); - unsigned total_elem_count = 
cast(unsigned)matrix_type_total_elems(mt); + + i64 stride = matrix_type_stride_in_elems(mt); + i64 rows = mt->Matrix.row_count; + i64 columns = mt->Matrix.column_count; + unsigned elem_count = cast(unsigned)(rows*columns); Type *elem = mt->Matrix.elem; LLVMTypeRef elem_type = lb_type(m, elem); - LLVMTypeRef vector_type = LLVMVectorType(elem_type, total_elem_count); + LLVMTypeRef vector_type = LLVMVectorType(elem_type, elem_count); LLVMTypeRef types[] = {vector_type}; char const *name = "llvm.matrix.column.major.load"; @@ -524,44 +528,18 @@ LLVMValueRef llvm_matrix_column_major_load(lbProcedure *p, lbValue lhs) { lbValue ptr = lb_address_from_load_or_generate_local(p, lhs); ptr = lb_emit_matrix_epi(p, ptr, 0, 0); - + LLVMValueRef values[5] = {}; values[0] = ptr.value; - values[1] = lb_const_int(m, t_u64, 8*matrix_type_stride(mt)).value; // bit width + values[1] = lb_const_int(m, t_u64, stride).value; values[2] = LLVMConstNull(lb_type(m, t_llvm_bool)); values[3] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; values[4] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; - return LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); -} -LLVMValueRef llvm_matrix_column_major_load_from_ptr(lbProcedure *p, lbValue ptr) { - lbModule *m = p->module; - - Type *mt = base_type(type_deref(ptr.type)); - GB_ASSERT(mt->kind == Type_Matrix); - GB_ASSERT(lb_matrix_elem_simple(mt)); - - unsigned total_elem_count = cast(unsigned)matrix_type_total_elems(mt); - - Type *elem = mt->Matrix.elem; - LLVMTypeRef elem_type = lb_type(m, elem); - - LLVMTypeRef vector_type = LLVMVectorType(elem_type, total_elem_count); - LLVMTypeRef types[] = {vector_type}; - - char const *name = "llvm.matrix.column.major.load"; - unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); - GB_ASSERT_MSG(id != 0, "Unable to find %s", name); - LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); - - LLVMValueRef values[5] = {}; - values[0] = 
lb_emit_matrix_epi(p, ptr, 0, 0).value; - values[1] = lb_const_int(m, t_u64, 8*matrix_type_stride(mt)).value; // bit width - values[2] = LLVMConstNull(lb_type(m, t_llvm_bool)); - values[3] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; - values[4] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; - - return LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + gb_printf_err("%s\n", LLVMPrintValueToString(call)); + // LLVMAddAttributeAtIndex(call, 0, lb_create_enum_attribute(p->module->ctx, "align", cast(u64)type_align_of(mt))); + return call; } void llvm_matrix_column_major_store(lbProcedure *p, lbAddr addr, LLVMValueRef vector_value) { @@ -571,12 +549,7 @@ void llvm_matrix_column_major_store(lbProcedure *p, lbAddr addr, LLVMValueRef ve GB_ASSERT(mt->kind == Type_Matrix); GB_ASSERT(lb_matrix_elem_simple(mt)); - unsigned total_elem_count = cast(unsigned)matrix_type_total_elems(mt); - - Type *elem = mt->Matrix.elem; - LLVMTypeRef elem_type = lb_type(m, elem); - - LLVMTypeRef vector_type = LLVMVectorType(elem_type, total_elem_count); + LLVMTypeRef vector_type = LLVMTypeOf(vector_value); LLVMTypeRef types[] = {vector_type}; char const *name = "llvm.matrix.column.major.store"; @@ -587,55 +560,25 @@ void llvm_matrix_column_major_store(lbProcedure *p, lbAddr addr, LLVMValueRef ve lbValue ptr = lb_addr_get_ptr(p, addr); ptr = lb_emit_matrix_epi(p, ptr, 0, 0); - GB_ASSERT(LLVMTypeOf(vector_value) == vector_type); unsigned vector_size = LLVMGetVectorSize(vector_type); GB_ASSERT((mt->Matrix.row_count*mt->Matrix.column_count) == cast(i64)vector_size); + i64 stride = matrix_type_stride_in_elems(mt); + LLVMValueRef values[6] = {}; values[0] = vector_value; values[1] = ptr.value; - values[2] = lb_const_int(m, t_u64, 8*matrix_type_stride(mt)).value; // bit width + values[2] = lb_const_int(m, t_u64, stride).value; values[3] = LLVMConstNull(lb_type(m, t_llvm_bool)); 
values[4] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; values[5] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; - LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + gb_printf_err("%s\n", LLVMPrintValueToString(call)); + // LLVMAddAttributeAtIndex(call, 1, lb_create_enum_attribute(p->module->ctx, "align", cast(u64)type_align_of(mt))); + gb_unused(call); } -void llvm_matrix_column_major_store_to_raw_ptr(lbProcedure *p, Type *mt, lbValue ptr, LLVMValueRef vector_value) { - lbModule *m = p->module; - - mt = base_type(mt); - GB_ASSERT(mt->kind == Type_Matrix); - GB_ASSERT(lb_matrix_elem_simple(mt)); - - unsigned total_elem_count = cast(unsigned)matrix_type_total_elems(mt); - - Type *elem = mt->Matrix.elem; - LLVMTypeRef elem_type = lb_type(m, elem); - - LLVMTypeRef vector_type = LLVMVectorType(elem_type, total_elem_count); - LLVMTypeRef types[] = {vector_type}; - - char const *name = "llvm.matrix.column.major.store"; - unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); - GB_ASSERT_MSG(id != 0, "Unable to find %s", name); - LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); - - GB_ASSERT(LLVMTypeOf(vector_value) == vector_type); - unsigned vector_size = LLVMGetVectorSize(vector_type); - GB_ASSERT((mt->Matrix.row_count*mt->Matrix.column_count) == cast(i64)vector_size); - - LLVMValueRef values[6] = {}; - values[0] = vector_value; - values[1] = ptr.value; - values[2] = lb_const_int(m, t_u64, 8*matrix_type_stride(mt)).value; // bit width - values[3] = LLVMConstNull(lb_type(m, t_llvm_bool)); - values[4] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; - values[5] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; - - LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); -} LLVMValueRef llvm_matrix_multiply(lbProcedure *p, LLVMValueRef a, LLVMValueRef b, i64 outer_rows, i64 inner, i64 
outer_columns) { lbModule *m = p->module; @@ -648,6 +591,7 @@ LLVMValueRef llvm_matrix_multiply(lbProcedure *p, LLVMValueRef a, LLVMValueRef b LLVMTypeRef elem_type = LLVMGetElementType(a_type); LLVMTypeRef res_vector_type = LLVMVectorType(elem_type, cast(unsigned)(outer_rows*outer_columns)); + LLVMTypeRef types[] = {res_vector_type, a_type, b_type}; char const *name = "llvm.matrix.multiply"; @@ -662,7 +606,9 @@ LLVMValueRef llvm_matrix_multiply(lbProcedure *p, LLVMValueRef a, LLVMValueRef b values[3] = lb_const_int(m, t_u32, inner).value; values[4] = lb_const_int(m, t_u32, outer_columns).value; - return LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + gb_printf_err("%s\n", LLVMPrintValueToString(call)); + return call; } @@ -684,19 +630,13 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) // TODO(bill): LLVM ERROR: Do not know how to split the result of this operator! 
lbAddr res = lb_add_local_generated(p, type, true); - lbValue res_ptr = lb_addr_get_ptr(p, res); - res_ptr = lb_emit_matrix_epi(p, res_ptr, 0, 0); - - lbValue lhs_ptr = lb_address_from_load_or_generate_local(p, lhs); - lbValue rhs_ptr = lb_address_from_load_or_generate_local(p, rhs); - LLVMValueRef a = llvm_matrix_column_major_load_from_ptr(p, lhs_ptr); - LLVMValueRef b = llvm_matrix_column_major_load_from_ptr(p, rhs_ptr); - LLVMValueRef c = llvm_matrix_multiply(p, a, b, xt->Matrix.row_count, xt->Matrix.column_count, yt->Matrix.column_count); - - llvm_matrix_column_major_store_to_raw_ptr(p, type, res_ptr, c); + LLVMValueRef a = llvm_matrix_column_major_load(p, lhs); gb_unused(a); + LLVMValueRef b = llvm_matrix_column_major_load(p, rhs); gb_unused(b); + LLVMValueRef c = llvm_matrix_multiply(p, a, b, xt->Matrix.row_count, xt->Matrix.column_count, yt->Matrix.column_count); gb_unused(c); + llvm_matrix_column_major_store(p, res, c); return lb_addr_load(p, res); - } + } slow_form: { @@ -704,18 +644,21 @@ slow_form: lbAddr res = lb_add_local_generated(p, type, true); - for (i64 i = 0; i < xt->Matrix.row_count; i++) { - for (i64 j = 0; j < yt->Matrix.column_count; j++) { - for (i64 k = 0; k < xt->Matrix.column_count; k++) { + i64 outer_rows = xt->Matrix.row_count; + i64 inner = xt->Matrix.column_count; + i64 outer_columns = yt->Matrix.column_count; + + for (i64 j = 0; j < outer_columns; j++) { + for (i64 i = 0; i < outer_rows; i++) { + for (i64 k = 0; k < inner; k++) { lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + lbValue d0 = lb_emit_load(p, dst); lbValue a = lb_emit_matrix_ev(p, lhs, i, k); lbValue b = lb_emit_matrix_ev(p, rhs, k, j); lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem); - lbValue d = lb_emit_load(p, dst); - lbValue e = lb_emit_arith(p, Token_Add, d, c, elem); - lb_emit_store(p, dst, e); - + lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem); + lb_emit_store(p, dst, d); } } } @@ -724,6 +667,72 @@ slow_form: } } +lbValue 
lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { + Type *mt = base_type(lhs.type); + Type *vt = base_type(rhs.type); + + GB_ASSERT(is_type_matrix(mt)); + GB_ASSERT(is_type_array_like(vt)); + + i64 vector_count = get_array_type_count(vt); + + GB_ASSERT(mt->Matrix.column_count == vector_count); + GB_ASSERT(are_types_identical(mt->Matrix.elem, base_array_type(vt))); + + Type *elem = mt->Matrix.elem; + + lbAddr res = lb_add_local_generated(p, type, true); + + for (i64 i = 0; i < mt->Matrix.row_count; i++) { + for (i64 j = 0; j < mt->Matrix.column_count; j++) { + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, 0); + lbValue d0 = lb_emit_load(p, dst); + + lbValue a = lb_emit_matrix_ev(p, lhs, i, j); + lbValue b = lb_emit_struct_ev(p, rhs, cast(i32)j); + lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem); + lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem); + lb_emit_store(p, dst, d); + } + } + + return lb_addr_load(p, res); +} + +lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { + Type *mt = base_type(rhs.type); + Type *vt = base_type(lhs.type); + + GB_ASSERT(is_type_matrix(mt)); + GB_ASSERT(is_type_array_like(vt)); + + i64 vector_count = get_array_type_count(vt); + + GB_ASSERT(mt->Matrix.row_count == vector_count); + GB_ASSERT(are_types_identical(mt->Matrix.elem, base_array_type(vt))); + + Type *elem = mt->Matrix.elem; + + lbAddr res = lb_add_local_generated(p, type, true); + + for (i64 j = 0; j < mt->Matrix.column_count; j++) { + for (i64 k = 0; k < mt->Matrix.row_count; k++) { + lbValue dst = lb_emit_matrix_epi(p, res.addr, 0, j); + lbValue d0 = lb_emit_load(p, dst); + + lbValue a = lb_emit_struct_ev(p, lhs, cast(i32)k); + lbValue b = lb_emit_matrix_ev(p, rhs, k, j); + lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem); + lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem); + lb_emit_store(p, dst, d); + } + } + + return lb_addr_load(p, res); +} + + + lbValue 
lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type) { GB_ASSERT(is_type_matrix(lhs.type) || is_type_matrix(rhs.type)); @@ -735,7 +744,12 @@ lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue if (xt->kind == Type_Matrix) { if (yt->kind == Type_Matrix) { return lb_emit_matrix_mul(p, lhs, rhs, type); + } else if (is_type_array_like(yt)) { + return lb_emit_matrix_mul_vector(p, lhs, rhs, type); } + } else if (is_type_array_like(xt)) { + GB_ASSERT(yt->kind == Type_Matrix); + return lb_emit_vector_mul_matrix(p, lhs, rhs, type); } } else { @@ -1036,6 +1050,13 @@ lbValue lb_build_binary_expr(lbProcedure *p, Ast *expr) { ast_node(be, BinaryExpr, expr); TypeAndValue tv = type_and_value_of_expr(expr); + + if (is_type_matrix(be->left->tav.type) || is_type_matrix(be->right->tav.type)) { + lbValue left = lb_build_expr(p, be->left); + lbValue right = lb_build_expr(p, be->right); + return lb_emit_arith_matrix(p, be->op.kind, left, right, default_type(tv.type)); + } + switch (be->op.kind) { case Token_Add: diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index ee8f220ef..63a63349a 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1937,7 +1937,7 @@ LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { i64 elem_size = type_size_of(type->Matrix.elem); GB_ASSERT(elem_size > 0); i64 elem_count = size/elem_size; - GB_ASSERT(elem_count > 0); + GB_ASSERT_MSG(elem_count > 0, "%s", type_to_string(type)); m->internal_type_level -= 1; @@ -2611,8 +2611,10 @@ lbAddr lb_add_local(lbProcedure *p, Type *type, Entity *e, bool zero_init, i32 p LLVMTypeRef llvm_type = lb_type(p->module, type); LLVMValueRef ptr = LLVMBuildAlloca(p->builder, llvm_type, name); - // unsigned alignment = 16; // TODO(bill): Make this configurable - unsigned alignment = cast(unsigned)lb_alignof(llvm_type); + unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(llvm_type)); + if 
(is_type_matrix(type)) { + alignment *= 2; // NOTE(bill): Just in case + } LLVMSetAlignment(ptr, alignment); LLVMPositionBuilderAtEnd(p->builder, p->curr_block->block); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 1b41be2a3..3971c0ca6 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1224,12 +1224,14 @@ lbValue lb_emit_ptr_offset(lbProcedure *p, lbValue ptr, lbValue index) { lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isize column) { Type *t = s.type; GB_ASSERT(is_type_pointer(t)); - Type *st = base_type(type_deref(t)); - GB_ASSERT_MSG(is_type_matrix(st), "%s", type_to_string(st)); + Type *mt = base_type(type_deref(t)); + GB_ASSERT_MSG(is_type_matrix(mt), "%s", type_to_string(mt)); - Type *ptr = base_array_type(st); + Type *ptr = base_array_type(mt); - isize index = row*column; + i64 stride_elems = matrix_type_stride_in_elems(mt); + + isize index = row + column*stride_elems; GB_ASSERT(0 <= index); LLVMValueRef indices[2] = { diff --git a/src/types.cpp b/src/types.cpp index fd9b20c91..8bce69cf3 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1249,6 +1249,7 @@ bool is_type_matrix(Type *t) { } i64 matrix_type_stride(Type *t) { + // TODO(bill): precompute matrix stride t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); i64 align = type_align_of(t); @@ -1258,6 +1259,7 @@ i64 matrix_type_stride(Type *t) { } i64 matrix_type_stride_in_elems(Type *t) { + // TODO(bill): precompute matrix stride t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); i64 stride = matrix_type_stride(t); @@ -1266,6 +1268,7 @@ i64 matrix_type_stride_in_elems(Type *t) { i64 matrix_type_total_elems(Type *t) { + // TODO(bill): precompute matrix total elems t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); i64 size = type_size_of(t); From 1556fad65a52af7683d5c80f2f724ef252525163 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 19 Oct 2021 11:31:11 +0100 Subject: [PATCH 05/47] Change syntax for matrices to 
`matrix[R, C]T` --- core/fmt/fmt.odin | 5 ++--- core/reflect/types.odin | 4 ++-- core/runtime/print.odin | 4 ++-- src/check_expr.cpp | 6 +++--- src/parser.cpp | 37 +++++++++++++++++++------------------ src/tokenizer.cpp | 1 + src/types.cpp | 4 +--- 7 files changed, 30 insertions(+), 31 deletions(-) diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 46b1fc14c..c0190a0b9 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1954,9 +1954,8 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { } case runtime.Type_Info_Matrix: - reflect.write_type(fi.writer, type_info_of(v.id)) - io.write_byte(fi.writer, '{') - defer io.write_byte(fi.writer, '}') + io.write_string(fi.writer, "matrix[") + defer io.write_byte(fi.writer, ']') fi.indent += 1; defer fi.indent -= 1 diff --git a/core/reflect/types.odin b/core/reflect/types.odin index cf79abb07..74778013a 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -592,9 +592,9 @@ write_type_writer :: proc(w: io.Writer, ti: ^Type_Info, n_written: ^int = nil) - write_type(w, info.slice, &n) or_return case Type_Info_Matrix: - io.write_string(w, "[", &n) or_return + io.write_string(w, "matrix[", &n) or_return io.write_i64(w, i64(info.row_count), 10, &n) or_return - io.write_string(w, "; ", &n) or_return + io.write_string(w, ", ", &n) or_return io.write_i64(w, i64(info.column_count), 10, &n) or_return io.write_string(w, "]", &n) or_return write_type(w, info.elem, &n) or_return diff --git a/core/runtime/print.odin b/core/runtime/print.odin index f32ac0831..8a14eba08 100644 --- a/core/runtime/print.odin +++ b/core/runtime/print.odin @@ -372,9 +372,9 @@ print_type :: proc "contextless" (ti: ^Type_Info) { print_type(info.slice) case Type_Info_Matrix: - print_string("[") + print_string("matrix[") print_u64(u64(info.row_count)) - print_string("; ") + print_string(", ") print_u64(u64(info.column_count)) print_string("]") print_type(info.elem) diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 
1ca5b895d..a75334e6c 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -9138,7 +9138,7 @@ gbString write_expr_to_string(gbString str, Ast *node, bool shorthand) { str = write_expr_to_string(str, mie->expr, shorthand); str = gb_string_append_rune(str, '['); str = write_expr_to_string(str, mie->row_index, shorthand); - str = gb_string_appendc(str, "; "); + str = gb_string_appendc(str, ", "); str = write_expr_to_string(str, mie->column_index, shorthand); str = gb_string_append_rune(str, ']'); case_end; @@ -9216,9 +9216,9 @@ gbString write_expr_to_string(gbString str, Ast *node, bool shorthand) { case_end; case_ast_node(mt, MatrixType, node); - str = gb_string_append_rune(str, '['); + str = gb_string_appendc(str, "matrix["); str = write_expr_to_string(str, mt->row_count, shorthand); - str = gb_string_appendc(str, "; "); + str = gb_string_appendc(str, ", "); str = write_expr_to_string(str, mt->column_count, shorthand); str = gb_string_append_rune(str, ']'); str = write_expr_to_string(str, mt->elem, shorthand); diff --git a/src/parser.cpp b/src/parser.cpp index 499bd337b..c29cf70d9 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -2241,18 +2241,6 @@ Ast *parse_operand(AstFile *f, bool lhs) { count_expr = parse_expr(f, false); f->expr_level--; } - if (allow_token(f, Token_Semicolon)) { - Ast *row_count = count_expr; - Ast *column_count = nullptr; - - f->expr_level++; - column_count = parse_expr(f, false); - f->expr_level--; - - expect_token(f, Token_CloseBracket); - - return ast_matrix_type(f, token, row_count, column_count, parse_type(f)); - } expect_token(f, Token_CloseBracket); return ast_array_type(f, token, count_expr, parse_type(f)); @@ -2271,6 +2259,23 @@ Ast *parse_operand(AstFile *f, bool lhs) { return ast_map_type(f, token, key, value); } break; + + case Token_matrix: { + Token token = expect_token(f, Token_matrix); + Ast *row_count = nullptr; + Ast *column_count = nullptr; + Ast *type = nullptr; + Token open, close; + + open = expect_token_after(f, 
Token_OpenBracket, "matrix"); + row_count = parse_expr(f, true); + expect_token(f, Token_Comma); + column_count = parse_expr(f, true); + close = expect_token(f, Token_CloseBracket); + type = parse_type(f); + + return ast_matrix_type(f, token, row_count, column_count, type); + } break; case Token_struct: { Token token = expect_token(f, Token_struct); @@ -2716,11 +2721,7 @@ Ast *parse_atom_expr(AstFile *f, Ast *operand, bool lhs) { case Token_RangeHalf: syntax_error(f->curr_token, "Expected a colon, not a range"); /* fallthrough */ - case Token_Semicolon: // matrix index - if (f->curr_token.kind == Token_Semicolon && f->curr_token.string == "\n") { - syntax_error(f->curr_token, "Expected a ';', not a newline"); - } - /* fallthrough */ + case Token_Comma: // matrix index case Token_Colon: interval = advance_token(f); is_interval = true; @@ -2736,7 +2737,7 @@ Ast *parse_atom_expr(AstFile *f, Ast *operand, bool lhs) { close = expect_token(f, Token_CloseBracket); if (is_interval) { - if (interval.kind == Token_Semicolon) { + if (interval.kind == Token_Comma) { if (indices[0] == nullptr || indices[1] == nullptr) { syntax_error(open, "Matrix index expressions require both row and column indices"); } diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index c7627d09c..10b4494d7 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -117,6 +117,7 @@ TOKEN_KIND(Token__KeywordBegin, ""), \ TOKEN_KIND(Token_or_else, "or_else"), \ TOKEN_KIND(Token_or_return, "or_return"), \ TOKEN_KIND(Token_asm, "asm"), \ + TOKEN_KIND(Token_matrix, "matrix"), \ TOKEN_KIND(Token__KeywordEnd, ""), \ TOKEN_KIND(Token_Count, "") diff --git a/src/types.cpp b/src/types.cpp index 8bce69cf3..8e64a10c1 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -3956,9 +3956,7 @@ gbString write_type_to_string(gbString str, Type *type) { break; case Type_Matrix: - str = gb_string_appendc(str, gb_bprintf("[%d", cast(int)type->Matrix.row_count)); - str = gb_string_appendc(str, "; "); - str = gb_string_appendc(str, 
gb_bprintf("%d]", cast(int)type->Matrix.column_count)); + str = gb_string_appendc(str, gb_bprintf("matrix[%d, %d]", cast(int)type->Matrix.row_count, cast(int)type->Matrix.column_count)); str = write_type_to_string(str, type->Matrix.elem); break; } From 662cbaf425a54127dea206c3a35d776853bac169 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 19 Oct 2021 12:13:19 +0100 Subject: [PATCH 06/47] Support indexing matrices --- core/runtime/error_checks.odin | 23 ++++++++++++ src/check_expr.cpp | 66 ++++++++++++++++++++++++++++++++-- src/checker.cpp | 1 + src/llvm_backend.hpp | 1 + src/llvm_backend_expr.cpp | 54 +++++++++++++++++++++++++++- src/llvm_backend_general.cpp | 30 ++++++++++++++++ src/llvm_backend_utility.cpp | 31 ++++++++++++++++ src/types.cpp | 4 +++ 8 files changed, 206 insertions(+), 4 deletions(-) diff --git a/core/runtime/error_checks.odin b/core/runtime/error_checks.odin index bdd010b50..7f1aeb2d7 100644 --- a/core/runtime/error_checks.odin +++ b/core/runtime/error_checks.odin @@ -96,6 +96,29 @@ dynamic_array_expr_error :: proc "contextless" (file: string, line, column: i32, } +matrix_bounds_check_error :: proc "contextless" (file: string, line, column: i32, row_index, column_index, row_count, column_count: int) { + if 0 <= row_index && row_index < row_count && + 0 <= column_index && column_index < column_count { + return + } + handle_error :: proc "contextless" (file: string, line, column: i32, row_index, column_index, row_count, column_count: int) { + print_caller_location(Source_Code_Location{file, line, column, ""}) + print_string(" Matrix indices [") + print_i64(i64(row_index)) + print_string(", ") + print_i64(i64(column_index)) + print_string(" is out of bounds range [0..<") + print_i64(i64(row_count)) + print_string(", 0..<") + print_i64(i64(column_count)) + print_string("]") + print_byte('\n') + bounds_trap() + } + handle_error(file, line, column, row_index, column_index, row_count, column_count) +} + + type_assertion_check :: proc "contextless" 
(ok: bool, file: string, line, column: i32, from, to: typeid) { if ok { return diff --git a/src/check_expr.cpp b/src/check_expr.cpp index a75334e6c..73e1a7e51 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -6367,8 +6367,7 @@ bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 *max_count, *max_count = t->Matrix.column_count; if (indirection) { o->mode = Addressing_Variable; - } else if (o->mode != Addressing_Variable && - o->mode != Addressing_Constant) { + } else if (o->mode != Addressing_Variable) { o->mode = Addressing_Value; } o->type = alloc_type_array(t->Matrix.elem, t->Matrix.row_count); @@ -6672,7 +6671,68 @@ void check_promote_optional_ok(CheckerContext *c, Operand *x, Type **val_type_, void check_matrix_index_expr(CheckerContext *c, Operand *o, Ast *node, Type *type_hint) { - error(node, "TODO: matrix index expressions"); + ast_node(ie, MatrixIndexExpr, node); + + check_expr(c, o, ie->expr); + node->viral_state_flags |= ie->expr->viral_state_flags; + if (o->mode == Addressing_Invalid) { + o->expr = node; + return; + } + + Type *t = base_type(type_deref(o->type)); + bool is_ptr = is_type_pointer(o->type); + bool is_const = o->mode == Addressing_Constant; + + if (t->kind != Type_Matrix) { + gbString str = expr_to_string(o->expr); + gbString type_str = type_to_string(o->type); + defer (gb_string_free(str)); + defer (gb_string_free(type_str)); + if (is_const) { + error(o->expr, "Cannot use matrix indexing on constant '%s' of type '%s'", str, type_str); + } else { + error(o->expr, "Cannot use matrix indexing on '%s' of type '%s'", str, type_str); + } + o->mode = Addressing_Invalid; + o->expr = node; + return; + } + o->type = t->Matrix.elem; + if (is_ptr) { + o->mode = Addressing_Variable; + } else if (o->mode != Addressing_Variable) { + o->mode = Addressing_Value; + } + + if (ie->row_index == nullptr) { + gbString str = expr_to_string(o->expr); + error(o->expr, "Missing row index for '%s'", str); + gb_string_free(str); + o->mode = 
Addressing_Invalid; + o->expr = node; + return; + } + if (ie->column_index == nullptr) { + gbString str = expr_to_string(o->expr); + error(o->expr, "Missing column index for '%s'", str); + gb_string_free(str); + o->mode = Addressing_Invalid; + o->expr = node; + return; + } + + i64 row_count = t->Matrix.row_count; + i64 column_count = t->Matrix.column_count; + + i64 row_index = 0; + i64 column_index = 0; + bool row_ok = check_index_value(c, t, false, ie->row_index, row_count, &row_index, nullptr); + bool column_ok = check_index_value(c, t, false, ie->column_index, column_count, &column_index, nullptr); + + + gb_unused(row_ok); + gb_unused(column_ok); } diff --git a/src/checker.cpp b/src/checker.cpp index c0e6d47c0..23597167b 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -2022,6 +2022,7 @@ void generate_minimum_dependency_set(Checker *c, Entity *start) { String bounds_check_entities[] = { // Bounds checking related procedures str_lit("bounds_check_error"), + str_lit("matrix_bounds_check_error"), str_lit("slice_expr_error_hi"), str_lit("slice_expr_error_lo_hi"), str_lit("multi_pointer_slice_expr_error"), diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 73ddad797..9041e7621 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -333,6 +333,7 @@ lbValue lb_emit_array_ep(lbProcedure *p, lbValue s, lbValue index); lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection sel); lbValue lb_emit_deep_field_ev(lbProcedure *p, lbValue e, Selection sel); +lbValue lb_emit_matrix_ep(lbProcedure *p, lbValue s, lbValue row, lbValue column); lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isize column); lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isize column); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index ed98c6845..bcbb77355 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -1727,7 +1727,7 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { 
} if (is_type_matrix(dst) && !is_type_matrix(src)) { - GB_ASSERT(dst->Matrix.row_count == dst->Matrix.column_count); + GB_ASSERT_MSG(dst->Matrix.row_count == dst->Matrix.column_count, "%s <- %s", type_to_string(dst), type_to_string(src)); Type *elem = base_array_type(dst); lbValue e = lb_emit_conv(p, value, elem); @@ -2805,6 +2805,10 @@ lbValue lb_build_expr(lbProcedure *p, Ast *expr) { case_ast_node(ie, IndexExpr, expr); return lb_addr_load(p, lb_build_addr(p, expr)); case_end; + + case_ast_node(ie, MatrixIndexExpr, expr); + return lb_addr_load(p, lb_build_addr(p, expr)); + case_end; case_ast_node(ia, InlineAsmExpr, expr); Type *t = type_of_expr(expr); @@ -3304,6 +3308,25 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) { lbValue v = lb_emit_ptr_offset(p, elem, index); return lb_addr(v); } + + case Type_Matrix: { + lbValue matrix = {}; + matrix = lb_build_addr_ptr(p, ie->expr); + if (deref) { + matrix = lb_emit_load(p, matrix); + } + lbValue index = lb_build_expr(p, ie->index); + index = lb_emit_conv(p, index, t_int); + lbValue elem = lb_emit_matrix_ep(p, matrix, lb_const_int(p->module, t_int, 0), index); + elem = lb_emit_conv(p, elem, alloc_type_pointer(type_of_expr(expr))); + + auto index_tv = type_and_value_of_expr(ie->index); + if (index_tv.mode != Addressing_Constant) { + lbValue len = lb_const_int(p->module, t_int, t->Matrix.column_count); + lb_emit_bounds_check(p, ast_token(ie->index), index, len); + } + return lb_addr(elem); + } case Type_Basic: { // Basic_string @@ -3326,6 +3349,35 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) { } } case_end; + + case_ast_node(ie, MatrixIndexExpr, expr); + Type *t = base_type(type_of_expr(ie->expr)); + + bool deref = is_type_pointer(t); + t = base_type(type_deref(t)); + + lbValue m = {}; + m = lb_build_addr_ptr(p, ie->expr); + if (deref) { + m = lb_emit_load(p, m); + } + lbValue row_index = lb_build_expr(p, ie->row_index); + lbValue column_index = lb_build_expr(p, ie->column_index); + row_index = lb_emit_conv(p, 
row_index, t_int); + column_index = lb_emit_conv(p, column_index, t_int); + lbValue elem = lb_emit_matrix_ep(p, m, row_index, column_index); + + auto row_index_tv = type_and_value_of_expr(ie->row_index); + auto column_index_tv = type_and_value_of_expr(ie->column_index); + if (row_index_tv.mode != Addressing_Constant || column_index_tv.mode != Addressing_Constant) { + lbValue row_count = lb_const_int(p->module, t_int, t->Matrix.row_count); + lbValue column_count = lb_const_int(p->module, t_int, t->Matrix.column_count); + lb_emit_matrix_bounds_check(p, ast_token(ie->row_index), row_index, column_index, row_count, column_count); + } + return lb_addr(elem); + + + case_end; case_ast_node(se, SliceExpr, expr); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 63a63349a..01221cad6 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -419,6 +419,36 @@ void lb_emit_bounds_check(lbProcedure *p, Token token, lbValue index, lbValue le lb_emit_runtime_call(p, "bounds_check_error", args); } +void lb_emit_matrix_bounds_check(lbProcedure *p, Token token, lbValue row_index, lbValue column_index, lbValue row_count, lbValue column_count) { + if (build_context.no_bounds_check) { + return; + } + if ((p->state_flags & StateFlag_no_bounds_check) != 0) { + return; + } + + row_index = lb_emit_conv(p, row_index, t_int); + column_index = lb_emit_conv(p, column_index, t_int); + row_count = lb_emit_conv(p, row_count, t_int); + column_count = lb_emit_conv(p, column_count, t_int); + + lbValue file = lb_find_or_add_entity_string(p->module, get_file_path_string(token.pos.file_id)); + lbValue line = lb_const_int(p->module, t_i32, token.pos.line); + lbValue column = lb_const_int(p->module, t_i32, token.pos.column); + + auto args = array_make(permanent_allocator(), 7); + args[0] = file; + args[1] = line; + args[2] = column; + args[3] = row_index; + args[4] = column_index; + args[5] = row_count; + args[6] = column_count; + + 
lb_emit_runtime_call(p, "matrix_bounds_check_error", args); +} + + void lb_emit_multi_pointer_slice_bounds_check(lbProcedure *p, Token token, lbValue low, lbValue high) { if (build_context.no_bounds_check) { return; diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 3971c0ca6..c7e9e1742 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1249,6 +1249,37 @@ lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isize column) { return res; } +lbValue lb_emit_matrix_ep(lbProcedure *p, lbValue s, lbValue row, lbValue column) { + Type *t = s.type; + GB_ASSERT(is_type_pointer(t)); + Type *mt = base_type(type_deref(t)); + GB_ASSERT_MSG(is_type_matrix(mt), "%s", type_to_string(mt)); + + Type *ptr = base_array_type(mt); + + LLVMValueRef stride_elems = lb_const_int(p->module, t_int, matrix_type_stride_in_elems(mt)).value; + + row = lb_emit_conv(p, row, t_int); + column = lb_emit_conv(p, column, t_int); + + LLVMValueRef index = LLVMBuildAdd(p->builder, row.value, LLVMBuildMul(p->builder, column.value, stride_elems, ""), ""); + + LLVMValueRef indices[2] = { + LLVMConstInt(lb_type(p->module, t_int), 0, false), + index, + }; + + lbValue res = {}; + if (lb_is_const(s)) { + res.value = LLVMConstGEP(s.value, indices, gb_count_of(indices)); + } else { + res.value = LLVMBuildGEP(p->builder, s.value, indices, gb_count_of(indices), ""); + } + res.type = alloc_type_pointer(ptr); + return res; +} + + lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isize column) { Type *st = base_type(s.type); GB_ASSERT_MSG(is_type_matrix(st), "%s", type_to_string(st)); diff --git a/src/types.cpp b/src/types.cpp index 8e64a10c1..ec094b4ff 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1726,6 +1726,8 @@ bool is_type_indexable(Type *t) { return true; case Type_RelativeSlice: return true; + case Type_Matrix: + return true; } return false; } @@ -1743,6 +1745,8 @@ bool is_type_sliceable(Type *t) { return false; case 
Type_RelativeSlice: return true; + case Type_Matrix: + return false; } return false; } From 82b6772ea4fa9872a1fb98305814be8cf7f2c7c4 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 00:40:03 +0100 Subject: [PATCH 07/47] Support matrix literals --- core/fmt/fmt.odin | 4 +- core/runtime/core.odin | 2 +- src/check_expr.cpp | 5 ++ src/llvm_backend.hpp | 2 + src/llvm_backend_const.cpp | 83 ++++++++++++++++++++++-- src/llvm_backend_expr.cpp | 119 ++++++++++++++++++++++++++++++++--- src/llvm_backend_type.cpp | 2 +- src/llvm_backend_utility.cpp | 78 +++++++++++++++++++++-- src/parser.cpp | 1 + src/types.cpp | 33 ++++++++++ 10 files changed, 306 insertions(+), 23 deletions(-) diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index c0190a0b9..dc5b529ea 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1967,7 +1967,7 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { for col in 0.. 0 { io.write_string(fi.writer, ", ") } - offset := row*info.elem_size + col*info.stride + offset := (row + col*info.elem_stride)*info.elem_size data := uintptr(v.data) + uintptr(offset) fmt_arg(fi, any{rawptr(data), info.elem.id}, verb) @@ -1980,7 +1980,7 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { for col in 0.. 
0 { io.write_string(fi.writer, "; ") } - offset := row*info.elem_size + col*info.stride + offset := (row + col*info.elem_stride)*info.elem_size data := uintptr(v.data) + uintptr(offset) fmt_arg(fi, any{rawptr(data), info.elem.id}, verb) diff --git a/core/runtime/core.odin b/core/runtime/core.odin index 611b4002c..ba1e81da6 100644 --- a/core/runtime/core.odin +++ b/core/runtime/core.odin @@ -165,7 +165,7 @@ Type_Info_Relative_Slice :: struct { Type_Info_Matrix :: struct { elem: ^Type_Info, elem_size: int, - stride: int, // bytes + elem_stride: int, row_count: int, column_count: int, } diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 73e1a7e51..eb6040320 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -7369,6 +7369,7 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type case Type_Array: case Type_DynamicArray: case Type_SimdVector: + case Type_Matrix: { Type *elem_type = nullptr; String context_name = {}; @@ -7395,6 +7396,10 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type elem_type = t->SimdVector.elem; context_name = str_lit("simd vector literal"); max_type_count = t->SimdVector.count; + } else if (t->kind == Type_Matrix) { + elem_type = t->Matrix.elem; + context_name = str_lit("matrix literal"); + max_type_count = t->Matrix.row_count*t->Matrix.column_count; } else { GB_PANIC("unreachable"); } diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 9041e7621..d2abed354 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -393,6 +393,8 @@ lbValue lb_soa_struct_len(lbProcedure *p, lbValue value); void lb_emit_increment(lbProcedure *p, lbValue addr); lbValue lb_emit_select(lbProcedure *p, lbValue cond, lbValue x, lbValue y); +lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValue c, Type *t); + void lb_fill_slice(lbProcedure *p, lbAddr const &slice, lbValue base_elem, lbValue len); lbValue lb_type_info(lbModule *m, Type *type); diff --git 
a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index 4cfcecdc3..413fb365b 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -523,14 +523,11 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc lbValue single_elem = lb_const_value(m, elem, value, allow_local); single_elem.value = llvm_const_cast(single_elem.value, lb_type(m, elem)); - - i64 stride_bytes = matrix_type_stride(type); - i64 stride_elems = stride_bytes/type_size_of(elem); - + i64 total_elem_count = matrix_type_total_elems(type); LLVMValueRef *elems = gb_alloc_array(permanent_allocator(), LLVMValueRef, cast(isize)total_elem_count); for (i64 i = 0; i < row; i++) { - elems[i*stride_elems + i] = single_elem.value; + elems[matrix_index_to_offset(type, i)] = single_elem.value; } for (i64 i = 0; i < total_elem_count; i++) { if (elems[i] == nullptr) { @@ -984,6 +981,82 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc res.value = LLVMConstInt(lb_type(m, original_type), bits, false); return res; + } else if (is_type_matrix(type)) { + ast_node(cl, CompoundLit, value.value_compound); + Type *elem_type = type->Matrix.elem; + isize elem_count = cl->elems.count; + if (elem_count == 0 || !elem_type_can_be_constant(elem_type)) { + return lb_const_nil(m, original_type); + } + + i64 max_count = type->Matrix.row_count*type->Matrix.column_count; + i64 total_count = matrix_type_total_elems(type); + + LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, cast(isize)total_count); + if (cl->elems[0]->kind == Ast_FieldValue) { + for_array(j, cl->elems) { + Ast *elem = cl->elems[j]; + ast_node(fv, FieldValue, elem); + if (is_ast_range(fv->field)) { + ast_node(ie, BinaryExpr, fv->field); + TypeAndValue lo_tav = ie->left->tav; + TypeAndValue hi_tav = ie->right->tav; + GB_ASSERT(lo_tav.mode == Addressing_Constant); + GB_ASSERT(hi_tav.mode == Addressing_Constant); + + TokenKind op = ie->op.kind; + i64 lo = 
exact_value_to_i64(lo_tav.value); + i64 hi = exact_value_to_i64(hi_tav.value); + if (op != Token_RangeHalf) { + hi += 1; + } + TypeAndValue tav = fv->value->tav; + LLVMValueRef val = lb_const_value(m, elem_type, tav.value, allow_local).value; + for (i64 k = lo; k < hi; k++) { + i64 offset = matrix_index_to_offset(type, k); + GB_ASSERT(values[offset] == nullptr); + values[offset] = val; + } + } else { + TypeAndValue index_tav = fv->field->tav; + GB_ASSERT(index_tav.mode == Addressing_Constant); + i64 index = exact_value_to_i64(index_tav.value); + TypeAndValue tav = fv->value->tav; + LLVMValueRef val = lb_const_value(m, elem_type, tav.value, allow_local).value; + i64 offset = matrix_index_to_offset(type, index); + GB_ASSERT(values[offset] == nullptr); + values[offset] = val; + } + } + + for (i64 i = 0; i < total_count; i++) { + if (values[i] == nullptr) { + values[i] = LLVMConstNull(lb_type(m, elem_type)); + } + } + + res.value = lb_build_constant_array_values(m, type, elem_type, cast(isize)total_count, values, allow_local); + return res; + } else { + GB_ASSERT_MSG(elem_count == max_count, "%td != %td", elem_count, max_count); + + LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, cast(isize)total_count); + + for_array(i, cl->elems) { + TypeAndValue tav = cl->elems[i]->tav; + GB_ASSERT(tav.mode != Addressing_Invalid); + i64 offset = matrix_index_to_offset(type, i); + values[offset] = lb_const_value(m, elem_type, tav.value, allow_local).value; + } + for (isize i = 0; i < total_count; i++) { + if (values[i] == nullptr) { + values[i] = LLVMConstNull(lb_type(m, elem_type)); + } + } + + res.value = lb_build_constant_array_values(m, type, elem_type, cast(isize)total_count, values, allow_local); + return res; + } } else { return lb_const_nil(m, original_type); } diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index bcbb77355..518ce33af 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -648,18 +648,23 @@ 
slow_form: i64 inner = xt->Matrix.column_count; i64 outer_columns = yt->Matrix.column_count; + auto inners = slice_make(permanent_allocator(), inner); + for (i64 j = 0; j < outer_columns; j++) { for (i64 i = 0; i < outer_rows; i++) { + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); for (i64 k = 0; k < inner; k++) { - lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); - lbValue d0 = lb_emit_load(p, dst); - - lbValue a = lb_emit_matrix_ev(p, lhs, i, k); - lbValue b = lb_emit_matrix_ev(p, rhs, k, j); - lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem); - lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem); - lb_emit_store(p, dst, d); + inners[k][0] = lb_emit_matrix_ev(p, lhs, i, k); + inners[k][1] = lb_emit_matrix_ev(p, rhs, k, j); } + + lbValue sum = lb_emit_load(p, dst); + for (i64 k = 0; k < inner; k++) { + lbValue a = inners[k][0]; + lbValue b = inners[k][1]; + sum = lb_emit_mul_add(p, a, b, sum, elem); + } + lb_emit_store(p, dst, sum); } } @@ -3626,6 +3631,7 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) { case Type_Slice: et = bt->Slice.elem; break; case Type_BitSet: et = bt->BitSet.elem; break; case Type_SimdVector: et = bt->SimdVector.elem; break; + case Type_Matrix: et = bt->Matrix.elem; break; } String proc_name = {}; @@ -4157,7 +4163,104 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) { } break; } + + case Type_Matrix: { + if (cl->elems.count > 0) { + lb_addr_store(p, v, lb_const_value(p->module, type, exact_value_compound(expr))); + auto temp_data = array_make(temporary_allocator(), 0, cl->elems.count); + + // NOTE(bill): Separate value, gep, store into their own chunks + for_array(i, cl->elems) { + Ast *elem = cl->elems[i]; + + if (elem->kind == Ast_FieldValue) { + ast_node(fv, FieldValue, elem); + if (lb_is_elem_const(fv->value, et)) { + continue; + } + if (is_ast_range(fv->field)) { + ast_node(ie, BinaryExpr, fv->field); + TypeAndValue lo_tav = ie->left->tav; + TypeAndValue hi_tav = ie->right->tav; + GB_ASSERT(lo_tav.mode == 
Addressing_Constant); + GB_ASSERT(hi_tav.mode == Addressing_Constant); + + TokenKind op = ie->op.kind; + i64 lo = exact_value_to_i64(lo_tav.value); + i64 hi = exact_value_to_i64(hi_tav.value); + if (op != Token_RangeHalf) { + hi += 1; + } + + lbValue value = lb_build_expr(p, fv->value); + + for (i64 k = lo; k < hi; k++) { + lbCompoundLitElemTempData data = {}; + data.value = value; + + data.elem_index = cast(i32)matrix_index_to_offset(bt, k); + array_add(&temp_data, data); + } + + } else { + auto tav = fv->field->tav; + GB_ASSERT(tav.mode == Addressing_Constant); + i64 index = exact_value_to_i64(tav.value); + + lbValue value = lb_build_expr(p, fv->value); + lbCompoundLitElemTempData data = {}; + data.value = lb_emit_conv(p, value, et); + data.expr = fv->value; + + data.elem_index = cast(i32)matrix_index_to_offset(bt, index); + array_add(&temp_data, data); + } + + } else { + if (lb_is_elem_const(elem, et)) { + continue; + } + lbCompoundLitElemTempData data = {}; + data.expr = elem; + data.elem_index = cast(i32)matrix_index_to_offset(bt, i); + array_add(&temp_data, data); + } + } + + for_array(i, temp_data) { + temp_data[i].gep = lb_emit_array_epi(p, lb_addr_get_ptr(p, v), temp_data[i].elem_index); + } + + for_array(i, temp_data) { + lbValue field_expr = temp_data[i].value; + Ast *expr = temp_data[i].expr; + + auto prev_hint = lb_set_copy_elision_hint(p, lb_addr(temp_data[i].gep), expr); + + if (field_expr.value == nullptr) { + field_expr = lb_build_expr(p, expr); + } + Type *t = field_expr.type; + GB_ASSERT(t->kind != Type_Tuple); + lbValue ev = lb_emit_conv(p, field_expr, et); + + if (!p->copy_elision_hint.used) { + temp_data[i].value = ev; + } + + lb_reset_copy_elision_hint(p, prev_hint); + } + + for_array(i, temp_data) { + if (temp_data[i].value.value != nullptr) { + lb_emit_store(p, temp_data[i].gep, temp_data[i].value); + } + } + } + break; + } + } return v; diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 82e20bf60..decb57702 100644 --- 
a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -877,7 +877,7 @@ void lb_setup_type_info_data(lbProcedure *p) { // NOTE(bill): Setup type_info da LLVMValueRef vals[5] = { lb_get_type_info_ptr(m, t->Matrix.elem).value, lb_const_int(m, t_int, ez).value, - lb_const_int(m, t_int, matrix_type_stride(t)).value, + lb_const_int(m, t_int, matrix_type_stride_in_elems(t)).value, lb_const_int(m, t_int, t->Matrix.row_count).value, lb_const_int(m, t_int, t->Matrix.column_count).value, }; diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index c7e9e1742..fb9264661 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1225,18 +1225,53 @@ lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isize column) { Type *t = s.type; GB_ASSERT(is_type_pointer(t)); Type *mt = base_type(type_deref(t)); - GB_ASSERT_MSG(is_type_matrix(mt), "%s", type_to_string(mt)); - + Type *ptr = base_array_type(mt); - i64 stride_elems = matrix_type_stride_in_elems(mt); + if (column == 0) { + GB_ASSERT_MSG(is_type_matrix(mt) || is_type_array_like(mt), "%s", type_to_string(mt)); + + LLVMValueRef indices[2] = { + LLVMConstInt(lb_type(p->module, t_int), 0, false), + LLVMConstInt(lb_type(p->module, t_int), cast(unsigned)row, false), + }; + + lbValue res = {}; + if (lb_is_const(s)) { + res.value = LLVMConstGEP(s.value, indices, gb_count_of(indices)); + } else { + res.value = LLVMBuildGEP(p->builder, s.value, indices, gb_count_of(indices), ""); + } + + Type *ptr = base_array_type(mt); + res.type = alloc_type_pointer(ptr); + return res; + } else if (row == 0 && is_type_array_like(mt)) { + LLVMValueRef indices[2] = { + LLVMConstInt(lb_type(p->module, t_int), 0, false), + LLVMConstInt(lb_type(p->module, t_int), cast(unsigned)column, false), + }; + + lbValue res = {}; + if (lb_is_const(s)) { + res.value = LLVMConstGEP(s.value, indices, gb_count_of(indices)); + } else { + res.value = LLVMBuildGEP(p->builder, s.value, indices, 
gb_count_of(indices), ""); + } + + Type *ptr = base_array_type(mt); + res.type = alloc_type_pointer(ptr); + return res; + } - isize index = row + column*stride_elems; - GB_ASSERT(0 <= index); + + GB_ASSERT_MSG(is_type_matrix(mt), "%s", type_to_string(mt)); + + isize offset = matrix_indices_to_offset(mt, row, column); LLVMValueRef indices[2] = { LLVMConstInt(lb_type(p->module, t_int), 0, false), - LLVMConstInt(lb_type(p->module, t_int), cast(unsigned)index, false), + LLVMConstInt(lb_type(p->module, t_int), cast(unsigned)offset, false), }; lbValue res = {}; @@ -1447,3 +1482,34 @@ lbValue lb_soa_struct_cap(lbProcedure *p, lbValue value) { } return lb_emit_struct_ev(p, value, cast(i32)n); } + + + +lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValue c, Type *t) { + lbModule *m = p->module; + + a = lb_emit_conv(p, a, t); + b = lb_emit_conv(p, b, t); + c = lb_emit_conv(p, c, t); + + if (!is_type_different_to_arch_endianness(t) && is_type_float(t)) { + char const *name = "llvm.fma"; + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + + LLVMTypeRef types[1] = {}; + types[0] = lb_type(m, t); + + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); + LLVMValueRef values[3] = {}; + values[0] = a.value; + values[1] = b.value; + values[2] = c.value; + LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + return {call, t}; + } else { + lbValue x = lb_emit_arith(p, Token_Mul, a, b, t); + lbValue y = lb_emit_arith(p, Token_Add, x, c, t); + return y; + } +} \ No newline at end of file diff --git a/src/parser.cpp b/src/parser.cpp index c29cf70d9..83da481d5 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -2569,6 +2569,7 @@ bool is_literal_type(Ast *node) { case Ast_DynamicArrayType: case Ast_MapType: case Ast_BitSetType: + case Ast_MatrixType: case Ast_CallExpr: return true; } diff --git a/src/types.cpp b/src/types.cpp index 
ec094b4ff..bbabdf732 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1276,6 +1276,39 @@ i64 matrix_type_total_elems(Type *t) { return size/gb_max(elem_size, 1); } +void matrix_indices_from_index(Type *t, i64 index, i64 *row_index_, i64 *column_index_) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + i64 row_count = t->Matrix.row_count; + i64 column_count = t->Matrix.column_count; + GB_ASSERT(0 <= index && index < row_count*column_count); + + i64 row_index = index / column_count; + i64 column_index = index % column_count; + + if (row_index_) *row_index_ = row_index; + if (column_index_) *column_index_ = column_index; +} + +i64 matrix_index_to_offset(Type *t, i64 index) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + + i64 row_index, column_index; + matrix_indices_from_index(t, index, &row_index, &column_index); + i64 stride_elems = matrix_type_stride_in_elems(t); + return stride_elems*column_index + row_index; +} + +i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + GB_ASSERT(0 <= row_index && row_index < t->Matrix.row_count); + GB_ASSERT(0 <= column_index && column_index < t->Matrix.column_count); + i64 stride_elems = matrix_type_stride_in_elems(t); + return stride_elems*column_index + row_index; +} + bool is_type_dynamic_array(Type *t) { t = base_type(t); return t->kind == Type_DynamicArray; From f454ac3150905d640af396b7c7c7582fd2288482 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 00:43:07 +0100 Subject: [PATCH 08/47] Fix %#v for matrices --- core/fmt/fmt.odin | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index dc5b529ea..1f8949002 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1957,7 +1957,7 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { io.write_string(fi.writer, "matrix[") defer io.write_byte(fi.writer, ']') - fi.indent += 1; defer fi.indent -= 1 + 
fi.indent += 1 if fi.hash { io.write_byte(fi.writer, '\n') @@ -1987,6 +1987,12 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { } } } + + fi.indent -= 1 + + if fi.hash { + fmt_write_indent(fi) + } } } From 445696d660804650c7dbaf1fb3f344b59d93fdf4 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 01:06:58 +0100 Subject: [PATCH 09/47] Support parapoly matrices --- src/check_expr.cpp | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/check_expr.cpp b/src/check_expr.cpp index eb6040320..d98430aec 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -1165,6 +1165,67 @@ bool is_polymorphic_type_assignable(CheckerContext *c, Type *poly, Type *source, return key || value; } return false; + + case Type_Matrix: + if (source->kind == Type_Matrix) { + if (poly->Matrix.generic_row_count != nullptr) { + Type *gt = poly->Matrix.generic_row_count; + GB_ASSERT(gt->kind == Type_Generic); + Entity *e = scope_lookup(gt->Generic.scope, gt->Generic.name); + GB_ASSERT(e != nullptr); + if (e->kind == Entity_TypeName) { + poly->Matrix.generic_row_count = nullptr; + poly->Matrix.row_count = source->Matrix.row_count; + + e->kind = Entity_Constant; + e->Constant.value = exact_value_i64(source->Matrix.row_count); + e->type = t_untyped_integer; + } else if (e->kind == Entity_Constant) { + poly->Matrix.generic_row_count = nullptr; + if (e->Constant.value.kind != ExactValue_Integer) { + return false; + } + i64 count = big_int_to_i64(&e->Constant.value.value_integer); + if (count != source->Matrix.row_count) { + return false; + } + poly->Matrix.row_count = source->Matrix.row_count; + } else { + return false; + } + } + if (poly->Matrix.generic_column_count != nullptr) { + Type *gt = poly->Matrix.generic_column_count; + GB_ASSERT(gt->kind == Type_Generic); + Entity *e = scope_lookup(gt->Generic.scope, gt->Generic.name); + GB_ASSERT(e != nullptr); + if (e->kind == Entity_TypeName) { + poly->Matrix.generic_column_count = 
nullptr; + poly->Matrix.column_count = source->Matrix.column_count; + + e->kind = Entity_Constant; + e->Constant.value = exact_value_i64(source->Matrix.column_count); + e->type = t_untyped_integer; + } else if (e->kind == Entity_Constant) { + poly->Matrix.generic_column_count = nullptr; + if (e->Constant.value.kind != ExactValue_Integer) { + return false; + } + i64 count = big_int_to_i64(&e->Constant.value.value_integer); + if (count != source->Matrix.column_count) { + return false; + } + poly->Matrix.column_count = source->Matrix.column_count; + } else { + return false; + } + } + if (poly->Matrix.row_count == source->Matrix.row_count && + poly->Matrix.column_count == source->Matrix.column_count) { + return is_polymorphic_type_assignable(c, poly->Matrix.elem, source->Matrix.elem, true, modify_type); + } + } + return false; } return false; } From afac1a2cc1fd3b9278b9ae0ab5f0905c5945ab54 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 01:17:10 +0100 Subject: [PATCH 10/47] Unify matrix stride calculation --- src/types.cpp | 52 +++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/src/types.cpp b/src/types.cpp index bbabdf732..c23335f3b 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -684,11 +684,13 @@ gb_global Type *t_hasher_proc = nullptr; gb_global RecursiveMutex g_type_mutex; +struct TypePath; -i64 type_size_of (Type *t); -i64 type_align_of (Type *t); -i64 type_offset_of (Type *t, i32 index); -gbString type_to_string (Type *type); +i64 type_size_of (Type *t); +i64 type_align_of (Type *t); +i64 type_offset_of (Type *t, i32 index); +gbString type_to_string (Type *type); +i64 type_size_of_internal(Type *t, TypePath *path); void init_map_internal_types(Type *type); Type * bit_set_to_int(Type *t); bool are_types_identical(Type *x, Type *y); @@ -1248,21 +1250,36 @@ bool is_type_matrix(Type *t) { return t->kind == Type_Matrix; } -i64 matrix_type_stride(Type *t) { +i64 
matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) { // TODO(bill): precompute matrix stride t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); + i64 elem_size; + if (tp != nullptr) { + elem_size = type_size_of_internal(t->Matrix.elem, tp); + } else { + elem_size = type_size_of(t->Matrix.elem); + } + + + /* + [3; 4]f32 -> [4]{x, y, z, _: f32} // extra padding for alignment reasons + */ + + i64 row_count = t->Matrix.row_count; + if (row_count == 1) { + return elem_size; + } + i64 align = type_align_of(t); - i64 elem_size = type_size_of(t->Matrix.elem); - i64 stride = align_formula(elem_size*t->Matrix.row_count, align); - return stride; + return align_formula(elem_size*t->Matrix.row_count, align); } i64 matrix_type_stride_in_elems(Type *t) { // TODO(bill): precompute matrix stride t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); - i64 stride = matrix_type_stride(t); + i64 stride = matrix_type_stride_in_bytes(t, nullptr); return stride/gb_max(1, type_size_of(t->Matrix.elem)); } @@ -3512,23 +3529,14 @@ i64 type_size_of_internal(Type *t, TypePath *path) { } case Type_Matrix: { - Type *elem = t->Matrix.elem; - i64 row_count = t->Matrix.row_count; - i64 column_count = t->Matrix.column_count; - bool pop = type_path_push(path, elem); + bool pop = type_path_push(path, t->Matrix.elem); if (path->failure) { return FAILURE_SIZE; } - i64 elem_size = type_size_of_internal(elem, path); + i64 stride_in_bytes = matrix_type_stride_in_bytes(t, path); if (pop) type_path_pop(path); - i64 align = type_align_of(t); - - /* - [3; 4]f32 -> [4]{x, y, z, _: f32} // extra padding for alignment reasons - */ - - i64 size = align_formula(elem_size * row_count, align) * column_count; - return size; + + return stride_in_bytes * t->Matrix.column_count; } case Type_RelativePointer: From 3f1930b76eac0c7c38a36bfd1cdcf97c91ae527d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 01:26:33 +0100 Subject: [PATCH 11/47] Clean up stride logic --- src/types.cpp | 21 
++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/types.cpp b/src/types.cpp index c23335f3b..eaf1bac74 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -277,6 +277,7 @@ struct TypeProc { i64 column_count; \ Type *generic_row_count; \ Type *generic_column_count; \ + i64 stride_in_bytes; \ }) @@ -1251,9 +1252,14 @@ bool is_type_matrix(Type *t) { } i64 matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) { - // TODO(bill): precompute matrix stride t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); + if (t->Matrix.stride_in_bytes != 0) { + return t->Matrix.stride_in_bytes; + } else if (t->Matrix.row_count == 0) { + return 0; + } + i64 elem_size; if (tp != nullptr) { elem_size = type_size_of_internal(t->Matrix.elem, tp); @@ -1265,18 +1271,20 @@ i64 matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) { /* [3; 4]f32 -> [4]{x, y, z, _: f32} // extra padding for alignment reasons */ + i64 stride_in_bytes = 0; i64 row_count = t->Matrix.row_count; if (row_count == 1) { - return elem_size; + stride_in_bytes = elem_size; + } else { + i64 matrix_alignment = type_align_of(t); + stride_in_bytes = align_formula(elem_size*t->Matrix.row_count, matrix_alignment); } - - i64 align = type_align_of(t); - return align_formula(elem_size*t->Matrix.row_count, align); + t->Matrix.stride_in_bytes = stride_in_bytes; + return stride_in_bytes; } i64 matrix_type_stride_in_elems(Type *t) { - // TODO(bill): precompute matrix stride t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); i64 stride = matrix_type_stride_in_bytes(t, nullptr); @@ -1285,7 +1293,6 @@ i64 matrix_type_stride_in_elems(Type *t) { i64 matrix_type_total_elems(Type *t) { - // TODO(bill): precompute matrix total elems t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); i64 size = type_size_of(t); From 3eaac057da11d28cbedd7321f9f6368588b0b4ee Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 01:26:55 +0100 Subject: [PATCH 12/47] Unify `polymorphic_assign_index` logic 
--- src/check_expr.cpp | 99 ++++++++++++++++------------------------------ 1 file changed, 33 insertions(+), 66 deletions(-) diff --git a/src/check_expr.cpp b/src/check_expr.cpp index d98430aec..299810ce0 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -897,6 +897,34 @@ void check_assignment(CheckerContext *c, Operand *operand, Type *type, String co } } +bool polymorphic_assign_index(Type **gt_, i64 *dst_count, i64 source_count) { + Type *gt = *gt_; + + GB_ASSERT(gt->kind == Type_Generic); + Entity *e = scope_lookup(gt->Generic.scope, gt->Generic.name); + GB_ASSERT(e != nullptr); + if (e->kind == Entity_TypeName) { + *gt_ = nullptr; + *dst_count = source_count; + + e->kind = Entity_Constant; + e->Constant.value = exact_value_i64(source_count); + e->type = t_untyped_integer; + return true; + } else if (e->kind == Entity_Constant) { + *gt_ = nullptr; + if (e->Constant.value.kind != ExactValue_Integer) { + return false; + } + i64 count = big_int_to_i64(&e->Constant.value.value_integer); + if (count != source_count) { + return false; + } + *dst_count = source_count; + return true; + } + return false; +} bool is_polymorphic_type_assignable(CheckerContext *c, Type *poly, Type *source, bool compound, bool modify_type) { Operand o = {Addressing_Value}; @@ -951,28 +979,7 @@ bool is_polymorphic_type_assignable(CheckerContext *c, Type *poly, Type *source, case Type_Array: if (source->kind == Type_Array) { if (poly->Array.generic_count != nullptr) { - Type *gt = poly->Array.generic_count; - GB_ASSERT(gt->kind == Type_Generic); - Entity *e = scope_lookup(gt->Generic.scope, gt->Generic.name); - GB_ASSERT(e != nullptr); - if (e->kind == Entity_TypeName) { - poly->Array.generic_count = nullptr; - poly->Array.count = source->Array.count; - - e->kind = Entity_Constant; - e->Constant.value = exact_value_i64(source->Array.count); - e->type = t_untyped_integer; - } else if (e->kind == Entity_Constant) { - poly->Array.generic_count = nullptr; - if (e->Constant.value.kind != 
ExactValue_Integer) { - return false; - } - i64 count = big_int_to_i64(&e->Constant.value.value_integer); - if (count != source->Array.count) { - return false; - } - poly->Array.count = source->Array.count; - } else { + if (!polymorphic_assign_index(&poly->Array.generic_count, &poly->Array.count, source->Array.count)) { return false; } } @@ -1169,54 +1176,14 @@ bool is_polymorphic_type_assignable(CheckerContext *c, Type *poly, Type *source, case Type_Matrix: if (source->kind == Type_Matrix) { if (poly->Matrix.generic_row_count != nullptr) { - Type *gt = poly->Matrix.generic_row_count; - GB_ASSERT(gt->kind == Type_Generic); - Entity *e = scope_lookup(gt->Generic.scope, gt->Generic.name); - GB_ASSERT(e != nullptr); - if (e->kind == Entity_TypeName) { - poly->Matrix.generic_row_count = nullptr; - poly->Matrix.row_count = source->Matrix.row_count; - - e->kind = Entity_Constant; - e->Constant.value = exact_value_i64(source->Matrix.row_count); - e->type = t_untyped_integer; - } else if (e->kind == Entity_Constant) { - poly->Matrix.generic_row_count = nullptr; - if (e->Constant.value.kind != ExactValue_Integer) { - return false; - } - i64 count = big_int_to_i64(&e->Constant.value.value_integer); - if (count != source->Matrix.row_count) { - return false; - } - poly->Matrix.row_count = source->Matrix.row_count; - } else { + poly->Matrix.stride_in_bytes = 0; + if (!polymorphic_assign_index(&poly->Matrix.generic_row_count, &poly->Matrix.row_count, source->Matrix.row_count)) { return false; } } if (poly->Matrix.generic_column_count != nullptr) { - Type *gt = poly->Matrix.generic_column_count; - GB_ASSERT(gt->kind == Type_Generic); - Entity *e = scope_lookup(gt->Generic.scope, gt->Generic.name); - GB_ASSERT(e != nullptr); - if (e->kind == Entity_TypeName) { - poly->Matrix.generic_column_count = nullptr; - poly->Matrix.column_count = source->Matrix.column_count; - - e->kind = Entity_Constant; - e->Constant.value = exact_value_i64(source->Matrix.column_count); - e->type = 
t_untyped_integer; - } else if (e->kind == Entity_Constant) { - poly->Matrix.generic_column_count = nullptr; - if (e->Constant.value.kind != ExactValue_Integer) { - return false; - } - i64 count = big_int_to_i64(&e->Constant.value.value_integer); - if (count != source->Matrix.column_count) { - return false; - } - poly->Matrix.column_count = source->Matrix.column_count; - } else { + poly->Matrix.stride_in_bytes = 0; + if (!polymorphic_assign_index(&poly->Matrix.generic_column_count, &poly->Matrix.column_count, source->Matrix.column_count)) { return false; } } From 7faca7066c30d6e663b268dc1e8ec66710ae3dd5 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 01:51:16 +0100 Subject: [PATCH 13/47] Add builtin `transpose` --- src/check_builtin.cpp | 34 ++++++++- src/check_expr.cpp | 36 +++++---- src/checker_builtin_procs.hpp | 4 + src/llvm_backend_expr.cpp | 135 +++++----------------------------- src/llvm_backend_proc.cpp | 6 ++ 5 files changed, 81 insertions(+), 134 deletions(-) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index a04302d01..659a74ad7 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1966,13 +1966,13 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 return false; } if (!is_operand_value(x)) { - error(call, "'soa_unzip' expects an #soa slice"); + error(call, "'%.*s' expects an #soa slice", LIT(builtin_name)); return false; } Type *t = base_type(x.type); if (!is_type_soa_struct(t) || t->Struct.soa_kind != StructSoa_Slice) { gbString s = type_to_string(x.type); - error(call, "'soa_unzip' expects an #soa slice, got %s", s); + error(call, "'%.*s' expects an #soa slice, got %s", LIT(builtin_name), s); gb_string_free(s); return false; } @@ -1987,6 +1987,36 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 operand->mode = Addressing_Value; break; } + + case BuiltinProc_transpose: { + Operand x = {}; + check_expr(c, &x, ce->args[0]); + if (x.mode == 
Addressing_Invalid) { + return false; + } + if (!is_operand_value(x)) { + error(call, "'%.*s' expects a matrix or array", LIT(builtin_name)); + return false; + } + Type *t = base_type(x.type); + if (!is_type_matrix(t) && !is_type_array(t)) { + gbString s = type_to_string(x.type); + error(call, "'%.*s' expects a matrix or array, got %s", LIT(builtin_name), s); + gb_string_free(s); + return false; + } + + operand->mode = Addressing_Value; + if (is_type_array(t)) { + // Do nothing + operand->type = x.type; + } else { + GB_ASSERT(t->kind == Type_Matrix); + operand->type = alloc_type_matrix(t->Matrix.elem, t->Matrix.column_count, t->Matrix.row_count); + } + operand->type = check_matrix_type_hint(operand->type, type_hint); + break; + } case BuiltinProc_simd_vector: { Operand x = {}; diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 299810ce0..8a1e5fd86 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2708,6 +2708,25 @@ bool can_use_other_type_as_type_hint(bool use_lhs_as_type_hint, Type *other_type return false; } +Type *check_matrix_type_hint(Type *matrix, Type *type_hint) { + Type *xt = base_type(matrix); + if (type_hint != nullptr) { + Type *th = base_type(type_hint); + if (are_types_identical(th, xt)) { + return type_hint; + } else if (xt->kind == Type_Matrix && th->kind == Type_Array) { + if (!are_types_identical(xt->Matrix.elem, th->Array.elem)) { + // ignore + } else if (xt->Matrix.row_count == 1 && xt->Matrix.column_count == th->Array.count) { + return type_hint; + } else if (xt->Matrix.column_count == 1 && xt->Matrix.row_count == th->Array.count) { + return type_hint; + } + } + } + return matrix; +} + void check_binary_matrix(CheckerContext *c, Token const &op, Operand *x, Operand *y, Type *type_hint, bool use_lhs_as_type_hint) { if (!check_binary_op(c, x, op)) { @@ -2791,21 +2810,8 @@ void check_binary_matrix(CheckerContext *c, Token const &op, Operand *x, Operand } matrix_success: - if (type_hint != nullptr) { - Type *th = 
base_type(type_hint); - if (are_types_identical(th, x->type)) { - x->type = type_hint; - } else if (x->type->kind == Type_Matrix && th->kind == Type_Array) { - Type *xt = x->type; - if (!are_types_identical(xt->Matrix.elem, th->Array.elem)) { - // ignore - } else if (xt->Matrix.row_count == 1 && xt->Matrix.column_count == th->Array.count) { - x->type = type_hint; - } else if (xt->Matrix.column_count == 1 && xt->Matrix.row_count == th->Array.count) { - x->type = type_hint; - } - } - } + x->type = check_matrix_type_hint(x->type, type_hint); + return; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 8991d2d5c..21a33bdd3 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -34,6 +34,8 @@ enum BuiltinProcId { BuiltinProc_soa_zip, BuiltinProc_soa_unzip, + + BuiltinProc_transpose, BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures @@ -274,6 +276,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("soa_zip"), 1, true, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("soa_unzip"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, + + {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT(""), 0, true, Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 518ce33af..d41a0a127 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -502,116 +502,29 @@ bool lb_matrix_elem_simple(Type *t) { return true; } -LLVMValueRef llvm_matrix_column_major_load(lbProcedure *p, lbValue lhs) { - lbModule *m = p->module; - - Type *mt = base_type(lhs.type); +lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { + if (is_type_array(m.type)) { + m.type = type; + return m; + } + Type *mt = base_type(m.type); GB_ASSERT(mt->kind == Type_Matrix); - GB_ASSERT(lb_matrix_elem_simple(mt)); + lbAddr res = lb_add_local_generated(p, type, true); - i64 stride = 
matrix_type_stride_in_elems(mt); - i64 rows = mt->Matrix.row_count; - i64 columns = mt->Matrix.column_count; - unsigned elem_count = cast(unsigned)(rows*columns); - - Type *elem = mt->Matrix.elem; - LLVMTypeRef elem_type = lb_type(m, elem); - - LLVMTypeRef vector_type = LLVMVectorType(elem_type, elem_count); - LLVMTypeRef types[] = {vector_type}; - - char const *name = "llvm.matrix.column.major.load"; - unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); - GB_ASSERT_MSG(id != 0, "Unable to find %s", name); - LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); - - lbValue ptr = lb_address_from_load_or_generate_local(p, lhs); - ptr = lb_emit_matrix_epi(p, ptr, 0, 0); - - LLVMValueRef values[5] = {}; - values[0] = ptr.value; - values[1] = lb_const_int(m, t_u64, stride).value; - values[2] = LLVMConstNull(lb_type(m, t_llvm_bool)); - values[3] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; - values[4] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; - - LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); - gb_printf_err("%s\n", LLVMPrintValueToString(call)); - // LLVMAddAttributeAtIndex(call, 0, lb_create_enum_attribute(p->module->ctx, "align", cast(u64)type_align_of(mt))); - return call; + i64 row_count = mt->Matrix.row_count; + i64 column_count = mt->Matrix.column_count; + for (i64 j = 0; j < column_count; j++) { + for (i64 i = 0; i < row_count; i++) { + lbValue src = lb_emit_matrix_ev(p, m, i, j); + lbValue dst = lb_emit_matrix_epi(p, res.addr, j, i); + lb_emit_store(p, dst, src); + } + } + return lb_addr_load(p, res); + } -void llvm_matrix_column_major_store(lbProcedure *p, lbAddr addr, LLVMValueRef vector_value) { - lbModule *m = p->module; - - Type *mt = base_type(lb_addr_type(addr)); - GB_ASSERT(mt->kind == Type_Matrix); - GB_ASSERT(lb_matrix_elem_simple(mt)); - - LLVMTypeRef vector_type = LLVMTypeOf(vector_value); - LLVMTypeRef types[] = {vector_type}; - - char const *name = 
"llvm.matrix.column.major.store"; - unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); - GB_ASSERT_MSG(id != 0, "Unable to find %s", name); - LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); - - lbValue ptr = lb_addr_get_ptr(p, addr); - ptr = lb_emit_matrix_epi(p, ptr, 0, 0); - - unsigned vector_size = LLVMGetVectorSize(vector_type); - GB_ASSERT((mt->Matrix.row_count*mt->Matrix.column_count) == cast(i64)vector_size); - - i64 stride = matrix_type_stride_in_elems(mt); - - LLVMValueRef values[6] = {}; - values[0] = vector_value; - values[1] = ptr.value; - values[2] = lb_const_int(m, t_u64, stride).value; - values[3] = LLVMConstNull(lb_type(m, t_llvm_bool)); - values[4] = lb_const_int(m, t_u32, mt->Matrix.row_count).value; - values[5] = lb_const_int(m, t_u32, mt->Matrix.column_count).value; - - LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); - gb_printf_err("%s\n", LLVMPrintValueToString(call)); - // LLVMAddAttributeAtIndex(call, 1, lb_create_enum_attribute(p->module->ctx, "align", cast(u64)type_align_of(mt))); - gb_unused(call); -} - - -LLVMValueRef llvm_matrix_multiply(lbProcedure *p, LLVMValueRef a, LLVMValueRef b, i64 outer_rows, i64 inner, i64 outer_columns) { - lbModule *m = p->module; - - LLVMTypeRef a_type = LLVMTypeOf(a); - LLVMTypeRef b_type = LLVMTypeOf(b); - - GB_ASSERT(LLVMGetElementType(a_type) == LLVMGetElementType(b_type)); - - LLVMTypeRef elem_type = LLVMGetElementType(a_type); - - LLVMTypeRef res_vector_type = LLVMVectorType(elem_type, cast(unsigned)(outer_rows*outer_columns)); - - LLVMTypeRef types[] = {res_vector_type, a_type, b_type}; - - char const *name = "llvm.matrix.multiply"; - unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); - GB_ASSERT_MSG(id != 0, "Unable to find %s", name); - LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); - - LLVMValueRef values[5] = {}; - values[0] = a; - values[1] = b; - values[2] = 
lb_const_int(m, t_u32, outer_rows).value; - values[3] = lb_const_int(m, t_u32, inner).value; - values[4] = lb_const_int(m, t_u32, outer_columns).value; - - LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); - gb_printf_err("%s\n", LLVMPrintValueToString(call)); - return call; -} - - lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { Type *xt = base_type(lhs.type); Type *yt = base_type(rhs.type); @@ -626,18 +539,6 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) goto slow_form; } - if (false) { - // TODO(bill): LLVM ERROR: Do not know how to split the result of this operator! - lbAddr res = lb_add_local_generated(p, type, true); - - LLVMValueRef a = llvm_matrix_column_major_load(p, lhs); gb_unused(a); - LLVMValueRef b = llvm_matrix_column_major_load(p, rhs); gb_unused(b); - LLVMValueRef c = llvm_matrix_multiply(p, a, b, xt->Matrix.row_count, xt->Matrix.column_count, yt->Matrix.column_count); gb_unused(c); - llvm_matrix_column_major_store(p, res, c); - - return lb_addr_load(p, res); - } - slow_form: { Type *elem = xt->Matrix.elem; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 222161164..1431fffaa 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1257,6 +1257,12 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, return lb_soa_zip(p, ce, tv); case BuiltinProc_soa_unzip: return lb_soa_unzip(p, ce, tv); + + case BuiltinProc_transpose: + { + lbValue m = lb_build_expr(p, ce->args[0]); + return lb_emit_matrix_tranpose(p, m, tv.type); + } // "Intrinsics" From 68afbb37f40b10fd01dda9e5640cc7ae2535a371 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 02:06:56 +0100 Subject: [PATCH 14/47] Add builtin `outer_product` --- src/check_builtin.cpp | 60 +++++++++++++++++++++++++++++++++++ src/checker_builtin_procs.hpp | 2 ++ src/llvm_backend_expr.cpp | 32 +++++++++++++++++++ 
src/llvm_backend_proc.cpp | 8 +++++ 4 files changed, 102 insertions(+) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 659a74ad7..1d033932f 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2017,6 +2017,66 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 operand->type = check_matrix_type_hint(operand->type, type_hint); break; } + + case BuiltinProc_outer_product: { + Operand x = {}; + Operand y = {}; + check_expr(c, &x, ce->args[0]); + if (x.mode == Addressing_Invalid) { + return false; + } + check_expr(c, &y, ce->args[1]); + if (y.mode == Addressing_Invalid) { + return false; + } + if (!is_operand_value(x) || !is_operand_value(y)) { + error(call, "'%.*s' expects only arrays", LIT(builtin_name)); + return false; + } + + if (!is_type_array(x.type) && !is_type_array(y.type)) { + gbString s1 = type_to_string(x.type); + gbString s2 = type_to_string(y.type); + error(call, "'%.*s' expects only arrays, got %s and %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + Type *xt = base_type(x.type); + Type *yt = base_type(y.type); + GB_ASSERT(xt->kind == Type_Array); + GB_ASSERT(yt->kind == Type_Array); + if (!are_types_identical(xt->Array.elem, yt->Array.elem)) { + gbString s1 = type_to_string(xt->Array.elem); + gbString s2 = type_to_string(yt->Array.elem); + error(call, "'%.*s' mismatched element types, got %s vs %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + if (xt->Array.count == 0 || yt->Array.count == 0) { + gbString s1 = type_to_string(x.type); + gbString s2 = type_to_string(y.type); + error(call, "'%.*s' expects only arrays of non-zero length, got %s and %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + i64 max_count = xt->Array.count*yt->Array.count; + if (max_count > MAX_MATRIX_ELEMENT_COUNT) { + error(call, "Product of the array lengths exceed the 
maximum matrix element count, got %d, expected a maximum of %d", cast(int)max_count, MAX_MATRIX_ELEMENT_COUNT); + return false; + } + + operand->mode = Addressing_Value; + operand->type = alloc_type_matrix(xt->Array.elem, xt->Array.count, yt->Array.count); + operand->type = check_matrix_type_hint(operand->type, type_hint); + break; + } + case BuiltinProc_simd_vector: { Operand x = {}; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 21a33bdd3..2c7392b09 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -36,6 +36,7 @@ enum BuiltinProcId { BuiltinProc_soa_unzip, BuiltinProc_transpose, + BuiltinProc_outer_product, BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures @@ -278,6 +279,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("soa_unzip"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, + {STR_LIT("outer_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT(""), 0, true, Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index d41a0a127..27f12a829 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -522,9 +522,41 @@ lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { } } return lb_addr_load(p, res); +} + + +lbValue lb_emit_outer_product(lbProcedure *p, lbValue a, lbValue b, Type *type) { + Type *mt = base_type(type); + Type *at = base_type(a.type); + Type *bt = base_type(b.type); + GB_ASSERT(mt->kind == Type_Matrix); + GB_ASSERT(at->kind == Type_Array); + GB_ASSERT(bt->kind == Type_Array); + + + i64 row_count = mt->Matrix.row_count; + i64 column_count = mt->Matrix.column_count; + + GB_ASSERT(row_count == at->Array.count); + GB_ASSERT(column_count == bt->Array.count); + + + lbAddr res = lb_add_local_generated(p, type, true); + + for (i64 j = 0; j < 
column_count; j++) { + for (i64 i = 0; i < row_count; i++) { + lbValue x = lb_emit_struct_ev(p, a, cast(i32)i); + lbValue y = lb_emit_struct_ev(p, b, cast(i32)j); + lbValue src = lb_emit_arith(p, Token_Mul, x, y, mt->Matrix.elem); + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + lb_emit_store(p, dst, src); + } + } + return lb_addr_load(p, res); } + lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { Type *xt = base_type(lhs.type); Type *yt = base_type(rhs.type); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 1431fffaa..5a7fc1626 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1263,6 +1263,14 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, lbValue m = lb_build_expr(p, ce->args[0]); return lb_emit_matrix_tranpose(p, m, tv.type); } + + case BuiltinProc_outer_product: + { + lbValue a = lb_build_expr(p, ce->args[0]); + lbValue b = lb_build_expr(p, ce->args[1]); + return lb_emit_outer_product(p, a, b, tv.type); + } + // "Intrinsics" From cee45c1b155fcc917c2b0f9cfdbfa060304255e1 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 02:18:30 +0100 Subject: [PATCH 15/47] Add `hadamard_product` --- src/check_builtin.cpp | 56 ++++++++++++++++++++++++++++++++++- src/check_type.cpp | 12 ++------ src/checker_builtin_procs.hpp | 2 ++ src/llvm_backend_expr.cpp | 6 ++-- src/llvm_backend_proc.cpp | 10 +++++++ src/types.cpp | 11 +++++++ 6 files changed, 84 insertions(+), 13 deletions(-) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 1d033932f..a9427d4e0 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2056,6 +2056,14 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 return false; } + Type *elem = xt->Array.elem; + + if (!is_type_valid_for_matrix_elems(elem)) { + gbString s = type_to_string(elem); + error(call, "Matrix elements types are limited to integers, floats, and complex, 
got %s", s); + gb_string_free(s); + } + if (xt->Array.count == 0 || yt->Array.count == 0) { gbString s1 = type_to_string(x.type); gbString s2 = type_to_string(y.type); @@ -2072,7 +2080,53 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 } operand->mode = Addressing_Value; - operand->type = alloc_type_matrix(xt->Array.elem, xt->Array.count, yt->Array.count); + operand->type = alloc_type_matrix(elem, xt->Array.count, yt->Array.count); + operand->type = check_matrix_type_hint(operand->type, type_hint); + break; + } + + case BuiltinProc_hadamard_product: { + Operand x = {}; + Operand y = {}; + check_expr(c, &x, ce->args[0]); + if (x.mode == Addressing_Invalid) { + return false; + } + check_expr(c, &y, ce->args[1]); + if (y.mode == Addressing_Invalid) { + return false; + } + if (!is_operand_value(x) || !is_operand_value(y)) { + error(call, "'%.*s' expects a matrix or array types", LIT(builtin_name)); + return false; + } + if (!is_type_matrix(x.type) && !is_type_array(y.type)) { + gbString s1 = type_to_string(x.type); + gbString s2 = type_to_string(y.type); + error(call, "'%.*s' expects matrix or array values, got %s and %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + if (!are_types_identical(x.type, y.type)) { + gbString s1 = type_to_string(x.type); + gbString s2 = type_to_string(y.type); + error(call, "'%.*s' values of the same type, got %s and %s", LIT(builtin_name), s1, s2); + gb_string_free(s2); + gb_string_free(s1); + return false; + } + + Type *elem = core_array_type(x.type); + if (!is_type_valid_for_matrix_elems(elem)) { + gbString s = type_to_string(elem); + error(call, "'%.*s' expects elements to be types are limited to integers, floats, and complex, got %s", LIT(builtin_name), s); + gb_string_free(s); + } + + operand->mode = Addressing_Value; + operand->type = x.type; operand->type = check_matrix_type_hint(operand->type, type_hint); break; } diff --git a/src/check_type.cpp 
b/src/check_type.cpp index e752f192d..d9302c65a 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -997,8 +997,8 @@ void check_bit_set_type(CheckerContext *c, Type *type, Type *named_type, Ast *no GB_ASSERT(lower <= upper); - i64 bits = MAX_BITS; - if (bs->underlying != nullptr) { + i64 bits = MAX_BITS +; if (bs->underlying != nullptr) { Type *u = check_type(c, bs->underlying); if (!is_type_integer(u)) { gbString ts = type_to_string(u); @@ -2239,13 +2239,7 @@ void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) { error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MAX_MATRIX_ELEMENT_COUNT, cast(long long)element_count); } - if (is_type_integer(elem)) { - // okay - } else if (is_type_float(elem)) { - // okay - } else if (is_type_complex(elem)) { - // okay - } else { + if (!is_type_valid_for_matrix_elems(elem)) { gbString s = type_to_string(elem); error(column.expr, "Matrix elements types are limited to integers, floats, and complex, got %s", s); gb_string_free(s); diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 2c7392b09..de4e99d14 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -37,6 +37,7 @@ enum BuiltinProcId { BuiltinProc_transpose, BuiltinProc_outer_product, + BuiltinProc_hadamard_product, BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures @@ -280,6 +281,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("outer_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, + {STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT(""), 0, true, Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 27f12a829..b894bc7b8 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -672,13 +672,13 @@ lbValue 
lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type -lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type) { +lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type, bool component_wise=false) { GB_ASSERT(is_type_matrix(lhs.type) || is_type_matrix(rhs.type)); Type *xt = base_type(lhs.type); Type *yt = base_type(rhs.type); - if (op == Token_Mul) { + if (op == Token_Mul && !component_wise) { if (xt->kind == Type_Matrix) { if (yt->kind == Type_Matrix) { return lb_emit_matrix_mul(p, lhs, rhs, type); @@ -703,7 +703,7 @@ lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue array_lhs.type = array_type; array_rhs.type = array_type; - lbValue array = lb_emit_arith_array(p, op, array_lhs, array_rhs, type); + lbValue array = lb_emit_arith_array(p, op, array_lhs, array_rhs, array_type); array.type = type; return array; } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 5a7fc1626..da4e4ad28 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1270,6 +1270,16 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, lbValue b = lb_build_expr(p, ce->args[1]); return lb_emit_outer_product(p, a, b, tv.type); } + case BuiltinProc_hadamard_product: + { + lbValue a = lb_build_expr(p, ce->args[0]); + lbValue b = lb_build_expr(p, ce->args[1]); + if (is_type_array(tv.type)) { + return lb_emit_arith(p, Token_Mul, a, b, tv.type); + } + GB_ASSERT(is_type_matrix(tv.type)); + return lb_emit_arith_matrix(p, Token_Mul, a, b, tv.type, true); + } // "Intrinsics" diff --git a/src/types.cpp b/src/types.cpp index eaf1bac74..32e26bcc6 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1333,6 +1333,17 @@ i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) { return stride_elems*column_index + row_index; } +bool is_type_valid_for_matrix_elems(Type *t) { + if (is_type_integer(t)) { + 
return true; + } else if (is_type_float(t)) { + return true; + } else if (is_type_complex(t)) { + return true; + } + return false; +} + bool is_type_dynamic_array(Type *t) { t = base_type(t); return t->kind == Type_DynamicArray; From 07bf64ae5243d3e2f38da9cf9da81ef7a99a6f44 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 11:42:06 +0100 Subject: [PATCH 16/47] Transform `matrix` to `mat` identifier for Vulkan --- vendor/vulkan/_gen/create_vulkan_odin_wrapper.py | 4 ++-- vendor/vulkan/enums.odin | 2 -- vendor/vulkan/procedures.odin | 2 -- vendor/vulkan/structs.odin | 4 +--- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/vendor/vulkan/_gen/create_vulkan_odin_wrapper.py b/vendor/vulkan/_gen/create_vulkan_odin_wrapper.py index 4f4477812..6ea2c3717 100644 --- a/vendor/vulkan/_gen/create_vulkan_odin_wrapper.py +++ b/vendor/vulkan/_gen/create_vulkan_odin_wrapper.py @@ -405,8 +405,8 @@ def parse_structs(f): if "Flag_Bits" in type_: comment = " // only single bit set" t = do_type(type_, prev_name, fname) - if t == "Structure_Type" and n == "type": - n = "s_type" + if n == "matrix": + n = "mat" ffields.append(tuple([n, t, comment])) prev_name = fname diff --git a/vendor/vulkan/enums.odin b/vendor/vulkan/enums.odin index be6691ab4..d468e7fa1 100644 --- a/vendor/vulkan/enums.odin +++ b/vendor/vulkan/enums.odin @@ -3,8 +3,6 @@ // package vulkan -import "core:c" - // Enums AccelerationStructureBuildTypeKHR :: enum c.int { HOST = 0, diff --git a/vendor/vulkan/procedures.odin b/vendor/vulkan/procedures.odin index b40523b6d..f585215e4 100644 --- a/vendor/vulkan/procedures.odin +++ b/vendor/vulkan/procedures.odin @@ -3,8 +3,6 @@ // package vulkan -import "core:c" - // Procedure Types ProcAllocationFunction :: #type proc "system" (pUserData: rawptr, size: int, alignment: int, allocationScope: SystemAllocationScope) -> rawptr diff --git a/vendor/vulkan/structs.odin b/vendor/vulkan/structs.odin index 24d47489a..ece398cde 100644 --- 
a/vendor/vulkan/structs.odin +++ b/vendor/vulkan/structs.odin @@ -3,8 +3,6 @@ // package vulkan -import "core:c" - when ODIN_OS == "windows" { import win32 "core:sys/windows" @@ -3750,7 +3748,7 @@ PhysicalDeviceRayTracingPropertiesNV :: struct { } TransformMatrixKHR :: struct { - matrix: [3][4]f32, + mat: [3][4]f32, } AabbPositionsKHR :: struct { From 0fd525d7789d2a0786b28677c5dd4cbd263f4537 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 12:39:38 +0100 Subject: [PATCH 17/47] Make `lb_emit_matrix_mul_vector` use SIMD if possible --- src/llvm_backend_expr.cpp | 68 ++++++++++++++++++++++++++++++++++-- src/llvm_backend_utility.cpp | 32 +++++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index b894bc7b8..6cb221a94 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -567,11 +567,10 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count); GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem)); - if (!lb_matrix_elem_simple(xt)) { - goto slow_form; + if (lb_matrix_elem_simple(xt)) { + // TODO(bill): SIMD version } -slow_form: { Type *elem = xt->Matrix.elem; @@ -618,6 +617,69 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type GB_ASSERT(are_types_identical(mt->Matrix.elem, base_array_type(vt))); Type *elem = mt->Matrix.elem; + LLVMTypeRef elem_type = lb_type(p->module, elem); + + unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); + + if (lb_matrix_elem_simple(mt)) { + unsigned row_count = cast(unsigned)mt->Matrix.row_count; gb_unused(row_count); + unsigned column_count = cast(unsigned)mt->Matrix.column_count; + auto m_columns = slice_make(permanent_allocator(), column_count); + auto v_rows = slice_make(permanent_allocator(), column_count); + + unsigned total_count = cast(unsigned)matrix_type_total_elems(mt); + 
LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); + + LLVMValueRef lhs_ptr = lb_address_from_load_or_generate_local(p, lhs).value; + LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, lhs_ptr, LLVMPointerType(total_matrix_type, 0), ""); + LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, ""); + + + for (unsigned column_index = 0; column_index < column_count; column_index++) { + LLVMValueRef mask = llvm_mask_iota(p->module, stride*column_index, row_count); + LLVMValueRef column = LLVMBuildShuffleVector(p->builder, matrix_vector, LLVMGetUndef(LLVMTypeOf(matrix_vector)), mask, ""); + m_columns[column_index] = column; + } + + for (unsigned row_index = 0; row_index < column_count; row_index++) { + LLVMValueRef value = lb_emit_struct_ev(p, rhs, row_index).value; + LLVMValueRef row = llvm_splat(p, value, row_count); + v_rows[row_index] = row; + } + + GB_ASSERT(column_count > 0); + + LLVMValueRef vector = nullptr; + if (is_type_float(elem)) { + for (i64 i = 0; i < column_count; i++) { + LLVMValueRef product = LLVMBuildFMul(p->builder, m_columns[i], v_rows[i], ""); + if (i == 0) { + vector = product; + } else { + vector = LLVMBuildFAdd(p->builder, vector, product, ""); + } + } + } else { + for (i64 i = 0; i < column_count; i++) { + LLVMValueRef product = LLVMBuildMul(p->builder, m_columns[i], v_rows[i], ""); + if (i == 0) { + vector = product; + } else { + vector = LLVMBuildAdd(p->builder, vector, product, ""); + } + } + } + + lbAddr res = lb_add_local_generated(p, type, true); + LLVMValueRef res_ptr = res.addr.value; + unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(LLVMTypeOf(vector))); + LLVMSetAlignment(res_ptr, alignment); + + res_ptr = LLVMBuildPointerCast(p->builder, res_ptr, LLVMPointerType(LLVMTypeOf(vector), 0), ""); + LLVMBuildStore(p->builder, vector, res_ptr); + + return lb_addr_load(p, res); + } lbAddr res = lb_add_local_generated(p, type, true); diff --git 
a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index fb9264661..56637e907 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1512,4 +1512,36 @@ lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValue c, Type *t lbValue y = lb_emit_arith(p, Token_Add, x, c, t); return y; } +} + +LLVMValueRef llvm_mask_iota(lbModule *m, unsigned start, unsigned count) { + auto iota = slice_make(temporary_allocator(), count); + for (unsigned i = 0; i < count; i++) { + iota[i] = lb_const_int(m, t_u32, start+i).value; + } + return LLVMConstVector(iota.data, count); +} + +LLVMValueRef llvm_mask_zero(lbModule *m, unsigned count) { + return LLVMConstNull(LLVMVectorType(lb_type(m, t_u32), count)); +} + +LLVMValueRef llvm_splat(lbProcedure *p, LLVMValueRef value, unsigned count) { + GB_ASSERT(count > 0); + if (LLVMIsConstant(value)) { + LLVMValueRef single = LLVMConstVector(&value, 1); + if (count == 1) { + return single; + } + LLVMValueRef mask = llvm_mask_zero(p->module, count); + return LLVMConstShuffleVector(single, LLVMGetUndef(LLVMTypeOf(single)), mask); + } + + LLVMTypeRef single_type = LLVMVectorType(LLVMTypeOf(value), 1); + LLVMValueRef single = LLVMBuildBitCast(p->builder, value, single_type, ""); + if (count == 1) { + return single; + } + LLVMValueRef mask = llvm_mask_zero(p->module, count); + return LLVMBuildShuffleVector(p->builder, single, LLVMGetUndef(LLVMTypeOf(single)), mask, ""); } \ No newline at end of file From 1bfbed0e02b4cd947acf9693f09016ec609356e1 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 12:48:48 +0100 Subject: [PATCH 18/47] Add `llvm_vector_reduce_add` --- src/llvm_backend_expr.cpp | 3 ++- src/llvm_backend_utility.cpp | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 6cb221a94..18d5e267b 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -619,9 
+619,10 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type Type *elem = mt->Matrix.elem; LLVMTypeRef elem_type = lb_type(p->module, elem); - unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); if (lb_matrix_elem_simple(mt)) { + unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); + unsigned row_count = cast(unsigned)mt->Matrix.row_count; gb_unused(row_count); unsigned column_count = cast(unsigned)mt->Matrix.column_count; auto m_columns = slice_make(permanent_allocator(), column_count); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 56637e907..b07dc3459 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1544,4 +1544,45 @@ LLVMValueRef llvm_splat(lbProcedure *p, LLVMValueRef value, unsigned count) { } LLVMValueRef mask = llvm_mask_zero(p->module, count); return LLVMBuildShuffleVector(p->builder, single, LLVMGetUndef(LLVMTypeOf(single)), mask, ""); +} + +LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) { + LLVMTypeRef type = LLVMTypeOf(value); + GB_ASSERT(LLVMGetTypeKind(type) == LLVMVectorTypeKind); + LLVMTypeRef elem = LLVMGetElementType(type); + + char const *name = nullptr; + i32 value_offset = 0; + i32 value_count = 0; + + switch (LLVMGetTypeKind(elem)) { + case LLVMHalfTypeKind: + case LLVMFloatTypeKind: + case LLVMDoubleTypeKind: + name = "llvm.vector.reduce.fadd"; + value_offset = 0; + value_count = 2; + break; + case LLVMIntegerTypeKind: + name = "llvm.vector.reduce.add"; + value_offset = 1; + value_count = 1; + break; + default: + GB_PANIC("invalid vector type %s", LLVMPrintTypeToString(type)); + break; + } + + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + + LLVMTypeRef types[1] = {}; + types[0] = elem; + + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(p->module->mod, id, types, gb_count_of(types)); + LLVMValueRef values[2] = {}; + values[0] = 
LLVMConstNull(elem); + values[1] = value; + LLVMValueRef call = LLVMBuildCall(p->builder, ip, values+value_offset, value_count, ""); + return call; } \ No newline at end of file From 9e43072113782a96a3ab14a63c1d5cfc9a99b881 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 13:11:33 +0100 Subject: [PATCH 19/47] Make `lb_emit_vector_mul_matrix` use SIMD if possible --- src/llvm_backend_expr.cpp | 74 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 18d5e267b..c0a7a9edf 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -619,11 +619,10 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type Type *elem = mt->Matrix.elem; LLVMTypeRef elem_type = lb_type(p->module, elem); - if (lb_matrix_elem_simple(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); - unsigned row_count = cast(unsigned)mt->Matrix.row_count; gb_unused(row_count); + unsigned row_count = cast(unsigned)mt->Matrix.row_count; unsigned column_count = cast(unsigned)mt->Matrix.column_count; auto m_columns = slice_make(permanent_allocator(), column_count); auto v_rows = slice_make(permanent_allocator(), column_count); @@ -709,10 +708,79 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type i64 vector_count = get_array_type_count(vt); - GB_ASSERT(mt->Matrix.row_count == vector_count); + GB_ASSERT(vector_count == mt->Matrix.row_count); GB_ASSERT(are_types_identical(mt->Matrix.elem, base_array_type(vt))); Type *elem = mt->Matrix.elem; + LLVMTypeRef elem_type = lb_type(p->module, elem); + + if (lb_matrix_elem_simple(mt)) { + unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); + + unsigned row_count = cast(unsigned)mt->Matrix.row_count; + unsigned column_count = cast(unsigned)mt->Matrix.column_count; gb_unused(column_count); + auto m_columns = slice_make(permanent_allocator(), 
row_count); + auto v_rows = slice_make(permanent_allocator(), row_count); + + unsigned total_count = cast(unsigned)matrix_type_total_elems(mt); + LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); + + LLVMValueRef matrix_ptr = lb_address_from_load_or_generate_local(p, rhs).value; + LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, matrix_ptr, LLVMPointerType(total_matrix_type, 0), ""); + LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, ""); + + for (unsigned row_index = 0; row_index < row_count; row_index++) { + auto mask_elems = slice_make(temporary_allocator(), column_count); + for (unsigned column_index = 0; column_index < column_count; column_index++) { + unsigned offset = row_index + column_index*stride; + mask_elems[column_index] = lb_const_int(p->module, t_u32, offset).value; + } + + // transpose mask + LLVMValueRef mask = LLVMConstVector(mask_elems.data, column_count); + LLVMValueRef column = LLVMBuildShuffleVector(p->builder, matrix_vector, LLVMGetUndef(LLVMTypeOf(matrix_vector)), mask, ""); + m_columns[row_index] = column; + } + + for (unsigned column_index = 0; column_index < row_count; column_index++) { + LLVMValueRef value = lb_emit_struct_ev(p, lhs, column_index).value; + LLVMValueRef row = llvm_splat(p, value, column_count); + v_rows[column_index] = row; + } + + GB_ASSERT(row_count > 0); + + LLVMValueRef vector = nullptr; + if (is_type_float(elem)) { + for (i64 i = 0; i < row_count; i++) { + LLVMValueRef product = LLVMBuildFMul(p->builder, v_rows[i], m_columns[i], ""); + if (i == 0) { + vector = product; + } else { + vector = LLVMBuildFAdd(p->builder, vector, product, ""); + } + } + } else { + for (i64 i = 0; i < row_count; i++) { + LLVMValueRef product = LLVMBuildMul(p->builder, v_rows[i], m_columns[i], ""); + if (i == 0) { + vector = product; + } else { + vector = LLVMBuildAdd(p->builder, vector, product, ""); + } + } + } + + lbAddr res = lb_add_local_generated(p, type, true); + 
LLVMValueRef res_ptr = res.addr.value; + unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(LLVMTypeOf(vector))); + LLVMSetAlignment(res_ptr, alignment); + + res_ptr = LLVMBuildPointerCast(p->builder, res_ptr, LLVMPointerType(LLVMTypeOf(vector), 0), ""); + LLVMBuildStore(p->builder, vector, res_ptr); + + return lb_addr_load(p, res); + } lbAddr res = lb_add_local_generated(p, type, true); From d0d9a3a4f4f3b4bc528c73ffcecb31d3eb4162a7 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 14:49:20 +0100 Subject: [PATCH 20/47] Make `lb_emit_matrix_mul` SIMD if possible --- src/llvm_backend_expr.cpp | 144 ++++++++++++++++++++--------------- src/llvm_backend_utility.cpp | 29 ++++++- 2 files changed, 110 insertions(+), 63 deletions(-) diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index c0a7a9edf..22e66c147 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -557,6 +557,20 @@ lbValue lb_emit_outer_product(lbProcedure *p, lbValue a, lbValue b, Type *type) } +LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { + Type *mt = base_type(matrix.type); + GB_ASSERT(mt->kind == Type_Matrix); + LLVMTypeRef elem_type = lb_type(p->module, mt->Matrix.elem); + + unsigned total_count = cast(unsigned)matrix_type_total_elems(mt); + LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); + + LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value; + LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(total_matrix_type, 0), ""); + LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, ""); + return matrix_vector; +} + lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { Type *xt = base_type(lhs.type); Type *yt = base_type(rhs.type); @@ -567,31 +581,72 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) GB_ASSERT(xt->Matrix.column_count == 
yt->Matrix.row_count); GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem)); + Type *elem = xt->Matrix.elem; + + unsigned outer_rows = cast(unsigned)xt->Matrix.row_count; + unsigned inner = cast(unsigned)xt->Matrix.column_count; + unsigned outer_columns = cast(unsigned)yt->Matrix.column_count; + if (lb_matrix_elem_simple(xt)) { - // TODO(bill): SIMD version + unsigned x_stride = cast(unsigned)matrix_type_stride_in_elems(xt); + unsigned y_stride = cast(unsigned)matrix_type_stride_in_elems(yt); + + auto x_rows = slice_make(permanent_allocator(), outer_rows); + auto y_columns = slice_make(permanent_allocator(), outer_columns); + + + LLVMValueRef x_vector = lb_matrix_to_vector(p, lhs); + LLVMValueRef y_vector = lb_matrix_to_vector(p, rhs); + + for (unsigned i = 0; i < outer_rows; i++) { + auto mask_elems = slice_make(temporary_allocator(), inner); + for (unsigned j = 0; j < inner; j++) { + unsigned offset = x_stride*j + i; + mask_elems[j] = lb_const_int(p->module, t_u32, offset).value; + } + + // transpose mask + LLVMValueRef mask = LLVMConstVector(mask_elems.data, inner); + LLVMValueRef row = LLVMBuildShuffleVector(p->builder, x_vector, LLVMGetUndef(LLVMTypeOf(x_vector)), mask, ""); + x_rows[i] = row; + } + + for (unsigned i = 0; i < outer_columns; i++) { + LLVMValueRef mask = llvm_mask_iota(p->module, y_stride*i, inner); + LLVMValueRef column = LLVMBuildShuffleVector(p->builder, y_vector, LLVMGetUndef(LLVMTypeOf(y_vector)), mask, ""); + y_columns[i] = column; + } + + + + lbAddr res = lb_add_local_generated(p, type, true); + for_array(i, x_rows) { + LLVMValueRef x_row = x_rows[i]; + for_array(j, y_columns) { + LLVMValueRef y_column = y_columns[j]; + LLVMValueRef elem = llvm_vector_dot(p, x_row, y_column); + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + LLVMBuildStore(p->builder, elem, dst.value); + } + } + return lb_addr_load(p, res); } { - Type *elem = xt->Matrix.elem; - lbAddr res = lb_add_local_generated(p, type, true); - i64 outer_rows = 
xt->Matrix.row_count; - i64 inner = xt->Matrix.column_count; - i64 outer_columns = yt->Matrix.column_count; - auto inners = slice_make(permanent_allocator(), inner); - for (i64 j = 0; j < outer_columns; j++) { - for (i64 i = 0; i < outer_rows; i++) { + for (unsigned j = 0; j < outer_columns; j++) { + for (unsigned i = 0; i < outer_rows; i++) { lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); - for (i64 k = 0; k < inner; k++) { + for (unsigned k = 0; k < inner; k++) { inners[k][0] = lb_emit_matrix_ev(p, lhs, i, k); inners[k][1] = lb_emit_matrix_ev(p, rhs, k, j); } - lbValue sum = lb_emit_load(p, dst); - for (i64 k = 0; k < inner; k++) { + lbValue sum = lb_const_nil(p->module, elem); + for (unsigned k = 0; k < inner; k++) { lbValue a = inners[k][0]; lbValue b = inners[k][1]; sum = lb_emit_mul_add(p, a, b, sum, elem); @@ -617,7 +672,6 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type GB_ASSERT(are_types_identical(mt->Matrix.elem, base_array_type(vt))); Type *elem = mt->Matrix.elem; - LLVMTypeRef elem_type = lb_type(p->module, elem); if (lb_matrix_elem_simple(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); @@ -627,13 +681,7 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type auto m_columns = slice_make(permanent_allocator(), column_count); auto v_rows = slice_make(permanent_allocator(), column_count); - unsigned total_count = cast(unsigned)matrix_type_total_elems(mt); - LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); - - LLVMValueRef lhs_ptr = lb_address_from_load_or_generate_local(p, lhs).value; - LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, lhs_ptr, LLVMPointerType(total_matrix_type, 0), ""); - LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, ""); - + LLVMValueRef matrix_vector = lb_matrix_to_vector(p, lhs); for (unsigned column_index = 0; column_index < column_count; column_index++) { LLVMValueRef mask = 
llvm_mask_iota(p->module, stride*column_index, row_count); @@ -650,23 +698,12 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type GB_ASSERT(column_count > 0); LLVMValueRef vector = nullptr; - if (is_type_float(elem)) { - for (i64 i = 0; i < column_count; i++) { - LLVMValueRef product = LLVMBuildFMul(p->builder, m_columns[i], v_rows[i], ""); - if (i == 0) { - vector = product; - } else { - vector = LLVMBuildFAdd(p->builder, vector, product, ""); - } - } - } else { - for (i64 i = 0; i < column_count; i++) { - LLVMValueRef product = LLVMBuildMul(p->builder, m_columns[i], v_rows[i], ""); - if (i == 0) { - vector = product; - } else { - vector = LLVMBuildAdd(p->builder, vector, product, ""); - } + for (i64 i = 0; i < column_count; i++) { + LLVMValueRef product = llvm_vector_mul(p, m_columns[i], v_rows[i]); + if (i == 0) { + vector = product; + } else { + vector = llvm_vector_add(p, vector, product); } } @@ -712,7 +749,6 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type GB_ASSERT(are_types_identical(mt->Matrix.elem, base_array_type(vt))); Type *elem = mt->Matrix.elem; - LLVMTypeRef elem_type = lb_type(p->module, elem); if (lb_matrix_elem_simple(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); @@ -722,13 +758,8 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type auto m_columns = slice_make(permanent_allocator(), row_count); auto v_rows = slice_make(permanent_allocator(), row_count); - unsigned total_count = cast(unsigned)matrix_type_total_elems(mt); - LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); - - LLVMValueRef matrix_ptr = lb_address_from_load_or_generate_local(p, rhs).value; - LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, matrix_ptr, LLVMPointerType(total_matrix_type, 0), ""); - LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, ""); - + LLVMValueRef matrix_vector = 
lb_matrix_to_vector(p, rhs); + for (unsigned row_index = 0; row_index < row_count; row_index++) { auto mask_elems = slice_make(temporary_allocator(), column_count); for (unsigned column_index = 0; column_index < column_count; column_index++) { @@ -751,23 +782,12 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type GB_ASSERT(row_count > 0); LLVMValueRef vector = nullptr; - if (is_type_float(elem)) { - for (i64 i = 0; i < row_count; i++) { - LLVMValueRef product = LLVMBuildFMul(p->builder, v_rows[i], m_columns[i], ""); - if (i == 0) { - vector = product; - } else { - vector = LLVMBuildFAdd(p->builder, vector, product, ""); - } - } - } else { - for (i64 i = 0; i < row_count; i++) { - LLVMValueRef product = LLVMBuildMul(p->builder, v_rows[i], m_columns[i], ""); - if (i == 0) { - vector = product; - } else { - vector = LLVMBuildAdd(p->builder, vector, product, ""); - } + for (i64 i = 0; i < row_count; i++) { + LLVMValueRef product = llvm_vector_mul(p, v_rows[i], m_columns[i]); + if (i == 0) { + vector = product; + } else { + vector = llvm_vector_add(p, vector, product); } } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index b07dc3459..6754ce798 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1577,7 +1577,7 @@ LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) { GB_ASSERT_MSG(id != 0, "Unable to find %s", name); LLVMTypeRef types[1] = {}; - types[0] = elem; + types[0] = type; LLVMValueRef ip = LLVMGetIntrinsicDeclaration(p->module->mod, id, types, gb_count_of(types)); LLVMValueRef values[2] = {}; @@ -1585,4 +1585,31 @@ LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) { values[1] = value; LLVMValueRef call = LLVMBuildCall(p->builder, ip, values+value_offset, value_count, ""); return call; +} + +LLVMValueRef llvm_vector_add(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) { + GB_ASSERT(LLVMTypeOf(a) == LLVMTypeOf(b)); + + LLVMTypeRef elem = 
LLVMGetElementType(LLVMTypeOf(a)); + + if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind) { + return LLVMBuildAdd(p->builder, a, b, ""); + } + return LLVMBuildFAdd(p->builder, a, b, ""); +} + +LLVMValueRef llvm_vector_mul(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) { + GB_ASSERT(LLVMTypeOf(a) == LLVMTypeOf(b)); + + LLVMTypeRef elem = LLVMGetElementType(LLVMTypeOf(a)); + + if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind) { + return LLVMBuildMul(p->builder, a, b, ""); + } + return LLVMBuildFMul(p->builder, a, b, ""); +} + + +LLVMValueRef llvm_vector_dot(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) { + return llvm_vector_reduce_add(p, llvm_vector_mul(p, a, b)); } \ No newline at end of file From e8c602b98f60f5cafd2ce28a059fd483a54b1716 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 15:02:38 +0100 Subject: [PATCH 21/47] Correct vulkan headers --- vendor/vulkan/_gen/create_vulkan_odin_wrapper.py | 4 ++++ vendor/vulkan/enums.odin | 2 ++ vendor/vulkan/procedures.odin | 2 ++ vendor/vulkan/structs.odin | 2 ++ 4 files changed, 10 insertions(+) diff --git a/vendor/vulkan/_gen/create_vulkan_odin_wrapper.py b/vendor/vulkan/_gen/create_vulkan_odin_wrapper.py index 6ea2c3717..1525f4e15 100644 --- a/vendor/vulkan/_gen/create_vulkan_odin_wrapper.py +++ b/vendor/vulkan/_gen/create_vulkan_odin_wrapper.py @@ -262,6 +262,7 @@ def parse_constants(f): def parse_enums(f): + f.write("import \"core:c\"\n\n") f.write("// Enums\n") data = re.findall(r"typedef enum Vk(\w+) {(.+?)} \w+;", src, re.S) @@ -467,6 +468,7 @@ def parse_procedures(f): max_len = max(len(n) for n, t in ff) + f.write("import \"core:c\"\n\n") f.write("// Procedure Types\n\n"); for n, t in ff: f.write("{} :: #type {}\n".format(n.ljust(max_len), t.replace('"c"', '"system"'))) @@ -587,6 +589,8 @@ MAX_GLOBAL_PRIORITY_SIZE_EXT :: 16 with open("../structs.odin", 'w', encoding='utf-8') as f: f.write(BASE) f.write(""" +import "core:c" + when ODIN_OS == "windows" { \timport win32 "core:sys/windows" diff 
--git a/vendor/vulkan/enums.odin b/vendor/vulkan/enums.odin index d468e7fa1..be6691ab4 100644 --- a/vendor/vulkan/enums.odin +++ b/vendor/vulkan/enums.odin @@ -3,6 +3,8 @@ // package vulkan +import "core:c" + // Enums AccelerationStructureBuildTypeKHR :: enum c.int { HOST = 0, diff --git a/vendor/vulkan/procedures.odin b/vendor/vulkan/procedures.odin index f585215e4..b40523b6d 100644 --- a/vendor/vulkan/procedures.odin +++ b/vendor/vulkan/procedures.odin @@ -3,6 +3,8 @@ // package vulkan +import "core:c" + // Procedure Types ProcAllocationFunction :: #type proc "system" (pUserData: rawptr, size: int, alignment: int, allocationScope: SystemAllocationScope) -> rawptr diff --git a/vendor/vulkan/structs.odin b/vendor/vulkan/structs.odin index ece398cde..4d90a53fa 100644 --- a/vendor/vulkan/structs.odin +++ b/vendor/vulkan/structs.odin @@ -3,6 +3,8 @@ // package vulkan +import "core:c" + when ODIN_OS == "windows" { import win32 "core:sys/windows" From cdf881a3787c0649f2da66a954667c6779bfdb68 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 15:02:55 +0100 Subject: [PATCH 22/47] Fix json.marshal for Matrix --- core/encoding/json/marshal.odin | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index d2d908440..0c95df924 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -160,6 +160,9 @@ marshal_to_writer :: proc(w: io.Writer, v: any) -> (err: Marshal_Error) { case runtime.Type_Info_Relative_Slice: return .Unsupported_Type + + case runtime.Type_Info_Matrix: + return .Unsupported_Type case runtime.Type_Info_Array: io.write_byte(w, '[') or_return From 465c87bd5a38488ae7b177a10ecf93f05ec18e9d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 15:22:02 +0100 Subject: [PATCH 23/47] Make `transpose` use SIMD if possible --- src/llvm_backend_expr.cpp | 73 ++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 21 deletions(-) diff 
--git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 22e66c147..c1bdceba6 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -495,21 +495,70 @@ bool lb_matrix_elem_simple(Type *t) { case Basic_f16le: case Basic_f16be: // TODO(bill): determine when this is fine - return false; + return true; } } return true; } + +LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { + Type *mt = base_type(matrix.type); + GB_ASSERT(mt->kind == Type_Matrix); + LLVMTypeRef elem_type = lb_type(p->module, mt->Matrix.elem); + + unsigned total_count = cast(unsigned)matrix_type_total_elems(mt); + LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); + + LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value; + LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(total_matrix_type, 0), ""); + LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, ""); + return matrix_vector; +} + lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { if (is_type_array(m.type)) { + // no-op m.type = type; return m; } Type *mt = base_type(m.type); GB_ASSERT(mt->kind == Type_Matrix); + if (lb_matrix_elem_simple(mt)) { + unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); + unsigned row_count = cast(unsigned)mt->Matrix.row_count; + unsigned column_count = cast(unsigned)mt->Matrix.column_count; + + auto rows = slice_make(permanent_allocator(), row_count); + auto mask_elems = slice_make(permanent_allocator(), column_count); + + LLVMValueRef vector = lb_matrix_to_vector(p, m); + for (unsigned i = 0; i < row_count; i++) { + for (unsigned j = 0; j < column_count; j++) { + unsigned offset = stride*j + i; + mask_elems[j] = lb_const_int(p->module, t_u32, offset).value; + } + + // transpose mask + LLVMValueRef mask = LLVMConstVector(mask_elems.data, column_count); + LLVMValueRef row = LLVMBuildShuffleVector(p->builder, vector, 
LLVMGetUndef(LLVMTypeOf(vector)), mask, ""); + rows[i] = row; + } + + lbAddr res = lb_add_local_generated(p, type, true); + for_array(i, rows) { + LLVMValueRef row = rows[i]; + lbValue dst_row_ptr = lb_emit_matrix_epi(p, res.addr, 0, i); + LLVMValueRef ptr = dst_row_ptr.value; + ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(LLVMTypeOf(row), 0), ""); + LLVMBuildStore(p->builder, row, ptr); + } + + return lb_addr_load(p, res); + } + lbAddr res = lb_add_local_generated(p, type, true); i64 row_count = mt->Matrix.row_count; @@ -556,21 +605,6 @@ lbValue lb_emit_outer_product(lbProcedure *p, lbValue a, lbValue b, Type *type) } - -LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { - Type *mt = base_type(matrix.type); - GB_ASSERT(mt->kind == Type_Matrix); - LLVMTypeRef elem_type = lb_type(p->module, mt->Matrix.elem); - - unsigned total_count = cast(unsigned)matrix_type_total_elems(mt); - LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); - - LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value; - LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(total_matrix_type, 0), ""); - LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, matrix_vector_ptr, ""); - return matrix_vector; -} - lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { Type *xt = base_type(lhs.type); Type *yt = base_type(rhs.type); @@ -594,12 +628,11 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) auto x_rows = slice_make(permanent_allocator(), outer_rows); auto y_columns = slice_make(permanent_allocator(), outer_columns); - LLVMValueRef x_vector = lb_matrix_to_vector(p, lhs); LLVMValueRef y_vector = lb_matrix_to_vector(p, rhs); + auto mask_elems = slice_make(permanent_allocator(), inner); for (unsigned i = 0; i < outer_rows; i++) { - auto mask_elems = slice_make(temporary_allocator(), inner); for (unsigned j = 0; j < inner; j++) { 
unsigned offset = x_stride*j + i; mask_elems[j] = lb_const_int(p->module, t_u32, offset).value; @@ -616,8 +649,6 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) LLVMValueRef column = LLVMBuildShuffleVector(p->builder, y_vector, LLVMGetUndef(LLVMTypeOf(y_vector)), mask, ""); y_columns[i] = column; } - - lbAddr res = lb_add_local_generated(p, type, true); for_array(i, x_rows) { @@ -760,8 +791,8 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type LLVMValueRef matrix_vector = lb_matrix_to_vector(p, rhs); + auto mask_elems = slice_make(permanent_allocator(), column_count); for (unsigned row_index = 0; row_index < row_count; row_index++) { - auto mask_elems = slice_make(temporary_allocator(), column_count); for (unsigned column_index = 0; column_index < column_count; column_index++) { unsigned offset = row_index + column_index*stride; mask_elems[column_index] = lb_const_int(p->module, t_u32, offset).value; From d3abc1a2b4fe024fed5f2b9f5371fc2b7fb029be Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 15:33:23 +0100 Subject: [PATCH 24/47] Add `matrix_flatten` - `matrix[R, C]T` -> `[R*C]T` --- src/check_builtin.cpp | 30 ++++++++++++++ src/checker_builtin_procs.hpp | 2 + src/llvm_backend_expr.cpp | 77 +++++++++++++++++++++++++++++++---- src/llvm_backend_proc.cpp | 6 +++ 4 files changed, 106 insertions(+), 9 deletions(-) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index a9427d4e0..b60509c03 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2131,6 +2131,36 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 break; } + case BuiltinProc_matrix_flatten: { + Operand x = {}; + check_expr(c, &x, ce->args[0]); + if (x.mode == Addressing_Invalid) { + return false; + } + if (!is_operand_value(x)) { + error(call, "'%.*s' expects a matrix or array", LIT(builtin_name)); + return false; + } + Type *t = base_type(x.type); + if 
(!is_type_matrix(t) && !is_type_array(t)) { + gbString s = type_to_string(x.type); + error(call, "'%.*s' expects a matrix or array, got %s", LIT(builtin_name), s); + gb_string_free(s); + return false; + } + + operand->mode = Addressing_Value; + if (is_type_array(t)) { + // Do nothing + operand->type = x.type; + } else { + GB_ASSERT(t->kind == Type_Matrix); + operand->type = alloc_type_array(t->Matrix.elem, t->Matrix.row_count*t->Matrix.column_count); + } + operand->type = check_matrix_type_hint(operand->type, type_hint); + break; + } + case BuiltinProc_simd_vector: { Operand x = {}; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index de4e99d14..5594c1a1a 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -38,6 +38,7 @@ enum BuiltinProcId { BuiltinProc_transpose, BuiltinProc_outer_product, BuiltinProc_hadamard_product, + BuiltinProc_matrix_flatten, BuiltinProc_DIRECTIVE, // NOTE(bill): This is used for specialized hash-prefixed procedures @@ -282,6 +283,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("outer_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_builtin}, + {STR_LIT("matrix_flatten"), 1, false, Expr_Expr, BuiltinProcPkg_builtin}, {STR_LIT(""), 0, true, Expr_Expr, BuiltinProcPkg_builtin}, // DIRECTIVE diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index c1bdceba6..7d1c8e3db 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -517,6 +517,33 @@ LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { return matrix_vector; } +LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) { + Type *mt = base_type(m.type); + GB_ASSERT(mt->kind == Type_Matrix); + + unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); + unsigned row_count = 
cast(unsigned)mt->Matrix.row_count; + unsigned column_count = cast(unsigned)mt->Matrix.column_count; + + auto columns = slice_make(permanent_allocator(), column_count); + + LLVMValueRef vector = lb_matrix_to_vector(p, m); + + unsigned mask_elems_index = 0; + auto mask_elems = slice_make(permanent_allocator(), row_count*column_count); + for (unsigned j = 0; j < column_count; j++) { + for (unsigned i = 0; i < row_count; i++) { + unsigned offset = stride*j + i; + mask_elems[mask_elems_index++] = lb_const_int(p->module, t_u32, offset).value; + } + } + + LLVMValueRef mask = LLVMConstVector(mask_elems.data, cast(unsigned)mask_elems.count); + LLVMValueRef trimmed_vector = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, ""); + return trimmed_vector; +} + + lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { if (is_type_array(m.type)) { // no-op @@ -573,6 +600,46 @@ lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { return lb_addr_load(p, res); } +lbValue lb_matrix_cast_vector_to_type(lbProcedure *p, LLVMValueRef vector, Type *type) { + lbAddr res = lb_add_local_generated(p, type, true); + LLVMValueRef res_ptr = res.addr.value; + unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(LLVMTypeOf(vector))); + LLVMSetAlignment(res_ptr, alignment); + + res_ptr = LLVMBuildPointerCast(p->builder, res_ptr, LLVMPointerType(LLVMTypeOf(vector), 0), ""); + LLVMBuildStore(p->builder, vector, res_ptr); + + return lb_addr_load(p, res); +} + +lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type) { + if (is_type_array(m.type)) { + // no-op + m.type = type; + return m; + } + Type *mt = base_type(m.type); + GB_ASSERT(mt->kind == Type_Matrix); + + if (lb_matrix_elem_simple(mt)) { + LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m); + return lb_matrix_cast_vector_to_type(p, vector, type); + } + + lbAddr res = lb_add_local_generated(p, type, true); + + i64 row_count = 
mt->Matrix.row_count; + i64 column_count = mt->Matrix.column_count; + for (i64 j = 0; j < column_count; j++) { + for (i64 i = 0; i < row_count; i++) { + lbValue src = lb_emit_matrix_ev(p, m, i, j); + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + lb_emit_store(p, dst, src); + } + } + return lb_addr_load(p, res); +} + lbValue lb_emit_outer_product(lbProcedure *p, lbValue a, lbValue b, Type *type) { Type *mt = base_type(type); @@ -737,16 +804,8 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type vector = llvm_vector_add(p, vector, product); } } - - lbAddr res = lb_add_local_generated(p, type, true); - LLVMValueRef res_ptr = res.addr.value; - unsigned alignment = cast(unsigned)gb_max(type_align_of(type), lb_alignof(LLVMTypeOf(vector))); - LLVMSetAlignment(res_ptr, alignment); - res_ptr = LLVMBuildPointerCast(p->builder, res_ptr, LLVMPointerType(LLVMTypeOf(vector), 0), ""); - LLVMBuildStore(p->builder, vector, res_ptr); - - return lb_addr_load(p, res); + return lb_matrix_cast_vector_to_type(p, vector, type); } lbAddr res = lb_add_local_generated(p, type, true); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index da4e4ad28..8686b3262 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1280,6 +1280,12 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, GB_ASSERT(is_type_matrix(tv.type)); return lb_emit_arith_matrix(p, Token_Mul, a, b, tv.type, true); } + + case BuiltinProc_matrix_flatten: + { + lbValue m = lb_build_expr(p, ce->args[0]); + return lb_emit_matrix_flatten(p, m, tv.type); + } // "Intrinsics" From 30c141ceb98d4b65418fb70c572f86cd701dd872 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 15:36:24 +0100 Subject: [PATCH 25/47] Minor clean up for `lb_matrix_trimmed_vector_mask` --- src/llvm_backend_expr.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/llvm_backend_expr.cpp 
b/src/llvm_backend_expr.cpp index 7d1c8e3db..beb860383 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -517,18 +517,14 @@ LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { return matrix_vector; } -LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) { - Type *mt = base_type(m.type); +LLVMValueRef lb_matrix_trimmed_vector_mask(lbProcedure *p, Type *mt) { + mt = base_type(mt); GB_ASSERT(mt->kind == Type_Matrix); unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; unsigned column_count = cast(unsigned)mt->Matrix.column_count; - auto columns = slice_make(permanent_allocator(), column_count); - - LLVMValueRef vector = lb_matrix_to_vector(p, m); - unsigned mask_elems_index = 0; auto mask_elems = slice_make(permanent_allocator(), row_count*column_count); for (unsigned j = 0; j < column_count; j++) { @@ -539,6 +535,12 @@ LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) { } LLVMValueRef mask = LLVMConstVector(mask_elems.data, cast(unsigned)mask_elems.count); + return mask; +} + +LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) { + LLVMValueRef vector = lb_matrix_to_vector(p, m); + LLVMValueRef mask = lb_matrix_trimmed_vector_mask(p, m.type); LLVMValueRef trimmed_vector = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, ""); return trimmed_vector; } From 3e4c2e49320b6ddd905b38fc884ec47aa8da7748 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 16:03:16 +0100 Subject: [PATCH 26/47] Support `conj` on array and matrix types --- src/check_builtin.cpp | 13 +++++-- src/llvm_backend_proc.cpp | 77 ++++++++++++++++++++++++++------------- 2 files changed, 62 insertions(+), 28 deletions(-) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index b60509c03..7dc4784f8 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1266,7 +1266,10 @@ bool 
check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 case BuiltinProc_conj: { // conj :: proc(x: type) -> type Operand *x = operand; - if (is_type_complex(x->type)) { + Type *t = x->type; + Type *elem = core_array_type(t); + + if (is_type_complex(t)) { if (x->mode == Addressing_Constant) { ExactValue v = exact_value_to_complex(x->value); f64 r = v.value_complex->real; @@ -1276,7 +1279,7 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 } else { x->mode = Addressing_Value; } - } else if (is_type_quaternion(x->type)) { + } else if (is_type_quaternion(t)) { if (x->mode == Addressing_Constant) { ExactValue v = exact_value_to_quaternion(x->value); f64 r = +v.value_quaternion->real; @@ -1288,7 +1291,11 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 } else { x->mode = Addressing_Value; } - } else { + } else if (is_type_array_like(t) && (is_type_complex(elem) || is_type_quaternion(elem))) { + x->mode = Addressing_Value; + } else if (is_type_matrix(t) && (is_type_complex(elem) || is_type_quaternion(elem))) { + x->mode = Addressing_Value; + }else { gbString s = type_to_string(x->type); error(call, "Expected a complex or quaternion, got '%s'", s); gb_string_free(s); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 8686b3262..72ba3982c 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -725,6 +725,57 @@ lbValue lb_emit_runtime_call(lbProcedure *p, char const *c_name, Array return lb_emit_call(p, proc, args); } +lbValue lb_emit_conjugate(lbProcedure *p, lbValue val, Type *type) { + lbValue res = {}; + Type *t = val.type; + if (is_type_complex(t)) { + res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, false)); + lbValue real = lb_emit_struct_ev(p, val, 0); + lbValue imag = lb_emit_struct_ev(p, val, 1); + imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type); + lb_emit_store(p, lb_emit_struct_ep(p, res, 0), real); + 
lb_emit_store(p, lb_emit_struct_ep(p, res, 1), imag); + } else if (is_type_quaternion(t)) { + // @QuaternionLayout + res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, false)); + lbValue real = lb_emit_struct_ev(p, val, 3); + lbValue imag = lb_emit_struct_ev(p, val, 0); + lbValue jmag = lb_emit_struct_ev(p, val, 1); + lbValue kmag = lb_emit_struct_ev(p, val, 2); + imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type); + jmag = lb_emit_unary_arith(p, Token_Sub, jmag, jmag.type); + kmag = lb_emit_unary_arith(p, Token_Sub, kmag, kmag.type); + lb_emit_store(p, lb_emit_struct_ep(p, res, 3), real); + lb_emit_store(p, lb_emit_struct_ep(p, res, 0), imag); + lb_emit_store(p, lb_emit_struct_ep(p, res, 1), jmag); + lb_emit_store(p, lb_emit_struct_ep(p, res, 2), kmag); + } else if (is_type_array_like(t)) { + res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, true)); + Type *elem_type = base_array_type(t); + i64 count = get_array_type_count(t); + for (i64 i = 0; i < count; i++) { + lbValue dst = lb_emit_array_epi(p, res, i); + lbValue elem = lb_emit_struct_ev(p, val, cast(i32)i); + elem = lb_emit_conjugate(p, elem, elem_type); + lb_emit_store(p, dst, elem); + } + } else if (is_type_matrix(t)) { + Type *mt = base_type(t); + GB_ASSERT(mt->kind == Type_Matrix); + Type *elem_type = mt->Matrix.elem; + res = lb_addr_get_ptr(p, lb_add_local_generated(p, type, true)); + for (i64 j = 0; j < mt->Matrix.column_count; j++) { + for (i64 i = 0; i < mt->Matrix.row_count; i++) { + lbValue dst = lb_emit_matrix_epi(p, res, i, j); + lbValue elem = lb_emit_matrix_ev(p, val, i, j); + elem = lb_emit_conjugate(p, elem, elem_type); + lb_emit_store(p, dst, elem); + } + } + } + return lb_emit_load(p, res); +} + lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining, bool use_copy_elision_hint) { lbModule *m = p->module; @@ -1117,31 +1168,7 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, case BuiltinProc_conj: { 
lbValue val = lb_build_expr(p, ce->args[0]); - lbValue res = {}; - Type *t = val.type; - if (is_type_complex(t)) { - res = lb_addr_get_ptr(p, lb_add_local_generated(p, tv.type, false)); - lbValue real = lb_emit_struct_ev(p, val, 0); - lbValue imag = lb_emit_struct_ev(p, val, 1); - imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type); - lb_emit_store(p, lb_emit_struct_ep(p, res, 0), real); - lb_emit_store(p, lb_emit_struct_ep(p, res, 1), imag); - } else if (is_type_quaternion(t)) { - // @QuaternionLayout - res = lb_addr_get_ptr(p, lb_add_local_generated(p, tv.type, false)); - lbValue real = lb_emit_struct_ev(p, val, 3); - lbValue imag = lb_emit_struct_ev(p, val, 0); - lbValue jmag = lb_emit_struct_ev(p, val, 1); - lbValue kmag = lb_emit_struct_ev(p, val, 2); - imag = lb_emit_unary_arith(p, Token_Sub, imag, imag.type); - jmag = lb_emit_unary_arith(p, Token_Sub, jmag, jmag.type); - kmag = lb_emit_unary_arith(p, Token_Sub, kmag, kmag.type); - lb_emit_store(p, lb_emit_struct_ep(p, res, 3), real); - lb_emit_store(p, lb_emit_struct_ep(p, res, 0), imag); - lb_emit_store(p, lb_emit_struct_ep(p, res, 1), jmag); - lb_emit_store(p, lb_emit_struct_ep(p, res, 2), kmag); - } - return lb_emit_load(p, res); + return lb_emit_conjugate(p, val, tv.type); } case BuiltinProc_expand_to_tuple: { From e6f725dc2c71d960defda3aa549b47e0cd043c70 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 17:00:14 +0100 Subject: [PATCH 27/47] Minor fix for parapoly matrix types --- src/check_builtin.cpp | 1 - src/llvm_backend_proc.cpp | 3 +-- src/types.cpp | 4 ++++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 7dc4784f8..1535dc2a2 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2168,7 +2168,6 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 break; } - case BuiltinProc_simd_vector: { Operand x = {}; Operand y = {}; diff --git a/src/llvm_backend_proc.cpp 
b/src/llvm_backend_proc.cpp index 72ba3982c..5623f75ec 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1313,8 +1313,7 @@ lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValue const &tv, lbValue m = lb_build_expr(p, ce->args[0]); return lb_emit_matrix_flatten(p, m, tv.type); } - - + // "Intrinsics" case BuiltinProc_alloca: diff --git a/src/types.cpp b/src/types.cpp index 32e26bcc6..716ebe31f 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1334,12 +1334,16 @@ i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) { } bool is_type_valid_for_matrix_elems(Type *t) { + t = base_type(t); if (is_type_integer(t)) { return true; } else if (is_type_float(t)) { return true; } else if (is_type_complex(t)) { return true; + } + if (t->kind == Type_Generic) { + return true; } return false; } From bb0855b35aca9e5ecef1c9d13abde14a358c66cc Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 17:00:59 +0100 Subject: [PATCH 28/47] Add builtin procedures for matrix values: `determinant`, `adjugate`, `inverse`, `inverse_transpose`, `hermitian_adjoint` --- core/runtime/core_builtin_matrix.odin | 317 ++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 core/runtime/core_builtin_matrix.odin diff --git a/core/runtime/core_builtin_matrix.odin b/core/runtime/core_builtin_matrix.odin new file mode 100644 index 000000000..667c6f031 --- /dev/null +++ b/core/runtime/core_builtin_matrix.odin @@ -0,0 +1,317 @@ +package runtime + +import "core:intrinsics" +_ :: intrinsics + +@(builtin) +matrix1x1_determinant :: proc(m: $M/matrix[1, 1]$T) -> (det: T) { + return m[0, 0] +} + +@(builtin) +matrix2x2_determinant :: proc(m: $M/matrix[2, 2]$T) -> (det: T) { + return m[0, 0]*m[1, 1] - m[0, 1]*m[1, 0] +} +@(builtin) +matrix3x3_determinant :: proc(m: $M/matrix[3, 3]$T) -> (det: T) { + a := +m[0, 0] * (m[1, 1] * m[2, 2] - m[2, 1] * m[1, 2]) + b := -m[1, 0] * (m[0, 1] * m[2, 2] - m[2, 1] * m[0, 2]) + c := +m[2, 
0] * (m[0, 1] * m[1, 2] - m[1, 1] * m[0, 2]) + return a + b + c +} +@(builtin) +matrix4x4_determinant :: proc(m: $M/matrix[4, 4]$T) -> (det: T) { + a := adjugate(m) + #no_bounds_check for i in 0..<4 { + det += m[0, i] * a[0, i] + } + return +} + +@(builtin) +matrix_determinant :: proc{ + matrix1x1_determinant, + matrix2x2_determinant, + matrix3x3_determinant, + matrix4x4_determinant, +} + +@(builtin) +determinant :: proc{ + matrix1x1_determinant, + matrix2x2_determinant, + matrix3x3_determinant, + matrix4x4_determinant, +} + + +@(builtin) +matrix1x1_adjugate :: proc(x: $M/matrix[1, 1]$T) -> (y: M) { + y = x + return +} + +@(builtin) +matrix2x2_adjugate :: proc(x: $M/matrix[2, 2]$T) -> (y: M) { + y[0, 0] = +x[1, 1] + y[0, 1] = -x[1, 0] + y[1, 0] = -x[0, 1] + y[1, 1] = +x[0, 0] + return +} + +@(builtin) +matrix3x3_adjugate :: proc(x: $M/matrix[3, 3]$T) -> (y: M) { + y[0, 0] = +(x[1, 1] * x[2, 2] - x[1, 2] * x[2, 1]) + y[0, 1] = -(x[1, 0] * x[2, 2] - x[1, 2] * x[2, 0]) + y[0, 2] = +(x[1, 0] * x[2, 1] - x[1, 1] * x[2, 0]) + y[1, 0] = -(x[0, 1] * x[2, 2] - x[0, 2] * x[2, 1]) + y[1, 1] = +(x[0, 0] * x[2, 2] - x[0, 2] * x[2, 0]) + y[1, 2] = -(x[0, 0] * x[2, 1] - x[0, 1] * x[2, 0]) + y[2, 0] = +(x[0, 1] * x[1, 2] - x[0, 2] * x[1, 1]) + y[2, 1] = -(x[0, 0] * x[1, 2] - x[0, 2] * x[1, 0]) + y[2, 2] = +(x[0, 0] * x[1, 1] - x[0, 1] * x[1, 0]) + return +} + +@(builtin) +matrix4x4_adjugate :: proc(x: $M/matrix[4, 4]$T) -> (y: M) { + minor :: proc(m: $M/matrix[4, 4]$T, row, column: i32) -> (minor: T) { + cut_down: matrix[3, 3]T + for col_idx in 0..<3 { + col := col_idx + int(col_idx >= column) + for row_idx in 0..<3 { + row := row_idx + int(row_idx >= row) + cut_down[row_idx, col_idx] = m[row, col] + } + } + return determinant(cut_down) + } + cofactor :: proc(m: $M/matrix[4, 4]$T, row, column: i32) -> (cofactor: T) { + sign: T = 1 if (row + column) % 2 == 0 else -1 + return sign * matrix4x4_minor(m, row, column) + } + + for i in 0..<4 { + for j in 0..<4 { + y[i, j] = 
matrix4x4_cofactor(x, i, j) + } + } + return +} + +@(builtin) +matrix_adjugate :: proc{ + matrix1x1_adjugate, + matrix2x2_adjugate, + matrix3x3_adjugate, + matrix4x4_adjugate, +} + + +@(builtin) +adjugate :: proc{ + matrix1x1_adjugate, + matrix2x2_adjugate, + matrix3x3_adjugate, + matrix4x4_adjugate, +} + + + +@(builtin) +matrix1x1_inverse_transpose :: proc(x: $M/matrix[1, 1]$T) -> (y: M) { + y[0, 0] = 1/x[0, 0] + return +} + +@(builtin) +matrix2x2_inverse_transpose :: proc(x: $M/matrix[2, 2]$T) -> (y: M) { + d := x[0, 0]*x[1, 1] - x[0, 1]*x[1, 0] + when intrinsics.type_is_integer(T) { + y[0, 0] = x[1, 1] / d + y[0, 1] = x[0, 1] / d + y[1, 0] = x[1, 0] / d + y[1, 1] = x[0, 0] / d + } else { + id := 1 / d + y[0, 0] = x[1, 1] * id + y[0, 1] = x[0, 1] * id + y[1, 0] = x[1, 0] * id + y[1, 1] = x[0, 0] * id + } + return +} + +@(builtin) +matrix3x3_inverse_transpose :: proc(x: $M/matrix[3, 3]$T) -> (y: M) #no_bounds_check { + a := adjugate(x) + d := determinant(x) + when intrinsics.type_is_integer(T) { + for i in 0..<3 { + for j in 0..<3 { + inverse_transpose[i, j] = a[i, j] / d + } + } + } else { + id := 1/d + for i in 0..<3 { + for j in 0..<3 { + inverse_transpose[i, j] = a[i, j] * id + } + } + } + return +} + +@(builtin) +matrix4x4_inverse_transpose :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check { + a := adjugate(x) + d: T + for i in 0..<4 { + d += x[0, i] * a[0, i] + } + when intrinsics.type_is_integer(T) { + for i in 0..<4 { + for j in 0..<4 { + inverse_transpose[i, j] = a[i, j] / d + } + } + } else { + id := 1/d + for i in 0..<4 { + for j in 0..<4 { + inverse_transpose[i, j] = a[i, j] * id + } + } + } + return +} + +@(builtin) +matrix_inverse_transpose :: proc{ + matrix1x1_inverse_transpose, + matrix2x2_inverse_transpose, + matrix3x3_inverse_transpose, + matrix4x4_inverse_transpose, +} + +@(builtin) +inverse_transpose :: proc{ + matrix1x1_inverse_transpose, + matrix2x2_inverse_transpose, + matrix3x3_inverse_transpose, + matrix4x4_inverse_transpose, +} + + 
+@(builtin) +matrix1x1_inverse :: proc(x: $M/matrix[1, 1]$T) -> (y: M) { + y[0, 0] = 1/x[0, 0] + return +} + +@(builtin) +matrix2x2_inverse :: proc(x: $M/matrix[2, 2]$T) -> (y: M) { + d := x[0, 0]*x[1, 1] - x[0, 1]*x[1, 0] + when intrinsics.type_is_integer(T) { + y[0, 0] = x[1, 1] / d + y[0, 1] = x[1, 0] / d + y[1, 0] = x[0, 1] / d + y[1, 1] = x[0, 0] / d + } else { + id := 1 / d + y[0, 0] = x[1, 1] * id + y[0, 1] = x[1, 0] * id + y[1, 0] = x[0, 1] * id + y[1, 1] = x[0, 0] * id + } + return +} + +@(builtin) +matrix3x3_inverse :: proc(x: $M/matrix[3, 3]$T) -> (y: M) #no_bounds_check { + a := adjugate(x) + d := determinant(x) + when intrinsics.type_is_integer(T) { + for i in 0..<3 { + for j in 0..<3 { + inverse_transpose[i, j] = a[j, i] / d + } + } + } else { + id := 1/d + for i in 0..<3 { + for j in 0..<3 { + inverse_transpose[i, j] = a[j, i] * id + } + } + } + return +} + +@(builtin) +matrix4x4_inverse :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check { + a := adjugate(x) + d: T + for i in 0..<4 { + d += x[0, i] * a[0, i] + } + when intrinsics.type_is_integer(T) { + for i in 0..<4 { + for j in 0..<4 { + inverse_transpose[i, j] = a[j, i] / d + } + } + } else { + id := 1/d + for i in 0..<4 { + for j in 0..<4 { + inverse_transpose[i, j] = a[j, i] * id + } + } + } + return +} + + +@(builtin) +matrix_inverse :: proc{ + matrix1x1_inverse, + matrix2x2_inverse, + matrix3x3_inverse, + matrix4x4_inverse, +} + +@(builtin) +inverse :: proc{ + matrix1x1_inverse, + matrix2x2_inverse, + matrix3x3_inverse, + matrix4x4_inverse, +} + +@(builtin) +matrix1x1_hermitian_adjoint :: proc(m: $M/matrix[1, 1]$T) -> M where intrinsics.type_is_complex(T) { + return conj(transpose(m)) +} +@(builtin) +matrix2x2_hermitian_adjoint :: proc(m: $M/matrix[2, 2]$T) -> M where intrinsics.type_is_complex(T) { + return conj(transpose(m)) +} +@(builtin) +matrix3x3_hermitian_adjoint :: proc(m: $M/matrix[3, 3]$T) -> M where intrinsics.type_is_complex(T) { + return conj(transpose(m)) +} +@(builtin) 
+matrix4x4_hermitian_adjoint :: proc(m: $M/matrix[4, 4]$T) -> M where intrinsics.type_is_complex(T) { + return conj(transpose(m)) +} + +@(builtin) +hermitian_adjoint :: proc{ + matrix1x1_hermitian_adjoint, + matrix2x2_hermitian_adjoint, + matrix3x3_hermitian_adjoint, + matrix4x4_hermitian_adjoint, +} \ No newline at end of file From 9b7a25d4a969a2a5063a039effa17795954f6fdc Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 20 Oct 2021 17:03:54 +0100 Subject: [PATCH 29/47] Remove padding in stride of matrix types --- src/types.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/types.cpp b/src/types.cpp index 716ebe31f..d3fa363c2 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1267,19 +1267,20 @@ i64 matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) { elem_size = type_size_of(t->Matrix.elem); } - - /* - [3; 4]f32 -> [4]{x, y, z, _: f32} // extra padding for alignment reasons - */ + i64 stride_in_bytes = 0; i64 row_count = t->Matrix.row_count; +#if 0 if (row_count == 1) { stride_in_bytes = elem_size; } else { i64 matrix_alignment = type_align_of(t); - stride_in_bytes = align_formula(elem_size*t->Matrix.row_count, matrix_alignment); + stride_in_bytes = align_formula(elem_size*row_count, matrix_alignment); } +#else + stride_in_bytes = elem_size*row_count; +#endif t->Matrix.stride_in_bytes = stride_in_bytes; return stride_in_bytes; } From 3b3e7550f62c8f61ac4368d3ed6bf4d385fa9508 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 21 Oct 2021 00:04:08 +0100 Subject: [PATCH 30/47] Correct core_builtin_matrix.odin --- core/runtime/core_builtin_matrix.odin | 157 +++++++++++--------------- 1 file changed, 63 insertions(+), 94 deletions(-) diff --git a/core/runtime/core_builtin_matrix.odin b/core/runtime/core_builtin_matrix.odin index 667c6f031..1ed19f484 100644 --- a/core/runtime/core_builtin_matrix.odin +++ b/core/runtime/core_builtin_matrix.odin @@ -3,6 +3,48 @@ package runtime import "core:intrinsics" _ :: intrinsics + 
+@(builtin) +determinant :: proc{ + matrix1x1_determinant, + matrix2x2_determinant, + matrix3x3_determinant, + matrix4x4_determinant, +} + +@(builtin) +adjugate :: proc{ + matrix1x1_adjugate, + matrix2x2_adjugate, + matrix3x3_adjugate, + matrix4x4_adjugate, +} + +@(builtin) +inverse_transpose :: proc{ + matrix1x1_inverse_transpose, + matrix2x2_inverse_transpose, + matrix3x3_inverse_transpose, + matrix4x4_inverse_transpose, +} + + +@(builtin) +inverse :: proc{ + matrix1x1_inverse, + matrix2x2_inverse, + matrix3x3_inverse, + matrix4x4_inverse, +} + +@(builtin) +hermitian_adjoint :: proc{ + matrix1x1_hermitian_adjoint, + matrix2x2_hermitian_adjoint, + matrix3x3_hermitian_adjoint, + matrix4x4_hermitian_adjoint, +} + @(builtin) matrix1x1_determinant :: proc(m: $M/matrix[1, 1]$T) -> (det: T) { return m[0, 0] @@ -14,9 +56,9 @@ matrix2x2_determinant :: proc(m: $M/matrix[2, 2]$T) -> (det: T) { } @(builtin) matrix3x3_determinant :: proc(m: $M/matrix[3, 3]$T) -> (det: T) { - a := +m[0, 0] * (m[1, 1] * m[2, 2] - m[2, 1] * m[1, 2]) - b := -m[1, 0] * (m[0, 1] * m[2, 2] - m[2, 1] * m[0, 2]) - c := +m[2, 0] * (m[0, 1] * m[1, 2] - m[1, 1] * m[0, 2]) + a := +m[0, 0] * (m[1, 1] * m[2, 2] - m[1, 2] * m[2, 1]) + b := -m[0, 1] * (m[1, 0] * m[2, 2] - m[1, 2] * m[2, 0]) + c := +m[0, 2] * (m[1, 0] * m[2, 1] - m[1, 1] * m[2, 0]) return a + b + c } @(builtin) @@ -28,21 +70,7 @@ matrix4x4_determinant :: proc(m: $M/matrix[4, 4]$T) -> (det: T) { return } -@(builtin) -matrix_determinant :: proc{ - matrix1x1_determinant, - matrix2x2_determinant, - matrix3x3_determinant, - matrix4x4_determinant, -} -@(builtin) -determinant :: proc{ - matrix1x1_determinant, - matrix2x2_determinant, - matrix3x3_determinant, - matrix4x4_determinant, -} @(builtin) @@ -61,16 +89,16 @@ matrix2x2_adjugate :: proc(x: $M/matrix[2, 2]$T) -> (y: M) { } @(builtin) -matrix3x3_adjugate :: proc(x: $M/matrix[3, 3]$T) -> (y: M) { - y[0, 0] = +(x[1, 1] * x[2, 2] - x[1, 2] * x[2, 1]) - y[0, 1] = -(x[1, 0] * x[2, 2] - x[1, 2] * x[2, 
0]) - y[0, 2] = +(x[1, 0] * x[2, 1] - x[1, 1] * x[2, 0]) - y[1, 0] = -(x[0, 1] * x[2, 2] - x[0, 2] * x[2, 1]) - y[1, 1] = +(x[0, 0] * x[2, 2] - x[0, 2] * x[2, 0]) - y[1, 2] = -(x[0, 0] * x[2, 1] - x[0, 1] * x[2, 0]) - y[2, 0] = +(x[0, 1] * x[1, 2] - x[0, 2] * x[1, 1]) - y[2, 1] = -(x[0, 0] * x[1, 2] - x[0, 2] * x[1, 0]) - y[2, 2] = +(x[0, 0] * x[1, 1] - x[0, 1] * x[1, 0]) +matrix3x3_adjugate :: proc(m: $M/matrix[3, 3]$T) -> (y: M) { + y[0, 0] = +(m[1, 1] * m[2, 2] - m[2, 1] * m[1, 2]) + y[0, 1] = -(m[1, 0] * m[2, 2] - m[2, 0] * m[1, 2]) + y[0, 2] = +(m[1, 0] * m[2, 1] - m[2, 0] * m[1, 1]) + y[1, 0] = -(m[0, 1] * m[2, 2] - m[2, 1] * m[0, 2]) + y[1, 1] = +(m[0, 0] * m[2, 2] - m[2, 0] * m[0, 2]) + y[1, 2] = -(m[0, 0] * m[2, 1] - m[2, 0] * m[0, 1]) + y[2, 0] = +(m[0, 1] * m[1, 2] - m[1, 1] * m[0, 2]) + y[2, 1] = -(m[0, 0] * m[1, 2] - m[1, 0] * m[0, 2]) + y[2, 2] = +(m[0, 0] * m[1, 1] - m[1, 0] * m[0, 1]) return } @@ -100,25 +128,6 @@ matrix4x4_adjugate :: proc(x: $M/matrix[4, 4]$T) -> (y: M) { return } -@(builtin) -matrix_adjugate :: proc{ - matrix1x1_adjugate, - matrix2x2_adjugate, - matrix3x3_adjugate, - matrix4x4_adjugate, -} - - -@(builtin) -adjugate :: proc{ - matrix1x1_adjugate, - matrix2x2_adjugate, - matrix3x3_adjugate, - matrix4x4_adjugate, -} - - - @(builtin) matrix1x1_inverse_transpose :: proc(x: $M/matrix[1, 1]$T) -> (y: M) { y[0, 0] = 1/x[0, 0] @@ -129,16 +138,16 @@ matrix1x1_inverse_transpose :: proc(x: $M/matrix[1, 1]$T) -> (y: M) { matrix2x2_inverse_transpose :: proc(x: $M/matrix[2, 2]$T) -> (y: M) { d := x[0, 0]*x[1, 1] - x[0, 1]*x[1, 0] when intrinsics.type_is_integer(T) { - y[0, 0] = x[1, 1] / d - y[0, 1] = x[0, 1] / d - y[1, 0] = x[1, 0] / d - y[1, 1] = x[0, 0] / d + y[0, 0] = +x[1, 1] / d + y[1, 0] = -x[1, 0] / d + y[0, 1] = -x[0, 1] / d + y[1, 1] = +x[0, 0] / d } else { id := 1 / d - y[0, 0] = x[1, 1] * id - y[0, 1] = x[0, 1] * id - y[1, 0] = x[1, 0] * id - y[1, 1] = x[0, 0] * id + y[0, 0] = +x[1, 1] * id + y[1, 0] = -x[1, 0] * id + y[0, 1] = 
-x[0, 1] * id + y[1, 1] = +x[0, 0] * id } return } @@ -188,23 +197,6 @@ matrix4x4_inverse_transpose :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_c return } -@(builtin) -matrix_inverse_transpose :: proc{ - matrix1x1_inverse_transpose, - matrix2x2_inverse_transpose, - matrix3x3_inverse_transpose, - matrix4x4_inverse_transpose, -} - -@(builtin) -inverse_transpose :: proc{ - matrix1x1_inverse_transpose, - matrix2x2_inverse_transpose, - matrix3x3_inverse_transpose, - matrix4x4_inverse_transpose, -} - - @(builtin) matrix1x1_inverse :: proc(x: $M/matrix[1, 1]$T) -> (y: M) { y[0, 0] = 1/x[0, 0] @@ -275,22 +267,6 @@ matrix4x4_inverse :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check { } -@(builtin) -matrix_inverse :: proc{ - matrix1x1_inverse, - matrix2x2_inverse, - matrix3x3_inverse, - matrix4x4_inverse, -} - -@(builtin) -inverse :: proc{ - matrix1x1_inverse, - matrix2x2_inverse, - matrix3x3_inverse, - matrix4x4_inverse, -} - @(builtin) matrix1x1_hermitian_adjoint :: proc(m: $M/matrix[1, 1]$T) -> M where intrinsics.type_is_complex(T) { return conj(transpose(m)) @@ -308,10 +284,3 @@ matrix4x4_hermitian_adjoint :: proc(m: $M/matrix[4, 4]$T) -> M where intrinsics. 
return conj(transpose(m)) } -@(builtin) -hermitian_adjoint :: proc{ - matrix1x1_hermitian_adjoint, - matrix2x2_hermitian_adjoint, - matrix3x3_hermitian_adjoint, - matrix4x4_hermitian_adjoint, -} \ No newline at end of file From d67d7168e2d4ed8e0e5f0d1b23aba5e5ebac6847 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 21 Oct 2021 00:04:22 +0100 Subject: [PATCH 31/47] Allow scalars with matrices --- src/check_expr.cpp | 8 ++++++++ src/llvm_backend_const.cpp | 2 +- src/llvm_backend_expr.cpp | 32 ++++++++++++++++++++++++-------- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 8a1e5fd86..498bf78c7 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -657,6 +657,14 @@ i64 check_distance_between_types(CheckerContext *c, Operand *operand, Type *type return distance + 6; } } + + if (is_type_matrix(dst)) { + Type *elem = base_array_type(dst); + i64 distance = check_distance_between_types(c, operand, elem); + if (distance >= 0) { + return distance + 7; + } + } if (is_type_any(dst)) { if (!is_type_polymorphic(src)) { diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index 413fb365b..554255f47 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -527,7 +527,7 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc i64 total_elem_count = matrix_type_total_elems(type); LLVMValueRef *elems = gb_alloc_array(permanent_allocator(), LLVMValueRef, cast(isize)total_elem_count); for (i64 i = 0; i < row; i++) { - elems[matrix_index_to_offset(type, i)] = single_elem.value; + elems[matrix_indices_to_offset(type, i, i)] = single_elem.value; } for (i64 i = 0; i < total_elem_count; i++) { if (elems[i] == nullptr) { diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index beb860383..cdc1deea1 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -331,7 +331,7 @@ bool lb_try_direct_vector_arith(lbProcedure *p, TokenKind 
op, lbValue lhs, lbVal z = LLVMBuildFRem(p->builder, x, y, ""); break; default: - GB_PANIC("Unsupported vector operation"); + GB_PANIC("Unsupported vector operation %.*s", LIT(token_strings[op])); break; } @@ -918,10 +918,11 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type, bool component_wise=false) { GB_ASSERT(is_type_matrix(lhs.type) || is_type_matrix(rhs.type)); - Type *xt = base_type(lhs.type); - Type *yt = base_type(rhs.type); if (op == Token_Mul && !component_wise) { + Type *xt = base_type(lhs.type); + Type *yt = base_type(rhs.type); + if (xt->kind == Type_Matrix) { if (yt->kind == Type_Matrix) { return lb_emit_matrix_mul(p, lhs, rhs, type); @@ -934,21 +935,36 @@ lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue } } else { - GB_ASSERT(are_types_identical(xt, yt)); + if (is_type_matrix(lhs.type)) { + rhs = lb_emit_conv(p, rhs, lhs.type); + } else { + lhs = lb_emit_conv(p, lhs, rhs.type); + } + + Type *xt = base_type(lhs.type); + Type *yt = base_type(rhs.type); + + GB_ASSERT_MSG(are_types_identical(xt, yt), "%s %.*s %s", type_to_string(lhs.type), LIT(token_strings[op]), type_to_string(rhs.type)); GB_ASSERT(xt->kind == Type_Matrix); // element-wise arithmetic // pretend it is an array lbValue array_lhs = lhs; lbValue array_rhs = rhs; Type *array_type = alloc_type_array(xt->Matrix.elem, matrix_type_total_elems(xt)); - GB_ASSERT(type_size_of(array_type) == type_size_of(type)); + GB_ASSERT(type_size_of(array_type) == type_size_of(xt)); array_lhs.type = array_type; array_rhs.type = array_type; - lbValue array = lb_emit_arith_array(p, op, array_lhs, array_rhs, array_type); - array.type = type; - return array; + if (token_is_comparison(op)) { + lbValue res = lb_emit_comp(p, op, array_lhs, array_rhs); + return lb_emit_conv(p, res, type); + } else { + lbValue array = lb_emit_arith(p, op, array_lhs, array_rhs, 
array_type); + array.type = type; + return array; + } + } GB_PANIC("TODO: lb_emit_arith_matrix"); From c561de33eec802578b2f56b303b0909a346e897c Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 21 Oct 2021 00:07:10 +0100 Subject: [PATCH 32/47] Add intrinsics for the matrix type --- src/check_builtin.cpp | 2 ++ src/checker_builtin_procs.hpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 1535dc2a2..6b7fe21f1 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -25,6 +25,7 @@ BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_boolean_end - is_type_simple_compare, is_type_dereferenceable, is_type_valid_for_keys, + is_type_valid_for_matrix_elems, is_type_named, is_type_pointer, @@ -40,6 +41,7 @@ BuiltinTypeIsProc *builtin_type_is_procs[BuiltinProc__type_simple_boolean_end - is_type_proc, is_type_bit_set, is_type_simd_vector, + is_type_matrix, is_type_polymorphic_record_specialized, is_type_polymorphic_record_unspecialized, diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 5594c1a1a..b503460da 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -196,6 +196,7 @@ BuiltinProc__type_simple_boolean_begin, BuiltinProc_type_is_simple_compare, // easily compared using memcmp BuiltinProc_type_is_dereferenceable, BuiltinProc_type_is_valid_map_key, + BuiltinProc_type_is_valid_matrix_elements, BuiltinProc_type_is_named, BuiltinProc_type_is_pointer, @@ -212,6 +213,7 @@ BuiltinProc__type_simple_boolean_begin, BuiltinProc_type_is_bit_field_value, BuiltinProc_type_is_bit_set, BuiltinProc_type_is_simd_vector, + BuiltinProc_type_is_matrix, BuiltinProc_type_is_specialized_polymorphic_record, BuiltinProc_type_is_unspecialized_polymorphic_record, @@ -441,6 +443,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("type_is_simple_compare"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, 
{STR_LIT("type_is_dereferenceable"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_valid_map_key"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("type_is_valid_matrix_elements"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_named"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_pointer"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, @@ -457,6 +460,7 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("type_is_bit_field_value"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_bit_set"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_simd_vector"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("type_is_matrix"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_specialized_polymorphic_record"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("type_is_unspecialized_polymorphic_record"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, From e0b9475378f4d69ebaf3e141ed941674b2c0d3f3 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 21 Oct 2021 01:14:44 +0100 Subject: [PATCH 33/47] Allow casting between square matrices of the same element type --- src/check_expr.cpp | 19 +++++++++++++++++ src/check_type.cpp | 10 ++++----- src/llvm_backend_expr.cpp | 44 ++++++++++++++++++++++++++++----------- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 498bf78c7..ad12e00c8 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2460,6 +2460,24 @@ bool check_is_castable_to(CheckerContext *c, Operand *operand, Type *y) { if (is_type_quaternion(src) && is_type_quaternion(dst)) { return true; } + + if (is_type_matrix(src) && is_type_matrix(dst)) { + GB_ASSERT(src->kind == Type_Matrix); + GB_ASSERT(dst->kind == Type_Matrix); + if (!are_types_identical(src->Matrix.elem, dst->Matrix.elem)) { + return false; + } + + if (src->Matrix.row_count != 
src->Matrix.column_count) { + return false; + } + + if (dst->Matrix.row_count != dst->Matrix.column_count) { + return false; + } + + return true; + } // Cast between pointers @@ -8838,6 +8856,7 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type case Ast_EnumType: case Ast_MapType: case Ast_BitSetType: + case Ast_MatrixType: o->mode = Addressing_Type; o->type = check_type(c, node); break; diff --git a/src/check_type.cpp b/src/check_type.cpp index d9302c65a..21c8a9f19 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -1154,7 +1154,11 @@ Type *determine_type_from_polymorphic(CheckerContext *ctx, Type *poly_type, Oper bool show_error = modify_type && !ctx->hide_polymorphic_errors; if (!is_operand_value(operand)) { if (show_error) { - error(operand.expr, "Cannot determine polymorphic type from parameter"); + gbString pts = type_to_string(poly_type); + gbString ots = type_to_string(operand.type); + defer (gb_string_free(pts)); + defer (gb_string_free(ots)); + error(operand.expr, "Cannot determine polymorphic type from parameter: '%s' to '%s'", ots, pts); } return t_invalid; } @@ -2839,10 +2843,6 @@ bool check_type_internal(CheckerContext *ctx, Ast *e, Type **type, Type *named_t case_ast_node(mt, MatrixType, e); - bool ips = ctx->in_polymorphic_specialization; - defer (ctx->in_polymorphic_specialization = ips); - ctx->in_polymorphic_specialization = false; - check_matrix_type(ctx, type, e); set_base_type(named_type, *type); return true; diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index cdc1deea1..9582be93c 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -476,7 +476,7 @@ lbValue lb_emit_arith_array(lbProcedure *p, TokenKind op, lbValue lhs, lbValue r } } -bool lb_matrix_elem_simple(Type *t) { +bool lb_is_matrix_simdable(Type *t) { Type *mt = base_type(t); GB_ASSERT(mt->kind == Type_Matrix); @@ -555,7 +555,7 @@ lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { 
Type *mt = base_type(m.type); GB_ASSERT(mt->kind == Type_Matrix); - if (lb_matrix_elem_simple(mt)) { + if (lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; unsigned column_count = cast(unsigned)mt->Matrix.column_count; @@ -623,7 +623,7 @@ lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type) { Type *mt = base_type(m.type); GB_ASSERT(mt->kind == Type_Matrix); - if (lb_matrix_elem_simple(mt)) { + if (lb_is_matrix_simdable(mt)) { LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m); return lb_matrix_cast_vector_to_type(p, vector, type); } @@ -690,7 +690,7 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) unsigned inner = cast(unsigned)xt->Matrix.column_count; unsigned outer_columns = cast(unsigned)yt->Matrix.column_count; - if (lb_matrix_elem_simple(xt)) { + if (lb_is_matrix_simdable(xt)) { unsigned x_stride = cast(unsigned)matrix_type_stride_in_elems(xt); unsigned y_stride = cast(unsigned)matrix_type_stride_in_elems(yt); @@ -773,7 +773,7 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type Type *elem = mt->Matrix.elem; - if (lb_matrix_elem_simple(mt)) { + if (lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; @@ -819,9 +819,8 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type lbValue a = lb_emit_matrix_ev(p, lhs, i, j); lbValue b = lb_emit_struct_ev(p, rhs, cast(i32)j); - lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem); - lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem); - lb_emit_store(p, dst, d); + lbValue c = lb_emit_mul_add(p, a, b, d0, elem); + lb_emit_store(p, dst, c); } } @@ -842,7 +841,7 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type Type *elem = mt->Matrix.elem; - if (lb_matrix_elem_simple(mt)) { + if 
(lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; @@ -903,9 +902,8 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type lbValue a = lb_emit_struct_ev(p, lhs, cast(i32)k); lbValue b = lb_emit_matrix_ev(p, rhs, k, j); - lbValue c = lb_emit_arith(p, Token_Mul, a, b, elem); - lbValue d = lb_emit_arith(p, Token_Add, d0, c, elem); - lb_emit_store(p, dst, d); + lbValue c = lb_emit_mul_add(p, a, b, d0, elem); + lb_emit_store(p, dst, c); } } @@ -1938,6 +1936,28 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return lb_addr_load(p, v); } + + if (is_type_matrix(dst) && is_type_matrix(src)) { + GB_ASSERT(dst->kind == Type_Matrix); + GB_ASSERT(src->kind == Type_Matrix); + lbAddr v = lb_add_local_generated(p, t, true); + for (i64 j = 0; j < dst->Matrix.column_count; j++) { + for (i64 i = 0; i < dst->Matrix.row_count; i++) { + if (i < src->Matrix.row_count && j < src->Matrix.column_count) { + lbValue d = lb_emit_matrix_epi(p, v.addr, i, j); + lbValue s = lb_emit_matrix_ev(p, value, i, j); + lb_emit_store(p, d, s); + } else if (i == j) { + lbValue d = lb_emit_matrix_epi(p, v.addr, i, j); + lbValue s = lb_const_value(p->module, dst->Matrix.elem, exact_value_i64(1), true); + lb_emit_store(p, d, s); + } + } + } + return lb_addr_load(p, v); + } + + if (is_type_any(dst)) { if (is_type_untyped_nil(src)) { From 48d277a3c4604481074df2914efbaba9e0dbed25 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 21 Oct 2021 01:34:39 +0100 Subject: [PATCH 34/47] Allow conversions between matrices of the same element count --- src/check_expr.cpp | 4 +++- src/llvm_backend_const.cpp | 4 ++-- src/llvm_backend_expr.cpp | 38 +++++++++++++++++++++++++--------- src/types.cpp | 42 +++++++++++++++----------------------- 4 files changed, 50 insertions(+), 38 deletions(-) diff --git a/src/check_expr.cpp b/src/check_expr.cpp index ad12e00c8..ee7493553 100644 --- 
a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2469,7 +2469,9 @@ bool check_is_castable_to(CheckerContext *c, Operand *operand, Type *y) { } if (src->Matrix.row_count != src->Matrix.column_count) { - return false; + i64 src_count = src->Matrix.row_count*src->Matrix.column_count; + i64 dst_count = dst->Matrix.row_count*dst->Matrix.column_count; + return src_count == dst_count; } if (dst->Matrix.row_count != dst->Matrix.column_count) { diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index 554255f47..b543089e5 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -524,7 +524,7 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc lbValue single_elem = lb_const_value(m, elem, value, allow_local); single_elem.value = llvm_const_cast(single_elem.value, lb_type(m, elem)); - i64 total_elem_count = matrix_type_total_elems(type); + i64 total_elem_count = matrix_type_total_internal_elems(type); LLVMValueRef *elems = gb_alloc_array(permanent_allocator(), LLVMValueRef, cast(isize)total_elem_count); for (i64 i = 0; i < row; i++) { elems[matrix_indices_to_offset(type, i, i)] = single_elem.value; @@ -990,7 +990,7 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc } i64 max_count = type->Matrix.row_count*type->Matrix.column_count; - i64 total_count = matrix_type_total_elems(type); + i64 total_count = matrix_type_total_internal_elems(type); LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, cast(isize)total_count); if (cl->elems[0]->kind == Ast_FieldValue) { diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 9582be93c..eb88bbde0 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -508,7 +508,7 @@ LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { GB_ASSERT(mt->kind == Type_Matrix); LLVMTypeRef elem_type = lb_type(p->module, mt->Matrix.elem); - unsigned total_count = 
cast(unsigned)matrix_type_total_elems(mt); + unsigned total_count = cast(unsigned)matrix_type_total_internal_elems(mt); LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value; @@ -948,7 +948,7 @@ lbValue lb_emit_arith_matrix(lbProcedure *p, TokenKind op, lbValue lhs, lbValue // pretend it is an array lbValue array_lhs = lhs; lbValue array_rhs = rhs; - Type *array_type = alloc_type_array(xt->Matrix.elem, matrix_type_total_elems(xt)); + Type *array_type = alloc_type_array(xt->Matrix.elem, matrix_type_total_internal_elems(xt)); GB_ASSERT(type_size_of(array_type) == type_size_of(xt)); array_lhs.type = array_type; @@ -1941,15 +1941,33 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { GB_ASSERT(dst->kind == Type_Matrix); GB_ASSERT(src->kind == Type_Matrix); lbAddr v = lb_add_local_generated(p, t, true); - for (i64 j = 0; j < dst->Matrix.column_count; j++) { - for (i64 i = 0; i < dst->Matrix.row_count; i++) { - if (i < src->Matrix.row_count && j < src->Matrix.column_count) { - lbValue d = lb_emit_matrix_epi(p, v.addr, i, j); + + if (is_matrix_square(dst) && is_matrix_square(dst)) { + for (i64 j = 0; j < dst->Matrix.column_count; j++) { + for (i64 i = 0; i < dst->Matrix.row_count; i++) { + if (i < src->Matrix.row_count && j < src->Matrix.column_count) { + lbValue d = lb_emit_matrix_epi(p, v.addr, i, j); + lbValue s = lb_emit_matrix_ev(p, value, i, j); + lb_emit_store(p, d, s); + } else if (i == j) { + lbValue d = lb_emit_matrix_epi(p, v.addr, i, j); + lbValue s = lb_const_value(p->module, dst->Matrix.elem, exact_value_i64(1), true); + lb_emit_store(p, d, s); + } + } + } + } else { + i64 dst_count = dst->Matrix.row_count*dst->Matrix.column_count; + i64 src_count = src->Matrix.row_count*src->Matrix.column_count; + GB_ASSERT(dst_count == src_count); + + for (i64 j = 0; j < src->Matrix.column_count; j++) { + for (i64 i = 0; i < src->Matrix.row_count; i++) { lbValue s = 
lb_emit_matrix_ev(p, value, i, j); - lb_emit_store(p, d, s); - } else if (i == j) { - lbValue d = lb_emit_matrix_epi(p, v.addr, i, j); - lbValue s = lb_const_value(p->module, dst->Matrix.elem, exact_value_i64(1), true); + i64 index = i + j*src->Matrix.row_count; + i64 dst_i = index%dst->Matrix.row_count; + i64 dst_j = index/dst->Matrix.row_count; + lbValue d = lb_emit_matrix_epi(p, v.addr, dst_i, dst_j); lb_emit_store(p, d, s); } } diff --git a/src/types.cpp b/src/types.cpp index d3fa363c2..3abcebdfb 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1293,7 +1293,7 @@ i64 matrix_type_stride_in_elems(Type *t) { } -i64 matrix_type_total_elems(Type *t) { +i64 matrix_type_total_internal_elems(Type *t) { t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); i64 size = type_size_of(t); @@ -1301,30 +1301,6 @@ i64 matrix_type_total_elems(Type *t) { return size/gb_max(elem_size, 1); } -void matrix_indices_from_index(Type *t, i64 index, i64 *row_index_, i64 *column_index_) { - t = base_type(t); - GB_ASSERT(t->kind == Type_Matrix); - i64 row_count = t->Matrix.row_count; - i64 column_count = t->Matrix.column_count; - GB_ASSERT(0 <= index && index < row_count*column_count); - - i64 row_index = index / column_count; - i64 column_index = index % column_count; - - if (row_index_) *row_index_ = row_index; - if (column_index_) *column_index_ = column_index; -} - -i64 matrix_index_to_offset(Type *t, i64 index) { - t = base_type(t); - GB_ASSERT(t->kind == Type_Matrix); - - i64 row_index, column_index; - matrix_indices_from_index(t, index, &row_index, &column_index); - i64 stride_elems = matrix_type_stride_in_elems(t); - return stride_elems*column_index + row_index; -} - i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) { t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); @@ -1333,6 +1309,22 @@ i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_index) { i64 stride_elems = matrix_type_stride_in_elems(t); return stride_elems*column_index + row_index; } 
+i64 matrix_index_to_offset(Type *t, i64 index) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + + i64 row_index = index%t->Matrix.row_count; + i64 column_index = index/t->Matrix.row_count; + return matrix_indices_to_offset(t, row_index, column_index); +} + + + +bool is_matrix_square(Type *t) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + return t->Matrix.row_count == t->Matrix.column_count; +} bool is_type_valid_for_matrix_elems(Type *t) { t = base_type(t); From 306bdf8869f2c9676e73acbf477a302c08137087 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 00:46:50 +0100 Subject: [PATCH 35/47] Update alignment rules for `matrix` types as a compromise to keep zero padding --- src/check_builtin.cpp | 4 +- src/check_type.cpp | 12 +- src/llvm_backend_expr.cpp | 23 +++- src/llvm_backend_general.cpp | 8 +- src/llvm_backend_utility.cpp | 2 +- src/types.cpp | 205 +++++++++++++++++++---------------- 6 files changed, 147 insertions(+), 107 deletions(-) diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 9b94be002..2373317c3 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2083,8 +2083,8 @@ bool check_builtin_procedure(CheckerContext *c, Operand *operand, Ast *call, i32 } i64 max_count = xt->Array.count*yt->Array.count; - if (max_count > MAX_MATRIX_ELEMENT_COUNT) { - error(call, "Product of the array lengths exceed the maximum matrix element count, got %d, expected a maximum of %d", cast(int)max_count, MAX_MATRIX_ELEMENT_COUNT); + if (max_count > MATRIX_ELEMENT_COUNT_MAX) { + error(call, "Product of the array lengths exceed the maximum matrix element count, got %d, expected a maximum of %d", cast(int)max_count, MATRIX_ELEMENT_COUNT_MAX); return false; } diff --git a/src/check_type.cpp b/src/check_type.cpp index 21c8a9f19..813990020 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -2226,21 +2226,21 @@ void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) { generic_column = column.type; 
} - if (row_count < MIN_MATRIX_ELEMENT_COUNT && generic_row == nullptr) { + if (row_count < MATRIX_ELEMENT_COUNT_MIN && generic_row == nullptr) { gbString s = expr_to_string(row.expr); - error(row.expr, "Invalid matrix row count, expected %d+ rows, got %s", MIN_MATRIX_ELEMENT_COUNT, s); + error(row.expr, "Invalid matrix row count, expected %d+ rows, got %s", MATRIX_ELEMENT_COUNT_MIN, s); gb_string_free(s); } - if (column_count < MIN_MATRIX_ELEMENT_COUNT && generic_column == nullptr) { + if (column_count < MATRIX_ELEMENT_COUNT_MIN && generic_column == nullptr) { gbString s = expr_to_string(column.expr); - error(column.expr, "Invalid matrix column count, expected %d+ rows, got %s", MIN_MATRIX_ELEMENT_COUNT, s); + error(column.expr, "Invalid matrix column count, expected %d+ rows, got %s", MATRIX_ELEMENT_COUNT_MIN, s); gb_string_free(s); } - if (row_count*column_count > MAX_MATRIX_ELEMENT_COUNT) { + if (row_count*column_count > MATRIX_ELEMENT_COUNT_MAX) { i64 element_count = row_count*column_count; - error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MAX_MATRIX_ELEMENT_COUNT, cast(long long)element_count); + error(column.expr, "Matrix types are limited to a maximum of %d elements, got %lld", MATRIX_ELEMENT_COUNT_MAX, cast(long long)element_count); } if (!is_type_valid_for_matrix_elems(elem)) { diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 9c114882e..fa2b0b084 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -511,10 +511,16 @@ LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { unsigned total_count = cast(unsigned)matrix_type_total_internal_elems(mt); LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); +#if 1 LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value; LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(total_matrix_type, 0), ""); LLVMValueRef matrix_vector = LLVMBuildLoad(p->builder, 
matrix_vector_ptr, ""); + LLVMSetAlignment(matrix_vector, cast(unsigned)type_align_of(mt)); return matrix_vector; +#else + LLVMValueRef matrix_vector = LLVMBuildBitCast(p->builder, matrix.value, total_matrix_type, ""); + return matrix_vector; +#endif } LLVMValueRef lb_matrix_trimmed_vector_mask(lbProcedure *p, Type *mt) { @@ -524,7 +530,6 @@ LLVMValueRef lb_matrix_trimmed_vector_mask(lbProcedure *p, Type *mt) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; unsigned column_count = cast(unsigned)mt->Matrix.column_count; - unsigned mask_elems_index = 0; auto mask_elems = slice_make(permanent_allocator(), row_count*column_count); for (unsigned j = 0; j < column_count; j++) { @@ -540,7 +545,17 @@ LLVMValueRef lb_matrix_trimmed_vector_mask(lbProcedure *p, Type *mt) { LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) { LLVMValueRef vector = lb_matrix_to_vector(p, m); - LLVMValueRef mask = lb_matrix_trimmed_vector_mask(p, m.type); + + Type *mt = base_type(m.type); + GB_ASSERT(mt->kind == Type_Matrix); + + unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); + unsigned row_count = cast(unsigned)mt->Matrix.row_count; + if (stride == row_count) { + return vector; + } + + LLVMValueRef mask = lb_matrix_trimmed_vector_mask(p, mt); LLVMValueRef trimmed_vector = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, ""); return trimmed_vector; } @@ -791,7 +806,7 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type for (unsigned row_index = 0; row_index < column_count; row_index++) { LLVMValueRef value = lb_emit_struct_ev(p, rhs, row_index).value; - LLVMValueRef row = llvm_splat(p, value, row_count); + LLVMValueRef row = llvm_vector_broadcast(p, value, row_count); v_rows[row_index] = row; } @@ -866,7 +881,7 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type for (unsigned column_index = 
0; column_index < row_count; column_index++) { LLVMValueRef value = lb_emit_struct_ev(p, lhs, column_index).value; - LLVMValueRef row = llvm_splat(p, value, column_count); + LLVMValueRef row = llvm_vector_broadcast(p, value, column_count); v_rows[column_index] = row; } diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 01221cad6..7aa7c7cdd 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -512,8 +512,7 @@ void lb_emit_slice_bounds_check(lbProcedure *p, Token token, lbValue low, lbValu } } -bool lb_try_update_alignment(lbValue ptr, unsigned alignment) { - LLVMValueRef addr_ptr = ptr.value; +bool lb_try_update_alignment(LLVMValueRef addr_ptr, unsigned alignment) { if (LLVMIsAGlobalValue(addr_ptr) || LLVMIsAAllocaInst(addr_ptr) || LLVMIsALoadInst(addr_ptr)) { if (LLVMGetAlignment(addr_ptr) < alignment) { if (LLVMIsAAllocaInst(addr_ptr) || LLVMIsAGlobalValue(addr_ptr)) { @@ -525,6 +524,11 @@ bool lb_try_update_alignment(lbValue ptr, unsigned alignment) { return false; } +bool lb_try_update_alignment(lbValue ptr, unsigned alignment) { + return lb_try_update_alignment(ptr.value, alignment); +} + + bool lb_try_vector_cast(lbModule *m, lbValue ptr, LLVMTypeRef *vector_type_) { Type *array_type = base_type(type_deref(ptr.type)); GB_ASSERT(is_type_array_like(array_type)); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 6754ce798..e458c0692 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1526,7 +1526,7 @@ LLVMValueRef llvm_mask_zero(lbModule *m, unsigned count) { return LLVMConstNull(LLVMVectorType(lb_type(m, t_u32), count)); } -LLVMValueRef llvm_splat(lbProcedure *p, LLVMValueRef value, unsigned count) { +LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned count) { GB_ASSERT(count > 0); if (LLVMIsConstant(value)) { LLVMValueRef single = LLVMConstVector(&value, 1); diff --git a/src/types.cpp b/src/types.cpp index 
3abcebdfb..bfedb5381 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -360,8 +360,8 @@ enum TypeInfoFlag : u32 { enum : int { - MIN_MATRIX_ELEMENT_COUNT = 1, - MAX_MATRIX_ELEMENT_COUNT = 16, + MATRIX_ELEMENT_COUNT_MIN = 1, + MATRIX_ELEMENT_COUNT_MAX = 16, }; @@ -700,6 +700,74 @@ bool is_type_pointer(Type *t); bool is_type_slice(Type *t); bool is_type_integer(Type *t); bool type_set_offsets(Type *t); +Type *base_type(Type *t); + +i64 type_size_of_internal(Type *t, TypePath *path); +i64 type_align_of_internal(Type *t, TypePath *path); + + +// IMPORTANT TODO(bill): SHould this TypePath code be removed since type cycle checking is handled much earlier on? + +struct TypePath { + Array path; // Entity_TypeName; + bool failure; +}; + + +void type_path_init(TypePath *tp) { + tp->path.allocator = heap_allocator(); +} + +void type_path_free(TypePath *tp) { + array_free(&tp->path); +} + +void type_path_print_illegal_cycle(TypePath *tp, isize start_index) { + GB_ASSERT(tp != nullptr); + + GB_ASSERT(start_index < tp->path.count); + Entity *e = tp->path[start_index]; + GB_ASSERT(e != nullptr); + error(e->token, "Illegal type declaration cycle of `%.*s`", LIT(e->token.string)); + // NOTE(bill): Print cycle, if it's deep enough + for (isize j = start_index; j < tp->path.count; j++) { + Entity *e = tp->path[j]; + error(e->token, "\t%.*s refers to", LIT(e->token.string)); + } + // NOTE(bill): This will only print if the path count > 1 + error(e->token, "\t%.*s", LIT(e->token.string)); + tp->failure = true; + e->type->failure = true; + base_type(e->type)->failure = true; +} + +bool type_path_push(TypePath *tp, Type *t) { + GB_ASSERT(tp != nullptr); + if (t->kind != Type_Named) { + return false; + } + Entity *e = t->Named.type_name; + + for (isize i = 0; i < tp->path.count; i++) { + Entity *p = tp->path[i]; + if (p == e) { + type_path_print_illegal_cycle(tp, i); + } + } + + array_add(&tp->path, e); + return true; +} + +void type_path_pop(TypePath *tp) { + if (tp != nullptr && 
tp->path.count > 0) { + array_pop(&tp->path); + } +} + + +#define FAILURE_SIZE 0 +#define FAILURE_ALIGNMENT 0 void init_type_mutex(void) { mutex_init(&g_type_mutex); @@ -1251,6 +1319,42 @@ bool is_type_matrix(Type *t) { return t->kind == Type_Matrix; } +i64 matrix_align_of(Type *t, struct TypePath *tp) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + + Type *elem = t->Matrix.elem; + i64 row_count = gb_max(t->Matrix.row_count, 1); + + bool pop = type_path_push(tp, elem); + if (tp->failure) { + return FAILURE_ALIGNMENT; + } + + i64 elem_align = type_align_of_internal(elem, tp); + if (pop) type_path_pop(tp); + + i64 elem_size = type_size_of(elem); + + + // NOTE(bill, 2021-10-25): The alignment strategy here is to have zero padding + // It would be better for performance to pad each column so that each column + // could be maximally aligned but as a compromise, having no padding will be + // beneficial to third libraries that assume no padding + + i64 total_expected_size = row_count*t->Matrix.column_count*elem_size; + // i64 min_alignment = prev_pow2(elem_align * row_count); + i64 min_alignment = prev_pow2(total_expected_size); + while ((total_expected_size % min_alignment) != 0) { + min_alignment >>= 1; + } + GB_ASSERT(min_alignment >= elem_align); + + i64 align = gb_min(min_alignment, build_context.max_align); + return align; +} + + i64 matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) { t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); @@ -1266,21 +1370,16 @@ i64 matrix_type_stride_in_bytes(Type *t, struct TypePath *tp) { } else { elem_size = type_size_of(t->Matrix.elem); } - i64 stride_in_bytes = 0; + // NOTE(bill, 2021-10-25): The alignment strategy here is to have zero padding + // It would be better for performance to pad each column so that each column + // could be maximally aligned but as a compromise, having no padding will be + // beneficial to third libraries that assume no padding i64 row_count = t->Matrix.row_count; -#if 0 - if 
(row_count == 1) { - stride_in_bytes = elem_size; - } else { - i64 matrix_alignment = type_align_of(t); - stride_in_bytes = align_formula(elem_size*row_count, matrix_alignment); - } -#else stride_in_bytes = elem_size*row_count; -#endif + t->Matrix.stride_in_bytes = stride_in_bytes; return stride_in_bytes; } @@ -2969,71 +3068,6 @@ Slice struct_fields_index_by_increasing_offset(gbAllocator allocator, Type - -// IMPORTANT TODO(bill): SHould this TypePath code be removed since type cycle checking is handled much earlier on? - -struct TypePath { - Array path; // Entity_TypeName; - bool failure; -}; - - -void type_path_init(TypePath *tp) { - tp->path.allocator = heap_allocator(); -} - -void type_path_free(TypePath *tp) { - array_free(&tp->path); -} - -void type_path_print_illegal_cycle(TypePath *tp, isize start_index) { - GB_ASSERT(tp != nullptr); - - GB_ASSERT(start_index < tp->path.count); - Entity *e = tp->path[start_index]; - GB_ASSERT(e != nullptr); - error(e->token, "Illegal type declaration cycle of `%.*s`", LIT(e->token.string)); - // NOTE(bill): Print cycle, if it's deep enough - for (isize j = start_index; j < tp->path.count; j++) { - Entity *e = tp->path[j]; - error(e->token, "\t%.*s refers to", LIT(e->token.string)); - } - // NOTE(bill): This will only print if the path count > 1 - error(e->token, "\t%.*s", LIT(e->token.string)); - tp->failure = true; - e->type->failure = true; - base_type(e->type)->failure = true; -} - -bool type_path_push(TypePath *tp, Type *t) { - GB_ASSERT(tp != nullptr); - if (t->kind != Type_Named) { - return false; - } - Entity *e = t->Named.type_name; - - for (isize i = 0; i < tp->path.count; i++) { - Entity *p = tp->path[i]; - if (p == e) { - type_path_print_illegal_cycle(tp, i); - } - } - - array_add(&tp->path, e); - return true; -} - -void type_path_pop(TypePath *tp) { - if (tp != nullptr && tp->path.count > 0) { - array_pop(&tp->path); - } -} - - -#define FAILURE_SIZE 0 -#define FAILURE_ALIGNMENT 0 - - i64 type_size_of_internal 
(Type *t, TypePath *path); i64 type_align_of_internal(Type *t, TypePath *path); i64 type_size_of(Type *t); @@ -3260,21 +3294,8 @@ i64 type_align_of_internal(Type *t, TypePath *path) { return gb_clamp(next_pow2(type_size_of_internal(t, path)), 1, build_context.max_align); } - case Type_Matrix: { - Type *elem = t->Matrix.elem; - i64 row_count = gb_max(t->Matrix.row_count, 1); - - bool pop = type_path_push(path, elem); - if (path->failure) { - return FAILURE_ALIGNMENT; - } - // elem align is used here rather than size as it make a little more sense - i64 elem_align = type_align_of_internal(elem, path); - if (pop) type_path_pop(path); - - i64 align = gb_min(next_pow2(elem_align * row_count), build_context.max_align); - return align; - } + case Type_Matrix: + return matrix_align_of(t, path); case Type_RelativePointer: return type_align_of_internal(t->RelativePointer.base_integer, path); From c60c7a762157b3d3e34b872550663c4c3989ed19 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 00:47:12 +0100 Subject: [PATCH 36/47] Add comments to `Type_Info_Matrix` --- core/runtime/core.odin | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/runtime/core.odin b/core/runtime/core.odin index ba1e81da6..22e23d79e 100644 --- a/core/runtime/core.odin +++ b/core/runtime/core.odin @@ -165,9 +165,10 @@ Type_Info_Relative_Slice :: struct { Type_Info_Matrix :: struct { elem: ^Type_Info, elem_size: int, - elem_stride: int, + elem_stride: int, // elem_stride >= row_count row_count: int, column_count: int, + // Total element count = column_count * elem_stride } Type_Info_Flag :: enum u8 { From 79ad6f4564e928b166d02c26836f700ea848cb87 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 01:02:38 +0100 Subject: [PATCH 37/47] Remove assert --- core/sys/windows/types.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/sys/windows/types.odin b/core/sys/windows/types.odin index 7fe67e648..3e25a4c18 100644 --- 
a/core/sys/windows/types.odin +++ b/core/sys/windows/types.odin @@ -916,7 +916,7 @@ USER_INFO_1 :: struct #packed { flags: USER_INFO_FLAGS, script_path: LPWSTR, } -#assert(size_of(USER_INFO_1) == 50) +// #assert(size_of(USER_INFO_1) == 50) LOCALGROUP_MEMBERS_INFO_0 :: struct #packed { sid: ^SID, From d62c701a43b255195b1d0dc2f7d80afa40d2b5fe Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 01:03:16 +0100 Subject: [PATCH 38/47] Improve matrix code generation for all supported platforms Through assembly optimization --- src/llvm_backend_expr.cpp | 29 +++++++++++++++++++++++++++-- src/llvm_backend_utility.cpp | 21 ++++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index fa2b0b084..7ae1a7315 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -489,13 +489,32 @@ bool lb_is_matrix_simdable(Type *t) { return false; } + switch (build_context.metrics.arch) { + case TargetArch_amd64: + case TargetArch_arm64: + // possible + break; + case TargetArch_386: + case TargetArch_wasm32: + // nope + return false; + } + if (elem->kind == Type_Basic) { switch (elem->Basic.kind) { case Basic_f16: case Basic_f16le: case Basic_f16be: - // TODO(bill): determine when this is fine - return true; + switch (build_context.metrics.arch) { + case TargetArch_amd64: + return false; + case TargetArch_arm64: + // TODO(bill): determine when this is fine + return true; + case TargetArch_386: + case TargetArch_wasm32: + return false; + } } } @@ -690,6 +709,8 @@ lbValue lb_emit_outer_product(lbProcedure *p, lbValue a, lbValue b, Type *type) } lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { + // TODO(bill): Handle edge case for f16 types on x86(-64) platforms + Type *xt = base_type(lhs.type); Type *yt = base_type(rhs.type); @@ -775,6 +796,8 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) } lbValue 
lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { + // TODO(bill): Handle edge case for f16 types on x86(-64) platforms + Type *mt = base_type(lhs.type); Type *vt = base_type(rhs.type); @@ -843,6 +866,8 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type } lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) { + // TODO(bill): Handle edge case for f16 types on x86(-64) platforms + Type *mt = base_type(rhs.type); Type *vt = base_type(lhs.type); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index e458c0692..af773d467 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1492,7 +1492,26 @@ lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValue c, Type *t b = lb_emit_conv(p, b, t); c = lb_emit_conv(p, c, t); - if (!is_type_different_to_arch_endianness(t) && is_type_float(t)) { + bool is_possible = !is_type_different_to_arch_endianness(t) && is_type_float(t); + + if (is_possible) { + switch (build_context.metrics.arch) { + case TargetArch_amd64: + if (type_size_of(t) == 2) { + is_possible = false; + } + break; + case TargetArch_arm64: + // possible + break; + case TargetArch_386: + case TargetArch_wasm32: + is_possible = false; + break; + } + } + + if (is_possible) { char const *name = "llvm.fma"; unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); GB_ASSERT_MSG(id != 0, "Unable to find %s", name); From aaaddd03a6fc7194fa9315f802e369a0f62b9e07 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 01:28:06 +0100 Subject: [PATCH 39/47] Improve internal procedures --- core/mem/mem.odin | 10 ++-------- core/runtime/internal.odin | 22 +++++++++------------- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/core/mem/mem.odin b/core/mem/mem.odin index 708cfffb9..29d124e42 100644 --- a/core/mem/mem.odin +++ b/core/mem/mem.odin @@ -128,14 +128,8 @@ compare_ptrs :: proc 
"contextless" (a, b: rawptr, n: int) -> int { return compare_byte_ptrs((^byte)(a), (^byte)(b), n) } -ptr_offset :: proc "contextless" (ptr: $P/^$T, n: int) -> P { - new := int(uintptr(ptr)) + size_of(T)*n - return P(uintptr(new)) -} - -ptr_sub :: proc "contextless" (a, b: $P/^$T) -> int { - return (int(uintptr(a)) - int(uintptr(b)))/size_of(T) -} +ptr_offset :: intrinsics.ptr_offset +ptr_sub :: intrinsics.ptr_sub slice_ptr :: proc "contextless" (ptr: ^$T, len: int) -> []T { return ([^]T)(ptr)[:len] diff --git a/core/runtime/internal.odin b/core/runtime/internal.odin index 96944c7f2..4347f28c0 100644 --- a/core/runtime/internal.odin +++ b/core/runtime/internal.odin @@ -2,15 +2,15 @@ package runtime import "core:intrinsics" -bswap_16 :: proc "none" (x: u16) -> u16 { +bswap_16 :: proc "contextless" (x: u16) -> u16 { return x>>8 | x<<8 } -bswap_32 :: proc "none" (x: u32) -> u32 { +bswap_32 :: proc "contextless" (x: u32) -> u32 { return x>>24 | (x>>8)&0xff00 | (x<<8)&0xff0000 | x<<24 } -bswap_64 :: proc "none" (x: u64) -> u64 { +bswap_64 :: proc "contextless" (x: u64) -> u64 { z := x z = (z & 0x00000000ffffffff) << 32 | (z & 0xffffffff00000000) >> 32 z = (z & 0x0000ffff0000ffff) << 16 | (z & 0xffff0000ffff0000) >> 16 @@ -18,7 +18,7 @@ bswap_64 :: proc "none" (x: u64) -> u64 { return z } -bswap_128 :: proc "none" (x: u128) -> u128 { +bswap_128 :: proc "contextless" (x: u128) -> u128 { z := transmute([4]u32)x z[0] = bswap_32(z[3]) z[1] = bswap_32(z[2]) @@ -27,33 +27,27 @@ bswap_128 :: proc "none" (x: u128) -> u128 { return transmute(u128)z } -bswap_f16 :: proc "none" (f: f16) -> f16 { +bswap_f16 :: proc "contextless" (f: f16) -> f16 { x := transmute(u16)f z := bswap_16(x) return transmute(f16)z } -bswap_f32 :: proc "none" (f: f32) -> f32 { +bswap_f32 :: proc "contextless" (f: f32) -> f32 { x := transmute(u32)f z := bswap_32(x) return transmute(f32)z } -bswap_f64 :: proc "none" (f: f64) -> f64 { +bswap_f64 :: proc "contextless" (f: f64) -> f64 { x := transmute(u64)f z := 
bswap_64(x) return transmute(f64)z } - -ptr_offset :: #force_inline proc "contextless" (ptr: $P/^$T, n: int) -> P { - new := int(uintptr(ptr)) + size_of(T)*n - return P(uintptr(new)) -} - is_power_of_two_int :: #force_inline proc(x: int) -> bool { if x <= 0 { return false @@ -828,12 +822,14 @@ floattidf_unsigned :: proc "c" (a: u128) -> f64 { @(link_name="__fixunsdfti") fixunsdfti :: #force_no_inline proc "c" (a: f64) -> u128 { + // TODO(bill): implement `fixunsdfti` correctly x := u64(a) return u128(x) } @(link_name="__fixunsdfdi") fixunsdfdi :: #force_no_inline proc "c" (a: f64) -> i128 { + // TODO(bill): implement `fixunsdfdi` correctly x := i64(a) return i128(x) } From a440d8d812223961f0934aefecaef4975a604c43 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 13:10:56 +0100 Subject: [PATCH 40/47] Improve use of vector muladd operations --- src/llvm_abi.cpp | 13 ++++++++++- src/llvm_backend.hpp | 4 +++- src/llvm_backend_expr.cpp | 10 ++++---- src/llvm_backend_proc.cpp | 11 +-------- src/llvm_backend_utility.cpp | 44 ++++++++++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+), 18 deletions(-) diff --git a/src/llvm_abi.cpp b/src/llvm_abi.cpp index 8d3d5542f..9e7f4b290 100644 --- a/src/llvm_abi.cpp +++ b/src/llvm_abi.cpp @@ -153,7 +153,18 @@ void lb_add_function_type_attributes(LLVMValueRef fn, lbFunctionType *ft, ProcCa // TODO(bill): Clean up this logic if (!is_arch_wasm()) { cc_kind = lb_calling_convention_map[calling_convention]; - } + } + // if (build_context.metrics.arch == TargetArch_amd64) { + // if (build_context.metrics.os == TargetOs_windows) { + // if (cc_kind == lbCallingConvention_C) { + // cc_kind = lbCallingConvention_Win64; + // } + // } else { + // if (cc_kind == lbCallingConvention_C) { + // cc_kind = lbCallingConvention_X86_64_SysV; + // } + // } + // } LLVMSetFunctionCallConv(fn, cc_kind); if (calling_convention == ProcCC_Odin) { unsigned context_index = offset+arg_count; diff --git a/src/llvm_backend.hpp 
b/src/llvm_backend.hpp index d2abed354..4aea88f47 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -472,7 +472,7 @@ LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align); -enum lbCallingConventionKind { +enum lbCallingConventionKind : unsigned { lbCallingConvention_C = 0, lbCallingConvention_Fast = 8, lbCallingConvention_Cold = 9, @@ -517,6 +517,8 @@ enum lbCallingConventionKind { lbCallingConvention_AMDGPU_LS = 95, lbCallingConvention_AMDGPU_ES = 96, lbCallingConvention_AArch64_VectorCall = 97, + lbCallingConvention_AArch64_SVE_VectorCall = 98, + lbCallingConvention_WASM_EmscriptenInvoke = 99, lbCallingConvention_MaxID = 1023, }; diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 7ae1a7315..ad2c609ef 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -837,11 +837,10 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type LLVMValueRef vector = nullptr; for (i64 i = 0; i < column_count; i++) { - LLVMValueRef product = llvm_vector_mul(p, m_columns[i], v_rows[i]); if (i == 0) { - vector = product; + vector = llvm_vector_mul(p, m_columns[i], v_rows[i]); } else { - vector = llvm_vector_add(p, vector, product); + vector = llvm_vector_mul_add(p, m_columns[i], v_rows[i], vector); } } @@ -914,11 +913,10 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type LLVMValueRef vector = nullptr; for (i64 i = 0; i < row_count; i++) { - LLVMValueRef product = llvm_vector_mul(p, v_rows[i], m_columns[i]); if (i == 0) { - vector = product; + vector = llvm_vector_mul(p, v_rows[i], m_columns[i]); } else { - vector = llvm_vector_add(p, vector, product); + vector = llvm_vector_mul_add(p, v_rows[i], m_columns[i], vector); } } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 5423ab51b..96bbbcee6 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -127,16 +127,7 @@ lbProcedure *lb_create_procedure(lbModule 
*m, Entity *entity, bool ignore_body) lb_ensure_abi_function_type(m, p); lb_add_function_type_attributes(p->value, p->abi_function_type, p->abi_function_type->calling_convention); - if (false) { - lbCallingConventionKind cc_kind = lbCallingConvention_C; - // TODO(bill): Clean up this logic - if (!is_arch_wasm()) { - cc_kind = lb_calling_convention_map[pt->Proc.calling_convention]; - } - LLVMSetFunctionCallConv(p->value, cc_kind); - } - - + if (pt->Proc.diverging) { lb_add_attribute_to_proc(m, p->value, "noreturn"); } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index af773d467..9bb22b50b 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1631,4 +1631,48 @@ LLVMValueRef llvm_vector_mul(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) { LLVMValueRef llvm_vector_dot(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) { return llvm_vector_reduce_add(p, llvm_vector_mul(p, a, b)); +} + +LLVMValueRef llvm_vector_mul_add(lbProcedure *p, LLVMValueRef a, LLVMValueRef b, LLVMValueRef c) { + lbModule *m = p->module; + + LLVMTypeRef t = LLVMTypeOf(a); + GB_ASSERT(t == LLVMTypeOf(b)); + GB_ASSERT(t == LLVMTypeOf(c)); + GB_ASSERT(LLVMGetTypeKind(t) == LLVMVectorTypeKind); + + LLVMTypeRef elem = LLVMGetElementType(t); + + bool is_possible = false; + + switch (LLVMGetTypeKind(elem)) { + case LLVMHalfTypeKind: + is_possible = true; + break; + case LLVMFloatTypeKind: + case LLVMDoubleTypeKind: + is_possible = true; + break; + } + + if (is_possible) { + char const *name = "llvm.fmuladd"; + unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", name); + + LLVMTypeRef types[1] = {}; + types[0] = t; + + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, types, gb_count_of(types)); + LLVMValueRef values[3] = {}; + values[0] = a; + values[1] = b; + values[2] = c; + LLVMValueRef call = LLVMBuildCall(p->builder, ip, values, gb_count_of(values), ""); + return call; + } else { + 
LLVMValueRef x = llvm_vector_mul(p, a, b); + LLVMValueRef y = llvm_vector_add(p, x, c); + return y; + } } \ No newline at end of file From f15825d2c62b15a46eb6b383abbcd4e823b78f12 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 13:32:53 +0100 Subject: [PATCH 41/47] Fix typo --- core/runtime/core_builtin_matrix.odin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/runtime/core_builtin_matrix.odin b/core/runtime/core_builtin_matrix.odin index 1ed19f484..7f74dcfc9 100644 --- a/core/runtime/core_builtin_matrix.odin +++ b/core/runtime/core_builtin_matrix.odin @@ -117,12 +117,12 @@ matrix4x4_adjugate :: proc(x: $M/matrix[4, 4]$T) -> (y: M) { } cofactor :: proc(m: $M/matrix[4, 4]$T, row, column: i32) -> (cofactor: T) { sign: T = 1 if (row + column) % 2 == 0 else -1 - return sign * matrix4x4_minor(m, row, column) + return sign * minor(m, row, column) } for i in 0..<4 { for j in 0..<4 { - y[i, j] = matrix4x4_cofactor(x, i, j) + y[i, j] = cofactor(x, i, j) } } return From 44754546323ffe40785ecb992ee5927b9a6b5341 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 14:59:43 +0100 Subject: [PATCH 42/47] Improve core_builtin_matrix.odin --- core/runtime/core_builtin_matrix.odin | 59 +++++++++++---------------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/core/runtime/core_builtin_matrix.odin b/core/runtime/core_builtin_matrix.odin index 7f74dcfc9..9c62eaba5 100644 --- a/core/runtime/core_builtin_matrix.odin +++ b/core/runtime/core_builtin_matrix.odin @@ -37,13 +37,6 @@ inverse :: proc{ matrix4x4_inverse, } -@(builtin) -hermitian_adjoint :: proc{ - matrix1x1_hermitian_adjoint, - matrix2x2_hermitian_adjoint, - matrix3x3_hermitian_adjoint, - matrix4x4_hermitian_adjoint, -} @(builtin) matrix1x1_determinant :: proc(m: $M/matrix[1, 1]$T) -> (det: T) { @@ -103,26 +96,25 @@ matrix3x3_adjugate :: proc(m: $M/matrix[3, 3]$T) -> (y: M) { } @(builtin) -matrix4x4_adjugate :: proc(x: $M/matrix[4, 4]$T) -> (y: M) { - 
minor :: proc(m: $M/matrix[4, 4]$T, row, column: i32) -> (minor: T) { - cut_down: matrix[3, 3]T - for col_idx in 0..<3 { - col := col_idx + int(col_idx >= column) - for row_idx in 0..<3 { - row := row_idx + int(row_idx >= row) - cut_down[row_idx, col_idx] = m[row, col] - } +matrix_minor :: proc(m: $M/matrix[$N, N]$T, row, column: int) -> (minor: T) where N > 1 { + K :: N-1 + cut_down: matrix[K, K]T + for col_idx in 0..= column) + for row_idx in 0..= row) + cut_down[row_idx, col_idx] = m[i, j] } - return determinant(cut_down) } - cofactor :: proc(m: $M/matrix[4, 4]$T, row, column: i32) -> (cofactor: T) { - sign: T = 1 if (row + column) % 2 == 0 else -1 - return sign * minor(m, row, column) - } - + return determinant(cut_down) +} + +@(builtin) +matrix4x4_adjugate :: proc(x: $M/matrix[4, 4]$T) -> (y: M) { for i in 0..<4 { for j in 0..<4 { - y[i, j] = cofactor(x, i, j) + sign: T = 1 if (i + j) % 2 == 0 else -1 + y[i, j] = sign * matrix_minor(x, i, j) } } return @@ -268,19 +260,14 @@ matrix4x4_inverse :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check { @(builtin) -matrix1x1_hermitian_adjoint :: proc(m: $M/matrix[1, 1]$T) -> M where intrinsics.type_is_complex(T) { - return conj(transpose(m)) -} -@(builtin) -matrix2x2_hermitian_adjoint :: proc(m: $M/matrix[2, 2]$T) -> M where intrinsics.type_is_complex(T) { - return conj(transpose(m)) -} -@(builtin) -matrix3x3_hermitian_adjoint :: proc(m: $M/matrix[3, 3]$T) -> M where intrinsics.type_is_complex(T) { - return conj(transpose(m)) -} -@(builtin) -matrix4x4_hermitian_adjoint :: proc(m: $M/matrix[4, 4]$T) -> M where intrinsics.type_is_complex(T) { +matrix_hermitian_adjoint :: proc(m: $M/matrix[$N, N]$T) -> M where intrinsics.type_is_complex(T), N >= 1, N <= 4 { return conj(transpose(m)) } +@(builtin) +matrix_trace :: proc(m: $M/matrix[$N, N]$T) -> (trace: T) where N >= 1, N <= 4 { + for i in 0.. 
Date: Mon, 25 Oct 2021 15:16:34 +0100 Subject: [PATCH 43/47] Minor changes to `where` conditions --- core/runtime/core_builtin_matrix.odin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/runtime/core_builtin_matrix.odin b/core/runtime/core_builtin_matrix.odin index 9c62eaba5..4559dea43 100644 --- a/core/runtime/core_builtin_matrix.odin +++ b/core/runtime/core_builtin_matrix.odin @@ -260,12 +260,12 @@ matrix4x4_inverse :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check { @(builtin) -matrix_hermitian_adjoint :: proc(m: $M/matrix[$N, N]$T) -> M where intrinsics.type_is_complex(T), N >= 1, N <= 4 { +matrix_hermitian_adjoint :: proc(m: $M/matrix[$N, N]$T) -> M where intrinsics.type_is_complex(T), N >= 1 { return conj(transpose(m)) } @(builtin) -matrix_trace :: proc(m: $M/matrix[$N, N]$T) -> (trace: T) where N >= 1, N <= 4 { +matrix_trace :: proc(m: $M/matrix[$N, N]$T) -> (trace: T) { for i in 0.. Date: Mon, 25 Oct 2021 15:35:06 +0100 Subject: [PATCH 44/47] Rename `hermitian_adjoint` --- core/runtime/core_builtin_matrix.odin | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/core/runtime/core_builtin_matrix.odin b/core/runtime/core_builtin_matrix.odin index 4559dea43..548dd6874 100644 --- a/core/runtime/core_builtin_matrix.odin +++ b/core/runtime/core_builtin_matrix.odin @@ -37,6 +37,19 @@ inverse :: proc{ matrix4x4_inverse, } +@(builtin) +hermitian_adjoint :: proc(m: $M/matrix[$N, N]$T) -> M where intrinsics.type_is_complex(T), N >= 1 { + return conj(transpose(m)) +} + +@(builtin) +matrix_trace :: proc(m: $M/matrix[$N, N]$T) -> (trace: T) { + for i in 0.. 
(det: T) { @@ -257,17 +270,3 @@ matrix4x4_inverse :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_check { } return } - - -@(builtin) -matrix_hermitian_adjoint :: proc(m: $M/matrix[$N, N]$T) -> M where intrinsics.type_is_complex(T), N >= 1 { - return conj(transpose(m)) -} - -@(builtin) -matrix_trace :: proc(m: $M/matrix[$N, N]$T) -> (trace: T) { - for i in 0.. Date: Mon, 25 Oct 2021 15:36:00 +0100 Subject: [PATCH 45/47] Reorder code --- core/runtime/core_builtin_matrix.odin | 28 ++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/core/runtime/core_builtin_matrix.odin b/core/runtime/core_builtin_matrix.odin index 548dd6874..32f2e303b 100644 --- a/core/runtime/core_builtin_matrix.odin +++ b/core/runtime/core_builtin_matrix.odin @@ -50,6 +50,21 @@ matrix_trace :: proc(m: $M/matrix[$N, N]$T) -> (trace: T) { return } +@(builtin) +matrix_minor :: proc(m: $M/matrix[$N, N]$T, row, column: int) -> (minor: T) where N > 1 { + K :: N-1 + cut_down: matrix[K, K]T + for col_idx in 0..= column) + for row_idx in 0..= row) + cut_down[row_idx, col_idx] = m[i, j] + } + } + return determinant(cut_down) +} + + @(builtin) matrix1x1_determinant :: proc(m: $M/matrix[1, 1]$T) -> (det: T) { @@ -108,19 +123,6 @@ matrix3x3_adjugate :: proc(m: $M/matrix[3, 3]$T) -> (y: M) { return } -@(builtin) -matrix_minor :: proc(m: $M/matrix[$N, N]$T, row, column: int) -> (minor: T) where N > 1 { - K :: N-1 - cut_down: matrix[K, K]T - for col_idx in 0..= column) - for row_idx in 0..= row) - cut_down[row_idx, col_idx] = m[i, j] - } - } - return determinant(cut_down) -} @(builtin) matrix4x4_adjugate :: proc(x: $M/matrix[4, 4]$T) -> (y: M) { From 7d715fe113c6bffaf004fe21a6e9723913e38bb6 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 16:05:22 +0100 Subject: [PATCH 46/47] Add `ODIN_LLVM_MINIMUM_VERSION_12` --- src/llvm_backend.cpp | 10 +++------- src/llvm_backend.hpp | 12 ++++++++++++ src/llvm_backend_general.cpp | 2 +- 3 files changed, 16 insertions(+), 
8 deletions(-) diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index a853a6224..4d1245c98 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -21,12 +21,6 @@ #include "llvm_backend_stmt.cpp" #include "llvm_backend_proc.cpp" -#if LLVM_VERSION_MAJOR < 11 -#error "LLVM Version 11 is the minimum required" -#elif LLVM_VERSION_MAJOR == 12 && !(LLVM_VERSION_MINOR > 0 || LLVM_VERSION_PATCH > 0) -#error "If LLVM Version 12.x.y is wanted, at least LLVM 12.0.1 is required" -#endif - void lb_add_foreign_library_path(lbModule *m, Entity *e) { if (e == nullptr) { @@ -1214,7 +1208,9 @@ void lb_generate_code(lbGenerator *gen) { // x86-64-v2: (close to Nehalem) CMPXCHG16B, LAHF-SAHF, POPCNT, SSE3, SSE4.1, SSE4.2, SSSE3 // x86-64-v3: (close to Haswell) AVX, AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE, XSAVE // x86-64-v4: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL - llvm_cpu = "x86-64-v2"; + if (ODIN_LLVM_MINIMUM_VERSION_12) { + llvm_cpu = "x86-64-v2"; + } } // GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target)); diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 4aea88f47..9aa9920f2 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -30,6 +30,18 @@ #include #endif +#if LLVM_VERSION_MAJOR < 11 +#error "LLVM Version 11 is the minimum required" +#elif LLVM_VERSION_MAJOR == 12 && !(LLVM_VERSION_MINOR > 0 || LLVM_VERSION_PATCH > 0) +#error "If LLVM Version 12.x.y is wanted, at least LLVM 12.0.1 is required" +#endif + +#if LLVM_VERSION_MAJOR > 12 || (LLVM_VERSION_MAJOR == 12 && LLVM_VERSION_MINOR >= 0 && LLVM_VERSION_PATCH > 0) +#define ODIN_LLVM_MINIMUM_VERSION_12 1 +#else +#define ODIN_LLVM_MINIMUM_VERSION_12 0 +#endif + struct lbProcedure; struct lbValue { diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 7aa7c7cdd..b1c1f924b 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -2065,7 +2065,7 @@ LLVMAttributeRef lb_create_enum_attribute_with_type(LLVMContextRef ctx, char con unsigned 
kind = 0; String s = make_string_c(name); - #if (LLVM_VERSION_MAJOR > 12 || (LLVM_VERSION_MAJOR == 12 && (LLVM_VERSION_MINOR > 0 || LLVM_VERSION_PATCH >= 1))) + #if ODIN_LLVM_MINIMUM_VERSION_12 kind = LLVMGetEnumAttributeKindForName(name, s.len); GB_ASSERT_MSG(kind != 0, "unknown attribute: %s", name); return LLVMCreateTypeAttribute(ctx, kind, type); From 12ebd422c61882beb2921310b378e06929f7f22e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 25 Oct 2021 19:04:08 +0100 Subject: [PATCH 47/47] Fix typos --- core/runtime/core_builtin_matrix.odin | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/core/runtime/core_builtin_matrix.odin b/core/runtime/core_builtin_matrix.odin index 32f2e303b..4091e6197 100644 --- a/core/runtime/core_builtin_matrix.odin +++ b/core/runtime/core_builtin_matrix.odin @@ -166,14 +166,14 @@ matrix3x3_inverse_transpose :: proc(x: $M/matrix[3, 3]$T) -> (y: M) #no_bounds_c when intrinsics.type_is_integer(T) { for i in 0..<3 { for j in 0..<3 { - inverse_transpose[i, j] = a[i, j] / d + y[i, j] = a[i, j] / d } } } else { id := 1/d for i in 0..<3 { for j in 0..<3 { - inverse_transpose[i, j] = a[i, j] * id + y[i, j] = a[i, j] * id } } } @@ -190,14 +190,14 @@ matrix4x4_inverse_transpose :: proc(x: $M/matrix[4, 4]$T) -> (y: M) #no_bounds_c when intrinsics.type_is_integer(T) { for i in 0..<4 { for j in 0..<4 { - inverse_transpose[i, j] = a[i, j] / d + y[i, j] = a[i, j] / d } } } else { id := 1/d for i in 0..<4 { for j in 0..<4 { - inverse_transpose[i, j] = a[i, j] * id + y[i, j] = a[i, j] * id } } } @@ -235,14 +235,14 @@ matrix3x3_inverse :: proc(x: $M/matrix[3, 3]$T) -> (y: M) #no_bounds_check { when intrinsics.type_is_integer(T) { for i in 0..<3 { for j in 0..<3 { - inverse_transpose[i, j] = a[j, i] / d + y[i, j] = a[j, i] / d } } } else { id := 1/d for i in 0..<3 { for j in 0..<3 { - inverse_transpose[i, j] = a[j, i] * id + y[i, j] = a[j, i] * id } } } @@ -259,14 +259,14 @@ matrix4x4_inverse :: proc(x: $M/matrix[4, 
4]$T) -> (y: M) #no_bounds_check { when intrinsics.type_is_integer(T) { for i in 0..<4 { for j in 0..<4 { - inverse_transpose[i, j] = a[j, i] / d + y[i, j] = a[j, i] / d } } } else { id := 1/d for i in 0..<4 { for j in 0..<4 { - inverse_transpose[i, j] = a[j, i] * id + y[i, j] = a[j, i] * id } } }