From 23f0fbc376bc4065d0d9391415e10ea6d9b43d96 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 8 Nov 2021 11:40:41 +0000 Subject: [PATCH] Improve matrix->matrix casting implementation --- src/build_settings.cpp | 41 +++++++++++++++++++++++++-------------- src/llvm_backend.hpp | 4 ++++ src/llvm_backend_expr.cpp | 33 ++++++++++++++++++------------- src/llvm_backend_proc.cpp | 4 ++-- src/types.cpp | 9 ++++++++- 5 files changed, 59 insertions(+), 32 deletions(-) diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 182975d7b..3253457b9 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -18,6 +18,7 @@ enum TargetOsKind { TargetOs_freebsd, TargetOs_wasi, + TargetOs_js, TargetOs_freestanding, @@ -54,6 +55,7 @@ String target_os_names[TargetOs_COUNT] = { str_lit("freebsd"), str_lit("wasi"), + str_lit("js"), str_lit("freestanding"), }; @@ -344,12 +346,12 @@ gb_global TargetMetrics target_freestanding_wasm32 = { str_lit(""), }; -gb_global TargetMetrics target_freestanding_wasm64 = { - TargetOs_freestanding, - TargetArch_wasm64, +gb_global TargetMetrics target_js_wasm32 = { + TargetOs_js, + TargetArch_wasm32, + 4, 8, - 16, - str_lit("wasm64-freestanding-js"), + str_lit("wasm32-js-js"), str_lit(""), }; @@ -363,6 +365,14 @@ gb_global TargetMetrics target_wasi_wasm32 = { }; +// gb_global TargetMetrics target_freestanding_wasm64 = { +// TargetOs_freestanding, +// TargetArch_wasm64, +// 8, +// 16, +// str_lit("wasm64-freestanding-js"), +// str_lit(""), +// }; @@ -372,18 +382,19 @@ struct NamedTargetMetrics { }; gb_global NamedTargetMetrics named_targets[] = { - { str_lit("darwin_amd64"), &target_darwin_amd64 }, - { str_lit("darwin_arm64"), &target_darwin_arm64 }, - { str_lit("essence_amd64"), &target_essence_amd64 }, - { str_lit("linux_386"), &target_linux_386 }, - { str_lit("linux_amd64"), &target_linux_amd64 }, - { str_lit("windows_386"), &target_windows_386 }, - { str_lit("windows_amd64"), &target_windows_amd64 }, - { str_lit("freebsd_386"), &target_freebsd_386 }, - { str_lit("freebsd_amd64"), &target_freebsd_amd64 }, + { str_lit("darwin_amd64"), &target_darwin_amd64 }, + { str_lit("darwin_arm64"), &target_darwin_arm64 }, + { str_lit("essence_amd64"), &target_essence_amd64 }, + { str_lit("linux_386"), &target_linux_386 }, + { str_lit("linux_amd64"), &target_linux_amd64 }, + { str_lit("windows_386"), &target_windows_386 }, + { str_lit("windows_amd64"), &target_windows_amd64 }, + { str_lit("freebsd_386"), &target_freebsd_386 }, + { str_lit("freebsd_amd64"), &target_freebsd_amd64 }, { str_lit("freestanding_wasm32"), &target_freestanding_wasm32 }, + { str_lit("wasi_wasm32"), &target_wasi_wasm32 }, + { str_lit("js_wasm32"), &target_js_wasm32 }, // { str_lit("freestanding_wasm64"), &target_freestanding_wasm64 }, - { str_lit("wasi_wasm32"), &target_wasi_wasm32 }, }; NamedTargetMetrics *selected_target_metrics; diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 87382961b..e70b1f84c 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -473,6 +473,10 @@ LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align); LLVMValueRef llvm_basic_shuffle(lbProcedure *p, LLVMValueRef vector, LLVMValueRef mask); +void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false); +void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false); + + #define LB_STARTUP_RUNTIME_PROC_NAME "__$startup_runtime" #define LB_STARTUP_TYPE_INFO_PROC_NAME "__$startup_type_info" #define LB_TYPE_INFO_DATA_NAME "__$type_info_data" diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 291c5ab66..7f162856c 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -490,15 +490,11 @@ bool lb_is_matrix_simdable(Type *t) { } switch (build_context.metrics.arch) { + default: + return false; case TargetArch_amd64: case TargetArch_arm64: - // possible break; - case TargetArch_386: - case TargetArch_wasm32: - case TargetArch_wasm64: - // nope - return false; } if (elem->kind == Type_Basic) { @@ -2018,14 +2014,23 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { i64 src_count = src->Matrix.row_count*src->Matrix.column_count; GB_ASSERT(dst_count == src_count); - for (i64 j = 0; j < src->Matrix.column_count; j++) { - for (i64 i = 0; i < src->Matrix.row_count; i++) { - lbValue s = lb_emit_matrix_ev(p, value, i, j); - i64 index = i + j*src->Matrix.row_count; - i64 dst_i = index%dst->Matrix.row_count; - i64 dst_j = index/dst->Matrix.row_count; - lbValue d = lb_emit_matrix_epi(p, v.addr, dst_i, dst_j); - lb_emit_store(p, d, s); + lbValue pdst = v.addr; + lbValue psrc = lb_address_from_load_or_generate_local(p, value); + + bool same_elem_base_types = are_types_identical( + base_type(dst->Matrix.elem), + base_type(src->Matrix.elem) + ); + + if (same_elem_base_types && type_size_of(dst) == type_size_of(src)) { + lb_mem_copy_overlapping(p, v.addr, psrc, lb_const_int(p->module, t_int, type_size_of(dst))); + } else { + for (i64 i = 0; i < src_count; i++) { + lbValue dp = lb_emit_array_epi(p, v.addr, matrix_column_major_index_to_offset(dst, i)); + lbValue sp = lb_emit_array_epi(p, psrc, matrix_column_major_index_to_offset(src, i)); + lbValue s = lb_emit_load(p, sp); + s = lb_emit_conv(p, s, dst->Matrix.elem); + lb_emit_store(p, dp, s); } } } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index c7b0374cd..2b094cfae 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1,4 +1,4 @@ -void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false) { +void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile) { dst = lb_emit_conv(p, dst, t_rawptr); src = lb_emit_conv(p, src, t_rawptr); len = lb_emit_conv(p, len, t_int); @@ -27,7 +27,7 @@ void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue l args[3] = LLVMConstInt(LLVMInt1TypeInContext(p->module->ctx), 0, is_volatile); LLVMBuildCall(p->builder, ip, args, gb_count_of(args), ""); } -void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false) { +void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile) { dst = lb_emit_conv(p, dst, t_rawptr); src = lb_emit_conv(p, src, t_rawptr); len = lb_emit_conv(p, len, t_int); diff --git a/src/types.cpp b/src/types.cpp index 52bb2e324..652d383ee 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -1417,7 +1417,14 @@ i64 matrix_row_major_index_to_offset(Type *t, i64 index) { i64 row_index = index/t->Matrix.column_count; return matrix_indices_to_offset(t, row_index, column_index); } - +i64 matrix_column_major_index_to_offset(Type *t, i64 index) { + t = base_type(t); + GB_ASSERT(t->kind == Type_Matrix); + + i64 row_index = index%t->Matrix.row_count; + i64 column_index = index/t->Matrix.row_count; + return matrix_indices_to_offset(t, row_index, column_index); +} bool is_matrix_square(Type *t) {