Improve matrix->matrix casting implementation

This commit is contained in:
gingerBill
2021-11-08 11:40:41 +00:00
parent 1296fabe2c
commit 23f0fbc376
5 changed files with 59 additions and 32 deletions

View File

@@ -18,6 +18,7 @@ enum TargetOsKind {
TargetOs_freebsd,
TargetOs_wasi,
TargetOs_js,
TargetOs_freestanding,
@@ -54,6 +55,7 @@ String target_os_names[TargetOs_COUNT] = {
str_lit("freebsd"),
str_lit("wasi"),
str_lit("js"),
str_lit("freestanding"),
};
@@ -344,12 +346,12 @@ gb_global TargetMetrics target_freestanding_wasm32 = {
str_lit(""),
};
gb_global TargetMetrics target_freestanding_wasm64 = {
TargetOs_freestanding,
TargetArch_wasm64,
gb_global TargetMetrics target_js_wasm32 = {
TargetOs_js,
TargetArch_wasm32,
4,
8,
16,
str_lit("wasm64-freestanding-js"),
str_lit("wasm32-js-js"),
str_lit(""),
};
@@ -363,6 +365,14 @@ gb_global TargetMetrics target_wasi_wasm32 = {
};
// gb_global TargetMetrics target_freestanding_wasm64 = {
// TargetOs_freestanding,
// TargetArch_wasm64,
// 8,
// 16,
// str_lit("wasm64-freestanding-js"),
// str_lit(""),
// };
@@ -372,18 +382,19 @@ struct NamedTargetMetrics {
};
gb_global NamedTargetMetrics named_targets[] = {
{ str_lit("darwin_amd64"), &target_darwin_amd64 },
{ str_lit("darwin_arm64"), &target_darwin_arm64 },
{ str_lit("essence_amd64"), &target_essence_amd64 },
{ str_lit("linux_386"), &target_linux_386 },
{ str_lit("linux_amd64"), &target_linux_amd64 },
{ str_lit("windows_386"), &target_windows_386 },
{ str_lit("windows_amd64"), &target_windows_amd64 },
{ str_lit("freebsd_386"), &target_freebsd_386 },
{ str_lit("freebsd_amd64"), &target_freebsd_amd64 },
{ str_lit("darwin_amd64"), &target_darwin_amd64 },
{ str_lit("darwin_arm64"), &target_darwin_arm64 },
{ str_lit("essence_amd64"), &target_essence_amd64 },
{ str_lit("linux_386"), &target_linux_386 },
{ str_lit("linux_amd64"), &target_linux_amd64 },
{ str_lit("windows_386"), &target_windows_386 },
{ str_lit("windows_amd64"), &target_windows_amd64 },
{ str_lit("freebsd_386"), &target_freebsd_386 },
{ str_lit("freebsd_amd64"), &target_freebsd_amd64 },
{ str_lit("freestanding_wasm32"), &target_freestanding_wasm32 },
{ str_lit("wasi_wasm32"), &target_wasi_wasm32 },
{ str_lit("js_wasm32"), &target_js_wasm32 },
// { str_lit("freestanding_wasm64"), &target_freestanding_wasm64 },
{ str_lit("wasi_wasm32"), &target_wasi_wasm32 },
};
NamedTargetMetrics *selected_target_metrics;

View File

@@ -473,6 +473,10 @@ LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align);
LLVMValueRef llvm_basic_shuffle(lbProcedure *p, LLVMValueRef vector, LLVMValueRef mask);
// Forward declarations of the memory-copy emitters for procedure `p`.
// Both take a destination, a source and a length value; `is_volatile`
// defaults to false when the caller omits it.
// NOTE(review): `len` is presumably a size in bytes (a caller passes
// type_size_of(...)) -- confirm against the definitions.
void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false);
void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false);
#define LB_STARTUP_RUNTIME_PROC_NAME "__$startup_runtime"
#define LB_STARTUP_TYPE_INFO_PROC_NAME "__$startup_type_info"
#define LB_TYPE_INFO_DATA_NAME "__$type_info_data"

View File

@@ -490,15 +490,11 @@ bool lb_is_matrix_simdable(Type *t) {
}
switch (build_context.metrics.arch) {
default:
return false;
case TargetArch_amd64:
case TargetArch_arm64:
// possible
break;
case TargetArch_386:
case TargetArch_wasm32:
case TargetArch_wasm64:
// nope
return false;
}
if (elem->kind == Type_Basic) {
@@ -2018,14 +2014,23 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
i64 src_count = src->Matrix.row_count*src->Matrix.column_count;
GB_ASSERT(dst_count == src_count);
for (i64 j = 0; j < src->Matrix.column_count; j++) {
for (i64 i = 0; i < src->Matrix.row_count; i++) {
lbValue s = lb_emit_matrix_ev(p, value, i, j);
i64 index = i + j*src->Matrix.row_count;
i64 dst_i = index%dst->Matrix.row_count;
i64 dst_j = index/dst->Matrix.row_count;
lbValue d = lb_emit_matrix_epi(p, v.addr, dst_i, dst_j);
lb_emit_store(p, d, s);
lbValue pdst = v.addr;
lbValue psrc = lb_address_from_load_or_generate_local(p, value);
bool same_elem_base_types = are_types_identical(
base_type(dst->Matrix.elem),
base_type(src->Matrix.elem)
);
if (same_elem_base_types && type_size_of(dst) == type_size_of(src)) {
lb_mem_copy_overlapping(p, v.addr, psrc, lb_const_int(p->module, t_int, type_size_of(dst)));
} else {
for (i64 i = 0; i < src_count; i++) {
lbValue dp = lb_emit_array_epi(p, v.addr, matrix_column_major_index_to_offset(dst, i));
lbValue sp = lb_emit_array_epi(p, psrc, matrix_column_major_index_to_offset(src, i));
lbValue s = lb_emit_load(p, sp);
s = lb_emit_conv(p, s, dst->Matrix.elem);
lb_emit_store(p, dp, s);
}
}
}

View File

@@ -1,4 +1,4 @@
void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false) {
void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile) {
dst = lb_emit_conv(p, dst, t_rawptr);
src = lb_emit_conv(p, src, t_rawptr);
len = lb_emit_conv(p, len, t_int);
@@ -27,7 +27,7 @@ void lb_mem_copy_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue l
args[3] = LLVMConstInt(LLVMInt1TypeInContext(p->module->ctx), 0, is_volatile);
LLVMBuildCall(p->builder, ip, args, gb_count_of(args), "");
}
void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile=false) {
void lb_mem_copy_non_overlapping(lbProcedure *p, lbValue dst, lbValue src, lbValue len, bool is_volatile) {
dst = lb_emit_conv(p, dst, t_rawptr);
src = lb_emit_conv(p, src, t_rawptr);
len = lb_emit_conv(p, len, t_int);

View File

@@ -1417,7 +1417,14 @@ i64 matrix_row_major_index_to_offset(Type *t, i64 index) {
i64 row_index = index/t->Matrix.column_count;
return matrix_indices_to_offset(t, row_index, column_index);
}
// Translates a flat, column-major element index of the matrix type `t`
// into the offset computed by matrix_indices_to_offset.
//
// `t` is reduced to its base type first and must be a matrix type.
// The row is `index` modulo the row count and the column is `index`
// divided by the row count.
i64 matrix_column_major_index_to_offset(Type *t, i64 index) {
	t = base_type(t);
	GB_ASSERT(t->kind == Type_Matrix);

	i64 const rows = t->Matrix.row_count;
	return matrix_indices_to_offset(t, index%rows, index/rows);
}
bool is_matrix_square(Type *t) {