Improve matrix->matrix casting implementation

This commit is contained in:
gingerBill
2021-11-08 11:40:41 +00:00
parent 1296fabe2c
commit 23f0fbc376
5 changed files with 59 additions and 32 deletions

View File

@@ -490,15 +490,11 @@ bool lb_is_matrix_simdable(Type *t) {
}
switch (build_context.metrics.arch) {
default:
return false;
case TargetArch_amd64:
case TargetArch_arm64:
// possible
break;
case TargetArch_386:
case TargetArch_wasm32:
case TargetArch_wasm64:
// nope
return false;
}
if (elem->kind == Type_Basic) {
@@ -2018,14 +2014,23 @@ lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) {
i64 src_count = src->Matrix.row_count*src->Matrix.column_count;
GB_ASSERT(dst_count == src_count);
for (i64 j = 0; j < src->Matrix.column_count; j++) {
for (i64 i = 0; i < src->Matrix.row_count; i++) {
lbValue s = lb_emit_matrix_ev(p, value, i, j);
i64 index = i + j*src->Matrix.row_count;
i64 dst_i = index%dst->Matrix.row_count;
i64 dst_j = index/dst->Matrix.row_count;
lbValue d = lb_emit_matrix_epi(p, v.addr, dst_i, dst_j);
lb_emit_store(p, d, s);
lbValue pdst = v.addr;
lbValue psrc = lb_address_from_load_or_generate_local(p, value);
bool same_elem_base_types = are_types_identical(
base_type(dst->Matrix.elem),
base_type(src->Matrix.elem)
);
if (same_elem_base_types && type_size_of(dst) == type_size_of(src)) {
lb_mem_copy_overlapping(p, v.addr, psrc, lb_const_int(p->module, t_int, type_size_of(dst)));
} else {
for (i64 i = 0; i < src_count; i++) {
lbValue dp = lb_emit_array_epi(p, v.addr, matrix_column_major_index_to_offset(dst, i));
lbValue sp = lb_emit_array_epi(p, psrc, matrix_column_major_index_to_offset(src, i));
lbValue s = lb_emit_load(p, sp);
s = lb_emit_conv(p, s, dst->Matrix.elem);
lb_emit_store(p, dp, s);
}
}
}