Support llvm_vector_reduce_add if the LLVM intrinsic is not supported

This commit is contained in:
gingerBill
2021-10-28 00:57:10 +01:00
parent 0a1ef1e59d
commit 70793236ab
2 changed files with 29 additions and 10 deletions

View File

@@ -2373,7 +2373,9 @@ matrix_type :: proc() {
// Unlike normal arrays, matrices try to maximize alignment to allow for the (SIMD) vectorization
// properties whilst keeping zero padding (either between columns or at the end of the type).
//
// Zero padding is a compromise for use with third-party libraries, instead of optimizing for performance
// Zero padding is a compromise for use with third-party libraries, instead of optimizing for performance.
// Padding between columns was not adopted, even though it would have allowed each column to be loaded
// individually into a SIMD register with the correct alignment properties.
//
// Currently, matrices are limited to a maximum of 16 elements (rows*columns), and a minimum of 1 element.
// This is because matrices are stored as values (not a reference type), and thus operations on them will

View File

@@ -1567,6 +1567,10 @@ LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) {
LLVMTypeRef type = LLVMTypeOf(value);
GB_ASSERT(LLVMGetTypeKind(type) == LLVMVectorTypeKind);
LLVMTypeRef elem = LLVMGetElementType(type);
unsigned len = LLVMGetVectorSize(type);
if (len == 0) {
return LLVMConstNull(type);
}
char const *name = nullptr;
i32 value_offset = 0;
@@ -1591,17 +1595,30 @@ LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) {
}
unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name));
GB_ASSERT_MSG(id != 0, "Unable to find %s", name);
if (id != 0) {
LLVMTypeRef types[1] = {};
types[0] = type;
LLVMValueRef ip = LLVMGetIntrinsicDeclaration(p->module->mod, id, types, gb_count_of(types));
LLVMValueRef values[2] = {};
values[0] = LLVMConstNull(elem);
values[1] = value;
LLVMValueRef call = LLVMBuildCall(p->builder, ip, values+value_offset, value_count, "");
return call;
}
LLVMTypeRef types[1] = {};
types[0] = type;
// Manual reduce
LLVMValueRef ip = LLVMGetIntrinsicDeclaration(p->module->mod, id, types, gb_count_of(types));
LLVMValueRef values[2] = {};
values[0] = LLVMConstNull(elem);
values[1] = value;
LLVMValueRef call = LLVMBuildCall(p->builder, ip, values+value_offset, value_count, "");
return call;
LLVMValueRef sum = LLVMBuildExtractElement(p->builder, value, lb_const_int(p->module, t_u32, 0).value, "");
for (unsigned i = 0; i < len; i++) {
LLVMValueRef val = LLVMBuildExtractElement(p->builder, value, lb_const_int(p->module, t_u32, i).value, "");
if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind) {
sum = LLVMBuildAdd(p->builder, sum, val, "");
} else {
sum = LLVMBuildFAdd(p->builder, sum, val, "");
}
}
return sum;
}
LLVMValueRef llvm_vector_add(lbProcedure *p, LLVMValueRef a, LLVMValueRef b) {