Remove many LLVM optimization passes which were causing UB due to them assuming C-like behaviour incompatible with Odin

This commit is contained in:
gingerBill
2021-11-06 17:23:33 +00:00
parent 5df15b5724
commit 3d3785a7f1
6 changed files with 83 additions and 41 deletions

View File

@@ -684,7 +684,8 @@ lbProcedure *lb_create_startup_runtime(lbModule *main_module, lbProcedure *start
if (init.value == nullptr) {
LLVMTypeRef global_type = LLVMGetElementType(LLVMTypeOf(var->var.value));
if (is_type_untyped_undef(init.type)) {
LLVMSetInitializer(var->var.value, LLVMGetUndef(global_type));
// LLVMSetInitializer(var->var.value, LLVMGetUndef(global_type));
LLVMSetInitializer(var->var.value, LLVMConstNull(global_type));
var->is_initialized = true;
continue;
} else if (is_type_untyped_nil(init.type)) {

View File

@@ -471,6 +471,8 @@ lbValue lb_consume_copy_elision_hint(lbProcedure *p);
lbStructFieldRemapping lb_get_struct_remapping(lbModule *m, Type *t);
LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align);
LLVMValueRef llvm_basic_shuffle(lbProcedure *p, LLVMValueRef vector, LLVMValueRef mask);
#define LB_STARTUP_RUNTIME_PROC_NAME "__$startup_runtime"
#define LB_STARTUP_TYPE_INFO_PROC_NAME "__$startup_type_info"
#define LB_TYPE_INFO_DATA_NAME "__$type_info_data"

View File

@@ -577,7 +577,7 @@ LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) {
}
LLVMValueRef mask = lb_matrix_trimmed_vector_mask(p, mt);
LLVMValueRef trimmed_vector = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, "");
LLVMValueRef trimmed_vector = llvm_basic_shuffle(p, vector, mask);
return trimmed_vector;
}
@@ -608,7 +608,7 @@ lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) {
// transpose mask
LLVMValueRef mask = LLVMConstVector(mask_elems.data, column_count);
LLVMValueRef row = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, "");
LLVMValueRef row = llvm_basic_shuffle(p, vector, mask);
rows[i] = row;
}
@@ -747,13 +747,13 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type)
// transpose mask
LLVMValueRef mask = LLVMConstVector(mask_elems.data, inner);
LLVMValueRef row = LLVMBuildShuffleVector(p->builder, x_vector, LLVMGetUndef(LLVMTypeOf(x_vector)), mask, "");
LLVMValueRef row = llvm_basic_shuffle(p, x_vector, mask);
x_rows[i] = row;
}
for (unsigned i = 0; i < outer_columns; i++) {
LLVMValueRef mask = llvm_mask_iota(p->module, y_stride*i, inner);
LLVMValueRef column = LLVMBuildShuffleVector(p->builder, y_vector, LLVMGetUndef(LLVMTypeOf(y_vector)), mask, "");
LLVMValueRef column = llvm_basic_shuffle(p, y_vector, mask);
y_columns[i] = column;
}
@@ -825,7 +825,7 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type
for (unsigned column_index = 0; column_index < column_count; column_index++) {
LLVMValueRef mask = llvm_mask_iota(p->module, stride*column_index, row_count);
LLVMValueRef column = LLVMBuildShuffleVector(p->builder, matrix_vector, LLVMGetUndef(LLVMTypeOf(matrix_vector)), mask, "");
LLVMValueRef column = llvm_basic_shuffle(p, matrix_vector, mask);
m_columns[column_index] = column;
}
@@ -901,7 +901,7 @@ lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type
// transpose mask
LLVMValueRef mask = LLVMConstVector(mask_elems.data, column_count);
LLVMValueRef column = LLVMBuildShuffleVector(p->builder, matrix_vector, LLVMGetUndef(LLVMTypeOf(matrix_vector)), mask, "");
LLVMValueRef column = llvm_basic_shuffle(p, matrix_vector, mask);
m_columns[row_index] = column;
}

View File

@@ -1084,7 +1084,7 @@ lbValue lb_addr_load(lbProcedure *p, lbAddr const &addr) {
scalars[i] = LLVMConstInt(lb_type(p->module, t_u32), addr.swizzle.indices[i], false);
}
LLVMValueRef mask = LLVMConstVector(scalars, addr.swizzle.count);
LLVMValueRef sv = LLVMBuildShuffleVector(p->builder, v, LLVMGetUndef(vector_type), mask, "");
LLVMValueRef sv = llvm_basic_shuffle(p, v, mask);
LLVMValueRef dst = LLVMBuildPointerCast(p->builder, ptr.value, LLVMPointerType(LLVMTypeOf(sv), 0), "");
LLVMBuildStore(p->builder, sv, dst);

View File

@@ -1,3 +1,36 @@
/**************************************************************************
IMPORTANT NOTE(bill, 2021-11-06): Regarding Optimization Passes
A lot of the passes taken here have been modified with what was
partially done in LLVM 11.
Passes that CANNOT be used by Odin due to C-like optimizations which
are not compatible with Odin:
LLVMAddCorrelatedValuePropagationPass
LLVMAddAggressiveInstCombinerPass
LLVMAddInstructionCombiningPass
LLVMAddIndVarSimplifyPass
LLVMAddLoopUnrollPass
LLVMAddEarlyCSEMemSSAPass
LLVMAddGVNPass
Odin does not allow poison-value based optimizations.
For example, *-flowing integers in C is "undefined behaviour" and thus
many optimizers, including LLVM, take advantage of this for a certain
class of optimizations. Odin on the other hand defines *-flowing
behaviour to obey the rules of 2's complement, meaning wrapping is
expected. This means any outputted IR containing the following flags
may cause incorrect behaviour:
nsw (no signed wrap)
nuw (no unsigned wrap)
poison (poison value)
**************************************************************************/
void lb_populate_function_pass_manager(lbModule *m, LLVMPassManagerRef fpm, bool ignore_memcpy_pass, i32 optimization_level);
void lb_add_function_simplifcation_passes(LLVMPassManagerRef mpm, i32 optimization_level);
void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPassManagerRef mpm, i32 optimization_level);
@@ -33,10 +66,10 @@ void lb_basic_populate_function_pass_manager(LLVMPassManagerRef fpm) {
LLVM_ADD_CONSTANT_VALUE_PASS(fpm);
LLVMAddEarlyCSEPass(fpm);
LLVM_ADD_CONSTANT_VALUE_PASS(fpm);
LLVMAddMergedLoadStoreMotionPass(fpm);
LLVMAddPromoteMemoryToRegisterPass(fpm);
LLVMAddCFGSimplificationPass(fpm);
// LLVM_ADD_CONSTANT_VALUE_PASS(fpm);
// LLVMAddMergedLoadStoreMotionPass(fpm);
// LLVMAddPromoteMemoryToRegisterPass(fpm);
// LLVMAddCFGSimplificationPass(fpm);
}
void lb_populate_function_pass_manager(lbModule *m, LLVMPassManagerRef fpm, bool ignore_memcpy_pass, i32 optimization_level) {
@@ -61,6 +94,7 @@ void lb_populate_function_pass_manager(lbModule *m, LLVMPassManagerRef fpm, bool
LLVMPassManagerBuilderSetSizeLevel(pmb, optimization_level);
LLVMPassManagerBuilderPopulateFunctionPassManager(pmb, fpm);
#else
LLVMAddMemCpyOptPass(fpm);
lb_basic_populate_function_pass_manager(fpm);
LLVMAddSCCPPass(fpm);
@@ -116,17 +150,10 @@ void lb_populate_function_pass_manager_specific(lbModule *m, LLVMPassManagerRef
}
void lb_add_function_simplifcation_passes(LLVMPassManagerRef mpm, i32 optimization_level) {
LLVMAddEarlyCSEMemSSAPass(mpm);
LLVMAddGVNPass(mpm);
LLVMAddCFGSimplificationPass(mpm);
LLVMAddJumpThreadingPass(mpm);
// if (optimization_level > 2) {
// LLVMAddAggressiveInstCombinerPass(mpm);
// }
LLVMAddInstructionCombiningPass(mpm);
LLVMAddSimplifyLibCallsPass(mpm);
LLVMAddTailCallEliminationPass(mpm);
@@ -138,23 +165,16 @@ void lb_add_function_simplifcation_passes(LLVMPassManagerRef mpm, i32 optimizati
LLVMAddLoopUnswitchPass(mpm);
LLVMAddCFGSimplificationPass(mpm);
LLVMAddInstructionCombiningPass(mpm);
LLVMAddIndVarSimplifyPass(mpm);
LLVMAddLoopIdiomPass(mpm);
LLVMAddLoopDeletionPass(mpm);
LLVMAddLoopUnrollPass(mpm);
LLVMAddMergedLoadStoreMotionPass(mpm);
LLVMAddGVNPass(mpm);
LLVMAddMemCpyOptPass(mpm);
LLVMAddSCCPPass(mpm);
LLVMAddBitTrackingDCEPass(mpm);
LLVMAddInstructionCombiningPass(mpm);
LLVMAddJumpThreadingPass(mpm);
LLVM_ADD_CONSTANT_VALUE_PASS(mpm);
LLVMAddDeadStoreEliminationPass(mpm);
@@ -163,7 +183,6 @@ void lb_add_function_simplifcation_passes(LLVMPassManagerRef mpm, i32 optimizati
LLVMAddLoopRerollPass(mpm);
LLVMAddAggressiveDCEPass(mpm);
LLVMAddCFGSimplificationPass(mpm);
LLVMAddInstructionCombiningPass(mpm);
}
@@ -191,6 +210,7 @@ void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPa
// LLVMPassManagerBuilderPopulateLTOPassManager(pmb, mpm, false, true);
// return;
}
LLVMAddIPSCCPPass(mpm);
LLVMAddCalledValuePropagationPass(mpm);
@@ -198,8 +218,6 @@ void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPa
LLVMAddGlobalOptimizerPass(mpm);
LLVMAddDeadArgEliminationPass(mpm);
// LLVMAddConstantMergePass(mpm); // ???
LLVMAddInstructionCombiningPass(mpm);
LLVMAddCFGSimplificationPass(mpm);
LLVMAddPruneEHPass(mpm);
@@ -208,25 +226,24 @@ void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPa
}
LLVMAddFunctionInliningPass(mpm);
lb_add_function_simplifcation_passes(mpm, optimization_level);
LLVMAddGlobalDCEPass(mpm);
LLVMAddGlobalOptimizerPass(mpm);
// LLVMAddLowerConstantIntrinsicsPass(mpm);
LLVMAddLoopRotatePass(mpm);
LLVMAddLoopVectorizePass(mpm);
LLVMAddInstructionCombiningPass(mpm);
if (optimization_level >= 2) {
LLVMAddEarlyCSEPass(mpm);
LLVM_ADD_CONSTANT_VALUE_PASS(mpm);
LLVMAddLICMPass(mpm);
LLVMAddLoopUnswitchPass(mpm);
LLVMAddCFGSimplificationPass(mpm);
LLVMAddInstructionCombiningPass(mpm);
}
LLVMAddCFGSimplificationPass(mpm);
@@ -246,6 +263,15 @@ void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPa
LLVMAddCFGSimplificationPass(mpm);
}
/**************************************************************************
IMPORTANT NOTE(bill, 2021-11-06): Custom Passes
The procedures below are custom written passes to aid in the
optimization of Odin programs
**************************************************************************/
void lb_run_remove_dead_instruction_pass(lbProcedure *p) {
isize removal_count = 0;
isize pass_count = 0;

View File

@@ -1544,6 +1544,19 @@ LLVMValueRef llvm_mask_zero(lbModule *m, unsigned count) {
return LLVMConstNull(LLVMVectorType(lb_type(m, t_u32), count));
}
// Produces the placeholder value of the given LLVM `type` that is passed as
// the second vector operand of the single-input shuffle helpers in this file.
// The active definition yields an undef value; the commented-out alternative
// below would yield a zero/null constant of the same type instead.
#define LLVM_VECTOR_DUMMY_VALUE(type) LLVMGetUndef((type))
// #define LLVM_VECTOR_DUMMY_VALUE(type) LLVMConstNull((type))
// Emits a shufflevector instruction through `p->builder` that shuffles a
// single input `vector` by `mask`. The second shuffle operand is the dummy
// value produced by LLVM_VECTOR_DUMMY_VALUE for the type of `vector`.
// NOTE(review): presumably `mask` is only expected to index into `vector`,
// never into the dummy operand -- confirm against callers.
LLVMValueRef llvm_basic_shuffle(lbProcedure *p, LLVMValueRef vector, LLVMValueRef mask) {
	LLVMValueRef dummy_operand = LLVM_VECTOR_DUMMY_VALUE(LLVMTypeOf(vector));
	return LLVMBuildShuffleVector(p->builder, vector, dummy_operand, mask, "");
}
// Constant-expression counterpart of llvm_basic_shuffle: builds a constant
// shuffle of a single input `vector` by `mask` without using a builder. The
// second shuffle operand is the dummy value produced by
// LLVM_VECTOR_DUMMY_VALUE for the type of `vector`.
LLVMValueRef llvm_basic_const_shuffle(LLVMValueRef vector, LLVMValueRef mask) {
	LLVMValueRef dummy_operand = LLVM_VECTOR_DUMMY_VALUE(LLVMTypeOf(vector));
	return LLVMConstShuffleVector(vector, dummy_operand, mask);
}
LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned count) {
GB_ASSERT(count > 0);
if (LLVMIsConstant(value)) {
@@ -1552,7 +1565,7 @@ LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned
return single;
}
LLVMValueRef mask = llvm_mask_zero(p->module, count);
return LLVMConstShuffleVector(single, LLVMGetUndef(LLVMTypeOf(single)), mask);
return llvm_basic_const_shuffle(single, mask);
}
LLVMTypeRef single_type = LLVMVectorType(LLVMTypeOf(value), 1);
@@ -1561,7 +1574,7 @@ LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned
return single;
}
LLVMValueRef mask = llvm_mask_zero(p->module, count);
return LLVMBuildShuffleVector(p->builder, single, LLVMGetUndef(LLVMTypeOf(single)), mask, "");
return llvm_basic_shuffle(p, single, mask);
}
LLVMValueRef llvm_vector_shuffle_reduction(lbProcedure *p, LLVMValueRef value, LLVMOpcode op_code) {
@@ -1582,8 +1595,8 @@ LLVMValueRef llvm_vector_shuffle_reduction(lbProcedure *p, LLVMValueRef value, L
LLVMValueRef rhs_mask = llvm_mask_iota(p->module, mask_len, mask_len);
GB_ASSERT(LLVMTypeOf(lhs_mask) == LLVMTypeOf(rhs_mask));
LLVMValueRef lhs = LLVMBuildShuffleVector(p->builder, value, LLVMGetUndef(LLVMTypeOf(value)), lhs_mask, "");
LLVMValueRef rhs = LLVMBuildShuffleVector(p->builder, value, LLVMGetUndef(LLVMTypeOf(value)), rhs_mask, "");
LLVMValueRef lhs = llvm_basic_shuffle(p, value, lhs_mask);
LLVMValueRef rhs = llvm_basic_shuffle(p, value, rhs_mask);
GB_ASSERT(LLVMTypeOf(lhs) == LLVMTypeOf(rhs));
value = LLVMBuildBinOp(p->builder, op_code, lhs, rhs, "");
@@ -1675,8 +1688,8 @@ LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) {
GB_ASSERT(len_pow_2 < len);
LLVMValueRef lower_mask = llvm_mask_iota(p->module, 0, len_pow_2);
LLVMValueRef upper_mask = llvm_mask_iota(p->module, len_pow_2, len-len_pow_2);
LLVMValueRef lower = LLVMBuildShuffleVector(p->builder, value, LLVMGetUndef(LLVMTypeOf(value)), lower_mask, "");
LLVMValueRef upper = LLVMBuildShuffleVector(p->builder, value, LLVMGetUndef(LLVMTypeOf(value)), upper_mask, "");
LLVMValueRef lower = llvm_basic_shuffle(p, value, lower_mask);
LLVMValueRef upper = llvm_basic_shuffle(p, value, upper_mask);
upper = llvm_vector_expand_to_power_of_two(p, upper);
LLVMValueRef lower_reduced = llvm_vector_shuffle_reduction(p, lower, op_code);