From 3d3785a7f1535cfa213f074704085f42ae02638a Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sat, 6 Nov 2021 17:23:33 +0000 Subject: [PATCH] Remove many LLVM optimization passes which were causing UB due to them assuming C-like behaviour incompatible with Odin --- src/llvm_backend.cpp | 3 +- src/llvm_backend.hpp | 2 + src/llvm_backend_expr.cpp | 12 +++--- src/llvm_backend_general.cpp | 2 +- src/llvm_backend_opt.cpp | 80 ++++++++++++++++++++++++------------ src/llvm_backend_utility.cpp | 25 ++++++++--- 6 files changed, 83 insertions(+), 41 deletions(-) diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index dbe780284..c002bfc5e 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -684,7 +684,8 @@ lbProcedure *lb_create_startup_runtime(lbModule *main_module, lbProcedure *start if (init.value == nullptr) { LLVMTypeRef global_type = LLVMGetElementType(LLVMTypeOf(var->var.value)); if (is_type_untyped_undef(init.type)) { - LLVMSetInitializer(var->var.value, LLVMGetUndef(global_type)); + // LLVMSetInitializer(var->var.value, LLVMGetUndef(global_type)); + LLVMSetInitializer(var->var.value, LLVMConstNull(global_type)); var->is_initialized = true; continue; } else if (is_type_untyped_nil(init.type)) { diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 7a93b7088..87382961b 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -471,6 +471,8 @@ lbValue lb_consume_copy_elision_hint(lbProcedure *p); lbStructFieldRemapping lb_get_struct_remapping(lbModule *m, Type *t); LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align); +LLVMValueRef llvm_basic_shuffle(lbProcedure *p, LLVMValueRef vector, LLVMValueRef mask); + #define LB_STARTUP_RUNTIME_PROC_NAME "__$startup_runtime" #define LB_STARTUP_TYPE_INFO_PROC_NAME "__$startup_type_info" #define LB_TYPE_INFO_DATA_NAME "__$type_info_data" diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 006d396ed..291c5ab66 100644 --- a/src/llvm_backend_expr.cpp +++ 
b/src/llvm_backend_expr.cpp @@ -577,7 +577,7 @@ LLVMValueRef lb_matrix_to_trimmed_vector(lbProcedure *p, lbValue m) { } LLVMValueRef mask = lb_matrix_trimmed_vector_mask(p, mt); - LLVMValueRef trimmed_vector = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, ""); + LLVMValueRef trimmed_vector = llvm_basic_shuffle(p, vector, mask); return trimmed_vector; } @@ -608,7 +608,7 @@ lbValue lb_emit_matrix_tranpose(lbProcedure *p, lbValue m, Type *type) { // transpose mask LLVMValueRef mask = LLVMConstVector(mask_elems.data, column_count); - LLVMValueRef row = LLVMBuildShuffleVector(p->builder, vector, LLVMGetUndef(LLVMTypeOf(vector)), mask, ""); + LLVMValueRef row = llvm_basic_shuffle(p, vector, mask); rows[i] = row; } @@ -747,13 +747,13 @@ lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, Type *type) // transpose mask LLVMValueRef mask = LLVMConstVector(mask_elems.data, inner); - LLVMValueRef row = LLVMBuildShuffleVector(p->builder, x_vector, LLVMGetUndef(LLVMTypeOf(x_vector)), mask, ""); + LLVMValueRef row = llvm_basic_shuffle(p, x_vector, mask); x_rows[i] = row; } for (unsigned i = 0; i < outer_columns; i++) { LLVMValueRef mask = llvm_mask_iota(p->module, y_stride*i, inner); - LLVMValueRef column = LLVMBuildShuffleVector(p->builder, y_vector, LLVMGetUndef(LLVMTypeOf(y_vector)), mask, ""); + LLVMValueRef column = llvm_basic_shuffle(p, y_vector, mask); y_columns[i] = column; } @@ -825,7 +825,7 @@ lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbValue rhs, Type for (unsigned column_index = 0; column_index < column_count; column_index++) { LLVMValueRef mask = llvm_mask_iota(p->module, stride*column_index, row_count); - LLVMValueRef column = LLVMBuildShuffleVector(p->builder, matrix_vector, LLVMGetUndef(LLVMTypeOf(matrix_vector)), mask, ""); + LLVMValueRef column = llvm_basic_shuffle(p, matrix_vector, mask); m_columns[column_index] = column; } @@ -901,7 +901,7 @@ lbValue 
lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbValue rhs, Type // transpose mask LLVMValueRef mask = LLVMConstVector(mask_elems.data, column_count); - LLVMValueRef column = LLVMBuildShuffleVector(p->builder, matrix_vector, LLVMGetUndef(LLVMTypeOf(matrix_vector)), mask, ""); + LLVMValueRef column = llvm_basic_shuffle(p, matrix_vector, mask); m_columns[row_index] = column; } diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 68bba2206..b671f0c8f 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1084,7 +1084,7 @@ lbValue lb_addr_load(lbProcedure *p, lbAddr const &addr) { scalars[i] = LLVMConstInt(lb_type(p->module, t_u32), addr.swizzle.indices[i], false); } LLVMValueRef mask = LLVMConstVector(scalars, addr.swizzle.count); - LLVMValueRef sv = LLVMBuildShuffleVector(p->builder, v, LLVMGetUndef(vector_type), mask, ""); + LLVMValueRef sv = llvm_basic_shuffle(p, v, mask); LLVMValueRef dst = LLVMBuildPointerCast(p->builder, ptr.value, LLVMPointerType(LLVMTypeOf(sv), 0), ""); LLVMBuildStore(p->builder, sv, dst); diff --git a/src/llvm_backend_opt.cpp b/src/llvm_backend_opt.cpp index 6c947dcae..94efe793f 100644 --- a/src/llvm_backend_opt.cpp +++ b/src/llvm_backend_opt.cpp @@ -1,3 +1,36 @@ +/************************************************************************** + + IMPORTANT NOTE(bill, 2021-11-06): Regarding Optimization Passes + + A lot of the passes taken here have been modified with what was + partially done in LLVM 11. + + Passes that CANNOT be used by Odin due to C-like optimizations which + are not compatible with Odin: + + LLVMAddCorrelatedValuePropagationPass + LLVMAddAggressiveInstCombinerPass + LLVMAddInstructionCombiningPass + LLVMAddIndVarSimplifyPass + LLVMAddLoopUnrollPass + LLVMAddEarlyCSEMemSSAPass + LLVMAddGVNPass + + Odin does not allow poison-value based optimizations. 
+ + For example, *-flowing integers in C is "undefined behaviour" and thus + many optimizers, including LLVM, take advantage of this for a certain + class of optimizations. Odin on the other hand defines *-flowing + behaviour to obey the rules of 2's complement, meaning wrapping is + expected. This means any outputted IR containing the following flags + may cause incorrect behaviour: + + nsw (no signed wrap) + nuw (no unsigned wrap) + poison (poison value) +**************************************************************************/ + + void lb_populate_function_pass_manager(lbModule *m, LLVMPassManagerRef fpm, bool ignore_memcpy_pass, i32 optimization_level); void lb_add_function_simplifcation_passes(LLVMPassManagerRef mpm, i32 optimization_level); void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPassManagerRef mpm, i32 optimization_level); @@ -33,10 +66,10 @@ void lb_basic_populate_function_pass_manager(LLVMPassManagerRef fpm) { LLVM_ADD_CONSTANT_VALUE_PASS(fpm); LLVMAddEarlyCSEPass(fpm); - LLVM_ADD_CONSTANT_VALUE_PASS(fpm); - LLVMAddMergedLoadStoreMotionPass(fpm); - LLVMAddPromoteMemoryToRegisterPass(fpm); - LLVMAddCFGSimplificationPass(fpm); + // LLVM_ADD_CONSTANT_VALUE_PASS(fpm); + // LLVMAddMergedLoadStoreMotionPass(fpm); + // LLVMAddPromoteMemoryToRegisterPass(fpm); + // LLVMAddCFGSimplificationPass(fpm); } void lb_populate_function_pass_manager(lbModule *m, LLVMPassManagerRef fpm, bool ignore_memcpy_pass, i32 optimization_level) { @@ -61,6 +94,7 @@ void lb_populate_function_pass_manager(lbModule *m, LLVMPassManagerRef fpm, bool LLVMPassManagerBuilderSetSizeLevel(pmb, optimization_level); LLVMPassManagerBuilderPopulateFunctionPassManager(pmb, fpm); #else + LLVMAddMemCpyOptPass(fpm); lb_basic_populate_function_pass_manager(fpm); LLVMAddSCCPPass(fpm); @@ -116,17 +150,10 @@ void lb_populate_function_pass_manager_specific(lbModule *m, LLVMPassManagerRef } void lb_add_function_simplifcation_passes(LLVMPassManagerRef mpm, i32 
optimization_level) { - LLVMAddEarlyCSEMemSSAPass(mpm); - - LLVMAddGVNPass(mpm); LLVMAddCFGSimplificationPass(mpm); LLVMAddJumpThreadingPass(mpm); - // if (optimization_level > 2) { - // LLVMAddAggressiveInstCombinerPass(mpm); - // } - LLVMAddInstructionCombiningPass(mpm); LLVMAddSimplifyLibCallsPass(mpm); LLVMAddTailCallEliminationPass(mpm); @@ -138,23 +165,16 @@ void lb_add_function_simplifcation_passes(LLVMPassManagerRef mpm, i32 optimizati LLVMAddLoopUnswitchPass(mpm); LLVMAddCFGSimplificationPass(mpm); - LLVMAddInstructionCombiningPass(mpm); - LLVMAddIndVarSimplifyPass(mpm); LLVMAddLoopIdiomPass(mpm); LLVMAddLoopDeletionPass(mpm); - LLVMAddLoopUnrollPass(mpm); - LLVMAddMergedLoadStoreMotionPass(mpm); - LLVMAddGVNPass(mpm); - LLVMAddMemCpyOptPass(mpm); LLVMAddSCCPPass(mpm); LLVMAddBitTrackingDCEPass(mpm); - LLVMAddInstructionCombiningPass(mpm); LLVMAddJumpThreadingPass(mpm); LLVM_ADD_CONSTANT_VALUE_PASS(mpm); LLVMAddDeadStoreEliminationPass(mpm); @@ -163,7 +183,6 @@ void lb_add_function_simplifcation_passes(LLVMPassManagerRef mpm, i32 optimizati LLVMAddLoopRerollPass(mpm); LLVMAddAggressiveDCEPass(mpm); LLVMAddCFGSimplificationPass(mpm); - LLVMAddInstructionCombiningPass(mpm); } @@ -191,6 +210,7 @@ void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPa // LLVMPassManagerBuilderPopulateLTOPassManager(pmb, mpm, false, true); // return; } + LLVMAddIPSCCPPass(mpm); LLVMAddCalledValuePropagationPass(mpm); @@ -198,8 +218,6 @@ void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPa LLVMAddGlobalOptimizerPass(mpm); LLVMAddDeadArgEliminationPass(mpm); - // LLVMAddConstantMergePass(mpm); // ??? 
- LLVMAddInstructionCombiningPass(mpm); LLVMAddCFGSimplificationPass(mpm); LLVMAddPruneEHPass(mpm); @@ -208,25 +226,24 @@ void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPa } LLVMAddFunctionInliningPass(mpm); + lb_add_function_simplifcation_passes(mpm, optimization_level); - + + LLVMAddGlobalDCEPass(mpm); LLVMAddGlobalOptimizerPass(mpm); - - // LLVMAddLowerConstantIntrinsicsPass(mpm); + LLVMAddLoopRotatePass(mpm); LLVMAddLoopVectorizePass(mpm); - - LLVMAddInstructionCombiningPass(mpm); + if (optimization_level >= 2) { LLVMAddEarlyCSEPass(mpm); LLVM_ADD_CONSTANT_VALUE_PASS(mpm); LLVMAddLICMPass(mpm); LLVMAddLoopUnswitchPass(mpm); LLVMAddCFGSimplificationPass(mpm); - LLVMAddInstructionCombiningPass(mpm); } LLVMAddCFGSimplificationPass(mpm); @@ -246,6 +263,15 @@ void lb_populate_module_pass_manager(LLVMTargetMachineRef target_machine, LLVMPa LLVMAddCFGSimplificationPass(mpm); } + + +/************************************************************************** + IMPORTANT NOTE(bill, 2021-11-06): Custom Passes + + The procedures below are custom written passes to aid in the + optimization of Odin programs +**************************************************************************/ + void lb_run_remove_dead_instruction_pass(lbProcedure *p) { isize removal_count = 0; isize pass_count = 0; diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 2ff54342a..46f7a22a3 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1544,6 +1544,19 @@ LLVMValueRef llvm_mask_zero(lbModule *m, unsigned count) { return LLVMConstNull(LLVMVectorType(lb_type(m, t_u32), count)); } +#define LLVM_VECTOR_DUMMY_VALUE(type) LLVMGetUndef((type)) +// #define LLVM_VECTOR_DUMMY_VALUE(type) LLVMConstNull((type)) + + +LLVMValueRef llvm_basic_shuffle(lbProcedure *p, LLVMValueRef vector, LLVMValueRef mask) { + return LLVMBuildShuffleVector(p->builder, vector, LLVM_VECTOR_DUMMY_VALUE(LLVMTypeOf(vector)), mask, ""); +} +LLVMValueRef 
llvm_basic_const_shuffle(LLVMValueRef vector, LLVMValueRef mask) { + return LLVMConstShuffleVector(vector, LLVM_VECTOR_DUMMY_VALUE(LLVMTypeOf(vector)), mask); +} + + + LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned count) { GB_ASSERT(count > 0); if (LLVMIsConstant(value)) { @@ -1552,7 +1565,7 @@ LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned return single; } LLVMValueRef mask = llvm_mask_zero(p->module, count); - return LLVMConstShuffleVector(single, LLVMGetUndef(LLVMTypeOf(single)), mask); + return llvm_basic_const_shuffle(single, mask); } LLVMTypeRef single_type = LLVMVectorType(LLVMTypeOf(value), 1); @@ -1561,7 +1574,7 @@ LLVMValueRef llvm_vector_broadcast(lbProcedure *p, LLVMValueRef value, unsigned return single; } LLVMValueRef mask = llvm_mask_zero(p->module, count); - return LLVMBuildShuffleVector(p->builder, single, LLVMGetUndef(LLVMTypeOf(single)), mask, ""); + return llvm_basic_shuffle(p, single, mask); } LLVMValueRef llvm_vector_shuffle_reduction(lbProcedure *p, LLVMValueRef value, LLVMOpcode op_code) { @@ -1582,8 +1595,8 @@ LLVMValueRef llvm_vector_shuffle_reduction(lbProcedure *p, LLVMValueRef value, L LLVMValueRef rhs_mask = llvm_mask_iota(p->module, mask_len, mask_len); GB_ASSERT(LLVMTypeOf(lhs_mask) == LLVMTypeOf(rhs_mask)); - LLVMValueRef lhs = LLVMBuildShuffleVector(p->builder, value, LLVMGetUndef(LLVMTypeOf(value)), lhs_mask, ""); - LLVMValueRef rhs = LLVMBuildShuffleVector(p->builder, value, LLVMGetUndef(LLVMTypeOf(value)), rhs_mask, ""); + LLVMValueRef lhs = llvm_basic_shuffle(p, value, lhs_mask); + LLVMValueRef rhs = llvm_basic_shuffle(p, value, rhs_mask); GB_ASSERT(LLVMTypeOf(lhs) == LLVMTypeOf(rhs)); value = LLVMBuildBinOp(p->builder, op_code, lhs, rhs, ""); @@ -1675,8 +1688,8 @@ LLVMValueRef llvm_vector_reduce_add(lbProcedure *p, LLVMValueRef value) { GB_ASSERT(len_pow_2 < len); LLVMValueRef lower_mask = llvm_mask_iota(p->module, 0, len_pow_2); LLVMValueRef upper_mask = 
llvm_mask_iota(p->module, len_pow_2, len-len_pow_2); - LLVMValueRef lower = LLVMBuildShuffleVector(p->builder, value, LLVMGetUndef(LLVMTypeOf(value)), lower_mask, ""); - LLVMValueRef upper = LLVMBuildShuffleVector(p->builder, value, LLVMGetUndef(LLVMTypeOf(value)), upper_mask, ""); + LLVMValueRef lower = llvm_basic_shuffle(p, value, lower_mask); + LLVMValueRef upper = llvm_basic_shuffle(p, value, upper_mask); upper = llvm_vector_expand_to_power_of_two(p, upper); LLVMValueRef lower_reduced = llvm_vector_shuffle_reduction(p, lower, op_code);