diff --git a/gen_c_library/c_library.refactor b/gen_c_library/c_library.refactor index 20f3073..4e1d247 100644 --- a/gen_c_library/c_library.refactor +++ b/gen_c_library/c_library.refactor @@ -420,6 +420,10 @@ word make_code, gen_make_code namespace set_allocator_, gen_set_allocator_ +word register_macro, gen_register_macro +word register_macros, gen_register_macros +word register_macros_arr, gen_register_macros_arr + namespace Opts_, gen_Opts_ namespace def_, gen_def_ diff --git a/scripts/build.ci.ps1 b/scripts/build.ci.ps1 index 8234477..32fa51b 100644 --- a/scripts/build.ci.ps1 +++ b/scripts/build.ci.ps1 @@ -326,7 +326,7 @@ if ( $unreal ) } # C Library testing -if ( $test -and $true ) +if ( $test -and $false ) { $path_test_c = join-path $path_test c_library $path_build = join-path $path_test_c build @@ -368,7 +368,49 @@ if ( $test -and $true ) Pop-Location } -if ($test -and $true) +if ( $test -and $true ) +{ + $path_test_c = join-path $path_test c_library + $path_build = join-path $path_test_c build + $path_gen = join-path $path_test_c gen + if ( -not(Test-Path($path_build) )) { + New-Item -ItemType Directory -Path $path_build + } + if ( -not(Test-Path($path_gen) )) { + New-Item -ItemType Directory -Path $path_gen + } + + $path_singleheader_include = join-path $path_c_library gen + $includes = @( $path_singleheader_include ) + $unit = join-path $path_test_c "test_cuik.c" + $executable = join-path $path_build "test_cuik.exe" + + $compiler_args = @() + $compiler_args += ( $flag_define + 'GEN_TIME' ) + $compiler_args += $flag_all_c + $compiler_args += $flag_updated_cpp_macro + $compiler_args += $flag_c11 + + $linker_args = @( + $flag_link_win_subsystem_console + ) + + $result = build-simple $path_build $includes $compiler_args $linker_args $unit $executable + + Push-Location $path_test_c + if ( Test-Path( $executable ) ) { + write-host "`nRunning c_library test" + $time_taken = Measure-Command { & $executable + | ForEach-Object { + write-host `t $_ -ForegroundColor 
Green + } + } + write-host "`nc_library generator completed in $($time_taken.TotalMilliseconds) ms" + } + Pop-Location +} + +if ($test -and $false) { $path_test_cpp = join-path $path_test cpp_library $path_build = join-path $path_test_cpp build diff --git a/test/c_library/Cuik/LICENSE.txt b/test/c_library/Cuik/LICENSE.txt new file mode 100644 index 0000000..3cca8d4 --- /dev/null +++ b/test/c_library/Cuik/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Yasser Arguelles Snape + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/test/c_library/Cuik/tb/opt/passes.h b/test/c_library/Cuik/tb/opt/passes.h new file mode 100644 index 0000000..0754982 --- /dev/null +++ b/test/c_library/Cuik/tb/opt/passes.h @@ -0,0 +1,454 @@ +#pragma once +#include "../tb_internal.h" +#include +#include + +enum { + INT_WIDEN_LIMIT = 3, + FAST_IDOM_LIMIT = 20 +}; + +#if TB_PACKED_USERS +#define USERN(u) ((TB_Node*) ((u)->_n)) // node +#define USERI(u) ((int) ((u)->_slot)) // index +#else +#define USERN(u) ((u)->_n) // node +#define USERI(u) ((u)->_slot) // index +#endif + +#define FOR_USERS(u, n) for (TB_User *u = (n)->users, *_end_ = &u[(n)->user_count]; u != _end_; u++) + +//////////////////////////////// +// Constant prop +//////////////////////////////// +typedef struct { + int64_t min, max; + // for known bit analysis + uint64_t known_zeros, known_ones; + // we really don't wanna widen 18 quintillion times, it's never worth it + uint64_t widen; +} LatticeInt; + +// Represents the fancier type system within the optimizer, it's +// all backed by my shitty understanding of lattice theory +struct Lattice { + enum { + LATTICE_BOT, // bot ^ x = bot + LATTICE_TOP, // top ^ x = x + + LATTICE_INT, + LATTICE_TUPLE, + + // float (each float type has it's own separate set of these btw): + // + // top + // / \ + // / \ + // / \ + // / \ + // /|\ /|\ + // / | \ / | \ + // N N N 0.0 1.5 ... # fltcon + // \ | / \ | / + // \|/ \|/ + // nan ~nan + // \ / + // \ / + // \ / + // \ / + // flt + // + // N means NaN it's just too long to write in the diagram + LATTICE_FLT32, LATTICE_FLT64, // bottom types for floats + LATTICE_NAN32, LATTICE_NAN64, + LATTICE_XNAN32, LATTICE_XNAN64, + LATTICE_FLTCON32, LATTICE_FLTCON64, // _f32 and _f64 + + // pointers: + // anyptr + // / \ + // / \ + // / /|\ + // | / | \ + // null a b ... 
# ptrcon + // | \ | / + // \ ~null + // \ / + // allptr + LATTICE_ALLPTR, + LATTICE_ANYPTR, + LATTICE_NULL, + LATTICE_XNULL, + LATTICE_PTRCON, + + // memory types + LATTICE_MEMORY, + + // control tokens: + // top + // | + // dead + // | + // live + // | + // bot + LATTICE_LIVE, + LATTICE_DEAD, + } tag; + union { + size_t _elem_count; // LATTICE_TUPLE + LatticeInt _int; // LATTICE_INT + TB_Symbol* _ptr; // LATTICE_PTRCON + float _f32; // LATTICE_FLTCON32 + double _f64; // LATTICE_FLTCON64 + }; + union { + Lattice* elems[0]; + }; +}; + +//////////////////////////////// +// Cool properties +//////////////////////////////// +uint32_t cfg_flags(TB_Node* n); +bool cfg_is_region(TB_Node* n); +bool cfg_is_natural_loop(TB_Node* n); +bool cfg_is_branch(TB_Node* n); +bool cfg_is_fork(TB_Node* n); +bool cfg_is_terminator(TB_Node* n); +bool cfg_is_endpoint(TB_Node* n); + +bool tb_node_is_safepoint(TB_Node* n); +bool tb_node_has_mem_out(TB_Node* n); +TB_Node* tb_node_mem_in(TB_Node* n); + +//////////////////////////////// +// CFG +//////////////////////////////// +typedef struct { + TB_Node *phi, *n; + int dst, src; +} PhiVal; + +//////////////////////////////// +// Core optimizer +//////////////////////////////// +typedef struct { + TB_Module* mod; + NL_HashSet visited; + + size_t ws_cap; + size_t ws_cnt; + TB_Function** ws; +} IPOSolver; + +static bool cant_signed_overflow(TB_Node* n) { + return TB_NODE_GET_EXTRA_T(n, TB_NodeBinopInt)->ab & TB_ARITHMATIC_NSW; +} + +static bool is_proj(TB_Node* n) { + return n->type == TB_PROJ || n->type == TB_MACH_PROJ || n->type == TB_BRANCH_PROJ; +} + +static uint64_t tb__mask(uint64_t bits) { + return ~UINT64_C(0) >> (64 - bits); +} + +static bool cfg_is_cproj(TB_Node* n) { + return is_proj(n) && n->dt.type == TB_TAG_CONTROL; +} + +static bool cfg_is_mproj(TB_Node* n) { + return n->type == TB_PROJ && n->dt.type == TB_TAG_MEMORY; +} + +// includes tuples which have control flow +static bool cfg_is_control(TB_Node* n) { + if (n->dt.type == 
TB_TAG_CONTROL) { return true; } + if (n->dt.type == TB_TAG_TUPLE) { + FOR_USERS(u, n) { + if (cfg_is_cproj(USERN(u))) { return true; } + } + } + return false; +} + +static bool cfg_is_bb_entry(TB_Node* n) { + if (cfg_is_region(n)) { + return true; + } else if (cfg_is_cproj(n) && (n->inputs[0]->type == TB_ROOT || cfg_is_fork(n->inputs[0]))) { + // Start's control proj or a branch target + return true; + } else { + return false; + } +} + +// returns a BranchProj's falsey proj, if it's an if-like TB_BRANCH +static TB_NodeBranchProj* cfg_if_branch(TB_Node* n) { + size_t succ_count = 0; + if (n->type == TB_BRANCH || n->type == TB_AFFINE_LATCH) { + TB_NodeBranch* br = TB_NODE_GET_EXTRA(n); + succ_count = br->succ_count; + } else if (cfg_is_branch(n)) { + FOR_USERS(u, n) { + if (USERN(u)->type == TB_BRANCH_PROJ) { succ_count++; } + } + } else { + tb_todo(); + } + + if (succ_count != 2) { return NULL; } + FOR_USERS(u, n) { + if (USERN(u)->type == TB_BRANCH_PROJ) { + TB_NodeBranchProj* proj = TB_NODE_GET_EXTRA(USERN(u)); + if (proj->index == 1) { return proj; } + } + } + + // shouldn't be reached wtf? + return NULL; +} + +static bool is_mem_out_op(TB_Node* n) { + return n->dt.type == TB_TAG_MEMORY || (n->type >= TB_STORE && n->type <= TB_ATOMIC_CAS) || (n->type >= TB_CALL && n->type <= TB_TAILCALL) || n->type == TB_SPLITMEM || n->type == TB_MERGEMEM || n->type == TB_DEBUG_LOCATION; +} + +static bool is_mem_end_op(TB_Node* n) { + return n->type == TB_RETURN || n->type == TB_TRAP || n->type == TB_UNREACHABLE; +} + +static bool is_mem_in_op(TB_Node* n) { + return is_mem_out_op(n) || n->type == TB_SAFEPOINT || n->type == TB_LOAD; +} + +static bool is_mem_only_in_op(TB_Node* n) { + return n->type == TB_SAFEPOINT || n->type == TB_LOAD; +} + +static bool single_use(TB_Node* n) { + return n->user_count == 1; +} + +static TB_User* get_single_use(TB_Node* n) { + return n->user_count == 1 ? 
&n->users[0] : NULL; +} + +static bool tb_node_is_pinned(TB_Node* n) { + if ((n->type >= TB_ROOT && n->type <= TB_SAFEPOINT) || is_proj(n) || cfg_is_control(n)) { + return true; + } + + return cfg_flags(n) & NODE_PINNED; +} + +//////////////////////////////// +// CFG analysis +//////////////////////////////// +// if we see a branch projection, it may either be a BB itself +// or if it enters a REGION directly, then that region is the BB. +static TB_Node* cfg_next_bb_after_cproj(TB_Node* proj) { + return proj; +} + +static TB_User* proj_with_index(TB_Node* n, int i) { + FOR_USERS(u, n) if (is_proj(USERN(u))) { + TB_NodeProj* p = TB_NODE_GET_EXTRA(USERN(u)); + if (p->index == i) { return u; } + } + + return NULL; +} + +static TB_User* cfg_next_user(TB_Node* n) { + FOR_USERS(u, n) { + if (cfg_is_control(USERN(u))) { return u; } + } + + return NULL; +} + +static bool cfg_has_phis(TB_Node* n) { + if (!cfg_is_region(n)) { return false; } + FOR_USERS(u, n) { + if (USERN(u)->type == TB_PHI) { return true; } + } + return false; +} + +static bool cfg_is_unreachable(TB_Node* n) { + FOR_USERS(u, n) { + if (USERN(u)->type == TB_UNREACHABLE) { return true; } + } + + return false; +} + +static TB_Node* cfg_next_control(TB_Node* n) { + FOR_USERS(u, n) { + if (cfg_is_control(USERN(u))) { return USERN(u); } + } + + return NULL; +} + +static TB_Node* cfg_get_pred(TB_CFG* cfg, TB_Node* n, int i) { + n = n->inputs[i]; + for (;;) { + ptrdiff_t search = nl_map_get(cfg->node_to_block, n); + if (search >= 0 || n->type == TB_DEAD || cfg_is_region(n)) { + return n; + } + + n = n->inputs[0]; + } +} + +static TB_BasicBlock* cfg_get_pred_bb(TB_CFG* cfg, TB_Node* n, int i) { + n = n->inputs[i]; + for (;;) { + ptrdiff_t search = nl_map_get(cfg->node_to_block, n); + if (search >= 0) { + return cfg->node_to_block[search].v; + } else if (n->type == TB_DEAD || cfg_is_region(n)) { + return NULL; + } + + n = n->inputs[0]; + } +} + +// shorthand because we use it a lot +static TB_Node* idom(TB_CFG* cfg, 
TB_Node* n) { + TB_ASSERT(cfg->node_to_block == NULL); + ptrdiff_t search = nl_map_get(cfg->node_to_block, n); + if (search < 0) { + return NULL; + } + + TB_BasicBlock* dom = cfg->node_to_block[search].v->dom; + return dom ? dom->start : NULL; +} + +static int dom_depth(TB_CFG* cfg, TB_Node* n) { + return nl_map_get_checked(cfg->node_to_block, n)->dom_depth; +} + +static bool slow_dommy2(TB_BasicBlock* expected_dom, TB_BasicBlock* bb) { + while (bb->dom_depth > expected_dom->dom_depth) { + bb = bb->dom; + } + return bb == expected_dom; +} + +static bool slow_dommy(TB_CFG* cfg, TB_Node* expected_dom, TB_Node* bb) { + TB_BasicBlock* a = nl_map_get_checked(cfg->node_to_block, expected_dom); + TB_BasicBlock* b = nl_map_get_checked(cfg->node_to_block, bb); + return slow_dommy2(a, b); +} + + +//////////////////////////////// +// Unordered SoN successor iterator +//////////////////////////////// +#define FOR_SUCC(it, n) for (SuccIter it = succ_iter(n); succ_iter_next(&it);) + +typedef struct { + TB_Node* n; + TB_Node* succ; + int index; // -1 if we're not walking CProjs +} SuccIter; + +static SuccIter succ_iter(TB_Node* n) { + if (n->dt.type == TB_TAG_TUPLE) { + return (SuccIter){ n, NULL, 0 }; + } else if (!cfg_is_endpoint(n)) { + return (SuccIter){ n, NULL, -1 }; + } else { + return (SuccIter){ n, NULL, n->user_count }; + } +} + +static bool succ_iter_next(SuccIter* restrict it) { + TB_Node* n = it->n; + + // not branching? 
ok pick single next control + if (it->index == -1) { + it->index = n->user_count; // terminate + it->succ = cfg_next_control(n); + return true; + } + + // if we're in this loop, we know we're scanning for CProjs + while (it->index < n->user_count) { + TB_Node* un = USERN(&n->users[it->index++]); + if (cfg_is_cproj(un)) { + it->succ = un; + return true; + } + } + + return false; +} + +// lovely properties +bool cfg_is_region(TB_Node* n); +bool cfg_is_natural_loop(TB_Node* n); +bool cfg_is_terminator(TB_Node* n); +bool cfg_is_endpoint(TB_Node* n); + +// internal debugging mostly +void tb_print_dumb_node(Lattice** types, TB_Node* n); + +// computes basic blocks but also dominators and loop nests if necessary. +TB_CFG tb_compute_cfg(TB_Function* f, TB_Worklist* ws, TB_Arena* tmp_arena, bool dominators); +void tb_free_cfg(TB_CFG* cfg); + +// TB_Worklist API +void worklist_alloc(TB_Worklist* restrict ws, size_t initial_cap); +void worklist_free(TB_Worklist* restrict ws); +void worklist_clear(TB_Worklist* restrict ws); +void worklist_clear_visited(TB_Worklist* restrict ws); +bool worklist_test(TB_Worklist* restrict ws, TB_Node* n); +bool worklist_test_n_set(TB_Worklist* restrict ws, TB_Node* n); +void worklist_push(TB_Worklist* restrict ws, TB_Node* restrict n); +int worklist_count(TB_Worklist* ws); +TB_Node* worklist_pop(TB_Worklist* ws); + +void subsume_node(TB_Function* f, TB_Node* n, TB_Node* new_n); +void subsume_node2(TB_Function* f, TB_Node* n, TB_Node* new_n); +void subsume_node_without_phis(TB_Function* f, TB_Node* n, TB_Node* new_n); +void tb__gvn_remove(TB_Function* f, TB_Node* n); + +// Scheduler's cost model crap (talk about these in codegen_impl.h) +typedef int (*TB_GetLatency)(TB_Function* f, TB_Node* n, TB_Node* end); +typedef uint64_t (*TB_GetUnitMask)(TB_Function* f, TB_Node* n); + +// Local scheduler +void tb_list_scheduler(TB_Function* f, TB_CFG* cfg, TB_Worklist* ws, DynArray(PhiVal*) phi_vals, TB_BasicBlock* bb, TB_GetLatency get_lat, TB_GetUnitMask 
get_unit_mask, int unit_count);
+void tb_greedy_scheduler(TB_Function* f, TB_CFG* cfg, TB_Worklist* ws, DynArray(PhiVal*) phi_vals, TB_BasicBlock* bb);
+void tb_dataflow(TB_Function* f, TB_Arena* arena, TB_CFG cfg);
+
+// Global scheduler
+void tb_clear_anti_deps(TB_Function* f, TB_Worklist* ws);
+void tb_renumber_nodes(TB_Function* f, TB_Worklist* ws);
+void tb_compact_nodes(TB_Function* f, TB_Worklist* ws);
+void tb_global_schedule(TB_Function* f, TB_Worklist* ws, TB_CFG cfg, bool early_only, bool dataflow, TB_GetLatency get_lat);
+void tb_compute_synthetic_loop_freq(TB_Function* f, TB_CFG* cfg);
+
+// BB placement
+int bb_placement_rpo(TB_Arena* arena, TB_CFG* cfg, int* dst_order);
+int bb_placement_trace(TB_Arena* arena, TB_CFG* cfg, int* dst_order);
+
+// makes arch-friendly IR
+void tb_opt_legalize(TB_Function* f, TB_Arch arch);
+int tb_opt_peeps(TB_Function* f);
+int tb_opt_locals(TB_Function* f);
+
+// Integrated IR debugger
+void tb_integrated_dbg(TB_Function* f, TB_Node* n);
+
+Lattice* latuni_get(TB_Function* f, TB_Node* n);
+
+void tb__print_regmask(RegMask* mask);
+
diff --git a/test/c_library/test_cuik.c b/test/c_library/test_cuik.c
new file mode 100644
index 0000000..3a965f5
--- /dev/null
+++ b/test/c_library/test_cuik.c
@@ -0,0 +1,44 @@
+#define GEN_IMPLEMENTATION
+#define GEN_DEFINE_LIBRARY_CODE_CONSTANTS
+#include "gen_singleheader.h"
+
+// Expands to the three clauses of a for-loop header (init; condition; step) that walk
+// `container` using the gen_begin_<Type> / gen_end_<Type> / gen_next_<Type> functions.
+// `iter` names the loop variable declared at the use site; it is compared and advanced.
+#define gen_iterator( Type, container, iter ) \
+	gen_begin_ ## Type(container); \
+	iter != gen_end_ ## Type(container); \
+	iter = gen_next_ ## Type(container, iter)
+
+// Parses the Cuik passes.h header and logs the type and name of every struct,
+// function, variable, and typedef entry found while iterating the parsed code body.
+int main(void)
+{
+	gen_Context ctx = {0};
+	gen_init(& ctx);
+
+	// Macros defined by passes.h, registered before the header is parsed.
+	gen_register_macros( args(
+		((gen_Macro){ txt("USERN"), MT_Expression, MF_Functional }),
+		((gen_Macro){ txt("USERI"), MT_Expression, MF_Functional }),
+		((gen_Macro){ txt("FOR_USERS"), MT_Statement, MF_Functional }),
+		((gen_Macro){ txt("FOR_SUCC"), MT_Statement, MF_Functional })
+	));
+
+	gen_CodeBody h_passes = gen_parse_file("Cuik/tb/opt/passes.h");
+	for (gen_Code code = gen_iterator(CodeBody, h_passes, code)) switch (code->Type) {
+		case CT_Struct:
+		case CT_Function:
+		case CT_Variable:
+		case CT_Typedef:
+			gen_log_fmt("%S: %S", gen_codetype_to_str(code->Type), code->Name);
+		break;
+
+		default:
+			// Every other code type is deliberately skipped.
+		break;
+	}
+
+	gen_deinit(& ctx);
+	return 0;
+}