/*
WATL Exercise
Version:   0 (From Scratch, 1-Stage Compilation, LLVM & WinAPI Only, Win CRT Multi-threaded Static Linkage)
Host:      Windows 11 (x86-64)
Toolchain: LLVM (2025-08-30), C-Standard: 11
Based on:  Neokineogfx - Fixing C
https://youtu.be/RrL7121MOeA
*/
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-const-variable"
#pragma clang diagnostic ignored "-Wunused-but-set-variable"
#pragma clang diagnostic ignored "-Wswitch"
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wunknown-pragmas"
#pragma clang diagnostic ignored "-Wvarargs"
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wbraced-scalar-init"
#pragma clang diagnostic ignored "-W#pragma-messages"
#pragma clang diagnostic ignored "-Wstatic-in-inline"
#pragma clang diagnostic ignored "-Wkeyword-macro"
#pragma clang diagnostic ignored "-Wc23-compat"
#pragma clang diagnostic ignored "-Wreserved-identifier"
#pragma clang diagnostic ignored "-Wpre-c11-compat"
#pragma clang diagnostic ignored "-Wc23-extensions"
#pragma clang diagnostic ignored "-Wunused-macros"
#pragma clang diagnostic ignored "-Wdeclaration-after-statement"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
#pragma clang diagnostic ignored "-Wc++-keyword"
#pragma clang diagnostic ignored "-Wimplicit-function-declaration"
#pragma clang diagnostic ignored "-Wcast-align"
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wswitch-default"
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
#pragma clang diagnostic ignored "-Wpointer-sign"

#pragma region Header
#pragma region DSL

#if 0 // Original macros
#define A_(x)   __attribute__((aligned (x)))
#define E_(x,y) __builtin_expect(x,y)
#define S_ static
#define I_ static inline __attribute__((always_inline))
#define N_ static __attribute__((noinline))
#define R_ __restrict
#define V_ volatile
// #define W_ __attribute((__stdcall__)) __attribute__((__force_align_arg_pointer__))
#endif
// Ones I'm deciding to use..
#define align_(value) __attribute__((aligned (value))) // for easy alignment #define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path #define finline static inline __attribute__((always_inline)) // force inline #define noinline static __attribute__((noinline)) // force no inline [used in thread api] #define R_ __restrict // pointers are either restricted or volatile and nothing else #define V_ volatile // pointers are either restricted or volatile and nothing else // #define W_ __attribute((__stdcall__)) __attribute__((__force_align_arg_pointer__)) #define glue_impl(A, B) A ## B #define glue(A, B) glue_impl(A, B) #define stringify_impl(S) #S #define stringify(S) stringify_impl(S) #define tmpl(prefix, type) prefix ## _ ## type #define local_persist static #define global static #define static_assert _Static_assert #define typeof __typeof__ #define typeof_ptr(ptr) typeof(ptr[0]) #define typeof_same(a, b) _Generic((a), typeof((b)): 1, default: 0) #define def_R_(type) type* restrict type ## _R #define def_V_(type) type* volatile type ## _V #define def_ptr_set(type) def_R_(type); typedef def_V_(type) #define def_tset(type) type; typedef def_ptr_set(type) typedef __UINT8_TYPE__ def_tset(U1); typedef __UINT16_TYPE__ def_tset(U2); typedef __UINT32_TYPE__ def_tset(U4); typedef __UINT64_TYPE__ def_tset(U8); typedef __INT8_TYPE__ def_tset(S1); typedef __INT16_TYPE__ def_tset(S2); typedef __INT32_TYPE__ def_tset(S4); typedef __INT64_TYPE__ def_tset(S8); typedef unsigned char def_tset(B1); typedef __UINT16_TYPE__ def_tset(B2); typedef __UINT32_TYPE__ def_tset(B4); typedef float def_tset(F4); typedef double def_tset(F8); typedef float V4_F4 __attribute__((vector_size(16))); typedef def_ptr_set(V4_F4); enum { false = 0, true = 1, true_overflow, }; #define u1_r(value) cast(U1_R, value) #define u2_r(value) cast(U2_R, value) #define u4_r(value) cast(U4_R, value) #define u8_r(value) cast(U8_R, value) #define u1_v(value) cast(U1_V, value) #define u2_v(value) cast(U2_V, value) #define u4_v(value) cast(U4_V, value) #define u8_v(value) cast(U8_V, value) #define u1_(value) cast(U1, value) #define u2_(value) cast(U2, value) #define u4_(value) cast(U4, value) #define u8_(value) cast(U8, value) #define s1_(value) cast(S1, value) #define s2_(value) cast(S2, value) #define s4_(value) cast(S4, value) #define s8_(value) cast(S8, value) #define f4_(value) cast(F4, value) #define f8_(value) cast(F8, value) #define farray_len(array) (SSIZE)sizeof(array) / size_of( typeof((array)[0])) #define farray_init(type, ...) (type[]){__VA_ARGS__} #define def_farray_sym(_type, _len) A ## _len ## _ ## _type #define def_farray_impl(_type, _len) _type def_farray_sym(_type, _len)[_len]; typedef def_ptr_set(def_farray_sym(_type, _len)) #define def_farray(type, len) def_farray_impl(type, len) #define def_enum(underlying_type, symbol) underlying_type def_tset(symbol); enum symbol #define def_struct(symbol) struct symbol def_tset(symbol); struct symbol #define def_union(symbol) union symbol def_tset(symbol); union symbol #define def_proc(symbol) symbol #define opt_args(symbol, ...) 
&(symbol){__VA_ARGS__}

#define alignas _Alignas
#define alignof _Alignof

#define cast(type, data)  ((type)(data))
#define pcast(type, data) * cast(type*, & (data))

#define nullptr cast(void*, 0)
#define null    cast(U8,    0)

#define offset_of(type, member) cast(U8, & (((type*) 0)->member))
#define size_of(data)           cast(U8, sizeof(data))

#define kilo(n) (cast(U8, n) << 10)
#define mega(n) (cast(U8, n) << 20)
#define giga(n) (cast(U8, n) << 30)
#define tera(n) (cast(U8, n) << 40)

// Signed stuff (still diff flavor from Lottes)
#define sop_1(op, a, b) cast(U1, s1_(a) op s1_(b))
#define sop_2(op, a, b) cast(U2, s2_(a) op s2_(b))
#define sop_4(op, a, b) cast(U4, s4_(a) op s4_(b))
#define sop_8(op, a, b) cast(U8, s8_(a) op s8_(b))
#define def_signed_op(id, op, width) finline U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); }
#define def_signed_ops(id, op) def_signed_op(id, op, 1) def_signed_op(id, op, 2) def_signed_op(id, op, 4) def_signed_op(id, op, 8)
def_signed_ops(add, +) def_signed_ops(sub, -) def_signed_ops(mut, *) def_signed_ops(div, /)
def_signed_ops(gt, >)  def_signed_ops(lt, <)  def_signed_ops(ge, >=) def_signed_ops(le, <=)
#define def_generic_sop(op, a, ...) _Generic((a), U1: op ## _s1, U2: op ## _s2, U4: op ## _s4, U8: op ## _s8) (a, __VA_ARGS__)
#define add_s(a,b) def_generic_sop(add,a,b)
#define sub_s(a,b) def_generic_sop(sub,a,b)
#define mut_s(a,b) def_generic_sop(mut,a,b)
#define gt_s(a,b)  def_generic_sop(gt, a,b)
#define lt_s(a,b)  def_generic_sop(lt, a,b)
#define ge_s(a,b)  def_generic_sop(ge, a,b)
#define le_s(a,b)  def_generic_sop(le, a,b)

finline U4 AtmAdd_u4 (U4_R a, U4 v){__asm__ volatile("lock xaddl %0,%1":"=r"(v),"=m"(*a):"0"(v),"m"(*a):"memory","cc");return v;}
finline U8 AtmAdd_u8 (U8_R a, U8 v){__asm__ volatile("lock xaddq %0,%1":"=r"(v),"=m"(*a):"0"(v),"m"(*a):"memory","cc");return v;}
finline U4 AtmSwap_u4(U4_R a, U4 v){__asm__ volatile("lock xchgl %0,%1":"=r"(v),"=m"(*a):"0"(v),"m"(*a):"memory","cc");return v;}
finline U8 AtmSwap_u8(U8_R a, U8 v){__asm__ volatile("lock xchgq %0,%1":"=r"(v),"=m"(*a):"0"(v),"m"(*a):"memory","cc");return v;}
#pragma endregion DSL

#pragma region Strings
typedef unsigned char def_tset(UTF8);
typedef def_struct(Str8) { UTF8*R_ ptr; U8 len; };
typedef Str8 def_tset(Slice_UTF8);
typedef def_struct(Slice_Str8) { Str8*R_ ptr; U8 len; };
#define lit(string_literal) (Str8){ (UTF8*R_) string_literal, size_of(string_literal) - 1 }
#pragma endregion Strings

#pragma region Debug
#define debug_trap()      __debugbreak()
#define assert_trap(cond) do { if (! (cond)) debug_trap(); } while(0)
#define assert_msg(cond, msg, ...) do { \
	if (! (cond))                       \
	{                                   \
		assert_handler(                 \
			stringify(cond),            \
			__FILE__,                   \
			__func__,                   \
			cast(S4, __LINE__),         \
			msg,                        \
		## __VA_ARGS__);                \
		debug_trap();                   \
	}                                   \
} while(0)
#define assert(cond) assert_msg(cond, nullptr) // plain assert used throughout this file
void assert_handler(UTF8*R_ condition, UTF8*R_ file, UTF8*R_ function, S4 line, UTF8*R_ msg, ...);
#pragma endregion Debug
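/*
Illustrative usage sketch for the assert macros above; example_checked_div is a hypothetical
helper and the message text is made up for demonstration.
*/
#if 0
void example_checked_div(U4 num, U4 den) {
	assert_msg(den != 0, "division by zero (num = %u)", num); // reports through assert_handler, then debug_trap()
	assert(num >= den);                                       // plain assert: same path, no custom message
	U4 quotient = num / den;
	(void) quotient;
}
#endif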
#pragma region Memory
typedef def_farray(B1, 1);
typedef def_farray(B1, 2);
typedef def_farray(B1, 4);
typedef def_farray(B1, 8);

inline U8 align_pow2(U8 x, U8 b);

#define align_struct(type_width) ((U8)(((type_width) + 7) / 8 * 8))

#define assert_bounds(point, start, end) do { \
	assert(start <= point);                   \
	assert(point <= end);                     \
} while(0)

U8 memory_copy            (U8 dest, U8 src, U8 length);
U8 memory_copy_overlapping(U8 dest, U8 src, U8 length);
B4 memory_zero            (U8 dest, U8 length);

finline void BarC(void){__asm__ volatile("" ::: "memory");} // Compiler Barrier
finline void BarM(void){__builtin_ia32_mfence();}           // Memory Barrier
finline void BarR(void){__builtin_ia32_lfence();}           // Read Barrier
finline void BarW(void){__builtin_ia32_sfence();}           // Write Barrier

#define check_nil(nil, p) ((p) == 0 || (p) == nil)
#define set_nil(nil, p)   ((p) = nil)

#define sll_stack_push_n(f, n, next) do { (n)->next = (f); (f) = (n); } while(0)
#define sll_queue_push_nz(nil, f, l, n, next) \
(                                             \
	check_nil(nil, f) ? (                     \
		(f) = (l) = (n),                      \
		set_nil(nil, (n)->next)               \
	)                                         \
	: (                                       \
		(l)->next=(n),                        \
		(l) = (n),                            \
		set_nil(nil,(n)->next)                \
	)                                         \
)
#define sll_queue_push_n(f, l, n, next) sll_queue_push_nz(0, f, l, n, next)

typedef def_struct(Slice_Mem) { U8 ptr; U8 len; };
#define slice_mem(ptr, len) (Slice_Mem){ptr, len}

#define def_Slice(type) def_struct(tmpl(Slice,type)) { type*R_ ptr; U8 len; }; typedef def_ptr_set(tmpl(Slice,type))

#define slice_assert(slice) do { assert((slice).ptr != 0); assert((slice).len > 0); } while(0)
#define slice_end(slice)          ((slice).ptr + (slice).len)
#define size_of_slice_type(slice) size_of( * (slice).ptr )

typedef def_Slice(void);
typedef def_Slice(B1);
#define slice_byte(slice) ((Slice_B1){cast(B1*R_, (slice).ptr), (slice).len * size_of_slice_type(slice)})
#define slice_fmem(mem)   ((Slice_B1){ mem, size_of(mem) })

void slice__copy(Slice_B1 dest, U8 dest_typewidth, Slice_B1 src, U8 src_typewidth);
void slice__zero(Slice_B1 mem, U8 typewidth);
#define slice_copy(dest, src) do {                                                                     \
	static_assert(typeof_same(dest, src));                                                             \
	slice__copy(slice_byte(dest), size_of_slice_type(dest), slice_byte(src), size_of_slice_type(src)); \
} while (0)
#define slice_zero(slice) slice__zero(slice_byte(slice), size_of_slice_type(slice))

#define slice_iter(container, iter)     typeof((container).ptr) iter = (container).ptr; iter != slice_end(container); ++ iter
#define slice_arg_from_array(type, ...) & (tmpl(Slice,type)) { .ptr = farray_init(type, __VA_ARGS__), .len = farray_len( farray_init(type, __VA_ARGS__)) }

#define span_iter(type, iter, m_begin, op, m_end) \
	tmpl(Iter_Span,type) iter = {                 \
		.r = {(m_begin), (m_end)},                \
		.cursor = (m_begin) };                    \
	iter.cursor op iter.r.end;                    \
	++ iter.cursor
#define def_span(type)                                                          \
	def_struct(tmpl( Span,type)) { type begin; type end; };                     \
	typedef def_struct(tmpl(Iter_Span,type)) { tmpl(Span,type) r; type cursor; }
typedef def_span(B1);
typedef def_span(U4);
typedef def_span(U8);
#pragma endregion Memory

#pragma region Math
#define min(A, B) (((A) < (B)) ? (A) : (B))
#define max(A, B) (((A) > (B)) ?
(A) : (B)) #define clamp_bot(X, B) max(X, B) #pragma endregion Math #pragma region Allocator Interface typedef def_enum(U4, AllocatorOp) { AllocatorOp_Alloc_NoZero = 0, // If Alloc exist, so must No_Zero AllocatorOp_Alloc, AllocatorOp_Free, AllocatorOp_Reset, AllocatorOp_Grow_NoZero, AllocatorOp_Grow, AllocatorOp_Shrink, AllocatorOp_Rewind, AllocatorOp_SavePoint, AllocatorOp_Query, // Must always be implemented }; typedef def_enum(U4, AllocatorQueryFlags) { AllocatorQuery_Alloc = (1 << 0), AllocatorQuery_Free = (1 << 1), // Wipe the allocator's state AllocatorQuery_Reset = (1 << 2), // Supports both grow and shrink AllocatorQuery_Shrink = (1 << 4), AllocatorQuery_Grow = (1 << 5), AllocatorQuery_Resize = AllocatorQuery_Grow | AllocatorQuery_Shrink, // Ability to rewind to a save point (ex: arenas, stack), must also be able to save such a point AllocatorQuery_Rewind = (1 << 6), }; typedef struct AllocatorProc_In def_tset(AllocatorProc_In); typedef struct AllocatorProc_Out def_tset(AllocatorProc_Out); typedef struct AllocatorSP AllocatorSP; typedef void def_proc(AllocatorProc) (AllocatorProc_In In, AllocatorProc_Out_R Out); struct AllocatorSP { AllocatorProc* type_sig; U8 slot; }; struct AllocatorProc_In { U8 data; U8 requested_size; U8 alignment; union { Slice_Mem old_allocation; AllocatorSP save_point; }; AllocatorOp op; A4_B1 _PAD_; }; struct AllocatorProc_Out { union { Slice_Mem allocation; AllocatorSP save_point; }; AllocatorQueryFlags features; A4_B1 _PAD_; U8 left; // Contiguous memory left U8 max_alloc; U8 min_alloc; B4 continuity_break; // Whether this allocation broke continuity with the previous (address space wise) A4_B1 _PAD_2; }; typedef def_struct(AllocatorInfo) { AllocatorProc* proc; U8 data; }; static_assert(size_of(AllocatorSP) <= size_of(Slice_Mem)); typedef def_struct(AllocatorQueryInfo) { AllocatorSP save_point; AllocatorQueryFlags features; A4_B1 _PAD_; U8 left; // Contiguous memory left U8 max_alloc; U8 min_alloc; B4 continuity_break; // Whether this allocation broke continuity with the previous (address space wise) A4_B1 _PAD_2; }; static_assert(size_of(AllocatorProc_Out) == size_of(AllocatorQueryInfo)); #define MEMORY_ALIGNMENT_DEFAULT (2 * size_of(void*)) AllocatorQueryInfo allocator_query(AllocatorInfo ainfo); void mem_free (AllocatorInfo ainfo, Slice_Mem mem); void mem_reset (AllocatorInfo ainfo); void mem_rewind (AllocatorInfo ainfo, AllocatorSP save_point); AllocatorSP mem_save_point(AllocatorInfo ainfo); typedef def_struct(Opts_mem_alloc) { U8 alignment; B4 no_zero; A4_B1 _PAD_; }; typedef def_struct(Opts_mem_grow) { U8 alignment; B4 no_zero; A4_B1 _PAD_; }; typedef def_struct(Opts_mem_shrink) { U8 alignment; }; typedef def_struct(Opts_mem_resize) { U8 alignment; B4 no_zero; A4_B1 _PAD_; }; Slice_Mem mem__alloc (AllocatorInfo ainfo, U8 size, Opts_mem_alloc_R opts); Slice_Mem mem__grow (AllocatorInfo ainfo, Slice_Mem mem, U8 size, Opts_mem_grow_R opts); Slice_Mem mem__resize(AllocatorInfo ainfo, Slice_Mem mem, U8 size, Opts_mem_resize_R opts); Slice_Mem mem__shrink(AllocatorInfo ainfo, Slice_Mem mem, U8 size, Opts_mem_shrink_R opts); #define mem_alloc(ainfo, size, ...) mem__alloc (ainfo, size, opt_args(Opts_mem_alloc, __VA_ARGS__)) #define mem_grow(ainfo, mem, size, ...) mem__grow (ainfo, mem, size, opt_args(Opts_mem_grow, __VA_ARGS__)) #define mem_resize(ainfo, mem, size, ...) mem__resize(ainfo, mem, size, opt_args(Opts_mem_resize, __VA_ARGS__)) #define mem_shrink(ainfo, mem, size, ...) 
mem__shrink(ainfo, mem, size, opt_args(Opts_mem_shrink, __VA_ARGS__)) #define alloc_type(ainfo, type, ...) (type*R_) mem__alloc(ainfo, size_of(type), opt_args(Opts_mem_alloc, __VA_ARGS__)).ptr #define alloc_slice(ainfo, type, num, ...) (tmpl(Slice,type)){ mem__alloc(ainfo, size_of(type) * num, opt_args(Opts_mem_alloc, __VA_ARGS__)).ptr, num } #pragma endregion Allocator Interface #pragma region FArena (Fixed-Sized Arena) typedef def_struct(Opts_farena) { Str8 type_name; U8 alignment; }; typedef def_struct(FArena) { U8 start; U8 capacity; U8 used; }; typedef def_ptr_set(FArena); FArena farena_make (Slice_Mem mem); void farena_init (FArena_R arena, Slice_Mem byte); Slice_Mem farena__push (FArena_R arena, U8 amount, U8 type_width, Opts_farena*R_ opts); void farena_reset (FArena_R arena); void farena_rewind(FArena_R arena, AllocatorSP save_point); AllocatorSP farena_save (FArena arena); void farena_allocator_proc(AllocatorProc_In in, AllocatorProc_Out_R out); #define ainfo_farena(arena) (AllocatorInfo){ .proc = farena_allocator_proc, .data = & arena } #define farena_push_mem(arena, amount, ...) farena__push(arena, amount, 1, opt_args(Opts_farena, lit(stringify(B1)), __VA_ARGS__)) #define farena_push(arena, type, ...) \ cast(type*, farena__push(arena, size_of(type), 1, opt_args(Opts_farena, lit(stringify(type)), __VA_ARGS__))).ptr #define farena_push_array(arena, type, amount, ...) \ (Slice ## type){ farena__push(arena, size_of(type), amount, opt_args(Opts_farena, lit(stringify(type)), __VA_ARGS__)).ptr, amount } #pragma endregion FArena #pragma region OS finline U8 Clk (void){U8 aa,dd;__asm__ volatile("rdtsc":"=a"(aa),"=d"(dd));return aa;} finline void Pause(void){__asm__ volatile("pause":::"memory");} typedef def_struct(OS_SystemInfo) { U8 target_page_size; }; typedef def_struct(Opts_vmem) { U8 base_addr; B4 no_large_pages; A4_B1 _PAD_; }; void os_init(void); OS_SystemInfo* os_system_info(void); inline B4 os__vmem_commit (U8 vm, U8 size, Opts_vmem*R_ opts); inline U8 os__vmem_reserve( U8 size, Opts_vmem*R_ opts); inline void os_vmem_release (U8 vm, U8 size); #define os_vmem_reserve(size, ...) os__vmem_reserve( size, opt_args(Opts_vmem, __VA_ARGS__)) #define os_vmem_commit(vm, size, ...) os__vmem_commit (vm, size, opt_args(Opts_vmem, __VA_ARGS__)) #pragma endregion OS #pragma region VArena (Virutal Address Space Arena) typedef Opts_farena Opts_varena; typedef def_enum(U4, VArenaFlags) { VArenaFlag_NoLargePages = (1 << 0), }; typedef def_struct(VArena) { U8 reserve_start; U8 reserve; U8 commit_size; U8 committed; U8 commit_used; VArenaFlags flags; A4_B1 _PAD; }; typedef def_struct(Opts_varena_make) { U8 base_addr; U8 reserve_size; U8 commit_size; VArenaFlags flags; A4_B1 _PAD_; }; VArena_R varena__make(Opts_varena_make*R_ opts); #define varena_make(...) varena__make(opt_args(Opts_varena_make, __VA_ARGS__)) Slice_Mem varena__push (VArena_R arena, U8 amount, U8 type_width, Opts_varena*R_ opts); void varena_release(VArena_R arena); void varena_rewind (VArena_R arena, AllocatorSP save_point); void varena_reset (VArena_R arena); Slice_Mem varena__shrink(VArena_R arena, Slice_Mem old_allocation, U8 requested_size, Opts_varena*R_ opts); AllocatorSP varena_save (VArena_R arena); void varena_allocator_proc(AllocatorProc_In in, AllocatorProc_Out_R out); #define ainfo_varena(varena) (AllocatorInfo) { .proc = & varena_allocator_proc, .data = varena } #define varena_push_mem(arena, amount, ...) 
varena__push(arena, amount, 1, opt_args(Opts_varena, lit(stringify(B1)), __VA_ARGS__))
#define varena_push(arena, type, ...) \
cast(type*R_, varena__push(arena, 1, size_of(type), opt_args(Opts_varena, lit(stringify(type)), __VA_ARGS__) ).ptr)
#define varena_push_array(arena, type, amount, ...) \
(tmpl(Slice,type)){ varena__push(arena, size_of(type), amount, opt_args(Opts_varena, lit(stringify(type)), __VA_ARGS__)).ptr, amount }
#pragma endregion VArena

#pragma region Arena (Casey-Ryan Composite Arenas)
typedef Opts_varena Opts_arena;
typedef def_enum(U4, ArenaFlags) {
	ArenaFlag_NoLargePages = (1 << 0),
	ArenaFlag_NoChain      = (1 << 1),
};
typedef def_struct(Arena) {
	VArena_R   backing;
	Arena_R    prev;
	Arena_R    current;
	U8         base_pos;
	U8         pos;
	ArenaFlags flags;
	A4_B1      _PAD_;
};
typedef Opts_varena_make Opts_arena_make;
Arena_R     arena__make  (Opts_arena_make*R_ opts);
Slice_Mem   arena__push  (Arena_R arena, U8 amount, U8 type_width, Opts_arena*R_ opts);
void        arena_release(Arena_R arena);
void        arena_reset  (Arena_R arena);
void        arena_rewind (Arena_R arena, AllocatorSP save_point);
AllocatorSP arena_save   (Arena_R arena);
void arena_allocator_proc(AllocatorProc_In in, AllocatorProc_Out_R out);
#define ainfo_arena(arena) (AllocatorInfo){ .proc = & arena_allocator_proc, .data = u8_(arena) }
#define arena_make(...)                    arena__make(opt_args(Opts_arena_make, __VA_ARGS__))
#define arena_push_mem(arena, amount, ...) arena__push(arena, amount, 1, opt_args(Opts_arena, lit(stringify(B1)), __VA_ARGS__))
#define arena_push(arena, type, ...) \
cast(type*R_, arena__push(arena, 1, size_of(type), opt_args(Opts_arena, lit(stringify(type)), __VA_ARGS__) ).ptr)
#define arena_push_array(arena, type, amount, ...) \
(tmpl(Slice,type)){ arena__push(arena, size_of(type), amount, opt_args(Opts_arena, lit(stringify(type)), __VA_ARGS__)).ptr, amount }
#pragma endregion Arena

#pragma region Hashing
finline
void hash64_djb8(U8_R hash, Slice_Mem bytes) {
	U8 elem = bytes.ptr;
	U8 end  = bytes.ptr + bytes.len;
loop:
	if (elem == end) goto end;
	hash[0] = ((hash[0] << 8) + hash[0]) + u1_r(elem)[0]; // accumulate the byte value, not the address
	++ elem;
	goto loop;
end:
	return;
}
#pragma endregion Hashing

#pragma region Key Table 1-Layer Linear (KT1L)
#define def_KT1L_Slot(type)            \
	def_struct(tmpl(KT1L_Slot,type)) { \
		U8   key;                      \
		type value;                    \
	}
#define def_KT1L(type)               \
	def_Slice(tmpl(KT1L_Slot,type)); \
	typedef tmpl(Slice_KT1L_Slot,type) tmpl(KT1L,type)
typedef Slice_Mem KT1L_Byte;
typedef def_struct(KT1L_Meta) {
	U8   slot_size;
	U8   kt_value_offset;
	U8   type_width;
	Str8 type_name;
};
void kt1l__populate_slice_a2(KT1L_Byte*R_ kt, AllocatorInfo backing, KT1L_Meta m, Slice_Mem values, U8 num_values );
#define kt1l_populate_slice_a2(type, kt, ainfo, values) kt1l__populate_slice_a2( \
	cast(KT1L_Byte*R_, kt),                                                      \
	ainfo,                                                                       \
	(KT1L_Meta){                                                                 \
		.slot_size       = size_of(tmpl(KT1L_Slot,type)),                        \
		.kt_value_offset = offset_of(tmpl(KT1L_Slot,type), value),               \
		.type_width      = size_of(type),                                        \
		.type_name       = lit(stringify(type))                                  \
	},                                                                           \
	slice_byte(values), (values).len                                             \
)
#pragma endregion KT1L

#pragma region Key Table 1-Layer Chained-Chunked-Cells (KT1CX)
#define def_KT1CX_Slot(type)            \
	def_struct(tmpl(KT1CX_Slot,type)) { \
		type  value;                    \
		U8    key;                      \
		B4    occupied;                 \
		A4_B1 _PAD_;                    \
	}
#define def_KT1CX_Cell(type, depth)            \
	def_struct(tmpl(KT1CX_Cell,type)) {        \
		tmpl(KT1CX_Slot,type)    slots[depth]; \
		tmpl(KT1CX_Slot,type)*R_ next;         \
	}
#define def_KT1CX(type)                        \
	def_struct(tmpl(KT1CX,type)) {             \
		tmpl(Slice_KT1CX_Cell,type) cell_pool; \
		tmpl(Slice_KT1CX_Cell,type) table;     \
	}
typedef def_struct(KT1CX_Byte_Slot) {
	U8    key;
	B4    occupied;
	A4_B1 _PAD_;
};
typedef
def_struct(KT1CX_Byte_Cell) { U8 next; }; typedef def_struct(KT1CX_Byte) { Slice_Mem cell_pool; Slice_Mem table; }; typedef def_struct(KT1CX_ByteMeta) { U8 slot_size; U8 slot_key_offset; U8 cell_next_offset; U8 cell_depth; U8 cell_size; U8 type_width; Str8 type_name; }; typedef def_struct(KT1CX_InfoMeta) { U8 cell_pool_size; U8 table_size; U8 slot_size; U8 slot_key_offset; U8 cell_next_offset; U8 cell_depth; U8 cell_size; U8 type_width; Str8 type_name; }; typedef def_struct(KT1CX_Info) { AllocatorInfo backing_table; AllocatorInfo backing_cells; }; void kt1cx_init (KT1CX_Info info, KT1CX_InfoMeta m, KT1CX_Byte*R_ result); void kt1cx_clear (KT1CX_Byte kt, KT1CX_ByteMeta meta); U8 kt1cx_slot_id(KT1CX_Byte kt, U8 key, KT1CX_ByteMeta meta); U8 kt1cx_get (KT1CX_Byte kt, U8 key, KT1CX_ByteMeta meta); U8 kt1cx_set (KT1CX_Byte kt, U8 key, Slice_Mem value, AllocatorInfo backing_cells, KT1CX_ByteMeta meta); #define kt1cx_assert(kt) do { \ slice_assert(kt.cell_pool); \ slice_assert(kt.table); \ } while(0) #define kt1cx_byte(kt) (KT1CX_Byte){slice_byte(kt.cell_pool), { cast(U8, kt.table.ptr), kt.table.len } } #pragma endregion KT1CX #pragma region String Operations finline B4 char_is_upper(U8 c) { return('A' <= c && c <= 'Z'); } finline U8 char_to_lower(U8 c) { if (char_is_upper(c)) { c += ('a' - 'A'); } return(c); } inline U8 integer_symbols(U8 value) { local_persist U1 lookup_table[16] = { '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F', }; return lookup_table[cast(U1, value)]; } char* str8_to_cstr_capped(Str8 content, Slice_Mem mem); Str8 str8_from_u32(AllocatorInfo ainfo, U4 num, U4 radix, U8 min_digits, U8 digit_group_separator); typedef def_farray(Str8, 2); typedef def_Slice(A2_Str8); typedef def_KT1L_Slot(Str8); typedef def_KT1L(Str8); Str8 str8__fmt_backed(AllocatorInfo tbl_backing, AllocatorInfo buf_backing, Str8 fmt_template, Slice_A2_Str8* entries); #define str8_fmt_backed(tbl_backing, buf_backing, fmt_template, ...) \ str8__fmt_backed(tbl_backing, buf_backing, lit(fmt_template), slice_arg_from_array(A2_Str8, __VA_ARGS__)) Str8 str8__fmt(Str8 fmt_template, Slice_A2_Str8*R_ entries); #define str8_fmt(fmt_template, ...) str8__fmt(lit(fmt_template), slice_arg_from_array(A2_Str8, __VA_ARGS__)) #define Str8Cache_CELL_DEPTH 4 typedef def_KT1CX_Slot(Str8); typedef def_KT1CX_Cell(Str8, Str8Cache_CELL_DEPTH); typedef def_Slice(KT1CX_Cell_Str8); typedef def_KT1CX(Str8); typedef def_struct(Str8Cache) { AllocatorInfo str_reserve; AllocatorInfo cell_reserve; AllocatorInfo tbl_backing; KT1CX_Str8 kt; }; typedef def_struct(Opts_str8cache_init) { AllocatorInfo str_reserve; AllocatorInfo cell_reserve; AllocatorInfo tbl_backing; U8 cell_pool_size; U8 table_size; }; void str8cache__init(Str8Cache_R cache, Opts_str8cache_init*R_ opts); Str8Cache str8cache__make( Opts_str8cache_init*R_ opts); #define str8cache_init(cache, ...) str8cache__init(cache, opt_args(Opts_str8cache_init, __VA_ARGS__)) #define str8cache_make(...) 
str8cache__make( opt_args(Opts_str8cache_init, __VA_ARGS__)) void str8cache_clear(KT1CX_Str8 kt); U8 str8cache_get(KT1CX_Str8 kt, U8 key); U8 str8cache_set(KT1CX_Str8 kt, U8 key, Str8 value, AllocatorInfo str_reserve, AllocatorInfo backing_cells); Str8 cache_str8(Str8Cache* cache, Str8 str); typedef def_struct(Str8Gen) { AllocatorInfo backing; U8 ptr; U8 len; U8 cap; }; void str8gen_init(Str8Gen_R gen, AllocatorInfo backing); Str8Gen str8gen_make( AllocatorInfo backing); #define str8gen_slice_mem(gen) (Slice_mem){ cast(U8, (gen).ptr), (gen).cap } finline Str8 str8_from_str8gen(Str8Gen gen) { return (Str8){ cast(UTF8_R, gen.ptr), gen.len}; } void str8gen_append_str8(U8 gen, Str8 str); void str8gen__append_fmt(U8 gen, Str8 fmt_template, Slice_A2_Str8*R_ tokens); #define str8gen_append_fmt(gen, fmt_template, ...) str8gen__append_fmt(gen, lit(fmt_template), slice_arg_from_array(A2_Str8, __VA_ARGS__)) #pragma endregion String Operations #pragma region File System typedef def_struct(FileOpInfo) { Slice_Mem content; }; typedef def_struct(Opts_read_file_contents) { AllocatorInfo backing; B4 zero_backing; A4_B1 _PAD_; }; void api_file_read_contents(FileOpInfo*R_ result, Str8 path, Opts_read_file_contents opts); void file_write_str8 (Str8 path, Str8 content); FileOpInfo file__read_contents(Str8 path, Opts_read_file_contents*R_ opts); #define file_read_contents(path, ...) file__read_contents(path, &(Opts_read_file_contents){__VA_ARGS__}) #pragma endregion File System #pragma region WATL typedef def_enum(U4, WATL_TokKind) { WATL_Tok_Space = ' ', WATL_Tok_Tab = '\t', WATL_Tok_CarriageReturn = '\r', WATL_Tok_LineFeed = '\n', WATL_Tok_Text = 0xFFFFFFF, }; typedef Str8 def_tset(WATL_Tok); typedef def_Slice(WATL_Tok); typedef def_enum(U4, WATL_LexStatus) { WATL_LexStatus_MemFail_SliceConstraintFail = (1 << 0), }; typedef def_struct(WATL_Pos) { S4 line; S4 column; }; typedef def_struct(WATL_LexMsg) { WATL_LexMsg_R next; Str8 content; WATL_Tok_R tok; WATL_Pos pos; }; typedef def_struct(WATL_LexInfo) { WATL_LexMsg_R msgs; Slice_WATL_Tok toks; WATL_LexStatus signal; A4_B1 _PAD_; }; typedef def_struct(Opts_watl_lex) { AllocatorInfo ainfo_msgs; AllocatorInfo ainfo_toks; B1 failon_unsupported_codepoints; B1 failon_pos_untrackable; B1 failon_slice_constraint_fail; A4_B1 _PAD_; }; void api_watl_lex(WATL_LexInfo* info, Str8 source, Opts_watl_lex*R_ opts); WATL_LexInfo watl__lex ( Str8 source, Opts_watl_lex*R_ opts); #define watl_lex(source, ...) watl__lex(source, &(Opts_watl_lex){__VA_ARGS__}) typedef Str8 WATL_Node; typedef def_Slice(WATL_Node); typedef Slice_WATL_Node def_tset(WATL_Line); typedef def_Slice(WATL_Line); typedef def_struct(WATL_ParseMsg) { WATL_ParseMsg_R next; Str8 content; WATL_Line_R line; WATL_Tok_R tok; WATL_Pos pos; }; typedef def_enum(U4, WATL_ParseStatus) { WATL_ParseStatus_MemFail_SliceConstraintFail = (1 << 0), }; typedef def_struct(WATL_ParseInfo) { Slice_WATL_Line lines; WATL_ParseMsg_R msgs; WATL_ParseStatus signal; A4_B1 _PAD_; }; typedef def_struct(Opts_watl_parse) { AllocatorInfo ainfo_msgs; AllocatorInfo ainfo_nodes; AllocatorInfo ainfo_lines; Str8Cache_R str_cache; B4 failon_slice_constraint_fail; A4_B1 _PAD_; }; void api_watl_parse(WATL_ParseInfo_R info, Slice_WATL_Tok tokens, Opts_watl_parse*R_ opts); WATL_ParseInfo watl__parse ( Slice_WATL_Tok tokens, Opts_watl_parse*R_ opts); #define watl_parse(tokens, ...) 
watl__parse(tokens, &(Opts_watl_parse){__VA_ARGS__})

Str8 watl_dump_listing(AllocatorInfo buffer, Slice_WATL_Line lines);
#pragma endregion WATL
#pragma endregion Header

#pragma region Implementation

#pragma region Memory Operations
void* __cdecl memcpy (void*R_ _Dst, void const*R_ _Src, U8 _Size);
void* __cdecl memmove(void* _Dst, void const* _Src, U8 _Size);
void* __cdecl memset (void*R_ _Dst, int _Val, U8 _Size);
inline
U8 align_pow2(U8 x, U8 b) {
	assert(b != 0);
	assert((b & (b - 1)) == 0); // Check power of 2
	return ((x + b - 1) & (~(b - 1)));
}
U8 memory_copy            (U8 dest, U8 src, U8 len) __asm__("memcpy");
U8 memory_copy_overlapping(U8 dest, U8 src, U8 len) __asm__("memmove");
inline
B4 memory_zero(U8 dest, U8 length) {
	if (dest == 0) return false;
	memset((void*R_)dest, 0, length);
	return true;
}
inline
void slice__zero(Slice_B1 mem, U8 typewidth) {
	slice_assert(mem);
	memory_zero(u8_(mem.ptr), mem.len);
}
inline
void slice__copy(Slice_B1 dest, U8 dest_typewidth, Slice_B1 src, U8 src_typewidth) {
	assert(dest.len >= src.len);
	slice_assert(dest);
	slice_assert(src);
	memory_copy(u8_(dest.ptr), u8_(src.ptr), src.len);
}
#pragma endregion Memory Operations

#pragma region Allocator Interface
inline
AllocatorQueryInfo allocator_query(AllocatorInfo ainfo) {
	assert(ainfo.proc != nullptr);
	AllocatorQueryInfo out;
	ainfo.proc((AllocatorProc_In){ .data = ainfo.data, .op = AllocatorOp_Query}, (AllocatorProc_Out_R)& out);
	return out;
}
inline
void mem_free(AllocatorInfo ainfo, Slice_Mem mem) {
	assert(ainfo.proc != nullptr);
	ainfo.proc((AllocatorProc_In){.data = ainfo.data, .op = AllocatorOp_Free, .old_allocation = mem}, &(AllocatorProc_Out){});
}
inline
void mem_reset(AllocatorInfo ainfo) {
	assert(ainfo.proc != nullptr);
	ainfo.proc((AllocatorProc_In){.data = ainfo.data, .op = AllocatorOp_Reset}, &(AllocatorProc_Out){});
}
inline
void mem_rewind(AllocatorInfo ainfo, AllocatorSP save_point) {
	assert(ainfo.proc != nullptr);
	ainfo.proc((AllocatorProc_In){.data = ainfo.data, .op = AllocatorOp_Rewind, .save_point = save_point}, &(AllocatorProc_Out){});
}
inline
AllocatorSP mem_save_point(AllocatorInfo ainfo) {
	assert(ainfo.proc != nullptr);
	AllocatorProc_Out out;
	ainfo.proc((AllocatorProc_In){.data = ainfo.data, .op = AllocatorOp_SavePoint}, & out);
	return out.save_point;
}
inline
Slice_Mem mem__alloc(AllocatorInfo ainfo, U8 size, Opts_mem_alloc* opts) {
	assert(ainfo.proc != nullptr);
	assert(opts != nullptr);
	AllocatorProc_In in = {
		.data           = ainfo.data,
		.op             = opts->no_zero ? AllocatorOp_Alloc_NoZero : AllocatorOp_Alloc,
		.requested_size = size,
		.alignment      = opts->alignment,
	};
	AllocatorProc_Out out;
	ainfo.proc(in, & out);
	return out.allocation;
}
inline
Slice_Mem mem__grow(AllocatorInfo ainfo, Slice_Mem mem, U8 size, Opts_mem_grow* opts) {
	assert(ainfo.proc != nullptr);
	assert(opts != nullptr);
	AllocatorProc_In in = {
		.data           = ainfo.data,
		.op             = opts->no_zero ? AllocatorOp_Grow_NoZero : AllocatorOp_Grow,
		.requested_size = size,
		.alignment      = opts->alignment,
		.old_allocation = mem
	};
	AllocatorProc_Out out;
	ainfo.proc(in, & out);
	return out.allocation;
}
inline
Slice_Mem mem__resize(AllocatorInfo ainfo, Slice_Mem mem, U8 size, Opts_mem_resize* opts) {
	assert(ainfo.proc != nullptr);
	assert(opts != nullptr);
	AllocatorProc_In in = {
		.data           = ainfo.data,
		// shrink when the new size is smaller, otherwise grow (honoring no_zero)
		.op             = mem.len > size ? AllocatorOp_Shrink : (opts->no_zero ? AllocatorOp_Grow_NoZero : AllocatorOp_Grow),
		.requested_size = size,
		.alignment      = opts->alignment,
		.old_allocation = mem,
	};
	AllocatorProc_Out out;
	ainfo.proc(in, & out);
	return out.allocation;
}
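/*
Illustrative sketch of driving the allocator interface above through an FArena backing.
The scratch buffer, sizes, and example_* name are hypothetical; only the mem_* entry points
and FArena procedures used here are part of this file.
*/
#if 0
void example_allocator_interface(void) {
	local_persist B1 scratch[kilo(4)];
	FArena        arena = farena_make(slice_mem(u8_(scratch), size_of(scratch)));
	AllocatorInfo ainfo = (AllocatorInfo){ .proc = farena_allocator_proc, .data = u8_(& arena) };
	Slice_Mem blk = mem_alloc (ainfo, 256);      // zeroed by default (AllocatorOp_Alloc)
	blk           = mem_grow  (ainfo, blk, 512); // grows in place while it is the arena's last allocation
	blk           = mem_resize(ainfo, blk, 128); // dispatches to shrink since the new size is smaller
	mem_reset(ainfo);                            // wipes the arena back to empty
}
#endif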
inline
Slice_Mem mem__shrink(AllocatorInfo ainfo, Slice_Mem mem, U8 size, Opts_mem_shrink* opts) {
	assert(ainfo.proc != nullptr);
	assert(opts != nullptr);
	AllocatorProc_In in = {
		.data           = ainfo.data,
		.op             = AllocatorOp_Shrink,
		.requested_size = size,
		.alignment      = opts->alignment,
		.old_allocation = mem
	};
	AllocatorProc_Out out;
	ainfo.proc(in, & out);
	return out.allocation;
}
#pragma endregion Allocator Interface

#pragma region FArena (Fixed-Sized Arena)
inline
void farena_init(FArena* arena, Slice_Mem mem) {
	assert(arena != nullptr);
	arena->start    = mem.ptr;
	arena->capacity = mem.len;
	arena->used     = 0;
}
inline FArena farena_make(Slice_Mem mem) { FArena a; farena_init(& a, mem); return a; }
inline
Slice_Mem farena__push(FArena_R arena, U8 amount, U8 type_width, Opts_farena*R_ opts) {
	assert(opts != nullptr);
	if (amount == 0) {
		return (Slice_Mem){};
	}
	U8 desired   = type_width * amount;
	U8 to_commit = align_pow2(desired, opts->alignment ? opts->alignment : MEMORY_ALIGNMENT_DEFAULT);
	U8 unused    = arena->capacity - arena->used;
	assert(to_commit <= unused);
	U8 ptr       = arena->start + arena->used;
	arena->used += to_commit;
	return (Slice_Mem){ptr, desired};
}
inline void farena_reset(FArena* arena) { arena->used = 0; }
inline
void farena_rewind(FArena_R arena, AllocatorSP save_point) {
	assert(save_point.type_sig == & farena_allocator_proc);
	assert(save_point.slot <= arena->used); // the save point stores the used offset (see farena_save)
	arena->used = save_point.slot;
}
inline
AllocatorSP farena_save (FArena arena) {
	AllocatorSP sp = { .type_sig = & farena_allocator_proc, .slot = arena.used };
	return sp;
}
void farena_allocator_proc(AllocatorProc_In in, AllocatorProc_Out* out)
{
	assert(out != nullptr);
	assert(in.data != 0);
	FArena* arena = cast(FArena*, in.data);
	switch (in.op)
	{
	case AllocatorOp_Alloc:
	case AllocatorOp_Alloc_NoZero:
		out->allocation = farena_push_mem(arena, in.requested_size, .alignment = in.alignment);
		memory_zero(out->allocation.ptr, out->allocation.len * in.op);
	break;
	case AllocatorOp_Free:
	break;
	case AllocatorOp_Reset:
		farena_reset(arena);
	break;
	case AllocatorOp_Grow:
	case AllocatorOp_Grow_NoZero: {
		// Check if the allocation is at the end of the arena
		U8 alloc_end = in.old_allocation.ptr + in.old_allocation.len;
		U8 arena_end = arena->start + arena->used;
		if (alloc_end != arena_end) {
			// Not at the end, can't grow in place
			out->allocation = (Slice_Mem){0};
			break;
		}
		// Calculate growth
		U8 grow_amount  = in.requested_size - in.old_allocation.len;
		U8 aligned_grow = align_pow2(grow_amount, in.alignment ?
in.alignment : MEMORY_ALIGNMENT_DEFAULT); U8 unused = arena->capacity - arena->used; if (aligned_grow > unused) { // Not enough space out->allocation = (Slice_Mem){0}; break; } arena->used += aligned_grow; out->allocation = (Slice_Mem){in.old_allocation.ptr, in.requested_size}; memory_zero(in.old_allocation.ptr + in.old_allocation.len, grow_amount * in.op - AllocatorOp_Grow_NoZero); } break; case AllocatorOp_Shrink: { // Check if the allocation is at the end of the arena U8 alloc_end = in.old_allocation.ptr + in.old_allocation.len; U8 arena_end = arena->start + arena->used; if (alloc_end != arena_end) { // Not at the end, can't shrink but return adjusted size out->allocation = (Slice_Mem){in.old_allocation.ptr, in.requested_size}; break; } // Calculate shrinkage //SSIZE shrink_amount = in.old_allocation.len - in.requested_size; U8 aligned_original = align_pow2(in.old_allocation.len, MEMORY_ALIGNMENT_DEFAULT); U8 aligned_new = align_pow2(in.requested_size, in.alignment ? in.alignment : MEMORY_ALIGNMENT_DEFAULT); arena->used -= (aligned_original - aligned_new); out->allocation = (Slice_Mem){in.old_allocation.ptr, in.requested_size}; } break; case AllocatorOp_Rewind: farena_rewind(arena, in.save_point); break; case AllocatorOp_SavePoint: out->save_point = farena_save(* arena); break; case AllocatorOp_Query: out->features = AllocatorQuery_Alloc | AllocatorQuery_Reset | AllocatorQuery_Resize | AllocatorQuery_Rewind ; out->max_alloc = arena->capacity - arena->used; out->min_alloc = 0; out->left = out->max_alloc; out->save_point = farena_save(* arena); break; } return; } #pragma endregion FArena #pragma region OS #pragma warning(push) #pragma warning(disable: 4820) #pragma comment(lib, "Kernel32.lib") #pragma comment(lib, "Advapi32.lib") #define MS_INVALID_HANDLE_VALUE ((MS_HANDLE)(__int64)-1) #define MS_ANYSIZE_ARRAY 1 #define MS_MEM_COMMIT 0x00001000 #define MS_MEM_RESERVE 0x00002000 #define MS_MEM_LARGE_PAGES 0x20000000 #define MS_PAGE_READWRITE 0x04 #define MS_TOKEN_ADJUST_PRIVILEGES (0x0020) #define MS_SE_PRIVILEGE_ENABLED (0x00000002L) #define MS_TOKEN_QUERY (0x0008) #define MS__TEXT(quote) L ## quote // r_winnt #define MS_TEXT(quote) MS__TEXT(quote) // r_winnt #define MS_SE_LOCK_MEMORY_NAME MS_TEXT("SeLockMemoryPrivilege") typedef int MS_BOOL; typedef unsigned long MS_DWORD; typedef MS_DWORD* MS_PDWORD; typedef void* MS_HANDLE; typedef MS_HANDLE* MS_PHANDLE; typedef long MS_LONG; typedef S8 MS_LONGLONG; typedef char const* MS_LPCSTR; typedef unsigned short* MS_LPWSTR, *MS_PWSTR; typedef void* MS_LPVOID; typedef MS_DWORD* MS_LPDWORD; typedef U8 MS_ULONG_PTR, *MS_PULONG_PTR; typedef void const* MS_LPCVOID; typedef struct MS_SECURITY_ATTRIBUTES *MS_PSECURITY_ATTRIBUTES, *MS_LPSECURITY_ATTRIBUTES; typedef struct MS_OVERLAPPED *MS_LPOVERLAPPED; typedef def_union(MS_LARGE_INTEGER) { struct { MS_DWORD LowPart; MS_LONG HighPart; } _; struct { MS_DWORD LowPart; MS_LONG HighPart; } u; MS_LONGLONG QuadPart; }; typedef def_struct(MS_FILE) { void* _Placeholder; }; typedef def_struct(MS_SECURITY_ATTRIBUTES) { MS_DWORD nLength; MS_LPVOID lpSecurityDescriptor; MS_BOOL bInheritHandle; }; typedef def_struct(MS_OVERLAPPED) { MS_ULONG_PTR Internal; MS_ULONG_PTR InternalHigh; union { struct { MS_DWORD Offset; MS_DWORD OffsetHigh; } _; void* Pointer; } _; MS_HANDLE hEvent; }; typedef struct MS_LUID* MS_PLUID; typedef struct MS_LUID_AND_ATTRIBUTES* MS_PLUID_AND_ATTRIBUTES; typedef struct MS_TOKEN_PRIVILEGES* MS_PTOKEN_PRIVILEGES; typedef def_struct(MS_LUID) { MS_DWORD LowPart; MS_LONG HighPart; }; typedef 
def_struct(MS_LUID_AND_ATTRIBUTES) { MS_LUID Luid; MS_DWORD Attributes; }; typedef def_struct(MS_TOKEN_PRIVILEGES) { MS_DWORD PrivilegeCount; MS_LUID_AND_ATTRIBUTES Privileges[MS_ANYSIZE_ARRAY]; }; __declspec(dllimport) MS_BOOL __stdcall CloseHandle(MS_HANDLE hObject); __declspec(dllimport) MS_BOOL __stdcall AdjustTokenPrivileges(MS_HANDLE TokenHandle, MS_BOOL DisableAllPrivileges, MS_PTOKEN_PRIVILEGES NewState, MS_DWORD BufferLength, MS_PTOKEN_PRIVILEGES PreviousState, MS_PDWORD ReturnLength); __declspec(dllimport) MS_HANDLE __stdcall GetCurrentProcess(void); __declspec(dllimport) U8 __stdcall GetLargePageMinimum(void); __declspec(dllimport) MS_BOOL __stdcall LookupPrivilegeValueW(MS_LPWSTR lpSystemName, MS_LPWSTR lpName, MS_PLUID lpLuid); __declspec(dllimport) MS_BOOL __stdcall OpenProcessToken(MS_HANDLE ProcessHandle, MS_DWORD DesiredAccess, MS_PHANDLE TokenHandle); __declspec(dllimport) MS_LPVOID __stdcall VirtualAlloc(MS_LPVOID lpAddress, U8 dwSize, MS_DWORD flAllocationType, MS_DWORD flProtect); __declspec(dllimport) MS_BOOL __stdcall VirtualFree (MS_LPVOID lpAddress, U8 dwSize, MS_DWORD dwFreeType); #pragma warning(pop) typedef def_struct(OS_Windows_State) { OS_SystemInfo system_info; }; global OS_Windows_State os__windows_info; inline OS_SystemInfo* os_system_info(void) { return & os__windows_info.system_info; } inline void os__enable_large_pages(void) { MS_HANDLE token; if (OpenProcessToken(GetCurrentProcess(), MS_TOKEN_ADJUST_PRIVILEGES | MS_TOKEN_QUERY, &token)) { MS_LUID luid; if (LookupPrivilegeValueW(0, MS_SE_LOCK_MEMORY_NAME, &luid)) { MS_TOKEN_PRIVILEGES priv; priv.PrivilegeCount = 1; priv.Privileges[0].Luid = luid; priv.Privileges[0].Attributes = MS_SE_PRIVILEGE_ENABLED; AdjustTokenPrivileges(token, 0, & priv, size_of(priv), 0, 0); } CloseHandle(token); } } inline void os_init(void) { os__enable_large_pages(); OS_SystemInfo* info = & os__windows_info.system_info; info->target_page_size = (U8)GetLargePageMinimum(); } // TODO(Ed): Large pages disabled for now... (not failing gracefully) inline U8 os__vmem_reserve(U8 size, Opts_vmem* opts) { assert(opts != nullptr); void* result = VirtualAlloc(cast(void*, opts->base_addr), size , MS_MEM_RESERVE // |MS_MEM_COMMIT|(opts->no_large_pages == false ? 
// MS_MEM_LARGE_PAGES : 0)
	, MS_PAGE_READWRITE
	);
	return u8_(result);
}
inline
B4 os__vmem_commit(U8 vm, U8 size, Opts_vmem* opts) {
	assert(opts != nullptr);
	// if (opts->no_large_pages == false ) { return 1; }
	B4 result = (VirtualAlloc(cast(MS_LPVOID, vm), size, MS_MEM_COMMIT, MS_PAGE_READWRITE) != 0);
	return result;
}
#define MS_MEM_RELEASE 0x00008000 // WinAPI MEM_RELEASE: required by VirtualFree when releasing a reservation
inline
void os_vmem_release(U8 vm, U8 size) {
	VirtualFree(cast(MS_LPVOID, vm), 0, MS_MEM_RELEASE);
}
#pragma endregion OS

#pragma region VArena (Virtual Address Space Arena)
inline
VArena_R varena__make(Opts_varena_make*R_ opts) {
	assert(opts != nullptr);
	if (opts->reserve_size == 0) { opts->reserve_size = mega(64); }
	if (opts->commit_size  == 0) { opts->commit_size  = mega(64); }
	U8 reserve_size   = align_pow2(opts->reserve_size, os_system_info()->target_page_size);
	U8 commit_size    = align_pow2(opts->commit_size,  os_system_info()->target_page_size);
	B4 no_large_pages = (opts->flags & VArenaFlag_NoLargePages) != 0;
	U8 base           = os_vmem_reserve(reserve_size, .base_addr = opts->base_addr, .no_large_pages = no_large_pages);
	assert(base != 0);
	os_vmem_commit(base, commit_size, .no_large_pages = no_large_pages);
	U8 header_size = align_pow2(size_of(VArena), MEMORY_ALIGNMENT_DEFAULT);
	VArena_R vm = cast(VArena_R, base);
	vm[0] = (VArena){
		.reserve_start = base + header_size,
		.reserve       = reserve_size,
		.commit_size   = commit_size,
		.committed     = commit_size,
		.commit_used   = header_size,
		.flags         = opts->flags
	};
	return vm;
}
inline
Slice_Mem varena__push(VArena_R vm, U8 amount, U8 type_width, Opts_varena*R_ opts) {
	assert(amount != 0);
	U8 alignment      = opts->alignment ? opts->alignment : MEMORY_ALIGNMENT_DEFAULT;
	U8 requested_size = amount * type_width;
	U8 aligned_size   = align_pow2(requested_size, alignment);
	U8 current_offset = vm->reserve_start + vm->commit_used;
	U8 to_be_used     = vm->commit_used   + aligned_size;
	U8 reserve_left   = vm->reserve       - vm->commit_used;
	U8 commit_left    = vm->committed     - vm->commit_used;
	B4 exhausted      = commit_left < to_be_used;
	assert(to_be_used < reserve_left);
	if (exhausted)
	{
		U8 next_commit_size = reserve_left > 0 ?
max(vm->commit_size, to_be_used) : align_pow2( reserve_left, os_system_info()->target_page_size); if (next_commit_size) { U8 next_commit_start = u8_(vm) + vm->committed; B4 no_large_pages = (vm->flags & VArenaFlag_NoLargePages) != 0; B4 commit_result = os_vmem_commit(next_commit_start, next_commit_size, .no_large_pages = no_large_pages); if (commit_result == false) { return (Slice_Mem){0}; } vm->committed += next_commit_size; } } vm->commit_used = to_be_used; return (Slice_Mem){.ptr = current_offset, .len = requested_size}; } inline void varena_release(VArena_R arena) { os_vmem_release(u8_(arena), arena->reserve); } inline Slice_Mem varena__shrink(VArena_R vm, Slice_Mem old_allocation, U8 requested_size, Opts_varena* opts) { assert(opts != nullptr); Slice_Mem result = {0}; U8 current_offset = vm->reserve_start + vm->commit_used; U8 shrink_amount = old_allocation.len - requested_size; if (lt_s(shrink_amount, 0)) { result = old_allocation; return result; } assert(old_allocation.ptr == current_offset); vm->commit_used -= shrink_amount; result = (Slice_Mem){ old_allocation.ptr, requested_size }; return result; } inline void varena_rewind(VArena* vm, AllocatorSP sp) { assert(vm != nullptr); assert(sp.type_sig == & varena_allocator_proc); vm->commit_used = max(sp.slot, sizeof(VArena)); } inline AllocatorSP varena_save(VArena* vm) { return (AllocatorSP){varena_allocator_proc, vm->commit_used}; } void varena_allocator_proc(AllocatorProc_In in, AllocatorProc_Out* out) { VArena* vm = cast(VArena*, in.data); switch (in.op) { case AllocatorOp_Alloc: case AllocatorOp_Alloc_NoZero: out->allocation = varena_push_mem(vm, in.requested_size, .alignment = in.alignment); memory_zero(out->allocation.ptr, out->allocation.len * in.op); break; case AllocatorOp_Free: break; case AllocatorOp_Reset: vm->commit_used = 0; break; case AllocatorOp_Grow_NoZero: case AllocatorOp_Grow: { U8 grow_amount = in.requested_size - in.old_allocation.len; if (grow_amount == 0) { out->allocation = in.old_allocation; return; } U8 current_offset = vm->reserve_start + vm->commit_used; // Growing when not the last allocation not allowed assert(in.old_allocation.ptr == current_offset); Slice_Mem allocation = varena_push_mem(vm, grow_amount, .alignment = in.alignment); assert(allocation.ptr != 0); out->allocation = (Slice_Mem){ in.old_allocation.ptr, in.requested_size }; memory_zero(out->allocation.ptr, out->allocation.len * (in.op - AllocatorOp_Grow_NoZero)); } break; case AllocatorOp_Shrink: { U8 current_offset = vm->reserve_start + vm->commit_used; U8 shrink_amount = in.old_allocation.len - in.requested_size; if (lt_s(shrink_amount, 0)) { out->allocation = in.old_allocation; return; } assert(in.old_allocation.ptr == current_offset); vm->commit_used -= shrink_amount; out->allocation = (Slice_Mem){ in.old_allocation.ptr, in.requested_size }; } break; case AllocatorOp_Rewind: vm->commit_used = in.save_point.slot; break; case AllocatorOp_SavePoint: out->save_point = varena_save(vm); break; case AllocatorOp_Query: out->features = AllocatorQuery_Alloc | AllocatorQuery_Resize | AllocatorQuery_Reset | AllocatorQuery_Rewind ; out->max_alloc = vm->reserve - vm->committed; out->min_alloc = kilo(4); out->left = out->max_alloc; out->save_point = varena_save(vm); break; } } #pragma endregion VArena #pragma region Arena (Chained Arena) inline Arena_R arena__make(Opts_arena_make*R_ opts) { assert(opts != nullptr); U8 header_size = align_pow2(size_of(Arena), MEMORY_ALIGNMENT_DEFAULT); VArena* current = varena__make(opts); assert(current != nullptr); 
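	// The Arena header is the first thing pushed into its own backing VArena:
	// base_pos/pos below are offsets within that reservation, and chaining to a new
	// VArena only happens later, in arena__push, once this reservation runs out.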
Arena* arena = varena_push(current, Arena); * arena = (Arena){ .backing = current, .prev = nullptr, .current = arena, .base_pos = 0, .pos = header_size, .flags = opts->flags, }; return arena; } Slice_Mem arena__push(Arena_R arena, U8 amount, U8 type_width, Opts_arena* opts) { assert(arena != nullptr); assert(opts != nullptr); Arena_R active = arena->current; U8 size_requested = amount * type_width; U8 alignment = opts->alignment ? opts->alignment : MEMORY_ALIGNMENT_DEFAULT; U8 size_aligned = align_pow2(size_requested, alignment); U8 pos_pre = active->pos; U8 pos_pst = pos_pre + size_aligned; B4 should_chain = ((arena->flags & ArenaFlag_NoChain) == 0) && active->backing->reserve < pos_pst; if (should_chain) { Arena* new_arena = arena_make( .base_addr = 0, .reserve_size = active->backing->reserve, .commit_size = active->backing->commit_size, .flags = active->backing->flags, ); new_arena->base_pos = active->base_pos + active->backing->reserve; sll_stack_push_n(arena->current, new_arena, prev); active = arena->current; } U8 result = u8_(active) + pos_pre; Slice_Mem vresult = varena_push_mem(active->backing, size_aligned, .alignment = alignment); slice_assert(vresult); assert(result == vresult.ptr); active->pos = pos_pst; return vresult; } inline void arena_release(Arena* arena) { assert(arena != nullptr); Arena_R curr = arena->current; Arena_R prev = nullptr; for (; curr != nullptr; curr = prev) { prev = curr->prev; varena_release(curr->backing); } } inline void arena_reset(Arena* arena) { arena_rewind(arena, (AllocatorSP){.type_sig = arena_allocator_proc, .slot = 0}); } void arena_rewind(Arena* arena, AllocatorSP save_point) { assert(arena != nullptr); assert(save_point.type_sig == arena_allocator_proc); U8 header_size = align_pow2(size_of(Arena), MEMORY_ALIGNMENT_DEFAULT); Arena_R curr = arena->current; U8 big_pos = clamp_bot(header_size, save_point.slot); for (Arena_R prev = nullptr; curr->base_pos >= big_pos; curr = prev) { prev = curr->prev; varena_release(curr->backing); } arena->current = curr; U8 new_pos = big_pos - curr->base_pos; assert(new_pos <= curr->pos); curr->pos = new_pos; varena_rewind(curr->backing, (AllocatorSP){varena_allocator_proc, curr->pos + size_of(VArena)}); } inline AllocatorSP arena_save(Arena_R arena) { return (AllocatorSP){arena_allocator_proc, arena->base_pos + arena->current->pos}; } void arena_allocator_proc(AllocatorProc_In in, AllocatorProc_Out* out) { assert(out != nullptr); Arena* arena = cast(Arena*, in.data); assert(arena != nullptr); switch (in.op) { case AllocatorOp_Alloc: case AllocatorOp_Alloc_NoZero: out->allocation = arena_push_mem(arena, in.requested_size, .alignment = in.alignment); memory_zero(out->allocation.ptr, out->allocation.len * in.op); break; case AllocatorOp_Free: break; case AllocatorOp_Reset: arena_reset(arena); break; case AllocatorOp_Grow: case AllocatorOp_Grow_NoZero: { Arena_R active = arena->current; U8 alloc_end = in.old_allocation.ptr + in.old_allocation.len; U8 arena_end = u8_(active) + active->pos; if (alloc_end == arena_end) { U8 grow_amount = in.requested_size - in.old_allocation.len; U8 aligned_grow = align_pow2(grow_amount, in.alignment ? 
in.alignment : MEMORY_ALIGNMENT_DEFAULT);
			if (active->pos + aligned_grow <= active->backing->reserve)
			{
				Slice_Mem vresult = varena_push_mem(active->backing, aligned_grow, .alignment = in.alignment);
				if (vresult.ptr != null)
				{
					active->pos          += aligned_grow;
					out->allocation       = (Slice_Mem){in.old_allocation.ptr, in.requested_size};
					out->continuity_break = false;
					memory_zero(in.old_allocation.ptr + in.old_allocation.len, grow_amount * (in.op - AllocatorOp_Grow_NoZero));
					break;
				}
			}
		}
		Slice_Mem new_alloc = arena__push(arena, in.requested_size, 1, &(Opts_arena){.alignment = in.alignment});
		if (new_alloc.ptr == null) {
			out->allocation = (Slice_Mem){0};
			break;
		}
		memory_copy(new_alloc.ptr, in.old_allocation.ptr, in.old_allocation.len);
		memory_zero(new_alloc.ptr + in.old_allocation.len, (in.requested_size - in.old_allocation.len) * (in.op - AllocatorOp_Grow_NoZero));
		out->allocation       = new_alloc;
		out->continuity_break = true;
	}
	break;
	case AllocatorOp_Shrink: {
		Arena_R active = arena->current;
		U8 alloc_end = in.old_allocation.ptr + in.old_allocation.len;
		U8 arena_end = u8_(active) + active->pos;
		if (alloc_end != arena_end) {
			out->allocation = (Slice_Mem){in.old_allocation.ptr, in.requested_size};
			break;
		}
		//SSIZE shrink_amount = in.old_allocation.len - in.requested_size;
		U8 aligned_original = align_pow2(in.old_allocation.len, MEMORY_ALIGNMENT_DEFAULT);
		U8 aligned_new      = align_pow2(in.requested_size, in.alignment ? in.alignment : MEMORY_ALIGNMENT_DEFAULT);
		U8 pos_reduction    = aligned_original - aligned_new;
		active->pos        -= pos_reduction;
		varena__shrink(active->backing, in.old_allocation, in.requested_size, &(Opts_varena){.alignment = in.alignment});
		out->allocation = (Slice_Mem){in.old_allocation.ptr, in.requested_size};
	}
	break;
	case AllocatorOp_Rewind:
		arena_rewind(arena, in.save_point);
	break;
	case AllocatorOp_SavePoint:
		out->save_point = arena_save(arena);
	break;
	case AllocatorOp_Query:
		out->features   = AllocatorQuery_Alloc | AllocatorQuery_Resize | AllocatorQuery_Reset | AllocatorQuery_Rewind;
		out->max_alloc  = arena->backing->reserve;
		out->min_alloc  = kilo(4);
		out->left       = out->max_alloc - arena->backing->commit_used;
		out->save_point = arena_save(arena);
	break;
	}
}
#pragma endregion Arena
// C--

#pragma region Key Table 1-Layer Linear (KT1L)
void kt1l__populate_slice_a2(KT1L_Byte*R_ kt, AllocatorInfo backing, KT1L_Meta m, Slice_Mem values, U8 num_values ) {
	assert(kt != nullptr);
	if (num_values == 0) { return; }
	kt[0] = mem_alloc(backing, m.slot_size * num_values );
	slice_assert(* kt);
	U8 iter = 0;
	loop: {
		U8 slot_offset = iter * m.slot_size;              // slot id
		U8 slot_cursor = kt->ptr + slot_offset;           // slots[id] type: KT1L_
		U8 slot_value  = slot_cursor + m.kt_value_offset; // slots[id].value type:
		U8 a2_offset   = iter * m.type_width * 2;         // a2 entry id
		U8 a2_cursor   = values.ptr + a2_offset;          // a2_entries[id] type: A2_
		U8 a2_value    = a2_cursor + m.type_width;        // a2_entries[id].value type:
		memory_copy(slot_value, a2_value, m.type_width);  // slots[id].value = a2_entries[id].value
		u8_r(slot_cursor)[0] = 0;
		hash64_djb8(u8_r(slot_cursor), slice_mem(a2_cursor, m.type_width)); // slots[id].key = hash64_djb8(a2_entries[id].key)
		++ iter;
		if (iter < num_values) goto loop;
	}
	kt->len = num_values;
}
#pragma endregion KT1L

#pragma region Key Table 1-Layer Chained-Chunked-Cells (KT1CX)
inline
void kt1cx_init(KT1CX_Info info, KT1CX_InfoMeta m, KT1CX_Byte* result) {
	assert(result != nullptr);
	assert(info.backing_cells.proc != nullptr);
	assert(info.backing_table.proc != nullptr);
	assert(m.cell_depth > 0);
	assert(m.cell_pool_size >=
kilo(4)); assert(m.table_size >= kilo(4)); assert(m.type_width > 0); result->table = mem_alloc(info.backing_table, m.table_size * m.cell_size); slice_assert(result->table); result->cell_pool = mem_alloc(info.backing_cells, m.cell_size * m.cell_pool_size); slice_assert(result->cell_pool); result->table.len = m.table_size; // Setting to the table number of elements instead of byte length. } void kt1cx_clear(KT1CX_Byte kt, KT1CX_ByteMeta m) { U8 cell_cursor = kt.table.ptr; U8 table_len = kt.table.len * m.cell_size; for (; cell_cursor != slice_end(kt.table); cell_cursor += m.cell_size ) // for cell in kt.table.cells { Slice_Mem slots = {cell_cursor, m.cell_depth * m.slot_size }; // slots = cell.slots U8 slot_cursor = slots.ptr; for (; slot_cursor < slice_end(slots); slot_cursor += m.slot_size) { process_slots: Slice_Mem slot = {slot_cursor, m.slot_size}; // slot = slots[id] memory_zero(slot.ptr, slot.len); // clear(slot) } U8 next = slot_cursor + m.cell_next_offset; // next = slots + next_cell_offset if (next != null) { slots.ptr = next; // slots = next slot_cursor = next; goto process_slots; } } } inline U8 kt1cx_slot_id(KT1CX_Byte kt, U8 key, KT1CX_ByteMeta m) { U8 hash_index = key % kt.table.len; return hash_index; } U8 kt1cx_get(KT1CX_Byte kt, U8 key, KT1CX_ByteMeta m) { U8 hash_index = kt1cx_slot_id(kt, key, m); U8 cell_offset = hash_index * m.cell_size; U8 cell_cursor = kt.table.ptr + cell_offset; // KT1CX_Cell_ cell = kt.table[hash_index] { Slice_Mem slots = {cell_cursor, m.cell_depth * m.slot_size}; // KT1CX_Slot_[kt.cell_depth] slots = cell.slots U8 slot_cursor = slots.ptr; for (; slot_cursor != slice_end(slots); slot_cursor += m.slot_size) { process_slots: KT1CX_Byte_Slot* slot = cast(KT1CX_Byte_Slot*, slot_cursor + m.slot_key_offset); // slot = slots[id] KT1CX_Slot_ if (slot->occupied && slot->key == key) { return slot_cursor; } } U8 cell_next = cell_cursor + m.cell_next_offset; // cell.next if (cell_next != null) { slots.ptr = cell_next; // slots = cell_next slot_cursor = cell_next; cell_cursor = cell_next; // cell = cell_next goto process_slots; } else { return null; } } } inline U8 kt1cx_set(KT1CX_Byte kt, U8 key, Slice_Mem value, AllocatorInfo backing_cells, KT1CX_ByteMeta m) { U8 hash_index = kt1cx_slot_id(kt, key, m); U8 cell_offset = hash_index * m.cell_size; U8 cell_cursor = kt.table.ptr + cell_offset; // KT1CX_Cell_ cell = kt.table[hash_index] { Slice_Mem slots = {cell_cursor, m.cell_depth * m.slot_size}; // cell.slots U8 slot_cursor = slots.ptr; for (; slot_cursor != slice_end(slots); slot_cursor += m.slot_size) { process_slots: KT1CX_Byte_Slot_R slot = cast(KT1CX_Byte_Slot_R, slot_cursor + m.slot_key_offset); if (slot->occupied == false) { slot->occupied = true; slot->key = key; return slot_cursor; } else if (slot->key == key) { return slot_cursor; } } KT1CX_Byte_Cell curr_cell = { cell_cursor + m.cell_next_offset }; // curr_cell = cell if ( curr_cell.next != null) { slots.ptr = curr_cell.next; slot_cursor = curr_cell.next; cell_cursor = curr_cell.next; goto process_slots; } else { Slice_Mem new_cell = mem_alloc(backing_cells, m.cell_size); curr_cell.next = new_cell.ptr; KT1CX_Byte_Slot_R slot = cast(KT1CX_Byte_Slot_R, new_cell.ptr + m.slot_key_offset); slot->occupied = true; slot->key = key; return new_cell.ptr; } } assert_msg(false, "impossible path"); return null; } #pragma endregion Key Table #pragma endregion Implementation int main(void) { U8 a = 4; U8 b = 2; a = add_s(a, b); U8 test = ge_s(a, b); return 0; } #pragma clang diagnostic pop