hmm

2026-02-19 16:49:25 -05:00
parent d147dccbb4
commit 9951907f10
4 changed files with 816 additions and 1 deletions
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -18,7 +18,9 @@ There are transcripts for each of these vide2s in the [references](./references/

 ## Goal

-Learn ColorForth and be able to build a ColorForth derivative from scratch similar to Timothy Lottes and Onatt. First I want to make sure the agent has a full grasp of the background content provided. I can provide more if necessary.
+Learn ColorForth and be able to build a ColorForth derivative from scratch similar to Timothy Lottes and Onatt. 
+
+**Critical Clarification:** The goal is *not* for the AI to auto-generate a novelty solution or dump a finished codebase. The objective is for me (the user) to *learn* how to build this architecture from scratch. The AI must act as a highly contextualized mentor, providing guided nudges, architectural validation, and specific tactical assistance when requested. We are at the cusp of implementation. The AI should lean on the extensive curation in `./references/` to ensure its advice remains strictly aligned with the Lottes/Onat "sourceless, zero-overhead, register-only" paradigm, minimizing generic LLM hallucinations.

 ## Some exceprts from discussing things with Grok 4.20 beta

--- a/attempt_1/duffle.amd64.win32.h
+++ b/attempt_1/duffle.amd64.win32.h
@@ -0,0 +1,568 @@
+/*
+C DSL Duffle
+ISA:      amd64
+Sandbox:  Windows 11
+Compiler: clang
+Standard: c23
+*/
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wswitch"
+#pragma clang diagnostic ignored "-Wuninitialized"
+#pragma comment(lib, "Advapi32.lib")
+#pragma comment(lib, "gdi32.lib")
+#pragma comment(lib, "Kernel32.lib")
+#pragma comment(lib, "msvcrt.lib")
+#pragma comment(lib, "user32.lib")
+#pragma comment(lib, "ucrt.lib")
+#pragma comment(lib, "vcruntime.lib")
+#define WinAPI __attribute((__stdcall__)) __attribute__((__force_align_arg_pointer__)) // Win32 Syscall FFI
+
+#pragma region DSL
+#define m_expand(...)      __VA_ARGS__
+#define glue_impl(A, B)    A ## B
+#define glue(A, B)         glue_impl(A, B)
+#define tmpl(prefix, type) prefix ## _ ## type
+
+#define VA_Sel_1( _1, ... ) _1 // <-- Of all th args passed pick _1.
+#define VA_Sel_2( _1, _2, ... ) _2 // <-- Of all the args passed pick _2.
+#define VA_Sel_3( _1, _2, _3, ... ) _3 // etc..
+
+#define global static // Mark global data
+#define gknown        // Mark global data used in procedure
+
+#define LT_      thread_local
+#define LP_      static // static data within procedure scope
+#define internal static // internal
+
+#define asm           __asm__
+#define align(value)  __attribute__(aligned (value))          // for easy alignment
+#define C_(type,data) ((type)(data))                          // for enforced precedence
+#define expect(x,y)   __builtin_expect(x, y)                  // so compiler knows the common path
+#define I_            internal inline
+#define IA_           I_       __attribute__((always_inline)) // inline always
+#define N_            internal __attribute__((noinline))      // inline never
+#define RO_           __attribute__((section(".rodata")))     // Read only data allocation
+#define r             restrict                                // pointers are either restricted or volatile and nothing else 
+#define v             volatile                                // pointers are either restricted or volatile and nothing else
+#define T_            typeof
+#define T_same(a,b)   _Generic((a), typeof((b)): 1, default: 0)
+
+#define r_(ptr)        C_(T_(ptr[0])*r, ptr)
+#define v_(ptr)        C_(T_(ptr[0])*v, ptr)
+#define tr_(type, ptr) C_(type*r, ptr)
+#define tv_(type, ptr) C_(type*v, ptr)
+
+#define array_len(a)                  (U8)(sizeof(a) / sizeof(typeof((a)[0])))
+#define array_decl(type, ...)         (type[]){__VA_ARGS__}
+#define Array_sym(type,len)           type ## _ ## A ## len
+#define Array_expand(type,len)        type Array_sym(type, len)[len];
+#define Array_(type,len)              Array_expand(type,len)
+#define Bit_(id,b)                    id = (1 << b), tmpl(id,pos) = b
+#define Enum_(underlying_type,symbol) enum   symbol: underlying_type symbol; enum symbol: underlying_type
+#define Struct_(symbol)               struct symbol   symbol;                struct symbol
+#define Union_(symbol)                union  symbol   symbol;                union  symbol
+
+#define Opt_(proc)                    Struct_(tmpl(Opt,proc))
+#define opt_(symbol, ...)             (tmpl(Opt,symbol)){__VA_ARGS__}
+#define Ret_(proc)                    Struct_(tmpl(Ret,proc))
+#define ret_(proc)                    tmpl(Ret,proc) proc
+
+// Generally unused, allows force inlining of procedures at the discretion of the call-site.
+#if 0
+#define IC_(name) inline_ ## name
+#define I_proc(name, params, args) \
+  IA_ void IC_(name) params; \
+  I_  void     name  params { IC_(name)args; } \
+  IA_ void IC_(name) params
+#define I_proc_r(type_ret, name, params, args) \
+  IA_ type_ret IC_(name) params; \
+  I_  type_ret     name  params { return IC_(name)args; }
+#endif
+
+// Using Byte-Width convention for the fundamental types.
+typedef __UINT8_TYPE__  U1; typedef __UINT16_TYPE__ U2; typedef __UINT32_TYPE__ U4; typedef __UINT64_TYPE__ U8; 
+typedef __INT8_TYPE__   S1; typedef __INT16_TYPE__  S2; typedef __INT32_TYPE__  S4; typedef __INT64_TYPE__  S8;
+typedef unsigned char   B1; typedef __UINT16_TYPE__ B2; typedef __UINT32_TYPE__ B4; typedef __UINT64_TYPE__ B8;
+typedef float           F4; typedef double          F8; 
+typedef float F4_2 __attribute__((vector_size(16)));
+
+#define u1_(value)  C_(U1, value)
+#define u2_(value)  C_(U2, value)
+#define u4_(value)  C_(U4, value)
+#define u8_(value)  C_(U8, value)
+#define s1_(value)  C_(S1, value)
+#define s2_(value)  C_(S2, value)
+#define s4_(value)  C_(S4, value)
+#define s8_(value)  C_(S8, value)
+#define f4_(value)  C_(F4, value)
+#define f8_(value)  C_(F8, value)
+
+#define u1_r(value) C_(U1*r, value)
+#define u2_r(value) C_(U2*r, value)
+#define u4_r(value) C_(U4*r, value)
+#define u8_r(value) C_(U8*r, value)
+#define u1_v(value) C_(U1*v, value)
+#define u2_v(value) C_(U2*v, value)
+#define u4_v(value) C_(U4*v, value)
+#define u8_v(value) C_(U8*v, value)
+
+#define kilo(n)         (C_(U8, n) << 10)
+#define mega(n)         (C_(U8, n) << 20)
+#define giga(n)         (C_(U8, n) << 30)
+#define tera(n)         (C_(U8, n) << 40)
+#define null             C_(U8,    0)
+#define nullptr          C_(void*, 0)
+#define O_(type,member)  C_(U8,__builtin_offsetof(type,member))
+#define S_(data)         C_(U8, sizeof(data))
+
+#define sop_1(op,a,b) C_(U1, s1_(a) op s1_(b))
+#define sop_2(op,a,b) C_(U2, s2_(a) op s2_(b))
+#define sop_4(op,a,b) C_(U4, s4_(a) op s4_(b))
+#define sop_8(op,a,b) C_(U8, s8_(a) op s8_(b))
+
+#undef def_signed_op
+#define def_signed_op(id,op,width) IA_ U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); }
+#define def_signed_ops(id,op)      def_signed_op(id, op, 1) def_signed_op(id, op, 2) def_signed_op(id, op, 4) def_signed_op(id, op, 8)
+def_signed_ops(add, +)
+def_signed_ops(sub, -)
+def_signed_ops(mut, *)
+def_signed_ops(div, /)
+def_signed_ops(gt,  >)
+def_signed_ops(lt,  <) 
+def_signed_ops(ge, >=)
+def_signed_ops(le, <=)
+#undef def_signed_ops
+#undef def_signed_op
+
+#define def_generic_sop(op, a, ...) _Generic((a), U1:  op ## _s1, U2: op ## _s2, U4: op ## _s4, U8: op ## _s8) (a, __VA_ARGS__)
+#define add_s(a,b) def_generic_sop(add,a,b)
+#define sub_s(a,b) def_generic_sop(sub,a,b)
+#define mut_s(a,b) def_generic_sop(mut,a,b)
+#define gt_s(a,b)  def_generic_sop(gt, a,b)
+#define lt_s(a,b)  def_generic_sop(lt, a,b)
+#define ge_s(a,b)  def_generic_sop(ge, a,b)
+#define le_s(a,b)  def_generic_sop(le, a,b)
+#undef def_generic_sop
+#pragma endregion DSL
+
+#pragma region Thread Coherence
+IA_ void barrier_compiler(void){asm volatile("::""memory");} // Compiler Barrier
+IA_ void barrier_memory  (void){__builtin_ia32_mfence();}    // Memory   Barrier
+IA_ void barrier_read    (void){__builtin_ia32_lfence();}    // Read     Barrier
+IA_ void barrier_write   (void){__builtin_ia32_sfence();}    // Write    Barrier
+
+IA_ U4 atm_add_u4 (U4*r addr, U4 value){asm volatile("lock xaddl %0,%1":"=r"(value),"=m"(addr[0]):"0"(value),"m"(addr[0]):"memory","cc");return value;}
+IA_ U8 atm_add_u8 (U8*r addr, U8 value){asm volatile("lock xaddq %0,%1":"=r"(value),"=m"(addr[0]):"0"(value),"m"(addr[0]):"memory","cc");return value;}
+IA_ U4 atm_swap_u4(U4*r addr, U4 value){asm volatile("lock xchgl %0,%1":"=r"(value),"=m"(addr[0]):"0"(value),"m"(addr[0]):"memory","cc");return value;}
+IA_ U8 atm_swap_u8(U8*r addr, U8 value){asm volatile("lock xchgq %0,%1":"=r"(value),"=m"(addr[0]):"0"(value),"m"(addr[0]):"memory","cc");return value;}
+#pragma endregion Thread Coherence
+
+#pragma region Debug
+WinAPI void process_exit(U4 status) asm("exit");
+#define debug_trap() __builtin_debugtrap()
+#if BUILD_DEBUG
+IA_ void assert(U8 cond) { if(cond){return;} else{debug_trap(); process_exit(1);} }
+#else
+#define assert(cond)
+#endif
+#pragma endregion Debug
+
+#pragma region Memory
+#define MEM_ALIGNMENT_DEFAULT  (2 * S_(void*))
+
+#define assert_bounds(point, start, end) for(;0;){ \
+	assert((start) <= (point)); \
+	assert((point) <= (end));   \
+} while(0)
+
+IA_ U8 align_pow2(U8 x, U8 b) {
+	assert(b != 0);
+	assert((b & (b - 1)) == 0);  // Check power of 2
+	return ((x + b - 1) & (~(b - 1)));
+}
+
+IA_ U8 mem_copy            (U8 dest, U8 src,   U8 len) { return (U8)(__builtin_memcpy ((void*)dest, (void const*)src,   len)); }
+IA_ U8 mem_copy_overlapping(U8 dest, U8 src,   U8 len) { return (U8)(__builtin_memmove((void*)dest, (void const*)src,   len)); }
+IA_ U8 mem_fill            (U8 dest, U8 value, U8 len) { return (U8)(__builtin_memset ((void*)dest, (int)        value, len)); }
+IA_ B4 mem_zero            (U8 dest,           U8 len) { if(dest == 0){return false;} mem_fill(dest, 0, len); return true; }
+
+typedef Struct_(Slice) { U8 ptr, len; }; // Untyped Slice
+IA_ Slice slice_ut_(U8 ptr, U8 len) { return (Slice){ptr, len}; }
+
+#define Slice_(type)        Struct_(tmpl(Slice,type)) { type* ptr; U8 len; }
+typedef Slice_(B1);
+#define slice_assert(s)    do { assert((s).ptr != 0); assert((s).len > 0); } while(0)
+#define slice_end(slice)   ((slice).ptr + (slice).len)
+#define S_slice(s)         ((s).len * S_((s).ptr[0]))
+
+#define slice_ut(ptr,len)  slice_ut_(u8_(ptr), u8_(len))
+#define slice_ut_arr(a)    slice_ut_(u8_(a),   S_(a))
+#define slice_to_ut(s)     slice_ut_(u8_((s).ptr), S_slice(s))
+
+#define slice_iter(container, iter)     (T_((container).ptr) iter = (container).ptr; iter != slice_end(container); ++ iter)
+#define slice_arg_from_array(type, ...) & (tmpl(Slice,type)) { .ptr = array_decl(type,__VA_ARGS__), .len = array_len( array_decl(type,__VA_ARGS__)) }
+
+IA_ void slice_zero_(Slice s) { slice_assert(s); mem_zero(s.ptr, s.len); }
+#define  slice_zero(s)        slice_zero_(slice_to_ut(s))
+
+IA_ void slice_copy_(Slice dest, Slice src) {
+	assert(dest.len >= src.len);
+	slice_assert(dest);
+	slice_assert(src);
+	mem_copy(dest.ptr, src.ptr, src.len);
+}
+#define slice_copy(dest, src) do {  \
+	static_assert(T_same(dest, src)); \
+	slice_copy_(slice_to_ut(dest), slice_to_ut(src)); \
+} while(0)
+
+IA_ Slice mem_bump(U8 start, U8 cap, U8*r used, U8 amount) {
+	assert(amount <= (cap - used[0])); 
+	used[0] += amount; 
+	return (Slice){start + used[0], amount};
+}
+#pragma endregion Memory
+
+#pragma region Math
+#define u8_max 0xffffffffffffffffull
+
+#define min(A,B)       (((A) < (B)) ? (A) : (B))
+#define max(A,B)       (((A) > (B)) ? (A) : (B))
+#define clamp_bot(X,B) max(X, B) // Clamp "X" by "B"
+
+#define clamp_decrement(X) (((X) > 0) ? ((X) - 1) : 0)
+
+typedef Struct_(R1_U1){ U1 p0; U1 p1; };
+typedef Struct_(R1_U2){ U2 p0; U2 p1; };
+typedef Struct_(R1_U4){ U4 p0; U2 p4; };
+typedef Struct_(R1_U8){ U8 p0; U8 p4; };
+
+typedef Struct_(V2_U1){ U1 x; U1 y;};
+
+IA_ B8 add_of  (U8 a, U8 b, U8*r res) { return __builtin_uaddll_overflow(a, b, res); }
+IA_ B8 sub_of  (U8 a, U8 b, U8*r res) { return __builtin_usubll_overflow(a, b, res); }
+IA_ B8 mul_of  (U8 a, U8 b, U8*r res) { return __builtin_umulll_overflow(a, b, res); }
+IA_ B8 add_s_of(S8 a, S8 b, S8*r res) { return __builtin_saddll_overflow(a, b, res); }
+IA_ B8 sub_s_of(S8 a, S8 b, S8*r res) { return __builtin_ssubll_overflow(a, b, res); }
+IA_ B8 mul_s_of(S8 a, S8 b, S8*r res) { return __builtin_smulll_overflow(a, b, res); }
+#pragma endregion Math
+
+#pragma region Control Flow & Iteration
+#define each_iter(type, iter, end)             (type iter = 0; iter < end; ++ iter)
+#define index_iter(type, iter, begin, op, end) (type iter = begin; iter op end; (begin < end ? ++ iter : -- iter))
+#define range_iter(iter,op,range)              (T_((range).p0) iter = (range).p0; iter op (range).p1; ((range).p0 < (range).p1 ? ++ iter : -- iter))
+
+#define defer(expr)                for(U4         once= 1;                  once!=1;++     once,(expr))    // Basic do something after body
+#define scope(begin,end)           for(U4         once=(1,(begin));         once!=1;++     once,(end ))    // Do things before or after a scope
+#define defer_rewind(cursor)       for(T_(cursor) sp=cursor,once=0;         once!=1;++     once,cursor=sp) // Used with arenas/stacks
+#define defer_info(type,expr, ...) for(type       info= {__VA_ARGS__}; info.once!=1;++info.once,(expr))    // Defer with tracked state
+
+#define do_while(cond) for (U8 once=0; once!=1 || (cond); ++once)
+#pragma endregion Control Flow & Iteration
+
+#pragma region FArena
+typedef Opt_(farena)    { U8 alignment, type_width; };
+typedef Struct_(FArena) { U8 start, capacity, used; };
+IA_ void farena_init(FArena*r arena, Slice mem) {  assert(arena != nullptr);
+	arena->start    = mem.ptr;
+	arena->capacity = mem.len;
+	arena->used     = 0;
+}
+IA_ FArena farena_make(Slice mem) { FArena a; farena_init(& a, mem); return a; }
+I_ Slice farena_push(FArena*r arena, U8 amount, Opt_farena o) {
+	if (amount == 0) { return (Slice){}; }
+	U8 desired   = amount * (o.type_width == 0 ? 1 : o.type_width);
+	U8 to_commit = align_pow2(desired, o.alignment ?  o.alignment : MEM_ALIGNMENT_DEFAULT);
+	return mem_bump(arena->start, arena->capacity, & arena->used, to_commit);
+}
+IA_ void farena_reset(FArena*r arena) { arena->used = 0; }
+IA_ void farena_rewind(FArena*r arena, U8 save_point) {
+	U8 end       = arena->start + arena->used; assert_bounds(save_point, arena->start, end);
+	arena->used -= save_point - arena->start;
+}
+IA_ U8 farena_save(FArena arena) { return arena.used; }
+#define farena_push_(arena, amount, ...)                                          farena_push((arena), (amount), opt_(farena, __VA_ARGS__))
+#define farena_push_type(arena, type, ...)                              C_(type*, farena_push((arena), 1,        opt_(farena, .type_width=S_(type), __VA_ARGS__)).ptr)
+#define farena_push_array(arena, type, amount, ...) (tmpl(Slice,type)){ C_(type*, farena_push((arena), (amount), opt_(farena, .type_width=S_(type), __VA_ARGS__)).ptr), (amount) }
+#pragma endregion FArena
+
+#pragma region FStack
+#define FStack_(name, type, width) Struct_(name) { U8 top; type arr[width]; }
+
+IA_ Slice fstack_push(Slice mem, U8* top, U8 amount, Opt_farena o) { 
+	FArena a = { mem.ptr, mem.len, top[0] }; Slice s = farena_push(& a, amount, o); 
+	top[0] = a.used; return s;
+};
+
+// This is here more for annotation than anything else.
+#define fstack_save(stack)       stack.top
+#define fstack_rewind(stack, sp) do{stack.top = sp;}while(0)
+#define fstack_reset(stack)      do{stack.top = 0; }while(0)
+
+#define fstack_slice(stack) slice_ut_arr((stack).arr)
+#define fstack_push_(stk, amount, ...) fstack_push(fstack_slice(stk), & (stk).top, (amount), opt_(farena, __VA_ARGS__))
+#define fstack_push_array(stk, type, amount, ...) \
+(tmpl(Slice,type)){ C_(type*, fstack_push(fstack_slice(stk), & (stk).top, (amount), opt_(farena, .type_width=S_(type), __VA_ARGS__)).ptr), (amount) }
+#pragma endregion FStack
+
+#pragma region Text
+// Using Bit-Width convention for the Unicode Encoding.
+typedef unsigned char UTF8;
+typedef Struct_(Str8) { UTF8* ptr; U8 len; }; 
+typedef Str8 Slice_UTF8;
+typedef Slice_(Str8);
+#define str8_comp(ptr, len) ((Str8){(UTF8*)ptr, len})
+#define str8(literal)       ((Str8){(UTF8*)literal, S_(literal) - 1})
+#pragma endregion Text
+
+#pragma region Hashing
+IA_ void hash64_fnv1a(U8*r hash, Slice data, U8 seed) {
+	LP_ U8 const default_seed = 0xcbf29ce484222325; 
+	if (seed == 0) seed = default_seed;
+	hash[0] = seed; for (U8 elem = data.ptr; elem != slice_end(data); elem += 1) {
+		hash[0] ^= u1_r(elem)[0];
+		hash[0] *= 0x100000001b3;
+	}
+}
+IA_ U8 hash64_fnv1a_ret(Slice data, U8 seed) { U8 h = 0; hash64_fnv1a(& h, data, seed); return h; }
+#pragma endregion Hashing
+
+#pragma region IO
+#define MS_STD_INPUT  u4_(-10)
+#define MS_STD_OUTPUT u4_(-11)
+typedef Struct_(MS_Handle){U8 id;};
+WinAPI MS_Handle ms_get_std_handle(U4 handle_type) asm("GetStdHandle");
+WinAPI B4        ms_read_console(MS_Handle handle, UTF8*r buffer, U4 to_read, U4*r num_read, U8 reserved_input_control) asm("ReadConsoleA");
+WinAPI B4        ms_write_console(MS_Handle handle, UTF8 const*r buffer, U4 chars_to_write, U4*v chars_written, U8 reserved) asm("WriteConsoleA");
+#pragma endregion IO
+
+#pragma region Key Table Linear (KTL)
+enum { KT_SLot_value = S_(U8), };
+#define KTL_Slot_(type) Struct_(tmpl(KTL_Slot,type)) { \
+	U8   key;   \
+	type value; \
+}
+#define KTL_(type) Slice_(tmpl(KTL_Slot,type)); \
+	typedef tmpl(Slice_KTL_Slot,type) tmpl(KTL,type)
+typedef Slice KTL_Byte;
+typedef Struct_(KTL_Meta) {
+	U8   slot_size;
+	U8   type_width;
+};
+
+typedef Array_(Str8, 2);
+typedef Slice_(Str8_A2);
+typedef KTL_Slot_(Str8);
+typedef KTL_(Str8);
+IA_ void ktl_populate_slice_a2_str8(KTL_Str8* kt, Slice_Str8_A2 values) {
+	assert(kt != null); slice_assert(* kt);
+	if (values.len == 0) return;
+	assert(kt->len == values.len);
+	for index_iter(U4, id, 0, <, values.len) { 
+		hash64_fnv1a(& kt->ptr[id].key, slice_to_ut(values.ptr[id][0]), 0);
+		mem_copy(u8_(& kt->ptr[id].value), u8_(& values.ptr[id][1]), S_(Str8));
+	}
+}
+#define ktl_str8_key(str)      hash64_fnv1a_ret(slice_to_ut(str8(str)), 0)
+#define ktl_str8_from_arr(arr) (KTL_Str8){arr, array_len(arr)}
+#pragma endregion KTL
+
+#pragma region Text Ops
+// NOTE(rjf): Includes reverses for uppercase and lowercase hex.
+RO_ global U8 integer_symbol_reverse[128] = {
+  0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+  0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+  0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+  0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+  0xFF,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+  0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+  0xFF,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+  0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+};
+
+IA_ B4   char_is_upper(UTF8 c) { return('A' <= c && c <= 'Z'); }
+IA_ UTF8 char_to_lower(UTF8 c) { if (char_is_upper(c)) { c += ('a' - 'A'); } return(c); }
+IA_ B4   char_is_digit(UTF8 c, U4 base) {
+  B4 result = 0; if (0 < base && base <= 16) {
+    if (integer_symbol_reverse[c] < base) result = 1;
+  }
+  return result;
+}
+IA_ UTF8 integer_symbols(UTF8 value) {
+	LP_ UTF8 lookup_table[16] = { '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F', }; 
+	return lookup_table[C_(UTF8, value)]; 
+}
+IA_ U8 u8_from_str8(Str8 str, U4 radix) {
+  U8 x = 0; if(1 < radix && radix <= 16) {
+    for each_iter(U8, cursor, str.len) { 
+      x *= radix;
+      x += integer_symbol_reverse[str.ptr[cursor] & 0x7F];
+    }
+  }
+  return x;
+}
+
+typedef Struct_(Info_str8_from_u4) {
+	Str8 prefix;
+	U4   digit_group_size;
+	U4   needed_leading_zeros;
+	U4   size_required;
+};
+I_ Info_str8_from_u4 str8_from_u4_info(U4 num, U4 radix, U4 min_digits, U4 digit_group_separator)
+{
+	Info_str8_from_u4 info = {0};
+	LP_ Str8 tbl_prefix[] = { str8("0x"), str8("0o"), str8("0b") };
+	switch (radix) {
+	case 16: { info.prefix = tbl_prefix[0]; } break;
+	case 8:  { info.prefix = tbl_prefix[1]; } break;
+	case 2:  { info.prefix = tbl_prefix[2]; } break;
+	}
+	info.digit_group_size = 3;
+	switch (radix) {
+	default: break;
+	case 2:
+	case 8:
+	case 16: {
+		info.digit_group_size = 4;
+	}
+	break;
+	}
+	info.needed_leading_zeros = 0;
+	{
+		U4 needed_digits = 1;
+		{
+			U4 u32_reduce = num;
+			for(;;)
+			{
+				u32_reduce /= radix;
+				if (u32_reduce == 0) {
+					break;
+				}
+				needed_digits += 1;
+			}
+		}
+		info.needed_leading_zeros = (min_digits > needed_digits) ? min_digits - needed_digits : 0;
+		U4 needed_separators       = 0;
+		if (digit_group_separator != 0)
+		{
+			needed_separators = (needed_digits + info.needed_leading_zeros) / info.digit_group_size;
+			if (needed_separators > 0 && (needed_digits + info.needed_leading_zeros) % info.digit_group_size == 0) {
+				needed_separators -= 1;
+			}
+		}
+		info.size_required = info.prefix.len + info.needed_leading_zeros + needed_separators + needed_digits;
+	}
+	return info;
+}
+I_ Str8 str8_from_u4_buf(Slice buf, U4 num, U4 radix, U4 min_digits, U4 digit_group_separator, Info_str8_from_u4 info)
+{
+	assert(buf.len >= info.size_required);
+	Str8 result = { C_(UTF8*, buf.ptr), info.size_required };
+	/*Fill Content*/ {
+		U4 num_reduce             = num;
+		U4 digits_until_separator = info.digit_group_size;
+		for (U8 idx = 0; idx < result.len; idx += 1)
+		{
+			U8 separator_pos = result.len - idx - 1;
+			if (digits_until_separator == 0 && digit_group_separator != 0) {
+				result.ptr[separator_pos] = u1_(digit_group_separator);
+				digits_until_separator    = info.digit_group_size + 1;
+			}
+			else {
+				result.ptr[separator_pos] = (U1) char_to_lower(integer_symbols(u1_(num_reduce % radix)));
+				num_reduce /= radix;
+			}
+			digits_until_separator -= 1;
+			if (num_reduce == 0) break;
+		}
+		for (U8 leading_0_idx = 0; leading_0_idx < info.needed_leading_zeros; leading_0_idx += 1) {
+			result.ptr[info.prefix.len + leading_0_idx] = '0';
+		}
+	}
+	/*Fill Prefix*/ if (info.prefix.len > 0) { slice_copy(result, info.prefix); }
+	return result;
+}
+I_ Str8 str8_fmt_ktl_buf(Slice buffer, KTL_Str8 table, Str8 fmt_template)
+{
+	slice_assert(buffer);
+	slice_assert(table);
+	slice_assert(fmt_template);
+	UTF8*r cursor_buffer    = C_(UTF8*r, buffer.ptr);
+	U8     buffer_remaining = buffer.len;
+	UTF8*r cursor_fmt       = fmt_template.ptr;
+	U8     left_fmt         = fmt_template.len;
+	while (left_fmt && buffer_remaining)
+	{
+		// Forward until we hit the delimiter '<' or the template's contents are exhausted.
+		U8 copy_offset = 0;
+		if (cursor_fmt[0] == '<')
+		{
+			UTF8*r potential_token_cursor = cursor_fmt + 1; // Skip '<'
+			U8     potential_token_len    = 0;
+			B4     fmt_overflow           = false;
+			while(true) {
+				UTF8*r cursor       = potential_token_cursor + potential_token_len;
+				fmt_overflow        = cursor >= slice_end(fmt_template);
+				B4 found_terminator = potential_token_cursor[potential_token_len] == '>';
+				if (fmt_overflow || found_terminator) { break; }
+				++ potential_token_len;
+			}
+			if (fmt_overflow) { 
+				// Failed to find a subst and we're at end of fmt, just copy segment.
+				copy_offset = 1 + potential_token_len; // '<' + token
+				goto write_to_buffer; 
+			}
+			// Hashing the potential token and cross checking it with our token table
+			U8 key = hash64_fnv1a_ret(slice_ut(u8_(potential_token_cursor), potential_token_len), 0);
+			Str8*r value = nullptr; for slice_iter(table, token) {
+				// We do a linear iteration instead of a hash table lookup because the user should never subst with more than 100 unqiue tokens..
+				if (token->key == key) { value = & token->value; break; }
+			}
+			if (value)
+			{
+				// We're going to appending the string, make sure we have enough space in our buffer.
+				// NOTE(Ed): this version doesn't support growing the buffer (No Allocator Interface)
+				assert((buffer_remaining - potential_token_len) > 0);
+				copy_offset = min(buffer_remaining, value->len); // Prevent Buffer overflow.
+				mem_copy(u8_(cursor_buffer), u8_(value->ptr), buffer_remaining);
+				// Sync cursor format to after the processed token
+				cursor_buffer    += copy_offset;
+				buffer_remaining -= copy_offset;
+				cursor_fmt        = potential_token_cursor + 1 + potential_token_len; // '<' + token
+				left_fmt         -= potential_token_len    + 2; // The 2 here are the '<' & '>' delimiters being omitted.
+				continue;
+			}
+			// If not a subsitution, we copy the segment and continue.
+			copy_offset = 1 + potential_token_len; // '<' + token
+			goto write_to_buffer;
+		}
+		else do {
+			++ copy_offset;
+		} 
+		while ( (cursor_fmt[copy_offset] != '<' && (cursor_fmt + copy_offset) < slice_end(fmt_template)) );
+	write_to_buffer:
+		assert((buffer_remaining - copy_offset) > 0);
+		copy_offset = min(buffer_remaining, copy_offset); // Prevent buffer overflow.
+		mem_copy(u8_(cursor_buffer), u8_(cursor_fmt), copy_offset);
+		buffer_remaining -= copy_offset;
+		left_fmt         -= copy_offset;
+		cursor_buffer    += copy_offset;
+		cursor_fmt       += copy_offset;
+	}
+	return (Str8){C_(UTF8*, buffer.ptr), buffer.len - buffer_remaining};
+}
+
+typedef Struct_(Str8Gen) { UTF8* ptr; U8 cap, len; };
+IA_ Slice str8gen_buf(Str8Gen*r gen) { return (Slice){u8_(gen->ptr) + gen->len, gen->cap - gen->len}; }
+
+IA_ void str8gen_append_str8(Str8Gen*r gen, Str8 str) { assert(gen != nullptr);
+	U8 ptr = mem_bump(u8_(gen->ptr), gen->cap, & gen->len, str.len).ptr;
+	mem_copy(ptr, u8_(str.ptr), str.len);
+}
+IA_ void str8gen_append_fmt(Str8Gen*r gen, Str8 fmt, KTL_Str8 tbl) {
+	Str8 result = str8_fmt_ktl_buf(str8gen_buf(gen), tbl, fmt);
+	gen->len += result.len;
+}
+#define str8gen_append_str8_(gen, s) str8gen_append_str8(gen, str8(s))
+#pragma endregion Text Ops
--- a/scripts/build.simple.c.ps1
+++ b/scripts/build.simple.c.ps1
@@ -0,0 +1,178 @@
+$path_root = split-path -Path $PSScriptRoot -Parent
+$misc = join-path $PSScriptRoot 'helpers/misc.ps1'
+. $misc
+
+$path_toolchain = join-path $path_root      'toolchain'
+$path_rad       = join-path $path_toolchain 'rad'
+# --- Toolchain Executable Paths ---
+$compiler  = 'clang'
+$optimizer = 'opt.exe'
+$linker    = 'lld-link.exe'
+$archiver  = 'llvm-lib.exe'
+$radbin    = join-path $path_rad 'radbin.exe'
+$radlink   = join-path $path_rad 'radlink.exe'
+
+# https://clang.llvm.org/docs/ClangCommandLineReference.html
+$flag_all_c 					   = @('-x', 'c')
+$flag_c11                          = '-std=c11'
+$flag_c23                          = '-std=c23'
+$flag_all_cpp                      = '-x c++'
+$flag_charset_utf8                 = '-fexec-charset=utf-8'
+$flag_compile                      = '-c'
+$flag_color_diagnostics            = '-fcolor-diagnostics'
+$flag_no_builtin_includes          = '-nobuiltininc'
+$flag_no_color_diagnostics         = '-fno-color-diagnostics'
+$flag_debug                        = '-g'
+$flag_debug_codeview               = '-gcodeview'
+$flag_define                       = '-D'
+$flag_emit_llvm                    = '-emit-llvm'
+$flag_stop_after_gen               = '-S'
+$flag_exceptions_disabled		   = '-fno-exceptions'
+$flag_rtti_disabled                = '-fno-rtti'
+$flag_diagnostics_absolute_paths   = '-fdiagnostics-absolute-paths'
+$flag_preprocess 			       = '-E'
+$flag_include                      = '-I'
+$flag_section_data                 = '-fdata-sections'
+$flag_section_functions            = '-ffunction-sections'
+$flag_library					   = '-l'
+$flag_library_path				   = '-L'
+$flag_linker                       = '-Wl,'
+$flag_link_dll                     = '/DLL'
+$flag_link_mapfile 		           = '/MAP:'
+$flag_link_optimize_references     = '/OPT:REF'
+$flag_link_win_subsystem_console   = '/SUBSYSTEM:CONSOLE'
+$flag_link_win_subsystem_windows   = '/SUBSYSTEM:WINDOWS'
+$flag_link_win_machine_32          = '/MACHINE:X86'
+$flag_link_win_machine_64          = '/MACHINE:X64'
+$flag_link_win_debug               = '/DEBUG'
+$flag_link_win_pdb 			       = '/PDB:'
+$flag_link_win_path_output         = '/OUT:'
+$flag_link_no_incremental          = '/INCREMENTAL:NO'
+$flag_no_optimization 		       = '-O0'
+$flag_optimize_fast 		       = '-O2'
+$flag_optimize_size 		       = '-O1'
+$flag_optimize_intrinsics		   = '-Oi'
+$flag_path_output                  = '-o'
+$flag_preprocess_non_intergrated   = '-no-integrated-cpp'
+$flag_profiling_debug              = '-fdebug-info-for-profiling'
+$flag_set_stack_size			   = '-stack='
+$flag_syntax_only				   = '-fsyntax-only'
+$flag_target_arch				   = '-target'
+$flag_time_trace				   = '-ftime-trace'
+$flag_verbose 					   = '-v'
+$flag_wall 					       = '-Wall'
+$flag_warning 					   = '-W'
+$flag_warnings_as_errors           = '-Werror'
+$flag_nologo 			           = '/nologo'
+
+$path_build = join-path $path_root 'build'
+if ( -not(test-path -Path $path_build) ) {
+	new-item -ItemType Directory -Path $path_build
+}
+
+push-location $path_build
+
+# --- File Paths ---
+$unit_name      = "simple"
+$unit_source    = join-path $path_root "code\C\$unit_name.c"
+$ir_unoptimized = join-path $path_build "$unit_name.ll"
+$ir_optimized   = join-path $path_build "$unit_name.opt.ll"
+$object         = join-path $path_build "$unit_name.obj"
+$binary         = join-path $path_build "$unit_name.exe"
+$pdb            = join-path $path_build "$unit_name.pdb"
+$map            = join-path $path_build "$unit_name.map"
+
+# --- Stage 1: Compile C to LLVM IR ---
+write-host "Stage 1: Compiling C to LLVM IR"
+$compiler_args = @()
+# $compiler_args += $flag_stop_after_gen
+# $compiler_args += $flag_emit_llvm
+$compiler_args += ($flag_define + 'BUILD_DEBUG=1')
+$compiler_args += $flag_debug
+# $compiler_args += $flag_debug_codeview
+$compiler_args += $flag_wall
+# $compiler_args += $flag_charset_utf8
+$compiler_args += $flag_c23
+$compiler_args += $flag_no_optimization
+# $compiler_args += $flag_no_builtin_includes
+$compiler_args += $flag_diagnostics_absolute_paths
+$compiler_args += $flag_rtti_disabled
+$compiler_args += $flag_exceptions_disabled
+$compiler_args += ($flag_include + $path_root)
+$compiler_args += $flag_compile
+$compiler_args += $flag_path_output, $object
+$compiler_args += $unit_source
+$compiler_args | ForEach-Object { Write-Host $_ }
+$stage1_time = Measure-Command { & $compiler $compiler_args }
+write-host "Compilation took $($stage1_time.TotalMilliseconds)ms"
+# write-host "IR generation took $($stage1_time.TotalMilliseconds)ms"
+write-host
+
+# --- Stage 2: Manually Optimize LLVM IR ---
+if ($false) {
+write-host "Manually Optimizing LLVM IR with 'opt'"
+$optimization_passes = @(
+    '-sroa',              # Scalar Replacement Of Aggregates
+    '-early-cse',         # Early Common Subexpression Elimination
+    '-instcombine'        # Instruction Combining
+)
+$optimizer_args = @(
+	$optimization_passes, 
+	$ir_unoptimized, 
+	$flag_path_output, 
+	$ir_optimized
+)
+$optimizer_args | ForEach-Object { Write-Host $_ }
+$stage2_time = Measure-Command { & $optimizer $optimizer_args }
+write-host "Optimization took $($stage2_time.TotalMilliseconds)ms"
+write-hosts
+
+write-host "Compiling LLVM IR to Object File with 'clang'"
+$ir_to_obj_args = @()
+$ir_to_obj_args += $flag_compile
+$ir_to_obj_args += $flag_path_output, $object
+$ir_to_obj_args += $ir_optimized
+
+$ir_to_obj_args | ForEach-Object { Write-Host $_ }
+$stage3_time = Measure-Command { & $compiler $ir_to_obj_args }
+write-host "Object file generation took $($stage3_time.TotalMilliseconds)ms"
+write-host
+}
+if ($true) {
+	# write-host "Linking with lld-link"
+	$linker_args = @()
+	$linker_args += $flag_nologo
+	$linker_args += $flag_link_win_machine_64
+	$linker_args += $flag_link_no_incremental
+	$linker_args += ($flag_link_win_path_output + $binary)
+
+	$linker_args += "$flag_link_win_debug"
+	$linker_args += $flag_link_win_pdb + $pdb
+	$linker_args += $flag_link_mapfile + $map
+    $linker_args += $flag_link_win_subsystem_console
+
+	$linker_args += $object
+    
+	# Diagnoistc print for the args
+	$linker_args | ForEach-Object { Write-Host $_ }
+
+	$linking_time = Measure-Command { & $linker $linker_args }
+	write-host "Linking took $($linking_time.TotalMilliseconds)ms"
+	write-host
+}
+if ($false) {
+	write-host "Dumping Debug Info"
+	$rbin_out  = '--out:'
+	$rbin_dump = '--dump'
+    $rdi         = join-path $path_build "$unit_name.rdi"
+    $rdi_listing = join-path $path_build "$unit_name.rdi.list"
+
+	$nargs = @($pdb, ($rbin_out + $rdi))
+	& $radbin $nargs
+
+	$nargs = @($rbin_dump, $rdi)
+	$dump = & $radbin $nargs
+	$dump > $rdi_listing
+}
+
+Pop-Location
--- a/scripts/helpers/misc.ps1
+++ b/scripts/helpers/misc.ps1
@@ -0,0 +1,67 @@
+function clone-gitrepo { param( [string] $path, [string] $url )
+	if (test-path $path) {
+		# git -C $path pull
+	}
+	else {
+		Write-Host "Cloning $url ..."
+		git clone --recursive $url $path
+	}
+}
+
+function Update-GitRepo
+{
+	param( [string] $path, [string] $url, [string] $build_command )
+
+	if ( $build_command -eq $null ) {
+		write-host "Attempted to call Update-GitRepo without build_command specified"
+		return
+	}
+
+	$repo_name = $url.Split('/')[-1].Replace('.git', '')
+
+	$last_built_commit = join-path $path_build "last_built_commit_$repo_name.txt"
+	if ( -not(test-path -Path $path))
+	{
+		write-host "Cloining repo from $url to $path"
+		git clone $url $path
+
+		write-host "Building $url"
+		push-location $path
+		& "$build_command"
+		pop-location
+
+		git -C $path rev-parse HEAD | out-file $last_built_commit
+		$script:binaries_dirty = $true
+		write-host
+		return
+	}
+
+	git -C $path fetch
+	$latest_commit_hash = git -C $path rev-parse '@{u}'
+	$last_built_hash    = if (Test-Path $last_built_commit) { Get-Content $last_built_commit } else { "" }
+
+	if ( $latest_commit_hash -eq $last_built_hash ) {
+		write-host
+		return
+	}
+
+	write-host "Build out of date for: $path, updating"
+	write-host 'Pulling...'
+	git -C $path pull
+
+	write-host "Building $url"
+	push-location $path
+	& $build_command
+	pop-location
+
+	$latest_commit_hash | out-file $last_built_commit
+	$script:binaries_dirty = $true
+	write-host
+}
+
+function verify-path { param( $path )
+	if (test-path $path) {return $true}
+
+	new-item -ItemType Directory -Path $path
+	return $false
+}