diff --git a/.editorconfig b/.editorconfig index 1cd7d68..8ebaa61 100644 --- a/.editorconfig +++ b/.editorconfig @@ -16,7 +16,11 @@ indent_size = 4 indent_style = space indent_size = 4 -[*{.h, .c, .hpp, .cpp}] +[*.h] +indent_style = tab +indent_size = 2 + +[*.c] indent_style = tab indent_size = 2 diff --git a/code/duffle/dsl.h b/code/duffle/dsl.h index 833a0b2..cf1cbd9 100644 --- a/code/duffle/dsl.h +++ b/code/duffle/dsl.h @@ -3,105 +3,123 @@ # include "assert.h" #endif -#define LP_ static // local_persist -#define internal static // internal -#define global -#define gknown - -#define align_(value) __attribute__((aligned (value))) // for easy alignment -#define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path -#define FI_ static inline __attribute__((always_inline)) // force inline -#define NI_ static __attribute__((noinline)) // force no inline [used in thread api] -#define R_ __restrict // pointers are either restricted or volatile and nothing else -#define V_ volatile // pointers are either restricted or volatile and nothing else - -#define glue_impl(A, B) A ## B -#define glue(A, B) glue_impl(A, B) -#define stringify_impl(S) #S -#define stringify(S) stringify_impl(S) -#define tmpl(prefix, type) prefix ## _ ## type - #define offset_of(type, member) cast(U8,__builtin_offsetof(type,member)) #define static_assert _Static_assert #define typeof __typeof__ #define typeof_ptr(ptr) typeof((ptr)[0]) #define typeof_same(a, b) _Generic((a), typeof((b)): 1, default: 0) -#define def_R_(type) type*restrict type ## _R -#define def_V_(type) type*volatile type ## _V -#define def_ptr_set(type) def_R_(type); typedef def_V_(type) -#define def_tset(type) type; typedef def_ptr_set(type) +#define m_expand(...) 
__VA_ARGS__ +#define glue_impl(A, B) A ## B +#define glue(A, B) glue_impl(A, B) +#define tmpl(prefix, type) prefix ## _ ## type -typedef __UINT8_TYPE__ def_tset(U1); -typedef __UINT16_TYPE__ def_tset(U2); -typedef __UINT32_TYPE__ def_tset(U4); -typedef __INT8_TYPE__ def_tset(S1); -typedef __INT16_TYPE__ def_tset(S2); -typedef __INT32_TYPE__ def_tset(S4); -typedef unsigned char def_tset(B1); -typedef __UINT16_TYPE__ def_tset(B2); -typedef __UINT32_TYPE__ def_tset(B4); -typedef __UINT64_TYPE__ def_tset(B8); +#define stringify_impl(S) #S +#define stringify(S) stringify_impl(S) + +#define VA_Sel_1( _1, ... ) _1 // <-- Of all th args passed pick _1. +#define VA_Sel_2( _1, _2, ... ) _2 // <-- Of all the args passed pick _2. +#define VA_Sel_3( _1, _2, _3, ... ) _3 // etc.. + +#define global static // Mark global data +#define gknown // Mark global data used in procedure + +#define LP_ static // static data within procedure scope +#define internal static // internal + +#define asm __asm__ +#define align_(value) __attribute__((aligned (value))) // for easy alignment +#define C_(type,data) ((type)(data)) // for enforced precedence +#define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path +#define I_ internal inline +#define FI_ inline __attribute__((always_inline)) // inline always +#define NI_ internal __attribute__((noinline)) // inline never +#define RO_ __attribute__((section(".rodata"))) // Read only data allocation +#define R_ restrict // pointers are either restricted or volatile and nothing else +#define V_ volatile // pointers are either restricted or volatile and nothing else +#define T_ typeof +#define T_same(a,b) _Generic((a), typeof((b)): 1, default: 0) + +#define r_(ptr) C_(T_(ptr[0])*R_, ptr) +#define v_(ptr) C_(T_(ptr[0])*V_, ptr) +#define tr_(type, ptr) C_(type*R_, ptr) +#define tv_(type, ptr) C_(type*V_, ptr) + +#define TypeR_(type) type*restrict type ## _R +#define TypeV_(type) type*volatile type ## _V +#define PtrSet_(type) 
TypeR_(type); typedef TypeV_(type) +#define TSet_(type) type; typedef PtrSet_(type) + +#define array_len(a) (U8)(sizeof(a) / sizeof(typeof((a)[0]))) +#define array_decl(type, ...) (type[]){__VA_ARGS__} +#define Array_sym(type,len) A ## len ## _ ## type +#define Array_expand(type,len) type Array_sym(type, len)[len]; typedef PtrSet_(Array_sym(type, len)) +#define Array_(type,len) Array_expand(type,len) +#define Bit_(id,b) id = (1 << b), tmpl(id,pos) = b +#define Enum_(underlying_type, symbol) underlying_type TSet_(symbol); enum symbol +#define Proc_(symbol) symbol +#define Struct_(symbol) struct symbol TSet_(symbol); struct symbol +#define Union_(symbol) union symbol TSet_(symbol); union symbol + +#define Opt_(proc) Struct_(tmpl(Opt,proc)) +#define opt_(symbol, ...) (tmpl(Opt,symbol)){__VA_ARGS__} +#define Ret_(proc) Struct_(tmpl(Ret,proc)) +#define ret_(proc) tmpl(Ret,proc) proc + +// Using Byte-Width convention for the fundamental types. +typedef __UINT8_TYPE__ TSet_(U1); +typedef __UINT16_TYPE__ TSet_(U2); +typedef __UINT32_TYPE__ TSet_(U4); +typedef __INT8_TYPE__ TSet_(S1); +typedef __INT16_TYPE__ TSet_(S2); +typedef __INT32_TYPE__ TSet_(S4); +typedef unsigned char TSet_(B1); +typedef __UINT16_TYPE__ TSet_(B2); +typedef __UINT32_TYPE__ TSet_(B4); + +#define u1_(value) C_(U1, value) +#define u2_(value) C_(U2, value) +#define u4_(value) C_(U4, value) +#define s1_(value) C_(S1, value) +#define s2_(value) C_(S2, value) +#define s4_(value) C_(S4, value) + +#define u1_r(value) C_(U1*R_, value) +#define u2_r(value) C_(U2*R_, value) +#define u4_r(value) C_(U4*R_, value) +#define u1_v(value) C_(U1*V_, value) +#define u2_v(value) C_(U2*V_, value) +#define u4_v(value) C_(U4*V_, value) enum { false = 0, true = 1, true_overflow, }; -#define u1_r(value) cast(U1_R, value) -#define u2_r(value) cast(U2_R, value) -#define u4_r(value) cast(U4_R, value) -#define u1_v(value) cast(U1_V, value) -#define u2_v(value) cast(U2_V, value) -#define u4_v(value) cast(U4_V, value) +typedef void 
Proc_(VoidFn) (void); -#define u1_(value) cast(U1, value) -#define u2_(value) cast(U2, value) -#define u4_(value) cast(U4, value) -#define s1_(value) cast(S1, value) -#define s2_(value) cast(S2, value) -#define s4_(value) cast(S4, value) +#define kilo(n) (C_(U4, n) << 10) +#define mega(n) (C_(U4, n) << 20) +#define giga(n) (C_(U4, n) << 30) +#define tera(n) (C_(U4, n) << 40) +#define null C_(U4, 0) +#define nullptr C_(void*, 0) +#define O_(type,member) C_(U4,__builtin_offsetof(type,member)) +#define S_(data) C_(U4, sizeof(data)) -#define farray_len(array) (SSIZE)sizeof(array) / size_of( typeof((array)[0])) -#define farray_init(type, ...) (type[]){__VA_ARGS__} -#define def_farray_sym(_type, _len) A ## _len ## _ ## _type -#define def_farray_impl(_type, _len) _type def_farray_sym(_type, _len)[_len]; typedef def_ptr_set(def_farray_sym(_type, _len)) -#define def_farray(type, len) def_farray_impl(type, len) -#define def_enum(underlying_type, symbol) underlying_type def_tset(symbol); enum symbol -#define def_struct(symbol) struct symbol def_tset(symbol); struct symbol -#define def_union(symbol) union symbol def_tset(symbol); union symbol -#define def_proc(symbol) symbol -#define opt_args(symbol, ...) &(symbol){__VA_ARGS__} -#define ret_type(type) type +#define sop_1(op,a,b) C_(U1, s1_(a) op s1_(b)) +#define sop_2(op,a,b) C_(U2, s2_(a) op s2_(b)) +#define sop_4(op,a,b) C_(U4, s4_(a) op s4_(b)) -#define o_(field) offset_of(typeof_ptr(& field), filed)) - -#define alignas _Alignas -#define alignof _Alignof -#define byte_pad(amount, ...) 
B1 glue(_PAD_, __VA_ARGS__) [amount] -#define cast(type, data) ((type)(data)) -#define pcast(type, data) (cast(type*, & (data)) [0]) -#define nullptr cast(void*, 0) -#define size_of(data) cast(U4, sizeof(data)) - -#define r_(ptr) cast(typeof_ptr(ptr)*R_, ptr) -#define v_(ptr) cast(typeof_ptr(ptr)*V_, ptr) -#define tr_(type, ptr) cast(type*R_, ptr) -#define tv_(type, ptr) cast(type*V_, ptr) - -#define kilo(n) (cast(U4, n) << 10) -#define mega(n) (cast(U4, n) << 20) -#define giga(n) (cast(U4, n) << 30) -#define tera(n) (cast(U4, n) << 40) - -#define dbg_args(...) __VA_ARGS__ - -#define sop_1(op, a, b) cast(U1, s1_(a) op s1_(b)) -#define sop_2(op, a, b) cast(U2, s2_(a) op s2_(b)) -#define sop_4(op, a, b) cast(U4, s4_(a) op s4_(b)) - -#define def_signed_op(id, op, width) FI_ U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); } -#define def_signed_ops(id, op) def_signed_op(id, op, 1) def_signed_op(id, op, 2) def_signed_op(id, op, 4) -def_signed_ops(add, +) def_signed_ops(sub, -) -def_signed_ops(mut, *) def_signed_ops(div, /) -def_signed_ops(gt, >) def_signed_ops(lt, <) -def_signed_ops(ge, >=) def_signed_ops(le, <=) +#undef def_signed_op +#define def_signed_op(id,op,width) FI_ U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); } +#define def_signed_ops(id,op) def_signed_op(id, op, 1) def_signed_op(id, op, 2) def_signed_op(id, op, 4) +def_signed_ops(add, +) +def_signed_ops(sub, -) +def_signed_ops(mut, *) +def_signed_ops(div, /) +def_signed_ops(gt, >) +def_signed_ops(lt, <) +def_signed_ops(ge, >=) +def_signed_ops(le, <=) +#undef def_signed_ops +#undef def_signed_op #define def_generic_sop(op, a, ...) 
_Generic((a), U1: op ## _s1, U2: op ## _s2, U4: op ## _s4) (a, __VA_ARGS__) #define add_s(a,b) def_generic_sop(add,a,b) @@ -111,6 +129,29 @@ def_signed_ops(ge, >=) def_signed_ops(le, <=) #define lt_s(a,b) def_generic_sop(lt, a,b) #define ge_s(a,b) def_generic_sop(ge, a,b) #define le_s(a,b) def_generic_sop(le, a,b) +// def_generic_sop must stay defined: add_s..le_s expand to it at each use site, not at their definition. + +#define o_(field) offset_of(typeof_ptr(& field), field) + +#define alignas _Alignas +#define alignof _Alignof +#define byte_pad(amount, ...) B1 glue(_PAD_, __VA_ARGS__) [amount] +#define pcast(type, data) (C_(type*, & (data)) [0]) + +#define dbg_args(...) __VA_ARGS__ + +#pragma region Control Flow & Iteration +#define each_iter(type, iter, end) (type iter = 0; iter < end; ++ iter) +#define index_iter(type, iter, begin, op, end) (type iter = begin; iter op end; (begin < end ? ++ iter : -- iter)) +#define range_iter(iter,op,range) (T_((range).p0) iter = (range).p0; iter op (range).p1; ((range).p0 < (range).p1 ? ++ iter : -- iter)) + +#define defer(expr) for(U4 once= 0; once!=1;++ once,(expr)) // Runs the body once, then evaluates expr +#define scope(begin,end) for(U4 once=((begin),0); once!=1;++ once,(end )) // Evaluates begin, runs the body once, then evaluates end +#define defer_rewind(cursor) for(T_(cursor) sp=cursor,once=0; once!=1;++ once,cursor=sp) // Used with arenas/stacks +#define defer_info(type,expr, ...) 
for(type info= {__VA_ARGS__}; info.once!=1;++info.once,(expr)) // Defer with tracked state + +#define do_while(cond) for (U8 once=0; once!=1 || (cond); ++once) +#pragma endregion Control Flow & Iteration #define span_iter(type, iter, m_begin, op, m_end) ( \ tmpl(Iter_Span,type) iter = { \ @@ -119,44 +160,20 @@ def_signed_ops(ge, >=) def_signed_ops(le, <=) iter.cursor op iter.r.end; \ ++ iter.cursor \ ) -#define def_span(type) \ - def_struct(tmpl( Span,type)) { type begin; type end; }; \ - typedef def_struct(tmpl(Iter_Span,type)) { tmpl(Span,type) r; type cursor; } +#define Span_(type) \ + Struct_(tmpl( Span,type)) { type begin; type end; }; \ + typedef Struct_(tmpl(Iter_Span,type)) { tmpl(Span,type) r; type cursor; } -typedef def_span(S4); -typedef def_span(U4); +typedef Span_(S4); +typedef Span_(U4); -typedef void def_proc(VoidFn) (void); - -typedef unsigned char def_tset(UTF8); -typedef def_struct(Str8) { UTF8* ptr; U4 len; }; typedef Str8 def_tset(Slice_UTF8); -typedef def_struct(Slice_Str8) { Str8* ptr; U4 len; }; -#define txt(string_literal) (Str8){ (UTF8*) string_literal, size_of(string_literal) - 1 } - -#define def_Slice(type) def_struct(tmpl(Slice,type)) { type* ptr; U4 len; } -#define slice_assert(slice) do { assert((slice).ptr != nullptr); assert((slice).len > 0); } while(0) -#define slice_end(slice) ((slice).ptr + (slice).len) -#define size_of_slice_type(slice) size_of((slice).ptr[0]) - -typedef def_Slice(void); -typedef def_Slice(B1); -#define slice_byte(slice) ((Slice_B1){cast(B1*, (slice).ptr), (slice).len * size_of_slice_type(slice)}) -#define slice_fmem(mem) ((Slice_B1){ mem, size_of(mem) }) - -void slice__copy(Slice_B1 dest, U4 dest_typewidth, Slice_B1 src, U4 src_typewidth); -void slice__zero(Slice_B1 mem, U4 typewidth); -#define slice_copy(dest, src) do { \ - static_assert(typeof_same(dest, src)); \ - slice__copy(slice_byte(dest), size_of_slice_type(dest), slice_byte(src), size_of_slice_type(src)); \ -} while (0) -#define slice_zero(slice) 
slice__zero(slice_byte(slice), size_of_slice_type(slice)) - -#define slice_iter(container, iter) ( \ - typeof((container).ptr) iter = (container).ptr; \ - iter != slice_end(container); \ - ++ iter \ -) -#define slice_from_farray(type, ...) & (tmpl(Slice,type)) { \ - .ptr = farray_init(type, __VA_ARGS__), \ - .len = farray_len( farray_init(type, __VA_ARGS__)) \ -} +#if 0 +#pragma region Debug +#define debug_trap() __builtin_debugtrap() +#if BUILD_DEBUG +IA_ void assert(U8 cond) { if(cond){return;} else{debug_trap(); ms_exit_process(1);} } +#else +#define assert(cond) +#endif +#pragma endregion Debug +#endif diff --git a/code/duffle/farena.h b/code/duffle/farena.h deleted file mode 100644 index b6b1a37..0000000 --- a/code/duffle/farena.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifdef INTELLISENSE_DIRECTIVES -# pragma once -# include "dsl.h" -# include "memory.h" -# include "strings.h" -#endif - -typedef def_struct(Opts_farena) { - Str8 type_name; - U4 alignment; -}; -typedef def_struct(FArena) { - void* start; - U4 capacity; - U4 used; -}; -FArena farena_make (Slice_B1 mem); -void farena_init (FArena* arena, Slice_B1 byte); -Slice_B1 farena__push (FArena* arena, U4 amount, U4 type_width, Opts_farena* opts); -void farena_reset (FArena* arena); -void farena_rewind(FArena* arena, AllocatorSP save_point); -AllocatorSP farena_save (FArena arena); - -// void farena_allocator_proc(AllocatorProc_In in, AllocatorProc_Out* out); -// #define ainfo_farena(arena) (AllocatorInfo){ .proc = farena_allocator_proc, .data = & arena } - -#define farena_push(arena, type, ...) \ -cast(type*, farena__push(arena, size_of(type), 1, opt_args(Opts_farena_push, lit(stringify(type)), __VA_ARGS__))).ptr - -#define farena_push_array(arena, type, amount, ...) 
\ -(Slice ## type){ farena__push(arena, size_of(type), amount, opt_args(Opts_farena_push, lit(stringify(type)), __VA_ARGS__)).ptr, amount } diff --git a/code/duffle/gp.h b/code/duffle/gp.h index 230fad7..462de7c 100644 --- a/code/duffle/gp.h +++ b/code/duffle/gp.h @@ -4,7 +4,7 @@ # include "math.h" #endif -typedef def_enum(U4, gp_Commands) { +typedef Enum_(U4, gp_Commands) { gcmd_Reset = 0b000, gcmd_Polygon = 0b001, gcmd_Line = 0b010, @@ -75,8 +75,8 @@ enum { gp_SetArea_BottomRight = (gcmd_SetDrawArea_BotRight << gcmd_offset), }; -typedef def_struct(RGB8) { B1 r; B1 g; B1 b; }; -#define rgb8(r, g, b) (RGB8){ r, g, b } +typedef Struct_(RGB8) { B1 r; B1 g; B1 b; }; +#define rgb8(r, g, b) (RGB8){ r, g, b } typedef B1 gp_Pixel16[1]; typedef B1 gp_Pixel24[3]; @@ -88,10 +88,35 @@ enum { gp_b16_Y = 16, }; -typedef def_struct(gp_Vec2) { U2 y; U2 x; }; +typedef Struct_(gp_Vec2) { U2 y; U2 x; }; #if 1 void gp_screen_init(void) __asm__("gp_screen_init_asm"); #else #define gp_screen_init() gp_screen_init_c11() #endif + + + +// TODO REVIEW: + +/* --- GPU Command Semantics (GP0) --- */ + +#define GPU_CMD_CLEAR_CACHE 0x01 +#define GPU_CMD_VRAM_FILL 0x02 +#define GPU_CMD_VRAM_COPY 0x80 +#define GPU_CMD_VRAM_READ 0xC0 +#define GPU_CMD_POLY_F3 0x20 /* Flat Triangle */ +#define GPU_CMD_POLY_FT3 0x24 /* Flat Textured Triangle */ +#define GPU_CMD_POLY_G3 0x30 /* Gouraud Triangle */ +#define GPU_CMD_POLY_GT3 0x34 /* Gouraud Textured Triangle */ +#define GPU_CMD_POLY_F4 0x28 /* Flat Quad */ +#define GPU_CMD_POLY_FT4 0x2C /* Flat Textured Quad */ +#define GPU_CMD_POLY_G4 0x38 /* Gouraud Quad */ +#define GPU_CMD_POLY_GT4 0x3C /* Gouraud Textured Quad */ + +/* --- Hardware MMIO Addresses --- */ + +#define HW_GP0_ADDR 0x1F801810 /* GPU Data Port */ +#define HW_GP1_ADDR 0x1F801814 /* GPU Status/Control Port */ + diff --git a/code/duffle/gte.h b/code/duffle/gte.h index e06d3bb..85bb74c 100644 --- a/code/duffle/gte.h +++ b/code/duffle/gte.h @@ -2,55 +2,92 @@ # pragma once # include "dsl.h" # 
include "math.h" +# include "mips.h" #endif /* C2 data registers */ -#define C2_VXY0 0 -#define C2_VZ0 1 -#define C2_VXY1 2 -#define C2_VZ1 3 -#define C2_VXY2 4 -#define C2_VZ2 5 -#define C2_RGB 6 -#define C2_OTZ 7 -#define C2_IR0 8 -#define C2_IR1 9 -#define C2_IR2 10 -#define C2_IR3 11 -#define C2_SXY0 12 -#define C2_SXY1 13 -#define C2_SXY2 14 -#define C2_SXYP 15 -#define C2_SZ0 16 -#define C2_SZ1 17 -#define C2_SZ2 18 -#define C2_SZ3 19 -#define C2_MAC0 24 -#define C2_MAC1 25 -#define C2_MAC2 26 -#define C2_FLAG 31 +/* --- GTE Data Registers (Coprocessor 2) --- */ +typedef enum { + C2_VXY0 = 0, C2_VZ0 = 1, C2_VXY1 = 2, C2_VZ1 = 3, + C2_VXY2 = 4, C2_VZ2 = 5, C2_RGB = 6, C2_OTZ = 7, + C2_IR0 = 8, C2_IR1 = 9, C2_IR2 = 10, C2_IR3 = 11, + C2_SXY0 = 12, C2_SXY1 = 13, C2_SXY2 = 14, C2_SXYP = 15, + C2_SZ0 = 16, C2_SZ1 = 17, C2_SZ2 = 18, C2_SZ3 = 19, + C2_RGB0 = 20, C2_RGB1 = 21, C2_RGB2 = 22, C2_RES1 = 23, + C2_MAC0 = 24, C2_MAC1 = 25, C2_MAC2 = 26, C2_MAC3 = 27, + C2_IRGB = 28, C2_ORGB = 29, C2_LZCS = 30, C2_LZCR = 31 +}; -/* C2 control registers */ +/* Semantic Aliases for GTE Data Registers */ -#define C2_RT11RT12 0 -#define C2_RT13RT21 1 -#define C2_RT22RT23 2 -#define C2_RT31RT32 3 -#define C2_RT33_TRX 4 -#define C2_TRY_TRZ 5 -#define C2_OFX_OFY 18 /* (low = OFX, high = OFY) */ -#define C2_H_SF 20 +#define GTE_IN_VEC0_XY C2_VXY0 /* Input Vector 0 (X, Y) */ +#define GTE_IN_VEC0_Z C2_VZ0 /* Input Vector 0 (Z) */ +#define GTE_IN_VEC1_XY C2_VXY1 /* Input Vector 1 (X, Y) */ +#define GTE_IN_VEC1_Z C2_VZ1 /* Input Vector 1 (Z) */ +#define GTE_IN_VEC2_XY C2_VXY2 /* Input Vector 2 (X, Y) */ +#define GTE_IN_VEC2_Z C2_VZ2 /* Input Vector 2 (Z) */ +#define GTE_IN_COLOR C2_RGB /* Input Color (R, G, B, Code) */ +#define GTE_OUT_SCR_XY0 C2_SXY0 /* Output Screen Coord 0 (X, Y) */ +#define GTE_OUT_SCR_XY1 C2_SXY1 /* Output Screen Coord 1 (X, Y) */ +#define GTE_OUT_SCR_XY2 C2_SXY2 /* Output Screen Coord 2 (X, Y) */ +#define GTE_OUT_DEPTH C2_OTZ /* Output Ordering Table Z (Depth) */ 
+#define GTE_MATH_ACCUM0 C2_MAC0 /* Math Accumulator 0 */ +#define GTE_MATH_ACCUM1 C2_MAC1 /* Math Accumulator 1 */ +#define GTE_MATH_ACCUM2 C2_MAC2 /* Math Accumulator 2 */ -/* Command codes (low 6 bits) */ +/* --- GTE Command Semantics (The Bitfield Meanings) --- + * A GTE command is a single 32-bit word sent to COP2. + * It is highly configurable via bitfields. + */ -#define CMD_RTPS 0x01 -#define CMD_RTPT 0x02 -#define CMD_NCLIP 0x06 -#define CMD_OP 0x0C -#define CMD_DPCS 0x10 -#define CMD_INTPL 0x11 -#define CMD_MVMVA 0x09 -#define CMD_AVSZ3 0x2D -#define CMD_AVSZ4 0x2E +/* Shift Fraction (Bit 19) - Determines fixed-point division */ +#define GTE_SF_FRACTIONAL 0 /* Divide result by 4096 (Standard 4.12 fixed point) */ +#define GTE_SF_INTEGER 1 /* No division (Raw integer math) */ +/* Matrix Select (Bits 18-17) - Which 3x3 matrix to multiply by */ +#define GTE_MX_ROTATION 0 /* Rotation Matrix (RT) */ +#define GTE_MX_LIGHT 1 /* Light Matrix (LL) */ +#define GTE_MX_COLOR 2 /* Color Matrix (LC) */ +#define GTE_MX_NONE 3 /* Reserved / Do not multiply */ + +/* Vector Select (Bits 16-15) - Which input vector to use */ +#define GTE_V_VEC0 0 /* Use Vector 0 (VXY0, VZ0) */ +#define GTE_V_VEC1 1 /* Use Vector 1 (VXY1, VZ1) */ +#define GTE_V_VEC2 2 /* Use Vector 2 (VXY2, VZ2) */ +#define GTE_V_IR_REGS 3 /* Use Intermediate Registers (IR1, IR2, IR3) */ + +/* Control Vector Select (Bits 14-13) - Which vector to ADD after multiplication */ +#define GTE_CV_TRANSLATE 0 /* Add Translation Vector (TRX, TRY, TRZ) */ +#define GTE_CV_BG_COLOR 1 /* Add Background Color (RBK, GBK, BBK) */ +#define GTE_CV_FAR_COLOR 2 /* Add Far Color (RFC, GFC, BFC) */ +#define GTE_CV_NONE 3 /* Add Zero (No addition) */ + +/* Limit/Clamp (Bit 10) - Prevents overflow artifacts */ +#define GTE_LM_NORMAL 0 /* Normal math (can overflow) */ +#define GTE_LM_CLAMP 1 /* Clamp results to valid hardware ranges (e.g., RGB 0-255) */ + +/* Core Command IDs (Bits 5-0) */ +#define GTE_CMD_RTPS 0x01 /* Rot/Trans 
Perspective Single (1 vertex) */ +#define GTE_CMD_RTPT 0x02 /* Rot/Trans Perspective Triple (3 vertices) */ +#define GTE_CMD_NCLIP 0x06 /* Normal Clipping (Backface culling) */ +#define GTE_CMD_OP 0x0C /* Outer Product */ +#define GTE_CMD_MVMVA 0x12 /* Matrix Vector Multiply & Add (Custom math) */ + + +/* COP2 (GTE) Transfer Format + * Opcode is always MIPS_OP_COP2. The 'sub' field determines direction (MT/MF). */ +#define ENC_COP2_TX(sub, rt, rd) \ + ((MIPS_OP_COP2 << MIPS_OPCODE_SHIFT) | \ + (((sub) & MIPS_REG_MASK) << MIPS_RS_SHIFT) | \ + (((rt) & MIPS_REG_MASK) << MIPS_RT_SHIFT) | \ + (((rd) & MIPS_REG_MASK) << MIPS_RD_SHIFT)) + +/* GTE Command Format (The math engine trigger) + * Opcode is always MIPS_OP_COP2, RS is always 1 (CO). + * The lower 25 bits are the GTE-specific command payload. */ +#define GTE_CMD_BASE ((MIPS_OP_COP2 << MIPS_OPCODE_SHIFT) | (1 << 25)) +#define ENC_GTE_CMD(sf, mx, v, cv, lm, cmd) \ + (GTE_CMD_BASE | \ + (((sf) & 1) << 19) | (((mx) & 3) << 17) | (((v) & 3) << 15) | \ + (((cv) & 3) << 13) | (((lm) & 1) << 10) | ((cmd) & 0x3F)) diff --git a/code/duffle/math.h b/code/duffle/math.h index c8092c6..7e8c5f0 100644 --- a/code/duffle/math.h +++ b/code/duffle/math.h @@ -7,34 +7,34 @@ #define max(A, B) (((A) > (B)) ? 
(A) : (B)) #define clamp_bot(X, B) max(X, B) -typedef def_farray(U4, 2); -typedef def_farray(S2, 2); -typedef def_farray(S2, 3); -typedef def_farray(S4, 2); -typedef def_farray(S4, 3); -typedef def_farray(S4, 4); -typedef S2 A3A3_S2[3][3]; +typedef Array_(U4, 2); +typedef Array_(S2, 2); +typedef Array_(S2, 3); +typedef Array_(S4, 2); +typedef Array_(S4, 3); +typedef Array_(S4, 4); +typedef S2 A3x3_S2[3][3]; -typedef def_struct(Extent2_S2) { S2 width; S2 height; }; -typedef def_struct(Extent2_S4) { S4 width; S4 height; }; +typedef Struct_(Extent2_S2) { S2 width; S2 height; }; +typedef Struct_(Extent2_S4) { S4 width; S4 height; }; -typedef def_struct(V2_S2) { S2 x; S2 y; }; -typedef def_struct(V2_S4) { S4 x; S4 y; }; -typedef def_struct(V3_S2) { S2 x; S2 y; S2 z; S2 pad; }; -typedef def_struct(V3_S4) { S4 x; S4 y; S4 z; S4 pad; }; -typedef def_struct(V4_S2) { S2 x; S2 y; S2 z; S2 w; }; -typedef def_struct(V4_S4) { S4 x; S4 y; S4 z; S4 w; }; +typedef Struct_(V2_S2) { S2 x; S2 y; }; +typedef Struct_(V2_S4) { S4 x; S4 y; }; +typedef Struct_(V3_S2) { S2 x; S2 y; S2 z; S2 pad; }; +typedef Struct_(V3_S4) { S4 x; S4 y; S4 z; S4 pad; }; +typedef Struct_(V4_S2) { S2 x; S2 y; S2 z; S2 w; }; +typedef Struct_(V4_S4) { S4 x; S4 y; S4 z; S4 w; }; -typedef def_struct(R2_S2) { V2_S2 p0; V2_S2 p1; }; -typedef def_struct(R2_S4) { V2_S4 p0; V2_S4 p1; }; +typedef Struct_(R2_S2) { V2_S2 p0; V2_S2 p1; }; +typedef Struct_(R2_S4) { V2_S4 p0; V2_S4 p1; }; -typedef def_struct(Rect_S2) { S2 x; S2 y; S2 width; S2 height; }; -typedef def_struct(Rect_S4) { S4 x; S4 y; S4 width; S4 height; }; +typedef Struct_(Rect_S2) { S2 x; S2 y; S2 width; S2 height; }; +typedef Struct_(Rect_S4) { S4 x; S4 y; S4 width; S4 height; }; -typedef def_struct(M3_S2) { A3A3_S2 m; A3_S4 t; }; +typedef Struct_(M3_S2) { A3x3_S2 m; A3_S4 t; }; -typedef def_farray(V2_S2, 3); -typedef def_farray(V2_S2, 4); +typedef Array_(V2_S2, 3); +typedef Array_(V2_S2, 4); #define v2s2(x,y) (V2_S2){x,y} #define v3s2(x,y,z) (V3_S2){x,y,z,0} 
diff --git a/code/duffle/memory.h b/code/duffle/memory.h index e01d8c9..22d91cf 100644 --- a/code/duffle/memory.h +++ b/code/duffle/memory.h @@ -3,6 +3,13 @@ # include "dsl.h" #endif +#define MEM_ALIGNMENT_DEFAULT (2 * S_(void*)) +// Asserts start <= point <= end (inclusive); expands to exactly one statement. +#define assert_bounds(point, start, end) do { \ + assert((start) <= (point)); \ + assert((point) <= (end)); \ +} while(0) + inline U4 align_pow2(U4 x, U4 b) { assert(b != 0); assert((b & (b - 1)) == 0); // Check power of 2 @@ -11,17 +18,17 @@ inline U4 align_pow2(U4 x, U4 b) { #define align_struct(type_width) ((U4)(((type_width) + 3) & ~3)) -#define assert_bounds(point, start, end) do { \ - U4 pos_point = cast(U4, point); \ - U4 pos_start = cast(U4, start); \ - U4 pos_end = cast(U4, end); \ - assert(pos_start <= pos_point); \ - assert(pos_point <= pos_end); \ -} while(0) +FI_ void mem_bump(U4 start, U4 cap, U4*R_ used, U4 amount) { + assert(amount <= (cap - used[0])); + used[0] += amount; +} -void* memory_copy (void* restrict dest, void const* restrict src, U4 length) __asm__("memcpy"); -void* memory_copy_overlapping(void* restrict dest, void const* restrict src, U4 length); -B4 memory_zero (void* dest, U4 length); +FI_ U4 mem_copy (U4 dest, U4 src, U4 len) { return (U4)(__builtin_memcpy ((void*)dest, (void const*)src, len)); } +FI_ U4 mem_copy_overlapping(U4 dest, U4 src, U4 len) { return (U4)(__builtin_memmove((void*)dest, (void const*)src, len)); } +FI_ U4 mem_fill (U4 dest, U4 value, U4 len) { return (U4)(__builtin_memset ((void*)dest, (int) value, len)); } +FI_ B4 mem_zero (U4 dest, U4 len) { if(dest == 0){return false;} mem_fill(dest, 0, len); return true; } + +#pragma region DAG #define check_nil(nil, p) ((p) == 0 || (p) == nil) #define set_nil(nil, p) ((p) = nil) @@ -42,101 +49,70 @@ B4 memory_zero (void* dest, U4 length); ) #define sll_queue_push_n(f, l, n, next) sll_queue_push_nz(0, f, l, n, next) -#pragma region Allocator Interface -#if 0 -typedef def_enum(U4, AllocatorOp) { - AllocatorOp_Alloc_NoZero = 0, // If Alloc
exist, so must No_Zero - AllocatorOp_Alloc, - AllocatorOp_Free, - AllocatorOp_Reset, - AllocatorOp_Grow_NoZero, - AllocatorOp_Grow, - AllocatorOp_Shrink, - AllocatorOp_Rewind, - AllocatorOp_SavePoint, - AllocatorOp_Query, // Must always be implemented -}; -typedef def_enum(U4, AllocatorQueryFlags) { - AllocatorQuery_Alloc = (1 << 0), - AllocatorQuery_Free = (1 << 1), - // Wipe the allocator's state - AllocatorQuery_Reset = (1 << 2), - // Supports both grow and shrink - AllocatorQuery_Shrink = (1 << 4), - AllocatorQuery_Grow = (1 << 5), - AllocatorQuery_Resize = AllocatorQuery_Grow | AllocatorQuery_Shrink, - // Ability to rewind to a save point (ex: arenas, stack), must also be able to save such a point - AllocatorQuery_Rewind = (1 << 6), -}; -typedef struct AllocatorProc_In AllocatorProc_In; -typedef struct AllocatorProc_Out AllocatorProc_Out; -typedef void def_proc(AllocatorProc) (AllocatorProc_In In, AllocatorProc_Out* Out); -typedef def_struct(AllocatorSP) { - AllocatorProc* type_sig; - U4 slot; -}; -struct AllocatorProc_In { - void* data; - U4 requested_size; - U4 alignment; - union { - Slice_B1 old_allocation; - AllocatorSP save_point; - }; - AllocatorOp op; - byte_pad(4); -}; -struct AllocatorProc_Out { - union { - Slice_B1 allocation; - AllocatorSP save_point; - }; - AllocatorQueryFlags features; - U4 left; // Contiguous memory left - U4 max_alloc; - U4 min_alloc; - // byte_pad(8); -}; -typedef def_struct(AllocatorInfo) { - AllocatorProc* proc; - void* data; -}; -static_assert(size_of(AllocatorSP) <= size_of(Slice_B1)); -typedef def_struct(AllocatorQueryInfo) { - AllocatorSP save_point; - AllocatorQueryFlags features; - U4 left; // Contiguous memory left - U4 max_alloc; - U4 min_alloc; - // byte_pad(4); -}; -static_assert(size_of(AllocatorProc_Out) == size_of(AllocatorQueryInfo)); +#pragma endregion DAG -#define MEMORY_ALIGNMENT_DEFAULT (2 * size_of(void*)) +#pragma region Slice -AllocatorQueryInfo allocator_query(AllocatorInfo ainfo); +typedef unsigned char 
UTF8; +typedef Struct_(Str8) { UTF8* ptr; U4 len; }; +typedef Struct_(Slice_Str8) { Str8* ptr; U4 len; }; +#define txt(string_literal) (Str8){ (UTF8*) string_literal, S_(string_literal) - 1 } -void mem_free (AllocatorInfo ainfo, Slice_B1 mem); -void mem_reset (AllocatorInfo ainfo); -void mem_rewind (AllocatorInfo ainfo, AllocatorSP save_point); -AllocatorSP mem_save_point(AllocatorInfo ainfo); +typedef Struct_(Slice) { U4 ptr, len; }; // Untyped Slice +FI_ Slice slice_ut_(U4 ptr, U4 len) { return (Slice){ptr, len}; } -typedef def_struct(Opts_mem_alloc) { U4 alignment; B4 no_zero; byte_pad(4); }; -typedef def_struct(Opts_mem_grow) { U4 alignment; B4 no_zero; byte_pad(4); }; -typedef def_struct(Opts_mem_shrink) { U4 alignment; }; -typedef def_struct(Opts_mem_resize) { U4 alignment; B4 no_zero; byte_pad(4); }; +#define Slice_(type) Struct_(tmpl(Slice,type)) { type* ptr; U4 len; } +typedef Slice_(B1); +#define slice_assert(s) do { assert((s).ptr != 0); assert((s).len > 0); } while(0) +#define slice_end(slice) ((slice).ptr + (slice).len) +#define S_slice(s) ((s).len * S_((s).ptr[0])) -Slice_B1 mem__alloc (AllocatorInfo ainfo, U4 size, Opts_mem_alloc* opts); -Slice_B1 mem__grow (AllocatorInfo ainfo, Slice_B1 mem, U4 size, Opts_mem_grow* opts); -Slice_B1 mem__resize(AllocatorInfo ainfo, Slice_B1 mem, U4 size, Opts_mem_resize* opts); -Slice_B1 mem__shrink(AllocatorInfo ainfo, Slice_B1 mem, U4 size, Opts_mem_shrink* opts); +#define slice_ut(ptr,len) slice_ut_(u4_(ptr), u4_(len)) +#define slice_ut_arr(a) slice_ut_(u4_(a), S_(a)) +#define slice_to_ut(s) slice_ut_(u4_((s).ptr), S_slice(s)) -#define mem_alloc(ainfo, size, ...) mem__alloc (ainfo, size, opt_args(Opts_mem_alloc, __VA_ARGS__)) -#define mem_grow(ainfo, mem, size, ...) mem__grow (ainfo, mem, size, opt_args(Opts_mem_grow, __VA_ARGS__)) -#define mem_resize(ainfo, mem, size, ...) mem__resize(ainfo, mem, size, opt_args(Opts_mem_resize, __VA_ARGS__)) -#define mem_shrink(ainfo, mem, size, ...) 
mem__shrink(ainfo, mem, size, opt_args(Opts_mem_shrink, __VA_ARGS__)) +#define slice_iter(container, iter) (T_((container).ptr) iter = (container).ptr; iter != slice_end(container); ++ iter) +#define slice_arg_from_array(type, ...) & (tmpl(Slice,type)) { .ptr = array_decl(type,__VA_ARGS__), .len = array_len( array_decl(type,__VA_ARGS__)) } -#define alloc_type(ainfo, type, ...) (type*) mem__alloc(ainfo, size_of(type), opt_args(Opts_mem_alloc, __VA_ARGS__)).ptr -#define alloc_slice(ainfo, type, num, ...) (tmpl(Slice,type)){ mem__alloc(ainfo, size_of(type) * num, opt_args(Opts_mem_alloc, __VA_ARGS__)).ptr, num } -#endif -#pragma endregion Allocator Interface +FI_ void slice_zero_(Slice s) { slice_assert(s); mem_zero(s.ptr, s.len); } +#define slice_zero(s) slice_zero_(slice_to_ut(s)) + +FI_ void slice_copy_(Slice dest, Slice src) { + assert(dest.len >= src.len); + slice_assert(dest); + slice_assert(src); + mem_copy(dest.ptr, src.ptr, src.len); +} +#define slice_copy(dest, src) do { \ + static_assert(T_same(dest, src)); \ + slice_copy_(slice_to_ut(dest), slice_to_ut(src)); \ +} while(0) + +#pragma endregion Slice + +#pragma region FArena +typedef Opt_(farena) { U4 alignment, type_width; }; +typedef Struct_(FArena) { U4 start, capacity, used; }; +FI_ void farena_init(FArena_R arena, Slice mem) { assert(arena != nullptr); + arena->start = mem.ptr; + arena->capacity = mem.len; + arena->used = 0; +} +FI_ FArena farena_make(Slice mem) { FArena a; farena_init(& a, mem); return a; } +I_ Slice farena_push(FArena_R arena, U4 amount, Opt_farena o) { + if (amount == 0) { return (Slice){}; } + U4 desired = amount * (o.type_width == 0 ? 1 : o.type_width); + U4 to_commit = align_pow2(desired, o.alignment ? 
o.alignment : MEM_ALIGNMENT_DEFAULT); + mem_bump(arena->start, arena->capacity, & arena->used, to_commit); + return (Slice){ arena->start + arena->used - to_commit, to_commit }; // used is already bumped; step back to the first byte of this block +} +FI_ void farena_reset(FArena_R arena) { arena->used = 0; } +FI_ void farena_rewind(FArena_R arena, U4 save_point) { + assert(save_point <= arena->used); // save_point is the `used` offset returned by farena_save + arena->used = save_point; +} +FI_ U4 farena_save(FArena arena) { return arena.used; } +#define farena_push_(arena, amount, ...) farena_push((arena), (amount), opt_(farena, __VA_ARGS__)) +#define farena_push_type(arena, type, ...) C_(type*, farena_push((arena), 1, opt_(farena, .type_width=S_(type), __VA_ARGS__)).ptr) +#define farena_push_array(arena, type, amount, ...) (tmpl(Slice,type)){ C_(type*, farena_push((arena), (amount), opt_(farena, .type_width=S_(type), __VA_ARGS__)).ptr), (amount) } +#pragma endregion FArena diff --git a/code/duffle/mips.h b/code/duffle/mips.h new file mode 100644 index 0000000..cfb2802 --- /dev/null +++ b/code/duffle/mips.h @@ -0,0 +1,203 @@ +#ifdef INTELLISENSE_DIRECTIVES +# pragma once +# include "dsl.h" +#endif + +/* --- MIPS CPU Registers --- */ +typedef enum { + R_ZERO = 0, R_AT = 1, R_V0 = 2, R_V1 = 3, + R_A0 = 4, R_A1 = 5, R_A2 = 6, R_A3 = 7, + R_T0 = 8, R_T1 = 9, R_T2 = 10, R_T3 = 11, + R_T4 = 12, R_T5 = 13, R_T6 = 14, R_T7 = 15, + R_S0 = 16, R_S1 = 17, R_S2 = 18, R_S3 = 19, + R_S4 = 20, R_S5 = 21, R_S6 = 22, R_S7 = 23, + R_T8 = 24, R_T9 = 25, R_K0 = 26, R_K1 = 27, + R_GP = 28, R_SP = 29, R_FP = 30, R_RA = 31 +}; + +/* Semantic Aliases for MIPS Registers (O32 ABI) */ + +#define REG_DISCARD R_ZERO /* Hardwired to 0 */ +#define REG_RETURN_VAL R_V0 /* Function return value */ +#define REG_RETURN_VAL2 R_V1 /* Second return value (e.g., 64-bit) */ +#define REG_ARG_0 R_A0 /* First function argument */ +#define REG_ARG_1 R_A1 /* Second function argument */ +#define REG_ARG_2 R_A2 /* Third function argument */ +#define REG_ARG_3 R_A3 /* Fourth function 
argument */ +#define REG_TEMP_0 R_T0 /* Temporary (Caller saved) */ +#define REG_TEMP_1 R_T1 /* Temporary (Caller saved) */ +#define REG_TEMP_2 R_T2 /* Temporary (Caller saved) */ +#define REG_SAVED_0 R_S0 /* Saved register (Callee saved) */ +#define REG_STACK_PTR R_SP /* Stack Pointer */ +#define REG_RETURN_ADDR R_RA /* Return Address (populated by JAL) */ + +/* --- MIPS CPU Opcodes (Bits 31-26) --- */ + +#define MIPS_OP_SPECIAL 0x00 /* R-Type instructions (uses FUNCT field) */ +#define MIPS_OP_BCOND 0x01 /* Branch on condition */ +#define MIPS_OP_J 0x02 /* Jump */ +#define MIPS_OP_JAL 0x03 /* Jump and Link */ +#define MIPS_OP_BEQ 0x04 /* Branch on Equal */ +#define MIPS_OP_BNE 0x05 /* Branch on Not Equal */ +#define MIPS_OP_BLEZ 0x06 /* Branch on Less Than or Equal to Zero */ +#define MIPS_OP_BGTZ 0x07 /* Branch on Greater Than Zero */ +#define MIPS_OP_ADDI 0x08 /* Add Immediate */ +#define MIPS_OP_ADDIU 0x09 /* Add Immediate Unsigned */ +#define MIPS_OP_SLTI 0x0A /* Set on Less Than Immediate */ +#define MIPS_OP_SLTIU 0x0B /* Set on Less Than Immediate Unsigned */ +#define MIPS_OP_ANDI 0x0C /* AND Immediate */ +#define MIPS_OP_ORI 0x0D /* OR Immediate */ +#define MIPS_OP_XORI 0x0E /* XOR Immediate */ +#define MIPS_OP_LUI 0x0F /* Load Upper Immediate */ +#define MIPS_OP_COP0 0x10 /* Coprocessor 0 (System) */ +#define MIPS_OP_COP2 0x12 /* Coprocessor 2 (GTE) */ +#define MIPS_OP_LB 0x20 /* Load Byte */ +#define MIPS_OP_LH 0x21 /* Load Halfword */ +#define MIPS_OP_LW 0x23 /* Load Word */ +#define MIPS_OP_LBU 0x24 /* Load Byte Unsigned */ +#define MIPS_OP_LHU 0x25 /* Load Halfword Unsigned */ +#define MIPS_OP_SB 0x28 /* Store Byte */ +#define MIPS_OP_SH 0x29 /* Store Halfword */ +#define MIPS_OP_SW 0x2B /* Store Word */ + +/* --- MIPS CPU Function Codes (Bits 5-0, used when OP == MIPS_OP_SPECIAL) --- */ + +#define MIPS_FC_SLL 0x00 /* Shift Word Left Logical */ +#define MIPS_FC_SRL 0x02 /* Shift Word Right Logical */ +#define MIPS_FC_SRA 0x03 /* Shift Word Right 
Arithmetic */ +#define MIPS_FC_SLLV 0x04 /* Shift Word Left Logical Variable */ +#define MIPS_FC_SRLV 0x06 /* Shift Word Right Logical Variable */ +#define MIPS_FC_SRAV 0x07 /* Shift Word Right Arithmetic Variable */ +#define MIPS_FC_JR 0x08 /* Jump Register */ +#define MIPS_FC_JALR 0x09 /* Jump and Link Register */ +#define MIPS_FC_SYSCALL 0x0C /* System Call */ +#define MIPS_FC_BREAK 0x0D /* Breakpoint */ +#define MIPS_FC_MFHI 0x10 /* Move From HI */ +#define MIPS_FC_MTHI 0x11 /* Move To HI */ +#define MIPS_FC_MFLO 0x12 /* Move From LO */ +#define MIPS_FC_MTLO 0x13 /* Move To LO */ +#define MIPS_FC_MULT 0x18 /* Multiply Word */ +#define MIPS_FC_MULTU 0x19 /* Multiply Unsigned Word */ +#define MIPS_FC_DIV 0x1A /* Divide Word */ +#define MIPS_FC_DIVU 0x1B /* Divide Unsigned Word */ +#define MIPS_FC_ADD 0x20 /* Add Word */ +#define MIPS_FC_ADDU 0x21 /* Add Unsigned Word */ +#define MIPS_FC_SUB 0x22 /* Subtract Word */ +#define MIPS_FC_SUBU 0x23 /* Subtract Unsigned Word */ +#define MIPS_FC_AND 0x24 /* AND */ +#define MIPS_FC_OR 0x25 /* OR */ +#define MIPS_FC_XOR 0x26 /* XOR */ +#define MIPS_FC_NOR 0x27 /* NOR */ +#define MIPS_FC_SLT 0x2A /* Set on Less Than */ +#define MIPS_FC_SLTU 0x2B /* Set on Less Than Unsigned */ + +/* --- Coprocessor 0 (System Control & Exceptions) --- */ + +#define MIPS_COP_MF 0x00 /* Move From Coprocessor */ +#define MIPS_COP_MT 0x04 /* Move To Coprocessor */ + + + + +// Bitfield Packets (Encoders) + +/* Bit Offsets for MIPS Instruction Fields */ + +#define MIPS_OPCODE_SHIFT 26 +#define MIPS_RS_SHIFT 21 +#define MIPS_RT_SHIFT 16 +#define MIPS_RD_SHIFT 11 +#define MIPS_SHAMT_SHIFT 6 +#define MIPS_FC_SHIFT 0 + +/* Bit Masks to prevent overflow into adjacent fields */ + +#define MIPS_OPCODE_MASK 0x3F +#define MIPS_REG_MASK 0x1F +#define MIPS_SHAMT_MASK 0x1F +#define MIPS_FC_MASK 0x3F +#define MIPS_IMM_MASK 0xFFFF + +/* MIPS R-Type Instruction Format (Register-to-Register) */ +#define ENC_R(op, rs, rt, rd, shamt, funct) \ + ((((op) & 
MIPS_OPCODE_MASK) << MIPS_OPCODE_SHIFT) | \ + (((rs) & MIPS_REG_MASK) << MIPS_RS_SHIFT) | \ + (((rt) & MIPS_REG_MASK) << MIPS_RT_SHIFT) | \ + (((rd) & MIPS_REG_MASK) << MIPS_RD_SHIFT) | \ + (((shamt) & MIPS_SHAMT_MASK) << MIPS_SHAMT_SHIFT) | \ + (((funct) & MIPS_FC_MASK) << MIPS_FC_SHIFT)) + +/* MIPS I-Type Instruction Format (Immediate/Constant) */ +#define ENC_I(op, rs, rt, imm) \ + ((((op) & MIPS_OPCODE_MASK) << MIPS_OPCODE_SHIFT) | \ + (((rs) & MIPS_REG_MASK) << MIPS_RS_SHIFT) | \ + (((rt) & MIPS_REG_MASK) << MIPS_RT_SHIFT) | \ + (((imm) & MIPS_IMM_MASK))) + +/* COP0 (System) Transfer Format */ +#define ENC_COP0_TX(sub, rt, rd) \ + ((MIPS_OP_COP0 << MIPS_OPCODE_SHIFT) | \ + (((sub) & MIPS_REG_MASK) << MIPS_RS_SHIFT) | \ + (((rt) & MIPS_REG_MASK) << MIPS_RT_SHIFT) | \ + (((rd) & MIPS_REG_MASK) << MIPS_RD_SHIFT)) + +/* COP0 Return From Exception (rfe) */ +#define ENC_RFE() 0x42000010 + + +// Binary Metaprogramming + +typedef U4 const Code; +#define def_code_blob(sym) sym ## _ ## blob [] align_(4) = + +// #define def_code_blob(func_name, func_signature, ...) 
\ +// internal U4 const \ +// tmpl(func_name,blob) [] align(4) \ +// = { \ +// __VA_ARGS__ \ +// }; \ +// internal func_signature func_name = (func_signature)func_name##_blob; + +internal +Code def_code_blob(mips_flush_icache) { + /* addiu , , -8 */ + ENC_I(MIPS_OP_ADDIU, REG_STACK_PTR, REG_STACK_PTR, -8), + /* sw , 4() */ + ENC_I(MIPS_OP_SW, REG_STACK_PTR, REG_RETURN_ADDR, 4), + /* addiu , , 0x44 (BIOS Call 0x44: FlushCache) */ + ENC_I(MIPS_OP_ADDIU, REG_DISCARD, REG_RETURN_VAL, 0x44), + /* addiu , , 0xA0 (BIOS A0 Table Address) */ + ENC_I(MIPS_OP_ADDIU, REG_DISCARD, REG_TEMP_1, 0xA0), + /* jalr , (Jump to BIOS) */ + ENC_R(MIPS_OP_SPECIAL, REG_TEMP_1, R_ZERO, REG_RETURN_ADDR, 0, MIPS_FC_JALR), + /* nop (Branch delay slot) */ + ENC_R(MIPS_OP_SPECIAL, R_ZERO, R_ZERO, R_ZERO, 0, MIPS_FC_SLL), + /* lw , 4() */ + ENC_I(MIPS_OP_LW, REG_STACK_PTR, REG_RETURN_ADDR, 4), + /* jr (Return to C code) */ + ENC_R(MIPS_OP_SPECIAL, REG_RETURN_ADDR, R_ZERO, R_ZERO, 0, MIPS_FC_JR), + /* addiu , , 8 (Branch delay slot: restore stack pointer) */ + ENC_I(MIPS_OP_ADDIU, REG_STACK_PTR, REG_STACK_PTR, 8) +}; +FI_ void mips_flush_icache(void) { C_(VoidFn*, mips_flush_icache_blob)(); } + +/* Flushes the Instruction Cache so the CPU sees our newly written tape */ +// FI_ void mips_flush_icache(void) { +// /* Uses standard PS1 BIOS A0 table call 0x44 */ +// __asm__ volatile ( +// "li $v0, 0x44\n\t" +// "li $t1, 0xA0\n\t" +// "jalr $t1\n\t" +// "nop" +// : : : "v0", "t1", "ra", "memory" +// ); +// } + + + + + +// TAPE & EMITTERS + + + diff --git a/code/gte_hello/hello_gte.c b/code/gte_hello/hello_gte.c index dd6fee7..470d2ef 100644 --- a/code/gte_hello/hello_gte.c +++ b/code/gte_hello/hello_gte.c @@ -17,21 +17,21 @@ enum { }; typedef U4 OrderingTable_Buffer[OrderingTbl_Len]; -typedef def_farray(OrderingTable_Buffer, 2); +typedef Array_(OrderingTable_Buffer, 2); typedef B1 PrimitiveBuffer[PrimitiveBuff_Len]; -typedef def_farray(PrimitiveBuffer, 2); -typedef def_struct(PrimitiveArena) { +typedef 
Array_(PrimitiveBuffer, 2); +typedef Struct_(PrimitiveArena) { A2_PrimitiveBuffer buf; U4 used; }; #define Cube_num_verts 8 -typedef def_farray(V3_S2, Cube_num_verts); +typedef Array_(V3_S2, Cube_num_verts); #define Cube_num_faces 6 -typedef def_farray(V4_S2, Cube_num_faces); -void ent_cube128_init(A8_V3_S2* verts, A6_V4_S2* faces) { - memory_copy(verts, & (A8_V3_S2) { +typedef Array_(V4_S2, Cube_num_faces); +I_ void ent_cube128_init(A8_V3_S2* verts, A6_V4_S2* faces) { + LP_ A8_V3_S2 baked_verts = (A8_V3_S2) { { -128, -128, -128 }, { 128, -128, -128 }, { 128, -128, 128 }, @@ -40,18 +40,20 @@ void ent_cube128_init(A8_V3_S2* verts, A6_V4_S2* faces) { { 128, 128, -128 }, { 128, 128, 128 }, { -128, 128, 128 } - }, size_of(A8_V3_S2) ); - memory_copy(faces, & (A6_V4_S2) { + }; + LP_ A6_V4_S2 baked_faces = (A6_V4_S2) { { 3, 2, 0, 1 }, { 0, 1, 4, 5 }, { 4, 5, 7, 6 }, { 1, 2, 5, 6 }, { 2, 3, 6, 7 }, { 3, 0, 7, 4 }, - }, size_of(A6_V4_S2) ); + }; + mem_copy(u4_(verts), u4_(& baked_verts), S_(A8_V3_S2) ); + mem_copy(u4_(faces), u4_(& baked_faces), S_(A6_V4_S2) ); return; } -typedef def_struct(Ent_Cube) { +typedef Struct_(Ent_Cube) { V3_S4 accel; V3_S4 vel; V3_S4 pos; @@ -62,22 +64,24 @@ typedef def_struct(Ent_Cube) { }; #define Floor_num_verts 4 -typedef def_farray(V3_S2, Floor_num_verts); +typedef Array_(V3_S2, Floor_num_verts); #define Floor_num_faces 2 -typedef def_farray(V3_S2, Floor_num_faces); -void ent_floor_init(A4_V3_S2* verts, A2_V3_S2* faces) { - memory_copy(verts, &(A4_V3_S2) { +typedef Array_(V3_S2, Floor_num_faces); +I_ void ent_floor_init(A4_V3_S2* verts, A2_V3_S2* faces) { + LP_ A4_V3_S2 baked_verts = (A4_V3_S2) { { -900, 0, -900 }, { -900, 0, 900 }, { 900, 0, -900 }, { 900, 0, 900 }, - }, size_of(A8_V3_S2)); - memory_copy(faces, & (A2_V3_S2) { + }; + LP_ A2_V3_S2 baked_faces = (A2_V3_S2) { { 0, 1, 2 }, { 1, 3, 2 }, - }, size_of(A2_V3_S2)); + }; + mem_copy(u4_(verts), u4_(& baked_verts), S_(A4_V3_S2)); + mem_copy(u4_(faces), u4_(& baked_faces), S_(A2_V3_S2)); 
}; -typedef def_struct(Ent_Floor) { +typedef Struct_(Ent_Floor) { V3_S4 accel; V3_S4 pos; V3_S4 scale; @@ -86,7 +90,7 @@ typedef def_struct(Ent_Floor) { A2_V3_S2 faces; }; -typedef def_struct(SMemory) { +typedef Struct_(SMemory) { DoubleBuffer screen_buf; A2_OrderingTable_Buffer ordering_tbl; PrimitiveArena primitives; @@ -100,7 +104,7 @@ typedef def_struct(SMemory) { global SMemory static_mem; extern SMemory static_mem; -B1* prim__alloc(U4 type_width, Str8 type_name) { +I_ B1* prim__alloc(U4 type_width, Str8 type_name) { gknown PrimitiveArena* pa = & static_mem.primitives; gknown B1* buf = (B1*) r_(static_mem.primitives.buf)[static_mem.active_buf_id]; assert(pa->used + type_width < PrimitiveBuff_Len); @@ -108,7 +112,7 @@ B1* prim__alloc(U4 type_width, Str8 type_name) { pa->used += type_width; return next; } -#define prim_alloc(type) (type*)prim__alloc(size_of(type), txt( stringify(type))) +#define prim_alloc(type) (type*)prim__alloc(S_(type), txt( stringify(type))) void gp_screen_init_c11(DoubleBuffer* screen_buf, S2* active_buf_id) { diff --git a/code/gte_hello/hello_gte.h b/code/gte_hello/hello_gte.h index 4ab9b25..1778a39 100644 --- a/code/gte_hello/hello_gte.h +++ b/code/gte_hello/hello_gte.h @@ -5,8 +5,8 @@ # include "duffle/gp.h" #endif -typedef def_struct(DrawEnv_Packed) { U4 tag; U4 code[15]; }; -typedef def_struct(DrawEnv) { +typedef Struct_(DrawEnv_Packed) { U4 tag; U4 code[15]; }; +typedef Struct_(DrawEnv) { Rect_S2 clip_area; A2_S2 drawing_offset; Rect_S2 texture_window; @@ -17,7 +17,7 @@ typedef def_struct(DrawEnv) { RGB8 initial_bg_color; DrawEnv_Packed dr_env; // reserved }; -typedef def_struct(DisplayEnv) { +typedef Struct_(DisplayEnv) { Rect_S2 display_area; Rect_S2 screen; B1 vinterlace; @@ -25,9 +25,9 @@ typedef def_struct(DisplayEnv) { B1 pad0; B1 pad1; }; -typedef def_farray(DrawEnv, 2); -typedef def_farray(DisplayEnv, 2); -typedef def_struct(DoubleBuffer) { +typedef Array_(DrawEnv, 2); +typedef Array_(DisplayEnv, 2); +typedef 
Struct_(DoubleBuffer) { A2_DrawEnv draw; A2_DisplayEnv display; }; @@ -58,7 +58,7 @@ U4 vsync(U4 mode) __asm__("VSync"); void draw_orderingtbl(U4* buf) __asm__("DrawOTag"); -typedef def_struct(PolyTag) { +typedef Struct_(PolyTag) { U4 addr: 24; U4 len: 8; RGB8 color; @@ -106,7 +106,7 @@ typedef def_struct(PolyTag) { // #define setLineF4(p) set_len(p, 6), set_code(p, 0x4c),(p)->pad = 0x55555555 // #define setLineG4(p) set_len(p, 9), set_code(p, 0x5c),(p)->pad = 0x55555555, (p)->p2 = 0, (p)->p3 = 0 -typedef def_struct(Poly_F3) { +typedef Struct_(Poly_F3) { U4 tag; RGB8 color; B1 code; @@ -120,14 +120,14 @@ typedef def_struct(Poly_F3) { }; }; -typedef def_struct(Poly_G3) { +typedef Struct_(Poly_G3) { U4 tag; RGB8 c0; B1 code; V2_S2 p0; RGB8 c1; B1 pad1; V2_S2 p1; RGB8 c2; B1 pad2; V2_S2 p2; }; -typedef def_struct(Poly_F4) { +typedef Struct_(Poly_F4) { U4 tag; RGB8 color; B1 code; @@ -142,7 +142,7 @@ typedef def_struct(Poly_F4) { }; }; -typedef def_struct(Poly_G4) { +typedef Struct_(Poly_G4) { U4 tag; RGB8 c0; B1 code; V2_S2 p0; RGB8 c1; B1 pad1; V2_S2 p1; RGB8 c2; B1 pad2; @@ -150,7 +150,7 @@ typedef def_struct(Poly_G4) { V2_S2 p3; }; -typedef def_struct(Tile) { +typedef Struct_(Tile) { U4 tag; RGB8 color; B1 code; @@ -169,7 +169,7 @@ M3_S2* m3s2_scale (M3_S2* mat, V3_S4* vec) __asm__("ScaleMatrix"); // Rotation, Translation, Perspective S4 rtp_v3s2_raw(V3_S2* vec, S4* xy, S4* pp, S4* flag) __asm__("RotTransPers"); -FI_ S4 rtp_v3s2(V3_S2* vec, V2_S2* xy, A2_S2* pp, S4* flag) { return rtp_v3s2_raw(vec, cast(S4*R_, & xy->x), cast(S4*R_, pp), r_(flag)); } +FI_ S4 rtp_v3s2(V3_S2* vec, V2_S2* xy, A2_S2* pp, S4* flag) { return rtp_v3s2_raw(vec, C_(S4*R_, & xy->x), C_(S4*R_, pp), r_(flag)); } S4 rtp_avg_nclip_a3_v3s2_raw(V3_S2* v0, V3_S2* v1, V3_S2* v2, S4* xy1, S4* xy2, S4* xy3, S4* pp, S4* otz, S4* flag) __asm__("RotAverageNclip3"); FI_ S4 rtp_avg_nclip_a3_v3s2( @@ -179,8 +179,8 @@ FI_ S4 rtp_avg_nclip_a3_v3s2( ){ return rtp_avg_nclip_a3_v3s2_raw( v0, v1, v2, - cast(S4*R_, 
xy0), cast(S4*R_, xy1), cast(S4*R_, xy2), - cast(S4*R_, pp), cast(S4*R_, otz), cast(S4*R_, flag) + C_(S4*R_, xy0), C_(S4*R_, xy1), C_(S4*R_, xy2), + C_(S4*R_, pp), C_(S4*R_, otz), C_(S4*R_, flag) ); } @@ -192,8 +192,8 @@ FI_ S4 rtp_avg_nclip_a4_v3s2( ){ return rtp_avg_nclip_a4_v3s2_raw( v0, v1, v2, v3, - cast(S4*R_, xy0), cast(S4*R_, xy1), cast(S4*R_, xy2), cast(S4*R_, xy3), - cast(S4*R_, pp), cast(S4*R_, otz), cast(S4*R_, flag) + C_(S4*R_, xy0), C_(S4*R_, xy1), C_(S4*R_, xy2), C_(S4*R_, xy3), + C_(S4*R_, pp), C_(S4*R_, otz), C_(S4*R_, flag) ); } diff --git a/scripts/build_psyq.ps1 b/scripts/build_psyq.ps1 index 01f0f76..96f57c2 100644 --- a/scripts/build_psyq.ps1 +++ b/scripts/build_psyq.ps1 @@ -24,6 +24,7 @@ $f_define = "-D" $f_include = "-I" $f_output = "-o" $f_std_c11 = "-std=c11" +$f_std_c23 = "-std=c23" # Warning Flags $f_wall = "-Wall" @@ -139,6 +140,7 @@ function compile-unit { param( $f_arch_no_shared, $f_arch_no_stack_prot ) + # $compile_args += $f_std_c23 $compile_args += ($f_include + $path_psyq_imyu_inc) $compile_args += ($f_include + $path_nugget) @@ -350,3 +352,48 @@ function build-gte_hello { make-binary $elf $exe } build-gte_hello + +function Send-ToEmulator { param( + [string]$exePath +) + $uri = "http://localhost:8080/api/v1/load-exec" + + # Absolute path is safest for the emulator web server + $absolutePath = [System.IO.Path]::GetFullPath($exePath) + + # Create JSON payload pointing to your compiled .ps-exe + $body = @{ + filename = $absolutePath + } | ConvertTo-Json + + Write-Host "Pushing hot-reload to PCSX-Redux..." -ForegroundColor Magenta + try { + $response = Invoke-RestMethod -Uri $uri -Method Post -Body $body -ContentType "application/json" + Write-Host "Hot-reload successful!" -ForegroundColor Green + } catch { + Write-Warning "Could not connect to PCSX-Redux web server. Ensure the emulator is running and Web Server is enabled." 
+ } +} + +# # Automatically hot-reloads it into the running emulator +# Send-ToEmulator (join-path $path_build 'hello_gte.ps-exe') + +# --- Hot Reload via PCSX-Redux Web Server --- +$exe_path = join-path $path_build 'hello_gte.ps-exe' +$absolute_path = [System.IO.Path]::GetFullPath($exe_path) + +# PCSX-Redux expects the file location in the URL query string! +# We URL-encode the path to ensure backslashes and spaces don't break the HTTP request. +$encoded_path = [uri]::EscapeDataString($absolute_path) +$uri = "http://localhost:8080/api/v1/load-exec?path=$encoded_path" + +# Write-Host "Pushing hot-reload to PCSX-Redux..." -ForegroundColor Magenta +# try { +# # Send the request with the query string included +# Invoke-RestMethod -Uri $uri -Method Post +# Write-Host "Hot-reload successful!" -ForegroundColor Green +# } catch { +# Write-Host "Failed to hot-reload." -ForegroundColor Red +# # This will print the *actual* HTTP error instead of our generic warning +# Write-Host $_.Exception.Message -ForegroundColor Yellow +# }