diff --git a/code/duffle/gte.h b/code/duffle/gte.h
index b0de607..dfdff00 100644
--- a/code/duffle/gte.h
+++ b/code/duffle/gte.h
@@ -271,8 +271,12 @@ enum { _C2_OPS_ = 0
  * - rd: COP2 control register index (0..31)
  */
 #define enc_gte_tx(sub, rt, rd) (enc_op(op_cop2) | enc_rs(sub) | enc_rt(rt) | enc_rd(rd))
-#define gte_mt(rt, rd) enc_gte_tx(cop_mt, (rt), (rd)) /* Move GPR (rt) to GTE Control Register (rd) */
-#define gte_mf(rt, rd) enc_gte_tx(cop_mf, (rt), (rd)) /* Move GTE Control Register (rd) to GPR (rt) */
+
+/* Explicit GTE Data vs Control Register Transfers (first argument of enc_gte_tx is the sub-op placed in the rs field) */
+#define gte_mf(rt, rd) enc_gte_tx(0x00, (rt), (rd)) /* mfc2: Move from GTE Data Reg (e.g. MAC0, OTZ) */
+#define gte_cf(rt, rd) enc_gte_tx(0x02, (rt), (rd)) /* cfc2: Move from GTE Control Reg */
+#define gte_mt(rt, rd) enc_gte_tx(0x04, (rt), (rd)) /* mtc2: Move to GTE Data Reg (e.g. VXY0) */
+#define gte_ct(rt, rd) enc_gte_tx(0x06, (rt), (rd)) /* ctc2: Move to GTE Control Reg (e.g. Matrices) */
 
 /* COP2 Data Load (lwc2): `lwc2 rt, off(rs)`
  * Layout: [op_lwc2:6][rs:5][rt:5][imm:16]
diff --git a/code/duffle/lottes_tape.h b/code/duffle/lottes_tape.h
new file mode 100644
index 0000000..768bdba
--- /dev/null
+++ b/code/duffle/lottes_tape.h
@@ -0,0 +1,173 @@
+#ifdef INTELLISENSE_DIRECTIVES
+# pragma once
+# include "dsl.h"
+# include "gcc_asm.h"
+# include "mips.h"
+# include "gte.h"
+# include "memory.h"
+#endif
+
+/* R_T8 is our dedicated Tape Pointer (TP) */
+#define R_TP R_T8
+#define R_TP_Code R_T8_Code
+
+/* The 'Yield' sequence for CodeBlobs: load the next tape word into R_T9,
+ * advance TP past it, then jump to it. The add_ui sits between the load and
+ * the jump that consumes R_T9; the trailing nop fills the jump delay slot. */
+#define mips_yield \
+      load_word(R_T9, R_TP, 0) \
+    , add_ui(R_TP, R_TP, 4) \
+    , jump_reg(R_T9) \
+    , nop
+
+/* The 'Exit' Atom: jumps to rret_addr, ending the tape. */
+internal Code CodeBlob_(tape_exit) { jump_reg(rret_addr), nop };
+
+typedef Slice_(U4);
+
+/* Run a tape: jalr to the code address stored in tape.ptr[0], with TP already
+ * advanced to the following word. Atoms chain onward through mips_yield.
+ *   r_prim_cursor  primitive write cursor, carried in R_T7 and written back
+ *   face_cursor    carried in R_T4
+ *   vert_base      carried in R_T5
+ *   ot_base        carried in R_T6
+ * Only tape.ptr is read; the tape is expected to end with the exit atom. */
+FI_ void tape_run(Slice_U4 tape, B1** r_prim_cursor, void* face_cursor, void* vert_base, void* ot_base) {
+    register U4* tp rgcc(R_TP) = tape.ptr;
+    register B1* pcur rgcc(R_T7) = *r_prim_cursor;
+    register void* r_t4 rgcc(R_T4) = face_cursor;
+    register void* r_t5 rgcc(R_T5) = vert_base;
+    register void* r_t6 rgcc(R_T6) = ot_base;
+
+    asm volatile(
+        "lw $25, 0(%0);"
+        "addiu %0, %0, 4;"
+        "jalr $25;"
+        "nop;"
+        : "+r"(tp), "+r"(pcur), "+r"(r_t4), "+r"(r_t5), "+r"(r_t6)
+        :
+        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "t9", "ra", "memory"
+    );
+
+    *r_prim_cursor = pcur;
+}
+
+/* Builds a tape by pushing 32-bit words onto an FArena. */
+typedef Struct_(TapeBuilder) {FArena* arena;};
+FI_ TapeBuilder tb_begin(FArena* arena) {return (TapeBuilder){ arena };}
+
+/* Push one word (an atom's code address or an inline operand) onto the tape. */
+I_ void tb_emit(TapeBuilder* tb, Code* atom) {
+    U4* slot = farena_push_type(tb->arena, U4);
+    slot[0] = (U4)atom;
+}
+
+/* Append the exit atom and return the whole arena (start, used / 4 words) as the tape. */
+I_ Slice_U4 tb_end(TapeBuilder* tb) {
+    tb_emit(tb, code_tape_exit);
+    return (Slice_U4){ (U4*)tb->arena->start, tb->arena->used / 4 };
+}
+
+/* Atom: pop a matrix address from the tape and copy eight words from it into
+ * GTE control registers via ctc2. Clobbers R_T0-R_T3. */
+internal Code CodeBlob_(atom_set_gte_world) {
+    /* Pop matrix address from tape into R_T3 ($11) */
+    load_word(R_T3, R_TP, 0),
+    add_ui(R_TP, R_TP, 4),
+
+    /* Load 3x3 Rotation + 3x1 Translation from R_T3 into GTE CONTROL Regs (ctc2) */
+    load_word(R_T0, R_T3, 0), load_word(R_T1, R_T3, 4),
+    gte_ct(R_T0, gte_cr_RT11), gte_ct(R_T1, gte_cr_RT12),
+
+    load_word(R_T0, R_T3, 8), load_word(R_T1, R_T3, 12), load_word(R_T2, R_T3, 16),
+    gte_ct(R_T0, gte_cr_RT13), gte_ct(R_T1, gte_cr_RT21), gte_ct(R_T2, gte_cr_RT22),
+
+    load_word(R_T0, R_T3, 20), load_word(R_T1, R_T3, 24), load_word(R_T2, R_T3, 28),
+    gte_ct(R_T0, gte_cr_TRX), gte_ct(R_T1, gte_cr_TRY), gte_ct(R_T2, gte_cr_TRZ),
+
+    mips_yield
+};
+
+/* Atom: project one triangle and, when it is front-facing and its depth index
+ * is in range, emit it as a 5-word primitive linked into the ordering table.
+ *   In:  R_T4 face cursor (three halfword vertex indices), R_T5 vertex base
+ *        (8 bytes per vertex), R_T6 ordering table base, R_T7 primitive cursor.
+ *   Out: R_T4 += 8 always; R_T7 += 20 only when the primitive was linked.
+ * Clobbers R_AT, R_V0, R_V1, R_T0-R_T2. Branch offsets count instructions from
+ * the branch delay slot (index 0); recount both if the sequence is edited. */
+internal Code CodeBlob_(atom_floor_tri) {
+    /* 1. Load 3 indices from $t4 */
+    load_half_u(R_T0, R_T4, 0),
+    load_half_u(R_T1, R_T4, 2),
+    load_half_u(R_T2, R_T4, 4),
+
+    /* 2. Load Vertices: Addr = Base + (idx * 8). Write to GTE DATA Regs (mtc2) */
+    shift_ll(R_AT, R_T0, 3), add_u(R_AT, R_AT, R_T5),
+    load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
+    gte_mt(R_V0, C2_VXY0), gte_mt(R_V1, C2_VZ0),
+
+    shift_ll(R_AT, R_T1, 3), add_u(R_AT, R_AT, R_T5),
+    load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
+    gte_mt(R_V0, C2_VXY1), gte_mt(R_V1, C2_VZ1),
+
+    shift_ll(R_AT, R_T2, 3), add_u(R_AT, R_AT, R_T5),
+    load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
+    gte_mt(R_V0, C2_VXY2), gte_mt(R_V1, C2_VZ2),
+
+    /* 3. RTPT + NCLIP */
+    nop, nop, gte_cmdw_rtpt,
+    nop, nop, gte_cmdw_nclip,
+    nop, nop, /* Wait for NCLIP to finish */
+
+    /* 4. Check NCLIP.
+       If MAC0 <= 0 (Backface), branch to end.
+       Target is 30 instructions past the delay slot. */
+    gte_mf(R_T0, C2_MAC0),
+    nop, /* mfc2 load delay: R_T0 is not valid for the very next instruction */
+    branch_le_zero(R_T0, 30),
+    nop, /* <--- DELAY SLOT (Index 0) */
+
+    /* 5. Store Primitive Data */
+    /* 1 */ store_word(R_0, R_T7, 0),
+    /* 2 */ load_ui(R_AT, 0x20FF), /* High: Code 0x20 + Color R:FF */
+    /* 3 */ or_i(R_AT, R_AT, 0xFFFF), /* Low: Color G:FF, B:FF */
+    /* 4 */ store_word(R_AT, R_T7, 4),
+    /* 5 */ enc_gte_sw(C2_SXY0, R_T7, 8),
+    /* 6 */ enc_gte_sw(C2_SXY1, R_T7, 12),
+    /* 7 */ enc_gte_sw(C2_SXY2, R_T7, 16),
+
+    /* 6. OT Insertion with Bounds Checking */
+    /* 8 */ nop,
+    /* 9 */ nop,
+    /* 10 */ enc_gte_cmd(0x2D), /* AVSZ3 */
+    /* 11 */ nop, /* Wait for AVSZ3 */
+    /* 12 */ nop, /* Wait for AVSZ3 */
+    /* 13 */ gte_mf(R_T1, C2_OTZ), /* T1 = Depth index */
+
+    /* Bounds Check: OTZ < 2048. The limit is formed with ori from R_0; lui would
+       put 2048 in the upper halfword (0x08000000) and the check would never fail. */
+    /* 14 */ or_i(R_AT, R_0, 2048), /* AT = 2048 (also fills the mfc2 load delay of T1) */
+    /* 15 */ slt_u(R_AT, R_T1, R_AT), /* AT = (OTZ < 2048) ? 1 : 0 */
+    /* 16 */ branch_equal(R_AT, R_0, 13), /* If AT == 0, skip to end (13 instrs past delay) */
+    /* 17 */ nop, /* <--- DELAY SLOT (Index 0 for Bounds branch) */
+
+    /* Link the primitive in front of the current head. The length (4 words after
+       the tag) belongs in the primitive's own tag; the table entry only points at it. */
+    /* 18 (1) */ shift_ll(R_T1, R_T1, 2),
+    /* 19 (2) */ add_u(R_T1, R_T1, R_T6), /* T1 = &OrderingTable[OTZ] */
+    /* 20 (3) */ load_word(R_AT, R_T1, 0), /* AT = current head */
+    /* 21 (4) */ load_ui(R_V0, 0x0400), /* V0 = 0x04000000 (also fills the load delay of AT) */
+    /* 22 (5) */ shift_ll(R_AT, R_AT, 8),
+    /* 23 (6) */ shift_lr(R_AT, R_AT, 8), /* AT = head & 0x00FFFFFF */
+    /* 24 (7) */ or_u(R_AT, R_AT, R_V0), /* AT = Len 4 (0x04) in top 8 bits | next in bottom 24 */
+    /* 25 (8) */ store_word(R_AT, R_T7, 0), /* prim->tag = AT */
+
+    /* 26 (9) */ shift_ll(R_V0, R_T7, 8),
+    /* 27 (10) */ shift_lr(R_V0, R_V0, 8), /* V0 = T7 & 0x00FFFFFF */
+    /* 28 (11) */ store_word(R_V0, R_T1, 0), /* OrderingTable[OTZ] = prim address, Len 0 */
+    /* 29 (12) */ add_ui(R_T7, R_T7, 20), /* Advance Prim Cursor (5 words) */
+
+    /* 7. Yield */
+    /* 30 (13) */ add_ui(R_T4, R_T4, 8), /* Advance Face Cursor (4 * S2 = 8 bytes) */
+    mips_yield
+};
diff --git a/code/duffle/mips.h b/code/duffle/mips.h
index d65b156..191229e 100644
--- a/code/duffle/mips.h
+++ b/code/duffle/mips.h
@@ -246,6 +246,13 @@ enum { _BitOffsets = 0
 #define xor_i(rt, rs, imm) enc_i(op_xori, (rs), (rt), (imm))
 #define load_ui(rt, imm) enc_i(op_lui, R_0, (rt), (imm))
 
+/* Logic Opcodes */
+
+#define and_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_and)
+#define or_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_or)
+#define xor_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_xor)
+#define nor_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_nor)
+
 /* Shift family (R-type). shift_ll/lr/ra: `sll rd, rt, shamt` */
 #define shift_ll(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_sll)
 #define shift_lr(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_srl)
@@ -321,12 +328,13 @@ enum { _BitOffsets = 0
  * branch_le_zero rs, off → blez rs, off
  * branch_ge_zero rs, off → bgez rs, off
  * (For `bgez`, the opcode is `op_bcond` with rt=1 to invert the bltz condition.) */
+
 #define branch_equal(rs, rt, off) enc_i(op_beq, (rs), (rt), (off))
 #define branch_ne(rs, rt, off) enc_i(op_bne, (rs), (rt), (off))
-#define branch_lt_zero(rs, off) enc_i(op_bltz, R_0, (rs), (off))
-#define branch_gt_zero(rs, off) enc_i(op_bgtz, R_0, (rs), (off))
-#define branch_le_zero(rs, off) enc_i(op_blez, R_0, (rs), (off))
-#define branch_ge_zero(rs, off) enc_i(op_bcond, R_0, (rs), (1u << 16) | ((off) & 0xFFFF))
+#define branch_lt_zero(rs, off) enc_i(op_bcond, (rs), R_0, (off)) /* bltz is bcond with rt=0 */
+#define branch_ge_zero(rs, off) enc_i(op_bcond, (rs), 1, (off)) /* bgez is bcond with rt=1 */
+#define branch_le_zero(rs, off) enc_i(op_blez, (rs), R_0, (off)) /* blez has its own opcode, rt=0 */
+#define branch_gt_zero(rs, off) enc_i(op_bgtz, (rs), R_0, (off)) /* bgtz has its own opcode, rt=0 */
 
 /* --- System (kernel) instructions --- */
 #define syscall() enc_r(op_special, R_0, R_0, R_0, 0, fc_syscall)
@@ -490,7 +498,7 @@ Code CodeBlob_(mips_flush_icache) {
     , jump_reg(rret_addr) /* jr $ra */
     , add_ui(rstack_ptr, rstack_ptr, 8) /* sp += 8 (BD) */
 };
-FI_ void mips_flush_icache(void) { C_(VoidFn*, code_mips_flush_icache)(); }
+I_ void mips_flush_icache(void) { C_(VoidFn*, code_mips_flush_icache)(); }
 
 /* Standard clobber list for pure-MIPS asm volatile blocks: caller-saved
  * GPRs that the kernel treats as volatile (v0/v1/t0/t1/ra) plus the
@@ -513,5 +521,3 @@ FI_ void mips_flush_icache(void) { C_(VoidFn*, code_mips_flush_icache)(); }
 void test_mips_asm() {
     asm_mips_flush_icache();
 }
-
-// TAPE & EMITTERS
diff --git a/code/gte_hello/hello_gte.c b/code/gte_hello/hello_gte.c
index a37541c..f3f557d 100644
--- a/code/gte_hello/hello_gte.c
+++ b/code/gte_hello/hello_gte.c
@@ -12,6 +12,7 @@
 #include "duffle/mips.h"
 #include "duffle/gp.h"
 #include "duffle/gte.h"
+#include "duffle/lottes_tape.h"
 #include "hello_gte.h"
 
 enum {
@@ -229,6 +230,7 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
         static_mem.cube.rot.y += 30;
     }
     // Draw Floor
+    if (0)
     {
         m3s2_rotation   (& static_mem.floor.rot,   & static_mem.tform_world);
         m3s2_translation(& static_mem.tform_world, & static_mem.floor.pos);
@@ -282,6 +284,36 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
         }
         static_mem.floor.rot.y += 5;
     }
+    // Draw floor tape method
+    if (1)
+    {
+        LP_ U4 mem_temp_tape[512]; // Buffer for function addresses
+        FArena tape_arena;
+        farena_init(&tape_arena, slice_ut(mem_temp_tape, S_(mem_temp_tape)));
+
+        TapeBuilder tb = tb_begin(&tape_arena); {
+            // Setup state atoms
+            m3s2_rotation(&static_mem.floor.rot, &static_mem.tform_world);
+            m3s2_translation(&static_mem.tform_world, &static_mem.floor.pos);
+
+            // Push "Protocol" to tape
+            tb_emit(&tb, code_atom_set_gte_world);
+            tb_emit(&tb, (Code*)&static_mem.tform_world);
+            for (U4 i = 0; i < Floor_num_faces; i++) {
+                tb_emit(&tb, code_atom_floor_tri);
+            }
+        }
+        Slice_U4 tape = tb_end(&tb);
+
+        // --- EXECUTION ---
+        B1* prim_cursor = (B1*)r_(pa->buf)[static_mem.active_buf_id] + pa->used;
+        // Fire the Tape Drive; tape_run binds the workspace registers (T4 faces, T5 verts, T6 OT) from its arguments
+        tape_run(tape, &prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
+
+        // Update C-side state
+        pa->used = (U4)prim_cursor - (U4)r_(pa->buf)[static_mem.active_buf_id];
+        static_mem.floor.rot.y += 5;
+    }
 }
 
 int main(void)