From 47806e3d21ed39b353b8202e9a2e90e291fca762 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 14 Jun 2026 20:35:33 -0400 Subject: [PATCH] got a working floor! (bouncy though) --- code/duffle/gte.h | 3 ++ code/duffle/lottes_tape.h | 79 ++++++++++++++++++++++---------------- code/gte_hello/hello_gte.c | 27 +++++++------ 3 files changed, 61 insertions(+), 48 deletions(-) diff --git a/code/duffle/gte.h b/code/duffle/gte.h index dfdff00..d23ce9c 100644 --- a/code/duffle/gte.h +++ b/code/duffle/gte.h @@ -647,3 +647,6 @@ enum { asm_clobber: clb_system, rlit(R_T4_Code), rlit(R_T5_Code), rlit(R_T6_Code) \ ) +// TODO(Ed): Resolve magic number. +/* PsyQ Compliant AVSZ3 Command Word */ +#define gte_cmdw_avsz3 (gte_cmd_base | 0x0158002D) diff --git a/code/duffle/lottes_tape.h b/code/duffle/lottes_tape.h index 19b45ff..1c64090 100644 --- a/code/duffle/lottes_tape.h +++ b/code/duffle/lottes_tape.h @@ -31,12 +31,14 @@ FI_ void tape_run(Slice_U4 tape, B1** r_prim_cursor, void* face_cursor, void* ve register void* r_t6 rgcc(R_T6) = ot_base; asm volatile( - "move $11, $31;" /* Manually save $ra to $t3 to protect against GCC bugs */ + "addiu $sp, $sp, -8;" /* Allocate stack space */ + "sw $31, 0($sp);" /* Safely backup $ra to the stack */ "lw $25, 0(%0);" "addiu %0, %0, 4;" "jalr $25;" "nop;" - "move $31, $11;" /* Restore $ra */ + "lw $31, 0($sp);" /* Restore $ra from stack */ + "addiu $sp, $sp, 8;" /* Deallocate stack space */ : "+r"(tp), "+r"(pcur), "+r"(r_t4), "+r"(r_t5), "+r"(r_t6) : : "at", "v0", "v1", "t0", "t1", "t2", "t3", "t9", "memory" @@ -79,17 +81,17 @@ internal Code CodeBlob_(atom_floor_tri) { load_half_u(R_T2, R_T4, 4), /* 2. Load Vertices: Addr = Base + (idx * 8). Write to GTE DATA Regs (mtc2) */ - shift_ll(R_AT, R_T0, 3), add_u(R_AT, R_AT, R_T5), + shift_ll( R_AT, R_T0, 3), add_u( R_AT, R_AT, R_T5), load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4), - gte_mt(R_V0, C2_VXY0), gte_mt(R_V1, C2_VZ0), + gte_mt( R_V0, C2_VXY0), gte_mt( R_V1, C2_VZ0), - shift_ll(R_AT, R_T1, 3), add_u(R_AT, R_AT, R_T5), + shift_ll( R_AT, R_T1, 3), add_u( R_AT, R_AT, R_T5), load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4), - gte_mt(R_V0, C2_VXY1), gte_mt(R_V1, C2_VZ1), + gte_mt( R_V0, C2_VXY1), gte_mt( R_V1, C2_VZ1), - shift_ll(R_AT, R_T2, 3), add_u(R_AT, R_AT, R_T5), + shift_ll( R_AT, R_T2, 3), add_u( R_AT, R_AT, R_T5), load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4), - gte_mt(R_V0, C2_VXY2), gte_mt(R_V1, C2_VZ2), + gte_mt( R_V0, C2_VXY2), gte_mt( R_V1, C2_VZ2), /* 3. RTPT + NCLIP */ nop, nop, gte_cmdw_rtpt, @@ -97,16 +99,17 @@ internal Code CodeBlob_(atom_floor_tri) { nop, nop, /* Wait for NCLIP to finish */ /* 4. Check NCLIP. - If MAC0 <= 0 (Backface), branch to end. - Target is 28 instructions past the delay slot. */ + Temporarily disabled backface culling to ensure floor is visible regardless of winding! + To re-enable: replace the two nops below with `branch_le_zero(R_T0, 30)` */ gte_mf(R_T0, C2_MAC0), - branch_le_zero(R_T0, 28), - nop, /* <--- DELAY SLOT (Index 0) */ + nop, /* <--- FILL LOAD DELAY SLOT */ + nop, /* branch_le_zero(R_T0, 30), <-- CULLING DISABLED */ + nop, /* <--- DELAY SLOT (Index 0) */ /* 5. Store Primitive Data */ - /* 1 */ store_word(R_0, R_T7, 0), - /* 2 */ load_ui(R_AT, 0x20FF), /* High: Code 0x20 + Color R:FF */ - /* 3 */ or_i(R_AT, R_AT, 0xFFFF), /* Low: Color G:FF, B:FF */ + /* 1 */ store_word(R_0, R_T7, 0), + /* 2 */ load_ui( R_AT, 0x20FF), /* High: Code 0x20 + Color B:FF */ + /* 3 */ or_i( R_AT, R_AT, 0xFFFF), /* Low: Color G:FF, R:FF (White) */ /* 4 */ store_word(R_AT, R_T7, 4), /* 5 */ enc_gte_sw(C2_SXY0, R_T7, 8), /* 6 */ enc_gte_sw(C2_SXY1, R_T7, 12), @@ -115,33 +118,41 @@ internal Code CodeBlob_(atom_floor_tri) { /* 6. OT Insertion with Bounds Checking */ /* 8 */ nop, /* 9 */ nop, - /* 10 */ enc_gte_cmd(0x2D), /* AVSZ3 */ + /* 10 */ gte_cmdw_avsz3, /* 11 */ nop, /* Wait for AVSZ3 */ /* 12 */ nop, /* Wait for AVSZ3 */ /* 13 */ gte_mf(R_T1, C2_OTZ), /* T1 = Depth index */ /* Bounds Check: OTZ < 2048 */ - /* 14 */ add_ui(R_AT, R_0, 2048), /* <--- FIXED: Use add_ui for small constants! */ - /* 15 */ slt_u(R_AT, R_T1, R_AT), /* AT = (OTZ < 2048) ? 1 : 0 */ - /* 16 */ branch_equal(R_AT, R_0, 11), /* If AT == 0, skip to end (11 instrs past delay) */ + /* 14 */ add_ui( R_AT, R_0, 2048), + /* 15 */ slt_u( R_AT, R_T1, R_AT), /* AT = (OTZ < 2048) ? 1 : 0 */ + /* 16 */ branch_equal(R_AT, R_0, 13), /* If AT == 0, skip to end (13 instrs past delay) */ /* 17 */ nop, /* <--- DELAY SLOT (Index 0 for Bounds branch) */ - /* 18 (1) */ shift_ll(R_T1, R_T1, 2), - /* 19 (2) */ add_u(R_T1, R_T1, R_T6), /* T1 = &OrderingTable[OTZ] */ - /* 20 (3) */ load_word(R_AT, R_T1, 0), /* AT = current head */ - /* 21 (4) */ store_word(R_AT, R_T7, 0), /* prim->next = head */ + /* CORRECTED DMA TAG LOGIC */ + /* 18 (1) */ shift_ll( R_T1, R_T1, 2), + /* 19 (2) */ add_u( R_T1, R_T1, R_T6), /* T1 = &OrderingTable[OTZ] */ - /* Create Tag in AT: Len 4 (0x04) in top 8 bits, T7 in bottom 24 */ - /* 22 (5) */ shift_ll(R_AT, R_T7, 8), - /* 23 (6) */ shift_lr(R_AT, R_AT, 8), /* AT = T7 & 0x00FFFFFF */ - /* 24 (7) */ load_ui(R_V0, 0x0400), /* V0 = 0x04000000 (Here LUI is correct!) */ - /* 25 (8) */ or_u(R_AT, R_AT, R_V0), /* AT = Tag */ + /* 20 (3) */ load_word( R_AT, R_T1, 0), /* AT = current head (old_ot) */ + /* 21 (4) */ load_ui( R_V0, 0x0400), /* V0 = 0x04000000 (Length = 4) */ - /* 26 (9) */ store_word(R_AT, R_T1, 0), /* OrderingTable[OTZ] = Tag */ - /* 27 (10) */ add_ui(R_T7, R_T7, 20), /* Advance Prim Cursor (5 words) */ + /* Strip upper 8 bits from old_ot */ + /* 22 (5) */ shift_ll(R_AT, R_AT, 8), + /* 23 (6) */ shift_lr(R_AT, R_AT, 8), /* AT &= 0x00FFFFFF */ + + /* Merge length into prim->tag */ + /* 24 (7) */ or_u(R_AT, R_AT, R_V0), /* AT |= 0x04000000 */ + /* 25 (8) */ store_word(R_AT, R_T7, 0), /* prim->tag = AT */ + + /* Create OT Tag: Len 0 (0x00) in top 8 bits, T7 in bottom 24 */ + /* 26 (9) */ shift_ll(R_AT, R_T7, 8), + /* 27 (10) */ shift_lr(R_AT, R_AT, 8), /* AT = T7 & 0x00FFFFFF */ + /* 28 (11) */ store_word(R_AT, R_T1, 0), /* OrderingTable[OTZ] = AT */ + + /* 29 (12) */ add_ui( R_T7, R_T7, 20), /* Advance Prim Cursor (5 words) */ /* 7. Yield */ - /* 28 (11) */ add_ui(R_T4, R_T4, 8), /* Advance Face Cursor (4 * S2 = 8 bytes) */ + /* 30 (13) */ add_ui(R_T4, R_T4, 8), /* Advance Face Cursor (4 * S2 = 8 bytes) */ mips_yield }; @@ -153,8 +164,8 @@ internal Code CodeBlob_(atom_diag_yield) { /* DIAGNOSTIC 2: Pure memory test (No GTE). Draws a fixed cyan triangle. */ internal Code CodeBlob_(atom_diag_color) { store_word(R_0, R_T7, 0), - load_ui(R_AT, 0x2000), - or_i(R_AT, R_AT, 0xFFFF), + load_ui(R_AT, 0x20FF), /* High: Code 0x20 + Color B:FF */ + or_i(R_AT, R_AT, 0xFF00), /* Low: Color G:FF, R:00 (Cyan) */ store_word(R_AT, R_T7, 4), /* Fake coordinates - Swapped winding order to prevent GPU culling! */ @@ -167,7 +178,7 @@ internal Code CodeBlob_(atom_diag_color) { add_u(R_T1, R_T1, R_T6), load_word(R_AT, R_T1, 0), - load_ui(R_V0, 0x0400), + load_ui(R_V0, 0x0400), /* <--- Fills load delay slot! */ store_word(R_AT, R_T7, 0), shift_ll(R_AT, R_T7, 8), shift_lr(R_AT, R_AT, 8), diff --git a/code/gte_hello/hello_gte.c b/code/gte_hello/hello_gte.c index 527c369..ffc2d52 100644 --- a/code/gte_hello/hello_gte.c +++ b/code/gte_hello/hello_gte.c @@ -286,36 +286,35 @@ void update(PrimitiveArena* pa, U4* ordering_buf) static_mem.floor.rot.y += 5; } // Draw floor tape method - if (0) + if (1) { LP_ U4 mem_temp_tape[512]; // Buffer for function addresses - FArena tape_arena; - farena_init(&tape_arena, slice_ut(mem_temp_tape, S_(mem_temp_tape))); + FArena tape_arena; farena_init(&tape_arena, slice_ut(mem_temp_tape, S_(mem_temp_tape))); TapeBuilder tb = tb_begin(&tape_arena); { // Setup state atoms - m3s2_rotation(&static_mem.floor.rot, &static_mem.tform_world); - m3s2_translation(&static_mem.tform_world, &static_mem.floor.pos); + m3s2_rotation( & static_mem.floor.rot, & static_mem.tform_world); + m3s2_translation( & static_mem.tform_world, & static_mem.floor.pos); // Push "Protocol" to tape - tb_emit(&tb, code_atom_set_gte_world); - tb_emit(&tb, (Code*)&static_mem.tform_world); + tb_emit(& tb, code_atom_set_gte_world); + tb_emit(& tb, (Code*)& static_mem.tform_world); for (U4 i = 0; i < Floor_num_faces; i++) { - tb_emit(&tb, code_atom_floor_tri); + tb_emit(& tb, code_atom_floor_tri); } } - Slice_U4 tape = tb_end(&tb); + Slice_U4 tape = tb_end(& tb); // --- EXECUTION --- B1* prim_cursor = (B1*)r_(pa->buf)[static_mem.active_buf_id] + pa->used; // 2. Fire the Tape Drive (Explicitly bind the workspace variables) - tape_run(tape, &prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf); + tape_run(tape, & prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf); // 3. Update C-side state pa->used = (U4)prim_cursor - (U4)r_(pa->buf)[static_mem.active_buf_id]; static_mem.floor.rot.y += 5; } // --- TAPE DIAGNOSTICS --- - if (1) + if (0) { LP_ U4 mem_temp_tape[512]; FArena tape_arena; @@ -331,16 +330,16 @@ void update(PrimitiveArena* pa, U4* ordering_buf) // 2. code_atom_diag_color -> Tests OT and Prim Arena memory // 3. code_atom_diag_gte -> Tests Vertex arrays and GTE Math // tb_emit(&tb, code_atom_diag_yield); - tb_emit(&tb, code_atom_diag_color); + tb_emit(& tb, code_atom_diag_color); // tb_emit(&tb, code_atom_diag_gte); } } - Slice_U4 tape = tb_end(&tb); + Slice_U4 tape = tb_end(& tb); // Setup Workspace Registers B1* prim_cursor = (B1*)r_(pa->buf)[static_mem.active_buf_id] + pa->used; - tape_run(tape, &prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf); + tape_run(tape, & prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf); pa->used = (U4)prim_cursor - (U4)r_(pa->buf)[static_mem.active_buf_id]; static_mem.floor.rot.y += 5;