got a working floor! (bouncy though)

This commit is contained in:
2026-06-14 20:35:33 -04:00
parent c4e25daa9b
commit 47806e3d21
3 changed files with 61 additions and 48 deletions
+3
View File
@@ -647,3 +647,6 @@ enum {
asm_clobber: clb_system, rlit(R_T4_Code), rlit(R_T5_Code), rlit(R_T6_Code) \
)
// TODO(Ed): Resolve magic number.
/* PsyQ Compliant AVSZ3 Command Word */
#define gte_cmdw_avsz3 (gte_cmd_base | 0x0158002D)
+45 -34
View File
@@ -31,12 +31,14 @@ FI_ void tape_run(Slice_U4 tape, B1** r_prim_cursor, void* face_cursor, void* ve
register void* r_t6 rgcc(R_T6) = ot_base;
asm volatile(
"move $11, $31;" /* Manually save $ra to $t3 to protect against GCC bugs */
"addiu $sp, $sp, -8;" /* Allocate stack space */
"sw $31, 0($sp);" /* Safely backup $ra to the stack */
"lw $25, 0(%0);"
"addiu %0, %0, 4;"
"jalr $25;"
"nop;"
"move $31, $11;" /* Restore $ra */
"lw $31, 0($sp);" /* Restore $ra from stack */
"addiu $sp, $sp, 8;" /* Deallocate stack space */
: "+r"(tp), "+r"(pcur), "+r"(r_t4), "+r"(r_t5), "+r"(r_t6)
:
: "at", "v0", "v1", "t0", "t1", "t2", "t3", "t9", "memory"
@@ -79,17 +81,17 @@ internal Code CodeBlob_(atom_floor_tri) {
load_half_u(R_T2, R_T4, 4),
/* 2. Load Vertices: Addr = Base + (idx * 8). Write to GTE DATA Regs (mtc2) */
shift_ll(R_AT, R_T0, 3), add_u(R_AT, R_AT, R_T5),
shift_ll( R_AT, R_T0, 3), add_u( R_AT, R_AT, R_T5),
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
gte_mt(R_V0, C2_VXY0), gte_mt(R_V1, C2_VZ0),
gte_mt( R_V0, C2_VXY0), gte_mt( R_V1, C2_VZ0),
shift_ll(R_AT, R_T1, 3), add_u(R_AT, R_AT, R_T5),
shift_ll( R_AT, R_T1, 3), add_u( R_AT, R_AT, R_T5),
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
gte_mt(R_V0, C2_VXY1), gte_mt(R_V1, C2_VZ1),
gte_mt( R_V0, C2_VXY1), gte_mt( R_V1, C2_VZ1),
shift_ll(R_AT, R_T2, 3), add_u(R_AT, R_AT, R_T5),
shift_ll( R_AT, R_T2, 3), add_u( R_AT, R_AT, R_T5),
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
gte_mt(R_V0, C2_VXY2), gte_mt(R_V1, C2_VZ2),
gte_mt( R_V0, C2_VXY2), gte_mt( R_V1, C2_VZ2),
/* 3. RTPT + NCLIP */
nop, nop, gte_cmdw_rtpt,
@@ -97,16 +99,17 @@ internal Code CodeBlob_(atom_floor_tri) {
nop, nop, /* Wait for NCLIP to finish */
/* 4. Check NCLIP.
If MAC0 <= 0 (Backface), branch to end.
Target is 28 instructions past the delay slot. */
Temporarily disabled backface culling to ensure floor is visible regardless of winding!
To re-enable: swap only the nop marked CULLING DISABLED below for `branch_le_zero(R_T0, 30)`;
keep the load-delay nop before it and the branch-delay-slot nop after it. */
gte_mf(R_T0, C2_MAC0),
branch_le_zero(R_T0, 28),
nop, /* <--- DELAY SLOT (Index 0) */
nop, /* <--- FILL LOAD DELAY SLOT */
nop, /* branch_le_zero(R_T0, 30), <-- CULLING DISABLED */
nop, /* <--- DELAY SLOT (Index 0) */
/* 5. Store Primitive Data */
/* 1 */ store_word(R_0, R_T7, 0),
/* 2 */ load_ui(R_AT, 0x20FF), /* High: Code 0x20 + Color R:FF */
/* 3 */ or_i(R_AT, R_AT, 0xFFFF), /* Low: Color G:FF, B:FF */
/* 1 */ store_word(R_0, R_T7, 0),
/* 2 */ load_ui( R_AT, 0x20FF), /* High: Code 0x20 + Color B:FF */
/* 3 */ or_i( R_AT, R_AT, 0xFFFF), /* Low: Color G:FF, R:FF (White) */
/* 4 */ store_word(R_AT, R_T7, 4),
/* 5 */ enc_gte_sw(C2_SXY0, R_T7, 8),
/* 6 */ enc_gte_sw(C2_SXY1, R_T7, 12),
@@ -115,33 +118,41 @@ internal Code CodeBlob_(atom_floor_tri) {
/* 6. OT Insertion with Bounds Checking */
/* 8 */ nop,
/* 9 */ nop,
/* 10 */ enc_gte_cmd(0x2D), /* AVSZ3 */
/* 10 */ gte_cmdw_avsz3,
/* 11 */ nop, /* Wait for AVSZ3 */
/* 12 */ nop, /* Wait for AVSZ3 */
/* 13 */ gte_mf(R_T1, C2_OTZ), /* T1 = Depth index */
/* Bounds Check: OTZ < 2048 */
/* 14 */ add_ui(R_AT, R_0, 2048), /* <--- FIXED: Use add_ui for small constants! */
/* 15 */ slt_u(R_AT, R_T1, R_AT), /* AT = (OTZ < 2048) ? 1 : 0 */
/* 16 */ branch_equal(R_AT, R_0, 11), /* If AT == 0, skip to end (11 instrs past delay) */
/* 14 */ add_ui( R_AT, R_0, 2048),
/* 15 */ slt_u( R_AT, R_T1, R_AT), /* AT = (OTZ < 2048) ? 1 : 0 */
/* 16 */ branch_equal(R_AT, R_0, 13), /* If AT == 0, skip to end (13 instrs past delay) */
/* 17 */ nop, /* <--- DELAY SLOT (Index 0 for Bounds branch) */
/* 18 (1) */ shift_ll(R_T1, R_T1, 2),
/* 19 (2) */ add_u(R_T1, R_T1, R_T6), /* T1 = &OrderingTable[OTZ] */
/* 20 (3) */ load_word(R_AT, R_T1, 0), /* AT = current head */
/* 21 (4) */ store_word(R_AT, R_T7, 0), /* prim->next = head */
/* CORRECTED DMA TAG LOGIC */
/* 18 (1) */ shift_ll( R_T1, R_T1, 2),
/* 19 (2) */ add_u( R_T1, R_T1, R_T6), /* T1 = &OrderingTable[OTZ] */
/* Create Tag in AT: Len 4 (0x04) in top 8 bits, T7 in bottom 24 */
/* 22 (5) */ shift_ll(R_AT, R_T7, 8),
/* 23 (6) */ shift_lr(R_AT, R_AT, 8), /* AT = T7 & 0x00FFFFFF */
/* 24 (7) */ load_ui(R_V0, 0x0400), /* V0 = 0x04000000 (Here LUI is correct!) */
/* 25 (8) */ or_u(R_AT, R_AT, R_V0), /* AT = Tag */
/* 20 (3) */ load_word( R_AT, R_T1, 0), /* AT = current head (old_ot) */
/* 21 (4) */ load_ui( R_V0, 0x0400), /* V0 = 0x04000000 (Length = 4) */
/* 26 (9) */ store_word(R_AT, R_T1, 0), /* OrderingTable[OTZ] = Tag */
/* 27 (10) */ add_ui(R_T7, R_T7, 20), /* Advance Prim Cursor (5 words) */
/* Strip upper 8 bits from old_ot */
/* 22 (5) */ shift_ll(R_AT, R_AT, 8),
/* 23 (6) */ shift_lr(R_AT, R_AT, 8), /* AT &= 0x00FFFFFF */
/* Merge length into prim->tag */
/* 24 (7) */ or_u(R_AT, R_AT, R_V0), /* AT |= 0x04000000 */
/* 25 (8) */ store_word(R_AT, R_T7, 0), /* prim->tag = AT */
/* Create OT Tag: Len 0 (0x00) in top 8 bits, T7 in bottom 24 */
/* 26 (9) */ shift_ll(R_AT, R_T7, 8),
/* 27 (10) */ shift_lr(R_AT, R_AT, 8), /* AT = T7 & 0x00FFFFFF */
/* 28 (11) */ store_word(R_AT, R_T1, 0), /* OrderingTable[OTZ] = AT */
/* 29 (12) */ add_ui( R_T7, R_T7, 20), /* Advance Prim Cursor (5 words) */
/* 7. Yield */
/* 28 (11) */ add_ui(R_T4, R_T4, 8), /* Advance Face Cursor (4 * S2 = 8 bytes) */
/* 30 (13) */ add_ui(R_T4, R_T4, 8), /* Advance Face Cursor (4 * S2 = 8 bytes) */
mips_yield
};
@@ -153,8 +164,8 @@ internal Code CodeBlob_(atom_diag_yield) {
/* DIAGNOSTIC 2: Pure memory test (No GTE). Draws a fixed cyan triangle. */
internal Code CodeBlob_(atom_diag_color) {
store_word(R_0, R_T7, 0),
load_ui(R_AT, 0x2000),
or_i(R_AT, R_AT, 0xFFFF),
load_ui(R_AT, 0x20FF), /* High: Code 0x20 + Color B:FF */
or_i(R_AT, R_AT, 0xFF00), /* Low: Color G:FF, R:00 (Cyan) */
store_word(R_AT, R_T7, 4),
/* Fake coordinates - Swapped winding order to prevent GPU culling! */
@@ -167,7 +178,7 @@ internal Code CodeBlob_(atom_diag_color) {
add_u(R_T1, R_T1, R_T6),
load_word(R_AT, R_T1, 0),
load_ui(R_V0, 0x0400),
load_ui(R_V0, 0x0400), /* <--- Fills load delay slot! */
store_word(R_AT, R_T7, 0),
shift_ll(R_AT, R_T7, 8), shift_lr(R_AT, R_AT, 8),
+13 -14
View File
@@ -286,36 +286,35 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
static_mem.floor.rot.y += 5;
}
// Draw floor tape method
if (0)
if (1)
{
LP_ U4 mem_temp_tape[512]; // Buffer for function addresses
FArena tape_arena;
farena_init(&tape_arena, slice_ut(mem_temp_tape, S_(mem_temp_tape)));
FArena tape_arena; farena_init(&tape_arena, slice_ut(mem_temp_tape, S_(mem_temp_tape)));
TapeBuilder tb = tb_begin(&tape_arena); {
// Setup state atoms
m3s2_rotation(&static_mem.floor.rot, &static_mem.tform_world);
m3s2_translation(&static_mem.tform_world, &static_mem.floor.pos);
m3s2_rotation( & static_mem.floor.rot, & static_mem.tform_world);
m3s2_translation( & static_mem.tform_world, & static_mem.floor.pos);
// Push "Protocol" to tape
tb_emit(&tb, code_atom_set_gte_world);
tb_emit(&tb, (Code*)&static_mem.tform_world);
tb_emit(& tb, code_atom_set_gte_world);
tb_emit(& tb, (Code*)& static_mem.tform_world);
for (U4 i = 0; i < Floor_num_faces; i++) {
tb_emit(&tb, code_atom_floor_tri);
tb_emit(& tb, code_atom_floor_tri);
}
}
Slice_U4 tape = tb_end(&tb);
Slice_U4 tape = tb_end(& tb);
// --- EXECUTION ---
B1* prim_cursor = (B1*)r_(pa->buf)[static_mem.active_buf_id] + pa->used;
// 2. Fire the Tape Drive (Explicitly bind the workspace variables)
tape_run(tape, &prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
tape_run(tape, & prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
// 3. Update C-side state
pa->used = (U4)prim_cursor - (U4)r_(pa->buf)[static_mem.active_buf_id];
static_mem.floor.rot.y += 5;
}
// --- TAPE DIAGNOSTICS ---
if (1)
if (0)
{
LP_ U4 mem_temp_tape[512];
FArena tape_arena;
@@ -331,16 +330,16 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
// 2. code_atom_diag_color -> Tests OT and Prim Arena memory
// 3. code_atom_diag_gte -> Tests Vertex arrays and GTE Math
// tb_emit(&tb, code_atom_diag_yield);
tb_emit(&tb, code_atom_diag_color);
tb_emit(& tb, code_atom_diag_color);
// tb_emit(&tb, code_atom_diag_gte);
}
}
Slice_U4 tape = tb_end(&tb);
Slice_U4 tape = tb_end(& tb);
// Setup Workspace Registers
B1* prim_cursor = (B1*)r_(pa->buf)[static_mem.active_buf_id] + pa->used;
tape_run(tape, &prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
tape_run(tape, & prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
pa->used = (U4)prim_cursor - (U4)r_(pa->buf)[static_mem.active_buf_id];
static_mem.floor.rot.y += 5;