mirror of
https://github.com/Ed94/pikuma_ps1.git
synced 2026-06-28 06:11:49 -07:00
got a working floor! (bouncy though)
This commit is contained in:
@@ -647,3 +647,6 @@ enum {
|
||||
asm_clobber: clb_system, rlit(R_T4_Code), rlit(R_T5_Code), rlit(R_T6_Code) \
|
||||
)
|
||||
|
||||
// TODO(Ed): Resolve magic number.
|
||||
/* PsyQ Compliant AVSZ3 Command Word */
|
||||
#define gte_cmdw_avsz3 (gte_cmd_base | 0x0158002D)
|
||||
|
||||
+45
-34
@@ -31,12 +31,14 @@ FI_ void tape_run(Slice_U4 tape, B1** r_prim_cursor, void* face_cursor, void* ve
|
||||
register void* r_t6 rgcc(R_T6) = ot_base;
|
||||
|
||||
asm volatile(
|
||||
"move $11, $31;" /* Manually save $ra to $t3 to protect against GCC bugs */
|
||||
"addiu $sp, $sp, -8;" /* Allocate stack space */
|
||||
"sw $31, 0($sp);" /* Safely backup $ra to the stack */
|
||||
"lw $25, 0(%0);"
|
||||
"addiu %0, %0, 4;"
|
||||
"jalr $25;"
|
||||
"nop;"
|
||||
"move $31, $11;" /* Restore $ra */
|
||||
"lw $31, 0($sp);" /* Restore $ra from stack */
|
||||
"addiu $sp, $sp, 8;" /* Deallocate stack space */
|
||||
: "+r"(tp), "+r"(pcur), "+r"(r_t4), "+r"(r_t5), "+r"(r_t6)
|
||||
:
|
||||
: "at", "v0", "v1", "t0", "t1", "t2", "t3", "t9", "memory"
|
||||
@@ -79,17 +81,17 @@ internal Code CodeBlob_(atom_floor_tri) {
|
||||
load_half_u(R_T2, R_T4, 4),
|
||||
|
||||
/* 2. Load Vertices: Addr = Base + (idx * 8). Write to GTE DATA Regs (mtc2) */
|
||||
shift_ll(R_AT, R_T0, 3), add_u(R_AT, R_AT, R_T5),
|
||||
shift_ll( R_AT, R_T0, 3), add_u( R_AT, R_AT, R_T5),
|
||||
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
|
||||
gte_mt(R_V0, C2_VXY0), gte_mt(R_V1, C2_VZ0),
|
||||
gte_mt( R_V0, C2_VXY0), gte_mt( R_V1, C2_VZ0),
|
||||
|
||||
shift_ll(R_AT, R_T1, 3), add_u(R_AT, R_AT, R_T5),
|
||||
shift_ll( R_AT, R_T1, 3), add_u( R_AT, R_AT, R_T5),
|
||||
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
|
||||
gte_mt(R_V0, C2_VXY1), gte_mt(R_V1, C2_VZ1),
|
||||
gte_mt( R_V0, C2_VXY1), gte_mt( R_V1, C2_VZ1),
|
||||
|
||||
shift_ll(R_AT, R_T2, 3), add_u(R_AT, R_AT, R_T5),
|
||||
shift_ll( R_AT, R_T2, 3), add_u( R_AT, R_AT, R_T5),
|
||||
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
|
||||
gte_mt(R_V0, C2_VXY2), gte_mt(R_V1, C2_VZ2),
|
||||
gte_mt( R_V0, C2_VXY2), gte_mt( R_V1, C2_VZ2),
|
||||
|
||||
/* 3. RTPT + NCLIP */
|
||||
nop, nop, gte_cmdw_rtpt,
|
||||
@@ -97,16 +99,17 @@ internal Code CodeBlob_(atom_floor_tri) {
|
||||
nop, nop, /* Wait for NCLIP to finish */
|
||||
|
||||
/* 4. Check NCLIP.
|
||||
If MAC0 <= 0 (Backface), branch to end.
|
||||
Target is 28 instructions past the delay slot. */
|
||||
Temporarily disabled backface culling to ensure floor is visible regardless of winding!
|
||||
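To re-enable: replace only the nop marked CULLING DISABLED below with `branch_le_zero(R_T0, 30)`; keep the load-delay nop before it and the delay-slot nop after it (offset 30 is counted from that delay slot). */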
|
||||
gte_mf(R_T0, C2_MAC0),
|
||||
branch_le_zero(R_T0, 28),
|
||||
nop, /* <--- DELAY SLOT (Index 0) */
|
||||
nop, /* <--- FILL LOAD DELAY SLOT */
|
||||
nop, /* branch_le_zero(R_T0, 30), <-- CULLING DISABLED */
|
||||
nop, /* <--- DELAY SLOT (Index 0) */
|
||||
|
||||
/* 5. Store Primitive Data */
|
||||
/* 1 */ store_word(R_0, R_T7, 0),
|
||||
/* 2 */ load_ui(R_AT, 0x20FF), /* High: Code 0x20 + Color R:FF */
|
||||
/* 3 */ or_i(R_AT, R_AT, 0xFFFF), /* Low: Color G:FF, B:FF */
|
||||
/* 1 */ store_word(R_0, R_T7, 0),
|
||||
/* 2 */ load_ui( R_AT, 0x20FF), /* High: Code 0x20 + Color B:FF */
|
||||
/* 3 */ or_i( R_AT, R_AT, 0xFFFF), /* Low: Color G:FF, R:FF (White) */
|
||||
/* 4 */ store_word(R_AT, R_T7, 4),
|
||||
/* 5 */ enc_gte_sw(C2_SXY0, R_T7, 8),
|
||||
/* 6 */ enc_gte_sw(C2_SXY1, R_T7, 12),
|
||||
@@ -115,33 +118,41 @@ internal Code CodeBlob_(atom_floor_tri) {
|
||||
/* 6. OT Insertion with Bounds Checking */
|
||||
/* 8 */ nop,
|
||||
/* 9 */ nop,
|
||||
/* 10 */ enc_gte_cmd(0x2D), /* AVSZ3 */
|
||||
/* 10 */ gte_cmdw_avsz3,
|
||||
/* 11 */ nop, /* Wait for AVSZ3 */
|
||||
/* 12 */ nop, /* Wait for AVSZ3 */
|
||||
/* 13 */ gte_mf(R_T1, C2_OTZ), /* T1 = Depth index */
|
||||
|
||||
/* Bounds Check: OTZ < 2048 */
|
||||
/* 14 */ add_ui(R_AT, R_0, 2048), /* <--- FIXED: Use add_ui for small constants! */
|
||||
/* 15 */ slt_u(R_AT, R_T1, R_AT), /* AT = (OTZ < 2048) ? 1 : 0 */
|
||||
/* 16 */ branch_equal(R_AT, R_0, 11), /* If AT == 0, skip to end (11 instrs past delay) */
|
||||
/* 14 */ add_ui( R_AT, R_0, 2048),
|
||||
/* 15 */ slt_u( R_AT, R_T1, R_AT), /* AT = (OTZ < 2048) ? 1 : 0 */
|
||||
/* 16 */ branch_equal(R_AT, R_0, 13), /* If AT == 0, skip to end (13 instrs past delay) */
|
||||
/* 17 */ nop, /* <--- DELAY SLOT (Index 0 for Bounds branch) */
|
||||
|
||||
/* 18 (1) */ shift_ll(R_T1, R_T1, 2),
|
||||
/* 19 (2) */ add_u(R_T1, R_T1, R_T6), /* T1 = &OrderingTable[OTZ] */
|
||||
/* 20 (3) */ load_word(R_AT, R_T1, 0), /* AT = current head */
|
||||
/* 21 (4) */ store_word(R_AT, R_T7, 0), /* prim->next = head */
|
||||
/* CORRECTED DMA TAG LOGIC */
|
||||
/* 18 (1) */ shift_ll( R_T1, R_T1, 2),
|
||||
/* 19 (2) */ add_u( R_T1, R_T1, R_T6), /* T1 = &OrderingTable[OTZ] */
|
||||
|
||||
/* Create Tag in AT: Len 4 (0x04) in top 8 bits, T7 in bottom 24 */
|
||||
/* 22 (5) */ shift_ll(R_AT, R_T7, 8),
|
||||
/* 23 (6) */ shift_lr(R_AT, R_AT, 8), /* AT = T7 & 0x00FFFFFF */
|
||||
/* 24 (7) */ load_ui(R_V0, 0x0400), /* V0 = 0x04000000 (Here LUI is correct!) */
|
||||
/* 25 (8) */ or_u(R_AT, R_AT, R_V0), /* AT = Tag */
|
||||
/* 20 (3) */ load_word( R_AT, R_T1, 0), /* AT = current head (old_ot) */
|
||||
/* 21 (4) */ load_ui( R_V0, 0x0400), /* V0 = 0x04000000 (Length = 4) */
|
||||
|
||||
/* 26 (9) */ store_word(R_AT, R_T1, 0), /* OrderingTable[OTZ] = Tag */
|
||||
/* 27 (10) */ add_ui(R_T7, R_T7, 20), /* Advance Prim Cursor (5 words) */
|
||||
/* Strip upper 8 bits from old_ot */
|
||||
/* 22 (5) */ shift_ll(R_AT, R_AT, 8),
|
||||
/* 23 (6) */ shift_lr(R_AT, R_AT, 8), /* AT &= 0x00FFFFFF */
|
||||
|
||||
/* Merge length into prim->tag */
|
||||
/* 24 (7) */ or_u(R_AT, R_AT, R_V0), /* AT |= 0x04000000 */
|
||||
/* 25 (8) */ store_word(R_AT, R_T7, 0), /* prim->tag = AT */
|
||||
|
||||
/* Create OT Tag: Len 0 (0x00) in top 8 bits, T7 in bottom 24 */
|
||||
/* 26 (9) */ shift_ll(R_AT, R_T7, 8),
|
||||
/* 27 (10) */ shift_lr(R_AT, R_AT, 8), /* AT = T7 & 0x00FFFFFF */
|
||||
/* 28 (11) */ store_word(R_AT, R_T1, 0), /* OrderingTable[OTZ] = AT */
|
||||
|
||||
/* 29 (12) */ add_ui( R_T7, R_T7, 20), /* Advance Prim Cursor (5 words) */
|
||||
|
||||
/* 7. Yield */
|
||||
/* 28 (11) */ add_ui(R_T4, R_T4, 8), /* Advance Face Cursor (4 * S2 = 8 bytes) */
|
||||
/* 30 (13) */ add_ui(R_T4, R_T4, 8), /* Advance Face Cursor (4 * S2 = 8 bytes) */
|
||||
mips_yield
|
||||
};
|
||||
|
||||
@@ -153,8 +164,8 @@ internal Code CodeBlob_(atom_diag_yield) {
|
||||
/* DIAGNOSTIC 2: Pure memory test (No GTE). Draws a fixed cyan triangle. */
|
||||
internal Code CodeBlob_(atom_diag_color) {
|
||||
store_word(R_0, R_T7, 0),
|
||||
load_ui(R_AT, 0x2000),
|
||||
or_i(R_AT, R_AT, 0xFFFF),
|
||||
load_ui(R_AT, 0x20FF), /* High: Code 0x20 + Color B:FF */
|
||||
or_i(R_AT, R_AT, 0xFF00), /* Low: Color G:FF, R:00 (Cyan) */
|
||||
store_word(R_AT, R_T7, 4),
|
||||
|
||||
/* Fake coordinates - Swapped winding order to prevent GPU culling! */
|
||||
@@ -167,7 +178,7 @@ internal Code CodeBlob_(atom_diag_color) {
|
||||
add_u(R_T1, R_T1, R_T6),
|
||||
|
||||
load_word(R_AT, R_T1, 0),
|
||||
load_ui(R_V0, 0x0400),
|
||||
load_ui(R_V0, 0x0400), /* <--- Fills load delay slot! */
|
||||
store_word(R_AT, R_T7, 0),
|
||||
|
||||
shift_ll(R_AT, R_T7, 8), shift_lr(R_AT, R_AT, 8),
|
||||
|
||||
+13
-14
@@ -286,36 +286,35 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
static_mem.floor.rot.y += 5;
|
||||
}
|
||||
// Draw floor tape method
|
||||
if (0)
|
||||
if (1)
|
||||
{
|
||||
LP_ U4 mem_temp_tape[512]; // Buffer for function addresses
|
||||
FArena tape_arena;
|
||||
farena_init(&tape_arena, slice_ut(mem_temp_tape, S_(mem_temp_tape)));
|
||||
FArena tape_arena; farena_init(&tape_arena, slice_ut(mem_temp_tape, S_(mem_temp_tape)));
|
||||
|
||||
TapeBuilder tb = tb_begin(&tape_arena); {
|
||||
// Setup state atoms
|
||||
m3s2_rotation(&static_mem.floor.rot, &static_mem.tform_world);
|
||||
m3s2_translation(&static_mem.tform_world, &static_mem.floor.pos);
|
||||
m3s2_rotation( & static_mem.floor.rot, & static_mem.tform_world);
|
||||
m3s2_translation( & static_mem.tform_world, & static_mem.floor.pos);
|
||||
|
||||
// Push "Protocol" to tape
|
||||
tb_emit(&tb, code_atom_set_gte_world);
|
||||
tb_emit(&tb, (Code*)&static_mem.tform_world);
|
||||
tb_emit(& tb, code_atom_set_gte_world);
|
||||
tb_emit(& tb, (Code*)& static_mem.tform_world);
|
||||
for (U4 i = 0; i < Floor_num_faces; i++) {
|
||||
tb_emit(&tb, code_atom_floor_tri);
|
||||
tb_emit(& tb, code_atom_floor_tri);
|
||||
}
|
||||
}
|
||||
Slice_U4 tape = tb_end(&tb);
|
||||
Slice_U4 tape = tb_end(& tb);
|
||||
// --- EXECUTION ---
|
||||
B1* prim_cursor = (B1*)r_(pa->buf)[static_mem.active_buf_id] + pa->used;
|
||||
// 2. Fire the Tape Drive (Explicitly bind the workspace variables)
|
||||
tape_run(tape, &prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
|
||||
tape_run(tape, & prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
|
||||
|
||||
// 3. Update C-side state
|
||||
pa->used = (U4)prim_cursor - (U4)r_(pa->buf)[static_mem.active_buf_id];
|
||||
static_mem.floor.rot.y += 5;
|
||||
}
|
||||
// --- TAPE DIAGNOSTICS ---
|
||||
if (1)
|
||||
if (0)
|
||||
{
|
||||
LP_ U4 mem_temp_tape[512];
|
||||
FArena tape_arena;
|
||||
@@ -331,16 +330,16 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
// 2. code_atom_diag_color -> Tests OT and Prim Arena memory
|
||||
// 3. code_atom_diag_gte -> Tests Vertex arrays and GTE Math
|
||||
// tb_emit(&tb, code_atom_diag_yield);
|
||||
tb_emit(&tb, code_atom_diag_color);
|
||||
tb_emit(& tb, code_atom_diag_color);
|
||||
// tb_emit(&tb, code_atom_diag_gte);
|
||||
}
|
||||
}
|
||||
Slice_U4 tape = tb_end(&tb);
|
||||
Slice_U4 tape = tb_end(& tb);
|
||||
|
||||
// Setup Workspace Registers
|
||||
B1* prim_cursor = (B1*)r_(pa->buf)[static_mem.active_buf_id] + pa->used;
|
||||
|
||||
tape_run(tape, &prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
|
||||
tape_run(tape, & prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
|
||||
|
||||
pa->used = (U4)prim_cursor - (U4)r_(pa->buf)[static_mem.active_buf_id];
|
||||
static_mem.floor.rot.y += 5;
|
||||
|
||||
Reference in New Issue
Block a user