mirror of
https://github.com/Ed94/pikuma_ps1.git
synced 2026-06-22 11:34:59 -07:00
255 lines
10 KiB
C
255 lines
10 KiB
C
#ifdef INTELLISENSE_DIRECTIVES
|
|
# pragma once
|
|
# include "dsl.h"
|
|
# include "gcc_asm.h"
|
|
# include "mips.h"
|
|
# include "gte.h"
|
|
# include "memory.h"
|
|
#endif
|
|
|
|
/* ---------------------------------------------------------------------------
 * TAPE DRIVE ABI & REGISTER ALIASES
 * ---------------------------------------------------------------------------
 * We map the MIPS temporary registers to a persistent global workspace.
 * The C compiler is completely unaware of these bindings.
 * ---------------------------------------------------------------------------*/
|
|
enum {
|
|
R_TapePtr = R_T8, /* The Instruction Stream Pointer */
|
|
R_PrimCur = R_T7, /* VRAM output cursor (primitive buffer) */
|
|
R_FaceCur = R_T4, /* Input data cursor (indices/faces) */
|
|
R_VertBase = R_T5, /* Base address of the vertex array */
|
|
R_OtBase = R_T6, /* Base address of the Ordering Table */
|
|
/* Stringification codes for the GCC inline assembler clobber lists */
|
|
#define R_TapePtr_Code R_T8_Code
|
|
#define R_PrimCur_Code R_T7_Code
|
|
#define R_FaceCur_Code R_T4_Code
|
|
#define R_VertBase_Code R_T5_Code
|
|
#define R_OtBase_Code R_T6_Code
|
|
};
|
|
|
|
/* The 'Yield' sequence that ends a tape atom.
 * Reads the next atom address from the tape into R_T9, advances the tape
 * pointer by one word, then jumps to the loaded address (nop in the delay slot).
 * Cost: ~ 4 cycles */
#define mips_yield() \
	load_word(R_T9, R_TapePtr, 0), \
	add_ui_1( R_TapePtr, 4), \
	jump_reg( R_T9), \
	nop
|
|
|
|
/* The 'Exit' Atom */
|
|
MipsAtom_(tape_exit) { jump_reg(rret_addr), nop };
|
|
|
|
/* Declares the U4 slice type via the Slice_ macro; tape_run receives a tape as Slice_U4. */
typedef Slice_(U4);
|
|
|
|
/* Generalized Tape Engine Runner */
|
|
FI_ void tape_run(Slice_U4 tape) { register U4* tp rgcc(R_TapePtr) = tape.ptr; asm volatile(
|
|
asm_words(
|
|
add_ui( R_SP, R_SP, -8) /* Allocate stack space */
|
|
, store_word(R_RA, R_SP, 0) /* Safely backup $ra to the stack */
|
|
, load_word( R_T9, R_TapePtr, 0) /* Bootstrap the first jump */
|
|
, add_ui_1( R_TapePtr, 4) /* Advance tape */
|
|
, jump_nreg(R_T9) /* jalr $t9 */
|
|
, nop /* Branch delay slot */
|
|
, load_word(R_RA, R_SP, 0) /* Restore $ra from stack */
|
|
, add_ui_1( R_SP, 8) /* Deallocate stack space */
|
|
)
|
|
asm_rpins, r_use(tp)
|
|
asm_clobber:
|
|
rlit(R_AT_Code)
|
|
, rlit(R_V0_Code), rlit(R_V1_Code)
|
|
, rlit(R_T0_Code), rlit(R_T1_Code), rlit(R_T2_Code), rlit(R_T3_Code)
|
|
/* Tell GCC the tape engine owns and destroys the workspace registers */
|
|
, rlit(R_PrimCur_Code), rlit(R_FaceCur_Code), rlit(R_VertBase_Code), rlit(R_OtBase_Code)
|
|
, rlit(R_T9_Code)
|
|
, clb_mem_drain
|
|
); }
|
|
|
|
/* Incremental tape writer: where the tape starts and how many words it holds so far. */
typedef Struct_(TapeBuilder) {
	U4 ptr;   /* Address of the first tape word, kept as an integer */
	U4 count; /* Number of U4 words written */
};
|
|
/* Resets `tb` to an empty tape that begins at the start of `arena`. */
FI_ void tb_init(TapeBuilder* tb, FArena* arena)
{
	tb->ptr   = arena->start;
	tb->count = 0;
}
|
|
/* Returns an empty builder whose tape begins at the start of `arena`. */
FI_ TapeBuilder tb_make(FArena* arena)
{
	return (TapeBuilder){ .ptr = arena->start, .count = 0 };
}
|
|
|
|
/* Appends the address of `atom` as the next tape word. No capacity check is performed. */
FI_ void tb_emit(TapeBuilder* tb, MipsAtom* atom)
{
	u4_r(tb->ptr)[tb->count ++] = u4_(atom);
}
|
|
/* Appends a raw data word (an operand popped by the preceding atom) to the tape.
 * No capacity check is performed. */
FI_ void tb_data(TapeBuilder* tb, U4 data)
{
	u4_r(tb->ptr)[tb->count ++] = u4_(data);
}
|
|
|
|
/* Terminates the tape with the exit atom and returns it as a slice (exit word included). */
FI_ Slice_U4 tb_end(TapeBuilder* tb)
{
	tb_emit(tb, code_tape_exit);
	Slice_U4 tape = { C_(U4*, tb->ptr), tb->count };
	return tape;
}
|
|
/* Views the words written so far as a slice; nothing is appended. */
FI_ Slice_U4 tb_slice(TapeBuilder tb)
{
	Slice_U4 tape = { C_(U4*, tb.ptr), tb.count };
	return tape;
}
|
|
/* Scope helper: runs the statement that follows exactly once, then appends the
 * exit atom to `tb` in the loop's increment step. Leaving the body with `break`
 * bypasses that step, so no exit atom is emitted in that case. */
#define tb_scope(tb) for(U4 tbs_once=0;tbs_once==0;++tbs_once,tb_emit(tb,code_tape_exit))
|
|
|
|
/* ---------------------------------------------------------------------------
 * MACRO ATOMS (Reusable Assembly Components)
 * These do NOT yield. They are expanded inline inside Tape Atoms.
 * ---------------------------------------------------------------------------*/
|
|
|
|
/* Loads three unsigned 16-bit indices from R_FaceCur + 0 / 2 / 4
 * into the given registers. R_FaceCur itself is not advanced. */
#define mac_load_tri_indices(r_idx0, r_idx1, r_idx2) \
	load_half_u(r_idx0, R_FaceCur, 0), \
	load_half_u(r_idx1, R_FaceCur, 2), \
	load_half_u(r_idx2, R_FaceCur, 4)
|
|
|
|
/* Translates each index into a vertex address (R_VertBase + index * 8) and
 * moves the two words found there into the GTE registers VXYn / VZn.
 * Scratch registers: R_AT, R_V0, R_V1. */
#define mac_load_tri_verts(r_idx0, r_idx1, r_idx2) \
	/* Vertex 0 */ \
	shift_ll( R_AT, r_idx0, 3), \
	add_u(    R_AT, R_AT, R_VertBase), \
	load_word(R_V0, R_AT, 0), \
	load_word(R_V1, R_AT, 4), \
	gte_mt(   R_V0, C2_VXY0), \
	gte_mt(   R_V1, C2_VZ0), \
	/* Vertex 1 */ \
	shift_ll( R_AT, r_idx1, 3), \
	add_u(    R_AT, R_AT, R_VertBase), \
	load_word(R_V0, R_AT, 0), \
	load_word(R_V1, R_AT, 4), \
	gte_mt(   R_V0, C2_VXY1), \
	gte_mt(   R_V1, C2_VZ1), \
	/* Vertex 2 */ \
	shift_ll( R_AT, r_idx2, 3), \
	add_u(    R_AT, R_AT, R_VertBase), \
	load_word(R_V0, R_AT, 0), \
	load_word(R_V1, R_AT, 4), \
	gte_mt(   R_V0, C2_VXY2), \
	gte_mt(   R_V1, C2_VZ2)
|
|
|
|
/* Writes a 5-word primitive at R_PrimCur (Tag + Color + Coordinates):
 *   +0  tag, zeroed here
 *   +4  (color_hi << 16) | color_lo
 *   +8 / +12 / +16  screen coordinates SXY0..SXY2 stored straight from the GTE
 * Scratch register: R_AT. Seven instruction words. */
#define mac_format_prim_f3(color_hi, color_lo) \
	store_word(R_0,  R_PrimCur, 0), \
	load_ui(   R_AT, color_hi), \
	or_i(      R_AT, R_AT, color_lo), \
	store_word(R_AT, R_PrimCur, 4), \
	gte_sw(C2_SXY0,  R_PrimCur, 8), \
	gte_sw(C2_SXY1,  R_PrimCur, 12), \
	gte_sw(C2_SXY2,  R_PrimCur, 16)
|
|
|
|
/* Inserts the primitive at R_PrimCur at the head of ordering-table slot r_otz:
 *   prim->tag = (OT[otz] & 0x00FFFFFF) | (prim_length << 16)
 *   OT[otz]   =  R_PrimCur & 0x00FFFFFF
 * prim_length is the immediate handed to load_ui, so 0x0400 yields 0x04000000.
 * r_otz may be R_T1 itself. Scratch registers: R_T1, R_AT, R_V0.
 * Eleven instruction words. */
#define mac_insert_ot_tag(r_otz, prim_length) \
	shift_ll(  R_T1, r_otz, 2), \
	add_u(     R_T1, R_T1, R_OtBase),  /* T1 = &OrderingTable[OTZ] */ \
	load_word( R_AT, R_T1, 0),         /* AT = old_ot_head */ \
	load_ui(   R_V0, prim_length),     /* V0 = Length << 24 */ \
	shift_ll(  R_AT, R_AT, 8),         /* Strip upper 8 bits from old_ot */ \
	shift_lr(  R_AT, R_AT, 8), \
	or_u(      R_AT, R_AT, R_V0),      /* Merge length */ \
	store_word(R_AT, R_PrimCur, 0),    /* prim->tag = length | old_ot_head */ \
	shift_ll(  R_AT, R_PrimCur, 8),    /* AT = PrimCur & 0x00FFFFFF */ \
	shift_lr(  R_AT, R_AT, 8), \
	store_word(R_AT, R_T1, 0)          /* OrderingTable[OTZ] = PrimCur */
|
|
|
|
internal MipsAtom_(bind_workspace) {
|
|
/* Pop 4 arguments from the tape directly into the workspace registers */
|
|
load_word(R_PrimCur, R_TapePtr, 0),
|
|
load_word(R_FaceCur, R_TapePtr, 4),
|
|
load_word(R_VertBase, R_TapePtr, 8),
|
|
load_word(R_OtBase, R_TapePtr, 12),
|
|
add_ui_1( R_TapePtr, 16),
|
|
mips_yield()
|
|
};
|
|
|
|
internal MipsAtom_(sync_prim_cursor) {
|
|
/* Pop the C-struct address and base address from the tape */
|
|
load_word(R_AT, R_TapePtr, 0), /* AT = &pa->used */
|
|
load_word(R_T0, R_TapePtr, 4), /* T0 = prim_base */
|
|
add_ui_1( R_TapePtr, 8),
|
|
/* Calculate byte offset and store directly back to RAM */
|
|
sub_u(R_T0, R_PrimCur, R_T0),
|
|
store_word(R_T0, R_AT, 0),
|
|
mips_yield()
|
|
};
|
|
|
|
internal MipsAtom_(set_gte_world) {
|
|
/* Pop matrix address from tape into R_T3 ($11) */
|
|
load_word(R_T3, R_TapePtr, 0),
|
|
add_ui_1( R_TapePtr, 4),
|
|
|
|
/* Load 3x3 Rotation + 3x1 Translation from R_T3 into GTE CONTROL Regs (ctc2) */
|
|
load_word(R_T0, R_T3, 0), load_word(R_T1, R_T3, 4),
|
|
gte_ct( R_T0, gte_cr_RT11), gte_ct( R_T1, gte_cr_RT12),
|
|
load_word(R_T0, R_T3, 8), load_word(R_T1, R_T3, 12), load_word(R_T2, R_T3, 16),
|
|
gte_ct( R_T0, gte_cr_RT13), gte_ct( R_T1, gte_cr_RT21), gte_ct( R_T2, gte_cr_RT22),
|
|
load_word(R_T0, R_T3, 20), load_word(R_T1, R_T3, 24), load_word(R_T2, R_T3, 28),
|
|
gte_ct( R_T0, gte_cr_TRX), gte_ct( R_T1, gte_cr_TRY), gte_ct( R_T2, gte_cr_TRZ),
|
|
|
|
mips_yield()
|
|
};
|
|
|
|
internal MipsAtom_(floor_tri) {
|
|
mac_load_tri_indices(R_T0, R_T1, R_T2),
|
|
mac_load_tri_verts( R_T0, R_T1, R_T2),
|
|
|
|
/* 3. Execute Math */
|
|
nop, nop, gte_cmdw_rtpt,
|
|
nop, nop, gte_cmdw_nclip,
|
|
nop, nop,
|
|
|
|
/* 4. Culling (Branch forward 29 instructions if Backface) */
|
|
gte_mf(R_T0, C2_MAC0),
|
|
nop,
|
|
branch_le_zero(R_T0, 29),
|
|
nop,
|
|
|
|
/* 5. Format Primitive */
|
|
mac_format_prim_f3(0x20FF, 0xFFFF), /* High: 0x20/B, Low: G/R */
|
|
|
|
/* 6. Calculate Depth */
|
|
nop, nop, gte_cmdw_avsz3,
|
|
nop, nop,
|
|
gte_mf(R_T1, C2_OTZ),
|
|
|
|
/* 7. Bounds Check OTZ < 2048 (Branch forward 13 instructions to skip insertion) */
|
|
add_ui( R_AT, R_0, 2048),
|
|
slt_u( R_AT, R_T1, R_AT),
|
|
branch_equal(R_AT, R_0, 13),
|
|
nop,
|
|
|
|
/* 8. Insert into Ordering Table Linked List */
|
|
mac_insert_ot_tag(R_T1, 0x0400), /* Length = 4 words */
|
|
|
|
add_ui(R_PrimCur, R_PrimCur, 20), /* Advance Prim Cursor (5 words) */
|
|
/* 9. Advance Input Cursor & Yield (Both branch targets land here) */
|
|
add_ui(R_FaceCur, R_FaceCur, 8), /* Advance Face Cursor (4 * S2 = 8 bytes) */
|
|
mips_yield()
|
|
};
|
|
|
|
/* DIAGNOSTIC 1: Pure tape loop test */
|
|
internal MipsAtom_(diag_yield) { mips_yield() };
|
|
|
|
/* DIAGNOSTIC 2: Pure memory test (No GTE). Draws a fixed cyan triangle. */
|
|
internal MipsAtom_(diag_color) {
|
|
store_word(R_0, R_T7, 0),
|
|
load_ui( R_AT, 0x20FF), /* High: MipsAtom 0x20 + Color B:FF */
|
|
or_i( R_AT, R_AT, 0xFF00), /* Low: Color G:FF, R:00 (Cyan) */
|
|
store_word(R_AT, R_T7, 4),
|
|
|
|
/* Fake coordinates - Swapped winding order to prevent GPU culling! */
|
|
load_ui(R_AT, 0x0010), or_i(R_AT, R_AT, 0x0010), store_word(R_AT, R_T7, 8), /* (16, 16) */
|
|
load_ui(R_AT, 0x0050), or_i(R_AT, R_AT, 0x0010), store_word(R_AT, R_T7, 12), /* (80, 16) */
|
|
load_ui(R_AT, 0x0010), or_i(R_AT, R_AT, 0x0050), store_word(R_AT, R_T7, 16), /* (16, 80) */
|
|
|
|
add_ui( R_T1, R_0, 10),
|
|
shift_ll(R_T1, R_T1, 2),
|
|
add_u( R_T1, R_T1, R_T6),
|
|
|
|
load_word( R_AT, R_T1, 0),
|
|
load_ui( R_V0, 0x0400), // <--- Fills load delay slot!
|
|
store_word(R_AT, R_T7, 0),
|
|
|
|
shift_ll( R_AT, R_T7, 8), shift_lr(R_AT, R_AT, 8),
|
|
or_u( R_AT, R_AT, R_V0),
|
|
store_word(R_AT, R_T1, 0),
|
|
|
|
add_ui(R_T7, R_T7, 20),
|
|
|
|
mips_yield()
|
|
};
|
|
|
|
/* DIAGNOSTIC 3: Pure GTE test (No Memory Writes) */
|
|
internal MipsAtom_(diag_gte) {
|
|
/* Load 3 indices */
|
|
load_half_u(R_T0, R_T4, 0),
|
|
load_half_u(R_T1, R_T4, 2),
|
|
load_half_u(R_T2, R_T4, 4),
|
|
|
|
/* Load Vertices into GTE */
|
|
shift_ll( R_AT, R_T0, 3), add_u( R_AT, R_AT, R_T5),
|
|
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
|
|
gte_mt( R_V0, C2_VXY0), gte_mt( R_V1, C2_VZ0),
|
|
|
|
shift_ll( R_AT, R_T1, 3), add_u( R_AT, R_AT, R_T5),
|
|
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
|
|
gte_mt( R_V0, C2_VXY1), gte_mt( R_V1, C2_VZ1),
|
|
|
|
shift_ll( R_AT, R_T2, 3), add_u( R_AT, R_AT, R_T5),
|
|
load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
|
|
gte_mt( R_V0, C2_VXY2), gte_mt( R_V1, C2_VZ2),
|
|
|
|
/* Run Math */
|
|
nop, nop, gte_cmdw_rtpt,
|
|
nop, nop, gte_cmdw_nclip,
|
|
nop, nop,
|
|
|
|
/* Advance Face Cursor and Yield */
|
|
add_ui(R_T4, R_T4, 8),
|
|
|
|
mips_yield()
|
|
};
|