mirror of
https://github.com/Ed94/pikuma_ps1.git
synced 2026-06-16 00:52:22 -07:00
WIP: trying to see if I can get this tape execution working
This commit is contained in:
+8
-2
@@ -271,8 +271,14 @@ enum { _C2_OPS_ = 0
|
||||
* - rd: COP2 control register index (0..31) */
|
||||
#define enc_gte_tx(sub, rt, rd) (enc_op(op_cop2) | enc_rs(sub) | enc_rt(rt) | enc_rd(rd))
|
||||
|
||||
#define gte_mt(rt, rd) enc_gte_tx(cop_mt, (rt), (rd)) /* Move GPR (rt) to GTE Control Register (rd) */
|
||||
#define gte_mf(rt, rd) enc_gte_tx(cop_mf, (rt), (rd)) /* Move GTE Control Register (rd) to GPR (rt) */
|
||||
// #define gte_mt(rt, rd) enc_gte_tx(cop_mt, (rt), (rd)) /* Move GPR (rt) to GTE Control Register (rd) */
|
||||
// #define gte_mf(rt, rd) enc_gte_tx(cop_mf, (rt), (rd)) /* Move GTE Control Register (rd) to GPR (rt) */
|
||||
|
||||
/* GPR <-> GTE register moves, one macro per register file and direction.
 * The first argument handed to enc_gte_tx is the transfer selector that
 * enc_gte_tx places in the rs field of the COP2 instruction word. */

/* Data registers */
#define gte_mf(rt, rd) enc_gte_tx(0x00, (rt), (rd)) /* GTE data reg (rd) -> GPR (rt), e.g. MAC0, OTZ */
#define gte_mt(rt, rd) enc_gte_tx(0x04, (rt), (rd)) /* GPR (rt) -> GTE data reg (rd), e.g. VXY0 */

/* Control registers */
#define gte_cf(rt, rd) enc_gte_tx(0x02, (rt), (rd)) /* GTE control reg (rd) -> GPR (rt) */
#define gte_ct(rt, rd) enc_gte_tx(0x06, (rt), (rd)) /* GPR (rt) -> GTE control reg (rd), e.g. matrices */
|
||||
|
||||
/* COP2 Data Load (lwc2): `lwc2 rt, off(rs)`
|
||||
* Layout: [op_lwc2:6][rs:5][rt:5][imm:16]
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
#ifdef INTELLISENSE_DIRECTIVES
|
||||
# pragma once
|
||||
# include "dsl.h"
|
||||
# include "gcc_asm.h"
|
||||
# include "mips.h"
|
||||
# include "gte.h"
|
||||
# include "memory.h"
|
||||
#endif
|
||||
|
||||
/* R_T8 is our dedicated Tape Pointer (TP) */
|
||||
#define R_TP R_T8
|
||||
#define R_TP_Code R_T8_Code
|
||||
|
||||
/* Tail shared by every tape atom: read the next tape word into R_T9, add 4 to
 * the tape pointer, then jump through R_T9 with a nop in the slot after the jump.
 * Expands to four comma-separated instruction words with no trailing comma, so
 * it must be the last item in a CodeBlob initializer. */
#define mips_yield                 \
	load_word(R_T9, R_TP, 0),  \
	add_ui(R_TP, R_TP, 4),     \
	jump_reg(R_T9),            \
	nop
|
||||
|
||||
/* The 'Exit' Atom */
|
||||
/* Two instruction words: a register jump through rret_addr followed by a nop.
 * Unlike the other atoms it does not end in mips_yield, so no further tape word is fetched. */
internal Code CodeBlob_(tape_exit) { jump_reg(rret_addr), nop };
|
||||
|
||||
typedef Slice_(U4);
|
||||
|
||||
/* Executes a tape of CodeBlob addresses.
 *
 * tape          - only tape.ptr is read; the length is not checked here, so the
 *                 tape must end in an entry that returns instead of yielding.
 * r_prim_cursor - in/out: loaded into R_T7 before the run, written back after.
 * face_cursor   - initial value for R_T4.
 * vert_base     - initial value for R_T5.
 * ot_base       - initial value for R_T6.
 *
 * The asm fetches the first tape word, steps the tape pointer past it and
 * calls it with jalr. Every workspace register is an in/out operand, and the
 * scratch registers are listed as clobbers so the compiler keeps nothing live
 * in them across the run. */
FI_ void tape_run(Slice_U4 tape, B1** r_prim_cursor, void* face_cursor, void* vert_base, void* ot_base) {
	/* Pin each workspace value to the register the atoms are encoded against. */
	register U4*   tape_ptr rgcc(R_TP) = tape.ptr;
	register B1*   prims    rgcc(R_T7) = *r_prim_cursor;
	register void* faces    rgcc(R_T4) = face_cursor;
	register void* verts    rgcc(R_T5) = vert_base;
	register void* ot       rgcc(R_T6) = ot_base;

	asm volatile(
		"lw $25, 0(%0);"
		"addiu %0, %0, 4;"
		"jalr $25;"
		"nop;"
		: "+r"(tape_ptr), "+r"(prims), "+r"(faces), "+r"(verts), "+r"(ot)
		:
		: "at", "v0", "v1", "t0", "t1", "t2", "t3", "t9", "ra", "memory"
	);

	/* Hand the advanced primitive cursor back to the caller. */
	*r_prim_cursor = prims;
}
|
||||
|
||||
typedef Struct_(TapeBuilder) {FArena* arena;};
|
||||
FI_ TapeBuilder tb_begin(FArena* arena) {return (TapeBuilder){ arena };}
|
||||
|
||||
/* Appends one U4 word to the tape: the value of the `atom` pointer.
 * The word is taken from tb->arena via farena_push_type; its result is used unchecked. */
I_ void tb_emit(TapeBuilder* tb, Code* atom) {
	U4* const word = farena_push_type(tb->arena, U4);
	*word = (U4)atom;
}
|
||||
|
||||
/* Terminates the tape with code_tape_exit and returns it as a slice.
 * The slice starts at the arena's start and covers arena->used / 4 words,
 * i.e. it spans everything pushed into the arena, not only this builder's words. */
I_ Slice_U4 tb_end(TapeBuilder* tb) {
	FArena* const arena = tb->arena;
	tb_emit(tb, code_tape_exit);
	return (Slice_U4){ (U4*)arena->start, arena->used / 4 };
}
|
||||
|
||||
/* Tape atom with one inline operand: the word after the atom's own address on
 * the tape is read as a pointer, and eight consecutive words at that pointer
 * are written to GTE control registers (five rotation words, then TRX/TRY/TRZ).
 * Scratch registers: R_T0..R_T3. Ends by yielding to the next tape entry. */
internal Code CodeBlob_(atom_set_gte_world) {
	/* Operand fetch: R_T3 = next tape word, then step the tape pointer past it */
	load_word(R_T3, R_TP, 0),
	add_ui(R_TP, R_TP, 4),

	/* Words at +0 and +4 -> RT11, RT12 */
	load_word(R_T0, R_T3, 0),
	load_word(R_T1, R_T3, 4),
	gte_ct(R_T0, gte_cr_RT11),
	gte_ct(R_T1, gte_cr_RT12),

	/* Words at +8, +12, +16 -> RT13, RT21, RT22 */
	load_word(R_T0, R_T3, 8),
	load_word(R_T1, R_T3, 12),
	load_word(R_T2, R_T3, 16),
	gte_ct(R_T0, gte_cr_RT13),
	gte_ct(R_T1, gte_cr_RT21),
	gte_ct(R_T2, gte_cr_RT22),

	/* Words at +20, +24, +28 -> translation TRX, TRY, TRZ */
	load_word(R_T0, R_T3, 20),
	load_word(R_T1, R_T3, 24),
	load_word(R_T2, R_T3, 28),
	gte_ct(R_T0, gte_cr_TRX),
	gte_ct(R_T1, gte_cr_TRY),
	gte_ct(R_T2, gte_cr_TRZ),

	mips_yield
};
|
||||
|
||||
/* Tape atom: transform one indexed triangle, cull it, and link a flat
 * triangle primitive into the ordering table.
 *
 * Workspace registers (bound by tape_run):
 *   R_T4 face cursor (in/out)   R_T5 vertex base
 *   R_T6 ordering table base    R_T7 primitive cursor (in/out)
 * Scratch: R_AT, R_V0, R_V1, R_T0..R_T2 (plus R_T9 via mips_yield).
 *
 * The instruction words are raw encodings, so nothing inserts hazard nops for
 * us: a GPR written by a load or by gte_mf must not be read by the very next
 * instruction. Branch offsets are counted in instructions from the delay slot;
 * recount them whenever an instruction is added or removed below.
 *
 * NOTE(review): indices are read at +0/+2/+4 but the face cursor advances by
 * 8 bytes, and vertices are addressed as base + idx * 8 - confirm both strides
 * against the face/vertex types used by the caller. */
internal Code CodeBlob_(atom_floor_tri) {
	/* 1. Load 3 vertex indices (halfwords) from the face cursor */
	load_half_u(R_T0, R_T4, 0),
	load_half_u(R_T1, R_T4, 2),
	load_half_u(R_T2, R_T4, 4),

	/* 2. Load vertices: addr = base + (idx * 8). Write to GTE data regs */
	shift_ll(R_AT, R_T0, 3), add_u(R_AT, R_AT, R_T5),
	load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
	gte_mt(R_V0, C2_VXY0), gte_mt(R_V1, C2_VZ0),

	shift_ll(R_AT, R_T1, 3), add_u(R_AT, R_AT, R_T5),
	load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
	gte_mt(R_V0, C2_VXY1), gte_mt(R_V1, C2_VZ1),

	shift_ll(R_AT, R_T2, 3), add_u(R_AT, R_AT, R_T5),
	load_word(R_V0, R_AT, 0), load_word(R_V1, R_AT, 4),
	gte_mt(R_V0, C2_VXY2), gte_mt(R_V1, C2_VZ2),

	/* 3. RTPT + NCLIP */
	nop, nop, gte_cmdw_rtpt,
	nop, nop, gte_cmdw_nclip,
	nop, nop, /* Wait for NCLIP to finish */

	/* 4. Check NCLIP.
	      If MAC0 <= 0 (backface), branch to the face-cursor advance.
	      Target is 30 instructions past the delay slot. */
	gte_mf(R_T0, C2_MAC0),
	nop,                                   /* R_T0 is not valid until one instruction after gte_mf */
	branch_le_zero(R_T0, 30),
	nop,                                   /* <--- DELAY SLOT (Index 0) */

	/* 5. Store primitive data */
	/*  1 */ store_word(R_0, R_T7, 0),     /* Clear the tag word; the real tag is written at 25 */
	/*  2 */ load_ui(R_AT, 0x20FF),        /* High: Code 0x20 + Color R:FF */
	/*  3 */ or_i(R_AT, R_AT, 0xFFFF),     /* Low:  Color G:FF, B:FF */
	/*  4 */ store_word(R_AT, R_T7, 4),
	/*  5 */ enc_gte_sw(C2_SXY0, R_T7, 8),
	/*  6 */ enc_gte_sw(C2_SXY1, R_T7, 12),
	/*  7 */ enc_gte_sw(C2_SXY2, R_T7, 16),

	/* 6. OT insertion with bounds checking */
	/*  8 */ nop,
	/*  9 */ nop,
	/* 10 */ enc_gte_cmd(0x2D),            /* AVSZ3 */
	/* 11 */ nop,                          /* Wait for AVSZ3 */
	/* 12 */ nop,                          /* Wait for AVSZ3 */
	/* 13 */ gte_mf(R_T1, C2_OTZ),         /* T1 = depth index */

	/* Bounds check: OTZ < 2048.
	   The limit is built with or_i against R_0; load_ui would place 2048 in
	   the upper halfword (0x08000000) and the check could never fail.
	   Operand order (dest, src, imm) follows xor_i in mips.h. */
	/* 14 */ or_i(R_AT, R_0, 2048),        /* AT = 2048 (also covers the gte_mf delay on T1) */
	/* 15 */ slt_u(R_AT, R_T1, R_AT),      /* AT = (OTZ < 2048) ? 1 : 0 */
	/* 16 */ branch_equal(R_AT, R_0, 13),  /* If AT == 0, skip to end (13 instrs past delay) */
	/* 17 */ nop,                          /* <--- DELAY SLOT (Index 0 for bounds branch) */

	/* 18  (1) */ shift_ll(R_T1, R_T1, 2),
	/* 19  (2) */ add_u(R_T1, R_T1, R_T6),     /* T1 = &OrderingTable[OTZ] */
	/* 20  (3) */ load_word(R_AT, R_T1, 0),    /* AT = current head */
	/* 21  (4) */ load_ui(R_V0, 0x0400),       /* V0 = 0x04000000 (len 4); fills the load delay on AT */

	/* Primitive tag: length 4 in the top 8 bits, old head address in the low 24.
	   The length belongs to the packet whose words follow the tag, i.e. the
	   primitive, not the ordering table entry. */
	/* 22  (5) */ shift_ll(R_AT, R_AT, 8),
	/* 23  (6) */ shift_lr(R_AT, R_AT, 8),     /* AT = head & 0x00FFFFFF */
	/* 24  (7) */ or_u(R_AT, R_AT, R_V0),      /* AT = tag */
	/* 25  (8) */ store_word(R_AT, R_T7, 0),   /* prim->tag = len | old head */

	/* Ordering table entry: zero length, low 24 bits of the primitive address */
	/* 26  (9) */ shift_ll(R_V0, R_T7, 8),
	/* 27 (10) */ shift_lr(R_V0, R_V0, 8),     /* V0 = T7 & 0x00FFFFFF */
	/* 28 (11) */ store_word(R_V0, R_T1, 0),   /* OrderingTable[OTZ] -> prim */
	/* 29 (12) */ add_ui(R_T7, R_T7, 20),      /* Advance prim cursor (5 words) */

	/* 7. Yield (both branches land here) */
	/* 30 (13) */ add_ui(R_T4, R_T4, 8),       /* Advance face cursor by 8 bytes */
	mips_yield
};
|
||||
+13
-7
@@ -246,6 +246,13 @@ enum { _BitOffsets = 0
|
||||
#define xor_i(rt, rs, imm) enc_i(op_xori, (rs), (rt), (imm))
|
||||
#define load_ui(rt, imm) enc_i(op_lui, R_0, (rt), (imm))
|
||||
|
||||
/* Logic Opcodes */
|
||||
|
||||
#define and_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_and)
|
||||
#define or_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_or)
|
||||
#define xor_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_xor)
|
||||
#define nor_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_nor)
|
||||
|
||||
/* Shift family (R-type). shift_ll/lr/ra: `sll rd, rt, shamt` */
|
||||
#define shift_ll(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_sll)
|
||||
#define shift_lr(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_srl)
|
||||
@@ -321,12 +328,13 @@ enum { _BitOffsets = 0
|
||||
* branch_le_zero rs, off → blez rs, off
|
||||
* branch_ge_zero rs, off → bgez rs, off
|
||||
* (For `bgez`, the opcode is `op_bcond` with rt=1 to invert the bltz condition.) */
|
||||
|
||||
#define branch_equal(rs, rt, off) enc_i(op_beq, (rs), (rt), (off))
|
||||
#define branch_ne(rs, rt, off) enc_i(op_bne, (rs), (rt), (off))
|
||||
#define branch_lt_zero(rs, off) enc_i(op_bltz, R_0, (rs), (off))
|
||||
#define branch_gt_zero(rs, off) enc_i(op_bgtz, R_0, (rs), (off))
|
||||
#define branch_le_zero(rs, off) enc_i(op_blez, R_0, (rs), (off))
|
||||
#define branch_ge_zero(rs, off) enc_i(op_bcond, R_0, (rs), (1u << 16) | ((off) & 0xFFFF))
|
||||
#define branch_lt_zero(rs, off) enc_i(op_bcond, (rs), R_0, (off)) /* bltz is bcond with rt=0 */
|
||||
#define branch_ge_zero(rs, off) enc_i(op_bcond, (rs), 1, (off)) /* bgez is bcond with rt=1 */
|
||||
#define branch_le_zero(rs, off) enc_i(op_blez, (rs), R_0, (off)) /* blez has its own opcode, rt=0 */
|
||||
#define branch_gt_zero(rs, off) enc_i(op_bgtz, (rs), R_0, (off)) /* bgtz has its own opcode, rt=0 */
|
||||
|
||||
/* --- System (kernel) instructions --- */
|
||||
#define syscall() enc_r(op_special, R_0, R_0, R_0, 0, fc_syscall)
|
||||
@@ -490,7 +498,7 @@ Code CodeBlob_(mips_flush_icache) {
|
||||
, jump_reg(rret_addr) /* jr $ra */
|
||||
, add_ui(rstack_ptr, rstack_ptr, 8) /* sp += 8 (BD) */
|
||||
};
|
||||
FI_ void mips_flush_icache(void) { C_(VoidFn*, code_mips_flush_icache)(); }
|
||||
I_ void mips_flush_icache(void) { C_(VoidFn*, code_mips_flush_icache)(); }
|
||||
|
||||
/* Standard clobber list for pure-MIPS asm volatile blocks: caller-saved
|
||||
* GPRs that the kernel treats as volatile (v0/v1/t0/t1/ra) plus the
|
||||
@@ -513,5 +521,3 @@ FI_ void mips_flush_icache(void) { C_(VoidFn*, code_mips_flush_icache)(); }
|
||||
void test_mips_asm() {
|
||||
asm_mips_flush_icache();
|
||||
}
|
||||
|
||||
// TAPE & EMITTERS
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "duffle/mips.h"
|
||||
#include "duffle/gp.h"
|
||||
#include "duffle/gte.h"
|
||||
#include "duffle/lottes_tape.h"
|
||||
#include "hello_gte.h"
|
||||
|
||||
enum {
|
||||
@@ -229,6 +230,7 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
static_mem.cube.rot.y += 30;
|
||||
}
|
||||
// Draw Floor
|
||||
if (0)
|
||||
{
|
||||
m3s2_rotation (& static_mem.floor.rot, & static_mem.tform_world);
|
||||
m3s2_translation(& static_mem.tform_world, & static_mem.floor.pos);
|
||||
@@ -282,6 +284,40 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
}
|
||||
static_mem.floor.rot.y += 5;
|
||||
}
|
||||
// Draw floor tape method
|
||||
if (1)
|
||||
{
|
||||
LP_ U4 mem_temp_tape[512]; // Buffer for function addresses
|
||||
FArena tape_arena;
|
||||
farena_init(&tape_arena, slice_ut(mem_temp_tape, S_(mem_temp_tape)));
|
||||
|
||||
TapeBuilder tb = tb_begin(&tape_arena); {
|
||||
// Setup state atoms
|
||||
m3s2_rotation(&static_mem.floor.rot, &static_mem.tform_world);
|
||||
m3s2_translation(&static_mem.tform_world, &static_mem.floor.pos);
|
||||
|
||||
// Push "Protocol" to tape
|
||||
tb_emit(&tb, code_atom_set_gte_world);
|
||||
tb_emit(&tb, (Code*)&static_mem.tform_world);
|
||||
for (U4 i = 0; i < Floor_num_faces; i++) {
|
||||
tb_emit(&tb, code_atom_floor_tri);
|
||||
}
|
||||
}
|
||||
Slice_U4 tape = tb_end(&tb);
|
||||
|
||||
// 1. Setup Argument Registers (The Workspace)
|
||||
register V3_S2* face_cursor rgcc(R_T4) = static_mem.floor.faces;
|
||||
register V3_S2* vert_base rgcc(R_T5) = static_mem.floor.verts;
|
||||
register U4* ot_base rgcc(R_T6) = ordering_buf;
|
||||
// --- EXECUTION ---
|
||||
B1* prim_cursor = (B1*)r_(pa->buf)[static_mem.active_buf_id] + pa->used;
|
||||
// 2. Fire the Tape Drive (Explicitly bind the workspace variables)
|
||||
tape_run(tape, &prim_cursor, static_mem.floor.faces, static_mem.floor.verts, ordering_buf);
|
||||
|
||||
// 3. Update C-side state
|
||||
pa->used = (U4)prim_cursor - (U4)r_(pa->buf)[static_mem.active_buf_id];
|
||||
static_mem.floor.rot.y += 5;
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
|
||||
Reference in New Issue
Block a user