WIP: still learning

This commit is contained in:
2026-06-01 20:29:48 -04:00
parent 27667a4232
commit e178743ffb
4 changed files with 179 additions and 53 deletions
+58 -4
View File
@@ -254,9 +254,63 @@
/* _INL_98: the expansion asm_inline() reaches through glue(_INL_, <count>) when
 * it is given 98 arguments. It produces the body of an asm statement:
 *   - the template string `".word " _STR98`,
 *   - `: :`  — an empty output section followed by the start of the inputs,
 *   - the input operands: `_OP90` (defined outside this view; presumably the
 *     "i" operands for p0..p89 — TODO confirm) followed by "i"(p90).."i"(p97). */
#define _INL_98(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36,p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49,p50,p51,p52,p53,p54,p55,p56,p57,p58,p59,p60,p61,p62,p63,p64,p65,p66,p67,p68,p69,p70,p71,p72,p73,p74,p75,p76,p77,p78,p79,p80,p81,p82,p83,p84,p85,p86,p87,p88,p89,p90,p91,p92,p93,p94,p95,p96,p97) ".word " _STR98 : : _OP90,"i"(p90),"i"(p91),"i"(p92),"i"(p93),"i"(p94),"i"(p95),"i"(p96),"i"(p97)
/* _INL_99: same shape as _INL_98 for 99 arguments; the operand list gains
 * one more entry, "i"(p98). */
#define _INL_99(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36,p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49,p50,p51,p52,p53,p54,p55,p56,p57,p58,p59,p60,p61,p62,p63,p64,p65,p66,p67,p68,p69,p70,p71,p72,p73,p74,p75,p76,p77,p78,p79,p80,p81,p82,p83,p84,p85,p86,p87,p88,p89,p90,p91,p92,p93,p94,p95,p96,p97,p98) ".word " _STR99 : : _OP90,"i"(p90),"i"(p91),"i"(p92),"i"(p93),"i"(p94),"i"(p95),"i"(p96),"i"(p97),"i"(p98)
/* The AST Builders */
#define asm_clobber(...) : __VA_ARGS__
#define asm_inline(...) m_expand(glue(_INL_, _ASM_COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__))
/* ============================================================================
* AST BUILDERS — assemble a complete inline-asm block
* ============================================================================
*
* A complete GCC inline-asm statement has up to 4 sections separated by `:`:
*
* asm volatile ( "code" : OUTPUTS : INPUTS : CLOBBERS );
*
* Every section-builder below prepends the `:` separator that GCC requires,
* so you can compose them inline without thinking about punctuation. The
* master `asm_block(...)` then wraps the four sections in `asm volatile (...)`.
*
* asm_block(
* asm_code( "..." ),
* asm_out ( "=r"(x), "+m"(y) ), // optional
* asm_in ( "r"(a), "m"(b) ), // optional
* asm_clb ( "$8", "memory" ) // optional
* );
*
* Common idioms (kept for back-compat / terseness):
*
* asm_blob(asm_inline(...), asm_clobber(...)) // 2-section, no I/O
* asm_block(asm_inline(...), , , ) // 4-section, empty
*/
/* The Shell */
/* `asm_code` is a passthrough — it does NOT prepend a colon, since the code
 * section is always the first (no separator needed before it). Its
 * arguments are emitted unchanged, whether they are a literal template
 * string or the format string + `"i"(...)` operand list produced by
 * `asm_inline(...)`. */
#define asm_code(...) __VA_ARGS__
/* `asm_out` prepends `:` — the separator that opens the output-operand
 * section. Invoked with no arguments it still emits the bare `:`. */
#define asm_out(...) : __VA_ARGS__
/* `asm_in` prepends `:` — opens the input-operand section. Expands exactly
 * like `asm_out`; only the name documents which section is meant. */
#define asm_in(...) : __VA_ARGS__
/* `asm_clb` prepends `:` — opens the clobber list. Expands exactly like
 * `asm_out` / `asm_in`. */
#define asm_clb(...) : __VA_ARGS__
/* `asm_clobber` is the legacy single-section name. Kept for existing
 * call-sites that put inputs *before* clobbers and want both as one colon-
 * prefixed block (i.e. the user wrote `: "r"(x) ... : "..."` by hand).
 * Its expansion is identical to `asm_clb`: a leading `:` followed by the
 * arguments. */
#define asm_clobber(...) : __VA_ARGS__
/* `asm_inline(...)` dispatches into `_INL_<count>` to emit up to 99 encoded
 * instruction words. This is the "compiled-instruction" form of `asm_code`.
 * The macro name is built by gluing `_INL_` to the result of
 * `_ASM_COUNT_ARGS(...)`, then called with the same arguments; `m_expand`
 * wraps the result (its definition is outside this view). */
#define asm_inline(...) m_expand(glue(_INL_, _ASM_COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__))
/* `asm_blob(inlines, clobbers)` — the original 2-section shell. Emits
 * `asm volatile ( inlines clobbers )`
 * which is the `.word`-only shape (no runtime inputs/outputs): the inlines
 * expand to `".word %c0, ..." : : "i"(...)`, i.e. they already carry the
 * empty output section and the `"i"` input section, so `clobbers` (which
 * brings its own leading `:`) just tacks the third section onto the end. */
#define asm_blob(inlines, clobbers) asm volatile ( inlines clobbers )
/* `asm_block(code, outs, ins, clb)` — the full 4-section shell. Each
 * argument is expected to already include its own leading `:` (via the
 * `asm_out` / `asm_in` / `asm_clb` builders) or be empty. The `code`
 * argument should NOT have a leading `:`.
 *
 * The four arguments are pasted side by side with nothing inserted between
 * them, so the sections are positional:
 *   - To skip a section but supply a later one, pass the empty builder
 *     (e.g. `asm_out()`), not an empty argument — an empty argument emits no
 *     `:` and the next section shifts one position to the left.
 *   - An `asm_inline(...)` used as `code` already contains `: :` plus its
 *     `"i"` operands, so any further `asm_out` / `asm_in` argument lands
 *     after the input section rather than in the section its name implies. */
#define asm_block(code, outs, ins, clb) asm volatile ( code outs ins clb )
+44 -7
View File
@@ -119,9 +119,22 @@ enum {
gte_cr_OFX = 30, gte_cr_OFY = 31,
};
/* COP2 (GTE) Transfer Format
* Opcode is always op_cop2. The 'sub' field determines direction (MT/MF). */
#define enc_cop2_tx(sub, rt, rd) enc_op(op_cop2) | enc_rs(sub) | enc_rt(rt) | enc_rd(rd)
/* COP2 (GTE) register-transfer format (mfc2 / cfc2 / mtc2 / ctc2 rt, rd)
 * Layout: [op_cop2:6][sub:5][rt:5][rd:5][0:11]
 * - sub: transfer sub-opcode, placed in the rs slot
 * - rt: GPR source/dest
 * - rd: COP2 register index (0..31)
 *
 * NOTE(review): this comment previously mapped cop_mf (0x00) to cfc2 and
 * cop_mt (0x04) to ctc2. In the MIPS I COPz encoding the rs-slot values are
 * 0x00 = mfc2, 0x02 = cfc2, 0x04 = mtc2, 0x06 = ctc2. If cop_mf / cop_mt
 * really are 0x00 / 0x04 (their definitions are outside this view), passing
 * them here emits mfc2 / mtc2, which address the COP2 *data* registers;
 * reaching the *control* registers needs sub = 0x02 / 0x06. Confirm before
 * relying on this macro for control-register writes.
 *
 * The expansion is fully parenthesised so it can be combined with other
 * operators without precedence surprises. */
#define enc_cop2_tx(sub, rt, rd) (enc_op(op_cop2) | enc_rs(sub) | enc_rt(rt) | enc_rd(rd))
/* COP2 Data Load / Store: `lwc2 rt, off(base)` and `swc2 rt, off(base)`
 * Layout: [op:6][rs:5][rt:5][imm:16], with op = op_lwc2 or op_swc2
 * - base: GPR holding the base address (goes in the rs slot)
 * - rt: COP2 data register index (0..31)
 * - off: signed 16-bit offset (goes in the imm slot)
 * NOTE: When `base` is a runtime register chosen by the compiler, the
 * encoding cannot be pre-baked into a .word — use the string-style
 * `asm_gte_load_v0` macro below instead. */
#define enc_cop2_lwc2(rt, base, off) enc_i(op_lwc2, (base), (rt), (off))
/* Store counterpart of enc_cop2_lwc2: same operand order, op_swc2 opcode. */
#define enc_cop2_swc2(rt, base, off) enc_i(op_swc2, (base), (rt), (off))
/* GTE Command Format (The math engine trigger)
* Opcode is always MIPS_OP_COP2, RS is always 1 (CO).
@@ -145,7 +158,7 @@ enum {
#define enc_gte_cmd(cmd) (((cmd) & gte_mask_cmd) << gte_shift_cmd)
/* Composite: all six GTE fields + the COP2/CO base. */
#define enc_gte_cmd(sf, mx, v, cv, lm, cmd) ( \
#define enc_gte_cmdw(sf, mx, v, cv, lm, cmd) ( \
gte_cmd_base \
| enc_gte_sf(sf) \
| enc_gte_mx(mx) \
@@ -155,6 +168,26 @@ enum {
| enc_gte_cmd(cmd) \
)
/**
 * @brief Loads a single SVECTOR to GTE vector register V0
 *
 * @details Loads values from an SVECTOR struct to GTE data registers C2_VXY0
 * (XY at offset 0) and C2_VZ0 (Z at offset 4) using `lwc2`.
 *
 * Uses string-style GCC inline asm with `%0` substitution because the
 * base register `r0` is a runtime GPR chosen by the compiler — it cannot
 * be encoded into a static `.word` constant.
 *
 * The asm reads the two words at `r0` directly from memory, but the only
 * operand the compiler sees is the pointer value itself. The "memory"
 * clobber tells the compiler that memory is accessed behind its back, so
 * any pending stores to the vector are written out before the `lwc2`
 * instructions run and are not moved past them.
 *
 * @param r0 Pointer to the vector; 8 bytes starting at this address are read.
 *
 * Usage:
 * asm_gte_load_v0(svector_ptr);
 */
#define asm_gte_load_v0(r0) asm volatile( \
    "lwc2 $0, 0(%0);" \
    "lwc2 $1, 4(%0);" \
    : \
    : "r"(r0) \
    : "memory" \
)
/* asm_gte_matrix_set_rotation(r0)
*
* Loads the 3x3 rotation matrix at `r0` into the GTE's rotation-matrix
@@ -178,6 +211,10 @@ enum {
* ctc2 $13, $3 ; → C2_RT21
* ctc2 $14, $4 ; → C2_RT22
*
* Uses string-style GCC inline asm with `%0` substitution because the
* base register `r0` is a runtime GPR — the `lw` offsets use literal
* values (0, 4, 8, ...) so only the base register needs substitution.
*
* WARNING: Incomplete by design. The source macro only writes RT11..RT22
* (5 of 9 rotation elements); RT23 and the entire RT3x row are left
* untouched. Real libpsn00b SetRotMatrix writes all 9. Use only when the
@@ -185,7 +222,7 @@ enum {
* get stale-RT2x/RT3x artifacts in RTPS/RTPT/MVMVA output.
*/
#define asm_gte_matrix_set_rotation(r0) \
asm volatile( \
asm volatile( \
asm_inline( \
load_imm(R_T4, r0, 0), \
load_imm(R_T5, r0, 4), \
@@ -199,6 +236,6 @@ enum {
enc_cop2_tx(cop_mt, R_T6, 4) \
) \
asm_clobber( clb_system, "$12", "$13", "$14") \
: \
: "r"(r0) \
: \
: "r"(r0) \
)
+74 -22
View File
@@ -59,9 +59,11 @@ enum {
, op_lw = 0x23 /* Load Word */
, op_lbu = 0x24 /* Load Byte Unsigned */
, op_lhu = 0x25 /* Load Halfword Unsigned */
, op_lwc2 = 0x32 /* Load Word to Coprocessor 2 (GTE) */
, op_sb = 0x28 /* Store Byte */
, op_sh = 0x29 /* Store Halfword */
, op_sw = 0x2B /* Store Word */
, op_swc2 = 0x3A /* Store Word from Coprocessor 2 (GTE) */
, op_load_addr = op_la
, op_load_imm = op_li
@@ -142,21 +144,55 @@ enum { _BitOffsets = 0
/* MIPS I-Type Instruction Format (Immediate/Constant) */
#define enc_i(op, rs, rt, imm) (enc_op(op) | enc_rs(rs) | enc_rt(rt) | enc_imm(imm))
/* COP0 (System) Transfer Format */
#define enc_cop0_tx(sub, rt, rd) enc_op(op_cop0) | enc_rs(sub) | enc_rt(rt) | enc_rd(rd)
/* COP0 (System) Transfer Format: mtc0 rt, rd or mfc0 rt, rd
 * `sub` is the COP0 sub-opcode (cop_mf=0 or cop_mt=4), placed in rs slot.
 * `rt` is the GPR operand (in rt slot).
 * `rd` is the COP0 register index (in rd slot at bits 15..11).
 *
 * The rd field is produced by shifting the index left by 11 and passing it
 * through enc_i's immediate slot rather than through a dedicated rd encoder.
 * NOTE(review): this assumes enc_imm() keeps the low 16 bits unshifted so
 * the index lands at bits 15..11 — confirm against enc_imm's definition. */
#define enc_cop0_tx(sub, rt, rd) enc_i(op_cop0, (sub), (rt), ((rd) << 11))
/* COP0 Return From Exception (rfe) */
#define enc_rfe() 0x42000010
#define load_imm(rs,rt,imm) enc_i(op_lw, rs, rt, imm)
#define store_word(rs,rt,imm) enc_i(op_sw, rs, rt, imm)
#define add_ui(rs,rt,imm) enc_i(op_addiu, rs, rt, imm)
#define shift_ll(rs,rt,rd) enc_r(op_special, rs, rt, rd, 0, fc_sll)
/* --- Semantic Encoders (MIPS mnemonics) ---
 * Argument order matches the MIPS assembly syntax:
 * dest-first, then source operands, then immediate last.
 *
 * load_word(rt, base, off) → lw rt, off(base)
 * load_byte(rt, base, off) → lb rt, off(base)
 * load_half(rt, base, off) → lh rt, off(base)
 * load_byte_u(rt, base, off) → lbu rt, off(base)
 * load_half_u(rt, base, off) → lhu rt, off(base)
 * store_word(rt, base, off) → sw rt, off(base)
 * add_ui(rt, rs, imm) → addiu rt, rs, imm
 * andi_op(rt, rs, imm) → andi rt, rs, imm
 * ori_op(rt, rs, imm) → ori rt, rs, imm
 * xori_op(rt, rs, imm) → xori rt, rs, imm
 * lui_op(rt, imm) → lui rt, imm (rs slot fixed to R_0)
 * shift_ll(rd, rt, shamt) → sll rd, rt, shamt
 * jump_reg(rs) → jr rs
 * jump_link(rs, rd) → jalr rd, rs (macro takes the target first, link register second)
 * nop() → sll $0, $0, 0
 *
 * Every I-type wrapper forwards to enc_i(op, rs, rt, imm), so the macro's
 * first argument (the destination / transferred register) lands in the rt
 * slot and the base / source register lands in the rs slot.
 */
#define load_word(rt, base, off) enc_i(op_lw, (base), (rt), (off))
#define load_byte(rt, base, off) enc_i(op_lb, (base), (rt), (off))
#define load_half(rt, base, off) enc_i(op_lh, (base), (rt), (off))
#define load_byte_u(rt, base, off) enc_i(op_lbu, (base), (rt), (off))
#define load_half_u(rt, base, off) enc_i(op_lhu, (base), (rt), (off))
#define store_word(rt, base, off) enc_i(op_sw, (base), (rt), (off))
#define add_ui(rt, rs, imm) enc_i(op_addiu, (rs), (rt), (imm))
#define andi_op(rt, rs, imm) enc_i(op_andi, (rs), (rt), (imm))
#define ori_op(rt, rs, imm) enc_i(op_ori, (rs), (rt), (imm))
#define xori_op(rt, rs, imm) enc_i(op_xori, (rs), (rt), (imm))
#define lui_op(rt, imm) enc_i(op_lui, R_0, (rt), (imm))
#define jump_reg(rs) enc_r(op_special, rs, R_0, R_0, 0, fc_jr)
#define jump_nreg(rs,rt,rd) enc_r(op_special, rs, rt, rd, 0, fc_jalr)
/* Shift family (R-type). shift_ll/lr/ra: `sll rd, rt, shamt`
 * The rs slot is unused by these instructions and is fixed to R_0; the
 * shift amount goes in the shamt slot and the function code selects
 * sll / srl / sra. */
#define shift_ll(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_sll)
#define shift_lr(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_srl)
#define shift_ra(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_sra)
/* NOTE(review): this `nop()` passes `rdiscard` as the shift amount and a
 * second `nop()` with a different body follows further down. Keep only one:
 * redefining a function-like macro with a different replacement list is a
 * constraint violation. */
#define nop() shift_ll(rdiscard, rdiscard, rdiscard)
/* jr rs — jump to address in rs. */
#define jump_reg(rs) enc_r(op_special, (rs), R_0, R_0, 0, fc_jr)
/* jalr — link in rd (default $ra) and jump to address in rs. The macro takes
 * the jump target first and the link register second; the assembler
 * mnemonic writes them the other way round (`jalr rd, rs`).
 * Layout: [op_special][rs:5][rt=0:5][rd:5][shamt=0:5][fc_jalr=0x09] */
#define jump_link(rs, rd) enc_r(op_special, (rs), R_0, (rd), 0, fc_jalr)
/* Back-compat alias: the old `load_imm` was a misnomer for `lw`.
 * NOTE(review): the alias keeps the name but takes load_word's operand
 * order (rt, base, off). The earlier `load_imm(rs, rt, imm)` took the base
 * register first, so a call site that has not been migrated will silently
 * swap base and destination — audit remaining users. */
#define load_imm(rt, base, off) load_word(rt, base, off)
/* Canonical no-op: `sll $0, $0, 0` (shift amount is the literal 0). */
#define nop() shift_ll(rdiscard, rdiscard, 0)
// FI_ void emit_load_imm(U4 rs, U4 rt, U4 imm) { emit(load_imm()); }
@@ -178,15 +214,28 @@ enum {
bios_table_addr = 0xA0,
};
/* Flushes the Instruction Cache */
/* Flushes the Instruction Cache (PSX A-function 0x44 via BIOS stub at 0xA0).
 *
 * Sequence (per MIPS ABI; arguments in arg registers, RA pushed to stack):
 * 1. sp -= 8; sw $ra, 4($sp) ; save RA
 * 2. rret_0 = bios_flushcache ; BIOS function number (0x44)
 * 3. rtmp_0 = bios_table_addr ; address of the BIOS A-function stub (0xA0)
 * 4. jalr $ra, rtmp_0 ; call BIOS(flushcache), link in $ra
 * nop ; branch delay slot
 * 5. lw $ra, 4($sp); jr $ra ; restore & return
 * 6. sp += 8 ; executes in the delay slot of the jr
 *
 * NOTE(review): two points to confirm against the hardware documentation:
 * - MIPS I has a load delay slot: the instruction directly after `lw` still
 *   sees the old register value. `jr $ra` immediately follows `lw $ra`, so
 *   it may jump through the stale $ra; a nop (or an unrelated instruction)
 *   between the two would avoid that.
 * - The PSX BIOS A-function convention is documented as taking the function
 *   number in $t1 ($9). The per-line comment below calls the destination
 *   $a0 and the constant is named `rret_0`; verify which register it maps to.
 */
I_
Code CodeBlob_(mips_flush_icache) {
add_ui(rstack_ptr, rstack_ptr, -8),
store_word(rstack_ptr, rret_addr, 4),
add_ui(rdiscard, rret_0, bios_flushcache), add_ui(rdiscard, rtmp_0, bios_table_addr),
jump_nreg(rtmp_0, rdiscard, rret_addr),
nop(), load_imm(rstack_ptr, rret_addr, 4), jump_reg(rret_addr),
add_ui(rstack_ptr, rstack_ptr, 8)
add_ui(rstack_ptr, rstack_ptr, -8), /* sp -= 8 */
store_word(rret_addr, rstack_ptr, 4), /* sw $ra, 4($sp) */
add_ui(rret_0, rdiscard, bios_flushcache), /* addiu $a0, $0, 0x44 */
add_ui(rtmp_0, rdiscard, bios_table_addr), /* addiu $t0, $0, 0xA0 */
jump_link(rtmp_0, rret_addr), /* jalr $ra, $t0 */
nop(), /* BD slot */
load_word(rret_addr, rstack_ptr, 4), /* lw $ra, 4($sp) */
jump_reg(rret_addr), /* jr $ra */
add_ui(rstack_ptr, rstack_ptr, 8) /* sp += 8 (BD) */
};
FI_ void mips_flush_icache(void) { C_(VoidFn*, codeblob_mips_flush_icache)(); }
@@ -194,12 +243,15 @@ FI_ void mips_flush_icache(void) { C_(VoidFn*, codeblob_mips_flush_icache)(); }
/* asm_mips_flush_icache()
 *
 * Inline-asm form of the instruction-cache flush: the same instruction words
 * as the mips_flush_icache code blob, emitted in place through asm_inline()
 * and followed by an asm_clobber(clb_system) clobber list.
 *
 * Words emitted, in order: sp -= 8; save $ra at 4($sp); load the BIOS
 * function number and the BIOS stub address; jalr to the stub; nop in the
 * delay slot; reload $ra from 4($sp); jr $ra; sp += 8 in the jr delay slot.
 *
 * NOTE(review): the sequence ends in `jr $ra`. Emitted inline (rather than
 * called as a separate blob) that jump leaves the enclosing C function
 * without running its epilogue, and the block also modifies $sp and $ra —
 * confirm this is intended and that `clb_system` (defined outside this
 * view) covers every register the BIOS call may change.
 */
#define asm_mips_flush_icache() asm volatile( \
asm_inline( \
add_ui(rstack_ptr, rstack_ptr, -8) \
, store_word(rstack_ptr, rret_addr, 4) \
, add_ui(rdiscard, rret_0, bios_flushcache), add_ui(rdiscard, rtmp_0, bios_table_addr) \
, jump_nreg(rtmp_0, rdiscard, rret_addr) \
, nop(), load_imm(rstack_ptr, rret_addr, 4), jump_reg(rret_addr) \
, add_ui(rstack_ptr, rstack_ptr, 8) \
add_ui(rstack_ptr, rstack_ptr, -8) \
, store_word(rret_addr, rstack_ptr, 4) \
, add_ui(rret_0, rdiscard, bios_flushcache) \
, add_ui(rtmp_0, rdiscard, bios_table_addr) \
, jump_link(rtmp_0, rret_addr) \
, nop() \
, load_word(rret_addr, rstack_ptr, 4) \
, jump_reg(rret_addr) \
, add_ui(rstack_ptr, rstack_ptr, 8) \
) \
asm_clobber( clb_system ) \
)
+3 -20
View File
@@ -9,6 +9,7 @@
#include "duffle/memory.h"
#include "duffle/math.h"
#include "duffle/gp.h"
#include "duffle/gte.h"
#include "hello_gte.h"
enum {
@@ -161,26 +162,6 @@ void gp_display_frame(DoubleBuffer* screen_buf, S2* active_buf_id, U4* ordering_
void render(void) {
}
// #define gte_ldv0(r0) \
// __asm__ volatile( \
// "lwc2 $0, 0( %0 );" \
// "lwc2 $1, 4( %0 )" \
// : \
// : "r"(r0))
/**
* @brief Loads a single V3_S2 to GTE vector register V0
*
* @details Loads values from an V3_S2 struct to GTE data registers C2_VXY0
* and C2_VZ0.
*/
// #define gte_ldv0( r0 ) __asm__ volatile ( \
// "lwc2 $0, 0( %0 );" \
// "lwc2 $1, 4( %0 );" \
// : \
// : "r"( r0 ) \
// : "$t0" )
void update(PrimitiveArena* pa, U4* ordering_buf)
{
orderingtbl_clear_reverse(ordering_buf, OrderingTbl_Len);
@@ -262,6 +243,8 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
V3_S2* p1 = & static_mem.floor.verts[face->y];
V3_S2* p2 = & static_mem.floor.verts[face->z];
asm_gte_load_v0(p0);
nclip = rtp_avg_nclip_a3_v3s2(p0, p1, p2
, & tri->p0, & tri->p1, & tri->p2
, & p, & orderingtbl_z, & flag