mirror of
https://github.com/Ed94/pikuma_ps1.git
synced 2026-06-25 12:54:59 -07:00
WIP: still learning
This commit is contained in:
+58
-4
@@ -254,9 +254,63 @@
|
||||
#define _INL_98(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36,p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49,p50,p51,p52,p53,p54,p55,p56,p57,p58,p59,p60,p61,p62,p63,p64,p65,p66,p67,p68,p69,p70,p71,p72,p73,p74,p75,p76,p77,p78,p79,p80,p81,p82,p83,p84,p85,p86,p87,p88,p89,p90,p91,p92,p93,p94,p95,p96,p97) ".word " _STR98 : : _OP90,"i"(p90),"i"(p91),"i"(p92),"i"(p93),"i"(p94),"i"(p95),"i"(p96),"i"(p97)
|
||||
#define _INL_99(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36,p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49,p50,p51,p52,p53,p54,p55,p56,p57,p58,p59,p60,p61,p62,p63,p64,p65,p66,p67,p68,p69,p70,p71,p72,p73,p74,p75,p76,p77,p78,p79,p80,p81,p82,p83,p84,p85,p86,p87,p88,p89,p90,p91,p92,p93,p94,p95,p96,p97,p98) ".word " _STR99 : : _OP90,"i"(p90),"i"(p91),"i"(p92),"i"(p93),"i"(p94),"i"(p95),"i"(p96),"i"(p97),"i"(p98)
|
||||
|
||||
/* The AST Builders */
|
||||
#define asm_clobber(...) : __VA_ARGS__
|
||||
#define asm_inline(...) m_expand(glue(_INL_, _ASM_COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__))
|
||||
/* ============================================================================
|
||||
* AST BUILDERS — assemble a complete inline-asm block
|
||||
* ============================================================================
|
||||
*
|
||||
* A complete GCC inline-asm statement has up to 4 sections separated by `:`:
|
||||
*
|
||||
* asm volatile ( "code" : OUTPUTS : INPUTS : CLOBBERS );
|
||||
*
|
||||
* Every section-builder below prepends the `:` separator that GCC requires,
|
||||
* so you can compose them inline without thinking about punctuation. The
|
||||
* master `asm_block(...)` then wraps the four sections in `asm volatile (...)`.
|
||||
*
|
||||
* asm_block(
|
||||
* asm_code( "..." ),
|
||||
* asm_out ( "=r"(x), "+m"(y) ), // optional
|
||||
* asm_in ( "r"(a), "m"(b) ), // optional
|
||||
* asm_clb ( "$8", "memory" ) // optional
|
||||
* );
|
||||
*
|
||||
* Common idioms (kept for back-compat / terseness):
|
||||
*
|
||||
* asm_blob(asm_inline(...), asm_clobber(...)) // 2-section, no I/O
|
||||
* asm_block(asm_inline(...), , , ) // 4-section, empty
|
||||
*/
|
||||
|
||||
/* The Shell */
|
||||
/* `asm_code` is a passthrough — it does NOT prepend a colon, since the code
|
||||
* section is always the first (no separator needed before it). The format
|
||||
* string + `"i"(...)` operand list are produced by `asm_inline(...)` and
|
||||
* just pass through unchanged. */
|
||||
#define asm_code(...) __VA_ARGS__
|
||||
|
||||
/* `asm_out` prepends `:` — separates code/outputs/inputs/clobbers */
|
||||
#define asm_out(...) : __VA_ARGS__
|
||||
/* `asm_in` prepends `:` */
|
||||
#define asm_in(...) : __VA_ARGS__
|
||||
/* `asm_clb` prepends `:` */
|
||||
#define asm_clb(...) : __VA_ARGS__
|
||||
|
||||
/* `asm_clobber` is the legacy single-section name. Kept for existing
|
||||
* call-sites that put inputs *before* clobbers and want both as one colon-
|
||||
* prefixed block (i.e. the user wrote `: "r"(x) ... : "..."` by hand). */
|
||||
#define asm_clobber(...) : __VA_ARGS__
|
||||
|
||||
/* `asm_inline(...)` dispatches into `_INL_<count>` to emit up to 99 encoded
|
||||
* instruction words. This is the "compiled-instruction" form of `asm_code`. */
|
||||
#define asm_inline(...) m_expand(glue(_INL_, _ASM_COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__))
|
||||
|
||||
/* `asm_blob(inlines, clobbers)` — the original 2-section shell. Emits
|
||||
* `asm volatile ( inlines clobbers )`
|
||||
* which is the `.word`-only shape (no inputs/outputs): the inlines expand
|
||||
* to `".word %c0, ..." : : "i"(...)` already including the empty output
|
||||
* and input sections via their trailing `:`, so clobbers just tacks on the
|
||||
* end. */
|
||||
#define asm_blob(inlines, clobbers) asm volatile ( inlines clobbers )
|
||||
|
||||
/* `asm_block(code, outs, ins, clb)` — the full 4-section shell. Each
|
||||
* argument is expected to already include its own leading `:` (via the
|
||||
* `asm_out` / `asm_in` / `asm_clb` builders) or be empty. The `code`
|
||||
* argument should NOT have a leading `:`. */
|
||||
#define asm_block(code, outs, ins, clb) asm volatile ( code outs ins clb )
|
||||
|
||||
+44
-7
@@ -119,9 +119,22 @@ enum {
|
||||
gte_cr_OFX = 30, gte_cr_OFY = 31,
|
||||
};
|
||||
|
||||
/* COP2 (GTE) Transfer Format
|
||||
* Opcode is always op_cop2. The 'sub' field determines direction (MT/MF). */
|
||||
#define enc_cop2_tx(sub, rt, rd) enc_op(op_cop2) | enc_rs(sub) | enc_rt(rt) | enc_rd(rd)
|
||||
/* COP2 (GTE) Transfer Format: ctc2 rt, rd or cfc2 rt, rd
|
||||
* Layout: [op_cop2:6][sub:5][rt:5][rd:5][0:11]
|
||||
* - sub: cop_mf (0x00) for cfc2, cop_mt (0x04) for ctc2
|
||||
* - rt: GPR source/dest
|
||||
* - rd: COP2 control register index (0..31) */
|
||||
#define enc_cop2_tx(sub, rt, rd) (enc_op(op_cop2) | enc_rs(sub) | enc_rt(rt) | enc_rd(rd))
|
||||
|
||||
/* COP2 Data Load (lwc2): `lwc2 rt, off(rs)`
|
||||
* Layout: [op_lwc2:6][rs:5][rt:5][imm:16]
|
||||
* - rs: GPR base address
|
||||
* - rt: COP2 data register index (0..31)
|
||||
* - imm: signed 16-bit offset
|
||||
* NOTE: When `rs` is a runtime register, the encoding cannot be pre-baked
|
||||
* into a .word — use the string-style `asm_gte_load_v0` macro below instead. */
|
||||
#define enc_cop2_lwc2(rt, base, off) enc_i(op_lwc2, (base), (rt), (off))
|
||||
#define enc_cop2_swc2(rt, base, off) enc_i(op_swc2, (base), (rt), (off))
|
||||
|
||||
/* GTE Command Format (The math engine trigger)
|
||||
* Opcode is always op_cop2, RS is always 1 (CO).
|
||||
@@ -145,7 +158,7 @@ enum {
|
||||
#define enc_gte_cmd(cmd) (((cmd) & gte_mask_cmd) << gte_shift_cmd)
|
||||
|
||||
/* Composite: all six GTE fields + the COP2/CO base. */
|
||||
#define enc_gte_cmd(sf, mx, v, cv, lm, cmd) ( \
|
||||
#define enc_gte_cmdw(sf, mx, v, cv, lm, cmd) ( \
|
||||
gte_cmd_base \
|
||||
| enc_gte_sf(sf) \
|
||||
| enc_gte_mx(mx) \
|
||||
@@ -155,6 +168,26 @@ enum {
|
||||
| enc_gte_cmd(cmd) \
|
||||
)
|
||||
|
||||
/**
|
||||
* @brief Loads a single SVECTOR to GTE vector register V0
|
||||
*
|
||||
* @details Loads values from an SVECTOR struct to GTE data registers C2_VXY0
|
||||
* (XY at offset 0) and C2_VZ0 (Z at offset 4) using `lwc2`.
|
||||
*
|
||||
* Uses string-style GCC inline asm with `%0` substitution because the
|
||||
* base register `r0` is a runtime GPR chosen by the compiler — it cannot
|
||||
* be encoded into a static `.word` constant.
|
||||
*
|
||||
* Usage:
|
||||
* asm_gte_load_v0(svector_ptr);
|
||||
*/
|
||||
/* Loads two consecutive words starting at pointer `r0` into GTE data
 * registers 0 and 1 with a pair of `lwc2` instructions.
 *
 * r0: pointer expression; the compiler picks the GPR that holds it ("r").
 *
 * The "memory" clobber is required: the asm dereferences `r0`, but only the
 * pointer value is listed as an input. Without the clobber the compiler is
 * free to keep pending stores to the pointed-to vector in registers, or to
 * reorder them past this statement. */
#define asm_gte_load_v0(r0) asm volatile(                               \
	"lwc2 $0, 0(%0);"    /* GTE data reg 0 <- word at offset 0 */       \
	"lwc2 $1, 4(%0);"    /* GTE data reg 1 <- word at offset 4 */       \
	: /* no outputs */                                                  \
	: "r"(r0)                                                           \
	: "memory"                                                          \
)
|
||||
|
||||
/* asm_gte_matrix_set_rotation(r0)
|
||||
*
|
||||
* Loads the 3x3 rotation matrix at `r0` into the GTE's rotation-matrix
|
||||
@@ -178,6 +211,10 @@ enum {
|
||||
* ctc2 $13, $3 ; → C2_RT21
|
||||
* ctc2 $14, $4 ; → C2_RT22
|
||||
*
|
||||
* Uses string-style GCC inline asm with `%0` substitution because the
|
||||
* base register `r0` is a runtime GPR — the `lw` offsets use literal
|
||||
* values (0, 4, 8, ...) so only the base register needs substitution.
|
||||
*
|
||||
* WARNING: Incomplete by design. The source macro only writes RT11..RT22
|
||||
* (5 of 9 rotation elements); RT23 and the entire RT3x row are left
|
||||
* untouched. Real libpsn00b SetRotMatrix writes all 9. Use only when the
|
||||
@@ -185,7 +222,7 @@ enum {
|
||||
* get stale-RT2x/RT3x artifacts in RTPS/RTPT/MVMVA output.
|
||||
*/
|
||||
#define asm_gte_matrix_set_rotation(r0) \
|
||||
asm volatile( \
|
||||
asm volatile( \
|
||||
asm_inline( \
|
||||
load_imm(R_T4, r0, 0), \
|
||||
load_imm(R_T5, r0, 4), \
|
||||
@@ -199,6 +236,6 @@ enum {
|
||||
enc_cop2_tx(cop_mt, R_T6, 4) \
|
||||
) \
|
||||
asm_clobber( clb_system, "$12", "$13", "$14") \
|
||||
: \
|
||||
: "r"(r0) \
|
||||
: \
|
||||
: "r"(r0) \
|
||||
)
|
||||
|
||||
+74
-22
@@ -59,9 +59,11 @@ enum {
|
||||
, op_lw = 0x23 /* Load Word */
|
||||
, op_lbu = 0x24 /* Load Byte Unsigned */
|
||||
, op_lhu = 0x25 /* Load Halfword Unsigned */
|
||||
, op_lwc2 = 0x32 /* Load Word to Coprocessor 2 (GTE) */
|
||||
, op_sb = 0x28 /* Store Byte */
|
||||
, op_sh = 0x29 /* Store Halfword */
|
||||
, op_sw = 0x2B /* Store Word */
|
||||
, op_swc2 = 0x3A /* Store Word from Coprocessor 2 (GTE) */
|
||||
|
||||
, op_load_addr = op_la
|
||||
, op_load_imm = op_li
|
||||
@@ -142,21 +144,55 @@ enum { _BitOffsets = 0
|
||||
/* MIPS I-Type Instruction Format (Immediate/Constant) */
|
||||
#define enc_i(op, rs, rt, imm) (enc_op(op) | enc_rs(rs) | enc_rt(rt) | enc_imm(imm))
|
||||
|
||||
/* COP0 (System) Transfer Format */
|
||||
#define enc_cop0_tx(sub, rt, rd) enc_op(op_cop0) | enc_rs(sub) | enc_rt(rt) | enc_rd(rd)
|
||||
/* COP0 (System) Transfer Format: mtc0 rt, rd or mfc0 rt, rd
|
||||
* `sub` is the COP0 sub-opcode (cop_mf=0 or cop_mt=4), placed in rs slot.
|
||||
* `rt` is the GPR operand (in rt slot).
|
||||
* `rd` is the COP0 register index (in rd slot at bits 15..11). */
|
||||
#define enc_cop0_tx(sub, rt, rd) enc_i(op_cop0, (sub), (rt), ((rd) << 11))
|
||||
|
||||
/* COP0 Return From Exception (rfe) */
|
||||
#define enc_rfe() 0x42000010
|
||||
|
||||
#define load_imm(rs,rt,imm) enc_i(op_lw, rs, rt, imm)
|
||||
#define store_word(rs,rt,imm) enc_i(op_sw, rs, rt, imm)
|
||||
#define add_ui(rs,rt,imm) enc_i(op_addiu, rs, rt, imm)
|
||||
#define shift_ll(rs,rt,rd) enc_r(op_special, rs, rt, rd, 0, fc_sll)
|
||||
/* --- Semantic Encoders (MIPS mnemonics) ---
|
||||
* Argument order matches the MIPS assembly syntax:
|
||||
* dest-first, then source operands, then immediate last.
|
||||
*
|
||||
* load_word(rt, base, off) → lw rt, off(base)
|
||||
* store_word(rt, base, off) → sw rt, off(base)
|
||||
* add_ui(rt, rs, imm) → addiu rt, rs, imm
|
||||
* shift_ll(rd, rt, shamt) → sll rd, rt, shamt
|
||||
* jump_reg(rs) → jr rs
|
||||
* jump_link(rs, rd) → jalr rd, rs (rd receives the link address; note the macro takes rs first)
|
||||
* nop() → sll $0, $0, 0
|
||||
*/
|
||||
#define load_word(rt, base, off) enc_i(op_lw, (base), (rt), (off))
|
||||
#define load_byte(rt, base, off) enc_i(op_lb, (base), (rt), (off))
|
||||
#define load_half(rt, base, off) enc_i(op_lh, (base), (rt), (off))
|
||||
#define load_byte_u(rt, base, off) enc_i(op_lbu, (base), (rt), (off))
|
||||
#define load_half_u(rt, base, off) enc_i(op_lhu, (base), (rt), (off))
|
||||
#define store_word(rt, base, off) enc_i(op_sw, (base), (rt), (off))
|
||||
#define add_ui(rt, rs, imm) enc_i(op_addiu, (rs), (rt), (imm))
|
||||
#define andi_op(rt, rs, imm) enc_i(op_andi, (rs), (rt), (imm))
|
||||
#define ori_op(rt, rs, imm) enc_i(op_ori, (rs), (rt), (imm))
|
||||
#define xori_op(rt, rs, imm) enc_i(op_xori, (rs), (rt), (imm))
|
||||
#define lui_op(rt, imm) enc_i(op_lui, R_0, (rt), (imm))
|
||||
|
||||
#define jump_reg(rs) enc_r(op_special, rs, R_0, R_0, 0, fc_jr)
|
||||
#define jump_nreg(rs,rt,rd) enc_r(op_special, rs, rt, rd, 0, fc_jalr)
|
||||
/* Shift family (R-type). shift_ll/lr/ra: `sll rd, rt, shamt` */
|
||||
#define shift_ll(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_sll)
|
||||
#define shift_lr(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_srl)
|
||||
#define shift_ra(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_sra)
|
||||
|
||||
#define nop() shift_ll(rdiscard, rdiscard, rdiscard)
|
||||
/* jr rs — jump to address in rs. */
|
||||
#define jump_reg(rs) enc_r(op_special, (rs), R_0, R_0, 0, fc_jr)
|
||||
|
||||
/* jalr rd, rs — writes the link address to rd and jumps to the address in rs.
|
||||
* Layout: [op_special][rs:5][rt=0:5][rd:5][shamt=0:5][fc_jalr=0x09] */
|
||||
#define jump_link(rs, rd) enc_r(op_special, (rs), R_0, (rd), 0, fc_jalr)
|
||||
|
||||
/* Back-compat alias: the old `load_imm` was a misnomer for `lw`. */
|
||||
#define load_imm(rt, base, off) load_word(rt, base, off)
|
||||
|
||||
#define nop() shift_ll(rdiscard, rdiscard, 0)
|
||||
|
||||
// FI_ void emit_load_imm(U4 rs, U4 rt, U4 imm) { emit(load_imm()); }
|
||||
|
||||
@@ -178,15 +214,28 @@ enum {
|
||||
bios_table_addr = 0xA0,
|
||||
};
|
||||
|
||||
/* Flushes the Instruction Cache */
|
||||
/* Flushes the Instruction Cache (PSX A-function 0x44 via BIOS stub at 0xA0).
|
||||
*
|
||||
* Sequence (per MIPS ABI; arguments in arg registers, RA pushed to stack):
|
||||
* 1. sp -= 8; sw $ra, 4($sp) ; save RA
|
||||
* 2. $a0 = bios_flushcache (arg0)
|
||||
* 3. $t0 = bios_table_addr ; t0 = &BIOS A-function table
|
||||
* 4. jalr $t0, $ra ; call BIOS(flushcache)
|
||||
* nop ; branch delay slot
|
||||
* 5. lw $ra, 4($sp); jr $ra ; restore & return
|
||||
* 6. sp += 8 ; executes in the branch delay slot of the `jr` in step 5
|
||||
*/
|
||||
I_
|
||||
Code CodeBlob_(mips_flush_icache) {
|
||||
add_ui(rstack_ptr, rstack_ptr, -8),
|
||||
store_word(rstack_ptr, rret_addr, 4),
|
||||
add_ui(rdiscard, rret_0, bios_flushcache), add_ui(rdiscard, rtmp_0, bios_table_addr),
|
||||
jump_nreg(rtmp_0, rdiscard, rret_addr),
|
||||
nop(), load_imm(rstack_ptr, rret_addr, 4), jump_reg(rret_addr),
|
||||
add_ui(rstack_ptr, rstack_ptr, 8)
|
||||
add_ui(rstack_ptr, rstack_ptr, -8), /* sp -= 8 */
|
||||
store_word(rret_addr, rstack_ptr, 4), /* sw $ra, 4($sp) */
|
||||
add_ui(rret_0, rdiscard, bios_flushcache), /* addiu $a0, $0, 0x44 */
|
||||
add_ui(rtmp_0, rdiscard, bios_table_addr), /* addiu $t0, $0, 0xA0 */
|
||||
jump_link(rtmp_0, rret_addr), /* jalr $t0, $ra */
|
||||
nop(), /* BD slot */
|
||||
load_word(rret_addr, rstack_ptr, 4), /* lw $ra, 4($sp) */
|
||||
jump_reg(rret_addr), /* jr $ra */
|
||||
add_ui(rstack_ptr, rstack_ptr, 8) /* sp += 8 (BD) */
|
||||
};
|
||||
FI_ void mips_flush_icache(void) { C_(VoidFn*, codeblob_mips_flush_icache)(); }
|
||||
|
||||
@@ -194,12 +243,15 @@ FI_ void mips_flush_icache(void) { C_(VoidFn*, codeblob_mips_flush_icache)(); }
|
||||
|
||||
/* Inline-asm form of the BIOS cache-flush sequence, emitted as raw `.word`
 * values through asm_inline().
 *
 * Because the words are pre-encoded, the assembler cannot fill hazards for
 * us: a `nop()` separates the `lw` that reloads the return address from the
 * `jr` that uses it (the R3000 load delay slot is not interlocked).
 *
 * NOTE(review): the sequence ends in `jr` on the return-address register, so
 * control leaves the enclosing function from inside this asm statement —
 * presumably intended only for a function whose whole body is this macro;
 * confirm at the call sites. */
#define asm_mips_flush_icache() asm volatile(                                      \
	asm_inline(                                                                    \
		  add_ui(rstack_ptr, rstack_ptr, -8)         /* sp -= 8                 */ \
		, store_word(rret_addr, rstack_ptr, 4)       /* save return address     */ \
		, add_ui(rret_0, rdiscard, bios_flushcache)  /* function index          */ \
		, add_ui(rtmp_0, rdiscard, bios_table_addr)  /* BIOS entry address      */ \
		, jump_link(rtmp_0, rret_addr)               /* call                    */ \
		, nop()                                      /* branch delay slot       */ \
		, load_word(rret_addr, rstack_ptr, 4)        /* reload return address   */ \
		, nop()                                      /* load delay slot         */ \
		, jump_reg(rret_addr)                        /* return                  */ \
		, add_ui(rstack_ptr, rstack_ptr, 8)          /* sp += 8 (delay slot)    */ \
	)                                                                              \
	asm_clobber( clb_system )                                                      \
)
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "duffle/memory.h"
|
||||
#include "duffle/math.h"
|
||||
#include "duffle/gp.h"
|
||||
#include "duffle/gte.h"
|
||||
#include "hello_gte.h"
|
||||
|
||||
enum {
|
||||
@@ -161,26 +162,6 @@ void gp_display_frame(DoubleBuffer* screen_buf, S2* active_buf_id, U4* ordering_
|
||||
void render(void) {
|
||||
}
|
||||
|
||||
// #define gte_ldv0(r0) \
|
||||
// __asm__ volatile( \
|
||||
// "lwc2 $0, 0( %0 );" \
|
||||
// "lwc2 $1, 4( %0 )" \
|
||||
// : \
|
||||
// : "r"(r0))
|
||||
|
||||
/**
|
||||
* @brief Loads a single V3_S2 to GTE vector register V0
|
||||
*
|
||||
* @details Loads values from an V3_S2 struct to GTE data registers C2_VXY0
|
||||
* and C2_VZ0.
|
||||
*/
|
||||
// #define gte_ldv0( r0 ) __asm__ volatile ( \
|
||||
// "lwc2 $0, 0( %0 );" \
|
||||
// "lwc2 $1, 4( %0 );" \
|
||||
// : \
|
||||
// : "r"( r0 ) \
|
||||
// : "$t0" )
|
||||
|
||||
void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
{
|
||||
orderingtbl_clear_reverse(ordering_buf, OrderingTbl_Len);
|
||||
@@ -262,6 +243,8 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
V3_S2* p1 = & static_mem.floor.verts[face->y];
|
||||
V3_S2* p2 = & static_mem.floor.verts[face->z];
|
||||
|
||||
asm_gte_load_v0(p0);
|
||||
|
||||
nclip = rtp_avg_nclip_a3_v3s2(p0, p1, p2
|
||||
, & tri->p0, & tri->p1, & tri->p2
|
||||
, & p, & orderingtbl_z, & flag
|
||||
|
||||
Reference in New Issue
Block a user