mirror of
https://github.com/Ed94/pikuma_ps1.git
synced 2026-06-20 02:32:21 -07:00
hell yes.
This commit is contained in:
@@ -29,6 +29,26 @@
|
||||
|
||||
#define asm __asm__
|
||||
#define align_(value) __attribute__((aligned (value))) // for easy alignment
|
||||
|
||||
/* reg_str(n) — Stringify an integer register id into the GCC asm
|
||||
* string form (e.g. 12 → "$12"). Use this anywhere GCC's parser
|
||||
* expects a literal string identifying a register: clobber lists,
|
||||
* asm templates, etc. The two-level macro is the standard preprocessor
|
||||
* idiom for forcing one level of expansion before stringify — without
|
||||
* it, `#n` would stringify the macro name `R_T4_Code` to `"R_T4_Code"` instead
|
||||
* of expanding `R_T4_Code` to its value first.
|
||||
*
|
||||
* For declaring a register variable bound to a specific GPR, use the
|
||||
* `rgcc(n)` bundle from gcc_asm.h instead — it adds the `__asm__()`
|
||||
* qualifier around the string.
|
||||
*
|
||||
* register V3_S2* p0 __asm__(reg_str(R_T4_Code)) = ...; // verbose
|
||||
* register V3_S2* p0 rgcc(R_T4) = ...; // bundled
|
||||
*
|
||||
* asm volatile("nop" : : : reg_str(R_RA_Code), "memory"); // clobber list */
|
||||
#define reg_str_(n) "$" #n
|
||||
#define reg_str(n) reg_str_(n)
|
||||
#define align_(value) __attribute__((aligned (value))) // for easy alignment
|
||||
#define C_(type,data) ((type)(data)) // for enforced precedence
|
||||
#define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path
|
||||
#define I_ internal inline
|
||||
|
||||
@@ -326,3 +326,58 @@
|
||||
*
|
||||
* 3 colons total. Always valid. */
|
||||
#define asm_inline(...) m_expand(glue(_INL_, _ASM_COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__))
|
||||
|
||||
/* ------------------------------------------------------------------------ *
|
||||
* rgcc(n) — GCC-specific bundle for register-variable declarations.
|
||||
*
|
||||
* Produces `__asm__(reg_str(tmpl(n, Code)))` at expansion time. The
|
||||
* `tmpl(n, Code)` indirection derives the preprocessor-visible `_Code`
|
||||
* form from the enum name (which the preprocessor can't expand on
|
||||
* its own). So a call like
|
||||
*
|
||||
* register V3_S2* p rgcc(R_T4) = verts[0].ptr;
|
||||
*
|
||||
* expands (via tmpl) to
|
||||
*
|
||||
* register V3_S2* p __asm__(reg_str(R_T4_Code))
|
||||
* = verts[0].ptr;
|
||||
*
|
||||
* which (via reg_str) becomes
|
||||
*
|
||||
* register V3_S2* p __asm__("$12") = verts[0].ptr;
|
||||
*
|
||||
* Why bundle the `__asm__()` wrapper?
|
||||
* - The integer R_T4 (= 12, via R_T4_Code) is the canonical truth.
|
||||
* - The string "$12" is derived from it via reg_str, so they
|
||||
* cannot drift apart.
|
||||
* - Spelling `__asm__(reg_str(R_T4_Code))` at every call site is
|
||||
* noise. `rgcc(R_T4)` says what you mean.
|
||||
*
|
||||
* The two-level form (rgcc_/rgcc) is the standard preprocessor idiom
|
||||
* for forcing one level of expansion before the bundle's `__asm__`
|
||||
* token is written; without it, `rgcc(R_T4)` would expand to
|
||||
* `__asm__(reg_str(tmpl(R_T4, Code)))` but the inner `tmpl(R_T4, Code)`
|
||||
* would token-paste prematurely.
|
||||
*
|
||||
* Layering: reg_str lives in dsl.h (the integer-to-string primitive,
|
||||
* compiler-agnostic in name). tmpl lives in dsl.h (the token-paste
|
||||
* glue). rgcc lives here (gcc_asm.h) because the `__asm__` keyword
|
||||
* is GCC-specific. Anyone porting to a different compiler's asm
|
||||
* dialect overrides rgcc, and the integer→string derivation in
|
||||
* reg_str can be retargeted in one place.
|
||||
*
|
||||
* For clobber lists and asm-template strings, use the bare
|
||||
* `reg_str(R_T4_Code)` — you don't want __asm__() there, you just
|
||||
* want the string.
|
||||
* ------------------------------------------------------------------------ */
|
||||
#define rgcc_(n) __asm__(reg_str(tmpl(n, Code)))
|
||||
#define rgcc(n) rgcc_(n)
|
||||
|
||||
/* rgcc_ref(n) — GCC operand-reference form "%N". Not currently used
|
||||
* by the placeholder-pun macros (the .word bodies are fully baked
|
||||
* at compile time and have no runtime operand references), but kept
|
||||
* here for completeness in case a future asm template needs to refer
|
||||
* to a runtime input by position. Mirror of rgcc but produces "%N"
|
||||
* instead of "$N". */
|
||||
#define rgcc_ref_(n) "%" #n
|
||||
#define rgcc_ref(n) rgcc_ref_(n)
|
||||
|
||||
+193
-66
@@ -42,8 +42,18 @@
|
||||
* asm_clobber( clb_system )
|
||||
* );
|
||||
*
|
||||
* // Runtime-base-register load — uses R_T4 ($12) under the hood:
|
||||
* gte_load_v0( my_svector_ptr );
|
||||
* // Runtime-base-register load — caller picks the base GPR:
|
||||
* register V3_S2* p_in_12 __asm__("$12") = verts[0].ptr;
|
||||
* gte_load_v0(p_in_12, R_T4); // R_T4 = 12 = $t4 = $12
|
||||
*
|
||||
* // Three independent bases for an RTPT pipeline:
|
||||
* register V3_S2* p0 __asm__("$12") = verts[0].ptr;
|
||||
* register V3_S2* p1 __asm__("$13") = verts[1].ptr;
|
||||
* register V3_S2* p2 __asm__("$14") = verts[2].ptr;
|
||||
* gte_load_v0(p0, R_T4);
|
||||
* gte_load_v1(p1, R_T5);
|
||||
* gte_load_v2(p2, R_T6);
|
||||
* gte_rtpt();
|
||||
*
|
||||
* STYLE NOTES
|
||||
* -----------
|
||||
@@ -65,16 +75,53 @@
|
||||
|
||||
/* C2 data registers */
|
||||
|
||||
/* --- GTE Data Registers (Coprocessor 2) --- */
|
||||
/* --- GTE Data Registers (Coprocessor 2) ---
|
||||
* Preprocessor-visible integer ids for the COP2 data register file.
|
||||
* Each enum value is bound to a parallel `_Code` `#define` so the
|
||||
* preprocessor can stringify the integer (for `reg_str`/`rgcc` paths).
|
||||
* Same pattern as the GPR `_Code` set in mips.h. */
|
||||
#define C2_VXY0_Code 0
|
||||
#define C2_VZ0_Code 1
|
||||
#define C2_VXY1_Code 2
|
||||
#define C2_VZ1_Code 3
|
||||
#define C2_VXY2_Code 4
|
||||
#define C2_VZ2_Code 5
|
||||
#define C2_RGB_Code 6
|
||||
#define C2_OTZ_Code 7
|
||||
#define C2_IR0_Code 8
|
||||
#define C2_IR1_Code 9
|
||||
#define C2_IR2_Code 10
|
||||
#define C2_IR3_Code 11
|
||||
#define C2_SXY0_Code 12
|
||||
#define C2_SXY1_Code 13
|
||||
#define C2_SXY2_Code 14
|
||||
#define C2_SXYP_Code 15
|
||||
#define C2_SZ0_Code 16
|
||||
#define C2_SZ1_Code 17
|
||||
#define C2_SZ2_Code 18
|
||||
#define C2_SZ3_Code 19
|
||||
#define C2_RGB0_Code 20
|
||||
#define C2_RGB1_Code 21
|
||||
#define C2_RGB2_Code 22
|
||||
#define C2_RES1_Code 23
|
||||
#define C2_MAC0_Code 24
|
||||
#define C2_MAC1_Code 25
|
||||
#define C2_MAC2_Code 26
|
||||
#define C2_MAC3_Code 27
|
||||
#define C2_IRGB_Code 28
|
||||
#define C2_ORGB_Code 29
|
||||
#define C2_LZCS_Code 30
|
||||
#define C2_LZCR_Code 31
|
||||
|
||||
enum {
|
||||
C2_VXY0 = 0, C2_VZ0 = 1, C2_VXY1 = 2, C2_VZ1 = 3,
|
||||
C2_VXY2 = 4, C2_VZ2 = 5, C2_RGB = 6, C2_OTZ = 7,
|
||||
C2_IR0 = 8, C2_IR1 = 9, C2_IR2 = 10, C2_IR3 = 11,
|
||||
C2_SXY0 = 12, C2_SXY1 = 13, C2_SXY2 = 14, C2_SXYP = 15,
|
||||
C2_SZ0 = 16, C2_SZ1 = 17, C2_SZ2 = 18, C2_SZ3 = 19,
|
||||
C2_RGB0 = 20, C2_RGB1 = 21, C2_RGB2 = 22, C2_RES1 = 23,
|
||||
C2_MAC0 = 24, C2_MAC1 = 25, C2_MAC2 = 26, C2_MAC3 = 27,
|
||||
C2_IRGB = 28, C2_ORGB = 29, C2_LZCS = 30, C2_LZCR = 31
|
||||
C2_VXY0 = C2_VXY0_Code, C2_VZ0 = C2_VZ0_Code, C2_VXY1 = C2_VXY1_Code, C2_VZ1 = C2_VZ1_Code,
|
||||
C2_VXY2 = C2_VXY2_Code, C2_VZ2 = C2_VZ2_Code, C2_RGB = C2_RGB_Code, C2_OTZ = C2_OTZ_Code,
|
||||
C2_IR0 = C2_IR0_Code, C2_IR1 = C2_IR1_Code, C2_IR2 = C2_IR2_Code, C2_IR3 = C2_IR3_Code,
|
||||
C2_SXY0 = C2_SXY0_Code, C2_SXY1 = C2_SXY1_Code, C2_SXY2 = C2_SXY2_Code, C2_SXYP = C2_SXYP_Code,
|
||||
C2_SZ0 = C2_SZ0_Code, C2_SZ1 = C2_SZ1_Code, C2_SZ2 = C2_SZ2_Code, C2_SZ3 = C2_SZ3_Code,
|
||||
C2_RGB0 = C2_RGB0_Code, C2_RGB1 = C2_RGB1_Code, C2_RGB2 = C2_RGB2_Code, C2_RES1 = C2_RES1_Code,
|
||||
C2_MAC0 = C2_MAC0_Code, C2_MAC1 = C2_MAC1_Code, C2_MAC2 = C2_MAC2_Code, C2_MAC3 = C2_MAC3_Code,
|
||||
C2_IRGB = C2_IRGB_Code, C2_ORGB = C2_ORGB_Code, C2_LZCS = C2_LZCS_Code, C2_LZCR = C2_LZCR_Code
|
||||
};
|
||||
|
||||
/* Semantic Aliases for GTE Data Registers */
|
||||
@@ -162,19 +209,53 @@ enum {
|
||||
gte_shift_cmd = 0, gte_width_cmd = 6, gte_mask_cmd = 0x3F,
|
||||
};
|
||||
|
||||
/* --- GTE Control Register Indices (for ctc2/cfc2) --- */
|
||||
/* --- GTE Control Register Indices (for ctc2/cfc2) ---
|
||||
* Preprocessor-visible integer ids for the COP2 control register file.
|
||||
* Each enum value is bound to a parallel `_Code` `#define` so the
|
||||
* preprocessor can stringify the integer (for `reg_str`/`rgcc` paths).
|
||||
* Same pattern as the GPR `_Code` set in mips.h. Note: indices 21-23
|
||||
* are not assigned a name in this list, so there's a gap. */
|
||||
#define gte_cr_RT11_Code 0
|
||||
#define gte_cr_RT12_Code 1
|
||||
#define gte_cr_RT13_Code 2
|
||||
#define gte_cr_RT21_Code 3
|
||||
#define gte_cr_RT22_Code 4
|
||||
#define gte_cr_RT23_Code 5
|
||||
#define gte_cr_RT31_Code 6
|
||||
#define gte_cr_RT32_Code 7
|
||||
#define gte_cr_RT33_Code 8
|
||||
#define gte_cr_TRX_Code 9
|
||||
#define gte_cr_TRY_Code 10
|
||||
#define gte_cr_TRZ_Code 11
|
||||
#define gte_cr_L11_Code 12
|
||||
#define gte_cr_L12_Code 13
|
||||
#define gte_cr_L13_Code 14
|
||||
#define gte_cr_L21_Code 15
|
||||
#define gte_cr_L22_Code 16
|
||||
#define gte_cr_L23_Code 17
|
||||
#define gte_cr_LR1_Code 18
|
||||
#define gte_cr_LR2_Code 19
|
||||
#define gte_cr_LR3_Code 20
|
||||
#define gte_cr_RBK_Code 24
|
||||
#define gte_cr_GBK_Code 25
|
||||
#define gte_cr_BBK_Code 26
|
||||
#define gte_cr_RFC_Code 27
|
||||
#define gte_cr_GFC_Code 28
|
||||
#define gte_cr_BFC_Code 29
|
||||
#define gte_cr_OFX_Code 30
|
||||
#define gte_cr_OFY_Code 31
|
||||
|
||||
enum {
|
||||
gte_cr_RT11 = 0, gte_cr_RT12 = 1, gte_cr_RT13 = 2,
|
||||
gte_cr_RT21 = 3, gte_cr_RT22 = 4, gte_cr_RT23 = 5,
|
||||
gte_cr_RT31 = 6, gte_cr_RT32 = 7, gte_cr_RT33 = 8,
|
||||
gte_cr_TRX = 9, gte_cr_TRY = 10, gte_cr_TRZ = 11,
|
||||
gte_cr_L11 = 12, gte_cr_L12 = 13, gte_cr_L13 = 14,
|
||||
gte_cr_L21 = 15, gte_cr_L22 = 16, gte_cr_L23 = 17,
|
||||
gte_cr_LR1 = 18, gte_cr_LR2 = 19, gte_cr_LR3 = 20,
|
||||
gte_cr_RBK = 24, gte_cr_GBK = 25, gte_cr_BBK = 26,
|
||||
gte_cr_RFC = 27, gte_cr_GFC = 28, gte_cr_BFC = 29,
|
||||
gte_cr_OFX = 30, gte_cr_OFY = 31,
|
||||
gte_cr_RT11 = gte_cr_RT11_Code, gte_cr_RT12 = gte_cr_RT12_Code, gte_cr_RT13 = gte_cr_RT13_Code,
|
||||
gte_cr_RT21 = gte_cr_RT21_Code, gte_cr_RT22 = gte_cr_RT22_Code, gte_cr_RT23 = gte_cr_RT23_Code,
|
||||
gte_cr_RT31 = gte_cr_RT31_Code, gte_cr_RT32 = gte_cr_RT32_Code, gte_cr_RT33 = gte_cr_RT33_Code,
|
||||
gte_cr_TRX = gte_cr_TRX_Code, gte_cr_TRY = gte_cr_TRY_Code, gte_cr_TRZ = gte_cr_TRZ_Code,
|
||||
gte_cr_L11 = gte_cr_L11_Code, gte_cr_L12 = gte_cr_L12_Code, gte_cr_L13 = gte_cr_L13_Code,
|
||||
gte_cr_L21 = gte_cr_L21_Code, gte_cr_L22 = gte_cr_L22_Code, gte_cr_L23 = gte_cr_L23_Code,
|
||||
gte_cr_LR1 = gte_cr_LR1_Code, gte_cr_LR2 = gte_cr_LR2_Code, gte_cr_LR3 = gte_cr_LR3_Code,
|
||||
gte_cr_RBK = gte_cr_RBK_Code, gte_cr_GBK = gte_cr_GBK_Code, gte_cr_BBK = gte_cr_BBK_Code,
|
||||
gte_cr_RFC = gte_cr_RFC_Code, gte_cr_GFC = gte_cr_GFC_Code, gte_cr_BFC = gte_cr_BFC_Code,
|
||||
gte_cr_OFX = gte_cr_OFX_Code, gte_cr_OFY = gte_cr_OFY_Code,
|
||||
};
|
||||
|
||||
enum { _C2_OPS_ = 0
|
||||
@@ -246,67 +327,102 @@ enum { _C2_OPS_ = 0
|
||||
* asm_gte_load_v0(svector_ptr);
|
||||
*/
|
||||
|
||||
/* Pre-baked constants: lwc2 $N, off($12) — plain integers the C compiler
|
||||
* constant-folds into .word directives. The R_T4 in the name reminds you
|
||||
* that the `rs` field is baked to R_T4 ($12), forcing the placeholder-pun
|
||||
* pattern below. */
|
||||
#define gte_lwc2_v0_RT4 enc_cop2_lwc2(gte_in_v0_xy, R_T4, 0)
|
||||
#define gte_lwc2_v0z_RT4 enc_cop2_lwc2(gte_in_v0_z, R_T4, 4)
|
||||
#define gte_lwc2_v1_RT4 enc_cop2_lwc2(gte_in_v1_xy, R_T4, 0)
|
||||
#define gte_lwc2_v1z_RT4 enc_cop2_lwc2(gte_in_v1_z, R_T4, 4)
|
||||
#define gte_lwc2_v2_RT4 enc_cop2_lwc2(gte_in_v2_xy, R_T4, 0)
|
||||
#define gte_lwc2_v2z_RT4 enc_cop2_lwc2(gte_in_v2_z, R_T4, 4)
|
||||
|
||||
/* gte_load_vN(r_ptr) — placeholder-punned lwc2 loaders
|
||||
/* Pre-baked lwc2 encoding helpers parameterized on the base GPR.
|
||||
*
|
||||
* Each emits a small sequence of `.word` constants that encode `lwc2 $N,
|
||||
* off($12)` for the chosen GTE vector register. The base register is
|
||||
* forced to be R_T4 ($12) at runtime via:
|
||||
* - `"r"(r_ptr)`: GCC picks a GPR for `r_ptr`
|
||||
* - `"$12"` in the clobber list: GCC can't put any other live value in $12
|
||||
* - Net effect: GCC must place `r_ptr` in $12, the register the .word
|
||||
* constants expect.
|
||||
* gte_lwc2_v0(base) → lwc2 $0, 0(base) ; C2_VXY0
|
||||
* gte_lwc2_v0z(base) → lwc2 $1, 4(base) ; C2_VZ0
|
||||
* gte_lwc2_v1(base) → lwc2 $2, 0(base) ; C2_VXY1
|
||||
* gte_lwc2_v1z(base) → lwc2 $3, 4(base) ; C2_VZ1
|
||||
* gte_lwc2_v2(base) → lwc2 $4, 0(base) ; C2_VXY2
|
||||
* gte_lwc2_v2z(base) → lwc2 $5, 4(base) ; C2_VZ2
|
||||
*
|
||||
* `base` is the GPR number to bake into the .word constant's `rs` field.
|
||||
* These are pure compile-time integers; the C compiler constant-folds
|
||||
* them into .word directives. */
|
||||
#define gte_lwc2_v0(base) enc_cop2_lwc2(gte_in_v0_xy, (base), 0)
|
||||
#define gte_lwc2_v0z(base) enc_cop2_lwc2(gte_in_v0_z, (base), 4)
|
||||
#define gte_lwc2_v1(base) enc_cop2_lwc2(gte_in_v1_xy, (base), 0)
|
||||
#define gte_lwc2_v1z(base) enc_cop2_lwc2(gte_in_v1_z, (base), 4)
|
||||
#define gte_lwc2_v2(base) enc_cop2_lwc2(gte_in_v2_xy, (base), 0)
|
||||
#define gte_lwc2_v2z(base) enc_cop2_lwc2(gte_in_v2_z, (base), 4)
|
||||
|
||||
/* gte_load_vN(r_ptr, base) — placeholder-punned lwc2 loaders
|
||||
*
|
||||
* Emits `.word` constants encoding `lwc2 $N, off(<base>)` for the chosen
|
||||
* GTE vector register, where `<base>` is the GPR number you pass in
|
||||
* (typically one of R_T4..R_T9 for the standard "3-pointer" pattern).
|
||||
*
|
||||
* The caller MUST bind `r_ptr` to that same GPR via a register variable:
|
||||
*
|
||||
* register V3_S2* p_in_12 __asm__("$12") = my_ptr;
|
||||
* gte_load_v0(p_in_12, R_T4); // R_T4 = 12, base is $12
|
||||
*
|
||||
* Then `"r"(r_ptr)` inside the asm binds to $12 (the only register
|
||||
* `p_in_12` can live in), which is exactly the register the .word
|
||||
* constants expect. A `"$12"` clobber would conflict with the
|
||||
* register-variable binding ("asm specifier for variable conflicts
|
||||
* with asm clobber list"), so we omit it. The other ABI-clobbers
|
||||
* ($2/$8/$9/$31) stay because the GTE instructions don't touch
|
||||
* caller-saved GPRs but the kernel does treat them as volatile.
|
||||
*
|
||||
* WHICH REGISTER TO PICK
|
||||
* ----------------------
|
||||
* Any caller-saved GPR is safe. Recommended default for an RTPT-style
|
||||
* 3-pointer pipeline:
|
||||
* gte_load_v0(p0, R_T4); // $12
|
||||
* gte_load_v1(p1, R_T5); // $13
|
||||
* gte_load_v2(p2, R_T6); // $14
|
||||
* Avoid $0 (zero), $1 (at), $26/$27 (k0/k1), $28-$31 (gp/sp/fp/ra).
|
||||
*
|
||||
* Shape of the generated `asm volatile (...)`:
|
||||
* code section : ".word %0, %1" (from asm_inline)
|
||||
* outputs section : (empty, the 2nd colon)
|
||||
* inputs section : "i"(w0), "i"(w1), "r"(r_ptr)
|
||||
* clobbers section : "$2", "$8", ..., "$12" (from asm_clobber)
|
||||
* inputs section : "i"(w0), "i"(w1), "r"(r_ptr) — r_ptr bound to <base>
|
||||
* clobbers section : "$2", "$8", ..., "memory" (from asm_clobber)
|
||||
* 3 colons total, GCC-legal. No string-syntax mnemonics in the .word body.
|
||||
*
|
||||
* The `asm_clobber(...)` helper from gcc_asm.h prepends the colon that
|
||||
* starts the clobbers section. */
|
||||
#define gte_load_v0(r_ptr) \
|
||||
#define gte_load_v0(r_ptr, base) \
|
||||
asm volatile( \
|
||||
asm_inline( gte_lwc2_v0_RT4, gte_lwc2_v0z_RT4 ) \
|
||||
asm_inline( gte_lwc2_v0(base), gte_lwc2_v0z(base) ) \
|
||||
, "r"(r_ptr) \
|
||||
asm_clobber( "$2", "$8", "$9", "$31", "memory", "$12" ) \
|
||||
asm_clobber( reg_str(R_V0_Code), reg_str(R_T0_Code), reg_str(R_T1_Code), reg_str(R_RA_Code), "memory" ) \
|
||||
)
|
||||
|
||||
#define gte_load_v1(r_ptr) \
|
||||
#define gte_load_v1(r_ptr, base) \
|
||||
asm volatile( \
|
||||
asm_inline( gte_lwc2_v1_RT4, gte_lwc2_v1z_RT4 ) \
|
||||
asm_inline( gte_lwc2_v1(base), gte_lwc2_v1z(base) ) \
|
||||
, "r"(r_ptr) \
|
||||
asm_clobber( "$2", "$8", "$9", "$31", "memory", "$12" ) \
|
||||
asm_clobber( reg_str(R_V0_Code), reg_str(R_T0_Code), reg_str(R_T1_Code), reg_str(R_RA_Code), "memory" ) \
|
||||
)
|
||||
|
||||
#define gte_load_v2(r_ptr) \
|
||||
#define gte_load_v2(r_ptr, base) \
|
||||
asm volatile( \
|
||||
asm_inline( gte_lwc2_v2_RT4, gte_lwc2_v2z_RT4 ) \
|
||||
asm_inline( gte_lwc2_v2(base), gte_lwc2_v2z(base) ) \
|
||||
, "r"(r_ptr) \
|
||||
asm_clobber( "$2", "$8", "$9", "$31", "memory", "$12" ) \
|
||||
asm_clobber( reg_str(R_V0_Code), reg_str(R_T0_Code), reg_str(R_T1_Code), reg_str(R_RA_Code), "memory" ) \
|
||||
)
|
||||
|
||||
/* gte_load_v0v1v2(r_ptr) — the canonical prelude to gte_cmd_rtpt.
|
||||
* Loads all three GTE input vectors (6 words) from a contiguous array
|
||||
* of three SVECTORs (24 bytes total). */
|
||||
#define gte_load_v0v1v2(r_ptr) \
|
||||
/* gte_load_v0v1v2(p0, p1, p2, b0, b1, b2) — the canonical prelude to gte_cmd_rtpt.
|
||||
*
|
||||
* Loads all three GTE input vectors (6 words) from three separate pointers,
|
||||
* one per GTE vector register, each loaded from its own base GPR. Caller
|
||||
* must bind each `pN` to `bN` via a register variable.
|
||||
*
|
||||
* register V3_S2* p0 rgcc(R_T4) = verts[0].ptr; // → __asm__("$12")
|
||||
* register V3_S2* p1 rgcc(R_T5) = verts[1].ptr; // → __asm__("$13")
|
||||
* register V3_S2* p2 rgcc(R_T6) = verts[2].ptr; // → __asm__("$14")
|
||||
* gte_load_v0v1v2(p0, p1, p2, R_T4, R_T5, R_T6);
|
||||
* gte_rtpt();
|
||||
*/
|
||||
#define gte_load_v0v1v2(p0, p1, p2, b0, b1, b2) \
|
||||
asm volatile( \
|
||||
asm_inline( gte_lwc2_v0_RT4, gte_lwc2_v0z_RT4, \
|
||||
gte_lwc2_v1_RT4, gte_lwc2_v1z_RT4, \
|
||||
gte_lwc2_v2_RT4, gte_lwc2_v2z_RT4 ) \
|
||||
, "r"(r_ptr) \
|
||||
asm_clobber( "$2", "$8", "$9", "$31", "memory", "$12" ) \
|
||||
asm_inline( gte_lwc2_v0(b0), gte_lwc2_v0z(b0), \
|
||||
gte_lwc2_v1(b1), gte_lwc2_v1z(b1), \
|
||||
gte_lwc2_v2(b2), gte_lwc2_v2z(b2) ) \
|
||||
, "r"(p0), "r"(p1), "r"(p2) \
|
||||
asm_clobber( reg_str(R_V0_Code), reg_str(R_T0_Code), reg_str(R_T1_Code), reg_str(R_RA_Code), "memory" ) \
|
||||
)
|
||||
|
||||
#define gte_ldv0(r0) \
|
||||
@@ -382,10 +498,21 @@ enum { _C2_OPS_ = 0
|
||||
* ctc2 $13, $3 ; → C2_RT21
|
||||
* ctc2 $14, $4 ; → C2_RT22
|
||||
*
|
||||
* Uses the placeholder-pun: R_T4 ($12) is hard-wired into the `lw` base
|
||||
* field of every `.word` constant, and the `"r"(r0)` constraint + `"$12"`
|
||||
* clobber force GCC to put `r0` in $12 at runtime. The `lw` offsets are
|
||||
* literal values (0, 4, 8, ...) so the only runtime GPR in play is $12.
|
||||
* Same contract as gte_load_v0: caller MUST bind `r0` to $12 via a
|
||||
* register variable (`rgcc(R_T4)`) for the `lw $12, off(...)`
|
||||
* instructions to read from the right base. The `"r"(r0)` constraint
|
||||
* alone doesn't force a specific GPR — it just lets GCC pick one.
|
||||
* The .word constants here bake R_T4/R_T5/R_T6 into the `rs` field
|
||||
* of each lw, so the lw instructions will only do the right thing
|
||||
* if $12/$13/$14 hold the matrix base at runtime.
|
||||
*
|
||||
* M3_S2* m = ...;
|
||||
* register M3_S2* m_in_12 rgcc(R_T4) = m;
|
||||
* asm_gte_matrix_set_rotation(m_in_12);
|
||||
*
|
||||
* We clobber $12/$13/$14 (the ones we use as scratch inside the
|
||||
* inline asm) plus the system clobbers; we don't clobber `r0` because
|
||||
* the `rgcc` binding already says "this variable lives in $12".
|
||||
*
|
||||
* WARNING: Incomplete by design. The source macro only writes RT11..RT22
|
||||
* (5 of 9 rotation elements); RT23 and the entire RT3x row are left
|
||||
@@ -407,7 +534,7 @@ enum { _C2_OPS_ = 0
|
||||
enc_cop2_tx(cop_mt, R_T5, 3), \
|
||||
enc_cop2_tx(cop_mt, R_T6, 4) \
|
||||
) \
|
||||
asm_clobber( clb_system, "$12", "$13", "$14") \
|
||||
asm_clobber( clb_system, reg_str(R_T4_Code), reg_str(R_T5_Code), reg_str(R_T6_Code) ) \
|
||||
: \
|
||||
: "r"(r0) \
|
||||
)
|
||||
|
||||
+64
-9
@@ -4,17 +4,67 @@
|
||||
# include "gcc_asm.h"
|
||||
#endif
|
||||
|
||||
/* ============================================================================
|
||||
* REGISTER INTEGER IDS (preprocessor-visible)
|
||||
* ============================================================================
|
||||
* Every R_* enum below has a parallel R_*_Code `#define` so that the
|
||||
* preprocessor can stringify the integer (e.g. for asm clobber lists and
|
||||
* register-variable declarations via `rgcc(R_X)`). The enum value is
|
||||
* bound to the `#define` so the two forms cannot drift apart.
|
||||
*
|
||||
* All 32 GPRs currently have a `_Code` form, although only the ones that
|
||||
* get stringified strictly need one. To add a new register id, follow the pattern:
|
||||
* #define R_T7_Code 15
|
||||
* R_T7 = R_T7_Code, // in the enum
|
||||
*
|
||||
* User code should always reference the enum form (`R_T4`) at arithmetic
|
||||
* sites and let `reg_str(R_T4_Code)` / `rgcc(R_T4)` handle the stringify
|
||||
* cases — never write the bare number `12`.
|
||||
* ============================================================================ */
|
||||
#define R_0_Code 0
|
||||
#define R_AT_Code 1
|
||||
#define R_V0_Code 2
|
||||
#define R_V1_Code 3
|
||||
#define R_A0_Code 4
|
||||
#define R_A1_Code 5
|
||||
#define R_A2_Code 6
|
||||
#define R_A3_Code 7
|
||||
#define R_T0_Code 8
|
||||
#define R_T1_Code 9
|
||||
#define R_T2_Code 10
|
||||
#define R_T3_Code 11
|
||||
#define R_T4_Code 12
|
||||
#define R_T5_Code 13
|
||||
#define R_T6_Code 14
|
||||
#define R_T7_Code 15
|
||||
#define R_S0_Code 16
|
||||
#define R_S1_Code 17
|
||||
#define R_S2_Code 18
|
||||
#define R_S3_Code 19
|
||||
#define R_S4_Code 20
|
||||
#define R_S5_Code 21
|
||||
#define R_S6_Code 22
|
||||
#define R_S7_Code 23
|
||||
#define R_T8_Code 24
|
||||
#define R_T9_Code 25
|
||||
#define R_K0_Code 26
|
||||
#define R_K1_Code 27
|
||||
#define R_GP_Code 28
|
||||
#define R_SP_Code 29
|
||||
#define R_FP_Code 30
|
||||
#define R_RA_Code 31
|
||||
|
||||
enum {
|
||||
/* --- MIPS CPU Registers --- */
|
||||
|
||||
R_0 = 0, R_AT = 1, R_V0 = 2, R_V1 = 3,
|
||||
R_A0 = 4, R_A1 = 5, R_A2 = 6, R_A3 = 7,
|
||||
R_T0 = 8, R_T1 = 9, R_T2 = 10, R_T3 = 11,
|
||||
R_T4 = 12, R_T5 = 13, R_T6 = 14, R_T7 = 15,
|
||||
R_S0 = 16, R_S1 = 17, R_S2 = 18, R_S3 = 19,
|
||||
R_S4 = 20, R_S5 = 21, R_S6 = 22, R_S7 = 23,
|
||||
R_T8 = 24, R_T9 = 25, R_K0 = 26, R_K1 = 27,
|
||||
R_GP = 28, R_SP = 29, R_FP = 30, R_RA = 31
|
||||
R_0 = R_0_Code, R_AT = R_AT_Code, R_V0 = R_V0_Code, R_V1 = R_V1_Code,
|
||||
R_A0 = R_A0_Code, R_A1 = R_A1_Code, R_A2 = R_A2_Code, R_A3 = R_A3_Code,
|
||||
R_T0 = R_T0_Code, R_T1 = R_T1_Code, R_T2 = R_T2_Code, R_T3 = R_T3_Code,
|
||||
R_T4 = R_T4_Code, R_T5 = R_T5_Code, R_T6 = R_T6_Code, R_T7 = R_T7_Code,
|
||||
R_S0 = R_S0_Code, R_S1 = R_S1_Code, R_S2 = R_S2_Code, R_S3 = R_S3_Code,
|
||||
R_S4 = R_S4_Code, R_S5 = R_S5_Code, R_S6 = R_S6_Code, R_S7 = R_S7_Code,
|
||||
R_T8 = R_T8_Code, R_T9 = R_T9_Code, R_K0 = R_K0_Code, R_K1 = R_K1_Code,
|
||||
R_GP = R_GP_Code, R_SP = R_SP_Code, R_FP = R_FP_Code, R_RA = R_RA_Code
|
||||
|
||||
/* Semantic Aliases for MIPS Registers (O32 ABI) */
|
||||
|
||||
@@ -238,7 +288,12 @@ Code CodeBlob_(mips_flush_icache) {
|
||||
};
|
||||
FI_ void mips_flush_icache(void) { C_(VoidFn*, codeblob_mips_flush_icache)(); }
|
||||
|
||||
#define clb_system "$2", "$8", "$9", "$31", "memory"
|
||||
/* Standard clobber list for pure-MIPS asm volatile blocks: caller-saved
|
||||
* GPRs that the kernel treats as volatile (v0/t0/t1/ra) plus the
|
||||
* "memory" barrier. The register ids are passed through `reg_str` so
|
||||
* the R_*_Code `#define`s are stringified into "$N" at expansion time. */
|
||||
#define clb_system \
|
||||
reg_str(R_V0_Code), reg_str(R_T0_Code), reg_str(R_T1_Code), reg_str(R_RA_Code), "memory"
|
||||
|
||||
#define asm_mips_flush_icache() asm volatile( asm_inline( \
|
||||
add_ui(rstack_ptr, rstack_ptr, -8) \
|
||||
|
||||
@@ -241,14 +241,14 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
tri->color = rgb8(255, 255, 255);
|
||||
|
||||
V3_S2* face = & static_mem.floor.faces[face_id];
|
||||
register V3_S2* p0 asm("$12") = & static_mem.floor.verts[face->x];
|
||||
V3_S2* p1 = & static_mem.floor.verts[face->y];
|
||||
V3_S2* p2 = & static_mem.floor.verts[face->z];
|
||||
register V3_S2* p0 rgcc(R_T4) = & static_mem.floor.verts[face->x];
|
||||
register V3_S2* p1 rgcc(R_T5) = & static_mem.floor.verts[face->y];
|
||||
register V3_S2* p2 rgcc(R_T6) = & static_mem.floor.verts[face->z];
|
||||
|
||||
// gte_ldv0(p0);
|
||||
gte_load_v0(p0);
|
||||
gte_ldv1(p1);
|
||||
gte_ldv2(p2);
|
||||
// Three independent bases — full register discretion at the call site
|
||||
gte_load_v0(p0, R_T4);
|
||||
gte_load_v1(p1, R_T5);
|
||||
gte_load_v2(p2, R_T6);
|
||||
|
||||
gte_rtpt();
|
||||
gte_nclip();
|
||||
|
||||
Reference in New Issue
Block a user