mirror of
https://github.com/Ed94/pikuma_ps1.git
synced 2026-06-16 09:02:22 -07:00
524 lines
24 KiB
C
524 lines
24 KiB
C
#ifdef INTELLISENSE_DIRECTIVES
|
|
# pragma once
|
|
# include "dsl.h"
|
|
# include "gcc_asm.h"
|
|
#endif
|
|
|
|
/* ============================================================================
 * REGISTER INTEGER IDS (preprocessor-visible)
 * ============================================================================
 * Every R_* enumerator has a parallel R_*_Code `#define` so that the
 * preprocessor can stringify the integer (e.g. for asm clobber lists and
 * register-variable declarations via `rgcc(R_X)`).  The enum value is
 * initialised from the `#define`, so the two forms cannot drift apart.
 *
 * Strictly, only registers that get stringified need a `_Code` form; all 32
 * general-purpose registers are given one here.  To add a new id, follow
 * the pattern:
 *     #define R_T7_Code 15
 *     R_T7 = R_T7_Code,   // in the enum
 *
 * User code should always reference the enum form (`R_T4`) at arithmetic
 * sites and let `rlit(R_T4_Code)` / `rgcc(R_T4)` handle the stringify
 * cases -- never write the bare number `12`.
 * ============================================================================ */
#define R_0_Code   0   /* $zero */
#define R_AT_Code  1   /* $at   */
#define R_V0_Code  2   /* $v0   */
#define R_V1_Code  3   /* $v1   */
#define R_A0_Code  4   /* $a0   */
#define R_A1_Code  5   /* $a1   */
#define R_A2_Code  6   /* $a2   */
#define R_A3_Code  7   /* $a3   */
#define R_T0_Code  8   /* $t0   */
#define R_T1_Code  9   /* $t1   */
#define R_T2_Code  10  /* $t2   */
#define R_T3_Code  11  /* $t3   */
#define R_T4_Code  12  /* $t4   */
#define R_T5_Code  13  /* $t5   */
#define R_T6_Code  14  /* $t6   */
#define R_T7_Code  15  /* $t7   */
#define R_S0_Code  16  /* $s0   */
#define R_S1_Code  17  /* $s1   */
#define R_S2_Code  18  /* $s2   */
#define R_S3_Code  19  /* $s3   */
#define R_S4_Code  20  /* $s4   */
#define R_S5_Code  21  /* $s5   */
#define R_S6_Code  22  /* $s6   */
#define R_S7_Code  23  /* $s7   */
#define R_T8_Code  24  /* $t8   */
#define R_T9_Code  25  /* $t9   */
#define R_K0_Code  26  /* $k0   */
#define R_K1_Code  27  /* $k1   */
#define R_GP_Code  28  /* $gp   */
#define R_SP_Code  29  /* $sp   */
#define R_FP_Code  30  /* $fp   */
#define R_RA_Code  31  /* $ra   */
|
|
|
|
enum {
|
|
/* --- MIPS CPU Registers --- */
|
|
|
|
R_0 = R_0_Code, R_AT = R_AT_Code, R_V0 = R_V0_Code, R_V1 = R_V1_Code,
|
|
R_A0 = R_A0_Code, R_A1 = R_A1_Code, R_A2 = R_A2_Code, R_A3 = R_A3_Code,
|
|
R_T0 = R_T0_Code, R_T1 = R_T1_Code, R_T2 = R_T2_Code, R_T3 = R_T3_Code,
|
|
R_T4 = R_T4_Code, R_T5 = R_T5_Code, R_T6 = R_T6_Code, R_T7 = R_T7_Code,
|
|
R_S0 = R_S0_Code, R_S1 = R_S1_Code, R_S2 = R_S2_Code, R_S3 = R_S3_Code,
|
|
R_S4 = R_S4_Code, R_S5 = R_S5_Code, R_S6 = R_S6_Code, R_S7 = R_S7_Code,
|
|
R_T8 = R_T8_Code, R_T9 = R_T9_Code, R_K0 = R_K0_Code, R_K1 = R_K1_Code,
|
|
R_GP = R_GP_Code, R_SP = R_SP_Code, R_FP = R_FP_Code, R_RA = R_RA_Code
|
|
|
|
/* Semantic Aliases for MIPS Registers (O32 ABI) */
|
|
|
|
, rdiscard = R_0 /* Hardwired to 0 */
|
|
, rret_0 = R_V0 /* Function return value */
|
|
, rret_1 = R_V1 /* Second return value (e.g., 64-bit) */
|
|
, rarg_0 = R_A0 /* First function argument */
|
|
, rarg_1 = R_A1 /* Second function argument */
|
|
, rarg_2 = R_A2 /* Third function argument */
|
|
, rarg_3 = R_A3 /* Fourth function argument */
|
|
, rtmp_0 = R_T0 /* Temporary (Caller saved) */
|
|
, rtmp_1 = R_T1 /* Temporary (Caller saved) */
|
|
, rtmp_2 = R_T2 /* Temporary (Caller saved) */
|
|
, rtmp_3 = R_T3 /* Temporary (Caller saved) */
|
|
, rtmp_4 = R_T4 /* Temporary (Caller saved) — common GTE base pointer */
|
|
, rtmp_9 = R_T9 /* Temporary (Caller saved) — common GTE base pointer */
|
|
, rstatic_0 = R_S0 /* Static (Callee saved, preserved across calls) */
|
|
, rstatic_1 = R_S1
|
|
, rstatic_2 = R_S2
|
|
, rstatic_3 = R_S3
|
|
, rstatic_4 = R_S4
|
|
, rstatic_5 = R_S5
|
|
, rstatic_6 = R_S6
|
|
, rstatic_7 = R_S7
|
|
, rsaved_0 = R_S0 /* Alias for rstatic_0 (alternate vocabulary) */
|
|
, rstack_ptr = R_SP /* Stack Pointer */
|
|
, rret_addr = R_RA /* Return Address (populated by JAL) */
|
|
|
|
/* --- MIPS CPU Opcodes (Bits 31-26) --- */
|
|
|
|
, op_special = 0x00 /* R-Type instructions (uses FUNCT field) */
|
|
, op_bcond = 0x01 /* Branch on condition */
|
|
, op_j = 0x02 /* Jump */
|
|
, op_jal = 0x03 /* Jump and Link */
|
|
, op_beq = 0x04 /* Branch on Equal */
|
|
, op_bne = 0x05 /* Branch on Not Equal */
|
|
, op_blez = 0x06 /* Branch on Less Than or Equal to Zero */
|
|
, op_bgtz = 0x07 /* Branch on Greater Than Zero */
|
|
, op_addi = 0x08 /* Add Immediate */
|
|
, op_addiu = 0x09 /* Add Immediate Unsigned */
|
|
, op_slti = 0x0A /* Set on Less Than Immediate */
|
|
, op_sltiu = 0x0B /* Set on Less Than Immediate Unsigned */
|
|
, op_andi = 0x0C /* AND Immediate */
|
|
, op_ori = 0x0D /* OR Immediate */
|
|
, op_xori = 0x0E /* XOR Immediate */
|
|
, op_lui = 0x0F /* Load Upper Immediate */
|
|
, op_cop0 = 0x10 /* Coprocessor 0 (System) */
|
|
, op_cop2 = 0x12 /* Coprocessor 2 (GTE) */
|
|
, op_la = 0
|
|
, op_li = 0
|
|
, op_lb = 0x20 /* Load Byte */
|
|
, op_lh = 0x21 /* Load Halfword */
|
|
, op_lw = 0x23 /* Load Word */
|
|
, op_lbu = 0x24 /* Load Byte Unsigned */
|
|
, op_lhu = 0x25 /* Load Halfword Unsigned */
|
|
, op_sb = 0x28 /* Store Byte */
|
|
, op_sh = 0x29 /* Store Halfword */
|
|
, op_sw = 0x2B /* Store Word */
|
|
|
|
|
|
, op_load_addr = op_la
|
|
, op_load_imm = op_li
|
|
, op_jump = op_j
|
|
, op_jump_nlink = op_jal
|
|
|
|
/* --- MIPS CPU Function Codes (Bits 5-0, used when OP == MIPS_OP_SPECIAL) --- */
|
|
|
|
, fc_sll = 0x00 /* Shift Word Left Logical */
|
|
, fc_srl = 0x02 /* Shift Word Right Logical */
|
|
, fc_sra = 0x03 /* Shift Word Right Arithmetic */
|
|
, fc_sllv = 0x04 /* Shift Word Left Logical Variable */
|
|
, fc_srlv = 0x06 /* Shift Word Right Logical Variable */
|
|
, fc_srav = 0x07 /* Shift Word Right Arithmetic Variable */
|
|
, fc_jr = 0x08 /* Jump Register */
|
|
, fc_jalr = 0x09 /* Jump and Link Register */
|
|
, fc_syscall = 0x0C /* System Call */
|
|
, fc_break = 0x0D /* Breakpoint */
|
|
, fc_mfhi = 0x10 /* Move From HI */
|
|
, fc_mthi = 0x11 /* Move To HI */
|
|
, fc_mflo = 0x12 /* Move From LO */
|
|
, fc_mtlo = 0x13 /* Move To LO */
|
|
, fc_mult = 0x18 /* Multiply Word */
|
|
, fc_multu = 0x19 /* Multiply Unsigned Word */
|
|
, fc_div = 0x1A /* Divide Word */
|
|
, fc_divu = 0x1B /* Divide Unsigned Word */
|
|
, fc_add = 0x20 /* Add Word */
|
|
, fc_addu = 0x21 /* Add Unsigned Word */
|
|
, fc_sub = 0x22 /* Subtract Word */
|
|
, fc_subu = 0x23 /* Subtract Unsigned Word */
|
|
, fc_and = 0x24 /* AND */
|
|
, fc_or = 0x25 /* OR */
|
|
, fc_xor = 0x26 /* XOR */
|
|
, fc_nor = 0x27 /* NOR */
|
|
, fc_slt = 0x2A /* Set on Less Than */
|
|
, fc_sltu = 0x2B /* Set on Less Than Unsigned */
|
|
|
|
, fc_jump_reg = fc_jr
|
|
|
|
/* --- Coprocessor 0 (System Control & Exceptions) --- */
|
|
|
|
, cop_mf = 0x00 /* Move From Coprocessor */
|
|
, cop_mt = 0x04 /* Move To Coprocessor */
|
|
};
|
|
|
|
|
|
// Bitfield Packets (Encoders)

enum { _BitOffsets = 0
    /* Bit offsets of the MIPS instruction fields */
    , OPCODE_SHIFT = 26
    , RS_SHIFT     = 21
    , RT_SHIFT     = 16
    , RD_SHIFT     = 11
    , SHAMT_SHIFT  = 6   /* Shift Amount */
    , FC_SHIFT     = 0

    /* Bit masks that keep a value from overflowing into adjacent fields */
    , OPCODE_MASK  = 0x3F
    , REG_MASK     = 0x1F
    , SHAMT_MASK   = 0x1F  /* Shift Amount */
    , FC_MASK      = 0x3F
    , IMM_MASK     = 0xFFFF
};

/* Single-field encoders: mask the value to the field width, then move it to
 * the field position.  The masked value is converted to `unsigned` before
 * shifting: opcodes >= 0x20 set bit 31, and left-shifting a signed int into
 * the sign bit is undefined behaviour in C. */
#define enc_op(op)       ((unsigned)((op)    & OPCODE_MASK) << OPCODE_SHIFT)
#define enc_rs(rs)       ((unsigned)((rs)    & REG_MASK)    << RS_SHIFT)
#define enc_rt(rt)       ((unsigned)((rt)    & REG_MASK)    << RT_SHIFT)
#define enc_rd(rd)       ((unsigned)((rd)    & REG_MASK)    << RD_SHIFT)
#define enc_shamt(shamt) ((unsigned)((shamt) & SHAMT_MASK)  << SHAMT_SHIFT)
#define enc_fc(fc)       ((unsigned)((fc)    & FC_MASK)     << FC_SHIFT)
#define enc_imm(imm)     ((unsigned)((imm)   & IMM_MASK))

/* MIPS R-Type Instruction Format (Register-to-Register):
 *   [op:6][rs:5][rt:5][rd:5][shamt:5][fc:6] */
#define enc_r(op, rs, rt, rd, shamt, fc) \
    (enc_op(op) | enc_rs(rs) | enc_rt(rt) | enc_rd(rd) | enc_shamt(shamt) | enc_fc(fc))

/* MIPS I-Type Instruction Format (Immediate/Constant):
 *   [op:6][rs:5][rt:5][imm:16]   (negative immediates are truncated to 16 bits) */
#define enc_i(op, rs, rt, imm) \
    (enc_op(op) | enc_rs(rs) | enc_rt(rt) | enc_imm(imm))
|
|
|
|
/* COP0 (System) Transfer Format:  mtc0 rt, rd   or   mfc0 rt, rd
 *   `sub` is the COP0 sub-opcode (cop_mf=0 or cop_mt=4), placed in the rs slot.
 *   `rt`  is the GPR operand (rt slot).
 *   `rd`  is the COP0 register index (rd slot, bits 15..11).  It is shifted
 *         into place here and travels through enc_i's 16-bit immediate
 *         field, which also truncates it to 5 bits. */
#define enc_cop0_tx(sub, rt, rd)  enc_i(op_cop0, (sub), (rt), ((rd) << RD_SHIFT))

/* COP0 Return From Exception (rfe): fixed instruction word. */
#define enc_rfe()                 0x42000010

/* Semantic aliases for COP0 transfer.  `sys_` is the namespace marker
 * for system-control instructions (analogous to `gte_` for COP2).
 *   sys_mov_to_cop0   rt, rd  →  mtc0 rt, rd
 *   sys_mov_from_cop0 rt, rd  →  mfc0 rt, rd
 *   sys_rfe                   →  rfe   (return from exception) */
#define sys_mov_to_cop0(rt, rd)   enc_cop0_tx(cop_mt, (rt), (rd))
#define sys_mov_from_cop0(rt, rd) enc_cop0_tx(cop_mf, (rt), (rd))
#define sys_rfe()                 enc_rfe()
|
|
|
|
/* --- Semantic Encoders (MIPS mnemonics) ---
 * Argument order matches the MIPS assembly syntax:
 * dest-first, then source operands, then immediate last.
 *
 *   load_word(rt, base, off)   →  lw    rt, off(base)
 *   store_word(rt, base, off)  →  sw    rt, off(base)
 *   add_ui(rt, rs, imm)        →  addiu rt, rs, imm
 *   shift_ll(rd, rt, shamt)    →  sll   rd, rt, shamt
 *   jump_reg(rs)               →  jr    rs
 *   jump_link(rs, rd)          →  jalr  rs   (link in rd, default $ra)
 *   nop                        →  sll   $0, $0, 0
 *
 * Every encoder below is I-type: the base/source register goes in the rs
 * slot, the destination (or stored) register in the rt slot, and the
 * offset/immediate is truncated to 16 bits by enc_i.
 */

/* Loads: rt <- memory[base + off] */
#define load_word(rt, base, off)   enc_i(op_lw,  (base), (rt), (off))
#define load_byte(rt, base, off)   enc_i(op_lb,  (base), (rt), (off))
#define load_half(rt, base, off)   enc_i(op_lh,  (base), (rt), (off))
#define load_byte_u(rt, base, off) enc_i(op_lbu, (base), (rt), (off))
#define load_half_u(rt, base, off) enc_i(op_lhu, (base), (rt), (off))

/* Store: memory[base + off] <- rt */
#define store_word(rt, base, off)  enc_i(op_sw,  (base), (rt), (off))

/* Immediate ALU forms.  and_si encodes `andi`; per the MIPS ISA, andi/ori/xori
 * zero-extend their immediate despite the `_si` suffix on and_si. */
#define add_ui(rt, rs, imm)        enc_i(op_addiu, (rs), (rt), (imm))
#define and_si(rt, rs, imm)        enc_i(op_andi,  (rs), (rt), (imm))
#define or_i(rt, rs, imm)          enc_i(op_ori,   (rs), (rt), (imm))
#define xor_i(rt, rs, imm)         enc_i(op_xori,  (rs), (rt), (imm))

/* lui rt, imm — rs slot is unused and encoded as 0. */
#define load_ui(rt, imm)           enc_i(op_lui, R_0, (rt), (imm))
|
|
|
|
/* Logic Opcodes (R-type, shamt = 0):  <op> rd, rs, rt */

#define and_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_and)
#define or_u(rd, rs, rt)  enc_r(op_special, (rs), (rt), (rd), 0, fc_or)
#define xor_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_xor)
#define nor_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_nor)
|
|
|
|
/* Shift family (R-type, constant shift amount; rs slot unused = 0).
 *   shift_ll(rd, rt, shamt)  →  sll rd, rt, shamt
 *   shift_lr(rd, rt, shamt)  →  srl rd, rt, shamt
 *   shift_ra(rd, rt, shamt)  →  sra rd, rt, shamt */
#define shift_ll(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_sll)
#define shift_lr(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_srl)
#define shift_ra(rd, rt, shamt) enc_r(op_special, R_0, (rt), (rd), (shamt), fc_sra)
|
|
|
|
/* jr rs — jump to address in rs. */
#define jump_reg(rs)      enc_r(op_special, (rs), R_0, R_0, 0, fc_jr)

/* jalr rs, rd — link in rd (default $ra) and jump to address in rs.
 * Layout: [op_special][rs:5][rt=0:5][rd:5][shamt=0:5][fc_jalr=0x09] */
#define jump_link(rs, rd) enc_r(op_special, (rs), R_0, (rd), 0, fc_jalr)

/* jalr rs — link in $ra and jump to address in rs (most common form). */
#define jump_nreg(rs)     jump_link((rs), R_RA)

/* j target — absolute jump within the current 256MB region.
 * J-type layout: [op:6][target:26].  `off` is the 26-bit instruction index
 * (byte address >> 2 per the MIPS ISA); it is masked to 26 bits rather than
 * routed through enc_i, which would truncate it to 16 bits. */
#define jump(off)         (enc_op(op_j)   | ((off) & 0x03FFFFFF))

/* jal target — absolute call within the current 256MB region.
 * Same J-type layout as `jump`. */
#define jump_nlink(off)   (enc_op(op_jal) | ((off) & 0x03FFFFFF))
|
|
|
|
/* --- Store family (mirrors the load family) ---
 *   store_byte(rt, base, off)  →  sb rt, off(base)
 *   store_half(rt, base, off)  →  sh rt, off(base)
 * store_word is defined with the load encoders. */
#define store_byte(rt, base, off) enc_i(op_sb, (base), (rt), (off))
#define store_half(rt, base, off) enc_i(op_sh, (base), (rt), (off))
|
|
|
|
/* --- Arithmetic R-type (signed/unsigned split: _s traps, _u doesn't) ---
 *   add_s  rd, rs, rt  →  add  rd, rs, rt   (overflow traps)
 *   add_u  rd, rs, rt  →  addu rd, rs, rt   (overflow silent)
 *   sub_s / sub_u      →  sub  / subu
 *   mult_s / mult_u    →  mult / multu      (writes HI/LO; result in LO)
 *   div_s / div_u      →  div  / divu       (LO = quot, HI = rem)
 *
 * NOTE: dsl.h defines `add_s`/`sub_s`/`mut_s`/`gt_s`/etc. as
 * _Generic-based signed integer-arithmetic helpers for U1/U2/U4.  Those
 * live in a different conceptual layer (generic arithmetic on DSL
 * types) and would collide with the instruction encoders here.  The
 * `#undef`s below let the gas-style names win; if a file needs
 * both, the dsl.h versions can be reached via their long forms
 * (e.g. `def_signed_op`-style or the underlying `add_s1/s2/s4`).
 *
 * NOTE(review): the mult/div encoders place `rd` in bits 15..11 even though
 * the results go to HI/LO; callers presumably pass R_0 — confirm. */
#undef add_s
#undef sub_s

#define add_s(rd, rs, rt)  enc_r(op_special, (rs), (rt), (rd), 0, fc_add)
#define add_u(rd, rs, rt)  enc_r(op_special, (rs), (rt), (rd), 0, fc_addu)
#define sub_s(rd, rs, rt)  enc_r(op_special, (rs), (rt), (rd), 0, fc_sub)
#define sub_u(rd, rs, rt)  enc_r(op_special, (rs), (rt), (rd), 0, fc_subu)
#define mult_s(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_mult)
#define mult_u(rd, rs, rt) enc_r(op_special, (rs), (rt), (rd), 0, fc_multu)
#define div_s(rd, rs, rt)  enc_r(op_special, (rs), (rt), (rd), 0, fc_div)
#define div_u(rd, rs, rt)  enc_r(op_special, (rs), (rt), (rd), 0, fc_divu)
|
|
|
|
/* --- Arithmetic I-type (immediate) ---
 *   add_si(rt, rs, imm)  →  addi rt, rs, imm
 * The non-trapping counterpart is add_ui (addiu). */
#define add_si(rt, rs, imm) enc_i(op_addi, (rs), (rt), (imm))
|
|
|
|
/* --- Set on less than (R-type and I-type) ---
 *   slt_s  rd, rs, rt   →  slt   rd, rs, rt
 *   slt_u  rd, rs, rt   →  sltu  rd, rs, rt
 *   slt_si rt, rs, imm  →  slti  rt, rs, imm
 *   slt_ui rt, rs, imm  →  sltiu rt, rs, imm */
#define slt_s(rd, rs, rt)   enc_r(op_special, (rs), (rt), (rd), 0, fc_slt)
#define slt_u(rd, rs, rt)   enc_r(op_special, (rs), (rt), (rd), 0, fc_sltu)
#define slt_si(rt, rs, imm) enc_i(op_slti,  (rs), (rt), (imm))
#define slt_ui(rt, rs, imm) enc_i(op_sltiu, (rs), (rt), (imm))
|
|
|
|
/* --- Move from/to HI/LO (mult/div results) ---
 *   mov_from_high(rd)  →  mfhi rd      mov_to_high(rs)  →  mthi rs
 *   mov_from_low(rd)   →  mflo rd      mov_to_low(rs)   →  mtlo rs
 * Unused register slots are encoded as 0. */
#define mov_from_high(rd) enc_r(op_special, R_0, R_0, (rd), 0, fc_mfhi)
#define mov_from_low(rd)  enc_r(op_special, R_0, R_0, (rd), 0, fc_mflo)
#define mov_to_high(rs)   enc_r(op_special, (rs), R_0, R_0, 0, fc_mthi)
#define mov_to_low(rs)    enc_r(op_special, (rs), R_0, R_0, 0, fc_mtlo)
|
|
|
|
/* --- Atomic branches (no pseudos like bgt/bge; compose with slt_* + branch_ne) ---
 *   branch_equal   rs, rt, off  →  beq  rs, rt, off
 *   branch_ne      rs, rt, off  →  bne  rs, rt, off
 *   branch_lt_zero rs, off      →  bltz rs, off
 *   branch_gt_zero rs, off      →  bgtz rs, off
 *   branch_le_zero rs, off      →  blez rs, off
 *   branch_ge_zero rs, off      →  bgez rs, off
 * bltz and bgez share the `op_bcond` opcode and are told apart by the rt
 * slot (0 = bltz, 1 = bgez).  `off` is truncated to 16 bits by enc_i. */

#define branch_equal(rs, rt, off) enc_i(op_beq,   (rs), (rt), (off))
#define branch_ne(rs, rt, off)    enc_i(op_bne,   (rs), (rt), (off))
#define branch_lt_zero(rs, off)   enc_i(op_bcond, (rs), R_0,  (off))  /* bltz is bcond with rt=0 */
#define branch_ge_zero(rs, off)   enc_i(op_bcond, (rs), 1,    (off))  /* bgez is bcond with rt=1 */
#define branch_le_zero(rs, off)   enc_i(op_blez,  (rs), R_0,  (off))  /* blez has its own opcode, rt=0 */
#define branch_gt_zero(rs, off)   enc_i(op_bgtz,  (rs), R_0,  (off))  /* bgtz has its own opcode, rt=0 */
|
|
|
|
/* --- System (kernel) instructions ---
 * Both are op_special words with every register/shamt field zero; only the
 * function code differs. */
#define syscall()    enc_r(op_special, R_0, R_0, R_0, 0, fc_syscall)
#define breakpoint() enc_r(op_special, R_0, R_0, R_0, 0, fc_break)
|
|
|
|
/* --- Shift-amount alias (matches the gas convention `\p3 = shamt`) ---
 * Expands to shift_ll, i.e. `sll rd, rt, n`. */
#define shift_amount(rd, rt, n) shift_ll((rd), (rt), (n))
|
|
|
|
/* nop — canonical `sll $0, $0, 0`; every field is zero, so the word is 0. */
#define nop shift_ll(rdiscard, rdiscard, 0)
|
|
|
|
/* Single-word immediate loads (expression-level encoders, no asm emitted):
 *   load_imm_1w(rt, imm)     →  addiu rt, $0, imm
 *   load_imm_1w_s0(rt, imm)  →  addi  rt, $0, imm
 * `imm` is truncated to 16 bits by the I-type encoder. */
#define load_imm_1w(rt, imm)    add_ui((rt), R_0, (imm))
#define load_imm_1w_s0(rt, imm) add_si((rt), R_0, (imm))
|
|
|
|
/* load_imm_2w — unconditional 2-word `li` form: `lui` + (ori | addi).
 *
 * Granular companion to `load_imm`: skips the compile-time range checks
 * and always emits 2 .words.  Use this when:
 *   - you know `imm` is > 0xFFFF (otherwise you're wasting a word), OR
 *   - `imm` is not a compile-time constant and you want predictable
 *     2-word emission without the `__builtin_constant_p` branches.
 *
 * The lo16 strategy is still chosen at expansion time on the lo half:
 *   lo16 in 0x0000..0x7FFF  →  addi   (sign-ext is harmless, the lui
 *                                      already cleared bits 15..0)
 *   lo16 in 0x8000..0xFFFF  →  ori    (zero-extends to preserve the
 *                                      intended bit pattern)
 *
 * For situations where you need to bypass even this choice (e.g. to
 * force a specific encoding for a known discontiguous high/low pair),
 * see `load_imm_2w_ori` and `load_imm_2w_addi`.
 *
 * Statement-level (not expression-level): emits its own `asm volatile(...)`.
 *
 * NOTE(review): the clobber list names $at, not `rt`, although the emitted
 * words write `rt` — confirm the compiler learns about that write elsewhere.
 */
#define load_imm_2w(rt, imm) do {                                       \
    if (u4_lo(imm) <= 0x7FFFU) {                                        \
        /* lo16 has bit 15 clear: lui + addi */                         \
        asm volatile(                                                   \
            asm_words(                                                  \
                load_ui((rt), u4_hi(imm)),                              \
                add_si((rt), (rt), (S2)C_(U2, u4_lo(imm)))              \
            )                                                           \
            asm_clobber: rlit(R_AT_Code), clb_mem_drain                 \
        );                                                              \
    }                                                                   \
    else {                                                              \
        /* lo16 has bit 15 set: lui + ori (zero-extending) */           \
        asm volatile(                                                   \
            asm_words(                                                  \
                load_ui((rt), u4_hi(imm)),                              \
                or_i((rt), (rt), C_(U2, u4_lo(imm)))                    \
            )                                                           \
            asm_clobber: rlit(R_AT_Code), clb_mem_drain                 \
        );                                                              \
    }                                                                   \
} while (0)
|
|
|
|
/* load_imm_2w_ori — force the `lui` + `ori` form regardless of lo16 sign.
 * Use when you specifically need zero-extension in the lo half.
 *
 *   lui rt, hi16(imm)      ; rt = hi16 << 16
 *   ori rt, rt, lo16(imm)  ; rt |= lo16
 *
 * Statement-level: emits its own `asm volatile(...)` with 2 .words. */
#define load_imm_2w_ori(rt, imm) do {                                   \
    asm volatile(                                                       \
        asm_words(                                                      \
            load_ui((rt), u4_hi(imm)),                                  \
            or_i((rt), (rt), C_(U2, u4_lo(imm)))                        \
        )                                                               \
        asm_clobber: rlit(R_AT_Code), clb_mem_drain                     \
    );                                                                  \
} while (0)
|
|
|
|
/* load_imm_2w_addi — force the `lui` + `addi` form regardless of lo16 sign.
 *
 *   lui  rt, hi16(imm)      ; rt = hi16 << 16
 *   addi rt, rt, lo16(imm)  ; rt += sign_extend(lo16)
 *
 * Use only when sign-extension of the low half is acceptable: `addi`
 * sign-extends its imm16, so when bit 15 of lo16 is set the low half is
 * added as a negative number and the upper half of the result ends up one
 * less than hi16(imm).  `addi` is also the overflow-trapping add.
 *
 * Statement-level: emits its own `asm volatile(...)` with 2 .words. */
#define load_imm_2w_addi(rt, imm) do {                                  \
    asm volatile(                                                       \
        asm_words(                                                      \
            load_ui((rt), u4_hi(imm)),                                  \
            add_si((rt), (rt), (S2)C_(U2, u4_lo(imm)))                  \
        )                                                               \
        asm_clobber: rlit(R_AT_Code), clb_mem_drain                     \
    );                                                                  \
} while (0)
|
|
|
|
/* load_imm rt, imm — true `li` semantics (assembler `li` pseudo)
 *
 * Dispatches at compile time on the immediate's range, picking the
 * smallest single-instruction form when possible:
 *
 *   imm in 0       .. 0x7FFF      →  addi rt, $0, imm     (1 word)
 *   imm in 0x8000  .. 0xFFFF      →  ori  rt, $0, imm     (1 word; sign-bit must be zeroed)
 *   imm in 0x10000 .. 0xFFFFFFFF  →  lui + (ori | addi)   (2 words)
 *
 * Statement-level (not expression-level): the macro emits its own
 * `asm volatile(...)` block with 1 or 2 .word constants.  Callers that
 * want several loads inside one volatile block should use the
 * lower-level encoders directly instead.
 *
 *   load_imm(R_T4, 0x12345678);   // emits 2 .words
 *
 * Falls back to a 2-word form if `imm` is not a compile-time constant,
 * but that path is unusual (load_imm is most useful with literal
 * addresses and magic numbers).
 *
 * NOTE(review): the clobber list names $at, not `rt`, although the emitted
 * words write `rt` — confirm the compiler learns about that write elsewhere. */
#define load_imm(rt, imm) do {                                              \
    if (cexpr_(imm) && ((U4)(imm) <= 0x7FFFU)) {                            \
        /* Small positive: addi rt, $0, imm */                              \
        asm volatile(                                                       \
            asm_words(add_si((rt), R_0, (imm)))                             \
            asm_clobber: rlit(R_AT_Code), clb_mem_drain                     \
        );                                                                  \
    }                                                                       \
    else if (cexpr_(imm) && ((U4)(imm) <= 0xFFFFU)) {                       \
        /* 0x8000..0xFFFF: ori rt, $0, imm (zero-extends) */                \
        asm volatile(                                                       \
            asm_words(or_i((rt), R_0, (imm)))                               \
            asm_clobber: rlit(R_AT_Code), clb_mem_drain                     \
        );                                                                  \
    }                                                                       \
    else {                                                                  \
        /* > 16 bits (or not a constant): lui + (ori | addi).               \
         * If lo16 is in [0, 0x7FFF] use addi (sign-ext is harmless         \
         * since the lui cleared bits 15..0).  Otherwise ori. */            \
        if (u4_lo(imm) <= 0x7FFFU) {                                        \
            asm volatile(                                                   \
                asm_words(                                                  \
                    load_ui((rt), u4_hi(imm)),                              \
                    add_si((rt), (rt), (S2)C_(U2, u4_lo(imm)))              \
                )                                                           \
                asm_clobber: rlit(R_AT_Code), clb_mem_drain                 \
            );                                                              \
        }                                                                   \
        else {                                                              \
            asm volatile(                                                   \
                asm_words(                                                  \
                    load_ui((rt), u4_hi(imm)),                              \
                    or_i((rt), (rt), C_(U2, u4_lo(imm)))                    \
                )                                                           \
                asm_clobber: rlit(R_AT_Code), clb_mem_drain                 \
            );                                                              \
        }                                                                   \
    }                                                                       \
} while (0)
|
|
|
|
// Binary Metaprogramming
|
|
|
|
typedef U4 const Code;
|
|
#define CodeBlob_(sym) tmpl(code,sym) [] align_(4) =
|
|
|
|
/* PSX BIOS call constants used by the icache-flush sequences. */
enum {
    bios_flushcache = 0x44,  /* A-function number of FlushCache */
    bios_table_addr = 0xA0,  /* address of the BIOS A-function entry stub */
};
|
|
|
|
/* Flushes the Instruction Cache (PSX A-function 0x44 via BIOS stub at 0xA0).
|
|
*
|
|
* Sequence (per MIPS ABI; arguments in arg registers, RA pushed to stack):
|
|
* 1. sp -= 8; sw $ra, 4($sp) ; save RA
|
|
* 2. $a0 = bios_flushcache (arg0)
|
|
* 3. $t0 = bios_table_addr ; t0 = &BIOS A-function table
|
|
* 4. jalr $t0, $ra ; call BIOS(flushcache)
|
|
* nop ; branch delay slot
|
|
* 5. lw $ra, 4($sp); jr $ra ; restore & return
|
|
* 6. sp += 8
|
|
*/
|
|
internal
|
|
Code CodeBlob_(mips_flush_icache) {
|
|
add_ui(rstack_ptr, rstack_ptr, -8) /* sp -= 8 */
|
|
, store_word(rret_addr, rstack_ptr, 4) /* sw $ra, 4($sp) */
|
|
, add_ui(rret_0, rdiscard, bios_flushcache) /* addiu $a0, $0, 0x44 */
|
|
, add_ui(rtmp_0, rdiscard, bios_table_addr) /* addiu $t0, $0, 0xA0 */
|
|
, jump_link(rtmp_0, rret_addr) /* jalr $t0, $ra */
|
|
, nop /* BD slot */
|
|
, load_word(rret_addr, rstack_ptr, 4) /* lw $ra, 4($sp) */
|
|
, jump_reg(rret_addr) /* jr $ra */
|
|
, add_ui(rstack_ptr, rstack_ptr, 8) /* sp += 8 (BD) */
|
|
};
|
|
I_ void mips_flush_icache(void) { C_(VoidFn*, code_mips_flush_icache)(); }
|
|
|
|
/* Standard clobber list for pure-MIPS asm volatile blocks: caller-saved
 * GPRs that the kernel treats as volatile (v0/v1/t0/t1/ra) plus the
 * "memory" barrier.  The register ids are passed through `rlit` so
 * the R_*_Code `#define`s are stringified into "$N" at expansion time.
 *
 * NOTE(review): a BIOS call may destroy further caller-saved registers
 * ($at, $a0-$a3, $t2-$t9); extend this list if callers keep live values
 * there across the asm block. */
#define clb_system \
    rlit(R_V0_Code), rlit(R_V1_Code), rlit(R_T0_Code), rlit(R_T1_Code), rlit(R_RA_Code), clb_mem_drain
|
|
|
|
/* Inline-asm form of the icache flush (PSX BIOS A-function 0x44 via 0xA0).
 *
 *   sp -= 8; sw $ra, 4($sp)     ; save RA
 *   $t1 = bios_flushcache       ; the A0h dispatcher reads the function
 *                               ; number from $t1
 *   $t0 = bios_table_addr
 *   jalr $t0, $ra ; nop         ; call BIOS, delay slot
 *   lw $ra, 4($sp); sp += 8     ; restore RA and the stack pointer
 *
 * There is deliberately no `jr $ra` here: an inline asm block must fall
 * through to the code the compiler placed after it.  Returning from inside
 * the block would skip the enclosing function's epilogue and leave its
 * stack frame unreleased. */
#define asm_mips_flush_icache() asm volatile( asm_words(    \
      add_ui(rstack_ptr, rstack_ptr, -8)                    \
    , store_word(rret_addr, rstack_ptr, 4)                  \
    , add_ui(rtmp_1, rdiscard, bios_flushcache)             \
    , add_ui(rtmp_0, rdiscard, bios_table_addr)             \
    , jump_link(rtmp_0, rret_addr)                          \
    , nop                                                   \
    , load_word(rret_addr, rstack_ptr, 4)                   \
    , add_ui(rstack_ptr, rstack_ptr, 8)                     \
) asm_clobber: clb_system )
|
|
|
|
/* Smoke test: expands the inline icache-flush sequence once. */
void test_mips_asm(void) {
    asm_mips_flush_icache();
}
|