From 690d184acfd24e450ebf15f92e76966dd57dfe53 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Mon, 1 Jun 2026 21:50:50 -0400
Subject: [PATCH] PITA

---
 code/duffle/gte.h          |  67 +++++++++------
 code/duffle/mips.h         |  25 +++---
 code/gte_hello/hello_gte.s | 170 -------------------------------------
 scripts/build_psyq.ps1     |   8 +-
 4 files changed, 54 insertions(+), 216 deletions(-)
 delete mode 100644 code/gte_hello/hello_gte.s

diff --git a/code/duffle/gte.h b/code/duffle/gte.h
index 052c9ce..9e3ac4a 100644
--- a/code/duffle/gte.h
+++ b/code/duffle/gte.h
@@ -199,43 +199,54 @@ enum { _C2_OPS_ = 0
 #define gte_lwc2_v2_RT4  enc_cop2_lwc2(gte_in_v2_xy, R_T4, 0)
 #define gte_lwc2_v2z_RT4 enc_cop2_lwc2(gte_in_v2_z,  R_T4, 4)
 
-/* The actual call-site macros — zero string syntax. The asm_blob wrapper
- * (defined in gcc_asm.h) handles the asm volatile (...) envelope, and the
- * colon-prefixed clobber/input sections slot in cleanly:
+/* The actual call-site macros — zero string syntax in the .word body.
  *
- *   asm_blob( , )
+ * The .word constants hardwire rs=R_T4 ($12), so the pointer MUST be in
+ * $12 when they execute. A "$12" clobber cannot do that: GCC never places
+ * an input operand in a clobbered register. Instead each macro pins r_ptr
+ * with a local register variable (register ... asm("$12")) and passes it
+ * as the "r" input, the documented way to pick an operand's register.
  *
- * The placeholder-pun: the .word constants have rs=R_T4 ($12) hardwired,
- * and the "$12" clobber + "r"(r_ptr) input forces GCC to bind the pointer
- * to $12, which is exactly the register the constants expect. */
+ * Uses asm_volatile_4(code, outs, ins, clb) from gcc_asm.h.
+ *   asm_inline(...)      produces the 2-colon code:outputs:inputs body
+ *   "r"(gte_base_)       is the runtime-input section body ($12-pinned)
+ *   "$2", ..., "memory"  is the clobber section body ($12 is NOT listed)
+ *   asm_volatile_4() joins them with 3 colons and wraps in asm volatile
+ */
 #define gte_load_v0(r_ptr) \
-	asm volatile( \
-		asm_inline( gte_lwc2_v0_RT4, gte_lwc2_v0z_RT4 ), \
-		asm_clobber( clb_system, "$12" ) \
-		: : "r"(r_ptr) )
+	do { register const void* gte_base_ asm("$12") = (r_ptr); \
+	asm_volatile_4( \
+		(asm_inline( gte_lwc2_v0_RT4, gte_lwc2_v0z_RT4 )), \
+		(), ("r"(gte_base_)), \
+		("$2", "$8", "$9", "$31", "memory") \
+	); } while (0)
 
 #define gte_load_v1(r_ptr) \
-	asm volatile( \
-		asm_inline( gte_lwc2_v1_RT4, gte_lwc2_v1z_RT4 ), \
-		asm_clobber( clb_system, "$12" ) \
-		: : "r"(r_ptr) )
+	do { register const void* gte_base_ asm("$12") = (r_ptr); \
+	asm_volatile_4( \
+		(asm_inline( gte_lwc2_v1_RT4, gte_lwc2_v1z_RT4 )), \
+		(), ("r"(gte_base_)), \
+		("$2", "$8", "$9", "$31", "memory") \
+	); } while (0)
 
 #define gte_load_v2(r_ptr) \
-	asm volatile( \
-		asm_inline( gte_lwc2_v2_RT4, gte_lwc2_v2z_RT4 ), \
-		asm_clobber( clb_system, "$12" ) \
-		: : "r"(r_ptr) )
+	do { register const void* gte_base_ asm("$12") = (r_ptr); \
+	asm_volatile_4( \
+		(asm_inline( gte_lwc2_v2_RT4, gte_lwc2_v2z_RT4 )), \
+		(), ("r"(gte_base_)), \
+		("$2", "$8", "$9", "$31", "memory") \
+	); } while (0)
 
-/* All three at once — the canonical prelude to gte_cmd_rtpt. */
+/* All three at once -- the canonical prelude to gte_cmd_rtpt. */
 #define gte_load_v0v1v2(r_ptr) \
-	asm volatile( \
-		asm_inline( \
-			gte_lwc2_v0_RT4, gte_lwc2_v0z_RT4, \
-			gte_lwc2_v1_RT4, gte_lwc2_v1z_RT4, \
-			gte_lwc2_v2_RT4, gte_lwc2_v2z_RT4 \
-		), \
-		asm_clobber( clb_system, "$12" ) \
-		: : "r"(r_ptr) )
+	do { register const void* gte_base_ asm("$12") = (r_ptr); \
+	asm_volatile_4( \
+		(asm_inline( gte_lwc2_v0_RT4, gte_lwc2_v0z_RT4, \
+		             gte_lwc2_v1_RT4, gte_lwc2_v1z_RT4, \
+		             gte_lwc2_v2_RT4, gte_lwc2_v2z_RT4 )), \
+		(), ("r"(gte_base_)), \
+		("$2", "$8", "$9", "$31", "memory") \
+	); } while (0)
 
 /**
  * @brief Loads a single SVECTOR to GTE vector register V1
diff --git a/code/duffle/mips.h b/code/duffle/mips.h
index 6210df9..42d34e6 100644
--- a/code/duffle/mips.h
+++ b/code/duffle/mips.h
@@ -240,20 +240,17 @@ FI_ void mips_flush_icache(void) { C_(VoidFn*, codeblob_mips_flush_icache)(); }
 
 #define clb_system "$2", "$8", "$9", "$31", "memory"
 
-#define asm_mips_flush_icache() asm volatile( \
-	asm_inline( \
-		  add_ui(rstack_ptr, rstack_ptr, -8) \
-		, store_word(rret_addr, rstack_ptr, 4) \
-		, add_ui(rret_0, rdiscard, bios_flushcache) \
-		, add_ui(rtmp_0, rdiscard, bios_table_addr) \
-		, jump_link(rtmp_0, rret_addr) \
-		, nop() \
-		, load_word(rret_addr, rstack_ptr, 4) \
-		, jump_reg(rret_addr) \
-		, add_ui(rstack_ptr, rstack_ptr, 8) \
-	) \
-	asm_clobber( clb_system ) \
-)
+#define asm_mips_flush_icache() asm_volatile( asm_inline( \
+	  add_ui(rstack_ptr, rstack_ptr, -8) \
+	, store_word(rret_addr, rstack_ptr, 4) \
+	, add_ui(rret_0, rdiscard, bios_flushcache) \
+	, add_ui(rtmp_0, rdiscard, bios_table_addr) \
+	, jump_link(rtmp_0, rret_addr) \
+	, nop() \
+	, load_word(rret_addr, rstack_ptr, 4) \
+	, jump_reg(rret_addr) \
+	, add_ui(rstack_ptr, rstack_ptr, 8) \
+), clb_system )
 
 void test_mips_asm() {
 	asm_mips_flush_icache();
diff --git a/code/gte_hello/hello_gte.s b/code/gte_hello/hello_gte.s
deleted file mode 100644
index 1712b3a..0000000
--- a/code/gte_hello/hello_gte.s
+++ /dev/null
@@ -1,170 +0,0 @@
-// .include "./toolchain/pcsx-redux/src/mips/common/crt0/crt0.s"
-
-.include 
"./asmdd/dsl.s" -.include "./asmdd/math.s" -.include "./asmdd/io.s" -.include "./asmdd/gp.s" - -# DrawEnv_Packed { U4 tag; U4 code[15]; } -.equ DrawEnv_Packed_tag, 0 -.equ DrawEnv_Packed_code, DrawEnv_Packed_tag + U4 -.equ DrawEnv_Packed, 64 -# DrawEnv { Rect_S2 clip; V2_S2 ofs; Rect_S2 tw; U2 tpage; U8 dtd; U8 dfe; U8 tme; U8 r0,g0,b0; DR_ENV dr_env; } -.equ DrawEnv_clip_area, /* 0 */ Rect_S2 * 0 -.equ DrawEnv_drawing_offset, /* 8 */ V2_S2 * 0 + Rect_S2 -.equ DrawEnv_texture_window, /* 12 */ Rect_S2 * 0 + A2_S2 + DrawEnv_drawing_offset -.equ DrawEnv_texture_page, /* 20 */ S1 * 0 + Rect_S2 + DrawEnv_texture_window -.equ DrawEnv_flag_dither, /* 22 */ B1 * 0 + S2 + DrawEnv_texture_page -.equ DrawEnv_flag_draw_on_display, /* 23 */ B1 * 0 + B1 + DrawEnv_flag_dither -.equ DrawEnv_enable_auto_clear, /* 24 */ B1 * 0 + B1 + DrawEnv_flag_draw_on_display -.equ DrawEnv_initial_bg_color, /* 25 */ RGB8 * 0 + B1 + DrawEnv_enable_auto_clear -.equ DrawEnv_dr_env, /* 28 */ DrawEnv_Packed * 0 + RGB8 + DrawEnv_initial_bg_color -.equ DrawEnv, /* 92 */ DrawEnv_dr_env + DrawEnv_Packed -# DisplayEnv { Rect_S16 disp; Rect_S16 screen; U8 isinter; U8 isrgb24; U8 pad[2]; } -.equ DisplayEnv_display_area, Rect_S2 * 0 -.equ DisplayEnv_screen, Rect_S2 * 0 + Rect_S2 + DisplayEnv_display_area -.equ DisplayEnv_vinterlace, B1 * 0 + Rect_S2 + DisplayEnv_screen -.equ DisplayEnv_color24, B1 * 0 + B1 + DisplayEnv_vinterlace -.equ DisplayEnv_pad0, B1 * 0 + B1 + DisplayEnv_color24 -.equ DisplayEnv_pad1, B1 * 0 + B1 + DisplayEnv_pad0 -.equ DisplayEnv, DisplayEnv_pad1 + B1 -# DoubleBuffer { DrawEnv draw[2]; DisplayEnv display[2]; } -.equ DoubleBuffer_draw, 0 -.equ DoubleBuffer_draw_0, (DrawEnv * 0) -.equ DoubleBuffer_draw_1, (DrawEnv * 1) -.equ DoubleBuffer_display, (DrawEnv * 2) -.equ DoubleBuffer_display_0, (DisplayEnv * 0) + DoubleBuffer_display -.equ DoubleBuffer_display_1, (DisplayEnv * 1) + DoubleBuffer_display -.equ DoubleBuffer, (DisplayEnv * 2) + DoubleBuffer_display -# Screen Constants -.equ 
ScreenRes_X, 320 -.equ ScreenRes_Y, 240 -.equ ScreenRes_CenterX, (ScreenRes_X >> 1) -.equ ScreenRes_CenterY, (ScreenRes_Y >> 1) - -.equ SMemory_screen_buf, DoubleBuffer * 0 -.equ SMemory_active_screen_buf, S2 * 0 + DoubleBuffer - -.equ CF_Shadow, 16 - -.extern ResetGraph -.equ ResetGraph_mode, rarg_0 - -.extern SetDispMask -.equ SetDispMask_mask, rarg_0 - -.extern PutDispEnv -.extern PutDrawEnv -.equ PutDispEnv_env, rarg_0 -.equ PutDrawEnv_env, rarg_0 - -.extern SetDefDispEnv -.equ SetDefDispEnv_env, rarg_0 -.equ SetDefDispEnv_x, rarg_1 -.equ SetDefDispEnv_y, rarg_2 -.equ SetDefDispEnv_w, rarg_3 -.equ SetDefDispEnv_h, CF_Shadow -.set SetDefDispEnv_sp_size, CF_Shadow + S4 - -.extern SetDefDrawEnv -.equ SetDefDrawEnv_env, rarg_0 -.equ SetDefDrawEnv_x, rarg_1 -.equ SetDefDrawEnv_y, rarg_2 -.equ SetDefDrawEnv_w, rarg_3 -.equ SetDefDrawEnv_h, CF_Shadow -.set SetDefDrawEnv_sp_size, CF_Shadow + S4 - -.extern SetGeomOffset -.equ SetGeomOffset_x, rarg_0 -.equ SetGeomOffset_y, rarg_1 - -.extern SetGeomScreen -.equ SetGeomScreen_h, rarg_0 - -.global gp_screen_init_asm -.type gp_screen_init_asm, @function -gp_screen_init_asm: - .equiv rio_offset, rtmp_0 - load_imm rtmp_0, IO_BASE_ADDR - #define gp0 gpio_port0(rio_offset) - #define gp1 gpio_port1(rio_offset) - - def_cf_sp_size 0x18; // Should be enough for all calls within this proc, for some reason SetDefDispEnv needs the offset to be CF_Shadow.. 
- stack_alloc cf_ssize - store_word rret_addr, 0($sp) - - // Note(Ed): Cannot be used psyq manages things related to vblank and other things so the api must be called instead - // gcmd_push gp1, rtmp_1, gp_Reset // ResetGraph(0) - // gcmd_push gp1, rtmp_1, gp_DisplayEnabled // SetDispMask(1) - load_imm ResetGraph_mode, gp_Reset; jump_nlink ResetGraph - load_imm SetDispMask_mask, 1; jump_nlink SetDispMask - - // First buffer area - load_addr rtmp_0, static_mem; add_ui SetDefDispEnv_env, rtmp_0, SMemory_screen_buf + DoubleBuffer_display_0 - move SetDefDispEnv_x, $zero - move SetDefDispEnv_y, $zero - load_imm SetDefDispEnv_w, ScreenRes_X - load_imm rtmp_0, ScreenRes_Y; store_word rtmp_0, SetDefDispEnv_h($sp) - jump_nlink SetDefDispEnv - load_addr rtmp_0, static_mem; add_ui SetDefDrawEnv_env, rtmp_0, SMemory_screen_buf + DoubleBuffer_draw_0 - move SetDefDrawEnv_x, $zero - load_imm SetDefDrawEnv_y, ScreenRes_Y - load_imm SetDefDrawEnv_w, ScreenRes_X - load_imm rtmp_0, ScreenRes_Y; store_word rtmp_0, SetDefDrawEnv_h($sp) - jump_nlink SetDefDrawEnv - // Second buffer area - load_addr rtmp_0, static_mem; add_ui SetDefDispEnv_env, rtmp_0, SMemory_screen_buf + DoubleBuffer_display_1 - move SetDefDispEnv_x, $zero - load_imm SetDefDispEnv_y, ScreenRes_Y - load_imm SetDefDispEnv_w, ScreenRes_X - load_imm rtmp_0, ScreenRes_Y; store_word rtmp_0, SetDefDispEnv_h($sp) - jump_nlink SetDefDispEnv - load_addr rtmp_0, static_mem; add_ui SetDefDrawEnv_env, rtmp_0, SMemory_screen_buf + DoubleBuffer_draw_1 - move SetDefDrawEnv_x, $zero - move SetDefDrawEnv_y, $zero - load_imm SetDefDrawEnv_w, ScreenRes_X - load_imm rtmp_0, ScreenRes_Y; store_word rtmp_0, SetDefDrawEnv_h($sp) - jump_nlink SetDefDrawEnv - - // Set the back/drawing buffer - load_imm rtmp_1, true - load_addr rtmp_0, static_mem; // At SMemory_screen_buf - store_word rtmp_1, DoubleBuffer_draw_0 + DrawEnv_enable_auto_clear(rtmp_0) - store_word rtmp_1, DoubleBuffer_draw_1 + DrawEnv_enable_auto_clear(rtmp_0) - - // Set background 
clear color - load_imm rtmp_1, 28; load_imm rtmp_2, 22; load_imm rtmp_3, 25 - // 63, 0, 127 - store_byte rtmp_2, DoubleBuffer_draw_0 + DrawEnv_initial_bg_color + RGB8_r(rtmp_0) - store_byte rtmp_1, DoubleBuffer_draw_0 + DrawEnv_initial_bg_color + RGB8_g(rtmp_0) - store_byte rtmp_3, DoubleBuffer_draw_0 + DrawEnv_initial_bg_color + RGB8_b(rtmp_0) - // 127, 63, 0 - store_byte rtmp_3, DoubleBuffer_draw_1 + DrawEnv_initial_bg_color + RGB8_r(rtmp_0) - store_byte rtmp_2, DoubleBuffer_draw_1 + DrawEnv_initial_bg_color + RGB8_g(rtmp_0) - store_byte rtmp_1, DoubleBuffer_draw_1 + DrawEnv_initial_bg_color + RGB8_b(rtmp_0) - load_addr rtmp_0, static_mem; store_word rtmp_1, SMemory_active_screen_buf(rtmp_0) - - load_addr rtmp_1, static_mem; load_half rtmp_1, SMemory_active_screen_buf(rtmp_1); // rtmp_1 = active_screen_buffer - load_imm rtmp_2, DisplayEnv; mult_u rtmp_1, rtmp_2; mov_from_low rtmp_2 // rtmp_2 = DisplayEnv.type_size * active_screen_Buffer (rtmp_1) - add_ui rtmp_2, rtmp_2, DoubleBuffer_display // rtmp_2 += DoubleBuffer.display - load_addr rtmp_0, static_mem; add_u PutDispEnv_env, rtmp_0, rtmp_2 // rarg_0 = rtmp_0 (screen_buffer) + rtmp_2 (.display[active_screen-buffer]) - jump_nlink PutDispEnv - load_addr rtmp_1, static_mem; load_half rtmp_1, SMemory_active_screen_buf(rtmp_1); - load_imm rtmp_2, DrawEnv; mult_u rtmp_1, rtmp_2; mov_from_low rtmp_2; - add_ui rtmp_2, rtmp_2, DoubleBuffer_draw - load_addr rtmp_0, static_mem; add_u PutDrawEnv_env, rtmp_0, rtmp_2 - jump_nlink PutDrawEnv - - // Initialize and setup the GTE geometry offsets - jump_nlink InitGeom - load_imm SetGeomOffset_x, ScreenRes_CenterX - load_imm SetGeomOffset_y, ScreenRes_CenterY - jump_nlink SetGeomOffset - load_imm SetGeomScreen_h, ScreenRes_CenterX - jump_nlink SetGeomScreen - - load_word rret_addr, 0($sp) - stack_release cf_ssize - jump_reg rret_addr; -.Lgp_screen_init_end: -.size gp_screen_init_asm, . 
- gp_screen_init_asm diff --git a/scripts/build_psyq.ps1 b/scripts/build_psyq.ps1 index 96f57c2..fecafd3 100644 --- a/scripts/build_psyq.ps1 +++ b/scripts/build_psyq.ps1 @@ -325,10 +325,10 @@ function build-gte_hello { $module_asm_crt = join-path $path_build 'crt0.o' # assemble-unit $src_asm_crt $module_asm_crt $includes $assemble_args - $src_asm = join-path $path_module 'hello_gte.s' - $module_asm = join-path $path_build 'hello_gte.o' + # $src_asm = join-path $path_module 'hello_gte.s' + # $module_asm = join-path $path_build 'hello_gte.o' - assemble-unit $src_asm $module_asm $includes $assemble_args + # assemble-unit $src_asm $module_asm $includes $assemble_args $src_c = join-path $path_module 'hello_gte.c' $module_c = join-path $path_build 'hello_gte_c.o' @@ -348,7 +348,7 @@ function build-gte_hello { $link_args = @() $link_args += $f_debug # $link_args += $f_optimize_size - link-modules @($module_asm_crt, $module_asm, $module_c) $elf $link_args + link-modules @($module_asm_crt, $module_c) $elf $link_args make-binary $elf $exe } build-gte_hello