diff --git a/.gitignore b/.gitignore
index 0411a49..538ee20 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,8 @@
 build
 toolchain/armips
 toolchain/pcsx-redux
-toolchain/psyq_iwyu
-toolchain/PSn00bSDK
+# toolchain/psyq_iwyu
+# toolchain/PSn00bSDK
 
 *.exe
 *.elf
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
index cc2d581..28d863a 100644
--- a/.vscode/c_cpp_properties.json
+++ b/.vscode/c_cpp_properties.json
@@ -14,7 +14,7 @@
                 "INTELLISENSE_DIRECTIVES"
             ],
             "intelliSenseMode": "gcc-x86",
-            "compilerPath": "C:\\Users\\Ed\\AppData\\Roaming\\mips\\versions\\v14.2.0\\bin\\mipsel-none-elf-gcc.exe",
+            "compilerPath": "C:/Users/Ed/scoop/apps/gcc/current/bin/gcc.exe"
         }
     ],
     "version": 4
diff --git a/code/duffle/dsl.h b/code/duffle/dsl.h
index c4c6b05..b0e4ba0 100644
--- a/code/duffle/dsl.h
+++ b/code/duffle/dsl.h
@@ -3,12 +3,17 @@
 # include "assert.h"
 #endif
 
-#define align_(value) __attribute__((aligned (value))) // for easy alignment
-#define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path
-#define finline static inline __attribute__((always_inline)) // force inline
-#define no_inline static __attribute__((noinline)) // force no inline [used in thread api]
-#define R_ __restrict // pointers are either restricted or volatile and nothing else
-#define V_ volatile // pointers are either restricted or volatile and nothing else
+#define LP_ static // local_persist
+#define internal static // internal
+#define global
+#define gknown
+
+#define align_(value) __attribute__((aligned (value))) // for easy alignment
+#define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path
+#define FI_ static inline __attribute__((always_inline)) // force inline
+#define NI_ static __attribute__((noinline)) // force no inline [used in thread api]
+#define R_ __restrict // pointers are either restricted or volatile and nothing else
+#define V_ volatile // pointers are either restricted or volatile and nothing else
 
 #define glue_impl(A, B) A ## B
 #define glue(A, B) glue_impl(A, B)
@@ -16,12 +21,7 @@
 #define stringify(S) stringify_impl(S)
 #define tmpl(prefix, type) prefix ## _ ## type
 
-#define local_persist static
-#define internal static
-#define global
-#define gknown
-
-#define offset_of(type, member) cast(SSIZE, & (((type*) 0)->member))
+#define offset_of(type, member) cast(U8,__builtin_offsetof(type,member))
 #define static_assert _Static_assert
 #define typeof __typeof__
 #define typeof_ptr(ptr) typeof((ptr)[0])
@@ -32,9 +32,16 @@
 #define def_ptr_set(type) def_R_(type); typedef def_V_(type)
 #define def_tset(type) type; typedef def_ptr_set(type)
 
-typedef __UINT8_TYPE__ def_tset(U1); typedef __UINT16_TYPE__ def_tset(U2); typedef __UINT32_TYPE__ def_tset(U4);
-typedef __INT8_TYPE__ def_tset(S1); typedef __INT16_TYPE__ def_tset(S2); typedef __INT32_TYPE__ def_tset(S4);
-typedef unsigned char def_tset(B1); typedef __UINT16_TYPE__ def_tset(B2); typedef __UINT32_TYPE__ def_tset(B4);
+typedef __UINT8_TYPE__ def_tset(U1);
+typedef __UINT16_TYPE__ def_tset(U2);
+typedef __UINT32_TYPE__ def_tset(U4);
+typedef __INT8_TYPE__ def_tset(S1);
+typedef __INT16_TYPE__ def_tset(S2);
+typedef __INT32_TYPE__ def_tset(S4);
+typedef unsigned char def_tset(B1);
+typedef __UINT16_TYPE__ def_tset(B2);
+typedef __UINT32_TYPE__ def_tset(B4);
+typedef __UINT64_TYPE__ def_tset(B8);
 
 enum { false = 0, true = 1, true_overflow, };
 #define u1_r(value) cast(U1_R, value)
@@ -75,6 +82,8 @@ enum { false = 0, true = 1, true_overflow, };
 
 #define r_(ptr) cast(typeof_ptr(ptr)*R_, ptr)
 #define v_(ptr) cast(typeof_ptr(ptr)*V_, ptr)
+#define tr_(type, ptr) cast(type*R_, ptr)
+#define tv_(type, ptr) cast(type*V_, ptr)
 
 #define kilo(n) (cast(U4, n) << 10)
 #define mega(n) (cast(U4, n) << 20)
@@ -87,7 +96,7 @@ enum { false = 0, true = 1, true_overflow, };
 #define sop_2(op, a, b) cast(U2, s2_(a) op s2_(b))
 #define sop_4(op, a, b) cast(U4, s4_(a) op s4_(b))
 
-#define def_signed_op(id, op, width) finline U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); }
+#define def_signed_op(id, op, width) FI_ U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); }
 #define def_signed_ops(id, op) def_signed_op(id, op, 1) def_signed_op(id, op, 2) def_signed_op(id, op, 4)
 def_signed_ops(add, +) def_signed_ops(sub, -)
 def_signed_ops(mut, *) def_signed_ops(div, /)
@@ -103,13 +112,13 @@ def_signed_ops(ge, >=) def_signed_ops(le, <=)
 #define ge_s(a,b) def_generic_sop(ge, a,b)
 #define le_s(a,b) def_generic_sop(le, a,b)
 
-#define span_iter(type, iter, m_begin, op, m_end) \
-    tmpl(Iter_Span,type) iter = { \
-        .r = {(m_begin), (m_end)}, \
-        .cursor = (m_begin) }; \
-    iter.cursor op iter.r.end; \
-    ++ iter.cursor
-
+#define span_iter(type, iter, m_begin, op, m_end) ( \
+    tmpl(Iter_Span,type) iter = { \
+        .r = {(m_begin), (m_end)}, \
+        .cursor = (m_begin) }; \
+    iter.cursor op iter.r.end; \
+    ++ iter.cursor \
+)
 #define def_span(type) \
     def_struct(tmpl( Span,type)) { type begin; type end; }; \
     typedef def_struct(tmpl(Iter_Span,type)) { tmpl(Span,type) r; type cursor; }
@@ -127,7 +136,7 @@ typedef def_struct(Slice_Str8) { Str8* ptr; U4 len; };
 #define def_Slice(type) def_struct(tmpl(Slice,type)) { type* ptr; U4 len; }
 #define slice_assert(slice) do { assert((slice).ptr != nullptr); assert((slice).len > 0); } while(0)
 #define slice_end(slice) ((slice).ptr + (slice).len)
-#define size_of_slice_type(slice) size_of( * (slice).ptr )
+#define size_of_slice_type(slice) size_of((slice).ptr[0])
 
 typedef def_Slice(void);
 typedef def_Slice(B1);
@@ -142,10 +151,11 @@ void slice__zero(Slice_B1 mem, U4 typewidth);
 } while (0)
 
 #define slice_zero(slice) slice__zero(slice_byte(slice), size_of_slice_type(slice))
-#define slice_iter(container, iter) \
+#define slice_iter(container, iter) ( \
     typeof((container).ptr) iter = (container).ptr; \
     iter != slice_end(container); \
-    ++ iter
+    ++ iter \
+)
 #define slice_from_farray(type, ...) & (tmpl(Slice,type)) { \
     .ptr = farray_init(type, __VA_ARGS__), \
     .len = farray_len( farray_init(type, __VA_ARGS__)) \
diff --git a/code/duffle/math.h b/code/duffle/math.h
index 0238ea8..2703776 100644
--- a/code/duffle/math.h
+++ b/code/duffle/math.h
@@ -16,10 +16,14 @@ typedef S2 A3A3_S2[3][3];
 
 typedef def_struct(Extent2_S2) { S2 width; S2 height; };
 typedef def_struct(Extent2_S4) { S4 width; S4 height; };
+
 typedef def_struct(V2_S2) { S2 x; S2 y; };
 typedef def_struct(V2_S4) { S4 x; S4 y; };
 typedef def_struct(V3_S2) { S2 x; S2 y; S2 z; S2 pad; };
 typedef def_struct(V3_S4) { S4 x; S4 y; S4 z; S4 pad; };
+typedef def_struct(V4_S2) { S2 x; S2 y; S2 z; S2 w; };
+typedef def_struct(V4_S4) { S4 x; S4 y; S4 z; S4 w; };
+
 typedef def_struct(R2_S2) { V2_S2 p0; V2_S2 p1; };
 typedef def_struct(R2_S4) { V2_S4 p0; V2_S4 p1; };
 
@@ -28,6 +32,8 @@ typedef def_struct(Rect_S4) { S4 x; S4 y; S4 width; S4 height; };
 
 typedef def_struct(M3_S2) { A3A3_S2 m; A3_S4 t; };
 
-#define v2s2(x,y) (V2_S2){x,y}
-#define v3s2(x,y,z) (V3_S2){x,y,z}
-#define v3s4(x,y,z) (V3_S4){x,y,z}
+#define v2s2(x,y) (V2_S2){x,y}
+#define v3s2(x,y,z) (V3_S2){x,y,z}
+#define v3s4(x,y,z) (V3_S4){x,y,z}
+#define v4s2(x,y,z,w) (V4_S2){x,y,z,w}
+#define v4s4(x,y,z,w) (V4_S4){x,y,z,w}
diff --git a/code/duffle/memory.h b/code/duffle/memory.h
index 27dcc38..76d1268 100644
--- a/code/duffle/memory.h
+++ b/code/duffle/memory.h
@@ -94,8 +94,7 @@ struct AllocatorProc_Out {
     U4 left; // Contiguous memory left
     U4 max_alloc;
     U4 min_alloc;
-    B4 continuity_break; // Whether this allocation broke continuity with the previous (address space wise)
-    byte_pad(4);
+    // byte_pad(8);
 };
 typedef def_struct(AllocatorInfo) {
     AllocatorProc* proc;
@@ -108,8 +107,7 @@ typedef def_struct(AllocatorQueryInfo) {
     U4 left; // Contiguous memory left
     U4 max_alloc;
     U4 min_alloc;
-    B4 continuity_break; // Whether this allocation broke continuity with the previous (address space wise)
-    byte_pad(4);
+    // byte_pad(4);
 };
 static_assert(size_of(AllocatorProc_Out) == size_of(AllocatorQueryInfo));
 
diff --git a/code/graphics_hello_psyq/hello_gpu.c b/code/graphics_hello_psyq/hello_gpu.c
index e8e5538..d4d684f 100644
--- a/code/graphics_hello_psyq/hello_gpu.c
+++ b/code/graphics_hello_psyq/hello_gpu.c
@@ -11,6 +11,8 @@
 #include "duffle/gp.h"
 #include "hello_gpu.h"
 
+#define GTE_Coprocessor_Chapter 1
+
 typedef def_farray(V2_S2, 3);
 typedef def_struct(Poly_F3) {
     U4 tag;
@@ -73,12 +75,22 @@ typedef def_struct(PrimitiveArena) {
     U4 used;
 };
 
-#define Cube_num_verts 8
-#define Cube_num_faces 12
-typedef def_farray(V3_S2, Cube_num_verts);
-typedef def_farray(V3_S2, Cube_num_faces);
+#define GTE_Coprocessor_UseQuads 1 // enable exactly one of UseQuads / UseTris: both branches define Cube_num_faces and ACubeFaces
+#define GTE_Coprocessor_UseTris 0
 
-void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) {
+#define Cube_num_verts 8
+typedef def_farray(V3_S2, Cube_num_verts);
+#if GTE_Coprocessor_UseTris
+#define Cube_num_faces 12
+typedef def_farray(V3_S2, Cube_num_faces);
+typedef A12_V3_S2 ACubeFaces;
+#endif
+#if GTE_Coprocessor_UseQuads
+#define Cube_num_faces 6
+typedef def_farray(V4_S2, Cube_num_faces);
+typedef A6_V4_S2 ACubeFaces;
+#endif
+void cube128_init(A8_V3_S2* verts, ACubeFaces* faces) {
     memory_copy(verts, & (A8_V3_S2) {
         { -128, -128, -128 },
         { 128, -128, -128 },
@@ -91,6 +103,7 @@ void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) {
         },
         size_of(A8_V3_S2)
     );
+    #if GTE_Coprocessor_UseTris
     memory_copy(faces, & (A12_V3_S2) {
         { 0, 3, 2 }, // top
         { 0, 2, 1 }, // top
@@ -107,6 +120,19 @@ void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) {
         },
         size_of(A12_V3_S2)
     );
+    #endif
+    #if GTE_Coprocessor_UseQuads
+    memory_copy(faces, & (A6_V4_S2) {
+        { 3, 2, 0, 1 },
+        { 0, 1, 4, 5 },
+        { 4, 5, 7, 6 },
+        { 1, 2, 5, 6 },
+        { 2, 3, 6, 7 },
+        { 3, 0, 7, 4 },
+        },
+        size_of(A6_V4_S2)
+    );
+    #endif
     return;
 }
 
@@ -122,8 +148,8 @@ typedef def_struct(SMemory) {
 
     M3_S2 tform_world;
 
-    A8_V3_S2 cube_verts;
-    A12_V3_S2 cube_faces;
+    A8_V3_S2 cube_verts;
+    ACubeFaces cube_faces;
 };
 global SMemory static_mem;
 extern SMemory static_mem;
@@ -195,7 +221,7 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
     gte_matrix_set_rotation (& static_mem.tform_world);
     gte_matrix_set_translation(& static_mem.tform_world);
 
-#if 1
+#if GTE_Coprocessor_Chapter
     S4 nclip = 0;
     S4 orderingtbl_z = 0;
     A2_S2 p; //???
@@ -203,6 +229,7 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
 
     for (U4 face_id = 0; face_id < Cube_num_faces; face_id += 1)
     {
+    #if GTE_Coprocessor_UseTris
         Poly_G3* tri = prim_alloc(Poly_G3); set_poly_g3(tri);
         tri->c0 = rgb8(255, 0, 255);
         tri->c1 = rgb8(255, 255, 0);
@@ -231,8 +258,34 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
         if ((orderingtbl_z > 0) && (orderingtbl_z < OrderingTbl_Len)) {
             orderingtbl_add_primitive(ordering_buf[orderingtbl_z], tri);
         }
-    }
+    #endif
+    #if GTE_Coprocessor_UseQuads
+        Poly_G4* quad = prim_alloc(Poly_G4); set_poly_g4(quad);
+        quad->c0 = rgb8(255, 0, 255);
+        quad->c1 = rgb8(255, 255, 0);
+        quad->c2 = rgb8( 0, 255, 255);
+        quad->c3 = rgb8( 0, 255, 0);
 
+        V4_S2* face = & static_mem.cube_faces[face_id];
+        V3_S2* p0 = & static_mem.cube_verts[face->x];
+        V3_S2* p1 = & static_mem.cube_verts[face->y];
+        V3_S2* p2 = & static_mem.cube_verts[face->z];
+        V3_S2* p3 = & static_mem.cube_verts[face->w];
+
+        nclip = rtp_avg_nclip_a4_v3s2(
+            p0, p1, p2, p3,
+            & quad->p0, & quad->p1, & quad->p2, & quad->p3,
+            & p, & orderingtbl_z, & flag
+        );
+        if (nclip <= 0) {
+            continue;
+        }
+
+        if ((orderingtbl_z > 0) && (orderingtbl_z < OrderingTbl_Len)) {
+            orderingtbl_add_primitive(ordering_buf[orderingtbl_z], quad);
+        }
+    #endif
+    }
     static_mem.rotation.x += 6;
     static_mem.rotation.y += 8;
     static_mem.rotation.z += 12;
diff --git a/code/graphics_hello_psyq/hello_gpu.h b/code/graphics_hello_psyq/hello_gpu.h
index f82ec78..dfb4993 100644
--- a/code/graphics_hello_psyq/hello_gpu.h
+++ b/code/graphics_hello_psyq/hello_gpu.h
@@ -117,10 +117,10 @@ M3_S2* m3s2_scale (M3_S2* mat, V3_S4* vec) __asm__("ScaleMatrix");
 
 // Rotation, Translation, Perspective
 S4 rtp_v3s2_raw(V3_S2* vec, S4* xy, S4* pp, S4* flag) __asm__("RotTransPers");
-finline S4 rtp_v3s2(V3_S2* vec, V2_S2* xy, A2_S2* pp, S4* flag) { return rtp_v3s2_raw(vec, cast(S4*R_, & xy->x), cast(S4*R_, pp), r_(flag)); }
+FI_ S4 rtp_v3s2(V3_S2* vec, V2_S2* xy, A2_S2* pp, S4* flag) { return rtp_v3s2_raw(vec, cast(S4*R_, & xy->x), cast(S4*R_, pp), r_(flag)); }
 
 S4 rtp_avg_nclip_a3_v3s2_raw(V3_S2* v0, V3_S2* v1, V3_S2* v2, S4* xy1, S4* xy2, S4* xy3, S4* pp, S4* otz, S4* flag) __asm__("RotAverageNclip3");
-finline S4 rtp_avg_nclip_a3_v3s2(
+FI_ S4 rtp_avg_nclip_a3_v3s2(
     V3_S2* v0, V3_S2* v1, V3_S2* v2,
     V2_S2* xy0, V2_S2* xy1, V2_S2* xy2,
     A2_S2* pp, S4* otz, S4* flag
@@ -132,6 +132,19 @@ finline S4 rtp_avg_nclip_a3_v3s2(
     );
 }
 
+S4 rtp_avg_nclip_a4_v3s2_raw(V3_S2* v0, V3_S2* v1, V3_S2* v2, V3_S2* v3, S4* xy1, S4* xy2, S4* xy3, S4* xy4, S4* pp, S4* otz, S4* flag) __asm__("RotAverageNclip4");
+FI_ S4 rtp_avg_nclip_a4_v3s2(
+    V3_S2* v0, V3_S2* v1, V3_S2* v2, V3_S2* v3,
+    V2_S2* xy0, V2_S2* xy1, V2_S2* xy2, V2_S2* xy3,
+    A2_S2* pp, S4* otz, S4* flag
+){
+    return rtp_avg_nclip_a4_v3s2_raw(
+        v0, v1, v2, v3,
+        cast(S4*R_, xy0), cast(S4*R_, xy1), cast(S4*R_, xy2), cast(S4*R_, xy3),
+        cast(S4*R_, pp), cast(S4*R_, otz), cast(S4*R_, flag)
+    );
+}
+
 void gte_matrix_set_rotation (M3_S2* mat) __asm__("SetRotMatrix");
 void gte_matrix_set_translation(M3_S2* mat) __asm__("SetTransMatrix");
 
diff --git a/scripts/build_psyq.ps1 b/scripts/build_psyq.ps1
index bd4e4b0..6872d39 100644
--- a/scripts/build_psyq.ps1
+++ b/scripts/build_psyq.ps1
@@ -30,9 +30,11 @@ $f_wall = "-Wall"
 $f_wno_attributes = "-Wno-attributes"
 
 # Optimization Flags
-$f_optimize_none = "-O0" # For Debug builds
-$f_optimize_size = "-Os" # For Release builds
-$f_omit_frame_ptr = "-fomit-frame-pointer"
+$f_optimize_none = "-O0"
+$f_optimize_size = "-Os"
+$f_optimize_intrinsics = "-Oi" # NOTE(review): not a GCC -O level (resembles MSVC /Oi); keep disabled unless the toolchain is confirmed to accept it
+$f_optimize_debug = "-Og"
+$f_omit_frame_ptr = "-fomit-frame-pointer"
 
 # Environment & Standard Library Flags
 $f_no_stdlib = "-nostdlib"
@@ -289,8 +291,10 @@ function build-graphis_hello {
 
     $compile_args = @()
     $compile_args += $f_debug
-    $compile_args += $f_optimize_none
+    # $compile_args += $f_optimize_none
+    # $compile_args += $f_optimize_intrinsics
     # $compile_args += $f_optimize_size
+    $compile_args += $f_optimize_debug
     $compile_args += ($f_include + $path_code)
 
     compile-unit $src_c $module_c $includes $compile_args