Finish chapter: The GTE Coprocessor.

This commit is contained in:
2026-03-28 12:51:29 -04:00
parent b846c697c6
commit d1919137a3
8 changed files with 135 additions and 51 deletions
+2 -2
View File
@@ -1,8 +1,8 @@
build build
toolchain/armips toolchain/armips
toolchain/pcsx-redux toolchain/pcsx-redux
toolchain/psyq_iwyu # toolchain/psyq_iwyu
toolchain/PSn00bSDK # toolchain/PSn00bSDK
*.exe *.exe
*.elf *.elf
+1 -1
View File
@@ -14,7 +14,7 @@
"INTELLISENSE_DIRECTIVES" "INTELLISENSE_DIRECTIVES"
], ],
"intelliSenseMode": "gcc-x86", "intelliSenseMode": "gcc-x86",
"compilerPath": "C:\\Users\\Ed\\AppData\\Roaming\\mips\\versions\\v14.2.0\\bin\\mipsel-none-elf-gcc.exe", "compilerPath": "C:/Users/Ed/scoop/apps/gcc/current/bin/gcc.exe"
} }
], ],
"version": 4 "version": 4
+28 -18
View File
@@ -3,10 +3,15 @@
# include "assert.h" # include "assert.h"
#endif #endif
#define LP_ static // local_persist
#define internal static // internal
#define global
#define gknown
#define align_(value) __attribute__((aligned (value))) // for easy alignment #define align_(value) __attribute__((aligned (value))) // for easy alignment
#define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path #define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path
#define finline static inline __attribute__((always_inline)) // force inline #define FI_ static inline __attribute__((always_inline)) // force inline
#define no_inline static __attribute__((noinline)) // force no inline [used in thread api] #define NI_ static __attribute__((noinline)) // force no inline [used in thread api]
#define R_ __restrict // pointers are either restricted or volatile and nothing else #define R_ __restrict // pointers are either restricted or volatile and nothing else
#define V_ volatile // pointers are either restricted or volatile and nothing else #define V_ volatile // pointers are either restricted or volatile and nothing else
@@ -16,12 +21,7 @@
#define stringify(S) stringify_impl(S) #define stringify(S) stringify_impl(S)
#define tmpl(prefix, type) prefix ## _ ## type #define tmpl(prefix, type) prefix ## _ ## type
#define local_persist static #define offset_of(type, member) cast(U8,__builtin_offsetof(type,member))
#define internal static
#define global
#define gknown
#define offset_of(type, member) cast(SSIZE, & (((type*) 0)->member))
#define static_assert _Static_assert #define static_assert _Static_assert
#define typeof __typeof__ #define typeof __typeof__
#define typeof_ptr(ptr) typeof((ptr)[0]) #define typeof_ptr(ptr) typeof((ptr)[0])
@@ -32,9 +32,16 @@
#define def_ptr_set(type) def_R_(type); typedef def_V_(type) #define def_ptr_set(type) def_R_(type); typedef def_V_(type)
#define def_tset(type) type; typedef def_ptr_set(type) #define def_tset(type) type; typedef def_ptr_set(type)
typedef __UINT8_TYPE__ def_tset(U1); typedef __UINT16_TYPE__ def_tset(U2); typedef __UINT32_TYPE__ def_tset(U4); typedef __UINT8_TYPE__ def_tset(U1);
typedef __INT8_TYPE__ def_tset(S1); typedef __INT16_TYPE__ def_tset(S2); typedef __INT32_TYPE__ def_tset(S4); typedef __UINT16_TYPE__ def_tset(U2);
typedef unsigned char def_tset(B1); typedef __UINT16_TYPE__ def_tset(B2); typedef __UINT32_TYPE__ def_tset(B4); typedef __UINT32_TYPE__ def_tset(U4);
typedef __INT8_TYPE__ def_tset(S1);
typedef __INT16_TYPE__ def_tset(S2);
typedef __INT32_TYPE__ def_tset(S4);
typedef unsigned char def_tset(B1);
typedef __UINT16_TYPE__ def_tset(B2);
typedef __UINT32_TYPE__ def_tset(B4);
typedef __UINT64_TYPE__ def_tset(B8);
enum { false = 0, true = 1, true_overflow, }; enum { false = 0, true = 1, true_overflow, };
#define u1_r(value) cast(U1_R, value) #define u1_r(value) cast(U1_R, value)
@@ -75,6 +82,8 @@ enum { false = 0, true = 1, true_overflow, };
#define r_(ptr) cast(typeof_ptr(ptr)*R_, ptr) #define r_(ptr) cast(typeof_ptr(ptr)*R_, ptr)
#define v_(ptr) cast(typeof_ptr(ptr)*V_, ptr) #define v_(ptr) cast(typeof_ptr(ptr)*V_, ptr)
#define tr_(type, ptr) cast(type*R_, ptr)
#define tv_(type, ptr) cast(type*V_, ptr)
#define kilo(n) (cast(U4, n) << 10) #define kilo(n) (cast(U4, n) << 10)
#define mega(n) (cast(U4, n) << 20) #define mega(n) (cast(U4, n) << 20)
@@ -87,7 +96,7 @@ enum { false = 0, true = 1, true_overflow, };
#define sop_2(op, a, b) cast(U2, s2_(a) op s2_(b)) #define sop_2(op, a, b) cast(U2, s2_(a) op s2_(b))
#define sop_4(op, a, b) cast(U4, s4_(a) op s4_(b)) #define sop_4(op, a, b) cast(U4, s4_(a) op s4_(b))
#define def_signed_op(id, op, width) finline U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); } #define def_signed_op(id, op, width) FI_ U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); }
#define def_signed_ops(id, op) def_signed_op(id, op, 1) def_signed_op(id, op, 2) def_signed_op(id, op, 4) #define def_signed_ops(id, op) def_signed_op(id, op, 1) def_signed_op(id, op, 2) def_signed_op(id, op, 4)
def_signed_ops(add, +) def_signed_ops(sub, -) def_signed_ops(add, +) def_signed_ops(sub, -)
def_signed_ops(mut, *) def_signed_ops(div, /) def_signed_ops(mut, *) def_signed_ops(div, /)
@@ -103,13 +112,13 @@ def_signed_ops(ge, >=) def_signed_ops(le, <=)
#define ge_s(a,b) def_generic_sop(ge, a,b) #define ge_s(a,b) def_generic_sop(ge, a,b)
#define le_s(a,b) def_generic_sop(le, a,b) #define le_s(a,b) def_generic_sop(le, a,b)
#define span_iter(type, iter, m_begin, op, m_end) \ #define span_iter(type, iter, m_begin, op, m_end) ( \
tmpl(Iter_Span,type) iter = { \ tmpl(Iter_Span,type) iter = { \
.r = {(m_begin), (m_end)}, \ .r = {(m_begin), (m_end)}, \
.cursor = (m_begin) }; \ .cursor = (m_begin) }; \
iter.cursor op iter.r.end; \ iter.cursor op iter.r.end; \
++ iter.cursor ++ iter.cursor \
)
#define def_span(type) \ #define def_span(type) \
def_struct(tmpl( Span,type)) { type begin; type end; }; \ def_struct(tmpl( Span,type)) { type begin; type end; }; \
typedef def_struct(tmpl(Iter_Span,type)) { tmpl(Span,type) r; type cursor; } typedef def_struct(tmpl(Iter_Span,type)) { tmpl(Span,type) r; type cursor; }
@@ -127,7 +136,7 @@ typedef def_struct(Slice_Str8) { Str8* ptr; U4 len; };
#define def_Slice(type) def_struct(tmpl(Slice,type)) { type* ptr; U4 len; } #define def_Slice(type) def_struct(tmpl(Slice,type)) { type* ptr; U4 len; }
#define slice_assert(slice) do { assert((slice).ptr != nullptr); assert((slice).len > 0); } while(0) #define slice_assert(slice) do { assert((slice).ptr != nullptr); assert((slice).len > 0); } while(0)
#define slice_end(slice) ((slice).ptr + (slice).len) #define slice_end(slice) ((slice).ptr + (slice).len)
#define size_of_slice_type(slice) size_of( * (slice).ptr ) #define size_of_slice_type(slice) size_of((slice).ptr[0])
typedef def_Slice(void); typedef def_Slice(void);
typedef def_Slice(B1); typedef def_Slice(B1);
@@ -142,10 +151,11 @@ void slice__zero(Slice_B1 mem, U4 typewidth);
} while (0) } while (0)
#define slice_zero(slice) slice__zero(slice_byte(slice), size_of_slice_type(slice)) #define slice_zero(slice) slice__zero(slice_byte(slice), size_of_slice_type(slice))
#define slice_iter(container, iter) \ #define slice_iter(container, iter) ( \
typeof((container).ptr) iter = (container).ptr; \ typeof((container).ptr) iter = (container).ptr; \
iter != slice_end(container); \ iter != slice_end(container); \
++ iter ++ iter \
)
#define slice_from_farray(type, ...) & (tmpl(Slice,type)) { \ #define slice_from_farray(type, ...) & (tmpl(Slice,type)) { \
.ptr = farray_init(type, __VA_ARGS__), \ .ptr = farray_init(type, __VA_ARGS__), \
.len = farray_len( farray_init(type, __VA_ARGS__)) \ .len = farray_len( farray_init(type, __VA_ARGS__)) \
+6
View File
@@ -16,10 +16,14 @@ typedef S2 A3A3_S2[3][3];
typedef def_struct(Extent2_S2) { S2 width; S2 height; }; typedef def_struct(Extent2_S2) { S2 width; S2 height; };
typedef def_struct(Extent2_S4) { S4 width; S4 height; }; typedef def_struct(Extent2_S4) { S4 width; S4 height; };
typedef def_struct(V2_S2) { S2 x; S2 y; }; typedef def_struct(V2_S2) { S2 x; S2 y; };
typedef def_struct(V2_S4) { S4 x; S4 y; }; typedef def_struct(V2_S4) { S4 x; S4 y; };
typedef def_struct(V3_S2) { S2 x; S2 y; S2 z; S2 pad; }; typedef def_struct(V3_S2) { S2 x; S2 y; S2 z; S2 pad; };
typedef def_struct(V3_S4) { S4 x; S4 y; S4 z; S4 pad; }; typedef def_struct(V3_S4) { S4 x; S4 y; S4 z; S4 pad; };
typedef def_struct(V4_S2) { S2 x; S2 y; S2 z; S2 w; };
typedef def_struct(V4_S4) { S4 x; S4 y; S4 z; S4 w; };
typedef def_struct(R2_S2) { V2_S2 p0; V2_S2 p1; }; typedef def_struct(R2_S2) { V2_S2 p0; V2_S2 p1; };
typedef def_struct(R2_S4) { V2_S4 p0; V2_S4 p1; }; typedef def_struct(R2_S4) { V2_S4 p0; V2_S4 p1; };
@@ -31,3 +35,5 @@ typedef def_struct(M3_S2) { A3A3_S2 m; A3_S4 t; };
#define v2s2(x,y) (V2_S2){x,y} #define v2s2(x,y) (V2_S2){x,y}
#define v3s2(x,y,z) (V3_S2){x,y,z} #define v3s2(x,y,z) (V3_S2){x,y,z}
#define v3s4(x,y,z) (V3_S4){x,y,z} #define v3s4(x,y,z) (V3_S4){x,y,z}
#define v4s2(x,y,z,w) (V4_S2){x,y,z,w}
#define v4s4(x,y,z,w) (V4_S4){x,y,z,w}
+2 -4
View File
@@ -94,8 +94,7 @@ struct AllocatorProc_Out {
U4 left; // Contiguous memory left U4 left; // Contiguous memory left
U4 max_alloc; U4 max_alloc;
U4 min_alloc; U4 min_alloc;
B4 continuity_break; // Whether this allocation broke continuity with the previous (address space wise) // byte_pad(8);
byte_pad(4);
}; };
typedef def_struct(AllocatorInfo) { typedef def_struct(AllocatorInfo) {
AllocatorProc* proc; AllocatorProc* proc;
@@ -108,8 +107,7 @@ typedef def_struct(AllocatorQueryInfo) {
U4 left; // Contiguous memory left U4 left; // Contiguous memory left
U4 max_alloc; U4 max_alloc;
U4 min_alloc; U4 min_alloc;
B4 continuity_break; // Whether this allocation broke continuity with the previous (address space wise) // byte_pad(4);
byte_pad(4);
}; };
static_assert(size_of(AllocatorProc_Out) == size_of(AllocatorQueryInfo)); static_assert(size_of(AllocatorProc_Out) == size_of(AllocatorQueryInfo));
+60 -7
View File
@@ -11,6 +11,8 @@
#include "duffle/gp.h" #include "duffle/gp.h"
#include "hello_gpu.h" #include "hello_gpu.h"
#define GTE_Coprocessor_Chapter 1
typedef def_farray(V2_S2, 3); typedef def_farray(V2_S2, 3);
typedef def_struct(Poly_F3) { typedef def_struct(Poly_F3) {
U4 tag; U4 tag;
@@ -73,12 +75,22 @@ typedef def_struct(PrimitiveArena) {
U4 used; U4 used;
}; };
#define Cube_num_verts 8 #define GTE_Coprocessor_UseQuads 1
#define Cube_num_faces 12 #define GTE_Coprocessor_UseTris 0
typedef def_farray(V3_S2, Cube_num_verts);
typedef def_farray(V3_S2, Cube_num_faces);
void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) { #define Cube_num_verts 8
typedef def_farray(V3_S2, Cube_num_verts);
// Triangle topology for the cube: the face table holds Cube_num_faces (12) V3_S2 entries.
// Selected when GTE_Coprocessor_UseTris is non-zero.
#if GTE_Coprocessor_UseTris
#define Cube_num_faces 12
// The typedef must be terminated with ';' — without it this branch stops compiling
// as soon as GTE_Coprocessor_UseTris is switched to 1.
typedef def_farray(V3_S2, Cube_num_faces);
typedef A12_V3_S2 ACubeFaces;
#endif
#if GTE_Coprocessor_UseQuads
#define Cube_num_faces 6
typedef def_farray(V4_S2, Cube_num_faces);
typedef A6_V4_S2 ACubeFaces;
#endif
void cube128_init(A8_V3_S2* verts, ACubeFaces* faces) {
memory_copy(verts, & (A8_V3_S2) { memory_copy(verts, & (A8_V3_S2) {
{ -128, -128, -128 }, { -128, -128, -128 },
{ 128, -128, -128 }, { 128, -128, -128 },
@@ -91,6 +103,7 @@ void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) {
}, },
size_of(A8_V3_S2) size_of(A8_V3_S2)
); );
#if GTE_Coprocessor_UseTris
memory_copy(faces, & (A12_V3_S2) { memory_copy(faces, & (A12_V3_S2) {
{ 0, 3, 2 }, // top { 0, 3, 2 }, // top
{ 0, 2, 1 }, // top { 0, 2, 1 }, // top
@@ -107,6 +120,19 @@ void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) {
}, },
size_of(A12_V3_S2) size_of(A12_V3_S2)
); );
#endif
#if GTE_Coprocessor_UseQuads
memory_copy(faces, & (A6_V4_S2) {
{ 3, 2, 0, 1 },
{ 0, 1, 4, 5 },
{ 4, 5, 7, 6 },
{ 1, 2, 5, 6 },
{ 2, 3, 6, 7 },
{ 3, 0, 7, 4 },
},
sizeof(A6_V4_S2)
);
#endif
return; return;
} }
@@ -123,7 +149,7 @@ typedef def_struct(SMemory) {
M3_S2 tform_world; M3_S2 tform_world;
A8_V3_S2 cube_verts; A8_V3_S2 cube_verts;
A12_V3_S2 cube_faces; ACubeFaces cube_faces;
}; };
global SMemory static_mem; global SMemory static_mem;
extern SMemory static_mem; extern SMemory static_mem;
@@ -195,7 +221,7 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
gte_matrix_set_rotation (& static_mem.tform_world); gte_matrix_set_rotation (& static_mem.tform_world);
gte_matrix_set_translation(& static_mem.tform_world); gte_matrix_set_translation(& static_mem.tform_world);
#if 1 #if GTE_Coprocessor_Chapter
S4 nclip = 0; S4 nclip = 0;
S4 orderingtbl_z = 0; S4 orderingtbl_z = 0;
A2_S2 p; //??? A2_S2 p; //???
@@ -203,6 +229,7 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
for (U4 face_id = 0; face_id < Cube_num_faces; face_id += 1) for (U4 face_id = 0; face_id < Cube_num_faces; face_id += 1)
{ {
#if GTE_Coprocessor_UseTris
Poly_G3* tri = prim_alloc(Poly_G3); set_poly_g3(tri); Poly_G3* tri = prim_alloc(Poly_G3); set_poly_g3(tri);
tri->c0 = rgb8(255, 0, 255); tri->c0 = rgb8(255, 0, 255);
tri->c1 = rgb8(255, 255, 0); tri->c1 = rgb8(255, 255, 0);
@@ -231,8 +258,34 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
if ((orderingtbl_z > 0) && (orderingtbl_z < OrderingTbl_Len)) { if ((orderingtbl_z > 0) && (orderingtbl_z < OrderingTbl_Len)) {
orderingtbl_add_primitive(ordering_buf[orderingtbl_z], tri); orderingtbl_add_primitive(ordering_buf[orderingtbl_z], tri);
} }
#endif
#if GTE_Coprocessor_UseQuads
Poly_G4* quad = prim_alloc(Poly_G4); set_poly_g4(quad);
quad->c0 = rgb8(255, 0, 255);
quad->c1 = rgb8(255, 255, 0);
quad->c2 = rgb8( 0, 255, 255);
quad->c3 = rgb8( 0, 255, 0);
V4_S2* face = & static_mem.cube_faces[face_id];
V3_S2* p0 = & static_mem.cube_verts[face->x];
V3_S2* p1 = & static_mem.cube_verts[face->y];
V3_S2* p2 = & static_mem.cube_verts[face->z];
V3_S2* p3 = & static_mem.cube_verts[face->w];
nclip = rtp_avg_nclip_a4_v3s2(
p0, p1, p2, p3,
& quad->p0, & quad->p1, & quad->p2, & quad->p3,
& p, & orderingtbl_z, & flag
);
if (nclip <= 0) {
continue;
} }
if ((orderingtbl_z > 0) && (orderingtbl_z < OrderingTbl_Len)) {
orderingtbl_add_primitive(ordering_buf[orderingtbl_z], quad);
}
#endif
}
static_mem.rotation.x += 6; static_mem.rotation.x += 6;
static_mem.rotation.y += 8; static_mem.rotation.y += 8;
static_mem.rotation.z += 12; static_mem.rotation.z += 12;
+15 -2
View File
@@ -117,10 +117,10 @@ M3_S2* m3s2_scale (M3_S2* mat, V3_S4* vec) __asm__("ScaleMatrix");
// Rotation, Translation, Perspective // Rotation, Translation, Perspective
S4 rtp_v3s2_raw(V3_S2* vec, S4* xy, S4* pp, S4* flag) __asm__("RotTransPers"); S4 rtp_v3s2_raw(V3_S2* vec, S4* xy, S4* pp, S4* flag) __asm__("RotTransPers");
finline S4 rtp_v3s2(V3_S2* vec, V2_S2* xy, A2_S2* pp, S4* flag) { return rtp_v3s2_raw(vec, cast(S4*R_, & xy->x), cast(S4*R_, pp), r_(flag)); } FI_ S4 rtp_v3s2(V3_S2* vec, V2_S2* xy, A2_S2* pp, S4* flag) { return rtp_v3s2_raw(vec, cast(S4*R_, & xy->x), cast(S4*R_, pp), r_(flag)); }
S4 rtp_avg_nclip_a3_v3s2_raw(V3_S2* v0, V3_S2* v1, V3_S2* v2, S4* xy1, S4* xy2, S4* xy3, S4* pp, S4* otz, S4* flag) __asm__("RotAverageNclip3"); S4 rtp_avg_nclip_a3_v3s2_raw(V3_S2* v0, V3_S2* v1, V3_S2* v2, S4* xy1, S4* xy2, S4* xy3, S4* pp, S4* otz, S4* flag) __asm__("RotAverageNclip3");
finline S4 rtp_avg_nclip_a3_v3s2( FI_ S4 rtp_avg_nclip_a3_v3s2(
V3_S2* v0, V3_S2* v1, V3_S2* v2, V3_S2* v0, V3_S2* v1, V3_S2* v2,
V2_S2* xy0, V2_S2* xy1, V2_S2* xy2, V2_S2* xy0, V2_S2* xy1, V2_S2* xy2,
A2_S2* pp, S4* otz, S4* flag A2_S2* pp, S4* otz, S4* flag
@@ -132,6 +132,19 @@ finline S4 rtp_avg_nclip_a3_v3s2(
); );
} }
// Raw binding: calls resolve to the assembler symbol "RotAverageNclip4".
// Every pointer after the four vertices is declared as a plain S4*.
S4 rtp_avg_nclip_a4_v3s2_raw(
	V3_S2* v0,  V3_S2* v1,  V3_S2* v2,  V3_S2* v3,
	S4*    xy1, S4*    xy2, S4*    xy3, S4*    xy4,
	S4*    pp,  S4*    otz, S4*    flag
) __asm__("RotAverageNclip4");

// Typed front-end for rtp_avg_nclip_a4_v3s2_raw.
// Accepts the four screen-space slots as V2_S2* and pp as A2_S2*, reinterprets each as S4*R_,
// and forwards everything to the raw binding. The raw return value is handed back unchanged.
// NOTE(review): presumably xy0..xy3, pp, otz and flag are written by the routine — confirm against the SDK docs.
FI_ S4 rtp_avg_nclip_a4_v3s2(
	V3_S2* v0,  V3_S2* v1,  V3_S2* v2,  V3_S2* v3,
	V2_S2* xy0, V2_S2* xy1, V2_S2* xy2, V2_S2* xy3,
	A2_S2* pp,  S4*    otz, S4*    flag
){
	S4 const result = rtp_avg_nclip_a4_v3s2_raw(
		v0,
		v1,
		v2,
		v3,
		cast(S4*R_, xy0),
		cast(S4*R_, xy1),
		cast(S4*R_, xy2),
		cast(S4*R_, xy3),
		cast(S4*R_, pp),
		cast(S4*R_, otz),
		cast(S4*R_, flag)
	);
	return result;
}
void gte_matrix_set_rotation (M3_S2* mat) __asm__("SetRotMatrix"); void gte_matrix_set_rotation (M3_S2* mat) __asm__("SetRotMatrix");
void gte_matrix_set_translation(M3_S2* mat) __asm__("SetTransMatrix"); void gte_matrix_set_translation(M3_S2* mat) __asm__("SetTransMatrix");
+7 -3
View File
@@ -30,8 +30,10 @@ $f_wall = "-Wall"
$f_wno_attributes = "-Wno-attributes" $f_wno_attributes = "-Wno-attributes"
# Optimization Flags # Optimization Flags
$f_optimize_none = "-O0" # For Debug builds $f_optimize_none = "-O0"
$f_optimize_size = "-Os" # For Release builds $f_optimize_size = "-Os"
# "-Oi" is the MSVC spelling (/Oi) and is rejected by GCC as an invalid -O argument;
# "-fbuiltin" is the GCC switch that enables compiler builtins/intrinsics.
$f_optimize_intrinsics = "-fbuiltin"
$f_optimize_debug = "-Og"
$f_omit_frame_ptr = "-fomit-frame-pointer" $f_omit_frame_ptr = "-fomit-frame-pointer"
# Environment & Standard Library Flags # Environment & Standard Library Flags
@@ -289,8 +291,10 @@ function build-graphis_hello {
$compile_args = @() $compile_args = @()
$compile_args += $f_debug $compile_args += $f_debug
$compile_args += $f_optimize_none # $compile_args += $f_optimize_none
# $compile_args += $f_optimize_intrinsics
# $compile_args += $f_optimize_size # $compile_args += $f_optimize_size
$compile_args += $f_optimize_debug
$compile_args += ($f_include + $path_code) $compile_args += ($f_include + $path_code)
compile-unit $src_c $module_c $includes $compile_args compile-unit $src_c $module_c $includes $compile_args