mirror of
https://github.com/Ed94/pikuma_ps1.git
synced 2026-06-01 18:41:13 -07:00
Finish chapter: The GTE Coprocessor.
This commit is contained in:
+2
-2
@@ -1,8 +1,8 @@
|
||||
build
|
||||
toolchain/armips
|
||||
toolchain/pcsx-redux
|
||||
toolchain/psyq_iwyu
|
||||
toolchain/PSn00bSDK
|
||||
# toolchain/psyq_iwyu
|
||||
# toolchain/PSn00bSDK
|
||||
|
||||
*.exe
|
||||
*.elf
|
||||
|
||||
Vendored
+1
-1
@@ -14,7 +14,7 @@
|
||||
"INTELLISENSE_DIRECTIVES"
|
||||
],
|
||||
"intelliSenseMode": "gcc-x86",
|
||||
"compilerPath": "C:\\Users\\Ed\\AppData\\Roaming\\mips\\versions\\v14.2.0\\bin\\mipsel-none-elf-gcc.exe",
|
||||
"compilerPath": "C:/Users/Ed/scoop/apps/gcc/current/bin/gcc.exe"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
|
||||
+28
-18
@@ -3,10 +3,15 @@
|
||||
# include "assert.h"
|
||||
#endif
|
||||
|
||||
#define LP_ static // local_persist
|
||||
#define internal static // internal
|
||||
#define global
|
||||
#define gknown
|
||||
|
||||
#define align_(value) __attribute__((aligned (value))) // for easy alignment
|
||||
#define expect_(x, y) __builtin_expect(x, y) // so compiler knows the common path
|
||||
#define finline static inline __attribute__((always_inline)) // force inline
|
||||
#define no_inline static __attribute__((noinline)) // force no inline [used in thread api]
|
||||
#define FI_ static inline __attribute__((always_inline)) // force inline
|
||||
#define NI_ static __attribute__((noinline)) // force no inline [used in thread api]
|
||||
#define R_ __restrict // pointers are either restricted or volatile and nothing else
|
||||
#define V_ volatile // pointers are either restricted or volatile and nothing else
|
||||
|
||||
@@ -16,12 +21,7 @@
|
||||
#define stringify(S) stringify_impl(S)
|
||||
#define tmpl(prefix, type) prefix ## _ ## type
|
||||
|
||||
#define local_persist static
|
||||
#define internal static
|
||||
#define global
|
||||
#define gknown
|
||||
|
||||
#define offset_of(type, member) cast(SSIZE, & (((type*) 0)->member))
|
||||
#define offset_of(type, member) cast(U8,__builtin_offsetof(type,member))
|
||||
#define static_assert _Static_assert
|
||||
#define typeof __typeof__
|
||||
#define typeof_ptr(ptr) typeof((ptr)[0])
|
||||
@@ -32,9 +32,16 @@
|
||||
#define def_ptr_set(type) def_R_(type); typedef def_V_(type)
|
||||
#define def_tset(type) type; typedef def_ptr_set(type)
|
||||
|
||||
typedef __UINT8_TYPE__ def_tset(U1); typedef __UINT16_TYPE__ def_tset(U2); typedef __UINT32_TYPE__ def_tset(U4);
|
||||
typedef __INT8_TYPE__ def_tset(S1); typedef __INT16_TYPE__ def_tset(S2); typedef __INT32_TYPE__ def_tset(S4);
|
||||
typedef unsigned char def_tset(B1); typedef __UINT16_TYPE__ def_tset(B2); typedef __UINT32_TYPE__ def_tset(B4);
|
||||
typedef __UINT8_TYPE__ def_tset(U1);
|
||||
typedef __UINT16_TYPE__ def_tset(U2);
|
||||
typedef __UINT32_TYPE__ def_tset(U4);
|
||||
typedef __INT8_TYPE__ def_tset(S1);
|
||||
typedef __INT16_TYPE__ def_tset(S2);
|
||||
typedef __INT32_TYPE__ def_tset(S4);
|
||||
typedef unsigned char def_tset(B1);
|
||||
typedef __UINT16_TYPE__ def_tset(B2);
|
||||
typedef __UINT32_TYPE__ def_tset(B4);
|
||||
typedef __UINT64_TYPE__ def_tset(B8);
|
||||
enum { false = 0, true = 1, true_overflow, };
|
||||
|
||||
#define u1_r(value) cast(U1_R, value)
|
||||
@@ -75,6 +82,8 @@ enum { false = 0, true = 1, true_overflow, };
|
||||
|
||||
#define r_(ptr) cast(typeof_ptr(ptr)*R_, ptr)
|
||||
#define v_(ptr) cast(typeof_ptr(ptr)*V_, ptr)
|
||||
#define tr_(type, ptr) cast(type*R_, ptr)
|
||||
#define tv_(type, ptr) cast(type*V_, ptr)
|
||||
|
||||
#define kilo(n) (cast(U4, n) << 10)
|
||||
#define mega(n) (cast(U4, n) << 20)
|
||||
@@ -87,7 +96,7 @@ enum { false = 0, true = 1, true_overflow, };
|
||||
#define sop_2(op, a, b) cast(U2, s2_(a) op s2_(b))
|
||||
#define sop_4(op, a, b) cast(U4, s4_(a) op s4_(b))
|
||||
|
||||
#define def_signed_op(id, op, width) finline U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); }
|
||||
#define def_signed_op(id, op, width) FI_ U ## width id ## _s ## width(U ## width a, U ## width b) {return sop_ ## width(op, a, b); }
|
||||
#define def_signed_ops(id, op) def_signed_op(id, op, 1) def_signed_op(id, op, 2) def_signed_op(id, op, 4)
|
||||
def_signed_ops(add, +) def_signed_ops(sub, -)
|
||||
def_signed_ops(mut, *) def_signed_ops(div, /)
|
||||
@@ -103,13 +112,13 @@ def_signed_ops(ge, >=) def_signed_ops(le, <=)
|
||||
#define ge_s(a,b) def_generic_sop(ge, a,b)
|
||||
#define le_s(a,b) def_generic_sop(le, a,b)
|
||||
|
||||
#define span_iter(type, iter, m_begin, op, m_end) \
|
||||
#define span_iter(type, iter, m_begin, op, m_end) ( \
|
||||
tmpl(Iter_Span,type) iter = { \
|
||||
.r = {(m_begin), (m_end)}, \
|
||||
.cursor = (m_begin) }; \
|
||||
iter.cursor op iter.r.end; \
|
||||
++ iter.cursor
|
||||
|
||||
++ iter.cursor \
|
||||
)
|
||||
#define def_span(type) \
|
||||
def_struct(tmpl( Span,type)) { type begin; type end; }; \
|
||||
typedef def_struct(tmpl(Iter_Span,type)) { tmpl(Span,type) r; type cursor; }
|
||||
@@ -127,7 +136,7 @@ typedef def_struct(Slice_Str8) { Str8* ptr; U4 len; };
|
||||
#define def_Slice(type) def_struct(tmpl(Slice,type)) { type* ptr; U4 len; }
|
||||
#define slice_assert(slice) do { assert((slice).ptr != nullptr); assert((slice).len > 0); } while(0)
|
||||
#define slice_end(slice) ((slice).ptr + (slice).len)
|
||||
#define size_of_slice_type(slice) size_of( * (slice).ptr )
|
||||
#define size_of_slice_type(slice) size_of((slice).ptr[0])
|
||||
|
||||
typedef def_Slice(void);
|
||||
typedef def_Slice(B1);
|
||||
@@ -142,10 +151,11 @@ void slice__zero(Slice_B1 mem, U4 typewidth);
|
||||
} while (0)
|
||||
#define slice_zero(slice) slice__zero(slice_byte(slice), size_of_slice_type(slice))
|
||||
|
||||
#define slice_iter(container, iter) \
|
||||
#define slice_iter(container, iter) ( \
|
||||
typeof((container).ptr) iter = (container).ptr; \
|
||||
iter != slice_end(container); \
|
||||
++ iter
|
||||
++ iter \
|
||||
)
|
||||
#define slice_from_farray(type, ...) & (tmpl(Slice,type)) { \
|
||||
.ptr = farray_init(type, __VA_ARGS__), \
|
||||
.len = farray_len( farray_init(type, __VA_ARGS__)) \
|
||||
|
||||
@@ -16,10 +16,14 @@ typedef S2 A3A3_S2[3][3];
|
||||
|
||||
typedef def_struct(Extent2_S2) { S2 width; S2 height; };
|
||||
typedef def_struct(Extent2_S4) { S4 width; S4 height; };
|
||||
|
||||
typedef def_struct(V2_S2) { S2 x; S2 y; };
|
||||
typedef def_struct(V2_S4) { S4 x; S4 y; };
|
||||
typedef def_struct(V3_S2) { S2 x; S2 y; S2 z; S2 pad; };
|
||||
typedef def_struct(V3_S4) { S4 x; S4 y; S4 z; S4 pad; };
|
||||
typedef def_struct(V4_S2) { S2 x; S2 y; S2 z; S2 w; };
|
||||
typedef def_struct(V4_S4) { S4 x; S4 y; S4 z; S4 w; };
|
||||
|
||||
typedef def_struct(R2_S2) { V2_S2 p0; V2_S2 p1; };
|
||||
typedef def_struct(R2_S4) { V2_S4 p0; V2_S4 p1; };
|
||||
|
||||
@@ -31,3 +35,5 @@ typedef def_struct(M3_S2) { A3A3_S2 m; A3_S4 t; };
|
||||
#define v2s2(x,y) (V2_S2){x,y}
|
||||
#define v3s2(x,y,z) (V3_S2){x,y,z}
|
||||
#define v3s4(x,y,z) (V3_S4){x,y,z}
|
||||
#define v4s2(x,y,z,w) (V4_S2){x,y,z,w}
|
||||
#define v4s4(x,y,z,w) (V4_S4){x,y,z,w}
|
||||
|
||||
@@ -94,8 +94,7 @@ struct AllocatorProc_Out {
|
||||
U4 left; // Contiguous memory left
|
||||
U4 max_alloc;
|
||||
U4 min_alloc;
|
||||
B4 continuity_break; // Whether this allocation broke continuity with the previous (address space wise)
|
||||
byte_pad(4);
|
||||
// byte_pad(8);
|
||||
};
|
||||
typedef def_struct(AllocatorInfo) {
|
||||
AllocatorProc* proc;
|
||||
@@ -108,8 +107,7 @@ typedef def_struct(AllocatorQueryInfo) {
|
||||
U4 left; // Contiguous memory left
|
||||
U4 max_alloc;
|
||||
U4 min_alloc;
|
||||
B4 continuity_break; // Whether this allocation broke continuity with the previous (address space wise)
|
||||
byte_pad(4);
|
||||
// byte_pad(4);
|
||||
};
|
||||
static_assert(size_of(AllocatorProc_Out) == size_of(AllocatorQueryInfo));
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#include "duffle/gp.h"
|
||||
#include "hello_gpu.h"
|
||||
|
||||
#define GTE_Coprocessor_Chapter 1
|
||||
|
||||
typedef def_farray(V2_S2, 3);
|
||||
typedef def_struct(Poly_F3) {
|
||||
U4 tag;
|
||||
@@ -73,12 +75,22 @@ typedef def_struct(PrimitiveArena) {
|
||||
U4 used;
|
||||
};
|
||||
|
||||
#define Cube_num_verts 8
|
||||
#define Cube_num_faces 12
|
||||
typedef def_farray(V3_S2, Cube_num_verts);
|
||||
typedef def_farray(V3_S2, Cube_num_faces);
|
||||
#define GTE_Coprocessor_UseQuads 1
|
||||
#define GTE_Coprocessor_UseTris 0
|
||||
|
||||
void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) {
|
||||
#define Cube_num_verts 8
|
||||
typedef def_farray(V3_S2, Cube_num_verts);
|
||||
#if GTE_Coprocessor_UseTris
|
||||
#define Cube_num_faces 12
|
||||
// Fixed-array typedef for the triangle face list (Cube_num_faces is 12 in this branch).
// The terminating ';' was missing; the error only surfaces when GTE_Coprocessor_UseTris is set to 1,
// because the declaration would otherwise run into the following `typedef A12_V3_S2 ACubeFaces;`.
typedef def_farray(V3_S2, Cube_num_faces);
|
||||
typedef A12_V3_S2 ACubeFaces;
|
||||
#endif
|
||||
#if GTE_Coprocessor_UseQuads
|
||||
#define Cube_num_faces 6
|
||||
typedef def_farray(V4_S2, Cube_num_faces);
|
||||
typedef A6_V4_S2 ACubeFaces;
|
||||
#endif
|
||||
void cube128_init(A8_V3_S2* verts, ACubeFaces* faces) {
|
||||
memory_copy(verts, & (A8_V3_S2) {
|
||||
{ -128, -128, -128 },
|
||||
{ 128, -128, -128 },
|
||||
@@ -91,6 +103,7 @@ void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) {
|
||||
},
|
||||
size_of(A8_V3_S2)
|
||||
);
|
||||
#if GTE_Coprocessor_UseTris
|
||||
memory_copy(faces, & (A12_V3_S2) {
|
||||
{ 0, 3, 2 }, // top
|
||||
{ 0, 2, 1 }, // top
|
||||
@@ -107,6 +120,19 @@ void cube128_init(A8_V3_S2* verts, A12_V3_S2* faces) {
|
||||
},
|
||||
size_of(A12_V3_S2)
|
||||
);
|
||||
#endif
|
||||
#if GTE_Coprocessor_UseQuads
|
||||
memory_copy(faces, & (A6_V4_S2) {
|
||||
{ 3, 2, 0, 1 },
|
||||
{ 0, 1, 4, 5 },
|
||||
{ 4, 5, 7, 6 },
|
||||
{ 1, 2, 5, 6 },
|
||||
{ 2, 3, 6, 7 },
|
||||
{ 3, 0, 7, 4 },
|
||||
},
|
||||
sizeof(A6_V4_S2)
|
||||
);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -123,7 +149,7 @@ typedef def_struct(SMemory) {
|
||||
M3_S2 tform_world;
|
||||
|
||||
A8_V3_S2 cube_verts;
|
||||
A12_V3_S2 cube_faces;
|
||||
ACubeFaces cube_faces;
|
||||
};
|
||||
global SMemory static_mem;
|
||||
extern SMemory static_mem;
|
||||
@@ -195,7 +221,7 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
gte_matrix_set_rotation (& static_mem.tform_world);
|
||||
gte_matrix_set_translation(& static_mem.tform_world);
|
||||
|
||||
#if 1
|
||||
#if GTE_Coprocessor_Chapter
|
||||
S4 nclip = 0;
|
||||
S4 orderingtbl_z = 0;
|
||||
A2_S2 p; //???
|
||||
@@ -203,6 +229,7 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
|
||||
for (U4 face_id = 0; face_id < Cube_num_faces; face_id += 1)
|
||||
{
|
||||
#if GTE_Coprocessor_UseTris
|
||||
Poly_G3* tri = prim_alloc(Poly_G3); set_poly_g3(tri);
|
||||
tri->c0 = rgb8(255, 0, 255);
|
||||
tri->c1 = rgb8(255, 255, 0);
|
||||
@@ -231,8 +258,34 @@ void update(PrimitiveArena* pa, U4* ordering_buf)
|
||||
if ((orderingtbl_z > 0) && (orderingtbl_z < OrderingTbl_Len)) {
|
||||
orderingtbl_add_primitive(ordering_buf[orderingtbl_z], tri);
|
||||
}
|
||||
#endif
|
||||
#if GTE_Coprocessor_UseQuads
|
||||
Poly_G4* quad = prim_alloc(Poly_G4); set_poly_g4(quad);
|
||||
quad->c0 = rgb8(255, 0, 255);
|
||||
quad->c1 = rgb8(255, 255, 0);
|
||||
quad->c2 = rgb8( 0, 255, 255);
|
||||
quad->c3 = rgb8( 0, 255, 0);
|
||||
|
||||
V4_S2* face = & static_mem.cube_faces[face_id];
|
||||
V3_S2* p0 = & static_mem.cube_verts[face->x];
|
||||
V3_S2* p1 = & static_mem.cube_verts[face->y];
|
||||
V3_S2* p2 = & static_mem.cube_verts[face->z];
|
||||
V3_S2* p3 = & static_mem.cube_verts[face->w];
|
||||
|
||||
nclip = rtp_avg_nclip_a4_v3s2(
|
||||
p0, p1, p2, p3,
|
||||
& quad->p0, & quad->p1, & quad->p2, & quad->p3,
|
||||
& p, & orderingtbl_z, & flag
|
||||
);
|
||||
if (nclip <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((orderingtbl_z > 0) && (orderingtbl_z < OrderingTbl_Len)) {
|
||||
orderingtbl_add_primitive(ordering_buf[orderingtbl_z], quad);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
static_mem.rotation.x += 6;
|
||||
static_mem.rotation.y += 8;
|
||||
static_mem.rotation.z += 12;
|
||||
|
||||
@@ -117,10 +117,10 @@ M3_S2* m3s2_scale (M3_S2* mat, V3_S4* vec) __asm__("ScaleMatrix");
|
||||
// Rotation, Translation, Perspective
|
||||
|
||||
S4 rtp_v3s2_raw(V3_S2* vec, S4* xy, S4* pp, S4* flag) __asm__("RotTransPers");
|
||||
finline S4 rtp_v3s2(V3_S2* vec, V2_S2* xy, A2_S2* pp, S4* flag) { return rtp_v3s2_raw(vec, cast(S4*R_, & xy->x), cast(S4*R_, pp), r_(flag)); }
|
||||
// Typed, force-inlined wrapper over rtp_v3s2_raw (bound to the symbol "RotTransPers").
// vec  : input vertex.
// xy   : V2_S2 output; the address of its first member is handed to the raw call as a restrict S4*.
// pp   : A2_S2 output, reinterpreted as a restrict S4*.
// flag : S4 output, passed through r_() (restrict-qualified, same pointee type).
// Returns the raw call's result unchanged.
// NOTE(review): presumably follows PsyQ libgte RotTransPers semantics for the outputs/return — confirm.
FI_ S4 rtp_v3s2(V3_S2* vec, V2_S2* xy, A2_S2* pp, S4* flag) { return rtp_v3s2_raw(vec, cast(S4*R_, & xy->x), cast(S4*R_, pp), r_(flag)); }
|
||||
|
||||
S4 rtp_avg_nclip_a3_v3s2_raw(V3_S2* v0, V3_S2* v1, V3_S2* v2, S4* xy1, S4* xy2, S4* xy3, S4* pp, S4* otz, S4* flag) __asm__("RotAverageNclip3");
|
||||
finline S4 rtp_avg_nclip_a3_v3s2(
|
||||
FI_ S4 rtp_avg_nclip_a3_v3s2(
|
||||
V3_S2* v0, V3_S2* v1, V3_S2* v2,
|
||||
V2_S2* xy0, V2_S2* xy1, V2_S2* xy2,
|
||||
A2_S2* pp, S4* otz, S4* flag
|
||||
@@ -132,6 +132,19 @@ finline S4 rtp_avg_nclip_a3_v3s2(
|
||||
);
|
||||
}
|
||||
|
||||
S4 rtp_avg_nclip_a4_v3s2_raw(V3_S2* v0, V3_S2* v1, V3_S2* v2, V3_S2* v3, S4* xy1, S4* xy2, S4* xy3, S4* xy4, S4* pp, S4* otz, S4* flag) __asm__("RotAverageNclip4");
|
||||
// Typed, force-inlined wrapper over rtp_avg_nclip_a4_v3s2_raw (bound to the symbol "RotAverageNclip4").
// Four-vertex (quad) counterpart of rtp_avg_nclip_a3_v3s2.
// v0..v3   : the quad's input vertices, forwarded as-is.
// xy0..xy3 : V2_S2 outputs (two S2 members each), reinterpreted as restrict S4* for the raw call.
// pp       : A2_S2 output, reinterpreted as a restrict S4*.
// otz,flag : S4 outputs, forwarded with a restrict qualifier added.
// Returns the raw call's result unchanged; update() skips the face when that value is <= 0.
// NOTE(review): presumably follows PsyQ libgte RotAverageNclip4 semantics (screen coords, ordering-table Z,
// normal-clip result) — confirm against the library documentation.
FI_ S4 rtp_avg_nclip_a4_v3s2(
|
||||
V3_S2* v0, V3_S2* v1, V3_S2* v2, V3_S2* v3,
|
||||
V2_S2* xy0, V2_S2* xy1, V2_S2* xy2, V2_S2* xy3,
|
||||
A2_S2* pp, S4* otz, S4* flag
|
||||
){
|
||||
return rtp_avg_nclip_a4_v3s2_raw(
|
||||
v0, v1, v2, v3,
|
||||
cast(S4*R_, xy0), cast(S4*R_, xy1), cast(S4*R_, xy2), cast(S4*R_, xy3),
|
||||
cast(S4*R_, pp), cast(S4*R_, otz), cast(S4*R_, flag)
|
||||
);
|
||||
}
|
||||
|
||||
void gte_matrix_set_rotation (M3_S2* mat) __asm__("SetRotMatrix");
|
||||
void gte_matrix_set_translation(M3_S2* mat) __asm__("SetTransMatrix");
|
||||
|
||||
|
||||
@@ -30,8 +30,10 @@ $f_wall = "-Wall"
|
||||
$f_wno_attributes = "-Wno-attributes"
|
||||
|
||||
# Optimization Flags
|
||||
$f_optimize_none = "-O0" # For Debug builds
|
||||
$f_optimize_size = "-Os" # For Release builds
|
||||
$f_optimize_none = "-O0"
|
||||
$f_optimize_size = "-Os"
|
||||
$f_optimize_intrinsics = "-Oi"
|
||||
$f_optimize_debug = "-Og"
|
||||
$f_omit_frame_ptr = "-fomit-frame-pointer"
|
||||
|
||||
# Environment & Standard Library Flags
|
||||
@@ -289,8 +291,10 @@ function build-graphis_hello {
|
||||
|
||||
$compile_args = @()
|
||||
$compile_args += $f_debug
|
||||
$compile_args += $f_optimize_none
|
||||
# $compile_args += $f_optimize_none
|
||||
# $compile_args += $f_optimize_intrinsics
|
||||
# $compile_args += $f_optimize_size
|
||||
$compile_args += $f_optimize_debug
|
||||
$compile_args += ($f_include + $path_code)
|
||||
compile-unit $src_c $module_c $includes $compile_args
|
||||
|
||||
|
||||
Reference in New Issue
Block a user