p2r2: reslice per-lane work; bucket subsets of units by record count, rather than assigning units to lanes

This commit is contained in:
Ryan Fleury
2025-08-18 15:24:25 -07:00
parent 55f21018df
commit 0fa45fe71b
4 changed files with 372 additions and 266 deletions
+1 -1
View File
@@ -43,7 +43,7 @@
// Ends a lock-wait region. Any arguments supplied by the caller are discarded;
// tmEndWaitForLock is always invoked with the literal 0.
# define ProfEndLockWait(...) tmEndWaitForLock(0)
// Records a lock acquisition: the caller's arguments are forwarded to
// tmAcquiredLock after two leading literal-0 arguments.
# define ProfLockTake(...) tmAcquiredLock(0, 0, __VA_ARGS__)
// Records a lock release: the caller's arguments are forwarded to
// tmReleasedLock after one leading literal-0 argument.
# define ProfLockDrop(...) tmReleasedLock(0, __VA_ARGS__)
// NOTE(review): two ProfColor definitions appear back to back in this listing.
// The hunk is reported as "+1 -1", so presumably the tmZoneColorSticky line
// below is the removed version and the tmZoneColor line after it is its
// replacement -- confirm against the real file; only one should exist there.
# define ProfColor(color) tmZoneColorSticky(color)
// Splits `color` into three 8-bit fields (bits 31..24, 23..16, 15..8), scales
// each by 1/255.f, and passes them as three float arguments to tmZoneColor.
// The low 8 bits of `color` are not used. Presumably the packing is
// 0xRRGGBBAA with alpha ignored -- TODO confirm against callers.
# define ProfColor(color) tmZoneColor((((color) & 0xff000000) >> 24) / 255.f, (((color) & 0x00ff0000) >> 16) / 255.f, (((color) & 0x0000ff00) >> 8) / 255.f)
# define ProfBeginV(...) \
if (TM_API_PTR) { \
static tm_uint64 file_id = 0; TM_API_PTR->_tmStaticString(&file_id, __FILE__); \
+3
View File
@@ -85,8 +85,11 @@ tctx_set_lane_ctx(LaneCtx lane_ctx)
internal void
tctx_lane_barrier_wait(void)
{
ProfBeginFunction();
ProfColor(0xff0000ff);
TCTX *tctx = tctx_selected();
os_barrier_wait(tctx->lane_ctx.barrier);
ProfEnd();
}
internal Rng1U64