artifact cache: waitable cache access; debugging / fixes; start plugging file stream into artifact cache

This commit is contained in:
Ryan Fleury
2025-09-24 11:25:18 -07:00
parent 7e05a60ffe
commit e7368af35c
11 changed files with 260 additions and 179 deletions
+18 -4
View File
@@ -3,11 +3,11 @@
global U64 global_update_tick_idx = 0;
global CondVar async_tick_start_cond_var = {0};
global CondVar async_tick_stop_cond_var = {0};
global Mutex async_tick_start_mutex = {0};
global Mutex async_tick_stop_mutex = {0};
global B32 async_loop_again = 0;
global B32 global_async_exit = 0;
thread_static B32 is_async_thread = 0;
internal void
main_thread_base_entry_point(int arguments_count, char **arguments)
@@ -18,7 +18,6 @@ main_thread_base_entry_point(int arguments_count, char **arguments)
//- rjf: set up async thread group info
async_tick_start_cond_var = cond_var_alloc();
async_tick_start_mutex = mutex_alloc();
async_tick_stop_cond_var = cond_var_alloc();
async_tick_stop_mutex = mutex_alloc();
//- rjf: set up telemetry
@@ -186,9 +185,11 @@ async_thread_entry_point(void *params)
{
LaneCtx lctx = *(LaneCtx *)params;
lane_ctx(lctx);
is_async_thread = 1;
ThreadNameF("async_thread_%I64u", lane_idx());
for(;!ins_atomic_u32_eval(&global_async_exit);)
for(;;)
{
// rjf: wait for signal if needed; otherwise reset loop signal & continue
if(!ins_atomic_u32_eval(&async_loop_again))
{
MutexScope(async_tick_start_mutex) cond_var_wait(async_tick_start_cond_var, async_tick_start_mutex, os_now_microseconds()+100000);
@@ -197,6 +198,7 @@ async_thread_entry_point(void *params)
{
async_loop_again = 0;
}
#if defined(ARTIFACT_CACHE_H)
ac_async_tick();
#endif
@@ -209,6 +211,18 @@ async_thread_entry_point(void *params)
#if defined(TEXTURE_CACHE_H)
tex_async_tick();
#endif
cond_var_broadcast(async_tick_stop_cond_var);
// rjf: take exit signal; break if set
lane_sync();
B32 need_exit = 0;
if(lane_idx() == 0)
{
need_exit = ins_atomic_u32_eval(&global_async_exit);
}
lane_sync_u64(&need_exit, 0);
if(need_exit)
{
break;
}
}
}
+13
View File
@@ -90,16 +90,29 @@ tctx_lane_barrier_wait(void *broadcast_ptr, U64 broadcast_size, U64 broadcast_sr
ProfBeginFunction();
ProfColor(0x00000ff);
TCTX *tctx = tctx_selected();
// rjf: doing broadcast -> copy to broadcast memory on source lane
U64 broadcast_size_clamped = ClampTop(broadcast_size, sizeof(tctx->lane_ctx.broadcast_memory[0]));
if(broadcast_ptr != 0 && lane_idx() == broadcast_src_lane_idx)
{
MemoryCopy(tctx->lane_ctx.broadcast_memory, broadcast_ptr, broadcast_size_clamped);
}
// rjf: all cases: barrier
os_barrier_wait(tctx->lane_ctx.barrier);
// rjf: doing broadcast -> copy from broadcast memory on destination lanes
if(broadcast_ptr != 0 && lane_idx() != broadcast_src_lane_idx)
{
MemoryCopy(broadcast_ptr, tctx->lane_ctx.broadcast_memory, broadcast_size_clamped);
}
// rjf: doing broadcast -> barrier on all lanes
if(broadcast_ptr != 0)
{
os_barrier_wait(tctx->lane_ctx.barrier);
}
ProfEnd();
}
+1 -1
View File
@@ -94,7 +94,7 @@ internal void tctx_lane_barrier_wait(void *broadcast_ptr, U64 broadcast_size, U6
#define lane_from_task_idx(idx) ((idx)%lane_count())
#define lane_ctx(ctx) tctx_set_lane_ctx((ctx))
#define lane_sync() tctx_lane_barrier_wait(0, 0, 0)
#define lane_sync_u64(ptr, src_lane_idx) tctx_lane_barrier_wait((ptr), sizeof(U64), (src_lane_idx))
#define lane_sync_u64(ptr, src_lane_idx) tctx_lane_barrier_wait((ptr), sizeof(*(ptr)), (src_lane_idx))
#define lane_range(count) m_range_from_n_idx_m_count(lane_idx(), lane_count(), (count))
//- rjf: thread names