Improve how the artifact cache cancellation thread organizes its work: only do the more frequent cancellation scans while an async tick is in progress; otherwise, sleep.

This commit is contained in:
Ryan Fleury
2025-10-23 14:39:44 -07:00
parent 721649b7ea
commit ae17084b44
4 changed files with 63 additions and 41 deletions
+46 -28
View File
@@ -19,6 +19,8 @@ ac_init(void)
ac_shared->req_batches[idx].arena = arena_alloc();
}
ac_shared->cancel_thread = thread_launch(ac_cancel_thread_entry_point, 0);
ac_shared->cancel_thread_mutex = mutex_alloc();
mutex_take(ac_shared->cancel_thread_mutex);
}
////////////////////////////////
@@ -211,6 +213,14 @@ ac_async_tick(void)
{
Temp scratch = scratch_begin(0, 0);
//////////////////////////////
//- rjf: enable cancellation scanning
//
if(lane_idx() == 0)
{
mutex_drop(ac_shared->cancel_thread_mutex);
}
//////////////////////////////
//- rjf: do eviction pass across all caches
//
@@ -575,6 +585,13 @@ ac_async_tick(void)
}
lane_sync();
//////////////////////////////
//- rjf: disable cancellation scanning
//
if(lane_idx() == 0)
{
mutex_take(ac_shared->cancel_thread_mutex);
}
scratch_end(scratch);
}
@@ -586,46 +603,47 @@ ac_cancel_thread_entry_point(void *p)
{
for(;;)
{
os_sleep_milliseconds(500);
//- rjf: scan in-flight nodes for expiration
for EachIndex(cache_slot_idx, ac_shared->cache_slots_count)
os_sleep_milliseconds(50);
MutexScope(ac_shared->cancel_thread_mutex)
{
Stripe *cache_stripe = stripe_from_slot_idx(&ac_shared->cache_stripes, cache_slot_idx);
RWMutexScope(cache_stripe->rw_mutex, 0)
for EachIndex(cache_slot_idx, ac_shared->cache_slots_count)
{
for EachNode(cache, AC_Cache, ac_shared->cache_slots[cache_slot_idx])
Stripe *cache_stripe = stripe_from_slot_idx(&ac_shared->cache_stripes, cache_slot_idx);
RWMutexScope(cache_stripe->rw_mutex, 0)
{
Rng1U64 slot_range = lane_range(cache->slots_count);
for EachInRange(slot_idx, slot_range)
for EachNode(cache, AC_Cache, ac_shared->cache_slots[cache_slot_idx])
{
AC_Slot *slot = &cache->slots[slot_idx];
Stripe *stripe = stripe_from_slot_idx(&cache->stripes, slot_idx);
for(B32 write_mode = 0; write_mode <= 1; write_mode += 1)
Rng1U64 slot_range = lane_range(cache->slots_count);
for EachInRange(slot_idx, slot_range)
{
B32 slot_has_work = 0;
RWMutexScope(stripe->rw_mutex, write_mode)
AC_Slot *slot = &cache->slots[slot_idx];
Stripe *stripe = stripe_from_slot_idx(&cache->stripes, slot_idx);
for(B32 write_mode = 0; write_mode <= 1; write_mode += 1)
{
for(AC_Node *n = slot->first, *next = 0; n != 0; n = next)
B32 slot_has_work = 0;
RWMutexScope(stripe->rw_mutex, write_mode)
{
next = n->next;
if(access_pt_is_expired(&n->access_pt, .time = n->evict_threshold_us) && ins_atomic_u64_eval(&n->working_count) > 0)
for(AC_Node *n = slot->first, *next = 0; n != 0; n = next)
{
slot_has_work = 1;
if(!write_mode)
next = n->next;
if(access_pt_is_expired(&n->access_pt, .time = n->evict_threshold_us) && ins_atomic_u64_eval(&n->working_count) > 0)
{
break;
}
else
{
n->cancelled = 1;
slot_has_work = 1;
if(!write_mode)
{
break;
}
else
{
n->cancelled = 1;
}
}
}
}
}
if(!slot_has_work)
{
break;
if(!slot_has_work)
{
break;
}
}
}
}
+1
View File
@@ -129,6 +129,7 @@ struct AC_Shared
// rjf: cancel thread
Thread cancel_thread;
Mutex cancel_thread_mutex;
};
////////////////////////////////
+3 -3
View File
@@ -1301,11 +1301,11 @@ di_search_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out, U64 *
{
if(lane_idx() == 0)
{
sort_records = push_array(scratch.arena, SortRecord, sort_records_count);
sort_records = push_array_no_zero(scratch.arena, SortRecord, sort_records_count);
}
if(lane_idx() == lane_from_task_idx(1))
{
sort_records__swap = push_array(scratch.arena, SortRecord, sort_records_count);
sort_records__swap = push_array_no_zero(scratch.arena, SortRecord, sort_records_count);
}
lane_sync_u64(&sort_records, 0);
lane_sync_u64(&sort_records__swap, lane_from_task_idx(1));
@@ -1429,7 +1429,7 @@ di_search_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out, U64 *
if(lane_idx() == 0)
{
items.count = all_items->total_count;
items.v = push_array(arena, DI_SearchItem, items.count);
items.v = push_array_no_zero(arena, DI_SearchItem, items.count);
}
lane_sync_u64(&items.count, 0);
lane_sync_u64(&items.v, 0);
+13 -10
View File
@@ -10308,16 +10308,6 @@ rd_frame(void)
}
#endif
//////////////////////////////
//- rjf: [windows] clear pages from working set shortly after startup, many of which will not be needed
//
#if OS_WINDOWS
if(rd_state->frame_index == 1) ProfScope("SetProcessWorkingSetSize")
{
SetProcessWorkingSetSize(GetCurrentProcess(), max_U64, max_U64);
}
#endif
//////////////////////////////
//- rjf: do per-frame resets
//
@@ -16563,6 +16553,19 @@ rd_frame(void)
U64 frame_time_us = end_time_us-begin_time_us;
rd_state->frame_time_us_history[rd_state->frame_index%ArrayCount(rd_state->frame_time_us_history)] = frame_time_us;
//////////////////////////////
//- rjf: [windows] clear pages from working set shortly after startup, many of which will not be needed
//
#if OS_WINDOWS
if(di_load_count() < 50)
{
if(rd_state->frame_index == 15) ProfScope("SetProcessWorkingSetSize")
{
SetProcessWorkingSetSize(GetCurrentProcess(), max_U64, max_U64);
}
}
#endif
//////////////////////////////
//- rjf: bump frame time counters
//