From 8fa01d3e300133fbf8018325d478d4784c8f66c7 Mon Sep 17 00:00:00 2001 From: Ryan Fleury Date: Fri, 19 Sep 2025 15:31:30 -0700 Subject: [PATCH] expand access scope touches to automatically record update clock idx touch time, + timestamp; move texture cache to new async wavefront --- src/base/base_entry_point.c | 3 + src/base/base_thread_context.c | 24 ++- src/base/base_thread_context.h | 15 +- src/content/content.c | 167 ++++++++-------- src/content/content.h | 26 ++- src/dasm_cache/dasm_cache.c | 9 +- src/dasm_cache/dasm_cache.h | 6 +- src/geo_cache/geo_cache.c | 12 +- src/geo_cache/geo_cache.h | 4 +- src/text_cache/text_cache.c | 16 +- src/text_cache/text_cache.h | 4 +- src/texture_cache/texture_cache.c | 310 +++++++++++++----------------- src/texture_cache/texture_cache.h | 48 ++--- 13 files changed, 314 insertions(+), 330 deletions(-) diff --git a/src/base/base_entry_point.c b/src/base/base_entry_point.c index 8368a725..b9cc7abe 100644 --- a/src/base/base_entry_point.c +++ b/src/base/base_entry_point.c @@ -206,6 +206,9 @@ async_thread_entry_point(void *params) #endif #if defined(DASM_CACHE_H) dasm_tick(); +#endif +#if defined(TEXTURE_CACHE_H) + tex_tick(); #endif cond_var_broadcast(async_tick_stop_cond_var); } diff --git a/src/base/base_thread_context.c b/src/base/base_thread_context.c index 465fcb37..c9e04f7f 100644 --- a/src/base/base_thread_context.c +++ b/src/base/base_thread_context.c @@ -160,7 +160,7 @@ access_close(Access *access) for(Touch *touch = access->top_touch, *next = 0; touch != 0; touch = next) { next = touch->next; - ins_atomic_u64_dec_eval(touch->touch_count); + ins_atomic_u64_dec_eval(&touch->pt->access_refcount); if(touch->cv.u64[0] != 0) { cond_var_broadcast(touch->cv); } SLLStackPush(tctx_thread_local->free_touch, touch); } @@ -168,9 +168,11 @@ access_close(Access *access) } internal void -access_touch(Access *access, U64 *touch_count, CondVar cv) +access_touch(Access *access, AccessPt *pt, CondVar cv) { - ins_atomic_u64_inc_eval(touch_count); + ins_atomic_u64_inc_eval(&pt->access_refcount); + ins_atomic_u64_eval_assign(&pt->last_time_touched_us, os_now_microseconds()); + ins_atomic_u64_eval_assign(&pt->last_update_idx_touched, update_tick_idx()); Touch *touch = tctx_thread_local->free_touch; if(touch != 0) { @@ -183,5 +185,19 @@ access_touch(Access *access, U64 *touch_count, CondVar cv) MemoryZeroStruct(touch); SLLStackPush(access->top_touch, touch); touch->cv = cv; - touch->touch_count = touch_count; + touch->pt = pt; +} + +//- rjf: access points + +internal B32 +access_pt_is_expired(AccessPt *pt) +{ + U64 access_refcount = ins_atomic_u64_eval(&pt->access_refcount); + U64 last_time_touched_us = ins_atomic_u64_eval(&pt->last_time_touched_us); + U64 last_update_idx_touched = ins_atomic_u64_eval(&pt->last_update_idx_touched); + B32 result = (access_refcount == 0 && + last_time_touched_us + 2000000 < os_now_microseconds() && + last_update_idx_touched + 10 < update_tick_idx()); + return result; } diff --git a/src/base/base_thread_context.h b/src/base/base_thread_context.h index cfd9e18b..340bccf1 100644 --- a/src/base/base_thread_context.h +++ b/src/base/base_thread_context.h @@ -18,11 +18,19 @@ struct LaneCtx //////////////////////////////// //~ rjf: Access Scopes +typedef struct AccessPt AccessPt; +struct AccessPt +{ + U64 access_refcount; + U64 last_time_touched_us; + U64 last_update_idx_touched; +}; + typedef struct Touch Touch; struct Touch { Touch *next; - U64 *touch_count; + AccessPt *pt; CondVar cv; }; @@ -95,6 +103,9 @@ internal void tctx_read_srcloc(char **file_name, U64 *line_number); //- rjf: access scopes internal Access *access_open(void); internal void access_close(Access *access); -internal void access_touch(Access *access, U64 *touch_count, CondVar cv); +internal void access_touch(Access *access, AccessPt *pt, CondVar cv); + +//- rjf: access points +internal B32 access_pt_is_expired(AccessPt *pt); #endif // BASE_THREAD_CONTEXT_H diff --git a/src/content/content.c b/src/content/content.c index 4995f600..f16383b5 100644 --- a/src/content/content.c +++ b/src/content/content.c @@ -169,45 +169,7 @@ c_root_release(C_Root root) { C_ID id = id_chunk_n->v[chunk_idx]; C_Key key = c_key_make(root, id); - U64 key_hash = c_little_hash_from_data(str8_struct(&key)); - U64 key_slot_idx = key_hash%c_shared->key_slots_count; - U64 key_stripe_idx = key_slot_idx%c_shared->key_stripes_count; - C_KeySlot *key_slot = &c_shared->key_slots[key_slot_idx]; - C_Stripe *key_stripe = &c_shared->key_stripes[key_stripe_idx]; - MutexScopeW(key_stripe->rw_mutex) - { - for(C_KeyNode *n = key_slot->first; n != 0; n = n->next) - { - if(c_key_match(n->key, key)) - { - // rjf: release reference to all hashes - for(U64 history_idx = 0; history_idx < C_KEY_HASH_HISTORY_STRONG_REF_COUNT && history_idx < n->hash_history_gen; history_idx += 1) - { - U128 hash = n->hash_history[(n->hash_history_gen+history_idx)%ArrayCount(n->hash_history)]; - U64 hash_slot_idx = hash.u64[1]%c_shared->blob_slots_count; - U64 hash_stripe_idx = hash_slot_idx%c_shared->blob_stripes_count; - C_BlobSlot *hash_slot = &c_shared->blob_slots[hash_slot_idx]; - C_Stripe *hash_stripe = &c_shared->blob_stripes[hash_stripe_idx]; - MutexScopeR(hash_stripe->rw_mutex) - { - for(C_BlobNode *n = hash_slot->first; n != 0; n = n->next) - { - if(u128_match(n->hash, hash)) - { - ins_atomic_u64_dec_eval(&n->key_ref_count); - break; - } - } - } - } - - // rjf: release key node - DLLRemove(key_slot->first, key_slot->last, n); - SLLStackPush(c_shared->key_stripes_free_nodes[key_stripe_idx], n); - break; - } - } - } + c_close_key(key); } } } @@ -218,11 +180,14 @@ c_root_release(C_Root root) internal U128 c_submit_data(C_Key key, Arena **data_arena, String8 data) { + //- rjf: unpack key U64 key_hash = c_little_hash_from_data(str8_struct(&key)); U64 key_slot_idx = key_hash%c_shared->key_slots_count; U64 key_stripe_idx = key_slot_idx%c_shared->key_stripes_count; C_KeySlot *key_slot = &c_shared->key_slots[key_slot_idx]; C_Stripe *key_stripe = &c_shared->key_stripes[key_stripe_idx]; + + //- rjf: hash data, unpack hash U128 hash = c_hash_from_data(data); U64 slot_idx = hash.u64[1]%c_shared->blob_slots_count; U64 stripe_idx = slot_idx%c_shared->blob_stripes_count; @@ -250,15 +215,15 @@ c_submit_data(C_Key key, Arena **data_arena, String8 data) } else { - node = push_array(stripe->arena, C_BlobNode, 1); + node = push_array_no_zero(stripe->arena, C_BlobNode, 1); } + MemoryZeroStruct(node); node->hash = hash; if(data_arena != 0) { node->arena = *data_arena; } node->data = data; - node->scope_ref_count = 0; node->key_ref_count = 1; DLLPushBack(slot->first, slot->last, node); } @@ -377,6 +342,52 @@ c_submit_data(C_Key key, Arena **data_arena, String8 data) return hash; } +//////////////////////////////// +//~ rjf: Key Closing + +internal void +c_close_key(C_Key key) +{ + U64 key_hash = c_little_hash_from_data(str8_struct(&key)); + U64 key_slot_idx = key_hash%c_shared->key_slots_count; + U64 key_stripe_idx = key_slot_idx%c_shared->key_stripes_count; + C_KeySlot *key_slot = &c_shared->key_slots[key_slot_idx]; + C_Stripe *key_stripe = &c_shared->key_stripes[key_stripe_idx]; + RWMutexScope(key_stripe->rw_mutex, 1) + { + for(C_KeyNode *n = key_slot->first; n != 0; n = n->next) + { + if(c_key_match(n->key, key)) + { + for(U64 history_idx = 0; + history_idx < C_KEY_HASH_HISTORY_STRONG_REF_COUNT && history_idx < n->hash_history_gen; + history_idx += 1) + { + U128 hash = n->hash_history[(n->hash_history_gen+history_idx)%ArrayCount(n->hash_history)]; + U64 hash_slot_idx = hash.u64[1]%c_shared->blob_slots_count; + U64 hash_stripe_idx = hash_slot_idx%c_shared->blob_stripes_count; + C_BlobSlot *hash_slot = &c_shared->blob_slots[hash_slot_idx]; + C_Stripe *hash_stripe = &c_shared->blob_stripes[hash_stripe_idx]; + MutexScopeR(hash_stripe->rw_mutex) + { + for(C_BlobNode *n = hash_slot->first; n != 0; n = n->next) + { + if(u128_match(n->hash, hash)) + { + ins_atomic_u64_dec_eval(&n->key_ref_count); + break; + } + } + } + } + DLLRemove(key_slot->first, key_slot->last, n); + SLLStackPush(c_shared->key_stripes_free_nodes[key_stripe_idx], n); + break; + } + } + } +} + //////////////////////////////// //~ rjf: Downstream Accesses @@ -432,7 +443,7 @@ c_hash_from_key(C_Key key, U64 rewind_count) U64 key_stripe_idx = key_slot_idx%c_shared->key_stripes_count; C_KeySlot *key_slot = &c_shared->key_slots[key_slot_idx]; C_Stripe *key_stripe = &c_shared->key_stripes[key_stripe_idx]; - MutexScopeR(key_stripe->rw_mutex) + RWMutexScope(key_stripe->rw_mutex, 0) { for(C_KeyNode *n = key_slot->first; n != 0; n = n->next) { @@ -462,7 +473,7 @@ c_data_from_hash(Access *access, U128 hash) if(u128_match(n->hash, hash)) { result = n->data; - access_touch(access, &n->scope_ref_count, stripe->cv); + access_touch(access, &n->access_pt, stripe->cv); break; } } @@ -478,46 +489,52 @@ internal void c_tick(void) { ProfBeginFunction(); - Rng1U64 range = lane_range(c_shared->blob_slots_count); - for EachInRange(slot_idx, range) + + //- rjf: garbage collect blobs { - U64 stripe_idx = slot_idx%c_shared->blob_stripes_count; - C_BlobSlot *slot = &c_shared->blob_slots[slot_idx]; - C_Stripe *stripe = &c_shared->blob_stripes[stripe_idx]; - B32 slot_has_work = 0; - MutexScopeR(stripe->rw_mutex) + Rng1U64 range = lane_range(c_shared->blob_slots_count); + for EachInRange(slot_idx, range) { - for(C_BlobNode *n = slot->first; n != 0; n = n->next) + U64 stripe_idx = slot_idx%c_shared->blob_stripes_count; + C_BlobSlot *slot = &c_shared->blob_slots[slot_idx]; + C_Stripe *stripe = &c_shared->blob_stripes[stripe_idx]; + for(B32 write_mode = 0; write_mode <= 1; write_mode += 1) { - U64 key_ref_count = ins_atomic_u64_eval(&n->key_ref_count); - U64 scope_ref_count = ins_atomic_u64_eval(&n->scope_ref_count); - U64 downstream_ref_count = ins_atomic_u64_eval(&n->downstream_ref_count); - if(key_ref_count == 0 && scope_ref_count == 0 && downstream_ref_count == 0) + B32 slot_has_work = 0; + RWMutexScope(stripe->rw_mutex, write_mode) + { + for(C_BlobNode *n = slot->first, *next = 0; n != 0; n = next) + { + next = n->next; + U64 key_ref_count = ins_atomic_u64_eval(&n->key_ref_count); + U64 scope_ref_count = ins_atomic_u64_eval(&n->access_pt.access_refcount); + U64 downstream_ref_count = ins_atomic_u64_eval(&n->downstream_ref_count); + if(key_ref_count == 0 && scope_ref_count == 0 && downstream_ref_count == 0) + { + slot_has_work = 1; + if(!write_mode) + { + break; + } + else + { + DLLRemove(slot->first, slot->last, n); + SLLStackPush(c_shared->blob_stripes_free_nodes[stripe_idx], n); + if(n->arena != 0) + { + arena_release(n->arena); + } + } + } + } + } + if(!slot_has_work) { - slot_has_work = 1; break; } } } - if(slot_has_work) MutexScopeW(stripe->rw_mutex) - { - for(C_BlobNode *n = slot->first, *next = 0; n != 0; n = next) - { - next = n->next; - U64 key_ref_count = ins_atomic_u64_eval(&n->key_ref_count); - U64 scope_ref_count = ins_atomic_u64_eval(&n->scope_ref_count); - U64 downstream_ref_count = ins_atomic_u64_eval(&n->downstream_ref_count); - if(key_ref_count == 0 && scope_ref_count == 0 && downstream_ref_count == 0) - { - DLLRemove(slot->first, slot->last, n); - SLLStackPush(c_shared->blob_stripes_free_nodes[stripe_idx], n); - if(n->arena != 0) - { - arena_release(n->arena); - } - } - } - } } + ProfEnd(); } diff --git a/src/content/content.h b/src/content/content.h index b91ad606..659c5c03 100644 --- a/src/content/content.h +++ b/src/content/content.h @@ -63,6 +63,17 @@ struct C_Key C_ID id; }; +//////////////////////////////// +//~ rjf: Cache Stripe Type + +typedef struct C_Stripe C_Stripe; +struct C_Stripe +{ + Arena *arena; + RWMutex rw_mutex; + CondVar cv; +}; + //////////////////////////////// //~ rjf: Root Cache Types @@ -135,7 +146,7 @@ struct C_BlobNode U128 hash; Arena *arena; String8 data; - U64 scope_ref_count; + AccessPt access_pt; U64 key_ref_count; U64 downstream_ref_count; }; @@ -147,14 +158,6 @@ struct C_BlobSlot C_BlobNode *last; }; -typedef struct C_Stripe C_Stripe; -struct C_Stripe -{ - Arena *arena; - RWMutex rw_mutex; - CondVar cv; -}; - //////////////////////////////// //~ rjf: Shared State @@ -217,6 +220,11 @@ internal void c_root_release(C_Root root); internal U128 c_submit_data(C_Key key, Arena **data_arena, String8 data); +//////////////////////////////// +//~ rjf: Key Closing + +internal void c_close_key(C_Key key); + //////////////////////////////// //~ rjf: Downstream Accesses diff --git a/src/dasm_cache/dasm_cache.c b/src/dasm_cache/dasm_cache.c index 9755cc44..a4d49003 100644 --- a/src/dasm_cache/dasm_cache.c +++ b/src/dasm_cache/dasm_cache.c @@ -346,9 +346,7 @@ dasm_info_from_hash_params(Access *access, U128 hash, DASM_Params *params) // rjf: nonzero node, request if needed - touch & return results if(node != 0) { - ins_atomic_u64_eval_assign(&node->last_time_touched_us, os_now_microseconds()); - ins_atomic_u64_eval_assign(&node->last_user_clock_idx_touched, update_tick_idx()); - access_touch(access, &node->scope_ref_count, stripe->cv); + access_touch(access, &node->access_pt, stripe->cv); MemoryCopyStruct(&info, &node->info); } } @@ -411,10 +409,7 @@ dasm_tick(void) RWMutexScope(stripe->rw_mutex, write_mode) for(DASM_Node *n = slot->first; n != 0; n = n->next) { // rjf: node needs eviction - if(n->scope_ref_count == 0 && - n->last_time_touched_us+evict_threshold_us <= check_time_us && - n->last_user_clock_idx_touched+evict_threshold_user_clocks <= check_time_user_clocks && - ins_atomic_u64_eval(&n->working_count) == 0) + if(access_pt_is_expired(&n->access_pt) && ins_atomic_u64_eval(&n->working_count) == 0) { slot_has_work = 1; if(!write_mode) diff --git a/src/dasm_cache/dasm_cache.h b/src/dasm_cache/dasm_cache.h index 890a4bd0..e95cb148 100644 --- a/src/dasm_cache/dasm_cache.h +++ b/src/dasm_cache/dasm_cache.h @@ -196,10 +196,8 @@ struct DASM_Node DASM_Info info; // rjf: metadata + AccessPt access_pt; U64 working_count; - U64 scope_ref_count; - U64 last_time_touched_us; - U64 last_user_clock_idx_touched; U64 last_time_requested_us; U64 last_user_clock_idx_requested; }; @@ -249,8 +247,6 @@ struct DASM_Shared DASM_RequestNode *first_req; DASM_RequestNode *last_req; U64 req_count; - - // rjf: request take counter U64 lane_req_take_counter; }; diff --git a/src/geo_cache/geo_cache.c b/src/geo_cache/geo_cache.c index 0f4003ba..938fe565 100644 --- a/src/geo_cache/geo_cache.c +++ b/src/geo_cache/geo_cache.c @@ -53,9 +53,7 @@ geo_buffer_from_hash(Access *access, U128 hash) { handle = n->buffer; found = !r_handle_match(r_handle_zero(), handle); - ins_atomic_u64_eval_assign(&n->last_time_touched_us, os_now_microseconds()); - ins_atomic_u64_eval_assign(&n->last_user_clock_idx_touched, update_tick_idx()); - access_touch(access, &n->scope_ref_count, stripe->cv); + access_touch(access, &n->access_pt, stripe->cv); break; } } @@ -249,9 +247,7 @@ geo_evictor_thread__entry_point(void *p) { for(GEO_Node *n = slot->first; n != 0; n = n->next) { - if(n->scope_ref_count == 0 && - n->last_time_touched_us+evict_threshold_us <= check_time_us && - n->last_user_clock_idx_touched+evict_threshold_user_clocks <= check_time_user_clocks && + if(access_pt_is_expired(&n->access_pt) && n->load_count != 0 && n->is_working == 0) { @@ -265,9 +261,7 @@ geo_evictor_thread__entry_point(void *p) for(GEO_Node *n = slot->first, *next = 0; n != 0; n = next) { next = n->next; - if(n->scope_ref_count == 0 && - n->last_time_touched_us+evict_threshold_us <= check_time_us && - n->last_user_clock_idx_touched+evict_threshold_user_clocks <= check_time_user_clocks && + if(access_pt_is_expired(&n->access_pt) && n->load_count != 0 && n->is_working == 0) { diff --git a/src/geo_cache/geo_cache.h b/src/geo_cache/geo_cache.h index af88db79..fe55b7b0 100644 --- a/src/geo_cache/geo_cache.h +++ b/src/geo_cache/geo_cache.h @@ -15,10 +15,8 @@ struct GEO_Node U128 hash; R_Handle buffer; B32 is_working; - U64 scope_ref_count; - U64 last_time_touched_us; - U64 last_user_clock_idx_touched; U64 load_count; + AccessPt access_pt; }; typedef struct GEO_Slot GEO_Slot; diff --git a/src/text_cache/text_cache.c b/src/text_cache/text_cache.c index 554c8dd1..edca8496 100644 --- a/src/text_cache/text_cache.c +++ b/src/text_cache/text_cache.c @@ -1641,9 +1641,7 @@ txt_text_info_from_hash_lang(Access *access, U128 hash, TXT_LangKind lang) info.bytes_processed = ins_atomic_u64_eval(&n->info.bytes_processed); info.bytes_to_process = ins_atomic_u64_eval(&n->info.bytes_to_process); found = 1; - ins_atomic_u64_eval_assign(&n->last_time_touched_us, os_now_microseconds()); - ins_atomic_u64_eval_assign(&n->last_user_clock_idx_touched, update_tick_idx()); - access_touch(access, &n->scope_ref_count, stripe->cv); + access_touch(access, &n->access_pt, stripe->cv); break; } } @@ -2434,10 +2432,6 @@ txt_evictor_thread__entry_point(void *p) ThreadNameF("txt_evictor_thread"); for(;;) { - U64 check_time_us = os_now_microseconds(); - U64 check_time_user_clocks = update_tick_idx(); - U64 evict_threshold_us = 2*1000000; - U64 evict_threshold_user_clocks = 10; for(U64 slot_idx = 0; slot_idx < txt_shared->slots_count; slot_idx += 1) { U64 stripe_idx = slot_idx%txt_shared->stripes_count; @@ -2448,9 +2442,7 @@ txt_evictor_thread__entry_point(void *p) { for(TXT_Node *n = slot->first; n != 0; n = n->next) { - if(n->scope_ref_count == 0 && - n->last_time_touched_us+evict_threshold_us <= check_time_us && - n->last_user_clock_idx_touched+evict_threshold_user_clocks <= check_time_user_clocks && + if(access_pt_is_expired(&n->access_pt) && n->load_count != 0 && n->is_working == 0) { @@ -2464,9 +2456,7 @@ txt_evictor_thread__entry_point(void *p) for(TXT_Node *n = slot->first, *next = 0; n != 0; n = next) { next = n->next; - if(n->scope_ref_count == 0 && - n->last_time_touched_us+evict_threshold_us <= check_time_us && - n->last_user_clock_idx_touched+evict_threshold_user_clocks <= check_time_user_clocks && + if(access_pt_is_expired(&n->access_pt) && n->load_count != 0 && n->is_working == 0) { diff --git a/src/text_cache/text_cache.h b/src/text_cache/text_cache.h index abf41f24..3e5d5cb2 100644 --- a/src/text_cache/text_cache.h +++ b/src/text_cache/text_cache.h @@ -174,10 +174,8 @@ struct TXT_Node TXT_TextInfo info; // rjf: metadata + AccessPt access_pt; B32 is_working; - U64 scope_ref_count; - U64 last_time_touched_us; - U64 last_user_clock_idx_touched; U64 load_count; }; diff --git a/src/texture_cache/texture_cache.c b/src/texture_cache/texture_cache.c index d5e63181..95d137a0 100644 --- a/src/texture_cache/texture_cache.c +++ b/src/texture_cache/texture_cache.c @@ -37,11 +37,8 @@ tex_init(void) tex_shared->stripes[idx].rw_mutex = rw_mutex_alloc(); tex_shared->stripes[idx].cv = cond_var_alloc(); } - tex_shared->u2x_ring_size = KB(64); - tex_shared->u2x_ring_base = push_array_no_zero(arena, U8, tex_shared->u2x_ring_size); - tex_shared->u2x_ring_cv = cond_var_alloc(); - tex_shared->u2x_ring_mutex = mutex_alloc(); - tex_shared->evictor_thread = thread_launch(tex_evictor_thread__entry_point, 0); + tex_shared->req_mutex = mutex_alloc(); + tex_shared->req_arena = arena_alloc(); } //////////////////////////////// @@ -52,42 +49,32 @@ tex_texture_from_hash_topology(Access *access, U128 hash, TEX_Topology topology) { R_Handle handle = {0}; { + //- rjf: unpack hash U64 slot_idx = hash.u64[1]%tex_shared->slots_count; U64 stripe_idx = slot_idx%tex_shared->stripes_count; TEX_Slot *slot = &tex_shared->slots[slot_idx]; TEX_Stripe *stripe = &tex_shared->stripes[stripe_idx]; - B32 found = 0; - B32 stale = 0; - MutexScopeR(stripe->rw_mutex) + + //- rjf: get results, request if needed + for(B32 write_mode = 0; write_mode <= 1; write_mode += 1) { - for(TEX_Node *n = slot->first; n != 0; n = n->next) - { - if(u128_match(hash, n->hash) && MemoryMatchStruct(&topology, &n->topology)) - { - handle = n->texture; - found = !r_handle_match(r_handle_zero(), handle); - ins_atomic_u64_eval_assign(&n->last_time_touched_us, os_now_microseconds()); - ins_atomic_u64_eval_assign(&n->last_user_clock_idx_touched, update_tick_idx()); - access_touch(access, &n->scope_ref_count, stripe->cv); - break; - } - } - } - B32 node_is_new = 0; - if(!found) - { - MutexScopeW(stripe->rw_mutex) + B32 got_node = 0; + RWMutexScope(stripe->rw_mutex, write_mode) { + // rjf: get node TEX_Node *node = 0; for(TEX_Node *n = slot->first; n != 0; n = n->next) { if(u128_match(hash, n->hash) && MemoryMatchStruct(&topology, &n->topology)) { node = n; + got_node = 1; break; } } - if(node == 0) + + // rjf: no node? -> create & request + if(write_mode && !node) { node = tex_shared->stripes_free_nodes[stripe_idx]; if(node) @@ -102,14 +89,27 @@ tex_texture_from_hash_topology(Access *access, U128 hash, TEX_Topology topology) DLLPushBack(slot->first, slot->last, node); node->hash = hash; MemoryCopyStruct(&node->topology, &topology); - node_is_new = 1; + MutexScope(tex_shared->req_mutex) + { + TEX_RequestNode *n = push_array(tex_shared->req_arena, TEX_RequestNode, 1); + SLLQueuePush(tex_shared->first_req, tex_shared->last_req, n); + n->v.hash = hash; + n->v.top = topology; + tex_shared->req_count += 1; + } + } + + // rjf: node? -> grab & access + if(!write_mode && node) + { + handle = node->texture; + access_touch(access, &node->access_pt, stripe->cv); } } - } - if(node_is_new) - { - tex_u2x_enqueue_req(hash, topology, max_U64); - async_push_work(tex_xfer_work); + if(got_node) + { + break; + } } } return handle; @@ -136,173 +136,129 @@ tex_texture_from_key_topology(Access *access, C_Key key, TEX_Topology topology, } //////////////////////////////// -//~ rjf: Transfer Threads - -internal B32 -tex_u2x_enqueue_req(U128 hash, TEX_Topology top, U64 endt_us) -{ - B32 good = 0; - MutexScope(tex_shared->u2x_ring_mutex) for(;;) - { - U64 unconsumed_size = tex_shared->u2x_ring_write_pos-tex_shared->u2x_ring_read_pos; - U64 available_size = tex_shared->u2x_ring_size-unconsumed_size; - if(available_size >= sizeof(hash)+sizeof(top)) - { - good = 1; - tex_shared->u2x_ring_write_pos += ring_write_struct(tex_shared->u2x_ring_base, tex_shared->u2x_ring_size, tex_shared->u2x_ring_write_pos, &hash); - tex_shared->u2x_ring_write_pos += ring_write_struct(tex_shared->u2x_ring_base, tex_shared->u2x_ring_size, tex_shared->u2x_ring_write_pos, &top); - break; - } - if(os_now_microseconds() >= endt_us) - { - break; - } - cond_var_wait(tex_shared->u2x_ring_cv, tex_shared->u2x_ring_mutex, endt_us); - } - if(good) - { - cond_var_broadcast(tex_shared->u2x_ring_cv); - } - return good; -} +//~ rjf: Tick internal void -tex_u2x_dequeue_req(U128 *hash_out, TEX_Topology *top_out) -{ - MutexScope(tex_shared->u2x_ring_mutex) for(;;) - { - U64 unconsumed_size = tex_shared->u2x_ring_write_pos-tex_shared->u2x_ring_read_pos; - if(unconsumed_size >= sizeof(*hash_out)+sizeof(*top_out)) - { - tex_shared->u2x_ring_read_pos += ring_read_struct(tex_shared->u2x_ring_base, tex_shared->u2x_ring_size, tex_shared->u2x_ring_read_pos, hash_out); - tex_shared->u2x_ring_read_pos += ring_read_struct(tex_shared->u2x_ring_base, tex_shared->u2x_ring_size, tex_shared->u2x_ring_read_pos, top_out); - break; - } - cond_var_wait(tex_shared->u2x_ring_cv, tex_shared->u2x_ring_mutex, max_U64); - } - cond_var_broadcast(tex_shared->u2x_ring_cv); -} - -ASYNC_WORK_DEF(tex_xfer_work) +tex_tick(void) { + if(ins_atomic_u64_eval(&tex_shared) == 0) { return; } ProfBeginFunction(); - Access *access = access_open(); + Temp scratch = scratch_begin(0, 0); - //- rjf: decode - U128 hash = {0}; - TEX_Topology top = {0}; - tex_u2x_dequeue_req(&hash, &top); - - //- rjf: unpack hash - U64 slot_idx = hash.u64[1]%tex_shared->slots_count; - U64 stripe_idx = slot_idx%tex_shared->stripes_count; - TEX_Slot *slot = &tex_shared->slots[slot_idx]; - TEX_Stripe *stripe = &tex_shared->stripes[stripe_idx]; - - //- rjf: take task - B32 got_task = 0; - MutexScopeR(stripe->rw_mutex) - { - for(TEX_Node *n = slot->first; n != 0; n = n->next) - { - if(u128_match(n->hash, hash) && MemoryMatchStruct(&top, &n->topology)) - { - got_task = !ins_atomic_u32_eval_cond_assign(&n->is_working, 1, 0); - break; - } - } - } - - //- rjf: hash -> data - String8 data = {0}; - if(got_task) - { - data = c_data_from_hash(access, hash); - } - - //- rjf: data * topology -> texture - R_Handle texture = {0}; - if(got_task && top.dim.x > 0 && top.dim.y > 0 && data.size >= (U64)top.dim.x*(U64)top.dim.y*(U64)r_tex2d_format_bytes_per_pixel_table[top.fmt]) - { - texture = r_tex2d_alloc(R_ResourceKind_Static, v2s32(top.dim.x, top.dim.y), top.fmt, data.str); - } - - //- rjf: commit results to cache - if(got_task) MutexScopeW(stripe->rw_mutex) - { - for(TEX_Node *n = slot->first; n != 0; n = n->next) - { - if(u128_match(n->hash, hash) && MemoryMatchStruct(&top, &n->topology)) - { - n->texture = texture; - ins_atomic_u32_eval_assign(&n->is_working, 0); - ins_atomic_u64_inc_eval(&n->load_count); - break; - } - } - } - - access_close(access); - ProfEnd(); - return 0; -} - -//////////////////////////////// -//~ rjf: Evictor Threads - -internal void -tex_evictor_thread__entry_point(void *p) -{ - ThreadNameF("tex_evictor_thread"); - for(;;) + //- rjf: do eviction pass { U64 check_time_us = os_now_microseconds(); U64 check_time_user_clocks = update_tick_idx(); U64 evict_threshold_us = 10*1000000; U64 evict_threshold_user_clocks = 10; - for(U64 slot_idx = 0; slot_idx < tex_shared->slots_count; slot_idx += 1) + Rng1U64 range = lane_range(tex_shared->slots_count); + for EachInRange(slot_idx, range) { U64 stripe_idx = slot_idx%tex_shared->stripes_count; TEX_Slot *slot = &tex_shared->slots[slot_idx]; TEX_Stripe *stripe = &tex_shared->stripes[stripe_idx]; - B32 slot_has_work = 0; - MutexScopeR(stripe->rw_mutex) + for(B32 write_mode = 0; write_mode <= 1; write_mode += 1) { - for(TEX_Node *n = slot->first; n != 0; n = n->next) + B32 slot_has_work = 0; + RWMutexScope(stripe->rw_mutex, write_mode) { - if(n->scope_ref_count == 0 && - n->last_time_touched_us+evict_threshold_us <= check_time_us && - n->last_user_clock_idx_touched+evict_threshold_user_clocks <= check_time_user_clocks && - n->load_count != 0 && - n->is_working == 0) + for(TEX_Node *n = slot->first; n != 0; n = n->next) { - slot_has_work = 1; - break; - } - } - } - if(slot_has_work) MutexScopeW(stripe->rw_mutex) - { - for(TEX_Node *n = slot->first, *next = 0; n != 0; n = next) - { - next = n->next; - if(n->scope_ref_count == 0 && - n->last_time_touched_us+evict_threshold_us <= check_time_us && - n->last_user_clock_idx_touched+evict_threshold_user_clocks <= check_time_user_clocks && - n->load_count != 0 && - n->is_working == 0) - { - DLLRemove(slot->first, slot->last, n); - if(!r_handle_match(n->texture, r_handle_zero())) + if(access_pt_is_expired(&n->access_pt) && + n->load_count != 0 && + n->working_count == 0) { - r_tex2d_release(n->texture); + slot_has_work = 1; + if(!write_mode) + { + break; + } + else + { + DLLRemove(slot->first, slot->last, n); + if(!r_handle_match(n->texture, r_handle_zero())) + { + r_tex2d_release(n->texture); + } + SLLStackPush(tex_shared->stripes_free_nodes[stripe_idx], n); + } } - SLLStackPush(tex_shared->stripes_free_nodes[stripe_idx], n); } } + if(!slot_has_work) + { + break; + } } - os_sleep_milliseconds(5); } - os_sleep_milliseconds(1000); } + + //- rjf: gather all requests + local_persist TEX_Request *reqs = 0; + local_persist U64 reqs_count = 0; + if(lane_idx() == 0) MutexScope(tex_shared->req_mutex) + { + reqs_count = tex_shared->req_count; + reqs = push_array(scratch.arena, TEX_Request, reqs_count); + U64 idx = 0; + for EachNode(r, TEX_RequestNode, tex_shared->first_req) + { + MemoryCopyStruct(&reqs[idx], &r->v); + idx += 1; + } + arena_clear(tex_shared->req_arena); + tex_shared->first_req = tex_shared->last_req = 0; + tex_shared->req_count = 0; + tex_shared->lane_req_take_counter = 0; + } + lane_sync(); + + //- rjf: do requests + for(;;) + { + //- rjf: get next request + U64 req_num = ins_atomic_u64_inc_eval(&tex_shared->lane_req_take_counter); + if(req_num < 1 || reqs_count < req_num) + { + break; + } + U64 req_idx = req_num-1; + U128 hash = reqs[req_idx].hash; + TEX_Topology top = reqs[req_idx].top; + Access *access = access_open(); + + //- rjf: unpack request + U64 slot_idx = hash.u64[1]%tex_shared->slots_count; + U64 stripe_idx = slot_idx%tex_shared->stripes_count; + TEX_Slot *slot = &tex_shared->slots[slot_idx]; + TEX_Stripe *stripe = &tex_shared->stripes[stripe_idx]; + String8 data = c_data_from_hash(access, hash); + + //- rjf: create texture + R_Handle texture = {0}; + if(top.dim.x > 0 && top.dim.y > 0 && data.size >= (U64)top.dim.x*(U64)top.dim.y*(U64)r_tex2d_format_bytes_per_pixel_table[top.fmt]) + { + texture = r_tex2d_alloc(R_ResourceKind_Static, v2s32(top.dim.x, top.dim.y), top.fmt, data.str); + } + + //- rjf: commit results to cache + RWMutexScope(stripe->rw_mutex, 1) + { + for(TEX_Node *n = slot->first; n != 0; n = n->next) + { + if(u128_match(n->hash, hash) && MemoryMatchStruct(&top, &n->topology)) + { + n->texture = texture; + ins_atomic_u64_dec_eval(&n->working_count); + ins_atomic_u64_inc_eval(&n->load_count); + break; + } + } + } + + access_close(access); + } + + scratch_end(scratch); + ProfEnd(); } diff --git a/src/texture_cache/texture_cache.h b/src/texture_cache/texture_cache.h index a9140e91..86ac7e96 100644 --- a/src/texture_cache/texture_cache.h +++ b/src/texture_cache/texture_cache.h @@ -25,10 +25,8 @@ struct TEX_Node U128 hash; TEX_Topology topology; R_Handle texture; - B32 is_working; - U64 scope_ref_count; - U64 last_time_touched_us; - U64 last_user_clock_idx_touched; + AccessPt access_pt; + U64 working_count; U64 load_count; }; @@ -50,6 +48,20 @@ struct TEX_Stripe //////////////////////////////// //~ rjf: Shared State +typedef struct TEX_Request TEX_Request; +struct TEX_Request +{ + U128 hash; + TEX_Topology top; +}; + +typedef struct TEX_RequestNode TEX_RequestNode; +struct TEX_RequestNode +{ + TEX_RequestNode *next; + TEX_Request v; +}; + typedef struct TEX_Shared TEX_Shared; struct TEX_Shared { @@ -62,16 +74,13 @@ struct TEX_Shared TEX_Stripe *stripes; TEX_Node **stripes_free_nodes; - // rjf: user -> xfer thread - U64 u2x_ring_size; - U8 *u2x_ring_base; - U64 u2x_ring_write_pos; - U64 u2x_ring_read_pos; - CondVar u2x_ring_cv; - Mutex u2x_ring_mutex; - - // rjf: evictor thread - Thread evictor_thread; + // rjf: requests + Mutex req_mutex; + Arena *req_arena; + TEX_RequestNode *first_req; + TEX_RequestNode *last_req; + U64 req_count; + U64 lane_req_take_counter; }; //////////////////////////////// @@ -96,15 +105,8 @@ internal R_Handle tex_texture_from_hash_topology(Access *access, U128 hash, TEX_ internal R_Handle tex_texture_from_key_topology(Access *access, C_Key key, TEX_Topology topology, U128 *hash_out); //////////////////////////////// -//~ rjf: Transfer Threads +//~ rjf: Tick -internal B32 tex_u2x_enqueue_req(U128 hash, TEX_Topology top, U64 endt_us); -internal void tex_u2x_dequeue_req(U128 *hash_out, TEX_Topology *top_out); -ASYNC_WORK_DEF(tex_xfer_work); - -//////////////////////////////// -//~ rjf: Evictor Threads - -internal void tex_evictor_thread__entry_point(void *p); +internal void tex_tick(void); #endif //TEXTURE_CACHE_H