From 4b335103bc17b45859126adb0ac53cffd61c3d2e Mon Sep 17 00:00:00 2001
From: Ryan Fleury
Date: Wed, 24 Sep 2025 15:04:45 -0700
Subject: [PATCH] begin moving call stack computation / cache to artifact cache; fix incorrect broadcast val location

---
 src/artifact_cache/artifact_cache.c |   9 ++
 src/artifact_cache/artifact_cache.h |   1 +
 src/base/base_entry_point.c         |   2 +-
 src/ctrl/ctrl_core.c                | 189 +++++++++++++++++++++++++++-
 src/ctrl/ctrl_core.h                |   7 ++
 src/raddbg/raddbg_core.c            |   4 +
 src/raddbg/raddbg_core.h            |   1 +
 src/raddbg/raddbg_eval.c            |   2 +-
 8 files changed, 212 insertions(+), 3 deletions(-)

diff --git a/src/artifact_cache/artifact_cache.c b/src/artifact_cache/artifact_cache.c
index 7c074d9d..1e0b647f 100644
--- a/src/artifact_cache/artifact_cache.c
+++ b/src/artifact_cache/artifact_cache.c
@@ -66,6 +66,7 @@ ac_artifact_from_key_(Access *access, String8 key, AC_ArtifactParams *params, U6
   Stripe *stripe = stripe_from_slot_idx(&cache->stripes, slot_idx);
 
   //- rjf: cache * key -> existing artifact
+  B32 artifact_is_stale = 0;
   B32 got_artifact = 0;
   B32 need_request = 0;
   AC_Artifact artifact = {0};
@@ -78,6 +79,7 @@ ac_artifact_from_key_(Access *access, String8 key, AC_ArtifactParams *params, U6
         if(ins_atomic_u64_eval(&n->completion_count) != 0 && (n->gen == params->gen || !params->wait_for_fresh))
         {
           got_artifact = 1;
+          artifact_is_stale = (n->gen != params->gen);
           artifact = n->val;
           access_touch(access, &n->access_pt, stripe->cv);
         }
@@ -150,6 +152,7 @@ ac_artifact_from_key_(Access *access, String8 key, AC_ArtifactParams *params, U6
         if(!got_artifact && ins_atomic_u64_eval(&node->completion_count) != 0 && ((node->gen == params->gen) || !params->wait_for_fresh || out_of_time))
         {
           got_artifact = 1;
+          artifact_is_stale = (node->gen != params->gen);
           artifact = node->val;
           access_touch(access, &node->access_pt, stripe->cv);
         }
@@ -165,6 +168,12 @@ ac_artifact_from_key_(Access *access, String8 key, AC_ArtifactParams *params, U6
     }
   }
 
+  //- rjf: report staleness (set when the returned artifact's gen differs from the requested gen)
+  if(params->stale_out)
+  {
+    params->stale_out[0] = artifact_is_stale;
+  }
+
   return artifact;
 }
 
diff --git a/src/artifact_cache/artifact_cache.h b/src/artifact_cache/artifact_cache.h
index 303c1d43..4f219dfd 100644
--- a/src/artifact_cache/artifact_cache.h
+++ b/src/artifact_cache/artifact_cache.h
@@ -27,6 +27,7 @@ struct AC_ArtifactParams
   U64 slots_count;
   U64 gen;
   B32 wait_for_fresh;
+  B32 *stale_out;
 };
 
 ////////////////////////////////
diff --git a/src/base/base_entry_point.c b/src/base/base_entry_point.c
index 751b8254..e0cadd76 100644
--- a/src/base/base_entry_point.c
+++ b/src/base/base_entry_point.c
@@ -104,6 +104,7 @@ main_thread_base_entry_point(int arguments_count, char **arguments)
   //- rjf: launch async threads
   Thread *async_threads = 0;
   U64 async_threads_count = 0;
+  U64 lane_broadcast_val = 0;
   {
     U64 num_main_threads = 1;
 #if defined(CTRL_CORE_H)
@@ -113,7 +114,6 @@ main_thread_base_entry_point(int arguments_count, char **arguments)
     U64 num_main_threads_clamped = Min(num_async_threads, num_main_threads);
     num_async_threads -= num_main_threads_clamped;
     num_async_threads = Max(1, num_async_threads);
-    U64 lane_broadcast_val = 0;
     Barrier barrier = barrier_alloc(num_async_threads);
     LaneCtx *lane_ctxs = push_array(scratch.arena, LaneCtx, num_async_threads);
     async_threads_count = num_async_threads;
diff --git a/src/ctrl/ctrl_core.c b/src/ctrl/ctrl_core.c
index f063d1f6..5ed8b14d 100644
--- a/src/ctrl/ctrl_core.c
+++ b/src/ctrl/ctrl_core.c
@@ -7575,6 +7575,7 @@ ctrl_memory_artifact_create(String8 key, B32 *retry_out)
   else if(range_arena != 0)
   {
     arena_release(range_arena);
+    retry_out[0] = 1;
   }
 
   //- rjf: wakeup on new reads
@@ -7617,10 +7618,196 @@ ctrl_key_from_process_vaddr_range_new(CTRL_Handle process, Rng1U64 vaddr_range,
   } key_data = {process, vaddr_range, zero_terminated};
   String8 key = str8_struct(&key_data);
   Access *access = access_open();
-  AC_Artifact artifact = ac_artifact_from_key(access, key, ctrl_memory_artifact_create, ctrl_memory_artifact_destroy, endt_us, .gen = ctrl_mem_gen(), .slots_count = 2048);
+  AC_Artifact artifact = ac_artifact_from_key(access, key, ctrl_memory_artifact_create, ctrl_memory_artifact_destroy, endt_us, .gen = ctrl_mem_gen(), .slots_count = 2048, .stale_out = out_is_stale);
   C_Key content_key = {0};
   MemoryCopyStruct(&content_key, &artifact);
   access_close(access);
   ProfEnd();
   return content_key;
 }
+
+////////////////////////////////
+//~ rjf: Call Stack Artifact Cache Hooks / Lookups
+
+internal AC_Artifact
+ctrl_call_stack_artifact_create(String8 key, B32 *retry_out)
+{
+  AC_Artifact artifact = {0};
+  if(lane_idx() == 0)
+  {
+    Temp scratch = scratch_begin(0, 0);
+
+    //- rjf: unpack key
+    CTRL_Handle thread_handle = {0};
+    str8_deserial_read_struct(key, 0, &thread_handle);
+
+    //- rjf: produce mini entity context for just this call stack build (scratch-allocated copy of the thread's process subtree)
+    CTRL_EntityCtx *entity_ctx = push_array(scratch.arena, CTRL_EntityCtx, 1);
+    MutexScopeR(ctrl_state->ctrl_thread_entity_ctx_rw_mutex)
+    {
+      CTRL_EntityCtx *src_ctx = &ctrl_state->ctrl_thread_entity_store->ctx;
+      CTRL_EntityCtx *dst_ctx = entity_ctx;
+      {
+        dst_ctx->root = &ctrl_entity_nil;
+        dst_ctx->hash_slots_count = 1024;
+        dst_ctx->hash_slots = push_array(scratch.arena, CTRL_EntityHashSlot, dst_ctx->hash_slots_count);
+        MemoryCopyArray(dst_ctx->entity_kind_counts, src_ctx->entity_kind_counts);
+        MemoryCopyArray(dst_ctx->entity_kind_alloc_gens, src_ctx->entity_kind_alloc_gens);
+      }
+      CTRL_Entity *src_thread = ctrl_entity_from_handle(src_ctx, thread_handle);
+      CTRL_Entity *src_process = ctrl_process_from_entity(src_thread);
+      {
+        CTRL_EntityRec rec = {0};
+        CTRL_Entity *dst_parent = &ctrl_entity_nil;
+        for(CTRL_Entity *src_e = src_process; src_e != &ctrl_entity_nil; src_e = rec.next)
+        {
+          rec = ctrl_entity_rec_depth_first_pre(src_e, src_process);
+
+          // rjf: copy this entity
+          CTRL_Entity *dst_e = push_array(scratch.arena, CTRL_Entity, 1);
+          {
+            dst_e->first = dst_e->last = dst_e->next = dst_e->prev = &ctrl_entity_nil;
+            dst_e->parent = dst_parent;
+            dst_e->kind = src_e->kind;
+            dst_e->arch = src_e->arch;
+            dst_e->is_frozen = src_e->is_frozen;
+            dst_e->is_soloed = src_e->is_soloed;
+            dst_e->rgba = src_e->rgba;
+            dst_e->handle = src_e->handle;
+            dst_e->id = src_e->id;
+            dst_e->vaddr_range = src_e->vaddr_range;
+            dst_e->stack_base = src_e->stack_base;
+            dst_e->timestamp = src_e->timestamp;
+            dst_e->bp_flags = src_e->bp_flags;
+            dst_e->string = push_str8_copy(scratch.arena, src_e->string);
+          }
+          if(dst_parent == &ctrl_entity_nil)
+          {
+            dst_ctx->root = dst_e;
+          }
+          else
+          {
+            DLLPushBack_NPZ(&ctrl_entity_nil, dst_parent->first, dst_parent->last, dst_e, next, prev);
+          }
+
+          // rjf: insert into hash map
+          {
+            U64 hash = ctrl_hash_from_handle(dst_e->handle);
+            U64 slot_idx = hash%dst_ctx->hash_slots_count;
+            CTRL_EntityHashSlot *slot = &dst_ctx->hash_slots[slot_idx];
+            CTRL_EntityHashNode *node = 0;
+            for(CTRL_EntityHashNode *n = slot->first; n != 0; n = n->next)
+            {
+              if(ctrl_handle_match(n->entity->handle, dst_e->handle))
+              {
+                node = n;
+                break;
+              }
+            }
+            if(node == 0)
+            {
+              node = push_array(scratch.arena, CTRL_EntityHashNode, 1);
+              MemoryZeroStruct(node);
+              DLLPushBack(slot->first, slot->last, node);
+              node->entity = dst_e;
+            }
+          }
+
+          // rjf: push/pop
+          if(rec.push_count)
+          {
+            dst_parent = dst_e;
+          }
+          else for(S32 pop_idx = 0; pop_idx < rec.pop_count; pop_idx += 1)
+          {
+            dst_parent = dst_parent->parent;
+          }
+        }
+      }
+    }
+
+    //- rjf: compute call stack; only good if the unwind had no flags & reg/mem gens were unchanged across it
+    B32 good = 0;
+    Arena *arena = arena_alloc();
+    CTRL_CallStack *call_stack = push_array(arena, CTRL_CallStack, 1);
+    {
+      CTRL_Entity *thread = ctrl_entity_from_handle(entity_ctx, thread_handle);
+      CTRL_Entity *process = ctrl_process_from_entity(thread);
+      U64 pre_reg_gen = 0;
+      U64 post_reg_gen = 0;
+      U64 pre_mem_gen = 0;
+      U64 post_mem_gen = 0;
+      CTRL_Unwind unwind = {0};
+      {
+        pre_reg_gen = ctrl_reg_gen();
+        pre_mem_gen = ctrl_mem_gen();
+        unwind = ctrl_unwind_from_thread(arena, entity_ctx, thread_handle, os_now_microseconds()+5000);
+        if(unwind.flags == 0)
+        {
+          good = 1;
+          call_stack[0] = ctrl_call_stack_from_unwind(arena, process, &unwind);
+        }
+        post_reg_gen = ctrl_reg_gen();
+        post_mem_gen = ctrl_mem_gen();
+      }
+      if(pre_reg_gen != post_reg_gen || pre_mem_gen != post_mem_gen)
+      {
+        good = 0;
+      }
+    }
+
+    //- rjf: broadcast update
+    if(good && ctrl_state->wakeup_hook != 0)
+    {
+      ctrl_state->wakeup_hook();
+    }
+
+    //- rjf: bundle call stack as artifact (u64[0] = owning arena, u64[1] = call stack pointer)
+    if(good)
+    {
+      artifact.u64[0] = (U64)arena;
+      artifact.u64[1] = (U64)call_stack;
+    }
+
+    //- rjf: release results on bad
+    if(!good)
+    {
+      arena_release(arena);
+    }
+
+    //- rjf: retry on bad
+    if(!good)
+    {
+      retry_out[0] = 1;
+    }
+
+    scratch_end(scratch);
+  }
+  lane_sync_u64(&artifact.u64[0], 0);
+  lane_sync_u64(&artifact.u64[1], 0);
+  return artifact;
+}
+
+internal void
+ctrl_call_stack_artifact_destroy(AC_Artifact artifact)
+{
+  Arena *arena = (Arena *)artifact.u64[0];
+  if(arena != 0)
+  {
+    arena_release(arena);
+  }
+}
+
+internal CTRL_CallStack
+ctrl_call_stack_from_thread_new(Access *access, CTRL_Handle thread_handle, B32 high_priority, U64 endt_us)
+{
+  CTRL_CallStack result = {0};
+  {
+    AC_Artifact artifact = ac_artifact_from_key(access, str8_struct(&thread_handle), ctrl_call_stack_artifact_create, ctrl_call_stack_artifact_destroy, endt_us, .gen = ctrl_mem_gen() + ctrl_reg_gen());
+    if(artifact.u64[1] != 0)
+    {
+      MemoryCopyStruct(&result, (CTRL_CallStack *)artifact.u64[1]);
+    }
+  }
+  return result;
+}
diff --git a/src/ctrl/ctrl_core.h b/src/ctrl/ctrl_core.h
index 03e959c9..44060551 100644
--- a/src/ctrl/ctrl_core.h
+++ b/src/ctrl/ctrl_core.h
@@ -1257,4 +1257,11 @@ internal AC_Artifact ctrl_memory_artifact_create(String8 key, B32 *retry_out);
 internal void ctrl_memory_artifact_destroy(AC_Artifact artifact);
 internal C_Key ctrl_key_from_process_vaddr_range_new(CTRL_Handle process, Rng1U64 vaddr_range, B32 zero_terminated, U64 endt_us, B32 *out_is_stale);
 
+////////////////////////////////
+//~ rjf: Call Stack Artifact Cache Hooks / Lookups
+
+internal AC_Artifact ctrl_call_stack_artifact_create(String8 key, B32 *retry_out);
+internal void ctrl_call_stack_artifact_destroy(AC_Artifact artifact);
+internal CTRL_CallStack ctrl_call_stack_from_thread_new(Access *access, CTRL_Handle thread_handle, B32 high_priority, U64 endt_us);
+
 #endif // CTRL_CORE_H
diff --git a/src/raddbg/raddbg_core.c b/src/raddbg/raddbg_core.c
index 18dbecf8..2db08019 100644
--- a/src/raddbg/raddbg_core.c
+++ b/src/raddbg/raddbg_core.c
@@ -11495,8 +11495,10 @@ rd_frame(void)
   //////////////////////////////
   //- rjf: push frame scopes
   //
+  Access *frame_access_restore = rd_state->frame_access;
   DI_Scope *frame_di_scope_restore = rd_state->frame_di_scope;
   CTRL_Scope *frame_ctrl_scope_restore = rd_state->frame_ctrl_scope;
+  rd_state->frame_access = access_open();
   rd_state->frame_di_scope = di_scope_open();
   rd_state->frame_ctrl_scope = ctrl_scope_open();
   rd_state->got_frame_call_stack_tree = 0;
@@ -17323,8 +17325,10 @@ rd_frame(void)
   // will sleep for vsync, and we do not want to hold handles for long,
   // since eviction threads may be waiting to get rid of stuff.
   //
+  access_close(rd_state->frame_access);
   di_scope_close(rd_state->frame_di_scope);
   ctrl_scope_close(rd_state->frame_ctrl_scope);
+  rd_state->frame_access = frame_access_restore;
   rd_state->frame_di_scope = frame_di_scope_restore;
   rd_state->frame_ctrl_scope = frame_ctrl_scope_restore;
 
diff --git a/src/raddbg/raddbg_core.h b/src/raddbg/raddbg_core.h
index 2d0d87af..388be833 100644
--- a/src/raddbg/raddbg_core.h
+++ b/src/raddbg/raddbg_core.h
@@ -599,6 +599,7 @@ struct RD_State
 
   // rjf: frame parameters
   F32 frame_dt;
+  Access *frame_access;
   DI_Scope *frame_di_scope;
   CTRL_Scope *frame_ctrl_scope;
   CTRL_CallStackTree frame_call_stack_tree;
diff --git a/src/raddbg/raddbg_eval.c b/src/raddbg/raddbg_eval.c
index c1795d34..d6b98a8b 100644
--- a/src/raddbg/raddbg_eval.c
+++ b/src/raddbg/raddbg_eval.c
@@ -968,7 +968,7 @@ E_TYPE_IREXT_FUNCTION_DEF(call_stack)
     B32 call_stack_high_priority = ctrl_handle_match(entity->handle, rd_base_regs()->thread);
     accel->arch = entity->arch;
     accel->process = ctrl_process_from_entity(entity)->handle;
-    accel->call_stack = ctrl_call_stack_from_thread(rd_state->frame_ctrl_scope, entity->handle, call_stack_high_priority, call_stack_high_priority ? rd_state->frame_eval_memread_endt_us : 0);
+    accel->call_stack = ctrl_call_stack_from_thread_new(rd_state->frame_access, entity->handle, call_stack_high_priority, call_stack_high_priority ? rd_state->frame_eval_memread_endt_us : 0);
   }
   scratch_end(scratch);
 }