diff --git a/src/ctrl/ctrl_core.c b/src/ctrl/ctrl_core.c index eaa97541..65d42656 100644 --- a/src/ctrl/ctrl_core.c +++ b/src/ctrl/ctrl_core.c @@ -819,7 +819,7 @@ ctrl_init(void) } ctrl_state->process_memory_cache.slots_count = 256; ctrl_state->process_memory_cache.slots = push_array(arena, CTRL_ProcessMemoryCacheSlot, ctrl_state->process_memory_cache.slots_count); - ctrl_state->process_memory_cache.stripes_count = 8; + ctrl_state->process_memory_cache.stripes_count = os_logical_core_count(); ctrl_state->process_memory_cache.stripes = push_array(arena, CTRL_ProcessMemoryCacheStripe, ctrl_state->process_memory_cache.stripes_count); for(U64 idx = 0; idx < ctrl_state->process_memory_cache.stripes_count; idx += 1) { @@ -828,14 +828,13 @@ ctrl_init(void) } ctrl_state->thread_reg_cache.slots_count = 1024; ctrl_state->thread_reg_cache.slots = push_array(arena, CTRL_ThreadRegCacheSlot, ctrl_state->thread_reg_cache.slots_count); - ctrl_state->thread_reg_cache.stripes_count = 8; + ctrl_state->thread_reg_cache.stripes_count = os_logical_core_count(); ctrl_state->thread_reg_cache.stripes = push_array(arena, CTRL_ThreadRegCacheStripe, ctrl_state->thread_reg_cache.stripes_count); for(U64 idx = 0; idx < ctrl_state->thread_reg_cache.stripes_count; idx += 1) { - ctrl_state->thread_reg_cache.stripes[idx].rw_mutex = os_rw_mutex_alloc(); ctrl_state->thread_reg_cache.stripes[idx].arena = arena_alloc(); + ctrl_state->thread_reg_cache.stripes[idx].rw_mutex = os_rw_mutex_alloc(); } - ctrl_state->ctrl_thread_entity_store = ctrl_entity_store_alloc(); ctrl_state->u2c_ring_size = KB(64); ctrl_state->u2c_ring_base = push_array_no_zero(arena, U8, ctrl_state->u2c_ring_size); ctrl_state->u2c_ring_mutex = os_mutex_alloc(); @@ -844,6 +843,7 @@ ctrl_init(void) ctrl_state->c2u_ring_base = push_array_no_zero(arena, U8, ctrl_state->c2u_ring_size); ctrl_state->c2u_ring_mutex = os_mutex_alloc(); ctrl_state->c2u_ring_cv = os_condition_variable_alloc(); + ctrl_state->ctrl_thread_entity_store = ctrl_entity_store_alloc(); ctrl_state->dmn_event_arena = arena_alloc(); ctrl_state->user_entry_point_arena = arena_alloc(); for(CTRL_ExceptionCodeKind k = (CTRL_ExceptionCodeKind)0; k < CTRL_ExceptionCodeKind_COUNT; k = (CTRL_ExceptionCodeKind)(k+1)) diff --git a/src/df/core/df_core.c b/src/df/core/df_core.c index 2a526015..75c2525d 100644 --- a/src/df/core/df_core.c +++ b/src/df/core/df_core.c @@ -3558,7 +3558,7 @@ df_tls_base_vaddr_from_process_root_rip(DF_Entity *process, U64 root_vaddr, U64 //- rjf: read module's TLS index U64 tls_index = 0; { - CTRL_ProcessMemorySlice tls_index_slice = ctrl_query_cached_data_from_process_vaddr_range(scratch.arena, process->ctrl_machine_id, process->ctrl_handle, tls_vaddr_range, os_now_microseconds()+5000); + CTRL_ProcessMemorySlice tls_index_slice = ctrl_query_cached_data_from_process_vaddr_range(scratch.arena, process->ctrl_machine_id, process->ctrl_handle, tls_vaddr_range, 0); if(tls_index_slice.data.size >= addr_size) { tls_index = *(U64 *)tls_index_slice.data.str; @@ -3571,13 +3571,13 @@ df_tls_base_vaddr_from_process_root_rip(DF_Entity *process, U64 root_vaddr, U64 U64 thread_info_addr = root_vaddr; U64 tls_addr_off = tls_index*addr_size; U64 tls_addr_array = 0; - CTRL_ProcessMemorySlice tls_addr_array_slice = ctrl_query_cached_data_from_process_vaddr_range(scratch.arena, process->ctrl_machine_id, process->ctrl_handle, r1u64(thread_info_addr, thread_info_addr+addr_size), os_now_microseconds()+5000); + CTRL_ProcessMemorySlice tls_addr_array_slice = ctrl_query_cached_data_from_process_vaddr_range(scratch.arena, process->ctrl_machine_id, process->ctrl_handle, r1u64(thread_info_addr, thread_info_addr+addr_size), 0); String8 tls_addr_array_data = tls_addr_array_slice.data; if(tls_addr_array_data.size >= 8) { MemoryCopy(&tls_addr_array, tls_addr_array_data.str, sizeof(U64)); } - CTRL_ProcessMemorySlice result_slice = ctrl_query_cached_data_from_process_vaddr_range(scratch.arena, process->ctrl_machine_id, process->ctrl_handle, r1u64(tls_addr_array + tls_addr_off, tls_addr_array + tls_addr_off + addr_size), os_now_microseconds()+5000); + CTRL_ProcessMemorySlice result_slice = ctrl_query_cached_data_from_process_vaddr_range(scratch.arena, process->ctrl_machine_id, process->ctrl_handle, r1u64(tls_addr_array + tls_addr_off, tls_addr_array + tls_addr_off + addr_size), 0); String8 result_data = result_slice.data; if(result_data.size >= 8) { @@ -3666,7 +3666,7 @@ df_set_thread_rip(DF_Entity *thread, U64 vaddr) // rjf: early mutation of unwind cache for immediate frontend effect if(result) { - DF_RunUnwindCache *unwind_cache = &df_state->unwind_cache; + DF_RunUnwindCache *unwind_cache = &df_state->unwind_caches[df_state->unwind_cache_gen%ArrayCount(df_state->unwind_caches)]; if(unwind_cache->slots_count != 0) { DF_Handle thread_handle = df_handle_from_entity(thread); @@ -6135,40 +6135,49 @@ df_push_active_target_list(Arena *arena) internal CTRL_Unwind df_query_cached_unwind_from_thread(DF_Entity *thread) { - DF_RunUnwindCache *cache = &df_state->unwind_cache; - if(cache->slots_count == 0) - { - cache->slots_count = 1024; - cache->slots = push_array(cache->arena, DF_RunUnwindCacheSlot, cache->slots_count); - } + CTRL_Unwind result = {0}; DF_Handle handle = df_handle_from_entity(thread); U64 hash = df_hash_from_string(str8_struct(&handle)); - U64 slot_idx = hash % cache->slots_count; - DF_RunUnwindCacheSlot *slot = &cache->slots[slot_idx]; - DF_RunUnwindCacheNode *node = 0; - for(DF_RunUnwindCacheNode *n = slot->first; n != 0; n = n->hash_next) + for(U64 cache_idx = 0; cache_idx < ArrayCount(df_state->unwind_caches); cache_idx += 1) { - if(df_handle_match(n->thread, handle)) + DF_RunUnwindCache *cache = &df_state->unwind_caches[(df_state->unwind_cache_gen+cache_idx)%ArrayCount(df_state->unwind_caches)]; + if(cache_idx == 0 && cache->slots_count == 0) + { + cache->slots_count = 1024; + cache->slots = push_array(cache->arena, DF_RunUnwindCacheSlot, cache->slots_count); + } + else if(cache->slots_count == 0) { - node = n; break; } - } - CTRL_Unwind result = {0}; - if(node == 0) - { - result = ctrl_unwind_from_thread(cache->arena, df_state->ctrl_entity_store, thread->ctrl_machine_id, thread->ctrl_handle, 0); - if(!result.error) + U64 slot_idx = hash%cache->slots_count; + DF_RunUnwindCacheSlot *slot = &cache->slots[slot_idx]; + DF_RunUnwindCacheNode *node = 0; + for(DF_RunUnwindCacheNode *n = slot->first; n != 0; n = n->hash_next) { - node = push_array(cache->arena, DF_RunUnwindCacheNode, 1); - SLLQueuePush_N(slot->first, slot->last, node, hash_next); - node->thread = handle; - node->unwind = result; + if(df_handle_match(n->thread, handle)) + { + node = n; + break; + } + } + if(node != 0) + { + result = node->unwind; + break; + } + else + { + result = ctrl_unwind_from_thread(cache->arena, df_state->ctrl_entity_store, thread->ctrl_machine_id, thread->ctrl_handle, 0); + if(!result.error) + { + node = push_array(cache->arena, DF_RunUnwindCacheNode, 1); + SLLQueuePush_N(slot->first, slot->last, node, hash_next); + node->thread = handle; + node->unwind = result; + break; + } } - } - else - { - result = node->unwind; } return result; } @@ -6176,12 +6185,7 @@ df_query_cached_unwind_from_thread(DF_Entity *thread) internal U64 df_query_cached_rip_from_thread(DF_Entity *thread) { - U64 result = 0; - CTRL_Unwind unwind = df_query_cached_unwind_from_thread(thread); - if(unwind.first != 0) - { - result = unwind.first->rip; - } + U64 result = df_query_cached_rip_from_thread_unwind(thread, 0); return result; } @@ -6405,7 +6409,10 @@ df_core_init(CmdLine *cmdln, DF_StateDeltaHistory *hist) } // rjf: set up per-run caches - df_state->unwind_cache.arena = arena_alloc(); + for(U64 idx = 0; idx < ArrayCount(df_state->unwind_caches); idx += 1) + { + df_state->unwind_caches[idx].arena = arena_alloc(); + } df_state->tls_base_cache.arena = arena_alloc(); df_state->locals_cache.arena = arena_alloc(); df_state->member_cache.arena = arena_alloc(); @@ -6880,7 +6887,8 @@ df_core_begin_frame(Arena *arena, DF_CmdList *cmds, F32 dt) df_state->unwind_cache_reggen_idx != new_reg_gen) && !df_ctrl_targets_running()) ProfScope("per-thread unwind gather") { - DF_RunUnwindCache *cache = &df_state->unwind_cache; + df_state->unwind_cache_gen += 1; + DF_RunUnwindCache *cache = &df_state->unwind_caches[df_state->unwind_cache_gen%ArrayCount(df_state->unwind_caches)]; arena_clear(cache->arena); cache->slots_count = 0; cache->slots = 0; diff --git a/src/df/core/df_core.h b/src/df/core/df_core.h index 1a8f907a..76c97340 100644 --- a/src/df/core/df_core.h +++ b/src/df/core/df_core.h @@ -1135,7 +1135,8 @@ struct DF_State // rjf: per-run caches U64 unwind_cache_reggen_idx; U64 unwind_cache_memgen_idx; - DF_RunUnwindCache unwind_cache; + DF_RunUnwindCache unwind_caches[2]; + U64 unwind_cache_gen; U64 tls_base_cache_reggen_idx; U64 tls_base_cache_memgen_idx; DF_RunTLSBaseCache tls_base_cache; diff --git a/src/df/gfx/df_views.c b/src/df/gfx/df_views.c index ad8e38ff..d411c08c 100644 --- a/src/df/gfx/df_views.c +++ b/src/df/gfx/df_views.c @@ -6235,6 +6235,7 @@ DF_VIEW_UI_FUNCTION_DEF(Disassembly) } // rjf: find live threads mapping to this disassembly + ProfScope("find live threads mapping to this disassembly") { DF_Entity *selected_thread = df_entity_from_handle(ctrl_ctx.thread); DF_EntityList threads = df_query_cached_entity_list_with_kind(DF_EntityKind_Thread); @@ -6243,7 +6244,7 @@ DF_VIEW_UI_FUNCTION_DEF(Disassembly) DF_Entity *thread = thread_n->entity; U64 unwind_count = (thread == selected_thread) ? ctrl_ctx.unwind_count : 0; U64 rip_vaddr = df_query_cached_rip_from_thread_unwind(thread, unwind_count); - if(contains_1u64(disasm_vaddr_rng, rip_vaddr)) + if(contains_1u64(disasm_vaddr_rng, rip_vaddr)) ProfScope("in-range rip scan") { U64 rip_off = rip_vaddr - disasm_vaddr_rng.min; S64 line_num = dasm_inst_array_idx_from_off__linear_scan(&insts, rip_off)+1; @@ -6257,6 +6258,7 @@ DF_VIEW_UI_FUNCTION_DEF(Disassembly) } // rjf: find breakpoints mapping to this disassembly + ProfScope("find breakpoints mapping to this disassembly") { DF_EntityList bps = df_query_cached_entity_list_with_kind(DF_EntityKind_Breakpoint); for(DF_EntityNode *n = bps.first; n != 0; n = n->next) @@ -6277,6 +6279,7 @@ DF_VIEW_UI_FUNCTION_DEF(Disassembly) } // rjf: find watch pins mapping to this disassembly + ProfScope("find watch pins mapping to this disassembly") { DF_EntityList pins = df_query_cached_entity_list_with_kind(DF_EntityKind_WatchPin); for(DF_EntityNode *n = pins.first; n != 0; n = n->next)