diff --git a/src/artifact_cache/artifact_cache.c b/src/artifact_cache/artifact_cache.c index 514463b5..2993b3be 100644 --- a/src/artifact_cache/artifact_cache.c +++ b/src/artifact_cache/artifact_cache.c @@ -161,8 +161,8 @@ ac_artifact_from_key_(Access *access, String8 key, AC_ArtifactParams *params, U6 } n->v.key = str8_copy(req_batch->arena, key); n->v.gen = params->gen; - n->v.last_requested_gen = &node->last_requested_gen; n->v.create = params->create; + n->v.cancel_signal = params->cancel_signal; } cond_var_broadcast(async_tick_start_cond_var); ins_atomic_u32_eval_assign(&async_loop_again, 1); @@ -356,7 +356,7 @@ ac_async_tick(void) // rjf: compute val B32 retry = 0; - AC_Artifact val = r->create(r->key, r->gen, r->last_requested_gen, &retry); + AC_Artifact val = r->create(r->key, r->cancel_signal, &retry); // rjf: retry? -> resubmit request if(retry && lane_idx() == 0) @@ -458,7 +458,7 @@ ac_async_tick(void) // rjf: compute val B32 retry = 0; - AC_Artifact val = r->create(r->key, r->gen, r->last_requested_gen, &retry); + AC_Artifact val = r->create(r->key, r->cancel_signal, &retry); // rjf: retry? -> resubmit request if(retry) diff --git a/src/artifact_cache/artifact_cache.h b/src/artifact_cache/artifact_cache.h index 6d14a48e..eab90c4e 100644 --- a/src/artifact_cache/artifact_cache.h +++ b/src/artifact_cache/artifact_cache.h @@ -16,7 +16,7 @@ struct AC_Artifact //////////////////////////////// //~ rjf: Artifact Computation Function Types -typedef AC_Artifact AC_CreateFunctionType(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out); +typedef AC_Artifact AC_CreateFunctionType(String8 key, B32 *cancel_signal, B32 *retry_out); typedef void AC_DestroyFunctionType(AC_Artifact artifact); typedef U32 AC_Flags; @@ -37,6 +37,7 @@ struct AC_ArtifactParams U64 gen; U64 evict_threshold_us; B32 *stale_out; + B32 *cancel_signal; AC_Flags flags; }; @@ -48,7 +49,7 @@ struct AC_Request { String8 key; U64 gen; - U64 *last_requested_gen; + B32 *cancel_signal; AC_CreateFunctionType *create; }; @@ -138,6 +139,11 @@ global AC_Shared *ac_shared = 0; internal void ac_init(void); +//////////////////////////////// +//~ rjf: Helpers + +internal B32 ac_cancelled(void); + //////////////////////////////// //~ rjf: Cache Lookups diff --git a/src/ctrl/ctrl_core.c b/src/ctrl/ctrl_core.c index 8a0b1b2d..be446cdd 100644 --- a/src/ctrl/ctrl_core.c +++ b/src/ctrl/ctrl_core.c @@ -6134,7 +6134,7 @@ ctrl_thread__single_step(DMN_CtrlCtx *ctrl_ctx, CTRL_Msg *msg) //~ rjf: Process Memory Artifact Cache Hooks / Lookups internal AC_Artifact -ctrl_memory_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +ctrl_memory_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { AC_Artifact artifact = {0}; { @@ -6503,7 +6503,7 @@ ctrl_process_write(CTRL_Handle process, Rng1U64 range, void *src) //~ rjf: Call Stack Artifact Cache Hooks / Lookups internal AC_Artifact -ctrl_call_stack_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +ctrl_call_stack_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { AC_Artifact artifact = {0}; { @@ -6699,7 +6699,7 @@ ctrl_call_stack_from_thread(Access *access, CTRL_Handle thread_handle, B32 high_ //~ rjf: Call Stack Tree Artifact Cache Hooks / Lookups internal AC_Artifact -ctrl_call_stack_tree_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +ctrl_call_stack_tree_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { Temp scratch = scratch_begin(0, 0); Access *access = access_open(); diff --git a/src/ctrl/ctrl_core.h b/src/ctrl/ctrl_core.h index 56762142..218dc1af 100644 --- a/src/ctrl/ctrl_core.h +++ b/src/ctrl/ctrl_core.h @@ -1002,7 +1002,7 @@ internal void ctrl_thread__single_step(DMN_CtrlCtx *ctrl_ctx, CTRL_Msg *msg); //////////////////////////////// //~ rjf: Process Memory Artifact Cache Hooks / Lookups -internal AC_Artifact ctrl_memory_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out); +internal AC_Artifact ctrl_memory_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out); internal void ctrl_memory_artifact_destroy(AC_Artifact artifact); internal C_Key ctrl_key_from_process_vaddr_range(CTRL_Handle process, Rng1U64 vaddr_range, B32 zero_terminated, U64 endt_us, B32 *out_is_stale); @@ -1017,14 +1017,14 @@ internal B32 ctrl_process_write(CTRL_Handle process, Rng1U64 range, void *src); //////////////////////////////// //~ rjf: Call Stack Artifact Cache Hooks / Lookups -internal AC_Artifact ctrl_call_stack_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out); +internal AC_Artifact ctrl_call_stack_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out); internal void ctrl_call_stack_artifact_destroy(AC_Artifact artifact); internal CTRL_CallStack ctrl_call_stack_from_thread(Access *access, CTRL_Handle thread_handle, B32 high_priority, U64 endt_us); //////////////////////////////// //~ rjf: Call Stack Tree Artifact Cache Hooks / Lookups -internal AC_Artifact ctrl_call_stack_tree_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out); +internal AC_Artifact ctrl_call_stack_tree_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out); internal void ctrl_call_stack_tree_artifact_destroy(AC_Artifact artifact); internal CTRL_CallStackTree ctrl_call_stack_tree(Access *access, U64 endt_us); diff --git a/src/dbg_info/dbg_info2.c b/src/dbg_info/dbg_info2.c index 720ddce0..1e118c82 100644 --- a/src/dbg_info/dbg_info2.c +++ b/src/dbg_info/dbg_info2.c @@ -861,7 +861,7 @@ di2_conversion_completion_signal_receiver_thread_entry_point(void *p) //~ rjf: Search Artifact Cache Hooks / Lookups internal AC_Artifact -di2_search_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +di2_search_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { ProfBeginFunction(); Access *access = access_open(); @@ -1065,9 +1065,144 @@ di2_search_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_ } lane_sync(); - //- rjf: flatten into array + //- rjf: produce sort records + typedef struct SortRecord SortRecord; + struct SortRecord + { + U64 key; + DI2_SearchItem *item; + }; + U64 sort_records_count = all_items->total_count; + SortRecord *sort_records = 0; + SortRecord *sort_records__swap = 0; + ProfScope("produce sort records") + { + if(lane_idx() == 0) + { + sort_records = push_array(scratch.arena, SortRecord, sort_records_count); + } + if(lane_idx() == lane_from_task_idx(1)) + { + sort_records__swap = push_array(scratch.arena, SortRecord, sort_records_count); + } + lane_sync_u64(&sort_records, 0); + lane_sync_u64(&sort_records__swap, lane_from_task_idx(1)); + for EachNode(n, DI2_SearchItemChunk, all_items->first) + { + Rng1U64 range = lane_range(n->count); + U64 dst_idx = n->base_idx + range.min; + for EachInRange(n_idx, range) + { + DI2_SearchItem *item = &n->v[n_idx]; + sort_records[dst_idx].item = item; + sort_records[dst_idx].key = (((item->missed_size & 0xffffffffull) << 32) | (u64_hash_from_seed_str8(item->idx, str8_struct(&key)) & 0xffffffffull)); + dst_idx += 1; + } + } + } + lane_sync(); + + //- rjf: sort records + ProfScope("sort records") + { + //- rjf: set up common data + U64 bits_per_digit = 8; + U64 digits_count = 64 / bits_per_digit; + U64 num_possible_values_per_digit = 1 << bits_per_digit; + U32 **lanes_digit_counts = 0; + U32 **lanes_digit_offsets = 0; + if(lane_idx() == 0) + { + lanes_digit_counts = push_array(scratch.arena, U32 *, lane_count()); + lanes_digit_offsets = push_array(scratch.arena, U32 *, lane_count()); + } + lane_sync_u64(&lanes_digit_counts, 0); + lane_sync_u64(&lanes_digit_offsets, 0); + + //- rjf: set up this lane + lanes_digit_counts[lane_idx()] = push_array(scratch.arena, U32, num_possible_values_per_digit); + lanes_digit_offsets[lane_idx()] = push_array(scratch.arena, U32, num_possible_values_per_digit); + SortRecord *src = sort_records; + SortRecord *dst = sort_records__swap; + U64 count = sort_records_count; + + //- rjf: do all per-digit sorts + for EachIndex(digit_idx, digits_count) + { + // rjf: count digit value occurrences per-lane + { + U32 *digit_counts = lanes_digit_counts[lane_idx()]; + MemoryZero(digit_counts, sizeof(digit_counts[0])*num_possible_values_per_digit); + Rng1U64 range = lane_range(count); + for EachInRange(idx, range) + { + SortRecord *r = &src[idx]; + U16 digit_value = (U16)(U8)(r->key >> (digit_idx*bits_per_digit)); + digit_counts[digit_value] += 1; + } + } + lane_sync(); + + // rjf: compute thread * digit value *relative* offset table + { + Rng1U64 range = lane_range(num_possible_values_per_digit); + for EachInRange(value_idx, range) + { + U64 layout_off = 0; + for EachIndex(lane_idx, lane_count()) + { + lanes_digit_offsets[lane_idx][value_idx] = layout_off; + layout_off += lanes_digit_counts[lane_idx][value_idx]; + } + } + } + lane_sync(); + + // rjf: convert relative offsets -> absolute offsets + if(lane_idx() == 0) + { + U64 last_off = 0; + U64 num_of_nonzero_digit = 0; + for EachIndex(value_idx, num_possible_values_per_digit) + { + for EachIndex(lane_idx, lane_count()) + { + lanes_digit_offsets[lane_idx][value_idx] += last_off; + } + last_off = lanes_digit_offsets[lane_count()-1][value_idx] + lanes_digit_counts[lane_count()-1][value_idx]; + } + // NOTE(rjf): required that: (last_off == element_count) + } + lane_sync(); + + // rjf: move + { + U32 *lane_digit_offsets = lanes_digit_offsets[lane_idx()]; + Rng1U64 range = lane_range(count); + for EachInRange(idx, range) + { + SortRecord *src_r = &src[idx]; + U16 digit_value = (U16)(U8)(src_r->key >> (digit_idx*bits_per_digit)); + U64 dst_off = lane_digit_offsets[digit_value]; + lane_digit_offsets[digit_value] += 1; + MemoryCopyStruct(&dst[dst_off], src_r); + } + } + lane_sync(); + + // rjf: swap + { + SortRecord *swap = src; + src = dst; + dst = swap; + } + } + } + lane_sync(); + + //- rjf: produce final array DI2_SearchItemArray items = {0}; - ProfScope("flatten into array") + ProfScope("produce final array") { if(lane_idx() == 0) { @@ -1076,19 +1211,15 @@ di2_search_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_ } lane_sync_u64(&items.count, 0); lane_sync_u64(&items.v, 0); - for EachNode(n, DI2_SearchItemChunk, all_items->first) + Rng1U64 range = lane_range(sort_records_count); + for EachInRange(idx, range) { - Rng1U64 range = lane_range(n->count); - U64 dst_idx = n->base_idx + range.min; - MemoryCopy(&items.v[dst_idx], n->v, sizeof(n->v[0]) * dim_1u64(range)); + SortRecord *record = &sort_records[idx]; + DI2_SearchItem *dst_item = &items.v[idx]; + MemoryCopyStruct(dst_item, record->item); } } - - //- rjf: sort items - ProfScope("sort items") - { - - } + lane_sync(); //- rjf: bundle as artifact artifact.u64[0] = (U64)arena; diff --git a/src/dbg_info/dbg_info2.h b/src/dbg_info/dbg_info2.h index d1129d3c..e96f2455 100644 --- a/src/dbg_info/dbg_info2.h +++ b/src/dbg_info/dbg_info2.h @@ -274,7 +274,7 @@ internal void di2_conversion_completion_signal_receiver_thread_entry_point(void //////////////////////////////// //~ rjf: Search Artifact Cache Hooks / Lookups -internal AC_Artifact di2_search_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out); +internal AC_Artifact di2_search_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out); internal void di2_search_artifact_destroy(AC_Artifact artifact); internal DI2_SearchItemArray di2_search_item_array_from_target_query(Access *access, RDI_SectionKind target, String8 query, U64 endt_us); diff --git a/src/disasm/disasm.c b/src/disasm/disasm.c index 6143ceab..b54fb710 100644 --- a/src/disasm/disasm.c +++ b/src/disasm/disasm.c @@ -266,7 +266,7 @@ struct DASM_Artifact }; internal AC_Artifact -dasm_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +dasm_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { DASM_Artifact *artifact = 0; if(lane_idx() == 0) diff --git a/src/disasm/disasm.h b/src/disasm/disasm.h index 7ccd09f8..0001693c 100644 --- a/src/disasm/disasm.h +++ b/src/disasm/disasm.h @@ -197,7 +197,7 @@ internal U64 dasm_line_array_code_off_from_idx(DASM_LineArray *array, U64 idx); //////////////////////////////// //~ rjf: Artifact Cache Hooks / Lookups -internal AC_Artifact dasm_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out); +internal AC_Artifact dasm_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out); internal void dasm_artifact_destroy(AC_Artifact artifact); internal DASM_Info dasm_info_from_hash_params(Access *access, U128 hash, DASM_Params *params); internal DASM_Info dasm_info_from_key_params(Access *access, C_Key key, DASM_Params *params, U128 *hash_out); diff --git a/src/file_stream/file_stream.c b/src/file_stream/file_stream.c index f68da6f5..aa29c8c2 100644 --- a/src/file_stream/file_stream.c +++ b/src/file_stream/file_stream.c @@ -32,7 +32,7 @@ fs_change_gen(void) //~ rjf: Cache Interaction internal AC_Artifact -fs_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +fs_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { ProfBeginFunction(); Temp scratch = scratch_begin(0, 0); diff --git a/src/file_stream/file_stream.h b/src/file_stream/file_stream.h index 9622e48c..132debf5 100644 --- a/src/file_stream/file_stream.h +++ b/src/file_stream/file_stream.h @@ -55,7 +55,7 @@ internal U64 fs_change_gen(void); //////////////////////////////// //~ rjf: Artifact Cache Hooks / Accessing API -internal AC_Artifact fs_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out); +internal AC_Artifact fs_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out); internal void fs_artifact_destroy(AC_Artifact artifact); internal C_Key fs_key_from_path_range(String8 path, Rng1U64 range, U64 endt_us); diff --git a/src/raddbg/raddbg_views.c b/src/raddbg/raddbg_views.c index b1ced1c3..19d175b0 100644 --- a/src/raddbg/raddbg_views.c +++ b/src/raddbg/raddbg_views.c @@ -3749,7 +3749,7 @@ struct RD_BitmapCanvasBoxDrawData }; internal AC_Artifact -rd_bitmap_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +rd_bitmap_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { Access *access = access_open(); @@ -4363,7 +4363,7 @@ struct RD_Geo3DBoxDrawData }; internal AC_Artifact -rd_geo3d_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +rd_geo3d_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { Access *access = access_open(); U128 hash = {0}; diff --git a/src/text/text.c b/src/text/text.c index 3ee0c191..d86062fc 100644 --- a/src/text/text.c +++ b/src/text/text.c @@ -1970,7 +1970,7 @@ struct TXT_ArtifactCreateShared }; internal AC_Artifact -txt_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out) +txt_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out) { ProfBeginFunction(); Temp scratch = scratch_begin(0, 0); diff --git a/src/text/text.h b/src/text/text.h index 1c31aa52..06e69bd4 100644 --- a/src/text/text.h +++ b/src/text/text.h @@ -203,7 +203,7 @@ internal TXT_ScopeNode *txt_scope_node_from_info_pt(TXT_TextInfo *info, TxtPt pt //////////////////////////////// //~ rjf: Artifact Cache Hooks / Lookups -internal AC_Artifact txt_artifact_create(String8 key, U64 gen, U64 *requested_gen, B32 *retry_out); +internal AC_Artifact txt_artifact_create(String8 key, B32 *cancel_signal, B32 *retry_out); internal void txt_artifact_destroy(AC_Artifact artifact); internal TXT_TextInfo txt_text_info_from_hash_lang(Access *access, U128 hash, TXT_LangKind lang); internal TXT_TextInfo txt_text_info_from_key_lang(Access *access, C_Key key, TXT_LangKind lang, U128 *hash_out);