From c7d0a3431b67ebcdf6c7961b3f70d3293b9959a6 Mon Sep 17 00:00:00 2001 From: Ryan Fleury Date: Tue, 14 Oct 2025 16:35:24 -0700 Subject: [PATCH] eliminate old vmap baking --- src/rdi_make/rdi_make_local.c | 482 ---------------------------------- 1 file changed, 482 deletions(-) diff --git a/src/rdi_make/rdi_make_local.c b/src/rdi_make/rdi_make_local.c index 573f7ec4..67f70693 100644 --- a/src/rdi_make/rdi_make_local.c +++ b/src/rdi_make/rdi_make_local.c @@ -577,488 +577,6 @@ rdim_bake(Arena *arena, RDIM_BakeParams *params) } lane_sync(); - ////////////////////////////////////////////////////////////// - //- rjf: @rdim_bake_stage gather unsorted vmap keys/markers - // -#if 0 - ProfScope("gather unsorted vmap keys/markers") - { - //- rjf: gather scope vmap keys/markers - if(lane_idx() == lane_from_task_idx(0)) ProfScope("gather scope vmap keys/markers") - { - rdim_shared->scope_vmap_count = params->scopes.scope_voff_count; - rdim_shared->scope_vmap_keys = push_array_no_zero(arena, RDIM_SortKey, rdim_shared->scope_vmap_count); - rdim_shared->scope_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, rdim_shared->scope_vmap_count); - rdim_shared->scope_vmap_markers = push_array_no_zero(arena, RDIM_VMapMarker, rdim_shared->scope_vmap_count); - ProfScope("fill keys/markers") - { - RDIM_SortKey *key_ptr = rdim_shared->scope_vmap_keys; - RDIM_VMapMarker *marker_ptr = rdim_shared->scope_vmap_markers; - for(RDIM_ScopeChunkNode *chunk_n = params->scopes.first; chunk_n != 0; chunk_n = chunk_n->next) - { - for(RDI_U64 chunk_idx = 0; chunk_idx < chunk_n->count; chunk_idx += 1) - { - RDIM_Scope *src_scope = &chunk_n->v[chunk_idx]; - RDI_U32 scope_idx = (RDI_U32)rdim_idx_from_scope(src_scope); // TODO(rjf): @u64_to_u32 - for(RDIM_Rng1U64Node *n = src_scope->voff_ranges.first; n != 0; n = n->next) - { - key_ptr->key = n->v.min; - key_ptr->val = marker_ptr; - marker_ptr->idx = scope_idx; - marker_ptr->begin_range = 1; - key_ptr += 1; - marker_ptr += 1; - - key_ptr->key = n->v.max; - key_ptr->val = marker_ptr; - marker_ptr->idx = scope_idx; - marker_ptr->begin_range = 0; - key_ptr += 1; - marker_ptr += 1; - } - } - } - } - } - - //- rjf: gather unit vmap keys/markers - if(lane_idx() == lane_from_task_idx(1)) ProfScope("gather unit vmap keys/markers") - { - // rjf: count voff ranges - RDI_U64 voff_range_count = 0; - for(RDIM_UnitChunkNode *n = params->units.first; n != 0; n = n->next) - { - for(RDI_U64 idx = 0; idx < n->count; idx += 1) - { - RDIM_Unit *unit = &n->v[idx]; - voff_range_count += unit->voff_ranges.total_count; - } - } - - // rjf: count necessary markers - RDI_U64 marker_count = voff_range_count*2; - - // rjf: build keys/markers arrays - RDIM_SortKey *keys = rdim_push_array_no_zero(arena, RDIM_SortKey, marker_count); - RDIM_VMapMarker *markers = rdim_push_array_no_zero(arena, RDIM_VMapMarker, marker_count); - { - RDIM_SortKey *key_ptr = keys; - RDIM_VMapMarker *marker_ptr = markers; - RDI_U32 unit_idx = 1; - for(RDIM_UnitChunkNode *unit_chunk_n = params->units.first; - unit_chunk_n != 0; - unit_chunk_n = unit_chunk_n->next) - { - for(RDI_U64 idx = 0; idx < unit_chunk_n->count; idx += 1) - { - RDIM_Unit *unit = &unit_chunk_n->v[idx]; - for(RDIM_Rng1U64ChunkNode *n = unit->voff_ranges.first; n != 0; n = n->next) - { - for(RDI_U64 chunk_idx = 0; chunk_idx < n->count; chunk_idx += 1) - { - RDIM_Rng1U64 range = n->v[chunk_idx]; - if(range.min < range.max) - { - key_ptr->key = range.min; - key_ptr->val = marker_ptr; - marker_ptr->idx = unit_idx; - marker_ptr->begin_range = 1; - key_ptr += 1; - marker_ptr += 1; - - key_ptr->key = range.max; - key_ptr->val = marker_ptr; - marker_ptr->idx = unit_idx; - marker_ptr->begin_range = 0; - key_ptr += 1; - marker_ptr += 1; - } - } - } - unit_idx += 1; - } - } - } - - // rjf: store - rdim_shared->unit_vmap_count = marker_count; - rdim_shared->unit_vmap_keys = keys; - rdim_shared->unit_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, marker_count); - rdim_shared->unit_vmap_markers = markers; - } - - //- rjf: gather global vmap keys/markers - if(lane_idx() == lane_from_task_idx(2)) ProfScope("gather global vmap keys/markers") - { - //- rjf: allocate keys/markers - RDI_U64 marker_count = params->global_variables.total_count*2 + 2; - RDIM_SortKey *keys = rdim_push_array_no_zero(arena, RDIM_SortKey, marker_count); - RDIM_VMapMarker *markers = rdim_push_array_no_zero(arena, RDIM_VMapMarker, marker_count); - - //- rjf: fill - { - RDIM_SortKey *key_ptr = keys; - RDIM_VMapMarker *marker_ptr = markers; - - // rjf: fill actual globals - for(RDIM_SymbolChunkNode *n = params->global_variables.first; n != 0; n = n->next) - { - for(RDI_U64 chunk_idx = 0; chunk_idx < n->count; chunk_idx += 1) - { - RDIM_Symbol *global_var = &n->v[chunk_idx]; - RDI_U32 global_var_idx = (RDI_U32)rdim_idx_from_symbol(global_var); // TODO(rjf): @u64_to_u32 - RDI_U64 global_var_size = global_var->type ? global_var->type->byte_size : 1; - - RDI_U64 first = global_var->offset; - RDI_U64 opl = first + global_var_size; - - key_ptr->key = first; - key_ptr->val = marker_ptr; - marker_ptr->idx = global_var_idx; - marker_ptr->begin_range = 1; - key_ptr += 1; - marker_ptr += 1; - - key_ptr->key = opl; - key_ptr->val = marker_ptr; - marker_ptr->idx = global_var_idx; - marker_ptr->begin_range = 0; - key_ptr += 1; - marker_ptr += 1; - } - } - - // rjf: fill nil global - { - RDI_U32 global_idx = 0; - RDI_U64 first = 0; - RDI_U64 opl = 0xffffffffffffffffull; - key_ptr->key = first; - key_ptr->val = marker_ptr; - marker_ptr->idx = global_idx; - marker_ptr->begin_range = 1; - key_ptr += 1; - marker_ptr += 1; - key_ptr->key = opl; - key_ptr->val = marker_ptr; - marker_ptr->idx = global_idx; - marker_ptr->begin_range = 0; - key_ptr += 1; - marker_ptr += 1; - } - } - - //- rjf: store - rdim_shared->global_vmap_count = marker_count; - rdim_shared->global_vmap_keys = keys; - rdim_shared->global_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, marker_count); - rdim_shared->global_vmap_markers = markers; - } - } - lane_sync(); -#endif - - ////////////////////////////////////////////////////////////// - //- rjf: @rdim_bake_stage sort all vmap keys - // -#if 0 - ProfScope("sort all vmap keys") - { - // rjf: set up - if(lane_idx() == 0) - { - rdim_shared->lane_digit_counts = push_array(arena, U32 *, lane_count()); - rdim_shared->lane_digit_offsets = push_array(arena, U32 *, lane_count()); - } - lane_sync(); - - // rjf: sort - struct - { - RDI_U64 vmap_count; - RDIM_SortKey *keys; - RDIM_SortKey *keys__swap; - } - sort_tasks[] = - { - {rdim_shared->scope_vmap_count, rdim_shared->scope_vmap_keys, rdim_shared->scope_vmap_keys__swap}, - {rdim_shared->unit_vmap_count, rdim_shared->unit_vmap_keys, rdim_shared->unit_vmap_keys__swap}, - {rdim_shared->global_vmap_count, rdim_shared->global_vmap_keys, rdim_shared->global_vmap_keys__swap}, - }; - for EachElement(sort_task_idx, sort_tasks) ProfScope("sort %I64u", sort_task_idx) - { - RDI_U64 vmap_count = sort_tasks[sort_task_idx].vmap_count; - RDIM_SortKey *keys = sort_tasks[sort_task_idx].keys; - RDIM_SortKey *keys__swap = sort_tasks[sort_task_idx].keys__swap; - U64 bits_per_digit = 8; - U64 digits_count = 64 / bits_per_digit; - U64 num_possible_values_per_digit = 1 << bits_per_digit; - rdim_shared->lane_digit_counts[lane_idx()] = push_array_no_zero(arena, U32, num_possible_values_per_digit); - rdim_shared->lane_digit_offsets[lane_idx()] = push_array_no_zero(arena, U32, num_possible_values_per_digit); - RDIM_SortKey *src = keys; - RDIM_SortKey *dst = keys__swap; - U64 element_count = vmap_count; - for EachIndex(digit_idx, digits_count) - { - // rjf: count digit value occurrences per-lane - { - U32 *digit_counts = rdim_shared->lane_digit_counts[lane_idx()]; - MemoryZero(digit_counts, sizeof(digit_counts[0])*num_possible_values_per_digit); - Rng1U64 range = lane_range(element_count); - for EachInRange(idx, range) - { - RDIM_SortKey *sort_key = &src[idx]; - U16 digit_value = (U16)(U8)(sort_key->key >> (digit_idx*bits_per_digit)); - digit_counts[digit_value] += 1; - } - } - lane_sync(); - - // rjf: compute thread * digit value *relative* offset table - { - Rng1U64 range = lane_range(num_possible_values_per_digit); - for EachInRange(value_idx, range) - { - U64 layout_off = 0; - for EachIndex(lane_idx, lane_count()) - { - rdim_shared->lane_digit_offsets[lane_idx][value_idx] = layout_off; - layout_off += rdim_shared->lane_digit_counts[lane_idx][value_idx]; - } - } - } - lane_sync(); - - // rjf: convert relative offsets -> absolute offsets - if(lane_idx() == 0) - { - U64 last_off = 0; - U64 num_of_nonzero_digit = 0; - for EachIndex(value_idx, num_possible_values_per_digit) - { - for EachIndex(lane_idx, lane_count()) - { - rdim_shared->lane_digit_offsets[lane_idx][value_idx] += last_off; - } - last_off = rdim_shared->lane_digit_offsets[lane_count()-1][value_idx] + rdim_shared->lane_digit_counts[lane_count()-1][value_idx]; - } - // NOTE(rjf): required that: (last_off == element_count) - } - lane_sync(); - - // rjf: move - { - U32 *lane_digit_offsets = rdim_shared->lane_digit_offsets[lane_idx()]; - Rng1U64 range = lane_range(element_count); - for EachInRange(idx, range) - { - RDIM_SortKey *src_key = &src[idx]; - U16 digit_value = (U16)(U8)(src_key->key >> (digit_idx*bits_per_digit)); - U64 dst_off = lane_digit_offsets[digit_value]; - lane_digit_offsets[digit_value] += 1; - MemoryCopyStruct(&dst[dst_off], src_key); - } - } - lane_sync(); - - // rjf: swap - { - RDIM_SortKey *swap = src; - src = dst; - dst = swap; - } - } - } - } - lane_sync(); -#endif - - ////////////////////////////////////////////////////////////// - //- rjf: @rdim_bake_stage bake all vmaps - // -#if 0 - ProfScope("bake all vmaps") - { - Temp scratch = scratch_begin(&arena, 1); - typedef struct VMapBakeTask VMapBakeTask; - struct VMapBakeTask - { - VMapBakeTask *next; - String8 name; - RDI_U64 count; - RDIM_SortKey *keys; - RDIM_VMapMarker *markers; - RDIM_BakeVMap *bake_vmap_out; - }; - VMapBakeTask *first_task = 0; - VMapBakeTask *last_task = 0; - if(lane_idx() == lane_from_task_idx(0)) - { - VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1); - task->name = str8_lit("scopes"); - task->count = rdim_shared->scope_vmap_count; - task->keys = rdim_shared->scope_vmap_keys; - task->markers = rdim_shared->scope_vmap_markers; - task->bake_vmap_out = &rdim_shared->baked_scope_vmap.vmap; - SLLQueuePush(first_task, last_task, task); - } - if(lane_idx() == lane_from_task_idx(1)) - { - VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1); - task->name = str8_lit("units"); - task->count = rdim_shared->unit_vmap_count; - task->keys = rdim_shared->unit_vmap_keys; - task->markers = rdim_shared->unit_vmap_markers; - task->bake_vmap_out = &rdim_shared->baked_unit_vmap.vmap; - SLLQueuePush(first_task, last_task, task); - } - if(lane_idx() == lane_from_task_idx(2)) - { - VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1); - task->name = str8_lit("globals"); - task->count = rdim_shared->global_vmap_count; - task->keys = rdim_shared->global_vmap_keys; - task->markers = rdim_shared->global_vmap_markers; - task->bake_vmap_out = &rdim_shared->baked_global_vmap.vmap; - SLLQueuePush(first_task, last_task, task); - } - for(VMapBakeTask *task = first_task; task != 0; task = task->next) ProfScope("vmap bake for %.*s", str8_varg(task->name)) - { - //- rjf: determine if an extra vmap entry for zero is needed - RDI_U32 extra_vmap_entry = 0; - if(task->count > 0 && task->keys[0].key != 0) - { - extra_vmap_entry = 1; - } - - //- rjf: fill output vmap entries - RDI_U32 vmap_count_raw = extra_vmap_entry + task->count; - RDI_VMapEntry *vmap = rdim_push_array(arena, RDI_VMapEntry, vmap_count_raw); - RDI_U32 vmap_entry_count_pass_1 = 0; - ProfScope("fill output vmap entries") - { - typedef struct RDIM_VMapRangeTracker RDIM_VMapRangeTracker; - struct RDIM_VMapRangeTracker - { - RDIM_VMapRangeTracker *next; - RDI_U32 idx; - }; - RDI_VMapEntry *vmap_ptr = vmap; - if(extra_vmap_entry) - { - vmap_ptr->voff = 0; - vmap_ptr->idx = 0; - vmap_ptr += 1; - } - RDIM_VMapRangeTracker *tracker_stack = 0; - RDIM_VMapRangeTracker *tracker_free = 0; - RDIM_SortKey *key_ptr = task->keys; - RDIM_SortKey *key_opl = task->keys + task->count; - for(;key_ptr < key_opl;) - { - // rjf: get initial map state from tracker stack - RDI_U32 initial_idx = (RDI_U32)0xffffffff; - if(tracker_stack != 0) - { - initial_idx = tracker_stack->idx; - } - - // rjf: update tracker stack - // - // * we must process _all_ of the changes that apply at this voff before moving on - // - RDI_U64 voff = key_ptr->key; - for(;key_ptr < key_opl && key_ptr->key == voff; key_ptr += 1) - { - RDIM_VMapMarker *marker = (RDIM_VMapMarker*)key_ptr->val; - RDI_U32 idx = marker->idx; - - // rjf: range begin -> push to stack - if(marker->begin_range) - { - RDIM_VMapRangeTracker *new_tracker = tracker_free; - if(new_tracker != 0) - { - RDIM_SLLStackPop(tracker_free); - } - else - { - new_tracker = rdim_push_array(scratch.arena, RDIM_VMapRangeTracker, 1); - } - RDIM_SLLStackPush(tracker_stack, new_tracker); - new_tracker->idx = idx; - } - - // rjf: range ending -> pop matching node from stack (not always the top) - else - { - RDIM_VMapRangeTracker **ptr_in = &tracker_stack; - RDIM_VMapRangeTracker *match = 0; - for(RDIM_VMapRangeTracker *node = tracker_stack; node != 0;) - { - if(node->idx == idx) - { - match = node; - break; - } - ptr_in = &node->next; - node = node->next; - } - if(match != 0) - { - *ptr_in = match->next; - RDIM_SLLStackPush(tracker_free, match); - } - } - } - - // rjf: get final map state from tracker stack - RDI_U32 final_idx = 0; - if(tracker_stack != 0) - { - final_idx = tracker_stack->idx; - } - - // rjf: if final is different from initial - emit new vmap entry - if(final_idx != initial_idx) - { - vmap_ptr->voff = voff; - vmap_ptr->idx = final_idx; - vmap_ptr += 1; - } - } - - vmap_entry_count_pass_1 = (RDI_U32)(vmap_ptr - vmap); // TODO(rjf): @u64_to_u32 - } - - //- rjf: combine duplicate neighbors - RDI_U32 vmap_entry_count = 0; - ProfScope("combine duplicate neighbors") - { - RDI_VMapEntry *vmap_ptr = vmap; - RDI_VMapEntry *vmap_opl = vmap + vmap_entry_count_pass_1; - RDI_VMapEntry *vmap_out = vmap; - for(;vmap_ptr < vmap_opl;) - { - RDI_VMapEntry *vmap_range_first = vmap_ptr; - RDI_U64 idx = vmap_ptr->idx; - vmap_ptr += 1; - for(;vmap_ptr < vmap_opl && vmap_ptr->idx == idx;) vmap_ptr += 1; - rdim_memcpy_struct(vmap_out, vmap_range_first); - vmap_out += 1; - } - vmap_entry_count = (RDI_U32)(vmap_out - vmap); // TODO(rjf): @u64_to_u32 - } - - //- rjf: fill result - task->bake_vmap_out->vmap = vmap; - task->bake_vmap_out->count = vmap_entry_count; - } - scratch_end(scratch); - } - lane_sync(); -#endif - ////////////////////////////////////////////////////////////// //- rjf: @rdim_bake_stage build interned path tree //