eliminate old vmap baking

This commit is contained in:
Ryan Fleury
2025-10-14 16:35:24 -07:00
parent b757762908
commit c7d0a3431b
-482
View File
@@ -577,488 +577,6 @@ rdim_bake(Arena *arena, RDIM_BakeParams *params)
}
lane_sync();
//////////////////////////////////////////////////////////////
//- rjf: @rdim_bake_stage gather unsorted vmap keys/markers
//
#if 0
ProfScope("gather unsorted vmap keys/markers")
{
//- rjf: gather scope vmap keys/markers
//
// Runs only on the lane that owns task 0. Allocates the scope key array, its
// swap buffer, and the marker array (all sized by the scope list's
// `scope_voff_count`), then writes one "begin" and one "end" key/marker pair
// for every voff range of every scope. Each key's `val` points at the marker
// written alongside it.
if(lane_idx() == lane_from_task_idx(0)) ProfScope("gather scope vmap keys/markers")
{
  rdim_shared->scope_vmap_count = params->scopes.scope_voff_count;
  rdim_shared->scope_vmap_keys = push_array_no_zero(arena, RDIM_SortKey, rdim_shared->scope_vmap_count);
  rdim_shared->scope_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, rdim_shared->scope_vmap_count);
  rdim_shared->scope_vmap_markers = push_array_no_zero(arena, RDIM_VMapMarker, rdim_shared->scope_vmap_count);
  ProfScope("fill keys/markers")
  {
    RDIM_SortKey *keys = rdim_shared->scope_vmap_keys;
    RDIM_VMapMarker *markers = rdim_shared->scope_vmap_markers;
    RDI_U64 slot = 0; // next unwritten key/marker slot
    for(RDIM_ScopeChunkNode *chunk = params->scopes.first; chunk != 0; chunk = chunk->next)
    {
      for(RDI_U64 i = 0; i < chunk->count; i += 1)
      {
        RDIM_Scope *scope = &chunk->v[i];
        RDI_U32 scope_idx = (RDI_U32)rdim_idx_from_scope(scope); // TODO(rjf): @u64_to_u32
        for(RDIM_Rng1U64Node *range_n = scope->voff_ranges.first; range_n != 0; range_n = range_n->next)
        {
          // rjf: range-begin marker, keyed by the range's min voff
          markers[slot].idx = scope_idx;
          markers[slot].begin_range = 1;
          keys[slot].key = range_n->v.min;
          keys[slot].val = &markers[slot];
          slot += 1;

          // rjf: range-end marker, keyed by the range's max voff
          markers[slot].idx = scope_idx;
          markers[slot].begin_range = 0;
          keys[slot].key = range_n->v.max;
          keys[slot].val = &markers[slot];
          slot += 1;
        }
      }
    }
  }
}
//- rjf: gather unit vmap keys/markers
//
// Runs only on the lane that owns task 1. Counts every unit's voff ranges,
// allocates key/marker arrays with room for two entries per range, and writes
// a "begin" and an "end" key/marker pair for each non-empty range
// (min < max). Unit indices start at 1 and advance once per unit visited.
//
// The arrays are allocated without zeroing and empty ranges are skipped, so
// the number of entries written can be smaller than the allocated capacity.
// The published `unit_vmap_count` is therefore the number of entries actually
// written; publishing the capacity instead would hand later stages keys with
// indeterminate values and `val` pointers.
if(lane_idx() == lane_from_task_idx(1)) ProfScope("gather unit vmap keys/markers")
{
  // rjf: count voff ranges
  RDI_U64 voff_range_count = 0;
  for(RDIM_UnitChunkNode *n = params->units.first; n != 0; n = n->next)
  {
    for(RDI_U64 idx = 0; idx < n->count; idx += 1)
    {
      RDIM_Unit *unit = &n->v[idx];
      voff_range_count += unit->voff_ranges.total_count;
    }
  }

  // rjf: upper bound on markers (two per range, before skipping empty ranges)
  RDI_U64 marker_cap = voff_range_count*2;

  // rjf: build keys/markers arrays
  RDIM_SortKey *keys = rdim_push_array_no_zero(arena, RDIM_SortKey, marker_cap);
  RDIM_VMapMarker *markers = rdim_push_array_no_zero(arena, RDIM_VMapMarker, marker_cap);
  RDIM_SortKey *key_ptr = keys;
  RDIM_VMapMarker *marker_ptr = markers;
  {
    RDI_U32 unit_idx = 1;
    for(RDIM_UnitChunkNode *unit_chunk_n = params->units.first;
        unit_chunk_n != 0;
        unit_chunk_n = unit_chunk_n->next)
    {
      for(RDI_U64 idx = 0; idx < unit_chunk_n->count; idx += 1)
      {
        RDIM_Unit *unit = &unit_chunk_n->v[idx];
        for(RDIM_Rng1U64ChunkNode *n = unit->voff_ranges.first; n != 0; n = n->next)
        {
          for(RDI_U64 chunk_idx = 0; chunk_idx < n->count; chunk_idx += 1)
          {
            RDIM_Rng1U64 range = n->v[chunk_idx];
            if(range.min < range.max)
            {
              key_ptr->key = range.min;
              key_ptr->val = marker_ptr;
              marker_ptr->idx = unit_idx;
              marker_ptr->begin_range = 1;
              key_ptr += 1;
              marker_ptr += 1;
              key_ptr->key = range.max;
              key_ptr->val = marker_ptr;
              marker_ptr->idx = unit_idx;
              marker_ptr->begin_range = 0;
              key_ptr += 1;
              marker_ptr += 1;
            }
          }
        }
        unit_idx += 1;
      }
    }
  }

  // rjf: number of entries actually written (<= marker_cap)
  RDI_U64 marker_count = (RDI_U64)(key_ptr - keys);

  // rjf: store
  rdim_shared->unit_vmap_count = marker_count;
  rdim_shared->unit_vmap_keys = keys;
  rdim_shared->unit_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, marker_count);
  rdim_shared->unit_vmap_markers = markers;
}
//- rjf: gather global vmap keys/markers
//
// Runs only on the lane that owns task 2. Writes a "begin"/"end" key/marker
// pair for every global variable symbol, covering [offset, offset + size),
// where size is the symbol type's byte size or 1 when the symbol has no type.
// A final pair for index 0 spans [0, 0xffffffffffffffff].
if(lane_idx() == lane_from_task_idx(2)) ProfScope("gather global vmap keys/markers")
{
  //- rjf: allocate keys/markers (sized from the symbol list's total count, plus the nil pair)
  RDI_U64 total_markers = params->global_variables.total_count*2 + 2;
  RDIM_SortKey *sort_keys = rdim_push_array_no_zero(arena, RDIM_SortKey, total_markers);
  RDIM_VMapMarker *range_markers = rdim_push_array_no_zero(arena, RDIM_VMapMarker, total_markers);
  RDI_U64 slot = 0; // next unwritten key/marker slot

  //- rjf: fill actual globals
  for(RDIM_SymbolChunkNode *chunk = params->global_variables.first; chunk != 0; chunk = chunk->next)
  {
    for(RDI_U64 i = 0; i < chunk->count; i += 1)
    {
      RDIM_Symbol *symbol = &chunk->v[i];
      RDI_U32 symbol_idx = (RDI_U32)rdim_idx_from_symbol(symbol); // TODO(rjf): @u64_to_u32
      RDI_U64 byte_size = 1;
      if(symbol->type)
      {
        byte_size = symbol->type->byte_size;
      }
      RDI_U64 range_first = symbol->offset;
      RDI_U64 range_opl = range_first + byte_size;

      // rjf: range-begin marker
      range_markers[slot].idx = symbol_idx;
      range_markers[slot].begin_range = 1;
      sort_keys[slot].key = range_first;
      sort_keys[slot].val = &range_markers[slot];
      slot += 1;

      // rjf: range-end marker
      range_markers[slot].idx = symbol_idx;
      range_markers[slot].begin_range = 0;
      sort_keys[slot].key = range_opl;
      sort_keys[slot].val = &range_markers[slot];
      slot += 1;
    }
  }

  //- rjf: fill nil global (index 0)
  {
    RDI_U32 nil_idx = 0;

    range_markers[slot].idx = nil_idx;
    range_markers[slot].begin_range = 1;
    sort_keys[slot].key = 0;
    sort_keys[slot].val = &range_markers[slot];
    slot += 1;

    range_markers[slot].idx = nil_idx;
    range_markers[slot].begin_range = 0;
    sort_keys[slot].key = 0xffffffffffffffffull;
    sort_keys[slot].val = &range_markers[slot];
    slot += 1;
  }

  //- rjf: store
  rdim_shared->global_vmap_count = total_markers;
  rdim_shared->global_vmap_keys = sort_keys;
  rdim_shared->global_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, total_markers);
  rdim_shared->global_vmap_markers = range_markers;
}
}
lane_sync();
#endif
//////////////////////////////////////////////////////////////
//- rjf: @rdim_bake_stage sort all vmap keys
//
// Disabled stage: sorts the scope, unit, and global vmap key arrays by `key`,
// one array after another, with all lanes cooperating on each array.
//
// The sort is a least-significant-digit radix sort: 8 bits per digit, so
// 64/8 = 8 passes per array. Each pass has four lane-synchronized steps:
//   1. every lane histograms the digit values in its slice of `src`,
//   2. lanes compute, per digit value, each lane's offset relative to the
//      other lanes,
//   3. lane 0 turns those into absolute offsets into `dst`,
//   4. every lane scatters its slice of `src` into `dst`.
// `src` and `dst` swap after every pass; with 8 passes (an even number), the
// final sorted order ends up back in `keys` rather than `keys__swap`.
#if 0
ProfScope("sort all vmap keys")
{
// rjf: set up
// Lane 0 allocates the shared tables of per-lane histogram/offset pointers;
// the sync publishes them before any lane fills in its own entry.
if(lane_idx() == 0)
{
rdim_shared->lane_digit_counts = push_array(arena, U32 *, lane_count());
rdim_shared->lane_digit_offsets = push_array(arena, U32 *, lane_count());
}
lane_sync();
// rjf: sort
struct
{
RDI_U64 vmap_count;
RDIM_SortKey *keys;
RDIM_SortKey *keys__swap;
}
sort_tasks[] =
{
{rdim_shared->scope_vmap_count, rdim_shared->scope_vmap_keys, rdim_shared->scope_vmap_keys__swap},
{rdim_shared->unit_vmap_count, rdim_shared->unit_vmap_keys, rdim_shared->unit_vmap_keys__swap},
{rdim_shared->global_vmap_count, rdim_shared->global_vmap_keys, rdim_shared->global_vmap_keys__swap},
};
for EachElement(sort_task_idx, sort_tasks) ProfScope("sort %I64u", sort_task_idx)
{
RDI_U64 vmap_count = sort_tasks[sort_task_idx].vmap_count;
RDIM_SortKey *keys = sort_tasks[sort_task_idx].keys;
RDIM_SortKey *keys__swap = sort_tasks[sort_task_idx].keys__swap;
U64 bits_per_digit = 8;
U64 digits_count = 64 / bits_per_digit;
U64 num_possible_values_per_digit = 1 << bits_per_digit;
// NOTE(review): these per-lane tables are pushed onto `arena` once per sort
// task, so each lane allocates a fresh pair for every one of the 3 arrays.
rdim_shared->lane_digit_counts[lane_idx()] = push_array_no_zero(arena, U32, num_possible_values_per_digit);
rdim_shared->lane_digit_offsets[lane_idx()] = push_array_no_zero(arena, U32, num_possible_values_per_digit);
RDIM_SortKey *src = keys;
RDIM_SortKey *dst = keys__swap;
U64 element_count = vmap_count;
for EachIndex(digit_idx, digits_count)
{
// rjf: count digit value occurrences per-lane
{
U32 *digit_counts = rdim_shared->lane_digit_counts[lane_idx()];
MemoryZero(digit_counts, sizeof(digit_counts[0])*num_possible_values_per_digit);
// presumably `lane_range` yields this lane's slice of [0, element_count) -- TODO confirm
Rng1U64 range = lane_range(element_count);
for EachInRange(idx, range)
{
RDIM_SortKey *sort_key = &src[idx];
// extract the current 8-bit digit of the key (least significant digit first)
U16 digit_value = (U16)(U8)(sort_key->key >> (digit_idx*bits_per_digit));
digit_counts[digit_value] += 1;
}
}
lane_sync();
// rjf: compute thread * digit value *relative* offset table
// The digit values are split across lanes; for each value, a running sum over
// all lanes' counts gives every lane its starting offset within that value's
// bucket. The inner loop variable is named `lane_idx`, shadowing the
// `lane_idx()` call used elsewhere.
{
Rng1U64 range = lane_range(num_possible_values_per_digit);
for EachInRange(value_idx, range)
{
U64 layout_off = 0;
for EachIndex(lane_idx, lane_count())
{
// NOTE(review): `layout_off` is U64 but the table entries are U32.
rdim_shared->lane_digit_offsets[lane_idx][value_idx] = layout_off;
layout_off += rdim_shared->lane_digit_counts[lane_idx][value_idx];
}
}
}
lane_sync();
// rjf: convert relative offsets -> absolute offsets
// Lane 0 walks the digit values in ascending order, adding the running end of
// the previous value's bucket (last lane's offset + last lane's count) to
// every lane's offset for the current value.
if(lane_idx() == 0)
{
U64 last_off = 0;
// NOTE(review): `num_of_nonzero_digit` is never read.
U64 num_of_nonzero_digit = 0;
for EachIndex(value_idx, num_possible_values_per_digit)
{
for EachIndex(lane_idx, lane_count())
{
rdim_shared->lane_digit_offsets[lane_idx][value_idx] += last_off;
}
last_off = rdim_shared->lane_digit_offsets[lane_count()-1][value_idx] + rdim_shared->lane_digit_counts[lane_count()-1][value_idx];
}
// NOTE(rjf): required that: (last_off == element_count)
}
lane_sync();
// rjf: move
// Each lane scatters its slice of `src` into `dst`, using (and advancing) its
// own offset for the element's digit value.
{
U32 *lane_digit_offsets = rdim_shared->lane_digit_offsets[lane_idx()];
Rng1U64 range = lane_range(element_count);
for EachInRange(idx, range)
{
RDIM_SortKey *src_key = &src[idx];
U16 digit_value = (U16)(U8)(src_key->key >> (digit_idx*bits_per_digit));
U64 dst_off = lane_digit_offsets[digit_value];
lane_digit_offsets[digit_value] += 1;
MemoryCopyStruct(&dst[dst_off], src_key);
}
}
lane_sync();
// rjf: swap
// The pass output becomes the next pass's input.
{
RDIM_SortKey *swap = src;
src = dst;
dst = swap;
}
}
}
}
lane_sync();
#endif
//////////////////////////////////////////////////////////////
//- rjf: @rdim_bake_stage bake all vmaps
//
// Disabled stage: converts each sorted key/marker array (scopes, units,
// globals) into a baked vmap -- an array of (voff, idx) entries -- and stores
// it through the corresponding `rdim_shared->baked_*_vmap.vmap` pointer.
//
// Each lane builds its own task list containing only the tasks whose
// `lane_from_task_idx(i)` equals this lane, then bakes those tasks serially.
// For every task:
//   1. the sorted keys are swept in order while a stack of currently-open
//      ranges is maintained; an entry is emitted at each voff where the index
//      at the top of the stack changes,
//   2. runs of neighboring entries with the same idx are collapsed to the
//      first entry of the run.
#if 0
ProfScope("bake all vmaps")
{
Temp scratch = scratch_begin(&arena, 1);
typedef struct VMapBakeTask VMapBakeTask;
struct VMapBakeTask
{
VMapBakeTask *next;
String8 name;
RDI_U64 count;
RDIM_SortKey *keys;
RDIM_VMapMarker *markers;
RDIM_BakeVMap *bake_vmap_out;
};
// Per-lane task queue; task nodes live in the scratch arena.
VMapBakeTask *first_task = 0;
VMapBakeTask *last_task = 0;
if(lane_idx() == lane_from_task_idx(0))
{
VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1);
task->name = str8_lit("scopes");
task->count = rdim_shared->scope_vmap_count;
task->keys = rdim_shared->scope_vmap_keys;
task->markers = rdim_shared->scope_vmap_markers;
task->bake_vmap_out = &rdim_shared->baked_scope_vmap.vmap;
SLLQueuePush(first_task, last_task, task);
}
if(lane_idx() == lane_from_task_idx(1))
{
VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1);
task->name = str8_lit("units");
task->count = rdim_shared->unit_vmap_count;
task->keys = rdim_shared->unit_vmap_keys;
task->markers = rdim_shared->unit_vmap_markers;
task->bake_vmap_out = &rdim_shared->baked_unit_vmap.vmap;
SLLQueuePush(first_task, last_task, task);
}
if(lane_idx() == lane_from_task_idx(2))
{
VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1);
task->name = str8_lit("globals");
task->count = rdim_shared->global_vmap_count;
task->keys = rdim_shared->global_vmap_keys;
task->markers = rdim_shared->global_vmap_markers;
task->bake_vmap_out = &rdim_shared->baked_global_vmap.vmap;
SLLQueuePush(first_task, last_task, task);
}
for(VMapBakeTask *task = first_task; task != 0; task = task->next) ProfScope("vmap bake for %.*s", str8_varg(task->name))
{
//- rjf: determine if an extra vmap entry for zero is needed
// If the smallest key is not voff 0, a leading {voff 0, idx 0} entry is added.
RDI_U32 extra_vmap_entry = 0;
if(task->count > 0 && task->keys[0].key != 0)
{
extra_vmap_entry = 1;
}
//- rjf: fill output vmap entries
// Worst case is one entry per key plus the optional leading entry.
// NOTE(review): `task->count` is RDI_U64; the sum is narrowed to RDI_U32 here.
RDI_U32 vmap_count_raw = extra_vmap_entry + task->count;
RDI_VMapEntry *vmap = rdim_push_array(arena, RDI_VMapEntry, vmap_count_raw);
RDI_U32 vmap_entry_count_pass_1 = 0;
ProfScope("fill output vmap entries")
{
// One node per currently-open range; the list head is the most recently
// opened range that is still open.
typedef struct RDIM_VMapRangeTracker RDIM_VMapRangeTracker;
struct RDIM_VMapRangeTracker
{
RDIM_VMapRangeTracker *next;
RDI_U32 idx;
};
RDI_VMapEntry *vmap_ptr = vmap;
if(extra_vmap_entry)
{
vmap_ptr->voff = 0;
vmap_ptr->idx = 0;
vmap_ptr += 1;
}
RDIM_VMapRangeTracker *tracker_stack = 0;
// Free list of tracker nodes, reused before pushing new ones onto scratch.
RDIM_VMapRangeTracker *tracker_free = 0;
RDIM_SortKey *key_ptr = task->keys;
RDIM_SortKey *key_opl = task->keys + task->count;
for(;key_ptr < key_opl;)
{
// rjf: get initial map state from tracker stack
// An empty stack is represented as 0xffffffff here but as 0 for the final
// state below, so a voff that starts and ends with an empty stack still
// emits an idx-0 entry (later merged with idx-0 neighbors by the dedupe pass).
RDI_U32 initial_idx = (RDI_U32)0xffffffff;
if(tracker_stack != 0)
{
initial_idx = tracker_stack->idx;
}
// rjf: update tracker stack
//
// * we must process _all_ of the changes that apply at this voff before moving on
//
RDI_U64 voff = key_ptr->key;
for(;key_ptr < key_opl && key_ptr->key == voff; key_ptr += 1)
{
RDIM_VMapMarker *marker = (RDIM_VMapMarker*)key_ptr->val;
RDI_U32 idx = marker->idx;
// rjf: range begin -> push to stack
if(marker->begin_range)
{
RDIM_VMapRangeTracker *new_tracker = tracker_free;
if(new_tracker != 0)
{
RDIM_SLLStackPop(tracker_free);
}
else
{
new_tracker = rdim_push_array(scratch.arena, RDIM_VMapRangeTracker, 1);
}
RDIM_SLLStackPush(tracker_stack, new_tracker);
new_tracker->idx = idx;
}
// rjf: range ending -> pop matching node from stack (not always the top)
else
{
// `ptr_in` tracks the link that points at the current node, so the first
// node whose idx matches can be unlinked from the middle of the list.
RDIM_VMapRangeTracker **ptr_in = &tracker_stack;
RDIM_VMapRangeTracker *match = 0;
for(RDIM_VMapRangeTracker *node = tracker_stack; node != 0;)
{
if(node->idx == idx)
{
match = node;
break;
}
ptr_in = &node->next;
node = node->next;
}
// An end marker with no matching open range is ignored.
if(match != 0)
{
*ptr_in = match->next;
RDIM_SLLStackPush(tracker_free, match);
}
}
}
// rjf: get final map state from tracker stack
RDI_U32 final_idx = 0;
if(tracker_stack != 0)
{
final_idx = tracker_stack->idx;
}
// rjf: if final is different from initial - emit new vmap entry
if(final_idx != initial_idx)
{
vmap_ptr->voff = voff;
vmap_ptr->idx = final_idx;
vmap_ptr += 1;
}
}
vmap_entry_count_pass_1 = (RDI_U32)(vmap_ptr - vmap); // TODO(rjf): @u64_to_u32
}
//- rjf: combine duplicate neighbors
// In-place compaction: for each run of consecutive entries sharing an idx,
// only the first entry (lowest voff) of the run is kept.
RDI_U32 vmap_entry_count = 0;
ProfScope("combine duplicate neighbors")
{
RDI_VMapEntry *vmap_ptr = vmap;
RDI_VMapEntry *vmap_opl = vmap + vmap_entry_count_pass_1;
RDI_VMapEntry *vmap_out = vmap;
for(;vmap_ptr < vmap_opl;)
{
RDI_VMapEntry *vmap_range_first = vmap_ptr;
RDI_U64 idx = vmap_ptr->idx;
vmap_ptr += 1;
for(;vmap_ptr < vmap_opl && vmap_ptr->idx == idx;) vmap_ptr += 1;
rdim_memcpy_struct(vmap_out, vmap_range_first);
vmap_out += 1;
}
vmap_entry_count = (RDI_U32)(vmap_out - vmap); // TODO(rjf): @u64_to_u32
}
//- rjf: fill result
task->bake_vmap_out->vmap = vmap;
task->bake_vmap_out->count = vmap_entry_count;
}
scratch_end(scratch);
}
lane_sync();
#endif
//////////////////////////////////////////////////////////////
//- rjf: @rdim_bake_stage build interned path tree
//