mirror of
https://github.com/Ed94/raddebugger.git
synced 2026-06-12 23:31:38 -07:00
eliminate old vmap baking
This commit is contained in:
@@ -577,488 +577,6 @@ rdim_bake(Arena *arena, RDIM_BakeParams *params)
|
||||
}
|
||||
lane_sync();
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//- rjf: @rdim_bake_stage gather unsorted vmap keys/markers
|
||||
//
|
||||
#if 0
|
||||
ProfScope("gather unsorted vmap keys/markers")
|
||||
{
|
||||
//- rjf: gather scope vmap keys/markers
|
||||
if(lane_idx() == lane_from_task_idx(0)) ProfScope("gather scope vmap keys/markers")
|
||||
{
|
||||
rdim_shared->scope_vmap_count = params->scopes.scope_voff_count;
|
||||
rdim_shared->scope_vmap_keys = push_array_no_zero(arena, RDIM_SortKey, rdim_shared->scope_vmap_count);
|
||||
rdim_shared->scope_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, rdim_shared->scope_vmap_count);
|
||||
rdim_shared->scope_vmap_markers = push_array_no_zero(arena, RDIM_VMapMarker, rdim_shared->scope_vmap_count);
|
||||
ProfScope("fill keys/markers")
|
||||
{
|
||||
RDIM_SortKey *key_ptr = rdim_shared->scope_vmap_keys;
|
||||
RDIM_VMapMarker *marker_ptr = rdim_shared->scope_vmap_markers;
|
||||
for(RDIM_ScopeChunkNode *chunk_n = params->scopes.first; chunk_n != 0; chunk_n = chunk_n->next)
|
||||
{
|
||||
for(RDI_U64 chunk_idx = 0; chunk_idx < chunk_n->count; chunk_idx += 1)
|
||||
{
|
||||
RDIM_Scope *src_scope = &chunk_n->v[chunk_idx];
|
||||
RDI_U32 scope_idx = (RDI_U32)rdim_idx_from_scope(src_scope); // TODO(rjf): @u64_to_u32
|
||||
for(RDIM_Rng1U64Node *n = src_scope->voff_ranges.first; n != 0; n = n->next)
|
||||
{
|
||||
key_ptr->key = n->v.min;
|
||||
key_ptr->val = marker_ptr;
|
||||
marker_ptr->idx = scope_idx;
|
||||
marker_ptr->begin_range = 1;
|
||||
key_ptr += 1;
|
||||
marker_ptr += 1;
|
||||
|
||||
key_ptr->key = n->v.max;
|
||||
key_ptr->val = marker_ptr;
|
||||
marker_ptr->idx = scope_idx;
|
||||
marker_ptr->begin_range = 0;
|
||||
key_ptr += 1;
|
||||
marker_ptr += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//- rjf: gather unit vmap keys/markers
|
||||
if(lane_idx() == lane_from_task_idx(1)) ProfScope("gather unit vmap keys/markers")
|
||||
{
|
||||
// rjf: count voff ranges
|
||||
RDI_U64 voff_range_count = 0;
|
||||
for(RDIM_UnitChunkNode *n = params->units.first; n != 0; n = n->next)
|
||||
{
|
||||
for(RDI_U64 idx = 0; idx < n->count; idx += 1)
|
||||
{
|
||||
RDIM_Unit *unit = &n->v[idx];
|
||||
voff_range_count += unit->voff_ranges.total_count;
|
||||
}
|
||||
}
|
||||
|
||||
// rjf: count necessary markers
|
||||
RDI_U64 marker_count = voff_range_count*2;
|
||||
|
||||
// rjf: build keys/markers arrays
|
||||
RDIM_SortKey *keys = rdim_push_array_no_zero(arena, RDIM_SortKey, marker_count);
|
||||
RDIM_VMapMarker *markers = rdim_push_array_no_zero(arena, RDIM_VMapMarker, marker_count);
|
||||
{
|
||||
RDIM_SortKey *key_ptr = keys;
|
||||
RDIM_VMapMarker *marker_ptr = markers;
|
||||
RDI_U32 unit_idx = 1;
|
||||
for(RDIM_UnitChunkNode *unit_chunk_n = params->units.first;
|
||||
unit_chunk_n != 0;
|
||||
unit_chunk_n = unit_chunk_n->next)
|
||||
{
|
||||
for(RDI_U64 idx = 0; idx < unit_chunk_n->count; idx += 1)
|
||||
{
|
||||
RDIM_Unit *unit = &unit_chunk_n->v[idx];
|
||||
for(RDIM_Rng1U64ChunkNode *n = unit->voff_ranges.first; n != 0; n = n->next)
|
||||
{
|
||||
for(RDI_U64 chunk_idx = 0; chunk_idx < n->count; chunk_idx += 1)
|
||||
{
|
||||
RDIM_Rng1U64 range = n->v[chunk_idx];
|
||||
if(range.min < range.max)
|
||||
{
|
||||
key_ptr->key = range.min;
|
||||
key_ptr->val = marker_ptr;
|
||||
marker_ptr->idx = unit_idx;
|
||||
marker_ptr->begin_range = 1;
|
||||
key_ptr += 1;
|
||||
marker_ptr += 1;
|
||||
|
||||
key_ptr->key = range.max;
|
||||
key_ptr->val = marker_ptr;
|
||||
marker_ptr->idx = unit_idx;
|
||||
marker_ptr->begin_range = 0;
|
||||
key_ptr += 1;
|
||||
marker_ptr += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
unit_idx += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rjf: store
|
||||
rdim_shared->unit_vmap_count = marker_count;
|
||||
rdim_shared->unit_vmap_keys = keys;
|
||||
rdim_shared->unit_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, marker_count);
|
||||
rdim_shared->unit_vmap_markers = markers;
|
||||
}
|
||||
|
||||
//- rjf: gather global vmap keys/markers
|
||||
if(lane_idx() == lane_from_task_idx(2)) ProfScope("gather global vmap keys/markers")
|
||||
{
|
||||
//- rjf: allocate keys/markers
|
||||
RDI_U64 marker_count = params->global_variables.total_count*2 + 2;
|
||||
RDIM_SortKey *keys = rdim_push_array_no_zero(arena, RDIM_SortKey, marker_count);
|
||||
RDIM_VMapMarker *markers = rdim_push_array_no_zero(arena, RDIM_VMapMarker, marker_count);
|
||||
|
||||
//- rjf: fill
|
||||
{
|
||||
RDIM_SortKey *key_ptr = keys;
|
||||
RDIM_VMapMarker *marker_ptr = markers;
|
||||
|
||||
// rjf: fill actual globals
|
||||
for(RDIM_SymbolChunkNode *n = params->global_variables.first; n != 0; n = n->next)
|
||||
{
|
||||
for(RDI_U64 chunk_idx = 0; chunk_idx < n->count; chunk_idx += 1)
|
||||
{
|
||||
RDIM_Symbol *global_var = &n->v[chunk_idx];
|
||||
RDI_U32 global_var_idx = (RDI_U32)rdim_idx_from_symbol(global_var); // TODO(rjf): @u64_to_u32
|
||||
RDI_U64 global_var_size = global_var->type ? global_var->type->byte_size : 1;
|
||||
|
||||
RDI_U64 first = global_var->offset;
|
||||
RDI_U64 opl = first + global_var_size;
|
||||
|
||||
key_ptr->key = first;
|
||||
key_ptr->val = marker_ptr;
|
||||
marker_ptr->idx = global_var_idx;
|
||||
marker_ptr->begin_range = 1;
|
||||
key_ptr += 1;
|
||||
marker_ptr += 1;
|
||||
|
||||
key_ptr->key = opl;
|
||||
key_ptr->val = marker_ptr;
|
||||
marker_ptr->idx = global_var_idx;
|
||||
marker_ptr->begin_range = 0;
|
||||
key_ptr += 1;
|
||||
marker_ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// rjf: fill nil global
|
||||
{
|
||||
RDI_U32 global_idx = 0;
|
||||
RDI_U64 first = 0;
|
||||
RDI_U64 opl = 0xffffffffffffffffull;
|
||||
key_ptr->key = first;
|
||||
key_ptr->val = marker_ptr;
|
||||
marker_ptr->idx = global_idx;
|
||||
marker_ptr->begin_range = 1;
|
||||
key_ptr += 1;
|
||||
marker_ptr += 1;
|
||||
key_ptr->key = opl;
|
||||
key_ptr->val = marker_ptr;
|
||||
marker_ptr->idx = global_idx;
|
||||
marker_ptr->begin_range = 0;
|
||||
key_ptr += 1;
|
||||
marker_ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
//- rjf: store
|
||||
rdim_shared->global_vmap_count = marker_count;
|
||||
rdim_shared->global_vmap_keys = keys;
|
||||
rdim_shared->global_vmap_keys__swap = push_array_no_zero(arena, RDIM_SortKey, marker_count);
|
||||
rdim_shared->global_vmap_markers = markers;
|
||||
}
|
||||
}
|
||||
lane_sync();
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//- rjf: @rdim_bake_stage sort all vmap keys
|
||||
//
|
||||
#if 0
|
||||
ProfScope("sort all vmap keys")
|
||||
{
|
||||
// rjf: set up
|
||||
if(lane_idx() == 0)
|
||||
{
|
||||
rdim_shared->lane_digit_counts = push_array(arena, U32 *, lane_count());
|
||||
rdim_shared->lane_digit_offsets = push_array(arena, U32 *, lane_count());
|
||||
}
|
||||
lane_sync();
|
||||
|
||||
// rjf: sort
|
||||
struct
|
||||
{
|
||||
RDI_U64 vmap_count;
|
||||
RDIM_SortKey *keys;
|
||||
RDIM_SortKey *keys__swap;
|
||||
}
|
||||
sort_tasks[] =
|
||||
{
|
||||
{rdim_shared->scope_vmap_count, rdim_shared->scope_vmap_keys, rdim_shared->scope_vmap_keys__swap},
|
||||
{rdim_shared->unit_vmap_count, rdim_shared->unit_vmap_keys, rdim_shared->unit_vmap_keys__swap},
|
||||
{rdim_shared->global_vmap_count, rdim_shared->global_vmap_keys, rdim_shared->global_vmap_keys__swap},
|
||||
};
|
||||
for EachElement(sort_task_idx, sort_tasks) ProfScope("sort %I64u", sort_task_idx)
|
||||
{
|
||||
RDI_U64 vmap_count = sort_tasks[sort_task_idx].vmap_count;
|
||||
RDIM_SortKey *keys = sort_tasks[sort_task_idx].keys;
|
||||
RDIM_SortKey *keys__swap = sort_tasks[sort_task_idx].keys__swap;
|
||||
U64 bits_per_digit = 8;
|
||||
U64 digits_count = 64 / bits_per_digit;
|
||||
U64 num_possible_values_per_digit = 1 << bits_per_digit;
|
||||
rdim_shared->lane_digit_counts[lane_idx()] = push_array_no_zero(arena, U32, num_possible_values_per_digit);
|
||||
rdim_shared->lane_digit_offsets[lane_idx()] = push_array_no_zero(arena, U32, num_possible_values_per_digit);
|
||||
RDIM_SortKey *src = keys;
|
||||
RDIM_SortKey *dst = keys__swap;
|
||||
U64 element_count = vmap_count;
|
||||
for EachIndex(digit_idx, digits_count)
|
||||
{
|
||||
// rjf: count digit value occurrences per-lane
|
||||
{
|
||||
U32 *digit_counts = rdim_shared->lane_digit_counts[lane_idx()];
|
||||
MemoryZero(digit_counts, sizeof(digit_counts[0])*num_possible_values_per_digit);
|
||||
Rng1U64 range = lane_range(element_count);
|
||||
for EachInRange(idx, range)
|
||||
{
|
||||
RDIM_SortKey *sort_key = &src[idx];
|
||||
U16 digit_value = (U16)(U8)(sort_key->key >> (digit_idx*bits_per_digit));
|
||||
digit_counts[digit_value] += 1;
|
||||
}
|
||||
}
|
||||
lane_sync();
|
||||
|
||||
// rjf: compute thread * digit value *relative* offset table
|
||||
{
|
||||
Rng1U64 range = lane_range(num_possible_values_per_digit);
|
||||
for EachInRange(value_idx, range)
|
||||
{
|
||||
U64 layout_off = 0;
|
||||
for EachIndex(lane_idx, lane_count())
|
||||
{
|
||||
rdim_shared->lane_digit_offsets[lane_idx][value_idx] = layout_off;
|
||||
layout_off += rdim_shared->lane_digit_counts[lane_idx][value_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
lane_sync();
|
||||
|
||||
// rjf: convert relative offsets -> absolute offsets
|
||||
if(lane_idx() == 0)
|
||||
{
|
||||
U64 last_off = 0;
|
||||
U64 num_of_nonzero_digit = 0;
|
||||
for EachIndex(value_idx, num_possible_values_per_digit)
|
||||
{
|
||||
for EachIndex(lane_idx, lane_count())
|
||||
{
|
||||
rdim_shared->lane_digit_offsets[lane_idx][value_idx] += last_off;
|
||||
}
|
||||
last_off = rdim_shared->lane_digit_offsets[lane_count()-1][value_idx] + rdim_shared->lane_digit_counts[lane_count()-1][value_idx];
|
||||
}
|
||||
// NOTE(rjf): required that: (last_off == element_count)
|
||||
}
|
||||
lane_sync();
|
||||
|
||||
// rjf: move
|
||||
{
|
||||
U32 *lane_digit_offsets = rdim_shared->lane_digit_offsets[lane_idx()];
|
||||
Rng1U64 range = lane_range(element_count);
|
||||
for EachInRange(idx, range)
|
||||
{
|
||||
RDIM_SortKey *src_key = &src[idx];
|
||||
U16 digit_value = (U16)(U8)(src_key->key >> (digit_idx*bits_per_digit));
|
||||
U64 dst_off = lane_digit_offsets[digit_value];
|
||||
lane_digit_offsets[digit_value] += 1;
|
||||
MemoryCopyStruct(&dst[dst_off], src_key);
|
||||
}
|
||||
}
|
||||
lane_sync();
|
||||
|
||||
// rjf: swap
|
||||
{
|
||||
RDIM_SortKey *swap = src;
|
||||
src = dst;
|
||||
dst = swap;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
lane_sync();
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//- rjf: @rdim_bake_stage bake all vmaps
|
||||
//
|
||||
#if 0
|
||||
ProfScope("bake all vmaps")
|
||||
{
|
||||
Temp scratch = scratch_begin(&arena, 1);
|
||||
// One unit of vmap-baking work. Nodes are allocated from scratch, chained
// into a per-lane singly-linked queue (first_task/last_task), and then
// consumed by the bake loop that follows.
typedef struct VMapBakeTask VMapBakeTask;
|
||||
struct VMapBakeTask
|
||||
{
|
||||
VMapBakeTask *next; // next task in the queue; 0 terminates the bake loop
|
||||
String8 name; // label formatted into the "vmap bake for %.*s" profile scope
|
||||
RDI_U64 count; // number of entries in `keys`
|
||||
RDIM_SortKey *keys; // walked front-to-back; consecutive entries with equal `key` are processed as one group (presumably already sorted by the preceding stage - confirm)
|
||||
RDIM_VMapMarker *markers; // marker array paired with `keys`; the visible bake loop reaches markers through key->val rather than through this field
|
||||
RDIM_BakeVMap *bake_vmap_out; // destination; receives the baked entry array (`vmap`) and its entry `count`
|
||||
};
|
||||
VMapBakeTask *first_task = 0;
|
||||
VMapBakeTask *last_task = 0;
|
||||
if(lane_idx() == lane_from_task_idx(0))
|
||||
{
|
||||
VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1);
|
||||
task->name = str8_lit("scopes");
|
||||
task->count = rdim_shared->scope_vmap_count;
|
||||
task->keys = rdim_shared->scope_vmap_keys;
|
||||
task->markers = rdim_shared->scope_vmap_markers;
|
||||
task->bake_vmap_out = &rdim_shared->baked_scope_vmap.vmap;
|
||||
SLLQueuePush(first_task, last_task, task);
|
||||
}
|
||||
if(lane_idx() == lane_from_task_idx(1))
|
||||
{
|
||||
VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1);
|
||||
task->name = str8_lit("units");
|
||||
task->count = rdim_shared->unit_vmap_count;
|
||||
task->keys = rdim_shared->unit_vmap_keys;
|
||||
task->markers = rdim_shared->unit_vmap_markers;
|
||||
task->bake_vmap_out = &rdim_shared->baked_unit_vmap.vmap;
|
||||
SLLQueuePush(first_task, last_task, task);
|
||||
}
|
||||
if(lane_idx() == lane_from_task_idx(2))
|
||||
{
|
||||
VMapBakeTask *task = push_array(scratch.arena, VMapBakeTask, 1);
|
||||
task->name = str8_lit("globals");
|
||||
task->count = rdim_shared->global_vmap_count;
|
||||
task->keys = rdim_shared->global_vmap_keys;
|
||||
task->markers = rdim_shared->global_vmap_markers;
|
||||
task->bake_vmap_out = &rdim_shared->baked_global_vmap.vmap;
|
||||
SLLQueuePush(first_task, last_task, task);
|
||||
}
|
||||
for(VMapBakeTask *task = first_task; task != 0; task = task->next) ProfScope("vmap bake for %.*s", str8_varg(task->name))
|
||||
{
|
||||
//- rjf: determine if an extra vmap entry for zero is needed
|
||||
RDI_U32 extra_vmap_entry = 0;
|
||||
if(task->count > 0 && task->keys[0].key != 0)
|
||||
{
|
||||
extra_vmap_entry = 1;
|
||||
}
|
||||
|
||||
//- rjf: fill output vmap entries
|
||||
RDI_U32 vmap_count_raw = extra_vmap_entry + task->count;
|
||||
RDI_VMapEntry *vmap = rdim_push_array(arena, RDI_VMapEntry, vmap_count_raw);
|
||||
RDI_U32 vmap_entry_count_pass_1 = 0;
|
||||
ProfScope("fill output vmap entries")
|
||||
{
|
||||
// Node recording one currently-open range while walking the keys. Nodes
// live either on `tracker_stack` (open ranges, most recently begun on top)
// or on `tracker_free` (recycled nodes, reused before allocating new ones).
typedef struct RDIM_VMapRangeTracker RDIM_VMapRangeTracker;
|
||||
struct RDIM_VMapRangeTracker
|
||||
{
|
||||
RDIM_VMapRangeTracker *next; // link within whichever list (stack or free list) holds this node
|
||||
RDI_U32 idx; // marker idx of the range that pushed this node; matched against end markers and read from the stack top as the current map state
|
||||
};
|
||||
RDI_VMapEntry *vmap_ptr = vmap;
|
||||
if(extra_vmap_entry)
|
||||
{
|
||||
vmap_ptr->voff = 0;
|
||||
vmap_ptr->idx = 0;
|
||||
vmap_ptr += 1;
|
||||
}
|
||||
RDIM_VMapRangeTracker *tracker_stack = 0;
|
||||
RDIM_VMapRangeTracker *tracker_free = 0;
|
||||
RDIM_SortKey *key_ptr = task->keys;
|
||||
RDIM_SortKey *key_opl = task->keys + task->count;
|
||||
for(;key_ptr < key_opl;)
|
||||
{
|
||||
// rjf: get initial map state from tracker stack
|
||||
RDI_U32 initial_idx = (RDI_U32)0xffffffff;
|
||||
if(tracker_stack != 0)
|
||||
{
|
||||
initial_idx = tracker_stack->idx;
|
||||
}
|
||||
|
||||
// rjf: update tracker stack
|
||||
//
|
||||
// * we must process _all_ of the changes that apply at this voff before moving on
|
||||
//
|
||||
RDI_U64 voff = key_ptr->key;
|
||||
for(;key_ptr < key_opl && key_ptr->key == voff; key_ptr += 1)
|
||||
{
|
||||
RDIM_VMapMarker *marker = (RDIM_VMapMarker*)key_ptr->val;
|
||||
RDI_U32 idx = marker->idx;
|
||||
|
||||
// rjf: range begin -> push to stack
|
||||
if(marker->begin_range)
|
||||
{
|
||||
RDIM_VMapRangeTracker *new_tracker = tracker_free;
|
||||
if(new_tracker != 0)
|
||||
{
|
||||
RDIM_SLLStackPop(tracker_free);
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tracker = rdim_push_array(scratch.arena, RDIM_VMapRangeTracker, 1);
|
||||
}
|
||||
RDIM_SLLStackPush(tracker_stack, new_tracker);
|
||||
new_tracker->idx = idx;
|
||||
}
|
||||
|
||||
// rjf: range ending -> pop matching node from stack (not always the top)
|
||||
else
|
||||
{
|
||||
RDIM_VMapRangeTracker **ptr_in = &tracker_stack;
|
||||
RDIM_VMapRangeTracker *match = 0;
|
||||
for(RDIM_VMapRangeTracker *node = tracker_stack; node != 0;)
|
||||
{
|
||||
if(node->idx == idx)
|
||||
{
|
||||
match = node;
|
||||
break;
|
||||
}
|
||||
ptr_in = &node->next;
|
||||
node = node->next;
|
||||
}
|
||||
if(match != 0)
|
||||
{
|
||||
*ptr_in = match->next;
|
||||
RDIM_SLLStackPush(tracker_free, match);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rjf: get final map state from tracker stack
|
||||
RDI_U32 final_idx = 0;
|
||||
if(tracker_stack != 0)
|
||||
{
|
||||
final_idx = tracker_stack->idx;
|
||||
}
|
||||
|
||||
// rjf: if final is different from initial - emit new vmap entry
|
||||
if(final_idx != initial_idx)
|
||||
{
|
||||
vmap_ptr->voff = voff;
|
||||
vmap_ptr->idx = final_idx;
|
||||
vmap_ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
vmap_entry_count_pass_1 = (RDI_U32)(vmap_ptr - vmap); // TODO(rjf): @u64_to_u32
|
||||
}
|
||||
|
||||
//- rjf: combine duplicate neighbors
|
||||
RDI_U32 vmap_entry_count = 0;
|
||||
ProfScope("combine duplicate neighbors")
|
||||
{
|
||||
RDI_VMapEntry *vmap_ptr = vmap;
|
||||
RDI_VMapEntry *vmap_opl = vmap + vmap_entry_count_pass_1;
|
||||
RDI_VMapEntry *vmap_out = vmap;
|
||||
for(;vmap_ptr < vmap_opl;)
|
||||
{
|
||||
RDI_VMapEntry *vmap_range_first = vmap_ptr;
|
||||
RDI_U64 idx = vmap_ptr->idx;
|
||||
vmap_ptr += 1;
|
||||
for(;vmap_ptr < vmap_opl && vmap_ptr->idx == idx;) vmap_ptr += 1;
|
||||
rdim_memcpy_struct(vmap_out, vmap_range_first);
|
||||
vmap_out += 1;
|
||||
}
|
||||
vmap_entry_count = (RDI_U32)(vmap_out - vmap); // TODO(rjf): @u64_to_u32
|
||||
}
|
||||
|
||||
//- rjf: fill result
|
||||
task->bake_vmap_out->vmap = vmap;
|
||||
task->bake_vmap_out->count = vmap_entry_count;
|
||||
}
|
||||
scratch_end(scratch);
|
||||
}
|
||||
lane_sync();
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//- rjf: @rdim_bake_stage build interned path tree
|
||||
//
|
||||
|
||||
Reference in New Issue
Block a user