fix incorrect idx run baking; slots need to be laid out based on *idx counts*, rather than *value counts*!

This commit is contained in:
Ryan Fleury
2025-09-03 14:26:29 -07:00
parent e276ce4fb3
commit 955f033043
7 changed files with 143 additions and 121 deletions
+4 -38
View File
@@ -1709,7 +1709,7 @@ rdim_bake_idx_run_chunk_list_concat_in_place(RDIM_BakeIdxRunChunkList *dst, RDIM
{
for(RDIM_BakeIdxRunChunkNode *n = to_push->first; n != 0; n = n->next)
{
n->base_idx += dst->total_count;
n->base_idx += dst->total_idx_count;
}
if(dst->last != 0 && to_push->first != 0)
{
@@ -1810,6 +1810,7 @@ rdim_bake_idx_run_map_loose_make(RDIM_Arena *arena, RDIM_BakeIdxRunMapTopology *
{
RDIM_BakeIdxRunMapLoose *map = rdim_push_array(arena, RDIM_BakeIdxRunMapLoose, 1);
map->slots = rdim_push_array(arena, RDIM_BakeIdxRunChunkList *, top->slots_count);
map->slots_idx_counts = rdim_push_array(arena, RDI_U64, top->slots_count);
return map;
}
@@ -1844,48 +1845,13 @@ rdim_bake_idx_run_map_loose_insert(RDIM_Arena *arena, RDIM_BakeIdxRunMapTopology
bir->hash = hash;
bir->count = count;
bir->idxes = idxes;
map->slots_idx_counts[slot_idx] += count;
}
}
}
//- rjf: slot base index layout
//
// Computes where each slot of `map` begins when the idx runs of all slots are
// laid out back-to-back in one flat array of indices.
//
// arena:        storage for the returned `slots_base_idxs` array.
// map_topology: supplies `slots_count`.
// map:          loose map whose non-null slots contribute their sizes.
//
// Returns a table of `slots_count + 1` offsets: entry [i] is the base offset of
// slot i, and entry [slots_count] is the total number of indices in all slots.
//
// NOTE: a slot's footprint is the number of *indices* stored by its runs
// (`total_idx_count`), not the number of runs (`total_count`). A run of N
// indices occupies N entries in the flat array, so laying slots out by run
// count makes neighbouring slots overlap as soon as any run has more than one
// index.
RDI_PROC RDIM_BakeIdxRunMapBaseIndices
rdim_bake_idx_run_map_base_indices_from_map_loose(RDIM_Arena *arena, RDIM_BakeIdxRunMapTopology *map_topology, RDIM_BakeIdxRunMapLoose *map)
{
  RDI_U64 slots_count = map_topology->slots_count;
  RDIM_BakeIdxRunMapBaseIndices indices = {0};
  indices.slots_base_idxs = rdim_push_array(arena, RDI_U64, slots_count+1);
  RDI_U64 total_idx_count = 0;
  for(RDI_U64 slot_idx = 0; slot_idx < slots_count; slot_idx += 1)
  {
    // rjf: plain store; the entry does not depend on the array's prior contents
    indices.slots_base_idxs[slot_idx] = total_idx_count;
    if(map->slots[slot_idx] != 0)
    {
      total_idx_count += map->slots[slot_idx]->total_idx_count;
    }
  }
  indices.slots_base_idxs[slots_count] = total_idx_count;
  return indices;
}
//- rjf: finalized / tight map
// Builds the finalized ("tight") idx run map from a loose map.
//
// The tight map owns a by-value array of chunk lists (one per topology slot);
// every non-null slot of the loose map is copied into it, and slots that are
// null in the loose map keep whatever `rdim_push_array` left in the entry.
// The base index table is *shared* with `map_base_indices`, not copied, and
// its final entry ([slots_count]) is cached as `total_count`.
RDI_PROC RDIM_BakeIdxRunMap2 *
rdim_bake_idx_run_map_from_loose(RDIM_Arena *arena, RDIM_BakeIdxRunMapTopology *map_topology, RDIM_BakeIdxRunMapBaseIndices *map_base_indices, RDIM_BakeIdxRunMapLoose *map)
{
  RDI_U64 slots_count = map_topology->slots_count;

  //- set up tight map storage
  RDIM_BakeIdxRunMap2 *tight = rdim_push_array(arena, RDIM_BakeIdxRunMap2, 1);
  tight->slots_count     = slots_count;
  tight->slots           = rdim_push_array(arena, RDIM_BakeIdxRunChunkList, slots_count);
  tight->slots_base_idxs = map_base_indices->slots_base_idxs;
  tight->total_count     = tight->slots_base_idxs[slots_count];

  //- copy each populated loose slot into its tight counterpart
  for(RDI_U64 slot_idx = 0; slot_idx < slots_count; slot_idx += 1)
  {
    RDIM_BakeIdxRunChunkList *src_list = map->slots[slot_idx];
    if(src_list == 0)
    {
      continue;
    }
    rdim_memcpy_struct(&tight->slots[slot_idx], src_list);
  }

  return tight;
}
RDI_PROC RDI_U32
rdim_bake_idx_from_idx_run_2(RDIM_BakeIdxRunMap2 *map, RDI_U32 *idxes, RDI_U32 count)
{
@@ -1900,7 +1866,7 @@ rdim_bake_idx_from_idx_run_2(RDIM_BakeIdxRunMap2 *map, RDI_U32 *idxes, RDI_U32 c
{
if(n->v[chunk_idx].hash == hash)
{
idx = map->slots_base_idxs[slot_idx] + n->base_idx + chunk_idx + 1;
idx = map->slots_base_idxs[slot_idx] + n->base_idx + n->v[chunk_idx].encoding_idx;
break;
}
}
+4 -10
View File
@@ -1183,6 +1183,7 @@ struct RDIM_BakeIdxRun
RDI_U64 hash;
RDI_U64 count;
RDI_U32 *idxes;
RDI_U64 encoding_idx;
};
typedef struct RDIM_BakeIdxRunChunkNode RDIM_BakeIdxRunChunkNode;
@@ -1202,6 +1203,7 @@ struct RDIM_BakeIdxRunChunkList
RDIM_BakeIdxRunChunkNode *last;
RDI_U64 chunk_count;
RDI_U64 total_count;
RDI_U64 total_idx_count;
};
typedef struct RDIM_BakeIdxRunMapTopology RDIM_BakeIdxRunMapTopology;
@@ -1210,25 +1212,19 @@ struct RDIM_BakeIdxRunMapTopology
RDI_U64 slots_count;
};
// Per-slot base offset table for an idx run map.
typedef struct RDIM_BakeIdxRunMapBaseIndices RDIM_BakeIdxRunMapBaseIndices;
struct RDIM_BakeIdxRunMapBaseIndices
{
// [slots_count+1] entries: [i] is the running total accumulated before slot i,
// and the final entry [slots_count] holds the total across all slots.
RDI_U64 *slots_base_idxs;
};
// "Loose" idx run map: the form that idx runs are inserted into before the
// map is finalized. Both arrays are allocated with one entry per topology slot.
typedef struct RDIM_BakeIdxRunMapLoose RDIM_BakeIdxRunMapLoose;
struct RDIM_BakeIdxRunMapLoose
{
// [slots_count] pointers to chunk lists; an entry is null while its slot
// holds nothing (readers null-check before dereferencing).
RDIM_BakeIdxRunChunkList **slots;
// [slots_count] per-slot counters; the insert path adds the inserted run's
// `count` (its number of indices) to the slot's entry.
RDI_U64 *slots_idx_counts;
};
// Finalized ("tight") idx run map: one chunk list per slot, stored by value,
// plus the table of offsets at which each slot's data begins.
typedef struct RDIM_BakeIdxRunMap2 RDIM_BakeIdxRunMap2;
struct RDIM_BakeIdxRunMap2
{
  RDIM_BakeIdxRunChunkList *slots; // [slots_count]
  RDI_U64 *slots_base_idxs; // NOTE(rjf): [slots_count+1], [slots_count] holds total count
  RDI_U64 slots_count;
  RDI_U64 total_count; // cached copy of slots_base_idxs[slots_count]
};
//- rjf: index runs (OLD)
@@ -1836,10 +1832,8 @@ RDI_PROC RDIM_BakeIdxRunChunkList rdim_bake_idx_run_chunk_list_sorted_from_unsor
//- rjf: loose map
RDI_PROC RDIM_BakeIdxRunMapLoose *rdim_bake_idx_run_map_loose_make(RDIM_Arena *arena, RDIM_BakeIdxRunMapTopology *top);
RDI_PROC void rdim_bake_idx_run_map_loose_insert(RDIM_Arena *arena, RDIM_BakeIdxRunMapTopology *map_topology, RDIM_BakeIdxRunMapLoose *map, RDI_U64 chunk_cap, RDI_U32 *idxes, RDI_U32 count);
RDI_PROC RDIM_BakeIdxRunMapBaseIndices rdim_bake_idx_run_map_base_indices_from_map_loose(RDIM_Arena *arena, RDIM_BakeIdxRunMapTopology *map_topology, RDIM_BakeIdxRunMapLoose *map);
//- rjf: finalized / tight map
RDI_PROC RDIM_BakeIdxRunMap2 *rdim_bake_idx_run_map_from_loose(RDIM_Arena *arena, RDIM_BakeIdxRunMapTopology *map_topology, RDIM_BakeIdxRunMapBaseIndices *map_base_indices, RDIM_BakeIdxRunMapLoose *map);
RDI_PROC RDI_U32 rdim_bake_idx_from_idx_run_2(RDIM_BakeIdxRunMap2 *map, RDI_U32 *idxes, RDI_U32 count);
////////////////////////////////
+9
View File
@@ -14,6 +14,15 @@ rb_entry_point(CmdLine *cmdline)
{
Temp scratch = scratch_begin(0, 0);
U64 threads_count = os_get_system_info()->logical_processor_count;
// rjf: optional override of the worker thread count via the `thread_count`
// command line argument. The override is only taken when it parses as an
// integer *and* is non-zero; a zero count would size the thread arrays and the
// barrier below with zero participants, so in that case the detected logical
// processor count is kept instead.
String8 threads_count_from_cmdline_string = cmd_line_string(cmdline, str8_lit("thread_count"));
if(threads_count_from_cmdline_string.size != 0)
{
  U64 threads_count_from_cmdline = 0;
  if(try_u64_from_str8_c_rules(threads_count_from_cmdline_string, &threads_count_from_cmdline) &&
     threads_count_from_cmdline != 0)
  {
    threads_count = threads_count_from_cmdline;
  }
}
OS_Handle *threads = push_array(scratch.arena, OS_Handle, threads_count);
RB_ThreadParams *threads_params = push_array(scratch.arena, RB_ThreadParams, threads_count);
Barrier barrier = barrier_alloc(threads_count);
+3 -3
View File
@@ -798,7 +798,7 @@ rdi_dump_list_from_parsed(Arena *arena, RDI_Parsed *rdi, RDI_DumpSubsetFlags fla
str8_list_pushf(scratch.arena, &param_idx_strings, "%u", param_idx_array[param_idx]);
}
String8 param_idx_str = str8_list_join(scratch.arena, &param_idx_strings, &(StringJoin){.pre = str8_lit("["), .sep = str8_lit(", "), .post = str8_lit("]")});
dumpf(" constructed__params: %S\n", param_idx_str);
dumpf(" constructed__params: %S // idx_run[%u]\n", param_idx_str, type->constructed.param_idx_run_first);
dumpf(" return_type: %u\n", type->constructed.direct_type_idx);
}
else if(type->kind == RDI_TypeKind_Method)
@@ -818,8 +818,8 @@ rdi_dump_list_from_parsed(Arena *arena, RDI_Parsed *rdi, RDI_DumpSubsetFlags fla
str8_list_pushf(scratch.arena, &param_idx_strings, "%u", param_idx_array[param_idx]);
}
String8 param_idx_str = str8_list_join(scratch.arena, &param_idx_strings, &(StringJoin){.pre = str8_lit("["), .sep = str8_lit(", "), .post = str8_lit("]")});
dumpf(" constructed__this_type: %S\n", this_type_str);
dumpf(" constructed__params: %S\n", param_idx_str);
dumpf(" constructed__this_type: %S // idx_run[%u]\n", this_type_str, type->constructed.param_idx_run_first);
dumpf(" constructed__params: %S // idx_run[%u]\n", param_idx_str, type->constructed.param_idx_run_first);
dumpf(" return_type: %u\n", type->constructed.direct_type_idx);
}
else if(RDI_TypeKind_FirstConstructed <= type->kind && type->kind <= RDI_TypeKind_LastConstructed)
+1 -1
View File
@@ -1777,7 +1777,7 @@ p2r2_convert(Arena *arena, P2R_ConvertParams *params)
// subsequent passes, to build RDI "UDT" information, which is distinct
// from regular type info.
//
ProfScope("types pass 3: construct all root/stub types from TPI") if(lane_idx() == 0)
if(lane_idx() == 0) ProfScope("types pass 3: construct all root/stub types from TPI")
{
#define p2r_builtin_type_ptr_from_kind(kind) ((basic_type_ptrs && RDI_TypeKind_FirstBuiltIn <= (kind) && (kind) <= RDI_TypeKind_LastBuiltIn) ? (basic_type_ptrs[(kind) - RDI_TypeKind_FirstBuiltIn]) : 0)
#define p2r_type_ptr_from_itype(itype) ((itype_type_ptrs && (itype) < itype_opl) ? (itype_type_ptrs[(itype_fwd_map[(itype)] ? itype_fwd_map[(itype)] : (itype))]) : 0)
+120 -69
View File
@@ -1074,6 +1074,7 @@ rdim2_bake(Arena *arena, RDIM_BakeParams *params)
params->thread_variables.total_count +
params->types.total_count);
rdim2_shared->lane_bake_idx_run_maps__loose = push_array(arena, RDIM_BakeIdxRunMapLoose *, lane_count());
rdim2_shared->bake_idx_run_map__loose = rdim_bake_idx_run_map_loose_make(arena, &rdim2_shared->bake_idx_run_map_topology);
}
lane_sync();
@@ -1202,76 +1203,76 @@ rdim2_bake(Arena *arena, RDIM_BakeParams *params)
}
}
}
//- rjf: join & sort
if(lane_idx() == 0)
}
lane_sync();
//- rjf: join
ProfScope("join")
{
Rng1U64 slot_range = lane_range(rdim2_shared->bake_idx_run_map_topology.slots_count);
for EachInRange(slot_idx, slot_range)
{
rdim2_shared->bake_idx_run_map__loose = rdim_bake_idx_run_map_loose_make(arena, &rdim2_shared->bake_idx_run_map_topology);
}
lane_sync();
ProfScope("join & sort")
{
//- rjf: join
ProfScope("join")
for EachIndex(src_lane_idx, lane_count())
{
Rng1U64 slot_range = lane_range(rdim2_shared->bake_idx_run_map_topology.slots_count);
for EachInRange(slot_idx, slot_range)
RDIM_BakeIdxRunMapLoose *src_map = rdim2_shared->lane_bake_idx_run_maps__loose[src_lane_idx];
RDIM_BakeIdxRunMapLoose *dst_map = rdim2_shared->bake_idx_run_map__loose;
if(dst_map->slots[slot_idx] == 0 && src_map->slots[slot_idx] != 0)
{
for EachIndex(src_lane_idx, lane_count())
{
RDIM_BakeIdxRunMapLoose *src_map = rdim2_shared->lane_bake_idx_run_maps__loose[src_lane_idx];
RDIM_BakeIdxRunMapLoose *dst_map = rdim2_shared->bake_idx_run_map__loose;
if(dst_map->slots[slot_idx] == 0 && src_map->slots[slot_idx] != 0)
{
dst_map->slots[slot_idx] = src_map->slots[slot_idx];
}
else if(dst_map->slots[slot_idx] != 0 && src_map->slots[slot_idx] != 0)
{
rdim_bake_idx_run_chunk_list_concat_in_place(dst_map->slots[slot_idx], src_map->slots[slot_idx]);
}
}
dst_map->slots[slot_idx] = src_map->slots[slot_idx];
}
else if(dst_map->slots[slot_idx] != 0 && src_map->slots[slot_idx] != 0)
{
rdim_bake_idx_run_chunk_list_concat_in_place(dst_map->slots[slot_idx], src_map->slots[slot_idx]);
}
}
//- rjf: sort
ProfScope("sort")
}
}
lane_sync();
//- rjf: sort
ProfScope("sort")
{
RDIM_BakeIdxRunMapLoose *map = rdim2_shared->bake_idx_run_map__loose;
Rng1U64 slot_range = lane_range(rdim2_shared->bake_idx_run_map_topology.slots_count);
for EachInRange(slot_idx, slot_range)
{
if(map->slots[slot_idx] != 0 && map->slots[slot_idx]->total_count > 1)
{
RDIM_BakeIdxRunMapLoose *map = rdim2_shared->bake_idx_run_map__loose;
Rng1U64 slot_range = lane_range(rdim2_shared->bake_idx_run_map_topology.slots_count);
for EachInRange(slot_idx, slot_range)
*map->slots[slot_idx] = rdim_bake_idx_run_chunk_list_sorted_from_unsorted(arena, map->slots[slot_idx]);
}
}
}
lane_sync();
//- rjf: tighten idx run table
ProfScope("tighten idx run table")
{
RDIM_BakeIdxRunMapLoose *map = rdim2_shared->bake_idx_run_map__loose;
RDIM_BakeIdxRunMapTopology *map_top = &rdim2_shared->bake_idx_run_map_topology;
if(lane_idx() == 0) ProfScope("calc base indices, set up tight map")
{
rdim2_shared->bake_idx_runs.slots_count = map_top->slots_count;
rdim2_shared->bake_idx_runs.slots = rdim_push_array(arena, RDIM_BakeIdxRunChunkList, rdim2_shared->bake_idx_runs.slots_count);
rdim2_shared->bake_idx_runs.slots_base_idxs = rdim_push_array(arena, RDI_U64, rdim2_shared->bake_idx_runs.slots_count+1);
RDI_U64 encoding_idx_off = 0;
for(RDI_U64 slot_idx = 0; slot_idx < map_top->slots_count; slot_idx += 1)
{
rdim2_shared->bake_idx_runs.slots_base_idxs[slot_idx] = encoding_idx_off;
if(map->slots[slot_idx] != 0)
{
if(map->slots[slot_idx] != 0 && map->slots[slot_idx]->total_count > 1)
{
*map->slots[slot_idx] = rdim_bake_idx_run_chunk_list_sorted_from_unsorted(arena, map->slots[slot_idx]);
}
encoding_idx_off += map->slots[slot_idx]->total_idx_count;
}
}
}
lane_sync();
//- rjf: tighten idx run table
ProfScope("tighten idx run table")
ProfScope("fill tight map")
{
RDIM_BakeIdxRunMapLoose *map = rdim2_shared->bake_idx_run_map__loose;
RDIM_BakeIdxRunMapTopology *map_top = &rdim2_shared->bake_idx_run_map_topology;
if(lane_idx() == 0) ProfScope("calc base indices, set up tight map")
Rng1U64 slot_range = lane_range(rdim2_shared->bake_idx_runs.slots_count);
for EachInRange(idx, slot_range)
{
RDIM_BakeIdxRunMapBaseIndices bake_idx_run_map_base_indices = rdim_bake_idx_run_map_base_indices_from_map_loose(arena, map_top, map);
rdim2_shared->bake_idx_runs.slots_count = map_top->slots_count;
rdim2_shared->bake_idx_runs.slots = rdim_push_array(arena, RDIM_BakeIdxRunChunkList, rdim2_shared->bake_idx_runs.slots_count);
rdim2_shared->bake_idx_runs.slots_base_idxs = bake_idx_run_map_base_indices.slots_base_idxs;
rdim2_shared->bake_idx_runs.total_count = rdim2_shared->bake_idx_runs.slots_base_idxs[rdim2_shared->bake_idx_runs.slots_count];
}
lane_sync();
ProfScope("fill tight map")
{
Rng1U64 slot_range = lane_range(rdim2_shared->bake_idx_runs.slots_count);
for EachInRange(idx, slot_range)
if(map->slots[idx] != 0)
{
if(map->slots[idx] != 0)
{
rdim_memcpy_struct(&rdim2_shared->bake_idx_runs.slots[idx], map->slots[idx]);
}
rdim_memcpy_struct(&rdim2_shared->bake_idx_runs.slots[idx], map->slots[idx]);
}
}
}
@@ -1339,8 +1340,8 @@ rdim2_bake(Arena *arena, RDIM_BakeParams *params)
// rjf: set up
if(lane_idx() == 0)
{
rdim2_shared->baked_idx_runs.idx_count = bake_idx_runs->total_count;
rdim2_shared->baked_idx_runs.idx_runs = push_array_no_zero(arena, RDI_U32, rdim2_shared->baked_idx_runs.idx_count);
rdim2_shared->baked_idx_runs.idx_count = bake_idx_runs->slots_base_idxs[bake_idx_runs->slots_count];
rdim2_shared->baked_idx_runs.idx_runs = push_array(arena, RDI_U32, rdim2_shared->baked_idx_runs.idx_count);
}
lane_sync();
@@ -1352,9 +1353,10 @@ rdim2_bake(Arena *arena, RDIM_BakeParams *params)
RDI_U64 off = bake_idx_runs->slots_base_idxs[slot_idx];
for EachNode(n, RDIM_BakeIdxRunChunkNode, bake_idx_runs->slots[slot_idx].first)
{
StaticAssert(sizeof(rdim2_shared->baked_idx_runs.idx_runs[0]) == sizeof(n->v[0].idxes[0]), idx_run_size_check);
for EachIndex(n_idx, n->count)
{
rdim_memcpy(rdim2_shared->baked_idx_runs.idx_runs + off, n->v[n_idx].idxes, sizeof(n->v[n_idx].idxes[0])*n->v[n_idx].count);
rdim_memcpy(rdim2_shared->baked_idx_runs.idx_runs + off, n->v[n_idx].idxes, sizeof(RDI_U32) * n->v[n_idx].count);
off += n->v[n_idx].count;
}
}
@@ -1362,6 +1364,14 @@ rdim2_bake(Arena *arena, RDIM_BakeParams *params)
}
}
lane_sync();
// NOTE(review): lane 0 walks the whole baked idx run array and prints every
// entry to stdout, one value per line, then flushes. Nothing visible here
// gates this on a debug flag, so it runs whenever this point of the bake is
// reached -- this looks like leftover debugging output; confirm whether it
// is meant to ship.
if(lane_idx() == 0)
{
for EachIndex(idx, rdim2_shared->baked_idx_runs.idx_count)
{
printf("%u\n", rdim2_shared->baked_idx_runs.idx_runs[idx]);
}
fflush(stdout);
}
RDIM_IndexRunBakeResult baked_idx_runs = rdim2_shared->baked_idx_runs;
//////////////////////////////////////////////////////////////
@@ -1582,24 +1592,65 @@ rdim2_bake(Arena *arena, RDIM_BakeParams *params)
ProfScope("bake src files")
{
//- rjf: set up
if(lane_idx() == lane_from_task_idx(0))
ProfScope("set up")
{
rdim2_shared->baked_src_files.source_files_count = params->src_files.total_count+1;
rdim2_shared->baked_src_files.source_files = push_array(arena, RDI_SourceFile, rdim2_shared->baked_src_files.source_files_count);
}
if(lane_idx() == lane_from_task_idx(1))
{
rdim2_shared->baked_src_files.source_line_maps_count = params->src_files.source_line_map_count+1;
rdim2_shared->baked_src_files.source_line_maps = push_array(arena, RDI_SourceLineMap, rdim2_shared->baked_src_files.source_line_maps_count);
if(lane_idx() == lane_from_task_idx(0))
{
rdim2_shared->lane_src_line_map_counts = push_array(arena, U64, lane_count());
rdim2_shared->lane_src_line_map_offs = push_array(arena, U64, lane_count());
}
if(lane_idx() == lane_from_task_idx(1))
{
rdim2_shared->baked_src_files.source_files_count = params->src_files.total_count+1;
rdim2_shared->baked_src_files.source_files = push_array(arena, RDI_SourceFile, rdim2_shared->baked_src_files.source_files_count);
}
if(lane_idx() == lane_from_task_idx(2))
{
rdim2_shared->baked_src_files.source_line_maps_count = params->src_files.source_line_map_count+1;
rdim2_shared->baked_src_files.source_line_maps = push_array(arena, RDI_SourceLineMap, rdim2_shared->baked_src_files.source_line_maps_count);
}
}
lane_sync();
//- rjf: bake src files
ProfScope("bake src files")
//- rjf: calculate layout of src file line maps
ProfScope("calculate layout of src file line maps")
{
// rjf: count lines seen by each lane
for EachNode(n, RDIM_SrcFileChunkNode, params->src_files.first)
{
Rng1U64 range = lane_range(n->count);
for EachInRange(n_idx, range)
{
RDIM_SrcFile *src_file = &n->v[n_idx];
for(RDIM_SrcFileLineMapFragment *f = src_file->first_line_map_fragment; f != 0; f = f->next)
{
rdim2_shared->lane_src_line_map_counts[lane_idx()] += f->seq->line_count;
}
}
}
lane_sync();
// rjf: lay out per-lane offset
if(lane_idx() == 0)
{
U64 off = 0;
for EachIndex(l_idx, lane_count())
{
rdim2_shared->lane_src_line_map_offs[l_idx] = off;
off += rdim2_shared->lane_src_line_map_counts[l_idx];
}
// rdim2_shared->baked_src_files.
}
}
lane_sync();
//- rjf: bake
ProfScope("bake")
{
}
}
lane_sync();
//////////////////////////////////////////////////////////////
//- rjf: @rdim_bake_stage compute layout for scope sub-lists (locals / voffs)
+2
View File
@@ -76,6 +76,8 @@ struct RDIM2_Shared
RDIM_TopLevelNameMapBakeResult baked_top_level_name_maps;
RDIM_NameMapBakeResult baked_name_maps;
RDI_U64 *lane_src_line_map_counts;
RDI_U64 *lane_src_line_map_offs;
RDIM_SrcFileBakeResult baked_src_files;
RDI_U64 *scope_local_chunk_lane_counts; // [lane_count * scope_chunk_count]