first pass at bake string chunk list, bake string chunk list map, and bake string chunk list sorting paths/datastructures - basic machinery for going wide on the string/name map builds

This commit is contained in:
Ryan Fleury
2024-02-28 13:27:48 -08:00
parent 56690a703f
commit 0916c2a3c8
3 changed files with 217 additions and 13 deletions
+1 -1
View File
@@ -47,7 +47,7 @@ commands =
{
.rjf_f1 =
{
.win = "build raddbgi_breakpad_from_pdb telemetry release",
.win = "build raddbgi_from_pdb telemetry debug",
.linux = "",
.out = "*compilation*",
.footer_panel = true,
+169 -9
View File
@@ -1212,6 +1212,167 @@ rdim_bake_vmap_from_markers(RDIM_Arena *arena, RDIM_VMapMarker *markers, RDIM_So
////////////////////////////////
//~ rjf: [Baking Helpers] Interned / Deduplicated Blob Data Structure Helpers
//- rjf: bake string chunk lists
// Reserves one RDIM_BakeString slot at the tail of `list` and returns a
// pointer to it. The caller is responsible for filling in the slot.
//
// arena: allocator used when a new chunk node (and its slot array) is needed.
// list:  chunk list being appended to; `chunk_count` grows whenever a node is
//        added, `total_count` grows on every call.
// cap:   number of slots given to a newly allocated chunk node. It is ignored
//        while the current tail chunk still has room. A value of 0 is treated
//        as 1, because a chunk with no slots could not hold the slot that is
//        handed back to the caller.
//
// NOTE(review): a fresh node's `count` is never written here, so this relies
// on rdim_push_array returning zeroed memory -- confirm.
RDI_PROC RDIM_BakeString *
rdim_bake_string_chunk_list_push(RDIM_Arena *arena, RDIM_BakeStringChunkList *list, RDI_U64 cap)
{
  RDIM_BakeStringChunkNode *n = list->last;
  if(n == 0 || n->count >= n->cap)
  {
    // list is empty or the tail chunk is full -> append a new chunk
    n = rdim_push_array(arena, RDIM_BakeStringChunkNode, 1);
    n->cap = (cap != 0) ? cap : 1;
    n->v = rdim_push_array(arena, RDIM_BakeString, n->cap);
    RDIM_SLLQueuePush(list->first, list->last, n);
    list->chunk_count += 1;
  }
  RDIM_BakeString *s = &n->v[n->count];
  n->count += 1;
  list->total_count += 1;
  return s;
}
// Moves every chunk of `to_push` onto the end of `dst` without copying any
// strings, then clears `to_push` so the chunks have a single owner.
//
// dst:     list that receives the chunks.
// to_push: list that gives up its chunks; always zeroed on return.
RDI_PROC void
rdim_bake_string_chunk_list_concat_in_place(RDIM_BakeStringChunkList *dst, RDIM_BakeStringChunkList *to_push)
{
  int dst_has_tail     = (dst->last != 0);
  int src_has_chunks   = (to_push->first != 0);
  if(!dst_has_tail || !src_has_chunks)
  {
    // nothing to link; if `dst` is empty it simply takes over the source header
    if(dst->first == 0)
    {
      rdim_memcpy_struct(dst, to_push);
    }
  }
  else
  {
    // both sides have chunks: fold the counters, then splice the node chains
    dst->total_count += to_push->total_count;
    dst->chunk_count += to_push->chunk_count;
    dst->last->next = to_push->first;
    dst->last = to_push->last;
  }
  rdim_memzero_struct(to_push);
}
// Returns a new chunk list containing a copy of every RDIM_BakeString entry in
// `src`, ordered by the bytes of the strings (most-significant-byte-first
// radix sort, one byte position per pass).
//
// arena: allocator for the returned list. Temporary bucket storage comes from
//        a scratch arena that is released before returning.
// src:   list to read from; it is not modified.
//
// Notes:
//  - Only the RDIM_BakeString entries are copied; the string bytes they refer
//    to are shared with `src`.
//  - A string that ends before the byte position being examined is bucketed
//    as if it had a 0 byte there, so a string and the same string followed by
//    0 bytes are not ordered relative to each other.
//  - All entries are pushed with a chunk capacity of `src->total_count`, and
//    only the first destination chunk is sorted. This assumes `total_count`
//    matches the real number of entries in `src`.
RDI_PROC RDIM_BakeStringChunkList
rdim_bake_string_chunk_list_sorted_from_unsorted(RDIM_Arena *arena, RDIM_BakeStringChunkList *src)
{
  //- produce unsorted destination list with single chunk node
  RDIM_BakeStringChunkList dst = {0};
  for(RDIM_BakeStringChunkNode *n = src->first; n != 0; n = n->next)
  {
    for(RDI_U64 idx = 0; idx < n->count; idx += 1)
    {
      RDIM_BakeString *src_str = &n->v[idx];
      RDIM_BakeString *dst_str = rdim_bake_string_chunk_list_push(arena, &dst, src->total_count);
      rdim_memcpy_struct(dst_str, src_str);
    }
  }

  //- sort chunk node
  if(dst.first != 0)
  {
    // presumably `arena` is passed so the scratch arena does not alias it
    RDIM_Temp scratch = rdim_scratch_begin(&arena, 1);

    // a sort task is a sub-range of the destination array whose strings all
    // agree on the bytes before `string_off`, and still need ordering from
    // `string_off` onwards
    typedef struct SortTask SortTask;
    struct SortTask
    {
      SortTask *next;
      RDI_U64 string_off;
      RDIM_BakeString *v;
      RDI_U64 count;
    };
    SortTask start_task = {0, 0, dst.first->v, dst.first->count};
    SortTask *first_task = &start_task;
    SortTask *last_task = &start_task;

    //- for each sort task range (tasks pushed inside the loop are visited too):
    for(SortTask *t = first_task; t != 0; t = t->next)
    {
      //- drop each element into a bucket according to the byte at the task offset
      RDIM_BakeStringChunkList *buckets = rdim_push_array(scratch.arena, RDIM_BakeStringChunkList, 256);
      for(RDI_U64 idx = 0; idx < t->count; idx += 1)
      {
        RDI_U8 byte = t->string_off < t->v[idx].string.size ? t->v[idx].string.str[t->string_off] : 0;
        RDIM_BakeStringChunkList *bucket = &buckets[byte];
        RDIM_BakeString *bstr = rdim_bake_string_chunk_list_push(scratch.arena, bucket, 8);
        rdim_memcpy_struct(bstr, &t->v[idx]);
      }

      //- write the buckets back over the task's range in bucket order, and
      //  build new sort tasks for buckets that still need ordering
      RDI_U64 write_idx = 0;
      for(RDI_U64 bucket_idx = 0; bucket_idx < 256; bucket_idx += 1)
      {
        // write each chunk node's array into the original array, and detect
        // whether any string in this bucket has bytes past the current offset
        RDI_U64 bucket_base_idx = write_idx;
        RDI_S32 has_bytes_left_to_sort = 0;
        for(RDIM_BakeStringChunkNode *n = buckets[bucket_idx].first; n != 0; n = n->next)
        {
          rdim_memcpy(t->v+write_idx, n->v, sizeof(n->v[0])*n->count);
          write_idx += n->count;
          for(RDI_U64 idx = 0; idx < n->count; idx += 1)
          {
            if(n->v[idx].string.size > t->string_off+1)
            {
              has_bytes_left_to_sort = 1;
            }
          }
        }

        // a bucket with >1 element and unexamined bytes becomes a new task
        // covering its region of the array, one byte further in
        if(buckets[bucket_idx].total_count > 1 && has_bytes_left_to_sort)
        {
          SortTask *new_task = rdim_push_array(scratch.arena, SortTask, 1);
          RDIM_SLLQueuePush(first_task, last_task, new_task);
          new_task->string_off = t->string_off+1;
          new_task->v = t->v + bucket_base_idx;
          new_task->count = write_idx-bucket_base_idx;
        }
      }
    }
    rdim_scratch_end(scratch);
  }
  return dst;
}
//- rjf: bake string chunk list maps
// Inserts `string` into the chunk-list hash map unless an equal string is
// already stored in the slot it hashes to.
//
// arena:        allocator for any new chunk storage in the slot.
// map_topology: supplies `slots_count`, used to pick the slot from the hash.
// map:          map whose `slots` array is indexed by `hash % slots_count`.
// chunk_cap:    slot capacity handed to the chunk list when it has to grow.
// string:       string to insert; it is stored by value as given (the bytes
//               are not copied here).
//
// If `slots_count` is 0 there is no slot to insert into, so the call does
// nothing (this also avoids an undefined modulo by zero).
RDI_PROC void
rdim_bake_string_chunk_list_map_insert(RDIM_Arena *arena, RDIM_BakeStringChunkListMapTopology *map_topology, RDIM_BakeStringChunkListMap *map, RDI_U64 chunk_cap, RDIM_String8 string)
{
  if(map_topology->slots_count == 0)
  {
    return;
  }

  // hash -> slot
  RDI_U64 hash = rdi_hash(string.RDIM_String8_BaseMember, string.RDIM_String8_SizeMember);
  RDI_U64 slot_idx = hash%map_topology->slots_count;
  RDIM_BakeStringChunkList *slot = &map->slots[slot_idx];

  // scan the slot for an equal string; the stored hash is compared first so
  // the full string comparison only runs on entries whose hash matches
  RDI_S32 is_duplicate = 0;
  for(RDIM_BakeStringChunkNode *n = slot->first; n != 0 && !is_duplicate; n = n->next)
  {
    for(RDI_U64 idx = 0; idx < n->count; idx += 1)
    {
      if(n->v[idx].hash == hash && rdim_str8_match(n->v[idx].string, string, 0))
      {
        is_duplicate = 1;
        break;
      }
    }
  }

  // not present yet -> append to the slot
  if(!is_duplicate)
  {
    RDIM_BakeString *bstr = rdim_bake_string_chunk_list_push(arena, slot, chunk_cap);
    bstr->string = string;
    bstr->hash = hash;
  }
}
// Merges `src` into `dst` slot by slot: each source slot's chunks are appended
// to the destination slot with the same index. Strings are not compared, so a
// string present in both maps ends up stored twice in the joined slot.
//
// map_topology: supplies the slot count; both maps are indexed up to it.
// dst:          map that receives the chunks.
// src:          map that is drained; its struct is zeroed on return.
RDI_PROC void
rdim_bake_string_chunk_list_map_join_in_place(RDIM_BakeStringChunkListMapTopology *map_topology, RDIM_BakeStringChunkListMap *dst, RDIM_BakeStringChunkListMap *src)
{
  RDI_U64 slot_idx = 0;
  while(slot_idx < map_topology->slots_count)
  {
    RDIM_BakeStringChunkList *dst_slot = &dst->slots[slot_idx];
    RDIM_BakeStringChunkList *src_slot = &src->slots[slot_idx];
    rdim_bake_string_chunk_list_concat_in_place(dst_slot, src_slot);
    slot_idx += 1;
  }
  rdim_memzero_struct(src);
}
//- rjf: bake string map reading/writing
RDI_PROC RDI_U32
@@ -1224,7 +1385,7 @@ rdim_bake_idx_from_string(RDIM_BakeStringMap *map, RDIM_String8 string)
RDIM_BakeStringNode *node = 0;
for(RDIM_BakeStringNode *n = map->slots[slot_idx]; n != 0; n = n->hash_next)
{
if(n->hash == hash && rdim_str8_match(n->string, string, 0))
if(n->v.hash == hash && rdim_str8_match(n->v.string, string, 0))
{
node = n;
break;
@@ -1246,7 +1407,7 @@ rdim_bake_string_map_insert(RDIM_Arena *arena, RDIM_BakeStringMap *map, RDIM_Str
RDIM_BakeStringNode *node = 0;
for(RDIM_BakeStringNode *n = map->slots[slot_idx]; n != 0; n = n->hash_next)
{
if(n->hash == hash && rdim_str8_match(n->string, string, 0))
if(n->v.hash == hash && rdim_str8_match(n->v.string, string, 0))
{
node = n;
break;
@@ -1257,13 +1418,12 @@ rdim_bake_string_map_insert(RDIM_Arena *arena, RDIM_BakeStringMap *map, RDIM_Str
if(node == 0)
{
node = rdim_push_array(arena, RDIM_BakeStringNode, 1);
node->string = string;
node->hash = hash;
node->idx = map->count;
node->v.string = string;
node->v.hash = hash;
node->idx = map->count;
map->count += 1;
RDIM_SLLQueuePush_N(map->order_first, map->order_last, node, order_next);
RDIM_SLLStackPush_N(map->slots[slot_idx], node, hash_next);
map->slot_collision_count += (node->hash_next != 0);
}
// rjf: node -> index
@@ -3277,7 +3437,7 @@ rdim_bake_string_section_list_from_string_map(RDIM_Arena *arena, RDIM_BakeString
node != 0;
node = node->order_next)
{
off_cursor += node->string.size;
off_cursor += node->v.string.size;
*off_ptr = off_cursor;
off_ptr += 1;
}
@@ -3289,8 +3449,8 @@ rdim_bake_string_section_list_from_string_map(RDIM_Arena *arena, RDIM_BakeString
node != 0;
node = node->order_next)
{
rdim_memcpy(ptr, node->string.str, node->string.size);
ptr += node->string.size;
rdim_memcpy(ptr, node->v.string.str, node->v.string.size);
ptr += node->v.string.size;
}
}
rdim_bake_section_list_push_new(arena, &sections, str_offs, sizeof(RDI_U32)*(strings->count+1), RDI_DataSectionTag_StringTable, 0);
+47 -3
View File
@@ -840,13 +840,49 @@ struct RDIM_BakeSectionList
//- rjf: interned strings
// One interned string entry: the string plus the hash computed for it.
typedef struct RDIM_BakeString RDIM_BakeString;
struct RDIM_BakeString
{
// the string itself
RDIM_String8 string;
// hash of `string`; the chunk list map insert stores the rdi_hash result here
RDI_U64 hash;
};
// One chunk in a chunked list of bake strings: a fixed-capacity array of
// entries plus a link to the next chunk.
typedef struct RDIM_BakeStringChunkNode RDIM_BakeStringChunkNode;
struct RDIM_BakeStringChunkNode
{
// next chunk in the list, or 0 for the last chunk
RDIM_BakeStringChunkNode *next;
// array of `cap` entries; the first `count` are in use
RDIM_BakeString *v;
// number of entries in use
RDI_U64 count;
// number of entries allocated in `v`
RDI_U64 cap;
};
// Singly-linked list of bake string chunks, with running totals.
typedef struct RDIM_BakeStringChunkList RDIM_BakeStringChunkList;
struct RDIM_BakeStringChunkList
{
// first chunk, or 0 when the list is empty
RDIM_BakeStringChunkNode *first;
// last chunk; pushes append into this chunk until it is full
RDIM_BakeStringChunkNode *last;
// number of chunk nodes in the list
RDI_U64 chunk_count;
// number of string entries across all chunks
RDI_U64 total_count;
};
// Shape of a bake string chunk list map, kept separate from the map itself so
// several maps can be inserted into and joined using the same slot count.
typedef struct RDIM_BakeStringChunkListMapTopology RDIM_BakeStringChunkListMapTopology;
struct RDIM_BakeStringChunkListMapTopology
{
// number of slots; a string's slot index is its hash modulo this value
RDI_U64 slots_count;
};
// Hash map from string to bake string entry, where each slot is a chunk list.
typedef struct RDIM_BakeStringChunkListMap RDIM_BakeStringChunkListMap;
struct RDIM_BakeStringChunkListMap
{
// slot array, indexed by hash % slots_count of the accompanying topology
// (presumably allocated by the caller with that many entries -- the
// allocation is not part of this section)
RDIM_BakeStringChunkList *slots;
};
// Node of the bake string map: one interned string, linked both into its hash
// slot and into the overall insertion order.
typedef struct RDIM_BakeStringNode RDIM_BakeStringNode;
struct RDIM_BakeStringNode
{
// next node in the same hash slot
RDIM_BakeStringNode *hash_next;
// next node in insertion order
RDIM_BakeStringNode *order_next;
// NOTE(review): `string` and `hash` carry the same information as `v` below;
// nearby code reads both `n->hash` and `n->v.hash` -- confirm which form is
// current and whether these two fields should remain.
RDIM_String8 string;
RDI_U64 hash;
// string + hash pair for this node
RDIM_BakeString v;
// index assigned at insertion time (the map's count at that moment)
RDI_U32 idx;
};
@@ -857,7 +893,6 @@ struct RDIM_BakeStringMap
RDIM_BakeStringNode *order_last;
RDIM_BakeStringNode **slots;
RDI_U64 slots_count;
RDI_U64 slot_collision_count;
RDI_U32 count;
};
@@ -1096,6 +1131,15 @@ RDI_PROC RDIM_BakeVMap rdim_bake_vmap_from_markers(RDIM_Arena *arena, RDIM_VMapM
////////////////////////////////
//~ rjf: [Baking Helpers] Interned / Deduplicated Blob Data Structure Helpers
//- rjf: bake string chunk lists
// push: reserves one entry at the tail of `list` (allocating a new chunk of `cap` entries if needed) and returns it
RDI_PROC RDIM_BakeString *rdim_bake_string_chunk_list_push(RDIM_Arena *arena, RDIM_BakeStringChunkList *list, RDI_U64 cap);
// concat_in_place: appends all chunks of `to_push` onto `dst` without copying, then zeroes `to_push`
RDI_PROC void rdim_bake_string_chunk_list_concat_in_place(RDIM_BakeStringChunkList *dst, RDIM_BakeStringChunkList *to_push);
// sorted_from_unsorted: returns a new list holding a copy of `src`'s entries, ordered by string bytes; `src` is left untouched
RDI_PROC RDIM_BakeStringChunkList rdim_bake_string_chunk_list_sorted_from_unsorted(RDIM_Arena *arena, RDIM_BakeStringChunkList *src);
//- rjf: bake string chunk list maps
// insert: adds `string` to the slot chosen by its hash unless an equal string is already in that slot
RDI_PROC void rdim_bake_string_chunk_list_map_insert(RDIM_Arena *arena, RDIM_BakeStringChunkListMapTopology *map_topology, RDIM_BakeStringChunkListMap *map, RDI_U64 chunk_cap, RDIM_String8 string);
// join_in_place: appends each slot of `src` onto the matching slot of `dst`, then zeroes `src`
RDI_PROC void rdim_bake_string_chunk_list_map_join_in_place(RDIM_BakeStringChunkListMapTopology *map_topology, RDIM_BakeStringChunkListMap *dst, RDIM_BakeStringChunkListMap *src);
//- rjf: bake string map reading/writing
RDI_PROC RDI_U32 rdim_bake_idx_from_string(RDIM_BakeStringMap *map, RDIM_String8 string);
RDI_PROC RDI_U32 rdim_bake_string_map_insert(RDIM_Arena *arena, RDIM_BakeStringMap *map, RDIM_String8 string);