From 66d5d93fc9a68fb0f58ddf23d8bdfd75e4549f7d Mon Sep 17 00:00:00 2001 From: Ryan Fleury Date: Wed, 23 Jul 2025 17:18:54 -0700 Subject: [PATCH] build path tree via non-case-mutated (non-normalized) src file paths in rdi build; still sanitize paths, but do not normalize them until we actually have to build the normalized file path map build --- project.4coder | 1 + src/base/base_strings.c | 3 +- src/lib_rdi_make/rdi_make.c | 38 ++++++++--- src/lib_rdi_make/rdi_make.h | 3 +- src/radbin/radbin.c | 2 +- src/rdi_from_dwarf/rdi_from_dwarf.c | 9 ++- src/rdi_from_pdb/rdi_from_pdb.c | 98 ++++++++++++++--------------- 7 files changed, 90 insertions(+), 64 deletions(-) diff --git a/project.4coder b/project.4coder index c47b6ec8..b5a61eb5 100644 --- a/project.4coder +++ b/project.4coder @@ -47,6 +47,7 @@ commands = { //- rjf: [raddbg] .f1 = { .win = "raddbg_stable --ipc kill_all && build raddbg telemetry", .linux = "", .out = "*compilation*", .footer_panel = true, .save_dirty_files = true, .cursor_at_end = false, }, + // .f1 = { .win = "raddbg_stable --ipc kill_all && build radbin", .linux = "", .out = "*compilation*", .footer_panel = true, .save_dirty_files = true, .cursor_at_end = false, }, //- rjf: [raddbg wsl] // .f1 = { .win = "wsl ./build.sh raddbg", .linux = "", .out = "*compilation*", .footer_panel = true, .save_dirty_files = true, .cursor_at_end = false, }, diff --git a/src/base/base_strings.c b/src/base/base_strings.c index 5088f164..36ed92c4 100644 --- a/src/base/base_strings.c +++ b/src/base/base_strings.c @@ -1473,7 +1473,8 @@ str8_path_list_resolve_dots_in_place(String8List *path, PathStyle style) } internal String8 -str8_path_list_join_by_style(Arena *arena, String8List *path, PathStyle style){ +str8_path_list_join_by_style(Arena *arena, String8List *path, PathStyle style) +{ StringJoin params = {0}; switch(style) { diff --git a/src/lib_rdi_make/rdi_make.c b/src/lib_rdi_make/rdi_make.c index 4af2d48c..f5c0c745 100644 --- a/src/lib_rdi_make/rdi_make.c +++ b/src/lib_rdi_make/rdi_make.c @@ -196,6 +196,21 @@ rdim_str8_match(RDIM_String8 a, RDIM_String8 b, RDIM_StringMatchFlags flags) return result; } +RDI_PROC RDIM_String8 +rdim_lower_from_str8(RDIM_Arena *arena, RDIM_String8 string) +{ + RDIM_String8 result = rdim_str8_copy(arena, string); + for(RDI_U64 idx = 0; idx < result.RDIM_String8_SizeMember; idx += 1) + { + RDI_U8 byte = result.RDIM_String8_BaseMember[idx]; + if('A' <= byte && byte <= 'Z') + { + result.RDIM_String8_BaseMember[idx] += ('a' - 'A'); + } + } + return result; +} + //- rjf: string lists RDI_PROC void @@ -1949,7 +1964,7 @@ rdim_bake_path_node_from_string(RDIM_BakePathTree *tree, RDIM_String8 string) RDIM_BakePathNode *sub_dir_node = 0; for(RDIM_BakePathNode *child = node->first_child; child != 0; child = child->next_sibling) { - if(rdim_str8_match(child->name, sub_dir, RDIM_StringMatchFlag_CaseInsensitive)) + if(rdim_str8_match(child->name, sub_dir, 0)) { sub_dir_node = child; } @@ -2022,7 +2037,7 @@ rdim_bake_path_tree_insert(RDIM_Arena *arena, RDIM_BakePathTree *tree, RDIM_Stri RDIM_BakePathNode *sub_dir_node = 0; for(RDIM_BakePathNode *child = node->first_child; child != 0; child = child->next_sibling) { - if(rdim_str8_match(child->name, sub_dir, RDIM_StringMatchFlag_CaseInsensitive)) + if(rdim_str8_match(child->name, sub_dir, 0)) { sub_dir_node = child; } @@ -2210,7 +2225,10 @@ rdim_bake_string_map_loose_push_src_file_slice(RDIM_Arena *arena, RDIM_BakeStrin { for(RDI_U64 idx = 0; idx < count; idx += 1) { - rdim_bake_string_map_loose_insert(arena, top, map, 1, v[idx].normal_full_path); + RDIM_Temp scratch = rdim_scratch_begin(&arena, 1); + RDIM_String8 normalized_path = rdim_lower_from_str8(scratch.arena, v[idx].path); + rdim_bake_string_map_loose_insert(arena, top, map, 1, normalized_path); + rdim_scratch_end(scratch); } } @@ -2437,8 +2455,11 @@ rdim_bake_name_map_from_kind_params(RDIM_Arena *arena, RDI_NameMapKind kind, RDI { for(RDI_U64 idx = 0; idx < n->count; idx += 1) { + RDIM_Temp scratch = rdim_scratch_begin(&arena, 1); RDI_U64 src_file_idx = rdim_idx_from_src_file(&n->v[idx]); - rdim_bake_name_map_push(arena, map, n->v[idx].normal_full_path, (RDI_U32)src_file_idx); // TODO(rjf): @u64_to_u32 + RDIM_String8 normalized_path = rdim_lower_from_str8(scratch.arena, n->v[idx].path); + rdim_bake_name_map_push(arena, map, normalized_path, (RDI_U32)src_file_idx); // TODO(rjf): @u64_to_u32 + rdim_scratch_end(scratch); } } }break; @@ -2545,7 +2566,7 @@ rdim_bake_path_tree_from_params(RDIM_Arena *arena, RDIM_BakeParams *params) { for(RDI_U64 idx = 0; idx < n->count; idx += 1) { - RDIM_BakePathNode *node = rdim_bake_path_tree_insert(arena, tree, n->v[idx].normal_full_path); + RDIM_BakePathNode *node = rdim_bake_path_tree_insert(arena, tree, n->v[idx].path); node->src_file = &n->v[idx]; } } @@ -3056,9 +3077,12 @@ rdim_bake_src_files(RDIM_Arena *arena, RDIM_BakeStringMapTight *strings, RDIM_Ba //////////////////////// //- rjf: fill file info // - dst_file->file_path_node_idx = rdim_bake_path_node_idx_from_string(path_tree, src_file->normal_full_path); - dst_file->normal_full_path_string_idx = rdim_bake_idx_from_string(strings, src_file->normal_full_path); + RDI_U64 scratch_pos_restore = rdim_arena_pos(scratch.arena); + RDIM_String8 normalized_path = rdim_lower_from_str8(scratch.arena, src_file->path); + dst_file->file_path_node_idx = rdim_bake_path_node_idx_from_string(path_tree, src_file->path); + dst_file->normal_full_path_string_idx = rdim_bake_idx_from_string(strings, normalized_path); dst_file->source_line_map_idx = (RDI_U32)(dst_map ? (dst_map - dst_maps) : 0); + rdim_arena_pop_to(scratch.arena, scratch_pos_restore); } } diff --git a/src/lib_rdi_make/rdi_make.h b/src/lib_rdi_make/rdi_make.h index a798af08..624b79c9 100644 --- a/src/lib_rdi_make/rdi_make.h +++ b/src/lib_rdi_make/rdi_make.h @@ -535,7 +535,7 @@ typedef struct RDIM_SrcFile RDIM_SrcFile; struct RDIM_SrcFile { struct RDIM_SrcFileChunkNode *chunk; - RDIM_String8 normal_full_path; + RDIM_String8 path; RDIM_SrcFileLineMapFragment *first_line_map_fragment; RDIM_SrcFileLineMapFragment *last_line_map_fragment; }; @@ -1411,6 +1411,7 @@ RDI_PROC RDI_S32 rdim_str8_match(RDIM_String8 a, RDIM_String8 b, RDIM_StringMatc #define rdim_str8_lit(S) rdim_str8((RDI_U8*)(S), sizeof(S) - 1) #define rdim_str8_struct(S) rdim_str8((RDI_U8*)(S), sizeof(*(S))) #define rdim_str8_struct_array(S, C) rdim_str8((RDI_U8*)(S), sizeof(*(S)) * (C)) +RDI_PROC RDIM_String8 rdim_lower_from_str8(RDIM_Arena *arena, RDIM_String8 string); //- rjf: string lists RDI_PROC void rdim_str8_list_push(RDIM_Arena *arena, RDIM_String8List *list, RDIM_String8 string); diff --git a/src/radbin/radbin.c b/src/radbin/radbin.c index 1106bb06..4a071a6f 100644 --- a/src/radbin/radbin.c +++ b/src/radbin/radbin.c @@ -762,7 +762,7 @@ rb_entry_point(CmdLine *cmdline) for(U64 idx = 0; idx < n->count; idx += 1) { U64 file_idx = rdim_idx_from_src_file(&n->v[idx]); - String8 src_path = n->v[idx].normal_full_path; + String8 src_path = n->v[idx].path; str8_list_pushf(arena, &dump, "FILE %I64u %S\n", file_idx, src_path); } } diff --git a/src/rdi_from_dwarf/rdi_from_dwarf.c b/src/rdi_from_dwarf/rdi_from_dwarf.c index 4bee8f72..a7c3703c 100644 --- a/src/rdi_from_dwarf/rdi_from_dwarf.c +++ b/src/rdi_from_dwarf/rdi_from_dwarf.c @@ -1238,12 +1238,11 @@ d2r_convert(Arena *arena, ASYNC_Root *async_root, D2R_ConvertParams *params) String8List file_path_split = str8_split_path(scratch.arena, file_path); str8_path_list_resolve_dots_in_place(&file_path_split, PathStyle_WindowsAbsolute); String8 file_path_resolved = str8_path_list_join_by_style(scratch.arena, &file_path_split, PathStyle_WindowsAbsolute); - String8 file_path_normalized = lower_from_str8(scratch.arena, file_path_resolved); - RDIM_SrcFile *src_file = hash_table_search_path_raw(source_file_ht, file_path_normalized); + RDIM_SrcFile *src_file = hash_table_search_path_raw(source_file_ht, file_path_resolved); if (src_file == 0) { - src_file = rdim_src_file_chunk_list_push(arena, &src_files, SRC_FILE_CAP); - src_file->normal_full_path = push_str8_copy(arena, file_path_normalized); - hash_table_push_path_raw(scratch.arena, source_file_ht, src_file->normal_full_path, src_file); + src_file = rdim_src_file_chunk_list_push(arena, &src_files, SRC_FILE_CAP); + src_file->path = push_str8_copy(arena, file_path_resolved); + hash_table_push_path_raw(scratch.arena, source_file_ht, src_file->path, src_file); } src_file_map[file_idx] = src_file; } diff --git a/src/rdi_from_pdb/rdi_from_pdb.c b/src/rdi_from_pdb/rdi_from_pdb.c index 93fc40f4..cd4ff652 100644 --- a/src/rdi_from_pdb/rdi_from_pdb.c +++ b/src/rdi_from_pdb/rdi_from_pdb.c @@ -525,30 +525,30 @@ ASYNC_WORK_DEF(p2r_gather_unit_src_file_work) lines_n != 0; lines_n = lines_n->next) { - // rjf: file name -> normalized file path + // rjf: file name -> sanitized file path String8 file_path = lines_n->v.file_name; - String8 file_path_normalized = lower_from_str8(scratch.arena, str8_skip_chop_whitespace(file_path)); + String8 file_path_sanitized = str8_copy(scratch.arena, str8_skip_chop_whitespace(file_path)); { - PathStyle file_path_normalized_style = path_style_from_str8(file_path_normalized); - String8List file_path_normalized_parts = str8_split_path(scratch.arena, file_path_normalized); - if(file_path_normalized_style == PathStyle_Relative) + PathStyle file_path_sanitized_style = path_style_from_str8(file_path_sanitized); + String8List file_path_sanitized_parts = str8_split_path(scratch.arena, file_path_sanitized); + if(file_path_sanitized_style == PathStyle_Relative) { String8List obj_folder_path_parts = str8_split_path(scratch.arena, obj_folder_path); - str8_list_concat_in_place(&obj_folder_path_parts, &file_path_normalized_parts); - file_path_normalized_parts = obj_folder_path_parts; - file_path_normalized_style = path_style_from_str8(obj_folder_path); + str8_list_concat_in_place(&obj_folder_path_parts, &file_path_sanitized_parts); + file_path_sanitized_parts = obj_folder_path_parts; + file_path_sanitized_style = path_style_from_str8(obj_folder_path); } - str8_path_list_resolve_dots_in_place(&file_path_normalized_parts, file_path_normalized_style); - file_path_normalized = str8_path_list_join_by_style(scratch.arena, &file_path_normalized_parts, file_path_normalized_style); + str8_path_list_resolve_dots_in_place(&file_path_sanitized_parts, file_path_sanitized_style); + file_path_sanitized = str8_path_list_join_by_style(scratch.arena, &file_path_sanitized_parts, file_path_sanitized_style); } - // rjf: normalized file path -> source file node - U64 file_path_normalized_hash = rdi_hash(file_path_normalized.str, file_path_normalized.size); - U64 hit_path_slot = file_path_normalized_hash%hit_path_slots_count; + // rjf: sanitized file path -> source file node + U64 file_path_sanitized_hash = rdi_hash(file_path_sanitized.str, file_path_sanitized.size); + U64 hit_path_slot = file_path_sanitized_hash%hit_path_slots_count; String8Node *hit_path_node = 0; for(String8Node *n = hit_path_slots[hit_path_slot]; n != 0; n = n->next) { - if(str8_match(n->string, file_path_normalized, 0)) + if(str8_match(n->string, file_path_sanitized, 0)) { hit_path_node = n; break; @@ -558,8 +558,8 @@ ASYNC_WORK_DEF(p2r_gather_unit_src_file_work) { hit_path_node = push_array(scratch.arena, String8Node, 1); SLLStackPush(hit_path_slots[hit_path_slot], hit_path_node); - hit_path_node->string = file_path_normalized; - str8_list_push(scratch.arena, &src_file_paths, push_str8_copy(arena, file_path_normalized)); + hit_path_node->string = file_path_sanitized; + str8_list_push(scratch.arena, &src_file_paths, push_str8_copy(arena, file_path_sanitized)); } } } @@ -788,32 +788,32 @@ ASYNC_WORK_DEF(p2r_unit_convert_work) { CV_C13LinesParsed *lines = &lines_n->v; - // rjf: file name -> normalized file path + // rjf: file name -> sanitized file path String8 file_path = lines->file_name; - String8 file_path_normalized = lower_from_str8(scratch.arena, str8_skip_chop_whitespace(file_path)); + String8 file_path_sanitized = str8_copy(scratch.arena, str8_skip_chop_whitespace(file_path)); { - PathStyle file_path_normalized_style = path_style_from_str8(file_path_normalized); - String8List file_path_normalized_parts = str8_split_path(scratch.arena, file_path_normalized); - if(file_path_normalized_style == PathStyle_Relative) + PathStyle file_path_sanitized_style = path_style_from_str8(file_path_sanitized); + String8List file_path_sanitized_parts = str8_split_path(scratch.arena, file_path_sanitized); + if(file_path_sanitized_style == PathStyle_Relative) { String8List obj_folder_path_parts = str8_split_path(scratch.arena, obj_folder_path); - str8_list_concat_in_place(&obj_folder_path_parts, &file_path_normalized_parts); - file_path_normalized_parts = obj_folder_path_parts; - file_path_normalized_style = path_style_from_str8(obj_folder_path); + str8_list_concat_in_place(&obj_folder_path_parts, &file_path_sanitized_parts); + file_path_sanitized_parts = obj_folder_path_parts; + file_path_sanitized_style = path_style_from_str8(obj_folder_path); } - str8_path_list_resolve_dots_in_place(&file_path_normalized_parts, file_path_normalized_style); - file_path_normalized = str8_path_list_join_by_style(scratch.arena, &file_path_normalized_parts, file_path_normalized_style); + str8_path_list_resolve_dots_in_place(&file_path_sanitized_parts, file_path_sanitized_style); + file_path_sanitized = str8_path_list_join_by_style(scratch.arena, &file_path_sanitized_parts, file_path_sanitized_style); } - // rjf: normalized file path -> source file node - U64 file_path_normalized_hash = rdi_hash(file_path_normalized.str, file_path_normalized.size); - U64 src_file_slot = file_path_normalized_hash%in->src_file_map->slots_count; + // rjf: sanitized file path -> source file node + U64 file_path_sanitized_hash = rdi_hash(file_path_sanitized.str, file_path_sanitized.size); + U64 src_file_slot = file_path_sanitized_hash%in->src_file_map->slots_count; P2R_SrcFileNode *src_file_node = 0; if(lines->line_count != 0) { for(P2R_SrcFileNode *n = in->src_file_map->slots[src_file_slot]; n != 0; n = n->next) { - if(str8_match(n->src_file->normal_full_path, file_path_normalized, 0)) + if(str8_match(n->src_file->path, file_path_sanitized, 0)) { src_file_node = n; break; @@ -984,30 +984,30 @@ ASYNC_WORK_DEF(p2r_unit_convert_work) seq_file_name = pdb_strtbl_string_from_off(in->pdb_strtbl, name_off); } - // rjf: file name -> normalized file path + // rjf: file name -> sanitized file path String8 file_path = seq_file_name; - String8 file_path_normalized = lower_from_str8(scratch.arena, str8_skip_chop_whitespace(file_path)); + String8 file_path_sanitized = str8_copy(scratch.arena, str8_skip_chop_whitespace(file_path)); { - PathStyle file_path_normalized_style = path_style_from_str8(file_path_normalized); - String8List file_path_normalized_parts = str8_split_path(scratch.arena, file_path_normalized); - if(file_path_normalized_style == PathStyle_Relative) + PathStyle file_path_sanitized_style = path_style_from_str8(file_path_sanitized); + String8List file_path_sanitized_parts = str8_split_path(scratch.arena, file_path_sanitized); + if(file_path_sanitized_style == PathStyle_Relative) { String8List obj_folder_path_parts = str8_split_path(scratch.arena, obj_folder_path); - str8_list_concat_in_place(&obj_folder_path_parts, &file_path_normalized_parts); - file_path_normalized_parts = obj_folder_path_parts; - file_path_normalized_style = path_style_from_str8(obj_folder_path); + str8_list_concat_in_place(&obj_folder_path_parts, &file_path_sanitized_parts); + file_path_sanitized_parts = obj_folder_path_parts; + file_path_sanitized_style = path_style_from_str8(obj_folder_path); } - str8_path_list_resolve_dots_in_place(&file_path_normalized_parts, file_path_normalized_style); - file_path_normalized = str8_path_list_join_by_style(scratch.arena, &file_path_normalized_parts, file_path_normalized_style); + str8_path_list_resolve_dots_in_place(&file_path_sanitized_parts, file_path_sanitized_style); + file_path_sanitized = str8_path_list_join_by_style(scratch.arena, &file_path_sanitized_parts, file_path_sanitized_style); } - // rjf: normalized file path -> source file node - U64 file_path_normalized_hash = rdi_hash(file_path_normalized.str, file_path_normalized.size); - U64 src_file_slot = file_path_normalized_hash%in->src_file_map->slots_count; + // rjf: sanitized file path -> source file node + U64 file_path_sanitized_hash = rdi_hash(file_path_sanitized.str, file_path_sanitized.size); + U64 src_file_slot = file_path_sanitized_hash%in->src_file_map->slots_count; P2R_SrcFileNode *src_file_node = 0; for(P2R_SrcFileNode *n = in->src_file_map->slots[src_file_slot]; n != 0; n = n->next) { - if(str8_match(n->src_file->normal_full_path, file_path_normalized, 0)) + if(str8_match(n->src_file->path, file_path_sanitized, 0)) { src_file_node = n; break; @@ -3518,13 +3518,13 @@ p2r_convert(Arena *arena, ASYNC_Root *async_root, P2R_ConvertParams *in) { for EachIndex(path_idx, tasks_outputs[idx].src_file_paths.count) { - String8 file_path_normalized = tasks_outputs[idx].src_file_paths.v[path_idx]; - U64 file_path_normalized_hash = rdi_hash(file_path_normalized.str, file_path_normalized.size); - U64 src_file_slot = file_path_normalized_hash%src_file_map.slots_count; + String8 file_path_sanitized = tasks_outputs[idx].src_file_paths.v[path_idx]; + U64 file_path_sanitized_hash = rdi_hash(file_path_sanitized.str, file_path_sanitized.size); + U64 src_file_slot = file_path_sanitized_hash%src_file_map.slots_count; P2R_SrcFileNode *src_file_node = 0; for(P2R_SrcFileNode *n = src_file_map.slots[src_file_slot]; n != 0; n = n->next) { - if(str8_match(n->src_file->normal_full_path, file_path_normalized, 0)) + if(str8_match(n->src_file->path, file_path_sanitized, 0)) { src_file_node = n; break; @@ -3535,7 +3535,7 @@ p2r_convert(Arena *arena, ASYNC_Root *async_root, P2R_ConvertParams *in) src_file_node = push_array(scratch.arena, P2R_SrcFileNode, 1); SLLStackPush(src_file_map.slots[src_file_slot], src_file_node); src_file_node->src_file = rdim_src_file_chunk_list_push(arena, &all_src_files__sequenceless, total_path_count); - src_file_node->src_file->normal_full_path = push_str8_copy(arena, file_path_normalized); + src_file_node->src_file->path = push_str8_copy(arena, file_path_sanitized); } } }