From eba7500f733fcba26c50d594e1a2775df498752d Mon Sep 17 00:00:00 2001 From: Nikita Smith Date: Wed, 30 Oct 2024 14:36:11 -0700 Subject: [PATCH] /rad_pdb_hash_type_names:lenient Less aggressive optimization for type names. When lenient mode is enabled, the linker replaces only mangled names and compiler-generated lambda type names with hashes. --- src/linker/lnk.c | 4 +- src/linker/lnk_config.c | 38 ++++++++- src/linker/lnk_config.h | 19 +++-- src/linker/lnk_debug_info.c | 151 +++++++++++++++++++++++++++--------- src/linker/lnk_debug_info.h | 2 +- 5 files changed, 167 insertions(+), 47 deletions(-) diff --git a/src/linker/lnk.c b/src/linker/lnk.c index 324c4013..6bc79623 100644 --- a/src/linker/lnk.c +++ b/src/linker/lnk.c @@ -4132,8 +4132,8 @@ l.count += 1; \ if (config->debug_mode == LNK_DebugMode_Full || config->debug_mode == LNK_DebugMode_GHash) { lnk_timer_begin(LNK_Timer_Pdb); - if (config->pdb_hash_type_names == LNK_SwitchState_Yes) { - lnk_replace_type_names_with_hashes(tp, tp_arena, types[CV_TypeIndexSource_TPI], config->pdb_hash_type_name_length, config->pdb_hash_type_name_map); + if (config->pdb_hash_type_names != LNK_TypeNameHashMode_Null && config->pdb_hash_type_names != LNK_TypeNameHashMode_None) { + lnk_replace_type_names_with_hashes(tp, tp_arena, types[CV_TypeIndexSource_TPI], config->pdb_hash_type_names, config->pdb_hash_type_name_length, config->pdb_hash_type_name_map); } String8List pdb_data = lnk_build_pdb(tp, diff --git a/src/linker/lnk_config.c b/src/linker/lnk_config.c index f314f3b0..4b9f9426 100644 --- a/src/linker/lnk_config.c +++ b/src/linker/lnk_config.c @@ -137,7 +137,7 @@ read_only struct { LNK_CmdSwitch_Rad_MtPath, "RAD_MT_PATH", ":EXEPATH", "Path to manifest tool." }, { LNK_CmdSwitch_Rad_OsVer, "RAD_OS_VER", ":##,##", "" }, { LNK_CmdSwitch_Rad_PageSize, "RAD_PAGE_SIZE", ":#", "Must be power of two." }, - { LNK_CmdSwitch_Rad_PdbHashTypeNames, "RAD_PDB_HASH_TYPE_NAMES", ":[NO]", "Replace type names in LF_STRUCTURE and LF_CLASS with hashes." 
}, + { LNK_CmdSwitch_Rad_PdbHashTypeNames, "RAD_PDB_HASH_TYPE_NAMES", ":{NONE|LENIENT|FULL}", "Replace type names in LF_STRUCTURE and LF_CLASS with hashes." }, { LNK_CmdSwitch_Rad_PdbHashTypeNameMap, "RAD_PDB_HASH_TYPE_NAME_MAP", ":FILENAME", "Produce map file with hash -> type name mappings." }, { LNK_CmdSwitch_Rad_PdbHashTypeNameLength, "RAD_PDB_HASH_TYPE_NAME_LENGTH", ":#", "Number of hash bytes to use to replace type name. Default 8 bytes (Max 16)." }, { LNK_CmdSwitch_Rad_PathStyle, "RAD_PATH_STYLE", ":{WindowsAbsolute|UnixAbsolute}", "" }, @@ -227,6 +227,27 @@ lnk_debug_mode_from_string(String8 string) return LNK_DebugMode_Null; } +read_only struct +{ + char *name; + LNK_TypeNameHashMode mode; +} g_type_name_hash_mode_map[] = { + { "none", LNK_TypeNameHashMode_None }, + { "lenient", LNK_TypeNameHashMode_Lenient }, + { "full", LNK_TypeNameHashMode_Full } +}; + +internal LNK_TypeNameHashMode +lnk_type_name_hash_mode_from_string(String8 string) +{ + for (U64 i = 0; i < ArrayCount(g_type_name_hash_mode_map); ++i) { + if (str8_match(str8_cstring(g_type_name_hash_mode_map[i].name), string, StringMatchFlag_CaseInsensitive)) { + return g_type_name_hash_mode_map[i].mode; + } + } + return LNK_TypeNameHashMode_Null; +} + //////////////////////////////// internal LNK_CmdOption * @@ -928,6 +949,7 @@ lnk_config_from_cmd_line(Arena *arena, String8List raw_cmd_line) config->heap_commit = KB(1); config->stack_reserve = MB(1); config->stack_commit = KB(1); + config->pdb_hash_type_names = LNK_TypeNameHashMode_None; config->pdb_hash_type_name_length = 8; // process command line switches @@ -1590,7 +1612,19 @@ lnk_config_from_cmd_line(Arena *arena, String8List raw_cmd_line) } break; case LNK_CmdSwitch_Rad_PdbHashTypeNames: { - lnk_cmd_switch_parse_flag(cmd->value_strings, cmd_switch, &config->pdb_hash_type_names); + String8 mode_string = str8_list_first(&cmd->value_strings); + + LNK_TypeNameHashMode mode; + if (mode_string.size == 0) { + config->pdb_hash_type_names = 
LNK_TypeNameHashMode_Lenient; + } else { + mode = lnk_type_name_hash_mode_from_string(mode_string); + if (mode == LNK_TypeNameHashMode_Null) { + lnk_error_cmd_switch(LNK_Error_Cmdl, cmd_switch, "unknown parameter: \"%S\"", mode_string); + } else { + config->pdb_hash_type_names = mode; + } + } } break; case LNK_CmdSwitch_Rad_PdbHashTypeNameMap: { diff --git a/src/linker/lnk_config.h b/src/linker/lnk_config.h index 0ae45b89..1637d576 100644 --- a/src/linker/lnk_config.h +++ b/src/linker/lnk_config.h @@ -226,6 +226,14 @@ typedef enum Lnk_DebugInfoGuid_ImageBlake3, } LNK_DebugInfoGuidType; +typedef enum +{ + LNK_TypeNameHashMode_Null, + LNK_TypeNameHashMode_None, + LNK_TypeNameHashMode_Lenient, + LNK_TypeNameHashMode_Full, +} LNK_TypeNameHashMode; + typedef struct LNK_Config { LNK_ConfigFlags flags; @@ -274,7 +282,7 @@ typedef struct LNK_Config String8 pdb_name; String8 pdb_alt_path; String8 mt_path; - LNK_SwitchState pdb_hash_type_names; + LNK_TypeNameHashMode pdb_hash_type_names; String8 pdb_hash_type_name_map; U64 pdb_hash_type_name_length; String8List input_list[LNK_Input_Count]; @@ -436,10 +444,11 @@ typedef enum //////////////////////////////// // Enum <-> String -internal String8 lnk_string_cmd_switch_type(LNK_CmdSwitchType type); -internal LNK_CmdSwitchType lnk_cmd_switch_from_string(String8 string); -internal LNK_InputType lnk_input_type_from_string(String8 string); -internal LNK_DebugMode lnk_debug_mode_from_string(String8 string); +internal String8 lnk_string_cmd_switch_type(LNK_CmdSwitchType type); +internal LNK_CmdSwitchType lnk_cmd_switch_from_string(String8 string); +internal LNK_InputType lnk_input_type_from_string(String8 string); +internal LNK_DebugMode lnk_debug_mode_from_string(String8 string); +internal LNK_TypeNameHashMode lnk_type_name_hash_mode_from_string(String8 string); //////////////////////////////// // Command Line Helpers diff --git a/src/linker/lnk_debug_info.c b/src/linker/lnk_debug_info.c index 703d7450..f6fb0ed3 100644 --- 
a/src/linker/lnk_debug_info.c +++ b/src/linker/lnk_debug_info.c @@ -2493,7 +2493,84 @@ lnk_format_u128(U8 *buf, U64 buf_max, U64 length, U128 v) } internal -THREAD_POOL_TASK_FUNC(lnk_replace_type_names_with_hashes_task) +THREAD_POOL_TASK_FUNC(lnk_replace_type_names_with_hashes_lenient_task) +{ + ProfBeginFunction(); + + LNK_TypeNameReplacer *task = raw_task; + Rng1U64 range = task->ranges[task_id]; + CV_DebugT debug_t = task->debug_t; + U64 hash_length = task->hash_length; + + B32 make_map = task->make_map; + Arena *map_arena = 0; + String8List *map = 0; + if (make_map) { + map_arena = task->map_arena->v[task_id]; + map = &task->maps[task_id]; + } + + U64 hash_max_chars = hash_length; + U8 temp[128]; + + for (U64 leaf_idx = range.min; leaf_idx < range.max; ++leaf_idx) { + CV_Leaf leaf = cv_debug_t_get_leaf(debug_t, leaf_idx); + if (leaf.kind == CV_LeafKind_STRUCTURE || leaf.kind == CV_LeafKind_CLASS) { + CV_UDTInfo udt_info = cv_get_udt_info(leaf.kind, leaf.data); + + if (udt_info.props & CV_TypeProp_HasUniqueName && udt_info.unique_name.size > hash_max_chars) { + // hash unique name + U128 name_hash; + blake3_hasher hasher; blake3_hasher_init(&hasher); + blake3_hasher_update(&hasher, udt_info.unique_name.str, udt_info.unique_name.size); + blake3_hasher_finalize(&hasher, (U8*)&name_hash, sizeof(name_hash)); + + // emit hash -> unique name map + if (make_map) { + lnk_format_u128(temp, sizeof(temp), hash_length, name_hash); + str8_list_pushf(map_arena, map, "%s %.*s\n", temp, str8_varg(udt_info.unique_name)); + } + + // parse leaf size + CV_NumericParsed dummy; + U64 numeric_size = cv_read_numeric(leaf.data, sizeof(CV_LeafStruct), &dummy); + + U64 colon_pos = str8_find_needle_reverse(udt_info.name, 0, str8_lit("size = sizeof(CV_LeafKind) + sizeof(CV_LeafStruct) + numeric_size + udt_info.name.size + 1; + + // discard unique name + CV_LeafStruct *lf = (CV_LeafStruct *)(header + 1); + lf->props &= ~CV_TypeProp_HasUniqueName; + } else { + // replace uniuqe type name with 
hash + udt_info.unique_name.str = udt_info.name.str + udt_info.name.size + 1; + udt_info.unique_name.size = lnk_format_u128(udt_info.unique_name.str, udt_info.unique_name.size, hash_length, name_hash); + + // update leaf header + CV_LeafHeader *header = cv_debug_t_get_leaf_header(debug_t, leaf_idx); + header->size = sizeof(CV_LeafKind) + + sizeof(CV_LeafStruct) + + numeric_size + + udt_info.name.size + 1 + + udt_info.unique_name.size + 1; + } + } + } + } + + ProfEnd(); +} + +internal +THREAD_POOL_TASK_FUNC(lnk_replace_type_names_with_hashes_full_task) { ProfBeginFunction(); @@ -2518,54 +2595,39 @@ THREAD_POOL_TASK_FUNC(lnk_replace_type_names_with_hashes_task) if (leaf.kind == CV_LeafKind_STRUCTURE || leaf.kind == CV_LeafKind_CLASS) { CV_UDTInfo udt_info = cv_get_udt_info(leaf.kind, leaf.data); - CV_NumericParsed dummy; - U64 numeric_size = cv_read_numeric(leaf.data, sizeof(CV_LeafStruct), &dummy); - - U128 name_hash; - if (udt_info.props & CV_TypeProp_HasUniqueName) { - blake3_hasher hasher; blake3_hasher_init(&hasher); - blake3_hasher_update(&hasher, udt_info.unique_name.str, udt_info.unique_name.size); - blake3_hasher_finalize(&hasher, (U8*)&name_hash, sizeof(name_hash)); - - if (make_map) { - lnk_format_u128(temp, sizeof(temp), hash_length, name_hash); - str8_list_pushf(map_arena, map, "%s %.*s\n", temp, str8_varg(udt_info.unique_name)); + if (udt_info.name.size > hash_max_chars) { + // pick name to hash + String8 name; + if (udt_info.props & CV_TypeProp_HasUniqueName) { + name = udt_info.unique_name; + } else { + name = udt_info.name; } - } else { + + // hash name + U128 name_hash; blake3_hasher hasher; blake3_hasher_init(&hasher); blake3_hasher_update(&hasher, udt_info.name.str, udt_info.name.size); blake3_hasher_finalize(&hasher, (U8*)&name_hash, sizeof(name_hash)); + // emit hash -> name map if (make_map) { lnk_format_u128(temp, sizeof(temp), hash_length, name_hash); - str8_list_pushf(map_arena, map, "%s %.*s\n", temp, str8_varg(udt_info.name)); + 
str8_list_pushf(map_arena, map, "%s %.*s\n", temp, str8_varg(name)); } - } - if (udt_info.name.size < hash_max_chars) { - - U64 buf_size = sizeof(CV_LeafHeader) + sizeof(CV_LeafStruct) + numeric_size + (hash_max_chars + 1) * 2; - U8 *buf = push_array(arena, U8, buf_size); - - MemoryCopy(buf + sizeof(CV_LeafHeader), leaf.data.str, sizeof(CV_LeafStruct) + numeric_size); - - CV_LeafStruct *lf = (CV_LeafStruct *)(buf + sizeof(CV_LeafHeader)); - lf->props &= ~CV_TypeProp_HasUniqueName; - - udt_info.name.str = buf + sizeof(CV_LeafHeader) + sizeof(CV_LeafStruct) + numeric_size; - udt_info.name.size = lnk_format_u128(udt_info.name.str, hash_max_chars + 1, hash_length, name_hash); - - CV_LeafHeader *new_header = (CV_LeafHeader *)buf; - new_header->size = sizeof(CV_LeafKind) + sizeof(CV_LeafStruct) + numeric_size + udt_info.name.size + 1; - new_header->kind = leaf.kind; - - debug_t.v[leaf_idx] = buf; - } else { + // replace name with hash udt_info.name.size = lnk_format_u128(udt_info.name.str, udt_info.name.size, hash_length, name_hash); + // parse struct size + CV_NumericParsed dummy; + U64 numeric_size = cv_read_numeric(leaf.data, sizeof(CV_LeafStruct), &dummy); + + // update header CV_LeafHeader *header = cv_debug_t_get_leaf_header(debug_t, leaf_idx); header->size = sizeof(CV_LeafKind) + sizeof(CV_LeafStruct) + numeric_size + udt_info.name.size + 1; + // discard unique name CV_LeafStruct *lf = (CV_LeafStruct *)(header + 1); lf->props &= ~CV_TypeProp_HasUniqueName; } @@ -2576,23 +2638,38 @@ THREAD_POOL_TASK_FUNC(lnk_replace_type_names_with_hashes_task) } internal void -lnk_replace_type_names_with_hashes(TP_Context *tp, TP_Arena *arena, CV_DebugT debug_t, U64 hash_length, String8 map_name) +lnk_replace_type_names_with_hashes(TP_Context *tp, TP_Arena *arena, CV_DebugT debug_t, LNK_TypeNameHashMode mode, U64 hash_length, String8 map_name) { ProfBeginFunction(); Temp scratch = scratch_begin(arena->v, arena->count); + // init task context LNK_TypeNameReplacer task = {0}; 
task.debug_t = debug_t; task.ranges = tp_divide_work(scratch.arena, debug_t.count, tp->worker_count); task.hash_length = Clamp(1, hash_length, 16); + if (map_name.size > 0) { task.make_map = 1; task.map_arena = tp_arena_alloc(tp); task.maps = push_array(scratch.arena, String8List, tp->worker_count); } - tp_for_parallel(tp, arena, tp->worker_count, lnk_replace_type_names_with_hashes_task, &task); + // pick task function + TP_TaskFunc *func = 0; + switch (mode) { + case LNK_TypeNameHashMode_Null: + case LNK_TypeNameHashMode_None: + break; + case LNK_TypeNameHashMode_Lenient: func = lnk_replace_type_names_with_hashes_lenient_task; break; + case LNK_TypeNameHashMode_Full: func = lnk_replace_type_names_with_hashes_full_task; break; + } + + // run task + tp_for_parallel(tp, arena, tp->worker_count, func, &task); + + // optionally write out map file if (task.make_map) { String8List map = {0}; str8_list_concat_in_place_array(&map, task.maps, tp->worker_count); diff --git a/src/linker/lnk_debug_info.h b/src/linker/lnk_debug_info.h index 8f9c1fa7..39abcc79 100644 --- a/src/linker/lnk_debug_info.h +++ b/src/linker/lnk_debug_info.h @@ -534,7 +534,7 @@ internal void lnk_patch_leaves(TP_Context *tp, LNK_CodeViewInput internal String8Node * lnk_copy_raw_leaf_arr_to_type_server(TP_Context *tp, CV_DebugT types, PDB_TypeServer *type_server); internal CV_DebugT * lnk_import_types(TP_Context *tp, TP_Arena *tp_temp, LNK_CodeViewInput *input); -internal void lnk_replace_type_names_with_hashes(TP_Context *tp, TP_Arena *arena, CV_DebugT debug_t, U64 hash_length, String8 map_name); +internal void lnk_replace_type_names_with_hashes(TP_Context *tp, TP_Arena *arena, CV_DebugT debug_t, LNK_TypeNameHashMode mode, U64 hash_length, String8 map_name); //////////////////////////////// // RAD Debug info