diff --git a/build.bat b/build.bat
index 6529d9e5..f77e5df9 100644
--- a/build.bat
+++ b/build.bat
@@ -107,6 +107,7 @@ if not "%no_meta%"=="1" (
 pushd build
 if "%raddbg%"=="1" set didbuild=1 && %compile% ..\src\raddbg\raddbg_main.c %compile_link% %link_icon% %out%raddbg.exe || exit /b 1
 if "%radlink%"=="1" set didbuild=1 && %compile% ..\src\linker\lnk.c %compile_link% %link_natvis%"%~dp0\src\linker\linker.natvis" %out%radlink.exe || exit /b 1
+if "%radcon%"=="1" set didbuild=1 && %compile% ..\src\radcon\radcon_main.c %compile_link% %out%radcon.exe || exit /b 1
 if "%raddump%"=="1" set didbuild=1 && %compile% ..\src\raddump\raddump_main.c %compile_link% %out%raddump.exe || exit /b 1
 if "%rdi_from_pdb%"=="1" set didbuild=1 && %compile% ..\src\rdi_from_pdb\rdi_from_pdb_main.c %compile_link% %out%rdi_from_pdb.exe || exit /b 1
 if "%rdi_from_dwarf%"=="1" set didbuild=1 && %compile% ..\src\rdi_from_dwarf\rdi_from_dwarf_main.c %compile_link% %out%rdi_from_dwarf.exe || exit /b 1
diff --git a/src/radcon/radcon.c b/src/radcon/radcon.c
new file mode 100644
index 00000000..e7c6decf
--- /dev/null
+++ b/src/radcon/radcon.c
@@ -0,0 +1,487 @@
+// Reads the whole file at `path` through os_data_from_file_path(). Empty data
+// is treated as a read failure: the error is printed to stderr and the process
+// is terminated with os_abort(1), so the returned string is never empty.
+internal String8
+rc_data_from_file_path(Arena *arena, String8 path)
+{
+  String8 data = os_data_from_file_path(arena, path);
+  if (data.size == 0) {
+    fprintf(stderr, "error: unable to read file %.*s\n", str8_varg(path));
+    os_abort(1);
+  }
+  return data;
+}
+
+// Builds the conversion context described by the command line:
+//  - loads the inputs named by -pe: -pdb: -elf: -elf_debug: and validates
+//    their magic, then classifies the untyped inputs by magic
+//  - rejects input sets that mix (PE|PDB) with (ELF|ELF_DEBUG)
+//  - picks the driver (-driver: or auto-detected) and the image/debug data
+//  - resolves the output path (-out:, else image/debug name with "rdi" ext)
+// Invalid input is reported on stderr and terminates through os_abort(1).
+// File data is loaded on `arena`; PE parsing temporaries live on scratch.
+internal RC_Context
+rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl)
+{
+  Temp scratch = scratch_begin(&arena, 1);
+
+  if (cmdl->inputs.node_count > 2) {
+    fprintf(stderr, "error: too many input files on the command line.\n");
+    os_abort(1);
+  }
+
+  B32 is_pe_present = 0;
+  B32 is_pdb_present = 0;
+  B32 is_elf_present = 0;
+  B32 is_elf_debug_present = 0;
+  String8 pe_name = {0};
+  String8 pe_data = {0};
+  String8 pdb_name = {0};
+  String8 pdb_data = {0};
+  String8 elf_name = {0};
+  String8 elf_data = {0};
+  String8 elf_debug_name = {0};
+  String8 elf_debug_data = {0};
+
+  //
+  // Set typed inputs
+  //
+  if (cmd_line_has_flag(cmdl, str8_lit("pe"))) {
+    pe_name = cmd_line_string(cmdl, str8_lit("pe"));
+    pe_data = rc_data_from_file_path(arena, pe_name);
+    if (!pe_check_magic(pe_data)) {
+      fprintf(stderr, "error: -pe:%.*s is not of PE format\n", str8_varg(pe_name));
+      os_abort(1);
+    }
+    is_pe_present = 1;
+  }
+  if (cmd_line_has_flag(cmdl, str8_lit("pdb"))) {
+    pdb_name = cmd_line_string(cmdl, str8_lit("pdb"));
+    pdb_data = rc_data_from_file_path(arena, pdb_name);
+    if (!msf_check_magic_20(pdb_data) && !msf_check_magic_70(pdb_data)) {
+      fprintf(stderr, "error: -pdb:%.*s is not of PDB format\n", str8_varg(pdb_name));
+      os_abort(1);
+    }
+    is_pdb_present = 1;
+  }
+  if (cmd_line_has_flag(cmdl, str8_lit("elf"))) {
+    elf_name = cmd_line_string(cmdl, str8_lit("elf"));
+    elf_data = rc_data_from_file_path(arena, elf_name);
+    if (!elf_check_magic(elf_data)) {
+      fprintf(stderr, "error: -elf:%.*s is not of ELF format\n", str8_varg(elf_name));
+      os_abort(1);
+    }
+    is_elf_present = 1;
+  }
+  if (cmd_line_has_flag(cmdl, str8_lit("elf_debug"))) {
+    elf_debug_name = cmd_line_string(cmdl, str8_lit("elf_debug"));
+    elf_debug_data = rc_data_from_file_path(arena, elf_debug_name);
+    if (!elf_check_magic(elf_debug_data)) {
+      fprintf(stderr, "error: -elf_debug:%.*s is not of ELF format\n", str8_varg(elf_debug_name));
+      os_abort(1);
+    }
+    is_elf_debug_present = 1;
+  }
+
+  //
+  // Load inputs
+  //
+  for (String8Node *input_n = cmdl->inputs.first; input_n != 0; input_n = input_n->next) {
+    String8 input_data = os_data_from_file_path(arena, input_n->string);
+
+    if (input_data.size == 0) {
+      fprintf(stderr, "unable to read input %.*s\n", str8_varg(input_n->string));
+      os_abort(1);
+    }
+
+    if (pe_check_magic(input_data)) {
+      if (is_pe_present) {
+        fprintf(stderr, "error: too many PE files are specified on the command line\n");
+        fprintf(stderr, " selected: %.*s\n", str8_varg(pe_name));
+        fprintf(stderr, " current: %.*s\n", str8_varg(input_n->string));
+        os_abort(1);
+      }
+      pe_data = input_data;
+      pe_name = input_n->string;
+      is_pe_present = 1;
+    } else if (elf_check_magic(input_data)) {
+      ELF_BinInfo elf = elf_bin_from_data(input_data);
+      B32 is_dwarf_present = dw_is_dwarf_present_elf_section_table(input_data, &elf);
+      if (is_dwarf_present) {
+        if (is_elf_debug_present) {
+          fprintf(stderr, "error: ambiguous input, both ELFs have DWARF debug sections, please use --elf: --elf_debug: to clarify inputs.\n");
+          os_abort(1);
+        }
+        elf_debug_name = input_n->string;
+        elf_debug_data = input_data;
+        is_elf_debug_present = 1;
+      } else {
+        // mirror the PE/PDB handling: a second image ELF is an error rather
+        // than a silent replacement of the one selected earlier
+        if (is_elf_present) {
+          fprintf(stderr, "error: too many ELF files are specified on the command line\n");
+          fprintf(stderr, " selected: %.*s\n", str8_varg(elf_name));
+          fprintf(stderr, " current: %.*s\n", str8_varg(input_n->string));
+          os_abort(1);
+        }
+        elf_name = input_n->string;
+        elf_data = input_data;
+        is_elf_present = 1;
+      }
+    } else if (msf_check_magic_20(input_data) || msf_check_magic_70(input_data)) {
+      if (is_pdb_present) {
+        fprintf(stderr, "error: too many PDB files are specified on the command line\n");
+        fprintf(stderr, " selected: %.*s\n", str8_varg(pdb_name));
+        fprintf(stderr, " current: %.*s\n", str8_varg(input_n->string));
+        os_abort(1);
+      }
+      pdb_name = input_n->string;
+      pdb_data = input_data;
+      is_pdb_present = 1;
+    } else {
+      fprintf(stderr, "error: unknown file format %.*s\n", str8_varg(input_n->string));
+      os_abort(1);
+    }
+  }
+
+  //
+  // Validate input combos
+  //
+  if ((is_pe_present || is_pdb_present) && (is_elf_present || is_elf_debug_present)) {
+    fprintf(stderr, "error: invalid combination of inputs provided, we convert only (PE|PDB) or (ELF|ELF_DEBUG) at a time.\n");
+    if (is_pe_present) {
+      fprintf(stderr, " PE: %.*s\n", str8_varg(pe_name));
+    }
+    if (is_pdb_present) {
+      fprintf(stderr, " PDB: %.*s\n", str8_varg(pdb_name));
+    }
+    if (is_elf_present) {
+      fprintf(stderr, " ELF: %.*s\n", str8_varg(elf_name));
+    }
+    if (is_elf_debug_present) {
+      fprintf(stderr, " ELF Debug: %.*s\n", str8_varg(elf_debug_name));
+    }
+    os_abort(1);
+  }
+
+  //
+  // Pick conversion driver
+  //
+  RC_Driver driver = RC_Driver_Null;
+  if (cmd_line_has_flag(cmdl, str8_lit("driver"))) {
+    String8 driver_name = cmd_line_string(cmdl, str8_lit("driver"));
+    if (str8_match(driver_name, str8_lit("dwarf"), StringMatchFlag_CaseInsensitive)) {
+      driver = RC_Driver_Dwarf;
+    } else if (str8_match(driver_name, str8_lit("pdb"), StringMatchFlag_CaseInsensitive)) {
+      driver = RC_Driver_Pdb;
+    } else {
+      fprintf(stderr, "error: unknown driver %.*s\n", str8_varg(driver_name));
+      os_abort(1);
+    }
+  }
+
+  ImageType image = Image_Null;
+  String8 image_name = {0};
+  String8 image_data = {0};
+  String8 debug_name = {0};
+  String8 debug_data = {0};
+  // NOTE: declared ahead of every `goto driver_found` so the initializers
+  // always run; both are read after the label on all paths.
+  B32 elf_has_debug_link = 0;
+  ELF_GnuDebugLink debug_link = {0};
+
+  //
+  // Input is PE/COFF
+  //
+  B32 check_guid = 0;
+  Guid pe_pdb_guid = {0};
+  if (is_pe_present) {
+    PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, pe_data);
+
+    String8 raw_debug_dir = str8_substr(pe_data, pe.data_dir_franges[PE_DataDirectoryIndex_DEBUG]);
+    PE_DebugInfoList debug_dir = pe_parse_debug_directory(scratch.arena, pe_data, raw_debug_dir);
+    for (PE_DebugInfoNode *debug_n = debug_dir.first; debug_n != 0; debug_n = debug_n->next) {
+      PE_DebugInfo *debug = &debug_n->v;
+      if (debug->header.type == PE_DebugDirectoryType_CODEVIEW) {
+        if (debug->u.codeview.magic == PE_CODEVIEW_PDB70_MAGIC) {
+          check_guid = 1;
+          pe_pdb_guid = debug->u.codeview.pdb70.header.guid;
+
+          if (!is_pdb_present) {
+            // NOTE(review): the path comes from the debug directory parsed on
+            // scratch - confirm it does not point into scratch memory.
+            pdb_name = debug->u.codeview.pdb70.path;
+            pdb_data = rc_data_from_file_path(arena, pdb_name);
+            is_pdb_present = 1;
+          }
+
+          break;
+        }
+      }
+    }
+
+    if (driver == RC_Driver_Dwarf || driver == RC_Driver_Null) {
+      String8 raw_sections = str8_substr(pe_data, rng_1u64(pe.section_array_off, pe.section_array_off+sizeof(COFF_SectionHeader)*pe.section_count));
+      U64 section_count = raw_sections.size / sizeof(COFF_SectionHeader);
+      COFF_SectionHeader *section_array = (COFF_SectionHeader *)raw_sections.str;
+      if (dw_is_dwarf_present_coff_section_table(pe_data, pe.string_table_off, section_count, section_array)) {
+        driver = RC_Driver_Dwarf;
+        image = Image_CoffPe;
+        image_name = pe_name;
+        image_data = pe_data;
+        debug_name = pe_name;
+        debug_data = pe_data;
+        goto driver_found;
+      } else {
+        if (driver == RC_Driver_Dwarf) {
+          fprintf(stderr, "error: image doesn't have DWARF debug sections.\n");
+          os_abort(1);
+        }
+      }
+    }
+  }
+
+
+  //
+  // Input is PDB
+  // (an explicit -driver:pdb request must take this path as well, otherwise
+  // the context is left without debug data)
+  //
+  if ((driver == RC_Driver_Null || driver == RC_Driver_Pdb) && is_pdb_present) {
+    if (is_pe_present) {
+      image = Image_CoffPe;
+      image_name = pe_name;
+      image_data = pe_data;
+    }
+    driver = RC_Driver_Pdb;
+    debug_name = pdb_name;
+    debug_data = pdb_data;
+    goto driver_found;
+  }
+
+  if (is_elf_present || is_elf_debug_present) {
+    if (driver != RC_Driver_Null && driver != RC_Driver_Dwarf) {
+      fprintf(stderr, "ELF inputs are only supported when using DWARF driver.\n");
+      os_abort(1);
+    }
+
+    //
+    // Load image ELF
+    //
+    ELF_BinInfo elf = elf_bin_from_data(elf_data);
+    B32 has_elf_dwarf = dw_is_dwarf_present_elf_section_table(elf_data, &elf);
+
+    //
+    // ELF doesn't have debug info and no .debug was specified on command line,
+    // try to load .debug via debug link
+    //
+    if (is_elf_present && !is_elf_debug_present) {
+      elf_has_debug_link = elf_parse_debug_link(elf_data, &elf, &debug_link);
+    }
+    if (elf_has_debug_link) {
+      elf_debug_name = debug_link.path;
+      elf_debug_data = rc_data_from_file_path(arena, elf_debug_name);
+      is_elf_debug_present = 1;
+    }
+
+    //
+    // Load .debug ELF
+    //
+    ELF_BinInfo elf_debug = elf_bin_from_data(elf_debug_data);
+    B32 has_elf_debug_dwarf = dw_is_dwarf_present_elf_section_table(elf_debug_data, &elf_debug);
+
+    //
+    // Input is image ELF and .debug ELF
+    //
+    B32 is_split_elf = is_elf_present && is_elf_debug_present && !has_elf_dwarf && has_elf_debug_dwarf;
+    if (is_split_elf) {
+      driver = RC_Driver_Dwarf;
+      image = ELF_HdrIs64Bit(elf.hdr.e_ident) ? Image_Elf64 : Image_Elf32;
+      image_name = elf_name;
+      image_data = elf_data;
+      debug_name = elf_debug_name;
+      debug_data = elf_debug_data;
+      goto driver_found;
+    }
+
+    //
+    // Input ELF is image with debug info
+    //
+    B32 is_monolithic_elf = is_elf_present && !is_elf_debug_present && has_elf_dwarf;
+    if (is_monolithic_elf) {
+      driver = RC_Driver_Dwarf;
+      image = ELF_HdrIs64Bit(elf.hdr.e_ident) ? Image_Elf64 : Image_Elf32;
+      image_name = elf_name;
+      image_data = elf_data;
+      debug_name = elf_name;
+      debug_data = elf_data;
+      goto driver_found;
+    }
+
+    //
+    // Input ELF is .debug
+    //
+    B32 is_debug_elf = !is_elf_present && is_elf_debug_present && has_elf_debug_dwarf;
+    if (is_debug_elf) {
+      driver = RC_Driver_Dwarf;
+      image = ELF_HdrIs64Bit(elf_debug.hdr.e_ident) ? Image_Elf64 : Image_Elf32;
+      debug_name = elf_debug_name;
+      debug_data = elf_debug_data;
+      goto driver_found;
+    }
+  }
+
+  driver_found:;
+
+  //
+  // Handle -out param
+  //
+  String8 out_name = {0};
+  if (cmd_line_has_flag(cmdl, str8_lit("out"))) {
+    out_name = cmd_line_string(cmdl, str8_lit("out"));
+    if (out_name.size == 0) {
+      fprintf(stderr, "error: -out parameter doesn't have a value\n");
+      os_abort(1);
+    }
+  } else {
+    if (image_name.size) {
+      out_name = path_replace_file_extension(arena, image_name, str8_lit("rdi"));
+    } else {
+      out_name = path_replace_file_extension(arena, debug_name, str8_lit("rdi"));
+    }
+  }
+
+
+  //
+  // Validate driver input
+  //
+  if (driver == RC_Driver_Pdb &&
+      !is_pdb_present && (is_elf_present || is_elf_debug_present)) {
+    fprintf(stderr, "error: DWARF is an invalid input for PDB driver\n");
+    os_abort(1);
+  }
+
+
+  RC_Context ctx = {0};
+  ctx.driver = driver;
+  ctx.image = image;
+  ctx.image_name = image_name;
+  ctx.image_data = image_data;
+  ctx.debug_name = debug_name;
+  ctx.debug_data = debug_data;
+  if (check_guid) {
+    ctx.flags |= RC_Flag_CheckPdbGuid;
+    ctx.guid = pe_pdb_guid;
+  }
+  if (elf_has_debug_link) {
+    ctx.flags |= RC_Flag_CheckElfChecksum;
+    ctx.debug_link = debug_link;
+  }
+  ctx.out_name = out_name;
+
+  scratch_end(scratch);
+  return ctx;
+}
+
+// Runs the driver selected in `rc`, then bakes and serializes the result
+// (compressing first when RC_Flag_Compress is set) and returns the RDI file
+// blobs. Everything is allocated on `arena`, because the returned list - and
+// possibly the data it points at - has to outlive this call; a scratch arena
+// released before returning would leave the caller with a dangling list.
+internal String8List
+rc_run(Arena *arena, RC_Context *rc)
+{
+  ProfBegin("Convert");
+  RDIM_HelpState *help_state = rdim_help_init();
+  RDIM_BakeParams *convert2bake = 0;
+  switch (rc->driver) {
+  case RC_Driver_Null: break;
+  case RC_Driver_Dwarf: convert2bake = d2r_convert(arena, help_state, rc); break;
+  case RC_Driver_Pdb: convert2bake = p2r_convert(arena, help_state, rc); break;
+  }
+  ProfEnd();
+
+  if (rc->errors.node_count) {
+    NotImplemented;
+  }
+
+  ProfBegin("Bake");
+  // NOTE(review): RC_Driver_Null leaves convert2bake at 0 and it is still
+  // handed to rdim_bake() here - confirm that case is handled.
+  RDIM_BakeResults bake2srlz = rdim_bake(help_state, convert2bake);
+  ProfEnd();
+
+  ProfBegin("Serialize Bake");
+  RDIM_SerializedSectionBundle srlz2file = rdim_serialized_section_bundle_from_bake_results(&bake2srlz);
+  ProfEnd();
+
+  RDIM_SerializedSectionBundle srlz2file_compressed = srlz2file;
+  if (rc->flags & RC_Flag_Compress) {
+    ProfBegin("Compress");
+    srlz2file_compressed = rdim_compress(arena, &srlz2file);
+    ProfEnd();
+  }
+
+  ProfBegin("Serialize");
+  String8List raw_rdi = rdim_file_blobs_from_section_bundle(arena, &srlz2file_compressed);
+  ProfEnd();
+
+  return raw_rdi;
+}
+
+// Convenience wrapper: builds the context from `cmdl`, runs the conversion and
+// joins the resulting blobs into one string allocated on `arena`.
+internal String8
+rc_rdi_from_cmd_line(Arena *arena, CmdLine *cmdl)
+{
+  Temp scratch = scratch_begin(&arena, 1);
+  RC_Context rc = rc_context_from_cmd_line(scratch.arena, cmdl);
+  String8List raw_rdi = rc_run(scratch.arena, &rc);
+  String8 result = str8_list_join(arena, &raw_rdi, 0);
+  scratch_end(scratch);
+  return result;
+}
+
+internal void
+rc_main(CmdLine *cmdl)
+{
+  B32 do_help = (cmd_line_has_flag(cmdl, str8_lit("help")) ||
+                 cmd_line_has_flag(cmdl, str8_lit("h")) ||
+                 cmd_line_has_flag(cmdl, str8_lit("?")) ||
+                 cmdl->argc == 1);
+  if (do_help) {
+    fprintf(stderr, "--- Help ---------------------------------------------------------------------\n");
+    fprintf(stderr, " %s\n\n", BUILD_TITLE_STRING_LITERAL);
+ 
fprintf(stderr, " Usage: radcon [Options] [Files]\n\n"); + fprintf(stderr, " Options:\n"); + fprintf(stderr, " -pe: Path to Win32 executable image\n"); + fprintf(stderr, " -pdb: Path to PDB\n"); + fprintf(stderr, " -elf: Path to ELF\n"); + fprintf(stderr, " -elf_debug: Path to ELF with debug info\n"); + fprintf(stderr, " -out: Path at which the output RDI debug info will be written\n"); + fprintf(stderr, " -driver: Sets converter for debug info\n"); + } else { + Temp scratch = scratch_begin(0,0); + + // make converter context + RC_Context rc = rc_context_from_cmd_line(scratch.arena, cmdl); + + // make RDI from context + String8List raw_rdi = rc_run(scratch.arena, &rc); + + // output RDI + if (rc.errors.node_count == 0) { + if (!os_write_data_list_to_file_path(rc.out_name, raw_rdi)) { + str8_list_pushf(scratch.arena, &rc.errors, "no write access to path %.*s", str8_varg(rc.out_name)); + } + } + + // report any errors + for (String8Node *error_n = rc.errors.first; error_n != 0; error_n = error_n->next) { + fprintf(stderr, "error: %.*s\n", str8_varg(error_n->string)); + } + + scratch_end(scratch); + } +} + diff --git a/src/radcon/radcon.h b/src/radcon/radcon.h new file mode 100644 index 00000000..95631068 --- /dev/null +++ b/src/radcon/radcon.h @@ -0,0 +1,66 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#ifndef RADCON_H +#define RADCON_H + +typedef U32 RC_Flags; +enum +{ + RC_Flag_Strings = (1 << 0), + RC_Flag_IndexRuns = (1 << 1), + RC_Flag_BinarySections = (1 << 2), + RC_Flag_Units = (1 << 3), + RC_Flag_Procedures = (1 << 4), + RC_Flag_GlobalVariables = (1 << 5), + RC_Flag_ThreadVariables = (1 << 6), + RC_Flag_Scopes = (1 << 7), + RC_Flag_Locals = (1 << 8), + RC_Flag_Types = (1 << 9), + RC_Flag_UDTs = (1 << 10), + RC_Flag_LineInfo = (1 << 11), + RC_Flag_GlobalVariableNameMap = (1 << 12), + RC_Flag_ThreadVariableNameMap = (1 << 13), + RC_Flag_ProcedureNameMap = (1 << 14), + RC_Flag_TypeNameMap = 
(1 << 15), + RC_Flag_LinkNameProcedureNameMap= (1 << 16), + RC_Flag_NormalSourcePathNameMap = (1 << 17), + RC_Flag_Compress = (1 << 18), + RC_Flag_StrictDwarfParse = (1 << 19), + RC_Flag_Deterministic = (1 << 20), + RC_Flag_CheckPdbGuid = (1 << 21), + RC_Flag_CheckElfChecksum = (1 << 22), + RC_Flag_All = 0xffffffff, +}; + +typedef enum +{ + RC_Driver_Null, + RC_Driver_Dwarf, + RC_Driver_Pdb, +} RC_Driver; + +typedef struct RC_Context +{ + ImageType image; + RC_Driver driver; + String8 image_name; + String8 image_data; + String8 debug_name; + String8 debug_data; + String8 out_name; + RC_Flags flags; + Guid guid; + ELF_GnuDebugLink debug_link; + String8List errors; +} RC_Context; + +//////////////////////////////// + +internal RC_Context rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl); +internal String8List rc_run(Arena *arena, RC_Context *rc); +internal String8 rc_rdi_from_cmd_line(Arena *arena, CmdLine *cmdl); +internal void rc_main(CmdLine *cmdl); + +#endif // RADCON_H + diff --git a/src/radcon/radcon_coff.c b/src/radcon/radcon_coff.c new file mode 100644 index 00000000..9f5b6059 --- /dev/null +++ b/src/radcon/radcon_coff.c @@ -0,0 +1,102 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +internal RDI_Arch +c2r_rdi_arch_from_coff_machine(COFF_MachineType machine) +{ + switch (machine) { + case COFF_Machine_X86: return RDI_Arch_X86; + case COFF_Machine_X64: return RDI_Arch_X64; + + case COFF_Machine_Unknown: + case COFF_Machine_Am33: + case COFF_Machine_Arm: + case COFF_Machine_Arm64: + case COFF_Machine_ArmNt: + case COFF_Machine_Ebc: + case COFF_Machine_Ia64: + case COFF_Machine_M32R: + case COFF_Machine_Mips16: + case COFF_Machine_MipsFpu: + case COFF_Machine_MipsFpu16: + case COFF_Machine_PowerPc: + case COFF_Machine_PowerPcFp: + case COFF_Machine_R4000: + case COFF_Machine_RiscV32: + case COFF_Machine_RiscV64: + case COFF_Machine_Sh3: + case COFF_Machine_Sh3Dsp: + case COFF_Machine_Sh4: 
+ case COFF_Machine_Sh5: + case COFF_Machine_Thumb: + case COFF_Machine_WceMipsV2: + NotImplemented; + default: + return RDI_Arch_NULL; + } +} + +internal RDI_BinarySectionFlags +c2r_rdi_binary_section_flags_from_coff_section_flags(COFF_SectionFlags flags) +{ + RDI_BinarySectionFlags result = 0; + if(flags & COFF_SectionFlag_MemRead) + { + result |= RDI_BinarySectionFlag_Read; + } + if(flags & COFF_SectionFlag_MemWrite) + { + result |= RDI_BinarySectionFlag_Write; + } + if(flags & COFF_SectionFlag_MemExecute) + { + result |= RDI_BinarySectionFlag_Execute; + } + return(result); +} + +internal RDIM_BinarySectionList +c2r_rdi_binary_sections_from_coff_sections(Arena *arena, String8 image_data, U64 string_table_off, U64 sectab_count, COFF_SectionHeader *sectab) +{ + ProfBeginFunction(); + + RDIM_BinarySectionList binary_sections = {0}; + + for (U64 isec = 0; isec < sectab_count; ++isec) { + COFF_SectionHeader *coff_sec = §ab[isec]; + RDIM_BinarySection *sec = rdim_binary_section_list_push(arena, &binary_sections); + + sec->name = coff_name_from_section_header(image_data, coff_sec, string_table_off); + sec->flags = c2r_rdi_binary_section_flags_from_coff_section_flags(coff_sec->flags); + sec->voff_first = coff_sec->voff; + sec->voff_opl = coff_sec->voff + coff_sec->vsize; + sec->foff_first = coff_sec->foff; + sec->foff_opl = coff_sec->foff + coff_sec->fsize; + } + + ProfEnd(); + return binary_sections; +} + +internal RDIM_TopLevelInfo +c2r_make_rdim_top_level_info(String8 image_name, RDI_Arch arch, U64 exe_hash, U64 sectab_count, COFF_SectionHeader *sectab) +{ + U64 exe_voff_max = 0; + { + COFF_SectionHeader *coff_sec_ptr = sectab; + COFF_SectionHeader *coff_ptr_opl = sectab + sectab_count; + for (;coff_sec_ptr < coff_ptr_opl; coff_sec_ptr += 1) { + U64 sec_voff_max = coff_sec_ptr->voff + coff_sec_ptr->vsize; + exe_voff_max = Max(exe_voff_max, sec_voff_max); + } + } + + RDIM_TopLevelInfo top_level_info = {0}; + top_level_info.arch = arch; + top_level_info.exe_name = 
str8_skip_last_slash(image_name); + top_level_info.exe_hash = exe_hash; + top_level_info.voff_max = exe_voff_max; + top_level_info.producer_name = str8_lit(BUILD_TITLE_STRING_LITERAL); + + return top_level_info; +} diff --git a/src/radcon/radcon_coff.h b/src/radcon/radcon_coff.h new file mode 100644 index 00000000..abed4aef --- /dev/null +++ b/src/radcon/radcon_coff.h @@ -0,0 +1,13 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#ifndef RADCON_COFF_H +#define RADCON_COFF_H + +internal RDI_Arch c2r_rdi_arch_from_coff_machine(COFF_MachineType machine); +internal RDI_BinarySectionFlags c2r_rdi_binary_section_flags_from_coff_section_flags(COFF_SectionFlags flags); +internal RDIM_BinarySectionList c2r_rdi_binary_sections_from_coff_sections(Arena *arena, String8 image_data, U64 string_table_off, U64 sectab_count, COFF_SectionHeader *sectab); +internal RDIM_TopLevelInfo c2r_make_rdim_top_level_info(String8 image_name, RDI_Arch arch, U64 exe_hash, U64 sectab_count, COFF_SectionHeader *sectab); + +#endif // RADCON_COFF_H + diff --git a/src/radcon/radcon_cv.c b/src/radcon/radcon_cv.c new file mode 100644 index 00000000..6b687feb --- /dev/null +++ b/src/radcon/radcon_cv.c @@ -0,0 +1,252 @@ +//////////////////////////////// +//~ rjf: CodeView <-> RDI Canonical Conversions + +internal RDI_Arch +cv2r_rdi_arch_from_cv_arch(CV_Arch cv_arch) +{ + RDI_Arch result = 0; + switch(cv_arch) + { + case CV_Arch_8086: result = RDI_Arch_X86; break; + case CV_Arch_X64: result = RDI_Arch_X64; break; + //case CV_Arch_8080: break; + //case CV_Arch_80286: break; + //case CV_Arch_80386: break; + //case CV_Arch_80486: break; + //case CV_Arch_PENTIUM: break; + //case CV_Arch_PENTIUMII: break; + //case CV_Arch_PENTIUMIII: break; + //case CV_Arch_MIPS: break; + //case CV_Arch_MIPS16: break; + //case CV_Arch_MIPS32: break; + //case CV_Arch_MIPS64: break; + //case CV_Arch_MIPSI: break; + //case CV_Arch_MIPSII: break; + //case 
CV_Arch_MIPSIII: break; + //case CV_Arch_MIPSIV: break; + //case CV_Arch_MIPSV: break; + //case CV_Arch_M68000: break; + //case CV_Arch_M68010: break; + //case CV_Arch_M68020: break; + //case CV_Arch_M68030: break; + //case CV_Arch_M68040: break; + //case CV_Arch_ALPHA: break; + //case CV_Arch_ALPHA_21164: break; + //case CV_Arch_ALPHA_21164A: break; + //case CV_Arch_ALPHA_21264: break; + //case CV_Arch_ALPHA_21364: break; + //case CV_Arch_PPC601: break; + //case CV_Arch_PPC603: break; + //case CV_Arch_PPC604: break; + //case CV_Arch_PPC620: break; + //case CV_Arch_PPCFP: break; + //case CV_Arch_PPCBE: break; + //case CV_Arch_SH3: break; + //case CV_Arch_SH3E: break; + //case CV_Arch_SH3DSP: break; + //case CV_Arch_SH4: break; + //case CV_Arch_SHMEDIA: break; + //case CV_Arch_ARM3: break; + //case CV_Arch_ARM4: break; + //case CV_Arch_ARM4T: break; + //case CV_Arch_ARM5: break; + //case CV_Arch_ARM5T: break; + //case CV_Arch_ARM6: break; + //case CV_Arch_ARM_XMAC: break; + //case CV_Arch_ARM_WMMX: break; + //case CV_Arch_ARM7: break; + //case CV_Arch_OMNI: break; + //case CV_Arch_IA64_1: break; + //case CV_Arch_IA64_2: break; + //case CV_Arch_CEE: break; + //case CV_Arch_AM33: break; + //case CV_Arch_M32R: break; + //case CV_Arch_TRICORE: break; + //case CV_Arch_EBC: break; + //case CV_Arch_THUMB: break; + //case CV_Arch_ARMNT: break; + //case CV_Arch_ARM64: break; + //case CV_Arch_D3D11_SHADER: break; + } + return(result); +} + +internal RDI_RegCode +cv2r_rdi_reg_code_from_cv_reg_code(RDI_Arch arch, CV_Reg reg_code) +{ + RDI_RegCode result = 0; + switch(arch) + { + case RDI_Arch_X86: + { + switch(reg_code) + { +#define X(CVN,C,RDN,BP,BZ) case C: result = RDI_RegCodeX86_##RDN; break; + CV_Reg_X86_XList(X) +#undef X + } + }break; + case RDI_Arch_X64: + { + switch(reg_code) + { +#define X(CVN,C,RDN,BP,BZ) case C: result = RDI_RegCodeX64_##RDN; break; + CV_Reg_X64_XList(X) +#undef X + } + }break; + } + return(result); +} + +internal RDI_Language 
+cv2r_rdi_language_from_cv_language(CV_Language cv_language) +{ + RDI_Language result = 0; + switch(cv_language) + { + case CV_Language_C: result = RDI_Language_C; break; + case CV_Language_CXX: result = RDI_Language_CPlusPlus; break; + //case CV_Language_FORTRAN: result = ; break; + //case CV_Language_MASM: result = ; break; + //case CV_Language_PASCAL: result = ; break; + //case CV_Language_BASIC: result = ; break; + //case CV_Language_COBOL: result = ; break; + //case CV_Language_LINK: result = ; break; + //case CV_Language_CVTRES: result = ; break; + //case CV_Language_CVTPGD: result = ; break; + //case CV_Language_CSHARP: result = ; break; + //case CV_Language_VB: result = ; break; + //case CV_Language_ILASM: result = ; break; + //case CV_Language_JAVA: result = ; break; + //case CV_Language_JSCRIPT: result = ; break; + //case CV_Language_MSIL: result = ; break; + //case CV_Language_HLSL: result = ; break; + } + return(result); +} + +internal RDI_RegCode +cv2r_reg_code_from_arch_encoded_fp_reg(RDI_Arch arch, CV_EncodedFramePtrReg encoded_reg) +{ + RDI_RegCode result = 0; + switch(arch) + { + case RDI_Arch_X86: + { + switch(encoded_reg) + { + case CV_EncodedFramePtrReg_StackPtr: + { + // TODO(allen): support CV_AllReg_VFRAME + // TODO(allen): error + }break; + case CV_EncodedFramePtrReg_FramePtr: + { + result = RDI_RegCodeX86_ebp; + }break; + case CV_EncodedFramePtrReg_BasePtr: + { + result = RDI_RegCodeX86_ebx; + }break; + } + }break; + case RDI_Arch_X64: + { + switch(encoded_reg) + { + case CV_EncodedFramePtrReg_StackPtr: + { + result = RDI_RegCodeX64_rsp; + }break; + case CV_EncodedFramePtrReg_FramePtr: + { + result = RDI_RegCodeX64_rbp; + }break; + case CV_EncodedFramePtrReg_BasePtr: + { + result = RDI_RegCodeX64_r13; + }break; + } + }break; + } + return(result); +} + + +internal RDI_TypeKind +cv2r_rdi_type_kind_from_cv_basic_type(CV_BasicType basic_type) +{ + RDI_TypeKind result = RDI_TypeKind_NULL; + switch(basic_type) + { + case CV_BasicType_VOID: 
{result = RDI_TypeKind_Void;}break; + case CV_BasicType_HRESULT: {result = RDI_TypeKind_HResult;}break; + + case CV_BasicType_RCHAR: + case CV_BasicType_CHAR: + case CV_BasicType_CHAR8: + {result = RDI_TypeKind_Char8;}break; + + case CV_BasicType_UCHAR: {result = RDI_TypeKind_UChar8;}break; + case CV_BasicType_WCHAR: {result = RDI_TypeKind_UChar16;}break; + case CV_BasicType_CHAR16: {result = RDI_TypeKind_Char16;}break; + case CV_BasicType_CHAR32: {result = RDI_TypeKind_Char32;}break; + + case CV_BasicType_BOOL8: + case CV_BasicType_INT8: + {result = RDI_TypeKind_S8;}break; + + case CV_BasicType_BOOL16: + case CV_BasicType_INT16: + case CV_BasicType_SHORT: + {result = RDI_TypeKind_S16;}break; + + case CV_BasicType_BOOL32: + case CV_BasicType_INT32: + case CV_BasicType_LONG: + {result = RDI_TypeKind_S32;}break; + + case CV_BasicType_BOOL64: + case CV_BasicType_INT64: + case CV_BasicType_QUAD: + {result = RDI_TypeKind_S64;}break; + + case CV_BasicType_INT128: + case CV_BasicType_OCT: + {result = RDI_TypeKind_S128;}break; + + case CV_BasicType_UINT8: {result = RDI_TypeKind_U8;}break; + + case CV_BasicType_UINT16: + case CV_BasicType_USHORT: + {result = RDI_TypeKind_U16;}break; + + case CV_BasicType_UINT32: + case CV_BasicType_ULONG: + {result = RDI_TypeKind_U32;}break; + + case CV_BasicType_UINT64: + case CV_BasicType_UQUAD: + {result = RDI_TypeKind_U64;}break; + + case CV_BasicType_UINT128: + case CV_BasicType_UOCT: + {result = RDI_TypeKind_U128;}break; + + case CV_BasicType_FLOAT16:{result = RDI_TypeKind_F16;}break; + case CV_BasicType_FLOAT32:{result = RDI_TypeKind_F32;}break; + case CV_BasicType_FLOAT32PP:{result = RDI_TypeKind_F32PP;}break; + case CV_BasicType_FLOAT48:{result = RDI_TypeKind_F48;}break; + case CV_BasicType_FLOAT64:{result = RDI_TypeKind_F64;}break; + case CV_BasicType_FLOAT80:{result = RDI_TypeKind_F80;}break; + case CV_BasicType_FLOAT128:{result = RDI_TypeKind_F128;}break; + case CV_BasicType_COMPLEX32:{result = RDI_TypeKind_ComplexF32;}break; + 
case CV_BasicType_COMPLEX64:{result = RDI_TypeKind_ComplexF64;}break; + case CV_BasicType_COMPLEX80:{result = RDI_TypeKind_ComplexF80;}break; + case CV_BasicType_COMPLEX128:{result = RDI_TypeKind_ComplexF128;}break; + case CV_BasicType_PTR:{result = RDI_TypeKind_Handle;}break; + } + return result; +} + diff --git a/src/radcon/radcon_cv.h b/src/radcon/radcon_cv.h new file mode 100644 index 00000000..014a4c9e --- /dev/null +++ b/src/radcon/radcon_cv.h @@ -0,0 +1,10 @@ +#pragma once + +//////////////////////////////// +//~ rjf: CodeView => RDI Canonical Conversions + +internal RDI_Arch cv2r_rdi_arch_from_cv_arch(CV_Arch arch); +internal RDI_RegCode cv2r_rdi_reg_code_from_cv_reg_code(RDI_Arch arch, CV_Reg reg_code); +internal RDI_Language cv2r_rdi_language_from_cv_language(CV_Language language); +internal RDI_RegCode cv2r_reg_code_from_arch_encoded_fp_reg(RDI_Arch arch, CV_EncodedFramePtrReg encoded_reg); +internal RDI_TypeKind cv2r_rdi_type_kind_from_cv_basic_type(CV_BasicType basic_type); diff --git a/src/radcon/radcon_dwarf.c b/src/radcon/radcon_dwarf.c new file mode 100644 index 00000000..8601cdac --- /dev/null +++ b/src/radcon/radcon_dwarf.c @@ -0,0 +1,1878 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +// TODO: +// +// [ ] Currently converter relies on clang's -gdwarf-aranges to generate compile unit ranges, +// however it is optional and in case it is missing converter has to generate the ranges from scopes. 
+// [ ] Error handling + +internal RDI_RegCode +d2r_rdi_reg_from_dw_reg_code_x64(U64 reg_code) +{ + switch (reg_code) { +#define X(reg_name_dw, reg_code_dw, reg_name_rdi, reg_pos, reg_size) case DW_RegX64_##reg_name_dw: return RDI_RegCodeX64_##reg_name_rdi; + DW_Regs_X64_XList(X) +#undef X + } + InvalidPath; + return 0; +} + +internal RDI_RegCode +d2r_rdi_reg_from_dw_reg_code_x86(U64 reg_code) +{ + switch (reg_code) { +#define X(reg_name_dw, reg_code_dw, reg_name_rdi, reg_pos, reg_size) case DW_RegX86_##reg_name_dw: return RDI_RegCodeX86_##reg_name_rdi; + DW_Regs_X86_XList(X) +#undef X + } + InvalidPath; + return 0; +} + +internal RDI_RegCode +d2r_rdi_reg_from_dw_reg_code(RDI_Arch arch, U64 reg_code) +{ + switch (arch) { + case RDI_Arch_NULL: return 0; + case RDI_Arch_X64: return d2r_rdi_reg_from_dw_reg_code_x64(reg_code); + case RDI_Arch_X86: return d2r_rdi_reg_from_dw_reg_code_x86(reg_code); + } + InvalidPath; + return 0; +} + +internal RDIM_Type * +d2r_create_type(Arena *arena, D2R_TypeTable *type_table) +{ + RDIM_Type *type = rdim_type_chunk_list_push(arena, type_table->types, type_table->type_chunk_cap); + return type; +} + +internal RDIM_Type * +d2r_find_or_create_type_from_offset(Arena *arena, D2R_TypeTable *type_table, U64 info_off) +{ + RDIM_Type *type = 0; + KeyValuePair *is_type_present = hash_table_search_u64(type_table->ht, info_off); + if (is_type_present) { + type = is_type_present->value_raw; + } else { + type = d2r_create_type(arena, type_table); + hash_table_push_u64_raw(arena, type_table->ht, info_off, type); + } + return type; +} + +internal RDIM_Type * +d2r_type_from_attrib(Arena *arena, D2R_TypeTable *type_table, DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + RDIM_Type *type = 0; + + // find attrib + DW_Attrib *attrib = dw_attrib_from_tag(input, cu, tag, kind); + + // does tag have this attribute? 
+ if (attrib->attrib_kind == kind) { + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + + if (value_class == DW_AttribClass_Reference) { + // resolve reference + DW_Reference ref = dw_ref_from_attrib_ptr(input, cu, attrib); + + // TODO: support for external compile unit references + AssertAlways(ref.cu == cu); + + // find or create type + type = d2r_find_or_create_type_from_offset(arena, type_table, ref.info_off); + } else { + Assert(!"unexpected attrib class"); + } + } else if (attrib->attrib_kind == DW_Attrib_Null) { + type = type_table->void_type; + } + + return type; +} + +internal Rng1U64List +d2r_range_list_from_tag(Arena *arena, DW_Input *input, DW_CompUnit *cu, U64 image_base, DW_Tag tag) +{ + // collect non-contiguous range + Rng1U64List ranges = dw_rnglist_from_attrib(arena, input, cu, tag, DW_Attrib_Ranges); + + // collect contiguous range + DW_Attrib *lo_pc_attrib = dw_attrib_from_tag(input, cu, tag, DW_Attrib_LowPc); + DW_Attrib *hi_pc_attrib = dw_attrib_from_tag(input, cu, tag, DW_Attrib_HighPc); + if (lo_pc_attrib->attrib_kind != DW_Attrib_Null && hi_pc_attrib->attrib_kind != DW_Attrib_Null) { + U64 lo_pc = dw_address_from_attrib_ptr(input, cu, lo_pc_attrib); + + U64 hi_pc; + DW_AttribClass hi_pc_class = dw_value_class_from_attrib(cu, hi_pc_attrib); + if (hi_pc_class == DW_AttribClass_Address) { + hi_pc = dw_address_from_attrib_ptr(input, cu, hi_pc_attrib); + } else if (hi_pc_class == DW_AttribClass_Const) { + hi_pc = dw_const_u64_from_attrib_ptr(input, cu, hi_pc_attrib); + hi_pc += lo_pc; + } else { + AssertAlways(!"undefined attrib encoding"); + } + + // TODO: error handling + AssertAlways(lo_pc >= image_base); + AssertAlways(hi_pc >= image_base); + AssertAlways(lo_pc <= hi_pc); + + U64 lo_voff = lo_pc - image_base; + U64 hi_voff = hi_pc - image_base; + rng1u64_list_push(arena, &ranges, rng_1u64(lo_voff, hi_voff)); + } + + return ranges; +} + +internal RDIM_Type ** +d2r_collect_proc_params(Arena *arena, D2R_TypeTable 
*type_table, DW_Input *input, DW_CompUnit *cu, DW_TagNode *cur_node, U64 *param_count_out) +{ + Temp scratch = scratch_begin(&arena, 1); + + RDIM_TypeList list = {0}; + B32 has_vargs = 0; + for (DW_TagNode *i = cur_node->first_child; i != 0; i = i->sibling) { + if (i->tag.kind == DW_Tag_FormalParameter) { + RDIM_TypeNode *n = push_array(scratch.arena, RDIM_TypeNode, 1); + n->v = d2r_type_from_attrib(arena, type_table, input, cu, i->tag, DW_Attrib_Type); + SLLQueuePush(list.first, list.last, n); + ++list.count; + } else if (i->tag.kind == DW_Tag_UnspecifiedParameters) { + has_vargs = 1; + } + } + + if (has_vargs) { + RDIM_TypeNode *n = push_array(scratch.arena, RDIM_TypeNode, 1); + n->v = type_table->varg_type; + SLLQueuePush(list.first, list.last, n); + ++list.count; + } + + // collect params + *param_count_out = list.count; + RDIM_Type **params = rdim_array_from_type_list(arena, list); + + scratch_end(scratch); + return params; +} + + +internal RDIM_EvalBytecode +d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, String8 expr) +{ + RDIM_EvalBytecode bc = {0}; + + for (U64 cursor = 0; cursor < expr.size; ) { + U8 op = 0; + cursor += str8_deserial_read_struct(expr, cursor, &op); + + U64 size_param; + switch (op) { + case DW_ExprOp_Lit0: case DW_ExprOp_Lit1: case DW_ExprOp_Lit2: + case DW_ExprOp_Lit3: case DW_ExprOp_Lit4: case DW_ExprOp_Lit5: + case DW_ExprOp_Lit6: case DW_ExprOp_Lit7: case DW_ExprOp_Lit8: + case DW_ExprOp_Lit9: case DW_ExprOp_Lit10: case DW_ExprOp_Lit11: + case DW_ExprOp_Lit12: case DW_ExprOp_Lit13: case DW_ExprOp_Lit14: + case DW_ExprOp_Lit15: case DW_ExprOp_Lit16: case DW_ExprOp_Lit17: + case DW_ExprOp_Lit18: case DW_ExprOp_Lit19: case DW_ExprOp_Lit20: + case DW_ExprOp_Lit21: case DW_ExprOp_Lit22: case DW_ExprOp_Lit23: + case DW_ExprOp_Lit24: case DW_ExprOp_Lit25: case DW_ExprOp_Lit26: + case DW_ExprOp_Lit27: case DW_ExprOp_Lit28: case DW_ExprOp_Lit29: + case DW_ExprOp_Lit30: case 
DW_ExprOp_Lit31: { + U64 lit = op - DW_ExprOp_Lit0; + rdim_bytecode_push_uconst(arena, &bc, lit); + } break; + + case DW_ExprOp_Const1U: size_param = 1; goto const_unsigned; + case DW_ExprOp_Const2U: size_param = 2; goto const_unsigned; + case DW_ExprOp_Const4U: size_param = 4; goto const_unsigned; + case DW_ExprOp_Const8U: size_param = 8; goto const_unsigned; + const_unsigned: { + U64 val = 0; + cursor += str8_deserial_read(expr, cursor, &val, size_param, size_param); + rdim_bytecode_push_uconst(arena, &bc, val); + } break; + + case DW_ExprOp_Const1S:size_param = 1; goto const_signed; + case DW_ExprOp_Const2S:size_param = 2; goto const_signed; + case DW_ExprOp_Const4S:size_param = 4; goto const_signed; + case DW_ExprOp_Const8S:size_param = 8; goto const_signed; + const_signed: { + S64 val = 0; + cursor += str8_deserial_read(expr, cursor, &val, size_param, size_param); + val = extend_sign64(val, size_param); + rdim_bytecode_push_sconst(arena, &bc, val); + } break; + + case DW_ExprOp_ConstU: { + U64 val = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &val); + rdim_bytecode_push_uconst(arena, &bc, val); + } break; + + case DW_ExprOp_ConstS: { + S64 val = 0; + cursor += str8_deserial_read_sleb128(expr, cursor, &val); + rdim_bytecode_push_sconst(arena, &bc, val); + } break; + + case DW_ExprOp_Addr: { + U64 addr = 0; + cursor += str8_deserial_read(expr, cursor, &addr, address_size, address_size); + if (addr >= image_base) { + U64 voff = addr - image_base; + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_ModuleOff, voff); + } else { + // TODO: error handling + AssertAlways(!"unable to relocate address"); + } + } break; + + case DW_ExprOp_Reg0: case DW_ExprOp_Reg1: case DW_ExprOp_Reg2: + case DW_ExprOp_Reg3: case DW_ExprOp_Reg4: case DW_ExprOp_Reg5: + case DW_ExprOp_Reg6: case DW_ExprOp_Reg7: case DW_ExprOp_Reg8: + case DW_ExprOp_Reg9: case DW_ExprOp_Reg10: case DW_ExprOp_Reg11: + case DW_ExprOp_Reg12: case DW_ExprOp_Reg13: case DW_ExprOp_Reg14: + case 
DW_ExprOp_Reg15: case DW_ExprOp_Reg16: case DW_ExprOp_Reg17: + case DW_ExprOp_Reg18: case DW_ExprOp_Reg19: case DW_ExprOp_Reg20: + case DW_ExprOp_Reg21: case DW_ExprOp_Reg22: case DW_ExprOp_Reg23: + case DW_ExprOp_Reg24: case DW_ExprOp_Reg25: case DW_ExprOp_Reg26: + case DW_ExprOp_Reg27: case DW_ExprOp_Reg28: case DW_ExprOp_Reg29: + case DW_ExprOp_Reg30: case DW_ExprOp_Reg31: { + U64 reg_code_dw = op - DW_ExprOp_Reg0; + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + U32 regread_param = RDI_EncodeRegReadParam(reg_code_rdi, 8, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegRead, regread_param); + } break; + + case DW_ExprOp_RegX: { + U64 reg_code_dw = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, ®_code_dw); + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + U32 regread_param = RDI_EncodeRegReadParam(reg_code_rdi, 8, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegRead, regread_param); + } break; + + case DW_ExprOp_ImplicitValue: { + U64 value_size = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &value_size); + + String8 val = str8_substr(expr, rng_1u64(cursor, cursor + value_size)); + if (val.size <= sizeof(U64)) { + U64 val64 = 0; + MemoryCopy(&val64, val.str, val.size); + rdim_bytecode_push_uconst(arena, &bc, val64); + } else { + // TODO: currenlty no way to encode string in RDIM_EvalBytecodeOp + NotImplemented; + } + } break; + + case DW_ExprOp_Piece: { + NotImplemented; + } break; + + case DW_ExprOp_BitPiece: { + NotImplemented; + } break; + + case DW_ExprOp_Pick: { + U8 stack_idx = 0; + cursor += str8_deserial_read_struct(expr, cursor, &stack_idx); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Pick, stack_idx); + } break; + + case DW_ExprOp_PlusUConst: { + U64 addend = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &addend); + rdim_bytecode_push_uconst(arena, &bc, addend); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + } break; + + case 
DW_ExprOp_Skip: { + S16 skip = 0; + cursor += str8_deserial_read_struct(expr, cursor, &skip); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Skip, skip); + } break; + + case DW_ExprOp_Bra: { + NotImplemented; + } break; + + case DW_ExprOp_BReg0: case DW_ExprOp_BReg1: case DW_ExprOp_BReg2: + case DW_ExprOp_BReg3: case DW_ExprOp_BReg4: case DW_ExprOp_BReg5: + case DW_ExprOp_BReg6: case DW_ExprOp_BReg7: case DW_ExprOp_BReg8: + case DW_ExprOp_BReg9: case DW_ExprOp_BReg10: case DW_ExprOp_BReg11: + case DW_ExprOp_BReg12: case DW_ExprOp_BReg13: case DW_ExprOp_BReg14: + case DW_ExprOp_BReg15: case DW_ExprOp_BReg16: case DW_ExprOp_BReg17: + case DW_ExprOp_BReg18: case DW_ExprOp_BReg19: case DW_ExprOp_BReg20: + case DW_ExprOp_BReg21: case DW_ExprOp_BReg22: case DW_ExprOp_BReg23: + case DW_ExprOp_BReg24: case DW_ExprOp_BReg25: case DW_ExprOp_BReg26: + case DW_ExprOp_BReg27: case DW_ExprOp_BReg28: case DW_ExprOp_BReg29: + case DW_ExprOp_BReg30: case DW_ExprOp_BReg31: { + U64 reg_code_dw = op - DW_ExprOp_BReg0; + S64 reg_off = 0; + cursor += str8_deserial_read_sleb128(expr, cursor, ®_off); + + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegReadDyn, reg_code_rdi); + rdim_bytecode_push_sconst(arena, &bc, reg_off); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + } break; + + case DW_ExprOp_BRegX: { + U64 reg_code_dw = 0; + S64 reg_off = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, ®_code_dw); + cursor += str8_deserial_read_sleb128(expr, cursor, ®_off); + + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegReadDyn, reg_code_rdi); + rdim_bytecode_push_sconst(arena, &bc, reg_off); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + } break; + + case DW_ExprOp_FBReg: { + S64 frame_off = 0; + cursor += str8_deserial_read_sleb128(expr, cursor, &frame_off); + rdim_bytecode_push_op(arena, &bc, 
RDI_EvalOp_FrameOff, frame_off); + } break; + + case DW_ExprOp_Deref: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_MemRead, address_size); + } break; + + case DW_ExprOp_DerefSize: { + U8 deref_size_in_bytes = 0; + cursor += str8_deserial_read_struct(expr, cursor, &deref_size_in_bytes); + if (0 < deref_size_in_bytes && deref_size_in_bytes <= address_size) { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_MemRead, deref_size_in_bytes); + } else { + // TODO: error handling + AssertAlways(!"ill formed expression"); + } + } break; + + case DW_ExprOp_XDerefSize: { + // TODO: error handling + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Call2: + case DW_ExprOp_Call4: + case DW_ExprOp_CallRef: { + // TODO: error handling + AssertAlways(!"calls are not supported"); + } break; + + case DW_ExprOp_ImplicitPointer: + case DW_ExprOp_GNU_ImplicitPointer: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_Convert: + case DW_ExprOp_GNU_Convert: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_GNU_ParameterRef: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_DerefType: + case DW_ExprOp_GNU_DerefType: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_ConstType: + case DW_ExprOp_GNU_ConstType: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_RegvalType: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_EntryValue: + case DW_ExprOp_GNU_EntryValue: { + U64 block_size = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &block_size); + + String8 entry_value_expr = {0}; + cursor += str8_deserial_read_block(expr, cursor, block_size, &entry_value_expr); + + RDIM_EvalBytecode entry_value_bc = d2r_bytecode_from_expression(arena, image_base, address_size, arch, addr_lu, entry_value_expr); + } break; + + case DW_ExprOp_Addrx: { + U64 addr_idx = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &addr_idx); + U64 addr = 
dw_addr_from_list_unit(addr_lu, addr_idx); + if (addr != max_U64) { + if (addr >= image_base) { + U64 voff = addr - image_base; + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_ModuleOff, voff); + } else { + // TODO: error handling + AssertAlways(!"unable to relocate address"); + } + } else { + // TODO: error handling + AssertAlways(!"out of bounds index"); + } + } break; + + case DW_ExprOp_CallFrameCfa: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_FrameOff, 0); + } break; + + case DW_ExprOp_FormTlsAddress: { + // TODO: + AssertAlways(!"RDI_EvalOp_TLSOff accepts immediate"); + } break; + + case DW_ExprOp_PushObjectAddress: { + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_Nop: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Noop, 0); + } break; + + case DW_ExprOp_Eq: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_EqEq, 0); + } break; + + case DW_ExprOp_Ge: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_GrEq, 0); + } break; + + case DW_ExprOp_Gt: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Grtr, 0); + } break; + + case DW_ExprOp_Le: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_LsEq, 0); + } break; + + case DW_ExprOp_Lt: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Less, 0); + } break; + + case DW_ExprOp_Ne: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_NtEq, 0); + } break; + + case DW_ExprOp_Shl: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_LShift, 0); + } break; + + case DW_ExprOp_Shr: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RShift, 0); + } break; + + case DW_ExprOp_Shra: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_Xor: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitXor, 0); + } break; + + case DW_ExprOp_XDeref: { + // TODO: error handling + Assert(!"multiple address spaces are not supported"); + } break; + + case DW_ExprOp_Abs: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Abs, 0); + } break; + + case DW_ExprOp_And: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitAnd, 0); + } break; 
+ + case DW_ExprOp_Div: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Div, 0); + } break; + + case DW_ExprOp_Minus: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Sub, 0); + } break; + + case DW_ExprOp_Mod: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Mod, 0); + } break; + + case DW_ExprOp_Mul: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Mul, 0); + } break; + + case DW_ExprOp_Neg: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Neg, 0); + } break; + + case DW_ExprOp_Not: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitNot, 0); + } break; + + case DW_ExprOp_Or: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitOr, 0); + } break; + + case DW_ExprOp_Plus: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + } break; + + case DW_ExprOp_Rot: { + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Swap: { + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Dup: { + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Drop: { + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Over: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Pick, 1); + } break; + + case DW_ExprOp_StackValue: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Stop, 0); + } break; + + default: InvalidPath; break; + } + } + + return bc; +} + +internal RDIM_Location * +d2r_transpile_expression(Arena *arena, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, String8 expr) +{ + RDIM_Location *loc = 0; + if (expr.size) { + loc = push_array(arena, RDIM_Location, 1); + loc->kind = RDI_LocationKind_AddrBytecodeStream; + loc->bytecode = d2r_bytecode_from_expression(arena, image_base, address_size, arch, addr_lu, expr); + } + return loc; +} + +internal RDIM_LocationSet +d2r_convert_loclist(Arena *arena, RDIM_ScopeChunkList *scopes, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, DW_LocList loclist) +{ +} + +internal RDIM_Location * +d2r_location_from_attrib(Arena 
*arena, DW_Input *input, DW_CompUnit *cu, U64 image_base, RDI_Arch arch, DW_Tag tag, DW_AttribKind kind) +{ + String8 expr = dw_exprloc_from_attrib(input, cu, tag, kind); + RDIM_Location *location = d2r_transpile_expression(arena, image_base, cu->address_size, arch, cu->addr_lu, expr); + return location; +} + +internal RDIM_LocationSet +d2r_locset_from_attrib(Arena *arena, + DW_Input *input, + DW_CompUnit *cu, + RDIM_ScopeChunkList *scopes, + RDIM_Scope *curr_scope, + U64 image_base, + RDI_Arch arch, + DW_Tag tag, + DW_AttribKind kind) +{ + RDIM_LocationSet result = {0}; + + // extract attrib from tag + DW_Attrib *attrib = dw_attrib_from_tag(input, cu, tag, kind); + DW_AttribClass attrib_class = dw_value_class_from_attrib(cu, attrib); + + if (attrib_class == DW_AttribClass_LocList || attrib_class == DW_AttribClass_LocListPtr) { + Temp scratch = scratch_begin(&arena, 1); + + // extract location list from attrib + DW_LocList loclist = dw_loclist_from_attrib_ptr(scratch.arena, input, cu, attrib); + + // convert location list to RDIM location set + for (DW_LocNode *loc_n = loclist.first; loc_n != 0; loc_n = loc_n->next) { + RDIM_Location *location = d2r_transpile_expression(arena, image_base, cu->address_size, arch, cu->addr_lu, loc_n->v.expr); + RDIM_Rng1U64 voff_range = { .min = loc_n->v.range.min - image_base, .min = loc_n->v.range.max - image_base }; + rdim_location_set_push_case(arena, scopes, &result, voff_range, location); + } + + scratch_end(scratch); + } else if (attrib_class == DW_AttribClass_ExprLoc) { + // extract expression from attrib + String8 expr = dw_exprloc_from_attrib_ptr(input, cu, attrib); + + // convert expression and inherit life-time ranges from enclosed scope + RDIM_Location *location = d2r_transpile_expression(arena, image_base, cu->address_size, arch, cu->addr_lu, expr); + for (RDIM_Rng1U64Node *range_n = curr_scope->voff_ranges.first; range_n != 0; range_n = range_n->next) { + rdim_location_set_push_case(arena, scopes, &result, range_n->v, 
location); + } + } else if (attrib_class != DW_AttribClass_Null) { + AssertAlways(!"unexpected attrib class"); + } + + return result; +} + +internal D2R_CompUnitContribMap +d2r_cu_contrib_map_from_aranges(Arena *arena, DW_Input *input, U64 image_base) +{ + Temp scratch = scratch_begin(&arena, 1); + + String8 aranges_data = input->sec[DW_Section_ARanges].data; + Rng1U64List unit_range_list = dw_unit_ranges_from_data(scratch.arena, aranges_data); + + D2R_CompUnitContribMap cm = {0}; + cm.count = 0; + cm.info_off_arr = push_array(arena, U64, unit_range_list.count); + cm.voff_range_arr = push_array(arena, RDIM_Rng1U64List, unit_range_list.count); + + for (Rng1U64Node *range_n = unit_range_list.first; range_n != 0; range_n = range_n->next) { + String8 unit_data = str8_substr(aranges_data, range_n->v); + U64 unit_cursor = 0; + + U64 unit_length = 0; + U64 unit_length_size = str8_deserial_read_dwarf_packed_size(unit_data, unit_cursor, &unit_length); + if (unit_length_size == 0) { + continue; + } + unit_cursor += unit_length_size; + + DW_Version version = 0; + U64 version_size = str8_deserial_read_struct(unit_data, unit_cursor, &version); + if (version_size == 0) { + continue; + } + unit_cursor += version; + + if (version != DW_Version_2) { + AssertAlways(!"unknown .debug_aranges version"); + continue; + } + + DW_Format unit_format = DW_FormatFromSize(unit_length); + U64 cu_info_off = 0; + U64 cu_info_off_size = str8_deserial_read_dwarf_uint(unit_data, unit_cursor, unit_format, &cu_info_off); + if (cu_info_off_size == 0) { + continue; + } + unit_cursor += cu_info_off_size; + + U8 address_size = 0; + U64 address_size_size = str8_deserial_read_struct(unit_data, unit_cursor, &address_size); + if (address_size_size == 0) { + continue; + } + unit_cursor += address_size_size; + + U8 segment_selector_size = 0; + U64 segment_selector_size_size = str8_deserial_read_struct(unit_data, unit_cursor, &segment_selector_size); + if (segment_selector_size_size == 0) { + continue; + } + 
unit_cursor += segment_selector_size_size; + + U64 tuple_size = address_size * 2 + segment_selector_size; + U64 bytes_too_far_past_boundary = unit_cursor % tuple_size; + if (bytes_too_far_past_boundary > 0) { + unit_cursor += tuple_size - bytes_too_far_past_boundary; + } + + RDIM_Rng1U64List voff_ranges = {0}; + if (segment_selector_size == 0) { + while (unit_cursor + address_size * 2 <= unit_data.size) { + U64 address = 0; + U64 length = 0; + unit_cursor += str8_deserial_read(unit_data, unit_cursor, &address, address_size, address_size); + unit_cursor += str8_deserial_read(unit_data, unit_cursor, &length, address_size, address_size); + + if (address == 0 && length == 0) { + break; + } + + // TODO: error handling + AssertAlways(address >= image_base); + + U64 min = address - image_base; + U64 max = min + length; + rdim_rng1u64_list_push(arena, &voff_ranges, (RDIM_Rng1U64){.min = min, .max = max}); + } + } else { + // TODO: segment relative addressing + NotImplemented; + } + + U64 map_idx = cm.count++; + cm.info_off_arr[map_idx] = cu_info_off; + cm.voff_range_arr[map_idx] = voff_ranges; + } + + scratch_end(scratch); + return cm; +} + +internal RDIM_Rng1U64List +d2r_voff_ranges_from_cu_info_off(D2R_CompUnitContribMap map, U64 info_off) +{ + RDIM_Rng1U64List voff_ranges = {0}; + U64 voff_list_idx = u64_array_bsearch(map.info_off_arr, map.count, info_off); + if (voff_list_idx < map.count) { + voff_ranges = map.voff_range_arr[voff_list_idx]; + } + return voff_ranges; +} + +internal RDIM_Scope * +d2r_push_scope(Arena *arena, RDIM_ScopeChunkList *scopes, U64 scope_chunk_cap, D2R_TagNode *tag_stack, Rng1U64List ranges) +{ + // fill out scope + RDIM_Scope *scope = rdim_scope_chunk_list_push(arena, scopes, scope_chunk_cap); + + // push ranges + for (Rng1U64Node *i = ranges.first; i != 0; i = i->next) { + rdim_scope_push_voff_range(arena, scopes, scope, (RDIM_Rng1U64){.min = i->v.min, i->v.max}); + } + + // associate scope with tag + tag_stack->scope = scope; + + // update 
scope hierarchy + DW_TagKind parent_tag_kind = tag_stack->next->cur_node->tag.kind; + if (parent_tag_kind == DW_Tag_SubProgram || parent_tag_kind == DW_Tag_InlinedSubroutine || parent_tag_kind == DW_Tag_LexicalBlock) { + RDIM_Scope *parent = tag_stack->next->scope; + + scope->parent_scope = tag_stack->next->scope; + + if (parent->last_child) { + parent->last_child->next_sibling = scope; + } + + SLLQueuePush_N(parent->first_child, parent->last_child, scope, next_sibling); + } + + // propagate scope symbol + if (tag_stack->cur_node->tag.kind == DW_Tag_LexicalBlock) { + scope->symbol = tag_stack->next->scope->symbol; + } + + return scope; +} + +internal RDIM_BakeParams * +d2r_convert(Arena *arena, RDIM_HelpState *help_state, RC_Context *in) +{ + Temp scratch = scratch_begin(&arena, 1); + + //////////////////////////////// + + ProfBegin("compute exe hash"); + U64 exe_hash = rdi_hash(in->image_data.str, in->image_data.size); + ProfEnd(); + + //////////////////////////////// + + RDI_Arch arch = RDI_Arch_NULL; + RDIM_BinarySectionList binary_sections = {0}; + RDIM_TopLevelInfo top_level_info = {0}; + + U64 image_base = 0; + DW_Input input = {0}; + DW_ListUnitInput lui = {0}; + + if (in->image == Image_CoffPe) { + PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, in->image_data); + + // convert arch + switch (pe.arch) { + case Arch_Null: arch = RDI_Arch_NULL; break; + case Arch_x64: arch = RDI_Arch_X64; break; + case Arch_x86: arch = RDI_Arch_X86; break; + default: NotImplemented; break; + } + + // get image base + image_base = pe.image_base; + + // get COFF sections + String8 raw_sections = str8_substr(in->image_data, rng_1u64(pe.section_array_off, pe.section_array_off+sizeof(COFF_SectionHeader)*pe.section_count)); + U64 section_count = raw_sections.size / sizeof(COFF_SectionHeader); + COFF_SectionHeader *section_array = (COFF_SectionHeader *)raw_sections.str; + + // convert sections & top level info + binary_sections = c2r_rdi_binary_sections_from_coff_sections(arena, 
in->image_data, pe.string_table_off, section_count, section_array); + top_level_info = c2r_make_rdim_top_level_info(in->image_name, arch, exe_hash, section_count, section_array); + + // find DWARF sections + input = dw_input_from_coff_section_table(scratch.arena, in->image_data, pe.string_table_off, section_count, section_array); + } else if (in->image == Image_Elf32 || in->image == Image_Elf64) { + ELF_BinInfo elf = elf_bin_from_data(in->debug_data); + + // get image base + image_base = elf_base_addr_from_bin(&elf.hdr); + + // convert arch + switch (elf.hdr.e_machine) { + case ELF_MachineKind_None: arch = RDI_Arch_NULL; break; + case ELF_MachineKind_X86_64: arch = RDI_Arch_X64; break; + case ELF_MachineKind_386: arch = RDI_Arch_X86; break; + default: NotImplemented; break; + } + + ELF_Shdr64Array shdrs = elf_shdr64_array_from_bin(scratch.arena, in->debug_data, &elf.hdr); + + // convert sections & top level info + binary_sections = e2r_rdi_binary_sections_from_elf_section_table(arena, shdrs); + top_level_info = e2r_make_rdim_top_level_info(in->debug_data, exe_hash, shdrs); + + // find DWARF sections + input = dw_input_from_elf_section_table(scratch.arena, in->debug_data, &elf); + } else { + InvalidPath; + } + + //////////////////////////////// + + U64 arch_addr_size = rdi_addr_size_from_arch(arch); + + //////////////////////////////// + + static const U64 UNIT_CHUNK_CAP = 256; + static const U64 UDT_CHUNK_CAP = 256; + static const U64 TYPE_CHUNK_CAP = 256; + static const U64 GVAR_CHUNK_CAP = 256; + static const U64 TVAR_CHUNK_CAP = 256; + static const U64 PROC_CHUNK_CAP = 256; + static const U64 SCOPE_CHUNK_CAP = 256; + static const U64 INLINE_SITE_CHUNK_CAP = 256; + static const U64 SRC_FILE_CAP = 256; + static const U64 LINE_TABLE_CAP = 256; + static const U64 CALL_SITE_CHUNK_CAP = 256; + + RDIM_UnitChunkList units = {0}; + RDIM_UDTChunkList udts = {0}; + RDIM_TypeChunkList types = {0}; + RDIM_SymbolChunkList gvars = {0}; + RDIM_SymbolChunkList tvars = {0}; + 
RDIM_SymbolChunkList procs = {0}; + RDIM_ScopeChunkList scopes = {0}; + RDIM_InlineSiteChunkList inline_sites = {0}; + RDIM_SrcFileChunkList src_files = {0}; + RDIM_LineTableChunkList line_tables = {0}; + + //////////////////////////////// + + RDIM_Scope *global_scope = rdim_scope_chunk_list_push(arena, &scopes, SCOPE_CHUNK_CAP); + + //////////////////////////////// + + ProfBegin("Make Unit Contrib Map"); + D2R_CompUnitContribMap cu_contrib_map = {0}; + if (input.sec[DW_Section_ARanges].data.size > 0) { + cu_contrib_map = d2r_cu_contrib_map_from_aranges(arena, &input, image_base); + } else { + // TODO: synthesize cu ranges from scopes + NotImplemented; + } + ProfEnd(); + + ProfBegin("Parse Comop Unit Ranges"); + DW_ListUnitInput lu_input = dw_list_unit_input_from_input(scratch.arena, &input); + Rng1U64List cu_range_list = dw_unit_ranges_from_data(scratch.arena, input.sec[DW_Section_Info].data); + Rng1U64Array cu_ranges = rng1u64_array_from_list(scratch.arena, &cu_range_list); + ProfEnd(); + + //////////////////////////////// + + ProfBegin("Parse Compile Unit Headers"); + B32 is_parse_relaxed = !(in->flags & RC_Flag_StrictDwarfParse); + DW_CompUnit *cu_arr = push_array(scratch.arena, DW_CompUnit, cu_ranges.count); + for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { + cu_arr[cu_idx] = dw_cu_from_info_off(scratch.arena, &input, lu_input, cu_ranges.v[cu_idx].min, is_parse_relaxed); + } + ProfEnd(); + + //////////////////////////////// + + ProfBegin("Parse Line Tables"); + DW_LineTableParseResult *cu_line_tables = push_array(scratch.arena, DW_LineTableParseResult, cu_ranges.count); + for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { + DW_CompUnit *cu = &cu_arr[cu_idx]; + String8 cu_stmt_list = dw_line_ptr_from_attrib(&input, cu, cu->tag, DW_Attrib_StmtList); + String8 cu_dir = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_CompDir); + String8 cu_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_Name); + cu_line_tables[cu_idx] = 
dw_parsed_line_table_from_data(scratch.arena, cu_stmt_list, &input, cu_dir, cu_name, cu->address_size, cu->str_offsets_lu); + } + ProfEnd(); + + //////////////////////////////// + + ProfBegin("Convert Line Tables"); + + HashTable *source_file_ht = hash_table_init(scratch.arena, 0x4000); + RDIM_LineTable **cu_line_tables_rdi = push_array(scratch.arena, RDIM_LineTable *, cu_ranges.count); + + for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { + cu_line_tables_rdi[cu_idx] = rdim_line_table_chunk_list_push(arena, &line_tables, LINE_TABLE_CAP); + + DW_LineTableParseResult *line_table = &cu_line_tables[cu_idx]; + DW_LineVMFileArray *dir_table = &line_table->vm_header.dir_table; + DW_LineVMFileArray *file_table = &line_table->vm_header.file_table; + RDIM_SrcFile **src_file_map = push_array(scratch.arena, RDIM_SrcFile *, file_table->count); + for (U64 file_idx = 0; file_idx < file_table->count; ++file_idx) { + DW_LineFile *file = &file_table->v[file_idx]; + String8 file_path = dw_path_from_file_idx(scratch.arena, &line_table->vm_header, file_idx); + String8List file_path_split = str8_split_path(scratch.arena, file_path); + str8_path_list_resolve_dots_in_place(&file_path_split, PathStyle_WindowsAbsolute); + String8 file_path_resolved = str8_path_list_join_by_style(scratch.arena, &file_path_split, PathStyle_WindowsAbsolute); + String8 file_path_normalized = lower_from_str8(scratch.arena, file_path_resolved); + RDIM_SrcFile *src_file = hash_table_search_path_raw(source_file_ht, file_path_normalized); + if (src_file == 0) { + src_file = rdim_src_file_chunk_list_push(arena, &src_files, SRC_FILE_CAP); + src_file->normal_full_path = push_str8_copy(arena, file_path_normalized); + hash_table_push_path_raw(scratch.arena, source_file_ht, src_file->normal_full_path, src_file); + } + src_file_map[file_idx] = src_file; + } + + for (DW_LineSeqNode *line_seq = line_table->first_seq; line_seq != 0; line_seq = line_seq->next) { + if (line_seq->count == 0) { + continue; + } + + U64 
*voffs = push_array(arena, U64, line_seq->count); + U32 *line_nums = push_array(arena, U32, line_seq->count); + U16 *col_nums = 0; + U64 line_idx = 0; + + DW_LineNode *file_line_n = line_seq->first; + U64 file_line_count = 0; + + for (DW_LineNode *line_n = file_line_n; line_n != 0; line_n = line_n->next) { + if (file_line_n->v.file_index != line_n->v.file_index || line_n->next == 0) { + U64 file_index = file_line_n->v.file_index; + U64 *file_voffs = &voffs[line_idx]; + U32 *file_line_nums = &line_nums[line_idx]; + U16 *file_col_nums = 0; + + U64 lines_written = 0; + U64 prev_ln = max_U64; + DW_LineNode *sentinel = line_n->v.file_index != file_line_n->v.file_index ? line_n : 0; + for (; file_line_n != sentinel; file_line_n = file_line_n->next) { + if (file_line_n->v.line != prev_ln) { + // TODO: error handling + AssertAlways(file_line_n->v.address >= image_base); + + voffs[line_idx] = file_line_n->v.address - image_base; + line_nums[line_idx] = file_line_n->v.line; + + ++lines_written; + ++line_idx; + + prev_ln = file_line_n->v.line; + } + } + + RDIM_SrcFile *src_file = src_file_map[file_index]; + RDIM_LineSequence *line_seq = rdim_line_table_push_sequence(arena, &line_tables, cu_line_tables_rdi[cu_idx], src_file, file_voffs, file_line_nums, file_col_nums, lines_written); + rdim_src_file_push_line_sequence(arena, &src_files, src_file, line_seq); + + file_line_count = 1; + } else { + ++file_line_count; + } + } + + // handle last line + if (file_line_n) { + U64 file_index = file_line_n->v.file_index; + U64 *file_voffs = &voffs[line_idx]; + U32 *file_line_nums = &line_nums[line_idx]; + U16 *file_col_nums = 0; + + for (; file_line_n != 0; file_line_n = file_line_n->next, ++line_idx) { + // TODO: error handling + AssertAlways(file_line_n->v.address >= image_base); + voffs[line_idx] = file_line_n->v.address - image_base; + line_nums[line_idx] = file_line_n->v.line; + } + + RDIM_SrcFile *src_file = src_file_map[file_index]; + RDIM_LineSequence *line_seq = 
rdim_line_table_push_sequence(arena, &line_tables, cu_line_tables_rdi[cu_idx], src_file, file_voffs, file_line_nums, file_col_nums, file_line_count); + rdim_src_file_push_line_sequence(arena, &src_files, src_file, line_seq); + } + + //Assert(line_idx == line_seq->count); + } + } + + ProfEnd(); + + //////////////////////////////// + + ProfBegin("Convert Units"); + + for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { + Temp comp_temp = temp_begin(scratch.arena); + + DW_CompUnit *cu = &cu_arr[cu_idx]; + + // parse and build tag tree + DW_TagTree tag_tree = dw_tag_tree_from_cu(comp_temp.arena, &input, cu); + + // build tag hash table for abstract origin resolution + cu->tag_ht = dw_make_tag_hash_table(comp_temp.arena, tag_tree); + + String8 dwo_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_DwoName); + String8 gnu_dwo_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_GNU_DwoName); + if (dwo_name.size || gnu_dwo_name.size || cu->dwo_id) { + // TODO: report that we dont support DWO + continue; + } + + // get unit's contribution ranges + RDIM_Rng1U64List cu_voff_ranges = d2r_voff_ranges_from_cu_info_off(cu_contrib_map, cu_ranges.v[cu_idx].min); + + String8 cu_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_Name); + String8 cu_dir = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_CompDir); + String8 cu_prod = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_Producer); + DW_Language cu_lang = dw_const_u64_from_attrib(&input, cu, cu->tag, DW_Attrib_Language); + + RDIM_Unit *unit = rdim_unit_chunk_list_push(arena, &units, UNIT_CHUNK_CAP); + unit->unit_name = cu_name; + unit->compiler_name = cu_prod; + unit->source_file = str8_zero(); + unit->object_file = str8_zero(); + unit->archive_file = str8_zero(); + unit->build_path = cu_dir; + unit->language = rdi_language_from_dw_language(cu_lang); + unit->line_table = cu_line_tables_rdi[cu_idx]; + unit->voff_ranges = cu_voff_ranges; + + D2R_TypeTable *type_table = 
push_array(comp_temp.arena, D2R_TypeTable, 1); + type_table->ht = hash_table_init(comp_temp.arena, 0x4000); + type_table->types = &types; + type_table->type_chunk_cap = TYPE_CHUNK_CAP; + type_table->void_type = d2r_create_type(arena, type_table); + type_table->void_type->kind = RDI_TypeKind_Void; + type_table->varg_type = d2r_create_type(arena, type_table); + type_table->varg_type->kind = RDI_TypeKind_Variadic; + + D2R_TagNode *free_tags = push_array(comp_temp.arena, D2R_TagNode, 1); + D2R_TagNode *tag_stack = push_array(comp_temp.arena, D2R_TagNode, 1); + tag_stack->cur_node = tag_tree.root; + + while (tag_stack) { + while (tag_stack->cur_node) { + DW_TagNode *cur_node = tag_stack->cur_node; + DW_Tag tag = cur_node->tag; + B32 visit_children = 1; + + switch (tag.kind) { + case DW_Tag_Null: { + InvalidPath; + } break; + case DW_Tag_ClassType: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + + B32 is_decl = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_Declaration); + if (is_decl) { + type->kind = RDI_TypeKind_IncompleteClass; + + Assert(!cur_node->first_child); + visit_children = 0; + } else { + RDIM_UDT *udt = rdim_udt_chunk_list_push(arena, &udts, UDT_CHUNK_CAP); + udt->self_type = type; + + type->kind = RDI_TypeKind_Class; + type->byte_size = dw_byte_size_32_from_tag(&input, cu, tag); + type->udt = udt; + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + tag_stack->type = type; + } + } break; + case DW_Tag_StructureType: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + + B32 is_decl = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_Declaration); + if (is_decl) { + type->kind = RDI_TypeKind_IncompleteStruct; + + // TODO: error handling + Assert(!cur_node->first_child); + visit_children = 
0; + } else { + RDIM_UDT *udt = rdim_udt_chunk_list_push(arena, &udts, UDT_CHUNK_CAP); + udt->self_type = type; + + type->kind = RDI_TypeKind_Struct; + type->udt = udt; + type->byte_size = dw_byte_size_32_from_tag(&input, cu, tag); + + tag_stack->type = type; + } + } break; + case DW_Tag_UnionType: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + + B32 is_decl = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_Declaration); + if (is_decl) { + type->kind = RDI_TypeKind_IncompleteUnion; + + // TODO: error handling + Assert(!cur_node->first_child); + visit_children = 0; + } else { + RDIM_UDT *udt = rdim_udt_chunk_list_push(arena, &udts, UDT_CHUNK_CAP); + udt->self_type = type; + + type->kind = RDI_TypeKind_Union; + type->byte_size = dw_byte_size_32_from_tag(&input, cu, tag); + type->udt = udt; + + tag_stack->type = type; + } + } break; + case DW_Tag_EnumerationType: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + + B32 is_decl = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_Declaration); + if (is_decl) { + type->kind = RDI_TypeKind_IncompleteEnum; + + // TODO: error handling + Assert(!cur_node->first_child); + visit_children = 0; + } else { + RDIM_UDT *udt = rdim_udt_chunk_list_push(arena, &udts, UDT_CHUNK_CAP); + udt->self_type = type; + + type->kind = RDI_TypeKind_Enum; + type->byte_size = dw_byte_size_32_from_tag(&input, cu, tag); + type->udt = udt; + + tag_stack->type = type; + } + } break; + case DW_Tag_SubroutineType: { + // collect parameters + RDIM_TypeList param_list = {0}; + for (DW_TagNode *n = cur_node->first_child; n != 0; n = n->sibling) { + if (n->tag.kind == DW_Tag_FormalParameter) { + RDIM_Type *param_type = d2r_type_from_attrib(arena, type_table, &input, cu, n->tag, DW_Attrib_Type); + rdim_type_list_push(comp_temp.arena, 
¶m_list, param_type); + } else if (n->tag.kind == DW_Tag_UnspecifiedParameters) { + rdim_type_list_push(comp_temp.arena, ¶m_list, type_table->varg_type); + } else { + // TODO: error handling + AssertAlways(!"unexpected tag"); + } + } + + // init proceudre type + RDIM_Type *ret_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Function; + type->byte_size = arch_addr_size; + type->direct_type = ret_type; + type->count = param_list.count; + type->param_types = rdim_array_from_type_list(arena, param_list); + + visit_children = 0; + } break; + case DW_Tag_Typedef: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Alias; + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + } break; + case DW_Tag_BaseType: { + DW_ATE encoding = dw_const_u64_from_attrib(&input, cu, tag, DW_Attrib_Encoding); + U64 byte_size = dw_byte_size_from_tag(&input, cu, tag); + + // convert base type encoding to RDI version + RDI_TypeKind kind = RDI_TypeKind_NULL; + switch (encoding) { + case DW_ATE_Null: kind = RDI_TypeKind_NULL; break; + case DW_ATE_Address: kind = RDI_TypeKind_Void; break; + case DW_ATE_Boolean: kind = RDI_TypeKind_Bool; break; + case DW_ATE_ComplexFloat: { + switch (byte_size) { + case 4: kind = RDI_TypeKind_ComplexF32; break; + case 8: kind = RDI_TypeKind_ComplexF64; break; + case 10: kind = RDI_TypeKind_ComplexF80; break; + case 16: kind = RDI_TypeKind_ComplexF128; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_Float: { + switch (byte_size) { + case 2: kind = RDI_TypeKind_F16; break; + case 4: kind = RDI_TypeKind_F32; break; + case 6: kind = RDI_TypeKind_F48; break; + case 8: kind = 
RDI_TypeKind_F64; break; + case 16: kind = RDI_TypeKind_F128; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_Signed: { + switch (byte_size) { + case 1: kind = RDI_TypeKind_S8; break; + case 2: kind = RDI_TypeKind_S16; break; + case 4: kind = RDI_TypeKind_S32; break; + case 8: kind = RDI_TypeKind_S64; break; + case 16: kind = RDI_TypeKind_S128; break; + case 32: kind = RDI_TypeKind_S256; break; + case 64: kind = RDI_TypeKind_S512; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_SignedChar: { + switch (byte_size) { + case 1: kind = RDI_TypeKind_Char8; break; + case 2: kind = RDI_TypeKind_Char16; break; + case 4: kind = RDI_TypeKind_Char32; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_Unsigned: { + switch (byte_size) { + case 1: kind = RDI_TypeKind_U8; break; + case 2: kind = RDI_TypeKind_U16; break; + case 4: kind = RDI_TypeKind_U32; break; + case 8: kind = RDI_TypeKind_U64; break; + case 16: kind = RDI_TypeKind_U128; break; + case 32: kind = RDI_TypeKind_U256; break; + case 64: kind = RDI_TypeKind_U512; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_UnsignedChar: { + switch (byte_size) { + case 1: kind = RDI_TypeKind_UChar8; break; + case 2: kind = RDI_TypeKind_UChar16; break; + case 4: kind = RDI_TypeKind_UChar32; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_ImaginaryFloat: { + NotImplemented; + } break; + case DW_ATE_PackedDecimal: { + NotImplemented; + } break; + case DW_ATE_NumericString: { + NotImplemented; + } break; + case DW_ATE_Edited: { + NotImplemented; + } break; + case DW_ATE_SignedFixed: { + NotImplemented; + } break; + case DW_ATE_UnsignedFixed: { + NotImplemented; + } break; + case DW_ATE_DecimalFloat: { + NotImplemented; + } 
break; + case DW_ATE_Utf: { + NotImplemented; + } break; + case DW_ATE_Ucs: { + NotImplemented; + } break; + case DW_ATE_Ascii: { + NotImplemented; + } break; + default: AssertAlways(!"unexpected base type encoding"); break; // TODO: error handling + } + + RDIM_Type *base_type = d2r_create_type(arena, type_table); + base_type->kind = kind; + base_type->byte_size = byte_size; + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Alias; + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + type->direct_type = base_type; + } break; + case DW_Tag_PointerType: { + RDIM_Type *direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + // TODO: + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Allocated)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Associated)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Alignment)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Name)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_AddressClass)); + + U64 byte_size = arch_addr_size; + if (cu->version == DW_Version_5 || cu->relaxed) { + dw_try_byte_size_from_tag(&input, cu, tag, &byte_size); + } + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Ptr; + type->byte_size = byte_size; + type->direct_type = direct_type; + } break; + case DW_Tag_RestrictType: { + // TODO: + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Alignment)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Name)); + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Modifier; + type->byte_size = arch_addr_size; + type->flags = RDI_TypeModifierFlag_Restrict; + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + } break; + case DW_Tag_VolatileType: { + // TODO: + 
Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Name)); + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Modifier; + type->byte_size = arch_addr_size; + type->flags = RDI_TypeModifierFlag_Volatile; + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + } break; + case DW_Tag_ConstType: { + // TODO: + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Name)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Alignment)); + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Modifier; + type->byte_size = arch_addr_size; + type->flags = RDI_TypeModifierFlag_Const; + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + } break; + case DW_Tag_ArrayType: { + // * DWARF vs RDI Array Type Graph * + // + // For example lets take following decl: + // + // int (*foo[2])[3][4]; + // + // This compiles to in DWARF: + // + // foo -> DW_TAG_ArrayType -> (A0) DW_TAG_Subrange [2] + // \ + // -> (B0) DW_TAG_PointerType -> (A1) DW_TAG_ArrayType -> DW_TAG_Subrange [3] -> DW_Tag_Subrange [4] + // \ + // -> (B1) DW_TAG_BaseType (int) + // + // RDI expects: + // + // foo -> Array (2) -> Pointer -> Array (3) -> Array (4) -> int + // + // Note that DWARF forks the graph on DW_TAG_ArrayType to describe array ranges in branch A and + // in branch B describes array type which might be a struct, pointer, base type, or any other type tag. + // However, in RDI we have a simple list of type nodes and to convert we need to append type nodes from + // B to A. 
+ + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Array; + type->direct_type = 0; + + U64 subrange_count = 0; + RDIM_Type *t = type; + for (DW_TagNode *n = cur_node->first_child; n != 0; n = n->sibling) { + if (n->tag.kind != DW_Tag_SubrangeType) { + // TODO: error handling + AssertAlways(!"unexpected tag"); + continue; + } + + if (subrange_count > 0) { + // init array type node + RDIM_Type *s = d2r_create_type(arena, type_table); + s->kind = RDI_TypeKind_Array; + s->direct_type = 0; + + // append new array type node + t->direct_type = s; + t = s; + } + + // resolve array lower bound + U64 lower_bound = 0; + if (dw_tag_has_attrib(&input, cu, n->tag, DW_Attrib_LowerBound)) { + lower_bound = dw_u64_from_attrib(&input, cu, n->tag, DW_Attrib_LowerBound); + } else { + lower_bound = dw_pick_default_lower_bound(cu_lang); + } + + // resolve array upper bound + U64 upper_bound = 0; + if (dw_tag_has_attrib(&input, cu, n->tag, DW_Attrib_Count)) { + U64 count = dw_u64_from_attrib(&input, cu, n->tag, DW_Attrib_Count); + upper_bound = lower_bound + count; + } else if (dw_tag_has_attrib(&input, cu, n->tag, DW_Attrib_UpperBound)) { + upper_bound = dw_u64_from_attrib(&input, cu, n->tag, DW_Attrib_UpperBound); + // turn upper bound into exclusive range + upper_bound += 1; + } else { + // zero size array + } + + t->count = upper_bound - lower_bound; + ++subrange_count; + } + + Assert(t->direct_type == 0); + t->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + visit_children = 0; + } break; + case DW_Tag_SubrangeType: { + // TODO: error handling + AssertAlways(!"unexpected tag"); + } break; + case DW_Tag_Inheritance: { + DW_TagNode *parent_node = tag_stack->next->cur_node; + if (parent_node->tag.kind != DW_Tag_StructureType && + parent_node->tag.kind != DW_Tag_ClassType) { + // TODO: error handling + AssertAlways(!"unexpected parent tag"); + } + + RDIM_Type *parent = 
tag_stack->next->type; + RDIM_UDTMember *member = rdim_udt_push_member(arena, &udts, parent->udt); + member->kind = RDI_MemberKind_Base; + member->type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + member->off = safe_cast_u32(dw_const_u32_from_attrib(&input, cu, tag, DW_Attrib_DataMemberLocation)); + } break; + case DW_Tag_Enumerator: { + DW_TagNode *parent_node = tag_stack->next->cur_node; + if (parent_node->tag.kind != DW_Tag_EnumerationType) { + // TODO: error handling + AssertAlways(!"unexpected parent tag"); + } + + RDIM_Type *type = tag_stack->next->type; + RDIM_UDTEnumVal *member = rdim_udt_push_enum_val(arena, &udts, type->udt); + member->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + member->val = dw_const_u64_from_attrib(&input, cu, tag, DW_Attrib_ConstValue); + } break; + case DW_Tag_Member: { + DW_TagNode *parent_node = tag_stack->next->cur_node; + if (parent_node->tag.kind != DW_Tag_StructureType && + parent_node->tag.kind != DW_Tag_ClassType && + parent_node->tag.kind != DW_Tag_UnionType && + parent_node->tag.kind != DW_Tag_EnumerationType) { + // TODO: error handling + AssertAlways(!"unexpected parent tag"); + } + + DW_Attrib *data_member_location = dw_attrib_from_tag(&input, cu, tag, DW_Attrib_DataMemberLocation); + DW_AttribClass data_member_location_class = dw_value_class_from_attrib(cu, data_member_location); + if (data_member_location_class == DW_AttribClass_LocList) { + AssertAlways(!"UDT member with multiple locations are not supported"); + } + + RDIM_Type *type = tag_stack->next->type; + RDIM_UDTMember *member = rdim_udt_push_member(arena, &udts, type->udt); + member->kind = RDI_MemberKind_DataField; + member->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + member->type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + member->off = dw_const_u64_from_attrib(&input, cu, tag, DW_Attrib_DataMemberLocation); + } break; + case DW_Tag_SubProgram: { + 
DW_InlKind inl = dw_u64_from_attrib(&input, cu, tag, DW_Attrib_Inline); + switch (inl) { + case DW_Inl_NotInlined: { + U64 param_count = 0; + RDIM_Type **params = d2r_collect_proc_params(arena, type_table, &input, cu, cur_node, ¶m_count); + + // get return type + RDIM_Type *ret_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + // fill out proc type + RDIM_Type *proc_type = d2r_create_type(arena, type_table); + proc_type->kind = RDI_TypeKind_Function; + proc_type->byte_size = arch_addr_size; + proc_type->direct_type = ret_type; + proc_type->count = param_count; + proc_type->param_types = params; + + // get container type + RDIM_Type *container_type = 0; + if (dw_tag_has_attrib(&input, cu, tag, DW_Attrib_ContainingType)) { + container_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_ContainingType); + } + + // get frame base expression + String8 frame_base_expr = dw_exprloc_from_attrib(&input, cu, tag, DW_Attrib_FrameBase); + + // get proc container symbol + RDIM_Symbol *proc = rdim_symbol_chunk_list_push(arena, &procs, PROC_CHUNK_CAP ); + + // make scope + Rng1U64List ranges = d2r_range_list_from_tag(comp_temp.arena, &input, cu, image_base, tag); + RDIM_Scope *root_scope = d2r_push_scope(arena, &scopes, SCOPE_CHUNK_CAP, tag_stack, ranges); + root_scope->symbol = proc; + + // fill out proc + proc->is_extern = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_External); + proc->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + proc->link_name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_LinkageName); + proc->type = proc_type; + proc->container_symbol = 0; + proc->container_type = container_type; + proc->root_scope = root_scope; + proc->frame_base = d2r_locset_from_attrib(arena, &input, cu, &scopes, root_scope, image_base, arch, tag, DW_Attrib_FrameBase); + + // sub program with user-defined parent tag is a method + DW_TagKind parent_tag_kind = tag_stack->next->cur_node->tag.kind; + if 
(parent_tag_kind == DW_Tag_ClassType || parent_tag_kind == DW_Tag_StructureType) { + RDI_MemberKind member_kind = RDI_MemberKind_NULL; + DW_VirtualityKind virtuality = dw_const_u64_from_attrib(&input, cu, tag, DW_Attrib_Virtuality); + switch (virtuality) { + case DW_VirtualityKind_None: member_kind = RDI_MemberKind_Method; break; + case DW_VirtualityKind_Virtual: member_kind = RDI_MemberKind_VirtualMethod; break; + case DW_VirtualityKind_PureVirtual: member_kind = RDI_MemberKind_VirtualMethod; break; // TODO: create kind for pure virutal + default: InvalidPath; break; + } + + RDIM_Type *type = tag_stack->next->type; + RDIM_UDTMember *member = rdim_udt_push_member(arena, &udts, type->udt); + member->kind = member_kind; + member->type = type; + member->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + } else if (parent_tag_kind != DW_Tag_CompileUnit) { + AssertAlways(!"unexpected tag"); + } + + tag_stack->scope = root_scope; + } break; + case DW_Inl_DeclaredNotInlined: + case DW_Inl_DeclaredInlined: + case DW_Inl_Inlined: { + visit_children = 0; + } break; + default: InvalidPath; break; + } + } break; + case DW_Tag_InlinedSubroutine: { + U64 param_count = 0; + RDIM_Type **params = d2r_collect_proc_params(arena, type_table, &input, cu, tag_stack->cur_node, ¶m_count); + + // get return type + RDIM_Type *ret_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + // fill out proc type + RDIM_Type *proc_type = d2r_create_type(arena, type_table); + proc_type->kind = RDI_TypeKind_Function; + proc_type->byte_size = arch_addr_size; + proc_type->direct_type = ret_type; + proc_type->count = param_count; + proc_type->param_types = params; + + // get container type + RDIM_Type *owner = 0; + if (dw_tag_has_attrib(&input, cu, tag, DW_Attrib_ContainingType)) { + owner = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_ContainingType); + } + + // fill out inline site + RDIM_InlineSite *inline_site = 
rdim_inline_site_chunk_list_push(arena, &inline_sites, INLINE_SITE_CHUNK_CAP); + inline_site->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + inline_site->type = proc_type; + inline_site->owner = owner; + inline_site->line_table = 0; + + // make scope + Rng1U64List ranges = d2r_range_list_from_tag(comp_temp.arena, &input, cu, image_base, tag); + RDIM_Scope *root_scope = d2r_push_scope(arena, &scopes, SCOPE_CHUNK_CAP, tag_stack, ranges); + root_scope->inline_site = inline_site; + } break; + case DW_Tag_Variable: { + String8 name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + RDIM_Type *type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + DW_TagKind parent_tag_kind = tag_stack->next->cur_node->tag.kind; + if (parent_tag_kind == DW_Tag_SubProgram || + parent_tag_kind == DW_Tag_InlinedSubroutine || + parent_tag_kind == DW_Tag_LexicalBlock) { + RDIM_Scope *scope = tag_stack->next->scope; + RDIM_Local *local = rdim_scope_push_local(arena, &scopes, tag_stack->next->scope); + local->kind = RDI_LocalKind_Variable; + local->name = name; + local->type = type; + local->locset = d2r_locset_from_attrib(arena, &input, cu, &scopes, scope, image_base, arch, tag, DW_Attrib_Location); + } else { + + // NOTE: due to a bug in clang in stb_sprint.h local variables + // are declared in global scope without a name + if (name.size == 0) { + break; + } + + RDIM_Symbol *gvar = rdim_symbol_chunk_list_push(arena, &gvars, GVAR_CHUNK_CAP); + gvar->is_extern = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_External); + gvar->name = name; + gvar->link_name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_LinkageName); + gvar->type = type; + //gvar->locset = d2r_locset_from_attrib(arena, &input, cu, &scopes, global_scope, image_base, arch, tag, DW_Attrib_Location); + gvar->container_symbol = 0; + gvar->container_type = 0; // TODO: NotImplemented; + } + } break; + case DW_Tag_FormalParameter: { + DW_TagKind parent_tag_kind = 
tag_stack->next->cur_node->tag.kind; + if (parent_tag_kind == DW_Tag_SubProgram || parent_tag_kind == DW_Tag_InlinedSubroutine) { + RDIM_Scope *scope = tag_stack->next->scope; + RDIM_Local *param = rdim_scope_push_local(arena, &scopes, scope); + param->kind = RDI_LocalKind_Parameter; + param->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + param->type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + param->locset = d2r_locset_from_attrib(arena, &input, cu, &scopes, scope, image_base, arch, tag, DW_Attrib_Location); + } else { + // TODO: error handling + AssertAlways(!"this is a local variable"); + } + } break; + case DW_Tag_LexicalBlock: { + if (tag_stack->next->cur_node->tag.kind == DW_Tag_SubProgram || + tag_stack->next->cur_node->tag.kind == DW_Tag_InlinedSubroutine || + tag_stack->next->cur_node->tag.kind == DW_Tag_LexicalBlock) { + Rng1U64List ranges = d2r_range_list_from_tag(comp_temp.arena, &input, cu, image_base, tag); + d2r_push_scope(arena, &scopes, SCOPE_CHUNK_CAP, tag_stack, ranges); + } + } break; + case DW_Tag_Label: + case DW_Tag_CompileUnit: + case DW_Tag_UnspecifiedParameters: + break; + default: NotImplemented; break; + } + + if (tag_stack->cur_node->first_child && visit_children) { + D2R_TagNode *frame = free_tags; + if (frame) { + SLLStackPop(free_tags); + MemoryZeroStruct(frame); + } else { + frame = push_array(scratch.arena, D2R_TagNode, 1); + } + frame->cur_node = tag_stack->cur_node->first_child; + SLLStackPush(tag_stack, frame); + } else { + tag_stack->cur_node = tag_stack->cur_node->sibling; + } + } + + // recycle free frame + D2R_TagNode *frame = tag_stack; + SLLStackPop(tag_stack); + SLLStackPush(free_tags, frame); + + if (tag_stack) { + tag_stack->cur_node = tag_stack->cur_node->sibling; + } + } + + temp_end(comp_temp); + } + + ProfEnd(); + + { + for (RDIM_TypeChunkNode *chunk_n = types.first; chunk_n != 0; chunk_n = chunk_n->next) { + for (U64 i = 0; i < chunk_n->count; ++i) { + RDIM_Type 
*type = &chunk_n->v[i]; + if (type->kind == RDI_TypeKind_Alias) { + for (RDIM_Type *t = type->direct_type; t != 0; t = t->direct_type) { + if (t->byte_size != 0) { + type->byte_size = t->byte_size; + break; + } + } + } + } + } + } + + { + RDIM_TypeNode *type_stack = 0; + RDIM_TypeNode *free_types = 0; + + for (RDIM_TypeChunkNode *chunk_n = types.first; chunk_n != 0; chunk_n = chunk_n->next) { + for (U64 i = 0; i < chunk_n->count; ++i) { + RDIM_Type *type = &chunk_n->v[i]; + if (type->kind == RDI_TypeKind_Array) { + if (type->byte_size != 0) + continue; + + RDIM_Type *t; + for (t = type; t != 0 && t->kind == RDI_TypeKind_Array; t = t->direct_type) { + RDIM_TypeNode *f = free_types; + if (f == 0) { + f = push_array(scratch.arena, RDIM_TypeNode, 1); + } else { + SLLStackPop(free_types); + } + f->v = t; + SLLStackPush(type_stack, f); + } + + U64 base_type_size = 0; + if (t) { + base_type_size = t->byte_size; + } + + U64 array_size = base_type_size; + while (type_stack) { + if (type_stack->v->count) { + array_size *= type_stack->v->count; + } else { + array_size += type_stack->v->byte_size; + } + SLLStackPop(type_stack); + } + + type->count = 0; + type->byte_size = array_size; + + // recycle frames + free_types = type_stack; + type_stack = 0; + } + } + } + } + + //////////////////////////////// + + RDIM_BakeParams *bake_params = push_array(arena, RDIM_BakeParams, 1); + bake_params->top_level_info = top_level_info; + bake_params->binary_sections = binary_sections; + bake_params->units = units; + bake_params->types = types; + bake_params->udts = udts; + bake_params->src_files = src_files; + bake_params->line_tables = line_tables; + bake_params->global_variables = gvars; + bake_params->thread_variables = tvars; + bake_params->procedures = procs; + bake_params->scopes = scopes; + bake_params->inline_sites = inline_sites; + + scratch_end(scratch); + return bake_params; +} + +internal RDIM_BakeResults +d2r_bake(RDIM_HelpState *state, RDIM_BakeParams *in_params) +{ + return 
rdim_bake(state, in_params); +} + +internal RDIM_SerializedSectionBundle +d2r_compress(Arena *arena, RDIM_SerializedSectionBundle in) +{ + RDIM_SerializedSectionBundle result = {0}; + return result; +} + +internal RDI_Language +rdi_language_from_dw_language(DW_Language v) +{ + RDI_Language result = RDI_Language_NULL; + switch (v) { + case DW_Language_Null: result = RDI_Language_NULL; break; + + case DW_Language_C89: + case DW_Language_C99: + case DW_Language_C11: + case DW_Language_C: + result = RDI_Language_C; + break; + + case DW_Language_CPlusPlus03: + case DW_Language_CPlusPlus11: + case DW_Language_CPlusPlus14: + case DW_Language_CPlusPlus: + result = RDI_Language_CPlusPlus; + break; + + default: NotImplemented; break; + } + return result; +} + +internal RDI_RegCodeX86 +rdi_reg_from_dw_reg_x86(DW_RegX86 v) +{ + RDI_RegCodeX86 result = RDI_RegCode_nil; + switch (v) { +#define X(reg_dw, val_dw, reg_rdi, ...) case DW_RegX86_##reg_dw: result = RDI_RegCodeX86_##reg_rdi; break; + DW_Regs_X86_XList(X) +#undef X + default: NotImplemented; break; + } + return result; +} + +internal B32 +rdi_reg_from_dw_reg_x64(DW_RegX64 v, RDI_RegCodeX64 *code_out, U64 *off_out, U64 *size_out) +{ + RDI_RegCodeX64 result = RDI_RegCode_nil; + switch (v) { +#define X(reg_dw, val_dw, reg_rdi, off, size) case DW_RegX64_##reg_dw: result = RDI_RegCodeX64_##reg_rdi; *off_out = off; *size_out = size; break; + DW_Regs_X64_XList(X) +#undef X + default: NotImplemented; break; + } + return result; +} + +internal B32 +rdi_reg_from_dw_reg(Arch arch, DW_Reg v, RDI_RegCode *code_out, U64 *off_out, U64 *size_out) +{ + RDI_RegCode result = RDI_RegCode_nil; + switch (arch) { + case Arch_Null: break; + case Arch_x86: ; break; + case Arch_x64: return rdi_reg_from_dw_reg_x64(v, code_out, off_out, size_out); + default: NotImplemented; break; + } + return 0; +} + diff --git a/src/radcon/radcon_dwarf.h b/src/radcon/radcon_dwarf.h new file mode 100644 index 00000000..688e90e8 --- /dev/null +++ 
b/src/radcon/radcon_dwarf.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2024 Epic Games Tools
+// Licensed under the MIT license (https://opensource.org/license/mit/)
+
+#ifndef RADCON_DWARF_H
+#define RADCON_DWARF_H
+// Type bookkeeping handed to the d2r_*type* helpers while d2r_convert walks DWARF tags.
+typedef struct D2R_TypeTable
+{
+  HashTable *ht; // created with hash_table_init in d2r_convert; presumably maps a tag's info offset to its RDIM_Type -- confirm in d2r_find_or_create_type_from_offset
+  RDIM_TypeChunkList *types; // d2r_convert points this at its output type chunk list
+  U64 type_chunk_cap; // d2r_convert sets this to TYPE_CHUNK_CAP
+  RDIM_Type *void_type; // type created up front with kind RDI_TypeKind_Void
+  RDIM_Type *varg_type; // type created up front with kind RDI_TypeKind_Variadic; used for DW_Tag_UnspecifiedParameters
+} D2R_TypeTable;
+// One frame of the explicit stack d2r_convert uses to walk the tag tree depth-first.
+typedef struct D2R_TagNode
+{
+  struct D2R_TagNode *next; // stack link; while visiting a child, `next` is the parent tag's frame
+  DW_TagNode *cur_node; // tag currently being visited at this depth
+  RDIM_Type *type; // type built for a class/struct/union/enum definition; read by child tags through tag_stack->next->type
+  RDIM_Scope *scope; // scope built for a subprogram; read by child tags through tag_stack->next->scope
+} D2R_TagNode;
+// NOTE(review): no use is visible in this chunk; the two arrays look parallel (index i pairs an info offset with its voff ranges) -- confirm
+typedef struct D2R_CompUnitContribMap
+{
+  U64 count;
+  U64 *info_off_arr;
+  RDIM_Rng1U64List *voff_range_arr;
+} D2R_CompUnitContribMap;
+
+////////////////////////////////
+
+internal RDIM_BakeParams * d2r_convert(Arena *arena, RDIM_HelpState *help_state, RC_Context *in);
+
+////////////////////////////////
+
+internal RDI_Language rdi_language_from_dw_language(DW_Language v);
+internal RDI_RegCodeX86 rdi_reg_from_dw_reg_x86(DW_RegX86 v);
+internal B32 rdi_reg_from_dw_reg_x64(DW_RegX64 v, RDI_RegCodeX64 *code_out, U64 *off_out, U64 *size_out);
+internal B32 rdi_reg_from_dw_reg(Arch arch, DW_Reg v, RDI_RegCode *code_out, U64 *off_out, U64 *size_out);
+
+#endif // RADCON_DWARF_H
+
diff --git a/src/radcon/radcon_elf.c b/src/radcon/radcon_elf.c
new file mode 100644
index 00000000..6a262ee2
--- /dev/null
+++ b/src/radcon/radcon_elf.c
@@ -0,0 +1,17 @@
+// Copyright (c) 2024 Epic Games Tools
+// Licensed under the MIT license (https://opensource.org/license/mit/)
+// Stub: returns an empty (zero-initialized) section list; `arena` and `shdrs` are not read yet.
+internal RDIM_BinarySectionList
+e2r_rdi_binary_sections_from_elf_section_table(Arena *arena, ELF_Shdr64Array shdrs)
+{
+  RDIM_BinarySectionList result = {0};
+  return result;
+}
+// Stub: returns zero-initialized top-level info; none of the parameters are read yet.
+internal RDIM_TopLevelInfo
+e2r_make_rdim_top_level_info(String8 image_name, RDI_Arch arch, ELF_Shdr64Array shdrs)
+{
+  RDIM_TopLevelInfo top_level_info = {0};
+  return top_level_info;
+}
+
diff --git a/src/radcon/radcon_elf.h b/src/radcon/radcon_elf.h
new file mode 100644
index 00000000..bcf40e08
--- /dev/null
+++ 
b/src/radcon/radcon_elf.h
@@ -0,0 +1,10 @@
+// Copyright (c) 2024 Epic Games Tools
+// Licensed under the MIT license (https://opensource.org/license/mit/)
+
+#ifndef RADCON_ELF_H
+#define RADCON_ELF_H
+// ELF -> RDI conversion helpers; the definitions live in radcon_elf.c.
+internal RDIM_BinarySectionList e2r_rdi_binary_sections_from_elf_section_table(Arena *arena, ELF_Shdr64Array shdrs);
+internal RDIM_TopLevelInfo e2r_make_rdim_top_level_info(String8 image_name, RDI_Arch arch, ELF_Shdr64Array shdrs);
+
+#endif // RADCON_ELF_H
diff --git a/src/radcon/radcon_main.c b/src/radcon/radcon_main.c
new file mode 100644
index 00000000..584877f4
--- /dev/null
+++ b/src/radcon/radcon_main.c
@@ -0,0 +1,97 @@
+// Copyright (c) 2024 Epic Games Tools
+// Licensed under the MIT license (https://opensource.org/license/mit/)
+
+#define BUILD_TITLE "Epic Games Tools (R) RAD Debug Info Converter"
+#define BUILD_CONSOLE_INTERFACE 1
+
+////////////////////////////////
+// Third Party
+
+#include "third_party/rad_lzb_simple/rad_lzb_simple.h"
+#include "third_party/rad_lzb_simple/rad_lzb_simple.c"
+#define XXH_STATIC_LINKING_ONLY
+#include "third_party/xxHash/xxhash.c"
+#include "third_party/xxHash/xxhash.h"
+#define SINFL_IMPLEMENTATION
+#include "third_party/sinfl/sinfl.h"
+
+////////////////////////////////
+// RDI Format Library
+
+#include "lib_rdi_format/rdi_format.h"
+#include "lib_rdi_format/rdi_format.c"
+
+////////////////////////////////
+// Headers
+
+#include "base/base_inc.h"
+#include "os/os_inc.h"
+#include "async/async.h"
+#include "path/path.h"
+#include "rdi_make/rdi_make_local.h"
+#include "rdi_make/rdi_make_help.h"
+#include "linker/hash_table.h"
+#include "coff/coff.h"
+#include "coff/coff_parse.h"
+#include "pe/pe.h"
+#include "elf/elf.h"
+#include "elf/elf_parse.h"
+#include "codeview/codeview.h"
+#include "codeview/codeview_parse.h"
+#include "dwarf/dwarf.h"
+#include "dwarf/dwarf_parse.h"
+#include "dwarf/dwarf_coff.h"
+#include "dwarf/dwarf_elf.h"
+#include "msf/msf.h"
+#include "msf/msf_parse.h"
+#include "pdb/pdb.h"
+#include "pdb/pdb_parse.h"
+#include "pdb/pdb_stringize.h"
+#include "radcon.h"
+#include "radcon_coff.h"
+#include "radcon_elf.h"
+#include "radcon_cv.h"
+#include "radcon_dwarf.h"
+#include "radcon_pdb.h"
+
+////////////////////////////////
+// Implementations
+// (the .c files are included directly, so the whole tool compiles as this single translation unit)
+#include "base/base_inc.c"
+#include "os/os_inc.c"
+#include "async/async.c"
+#include "path/path.c"
+#include "rdi_make/rdi_make_local.c"
+#include "rdi_make/rdi_make_help.c"
+#include "linker/hash_table.c"
+#include "coff/coff.c"
+#include "coff/coff_parse.c"
+#include "pe/pe.c"
+#include "elf/elf.c"
+#include "elf/elf_parse.c"
+#include "codeview/codeview.c"
+#include "codeview/codeview_parse.c"
+#include "msf/msf.c"
+#include "msf/msf_parse.c"
+#include "pdb/pdb.c"
+#include "pdb/pdb_parse.c"
+#include "pdb/pdb_stringize.c"
+#include "dwarf/dwarf.c"
+#include "dwarf/dwarf_parse.c"
+#include "dwarf/dwarf_coff.c"
+#include "dwarf/dwarf_elf.c"
+#include "radcon.c"
+#include "radcon_coff.c"
+#include "radcon_elf.c"
+#include "radcon_cv.c"
+#include "radcon_dwarf.c"
+#include "radcon_pdb.c"
+
+////////////////////////////////
+// Forwards the parsed command line to rc_main (presumably called by the base layer's startup code -- confirm).
+internal void
+entry_point(CmdLine *cmdl)
+{
+  rc_main(cmdl);
+}
+
diff --git a/src/radcon/radcon_pdb.c b/src/radcon/radcon_pdb.c
new file mode 100644
index 00000000..226e4405
--- /dev/null
+++ b/src/radcon/radcon_pdb.c
@@ -0,0 +1,3207 @@
+// Copyright (c) 2024 Epic Games Tools
+// Licensed under the MIT license (https://opensource.org/license/mit/)
+
+// TODO(rjf): eliminate redundant null checks, just always allocate
+// empty results, and have nulls gracefully fall through
+//
+// (search for != 0 instances, inserted to prevent prior crashes)
+
+global RDIM_HelpState *g_p2r_help_state = 0; // the async tasks below take their arena from work_thread_arenas[thread_idx] on this state
+
+////////////////////////////////
+//~ rjf: Basic Helpers
+
+internal U64
+p2r_end_of_cplusplus_container_name(String8 str)
+{
+  // NOTE: This finds the index one past the last "::" contained in str.
+  // if no "::" is contained in str, then the returned index is 0.
+  // The intent is that [0,clamp_bot(0,result - 2)) gives the
+  // "container name" and [result,str.size) gives the leaf name.
+  U64 result = 0;
+  if(str.size >= 2)
+  {
+    for(U64 i = str.size; i >= 2; i -= 1) // scan from the end so the last "::" is the one found
+    {
+      if(str.str[i - 2] == ':' && str.str[i - 1] == ':')
+      {
+        result = i;
+        break;
+      }
+    }
+  }
+  return(result);
+}
+// Hashes a virtual offset: the value shifted right by 3, xor'd with its low 3 bits shifted left by 6.
+internal U64
+p2r_hash_from_voff(U64 voff)
+{
+  U64 hash = (voff >> 3) ^ ((7 & voff) << 6);
+  return hash;
+}
+
+////////////////////////////////
+//~ rjf: Location Info Building Helpers
+// Builds a location for the address `reg_code + offset`; `extra_indirection` adds one more address-sized read at that address.
+internal RDIM_Location *
+p2r_location_from_addr_reg_off(Arena *arena, RDI_Arch arch, RDI_RegCode reg_code, U32 reg_byte_size, U32 reg_byte_pos, S64 offset, B32 extra_indirection)
+{
+  RDIM_Location *result = 0;
+  if(0 <= offset && offset <= (S64)max_U16) // offset fits in a U16: use the dedicated reg-plus-u16 location forms
+  {
+    if(extra_indirection)
+    {
+      result = rdim_push_location_addr_addr_reg_plus_u16(arena, reg_code, (U16)offset);
+    }
+    else
+    {
+      result = rdim_push_location_addr_reg_plus_u16(arena, reg_code, (U16)offset);
+    }
+  }
+  else // negative or large offset: emit bytecode that computes the address instead
+  {
+    RDIM_EvalBytecode bytecode = {0};
+    U32 regread_param = RDI_EncodeRegReadParam(reg_code, reg_byte_size, reg_byte_pos);
+    rdim_bytecode_push_op(arena, &bytecode, RDI_EvalOp_RegRead, regread_param); // read the register
+    rdim_bytecode_push_sconst(arena, &bytecode, offset); // push the signed offset
+    rdim_bytecode_push_op(arena, &bytecode, RDI_EvalOp_Add, 0); // register + offset
+    if(extra_indirection)
+    {
+      U64 addr_size = rdi_addr_size_from_arch(arch);
+      rdim_bytecode_push_op(arena, &bytecode, RDI_EvalOp_MemRead, addr_size); // read an address-sized value at the computed address
+    }
+    result = rdim_push_location_addr_bytecode_stream(arena, &bytecode);
+  }
+  return result;
+}
+// Pushes `location` onto `locset` for the voff span described by `range` inside `section`, leaving out each span listed in `gaps`.
+internal void
+p2r_location_over_lvar_addr_range(Arena *arena, RDIM_ScopeChunkList *scopes, RDIM_LocationSet *locset, RDIM_Location *location, CV_LvarAddrRange *range, COFF_SectionHeader *section, CV_LvarAddrGap *gaps, U64 gap_count)
+{
+  //- rjf: extract range info
+  U64 voff_first = 0;
+  U64 voff_opl = 0;
+  if(section != 0) // with a null section both bounds stay 0
+  {
+    voff_first = section->voff + range->off;
+    voff_opl = voff_first + range->len;
+  }
+
+  //- 
rjf: emit ranges + CV_LvarAddrGap *gap_ptr = gaps; + U64 voff_cursor = voff_first; + for(U64 i = 0; i < gap_count; i += 1, gap_ptr += 1) + { + U64 voff_gap_first = voff_first + gap_ptr->off; + U64 voff_gap_opl = voff_gap_first + gap_ptr->len; + if(voff_cursor < voff_gap_first) + { + RDIM_Rng1U64 voff_range = {voff_cursor, voff_gap_first}; + rdim_location_set_push_case(arena, scopes, locset, voff_range, location); + } + voff_cursor = voff_gap_opl; + } + + //- rjf: emit remaining range + if(voff_cursor < voff_opl) + { + RDIM_Rng1U64 voff_range = {voff_cursor, voff_opl}; + rdim_location_set_push_case(arena, scopes, locset, voff_range, location); + } +} + +//////////////////////////////// +//~ rjf: Initial Parsing & Preparation Pass Tasks + +ASYNC_WORK_DEF(p2r_exe_hash_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_EXEHashIn *in = (P2R_EXEHashIn *)input; + U64 *out = push_array(arena, U64, 1); + ProfScope("hash exe") *out = rdi_hash(in->exe_data.str, in->exe_data.size); + ProfEnd(); + return out; +} + +ASYNC_WORK_DEF(p2r_tpi_hash_parse_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_TPIHashParseIn *in = (P2R_TPIHashParseIn *)input; + void *out = 0; + ProfScope("parse tpi hash") out = pdb_tpi_hash_from_data(arena, in->strtbl, in->tpi, in->hash_data, in->aux_data); + ProfEnd(); + return out; +} + +ASYNC_WORK_DEF(p2r_tpi_leaf_parse_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_TPILeafParseIn *in = (P2R_TPILeafParseIn *)input; + void *out = 0; + ProfScope("parse tpi leaf") out = cv_leaf_from_data(arena, in->leaf_data, in->itype_first); + ProfEnd(); + return out; +} + +ASYNC_WORK_DEF(p2r_symbol_stream_parse_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_SymbolStreamParseIn *in = (P2R_SymbolStreamParseIn *)input; + void *out = 0; + 
ProfScope("parse symbol stream") out = cv_sym_from_data(arena, in->data, 4); + ProfEnd(); + return out; +} + +ASYNC_WORK_DEF(p2r_c13_stream_parse_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_C13StreamParseIn *in = (P2R_C13StreamParseIn *)input; + void *out = 0; + ProfScope("parse c13 stream") out = cv_c13_parsed_from_data(arena, in->data, in->strtbl, in->coff_sections); + ProfEnd(); + return out; +} + +ASYNC_WORK_DEF(p2r_comp_unit_parse_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_CompUnitParseIn *in = (P2R_CompUnitParseIn *)input; + void *out = 0; + ProfScope("parse comp units") out = pdb_comp_unit_array_from_data(arena, in->data); + ProfEnd(); + return out; +} + +ASYNC_WORK_DEF(p2r_comp_unit_contributions_parse_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_CompUnitContributionsParseIn *in = (P2R_CompUnitContributionsParseIn *)input; + void *out = 0; + ProfScope("parse comp unit contributions") out = pdb_comp_unit_contribution_array_from_data(arena, in->data, in->coff_sections); + ProfEnd(); + return out; +} + +//////////////////////////////// +//~ rjf: Unit Conversion Tasks + +ASYNC_WORK_DEF(p2r_units_convert_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + Temp scratch = scratch_begin(&arena, 1); + P2R_UnitConvertIn *in = (P2R_UnitConvertIn *)input; + P2R_UnitConvertOut *out = push_array(arena, P2R_UnitConvertOut, 1); + ProfScope("build units, initial src file map, & collect unit source files") + if(in->comp_units != 0) + { + U64 units_chunk_cap = in->comp_units->count; + P2R_SrcFileMap src_file_map = {0}; + src_file_map.slots_count = 65536; + src_file_map.slots = push_array(scratch.arena, P2R_SrcFileNode *, src_file_map.slots_count); + + //////////////////////////// + //- rjf: pass 1: build per-unit info & per-unit line tables + // + 
ProfScope("pass 1: build per-unit info & per-unit line tables") + for(U64 comp_unit_idx = 0; comp_unit_idx < in->comp_units->count; comp_unit_idx += 1) + { + PDB_CompUnit *pdb_unit = in->comp_units->units[comp_unit_idx]; + CV_SymParsed *pdb_unit_sym = in->comp_unit_syms[comp_unit_idx]; + CV_C13Parsed *pdb_unit_c13 = in->comp_unit_c13s[comp_unit_idx]; + + //- rjf: produce unit name + String8 unit_name = pdb_unit->obj_name; + if(unit_name.size != 0) + { + String8 unit_name_past_last_slash = str8_skip_last_slash(unit_name); + if(unit_name_past_last_slash.size != 0) + { + unit_name = unit_name_past_last_slash; + } + } + + //- rjf: produce obj name + String8 obj_name = pdb_unit->obj_name; + if(str8_match(obj_name, str8_lit("* Linker *"), 0) || + str8_match(obj_name, str8_lit("Import:"), StringMatchFlag_RightSideSloppy)) + { + MemoryZeroStruct(&obj_name); + } + + //- rjf: build this unit's line table, fill out primary line info (inline info added after) + RDIM_LineTable *line_table = 0; + for(CV_C13SubSectionNode *node = pdb_unit_c13->first_sub_section; + node != 0; + node = node->next) + { + if(node->kind == CV_C13SubSectionKind_Lines) + { + for(CV_C13LinesParsedNode *lines_n = node->lines_first; + lines_n != 0; + lines_n = lines_n->next) + { + CV_C13LinesParsed *lines = &lines_n->v; + + // rjf: file name -> normalized file path + String8 file_path = lines->file_name; + String8 file_path_normalized = lower_from_str8(scratch.arena, str8_skip_chop_whitespace(file_path)); + for(U64 idx = 0; idx < file_path_normalized.size; idx += 1) + { + if(file_path_normalized.str[idx] == '\\') + { + file_path_normalized.str[idx] = '/'; + } + } + + // rjf: normalized file path -> source file node + U64 file_path_normalized_hash = rdi_hash(file_path_normalized.str, file_path_normalized.size); + U64 src_file_slot = file_path_normalized_hash%src_file_map.slots_count; + P2R_SrcFileNode *src_file_node = 0; + for(P2R_SrcFileNode *n = src_file_map.slots[src_file_slot]; n != 0; n = n->next) + { 
+ if(str8_match(n->src_file->normal_full_path, file_path_normalized, 0)) + { + src_file_node = n; + break; + } + } + if(src_file_node == 0) + { + src_file_node = push_array(scratch.arena, P2R_SrcFileNode, 1); + SLLStackPush(src_file_map.slots[src_file_slot], src_file_node); + src_file_node->src_file = rdim_src_file_chunk_list_push(arena, &out->src_files, 4096); + src_file_node->src_file->normal_full_path = push_str8_copy(arena, file_path_normalized); + } + + // rjf: push sequence into both line table & source file's line map + if(lines->line_count != 0) + { + if(line_table == 0) + { + line_table = rdim_line_table_chunk_list_push(arena, &out->line_tables, 256); + } + RDIM_LineSequence *seq = rdim_line_table_push_sequence(arena, &out->line_tables, line_table, src_file_node->src_file, lines->voffs, lines->line_nums, lines->col_nums, lines->line_count); + rdim_src_file_push_line_sequence(arena, &out->src_files, src_file_node->src_file, seq); + } + } + } + } + + //- rjf: build unit + RDIM_Unit *dst_unit = rdim_unit_chunk_list_push(arena, &out->units, units_chunk_cap); + dst_unit->unit_name = unit_name; + dst_unit->compiler_name = pdb_unit_sym->info.compiler_name; + dst_unit->object_file = obj_name; + dst_unit->archive_file = pdb_unit->group_name; + dst_unit->language = cv2r_rdi_language_from_cv_language(pdb_unit_sym->info.language); + dst_unit->line_table = line_table; + } + + //////////////////////////// + //- rjf: pass 2: build per-unit voff ranges from comp unit contributions table + // + PDB_CompUnitContribution *contrib_ptr = in->comp_unit_contributions->contributions; + PDB_CompUnitContribution *contrib_opl = contrib_ptr + in->comp_unit_contributions->count; + ProfScope("pass 2: build per-unit voff ranges from comp unit contributions table") + for(;contrib_ptr < contrib_opl; contrib_ptr += 1) + { + if(contrib_ptr->mod < in->comp_units->count) + { + RDIM_Unit *unit = &out->units.first->v[contrib_ptr->mod]; + RDIM_Rng1U64 range = {contrib_ptr->voff_first, 
contrib_ptr->voff_opl}; + rdim_rng1u64_list_push(arena, &unit->voff_ranges, range); + } + } + + //////////////////////////// + //- rjf: pass 3: parse all inlinee line tables + // + out->units_first_inline_site_line_tables = push_array(arena, RDIM_LineTable *, in->comp_units->count); + ProfScope("pass 3: parse all inlinee line tables") + for(U64 comp_unit_idx = 0; comp_unit_idx < in->comp_units->count; comp_unit_idx += 1) + { + CV_SymParsed *unit_sym = in->comp_unit_syms[comp_unit_idx]; + CV_C13Parsed *unit_c13 = in->comp_unit_c13s[comp_unit_idx]; + CV_RecRange *rec_ranges_first = unit_sym->sym_ranges.ranges; + CV_RecRange *rec_ranges_opl = rec_ranges_first+unit_sym->sym_ranges.count; + U64 base_voff = 0; + for(CV_RecRange *rec_range = rec_ranges_first; + rec_range < rec_ranges_opl; + rec_range += 1) + { + //- rjf: rec range -> symbol info range + U64 sym_off_first = rec_range->off + 2; + U64 sym_off_opl = rec_range->off + rec_range->hdr.size; + + //- rjf: skip invalid ranges + if(sym_off_opl > unit_sym->data.size || sym_off_first > unit_sym->data.size || sym_off_first > sym_off_opl) + { + continue; + } + + //- rjf: unpack symbol info + CV_SymKind kind = rec_range->hdr.kind; + U64 sym_header_struct_size = cv_header_struct_size_from_sym_kind(kind); + void *sym_header_struct_base = unit_sym->data.str + sym_off_first; + void *sym_data_opl = unit_sym->data.str + sym_off_opl; + + //- rjf: skip bad sizes + if(sym_off_first + sym_header_struct_size > sym_off_opl) + { + continue; + } + + //- rjf: process symbol + switch(kind) + { + default:{}break; + + //- rjf: LPROC32/GPROC32 (gather base address) + case CV_SymKind_LPROC32: + case CV_SymKind_GPROC32: + { + CV_SymProc32 *proc32 = (CV_SymProc32 *)sym_header_struct_base; + COFF_SectionHeader *section = (0 < proc32->sec && proc32->sec <= in->coff_sections.count) ? 
&in->coff_sections.v[proc32->sec-1] : 0; + if(section != 0) + { + base_voff = section->voff + proc32->off; + } + }break; + + //- rjf: INLINESITE + case CV_SymKind_INLINESITE: + { + // rjf: unpack sym + CV_SymInlineSite *sym = (CV_SymInlineSite *)sym_header_struct_base; + String8 binary_annots = str8((U8 *)(sym+1), rec_range->hdr.size - sizeof(rec_range->hdr.kind) - sizeof(*sym)); + + // rjf: map inlinee -> parsed cv c13 inlinee line info + CV_C13InlineeLinesParsed *inlinee_lines_parsed = 0; + { + U64 hash = cv_hash_from_item_id(sym->inlinee); + U64 slot_idx = hash%unit_c13->inlinee_lines_parsed_slots_count; + for(CV_C13InlineeLinesParsedNode *n = unit_c13->inlinee_lines_parsed_slots[slot_idx]; n != 0; n = n->hash_next) + { + if(n->v.inlinee == sym->inlinee) + { + inlinee_lines_parsed = &n->v; + break; + } + } + } + + // rjf: build line table, fill with parsed binary annotations + + if(inlinee_lines_parsed != 0) + { + // rjf: grab checksums sub-section + CV_C13SubSectionNode *file_chksms = unit_c13->file_chksms_sub_section; + + // rjf: gathered lines + typedef struct LineChunk LineChunk; + struct LineChunk + { + LineChunk *next; + U64 cap; + U64 count; + U64 *voffs; // [line_count + 1] (sorted) + U32 *line_nums; // [line_count] + U16 *col_nums; // [2*line_count] + }; + LineChunk *first_line_chunk = 0; + LineChunk *last_line_chunk = 0; + U64 total_line_chunk_line_count = 0; + U32 last_file_off = max_U32; + U32 curr_file_off = max_U32; + RDIM_LineTable* line_table = 0; + + CV_C13InlineSiteDecoder decoder = cv_c13_inline_site_decoder_init(inlinee_lines_parsed->file_off, inlinee_lines_parsed->first_source_ln, base_voff); + for(;;) + { + CV_C13InlineSiteDecoderStep step = cv_c13_inline_site_decoder_step(&decoder, binary_annots); + + if(step.flags & CV_C13InlineSiteDecoderStepFlag_EmitFile) + { + last_file_off = curr_file_off; + curr_file_off = step.file_off; + } + if(step.flags == 0 && total_line_chunk_line_count > 0) + { + last_file_off = curr_file_off; + curr_file_off 
= max_U32; + } + if((last_file_off != max_U32 && last_file_off != curr_file_off)) + { + String8 seq_file_name = {0}; + + if(last_file_off + sizeof(CV_C13Checksum) <= file_chksms->size) + { + CV_C13Checksum *checksum = (CV_C13Checksum*)(unit_c13->data.str + file_chksms->off + last_file_off); + U32 name_off = checksum->name_off; + seq_file_name = pdb_strtbl_string_from_off(in->pdb_strtbl, name_off); + } + + // rjf: file name -> normalized file path + String8 file_path = seq_file_name; + String8 file_path_normalized = lower_from_str8(scratch.arena, str8_skip_chop_whitespace(file_path)); + for(U64 idx = 0; idx < file_path_normalized.size; idx += 1) + { + if(file_path_normalized.str[idx] == '\\') + { + file_path_normalized.str[idx] = '/'; + } + } + + // rjf: normalized file path -> source file node + U64 file_path_normalized_hash = rdi_hash(file_path_normalized.str, file_path_normalized.size); + U64 src_file_slot = file_path_normalized_hash%src_file_map.slots_count; + P2R_SrcFileNode *src_file_node = 0; + for(P2R_SrcFileNode *n = src_file_map.slots[src_file_slot]; n != 0; n = n->next) + { + if(str8_match(n->src_file->normal_full_path, file_path_normalized, 0)) + { + src_file_node = n; + break; + } + } + if(src_file_node == 0) + { + src_file_node = push_array(scratch.arena, P2R_SrcFileNode, 1); + SLLStackPush(src_file_map.slots[src_file_slot], src_file_node); + src_file_node->src_file = rdim_src_file_chunk_list_push(arena, &out->src_files, 4096); + src_file_node->src_file->normal_full_path = push_str8_copy(arena, file_path_normalized); + } + + // rjf: gather all lines + RDI_U64 *voffs = push_array_no_zero(arena, RDI_U64, total_line_chunk_line_count+1); + RDI_U32 *line_nums = push_array_no_zero(arena, RDI_U32, total_line_chunk_line_count); + RDI_U64 line_count = total_line_chunk_line_count; + { + U64 dst_idx = 0; + for(LineChunk *chunk = first_line_chunk; chunk != 0; chunk = chunk->next) + { + MemoryCopy(voffs+dst_idx, chunk->voffs, sizeof(U64)*(chunk->count+1)); + 
MemoryCopy(line_nums+dst_idx, chunk->line_nums, sizeof(U32)*chunk->count); + dst_idx += chunk->count; + } + } + + // rjf: push + if(line_count != 0) + { + if(line_table == 0) + { + line_table = rdim_line_table_chunk_list_push(arena, &out->line_tables, 256); + if(out->units_first_inline_site_line_tables[comp_unit_idx] == 0) + { + out->units_first_inline_site_line_tables[comp_unit_idx] = line_table; + } + } + RDIM_LineSequence *seq = rdim_line_table_push_sequence(arena, &out->line_tables, line_table, src_file_node->src_file, voffs, line_nums, 0, line_count); + rdim_src_file_push_line_sequence(arena, &out->src_files, src_file_node->src_file, seq); + } + + // rjf: clear line chunks for subsequent sequences + first_line_chunk = last_line_chunk = 0; + total_line_chunk_line_count = 0; + } + + if(step.flags & CV_C13InlineSiteDecoderStepFlag_EmitLine) + { + LineChunk *chunk = last_line_chunk; + if(chunk == 0 || chunk->count+1 >= chunk->cap) + { + chunk = push_array(scratch.arena, LineChunk, 1); + SLLQueuePush(first_line_chunk, last_line_chunk, chunk); + chunk->cap = 256; + chunk->voffs = push_array_no_zero(scratch.arena, U64, chunk->cap); + chunk->line_nums = push_array_no_zero(scratch.arena, U32, chunk->cap); + } + chunk->voffs[chunk->count] = step.line_voff; + chunk->voffs[chunk->count+1] = step.line_voff_end; + chunk->line_nums[chunk->count] = step.ln; + chunk->count += 1; + total_line_chunk_line_count += 1; + } + + if(step.flags == 0) + { + break; + } + } + } + }break; + } + } + } + } + scratch_end(scratch); + ProfEnd(); + return out; +} + +//////////////////////////////// +//~ rjf: Link Name Map Building Tasks + +ASYNC_WORK_DEF(p2r_link_name_map_build_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_LinkNameMapBuildIn *in = (P2R_LinkNameMapBuildIn *)input; + CV_RecRange *rec_ranges_first = in->sym->sym_ranges.ranges; + CV_RecRange *rec_ranges_opl = rec_ranges_first + in->sym->sym_ranges.count; + for(CV_RecRange 
*rec_range = rec_ranges_first; + rec_range < rec_ranges_opl; + rec_range += 1) + { + //- rjf: unpack symbol range info + CV_SymKind kind = rec_range->hdr.kind; + U64 header_struct_size = cv_header_struct_size_from_sym_kind(kind); + U8 *sym_first = in->sym->data.str + rec_range->off + 2; + U8 *sym_opl = sym_first + rec_range->hdr.size; + + //- rjf: skip bad ranges + if(sym_opl > in->sym->data.str + in->sym->data.size || sym_first + header_struct_size > in->sym->data.str + in->sym->data.size) + { + continue; + } + + //- rjf: consume symbol + switch(kind) + { + default:{}break; + case CV_SymKind_PUB32: + { + // rjf: unpack sym + CV_SymPub32 *pub32 = (CV_SymPub32 *)sym_first; + String8 name = str8_cstring_capped(pub32+1, sym_opl); + COFF_SectionHeader *section = (0 < pub32->sec && pub32->sec <= in->coff_sections.count) ? &in->coff_sections.v[pub32->sec-1] : 0; + U64 voff = 0; + if(section != 0) + { + voff = section->voff + pub32->off; + } + + // rjf: commit to link name map + U64 hash = p2r_hash_from_voff(voff); + U64 bucket_idx = hash%in->link_name_map->buckets_count; + P2R_LinkNameNode *node = push_array(arena, P2R_LinkNameNode, 1); + SLLStackPush(in->link_name_map->buckets[bucket_idx], node); + node->voff = voff; + node->name = name; + in->link_name_map->link_name_count += 1; + in->link_name_map->bucket_collision_count += (node->next != 0); + }break; + } + } + ProfEnd(); + return 0; +} + +//////////////////////////////// +//~ rjf: UDT Conversion Tasks + +ASYNC_WORK_DEF(p2r_udt_convert_work) +{ + ProfBeginFunction(); + Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx]; + P2R_UDTConvertIn *in = (P2R_UDTConvertIn *)input; +#define p2r_type_ptr_from_itype(itype) ((in->itype_type_ptrs && (itype) < in->tpi_leaf->itype_opl) ? 
(in->itype_type_ptrs[itype]) : 0) + RDIM_UDTChunkList *udts = push_array(arena, RDIM_UDTChunkList, 1); + RDI_U64 udts_chunk_cap = 1024; + ProfScope("convert UDT info") + { + for(CV_TypeId itype = in->itype_first; itype < in->itype_opl; itype += 1) + { + //- rjf: skip basics + if(itype < in->tpi_leaf->itype_first) { continue; } + + //- rjf: grab type for this itype - skip if empty + RDIM_Type *dst_type = in->itype_type_ptrs[itype]; + if(dst_type == 0) { continue; } + + //- rjf: unpack itype leaf range - skip if out-of-range + CV_RecRange *range = &in->tpi_leaf->leaf_ranges.ranges[itype-in->tpi_leaf->itype_first]; + CV_LeafKind kind = range->hdr.kind; + U64 header_struct_size = cv_header_struct_size_from_leaf_kind(kind); + U8 *itype_leaf_first = in->tpi_leaf->data.str + range->off+2; + U8 *itype_leaf_opl = itype_leaf_first + range->hdr.size-2; + if(range->off+range->hdr.size > in->tpi_leaf->data.size || + range->off+2+header_struct_size > in->tpi_leaf->data.size || + range->hdr.size < 2) + { + continue; + } + + //- rjf: build UDT + CV_TypeId field_itype = 0; + switch(kind) + { + default:{}break; + + //////////////////////// + //- rjf: structs/unions/classes -> equip members + // + case CV_LeafKind_CLASS: + case CV_LeafKind_STRUCTURE: + { + CV_LeafStruct *lf = (CV_LeafStruct *)itype_leaf_first; + if(lf->props & CV_TypeProp_FwdRef) + { + break; + } + field_itype = lf->field_itype; + }goto equip_members; + case CV_LeafKind_UNION: + { + CV_LeafUnion *lf = (CV_LeafUnion *)itype_leaf_first; + if(lf->props & CV_TypeProp_FwdRef) + { + break; + } + field_itype = lf->field_itype; + }goto equip_members; + case CV_LeafKind_CLASS2: + case CV_LeafKind_STRUCT2: + { + CV_LeafStruct2 *lf = (CV_LeafStruct2 *)itype_leaf_first; + if(lf->props & CV_TypeProp_FwdRef) + { + break; + } + field_itype = lf->field_itype; + }goto equip_members; + equip_members: + { + Temp scratch = scratch_begin(&arena, 1); + + //- rjf: grab UDT info + RDIM_UDT *dst_udt = dst_type->udt; + if(dst_udt == 0) + { + 
dst_udt = dst_type->udt = rdim_udt_chunk_list_push(arena, udts, udts_chunk_cap); + dst_udt->self_type = dst_type; + } + + //- rjf: gather all fields + typedef struct FieldListTask FieldListTask; + struct FieldListTask + { + FieldListTask *next; + CV_TypeId itype; + }; + FieldListTask start_fl_task = {0, field_itype}; + FieldListTask *fl_todo_stack = &start_fl_task; + FieldListTask *fl_done_stack = 0; + for(;fl_todo_stack != 0;) + { + //- rjf: take & unpack task + FieldListTask *fl_task = fl_todo_stack; + SLLStackPop(fl_todo_stack); + SLLStackPush(fl_done_stack, fl_task); + CV_TypeId field_list_itype = fl_task->itype; + + //- rjf: skip bad itypes + if(field_list_itype < in->tpi_leaf->itype_first || in->tpi_leaf->itype_opl <= field_list_itype) + { + continue; + } + + //- rjf: field list itype -> range + CV_RecRange *range = &in->tpi_leaf->leaf_ranges.ranges[field_list_itype-in->tpi_leaf->itype_first]; + + //- rjf: skip bad headers + if(range->off+range->hdr.size > in->tpi_leaf->data.size || + range->hdr.size < 2 || + range->hdr.kind != CV_LeafKind_FIELDLIST) + { + continue; + } + + //- rjf: loop over all fields + { + U8 *field_list_first = in->tpi_leaf->data.str+range->off+2; + U8 *field_list_opl = field_list_first+range->hdr.size-2; + for(U8 *read_ptr = field_list_first, *next_read_ptr = field_list_opl; + read_ptr < field_list_opl; + read_ptr = next_read_ptr) + { + // rjf: unpack field + CV_LeafKind field_kind = *(CV_LeafKind *)read_ptr; + U64 field_leaf_header_size = cv_header_struct_size_from_leaf_kind(field_kind); + U8 *field_leaf_first = read_ptr+2; + U8 *field_leaf_opl = field_list_opl; + next_read_ptr = field_leaf_opl; + + // rjf: skip out-of-bounds fields + if(field_leaf_first+field_leaf_header_size > field_list_opl) + { + continue; + } + + // rjf: process field + switch(field_kind) + { + //- rjf: unhandled/invalid cases + default: + { + // TODO(rjf): log + }break; + + //- rjf: INDEX + case CV_LeafKind_INDEX: + { + // rjf: unpack leaf + CV_LeafIndex *lf = 
(CV_LeafIndex *)field_leaf_first; + CV_TypeId new_itype = lf->itype; + + // rjf: bump next read pointer past header + next_read_ptr = (U8 *)(lf+1); + + // rjf: determine if index itype is new + B32 is_new = 1; + for(FieldListTask *t = fl_done_stack; t != 0; t = t->next) + { + if(t->itype == new_itype) + { + is_new = 0; + break; + } + } + + // rjf: if new -> push task to follow new itype + if(is_new) + { + FieldListTask *new_task = push_array(scratch.arena, FieldListTask, 1); + SLLStackPush(fl_todo_stack, new_task); + new_task->itype = new_itype; + } + }break; + + //- rjf: MEMBER + case CV_LeafKind_MEMBER: + { + // TODO(rjf): log on bad offset + + // rjf: unpack leaf + CV_LeafMember *lf = (CV_LeafMember *)field_leaf_first; + U8 *offset_ptr = (U8 *)(lf+1); + CV_NumericParsed offset = cv_numeric_from_data_range(offset_ptr, field_leaf_opl); + U64 offset64 = cv_u64_from_numeric(&offset); + U8 *name_ptr = offset_ptr + offset.encoded_size; + String8 name = str8_cstring_capped(name_ptr, field_leaf_opl); + + // rjf: bump next read pointer past variable length parts + next_read_ptr = name.str+name.size+1; + + // rjf: emit member + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_DataField; + mem->name = name; + mem->type = p2r_type_ptr_from_itype(lf->itype); + mem->off = (U32)offset64; + }break; + + //- rjf: STMEMBER + case CV_LeafKind_STMEMBER: + { + // TODO(rjf): handle attribs + + // rjf: unpack leaf + CV_LeafStMember *lf = (CV_LeafStMember *)field_leaf_first; + U8 *name_ptr = (U8 *)(lf+1); + String8 name = str8_cstring_capped(name_ptr, field_leaf_opl); + + // rjf: bump next read pointer past variable length parts + next_read_ptr = name.str+name.size+1; + + // rjf: emit member + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_StaticData; + mem->name = name; + mem->type = p2r_type_ptr_from_itype(lf->itype); + }break; + + //- rjf: METHOD + case CV_LeafKind_METHOD: + { + // rjf: 
unpack leaf + CV_LeafMethod *lf = (CV_LeafMethod *)field_leaf_first; + U8 *name_ptr = (U8 *)(lf+1); + String8 name = str8_cstring_capped(name_ptr, field_leaf_opl); + + // rjf: bump next read pointer past variable length parts + next_read_ptr = name.str+name.size+1; + + //- rjf: method list itype -> range + CV_RecRange *method_list_range = &in->tpi_leaf->leaf_ranges.ranges[lf->list_itype-in->tpi_leaf->itype_first]; + + //- rjf: skip bad method lists + if(method_list_range->off+method_list_range->hdr.size > in->tpi_leaf->data.size || + method_list_range->hdr.size < 2 || + method_list_range->hdr.kind != CV_LeafKind_METHODLIST) + { + break; + } + + //- rjf: loop through all methods & emit members + U8 *method_list_first = in->tpi_leaf->data.str + method_list_range->off + 2; + U8 *method_list_opl = method_list_first + method_list_range->hdr.size-2; + for(U8 *method_read_ptr = method_list_first, *next_method_read_ptr = method_list_opl; + method_read_ptr < method_list_opl; + method_read_ptr = next_method_read_ptr) + { + CV_LeafMethodListMember *method = (CV_LeafMethodListMember*)method_read_ptr; + CV_MethodProp prop = CV_FieldAttribs_Extract_MethodProp(method->attribs); + RDIM_Type *method_type = p2r_type_ptr_from_itype(method->itype); + next_method_read_ptr = (U8 *)(method+1); + + // TODO(allen): PROBLEM + // We only get offsets for virtual functions (the "vbaseoff") from + // "Intro" and "PureIntro". In C++ inheritance, when we have a chain + // of inheritance (let's just talk single inheritance for now) the + // first class in the chain that introduces a new virtual function + // has this "Intro" method. If a later class in the chain redefines + // the virtual function it only has a "Virtual" method which does + // not update the offset. There is a "Virtual" and "PureVirtual" + // variant of "Virtual". The "Pure" in either case means there + // is no concrete procedure. When there is no "Pure" the method + // should have a corresponding procedure symbol id. 
+ // + // The issue is we will want to mark all of our virtual methods as + // virtual and give them an offset, but that means we have to do + // some extra figuring to propogate offsets from "Intro" methods + // to "Virtual" methods in inheritance trees. That is - IF we want + // to start preserving the offsets of virtuals. There is room in + // the method struct to make this work, but for now I've just + // decided to drop this information. It is not urgently useful to + // us and greatly complicates matters. + + // rjf: read vbaseoff + U32 vbaseoff = 0; + if(prop == CV_MethodProp_Intro || prop == CV_MethodProp_PureIntro) + { + if(next_method_read_ptr+4 <= method_list_opl) + { + vbaseoff = *(U32 *)next_method_read_ptr; + } + next_method_read_ptr += 4; + } + + // rjf: emit method + switch(prop) + { + default: + { + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_Method; + mem->name = name; + mem->type = method_type; + }break; + case CV_MethodProp_Static: + { + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_StaticMethod; + mem->name = name; + mem->type = method_type; + }break; + case CV_MethodProp_Virtual: + case CV_MethodProp_PureVirtual: + case CV_MethodProp_Intro: + case CV_MethodProp_PureIntro: + { + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_VirtualMethod; + mem->name = name; + mem->type = method_type; + }break; + } + } + + }break; + + //- rjf: ONEMETHOD + case CV_LeafKind_ONEMETHOD: + { + // TODO(rjf): handle attribs + + // rjf: unpack leaf + CV_LeafOneMethod *lf = (CV_LeafOneMethod *)field_leaf_first; + CV_MethodProp prop = CV_FieldAttribs_Extract_MethodProp(lf->attribs); + U8 *vbaseoff_ptr = (U8 *)(lf+1); + U8 *vbaseoff_opl_ptr = vbaseoff_ptr; + U32 vbaseoff = 0; + if(prop == CV_MethodProp_Intro || prop == CV_MethodProp_PureIntro) + { + vbaseoff = *(U32 *)(vbaseoff_ptr); + vbaseoff_opl_ptr += sizeof(U32); + } + 
U8 *name_ptr = vbaseoff_opl_ptr; + String8 name = str8_cstring_capped(name_ptr, field_leaf_opl); + RDIM_Type *method_type = p2r_type_ptr_from_itype(lf->itype); + + // rjf: bump next read pointer past variable length parts + next_read_ptr = name.str+name.size+1; + + // rjf: emit method + switch(prop) + { + default: + { + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_Method; + mem->name = name; + mem->type = method_type; + }break; + + case CV_MethodProp_Static: + { + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_StaticMethod; + mem->name = name; + mem->type = method_type; + }break; + + case CV_MethodProp_Virtual: + case CV_MethodProp_PureVirtual: + case CV_MethodProp_Intro: + case CV_MethodProp_PureIntro: + { + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_VirtualMethod; + mem->name = name; + mem->type = method_type; + }break; + } + }break; + + //- rjf: NESTTYPE + case CV_LeafKind_NESTTYPE: + { + // rjf: unpack leaf + CV_LeafNestType *lf = (CV_LeafNestType *)field_leaf_first; + U8 *name_ptr = (U8 *)(lf+1); + String8 name = str8_cstring_capped(name_ptr, field_leaf_opl); + + // rjf: bump next read pointer past variable length parts + next_read_ptr = name.str+name.size+1; + + // rjf: emit member + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_NestedType; + mem->name = name; + mem->type = p2r_type_ptr_from_itype(lf->itype); + }break; + + //- rjf: NESTTYPEEX + case CV_LeafKind_NESTTYPEEX: + { + // TODO(rjf): handle attribs + + // rjf: unpack leaf + CV_LeafNestTypeEx *lf = (CV_LeafNestTypeEx *)field_leaf_first; + U8 *name_ptr = (U8 *)(lf+1); + String8 name = str8_cstring_capped(name_ptr, field_leaf_opl); + + // rjf: bump next read pointer past variable length parts + next_read_ptr = name.str+name.size+1; + + // rjf: emit member + RDIM_UDTMember *mem = rdim_udt_push_member(arena, 
udts, dst_udt); + mem->kind = RDI_MemberKind_NestedType; + mem->name = name; + mem->type = p2r_type_ptr_from_itype(lf->itype); + }break; + + //- rjf: BCLASS + case CV_LeafKind_BCLASS: + { + // TODO(rjf): log on bad offset + + // rjf: unpack leaf + CV_LeafBClass *lf = (CV_LeafBClass *)field_leaf_first; + U8 *offset_ptr = (U8 *)(lf+1); + CV_NumericParsed offset = cv_numeric_from_data_range(offset_ptr, field_leaf_opl); + U64 offset64 = cv_u64_from_numeric(&offset); + + // rjf: bump next read pointer past variable length parts + next_read_ptr = offset_ptr+offset.encoded_size; + + // rjf: emit member + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_Base; + mem->type = p2r_type_ptr_from_itype(lf->itype); + mem->off = (U32)offset64; + }break; + + //- rjf: VBCLASS/IVBCLASS + case CV_LeafKind_VBCLASS: + case CV_LeafKind_IVBCLASS: + { + // TODO(rjf): log on bad offsets + // TODO(rjf): handle attribs + // TODO(rjf): offsets? + + // rjf: unpack leaf + CV_LeafVBClass *lf = (CV_LeafVBClass *)field_leaf_first; + U8 *num1_ptr = (U8 *)(lf+1); + CV_NumericParsed num1 = cv_numeric_from_data_range(num1_ptr, field_leaf_opl); + U8 *num2_ptr = num1_ptr + num1.encoded_size; + CV_NumericParsed num2 = cv_numeric_from_data_range(num2_ptr, field_leaf_opl); + + // rjf: bump next read pointer past header + next_read_ptr = (U8 *)(lf+1); + + // rjf: emit member + RDIM_UDTMember *mem = rdim_udt_push_member(arena, udts, dst_udt); + mem->kind = RDI_MemberKind_VirtualBase; + mem->type = p2r_type_ptr_from_itype(lf->itype); + }break; + + //- rjf: VFUNCTAB + case CV_LeafKind_VFUNCTAB: + { + CV_LeafVFuncTab *lf = (CV_LeafVFuncTab *)field_leaf_first; + + // rjf: bump next read pointer past header + next_read_ptr = (U8 *)(lf+1); + + // NOTE(rjf): currently no-op this case + (void)lf; + }break; + } + + // rjf: align-up next field + next_read_ptr = (U8 *)AlignPow2((U64)next_read_ptr, 4); + } + } + } + + scratch_end(scratch); + }break; + + 
//////////////////////// + //- rjf: enums -> equip enumerates + // + case CV_LeafKind_ENUM: + { + CV_LeafEnum *lf = (CV_LeafEnum *)itype_leaf_first; + if(lf->props & CV_TypeProp_FwdRef) + { + break; + } + field_itype = lf->field_itype; + }goto equip_enum_vals; + equip_enum_vals:; + { + Temp scratch = scratch_begin(&arena, 1); + + //- rjf: grab UDT info + RDIM_UDT *dst_udt = dst_type->udt; + if(dst_udt == 0) + { + dst_udt = dst_type->udt = rdim_udt_chunk_list_push(arena, udts, udts_chunk_cap); + dst_udt->self_type = dst_type; + } + + //- rjf: gather all fields + typedef struct FieldListTask FieldListTask; + struct FieldListTask + { + FieldListTask *next; + CV_TypeId itype; + }; + FieldListTask start_fl_task = {0, field_itype}; + FieldListTask *fl_todo_stack = &start_fl_task; + FieldListTask *fl_done_stack = 0; + for(;fl_todo_stack != 0;) + { + //- rjf: take & unpack task + FieldListTask *fl_task = fl_todo_stack; + SLLStackPop(fl_todo_stack); + SLLStackPush(fl_done_stack, fl_task); + CV_TypeId field_list_itype = fl_task->itype; + + //- rjf: skip bad itypes + if(field_list_itype < in->tpi_leaf->itype_first || in->tpi_leaf->itype_opl <= field_list_itype) + { + continue; + } + + //- rjf: field list itype -> range + CV_RecRange *range = &in->tpi_leaf->leaf_ranges.ranges[field_list_itype-in->tpi_leaf->itype_first]; + + //- rjf: skip bad headers + if(range->off+range->hdr.size > in->tpi_leaf->data.size || + range->hdr.size < 2 || + range->hdr.kind != CV_LeafKind_FIELDLIST) + { + continue; + } + + //- rjf: loop over all fields + { + U8 *field_list_first = in->tpi_leaf->data.str+range->off+2; + U8 *field_list_opl = field_list_first+range->hdr.size-2; + for(U8 *read_ptr = field_list_first, *next_read_ptr = field_list_opl; + read_ptr < field_list_opl; + read_ptr = next_read_ptr) + { + // rjf: unpack field + CV_LeafKind field_kind = *(CV_LeafKind *)read_ptr; + U64 field_leaf_header_size = cv_header_struct_size_from_leaf_kind(field_kind); + U8 *field_leaf_first = read_ptr+2; + 
U8 *field_leaf_opl = field_leaf_first+range->hdr.size-2; + next_read_ptr = field_leaf_opl; + + // rjf: skip out-of-bounds fields + if(field_leaf_first+field_leaf_header_size > field_list_opl) + { + continue; + } + + // rjf: process field + switch(field_kind) + { + //- rjf: unhandled/invalid cases + default: + { + // TODO(rjf): log + }break; + + //- rjf: INDEX + case CV_LeafKind_INDEX: + { + // rjf: unpack leaf + CV_LeafIndex *lf = (CV_LeafIndex *)field_leaf_first; + CV_TypeId new_itype = lf->itype; + + // rjf: determine if index itype is new + B32 is_new = 1; + for(FieldListTask *t = fl_done_stack; t != 0; t = t->next) + { + if(t->itype == new_itype) + { + is_new = 0; + break; + } + } + + // rjf: if new -> push task to follow new itype + if(is_new) + { + FieldListTask *new_task = push_array(scratch.arena, FieldListTask, 1); + SLLStackPush(fl_todo_stack, new_task); + new_task->itype = new_itype; + } + }break; + + //- rjf: ENUMERATE + case CV_LeafKind_ENUMERATE: + { + // TODO(rjf): attribs + + // rjf: unpack leaf + CV_LeafEnumerate *lf = (CV_LeafEnumerate *)field_leaf_first; + U8 *val_ptr = (U8 *)(lf+1); + CV_NumericParsed val = cv_numeric_from_data_range(val_ptr, field_leaf_opl); + U64 val64 = cv_u64_from_numeric(&val); + U8 *name_ptr = val_ptr + val.encoded_size; + String8 name = str8_cstring_capped(name_ptr, field_leaf_opl); + + // rjf: bump next read pointer past variable length parts + next_read_ptr = name.str+name.size+1; + + // rjf: emit member + RDIM_UDTEnumVal *enum_val = rdim_udt_push_enum_val(arena, udts, dst_udt); + enum_val->name = name; + enum_val->val = val64; + }break; + } + + // rjf: align-up next field + next_read_ptr = (U8 *)AlignPow2((U64)next_read_ptr, 4); + } + } + } + + scratch_end(scratch); + }break; + } + } + } +#undef p2r_type_ptr_from_itype + ProfEnd(); + return udts; +}
+
+////////////////////////////////
+//~ rjf: Symbol Stream Conversion Path & Thread
+
+// Converts one range of CodeView symbol records into RDIM symbols, scopes,
+// locals and inline sites.
+//
+// input:   P2R_SymbolStreamConvertIn *; the records walked are
+//          in->sym->sym_ranges.ranges[in->sym_ranges_first .. in->sym_ranges_opl).
+// returns: P2R_SymbolStreamConvertOut *, allocated (with everything it refers
+//          to) on this worker thread's arena.
+//
+// The stream is walked twice:
+//   pass 1 - records, per procedure (in stream order), the FRAMEPROC record
+//            that follows it, if any.
+//   pass 2 - builds procedures, globals, thread-locals, scopes, locals and
+//            inline sites, consulting the pass-1 table whenever a location is
+//            frame-relative.
+ASYNC_WORK_DEF(p2r_symbol_stream_convert_work)
+{
+  ProfBeginFunction();
+  Arena *arena = g_p2r_help_state->work_thread_arenas[thread_idx];
+  Temp scratch = scratch_begin(&arena, 1);
+  P2R_SymbolStreamConvertIn *in = (P2R_SymbolStreamConvertIn *)input;
+  // NOTE: resolves to 0 when there is no type table, no TPI leaf info, or the
+  // itype is past the end of the table.
+#define p2r_type_ptr_from_itype(itype) ((in->itype_type_ptrs && in->tpi_leaf != 0 && (itype) < in->tpi_leaf->itype_opl) ? (in->itype_type_ptrs[itype]) : 0)
+
+  //////////////////////////
+  //- rjf: set up outputs for this sym stream
+  //
+  U64 sym_procedures_chunk_cap = 1024;
+  U64 sym_global_variables_chunk_cap = 1024;
+  U64 sym_thread_variables_chunk_cap = 1024;
+  U64 sym_scopes_chunk_cap = 1024;
+  U64 sym_inline_sites_chunk_cap = 1024;
+  RDIM_SymbolChunkList sym_procedures = {0};
+  RDIM_SymbolChunkList sym_global_variables = {0};
+  RDIM_SymbolChunkList sym_thread_variables = {0};
+  RDIM_ScopeChunkList sym_scopes = {0};
+  RDIM_InlineSiteChunkList sym_inline_sites = {0};
+
+  //////////////////////////
+  //- rjf: symbols pass 1: produce procedure frame info map (procedure -> frame info)
+  //
+  // NOTE: the table must be zero-initialized. Only procedures that are followed
+  // by a FRAMEPROC record get their slot written; pass 2 tests the remaining
+  // slots against 0 before dereferencing them.
+  U64 procedure_frameprocs_count = 0;
+  U64 procedure_frameprocs_cap   = (in->sym_ranges_opl - in->sym_ranges_first);
+  CV_SymFrameproc **procedure_frameprocs = push_array(scratch.arena, CV_SymFrameproc *, procedure_frameprocs_cap);
+  ProfScope("symbols pass 1: produce procedure frame info map (procedure -> frame info)")
+  {
+    U64 procedure_num = 0;
+    CV_RecRange *rec_ranges_first = in->sym->sym_ranges.ranges + in->sym_ranges_first;
+    CV_RecRange *rec_ranges_opl   = in->sym->sym_ranges.ranges + in->sym_ranges_opl;
+    for(CV_RecRange *rec_range = rec_ranges_first;
+        rec_range < rec_ranges_opl;
+        rec_range += 1)
+    {
+      //- rjf: rec range -> symbol info range
+      U64 sym_off_first = rec_range->off + 2;
+      U64 sym_off_opl   = rec_range->off + rec_range->hdr.size;
+
+      //- rjf: skip invalid ranges
+      if(sym_off_opl > in->sym->data.size || sym_off_first > in->sym->data.size || sym_off_first > sym_off_opl)
+      {
+        continue;
+      }
+
+      //- rjf: unpack symbol info
+      CV_SymKind kind = rec_range->hdr.kind;
+      U64 sym_header_struct_size = cv_header_struct_size_from_sym_kind(kind);
+      void *sym_header_struct_base = in->sym->data.str + sym_off_first;
+
+      //- rjf: skip bad sizes
+      if(sym_off_first + sym_header_struct_size > sym_off_opl)
+      {
+        continue;
+      }
+
+      //- rjf: consume symbol based on kind
+      switch(kind)
+      {
+        default:{}break;
+
+        //- rjf: FRAMEPROC
+        case CV_SymKind_FRAMEPROC:
+        {
+          if(procedure_num == 0) { break; }
+          if(procedure_num > procedure_frameprocs_cap) { break; }
+          CV_SymFrameproc *frameproc = (CV_SymFrameproc*)sym_header_struct_base;
+          procedure_frameprocs[procedure_num-1] = frameproc;
+          procedure_frameprocs_count = Max(procedure_frameprocs_count, procedure_num);
+        }break;
+
+        //- rjf: LPROC32/GPROC32
+        case CV_SymKind_LPROC32:
+        case CV_SymKind_GPROC32:
+        {
+          procedure_num += 1;
+        }break;
+      }
+    }
+
+    // rjf: give back the unused tail of the table; slots at or past
+    // `procedure_frameprocs_count` must not be read after this point
+    U64 scratch_overkill = sizeof(procedure_frameprocs[0])*(procedure_frameprocs_cap-procedure_frameprocs_count);
+    arena_pop(scratch.arena, scratch_overkill);
+  }
+
+  //////////////////////////
+  //- rjf: symbols pass 2: construct all symbols, given procedure frame info map
+  //
+  ProfScope("symbols pass 2: construct all symbols, given procedure frame info map")
+  {
+    RDIM_LocationSet *defrange_target = 0;
+    B32 defrange_target_is_param = 0;
+    U64 procedure_num = 0;
+    U64 procedure_base_voff = 0;
+    CV_RecRange *rec_ranges_first = in->sym->sym_ranges.ranges + in->sym_ranges_first;
+    CV_RecRange *rec_ranges_opl   = in->sym->sym_ranges.ranges + in->sym_ranges_opl;
+    typedef struct P2R_ScopeNode P2R_ScopeNode;
+    struct P2R_ScopeNode
+    {
+      P2R_ScopeNode *next;
+      RDIM_Scope *scope;
+    };
+    P2R_ScopeNode *top_scope_node = 0;
+    P2R_ScopeNode *free_scope_node = 0;
+    RDIM_LineTable *inline_site_line_table = in->first_inline_site_line_table;
+    for(CV_RecRange *rec_range = rec_ranges_first;
+        rec_range < rec_ranges_opl;
+        rec_range += 1)
+    {
+      //- rjf: rec range -> symbol info range
+      U64 sym_off_first = rec_range->off + 2;
+      U64 sym_off_opl   = rec_range->off + rec_range->hdr.size;
+
+      //- rjf: skip invalid ranges
+      if(sym_off_opl > in->sym->data.size || sym_off_first > in->sym->data.size || sym_off_first > sym_off_opl)
+      {
+        continue;
+      }
+
+      //- rjf: unpack symbol info
+      CV_SymKind kind = rec_range->hdr.kind;
+      U64 sym_header_struct_size = cv_header_struct_size_from_sym_kind(kind);
+      void *sym_header_struct_base = in->sym->data.str + sym_off_first;
+      void *sym_data_opl = in->sym->data.str + sym_off_opl;
+
+      //- rjf: skip bad sizes
+      if(sym_off_first + sym_header_struct_size > sym_off_opl)
+      {
+        continue;
+      }
+
+      //- rjf: consume symbol based on kind
+      switch(kind)
+      {
+        default:{}break;
+
+        //- rjf: END
+        case CV_SymKind_END:
+        {
+          P2R_ScopeNode *n = top_scope_node;
+          if(n != 0)
+          {
+            SLLStackPop(top_scope_node);
+            SLLStackPush(free_scope_node, n);
+          }
+          defrange_target = 0;
+          defrange_target_is_param = 0;
+        }break;
+
+        //- rjf: BLOCK32
+        case CV_SymKind_BLOCK32:
+        {
+          // rjf: unpack sym
+          CV_SymBlock32 *block32 = (CV_SymBlock32 *)sym_header_struct_base;
+
+          // rjf: build scope, insert into current parent scope
+          RDIM_Scope *scope = rdim_scope_chunk_list_push(arena, &sym_scopes, sym_scopes_chunk_cap);
+          {
+            if(top_scope_node == 0)
+            {
+              // TODO(rjf): log
+            }
+            if(top_scope_node != 0)
+            {
+              RDIM_Scope *top_scope = top_scope_node->scope;
+              SLLQueuePush_N(top_scope->first_child, top_scope->last_child, scope, next_sibling);
+              scope->parent_scope = top_scope;
+              scope->symbol = top_scope->symbol;
+            }
+            COFF_SectionHeader *section = (0 < block32->sec && block32->sec <= in->coff_sections.count) ? &in->coff_sections.v[block32->sec-1] : 0;
+            if(section != 0)
+            {
+              U64 voff_first = section->voff + block32->off;
+              U64 voff_last = voff_first + block32->len;
+              RDIM_Rng1U64 voff_range = {voff_first, voff_last};
+              rdim_scope_push_voff_range(arena, &sym_scopes, scope, voff_range);
+            }
+          }
+
+          // rjf: push this scope to scope stack
+          {
+            P2R_ScopeNode *node = free_scope_node;
+            if(node != 0) { SLLStackPop(free_scope_node); }
+            else { node = push_array_no_zero(scratch.arena, P2R_ScopeNode, 1); }
+            node->scope = scope;
+            SLLStackPush(top_scope_node, node);
+          }
+        }break;
+
+        //- rjf: LDATA32/GDATA32
+        case CV_SymKind_LDATA32:
+        case CV_SymKind_GDATA32:
+        {
+          // rjf: unpack sym
+          CV_SymData32 *data32 = (CV_SymData32 *)sym_header_struct_base;
+          String8 name = str8_cstring_capped(data32+1, sym_data_opl);
+          COFF_SectionHeader *section = (0 < data32->sec && data32->sec <= in->coff_sections.count) ? &in->coff_sections.v[data32->sec-1] : 0;
+          U64 voff = (section ? section->voff : 0) + data32->off;
+
+          // rjf: determine if this is an exact duplicate global
+          //
+          // PDB likes to have duplicates of these spread across different
+          // symbol streams so we deduplicate across the entire translation
+          // context.
+          //
+          B32 is_duplicate = 0;
+          {
+            // TODO(rjf): @important global symbol dedup
+          }
+
+          // rjf: is not duplicate -> push new global
+          if(!is_duplicate)
+          {
+            // rjf: unpack global variable's type
+            RDIM_Type *type = p2r_type_ptr_from_itype(data32->itype);
+
+            // rjf: unpack global's container type (only possible with TPI info,
+            // same guard as the procedure path)
+            RDIM_Type *container_type = 0;
+            U64 container_name_opl = p2r_end_of_cplusplus_container_name(name);
+            if(container_name_opl > 2 && in->tpi_hash != 0 && in->tpi_leaf != 0)
+            {
+              String8 container_name = str8(name.str, container_name_opl - 2);
+              CV_TypeId cv_type_id = pdb_tpi_first_itype_from_name(in->tpi_hash, in->tpi_leaf, container_name, 0);
+              container_type = p2r_type_ptr_from_itype(cv_type_id);
+            }
+
+            // rjf: unpack global's container symbol
+            RDIM_Symbol *container_symbol = 0;
+            if(container_type == 0 && top_scope_node != 0)
+            {
+              container_symbol = top_scope_node->scope->symbol;
+            }
+
+            // form a VOFF location
+#if 0
+            RDIM_LocationSet locset = {0};
+            RDIM_Location *voff_loc = rdim_push_location_voff(arena, voff);
+            rdim_location_set_push_case(arena, &locset, (RDIM_Rng1U64){0,max_U64}, voff_loc);
+#endif
+
+            // rjf: build symbol
+            RDIM_Symbol *symbol = rdim_symbol_chunk_list_push(arena, &sym_global_variables, sym_global_variables_chunk_cap);
+            symbol->is_extern        = (kind == CV_SymKind_GDATA32);
+            symbol->name             = name;
+            symbol->type             = type;
+            //symbol->locset           = locset;
+            symbol->container_symbol = container_symbol;
+            symbol->container_type   = container_type;
+          }
+        }break;
+
+        //- rjf: LPROC32/GPROC32
+        case CV_SymKind_LPROC32:
+        case CV_SymKind_GPROC32:
+        {
+          // rjf: unpack sym
+          CV_SymProc32 *proc32 = (CV_SymProc32 *)sym_header_struct_base;
+          String8 name = str8_cstring_capped(proc32+1, sym_data_opl);
+          RDIM_Type *type = p2r_type_ptr_from_itype(proc32->itype);
+
+          // rjf: unpack proc's container type
+          RDIM_Type *container_type = 0;
+          U64 container_name_opl = p2r_end_of_cplusplus_container_name(name);
+          if(container_name_opl > 2 && in->tpi_hash != 0 && in->tpi_leaf != 0)
+          {
+            String8 container_name = str8(name.str, container_name_opl - 2);
+            CV_TypeId cv_type_id = pdb_tpi_first_itype_from_name(in->tpi_hash, in->tpi_leaf, container_name, 0);
+            container_type = p2r_type_ptr_from_itype(cv_type_id);
+          }
+
+          // rjf: unpack proc's container symbol
+          RDIM_Symbol *container_symbol = 0;
+          if(container_type == 0 && top_scope_node != 0)
+          {
+            container_symbol = top_scope_node->scope->symbol;
+          }
+
+          // rjf: build procedure's root scope
+          //
+          // NOTE: even if there could be a containing scope at this point (which should be
+          //       illegal in C/C++ but not necessarily in another language) we would not use
+          //       it here because these scopes refer to the ranges of code that make up a
+          //       procedure *not* the namespaces, so a procedure's root scope always has
+          //       no parent.
+          RDIM_Scope *procedure_root_scope = rdim_scope_chunk_list_push(arena, &sym_scopes, sym_scopes_chunk_cap);
+          {
+            COFF_SectionHeader *section = (0 < proc32->sec && proc32->sec <= in->coff_sections.count) ? &in->coff_sections.v[proc32->sec-1] : 0;
+            if(section != 0)
+            {
+              U64 voff_first = section->voff + proc32->off;
+              U64 voff_last = voff_first + proc32->len;
+              RDIM_Rng1U64 voff_range = {voff_first, voff_last};
+              rdim_scope_push_voff_range(arena, &sym_scopes, procedure_root_scope, voff_range);
+              procedure_base_voff = voff_first;
+            }
+          }
+
+          // rjf: root scope voff minimum range -> link name
+          //
+          // NOTE: skipped when there is no link name map or it has no buckets,
+          // which would otherwise be a null dereference / modulo by zero.
+          String8 link_name = {0};
+          if(procedure_root_scope->voff_ranges.min != 0 &&
+             in->link_name_map != 0 &&
+             in->link_name_map->buckets_count != 0)
+          {
+            U64 voff = procedure_root_scope->voff_ranges.min;
+            U64 hash = p2r_hash_from_voff(voff);
+            U64 bucket_idx = hash%in->link_name_map->buckets_count;
+            for(P2R_LinkNameNode *n = in->link_name_map->buckets[bucket_idx]; n != 0; n = n->next)
+            {
+              if(n->voff == voff)
+              {
+                link_name = n->name;
+                break;
+              }
+            }
+          }
+
+          // rjf: build procedure symbol
+          RDIM_Symbol *procedure_symbol = rdim_symbol_chunk_list_push(arena, &sym_procedures, sym_procedures_chunk_cap);
+          procedure_symbol->is_extern        = (kind == CV_SymKind_GPROC32);
+          procedure_symbol->name             = name;
+          procedure_symbol->link_name        = link_name;
+          procedure_symbol->type             = type;
+          procedure_symbol->container_symbol = container_symbol;
+          procedure_symbol->container_type   = container_type;
+          procedure_symbol->root_scope       = procedure_root_scope;
+
+          // rjf: fill root scope's symbol
+          procedure_root_scope->symbol = procedure_symbol;
+
+          // rjf: push scope to scope stack
+          {
+            P2R_ScopeNode *node = free_scope_node;
+            if(node != 0) { SLLStackPop(free_scope_node); }
+            else { node = push_array_no_zero(scratch.arena, P2R_ScopeNode, 1); }
+            node->scope = procedure_root_scope;
+            SLLStackPush(top_scope_node, node);
+          }
+
+          // rjf: increment procedure counter
+          procedure_num += 1;
+        }break;
+
+        //- rjf: REGREL32
+        case CV_SymKind_REGREL32:
+        {
+          // TODO(rjf): apparently some of the information here may end up being
+          // redundant with "better" information from CV_SymKind_LOCAL record.
+          // we don't currently handle this, but if those cases arise then it
+          // will obviously be better to prefer the better information from both
+          // records.
+
+          // rjf: no containing scope? -> malformed data; locals cannot be produced
+          // outside of a containing scope
+          if(top_scope_node == 0)
+          {
+            break;
+          }
+
+          // rjf: unpack sym
+          CV_SymRegrel32 *regrel32 = (CV_SymRegrel32 *)sym_header_struct_base;
+          String8 name = str8_cstring_capped(regrel32+1, sym_data_opl);
+          RDIM_Type *type = p2r_type_ptr_from_itype(regrel32->itype);
+          CV_Reg cv_reg = regrel32->reg;
+          U32 var_off = regrel32->reg_off;
+
+          // rjf: determine if this is a parameter
+          RDI_LocalKind local_kind = RDI_LocalKind_Variable;
+          {
+            B32 is_stack_reg = 0;
+            switch(in->arch)
+            {
+              default:{}break;
+              case RDI_Arch_X86:{is_stack_reg = (cv_reg == CV_Regx86_ESP);}break;
+              case RDI_Arch_X64:{is_stack_reg = (cv_reg == CV_Regx64_RSP);}break;
+            }
+            if(is_stack_reg)
+            {
+              // NOTE: bounds are checked *before* the table is indexed, and the
+              // last recorded procedure (procedure_num == count) is included,
+              // matching the frameproc lookups in the DEFRANGE cases below.
+              U32 frame_size = 0xFFFFFFFF;
+              if(procedure_num != 0 && procedure_num <= procedure_frameprocs_count && procedure_frameprocs[procedure_num-1] != 0)
+              {
+                CV_SymFrameproc *frameproc = procedure_frameprocs[procedure_num-1];
+                frame_size = frameproc->frame_size;
+              }
+              if(var_off > frame_size)
+              {
+                local_kind = RDI_LocalKind_Parameter;
+              }
+            }
+          }
+
+          // TODO(rjf): is this correct?
+          // rjf: redirect type, if 0, and if outside frame, to the return type of the
+          // containing procedure
+          if(local_kind == RDI_LocalKind_Parameter && regrel32->itype == 0 &&
+             top_scope_node->scope->symbol != 0 &&
+             top_scope_node->scope->symbol->type != 0)
+          {
+            type = top_scope_node->scope->symbol->type->direct_type;
+          }
+
+          // rjf: build local
+          RDIM_Scope *scope = top_scope_node->scope;
+          RDIM_Local *local = rdim_scope_push_local(arena, &sym_scopes, scope);
+          local->kind = local_kind;
+          local->name = name;
+          local->type = type;
+
+          // rjf: add location info to local
+          if(type != 0)
+          {
+            // rjf: determine if we need an extra indirection to the value
+            B32 extra_indirection_to_value = 0;
+            switch(in->arch)
+            {
+              case RDI_Arch_X86:
+              {
+                extra_indirection_to_value = (local_kind == RDI_LocalKind_Parameter && (type->byte_size > 4 || !IsPow2OrZero(type->byte_size)));
+              }break;
+              case RDI_Arch_X64:
+              {
+                extra_indirection_to_value = (local_kind == RDI_LocalKind_Parameter && (type->byte_size > 8 || !IsPow2OrZero(type->byte_size)));
+              }break;
+            }
+
+            // rjf: get raddbg register code
+            RDI_RegCode reg_code = cv2r_rdi_reg_code_from_cv_reg_code(in->arch, cv_reg);
+            // TODO(rjf): real byte_size & byte_pos from cv_reg goes here
+            U32 byte_size = 8;
+            U32 byte_pos = 0;
+
+            // rjf: set location case
+            RDIM_Location *loc = p2r_location_from_addr_reg_off(arena, in->arch, reg_code, byte_size, byte_pos, (S64)(S32)var_off, extra_indirection_to_value);
+            RDIM_Rng1U64 voff_range = {0, max_U64};
+            rdim_location_set_push_case(arena, &sym_scopes, &local->locset, voff_range, loc);
+          }
+        }break;
+
+        //- rjf: LTHREAD32/GTHREAD32
+        case CV_SymKind_LTHREAD32:
+        case CV_SymKind_GTHREAD32:
+        {
+          // rjf: unpack sym
+          CV_SymThread32 *thread32 = (CV_SymThread32 *)sym_header_struct_base;
+          String8 name = str8_cstring_capped(thread32+1, sym_data_opl);
+          U32 tls_off = thread32->tls_off;
+          RDIM_Type *type = p2r_type_ptr_from_itype(thread32->itype);
+
+          // rjf: unpack thread variable's container type (only possible with TPI
+          // info, same guard as the procedure path)
+          RDIM_Type *container_type = 0;
+          U64 container_name_opl = p2r_end_of_cplusplus_container_name(name);
+          if(container_name_opl > 2 && in->tpi_hash != 0 && in->tpi_leaf != 0)
+          {
+            String8 container_name = str8(name.str, container_name_opl - 2);
+            CV_TypeId cv_type_id = pdb_tpi_first_itype_from_name(in->tpi_hash, in->tpi_leaf, container_name, 0);
+            container_type = p2r_type_ptr_from_itype(cv_type_id);
+          }
+
+          // rjf: unpack thread variable's container symbol
+          RDIM_Symbol *container_symbol = 0;
+          if(container_type == 0 && top_scope_node != 0)
+          {
+            container_symbol = top_scope_node->scope->symbol;
+          }
+
+          // form TLS OFF location
+#if 0
+          RDIM_LocationSet locset = {0};
+          RDIM_Location *tls_off_loc = rdim_push_location_tls_off(arena, tls_off);
+          rdim_location_set_push_case(arena, &locset, (RDIM_Rng1U64){0,max_U64}, tls_off_loc);
+#endif
+
+          // rjf: build symbol
+          RDIM_Symbol *tvar = rdim_symbol_chunk_list_push(arena, &sym_thread_variables, sym_thread_variables_chunk_cap);
+          tvar->name             = name;
+          tvar->type             = type;
+          tvar->is_extern        = (kind == CV_SymKind_GTHREAD32);
+          //tvar->locset           = locset;
+          tvar->container_type   = container_type;
+          tvar->container_symbol = container_symbol;
+        }break;
+
+        //- rjf: LOCAL
+        case CV_SymKind_LOCAL:
+        {
+          // rjf: no containing scope? -> malformed data; locals cannot be produced
+          // outside of a containing scope
+          if(top_scope_node == 0)
+          {
+            break;
+          }
+
+          // rjf: unpack sym
+          CV_SymLocal *slocal = (CV_SymLocal *)sym_header_struct_base;
+          String8 name = str8_cstring_capped(slocal+1, sym_data_opl);
+          RDIM_Type *type = p2r_type_ptr_from_itype(slocal->itype);
+
+          // rjf: determine if this symbol encodes the beginning of a global modification
+          B32 is_global_modification = 0;
+          if((slocal->flags & CV_LocalFlag_Global) ||
+             (slocal->flags & CV_LocalFlag_Static))
+          {
+            is_global_modification = 1;
+          }
+
+          // rjf: is global modification -> emit global modification symbol
+          if(is_global_modification)
+          {
+            // TODO(rjf): add global modification symbols
+            defrange_target = 0;
+            defrange_target_is_param = 0;
+          }
+
+          // rjf: is not a global modification -> emit a local variable
+          if(!is_global_modification)
+          {
+            // rjf: determine local kind
+            RDI_LocalKind local_kind = RDI_LocalKind_Variable;
+            if(slocal->flags & CV_LocalFlag_Param)
+            {
+              local_kind = RDI_LocalKind_Parameter;
+            }
+
+            // rjf: build local
+            RDIM_Scope *scope = top_scope_node->scope;
+            RDIM_Local *local = rdim_scope_push_local(arena, &sym_scopes, scope);
+            local->kind = local_kind;
+            local->name = name;
+            local->type = type;
+
+            // rjf: save defrange target, for subsequent defrange symbols
+            defrange_target = &local->locset;
+            defrange_target_is_param = (local_kind == RDI_LocalKind_Parameter);
+          }
+        }break;
+
+        //- rjf: DEFRANGE_REGISTER
+        case CV_SymKind_DEFRANGE_REGISTER:
+        {
+          // rjf: no defrange target? -> somehow we got to a defrange symbol without first seeing
+          // a local - break immediately
+          if(defrange_target == 0)
+          {
+            break;
+          }
+
+          // rjf: unpack sym
+          CV_SymDefrangeRegister *defrange_register = (CV_SymDefrangeRegister*)sym_header_struct_base;
+          CV_Reg cv_reg = defrange_register->reg;
+          CV_LvarAddrRange *range = &defrange_register->range;
+          COFF_SectionHeader *range_section = (0 < range->sec && range->sec <= in->coff_sections.count) ? &in->coff_sections.v[range->sec-1] : 0;
+          CV_LvarAddrGap *gaps = (CV_LvarAddrGap*)(defrange_register+1);
+          U64 gap_count = ((U8*)sym_data_opl - (U8*)gaps) / sizeof(*gaps);
+          RDI_RegCode reg_code = cv2r_rdi_reg_code_from_cv_reg_code(in->arch, cv_reg);
+
+          // rjf: build location
+          RDIM_Location *location = rdim_push_location_val_reg(arena, reg_code);
+
+          // rjf: emit locations over ranges
+          p2r_location_over_lvar_addr_range(arena, &sym_scopes, defrange_target, location, range, range_section, gaps, gap_count);
+        }break;
+
+        //- rjf: DEFRANGE_FRAMEPOINTER_REL
+        case CV_SymKind_DEFRANGE_FRAMEPOINTER_REL:
+        {
+          // rjf: no defrange target? -> somehow we got to a defrange symbol without first seeing
+          // a local - break immediately
+          if(defrange_target == 0)
+          {
+            break;
+          }
+
+          // rjf: find current procedure's frameproc
+          CV_SymFrameproc *frameproc = 0;
+          if(procedure_num != 0 && procedure_num <= procedure_frameprocs_count && procedure_frameprocs[procedure_num-1] != 0)
+          {
+            frameproc = procedure_frameprocs[procedure_num-1];
+          }
+
+          // rjf: no current valid frameproc? -> somehow we got to a framepointer-relative defrange
+          // without having an actually active procedure - break
+          if(frameproc == 0)
+          {
+            break;
+          }
+
+          // rjf: unpack sym
+          CV_SymDefrangeFramepointerRel *defrange_fprel = (CV_SymDefrangeFramepointerRel*)sym_header_struct_base;
+          CV_LvarAddrRange *range = &defrange_fprel->range;
+          COFF_SectionHeader *range_section = (0 < range->sec && range->sec <= in->coff_sections.count) ? &in->coff_sections.v[range->sec-1] : 0;
+          CV_LvarAddrGap *gaps = (CV_LvarAddrGap*)(defrange_fprel + 1);
+          U64 gap_count = ((U8*)sym_data_opl - (U8*)gaps) / sizeof(*gaps);
+
+          // rjf: select frame pointer register
+          CV_EncodedFramePtrReg encoded_fp_reg = cv_pick_fp_encoding(frameproc, defrange_target_is_param);
+          RDI_RegCode fp_register_code = cv2r_reg_code_from_arch_encoded_fp_reg(in->arch, encoded_fp_reg);
+
+          // rjf: build location
+          B32 extra_indirection = 0;
+          U32 byte_size = rdi_addr_size_from_arch(in->arch);
+          U32 byte_pos = 0;
+          S64 var_off = (S64)defrange_fprel->off;
+          RDIM_Location *location = p2r_location_from_addr_reg_off(arena, in->arch, fp_register_code, byte_size, byte_pos, var_off, extra_indirection);
+
+          // rjf: emit locations over ranges
+          p2r_location_over_lvar_addr_range(arena, &sym_scopes, defrange_target, location, range, range_section, gaps, gap_count);
+        }break;
+
+        //- rjf: DEFRANGE_SUBFIELD_REGISTER
+        case CV_SymKind_DEFRANGE_SUBFIELD_REGISTER:
+        {
+          // rjf: no defrange target? -> somehow we got to a defrange symbol without first seeing
+          // a local - break immediately
+          if(defrange_target == 0)
+          {
+            break;
+          }
+
+          // rjf: unpack sym
+          CV_SymDefrangeSubfieldRegister *defrange_subfield_register = (CV_SymDefrangeSubfieldRegister*)sym_header_struct_base;
+          CV_Reg cv_reg = defrange_subfield_register->reg;
+          CV_LvarAddrRange *range = &defrange_subfield_register->range;
+          COFF_SectionHeader *range_section = (0 < range->sec && range->sec <= in->coff_sections.count) ? &in->coff_sections.v[range->sec-1] : 0;
+          CV_LvarAddrGap *gaps = (CV_LvarAddrGap*)(defrange_subfield_register + 1);
+          U64 gap_count = ((U8*)sym_data_opl - (U8*)gaps) / sizeof(*gaps);
+          RDI_RegCode reg_code = cv2r_rdi_reg_code_from_cv_reg_code(in->arch, cv_reg);
+
+          // rjf: skip "subfield" location info - currently not supported
+          if(defrange_subfield_register->field_offset != 0)
+          {
+            break;
+          }
+
+          // rjf: build location
+          RDIM_Location *location = rdim_push_location_val_reg(arena, reg_code);
+
+          // rjf: emit locations over ranges
+          p2r_location_over_lvar_addr_range(arena, &sym_scopes, defrange_target, location, range, range_section, gaps, gap_count);
+        }break;
+
+        //- rjf: DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE
+        case CV_SymKind_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE:
+        {
+          // rjf: no defrange target? -> somehow we got to a defrange symbol without first seeing
+          // a local - break immediately
+          if(defrange_target == 0)
+          {
+            break;
+          }
+
+          // rjf: find current procedure's frameproc
+          CV_SymFrameproc *frameproc = 0;
+          if(procedure_num != 0 && procedure_num <= procedure_frameprocs_count && procedure_frameprocs[procedure_num-1] != 0)
+          {
+            frameproc = procedure_frameprocs[procedure_num-1];
+          }
+
+          // rjf: no current valid frameproc? -> somehow we got to a framepointer-relative defrange
+          // without having an actually active procedure - break
+          if(frameproc == 0)
+          {
+            break;
+          }
+
+          // rjf: unpack sym
+          CV_SymDefrangeFramepointerRelFullScope *defrange_fprel_full_scope = (CV_SymDefrangeFramepointerRelFullScope*)sym_header_struct_base;
+          CV_EncodedFramePtrReg encoded_fp_reg = cv_pick_fp_encoding(frameproc, defrange_target_is_param);
+          RDI_RegCode fp_register_code = cv2r_reg_code_from_arch_encoded_fp_reg(in->arch, encoded_fp_reg);
+
+          // rjf: build location
+          B32 extra_indirection = 0;
+          U32 byte_size = rdi_addr_size_from_arch(in->arch);
+          U32 byte_pos = 0;
+          S64 var_off = (S64)defrange_fprel_full_scope->off;
+          RDIM_Location *location = p2r_location_from_addr_reg_off(arena, in->arch, fp_register_code, byte_size, byte_pos, var_off, extra_indirection);
+
+          // rjf: emit location over ranges
+          RDIM_Rng1U64 voff_range = {0, max_U64};
+          rdim_location_set_push_case(arena, &sym_scopes, defrange_target, voff_range, location);
+        }break;
+
+        //- rjf: DEFRANGE_REGISTER_REL
+        case CV_SymKind_DEFRANGE_REGISTER_REL:
+        {
+          // rjf: no defrange target? -> somehow we got to a defrange symbol without first seeing
+          // a local - break immediately
+          if(defrange_target == 0)
+          {
+            break;
+          }
+
+          // rjf: unpack sym
+          CV_SymDefrangeRegisterRel *defrange_register_rel = (CV_SymDefrangeRegisterRel*)sym_header_struct_base;
+          CV_Reg cv_reg = defrange_register_rel->reg;
+          RDI_RegCode reg_code = cv2r_rdi_reg_code_from_cv_reg_code(in->arch, cv_reg);
+          CV_LvarAddrRange *range = &defrange_register_rel->range;
+          COFF_SectionHeader *range_section = (0 < range->sec && range->sec <= in->coff_sections.count) ? &in->coff_sections.v[range->sec-1] : 0;
+          CV_LvarAddrGap *gaps = (CV_LvarAddrGap*)(defrange_register_rel + 1);
+          U64 gap_count = ((U8*)sym_data_opl - (U8*)gaps) / sizeof(*gaps);
+
+          // rjf: build location
+          // TODO(rjf): offset & size from cv_reg code
+          U32 byte_size = rdi_addr_size_from_arch(in->arch);
+          U32 byte_pos = 0;
+          B32 extra_indirection_to_value = 0;
+          S64 var_off = defrange_register_rel->reg_off;
+          RDIM_Location *location = p2r_location_from_addr_reg_off(arena, in->arch, reg_code, byte_size, byte_pos, var_off, extra_indirection_to_value);
+
+          // rjf: emit locations over ranges
+          p2r_location_over_lvar_addr_range(arena, &sym_scopes, defrange_target, location, range, range_section, gaps, gap_count);
+        }break;
+
+        //- rjf: FILESTATIC
+        case CV_SymKind_FILESTATIC:
+        {
+          CV_SymFileStatic *file_static = (CV_SymFileStatic*)sym_header_struct_base;
+          String8 name = str8_cstring_capped(file_static+1, sym_data_opl);
+          RDIM_Type *type = p2r_type_ptr_from_itype(file_static->itype);
+          // TODO(rjf): emit a global modifier symbol
+          defrange_target = 0;
+          defrange_target_is_param = 0;
+        }break;
+
+        //- rjf: INLINESITE
+        case CV_SymKind_INLINESITE:
+        {
+          // rjf: unpack sym
+          CV_SymInlineSite *sym = (CV_SymInlineSite *)sym_header_struct_base;
+          String8 binary_annots = str8((U8 *)(sym+1), rec_range->hdr.size - sizeof(rec_range->hdr.kind) - sizeof(*sym));
+
+          // rjf: extract external info about inline site
+          String8    name  = str8_zero();
+          RDIM_Type *type  = 0;
+          RDIM_Type *owner = 0;
+          if(in->ipi_leaf != 0 && in->ipi_leaf->itype_first <= sym->inlinee && sym->inlinee < in->ipi_leaf->itype_opl)
+          {
+            CV_RecRange leaf_range = in->ipi_leaf->leaf_ranges.ranges[sym->inlinee - in->ipi_leaf->itype_first];
+            String8     rec_data   = str8_substr(in->ipi_leaf->data, rng_1u64(leaf_range.off, leaf_range.off + leaf_range.hdr.size));
+            void       *raw_leaf   = rec_data.str + sizeof(U16);
+
+            // NOTE: the leaf struct starts after the 2-byte kind field, so the
+            // record data must hold both the kind and the whole struct before
+            // the struct can be read.
+
+            // rjf: extract method inline info
+            if(leaf_range.hdr.kind == CV_LeafKind_MFUNC_ID &&
+               rec_data.size >= sizeof(U16) + sizeof(CV_LeafMFuncId))
+            {
+              CV_LeafMFuncId *mfunc_id = (CV_LeafMFuncId*)raw_leaf;
+              name  = str8_cstring_capped(mfunc_id + 1, rec_data.str + rec_data.size);
+              type  = p2r_type_ptr_from_itype(mfunc_id->itype);
+              owner = mfunc_id->owner_itype != 0 ? p2r_type_ptr_from_itype(mfunc_id->owner_itype) : 0;
+            }
+
+            // rjf: extract non-method function inline info
+            else if(leaf_range.hdr.kind == CV_LeafKind_FUNC_ID &&
+                    rec_data.size >= sizeof(U16) + sizeof(CV_LeafFuncId))
+            {
+              CV_LeafFuncId *func_id = (CV_LeafFuncId*)raw_leaf;
+              name  = str8_cstring_capped(func_id + 1, rec_data.str + rec_data.size);
+              type  = p2r_type_ptr_from_itype(func_id->itype);
+              // NOTE(review): scope_string_id is looked up through the type table
+              // here, like an itype - confirm this is intended.
+              owner = func_id->scope_string_id != 0 ? p2r_type_ptr_from_itype(func_id->scope_string_id) : 0;
+            }
+          }
+
+          // rjf: build inline site
+          RDIM_InlineSite *inline_site = rdim_inline_site_chunk_list_push(arena, &sym_inline_sites, sym_inline_sites_chunk_cap);
+          inline_site->name       = name;
+          inline_site->type       = type;
+          inline_site->owner      = owner;
+          inline_site->line_table = inline_site_line_table;
+
+          // rjf: increment to next inline site line table in this unit
+          if(inline_site_line_table != 0 && inline_site_line_table->chunk != 0)
+          {
+            RDIM_LineTableChunkNode *chunk = inline_site_line_table->chunk;
+            U64 current_idx = (U64)(inline_site_line_table - chunk->v);
+            if(current_idx+1 < chunk->count)
+            {
+              inline_site_line_table += 1;
+            }
+            else
+            {
+              chunk = chunk->next;
+              inline_site_line_table = 0;
+              if(chunk != 0)
+              {
+                inline_site_line_table = chunk->v;
+              }
+            }
+          }
+
+          // rjf: build scope
+          RDIM_Scope *scope = rdim_scope_chunk_list_push(arena, &sym_scopes, sym_scopes_chunk_cap);
+          scope->inline_site = inline_site;
+          if(top_scope_node == 0)
+          {
+            // TODO(rjf): log
+          }
+          if(top_scope_node != 0)
+          {
+            RDIM_Scope *top_scope = top_scope_node->scope;
+            SLLQueuePush_N(top_scope->first_child, top_scope->last_child, scope, next_sibling);
+            scope->parent_scope = top_scope;
+            scope->symbol = top_scope->symbol;
+          }
+
+          // rjf: push this scope to scope stack
+          {
+            P2R_ScopeNode *node = free_scope_node;
+            if(node != 0) { SLLStackPop(free_scope_node); }
+            else { node = push_array_no_zero(scratch.arena, P2R_ScopeNode, 1); }
+            node->scope = scope;
+            SLLStackPush(top_scope_node, node);
+          }
+
+          // rjf: parse offset ranges of this inline site - attach to scope
+          {
+            CV_C13InlineSiteDecoder decoder = cv_c13_inline_site_decoder_init(0, 0, procedure_base_voff);
+            for(;;)
+            {
+              CV_C13InlineSiteDecoderStep step = cv_c13_inline_site_decoder_step(&decoder, binary_annots);
+
+              if(step.flags & CV_C13InlineSiteDecoderStepFlag_EmitRange)
+              {
+                // rjf: build new range & add to scope
+                RDIM_Rng1U64 voff_range = { step.range.min, step.range.max };
+                rdim_scope_push_voff_range(arena, &sym_scopes, scope, voff_range);
+              }
+
+              if(step.flags & CV_C13InlineSiteDecoderStepFlag_ExtendLastRange)
+              {
+                if(scope->voff_ranges.last != 0)
+                {
+                  scope->voff_ranges.last->v.max = step.range.max;
+                }
+              }
+
+              if(step.flags == 0)
+              {
+                break;
+              }
+            }
+          }
+        }break;
+
+        //- rjf: INLINESITE_END
+        case CV_SymKind_INLINESITE_END:
+        {
+          P2R_ScopeNode *n = top_scope_node;
+          if(n != 0)
+          {
+            SLLStackPop(top_scope_node);
+            SLLStackPush(free_scope_node, n);
+          }
+          defrange_target = 0;
+          defrange_target_is_param = 0;
+        }break;
+      }
+    }
+  }
+
+  //////////////////////////
+  //- rjf: allocate & fill output
+  //
+  P2R_SymbolStreamConvertOut *out = push_array(arena, P2R_SymbolStreamConvertOut, 1);
+  {
+    out->procedures       = sym_procedures;
+    out->global_variables = sym_global_variables;
+    out->thread_variables = sym_thread_variables;
+    out->scopes           = sym_scopes;
+    out->inline_sites     = sym_inline_sites;
+  }
+
+#undef p2r_type_ptr_from_itype
+  scratch_end(scratch);
+  ProfEnd();
+  return out;
+}
+ +//////////////////////////////// +//~ rjf: Top-Level Conversion Entry Point + +internal RDIM_BakeParams * +p2r_convert(Arena *arena, RDIM_HelpState *help_state, RC_Context *in) +{ + Temp scratch = scratch_begin(&arena, 1); + + g_p2r_help_state = help_state; + + 
////////////////////////////////////////////////////////////// + //- rjf: parse MSF structure + // + MSF_Parsed *msf = 0; + if(in->debug_data.size != 0) ProfScope("parse MSF structure") + { + msf = msf_parsed_from_data(arena, in->debug_data); + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse PDB auth_guid & named streams table + // + PDB_NamedStreamTable *named_streams = 0; + Guid auth_guid = {0}; + if(msf != 0) ProfScope("parse PDB auth_guid & named streams table") + { + Temp scratch = scratch_begin(&arena, 1); + String8 info_data = msf_data_from_stream(msf, PDB_FixedStream_Info); + PDB_Info *info = pdb_info_from_data(scratch.arena, info_data); + named_streams = pdb_named_stream_table_from_info(arena, info); + MemoryCopyStruct(&auth_guid, &info->auth_guid); + scratch_end(scratch); + + if (info->features & PDB_FeatureFlag_MINIMAL_DBG_INFO) { + fprintf(stderr, "ERROR: PDB was linked with /DEBUG:FASTLINK (partial debug info is not supported). Please relink using /DEBUG:FULL."); + os_abort(1); + } + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse PDB strtbl + // + PDB_Strtbl *strtbl = 0; + String8 raw_strtbl = str8_zero(); + if(named_streams != 0) ProfScope("parse PDB strtbl") + { + MSF_StreamNumber strtbl_sn = named_streams->sn[PDB_NamedStream_StringTable]; + String8 strtbl_data = msf_data_from_stream(msf, strtbl_sn); + strtbl = pdb_strtbl_from_data(arena, strtbl_data); + raw_strtbl = str8_substr(strtbl_data, rng_1u64(strtbl->strblock_min, strtbl->strblock_max)); + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse dbi + // + PDB_DbiParsed *dbi = 0; + if(msf != 0) ProfScope("parse dbi") + { + String8 dbi_data = msf_data_from_stream(msf, PDB_FixedStream_Dbi); + dbi = pdb_dbi_from_data(arena, dbi_data); + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse tpi + // + PDB_TpiParsed *tpi = 0; + if(msf != 0) ProfScope("parse 
tpi") + { + String8 tpi_data = msf_data_from_stream(msf, PDB_FixedStream_Tpi); + tpi = pdb_tpi_from_data(arena, tpi_data); + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse ipi + // + PDB_TpiParsed *ipi = 0; + if(msf != 0) ProfScope("parse ipi") + { + String8 ipi_data = msf_data_from_stream(msf, PDB_FixedStream_Ipi); + ipi = pdb_tpi_from_data(arena, ipi_data); + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse coff sections + // + COFF_SectionHeaderArray coff_sections = {0}; + if(dbi != 0) ProfScope("parse coff sections") + { + MSF_StreamNumber section_stream = dbi->dbg_streams[PDB_DbiStream_SECTION_HEADER]; + String8 section_data = msf_data_from_stream(msf, section_stream); + coff_sections = pdb_coff_section_array_from_data(arena, section_data); + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse gsi + // + PDB_GsiParsed *gsi = 0; + if(dbi != 0) ProfScope("parse gsi") + { + String8 gsi_data = msf_data_from_stream(msf, dbi->gsi_sn); + gsi = pdb_gsi_from_data(arena, gsi_data); + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse psi + // + PDB_GsiParsed *psi_gsi_part = 0; + if(dbi != 0) ProfScope("parse psi") + { + String8 psi_data = msf_data_from_stream(msf, dbi->psi_sn); + String8 psi_data_gsi_part = str8_range(psi_data.str + sizeof(PDB_PsiHeader), psi_data.str + psi_data.size); + psi_gsi_part = pdb_gsi_from_data(arena, psi_data_gsi_part); + } + + ////////////////////////////////////////////////////////////// + //- rjf: kickoff EXE hash + // + P2R_EXEHashIn exe_hash_in = {in->image_data}; + ASYNC_Task *exe_hash_task = async_task_launch(scratch.arena, p2r_exe_hash_work, .input = &exe_hash_in); + + ////////////////////////////////////////////////////////////// + //- rjf: kickoff TPI hash parse + // + P2R_TPIHashParseIn tpi_hash_in = {0}; + ASYNC_Task *tpi_hash_task = 0; + if(tpi != 0) + { + tpi_hash_in.strtbl = strtbl; + 
tpi_hash_in.tpi = tpi; + tpi_hash_in.hash_data = msf_data_from_stream(msf, tpi->hash_sn); + tpi_hash_in.aux_data = msf_data_from_stream(msf, tpi->hash_sn_aux); + tpi_hash_task = async_task_launch(scratch.arena, p2r_tpi_hash_parse_work, .input = &tpi_hash_in); + } + + ////////////////////////////////////////////////////////////// + //- rjf: kickoff TPI leaf parse + // + P2R_TPILeafParseIn tpi_leaf_in = {0}; + ASYNC_Task *tpi_leaf_task = 0; + if(tpi != 0) + { + tpi_leaf_in.leaf_data = pdb_leaf_data_from_tpi(tpi); + tpi_leaf_in.itype_first = tpi->itype_first; + tpi_leaf_task = async_task_launch(scratch.arena, p2r_tpi_leaf_parse_work, .input = &tpi_leaf_in); + } + + ////////////////////////////////////////////////////////////// + //- rjf: kickoff IPI hash parse + // + P2R_TPIHashParseIn ipi_hash_in = {0}; + ASYNC_Task *ipi_hash_task = 0; + if(ipi != 0) + { + ipi_hash_in.strtbl = strtbl; + ipi_hash_in.tpi = ipi; + ipi_hash_in.hash_data = msf_data_from_stream(msf, ipi->hash_sn); + ipi_hash_in.aux_data = msf_data_from_stream(msf, ipi->hash_sn_aux); + ipi_hash_task = async_task_launch(scratch.arena, p2r_tpi_hash_parse_work, .input = &ipi_hash_in); + } + + ////////////////////////////////////////////////////////////// + //- rjf: kickoff IPI leaf parse + // + P2R_TPILeafParseIn ipi_leaf_in = {0}; + ASYNC_Task *ipi_leaf_task = 0; + if(ipi != 0) + { + ipi_leaf_in.leaf_data = pdb_leaf_data_from_tpi(ipi); + ipi_leaf_in.itype_first = ipi->itype_first; + ipi_leaf_task = async_task_launch(scratch.arena, p2r_tpi_leaf_parse_work, .input = &ipi_leaf_in); + } + + ////////////////////////////////////////////////////////////// + //- rjf: kickoff top-level global symbol stream parse + // + P2R_SymbolStreamParseIn sym_parse_in = {dbi ? msf_data_from_stream(msf, dbi->sym_sn) : str8_zero()}; + ASYNC_Task *sym_parse_task = !dbi ? 
0 : async_task_launch(scratch.arena, p2r_symbol_stream_parse_work, .input = &sym_parse_in); + + ////////////////////////////////////////////////////////////// + //- rjf: kickoff compilation unit parses + // + P2R_CompUnitParseIn comp_unit_parse_in = {dbi ? pdb_data_from_dbi_range(dbi, PDB_DbiRange_ModuleInfo) : str8_zero()}; + P2R_CompUnitContributionsParseIn comp_unit_contributions_parse_in = {dbi ? pdb_data_from_dbi_range(dbi, PDB_DbiRange_SecCon) : str8_zero(), coff_sections}; + ASYNC_Task *comp_unit_parse_task = !dbi ? 0 : async_task_launch(scratch.arena, p2r_comp_unit_parse_work, .input = &comp_unit_parse_in); + ASYNC_Task *comp_unit_contributions_parse_task = !dbi ? 0 : async_task_launch(scratch.arena, p2r_comp_unit_contributions_parse_work, .input = &comp_unit_contributions_parse_in); + + ////////////////////////////////////////////////////////////// + //- rjf: join compilation unit parses + // + PDB_CompUnitArray *comp_units = 0; + U64 comp_unit_count = 0; + PDB_CompUnitContributionArray *comp_unit_contributions = 0; + U64 comp_unit_contribution_count = 0; + { + comp_units = async_task_join_struct(comp_unit_parse_task, PDB_CompUnitArray); + comp_unit_contributions = async_task_join_struct(comp_unit_contributions_parse_task, PDB_CompUnitContributionArray); + comp_unit_count = comp_units ? comp_units->count : 0; + comp_unit_contribution_count = comp_unit_contributions ? 
comp_unit_contributions->count : 0; + } + + ////////////////////////////////////////////////////////////// + //- rjf: parse syms & line info for each compilation unit + // + CV_SymParsed **sym_for_unit = push_array(arena, CV_SymParsed *, comp_unit_count); + CV_C13Parsed **c13_for_unit = push_array(arena, CV_C13Parsed *, comp_unit_count); + if(comp_units != 0) ProfScope("parse syms & line info for each compilation unit") + { + //- rjf: kick off tasks + P2R_SymbolStreamParseIn *sym_tasks_inputs = push_array(scratch.arena, P2R_SymbolStreamParseIn, comp_unit_count); + ASYNC_Task **sym_tasks = push_array(scratch.arena, ASYNC_Task *, comp_unit_count); + P2R_C13StreamParseIn *c13_tasks_inputs = push_array(scratch.arena, P2R_C13StreamParseIn, comp_unit_count); + ASYNC_Task **c13_tasks = push_array(scratch.arena, ASYNC_Task *, comp_unit_count); + for(U64 idx = 0; idx < comp_unit_count; idx += 1) + { + PDB_CompUnit *unit = comp_units->units[idx]; + sym_tasks_inputs[idx].data = pdb_data_from_unit_range(msf, unit, PDB_DbiCompUnitRange_Symbols); + sym_tasks[idx] = async_task_launch(scratch.arena, p2r_symbol_stream_parse_work, .input = &sym_tasks_inputs[idx]); + c13_tasks_inputs[idx].data = pdb_data_from_unit_range(msf, unit, PDB_DbiCompUnitRange_C13); + c13_tasks_inputs[idx].strtbl = raw_strtbl; + c13_tasks_inputs[idx].coff_sections = coff_sections; + c13_tasks[idx] = async_task_launch(scratch.arena, p2r_c13_stream_parse_work, .input = &c13_tasks_inputs[idx]); + } + + //- rjf: join tasks + for(U64 idx = 0; idx < comp_unit_count; idx += 1) + { + sym_for_unit[idx] = async_task_join_struct(sym_tasks[idx], CV_SymParsed); + c13_for_unit[idx] = async_task_join_struct(c13_tasks[idx], CV_C13Parsed); + } + } + + ////////////////////////////////////////////////////////////// + //- rjf: determine architecture + // + RDI_Arch arch = cv2r_rdi_arch_from_cv_arch(dbi->machine_type); + U64 arch_addr_size = rdi_addr_size_from_arch(arch); + + 
////////////////////////////////////////////////////////////// + //- rjf: join EXE hash + // + U64 exe_hash = *async_task_join_struct(exe_hash_task, U64); + + ////////////////////////////////////////////////////////////// + //- rjf: produce top-level-info + // + RDIM_TopLevelInfo top_level_info = c2r_make_rdim_top_level_info(in->image_name, arch, exe_hash, coff_sections.count, coff_sections.v); + + ////////////////////////////////////////////////////////////// + //- rjf: build binary sections list + // + RDIM_BinarySectionList binary_sections = c2r_rdi_binary_sections_from_coff_sections(arena, str8_zero(), 0, coff_sections.count, coff_sections.v); + + ////////////////////////////////////////////////////////////// + //- rjf: kick off unit conversion & source file collection + // + P2R_UnitConvertIn unit_convert_in = {strtbl, coff_sections, comp_units, comp_unit_contributions, sym_for_unit, c13_for_unit}; + ASYNC_Task *unit_convert_task = async_task_launch(scratch.arena, p2r_units_convert_work, .input = &unit_convert_in); + + ////////////////////////////////////////////////////////////// + //- rjf: join global sym stream parse + // + CV_SymParsed *sym = async_task_join_struct(sym_parse_task, CV_SymParsed); + + ////////////////////////////// + //- rjf: predict symbol count + // + U64 symbol_count_prediction = 0; + ProfScope("predict symbol count") + { + U64 rec_range_count = 0; + if(sym != 0) + { + rec_range_count += sym->sym_ranges.count; + } + for(U64 comp_unit_idx = 0; comp_unit_idx < comp_unit_count; comp_unit_idx += 1) + { + CV_SymParsed *unit_sym = sym_for_unit[comp_unit_idx]; + rec_range_count += unit_sym->sym_ranges.count; + } + symbol_count_prediction = rec_range_count/8; + if(symbol_count_prediction < 256) + { + symbol_count_prediction = 256; + } + } + + ////////////////////////////////////////////////////////////// + //- rjf: kick off link name map production + // + P2R_LinkNameMap link_name_map__in_progress = {0}; + P2R_LinkNameMapBuildIn 
link_name_map_build_in = {0}; + ASYNC_Task *link_name_map_task = 0; + if(sym != 0) ProfScope("kick off link name map build task") + { + link_name_map__in_progress.buckets_count = symbol_count_prediction; + link_name_map__in_progress.buckets = push_array(arena, P2R_LinkNameNode *, link_name_map__in_progress.buckets_count); + link_name_map_build_in.sym = sym; + link_name_map_build_in.coff_sections = coff_sections; + link_name_map_build_in.link_name_map = &link_name_map__in_progress; + link_name_map_task = async_task_launch(scratch.arena, p2r_link_name_map_build_work, .input = &link_name_map_build_in); + } + + ////////////////////////////////////////////////////////////// + //- rjf: join ipi/tpi hash/leaf parses + // + PDB_TpiHashParsed *tpi_hash = 0; + CV_LeafParsed *tpi_leaf = 0; + PDB_TpiHashParsed *ipi_hash = 0; + CV_LeafParsed *ipi_leaf = 0; + { + tpi_hash = async_task_join_struct(tpi_hash_task, PDB_TpiHashParsed); + tpi_leaf = async_task_join_struct(tpi_leaf_task, CV_LeafParsed); + ipi_hash = async_task_join_struct(ipi_hash_task, PDB_TpiHashParsed); + ipi_leaf = async_task_join_struct(ipi_leaf_task, CV_LeafParsed); + } + + ////////////////////////////////////////////////////////////// + //- rjf: types pass 1: construct all types from TPI + // + // this doesn't gather struct/class/union/enum members, which is done by + // subsequent passes, to build RDI "UDT" information, which is distinct + // from regular type info. + // + RDIM_Type **itype_type_ptrs = 0; + RDIM_TypeChunkList all_types = {0}; +#define p2r_type_ptr_from_itype(itype) (((itype) < tpi_leaf->itype_opl) ? 
itype_type_ptrs[itype] : 0) + if(in->flags & RC_Flag_Types) ProfScope("types pass 1: construct all root/stub types from TPI") + { + itype_type_ptrs = push_array(arena, RDIM_Type *, tpi_leaf->itype_opl); + + ////////////////////////// + //- build basic type + // + { + RDIM_DataModel data_model = rdim_infer_data_model(OperatingSystem_Windows, top_level_info.arch); + + RDI_TypeKind short_type = rdim_short_type_from_data_model(data_model); + RDI_TypeKind ushort_type = rdim_unsigned_short_type_from_data_model(data_model); + RDI_TypeKind int_type = rdim_int_type_from_data_model(data_model); + RDI_TypeKind uint_type = rdim_unsigned_int_type_from_data_model(data_model); + RDI_TypeKind long_type = rdim_long_type_from_data_model(data_model); + RDI_TypeKind ulong_type = rdim_unsigned_long_type_from_data_model(data_model); + RDI_TypeKind long_long_type = rdim_long_long_type_from_data_model(data_model); + RDI_TypeKind ulong_long_type = rdim_unsigned_long_long_type_from_data_model(data_model); + RDI_TypeKind ptr_type = rdim_pointer_size_t_type_from_data_model(data_model); + + struct + { + char * name; + RDI_TypeKind kind_rdi; + CV_LeafKind kind_cv; + B32 make_pointer_near; + B32 make_pointer_32; + B32 make_pointer_64; + } + table[] = + { + { "" , RDI_TypeKind_NULL , CV_BasicType_NOTYPE , 0, 0, 0 }, + { "void" , RDI_TypeKind_Void , CV_BasicType_VOID , 1, 1, 1 }, + { "HRESULT" , RDI_TypeKind_Handle , CV_BasicType_HRESULT , 0, 1, 1 }, + { "signed char" , RDI_TypeKind_Char8 , CV_BasicType_CHAR , 1, 1, 1 }, + { "short" , short_type , CV_BasicType_SHORT , 1, 1, 1 }, + { "long" , long_type , CV_BasicType_LONG , 1, 1, 1 }, + { "long long" , long_long_type , CV_BasicType_QUAD , 1, 1, 1 }, + { "__int128" , RDI_TypeKind_S128 , CV_BasicType_OCT , 1, 1, 1 }, // Clang type + { "unsigned char" , RDI_TypeKind_UChar8 , CV_BasicType_UCHAR , 1, 1, 1 }, + { "unsigned short" , ushort_type , CV_BasicType_USHORT , 1, 1, 1 }, + { "unsigned long" , ulong_type , CV_BasicType_ULONG , 1, 1, 1 }, + { 
"unsigned long long" , ulong_long_type , CV_BasicType_UQUAD , 1, 1, 1 }, + { "__uint128" , RDI_TypeKind_U128 , CV_BasicType_UOCT , 1, 1, 1 }, // Clang type + { "bool" , RDI_TypeKind_S8 , CV_BasicType_BOOL8 , 1, 1, 1 }, + { "__bool16" , RDI_TypeKind_S16 , CV_BasicType_BOOL16 , 1, 1, 1 }, // not real C type + { "__bool32" , RDI_TypeKind_S32 , CV_BasicType_BOOL32 , 1, 1, 1 }, // not real C type + { "float" , RDI_TypeKind_F32 , CV_BasicType_FLOAT32 , 1, 1, 1 }, + { "double" , RDI_TypeKind_F64 , CV_BasicType_FLOAT64 , 1, 1, 1 }, + { "long double" , RDI_TypeKind_F80 , CV_BasicType_FLOAT80 , 1, 1, 1 }, + { "__float128" , RDI_TypeKind_F128 , CV_BasicType_FLOAT128 , 1, 1, 1 }, // Clang type + { "__float48" , RDI_TypeKind_F48 , CV_BasicType_FLOAT48 , 1, 1, 1 }, // not real C type + { "__float32pp" , RDI_TypeKind_F32PP , CV_BasicType_FLOAT32PP , 1, 1, 1 }, // not real C type + { "_Complex float" , RDI_TypeKind_ComplexF32 , CV_BasicType_COMPLEX32 , 0, 0, 0 }, + { "_Complex double" , RDI_TypeKind_ComplexF64 , CV_BasicType_COMPLEX64 , 0, 0, 0 }, + { "_Complex long double" , RDI_TypeKind_ComplexF80 , CV_BasicType_COMPLEX80 , 0, 0, 0 }, + { "_Complex __float128" , RDI_TypeKind_ComplexF128, CV_BasicType_COMPLEX128 , 0, 0, 0 }, + { "__int8" , RDI_TypeKind_S8 , CV_BasicType_INT8 , 1, 1, 1 }, + { "__uint8" , RDI_TypeKind_U8 , CV_BasicType_UINT8 , 1, 1, 1 }, + { "__int16" , RDI_TypeKind_S16 , CV_BasicType_INT16 , 1, 1, 1 }, + { "__uint16" , RDI_TypeKind_U16 , CV_BasicType_UINT16 , 1, 1, 1 }, + { "int" , int_type , CV_BasicType_INT32 , 1, 1, 1 }, + { "unsigned int" , uint_type , CV_BasicType_UINT32 , 1, 1, 1 }, + { "__int64" , RDI_TypeKind_S64 , CV_BasicType_INT64 , 1, 1, 1 }, + { "__uint64" , RDI_TypeKind_U64 , CV_BasicType_UINT64 , 1, 1, 1 }, + { "__int128" , RDI_TypeKind_S128 , CV_BasicType_INT128 , 1, 1, 1 }, + { "__uint128" , RDI_TypeKind_U128 , CV_BasicType_UINT128 , 1, 1, 1 }, + { "char" , RDI_TypeKind_Char8 , CV_BasicType_RCHAR , 1, 1, 1 }, // always ASCII + { "wchar_t" , 
RDI_TypeKind_UChar16 , CV_BasicType_WCHAR , 1, 1, 1 }, // on windows always UTF-16 + { "char8_t" , RDI_TypeKind_Char8 , CV_BasicType_CHAR8 , 1, 1, 1 }, // always UTF-8 + { "char16_t" , RDI_TypeKind_Char16 , CV_BasicType_CHAR16 , 1, 1, 1 }, // always UTF-16 + { "char32_t" , RDI_TypeKind_Char32 , CV_BasicType_CHAR32 , 1, 1, 1 }, // always UTF-32 + { "__pointer" , ptr_type , CV_BasicType_PTR , 0, 0, 0 } + }; + + for(U64 i = 0; i < ArrayCount(table); i += 1) + { + U64 builtin_size; + if(table[i].kind_rdi == RDI_TypeKind_Void || table[i].kind_rdi == RDI_TypeKind_Handle) + { + builtin_size = arch_addr_size; + } + else + { + builtin_size = rdi_size_from_basic_type_kind(table[i].kind_rdi); + } + + RDIM_Type *builtin = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + builtin->kind = table[i].kind_rdi; + builtin->name = str8_cstring(table[i].name); + builtin->byte_size = builtin_size; + + itype_type_ptrs[table[i].kind_cv] = builtin; + + if(table[i].make_pointer_near) + { + CV_TypeIndex near_ptr_itype = table[i].kind_cv | 0x100; + RDIM_Type *ptr_near = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + ptr_near->kind = RDI_TypeKind_Ptr; + ptr_near->byte_size = 2; + ptr_near->direct_type = builtin; + + itype_type_ptrs[near_ptr_itype] = ptr_near; + } + if(table[i].make_pointer_32) + { + CV_TypeIndex ptr_32_itype = table[i].kind_cv | 0x400; + RDIM_Type *ptr_32 = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + ptr_32->kind = RDI_TypeKind_Ptr; + ptr_32->byte_size = 4; + ptr_32->direct_type = builtin; + + itype_type_ptrs[ptr_32_itype] = ptr_32; + } + if(table[i].make_pointer_64) + { + CV_TypeIndex ptr_64_itype = table[i].kind_cv | 0x600; + RDIM_Type *ptr_64 = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + ptr_64->kind = RDI_TypeKind_Ptr; + ptr_64->byte_size = 8; + ptr_64->direct_type = builtin; + + itype_type_ptrs[ptr_64_itype] = ptr_64; + } + } + } + + ////////////////////////// + //- rjf: build 
non-basic type + // + for(CV_TypeId itype = tpi_leaf->itype_first; itype < tpi_leaf->itype_opl; itype += 1) + { + RDIM_Type *dst_type = 0; + CV_RecRange *range = &tpi_leaf->leaf_ranges.ranges[itype-tpi_leaf->itype_first]; + CV_LeafKind kind = range->hdr.kind; + U64 header_struct_size = cv_header_struct_size_from_leaf_kind(kind); + + if(range->off+range->hdr.size <= tpi_leaf->data.size && + range->off+2+header_struct_size <= tpi_leaf->data.size && + range->hdr.size >= 2) + { + U8 *itype_leaf_first = tpi_leaf->data.str + range->off+2; + U8 *itype_leaf_opl = itype_leaf_first + range->hdr.size-2; + switch(kind) + { + //- rjf: MODIFIER + case CV_LeafKind_MODIFIER: + { + // rjf: unpack leaf + CV_LeafModifier *lf = (CV_LeafModifier *)itype_leaf_first; + + // rjf: cv -> rdi flags + RDI_TypeModifierFlags flags = 0; + if(lf->flags & CV_ModifierFlag_Const) {flags |= RDI_TypeModifierFlag_Const;} + if(lf->flags & CV_ModifierFlag_Volatile) {flags |= RDI_TypeModifierFlag_Volatile;} + + // rjf: fill type + if(flags == 0) + { + dst_type = p2r_type_ptr_from_itype(lf->itype); + } + else + { + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + dst_type->kind = RDI_TypeKind_Modifier; + dst_type->flags = flags; + dst_type->direct_type = p2r_type_ptr_from_itype(lf->itype); + dst_type->byte_size = dst_type->direct_type ? 
dst_type->direct_type->byte_size : 0; + } + }break; + + //- rjf: POINTER + case CV_LeafKind_POINTER: + { + // TODO(rjf): if ptr_mode in {PtrMem, PtrMethod} then output a member pointer instead + + // rjf: unpack leaf + CV_LeafPointer *lf = (CV_LeafPointer *)itype_leaf_first; + RDIM_Type *direct_type = p2r_type_ptr_from_itype(lf->itype); + CV_PointerKind ptr_kind = CV_PointerAttribs_Extract_Kind(lf->attribs); + CV_PointerMode ptr_mode = CV_PointerAttribs_Extract_Mode(lf->attribs); + U32 ptr_size = CV_PointerAttribs_Extract_Size(lf->attribs); + + // rjf: cv -> rdi modifier flags + RDI_TypeModifierFlags modifier_flags = 0; + if(lf->attribs & CV_PointerAttrib_Const) {modifier_flags |= RDI_TypeModifierFlag_Const;} + if(lf->attribs & CV_PointerAttrib_Volatile) {modifier_flags |= RDI_TypeModifierFlag_Volatile;} + if(lf->attribs & CV_PointerAttrib_Restricted) {modifier_flags |= RDI_TypeModifierFlag_Restrict;} + + // rjf: cv info -> rdi pointer type kind + RDI_TypeKind type_kind = RDI_TypeKind_Ptr; + { + if(lf->attribs & CV_PointerAttrib_LRef) + { + type_kind = RDI_TypeKind_LRef; + } + else if(lf->attribs & CV_PointerAttrib_RRef) + { + type_kind = RDI_TypeKind_RRef; + } + if(ptr_mode == CV_PointerMode_LRef) + { + type_kind = RDI_TypeKind_LRef; + } + else if(ptr_mode == CV_PointerMode_RRef) + { + type_kind = RDI_TypeKind_RRef; + } + } + + // rjf: fill type + if(modifier_flags != 0) + { + RDIM_Type *pointer_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + dst_type->kind = RDI_TypeKind_Modifier; + dst_type->flags = modifier_flags; + dst_type->direct_type = pointer_type; + dst_type->byte_size = arch_addr_size; + pointer_type->kind = type_kind; + pointer_type->byte_size = arch_addr_size; + pointer_type->direct_type = direct_type; + } + else + { + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + dst_type->kind = type_kind; + dst_type->byte_size 
= arch_addr_size; + dst_type->direct_type = direct_type; + } + }break; + + //- rjf: PROCEDURE + case CV_LeafKind_PROCEDURE: + { + // TODO(rjf): handle call_kind & attribs + + // rjf: unpack leaf + CV_LeafProcedure *lf = (CV_LeafProcedure *)itype_leaf_first; + RDIM_Type *ret_type = p2r_type_ptr_from_itype(lf->ret_itype); + + // rjf: fill type's basics + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + dst_type->kind = RDI_TypeKind_Function; + dst_type->byte_size = arch_addr_size; + dst_type->direct_type = ret_type; + + // rjf: unpack arglist range + CV_RecRange *arglist_range = &tpi_leaf->leaf_ranges.ranges[lf->arg_itype-tpi_leaf->itype_first]; + if(arglist_range->hdr.kind != CV_LeafKind_ARGLIST || + arglist_range->hdr.size<2 || + arglist_range->off + arglist_range->hdr.size > tpi_leaf->data.size) + { + break; + } + U8 *arglist_first = tpi_leaf->data.str + arglist_range->off + 2; + U8 *arglist_opl = arglist_first+arglist_range->hdr.size-2; + if(arglist_first + sizeof(CV_LeafArgList) > arglist_opl) + { + break; + } + + // rjf: unpack arglist info + CV_LeafArgList *arglist = (CV_LeafArgList*)arglist_first; + CV_TypeId *arglist_itypes_base = (CV_TypeId *)(arglist+1); + U32 arglist_itypes_count = arglist->count; + + // rjf: build param type array + RDIM_Type **params = push_array(arena, RDIM_Type *, arglist_itypes_count); + for(U32 idx = 0; idx < arglist_itypes_count; idx += 1) + { + params[idx] = p2r_type_ptr_from_itype(arglist_itypes_base[idx]); + } + + // rjf: fill dst type + dst_type->count = arglist_itypes_count; + dst_type->param_types = params; + }break; + + //- rjf: MFUNCTION + case CV_LeafKind_MFUNCTION: + { + // TODO(rjf): handle call_kind & attribs + // TODO(rjf): preserve "this_adjust" + + // rjf: unpack leaf + CV_LeafMFunction *lf = (CV_LeafMFunction *)itype_leaf_first; + RDIM_Type *ret_type = p2r_type_ptr_from_itype(lf->ret_itype); + + // rjf: fill type + dst_type = rdim_type_chunk_list_push(arena, &all_types, 
tpi_leaf->itype_opl); + dst_type->kind = (lf->this_itype != 0) ? RDI_TypeKind_Method : RDI_TypeKind_Function; + dst_type->byte_size = arch_addr_size; + dst_type->direct_type = ret_type; + + // rjf: unpack arglist range + CV_RecRange *arglist_range = &tpi_leaf->leaf_ranges.ranges[lf->arg_itype-tpi_leaf->itype_first]; + if(arglist_range->hdr.kind != CV_LeafKind_ARGLIST || + arglist_range->hdr.size<2 || + arglist_range->off + arglist_range->hdr.size > tpi_leaf->data.size) + { + break; + } + U8 *arglist_first = tpi_leaf->data.str + arglist_range->off + 2; + U8 *arglist_opl = arglist_first+arglist_range->hdr.size-2; + if(arglist_first + sizeof(CV_LeafArgList) > arglist_opl) + { + break; + } + + // rjf: unpack arglist info + CV_LeafArgList *arglist = (CV_LeafArgList*)arglist_first; + CV_TypeId *arglist_itypes_base = (CV_TypeId *)(arglist+1); + U32 arglist_itypes_count = arglist->count; + + // rjf: build param type array + U64 num_this_extras = 1; + if(lf->this_itype == 0) + { + num_this_extras = 0; + } + RDIM_Type **params = push_array(arena, RDIM_Type *, arglist_itypes_count+num_this_extras); + for(U32 idx = 0; idx < arglist_itypes_count; idx += 1) + { + params[idx+num_this_extras] = p2r_type_ptr_from_itype(arglist_itypes_base[idx]); + } + if(lf->this_itype != 0) + { + params[0] = p2r_type_ptr_from_itype(lf->this_itype); + } + + // rjf: fill dst type + dst_type->count = arglist_itypes_count+num_this_extras; + dst_type->param_types = params; + }break; + + //- rjf: BITFIELD + case CV_LeafKind_BITFIELD: + { + // rjf: unpack leaf + CV_LeafBitField *lf = (CV_LeafBitField *)itype_leaf_first; + RDIM_Type *direct_type = p2r_type_ptr_from_itype(lf->itype); + + // rjf: fill type + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + dst_type->kind = RDI_TypeKind_Bitfield; + dst_type->off = lf->pos; + dst_type->count = lf->len; + dst_type->byte_size = direct_type?direct_type->byte_size:0; + dst_type->direct_type = direct_type; + }break; + + //- rjf: 
ARRAY + case CV_LeafKind_ARRAY: + { + // rjf: unpack leaf + CV_LeafArray *lf = (CV_LeafArray *)itype_leaf_first; + RDIM_Type *direct_type = p2r_type_ptr_from_itype(lf->entry_itype); + U8 *numeric_ptr = (U8*)(lf + 1); + CV_NumericParsed array_count = cv_numeric_from_data_range(numeric_ptr, itype_leaf_opl); + U64 full_size = cv_u64_from_numeric(&array_count); + + // rjf: fill type + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + dst_type->kind = RDI_TypeKind_Array; + dst_type->direct_type = direct_type; + dst_type->byte_size = full_size; + }break; + + //- rjf: CLASS/STRUCTURE + case CV_LeafKind_CLASS: + case CV_LeafKind_STRUCTURE: + { + // TODO(rjf): handle props + + // rjf: unpack leaf + CV_LeafStruct *lf = (CV_LeafStruct *)itype_leaf_first; + U8 *numeric_ptr = (U8*)(lf + 1); + CV_NumericParsed size = cv_numeric_from_data_range(numeric_ptr, itype_leaf_opl); + U64 size_u64 = cv_u64_from_numeric(&size); + U8 *name_ptr = numeric_ptr + size.encoded_size; + String8 name = str8_cstring_capped(name_ptr, itype_leaf_opl); + + // rjf: fill type + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + if(lf->props & CV_TypeProp_FwdRef) + { + dst_type->kind = (kind == CV_LeafKind_CLASS ? RDI_TypeKind_IncompleteClass : RDI_TypeKind_IncompleteStruct); + } + else + { + dst_type->kind = (kind == CV_LeafKind_CLASS ? 
RDI_TypeKind_Class : RDI_TypeKind_Struct); + } + + B32 do_unique_name_lookup = (((lf->props & CV_TypeProp_Scoped) != 0) && + ((lf->props & CV_TypeProp_HasUniqueName) != 0)); + if(do_unique_name_lookup) + { + U8 *unique_name_ptr = name_ptr + name.size + 1; + dst_type->link_name = str8_cstring_capped(unique_name_ptr, itype_leaf_opl); + } + + dst_type->name = name; + dst_type->byte_size = safe_cast_u32(size_u64); + }break; + + //- rjf: CLASS2/STRUCT2 + case CV_LeafKind_CLASS2: + case CV_LeafKind_STRUCT2: + { + // TODO(rjf): handle props + + // rjf: unpack leaf + CV_LeafStruct2 *lf = (CV_LeafStruct2 *)itype_leaf_first; + U8 *numeric_ptr = (U8*)(lf + 1); + CV_NumericParsed size = cv_numeric_from_data_range(numeric_ptr, itype_leaf_opl); + U64 size_u64 = cv_u64_from_numeric(&size); + U8 *name_ptr = numeric_ptr + size.encoded_size; + String8 name = str8_cstring_capped(name_ptr, itype_leaf_opl); + + // rjf: fill type + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + if(lf->props & CV_TypeProp_FwdRef) + { + dst_type->kind = (kind == CV_LeafKind_CLASS2 ? RDI_TypeKind_IncompleteClass : RDI_TypeKind_IncompleteStruct); + dst_type->name = name; + } + else + { + dst_type->kind = (kind == CV_LeafKind_CLASS2 ? 
RDI_TypeKind_Class : RDI_TypeKind_Struct); + dst_type->byte_size = (U32)size_u64; + dst_type->name = name; + } + + B32 do_unique_name_lookup = (((lf->props & CV_TypeProp_Scoped) != 0) && + ((lf->props & CV_TypeProp_HasUniqueName) != 0)); + if(do_unique_name_lookup) + { + U8 *unique_name_ptr = name_ptr + name.size + 1; + dst_type->link_name = str8_cstring_capped(unique_name_ptr, itype_leaf_opl); + } + }break; + + //- rjf: UNION + case CV_LeafKind_UNION: + { + // TODO(rjf): handle props + + // rjf: unpack leaf + CV_LeafUnion *lf = (CV_LeafUnion *)itype_leaf_first; + U8 *numeric_ptr = (U8*)(lf + 1); + CV_NumericParsed size = cv_numeric_from_data_range(numeric_ptr, itype_leaf_opl); + U64 size_u64 = cv_u64_from_numeric(&size); + U8 *name_ptr = numeric_ptr + size.encoded_size; + String8 name = str8_cstring_capped(name_ptr, itype_leaf_opl); + + // rjf: fill type + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + if(lf->props & CV_TypeProp_FwdRef) + { + dst_type->kind = RDI_TypeKind_IncompleteUnion; + dst_type->name = name; + } + else + { + dst_type->kind = RDI_TypeKind_Union; + dst_type->byte_size = (U32)size_u64; + dst_type->name = name; + } + + B32 do_unique_name_lookup = (((lf->props & CV_TypeProp_Scoped) != 0) && + ((lf->props & CV_TypeProp_HasUniqueName) != 0)); + if(do_unique_name_lookup) + { + U8 *unique_name_ptr = name_ptr + name.size + 1; + dst_type->link_name = str8_cstring_capped(unique_name_ptr, itype_leaf_opl); + } + }break; + + //- rjf: ENUM + case CV_LeafKind_ENUM: + { + // TODO(rjf): handle props + + // rjf: unpack leaf + CV_LeafEnum *lf = (CV_LeafEnum *)itype_leaf_first; + RDIM_Type *direct_type = p2r_type_ptr_from_itype(lf->base_itype); + U8 *name_ptr = (U8 *)(lf + 1); + String8 name = str8_cstring_capped(name_ptr, itype_leaf_opl); + + // rjf: fill type + dst_type = rdim_type_chunk_list_push(arena, &all_types, tpi_leaf->itype_opl); + if(lf->props & CV_TypeProp_FwdRef) + { + dst_type->kind = RDI_TypeKind_IncompleteEnum; + 
dst_type->name = name; + } + else + { + dst_type->kind = RDI_TypeKind_Enum; + dst_type->direct_type = direct_type; + dst_type->byte_size = direct_type ? direct_type->byte_size : 0; + dst_type->name = name; + } + + B32 do_unique_name_lookup = (((lf->props & CV_TypeProp_Scoped) != 0) && + ((lf->props & CV_TypeProp_HasUniqueName) != 0)); + if(do_unique_name_lookup) + { + U8 *unique_name_ptr = name_ptr + name.size + 1; + dst_type->link_name = str8_cstring_capped(unique_name_ptr, itype_leaf_opl); + } + }break; + } + } + + //- rjf: store finalized type to this itype's slot + itype_type_ptrs[itype] = dst_type; + } + } + + ////////////////////////////////////////////////////////////// + //- rjf: types pass 2: kick off UDT build + // + U64 udt_task_size_itypes = 4096; + U64 udt_tasks_count = (tpi_leaf->itype_opl+(udt_task_size_itypes-1))/udt_task_size_itypes; + P2R_UDTConvertIn *udt_tasks_inputs = push_array(scratch.arena, P2R_UDTConvertIn, udt_tasks_count); + ASYNC_Task **udt_tasks = push_array(scratch.arena, ASYNC_Task *, udt_tasks_count); + if(in->flags & RC_Flag_UDTs) ProfScope("types pass 2: kick off UDT build") + { + for(U64 idx = 0; idx < udt_tasks_count; idx += 1) + { + udt_tasks_inputs[idx].tpi_leaf = tpi_leaf; + udt_tasks_inputs[idx].itype_first = idx*udt_task_size_itypes; + udt_tasks_inputs[idx].itype_opl = udt_tasks_inputs[idx].itype_first + udt_task_size_itypes; + udt_tasks_inputs[idx].itype_opl = ClampTop(udt_tasks_inputs[idx].itype_opl, tpi_leaf->itype_opl); + udt_tasks_inputs[idx].itype_type_ptrs = itype_type_ptrs; + udt_tasks[idx] = async_task_launch(scratch.arena, p2r_udt_convert_work, .input = &udt_tasks_inputs[idx]); + } + } + + ////////////////////////////////////////////////////////////// + //- rjf: join link name map building task + // + P2R_LinkNameMap *link_name_map = 0; + ProfScope("join link name map building task") + { + async_task_join(link_name_map_task); + link_name_map = &link_name_map__in_progress; + } + + 
////////////////////////////////////////////////////////////// + //- rjf: join unit conversion & src file & line table tasks + // + RDIM_UnitChunkList all_units = {0}; + RDIM_SrcFileChunkList all_src_files = {0}; + RDIM_LineTableChunkList all_line_tables = {0}; + RDIM_LineTable **units_first_inline_site_line_tables = 0; + ProfScope("join unit conversion & src file tasks") + { + P2R_UnitConvertOut *out = async_task_join_struct(unit_convert_task, P2R_UnitConvertOut); + all_units = out->units; + all_src_files = out->src_files; + all_line_tables = out->line_tables; + units_first_inline_site_line_tables = out->units_first_inline_site_line_tables; + } + + ////////////////////////////////////////////////////////////// + //- rjf: produce symbols from all streams + // + RDIM_SymbolChunkList all_procedures = {0}; + RDIM_SymbolChunkList all_global_variables = {0}; + RDIM_SymbolChunkList all_thread_variables = {0}; + RDIM_ScopeChunkList all_scopes = {0}; + RDIM_InlineSiteChunkList all_inline_sites = {0}; + ProfScope("produce symbols from all streams") + { + //////////////////////////// + //- rjf: kick off all symbol conversion tasks + // + U64 global_stream_subdivision_tasks_count = sym ? (sym->sym_ranges.count+16383)/16384 : 0; + U64 global_stream_syms_per_task = sym ? 
sym->sym_ranges.count/global_stream_subdivision_tasks_count : 0; + U64 tasks_count = comp_unit_count + global_stream_subdivision_tasks_count; + P2R_SymbolStreamConvertIn *tasks_inputs = push_array(scratch.arena, P2R_SymbolStreamConvertIn, tasks_count); + ASYNC_Task **tasks = push_array(scratch.arena, ASYNC_Task *, tasks_count); + ProfScope("kick off all symbol conversion tasks") + { + for(U64 idx = 0; idx < tasks_count; idx += 1) + { + tasks_inputs[idx].arch = arch; + tasks_inputs[idx].coff_sections = coff_sections; + tasks_inputs[idx].tpi_hash = tpi_hash; + tasks_inputs[idx].tpi_leaf = tpi_leaf; + tasks_inputs[idx].ipi_leaf = ipi_leaf; + tasks_inputs[idx].itype_type_ptrs = itype_type_ptrs; + tasks_inputs[idx].link_name_map = link_name_map; + if(idx < global_stream_subdivision_tasks_count) + { + tasks_inputs[idx].sym = sym; + tasks_inputs[idx].sym_ranges_first= idx*global_stream_syms_per_task; + tasks_inputs[idx].sym_ranges_opl = tasks_inputs[idx].sym_ranges_first + global_stream_syms_per_task; + tasks_inputs[idx].sym_ranges_opl = ClampTop(tasks_inputs[idx].sym_ranges_opl, sym->sym_ranges.count); + } + else + { + tasks_inputs[idx].sym = sym_for_unit[idx-global_stream_subdivision_tasks_count]; + tasks_inputs[idx].sym_ranges_first= 0; + tasks_inputs[idx].sym_ranges_opl = sym_for_unit[idx-global_stream_subdivision_tasks_count]->sym_ranges.count; + tasks_inputs[idx].first_inline_site_line_table = units_first_inline_site_line_tables[idx-global_stream_subdivision_tasks_count]; + } + tasks[idx] = async_task_launch(scratch.arena, p2r_symbol_stream_convert_work, .input = &tasks_inputs[idx]); + } + } + + //////////////////////////// + //- rjf: join tasks, merge with top-level collections + // + ProfScope("join tasks, merge with top-level collections") + { + for(U64 idx = 0; idx < tasks_count; idx += 1) + { + P2R_SymbolStreamConvertOut *out = async_task_join_struct(tasks[idx], P2R_SymbolStreamConvertOut); + rdim_symbol_chunk_list_concat_in_place(&all_procedures, 
&out->procedures); + rdim_symbol_chunk_list_concat_in_place(&all_global_variables, &out->global_variables); + rdim_symbol_chunk_list_concat_in_place(&all_thread_variables, &out->thread_variables); + rdim_scope_chunk_list_concat_in_place(&all_scopes, &out->scopes); + rdim_inline_site_chunk_list_concat_in_place(&all_inline_sites,&out->inline_sites); + } + } + } + + ////////////////////////////////////////////////////////////// + //- rjf: types pass 5: join UDT build tasks + // + RDIM_UDTChunkList all_udts = {0}; + for(U64 idx = 0; idx < udt_tasks_count; idx += 1) + { + RDIM_UDTChunkList *udts = async_task_join_struct(udt_tasks[idx], RDIM_UDTChunkList); + rdim_udt_chunk_list_concat_in_place(&all_udts, udts); + } + + ////////////////////////////////////////////////////////////// + //- rjf: fill output + // + RDIM_BakeParams *out = push_array(arena, RDIM_BakeParams, 1); + { + out->top_level_info = top_level_info; + out->binary_sections = binary_sections; + out->units = all_units; + out->types = all_types; + out->udts = all_udts; + out->src_files = all_src_files; + out->line_tables = all_line_tables; + out->global_variables = all_global_variables; + out->thread_variables = all_thread_variables; + out->procedures = all_procedures; + out->scopes = all_scopes; + out->inline_sites = all_inline_sites; + } + + scratch_end(scratch); + return out; +}
+
+////////////////////////////////
+//~ PDB Symbol & File Reference Queries
+
+//- Reports whether any name in `symbol_list` can be found through the PDB's GSI.
+//
+//  msf_data    - raw bytes of the MSF container the streams are read from
+//  symbol_list - names handed one by one to pdb_gsi_symbol_from_string
+//  st          - stream table for `msf_data`; forwarded to every
+//                msf_data_from_stream_number call
+//
+//  Returns 1 as soon as one lookup yields an offset that lies inside the stream
+//  numbered dbi->sym_sn; returns 0 if no name does, or if the DBI or GSI stream
+//  does not parse. All intermediate data lives on a scratch arena.
+internal B32
+p2r_has_symbol_ref(String8 msf_data, String8List symbol_list, MSF_RawStreamTable *st)
+{
+  Temp scratch = scratch_begin(0,0);
+
+  B32 has_ref = 0;
+
+  String8 dbi_data = msf_data_from_stream_number(scratch.arena, msf_data, st, PDB_FixedStream_Dbi);
+  PDB_DbiParsed *dbi = pdb_dbi_from_data(scratch.arena, dbi_data);
+  if(dbi)
+  {
+    String8 gsi_data = msf_data_from_stream_number(scratch.arena, msf_data, st, dbi->gsi_sn);
+    PDB_GsiParsed *gsi_parsed = pdb_gsi_from_data(scratch.arena, gsi_data);
+    if(gsi_parsed)
+    {
+      String8 symbol_data = msf_data_from_stream_number(scratch.arena, msf_data, st, dbi->sym_sn);
+
+      for(String8Node *symbol_n = symbol_list.first; symbol_n != 0; symbol_n = symbol_n->next)
+      {
+        // NOTE(review): an offset >= symbol_data.size is treated as "not found";
+        // presumably that is the lookup's miss value - confirm against pdb_gsi_symbol_from_string
+        U64 symbol_off = pdb_gsi_symbol_from_string(gsi_parsed, symbol_data, symbol_n->string);
+        if(symbol_off < symbol_data.size)
+        {
+          has_ref = 1;
+          break;
+        }
+      }
+    }
+  }
+
+  scratch_end(scratch);
+  return has_ref;
+}
+
+//- Reports whether any string in `file_list` is present in the PDB string table.
+//
+//  msf_data  - raw bytes of the MSF container the streams are read from
+//  file_list - strings handed one by one to pdb_strtbl_off_from_string
+//  st        - stream table for `msf_data`; forwarded to every
+//              msf_data_from_stream_number call
+//
+//  Returns 1 as soon as one lookup returns something other than max_U32;
+//  returns 0 if none does, or if the info stream, the named stream table, or the
+//  string table does not parse. All intermediate data lives on a scratch arena.
+internal B32
+p2r_has_file_ref(String8 msf_data, String8List file_list, MSF_RawStreamTable *st)
+{
+  Temp scratch = scratch_begin(0,0);
+
+  B32 has_ref = 0;
+
+  String8 info_data = msf_data_from_stream_number(scratch.arena, msf_data, st, PDB_FixedStream_Info);
+  PDB_Info *info = pdb_info_from_data(scratch.arena, info_data);
+  if(info)
+  {
+    PDB_NamedStreamTable *named_streams = pdb_named_stream_table_from_info(scratch.arena, info);
+    if(named_streams)
+    {
+      MSF_StreamNumber strtbl_sn = named_streams->sn[PDB_NamedStream_StringTable];
+      String8 strtbl_data = msf_data_from_stream_number(scratch.arena, msf_data, st, strtbl_sn);
+      PDB_Strtbl *strtbl = pdb_strtbl_from_data(scratch.arena, strtbl_data);
+      if(strtbl)
+      {
+        for(String8Node *file_n = file_list.first; file_n != 0; file_n = file_n->next)
+        {
+          // NOTE(review): max_U32 is treated as the "not in table" result - confirm
+          // against pdb_strtbl_off_from_string
+          U32 off = pdb_strtbl_off_from_string(strtbl, file_n->string);
+          if(off != max_U32)
+          {
+            has_ref = 1;
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  scratch_end(scratch);
+  return has_ref;
+}
+
+//- Reports whether the PDB in `msf_data` references any of the given symbol names
+//  or any of the given file strings.
+//
+//  The stream table is parsed once and shared by both checks. Symbols are
+//  checked first; files are only checked when no symbol matched. A list with
+//  node_count == 0 skips its check entirely.
+//
+//  Returns 1 on the first match, 0 otherwise - including when the stream table
+//  comes back null, in which case neither helper is called.
+internal B32
+p2r_has_symbol_or_file_ref(String8 msf_data, String8List symbol_list, String8List file_list)
+{
+  Temp scratch = scratch_begin(0,0);
+
+  B32 has_ref = 0;
+
+  // every other parse result in this group is null-checked before use; do the
+  // same for the stream table instead of handing a null pointer to the helpers
+  MSF_RawStreamTable *st = msf_raw_stream_table_from_data(scratch.arena, msf_data);
+  if(st != 0)
+  {
+    if(symbol_list.node_count)
+    {
+      has_ref = p2r_has_symbol_ref(msf_data, symbol_list, st);
+    }
+
+    if(!has_ref && file_list.node_count)
+    {
+      has_ref = p2r_has_file_ref(msf_data, file_list, st);
+    }
+  }
+
+  scratch_end(scratch);
+  return has_ref;
+}
+
+diff --git a/src/radcon/radcon_pdb.h b/src/radcon/radcon_pdb.h new file mode 100644 index 00000000..e4574a32 --- /dev/null +++ 
b/src/radcon/radcon_pdb.h @@ -0,0 +1,238 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#ifndef RADCON_PDB_H +#define RADCON_PDB_H + +//////////////////////////////// +//~ rjf: Initial PDB Information Extraction & Conversion Preparation Task Types + +//- rjf: tpi hash parsing + +typedef struct P2R_TPIHashParseIn P2R_TPIHashParseIn; +struct P2R_TPIHashParseIn +{ + PDB_Strtbl *strtbl; + PDB_TpiParsed *tpi; + String8 hash_data; + String8 aux_data; +}; + +//- rjf: tpi leaves parsing + +typedef struct P2R_TPILeafParseIn P2R_TPILeafParseIn; +struct P2R_TPILeafParseIn +{ + String8 leaf_data; + CV_TypeId itype_first; +}; + +//- rjf: exe hashing + +typedef struct P2R_EXEHashIn P2R_EXEHashIn; +struct P2R_EXEHashIn +{ + String8 exe_data; +}; + +//- rjf: symbol stream parsing + +typedef struct P2R_SymbolStreamParseIn P2R_SymbolStreamParseIn; +struct P2R_SymbolStreamParseIn +{ + String8 data; +}; + +//- rjf: c13 line info stream parsing + +typedef struct P2R_C13StreamParseIn P2R_C13StreamParseIn; +struct P2R_C13StreamParseIn +{ + String8 data; + String8 strtbl; + COFF_SectionHeaderArray coff_sections; +}; + +//- rjf: comp unit parsing + +typedef struct P2R_CompUnitParseIn P2R_CompUnitParseIn; +struct P2R_CompUnitParseIn +{ + String8 data; +}; + +//- rjf: comp unit contribution table parsing + +typedef struct P2R_CompUnitContributionsParseIn P2R_CompUnitContributionsParseIn; +struct P2R_CompUnitContributionsParseIn +{ + String8 data; + COFF_SectionHeaderArray coff_sections; +}; + +//////////////////////////////// +//~ rjf: Conversion Data Structure & Task Types + +//- rjf: link name map (voff -> string) + +typedef struct P2R_LinkNameNode P2R_LinkNameNode; +struct P2R_LinkNameNode +{ + P2R_LinkNameNode *next; + U64 voff; + String8 name; +}; + +typedef struct P2R_LinkNameMap P2R_LinkNameMap; +struct P2R_LinkNameMap +{ + P2R_LinkNameNode **buckets; + U64 buckets_count; + U64 bucket_collision_count; + U64 
link_name_count; +}; + +//- rjf: normalized file path -> source file map + +typedef struct P2R_SrcFileNode P2R_SrcFileNode; +struct P2R_SrcFileNode +{ + P2R_SrcFileNode *next; + RDIM_SrcFile *src_file; +}; + +typedef struct P2R_SrcFileMap P2R_SrcFileMap; +struct P2R_SrcFileMap +{ + P2R_SrcFileNode **slots; + U64 slots_count; +}; + +//- rjf: unit conversion tasks + +typedef struct P2R_UnitConvertIn P2R_UnitConvertIn; +struct P2R_UnitConvertIn +{ + PDB_Strtbl *pdb_strtbl; + COFF_SectionHeaderArray coff_sections; + PDB_CompUnitArray *comp_units; + PDB_CompUnitContributionArray *comp_unit_contributions; + CV_SymParsed **comp_unit_syms; + CV_C13Parsed **comp_unit_c13s; +}; + +typedef struct P2R_UnitConvertOut P2R_UnitConvertOut; +struct P2R_UnitConvertOut +{ + RDIM_UnitChunkList units; + RDIM_SrcFileChunkList src_files; + RDIM_LineTableChunkList line_tables; + // indexed by comp unit: p2r_convert passes entry i to the symbol stream conversion task of unit i + RDIM_LineTable **units_first_inline_site_line_tables; +}; + +//- rjf: link name map building tasks + +typedef struct P2R_LinkNameMapBuildIn P2R_LinkNameMapBuildIn; +struct P2R_LinkNameMapBuildIn +{ + CV_SymParsed *sym; + COFF_SectionHeaderArray coff_sections; + P2R_LinkNameMap *link_name_map; +}; + +//- rjf: udt conversion + +typedef struct P2R_UDTConvertIn P2R_UDTConvertIn; +struct P2R_UDTConvertIn +{ + CV_LeafParsed *tpi_leaf; + // [itype_first, itype_opl): itypes handled by one task; p2r_convert hands out fixed-size slices and clamps the end to tpi_leaf->itype_opl + CV_TypeId itype_first; + CV_TypeId itype_opl; + RDIM_Type **itype_type_ptrs; +}; + +//- rjf: symbol stream conversion + +typedef struct P2R_SymbolStreamConvertIn P2R_SymbolStreamConvertIn; +struct P2R_SymbolStreamConvertIn +{ + RDI_Arch arch; + COFF_SectionHeaderArray coff_sections; + PDB_TpiHashParsed *tpi_hash; + CV_LeafParsed *tpi_leaf; + CV_LeafParsed *ipi_leaf; + CV_SymParsed *sym; + // [sym_ranges_first, sym_ranges_opl): slice of sym->sym_ranges given to one task (a slice of the global stream, or a comp unit's whole stream) + U64 sym_ranges_first; + U64 sym_ranges_opl; + RDIM_Type **itype_type_ptrs; + P2R_LinkNameMap *link_name_map; + // p2r_convert assigns this only for per-comp-unit tasks, not for global stream slices + RDIM_LineTable *first_inline_site_line_table; +}; + +// per-task results; after joining a task, p2r_convert concatenates each list into its top-level counterpart +typedef struct P2R_SymbolStreamConvertOut P2R_SymbolStreamConvertOut; +struct P2R_SymbolStreamConvertOut +{ + RDIM_SymbolChunkList 
procedures; + RDIM_SymbolChunkList global_variables; + RDIM_SymbolChunkList thread_variables; + RDIM_ScopeChunkList scopes; + RDIM_InlineSiteChunkList inline_sites; +}; + +//////////////////////////////// +//~ rjf: Basic Helpers + +internal U64 p2r_end_of_cplusplus_container_name(String8 str); +internal U64 p2r_hash_from_voff(U64 voff); + +//////////////////////////////// +//~ rjf: Location Info Building Helpers + +internal RDIM_Location *p2r_location_from_addr_reg_off(Arena *arena, RDI_Arch arch, RDI_RegCode reg_code, U32 reg_byte_size, U32 reg_byte_pos, S64 offset, B32 extra_indirection); +internal RDI_RegCode p2r_reg_code_from_arch_encoded_fp_reg(RDI_Arch arch, CV_EncodedFramePtrReg encoded_reg); +internal void p2r_location_over_lvar_addr_range(Arena *arena, RDIM_ScopeChunkList *scopes, RDIM_LocationSet *locset, RDIM_Location *location, CV_LvarAddrRange *range, COFF_SectionHeader *section, CV_LvarAddrGap *gaps, U64 gap_count); + +//////////////////////////////// +//~ rjf: Initial Parsing & Preparation Pass Tasks + +ASYNC_WORK_DEF(p2r_exe_hash_work); +ASYNC_WORK_DEF(p2r_tpi_hash_parse_work); +ASYNC_WORK_DEF(p2r_tpi_leaf_work); +ASYNC_WORK_DEF(p2r_symbol_stream_parse_work); +ASYNC_WORK_DEF(p2r_c13_stream_parse_work); +ASYNC_WORK_DEF(p2r_comp_unit_parse_work); +ASYNC_WORK_DEF(p2r_comp_unit_contributions_parse_work); + +//////////////////////////////// +//~ rjf: Unit Conversion Tasks + +ASYNC_WORK_DEF(p2r_units_convert_work); + +//////////////////////////////// +//~ rjf: Link Name Map Building Tasks + +ASYNC_WORK_DEF(p2r_link_name_map_build_work); + +//////////////////////////////// +//~ rjf: UDT Conversion Tasks + +ASYNC_WORK_DEF(p2r_udt_convert_work); + +//////////////////////////////// +//~ rjf: Symbol Stream Conversion Tasks + +ASYNC_WORK_DEF(p2r_symbol_stream_convert_work); + +//////////////////////////////// +//~ rjf: Top-Level Conversion Entry Point + +internal RDIM_BakeParams *p2r_convert(Arena *arena, RDIM_HelpState *help_state, RC_Context *in); + 
+//////////////////////////////// + +internal B32 p2r_has_symbol_ref(String8 msf_data, String8List symbol_list, MSF_RawStreamTable *st); +internal B32 p2r_has_file_ref(String8 msf_data, String8List file_list, MSF_RawStreamTable *st); +internal B32 p2r_has_symbol_or_file_ref(String8 msf_data, String8List symbol_list, String8List file_list); + +#endif // RADCON_PDB_H + diff --git a/src/raddump/raddump.c b/src/raddump/raddump.c index 531f5d74..d369ff54 100644 --- a/src/raddump/raddump.c +++ b/src/raddump/raddump.c @@ -42,78 +42,21 @@ rd_stderr(char *fmt, ...) scratch_end(scratch); } -internal String8 -rd_invoke_rdi_converter(Arena *arena, String8 exe_name, String8 exe_data, String8 pdb_path) -{ - Temp scratch = scratch_begin(0,0); - - P2R_User2Convert user2convert = {0}; - user2convert.input_pdb_name = pdb_path; - user2convert.input_pdb_data = os_data_from_file_path(scratch.arena, pdb_path); - user2convert.input_exe_name = exe_name; - user2convert.input_exe_data = exe_data; - user2convert.output_name = str8_zero(); - user2convert.flags = P2R_ConvertFlag_All; - - P2R_Convert2Bake *convert2bake = p2r_convert(scratch.arena, &user2convert); - P2R_Bake2Serialize *bake2srlz = p2r_bake(scratch.arena, convert2bake); - RDIM_SerializedSectionBundle bundle = rdim_serialized_section_bundle_from_bake_results(&bake2srlz->bake_results); - String8List rdi_blobs = rdim_file_blobs_from_section_bundle(scratch.arena, &bundle); - String8 raw_rdi = str8_list_join(arena, &rdi_blobs, 0); - - scratch_end(scratch); - return raw_rdi; -} - internal RDI_Parsed * -rd_rdi_from_pe(Arena *arena, String8 data_path, String8 raw_data) +rd_rdi_from_pe(Arena *arena, String8 pe_path) { Temp scratch = scratch_begin(&arena, 1); + // make command line for converter + String8List cmdl_string = {0}; + str8_list_pushf(scratch.arena, &cmdl_string, "-pe:%S", pe_path); + CmdLine cmdl = cmd_line_from_string_list(scratch.arena, cmdl_string); + + // run converter + String8 raw_rdi = rc_rdi_from_cmd_line(scratch.arena, 
&cmdl); + + // load RDI RDI_Parsed *rdi = 0; - - PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, raw_data); - String8 raw_debug_dir = str8_substr(raw_data, pe.data_dir_franges[PE_DataDirectoryIndex_DEBUG]); - PE_DebugInfoList dbg_list = pe_parse_debug_directory(scratch.arena, raw_data, raw_debug_dir); - - String8 raw_rdi = {0}; - Guid rdi_guid = {0}; - for (PE_DebugInfoNode *n = dbg_list.first; n != 0; n = n->next) { - PE_DebugInfo *v = &n->v; - if (v->header.type == PE_DebugDirectoryType_CODEVIEW) { - if (v->u.codeview.magic == PE_CODEVIEW_RDI_MAGIC) { - if (raw_rdi.size) { - rd_warningf("multiple RDI paths defined in %S"); - } else { - raw_rdi = os_data_from_file_path(arena, v->u.codeview.rdi.path); - rdi_guid = v->u.codeview.rdi.header.guid; - if (raw_rdi.size == 0) { - rd_errorf("unable to open RDI: %S", v->u.codeview.rdi.path); - } - } - } - } - } - - if (!raw_rdi.size) { - String8 pdb_path = str8_zero(); - Guid pdb_guid = {0}; - B32 convert_pdb = 0; - for (PE_DebugInfoNode *n = dbg_list.first; n != 0; n = n->next) { - PE_DebugInfo *v = &n->v; - if (v->header.type == PE_DebugDirectoryType_CODEVIEW) { - pdb_path = v->u.codeview.pdb70.path; - pdb_guid = v->u.codeview.pdb70.header.guid; - convert_pdb = 1; - break; - } - } - - if (convert_pdb) { - raw_rdi = rd_invoke_rdi_converter(scratch.arena, data_path, raw_data, pdb_path); - } - } - if (raw_rdi.size) { rdi = push_array(arena, RDI_Parsed, 1); diff --git a/src/raddump/raddump_main.c b/src/raddump/raddump_main.c index f72c53d6..c8225dba 100644 --- a/src/raddump/raddump_main.c +++ b/src/raddump/raddump_main.c @@ -53,8 +53,12 @@ #include "dwarf/dwarf_coff.h" #include "dwarf/dwarf_elf.h" #include "dwarf/dwarf_enum.h" -#include "rdi_from_pdb/rdi_from_pdb.h" -#include "rdi_from_dwarf/rdi_from_dwarf.h" +#include "radcon/radcon.h" +#include "radcon/radcon_coff.h" +#include "radcon/radcon_cv.h" +#include "radcon/radcon_elf.h" +#include "radcon/radcon_pdb.h" +#include "radcon/radcon_dwarf.h" #include 
"base/base_inc.c" #include "linker/base_ext/base_inc.c" @@ -87,8 +91,12 @@ #include "dwarf/dwarf_coff.c" #include "dwarf/dwarf_elf.c" #include "dwarf/dwarf_enum.c" -#include "rdi_from_pdb/rdi_from_pdb.c" -#include "rdi_from_dwarf/rdi_from_dwarf.c" +#include "radcon/radcon_coff.c" +#include "radcon/radcon_cv.c" +#include "radcon/radcon_elf.c" +#include "radcon/radcon_pdb.c" +#include "radcon/radcon_dwarf.c" +#include "radcon/radcon.c" #include "linker/thread_pool/thread_pool.h" #include "linker/thread_pool/thread_pool.c" @@ -288,7 +296,7 @@ entry_point(CmdLine *cmdline) } else if (pe_check_magic(raw_data)) { RDI_Parsed *rdi = 0; if (!(opts & RD_Option_NoRdi)) { - rdi = rd_rdi_from_pe(arena, file_path, raw_data); + rdi = rd_rdi_from_pe(arena, file_path); } pe_print(arena, out, indent, raw_data, opts, rdi); } else if (pe_is_res(raw_data)) {