From 49de09883eff5931765511b67d4fd848d6890b0d Mon Sep 17 00:00:00 2001 From: Ryan Fleury Date: Tue, 10 Jun 2025 08:53:57 -0700 Subject: [PATCH] plug in dwarf converter to radbin; hook up to both rdi generation & breakpad generation; radbin exe -> pdb / elf -> dwarf inferences; cleanup / unification passes --- build.bat | 1 - src/base/base_core.h | 16 +- src/ctrl/ctrl_core.c | 11 - src/dwarf/dwarf_elf.c | 71 +- src/lib_raddbg_markup/raddbg_markup.h | 16 +- src/linker/hash_table.c | 20 +- src/os/core/os_core.c | 21 + src/os/core/os_core.h | 1 + src/pe/pe.c | 99 ++- src/pe/pe.h | 33 +- src/radbin/generated/radbin.meta.c | 5 +- src/radbin/generated/radbin.meta.h | 5 +- src/radbin/radbin.c | 973 +++++++++++++++----------- src/radbin/radbin.h | 36 + src/radbin/radbin.mdesk | 3 +- src/radbin/radbin_main.c | 16 + src/radcon/radcon.c | 142 ++-- src/radcon/radcon.h | 2 +- src/radcon/radcon_dwarf.c | 37 +- src/radcon/radcon_dwarf.h | 6 +- src/raddbg/raddbg_main.c | 18 +- src/raddump/raddump.c | 14 +- src/raddump/raddump.h | 4 +- src/rdi_from_coff/rdi_from_coff.c | 79 +++ src/rdi_from_coff/rdi_from_coff.h | 11 + src/rdi_from_dwarf/rdi_from_dwarf.c | 840 +++++++++++++--------- src/rdi_from_dwarf/rdi_from_dwarf.h | 45 +- src/rdi_from_elf/rdi_from_elf.c | 9 + src/rdi_from_elf/rdi_from_elf.h | 9 + 29 files changed, 1524 insertions(+), 1019 deletions(-) create mode 100644 src/rdi_from_coff/rdi_from_coff.c create mode 100644 src/rdi_from_coff/rdi_from_coff.h create mode 100644 src/rdi_from_elf/rdi_from_elf.c create mode 100644 src/rdi_from_elf/rdi_from_elf.h diff --git a/build.bat b/build.bat index e608d679..76ce1902 100644 --- a/build.bat +++ b/build.bat @@ -111,7 +111,6 @@ pushd build if "%raddbg%"=="1" set didbuild=1 && %compile% ..\src\raddbg\raddbg_main.c %compile_link% %link_icon% %out%raddbg.exe || exit /b 1 if "%radlink%"=="1" set didbuild=1 && %compile% ..\src\linker\lnk.c %compile_link% %linker% /NOIMPLIB %linker% /NATVIS:"%~dp0\src\linker\linker.natvis" %out%radlink.exe 
|| exit /b 1 if "%radbin%"=="1" set didbuild=1 && %compile% ..\src\radbin\radbin_main.c %compile_link% %out%radbin.exe || exit /b 1 -if "%radcon%"=="1" set didbuild=1 && %compile% ..\src\radcon\radcon_main.c %compile_link% %out%radcon.exe || exit /b 1 if "%raddump%"=="1" set didbuild=1 && %compile% ..\src\raddump\raddump_main.c %compile_link% %out%raddump.exe || exit /b 1 if "%tester%"=="1" set didbuild=1 && %compile% ..\src\tester\tester_main.c %compile_link% %out%tester.exe || exit /b 1 if "%ryan_scratch%"=="1" set didbuild=1 && %compile% ..\src\scratch\ryan_scratch.c %compile_link% %out%ryan_scratch.exe || exit /b 1 diff --git a/src/base/base_core.h b/src/base/base_core.h index f38333cc..511020f8 100644 --- a/src/base/base_core.h +++ b/src/base/base_core.h @@ -487,14 +487,16 @@ typedef enum OperatingSystem } OperatingSystem; -typedef enum ImageType +typedef enum ExecutableImageKind { - Image_Null, - Image_CoffPe, - Image_Elf32, - Image_Elf64, - Image_Macho -} ImageType; + ExecutableImageKind_Null, + ExecutableImageKind_CoffPe, + ExecutableImageKind_Elf32, + ExecutableImageKind_Elf64, + ExecutableImageKind_Macho, + ExecutableImageKind_COUNT +} +ExecutableImageKind; typedef enum Arch { diff --git a/src/ctrl/ctrl_core.c b/src/ctrl/ctrl_core.c index c88ad7c0..e996b72f 100644 --- a/src/ctrl/ctrl_core.c +++ b/src/ctrl/ctrl_core.c @@ -3989,13 +3989,8 @@ ctrl_thread__module_open(CTRL_Handle process, CTRL_Handle module, Rng1U64 vaddr_ file_header_off + sizeof(COFF_FileHeader) + opt_ext_size); //- rjf: read optional header - U16 optional_magic = 0; - U64 image_base = 0; U64 entry_point = 0; U32 data_dir_count = 0; - U64 virt_section_align = 0; - U64 file_section_align = 0; - Rng1U64 *data_dir_franges = 0; if(opt_ext_size > 0) { // rjf: read magic number @@ -4011,10 +4006,7 @@ ctrl_thread__module_open(CTRL_Handle process, CTRL_Handle module, Rng1U64 vaddr_ { PE_OptionalHeader32 pe_optional = {0}; dmn_process_read_struct(process.dmn_handle, vaddr_range.min + 
opt_ext_off_range.min, &pe_optional); - image_base = pe_optional.image_base; entry_point = pe_optional.entry_point_va; - virt_section_align = pe_optional.section_alignment; - file_section_align = pe_optional.file_alignment; reported_data_dir_offset = sizeof(pe_optional); reported_data_dir_count = pe_optional.data_dir_count; }break; @@ -4022,10 +4014,7 @@ ctrl_thread__module_open(CTRL_Handle process, CTRL_Handle module, Rng1U64 vaddr_ { PE_OptionalHeader32Plus pe_optional = {0}; dmn_process_read_struct(process.dmn_handle, vaddr_range.min + opt_ext_off_range.min, &pe_optional); - image_base = pe_optional.image_base; entry_point = pe_optional.entry_point_va; - virt_section_align = pe_optional.section_alignment; - file_section_align = pe_optional.file_alignment; reported_data_dir_offset = sizeof(pe_optional); reported_data_dir_count = pe_optional.data_dir_count; }break; diff --git a/src/dwarf/dwarf_elf.c b/src/dwarf/dwarf_elf.c index 99117b2e..9154741b 100644 --- a/src/dwarf/dwarf_elf.c +++ b/src/dwarf/dwarf_elf.c @@ -5,72 +5,75 @@ internal B32 dw_is_dwarf_present_elf_section_table(String8 raw_image, ELF_BinInfo *bin) { Temp scratch = scratch_begin(0,0); - + B32 is_dwarf_present = 0; - + ELF_Shdr64Array sections = elf_shdr64_array_from_bin(scratch.arena, raw_image, &bin->hdr); - + for (U64 i = 0; i < sections.count; ++i) { ELF_Shdr64 *shdr = §ions.v[i]; String8 name = elf_name_from_shdr64(raw_image, &bin->hdr, bin->sh_name_range, shdr); - + if (shdr->sh_type != ELF_SectionCode_ProgBits) { continue; } - + DW_SectionKind s = dw_section_kind_from_string(name); if (s == DW_Section_Null) { s = dw_section_dwo_kind_from_string(name); } - + is_dwarf_present = s != DW_Section_Null; if (is_dwarf_present) { break; } } - + scratch_end(scratch); return is_dwarf_present; } +#define SINFL_IMPLEMENTATION +#include "third_party/sinfl/sinfl.h" + internal DW_Input dw_input_from_elf_section_table(Arena *arena, String8 raw_image, ELF_BinInfo *bin) { Temp scratch = scratch_begin(&arena, 1); 
- + DW_Input result = {0}; B32 sect_status[ArrayCount(result.sec)] = {0}; - + ELF_Shdr64Array sections = elf_shdr64_array_from_bin(scratch.arena, raw_image, &bin->hdr); - + for (U64 sect_idx = 1; sect_idx < sections.count; ++sect_idx) { ELF_Shdr64 *shdr = §ions.v[sect_idx]; - + // skip BSS sections if (shdr->sh_type != ELF_SectionCode_ProgBits) { continue; } - + String8 name = elf_name_from_shdr64(raw_image, &bin->hdr, bin->sh_name_range, shdr); - + DW_SectionKind s = dw_section_kind_from_string(name); B32 is_dwo = 0; if (s == DW_Section_Null) { s = dw_section_dwo_kind_from_string(name); is_dwo = 1; } - + if (s != DW_Section_Null) { if (sect_status[s]) { Assert(!"too many debug sections with identical name, picking first"); } else { Rng1U64 raw_data_range = rng_1u64(shdr->sh_offset, shdr->sh_offset + shdr->sh_size); String8 data = str8_substr(raw_image, raw_data_range); - + // ELF was compiled with compressed debug info if (shdr->sh_flags & ELF_Shf_Compressed) { String8 comp_data_with_header = data; - + // read header ELF_Chdr64 chdr64 = {0}; U64 chdr_size = 0; @@ -86,38 +89,38 @@ dw_input_from_elf_section_table(Arena *arena, String8 raw_image, ELF_BinInfo *bi chdr64 = elf_chdr64_from_chdr32(chdr32); } } - + AssertAlways(IsPow2(chdr64.ch_addr_align)); - + // skip header String8 comp_data = str8_skip(comp_data_with_header, chdr_size); - + // push buffer for the decompressor U8 *decomp_buffer = push_array_no_zero_aligned(arena, U8, chdr64.ch_size, chdr64.ch_addr_align); U64 actual_decomp_size = 0; // decompress switch (chdr64.ch_type) { - case ELF_CompressType_None: { - AssertAlways(!"unexpected compression type"); - } break; - case ELF_CompressType_ZLib: { - actual_decomp_size = zsinflate(decomp_buffer, chdr64.ch_size, comp_data.str, comp_data.size); - } break; - case ELF_CompressType_ZStd: { - // TODO: zstd lib - NotImplemented; - } break; - default: InvalidPath; break; + case ELF_CompressType_None: { + AssertAlways(!"unexpected compression type"); + } break; + 
case ELF_CompressType_ZLib: { + actual_decomp_size = zsinflate(decomp_buffer, chdr64.ch_size, comp_data.str, comp_data.size); + } break; + case ELF_CompressType_ZStd: { + // TODO: zstd lib + NotImplemented; + } break; + default: InvalidPath; break; } - + // TODO: error handling AssertAlways(actual_decomp_size == chdr64.ch_size); - + // set decompressed section data data = str8(decomp_buffer, actual_decomp_size); } - + sect_status[s] = 1; DW_Section *d = &result.sec[s]; d->name = push_str8_copy(arena, name); @@ -126,7 +129,7 @@ dw_input_from_elf_section_table(Arena *arena, String8 raw_image, ELF_BinInfo *bi } } } - + scratch_end(scratch); return result; } diff --git a/src/lib_raddbg_markup/raddbg_markup.h b/src/lib_raddbg_markup/raddbg_markup.h index 71ff0223..fce7545b 100644 --- a/src/lib_raddbg_markup/raddbg_markup.h +++ b/src/lib_raddbg_markup/raddbg_markup.h @@ -83,18 +83,20 @@ void raddbg_annotate_vaddr_range__impl(void *ptr, unsigned __int64 size, char *f //////////////////////////////// //~ Win32 Implementations -#if defined(RADDBG_MARKUP_IMPLEMENTATION) && !defined(RADDBG_MARKUP_STUBS) -#if defined(_WIN32) +#if defined(_WIN32) && !defined(RADDBG_MARKUP_STUBS) + +//- section allocating +#pragma section(".raddbg", read, write) +#define raddbg_exe_data __declspec(allocate(".raddbg")) + +//- one-time implementations +#if defined(RADDBG_MARKUP_IMPLEMENTATION) //- default includes #if RADDBG_MARKUP_DEFAULT_VSNPRINTF #include #endif -//- section allocating -#pragma section(".raddbg", read, write) -#define raddbg_exe_data __declspec(allocate(".raddbg")) - //- first byte of exe data section -> is attached static raddbg_exe_data unsigned char raddbg_is_attached_byte_marker[1]; @@ -459,8 +461,8 @@ raddbg_annotate_vaddr_range__impl(void *ptr, unsigned __int64 size, char *fmt, . 
} } -#endif // defined(_WIN32) #endif // defined(RADDBG_MARKUP_IMPLEMENTATION) +#endif // defined(_WIN32) && !defined(RADDBG_MARKUP_STUBS) //////////////////////////////// //~ Win32 STL Type Views diff --git a/src/linker/hash_table.c b/src/linker/hash_table.c index 1f1ed5ec..f7f78bf7 100644 --- a/src/linker/hash_table.c +++ b/src/linker/hash_table.c @@ -26,6 +26,10 @@ bucket_list_pop(BucketList *list) //////////////////////////////// +#define XXH_STATIC_LINKING_ONLY +#include "third_party/xxHash/xxhash.c" +#include "third_party/xxHash/xxhash.h" + internal U64 hash_table_hasher(String8 string) { @@ -47,7 +51,7 @@ hash_table_purge(HashTable *ht) { // reset key count ht->count = 0; - + // concat buckets for (U64 ibucket = 0; ibucket < ht->cap; ++ibucket) { bucket_list_concat_in_place(&ht->free_buckets, &ht->buckets[ibucket]); @@ -339,20 +343,20 @@ internal U64Array remove_duplicates_u64_array(Arena *arena, U64Array arr) { Temp scratch = scratch_begin(&arena, 1); - + HashTable *ht = hash_table_init(scratch.arena, ((U64)(F64)arr.count * 0.5)); - + for (U64 i = 0; i < arr.count; ++i) { KeyValuePair *is_present = hash_table_search_u64(ht, arr.v[i]); if (!is_present) { hash_table_push_u64_raw(scratch.arena, ht, arr.v[i], 0); } } - + U64Array result = {0}; result.count = ht->count; result.v = keys_from_hash_table_u64(arena, ht); - + scratch_end(scratch); return result; } @@ -361,10 +365,10 @@ internal String8List remove_duplicates_str8_list(Arena *arena, String8List list) { Temp scratch = scratch_begin(&arena, 1); - + String8List result = {0}; HashTable *ht = hash_table_init(scratch.arena, list.node_count); - + for (String8Node *node = list.first; node != 0; node = node->next) { KeyValuePair *is_present = hash_table_search_string(ht, node->string); if (!is_present) { @@ -372,7 +376,7 @@ remove_duplicates_str8_list(Arena *arena, String8List list) str8_list_push(arena, &result, node->string); } } - + scratch_end(scratch); return result; } diff --git a/src/os/core/os_core.c 
b/src/os/core/os_core.c index 6f4b972c..c305b370 100644 --- a/src/os/core/os_core.c +++ b/src/os/core/os_core.c @@ -151,6 +151,27 @@ os_string_from_file_range(Arena *arena, OS_Handle file, Rng1U64 range) return result; } +internal String8 +os_file_read_cstring(Arena *arena, OS_Handle file, U64 off) +{ + Temp scratch = scratch_begin(&arena, 1); + String8List block_list = {0}; + for(U64 cursor = off, stride = 256;; cursor += stride) + { + U8 *raw_block = push_array_no_zero(scratch.arena, U8, stride); + U64 read_size = os_file_read(file, r1u64(cursor, cursor + stride), raw_block); + String8 block = str8_cstring_capped(raw_block, raw_block+read_size); + str8_list_push(scratch.arena, &block_list, block); + if(read_size != stride || (block.size+1 <= read_size && block.str[block.size] == 0)) + { + break; + } + } + String8 result = str8_list_join(arena, &block_list, 0); + scratch_end(scratch); + return result; +} + //////////////////////////////// //~ rjf: Process Launcher Helpers diff --git a/src/os/core/os_core.h b/src/os/core/os_core.h index b07f511c..922f793c 100644 --- a/src/os/core/os_core.h +++ b/src/os/core/os_core.h @@ -156,6 +156,7 @@ internal B32 os_append_data_to_file_path(String8 path, String8 data); internal OS_FileID os_id_from_file_path(String8 path); internal S64 os_file_id_compare(OS_FileID a, OS_FileID b); internal String8 os_string_from_file_range(Arena *arena, OS_Handle file, Rng1U64 range); +internal String8 os_file_read_cstring(Arena *arena, OS_Handle file, U64 off); //////////////////////////////// //~ rjf: Process Launcher Helpers diff --git a/src/pe/pe.c b/src/pe/pe.c index 9046ddb1..cf7cedfb 100644 --- a/src/pe/pe.c +++ b/src/pe/pe.c @@ -543,7 +543,7 @@ pe_bin_info_from_data(Arena *arena, String8 data) data_dir_count = ClampTop(reported_data_dir_count, data_dir_max); // rjf: convert PE directories to ranges - data_dir_franges = push_array(arena, Rng1U64, data_dir_count); + data_dir_franges = push_array(arena, Rng1U64, Max(data_dir_count, 
PE_DataDirectoryIndex_COUNT)); for(U32 dir_idx = 0; dir_idx < data_dir_count; dir_idx += 1) { U64 dir_offset = optional_range.min + reported_data_dir_offset + sizeof(PE_DataDirectory)*dir_idx; @@ -620,87 +620,66 @@ pe_bin_info_from_data(Arena *arena, String8 data) } internal PE_DebugInfoList -pe_parse_debug_directory(Arena *arena, String8 raw_image, String8 raw_debug_dir) +pe_debug_info_list_from_raw_debug_dir(Arena *arena, String8 raw_image, String8 raw_debug_dir) { PE_DebugInfoList result = {0}; - PE_DebugDirectory *debug_entry = str8_deserial_get_raw_ptr(raw_debug_dir, 0, sizeof(*debug_entry)); PE_DebugDirectory *debug_entry_opl = debug_entry + raw_debug_dir.size/sizeof(*debug_entry_opl); - for (PE_DebugDirectory *entry = debug_entry; entry < debug_entry_opl; ++entry) { - switch (entry->type) { - default: { - PE_DebugInfoNode *n = push_array(arena, PE_DebugInfoNode, 1); - n->v.header = *entry; - n->v.u.raw_data = str8_substr(raw_image, rng_1u64(entry->foff, entry->foff + entry->size)); - - SLLQueuePush(result.first, result.last, n); - ++result.count; - } break; - case PE_DebugDirectoryType_CODEVIEW: { - U32 cv_magic = 0; - str8_deserial_read_struct(raw_image, entry->foff, &cv_magic); - - switch (cv_magic) { - case PE_CODEVIEW_PDB20_MAGIC: { + for(PE_DebugDirectory *entry = debug_entry; entry < debug_entry_opl; entry += 1) + { + PE_DebugInfoNode *n = push_array(arena, PE_DebugInfoNode, 1); + SLLQueuePush(result.first, result.last, n); + result.count += 1; + n->v.header = *entry; + switch(entry->type) + { + default:{}break; + case PE_DebugDirectoryType_CODEVIEW: + { + str8_deserial_read_struct(raw_image, entry->foff, &n->v.cv_magic); + switch(n->v.cv_magic) + { + case PE_CODEVIEW_PDB20_MAGIC: + { PE_CvHeaderPDB20 cv = {0}; U64 cv_read_size = str8_deserial_read_struct(raw_image, entry->foff, &cv); - if (cv_read_size == sizeof(cv)) { + if(cv_read_size == sizeof(cv)) + { String8 path = {0}; str8_deserial_read_cstr(raw_image, entry->foff+sizeof(cv), &path); - - 
PE_DebugInfoNode *n = push_array(arena, PE_DebugInfoNode, 1); - n->v.header = *entry; - n->v.u.codeview.pdb20.header = cv; - n->v.u.codeview.pdb20.path = path; - - SLLQueuePush(result.first, result.last, n); - ++result.count; - } else { - Assert(!"unable to read PE_CvHeaderPDB20"); + n->v.cv_pdb20_header = cv; + n->v.path = path; } - } break; - case PE_CODEVIEW_PDB70_MAGIC: { + }break; + case PE_CODEVIEW_PDB70_MAGIC: + { PE_CvHeaderPDB70 cv = {0}; U64 cv_read_size = str8_deserial_read_struct(raw_image, entry->foff, &cv); - if (cv_read_size == sizeof(cv)) { + if(cv_read_size == sizeof(cv)) + { String8 path = {0}; str8_deserial_read_cstr(raw_image, entry->foff+sizeof(cv), &path); - - PE_DebugInfoNode *n = push_array(arena, PE_DebugInfoNode, 1); - n->v.header = *entry; - n->v.u.codeview.pdb70.header = cv; - n->v.u.codeview.pdb70.path = path; - - SLLQueuePush(result.first, result.last, n); - ++result.count; - } else { - Assert(!"unable to read PE_CvHeaderPDB70"); + n->v.cv_pdb70_header = cv; + n->v.path = path; } - } break; - case PE_CODEVIEW_RDI_MAGIC: { + }break; + case PE_CODEVIEW_RDI_MAGIC: + { PE_CvHeaderRDI cv = {0}; U64 cv_read_size = str8_deserial_read_struct(raw_image, entry->foff, &cv); - if (cv_read_size == sizeof(cv)) { + if(cv_read_size == sizeof(cv)) + { String8 path = {0}; str8_deserial_read_cstr(raw_image, entry->foff+sizeof(cv), &path); - - PE_DebugInfoNode *n = push_array(arena, PE_DebugInfoNode, 1); - n->v.header = *entry; - n->v.u.codeview.rdi.header = cv; - n->v.u.codeview.rdi.path = path; - - SLLQueuePush(result.first, result.last, n); - ++result.count; - } else { - Assert(!"unable to read PE_CvHeaderRDI"); + n->v.cv_rdi_header = cv; + n->v.path = path; } - } break; - default: break; + }break; + default:{}break; } - } break; + }break; } } - return result; } diff --git a/src/pe/pe.h b/src/pe/pe.h index cefd15a4..61f6c783 100644 --- a/src/pe/pe.h +++ b/src/pe/pe.h @@ -1012,30 +1012,13 @@ struct PE_BinInfo typedef struct PE_DebugInfo { 
PE_DebugDirectory header; - union - { - union - { - U32 magic; - struct - { - PE_CvHeaderPDB20 header; - String8 path; - } pdb20; - struct - { - PE_CvHeaderPDB70 header; - String8 path; - } pdb70; - struct - { - PE_CvHeaderRDI header; - String8 path; - } rdi; - } codeview; - String8 raw_data; - } u; -} PE_DebugInfo; + U32 cv_magic; + PE_CvHeaderPDB20 cv_pdb20_header; + PE_CvHeaderPDB70 cv_pdb70_header; + PE_CvHeaderRDI cv_rdi_header; + String8 path; +} +PE_DebugInfo; typedef struct PE_DebugInfoNode { @@ -1075,7 +1058,7 @@ internal String8 pe_string_from_dll_characteristics(Arena *arena, PE_DllCharacte internal B32 pe_check_magic(String8 data); internal PE_BinInfo pe_bin_info_from_data(Arena *arena, String8 data); -internal PE_DebugInfoList pe_parse_debug_directory(Arena *arena, String8 raw_image, String8 raw_debug_dir); +internal PE_DebugInfoList pe_debug_info_list_from_raw_debug_dir(Arena *arena, String8 raw_image, String8 raw_debug_dir); internal PE_ParsedStaticImportTable pe_static_imports_from_data(Arena *arena, B32 is_pe32, U64 section_count, COFF_SectionHeader *sections, String8 raw_data, Rng1U64 dir_file_range); internal PE_ParsedDelayImportTable pe_delay_imports_from_data(Arena *arena, B32 is_pe32, U64 section_count, COFF_SectionHeader *sections, String8 raw_data, Rng1U64 dir_file_range); internal PE_ParsedExportTable pe_exports_from_data(Arena *arena, U64 section_count, COFF_SectionHeader *sections, String8 raw_data, Rng1U64 dir_file_range, Rng1U64 dir_virt_range); diff --git a/src/radbin/generated/radbin.meta.c b/src/radbin/generated/radbin.meta.c index cb284113..11d620e7 100644 --- a/src/radbin/generated/radbin.meta.c +++ b/src/radbin/generated/radbin.meta.c @@ -4,7 +4,7 @@ //- GENERATED CODE C_LINKAGE_BEGIN -String8 rb_file_format_display_name_table[9] = +String8 rb_file_format_display_name_table[10] = { {0}, str8_lit_comp("PDB"), @@ -13,7 +13,8 @@ str8_lit_comp("COFF (OBJ)"), str8_lit_comp("COFF (Big OBJ)"), str8_lit_comp("COFF (Archive)"), 
str8_lit_comp("COFF (Thin Archive)"), -str8_lit_comp("ELF"), +str8_lit_comp("ELF32"), +str8_lit_comp("ELF64"), str8_lit_comp("RDI"), }; diff --git a/src/radbin/generated/radbin.meta.h b/src/radbin/generated/radbin.meta.h index 9ad6a7a1..8f8260de 100644 --- a/src/radbin/generated/radbin.meta.h +++ b/src/radbin/generated/radbin.meta.h @@ -15,13 +15,14 @@ RB_FileFormat_COFF_OBJ, RB_FileFormat_COFF_BigOBJ, RB_FileFormat_COFF_Archive, RB_FileFormat_COFF_ThinArchive, -RB_FileFormat_ELF, +RB_FileFormat_ELF32, +RB_FileFormat_ELF64, RB_FileFormat_RDI, RB_FileFormat_COUNT, } RB_FileFormat; C_LINKAGE_BEGIN -extern String8 rb_file_format_display_name_table[9]; +extern String8 rb_file_format_display_name_table[10]; C_LINKAGE_END diff --git a/src/radbin/radbin.c b/src/radbin/radbin.c index e7289af5..eed4141c 100644 --- a/src/radbin/radbin.c +++ b/src/radbin/radbin.c @@ -19,227 +19,318 @@ rb_entry_point(CmdLine *cmdline) log_scope_begin(); ////////////////////////////// - //- rjf: analyze command line input files + //- rjf: analyze & load command line input files // - typedef struct File File; - struct File + RB_FileList input_files = {0}; { - File *next; - RB_FileFormat format; - String8 path; - }; - File *first_input_file = 0; - File *last_input_file = 0; - for(String8Node *n = cmdline->inputs.first; n != 0; n = n->next) - { - OS_Handle file = os_file_open(OS_AccessFlag_Read, n->string); - RB_FileFormat file_format = RB_FileFormat_Null; - - //- rjf: PDB magic -> PDB input - if(file_format == RB_FileFormat_Null) + String8List input_file_path_tasks = str8_list_copy(arena, &cmdline->inputs); + for(String8Node *n = input_file_path_tasks.first; n != 0; n = n->next) { - U8 msf20_magic_maybe[sizeof(msf_msf20_magic)] = {0}; - os_file_read(file, r1u64(0, sizeof(msf20_magic_maybe)), msf20_magic_maybe); - if(MemoryMatch(msf20_magic_maybe, msf_msf20_magic, sizeof(msf20_magic_maybe))) + ////////////////////////// + //- rjf: do thin analysis of file + // + RB_FileFormat file_format = 
RB_FileFormat_Null; + RB_FileFormatFlags file_format_flags = 0; { - file_format = RB_FileFormat_PDB; - } - } - if(file_format == RB_FileFormat_Null) - { - U8 msf70_magic_maybe[sizeof(msf_msf70_magic)] = {0}; - os_file_read(file, r1u64(0, sizeof(msf70_magic_maybe)), msf70_magic_maybe); - if(MemoryMatch(msf70_magic_maybe, msf_msf70_magic, sizeof(msf70_magic_maybe))) - { - file_format = RB_FileFormat_PDB; - } - } - - //- rjf: PE magic -> PE input - if(file_format == RB_FileFormat_Null) - { - PE_DosHeader dos_header_maybe = {0}; - os_file_read_struct(file, 0, &dos_header_maybe); - if(dos_header_maybe.magic == PE_DOS_MAGIC) - { - U32 pe_magic_maybe = 0; - os_file_read_struct(file, dos_header_maybe.coff_file_offset, &pe_magic_maybe); - if(pe_magic_maybe == PE_MAGIC) + OS_Handle file = os_file_open(OS_AccessFlag_Read, n->string); + FileProperties props = os_properties_from_file(file); + + //- rjf: PDB magic -> PDB input + if(file_format == RB_FileFormat_Null) { - file_format = RB_FileFormat_PE; - } - } - } - - //- rjf: COFF archive magic -> COFF archive input - if(file_format == RB_FileFormat_Null) - { - U8 coff_archive_sig_maybe[sizeof(g_coff_archive_sig)] = {0}; - os_file_read(file, r1u64(0, sizeof(coff_archive_sig_maybe)), coff_archive_sig_maybe); - if(MemoryMatch(coff_archive_sig_maybe, g_coff_archive_sig, sizeof(g_coff_archive_sig))) - { - file_format = RB_FileFormat_COFF_Archive; - } - } - if(file_format == RB_FileFormat_Null) - { - U8 coff_thin_archive_sig_maybe[sizeof(g_coff_thin_archive_sig)] = {0}; - os_file_read(file, r1u64(0, sizeof(coff_thin_archive_sig_maybe)), coff_thin_archive_sig_maybe); - if(MemoryMatch(coff_thin_archive_sig_maybe, g_coff_thin_archive_sig, sizeof(g_coff_thin_archive_sig))) - { - file_format = RB_FileFormat_COFF_ThinArchive; - } - } - - //- rjf: COFF obj magic -> COFF obj input - if(file_format == RB_FileFormat_Null) - { - COFF_BigObjHeader header_maybe = {0}; - os_file_read_struct(file, 0, &header_maybe); - if(header_maybe.sig1 == 
COFF_MachineType_Unknown && - header_maybe.sig2 == max_U16 && - header_maybe.version >= 2 && - MemoryMatch(header_maybe.magic, g_coff_big_header_magic, sizeof(header_maybe.magic))) - { - file_format = RB_FileFormat_COFF_BigOBJ; - } - } - if(file_format == RB_FileFormat_Null) - { - Temp scratch = scratch_begin(&arena, 1); - COFF_FileHeader header_maybe = {0}; - os_file_read_struct(file, 0, &header_maybe); - U64 section_count = header_maybe.section_count; - U64 section_hdr_opl_off = sizeof(header_maybe) + section_count*sizeof(COFF_SectionHeader); - FileProperties props = os_properties_from_file(file); - - // rjf: check if machine type is valid - B32 machine_type_is_valid = 0; - switch(header_maybe.machine) - { - case COFF_MachineType_Unknown: - case COFF_MachineType_X86: case COFF_MachineType_X64: - case COFF_MachineType_Am33: case COFF_MachineType_Arm: - case COFF_MachineType_Arm64: case COFF_MachineType_ArmNt: - case COFF_MachineType_Ebc: case COFF_MachineType_Ia64: - case COFF_MachineType_M32R: case COFF_MachineType_Mips16: - case COFF_MachineType_MipsFpu:case COFF_MachineType_MipsFpu16: - case COFF_MachineType_PowerPc:case COFF_MachineType_PowerPcFp: - case COFF_MachineType_R4000: case COFF_MachineType_RiscV32: - case COFF_MachineType_RiscV64:case COFF_MachineType_RiscV128: - case COFF_MachineType_Sh3: case COFF_MachineType_Sh3Dsp: - case COFF_MachineType_Sh4: case COFF_MachineType_Sh5: - case COFF_MachineType_Thumb: case COFF_MachineType_WceMipsV2: - { - machine_type_is_valid = 1; - }break; - } - - // rjf: check if sections are valid - B32 sections_are_valid = 0; - if(machine_type_is_valid) - { - if(props.size >= section_hdr_opl_off) - { - COFF_SectionHeader *section_hdrs = push_array(scratch.arena, COFF_SectionHeader, section_count); - os_file_read(file, r1u64(sizeof(header_maybe), sizeof(header_maybe) + section_count*sizeof(COFF_SectionHeader)), section_hdrs); - B32 section_ranges_valid = 1; - for EachIndex(section_hdr_idx, section_count) + U8 
msf20_magic_maybe[sizeof(msf_msf20_magic)] = {0}; + os_file_read(file, r1u64(0, sizeof(msf20_magic_maybe)), msf20_magic_maybe); + if(MemoryMatch(msf20_magic_maybe, msf_msf20_magic, sizeof(msf20_magic_maybe))) { - COFF_SectionHeader *hdr = §ion_hdrs[section_hdr_idx]; - if(!(hdr->flags & COFF_SectionFlag_CntUninitializedData)) + file_format = RB_FileFormat_PDB; + } + } + if(file_format == RB_FileFormat_Null) + { + U8 msf70_magic_maybe[sizeof(msf_msf70_magic)] = {0}; + os_file_read(file, r1u64(0, sizeof(msf70_magic_maybe)), msf70_magic_maybe); + if(MemoryMatch(msf70_magic_maybe, msf_msf70_magic, sizeof(msf70_magic_maybe))) + { + file_format = RB_FileFormat_PDB; + } + } + + //- rjf: PE magic -> PE input + if(file_format == RB_FileFormat_Null) + { + PE_DosHeader dos_header_maybe = {0}; + os_file_read_struct(file, 0, &dos_header_maybe); + if(dos_header_maybe.magic == PE_DOS_MAGIC) + { + U32 pe_magic_maybe = 0; + os_file_read_struct(file, dos_header_maybe.coff_file_offset, &pe_magic_maybe); + if(pe_magic_maybe == PE_MAGIC) { - U64 min = hdr->foff; - U64 max = min + hdr->fsize; - if(hdr->fsize > 0 && !(section_hdr_opl_off <= min && min <= max && max <= props.size)) - { - section_ranges_valid = 0; - break; - } + file_format = RB_FileFormat_PE; } } - sections_are_valid = section_ranges_valid; } - } - - // rjf: check if symbol table is valid - B32 symbol_table_is_valid = 0; - if(sections_are_valid) - { - U64 symbol_table_off = header_maybe.symbol_table_foff; - U64 symbol_table_size = sizeof(COFF_Symbol16)*header_maybe.symbol_count; - U64 symbol_table_opl_off = symbol_table_off+symbol_table_size; - if(symbol_table_off == 0 && symbol_table_size == 0) + + //- rjf: COFF archive magic -> COFF archive input + if(file_format == RB_FileFormat_Null) { - symbol_table_off = section_hdr_opl_off; - symbol_table_opl_off = section_hdr_opl_off; + U8 coff_archive_sig_maybe[sizeof(g_coff_archive_sig)] = {0}; + os_file_read(file, r1u64(0, sizeof(coff_archive_sig_maybe)), 
coff_archive_sig_maybe); + if(MemoryMatch(coff_archive_sig_maybe, g_coff_archive_sig, sizeof(g_coff_archive_sig))) + { + file_format = RB_FileFormat_COFF_Archive; + } } - symbol_table_is_valid = (section_hdr_opl_off <= symbol_table_off && - symbol_table_off <= symbol_table_opl_off && - symbol_table_opl_off <= props.size); + if(file_format == RB_FileFormat_Null) + { + U8 coff_thin_archive_sig_maybe[sizeof(g_coff_thin_archive_sig)] = {0}; + os_file_read(file, r1u64(0, sizeof(coff_thin_archive_sig_maybe)), coff_thin_archive_sig_maybe); + if(MemoryMatch(coff_thin_archive_sig_maybe, g_coff_thin_archive_sig, sizeof(g_coff_thin_archive_sig))) + { + file_format = RB_FileFormat_COFF_ThinArchive; + } + } + + //- rjf: COFF obj magic -> COFF obj input + if(file_format == RB_FileFormat_Null) + { + COFF_BigObjHeader header_maybe = {0}; + os_file_read_struct(file, 0, &header_maybe); + if(header_maybe.sig1 == COFF_MachineType_Unknown && + header_maybe.sig2 == max_U16 && + header_maybe.version >= 2 && + MemoryMatch(header_maybe.magic, g_coff_big_header_magic, sizeof(header_maybe.magic))) + { + file_format = RB_FileFormat_COFF_BigOBJ; + } + } + if(file_format == RB_FileFormat_Null) + { + Temp scratch = scratch_begin(&arena, 1); + COFF_FileHeader header_maybe = {0}; + os_file_read_struct(file, 0, &header_maybe); + U64 section_count = header_maybe.section_count; + U64 section_hdr_opl_off = sizeof(header_maybe) + section_count*sizeof(COFF_SectionHeader); + + // rjf: check if machine type is valid + B32 machine_type_is_valid = 0; + switch(header_maybe.machine) + { + case COFF_MachineType_Unknown: + case COFF_MachineType_X86: case COFF_MachineType_X64: + case COFF_MachineType_Am33: case COFF_MachineType_Arm: + case COFF_MachineType_Arm64: case COFF_MachineType_ArmNt: + case COFF_MachineType_Ebc: case COFF_MachineType_Ia64: + case COFF_MachineType_M32R: case COFF_MachineType_Mips16: + case COFF_MachineType_MipsFpu:case COFF_MachineType_MipsFpu16: + case COFF_MachineType_PowerPc:case 
COFF_MachineType_PowerPcFp: + case COFF_MachineType_R4000: case COFF_MachineType_RiscV32: + case COFF_MachineType_RiscV64:case COFF_MachineType_RiscV128: + case COFF_MachineType_Sh3: case COFF_MachineType_Sh3Dsp: + case COFF_MachineType_Sh4: case COFF_MachineType_Sh5: + case COFF_MachineType_Thumb: case COFF_MachineType_WceMipsV2: + { + machine_type_is_valid = 1; + }break; + } + + // rjf: check if sections are valid + B32 sections_are_valid = 0; + if(machine_type_is_valid) + { + if(props.size >= section_hdr_opl_off) + { + COFF_SectionHeader *section_hdrs = push_array(scratch.arena, COFF_SectionHeader, section_count); + os_file_read(file, r1u64(sizeof(header_maybe), sizeof(header_maybe) + section_count*sizeof(COFF_SectionHeader)), section_hdrs); + B32 section_ranges_valid = 1; + for EachIndex(section_hdr_idx, section_count) + { + COFF_SectionHeader *hdr = §ion_hdrs[section_hdr_idx]; + if(!(hdr->flags & COFF_SectionFlag_CntUninitializedData)) + { + U64 min = hdr->foff; + U64 max = min + hdr->fsize; + if(hdr->fsize > 0 && !(section_hdr_opl_off <= min && min <= max && max <= props.size)) + { + section_ranges_valid = 0; + break; + } + } + } + sections_are_valid = section_ranges_valid; + } + } + + // rjf: check if symbol table is valid + B32 symbol_table_is_valid = 0; + if(sections_are_valid) + { + U64 symbol_table_off = header_maybe.symbol_table_foff; + U64 symbol_table_size = sizeof(COFF_Symbol16)*header_maybe.symbol_count; + U64 symbol_table_opl_off = symbol_table_off+symbol_table_size; + if(symbol_table_off == 0 && symbol_table_size == 0) + { + symbol_table_off = section_hdr_opl_off; + symbol_table_opl_off = section_hdr_opl_off; + } + symbol_table_is_valid = (section_hdr_opl_off <= symbol_table_off && + symbol_table_off <= symbol_table_opl_off && + symbol_table_opl_off <= props.size); + } + + // rjf: symbol table is valid -> is COFF OBJ + if(symbol_table_is_valid) + { + file_format = RB_FileFormat_COFF_OBJ; + } + + scratch_end(scratch); + } + + //- rjf: ELF magic -> 
ELF input + if(file_format == RB_FileFormat_Null) + { + U8 identifier_maybe[ELF_Identifier_Max] = {0}; + os_file_read(file, r1u64(0, sizeof(identifier_maybe)), identifier_maybe); + B32 is_elf_magic = (identifier_maybe[ELF_Identifier_Mag0] == 0x7f && + identifier_maybe[ELF_Identifier_Mag1] == 'E' && + identifier_maybe[ELF_Identifier_Mag2] == 'L' && + identifier_maybe[ELF_Identifier_Mag3] == 'F'); + if(is_elf_magic) + { + file_format = ELF_HdrIs64Bit(identifier_maybe) ? RB_FileFormat_ELF64 : RB_FileFormat_ELF32; + } + } + + //- rjf: RDI magic -> RDI input + if(file_format == RB_FileFormat_Null) + { + RDI_Header rdi_header_maybe = {0}; + os_file_read_struct(file, 0, &rdi_header_maybe); + if(rdi_header_maybe.magic == RDI_MAGIC_CONSTANT) + { + file_format = RB_FileFormat_RDI; + } + } + + os_file_close(file); } - // rjf: symbol table is valid -> is COFF OBJ - if(symbol_table_is_valid) + ////////////////////////// + //- rjf: log file recognition + // + if(file_format != RB_FileFormat_Null) { - file_format = RB_FileFormat_COFF_OBJ; + log_infof("%S recognized as %S\n", n->string, rb_file_format_display_name_table[file_format]); + } + else + { + log_infof("%S was not recognized as a supported format.\n", n->string); } - scratch_end(scratch); - } - - //- rjf: ELF magic -> ELF input - if(file_format == RB_FileFormat_Null) - { - U8 identifier_maybe[ELF_Identifier_Max] = {0}; - os_file_read(file, r1u64(0, sizeof(identifier_maybe)), identifier_maybe); - B32 is_elf_magic = (identifier_maybe[ELF_Identifier_Mag0] == 0x7f && - identifier_maybe[ELF_Identifier_Mag1] == 'E' && - identifier_maybe[ELF_Identifier_Mag2] == 'L' && - identifier_maybe[ELF_Identifier_Mag3] == 'F'); - if(is_elf_magic) + ////////////////////////// + //- rjf: load recognized files + // + String8 file_data = {0}; + if(file_format != RB_FileFormat_Null) { - file_format = RB_FileFormat_ELF; + file_data = os_data_from_file_path(arena, n->string); + } + + ////////////////////////// + //- rjf: PE format => generate new 
implicit path tasks for PDBs + // + if(file_format == RB_FileFormat_PE) + { + Temp scratch = scratch_begin(&arena, 1); + PE_BinInfo pe_bin_info = pe_bin_info_from_data(scratch.arena, file_data); + String8 raw_debug_dir = str8_substr(file_data, pe_bin_info.data_dir_franges[PE_DataDirectoryIndex_DEBUG]); + PE_DebugInfoList debug_dir = pe_debug_info_list_from_raw_debug_dir(scratch.arena, file_data, raw_debug_dir); + for(PE_DebugInfoNode *n = debug_dir.first; n != 0; n = n->next) + { + if(n->v.path.size != 0) + { + str8_list_push(arena, &input_file_path_tasks, n->v.path); + } + } + scratch_end(scratch); + } + + ////////////////////////// + //- rjf: ELF format => generate new implicit path tasks for debug files + // + if(file_format == RB_FileFormat_ELF32 || + file_format == RB_FileFormat_ELF64) + { + ELF_BinInfo elf = elf_bin_from_data(file_data); + ELF_GnuDebugLink debug_link = {0}; + if(elf_parse_debug_link(file_data, &elf, &debug_link) && + debug_link.path.size != 0) + { + str8_list_push(arena, &input_file_path_tasks, debug_link.path); + } + } + + ////////////////////////// + //- rjf: PE => check if contains DWARF + // + if(file_format == RB_FileFormat_PE) + { + Temp scratch = scratch_begin(&arena, 1); + PE_BinInfo pe_bin_info = pe_bin_info_from_data(scratch.arena, file_data); + String8 raw_section_table = str8_substr(file_data, pe_bin_info.section_table_range); + String8 string_table = str8_substr(file_data, pe_bin_info.string_table_range); + U64 section_count = raw_section_table.size / sizeof(COFF_SectionHeader); + COFF_SectionHeader *section_table = (COFF_SectionHeader *)raw_section_table.str; + if(dw_is_dwarf_present_coff_section_table(file_data, string_table, section_count, section_table)) + { + file_format_flags |= RB_FileFormatFlag_HasDWARF; + } + scratch_end(scratch); + } + + ////////////////////////// + //- rjf: ELF => check if contains DWARF + // + if(file_format == RB_FileFormat_ELF32 || + file_format == RB_FileFormat_ELF64) + { + Temp scratch = 
scratch_begin(&arena, 1); + ELF_BinInfo elf_bin = elf_bin_from_data(file_data); + if(dw_is_dwarf_present_elf_section_table(file_data, &elf_bin)) + { + file_format_flags |= RB_FileFormatFlag_HasDWARF; + } + scratch_end(scratch); + } + + ////////////////////////// + //- rjf: push to list + // + { + RB_File *f = push_array(arena, RB_File, 1); + f->format = file_format; + f->format_flags = file_format_flags; + f->path = n->string; + f->data = file_data; + RB_FileNode *file_n = push_array(arena, RB_FileNode, 1); + file_n->v = f; + SLLQueuePush(input_files.first, input_files.last, file_n); + input_files.count += 1; } } - - //- rjf: RDI magic -> RDI input - if(file_format == RB_FileFormat_Null) - { - RDI_Header rdi_header_maybe = {0}; - os_file_read_struct(file, 0, &rdi_header_maybe); - if(rdi_header_maybe.magic == RDI_MAGIC_CONSTANT) - { - file_format = RB_FileFormat_RDI; - } - } - - //- rjf: log file recognition - if(file_format != RB_FileFormat_Null) - { - log_infof("%S recognized as %S\n", n->string, rb_file_format_display_name_table[file_format]); - } - else - { - log_infof("%S was not recognized as a supported format.\n", n->string); - } - - //- rjf: push to list - File *file_n = push_array(arena, File, 1); - file_n->path = n->string; - file_n->format = file_format; - SLLQueuePush(first_input_file, last_input_file, file_n); - - os_file_close(file); } ////////////////////////////// //- rjf: bucket input files by format // - String8List input_paths_from_format_table[RB_FileFormat_COUNT] = {0}; - for(File *f = first_input_file; f != 0; f = f->next) + RB_FileList input_files_from_format_table[RB_FileFormat_COUNT] = {0}; + for(RB_FileNode *n = input_files.first; n != 0; n = n->next) { - str8_list_push(arena, &input_paths_from_format_table[f->format], f->path); + RB_FileNode *file_n = push_array(arena, RB_FileNode, 1); + file_n->v = n->v; + SLLQueuePush(input_files_from_format_table[n->v->format].first, input_files_from_format_table[n->v->format].last, file_n); + 
input_files_from_format_table[n->v->format].count += 1; } ////////////////////////////// @@ -284,7 +375,7 @@ rb_entry_point(CmdLine *cmdline) } //- rjf: we can infer from the user-specified output path - else if(str8_match(str8_skip_last_dot(output_path), str8_lit("rdi"), StringMatchFlag_CaseInsensitive)) + if(str8_match(str8_skip_last_dot(output_path), str8_lit("rdi"), StringMatchFlag_CaseInsensitive)) { output_kind = OutputKind_RDI; log_infof("Output path has .rdi extension; performing `%S`\n", output_kind_info[output_kind].title); @@ -362,196 +453,278 @@ rb_entry_point(CmdLine *cmdline) }break; //////////////////////////// - //- rjf: RDI -> conversion based on inputs + //- rjf: RDI, Breakpad -> conversion based on inputs // case OutputKind_RDI: + case OutputKind_Breakpad: { + //- rjf: no inputs => help + if(cmdline->inputs.node_count == 0) switch(output_kind) + { + default: + case OutputKind_RDI: + { + fprintf(stderr, "The following arguments are accepted:\n\n"); + + fprintf(stderr, "--compress Compresses the RDI file's contents.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "--only: Specifies that only the named subsets of debug\n"); + fprintf(stderr, " information should be generated. See below for\n"); + fprintf(stderr, " a list of valid debug info subset names.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "--omit: Specifies that the named subsets of debug\n"); + fprintf(stderr, " information should not be generated. 
See below\n"); + fprintf(stderr, " for a list of valid debug info subset names.\n"); + fprintf(stderr, "\n"); + + fprintf(stderr, "RAD Debug Info Subsets:\n"); + fprintf(stderr, " - binary_sections Sections in the executable image\n"); + fprintf(stderr, " - units Compilation unit info\n"); + fprintf(stderr, " - procedures Procedure info\n"); + fprintf(stderr, " - global_variables Global variable info\n"); + fprintf(stderr, " - thread_variables Thread-local variable info\n"); + fprintf(stderr, " - scopes Scope info\n"); + fprintf(stderr, " - locals Local variable info\n"); + fprintf(stderr, " - types Type nodes\n"); + fprintf(stderr, " - udts User-defined-type (UDT) info\n"); + fprintf(stderr, " - line_info Source code line <-> virtual offset mapping\n"); + fprintf(stderr, " - global_variable_name_map The name -> global variable table\n"); + fprintf(stderr, " - thread_variable_name_map The name -> thread variable table\n"); + fprintf(stderr, " - procedure_name_map The name -> procedure table\n"); + fprintf(stderr, " - constant_name_map The name -> constant table\n"); + fprintf(stderr, " - type_name_map The name -> user-defined-type table\n"); + fprintf(stderr, " - link_name_procedure_name_map The link_name -> procedure table\n"); + fprintf(stderr, " - normal_source_path_name_map The path -> source file table\n"); + }break; + case OutputKind_Breakpad: + { + fprintf(stderr, "All input files specified on the command line will be dumped. 
The following\n"); + fprintf(stderr, "formats are currently supported: PE, COFF, RDI, and ELF\n\n"); + } + } + //- rjf: unpack subset flags RDIM_SubsetFlags subset_flags = 0xffffffff; + switch(output_kind) { - String8List only_names = cmd_line_strings(cmdline, str8_lit("only")); - if(only_names.node_count != 0) + case OutputKind_RDI: { - subset_flags = 0; - } - for(String8Node *n = only_names.first; n != 0; n = n->next) - { - if(0){} + String8List only_names = cmd_line_strings(cmdline, str8_lit("only")); + if(only_names.node_count != 0) + { + subset_flags = 0; + } + for(String8Node *n = only_names.first; n != 0; n = n->next) + { + if(0){} #define X(name, name_lower) else if(str8_match(n->string, str8_lit(#name_lower), 0)) { subset_flags |= RDIM_SubsetFlag_##name; } - RDIM_Subset_XList + RDIM_Subset_XList #undef X - } - String8List omit_names = cmd_line_strings(cmdline, str8_lit("omit")); - for(String8Node *n = omit_names.first; n != 0; n = n->next) - { - if(0){} + } + String8List omit_names = cmd_line_strings(cmdline, str8_lit("omit")); + for(String8Node *n = omit_names.first; n != 0; n = n->next) + { + if(0){} #define X(name, name_lower) else if(str8_match(n->string, str8_lit(#name_lower), 0)) { subset_flags &= ~RDIM_SubsetFlag_##name; } - RDIM_Subset_XList + RDIM_Subset_XList #undef X - } + } + }break; + case OutputKind_Breakpad: + { + subset_flags = RDIM_SubsetFlag_All & ~(RDIM_SubsetFlag_Types|RDIM_SubsetFlag_UDTs); + }break; } - //- rjf: no inputs => help - if(cmdline->inputs.node_count == 0) + //- rjf: convert inputs to RDI info + B32 convert_done = 0; + RDIM_BakeParams bake_params = {0}; { - fprintf(stderr, "The following arguments are accepted:\n\n"); - - fprintf(stderr, "--compress Compresses the RDI file's contents.\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "--only: Specifies that only the named subsets of debug\n"); - fprintf(stderr, " information should be generated. 
See below for\n"); - fprintf(stderr, " a list of valid debug info subset names.\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "--omit: Specifies that the named subsets of debug\n"); - fprintf(stderr, " information should not be generated. See below\n"); - fprintf(stderr, " for a list of valid debug info subset names.\n"); - fprintf(stderr, "\n"); - - fprintf(stderr, "RAD Debug Info Subsets:\n"); - fprintf(stderr, " - binary_sections Sections in the executable image\n"); - fprintf(stderr, " - units Compilation unit info\n"); - fprintf(stderr, " - procedures Procedure info\n"); - fprintf(stderr, " - global_variables Global variable info\n"); - fprintf(stderr, " - thread_variables Thread-local variable info\n"); - fprintf(stderr, " - scopes Scope info\n"); - fprintf(stderr, " - locals Local variable info\n"); - fprintf(stderr, " - types Type nodes\n"); - fprintf(stderr, " - udts User-defined-type (UDT) info\n"); - fprintf(stderr, " - line_info Source code line <-> virtual offset mapping\n"); - fprintf(stderr, " - global_variable_name_map The name -> global variable table\n"); - fprintf(stderr, " - thread_variable_name_map The name -> thread variable table\n"); - fprintf(stderr, " - procedure_name_map The name -> procedure table\n"); - fprintf(stderr, " - constant_name_map The name -> constant table\n"); - fprintf(stderr, " - type_name_map The name -> user-defined-type table\n"); - fprintf(stderr, " - link_name_procedure_name_map The link_name -> procedure table\n"); - fprintf(stderr, " - normal_source_path_name_map The path -> source file table\n"); - } - - //- rjf: PDB inputs => PDB -> RDI conversion - else if(input_paths_from_format_table[RB_FileFormat_PDB].node_count != 0) - { - log_infof("PDBs specified; producing RDI by converting PDB data\n"); - - // rjf: convert - P2R_ConvertParams convert_params = {0}; + //- rjf: PE inputs w/ DWARF, or ELF inputs => DWARF -> RDI conversion + if(!convert_done && + ((input_files_from_format_table[RB_FileFormat_PE].count != 0 
&& + input_files_from_format_table[RB_FileFormat_PE].first->v->format_flags & RB_FileFormatFlag_HasDWARF) || + (input_files_from_format_table[RB_FileFormat_ELF32].count != 0 || + input_files_from_format_table[RB_FileFormat_ELF64].count != 0))) { - convert_params.input_pdb_name = str8_list_first(&input_paths_from_format_table[RB_FileFormat_PDB]); - convert_params.input_exe_name = str8_list_first(&input_paths_from_format_table[RB_FileFormat_PE]); - convert_params.input_pdb_data = os_data_from_file_path(arena, convert_params.input_pdb_name); - convert_params.input_exe_data = os_data_from_file_path(arena, convert_params.input_exe_name); - convert_params.subset_flags = subset_flags; - convert_params.deterministic = cmd_line_has_flag(cmdline, str8_lit("deterministic")); - } - RDIM_BakeParams bake_params = {0}; - ProfScope("convert") bake_params = p2r_convert(arena, async_root, &convert_params); - - // rjf: bake - RDIM_BakeResults bake_results = {0}; - ProfScope("bake") bake_results = rdim_bake(arena, async_root, &bake_params); - - // rjf: serialize - RDIM_SerializedSectionBundle serialized_section_bundle = {0}; - ProfScope("serialize") serialized_section_bundle = rdim_serialized_section_bundle_from_bake_results(&bake_results); - - // rjf: compress - RDIM_SerializedSectionBundle serialized_section_bundle__compressed = serialized_section_bundle; - if(cmd_line_has_flag(cmdline, str8_lit("compress"))) ProfScope("compress") - { - serialized_section_bundle__compressed = rdim_compress(arena, &serialized_section_bundle); + convert_done = 1; + log_infof("PEs w/ DWARF, or ELFs specified; producing RDI by converting DWARF data\n"); + + // rjf: convert + D2R_ConvertParams convert_params = {0}; + { + B32 got_exe = 0; + B32 got_dbg = 0; + if(!got_exe && !got_dbg) + { + for(RB_FileNode *n = input_files_from_format_table[RB_FileFormat_PE].first; n != 0; n = n->next) + { + if(n->v->format_flags & RB_FileFormatFlag_HasDWARF) + { + got_exe = 1; + got_dbg = 1; + convert_params.dbg_name = 
n->v->path; + convert_params.dbg_data = n->v->data; + convert_params.exe_name = n->v->path; + convert_params.exe_data = n->v->data; + convert_params.exe_kind = ExecutableImageKind_CoffPe; + break; + } + } + } + if(!got_exe) + { + for(RB_FileNode *n = input_files_from_format_table[RB_FileFormat_ELF32].first; n != 0; n = n->next) + { + got_exe = 1; + convert_params.exe_name = n->v->path; + convert_params.exe_data = n->v->data; + convert_params.exe_kind = ExecutableImageKind_Elf32; + if(!(n->v->format_flags & RB_FileFormatFlag_HasDWARF)) + { + break; + } + } + } + if(!got_exe) + { + for(RB_FileNode *n = input_files_from_format_table[RB_FileFormat_ELF64].first; n != 0; n = n->next) + { + got_exe = 1; + convert_params.exe_name = n->v->path; + convert_params.exe_data = n->v->data; + convert_params.exe_kind = ExecutableImageKind_Elf64; + if(!(n->v->format_flags & RB_FileFormatFlag_HasDWARF)) + { + break; + } + } + } + if(!got_dbg) + { + for(RB_FileNode *n = input_files_from_format_table[RB_FileFormat_ELF32].first; n != 0; n = n->next) + { + if(n->v->format_flags & RB_FileFormatFlag_HasDWARF) + { + got_dbg = 1; + convert_params.dbg_name = n->v->path; + convert_params.dbg_data = n->v->data; + break; + } + } + } + if(!got_dbg) + { + for(RB_FileNode *n = input_files_from_format_table[RB_FileFormat_ELF64].first; n != 0; n = n->next) + { + if(n->v->format_flags & RB_FileFormatFlag_HasDWARF) + { + got_dbg = 1; + convert_params.dbg_name = n->v->path; + convert_params.dbg_data = n->v->data; + break; + } + } + } + convert_params.subset_flags = subset_flags; + convert_params.deterministic = cmd_line_has_flag(cmdline, str8_lit("deterministic")); + } + ProfScope("convert") bake_params = d2r_convert(arena, async_root, &convert_params); + + // rjf: no output path? 
-> pick one based on debug + if(output_path.size == 0) + { + output_path = push_str8f(arena, "%S.rdi", str8_chop_last_dot(convert_params.dbg_name)); + } } - // rjf: serialize - String8List blobs = rdim_file_blobs_from_section_bundle(arena, &serialized_section_bundle__compressed); - str8_list_concat_in_place(&output_blobs, &blobs); - - // rjf: no output path? -> pick one based on PDB - if(output_path.size == 0) + //- rjf: PDB inputs => PDB -> RDI conversion + if(!convert_done && + input_files_from_format_table[RB_FileFormat_PDB].count != 0) { - output_path = push_str8f(arena, "%S.rdi", str8_chop_last_dot(convert_params.input_pdb_name)); + convert_done = 1; + log_infof("PDBs specified; producing RDI by converting PDB data\n"); + + // rjf: get EXE/PDB file data + RB_File *exe_file = rb_file_list_first(&input_files_from_format_table[RB_FileFormat_PE]); + RB_File *pdb_file = rb_file_list_first(&input_files_from_format_table[RB_FileFormat_PDB]); + String8 exe_path = exe_file->path; + String8 pdb_path = pdb_file->path; + String8 exe_data = os_data_from_file_path(arena, exe_path); + String8 pdb_data = os_data_from_file_path(arena, pdb_path); + + // rjf: convert + P2R_ConvertParams convert_params = {0}; + { + convert_params.input_pdb_name = pdb_path; + convert_params.input_exe_name = exe_path; + convert_params.input_pdb_data = pdb_data; + convert_params.input_exe_data = exe_data; + convert_params.subset_flags = subset_flags; + convert_params.deterministic = cmd_line_has_flag(cmdline, str8_lit("deterministic")); + } + ProfScope("convert") bake_params = p2r_convert(arena, async_root, &convert_params); + + // rjf: no output path? -> pick one based on PDB + if(output_path.size == 0) + { + output_path = push_str8f(arena, "%S.rdi", str8_chop_last_dot(convert_params.input_pdb_name)); + } } } //- rjf: no viable input paths - else + if(!convert_done && cmdline->inputs.node_count != 0) { - log_user_errorf("Could not create an RDI file from the specified inputs. 
You must provide either a valid PDB file or an executable image (PE, ELF) file with DWARF information."); - } - }break; - - //////////////////////////// - //- rjf: dump -> textual dump of inputs - // - case OutputKind_Dump: - { - //- rjf: no inputs => help - if(cmdline->inputs.node_count == 0) - { - fprintf(stderr, "All input files specified on the command line will be dumped. The following\n"); - fprintf(stderr, "formats are currently supported: PE, COFF, RDI, and ELF\n\n"); + log_user_errorf("Could not load debug info from the specified inputs. You must provide either a valid PDB file or an executable image (PE, ELF) file with DWARF debug info."); } - //- rjf: dump input files in order - String8List dump = {0}; - for(File *f = first_input_file; f != 0; f = f->next) + //- rjf: convert done => generate output + if(convert_done) switch(output_kind) { - - } - - //- rjf: join with output - str8_list_concat_in_place(&output_blobs, &dump); - }break; - - //////////////////////////// - //- rjf: breakpad -> conversion based on inputs - // - case OutputKind_Breakpad: - { - //- rjf: no inputs => help - if(cmdline->inputs.node_count == 0) - { - fprintf(stderr, "Pass a path to a PDB file, and optionally its associated PE file, for which\n"); - fprintf(stderr, "a Breakpad file should be generated.\n"); - } - - //- rjf: PDB inputs => PDB -> Breakpad conversion - else if(input_paths_from_format_table[RB_FileFormat_PDB].node_count != 0) - { - log_infof("PDBs specified; producing Breakpad by converting PDB data\n"); - - // rjf: convert - P2R_ConvertParams convert_params = {0}; + //- rjf: generate RDI blobs + case OutputKind_RDI: { - convert_params.input_pdb_name = str8_list_first(&input_paths_from_format_table[RB_FileFormat_PDB]); - convert_params.input_exe_name = str8_list_first(&input_paths_from_format_table[RB_FileFormat_PE]); - convert_params.input_pdb_data = os_data_from_file_path(arena, convert_params.input_pdb_name); - convert_params.input_exe_data = 
os_data_from_file_path(arena, convert_params.input_exe_name); - convert_params.subset_flags = RDIM_SubsetFlag_All & ~(RDIM_SubsetFlag_Types|RDIM_SubsetFlag_UDTs); - convert_params.deterministic = cmd_line_has_flag(cmdline, str8_lit("deterministic")); - } - RDIM_BakeParams bake_params = {0}; - ProfScope("convert") bake_params = p2r_convert(arena, async_root, &convert_params); + // rjf: bake + RDIM_BakeResults bake_results = {0}; + ProfScope("bake") bake_results = rdim_bake(arena, async_root, &bake_params); + + // rjf: serialize + RDIM_SerializedSectionBundle serialized_section_bundle = {0}; + ProfScope("serialize") serialized_section_bundle = rdim_serialized_section_bundle_from_bake_results(&bake_results); + + // rjf: compress + RDIM_SerializedSectionBundle serialized_section_bundle__compressed = serialized_section_bundle; + if(cmd_line_has_flag(cmdline, str8_lit("compress"))) ProfScope("compress") + { + serialized_section_bundle__compressed = rdim_compress(arena, &serialized_section_bundle); + } + + // rjf: serialize + String8List blobs = rdim_file_blobs_from_section_bundle(arena, &serialized_section_bundle__compressed); + str8_list_concat_in_place(&output_blobs, &blobs); + }break; - //- rjf: produce breakpad text - String8List dump = {0}; - ProfScope("dump breakpad text") + //- rjf: generate breakpad text + case OutputKind_Breakpad: { p2b_async_root = async_root; - RDIM_BakeParams *params = &bake_params; + String8List dump = {0}; //- rjf: kick off unit vmap baking - P2B_BakeUnitVMapIn bake_unit_vmap_in = {¶ms->units}; + P2B_BakeUnitVMapIn bake_unit_vmap_in = {&bake_params.units}; ASYNC_Task *bake_unit_vmap_task = async_task_launch(arena, p2b_bake_unit_vmap_work, .input = &bake_unit_vmap_in); //- rjf: kick off line-table baking - P2B_BakeLineTablesIn bake_line_tables_in = {¶ms->line_tables}; + P2B_BakeLineTablesIn bake_line_tables_in = {&bake_params.line_tables}; ASYNC_Task *bake_line_tables_task = async_task_launch(arena, p2b_bake_line_table_work, .input = 
&bake_line_tables_in); //- rjf: build unit -> line table idx array - U64 unit_count = params->units.total_count; + U64 unit_count = bake_params.units.total_count; U32 *unit_line_table_idxs = push_array(arena, U32, unit_count+1); { U64 dst_idx = 1; - for(RDIM_UnitChunkNode *n = params->units.first; n != 0; n = n->next) + for(RDIM_UnitChunkNode *n = bake_params.units.first; n != 0; n = n->next) { for(U64 n_idx = 0; n_idx < n->count; n_idx += 1, dst_idx += 1) { @@ -561,12 +734,12 @@ rb_entry_point(CmdLine *cmdline) } //- rjf: dump MODULE record - str8_list_pushf(arena, &dump, "MODULE windows x86_64 %I64x %S\n", params->top_level_info.exe_hash, params->top_level_info.exe_name); + str8_list_pushf(arena, &dump, "MODULE windows x86_64 %I64x %S\n", bake_params.top_level_info.exe_hash, bake_params.top_level_info.exe_name); //- rjf: dump FILE records ProfScope("dump FILE records") { - for(RDIM_SrcFileChunkNode *n = params->src_files.first; n != 0; n = n->next) + for(RDIM_SrcFileChunkNode *n = bake_params.src_files.first; n != 0; n = n->next) { for(U64 idx = 0; idx < n->count; idx += 1) { @@ -590,12 +763,12 @@ rb_entry_point(CmdLine *cmdline) ProfEnd(); //- rjf: kick off FUNC & line record dump tasks - P2B_DumpProcChunkIn *dump_proc_chunk_in = push_array(arena, P2B_DumpProcChunkIn, params->procedures.chunk_count); - ASYNC_Task **dump_proc_chunk_tasks = push_array(arena, ASYNC_Task *, params->procedures.chunk_count); + P2B_DumpProcChunkIn *dump_proc_chunk_in = push_array(arena, P2B_DumpProcChunkIn, bake_params.procedures.chunk_count); + ASYNC_Task **dump_proc_chunk_tasks = push_array(arena, ASYNC_Task *, bake_params.procedures.chunk_count); ProfScope("kick off FUNC & line record dump tasks") { U64 task_idx = 0; - for(RDIM_SymbolChunkNode *n = params->procedures.first; n != 0; n = n->next, task_idx += 1) + for(RDIM_SymbolChunkNode *n = bake_params.procedures.first; n != 0; n = n->next, task_idx += 1) { dump_proc_chunk_in[task_idx].unit_vmap = unit_vmap; 
dump_proc_chunk_in[task_idx].unit_vmap_count = unit_vmap_count; @@ -610,30 +783,69 @@ rb_entry_point(CmdLine *cmdline) //- rjf: join FUNC & line record dump tasks ProfScope("join FUNC & line record dump tasks") { - for(U64 idx = 0; idx < params->procedures.chunk_count; idx += 1) + for(U64 idx = 0; idx < bake_params.procedures.chunk_count; idx += 1) { String8List *out = async_task_join_struct(dump_proc_chunk_tasks[idx], String8List); str8_list_concat_in_place(&dump, out); } } - } - - //- rjf: join with out - str8_list_concat_in_place(&output_blobs, &dump); - - // rjf: no output path? -> pick one based on PDB - if(output_path.size == 0) - { - output_path = push_str8f(arena, "%S.psyms", str8_chop_last_dot(convert_params.input_pdb_name)); - } - } - - //- rjf: no viable input paths - else - { - log_user_errorf("Could not create a Breakpad file from the specified inputs. You must provide either a valid PDB file or an executable image (PE, ELF) file with DWARF information."); + + str8_list_concat_in_place(&output_blobs, &dump); + }break; } }break; + + //////////////////////////// + //- rjf: dump -> textual dump of inputs + // + case OutputKind_Dump: + { + //- rjf: no inputs => help + if(cmdline->inputs.node_count == 0) + { + fprintf(stderr, "All input files specified on the command line will be dumped. 
The following\n"); + fprintf(stderr, "formats are currently supported: PE, COFF, RDI, and ELF\n\n"); + } + + //- rjf: dump input files in order + String8List dump = {0}; + for(RB_FileNode *n = input_files.first; n != 0; n = n->next) + { + // TODO(rjf) + } + + //- rjf: join with output + str8_list_concat_in_place(&output_blobs, &dump); + }break; + } + + ////////////////////////////// + //- rjf: write outputs + // + if(output_path.size != 0) ProfScope("write outputs [file]") + { + OS_Handle output_file = os_file_open(OS_AccessFlag_Read|OS_AccessFlag_Write, output_path); + U64 off = 0; + for(String8Node *n = output_blobs.first; n != 0; n = n->next) + { + os_file_write(output_file, r1u64(off, off+n->string.size), n->string.str); + off += n->string.size; + } + os_file_close(output_file); + log_infof("Results written to %S", output_path); + } + else ProfScope("write outputs [stdout]") + { + for(String8Node *n = output_blobs.first; n != 0; n = n->next) + { + for(U64 off = 0; off < n->string.size;) + { + U64 size_to_write = Min(n->string.size - off, GB(2)); + fwrite(n->string.str + off, size_to_write, 1, stdout); + off += size_to_write; + } + } + log_info(str8_lit("Results written to stdout")); } ////////////////////////////// @@ -656,31 +868,4 @@ rb_entry_point(CmdLine *cmdline) fprintf(stderr, "%.*s\n", str8_varg(n->string)); } } - - ////////////////////////////// - //- rjf: write outputs - // - if(output_path.size != 0) ProfScope("write outputs [file]") - { - OS_Handle output_file = os_file_open(OS_AccessFlag_Read|OS_AccessFlag_Write, output_path); - U64 off = 0; - for(String8Node *n = output_blobs.first; n != 0; n = n->next) - { - os_file_write(output_file, r1u64(off, off+n->string.size), n->string.str); - off += n->string.size; - } - os_file_close(output_file); - } - else ProfScope("write outputs [stdout]") - { - for(String8Node *n = output_blobs.first; n != 0; n = n->next) - { - for(U64 off = 0; off < n->string.size;) - { - U64 size_to_write = Min(n->string.size - 
off, GB(2)); - fwrite(n->string.str + off, size_to_write, 1, stdout); - off += size_to_write; - } - } - } } diff --git a/src/radbin/radbin.h b/src/radbin/radbin.h index 86c8e064..fc98e130 100644 --- a/src/radbin/radbin.h +++ b/src/radbin/radbin.h @@ -9,6 +9,42 @@ #include "radbin/generated/radbin.meta.h" +//////////////////////////////// +//~ rjf: File Types + +typedef U32 RB_FileFormatFlags; +enum +{ + RB_FileFormatFlag_HasDWARF = (1<<0), +}; + +typedef struct RB_File RB_File; +struct RB_File +{ + RB_FileFormat format; + RB_FileFormatFlags format_flags; + String8 path; + String8 data; +}; + +typedef struct RB_FileNode RB_FileNode; +struct RB_FileNode +{ + RB_FileNode *next; + RB_File *v; +}; + +typedef struct RB_FileList RB_FileList; +struct RB_FileList +{ + RB_FileNode *first; + RB_FileNode *last; + U64 count; +}; + +read_only global RB_File rb_file_nil = {0}; +#define rb_file_list_first(list) ((list)->first ? (list)->first->v : &rb_file_nil) + //////////////////////////////// //~ rjf: Top-Level Entry Point diff --git a/src/radbin/radbin.mdesk b/src/radbin/radbin.mdesk index 7678420d..a729b554 100644 --- a/src/radbin/radbin.mdesk +++ b/src/radbin/radbin.mdesk @@ -10,7 +10,8 @@ RB_FileFormatTable: { COFF_BigOBJ "COFF (Big OBJ)" } { COFF_Archive "COFF (Archive)" } { COFF_ThinArchive "COFF (Thin Archive)" } - { ELF "ELF" } + { ELF32 "ELF32" } + { ELF64 "ELF64" } { RDI "RDI" } } diff --git a/src/radbin/radbin_main.c b/src/radbin/radbin_main.c index c5a95fbb..776931e8 100644 --- a/src/radbin/radbin_main.c +++ b/src/radbin/radbin_main.c @@ -16,6 +16,7 @@ //- rjf: [h] #include "base/base_inc.h" +#include "linker/hash_table.h" #include "os/os_inc.h" #include "path/path.h" #include "async/async.h" @@ -28,17 +29,25 @@ #include "elf/elf_parse.h" #include "codeview/codeview.h" #include "codeview/codeview_parse.h" +#include "dwarf/dwarf.h" +#include "dwarf/dwarf_parse.h" +#include "dwarf/dwarf_coff.h" +#include "dwarf/dwarf_elf.h" #include "msf/msf.h" #include 
"msf/msf_parse.h" #include "pdb/pdb.h" #include "pdb/pdb_parse.h" #include "pdb/pdb_stringize.h" +#include "rdi_from_coff/rdi_from_coff.h" +#include "rdi_from_elf/rdi_from_elf.h" #include "rdi_from_pdb/rdi_from_pdb.h" #include "rdi_breakpad_from_pdb/rdi_breakpad_from_pdb.h" +#include "rdi_from_dwarf/rdi_from_dwarf.h" #include "radbin/radbin.h" //- rjf: [c] #include "base/base_inc.c" +#include "linker/hash_table.c" #include "os/os_inc.c" #include "path/path.c" #include "async/async.c" @@ -51,13 +60,20 @@ #include "elf/elf_parse.c" #include "codeview/codeview.c" #include "codeview/codeview_parse.c" +#include "dwarf/dwarf.c" +#include "dwarf/dwarf_parse.c" +#include "dwarf/dwarf_coff.c" +#include "dwarf/dwarf_elf.c" #include "msf/msf.c" #include "msf/msf_parse.c" #include "pdb/pdb.c" #include "pdb/pdb_parse.c" #include "pdb/pdb_stringize.c" +#include "rdi_from_coff/rdi_from_coff.c" +#include "rdi_from_elf/rdi_from_elf.c" #include "rdi_from_pdb/rdi_from_pdb.c" #include "rdi_breakpad_from_pdb/rdi_breakpad_from_pdb.c" +#include "rdi_from_dwarf/rdi_from_dwarf.c" #include "radbin/radbin.c" //////////////////////////////// diff --git a/src/radcon/radcon.c b/src/radcon/radcon.c index c66e40ea..55593b54 100644 --- a/src/radcon/radcon.c +++ b/src/radcon/radcon.c @@ -13,12 +13,12 @@ internal RC_Context rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) { Temp scratch = scratch_begin(&arena, 1); - + if (cmdl->inputs.node_count > 2) { fprintf(stderr, "error: too many input files on the command line.\n"); os_abort(1); } - + B32 is_pe_present = 0; B32 is_pdb_present = 0; B32 is_elf_present = 0; @@ -31,7 +31,7 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) String8 elf_data = {0}; String8 elf_debug_name = {0}; String8 elf_debug_data = {0}; - + // // Set typed inputs // @@ -71,7 +71,7 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) } is_elf_debug_present = 1; } - + // // Pick conversion driver // @@ -87,18 +87,18 @@ rc_context_from_cmd_line(Arena *arena, CmdLine 
*cmdl) os_abort(1); } } - + // // Load inputs // for (String8Node *input_n = cmdl->inputs.first; input_n != 0; input_n = input_n->next) { String8 input_data = os_data_from_file_path(arena, input_n->string); - + if (input_data.size == 0) { fprintf(stderr, "unable to read input %.*s\n", str8_varg(input_n->string)); os_abort(1); } - + if (pe_check_magic(input_data)) { if (is_pe_present) { fprintf(stderr, "error: too many PE files are specified on the command line\n"); @@ -139,7 +139,7 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) fprintf(stderr, "error: unknown file format %.*s\n", str8_varg(input_n->string)); } } - + // // Validate input combos // @@ -159,54 +159,54 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) } os_abort(1); } - + if (is_pe_present && (is_elf_present || is_elf_debug_present)) { fprintf(stderr, "error: command line has too many image types specified.\n"); os_abort(1); } - - - ImageType image = Image_Null; + + + ExecutableImageKind image = ExecutableImageKind_Null; String8 image_name = {0}; String8 image_data = {0}; String8 debug_name = {0}; String8 debug_data = {0}; - + B32 check_guid = 0; Guid pe_pdb_guid = {0}; - + B32 elf_has_debug_link = 0; ELF_GnuDebugLink debug_link = {0}; - + // // Input has PE/COFF // if (is_pe_present) { - image = Image_CoffPe; + image = ExecutableImageKind_CoffPe; image_name = pe_name; image_data = pe_data; - + PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, pe_data); String8 raw_debug_dir = str8_substr(pe_data, pe.data_dir_franges[PE_DataDirectoryIndex_DEBUG]); - PE_DebugInfoList debug_dir = pe_parse_debug_directory(scratch.arena, pe_data, raw_debug_dir); + PE_DebugInfoList debug_dir = pe_debug_info_list_from_raw_debug_dir(scratch.arena, pe_data, raw_debug_dir); for (PE_DebugInfoNode *debug_n = debug_dir.first; debug_n != 0; debug_n = debug_n->next) { PE_DebugInfo *debug = &debug_n->v; if (debug->header.type == PE_DebugDirectoryType_CODEVIEW) { if (debug->u.codeview.magic == 
PE_CODEVIEW_PDB70_MAGIC) { check_guid = 1; pe_pdb_guid = debug->u.codeview.pdb70.header.guid; - + if (!is_pdb_present) { pdb_name = debug->u.codeview.pdb70.path; pdb_data = rc_data_from_file_path(arena, pdb_name); is_pdb_present = 1; } - + break; } } } - + if (driver == RC_Driver_Null || driver == RC_Driver_Dwarf) { PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, pe_data); String8 raw_section_table = str8_substr(pe_data, pe.section_table_range); @@ -224,19 +224,19 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) } } } - + if (is_elf_present || is_elf_debug_present) { if (driver != RC_Driver_Null && driver != RC_Driver_Dwarf) { fprintf(stderr, "error: ELF inputs are only supported when using DWARF driver.\n"); os_abort(1); } - + // // Load image ELF // ELF_BinInfo elf = elf_bin_from_data(elf_data); B32 has_elf_dwarf = dw_is_dwarf_present_elf_section_table(elf_data, &elf); - + // // ELF doesn't have debug info and no .debug was specified on command line, // try to load .debug via debug link @@ -248,54 +248,54 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) elf_debug_data = rc_data_from_file_path(arena, debug_link.path); is_elf_debug_present = 1; } - + // // Load .debug ELF // ELF_BinInfo elf_debug = elf_bin_from_data(elf_debug_data); B32 has_elf_debug_dwarf = dw_is_dwarf_present_elf_section_table(elf_debug_data, &elf_debug); - + // // Input is image ELF and .debug ELF // B32 is_split_elf = is_elf_present && is_elf_debug_present && !has_elf_dwarf && has_elf_debug_dwarf; if (is_split_elf) { driver = RC_Driver_Dwarf; - image = ELF_HdrIs64Bit(elf.hdr.e_ident) ? Image_Elf64 : Image_Elf32; + image = ELF_HdrIs64Bit(elf.hdr.e_ident) ? 
ExecutableImageKind_Elf64 : ExecutableImageKind_Elf32; image_name = elf_name; image_data = elf_data; debug_name = elf_debug_name; debug_data = elf_debug_data; goto driver_found; } - + // // Input ELF is image with debug info // B32 is_monolithic_elf = is_elf_present && !is_elf_debug_present && has_elf_dwarf; if (is_monolithic_elf) { driver = RC_Driver_Dwarf; - image = ELF_HdrIs64Bit(elf.hdr.e_ident) ? Image_Elf64 : Image_Elf32; + image = ELF_HdrIs64Bit(elf.hdr.e_ident) ? ExecutableImageKind_Elf64 : ExecutableImageKind_Elf32; image_name = elf_name; image_data = elf_data; debug_name = elf_name; debug_data = elf_data; goto driver_found; } - + // // Input ELF is .debug // B32 is_debug_elf = !is_elf_present && is_elf_debug_present && has_elf_debug_dwarf; if (is_debug_elf) { driver = RC_Driver_Dwarf; - image = ELF_HdrIs64Bit(elf_debug.hdr.e_ident) ? Image_Elf64 : Image_Elf32; + image = ELF_HdrIs64Bit(elf_debug.hdr.e_ident) ? ExecutableImageKind_Elf64 : ExecutableImageKind_Elf32; debug_name = elf_debug_name; debug_data = elf_debug_data; goto driver_found; } } - + // // Input is PDB // @@ -312,9 +312,9 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) InvalidPath; } } - + driver_found:; - + // // Handle -out param // @@ -332,8 +332,8 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) out_name = path_replace_file_extension(arena, debug_name, str8_lit("rdi")); } } - - + + // // Validate driver input // @@ -342,8 +342,8 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) fprintf(stderr, "error: DWARF is an invalid input for PDB driver\n"); os_abort(1); } - - + + RC_Context ctx = {0}; ctx.driver = driver; ctx.image = image; @@ -352,23 +352,23 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) ctx.debug_name = debug_name; ctx.debug_data = debug_data; ctx.flags = RC_Flag_Strings| - RC_Flag_IndexRuns| - RC_Flag_BinarySections| - RC_Flag_Units| - RC_Flag_Procedures| - RC_Flag_GlobalVariables| - RC_Flag_ThreadVariables| - RC_Flag_Scopes| - RC_Flag_Locals| - 
RC_Flag_Types| - RC_Flag_UDTs| - RC_Flag_LineInfo| - RC_Flag_GlobalVariableNameMap| - RC_Flag_ThreadVariableNameMap| - RC_Flag_ProcedureNameMap| - RC_Flag_TypeNameMap| - RC_Flag_LinkNameProcedureNameMap| - RC_Flag_NormalSourcePathNameMap; + RC_Flag_IndexRuns| + RC_Flag_BinarySections| + RC_Flag_Units| + RC_Flag_Procedures| + RC_Flag_GlobalVariables| + RC_Flag_ThreadVariables| + RC_Flag_Scopes| + RC_Flag_Locals| + RC_Flag_Types| + RC_Flag_UDTs| + RC_Flag_LineInfo| + RC_Flag_GlobalVariableNameMap| + RC_Flag_ThreadVariableNameMap| + RC_Flag_ProcedureNameMap| + RC_Flag_TypeNameMap| + RC_Flag_LinkNameProcedureNameMap| + RC_Flag_NormalSourcePathNameMap; if (check_guid) { ctx.flags |= RC_Flag_CheckPdbGuid; ctx.guid = pe_pdb_guid; @@ -378,7 +378,7 @@ rc_context_from_cmd_line(Arena *arena, CmdLine *cmdl) ctx.debug_link = debug_link; } ctx.out_name = out_name; - + scratch_end(scratch); return ctx; } @@ -387,40 +387,40 @@ internal String8List rc_run(Arena *arena, RC_Context *rc) { Temp scratch = scratch_begin(&arena, 1); - + ProfBegin("Convert"); RDIM_LocalState *local_state = rdim_local_init(); RDIM_BakeParams *convert2bake = 0; switch (rc->driver) { - case RC_Driver_Null: break; - case RC_Driver_Dwarf: convert2bake = d2r_convert(scratch.arena, local_state, rc); break; - case RC_Driver_Pdb: convert2bake = p2r_convert(scratch.arena, local_state, rc); break; + case RC_Driver_Null: break; + case RC_Driver_Dwarf: convert2bake = d2r_convert(scratch.arena, local_state, rc); break; + case RC_Driver_Pdb: convert2bake = p2r_convert(scratch.arena, local_state, rc); break; } ProfEnd(); - + if (rc->errors.node_count) { NotImplemented; } - + ProfBegin("Bake"); RDIM_BakeResults bake2srlz = rdim_bake(local_state, convert2bake); ProfEnd(); - + ProfBegin("Serialize Bake"); RDIM_SerializedSectionBundle srlz2file = rdim_serialized_section_bundle_from_bake_results(&bake2srlz); ProfEnd(); - + RDIM_SerializedSectionBundle srlz2file_compressed = srlz2file; if (rc->flags & RC_Flag_Compress) { 
ProfBegin("Compress"); srlz2file_compressed = rdim_compress(scratch.arena, &srlz2file); ProfEnd(); } - + ProfBegin("Serialize"); String8List raw_rdi = rdim_file_blobs_from_section_bundle(scratch.arena, &srlz2file_compressed); ProfEnd(); - + scratch_end(scratch); return raw_rdi; } @@ -456,25 +456,25 @@ rc_main(CmdLine *cmdl) fprintf(stderr, " -driver: Sets converter for debug info\n"); } else { Temp scratch = scratch_begin(0,0); - + // make converter context RC_Context rc = rc_context_from_cmd_line(scratch.arena, cmdl); - + // make RDI from context String8List raw_rdi = rc_run(scratch.arena, &rc); - + // output RDI if (rc.errors.node_count == 0) { if (!os_write_data_list_to_file_path(rc.out_name, raw_rdi)) { str8_list_pushf(scratch.arena, &rc.errors, "no write access to path %.*s", str8_varg(rc.out_name)); } } - + // report any errors for (String8Node *error_n = rc.errors.first; error_n != 0; error_n = error_n->next) { fprintf(stderr, "error: %.*s\n", str8_varg(error_n->string)); } - + scratch_end(scratch); } } diff --git a/src/radcon/radcon.h b/src/radcon/radcon.h index faebfd3c..06458d85 100644 --- a/src/radcon/radcon.h +++ b/src/radcon/radcon.h @@ -42,7 +42,7 @@ typedef enum typedef struct RC_Context { - ImageType image; + ExecutableImageKind image; RC_Driver driver; String8 image_name; String8 image_data; diff --git a/src/radcon/radcon_dwarf.c b/src/radcon/radcon_dwarf.c index c26154e6..06ed7627 100644 --- a/src/radcon/radcon_dwarf.c +++ b/src/radcon/radcon_dwarf.c @@ -1031,10 +1031,10 @@ d2r_cu_contrib_map_from_aranges(Arena *arena, DW_Input *input, U64 image_base) return cm; } -internal RDIM_Rng1U64List +internal RDIM_Rng1U64ChunkList d2r_voff_ranges_from_cu_info_off(D2R_CompUnitContribMap map, U64 info_off) { - RDIM_Rng1U64List voff_ranges = {0}; + RDIM_Rng1U64ChunkList voff_ranges = {0}; U64 voff_list_idx = u64_array_bsearch(map.info_off_arr, map.count, info_off); if (voff_list_idx < map.count) { voff_ranges = map.voff_range_arr[voff_list_idx]; @@ -1073,7 
+1073,7 @@ d2r_push_scope(Arena *arena, RDIM_ScopeChunkList *scopes, U64 scope_chunk_cap, D return scope; } -internal RDIM_BakeParams * +internal RDIM_BakeParams d2r_convert(Arena *arena, RDIM_LocalState *local_state, RC_Context *in) { Temp scratch = scratch_begin(&arena, 1); @@ -1091,7 +1091,7 @@ d2r_convert(Arena *arena, RDIM_LocalState *local_state, RC_Context *in) RDIM_BinarySectionList binary_sections = {0}; DW_Input input = {0}; - if (in->image == Image_CoffPe) { + if (in->image == ExecutableImageKind_CoffPe) { PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, in->image_data); // get image arch @@ -1111,7 +1111,7 @@ d2r_convert(Arena *arena, RDIM_LocalState *local_state, RC_Context *in) // make DWARF input input = dw_input_from_coff_section_table(scratch.arena, in->image_data, string_table, section_count, section_table); - } else if (in->image == Image_Elf32 || in->image == Image_Elf64) { + } else if (in->image == ExecutableImageKind_Elf32 || in->image == ExecutableImageKind_Elf64) { ELF_BinInfo elf = elf_bin_from_data(in->debug_data); // get image arch @@ -2066,19 +2066,19 @@ d2r_convert(Arena *arena, RDIM_LocalState *local_state, RC_Context *in) //////////////////////////////// - RDIM_BakeParams *bake_params = push_array(arena, RDIM_BakeParams, 1); - bake_params->top_level_info = top_level_info; - bake_params->binary_sections = binary_sections; - bake_params->units = units; - bake_params->types = types; - bake_params->udts = udts; - bake_params->src_files = src_files; - bake_params->line_tables = line_tables; - bake_params->global_variables = gvars; - bake_params->thread_variables = tvars; - bake_params->procedures = procs; - bake_params->scopes = scopes; - bake_params->inline_sites = inline_sites; + RDIM_BakeParams bake_params = {0}; + bake_params.top_level_info = top_level_info; + bake_params.binary_sections = binary_sections; + bake_params.units = units; + bake_params.types = types; + bake_params.udts = udts; + bake_params.src_files = src_files; + 
bake_params.line_tables = line_tables; + bake_params.global_variables = gvars; + bake_params.thread_variables = tvars; + bake_params.procedures = procs; + bake_params.scopes = scopes; + bake_params.inline_sites = inline_sites; scratch_end(scratch); return bake_params; @@ -2148,4 +2148,3 @@ rdi_reg_from_dw_reg(Arch arch, DW_Reg v, RDI_RegCode *code_out, U64 *off_out, U6 } return 0; } - diff --git a/src/radcon/radcon_dwarf.h b/src/radcon/radcon_dwarf.h index 9ec4e728..7e802ca6 100644 --- a/src/radcon/radcon_dwarf.h +++ b/src/radcon/radcon_dwarf.h @@ -22,9 +22,9 @@ typedef struct D2R_TagNode typedef struct D2R_CompUnitContribMap { - U64 count; - U64 *info_off_arr; - RDIM_Rng1U64List *voff_range_arr; + U64 count; + U64 *info_off_arr; + RDIM_Rng1U64ChunkList *voff_range_arr; } D2R_CompUnitContribMap; //////////////////////////////// diff --git a/src/raddbg/raddbg_main.c b/src/raddbg/raddbg_main.c index 4261d92a..2d486d4a 100644 --- a/src/raddbg/raddbg_main.c +++ b/src/raddbg/raddbg_main.c @@ -5,7 +5,6 @@ //~ rjf: post-0.9.19 TODO notes // //- memory view -// [x] auto-annotations for non-locals // [ ] have smaller visible range than entire memory // space, within some bounds (e.g. 64KB) // [ ] dynamically expand memory space, based on @@ -180,6 +179,7 @@ // [x] if a breakpoint matches the entry point's starting address, its hit count // is not correctly incremented. 
// [x] output: add option for scroll-to-bottom - ensure this shows up in universal ctx menu +// [x] auto-annotations for non-locals //////////////////////////////// //~ rjf: Build Options @@ -203,6 +203,7 @@ //- rjf: [h] #include "base/base_inc.h" +#include "linker/hash_table.h" #include "os/os_inc.h" #include "async/async.h" #include "rdi_format/rdi_format_local.h" @@ -225,7 +226,14 @@ #include "pdb/pdb.h" #include "pdb/pdb_parse.h" #include "pdb/pdb_stringize.h" +#include "dwarf/dwarf.h" +#include "dwarf/dwarf_parse.h" +#include "dwarf/dwarf_coff.h" +#include "dwarf/dwarf_elf.h" +#include "rdi_from_coff/rdi_from_coff.h" +#include "rdi_from_elf/rdi_from_elf.h" #include "rdi_from_pdb/rdi_from_pdb.h" +#include "rdi_from_dwarf/rdi_from_dwarf.h" #include "rdi_breakpad_from_pdb/rdi_breakpad_from_pdb.h" #include "radbin/radbin.h" #include "regs/regs.h" @@ -249,6 +257,7 @@ //- rjf: [c] #include "base/base_inc.c" +#include "linker/hash_table.c" #include "os/os_inc.c" #include "async/async.c" #include "rdi_format/rdi_format_local.c" @@ -271,7 +280,14 @@ #include "pdb/pdb.c" #include "pdb/pdb_parse.c" #include "pdb/pdb_stringize.c" +#include "dwarf/dwarf.c" +#include "dwarf/dwarf_parse.c" +#include "dwarf/dwarf_coff.c" +#include "dwarf/dwarf_elf.c" +#include "rdi_from_coff/rdi_from_coff.c" +#include "rdi_from_elf/rdi_from_elf.c" #include "rdi_from_pdb/rdi_from_pdb.c" +#include "rdi_from_dwarf/rdi_from_dwarf.c" #include "rdi_breakpad_from_pdb/rdi_breakpad_from_pdb.c" #include "radbin/radbin.c" #include "regs/regs.c" diff --git a/src/raddump/raddump.c b/src/raddump/raddump.c index be54449e..f8114ff5 100644 --- a/src/raddump/raddump.c +++ b/src/raddump/raddump.c @@ -3043,7 +3043,7 @@ dw_print_debug_str(Arena *arena, String8List *out, String8 indent, DW_Input *inp } internal void -dw_print_debug_loc(Arena *arena, String8List *out, String8 indent, DW_Input *input, Arch arch, ImageType image_type, B32 relaxed) +dw_print_debug_loc(Arena *arena, String8List *out, String8 indent, 
DW_Input *input, Arch arch, ExecutableImageKind image_type, B32 relaxed) { #if 0 DW_Section info = input->sec[DW_Section_Info]; @@ -3190,7 +3190,7 @@ dw_print_debug_loc(Arena *arena, String8List *out, String8 indent, DW_Input *inp } internal void -dw_print_debug_ranges(Arena *arena, String8List *out, String8 indent, DW_Input *input, Arch arch, ImageType image_type, B32 relaxed) +dw_print_debug_ranges(Arena *arena, String8List *out, String8 indent, DW_Input *input, Arch arch, ExecutableImageKind image_type, B32 relaxed) { NotImplemented; #if 0 @@ -3956,7 +3956,7 @@ dw_print_debug_str_offsets(Arena *arena, String8List *out, String8 indent, DW_In } internal void -dw_format(Arena *arena, String8List *out, String8 indent, RD_Option opts, DW_Input *input, Arch arch, ImageType image_type) +dw_format(Arena *arena, String8List *out, String8 indent, RD_Option opts, DW_Input *input, Arch arch, ExecutableImageKind image_type) { Temp scratch = scratch_begin(&arena, 1); @@ -6610,7 +6610,7 @@ coff_print_obj(Arena *arena, String8List *out, String8 indent, String8 raw_data, if (opts & RD_Option_Dwarf) { DW_Input dwarf_input = dw_input_from_coff_section_table(scratch.arena, raw_data, raw_string_table, header->section_count, section_table); - dw_format(arena, out, indent, opts, &dwarf_input, arch, Image_CoffPe); + dw_format(arena, out, indent, opts, &dwarf_input, arch, ExecutableImageKind_CoffPe); } exit:; @@ -7111,7 +7111,7 @@ pe_print_debug_diretory(Arena *arena, String8List *out, String8 indent, String8 rd_printf("# Debug Directory"); rd_indent(); - PE_DebugInfoList debug_info_list = pe_parse_debug_directory(scratch.arena, raw_data, raw_dir); + PE_DebugInfoList debug_info_list = pe_debug_info_list_from_raw_debug_dir(scratch.arena, raw_data, raw_dir); U64 i = 0; for (PE_DebugInfoNode *entry = debug_info_list.first; entry != 0; entry = entry->next, ++i) { PE_DebugInfo *de = &entry->v; @@ -8300,7 +8300,7 @@ pe_print(Arena *arena, String8List *out, String8 indent, String8 raw_data, 
RD_Op if (opts & RD_Option_Dwarf) { DW_Input dwarf_input = dw_input_from_coff_section_table(scratch.arena, raw_data, raw_string_table, file_header->section_count, sections); - dw_format(arena, out, indent, opts, &dwarf_input, arch, Image_CoffPe); + dw_format(arena, out, indent, opts, &dwarf_input, arch, ExecutableImageKind_CoffPe); } exit:; @@ -8317,7 +8317,7 @@ elf_print_dwarf_expressions(Arena *arena, String8List *out, String8 indent, Stri Arch arch = arch_from_elf_machine(bin.hdr.e_machine); DW_Input dwarf_input = dw_input_from_elf_section_table(scratch.arena, raw_data, &bin); ELF_Class elf_class = bin.hdr.e_ident[ELF_Identifier_Class]; - ImageType image_type = elf_class == ELF_Class_32 ? Image_Elf32 : elf_class == ELF_Class_64 ? Image_Elf64 : ELF_Class_None; + ExecutableImageKind image_type = elf_class == ELF_Class_32 ? ExecutableImageKind_Elf32 : elf_class == ELF_Class_64 ? ExecutableImageKind_Elf64 : ELF_Class_None; B32 relaxed = 1; Rng1U64List cu_ranges = dw_unit_ranges_from_data(scratch.arena, dwarf_input.sec[DW_Section_Info].data); DW_ListUnitInput lu_input = dw_list_unit_input_from_input(scratch.arena, &dwarf_input); diff --git a/src/raddump/raddump.h b/src/raddump/raddump.h index 4c0bab92..5638e0ed 100644 --- a/src/raddump/raddump.h +++ b/src/raddump/raddump.h @@ -231,8 +231,8 @@ internal void dw_print_debug_info (Arena *arena, String8List *out, String8 internal void dw_print_debug_abbrev (Arena *arena, String8List *out, String8 indent, DW_Input *input); internal void dw_print_debug_line (Arena *arena, String8List *out, String8 indent, DW_Input *input, DW_ListUnitInput lu_input, B32 relaxed); internal void dw_print_debug_str (Arena *arena, String8List *out, String8 indent, DW_Input *input); -internal void dw_print_debug_loc (Arena *arena, String8List *out, String8 indent, DW_Input *input, Arch arch, ImageType image_type, B32 relaxed); -internal void dw_print_debug_ranges (Arena *arena, String8List *out, String8 indent, DW_Input *input, Arch arch, 
ImageType image_type, B32 relaxed); +internal void dw_print_debug_loc (Arena *arena, String8List *out, String8 indent, DW_Input *input, Arch arch, ExecutableImageKind image_type, B32 relaxed); +internal void dw_print_debug_ranges (Arena *arena, String8List *out, String8 indent, DW_Input *input, Arch arch, ExecutableImageKind image_type, B32 relaxed); internal void dw_print_debug_aranges (Arena *arena, String8List *out, String8 indent, DW_Input *input); internal void dw_print_debug_addr (Arena *arena, String8List *out, String8 indent, DW_Input *input); internal void dw_print_debug_loclists (Arena *arena, String8List *out, String8 indent, DW_Input *input, Rng1U64Array segment_vranges, Arch arch); diff --git a/src/rdi_from_coff/rdi_from_coff.c b/src/rdi_from_coff/rdi_from_coff.c new file mode 100644 index 00000000..d10d1b25 --- /dev/null +++ b/src/rdi_from_coff/rdi_from_coff.c @@ -0,0 +1,79 @@ +// Copyright (c) Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +internal RDI_Arch +c2r_rdi_arch_from_coff_machine(COFF_MachineType machine) +{ + switch (machine) { + case COFF_MachineType_X86: return RDI_Arch_X86; + case COFF_MachineType_X64: return RDI_Arch_X64; + + case COFF_MachineType_Unknown: + case COFF_MachineType_Am33: + case COFF_MachineType_Arm: + case COFF_MachineType_Arm64: + case COFF_MachineType_ArmNt: + case COFF_MachineType_Ebc: + case COFF_MachineType_Ia64: + case COFF_MachineType_M32R: + case COFF_MachineType_Mips16: + case COFF_MachineType_MipsFpu: + case COFF_MachineType_MipsFpu16: + case COFF_MachineType_PowerPc: + case COFF_MachineType_PowerPcFp: + case COFF_MachineType_R4000: + case COFF_MachineType_RiscV32: + case COFF_MachineType_RiscV64: + case COFF_MachineType_Sh3: + case COFF_MachineType_Sh3Dsp: + case COFF_MachineType_Sh4: + case COFF_MachineType_Sh5: + case COFF_MachineType_Thumb: + case COFF_MachineType_WceMipsV2: + NotImplemented; + default: + return RDI_Arch_NULL; + } +} + +internal 
RDI_BinarySectionFlags +c2r_rdi_binary_section_flags_from_coff_section_flags(COFF_SectionFlags flags) +{ + RDI_BinarySectionFlags result = 0; + if(flags & COFF_SectionFlag_MemRead) + { + result |= RDI_BinarySectionFlag_Read; + } + if(flags & COFF_SectionFlag_MemWrite) + { + result |= RDI_BinarySectionFlag_Write; + } + if(flags & COFF_SectionFlag_MemExecute) + { + result |= RDI_BinarySectionFlag_Execute; + } + return(result); +} + +internal RDIM_BinarySectionList +c2r_rdi_binary_sections_from_coff_sections(Arena *arena, String8 image_data, String8 string_table, U64 sectab_count, COFF_SectionHeader *sectab) +{ + ProfBeginFunction(); + + RDIM_BinarySectionList binary_sections = {0}; + + for (U64 isec = 0; isec < sectab_count; ++isec) { + COFF_SectionHeader *coff_sec = §ab[isec]; + RDIM_BinarySection *sec = rdim_binary_section_list_push(arena, &binary_sections); + + sec->name = coff_name_from_section_header(string_table, coff_sec); + sec->flags = c2r_rdi_binary_section_flags_from_coff_section_flags(coff_sec->flags); + sec->voff_first = coff_sec->voff; + sec->voff_opl = coff_sec->voff + coff_sec->vsize; + sec->foff_first = coff_sec->foff; + sec->foff_opl = coff_sec->foff + coff_sec->fsize; + } + + ProfEnd(); + return binary_sections; +} diff --git a/src/rdi_from_coff/rdi_from_coff.h b/src/rdi_from_coff/rdi_from_coff.h new file mode 100644 index 00000000..521fe8db --- /dev/null +++ b/src/rdi_from_coff/rdi_from_coff.h @@ -0,0 +1,11 @@ +// Copyright (c) Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#ifndef RDI_FROM_COFF_H +#define RDI_FROM_COFF_H + +internal RDI_Arch c2r_rdi_arch_from_coff_machine(COFF_MachineType machine); +internal RDI_BinarySectionFlags c2r_rdi_binary_section_flags_from_coff_section_flags(COFF_SectionFlags flags); +internal RDIM_BinarySectionList c2r_rdi_binary_sections_from_coff_sections(Arena *arena, String8 image_data, String8 string_table, U64 sectab_count, COFF_SectionHeader *sectab); + +#endif // 
RDI_FROM_COFF_H diff --git a/src/rdi_from_dwarf/rdi_from_dwarf.c b/src/rdi_from_dwarf/rdi_from_dwarf.c index ee0c47ed..f89a7560 100644 --- a/src/rdi_from_dwarf/rdi_from_dwarf.c +++ b/src/rdi_from_dwarf/rdi_from_dwarf.c @@ -1,72 +1,11 @@ // Copyright (c) Epic Games Tools // Licensed under the MIT license (https://opensource.org/license/mit/) -internal D2R_User2Convert * -d2r_user2convert_from_cmdln(Arena *arena, CmdLine *cmdline) -{ - D2R_User2Convert *result = push_array(arena, D2R_User2Convert, 1); - - String8 exe_name = cmd_line_string(cmdline, str8_lit("exe")); - String8 debug_name = cmd_line_string(cmdline, str8_lit("debug")); - String8 out_name = cmd_line_string(cmdline, str8_lit("out")); - - // error check params - if (exe_name.size == 0 && debug_name.size == 0) { - str8_list_pushf(arena, &result->errors, "Missing one of the required parameters: '--exe:' or '--debug:'"); - } - if (out_name.size == 0) { - str8_list_pushf(arena, &result->errors, "Missing required parameter: '--out:'"); - } - - // get input EXE or ELF - if (exe_name.size > 0) { - String8 exe_data = os_data_from_file_path(arena, exe_name); - if (exe_data.size == 0) { - str8_list_pushf(arena, &result->errors, "Could not load input EXE file from '%S'", exe_name); - } else { - result->input_exe_name = exe_name; - result->input_exe_data = exe_data; - } - } - - // get input DEBUG - if (debug_name.size > 0) { - String8 debug_data = os_data_from_file_path(arena, debug_name); - if (debug_data.size == 0) { - str8_list_pushf(arena, &result->errors, "Could not load input DEBUG file from '%S'", debug_name); - } else { - result->input_debug_name = debug_name; - result->input_debug_data = debug_data; - } - } - - result->output_name = out_name; - result->flags = ~0ull; - - String8List only_names = cmd_line_strings(cmdline, str8_lit("only")); - String8List omit_names = cmd_line_strings(cmdline, str8_lit("omit")); - - if (only_names.node_count > 0) { - result->flags = 0; - for (String8Node *i = only_names.first; 
i != 0; i = i->next) { -#define X(t,n,k) if (str8_match_lit(Stringify(n), i->string, StringMatchFlag_CaseInsensitive)) \ -result->flags |= D2R_ConvertFlag_##t; - RDI_SectionKind_XList -#undef X - } - } - - if (omit_names.node_count > 0) { - for (String8Node *i = omit_names.first; i != 0; i = i->next) { -#define X(t,n,k) if (str8_match_lit(Stringify(n), i->string, StringMatchFlag_CaseInsensitive)) \ -result->flags &= ~D2R_ConvertFlag_##t; - RDI_SectionKind_XList -#undef X - } - } - - return result; -} +// TODO: +// +// [ ] Currently converter relies on clang's -gdwarf-aranges to generate compile unit ranges, +// however it is optional and in case it is missing converter has to generate the ranges from scopes. +// [ ] Error handling internal RDI_RegCode d2r_rdi_reg_from_dw_reg_code_x64(U64 reg_code) @@ -93,14 +32,14 @@ d2r_rdi_reg_from_dw_reg_code_x86(U64 reg_code) } internal RDI_RegCode -d2r_rdi_reg_from_dw_reg_code(RDI_Arch arch, U64 reg_code) +d2r_rdi_reg_from_dw_reg_code(Arch arch, U64 reg_code) { switch (arch) { - case RDI_Arch_NULL: return 0; - case RDI_Arch_X64: return d2r_rdi_reg_from_dw_reg_code_x64(reg_code); - case RDI_Arch_X86: return d2r_rdi_reg_from_dw_reg_code_x86(reg_code); + case Arch_Null: return 0; + case Arch_x64: return d2r_rdi_reg_from_dw_reg_code_x64(reg_code); + case Arch_x86: return d2r_rdi_reg_from_dw_reg_code_x86(reg_code); + default: InvalidPath; } - InvalidPath; return 0; } @@ -150,7 +89,7 @@ d2r_type_from_attrib(Arena *arena, D2R_TypeTable *type_table, DW_Input *input, D Assert(!"unexpected attrib class"); } } else if (attrib->attrib_kind == DW_Attrib_Null) { - type = type_table->void_type; + type = rdim_builtin_type_from_kind(*type_table->types, RDI_TypeKind_NULL); } return type; @@ -162,6 +101,15 @@ d2r_range_list_from_tag(Arena *arena, DW_Input *input, DW_CompUnit *cu, U64 imag // collect non-contiguous range Rng1U64List ranges = dw_rnglist_from_attrib(arena, input, cu, tag, DW_Attrib_Ranges); + // debase ranges + for (Rng1U64Node 
*range_n = ranges.first; range_n != 0; range_n = range_n->next) { + // TODO: error handling + AssertAlways(range_n->v.min >= image_base); + AssertAlways(range_n->v.max >= image_base); + range_n->v.min -= image_base; + range_n->v.max -= image_base; + } + // collect contiguous range DW_Attrib *lo_pc_attrib = dw_attrib_from_tag(input, cu, tag, DW_Attrib_LowPc); DW_Attrib *hi_pc_attrib = dw_attrib_from_tag(input, cu, tag, DW_Attrib_HighPc); @@ -225,12 +173,110 @@ d2r_collect_proc_params(Arena *arena, D2R_TypeTable *type_table, DW_Input *input return params; } +internal RDI_TypeKind +d2r_unsigned_type_kind_from_size(U64 byte_size) +{ + RDI_TypeKind result = RDI_TypeKind_NULL; + switch (byte_size) { + case 1: result = RDI_TypeKind_U8; break; + case 2: result = RDI_TypeKind_U16; break; + case 4: result = RDI_TypeKind_U32; break; + case 8: result = RDI_TypeKind_U64; break; + } + return result; +} + +internal RDI_TypeKind +d2r_signed_type_kind_from_size(U64 byte_size) +{ + RDI_TypeKind result = RDI_TypeKind_NULL; + switch (byte_size) { + case 1: result = RDI_TypeKind_S8; break; + case 2: result = RDI_TypeKind_S16; break; + case 4: result = RDI_TypeKind_S32; break; + case 8: result = RDI_TypeKind_S64; break; + } + return result; +} + +internal RDI_EvalTypeGroup +d2r_type_group_from_type_kind(RDI_TypeKind x) +{ + switch (x) { + case RDI_TypeKind_NULL: + case RDI_TypeKind_Void: + case RDI_TypeKind_Handle: + break; + case RDI_TypeKind_UChar8: + case RDI_TypeKind_UChar16: + case RDI_TypeKind_UChar32: + case RDI_TypeKind_U8: + case RDI_TypeKind_U16: + case RDI_TypeKind_U32: + case RDI_TypeKind_U64: + case RDI_TypeKind_U128: + case RDI_TypeKind_U256: + case RDI_TypeKind_U512: + return RDI_EvalTypeGroup_U; + case RDI_TypeKind_Char8: + case RDI_TypeKind_Char16: + case RDI_TypeKind_Char32: + case RDI_TypeKind_S8: + case RDI_TypeKind_S16: + case RDI_TypeKind_S32: + case RDI_TypeKind_S64: + case RDI_TypeKind_S128: + case RDI_TypeKind_S256: + case RDI_TypeKind_S512: + return 
RDI_EvalTypeGroup_S; + case RDI_TypeKind_F32: + return RDI_EvalTypeGroup_F32; + case RDI_TypeKind_F64: + return RDI_EvalTypeGroup_F64; + default: InvalidPath; + } + return RDI_EvalTypeGroup_Other; +} internal RDIM_EvalBytecode -d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, String8 expr) +d2r_bytecode_from_expression(Arena *arena, + DW_Input *input, + U64 image_base, + U64 address_size, + Arch arch, + DW_ListUnit *addr_lu, + String8 expr, + DW_CompUnit *cu, + B32 *is_addr_out) { + Temp scratch = scratch_begin(&arena, 1); + RDIM_EvalBytecode bc = {0}; + *is_addr_out = 0; + + struct Frame { + struct Frame *next; + RDI_EvalTypeGroup value_type; + }; + struct Frame *stack = 0; +#define push_of_type(type) do { \ +struct Frame *f = push_array(scratch.arena, struct Frame, 1); \ +f->value_type = d2r_type_group_from_type_kind(type); \ +SLLStackPush(stack, f); \ +} while (0) +#define pop_type() stack->value_type; SLLStackPop(stack) +#define peek_type() stack->value_type + + + RDI_TypeKind addr_type_kind = RDI_TypeKind_NULL; + if (address_size == 4) { + addr_type_kind = RDI_TypeKind_U32; + } else if (address_size == 8) { + addr_type_kind = RDI_TypeKind_U64; + } + + for (U64 cursor = 0; cursor < expr.size; ) { U8 op = 0; cursor += str8_deserial_read_struct(expr, cursor, &op); @@ -249,40 +295,83 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI case DW_ExprOp_Lit27: case DW_ExprOp_Lit28: case DW_ExprOp_Lit29: case DW_ExprOp_Lit30: case DW_ExprOp_Lit31: { U64 lit = op - DW_ExprOp_Lit0; + rdim_bytecode_push_uconst(arena, &bc, lit); + push_of_type(RDI_TypeKind_U64); } break; - case DW_ExprOp_Const1U: size_param = 1; goto const_unsigned; - case DW_ExprOp_Const2U: size_param = 2; goto const_unsigned; - case DW_ExprOp_Const4U: size_param = 4; goto const_unsigned; - case DW_ExprOp_Const8U: size_param = 8; goto const_unsigned; - const_unsigned: { - U64 val = 0; - cursor += 
str8_deserial_read(expr, cursor, &val, size_param, size_param); + case DW_ExprOp_Const1U: { + U8 val = 0; + cursor += str8_deserial_read_struct(expr, cursor, &val); + rdim_bytecode_push_uconst(arena, &bc, val); + push_of_type(RDI_TypeKind_U8); + } break; + case DW_ExprOp_Const2U: { + U16 val = 0; + cursor += str8_deserial_read_struct(expr, cursor, &val); + + rdim_bytecode_push_uconst(arena, &bc, val); + push_of_type(RDI_TypeKind_U16); + } break; + case DW_ExprOp_Const4U: { + U32 val = 0; + cursor += str8_deserial_read_struct(expr, cursor, &val); + + rdim_bytecode_push_uconst(arena, &bc, val); + push_of_type(RDI_TypeKind_U32); + } break; + case DW_ExprOp_Const8U: { + U64 val = 0; + cursor += str8_deserial_read_struct(expr, cursor, &val); + + rdim_bytecode_push_uconst(arena, &bc, val); + push_of_type(RDI_TypeKind_U64); } break; - case DW_ExprOp_Const1S:size_param = 1; goto const_signed; - case DW_ExprOp_Const2S:size_param = 2; goto const_signed; - case DW_ExprOp_Const4S:size_param = 4; goto const_signed; - case DW_ExprOp_Const8S:size_param = 8; goto const_signed; - const_signed: { - S64 val = 0; - cursor += str8_deserial_read(expr, cursor, &val, size_param, size_param); - val = extend_sign64(val, size_param); + case DW_ExprOp_Const1S: { + S8 val = 0; + cursor += str8_deserial_read_struct(expr, cursor, &val); + rdim_bytecode_push_sconst(arena, &bc, val); + push_of_type(RDI_TypeKind_S8); + } break; + case DW_ExprOp_Const2S: { + S16 val = 0; + cursor += str8_deserial_read_struct(expr, cursor, &val); + + rdim_bytecode_push_sconst(arena, &bc, val); + push_of_type(RDI_TypeKind_S16); + } break; + case DW_ExprOp_Const4S: { + S32 val = 0; + cursor += str8_deserial_read_struct(expr, cursor, &val); + + rdim_bytecode_push_sconst(arena, &bc, val); + push_of_type(RDI_TypeKind_S32); + } break; + case DW_ExprOp_Const8S: { + S64 val = 0; + cursor += str8_deserial_read_struct(expr, cursor, &val); + + rdim_bytecode_push_sconst(arena, &bc, val); + push_of_type(RDI_TypeKind_S64); } 
break; case DW_ExprOp_ConstU: { U64 val = 0; cursor += str8_deserial_read_uleb128(expr, cursor, &val); + rdim_bytecode_push_uconst(arena, &bc, val); + push_of_type(RDI_TypeKind_U64); } break; case DW_ExprOp_ConstS: { S64 val = 0; cursor += str8_deserial_read_sleb128(expr, cursor, &val); + rdim_bytecode_push_sconst(arena, &bc, val); + push_of_type(RDI_TypeKind_S64); } break; case DW_ExprOp_Addr: { @@ -291,10 +380,13 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI if (addr >= image_base) { U64 voff = addr - image_base; rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_ModuleOff, voff); + push_of_type(addr_type_kind); } else { // TODO: error handling AssertAlways(!"unable to relocate address"); } + + *is_addr_out = 1; } break; case DW_ExprOp_Reg0: case DW_ExprOp_Reg1: case DW_ExprOp_Reg2: @@ -308,29 +400,42 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI case DW_ExprOp_Reg24: case DW_ExprOp_Reg25: case DW_ExprOp_Reg26: case DW_ExprOp_Reg27: case DW_ExprOp_Reg28: case DW_ExprOp_Reg29: case DW_ExprOp_Reg30: case DW_ExprOp_Reg31: { - U64 reg_code_dw = op - DW_ExprOp_Reg0; - RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); - U32 regread_param = RDI_EncodeRegReadParam(reg_code_rdi, 8, 0); + U64 reg_code_dw = op - DW_ExprOp_Reg0; + U64 reg_size = dw_reg_size_from_code(arch, reg_code_dw); + U64 reg_pos = dw_reg_pos_from_code(arch, reg_code_dw); + + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + U32 regread_param = RDI_EncodeRegReadParam(reg_code_rdi, reg_size, reg_pos); rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegRead, regread_param); + push_of_type(d2r_unsigned_type_kind_from_size(reg_size)); } break; case DW_ExprOp_RegX: { U64 reg_code_dw = 0; cursor += str8_deserial_read_uleb128(expr, cursor, ®_code_dw); + + U64 reg_size = dw_reg_size_from_code(arch, reg_code_dw); + U64 reg_pos = dw_reg_pos_from_code(arch, reg_code_dw); + RDI_RegCode 
reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); - U32 regread_param = RDI_EncodeRegReadParam(reg_code_rdi, 8, 0); + U32 regread_param = RDI_EncodeRegReadParam(reg_code_rdi, reg_size, reg_pos); rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegRead, regread_param); + push_of_type(d2r_unsigned_type_kind_from_size(reg_size)); + + *is_addr_out = 1; } break; case DW_ExprOp_ImplicitValue: { - U64 value_size = 0; - cursor += str8_deserial_read_uleb128(expr, cursor, &value_size); - - String8 val = str8_substr(expr, rng_1u64(cursor, cursor + value_size)); + U64 val_size = 0; + String8 val = {0}; + cursor += str8_deserial_read_uleb128(expr, cursor, &val_size); + cursor += str8_deserial_read_block(expr, cursor, val_size, &val); if (val.size <= sizeof(U64)) { U64 val64 = 0; MemoryCopy(&val64, val.str, val.size); + rdim_bytecode_push_uconst(arena, &bc, val64); + push_of_type(d2r_unsigned_type_kind_from_size(val_size)); } else { // TODO: currenlty no way to encode string in RDIM_EvalBytecodeOp NotImplemented; @@ -338,11 +443,24 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI } break; case DW_ExprOp_Piece: { - NotImplemented; + U64 piece_byte_size = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &piece_byte_size); + + U64 partial_value_size32 = safe_cast_u32(piece_byte_size); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_PartialValue, partial_value_size32); } break; case DW_ExprOp_BitPiece: { - NotImplemented; + U64 piece_bit_size = 0; + U64 piece_bit_off = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &piece_bit_size); + cursor += str8_deserial_read_uleb128(expr, cursor, &piece_bit_off); + + U32 piece_bit_size32 = safe_cast_u32(piece_bit_size); + U32 piece_bit_off32 = safe_cast_u32(piece_bit_off); + + U64 partial_value = ((U64)piece_bit_size32 << 32) | (U64)piece_bit_off32; + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_PartialValueBit, partial_value); } break; case DW_ExprOp_Pick: { @@ -355,7 +473,7 @@ 
d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI U64 addend = 0; cursor += str8_deserial_read_uleb128(expr, cursor, &addend); rdim_bytecode_push_uconst(arena, &bc, addend); - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, RDI_EvalTypeGroup_U); } break; case DW_ExprOp_Skip: { @@ -385,8 +503,13 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegReadDyn, reg_code_rdi); - rdim_bytecode_push_sconst(arena, &bc, reg_off); - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + if (reg_off > 0) { + rdim_bytecode_push_sconst(arena, &bc, reg_off); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, RDI_EvalTypeGroup_S); + } + push_of_type(RDI_TypeKind_S64); + + *is_addr_out = 1; } break; case DW_ExprOp_BRegX: { @@ -397,14 +520,21 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegReadDyn, reg_code_rdi); - rdim_bytecode_push_sconst(arena, &bc, reg_off); - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + if (reg_off > 0) { + rdim_bytecode_push_sconst(arena, &bc, reg_off); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, RDI_EvalTypeGroup_S); + } + push_of_type(RDI_TypeKind_S64); + + *is_addr_out = 1; } break; case DW_ExprOp_FBReg: { S64 frame_off = 0; cursor += str8_deserial_read_sleb128(expr, cursor, &frame_off); rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_FrameOff, frame_off); + + *is_addr_out = 1; } break; case DW_ExprOp_Deref: { @@ -442,8 +572,60 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI case DW_ExprOp_Convert: case DW_ExprOp_GNU_Convert: { - // TODO: - AssertAlways(!"sample"); + U64 type_info_off = 0; + cursor += 
str8_deserial_read_uleb128(expr, cursor, &type_info_off); + + RDI_EvalTypeGroup in = stack ? d2r_type_group_from_type_kind(stack->value_type) : RDI_EvalTypeGroup_Other; + RDI_EvalTypeGroup out = RDI_EvalTypeGroup_Other; + + if (type_info_off == 0) { + // + // 2.5.1 + // Instead of a base type, elements can have a generic type, + // which is an integral type that has the size of an address + // on the target machine and unspecified signedness. + // + out = d2r_type_group_from_type_kind(addr_type_kind); + } else { + // find ref tag + DW_TagNode *tag_node = dw_tag_node_from_info_off(cu, type_info_off); + DW_Tag tag = tag_node->tag; + if (tag.kind == DW_Tag_BaseType) { + // extract encoding attribute + DW_ATE encoding = dw_const_u64_from_attrib(input, cu, tag, DW_Attrib_Encoding); + + // DW_ATE -> RDI_EvalTypeGroup + switch (encoding) { + case DW_ATE_SignedChar: + case DW_ATE_Signed: out = RDI_EvalTypeGroup_S; break; + case DW_ATE_UnsignedChar: + case DW_ATE_Unsigned: out = RDI_EvalTypeGroup_U; break; + case DW_ATE_Float: { + U64 byte_size = dw_const_u64_from_attrib(input, cu, tag, DW_Attrib_ByteSize); + switch (byte_size) { + case 4: out = RDI_EvalTypeGroup_F32; break; + case 8: out = RDI_EvalTypeGroup_F64; break; + default: InvalidPath; + } + } break; + default: InvalidPath; + } + } else { + AssertAlways(!"unexpected tag"); // TODO: error handling + } + } + + if (in == RDI_EvalTypeGroup_Other) { + push_of_type(out); + break; + } + + // TODO: error handling + AssertAlways(in != RDI_EvalTypeGroup_Other); + AssertAlways(out != RDI_EvalTypeGroup_Other); + + U16 operand = (U16)in | ((U16)out << 8); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Convert, operand); } break; case DW_ExprOp_GNU_ParameterRef: { @@ -470,8 +652,17 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI case DW_ExprOp_EntryValue: case DW_ExprOp_GNU_EntryValue: { - // TODO: - AssertAlways(!"sample"); + U64 entry_value_expr_size = 0; + String8 entry_value_expr = {0}; + 
cursor += str8_deserial_read_uleb128(expr, cursor, &entry_value_expr_size); + cursor += str8_deserial_read_block(expr, cursor, entry_value_expr_size, &entry_value_expr); + + B32 dummy = 0; + RDIM_EvalBytecode call_site_bc = d2r_bytecode_from_expression(arena, input, image_base, address_size, arch, addr_lu, entry_value_expr, cu, &dummy); + + U32 encoded_size32 = safe_cast_u32(call_site_bc.encoded_size); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_CallSiteValue, encoded_size32); + rdim_bytecode_concat_in_place(&bc, &call_site_bc); } break; case DW_ExprOp_Addrx: { @@ -510,44 +701,43 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI } break; case DW_ExprOp_Eq: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_EqEq, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_EqEq, peek_type()); } break; case DW_ExprOp_Ge: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_GrEq, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_GrEq, peek_type()); } break; case DW_ExprOp_Gt: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Grtr, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Grtr, peek_type()); } break; case DW_ExprOp_Le: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_LsEq, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_LsEq, peek_type()); } break; case DW_ExprOp_Lt: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Less, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Less, peek_type()); } break; case DW_ExprOp_Ne: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_NtEq, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_NtEq, peek_type()); } break; case DW_ExprOp_Shl: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_LShift, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_LShift, peek_type()); } break; case DW_ExprOp_Shr: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RShift, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RShift, RDI_EvalTypeGroup_U); } break; case DW_ExprOp_Shra: { - // TODO: - AssertAlways(!"sample"); + 
rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RShift, RDI_EvalTypeGroup_S); } break; case DW_ExprOp_Xor: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitXor, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitXor, peek_type()); } break; case DW_ExprOp_XDeref: { @@ -556,43 +746,43 @@ d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI } break; case DW_ExprOp_Abs: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Abs, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Abs, peek_type()); } break; case DW_ExprOp_And: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitAnd, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitAnd, peek_type()); } break; case DW_ExprOp_Div: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Div, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Div, peek_type()); } break; case DW_ExprOp_Minus: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Sub, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Sub, peek_type()); } break; case DW_ExprOp_Mod: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Mod, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Mod, peek_type()); } break; case DW_ExprOp_Mul: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Mul, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Mul, peek_type()); } break; case DW_ExprOp_Neg: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Neg, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Neg, peek_type()); } break; case DW_ExprOp_Not: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitNot, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitNot, peek_type()); } break; case DW_ExprOp_Or: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitOr, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitOr, peek_type()); } break; case DW_ExprOp_Plus: { - rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, peek_type()); } break; case DW_ExprOp_Rot: { @@ -623,31 +813,34 @@ 
d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI } } +#undef peek_type +#undef pop_type +#undef push_of_type + scratch_end(scratch); return bc; } internal RDIM_Location * -d2r_transpile_expression(Arena *arena, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, String8 expr) +d2r_transpile_expression(Arena *arena, DW_Input *input, U64 image_base, U64 address_size, Arch arch, DW_ListUnit *addr_lu, DW_CompUnit *cu, String8 expr) { RDIM_Location *loc = 0; if (expr.size) { + B32 is_addr = 0; + RDIM_EvalBytecode bytecode = d2r_bytecode_from_expression(arena, input, image_base, address_size, arch, addr_lu, expr, cu, &is_addr); + loc = push_array(arena, RDIM_Location, 1); - loc->kind = RDI_LocationKind_AddrBytecodeStream; - loc->bytecode = d2r_bytecode_from_expression(arena, image_base, address_size, arch, addr_lu, expr); + loc->kind = is_addr ? RDI_LocationKind_AddrBytecodeStream : RDI_LocationKind_ValBytecodeStream; + loc->bytecode = bytecode; } return loc; } -internal RDIM_LocationSet -d2r_convert_loclist(Arena *arena, RDIM_ScopeChunkList *scopes, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, DW_LocList loclist) +internal RDIM_Location * +d2r_location_from_attrib(Arena *arena, DW_Input *input, DW_CompUnit *cu, U64 image_base, Arch arch, DW_Tag tag, DW_AttribKind kind) { - RDIM_LocationSet locset = {0}; - for (DW_LocNode *loc_n = loclist.first; loc_n != 0; loc_n = loc_n->next) { - RDIM_Location *location = d2r_transpile_expression(arena, image_base, address_size, arch, addr_lu, loc_n->v.expr); - RDIM_Rng1U64 voff_range = { .min = loc_n->v.range.min - image_base, .min = loc_n->v.range.max - image_base }; - rdim_location_set_push_case(arena, scopes, &locset, voff_range, location); - } - return locset; + String8 expr = dw_exprloc_from_attrib(input, cu, tag, kind); + RDIM_Location *location = d2r_transpile_expression(arena, input, image_base, cu->address_size, arch, cu->addr_lu, cu, expr); + return 
location; } internal RDIM_LocationSet @@ -657,32 +850,88 @@ d2r_locset_from_attrib(Arena *arena, RDIM_ScopeChunkList *scopes, RDIM_Scope *curr_scope, U64 image_base, - U64 address_size, - RDI_Arch arch, - DW_ListUnit *addr_lu, + Arch arch, DW_Tag tag, DW_AttribKind kind) { - RDIM_LocationSet result = {0}; + RDIM_LocationSet locset = {0}; + // extract attrib from tag DW_Attrib *attrib = dw_attrib_from_tag(input, cu, tag, kind); DW_AttribClass attrib_class = dw_value_class_from_attrib(cu, attrib); if (attrib_class == DW_AttribClass_LocList || attrib_class == DW_AttribClass_LocListPtr) { Temp scratch = scratch_begin(&arena, 1); + + // extract location list from attrib DW_LocList loclist = dw_loclist_from_attrib_ptr(scratch.arena, input, cu, attrib); - result = d2r_convert_loclist(arena, scopes, image_base, address_size, arch, addr_lu, loclist); + + // convert location list to RDIM location set + for (DW_LocNode *loc_n = loclist.first; loc_n != 0; loc_n = loc_n->next) { + RDIM_Location *location = d2r_transpile_expression(arena, input, image_base, cu->address_size, arch, cu->addr_lu, cu, loc_n->v.expr); + RDIM_Rng1U64 voff_range = { .min = loc_n->v.range.min - image_base, .max = loc_n->v.range.max - image_base }; + rdim_location_set_push_case(arena, scopes, &locset, voff_range, location); + } + + scratch_end(scratch); } else if (attrib_class == DW_AttribClass_ExprLoc) { - String8 expr = dw_exprloc_from_attrib_ptr(input, cu, attrib); - RDIM_Location *location = d2r_transpile_expression(arena, image_base, address_size, arch, addr_lu, expr); + // extract expression from attrib + String8 expr = dw_exprloc_from_attrib_ptr(input, cu, attrib); + + // convert expression and inherit life-time ranges from enclosed scope + RDIM_Location *location = d2r_transpile_expression(arena, input, image_base, cu->address_size, arch, cu->addr_lu, cu, expr); for (RDIM_Rng1U64Node *range_n = curr_scope->voff_ranges.first; range_n != 0; range_n = range_n->next) { - 
rdim_location_set_push_case(arena, scopes, &result, range_n->v, location); + rdim_location_set_push_case(arena, scopes, &locset, range_n->v, location); } } else if (attrib_class != DW_AttribClass_Null) { AssertAlways(!"unexpected attrib class"); } - return result; + return locset; +} + +internal RDIM_LocationSet +d2r_var_locset_from_tag(Arena *arena, + DW_Input *input, + DW_CompUnit *cu, + RDIM_ScopeChunkList *scopes, + RDIM_Scope *curr_scope, + U64 image_base, + Arch arch, + DW_Tag tag) +{ + RDIM_LocationSet locset = {0}; + + B32 has_const_value = dw_tag_has_attrib(input, cu, tag, DW_Attrib_ConstValue); + B32 has_location = dw_tag_has_attrib(input, cu, tag, DW_Attrib_Location); + + if (has_const_value && has_location) { + // TODO: error handling + AssertAlways(!"unexpected variable encoding"); + } + + if (has_const_value) { + // extract const value + U64 const_value = dw_u64_from_attrib(input, cu, tag, DW_Attrib_ConstValue); + + // make value byte code + RDIM_EvalBytecode bc = {0}; + rdim_bytecode_push_uconst(arena, &bc, const_value); + + // fill out location + RDIM_Location *loc = push_array(arena, RDIM_Location, 1); + loc->kind = RDI_LocationKind_ValBytecodeStream; + loc->bytecode = bc; + + // push location cases + for (RDIM_Rng1U64Node *range_n = curr_scope->voff_ranges.first; range_n != 0; range_n = range_n->next) { + rdim_location_set_push_case(arena, scopes, &locset, range_n->v, loc); + } + } else if (has_location) { + locset = d2r_locset_from_attrib(arena, input, cu, scopes, curr_scope, image_base, arch, tag, DW_Attrib_Location); + } + + return locset; } internal D2R_CompUnitContribMap @@ -695,8 +944,8 @@ d2r_cu_contrib_map_from_aranges(Arena *arena, DW_Input *input, U64 image_base) D2R_CompUnitContribMap cm = {0}; cm.count = 0; - cm.info_off_arr = push_array(arena, U64, unit_range_list.count); - cm.voff_range_arr = push_array(arena, RDIM_Rng1U64List, unit_range_list.count); + cm.info_off_arr = push_array(arena, U64, unit_range_list.count); + 
cm.voff_range_arr = push_array(arena, RDIM_Rng1U64ChunkList, unit_range_list.count); for (Rng1U64Node *range_n = unit_range_list.first; range_n != 0; range_n = range_n->next) { String8 unit_data = str8_substr(aranges_data, range_n->v); @@ -749,7 +998,7 @@ d2r_cu_contrib_map_from_aranges(Arena *arena, DW_Input *input, U64 image_base) unit_cursor += tuple_size - bytes_too_far_past_boundary; } - RDIM_Rng1U64List voff_ranges = {0}; + RDIM_Rng1U64ChunkList voff_ranges = {0}; if (segment_selector_size == 0) { while (unit_cursor + address_size * 2 <= unit_data.size) { U64 address = 0; @@ -766,7 +1015,7 @@ d2r_cu_contrib_map_from_aranges(Arena *arena, DW_Input *input, U64 image_base) U64 min = address - image_base; U64 max = min + length; - rdim_rng1u64_list_push(arena, &voff_ranges, (RDIM_Rng1U64){.min = min, .max = max}); + rdim_rng1u64_chunk_list_push(arena, &voff_ranges, 256, (RDIM_Rng1U64){.min = min, .max = max}); } } else { // TODO: segment relative addressing @@ -782,10 +1031,10 @@ d2r_cu_contrib_map_from_aranges(Arena *arena, DW_Input *input, U64 image_base) return cm; } -internal RDIM_Rng1U64List +internal RDIM_Rng1U64ChunkList d2r_voff_ranges_from_cu_info_off(D2R_CompUnitContribMap map, U64 info_off) { - RDIM_Rng1U64List voff_ranges = {0}; + RDIM_Rng1U64ChunkList voff_ranges = {0}; U64 voff_list_idx = u64_array_bsearch(map.info_off_arr, map.count, info_off); if (voff_list_idx < map.count) { voff_ranges = map.voff_range_arr[voff_list_idx]; @@ -812,100 +1061,90 @@ d2r_push_scope(Arena *arena, RDIM_ScopeChunkList *scopes, U64 scope_chunk_cap, D if (parent_tag_kind == DW_Tag_SubProgram || parent_tag_kind == DW_Tag_InlinedSubroutine || parent_tag_kind == DW_Tag_LexicalBlock) { RDIM_Scope *parent = tag_stack->next->scope; - scope->parent_scope = tag_stack->next->scope; + scope->parent_scope = parent; + scope->symbol = parent->symbol; if (parent->last_child) { parent->last_child->next_sibling = scope; } - SLLQueuePush_N(parent->first_child, parent->last_child, scope, 
next_sibling); } - // propagate scope symbol - if (tag_stack->cur_node->tag.kind == DW_Tag_LexicalBlock) { - scope->symbol = tag_stack->next->scope->symbol; - } - return scope; } -internal RDIM_BakeParams * -d2r_convert(Arena *arena, D2R_User2Convert *in) +internal RDIM_BakeParams +d2r_convert(Arena *arena, ASYNC_Root *async_root, D2R_ConvertParams *params) { Temp scratch = scratch_begin(&arena, 1); - B32 is_parse_relaxed = !(in->flags & D2R_ConvertFlag_StrictParse); - - RDIM_BinarySectionList binary_sections = {0}; - Arch arch = Arch_Null; - U64 image_base = 0; - U64 voff_max = 0; - DW_Input input = {0}; - DW_ListUnitInput lui = {0}; - if (pe_check_magic(in->input_exe_data)) { - PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, in->input_exe_data); - - // infer exe info - arch = pe.arch; - image_base = pe.image_base; - - // get COFF sections - String8 raw_sections = str8_substr(in->input_exe_data, rng_1u64(pe.section_array_off, pe.section_array_off+sizeof(COFF_SectionHeader)*pe.section_count)); - U64 section_count = raw_sections.size / sizeof(COFF_SectionHeader); - COFF_SectionHeader *section_array = (COFF_SectionHeader *)raw_sections.str; - - // loop over section headers and pick max virtual offset - for (U64 i = 0; i < section_count; ++i) { - U64 sec_voff_max = section_array[i].voff + section_array[i].vsize; - voff_max = Max(voff_max, sec_voff_max); - } - - ProfBegin("binary sections"); - for (U64 i = 0; i < section_count; ++i) { - COFF_SectionHeader *coff_sec = &section_array[i]; - RDIM_BinarySection *sec = rdim_binary_section_list_push(arena, &binary_sections); - - sec->name = coff_name_from_section_header(in->input_exe_data, coff_sec, pe.string_table_off); - sec->flags = rdi_binary_section_flags_from_coff_section_flags(coff_sec->flags); - sec->voff_first = coff_sec->voff; - sec->voff_opl = coff_sec->voff + coff_sec->vsize; - sec->foff_first = coff_sec->foff; - sec->foff_opl = coff_sec->foff + coff_sec->fsize; - } - ProfEnd(); - - // find DWARF sections - input
= dw_input_from_coff_section_table(scratch.arena, in->input_exe_data, pe.string_table_off, section_count, section_array); - } - - //////////////////////////////// - - RDI_Arch arch_rdi = RDI_Arch_NULL; - switch (arch) { - case Arch_Null: arch_rdi = RDI_Arch_NULL; break; - case Arch_x64: arch_rdi = RDI_Arch_X64; break; - case Arch_x86: arch_rdi = RDI_Arch_X86; break; - default: NotImplemented; break; - } - - U64 arch_addr_size = rdi_addr_size_from_arch(arch_rdi); - //////////////////////////////// ProfBegin("compute exe hash"); - U64 exe_hash = rdi_hash(in->input_exe_data.str, in->input_exe_data.size); + U64 exe_hash = rdi_hash(params->exe_data.str, params->exe_data.size); ProfEnd(); //////////////////////////////// - ProfBegin("top level info"); - RDIM_TopLevelInfo top_level_info = {0}; - top_level_info.arch = arch_rdi; - top_level_info.exe_name = str8_skip_last_slash(in->input_exe_name); - top_level_info.exe_hash = exe_hash; - top_level_info.voff_max = voff_max; - top_level_info.producer_name = str8_lit(BUILD_TITLE_STRING_LITERAL); - ProfEnd(); + Arch arch = Arch_Null; + U64 image_base = 0; + RDIM_BinarySectionList binary_sections = {0}; + DW_Input input = {0}; + + switch(params->exe_kind) + { + default:{}break; + case ExecutableImageKind_CoffPe: + { + PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, params->exe_data); + + // get image arch + arch = pe.arch; + + // get image base + image_base = pe.image_base; + + // get image sections + String8 raw_sections = str8_substr(params->exe_data, pe.section_table_range); + U64 section_count = raw_sections.size / sizeof(COFF_SectionHeader); + COFF_SectionHeader *section_table = (COFF_SectionHeader *)raw_sections.str; + + // convert sections + String8 string_table = str8_substr(params->exe_data, pe.string_table_range); + binary_sections = c2r_rdi_binary_sections_from_coff_sections(arena, params->exe_data, string_table, section_count, section_table); + + // make DWARF input + input = 
dw_input_from_coff_section_table(scratch.arena, params->exe_data, string_table, section_count, section_table); + }break; + case ExecutableImageKind_Elf32: + case ExecutableImageKind_Elf64: + { + ELF_BinInfo elf = elf_bin_from_data(params->dbg_data); + + // get image arch + arch = arch_from_elf_machine(elf.hdr.e_machine); + + // get image base + image_base = elf_base_addr_from_bin(&elf.hdr); + + // get image sections + ELF_Shdr64Array shdrs = elf_shdr64_array_from_bin(scratch.arena, params->dbg_data, &elf.hdr); + + // convert sections + binary_sections = e2r_rdi_binary_sections_from_elf_section_table(arena, shdrs); + + // make DWARF input + input = dw_input_from_elf_section_table(scratch.arena, params->dbg_data, &elf); + }break; + } + + //////////////////////////////// + + RDIM_TopLevelInfo top_level_info = rdim_make_top_level_info(params->exe_name, arch, exe_hash, binary_sections); + + //////////////////////////////// + + U64 arch_addr_size = rdi_addr_size_from_arch(arch); //////////////////////////////// @@ -922,7 +1161,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) RDIM_UnitChunkList units = {0}; RDIM_UDTChunkList udts = {0}; - RDIM_TypeChunkList types = {0}; + RDIM_TypeChunkList types = rdim_init_type_chunk_list(arena, arch); RDIM_SymbolChunkList gvars = {0}; RDIM_SymbolChunkList tvars = {0}; RDIM_SymbolChunkList procs = {0}; @@ -933,6 +1172,10 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) //////////////////////////////// + RDIM_Scope *global_scope = rdim_scope_chunk_list_push(arena, &scopes, SCOPE_CHUNK_CAP); + + //////////////////////////////// + ProfBegin("Make Unit Contrib Map"); D2R_CompUnitContribMap cu_contrib_map = {0}; if (input.sec[DW_Section_ARanges].data.size > 0) { @@ -952,6 +1195,10 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) //////////////////////////////// ProfBegin("Parse Compile Unit Headers"); + // TODO(rjf): parse should always be relaxed. 
any verification checks we do + // should just be logged via log_info(...), and then the caller of this + // converter can collect those & display as necessary. + B32 is_parse_relaxed = 1; DW_CompUnit *cu_arr = push_array(scratch.arena, DW_CompUnit, cu_ranges.count); for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { cu_arr[cu_idx] = dw_cu_from_info_off(scratch.arena, &input, lu_input, cu_ranges.v[cu_idx].min, is_parse_relaxed); @@ -1097,7 +1344,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) } // get unit's contribution ranges - RDIM_Rng1U64List cu_voff_ranges = d2r_voff_ranges_from_cu_info_off(cu_contrib_map, cu_ranges.v[cu_idx].min); + RDIM_Rng1U64ChunkList cu_voff_ranges = d2r_voff_ranges_from_cu_info_off(cu_contrib_map, cu_ranges.v[cu_idx].min); String8 cu_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_Name); String8 cu_dir = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_CompDir); @@ -1119,8 +1366,6 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) type_table->ht = hash_table_init(comp_temp.arena, 0x4000); type_table->types = &types; type_table->type_chunk_cap = TYPE_CHUNK_CAP; - type_table->void_type = d2r_create_type(arena, type_table); - type_table->void_type->kind = RDI_TypeKind_Void; type_table->varg_type = d2r_create_type(arena, type_table); type_table->varg_type->kind = RDI_TypeKind_Variadic; @@ -1360,7 +1605,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) default: AssertAlways(!"unexpected base type encoding"); break; // TODO: error handling } - RDIM_Type *base_type = d2r_create_type(arena, type_table); + RDIM_Type *base_type = rdim_builtin_type_from_kind(types, kind); base_type->kind = kind; base_type->byte_size = byte_size; @@ -1595,7 +1840,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) proc->container_symbol = 0; proc->container_type = container_type; proc->root_scope = root_scope; - proc->frame_base = d2r_locset_from_attrib(arena, &input, cu, &scopes, root_scope, image_base, cu->address_size, arch_rdi, 
cu->addr_lu, tag, DW_Attrib_FrameBase); + proc->frame_base = d2r_locset_from_attrib(arena, &input, cu, &scopes, root_scope, image_base, arch, tag, DW_Attrib_FrameBase); // sub program with user-defined parent tag is a method DW_TagKind parent_tag_kind = tag_stack->next->cur_node->tag.kind; @@ -1606,7 +1851,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) case DW_VirtualityKind_None: member_kind = RDI_MemberKind_Method; break; case DW_VirtualityKind_Virtual: member_kind = RDI_MemberKind_VirtualMethod; break; case DW_VirtualityKind_PureVirtual: member_kind = RDI_MemberKind_VirtualMethod; break; // TODO: create kind for pure virutal - default: InvalidPath; break; + //default: InvalidPath; break; } RDIM_Type *type = tag_stack->next->type; @@ -1615,7 +1860,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) member->type = type; member->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); } else if (parent_tag_kind != DW_Tag_CompileUnit) { - AssertAlways(!"unexpected tag"); + //AssertAlways(!"unexpected tag"); } tag_stack->scope = root_scope; @@ -1674,7 +1919,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) local->kind = RDI_LocalKind_Variable; local->name = name; local->type = type; - local->locset = d2r_locset_from_attrib(arena, &input, cu, &scopes, scope, image_base, cu->address_size, arch_rdi, cu->addr_lu, tag, DW_Attrib_Location); + local->locset = d2r_var_locset_from_tag(arena, &input, cu, &scopes, scope, image_base, arch, tag); } else { // NOTE: due to a bug in clang in stb_sprint.h local variables @@ -1688,7 +1933,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) gvar->name = name; gvar->link_name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_LinkageName); gvar->type = type; - gvar->offset = 0; // TODO: NotImplemented; + //gvar->locset = d2r_locset_from_attrib(arena, &input, cu, &scopes, global_scope, image_base, arch, tag, DW_Attrib_Location); gvar->container_symbol = 0; gvar->container_type = 0; // TODO: NotImplemented; } @@ -1701,7 
+1946,7 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) param->kind = RDI_LocalKind_Parameter; param->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); param->type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); - param->locset = d2r_locset_from_attrib(arena, &input, cu, &scopes, scope, image_base, cu->address_size, arch_rdi, cu->addr_lu, tag, DW_Attrib_Location); + param->locset = d2r_var_locset_from_tag(arena, &input, cu, &scopes, scope, image_base, arch, tag); } else { // TODO: error handling AssertAlways(!"this is a local variable"); @@ -1715,10 +1960,21 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) d2r_push_scope(arena, &scopes, SCOPE_CHUNK_CAP, tag_stack, ranges); } } break; + case DW_Tag_CallSite: { + // TODO + } break; + case DW_Tag_CallSiteParameter: { + // TODO + } break; case DW_Tag_Label: case DW_Tag_CompileUnit: case DW_Tag_UnspecifiedParameters: break; + case DW_Tag_Namespace: break; + case DW_Tag_ImportedDeclaration: break; + case DW_Tag_PtrToMemberType: break; + case DW_Tag_TemplateTypeParameter: break; + case DW_Tag_ReferenceType: break; default: NotImplemented; break; } @@ -1819,101 +2075,24 @@ d2r_convert(Arena *arena, D2R_User2Convert *in) //////////////////////////////// - RDIM_BakeParams *bake_params = push_array(arena, RDIM_BakeParams, 1); - bake_params->top_level_info = top_level_info; - bake_params->binary_sections = binary_sections; - bake_params->units = units; - bake_params->types = types; - bake_params->udts = udts; - bake_params->src_files = src_files; - bake_params->line_tables = line_tables; - bake_params->global_variables = gvars; - bake_params->thread_variables = tvars; - bake_params->procedures = procs; - bake_params->scopes = scopes; - bake_params->inline_sites = inline_sites; + RDIM_BakeParams bake_params = {0}; + bake_params.top_level_info = top_level_info; + bake_params.binary_sections = binary_sections; + bake_params.units = units; + bake_params.types = types; + 
bake_params.udts = udts; + bake_params.src_files = src_files; + bake_params.line_tables = line_tables; + bake_params.global_variables = gvars; + bake_params.thread_variables = tvars; + bake_params.procedures = procs; + bake_params.scopes = scopes; + bake_params.inline_sites = inline_sites; scratch_end(scratch); return bake_params; } -RDI_PROC void -rdim_assign_type_index(RDIM_Type *type, U64 *type_indices, U64 *curr_type_idx) -{ - RDI_U64 type_pos = rdim_idx_from_type(type); - - if(type->kind == RDI_TypeKind_NULL) - { - type_indices[type_pos] = 0; - return; - } - - if(type_indices[type_pos] == 0) - { - if(type->param_types) - { - for(RDI_U64 param_idx = 0; param_idx < type->count; param_idx += 1) - { - rdim_assign_type_index(type->param_types[param_idx], type_indices, curr_type_idx); - } - } - - if(type->direct_type) - { - rdim_assign_type_index(type->direct_type, type_indices, curr_type_idx); - } - - type_indices[type_pos] = *curr_type_idx; - *curr_type_idx += 1; - } -} - -RDI_PROC RDI_U64 * -rdim_make_type_indices(RDIM_Arena *arena, RDIM_TypeChunkList *types) -{ - ProfBeginFunction(); - - RDI_U64 *type_indices = rdim_push_array(arena, RDI_U64, types->total_count + 1); - RDI_U64 type_indices_count = 1; - - for(RDIM_TypeChunkNode *chunk = types->first; chunk != 0; chunk = chunk->next) - { - for(RDI_U64 i = 0; i < chunk->count; i += 1) - { - rdim_assign_type_index(&chunk->v[i], type_indices, &type_indices_count); - } - } - - ProfEnd(); - return type_indices; -} - -internal RDIM_BakeResults -d2r_bake(RDIM_LocalState *state, RDIM_BakeParams *in_params) -{ - //////////////////////////////// - // resolve incomplete types - - rdim_local_resolve_incomplete_types(&in_params->types, &in_params->udts); - - //////////////////////////////// - // compute type indices - - RDI_U64 *type_indices = rdim_make_type_indices(scratch.arena, &in_params->types); - - // using type indices create a correct type array layout - NotImplemented; - - return rdim_bake(state, in_params); -} - 
-internal RDIM_SerializedSectionBundle -d2r_compress(Arena *arena, RDIM_SerializedSectionBundle in) -{ - RDIM_SerializedSectionBundle result = {0}; - return result; -} - internal RDI_Language rdi_language_from_dw_language(DW_Language v) { @@ -1978,4 +2157,3 @@ rdi_reg_from_dw_reg(Arch arch, DW_Reg v, RDI_RegCode *code_out, U64 *off_out, U6 } return 0; } - diff --git a/src/rdi_from_dwarf/rdi_from_dwarf.h b/src/rdi_from_dwarf/rdi_from_dwarf.h index dbe16860..1831367c 100644 --- a/src/rdi_from_dwarf/rdi_from_dwarf.h +++ b/src/rdi_from_dwarf/rdi_from_dwarf.h @@ -3,32 +3,23 @@ #pragma once -typedef U64 D2R_ConvertFlags; -enum +typedef struct D2R_ConvertParams D2R_ConvertParams; +struct D2R_ConvertParams { -#define X(t,n,k) D2R_ConvertFlag_##t = (1ull << RDI_SectionKind_##t), - RDI_SectionKind_XList -#undef X - D2R_ConvertFlag_StrictParse, + String8 dbg_name; + String8 dbg_data; + String8 exe_name; + String8 exe_data; + ExecutableImageKind exe_kind; + RDIM_SubsetFlags subset_flags; + B32 deterministic; }; -typedef struct D2R_User2Convert -{ - String8 input_exe_name; - String8 input_exe_data; - String8 input_debug_name; - String8 input_debug_data; - String8 output_name; - D2R_ConvertFlags flags; - String8List errors; -} D2R_User2Convert; - typedef struct D2R_TypeTable { HashTable *ht; RDIM_TypeChunkList *types; U64 type_chunk_cap; - RDIM_Type *void_type; RDIM_Type *varg_type; } D2R_TypeTable; @@ -42,28 +33,18 @@ typedef struct D2R_TagNode typedef struct D2R_CompUnitContribMap { - U64 count; - U64 *info_off_arr; - RDIM_Rng1U64List *voff_range_arr; + U64 count; + U64 *info_off_arr; + RDIM_Rng1U64ChunkList *voff_range_arr; } D2R_CompUnitContribMap; //////////////////////////////// -// Command Line -> Conversion Inputs -internal D2R_User2Convert * d2r_user2convert_from_cmdln(Arena *arena, CmdLine *cmdline); +internal RDIM_BakeParams d2r_convert(Arena *arena, ASYNC_Root *async_root, D2R_ConvertParams *params); //////////////////////////////// -// Top-Level Conversion Entry 
Point - -internal RDIM_BakeParams * d2r_convert (Arena *arena, D2R_User2Convert *in); -internal RDIM_BakeResults d2r_bake (RDIM_LocalState *state, RDIM_BakeParams *in); -internal RDIM_SerializedSectionBundle d2r_compress(Arena *arena, RDIM_SerializedSectionBundle in); - -//////////////////////////////// -// Enum Conversion internal RDI_Language rdi_language_from_dw_language(DW_Language v); internal RDI_RegCodeX86 rdi_reg_from_dw_reg_x86(DW_RegX86 v); internal B32 rdi_reg_from_dw_reg_x64(DW_RegX64 v, RDI_RegCodeX64 *code_out, U64 *off_out, U64 *size_out); internal B32 rdi_reg_from_dw_reg(Arch arch, DW_Reg v, RDI_RegCode *code_out, U64 *off_out, U64 *size_out); - diff --git a/src/rdi_from_elf/rdi_from_elf.c b/src/rdi_from_elf/rdi_from_elf.c new file mode 100644 index 00000000..ccbb1351 --- /dev/null +++ b/src/rdi_from_elf/rdi_from_elf.c @@ -0,0 +1,9 @@ +// Copyright (c) Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +internal RDIM_BinarySectionList +e2r_rdi_binary_sections_from_elf_section_table(Arena *arena, ELF_Shdr64Array shdrs) +{ + RDIM_BinarySectionList result = {0}; + return result; +} diff --git a/src/rdi_from_elf/rdi_from_elf.h b/src/rdi_from_elf/rdi_from_elf.h new file mode 100644 index 00000000..3d979c8f --- /dev/null +++ b/src/rdi_from_elf/rdi_from_elf.h @@ -0,0 +1,9 @@ +// Copyright (c) Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#ifndef RDI_FROM_ELF_H +#define RDI_FROM_ELF_H + +internal RDIM_BinarySectionList e2r_rdi_binary_sections_from_elf_section_table(Arena *arena, ELF_Shdr64Array shdrs); + +#endif // RDI_FROM_ELF_H