diff --git a/build.bat b/build.bat index 2187d8f6..6529d9e5 100644 --- a/build.bat +++ b/build.bat @@ -109,7 +109,7 @@ if "%raddbg%"=="1" set didbuild=1 && %compile% ..\src\raddbg if "%radlink%"=="1" set didbuild=1 && %compile% ..\src\linker\lnk.c %compile_link% %link_natvis%"%~dp0\src\linker\linker.natvis" %out%radlink.exe || exit /b 1 if "%raddump%"=="1" set didbuild=1 && %compile% ..\src\raddump\raddump_main.c %compile_link% %out%raddump.exe || exit /b 1 if "%rdi_from_pdb%"=="1" set didbuild=1 && %compile% ..\src\rdi_from_pdb\rdi_from_pdb_main.c %compile_link% %out%rdi_from_pdb.exe || exit /b 1 -if "%rdi_from_dwarf%"=="1" set didbuild=1 && %compile% ..\src\rdi_from_dwarf\rdi_from_dwarf.c %compile_link% %out%rdi_from_dwarf.exe || exit /b 1 +if "%rdi_from_dwarf%"=="1" set didbuild=1 && %compile% ..\src\rdi_from_dwarf\rdi_from_dwarf_main.c %compile_link% %out%rdi_from_dwarf.exe || exit /b 1 if "%rdi_dump%"=="1" set didbuild=1 && %compile% ..\src\rdi_dump\rdi_dump_main.c %compile_link% %out%rdi_dump.exe || exit /b 1 if "%rdi_breakpad_from_pdb%"=="1" set didbuild=1 && %compile% ..\src\rdi_breakpad_from_pdb\rdi_breakpad_from_pdb_main.c %compile_link% %out%rdi_breakpad_from_pdb.exe || exit /b 1 if "%tester%"=="1" set didbuild=1 && %compile% ..\src\tester\tester_main.c %compile_link% %out%tester.exe || exit /b 1 diff --git a/src/dwarf/dwarf_coff.c b/src/dwarf/dwarf_coff.c index 942ca306..d8158251 100644 --- a/src/dwarf/dwarf_coff.c +++ b/src/dwarf/dwarf_coff.c @@ -3,5 +3,5 @@ #pragma once -internal DW_SectionArray dw_sections_from_coff_section_table(Arena *arena, String8 raw_image, U64 string_table_off, U64 section_count, COFF_SectionHeader *sections); +internal DW_Input dw_input_from_coff_section_table(Arena *arena, String8 raw_image, U64 string_table_off, U64 section_count, COFF_SectionHeader *sections); diff --git a/src/dwarf/dwarf_coff.h b/src/dwarf/dwarf_coff.h index 368bff78..3a772531 100644 --- a/src/dwarf/dwarf_coff.h +++ b/src/dwarf/dwarf_coff.h @@ -1,15 +1,15 
@@ // Copyright (c) 2024 Epic Games Tools // Licensed under the MIT license (https://opensource.org/license/mit/) -internal DW_SectionArray -dw_sections_from_coff_section_table(Arena *arena, - String8 raw_image, - U64 string_table_off, - U64 section_count, - COFF_SectionHeader *sections) +internal DW_Input +dw_input_from_coff_section_table(Arena *arena, + String8 raw_image, + U64 string_table_off, + U64 section_count, + COFF_SectionHeader *sections) { - DW_SectionArray result = {0}; - B32 sect_status[ArrayCount(result.v)] = {0}; + DW_Input input = {0}; + B32 sect_status[ArrayCount(input.sec)] = {0}; for (U64 i = 0; i < section_count; ++i) { COFF_SectionHeader *header = §ions[i]; @@ -18,7 +18,7 @@ dw_sections_from_coff_section_table(Arena *arena, DW_SectionKind s = DW_Section_Null; B32 is_dwo = 0; - #define X(_K,_L,_M,_W) \ + #define X(_K,_L,_M,_W) \ if (str8_match_lit(_L, name, 0)) { s = DW_Section_##_K; } \ if (str8_match_lit(_M, name, 0)) { s = DW_Section_##_K; } \ if (str8_match_lit(_W, name, 0)) { s = DW_Section_##_K; is_dwo = 1; } @@ -30,16 +30,15 @@ dw_sections_from_coff_section_table(Arena *arena, Assert(!"too many debug sections with identical name, picking first"); } else { sect_status[s] = 1; - DW_Section *d = &result.v[s]; + DW_Section *d = &input.sec[s]; d->name = push_str8_copy(arena, name); d->data = str8_substr(raw_image, raw_data_range); - d->mode = dim_1u64(raw_data_range) > max_U32 ? 
DW_Mode_64Bit : DW_Mode_32Bit; d->is_dwo = is_dwo; } } } - return result; + return input; } diff --git a/src/dwarf/dwarf_elf.c b/src/dwarf/dwarf_elf.c new file mode 100644 index 00000000..55d6e4f7 --- /dev/null +++ b/src/dwarf/dwarf_elf.c @@ -0,0 +1,104 @@ +// Copyright (c) 2025 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +internal DW_Input +dw_input_from_elf_section_table(Arena *arena, String8 raw_image, ELF_BinInfo *bin) +{ + Temp scratch = scratch_begin(&arena, 1); + + DW_Input result = {0}; + B32 sect_status[ArrayCount(result.sec)] = {0}; + + ELF_Shdr64Array sections = elf_shdr64_array_from_bin(scratch.arena, raw_image, &bin->hdr); + String8 sh_names = str8_substr(raw_image, bin->sh_name_range); + + for (U64 sect_idx = 1; sect_idx < sections.count; ++sect_idx) { + ELF_Shdr64 *shdr = §ions.v[sect_idx]; + + // skip BSS sections + if (shdr->sh_type != ELF_SectionCode_ProgBits) { + continue; + } + + String8 name = {0}; + str8_deserial_read_cstr(sh_names, shdr->sh_name, &name); + + DW_SectionKind s = DW_Section_Null; + B32 is_dwo = 0; + #define X(_K,_L,_M,_W) \ + if (str8_match_lit(_L, name, 0)) { s = DW_Section_##_K; } \ + if (str8_match_lit(_M, name, 0)) { s = DW_Section_##_K; } \ + if (str8_match_lit(_W, name, 0)) { s = DW_Section_##_K; is_dwo = 1; } + DW_SectionKind_XList(X) + #undef X + + if (s != DW_Section_Null) { + if (sect_status[s]) { + Assert(!"too many debug sections with identical name, picking first"); + } else { + Rng1U64 raw_data_range = rng_1u64(shdr->sh_offset, shdr->sh_offset + shdr->sh_size); + String8 data = str8_substr(raw_image, raw_data_range); + + // ELF was compiled with compressed debug info + if (shdr->sh_flags & ELF_Shf_Compressed) { + String8 comp_data_with_header = data; + + // read header + ELF_Chdr64 chdr64 = {0}; + U64 chdr_size = 0; + if (ELF_HdrIs64Bit(bin->hdr.e_ident)) { + chdr_size = str8_deserial_read_struct(comp_data_with_header, 0, &chdr64); + if (chdr_size != sizeof(chdr64)) { 
+ Assert(!"not enough bytes to read header"); + } + } else if (ELF_HdrIs32Bit(bin->hdr.e_ident)) { + ELF_Chdr32 chdr32 = {0}; + chdr_size = str8_deserial_read_struct(comp_data_with_header, 0, &chdr32); + if (chdr_size == sizeof(chdr32)) { + chdr64 = elf_chdr64_from_chdr32(chdr32); + } + } + + AssertAlways(IsPow2(chdr64.ch_addr_align)); + + // skip header + String8 comp_data = str8_skip(comp_data_with_header, chdr_size); + + // push buffer for the decompressor + U8 *decomp_buffer = push_array_no_zero_aligned(arena, U8, chdr64.ch_size, chdr64.ch_addr_align); + U64 actual_decomp_size = 0; + + // decompress + switch (chdr64.ch_type) { + case ELF_CompressType_None: { + AssertAlways(!"unexpected compression type"); + } break; + case ELF_CompressType_ZLib: { + actual_decomp_size = zsinflate(decomp_buffer, chdr64.ch_size, comp_data.str, comp_data.size); + } break; + case ELF_CompressType_ZStd: { + // TODO: zstd lib + NotImplemented; + } break; + default: InvalidPath; break; + } + + // TODO: error handling + AssertAlways(actual_decomp_size == chdr64.ch_size); + + // set decompressed section data + data = str8(decomp_buffer, actual_decomp_size); + } + + sect_status[s] = 1; + DW_Section *d = &result.sec[s]; + d->name = push_str8_copy(arena, name); + d->data = data; + d->is_dwo = is_dwo; + } + } + } + + scratch_end(scratch); + return result; +} diff --git a/src/dwarf/dwarf_elf.h b/src/dwarf/dwarf_elf.h new file mode 100644 index 00000000..776e7b2c --- /dev/null +++ b/src/dwarf/dwarf_elf.h @@ -0,0 +1,11 @@ +// Copyright (c) 2025 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#ifndef DWARF_ELF_H +#define DWARF_ELF_H + +internal DW_Input dw_input_from_elf_section_table(Arena *arena, String8 raw_image, ELF_BinInfo *bin); + +#endif // DWARF_ELF_H + + diff --git a/src/dwarf/dwarf_parse.c b/src/dwarf/dwarf_parse.c index aa2c7d3e..e0cd5e4e 100644 --- a/src/dwarf/dwarf_parse.c +++ b/src/dwarf/dwarf_parse.c @@ -1,66 +1,6 @@ // Copyright (c) 
2024 Epic Games Tools // Licensed under the MIT license (https://opensource.org/license/mit/) -// TODO(rjf): -// -// [ ] Any time we encode a subrange of a section inside of a -// DW_AttribValue, we need to do that consistently, regardless of -// whether or not it is a string, memory block, etc. We should just use -// the DW_SectionKind and then the min/max pair. -// -// [ ] Things we are not reporting, or haven't figured out: -// @dwarf_expr @dwarf_v5 @dw_cross_unit -// [ ] currently, we're filtering out template arguments in the member accelerator. -// this is because they don't correspond one-to-one with anything in PDB, but -// they do contain useful information that we might want to expose another way -// somehow. -// [ ] DWARF V5 features that nobody seems to use right now -// [ ] ref_addr_desc + next_info_ctx -// apparently these are necessary when dereferencing some DWARF V5 ways of -// forming references. They don't seem to come up at all for any real data -// but might be a case somewhere. -// [ ] case when only .debug_line and .debug_line_str is available, without -// compilation unit debug info? do we care about this at all? -// [ ] DW_Form_RefSig8, which requires using .debug_names -// to do a lookup for a reference -// [ ] DWARF V5, but also V1 & V2 for dw_range_list_from_range_offset -// [ ] DW_AttribClass_RngList and DW_Form_RngListx -// [ ] DW_OpCode_XDEREF_SIZE + DW_OpCode_XDEREF -// [ ] DW_OpCode_PIECE + DW_OpCode_BIT_PIECE -// [ ] DW_ExtOpcode_DefineFile, for line info -// [ ] DWARF procedures in DWARF expr evaluation -// [ ] DW_Attrib_DataMemberLocation is not being *fully* handled right -// now; full handling requires evaluating a DWARF expression to find out the -// offset of a member. Right now we handle the common case, which is when it -// is encoded as a constant value. 
-// [ ] inline information -// [ ] full info we are not handling: -// [ ] friend classes -// [ ] DWARF macro info -// [ ] whether or not a function is the entry point -// [ ] attributes we are not handling that may be important: -// [ ] DW_Attrib_AbstractOrigin -// - ??? -// [ ] DW_Attrib_VariableParameter -// - determines whether or not a parameter to a function is mutable, I think? -// [ ] DW_Attrib_Mutable -// - I think this is for specific keywords, may not be relevant to C/++ -// [ ] DW_Attrib_CallColumn -// - column position of an inlined subroutine -// [ ] DW_Attrib_CallFile -// - file of inlined subroutine -// [ ] DW_Attrib_CallLine -// - line number of inlined subroutine -// [ ] DW_Attrib_ConstExpr -// - ??? maybe C++ constexpr? -// [ ] DW_Attrib_EnumClass -// - c++ thing that's an enum with a backing type -// [ ] DW_Attrib_LinkageName -// - name used to do linking - -//////////////////////////////// -//~ rjf: Basic Helpers - internal U64 dw_hash_from_string(String8 string) { @@ -68,86 +8,97 @@ dw_hash_from_string(String8 string) return hash64; } -//////////////////////////////// -//~ rjf: DWARF-Specific Based Range Reads - internal U64 -dw_based_range_read(void *base, Rng1U64 range, U64 offset, U64 size, void *out) +str8_deserial_read_dwarf_packed_size(String8 string, U64 off, U64 *size_out) { - String8 data = str8((U8*)base+range.min, dim_1u64(range)); - return str8_deserial_read(data, offset, out, size, 1); -} - -internal String8 -dw_based_range_read_string(void *base, Rng1U64 range, U64 offset) -{ - String8 data = str8((U8*)base+range.min, dim_1u64(range)); - String8 result = {0}; - str8_deserial_read_cstr(data, offset, &result); - return result; -} - -internal void * -dw_based_range_ptr(void *base, Rng1U64 range, U64 offset) -{ - Assert(offset < dim_1u64(range)); - U8 *data = (U8*)base + range.min + offset; - return data; -} - -internal void * -dw_based_range_ptr_size(void *base, Rng1U64 range, U64 offset, U64 size) -{ - void *ptr = 0; - if (size > 0 
&& offset + size <= dim_1u64(range)) { - ptr = dw_based_range_ptr(base, range, offset); - } - return ptr; -} - -internal U64 -dw_based_range_read_uleb128(void *base, Rng1U64 range, U64 offset, U64 *out_value) -{ - U64 value = 0; U64 bytes_read = 0; - U64 shift = 0; - U8 byte = 0; - for(U64 cursor = offset; - dw_based_range_read_struct(base, range, cursor, &byte) == 1; - cursor += 1) - { - bytes_read += 1; - U8 val = byte & 0x7fu; - value |= ((U64)val) << shift; - if((byte&0x80u) == 0) - { - break; + if (str8_deserial_read(string, off, size_out, sizeof(U32), sizeof(U32))) { + if (*size_out == max_U32) { + if (str8_deserial_read_struct(string, off+sizeof(U32), size_out)) { + bytes_read = sizeof(U32) + sizeof(U64); + } + } else { + *size_out &= (U64)max_U32; + bytes_read = sizeof(U32); } - shift += 7u; - } - if(out_value != 0) - { - *out_value = value; } return bytes_read; } internal U64 -dw_based_range_read_sleb128(void *base, Rng1U64 range, U64 offset, S64 *out_value) +str8_deserial_read_dwarf_uint(String8 string, U64 off, DW_Format format, U64 *uint_out) { - U64 value = 0; U64 bytes_read = 0; - U64 shift = 0; - U8 byte = 0; - for(U64 cursor = offset; - dw_based_range_read_struct(base, range, cursor, &byte) == 1; - cursor += 1) + switch (format) { + case DW_Format_Null: break; + case DW_Format_32Bit: { + *uint_out &= (U64)max_U32; + bytes_read = str8_deserial_read(string, off, uint_out, sizeof(U32), sizeof(U32)); + } break; + case DW_Format_64Bit: { + bytes_read = str8_deserial_read_struct(string, off, uint_out); + } break; + } + return bytes_read; +} + +internal U64 +str8_deserial_read_uleb128(String8 string, U64 off, U64 *value_out) +{ + U64 value = 0; + U64 shift = 0; + U64 cursor = off; + for(;;) { - bytes_read += 1; + U8 byte = 0; + U64 bytes_read = str8_deserial_read_struct(string, cursor, &byte); + + if(bytes_read != sizeof(byte)) + { + break; + } + U8 val = byte & 0x7fu; value |= ((U64)val) << shift; + + cursor += bytes_read; shift += 7u; - if((byte&0x80u) 
== 0) + + if((byte & 0x80u) == 0) + { + break; + } + } + if(value_out != 0) + { + *value_out = value; + } + U64 bytes_read = cursor - off; + return bytes_read; +} + +internal U64 +str8_deserial_read_sleb128(String8 string, U64 off, S64 *value_out) +{ + U64 value = 0; + U64 shift = 0; + U64 cursor = off; + for(;;) + { + U8 byte; + U64 bytes_read = str8_deserial_read_struct(string, cursor, &byte); + if(bytes_read != sizeof(byte)) + { + break; + } + + U8 val = byte & 0x7fu; + value |= ((U64)val) << shift; + + cursor += bytes_read; + shift += 7u; + + if((byte & 0x80u) == 0) { if(shift < sizeof(value) * 8 && (byte & 0x40u) != 0) { @@ -156,43 +107,315 @@ dw_based_range_read_sleb128(void *base, Rng1U64 range, U64 offset, S64 *out_valu break; } } - if(out_value != 0) + if(value_out != 0) { - *out_value = value; + *value_out = value; } + U64 bytes_read = cursor - off; return bytes_read; } internal U64 -dw_based_range_read_length(void *base, Rng1U64 range, U64 offset, U64 *out_value) +str8_deserial_read_uleb128_array(Arena *arena, String8 string, U64 off, U64 count, U64 **arr_out) { + Temp temp = temp_begin(arena); + + U64 *arr = push_array(arena, U64, count); + U64 i, cursor; + for (i = 0, cursor = off; i < count; ++i) { + U64 read_size = str8_deserial_read_uleb128(string, cursor, &arr[i]); + if (read_size == 0) { + break; + } + cursor += read_size; + } + U64 bytes_read = 0; - U64 value = 0; - U32 first32 = 0; - if(dw_based_range_read_struct(base, range, offset, &first32)) - { - // NOTE(rjf): DWARF 32-bit => use the first 32 bits as the size. - if(first32 != max_U32) - { - value = (U64)first32; - bytes_read = sizeof(U32); - } - // NOTE(rjf): DWARF 64-bit => first 32 are just a marker, use the next 64 bits as the size. 
- else if(dw_based_range_read_struct(base, range, offset + sizeof(U32), &value)) - { - value = 0; - bytes_read = sizeof(U32) + sizeof(U64); - } - } - if(out_value != 0) - { - *out_value = value; + if (i == count) { + *arr_out = arr; + bytes_read = cursor - off; + } else { + temp_end(temp); + *arr_out = 0; } + return bytes_read; } internal U64 -dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev) +str8_deserial_read_sleb128_array(Arena *arena, String8 string, U64 off, U64 count, S64 **arr_out) +{ + Temp temp = temp_begin(arena); + + S64 *arr = push_array(arena, S64, count); + U64 i, cursor; + for (i = 0, cursor = off; i < count; ++i) { + U64 read_size = str8_deserial_read_sleb128(string, cursor, &arr[i]); + if (read_size == 0) { + break; + } + cursor += read_size; + } + + U64 bytes_read = 0; + if (i == count) { + *arr_out = arr; + bytes_read = cursor - off; + } else { + temp_end(temp); + *arr_out = 0; + } + + return bytes_read; +} + +internal Rng1U64List +dw_unit_ranges_from_data(Arena *arena, String8 data) +{ + Rng1U64List result = {0}; + + for (U64 cursor = 0; cursor < data.size; ) { + // read CU size + U64 cu_size = 0; + U64 cu_size_size = str8_deserial_read_dwarf_packed_size(data, cursor, &cu_size); + + // was read ok? 
+ if (cu_size_size == 0) { + break; + } + + if (cu_size > 0) { + // push unit range + rng1u64_list_push(arena, &result, rng_1u64(cursor, cursor+cu_size+cu_size_size)); + } + + // advance + cursor += cu_size_size; + cursor += cu_size; + } + + return result; +} + +internal U64 +dw_read_list_unit_header_addr(String8 unit_data, DW_ListUnit *lu_out) +{ + U64 header_size = 0; + + U64 unit_length = 0; + U64 unit_length_size = str8_deserial_read_dwarf_packed_size(unit_data, 0, &unit_length); + + if (unit_length_size) { + DW_Version version = DW_Version_Null; + U64 version_size = str8_deserial_read_struct(unit_data, unit_length_size, &version); + + if (version_size) { + if (version >= DW_Version_5) { + U8 address_size = 0; + U64 address_size_size = str8_deserial_read_struct(unit_data, + unit_length_size + version_size, + &address_size); + + if (address_size_size && address_size) { + U8 segment_selector_size = 0; + U64 segment_selector_size_size = str8_deserial_read_struct(unit_data, + unit_length_size + version_size + address_size_size, + &segment_selector_size); + if (segment_selector_size_size) { + header_size = unit_length_size + version_size + address_size_size + segment_selector_size_size; + + lu_out->version = version; + lu_out->segment_selector_size = segment_selector_size; + lu_out->address_size = address_size; + lu_out->entry_size = segment_selector_size + address_size; + lu_out->entries = str8_skip(unit_data, header_size); + } + } + } + } + } + + return header_size; +} + +internal U64 +dw_read_list_unit_header_str_offsets(String8 unit_data, DW_ListUnit *lu_out) +{ + U64 header_size = 0; + + U64 unit_length = 0; + U64 unit_length_size = str8_deserial_read_dwarf_packed_size(unit_data, 0, &unit_length); + + if (unit_length_size) { + DW_Version version = DW_Version_Null; + U64 version_size = str8_deserial_read_struct(unit_data, unit_length_size, &version); + + if (version >= DW_Version_5) { + U16 padding = 0; + U64 padding_size = str8_deserial_read_struct(unit_data, 
unit_length_size + version_size, &padding); + + if (padding_size && padding == 0) { + header_size = unit_length_size + version_size + padding_size; + + lu_out->version = version; + lu_out->address_size = 0; + lu_out->segment_selector_size = 0; + lu_out->entry_size = dw_size_from_format(DW_FormatFromSize(unit_length)); + lu_out->entries = str8_skip(unit_data, header_size); + } + } + } + + return header_size; +} + +internal U64 +dw_read_list_unit_header_list(String8 unit_data, DW_ListUnit *lu_out) +{ + U64 header_size = 0; + + U64 unit_length = 0; + U64 unit_length_size = str8_deserial_read_dwarf_packed_size(unit_data, 0, &unit_length); + + if (unit_length_size) { + DW_Version version = DW_Version_Null; + U64 version_size = str8_deserial_read_struct(unit_data, unit_length_size, &version); + + if (version >= DW_Version_5) { + U8 address_size = 0; + U64 address_size_size = str8_deserial_read_struct(unit_data, unit_length_size + version_size, &address_size); + + if (address_size_size && address_size > 0) { + U8 segment_selector_size = 0; + U64 segment_selector_size_size = str8_deserial_read_struct(unit_data, unit_length_size + version_size + address_size_size, &segment_selector_size); + + if (segment_selector_size_size) { + U32 offset_entry_count = 0; + U64 offset_entry_count_size = str8_deserial_read_struct(unit_data, unit_length_size + version_size + address_size_size + segment_selector_size, &offset_entry_count); + + if (offset_entry_count_size) { + header_size = unit_length_size + version_size + address_size_size + segment_selector_size_size + offset_entry_count_size; + + lu_out->version = version; + lu_out->address_size = address_size; + lu_out->segment_selector_size = segment_selector_size; + lu_out->entry_size = dw_size_from_format(DW_FormatFromSize(unit_length)); + lu_out->entries = str8_skip(unit_data, header_size); + } + } + } + } + } + + return header_size; +} + +internal DW_ListUnitInput +dw_list_unit_input_from_input(Arena *arena, DW_Input *input) +{ + Temp 
scratch = scratch_begin(&arena, 1); + + DW_ListUnitInput result = {0}; + + DW_Section debug_addr = input->sec[DW_Section_Addr]; + { + String8 data = debug_addr.data; + Rng1U64List unit_ranges = dw_unit_ranges_from_data(scratch.arena, data); + + result.addr_ranges = rng1u64_array_from_list(arena, &unit_ranges); + result.addr_count = unit_ranges.count; + result.addrs = push_array(arena, DW_ListUnit, unit_ranges.count); + + for (U64 unit_idx = 0; unit_idx < result.addr_ranges.count; ++unit_idx) { + String8 unit_data = str8_substr(debug_addr.data, result.addr_ranges.v[unit_idx]); + dw_read_list_unit_header_addr(unit_data, &result.addrs[unit_idx]); + } + } + + DW_Section debug_str_offsets = input->sec[DW_Section_StrOffsets]; + { + String8 data = debug_str_offsets.data; + Rng1U64List unit_ranges = dw_unit_ranges_from_data(scratch.arena, data); + + result.str_offset_ranges = rng1u64_array_from_list(arena, &unit_ranges); + result.str_offset_count = unit_ranges.count; + result.str_offsets = push_array(arena, DW_ListUnit, unit_ranges.count); + + for (U64 unit_idx = 0; unit_idx < result.str_offset_ranges.count; ++unit_idx) { + String8 unit_data = str8_substr(data, result.str_offset_ranges.v[unit_idx]); + dw_read_list_unit_header_str_offsets(unit_data, &result.str_offsets[unit_idx]); + } + } + + DW_Section debug_rnglists = input->sec[DW_Section_RngLists]; + { + String8 data = debug_rnglists.data; + Rng1U64List unit_ranges = dw_unit_ranges_from_data(scratch.arena, data); + + result.rnglist_ranges = rng1u64_array_from_list(arena, &unit_ranges); + result.rnglist_count = unit_ranges.count; + result.rnglists = push_array(arena, DW_ListUnit, unit_ranges.count); + + for (U64 unit_idx = 0; unit_idx < result.rnglist_ranges.count; ++unit_idx) { + String8 unit_data = str8_substr(data, result.rnglist_ranges.v[unit_idx]); + dw_read_list_unit_header_list(unit_data, &result.rnglists[unit_idx]); + } + } + + DW_Section debug_loclists = input->sec[DW_Section_LocLists]; + { + String8 data = 
debug_loclists.data; + Rng1U64List unit_ranges = dw_unit_ranges_from_data(scratch.arena, data); + + result.loclist_ranges = rng1u64_array_from_list(arena, &unit_ranges); + result.loclist_count = unit_ranges.count; + result.loclists = push_array(arena, DW_ListUnit, unit_ranges.count); + + for (U64 unit_idx = 0; unit_idx < result.loclist_ranges.count; ++unit_idx) { + String8 unit_data = str8_substr(data, result.loclist_ranges.v[unit_idx]); + dw_read_list_unit_header_list(unit_data, &result.loclists[unit_idx]); + } + } + + scratch_end(scratch); + return result; +} + +internal U64 +dw_offset_from_list_unit(DW_ListUnit *lu, U64 index) +{ + U64 offset; + U64 entry_off = index * lu->entry_size; + if (entry_off + lu->entry_size <= lu->entries.size) { + offset = 0; + MemoryCopy(&offset, lu->entries.str + entry_off, lu->entry_size); + } else { + offset = max_U64; + } + return offset; +} + +internal U64 +dw_addr_from_list_unit(DW_ListUnit *lu, U64 index) +{ + U64 seg = 0; + U64 addr = max_U64; + U64 entry_count = lu->entries.size / lu->entry_size; + if (index < entry_count) { + U64 seg_off = lu->entry_size * index; + U64 addr_off = seg_off + lu->segment_selector_size; + MemoryCopy(&seg, lu->entries.str + seg_off, lu->segment_selector_size); + MemoryCopy(&addr, lu->entries.str + addr_off, lu->address_size); + // TODO: segment-based addressing + AssertAlways(seg == 0); + } else { + Assert(!"out of bounds index"); + } + return addr; +} + +internal U64 +dw_read_abbrev_tag(String8 data, U64 offset, DW_Abbrev *out_abbrev) { U64 total_bytes_read = 0; @@ -201,7 +424,7 @@ dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, DW_Abbrev U64 sub_kind_off = id_off; U64 id = 0; { - U64 bytes_read = dw_based_range_read_uleb128(base, range, id_off, &id); + U64 bytes_read = str8_deserial_read_uleb128(data, id_off, &id); sub_kind_off += bytes_read; total_bytes_read += bytes_read; } @@ -211,7 +434,7 @@ dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, 
DW_Abbrev U64 next_off = sub_kind_off; if(id != 0) { - U64 bytes_read = dw_based_range_read_uleb128(base, range, sub_kind_off, &sub_kind); + U64 bytes_read = str8_deserial_read_uleb128(data, sub_kind_off, &sub_kind); next_off += bytes_read; total_bytes_read += bytes_read; } @@ -220,17 +443,16 @@ dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, DW_Abbrev U8 has_children = 0; if(id != 0) { - total_bytes_read += dw_based_range_read_struct(base, range, next_off, &has_children); + total_bytes_read += str8_deserial_read_struct(data, next_off, &has_children); } //- rjf: fill abbrev if(out_abbrev != 0) { - DW_Abbrev abbrev = {0}; - abbrev.kind = DW_Abbrev_Tag; - abbrev.abbrev_range = rng_1u64(range.min+offset, range.min+offset+total_bytes_read); - abbrev.sub_kind = sub_kind; - abbrev.id = id; + DW_Abbrev abbrev = {0}; + abbrev.kind = DW_Abbrev_Tag; + abbrev.sub_kind = sub_kind; + abbrev.id = id; if(has_children) { abbrev.flags |= DW_AbbrevFlag_HasChildren; @@ -242,7 +464,7 @@ dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, DW_Abbrev } internal U64 -dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev) +dw_read_abbrev_attrib(String8 data, U64 offset, DW_Abbrev *out_abbrev) { U64 total_bytes_read = 0; @@ -251,7 +473,7 @@ dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW U64 sub_kind_off = id_off; U64 id = 0; { - U64 bytes_read = dw_based_range_read_uleb128(base, range, id_off, &id); + U64 bytes_read = str8_deserial_read_uleb128(data, id_off, &id); sub_kind_off += bytes_read; total_bytes_read += bytes_read; } @@ -260,7 +482,7 @@ dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW U64 sub_kind = 0; U64 next_off = sub_kind_off; { - U64 bytes_read = dw_based_range_read_uleb128(base, range, sub_kind_off, &sub_kind); + U64 bytes_read = str8_deserial_read_uleb128(data, sub_kind_off, &sub_kind); next_off += bytes_read; total_bytes_read 
+= bytes_read; } @@ -269,7 +491,7 @@ dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW U64 implicit_const = 0; if(sub_kind == DW_Form_ImplicitConst) { - U64 bytes_read = dw_based_range_read_uleb128(base, range, next_off, &implicit_const); + U64 bytes_read = str8_deserial_read_uleb128(data, next_off, &implicit_const); total_bytes_read += bytes_read; } @@ -278,7 +500,6 @@ dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW { DW_Abbrev abbrev = {0}; abbrev.kind = DW_Abbrev_Attrib; - abbrev.abbrev_range = rng_1u64(offset, offset+total_bytes_read); abbrev.sub_kind = sub_kind; abbrev.id = id; if(sub_kind == DW_Form_ImplicitConst) @@ -292,200 +513,16 @@ dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW return total_bytes_read; } -internal U64 -dw_based_range_read_attrib_form_value(void *base, Rng1U64 range, U64 offset, DW_Mode mode, U64 address_size, DW_FormKind form_kind, U64 implicit_const, DW_AttribValue *form_value_out) -{ - U64 bytes_read = 0; - U64 bytes_to_read = 0; - DW_AttribValue form_value = {0}; - - switch(form_kind) - { - case DW_Form_Null: break; - - //- rjf: 1-byte uint reads - case DW_Form_Ref1: case DW_Form_Data1: case DW_Form_Flag: - case DW_Form_Strx1: case DW_Form_Addrx1: - bytes_to_read = 1; goto read_fixed_uint; - - //- rjf: 2-byte uint reads - case DW_Form_Ref2: case DW_Form_Data2: case DW_Form_Strx2: - case DW_Form_Addrx2: - bytes_to_read = 2; goto read_fixed_uint; - - //- rjf: 3-byte uint reads - case DW_Form_Strx3: case DW_Form_Addrx3: - bytes_to_read = 3; goto read_fixed_uint; - - //- rjf: 4-byte uint reads - case DW_Form_Data4: case DW_Form_Ref4: case DW_Form_RefSup4: case DW_Form_Strx4: case DW_Form_Addrx4: - bytes_to_read = 4; goto read_fixed_uint; - - //- rjf: 8-byte uint reads - case DW_Form_Data8: case DW_Form_Ref8: case DW_Form_RefSig8: case DW_Form_RefSup8: - bytes_to_read = 8; goto read_fixed_uint; - - //- rjf: address-size reads - case 
DW_Form_Addr: bytes_to_read = address_size; goto read_fixed_uint; - - //- rjf: offset-size reads - case DW_Form_RefAddr: case DW_Form_SecOffset: case DW_Form_LineStrp: - case DW_Form_Strp: case DW_Form_StrpSup: - bytes_to_read = dw_offset_size_from_mode(mode); goto read_fixed_uint; - - //- rjf: fixed-size uint reads - { - read_fixed_uint:; - U64 value = 0; - bytes_read = dw_based_range_read(base, range, offset, bytes_to_read, &value); - form_value.v[0] = value; - } break; - - //- rjf: uleb128 reads - case DW_Form_UData: case DW_Form_RefUData: case DW_Form_Strx: - case DW_Form_Addrx: case DW_Form_LocListx: case DW_Form_RngListx: - { - U64 value = 0; - bytes_read = dw_based_range_read_uleb128(base, range, offset, &value); - form_value.v[0] = value; - } break; - - //- rjf: sleb128 reads - case DW_Form_SData: - { - S64 value = 0; - bytes_read = dw_based_range_read_sleb128(base, range, offset, &value); - form_value.v[0] = value; - } break; - - //- rjf: fixed-size uint read + skip - case DW_Form_Block1: bytes_to_read = 1; goto read_fixed_uint_skip; - case DW_Form_Block2: bytes_to_read = 2; goto read_fixed_uint_skip; - case DW_Form_Block4: bytes_to_read = 4; goto read_fixed_uint_skip; - { - read_fixed_uint_skip:; - U64 size = 0; - bytes_read = dw_based_range_read(base, range, offset, bytes_to_read, &size); - form_value.v[0] = size; - form_value.v[1] = offset; - bytes_read += size; - } break; - - //- rjf: uleb 128 read + skip - case DW_Form_Block: - { - U64 size = 0; - bytes_read = dw_based_range_read_uleb128(base, range, offset, &size); - form_value.v[0] = size; - form_value.v[1] = offset; - bytes_read += size; - } break; - - //- rjf: u64 ranges - case DW_Form_Data16: - { - U64 value1 = 0; - U64 value2 = 0; - bytes_read += dw_based_range_read_struct(base, range, offset, &value1); - bytes_read += dw_based_range_read_struct(base, range, offset + sizeof(U64), &value2); - form_value.v[0] = value1; - form_value.v[1] = value2; - } break; - - //- rjf: strings - case 
DW_Form_String: - { - String8 string = dw_based_range_read_string(base, range, offset); - bytes_read = string.size + 1; - U64 string_offset = offset; - U64 string_size = (offset + bytes_read) - string_offset; - form_value.v[0] = string_offset; - form_value.v[1] = string_offset+string_size-1; - } break; - - //- rjf: implicit const - case DW_Form_ImplicitConst: - { - // Special case. - // Unlike other forms that have their values stored in the .debug_info section, - // This one defines it's value in the .debug_abbrev section. - form_value.v[0] = implicit_const; - } break; - - //- rjf: expr loc - case DW_Form_ExprLoc: - { - U64 size = 0; - bytes_read = dw_based_range_read_uleb128(base, range, offset, &size); - form_value.v[0] = offset + bytes_read; - form_value.v[1] = size; - bytes_read += size; - } break; - - //- rjf: flag present - case DW_Form_FlagPresent: - { - form_value.v[0] = 1; - } break; - - case DW_Form_Indirect: - { - InvalidPath; - } break; - } - - if(form_value_out != 0) - { - *form_value_out = form_value; - } - - return bytes_read; -} - -//- rjf: important DWARF section base/range accessors - -internal DW_Mode -dw_mode_from_sec(DW_SectionArray *sections, DW_SectionKind kind) -{ - if(sections->v[kind].data.size > 0xffffffff) - { - return DW_Mode_64Bit; - } - else - { - return DW_Mode_32Bit; - } -} - -internal Rng1U64 -dw_range_from_sec(DW_SectionArray *sections, DW_SectionKind kind) -{ - Rng1U64 result = rng_1u64(0, sections->v[kind].data.size); - return result; -} - -internal void * -dw_base_from_sec(DW_SectionArray *sections, DW_SectionKind kind) -{ - return sections->v[kind].data.str; -} - -//////////////////////////////// -//~ rjf: Abbrev Table - internal DW_AbbrevTable -dw_make_abbrev_table(Arena *arena, DW_SectionArray *sections, U64 abbrev_offset) +dw_make_abbrev_table(Arena *arena, String8 abbrev_data, U64 abbrev_offset) { - void *file_base = dw_base_from_sec(sections, DW_Section_Abbrev); - Rng1U64 abbrev_range = dw_range_from_sec(sections, 
DW_Section_Abbrev); - //- rjf: count the tags we have U64 tag_count = 0; - for(U64 abbrev_read_off = abbrev_offset - abbrev_range.min;;) + for(U64 abbrev_read_off = abbrev_offset;;) { DW_Abbrev tag; { - U64 bytes_read = dw_based_range_read_abbrev_tag(file_base, abbrev_range, abbrev_read_off, &tag); + U64 bytes_read = dw_read_abbrev_tag(abbrev_data, abbrev_read_off, &tag); abbrev_read_off += bytes_read; if(bytes_read == 0 || tag.id == 0) { @@ -495,7 +532,7 @@ dw_make_abbrev_table(Arena *arena, DW_SectionArray *sections, U64 abbrev_offset) for(;;) { DW_Abbrev attrib = {0}; - U64 bytes_read = dw_based_range_read_abbrev_attrib_info(file_base, abbrev_range, abbrev_read_off, &attrib); + U64 bytes_read = dw_read_abbrev_attrib(abbrev_data, abbrev_read_off, &attrib); abbrev_read_off += bytes_read; if(bytes_read == 0 || attrib.id == 0) { @@ -512,11 +549,13 @@ dw_make_abbrev_table(Arena *arena, DW_SectionArray *sections, U64 abbrev_offset) MemorySet(table.entries, 0, sizeof(DW_AbbrevTableEntry)*table.count); U64 tag_idx = 0; - for(U64 abbrev_read_off = abbrev_offset - abbrev_range.min;;) + for(U64 abbrev_read_off = abbrev_offset;;) { + U64 tag_abbrev_off = abbrev_read_off; + DW_Abbrev tag; { - U64 bytes_read = dw_based_range_read_abbrev_tag(file_base, abbrev_range, abbrev_read_off, &tag); + U64 bytes_read = dw_read_abbrev_tag(abbrev_data, abbrev_read_off, &tag); abbrev_read_off += bytes_read; if(bytes_read == 0 || tag.id == 0) { @@ -527,14 +566,14 @@ dw_make_abbrev_table(Arena *arena, DW_SectionArray *sections, U64 abbrev_offset) // rjf: insert this tag into the table { table.entries[tag_idx].id = tag.id; - table.entries[tag_idx].off = tag.abbrev_range.min; + table.entries[tag_idx].off = tag_abbrev_off; tag_idx += 1; } for(;;) { DW_Abbrev attrib = {0}; - U64 bytes_read = dw_based_range_read_abbrev_attrib_info(file_base, abbrev_range, abbrev_read_off, &attrib); + U64 bytes_read = dw_read_abbrev_attrib(abbrev_data, abbrev_read_off, &attrib); abbrev_read_off += bytes_read; 
if(bytes_read == 0 || attrib.id == 0) { @@ -551,24 +590,15 @@ internal U64 dw_abbrev_offset_from_abbrev_id(DW_AbbrevTable table, U64 abbrev_id) { U64 abbrev_offset = max_U64; - if(table.count > 0) - { - S64 min = 0; - S64 max = (S64)table.count - 1; - while(min <= max) - { - S64 mid = (min + max) / 2; - if (abbrev_id > table.entries[mid].id) - { - min = mid + 1; - } - else if (abbrev_id < table.entries[mid].id) - { - max = mid - 1; - } - else - { - abbrev_offset = table.entries[mid].off; + if (table.count > 0) { + for (S64 l = 0, r = (S64)table.count - 1; l <= r; ) { + S64 m = l + (r - l) / 2; + if (abbrev_id > table.entries[m].id) { + l = m + 1; + } else if (abbrev_id < table.entries[m].id) { + r = m - 1; + } else { + abbrev_offset = table.entries[m].off; break; } } @@ -576,1168 +606,2108 @@ dw_abbrev_offset_from_abbrev_id(DW_AbbrevTable table, U64 abbrev_id) return abbrev_offset; } -//////////////////////////////// -//~ rjf: Miscellaneous DWARF Section Parsing - -//- rjf: .debug_ranges (DWARF V4) - -internal Rng1U64List -dw_v4_range_list_from_range_offset(Arena *arena, DW_SectionArray *sections, U64 addr_size, U64 comp_unit_base_addr, U64 range_off) +internal U64 +dw_read_form(String8 data, + U64 off, + DW_Version version, + DW_Format unit_format, + U64 address_size, + DW_FormKind form_kind, + U64 implicit_const, + DW_Form *form_out) { - void *base = dw_base_from_sec(sections, DW_Section_Ranges); - Rng1U64 rng = dw_range_from_sec(sections, DW_Section_Ranges); + U64 bytes_read = 0; + DW_Form form = {0}; - Rng1U64List list = {0}; - - U64 read_off = range_off; - U64 base_addr = comp_unit_base_addr; - - for(;read_off < rng.max;) - { - U64 v0 = 0; - U64 v1 = 0; - read_off += dw_based_range_read(base, rng, read_off, addr_size, &v0); - read_off += dw_based_range_read(base, rng, read_off, addr_size, &v1); - - //- rjf: base address entry - if((addr_size == 4 && v0 == 0xffffffff) || - (addr_size == 8 && v0 == 0xffffffffffffffff)) - { - base_addr = v1; + switch (form_kind) 
{ + case DW_Form_Null: break; + + case DW_Form_Addr: { + bytes_read = str8_deserial_read_block(data, off, address_size, &form.addr); + } break; + case DW_Form_Block2: { + U16 size = 0; + U64 size_size = str8_deserial_read_struct(data, off, &size); + if (size_size) { + U64 block_size = str8_deserial_read_block(data, off + size_size, size, &form.block); + if (block_size == size) { // same check as Block1: accepts empty blocks, rejects short reads + bytes_read = size_size + block_size; + } } - //- rjf: end-of-list entry - else if(v0 == 0 && v1 == 0) - { - break; + } break; + case DW_Form_Block4: { + U32 size = 0; + U64 size_size = str8_deserial_read_struct(data, off, &size); + if (size_size) { + U64 block_size = str8_deserial_read_block(data, off + size_size, size, &form.block); + if (block_size == size) { // same check as Block1: accepts empty blocks, rejects short reads + bytes_read = size_size + block_size; + } } - //- rjf: range list entry - else - { - U64 min_addr = v0 + base_addr; - U64 max_addr = v1 + base_addr; - rng1u64_list_push(arena, &list, rng_1u64(min_addr, max_addr)); + } break; + case DW_Form_Data2: { + bytes_read = str8_deserial_read_block(data, off, sizeof(U16), &form.data); + } break; + case DW_Form_Data4: { + bytes_read = str8_deserial_read_block(data, off, sizeof(U32), &form.data); + } break; + case DW_Form_Data8: { + bytes_read = str8_deserial_read_block(data, off, sizeof(U64), &form.data); + } break; + case DW_Form_String: { + bytes_read = str8_deserial_read_cstr(data, off, &form.string); + } break; + case DW_Form_Block: { + U64 size = 0; + U64 size_size = str8_deserial_read_uleb128(data, off, &size); + if (size_size) { + U64 block_size = str8_deserial_read_block(data, off + size_size, size, &form.block); + if (block_size == size) { // same check as Block1: accepts empty blocks, rejects short reads + bytes_read = size_size + block_size; + } } + } break; + case DW_Form_Block1: { + U8 size = 0; + U64 size_size = str8_deserial_read_struct(data, off, &size); + if (size_size) { + U64 block_size = str8_deserial_read_block(data, off + size_size, size, &form.block); // payload starts after the 1-byte length prefix + if (block_size == size) { + bytes_read = size_size + block_size; + } + } + } break; + case DW_Form_Data1: { +
bytes_read = str8_deserial_read_block(data, off, sizeof(U8), &form.data); + } break; + case DW_Form_Flag: { + bytes_read = str8_deserial_read_struct(data, off, &form.flag); + } break; + case DW_Form_SData: { + bytes_read = str8_deserial_read_sleb128(data, off, &form.sdata); + } break; + case DW_Form_UData: { + bytes_read = str8_deserial_read_uleb128(data, off, &form.udata); + } break; + case DW_Form_RefAddr: { + if (version < DW_Version_3) { + bytes_read = str8_deserial_read(data, off, &form.ref, address_size, address_size); + } else { + bytes_read = str8_deserial_read_dwarf_uint(data, off, unit_format, &form.ref); + } + } break; + case DW_Form_GNU_RefAlt: { + bytes_read = str8_deserial_read_dwarf_uint(data, off, unit_format, &form.ref); + } break; + case DW_Form_Ref1: { + bytes_read = str8_deserial_read(data, off, &form.ref, 1, 1); + } break; + case DW_Form_Ref2: { + bytes_read = str8_deserial_read(data, off, &form.ref, 2, 2); + } break; + case DW_Form_Ref4: { + bytes_read = str8_deserial_read(data, off, &form.ref, 4, 4); + } break; + case DW_Form_Ref8: { + bytes_read = str8_deserial_read(data, off, &form.ref, 8, 8); + } break; + case DW_Form_RefUData: { + bytes_read = str8_deserial_read_uleb128(data, off, &form.ref); + } break; + case DW_Form_SecOffset: + case DW_Form_LineStrp: + case DW_Form_GNU_StrpAlt: + case DW_Form_Strp: { + bytes_read = str8_deserial_read_dwarf_uint(data, off, unit_format, &form.sec_offset); + } break; + case DW_Form_ExprLoc: { + U64 expr_size = 0; + U64 expr_size_size = str8_deserial_read_uleb128(data, off, &expr_size); + if (expr_size_size) { + if (str8_deserial_read_block(data, off + expr_size_size, expr_size, &form.exprloc) == expr_size) { // a zero-length expression is valid and must still consume its length prefix + bytes_read = expr_size_size + expr_size; + } + } + } break; + case DW_Form_FlagPresent: { + form.flag = 1; + } break; + case DW_Form_RefSig8: { + //U64 ref = 0; + //bytes_read = str8_deserial_read_struct(data, off, &ref); + NotImplemented; + } break; + case DW_Form_Addrx: + case DW_Form_RngListx: + case 
DW_Form_Strx: { + bytes_read = str8_deserial_read_uleb128(data, off, &form.xval); + } break; + case DW_Form_RefSup4: { + //U32 ref_sup4 = 0; + //bytes_read = str8_deserial_read_struct(data, off, &ref_sup4); + NotImplemented; + } break; + case DW_Form_StrpSup: { + bytes_read = str8_deserial_read_dwarf_uint(data, off, unit_format, &form.strp_sup); + } break; + case DW_Form_Data16: { + bytes_read = str8_deserial_read_block(data, off, 16, &form.data); + } break; + case DW_Form_ImplicitConst: { + // Special case. + // Unlike other forms that have their values stored in the .debug_info section, + // This one defines it's value in the .debug_abbrev section. + form.implicit_const = implicit_const; + } break; + case DW_Form_LocListx: { + bytes_read = str8_deserial_read_uleb128(data, off, &form.xval); + } break; + case DW_Form_RefSup8: { + NotImplemented; + } break; + case DW_Form_Strx1: { + bytes_read = str8_deserial_read(data, off, &form.xval, 1, 1); + } break; + case DW_Form_Strx2: { + bytes_read = str8_deserial_read(data, off, &form.xval, 2, 2); + } break; + case DW_Form_Strx3: { + bytes_read = str8_deserial_read(data, off, &form.xval, 3, 3); + } break; + case DW_Form_Strx4: { + bytes_read = str8_deserial_read(data, off, &form.xval, 4, 4); + } break; + case DW_Form_Addrx1: { + bytes_read = str8_deserial_read(data, off, &form.xval, 1, 1); + } break; + case DW_Form_Addrx2: { + bytes_read = str8_deserial_read(data, off, &form.xval, 2, 2); + } break; + case DW_Form_Addrx3: { + bytes_read = str8_deserial_read(data, off, &form.xval, 3, 3); + } break; + case DW_Form_Addrx4: { + bytes_read = str8_deserial_read(data, off, &form.xval, 4, 4); + } break; + default: InvalidPath; break; + } + + if (form_out) { + *form_out = form; } - return list; + return bytes_read; } -//- rjf: .debug_pubtypes + .debug_pubnames (DWARF V4) - -internal DW_PubStringsTable -dw_v4_pub_strings_table_from_section_kind(Arena *arena, DW_SectionArray *sections, DW_SectionKind section_kind) +internal U64 
+dw_read_tag(Arena *arena, + String8 tag_data, + U64 tag_off, + U64 tag_base, + DW_AbbrevTable abbrev_table, + String8 abbrev_data, + DW_Version version, + DW_Format unit_format, + U64 address_size, + DW_Tag *tag_out) { - Temp scratch = scratch_begin(&arena, 1); + U64 tag_cursor = tag_off; - DW_PubStringsTable names_table = {0}; - - // TODO(rjf): Arbitrary choice. - names_table.size = 16384; - names_table.buckets = push_array(arena, DW_PubStringsBucket*, names_table.size); - - void *base = dw_base_from_sec(sections, section_kind); - Rng1U64 rng = dw_range_from_sec(sections, section_kind); - DW_Mode mode = sections->v[section_kind].mode; - U64 off_size = dw_offset_size_from_mode(mode); - U64 cursor = 0; - - U64 table_length = 0; - U16 unit_version = 0; - U64 cu_info_off = 0; - U64 cu_info_len = 0; - cursor += dw_based_range_read_length(base, rng, cursor, &table_length); - cursor += dw_based_range_read_struct(base, rng, cursor, &unit_version); - cursor += dw_based_range_read(base, rng, cursor, off_size, &cu_info_off); - cursor += dw_based_range_read_length(base, rng, cursor, &cu_info_len); - - for(;;) - { - U64 info_off = 0; - { - U64 bytes_read = dw_based_range_read(base, rng, cursor, off_size, &info_off); - cursor += bytes_read; - if(bytes_read == 0) - { + // read tag abbrev id + U64 tag_abbrev_id = 0; + U64 tag_abbrev_id_size = str8_deserial_read_uleb128(tag_data, tag_cursor, &tag_abbrev_id); + Assert(tag_abbrev_id_size); + tag_cursor += tag_abbrev_id_size; + + // read tag abbrev + U64 abbrev_cursor = dw_abbrev_offset_from_abbrev_id(abbrev_table, tag_abbrev_id); + DW_Abbrev tag_abbrev = {0}; + U64 tag_abbrev_size = dw_read_abbrev_tag(abbrev_data, abbrev_cursor, &tag_abbrev); + + // read attribs + DW_AttribList attribs = {0}; + if (tag_abbrev_size > 0) { + abbrev_cursor += tag_abbrev_size; + + for (; tag_cursor < tag_data.size && abbrev_cursor < abbrev_data.size; ) { + U64 attrib_tag_cursor = tag_cursor; + U64 attrib_abbrev_off = abbrev_cursor; + + // read attrib 
abbrev + DW_Abbrev attrib_abbrev = {0}; + abbrev_cursor += dw_read_abbrev_attrib(abbrev_data, abbrev_cursor, &attrib_abbrev); + if (attrib_abbrev.id == 0) { break; } - } - - //- rjf: if we got a nonzero .debug_info offset, we've found a valid entry. - if(info_off != 0) - { - String8 string = dw_based_range_read_string(base, rng, cursor); - cursor += string.size + 1; - U64 hash = dw_hash_from_string(string); - U64 bucket_idx = hash % names_table.size; - - DW_PubStringsBucket *bucket = push_array(arena, DW_PubStringsBucket, 1); - bucket->next = names_table.buckets[bucket_idx]; - bucket->string = string; - bucket->info_off = info_off; - bucket->cu_info_off = cu_info_off; - names_table.buckets[bucket_idx] = bucket; - } - - //- rjf: if we did not read a proper entry in the table, we need to try to - // read the header of the next table. - else - { - U64 next_table_length = 0; - { - U64 bytes_read = dw_based_range_read_length(base, rng, cursor, &next_table_length); - if(bytes_read == 0 || next_table_length == 0) - { + DW_AttribKind attrib_kind = (DW_AttribKind)attrib_abbrev.id; + DW_FormKind form_kind = (DW_FormKind)attrib_abbrev.sub_kind; + + // special case, allows producer to embed form in .debug_info + if (form_kind == DW_Form_Indirect) { + U64 form_kind_size = str8_deserial_read_uleb128(tag_data, tag_cursor, &form_kind); + + if (form_kind_size == 0) { + Assert(!"unable to read indirect form kind"); break; } - cursor += bytes_read; + + tag_cursor += form_kind_size; } - cursor += dw_based_range_read_struct(base, rng, cursor, &unit_version); - cursor += dw_based_range_read(base, rng, cursor, off_size, &cu_info_off); - cursor += dw_based_range_read_length(base, rng, cursor, &cu_info_len); + + // read form value + DW_Form form = {0}; + tag_cursor += dw_read_form(tag_data, tag_cursor, version, unit_format, address_size, form_kind, attrib_abbrev.const_value, &form); + + // fill out node + DW_AttribNode *attrib_n = push_array(arena, DW_AttribNode, 1); + attrib_n->v.info_off 
= tag_base + attrib_tag_cursor; + attrib_n->v.abbrev_off = attrib_abbrev_off; + attrib_n->v.abbrev_id = attrib_abbrev.id; + attrib_n->v.attrib_kind = attrib_kind; + attrib_n->v.form_kind = form_kind; + attrib_n->v.form = form; + + // push node to list + SLLQueuePush(attribs.first, attribs.last, attrib_n); + ++attribs.count; } } - - scratch_end(scratch); - - return names_table; + + // fill out tag + tag_out->abbrev_id = tag_abbrev_id; + tag_out->has_children = !!(tag_abbrev.flags & DW_AbbrevFlag_HasChildren); + tag_out->kind = (DW_TagKind)tag_abbrev.sub_kind; + tag_out->attribs = attribs; + tag_out->info_off = tag_base + tag_off; + + U64 bytes_read = tag_cursor - tag_off; + return bytes_read; } -//- rjf: .debug_str_offsets (DWARF V5) - internal U64 -dw_v5_offset_from_offs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index) +dw_read_tag_cu(Arena *arena, DW_Input *input, DW_CompUnit *cu, U64 info_off, DW_Tag *tag_out) { - U64 result = 0; - - DW_Mode mode = sections->v[section].mode; - void *sec_base = dw_base_from_sec(sections, section); - Rng1U64 rng = dw_range_from_sec(sections, section); - U64 cursor = base; - - //- rjf: get the length of each entry - U64 entry_len = mode == DW_Mode_64Bit ? 8 : 4; - - //- rjf: parse the unit's length (not including the length itself) - U64 unit_length = 0; - cursor += dw_based_range_read_length(sec_base, rng, cursor, &unit_length); - - //- rjf: parse version - U16 version = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &version); - Assert(version == 5); // must be 5 as of V5. - - //- rjf: parse padding - U16 padding = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &padding); - Assert(padding == 0); // must be 0 as of V5. 
- - //- rjf: read - if (unit_length >= sizeof(U16)*2) - { - void *entries = (U8 *)sec_base + cursor; - U64 count = (unit_length - sizeof(U16)*2) / entry_len; - if(0 <= index && index < count) - { - switch(entry_len) - { - default: break; - case 4: result = ((U32 *)entries)[index]; break; - case 8: result = ((U64 *)entries)[index]; break; + String8 tag_data = str8_substr(input->sec[DW_Section_Info].data, cu->info_range); + U64 tag_off = info_off - cu->info_range.min; + return dw_read_tag(arena, tag_data, tag_off, cu->info_range.min, cu->abbrev_table, cu->abbrev_data, cu->version, cu->format, cu->address_size, tag_out); +} + +internal B32 +dw_try_u64_from_const_value(U64 type_byte_size, DW_ATE type_encoding, String8 const_value, U64 *value_out) +{ + B32 is_parsed = 0; + if (const_value.size <= type_byte_size) { + U64 value_size = Min(type_byte_size, const_value.size); + if (value_size <= sizeof(*value_out)) { + MemoryZeroStruct(value_out); + MemoryCopy(value_out, const_value.str, value_size); + if (type_encoding == DW_ATE_Signed || type_encoding == DW_ATE_SignedChar) { + *value_out = extend_sign64(*value_out, value_size); } + is_parsed = 1; + } else { + Assert(!"out value overflow"); } } - - return result; + return is_parsed; } -//- rjf: .debug_addr parsing - internal U64 -dw_v5_addr_from_addrs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index) +dw_u64_from_const_value(String8 const_value) { - U64 result = 0; - - void *sec_base = dw_base_from_sec(sections, section); - Rng1U64 rng = dw_range_from_sec(sections, section); - U64 cursor = base; - - //- rjf: parse the unit's length (not including the length itself) - U64 unit_length = 0; - cursor += dw_based_range_read_length(sec_base, rng, cursor, &unit_length); - - //- rjf: parse version - U16 version = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &version); - Assert(version == 5); // must be 5 as of V5. 
- - //- rjf: parse address size - U8 address_size = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &address_size); - - //- rjf: parse segment selector size - U8 segment_selector_size = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &segment_selector_size); - - //- rjf: read - U64 entry_size = address_size + segment_selector_size; - U64 count = (unit_length - sizeof(U16)*2) / entry_size; - if(0 <= index && index < count) - { - void *entry = (U8 *)dw_based_range_ptr(sec_base, rng, cursor) + entry_size*index; - Rng1U64 entry_rng = rng_1u64(0, entry_size); - U64 segment = 0; - U64 addr = 0; - dw_based_range_read(entry, entry_rng, 0, sizeof(segment), &segment); - dw_based_range_read(entry, entry_rng, segment_selector_size, sizeof(addr), &addr); - result = addr; - } - + U64 result = 0; + B32 is_converted = dw_try_u64_from_const_value(sizeof(U64), DW_ATE_Unsigned, const_value, &result); + Assert(is_converted); // TODO: error handling return result; } -//- rjf: .debug_rnglists + .debug_loclists parsing - internal U64 -dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(DW_SectionArray *sections, DW_SectionKind section_kind, U64 base, U64 index) +dw_interp_sec_offset(DW_FormKind form_kind, DW_Form form) { - // - // NOTE(rjf): This is only appropriate to call when DW_Form_RngListx is - // used to access a range list, *OR* when DW_Form_LocListx is used to - // access a location list. Otherwise, DW_Form_SecOffset is required. - // - // See the DWARF V5 spec (February 13, 2017), page 242. (rnglists) - // See the DWARF V5 spec (February 13, 2017), page 215. (loclists) - // - - U64 result = 0; - - DW_Mode mode = sections->v[section_kind].mode; - void *sec_base = dw_base_from_sec(sections, section_kind); - Rng1U64 rng = dw_range_from_sec(sections, section_kind); - U64 cursor = base; - - //- rjf: get the length of each entry - U64 entry_len = mode == DW_Mode_64Bit ? 
8 : 4; - - //- rjf: parse the unit's length (not including the length itself) - U64 unit_length = 0; - cursor += dw_based_range_read_length(sec_base, rng, cursor, &unit_length); - - //- rjf: parse version - U16 version = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &version); - Assert(version == 5); // must be 5 as of V5. - - //- rjf: parse address size - U8 address_size = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &address_size); - - //- rjf: parse segment selector size - U8 segment_selector_size = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &segment_selector_size); - - //- rjf: parse offset entry count - U32 offset_entry_count = 0; - cursor += dw_based_range_read_struct(sec_base, rng, cursor, &offset_entry_count); - - //- rjf: read from offsets array - U64 table_off = cursor; - void *offsets_arr = dw_based_range_ptr(sec_base, rng, cursor); - if(0 <= index && index < (U64)offset_entry_count) - { - U64 rnglist_offset = 0; - switch(entry_len) - { - default: break; - case 4: rnglist_offset = ((U32 *)offsets_arr)[index]; break; - case 8: rnglist_offset = ((U64 *)offsets_arr)[index]; break; - } - result = rnglist_offset+table_off; + U64 sec_offset = 0; + if (form_kind == DW_Form_SecOffset) { + sec_offset = form.sec_offset; + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); } - - return result; -} - -internal Rng1U64List -dw_v5_range_list_from_rnglist_offset(Arena *arena, DW_SectionArray *sections, DW_SectionKind section, U64 addr_size, U64 addr_section_base, U64 offset) -{ - Rng1U64List list = {0}; - - void *base = dw_base_from_sec(sections, section); - Rng1U64 rng = dw_range_from_sec(sections, section); - U64 cursor = offset; - - U64 base_addr = 0; - - for(B32 done = 0; !done;) - { - U8 kind8 = 0; - cursor += dw_based_range_read_struct(base, rng, cursor, &kind8); - DW_RngListEntryKind kind = (DW_RngListEntryKind)kind8; - - switch(kind) - { - //- rjf: can be used in split and 
non-split units: - default: - case DW_RngListEntryKind_EndOfList: - { - done = 1; - } break; - - case DW_RngListEntryKind_BaseAddressX: - { - U64 base_addr_idx = 0; - cursor += dw_based_range_read_uleb128(base, rng, cursor, &base_addr_idx); - base_addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, addr_section_base, base_addr_idx); - } break; - - case DW_RngListEntryKind_StartxEndx: - { - U64 start_addr_idx = 0; - U64 end_addr_idx = 0; - cursor += dw_based_range_read_uleb128(base, rng, cursor, &start_addr_idx); - cursor += dw_based_range_read_uleb128(base, rng, cursor, &end_addr_idx); - U64 start_addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, addr_section_base, start_addr_idx); - U64 end_addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, addr_section_base, end_addr_idx); - rng1u64_list_push(arena, &list, rng_1u64(start_addr, end_addr)); - } break; - - case DW_RngListEntryKind_StartxLength: - { - U64 start_addr_idx = 0; - U64 length = 0; - cursor += dw_based_range_read_uleb128(base, rng, cursor, &start_addr_idx); - cursor += dw_based_range_read_uleb128(base, rng, cursor, &length); - U64 start_addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, addr_section_base, start_addr_idx); - U64 end_addr = start_addr + length; - rng1u64_list_push(arena, &list, rng_1u64(start_addr, end_addr)); - } break; - - case DW_RngListEntryKind_OffsetPair: - { - U64 start_offset = 0; - U64 end_offset = 0; - cursor += dw_based_range_read_uleb128(base, rng, cursor, &start_offset); - cursor += dw_based_range_read_uleb128(base, rng, cursor, &end_offset); - rng1u64_list_push(arena, &list, rng_1u64(start_offset + base_addr, end_offset + base_addr)); - } break; - - //- rjf: non-split units only: - - case DW_RngListEntryKind_BaseAddress: - { - U64 new_base_addr = 0; - cursor += dw_based_range_read(base, rng, cursor, addr_size, &new_base_addr); - base_addr = new_base_addr; - } break; - - case 
DW_RngListEntryKind_StartEnd: - { - U64 start = 0; - U64 end = 0; - cursor += dw_based_range_read(base, rng, cursor, addr_size, &start); - cursor += dw_based_range_read(base, rng, cursor, addr_size, &end); - rng1u64_list_push(arena, &list, rng_1u64(start, end)); - } break; - - case DW_RngListEntryKind_StartLength: - { - U64 start = 0; - U64 length = 0; - cursor += dw_based_range_read(base, rng, cursor, addr_size, &start); - cursor += dw_based_range_read_uleb128(base, rng, cursor, &length); - rng1u64_list_push(arena, &list, rng_1u64(start, start+length)); - } break; - } - } - - return list; -} - -//////////////////////////////// -//~ rjf: Attrib Value Parsing - -internal DW_AttribValueResolveParams -dw_attrib_value_resolve_params_from_comp_root(DW_CompRoot *root) -{ - DW_AttribValueResolveParams params = {0}; - params.version = root->version; - params.language = root->language; - params.addr_size = root->address_size; - params.containing_unit_info_off = root->info_off; - params.debug_addrs_base = root->addrs_base; - params.debug_rnglists_base = root->rnglist_base; - params.debug_str_offs_base = root->stroffs_base; - params.debug_loclists_base = root->loclist_base; - return params; -} - -internal DW_AttribValue -dw_attrib_value_from_form_value(DW_SectionArray *sections, - DW_AttribValueResolveParams resolve_params, - DW_FormKind form_kind, - DW_AttribClass value_class, - DW_AttribValue form_value) -{ - DW_AttribValue value = {0}; - - //~ rjf: DWARF V5 value parsing - - //- rjf: (DWARF V5 ONLY) the form value is storing an address index (ADDRess indeX), which we - // must resolve to an actual address using the containing comp unit's contribution to the - // .debug_addr section. 
- if(resolve_params.version >= DW_Version_5 && - value_class == DW_AttribClass_Address && - (form_kind == DW_Form_Addrx || form_kind == DW_Form_Addrx1 || - form_kind == DW_Form_Addrx2 || form_kind == DW_Form_Addrx3 || - form_kind == DW_Form_Addrx4)) - { - U64 addr_index = form_value.v[0]; - U64 addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, resolve_params.debug_addrs_base, addr_index); - value.v[0] = addr; - } - //- rjf: (DWARF V5 ONLY) lookup into the .debug_loclists section via an index - else if(resolve_params.version >= DW_Version_5 && - value_class == DW_AttribClass_LocList && - form_kind == DW_Form_LocListx) - { - U64 loclist_index = form_value.v[0]; - U64 loclist_offset = dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(sections, DW_Section_LocLists, resolve_params.debug_loclists_base, loclist_index); - value.section = DW_Section_LocLists; - value.v[0] = loclist_offset; - } - //- rjf: (DWARF V5 ONLY) lookup into the .debug_loclists section via an offset - else if(resolve_params.version >= DW_Version_5 && - (value_class == DW_AttribClass_LocList || value_class == DW_AttribClass_LocListPtr) && - form_kind == DW_Form_SecOffset) - { - U64 loclist_offset = form_value.v[0]; - value.section = DW_Section_LocLists; - value.v[0] = loclist_offset; - } - //- rjf: (DWARF V5 ONLY) lookup into the .debug_rnglists section via an index - else if(resolve_params.version >= DW_Version_5 && - (value_class == DW_AttribClass_RngListPtr || value_class == DW_AttribClass_RngList) && - form_kind == DW_Form_RngListx) - { - U64 rnglist_index = form_value.v[0]; - U64 rnglist_offset = dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(sections, DW_Section_RngLists, resolve_params.debug_rnglists_base, rnglist_index); - value.section = DW_Section_RngLists; - value.v[0] = rnglist_offset; - } - //- rjf: (DWARF V5 ONLY) lookup into the .debug_rnglists section via an offset - else if(resolve_params.version >= DW_Version_5 && - (value_class == 
DW_AttribClass_RngListPtr || value_class == DW_AttribClass_RngList) && - form_kind != DW_Form_RngListx) - { - U64 rnglist_offset = form_value.v[0]; - value.section = DW_Section_RngLists; - value.v[0] = rnglist_offset; - } - //- rjf: (DWARF V5 ONLY) .debug_str_offsets table index, that we need to resolve - // using the containing compilation unit's contribution to the section - else if(resolve_params.version >= DW_Version_5 && - value_class == DW_AttribClass_String && - (form_kind == DW_Form_Strx || - form_kind == DW_Form_Strx1 || - form_kind == DW_Form_Strx2 || - form_kind == DW_Form_Strx3 || - form_kind == DW_Form_Strx4)) - { - DW_SectionKind section = DW_Section_Str; - U64 str_index = form_value.v[0]; - U64 str_offset = dw_v5_offset_from_offs_section_base_index(sections, DW_Section_StrOffsets, resolve_params.debug_str_offs_base, str_index); - void *base = dw_base_from_sec(sections, section); - Rng1U64 range = dw_range_from_sec(sections, section); - String8 string = dw_based_range_read_string(base, range, str_offset); - value.section = section; - value.v[0] = str_offset; - value.v[1] = value.v[0] + string.size; - } - //- rjf: (DWARF V5 ONLY) reference that we should resolve through ref_addr_desc - else if(resolve_params.version >= DW_Version_5 && - value_class == DW_AttribClass_Reference && - form_kind == DW_Form_RefAddr) - { - // TODO(nick): DWARF 5 @dwarf_v5 - } - //- TODO(rjf): (DWARF V5 ONLY) reference resolution using the .debug_names section - else if(resolve_params.version >= DW_Version_5 && - form_kind == DW_Form_RefSig8) - { - // TODO(nick): DWARF 5: We need to handle .debug_names section in order to resolve this value. 
@dwarf_v5 - value.v[0] = max_U64; - } - - //~ rjf: All other value parsing (DWARF V4 and below) - - //- rjf: reference to an offset relative to the compilation unit's info base - else if (value_class == DW_AttribClass_Reference && - (form_kind == DW_Form_Ref1 || - form_kind == DW_Form_Ref2 || - form_kind == DW_Form_Ref4 || - form_kind == DW_Form_Ref8 || - form_kind == DW_Form_RefUData)) - { - value.v[0] = resolve_params.containing_unit_info_off + form_value.v[0]; - } - - //- rjf: info-section string -- this is a string that is just pasted straight - // into the .debug_info section - else if(value_class == DW_AttribClass_String && form_kind == DW_Form_String) - { - value = form_value; - value.section = DW_Section_Info; - } - - //- rjf: string-section string -- this is a string that's inside the .debug_str - // section, and we've been provided an offset to it - else if(value_class == DW_AttribClass_String && - (form_kind == DW_Form_Strp || - form_kind == DW_Form_StrpSup)) - { - - DW_SectionKind section = DW_Section_Str; - void *base = dw_base_from_sec(sections, section); - Rng1U64 range = dw_range_from_sec(sections, section); - String8 string = dw_based_range_read_string(base, range, form_value.v[0]); - value.section = section; - value.v[0] = form_value.v[0]; - value.v[1] = value.v[0] + string.size; - } - //- rjf: line-string - else if(value_class == DW_AttribClass_String && form_kind == DW_Form_LineStrp) - { - DW_SectionKind section = DW_Section_LineStr; - void *base = dw_base_from_sec(sections, section); - Rng1U64 range = dw_range_from_sec(sections, section); - String8 string = dw_based_range_read_string(base, range, form_value.v[0]); - value.section = section; - value.v[0] = form_value.v[0]; - value.v[1] = value.v[0] + string.size; - } - //- rjf: .debug_ranges - else if(resolve_params.version < DW_Version_5 && - (value_class == DW_AttribClass_RngListPtr || value_class == DW_AttribClass_RngList) && - (form_kind == DW_Form_SecOffset)) - { - U64 ranges_offset = 
form_value.v[0]; - value.section = DW_Section_Ranges; - value.v[0] = ranges_offset; - } - //- rjf: .debug_loc - else if(resolve_params.version < DW_Version_5 && - (value_class == DW_AttribClass_LocListPtr || value_class == DW_AttribClass_LocList) && - (form_kind == DW_Form_SecOffset)) - { - U64 offset = form_value.v[0]; - value.section = DW_Section_Loc; - value.v[0] = offset; - } - //- rjf: invalid attribute class - else if(value_class == 0) - { - Assert(!"attribute class was not resolved"); - } - //- rjf: in all other cases, we can accept the form_value as the correct - // representation for the parsed value, so we can just copy it over. - else - { - value = form_value; - } - - return value; + return sec_offset; } internal String8 -dw_string_from_attrib_value(DW_SectionArray *sections, DW_AttribValue value) +dw_interp_exprloc(DW_FormKind form_kind, DW_Form form) { - DW_SectionKind section_kind = value.section; - void *base = dw_base_from_sec(sections, section_kind); - Rng1U64 range = dw_range_from_sec(sections, section_kind); + String8 expr = {0}; + if (form_kind == DW_Form_ExprLoc) { + expr = form.exprloc; + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); + } + return expr; +} +internal U128 +dw_interp_const_u128(DW_FormKind form_kind, DW_Form form) +{ + AssertAlways(form.data.size <= sizeof(U128)); + U128 result = {0}; + MemoryCopy(&result.u64[0], form.data.str, form.data.size); + return result; +} + +internal U64 +dw_interp_const64(U64 type_byte_size, DW_ATE type_encoding, DW_FormKind form_kind, DW_Form form) +{ + U64 result = max_U64; + if (form_kind == DW_Form_Data1 || form_kind == DW_Form_Data2 || form_kind == DW_Form_Data4 || form_kind == DW_Form_Data16) { + if (form.data.size <= sizeof(result)) { + if (!dw_try_u64_from_const_value(type_byte_size, type_encoding, form.data, &result)) { + Assert(!"unable to decode data"); + } + } else { + Assert(!"unable to cast U128 to U64"); + } + } else if (form_kind == DW_Form_UData) { + result 
= form.udata; + } else if (form_kind == DW_Form_SData) { + result = form.sdata; + } else if (form_kind == DW_Form_ImplicitConst) { + result = form.implicit_const; + } else if (form_kind == DW_Form_Null) { + // skip + } else { + AssertAlways(!"unexpected form"); + } + return result; +} + +internal U64 +dw_interp_const_u64(DW_FormKind form_kind, DW_Form form) +{ + return dw_interp_const64(DW_ATE_Unsigned, sizeof(U64), form_kind, form); +} + +internal U32 +dw_interp_const_u32(DW_FormKind form_kind, DW_Form form) +{ + U64 const64 = dw_interp_const_u64(form_kind, form); + U32 const32 = safe_cast_u32(const64); + return const32; +} + +internal S64 +dw_interp_const_s64(DW_FormKind form_kind, DW_Form form) +{ + U64 const_u64 = dw_interp_const_u64(form_kind, form); + S64 const_s64 = (S64)const_u64; + return const_s64; +} + +internal S32 +dw_interp_const_s32(DW_FormKind form_kind, DW_Form form) +{ + U32 const_u32 = dw_interp_const_u32(form_kind, form); + S32 const_s32 = (S32)const_u32; + return const_s32; +} + +internal U64 +dw_interp_address(U64 address_size, U64 base_addr, DW_ListUnit *addr_lu, DW_FormKind form_kind, DW_Form form) +{ + U64 address = 0; + if (form_kind == DW_Form_Addr) { + if (!dw_try_u64_from_const_value(address_size, DW_ATE_Address, form.addr, &address)) { + AssertAlways(!"unable to decode address"); + } + } else if (form_kind == DW_Form_Addrx || form_kind == DW_Form_Addrx1 || form_kind == DW_Form_Addrx2 || + form_kind == DW_Form_Addrx3 || form_kind == DW_Form_Addrx4) { + address = dw_addr_from_list_unit(addr_lu, form.xval); + } else if (form_kind == DW_Form_SecOffset) { + if (addr_lu->segment_selector_size > 0) { + AssertAlways(!"TODO: support for segmented address space"); + } + if (form.sec_offset + addr_lu->segment_selector_size + addr_lu->address_size <= addr_lu->entries.size) { + MemoryCopy(&address, addr_lu->entries.str + form.sec_offset, addr_lu->address_size); + } else { + Assert(!"out of bounds .debug_addr offset"); + } + } else if (form_kind != 
DW_Form_Null) { + AssertAlways(!"unexpected form"); + } + return address; +} + +internal String8 +dw_interp_block(DW_Input *input, DW_CompUnit *cu, DW_FormKind form_kind, DW_Form form) +{ + NotImplemented; + return str8_zero(); +} + +internal String8 +dw_interp_string(DW_Input *input, + DW_Format unit_format, + DW_ListUnit *str_offsets, + DW_FormKind form_kind, + DW_Form form) +{ String8 string = {0}; - string.str = (U8 *)dw_based_range_ptr(base, range, value.v[0]); - string.size = value.v[1] - value.v[0]; + if (form_kind == DW_Form_String) { + string = form.string; + } else if (form_kind == DW_Form_Strp) { + U64 bytes_read = str8_deserial_read_cstr(input->sec[DW_Section_Str].data, form.sec_offset, &string); + Assert(bytes_read > 0); + } else if (form_kind == DW_Form_LineStrp) { + U64 bytes_read = str8_deserial_read_cstr(input->sec[DW_Section_LineStr].data, form.sec_offset, &string); + Assert(bytes_read > 0); + } else if (form_kind == DW_Form_StrpSup) { + U64 bytes_read = str8_deserial_read_cstr(input->sec[DW_Section_Str].data, form.strp_sup, &string); + Assert(bytes_read > 0); + } else if (form_kind == DW_Form_Strx || form_kind == DW_Form_Strx1 || + form_kind == DW_Form_Strx2 || form_kind == DW_Form_Strx3 || + form_kind == DW_Form_Strx4) { + U64 sec_offset = dw_offset_from_list_unit(str_offsets, form.xval); + if (sec_offset < input->sec[DW_Section_Str].data.size) { + U64 bytes_read = str8_deserial_read_cstr(input->sec[DW_Section_Str].data, sec_offset, &string); + Assert(bytes_read > 0); + } else { + AssertAlways(!"unable to translate index to offset"); + } + } else if (form_kind == DW_Form_GNU_StrpAlt) { + NotImplemented; + } else if (form_kind == DW_Form_GNU_StrIndex) { + NotImplemented; + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); + } return string; } -internal Rng1U64List -dw_range_list_from_high_low_pc_and_ranges_attrib_value(Arena *arena, DW_SectionArray *sections, U64 address_size, U64 comp_unit_base_addr, U64 
addr_section_base, U64 low_pc, U64 high_pc, DW_AttribValue ranges_value) +internal String8 +dw_interp_line_ptr(DW_Input *input, DW_FormKind form_kind, DW_Form form) { - Rng1U64List list = {0}; - switch(ranges_value.section) - { - //- rjf: (DWARF V5 ONLY) .debug_rnglists offset - case DW_Section_RngLists: - { - list = dw_v5_range_list_from_rnglist_offset(arena, sections, ranges_value.section, address_size, addr_section_base, ranges_value.v[0]); - } break; - - //- rjf: (DWARF V4 and earlier) .debug_ranges parsing - case DW_Section_Ranges: - { - list = dw_v4_range_list_from_range_offset(arena, sections, address_size, comp_unit_base_addr, ranges_value.v[0]); - } break; - - //- rjf: fall back to trying to use low/high PCs - default: - { - rng1u64_list_push(arena, &list, rng_1u64(low_pc, high_pc)); - } break; - } - return list; -} - -//////////////////////////////// -//~ rjf: Tag Parsing - -internal DW_AttribListParseResult -dw_parse_attrib_list_from_info_abbrev_offsets(Arena *arena, - DW_SectionArray *sections, - DW_Version ver, - DW_Ext ext, - DW_Language lang, - U64 address_size, - U64 info_off, - U64 abbrev_off, - B32 relaxed) -{ - //- rjf: set up prereqs - DW_Mode info_mode = sections->v[DW_Section_Info].mode; - DW_Mode abbrev_mode = sections->v[DW_Section_Abbrev].mode; - void *info_base = dw_base_from_sec(sections, DW_Section_Info); - void *abbrev_base = dw_base_from_sec(sections, DW_Section_Abbrev); - Rng1U64 info_range = dw_range_from_sec(sections, DW_Section_Info); - Rng1U64 abbrev_range = dw_range_from_sec(sections, DW_Section_Abbrev); - - //- rjf: set up read offsets - U64 info_read_off = info_off; - U64 abbrev_read_off = abbrev_off; - - //- rjf: parse all attributes - DW_AttribListParseResult result = {0}; - for(B32 good_abbrev = 1; good_abbrev;) - { - U64 attrib_info_offset = info_read_off; - - //- rjf: parse abbrev attrib info - DW_Abbrev abbrev = {0}; - { - U64 bytes_read = dw_based_range_read_abbrev_attrib_info(abbrev_base, abbrev_range, abbrev_read_off, 
&abbrev); - abbrev_read_off += bytes_read; - good_abbrev = abbrev.id != 0; - } - - //- rjf: extract attrib info from abbrev - DW_AttribKind attrib_kind = (DW_AttribKind)abbrev.id; - DW_FormKind form_kind = (DW_FormKind)abbrev.sub_kind; - DW_AttribClass attrib_class = dw_pick_attrib_value_class(ver, ext, lang, relaxed, attrib_kind, form_kind); - - //- rjf: parse the form value from the file - DW_AttribValue form_value = {0}; - if(good_abbrev) - { - // Special case form that allows user to encode attribute form in .debug_info - if(form_kind == DW_Form_Indirect) - { - U64 override_form_kind = 0; - info_read_off += dw_based_range_read_uleb128(info_base, info_range, info_read_off, &override_form_kind); - form_kind = (DW_FormKind)override_form_kind; - } - U64 bytes_read = dw_based_range_read_attrib_form_value(info_base, info_range, info_read_off, info_mode, address_size, - form_kind, abbrev.const_value, &form_value); - info_read_off += bytes_read; - } - - //- rjf: push this parsed attrib to the list - if(good_abbrev) - { - DW_AttribNode *node = push_array(arena, DW_AttribNode, 1); - node->attrib.info_off = attrib_info_offset; - node->attrib.abbrev_id = abbrev.id; - node->attrib.attrib_kind = attrib_kind; - node->attrib.form_kind = form_kind; - node->attrib.value_class = attrib_class; - node->attrib.form_value = form_value; - result.attribs.count += 1; - SLLQueuePush(result.attribs.first, result.attribs.last, node); - } - } - - result.max_info_off = info_read_off; - result.max_abbrev_off = abbrev_read_off; - return result; -} - -internal DW_Tag * -dw_tag_from_info_offset(Arena *arena, - DW_SectionArray *sections, - DW_AbbrevTable abbrev_table, - DW_Version ver, - DW_Ext ext, - DW_Language lang, - U64 address_size, - U64 info_offset, - B32 relaxed) -{ - void *info_base = dw_base_from_sec(sections, DW_Section_Info); - Rng1U64 info_range = dw_range_from_sec(sections, DW_Section_Info); - void *abbrev_base = dw_base_from_sec(sections, DW_Section_Abbrev); - Rng1U64 abbrev_range 
= dw_range_from_sec(sections, DW_Section_Abbrev); - - DW_Tag *tag = push_array(arena, DW_Tag, 1); - - //- rjf: calculate .debug_info read cursor, relative to info range minimum - U64 info_read_off = info_offset - info_range.min; - - //- rjf: read abbrev ID - U64 abbrev_id = 0; - info_read_off += dw_based_range_read_uleb128(info_base, info_range, info_read_off, &abbrev_id); - B32 good_abbrev_id = abbrev_id != 0; - - //- rjf: figure out abbrev offset for this ID - U64 abbrev_offset = 0; - if(good_abbrev_id) - { - abbrev_offset = dw_abbrev_offset_from_abbrev_id(abbrev_table, abbrev_id); - } - - //- rjf: calculate .debug_abbrev read cursor, relative to abbrev range minimum - U64 abbrev_read_off = abbrev_offset - abbrev_range.min; - - //- rjf: parse abbrev tag info - DW_Abbrev abbrev_tag_info = {0}; - B32 good_tag_abbrev = 0; - if(good_abbrev_id) - { - abbrev_read_off += dw_based_range_read_abbrev_tag(abbrev_base, abbrev_range, abbrev_read_off, &abbrev_tag_info); - good_tag_abbrev = 1;//abbrev_tag_info.id != 0; - } - - //- rjf: parse all attributes for this tag - U64 attribs_info_off = 0; - U64 attribs_abbrev_off = 0; - DW_AttribList attribs = {0}; - if(good_tag_abbrev) - { - DW_AttribListParseResult attribs_parse = dw_parse_attrib_list_from_info_abbrev_offsets(arena, sections, ver, ext, lang, address_size, info_read_off, abbrev_read_off, relaxed); - attribs_info_off = info_read_off; - attribs_abbrev_off = abbrev_read_off; - info_read_off = attribs_parse.max_info_off; - abbrev_read_off = attribs_parse.max_abbrev_off; - attribs = attribs_parse.attribs; - } - - //- rjf: fill tag - { - tag->abbrev_id = abbrev_id; - tag->info_range = rng_1u64(info_offset, info_range.min + info_read_off); - tag->abbrev_range = rng_1u64(abbrev_offset, abbrev_range.min + abbrev_read_off); - tag->has_children = !!(abbrev_tag_info.flags & DW_AbbrevFlag_HasChildren); - tag->kind = (DW_TagKind)abbrev_tag_info.sub_kind; - tag->attribs_info_off = attribs_info_off; - tag->attribs_abbrev_off = 
attribs_abbrev_off; - tag->attribs = attribs; - } - - return tag; -} - -//////////////////////////////// - -internal U64 -dw_v5_header_offset_from_table_offset(DW_SectionArray *sections, DW_SectionKind section, U64 table_off) -{ - // NOTE(rjf): From the DWARF V5 spec (February 13, 2017), page 401: - // - // " - // Each skeleton compilation unit also has a DW_AT_addr_base attribute, - // which provides the relocated offset to that compilation unit’s - // contribution in the executable’s .debug_addr section. Unlike the - // DW_AT_stmt_list attribute, the offset refers to the first address table - // slot, not to the section header. In this example, we see that the first - // address (slot 0) from demo1.o begins at offset 48. Because the - // .debug_addr section contains an 8-byte header, the object file’s - // contribution to the section actually begins at offset 40 (for a 64-bit - // DWARF object, the header would be 16 bytes long, and the value for the - // DW_AT_addr_base attribute would then be 56). All attributes in demo1.dwo - // that use DW_FORM_addrx, DW_FORM_addrx1, DW_FORM_addrx2, DW_FORM_addrx3 - // or DW_FORM_addrx4 would then refer to address table slots relative to - // that offset. Likewise, the .debug_addr contribution from demo2.dwo begins - // at offset 72, and its first address slot is at offset 80. Because these - // contributions have been processed by the linker, they contain relocated - // values for the addresses in the program that are referred to by the - // debug information. - // " - // - // This seems to at least partially explain why the addr_base is showing up - // 8 bytes later than we are expecting it to. We can't actually just store - // the base that we read from the DW_Attrib_AddrBase attrib, because - // it's showing up *after* the header, so we need to bump it back. - - // NOTE(rjf): From the DWARF V5 spec (February 13, 2017), page 66: - // - // " - // A DW_AT_rnglists_base attribute, whose value is of class rnglistsptr. 
This - // attribute points to the beginning of the offsets table (immediately - // following the header) of the compilation unit's contribution to the - // .debug_rnglists section. References to range lists (using DW_FORM_rnglistx) - // within the compilation unit are interpreted relative to this base. - // " - // - // Similarly, we need to figure out where to go to parse the header. - - U64 max_header_size = 0; - U64 min_header_size = 0; - switch(section) - { - default: - case DW_Section_Addr: - { - max_header_size = 16; - min_header_size = 8; - } break; - case DW_Section_StrOffsets: - { - max_header_size = 16; - min_header_size = 8; - } break; - case DW_Section_RngLists: - { - max_header_size = 20; - min_header_size = 12; - } break; - case DW_Section_LocLists: - { - // TODO(rjf) - NotImplemented; - } break; - } - - U64 past_header = table_off; - void *addr_base = dw_base_from_sec(sections, section); - Rng1U64 addr_rng = dw_range_from_sec(sections, section); - - //- rjf: figure out which sized header we have - U64 header_size = 0; - { - // rjf: try max header, and if it works, the header is the max size, otherwise we will - // need to rely on the min header size - U32 first32 = 0; - dw_based_range_read_struct(addr_base, addr_rng, past_header-max_header_size, &first32); - if(first32 == max_U32) - { - header_size = max_header_size; - } - else - { - header_size = min_header_size; - } - } - - return table_off - header_size; -} - -internal Rng1U64List -dw_comp_unit_ranges_from_info(Arena *arena, DW_Section info) -{ - Rng1U64List result = {0}; - void *base = info.data.str; - Rng1U64 range = rng_1u64(0, info.data.size); - for(U64 cursor = 0; cursor < info.data.size; ) - { - // read unit length - U64 unit_length = 0; - U64 bytes_read = dw_based_range_read_length(base, range, cursor, &unit_length); - - // was read ok? 
- if(bytes_read == 0) - { - break; - } - - // push unit range - rng1u64_list_push(arena, &result, rng_1u64(cursor, cursor+unit_length+bytes_read)); - - // advance - cursor += unit_length+bytes_read; + String8 result = {0}; + if (form_kind == DW_Form_SecOffset) { + result = str8_skip(input->sec[DW_Section_Line].data, form.sec_offset); + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); } return result; } -internal DW_Ext -dw_ext_from_params(String8 producer, Arch arch, ImageType image_type) +internal DW_LineFile * +dw_interp_file(DW_LineVMHeader *line_vm, DW_FormKind form_kind, DW_Form form) { - DW_Ext ext = DW_Ext_Null; - switch (image_type) { - case Image_Null: break; - case Image_CoffPe: { - if (str8_match_lit("clang", producer, StringMatchFlag_RightSideSloppy|StringMatchFlag_CaseInsensitive)) { - ext = DW_Ext_GNU | DW_Ext_LLVM; - } else if (str8_match_lit("GNU", producer, StringMatchFlag_RightSideSloppy|StringMatchFlag_CaseInsensitive)) { - ext = DW_Ext_GNU; - } - } break; - case Image_Elf32: - case Image_Elf64: { - if (str8_match_lit("clang", producer, StringMatchFlag_RightSideSloppy|StringMatchFlag_CaseInsensitive)) { - ext = DW_Ext_GNU | DW_Ext_LLVM; - } else if (str8_match_lit("GNU", producer, StringMatchFlag_RightSideSloppy|StringMatchFlag_CaseInsensitive)) { - ext = DW_Ext_GNU; - } - } break; - case Image_Macho: { - if (str8_match_lit("clang", producer, StringMatchFlag_RightSideSloppy|StringMatchFlag_CaseInsensitive)) { - ext = DW_Ext_LLVM | DW_Ext_APPLE; - } else if (str8_match_lit("GNU", producer, StringMatchFlag_RightSideSloppy|StringMatchFlag_CaseInsensitive)) { - ext = DW_Ext_GNU | DW_Ext_APPLE; - } - } break; + DW_LineFile *result = 0; + U64 file_idx = dw_interp_const_u64(form_kind, form); + if (file_idx < line_vm->file_table.count) { + result = &line_vm->file_table.v[file_idx]; + } else { + Assert(!"out of bounds file index"); } - return ext; + return result; } -internal DW_CompRoot -dw_comp_root_from_range(Arena *arena, 
DW_SectionArray *sections, Rng1U64 range, B32 relaxed) +internal DW_Reference +dw_interp_ref(DW_Input *input, DW_CompUnit *cu, DW_FormKind form_kind, DW_Form form) { - Temp scratch = scratch_begin(&arena, 1); - - void *info_base = dw_base_from_sec(sections, DW_Section_Info); - B32 is_info_dwo = sections->v[DW_Section_Info].is_dwo; - - //- rjf: up-front known parsing offsets (yep, that's right, it's only 1!) - U64 size_off = 0; - - //- rjf: parse size of this compilation unit's data - U64 size = 0; - U64 version_off = size_off; - { - U64 bytes_read = dw_based_range_read_length(info_base, range, size_off, &size); - version_off += bytes_read; + DW_Reference ref = {0}; + if (form_kind == DW_Form_Ref1 || form_kind == DW_Form_Ref2 || + form_kind == DW_Form_Ref4 || form_kind == DW_Form_Ref8 || + form_kind == DW_Form_RefUData) { + ref.cu = cu; + ref.info_off = form.ref; + } else if (form_kind == DW_Form_RefAddr) { + NotImplemented; + } else if (form_kind == DW_Form_RefSig8) { + NotImplemented; + } else if (form_kind == DW_Form_RefSup4 || form_kind == DW_Form_RefSup8) { + NotImplemented; + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); } - - //- rjf: parse version - B32 got_version = 0; - DW_Version version = 0; - U64 unit_off = version_off; - if(dw_based_range_read_struct(info_base, range, version_off, &version)) - { - unit_off += sizeof(version); - got_version = 1; - } - - //- rjf: parse unit kind, abbrev_base, address size - B32 got_unit_kind = 0; - U64 next_off = unit_off; - DW_CompUnitKind unit_kind = DW_CompUnitKind_Reserved; - U64 abbrev_base = max_U64; - U64 address_size = 0; - U64 spec_dwo_id = 0; - if(got_version) - { - switch(version) - { - default: break; - case DW_Version_2: { - abbrev_base = 0; - next_off += dw_based_range_read(info_base, range, next_off, 4, &abbrev_base); - next_off += dw_based_range_read(info_base, range, next_off, 1, &address_size); - got_unit_kind = 1; - } break; - case DW_Version_3: - case DW_Version_4: - { - 
next_off += dw_based_range_read_length(info_base, range, next_off, &abbrev_base); - next_off += dw_based_range_read(info_base, range, next_off, 1, &address_size); - got_unit_kind = 1; - } break; - case DW_Version_5: - { - next_off += dw_based_range_read_struct(info_base, range, next_off, &unit_kind); - next_off += dw_based_range_read(info_base, range, next_off, 1, &address_size); - next_off += dw_based_range_read_length(info_base, range, next_off, &abbrev_base); - got_unit_kind = 1; - - //- rjf: parse DWO ID if appropriate - if(unit_kind == DW_CompUnitKind_Skeleton || is_info_dwo) - { - next_off += dw_based_range_read(info_base, range, next_off, 8, &spec_dwo_id); - } - } break; - } - } - - //- rjf: build abbrev table - DW_AbbrevTable abbrev_table = {0}; - if(got_unit_kind) - { - abbrev_table = dw_make_abbrev_table(arena, sections, abbrev_base); - } - - //- rjf: parse compilation unit's tag - B32 got_comp_unit_tag = 0; - DW_Tag *comp_unit_tag = 0; - if(got_unit_kind) - { - U64 comp_root_tag_off = range.min + next_off; - comp_unit_tag = dw_tag_from_info_offset(scratch.arena, sections, abbrev_table, version, DW_Ext_Null, DW_Language_Null, address_size, comp_root_tag_off, relaxed); - got_comp_unit_tag = 1; - } - - //- rjf: get all of the attribute values we need to start resolving attribute values - DW_AttribValueResolveParams resolve_params = { .version = version }; - if(got_comp_unit_tag) - { - for(DW_AttribNode *attrib_n = comp_unit_tag->attribs.first; attrib_n; attrib_n = attrib_n->next) - { - DW_Attrib *attrib = &attrib_n->attrib; - - // NOTE(rjf): We'll have to rely on just the form value at this point, - // since we can't use the unit yet (since we're currently in the process - // of building it). This should always be enough, otherwise there would - // be a cyclic dependency in the requirements of each part of the - // compilation unit's parse. DWARF is pretty crazy, but not *that* crazy, - // so this should be good. 
- switch(attrib->attrib_kind) - { - default: break; - case DW_Attrib_AddrBase: resolve_params.debug_addrs_base = attrib->form_value.v[0]; break; - case DW_Attrib_StrOffsetsBase: resolve_params.debug_str_offs_base = attrib->form_value.v[0]; break; - case DW_Attrib_RngListsBase: resolve_params.debug_rnglists_base = attrib->form_value.v[0]; break; - case DW_Attrib_LocListsBase: resolve_params.debug_loclists_base = attrib->form_value.v[0]; break; - } - } - } - - //- rjf: correct table offsets to header offsets (since DWARF V5 insists on being as useless as possible) - if(got_comp_unit_tag && version >= DW_Version_5) - { - resolve_params.debug_addrs_base = dw_v5_header_offset_from_table_offset(sections, DW_Section_Addr, resolve_params.debug_addrs_base); - resolve_params.debug_str_offs_base = dw_v5_header_offset_from_table_offset(sections, DW_Section_StrOffsets, resolve_params.debug_str_offs_base); - resolve_params.debug_loclists_base = dw_v5_header_offset_from_table_offset(sections, DW_Section_LocLists, resolve_params.debug_loclists_base); - resolve_params.debug_rnglists_base = dw_v5_header_offset_from_table_offset(sections, DW_Section_RngLists, resolve_params.debug_rnglists_base); - } - - //- rjf: parse the rest of the compilation unit tag's attributes that we'd - // like to cache - String8 name = {0}; - String8 producer = {0}; - String8 compile_dir = {0}; - String8 external_dwo_name = {0}; - String8 external_gnu_dwo_name = {0}; - U64 gnu_dwo_id = 0; - DW_Language language = 0; - U64 name_case = 0; - B32 use_utf8 = 0; - U64 low_pc = 0; - U64 high_pc = 0; - B32 high_pc_is_relative = 0; - DW_AttribValue ranges_attrib_value = {DW_Section_Null}; - U64 line_base = 0; - if(got_comp_unit_tag) - { - for(DW_AttribNode *attrib_n = comp_unit_tag->attribs.first; attrib_n; attrib_n = attrib_n->next) - { - DW_Attrib *attrib = &attrib_n->attrib; - - //- rjf: form value => value - DW_AttribValue value = {0}; - B32 good_value = 0; - { - 
if(dw_are_attrib_class_and_form_kind_compatible(version, attrib->value_class, attrib->form_kind)) - { - value = dw_attrib_value_from_form_value(sections, resolve_params, attrib->form_kind, attrib->value_class, attrib->form_value); - good_value = 1; - } - } - - //- rjf: map value to extracted info - if(good_value) - { - switch(attrib->attrib_kind) - { - case DW_Attrib_Name: name = dw_string_from_attrib_value(sections, value); break; - case DW_Attrib_Producer: producer = dw_string_from_attrib_value(sections, value); break; - case DW_Attrib_CompDir: compile_dir = dw_string_from_attrib_value(sections, value); break; - case DW_Attrib_DwoName: external_dwo_name = dw_string_from_attrib_value(sections, value); break; - case DW_Attrib_GNU_DwoName: external_gnu_dwo_name = dw_string_from_attrib_value(sections, value); break; - case DW_Attrib_GNU_DwoId: gnu_dwo_id = value.v[0]; break; - case DW_Attrib_Language: language = safe_cast_u32(value.v[0]); break; - case DW_Attrib_IdentifierCase: name_case = value.v[0]; break; - case DW_Attrib_UseUtf8: use_utf8 = (B32)value.v[0]; break; - case DW_Attrib_LowPc: low_pc = value.v[0]; break; - case DW_Attrib_HighPc: high_pc = value.v[0]; high_pc_is_relative = attrib->value_class != DW_AttribClass_Address; break; - case DW_Attrib_Ranges: ranges_attrib_value = value; break; - case DW_Attrib_StmtList: line_base = value.v[0]; break; - default: break; - } - } - } - } - - //- rjf: build+fill unit - DW_CompRoot unit = {0}; - - //- rjf: fill header data - unit.size = size; - unit.kind = unit_kind; - unit.version = version; - unit.address_size = address_size; - unit.abbrev_off = abbrev_base; - unit.info_off = range.min; - unit.tags_info_range = rng_1u64(range.min+next_off, range.max); - unit.abbrev_table = abbrev_table; - - //- rjf: fill out offsets we need for attrib value resolution - unit.rnglist_base = resolve_params.debug_rnglists_base; - unit.loclist_base = resolve_params.debug_loclists_base; - unit.addrs_base = 
resolve_params.debug_addrs_base; - unit.stroffs_base = resolve_params.debug_str_offs_base; - - //- rjf: fill out general info - unit.name = name; - unit.producer = producer; - unit.compile_dir = compile_dir; - unit.external_dwo_name = external_dwo_name.size ? external_dwo_name : external_gnu_dwo_name; - if(external_dwo_name.size) - { - unit.dwo_id = spec_dwo_id; - } - else if(external_gnu_dwo_name.size) - { - unit.dwo_id = gnu_dwo_id; - } - unit.language = language; - unit.name_case = name_case; - unit.use_utf8 = use_utf8; - unit.line_off = line_base; - unit.low_pc = low_pc; - unit.high_pc = high_pc; - unit.ranges_attrib_value = ranges_attrib_value; - unit.base_addr = unit.low_pc; - - //- rjf: fill fixup of low/high PC situation - if(high_pc_is_relative) - { - unit.high_pc += unit.low_pc; - } - - scratch_end(scratch); - return unit; -} - -internal DW_ExtDebugRef -dw_ext_debug_ref_from_comp_root(DW_CompRoot *root) -{ - DW_ExtDebugRef ref = {0}; - ref.dwo_path = root->external_dwo_name; - ref.dwo_id = root->dwo_id; return ref; } -//- rjf: line info +internal DW_LocList +dw_interp_loclist(Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_FormKind form_kind, DW_Form form) +{ + DW_LocList loclist = {0}; + + if (cu->version < DW_Version_5) { + if (form_kind == DW_Form_SecOffset) { + U64 sec_offset = max_U64; + if (form_kind == DW_Form_SecOffset) { + sec_offset = form.sec_offset; + } else if (form_kind == DW_Form_Data8 || form_kind == DW_Form_Data4 || + form_kind == DW_Form_Data2 || form_kind == DW_Form_Data1) { + if (!dw_try_u64_from_const_value(form.data.size, DW_ATE_Unsigned, form.data, &sec_offset)) { + Assert(!"unable to extract section offset"); + } + } else if (form_kind == DW_Form_Null) { + Assert(!"unexpected form"); + } + + String8 sec = str8_skip(input->sec[DW_Section_Loc].data, sec_offset); + U64 base_addr = cu->low_pc; + U64 base_sel = DW_SentinelFromSize(cu->address_size); + for (U64 cursor = 0; cursor < sec.size; ) { + U64 range_min = 0; + U64 
range_min_off = cursor; + U64 range_min_size = str8_deserial_read(sec, range_min_off, &range_min, cu->address_size, cu->address_size); + if (range_min_size == 0) { + break; + } + U64 range_max = 0; + U64 range_max_off = cursor + cu->address_size; + U64 range_max_size = str8_deserial_read(sec, range_max_off, &range_max, cu->address_size, cu->address_size); + if (range_max_size == 0) { + break; + } + cursor += cu->address_size * 2; + + // series terminator + if (range_min == 0 && range_max == 0) { + break; + } + // set new base address + else if (range_min == base_sel) { + base_addr = range_max; + } + // location + else { + U16 expr_size = 0; + U64 expr_size_size = str8_deserial_read_struct(sec, cursor, &expr_size); + if (expr_size_size == 0) { + Assert(!"unable to read expression size"); + break; + } + cursor += expr_size_size; + + Assert(cursor + expr_size <= sec.size); + Rng1U64 expr_range = rng_1u64(cursor, ClampTop(cursor + expr_size, sec.size)); + + DW_LocNode *loc_n = push_array(arena, DW_LocNode, 1); + loc_n->v.range = rng_1u64(base_addr + range_min, base_addr + range_max); + loc_n->v.expr = str8_substr(sec, expr_range); + + SLLQueuePush(loclist.first, loclist.last, loc_n); + ++loclist.count; + + // advance past expression + cursor += expr_size; + } + } + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); + } + } else { + DW_Version version = DW_Version_Null; + String8 raw_lle = {0}; + if (form_kind == DW_Form_SecOffset) { + // offset is from beginning of the section + U64 sec_offset = form.sec_offset; + raw_lle = str8_skip(input->sec[DW_Section_LocLists].data, sec_offset); + } else if (form_kind == DW_Form_LocListx) { + // offset is from beginning of the entries + U64 entries_off = dw_offset_from_list_unit(cu->loclists_lu, form.xval); + raw_lle = str8_skip(cu->loclists_lu->entries, entries_off); + version = cu->loclists_lu->version; + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); + } + + for (U64 cursor 
= 0, keep_parsing = 1, base_addr = cu->low_pc; + cursor < raw_lle.size && keep_parsing; ) { + DW_LLE kind = DW_LLE_EndOfList; + cursor += str8_deserial_read_struct(raw_lle, cursor, &kind); + + Rng1U64 range = {0}; + switch (kind) { + default: + Assert(!"unknown kind"); + case DW_LLE_EndOfList: { + keep_parsing = 0; + } break; + case DW_LLE_BaseAddressx: { + if (!cu->addr_lu) { + keep_parsing = 0; + break; + } + + U64 addrx = 0; + U64 addrx_size = str8_deserial_read_uleb128(raw_lle, cursor, &addrx); + if (addrx_size == 0) { + keep_parsing = 0; + break; + } + + U64 base_addr_new = dw_addr_from_list_unit(cu->addr_lu, addrx); + if (base_addr_new == max_U64) { + InvalidPath; + break; + } + + base_addr = base_addr_new; + cursor += addrx_size; + } break; + case DW_LLE_StartxEndx: { + U64 start_addrx = 0; + U64 start_addrx_size = str8_deserial_read_uleb128(raw_lle, cursor, &start_addrx); + if (start_addrx_size == 0) { + keep_parsing = 0; + break; + } + U64 end_addrx = 0; + U64 end_addrx_size = str8_deserial_read_uleb128(raw_lle, cursor + start_addrx_size, &end_addrx); + if (end_addrx_size == 0) { + keep_parsing = 0; + break; + } + cursor += start_addrx_size; + cursor += end_addrx_size; + + U64 start = dw_addr_from_list_unit(cu->addr_lu, start_addrx); + U64 end = dw_addr_from_list_unit(cu->addr_lu, end_addrx); + Assert(start != max_U64); + Assert(end != max_U64); + + range = rng_1u64(start, end); + } break; + case DW_LLE_StartxLength: { + U64 start_addrx = 0; + U64 start_addrx_size = str8_deserial_read_uleb128(raw_lle, cursor, &start_addrx); + if (start_addrx_size == 0) { + keep_parsing = 0; + break; + } + + // parse pre-standard & standard length + U64 length_off = cursor + start_addrx_size; + U64 length = 0; + U64 length_size = str8_deserial_read_uleb128(raw_lle, length_off, &length); + if (length_size == 0) { + keep_parsing = 0; + break; + } + + cursor += start_addrx_size; + cursor += length_size; + + if (cu->addr_lu) { + U64 start = dw_addr_from_list_unit(cu->addr_lu, 
start_addrx); + Assert(start < max_U64); + + range = rng_1u64(start, start + length); + } else { + Assert(!".debug_addr section is missing -- unable to interpret address index"); + } + } break; + case DW_LLE_OffsetPair: { + U64 start = 0; + U64 start_size = str8_deserial_read_uleb128(raw_lle, cursor, &start); + if (start_size == 0) { + keep_parsing = 0; + break; + } + U64 end = 0; + U64 end_size = str8_deserial_read_uleb128(raw_lle, cursor + start_size, &end); + if (end_size == 0) { + keep_parsing = 0; + break; + } + cursor += start_size; + cursor += end_size; + + range = rng_1u64(base_addr + start, base_addr + end); + } break; + case DW_LLE_DefaultLocation: { + // no range + int x = 0; + } break; + case DW_LLE_BaseAddress: { + U64 base_addr_size = str8_deserial_read(raw_lle, cursor, &base_addr, cu->address_size, cu->address_size); + if (base_addr_size == 0) { + keep_parsing = 0; + break; + } + cursor += base_addr_size; + } break; + case DW_LLE_StartEnd: { + U64 start = 0; + U64 start_size = str8_deserial_read(raw_lle, cursor, &start, cu->address_size, cu->address_size); + if (start_size == 0) { + keep_parsing = 0; + break; + } + + U64 end = 0; + U64 end_size = str8_deserial_read(raw_lle, cursor + start_size, &end, cu->address_size, cu->address_size); + if (end_size == 0) { + keep_parsing = 0; + break; + } + cursor += start_size; + cursor += end_size; + + range = rng_1u64(start, end); + } break; + case DW_LLE_StartLength: { + U64 start = 0; + U64 start_size = str8_deserial_read(raw_lle, cursor, &start, cu->address_size, cu->address_size); + if (start_size == 0) { + keep_parsing = 0; + break; + } + U64 length = 0; + U64 length_size = str8_deserial_read_uleb128(raw_lle, cursor + start_size, &length); + if (length_size == 0) { + keep_parsing = 0; + break; + } + cursor += start_size; + cursor += length_size; + + range = rng_1u64(start, start + length); + } break; + } + + B32 has_expr = keep_parsing && kind != DW_LLE_BaseAddressx && kind != DW_LLE_BaseAddress; + if 
(has_expr) { + U64 expr_size = 0; + U64 expr_size_size = str8_deserial_read_uleb128(raw_lle, cursor, &expr_size); + if (expr_size_size == 0) { + keep_parsing = 0; + break; + } + + String8 expr = {0}; + U64 expr_read_size = str8_deserial_read_block(raw_lle, cursor + expr_size_size, expr_size, &expr); + if (expr_read_size != expr_size) { + keep_parsing = 0; + break; + } + + cursor += expr_size_size; + cursor += expr_size; + + DW_LocNode *loc_n = push_array(arena, DW_LocNode, 1); + loc_n->v.range = range; + loc_n->v.expr = expr; + + SLLQueuePush(loclist.first, loclist.last, loc_n); + ++loclist.count; + } + } + } + + return loclist; +} + +internal B32 +dw_interp_flag(DW_FormKind form_kind, DW_Form form) +{ + B32 flag = 0; + if (form_kind == DW_Form_Flag || form_kind == DW_Form_FlagPresent) { + flag = form.flag; + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); + } + return flag; +} + +internal Rng1U64List +dw_interp_rnglist(Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_FormKind form_kind, DW_Form form) +{ + Rng1U64List rnglist = {0}; + + if (cu->version < DW_Version_5) { + // decode section offset + U64 sec_offset = max_U64; + if (form_kind == DW_Form_SecOffset) { + sec_offset = form.sec_offset; + } else if (form_kind == DW_Form_Data8 || form_kind == DW_Form_Data4 || + form_kind == DW_Form_Data2 || form_kind == DW_Form_Data1) { + if (!dw_try_u64_from_const_value(form.data.size, DW_ATE_Unsigned, form.data, &sec_offset)) { + Assert(!"unable to extract section offset"); + } + } else if (form_kind != DW_Form_Null) { + Assert(!"unexpected form"); + } + + String8 sec = str8_skip(input->sec[DW_Section_Ranges].data, sec_offset); + U64 base_addr = cu->low_pc; + U64 base_sel = DW_SentinelFromSize(cu->address_size); + for (U64 cursor = 0; cursor < sec.size; ) { + U64 range_min = 0; + U64 range_min_off = cursor; + U64 range_min_size = str8_deserial_read(sec, range_min_off, &range_min, cu->address_size, cu->address_size); + if (range_min_size == 0) 
{ + break; + } + U64 range_max = 0; + U64 range_max_off = cursor + cu->address_size; + U64 range_max_size = str8_deserial_read(sec, range_max_off, &range_max, cu->address_size, cu->address_size); + if (range_max_size == 0) { + break; + } + cursor += cu->address_size * 2; + + // series terminator + if (range_min == 0 && range_max == 0) { + break; + } + // set new base address + else if (range_min == base_sel) { + base_addr = range_max; + } + // range + else { + Rng1U64 range = rng_1u64(base_addr + range_min, base_addr + range_max); + rng1u64_list_push(arena, &rnglist, range); + } + } + } else { + String8 raw_rle = {0}; + if (form_kind == DW_Form_SecOffset) { + // offset is from beginning of the section + U64 sec_offset = form.sec_offset; + raw_rle = str8_skip(input->sec[DW_Section_RngLists].data, sec_offset); + } else if (form_kind == DW_Form_RngListx) { + // offset is from beginning of the entries + U64 sec_offset = dw_offset_from_list_unit(cu->rnglists_lu, form.xval); + raw_rle = str8_skip(cu->rnglists_lu->entries, sec_offset); + } else if (form_kind != DW_Form_Null) { + AssertAlways(!"unexpected form"); + } + + U64 rle_invalid_value = DW_SentinelFromSize(cu->address_size); + U64 base_addr = cu->low_pc; + for (U64 cursor = 0, keep_parsing = 1; cursor < raw_rle.size && keep_parsing; ) { + DW_RLE kind = DW_RLE_EndOfList; + cursor += str8_deserial_read_struct(raw_rle, cursor, &kind); + + Rng1U64 range = rng_1u64(rle_invalid_value, rle_invalid_value); + switch (kind) { + default: + case DW_RLE_EndOfList: { + keep_parsing = 0; + } break; + case DW_RLE_BaseAddressx: { + U64 addrx = 0; + U64 addrx_size = str8_deserial_read_uleb128(raw_rle, cursor, &addrx); + if (addrx_size == 0) { + keep_parsing = 0; + break; + } + if (cu->addr_lu == 0) { + keep_parsing = 0; + break; + } + U64 base_addr_new = dw_addr_from_list_unit(cu->addr_lu, addrx); + if (base_addr_new < max_U64) { + base_addr = base_addr_new; + cursor += addrx_size; + } else { + keep_parsing = 0; + Assert(!"invalid 
addrx"); + } + } break; + case DW_RLE_StartxLength: { + U64 start_addrx = 0; + U64 start_addrx_size = str8_deserial_read_uleb128(raw_rle, cursor, &start_addrx); + if (start_addrx_size == 0) { + keep_parsing = 0; + break; + } + U64 length = 0; + U64 length_size = str8_deserial_read_uleb128(raw_rle, cursor + start_addrx_size, &length); + if (length_size == 0) { + keep_parsing = 0; + break; + } + cursor += start_addrx_size; + cursor += length_size; + + if (cu->addr_lu) { + U64 start = dw_addr_from_list_unit(cu->addr_lu, start_addrx); + AssertAlways(start < max_U64); + range = rng_1u64(start, start + length); + } + } break; + case DW_RLE_OffsetPair: { + U64 offset_start, offset_end = 0; + U64 offset_start_size = str8_deserial_read_uleb128(raw_rle, cursor, &offset_start); + if (offset_start_size == 0) { + keep_parsing = 0; + break; + } + U64 offset_end_size = str8_deserial_read_uleb128(raw_rle, cursor + offset_start_size, &offset_end); + if (offset_end_size == 0) { + keep_parsing = 0; + break; + } + cursor += offset_start_size; + cursor += offset_end_size; + + range = rng_1u64(base_addr + offset_start, base_addr + offset_end); + } break; + case DW_RLE_BaseAddress: { + U64 base_addr_size = str8_deserial_read(raw_rle, cursor, &base_addr, cu->address_size, cu->address_size); + if (base_addr_size == 0) { + keep_parsing = 0; + break; + } + cursor += base_addr_size; + } break; + case DW_RLE_StartEnd: { + U64 start = 0, end = 0; + + U64 start_size = str8_deserial_read(raw_rle, cursor, &start, cu->address_size, cu->address_size); + if (start_size == 0) { + keep_parsing = 0; + break; + } + U64 end_size = str8_deserial_read(raw_rle, cursor + start_size, &end, cu->address_size, cu->address_size); + if (end_size == 0) { + keep_parsing = 0; + break; + } + cursor += start_size; + cursor += end_size; + + range = rng_1u64(start, end); + } break; + case DW_RLE_StartLength: { + U64 start = 0, length = 0; + + U64 start_size = str8_deserial_read(raw_rle, cursor, &start, cu->address_size, 
cu->address_size); + if (start_size == 0) { + keep_parsing = 0; + break; + } + U64 length_size = str8_deserial_read_uleb128(raw_rle, cursor + start_size, &length); + if (length_size == 0) { + keep_parsing = 0; + break; + } + cursor += start_size; + cursor += length_size; + + range = rng_1u64(start, start + length); + } break; + } + + if (range.min != rle_invalid_value) { + rng1u64_list_push(arena, &rnglist, range); + } + } + } + + return rnglist; +} + +internal String8 +dw_interp_secptr(DW_Input *input, DW_SectionKind section, DW_FormKind form_kind, DW_Form form) +{ + String8 secptr = {0}; + if (form_kind == DW_Form_SecOffset) { + String8 sect = input->sec[section].data; + Rng1U64 range = rng_1u64(form.sec_offset, sect.size); + secptr = str8_substr(sect, range); + } else if (form_kind != DW_Form_Null) { + Assert(!"unexpected form"); + } + return secptr; +} + +internal String8 +dw_interp_addrptr(DW_Input *input, DW_FormKind form_kind, DW_Form form) +{ + return dw_interp_secptr(input, DW_Section_Addr, form_kind, form); +} + +internal String8 +dw_interp_str_offsets_ptr(DW_Input *input, DW_FormKind form_kind, DW_Form form) +{ + return dw_interp_secptr(input, DW_Section_StrOffsets, form_kind, form); +} + +internal String8 +dw_interp_rnglists_ptr(DW_Input *input, DW_FormKind form_kind, DW_Form form) +{ + return dw_interp_secptr(input, DW_Section_RngLists, form_kind, form); +} + +internal String8 +dw_interp_loclists_ptr(DW_Input *input, DW_FormKind form_kind, DW_Form form) +{ + return dw_interp_secptr(input, DW_Section_LocLists, form_kind, form); +} + +internal DW_AttribClass +dw_value_class_from_attrib(DW_CompUnit *cu, DW_Attrib *attrib) +{ + return dw_pick_attrib_value_class(cu->version, cu->ext, cu->relaxed, attrib->attrib_kind, attrib->form_kind); +} + +internal String8 +dw_exprloc_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == 
DW_AttribClass_Null || value_class == DW_AttribClass_ExprLoc || value_class == DW_AttribClass_Block); + return dw_interp_exprloc(attrib->form_kind, attrib->form); +} + +internal U128 +dw_const_u128_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Const); + return dw_interp_const_u128(attrib->form_kind, attrib->form); +} + +internal U64 +dw_const_u64_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Const); + return dw_interp_const_u64(attrib->form_kind, attrib->form); +} + +internal U32 +dw_const_u32_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Const); + return dw_interp_const_u32(attrib->form_kind, attrib->form); +} + +internal S64 +dw_const_s64_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Const); + return dw_interp_const_s64(attrib->form_kind, attrib->form); +} + +internal S32 +dw_const_s32_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Const); + return dw_interp_const_s32(attrib->form_kind, attrib->form); +} + +internal B32 +dw_flag_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + 
AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Flag); + return dw_interp_flag(attrib->form_kind, attrib->form); +} + +internal U64 +dw_address_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || + value_class == DW_AttribClass_Address || + value_class == DW_AttribClass_AddrPtr); + DW_FormKind form_kind = attrib->form_kind; + DW_Form form = attrib->form; + if (value_class == DW_AttribClass_AddrPtr) { + + if (attrib->form_kind == DW_Form_SecOffset) { + + + } else { + AssertAlways(!"unexpected form"); + } + + + form_kind = DW_Form_Addr; + form.addr = dw_interp_addrptr(input, attrib->form_kind, attrib->form); + } + return dw_interp_address(cu->address_size, cu->low_pc, cu->addr_lu, form_kind, form); +} + +internal String8 +dw_block_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Block); + return dw_interp_block(input, cu, attrib->form_kind, attrib->form); +} + +internal String8 +dw_string_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_String || value_class == DW_AttribClass_StrOffsetsPtr); + return dw_interp_string(input, cu->format, cu->str_offsets_lu, attrib->form_kind, attrib->form); +} + +internal String8 +dw_line_ptr_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_LinePtr); + return dw_interp_line_ptr(input, attrib->form_kind, attrib->form); +} + +internal 
DW_LineFile * +dw_file_from_attrib_ptr(DW_CompUnit *cu, DW_LineVMHeader *line_vm, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Const); + return dw_interp_file(line_vm, attrib->form_kind, attrib->form); +} + +internal DW_Reference +dw_ref_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || value_class == DW_AttribClass_Reference); + return dw_interp_ref(input, cu, attrib->form_kind, attrib->form); +} + +internal DW_LocList +dw_loclist_from_attrib_ptr(Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + AssertAlways(value_class == DW_AttribClass_Null || + value_class == DW_AttribClass_LocList || + value_class == DW_AttribClass_LocListPtr); + return dw_interp_loclist(arena, input, cu, attrib->form_kind, attrib->form); +} + +internal Rng1U64List +dw_rnglist_from_attrib_ptr(Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib) +{ + Rng1U64List rnglist = {0}; + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + if (value_class == DW_AttribClass_RngListPtr || DW_AttribClass_RngList) { + rnglist = dw_interp_rnglist(arena, input, cu, attrib->form_kind, attrib->form); + } else if (value_class != DW_AttribClass_Null) { + Assert(!"unexpected value class"); + } + return rnglist; +} + +internal DW_Attrib * +dw_attrib_from_tag_(DW_Tag tag, DW_AttribKind kind) +{ + local_persist read_only DW_Attrib null_attrib; + DW_Attrib *attrib = &null_attrib; + for (DW_AttribNode *attrib_n = tag.attribs.first; attrib_n != 0; attrib_n = attrib_n->next) { + if (attrib_n->v.attrib_kind == kind) { + attrib = &attrib_n->v; + break; + } + } + return attrib; +} + +internal DW_Attrib * 
+dw_attrib_from_tag(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + DW_Attrib *attrib = dw_attrib_from_tag_(tag, kind); + + if (attrib->attrib_kind == DW_Attrib_Null) { + if (cu && cu->tag_ht) { + DW_Attrib *ao_attrib = dw_attrib_from_tag_(tag, DW_Attrib_AbstractOrigin); + if (ao_attrib->attrib_kind == DW_Attrib_AbstractOrigin) { + DW_Reference ref = dw_interp_ref(input, cu, ao_attrib->form_kind, ao_attrib->form); + DW_TagNode *ref_tag = dw_tag_node_from_info_off(ref.cu, ref.info_off); + attrib = dw_attrib_from_tag_(ref_tag->tag, kind); + } + } + } + + return attrib; +} + +internal B32 +dw_tag_has_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + DW_Attrib *attrib = dw_attrib_from_tag(input, cu, tag, kind); + B32 has_attrib = attrib->attrib_kind != DW_Attrib_Null; + return has_attrib; +} + +internal String8 +dw_exprloc_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_exprloc_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal String8 +dw_block_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_block_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal U128 +dw_const_u128_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_const_u128_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal U64 +dw_const_u64_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_const_u64_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal U32 +dw_const_u32_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_const_u32_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal U64 +dw_address_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return 
dw_address_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal String8 +dw_string_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_string_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal String8 +dw_line_ptr_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_line_ptr_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal DW_Reference +dw_ref_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_ref_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal DW_LocList +dw_loclist_from_attrib(Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_loclist_from_attrib_ptr(arena, input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal Rng1U64List +dw_rnglist_from_attrib(Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_rnglist_from_attrib_ptr(arena, input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal B32 +dw_flag_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + return dw_flag_from_attrib_ptr(input, cu, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal DW_LineFile * +dw_file_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_LineVMHeader *line_vm, DW_Tag tag, DW_AttribKind kind) +{ + return dw_file_from_attrib_ptr(cu, line_vm, dw_attrib_from_tag(input, cu, tag, kind)); +} + +internal B32 +dw_try_byte_size_from_tag(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, U64 *byte_size_out) +{ + B32 has_byte_size = dw_tag_has_attrib(input, cu, tag, DW_Attrib_ByteSize); + B32 has_bit_size = dw_tag_has_attrib(input, cu, tag, DW_Attrib_BitSize ); + + if (has_byte_size && has_bit_size) { + Assert(!"ill formated byte size"); + } + + if (has_byte_size) { + *byte_size_out = 
dw_const_u64_from_attrib(input, cu, tag, DW_Attrib_ByteSize); + return 1; + } else if (has_bit_size) { + U64 bit_size = dw_const_u64_from_attrib(input, cu, tag, DW_Attrib_BitSize); + *byte_size_out = bit_size / 8; + return 1; + } + + return 0; +} + +internal U64 +dw_byte_size_from_tag(DW_Input *input, DW_CompUnit *cu, DW_Tag tag) +{ + U64 byte_size = max_U64; + dw_try_byte_size_from_tag(input, cu, tag, &byte_size); + return byte_size; +} + +internal U32 +dw_byte_size_32_from_tag(DW_Input *input, DW_CompUnit *cu, DW_Tag tag) +{ + U32 byte_size32; + U64 byte_size64; + if (dw_try_byte_size_from_tag(input, cu, tag, &byte_size64)) { + byte_size32 = safe_cast_u32(byte_size64); + } + return byte_size32; +} + +internal U64 +dw_u64_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + U64 result = 0; + DW_Attrib *attrib = dw_attrib_from_tag(input, cu, tag, kind); + DW_AttribClass attrib_class = dw_value_class_from_attrib(cu, attrib); + if (attrib_class == DW_AttribClass_Const || attrib_class == DW_AttribClass_Block) { + if (dw_tag_has_attrib(input, cu, tag, DW_Attrib_Type)) { + Temp scratch = scratch_begin(0,0); + DW_Reference type_ref = dw_ref_from_attrib(input, cu, tag, DW_Attrib_Type); + DW_Tag type_tag = {0}; + dw_read_tag_cu(scratch.arena, input, type_ref.cu, type_ref.info_off, &type_tag); + U64 type_byte_size = dw_byte_size_from_tag(input, cu, type_tag); + DW_ATE type_encoding = dw_const_u64_from_attrib(input, type_ref.cu, type_tag, DW_Attrib_Encoding); + if (type_encoding == DW_ATE_Unsigned || type_encoding == DW_ATE_UnsignedChar) { + result = dw_interp_const64(type_byte_size, type_encoding, attrib->form_kind, attrib->form); + } + scratch_end(scratch); + } else { + result = dw_interp_const_u64(attrib->form_kind, attrib->form); + } + } else if (attrib_class == DW_AttribClass_Reference) { + NotImplemented; + } else if (attrib_class != DW_AttribClass_Null) { + AssertAlways(!"unexpected attrib class"); + } + return result; +} + +internal 
DW_CompUnit +dw_cu_from_info_off(Arena *arena, DW_Input *input, DW_ListUnitInput lu_input, U64 offset, B32 relaxed) +{ + DW_CompUnit cu = {0}; + + String8 info_data = input->sec[DW_Section_Info].data; + + // read unit size in bytes + U64 length = 0; + U64 length_size = str8_deserial_read_dwarf_packed_size(info_data, offset, &length); + + if (length_size) { + // compute unit range + Rng1U64 range = rng_1u64(offset, offset + length_size + length); + String8 data = str8_substr(info_data, range); + U64 cursor = length_size; + + // read version + DW_Version version = 0; + U64 version_size = str8_deserial_read_struct(data, cursor, &version); + cursor += version_size; + + if (version_size) { + DW_Format format = DW_FormatFromSize(length); + B32 is_header_ok = 0; + U64 abbrev_base = max_U64; + U8 address_size = 0; + DW_CompUnitKind unit_kind = DW_CompUnitKind_Reserved; + U64 spec_dwo_id = max_U64; + + switch (version) { + default: + case DW_Version_Null: + case DW_Version_1: + break; + case DW_Version_2: { + U32 abbrev_base32 = 0; + U64 abbrev_base_off = cursor; + U64 abbrev_base_size = str8_deserial_read_struct(data, abbrev_base_off, &abbrev_base32); + if (!abbrev_base_size) { + break; + } + + U64 address_size_off = abbrev_base_off + abbrev_base_size; + U64 address_size_size = str8_deserial_read_struct(data, address_size_off, &address_size); + if (!address_size_size) { + break; + } + + abbrev_base = abbrev_base32; + cursor = address_size_off + address_size_size; + is_header_ok = 1; + } break; + case DW_Version_3: + case DW_Version_4: { + U64 abbrev_base_off = cursor; + U64 abbrev_base_size = str8_deserial_read_dwarf_uint(data, abbrev_base_off, format, &abbrev_base); + if (!abbrev_base_size) { + break; + } + + U64 address_size_off = abbrev_base_off + abbrev_base_size; + U64 address_size_size = str8_deserial_read_struct(data, address_size_off, &address_size); + if (!address_size_size) { + break; + } + + cursor = address_size_off + address_size_size; + is_header_ok = 1; + } 
break; + case DW_Version_5: { + U64 unit_kind_off = cursor; + U64 unit_kind_size = str8_deserial_read_struct(data, unit_kind_off, &unit_kind); + if (unit_kind_size == 0) { + break; + } + + U64 address_size_off = unit_kind_off + unit_kind_size; + U64 address_size_size = str8_deserial_read_struct(data, address_size_off, &address_size); + if (!address_size_size) { + break; + } + + U64 abbrev_base_off = address_size_off + address_size_size; + U64 abbrev_base_size = str8_deserial_read_dwarf_uint(data, abbrev_base_off, format, &abbrev_base); + if (!abbrev_base_size) { + break; + } + + U64 spec_dwo_id_off = abbrev_base_off + abbrev_base_size; + U64 spec_dwo_id_size = 0; + if (unit_kind == DW_CompUnitKind_Skeleton || input->sec[DW_Section_Info].is_dwo) { + spec_dwo_id_size = str8_deserial_read_struct(data, spec_dwo_id_off, &spec_dwo_id); + if (!spec_dwo_id_size) { + break; + } + } + + cursor = spec_dwo_id_off + spec_dwo_id_size; + is_header_ok = 1; + } break; + } + + if (is_header_ok) { + Temp temp = temp_begin(arena); + + // TODO: cache abbrev tables with identical offsets + String8 abbrev_data = input->sec[DW_Section_Abbrev].data; + DW_AbbrevTable abbrev_table = dw_make_abbrev_table(arena, abbrev_data, abbrev_base); + + DW_Tag cu_tag = {0}; + dw_read_tag(arena, data, cursor, range.min, abbrev_table, abbrev_data, version, format, address_size, &cu_tag); + + // TODO: handle these unit types + Assert(cu_tag.kind != DW_Tag_SkeletonUnit); + Assert(cu_tag.kind != DW_Tag_TypeUnit); + + if (cu_tag.kind == DW_Tag_CompileUnit || cu_tag.kind == DW_Tag_PartialUnit) { + // fetch attribs for list sections + DW_Attrib *addr_base_attrib = dw_attrib_from_tag(0, 0, cu_tag, DW_Attrib_AddrBase ); + DW_Attrib *str_offsets_base_attrib = dw_attrib_from_tag(0, 0, cu_tag, DW_Attrib_StrOffsetsBase); + DW_Attrib *rnglists_base_attrib = dw_attrib_from_tag(0, 0, cu_tag, DW_Attrib_RngListsBase ); + DW_Attrib *loclists_base_attrib = dw_attrib_from_tag(0, 0, cu_tag, DW_Attrib_LocListsBase ); + + // 
interp attribs as section offsets + U64 addr_sec_off = dw_interp_sec_offset(addr_base_attrib->form_kind, addr_base_attrib->form ); + U64 str_offsets_sec_off = dw_interp_sec_offset(str_offsets_base_attrib->form_kind, str_offsets_base_attrib->form); + U64 rnglists_sec_off = dw_interp_sec_offset(rnglists_base_attrib->form_kind, rnglists_base_attrib->form ); + U64 loclists_sec_off = dw_interp_sec_offset(loclists_base_attrib->form_kind, loclists_base_attrib->form ); + + // map section offset to unit index + U64 addr_lu_idx = rng_1u64_array_bsearch(lu_input.addr_ranges, addr_sec_off ); + U64 str_offsets_lu_idx = rng_1u64_array_bsearch(lu_input.str_offset_ranges, str_offsets_sec_off); + U64 rnglists_lu_idx = rng_1u64_array_bsearch(lu_input.rnglist_ranges, rnglists_sec_off ); + U64 loclists_lu_idx = rng_1u64_array_bsearch(lu_input.loclist_ranges, loclists_sec_off ); + + // map index to unit + DW_ListUnit *addr_lu = addr_lu_idx < lu_input.addr_count ? &lu_input.addrs[addr_lu_idx] : 0; + DW_ListUnit *str_offsets_lu = str_offsets_lu_idx < lu_input.str_offset_count ? &lu_input.str_offsets[str_offsets_lu_idx] : 0; + DW_ListUnit *rnglists_lu = rnglists_lu_idx < lu_input.rnglist_count ? &lu_input.rnglists[rnglists_lu_idx] : 0; + DW_ListUnit *loclists_lu = loclists_lu_idx < lu_input.loclist_count ? 
&lu_input.loclists[loclists_lu_idx] : 0; + + // find compile unit base address + DW_Attrib *low_pc_attrib = dw_attrib_from_tag(0, 0, cu_tag, DW_Attrib_LowPc); + U64 low_pc = dw_interp_address(address_size, max_U64, addr_lu, low_pc_attrib->form_kind, low_pc_attrib->form); + + // fill out compile unit + cu.relaxed = relaxed; + cu.ext = DW_Ext_All; + cu.kind = unit_kind; + cu.version = version; + cu.format = format; + cu.address_size = address_size; + cu.abbrev_off = abbrev_base; + cu.info_range = range; + cu.first_tag_info_off = range.min + cursor; + cu.abbrev_table = abbrev_table; + cu.abbrev_data = abbrev_data; + cu.addr_lu = addr_lu; + cu.str_offsets_lu = str_offsets_lu; + cu.rnglists_lu = rnglists_lu; + cu.loclists_lu = loclists_lu; + cu.low_pc = low_pc; + cu.tag = cu_tag; + } else { + // unexpected tag, release memory and exit + temp_end(temp); + } + } + } + } + + return cu; +} + +internal void +dw_tag_tree_from_data(Arena *arena, String8 info_data, String8 abbrev_data, DW_CompUnit *cu, DW_TagNode *parent, U64 *cursor, U64 *tag_count) +{ + while (*cursor < info_data.size) { + // read tag + DW_Tag tag = {0}; + U64 tag_size = dw_read_tag(arena, info_data, *cursor, cu->info_range.min, cu->abbrev_table, abbrev_data, cu->version, cu->format, cu->address_size, &tag); + if (tag_size == 0) { + break; + } + *cursor += tag_size; + + // is this sentinel tag? 
+ if (tag.kind == DW_Tag_Null) { + break; + } + + // normal tag + DW_TagNode *tag_n = push_array(arena, DW_TagNode, 1); + tag_n->tag = tag; + + SLLQueuePush_N(parent->first_child, parent->last_child, tag_n, sibling); + + // update tag count + *tag_count += 1; + + if (tag.has_children) { + dw_tag_tree_from_data(arena, info_data, abbrev_data, cu, tag_n, cursor, tag_count); + } + } +} + +internal DW_TagTree +dw_tag_tree_from_cu(Arena *arena, DW_Input *input, DW_CompUnit *cu) +{ + String8 abbrev_data = input->sec[DW_Section_Abbrev].data; + String8 info_data = str8_substr(input->sec[DW_Section_Info].data, cu->info_range); + DW_TagNode root = {0}; + U64 cursor = cu->first_tag_info_off; + U64 tag_count = 0; + dw_tag_tree_from_data(arena, info_data, abbrev_data, cu, &root, &cursor, &tag_count); + + DW_TagTree result = {0}; + result.root = root.first_child; + result.tag_count = tag_count; + + return result; +} + +internal HashTable * +dw_make_tag_hash_table(Arena *arena, DW_TagTree tag_tree) +{ + Temp scratch = scratch_begin(&arena, 1); + + struct Frame { + struct Frame *next; + DW_TagNode *node; + }; + + struct Frame *free_frames = 0; + struct Frame *stack = push_array(scratch.arena, struct Frame, 1); + stack->node = tag_tree.root; + + HashTable *ht = hash_table_init(arena, (U64)((F64)tag_tree.tag_count * 1.3)); + + while (stack) { + while (stack->node) { + hash_table_push_u64_raw(arena, ht, stack->node->tag.info_off, stack->node); + + if (stack->node->first_child) { + struct Frame *frame = free_frames; + if (frame) { + SLLStackPop(free_frames); + MemoryZeroStruct(frame); + } else { + frame = push_array(scratch.arena, struct Frame, 1); + } + frame->node = stack->node->first_child; + SLLStackPush(stack, frame); + } else { + stack->node = stack->node->sibling; + } + } + + // recycle free frame + struct Frame *frame = stack; + SLLStackPop(stack); + SLLStackPush(free_frames, frame); + + if (stack) { + stack->node = stack->node->sibling; + } + } + + scratch_end(scratch); + 
return ht; +} + +internal DW_TagNode * +dw_tag_node_from_info_off(DW_CompUnit *cu, U64 info_off) +{ + DW_TagNode *tag_node = hash_table_search_u64_raw(cu->tag_ht, info_off); + return tag_node; +} + +internal DW_LineVMFileArray +dw_line_vm_file_array_from_list(Arena *arena, DW_LineVMFileList list) +{ + DW_LineVMFileArray result = {0}; + result.count = 0; + result.v = push_array(arena, DW_LineFile, list.node_count); + + for (DW_LineVMFileNode *src = list.first; src != 0; src = src->next) { + DW_LineFile *dst = &result.v[result.count++]; + dst->file_name = push_str8_copy(arena, src->file.file_name); + dst->dir_idx = src->file.dir_idx; + dst->modify_time = src->file.modify_time; + dst->file_size = src->file.file_size; + } + + return result; +} + +internal U64 +dw_read_line_file(String8 data, + U64 off, + DW_Input *input, + DW_Version version, + DW_Format format, + DW_Ext ext, + U64 address_size, + DW_ListUnit *str_offsets, + U64 enc_count, + U64 *enc_arr, + DW_LineFile *line_file_out) +{ + MemoryZeroStruct(line_file_out); + U64 cursor = off; + for (U64 enc_idx = 0; enc_idx < enc_count; ++enc_idx) { + DW_LNCT lnct = enc_arr[enc_idx*2 + 0]; + DW_FormKind form_kind = enc_arr[enc_idx*2 + 1]; + DW_Form form = {0}; + U64 bytes_read; + switch (lnct) { + case DW_LNCT_Path: { + bytes_read = dw_read_form(data, cursor, version, format, address_size, form_kind, max_U64, &form); + line_file_out->file_name = dw_interp_string(input, format, str_offsets, form_kind, form); + } break; + case DW_LNCT_DirectoryIndex: { + bytes_read = dw_read_form(data, cursor, version, format, address_size, form_kind, max_U64, &form); + line_file_out->dir_idx = dw_interp_const_u64(form_kind, form); + } break; + case DW_LNCT_TimeStamp: { + bytes_read = dw_read_form(data, cursor, version, format, address_size, form_kind, max_U64, &form); + line_file_out->modify_time = dw_interp_const_u64(form_kind, form); + } break; + case DW_LNCT_Size: { + bytes_read = dw_read_form(data, cursor, version, format, 
address_size, form_kind, max_U64, &form); + line_file_out->file_size = dw_interp_const_u64(form_kind, form); + } break; + case DW_LNCT_MD5: { + bytes_read = dw_read_form(data, cursor, version, format, address_size, form_kind, max_U64, &form); + line_file_out->md5_digest = dw_interp_const_u128(form_kind, form); + } break; + case DW_LNCT_LLVM_Source: { + if (ext & DW_Ext_LLVM) { + bytes_read = dw_read_form(data, cursor, version, format, address_size, form_kind, max_U64, &form); + line_file_out->source = dw_interp_string(input, format, str_offsets, form_kind, form); + } else { + Assert(!"extension not supported"); + } + } break; + default: { + bytes_read = dw_read_form(data, cursor, version, format, address_size, form_kind, max_U64, &form); + Assert(!"unexpected LNTC encoding"); + } break; + } + Assert(bytes_read); + cursor += bytes_read; + } + U64 bytes_read = cursor - off; + return bytes_read; +} + +internal U64 +dw_read_line_file_array(Arena *arena, + String8 data, + U64 off, + DW_Input *input, + DW_Version version, + DW_Format format, + DW_Ext ext, + U64 address_size, + DW_ListUnit *str_offsets, + U64 enc_count, + U64 *enc_arr, + U64 table_count, + DW_LineVMFileArray *table_out) +{ + Temp temp = temp_begin(arena); + + table_out->count = table_count; + table_out->v = push_array(arena, DW_LineFile, table_count); + + U64 i, cursor; + for (i = 0, cursor = off; i < table_count; ++i) { + U64 bytes_read = dw_read_line_file(data, + cursor, + input, + version, + format, + ext, + address_size, + str_offsets, + enc_count, + enc_arr, + &table_out->v[i]); + if (bytes_read == 0) { + break; + } + cursor += bytes_read; + } + + U64 bytes_read = 0; + if (i == table_count) { + bytes_read = cursor - off; + } else { + temp_end(temp); + table_out->count = 0; + table_out->v = 0; + } + + return bytes_read; +} + +internal U64 +dw_read_line_vm_header(Arena *arena, + String8 line_data, + U64 line_off, + DW_Input *input, + String8 cu_dir, + String8 cu_name, + U8 cu_address_size, + 
DW_ListUnit *cu_str_offsets, + DW_LineVMHeader *header_out) +{ + Temp scratch = scratch_begin(&arena, 1); + + U64 bytes_read = 0; + + // read unit length + U64 unit_length = 0; + U64 unit_length_size = str8_deserial_read_dwarf_packed_size(line_data, line_off, &unit_length); + + U64 unit_opl = line_off + unit_length_size + unit_length; + Rng1U64 unit_range = rng_1u64(line_off, unit_opl); + DW_Format format = DW_FormatFromSize(unit_length); + U64 unit_cursor = unit_length_size; + String8 unit_data = str8_substr(line_data, unit_range); + + // read unit version + DW_Version version = DW_Version_Null; + U64 version_size = str8_deserial_read_struct(unit_data, unit_cursor, &version); + if (version_size == 0) { + goto exit; + } + unit_cursor += version_size; + + // read DWARF5 address & segment selector + U8 address_size = 0; + U8 segsel_size = 0; + if (version == DW_Version_5) { + U64 address_size_size = str8_deserial_read_struct(unit_data, unit_cursor, &address_size); + if (address_size_size == 0) { + goto exit; + } + unit_cursor += address_size_size; + + U64 segsel_size_size = str8_deserial_read_struct(unit_data, unit_cursor, &segsel_size); + if (segsel_size_size == 0) { + goto exit; + } + unit_cursor += segsel_size_size; + } else { + address_size = cu_address_size; + } + + // read header length + U64 header_length = 0; + U64 header_length_size = str8_deserial_read_dwarf_uint(unit_data, unit_cursor, format, &header_length); + if (header_length_size == 0) { + goto exit; + } + unit_cursor += header_length_size; + + // read min instruction length + U8 min_inst_len = 0; + U64 min_inst_len_size = str8_deserial_read_struct(unit_data, unit_cursor, &min_inst_len); + if (min_inst_len_size == 0) { + goto exit; + } + unit_cursor += min_inst_len_size; + + // read max operands for instruction + U8 max_ops_for_inst = 1; + if (version > DW_Version_3) { + U64 max_ops_for_inst_size = str8_deserial_read_struct(unit_data, unit_cursor, &max_ops_for_inst); + if (max_ops_for_inst_size == 0) 
{ + goto exit; + } + unit_cursor += max_ops_for_inst_size; + } + Assert(max_ops_for_inst > 0); + + U8 default_is_stmt = 0; + U64 default_is_stmt_size = str8_deserial_read_struct(unit_data, unit_cursor, &default_is_stmt); + if (default_is_stmt_size == 0) { + goto exit; + } + unit_cursor += default_is_stmt_size; + + S8 line_base = 0; + U64 line_base_size = str8_deserial_read_struct(unit_data, unit_cursor, &line_base); + if (line_base_size == 0) { + goto exit; + } + unit_cursor += line_base_size; + + U8 line_range = 0; + U64 line_range_size = str8_deserial_read_struct(unit_data, unit_cursor, &line_range); + if (line_range_size == 0) { + goto exit; + } + unit_cursor += line_range_size; + + U8 opcode_base = 0; + U64 opcode_base_size = str8_deserial_read_struct(unit_data, unit_cursor, &opcode_base); + if (opcode_base_size == 0) { + goto exit; + } + unit_cursor += opcode_base_size; + + U64 num_opcode_lens = opcode_base > 0 ? opcode_base - 1 : 0; + U8 *opcode_lens = str8_deserial_get_raw_ptr(unit_data, unit_cursor, num_opcode_lens * sizeof(opcode_lens[0])); + if (opcode_lens == 0) { + goto exit; + } + unit_cursor += num_opcode_lens * sizeof(opcode_lens[0]); + + DW_LineVMFileArray dir_table = {0}; + DW_LineVMFileArray file_table = {0}; + if (version < DW_Version_5) { + // read directory table + DW_LineVMFileList dir_list = {0}; + { + // compile directory is always first in the table + DW_LineVMFileNode *node = push_array(scratch.arena, DW_LineVMFileNode, 1); + node->file.file_name = cu_dir; + SLLQueuePush(dir_list.first, dir_list.last, node); + ++dir_list.node_count; + } + + // parse additional directories + for (; unit_cursor < unit_data.size; ) { + String8 dir = {0}; + unit_cursor += str8_deserial_read_cstr(unit_data, unit_cursor, &dir); + if (dir.size == 0) { + break; + } + + DW_LineVMFileNode *node = push_array(scratch.arena, DW_LineVMFileNode, 1); + node->file.file_name = dir; + SLLQueuePush(dir_list.first, dir_list.last, node); + ++dir_list.node_count; + } + + 
DW_LineVMFileList file_list = {0}; + { + // compile unit name is always first in the file table + { + DW_LineVMFileNode *node = push_array(scratch.arena, DW_LineVMFileNode, 1); + node->file.file_name = cu_name; + SLLQueuePush(file_list.first, file_list.last, node); + ++file_list.node_count; + } + + // read file table + for (; unit_cursor < unit_data.size; ) { + String8 file_name = {0}; + unit_cursor += str8_deserial_read_cstr(unit_data, unit_cursor, &file_name); + if (file_name.size == 0) { + break; + } + + U64 dir_index = 0; + U64 dir_index_size = str8_deserial_read_uleb128(unit_data, unit_cursor, &dir_index); + if (dir_index_size == 0) { + goto exit; + } + unit_cursor += dir_index_size; + + U64 modify_time = 0; + U64 modify_time_size = str8_deserial_read_uleb128(unit_data, unit_cursor, &modify_time); + if (modify_time_size == 0) { + goto exit; + } + unit_cursor += modify_time_size; + + U64 file_size = 0; + U64 file_size_size = str8_deserial_read_uleb128(unit_data, unit_cursor, &file_size); + if (file_size_size == 0) { + goto exit; + } + unit_cursor += file_size_size; + + DW_LineVMFileNode *node = push_array(scratch.arena, DW_LineVMFileNode, 1); + node->file.file_name = file_name; + node->file.dir_idx = dir_index; + node->file.modify_time = modify_time; + node->file.file_size = file_size; + + SLLQueuePush(file_list.first, file_list.last, node); + ++file_list.node_count; + } + } + + // list -> array + dir_table = dw_line_vm_file_array_from_list(arena, dir_list); + file_table = dw_line_vm_file_array_from_list(arena, file_list); + } + // DWARF5 + else { + // directory table + { + // read table entry encoding count + U8 enc_count = 0; + U64 enc_count_size = str8_deserial_read_struct(unit_data, unit_cursor, &enc_count); + if (enc_count_size == 0) { + goto exit; + } + unit_cursor += enc_count_size; + + // read table entry encodings + U64 *enc_arr = 0; + U64 enc_arr_size = str8_deserial_read_uleb128_array(scratch.arena, unit_data, unit_cursor, enc_count*2, &enc_arr); + 
if (enc_arr_size == 0) { + goto exit; + } + unit_cursor += enc_arr_size; + + // read table count + U64 table_count = 0; + U64 table_count_size = str8_deserial_read_uleb128(unit_data, unit_cursor, &table_count); + if (table_count_size == 0) { + goto exit; + } + unit_cursor += table_count_size; + + // read table + U64 table_size = dw_read_line_file_array(arena, + unit_data, + unit_cursor, + input, + version, + format, + DW_Ext_All, + address_size, + cu_str_offsets, + enc_count, + enc_arr, + table_count, + &dir_table); + if (table_size == 0) { + goto exit; + } + unit_cursor += table_size; + } + + // file table + { + // read table entry encoding count + U8 enc_count = 0; + U64 enc_count_size = str8_deserial_read_struct(unit_data, unit_cursor, &enc_count); + if (enc_count == 0) { + goto exit; + } + unit_cursor += enc_count_size; + + // read table entry encodings + U64 *enc_arr = 0; + U64 enc_arr_size = str8_deserial_read_uleb128_array(scratch.arena, unit_data, unit_cursor, enc_count*2, &enc_arr); + if (enc_arr_size == 0) { + goto exit; + } + unit_cursor += enc_arr_size; + + // read table count + U64 table_count = 0; + U64 table_count_size = str8_deserial_read_uleb128(unit_data, unit_cursor, &table_count); + if (table_count_size == 0) { + goto exit; + } + unit_cursor += table_count_size; + + // read table + U64 file_table_size = dw_read_line_file_array(arena, + unit_data, + unit_cursor, + input, + version, + format, + DW_Ext_All, + address_size, + cu_str_offsets, + enc_count, + enc_arr, + table_count, + &file_table); + if (file_table_size == 0) { + goto exit; + } + unit_cursor += file_table_size; + } + } + + if (header_out) { + header_out->unit_range = unit_range; + header_out->version = version; + header_out->address_size = address_size; + header_out->segment_selector_size = segsel_size; + header_out->header_length = header_length; + header_out->min_inst_len = min_inst_len; + header_out->max_ops_for_inst = max_ops_for_inst; + header_out->default_is_stmt = 
default_is_stmt; + header_out->line_base = line_base; + header_out->line_range = line_range; + header_out->opcode_base = opcode_base; + header_out->num_opcode_lens = num_opcode_lens; + header_out->opcode_lens = opcode_lens; + header_out->dir_table = dir_table; + header_out->file_table = file_table; + } + + bytes_read = unit_cursor; + +exit:; + scratch_end(scratch); + return bytes_read; +} internal void dw_line_vm_reset(DW_LineVMState *state, B32 default_is_stmt) @@ -1775,286 +2745,44 @@ dw_push_line_seq(Arena* arena, DW_LineTableParseResult *parsed_tbl) internal DW_LineNode * dw_push_line(Arena *arena, DW_LineTableParseResult *tbl, DW_LineVMState *vm_state, B32 start_of_sequence) { - DW_LineNode *n = 0; - if(vm_state->busted_seq == 0) + DW_LineSeqNode *seq = tbl->last_seq; + if(seq == 0 || start_of_sequence) { - DW_LineSeqNode *seq = tbl->last_seq; - if(seq == 0 || start_of_sequence) - { - // ERROR! do not emit sequences with only one line... - Assert(seq && seq->count > 1); - seq = dw_push_line_seq(arena, tbl); - } - - n = push_array(arena, DW_LineNode, 1); - n->v.file_index = vm_state->file_index; - n->v.line = vm_state->line; - n->v.column = vm_state->column; - n->v.voff = vm_state->address; - - SLLQueuePush(seq->first, seq->last, n); - seq->count += 1; + seq = dw_push_line_seq(arena, tbl); } + + DW_LineNode *n = push_array(arena, DW_LineNode, 1); + n->v.file_index = vm_state->file_index; + n->v.line = vm_state->line; + n->v.column = vm_state->column; + n->v.address = vm_state->address; + + SLLQueuePush(seq->first, seq->last, n); + seq->count += 1; return n; } -internal DW_LineTableParseResult -dw_parsed_line_table_from_comp_root(Arena *arena, DW_SectionArray *sections, DW_CompRoot *root) -{ - DW_Mode mode = sections->v[DW_Section_Line].mode; - void *base = dw_base_from_sec(sections, DW_Section_Line); - Rng1U64 line_info_range = dw_range_from_sec(sections, DW_Section_Line); - U64 read_off_start = root->line_off - line_info_range.min; - U64 cursor = 
read_off_start; - - DW_AttribValueResolveParams resolve_params = dw_attrib_value_resolve_params_from_comp_root(root); - - DW_LineVMHeader vm_header = {0}; - cursor += dw_read_line_vm_header(arena, base, line_info_range, cursor, mode, sections, resolve_params, root->compile_dir, root->name, &vm_header); - - //- rjf: prep state for VM - DW_LineVMState vm_state = {0}; - dw_line_vm_reset(&vm_state, vm_header.default_is_stmt); - - //- rjf: VM loop; build output list - DW_LineTableParseResult result = {0}; - B32 end_of_seq = 0; - B32 error = 0; - for (;!error && cursor < vm_header.unit_opl;) { - //- rjf: parse opcode - U8 opcode = 0; - cursor += dw_based_range_read_struct(base, line_info_range, cursor, &opcode); - - //- rjf: do opcode action - switch (opcode) { - default: - { - //- rjf: special opcode case - if(opcode >= vm_header.opcode_base) - { - U32 adjusted_opcode = (U32)(opcode - vm_header.opcode_base); - U32 op_advance = adjusted_opcode / vm_header.line_range; - S32 line_inc = (S32)vm_header.line_base + ((S32)adjusted_opcode) % (S32)vm_header.line_range; - // TODO: can we just call dw_advance_line_vm_state_pc - U64 addr_inc = vm_header.min_inst_len * ((vm_state.op_index+op_advance) / vm_header.max_ops_for_inst); - - vm_state.address += addr_inc; - vm_state.op_index = (vm_state.op_index + op_advance) % vm_header.max_ops_for_inst; - vm_state.line = (U32)((S32)vm_state.line + line_inc); - vm_state.basic_block = 0; - vm_state.prologue_end = 0; - vm_state.epilogue_begin = 0; - vm_state.discriminator = 0; - - dw_push_line(arena, &result, &vm_state, end_of_seq); - end_of_seq = 0; - -#if 0 - // NOTE(rjf): DWARF has dummy lines at the end of groups of line ranges, where we'd like - // to break line info into sequences. - if(vm_state.line == 0) - { - end_of_seq = 1; - } -#endif - } - // Skipping unknown opcode. This is a valid case and - // it works because compiler stores operand lengths. 
- else - { - if(opcode > 0 && opcode <= vm_header.num_opcode_lens) - { - U8 num_operands = vm_header.opcode_lens[opcode - 1]; - for(U8 i = 0; i < num_operands; ++i) - { - U64 operand = 0; - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &operand); - } - } - else - { - error = 1; - goto exit; - } - } - } break; - - //- Standard opcodes - - case DW_StdOpcode_Copy: - { - dw_push_line(arena, &result, &vm_state, end_of_seq); - end_of_seq = 0; - vm_state.discriminator = 0; - vm_state.basic_block = 0; - vm_state.prologue_end = 0; - vm_state.epilogue_begin = 0; - } break; - - case DW_StdOpcode_AdvancePc: - { - U64 advance = 0; - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &advance); - dw_line_vm_advance(&vm_state, advance, vm_header.min_inst_len, vm_header.max_ops_for_inst); - } break; - - case DW_StdOpcode_AdvanceLine: - { - S64 s = 0; - cursor += dw_based_range_read_sleb128(base, line_info_range, cursor, &s); - vm_state.line += s; - } break; - - case DW_StdOpcode_SetFile: - { - U64 file_index = 0; - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &file_index); - vm_state.file_index = file_index; - } break; - - case DW_StdOpcode_SetColumn: - { - U64 column = 0; - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &column); - vm_state.column = column; - } break; - - case DW_StdOpcode_NegateStmt: - { - vm_state.is_stmt = !vm_state.is_stmt; - } break; - - case DW_StdOpcode_SetBasicBlock: - { - vm_state.basic_block = 1; - } break; - - case DW_StdOpcode_ConstAddPc: - { - U64 advance = (0xffu - vm_header.opcode_base)/vm_header.line_range; - dw_line_vm_advance(&vm_state, advance, vm_header.min_inst_len, vm_header.max_ops_for_inst); - } break; - - case DW_StdOpcode_FixedAdvancePc: - { - U16 operand = 0; - cursor += dw_based_range_read_struct(base, line_info_range, cursor, &operand); - vm_state.address += operand; - vm_state.op_index = 0; - } break; - - case DW_StdOpcode_SetPrologueEnd: - { - 
vm_state.prologue_end = 1; - } break; - - case DW_StdOpcode_SetEpilogueBegin: - { - vm_state.epilogue_begin = 1; - } break; - - case DW_StdOpcode_SetIsa: - { - U64 v = 0; - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &v); - vm_state.isa = v; - } break; - - //- Extended opcodes - case DW_StdOpcode_ExtendedOpcode: - { - U64 length = 0; - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &length); - U64 start_off = cursor; - U8 extended_opcode = 0; - cursor += dw_based_range_read_struct(base, line_info_range, cursor, &extended_opcode); - - switch (extended_opcode) { - case DW_ExtOpcode_EndSequence: - { - vm_state.end_sequence = 1; - dw_push_line(arena, &result, &vm_state, 0); - dw_line_vm_reset(&vm_state, vm_header.default_is_stmt); - end_of_seq = 1; - } break; - - case DW_ExtOpcode_SetAddress: - { - U64 address = 0; - cursor += dw_based_range_read(base, line_info_range, cursor, root->address_size, &address); - vm_state.address = address; - vm_state.op_index = 0; - vm_state.busted_seq = address != 0; // !(dbg->acceptable_vrange.min <= address && address < dbg->acceptable_vrange.max); - } break; - - case DW_ExtOpcode_DefineFile: - { - String8 file_name = dw_based_range_read_string(base, line_info_range, cursor); - U64 dir_index = 0; - U64 modify_time = 0; - U64 file_size = 0; - cursor += file_name.size + 1; - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &dir_index); - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &modify_time); - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &file_size); - - // TODO(rjf): Not fully implemented. By the DWARF V4 spec, the above is - // all that needs to be parsed, but the rest of the work that needs to - // happen here---allowing this file to be used by further opcodes---is - // not implemented. - // - // See the DWARF V4 spec (June 10, 2010), page 122. 
- error = 1; - AssertAlways(!"UNHANDLED DEFINE FILE!!!"); - } break; - - case DW_ExtOpcode_SetDiscriminator: - { - U64 v = 0; - cursor += dw_based_range_read_uleb128(base, line_info_range, cursor, &v); - vm_state.discriminator = v; - } break; - - default: break; - } - - U64 num_skip = cursor - (start_off + length); - cursor += num_skip; - if (dw_based_range_ptr(base, line_info_range, cursor) == 0 || start_off + length > cursor) { - error = 1; - } - - } break; - } - } - exit:; - - return result; -} - internal String8 -dw_path_from_file_idx(Arena *arena, DW_LineVMHeader *vm, U64 file_idx) +dw_path_from_file(Arena *arena, DW_LineVMHeader *vm, DW_LineFile *file) { Temp scratch = scratch_begin(&arena, 1); - - DW_LineFile *lf = &vm->file_table.v[file_idx]; - String8 dir = vm->dir_table.v[lf->dir_idx]; + String8 dir = vm->dir_table.v[file->dir_idx].file_name; PathStyle style = path_style_from_str8(dir); if (style == PathStyle_Null || style == PathStyle_Relative) { - style = path_style_from_str8(lf->file_name); + style = path_style_from_str8(file->file_name); } String8List path_list = {0}; if (str8_match_lit("..", dir, StringMatchFlag_RightSideSloppy)) { - String8List comp_dir_list = str8_split_path(scratch.arena, vm->dir_table.v[0]); + String8List comp_dir_list = str8_split_path(scratch.arena, vm->dir_table.v[0].file_name); str8_list_concat_in_place(&path_list, &comp_dir_list); } String8List dir_list = str8_split_path(scratch.arena, dir); str8_list_concat_in_place(&path_list, &dir_list); - str8_list_push(scratch.arena, &path_list, lf->file_name); + str8_list_push(scratch.arena, &path_list, file->file_name); str8_path_list_resolve_dots_in_place(&path_list, style); @@ -2064,299 +2792,298 @@ dw_path_from_file_idx(Arena *arena, DW_LineVMHeader *vm, U64 file_idx) return path; } -internal U64 -dw_read_line_file(void *line_base, - Rng1U64 line_rng, - U64 line_off, - DW_Mode mode, - DW_SectionArray *sections, - DW_AttribValueResolveParams resolve_params, - U8 address_size, - U64 
format_count, - Rng1U64 *formats, - DW_LineFile *line_file_out) +internal String8 +dw_path_from_file_idx(Arena *arena, DW_LineVMHeader *vm, U64 file_idx) { - MemoryZeroStruct(line_file_out); + return dw_path_from_file(arena, vm, &vm->file_table.v[file_idx]); +} + +internal DW_LineTableParseResult +dw_parsed_line_table_from_data(Arena *arena, + String8 unit_data, + DW_Input *input, + String8 cu_dir, + String8 cu_name, + U8 cu_address_size, + DW_ListUnit *cu_str_offsets) +{ + DW_LineVMHeader vm_header = {0}; + U64 vm_header_size = dw_read_line_vm_header(arena, unit_data, 0, input, cu_dir, cu_name, cu_address_size, cu_str_offsets, &vm_header); + + U64 unit_cursor = vm_header_size; - U64 line_off_start = line_off; - for (U64 format_idx = 0; format_idx < format_count; ++format_idx) - { - DW_LNCT lnct = (DW_LNCT)formats[format_idx].min; - DW_FormKind form_kind = (DW_FormKind)formats[format_idx].max; - DW_AttribValue form_value = {0}; - line_off += dw_based_range_read_attrib_form_value(line_base, line_rng, line_off, mode, address_size, form_kind, 0, &form_value); - switch (lnct) - { - case DW_LNCT_Path: + //- rjf: prep state for VM + DW_LineVMState vm_state = {0}; + dw_line_vm_reset(&vm_state, vm_header.default_is_stmt); + + //- rjf: VM loop; build output list + DW_LineTableParseResult result = { .vm_header = vm_header }; + B32 end_of_seq = 0; + B32 error = 0; + for (; !error && unit_cursor < unit_data.size; ) { + //- rjf: parse opcode + U8 opcode = 0; + unit_cursor += str8_deserial_read_struct(unit_data, unit_cursor, &opcode); + + //- rjf: do opcode action + switch (opcode) { + default: { + //- rjf: special opcode case + if (opcode >= vm_header.opcode_base) { + U32 adjusted_opcode = (U32)(opcode - vm_header.opcode_base); + U32 op_advance = adjusted_opcode / vm_header.line_range; + S32 line_inc = (S32)vm_header.line_base + ((S32)adjusted_opcode) % (S32)vm_header.line_range; + // TODO: can we just call dw_advance_line_vm_state_pc + U64 addr_inc = vm_header.min_inst_len * 
((vm_state.op_index+op_advance) / vm_header.max_ops_for_inst); + + vm_state.address += addr_inc; + vm_state.op_index = (vm_state.op_index + op_advance) % vm_header.max_ops_for_inst; + vm_state.line = (U32)((S32)vm_state.line + line_inc); + vm_state.basic_block = 0; + vm_state.prologue_end = 0; + vm_state.epilogue_begin = 0; + vm_state.discriminator = 0; + + if(vm_state.is_stmt) + { + dw_push_line(arena, &result, &vm_state, end_of_seq); + } + end_of_seq = 0; + +#if 0 + // NOTE(rjf): DWARF has dummy lines at the end of groups of line ranges, where we'd like + // to break line info into sequences. + if(vm_state.line == 0) + { + end_of_seq = 1; + } +#endif + } + // Skipping unknown opcode. This is a valid case and + // it works because compiler stores operand lengths. + else { + if (0 < opcode && opcode <= vm_header.num_opcode_lens) { + U8 num_operands = vm_header.opcode_lens[opcode - 1]; + for (U8 i = 0; i < num_operands; ++i) { + U64 operand = 0; + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &operand); + } + } else { + error = 1; + goto exit; + } + } + } break; + + //- Standard opcodes + + case DW_StdOpcode_Copy: { + if(vm_state.is_stmt) { - Assert(form_kind == DW_Form_String || form_kind == DW_Form_LineStrp || - form_kind == DW_Form_Strp || form_kind == DW_Form_StrpSup || - form_kind == DW_Form_Strx || form_kind == DW_Form_Strx1 || - form_kind == DW_Form_Strx2 || form_kind == DW_Form_Strx3 || - form_kind == DW_Form_Strx4); - DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_String, form_value); - line_file_out->file_name = dw_string_from_attrib_value(sections, attrib_value); + dw_push_line(arena, &result, &vm_state, end_of_seq); + } + end_of_seq = 0; + vm_state.discriminator = 0; + vm_state.basic_block = 0; + vm_state.prologue_end = 0; + vm_state.epilogue_begin = 0; + } break; + + case DW_StdOpcode_AdvancePc: { + U64 advance = 0; + unit_cursor += 
str8_deserial_read_uleb128(unit_data, unit_cursor, &advance); + dw_line_vm_advance(&vm_state, advance, vm_header.min_inst_len, vm_header.max_ops_for_inst); + } break; + + case DW_StdOpcode_AdvanceLine: { + S64 s = 0; + unit_cursor += str8_deserial_read_sleb128(unit_data, unit_cursor, &s); + vm_state.line += s; + } break; + + case DW_StdOpcode_SetFile: { + U64 file_index = 0; + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &file_index); + vm_state.file_index = file_index; + } break; + + case DW_StdOpcode_SetColumn: { + U64 column = 0; + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &column); + vm_state.column = column; + } break; + + case DW_StdOpcode_NegateStmt: { + vm_state.is_stmt = !vm_state.is_stmt; + } break; + + case DW_StdOpcode_SetBasicBlock: { + vm_state.basic_block = 1; + } break; + + case DW_StdOpcode_ConstAddPc: { + U64 advance = (0xffu - vm_header.opcode_base) / vm_header.line_range; + dw_line_vm_advance(&vm_state, advance, vm_header.min_inst_len, vm_header.max_ops_for_inst); + } break; + + case DW_StdOpcode_FixedAdvancePc: { + U16 operand = 0; + unit_cursor += str8_deserial_read_struct(unit_data, unit_cursor, &operand); + vm_state.address += operand; + vm_state.op_index = 0; + } break; + + case DW_StdOpcode_SetPrologueEnd: { + vm_state.prologue_end = 1; + } break; + + case DW_StdOpcode_SetEpilogueBegin: { + vm_state.epilogue_begin = 1; + } break; + + case DW_StdOpcode_SetIsa: { + U64 v = 0; + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &v); + vm_state.isa = v; + } break; + + //- Extended opcodes + case DW_StdOpcode_ExtendedOpcode: { + U64 length = 0; + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &length); + + U64 extended_opl = unit_cursor + length; + U8 extended_opcode = 0; + unit_cursor += str8_deserial_read_struct(unit_data, unit_cursor, &extended_opcode); + + switch (extended_opcode) { + case DW_ExtOpcode_EndSequence: { + vm_state.end_sequence = 1; + 
if(vm_state.is_stmt) + { + dw_push_line(arena, &result, &vm_state, 0); + } + dw_line_vm_reset(&vm_state, vm_header.default_is_stmt); + end_of_seq = 1; } break; - case DW_LNCT_DirectoryIndex: - { - Assert(form_kind == DW_Form_Data1 || form_kind == DW_Form_Data2 || - form_kind == DW_Form_UData); - DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_Block, form_value); - line_file_out->dir_idx = attrib_value.v[0]; + case DW_ExtOpcode_SetAddress: { + U64 address = 0; + unit_cursor += str8_deserial_read(unit_data, unit_cursor, &address, vm_header.address_size, vm_header.address_size); + vm_state.address = address; + vm_state.op_index = 0; } break; - case DW_LNCT_TimeStamp: - { - Assert(form_kind == DW_Form_UData || form_kind == DW_Form_Data4 || - form_kind == DW_Form_Data8 || form_kind == DW_Form_Block); - DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_Const, form_value); - line_file_out->modify_time = attrib_value.v[0]; + case DW_ExtOpcode_DefineFile: { + String8 file_name = {0}; + U64 dir_index = 0; + U64 modify_time = 0; + U64 file_size = 0; + + unit_cursor += str8_deserial_read_cstr(unit_data, unit_cursor, &file_name); + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &dir_index); + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &modify_time); + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &file_size); + + // TODO(rjf): Not fully implemented. By the DWARF V4 spec, the above is + // all that needs to be parsed, but the rest of the work that needs to + // happen here---allowing this file to be used by further opcodes---is + // not implemented. + // + // See the DWARF V4 spec (June 10, 2010), page 122. 
+ error = 1; + AssertAlways(!"UNHANDLED DEFINE FILE!!!"); } break; - case DW_LNCT_Size: - { - Assert(form_kind == DW_Form_UData || form_kind == DW_Form_Data1 || - form_kind == DW_Form_Data2 || form_kind == DW_Form_Data4 || - form_kind == DW_Form_Data8); - DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_Block, form_value); - line_file_out->file_size = attrib_value.v[0]; + case DW_ExtOpcode_SetDiscriminator: { + U64 v = 0; + unit_cursor += str8_deserial_read_uleb128(unit_data, unit_cursor, &v); + vm_state.discriminator = v; } break; - case DW_LNCT_MD5: - { - Assert(form_kind == DW_Form_Data16); - DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_Block, form_value); - line_file_out->md5_digest[0] = attrib_value.v[0]; - line_file_out->md5_digest[1] = attrib_value.v[1]; - } break; + default: break; + } - default: - { - Assert(DW_LNCT_UserLo < lnct && lnct < DW_LNCT_UserHi); - } break; + unit_cursor = extended_opl; + } break; } } - U64 result = line_off - line_off_start; + + exit:; + return result; } -internal U64 -dw_read_line_vm_header(Arena *arena, - void *line_base, - Rng1U64 line_rng, - U64 line_off, - DW_Mode mode, - DW_SectionArray *sections, - DW_AttribValueResolveParams resolve_params, - String8 compile_dir, - String8 unit_name, - DW_LineVMHeader *header_out) +internal DW_PubStringsTable +dw_v4_pub_strings_table_from_section_kind(Arena *arena, DW_Input *input, DW_SectionKind section_kind) { Temp scratch = scratch_begin(&arena, 1); - MemoryZeroStruct(header_out); + DW_PubStringsTable names_table = {0}; + names_table.size = 16384; + names_table.buckets = push_array(arena, DW_PubStringsBucket*, names_table.size); - //- rjf: parse unit length - U64 unit_length = 0; - U64 unit_length_size = dw_based_range_read_length(line_base, line_rng, line_off, &unit_length); - - header_out->unit_length = unit_length; - header_out->unit_opl = line_off + 
unit_length + unit_length_size; + String8 section_data = input->sec[section_kind].data; + for(U64 cursor = 0; cursor < section_data.size; ) { - U64 cursor = unit_length_size; - Rng1U64 parse_rng = rng_1u64(line_off, header_out->unit_opl); + U64 unit_length = 0; + U64 unit_length_size = str8_deserial_read_dwarf_packed_size(section_data, cursor, &unit_length); + if (unit_length_size == 0) { + break; + } + cursor += unit_length_size; - //- rjf: parse version and header length - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->version); - - if(header_out->version == DW_Version_5) - { - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->address_size); - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->segment_selector_size); - } - - cursor += dw_based_range_read(line_base, parse_rng, cursor, dw_offset_size_from_mode(mode), &header_out->header_length); - - //- rjf: calculate program offset - header_out->program_off = parse_rng.min + cursor + header_out->header_length; - - //- rjf: parse minimum instruction length - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->min_inst_len); - - //- rjf: parse max ops for instruction - switch(header_out->version) - { - case DW_Version_5: - case DW_Version_4: - { - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->max_ops_for_inst); - Assert(header_out->max_ops_for_inst > 0); - } break; - case DW_Version_3: - case DW_Version_2: - case DW_Version_1: - { - header_out->max_ops_for_inst = 1; - } break; - default: break; - } - - //- rjf: parse rest of program info - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->default_is_stmt); - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->line_base); - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->line_range); - cursor += 
dw_based_range_read_struct(line_base, parse_rng, cursor, &header_out->opcode_base); - - //- rjf: calculate opcode length array - header_out->num_opcode_lens = header_out->opcode_base > 0 ? header_out->opcode_base - 1u : 0; - header_out->opcode_lens = dw_based_range_ptr_size(line_base, parse_rng, cursor, header_out->num_opcode_lens * sizeof(header_out->opcode_lens[0])); - cursor += header_out->num_opcode_lens * sizeof(header_out->opcode_lens[0]); - - if(header_out->version == DW_Version_5) - { - //- parse directory names - U8 directory_entry_format_count = 0; - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &directory_entry_format_count); - Assert(directory_entry_format_count == 1); - Rng1U64 *directory_entry_formats = push_array(scratch.arena, Rng1U64, directory_entry_format_count); - for(U8 format_idx = 0; format_idx < directory_entry_format_count; ++format_idx) - { - U64 content_type_code = 0, form_code = 0; - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &content_type_code); - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &form_code); - directory_entry_formats[format_idx] = rng_1u64(content_type_code, form_code); + U64 cursor_opl = Min(cursor + unit_length, section_data.size); + if (cursor >= cursor_opl) { + break; } - U64 directories_count = 0; - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &directories_count); - header_out->dir_table.count = directories_count; - header_out->dir_table.v = push_array(arena, String8, header_out->dir_table.count); - for(U64 dir_idx = 0; dir_idx < directories_count; ++dir_idx) - { - DW_LineFile line_file; - cursor += dw_read_line_file(line_base, - parse_rng, - cursor, - mode, - sections, - resolve_params, - header_out->address_size, - directory_entry_format_count, - directory_entry_formats, - &line_file); - header_out->dir_table.v[dir_idx] = push_str8_copy(arena, line_file.file_name); + DW_Version unit_version = 0; + cursor += 
str8_deserial_read_struct(section_data, cursor, &unit_version); + if (cursor >= cursor_opl) { + break; } - //- parse file table - U8 file_name_entry_format_count = 0; - cursor += dw_based_range_read_struct(line_base, parse_rng, cursor, &file_name_entry_format_count); - Rng1U64 *file_name_entry_formats = push_array(scratch.arena, Rng1U64, file_name_entry_format_count); - for(U8 format_idx = 0; format_idx < file_name_entry_format_count; ++format_idx) - { - U64 content_type_code = 0, form_code = 0; - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &content_type_code); - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &form_code); - file_name_entry_formats[format_idx] = rng_1u64(content_type_code, form_code); + DW_Format format = DW_FormatFromSize(unit_length); + + U64 debug_info_off = 0; + cursor += str8_deserial_read_dwarf_uint(section_data, cursor, format, &debug_info_off); + if (cursor >= cursor_opl) { + break; } - U64 file_names_count = 0; - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &file_names_count); - header_out->file_table.count = file_names_count; - header_out->file_table.v = push_array(arena, DW_LineFile, header_out->file_table.count); - for(U64 file_idx = 0; file_idx < file_names_count; ++file_idx) - { - cursor += dw_read_line_file(line_base, - parse_rng, - cursor, - mode, - sections, - resolve_params, - header_out->address_size, - file_name_entry_format_count, - file_name_entry_formats, - &header_out->file_table.v[file_idx]); + U64 debug_info_length = 0; + cursor += str8_deserial_read_dwarf_packed_size(section_data, cursor, &debug_info_length); + if (cursor >= cursor_opl) { + break; } - } - else - { - String8List dir_list = {0}; - str8_list_push(scratch.arena, &dir_list, compile_dir); - for (;;) - { - String8 dir = dw_based_range_read_string(line_base, parse_rng, cursor); - cursor += dir.size + 1; - if (dir.size == 0) - { + + U64 off_size = dw_size_from_format(format); + for (; (cursor + 
off_size) <= cursor_opl;) { + U64 info_off = 0; + U64 info_off_size = str8_deserial_read_dwarf_uint(section_data, cursor, format, &info_off); + cursor += info_off_size; + + if (info_off_size == 0 || info_off == 0) { break; } - str8_list_push(scratch.arena, &dir_list, dir); - } - - DW_LineVMFileList file_list = {0}; - - //- rjf: push 0-index file (compile file) - { - DW_LineVMFileNode *node = push_array(scratch.arena, DW_LineVMFileNode, 1); - node->file.file_name = unit_name; - SLLQueuePush(file_list.first, file_list.last, node); - file_list.node_count += 1; - } - - for(;;) - { - String8 file_name = dw_based_range_read_string(line_base, parse_rng, cursor); - U64 dir_index = 0; - U64 modify_time = 0; - U64 file_size = 0; - cursor += file_name.size + 1; - if(file_name.size == 0) - { - break; - } - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &dir_index); - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &modify_time); - cursor += dw_based_range_read_uleb128(line_base, parse_rng, cursor, &file_size); - DW_LineVMFileNode *node = push_array(scratch.arena, DW_LineVMFileNode, 1); - node->file.file_name = file_name; - node->file.dir_idx = dir_index; - node->file.modify_time = modify_time; - node->file.file_size = file_size; - SLLQueuePush(file_list.first, file_list.last, node); - file_list.node_count += 1; - } - - //- rjf: build dir table - { - header_out->dir_table.count = dir_list.node_count; - header_out->dir_table.v = push_array(arena, String8, header_out->dir_table.count); + String8 string = {0}; + cursor += str8_deserial_read_cstr(section_data, cursor, &string); - String8Node *n = dir_list.first; - for(U64 idx = 0; n != 0 && idx < header_out->dir_table.count; idx += 1, n = n->next) - { - header_out->dir_table.v[idx] = push_str8_copy(arena, n->string); - } - } - - //- rjf: build file table - { - header_out->file_table.count = file_list.node_count; - header_out->file_table.v = push_array(arena, DW_LineFile, 
header_out->file_table.count); - - U64 file_idx = 0; - DW_LineVMFileNode *file_node = file_list.first; - for(; file_node != 0; file_idx += 1, file_node = file_node->next) - { - header_out->file_table.v[file_idx].file_name = push_str8_copy(arena, file_node->file.file_name); - header_out->file_table.v[file_idx].dir_idx = file_node->file.dir_idx; - header_out->file_table.v[file_idx].modify_time = file_node->file.modify_time; - header_out->file_table.v[file_idx].file_size = file_node->file.file_size; - } + U64 hash = dw_hash_from_string(string); + U64 bucket_idx = hash % names_table.size; + + DW_PubStringsBucket *bucket = push_array(arena, DW_PubStringsBucket, 1); + bucket->next = names_table.buckets[bucket_idx]; + bucket->string = string; + bucket->info_off = info_off; + bucket->cu_info_off = debug_info_off; + names_table.buckets[bucket_idx] = bucket; } } - + scratch_end(scratch); - return cursor; + return names_table; } diff --git a/src/dwarf/dwarf_parse.h b/src/dwarf/dwarf_parse.h index 6cce84b8..99a589d6 100644 --- a/src/dwarf/dwarf_parse.h +++ b/src/dwarf/dwarf_parse.h @@ -4,38 +4,49 @@ #ifndef DWARF_PARSE_H #define DWARF_PARSE_H -// NOTE(rjf): Some rules about the spaces of offsets and ranges: -// -// - Every stored/passed offset is relative to the base of its section. -// - Every stored/passed range has endpoints relative to the base of their section. -// - Upon calling a syms_based_range_* function, these offsets need to be -// converted into range-relative. 
- -//////////////////////////////// -//~ rjf: Constants - -#define DWARF_VOID_TYPE_ID 0xffffffffffffffffull - -//////////////////////////////// -//~ rjf: Files + External Debug References - -typedef struct DW_ExtDebugRef DW_ExtDebugRef; -struct DW_ExtDebugRef +typedef struct DW_Section { - // NOTE(rjf): .dwo => an external DWARF V5 .dwo file - String8 dwo_path; - U64 dwo_id; -}; + String8 name; + String8 data; + B32 is_dwo; +} DW_Section; -//////////////////////////////// -//~ rjf: Abbrev Table +typedef struct DW_Input +{ + DW_Section sec[DW_Section_Count]; + DW_Section sup[DW_Section_Count]; +} DW_Input; -typedef struct DW_AbbrevTableEntry DW_AbbrevTableEntry; -struct DW_AbbrevTableEntry +typedef struct DW_ListUnit +{ + DW_Version version; + U64 address_size; + U64 segment_selector_size; + U64 entry_size; + String8 entries; +} DW_ListUnit; + +typedef struct DW_ListUnitInput +{ + U64 addr_count; + U64 str_offset_count; + U64 rnglist_count; + U64 loclist_count; + Rng1U64Array addr_ranges; + Rng1U64Array str_offset_ranges; + Rng1U64Array rnglist_ranges; + Rng1U64Array loclist_ranges; + DW_ListUnit *addrs; + DW_ListUnit *str_offsets; + DW_ListUnit *rnglists; + DW_ListUnit *loclists; +} DW_ListUnitInput; + +typedef struct DW_AbbrevTableEntry { U64 id; U64 off; -}; +} DW_AbbrevTableEntry; typedef struct DW_AbbrevTable DW_AbbrevTable; struct DW_AbbrevTable @@ -44,62 +55,6 @@ struct DW_AbbrevTable DW_AbbrevTableEntry *entries; }; -//////////////////////////////// -//~ Sections - -typedef struct DW_Section DW_Section; -struct DW_Section -{ - String8 name; - String8 data; - DW_Mode mode; - B32 is_dwo; -}; - -typedef struct DW_SectionArray DW_SectionArray; -struct DW_SectionArray -{ - DW_Section v[DW_Section_Count]; -}; - -//////////////////////////////// -//~ rjf: Basic Line Info - -typedef struct DW_LineFile DW_LineFile; -struct DW_LineFile -{ - String8 file_name; - U64 dir_idx; - U64 modify_time; - U64 md5_digest[2]; - U64 file_size; -}; - -typedef struct 
DW_LineVMFileNode DW_LineVMFileNode; -struct DW_LineVMFileNode -{ - DW_LineVMFileNode *next; - DW_LineFile file; -}; - -typedef struct DW_LineVMFileList DW_LineVMFileList; -struct DW_LineVMFileList -{ - U64 node_count; - DW_LineVMFileNode *first; - DW_LineVMFileNode *last; -}; - -typedef struct DW_LineVMFileArray DW_LineVMFileArray; -struct DW_LineVMFileArray -{ - U64 count; - DW_LineFile *v; -}; - -//////////////////////////////// -//~ rjf: Abbrevs - typedef enum DW_AbbrevKind { DW_Abbrev_Null, @@ -108,215 +63,169 @@ typedef enum DW_AbbrevKind DW_Abbrev_AttribSequenceEnd, DW_Abbrev_DIEBegin, DW_Abbrev_DIEEnd, -} -DW_AbbrevKind; +} DW_AbbrevKind; typedef U32 DW_AbbrevFlags; -enum{ - DW_AbbrevFlag_HasImplicitConst = (1<<0), - DW_AbbrevFlag_HasChildren = (1<<1), +enum +{ + DW_AbbrevFlag_HasImplicitConst = (1 << 0), + DW_AbbrevFlag_HasChildren = (1 << 1), }; -typedef struct DW_Abbrev DW_Abbrev; -struct DW_Abbrev +typedef struct DW_Abbrev { DW_AbbrevKind kind; - Rng1U64 abbrev_range; U64 sub_kind; U64 id; U64 const_value; DW_AbbrevFlags flags; -}; +} DW_Abbrev; -//////////////////////////////// -//~ rjf: Attribs - -typedef struct DW_AttribValueResolveParams DW_AttribValueResolveParams; -struct DW_AttribValueResolveParams +typedef union DW_Form { - DW_Version version; - DW_Language language; - U64 addr_size; // NOTE(rjf): size in bytes of containing compilation unit's addresses - U64 containing_unit_info_off; // NOTE(rjf): containing compilation unit's offset into the .debug_info section - U64 debug_addrs_base; // NOTE(rjf): containing compilation unit's offset into the .debug_addrs section (DWARF V5 ONLY) - U64 debug_rnglists_base; // NOTE(rjf): containing compilation unit's offset into the .debug_rnglists section (DWARF V5 ONLY) - U64 debug_str_offs_base; // NOTE(rjf): containing compilation unit's offset into the .debug_str_offsets section (DWARF V5 ONLY) - U64 debug_loclists_base; // NOTE(rjf): containing compilation unit's offset into the .debug_loclists section 
(DWARF V5 ONLY) -}; + String8 addr; + String8 block; + String8 data; + String8 string; + String8 exprloc; + B8 flag; + S64 sdata; + U64 udata; + U64 sec_offset; + U64 ref; + U64 strp_sup; + U64 xval; + U64 addrx; + U64 strx; + U64 rnglistx; + U64 ptr; + U64 implicit_const; +} DW_Form; -typedef struct DW_AttribValue DW_AttribValue; -struct DW_AttribValue -{ - DW_SectionKind section; - U64 v[2]; -}; - -typedef struct DW_Attrib DW_Attrib; -struct DW_Attrib +typedef struct DW_Attrib { U64 info_off; + U64 abbrev_off; U64 abbrev_id; DW_AttribKind attrib_kind; DW_FormKind form_kind; - DW_AttribClass value_class; - DW_AttribValue form_value; -}; + DW_Form form; +} DW_Attrib; -typedef struct DW_AttribArray DW_AttribArray; -struct DW_AttribArray +typedef struct DW_AttribNode { - DW_Attrib *v; - U64 count; -}; + struct DW_AttribNode *next; + DW_Attrib v; +} DW_AttribNode; -typedef struct DW_AttribNode DW_AttribNode; -struct DW_AttribNode -{ - DW_AttribNode *next; - DW_Attrib attrib; -}; - -typedef struct DW_AttribList DW_AttribList; -struct DW_AttribList +typedef struct DW_AttribList { DW_AttribNode *first; DW_AttribNode *last; U64 count; -}; +} DW_AttribList; -typedef struct DW_AttribListParseResult DW_AttribListParseResult; -struct DW_AttribListParseResult +typedef struct DW_Tag { - DW_AttribList attribs; - U64 max_info_off; - U64 max_abbrev_off; -}; - -//////////////////////////////// -//~ rjf: Compilation Units + Accelerators - -typedef struct DW_CompRoot DW_CompRoot; -struct DW_CompRoot -{ - // NOTE(rjf): Header Data - U64 size; - DW_CompUnitKind kind; - DW_Version version; - U64 address_size; - U64 abbrev_off; - U64 info_off; - Rng1U64 tags_info_range; - DW_AbbrevTable abbrev_table; - - // NOTE(rjf): [parsed from DWARF attributes] Offsets For More Info (DWARF V5 ONLY) - U64 rnglist_base; // NOTE(rjf): Offset into the .debug_rnglists section where this comp unit's data is. 
- U64 loclist_base; // NOTE(rjf): Offset into the .debug_loclists section where this comp unit's data is. - U64 addrs_base; // NOTE(rjf): Offset into the .debug_addr section where this comp unit's data is. - U64 stroffs_base; // NOTE(rjf): Offset into the .debug_str_offsets section where this comp unit's data is. - - // NOTE(rjf): [parsed from DWARF attributes] General Info - String8 name; - String8 producer; - String8 compile_dir; - String8 external_dwo_name; - U64 dwo_id; - DW_Language language; - U64 name_case; - B32 use_utf8; - U64 line_off; - U64 low_pc; - U64 high_pc; - DW_AttribValue ranges_attrib_value; - U64 base_addr; -}; - -//////////////////////////////// -//~ rjf: Tags - -typedef struct DW_Tag DW_Tag; -struct DW_Tag -{ - DW_Tag *next_sibling; - DW_Tag *first_child; - DW_Tag *last_child; - DW_Tag *parent; - Rng1U64 info_range; - Rng1U64 abbrev_range; B32 has_children; U64 abbrev_id; DW_TagKind kind; - U64 attribs_info_off; - U64 attribs_abbrev_off; DW_AttribList attribs; -}; + U64 info_off; +} DW_Tag; -typedef U32 DW_TagStubFlags; -enum +typedef struct DW_TagNode { - DW_TagStubFlag_HasObjectPointerArg = (1<<0), - DW_TagStubFlag_HasLocation = (1<<1), - DW_TagStubFlag_HasExternal = (1<<2), - DW_TagStubFlag_HasSpecification = (1<<3), -}; + DW_Tag tag; + struct DW_TagNode *sibling; + struct DW_TagNode *first_child; + struct DW_TagNode *last_child; +} DW_TagNode; -typedef struct DW_TagStub DW_TagStub; -struct DW_TagStub +typedef struct DW_Loc { - U64 info_off; - DW_TagKind kind; - DW_TagStubFlags flags; - U64 children_info_off; - U64 attribs_info_off; - U64 attribs_abbrev_off; - - // NOTE(rjf): DW_Attrib_Specification is tacked onto definitions that - // are filling out more info about a "prototype". That attribute is a reference - // that points back at the declaration tag. The declaration tag has the - // DW_Attrib_Declaration attribute, which is sort of like the reverse - // of that, except there's no reference. 
So what we're doing here is just storing - // a reference on both, that point back to each other, so it's always easy to - // get from decl => spec, or from spec => decl. - //SYMS_SymbolID ref; - - // NOTE(rjf): DW_Attrib_AbstractOrigin is tacked onto some definitions - // that are used to specify information more specific to inlining, while wanting - // to refer to an "abstract" function DIE, that is not specific to any inline - // sites. The DWARF generator will not duplicate information across these, so - // we will occasionally need to look at an abstract origin to get abstract - // information, like name/linkage-name/etc. - //SYMS_SymbolID abstract_origin; - - U64 _unused_; -}; + Rng1U64 range; + String8 expr; +} DW_Loc; -typedef struct DW_TagStubNode DW_TagStubNode; -struct DW_TagStubNode +typedef struct DW_LocNode { - DW_TagStubNode *next; - DW_TagStub stub; -}; + DW_Loc v; + struct DW_LocNode *next; +} DW_LocNode; -typedef struct DW_TagStubList DW_TagStubList; -struct DW_TagStubList +typedef struct DW_LocList { - DW_TagStubNode *first; - DW_TagStubNode *last; - U64 count; -}; + U64 count; + DW_LocNode *first; + DW_LocNode *last; +} DW_LocList; -//////////////////////////////// -//~ rjf: Line Info VM Types - -typedef struct DW_LineVMHeader DW_LineVMHeader; -struct DW_LineVMHeader +typedef struct DW_CompUnit { - U64 unit_length; - U64 unit_opl; + B32 relaxed; + DW_Ext ext; + DW_CompUnitKind kind; + DW_Version version; + DW_Format format; + U64 address_size; + U64 abbrev_off; + Rng1U64 info_range; + U64 first_tag_info_off; + DW_AbbrevTable abbrev_table; + String8 abbrev_data; + DW_ListUnit *addr_lu; + DW_ListUnit *str_offsets_lu; + DW_ListUnit *rnglists_lu; + DW_ListUnit *loclists_lu; + U64 low_pc; + U64 dwo_id; + DW_Tag tag; + HashTable *tag_ht; +} DW_CompUnit; + +typedef struct DW_TagTree +{ + DW_TagNode *root; + U64 tag_count; +} DW_TagTree; + +typedef struct DW_LineFile +{ + String8 file_name; + U64 dir_idx; + U64 modify_time; + U64 file_size; + U128 
md5_digest; + String8 source; +} DW_LineFile; + +typedef struct DW_LineVMFileNode +{ + struct DW_LineVMFileNode *next; + DW_LineFile file; +} DW_LineVMFileNode; + +typedef struct DW_LineVMFileList +{ + U64 node_count; + DW_LineVMFileNode *first; + DW_LineVMFileNode *last; +} DW_LineVMFileList; + +typedef struct DW_LineVMFileArray +{ + U64 count; + DW_LineFile *v; +} DW_LineVMFileArray; + +typedef struct DW_LineVMHeader +{ + Rng1U64 unit_range; DW_Version version; U8 address_size; // Duplicates size from the compilation unit but is needed to support stripped exe that just have .debug_line and .debug_line_str. U8 segment_selector_size; U64 header_length; - U64 program_off; U8 min_inst_len; U8 max_ops_for_inst; U8 default_is_stmt; @@ -325,12 +234,11 @@ struct DW_LineVMHeader U8 opcode_base; U64 num_opcode_lens; U8 *opcode_lens; - String8Array dir_table; + DW_LineVMFileArray dir_table; DW_LineVMFileArray file_table; -}; +} DW_LineVMHeader; -typedef struct DW_LineVMState DW_LineVMState; -struct DW_LineVMState +typedef struct DW_LineVMState { U64 address; // Address of a machine instruction. U32 op_index; // This is used by the VLIW instructions to indicate index of operation inside the instruction. @@ -351,144 +259,195 @@ struct DW_LineVMState // prepare stack for a function. B32 prologue_end; - B32 epilogue_begin; // NOTE(nick): Indicates that "address" points to section where function exits and unwinds stack. - U64 isa; // NOTE(nick): Instruction set that is used. - U64 discriminator; // NOTE(nick): Arbitrary id that indicates to which block these instructions belong. - B32 end_sequence; // NOTE(nick): Indicates that "address" points to the first instruction in the instruction block that follows. - - // NOTE(rjf): it looks like LTO might sometimes zero out high PC and low PCs, causing a - // swath of line info to map to a range starting at 0. This causes overlapping ranges - // which we do not want to report. So this B32 will turn on emission. 
- B32 busted_seq; -}; + B32 epilogue_begin; // Indicates that "address" points to section where function exits and unwinds stack. + U64 isa; // Instruction set that is used. + U64 discriminator; // Arbitrary id that indicates to which block these instructions belong. + B32 end_sequence; // Indicates that "address" points to the first instruction in the instruction block that follows. +} DW_LineVMState; -typedef struct DW_Line DW_Line; -struct DW_Line +typedef struct DW_Line { U64 file_index; U32 line; U32 column; - U64 voff; -}; + U64 address; +} DW_Line; -typedef struct DW_LineNode DW_LineNode; -struct DW_LineNode +typedef struct DW_LineNode { - DW_LineNode *next; - DW_Line v; -}; + struct DW_LineNode *next; + DW_Line v; +} DW_LineNode; -typedef struct DW_LineSeqNode DW_LineSeqNode; -struct DW_LineSeqNode +typedef struct DW_LineSeqNode { - DW_LineSeqNode *next; - U64 count; - DW_LineNode *first; - DW_LineNode *last; -}; + struct DW_LineSeqNode *next; + U64 count; + DW_LineNode *first; + DW_LineNode *last; +} DW_LineSeqNode; -typedef struct DW_LineTableParseResult DW_LineTableParseResult; -struct DW_LineTableParseResult +typedef struct DW_LineTableParseResult { + DW_LineVMHeader vm_header; U64 seq_count; DW_LineSeqNode *first_seq; DW_LineSeqNode *last_seq; -}; +} DW_LineTableParseResult; //////////////////////////////// -//~ rjf: .debug_pubnames and .debug_pubtypes +// .debug_pubnames and .debug_pubtypes -typedef struct DW_PubStringsBucket DW_PubStringsBucket; -struct DW_PubStringsBucket +typedef struct DW_PubStringsBucket { - DW_PubStringsBucket *next; - String8 string; - U64 info_off; - U64 cu_info_off; -}; + struct DW_PubStringsBucket *next; + String8 string; + U64 info_off; + U64 cu_info_off; +} DW_PubStringsBucket; -typedef struct DW_PubStringsTable DW_PubStringsTable; -struct DW_PubStringsTable +typedef struct DW_PubStringsTable { - U64 size; + U64 size; DW_PubStringsBucket **buckets; -}; +} DW_PubStringsTable; -//////////////////////////////// -//~ rjf: 
Basic Helpers +typedef struct DW_Reference +{ + DW_CompUnit *cu; + U64 info_off; +} DW_Reference; + +// hasher internal U64 dw_hash_from_string(String8 string); -//////////////////////////////// -//~ Specific Based Range Helpers +// deserial helpers -#define dw_based_range_read_struct(base, range, offset, out) dw_based_range_read(base, range, offset, sizeof(*out), out) +internal U64 str8_deserial_read_dwarf_packed_size(String8 string, U64 off, U64 *size_out); +internal U64 str8_deserial_read_dwarf_uint (String8 string, U64 off, DW_Format format, U64 *uint_out); +internal U64 str8_deserial_read_uleb128 (String8 string, U64 off, U64 *value_out); +internal U64 str8_deserial_read_sleb128 (String8 string, U64 off, S64 *value_out); +internal U64 str8_deserial_read_uleb128_array(Arena *arena, String8 string, U64 off, U64 count, U64 **arr_out); +internal U64 str8_deserial_read_sleb128_array(Arena *arena, String8 string, U64 off, U64 count, S64 **arr_out); -internal U64 dw_based_range_read(void *base, Rng1U64 range, U64 offset, U64 size, void *out); -internal String8 dw_based_range_read_string(void *base, Rng1U64 range, U64 offset); -internal void* dw_based_range_ptr(void *base, Rng1U64 range, U64 offset); -internal void* dw_based_range_ptr_size(void *base, Rng1U64 range, U64 offset, U64 size); -internal U64 dw_based_range_read_uleb128(void *base, Rng1U64 range, U64 offset, U64 *out_value); -internal U64 dw_based_range_read_sleb128(void *base, Rng1U64 range, U64 offset, S64 *out_value); -internal U64 dw_based_range_read_length(void *base, Rng1U64 range, U64 offset, U64 *out_value); -internal U64 dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev); -internal U64 dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev); -internal U64 dw_based_range_read_attrib_form_value(void *base, Rng1U64 range, U64 offset, DW_Mode mode, U64 address_size, DW_FormKind form_kind, U64 implicit_const, 
DW_AttribValue *form_value_out); +internal Rng1U64List dw_unit_ranges_from_data(Arena *arena, String8 data); -internal DW_Mode dw_mode_from_sec(DW_SectionArray *sections, DW_SectionKind kind); -internal B32 dw_sec_is_present(DW_SectionArray *sections, DW_SectionKind kind); -internal void* dw_base_from_sec(DW_SectionArray *sections, DW_SectionKind kind); -internal Rng1U64 dw_range_from_sec(DW_SectionArray *sections, DW_SectionKind kind); +// list units -//////////////////////////////// -//~ rjf: Abbrev Table +internal U64 dw_read_list_unit_header_addr (String8 unit_data, DW_ListUnit *lu_out); +internal U64 dw_read_list_unit_header_str_offsets(String8 unit_data, DW_ListUnit *lu_out); +internal U64 dw_read_list_unit_header_list (String8 unit_data, DW_ListUnit *lu_out); -internal DW_AbbrevTable dw_make_abbrev_table(Arena *arena, DW_SectionArray *sections, U64 start_abbrev_off); +internal DW_ListUnitInput dw_list_unit_input_from_input(Arena *arena, DW_Input *input); + +internal U64 dw_offset_from_list_unit(DW_ListUnit *lu, U64 index); +internal U64 dw_addr_from_list_unit (DW_ListUnit *lu, U64 index); + +// abbrev table + +internal U64 dw_read_abbrev_tag (String8 data, U64 offset, DW_Abbrev *out_abbrev); +internal U64 dw_read_abbrev_attrib(String8 data, U64 offset, DW_Abbrev *out_abbrev); +internal DW_AbbrevTable dw_make_abbrev_table(Arena *arena, String8 abbrev_data, U64 start_abbrev_off); internal U64 dw_abbrev_offset_from_abbrev_id(DW_AbbrevTable table, U64 abbrev_id); -//////////////////////////////// -//~ rjf: Miscellaneous DWARF Section Parsing +// form and tag -//- rjf: .debug_ranges (DWARF V4) -internal Rng1U64List dw_v4_range_list_from_range_offset(Arena *arena, DW_SectionArray *sections, U64 addr_size, U64 comp_unit_base_addr, U64 range_off); +internal U64 dw_read_form(String8 data, U64 off, DW_Version version, DW_Format unit_format, U64 address_size, DW_FormKind form_kind, U64 implicit_const, DW_Form *form_out); +internal U64 dw_read_tag (Arena *arena, String8 
tag_data, U64 tag_off, U64 tag_base, DW_AbbrevTable abbrev_table, String8 abbrev_data, DW_Version version, DW_Format unit_format, U64 address_size, DW_Tag *tag_out); +internal U64 dw_read_tag_cu(Arena *arena, DW_Input *input, DW_CompUnit *cu, U64 info_off, DW_Tag *tag_out); -//- rjf: .debug_pubtypes + .debug_pubnames (DWARF V4) -internal DW_PubStringsTable dw_v4_pub_strings_table_from_section_kind(Arena *arena, DW_SectionArray *sections, DW_SectionKind section_kind); +// attrib interp -//- rjf: .debug_str_offsets (DWARF V5) -internal U64 dw_v5_offset_from_offs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index); +internal U64 dw_interp_sec_offset(DW_FormKind form_kind, DW_Form form); +internal String8 dw_interp_exprloc (DW_FormKind form_kind, DW_Form form); +internal U128 dw_interp_const_u128(DW_FormKind form_kind, DW_Form form); +internal U64 dw_interp_const_u64 (DW_FormKind form_kind, DW_Form form); +internal U32 dw_interp_const_u32 (DW_FormKind form_kind, DW_Form form); +internal S64 dw_interp_const_s64 (DW_FormKind form_kind, DW_Form form); +internal S32 dw_interp_const_s32 (DW_FormKind form_kind, DW_Form form); +internal B32 dw_interp_flag (DW_FormKind form_kind, DW_Form form); +internal U64 dw_interp_address (U64 address_size, U64 base_addr, DW_ListUnit *addr_xlist, DW_FormKind form_kind, DW_Form form); +internal String8 dw_interp_block (DW_Input *input, DW_CompUnit *cu, DW_FormKind form_kind, DW_Form form); +internal String8 dw_interp_string (DW_Input *input, DW_Format unit_format, DW_ListUnit *str_offsets, DW_FormKind form_kind, DW_Form form); +internal String8 dw_interp_line_ptr (DW_Input *input, DW_FormKind form_kind, DW_Form form); +internal DW_LineFile * dw_interp_file (DW_LineVMHeader *line_vm, DW_FormKind form_kind, DW_Form form); +internal DW_Reference dw_interp_ref (DW_Input *input, DW_CompUnit *cu, DW_FormKind form_kind, DW_Form form); +internal DW_LocList dw_interp_loclist (Arena *arena, DW_Input *input, 
DW_CompUnit *cu, DW_FormKind form_kind, DW_Form form); +internal Rng1U64List dw_interp_rnglist (Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_FormKind form_kind, DW_Form form); -//- rjf: .debug_addr (DWARF V5) -internal U64 dw_v5_addr_from_addrs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index); +internal String8 dw_exprloc_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal U128 dw_const_u128_from_attrib_ptr(DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal U64 dw_const_u64_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal U32 dw_const_u32_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal S64 dw_const_s64_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal S32 dw_const_s32_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal B32 dw_flag_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal U64 dw_address_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal String8 dw_block_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal String8 dw_string_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal String8 dw_line_ptr_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal DW_LineFile * dw_file_from_attrib_ptr (DW_CompUnit *cu, DW_LineVMHeader *line_vm, DW_Attrib *attrib); +internal DW_Reference dw_ref_from_attrib_ptr (DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal DW_LocList dw_loclist_from_attrib_ptr (Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); +internal Rng1U64List dw_rnglist_from_attrib_ptr (Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_Attrib *attrib); -//- rjf: .debug_rnglists parsing (DWARF V5) -internal U64 
dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(DW_SectionArray *sections, DW_SectionKind section_kind, U64 base, U64 index); -internal Rng1U64List dw_v5_range_list_from_rnglist_offset(Arena *arena, DW_SectionArray *sections, DW_SectionKind section, U64 addr_size, U64 addr_section_base, U64 offset); +internal String8 dw_exprloc_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal U128 dw_const_u128_from_attrib(DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal U64 dw_const_u64_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal U32 dw_const_u32_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal B32 dw_flag_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal U64 dw_address_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal String8 dw_block_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal String8 dw_string_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal String8 dw_line_ptr_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal String8 dw_line_ptr_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal DW_LineFile * dw_file_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_LineVMHeader *line_vm, DW_Tag tag, DW_AttribKind kind); +internal DW_Reference dw_ref_from_attrib (DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal DW_LocList dw_loclist_from_attrib (Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); +internal Rng1U64List dw_rnglist_from_attrib (Arena *arena, DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind); -//////////////////////////////// -//~ rjf: Attrib Value Parsing +// compile unit -internal 
DW_AttribValueResolveParams dw_attrib_value_resolve_params_from_comp_root(DW_CompRoot *root); -internal DW_AttribValue dw_attrib_value_from_form_value(DW_SectionArray *sections, DW_AttribValueResolveParams resolve_params, DW_FormKind form_kind, DW_AttribClass value_class, DW_AttribValue form_value); -internal String8 dw_string_from_attrib_value(DW_SectionArray *sections, DW_AttribValue value); -internal Rng1U64List dw_range_list_from_high_low_pc_and_ranges_attrib_value(Arena *arena, DW_SectionArray *sections, U64 address_size, U64 comp_unit_base_addr, U64 addr_section_base, U64 low_pc, U64 high_pc, DW_AttribValue ranges_value); +internal DW_CompUnit dw_cu_from_info_off(Arena *arena, DW_Input *input, DW_ListUnitInput lu_input, U64 offset, B32 relaxed); +internal DW_TagTree dw_tag_tree_from_cu(Arena *arena, DW_Input *input, DW_CompUnit *cu); +internal HashTable * dw_make_tag_hash_table(Arena *arena, DW_TagTree tag_tree); +internal DW_TagNode * dw_tag_node_from_info_off(DW_CompUnit *cu, U64 info_off); -//////////////////////////////// -//~ rjf: Tag Parsing +// line info -internal DW_AttribListParseResult dw_parse_attrib_list_from_info_abbrev_offsets(Arena *arena, DW_SectionArray *sections, DW_Version ver, DW_Ext ext, DW_Language lang, U64 address_size, U64 info_off, U64 abbrev_off, B32 relaxed); -internal DW_Tag* dw_tag_from_info_offset(Arena *arena, DW_SectionArray *sections, DW_AbbrevTable abbrev_table, DW_Version ver, DW_Ext ext, DW_Language lang, U64 address_size, U64 info_offset, B32 relaxed); -internal DW_TagStub dw_stub_from_tag(DW_SectionArray *sections, DW_AttribValueResolveParams resolve_params, DW_Tag *tag); +internal U64 dw_read_line_file(String8 line_data, + U64 line_off, + DW_Input *input, + DW_Version unit_version, + DW_Format unit_format, + DW_Ext ext, + U64 address_size, + DW_ListUnit *str_offsets, + U64 enc_count, + U64 *enc_arr, + DW_LineFile *line_file_out); +internal U64 dw_read_line_vm_header(Arena *arena, + String8 line_data, + U64 line_off, + 
DW_Input *input, + String8 cu_dir, + String8 cu_name, + U8 cu_address_size, + DW_ListUnit *cu_str_offsets, + DW_LineVMHeader *header_out); -//- rjf: line info -internal void dw_line_vm_reset(DW_LineVMState *state, B32 default_is_stmt); -internal void dw_line_vm_advance(DW_LineVMState *state, U64 advance, U64 min_inst_len, U64 max_ops_for_inst); +internal void dw_line_vm_reset(DW_LineVMState *state, B32 default_is_stmt); +internal void dw_line_vm_advance(DW_LineVMState *state, U64 advance, U64 min_inst_len, U64 max_ops_for_inst); +internal DW_LineSeqNode * dw_push_line_seq(Arena* arena, DW_LineTableParseResult *parsed_tbl); +internal DW_LineNode * dw_push_line(Arena *arena, DW_LineTableParseResult *tbl, DW_LineVMState *vm_state, B32 start_of_sequence); +internal String8 dw_path_from_file(Arena *arena, DW_LineVMHeader *vm, DW_LineFile *file); +internal String8 dw_path_from_file_idx(Arena *arena, DW_LineVMHeader *vm, U64 file_idx); -internal DW_LineSeqNode* dw_push_line_seq(Arena* arena, DW_LineTableParseResult *parsed_tbl); -internal DW_LineNode* dw_push_line(Arena *arena, DW_LineTableParseResult *tbl, DW_LineVMState *vm_state, B32 start_of_sequence); -internal DW_LineTableParseResult dw_parsed_line_table_from_comp_root(Arena *arena, DW_SectionArray *sections, DW_CompRoot *root); -internal U64 dw_read_line_file(void *line_base, Rng1U64 line_rng, U64 line_off, DW_Mode mode, DW_SectionArray *sections, DW_AttribValueResolveParams resolve_params, U8 address_size, U64 format_count, Rng1U64 *formats, DW_LineFile *line_file_out); -internal U64 dw_read_line_vm_header(Arena *arena, void *line_base, Rng1U64 line_rng, U64 line_off, DW_Mode mode, DW_SectionArray *sections, DW_AttribValueResolveParams resolve_params, String8 compile_dir, String8 unit_name, DW_LineVMHeader *header_out); +internal DW_LineTableParseResult dw_parsed_line_table_from_data(Arena *arena, String8 unit_data, DW_Input *input, String8 cu_dir, String8 cu_name, U8 cu_address_size, DW_ListUnit *cu_str_offsets); 
+ +// helper for .debug_pubtypes and .debug_pubnames + +internal DW_PubStringsTable dw_v4_pub_strings_table_from_section_kind(Arena *arena, DW_Input *input, DW_SectionKind section_kind); #endif // DWARF_PARSE_H - diff --git a/src/dwarf/dwarf_unwind.c b/src/dwarf/dwarf_unwind.c index 4ca39ce9..84931716 100644 --- a/src/dwarf/dwarf_unwind.c +++ b/src/dwarf/dwarf_unwind.c @@ -410,6 +410,8 @@ dw_unwind_parse_pointer_x64(void *frame_base, Rng1U64 frame_range, DW_EhPtrCtx * internal void dw_unwind_parse_cie_x64(void *base, Rng1U64 range, DW_EhPtrCtx *ptr_ctx, U64 off, DW_CIEUnpacked *cie_out) { + NotImplemented; +#if 0 MemoryZeroStruct(cie_out); // get version @@ -521,6 +523,7 @@ dw_unwind_parse_cie_x64(void *base, Rng1U64 range, DW_EhPtrCtx *ptr_ctx, U64 off cie_out->cfi_range.min = cfi_off; cie_out->cfi_range.max = cfi_off + cfi_size; } +#endif } internal void diff --git a/src/linker/hash_table.c b/src/linker/hash_table.c index 219ac87c..ae70ab2a 100644 --- a/src/linker/hash_table.c +++ b/src/linker/hash_table.c @@ -187,6 +187,13 @@ hash_table_search_u64(HashTable *ht, U64 key_u64) return 0; } +internal void * +hash_table_search_u64_raw(HashTable *ht, U64 key_u64) +{ + KeyValuePair *kv = hash_table_search_u64(ht, key_u64); + return kv ? 
kv->value_raw : 0; +} + internal KeyValuePair * hash_table_search_path(HashTable *ht, String8 path) { diff --git a/src/rdi_from_dwarf/rdi_from_dwarf.c b/src/rdi_from_dwarf/rdi_from_dwarf.c index bfb70be2..3d4dcba8 100644 --- a/src/rdi_from_dwarf/rdi_from_dwarf.c +++ b/src/rdi_from_dwarf/rdi_from_dwarf.c @@ -1,894 +1,1917 @@ // Copyright (c) 2024 Epic Games Tools // Licensed under the MIT license (https://opensource.org/license/mit/) -//////////////////////////////// -//~ rjf: Build Options - -#define BUILD_TITLE "rdi_from_dwarf" -#define BUILD_CONSOLE_INTERFACE 1 - -//////////////////////////////// -//~ rjf: Includes - -//- rjf: [lib] -#include "lib_rdi_format/rdi_format.h" -#include "lib_rdi_format/rdi_format.c" -#include "third_party/rad_lzb_simple/rad_lzb_simple.h" -#include "third_party/rad_lzb_simple/rad_lzb_simple.c" - -//- rjf: [h] -#include "base/base_inc.h" -#include "os/os_inc.h" -#include "rdi_make/rdi_make_local.h" -#include "rdi_elf.h" -#include "rdi_dwarf.h" -#include "rdi_dwarf_stringize.h" -#include "rdi_from_dwarf.h" - -//- rjf: [c] -#include "base/base_inc.c" -#include "os/os_inc.c" -#include "rdi_make/rdi_make_local.c" -#include "rdi_elf.c" -#include "rdi_dwarf.c" -#include "rdi_dwarf_stringize.c" - -// TODO(allen): -// [ ] need sample data for .debug_names - -//////////////////////////////// -//~ Program Parameters Parser - -static DWARFCONV_Params* -dwarf_convert_params_from_cmd_line(Arena *arena, CmdLine *cmdline){ - DWARFCONV_Params *result = push_array(arena, DWARFCONV_Params, 1); - result->unit_idx_max = ~0ull; - - // get input pdb - { - String8 input_name = cmd_line_string(cmdline, str8_lit("elf")); - if (input_name.size == 0){ - str8_list_push(arena, &result->errors, - str8_lit("missing required parameter '--elf:'")); - } - - if (input_name.size > 0){ - String8 input_data = os_data_from_file_path(arena, input_name); - - if (input_data.size == 0){ - str8_list_pushf(arena, &result->errors, - "could not load input file '%.*s'", 
str8_varg(input_name)); - } - - if (input_data.size != 0){ - result->input_elf_name = input_name; - result->input_elf_data = input_data; - } - } - } - - // get output name - { - result->output_name = cmd_line_string(cmdline, str8_lit("out")); - } - - // error options - if (cmd_line_has_flag(cmdline, str8_lit("hide_errors"))){ - String8List vals = cmd_line_strings(cmdline, str8_lit("hide_errors")); - - // if no values - set all to hidden - if (vals.node_count == 0){ - B8 *ptr = (B8*)&result->hide_errors; - B8 *opl = ptr + sizeof(result->hide_errors); - for (;ptr < opl; ptr += 1){ - *ptr = 1; - } - } - - // for each explicit value set the corresponding flag to hidden - for (String8Node *node = vals.first; - node != 0; - node = node->next){ - if (str8_match(node->string, str8_lit("input"), 0)){ - result->hide_errors.input = 1; - } - } - - } - - // unit idx selector - if (cmd_line_has_flag(cmdline, str8_lit("unit_idx"))){ - String8List vals = cmd_line_strings(cmdline, str8_lit("unit_idx")); - - // single value unit index - if (vals.node_count == 1){ - U64 idx = u64_from_str8(vals.first->string, 10); - result->unit_idx_min = idx; - result->unit_idx_max = idx; - } - - // range value unit index - else if (vals.node_count >= 2){ - U64 idx_a = u64_from_str8(vals.first->string, 10); - U64 idx_b = u64_from_str8(vals.first->next->string, 10); - result->unit_idx_min = Min(idx_a, idx_b); - result->unit_idx_max = Max(idx_a, idx_b); - } - } - - // dump options - if (cmd_line_has_flag(cmdline, str8_lit("dump"))){ - result->dump = 1; - - String8List vals = cmd_line_strings(cmdline, str8_lit("dump")); - if (vals.first == 0){ - B8 *ptr = &result->dump__first; - for (; ptr < &result->dump__last; ptr += 1){ - *ptr = 1; - } - } - else{ - for (String8Node *node = vals.first; - node != 0; - node = node->next){ - if (str8_match(node->string, str8_lit("header"), 0)){ - result->dump_header = 1; - } - else if (str8_match(node->string, str8_lit("sections"), 0)){ - result->dump_sections = 1; - } 
- else if (str8_match(node->string, str8_lit("segments"), 0)){ - result->dump_segments = 1; - } - else if (str8_match(node->string, str8_lit("symtab"), 0)){ - result->dump_symtab = 1; - } - else if (str8_match(node->string, str8_lit("dynsym"), 0)){ - result->dump_dynsym = 1; - } - else if (str8_match(node->string, str8_lit("debug_sections"), 0)){ - result->dump_debug_sections = 1; - } - else if (str8_match(node->string, str8_lit("debug_info"), 0)){ - result->dump_debug_info = 1; - } - else if (str8_match(node->string, str8_lit("debug_abbrev"), 0)){ - result->dump_debug_abbrev = 1; - } - else if (str8_match(node->string, str8_lit("debug_pubnames"), 0)){ - result->dump_debug_pubnames = 1; - } - else if (str8_match(node->string, str8_lit("debug_pubtypes"), 0)){ - result->dump_debug_pubtypes = 1; - } - else if (str8_match(node->string, str8_lit("debug_names"), 0)){ - result->dump_debug_names = 1; - } - else if (str8_match(node->string, str8_lit("debug_aranges"), 0)){ - result->dump_debug_aranges = 1; - } - else if (str8_match(node->string, str8_lit("debug_addr"), 0)){ - result->dump_debug_addr = 1; - } - } - } - } - - return(result); -} - -//////////////////////////////// -//~ Entry Point - -static void -dump_symtab(Arena *arena, String8List *out, ELF_SymArray *symbols, String8 strtab, - U32 indent){ - static char spaces[] = " "; - - U8 *str_first = strtab.str; - U8 *str_opl = strtab.str + strtab.size; - - ELF_Sym64 *symbol = symbols->symbols; - U64 count = symbols->count; - for (U64 i = 0; i < count; i += 1, symbol += 1){ - U8 *name_first = str_first + symbol->st_name; - U8 *name_opl = name_first; - for (;name_opl < str_opl && *name_opl != 0;) name_opl += 1; - String8 name = str8_range(name_first, name_opl); - - ELF_SymbolBinding binding = ELF_SymBindingFromInfo(symbol->st_info); - String8 binding_string = elf_string_from_symbol_binding(binding); - - ELF_SymbolType type = ELF_SymTypeFromInfo(symbol->st_info); - String8 type_string = elf_string_from_symbol_type(type); 
- - ELF_SymbolVisibility vis = ELF_SymVisibilityFromOther(symbol->st_other); - String8 vis_string = elf_string_from_symbol_visibility(vis); - - str8_list_pushf(arena, out, - "%.*ssymbol[%5llu] %6.*s %7.*s %9.*s 0x%08llx size=%-5llu sec=%-5u " - "%.*s\n", - indent, spaces, i, - str8_varg(binding_string), str8_varg(type_string), - str8_varg(vis_string), - symbol->st_value, symbol->st_size, - symbol->st_shndx, str8_varg(name)); - } -} - -#if 0 -static void -dump_entry_tree(Arena *arena, String8List *out, - DWARF_Parsed *dwarf, DWARF_InfoUnit *unit, - DWARF_InfoEntry *entry, U32 indent){ - static char spaces[] = " "; - - DWARF_AbbrevDecl *abbrev_decl = entry->abbrev_decl; - - // tag - DWARF_Tag tag = abbrev_decl->tag; - String8 tag_string = dwarf_string_from_tag(tag); - str8_list_pushf(arena, out, "%.*sentry(@%llx) TAG %.*s\n", - indent, spaces, entry->info_offset, str8_varg(tag_string)); - - // attributes - U32 attrib_count = abbrev_decl->attrib_count; - DWARF_AbbrevAttribSpec *attrib_spec = abbrev_decl->attrib_specs; - DWARF_InfoAttribVal *attrib_val = entry->attrib_vals; - for (U32 i = 0; i < attrib_count; i += 1, attrib_spec += 1, attrib_val += 1){ - // attribute name - DWARF_AttributeName name = attrib_spec->name; - String8 name_string = dwarf_string_from_attribute_name(name); - str8_list_pushf(arena, out, "%.*sATTR %.*s ", indent + 4, spaces, str8_varg(name_string)); - - // attribute value - switch (attrib_spec->form){ - default: - { - String8 form_string = dwarf_string_from_attribute_form(attrib_spec->form); - str8_list_pushf(arena, out, " {%llu, 0x%p}\n", - str8_varg(form_string), attrib_val->val, attrib_val->dataptr); - }break; - - case DWARF_AttributeForm_strp: - { - String8 str = {0}; - - String8 data = dwarf->debug_data[DWARF_SectionCode_Str]; - U64 off = attrib_val->val; - if (off < data.size){ - U8 *start = data.str + off; - U8 *opl = data.str + data.size; - U8 *ptr = start; - for (;ptr < opl && *ptr != 0;) ptr += 1; - str = str8_range(start, ptr); - } - 
- str8_list_pushf(arena, out, "'%.*s'\n", str8_varg(str)); - }break; - - case DWARF_AttributeForm_sec_offset: - { - DWARF_AttributeClassFlags attr_classes1 = dwarf_attribute_class_from_name(name); - DWARF_AttributeClassFlags attr_classes2 = DWARF_AttributeClassFlag_sec_offset_classes; - DWARF_AttributeClassFlags attr_classes = attr_classes1&attr_classes2; - - DWARF_SectionCode sec_code = DWARF_SectionCode_Null; - if (unit->dwarf_version == 5){ - switch (attr_classes){ - case DWARF_AttributeClassFlag_addrptr: sec_code = DWARF_SectionCode_Addr; break; - case DWARF_AttributeClassFlag_lineptr: sec_code = DWARF_SectionCode_Line; break; - case DWARF_AttributeClassFlag_loclist: sec_code = DWARF_SectionCode_LocLists; break; - case DWARF_AttributeClassFlag_loclistsptr: sec_code = DWARF_SectionCode_LocLists; break; - case DWARF_AttributeClassFlag_macptr: sec_code = DWARF_SectionCode_Macro; break; - case DWARF_AttributeClassFlag_rnglist: sec_code = DWARF_SectionCode_RngLists; break; - case DWARF_AttributeClassFlag_rnglistsptr: sec_code = DWARF_SectionCode_RngLists; break; - case DWARF_AttributeClassFlag_stroffsetsptr: sec_code = DWARF_SectionCode_StrOffsets; break; - } - } - else if (unit->dwarf_version == 4){ - switch (attr_classes){ - case DWARF_AttributeClassFlag_lineptr: sec_code = DWARF_SectionCode_Line; break; - case DWARF_AttributeClassFlag_loclist: sec_code = DWARF_SectionCode_Loc; break; - case DWARF_AttributeClassFlag_macptr: sec_code = DWARF_SectionCode_MacInfo; break; - case DWARF_AttributeClassFlag_rnglist: sec_code = DWARF_SectionCode_Ranges; break; - } - } - - String8 sec_name = dwarf_name_from_debug_section(dwarf, sec_code); - str8_list_pushf(arena, out, "sec(%.*s) + %llu\n", str8_varg(sec_name), attrib_val->val); - }break; - - case DWARF_AttributeForm_ref1: - case DWARF_AttributeForm_ref2: - case DWARF_AttributeForm_ref4: - case DWARF_AttributeForm_ref8: - case DWARF_AttributeForm_ref_udata: - { - str8_list_pushf(arena, out, "entry(@%llx)\n", 
attrib_val->val); - }break; - - case DWARF_AttributeForm_addr: - { - str8_list_pushf(arena, out, "0x%llx\n", attrib_val->val); - }break; - - case DWARF_AttributeForm_exprloc: - { - str8_list_pushf(arena, out, "expression\n"); - // TODO(allen): dwarf expression dumping - }break; - - case DWARF_AttributeForm_strx1: - case DWARF_AttributeForm_strx2: - case DWARF_AttributeForm_strx3: - case DWARF_AttributeForm_strx4: - { - String8 str = {0}; - - U32 idx = attrib_val->val; - U64 str_offsets_off = unit->str_offsets_base + idx*unit->offset_size; - - String8 str_offsets = dwarf->debug_data[DWARF_SectionCode_StrOffsets]; - if (str_offsets_off + unit->offset_size < str_offsets.size){ - U64 off = 0; - MemoryCopy(&off, str_offsets.str + str_offsets_off, unit->offset_size); - - String8 data = dwarf->debug_data[DWARF_SectionCode_Str]; - if (off < data.size){ - U8 *start = data.str + off; - U8 *opl = data.str + data.size; - U8 *ptr = start; - for (;ptr < opl && *ptr != 0;) ptr += 1; - str = str8_range(start, ptr); - } - } - - str8_list_pushf(arena, out, "'%.*s'\n", str8_varg(str)); - }break; - - case DWARF_AttributeForm_addrx: - case DWARF_AttributeForm_addrx1: - case DWARF_AttributeForm_addrx2: - case DWARF_AttributeForm_addrx3: - case DWARF_AttributeForm_addrx4: - { - U64 address = 0; - - U32 idx = attrib_val->val; - U64 address_off = unit->addr_base + idx*unit->address_size; - - String8 data = dwarf->debug_data[DWARF_SectionCode_Addr]; - if (address_off + unit->address_size < data.size){ - MemoryCopy(&address, data.str + address_off, unit->address_size); - } - - str8_list_pushf(arena, out, "0x%x\n", address); - }break; - - case DWARF_AttributeForm_rnglistx: - { - U64 rnglist_off = unit->rnglists_base + attrib_val->val; - int x = 0; - }break; - - case DWARF_AttributeForm_data1: - case DWARF_AttributeForm_data2: - case DWARF_AttributeForm_data4: - case DWARF_AttributeForm_data8: - case DWARF_AttributeForm_data16: - case DWARF_AttributeForm_udata: - case 
DWARF_AttributeForm_implicit_const: - case DWARF_AttributeForm_flag: - case DWARF_AttributeForm_flag_present: - { - str8_list_pushf(arena, out, "%llu\n", attrib_val->val); - }break; - - case DWARF_AttributeForm_sdata: - { - str8_list_pushf(arena, out, "%lld\n", (S64)attrib_val->val); - }break; - - case DWARF_AttributeForm_string: - { - str8_list_pushf(arena, out, "'%.*s'\n", (int)attrib_val->val, attrib_val->dataptr); - }break; - } - } - - // dump children - for (DWARF_InfoEntry *child = entry->first_child; - child != 0; - child = child->next_sibling){ - dump_entry_tree(arena, out, dwarf, unit, child, indent + 1); - } -} -#endif - -internal void -entry_point(CmdLine *cmd_line) +internal D2R_User2Convert * +d2r_user2convert_from_cmdln(Arena *arena, CmdLine *cmdline) { - Arena *arena = arena_alloc(); - - // parse arguments - DWARFCONV_Params *params = dwarf_convert_params_from_cmd_line(arena, cmd_line); - - // show input errors - if (params->errors.node_count > 0 && - !params->hide_errors.input){ - for (String8Node *node = params->errors.first; - node != 0; - node = node->next){ - fprintf(stdout, "error(input): %.*s\n", str8_varg(node->string)); + D2R_User2Convert *result = push_array(arena, D2R_User2Convert, 1); + + String8 exe_name = cmd_line_string(cmdline, str8_lit("exe")); + String8 debug_name = cmd_line_string(cmdline, str8_lit("debug")); + String8 out_name = cmd_line_string(cmdline, str8_lit("out")); + + // error check params + if (exe_name.size == 0 && debug_name.size == 0) { + str8_list_pushf(arena, &result->errors, "Missing one of the required parameters: '--exe:' or '--debug:'"); + } + if (out_name.size == 0) { + str8_list_pushf(arena, &result->errors, "Missing required parameter: '--out:'"); + } + + // get input EXE or ELF + if (exe_name.size > 0) { + String8 exe_data = os_data_from_file_path(arena, exe_name); + if (exe_data.size == 0) { + str8_list_pushf(arena, &result->errors, "Could not load input EXE file from '%S'", exe_name); + } else { + 
result->input_exe_name = exe_name; + result->input_exe_data = exe_data; } } - - // will we try to parse an input file? - B32 try_parse_input = (params->errors.node_count == 0); - - // track parse success - B32 successful_parse = 1; - -#define PARSE_CHECK_ERROR(p, fmt, ...) do{ if ((p) == 0){ \ -successful_parse = 0; \ -fprintf(stdout, "error(parsing): " fmt "\n",##__VA_ARGS__); \ -} }while(0) - - // parse elf - ELF_Parsed *elf = 0; - if (try_parse_input) ProfScope("parse elf"){ - elf = elf_parsed_from_data(arena, params->input_elf_data); - PARSE_CHECK_ERROR(elf, "ELF"); - } - - // parse strtab - String8 strtab = {0}; - if (elf != 0) ProfScope("parse strtab"){ - strtab = elf_section_data_from_idx(elf, elf->strtab_idx); - } - - // parse symtab - ELF_SymArray symtab = {0}; - if (elf != 0) ProfScope("parse symtab"){ - String8 data = elf_section_data_from_idx(elf, elf->symtab_idx); - symtab = elf_sym_array_from_data(arena, elf->elf_class, data); - } - - // parse dynsym - ELF_SymArray dynsym = {0}; - if (elf != 0) ProfScope("parse dynsym"){ - String8 data = elf_section_data_from_idx(elf, elf->dynsym_idx); - dynsym = elf_sym_array_from_data(arena, elf->elf_class, data); - } - - // parse dwarf - DWARF_Parsed *dwarf = 0; - if (elf != 0) ProfScope("parse dwarf"){ - dwarf = dwarf_parsed_from_elf(arena, elf); - PARSE_CHECK_ERROR(dwarf, "DWARF"); - } - - // parse info - DWARF_InfoParsed *info = 0; - if (dwarf != 0){ - String8 data = dwarf->debug_data[DWARF_SectionCode_Info]; - if (data.size > 0) ProfScope("parse .debug_info"){ - info = dwarf_info_from_data(arena, data); - PARSE_CHECK_ERROR(info, "DEBUG INFO"); + + // get input DEBUG + if (debug_name.size > 0) { + String8 debug_data = os_data_from_file_path(arena, debug_name); + if (debug_data.size == 0) { + str8_list_pushf(arena, &result->errors, "Could not load input DEBUG file from '%S'", debug_name); + } else { + result->input_debug_name = debug_name; + result->input_debug_data = debug_data; } } - - // parse pubnames - 
DWARF_PubNamesParsed *pubnames = 0; - if (dwarf != 0){ - String8 data = dwarf->debug_data[DWARF_SectionCode_PubNames]; - if (data.size) ProfScope("parse .debug_pubnames"){ - pubnames = dwarf_pubnames_from_data(arena, data); - PARSE_CHECK_ERROR(pubnames, "DEBUG PUBNAMES"); + + result->output_name = out_name; + result->flags = ~0ull; + + String8List only_names = cmd_line_strings(cmdline, str8_lit("only")); + String8List omit_names = cmd_line_strings(cmdline, str8_lit("omit")); + + if (only_names.node_count > 0) { + result->flags = 0; + for (String8Node *i = only_names.first; i != 0; i = i->next) { +#define X(t,n,k) if (str8_match_lit(Stringify(n), i->string, StringMatchFlag_CaseInsensitive)) \ + result->flags |= D2R_ConvertFlag_##t; + RDI_SectionKind_XList +#undef X } } - - // parse pubtypes - DWARF_PubNamesParsed *pubtypes = 0; - if (dwarf != 0){ - String8 data = dwarf->debug_data[DWARF_SectionCode_PubTypes]; - if (data.size) ProfScope("parse .debug_pubtypes"){ - pubtypes = dwarf_pubnames_from_data(arena, data); - PARSE_CHECK_ERROR(pubtypes, "DEBUG PUBTYPES"); + + if (omit_names.node_count > 0) { + for (String8Node *i = omit_names.first; i != 0; i = i->next) { +#define X(t,n,k) if (str8_match_lit(Stringify(n), i->string, StringMatchFlag_CaseInsensitive)) \ + result->flags &= ~D2R_ConvertFlag_##t; + RDI_SectionKind_XList +#undef X } } - - // parse names - DWARF_NamesParsed *names = 0; - if (dwarf != 0){ - String8 data = dwarf->debug_data[DWARF_SectionCode_Names]; - if (data.size) ProfScope("parse .debug_names"){ - names = dwarf_names_from_data(arena, data); - PARSE_CHECK_ERROR(names, "DEBUG NAMES"); + + return result; +} + +internal RDI_RegCode +d2r_rdi_reg_from_dw_reg_code_x64(U64 reg_code) +{ + switch (reg_code) { +#define X(reg_name_dw, reg_code_dw, reg_name_rdi, reg_pos, reg_size) case DW_RegX64_##reg_name_dw: return RDI_RegCodeX64_##reg_name_rdi; + DW_Regs_X64_XList(X) +#undef X + } + InvalidPath; + return 0; +} + +internal RDI_RegCode 
+d2r_rdi_reg_from_dw_reg_code_x86(U64 reg_code) +{ + switch (reg_code) { +#define X(reg_name_dw, reg_code_dw, reg_name_rdi, reg_pos, reg_size) case DW_RegX86_##reg_name_dw: return RDI_RegCodeX86_##reg_name_rdi; + DW_Regs_X86_XList(X) +#undef X + } + InvalidPath; + return 0; +} + +internal RDI_RegCode +d2r_rdi_reg_from_dw_reg_code(RDI_Arch arch, U64 reg_code) +{ + switch (arch) { + case RDI_Arch_NULL: return 0; + case RDI_Arch_X64: return d2r_rdi_reg_from_dw_reg_code_x64(reg_code); + case RDI_Arch_X86: return d2r_rdi_reg_from_dw_reg_code_x86(reg_code); + } + InvalidPath; + return 0; +} + +internal RDIM_Type * +d2r_create_type(Arena *arena, D2R_TypeTable *type_table) +{ + RDIM_Type *type = rdim_type_chunk_list_push(arena, type_table->types, type_table->type_chunk_cap); + return type; +} + +internal RDIM_Type * +d2r_find_or_create_type_from_offset(Arena *arena, D2R_TypeTable *type_table, U64 info_off) +{ + RDIM_Type *type = 0; + KeyValuePair *is_type_present = hash_table_search_u64(type_table->ht, info_off); + if (is_type_present) { + type = is_type_present->value_raw; + } else { + type = d2r_create_type(arena, type_table); + hash_table_push_u64_raw(arena, type_table->ht, info_off, type); + } + return type; +} + +internal RDIM_Type * +d2r_type_from_attrib(Arena *arena, D2R_TypeTable *type_table, DW_Input *input, DW_CompUnit *cu, DW_Tag tag, DW_AttribKind kind) +{ + RDIM_Type *type = 0; + + // find attrib + DW_Attrib *attrib = dw_attrib_from_tag(input, cu, tag, kind); + + // does tag have this attribute? 
+ if (attrib->attrib_kind == kind) { + DW_AttribClass value_class = dw_value_class_from_attrib(cu, attrib); + + if (value_class == DW_AttribClass_Reference) { + // resolve reference + DW_Reference ref = dw_ref_from_attrib_ptr(input, cu, attrib); + + // TODO: support for external compile unit references + AssertAlways(ref.cu == cu); + + // find or create type + type = d2r_find_or_create_type_from_offset(arena, type_table, ref.info_off); + } else { + Assert(!"unexpected attrib class"); + } + } else if (attrib->attrib_kind == DW_Attrib_Null) { + type = type_table->void_type; + } + + return type; +} + +internal Rng1U64List +d2r_range_list_from_tag(Arena *arena, DW_Input *input, DW_CompUnit *cu, U64 image_base, DW_Tag tag) +{ + // collect non-contiguous range + Rng1U64List ranges = dw_rnglist_from_attrib(arena, input, cu, tag, DW_Attrib_Ranges); + + // collect contiguous range + DW_Attrib *lo_pc_attrib = dw_attrib_from_tag(input, cu, tag, DW_Attrib_LowPc); + DW_Attrib *hi_pc_attrib = dw_attrib_from_tag(input, cu, tag, DW_Attrib_HighPc); + if (lo_pc_attrib->attrib_kind != DW_Attrib_Null && hi_pc_attrib->attrib_kind != DW_Attrib_Null) { + U64 lo_pc = dw_address_from_attrib_ptr(input, cu, lo_pc_attrib); + + U64 hi_pc; + DW_AttribClass hi_pc_class = dw_value_class_from_attrib(cu, hi_pc_attrib); + if (hi_pc_class == DW_AttribClass_Address) { + hi_pc = dw_address_from_attrib_ptr(input, cu, hi_pc_attrib); + } else if (hi_pc_class == DW_AttribClass_Const) { + hi_pc = dw_const_u64_from_attrib_ptr(input, cu, hi_pc_attrib); + hi_pc += lo_pc; + } else { + AssertAlways(!"undefined attrib encoding"); + } + + // TODO: error handling + AssertAlways(lo_pc >= image_base); + AssertAlways(hi_pc >= image_base); + AssertAlways(lo_pc <= hi_pc); + + U64 lo_voff = lo_pc - image_base; + U64 hi_voff = hi_pc - image_base; + rng1u64_list_push(arena, &ranges, rng_1u64(lo_voff, hi_voff)); + } + + return ranges; +} + +internal RDIM_Type ** +d2r_collect_proc_params(Arena *arena, D2R_TypeTable 
*type_table, DW_Input *input, DW_CompUnit *cu, DW_TagNode *cur_node, U64 *param_count_out) +{ + Temp scratch = scratch_begin(&arena, 1); + + RDIM_TypeList list = {0}; + B32 has_vargs = 0; + for (DW_TagNode *i = cur_node->first_child; i != 0; i = i->sibling) { + if (i->tag.kind == DW_Tag_FormalParameter) { + RDIM_TypeNode *n = push_array(scratch.arena, RDIM_TypeNode, 1); + n->v = d2r_type_from_attrib(arena, type_table, input, cu, i->tag, DW_Attrib_Type); + SLLQueuePush(list.first, list.last, n); + ++list.count; + } else if (i->tag.kind == DW_Tag_UnspecifiedParameters) { + has_vargs = 1; } } - - // parse aranges - DWARF_ArangesParsed *aranges = 0; - if (dwarf != 0){ - String8 data = dwarf->debug_data[DWARF_SectionCode_Aranges]; - if (data.size) ProfScope("parse .debug_aranges"){ - aranges = dwarf_aranges_from_data(arena, data); - PARSE_CHECK_ERROR(aranges, "DEBUG ARANGES"); - } + + if (has_vargs) { + RDIM_TypeNode *n = push_array(scratch.arena, RDIM_TypeNode, 1); + n->v = type_table->varg_type; + SLLQueuePush(list.first, list.last, n); + ++list.count; } - - // parse addr - DWARF_AddrParsed *addr = 0; - if (dwarf != 0){ - String8 data = dwarf->debug_data[DWARF_SectionCode_Addr]; - if (data.size) ProfScope("parse .debug_addr"){ - addr = dwarf_addr_from_data(arena, data); - PARSE_CHECK_ERROR(addr, "DEBUG ADDR"); - } - } - -#if 0 - // parse abbrev - DWARF_AbbrevParsed *abbrev = 0; - if (dwarf != 0){ - String8 data = dwarf->debug_data[DWARF_SectionCode_Abbrev]; - if (data.size > 0) ProfScope("parse .debug_abbrev"){ - DWARF_AbbrevParams abbrev_params = {0}; - abbrev_params.unit_idx_min = params->unit_idx_min; - abbrev_params.unit_idx_max = params->unit_idx_max; - abbrev = dwarf_abbrev_from_data(arena, data, &abbrev_params); - PARSE_CHECK_ERROR(abbrev, "DEBUG ABBREV"); - } - } - - // parse info - DWARF_InfoParsed *info = 0; - if (abbrev != 0){ - String8 data = dwarf->debug_data[DWARF_SectionCode_Info]; - if (data.size > 0) ProfScope("parse .debug_info"){ - DWARF_InfoParams 
info_params = {0}; - info_params.unit_idx_min = params->unit_idx_min; - info_params.unit_idx_max = params->unit_idx_max; - info = dwarf_info_from_data(arena, data, &info_params, abbrev); - PARSE_CHECK_ERROR(info, "DEBUG INFO"); - } - } -#endif - - // dump - if (params->dump) ProfScope("dump"){ - String8List dump = {0}; - - // ELF - if (params->dump_header){ - if (elf != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "ELF:\n")); - - // TODO: better stringizers for fields here - str8_list_pushf(arena, &dump, " elf_class=%u\n", elf->elf_class); - str8_list_pushf(arena, &dump, " arch=%u\n", elf->arch); - str8_list_pushf(arena, &dump, " section_count=%llu\n", elf->section_count); - str8_list_pushf(arena, &dump, " segment_count=%llu\n", elf->segment_count); - str8_list_pushf(arena, &dump, " vbase=0x%llx\n", elf->vbase); - str8_list_pushf(arena, &dump, " entry_vaddr=0x%llx\n", elf->vbase); - - str8_list_push(arena, &dump, str8_lit("\n")); + + // collect params + *param_count_out = list.count; + RDIM_Type **params = rdim_array_from_type_list(arena, list); + + scratch_end(scratch); + return params; +} + + +internal RDIM_EvalBytecode +d2r_bytecode_from_expression(Arena *arena, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, String8 expr) +{ + RDIM_EvalBytecode bc = {0}; + + for (U64 cursor = 0; cursor < expr.size; ) { + U8 op = 0; + cursor += str8_deserial_read_struct(expr, cursor, &op); + + U64 size_param; + switch (op) { + case DW_ExprOp_Lit0: case DW_ExprOp_Lit1: case DW_ExprOp_Lit2: + case DW_ExprOp_Lit3: case DW_ExprOp_Lit4: case DW_ExprOp_Lit5: + case DW_ExprOp_Lit6: case DW_ExprOp_Lit7: case DW_ExprOp_Lit8: + case DW_ExprOp_Lit9: case DW_ExprOp_Lit10: case DW_ExprOp_Lit11: + case DW_ExprOp_Lit12: case DW_ExprOp_Lit13: case DW_ExprOp_Lit14: + case DW_ExprOp_Lit15: case DW_ExprOp_Lit16: case DW_ExprOp_Lit17: + case DW_ExprOp_Lit18: case DW_ExprOp_Lit19: case 
DW_ExprOp_Lit20: + case DW_ExprOp_Lit21: case DW_ExprOp_Lit22: case DW_ExprOp_Lit23: + case DW_ExprOp_Lit24: case DW_ExprOp_Lit25: case DW_ExprOp_Lit26: + case DW_ExprOp_Lit27: case DW_ExprOp_Lit28: case DW_ExprOp_Lit29: + case DW_ExprOp_Lit30: case DW_ExprOp_Lit31: { + U64 lit = op - DW_ExprOp_Lit0; + rdim_bytecode_push_uconst(arena, &bc, lit); + } break; + + case DW_ExprOp_Const1U: size_param = 1; goto const_unsigned; + case DW_ExprOp_Const2U: size_param = 2; goto const_unsigned; + case DW_ExprOp_Const4U: size_param = 4; goto const_unsigned; + case DW_ExprOp_Const8U: size_param = 8; goto const_unsigned; + const_unsigned: { + U64 val = 0; + cursor += str8_deserial_read(expr, cursor, &val, size_param, size_param); + rdim_bytecode_push_uconst(arena, &bc, val); + } break; + + case DW_ExprOp_Const1S:size_param = 1; goto const_signed; + case DW_ExprOp_Const2S:size_param = 2; goto const_signed; + case DW_ExprOp_Const4S:size_param = 4; goto const_signed; + case DW_ExprOp_Const8S:size_param = 8; goto const_signed; + const_signed: { + S64 val = 0; + cursor += str8_deserial_read(expr, cursor, &val, size_param, size_param); + val = extend_sign64(val, size_param); + rdim_bytecode_push_sconst(arena, &bc, val); + } break; + + case DW_ExprOp_ConstU: { + U64 val = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &val); + rdim_bytecode_push_uconst(arena, &bc, val); + } break; + + case DW_ExprOp_ConstS: { + S64 val = 0; + cursor += str8_deserial_read_sleb128(expr, cursor, &val); + rdim_bytecode_push_sconst(arena, &bc, val); + } break; + + case DW_ExprOp_Addr: { + U64 addr = 0; + cursor += str8_deserial_read(expr, cursor, &addr, address_size, address_size); + if (addr >= image_base) { + U64 voff = addr - image_base; + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_ModuleOff, voff); + } else { + // TODO: error handling + AssertAlways(!"unable to relocate address"); } - } - - // SECTIONS - if (params->dump_sections){ - if (elf != 0){ - ELF_SectionArray section_array = 
elf_section_array_from_elf(elf); - String8Array section_name_array = elf_section_name_array_from_elf(elf); - - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "SECTIONS:\n")); - - ELF_Shdr64 *sec = section_array.sections; - String8 *sec_name = section_name_array.v; - U64 count = section_array.count; - for (U64 i = 0 ; i < count; i += 1, sec += 1, sec_name += 1){ - String8 type_string = elf_string_from_section_type(sec->sh_type); - - // TODO: better stringizers for fields here - str8_list_pushf(arena, &dump, " section[%llu]:\n", i); - str8_list_pushf(arena, &dump, " name='%.*s'\n", str8_varg(*sec_name)); - str8_list_pushf(arena, &dump, " type=%.*s\n", str8_varg(type_string)); - str8_list_pushf(arena, &dump, " flags=0x%llx\n", sec->sh_flags); - str8_list_pushf(arena, &dump, " addr=0x%llx\n", sec->sh_addr); - str8_list_pushf(arena, &dump, " offset=0x%llx\n", sec->sh_offset); - str8_list_pushf(arena, &dump, " size=%llu\n", sec->sh_size); - str8_list_pushf(arena, &dump, " link=%u\n", sec->sh_link); - str8_list_pushf(arena, &dump, " info=%u\n", sec->sh_info); - str8_list_pushf(arena, &dump, " addralign=0x%llx\n", sec->sh_addralign); - str8_list_pushf(arena, &dump, " entsize=%llu\n", sec->sh_entsize); - str8_list_push(arena, &dump, str8_lit("\n")); + } break; + + case DW_ExprOp_Reg0: case DW_ExprOp_Reg1: case DW_ExprOp_Reg2: + case DW_ExprOp_Reg3: case DW_ExprOp_Reg4: case DW_ExprOp_Reg5: + case DW_ExprOp_Reg6: case DW_ExprOp_Reg7: case DW_ExprOp_Reg8: + case DW_ExprOp_Reg9: case DW_ExprOp_Reg10: case DW_ExprOp_Reg11: + case DW_ExprOp_Reg12: case DW_ExprOp_Reg13: case DW_ExprOp_Reg14: + case DW_ExprOp_Reg15: case DW_ExprOp_Reg16: case DW_ExprOp_Reg17: + case DW_ExprOp_Reg18: case DW_ExprOp_Reg19: case DW_ExprOp_Reg20: + case DW_ExprOp_Reg21: case DW_ExprOp_Reg22: case DW_ExprOp_Reg23: + case DW_ExprOp_Reg24: case DW_ExprOp_Reg25: case DW_ExprOp_Reg26: + case DW_ExprOp_Reg27: case DW_ExprOp_Reg28: case 
DW_ExprOp_Reg29: + case DW_ExprOp_Reg30: case DW_ExprOp_Reg31: { + U64 reg_code_dw = op - DW_ExprOp_Reg0; + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + U32 regread_param = RDI_EncodeRegReadParam(reg_code_rdi, 8, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegRead, regread_param); + } break; + + case DW_ExprOp_RegX: { + U64 reg_code_dw = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &reg_code_dw); + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + U32 regread_param = RDI_EncodeRegReadParam(reg_code_rdi, 8, 0); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegRead, regread_param); + } break; + + case DW_ExprOp_ImplicitValue: { + U64 value_size = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &value_size); + + String8 val = str8_substr(expr, rng_1u64(cursor, cursor + value_size)); + if (val.size <= sizeof(U64)) { + U64 val64 = 0; + MemoryCopy(&val64, val.str, val.size); + rdim_bytecode_push_uconst(arena, &bc, val64); + } else { + // TODO: currently no way to encode string in RDIM_EvalBytecodeOp + NotImplemented; + } + } break; + + case DW_ExprOp_Piece: { + NotImplemented; + } break; + + case DW_ExprOp_BitPiece: { + NotImplemented; + } break; + + case DW_ExprOp_Pick: { + U8 stack_idx = 0; + cursor += str8_deserial_read_struct(expr, cursor, &stack_idx); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Pick, stack_idx); + } break; + + case DW_ExprOp_PlusUConst: { + U64 addend = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &addend); + rdim_bytecode_push_uconst(arena, &bc, addend); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + } break; + + case DW_ExprOp_Skip: { + S16 skip = 0; + cursor += str8_deserial_read_struct(expr, cursor, &skip); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Skip, skip); + } break; + + case DW_ExprOp_Bra: { + NotImplemented; + } break; + + case DW_ExprOp_BReg0: case DW_ExprOp_BReg1: case DW_ExprOp_BReg2: + case DW_ExprOp_BReg3: case
DW_ExprOp_BReg4: case DW_ExprOp_BReg5: + case DW_ExprOp_BReg6: case DW_ExprOp_BReg7: case DW_ExprOp_BReg8: + case DW_ExprOp_BReg9: case DW_ExprOp_BReg10: case DW_ExprOp_BReg11: + case DW_ExprOp_BReg12: case DW_ExprOp_BReg13: case DW_ExprOp_BReg14: + case DW_ExprOp_BReg15: case DW_ExprOp_BReg16: case DW_ExprOp_BReg17: + case DW_ExprOp_BReg18: case DW_ExprOp_BReg19: case DW_ExprOp_BReg20: + case DW_ExprOp_BReg21: case DW_ExprOp_BReg22: case DW_ExprOp_BReg23: + case DW_ExprOp_BReg24: case DW_ExprOp_BReg25: case DW_ExprOp_BReg26: + case DW_ExprOp_BReg27: case DW_ExprOp_BReg28: case DW_ExprOp_BReg29: + case DW_ExprOp_BReg30: case DW_ExprOp_BReg31: { + U64 reg_code_dw = op - DW_ExprOp_BReg0; + S64 reg_off = 0; + cursor += str8_deserial_read_sleb128(expr, cursor, ®_off); + + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegReadDyn, reg_code_rdi); + rdim_bytecode_push_sconst(arena, &bc, reg_off); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + } break; + + case DW_ExprOp_BRegX: { + U64 reg_code_dw = 0; + S64 reg_off = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, ®_code_dw); + cursor += str8_deserial_read_sleb128(expr, cursor, ®_off); + + RDI_RegCode reg_code_rdi = d2r_rdi_reg_from_dw_reg_code(arch, reg_code_dw); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RegReadDyn, reg_code_rdi); + rdim_bytecode_push_sconst(arena, &bc, reg_off); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + } break; + + case DW_ExprOp_FBReg: { + S64 frame_off = 0; + cursor += str8_deserial_read_sleb128(expr, cursor, &frame_off); + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_FrameOff, frame_off); + } break; + + case DW_ExprOp_Deref: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_MemRead, address_size); + } break; + + case DW_ExprOp_DerefSize: { + U8 deref_size_in_bytes = 0; + cursor += str8_deserial_read_struct(expr, cursor, &deref_size_in_bytes); + if (0 < deref_size_in_bytes && 
deref_size_in_bytes <= address_size) { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_MemRead, deref_size_in_bytes); + } else { + // TODO: error handling + AssertAlways(!"ill formed expression"); + } + } break; + + case DW_ExprOp_XDerefSize: { + // TODO: error handling + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Call2: + case DW_ExprOp_Call4: + case DW_ExprOp_CallRef: { + // TODO: error handling + AssertAlways(!"calls are not supported"); + } break; + + case DW_ExprOp_ImplicitPointer: + case DW_ExprOp_GNU_ImplicitPointer: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_Convert: + case DW_ExprOp_GNU_Convert: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_GNU_ParameterRef: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_DerefType: + case DW_ExprOp_GNU_DerefType: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_ConstType: + case DW_ExprOp_GNU_ConstType: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_RegvalType: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_EntryValue: + case DW_ExprOp_GNU_EntryValue: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_Addrx: { + U64 addr_idx = 0; + cursor += str8_deserial_read_uleb128(expr, cursor, &addr_idx); + U64 addr = dw_addr_from_list_unit(addr_lu, addr_idx); + if (addr != max_U64) { + if (addr >= image_base) { + U64 voff = addr - image_base; + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_ModuleOff, voff); + } else { + // TODO: error handling + AssertAlways(!"unable to relocate address"); } + } else { + // TODO: error handling + AssertAlways(!"out of bounds index"); } + } break; + + case DW_ExprOp_CallFrameCfa: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_FrameOff, 0); + } break; + + case DW_ExprOp_FormTlsAddress: { + // TODO: + AssertAlways(!"RDI_EvalOp_TLSOff accepts immediate"); + } break; + + case DW_ExprOp_PushObjectAddress: { + 
AssertAlways(!"sample"); + } break; + + case DW_ExprOp_Nop: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Noop, 0); + } break; + + case DW_ExprOp_Eq: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_EqEq, 0); + } break; + + case DW_ExprOp_Ge: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_GrEq, 0); + } break; + + case DW_ExprOp_Gt: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Grtr, 0); + } break; + + case DW_ExprOp_Le: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_LsEq, 0); + } break; + + case DW_ExprOp_Lt: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Less, 0); + } break; + + case DW_ExprOp_Ne: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_NtEq, 0); + } break; + + case DW_ExprOp_Shl: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_LShift, 0); + } break; + + case DW_ExprOp_Shr: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_RShift, 0); + } break; + + case DW_ExprOp_Shra: { + // TODO: + AssertAlways(!"sample"); + } break; + + case DW_ExprOp_Xor: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitXor, 0); + } break; + + case DW_ExprOp_XDeref: { + // TODO: error handling + Assert(!"multiple address spaces are not supported"); + } break; + + case DW_ExprOp_Abs: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Abs, 0); + } break; + + case DW_ExprOp_And: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitAnd, 0); + } break; + + case DW_ExprOp_Div: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Div, 0); + } break; + + case DW_ExprOp_Minus: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Sub, 0); + } break; + + case DW_ExprOp_Mod: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Mod, 0); + } break; + + case DW_ExprOp_Mul: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Mul, 0); + } break; + + case DW_ExprOp_Neg: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Neg, 0); + } break; + + case DW_ExprOp_Not: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_BitNot, 0); + } break; + + case DW_ExprOp_Or: { + rdim_bytecode_push_op(arena, &bc, 
RDI_EvalOp_BitOr, 0); + } break; + + case DW_ExprOp_Plus: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Add, 0); + } break; + + case DW_ExprOp_Rot: { + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Swap: { + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Dup: { + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Drop: { + AssertAlways(!"no suitable conversion"); + } break; + + case DW_ExprOp_Over: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Pick, 1); + } break; + + case DW_ExprOp_StackValue: { + rdim_bytecode_push_op(arena, &bc, RDI_EvalOp_Stop, 0); + } break; + + default: InvalidPath; break; } - - // SYMTAB - if (symtab.count > 0 && params->dump_symtab){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "SYMTAB:\n")); - str8_list_pushf(arena, &dump, " section: %llu\n", elf->symtab_idx); - dump_symtab(arena, &dump, &symtab, strtab, 1); - str8_list_push(arena, &dump, str8_lit("\n")); + } + + return bc; +} + +internal RDIM_Location * +d2r_transpile_expression(Arena *arena, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, String8 expr) +{ + RDIM_Location *loc = 0; + if (expr.size) { + loc = push_array(arena, RDIM_Location, 1); + loc->kind = RDI_LocationKind_AddrBytecodeStream; + loc->bytecode = d2r_bytecode_from_expression(arena, image_base, address_size, arch, addr_lu, expr); + } + return loc; +} + +internal RDIM_LocationSet +d2r_convert_loclist(Arena *arena, RDIM_ScopeChunkList *scopes, U64 image_base, U64 address_size, RDI_Arch arch, DW_ListUnit *addr_lu, DW_LocList loclist) +{ + RDIM_LocationSet locset = {0}; + for (DW_LocNode *loc_n = loclist.first; loc_n != 0; loc_n = loc_n->next) { + RDIM_Location *location = d2r_transpile_expression(arena, image_base, address_size, arch, addr_lu, loc_n->v.expr); + RDIM_Rng1U64 voff_range = { .min = loc_n->v.range.min - image_base, .min = loc_n->v.range.max - 
image_base }; + rdim_location_set_push_case(arena, scopes, &locset, voff_range, location); + } + return locset; +} + +internal RDIM_LocationSet +d2r_locset_from_attrib(Arena *arena, + DW_Input *input, + DW_CompUnit *cu, + RDIM_ScopeChunkList *scopes, + RDIM_Scope *curr_scope, + U64 image_base, + U64 address_size, + RDI_Arch arch, + DW_ListUnit *addr_lu, + DW_Tag tag, + DW_AttribKind kind) +{ + RDIM_LocationSet result = {0}; + + DW_Attrib *attrib = dw_attrib_from_tag(input, cu, tag, kind); + DW_AttribClass attrib_class = dw_value_class_from_attrib(cu, attrib); + + if (attrib_class == DW_AttribClass_LocList || attrib_class == DW_AttribClass_LocListPtr) { + Temp scratch = scratch_begin(&arena, 1); + DW_LocList loclist = dw_loclist_from_attrib_ptr(scratch.arena, input, cu, attrib); + result = d2r_convert_loclist(arena, scopes, image_base, address_size, arch, addr_lu, loclist); + } else if (attrib_class == DW_AttribClass_ExprLoc) { + String8 expr = dw_exprloc_from_attrib_ptr(input, cu, attrib); + RDIM_Location *location = d2r_transpile_expression(arena, image_base, address_size, arch, addr_lu, expr); + for (RDIM_Rng1U64Node *range_n = curr_scope->voff_ranges.first; range_n != 0; range_n = range_n->next) { + rdim_location_set_push_case(arena, scopes, &result, range_n->v, location); } - - // DYNSYM - if (dynsym.count > 0 && params->dump_dynsym){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DYNSYM:\n")); - str8_list_pushf(arena, &dump, " section: %llu\n", elf->dynsym_idx); - dump_symtab(arena, &dump, &dynsym, strtab, 1); - str8_list_push(arena, &dump, str8_lit("\n")); + } else if (attrib_class != DW_AttribClass_Null) { + AssertAlways(!"unexpected attrib class"); + } + + return result; +} + +internal D2R_CompUnitContribMap +d2r_cu_contrib_map_from_aranges(Arena *arena, DW_Input *input, U64 image_base) +{ + Temp scratch = scratch_begin(&arena, 1); + + String8 aranges_data = 
input->sec[DW_Section_ARanges].data; + Rng1U64List unit_range_list = dw_unit_ranges_from_data(scratch.arena, aranges_data); + + D2R_CompUnitContribMap cm = {0}; + cm.count = 0; + cm.info_off_arr = push_array(arena, U64, unit_range_list.count); + cm.voff_range_arr = push_array(arena, RDIM_Rng1U64List, unit_range_list.count); + + for (Rng1U64Node *range_n = unit_range_list.first; range_n != 0; range_n = range_n->next) { + String8 unit_data = str8_substr(aranges_data, range_n->v); + U64 unit_cursor = 0; + + U64 unit_length = 0; + U64 unit_length_size = str8_deserial_read_dwarf_packed_size(unit_data, unit_cursor, &unit_length); + if (unit_length_size == 0) { + continue; } - - // SEGMENTS - if (params->dump_segments){ - if (elf != 0){ - ELF_SegmentArray segment_array = elf_segment_array_from_elf(elf); - - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "SEGMENTS:\n")); - - ELF_Phdr64 *segments = segment_array.segments; - U64 count = segment_array.count; - for (U64 i = 0 ; i < count; i += 1){ - ELF_Phdr64 *seg = segments + i; - - // TODO: better stringizers for fields here - str8_list_pushf(arena, &dump, " segment[%llu]:\n", i); - str8_list_pushf(arena, &dump, " p_type=%u\n", seg->p_type); - str8_list_pushf(arena, &dump, " p_flags=0x%x\n", seg->p_flags); - str8_list_pushf(arena, &dump, " p_offset=0x%llx\n", seg->p_offset); - str8_list_pushf(arena, &dump, " p_vaddr=0x%llx\n", seg->p_vaddr); - str8_list_pushf(arena, &dump, " p_paddr=0x%llx\n", seg->p_paddr); - str8_list_pushf(arena, &dump, " p_filesz=%llu\n", seg->p_filesz); - str8_list_pushf(arena, &dump, " p_memsz=%llu\n", seg->p_memsz); - str8_list_pushf(arena, &dump, " p_align=%llu\n", seg->p_align); - str8_list_push(arena, &dump, str8_lit("\n")); + unit_cursor += unit_length_size; + + DW_Version version = 0; + U64 version_size = str8_deserial_read_struct(unit_data, unit_cursor, &version); + if (version_size == 0) { + continue; + } + unit_cursor += 
version; + + if (version != DW_Version_2) { + AssertAlways(!"unknown .debug_aranges version"); + continue; + } + + DW_Format unit_format = DW_FormatFromSize(unit_length); + U64 cu_info_off = 0; + U64 cu_info_off_size = str8_deserial_read_dwarf_uint(unit_data, unit_cursor, unit_format, &cu_info_off); + if (cu_info_off_size == 0) { + continue; + } + unit_cursor += cu_info_off_size; + + U8 address_size = 0; + U64 address_size_size = str8_deserial_read_struct(unit_data, unit_cursor, &address_size); + if (address_size_size == 0) { + continue; + } + unit_cursor += address_size_size; + + U8 segment_selector_size = 0; + U64 segment_selector_size_size = str8_deserial_read_struct(unit_data, unit_cursor, &segment_selector_size); + if (segment_selector_size_size == 0) { + continue; + } + unit_cursor += segment_selector_size_size; + + U64 tuple_size = address_size * 2 + segment_selector_size; + U64 bytes_too_far_past_boundary = unit_cursor % tuple_size; + if (bytes_too_far_past_boundary > 0) { + unit_cursor += tuple_size - bytes_too_far_past_boundary; + } + + RDIM_Rng1U64List voff_ranges = {0}; + if (segment_selector_size == 0) { + while (unit_cursor + address_size * 2 <= unit_data.size) { + U64 address = 0; + U64 length = 0; + unit_cursor += str8_deserial_read(unit_data, unit_cursor, &address, address_size, address_size); + unit_cursor += str8_deserial_read(unit_data, unit_cursor, &length, address_size, address_size); + + if (address == 0 && length == 0) { + break; } + + // TODO: error handling + AssertAlways(address >= image_base); + + U64 min = address - image_base; + U64 max = min + length; + rdim_rng1u64_list_push(arena, &voff_ranges, (RDIM_Rng1U64){.min = min, .max = max}); } + } else { + // TODO: segment relative addressing + NotImplemented; } - - // DEBUG SECTIONS - if (params->dump_debug_sections){ - if (dwarf != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG SECTIONS:\n")); - - U32 
*debug_section_idx = dwarf->debug_section_idx; - String8 *debug_data = dwarf->debug_data; - for (U32 i = 1; i < DWARF_SectionCode_COUNT; i += 1, debug_data += 1){ - U32 idx = debug_section_idx[i]; - String8 name = dwarf_string_from_section_code(i); - str8_list_pushf(arena, &dump, " %-10.*s section_idx=%u\n", str8_varg(name), idx); - } - str8_list_push(arena, &dump, str8_lit("\n")); + + U64 map_idx = cm.count++; + cm.info_off_arr[map_idx] = cu_info_off; + cm.voff_range_arr[map_idx] = voff_ranges; + } + + scratch_end(scratch); + return cm; +} + +internal RDIM_Rng1U64List +d2r_voff_ranges_from_cu_info_off(D2R_CompUnitContribMap map, U64 info_off) +{ + RDIM_Rng1U64List voff_ranges = {0}; + U64 voff_list_idx = u64_array_bsearch(map.info_off_arr, map.count, info_off); + if (voff_list_idx < map.count) { + voff_ranges = map.voff_range_arr[voff_list_idx]; + } + return voff_ranges; +} + +internal RDIM_Scope * +d2r_push_scope(Arena *arena, RDIM_ScopeChunkList *scopes, U64 scope_chunk_cap, D2R_TagNode *tag_stack, Rng1U64List ranges) +{ + // fill out scope + RDIM_Scope *scope = rdim_scope_chunk_list_push(arena, scopes, scope_chunk_cap); + + // push ranges + for (Rng1U64Node *i = ranges.first; i != 0; i = i->next) { + rdim_scope_push_voff_range(arena, scopes, scope, (RDIM_Rng1U64){.min = i->v.min, i->v.max}); + } + + // associate scope with tag + tag_stack->scope = scope; + + // update scope hierarchy + DW_TagKind parent_tag_kind = tag_stack->next->cur_node->tag.kind; + if (parent_tag_kind == DW_Tag_SubProgram || parent_tag_kind == DW_Tag_InlinedSubroutine || parent_tag_kind == DW_Tag_LexicalBlock) { + RDIM_Scope *parent = tag_stack->next->scope; + + scope->parent_scope = tag_stack->next->scope; + + if (parent->last_child) { + parent->last_child->next_sibling = scope; + } + + SLLQueuePush_N(parent->first_child, parent->last_child, scope, next_sibling); + } + + // propagate scope symbol + if (tag_stack->cur_node->tag.kind == DW_Tag_LexicalBlock) { + scope->symbol = 
tag_stack->next->scope->symbol; + } + + return scope; +} + +internal RDIM_BakeParams * +d2r_convert(Arena *arena, D2R_User2Convert *in) +{ + Temp scratch = scratch_begin(&arena, 1); + + B32 is_parse_relaxed = !(in->flags & D2R_ConvertFlag_StrictParse); + + RDIM_BinarySectionList binary_sections = {0}; + Arch arch = Arch_Null; + U64 image_base = 0; + U64 voff_max = 0; + DW_Input input = {0}; + DW_ListUnitInput lui = {0}; + if (pe_check_magic(in->input_exe_data)) { + PE_BinInfo pe = pe_bin_info_from_data(scratch.arena, in->input_exe_data); + + // infer exe info + arch = pe.arch; + image_base = pe.image_base; + + // get COFF sections + String8 raw_sections = str8_substr(in->input_exe_data, rng_1u64(pe.section_array_off, pe.section_array_off+sizeof(COFF_SectionHeader)*pe.section_count)); + U64 section_count = raw_sections.size / sizeof(COFF_SectionHeader); + COFF_SectionHeader *section_array = (COFF_SectionHeader *)raw_sections.str; + + // loop over section headers and pick max virtual offset + for (U64 i = 0; i < section_count; ++i) { + U64 sec_voff_max = section_array[i].voff + section_array[i].vsize; + voff_max = Max(voff_max, sec_voff_max); + } + + ProfBegin("binary sections"); + for (U64 i = 0; i < section_count; ++i) { + COFF_SectionHeader *coff_sec = §ion_array[i]; + RDIM_BinarySection *sec = rdim_binary_section_list_push(arena, &binary_sections); + + sec->name = coff_name_from_section_header(in->input_exe_data, coff_sec, pe.string_table_off); + sec->flags = rdi_binary_section_flags_from_coff_section_flags(coff_sec->flags); + sec->voff_first = coff_sec->voff; + sec->voff_opl = coff_sec->voff + coff_sec->vsize; + sec->foff_first = coff_sec->foff; + sec->foff_opl = coff_sec->foff + coff_sec->fsize; + } + ProfEnd(); + + // find DWARF sections + input = dw_input_from_coff_section_table(scratch.arena, in->input_exe_data, pe.string_table_off, section_count, section_array); + } + + //////////////////////////////// + + RDI_Arch arch_rdi = RDI_Arch_NULL; + switch (arch) 
{ + case Arch_Null: arch_rdi = RDI_Arch_NULL; break; + case Arch_x64: arch_rdi = RDI_Arch_X64; break; + case Arch_x86: arch_rdi = RDI_Arch_X86; break; + default: NotImplemented; break; + } + + U64 arch_addr_size = rdi_addr_size_from_arch(arch_rdi); + + //////////////////////////////// + + ProfBegin("compute exe hash"); + U64 exe_hash = rdi_hash(in->input_exe_data.str, in->input_exe_data.size); + ProfEnd(); + + //////////////////////////////// + + ProfBegin("top level info"); + RDIM_TopLevelInfo top_level_info = {0}; + top_level_info.arch = arch_rdi; + top_level_info.exe_name = str8_skip_last_slash(in->input_exe_name); + top_level_info.exe_hash = exe_hash; + top_level_info.voff_max = voff_max; + top_level_info.producer_name = str8_lit(BUILD_TITLE_STRING_LITERAL); + ProfEnd(); + + //////////////////////////////// + + static const U64 UNIT_CHUNK_CAP = 256; + static const U64 UDT_CHUNK_CAP = 256; + static const U64 TYPE_CHUNK_CAP = 256; + static const U64 GVAR_CHUNK_CAP = 256; + static const U64 TVAR_CHUNK_CAP = 256; + static const U64 PROC_CHUNK_CAP = 256; + static const U64 SCOPE_CHUNK_CAP = 256; + static const U64 INLINE_SITE_CHUNK_CAP = 256; + static const U64 SRC_FILE_CAP = 256; + static const U64 LINE_TABLE_CAP = 256; + + RDIM_UnitChunkList units = {0}; + RDIM_UDTChunkList udts = {0}; + RDIM_TypeChunkList types = {0}; + RDIM_SymbolChunkList gvars = {0}; + RDIM_SymbolChunkList tvars = {0}; + RDIM_SymbolChunkList procs = {0}; + RDIM_ScopeChunkList scopes = {0}; + RDIM_InlineSiteChunkList inline_sites = {0}; + RDIM_SrcFileChunkList src_files = {0}; + RDIM_LineTableChunkList line_tables = {0}; + + //////////////////////////////// + + ProfBegin("Make Unit Contrib Map"); + D2R_CompUnitContribMap cu_contrib_map = {0}; + if (input.sec[DW_Section_ARanges].data.size > 0) { + cu_contrib_map = d2r_cu_contrib_map_from_aranges(arena, &input, image_base); + } else { + // TODO: synthesize cu ranges from scopes + NotImplemented; + } + ProfEnd(); + + ProfBegin("Parse Comop Unit 
Ranges"); + DW_ListUnitInput lu_input = dw_list_unit_input_from_input(scratch.arena, &input); + Rng1U64List cu_range_list = dw_unit_ranges_from_data(scratch.arena, input.sec[DW_Section_Info].data); + Rng1U64Array cu_ranges = rng1u64_array_from_list(scratch.arena, &cu_range_list); + ProfEnd(); + + //////////////////////////////// + + ProfBegin("Parse Compile Unit Headers"); + DW_CompUnit *cu_arr = push_array(scratch.arena, DW_CompUnit, cu_ranges.count); + for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { + cu_arr[cu_idx] = dw_cu_from_info_off(scratch.arena, &input, lu_input, cu_ranges.v[cu_idx].min, is_parse_relaxed); + } + ProfEnd(); + + //////////////////////////////// + + ProfBegin("Parse Line Tables"); + DW_LineTableParseResult *cu_line_tables = push_array(scratch.arena, DW_LineTableParseResult, cu_ranges.count); + for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { + DW_CompUnit *cu = &cu_arr[cu_idx]; + String8 cu_stmt_list = dw_line_ptr_from_attrib(&input, cu, cu->tag, DW_Attrib_StmtList); + String8 cu_dir = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_CompDir); + String8 cu_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_Name); + cu_line_tables[cu_idx] = dw_parsed_line_table_from_data(scratch.arena, cu_stmt_list, &input, cu_dir, cu_name, cu->address_size, cu->str_offsets_lu); + } + ProfEnd(); + + //////////////////////////////// + + ProfBegin("Convert Line Tables"); + + HashTable *source_file_ht = hash_table_init(scratch.arena, 0x4000); + RDIM_LineTable **cu_line_tables_rdi = push_array(scratch.arena, RDIM_LineTable *, cu_ranges.count); + + for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { + cu_line_tables_rdi[cu_idx] = rdim_line_table_chunk_list_push(arena, &line_tables, LINE_TABLE_CAP); + + DW_LineTableParseResult *line_table = &cu_line_tables[cu_idx]; + DW_LineVMFileArray *dir_table = &line_table->vm_header.dir_table; + DW_LineVMFileArray *file_table = &line_table->vm_header.file_table; + RDIM_SrcFile 
**src_file_map = push_array(scratch.arena, RDIM_SrcFile *, file_table->count); + for (U64 file_idx = 0; file_idx < file_table->count; ++file_idx) { + DW_LineFile *file = &file_table->v[file_idx]; + String8 file_path = dw_path_from_file_idx(scratch.arena, &line_table->vm_header, file_idx); + String8List file_path_split = str8_split_path(scratch.arena, file_path); + str8_path_list_resolve_dots_in_place(&file_path_split, PathStyle_WindowsAbsolute); + String8 file_path_resolved = str8_path_list_join_by_style(scratch.arena, &file_path_split, PathStyle_WindowsAbsolute); + String8 file_path_normalized = lower_from_str8(scratch.arena, file_path_resolved); + RDIM_SrcFile *src_file = hash_table_search_path_raw(source_file_ht, file_path_normalized); + if (src_file == 0) { + src_file = rdim_src_file_chunk_list_push(arena, &src_files, SRC_FILE_CAP); + src_file->normal_full_path = push_str8_copy(arena, file_path_normalized); + hash_table_push_path_raw(scratch.arena, source_file_ht, src_file->normal_full_path, src_file); } + src_file_map[file_idx] = src_file; } - - // DEBUG INFO - if (params->dump_debug_info){ - if (info != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG INFO:\n")); - - U32 i = 0; - for (DWARF_InfoUnit *unit = info->unit_first; - unit != 0; - unit = unit->next, i += 1){ - str8_list_pushf(arena, &dump, " unit[%u]:\n", i); - dwarf_stringize_info(arena, &dump, unit, 2); - str8_list_push(arena, &dump, str8_lit("\n")); - } - + + for (DW_LineSeqNode *line_seq = line_table->first_seq; line_seq != 0; line_seq = line_seq->next) { + if (line_seq->count == 0) { + continue; } - } - - // DEBUG PUBNAMES - if (params->dump_debug_pubnames){ - if (pubnames != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG PUBNAMES:\n")); - - U32 i = 0; - for (DWARF_PubNamesUnit *unit = pubnames->unit_first; - unit != 0; - unit = 
unit->next, i += 1){ - str8_list_pushf(arena, &dump, " unit[%u]:\n", i); - dwarf_stringize_pubnames(arena, &dump, unit, 2); - str8_list_push(arena, &dump, str8_lit("\n")); - } - - } - } - - // DEBUG PUBTYPES - if (params->dump_debug_pubtypes){ - if (pubtypes != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG PUBTYPES:\n")); - - U32 i = 0; - for (DWARF_PubNamesUnit *unit = pubtypes->unit_first; - unit != 0; - unit = unit->next, i += 1){ - str8_list_pushf(arena, &dump, " unit[%u]:\n", i); - dwarf_stringize_pubnames(arena, &dump, unit, 2); - str8_list_push(arena, &dump, str8_lit("\n")); - } - - } - } - - // DEBUG NAMES - if (params->dump_debug_names){ - if (names != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG NAMES:\n")); - - U32 i = 0; - for (DWARF_NamesUnit *unit = names->unit_first; - unit != 0; - unit = unit->next, i += 1){ - str8_list_pushf(arena, &dump, " unit[%u]:\n", i); - dwarf_stringize_names(arena, &dump, unit, 2); - str8_list_push(arena, &dump, str8_lit("\n")); - } - - } - } - - // DEBUG ARANGES - if (params->dump_debug_aranges){ - if (aranges != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG ARANGES:\n")); - - U32 i = 0; - for (DWARF_ArangesUnit *unit = aranges->unit_first; - unit != 0; - unit = unit->next, i += 1){ - str8_list_pushf(arena, &dump, " unit[%u]:\n", i); - dwarf_stringize_aranges(arena, &dump, unit, 2); - str8_list_push(arena, &dump, str8_lit("\n")); - } - - } - } - - // DEBUG ADDR - if (params->dump_debug_addr){ - if (addr != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG ADDR:\n")); - - U32 i = 0; - for (DWARF_AddrUnit *unit = addr->unit_first; - unit != 0; - unit = unit->next, i += 1){ - str8_list_pushf(arena, 
&dump, " unit[%u]:\n", i); - dwarf_stringize_addr(arena, &dump, unit, 2); - str8_list_push(arena, &dump, str8_lit("\n")); - } - - } - } - -#if 0 - // DEBUG ABBREV - if (params->dump_debug_abbrev){ - if (abbrev != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG ABBREV:\n")); - - U32 i = 0; - for (DWARF_AbbrevUnit *unit = abbrev->unit_first; - unit != 0; - unit = unit->next, i += 1){ - U32 j = 0; - for (DWARF_AbbrevDecl *abbrev_decl = unit->first; - abbrev_decl != 0; - abbrev_decl = abbrev_decl->next, j += 1){ - String8 tag_string = dwarf_string_from_tag(abbrev_decl->tag); - - str8_list_pushf(arena, &dump, " unit[%u],abbrev[%u]:\n", i, j); - str8_list_pushf(arena, &dump, " code=%llu\n", abbrev_decl->abbrev_code); - str8_list_pushf(arena, &dump, " tag=%.*s\n", str8_varg(tag_string)); - str8_list_pushf(arena, &dump, " has_children=%u\n", abbrev_decl->has_children); - str8_list_pushf(arena, &dump, " attrib_count=%u\n", abbrev_decl->attrib_count); - str8_list_pushf(arena, &dump, " attribs:\n", abbrev_decl->attrib_count); - - U32 attrib_count = abbrev_decl->attrib_count; - DWARF_AbbrevAttribSpec *attrib_spec = abbrev_decl->attrib_specs; - for (U32 k = 0; k < attrib_count; k += 1, attrib_spec += 1){ - String8 name_string = dwarf_string_from_attribute_name(attrib_spec->name); - String8 form_string = dwarf_string_from_attribute_form(attrib_spec->form); - - str8_list_pushf(arena, &dump, " [%-14.*s %-10.*s]\n", - str8_varg(name_string), str8_varg(form_string)); + + U64 *voffs = push_array(arena, U64, line_seq->count); + U32 *line_nums = push_array(arena, U32, line_seq->count); + U16 *col_nums = 0; + U64 line_idx = 0; + + DW_LineNode *file_line_n = line_seq->first; + U64 file_line_count = 0; + + for (DW_LineNode *line_n = file_line_n; line_n != 0; line_n = line_n->next) { + if (file_line_n->v.file_index != line_n->v.file_index || line_n->next == 0) { + U64 file_index = file_line_n->v.file_index; + 
U64 *file_voffs = &voffs[line_idx]; + U32 *file_line_nums = &line_nums[line_idx]; + U16 *file_col_nums = 0; + + U64 lines_written = 0; + U64 prev_ln = max_U64; + DW_LineNode *sentinel = line_n->v.file_index != file_line_n->v.file_index ? line_n : 0; + for (; file_line_n != sentinel; file_line_n = file_line_n->next) { + if (file_line_n->v.line != prev_ln) { + // TODO: error handling + AssertAlways(file_line_n->v.address >= image_base); + + voffs[line_idx] = file_line_n->v.address - image_base; + line_nums[line_idx] = file_line_n->v.line; + + ++lines_written; + ++line_idx; + + prev_ln = file_line_n->v.line; } + } + + RDIM_SrcFile *src_file = src_file_map[file_index]; + RDIM_LineSequence *line_seq = rdim_line_table_push_sequence(arena, &line_tables, cu_line_tables_rdi[cu_idx], src_file, file_voffs, file_line_nums, file_col_nums, lines_written); + rdim_src_file_push_line_sequence(arena, &src_files, src_file, line_seq); + + file_line_count = 1; + } else { + ++file_line_count; + } + } + + // handle last line + if (file_line_n) { + U64 file_index = file_line_n->v.file_index; + U64 *file_voffs = &voffs[line_idx]; + U32 *file_line_nums = &line_nums[line_idx]; + U16 *file_col_nums = 0; + + for (; file_line_n != 0; file_line_n = file_line_n->next, ++line_idx) { + // TODO: error handling + AssertAlways(file_line_n->v.address >= image_base); + voffs[line_idx] = file_line_n->v.address - image_base; + line_nums[line_idx] = file_line_n->v.line; + } + + RDIM_SrcFile *src_file = src_file_map[file_index]; + RDIM_LineSequence *line_seq = rdim_line_table_push_sequence(arena, &line_tables, cu_line_tables_rdi[cu_idx], src_file, file_voffs, file_line_nums, file_col_nums, file_line_count); + rdim_src_file_push_line_sequence(arena, &src_files, src_file, line_seq); + } + + //Assert(line_idx == line_seq->count); + } + } + + ProfEnd(); + + //////////////////////////////// + + ProfBegin("Convert Units"); + + for (U64 cu_idx = 0; cu_idx < cu_ranges.count; ++cu_idx) { + Temp comp_temp = 
temp_begin(scratch.arena); + + DW_CompUnit *cu = &cu_arr[cu_idx]; + + // parse and build tag tree + DW_TagTree tag_tree = dw_tag_tree_from_cu(comp_temp.arena, &input, cu); + + // build tag hash table for abstract origin resolution + cu->tag_ht = dw_make_tag_hash_table(comp_temp.arena, tag_tree); + + String8 dwo_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_DwoName); + String8 gnu_dwo_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_GNU_DwoName); + if (dwo_name.size || gnu_dwo_name.size || cu->dwo_id) { + // TODO: report that we dont support DWO + continue; + } + + // get unit's contribution ranges + RDIM_Rng1U64List cu_voff_ranges = d2r_voff_ranges_from_cu_info_off(cu_contrib_map, cu_ranges.v[cu_idx].min); + + String8 cu_name = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_Name); + String8 cu_dir = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_CompDir); + String8 cu_prod = dw_string_from_attrib(&input, cu, cu->tag, DW_Attrib_Producer); + DW_Language cu_lang = dw_const_u64_from_attrib(&input, cu, cu->tag, DW_Attrib_Language); + + RDIM_Unit *unit = rdim_unit_chunk_list_push(arena, &units, UNIT_CHUNK_CAP); + unit->unit_name = cu_name; + unit->compiler_name = cu_prod; + unit->source_file = str8_zero(); + unit->object_file = str8_zero(); + unit->archive_file = str8_zero(); + unit->build_path = cu_dir; + unit->language = rdi_language_from_dw_language(cu_lang); + unit->line_table = cu_line_tables_rdi[cu_idx]; + unit->voff_ranges = cu_voff_ranges; + + D2R_TypeTable *type_table = push_array(comp_temp.arena, D2R_TypeTable, 1); + type_table->ht = hash_table_init(comp_temp.arena, 0x4000); + type_table->types = &types; + type_table->type_chunk_cap = TYPE_CHUNK_CAP; + type_table->void_type = d2r_create_type(arena, type_table); + type_table->void_type->kind = RDI_TypeKind_Void; + type_table->varg_type = d2r_create_type(arena, type_table); + type_table->varg_type->kind = RDI_TypeKind_Variadic; + + D2R_TagNode *free_tags = 
push_array(comp_temp.arena, D2R_TagNode, 1); + D2R_TagNode *tag_stack = push_array(comp_temp.arena, D2R_TagNode, 1); + tag_stack->cur_node = tag_tree.root; + + while (tag_stack) { + while (tag_stack->cur_node) { + DW_TagNode *cur_node = tag_stack->cur_node; + DW_Tag tag = cur_node->tag; + B32 visit_children = 1; + + switch (tag.kind) { + case DW_Tag_Null: { + InvalidPath; + } break; + case DW_Tag_ClassType: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + + B32 is_decl = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_Declaration); + if (is_decl) { + type->kind = RDI_TypeKind_IncompleteClass; + + Assert(!cur_node->first_child); + visit_children = 0; + } else { + RDIM_UDT *udt = rdim_udt_chunk_list_push(arena, &udts, UDT_CHUNK_CAP); + udt->self_type = type; + + type->kind = RDI_TypeKind_Class; + type->byte_size = dw_byte_size_32_from_tag(&input, cu, tag); + type->udt = udt; + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + tag_stack->type = type; + } + } break; + case DW_Tag_StructureType: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + + B32 is_decl = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_Declaration); + if (is_decl) { + type->kind = RDI_TypeKind_IncompleteStruct; + + // TODO: error handling + Assert(!cur_node->first_child); + visit_children = 0; + } else { + RDIM_UDT *udt = rdim_udt_chunk_list_push(arena, &udts, UDT_CHUNK_CAP); + udt->self_type = type; + + type->kind = RDI_TypeKind_Struct; + type->udt = udt; + type->byte_size = dw_byte_size_32_from_tag(&input, cu, tag); - str8_list_push(arena, &dump, str8_lit("\n")); + tag_stack->type = type; + } + } break; + case DW_Tag_UnionType: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + 
type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + + B32 is_decl = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_Declaration); + if (is_decl) { + type->kind = RDI_TypeKind_IncompleteUnion; + + // TODO: error handling + Assert(!cur_node->first_child); + visit_children = 0; + } else { + RDIM_UDT *udt = rdim_udt_chunk_list_push(arena, &udts, UDT_CHUNK_CAP); + udt->self_type = type; + + type->kind = RDI_TypeKind_Union; + type->byte_size = dw_byte_size_32_from_tag(&input, cu, tag); + type->udt = udt; + + tag_stack->type = type; + } + } break; + case DW_Tag_EnumerationType: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + + B32 is_decl = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_Declaration); + if (is_decl) { + type->kind = RDI_TypeKind_IncompleteEnum; + + // TODO: error handling + Assert(!cur_node->first_child); + visit_children = 0; + } else { + RDIM_UDT *udt = rdim_udt_chunk_list_push(arena, &udts, UDT_CHUNK_CAP); + udt->self_type = type; + + type->kind = RDI_TypeKind_Enum; + type->byte_size = dw_byte_size_32_from_tag(&input, cu, tag); + type->udt = udt; + + tag_stack->type = type; + } + } break; + case DW_Tag_SubroutineType: { + // collect parameters + RDIM_TypeList param_list = {0}; + for (DW_TagNode *n = cur_node->first_child; n != 0; n = n->sibling) { + if (n->tag.kind == DW_Tag_FormalParameter) { + RDIM_Type *param_type = d2r_type_from_attrib(arena, type_table, &input, cu, n->tag, DW_Attrib_Type); + rdim_type_list_push(comp_temp.arena, ¶m_list, param_type); + } else if (n->tag.kind == DW_Tag_UnspecifiedParameters) { + rdim_type_list_push(comp_temp.arena, ¶m_list, type_table->varg_type); + } else { + // TODO: error handling + AssertAlways(!"unexpected tag"); + } + } + + // init proceudre type + RDIM_Type *ret_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + RDIM_Type *type = 
d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Function; + type->byte_size = arch_addr_size; + type->direct_type = ret_type; + type->count = param_list.count; + type->param_types = rdim_array_from_type_list(arena, param_list); + + visit_children = 0; + } break; + case DW_Tag_Typedef: { + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Alias; + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + } break; + case DW_Tag_BaseType: { + DW_ATE encoding = dw_const_u64_from_attrib(&input, cu, tag, DW_Attrib_Encoding); + U64 byte_size = dw_byte_size_from_tag(&input, cu, tag); + + // convert base type encoding to RDI version + RDI_TypeKind kind = RDI_TypeKind_NULL; + switch (encoding) { + case DW_ATE_Null: kind = RDI_TypeKind_NULL; break; + case DW_ATE_Address: kind = RDI_TypeKind_Void; break; + case DW_ATE_Boolean: kind = RDI_TypeKind_Bool; break; + case DW_ATE_ComplexFloat: { + switch (byte_size) { + case 4: kind = RDI_TypeKind_ComplexF32; break; + case 8: kind = RDI_TypeKind_ComplexF64; break; + case 10: kind = RDI_TypeKind_ComplexF80; break; + case 16: kind = RDI_TypeKind_ComplexF128; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_Float: { + switch (byte_size) { + case 2: kind = RDI_TypeKind_F16; break; + case 4: kind = RDI_TypeKind_F32; break; + case 6: kind = RDI_TypeKind_F48; break; + case 8: kind = RDI_TypeKind_F64; break; + case 16: kind = RDI_TypeKind_F128; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_Signed: { + switch (byte_size) { + case 1: kind = RDI_TypeKind_S8; break; + case 2: kind = RDI_TypeKind_S16; break; + case 4: kind = RDI_TypeKind_S32; break; + case 8: kind = RDI_TypeKind_S64; break; + case 
16: kind = RDI_TypeKind_S128; break; + case 32: kind = RDI_TypeKind_S256; break; + case 64: kind = RDI_TypeKind_S512; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_SignedChar: { + switch (byte_size) { + case 1: kind = RDI_TypeKind_Char8; break; + case 2: kind = RDI_TypeKind_Char16; break; + case 4: kind = RDI_TypeKind_Char32; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_Unsigned: { + switch (byte_size) { + case 1: kind = RDI_TypeKind_U8; break; + case 2: kind = RDI_TypeKind_U16; break; + case 4: kind = RDI_TypeKind_U32; break; + case 8: kind = RDI_TypeKind_U64; break; + case 16: kind = RDI_TypeKind_U128; break; + case 32: kind = RDI_TypeKind_U256; break; + case 64: kind = RDI_TypeKind_U512; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_UnsignedChar: { + switch (byte_size) { + case 1: kind = RDI_TypeKind_UChar8; break; + case 2: kind = RDI_TypeKind_UChar16; break; + case 4: kind = RDI_TypeKind_UChar32; break; + default: AssertAlways(!"unexpected size"); break; // TODO: error handling + } + } break; + case DW_ATE_ImaginaryFloat: { + NotImplemented; + } break; + case DW_ATE_PackedDecimal: { + NotImplemented; + } break; + case DW_ATE_NumericString: { + NotImplemented; + } break; + case DW_ATE_Edited: { + NotImplemented; + } break; + case DW_ATE_SignedFixed: { + NotImplemented; + } break; + case DW_ATE_UnsignedFixed: { + NotImplemented; + } break; + case DW_ATE_DecimalFloat: { + NotImplemented; + } break; + case DW_ATE_Utf: { + NotImplemented; + } break; + case DW_ATE_Ucs: { + NotImplemented; + } break; + case DW_ATE_Ascii: { + NotImplemented; + } break; + default: AssertAlways(!"unexpected base type encoding"); break; // TODO: error handling + } + + RDIM_Type *base_type = d2r_create_type(arena, type_table); + base_type->kind = kind; + base_type->byte_size = byte_size; + + 
RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Alias; + type->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + type->direct_type = base_type; + } break; + case DW_Tag_PointerType: { + RDIM_Type *direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + // TODO: + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Allocated)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Associated)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Alignment)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Name)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_AddressClass)); + + U64 byte_size = arch_addr_size; + if (cu->version == DW_Version_5 || cu->relaxed) { + dw_try_byte_size_from_tag(&input, cu, tag, &byte_size); + } + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Ptr; + type->byte_size = byte_size; + type->direct_type = direct_type; + } break; + case DW_Tag_RestrictType: { + // TODO: + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Alignment)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Name)); + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Modifier; + type->byte_size = arch_addr_size; + type->flags = RDI_TypeModifierFlag_Restrict; + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + } break; + case DW_Tag_VolatileType: { + // TODO: + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Name)); + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Modifier; + type->byte_size = arch_addr_size; + type->flags = RDI_TypeModifierFlag_Volatile; + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + } break; + case 
DW_Tag_ConstType: { + // TODO: + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Name)); + Assert(!dw_tag_has_attrib(&input, cu, tag, DW_Attrib_Alignment)); + + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Modifier; + type->byte_size = arch_addr_size; + type->flags = RDI_TypeModifierFlag_Const; + type->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + } break; + case DW_Tag_ArrayType: { + // * DWARF vs RDI Array Type Graph * + // + // For example lets take following decl: + // + // int (*foo[2])[3][4]; + // + // This compiles to in DWARF: + // + // foo -> DW_TAG_ArrayType -> (A0) DW_TAG_Subrange [2] + // \ + // -> (B0) DW_TAG_PointerType -> (A1) DW_TAG_ArrayType -> DW_TAG_Subrange [3] -> DW_Tag_Subrange [4] + // \ + // -> (B1) DW_TAG_BaseType (int) + // + // RDI expects: + // + // foo -> Array (2) -> Pointer -> Array (3) -> Array (4) -> int + // + // Note that DWARF forks the graph on DW_TAG_ArrayType to describe array ranges in branch A and + // in branch B describes array type which might be a struct, pointer, base type, or any other type tag. + // However, in RDI we have a simple list of type nodes and to convert we need to append type nodes from + // B to A. 
+ + RDIM_Type *type = d2r_find_or_create_type_from_offset(arena, type_table, tag.info_off); + type->kind = RDI_TypeKind_Array; + type->direct_type = 0; + + U64 subrange_count = 0; + RDIM_Type *t = type; + for (DW_TagNode *n = cur_node->first_child; n != 0; n = n->sibling) { + if (n->tag.kind != DW_Tag_SubrangeType) { + // TODO: error handling + AssertAlways(!"unexpected tag"); + continue; + } + + if (subrange_count > 0) { + // init array type node + RDIM_Type *s = d2r_create_type(arena, type_table); + s->kind = RDI_TypeKind_Array; + s->direct_type = 0; + + // append new array type node + t->direct_type = s; + t = s; + } + + // resolve array lower bound + U64 lower_bound = 0; + if (dw_tag_has_attrib(&input, cu, n->tag, DW_Attrib_LowerBound)) { + lower_bound = dw_u64_from_attrib(&input, cu, n->tag, DW_Attrib_LowerBound); + } else { + lower_bound = dw_pick_default_lower_bound(cu_lang); + } + + // resolve array upper bound + U64 upper_bound = 0; + if (dw_tag_has_attrib(&input, cu, n->tag, DW_Attrib_Count)) { + U64 count = dw_u64_from_attrib(&input, cu, n->tag, DW_Attrib_Count); + upper_bound = lower_bound + count; + } else if (dw_tag_has_attrib(&input, cu, n->tag, DW_Attrib_UpperBound)) { + upper_bound = dw_u64_from_attrib(&input, cu, n->tag, DW_Attrib_UpperBound); + // turn upper bound into exclusive range + upper_bound += 1; + } else { + // zero size array + } + + t->count = upper_bound - lower_bound; + ++subrange_count; + } + + Assert(t->direct_type == 0); + t->direct_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + visit_children = 0; + } break; + case DW_Tag_SubrangeType: { + // TODO: error handling + AssertAlways(!"unexpected tag"); + } break; + case DW_Tag_Inheritance: { + DW_TagNode *parent_node = tag_stack->next->cur_node; + if (parent_node->tag.kind != DW_Tag_StructureType && + parent_node->tag.kind != DW_Tag_ClassType) { + // TODO: error handling + AssertAlways(!"unexpected parent tag"); + } + + RDIM_Type *parent = 
tag_stack->next->type; + RDIM_UDTMember *member = rdim_udt_push_member(arena, &udts, parent->udt); + member->kind = RDI_MemberKind_Base; + member->type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + member->off = safe_cast_u32(dw_const_u32_from_attrib(&input, cu, tag, DW_Attrib_DataMemberLocation)); + } break; + case DW_Tag_Enumerator: { + DW_TagNode *parent_node = tag_stack->next->cur_node; + if (parent_node->tag.kind != DW_Tag_EnumerationType) { + // TODO: error handling + AssertAlways(!"unexpected parent tag"); + } + + RDIM_Type *type = tag_stack->next->type; + RDIM_UDTEnumVal *member = rdim_udt_push_enum_val(arena, &udts, type->udt); + member->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + member->val = dw_const_u64_from_attrib(&input, cu, tag, DW_Attrib_ConstValue); + } break; + case DW_Tag_Member: { + DW_TagNode *parent_node = tag_stack->next->cur_node; + if (parent_node->tag.kind != DW_Tag_StructureType && + parent_node->tag.kind != DW_Tag_ClassType && + parent_node->tag.kind != DW_Tag_UnionType && + parent_node->tag.kind != DW_Tag_EnumerationType) { + // TODO: error handling + AssertAlways(!"unexpected parent tag"); + } + + DW_Attrib *data_member_location = dw_attrib_from_tag(&input, cu, tag, DW_Attrib_DataMemberLocation); + DW_AttribClass data_member_location_class = dw_value_class_from_attrib(cu, data_member_location); + if (data_member_location_class == DW_AttribClass_LocList) { + AssertAlways(!"UDT member with multiple locations are not supported"); + } + + RDIM_Type *type = tag_stack->next->type; + RDIM_UDTMember *member = rdim_udt_push_member(arena, &udts, type->udt); + member->kind = RDI_MemberKind_DataField; + member->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + member->type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + member->off = dw_const_u64_from_attrib(&input, cu, tag, DW_Attrib_DataMemberLocation); + } break; + case DW_Tag_SubProgram: { + 
DW_InlKind inl = dw_u64_from_attrib(&input, cu, tag, DW_Attrib_Inline); + switch (inl) { + case DW_Inl_NotInlined: { + U64 param_count = 0; + RDIM_Type **params = d2r_collect_proc_params(arena, type_table, &input, cu, cur_node, ¶m_count); + + // get return type + RDIM_Type *ret_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + // fill out proc type + RDIM_Type *proc_type = d2r_create_type(arena, type_table); + proc_type->kind = RDI_TypeKind_Function; + proc_type->byte_size = arch_addr_size; + proc_type->direct_type = ret_type; + proc_type->count = param_count; + proc_type->param_types = params; + + // get container type + RDIM_Type *container_type = 0; + if (dw_tag_has_attrib(&input, cu, tag, DW_Attrib_ContainingType)) { + container_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_ContainingType); + } + + // get frame base expression + String8 frame_base_expr = dw_exprloc_from_attrib(&input, cu, tag, DW_Attrib_FrameBase); + + // get proc container symbol + RDIM_Symbol *proc = rdim_symbol_chunk_list_push(arena, &procs, PROC_CHUNK_CAP ); + + // make scope + Rng1U64List ranges = d2r_range_list_from_tag(comp_temp.arena, &input, cu, image_base, tag); + RDIM_Scope *root_scope = d2r_push_scope(arena, &scopes, SCOPE_CHUNK_CAP, tag_stack, ranges); + root_scope->symbol = proc; + + // fill out proc + proc->is_extern = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_External); + proc->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + proc->link_name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_LinkageName); + proc->type = proc_type; + proc->container_symbol = 0; + proc->container_type = container_type; + proc->root_scope = root_scope; + proc->frame_base = d2r_locset_from_attrib(arena, &input, cu, &scopes, root_scope, image_base, cu->address_size, arch_rdi, cu->addr_lu, tag, DW_Attrib_FrameBase); + + // sub program with user-defined parent tag is a method + DW_TagKind parent_tag_kind = 
tag_stack->next->cur_node->tag.kind; + if (parent_tag_kind == DW_Tag_ClassType || parent_tag_kind == DW_Tag_StructureType) { + RDI_MemberKind member_kind = RDI_MemberKind_NULL; + DW_VirtualityKind virtuality = dw_const_u64_from_attrib(&input, cu, tag, DW_Attrib_Virtuality); + switch (virtuality) { + case DW_VirtualityKind_None: member_kind = RDI_MemberKind_Method; break; + case DW_VirtualityKind_Virtual: member_kind = RDI_MemberKind_VirtualMethod; break; + case DW_VirtualityKind_PureVirtual: member_kind = RDI_MemberKind_VirtualMethod; break; // TODO: create kind for pure virutal + default: InvalidPath; break; + } + + RDIM_Type *type = tag_stack->next->type; + RDIM_UDTMember *member = rdim_udt_push_member(arena, &udts, type->udt); + member->kind = member_kind; + member->type = type; + member->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + } else if (parent_tag_kind != DW_Tag_CompileUnit) { + AssertAlways(!"unexpected tag"); + } + + tag_stack->scope = root_scope; + } break; + case DW_Inl_DeclaredNotInlined: + case DW_Inl_DeclaredInlined: + case DW_Inl_Inlined: { + visit_children = 0; + } break; + default: InvalidPath; break; + } + } break; + case DW_Tag_InlinedSubroutine: { + U64 param_count = 0; + RDIM_Type **params = d2r_collect_proc_params(arena, type_table, &input, cu, tag_stack->cur_node, ¶m_count); + + // get return type + RDIM_Type *ret_type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + // fill out proc type + RDIM_Type *proc_type = d2r_create_type(arena, type_table); + proc_type->kind = RDI_TypeKind_Function; + proc_type->byte_size = arch_addr_size; + proc_type->direct_type = ret_type; + proc_type->count = param_count; + proc_type->param_types = params; + + // get container type + RDIM_Type *owner = 0; + if (dw_tag_has_attrib(&input, cu, tag, DW_Attrib_ContainingType)) { + owner = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_ContainingType); + } + + // fill out inline site + 
RDIM_InlineSite *inline_site = rdim_inline_site_chunk_list_push(arena, &inline_sites, INLINE_SITE_CHUNK_CAP); + inline_site->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + inline_site->type = proc_type; + inline_site->owner = owner; + inline_site->line_table = 0; + + // make scope + Rng1U64List ranges = d2r_range_list_from_tag(comp_temp.arena, &input, cu, image_base, tag); + RDIM_Scope *root_scope = d2r_push_scope(arena, &scopes, SCOPE_CHUNK_CAP, tag_stack, ranges); + root_scope->inline_site = inline_site; + } break; + case DW_Tag_Variable: { + String8 name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + RDIM_Type *type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + + DW_TagKind parent_tag_kind = tag_stack->next->cur_node->tag.kind; + if (parent_tag_kind == DW_Tag_SubProgram || + parent_tag_kind == DW_Tag_InlinedSubroutine || + parent_tag_kind == DW_Tag_LexicalBlock) { + RDIM_Scope *scope = tag_stack->next->scope; + RDIM_Local *local = rdim_scope_push_local(arena, &scopes, tag_stack->next->scope); + local->kind = RDI_LocalKind_Variable; + local->name = name; + local->type = type; + local->locset = d2r_locset_from_attrib(arena, &input, cu, &scopes, scope, image_base, cu->address_size, arch_rdi, cu->addr_lu, tag, DW_Attrib_Location); + } else { + + // NOTE: due to a bug in clang in stb_sprint.h local variables + // are declared in global scope without a name + if (name.size == 0) { + break; + } + + RDIM_Symbol *gvar = rdim_symbol_chunk_list_push(arena, &gvars, GVAR_CHUNK_CAP); + gvar->is_extern = dw_flag_from_attrib(&input, cu, tag, DW_Attrib_External); + gvar->name = name; + gvar->link_name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_LinkageName); + gvar->type = type; + gvar->offset = 0; // TODO: NotImplemented; + gvar->container_symbol = 0; + gvar->container_type = 0; // TODO: NotImplemented; + } + } break; + case DW_Tag_FormalParameter: { + DW_TagKind parent_tag_kind = 
tag_stack->next->cur_node->tag.kind; + if (parent_tag_kind == DW_Tag_SubProgram || parent_tag_kind == DW_Tag_InlinedSubroutine) { + RDIM_Scope *scope = tag_stack->next->scope; + RDIM_Local *param = rdim_scope_push_local(arena, &scopes, scope); + param->kind = RDI_LocalKind_Parameter; + param->name = dw_string_from_attrib(&input, cu, tag, DW_Attrib_Name); + param->type = d2r_type_from_attrib(arena, type_table, &input, cu, tag, DW_Attrib_Type); + param->locset = d2r_locset_from_attrib(arena, &input, cu, &scopes, scope, image_base, cu->address_size, arch_rdi, cu->addr_lu, tag, DW_Attrib_Location); + } else { + // TODO: error handling + AssertAlways(!"this is a local variable"); + } + } break; + case DW_Tag_LexicalBlock: { + if (tag_stack->next->cur_node->tag.kind == DW_Tag_SubProgram || + tag_stack->next->cur_node->tag.kind == DW_Tag_InlinedSubroutine || + tag_stack->next->cur_node->tag.kind == DW_Tag_LexicalBlock) { + Rng1U64List ranges = d2r_range_list_from_tag(comp_temp.arena, &input, cu, image_base, tag); + d2r_push_scope(arena, &scopes, SCOPE_CHUNK_CAP, tag_stack, ranges); + } + } break; + case DW_Tag_Label: + case DW_Tag_CompileUnit: + case DW_Tag_UnspecifiedParameters: + break; + default: NotImplemented; break; + } + + if (tag_stack->cur_node->first_child && visit_children) { + D2R_TagNode *frame = free_tags; + if (frame) { + SLLStackPop(free_tags); + MemoryZeroStruct(frame); + } else { + frame = push_array(scratch.arena, D2R_TagNode, 1); + } + frame->cur_node = tag_stack->cur_node->first_child; + SLLStackPush(tag_stack, frame); + } else { + tag_stack->cur_node = tag_stack->cur_node->sibling; + } + } + + // recycle free frame + D2R_TagNode *frame = tag_stack; + SLLStackPop(tag_stack); + SLLStackPush(free_tags, frame); + + if (tag_stack) { + tag_stack->cur_node = tag_stack->cur_node->sibling; + } + } + + temp_end(comp_temp); + } + + ProfEnd(); + + { + for (RDIM_TypeChunkNode *chunk_n = types.first; chunk_n != 0; chunk_n = chunk_n->next) { + for (U64 i = 0; i < 
chunk_n->count; ++i) { + RDIM_Type *type = &chunk_n->v[i]; + if (type->kind == RDI_TypeKind_Alias) { + for (RDIM_Type *t = type->direct_type; t != 0; t = t->direct_type) { + if (t->byte_size != 0) { + type->byte_size = t->byte_size; + break; + } } } - } } -#endif - -#if 0 - // DEBUG INFO - if (params->dump_debug_info){ - if (info != 0){ - str8_list_push(arena, &dump, - str8_lit("################################" - "################################\n" - "DEBUG INFO:\n")); - - U32 i = 0; - for (DWARF_InfoUnit *unit = info->unit_first; - unit != 0; - unit = unit->next, i += 1){ - str8_list_pushf(arena, &dump, " unit[%u]:\n", i); - str8_list_pushf(arena, &dump, " [header]\n"); - str8_list_pushf(arena, &dump, " version=%u\n", unit->dwarf_version); - str8_list_pushf(arena, &dump, " offset_size=%u\n", unit->offset_size); - str8_list_pushf(arena, &dump, " address_size=%u\n", unit->address_size); - str8_list_pushf(arena, &dump, " [extracted attributes]\n"); - str8_list_pushf(arena, &dump, " langauge=%u\n", (U32)unit->language); - str8_list_pushf(arena, &dump, " line_info_offset=%llu\n", unit->line_info_offset); - str8_list_pushf(arena, &dump, " vbase=0x%llx\n", unit->vbase); - str8_list_pushf(arena, &dump, " str_offsets_base=%llu\n", unit->str_offsets_base); - str8_list_pushf(arena, &dump, " addr_base=%llu\n", unit->addr_base); - str8_list_pushf(arena, &dump, " rnglists_base=%llu\n", unit->rnglists_base); - str8_list_pushf(arena, &dump, " loclists_base=%llu\n", unit->loclists_base); - dump_entry_tree(arena, &dump, dwarf, unit, unit->entry_root, 2); - str8_list_push(arena, &dump, str8_lit("\n")); - } - - } - } -#endif - - // print dump - for (String8Node *node = dump.first; - node != 0; - node = node->next){ - fwrite(node->string.str, 1, node->string.size, stdout); - } } + + { + RDIM_TypeNode *type_stack = 0; + RDIM_TypeNode *free_types = 0; + + for (RDIM_TypeChunkNode *chunk_n = types.first; chunk_n != 0; chunk_n = chunk_n->next) { + for (U64 i = 0; i < chunk_n->count; ++i) 
{ + RDIM_Type *type = &chunk_n->v[i]; + if (type->kind == RDI_TypeKind_Array) { + if (type->byte_size != 0) + continue; + + RDIM_Type *t; + for (t = type; t != 0 && t->kind == RDI_TypeKind_Array; t = t->direct_type) { + RDIM_TypeNode *f = free_types; + if (f == 0) { + f = push_array(scratch.arena, RDIM_TypeNode, 1); + } else { + SLLStackPop(free_types); + } + f->v = t; + SLLStackPush(type_stack, f); + } + + U64 base_type_size = 0; + if (t) { + base_type_size = t->byte_size; + } + + U64 array_size = base_type_size; + while (type_stack) { + if (type_stack->v->count) { + array_size *= type_stack->v->count; + } else { + array_size += type_stack->v->byte_size; + } + SLLStackPop(type_stack); + } + + type->count = 0; + type->byte_size = array_size; + + // recycle frames + free_types = type_stack; + type_stack = 0; + } + } + } + } + + //////////////////////////////// + + RDIM_BakeParams *bake_params = push_array(arena, RDIM_BakeParams, 1); + bake_params->top_level_info = top_level_info; + bake_params->binary_sections = binary_sections; + bake_params->units = units; + bake_params->types = types; + bake_params->udts = udts; + bake_params->src_files = src_files; + bake_params->line_tables = line_tables; + bake_params->global_variables = gvars; + bake_params->thread_variables = tvars; + bake_params->procedures = procs; + bake_params->scopes = scopes; + bake_params->inline_sites = inline_sites; + + scratch_end(scratch); + return bake_params; } + +internal RDIM_BakeResults +d2r_bake(RDIM_HelpState *state, RDIM_BakeParams *in_params) +{ + return rdim_bake(state, in_params); +} + +internal RDIM_SerializedSectionBundle +d2r_compress(Arena *arena, RDIM_SerializedSectionBundle in) +{ + RDIM_SerializedSectionBundle result = {0}; + return result; +} + +internal RDI_Language +rdi_language_from_dw_language(DW_Language v) +{ + RDI_Language result = RDI_Language_NULL; + switch (v) { + case DW_Language_Null: result = RDI_Language_NULL; break; + + case DW_Language_C89: + case 
DW_Language_C99: + case DW_Language_C11: + case DW_Language_C: + result = RDI_Language_C; + break; + + case DW_Language_CPlusPlus03: + case DW_Language_CPlusPlus11: + case DW_Language_CPlusPlus14: + case DW_Language_CPlusPlus: + result = RDI_Language_CPlusPlus; + break; + + default: NotImplemented; break; + } + return result; +} + +internal RDI_RegCodeX86 +rdi_reg_from_dw_reg_x86(DW_RegX86 v) +{ + RDI_RegCodeX86 result = RDI_RegCode_nil; + switch (v) { +#define X(reg_dw, val_dw, reg_rdi, ...) case DW_RegX86_##reg_dw: result = RDI_RegCodeX86_##reg_rdi; break; + DW_Regs_X86_XList(X) +#undef X + default: NotImplemented; break; + } + return result; +} + +internal B32 +rdi_reg_from_dw_reg_x64(DW_RegX64 v, RDI_RegCodeX64 *code_out, U64 *off_out, U64 *size_out) +{ + RDI_RegCodeX64 result = RDI_RegCode_nil; + switch (v) { +#define X(reg_dw, val_dw, reg_rdi, off, size) case DW_RegX64_##reg_dw: result = RDI_RegCodeX64_##reg_rdi; *off_out = off; *size_out = size; break; + DW_Regs_X64_XList(X) +#undef X + default: NotImplemented; break; + } + return result; +} + +internal B32 +rdi_reg_from_dw_reg(Arch arch, DW_Reg v, RDI_RegCode *code_out, U64 *off_out, U64 *size_out) +{ + RDI_RegCode result = RDI_RegCode_nil; + switch (arch) { + case Arch_Null: break; + case Arch_x86: ; break; + case Arch_x64: return rdi_reg_from_dw_reg_x64(v, code_out, off_out, size_out); + default: NotImplemented; break; + } + return 0; +} + diff --git a/src/rdi_from_dwarf/rdi_from_dwarf.h b/src/rdi_from_dwarf/rdi_from_dwarf.h index c5ea8251..63d27b32 100644 --- a/src/rdi_from_dwarf/rdi_from_dwarf.h +++ b/src/rdi_from_dwarf/rdi_from_dwarf.h @@ -1,50 +1,69 @@ // Copyright (c) 2024 Epic Games Tools // Licensed under the MIT license (https://opensource.org/license/mit/) -#ifndef RDI_FROM_DWARF_H -#define RDI_FROM_DWARF_H +#pragma once + +typedef U64 D2R_ConvertFlags; +enum +{ +#define X(t,n,k) D2R_ConvertFlag_##t = (1ull << RDI_SectionKind_##t), + RDI_SectionKind_XList +#undef X + D2R_ConvertFlag_StrictParse, 
+}; + +typedef struct D2R_User2Convert +{ + String8 input_exe_name; + String8 input_exe_data; + String8 input_debug_name; + String8 input_debug_data; + String8 output_name; + D2R_ConvertFlags flags; + String8List errors; +} D2R_User2Convert; + +typedef struct D2R_TypeTable +{ + HashTable *ht; + RDIM_TypeChunkList *types; + U64 type_chunk_cap; + RDIM_Type *void_type; + RDIM_Type *varg_type; +} D2R_TypeTable; + +typedef struct D2R_TagNode +{ + struct D2R_TagNode *next; + DW_TagNode *cur_node; + RDIM_Type *type; + RDIM_Scope *scope; +} D2R_TagNode; + +typedef struct D2R_CompUnitContribMap +{ + U64 count; + U64 *info_off_arr; + RDIM_Rng1U64List *voff_range_arr; +} D2R_CompUnitContribMap; //////////////////////////////// -//~ Program Parameters Type +// Command Line -> Conversion Inputs -typedef struct DWARFCONV_Params{ - String8 input_elf_name; - String8 input_elf_data; - - String8 output_name; - - U64 unit_idx_min; - U64 unit_idx_max; - - struct{ - B8 input; - } hide_errors; - - B8 dump; - B8 dump__first; - B8 dump_header; - B8 dump_sections; - B8 dump_segments; - B8 dump_symtab; - B8 dump_dynsym; - B8 dump_debug_sections; - B8 dump_debug_info; - B8 dump_debug_abbrev; - B8 dump_debug_pubnames; - B8 dump_debug_pubtypes; - B8 dump_debug_names; - B8 dump_debug_aranges; - B8 dump_debug_addr; - B8 dump__last; - - String8List errors; -} DWARFCONV_Params; +internal D2R_User2Convert * d2r_user2convert_from_cmdln(Arena *arena, CmdLine *cmdline); //////////////////////////////// -//~ Program Parameters Parser +// Top-Level Conversion Entry Point -static DWARFCONV_Params *dwarf_convert_params_from_cmd_line(Arena *arena, CmdLine *cmdline); +internal RDIM_BakeParams * d2r_convert (Arena *arena, D2R_User2Convert *in); +internal RDIM_BakeResults d2r_bake (RDIM_HelpState *state, RDIM_BakeParams *in); +internal RDIM_SerializedSectionBundle d2r_compress(Arena *arena, RDIM_SerializedSectionBundle in); +//////////////////////////////// +// Enum Conversion +internal RDI_Language 
rdi_language_from_dw_language(DW_Language v); +internal RDI_RegCodeX86 rdi_reg_from_dw_reg_x86(DW_RegX86 v); +internal B32 rdi_reg_from_dw_reg_x64(DW_RegX64 v, RDI_RegCodeX64 *code_out, U64 *off_out, U64 *size_out); +internal B32 rdi_reg_from_dw_reg(Arch arch, DW_Reg v, RDI_RegCode *code_out, U64 *off_out, U64 *size_out); -#endif //RDI_FROM_DWARF_H diff --git a/src/rdi_from_dwarf/rdi_from_dwarf_main.c b/src/rdi_from_dwarf/rdi_from_dwarf_main.c new file mode 100644 index 00000000..32ea0331 --- /dev/null +++ b/src/rdi_from_dwarf/rdi_from_dwarf_main.c @@ -0,0 +1,122 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#define BUILD_TITLE "Epic Games Tools (R) DWARF Converter" +#define BUILD_CONSOLE_INTERFACE 1 + +//////////////////////////////// + +#include "third_party/rad_lzb_simple/rad_lzb_simple.h" +#include "third_party/rad_lzb_simple/rad_lzb_simple.c" +#include "third_party/xxHash/xxhash.c" +#include "third_party/xxHash/xxhash.h" + +//////////////////////////////// + +#include "lib_rdi_format/rdi_format.h" +#include "lib_rdi_format/rdi_format.c" +#include "lib_rdi_format/rdi_format_parse.h" +#include "lib_rdi_format/rdi_format_parse.c" + +//////////////////////////////// + +#include "base/base_inc.h" +#include "os/os_inc.h" +#include "async/async.h" +#include "rdi_make/rdi_make_local.h" +#include "rdi_make/rdi_make_help.h" +#include "linker/path_ext/path.h" +#include "linker/hash_table.h" +#include "coff/coff.h" +#include "coff/coff_parse.h" +#include "dwarf/dwarf.h" +#include "dwarf/dwarf_parse.h" +#include "dwarf/dwarf_coff.h" +#include "pe/pe.h" +#include "linker/rdi/rdi_coff.h" +#include "rdi_from_dwarf/rdi_from_dwarf.h" + +#include "base/base_inc.c" +#include "os/os_inc.c" +#include "async/async.c" +#include "coff/coff.c" +#include "coff/coff_parse.c" +#include "pe/pe.c" +#include "rdi_make/rdi_make_local.c" +#include "rdi_make/rdi_make_help.c" +#include "linker/rdi/rdi_coff.c" +#include 
"linker/path_ext/path.c" +#include "linker/hash_table.c" +#include "dwarf/dwarf.c" +#include "dwarf/dwarf_parse.c" +#include "dwarf/dwarf_coff.c" +#include "rdi_from_dwarf/rdi_from_dwarf.c" + +//////////////////////////////// +// Entry Point + +internal void +entry_point(CmdLine *cmdline) +{ + // initialize state and unpack command line + Arena *arena = arena_alloc(); + B32 do_help = (cmd_line_has_flag(cmdline, str8_lit("help")) || + cmd_line_has_flag(cmdline, str8_lit("h")) || + cmd_line_has_flag(cmdline, str8_lit("?"))); + + D2R_User2Convert *user2convert = d2r_user2convert_from_cmdln(arena, cmdline); + + // display help + if (do_help) { + fprintf(stderr, "--- rdi_from_dwarf ------------------------------------------------------------\n\n"); + + fprintf(stderr, "This utility converts debug information from DWARF into the RAD Debug Info\n"); + fprintf(stderr, "format. The following arguments are accepted:\n\n"); + + fprintf(stderr, "--exe: [optional] Specifies the path of the executable filefor which the\n"); + fprintf(stderr, " debug info was generated.\n"); + fprintf(stderr, "--debug: Specifies the path of the .DEBUG debug info file to\n"); + fprintf(stderr, " convert.\n"); + fprintf(stderr, "--out: Specifies the path at which the output will be written.\n\n"); + + if (!do_help) { + for (String8Node *n = user2convert->errors.first; n != 0; n = n->next) { + fprintf(stderr, "error(input): %.*s\n", str8_varg(n->string)); + } + } + + os_abort(0); + } + + RDIM_HelpState *rdim_help_state = rdim_help_init(); + + ProfBegin("convert"); + RDIM_BakeParams *convert2bake = d2r_convert(arena, user2convert); + ProfEnd(); + + ProfBegin("bake"); + RDIM_BakeResults bake2srlz = d2r_bake(rdim_help_state, convert2bake); + ProfEnd(); + + ProfBegin("serialize bake"); + RDIM_SerializedSectionBundle srlz2file = rdim_serialized_section_bundle_from_bake_results(&bake2srlz); + ProfEnd(); + + RDIM_SerializedSectionBundle srlz2file_compressed = srlz2file; + if (cmd_line_has_flag(cmdline, 
str8_lit("compress"))) { + ProfBegin("compress"); + srlz2file_compressed = d2r_compress(arena, srlz2file); + ProfEnd(); + } + + ProfBegin("serialize blobs"); + String8List blobs = rdim_file_blobs_from_section_bundle(arena, &srlz2file_compressed); + ProfEnd(); + + ProfBegin("write"); + if (!os_write_data_list_to_file_path(user2convert->output_name, blobs)) { + fprintf(stderr, "error(ouptut): unable to write to %.*s\n", str8_varg(user2convert->output_name)); + } + ProfEnd(); +} +