Dwarf parser WIP

Copied the parser from internal repo and integrated with the base layer
(not tested)

Parser uses based/range pattern for which we have an alternative in
string layer (str8_deserial_*)
This commit is contained in:
Nikita Smith
2024-12-26 21:54:25 -08:00
parent 76b742ceb8
commit b27b783e6f
12 changed files with 6085 additions and 0 deletions
+19
View File
@@ -600,6 +600,25 @@ rng1u64_list_push(Arena *arena, Rng1U64List *list, Rng1U64 rng)
list->count += 1;
}
// Appends every node of `to_concat` onto the tail of `list` by relinking the
// existing nodes (nothing is copied or allocated), then zeroes `to_concat` so
// the moved nodes are reachable from exactly one list.
//
// If `to_concat` has no nodes, neither list is modified.
internal void
rng1u64_list_concat(Rng1U64List *list, Rng1U64List *to_concat)
{
  if(to_concat->first)
  {
    // carry the node count across so `list->count` keeps matching the chain;
    // this has to happen before `to_concat` is zeroed below
    list->count += to_concat->count;
    if(list->first)
    {
      list->last->next = to_concat->first;
    }
    else
    {
      list->first = to_concat->first;
    }
    list->last = to_concat->last;
    MemoryZeroStruct(to_concat);
  }
}
internal Rng1U64Array
rng1u64_array_from_list(Arena *arena, Rng1U64List *list)
{
+1
View File
@@ -666,6 +666,7 @@ internal U32 u32_from_rgba(Vec4F32 rgba);
//~ rjf: List Type Functions
internal void rng1u64_list_push(Arena *arena, Rng1U64List *list, Rng1U64 rng);
internal void rng1u64_list_concat(Rng1U64List *list, Rng1U64List *to_concat);
internal Rng1U64Array rng1u64_array_from_list(Arena *arena, Rng1U64List *list);
internal void rng1s64_list_push(Arena *arena, Rng1S64List *list, Rng1S64 rng);
+64
View File
@@ -2370,3 +2370,67 @@ str8_deserial_read_block(String8 string, U64 off, U64 size, String8 *block_out)
*block_out = str8_substr(string, range);
return block_out->size;
}
// Decodes an unsigned LEB128 value from `string`, starting at byte offset `off`.
//
// value_out: optional (may be null); receives the decoded value.
// returns:   number of bytes consumed, including the terminating byte (the one
//            with the 0x80 continuation bit clear). Returns 0 when no byte
//            could be read at `off`.
//
// If the data ends before a terminating byte is found, the bits gathered so
// far are written to `value_out` and the count of bytes consumed is returned.
// Payload bits that do not fit in 64 bits are dropped.
internal U64
str8_deserial_read_uleb128(String8 string, U64 off, U64 *value_out)
{
  U64 value  = 0;
  U64 shift  = 0;
  U64 cursor = off;
  for(;;)
  {
    U8  byte       = 0;
    U64 bytes_read = str8_deserial_read_struct(string, cursor, &byte);
    if(bytes_read != sizeof(byte))
    {
      break;
    }

    // consume the byte before testing for termination, so that the final byte
    // is included in the returned size
    cursor += 1;

    // shifting a 64-bit value by 64 or more is undefined; over-long encodings
    // simply contribute nothing past bit 63
    if(shift < sizeof(value) * 8)
    {
      value |= ((U64)(byte & 0x7fu)) << shift;
    }
    shift += 7u;

    if((byte & 0x80u) == 0)
    {
      break;
    }
  }
  if(value_out != 0)
  {
    *value_out = value;
  }
  return cursor - off;
}
// Decodes a signed LEB128 value from `string`, starting at byte offset `off`.
//
// value_out: optional (may be null); receives the decoded, sign-extended value.
// returns:   number of bytes consumed, including the terminating byte (the one
//            with the 0x80 continuation bit clear). Returns 0 when no byte
//            could be read at `off`.
//
// If the data ends before a terminating byte is found, the bits gathered so
// far are written to `value_out` without sign extension and the count of bytes
// consumed is returned. Payload bits that do not fit in 64 bits are dropped.
internal U64
str8_deserial_read_sleb128(String8 string, U64 off, S64 *value_out)
{
  U64 value  = 0;
  U64 shift  = 0;
  U64 cursor = off;
  for(;;)
  {
    U8  byte       = 0;
    U64 bytes_read = str8_deserial_read_struct(string, cursor, &byte);
    if(bytes_read != sizeof(byte))
    {
      break;
    }

    // consume the byte before testing for termination, so that the final byte
    // is included in the returned size
    cursor += 1;

    // shifting a 64-bit value by 64 or more is undefined; over-long encodings
    // simply contribute nothing past bit 63
    if(shift < sizeof(value) * 8)
    {
      value |= ((U64)(byte & 0x7fu)) << shift;
    }
    shift += 7u;

    if((byte & 0x80u) == 0)
    {
      // bit 6 of the final byte is the sign bit: fill every bit above the
      // payload with ones. Done in unsigned arithmetic so that shift == 63
      // does not negate the most negative S64.
      if(shift < sizeof(value) * 8 && (byte & 0x40u) != 0)
      {
        value |= ~(U64)0 << shift;
      }
      break;
    }
  }
  if(value_out != 0)
  {
    *value_out = (S64)value;
  }
  return cursor - off;
}
+2
View File
@@ -404,6 +404,8 @@ internal void * str8_deserial_get_raw_ptr(String8 string, U64 off, U64 size);
internal U64 str8_deserial_read_cstr(String8 string, U64 off, String8 *cstr_out);
internal U64 str8_deserial_read_windows_utf16_string16(String8 string, U64 off, String16 *str_out);
internal U64 str8_deserial_read_block(String8 string, U64 off, U64 size, String8 *block_out);
internal U64 str8_deserial_read_uleb128(String8 string, U64 off, U64 *value_out);
internal U64 str8_deserial_read_sleb128(String8 string, U64 off, S64 *value_out);
#define str8_deserial_read_array(string, off, ptr, count) str8_deserial_read((string), (off), (ptr), sizeof(*(ptr))*(count), sizeof(*(ptr)))
#define str8_deserial_read_struct(string, off, ptr) str8_deserial_read_array(string, off, ptr, 1)
+11
View File
@@ -0,0 +1,11 @@
// Copyright (c) 2024 Epic Games Tools
// Licensed under the MIT license (https://opensource.org/license/mit/)
internal String8 dw_string_from_expr_op(Arena *arena, DW_ExprOp op);
internal String8 dw_string_from_tag_kind(Arena *arena, DW_TagKind kind);
internal String8 dw_string_from_attrib_kind(Arena *arena, DW_AttribKind kind);
internal String8 dw_string_from_form_kind(Arena *arena, DW_FormKind kind);
//internal String8 dw_string_from_register(Arena *arena, Arch arch, U64 reg_id);
File diff suppressed because it is too large Load Diff
+301
View File
@@ -0,0 +1,301 @@
// Copyright (c) 2024 Epic Games Tools
// Licensed under the MIT license (https://opensource.org/license/mit/)
#ifndef DWARF_EXPR_H
#define DWARF_EXPR_H
////////////////////////////////
//~ Dwarf Register Layout
// Set of seventeen 64-bit x64 register values. The anonymous union lets the
// same storage be accessed by register name or by index through `r`.
// DW_ExprMachineConfig carries a pointer to one of these (`regs`).
//
// NOTE(review): the field order looks like the DWARF register numbering from
// the System V x86-64 psABI (0 = rax, 1 = rdx, ..., 16 = return address), so
// `r[i]` would be indexed by DWARF register number - confirm before reordering.
typedef struct DW_RegsX64
{
union {
struct {
U64 rax;
U64 rdx;
U64 rcx;
U64 rbx;
U64 rsi;
U64 rdi;
U64 rbp;
U64 rsp;
U64 r8;
U64 r9;
U64 r10;
U64 r11;
U64 r12;
U64 r13;
U64 r14;
U64 r15;
U64 rip;
};
// index-based view of the named registers above
U64 r[17];
};
} DW_RegsX64;
////////////////////////////////
//~ Dwarf Expression Eval Types
#define DW_READ_MEMORY_SIG(name) U64 name(U64 addr, U64 size, void *out, void *ud)
typedef DW_READ_MEMORY_SIG(DW_ReadMemorySig);
//- machine configuration types
typedef String8 DW_ExprResolveCallFunc(void *call_user_ptr, U64 p);
typedef struct DW_ExprMachineCallConfig
{
void *user_ptr;
DW_ExprResolveCallFunc *func;
} DW_ExprMachineCallConfig;
// Machine state a caller equips before a full evaluation (dw_expr__eval takes
// a pointer to this). Every member other than max_step_count lines up with a
// DW_ExprFlag_Uses* analysis flag and a DW_LocFailKind_Missing* failure kind.
//
// NOTE(review): presumably a null pointer member means "not equipped" and the
// evaluator reports the matching Missing* failure when an op needs it -
// confirm against the evaluator implementation.
typedef struct DW_ExprMachineConfig
{
U64 max_step_count; // (read only in the eval functions)
DW_ReadMemorySig *read_memory;
void *read_memory_ud; // NOTE(review): presumably forwarded as the `ud` argument of read_memory - confirm
DW_RegsX64 *regs;
U64 *text_section_base;
U64 *frame_base;
U64 *object_address;
U64 *tls_address;
U64 *cfa;
DW_ExprMachineCallConfig call;
} DW_ExprMachineConfig;
//- detail analysis types
// Bit flags stored in DW_ExprAnalysis.flags.
//
// Bits 0-8  (Uses*): which pieces of machine configuration, or which features,
//                    the expression refers to.
// Bits 16-18:        problems or properties found while scanning (unsupported
//                    feature, bad data, possibility of non-linear flow).
typedef U32 DW_ExprFlags;
enum
{
DW_ExprFlag_UsesTextBase = (1 << 0),
DW_ExprFlag_UsesMemory = (1 << 1),
DW_ExprFlag_UsesRegisters = (1 << 2),
DW_ExprFlag_UsesFrameBase = (1 << 3),
DW_ExprFlag_UsesObjectAddress = (1 << 4),
DW_ExprFlag_UsesTLSAddress = (1 << 5),
DW_ExprFlag_UsesCFA = (1 << 6),
DW_ExprFlag_UsesCallResolution = (1 << 7),
DW_ExprFlag_UsesComposite = (1 << 8),
DW_ExprFlag_NotSupported = (1 << 16),
DW_ExprFlag_BadData = (1 << 17),
DW_ExprFlag_NonLinearFlow = (1 << 18)
};
typedef struct DW_ExprAnalysis
{
DW_ExprFlags flags;
} DW_ExprAnalysis;
typedef struct DW_ExprAnalysisTask
{
struct DW_ExprAnalysisTask *next;
U64 p;
String8 data;
} DW_ExprAnalysisTask;
//- location types
typedef enum DW_SimpleLocKind
{
DW_SimpleLocKind_Address,
DW_SimpleLocKind_Register,
DW_SimpleLocKind_Value,
DW_SimpleLocKind_ValueLong,
DW_SimpleLocKind_Empty,
DW_SimpleLocKind_Fail,
} DW_SimpleLocKind;
// Reason attached to a DW_SimpleLoc whose kind is DW_SimpleLocKind_Fail.
typedef enum DW_LocFailKind
{
// Interpreting Fail Kinds
//
// BadData: the evaluator detected that the dwarf expression operation is incorrectly formed
// NotSupported: the evaluator does not support a dwarf feature that was found in the dwarf expression
// TimeOut: the evaluator hit the maximum step count
// TooComplicated: used by the analyzer when the expression uses features outside of the analyzer's scope
// Missing*: the dwarf machine config was missing necessary information to finish the evaluation
DW_LocFailKind_BadData,
DW_LocFailKind_NotSupported,
DW_LocFailKind_TimeOut,
DW_LocFailKind_TooComplicated,
DW_LocFailKind_MissingTextBase,
DW_LocFailKind_MissingMemory,
DW_LocFailKind_MissingRegisters,
DW_LocFailKind_MissingFrameBase,
DW_LocFailKind_MissingObjectAddress,
DW_LocFailKind_MissingTLSAddress,
DW_LocFailKind_MissingCFA,
DW_LocFailKind_MissingCallResolution,
DW_LocFailKind_MissingArenaForComposite,
} DW_LocFailKind;
// One location description, tagged by `kind`, with the payload in the union.
//
// NOTE(review): the active union member presumably follows `kind`
// (Address -> addr, Register -> reg_idx, Value -> val, ValueLong -> val_long,
// Fail -> fail_kind + fail_data, Empty -> none) - confirm against the
// evaluator before relying on it.
typedef struct DW_SimpleLoc
{
DW_SimpleLocKind kind;
union {
U64 addr;
U64 reg_idx;
U64 val;
String8 val_long;
struct {
DW_LocFailKind fail_kind;
U64 fail_data;
};
};
} DW_SimpleLoc;
typedef struct DW_Piece
{
// Hint for Interpreting Pieces
//
// src = decode(loc, is_bit_loc, bit_size);
// dst |= (src >> bit_off) << bit_cursor;
// bit_cursor += bit_size;
struct DW_Piece *next;
DW_SimpleLoc loc;
U64 bit_size;
U64 bit_off;
B32 is_bit_loc;
} DW_Piece;
typedef struct DW_Location
{
// Interpreting a Dwarf Location
//
// CASE (any number of pieces, fail in the non-piece):
// this is how errors are reported, error information is in the non-piece
// the 'fail' location kind should never show up in a piece
// if there are any pieces they can be treated as correct information that
// was successfully decoded before the error was encountered
//
// CASE (no pieces, empty non-piece):
// the data is completely optimized out and unrecoverable
//
// CASE (no pieces, non-empty non-piece):
// the size of the data is not known by the location, but something in the
// surrounding context of the location (eg type info) should know the size
//
// CASE (one-or-more pieces, empty non-piece):
// the data is described by the pieces
//
// CASE (one-or-more pieces, non-empty non-fail non-piece):
// this is supposed to be impossible; the non-piece either carries an error
// or *all* of the location information about the data, there should never
// be a mix of piece-based location and non-piece-based location data.
DW_Piece *first_piece;
DW_Piece *last_piece;
U64 count;
DW_SimpleLoc non_piece_loc;
} DW_Location;
//- full evaluator state types
typedef struct DW_ExprStackNode
{
struct DW_ExprStackNode *next;
U64 val;
} DW_ExprStackNode;
typedef struct DW_ExprStack
{
DW_ExprStackNode *stack;
DW_ExprStackNode *free_nodes;
U64 count;
} DW_ExprStack;
typedef struct DW_ExprCall
{
struct DW_ExprCall *next;
void *ptr;
U64 size;
U64 cursor;
} DW_ExprCall;
typedef struct DW_ExprCallStack
{
DW_ExprCall *stack;
DW_ExprCall *free_calls;
U64 depth;
} DW_ExprCallStack;
////////////////////////////////
//~ Dwarf Expression Analysis & Eval Functions
//- analyzers
// This analyzer provides the most simplified dwarf expression
// decoding. If the expression consists of a single op that can be interpreted
// as a valid dwarf expression, then it represents that expression as a simple
// location.
//
// If there is a single 'piece' op, it is represented here as an empty simple
// location, losing whatever additional size information the piece carried.
//
// If there is an op that requires the machine configuration data the analyzer
// fails with "too complicated" - unless the required configuration data is the
// text section base which this analyzer treats as a non-optional parameter and
// always decodes successfully.
//
// If the expression contains more than one op then the analyzer fails with
// "too complicated".
internal DW_SimpleLoc dw_expr__analyze_fast(void *base, Rng1U64 range, U64 text_section_base);
// This analyzer does a one-pass scan through the expression to
// help a caller determine what to expect before doing a full evaluation which
// has to maintain value stacks, perform more checks, and execute any loops
// that may appear in the expression, etc.
//
// For each piece of data that can be equipped to a machine config there is a
// 'Uses' flag in the analysis. A user can use these flags to determine what to
// prepare and equip before a full eval. This can be a lot more efficient than
// always preparing everything, or iteratively equipping and retrying after
// each failure.
//
// The analysis can also catch some cases of bad data and unsupported features.
// These flags are useful for short circuit style optimizations, but they are
// not definitive, some bad data can only be caught by the full evaluator.
// Sometimes the full evaluator might miss bad data that this analyzer will see
// if control flow in the evaluator completely skips the bad data. A forgiving
// interpretation of dwarf expression data would only rely on the results of
// the full evaluator. A more strict interpretation would consider it an error
// if either this analyzer or the evaluator finds bad data.
//
// The analyzer also determines if there is any possibility for non-linear
// flow. Jumps, branches, and call ops all create non-linear flow. An
// expression that doesn't have non-linear flow is trivially guaranteed to
// terminate and therefore a good candidate for conversion to a human readable
// expression.
//
// The call config is optional (may be null). If it is provided the analysis
// includes features seen in all of the expressions that might be reached by
// call ops from the initial expression.
internal DW_ExprAnalysis dw_expr__analyze_details(void *base, Rng1U64 range, DW_ExprMachineCallConfig *call_config);
//- full eval
internal DW_Location dw_expr__eval(Arena *arena_optional, void *base, Rng1U64 range, DW_ExprMachineConfig *config);
//- dw expr val stack
internal DW_ExprStack dw_expr__stack_make(Arena *arena);
internal void dw_expr__stack_push(Arena *arena, DW_ExprStack *stack, U64 x);
internal U64 dw_expr__stack_pop(DW_ExprStack *stack);
internal U64 dw_expr__stack_pick(DW_ExprStack *stack, U64 idx);
internal B32 dw_expr__stack_is_empty(DW_ExprStack *stack);
//- dw expr call stack
internal DW_ExprCall* dw_expr__call_top(DW_ExprCallStack *stack);
internal void dw_expr__call_push(Arena *arena, DW_ExprCallStack *stack, void *ptr, U64 size);
internal void dw_expr__call_pop(DW_ExprCallStack *stack);
//- analysis tasks
internal DW_ExprAnalysisTask* dw_expr__analysis_task_from_p(DW_ExprAnalysisTask *first, U64 p);
#endif //DWARF_EXPR_H
+42
View File
@@ -0,0 +1,42 @@
--- DWARF NOTES ---------------------------------------------------------------
DWARF V4 Spec: http://www.dwarfstd.org/doc/DWARF4.pdf
DWARF V5 Spec: http://www.dwarfstd.org/doc/DWARF5.pdf
-------------------------------------------------------------------------------
$ (2021/04/30) On .debug_pubtypes, .debug_pubnames, and .debug_names:
.debug_pubtypes and .debug_pubnames are tables that map from a string (the name
of a type or function respectively) to an offset into .debug_info, which is the
offset of the Debug Information Entry (DIE, in DWARF terminology) of the info
associated with the string. THESE TWO SECTIONS ARE OPTIONAL. They don't show up
in every DWARF-holding file, and so they cannot be relied upon as acceleration
structures. But we're going to support parsing them, to make things a bit nicer
in cases where they are present. DWARF doesn't have much in the way of acceler-
ation structures built in, so our rationale is that we should take anything we
can get to make the format a bit faster in some subset of the possible cases.
.debug_names is a DWARF V5 section that is intended to replace .debug_pubtypes
and .debug_pubnames. However, even in cases when DWARF V5 is produced at the
time of writing this, we have not found .debug_names sections being produced.
We did not exhaustively test all compilers and configurations, but it seems
that it is not well-supported at all by major compilers, and there's a very low
probability that a user will have that section, so our current thinking is
that there's no point in supporting it right now.
-------------------------------------------------------------------------------
$ (2021/04/30) On producing DWARF V5 with Clang:
https://lists.llvm.org/pipermail/llvm-dev/2018-August/125068.html
It looks like (at the time of writing this) that Clang, by default,
will produce DWARF V4. To produce DWARF V5, however, you can use the -gdwarf-5
option. Even when that option is used, it seems that some features of V5 are
not used (for example, .debug_names).
The above link also says that this will produce .debug_names, but it doesn't as
of Clang 10.
-------------------------------------------------------------------------------
File diff suppressed because it is too large Load Diff
+491
View File
@@ -0,0 +1,491 @@
// Copyright (c) 2024 Epic Games Tools
// Licensed under the MIT license (https://opensource.org/license/mit/)
#ifndef DWARF_PARSE_H
#define DWARF_PARSE_H
// NOTE(rjf): Some rules about the spaces of offsets and ranges:
//
// - Every stored/passed offset is relative to the base of its section.
// - Every stored/passed range has endpoints relative to the base of their section.
// - Upon calling a syms_based_range_* function, these offsets need to be
// converted into range-relative.
////////////////////////////////
//~ rjf: Constants
#define DWARF_VOID_TYPE_ID 0xffffffffffffffffull
////////////////////////////////
//~ rjf: Files + External Debug References
typedef struct DW_ExtDebugRef DW_ExtDebugRef;
struct DW_ExtDebugRef
{
// NOTE(rjf): .dwo => an external DWARF V5 .dwo file
String8 dwo_path;
U64 dwo_id;
};
////////////////////////////////
//~ rjf: Abbrev Table
typedef struct DW_AbbrevTableEntry DW_AbbrevTableEntry;
struct DW_AbbrevTableEntry
{
U64 id;
U64 off;
};
typedef struct DW_AbbrevTable DW_AbbrevTable;
struct DW_AbbrevTable
{
U64 count;
DW_AbbrevTableEntry *entries;
};
////////////////////////////////
//~ Sections
typedef struct DW_Section DW_Section;
struct DW_Section
{
String8 data;
DW_Mode mode;
B32 is_dwo;
};
typedef struct DW_SectionArray DW_SectionArray;
struct DW_SectionArray
{
DW_Section v[DW_Section_Count];
};
////////////////////////////////
//~ rjf: Basic Line Info
typedef struct DW_LineFile DW_LineFile;
struct DW_LineFile
{
String8 file_name;
U64 dir_idx;
U64 modify_time;
U64 md5_digest[2];
U64 file_size;
};
typedef struct DW_LineVMFileNode DW_LineVMFileNode;
struct DW_LineVMFileNode
{
DW_LineVMFileNode *next;
DW_LineFile file;
};
typedef struct DW_LineVMFileList DW_LineVMFileList;
struct DW_LineVMFileList
{
U64 node_count;
DW_LineVMFileNode *first;
DW_LineVMFileNode *last;
};
typedef struct DW_LineVMFileArray DW_LineVMFileArray;
struct DW_LineVMFileArray
{
U64 count;
DW_LineFile *v;
};
////////////////////////////////
//~ rjf: Abbrevs
typedef enum DW_AbbrevKind
{
DW_Abbrev_Null,
DW_Abbrev_Tag,
DW_Abbrev_Attrib,
DW_Abbrev_AttribSequenceEnd,
DW_Abbrev_DIEBegin,
DW_Abbrev_DIEEnd,
}
DW_AbbrevKind;
typedef U32 DW_AbbrevFlags;
enum{
DW_AbbrevFlag_HasImplicitConst = (1<<0),
DW_AbbrevFlag_HasChildren = (1<<1),
};
typedef struct DW_Abbrev DW_Abbrev;
struct DW_Abbrev
{
DW_AbbrevKind kind;
Rng1U64 abbrev_range;
U64 sub_kind;
U64 id;
U64 const_value;
DW_AbbrevFlags flags;
};
////////////////////////////////
//~ rjf: Attribs
// Per-compilation-unit context used when turning a raw form value into an
// attribute value: produced by dw_attrib_value_resolve_params_from_comp_root
// and consumed by dw_attrib_value_from_form_value.
typedef struct DW_AttribValueResolveParams DW_AttribValueResolveParams;
struct DW_AttribValueResolveParams
{
DW_Version version;
DW_Language language;
U64 addr_size; // NOTE(rjf): size in bytes of containing compilation unit's addresses
U64 containing_unit_info_off; // NOTE(rjf): containing compilation unit's offset into the .debug_info section
U64 debug_addrs_base; // NOTE(rjf): containing compilation unit's offset into the .debug_addr section (DWARF V5 ONLY)
U64 debug_rnglists_base; // NOTE(rjf): containing compilation unit's offset into the .debug_rnglists section (DWARF V5 ONLY)
U64 debug_str_offs_base; // NOTE(rjf): containing compilation unit's offset into the .debug_str_offsets section (DWARF V5 ONLY)
U64 debug_loclists_base; // NOTE(rjf): containing compilation unit's offset into the .debug_loclists section (DWARF V5 ONLY)
};
typedef struct DW_AttribValue DW_AttribValue;
struct DW_AttribValue
{
DW_SectionKind section;
U64 v[2];
};
typedef struct DW_Attrib DW_Attrib;
struct DW_Attrib
{
U64 info_off;
U64 abbrev_id;
DW_AttribKind attrib_kind;
DW_FormKind form_kind;
DW_AttribClass value_class;
DW_AttribValue form_value;
};
typedef struct DW_AttribArray DW_AttribArray;
struct DW_AttribArray
{
DW_Attrib *v;
U64 count;
};
typedef struct DW_AttribNode DW_AttribNode;
struct DW_AttribNode
{
DW_AttribNode *next;
DW_Attrib attrib;
};
typedef struct DW_AttribList DW_AttribList;
struct DW_AttribList
{
DW_AttribNode *first;
DW_AttribNode *last;
U64 count;
};
typedef struct DW_AttribListParseResult DW_AttribListParseResult;
struct DW_AttribListParseResult
{
DW_AttribList attribs;
U64 max_info_off;
U64 max_abbrev_off;
};
////////////////////////////////
//~ rjf: Compilation Units + Accelerators
typedef struct DW_CompRoot DW_CompRoot;
struct DW_CompRoot
{
// NOTE(rjf): Header Data
U64 size;
DW_CompUnitKind kind;
DW_Version version;
DW_Ext ext;
U64 address_size;
U64 abbrev_off;
U64 info_off;
Rng1U64 tags_info_range;
DW_AbbrevTable abbrev_table;
// NOTE(rjf): [parsed from DWARF attributes] Offsets For More Info (DWARF V5 ONLY)
U64 rnglist_base; // NOTE(rjf): Offset into the .debug_rnglists section where this comp unit's data is.
U64 loclist_base; // NOTE(rjf): Offset into the .debug_loclists section where this comp unit's data is.
U64 addrs_base; // NOTE(rjf): Offset into the .debug_addr section where this comp unit's data is.
U64 stroffs_base; // NOTE(rjf): Offset into the .debug_str_offsets section where this comp unit's data is.
// NOTE(rjf): [parsed from DWARF attributes] General Info
String8 name;
String8 producer;
String8 compile_dir;
String8 external_dwo_name;
U64 dwo_id;
DW_Language language;
U64 name_case;
B32 use_utf8;
U64 line_off;
U64 low_pc;
U64 high_pc;
DW_AttribValue ranges_attrib_value;
U64 base_addr;
// NOTE(rjf): Line/File Info For This Comp Unit
String8Array dir_table;
DW_LineVMFileArray file_table;
};
////////////////////////////////
//~ rjf: Tags
typedef struct DW_Tag DW_Tag;
struct DW_Tag
{
DW_Tag *next_sibling;
DW_Tag *first_child;
DW_Tag *last_child;
DW_Tag *parent;
Rng1U64 info_range;
Rng1U64 abbrev_range;
B32 has_children;
U64 abbrev_id;
DW_TagKind kind;
U64 attribs_info_off;
U64 attribs_abbrev_off;
DW_AttribList attribs;
};
typedef U32 DW_TagStubFlags;
enum
{
DW_TagStubFlag_HasObjectPointerArg = (1<<0),
DW_TagStubFlag_HasLocation = (1<<1),
DW_TagStubFlag_HasExternal = (1<<2),
DW_TagStubFlag_HasSpecification = (1<<3),
};
typedef struct DW_TagStub DW_TagStub;
struct DW_TagStub
{
U64 info_off;
DW_TagKind kind;
DW_TagStubFlags flags;
U64 children_info_off;
U64 attribs_info_off;
U64 attribs_abbrev_off;
// NOTE(rjf): DW_Attrib_Specification is tacked onto definitions that
// are filling out more info about a "prototype". That attribute is a reference
// that points back at the declaration tag. The declaration tag has the
// DW_Attrib_Declaration attribute, which is sort of like the reverse
// of that, except there's no reference. So what we're doing here is just storing
// a reference on both, that point back to each other, so it's always easy to
// get from decl => spec, or from spec => decl.
//SYMS_SymbolID ref;
// NOTE(rjf): DW_Attrib_AbstractOrigin is tacked onto some definitions
// that are used to specify information more specific to inlining, while wanting
// to refer to an "abstract" function DIE, that is not specific to any inline
// sites. The DWARF generator will not duplicate information across these, so
// we will occasionally need to look at an abstract origin to get abstract
// information, like name/linkage-name/etc.
//SYMS_SymbolID abstract_origin;
U64 _unused_;
};
typedef struct DW_TagStubNode DW_TagStubNode;
struct DW_TagStubNode
{
DW_TagStubNode *next;
DW_TagStub stub;
};
typedef struct DW_TagStubList DW_TagStubList;
struct DW_TagStubList
{
DW_TagStubNode *first;
DW_TagStubNode *last;
U64 count;
};
////////////////////////////////
//~ rjf: Line Info VM Types
typedef struct DW_LineVMHeader DW_LineVMHeader;
struct DW_LineVMHeader
{
U64 unit_length;
U64 unit_opl;
U16 version;
U8 address_size; // NOTE(nick): duplicates size from the compilation unit but is needed to support stripped exe that just have .debug_line and .debug_line_str.
U8 segment_selector_size;
U64 header_length;
U64 program_off;
U8 min_inst_len;
U8 max_ops_for_inst;
U8 default_is_stmt;
S8 line_base;
U8 line_range;
U8 opcode_base;
U64 num_opcode_lens;
U8 *opcode_lens;
String8Array dir_table;
DW_LineVMFileArray file_table;
};
typedef struct DW_LineVMState DW_LineVMState;
struct DW_LineVMState
{
U64 address; // NOTE(nick): Address of a machine instruction.
U32 op_index; // NOTE(nick): This is used by the VLIW instructions to indicate index of operation inside the instruction.
// NOTE(nick): Line table doesn't contain full path to a file, instead
// DWARF encodes path as two indices. First index will point into a directory
// table, and second points into a file name table.
U32 file_index;
U32 line;
U32 column;
B32 is_stmt; // NOTE(nick): Indicates that "address" points to place suitable for a breakpoint.
B32 basic_block; // NOTE(nick): Indicates that the "address" is inside a basic block.
// NOTE(nick): Indicates that "address" points to place where function starts.
// Usually prologue is the place where compiler emits instructions to
// prepare stack for a function.
B32 prologue_end;
B32 epilogue_begin; // NOTE(nick): Indicates that "address" points to section where function exits and unwinds stack.
U64 isa; // NOTE(nick): Instruction set that is used.
U64 discriminator; // NOTE(nick): Arbitrary id that indicates to which block these instructions belong.
B32 end_sequence; // NOTE(nick): Indicates that "address" points to the first instruction in the instruction block that follows.
// NOTE(rjf): it looks like LTO might sometimes zero out high PC and low PCs, causing a
// swath of line info to map to a range starting at 0. This causes overlapping ranges
// which we do not want to report. So this B32 will turn on emission.
B32 busted_seq;
};
typedef struct DW_Line DW_Line;
struct DW_Line
{
U64 file_index;
U32 line;
U32 column;
U64 voff;
};
typedef struct DW_LineNode DW_LineNode;
struct DW_LineNode
{
DW_LineNode *next;
DW_Line v;
};
typedef struct DW_LineSeqNode DW_LineSeqNode;
struct DW_LineSeqNode
{
DW_LineSeqNode *next;
U64 count;
DW_LineNode *first;
DW_LineNode *last;
};
typedef struct DW_LineTableParseResult DW_LineTableParseResult;
struct DW_LineTableParseResult
{
U64 seq_count;
DW_LineSeqNode *first_seq;
DW_LineSeqNode *last_seq;
};
////////////////////////////////
//~ rjf: .debug_pubnames and .debug_pubtypes
typedef struct DW_PubStringsBucket DW_PubStringsBucket;
struct DW_PubStringsBucket
{
DW_PubStringsBucket *next;
String8 string;
U64 info_off;
U64 cu_info_off;
};
typedef struct DW_PubStringsTable DW_PubStringsTable;
struct DW_PubStringsTable
{
U64 size;
DW_PubStringsBucket **buckets;
};
////////////////////////////////
//~ rjf: Basic Helpers
internal U64 dw_hash_from_string(String8 string);
internal DW_AttribClass dw_pick_attrib_value_class(DW_Version ver, DW_Ext ext, DW_Language lang, DW_AttribKind attrib, DW_FormKind form_kind);
////////////////////////////////
//~ Specific Based Range Helpers
internal U64 dw_based_range_read_length(void *base, Rng1U64 range, U64 offset, U64 *out_value);
internal U64 dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev);
internal U64 dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev);
internal U64 dw_based_range_read_attrib_form_value(void *base, Rng1U64 range, U64 offset, DW_Mode mode, U64 address_size, DW_FormKind form_kind, U64 implicit_const, DW_AttribValue *form_value_out);
internal DW_Mode dw_mode_from_sec(DW_SectionArray *sections, DW_SectionKind kind);
internal B32 dw_sec_is_present(DW_SectionArray *sections, DW_SectionKind kind);
internal void* dw_base_from_sec(DW_SectionArray *sections, DW_SectionKind kind);
internal Rng1U64 dw_range_from_sec(DW_SectionArray *sections, DW_SectionKind kind);
////////////////////////////////
//~ rjf: Abbrev Table
internal DW_AbbrevTable dw_make_abbrev_table(Arena *arena, DW_SectionArray *sections, U64 start_abbrev_off);
internal U64 dw_abbrev_offset_from_abbrev_id(DW_AbbrevTable table, U64 abbrev_id);
////////////////////////////////
//~ rjf: Miscellaneous DWARF Section Parsing
//- rjf: .debug_ranges (DWARF V4)
internal Rng1U64List dw_v4_range_list_from_range_offset(Arena *arena, DW_SectionArray *sections, U64 addr_size, U64 comp_unit_base_addr, U64 range_off);
//- rjf: .debug_pubtypes + .debug_pubnames (DWARF V4)
internal DW_PubStringsTable dw_v4_pub_strings_table_from_section_kind(Arena *arena, DW_SectionArray *sections, DW_SectionKind section_kind);
//- rjf: .debug_str_offsets (DWARF V5)
internal U64 dw_v5_offset_from_offs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index);
//- rjf: .debug_addr (DWARF V5)
internal U64 dw_v5_addr_from_addrs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index);
//- rjf: .debug_rnglists parsing (DWARF V5)
internal U64 dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(DW_SectionArray *sections, DW_SectionKind section_kind, U64 base, U64 index);
internal Rng1U64List dw_v5_range_list_from_rnglist_offset(Arena *arena, DW_SectionArray *sections, DW_SectionKind section, U64 addr_size, U64 addr_section_base, U64 offset);
////////////////////////////////
//~ rjf: Attrib Value Parsing
internal DW_AttribValueResolveParams dw_attrib_value_resolve_params_from_comp_root(DW_CompRoot *root);
internal DW_AttribValue dw_attrib_value_from_form_value(DW_SectionArray *sections, DW_AttribValueResolveParams resolve_params, DW_FormKind form_kind, DW_AttribClass value_class, DW_AttribValue form_value);
internal String8 dw_string_from_attrib_value(DW_SectionArray *sections, DW_AttribValue value);
internal Rng1U64List dw_range_list_from_high_low_pc_and_ranges_attrib_value(Arena *arena, DW_SectionArray *sections, U64 address_size, U64 comp_unit_base_addr, U64 addr_section_base, U64 low_pc, U64 high_pc, DW_AttribValue ranges_value);
////////////////////////////////
//~ rjf: Tag Parsing
internal DW_AttribListParseResult dw_parse_attrib_list_from_info_abbrev_offsets(Arena *arena, DW_SectionArray *sections, DW_Version ver, DW_Ext ext, DW_Language lang, U64 address_size, U64 info_off, U64 abbrev_off);
internal DW_Tag* dw_tag_from_info_offset(Arena *arena, DW_SectionArray *sections, DW_AbbrevTable abbrev_table, DW_Version ver, DW_Ext ext, DW_Language lang, U64 address_size, U64 info_offset);
internal DW_TagStub dw_stub_from_tag(DW_SectionArray *sections, DW_AttribValueResolveParams resolve_params, DW_Tag *tag);
//- rjf: line info
internal void dw_line_vm_reset(DW_LineVMState *state, B32 default_is_stmt);
internal void dw_line_vm_advance(DW_LineVMState *state, U64 advance, U64 min_inst_len, U64 max_ops_for_inst);
internal DW_LineSeqNode* dw_push_line_seq(Arena* arena, DW_LineTableParseResult *parsed_tbl);
internal DW_LineNode* dw_push_line(Arena *arena, DW_LineTableParseResult *tbl, DW_LineVMState *vm_state, B32 start_of_sequence);
internal DW_LineTableParseResult dw_parsed_line_table_from_comp_root(Arena *arena, DW_SectionArray *sections, DW_CompRoot *root);
internal U64 dw_read_line_file(void *line_base, Rng1U64 line_rng, U64 line_off, DW_Mode mode, DW_SectionArray *sections, DW_CompRoot *unit, U8 address_size, U64 format_count, Rng1U64 *formats, DW_LineFile *line_file_out);
internal U64 dw_read_line_vm_header(Arena *arena, void *line_base, Rng1U64 line_rng, U64 line_off, DW_Mode mode, DW_SectionArray *sections, DW_CompRoot *unit, DW_LineVMHeader *header_out);
#endif // DWARF_PARSE_H
File diff suppressed because it is too large Load Diff
+224
View File
@@ -0,0 +1,224 @@
// Copyright (c) 2024 Epic Games Tools
// Licensed under the MIT license (https://opensource.org/license/mit/)
#ifndef DWARF_UNWIND_H
#define DWARF_UNWIND_H
typedef struct DW_UnwindResult
{
B32 is_invalid;
B32 missed_read;
U64 missed_read_addr;
U64 stack_pointer;
} DW_UnwindResult;
// EH: Exception Frames
// One-byte pointer encoding descriptor, split into three fields:
//
//   bits 0-3 (DW_EhPtrEnc_TypeMask):   how the value is stored (size / signedness / LEB128)
//   bits 4-6 (DW_EhPtrEnc_ModifyMask): what the value is relative to
//   bit  7   (DW_EhPtrEnc_Indirect):   value is stored in virtual memory
//
// DW_EhPtrEnc_Omit (0xFF) is a whole-byte value, not a combination of fields.
//
// NOTE(review): the values look like the DW_EH_PE_* pointer encodings used by
// .eh_frame (Linux Standard Base) - confirm against the spec.
typedef U8 DW_EhPtrEnc;
// value format, selected with DW_EhPtrEnc_TypeMask
enum
{
DW_EhPtrEnc_TypeMask = 0x0F,
DW_EhPtrEnc_Ptr = 0x00, // Pointer sized unsigned value
DW_EhPtrEnc_ULEB128 = 0x01, // Unsigned LE base-128 value
DW_EhPtrEnc_UData2 = 0x02, // Unsigned 16-bit value
DW_EhPtrEnc_UData4 = 0x03, // Unsigned 32-bit value
DW_EhPtrEnc_UData8 = 0x04, // Unsigned 64-bit value
DW_EhPtrEnc_Signed = 0x08, // Signed pointer
DW_EhPtrEnc_SLEB128 = 0x09, // Signed LE base-128 value
DW_EhPtrEnc_SData2 = 0x0A, // Signed 16-bit value
DW_EhPtrEnc_SData4 = 0x0B, // Signed 32-bit value
DW_EhPtrEnc_SData8 = 0x0C, // Signed 64-bit value
};
// base the value is relative to, selected with DW_EhPtrEnc_ModifyMask
enum
{
DW_EhPtrEnc_ModifyMask = 0x70,
DW_EhPtrEnc_PcRel = 0x10, // Value is relative to the current program counter.
DW_EhPtrEnc_TextRel = 0x20, // Value is relative to the .text section.
DW_EhPtrEnc_DataRel = 0x30, // Value is relative to the .got or .eh_frame_hdr section.
DW_EhPtrEnc_FuncRel = 0x40, // Value is relative to the function.
DW_EhPtrEnc_Aligned = 0x50, // Value is aligned to an address unit sized boundary.
};
// indirection flag and the "no value present" marker
enum
{
DW_EhPtrEnc_Indirect = 0x80, // This flag indicates that value is stored in virtual memory.
DW_EhPtrEnc_Omit = 0xFF,
};
typedef struct DW_EhPtrCtx
{
U64 raw_base_vaddr; // address where pointer is being read
U64 text_vaddr; // base address of section with instructions (used for encoding pointer on SH and IA64)
U64 data_vaddr; // base address of data section (used for encoding pointer on x86-64)
U64 func_vaddr; // base address of function where IP is located
} DW_EhPtrCtx;
// CIE: Common Information Entry
// Decoded form of a Common Information Entry. dw_unwind_parse_cie_x64 takes a
// pointer to one of these as its cie_out parameter.
// NOTE(review): the per-field notes are inferred from the field names and the
// usual CIE layout; confirm them against the parser.
typedef struct DW_CIEUnpacked
{
U8 version;
DW_EhPtrEnc lsda_encoding; // pointer encoding; presumably used when decoding DW_FDEUnpacked.lsda_ip
DW_EhPtrEnc addr_encoding; // pointer encoding; presumably used when decoding FDE address fields
B32 has_augmentation_size; // presumably tells whether augmentation_size is meaningful
U64 augmentation_size;
String8 augmentation;
U64 code_align_factor; // unsigned
S64 data_align_factor; // signed, unlike code_align_factor
U64 ret_addr_reg;
U64 handler_ip;
Rng1U64 cfi_range; // presumably the span of this entry's call frame instructions in the parsed data
} DW_CIEUnpacked;
// Singly linked list node (chained through next) that stores an unpacked CIE by
// value together with a U64 offset.
// NOTE(review): offset is presumably the CIE's offset inside the .eh_frame data,
// kept so entries can be looked up by offset -- confirm at the use site.
typedef struct DW_CIEUnpackedNode
{
struct DW_CIEUnpackedNode *next;
DW_CIEUnpacked cie;
U64 offset;
} DW_CIEUnpackedNode;
// FDE: Frame Description Entry
// Decoded form of a Frame Description Entry. dw_unwind_parse_fde_x64 takes a
// pointer to one of these as its fde_out parameter.
// NOTE(review): the per-field notes are inferred from the field names; confirm
// them against the parser.
typedef struct DW_FDEUnpacked
{
Rng1U64 ip_voff_range; // presumably the instruction-pointer virtual-offset range this entry describes
U64 lsda_ip;
Rng1U64 cfi_range; // presumably the span of this entry's call frame instructions in the parsed data
} DW_FDEUnpacked;
// CFI: Call Frame Information
// A CIE/FDE pair (both stored by value) plus a validity flag. Returned by value
// from dw_unwind_eh_frame_cfi_from_ip_slow_x64 and
// dw_unwind_eh_frame_hdr_from_ip_fast_x64.
// NOTE(review): cie and fde are presumably meaningful only when valid is
// non-zero -- confirm in the lookup routines.
typedef struct DW_CFIRecords
{
B32 valid;
DW_CIEUnpacked cie;
DW_FDEUnpacked fde;
} DW_CFIRecords;
// Discriminator stored in DW_CFICFACell.rule. RegOff is the first enumerator
// and therefore has value 0.
// NOTE(review): the enumerators are spelled DW_CFI_CFA_Rule_* rather than
// DW_CFICFARule_*, unlike DW_CFIRegisterRule_*; renaming would affect callers.
typedef enum DW_CFICFARule{
DW_CFI_CFA_Rule_RegOff, // presumably selects DW_CFICFACell.reg_idx / .offset
DW_CFI_CFA_Rule_Expr, // presumably selects DW_CFICFACell.expr
} DW_CFICFARule;
// CFA description: a rule tag followed by a union of two payload shapes.
// The struct and the union are both anonymous (C11, or a compiler extension
// before that), so members are accessed directly: cell.reg_idx, cell.offset,
// cell.expr. The two payload shapes share storage.
// NOTE(review): which payload is live is presumably decided by rule -- confirm.
typedef struct DW_CFICFACell
{
DW_CFICFARule rule;
union {
struct {
U64 reg_idx;
S64 offset; // signed
};
Rng1U64 expr; // a range; presumably locates an expression inside the frame data
};
} DW_CFICFACell;
// Discriminator stored in DW_CFICell.rule. The enumerator names follow the
// register rules listed in the DWARF call frame information chapter.
// SameValue is the first enumerator and therefore has value 0, so a
// zero-filled DW_CFICell carries the SameValue rule.
typedef enum DW_CFIRegisterRule
{
DW_CFIRegisterRule_SameValue,
DW_CFIRegisterRule_Undefined,
DW_CFIRegisterRule_Offset,
DW_CFIRegisterRule_ValOffset,
DW_CFIRegisterRule_Register,
DW_CFIRegisterRule_Expression,
DW_CFIRegisterRule_ValExpression,
} DW_CFIRegisterRule;
// One register's rule: a rule tag followed by an anonymous union, so the
// payload is accessed directly as cell.n or cell.expr (the two share storage).
// NOTE(review): presumably n carries the operand of the Offset / ValOffset /
// Register rules and expr the operand of the Expression / ValExpression rules
// -- confirm in the code that fills and applies cells.
typedef struct DW_CFICell
{
DW_CFIRegisterRule rule;
union {
S64 n;
Rng1U64 expr;
};
} DW_CFICell;
// One row of unwind rules: a CFA cell stored inline plus a pointer to the
// per-register cells. Rows can be chained through next.
// NOTE(review): cells presumably points at cells_per_row entries (the count
// passed to dw_unwind_row_alloc_x64 / _zero_ / _copy_); the count is not stored
// in the row itself, so callers must supply it -- confirm.
typedef struct DW_CFIRow
{
struct DW_CFIRow *next;
DW_CFICell *cells;
DW_CFICFACell cfa_cell;
} DW_CFIRow;
// State for running call frame instructions. The first three fields share
// their names with the parameters of dw_unwind_make_machine_x64; initial_row
// and fde_ip share theirs with the second parameter of
// dw_unwind_machine_equip_initial_row_x64 and
// dw_unwind_machine_equip_fde_ip_x64 respectively.
// cie, ptr_ctx and initial_row are pointers, not copies.
// NOTE(review): the pointed-to objects presumably have to outlive the machine
// -- confirm ownership in the implementation.
typedef struct DW_CFIMachine
{
U64 cells_per_row;
DW_CIEUnpacked *cie;
DW_EhPtrCtx *ptr_ctx;
DW_CFIRow *initial_row;
U64 fde_ip;
} DW_CFIMachine;
// Operand decode kind. Every value used here (including the reserved 1, 2, 4
// and 8) fits in four bits, which is the width of each Dec*Mask field of
// DW_CFAControlBits.
// NOTE(review): presumably these codes are what gets packed into those fields
// -- confirm in dw_unwind_init_x64.
typedef U8 DW_CFADecode;
enum
{
DW_CFADecode_Nop = 0x0,
// 1,2,4,8 reserved for literal byte sizes
DW_CFADecode_Address = 0x9,
DW_CFADecode_ULEB128 = 0xA,
DW_CFADecode_SLEB128 = 0xB,
};
// 16-bit control word; element type of the dw_unwind__cfa_control_bits_kind1/2
// tables. Layout implied by the constants:
//   bits 0-3  Dec1Mask  (4-bit field)
//   bits 4-7  Dec2Mask  (4-bit field)
//   bits 8-11 single-bit flags IsReg0, IsReg1, IsReg2, NewRow
// NOTE(review): presumably Dec1/Dec2 hold DW_CFADecode codes for an opcode's
// operands, IsRegN marks operands that are register numbers, and NewRow marks
// opcodes that start a new row -- confirm in the machine implementation.
typedef U16 DW_CFAControlBits;
enum
{
DW_CFAControlBits_Dec1Mask = 0x00F,
DW_CFAControlBits_Dec2Mask = 0x0F0, // field starts at bit 4
DW_CFAControlBits_IsReg0 = 0x100,
DW_CFAControlBits_IsReg1 = 0x200,
DW_CFAControlBits_IsReg2 = 0x400,
DW_CFAControlBits_NewRow = 0x800,
};
// Two tables of DW_CFAControlBits with DW_CFA_OplKind1 + 1 and
// DW_CFA_OplKind2 + 1 entries; DW_CFA_OplKind1/2 are defined outside this header.
// No initializer is given here.
// NOTE(review): presumably indexed by CFA opcode and filled in by
// dw_unwind_init_x64. If they are written at runtime, confirm that read_only
// does not place them in write-protected storage on any target.
global read_only DW_CFAControlBits dw_unwind__cfa_control_bits_kind1[DW_CFA_OplKind1 + 1];
global read_only DW_CFAControlBits dw_unwind__cfa_control_bits_kind2[DW_CFA_OplKind2 + 1];
// Number of register slots used by the x64 unwinder.
// Register codes for unwinding match the DW_RegX64 register codes.
// NOTE(review): presumably the value passed as cells_per_row for x64 rows -- confirm.
#define DW_UNWIND_X64__REG_SLOT_COUNT 17
////////////////////////////////
// x64 Unwind Function
// x64 unwind entry point (declaration only). Returns a DW_UnwindResult by value.
// Inputs come as three sections, each given as raw bytes plus a virtual range:
//   raw_text            / text_vrange
//   raw_eh_frame        / eh_frame_vrange
//   raw_eh_frame_header / eh_frame_header_vrange
// followed by two image bases, the current stack pointer, a register block and
// a memory-read callback with its user-data pointer.
// NOTE(review): regs is presumably updated in place with the caller frame's
// registers, and the gap between default_image_base and image_base presumably
// rebases addresses -- confirm both in the definition.
internal DW_UnwindResult
dw_unwind_x64(String8 raw_text,
String8 raw_eh_frame,
String8 raw_eh_frame_header,
Rng1U64 text_vrange,
Rng1U64 eh_frame_vrange,
Rng1U64 eh_frame_header_vrange,
U64 default_image_base,
U64 image_base,
U64 stack_pointer,
DW_RegsX64 *regs,
DW_ReadMemorySig *read_memory,
void *read_memory_ud);
// Declaration only. Takes a single DW_CFIRow together with the raw .eh_frame
// bytes, a text base address, the memory-read callback (plus user data), the
// stack pointer and the register block; returns a DW_UnwindResult by value.
// NOTE(review): presumably evaluates the row's CFA and register rules and
// writes the recovered values into regs -- confirm in the definition.
internal DW_UnwindResult dw_unwind_x64__apply_frame_rules(String8 raw_eh_frame, DW_CFIRow *row, U64 text_base_vaddr, DW_ReadMemorySig *read_memory, void *read_memory_ud, U64 stack_pointer, DW_RegsX64 *regs);
////////////////////////////////
// x64 Unwind Helper Functions
// One-time setup hook; takes no arguments and returns nothing.
// NOTE(review): presumably fills the dw_unwind__cfa_control_bits_kind1/2 tables
// and so must run before the CFI machine is used -- confirm.
internal void dw_unwind_init_x64(void);
// Pointer decoder: input is addressed as base + range, ptr_enc is the
// DW_EhPtrEnc descriptor to apply, off is the starting offset.
// NOTE(review): presumably writes the decoded pointer to *ptr_out and returns
// the number of bytes consumed -- confirm in the definition.
internal U64 dw_unwind_parse_pointer_x64(void *base, Rng1U64 range, DW_EhPtrCtx *ptr_ctx, DW_EhPtrEnc ptr_enc, U64 off, U64 *ptr_out);
//- eh_frame parsing
// CIE parser. Returns nothing; the only out-parameter is cie_out.
// NOTE(review): with a void return there is no status value, so failure is
// presumably signalled through the contents of *cie_out -- confirm.
internal void dw_unwind_parse_cie_x64(void *base,Rng1U64 range,DW_EhPtrCtx *ptr_ctx, U64 off, DW_CIEUnpacked *cie_out);
// FDE parser. Takes the parent CIE in addition to the CIE parser's inputs;
// returns nothing, the only out-parameter is fde_out.
internal void dw_unwind_parse_fde_x64(void *base,Rng1U64 range,DW_EhPtrCtx *ptr_ctx, DW_CIEUnpacked *parent_cie, U64 off, DW_FDEUnpacked *fde_out);
// Lookup of the CFI records for ip_voff using only the raw .eh_frame bytes.
// NOTE(review): "slow" presumably means a linear scan of .eh_frame -- confirm.
internal DW_CFIRecords dw_unwind_eh_frame_cfi_from_ip_slow_x64(String8 raw_eh_frame, DW_EhPtrCtx *ptr_ctx, U64 ip_voff);
// Lookup of the CFI records for ip_voff that additionally takes the raw
// .eh_frame_hdr bytes.
// NOTE(review): "fast" presumably means it uses the header's lookup table -- confirm.
internal DW_CFIRecords dw_unwind_eh_frame_hdr_from_ip_fast_x64(String8 raw_eh_frame, String8 raw_eh_frame_hdr, DW_EhPtrCtx *ptr_ctx, U64 ip_voff);
//- cfi machine
// Builds a DW_CFIMachine by value from the three inputs that share names with
// its first three fields.
internal DW_CFIMachine dw_unwind_make_machine_x64(U64 cells_per_row, DW_CIEUnpacked *cie, DW_EhPtrCtx *ptr_ctx);
// Setters for the two remaining DW_CFIMachine fields (initial_row, fde_ip).
internal void dw_unwind_machine_equip_initial_row_x64(DW_CFIMachine *machine, DW_CFIRow *initial_row);
internal void dw_unwind_machine_equip_fde_ip_x64(DW_CFIMachine *machine, U64 fde_ip);
// Row helpers. All three take cells_per_row explicitly because DW_CFIRow does
// not store its own cell count.
// NOTE(review): row_alloc presumably allocates the row and its cells from
// arena -- confirm.
internal DW_CFIRow* dw_unwind_row_alloc_x64(Arena *arena, U64 cells_per_row);
internal void dw_unwind_row_zero_x64(DW_CFIRow *row, U64 cells_per_row);
internal void dw_unwind_row_copy_x64(DW_CFIRow *dst, DW_CFIRow *src, U64 cells_per_row);
// Runs the machine over the instructions addressed by base + range.
// NOTE(review): presumably stops at target_ip, leaves the rules in effect there
// in *row_out, and returns non-zero on success -- confirm in the definition.
internal B32 dw_unwind_machine_run_to_ip_x64(void *base, Rng1U64 range, DW_CFIMachine *machine, U64 target_ip, DW_CFIRow *row_out);
#endif // DWARF_UNWIND_H