diff --git a/src/base/base_math.c b/src/base/base_math.c
index 8f0be22e..2c6201f3 100644
--- a/src/base/base_math.c
+++ b/src/base/base_math.c
@@ -600,6 +600,30 @@ rng1u64_list_push(Arena *arena, Rng1U64List *list, Rng1U64 rng)
   list->count += 1;
 }
 
+// Moves every node of `to_concat` onto the tail of `list` (no copies are made),
+// adds its node count to `list`, then clears `to_concat` with MemoryZeroStruct.
+// Does nothing when `to_concat` has no nodes.
+internal void
+rng1u64_list_concat(Rng1U64List *list, Rng1U64List *to_concat)
+{
+  if(to_concat->first)
+  {
+    if(list->first)
+    {
+      list->last->next = to_concat->first;
+      list->last = to_concat->last;
+    }
+    else
+    {
+      list->first = to_concat->first;
+      list->last = to_concat->last;
+    }
+    // rng1u64_list_push maintains list->count, so a splice has to carry it over too
+    list->count += to_concat->count;
+    MemoryZeroStruct(to_concat);
+  }
+}
+
 internal Rng1U64Array
 rng1u64_array_from_list(Arena *arena, Rng1U64List *list)
 {
diff --git a/src/base/base_math.h b/src/base/base_math.h
index dc404fb3..b6063ad5 100644
--- a/src/base/base_math.h
+++ b/src/base/base_math.h
@@ -666,6 +666,7 @@ internal U32 u32_from_rgba(Vec4F32 rgba);
 //~ rjf: List Type Functions
 
 internal void rng1u64_list_push(Arena *arena, Rng1U64List *list, Rng1U64 rng);
+internal void rng1u64_list_concat(Rng1U64List *list, Rng1U64List *to_concat);
 internal Rng1U64Array rng1u64_array_from_list(Arena *arena, Rng1U64List *list);
 
 internal void rng1s64_list_push(Arena *arena, Rng1S64List *list, Rng1S64 rng);
diff --git a/src/base/base_strings.c b/src/base/base_strings.c
index 085e27ae..1e47ac37 100644
--- a/src/base/base_strings.c
+++ b/src/base/base_strings.c
@@ -2370,3 +2370,90 @@ str8_deserial_read_block(String8 string, U64 off, U64 size, String8 *block_out)
   *block_out = str8_substr(string, range);
   return block_out->size;
 }
+
+// Decodes an unsigned LEB128 integer from `string` starting at byte offset `off`.
+// Writes the value to `*value_out` when it is non-null and returns the number of
+// bytes consumed, counting the final byte. If the data runs out before a byte
+// without the continuation bit is seen, the bytes that were readable are counted
+// and the partial value is written.
+internal U64
+str8_deserial_read_uleb128(String8 string, U64 off, U64 *value_out)
+{
+  U64 value = 0;
+  U64 shift = 0;
+  U64 cursor = off;
+  for(;;)
+  {
+    U8 byte = 0;
+    U64 bytes_read = str8_deserial_read_struct(string, cursor, &byte);
+    if(bytes_read != sizeof(byte))
+    {
+      break;
+    }
+    // count the byte before testing the continuation bit, so the final byte of
+    // the encoding is included in the returned size
+    cursor += bytes_read;
+    // drop payload bits past bit 63; shifting a U64 by 64 or more is undefined
+    if(shift < sizeof(value) * 8)
+    {
+      value |= ((U64)(byte & 0x7fu)) << shift;
+    }
+    shift += 7u;
+    if((byte & 0x80u) == 0)
+    {
+      break;
+    }
+  }
+  if(value_out != 0)
+  {
+    *value_out = value;
+  }
+  return cursor - off;
+}
+
+// Decodes a signed LEB128 integer from `string` starting at byte offset `off`.
+// Writes the sign-extended value to `*value_out` when it is non-null and returns
+// the number of bytes consumed, counting the final byte. If the data runs out
+// before a byte without the continuation bit is seen, the bytes that were
+// readable are counted and the partial value is written without sign extension.
+internal U64
+str8_deserial_read_sleb128(String8 string, U64 off, S64 *value_out)
+{
+  U64 value = 0;
+  U64 shift = 0;
+  U64 cursor = off;
+  for(;;)
+  {
+    U8 byte = 0;
+    U64 bytes_read = str8_deserial_read_struct(string, cursor, &byte);
+    if(bytes_read != sizeof(byte))
+    {
+      break;
+    }
+    // count the byte before testing the continuation bit, so the final byte of
+    // the encoding is included in the returned size
+    cursor += bytes_read;
+    // drop payload bits past bit 63; shifting a U64 by 64 or more is undefined
+    if(shift < sizeof(value) * 8)
+    {
+      value |= ((U64)(byte & 0x7fu)) << shift;
+    }
+    shift += 7u;
+    if((byte & 0x80u) == 0)
+    {
+      // bit 6 of the final byte is the sign; fill every bit above the payload.
+      // done on the unsigned value so no signed negation can overflow
+      if(shift < sizeof(value) * 8 && (byte & 0x40u) != 0)
+      {
+        value |= (~(U64)0) << shift;
+      }
+      break;
+    }
+  }
+  if(value_out != 0)
+  {
+    *value_out = (S64)value;
+  }
+  return cursor - off;
+}
+
diff --git a/src/base/base_strings.h b/src/base/base_strings.h
index 1b578b5c..fc946dd8 100644
--- a/src/base/base_strings.h
+++ b/src/base/base_strings.h
@@ -404,6 +404,8 @@ internal void * str8_deserial_get_raw_ptr(String8 string, U64 off, U64 size);
 internal U64 str8_deserial_read_cstr(String8 string, U64 off, String8 *cstr_out);
 internal U64 str8_deserial_read_windows_utf16_string16(String8 string, U64 off, String16 *str_out);
 internal U64 str8_deserial_read_block(String8 string, U64 off, U64 size, String8 *block_out);
+internal U64 str8_deserial_read_uleb128(String8 string, U64 off, U64 *value_out);
+internal U64 str8_deserial_read_sleb128(String8 string, U64 off, S64 *value_out);
 #define str8_deserial_read_array(string, off, ptr, count) str8_deserial_read((string), (off), (ptr), sizeof(*(ptr))*(count), sizeof(*(ptr)))
 #define str8_deserial_read_struct(string, off, ptr) str8_deserial_read_array(string, off, ptr, 1)
 
diff --git a/src/dwarf/dwarf_enum.h b/src/dwarf/dwarf_enum.h
new file mode 100644
index 00000000..6158c409
--- /dev/null
+++ b/src/dwarf/dwarf_enum.h
@@ -0,0 +1,11 @@
+// Copyright (c) 2024 Epic Games Tools
+// Licensed under the MIT license (https://opensource.org/license/mit/)
+
+internal String8 dw_string_from_expr_op(Arena *arena, DW_ExprOp op);
+internal String8
dw_string_from_tag_kind(Arena *arena, DW_TagKind kind);
+internal String8 dw_string_from_attrib_kind(Arena *arena, DW_AttribKind kind);
+internal String8 dw_string_from_form_kind(Arena *arena, DW_FormKind kind);
+
+//internal String8 dw_string_from_register(Arena *arena, Arch arch, U64 reg_id);
+
+
diff --git a/src/dwarf/dwarf_expr.c b/src/dwarf/dwarf_expr.c
new file mode 100644
index 00000000..094ad78c
--- /dev/null
+++ b/src/dwarf/dwarf_expr.c
@@ -0,0 +1,1450 @@
+// Copyright (c) 2024 Epic Games Tools
+// Licensed under the MIT license (https://opensource.org/license/mit/)
+
+//- analyzers
+
+// Classifies an expression made of exactly one op without running the full
+// evaluator. Returns Empty when no op can be read or the op is a lone piece,
+// an Address/Register/ValueLong location for the single-op forms handled
+// below, and Fail when the op is not handled here or more bytes follow it.
+internal DW_SimpleLoc
+dw_expr__analyze_fast(void *base, Rng1U64 range, U64 text_section_base)
+{
+  DW_SimpleLoc result = {DW_SimpleLocKind_Empty};
+
+  U8 op = 0;
+  if (based_range_read(base, range, 0, 1, &op)) {
+    // step params
+    U64 size_param = 0;
+    B32 is_signed = 0;
+
+    // step
+    U64 step_cursor = 1;
+    switch (op) {
+
+      //// literal encodings ////
+
+      case DW_ExprOp_Lit0: case DW_ExprOp_Lit1: case DW_ExprOp_Lit2:
+      case DW_ExprOp_Lit3: case DW_ExprOp_Lit4: case DW_ExprOp_Lit5:
+      case DW_ExprOp_Lit6: case DW_ExprOp_Lit7: case DW_ExprOp_Lit8:
+      case DW_ExprOp_Lit9: case DW_ExprOp_Lit10: case DW_ExprOp_Lit11:
+      case DW_ExprOp_Lit12: case DW_ExprOp_Lit13: case DW_ExprOp_Lit14:
+      case DW_ExprOp_Lit15: case DW_ExprOp_Lit16: case DW_ExprOp_Lit17:
+      case DW_ExprOp_Lit18: case DW_ExprOp_Lit19: case DW_ExprOp_Lit20:
+      case DW_ExprOp_Lit21: case DW_ExprOp_Lit22: case DW_ExprOp_Lit23:
+      case DW_ExprOp_Lit24: case DW_ExprOp_Lit25: case DW_ExprOp_Lit26:
+      case DW_ExprOp_Lit27: case DW_ExprOp_Lit28: case DW_ExprOp_Lit29:
+      case DW_ExprOp_Lit30: case DW_ExprOp_Lit31:
+      {
+        U64 x = op - DW_ExprOp_Lit0;
+        result.kind = DW_SimpleLocKind_Address;
+        result.addr = x;
+      } break;
+
+      case DW_ExprOp_Const1U:size_param = 1; goto const_n;
+      case DW_ExprOp_Const2U:size_param = 2; goto const_n;
+      case DW_ExprOp_Const4U:size_param = 4; goto const_n;
+      case DW_ExprOp_Const8U:size_param = 8; goto const_n;
+      case DW_ExprOp_Const1S:size_param = 1; is_signed = 1; goto const_n;
+      case DW_ExprOp_Const2S:size_param = 2; is_signed = 1; goto const_n;
+      case DW_ExprOp_Const4S:size_param = 4; is_signed = 1; goto const_n;
+      case DW_ExprOp_Const8S:size_param = 8; is_signed = 1; goto const_n;
+      const_n:
+      {
+        U64 x = 0;
+        step_cursor += based_range_read(base, range, step_cursor, size_param, &x);
+
+        if (is_signed) {
+          x = extend_sign64(x, size_param);
+        }
+
+        result.kind = DW_SimpleLocKind_Address;
+        result.addr = x;
+      } break;
+
+      case DW_ExprOp_Addr:
+      {
+        U64 offset = 0;
+        step_cursor += based_range_read(base, range, step_cursor, 8, &offset);
+        U64 x = text_section_base + offset;
+        result.kind = DW_SimpleLocKind_Address;
+        result.addr = x;
+      } break;
+
+      case DW_ExprOp_ConstU:
+      {
+        U64 x = 0;
+        step_cursor += based_range_read_uleb128(base, range, step_cursor, &x);
+        result.kind = DW_SimpleLocKind_Address;
+        result.addr = x;
+      } break;
+
+      case DW_ExprOp_ConstS:
+      {
+        U64 x = 0;
+        step_cursor += based_range_read_sleb128(base, range, step_cursor, (S64*)&x);
+        result.kind = DW_SimpleLocKind_Address;
+        result.addr = x;
+      } break;
+
+
+      //// register location descriptions ////
+
+      case DW_ExprOp_Reg0: case DW_ExprOp_Reg1: case DW_ExprOp_Reg2:
+      case DW_ExprOp_Reg3: case DW_ExprOp_Reg4: case DW_ExprOp_Reg5:
+      case DW_ExprOp_Reg6: case DW_ExprOp_Reg7: case DW_ExprOp_Reg8:
+      case DW_ExprOp_Reg9: case DW_ExprOp_Reg10: case DW_ExprOp_Reg11:
+      case DW_ExprOp_Reg12: case DW_ExprOp_Reg13: case DW_ExprOp_Reg14:
+      case DW_ExprOp_Reg15: case DW_ExprOp_Reg16: case DW_ExprOp_Reg17:
+      case DW_ExprOp_Reg18: case DW_ExprOp_Reg19: case DW_ExprOp_Reg20:
+      case DW_ExprOp_Reg21: case DW_ExprOp_Reg22: case DW_ExprOp_Reg23:
+      case DW_ExprOp_Reg24: case DW_ExprOp_Reg25: case DW_ExprOp_Reg26:
+      case DW_ExprOp_Reg27: case DW_ExprOp_Reg28: case DW_ExprOp_Reg29:
+      case DW_ExprOp_Reg30: case DW_ExprOp_Reg31:
+      {
+        U64 reg_idx = op - DW_ExprOp_Reg0;
+        result.kind = DW_SimpleLocKind_Register;
+        result.reg_idx = reg_idx;
+      } break;
+
+      case DW_ExprOp_RegX:
+      {
+        U64 reg_idx = 0;
+        step_cursor += based_range_read_uleb128(base, range, step_cursor, &reg_idx);
+        result.kind = DW_SimpleLocKind_Register;
+        result.reg_idx = reg_idx;
+      } break;
+
+
+      //// implicit location descriptions ////
+
+      case DW_ExprOp_ImplicitValue:
+      {
+        U64 size = 0;
+        step_cursor += based_range_read_uleb128(base, range, step_cursor, &size);
+        // step_cursor is relative to range.min, so measure the payload against the
+        // bytes that remain; the subtraction form cannot wrap for a huge size
+        U64 data_off = range.min + step_cursor;
+        if (data_off <= range.max && size <= range.max - data_off) {
+          result.kind = DW_SimpleLocKind_ValueLong;
+          result.val_long.str = (U8*)base + data_off;
+          result.val_long.size = size;
+        }
+        step_cursor += size;
+      } break;
+
+      case DW_ExprOp_StackValue:
+      {
+        // this op pops from the value stack, so if it comes first the dwarf expression is bad.
+        result.kind = DW_SimpleLocKind_Fail;
+        result.fail_kind = DW_LocFailKind_BadData;
+      } break;
+
+
+      //// composite location descriptions ////
+
+      // if the first and only op is a piece, the expression is empty
+
+      case DW_ExprOp_Piece:
+      {
+        U64 size = 0;
+        step_cursor += based_range_read_uleb128(base, range, step_cursor, &size);
+        result.kind = DW_SimpleLocKind_Empty;
+      } break;
+
+      case DW_ExprOp_BitPiece:
+      {
+        U64 bit_size = 0, bit_off = 0;
+        step_cursor += based_range_read_uleb128(base, range, step_cursor, &bit_size);
+        step_cursor += based_range_read_uleb128(base, range, step_cursor, &bit_off);
+        result.kind = DW_SimpleLocKind_Empty;
+      } break;
+
+
+      //// final fallback ////
+
+      default:
+      {
+        result.kind = DW_SimpleLocKind_Fail;
+        result.fail_kind = DW_LocFailKind_TooComplicated;
+      } break;
+    }
+
+    // check this was the whole expression
+    if (range.min + step_cursor < range.max) {
+      result.kind = DW_SimpleLocKind_Fail;
+      result.fail_kind = DW_LocFailKind_TooComplicated;
+    }
+  }
+
+  return result;
+}
+
+// Walks every op of the expression (and, when a call resolver is supplied,
+// every expression reachable through Call2/Call4) and accumulates flags that
+// describe what evaluating it would need. Ops are decoded but not executed.
+// Analysis stops early with NotSupported or BadData set when it meets an op
+// it cannot handle or malformed data.
+internal DW_ExprAnalysis
+dw_expr__analyze_details(void *in_base, Rng1U64 in_range, DW_ExprMachineCallConfig *call_config)
+{
+  Temp scratch = scratch_begin(0, 0);
+
+  DW_ExprAnalysis result = {0};
+
+  // are we resolving calls?
+  B32 has_call_func = (call_config != 0 && call_config->func != 0);
+
+  // tasks
+  DW_ExprAnalysisTask *unfinished_tasks = 0;
+  DW_ExprAnalysisTask *finished_tasks = 0;
+
+  // convert range input to string
+  String8 in_data = str8((U8*)in_base + in_range.min, in_range.max - in_range.min);
+
+  // put input task onto the list
+  {
+    DW_ExprAnalysisTask *new_task = push_array(scratch.arena, DW_ExprAnalysisTask, 1);
+    new_task->p = max_U64;
+    new_task->data = in_data;
+    SLLStackPush(unfinished_tasks, new_task);
+  }
+
+  // state for checking implicit locations
+  B32 last_was_implicit_loc = 0;
+
+  // task loop
+  for (;;) {
+    // get next task to handle
+    DW_ExprAnalysisTask *task = unfinished_tasks;
+    if (task == 0) {
+      break;
+    }
+
+    String8 task_data = task->data;
+    U8 *task_base = task_data.str;
+    Rng1U64 task_range = rng_1u64(0, task_data.size);
+
+    // move the task to finished now
+    SLLStackPop(unfinished_tasks);
+    SLLStackPush(finished_tasks, task);
+
+    // analysis loop
+    for (U64 cursor = 0;;) {
+      // decode op
+      U64 op_offset = cursor;
+      U8 op = 0;
+      if (based_range_read(task_base, task_range, op_offset, 1, &op)) {
+        U64 after_op_off = cursor + 1;
+
+        // require piece op after 'implicit' location descriptions
+        if (last_was_implicit_loc) {
+          if (op != DW_ExprOp_Piece && op != DW_ExprOp_BitPiece) {
+            result.flags |= DW_ExprFlag_BadData;
+            goto finish;
+          }
+        }
+
+        // step params
+        U64 size_param = 0;
+        B32 is_signed = 0;
+
+        // step
+        U64 step_cursor = after_op_off;
+        switch (op) {
+
+          //// literal encodings ////
+
+          case DW_ExprOp_Lit0: case DW_ExprOp_Lit1: case DW_ExprOp_Lit2:
+          case DW_ExprOp_Lit3: case DW_ExprOp_Lit4: case DW_ExprOp_Lit5:
+          case DW_ExprOp_Lit6: case DW_ExprOp_Lit7: case DW_ExprOp_Lit8:
+          case DW_ExprOp_Lit9: case DW_ExprOp_Lit10: case DW_ExprOp_Lit11:
+          case DW_ExprOp_Lit12: case DW_ExprOp_Lit13: case DW_ExprOp_Lit14:
+          case DW_ExprOp_Lit15: case DW_ExprOp_Lit16: case DW_ExprOp_Lit17:
+          case DW_ExprOp_Lit18: case DW_ExprOp_Lit19: case DW_ExprOp_Lit20:
+          case DW_ExprOp_Lit21: case DW_ExprOp_Lit22: case DW_ExprOp_Lit23:
+          case DW_ExprOp_Lit24: case DW_ExprOp_Lit25: case DW_ExprOp_Lit26:
+          case DW_ExprOp_Lit27: case DW_ExprOp_Lit28: case DW_ExprOp_Lit29:
+          case DW_ExprOp_Lit30: case DW_ExprOp_Lit31:
+          break;
+
+          case DW_ExprOp_Const1U:size_param = 1; goto const_n;
+          case DW_ExprOp_Const2U:size_param = 2; goto const_n;
+          case DW_ExprOp_Const4U:size_param = 4; goto const_n;
+          case DW_ExprOp_Const8U:size_param = 8; goto const_n;
+          case DW_ExprOp_Const1S:size_param = 1; is_signed = 1; goto const_n;
+          case DW_ExprOp_Const2S:size_param = 2; is_signed = 1; goto const_n;
+          case DW_ExprOp_Const4S:size_param = 4; is_signed = 1; goto const_n;
+          case DW_ExprOp_Const8S:size_param = 8; is_signed = 1; goto const_n;
+          const_n:
+          {
+            U64 x = 0;
+            step_cursor += based_range_read(task_base, task_range, step_cursor, size_param, &x);
+          } break;
+
+          case DW_ExprOp_Addr:
+          {
+            U64 offset = 0;
+            step_cursor += based_range_read(task_base, task_range, step_cursor, 8, &offset);
+            result.flags |= DW_ExprFlag_UsesTextBase;
+          } break;
+
+          case DW_ExprOp_ConstU:
+          {
+            U64 x = 0;
+            step_cursor += based_range_read_uleb128(task_base, task_range, step_cursor, &x);
+          } break;
+
+          case DW_ExprOp_ConstS:
+          {
+            U64 x = 0;
+            step_cursor += based_range_read_sleb128(task_base, task_range, step_cursor, (S64*)&x);
+          } break;
+
+
+          //// register based addressing ////
+
+          case DW_ExprOp_FBReg:
+          {
+            S64 offset = 0;
+            step_cursor += based_range_read_sleb128(task_base, task_range, step_cursor, &offset);
+            result.flags |= DW_ExprFlag_UsesFrameBase;
+          } break;
+
+          case DW_ExprOp_BReg0: case DW_ExprOp_BReg1: case DW_ExprOp_BReg2:
+          case DW_ExprOp_BReg3: case DW_ExprOp_BReg4: case DW_ExprOp_BReg5:
+          case DW_ExprOp_BReg6: case DW_ExprOp_BReg7: case DW_ExprOp_BReg8:
+          case DW_ExprOp_BReg9: case DW_ExprOp_BReg10: case DW_ExprOp_BReg11:
+          case DW_ExprOp_BReg12: case DW_ExprOp_BReg13: case DW_ExprOp_BReg14:
+          case DW_ExprOp_BReg15: case DW_ExprOp_BReg16: case DW_ExprOp_BReg17:
+          case DW_ExprOp_BReg18: case DW_ExprOp_BReg19: case DW_ExprOp_BReg20:
+          case DW_ExprOp_BReg21: case DW_ExprOp_BReg22: case DW_ExprOp_BReg23:
+          case DW_ExprOp_BReg24: case DW_ExprOp_BReg25: case DW_ExprOp_BReg26:
+          case DW_ExprOp_BReg27: case DW_ExprOp_BReg28: case DW_ExprOp_BReg29:
+          case DW_ExprOp_BReg30: case DW_ExprOp_BReg31:
+          {
+            S64 offset = 0;
+            step_cursor += based_range_read_sleb128(task_base, task_range, step_cursor, &offset);
+            result.flags |= DW_ExprFlag_UsesRegisters;
+          } break;
+
+          case DW_ExprOp_BRegX:
+          {
+            U64 reg_idx = 0; S64 offset = 0;
+            step_cursor += based_range_read_uleb128(task_base, task_range, step_cursor, &reg_idx);
+            step_cursor += based_range_read_sleb128(task_base, task_range, step_cursor, &offset);
+            result.flags |= DW_ExprFlag_UsesRegisters;
+          } break;
+
+
+          //// stack operations ////
+
+          case DW_ExprOp_Dup:
+          case DW_ExprOp_Drop:
+          break;
+
+          case DW_ExprOp_Pick:
+          {
+            U64 idx = 0;
+            step_cursor += based_range_read(task_base, task_range, step_cursor, 1, &idx);
+          } break;
+
+          case DW_ExprOp_Over:
+          case DW_ExprOp_Swap:
+          case DW_ExprOp_Rot:
+          break;
+
+          case DW_ExprOp_Deref:
+          {
+            result.flags |= DW_ExprFlag_UsesMemory;
+          } break;
+
+          case DW_ExprOp_DerefSize:
+          {
+            U64 size = 0;
+            step_cursor += based_range_read(task_base, task_range, step_cursor, 1, &size);
+            result.flags |= DW_ExprFlag_UsesMemory;
+          } break;
+
+          case DW_ExprOp_XDeref:
+          case DW_ExprOp_XDerefSize:
+          {
+            result.flags |= DW_ExprFlag_NotSupported;
+          } goto finish;
+
+          case DW_ExprOp_PushObjectAddress:
+          {
+            result.flags |= DW_ExprFlag_UsesObjectAddress;
+          } break;
+
+          case DW_ExprOp_GNU_PushTlsAddress:
+          case DW_ExprOp_FormTlsAddress:
+          {
+            result.flags |= DW_ExprFlag_UsesTLSAddress;
+          } break;
+
+          case DW_ExprOp_CallFrameCfa:
+          {
+            result.flags |= DW_ExprFlag_UsesCFA;
+          } break;
+
+
+          //// arithmetic and logical operations ////
+
+          case DW_ExprOp_Abs:
+          case DW_ExprOp_And:
+          case DW_ExprOp_Div:
+          case DW_ExprOp_Minus:
+          case DW_ExprOp_Mod:
+          case DW_ExprOp_Mul:
+          case DW_ExprOp_Neg:
+          case DW_ExprOp_Not:
+          case DW_ExprOp_Or:
+          case DW_ExprOp_Plus:
+          break;
+
+          case DW_ExprOp_PlusUConst:
+          {
+            U64 y = 0;
+            step_cursor += based_range_read_uleb128(task_base, task_range, step_cursor, &y);
+          } break;
+
+          case DW_ExprOp_Shl:
+          case DW_ExprOp_Shr:
+          case DW_ExprOp_Shra:
+          case DW_ExprOp_Xor:
+          break;
+
+
+          //// control flow operations ////
+
+          case DW_ExprOp_Le:
+          case DW_ExprOp_Ge:
+          case DW_ExprOp_Eq:
+          case DW_ExprOp_Lt:
+          case DW_ExprOp_Gt:
+          case DW_ExprOp_Ne:
+          break;
+
+          case DW_ExprOp_Skip:
+          case DW_ExprOp_Bra:
+          {
+            S16 d = 0;
+            step_cursor += based_range_read(task_base, task_range, step_cursor, 2, &d);
+            result.flags |= DW_ExprFlag_NonLinearFlow;
+          } break;
+
+          case DW_ExprOp_Call2:size_param = 2; goto callN;
+          case DW_ExprOp_Call4:size_param = 4; goto callN;
+          callN:
+          {
+            U64 p = 0;
+            step_cursor += based_range_read(task_base, task_range, step_cursor, size_param, &p);
+            result.flags |= DW_ExprFlag_UsesCallResolution|DW_ExprFlag_NonLinearFlow;
+
+            // add to task list
+            if (has_call_func) {
+              DW_ExprAnalysisTask *existing = dw_expr__analysis_task_from_p(unfinished_tasks, p);
+              if (existing == 0) {
+                existing = dw_expr__analysis_task_from_p(finished_tasks, p);
+              }
+              if (existing == 0) {
+                DW_ExprAnalysisTask *new_task = push_array(scratch.arena, DW_ExprAnalysisTask, 1);
+                new_task->p = p;
+                new_task->data = call_config->func(call_config->user_ptr, p);
+                SLLStackPush(unfinished_tasks, new_task);
+              }
+            }
+          } break;
+
+          case DW_ExprOp_CallRef:
+          {
+            result.flags |= DW_ExprFlag_NotSupported;
+          } goto finish;
+
+
+          //// special operations ////
+
+          case DW_ExprOp_Nop:break;
+
+
+          //// register location descriptions ////
+
+          case DW_ExprOp_Reg0: case DW_ExprOp_Reg1: case DW_ExprOp_Reg2:
+          case DW_ExprOp_Reg3: case DW_ExprOp_Reg4: case DW_ExprOp_Reg5:
+          case DW_ExprOp_Reg6: case DW_ExprOp_Reg7: case DW_ExprOp_Reg8:
+          case DW_ExprOp_Reg9: case DW_ExprOp_Reg10: case DW_ExprOp_Reg11:
+          case DW_ExprOp_Reg12: case DW_ExprOp_Reg13: case DW_ExprOp_Reg14:
+          case DW_ExprOp_Reg15: case DW_ExprOp_Reg16: case DW_ExprOp_Reg17:
+          case DW_ExprOp_Reg18: case DW_ExprOp_Reg19: case DW_ExprOp_Reg20:
+          case DW_ExprOp_Reg21: case DW_ExprOp_Reg22: case DW_ExprOp_Reg23:
+          case DW_ExprOp_Reg24: case DW_ExprOp_Reg25: case DW_ExprOp_Reg26:
+          case DW_ExprOp_Reg27: case DW_ExprOp_Reg28: case DW_ExprOp_Reg29:
+          case DW_ExprOp_Reg30: case DW_ExprOp_Reg31:
+          {
+            last_was_implicit_loc = 1;
+          } break;
+
+          case DW_ExprOp_RegX:
+          {
+            U64 reg_idx = 0;
+            // the register number is a ULEB128 operand, matching how
+            // dw_expr__analyze_fast decodes this op
+            step_cursor += based_range_read_uleb128(task_base, task_range, step_cursor, &reg_idx);
+            last_was_implicit_loc = 1;
+          } break;
+
+
+          //// implicit location descriptions ////
+
+          case DW_ExprOp_ImplicitValue:
+          {
+            U64 size = 0;
+            // the payload length is a ULEB128 operand
+            step_cursor += based_range_read_uleb128(task_base, task_range, step_cursor, &size);
+            // compare against the bytes that remain so a huge size cannot wrap
+            if (step_cursor > task_range.max || size > task_range.max - step_cursor) {
+              result.flags |= DW_ExprFlag_BadData;
+              goto finish;
+            }
+            step_cursor += size;
+            last_was_implicit_loc = 1;
+          } break;
+
+          case DW_ExprOp_StackValue:
+          {
+            last_was_implicit_loc = 1;
+          } break;
+
+
+          //// composite location descriptions ////
+
+          case DW_ExprOp_Piece:
+          {
+            U64 size = 0;
+            step_cursor += based_range_read_uleb128(task_base, task_range, step_cursor, &size);
+            result.flags |= DW_ExprFlag_UsesComposite;
+
+            last_was_implicit_loc = 0;
+          } break;
+
+          case DW_ExprOp_BitPiece:
+          {
+            U64 bit_size = 0; U64 bit_off = 0;
+            step_cursor += based_range_read_uleb128(task_base, task_range, step_cursor, &bit_size);
+            step_cursor += based_range_read_uleb128(task_base, task_range, step_cursor, &bit_off);
+            result.flags |= DW_ExprFlag_UsesComposite;
+
+            last_was_implicit_loc = 0;
+          } break;
+
+
+          //// final fallback ////
+
+          default:
+          {
+            result.flags |= DW_ExprFlag_NotSupported;
+          } goto finish;
+        }
+
+        // increment cursor
+        cursor = step_cursor;
+      }
+
+      // check for end of task. a failed op read leaves the cursor in place, so
+      // this is also what terminates the loop once the data is exhausted
+      if (cursor >= task_data.size) {
+        goto finish_task;
+      }
+    }
+
+    finish_task:;
+  }
+  finish:;
+
+  scratch_end(scratch);
+  return result;
+}
+
+//- full eval
+
+internal DW_Location
+dw_expr__eval(Arena *arena_optional, void *expr_base, Rng1U64 expr_range, DW_ExprMachineConfig *config)
+{
+  Temp scratch = scratch_begin(&arena_optional, 1);
+
+  DW_Location result = {0};
+
+  // setup stack
+  DW_ExprStack stack = dw_expr__stack_make(scratch.arena);
+
+  // adjust expr range
+  void *expr_ptr = (U8*)expr_base + expr_range.min;
+  U64 expr_size = expr_range.max - expr_range.min;
+
+  // setup call stack
+  DW_ExprCallStack call_stack = {0};
+  dw_expr__call_push(scratch.arena, &call_stack, expr_ptr, expr_size);
+
+  // state variables
+  DW_SimpleLoc stashed_loc = {DW_SimpleLocKind_Address};
+
+  // run loop
+  U64 max_step_count = config->max_step_count;
+  U64 step_counter = 0;
+  for (;;) {
+    // check top of stack
+    DW_ExprCall *call = dw_expr__call_top(&call_stack);
+    if (call == 0) {
+      goto finish;
+    }
+
+    // grab top of stack details
+    void *base = call->ptr;
+    Rng1U64 range = rng_1u64(0, call->size);
+    U64 cursor = call->cursor;
+
+    // decode op
+    U64 op_offset = cursor;
+    U8 op = 0;
+    if (based_range_read(base, range, op_offset, 1, &op)) {
+      U64 after_op_off = cursor + 1;
+
+      // require piece op after 'implicit' location descriptions
+      if (stashed_loc.kind != DW_SimpleLocKind_Address) {
+        if (op != DW_ExprOp_Piece && op != DW_ExprOp_BitPiece) {
+          stashed_loc.kind = DW_SimpleLocKind_Fail;
+          stashed_loc.fail_kind = DW_LocFailKind_BadData;
+          goto finish;
+        }
+      }
+
+      // step params
+      U64 size_param = 0;
+      B32 is_signed = 0;
+
+      // step
+      U64 step_cursor = after_op_off;
+      switch (op) {
+
+        //// literal encodings ////
+
+        case DW_ExprOp_Lit0: case DW_ExprOp_Lit1: case DW_ExprOp_Lit2:
+        case DW_ExprOp_Lit3: case DW_ExprOp_Lit4: case DW_ExprOp_Lit5:
+        case DW_ExprOp_Lit6: case DW_ExprOp_Lit7: case DW_ExprOp_Lit8:
+        case DW_ExprOp_Lit9: case DW_ExprOp_Lit10: case DW_ExprOp_Lit11:
+        case DW_ExprOp_Lit12: case DW_ExprOp_Lit13: case DW_ExprOp_Lit14:
+        case DW_ExprOp_Lit15: case
DW_ExprOp_Lit16: case DW_ExprOp_Lit17: + case DW_ExprOp_Lit18: case DW_ExprOp_Lit19: case DW_ExprOp_Lit20: + case DW_ExprOp_Lit21: case DW_ExprOp_Lit22: case DW_ExprOp_Lit23: + case DW_ExprOp_Lit24: case DW_ExprOp_Lit25: case DW_ExprOp_Lit26: + case DW_ExprOp_Lit27: case DW_ExprOp_Lit28: case DW_ExprOp_Lit29: + case DW_ExprOp_Lit30: case DW_ExprOp_Lit31: + { + U64 x = op - DW_ExprOp_Lit0; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Const1U:size_param = 1; goto const_n; + case DW_ExprOp_Const2U:size_param = 2; goto const_n; + case DW_ExprOp_Const4U:size_param = 4; goto const_n; + case DW_ExprOp_Const8U:size_param = 8; goto const_n; + case DW_ExprOp_Const1S:size_param = 1; is_signed = 1; goto const_n; + case DW_ExprOp_Const2S:size_param = 2; is_signed = 1; goto const_n; + case DW_ExprOp_Const4S:size_param = 4; is_signed = 1; goto const_n; + case DW_ExprOp_Const8S:size_param = 8; is_signed = 1; goto const_n; + const_n: + { + U64 x = 0; + step_cursor += based_range_read(base, range, step_cursor, size_param, &x); + if (is_signed) { + x = extend_sign64(x, size_param); + } + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Addr: + { + U64 offset = 0; + step_cursor += based_range_read(base, range, step_cursor, 8, &offset); + + // earlier versions of GCC emit TLS offset with DW_ExprOp_Addr. 
+ B32 is_text_relative; + { + U8 next_op = 0; + based_range_read_struct(base, range, step_cursor, &next_op); + is_text_relative = (next_op != DW_ExprOp_GNU_PushTlsAddress); + } + + U64 addr = offset; + + if (is_text_relative) { + if (config->text_section_base != 0) { + addr += *config->text_section_base; + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingTextBase; + goto finish; + } + } + + dw_expr__stack_push(scratch.arena, &stack, addr); + } break; + + case DW_ExprOp_ConstU: + { + U64 x = 0; + step_cursor += based_range_read_uleb128(base, range, step_cursor, &x); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_ConstS: + { + U64 x = 0; + step_cursor += based_range_read_sleb128(base, range, step_cursor, (S64*)&x); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + + //// register based addressing //// + + case DW_ExprOp_FBReg: + { + S64 offset = 0; + step_cursor += based_range_read_sleb128(base, range, step_cursor, &offset); + if (config->frame_base != 0) { + U64 x = *config->frame_base + offset; + dw_expr__stack_push(scratch.arena, &stack, x); + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingFrameBase; + goto finish; + } + } break; + + case DW_ExprOp_BReg0: case DW_ExprOp_BReg1: case DW_ExprOp_BReg2: + case DW_ExprOp_BReg3: case DW_ExprOp_BReg4: case DW_ExprOp_BReg5: + case DW_ExprOp_BReg6: case DW_ExprOp_BReg7: case DW_ExprOp_BReg8: + case DW_ExprOp_BReg9: case DW_ExprOp_BReg10: case DW_ExprOp_BReg11: + case DW_ExprOp_BReg12: case DW_ExprOp_BReg13: case DW_ExprOp_BReg14: + case DW_ExprOp_BReg15: case DW_ExprOp_BReg16: case DW_ExprOp_BReg17: + case DW_ExprOp_BReg18: case DW_ExprOp_BReg19: case DW_ExprOp_BReg20: + case DW_ExprOp_BReg21: case DW_ExprOp_BReg22: case DW_ExprOp_BReg23: + case DW_ExprOp_BReg24: case DW_ExprOp_BReg25: case DW_ExprOp_BReg26: + case DW_ExprOp_BReg27: case DW_ExprOp_BReg28: case 
DW_ExprOp_BReg29: + case DW_ExprOp_BReg30: case DW_ExprOp_BReg31: + { + S64 offset = 0; + step_cursor += based_range_read_sleb128(base, range, step_cursor, &offset); + U64 reg_idx = op - DW_ExprOp_BReg0; + DW_RegsX64 *regs = config->regs; + if (regs != 0) { + if (reg_idx < ArrayCount(regs->r)) { + U64 x = regs->r[reg_idx] + offset; + dw_expr__stack_push(scratch.arena, &stack, x); + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_BadData; + stashed_loc.fail_data = op_offset; + goto finish; + } + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingRegisters; + goto finish; + } + } break; + + case DW_ExprOp_BRegX: + { + U64 reg_idx = 0; S64 offset = 0; + step_cursor += based_range_read_uleb128(base, range, step_cursor, ®_idx); + step_cursor += based_range_read_sleb128(base, range, step_cursor, &offset); + + DW_RegsX64 *regs = config->regs; + if (regs != 0) { + if (reg_idx < ArrayCount(regs->r)) { + U64 x = regs->r[reg_idx] + offset; + dw_expr__stack_push(scratch.arena, &stack, x); + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_BadData; + stashed_loc.fail_data = op_offset; + goto finish; + } + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingRegisters; + goto finish; + } + } break; + + + //// stack operations //// + + case DW_ExprOp_Dup: + { + U64 x = dw_expr__stack_pick(&stack, 0); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Drop: + { + dw_expr__stack_pop(&stack); + } break; + + case DW_ExprOp_Pick: + { + U64 idx = 0; + step_cursor += based_range_read(base, range, step_cursor, 1, &idx); + U64 x = dw_expr__stack_pick(&stack, idx); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Over: + { + U64 x = dw_expr__stack_pick(&stack, 1); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Swap: 
+ { + U64 a = dw_expr__stack_pop(&stack); + U64 b = dw_expr__stack_pop(&stack); + dw_expr__stack_push(scratch.arena, &stack, b); + dw_expr__stack_push(scratch.arena, &stack, a); + } break; + + case DW_ExprOp_Rot: + { + U64 a = dw_expr__stack_pop(&stack); + U64 b = dw_expr__stack_pop(&stack); + U64 c = dw_expr__stack_pop(&stack); + dw_expr__stack_push(scratch.arena, &stack, a); + dw_expr__stack_push(scratch.arena, &stack, c); + dw_expr__stack_push(scratch.arena, &stack, b); + } break; + + case DW_ExprOp_Deref: + { + U64 addr = dw_expr__stack_pop(&stack); + + B32 read_success = 0; + if (config->read_memory) { + U64 x = 0; + if (config->read_memory(addr, sizeof(x), &x, config->read_memory_ud) == sizeof(x)) { + dw_expr__stack_push(scratch.arena, &stack, x); + read_success = 1; + } + } + + if (!read_success) { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingMemory; + stashed_loc.fail_data = addr; + goto finish; + } + } break; + + case DW_ExprOp_DerefSize: + { + U64 raw_size = 0; + step_cursor += based_range_read(base, range, step_cursor, 1, &raw_size); + + U64 size = ClampTop(raw_size, 8); + U64 addr = dw_expr__stack_pop(&stack); + + B32 read_success = 0; + if (config->read_memory) { + U64 x = 0; + if (config->read_memory(addr, size, &x, config->read_memory_ud) == sizeof(x)) { + dw_expr__stack_push(scratch.arena, &stack, x); + read_success = 1; + } + } + if (!read_success) { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingMemory; + stashed_loc.fail_data = addr; + goto finish; + } + } break; + + case DW_ExprOp_XDeref: + case DW_ExprOp_XDerefSize: + { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_NotSupported; + goto finish; + } break; + + case DW_ExprOp_PushObjectAddress: + { + if (config->object_address != 0) { + U64 x = *config->object_address; + dw_expr__stack_push(scratch.arena, &stack, x); + } else { + stashed_loc.kind = 
DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingObjectAddress; + goto finish; + } + } break; + + // NOTE: pop offset from stack, convert it to TLS address, then push it back. + case DW_ExprOp_GNU_PushTlsAddress: + case DW_ExprOp_FormTlsAddress: + { + S64 s = (S64)dw_expr__stack_pop(&stack); + + if (config->tls_address != 0) { + U64 x = *config->tls_address + s; + dw_expr__stack_push(scratch.arena, &stack, x); + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingTLSAddress; + goto finish; + } + } break; + + case DW_ExprOp_CallFrameCfa: + { + if (config->cfa != 0) { + U64 x = *config->cfa; + dw_expr__stack_push(scratch.arena, &stack, x); + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingCFA; + goto finish; + } + } break; + + + //// arithmetic and logical operations //// + + case DW_ExprOp_Abs: + { + S64 s = (S64)dw_expr__stack_pop(&stack); + S64 x = abs_s64(s); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_And: + { + U64 x = dw_expr__stack_pop(&stack); + U64 y = dw_expr__stack_pop(&stack); + dw_expr__stack_push(scratch.arena, &stack, x&y); + } break; + + case DW_ExprOp_Div: + { + S64 d = (S64)dw_expr__stack_pop(&stack); + S64 n = (S64)dw_expr__stack_pop(&stack); + S64 x = (d == 0)?0:n/d; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Minus: + { + U64 b = dw_expr__stack_pop(&stack); + U64 a = dw_expr__stack_pop(&stack); + U64 x = a - b; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Mod: + { + S64 d = (S64)dw_expr__stack_pop(&stack); + S64 n = (S64)dw_expr__stack_pop(&stack); + S64 x = (d == 0)?0:n%d; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Mul: + { + U64 b = dw_expr__stack_pop(&stack); + U64 a = dw_expr__stack_pop(&stack); + U64 x = a*b; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case 
DW_ExprOp_Neg: + { + S64 s = (S64)dw_expr__stack_pop(&stack); + S64 x = -s; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Not: + { + U64 y = dw_expr__stack_pop(&stack); + U64 x = ~y; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Or: + { + U64 y = dw_expr__stack_pop(&stack); + U64 z = dw_expr__stack_pop(&stack); + U64 x = y | z; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Plus: + { + U64 y = dw_expr__stack_pop(&stack); + U64 z = dw_expr__stack_pop(&stack); + U64 x = y + z; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_PlusUConst: + { + U64 y = 0; + step_cursor += based_range_read_uleb128(base, range, step_cursor, &y); + U64 z = dw_expr__stack_pop(&stack); + U64 x = y + z; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Shl: + { + U64 y = dw_expr__stack_pop(&stack); + U64 z = dw_expr__stack_pop(&stack); + U64 x = 0; + if (y < 64) { + x = z << y; + } + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Shr: + { + U64 y = dw_expr__stack_pop(&stack); + U64 z = dw_expr__stack_pop(&stack); + U64 x = 0; + if (y < 64) { + x = z >> y; + } + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Shra: + { + U64 y = dw_expr__stack_pop(&stack); + U64 z = dw_expr__stack_pop(&stack); + U64 x = 0; + if (y < 64) { + x = z >> y; + // sign extensions + if (y > 0 && (z & (1ull << 63))) { + x |= ~((1 << (64 - y)) - 1); + } + } + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Xor: + { + U64 y = dw_expr__stack_pop(&stack); + U64 z = dw_expr__stack_pop(&stack); + U64 x = y ^ z; + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + + //// control flow operations //// + + case DW_ExprOp_Le: + { + S64 b = (S64)dw_expr__stack_pop(&stack); + S64 a = (S64)dw_expr__stack_pop(&stack); + U64 x = (a <= b); + dw_expr__stack_push(scratch.arena, &stack, 
x); + } break; + + case DW_ExprOp_Ge: + { + S64 b = (S64)dw_expr__stack_pop(&stack); + S64 a = (S64)dw_expr__stack_pop(&stack); + U64 x = (a >= b); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Eq: + { + S64 b = (S64)dw_expr__stack_pop(&stack); + S64 a = (S64)dw_expr__stack_pop(&stack); + U64 x = (a == b); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Lt: + { + S64 b = (S64)dw_expr__stack_pop(&stack); + S64 a = (S64)dw_expr__stack_pop(&stack); + U64 x = (a < b); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Gt: + { + S64 b = (S64)dw_expr__stack_pop(&stack); + S64 a = (S64)dw_expr__stack_pop(&stack); + U64 x = (a > b); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Ne: + { + S64 b = (S64)dw_expr__stack_pop(&stack); + S64 a = (S64)dw_expr__stack_pop(&stack); + U64 x = (a != b); + dw_expr__stack_push(scratch.arena, &stack, x); + } break; + + case DW_ExprOp_Skip: + { + S16 d = 0; + step_cursor += based_range_read(base, range, step_cursor, 2, &d); + step_cursor = step_cursor + d; + } break; + + case DW_ExprOp_Bra: + { + S16 d = 0; + step_cursor += based_range_read(base, range, step_cursor, 2, &d); + U64 b = dw_expr__stack_pop(&stack); + if (b != 0) { + step_cursor = step_cursor + d; + } + } break; + + case DW_ExprOp_Call2: + { + U16 p = 0; + step_cursor += based_range_read(base, range, step_cursor, 2, &p); + if (config->call.func != 0) { + String8 sub_data = config->call.func(config->call.user_ptr, p); + dw_expr__call_push(scratch.arena, &call_stack, sub_data.str, sub_data.size); + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingCallResolution; + goto finish; + } + } break; + + case DW_ExprOp_Call4: + { + U32 p = 0; + step_cursor += based_range_read(base, range, step_cursor, 4, &p); + if (config->call.func != 0) { + String8 sub_data = config->call.func(config->call.user_ptr, p); + 
dw_expr__call_push(scratch.arena, &call_stack, sub_data.str, sub_data.size); + } else { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingCallResolution; + goto finish; + } + } break; + + case DW_ExprOp_CallRef: + { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_NotSupported; + goto finish; + } break; + + + //// special operations //// + + case DW_ExprOp_Nop:break; + + + //// register location descriptions //// + + case DW_ExprOp_Reg0: case DW_ExprOp_Reg1: case DW_ExprOp_Reg2: + case DW_ExprOp_Reg3: case DW_ExprOp_Reg4: case DW_ExprOp_Reg5: + case DW_ExprOp_Reg6: case DW_ExprOp_Reg7: case DW_ExprOp_Reg8: + case DW_ExprOp_Reg9: case DW_ExprOp_Reg10: case DW_ExprOp_Reg11: + case DW_ExprOp_Reg12: case DW_ExprOp_Reg13: case DW_ExprOp_Reg14: + case DW_ExprOp_Reg15: case DW_ExprOp_Reg16: case DW_ExprOp_Reg17: + case DW_ExprOp_Reg18: case DW_ExprOp_Reg19: case DW_ExprOp_Reg20: + case DW_ExprOp_Reg21: case DW_ExprOp_Reg22: case DW_ExprOp_Reg23: + case DW_ExprOp_Reg24: case DW_ExprOp_Reg25: case DW_ExprOp_Reg26: + case DW_ExprOp_Reg27: case DW_ExprOp_Reg28: case DW_ExprOp_Reg29: + case DW_ExprOp_Reg30: case DW_ExprOp_Reg31: + { + U64 reg_idx = op - DW_ExprOp_Reg0; + stashed_loc.kind = DW_SimpleLocKind_Register; + stashed_loc.reg_idx = reg_idx; + } break; + + case DW_ExprOp_RegX: + { + U64 reg_idx = 0; + step_cursor += based_range_read(base, range, step_cursor, size_param, ®_idx); + stashed_loc.kind = DW_SimpleLocKind_Register; + stashed_loc.reg_idx = reg_idx; + } break; + + + //// implicit location descriptions //// + + case DW_ExprOp_ImplicitValue: + { + U64 size = 0; + step_cursor += based_range_read(base, range, step_cursor, size_param, &size); + if (step_cursor + size <= range.max) { + void *data = (U8*)base + range.min + step_cursor; + stashed_loc.kind = DW_SimpleLocKind_ValueLong; + stashed_loc.val_long.str = (U8*)data; + stashed_loc.val_long.size = size; + } else { + stashed_loc.kind = 
DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_BadData; + goto finish; + } + step_cursor += size; + } break; + + case DW_ExprOp_StackValue: + { + U64 x = dw_expr__stack_pop(&stack); + stashed_loc.kind = DW_SimpleLocKind_Value; + stashed_loc.val = x; + } break; + + + //// composite location descriptions //// + + case DW_ExprOp_Piece: + case DW_ExprOp_BitPiece: + { + if (arena_optional == 0) { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_MissingArenaForComposite; + goto finish; + } else { + // determine this piece's size & offset + U64 bit_size = 0; + U64 bit_off = 0; + B32 is_bit_loc = 0; + switch (op) { + case DW_ExprOp_Piece: + { + U64 size = 0; + step_cursor += based_range_read_uleb128(base, range, step_cursor, &size); + bit_size = size*8; + } break; + case DW_ExprOp_BitPiece: + { + step_cursor += based_range_read_uleb128(base, range, step_cursor, &bit_size); + step_cursor += based_range_read_uleb128(base, range, step_cursor, &bit_off); + is_bit_loc = 1; + } break; + } + + // determine this piece's location information + DW_SimpleLoc piece_loc = stashed_loc; + if (piece_loc.kind == DW_SimpleLocKind_Address) { + if (dw_expr__stack_is_empty(&stack)) { + piece_loc.kind = DW_SimpleLocKind_Empty; + } else { + U64 x = dw_expr__stack_pop(&stack); + piece_loc.addr = x; + } + } + + // push the piece + DW_Piece *piece = push_array(arena_optional, DW_Piece, 1); + SLLQueuePush(result.first_piece, result.last_piece, piece); + piece->loc = piece_loc; + piece->bit_size = bit_size; + piece->bit_off = bit_off; + piece->is_bit_loc = is_bit_loc; + + // zero the stached loc + MemoryZeroStruct(&stashed_loc); + } + } break; + + + //// final fallback //// + + default: + { + stashed_loc.kind = DW_SimpleLocKind_Fail; + stashed_loc.fail_kind = DW_LocFailKind_NotSupported; + goto finish; + } break; + } + + // increment cursor + cursor = step_cursor; + } + + // advance cursor or finish call + if (cursor < call->size) { + call->cursor 
= cursor;
+    } else {
+      dw_expr__call_pop(&call_stack);
+    }
+
+    // advance step counter
+    step_counter += 1;
+    if (step_counter == max_step_count) {
+      stashed_loc.kind = DW_SimpleLocKind_Fail;
+      stashed_loc.fail_kind = DW_LocFailKind_TimeOut;
+      goto finish;
+    }
+  }
+
+  finish:;
+
+  // non-piece location
+  {
+    DW_SimpleLoc loc = stashed_loc;
+    if (result.first_piece == 0) {
+
+      // normal location resolution
+      loc = stashed_loc;
+      if (loc.kind == DW_SimpleLocKind_Address) {
+        if (dw_expr__stack_is_empty(&stack)) {
+          loc.kind = DW_SimpleLocKind_Empty;
+        } else {
+          U64 x = dw_expr__stack_pop(&stack);
+          loc.addr = x;
+        }
+      }
+    }
+    // non-piece location resolution after composite
+    else {
+
+      // change the default kind to empty
+      if (loc.kind == DW_SimpleLocKind_Address) {
+        loc.kind = DW_SimpleLocKind_Empty;
+      }
+
+      // the non-piece should either be empty or fail
+      if (loc.kind != DW_SimpleLocKind_Empty &&
+          loc.kind != DW_SimpleLocKind_Fail) {
+        loc.kind = DW_SimpleLocKind_Fail;
+        loc.fail_kind = DW_LocFailKind_BadData;
+      }
+    }
+
+    result.non_piece_loc = loc;
+  }
+
+  // clear stack
+  scratch_end(scratch);
+  return result;
+}
+
+//- dw expr val stack
+
+// Returns an empty value stack (no nodes, no free nodes, count of zero).
+// The arena parameter is not used here.
+internal DW_ExprStack
+dw_expr__stack_make(Arena *arena)
+{
+  DW_ExprStack result = {0};
+  return result;
+}
+
+// Pushes x onto the value stack.
+// A node is taken from the stack's free list when one is available;
+// otherwise a new node is allocated from the arena.
+internal void
+dw_expr__stack_push(Arena *arena, DW_ExprStack *stack, U64 x)
+{
+  DW_ExprStackNode *node = stack->free_nodes;
+  if (node != 0) {
+    // reuse a recycled node (only valid when the free list is non-empty)
+    SLLStackPop(stack->free_nodes);
+  } else {
+    node = push_array(arena, DW_ExprStackNode, 1);
+  }
+  SLLStackPush(stack->stack, node);
+  node->val = x;
+  stack->count += 1;
+}
+
+// Pops the top value off the stack and returns it.
+// Returns 0 when the stack is empty. The popped node is placed on the
+// free list so that a later push can reuse it.
+internal U64
+dw_expr__stack_pop(DW_ExprStack *stack)
+{
+  U64 result = 0;
+  DW_ExprStackNode *node = stack->stack;
+  if (node != 0) {
+    SLLStackPop(stack->stack);
+    stack->count -= 1;
+    result = node->val;
+    // the value has been read out; recycle the node
+    SLLStackPush(stack->free_nodes, node);
+  }
+  return result;
+}
+
+internal U64
+dw_expr__stack_pick(DW_ExprStack *stack, U64 idx)
+{
+  U64 result = 0;
+  if (idx < stack->count) {
+    U64 counter = idx;
+    DW_ExprStackNode *node =
stack->stack;
+    for (;node != 0 && counter > 0; node = node->next, counter -= 1);
+    if (counter == 0 && node != 0) {
+      result = node->val;
+    }
+  }
+  return result;
+}
+
+internal B32
+dw_expr__stack_is_empty(DW_ExprStack *stack)
+{
+  B32 result = (stack->count == 0);
+  return result;
+}
+
+//- dw expr call stack
+
+// Returns the innermost active call frame, or null when no call is active.
+internal DW_ExprCall*
+dw_expr__call_top(DW_ExprCallStack *stack)
+{
+  DW_ExprCall *call = stack->stack;
+  return call;
+}
+
+// Pushes a new call frame describing the expression bytes [ptr, ptr+size).
+// A frame is taken from the free list when one is available; otherwise a
+// new frame is allocated from the arena. The frame is zeroed first, so its
+// cursor starts at 0.
+internal void
+dw_expr__call_push(Arena *arena, DW_ExprCallStack *stack, void *ptr, U64 size)
+{
+  DW_ExprCall *call = stack->free_calls;
+  if (call != 0) {
+    SLLStackPop(stack->free_calls);
+  } else {
+    call = push_array(arena, DW_ExprCall, 1);
+  }
+  MemoryZeroStruct(call);
+  // record the expression this frame executes; without this the frame
+  // would describe an empty expression
+  call->ptr = ptr;
+  call->size = size;
+  SLLStackPush(stack->stack, call);
+  stack->depth += 1;
+}
+
+// Pops the innermost call frame (if any) and recycles it onto the free list.
+internal void
+dw_expr__call_pop(DW_ExprCallStack *stack)
+{
+  DW_ExprCall *top = stack->stack;
+  if (top != 0)
+  {
+    SLLStackPop(stack->stack);
+    SLLStackPush(stack->free_calls, top);
+    // keep depth in step with the increment done by dw_expr__call_push
+    stack->depth -= 1;
+  }
+}
+
+//- analysis tasks
+
+// Linear search of the task list for the first task whose p matches;
+// returns null when there is no such task.
+internal DW_ExprAnalysisTask*
+dw_expr__analysis_task_from_p(DW_ExprAnalysisTask *first, U64 p)
+{
+  DW_ExprAnalysisTask *result = 0;
+  for (DW_ExprAnalysisTask *task = first; task != 0; task = task->next) {
+    if (task->p == p) {
+      result = task;
+      break;
+    }
+  }
+  return result;
+}
+
diff --git a/src/dwarf/dwarf_expr.h b/src/dwarf/dwarf_expr.h
new file mode 100644
index 00000000..0d8cb10a
--- /dev/null
+++ b/src/dwarf/dwarf_expr.h
@@ -0,0 +1,301 @@
+// Copyright (c) 2024 Epic Games Tools
+// Licensed under the MIT license (https://opensource.org/license/mit/)
+
+#ifndef DWARF_EXPR_H
+#define DWARF_EXPR_H
+
+////////////////////////////////
+//~ Dwarf Register Layout
+
+typedef struct DW_RegsX64
+{
+  union {
+    struct {
+      U64 rax;
+      U64 rdx;
+      U64 rcx;
+      U64 rbx;
+      U64 rsi;
+      U64 rdi;
+      U64 rbp;
+      U64 rsp;
+      U64 r8;
+      U64 r9;
+      U64 r10;
+      U64 r11;
+      U64 r12;
+      U64 r13;
+      U64 r14;
+      U64 r15;
+      U64 rip;
+    };
+    U64 r[17];
+  };
+} DW_RegsX64;
+
+////////////////////////////////
+//~ Dwarf Expression Eval
Types + +#define DW_READ_MEMORY_SIG(name) U64 name(U64 addr, U64 size, void *out, void *ud) +typedef DW_READ_MEMORY_SIG(DW_ReadMemorySig); + +//- machine configuration types +typedef String8 DW_ExprResolveCallFunc(void *call_user_ptr, U64 p); + +typedef struct DW_ExprMachineCallConfig +{ + void *user_ptr; + DW_ExprResolveCallFunc *func; +} DW_ExprMachineCallConfig; + +typedef struct DW_ExprMachineConfig +{ + U64 max_step_count; // (read only in the eval functions) + DW_ReadMemorySig *read_memory; + void *read_memory_ud; + DW_RegsX64 *regs; + U64 *text_section_base; + U64 *frame_base; + U64 *object_address; + U64 *tls_address; + U64 *cfa; + DW_ExprMachineCallConfig call; +} DW_ExprMachineConfig; + + +//- detail analysis types +typedef U32 DW_ExprFlags; +enum +{ + DW_ExprFlag_UsesTextBase = (1 << 0), + DW_ExprFlag_UsesMemory = (1 << 1), + DW_ExprFlag_UsesRegisters = (1 << 2), + DW_ExprFlag_UsesFrameBase = (1 << 3), + DW_ExprFlag_UsesObjectAddress = (1 << 4), + DW_ExprFlag_UsesTLSAddress = (1 << 5), + DW_ExprFlag_UsesCFA = (1 << 6), + DW_ExprFlag_UsesCallResolution = (1 << 7), + DW_ExprFlag_UsesComposite = (1 << 8), + + DW_ExprFlag_NotSupported = (1 << 16), + DW_ExprFlag_BadData = (1 << 17), + DW_ExprFlag_NonLinearFlow = (1 << 18) +}; + +typedef struct DW_ExprAnalysis +{ + DW_ExprFlags flags; +} DW_ExprAnalysis; + +typedef struct DW_ExprAnalysisTask +{ + struct DW_ExprAnalysisTask *next; + U64 p; + String8 data; +} DW_ExprAnalysisTask; + + +//- location types +typedef enum DW_SimpleLocKind +{ + DW_SimpleLocKind_Address, + DW_SimpleLocKind_Register, + DW_SimpleLocKind_Value, + DW_SimpleLocKind_ValueLong, + DW_SimpleLocKind_Empty, + DW_SimpleLocKind_Fail, +} DW_SimpleLocKind; + +typedef enum DW_LocFailKind +{ + // Interpreting Fail Kinds + // + // BadData: the evaluator detected that the dwarf expression operation is incorrectly formed + // NotSupported: the evaluator does not support a dwarf feature that was found in the dwarf expression + // TimeOut: the evaluator hit 
the maximum step count + // TooComplicated: used by analyzer when it the expression uses features outside of the analyzer's scope + // Missing*: the dwarf machine config was missing necessary information to finish the evaluation + + DW_LocFailKind_BadData, + DW_LocFailKind_NotSupported, + DW_LocFailKind_TimeOut, + DW_LocFailKind_TooComplicated, + DW_LocFailKind_MissingTextBase, + DW_LocFailKind_MissingMemory, + DW_LocFailKind_MissingRegisters, + DW_LocFailKind_MissingFrameBase, + DW_LocFailKind_MissingObjectAddress, + DW_LocFailKind_MissingTLSAddress, + DW_LocFailKind_MissingCFA, + DW_LocFailKind_MissingCallResolution, + DW_LocFailKind_MissingArenaForComposite, +} DW_LocFailKind; + +typedef struct DW_SimpleLoc +{ + DW_SimpleLocKind kind; + union { + U64 addr; + U64 reg_idx; + U64 val; + String8 val_long; + struct { + DW_LocFailKind fail_kind; + U64 fail_data; + }; + }; +} DW_SimpleLoc; + +typedef struct DW_Piece +{ + // Hint for Interpreting Pieces + // + // src = decode(loc, is_bit_loc, bit_size); + // dst |= (src >> bit_off) << bit_cursor; + // bit_cursor += bit_size; + + struct DW_Piece *next; + DW_SimpleLoc loc; + U64 bit_size; + U64 bit_off; + B32 is_bit_loc; +} DW_Piece; + +typedef struct DW_Location +{ + // Interpreting a Dwarf Location + // + // CASE (any number of pieces, fail in the non-piece): + // this is how errors are reported, error information is in the non-piece + // the 'fail' location kind should never show up in a piece + // if there are any pieces they can be treated as correct information that + // was successfully decoded before the error was encountered + // + // CASE (no pieces, empty non-piece): + // the data is completely optimized out and unrecoverable + // + // CASE (no pieces, non-empty non-piece): + // the size of the data is not known by the location, but something in the + // surrounding context of the location (eg type info) should know the size + // + // CASE (one-or-more pieces, empty non-piece): + // the data is described by the 
pieces + // + // CASE (one-or-more pieces, non-empty non-fail non-piece): + // this is supposed to be impossible; the non-piece either carries an error + // or *all* of the location information about the data, there should never + // be a mix of piece-based location and non-piece-based location data. + + DW_Piece *first_piece; + DW_Piece *last_piece; + U64 count; + + DW_SimpleLoc non_piece_loc; +} DW_Location; + + +//- full evaluator state types +typedef struct DW_ExprStackNode +{ + struct DW_ExprStackNode *next; + U64 val; +} DW_ExprStackNode; + +typedef struct DW_ExprStack +{ + DW_ExprStackNode *stack; + DW_ExprStackNode *free_nodes; + U64 count; +} DW_ExprStack; + +typedef struct DW_ExprCall +{ + struct DW_ExprCall *next; + void *ptr; + U64 size; + U64 cursor; +} DW_ExprCall; + +typedef struct DW_ExprCallStack +{ + DW_ExprCall *stack; + DW_ExprCall *free_calls; + U64 depth; +} DW_ExprCallStack; + +//////////////////////////////// +//~ Dwarf Expression Analysis & Eval Functions + +//- analyzers + +// This analyzer provides the most simplified dwarf expression +// decoding. If the expression consists of a single op that can be interpreted +// as a valid dwarf expression, then it represents that expression as a simple +// location. +// +// If there is a single 'piece' op that is represeted here as an empty simple +// location, losing whatever additional size information from the piece. +// +// If there is an op that requires the machine configuration data the analyzer +// fails with "too complicated" - unless the required configuration data is the +// text section base which this analyzer treats as a non-optional parameter and +// always decodes successfully. +// +// If the expression contains more than one op than the analyzer fails with +// "too complicated". 
+ +internal DW_SimpleLoc dw_expr__analyze_fast(void *base, Rng1U64 range, U64 text_section_base); + +// This analyzer does a one-pass scan through the expression to +// help a caller determine what to expect before doing a full evaluation which +// has to maintain value stacks, perform more checks, and execute any loops +// that may appear in the expression, etc. +// +// For each piece of data that can be equipped to a machine config there is a +// 'Uses' flag in the analysis. A user can use these flags to determine what to +// prepare and equip before a full eval. This can be a lot more efficient than +// always preparing everything, or iteratively equipping and retrying after +// each failure. +// +// The analysis can also catch some cases of bad data and unsupported features. +// These flags are useful for short circuit style optimizations, but they are +// not definitive, some bad data can only be caught by the full evaluator. +// Sometimes the full evaluator might miss bad data that this analyzer will see +// if control flow in the evaluator completely skips the bad data. A forgiving +// interpretation of dwarf expression data would only rely on the results of +// the full evaluator. A more strict interpretation would consider it an error +// if either this analyzer or the evaluator finds bad data. +// +// The analyzer also determines if there is any possibility for non-linear +// flow. Jumps, branches, and call ops all create non-linear flow. An +// expression that doesn't have non-linear flow is trivially gauranteed to +// terminate and therefore a good candidate for conversion to a human readable +// expression. +// +// The call config is optional (may be null). If is provided the analysis +// includes features seen in all of the expressions that might be reached by +// call ops from the initial expression. 
+ +internal DW_ExprAnalysis dw_expr__analyze_details(void *base, Rng1U64 range, DW_ExprMachineCallConfig *call_config); + +//- full eval +internal DW_Location dw_expr__eval(Arena *arena_optional, void *base, Rng1U64 range, DW_ExprMachineConfig *config); + +//- dw expr val stack +internal DW_ExprStack dw_expr__stack_make(Arena *arena); +internal void dw_expr__stack_push(Arena *arena, DW_ExprStack *stack, U64 x); +internal U64 dw_expr__stack_pop(DW_ExprStack *stack); +internal U64 dw_expr__stack_pick(DW_ExprStack *stack, U64 idx); +internal B32 dw_expr__stack_is_empty(DW_ExprStack *stack); + +//- dw expr call stack +internal DW_ExprCall* dw_expr__call_top(DW_ExprCallStack *stack); +internal void dw_expr__call_push(Arena *arena, DW_ExprCallStack *stack, void *ptr, U64 size); +internal void dw_expr__call_pop(DW_ExprCallStack *stack); + + +//- analysis tasks +internal DW_ExprAnalysisTask* dw_expr__analysis_task_from_p(DW_ExprAnalysisTask *first, U64 p); + +#endif //DWARF_EXPR_H + diff --git a/src/dwarf/dwarf_notes.txt b/src/dwarf/dwarf_notes.txt new file mode 100644 index 00000000..2c02214f --- /dev/null +++ b/src/dwarf/dwarf_notes.txt @@ -0,0 +1,42 @@ +--- DWARF NOTES --------------------------------------------------------------- + +DWARF V4 Spec: http://www.dwarfstd.org/doc/DWARF4.pdf +DWARF V5 Spec: http://www.dwarfstd.org/doc/DWARF5.pdf + +------------------------------------------------------------------------------- + +$ (2021/04/30) On .debug_pubtypes, .debug_pubnames, and .debug_names: + +.debug_pubtypes and .debug_pubnames are tables that map from a string (the name +of a type or function respectively) to an offset into .debug_info, which is the +offset of the Debug Information Entry (DIE, in DWARF terminology) of the info +associated with the string. THESE TWO SECTIONS ARE OPTIONAL. They don't show up +in every DWARF-holding file, and so they cannot be relied upon as acceleration +structures. 
But we're going to support parsing them, to make things a bit nicer +in cases where they are present. DWARF doesn't have much in the way of acceler- +ation structures built in, so our rationale is that we should take anything we +can get to make the format a bit more in some subset of the possible cases. + +.debug_names is a DWARF V5 section that is intended to replace .debug_pubtypes +and .debug_pubnames. However, even in cases when DWARF V5 is produced at the +time of writing this, we have not found .debug_names sections being produced. +We did not exhaustively test all compilers and configurations, but it seems +that it is not well-supported at all by major compilers, and there's a very low +probability that a user will have that section, so our current thinking is +that there's no point in supporting it right now. + +------------------------------------------------------------------------------- + +$ (2021/04/30) On producing DWARF V5 with Clang: + +https://lists.llvm.org/pipermail/llvm-dev/2018-August/125068.html + +By default it looks like (at the time of writing this) that Clang, by default, +will produce DWARF V4. To produce DWARF V5, however, you can use the -gdwarf-5 +option. Even when that option is used, it seems that some features of V5 are +not used (for example, .debug_names). + +The above link also says that this will produce .debug_names, but it doesn't as +of Clang 10. + +------------------------------------------------------------------------------- diff --git a/src/dwarf/dwarf_parse.c b/src/dwarf/dwarf_parse.c new file mode 100644 index 00000000..d1919758 --- /dev/null +++ b/src/dwarf/dwarf_parse.c @@ -0,0 +1,2340 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +// TODO(rjf): +// +// [ ] Any time we encode a subrange of a section inside of a +// DW_AttribValue, we need to do that consistently, regardless of +// whether or not it is a string, memory block, etc. 
We should just use +// the DW_SectionKind and then the min/max pair. +// +// [ ] Things we are not reporting, or haven't figured out: +// @dwarf_expr @dwarf_v5 @dw_cross_unit +// [ ] currently, we're filtering out template arguments in the member accelerator. +// this is because they don't correspond one-to-one with anything in PDB, but +// they do contain useful information that we might want to expose another way +// somehow. +// [ ] DWARF V5 features that nobody seems to use right now +// [ ] ref_addr_desc + next_info_ctx +// apparently these are necessary when dereferencing some DWARF V5 ways of +// forming references. They don't seem to come up at all for any real data +// but might be a case somewhere. +// [ ] case when only .debug_line and .debug_line_str is available, without +// compilation unit debug info? do we care about this at all? +// [ ] DW_Form_RefSig8, which requires using .debug_names +// to do a lookup for a reference +// [ ] DWARF V5, but also V1 & V2 for dw_range_list_from_range_offset +// [ ] DW_AttribClass_RngList and DW_Form_RngListx +// [ ] DW_OpCode_XDEREF_SIZE + DW_OpCode_XDEREF +// [ ] DW_OpCode_PIECE + DW_OpCode_BIT_PIECE +// [ ] DW_ExtOpcode_DefineFile, for line info +// [ ] DWARF procedures in DWARF expr evaluation +// [ ] DW_Attrib_DataMemberLocation is not being *fully* handled right +// now; full handling requires evaluating a DWARF expression to find out the +// offset of a member. Right now we handle the common case, which is when it +// is encoded as a constant value. +// [ ] inline information +// [ ] full info we are not handling: +// [ ] friend classes +// [ ] DWARF macro info +// [ ] whether or not a function is the entry point +// [ ] attributes we are not handling that may be important: +// [ ] DW_Attrib_AbstractOrigin +// - ??? +// [ ] DW_Attrib_VariableParameter +// - determines whether or not a parameter to a function is mutable, I think? 
+// [ ] DW_Attrib_Mutable +// - I think this is for specific keywords, may not be relevant to C/++ +// [ ] DW_Attrib_CallColumn +// - column position of an inlined subroutine +// [ ] DW_Attrib_CallFile +// - file of inlined subroutine +// [ ] DW_Attrib_CallLine +// - line number of inlined subroutine +// [ ] DW_Attrib_ConstExpr +// - ??? maybe C++ constexpr? +// [ ] DW_Attrib_EnumClass +// - c++ thing that's an enum with a backing type +// [ ] DW_Attrib_LinkageName +// - name used to do linking + +//////////////////////////////// +//~ rjf: Basic Helpers + +internal U64 +dw_hash_from_string(String8 string) +{ + XXH64_hash_t hash64 = XXH3_64bits(string.str, string.size); + return hash64; +} + +internal DW_AttribClass +dw_pick_attrib_value_class(DW_Version ver, DW_Ext ext, DW_Language lang, DW_AttribKind attrib_kind, DW_FormKind form_kind) +{ + // NOTE(rjf): DWARF's spec specifies two mappings: + // (DW_AttribKind) => List(DW_AttribClass) + // (DW_FormKind) => List(DW_AttribClass) + // + // This function's purpose is to find the overlapping class between an + // DW_AttribKind and DW_FormKind. 
+ + DW_AttribClass attrib_class = dw_attrib_class_from_attrib_kind(ver, ext, attrib_kind); + DW_AttribClass form_class = dw_attrib_class_from_form_kind(ver, form_kind); + + // rust compiler is busted, it writes version 5 attributes + if(ver == DW_Version_2 && lang == DW_Language_Rust && (attrib_class == DW_AttribClass_Null || form_class == DW_AttribClass_Null)) + { + attrib_class = dw_attrib_class_from_attrib_kind(DW_Version_5, ext, attrib_kind); + form_class = dw_attrib_class_from_form_kind(DW_Version_5, form_kind); + } + + DW_AttribClass result = DW_AttribClass_Null; + if(attrib_class != DW_AttribClass_Null && form_class != DW_AttribClass_Null) + { + result = DW_AttribClass_Undefined; + + for(U32 i = 0; i < 32; ++i) + { + U32 n = 1u << i; + if((attrib_class & n) != 0 && (form_class & n) != 0) + { + result = ((DW_AttribClass) n); + break; + } + } + + Assert(result != DW_AttribClass_Undefined); + } + + return result; +} + +//////////////////////////////// +//~ rjf: DWARF-Specific Based Range Reads + +internal U64 +based_range_read(void *base, Rng1U64 range, U64 offset, U64 size, void *out) +{ + String8 data = str8((U8*)base+range.min, dim_1u64(range)); + return str8_deserial_read(data, offset, out, size, 1); +} + +#define based_range_read_struct(base, range, offset, out) based_range_read(base, range, offset, sizeof(*out), out) + +internal String8 +based_range_read_string(void *base, Rng1U64 range, U64 offset) +{ + String8 data = str8((U8*)base+range.min, dim_1u64(range)); + String8 result = {0}; + str8_deserial_read_cstr(data, offset, &result); + return result; +} + +internal void * +based_range_ptr(void *base, Rng1U64 range, U64 offset) +{ + Assert(offset < dim_1u64(range)); + U8 *data = (U8*)base + range.min + offset; + return data; +} + +internal U64 +based_range_read_uleb128(void *base, Rng1U64 range, U64 offset, U64 *out_value) +{ + U64 value = 0; + U64 bytes_read = 0; + U64 shift = 0; + U8 byte = 0; + for(U64 read_offset = offset; + 
based_range_read_struct(base, range, read_offset, &byte) == 1; + read_offset += 1) + { + bytes_read += 1; + U8 val = byte & 0x7fu; + value |= ((U64)val) << shift; + if((byte&0x80u) == 0) + { + break; + } + shift += 7u; + } + if(out_value != 0) + { + *out_value = value; + } + return bytes_read; +} + +internal U64 +based_range_read_sleb128(void *base, Rng1U64 range, U64 offset, S64 *out_value) +{ + U64 value = 0; + U64 bytes_read = 0; + U64 shift = 0; + U8 byte = 0; + for(U64 read_offset = offset; + based_range_read_struct(base, range, read_offset, &byte) == 1; + read_offset += 1) + { + bytes_read += 1; + U8 val = byte & 0x7fu; + value |= ((U64)val) << shift; + shift += 7u; + if((byte&0x80u) == 0) + { + if(shift < sizeof(value) * 8 && (byte & 0x40u) != 0) + { + value |= -(S64)(1ull << shift); + } + break; + } + } + if(out_value != 0) + { + *out_value = value; + } + return bytes_read; +} + +//////////////////////////////// + +internal U64 +dw_based_range_read_length(void *base, Rng1U64 range, U64 offset, U64 *out_value) +{ + U64 bytes_read = 0; + U64 value = 0; + U32 first32 = 0; + if(based_range_read_struct(base, range, offset, &first32)) + { + // NOTE(rjf): DWARF 32-bit => use the first 32 bits as the size. + if(first32 != max_U32) + { + value = (U64)first32; + bytes_read = sizeof(U32); + } + // NOTE(rjf): DWARF 64-bit => first 32 are just a marker, use the next 64 bits as the size. 
+ else if(based_range_read_struct(base, range, offset + sizeof(U32), &value)) + { + value = 0; + bytes_read = sizeof(U32) + sizeof(U64); + } + } + if(out_value != 0) + { + *out_value = value; + } + return bytes_read; +} + +internal U64 +dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev) +{ + U64 total_bytes_read = 0; + + //- rjf: parse ID + U64 id_off = offset; + U64 sub_kind_off = id_off; + U64 id = 0; + { + U64 bytes_read = based_range_read_uleb128(base, range, id_off, &id); + sub_kind_off += bytes_read; + total_bytes_read += bytes_read; + } + + //- rjf: parse sub-kind + U64 sub_kind = 0; + U64 next_off = sub_kind_off; + if(id != 0) + { + U64 bytes_read = based_range_read_uleb128(base, range, sub_kind_off, &sub_kind); + next_off += bytes_read; + total_bytes_read += bytes_read; + } + + //- rjf: parse whether this tag has children + U8 has_children = 0; + if(id != 0) + { + total_bytes_read += based_range_read_struct(base, range, next_off, &has_children); + } + + //- rjf: fill abbrev + if(out_abbrev != 0) + { + DW_Abbrev abbrev = {0}; + abbrev.kind = DW_Abbrev_Tag; + abbrev.abbrev_range = rng_1u64(range.min+offset, range.min+offset+total_bytes_read); + abbrev.sub_kind = sub_kind; + abbrev.id = id; + if(has_children) + { + abbrev.flags |= DW_AbbrevFlag_HasChildren; + } + *out_abbrev = abbrev; + } + + return total_bytes_read; +} + +internal U64 +dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev) +{ + U64 total_bytes_read = 0; + + //- rjf: parse ID + U64 id_off = offset; + U64 sub_kind_off = id_off; + U64 id = 0; + { + U64 bytes_read = based_range_read_uleb128(base, range, id_off, &id); + sub_kind_off += bytes_read; + total_bytes_read += bytes_read; + } + + //- rjf: parse sub-kind (form-kind) + U64 sub_kind = 0; + U64 next_off = sub_kind_off; + { + U64 bytes_read = based_range_read_uleb128(base, range, sub_kind_off, &sub_kind); + next_off += bytes_read; + total_bytes_read += 
bytes_read; + } + + //- rjf: parse implicit const + U64 implicit_const = 0; + if(sub_kind == DW_Form_ImplicitConst) + { + U64 bytes_read = based_range_read_uleb128(base, range, next_off, &implicit_const); + total_bytes_read += bytes_read; + } + + //- rjf: fill abbrev + if(out_abbrev != 0) + { + DW_Abbrev abbrev = {0}; + abbrev.kind = DW_Abbrev_Attrib; + abbrev.abbrev_range = rng_1u64(offset, offset+total_bytes_read); + abbrev.sub_kind = sub_kind; + abbrev.id = id; + if(sub_kind == DW_Form_ImplicitConst) + { + abbrev.flags |= DW_AbbrevFlag_HasImplicitConst; + abbrev.const_value = implicit_const; + } + *out_abbrev = abbrev; + } + + return total_bytes_read; +} + +internal U64 +dw_based_range_read_attrib_form_value(void *base, Rng1U64 range, U64 offset, DW_Mode mode, U64 address_size, DW_FormKind form_kind, U64 implicit_const, DW_AttribValue *form_value_out) +{ + U64 bytes_read = 0; + U64 bytes_to_read = 0; + DW_AttribValue form_value = {0}; + + switch(form_kind) + { + case DW_Form_Null: break; + + //- rjf: 1-byte uint reads + case DW_Form_Ref1: case DW_Form_Data1: case DW_Form_Flag: + case DW_Form_Strx1: case DW_Form_Addrx1: + bytes_to_read = 1; goto read_fixed_uint; + + //- rjf: 2-byte uint reads + case DW_Form_Ref2: case DW_Form_Data2: case DW_Form_Strx2: + case DW_Form_Addrx2: + bytes_to_read = 2; goto read_fixed_uint; + + //- rjf: 3-byte uint reads + case DW_Form_Strx3: case DW_Form_Addrx3: + bytes_to_read = 3; goto read_fixed_uint; + + //- rjf: 4-byte uint reads + case DW_Form_Data4: case DW_Form_Ref4: case DW_Form_RefSup4: case DW_Form_Strx4: case DW_Form_Addrx4: + bytes_to_read = 4; goto read_fixed_uint; + + //- rjf: 8-byte uint reads + case DW_Form_Data8: case DW_Form_Ref8: case DW_Form_RefSig8: case DW_Form_RefSup8: + bytes_to_read = 8; goto read_fixed_uint; + + //- rjf: address-size reads + case DW_Form_Addr: bytes_to_read = address_size; goto read_fixed_uint; + + //- rjf: offset-size reads + case DW_Form_RefAddr: case DW_Form_SecOffset: case 
DW_Form_LineStrp: + case DW_Form_Strp: case DW_Form_StrpSup: + bytes_to_read = dw_offset_size_from_mode(mode); goto read_fixed_uint; + + //- rjf: fixed-size uint reads + { + read_fixed_uint:; + U64 value = 0; + bytes_read = based_range_read(base, range, offset, bytes_to_read, &value); + form_value.v[0] = value; + } break; + + //- rjf: uleb128 reads + case DW_Form_UData: case DW_Form_RefUData: case DW_Form_Strx: + case DW_Form_Addrx: case DW_Form_LocListx: case DW_Form_RngListx: + { + U64 value = 0; + bytes_read = based_range_read_uleb128(base, range, offset, &value); + form_value.v[0] = value; + } break; + + //- rjf: sleb128 reads + case DW_Form_SData: + { + S64 value = 0; + bytes_read = based_range_read_sleb128(base, range, offset, &value); + form_value.v[0] = value; + } break; + + //- rjf: fixed-size uint read + skip + case DW_Form_Block1: bytes_to_read = 1; goto read_fixed_uint_skip; + case DW_Form_Block2: bytes_to_read = 2; goto read_fixed_uint_skip; + case DW_Form_Block4: bytes_to_read = 4; goto read_fixed_uint_skip; + { + read_fixed_uint_skip:; + U64 size = 0; + bytes_read = based_range_read(base, range, offset, bytes_to_read, &size); + form_value.v[0] = size; + form_value.v[1] = offset; + bytes_read += size; + } break; + + //- rjf: uleb 128 read + skip + case DW_Form_Block: + { + U64 size = 0; + bytes_read = based_range_read_uleb128(base, range, offset, &size); + form_value.v[0] = size; + form_value.v[1] = offset; + bytes_read += size; + } break; + + //- rjf: u64 ranges + case DW_Form_Data16: + { + U64 value1 = 0; + U64 value2 = 0; + bytes_read += based_range_read_struct(base, range, offset, &value1); + bytes_read += based_range_read_struct(base, range, offset + sizeof(U64), &value2); + form_value.v[0] = value1; + form_value.v[1] = value2; + } break; + + //- rjf: strings + case DW_Form_String: + { + String8 string = based_range_read_string(base, range, offset); + bytes_read = string.size + 1; + U64 string_offset = offset; + U64 string_size = (offset + 
bytes_read) - string_offset; + form_value.v[0] = string_offset; + form_value.v[1] = string_offset+string_size-1; + } break; + + //- rjf: implicit const + case DW_Form_ImplicitConst: + { + // Special case. + // Unlike other forms that have their values stored in the .debug_info section, + // This one defines it's value in the .debug_abbrev section. + form_value.v[0] = implicit_const; + } break; + + //- rjf: expr loc + case DW_Form_ExprLoc: + { + U64 size = 0; + bytes_read = based_range_read_uleb128(base, range, offset, &size); + form_value.v[0] = offset + bytes_read; + form_value.v[1] = size; + bytes_read += size; + } break; + + //- rjf: flag present + case DW_Form_FlagPresent: + { + form_value.v[0] = 1; + } break; + + case DW_Form_Indirect: + { + InvalidPath; + } break; + } + + if(form_value_out != 0) + { + *form_value_out = form_value; + } + + return bytes_read; +} + +//- rjf: important DWARF section base/range accessors + +internal DW_Mode +dw_mode_from_sec(DW_SectionArray *sections, DW_SectionKind kind) +{ + if(sections->v[kind].data.size > 0xffffffff) + { + return DW_Mode_64Bit; + } + else + { + return DW_Mode_32Bit; + } +} + +internal Rng1U64 +dw_range_from_sec(DW_SectionArray *sections, DW_SectionKind kind) +{ + Rng1U64 result = rng_1u64(0, sections->v[kind].data.size); + return result; +} + +internal void * +dw_base_from_sec(DW_SectionArray *sections, DW_SectionKind kind) +{ + return sections->v[kind].data.str; +} + +//////////////////////////////// +//~ rjf: Abbrev Table + +internal DW_AbbrevTable +dw_make_abbrev_table(Arena *arena, DW_SectionArray *sections, U64 abbrev_offset) +{ + void *file_base = dw_base_from_sec(sections, DW_Section_Abbrev); + Rng1U64 abbrev_range = dw_range_from_sec(sections, DW_Section_Abbrev); + + //- rjf: count the tags we have + U64 tag_count = 0; + for(U64 abbrev_read_off = abbrev_offset - abbrev_range.min;;) + { + DW_Abbrev tag; + { + U64 bytes_read = dw_based_range_read_abbrev_tag(file_base, abbrev_range, abbrev_read_off, 
&tag); + abbrev_read_off += bytes_read; + if(bytes_read == 0 || tag.id == 0) + { + break; + } + } + for(;;) + { + DW_Abbrev attrib = {0}; + U64 bytes_read = dw_based_range_read_abbrev_attrib_info(file_base, abbrev_range, abbrev_read_off, &attrib); + abbrev_read_off += bytes_read; + if(bytes_read == 0 || attrib.id == 0) + { + break; + } + } + tag_count += 1; + } + + //- rjf: build table + DW_AbbrevTable table = {0}; + table.count = tag_count; + table.entries = push_array(arena, DW_AbbrevTableEntry, table.count); + MemorySet(table.entries, 0, sizeof(DW_AbbrevTableEntry)*table.count); + + U64 tag_idx = 0; + for(U64 abbrev_read_off = abbrev_offset - abbrev_range.min;;) + { + DW_Abbrev tag; + { + U64 bytes_read = dw_based_range_read_abbrev_tag(file_base, abbrev_range, abbrev_read_off, &tag); + abbrev_read_off += bytes_read; + if(bytes_read == 0 || tag.id == 0) + { + break; + } + } + + // rjf: insert this tag into the table + { + table.entries[tag_idx].id = tag.id; + table.entries[tag_idx].off = tag.abbrev_range.min; + tag_idx += 1; + } + + for(;;) + { + DW_Abbrev attrib = {0}; + U64 bytes_read = dw_based_range_read_abbrev_attrib_info(file_base, abbrev_range, abbrev_read_off, &attrib); + abbrev_read_off += bytes_read; + if(bytes_read == 0 || attrib.id == 0) + { + break; + } + } + tag_count += 1; + } + + return table; +} + +internal U64 +dw_abbrev_offset_from_abbrev_id(DW_AbbrevTable table, U64 abbrev_id) +{ + U64 abbrev_offset = max_U64; + if(table.count > 0) + { + S64 min = 0; + S64 max = (S64)table.count - 1; + while(min <= max) + { + S64 mid = (min + max) / 2; + if (abbrev_id > table.entries[mid].id) + { + min = mid + 1; + } + else if (abbrev_id < table.entries[mid].id) + { + max = mid - 1; + } + else + { + abbrev_offset = table.entries[mid].off; + break; + } + } + } + return abbrev_offset; +} + +//////////////////////////////// +//~ rjf: Miscellaneous DWARF Section Parsing + +//- rjf: .debug_ranges (DWARF V4) + +internal Rng1U64List 
+dw_v4_range_list_from_range_offset(Arena *arena, DW_SectionArray *sections, U64 addr_size, U64 comp_unit_base_addr, U64 range_off) +{ + void *base = dw_base_from_sec(sections, DW_Section_Ranges); + Rng1U64 rng = dw_range_from_sec(sections, DW_Section_Ranges); + + Rng1U64List list = {0}; + + U64 read_off = range_off; + U64 base_addr = comp_unit_base_addr; + + for(;read_off < rng.max;) + { + U64 v0 = 0; + U64 v1 = 0; + read_off += based_range_read(base, rng, read_off, addr_size, &v0); + read_off += based_range_read(base, rng, read_off, addr_size, &v1); + + //- rjf: base address entry + if((addr_size == 4 && v0 == 0xffffffff) || + (addr_size == 8 && v0 == 0xffffffffffffffff)) + { + base_addr = v1; + } + //- rjf: end-of-list entry + else if(v0 == 0 && v1 == 0) + { + break; + } + //- rjf: range list entry + else + { + U64 min_addr = v0 + base_addr; + U64 max_addr = v1 + base_addr; + rng1u64_list_push(arena, &list, rng_1u64(min_addr, max_addr)); + } + } + + return list; +} + +//- rjf: .debug_pubtypes + .debug_pubnames (DWARF V4) + +internal DW_PubStringsTable +dw_v4_pub_strings_table_from_section_kind(Arena *arena, DW_SectionArray *sections, DW_SectionKind section_kind) +{ + Temp scratch = scratch_begin(&arena, 1); + + DW_PubStringsTable names_table = {0}; + + // TODO(rjf): Arbitrary choice. 
+ names_table.size = 16384; + names_table.buckets = push_array(arena, DW_PubStringsBucket*, names_table.size); + + void *base = dw_base_from_sec(sections, section_kind); + Rng1U64 rng = dw_range_from_sec(sections, section_kind); + DW_Mode mode = sections->v[section_kind].mode; + U64 off_size = dw_offset_size_from_mode(mode); + U64 cursor = 0; + + U64 table_length = 0; + U16 unit_version = 0; + U64 cu_info_off = 0; + U64 cu_info_len = 0; + cursor += dw_based_range_read_length(base, rng, cursor, &table_length); + cursor += based_range_read_struct(base, rng, cursor, &unit_version); + cursor += based_range_read(base, rng, cursor, off_size, &cu_info_off); + cursor += dw_based_range_read_length(base, rng, cursor, &cu_info_len); + + for(;;) + { + U64 info_off = 0; + { + U64 bytes_read = based_range_read(base, rng, cursor, off_size, &info_off); + cursor += bytes_read; + if(bytes_read == 0) + { + break; + } + } + + //- rjf: if we got a nonzero .debug_info offset, we've found a valid entry. + if(info_off != 0) + { + String8 string = based_range_read_string(base, rng, cursor); + cursor += string.size + 1; + + U64 hash = dw_hash_from_string(string); + U64 bucket_idx = hash % names_table.size; + + DW_PubStringsBucket *bucket = push_array(arena, DW_PubStringsBucket, 1); + bucket->next = names_table.buckets[bucket_idx]; + bucket->string = string; + bucket->info_off = info_off; + bucket->cu_info_off = cu_info_off; + names_table.buckets[bucket_idx] = bucket; + } + + //- rjf: if we did not read a proper entry in the table, we need to try to + // read the header of the next table. 
+ else + { + U64 next_table_length = 0; + { + U64 bytes_read = dw_based_range_read_length(base, rng, cursor, &next_table_length); + if(bytes_read == 0 || next_table_length == 0) + { + break; + } + cursor += bytes_read; + } + cursor += based_range_read_struct(base, rng, cursor, &unit_version); + cursor += based_range_read(base, rng, cursor, off_size, &cu_info_off); + cursor += dw_based_range_read_length(base, rng, cursor, &cu_info_len); + } + } + + scratch_end(scratch); + + return names_table; +} + +//- rjf: .debug_str_offsets (DWARF V5) + +internal U64 +dw_v5_offset_from_offs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index) +{ + U64 result = 0; + + DW_Mode mode = sections->v[section].mode; + void *sec_base = dw_base_from_sec(sections, section); + Rng1U64 rng = dw_range_from_sec(sections, section); + U64 cursor = base; + + //- rjf: get the length of each entry + U64 entry_len = mode == DW_Mode_64Bit ? 8 : 4; + + //- rjf: parse the unit's length (not including the length itself) + U64 unit_length = 0; + cursor += dw_based_range_read_length(sec_base, rng, cursor, &unit_length); + + //- rjf: parse version + U16 version = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &version); + Assert(version == 5); // must be 5 as of V5. + + //- rjf: parse padding + U16 padding = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &padding); + Assert(padding == 0); // must be 0 as of V5. 
+ + //- rjf: read + if (unit_length >= sizeof(U16)*2) + { + void *entries = (U8 *)sec_base + cursor; + U64 count = (unit_length - sizeof(U16)*2) / entry_len; + if(0 <= index && index < count) + { + switch(entry_len) + { + default: break; + case 4: result = ((U32 *)entries)[index]; break; + case 8: result = ((U64 *)entries)[index]; break; + } + } + } + + return result; +} + +//- rjf: .debug_addr parsing + +internal U64 +dw_v5_addr_from_addrs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index) +{ + U64 result = 0; + + void *sec_base = dw_base_from_sec(sections, section); + Rng1U64 rng = dw_range_from_sec(sections, section); + U64 cursor = base; + + //- rjf: parse the unit's length (not including the length itself) + U64 unit_length = 0; + cursor += dw_based_range_read_length(sec_base, rng, cursor, &unit_length); + + //- rjf: parse version + U16 version = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &version); + Assert(version == 5); // must be 5 as of V5. 
+ + //- rjf: parse address size + U8 address_size = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &address_size); + + //- rjf: parse segment selector size + U8 segment_selector_size = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &segment_selector_size); + + //- rjf: read + U64 entry_size = address_size + segment_selector_size; + U64 count = (unit_length - sizeof(U16)*2) / entry_size; + if(0 <= index && index < count) + { + void *entry = (U8 *)based_range_ptr(sec_base, rng, cursor) + entry_size*index; + Rng1U64 entry_rng = rng_1u64(0, entry_size); + U64 segment = 0; + U64 addr = 0; + based_range_read(entry, entry_rng, 0, sizeof(segment), &segment); + based_range_read(entry, entry_rng, segment_selector_size, sizeof(addr), &addr); + result = addr; + } + + return result; +} + +//- rjf: .debug_rnglists + .debug_loclists parsing + +internal U64 +dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(DW_SectionArray *sections, DW_SectionKind section_kind, U64 base, U64 index) +{ + // + // NOTE(rjf): This is only appropriate to call when DW_Form_RngListx is + // used to access a range list, *OR* when DW_Form_LocListx is used to + // access a location list. Otherwise, DW_Form_SecOffset is required. + // + // See the DWARF V5 spec (February 13, 2017), page 242. (rnglists) + // See the DWARF V5 spec (February 13, 2017), page 215. (loclists) + // + + U64 result = 0; + + DW_Mode mode = sections->v[section_kind].mode; + void *sec_base = dw_base_from_sec(sections, section_kind); + Rng1U64 rng = dw_range_from_sec(sections, section_kind); + U64 cursor = base; + + //- rjf: get the length of each entry + U64 entry_len = mode == DW_Mode_64Bit ? 
8 : 4; + + //- rjf: parse the unit's length (not including the length itself) + U64 unit_length = 0; + cursor += dw_based_range_read_length(sec_base, rng, cursor, &unit_length); + + //- rjf: parse version + U16 version = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &version); + Assert(version == 5); // must be 5 as of V5. + + //- rjf: parse address size + U8 address_size = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &address_size); + + //- rjf: parse segment selector size + U8 segment_selector_size = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &segment_selector_size); + + //- rjf: parse offset entry count + U32 offset_entry_count = 0; + cursor += based_range_read_struct(sec_base, rng, cursor, &offset_entry_count); + + //- rjf: read from offsets array + U64 table_off = cursor; + void *offsets_arr = based_range_ptr(sec_base, rng, cursor); + if(0 <= index && index < (U64)offset_entry_count) + { + U64 rnglist_offset = 0; + switch(entry_len) + { + default: break; + case 4: rnglist_offset = ((U32 *)offsets_arr)[index]; break; + case 8: rnglist_offset = ((U64 *)offsets_arr)[index]; break; + } + result = rnglist_offset+table_off; + } + + return result; +} + +internal Rng1U64List +dw_v5_range_list_from_rnglist_offset(Arena *arena, DW_SectionArray *sections, DW_SectionKind section, U64 addr_size, U64 addr_section_base, U64 offset) +{ + Rng1U64List list = {0}; + + void *base = dw_base_from_sec(sections, section); + Rng1U64 rng = dw_range_from_sec(sections, section); + U64 cursor = offset; + + U64 base_addr = 0; + + for(B32 done = 0; !done;) + { + U8 kind8 = 0; + cursor += based_range_read_struct(base, rng, cursor, &kind8); + DW_RngListEntryKind kind = (DW_RngListEntryKind)kind8; + + switch(kind) + { + //- rjf: can be used in split and non-split units: + default: + case DW_RngListEntryKind_EndOfList: + { + done = 1; + } break; + + case DW_RngListEntryKind_BaseAddressX: + { + U64 base_addr_idx = 0; + cursor += 
based_range_read_uleb128(base, rng, cursor, &base_addr_idx); + base_addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, addr_section_base, base_addr_idx); + } break; + + case DW_RngListEntryKind_StartxEndx: + { + U64 start_addr_idx = 0; + U64 end_addr_idx = 0; + cursor += based_range_read_uleb128(base, rng, cursor, &start_addr_idx); + cursor += based_range_read_uleb128(base, rng, cursor, &end_addr_idx); + U64 start_addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, addr_section_base, start_addr_idx); + U64 end_addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, addr_section_base, end_addr_idx); + rng1u64_list_push(arena, &list, rng_1u64(start_addr, end_addr)); + } break; + + case DW_RngListEntryKind_StartxLength: + { + U64 start_addr_idx = 0; + U64 length = 0; + cursor += based_range_read_uleb128(base, rng, cursor, &start_addr_idx); + cursor += based_range_read_uleb128(base, rng, cursor, &length); + U64 start_addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, addr_section_base, start_addr_idx); + U64 end_addr = start_addr + length; + rng1u64_list_push(arena, &list, rng_1u64(start_addr, end_addr)); + } break; + + case DW_RngListEntryKind_OffsetPair: + { + U64 start_offset = 0; + U64 end_offset = 0; + cursor += based_range_read_uleb128(base, rng, cursor, &start_offset); + cursor += based_range_read_uleb128(base, rng, cursor, &end_offset); + rng1u64_list_push(arena, &list, rng_1u64(start_offset + base_addr, end_offset + base_addr)); + } break; + + //- rjf: non-split units only: + + case DW_RngListEntryKind_BaseAddress: + { + U64 new_base_addr = 0; + cursor += based_range_read(base, rng, cursor, addr_size, &new_base_addr); + base_addr = new_base_addr; + } break; + + case DW_RngListEntryKind_StartEnd: + { + U64 start = 0; + U64 end = 0; + cursor += based_range_read(base, rng, cursor, addr_size, &start); + cursor += based_range_read(base, rng, cursor, addr_size, &end); + 
rng1u64_list_push(arena, &list, rng_1u64(start, end)); + } break; + + case DW_RngListEntryKind_StartLength: + { + U64 start = 0; + U64 length = 0; + cursor += based_range_read(base, rng, cursor, addr_size, &start); + cursor += based_range_read_uleb128(base, rng, cursor, &length); + rng1u64_list_push(arena, &list, rng_1u64(start, start+length)); + } break; + } + } + + return list; +} + +//////////////////////////////// +//~ rjf: Attrib Value Parsing + +internal DW_AttribValueResolveParams +dw_attrib_value_resolve_params_from_comp_root(DW_CompRoot *root) +{ + DW_AttribValueResolveParams params = {0}; + params.version = root->version; + params.language = root->language; + params.addr_size = root->address_size; + params.containing_unit_info_off = root->info_off; + params.debug_addrs_base = root->addrs_base; + params.debug_rnglists_base = root->rnglist_base; + params.debug_str_offs_base = root->stroffs_base; + params.debug_loclists_base = root->loclist_base; + return params; +} + +internal DW_AttribValue +dw_attrib_value_from_form_value(DW_SectionArray *sections, + DW_AttribValueResolveParams resolve_params, + DW_FormKind form_kind, + DW_AttribClass value_class, + DW_AttribValue form_value) +{ + DW_AttribValue value = {0}; + + //~ rjf: DWARF V5 value parsing + + //- rjf: (DWARF V5 ONLY) the form value is storing an address index (ADDRess indeX), which we + // must resolve to an actual address using the containing comp unit's contribution to the + // .debug_addr section. 
+ if(resolve_params.version >= DW_Version_5 && + value_class == DW_AttribClass_Address && + (form_kind == DW_Form_Addrx || form_kind == DW_Form_Addrx1 || + form_kind == DW_Form_Addrx2 || form_kind == DW_Form_Addrx3 || + form_kind == DW_Form_Addrx4)) + { + U64 addr_index = form_value.v[0]; + U64 addr = dw_v5_addr_from_addrs_section_base_index(sections, DW_Section_Addr, resolve_params.debug_addrs_base, addr_index); + value.v[0] = addr; + } + //- rjf: (DWARF V5 ONLY) lookup into the .debug_loclists section via an index + else if(resolve_params.version >= DW_Version_5 && + value_class == DW_AttribClass_LocList && + form_kind == DW_Form_LocListx) + { + U64 loclist_index = form_value.v[0]; + U64 loclist_offset = dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(sections, DW_Section_LocLists, resolve_params.debug_loclists_base, loclist_index); + value.section = DW_Section_LocLists; + value.v[0] = loclist_offset; + } + //- rjf: (DWARF V5 ONLY) lookup into the .debug_loclists section via an offset + else if(resolve_params.version >= DW_Version_5 && + (value_class == DW_AttribClass_LocList || value_class == DW_AttribClass_LocListPtr) && + form_kind == DW_Form_SecOffset) + { + U64 loclist_offset = form_value.v[0]; + value.section = DW_Section_LocLists; + value.v[0] = loclist_offset; + } + //- rjf: (DWARF V5 ONLY) lookup into the .debug_rnglists section via an index + else if(resolve_params.version >= DW_Version_5 && + (value_class == DW_AttribClass_RngListPtr || value_class == DW_AttribClass_RngList) && + form_kind == DW_Form_RngListx) + { + U64 rnglist_index = form_value.v[0]; + U64 rnglist_offset = dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(sections, DW_Section_RngLists, resolve_params.debug_rnglists_base, rnglist_index); + value.section = DW_Section_RngLists; + value.v[0] = rnglist_offset; + } + //- rjf: (DWARF V5 ONLY) lookup into the .debug_rnglists section via an offset + else if(resolve_params.version >= DW_Version_5 && + (value_class == 
DW_AttribClass_RngListPtr || value_class == DW_AttribClass_RngList) && + form_kind != DW_Form_RngListx) + { + U64 rnglist_offset = form_value.v[0]; + value.section = DW_Section_RngLists; + value.v[0] = rnglist_offset; + } + //- rjf: (DWARF V5 ONLY) .debug_str_offsets table index, that we need to resolve + // using the containing compilation unit's contribution to the section + else if(resolve_params.version >= DW_Version_5 && + value_class == DW_AttribClass_String && + (form_kind == DW_Form_Strx || + form_kind == DW_Form_Strx1 || + form_kind == DW_Form_Strx2 || + form_kind == DW_Form_Strx3 || + form_kind == DW_Form_Strx4)) + { + DW_SectionKind section = DW_Section_Str; + U64 str_index = form_value.v[0]; + U64 str_offset = dw_v5_offset_from_offs_section_base_index(sections, DW_Section_StrOffsets, resolve_params.debug_str_offs_base, str_index); + void *base = dw_base_from_sec(sections, section); + Rng1U64 range = dw_range_from_sec(sections, section); + String8 string = based_range_read_string(base, range, str_offset); + value.section = section; + value.v[0] = str_offset; + value.v[1] = value.v[0] + string.size; + } + //- rjf: (DWARF V5 ONLY) reference that we should resolve through ref_addr_desc + else if(resolve_params.version >= DW_Version_5 && + value_class == DW_AttribClass_Reference && + form_kind == DW_Form_RefAddr) + { + // TODO(nick): DWARF 5 @dwarf_v5 + } + //- TODO(rjf): (DWARF V5 ONLY) reference resolution using the .debug_names section + else if(resolve_params.version >= DW_Version_5 && + form_kind == DW_Form_RefSig8) + { + // TODO(nick): DWARF 5: We need to handle .debug_names section in order to resolve this value. 
@dwarf_v5 + value.v[0] = max_U64; + } + + //~ rjf: All other value parsing (DWARF V4 and below) + + //- rjf: reference to an offset relative to the compilation unit's info base + else if (value_class == DW_AttribClass_Reference && + (form_kind == DW_Form_Ref1 || + form_kind == DW_Form_Ref2 || + form_kind == DW_Form_Ref4 || + form_kind == DW_Form_Ref8 || + form_kind == DW_Form_RefUData)) + { + value.v[0] = resolve_params.containing_unit_info_off + form_value.v[0]; + } + + //- rjf: info-section string -- this is a string that is just pasted straight + // into the .debug_info section + else if(value_class == DW_AttribClass_String && form_kind == DW_Form_String) + { + value = form_value; + value.section = DW_Section_Info; + } + + //- rjf: string-section string -- this is a string that's inside the .debug_str + // section, and we've been provided an offset to it + else if(value_class == DW_AttribClass_String && + (form_kind == DW_Form_Strp || + form_kind == DW_Form_StrpSup)) + { + + DW_SectionKind section = DW_Section_Str; + void *base = dw_base_from_sec(sections, section); + Rng1U64 range = dw_range_from_sec(sections, section); + String8 string = based_range_read_string(base, range, form_value.v[0]); + value.section = section; + value.v[0] = form_value.v[0]; + value.v[1] = value.v[0] + string.size; + } + //- rjf: line-string + else if(value_class == DW_AttribClass_String && form_kind == DW_Form_LineStrp) + { + DW_SectionKind section = DW_Section_LineStr; + void *base = dw_base_from_sec(sections, section); + Rng1U64 range = dw_range_from_sec(sections, section); + String8 string = based_range_read_string(base, range, form_value.v[0]); + value.section = section; + value.v[0] = form_value.v[0]; + value.v[1] = value.v[0] + string.size; + } + //- rjf: .debug_ranges + else if(resolve_params.version < DW_Version_5 && + (value_class == DW_AttribClass_RngListPtr || value_class == DW_AttribClass_RngList) && + (form_kind == DW_Form_SecOffset)) + { + U64 ranges_offset = 
form_value.v[0]; + value.section = DW_Section_Ranges; + value.v[0] = ranges_offset; + } + //- rjf: .debug_loc + else if(resolve_params.version < DW_Version_5 && + (value_class == DW_AttribClass_LocListPtr || value_class == DW_AttribClass_LocList) && + (form_kind == DW_Form_SecOffset)) + { + U64 offset = form_value.v[0]; + value.section = DW_Section_Loc; + value.v[0] = offset; + } + //- rjf: invalid attribute class + else if(value_class == 0) + { + Assert(!"attribute class was not resolved"); + } + //- rjf: in all other cases, we can accept the form_value as the correct + // representation for the parsed value, so we can just copy it over. + else + { + value = form_value; + } + + return value; +} + +internal String8 +dw_string_from_attrib_value(DW_SectionArray *sections, DW_AttribValue value) +{ + DW_SectionKind section_kind = value.section; + void *base = dw_base_from_sec(sections, section_kind); + Rng1U64 range = dw_range_from_sec(sections, section_kind); + + String8 string = {0}; + string.str = (U8 *)based_range_ptr(base, range, value.v[0]); + string.size = value.v[1] - value.v[0]; + return string; +} + +internal Rng1U64List +dw_range_list_from_high_low_pc_and_ranges_attrib_value(Arena *arena, DW_SectionArray *sections, U64 address_size, U64 comp_unit_base_addr, U64 addr_section_base, U64 low_pc, U64 high_pc, DW_AttribValue ranges_value) +{ + Rng1U64List list = {0}; + switch(ranges_value.section) + { + //- rjf: (DWARF V5 ONLY) .debug_rnglists offset + case DW_Section_RngLists: + { + list = dw_v5_range_list_from_rnglist_offset(arena, sections, ranges_value.section, address_size, addr_section_base, ranges_value.v[0]); + } break; + + //- rjf: (DWARF V4 and earlier) .debug_ranges parsing + case DW_Section_Ranges: + { + list = dw_v4_range_list_from_range_offset(arena, sections, address_size, comp_unit_base_addr, ranges_value.v[0]); + } break; + + //- rjf: fall back to trying to use low/high PCs + default: + { + rng1u64_list_push(arena, &list, rng_1u64(low_pc, 
high_pc)); + } break; + } + return list; +} + +//////////////////////////////// +//~ rjf: Tag Parsing + +internal DW_AttribListParseResult +dw_parse_attrib_list_from_info_abbrev_offsets(Arena *arena, + DW_SectionArray *sections, + DW_Version ver, + DW_Ext ext, + DW_Language lang, + U64 address_size, + U64 info_off, + U64 abbrev_off) +{ + //- rjf: set up prereqs + DW_Mode info_mode = sections->v[DW_Section_Info].mode; + DW_Mode abbrev_mode = sections->v[DW_Section_Abbrev].mode; + void *info_base = dw_base_from_sec(sections, DW_Section_Info); + void *abbrev_base = dw_base_from_sec(sections, DW_Section_Abbrev); + Rng1U64 info_range = dw_range_from_sec(sections, DW_Section_Info); + Rng1U64 abbrev_range = dw_range_from_sec(sections, DW_Section_Abbrev); + + //- rjf: set up read offsets + U64 info_read_off = info_off; + U64 abbrev_read_off = abbrev_off; + + //- rjf: parse all attributes + DW_AttribListParseResult result = {0}; + for(B32 good_abbrev = 1; good_abbrev;) + { + U64 attrib_info_offset = info_read_off; + + //- rjf: parse abbrev attrib info + DW_Abbrev abbrev = {0}; + { + U64 bytes_read = dw_based_range_read_abbrev_attrib_info(abbrev_base, abbrev_range, abbrev_read_off, &abbrev); + abbrev_read_off += bytes_read; + good_abbrev = abbrev.id != 0; + } + + //- rjf: extract attrib info from abbrev + DW_AttribKind attrib_kind = (DW_AttribKind)abbrev.id; + DW_FormKind form_kind = (DW_FormKind)abbrev.sub_kind; + DW_AttribClass attrib_class = dw_pick_attrib_value_class(ver, ext, lang, attrib_kind, form_kind); + + //- rjf: parse the form value from the file + DW_AttribValue form_value = {0}; + if(good_abbrev) + { + // NOTE(nick): This is a special case form. Basically it let's user to + // define attribute form in the .debug_info. 
+ if(form_kind == DW_Form_Indirect) + { + U64 override_form_kind = 0; + info_read_off += based_range_read_uleb128(info_base, info_range, info_read_off, &override_form_kind); + form_kind = (DW_FormKind)override_form_kind; + } + U64 bytes_read = dw_based_range_read_attrib_form_value(info_base, info_range, info_read_off, info_mode, address_size, + form_kind, abbrev.const_value, &form_value); + info_read_off += bytes_read; + } + + //- rjf: push this parsed attrib to the list + if(good_abbrev) + { + DW_AttribNode *node = push_array(arena, DW_AttribNode, 1); + node->attrib.info_off = attrib_info_offset; + node->attrib.abbrev_id = abbrev.id; + node->attrib.attrib_kind = attrib_kind; + node->attrib.form_kind = form_kind; + node->attrib.value_class = attrib_class; + node->attrib.form_value = form_value; + result.attribs.count += 1; + SLLQueuePush(result.attribs.first, result.attribs.last, node); + } + } + + result.max_info_off = info_read_off; + result.max_abbrev_off = abbrev_read_off; + return result; +} + +internal DW_Tag * +dw_tag_from_info_offset(Arena *arena, + DW_SectionArray *sections, + DW_AbbrevTable abbrev_table, + DW_Version ver, + DW_Ext ext, + DW_Language lang, + U64 address_size, + U64 info_offset) +{ + void *info_base = dw_base_from_sec(sections, DW_Section_Info); + Rng1U64 info_range = dw_range_from_sec(sections, DW_Section_Info); + void *abbrev_base = dw_base_from_sec(sections, DW_Section_Abbrev); + Rng1U64 abbrev_range = dw_range_from_sec(sections, DW_Section_Abbrev); + + DW_Tag *tag = push_array(arena, DW_Tag, 1); + + //- rjf: calculate .debug_info read cursor, relative to info range minimum + U64 info_read_off = info_offset - info_range.min; + + //- rjf: read abbrev ID + U64 abbrev_id = 0; + info_read_off += based_range_read_uleb128(info_base, info_range, info_read_off, &abbrev_id); + B32 good_abbrev_id = abbrev_id != 0; + + //- rjf: figure out abbrev offset for this ID + U64 abbrev_offset = 0; + if(good_abbrev_id) + { + abbrev_offset = 
dw_abbrev_offset_from_abbrev_id(abbrev_table, abbrev_id); + } + + //- rjf: calculate .debug_abbrev read cursor, relative to abbrev range minimum + U64 abbrev_read_off = abbrev_offset - abbrev_range.min; + + //- rjf: parse abbrev tag info + DW_Abbrev abbrev_tag_info = {0}; + B32 good_tag_abbrev = 0; + if(good_abbrev_id) + { + abbrev_read_off += dw_based_range_read_abbrev_tag(abbrev_base, abbrev_range, abbrev_read_off, &abbrev_tag_info); + good_tag_abbrev = 1;//abbrev_tag_info.id != 0; + } + + //- rjf: parse all attributes for this tag + U64 attribs_info_off = 0; + U64 attribs_abbrev_off = 0; + DW_AttribList attribs = {0}; + if(good_tag_abbrev) + { + DW_AttribListParseResult attribs_parse = dw_parse_attrib_list_from_info_abbrev_offsets(arena, sections, ver, ext, lang, address_size, info_read_off, abbrev_read_off); + attribs_info_off = info_read_off; + attribs_abbrev_off = abbrev_read_off; + info_read_off = attribs_parse.max_info_off; + abbrev_read_off = attribs_parse.max_abbrev_off; + attribs = attribs_parse.attribs; + } + + //- rjf: fill tag + { + tag->abbrev_id = abbrev_id; + tag->info_range = rng_1u64(info_offset, info_range.min + info_read_off); + tag->abbrev_range = rng_1u64(abbrev_offset, abbrev_range.min + abbrev_read_off); + tag->has_children = !!(abbrev_tag_info.flags & DW_AbbrevFlag_HasChildren); + tag->kind = (DW_TagKind)abbrev_tag_info.sub_kind; + tag->attribs_info_off = attribs_info_off; + tag->attribs_abbrev_off = attribs_abbrev_off; + tag->attribs = attribs; + } + + return tag; +} + +//////////////////////////////// + +internal U64 +dw_v5_header_offset_from_table_offset(DW_SectionArray *sections, DW_SectionKind section, U64 table_off) +{ + // NOTE(rjf): From the DWARF V5 spec (February 13, 2017), page 401: + // + // " + // Each skeleton compilation unit also has a DW_AT_addr_base attribute, + // which provides the relocated offset to that compilation unit’s + // contribution in the executable’s .debug_addr section. 
Unlike the + // DW_AT_stmt_list attribute, the offset refers to the first address table + // slot, not to the section header. In this example, we see that the first + // address (slot 0) from demo1.o begins at offset 48. Because the + // .debug_addr section contains an 8-byte header, the object file’s + // contribution to the section actually begins at offset 40 (for a 64-bit + // DWARF object, the header would be 16 bytes long, and the value for the + // DW_AT_addr_base attribute would then be 56). All attributes in demo1.dwo + // that use DW_FORM_addrx, DW_FORM_addrx1, DW_FORM_addrx2, DW_FORM_addrx3 + // or DW_FORM_addrx4 would then refer to address table slots relative to + // that offset. Likewise, the .debug_addr contribution from demo2.dwo begins + // at offset 72, and its first address slot is at offset 80. Because these + // contributions have been processed by the linker, they contain relocated + // values for the addresses in the program that are referred to by the + // debug information. + // " + // + // This seems to at least partially explain why the addr_base is showing up + // 8 bytes later than we are expecting it to. We can't actually just store + // the base that we read from the DW_Attrib_AddrBase attrib, because + // it's showing up *after* the header, so we need to bump it back. + + // NOTE(rjf): From the DWARF V5 spec (February 13, 2017), page 66: + // + // " + // A DW_AT_rnglists_base attribute, whose value is of class rnglistsptr. This + // attribute points to the beginning of the offsets table (immediately + // following the header) of the compilation unit's contribution to the + // .debug_rnglists section. References to range lists (using DW_FORM_rnglistx) + // within the compilation unit are interpreted relative to this base. + // " + // + // Similarly, we need to figure out where to go to parse the header. 
+ + U64 max_header_size = 0; + U64 min_header_size = 0; + switch(section) + { + default: + case DW_Section_Addr: + { + max_header_size = 16; + min_header_size = 8; + } break; + case DW_Section_StrOffsets: + { + max_header_size = 16; + min_header_size = 8; + } break; + case DW_Section_RngLists: + { + max_header_size = 20; + min_header_size = 12; + } break; + case DW_Section_LocLists: + { + // TODO(rjf) + NotImplemented; + } break; + } + + U64 past_header = table_off; + void *addr_base = dw_base_from_sec(sections, section); + Rng1U64 addr_rng = dw_range_from_sec(sections, section); + + //- rjf: figure out which sized header we have + U64 header_size = 0; + { + // rjf: try max header, and if it works, the header is the max size, otherwise we will + // need to rely on the min header size + U32 first32 = 0; + based_range_read_struct(addr_base, addr_rng, past_header-max_header_size, &first32); + if(first32 == max_U32) + { + header_size = max_header_size; + } + else + { + header_size = min_header_size; + } + } + + return table_off - header_size; +} + +// Walks the unit length fields of a .debug_info section and returns one byte range per unit, in section order. +// Each range covers the unit's length field plus the unit_length bytes that follow it; list nodes are pushed onto `arena`. +// Ranges never extend past the end of the section: a unit whose declared length overshoots is clamped, which also ends the walk. +internal Rng1U64List +dw_comp_unit_ranges_from_info(Arena *arena, DW_Section info) +{ + Rng1U64List result = {0}; + void *base = info.data.str; + Rng1U64 range = rng_1u64(0, info.data.size); + for(U64 cursor = 0; cursor < info.data.size; ) + { + // read unit length + U64 unit_length = 0; + U64 bytes_read = dw_based_range_read_length(base, range, cursor, &unit_length); + + // was read ok? 
+ if(bytes_read == 0) + { + break; + } + + // clamp unit size to what is left in the section; this also catches U64 wrap-around from a bogus length, which could otherwise stall the cursor and loop forever + U64 unit_size = unit_length+bytes_read; + U64 max_unit_size = info.data.size - cursor; + if(unit_size < unit_length || unit_size > max_unit_size) + { + unit_size = max_unit_size; + } + + // push unit range + rng1u64_list_push(arena, &result, rng_1u64(cursor, cursor+unit_size)); + + // advance + cursor += unit_size; + } + return result; +} + +internal DW_CompRoot +dw_comp_root_from_range(Arena *arena, DW_SectionArray *sections, Rng1U64 range) +{ + Temp scratch = scratch_begin(&arena, 1); + + void *info_base = dw_base_from_sec(sections, DW_Section_Info); + B32 is_info_dwo = sections->v[DW_Section_Info].is_dwo; + + //- rjf: up-front known parsing offsets (yep, that's right, it's only 1!) + U64 size_off = 0; + + //- rjf: parse size of this compilation unit's data + U64 size = 0; + U64 version_off = size_off; + { + U64 bytes_read = dw_based_range_read_length(info_base, range, size_off, &size); + version_off += bytes_read; + } + + //- rjf: parse version + B32 got_version = 0; + DW_Version version = 0; + U64 unit_off = version_off; + if(based_range_read_struct(info_base, range, version_off, &version)) + { + unit_off += sizeof(version); + got_version = 1; + } + + //- rjf: parse unit kind, abbrev_base, address size + B32 got_unit_kind = 0; + U64 next_off = unit_off; + DW_CompUnitKind unit_kind = DW_CompUnitKind_Reserved; + U64 abbrev_base = max_U64; + U64 address_size = 0; + U64 spec_dwo_id = 0; + if(got_version) + { + switch(version) + { + default: break; + case DW_Version_2: { + abbrev_base = 0; + next_off += based_range_read(info_base, range, next_off, 4, &abbrev_base); + next_off += based_range_read(info_base, range, next_off, 1, &address_size); + got_unit_kind = 1; + } break; + case DW_Version_3: + case DW_Version_4: + { + next_off += dw_based_range_read_length(info_base, range, next_off, &abbrev_base); + next_off += based_range_read(info_base, range, next_off, 1, &address_size); + got_unit_kind = 1; + } break; + case DW_Version_5: + { + next_off += based_range_read_struct(info_base, range, next_off, &unit_kind); + next_off += based_range_read(info_base, range, next_off, 1, 
&address_size); + next_off += dw_based_range_read_length(info_base, range, next_off, &abbrev_base); + got_unit_kind = 1; + + //- rjf: parse DWO ID if appropriate + if(unit_kind == DW_CompUnitKind_Skeleton || is_info_dwo) + { + next_off += based_range_read(info_base, range, next_off, 8, &spec_dwo_id); + } + } break; + } + } + + //- rjf: build abbrev table + DW_AbbrevTable abbrev_table = {0}; + if(got_unit_kind) + { + abbrev_table = dw_make_abbrev_table(arena, sections, abbrev_base); + } + + //- rjf: parse compilation unit's tag + B32 got_comp_unit_tag = 0; + DW_Tag *comp_unit_tag = 0; + if(got_unit_kind) + { + U64 comp_root_tag_off = range.min + next_off; + comp_unit_tag = dw_tag_from_info_offset(scratch.arena, sections, abbrev_table, version, DW_Ext_Null, DW_Language_Null, address_size, comp_root_tag_off); + got_comp_unit_tag = 1; + } + + //- rjf: get all of the attribute values we need to start resolving attribute values + DW_AttribValueResolveParams resolve_params = { .version = version }; + if(got_comp_unit_tag) + { + for(DW_AttribNode *attrib_n = comp_unit_tag->attribs.first; attrib_n; attrib_n = attrib_n->next) + { + DW_Attrib *attrib = &attrib_n->attrib; + + // NOTE(rjf): We'll have to rely on just the form value at this point, + // since we can't use the unit yet (since we're currently in the process + // of building it). This should always be enough, otherwise there would + // be a cyclic dependency in the requirements of each part of the + // compilation unit's parse. DWARF is pretty crazy, but not *that* crazy, + // so this should be good. 
+ switch(attrib->attrib_kind) + { + default: break; + case DW_Attrib_AddrBase: resolve_params.debug_addrs_base = attrib->form_value.v[0]; break; + case DW_Attrib_StrOffsetsBase: resolve_params.debug_str_offs_base = attrib->form_value.v[0]; break; + case DW_Attrib_RngListsBase: resolve_params.debug_rnglists_base = attrib->form_value.v[0]; break; + case DW_Attrib_LocListsBase: resolve_params.debug_loclists_base = attrib->form_value.v[0]; break; + } + } + } + + //- rjf: correct table offsets to header offsets (since DWARF V5 insists on being as useless as possible) + if(got_comp_unit_tag && version >= DW_Version_5) + { + resolve_params.debug_addrs_base = dw_v5_header_offset_from_table_offset(sections, DW_Section_Addr, resolve_params.debug_addrs_base); + resolve_params.debug_str_offs_base = dw_v5_header_offset_from_table_offset(sections, DW_Section_StrOffsets, resolve_params.debug_str_offs_base); + resolve_params.debug_loclists_base = dw_v5_header_offset_from_table_offset(sections, DW_Section_LocLists, resolve_params.debug_loclists_base); + resolve_params.debug_rnglists_base = dw_v5_header_offset_from_table_offset(sections, DW_Section_RngLists, resolve_params.debug_rnglists_base); + } + + //- rjf: parse the rest of the compilation unit tag's attributes that we'd + // like to cache + String8 name = {0}; + String8 producer = {0}; + String8 compile_dir = {0}; + String8 external_dwo_name = {0}; + String8 external_gnu_dwo_name = {0}; + U64 gnu_dwo_id = 0; + U64 language = 0; + U64 name_case = 0; + B32 use_utf8 = 0; + U64 low_pc = 0; + U64 high_pc = 0; + B32 high_pc_is_relative = 0; + DW_AttribValue ranges_attrib_value = {DW_Section_Null}; + U64 line_base = 0; + if(got_comp_unit_tag) + { + for(DW_AttribNode *attrib_n = comp_unit_tag->attribs.first; attrib_n; attrib_n = attrib_n->next) + { + DW_Attrib *attrib = &attrib_n->attrib; + + //- rjf: form value => value + DW_AttribValue value = {0}; + B32 good_value = 0; + { + if(dw_are_attrib_class_and_form_kind_compatible(version, 
attrib->value_class, attrib->form_kind)) + { + value = dw_attrib_value_from_form_value(sections, resolve_params, attrib->form_kind, attrib->value_class, attrib->form_value); + good_value = 1; + } + } + + //- rjf: map value to extracted info + if(good_value) + { + switch(attrib->attrib_kind) + { + case DW_Attrib_Name: name = dw_string_from_attrib_value(sections, value); break; + case DW_Attrib_Producer: producer = dw_string_from_attrib_value(sections, value); break; + case DW_Attrib_CompDir: compile_dir = dw_string_from_attrib_value(sections, value); break; + case DW_Attrib_DwoName: external_dwo_name = dw_string_from_attrib_value(sections, value); break; + case DW_Attrib_GNU_DwoName: external_gnu_dwo_name = dw_string_from_attrib_value(sections, value); break; + case DW_Attrib_GNU_DwoId: gnu_dwo_id = value.v[0]; break; + case DW_Attrib_Language: language = value.v[0]; break; + case DW_Attrib_IdentifierCase: name_case = value.v[0]; break; + case DW_Attrib_UseUtf8: use_utf8 = (B32)value.v[0]; break; + case DW_Attrib_LowPc: low_pc = value.v[0]; break; + case DW_Attrib_HighPc: high_pc = value.v[0]; high_pc_is_relative = attrib->value_class != DW_AttribClass_Address; break; + case DW_Attrib_Ranges: ranges_attrib_value = value; break; + case DW_Attrib_StmtList: line_base = value.v[0]; break; + default: break; + } + } + } + } + + //- rjf: build+fill unit + DW_CompRoot unit = {0}; + + //- rjf: fill header data + unit.size = size; + unit.kind = unit_kind; + unit.version = version; + unit.ext = DW_Ext_Null; + unit.address_size = address_size; + unit.abbrev_off = abbrev_base; + unit.info_off = range.min; + unit.tags_info_range = rng_1u64(range.min+next_off, range.max); + unit.abbrev_table = abbrev_table; + + //- rjf: fill out offsets we need for attrib value resolution + unit.rnglist_base = resolve_params.debug_rnglists_base; + unit.loclist_base = resolve_params.debug_loclists_base; + unit.addrs_base = resolve_params.debug_addrs_base; + unit.stroffs_base = 
resolve_params.debug_str_offs_base; + + //- rjf: fill out general info + unit.name = name; + unit.producer = producer; + unit.compile_dir = compile_dir; + unit.external_dwo_name = external_dwo_name.size != 0 ? external_dwo_name : external_gnu_dwo_name; + if(external_dwo_name.size) + { + unit.dwo_id = spec_dwo_id; + } + else if(external_gnu_dwo_name.size) + { + unit.dwo_id = gnu_dwo_id; + } + unit.language = (DW_Language)language; + unit.name_case = name_case; + unit.use_utf8 = use_utf8; + unit.line_off = line_base; + unit.low_pc = low_pc; + unit.high_pc = high_pc; + unit.ranges_attrib_value = ranges_attrib_value; + + //- rjf: fill fixup of low/high PC situation + if(high_pc_is_relative) + { + unit.high_pc += unit.low_pc; + } + + //- rjf: fill base address + { + unit.base_addr = unit.low_pc; + } + + //- rjf: build+fill directory and file tables + { + DW_Mode line_mode = dw_mode_from_sec(sections, DW_Section_Line); + void *line_base = dw_base_from_sec(sections, DW_Section_Line); + Rng1U64 line_rng = dw_range_from_sec(sections, DW_Section_Line); + DW_LineVMHeader vm_header = {0}; + U64 read_size = dw_read_line_vm_header(arena, line_base, line_rng, unit.line_off, line_mode, sections, &unit, &vm_header); + if (read_size > 0) { + unit.dir_table = vm_header.dir_table; + unit.file_table = vm_header.file_table; + } + } + + scratch_end(scratch); + return unit; +} + +internal DW_ExtDebugRef +dw_ext_debug_ref_from_comp_root(DW_CompRoot *root) +{ + DW_ExtDebugRef ref = {0}; + ref.dwo_path = root->external_dwo_name; + ref.dwo_id = root->dwo_id; + return ref; +} + +//- rjf: line info + +internal void +dw_line_vm_reset(DW_LineVMState *state, B32 default_is_stmt) +{ + state->address = 0; + state->op_index = 0; + state->file_index = 1; + state->line = 1; + state->column = 0; + state->is_stmt = default_is_stmt; + state->basic_block = 0; + state->prologue_end = 0; + state->epilogue_begin = 0; + state->isa = 0; + state->discriminator = 0; +} + +internal void 
+dw_line_vm_advance(DW_LineVMState *state, U64 advance, U64 min_inst_len, U64 max_ops_for_inst) +{ + U64 op_index = state->op_index + advance; + state->address += min_inst_len*(op_index/max_ops_for_inst); + state->op_index = op_index % max_ops_for_inst; +} + +internal DW_LineSeqNode * +dw_push_line_seq(Arena* arena, DW_LineTableParseResult *parsed_tbl) +{ + DW_LineSeqNode *new_seq = push_array(arena, DW_LineSeqNode, 1); + SLLQueuePush(parsed_tbl->first_seq, parsed_tbl->last_seq, new_seq); + parsed_tbl->seq_count += 1; + return new_seq; +} + +internal DW_LineNode * +dw_push_line(Arena *arena, DW_LineTableParseResult *tbl, DW_LineVMState *vm_state, B32 start_of_sequence) +{ + DW_LineNode *n = 0; + if(vm_state->busted_seq == 0) + { + DW_LineSeqNode *seq = tbl->last_seq; + if(seq == 0 || start_of_sequence) + { + // ERROR! do not emit sequences with only one line... + if (seq) Assert(seq->count > 1); + + seq = dw_push_line_seq(arena, tbl); + } + + n = push_array(arena, DW_LineNode, 1); + n->v.file_index = vm_state->file_index; + n->v.line = vm_state->line; + n->v.column = vm_state->column; + n->v.voff = vm_state->address; + + SLLQueuePush(seq->first, seq->last, n); + seq->count += 1; + } + return n; +} + +// Executes the DWARF line-number program of `root` (header read at root->line_off within the line section) and returns the rows it emits, grouped into sequences; all nodes are pushed onto `arena`. +// Parsing stops at the end of the unit or at the first malformed/unsupported opcode, in which case the rows gathered so far are returned. +internal DW_LineTableParseResult +dw_parsed_line_table_from_comp_root(Arena *arena, DW_SectionArray *sections, DW_CompRoot *root) +{ + DW_Mode mode = sections->v[DW_Section_Line].mode; + void *base = dw_base_from_sec(sections, DW_Section_Line); + Rng1U64 line_info_range = dw_range_from_sec(sections, DW_Section_Line); + U64 read_off_start = root->line_off - line_info_range.min; + U64 cursor = read_off_start; + + DW_LineVMHeader vm_header = {0}; + cursor += dw_read_line_vm_header(arena, base, line_info_range, cursor, mode, sections, root, &vm_header); + + //- rjf: prep state for VM + DW_LineVMState vm_state = {0}; + dw_line_vm_reset(&vm_state, vm_header.default_is_stmt); + + //- rjf: VM loop; build output list + DW_LineTableParseResult result = {0}; + B32 end_of_seq = 0; 
+ B32 error = 0; + for (;!error && cursor < vm_header.unit_opl;) { + //- rjf: parse opcode + U8 opcode = 0; + cursor += based_range_read_struct(base, line_info_range, cursor, &opcode); + + //- rjf: do opcode action + switch (opcode) { + default: + { + //- rjf: special opcode case + if(opcode >= vm_header.opcode_base) + { + U32 adjusted_opcode = (U32)(opcode - vm_header.opcode_base); + U32 op_advance = adjusted_opcode / vm_header.line_range; + S32 line_inc = (S32)vm_header.line_base + ((S32)adjusted_opcode) % (S32)vm_header.line_range; + // TODO: can we just call dw_advance_line_vm_state_pc + U64 addr_inc = vm_header.min_inst_len * ((vm_state.op_index+op_advance) / vm_header.max_ops_for_inst); + + vm_state.address += addr_inc; + vm_state.op_index = (vm_state.op_index + op_advance) % vm_header.max_ops_for_inst; + vm_state.line = (U32)((S32)vm_state.line + line_inc); + vm_state.basic_block = 0; + vm_state.prologue_end = 0; + vm_state.epilogue_begin = 0; + vm_state.discriminator = 0; + + dw_push_line(arena, &result, &vm_state, end_of_seq); + end_of_seq = 0; + +#if 0 + // NOTE(rjf): DWARF has dummy lines at the end of groups of line ranges, where we'd like + // to break line info into sequences. + if(vm_state.line == 0) + { + end_of_seq = 1; + } +#endif + } + // Skipping unknown opcode. This is a valid case and + // it works because compiler stores operand lengths. 
+ else + { + if(opcode > 0 && opcode <= vm_header.num_opcode_lens) + { + U8 num_operands = vm_header.opcode_lens[opcode - 1]; + for(U8 i = 0; i < num_operands; ++i) + { + U64 operand = 0; + cursor += based_range_read_uleb128(base, line_info_range, cursor, &operand); + } + } + else + { + error = 1; + goto exit; + } + } + } break; + + //- Standard opcodes + + case DW_StdOpcode_Copy: + { + dw_push_line(arena, &result, &vm_state, end_of_seq); + end_of_seq = 0; + vm_state.discriminator = 0; + vm_state.basic_block = 0; + vm_state.prologue_end = 0; + vm_state.epilogue_begin = 0; + } break; + + case DW_StdOpcode_AdvancePc: + { + U64 advance = 0; + cursor += based_range_read_uleb128(base, line_info_range, cursor, &advance); + dw_line_vm_advance(&vm_state, advance, vm_header.min_inst_len, vm_header.max_ops_for_inst); + } break; + + case DW_StdOpcode_AdvanceLine: + { + S64 s = 0; + cursor += based_range_read_sleb128(base, line_info_range, cursor, &s); + vm_state.line += s; + } break; + + case DW_StdOpcode_SetFile: + { + U64 file_index = 0; + cursor += based_range_read_uleb128(base, line_info_range, cursor, &file_index); + vm_state.file_index = file_index; + } break; + + case DW_StdOpcode_SetColumn: + { + U64 column = 0; + cursor += based_range_read_uleb128(base, line_info_range, cursor, &column); + vm_state.column = column; + } break; + + case DW_StdOpcode_NegateStmt: + { + vm_state.is_stmt = !vm_state.is_stmt; + } break; + + case DW_StdOpcode_SetBasicBlock: + { + vm_state.basic_block = 1; + } break; + + case DW_StdOpcode_ConstAddPc: + { + U64 advance = (0xffu - vm_header.opcode_base)/vm_header.line_range; + dw_line_vm_advance(&vm_state, advance, vm_header.min_inst_len, vm_header.max_ops_for_inst); + } break; + + case DW_StdOpcode_FixedAdvancePc: + { + U16 operand = 0; + cursor += based_range_read_struct(base, line_info_range, cursor, &operand); + vm_state.address += operand; + vm_state.op_index = 0; + } break; + + case DW_StdOpcode_SetPrologueEnd: + { + vm_state.prologue_end 
+ = 1; + } break; + + case DW_StdOpcode_SetEpilogueBegin: + { + vm_state.epilogue_begin = 1; + } break; + + case DW_StdOpcode_SetIsa: + { + U64 v = 0; + cursor += based_range_read_uleb128(base, line_info_range, cursor, &v); + vm_state.isa = v; + } break; + + //- Extended opcodes + case DW_StdOpcode_ExtendedOpcode: + { + U64 length = 0; + cursor += based_range_read_uleb128(base, line_info_range, cursor, &length); + U64 start_off = cursor; + U8 extended_opcode = 0; + cursor += based_range_read_struct(base, line_info_range, cursor, &extended_opcode); + + switch (extended_opcode) { + case DW_ExtOpcode_EndSequence: + { + vm_state.end_sequence = 1; + dw_push_line(arena, &result, &vm_state, 0); + dw_line_vm_reset(&vm_state, vm_header.default_is_stmt); + end_of_seq = 1; + } break; + + case DW_ExtOpcode_SetAddress: + { + U64 address = 0; + cursor += based_range_read(base, line_info_range, cursor, root->address_size, &address); + vm_state.address = address; + vm_state.op_index = 0; + // NOTE(review): dw_push_line drops every row while busted_seq is set, so this discards all sequences with a non-zero base address; the disabled range check suggests the intent was to reject only bad addresses (e.g. address == 0) - confirm. + vm_state.busted_seq = address != 0; // !(dbg->acceptable_vrange.min <= address && address < dbg->acceptable_vrange.max); + } break; + + case DW_ExtOpcode_DefineFile: + { + String8 file_name = based_range_read_string(base, line_info_range, cursor); + U64 dir_index = 0; + U64 modify_time = 0; + U64 file_size = 0; + cursor += file_name.size + 1; + cursor += based_range_read_uleb128(base, line_info_range, cursor, &dir_index); + cursor += based_range_read_uleb128(base, line_info_range, cursor, &modify_time); + cursor += based_range_read_uleb128(base, line_info_range, cursor, &file_size); + + // TODO(rjf): Not fully implemented. By the DWARF V4 spec, the above is + // all that needs to be parsed, but the rest of the work that needs to + // happen here---allowing this file to be used by further opcodes---is + // not implemented. + // + // See the DWARF V4 spec (June 10, 2010), page 122. 
+ error = 1; + AssertAlways(!"UNHANDLED DEFINE FILE!!!"); + } break; + + case DW_ExtOpcode_SetDiscriminator: + { + U64 v = 0; + cursor += based_range_read_uleb128(base, line_info_range, cursor, &v); + vm_state.discriminator = v; + } break; + + default: break; + } + + // NOTE: `length` counts the bytes after the length field (opcode byte included), so the next opcode starts at start_off + length. + // Jump there directly so unknown or padded extended opcodes are skipped; having read past that point means the opcode is malformed. + B32 overread = cursor > start_off + length; + cursor = start_off + length; + if (overread || based_range_ptr(base, line_info_range, cursor) == 0) { + error = 1; + } + + } break; + } + } + exit:; + + return result; +} + +internal U64 +dw_read_line_file(void *line_base, Rng1U64 line_rng, U64 line_off, DW_Mode mode, DW_SectionArray *sections, DW_CompRoot *unit, U8 address_size, U64 format_count, Rng1U64 *formats, DW_LineFile *line_file_out) +{ + MemoryZeroStruct(line_file_out); + + DW_AttribValueResolveParams resolve_params = dw_attrib_value_resolve_params_from_comp_root(unit); + U64 line_off_start = line_off; + for (U64 format_idx = 0; format_idx < format_count; ++format_idx) + { + DW_LNCT lnct = (DW_LNCT)formats[format_idx].min; + DW_FormKind form_kind = (DW_FormKind)formats[format_idx].max; + DW_AttribValue form_value = {0}; + line_off += dw_based_range_read_attrib_form_value(line_base, line_rng, line_off, mode, address_size, form_kind, 0, &form_value); + switch (lnct) + { + case DW_LNCT_Path: + { + Assert(form_kind == DW_Form_String || form_kind == DW_Form_LineStrp || + form_kind == DW_Form_Strp || form_kind == DW_Form_StrpSup || + form_kind == DW_Form_Strx || form_kind == DW_Form_Strx1 || + form_kind == DW_Form_Strx2 || form_kind == DW_Form_Strx3 || + form_kind == DW_Form_Strx4); + DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_String, form_value); + line_file_out->file_name = dw_string_from_attrib_value(sections, attrib_value); + } break; + + case DW_LNCT_DirectoryIndex: + { + Assert(form_kind == DW_Form_Data1 || form_kind == DW_Form_Data2 || + form_kind == DW_Form_UData); + DW_AttribValue attrib_value = 
dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_Block, form_value); + line_file_out->dir_idx = attrib_value.v[0]; + } break; + + case DW_LNCT_TimeStamp: + { + Assert(form_kind == DW_Form_UData || form_kind == DW_Form_Data4 || + form_kind == DW_Form_Data8 || form_kind == DW_Form_Block); + DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_Const, form_value); + line_file_out->modify_time = attrib_value.v[0]; + } break; + + case DW_LNCT_Size: + { + Assert(form_kind == DW_Form_UData || form_kind == DW_Form_Data1 || + form_kind == DW_Form_Data2 || form_kind == DW_Form_Data4 || + form_kind == DW_Form_Data8); + DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_Block, form_value); + line_file_out->file_size = attrib_value.v[0]; + } break; + + case DW_LNCT_MD5: + { + Assert(form_kind == DW_Form_Data16); + DW_AttribValue attrib_value = dw_attrib_value_from_form_value(sections, resolve_params, form_kind, DW_AttribClass_Block, form_value); + line_file_out->md5_digest[0] = attrib_value.v[0]; + line_file_out->md5_digest[1] = attrib_value.v[1]; + } break; + + default: + { + Assert(DW_LNCT_UserLo < lnct && lnct < DW_LNCT_UserHi); + } break; + } + } + U64 result = line_off - line_off_start; + return result; +} + +internal U64 +dw_read_line_vm_header(Arena *arena, + void *line_base, + Rng1U64 line_rng, + U64 line_off, + DW_Mode mode, + DW_SectionArray *sections, + DW_CompRoot *unit, + DW_LineVMHeader *header_out) +{ + U64 line_off_start = line_off; + + Temp scratch = scratch_begin(&arena, 1); + + //- rjf: parse unit length + header_out->unit_length = 0; + { + line_off += dw_based_range_read_length(line_base, line_rng, line_off, &header_out->unit_length); + } + + header_out->unit_opl = line_off + header_out->unit_length; + + //- rjf: parse version and header length + line_off += based_range_read_struct(line_base, line_rng, 
line_off, &header_out->version); + + if (header_out->version == DW_Version_5) { + line_off += based_range_read_struct(line_base, line_rng, line_off, &header_out->address_size); + line_off += based_range_read_struct(line_base, line_rng, line_off, &header_out->segment_selector_size); + } + + { + U64 off_size = dw_offset_size_from_mode(mode); + line_off += based_range_read(line_base, line_rng, line_off, off_size, &header_out->header_length); + } + + //- rjf: calculate program offset + header_out->program_off = line_off + header_out->header_length; + + //- rjf: parse minimum instruction length + { + line_off += based_range_read_struct(line_base, line_rng, line_off, &header_out->min_inst_len); + } + + //- rjf: parse max ops for instruction + switch (header_out->version) + { + case DW_Version_5: + case DW_Version_4: + { + line_off += based_range_read_struct(line_base, line_rng, line_off, &header_out->max_ops_for_inst); + Assert(header_out->max_ops_for_inst > 0); + } break; + case DW_Version_3: + case DW_Version_2: + case DW_Version_1: + { + header_out->max_ops_for_inst = 1; + } break; + default: break; + } + + //- rjf: parse rest of program info + based_range_read_struct(line_base, line_rng, line_off + 0 * sizeof(U8), &header_out->default_is_stmt); + based_range_read_struct(line_base, line_rng, line_off + 1 * sizeof(U8), &header_out->line_base); + based_range_read_struct(line_base, line_rng, line_off + 2 * sizeof(U8), &header_out->line_range); + based_range_read_struct(line_base, line_rng, line_off + 3 * sizeof(U8), &header_out->opcode_base); + line_off += 4 * sizeof(U8); + Assert(header_out->opcode_base != 0 && header_out->opcode_base > 0); + + //- rjf: calculate opcode length array + header_out->num_opcode_lens = header_out->opcode_base - 1u; + header_out->opcode_lens = (U8 *)based_range_ptr(line_base, line_rng, line_off); + line_off += header_out->num_opcode_lens * sizeof(U8); + + if (header_out->version == DW_Version_5) { + //- parse directory names + U8 
directory_entry_format_count = 0; + line_off += based_range_read_struct(line_base, line_rng, line_off, &directory_entry_format_count); + Assert(directory_entry_format_count == 1); + Rng1U64 *directory_entry_formats = push_array(scratch.arena, Rng1U64, directory_entry_format_count); + for (U8 format_idx = 0; format_idx < directory_entry_format_count; ++format_idx) + { + U64 content_type_code = 0, form_code = 0; + line_off += based_range_read_uleb128(line_base, line_rng, line_off, &content_type_code); + line_off += based_range_read_uleb128(line_base, line_rng, line_off, &form_code); + directory_entry_formats[format_idx] = rng_1u64(content_type_code, form_code); + } + U64 directories_count = 0; + line_off += based_range_read_uleb128(line_base, line_rng, line_off, &directories_count); + header_out->dir_table.count = directories_count; + header_out->dir_table.v = push_array(arena, String8, header_out->dir_table.count); + for (U64 dir_idx = 0; dir_idx < directories_count; ++dir_idx) + { + DW_LineFile line_file; + line_off += dw_read_line_file(line_base, + line_rng, + line_off, + mode, + sections, + unit, + header_out->address_size, + directory_entry_format_count, + directory_entry_formats, + &line_file); + header_out->dir_table.v[dir_idx] = push_str8_copy(arena, line_file.file_name); + } + //- parse file table + U8 file_name_entry_format_count = 0; + line_off += based_range_read_struct(line_base, line_rng, line_off, &file_name_entry_format_count); + Rng1U64 *file_name_entry_formats = push_array(scratch.arena, Rng1U64, file_name_entry_format_count); + for (U8 format_idx = 0; format_idx < file_name_entry_format_count; ++format_idx) + { + U64 content_type_code = 0, form_code = 0; + line_off += based_range_read_uleb128(line_base, line_rng, line_off, &content_type_code); + line_off += based_range_read_uleb128(line_base, line_rng, line_off, &form_code); + file_name_entry_formats[format_idx] = rng_1u64(content_type_code, form_code); + } + U64 file_names_count = 0; + line_off += 
based_range_read_uleb128(line_base, line_rng, line_off, &file_names_count); + header_out->file_table.count = file_names_count; + header_out->file_table.v = push_array(arena, DW_LineFile, header_out->file_table.count); + for (U64 file_idx = 0; file_idx < file_names_count; ++file_idx) + { + line_off += dw_read_line_file(line_base, + line_rng, + line_off, + mode, + sections, + unit, + header_out->address_size, + file_name_entry_format_count, + file_name_entry_formats, + &header_out->file_table.v[file_idx]); + } + } + else + { + String8List dir_list = {0}; + + str8_list_push(scratch.arena, &dir_list, unit->compile_dir); + for (;;) + { + String8 dir = based_range_read_string(line_base, line_rng, line_off); + line_off += dir.size + 1; + if (dir.size == 0) + { + break; + } + str8_list_push(scratch.arena, &dir_list, dir); + } + + DW_LineVMFileList file_list = {0}; + + //- rjf: push 0-index file (compile file) + { + DW_LineVMFileNode *node = push_array(scratch.arena, DW_LineVMFileNode, 1); + node->file.file_name = unit->name; + SLLQueuePush(file_list.first, file_list.last, node); + file_list.node_count += 1; + } + + for (;;) + { + String8 file_name = based_range_read_string(line_base, line_rng, line_off); + U64 dir_index = 0; + U64 modify_time = 0; + U64 file_size = 0; + line_off += file_name.size + 1; + if (file_name.size == 0) + { + break; + } + line_off += based_range_read_uleb128(line_base, line_rng, line_off, &dir_index); + line_off += based_range_read_uleb128(line_base, line_rng, line_off, &modify_time); + line_off += based_range_read_uleb128(line_base, line_rng, line_off, &file_size); + + DW_LineVMFileNode *node = push_array(scratch.arena, DW_LineVMFileNode, 1); + node->file.file_name = file_name; + node->file.dir_idx = dir_index; + node->file.modify_time = modify_time; + node->file.file_size = file_size; + SLLQueuePush(file_list.first, file_list.last, node); + file_list.node_count += 1; + } + + //- rjf: build dir table + { + header_out->dir_table.count = 
dir_list.node_count; + header_out->dir_table.v = push_array(arena, String8, header_out->dir_table.count); + String8Node *n = dir_list.first; + for(U64 idx = 0; n != 0 && idx < header_out->dir_table.count; idx += 1, n = n->next) + { + header_out->dir_table.v[idx] = push_str8_copy(arena, n->string); + } + } + + //- rjf: build file table + { + header_out->file_table.count = file_list.node_count; + header_out->file_table.v = push_array(arena, DW_LineFile, header_out->file_table.count); + U64 file_idx = 0; + DW_LineVMFileNode *file_node = file_list.first; + for(; file_node != 0; file_idx += 1, file_node = file_node->next) + { + header_out->file_table.v[file_idx].file_name = push_str8_copy(arena, file_node->file.file_name); + header_out->file_table.v[file_idx].dir_idx = file_node->file.dir_idx; + header_out->file_table.v[file_idx].modify_time = file_node->file.modify_time; + header_out->file_table.v[file_idx].file_size = file_node->file.file_size; + } + } + } + + U64 result = line_off - line_off_start; + + scratch_end(scratch); + return result; +} + diff --git a/src/dwarf/dwarf_parse.h b/src/dwarf/dwarf_parse.h new file mode 100644 index 00000000..f7b8b1b2 --- /dev/null +++ b/src/dwarf/dwarf_parse.h @@ -0,0 +1,491 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#ifndef DWARF_PARSE_H +#define DWARF_PARSE_H + +// NOTE(rjf): Some rules about the spaces of offsets and ranges: +// +// - Every stored/passed offset is relative to the base of its section. +// - Every stored/passed range has endpoints relative to the base of their section. +// - Upon calling a syms_based_range_* function, these offsets need to be +// converted into range-relative. 

////////////////////////////////
//~ rjf: Constants

// NOTE(review): all-ones 64-bit id; by its name, a sentinel type id standing for `void`.
#define DWARF_VOID_TYPE_ID 0xffffffffffffffffull

////////////////////////////////
//~ rjf: Files + External Debug References

typedef struct DW_ExtDebugRef DW_ExtDebugRef;
struct DW_ExtDebugRef
{
  // NOTE(rjf): .dwo => an external DWARF V5 .dwo file
  String8 dwo_path;
  U64 dwo_id;
};

////////////////////////////////
//~ rjf: Abbrev Table

// One (id, offset) pair of the abbrev table.
typedef struct DW_AbbrevTableEntry DW_AbbrevTableEntry;
struct DW_AbbrevTableEntry
{
  U64 id;
  U64 off;
};

// Counted array of abbrev table entries.
typedef struct DW_AbbrevTable DW_AbbrevTable;
struct DW_AbbrevTable
{
  U64 count;
  DW_AbbrevTableEntry *entries;
};

////////////////////////////////
//~ Sections

// Raw bytes of one section plus its DW_Mode and a flag for .dwo origin.
typedef struct DW_Section DW_Section;
struct DW_Section
{
  String8 data;
  DW_Mode mode;
  B32 is_dwo;
};

// Fixed-size table with one DW_Section slot per DW_Section_Count entry.
typedef struct DW_SectionArray DW_SectionArray;
struct DW_SectionArray
{
  DW_Section v[DW_Section_Count];
};

////////////////////////////////
//~ rjf: Basic Line Info

// One file record of a line table.
typedef struct DW_LineFile DW_LineFile;
struct DW_LineFile
{
  String8 file_name;
  U64 dir_idx;
  U64 modify_time;
  U64 md5_digest[2]; // 2 x 64 bits = 128-bit digest
  U64 file_size;
};

// Singly-linked list node holding one DW_LineFile.
typedef struct DW_LineVMFileNode DW_LineVMFileNode;
struct DW_LineVMFileNode
{
  DW_LineVMFileNode *next;
  DW_LineFile file;
};

// Singly-linked list (first/last + count) of DW_LineVMFileNode.
typedef struct DW_LineVMFileList DW_LineVMFileList;
struct DW_LineVMFileList
{
  U64 node_count;
  DW_LineVMFileNode *first;
  DW_LineVMFileNode *last;
};

// Counted array of DW_LineFile.
typedef struct DW_LineVMFileArray DW_LineVMFileArray;
struct DW_LineVMFileArray
{
  U64 count;
  DW_LineFile *v;
};

////////////////////////////////
//~ rjf: Abbrevs

typedef enum DW_AbbrevKind
{
  DW_Abbrev_Null,
  DW_Abbrev_Tag,
  DW_Abbrev_Attrib,
  DW_Abbrev_AttribSequenceEnd,
  DW_Abbrev_DIEBegin,
  DW_Abbrev_DIEEnd,
}
DW_AbbrevKind;

// Bit flags stored in DW_Abbrev::flags.
typedef U32 DW_AbbrevFlags;
enum{
  DW_AbbrevFlag_HasImplicitConst = (1<<0),
  DW_AbbrevFlag_HasChildren      = (1<<1),
};

// One decoded abbreviation record; `kind` selects which DW_AbbrevKind it is.
typedef struct DW_Abbrev DW_Abbrev;
struct DW_Abbrev
{
  DW_AbbrevKind kind;
  Rng1U64 abbrev_range;
  U64 sub_kind;
  U64 id;
  U64 const_value;
  DW_AbbrevFlags flags;
};

////////////////////////////////
//~ rjf: Attribs

// Per-compilation-unit context handed to attribute value resolution
// (see dw_attrib_value_resolve_params_from_comp_root / dw_attrib_value_from_form_value below).
typedef struct DW_AttribValueResolveParams DW_AttribValueResolveParams;
struct DW_AttribValueResolveParams
{
  DW_Version version;
  DW_Language language;
  U64 addr_size;                // NOTE(rjf): size in bytes of containing compilation unit's addresses
  U64 containing_unit_info_off; // NOTE(rjf): containing compilation unit's offset into the .debug_info section
  U64 debug_addrs_base;         // NOTE(rjf): containing compilation unit's offset into the .debug_addr section (DWARF V5 ONLY)
  U64 debug_rnglists_base;      // NOTE(rjf): containing compilation unit's offset into the .debug_rnglists section (DWARF V5 ONLY)
  U64 debug_str_offs_base;      // NOTE(rjf): containing compilation unit's offset into the .debug_str_offsets section (DWARF V5 ONLY)
  U64 debug_loclists_base;      // NOTE(rjf): containing compilation unit's offset into the .debug_loclists section (DWARF V5 ONLY)
};

// An attribute value: a section tag plus two 64-bit payload words.
// NOTE(review): how v[0]/v[1] are interpreted is not visible in this header -- confirm against the .c file.
typedef struct DW_AttribValue DW_AttribValue;
struct DW_AttribValue
{
  DW_SectionKind section;
  U64 v[2];
};

// One parsed attribute: where it was read from, its kind/form/class, and its form value.
typedef struct DW_Attrib DW_Attrib;
struct DW_Attrib
{
  U64 info_off;
  U64 abbrev_id;
  DW_AttribKind attrib_kind;
  DW_FormKind form_kind;
  DW_AttribClass value_class;
  DW_AttribValue form_value;
};

// Counted array of DW_Attrib.
typedef struct DW_AttribArray DW_AttribArray;
struct DW_AttribArray
{
  DW_Attrib *v;
  U64 count;
};

// Singly-linked list node holding one DW_Attrib.
typedef struct DW_AttribNode DW_AttribNode;
struct DW_AttribNode
{
  DW_AttribNode *next;
  DW_Attrib attrib;
};

// Singly-linked list (first/last + count) of DW_AttribNode.
typedef struct DW_AttribList DW_AttribList;
struct DW_AttribList
{
  DW_AttribNode *first;
  DW_AttribNode *last;
  U64 count;
};

// Result of dw_parse_attrib_list_from_info_abbrev_offsets: the attribute list plus
// two offsets named max_info_off / max_abbrev_off.
// NOTE(review): presumably the furthest offsets reached while parsing -- confirm.
typedef struct DW_AttribListParseResult DW_AttribListParseResult;
struct DW_AttribListParseResult
{
  DW_AttribList attribs;
  U64 max_info_off;
  U64 max_abbrev_off;
};

////////////////////////////////
//~ rjf: Compilation Units + Accelerators

typedef struct DW_CompRoot DW_CompRoot;
struct DW_CompRoot
{
  // NOTE(rjf): Header Data
  U64 size;
  DW_CompUnitKind kind;
  DW_Version version;
  DW_Ext ext;
  U64 address_size;
  U64 abbrev_off;
  U64 info_off;
  Rng1U64 tags_info_range;
  DW_AbbrevTable abbrev_table;

  // NOTE(rjf): [parsed from DWARF attributes] Offsets For More Info (DWARF V5 ONLY)
  U64 rnglist_base; // NOTE(rjf): Offset into the .debug_rnglists section where this comp unit's data is.
  U64 loclist_base; // NOTE(rjf): Offset into the .debug_loclists section where this comp unit's data is.
  U64 addrs_base;   // NOTE(rjf): Offset into the .debug_addr section where this comp unit's data is.
  U64 stroffs_base; // NOTE(rjf): Offset into the .debug_str_offsets section where this comp unit's data is.

  // NOTE(rjf): [parsed from DWARF attributes] General Info
  String8 name;
  String8 producer;
  String8 compile_dir;
  String8 external_dwo_name;
  U64 dwo_id;
  DW_Language language;
  U64 name_case;
  B32 use_utf8;
  U64 line_off;
  U64 low_pc;
  U64 high_pc;
  DW_AttribValue ranges_attrib_value;
  U64 base_addr;

  // NOTE(rjf): Line/File Info For This Comp Unit
  String8Array dir_table;
  DW_LineVMFileArray file_table;
};

////////////////////////////////
//~ rjf: Tags

// One parsed tag, linked into a tree via parent / first_child / last_child / next_sibling.
typedef struct DW_Tag DW_Tag;
struct DW_Tag
{
  DW_Tag *next_sibling;
  DW_Tag *first_child;
  DW_Tag *last_child;
  DW_Tag *parent;
  Rng1U64 info_range;
  Rng1U64 abbrev_range;
  B32 has_children;
  U64 abbrev_id;
  DW_TagKind kind;
  U64 attribs_info_off;
  U64 attribs_abbrev_off;
  DW_AttribList attribs;
};

// Bit flags stored in DW_TagStub::flags.
typedef U32 DW_TagStubFlags;
enum
{
  DW_TagStubFlag_HasObjectPointerArg = (1<<0),
  DW_TagStubFlag_HasLocation         = (1<<1),
  DW_TagStubFlag_HasExternal         = (1<<2),
  DW_TagStubFlag_HasSpecification    = (1<<3),
};

// Compact summary of a tag, produced by dw_stub_from_tag.
typedef struct DW_TagStub DW_TagStub;
struct DW_TagStub
{
  U64 info_off;
  DW_TagKind kind;
  DW_TagStubFlags flags;
  U64 children_info_off;
  U64 attribs_info_off;
  U64 attribs_abbrev_off;

  // NOTE(rjf): DW_Attrib_Specification is tacked onto definitions that
  // are filling out more info about a "prototype". That attribute is a reference
  // that points back at the declaration tag. The declaration tag has the
  // DW_Attrib_Declaration attribute, which is sort of like the reverse
  // of that, except there's no reference. So what we're doing here is just storing
  // a reference on both, that point back to each other, so it's always easy to
  // get from decl => spec, or from spec => decl.
  //SYMS_SymbolID ref;

  // NOTE(rjf): DW_Attrib_AbstractOrigin is tacked onto some definitions
  // that are used to specify information more specific to inlining, while wanting
  // to refer to an "abstract" function DIE, that is not specific to any inline
  // sites. The DWARF generator will not duplicate information across these, so
  // we will occasionally need to look at an abstract origin to get abstract
  // information, like name/linkage-name/etc.
  //SYMS_SymbolID abstract_origin;

  U64 _unused_;
};

// Singly-linked list node holding one DW_TagStub.
typedef struct DW_TagStubNode DW_TagStubNode;
struct DW_TagStubNode
{
  DW_TagStubNode *next;
  DW_TagStub stub;
};

// Singly-linked list (first/last + count) of DW_TagStubNode.
typedef struct DW_TagStubList DW_TagStubList;
struct DW_TagStubList
{
  DW_TagStubNode *first;
  DW_TagStubNode *last;
  U64 count;
};

////////////////////////////////
//~ rjf: Line Info VM Types

// Line program header, filled in by dw_read_line_vm_header.
typedef struct DW_LineVMHeader DW_LineVMHeader;
struct DW_LineVMHeader
{
  U64 unit_length;
  U64 unit_opl;
  U16 version;
  U8 address_size; // NOTE(nick): duplicates size from the compilation unit but is needed to support stripped exe that just have .debug_line and .debug_line_str.
  U8 segment_selector_size;
  U64 header_length;
  U64 program_off;
  U8 min_inst_len;
  U8 max_ops_for_inst;
  U8 default_is_stmt;
  S8 line_base;
  U8 line_range;
  U8 opcode_base;
  U64 num_opcode_lens;
  U8 *opcode_lens;
  String8Array dir_table;
  DW_LineVMFileArray file_table;
};

// Register state of the line-number VM (see dw_line_vm_reset / dw_line_vm_advance).
typedef struct DW_LineVMState DW_LineVMState;
struct DW_LineVMState
{
  U64 address;  // NOTE(nick): Address of a machine instruction.
  U32 op_index; // NOTE(nick): This is used by the VLIW instructions to indicate index of operation inside the instruction.

  // NOTE(nick): Line table doesn't contain full path to a file, instead
  // DWARF encodes path as two indices. First index will point into a directory
  // table, and second points into a file name table.
  U32 file_index;

  U32 line;
  U32 column;

  B32 is_stmt;     // NOTE(nick): Indicates that "address" points to place suitable for a breakpoint.
  B32 basic_block; // NOTE(nick): Indicates that the "address" is inside a basic block.

  // NOTE(nick): Indicates that "address" points to place where function starts.
  // Usually prologue is the place where compiler emits instructions to
  // prepare stack for a function.
  B32 prologue_end;

  B32 epilogue_begin; // NOTE(nick): Indicates that "address" points to section where function exits and unwinds stack.
  U64 isa;            // NOTE(nick): Instruction set that is used.
  U64 discriminator;  // NOTE(nick): Arbitrary id that indicates to which block these instructions belong.
  B32 end_sequence;   // NOTE(nick): Indicates that "address" points to the first instruction in the instruction block that follows.

  // NOTE(rjf): it looks like LTO might sometimes zero out high PC and low PCs, causing a
  // swath of line info to map to a range starting at 0. This causes overlapping ranges
  // which we do not want to report. So this B32 will turn on emission.
  B32 busted_seq;
};

// One emitted line-table row: file index, line/column, and a virtual offset.
typedef struct DW_Line DW_Line;
struct DW_Line
{
  U64 file_index;
  U32 line;
  U32 column;
  U64 voff;
};

// Singly-linked list node holding one DW_Line.
typedef struct DW_LineNode DW_LineNode;
struct DW_LineNode
{
  DW_LineNode *next;
  DW_Line v;
};

// One line sequence: a counted singly-linked list of DW_LineNode, itself chained via `next`.
typedef struct DW_LineSeqNode DW_LineSeqNode;
struct DW_LineSeqNode
{
  DW_LineSeqNode *next;
  U64 count;
  DW_LineNode *first;
  DW_LineNode *last;
};

// Parsed line table: a counted singly-linked list of sequences.
typedef struct DW_LineTableParseResult DW_LineTableParseResult;
struct DW_LineTableParseResult
{
  U64 seq_count;
  DW_LineSeqNode *first_seq;
  DW_LineSeqNode *last_seq;
};

////////////////////////////////
//~ rjf: .debug_pubnames and .debug_pubtypes

// Chained hash-bucket entry: a string with an info offset and a CU info offset.
typedef struct DW_PubStringsBucket DW_PubStringsBucket;
struct DW_PubStringsBucket
{
  DW_PubStringsBucket *next;
  String8 string;
  U64 info_off;
  U64 cu_info_off;
};

// Table of `size` bucket-chain heads.
typedef struct DW_PubStringsTable DW_PubStringsTable;
struct DW_PubStringsTable
{
  U64 size;
  DW_PubStringsBucket **buckets;
};

////////////////////////////////
//~ rjf: Basic Helpers

internal U64            dw_hash_from_string(String8 string);
internal DW_AttribClass dw_pick_attrib_value_class(DW_Version ver, DW_Ext ext, DW_Language lang, DW_AttribKind attrib, DW_FormKind form_kind);

////////////////////////////////
//~ Specific Based Range Helpers

// NOTE(review): these readers return U64; call sites in dwarf_unwind.c add the result to a
// read cursor, i.e. it is treated as the number of bytes consumed.
internal U64 dw_based_range_read_length(void *base, Rng1U64 range, U64 offset, U64 *out_value);
internal U64 dw_based_range_read_abbrev_tag(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev);
internal U64 dw_based_range_read_abbrev_attrib_info(void *base, Rng1U64 range, U64 offset, DW_Abbrev *out_abbrev);
internal U64 dw_based_range_read_attrib_form_value(void *base, Rng1U64 range, U64 offset, DW_Mode mode, U64 address_size, DW_FormKind form_kind, U64 implicit_const, DW_AttribValue *form_value_out);

internal DW_Mode dw_mode_from_sec(DW_SectionArray *sections, DW_SectionKind kind);
internal B32     dw_sec_is_present(DW_SectionArray *sections, DW_SectionKind kind);
internal void*   dw_base_from_sec(DW_SectionArray *sections, DW_SectionKind kind);
internal Rng1U64 dw_range_from_sec(DW_SectionArray *sections, DW_SectionKind kind);

////////////////////////////////
//~ rjf: Abbrev Table

internal DW_AbbrevTable dw_make_abbrev_table(Arena *arena, DW_SectionArray *sections, U64 start_abbrev_off);
internal U64            dw_abbrev_offset_from_abbrev_id(DW_AbbrevTable table, U64 abbrev_id);

////////////////////////////////
//~ rjf: Miscellaneous DWARF Section Parsing

//- rjf: .debug_ranges (DWARF V4)
internal Rng1U64List dw_v4_range_list_from_range_offset(Arena *arena, DW_SectionArray *sections, U64 addr_size, U64 comp_unit_base_addr, U64 range_off);

//- rjf: .debug_pubtypes + .debug_pubnames (DWARF V4)
internal DW_PubStringsTable dw_v4_pub_strings_table_from_section_kind(Arena *arena, DW_SectionArray *sections, DW_SectionKind section_kind);

//- rjf: .debug_str_offsets (DWARF V5)
internal U64 dw_v5_offset_from_offs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index);

//- rjf: .debug_addr (DWARF V5)
internal U64 dw_v5_addr_from_addrs_section_base_index(DW_SectionArray *sections, DW_SectionKind section, U64 base, U64 index);

//- rjf: .debug_rnglists parsing (DWARF V5)
internal U64         dw_v5_sec_offset_from_rnglist_or_loclist_section_base_index(DW_SectionArray *sections, DW_SectionKind section_kind, U64 base, U64 index);
internal Rng1U64List dw_v5_range_list_from_rnglist_offset(Arena *arena, DW_SectionArray *sections, DW_SectionKind section, U64 addr_size, U64 addr_section_base, U64 offset);

////////////////////////////////
//~ rjf: Attrib Value Parsing

internal DW_AttribValueResolveParams dw_attrib_value_resolve_params_from_comp_root(DW_CompRoot *root);
internal DW_AttribValue              dw_attrib_value_from_form_value(DW_SectionArray *sections, DW_AttribValueResolveParams resolve_params, DW_FormKind form_kind, DW_AttribClass value_class, DW_AttribValue form_value);
internal String8                     dw_string_from_attrib_value(DW_SectionArray *sections, DW_AttribValue value);
internal Rng1U64List                 dw_range_list_from_high_low_pc_and_ranges_attrib_value(Arena *arena, DW_SectionArray *sections, U64 address_size, U64 comp_unit_base_addr, U64 addr_section_base, U64 low_pc, U64 high_pc, DW_AttribValue ranges_value);

////////////////////////////////
//~ rjf: Tag Parsing

internal DW_AttribListParseResult dw_parse_attrib_list_from_info_abbrev_offsets(Arena *arena, DW_SectionArray *sections, DW_Version ver, DW_Ext ext, DW_Language lang, U64 address_size, U64 info_off, U64 abbrev_off);
internal DW_Tag*                  dw_tag_from_info_offset(Arena *arena, DW_SectionArray *sections, DW_AbbrevTable abbrev_table, DW_Version ver, DW_Ext ext, DW_Language lang, U64 address_size, U64 info_offset);
internal DW_TagStub               dw_stub_from_tag(DW_SectionArray *sections, DW_AttribValueResolveParams resolve_params, DW_Tag *tag);

//- rjf: line info
internal void dw_line_vm_reset(DW_LineVMState *state, B32 default_is_stmt);
internal void dw_line_vm_advance(DW_LineVMState *state, U64 advance, U64 min_inst_len, U64 max_ops_for_inst);

internal DW_LineSeqNode*         dw_push_line_seq(Arena* arena, DW_LineTableParseResult *parsed_tbl);
internal DW_LineNode*            dw_push_line(Arena *arena, DW_LineTableParseResult *tbl, DW_LineVMState *vm_state, B32 start_of_sequence);
internal DW_LineTableParseResult dw_parsed_line_table_from_comp_root(Arena *arena, DW_SectionArray *sections, DW_CompRoot *root);
internal U64                     dw_read_line_file(void *line_base, Rng1U64 line_rng, U64 line_off, DW_Mode mode, DW_SectionArray *sections, DW_CompRoot *unit, U8 address_size, U64 format_count, Rng1U64 *formats, DW_LineFile *line_file_out);
internal U64                     dw_read_line_vm_header(Arena *arena, void *line_base, Rng1U64 line_rng, U64 line_off, DW_Mode mode, DW_SectionArray *sections, DW_CompRoot *unit, DW_LineVMHeader *header_out);

#endif // DWARF_PARSE_H

diff --git a/src/dwarf/dwarf_unwind.c b/src/dwarf/dwarf_unwind.c new file mode
100644 index 00000000..bf86d59f --- /dev/null +++ b/src/dwarf/dwarf_unwind.c @@ -0,0 +1,1157 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +//////////////////////////////// +// x64 Unwind Function + +internal DW_UnwindResult +dw_unwind_x64(String8 raw_text, + String8 raw_eh_frame, + String8 raw_eh_frame_hdr, + Rng1U64 text_vrange, + Rng1U64 eh_frame_vrange, + Rng1U64 eh_frame_hdr_vrange, + U64 default_image_base, + U64 image_base, + U64 stack_pointer, + DW_RegsX64 *regs, + DW_ReadMemorySig *read_memory, + void *read_memory_ud) +{ + // TODO: What if ELF has two sections with instructions and pointer is ecnoded relative to .text2? + Temp scratch = scratch_begin(0, 0); + + DW_UnwindResult result = {0}; + + dw_unwind_init_x64(); + + // rebase + U64 rebase_voff_to_vaddr = (image_base - default_image_base); + + // get ip register values + U64 ip_value = regs->rip; + U64 ip_voff = ip_value - rebase_voff_to_vaddr; + + // check sections + B32 has_needed_sections = (raw_text.size > 0 && raw_eh_frame.size > 0); + if (!has_needed_sections) { + result.is_invalid = 1; + } + + //- get frame info range + void *frame_base = raw_eh_frame.str; + Rng1U64 frame_range = rng_1u64(0, raw_eh_frame.size); + + //- section vaddrs + U64 text_base_vaddr = text_vrange.min + rebase_voff_to_vaddr; + U64 frame_base_voff = text_vrange.min; + U64 data_base_vaddr = eh_frame_hdr_vrange.min + rebase_voff_to_vaddr; + + //- find cfi records + DW_CFIRecords cfi_recs = {0}; + if (has_needed_sections) { + DW_EhPtrCtx ptr_ctx = {0}; + ptr_ctx.raw_base_vaddr = frame_base_voff; + ptr_ctx.text_vaddr = text_base_vaddr; + ptr_ctx.data_vaddr = data_base_vaddr; + ptr_ctx.func_vaddr = 0; + if (raw_eh_frame_hdr.size) { + cfi_recs = dw_unwind_eh_frame_hdr_from_ip_fast_x64(raw_eh_frame, raw_eh_frame_hdr, &ptr_ctx, ip_voff); + } else { + cfi_recs = dw_unwind_eh_frame_cfi_from_ip_slow_x64(raw_eh_frame, &ptr_ctx, ip_voff); + } + } + + //- check cfi 
records + if (!cfi_recs.valid) { + result.is_invalid = 1; + } + + //- cfi machine setup + DW_CFIMachine machine = {0}; + if (cfi_recs.valid) { + DW_EhPtrCtx ptr_ctx = {0}; + ptr_ctx.raw_base_vaddr = frame_base_voff; + ptr_ctx.text_vaddr = text_base_vaddr; + ptr_ctx.data_vaddr = data_base_vaddr; + ptr_ctx.func_vaddr = cfi_recs.fde.ip_voff_range.min + rebase_voff_to_vaddr; // TODO: it's not super clear how to set up this member, need more test cases + machine = dw_unwind_make_machine_x64(DW_UNWIND_X64__REG_SLOT_COUNT, &cfi_recs.cie, &ptr_ctx); + } + + // initial row + DW_CFIRow *init_row = 0; + if (cfi_recs.valid) { + Rng1U64 init_cfi_range = cfi_recs.cie.cfi_range; + DW_CFIRow *row = dw_unwind_row_alloc_x64(scratch.arena, machine.cells_per_row); + if (dw_unwind_machine_run_to_ip_x64(frame_base, init_cfi_range, &machine, max_U64, row)) { + init_row = row; + } + if (init_row == 0) { + result.is_invalid = 1; + } + } + + // main row + DW_CFIRow *main_row = 0; + if (init_row != 0) { + // upgrade machine with new equipment + dw_unwind_machine_equip_initial_row_x64(&machine, init_row); + dw_unwind_machine_equip_fde_ip_x64(&machine, cfi_recs.fde.ip_voff_range.min); + + // decode main row + Rng1U64 main_cfi_range = cfi_recs.fde.cfi_range; + DW_CFIRow *row = dw_unwind_row_alloc_x64(scratch.arena, machine.cells_per_row); + if (dw_unwind_machine_run_to_ip_x64(frame_base, main_cfi_range, &machine, ip_value, row)) { + main_row = row; + } + if (main_row == 0) { + result.is_invalid = 1; + } + } + + // apply main row to modify the registers + if (main_row != 0) { + result = dw_unwind_x64__apply_frame_rules(raw_eh_frame, main_row, text_base_vaddr, read_memory, read_memory_ud, stack_pointer, regs); + } + + scratch_end(scratch); + return result; +} + +internal DW_UnwindResult +dw_unwind_x64__apply_frame_rules(String8 raw_eh_frame, + DW_CFIRow *row, + U64 text_base_vaddr, + DW_ReadMemorySig *read_memory, + void *read_memory_ud, + U64 stack_pointer, + DW_RegsX64 *regs) +{ + 
DW_UnwindResult result = {0}; + + U64 missed_read_addr = 0; + + //- setup a dwarf expression machine + DW_ExprMachineConfig dwexpr_config = {0}; + dwexpr_config.max_step_count = 0xFFFF; + dwexpr_config.read_memory = read_memory; + dwexpr_config.read_memory_ud = read_memory_ud; + dwexpr_config.regs = regs; + dwexpr_config.text_section_base = &text_base_vaddr; + + //- compute cfa + U64 cfa = 0; + switch (row->cfa_cell.rule) { + case DW_CFI_CFA_Rule_RegOff: { + // TODO: have we done anything to gaurantee reg_idx here? + U64 reg_idx = row->cfa_cell.reg_idx; + + // is this a roll-over CFA? + B32 is_roll_over_cfa = 0; + if (reg_idx == DW_Reg_x64_Rsp) { + DW_CFIRegisterRule rule = row->cells[reg_idx].rule; + if (rule == DW_CFIRegisterRule_Undefined || rule == DW_CFIRegisterRule_SameValue) { + is_roll_over_cfa = 1; + } + } + + // compute cfa + if (is_roll_over_cfa) { + cfa = stack_pointer + row->cfa_cell.offset; + } else { + cfa = regs->r[reg_idx] + row->cfa_cell.offset; + } + } break; + + case DW_CFI_CFA_Rule_Expr: { + Rng1U64 expr_range = row->cfa_cell.expr; + DW_Location location = dw_expr__eval(0, raw_eh_frame.str, expr_range, &dwexpr_config); + if (location.non_piece_loc.kind == DW_SimpleLocKind_Fail && location.non_piece_loc.fail_kind == DW_LocFailKind_MissingMemory) { + missed_read_addr = location.non_piece_loc.fail_data; + goto error_out; + } + if (location.non_piece_loc.kind == DW_SimpleLocKind_Address) { + cfa = location.non_piece_loc.addr; + } + } break; + } + + // compute registers + { + DW_CFICell *cell = row->cells; + DW_RegsX64 new_regs = {0}; + for (U64 i = 0; i < DW_UNWIND_X64__REG_SLOT_COUNT; ++i, ++cell) { + // compute value + U64 v = 0; + switch (cell->rule) { + default: + { + Assert(!"UNEXPECTED-RULE"); + } break; + + case DW_CFIRegisterRule_Undefined: + { + Assert(!"UNDEFINED"); + } break; + + case DW_CFIRegisterRule_SameValue: + { + v = regs->r[i]; + } break; + + case DW_CFIRegisterRule_Offset: + { + U64 addr = cfa + cell->n; + U64 read_size = 
read_memory(addr, sizeof(v), &v, read_memory_ud); + if (read_size != sizeof(v)) { + missed_read_addr = addr; + goto error_out; + } + } break; + + case DW_CFIRegisterRule_ValOffset: + { + v = cfa + cell->n; + } break; + + case DW_CFIRegisterRule_Register: + { + v = regs->r[i]; + } break; + + case DW_CFIRegisterRule_Expression: + { + Rng1U64 expr_range = cell->expr; + U64 addr = 0; + DW_Location location = dw_expr__eval(0, raw_eh_frame.str, expr_range, &dwexpr_config); + if (location.non_piece_loc.kind == DW_SimpleLocKind_Fail && location.non_piece_loc.fail_kind == DW_LocFailKind_MissingMemory) { + missed_read_addr = location.non_piece_loc.fail_data; + goto error_out; + } + if (location.non_piece_loc.kind == DW_SimpleLocKind_Address) { + addr = location.non_piece_loc.addr; + } + U64 read_size = read_memory(addr, sizeof(v), &v, read_memory_ud); + if (read_size != sizeof(v)) { + missed_read_addr = addr; + goto error_out; + } + } break; + + case DW_CFIRegisterRule_ValExpression: + { + Rng1U64 expr_range = cell->expr; + DW_Location location = dw_expr__eval(0, raw_eh_frame.str, expr_range, &dwexpr_config); + if (location.non_piece_loc.kind == DW_SimpleLocKind_Fail && location.non_piece_loc.fail_kind == DW_LocFailKind_MissingMemory) { + missed_read_addr = location.non_piece_loc.fail_data; + goto error_out; + } + if (location.non_piece_loc.kind == DW_SimpleLocKind_Address) { + v = location.non_piece_loc.addr; + } + } break; + } + + // commit value to output slot + new_regs.r[i] = v; + } + + // commit all new regs + MemoryCopy(regs, &new_regs, sizeof(new_regs)); + } + + //- save new stack pointer + result.stack_pointer = cfa; + + error_out:; + if (missed_read_addr) { + result.is_invalid = 1; + result.missed_read = 1; + result.missed_read_addr = missed_read_addr; + } + + return result; +} + +//////////////////////////////// +// Helper Functions + +internal void +dw_unwind_init_x64(void) +{ + local_persist B32 did_init = 0; + + if (!did_init) { + did_init = 1; + + // control 
bits tables + dw_unwind__cfa_control_bits_kind1[DW_CFA_Nop ] = 0x000; + dw_unwind__cfa_control_bits_kind1[DW_CFA_SetLoc ] = 0x809; + dw_unwind__cfa_control_bits_kind1[DW_CFA_AdvanceLoc1 ] = 0x801; + dw_unwind__cfa_control_bits_kind1[DW_CFA_AdvanceLoc2 ] = 0x802; + dw_unwind__cfa_control_bits_kind1[DW_CFA_AdvanceLoc4 ] = 0x804; + dw_unwind__cfa_control_bits_kind1[DW_CFA_OffsetExt ] = 0x2AA; + dw_unwind__cfa_control_bits_kind1[DW_CFA_RestoreExt ] = 0x20A; + dw_unwind__cfa_control_bits_kind1[DW_CFA_Undefined ] = 0x20A; + dw_unwind__cfa_control_bits_kind1[DW_CFA_SameValue ] = 0x20A; + dw_unwind__cfa_control_bits_kind1[DW_CFA_Register ] = 0x6AA; + dw_unwind__cfa_control_bits_kind1[DW_CFA_RememberState ] = 0x000; + dw_unwind__cfa_control_bits_kind1[DW_CFA_RestoreState ] = 0x000; + dw_unwind__cfa_control_bits_kind1[DW_CFA_DefCfa ] = 0x2AA; + dw_unwind__cfa_control_bits_kind1[DW_CFA_DefCfaRegister ] = 0x20A; + dw_unwind__cfa_control_bits_kind1[DW_CFA_DefCfaOffset ] = 0x00A; + dw_unwind__cfa_control_bits_kind1[DW_CFA_DefCfaExpr ] = 0x00A; + dw_unwind__cfa_control_bits_kind1[DW_CFA_Expr ] = 0x2AA; + dw_unwind__cfa_control_bits_kind1[DW_CFA_OffsetExtSf ] = 0x2BA; + dw_unwind__cfa_control_bits_kind1[DW_CFA_DefCfaSf ] = 0x2BA; + dw_unwind__cfa_control_bits_kind1[DW_CFA_DefCfaOffsetSf ] = 0x00B; + dw_unwind__cfa_control_bits_kind1[DW_CFA_ValOffset ] = 0x2AA; + dw_unwind__cfa_control_bits_kind1[DW_CFA_ValOffsetSf ] = 0x2BA; + dw_unwind__cfa_control_bits_kind1[DW_CFA_ValExpr ] = 0x2AA; + + dw_unwind__cfa_control_bits_kind2[DW_CFA_AdvanceLoc >> 6] = 0x800; + dw_unwind__cfa_control_bits_kind2[DW_CFA_Offset >> 6] = 0x10A; + dw_unwind__cfa_control_bits_kind2[DW_CFA_Restore >> 6] = 0x100; + } +} + +internal U64 +dw_unwind_parse_pointer_x64(void *frame_base, Rng1U64 frame_range, DW_EhPtrCtx *ptr_ctx, DW_EhPtrEnc encoding, U64 off, U64 *ptr_out) +{ + // aligned offset + U64 pointer_off = off; + if (encoding == DW_EhPtrEnc_Aligned) { + pointer_off = AlignPow2(off, 8); // TODO: align to 4 
bytes when we parse x86 ELF binary + encoding = DW_EhPtrEnc_Ptr; + } + + // decode pointer value + U64 size_param = 0; + U64 after_pointer_off = 0; + U64 raw_pointer = 0; + switch (encoding & DW_EhPtrEnc_TypeMask) { + default:break; + + case DW_EhPtrEnc_Ptr : size_param = 8; goto ufixed; + case DW_EhPtrEnc_UData2: size_param = 2; goto ufixed; + case DW_EhPtrEnc_UData4: size_param = 4; goto ufixed; + case DW_EhPtrEnc_UData8: size_param = 8; goto ufixed; + ufixed: + { + based_range_read(frame_base, frame_range, pointer_off, size_param, &raw_pointer); + after_pointer_off = pointer_off + size_param; + } break; + + // TODO: Signed is actually just a flag that indicates this int is negavite. + // There shouldn't be a read for Signed. + // For instance, (DW_EhPtrEnc_UData2 | DW_EhPtrEnc_Signed) == DW_EhPtrEnc_SData etc. + case DW_EhPtrEnc_Signed:size_param = 8; goto sfixed; + + case DW_EhPtrEnc_SData2:size_param = 2; goto sfixed; + case DW_EhPtrEnc_SData4:size_param = 4; goto sfixed; + case DW_EhPtrEnc_SData8:size_param = 8; goto sfixed; + sfixed: + { + based_range_read(frame_base, frame_range, pointer_off, size_param, &raw_pointer); + after_pointer_off = pointer_off + size_param; + // sign extension + U64 sign_bit = size_param*8 - 1; + if ((raw_pointer >> sign_bit) != 0) { + raw_pointer |= (~(1 << sign_bit)) + 1; + } + } break; + + case DW_EhPtrEnc_ULEB128: + { + U64 size = based_range_read_uleb128(frame_base, frame_range, pointer_off, &raw_pointer); + after_pointer_off = pointer_off + size; + } break; + + case DW_EhPtrEnc_SLEB128: + { + U64 size = based_range_read_sleb128(frame_base, frame_range, pointer_off, + (S64*)&raw_pointer); + after_pointer_off = pointer_off + size; + } break; + } + + // apply relative bases + U64 pointer = raw_pointer; + if (pointer != 0) { + switch (encoding & DW_EhPtrEnc_ModifyMask) { + case DW_EhPtrEnc_PcRel: + { + pointer = ptr_ctx->raw_base_vaddr + frame_range.min + off + raw_pointer; + } break; + case DW_EhPtrEnc_TextRel: + { + pointer = 
ptr_ctx->text_vaddr + raw_pointer; + } break; + case DW_EhPtrEnc_DataRel: + { + pointer = ptr_ctx->data_vaddr + raw_pointer; + } break; + case DW_EhPtrEnc_FuncRel: + { + Assert(!"TODO: need a sample to verify implementation"); + pointer = ptr_ctx->func_vaddr + raw_pointer; + } break; + } + } + + // return + *ptr_out = pointer; + U64 result = after_pointer_off - off; + return(result); +} + +//- eh_frame parsing + +internal void +dw_unwind_parse_cie_x64(void *base, Rng1U64 range, DW_EhPtrCtx *ptr_ctx, U64 off, DW_CIEUnpacked *cie_out) +{ + MemoryZeroStruct(cie_out); + + // get version + U64 version_off = off; + U8 version = 0; + based_range_read(base, range, version_off, 1, &version); + + // check version + if (version == 1 || version == 3) { + + // read augmentation + U64 augmentation_off = version_off + 1; + String8 augmentation = based_range_read_string(base, range, augmentation_off); + + // read code align + U64 code_align_factor_off = augmentation_off + augmentation.size + 1; + U64 code_align_factor = 0; + U64 code_align_factor_size = based_range_read_uleb128(base, range, code_align_factor_off, &code_align_factor); + + // read data align + U64 data_align_factor_off = code_align_factor_off + code_align_factor_size; + S64 data_align_factor = 0; + U64 data_align_factor_size = based_range_read_sleb128(base, range, data_align_factor_off, &data_align_factor); + + // return address register + U64 ret_addr_reg_off = data_align_factor_off + data_align_factor_size; + U64 after_ret_addr_reg_off = 0; + U64 ret_addr_reg = 0; + if (version == 1) { + based_range_read(base, range, ret_addr_reg_off, 1, &ret_addr_reg); + after_ret_addr_reg_off = ret_addr_reg_off + 1; + } else { + U64 ret_addr_reg_size = based_range_read_uleb128(base, range, ret_addr_reg_off, &ret_addr_reg); + after_ret_addr_reg_off = ret_addr_reg_off + ret_addr_reg_size; + } + + // TODO: + // Handle "eh" param, it indicates presence of EH Data field. + // On 32bit arch it is a 4-byte and on 64-bit 8-byte value. 
+ // Reference: https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html + // Reference doc doesn't clarify structure for EH Data though + + // check for augmentation data + U64 aug_size_off = after_ret_addr_reg_off; + U64 after_aug_size_off = after_ret_addr_reg_off; + B32 has_augmentation_size = 0; + U64 augmentation_size = 0; + if (augmentation.size > 0 && augmentation.str[0] == 'z') { + has_augmentation_size = 1; + U64 aug_size_size = based_range_read_uleb128(base, range, aug_size_off, &augmentation_size); + after_aug_size_off += aug_size_size; + } + + // read augmentation data + U64 aug_data_off = after_aug_size_off; + U64 after_aug_data_off = after_aug_size_off; + + DW_EhPtrEnc lsda_encoding = DW_EhPtrEnc_Omit; + U64 handler_ip = 0; + DW_EhPtrEnc addr_encoding = DW_EhPtrEnc_UData8; + + if (has_augmentation_size > 0) { + U64 aug_data_cursor = aug_data_off; + for (U8 *ptr = augmentation.str + 1, *opl = augmentation.str + augmentation.size; ptr < opl; ++ptr) { + switch (*ptr) { + case 'L': { + based_range_read_struct(base, range, aug_data_cursor, &lsda_encoding); + aug_data_cursor += sizeof(lsda_encoding); + } break; + case 'P': { + DW_EhPtrEnc handler_encoding = DW_EhPtrEnc_Omit; + based_range_read_struct(base, range, aug_data_cursor, &handler_encoding); + + U64 ptr_off = aug_data_cursor + sizeof(handler_encoding); + U64 ptr_size = dw_unwind_parse_pointer_x64(base, range, ptr_ctx, handler_encoding, ptr_off, &handler_ip); + aug_data_cursor = ptr_off + ptr_size; + } break; + case 'R': { + based_range_read_struct(base, range, aug_data_cursor, &addr_encoding); + aug_data_cursor += sizeof(addr_encoding); + } break; + default: { + goto dbl_break_aug; + } break; + } + } + dbl_break_aug:; + after_aug_data_off = aug_data_cursor; + } + + // cfi range + U64 cfi_off = range.min + after_aug_data_off; + U64 cfi_size = 0; + if (range.max > cfi_off) { + cfi_size = range.max - cfi_off; + } + + // commit values to out + cie_out->version = version; + 
cie_out->lsda_encoding = lsda_encoding; + cie_out->addr_encoding = addr_encoding; + cie_out->has_augmentation_size = has_augmentation_size; + cie_out->augmentation_size = augmentation_size; + cie_out->augmentation = augmentation; + cie_out->code_align_factor = code_align_factor; + cie_out->data_align_factor = data_align_factor; + cie_out->ret_addr_reg = ret_addr_reg; + cie_out->handler_ip = handler_ip; + cie_out->cfi_range.min = cfi_off; + cie_out->cfi_range.max = cfi_off + cfi_size; + } +} + +internal void +dw_unwind_parse_fde_x64(void *base, Rng1U64 range, DW_EhPtrCtx *ptr_ctx, DW_CIEUnpacked *cie, U64 off, DW_FDEUnpacked *fde_out) +{ + // pull out pointer encoding field + DW_EhPtrEnc ptr_enc = cie->addr_encoding; + + // ip first + U64 ip_first_off = off; + U64 ip_first = 0; + U64 ip_first_size = dw_unwind_parse_pointer_x64(base, range, ptr_ctx, ptr_enc, ip_first_off, &ip_first); + + // ip range size + U64 ip_range_size_off = ip_first_off + ip_first_size; + U64 ip_range_size = 0; + U64 ip_range_size_size = dw_unwind_parse_pointer_x64(base, range, ptr_ctx, ptr_enc & DW_EhPtrEnc_TypeMask, ip_range_size_off, &ip_range_size); + + // augmentation data + U64 aug_data_off = ip_range_size_off + ip_range_size_size; + U64 after_aug_data_off = aug_data_off; + U64 lsda_ip = 0; + + if (cie->has_augmentation_size) { + // augmentation size + U64 augmentation_size = 0; + U64 aug_size_size = based_range_read_uleb128(base, range, aug_data_off, &augmentation_size); + U64 after_aug_size_off = aug_data_off + aug_size_size; + + // extract lsda (only thing that can actually be in FDE's augmentation data as far as we know) + DW_EhPtrEnc lsda_encoding = cie->lsda_encoding; + if (lsda_encoding != DW_EhPtrEnc_Omit) { + U64 lsda_off = after_aug_size_off; + dw_unwind_parse_pointer_x64(base, range, ptr_ctx, lsda_encoding, lsda_off, &lsda_ip); + } + + // set offset at end of augmentation data + after_aug_data_off = after_aug_size_off + augmentation_size; + } + + // cfi range + U64 cfi_off = 
range.min + after_aug_data_off; + U64 cfi_size = 0; + if (range.max > cfi_off) { + cfi_size = range.max - cfi_off; + } + + // commit values to out + fde_out->ip_voff_range.min = ip_first; + fde_out->ip_voff_range.max = ip_first + ip_range_size; + fde_out->lsda_ip = lsda_ip; + fde_out->cfi_range.min = cfi_off; + fde_out->cfi_range.max = cfi_off + cfi_size; +} + +internal DW_CFIRecords +dw_unwind_eh_frame_cfi_from_ip_slow_x64(String8 raw_eh_frame, DW_EhPtrCtx *ptr_ctx, U64 ip_voff) +{ + Temp scratch = scratch_begin(0, 0); + + DW_CFIRecords result = {0}; + + DW_CIEUnpackedNode *cie_first = 0; + DW_CIEUnpackedNode *cie_last = 0; + + U64 cursor = 0; + for (;;) { + // CIE/FDE size + U64 rec_off = cursor; + U64 after_rec_size_off = 0; + U64 rec_size = 0; + + { + str8_deserial_read(raw_eh_frame, rec_off, &rec_size, 4, 1); + after_rec_size_off = 4; + if (rec_size == max_U32) { + str8_deserial_read(raw_eh_frame, rec_off + 4, &rec_size, 8, 1); + after_rec_size_off = 12; + } + } + + // zero size is the end of the loop + if (rec_size == 0) { + break; + } + + // compute end offset + U64 rec_opl = rec_off + after_rec_size_off + rec_size; + + // sub-range the rest of the reads + Rng1U64 rec_range = rng_1u64(rec_off, rec_opl); + String8 raw_rec = str8_substr(raw_eh_frame, rec_range); + + + // discriminator + U64 discrim_off = after_rec_size_off; + U32 discrim = 0; + str8_deserial_read(raw_rec, discrim_off, &discrim, 4, 1); + + U64 after_discrim_off = discrim_off + 4; + + // CIE + if (discrim == 0) { + DW_CIEUnpacked cie = {0}; + dw_unwind_parse_cie_x64(raw_rec.str, rng_1u64(0, raw_rec.size), ptr_ctx, after_discrim_off, &cie); + if (cie.version != 0) { + DW_CIEUnpackedNode *node = push_array(scratch.arena, DW_CIEUnpackedNode, 1); + node->cie = cie; + node->offset = rec_off; + SLLQueuePush(cie_first, cie_last, node); + } + } + // FDE + else { + // compute cie offset + U64 cie_offset = rec_range.min + discrim_off - discrim; + + // get cie node + DW_CIEUnpackedNode *cie_node = 0; + for 
(DW_CIEUnpackedNode *node = cie_first; node != 0; node = node->next) { + if (node->offset == cie_offset) { + cie_node = node; + break; + } + } + + // parse fde + DW_FDEUnpacked fde = {0}; + if (cie_node != 0) { + dw_unwind_parse_fde_x64(raw_rec.str, rng_1u64(0,raw_rec.size), ptr_ctx, &cie_node->cie, after_discrim_off, &fde); + } + + if (contains_1u64(fde.ip_voff_range, ip_voff)) { + result.valid = 1; + result.cie = cie_node->cie; + result.fde = fde; + break; + } + } + + // advance cursor + cursor = rec_opl; + } + + scratch_end(scratch); + + return(result); +} + +internal U64 +dw_search_eh_frame_hdr_linear_x64(String8 raw_eh_frame_hdr, DW_EhPtrCtx *ptr_ctx, U64 location) +{ + // Table contains only addresses for first instruction in a function and we cannot + // guarantee that result is FDE that corresponds to the input location. + // So input location must be cheked against range from FDE header again. + + U64 closest_location = max_U64; + U64 closest_address = max_U64; + + U64 cursor = 0; + + U8 version = 0; + cursor += str8_deserial_read_struct(raw_eh_frame_hdr, cursor, &version); + + if (version == 1) { +#if 0 + DW_EhPtrCtx ptr_ctx = {0}; + // Set this to base address of .eh_frame_hdr. Entries are relative + // to this section for some reason. + ptr_ctx.data_vaddr = range.min; + // If input location is VMA then set this to address of .text. + // Pointer parsing function will adjust "init_location" to correct VMA. 
+ ptr_ctx.text_vaddr = 0; +#endif + + DW_EhPtrEnc eh_frame_ptr_enc = 0, fde_count_enc = 0, table_enc = 0; + cursor += str8_deserial_read_struct(raw_eh_frame_hdr, cursor, &eh_frame_ptr_enc); + cursor += str8_deserial_read_struct(raw_eh_frame_hdr, cursor, &fde_count_enc); + cursor += str8_deserial_read_struct(raw_eh_frame_hdr, cursor, &table_enc); + + U64 eh_frame_ptr = 0, fde_count = 0; + cursor += dw_unwind_parse_pointer_x64(raw_eh_frame_hdr.str, rng_1u64(0, raw_eh_frame_hdr.size), ptr_ctx, eh_frame_ptr_enc, cursor, &eh_frame_ptr); + cursor += dw_unwind_parse_pointer_x64(raw_eh_frame_hdr.str, rng_1u64(0, raw_eh_frame_hdr.size), ptr_ctx, fde_count_enc, cursor, &fde_count); + + for (U64 fde_idx = 0; fde_idx < fde_count; ++fde_idx) { + U64 init_location = 0, address = 0; + cursor += dw_unwind_parse_pointer_x64(raw_eh_frame_hdr.str, rng_1u64(0, raw_eh_frame_hdr.size), ptr_ctx, table_enc, cursor, &init_location); + cursor += dw_unwind_parse_pointer_x64(raw_eh_frame_hdr.str, rng_1u64(0, raw_eh_frame_hdr.size), ptr_ctx, table_enc, cursor, &address); + + S64 current_delta = (S64)(location - init_location); + S64 closest_delta = (S64)(location - closest_location); + if (0 <= current_delta && current_delta < closest_delta) { + closest_location = init_location; + closest_address = address; + } + } + } + + // address where to find corresponding FDE, this is an absolute offset + // into the image file. 
+ return closest_address; +} + +internal DW_CFIRecords +dw_unwind_eh_frame_hdr_from_ip_fast_x64(String8 raw_eh_frame, String8 raw_eh_frame_hdr, DW_EhPtrCtx *ptr_ctx, U64 ip_voff) +{ + DW_CFIRecords result = {0}; + + // find FDE offset + void *eh_frame_hdr = raw_eh_frame.str; + U64 fde_offset = dw_search_eh_frame_hdr_linear_x64(raw_eh_frame_hdr, ptr_ctx, ip_voff); + + B32 is_fde_offset_valid = (fde_offset != max_U64); + if (is_fde_offset_valid) { + U64 fde_read_offset = (fde_offset - ptr_ctx->raw_base_vaddr); + + // read FDE size + U64 fde_size = 0; + fde_read_offset += dw_based_range_read_length(raw_eh_frame.str, rng_1u64(0,raw_eh_frame.size), fde_read_offset, &fde_size); + + // read FDE discriminator + U32 fde_discrim = 0; + fde_read_offset += str8_deserial_read_struct(raw_eh_frame, fde_read_offset, &fde_discrim); + + // compute parent CIE offset + U64 cie_read_offset = fde_read_offset - (fde_discrim + sizeof(fde_discrim)); + + // read CIE size + U64 cie_size = 0; + cie_read_offset += dw_based_range_read_length(raw_eh_frame.str, rng_1u64(0,raw_eh_frame.size), cie_read_offset, &cie_size); + + // read CIE discriminator + U32 cie_discrim = max_U32; + cie_read_offset += str8_deserial_read_struct(raw_eh_frame, cie_read_offset, &cie_discrim); + + B32 is_fde = (fde_discrim != 0); + B32 is_cie = (cie_discrim == 0); + if (is_fde && is_cie) { + Rng1U64 cie_range = rng_1u64(0, cie_read_offset + (cie_size - sizeof(cie_discrim))); + Rng1U64 fde_range = rng_1u64(0, fde_read_offset + (fde_size - sizeof(fde_discrim))); + + // parse CIE + DW_CIEUnpacked cie = {0}; + dw_unwind_parse_cie_x64(raw_eh_frame.str, cie_range, ptr_ctx, cie_read_offset, &cie); + + // parse FDE + DW_FDEUnpacked fde = {0}; + dw_unwind_parse_fde_x64(raw_eh_frame.str, fde_range, ptr_ctx, &cie, fde_read_offset, &fde); + + // range check instruction pointer + if (contains_1u64(fde.ip_voff_range, ip_voff)) { + result.valid = 1; + result.cie = cie; + result.fde = fde; + } + } + } + + return result; +} + +//- cfi 
machine + +internal DW_CFIMachine +dw_unwind_make_machine_x64(U64 cells_per_row, DW_CIEUnpacked *cie, DW_EhPtrCtx *ptr_ctx) +{ + DW_CFIMachine result = {0}; + result.cells_per_row = cells_per_row; + result.cie = cie; + result.ptr_ctx = ptr_ctx; + return result; +} + +internal void +dw_unwind_machine_equip_initial_row_x64(DW_CFIMachine *machine, DW_CFIRow *initial_row) +{ + machine->initial_row = initial_row; +} + +internal void +dw_unwind_machine_equip_fde_ip_x64(DW_CFIMachine *machine, U64 fde_ip) +{ + machine->fde_ip = fde_ip; +} + +internal DW_CFIRow* +dw_unwind_row_alloc_x64(Arena *arena, U64 cells_per_row) +{ + DW_CFIRow *result = push_array(arena, DW_CFIRow, 1); + result->cells = push_array(arena, DW_CFICell, cells_per_row); + return result; +} + +internal void +dw_unwind_row_zero_x64(DW_CFIRow *row, U64 cells_per_row) { + MemorySet(row->cells, 0, sizeof(*row->cells)*cells_per_row); + MemoryZeroStruct(&row->cfa_cell); +} + +internal void +dw_unwind_row_copy_x64(DW_CFIRow *dst, DW_CFIRow *src, U64 cells_per_row) +{ + MemoryCopy(dst->cells, src->cells, sizeof(*src->cells)*cells_per_row); + dst->cfa_cell = src->cfa_cell; +} + +internal B32 +dw_unwind_machine_run_to_ip_x64(void *base, Rng1U64 range, DW_CFIMachine *machine, U64 target_ip, DW_CFIRow *row) +{ + Temp scratch = scratch_begin(0, 0); + + B32 result = 0; + + // pull out machine's equipment + DW_CIEUnpacked *cie = machine->cie; + DW_EhPtrCtx *ptr_ctx = machine->ptr_ctx; + U64 cells_per_row = machine->cells_per_row; + DW_CFIRow *initial_row = machine->initial_row; + + // start with an empty stack + DW_CFIRow *stack = 0; + DW_CFIRow *free_rows = 0; + + // initialize the row + if (initial_row != 0) { + dw_unwind_row_copy_x64(row, initial_row, cells_per_row); + } else { + dw_unwind_row_zero_x64(row, cells_per_row); + } + U64 table_ip = machine->fde_ip; + + // loop + U64 cfi_off = 0; + for (;;) { + // op variables + DW_CFA opcode = 0; + U64 operand0 = 0; + U64 operand1 = 0; + U64 operand2 = 0; + 
DW_CFAControlBits control_bits = 0; + + // decode opcode/operand0 + if (!based_range_read(base, range, cfi_off, 1, &opcode)) { + result = 1; + goto done; + } + if ((opcode & DW_CFAMask_OpcodeHi) != 0) { + operand0 = (opcode & DW_CFAMask_Operand); + opcode = (opcode & DW_CFAMask_OpcodeHi); + control_bits = dw_unwind__cfa_control_bits_kind2[opcode >> 6]; + } else { + if (opcode < DW_CFA_OplKind1) { + control_bits = dw_unwind__cfa_control_bits_kind1[opcode]; + } + } + + // decode operand1/operand2 + U64 decode_cursor = cfi_off + 1; + { + // setup loop ins/outs + U64 o[2]; + DW_CFADecode dec[2] = {0}; + dec[0] = (control_bits & 0xF); + dec[1] = ((control_bits >> 4) & 0xF); + + // loop + U64 *out = o; + for (U64 i = 0; i < 2; i += 1, out += 1) { + DW_CFADecode d = dec[i]; + U64 o_size = 0; + switch (d) { + case 0: { + *out = 0; + } break; + default: { + if (d <= 8) { + based_range_read(base, range, decode_cursor, d, out); + o_size = d; + } + } break; + case DW_CFADecode_Address: { + o_size = dw_unwind_parse_pointer_x64(base, range, ptr_ctx, cie->addr_encoding, decode_cursor, out); + } break; + case DW_CFADecode_ULEB128: { + o_size = based_range_read_uleb128(base, range, decode_cursor, out); + } break; + case DW_CFADecode_SLEB128: { + o_size = based_range_read_sleb128(base, range, decode_cursor, (S64*)out); + } break; + } + decode_cursor += o_size; + } + + // commit out values + operand1 = o[0]; + operand2 = o[1]; + } + U64 after_decode_off = decode_cursor; + + // register checks + if (control_bits & DW_CFAControlBits_IsReg0) { + if (operand0 >= cells_per_row) { + goto done; + } + } + if (control_bits & DW_CFAControlBits_IsReg1) { + if (operand1 >= cells_per_row) { + goto done; + } + } + if (control_bits & DW_CFAControlBits_IsReg2) { + if (operand2 >= cells_per_row) { + goto done; + } + } + + // values for deferred work + U64 new_table_ip = table_ip; + + // step + U64 step_cursor = after_decode_off; + switch (opcode) { + default: goto done; + case DW_CFA_Nop:break; + + 
//// new row/IP opcodes //// + + case DW_CFA_SetLoc: { + new_table_ip = operand1; + } break; + case DW_CFA_AdvanceLoc: { + new_table_ip = table_ip + operand0*cie->code_align_factor; + } break; + case DW_CFA_AdvanceLoc1: + case DW_CFA_AdvanceLoc2: + case DW_CFA_AdvanceLoc4: { + U64 advance = operand1*cie->code_align_factor; + new_table_ip = table_ip + advance; + } break; + + //// change CFA (canonical frame address) opcodes //// + + case DW_CFA_DefCfa: { + row->cfa_cell.rule = DW_CFI_CFA_Rule_RegOff; + row->cfa_cell.reg_idx = operand1; + row->cfa_cell.offset = operand2; + } break; + + case DW_CFA_DefCfaSf: { + row->cfa_cell.rule = DW_CFI_CFA_Rule_RegOff; + row->cfa_cell.reg_idx = operand1; + row->cfa_cell.offset = ((S64)operand2)*cie->data_align_factor; + } break; + + case DW_CFA_DefCfaRegister: { + // check rule + if (row->cfa_cell.rule != DW_CFI_CFA_Rule_RegOff) { + goto done; + } + // commit new cfa + row->cfa_cell.reg_idx = operand1; + } break; + + case DW_CFA_DefCfaOffset: { + // check rule + if (row->cfa_cell.rule != DW_CFI_CFA_Rule_RegOff) { + goto done; + } + // commit new cfa + row->cfa_cell.offset = operand1; + } break; + + case DW_CFA_DefCfaOffsetSf: { + // check rule + if (row->cfa_cell.rule != DW_CFI_CFA_Rule_RegOff) { + goto done; + } + // commit new cfa + row->cfa_cell.offset = ((S64)operand1)*cie->data_align_factor; + } break; + + case DW_CFA_DefCfaExpr: { + // setup expr range + U64 expr_first = range.min + after_decode_off; + U64 expr_size = operand1; + step_cursor += expr_size; + + // commit new cfa + row->cfa_cell.rule = DW_CFI_CFA_Rule_Expr; + row->cfa_cell.expr.min = expr_first; + row->cfa_cell.expr.max = expr_first + expr_size; + } break; + + + //// change register rules //// + + case DW_CFA_Undefined: { + row->cells[operand1].rule = DW_CFIRegisterRule_Undefined; + } break; + + case DW_CFA_SameValue: { + row->cells[operand1].rule = DW_CFIRegisterRule_SameValue; + } break; + + case DW_CFA_Offset: { + DW_CFICell *cell = &row->cells[operand0]; + 
cell->rule = DW_CFIRegisterRule_Offset; + cell->n = operand1*cie->data_align_factor; + } break; + + case DW_CFA_OffsetExt: { + DW_CFICell *cell = &row->cells[operand1]; + cell->rule = DW_CFIRegisterRule_Offset; + cell->n = operand2*cie->data_align_factor; + } break; + + case DW_CFA_OffsetExtSf: { + DW_CFICell *cell = &row->cells[operand1]; + cell->rule = DW_CFIRegisterRule_Offset; + cell->n = ((S64)operand2)*cie->data_align_factor; + } break; + + case DW_CFA_ValOffset: { + DW_CFICell *cell = &row->cells[operand1]; + cell->rule = DW_CFIRegisterRule_ValOffset; + cell->n = operand2*cie->data_align_factor; + } break; + + case DW_CFA_ValOffsetSf: { + DW_CFICell *cell = &row->cells[operand1]; + cell->rule = DW_CFIRegisterRule_ValOffset; + cell->n = ((S64)operand2)*cie->data_align_factor; + } break; + + case DW_CFA_Register: { + DW_CFICell *cell = &row->cells[operand1]; + cell->rule = DW_CFIRegisterRule_Register; + cell->n = operand2; + } break; + + case DW_CFA_Expr: { + // setup expr range + U64 expr_first = range.min + after_decode_off; + U64 expr_size = operand2; + step_cursor += expr_size; + + // commit new rule + DW_CFICell *cell = &row->cells[operand1]; + cell->rule = DW_CFIRegisterRule_Expression; + cell->expr.min = expr_first; + cell->expr.max = expr_first + expr_size; + } break; + + case DW_CFA_ValExpr: { + // setup expr range + U64 expr_first = range.min + after_decode_off; + U64 expr_size = operand2; + step_cursor += expr_size; + + // commit new rule + DW_CFICell *cell = &row->cells[operand1]; + cell->rule = DW_CFIRegisterRule_ValExpression; + cell->expr.min = expr_first; + cell->expr.max = expr_first + expr_size; + } break; + + case DW_CFA_Restore: { + // check initial row + if (initial_row == 0) { + goto done; + } + // commit new rule + row->cells[operand0] = initial_row->cells[operand0]; + } break; + + case DW_CFA_RestoreExt: { + // check initial row + if (initial_row == 0) { + goto done; + } + // commit new rule + row->cells[operand1] = 
initial_row->cells[operand1]; + } break; + + + //// row stack //// + + case DW_CFA_RememberState: { + DW_CFIRow *stack_row = free_rows; + if (stack_row != 0) { + SLLStackPop(free_rows); + } else { + stack_row = dw_unwind_row_alloc_x64(scratch.arena, cells_per_row); + } + dw_unwind_row_copy_x64(stack_row, row, cells_per_row); + SLLStackPush(stack, stack_row); + } break; + + case DW_CFA_RestoreState: { + if (stack != 0) { + DW_CFIRow *stack_row = stack; + SLLStackPop(stack); + dw_unwind_row_copy_x64(row, stack_row, cells_per_row); + SLLStackPush(free_rows, stack_row); + } else { + dw_unwind_row_zero_x64(row, cells_per_row); + } + } break; + } + + // apply location change + if (control_bits & DW_CFAControlBits_NewRow) { + // new ip should always grow the ip + if (new_table_ip <= table_ip) { + goto done; + } + // stop if this encloses the target ip + if (table_ip <= target_ip && target_ip < new_table_ip) { + result = 1; + goto done; + } + // commit new ip + table_ip = new_table_ip; + } + + // advance + cfi_off = step_cursor; + } + done:; + + scratch_end(scratch); + return result; +} + diff --git a/src/dwarf/dwarf_unwind.h b/src/dwarf/dwarf_unwind.h new file mode 100644 index 00000000..8a0018e6 --- /dev/null +++ b/src/dwarf/dwarf_unwind.h @@ -0,0 +1,224 @@ +// Copyright (c) 2024 Epic Games Tools +// Licensed under the MIT license (https://opensource.org/license/mit/) + +#ifndef DWARF_UNWIND_H +#define DWARF_UNWIND_H + +typedef struct DW_UnwindResult +{ + B32 is_invalid; + B32 missed_read; + U64 missed_read_addr; + U64 stack_pointer; +} DW_UnwindResult; + +// EH: Exception Frames + +typedef U8 DW_EhPtrEnc; +enum +{ + DW_EhPtrEnc_TypeMask = 0x0F, + DW_EhPtrEnc_Ptr = 0x00, // Pointer sized unsigned value + DW_EhPtrEnc_ULEB128 = 0x01, // Unsigned LE base-128 value + DW_EhPtrEnc_UData2 = 0x02, // Unsigned 16-bit value + DW_EhPtrEnc_UData4 = 0x03, // Unsigned 32-bit value + DW_EhPtrEnc_UData8 = 0x04, // Unsigned 64-bit value + DW_EhPtrEnc_Signed = 0x08, // Signed pointer + 
DW_EhPtrEnc_SLEB128 = 0x09, // Signed LE base-128 value + DW_EhPtrEnc_SData2 = 0x0A, // Signed 16-bit value + DW_EhPtrEnc_SData4 = 0x0B, // Signed 32-bit value + DW_EhPtrEnc_SData8 = 0x0C, // Signed 64-bit value +}; + +enum +{ + DW_EhPtrEnc_ModifyMask = 0x70, + DW_EhPtrEnc_PcRel = 0x10, // Value is relative to the current program counter. + DW_EhPtrEnc_TextRel = 0x20, // Value is relative to the .text section. + DW_EhPtrEnc_DataRel = 0x30, // Value is relative to the .got or .eh_frame_hdr section. + DW_EhPtrEnc_FuncRel = 0x40, // Value is relative to the function. + DW_EhPtrEnc_Aligned = 0x50, // Value is aligned to an address unit sized boundary. +}; + +enum +{ + DW_EhPtrEnc_Indirect = 0x80, // This flag indicates that value is stored in virtual memory. + DW_EhPtrEnc_Omit = 0xFF, +}; + +typedef struct DW_EhPtrCtx +{ + U64 raw_base_vaddr; // address where pointer is being read + U64 text_vaddr; // base address of section with instructions (used for encoding pointer on SH and IA64) + U64 data_vaddr; // base address of data section (used for encoding pointer on x86-64) + U64 func_vaddr; // base address of function where IP is located +} DW_EhPtrCtx; + +// CIE: Common Information Entry +typedef struct DW_CIEUnpacked +{ + U8 version; + DW_EhPtrEnc lsda_encoding; + DW_EhPtrEnc addr_encoding; + + B32 has_augmentation_size; + U64 augmentation_size; + String8 augmentation; + + U64 code_align_factor; + S64 data_align_factor; + U64 ret_addr_reg; + + U64 handler_ip; + + Rng1U64 cfi_range; +} DW_CIEUnpacked; + +typedef struct DW_CIEUnpackedNode +{ + struct DW_CIEUnpackedNode *next; + DW_CIEUnpacked cie; + U64 offset; +} DW_CIEUnpackedNode; + +// FDE: Frame Description Entry +typedef struct DW_FDEUnpacked +{ + Rng1U64 ip_voff_range; + U64 lsda_ip; + Rng1U64 cfi_range; +} DW_FDEUnpacked; + +// CFI: Call Frame Information +typedef struct DW_CFIRecords +{ + B32 valid; + DW_CIEUnpacked cie; + DW_FDEUnpacked fde; +} DW_CFIRecords; + +typedef enum DW_CFICFARule{ + 
DW_CFI_CFA_Rule_RegOff, + DW_CFI_CFA_Rule_Expr, +} DW_CFICFARule; + +typedef struct DW_CFICFACell +{ + DW_CFICFARule rule; + union { + struct { + U64 reg_idx; + S64 offset; + }; + Rng1U64 expr; + }; +} DW_CFICFACell; + +typedef enum DW_CFIRegisterRule +{ + DW_CFIRegisterRule_SameValue, + DW_CFIRegisterRule_Undefined, + DW_CFIRegisterRule_Offset, + DW_CFIRegisterRule_ValOffset, + DW_CFIRegisterRule_Register, + DW_CFIRegisterRule_Expression, + DW_CFIRegisterRule_ValExpression, +} DW_CFIRegisterRule; + +typedef struct DW_CFICell +{ + DW_CFIRegisterRule rule; + union { + S64 n; + Rng1U64 expr; + }; +} DW_CFICell; + +typedef struct DW_CFIRow +{ + struct DW_CFIRow *next; + DW_CFICell *cells; + DW_CFICFACell cfa_cell; +} DW_CFIRow; + +typedef struct DW_CFIMachine +{ + U64 cells_per_row; + DW_CIEUnpacked *cie; + DW_EhPtrCtx *ptr_ctx; + DW_CFIRow *initial_row; + U64 fde_ip; +} DW_CFIMachine; + +typedef U8 DW_CFADecode; +enum +{ + DW_CFADecode_Nop = 0x0, + // 1,2,4,8 reserved for literal byte sizes + DW_CFADecode_Address = 0x9, + DW_CFADecode_ULEB128 = 0xA, + DW_CFADecode_SLEB128 = 0xB, +}; + +typedef U16 DW_CFAControlBits; +enum +{ + DW_CFAControlBits_Dec1Mask = 0x00F, + DW_CFAControlBits_Dec2Mask = 0x0F0, + DW_CFAControlBits_IsReg0 = 0x100, + DW_CFAControlBits_IsReg1 = 0x200, + DW_CFAControlBits_IsReg2 = 0x400, + DW_CFAControlBits_NewRow = 0x800, +}; + +global read_only DW_CFAControlBits dw_unwind__cfa_control_bits_kind1[DW_CFA_OplKind1 + 1]; +global read_only DW_CFAControlBits dw_unwind__cfa_control_bits_kind2[DW_CFA_OplKind2 + 1]; + +// register codes for unwinding match the DW_RegX64 register codes +#define DW_UNWIND_X64__REG_SLOT_COUNT 17 + +//////////////////////////////// +// x64 Unwind Function + +internal DW_UnwindResult +dw_unwind_x64(String8 raw_text, + String8 raw_eh_frame, + String8 raw_eh_frame_header, + Rng1U64 text_vrange, + Rng1U64 eh_frame_vrange, + Rng1U64 eh_frame_header_vrange, + U64 default_image_base, + U64 image_base, + U64 stack_pointer, + 
DW_RegsX64 *regs, + DW_ReadMemorySig *read_memory, + void *read_memory_ud); + +internal DW_UnwindResult dw_unwind_x64__apply_frame_rules(String8 raw_eh_frame, DW_CFIRow *row, U64 text_base_vaddr, DW_ReadMemorySig *read_memory, void *read_memory_ud, U64 stack_pointer, DW_RegsX64 *regs); + +//////////////////////////////// +// x64 Unwind Helper Functions + +internal void dw_unwind_init_x64(void); +internal U64 dw_unwind_parse_pointer_x64(void *base, Rng1U64 range, DW_EhPtrCtx *ptr_ctx, DW_EhPtrEnc ptr_enc, U64 off, U64 *ptr_out); + +//- eh_frame parsing +internal void dw_unwind_parse_cie_x64(void *base,Rng1U64 range,DW_EhPtrCtx *ptr_ctx, U64 off, DW_CIEUnpacked *cie_out); +internal void dw_unwind_parse_fde_x64(void *base,Rng1U64 range,DW_EhPtrCtx *ptr_ctx, DW_CIEUnpacked *parent_cie, U64 off, DW_FDEUnpacked *fde_out); +internal DW_CFIRecords dw_unwind_eh_frame_cfi_from_ip_slow_x64(String8 raw_eh_frame, DW_EhPtrCtx *ptr_ctx, U64 ip_voff); +internal DW_CFIRecords dw_unwind_eh_frame_hdr_from_ip_fast_x64(String8 raw_eh_frame, String8 raw_eh_frame_hdr, DW_EhPtrCtx *ptr_ctx, U64 ip_voff); + +//- cfi machine + +internal DW_CFIMachine dw_unwind_make_machine_x64(U64 cells_per_row, DW_CIEUnpacked *cie, DW_EhPtrCtx *ptr_ctx); +internal void dw_unwind_machine_equip_initial_row_x64(DW_CFIMachine *machine, DW_CFIRow *initial_row); +internal void dw_unwind_machine_equip_fde_ip_x64(DW_CFIMachine *machine, U64 fde_ip); + +internal DW_CFIRow* dw_unwind_row_alloc_x64(Arena *arena, U64 cells_per_row); +internal void dw_unwind_row_zero_x64(DW_CFIRow *row, U64 cells_per_row); +internal void dw_unwind_row_copy_x64(DW_CFIRow *dst, DW_CFIRow *src, U64 cells_per_row); + +internal B32 dw_unwind_machine_run_to_ip_x64(void *base, Rng1U64 range, DW_CFIMachine *machine, U64 target_ip, DW_CFIRow *row_out); + +#endif // DWARF_UNWIND_H +