kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 3021470e7878a91d1960a1d19527d941d7803b8c
parent 500614de42f086b4c4190fc9d736d50e28ec7b23
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 10:25:08 -0700

Refactor DWARF reader into debug module

Diffstat:
M doc/DWARF.md | 31 +++++++++++++++++++++----------
M src/api/stubs.c | 4 ++--
M src/debug/debug.h | 2 +-
M src/debug/debug_internal.h | 139 +------------------------------------------------------------------------------
A src/debug/dwarf_cfi.c | 392 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/debug/dwarf_defs.h | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/debug/dwarf_die.c | 431 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/debug/dwarf_internal.h | 437 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/debug/dwarf_line.c | 611 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/debug/dwarf_loc.c | 369 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/debug/dwarf_open.c | 750 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/debug/dwarf_query.c | 370 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/debug/dwarf_type.c | 509 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/dwarf/dwarf_cfi.c | 437 -------------------------------------------------------------------------------
D src/dwarf/dwarf_die.c | 431 -------------------------------------------------------------------------------
D src/dwarf/dwarf_internal.h | 622 -------------------------------------------------------------------------------
D src/dwarf/dwarf_line.c | 611 -------------------------------------------------------------------------------
D src/dwarf/dwarf_loc.c | 380 -------------------------------------------------------------------------------
D src/dwarf/dwarf_open.c | 750 -------------------------------------------------------------------------------
D src/dwarf/dwarf_query.c | 370 -------------------------------------------------------------------------------
D src/dwarf/dwarf_type.c | 509 -------------------------------------------------------------------------------
M src/link/link_jit.c | 2 +-
M test/debug/roundtrip_unit.c | 1 +
23 files changed, 4156 insertions(+), 4262 deletions(-)

diff --git a/doc/DWARF.md b/doc/DWARF.md @@ -3,10 +3,10 @@ Scope: what it takes for cfree to produce a DWARF-bearing object file and to read DWARF back out of one. The producer side is `Debug` (`src/debug/debug.h`) + the MCEmitter line program; the consumer side -is the `cfree_dwarf_*` family (`include/cfree.h:1224-1450`). Both sides -share `ObjBuilder` as the carrier — debug bytes are sections, abbrev -codes are interned, and DIE references are section-relative -relocations. +is the `cfree_dwarf_*` family (`include/cfree.h:1224-1450`), implemented +by the `src/debug/dwarf_*.c` reader. Both sides share `ObjBuilder` as +the carrier — debug bytes are sections, abbrev codes are interned, and +DIE references are section-relative relocations. Today the headers are real, the implementations are stubs, and the W path in `test/cg/run.sh` is staged and waiting for them. The first @@ -248,12 +248,21 @@ lifter is the parser, the host backend is the backend. src/debug/ debug.h (existing) c_debug.h (existing) + dwarf_defs.h shared DWARF wire-format constants, no state debug.c NEW: state, type DIEs, func/scope/var, line program debug_emit.c NEW: linearize to .debug_* sections in ObjBuilder debug_abbrev.c NEW: abbrev pool, dedup, encode debug_form.c NEW: form encoders (LEB128, strx, addrx, sec_offset) debug_eh.c NEW: .eh_frame CIE+FDE assembler (Phase 4) c_debug.c NEW: c_debug_type adapter + Type* → DebugTypeId cache + dwarf_internal.h reader-private state + dwarf_open.c cfree_dwarf_open/close, sections, forms, abbrevs + dwarf_die.c DIE walking for subprograms, locals, globals + dwarf_line.c .debug_line decoder and line queries + dwarf_query.c public variable/subprogram query entry points + dwarf_type.c type DIE resolution + dwarf_loc.c location expression and loclist evaluator + dwarf_cfi.c CFI machine and unwind step ``` State held by `Debug`: @@ -664,12 +673,14 @@ direction — CG using Debug — is fine and already in `cg.h`. 
### Consumer / producer separation -`src/debug/` and the consumer must not share state types behind the -public API. The consumer reads bytes; the producer writes bytes; the -public DWARF wire format is the only contract between them. Concretely: -no `#include "debug/debug.h"` from the consumer module, and no -`#include "debug/consumer.h"` from `debug.c`. This is what lets -`test/debug/` self-roundtrip catch encoder bugs. +The producer and consumer are colocated under `src/debug/`, but they +must not share state types behind the public API. The consumer reads +bytes; the producer writes bytes; the public DWARF wire format is the +only contract between them. `debug/dwarf_defs.h` is the allowed shared +header because it contains only numeric wire-format constants. +Concretely: no `#include "debug/debug.h"` from `dwarf_*.c`, and no +`#include "debug/dwarf_internal.h"` from the producer files. This is +what lets `test/debug/` self-roundtrip catch encoder bugs. --- diff --git a/src/api/stubs.c b/src/api/stubs.c @@ -96,8 +96,8 @@ ObjBuilder* read_wasm(Compiler* c, const char* n, const u8* d, size_t l) { /* JIT session implementation lives in src/dbg/ (session.c, bp.c, step.c, * displaced.c, arch_aa64.c, mem.c). */ -/* DWARF consumer: the cfree_dwarf_* implementations live in src/dwarf/. - * Their stubs were removed when src/dwarf/dwarf_*.c took ownership of +/* DWARF consumer: the cfree_dwarf_* implementations live in src/debug/. + * Their stubs were removed when src/debug/dwarf_*.c took ownership of * the symbols. */ /* Emulator (cfree emu) lives under src/emu/ — cfree_emu_run / new / diff --git a/src/debug/debug.h b/src/debug/debug.h @@ -5,7 +5,7 @@ #include "core/core.h" /* DWARF debug info. The producer side (CG, CGTarget/MCEmitter, opt) feeds - * events here as compilation runs; the consumer side writes .debug_* + * events here as compilation runs; the emitter side writes .debug_* * sections into the same ObjBuilder when debug_emit is called. 
* * Type DIEs are addressed through opaque DebugTypeId handles. The core diff --git a/src/debug/debug_internal.h b/src/debug/debug_internal.h @@ -8,147 +8,10 @@ #include "core/core.h" #include "core/heap.h" #include "debug/debug.h" +#include "debug/dwarf_defs.h" #include "obj/obj.h" /* ---------------------------------------------------------------- */ -/* DWARF wire-format constants used by the producer. - * Subset of dwarf.h; we only declare what we emit. */ - -/* Tags */ -#define DW_TAG_array_type 0x01 -#define DW_TAG_enumeration_type 0x04 -#define DW_TAG_formal_parameter 0x05 -#define DW_TAG_lexical_block 0x0b -#define DW_TAG_member 0x0d -#define DW_TAG_pointer_type 0x0f -#define DW_TAG_compile_unit 0x11 -#define DW_TAG_structure_type 0x13 -#define DW_TAG_subroutine_type 0x15 -#define DW_TAG_typedef 0x16 -#define DW_TAG_union_type 0x17 -#define DW_TAG_unspecified_parameters 0x18 -#define DW_TAG_subrange_type 0x21 -#define DW_TAG_base_type 0x24 -#define DW_TAG_const_type 0x26 -#define DW_TAG_enumerator 0x28 -#define DW_TAG_subprogram 0x2e -#define DW_TAG_variable 0x34 -#define DW_TAG_volatile_type 0x35 -#define DW_TAG_restrict_type 0x37 - -/* Children flag */ -#define DW_CHILDREN_no 0 -#define DW_CHILDREN_yes 1 - -/* Attributes */ -#define DW_AT_sibling 0x01 -#define DW_AT_location 0x02 -#define DW_AT_name 0x03 -#define DW_AT_byte_size 0x0b -#define DW_AT_bit_offset 0x0c -#define DW_AT_bit_size 0x0d -#define DW_AT_stmt_list 0x10 -#define DW_AT_low_pc 0x11 -#define DW_AT_high_pc 0x12 -#define DW_AT_language 0x13 -#define DW_AT_comp_dir 0x1b -#define DW_AT_const_value 0x1c -#define DW_AT_upper_bound 0x2f -#define DW_AT_producer 0x25 -#define DW_AT_prototyped 0x27 -#define DW_AT_decl_file 0x3a -#define DW_AT_decl_line 0x3b -#define DW_AT_encoding 0x3e -#define DW_AT_external 0x3f -#define DW_AT_frame_base 0x40 -#define DW_AT_count 0x37 -#define DW_AT_data_member_location 0x38 -#define DW_AT_type 0x49 -#define DW_AT_ranges 0x55 -#define DW_AT_addr_base 0x73 -#define 
DW_AT_rnglists_base 0x74 -#define DW_AT_str_offsets_base 0x72 -#define DW_AT_loclists_base 0x8c - -/* Forms */ -#define DW_FORM_addr 0x01 -#define DW_FORM_block2 0x03 -#define DW_FORM_block4 0x04 -#define DW_FORM_data2 0x05 -#define DW_FORM_data4 0x06 -#define DW_FORM_data8 0x07 -#define DW_FORM_string 0x08 -#define DW_FORM_block 0x09 -#define DW_FORM_block1 0x0a -#define DW_FORM_data1 0x0b -#define DW_FORM_flag 0x0c -#define DW_FORM_sdata 0x0d -#define DW_FORM_udata 0x0f -#define DW_FORM_ref_addr 0x10 -#define DW_FORM_ref4 0x13 -#define DW_FORM_sec_offset 0x17 -#define DW_FORM_exprloc 0x18 -#define DW_FORM_flag_present 0x19 -#define DW_FORM_strx 0x1a -#define DW_FORM_addrx 0x1b -#define DW_FORM_ref_sup4 0x1c -#define DW_FORM_strp_sup 0x1d -#define DW_FORM_loclistx 0x22 -#define DW_FORM_rnglistx 0x23 -#define DW_FORM_strx1 0x26 -#define DW_FORM_strx2 0x27 -#define DW_FORM_strx3 0x28 -#define DW_FORM_strx4 0x29 -#define DW_FORM_line_strp 0x1f - -/* Languages (DWARF 5) */ -#define DW_LANG_C11 0x001d -#define DW_LANG_C17 0x002c - -/* Base type encodings */ -#define DW_ATE_address 0x01 -#define DW_ATE_boolean 0x02 -#define DW_ATE_float 0x04 -#define DW_ATE_signed 0x05 -#define DW_ATE_signed_char 0x06 -#define DW_ATE_unsigned 0x07 -#define DW_ATE_unsigned_char 0x08 -#define DW_ATE_UTF 0x10 - -/* Line program */ -#define DW_LNS_copy 0x01 -#define DW_LNS_advance_pc 0x02 -#define DW_LNS_advance_line 0x03 -#define DW_LNS_set_file 0x04 -#define DW_LNS_set_column 0x05 -#define DW_LNS_negate_stmt 0x06 -#define DW_LNS_set_basic_block 0x07 -#define DW_LNS_const_add_pc 0x08 -#define DW_LNS_fixed_advance_pc 0x09 -#define DW_LNE_end_sequence 0x01 -#define DW_LNE_set_address 0x02 -#define DW_LNCT_path 0x01 -#define DW_LNCT_directory_index 0x02 - -/* Range-list opcodes */ -#define DW_RLE_end_of_list 0x00 -#define DW_RLE_start_length 0x07 -#define DW_RLE_offset_pair 0x04 - -/* DWARF expression ops */ -#define DW_OP_addr 0x03 -#define DW_OP_const1u 0x08 -#define DW_OP_consts 0x11 
-#define DW_OP_reg0 0x50 -#define DW_OP_breg0 0x70 -#define DW_OP_regx 0x90 -#define DW_OP_fbreg 0x91 -#define DW_OP_call_frame_cfa 0x9c - -/* Unit types */ -#define DW_UT_compile 0x01 - -/* ---------------------------------------------------------------- */ /* Type DIE pool */ typedef enum DebugTypeKind { diff --git a/src/debug/dwarf_cfi.c b/src/debug/dwarf_cfi.c @@ -0,0 +1,392 @@ +/* dwarf_cfi.c — CFI machine + cfree_dwarf_unwind_step. + * + * Per doc/DWARF.md §4.5: walk .eh_frame from the highest-address end + * (CIEs first), run the FDE program for the FDE whose + * (initial_location, address_range) covers frame->pc. Output mutates + * frame->pc, frame->cfa, and caller-saved register slots. + * + * Status: minimal Phase-4 implementation. Decodes the FDE that covers + * `frame->pc` and applies a small subset of CFA opcodes sufficient for + * the aarch64 frame-pointer prologues the producer emits today. Returns + * 1 (no caller info) if no FDE matches or the section is empty — + * callers must treat 1 as "stack bottom" per the API contract. + */ + +#include <cfree.h> +#include <stdint.h> +#include <string.h> + +#include "core/core.h" +#include "core/heap.h" +#include "debug/dwarf_internal.h" + +#define CFI_REG_MAX 32 + +typedef struct CfiRule { + /* 0=undefined, 1=offset(cfa+N), 2=register(R), 3=same_value */ + u8 kind; + i64 offset; + u32 reg; +} CfiRule; + +typedef struct CfiState { + /* CFA: cfa = regs[reg] + offset (kind 0), or expression (kind 1). 
*/ + int cfa_kind; /* 0 = reg+offset; 1 = expression (unhandled) */ + u32 cfa_reg; + i64 cfa_offset; + CfiRule rules[CFI_REG_MAX]; + i32 code_align; + i32 data_align; + u32 return_reg; +} CfiState; + +static u64 read_eh_ptr(const u8* base, u32 size, u32* off, u8 enc) { + u64 v = 0; + switch (enc & 0x0f) { + case DW_EH_PE_absptr: + case DW_EH_PE_udata8: + v = dw_u64(base, size, off); + break; + case DW_EH_PE_uleb128: + v = dw_uleb(base, size, off); + break; + case DW_EH_PE_udata2: + v = dw_u16(base, size, off); + break; + case DW_EH_PE_udata4: + v = dw_u32(base, size, off); + break; + case DW_EH_PE_sleb128: + v = (u64)dw_sleb(base, size, off); + break; + case DW_EH_PE_sdata2: + v = (u64)(i64)(i16)dw_u16(base, size, off); + break; + case DW_EH_PE_sdata4: + v = (u64)(i64)(i32)dw_u32(base, size, off); + break; + case DW_EH_PE_sdata8: + v = (u64)dw_u64(base, size, off); + break; + default: + break; + } + return v; +} + +static void run_cfi(const u8* prog, u32 plen, CfiState* st, u64* loc, + u64 stop_pc) { + u32 off = 0; + while (off < plen) { + u8 op = prog[off++]; + u8 hi = op & 0xc0; + u8 lo = op & 0x3f; + if (hi == DW_CFA_advance_loc) { + *loc += (u64)lo * (u64)st->code_align; + if (*loc > stop_pc) return; + continue; + } + if (hi == DW_CFA_offset) { + u64 fac = dw_uleb(prog, plen, &off); + if (lo < CFI_REG_MAX) { + st->rules[lo].kind = 1; + st->rules[lo].offset = (i64)fac * (i64)st->data_align; + } + continue; + } + if (hi == DW_CFA_restore) { + if (lo < CFI_REG_MAX) st->rules[lo].kind = 0; + continue; + } + switch (op) { + case DW_CFA_nop: + break; + case DW_CFA_advance_loc1: { + u8 v = dw_u8(prog, plen, &off); + *loc += (u64)v * (u64)st->code_align; + if (*loc > stop_pc) return; + } break; + case DW_CFA_advance_loc2: { + u16 v = dw_u16(prog, plen, &off); + *loc += (u64)v * (u64)st->code_align; + if (*loc > stop_pc) return; + } break; + case DW_CFA_advance_loc4: { + u32 v = dw_u32(prog, plen, &off); + *loc += (u64)v * (u64)st->code_align; + if (*loc > stop_pc) 
return; + } break; + case DW_CFA_set_loc: + *loc = dw_u64(prog, plen, &off); + if (*loc > stop_pc) return; + break; + case DW_CFA_def_cfa: { + u64 r = dw_uleb(prog, plen, &off); + u64 o = dw_uleb(prog, plen, &off); + st->cfa_kind = 0; + st->cfa_reg = (u32)r; + st->cfa_offset = (i64)o; + } break; + case DW_CFA_def_cfa_register: { + u64 r = dw_uleb(prog, plen, &off); + st->cfa_reg = (u32)r; + } break; + case DW_CFA_def_cfa_offset: { + u64 o = dw_uleb(prog, plen, &off); + st->cfa_offset = (i64)o; + } break; + case DW_CFA_def_cfa_sf: { + u64 r = dw_uleb(prog, plen, &off); + i64 o = dw_sleb(prog, plen, &off); + st->cfa_kind = 0; + st->cfa_reg = (u32)r; + st->cfa_offset = o * st->data_align; + } break; + case DW_CFA_def_cfa_offset_sf: { + i64 o = dw_sleb(prog, plen, &off); + st->cfa_offset = o * st->data_align; + } break; + case DW_CFA_offset_extended: { + u64 r = dw_uleb(prog, plen, &off); + u64 fac = dw_uleb(prog, plen, &off); + if (r < CFI_REG_MAX) { + st->rules[r].kind = 1; + st->rules[r].offset = (i64)fac * (i64)st->data_align; + } + } break; + case DW_CFA_offset_extended_sf: { + u64 r = dw_uleb(prog, plen, &off); + i64 fac = dw_sleb(prog, plen, &off); + if (r < CFI_REG_MAX) { + st->rules[r].kind = 1; + st->rules[r].offset = fac * st->data_align; + } + } break; + case DW_CFA_register: { + u64 r1 = dw_uleb(prog, plen, &off); + u64 r2 = dw_uleb(prog, plen, &off); + if (r1 < CFI_REG_MAX) { + st->rules[r1].kind = 2; + st->rules[r1].reg = (u32)r2; + } + } break; + case DW_CFA_undefined: { + u64 r = dw_uleb(prog, plen, &off); + if (r < CFI_REG_MAX) st->rules[r].kind = 0; + } break; + case DW_CFA_same_value: { + u64 r = dw_uleb(prog, plen, &off); + if (r < CFI_REG_MAX) st->rules[r].kind = 3; + } break; + case DW_CFA_remember_state: + case DW_CFA_restore_state: + /* Not modelled — would need a state stack. Best-effort: skip. 
*/ + break; + case DW_CFA_def_cfa_expression: { + u64 n = dw_uleb(prog, plen, &off); + off += (u32)n; + st->cfa_kind = 1; /* expression — we can't evaluate without frame */ + } break; + case DW_CFA_expression: + case DW_CFA_val_expression: { + (void)dw_uleb(prog, plen, &off); + { + u64 n = dw_uleb(prog, plen, &off); + off += (u32)n; + } + } break; + case DW_CFA_val_offset: { + (void)dw_uleb(prog, plen, &off); + (void)dw_uleb(prog, plen, &off); + } break; + case DW_CFA_val_offset_sf: { + (void)dw_uleb(prog, plen, &off); + (void)dw_sleb(prog, plen, &off); + } break; + default: + return; /* unknown opcode — bail */ + } + } +} + +int cfree_dwarf_unwind_step(CfreeDebugInfo* d, CfreeUnwindFrame* frame) { + u32 off; + if (!d || !frame) return 1; + if (d->eh_frame.sec_idx == UINT32_MAX || d->eh_frame.size == 0) return 1; + /* Sweep .eh_frame entries, locating the FDE that covers frame->pc. */ + off = 0; + while (off < d->eh_frame.size) { + u32 length = dw_u32(d->eh_frame.data, d->eh_frame.size, &off); + u32 entry_end; + u32 cie_id_off = off; + u32 cie_id; + if (length == 0) break; /* terminator */ + if (length == 0xffffffffu) return 1; /* 64-bit eh_frame unsupported */ + entry_end = off + length; + cie_id = dw_u32(d->eh_frame.data, d->eh_frame.size, &off); + if (cie_id == 0) { + /* CIE — skip body; we'll re-read on demand when its FDEs reference it. */ + off = entry_end; + continue; + } + { + /* FDE: cie_id is a backwards offset to the CIE. */ + u32 cie_pointer_pos = cie_id_off; /* offset of the cie_id field */ + u32 cie_start = cie_pointer_pos - cie_id; + u32 cie_off, cie_len, cie_ver; + const char* aug; + u8 fde_pe = DW_EH_PE_absptr; + i32 code_align; + i32 data_align; + u32 return_reg; + u32 cie_id_at_cie; + u32 cie_aug_data_len = 0; + u8 has_aug_data = 0; + u32 cie_inst_off, cie_inst_end; + u64 fde_pc; + u64 fde_range; + CfiState st; + + /* Parse CIE header. 
*/ + cie_off = cie_start; + cie_len = dw_u32(d->eh_frame.data, d->eh_frame.size, &cie_off); + (void)cie_len; + cie_id_at_cie = dw_u32(d->eh_frame.data, d->eh_frame.size, &cie_off); + (void)cie_id_at_cie; /* should be 0 */ + cie_ver = dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); + if (cie_ver != 1 && cie_ver != 3 && cie_ver != 4) { + off = entry_end; + continue; + } + aug = dw_cstr(d->eh_frame.data, d->eh_frame.size, &cie_off); + if (cie_ver == 4) { + (void)dw_u8(d->eh_frame.data, d->eh_frame.size, + &cie_off); /* address_size */ + (void)dw_u8(d->eh_frame.data, d->eh_frame.size, + &cie_off); /* segment_size */ + } + code_align = (i32)dw_uleb(d->eh_frame.data, d->eh_frame.size, &cie_off); + data_align = (i32)dw_sleb(d->eh_frame.data, d->eh_frame.size, &cie_off); + if (cie_ver == 1) { + return_reg = dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); + } else { + return_reg = (u32)dw_uleb(d->eh_frame.data, d->eh_frame.size, &cie_off); + } + /* Parse augmentation. */ + { + const char* a = aug; + if (a && a[0] == 'z') { + cie_aug_data_len = + (u32)dw_uleb(d->eh_frame.data, d->eh_frame.size, &cie_off); + has_aug_data = 1; + (void)cie_aug_data_len; + a++; + while (*a) { + switch (*a) { + case 'R': + fde_pe = dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); + break; + case 'P': { + u8 enc = dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); + (void)read_eh_ptr(d->eh_frame.data, d->eh_frame.size, &cie_off, + enc); + } break; + case 'L': + (void)dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); + break; + case 'S': + case 'B': + break; + default: + break; + } + a++; + } + } else if (a && a[0] != 0) { + /* Unknown augmentation chars without 'z' — bail. */ + off = entry_end; + continue; + } + } + cie_inst_off = cie_off; + /* CIE body extends to entry_start of CIE plus 4 + cie_len. We already + * consumed length+id, so the upper bound is cie_start + 4 + cie_len. 
*/ + cie_inst_end = cie_start + 4 + cie_len; + (void)has_aug_data; + + /* Run CIE initial instructions. */ + memset(&st, 0, sizeof(st)); + st.code_align = code_align; + st.data_align = data_align; + st.return_reg = return_reg; + run_cfi(d->eh_frame.data + cie_inst_off, + cie_inst_end > cie_inst_off ? cie_inst_end - cie_inst_off : 0, + &st, &(u64){0}, ~(u64)0); + + /* Parse FDE pc, range. */ + { + u32 pc_off = off; + fde_pc = read_eh_ptr(d->eh_frame.data, d->eh_frame.size, &off, fde_pe); + if ((fde_pe & 0xf0) == DW_EH_PE_pcrel) { + /* pcrel: address is relative to the location of the encoded + * pointer itself within the section. We interpret as offset from + * pc_off. The runtime address is unknown to us absent a base — + * for an unrelocated obj, just keep the relative value. */ + fde_pc += pc_off; /* relative-to-section-offset best-effort */ + } + fde_range = read_eh_ptr(d->eh_frame.data, d->eh_frame.size, &off, + fde_pe & 0x0f); + } + /* Skip FDE augmentation data if CIE's z aug was set. */ + if (has_aug_data) { + u64 aug_len = dw_uleb(d->eh_frame.data, d->eh_frame.size, &off); + off += (u32)aug_len; + } + if (frame->pc < fde_pc || frame->pc >= fde_pc + fde_range) { + off = entry_end; + continue; + } + /* Run FDE instructions up to frame->pc. */ + { + u64 loc = fde_pc; + u32 fde_inst_off = off; + u32 fde_inst_end = entry_end; + run_cfi(d->eh_frame.data + fde_inst_off, + fde_inst_end > fde_inst_off ? fde_inst_end - fde_inst_off : 0, + &st, &loc, frame->pc); + } + /* Compute caller frame. */ + if (st.cfa_kind != 0 || st.cfa_reg >= 32) return 1; + { + u64 cfa = frame->regs[st.cfa_reg] + (u64)st.cfa_offset; + u32 r; + u64 ret_addr = 0; + /* For each register with a rule, we'd read CFA-relative memory to + * recover its caller value. Without a memory provider we can't + * actually load — leave registers as-is and just update cfa/pc. + * The return address sits in the rule for st.return_reg. If + * undefined, we're at the bottom. 
*/ + if (st.return_reg < CFI_REG_MAX && st.rules[st.return_reg].kind == 1) { + /* ret_addr = *(cfa + offset) — but we have no JIT session here. + * Caller-supplied frames typically include enough register state + * that the harness already captured x30. We treat "undefined" + * as bottom-of-stack. */ + ret_addr = 0; + } else if (st.return_reg < 32 && st.rules[st.return_reg].kind == 2) { + ret_addr = frame->regs[st.rules[st.return_reg].reg]; + } else { + return 1; /* bottom of stack */ + } + frame->cfa = cfa; + frame->pc = ret_addr; + for (r = 0; r < 32; ++r) { + /* Without memory access we can't load offset rules; leave the + * register value unchanged (best-effort). */ + (void)r; + } + } + return 0; + } + } + return 1; +} diff --git a/src/debug/dwarf_defs.h b/src/debug/dwarf_defs.h @@ -0,0 +1,260 @@ +#ifndef CFREE_DWARF_DEFS_H +#define CFREE_DWARF_DEFS_H + +/* Shared DWARF wire-format constants. + * + * This header is intentionally limited to numeric encodings from the DWARF + * format. Producer and reader internals may both include it, but neither side + * should expose or share implementation state through it. 
+ */ + +/* DW_TAG */ +#define DW_TAG_array_type 0x01 +#define DW_TAG_class_type 0x02 +#define DW_TAG_enumeration_type 0x04 +#define DW_TAG_formal_parameter 0x05 +#define DW_TAG_lexical_block 0x0b +#define DW_TAG_member 0x0d +#define DW_TAG_pointer_type 0x0f +#define DW_TAG_reference_type 0x10 +#define DW_TAG_compile_unit 0x11 +#define DW_TAG_structure_type 0x13 +#define DW_TAG_subroutine_type 0x15 +#define DW_TAG_typedef 0x16 +#define DW_TAG_union_type 0x17 +#define DW_TAG_unspecified_parameters 0x18 +#define DW_TAG_inheritance 0x1c +#define DW_TAG_inlined_subroutine 0x1d +#define DW_TAG_subrange_type 0x21 +#define DW_TAG_base_type 0x24 +#define DW_TAG_const_type 0x26 +#define DW_TAG_enumerator 0x28 +#define DW_TAG_subprogram 0x2e +#define DW_TAG_variable 0x34 +#define DW_TAG_volatile_type 0x35 +#define DW_TAG_restrict_type 0x37 + +/* DW_CHILDREN */ +#define DW_CHILDREN_no 0x00 +#define DW_CHILDREN_yes 0x01 + +/* DW_AT */ +#define DW_AT_sibling 0x01 +#define DW_AT_location 0x02 +#define DW_AT_name 0x03 +#define DW_AT_byte_size 0x0b +#define DW_AT_bit_offset 0x0c /* DWARF 3/4; DW5 uses data_bit_offset. 
*/ +#define DW_AT_bit_size 0x0d +#define DW_AT_stmt_list 0x10 +#define DW_AT_low_pc 0x11 +#define DW_AT_high_pc 0x12 +#define DW_AT_language 0x13 +#define DW_AT_comp_dir 0x1b +#define DW_AT_const_value 0x1c +#define DW_AT_producer 0x25 +#define DW_AT_prototyped 0x27 +#define DW_AT_start_scope 0x2c +#define DW_AT_bit_stride 0x2e +#define DW_AT_upper_bound 0x2f +#define DW_AT_count 0x37 +#define DW_AT_data_member_location 0x38 +#define DW_AT_decl_file 0x3a +#define DW_AT_decl_line 0x3b +#define DW_AT_declaration 0x3c +#define DW_AT_encoding 0x3e +#define DW_AT_external 0x3f +#define DW_AT_frame_base 0x40 +#define DW_AT_specification 0x47 +#define DW_AT_type 0x49 +#define DW_AT_ranges 0x55 +#define DW_AT_data_bit_offset 0x6b +#define DW_AT_str_offsets_base 0x72 +#define DW_AT_addr_base 0x73 +#define DW_AT_rnglists_base 0x74 +#define DW_AT_loclists_base 0x8c + +/* DW_FORM */ +#define DW_FORM_addr 0x01 +#define DW_FORM_block2 0x03 +#define DW_FORM_block4 0x04 +#define DW_FORM_data2 0x05 +#define DW_FORM_data4 0x06 +#define DW_FORM_data8 0x07 +#define DW_FORM_string 0x08 +#define DW_FORM_block 0x09 +#define DW_FORM_block1 0x0a +#define DW_FORM_data1 0x0b +#define DW_FORM_flag 0x0c +#define DW_FORM_sdata 0x0d +#define DW_FORM_strp 0x0e +#define DW_FORM_udata 0x0f +#define DW_FORM_ref_addr 0x10 +#define DW_FORM_ref1 0x11 +#define DW_FORM_ref2 0x12 +#define DW_FORM_ref4 0x13 +#define DW_FORM_ref8 0x14 +#define DW_FORM_ref_udata 0x15 +#define DW_FORM_indirect 0x16 +#define DW_FORM_sec_offset 0x17 +#define DW_FORM_exprloc 0x18 +#define DW_FORM_flag_present 0x19 +#define DW_FORM_strx 0x1a +#define DW_FORM_addrx 0x1b +#define DW_FORM_ref_sup4 0x1c +#define DW_FORM_strp_sup 0x1d +#define DW_FORM_data16 0x1e +#define DW_FORM_line_strp 0x1f +#define DW_FORM_ref_sig8 0x20 +#define DW_FORM_implicit_const 0x21 +#define DW_FORM_loclistx 0x22 +#define DW_FORM_rnglistx 0x23 +#define DW_FORM_ref_sup8 0x24 +#define DW_FORM_strx1 0x26 +#define DW_FORM_strx2 0x27 +#define DW_FORM_strx3 0x28 
+#define DW_FORM_strx4 0x29 +#define DW_FORM_addrx1 0x2a +#define DW_FORM_addrx2 0x2b +#define DW_FORM_addrx3 0x2c +#define DW_FORM_addrx4 0x2d + +/* DW_LANG */ +#define DW_LANG_C 0x02 +#define DW_LANG_C89 0x01 +#define DW_LANG_C99 0x0c +#define DW_LANG_C11 0x1d +#define DW_LANG_C17 0x2c + +/* DW_ATE */ +#define DW_ATE_address 0x01 +#define DW_ATE_boolean 0x02 +#define DW_ATE_complex_float 0x03 +#define DW_ATE_float 0x04 +#define DW_ATE_signed 0x05 +#define DW_ATE_signed_char 0x06 +#define DW_ATE_unsigned 0x07 +#define DW_ATE_unsigned_char 0x08 +#define DW_ATE_UTF 0x10 + +/* DW_LNS / DW_LNE / DW_LNCT */ +#define DW_LNS_copy 0x01 +#define DW_LNS_advance_pc 0x02 +#define DW_LNS_advance_line 0x03 +#define DW_LNS_set_file 0x04 +#define DW_LNS_set_column 0x05 +#define DW_LNS_negate_stmt 0x06 +#define DW_LNS_set_basic_block 0x07 +#define DW_LNS_const_add_pc 0x08 +#define DW_LNS_fixed_advance_pc 0x09 +#define DW_LNS_set_prologue_end 0x0a +#define DW_LNS_set_epilogue_begin 0x0b +#define DW_LNS_set_isa 0x0c + +#define DW_LNE_end_sequence 0x01 +#define DW_LNE_set_address 0x02 +#define DW_LNE_set_discriminator 0x04 + +#define DW_LNCT_path 0x01 +#define DW_LNCT_directory_index 0x02 +#define DW_LNCT_timestamp 0x03 +#define DW_LNCT_size 0x04 +#define DW_LNCT_MD5 0x05 + +/* DW_RLE */ +#define DW_RLE_end_of_list 0x00 +#define DW_RLE_offset_pair 0x04 +#define DW_RLE_start_length 0x07 + +/* DW_LLE */ +#define DW_LLE_end_of_list 0x00 +#define DW_LLE_base_addressx 0x01 +#define DW_LLE_startx_endx 0x02 +#define DW_LLE_startx_length 0x03 +#define DW_LLE_offset_pair 0x04 +#define DW_LLE_default_location 0x05 +#define DW_LLE_base_address 0x06 +#define DW_LLE_start_end 0x07 +#define DW_LLE_start_length 0x08 + +/* DW_OP */ +#define DW_OP_addr 0x03 +#define DW_OP_const1u 0x08 +#define DW_OP_const1s 0x09 +#define DW_OP_const2u 0x0a +#define DW_OP_const2s 0x0b +#define DW_OP_const4u 0x0c +#define DW_OP_const4s 0x0d +#define DW_OP_const8u 0x0e +#define DW_OP_const8s 0x0f +#define DW_OP_constu 
0x10 +#define DW_OP_consts 0x11 +#define DW_OP_dup 0x12 +#define DW_OP_drop 0x13 +#define DW_OP_and 0x1a +#define DW_OP_minus 0x1c +#define DW_OP_mul 0x1e +#define DW_OP_or 0x21 +#define DW_OP_plus 0x22 +#define DW_OP_plus_uconst 0x23 +#define DW_OP_shl 0x24 +#define DW_OP_shr 0x25 +#define DW_OP_shra 0x26 +#define DW_OP_xor 0x27 +#define DW_OP_lit0 0x30 +#define DW_OP_reg0 0x50 +#define DW_OP_breg0 0x70 +#define DW_OP_regx 0x90 +#define DW_OP_fbreg 0x91 +#define DW_OP_bregx 0x92 +#define DW_OP_call_frame_cfa 0x9c +#define DW_OP_stack_value 0x9f + +/* DW_CFA */ +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_val_offset 0x14 +#define DW_CFA_val_offset_sf 0x15 +#define DW_CFA_val_expression 0x16 +#define DW_CFA_advance_loc 0x40 +#define DW_CFA_offset 0x80 +#define DW_CFA_restore 0xc0 + +/* DW_EH_PE */ +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0a +#define DW_EH_PE_sdata4 0x0b +#define DW_EH_PE_sdata8 0x0c +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_omit 0xff + +/* DW_UT */ +#define DW_UT_compile 0x01 + +#endif diff --git 
a/src/debug/dwarf_die.c b/src/debug/dwarf_die.c @@ -0,0 +1,431 @@ +/* dwarf_die.c — DIE walker: subprogram collection, locals, globals. + * + * Per doc/DWARF.md §4.3: streaming walker over .debug_info keyed off the + * abbrev table; collects subprograms, lexical_blocks, formal_parameters, + * variables. Cross-CU refs land later when needed. + */ + +#include <cfree.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "core/core.h" +#include "core/heap.h" +#include "core/util.h" +#include "debug/dwarf_internal.h" + +/* ---- subprogram + lexical_block walk --------------------------------- */ + +static void pack_init(DieAttrPack* p) { memset(p, 0, sizeof(*p)); } + +/* Read all attributes of a DIE into pack `p`; updates *off to past attrs. */ +static void read_pack(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, + DieAttrPack* p, u32* off) { + u32 i; + if (!die->abbrev) return; + for (i = 0; i < die->abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die->abbrev->attrs[i]; + DwAttrValue v; + dw_read_form(d, cu, aa->form, aa->implicit_const, off, &v); + switch (aa->attr) { + case DW_AT_name: + p->name = v.str; + break; + case DW_AT_low_pc: + p->low_pc = v.u; + p->has_low_pc = 1; + break; + case DW_AT_high_pc: + p->high_pc_value = v.u; + p->high_pc_form = aa->form; + p->has_high_pc = 1; + break; + case DW_AT_type: + /* Local CU offset: ref* forms are CU-relative; ref_addr is + * .debug_info-absolute. 
*/ + if (aa->form == DW_FORM_ref_addr) + p->type_die_offset = (u32)v.u; + else + p->type_die_offset = cu->hdr_offset + (u32)v.u; + p->has_type = 1; + break; + case DW_AT_decl_file: + p->decl_file = (u32)v.u; + break; + case DW_AT_decl_line: + p->decl_line = (u32)v.u; + break; + case DW_AT_location: + if (aa->form == DW_FORM_loclistx) { + p->has_loclist = 1; + p->loclist_index = v.u; + } else if (aa->form == DW_FORM_exprloc || aa->form == DW_FORM_block || + aa->form == DW_FORM_block1 || aa->form == DW_FORM_block2 || + aa->form == DW_FORM_block4) { + p->loc_block = v.block; + p->loc_block_len = v.block_len; + } else if (aa->form == DW_FORM_sec_offset) { + /* Reference into .debug_loclists — not supported in Phase 5 + * baseline. */ + p->has_loclist = 1; + p->loclist_index = v.u; + } + break; + case DW_AT_frame_base: + p->fb_block = v.block; + p->fb_block_len = v.block_len; + break; + case DW_AT_const_value: + p->const_value = v.s; + p->has_const_value = 1; + break; + case DW_AT_data_member_location: + if (aa->form == DW_FORM_exprloc || aa->form == DW_FORM_block || + aa->form == DW_FORM_block1 || aa->form == DW_FORM_block2 || + aa->form == DW_FORM_block4) { + /* Best effort: evaluate a single DW_OP_plus_uconst form by + * peeking. 
*/ + if (v.block && v.block_len > 0 && v.block[0] == DW_OP_plus_uconst) { + u32 t = 1; + p->byte_offset = (u32)dw_uleb(v.block, v.block_len, &t); + p->has_byte_offset = 1; + } + } else { + p->byte_offset = (u32)v.u; + p->has_byte_offset = 1; + } + break; + case DW_AT_byte_size: + p->byte_size = (u32)v.u; + p->has_byte_size = 1; + break; + case DW_AT_bit_size: + p->bit_size = (u32)v.u; + p->has_bit_size = 1; + break; + case DW_AT_bit_offset: + case DW_AT_data_bit_offset: + p->bit_offset = (u32)v.u; + p->has_bit_offset = 1; + break; + case DW_AT_encoding: + p->base_encoding = (u32)v.u; + p->has_encoding = 1; + break; + case DW_AT_count: + case DW_AT_upper_bound: + p->array_count = (u32)v.u; + if (aa->attr == DW_AT_upper_bound) p->array_count++; + p->has_array_count = 1; + break; + } + } +} + +/* Append a subprogram (or skip if its bounds aren't useful). */ +static void push_subprog(CfreeDebugInfo* d, DwSubprog* sp) { + if (d->nsubs == d->subs_cap) { + u32 ncap = d->subs_cap ? d->subs_cap * 2 : 8; + DwSubprog* na = + (DwSubprog*)d->h->realloc(d->h, d->subs, d->subs_cap * sizeof(*d->subs), + ncap * sizeof(*d->subs), _Alignof(DwSubprog)); + if (!na) return; + d->subs = na; + d->subs_cap = ncap; + } + d->subs[d->nsubs++] = *sp; +} + +/* Walk a DIE subtree, collecting subprograms. */ +static void walk_for_subs(CfreeDebugInfo* d, u32 cu_idx, u32* off) { + DwCu* cu = &d->cus[cu_idx]; + for (;;) { + DwDie die; + if (!dw_read_die(d, cu, off, &die)) return; + if (die.abbrev->tag == DW_TAG_subprogram || + die.abbrev->tag == DW_TAG_inlined_subroutine) { + DieAttrPack p; + DwSubprog sp; + u32 saved_off; + pack_init(&p); + saved_off = *off; + read_pack(d, cu, &die, &p, off); + memset(&sp, 0, sizeof(sp)); + sp.name = p.name ? 
p.name : ""; + sp.low_pc = p.low_pc; + if (p.has_high_pc) { + if (p.high_pc_form == DW_FORM_addr) + sp.high_pc = p.high_pc_value; + else + sp.high_pc = p.low_pc + p.high_pc_value; + } else { + sp.high_pc = p.low_pc; + } + sp.decl_line = p.decl_line; + /* Resolve decl_file via the CU's line program. */ + sp.decl_file = ""; + if (p.decl_file != 0 && cu->has_stmt_list) { + DwLineProgram* lp; + if (!d->lines_built[cu_idx]) dw_build_line(d, cu_idx); + lp = &d->lines_by_cu[cu_idx]; + if (lp->nfile_norm && p.decl_file < lp->nfile_norm) + sp.decl_file = lp->file_norm[p.decl_file]; + } + sp.cu_idx = cu_idx; + sp.die_offset = die.die_off; + sp.frame_base = p.fb_block; + sp.frame_base_len = p.fb_block_len; + sp.inlined = (die.abbrev->tag == DW_TAG_inlined_subroutine); + if (p.has_low_pc && sp.high_pc > sp.low_pc) + push_subprog(d, &sp); + else if (die.abbrev->tag == DW_TAG_subprogram && p.name) + push_subprog(d, &sp); /* declaration-only OK */ + (void)saved_off; + /* Recurse into children for nested subprograms / inlines. */ + if (die.abbrev->has_children) { + walk_for_subs(d, cu_idx, off); + } + } else if (die.abbrev->has_children) { + /* Skip attrs, then descend. */ + u32 i; + for (i = 0; i < die.abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die.abbrev->attrs[i]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, off); + } + walk_for_subs(d, cu_idx, off); + } else { + u32 i; + for (i = 0; i < die.abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die.abbrev->attrs[i]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, off); + } + } + } +} + +void dw_build_subs(CfreeDebugInfo* d) { + u32 i; + if (d->subs_built) return; + d->subs_built = 1; + for (i = 0; i < d->ncus; ++i) { + DwCu* cu = &d->cus[i]; + u32 off = cu->die_start_off; + /* The root DIE is the CU itself — recurse into it. 
*/ + DwDie root; + if (!dw_read_die(d, cu, &off, &root)) continue; + /* Skip root attrs */ + { + u32 j; + for (j = 0; j < root.abbrev->nattrs; ++j) { + DwAbbrevAttr* aa = &root.abbrev->attrs[j]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); + } + } + if (root.abbrev->has_children) walk_for_subs(d, i, &off); + } +} + +DwSubprog* dw_find_subprog(CfreeDebugInfo* d, u64 pc) { + u32 i; + dw_build_subs(d); + for (i = 0; i < d->nsubs; ++i) { + DwSubprog* sp = &d->subs[i]; + if (sp->low_pc <= pc && pc < sp->high_pc) return sp; + } + return NULL; +} + +/* ---- locals + parameters --------------------------------------------- */ + +typedef struct LocalCtx { + CfreeDebugInfo* d; + u32 cu_idx; + DwLocal* params; + u32 nparams, params_cap; + DwLocal* locals; + u32 nlocals, locals_cap; +} LocalCtx; + +static void push_param(LocalCtx* x, DwLocal* v) { + if (x->nparams == x->params_cap) { + u32 ncap = x->params_cap ? x->params_cap * 2 : 4; + DwLocal* na = (DwLocal*)x->d->h->realloc( + x->d->h, x->params, x->params_cap * sizeof(*x->params), + ncap * sizeof(*x->params), _Alignof(DwLocal)); + if (!na) return; + x->params = na; + x->params_cap = ncap; + } + x->params[x->nparams++] = *v; +} +static void push_local(LocalCtx* x, DwLocal* v) { + if (x->nlocals == x->locals_cap) { + u32 ncap = x->locals_cap ? 
x->locals_cap * 2 : 4; + DwLocal* na = (DwLocal*)x->d->h->realloc( + x->d->h, x->locals, x->locals_cap * sizeof(*x->locals), + ncap * sizeof(*x->locals), _Alignof(DwLocal)); + if (!na) return; + x->locals = na; + x->locals_cap = ncap; + } + x->locals[x->nlocals++] = *v; +} + +static void walk_subprog_body(LocalCtx* x, u32* off, u64 scope_lo, u64 scope_hi, + u32 scope_die_off, u8 has_scope) { + CfreeDebugInfo* d = x->d; + DwCu* cu = &d->cus[x->cu_idx]; + for (;;) { + DwDie die; + if (!dw_read_die(d, cu, off, &die)) return; + if (die.abbrev->tag == DW_TAG_formal_parameter || + die.abbrev->tag == DW_TAG_variable) { + DieAttrPack p; + DwLocal v; + pack_init(&p); + read_pack(d, cu, &die, &p, off); + memset(&v, 0, sizeof(v)); + v.name = p.name ? p.name : ""; + v.die_offset = die.die_off; + v.type_die_offset = p.has_type ? p.type_die_offset : 0; + v.scope_lo = scope_lo; + v.scope_hi = scope_hi; + v.scope_offset = scope_die_off; + v.has_scope = has_scope; + v.loc = p.loc_block; + v.loc_len = p.loc_block_len; + v.has_loclist = p.has_loclist; + v.loclist_index = p.loclist_index; + v.is_param = (die.abbrev->tag == DW_TAG_formal_parameter); + v.is_global = 0; + if (v.is_param) + push_param(x, &v); + else + push_local(x, &v); + if (die.abbrev->has_children) + walk_subprog_body(x, off, scope_lo, scope_hi, scope_die_off, has_scope); + } else if (die.abbrev->tag == DW_TAG_lexical_block) { + DieAttrPack p; + pack_init(&p); + read_pack(d, cu, &die, &p, off); + { + u64 lo = p.has_low_pc ? p.low_pc : scope_lo; + u64 hi = p.has_high_pc + ? (p.high_pc_form == DW_FORM_addr ? 
p.high_pc_value + : lo + p.high_pc_value) + : scope_hi; + if (die.abbrev->has_children) + walk_subprog_body(x, off, lo, hi, die.die_off, 1); + } + } else { + u32 i; + for (i = 0; i < die.abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die.abbrev->attrs[i]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, off); + } + if (die.abbrev->has_children) + walk_subprog_body(x, off, scope_lo, scope_hi, scope_die_off, has_scope); + } + } +} + +void dw_build_locals(CfreeDebugInfo* d, DwSubprog* sp) { + LocalCtx x; + DwCu* cu; + u32 off; + DwDie die; + if (sp->cached_locals) return; + sp->cached_locals = 1; + cu = &d->cus[sp->cu_idx]; + off = sp->die_offset; + if (!dw_read_die(d, cu, &off, &die)) return; + if (!die.abbrev || !die.abbrev->has_children) return; + /* Skip subprog attrs */ + { + u32 i; + for (i = 0; i < die.abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die.abbrev->attrs[i]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); + } + } + memset(&x, 0, sizeof(x)); + x.d = d; + x.cu_idx = sp->cu_idx; + walk_subprog_body(&x, &off, sp->low_pc, sp->high_pc, sp->die_offset, 1); + sp->params = x.params; + sp->nparams = x.nparams; + sp->locals = x.locals; + sp->nlocals = x.nlocals; +} + +/* ---- globals --------------------------------------------------------- */ + +void dw_build_globals(CfreeDebugInfo* d) { + u32 i; + if (d->globals_built) return; + d->globals_built = 1; + for (i = 0; i < d->ncus; ++i) { + DwCu* cu = &d->cus[i]; + u32 off = cu->die_start_off; + DwDie root; + if (!dw_read_die(d, cu, &off, &root)) continue; + { + u32 j; + for (j = 0; j < root.abbrev->nattrs; ++j) { + DwAbbrevAttr* aa = &root.abbrev->attrs[j]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); + } + } + if (!root.abbrev->has_children) continue; + /* Walk only top-level children of the CU; collect DW_TAG_variable. 
*/ + for (;;) { + DwDie die; + if (!dw_read_die(d, cu, &off, &die)) break; + if (die.abbrev->tag == DW_TAG_variable) { + DieAttrPack p; + DwLocal v; + pack_init(&p); + read_pack(d, cu, &die, &p, &off); + memset(&v, 0, sizeof(v)); + v.name = p.name ? p.name : ""; + v.die_offset = die.die_off; + v.type_die_offset = p.has_type ? p.type_die_offset : 0; + v.loc = p.loc_block; + v.loc_len = p.loc_block_len; + v.has_loclist = p.has_loclist; + v.loclist_index = p.loclist_index; + v.is_param = 0; + v.is_global = 1; + if (d->nglobals == d->globals_cap) { + u32 ncap = d->globals_cap ? d->globals_cap * 2 : 8; + DwLocal* na = (DwLocal*)d->h->realloc( + d->h, d->globals, d->globals_cap * sizeof(*d->globals), + ncap * sizeof(*d->globals), _Alignof(DwLocal)); + if (!na) break; + d->globals = na; + d->globals_cap = ncap; + } + d->globals[d->nglobals++] = v; + if (die.abbrev->has_children) { + /* Skip children. */ + for (;;) { + DwDie c; + if (!dw_read_die(d, cu, &off, &c)) break; + dw_skip_die_subtree(d, cu, &c, &off); + } + } + } else { + dw_skip_die_subtree(d, cu, &die, &off); + } + } + } +} + +/* Public accessor for the type module: read attrs given die. */ +void dw_die_pack(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, + DieAttrPack* p) { + u32 off = die->attrs_off; + pack_init(p); + read_pack(d, cu, die, p, &off); +} diff --git a/src/debug/dwarf_internal.h b/src/debug/dwarf_internal.h @@ -0,0 +1,437 @@ +#ifndef CFREE_DWARF_INTERNAL_H +#define CFREE_DWARF_INTERNAL_H + +/* DWARF 5 consumer — internal types. + * + * This module reads DWARF bytes out of a CfreeObjFile and answers the + * cfree_dwarf_* queries. It is colocated with the producer implementation + * but does not include debug/debug.h or share producer state; the public + * DWARF wire format is the only contract between producer and consumer. 
 */

#include <cfree.h>

#include "core/core.h"
#include "core/heap.h"
#include "debug/dwarf_defs.h"

/* ---- Section & byte slice helpers ------------------------------------- */

/* One object-file section viewed as a read-only byte range. */
typedef struct DwSection {
  const u8* data;
  u32 size;
  u32 sec_idx; /* 0-based section index, or UINT32_MAX if missing */
} DwSection;

/* ---- Abbrev table ---- */

/* One (attribute, form) pair of an abbreviation declaration. */
typedef struct DwAbbrevAttr {
  u32 attr;           /* DW_AT_* */
  u32 form;           /* DW_FORM_* */
  i64 implicit_const; /* for DW_FORM_implicit_const */
} DwAbbrevAttr;

/* One abbreviation declaration: tag, children flag, attribute list. */
typedef struct DwAbbrev {
  u64 code; /* abbrev code; 0 if unused slot */
  u32 tag;  /* DW_TAG_* */
  u8 has_children;
  u32 nattrs;
  DwAbbrevAttr* attrs; /* heap-allocated */
} DwAbbrev;

/* All abbreviations found at one .debug_abbrev offset. */
typedef struct DwAbbrevTable {
  u32 cu_abbrev_offset; /* offset into .debug_abbrev */
  /* Dense map: code → index (or 0 if absent). For typical small tables we
   * keep them in a sorted array searched linearly. */
  DwAbbrev* abbrevs;
  u32 nabbrevs;
  u32 cap;
} DwAbbrevTable;

/* ---- Compilation unit ---- */

/* Parsed CU header plus the root-DIE attributes the consumer needs. */
typedef struct DwCu {
  u32 hdr_offset; /* offset of CU header in .debug_info */
  u32 hdr_length; /* length of unit_length bytes (after the size field itself)
                   */
  u32 unit_total_size; /* hdr_length + length-field size (4 for 32-bit init) */
  u32 die_start_off;   /* offset where the first DIE starts (in .debug_info) */
  u8 version;
  u8 address_size;
  u8 unit_type;
  u8 is_64bit;       /* DWARF64? */
  u32 abbrev_offset; /* into .debug_abbrev */
  u32 str_offsets_base;
  u32 addr_base;
  u32 loclists_base;
  u32 rnglists_base;
  u32 stmt_list; /* DW_AT_stmt_list value (offset into .debug_line) */
  u8 has_stmt_list;
  const char* comp_dir;
  const char* name;
  /* Index of abbrev table in dbg->abbrevs */
  u32 abbrev_table_idx;
} DwCu;

/* ---- Materialized DIEs (we cache only what we need) ---- */

/* A reference to a DIE in .debug_info: the owning CU's index plus the
 * DIE's offset, kept together so cross-CU references can be resolved later.
 * NOTE(review): this comment used to describe the offset as CU-relative
 * ("offset-from-CU-header") while the field comment below says it is
 * absolute within .debug_info — confirm which one is intended. */
typedef struct DwDieRef {
  u32 cu_idx;
  u32 die_offset; /* absolute offset into .debug_info bytes */
} DwDieRef;

/* ---- Type cache ---- */

typedef enum DwTypeKind {
  DTK_VOID,
  DTK_BASE, /* maps to SINT/UINT/BOOL/FLOAT/CHAR by encoding */
  DTK_PTR,
  DTK_ARRAY,
  DTK_STRUCT,
  DTK_UNION,
  DTK_ENUM,
  DTK_TYPEDEF,
  DTK_FUNC,
  DTK_CONST, /* alias to inner */
  DTK_VOLATILE,
  DTK_RESTRICT,
} DwTypeKind;

/* One struct/union member. */
typedef struct DwField {
  const char* name;
  u32 byte_offset;
  u32 bit_offset;
  u32 bit_size;
  struct CfreeDwarfType* type;
} DwField;

/* One enumerator: name and value. */
typedef struct DwEnumVal {
  const char* name;
  i64 value;
} DwEnumVal;

struct CfreeDwarfType {
  DwTypeKind kind;
  u32 byte_size;
  const char* name;
  u32 element_count;
  u32 die_offset; /* origin DIE for cycle-detection / dedup */
  /* DTK_PTR/ARRAY/TYPEDEF/CONST/VOLATILE/RESTRICT/FUNC: inner type */
  struct CfreeDwarfType* inner;
  /* Base type encoding (DW_ATE_*) — used to derive SINT/UINT/CHAR/BOOL/FLOAT */
  u32 base_encoding;
  /* STRUCT/UNION fields */
  DwField* fields;
  u32 nfields;
  /* ENUM values */
  DwEnumVal* evals;
  u32 nevals;
};

/* ---- Line program decoded matrix ---- */

/* One row of the decoded line table. */
typedef struct DwLineRow {
  u64 address;
  u32 file_index;
  u32 line;
  u32 column;
  u8 is_stmt;
  u8 end_sequence;
} DwLineRow;

/* One file_names[] entry of a line-program header. */
typedef struct DwLineFile {
  const char* path; /* interned in our string table */
  u32 dir_index;
} DwLineFile;

typedef struct DwLineProgram {
  /* Per-CU line program decoding state. We materialize all rows into a
   * single rows array for fast lookup. */
  DwLineRow* rows;
  u32 nrows;
  u32 cap;
  /* File table (file_index 0 is the CU primary in DW5). */
  DwLineFile* files;
  u32 nfiles;
  const char** dirs;
  u32 ndirs;
  /* Cached fully-qualified path per file, lazily built. */
  const char** file_norm;
  u32 nfile_norm;
} DwLineProgram;

/* ---- Subprogram descriptor (cached) ---- */

/* A parameter, local or global variable collected from a DIE. */
typedef struct DwLocal {
  const char* name;
  u32 die_offset;
  u32 type_die_offset;
  u64 scope_lo;     /* PCs at which the var is in scope. */
  u64 scope_hi;     /* (low_pc, high_pc) of nearest enclosing block. */
  u32 scope_offset; /* offset of the lexical_block DIE; 0 = subprog scope */
  u8 has_scope;
  /* Location form: either an exprloc or a loclistx index. */
  const u8* loc;
  u32 loc_len;
  u8 has_loclist;
  u64 loclist_index;
  /* Role: ARG vs LOCAL. */
  u8 is_param;
  /* For globals only: the global variable role. */
  u8 is_global;
} DwLocal;

/* A function (or inlined instance) with its PC range and cached locals. */
typedef struct DwSubprog {
  const char* name;
  u64 low_pc;
  u64 high_pc;
  const char* decl_file;
  u32 decl_line;
  u32 cu_idx;
  u32 die_offset; /* offset of the subprogram DIE */
  /* Frame base — DW_AT_frame_base exprloc bytes (or NULL). */
  const u8* frame_base;
  u32 frame_base_len;
  /* Cached params and locals (lazily). */
  DwLocal* params;
  u32 nparams;
  DwLocal* locals;
  u32 nlocals;
  u8 inlined;
  u8 cached_locals;
} DwSubprog;

/* ---- The main consumer state ---- */

typedef struct DwString {
  Sym sym; /* interned in compiler->global pool */
} DwString;

struct CfreeDebugInfo {
  CfreeCompiler* c;
  Heap* h;
  const CfreeObjFile* obj;

  /* Sections */
  DwSection abbrev;
  DwSection info;
  DwSection line;
  DwSection str;
  DwSection line_str;
  DwSection str_offsets;
  DwSection addr;
  DwSection loclists;
  DwSection rnglists;
  DwSection eh_frame;
  DwSection aranges;

  /* Abbrev tables (one per unique abbrev_offset we've seen). */
  DwAbbrevTable* abbrevs;
  u32 nabbrevs;
  u32 abbrevs_cap;

  /* CUs */
  DwCu* cus;
  u32 ncus;
  u32 cus_cap;

  /* Line programs by CU index (parallel to cus). Each lazily built. */
  DwLineProgram* lines_by_cu;
  u8* lines_built; /* parallel; 0 = not yet decoded */

  /* Subprograms (sorted by low_pc on first build).
   * NOTE(review): the builder in dwarf_die.c appends in DIE-walk order and
   * performs no sort — confirm whether sorting happens elsewhere. */
  DwSubprog* subs;
  u32 nsubs;
  u32 subs_cap;
  u8 subs_built;

  /* Type cache: DIE-offset → CfreeDwarfType*. */
  CfreeDwarfType** types_by_off; /* parallel arrays */
  u32* types_off;
  u32 ntypes;
  u32 types_cap;

  /* Globals (top-level DW_TAG_variable in any CU). */
  DwLocal* globals;
  u32 nglobals;
  u32 globals_cap;
  u8 globals_built;
};

/* ---- API between the dwarf_*.c files ---------------------------------- */

/* Section lookup by name. Sets out->data/size; sec_idx = UINT32_MAX if missing.
 */
void dw_find_section(CfreeDebugInfo* d, const char* name, DwSection* out);

/* Read primitives. Each decodes one value at *off within base[0..size),
 * advances *off past it, and returns the decoded value (callers use the
 * return value as data, not as an offset).
 * NOTE(review): documented as panicking on EOF; the implementation is not
 * in this header — confirm. */
u8 dw_u8(const u8* base, u32 size, u32* off);
u16 dw_u16(const u8* base, u32 size, u32* off);
u32 dw_u24(const u8* base, u32 size, u32* off);
u32 dw_u32(const u8* base, u32 size, u32* off);
u64 dw_u64(const u8* base, u32 size, u32* off);
u64 dw_uleb(const u8* base, u32 size, u32* off);
i64 dw_sleb(const u8* base, u32 size, u32* off);
const char* dw_cstr(const u8* base, u32 size, u32* off);

/* Abbrev parsing: ensure (and return) the abbrev table for `offset`. */
DwAbbrevTable* dw_abbrev_get(CfreeDebugInfo* d, u32 offset);
DwAbbrev* dw_abbrev_lookup(DwAbbrevTable* t, u64 code);

/* Parse the CU header at offset `off` in .debug_info into `cu`.
 * Returns the offset of the next CU header. */
u32 dw_cu_parse_header(CfreeDebugInfo* d, u32 off, DwCu* cu);

/* Skim every CU and populate dbg->cus. */
void dw_parse_all_cus(CfreeDebugInfo* d);

/* String lookups. dw_str / dw_line_str take a section offset (they are
 * called for DW_FORM_strp / DW_FORM_line_strp values respectively); dw_strx
 * takes an index. Original note: "Open the .debug_str_offsets table indexed
 * by str_offsets_base." — presumably that describes dw_strx; confirm. */
const char* dw_str(CfreeDebugInfo* d, u32 offset);
const char* dw_line_str(CfreeDebugInfo* d, u32 offset);
const char* dw_strx(CfreeDebugInfo* d, const DwCu* cu, u64 idx);

/* Skip one attribute value of `form` size. *off is updated. */
void dw_skip_form(CfreeDebugInfo* d, const DwCu* cu, u32 form,
                  i64 implicit_const, u32* off);

/* Read attribute value into a typed accumulator. Caller picks which getter. */
typedef struct DwAttrValue {
  u32 form;
  /* Values for various forms — only one slot is meaningful per form. */
  u64 u;           /* udata, addr, ref (CU-relative offset for local refs) */
  i64 s;           /* sdata */
  const char* str; /* strp/string/strx/line_strp resolved cstring */
  const u8* block; /* exprloc/block bytes */
  u32 block_len;
} DwAttrValue;

/* Read attr value at *off using `form`. Updates *off. */
void dw_read_form(CfreeDebugInfo* d, const DwCu* cu, u32 form,
                  i64 implicit_const, u32* off, DwAttrValue* out);

/* DIE iteration helpers. */
typedef struct DwDie {
  u64 abbrev_code;
  DwAbbrev* abbrev;     /* NULL if abbrev_code==0 (null entry) */
  u32 die_off;          /* offset of this DIE itself in .debug_info */
  u32 attrs_off;        /* where attribute encodings start */
  u32 next_sibling_off; /* lazily computed */
} DwDie;

/* Read one DIE header at *off. Updates *off to point past the abbrev code,
 * to the start of the attribute area. Returns 1 on success, 0 if this is a
 * null-entry (terminates a sibling chain). */
int dw_read_die(CfreeDebugInfo* d, const DwCu* cu, u32* off, DwDie* out);

/* Skip a DIE's attribute area, advancing *off past it. */
void dw_skip_die_attrs(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, u32* off);

/* Skip an entire DIE subtree (including children), starting at attrs_off.
 * On entry, *off == die->attrs_off. On exit, *off is past the children
 * terminator (if has_children) or just past the attrs (if no children). */
void dw_skip_die_subtree(CfreeDebugInfo* d, const DwCu* cu, DwDie* die,
                         u32* off);

/* Lookup an attribute on `die` by attr code. Returns 1 if found and fills
 * *out; 0 otherwise. Restartable (rewinds the cursor). */
int dw_die_attr(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, u32 attr,
                DwAttrValue* out);

/* String interning into the compiler's global pool. */
const char* dw_intern(CfreeDebugInfo* d, const char* s, size_t len);

/* Inline strcmp/strlen — libcfree avoids a runtime libc dep beyond the
 * tightly-controlled allowlist (test/lib_deps.allowlist). */

/* Returns 1 when both strings are non-NULL and byte-for-byte equal, else 0
 * (a NULL argument never compares equal, not even to another NULL). */
static inline int dw_streq(const char* a, const char* b) {
  if (!a || !b) return 0;
  while (*a && *b && *a == *b) {
    a++;
    b++;
  }
  return *a == 0 && *b == 0;
}
/* Length of `s` excluding the terminator; 0 for NULL. */
static inline size_t dw_strlen(const char* s) {
  size_t n = 0;
  if (!s) return 0;
  while (s[n]) n++;
  return n;
}

/* DIE attribute pack — shared between dwarf_die.c and dwarf_type.c.
 * Each has_* flag records whether the matching attribute was present. */
typedef struct DieAttrPack {
  const char* name;  /* DW_AT_name, or NULL */
  u64 low_pc;        /* DW_AT_low_pc */
  u64 high_pc_value; /* raw DW_AT_high_pc value */
  u32 high_pc_form;  /* form of DW_AT_high_pc (address vs. offset) */
  u8 has_low_pc;
  u8 has_high_pc;
  u32 type_die_offset; /* DW_AT_type as an offset into .debug_info */
  u8 has_type;
  u32 decl_file;
  u32 decl_line;
  const u8* loc_block; /* DW_AT_location expression bytes, if block-form */
  u32 loc_block_len;
  u8 has_loclist;
  u64 loclist_index;
  const u8* fb_block; /* DW_AT_frame_base expression bytes */
  u32 fb_block_len;
  i64 const_value;
  u8 has_const_value;
  u32 byte_offset; /* DW_AT_data_member_location */
  u8 has_byte_offset;
  u32 byte_size;
  u8 has_byte_size;
  u32 bit_size;
  u8 has_bit_size;
  u32 bit_offset; /* DW_AT_bit_offset or DW_AT_data_bit_offset */
  u8 has_bit_offset;
  u32 base_encoding; /* DW_AT_encoding */
  u8 has_encoding;
  u32 array_count; /* DW_AT_count, or DW_AT_upper_bound + 1 */
  u8 has_array_count;
  u8 inlined; /* NOTE(review): not written by the reader in dwarf_die.c */
} DieAttrPack;

void dw_die_pack(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, DieAttrPack* p);

/* Subprograms */
void dw_build_subs(CfreeDebugInfo* d);
DwSubprog* dw_find_subprog(CfreeDebugInfo* d, u64 pc);
void dw_build_locals(CfreeDebugInfo* d, DwSubprog* sp);

/* Globals */
void dw_build_globals(CfreeDebugInfo* d);

/* Line program */
void dw_build_line(CfreeDebugInfo* d, u32 cu_idx);

/* Type DIE → CfreeDwarfType*. die_offset is absolute offset in .debug_info. */
CfreeDwarfType* dw_type_from_die(CfreeDebugInfo* d, u32 cu_idx, u32 die_offset);
CfreeDwarfType* dw_void_type(CfreeDebugInfo* d);

/* Loc-expr evaluator. Evaluates `expr` of length `len` in the context of
 * `frame` (regs, cfa) and `frame_base_expr` (the subprog's DW_AT_frame_base
 * expression — typically just DW_OP_call_frame_cfa). Returns 0 on success;
 * fills *result with the location kind plus value. */
typedef struct DwExprResult {
  /* result_kind: 0 = address (memory), 1 = value-on-stack (DW_OP_stack_value),
   * 2 = register, 3 = unsupported. */
  int kind;
  u64 value; /* address if kind=0; literal if kind=1; reg# if kind=2 */
} DwExprResult;

int dw_eval_expr(CfreeDebugInfo* d, const u8* expr, u32 len, const u8* fb_expr,
                 u32 fb_len, const CfreeUnwindFrame* frame, DwExprResult* out);

/* CU lookup helpers. */
DwCu* dw_cu_at_die_offset(CfreeDebugInfo* d, u32 die_offset);

/* Resolve a DW_FORM_loclistx into the matching location list entry for
 * `pc`. Returns 1 and fills bytes/len on success; 0 if the section is
 * absent, the index is bad, or no entry covers `pc`. */
int dw_loclist_resolve(CfreeDebugInfo* d, const DwCu* cu, u64 idx, u64 pc,
                       const u8** bytes, u32* len);

#endif
diff --git a/src/debug/dwarf_line.c b/src/debug/dwarf_line.c @@ -0,0 +1,611 @@
/* dwarf_line.c — DWARF 5 line-number-program decoder.
 *
 * Per doc/DWARF.md §4.2: walk .debug_line for the CU's stmt_list, build
 * a row matrix, and index it for addr→line and (file, line)→addr lookup.
+ */ + +#include <cfree.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "core/core.h" +#include "core/heap.h" +#include "core/util.h" +#include "debug/dwarf_internal.h" + +typedef struct LineState { + u64 address; + u32 op_index; + u32 file; + u32 line; + u32 column; + u8 is_stmt; + u8 basic_block; + u8 end_sequence; + u8 prologue_end; + u8 epilogue_begin; + u32 isa; + u32 discriminator; +} LineState; + +typedef struct LineHdr { + u32 unit_length; + u8 version; + u8 address_size; + u8 segment_selector_size; + u32 header_length; + u8 min_inst_len; + u8 max_ops_per_inst; + u8 default_is_stmt; + i8 line_base; + u8 line_range; + u8 opcode_base; + u8 std_opcode_lengths[12]; /* version 5 has 12 standard opcodes */ +} LineHdr; + +static void rows_push(CfreeDebugInfo* d, DwLineProgram* lp, + const LineState* st) { + DwLineRow* r; + if (lp->nrows == lp->cap) { + u32 ncap = lp->cap ? lp->cap * 2 : 32; + DwLineRow* na = (DwLineRow*)d->h->realloc( + d->h, lp->rows, lp->cap * sizeof(*lp->rows), ncap * sizeof(*lp->rows), + _Alignof(DwLineRow)); + if (!na) return; + lp->rows = na; + lp->cap = ncap; + } + r = &lp->rows[lp->nrows++]; + r->address = st->address; + r->file_index = st->file; + r->line = st->line; + r->column = st->column; + r->is_stmt = st->is_stmt; + r->end_sequence = st->end_sequence; +} + +static void state_init(LineState* st, u8 default_is_stmt) { + st->address = 0; + st->op_index = 0; + st->file = 1; + st->line = 1; + st->column = 0; + st->is_stmt = default_is_stmt; + st->basic_block = 0; + st->end_sequence = 0; + st->prologue_end = 0; + st->epilogue_begin = 0; + st->isa = 0; + st->discriminator = 0; +} + +/* Read a DW5 file-or-dir entry-format header. + * On entry: *off points at format_count. + * Returns the number of (content_type, form) pairs. Caller must read + * the format pairs before calling read_entries(). 
*/ +typedef struct EntryFmt { + u32 content_type; + u32 form; +} EntryFmt; + +static u32 read_format(const u8* base, u32 size, u32* off, EntryFmt* fmt, + u32 max) { + u32 n = dw_u8(base, size, off); + u32 i; + if (n > max) n = max; + for (i = 0; i < n; ++i) { + fmt[i].content_type = (u32)dw_uleb(base, size, off); + fmt[i].form = (u32)dw_uleb(base, size, off); + } + return n; +} + +/* Wrapper around dw_read_form that reads from .debug_line bytes via a + * pseudo-CU configured with the line-program address size. */ +static void read_lp_form(CfreeDebugInfo* d, u32 form, u8 addr_size, u32* off, + DwAttrValue* out) { + /* This reads from .debug_line, not .debug_info — we duplicate the + * minimal subset we need (line_strp, strp, udata, data1/2/4/8, string). */ + out->form = form; + out->u = 0; + out->str = ""; + out->block = NULL; + out->block_len = 0; + switch (form) { + case DW_FORM_string: + out->str = dw_cstr(d->line.data, d->line.size, off); + break; + case DW_FORM_strp: + out->u = dw_u32(d->line.data, d->line.size, off); + out->str = dw_str(d, (u32)out->u); + break; + case DW_FORM_line_strp: + out->u = dw_u32(d->line.data, d->line.size, off); + out->str = dw_line_str(d, (u32)out->u); + break; + case DW_FORM_data1: + out->u = dw_u8(d->line.data, d->line.size, off); + break; + case DW_FORM_data2: + out->u = dw_u16(d->line.data, d->line.size, off); + break; + case DW_FORM_data4: + out->u = dw_u32(d->line.data, d->line.size, off); + break; + case DW_FORM_data8: + out->u = dw_u64(d->line.data, d->line.size, off); + break; + case DW_FORM_udata: + out->u = dw_uleb(d->line.data, d->line.size, off); + break; + case DW_FORM_sdata: + (void)dw_sleb(d->line.data, d->line.size, off); + break; + case DW_FORM_data16: + *off += 16; + break; + case DW_FORM_block: + case DW_FORM_exprloc: { + u32 n = (u32)dw_uleb(d->line.data, d->line.size, off); + out->block = d->line.data + *off; + out->block_len = n; + *off += n; + } break; + case DW_FORM_block1: { + u32 n = dw_u8(d->line.data, 
d->line.size, off); + out->block = d->line.data + *off; + out->block_len = n; + *off += n; + } break; + case DW_FORM_flag: + out->u = dw_u8(d->line.data, d->line.size, off); + break; + default: + /* Unknown form — heuristic: skip 0 bytes. Caller may read garbage. */ + (void)addr_size; + break; + } +} + +/* Build a fully-qualified path for file_index in lp. */ +static const char* build_file_norm(CfreeDebugInfo* d, DwLineProgram* lp, + u32 idx) { + const char* path; + const char* dir; + u32 dir_idx; + size_t plen, dlen; + char buf[4096]; + size_t pos = 0; + if (idx >= lp->nfiles) return ""; + path = lp->files[idx].path; + if (!path) path = ""; + dir_idx = lp->files[idx].dir_index; + dir = (dir_idx < lp->ndirs) ? lp->dirs[dir_idx] : ""; + plen = strlen(path); + dlen = strlen(dir); + /* If path is already absolute (starts with /), return as-is. */ + if (plen > 0 && path[0] == '/') return path; + if (dlen > 0) { + if (dlen >= sizeof(buf) - 2) return path; /* fallback */ + memcpy(buf, dir, dlen); + pos = dlen; + if (buf[pos - 1] != '/') buf[pos++] = '/'; + } + if (pos + plen >= sizeof(buf)) return path; + memcpy(buf + pos, path, plen); + pos += plen; + buf[pos] = 0; + return dw_intern(d, buf, pos); +} + +void dw_build_line(CfreeDebugInfo* d, u32 cu_idx) { + DwCu* cu; + DwLineProgram* lp; + u32 off; + u32 stmt_off; + LineHdr h; + u32 unit_end; + u32 prog_start; + EntryFmt dir_fmt[8]; + EntryFmt file_fmt[8]; + u32 ndir_fmt, nfile_fmt; + u32 ndirs_count, nfiles_count; + u32 i; + LineState st; + + if (cu_idx >= d->ncus) return; + if (d->lines_built[cu_idx]) return; + d->lines_built[cu_idx] = 1; + + cu = &d->cus[cu_idx]; + lp = &d->lines_by_cu[cu_idx]; + if (!cu->has_stmt_list) return; + stmt_off = cu->stmt_list; + if (stmt_off >= d->line.size) return; + + off = stmt_off; + h.unit_length = dw_u32(d->line.data, d->line.size, &off); + if (h.unit_length == 0xffffffffu) return; /* DWARF64 not supported */ + unit_end = off + h.unit_length; + h.version = (u8)dw_u16(d->line.data, 
d->line.size, &off); + if (h.version != 5) { + /* DW4/3 layout differs. We only support DW5. */ + return; + } + h.address_size = dw_u8(d->line.data, d->line.size, &off); + h.segment_selector_size = dw_u8(d->line.data, d->line.size, &off); + h.header_length = dw_u32(d->line.data, d->line.size, &off); + prog_start = off + h.header_length; + h.min_inst_len = dw_u8(d->line.data, d->line.size, &off); + h.max_ops_per_inst = dw_u8(d->line.data, d->line.size, &off); + h.default_is_stmt = dw_u8(d->line.data, d->line.size, &off); + h.line_base = (i8)dw_u8(d->line.data, d->line.size, &off); + h.line_range = dw_u8(d->line.data, d->line.size, &off); + h.opcode_base = dw_u8(d->line.data, d->line.size, &off); + if (h.line_range == 0) h.line_range = 1; + /* Read standard opcode lengths (opcode_base - 1 of them). */ + { + u32 j; + u32 cnt = h.opcode_base ? h.opcode_base - 1u : 0u; + if (cnt > sizeof(h.std_opcode_lengths)) cnt = sizeof(h.std_opcode_lengths); + for (j = 0; j < cnt; ++j) + h.std_opcode_lengths[j] = dw_u8(d->line.data, d->line.size, &off); + /* Skip any extra opcode-length bytes the header claims. */ + if (h.opcode_base > 1u + sizeof(h.std_opcode_lengths)) { + off += (h.opcode_base - 1u) - (u32)sizeof(h.std_opcode_lengths); + } + } + + /* directories[] */ + ndir_fmt = read_format(d->line.data, d->line.size, &off, dir_fmt, 8); + ndirs_count = (u32)dw_uleb(d->line.data, d->line.size, &off); + if (ndirs_count > 0) { + lp->dirs = (const char**)d->h->alloc( + d->h, ndirs_count * sizeof(const char*), _Alignof(const char*)); + if (lp->dirs) { + lp->ndirs = ndirs_count; + memset(lp->dirs, 0, ndirs_count * sizeof(const char*)); + } + } + for (i = 0; i < ndirs_count; ++i) { + u32 j; + DwAttrValue v; + const char* path = ""; + for (j = 0; j < ndir_fmt; ++j) { + read_lp_form(d, dir_fmt[j].form, h.address_size, &off, &v); + if (dir_fmt[j].content_type == DW_LNCT_path) { + path = v.str ? 
v.str : ""; + } + } + if (lp->dirs && i < lp->ndirs) lp->dirs[i] = path; + } + + /* file_names[] */ + nfile_fmt = read_format(d->line.data, d->line.size, &off, file_fmt, 8); + nfiles_count = (u32)dw_uleb(d->line.data, d->line.size, &off); + if (nfiles_count > 0) { + lp->files = (DwLineFile*)d->h->alloc( + d->h, nfiles_count * sizeof(DwLineFile), _Alignof(DwLineFile)); + if (lp->files) { + lp->nfiles = nfiles_count; + memset(lp->files, 0, nfiles_count * sizeof(DwLineFile)); + } + } + for (i = 0; i < nfiles_count; ++i) { + u32 j; + DwAttrValue v; + const char* path = ""; + u32 dir_index = 0; + for (j = 0; j < nfile_fmt; ++j) { + read_lp_form(d, file_fmt[j].form, h.address_size, &off, &v); + if (file_fmt[j].content_type == DW_LNCT_path) + path = v.str ? v.str : ""; + else if (file_fmt[j].content_type == DW_LNCT_directory_index) + dir_index = (u32)v.u; + } + if (lp->files && i < lp->nfiles) { + lp->files[i].path = path; + lp->files[i].dir_index = dir_index; + } + } + + /* Build per-file normalized path cache lazily on first query. 
*/ + if (lp->nfiles) { + lp->file_norm = (const char**)d->h->alloc( + d->h, lp->nfiles * sizeof(const char*), _Alignof(const char*)); + if (lp->file_norm) { + lp->nfile_norm = lp->nfiles; + for (i = 0; i < lp->nfiles; ++i) lp->file_norm[i] = NULL; + } + } + + /* program */ + off = prog_start; + state_init(&st, h.default_is_stmt); + while (off < unit_end) { + u8 op = dw_u8(d->line.data, d->line.size, &off); + if (op == 0) { + /* extended opcode */ + u64 elen = dw_uleb(d->line.data, d->line.size, &off); + u32 eop_off = off; + u8 eop; + if (elen == 0 || off + elen > d->line.size) break; + eop = dw_u8(d->line.data, d->line.size, &off); + switch (eop) { + case DW_LNE_end_sequence: + st.end_sequence = 1; + rows_push(d, lp, &st); + state_init(&st, h.default_is_stmt); + break; + case DW_LNE_set_address: + if (h.address_size == 8) + st.address = dw_u64(d->line.data, d->line.size, &off); + else + st.address = dw_u32(d->line.data, d->line.size, &off); + st.op_index = 0; + break; + case DW_LNE_set_discriminator: + st.discriminator = (u32)dw_uleb(d->line.data, d->line.size, &off); + break; + default: + /* Skip unknown extended opcode body. */ + off = eop_off + (u32)elen; + break; + } + /* Sync to the declared end of the extended opcode. 
*/ + off = eop_off + (u32)elen; + } else if (op < h.opcode_base) { + /* standard opcode */ + switch (op) { + case DW_LNS_copy: + rows_push(d, lp, &st); + st.basic_block = 0; + st.prologue_end = 0; + st.epilogue_begin = 0; + st.discriminator = 0; + break; + case DW_LNS_advance_pc: { + u64 adv = dw_uleb(d->line.data, d->line.size, &off); + st.address += adv * h.min_inst_len; + } break; + case DW_LNS_advance_line: { + i64 adv = dw_sleb(d->line.data, d->line.size, &off); + st.line = (u32)((i64)st.line + adv); + } break; + case DW_LNS_set_file: + st.file = (u32)dw_uleb(d->line.data, d->line.size, &off); + break; + case DW_LNS_set_column: + st.column = (u32)dw_uleb(d->line.data, d->line.size, &off); + break; + case DW_LNS_negate_stmt: + st.is_stmt = !st.is_stmt; + break; + case DW_LNS_set_basic_block: + st.basic_block = 1; + break; + case DW_LNS_const_add_pc: { + u8 adj = (u8)(255 - h.opcode_base); + u8 op_adv = (u8)(adj / h.line_range); + st.address += op_adv * h.min_inst_len; + } break; + case DW_LNS_fixed_advance_pc: + st.address += dw_u16(d->line.data, d->line.size, &off); + st.op_index = 0; + break; + case DW_LNS_set_prologue_end: + st.prologue_end = 1; + break; + case DW_LNS_set_epilogue_begin: + st.epilogue_begin = 1; + break; + case DW_LNS_set_isa: + st.isa = (u32)dw_uleb(d->line.data, d->line.size, &off); + break; + default: { + /* Unknown standard opcode: skip its operands per + * std_opcode_lengths. */ + u32 nops = (op - 1u) < sizeof(h.std_opcode_lengths) + ? 
h.std_opcode_lengths[op - 1] + : 0; + u32 j; + for (j = 0; j < nops; ++j) + (void)dw_uleb(d->line.data, d->line.size, &off); + } break; + } + } else { + /* special opcode */ + u32 adj = (u32)(op - h.opcode_base); + u32 op_adv = adj / h.line_range; + i32 line_inc = (i32)h.line_base + (i32)(adj % h.line_range); + st.address += op_adv * h.min_inst_len; + st.line = (u32)((i32)st.line + line_inc); + rows_push(d, lp, &st); + st.basic_block = 0; + st.prologue_end = 0; + st.epilogue_begin = 0; + st.discriminator = 0; + } + } + + /* Build file_norm lazily. */ + if (lp->file_norm) { + for (i = 0; i < lp->nfiles; ++i) { + lp->file_norm[i] = build_file_norm(d, lp, i); + } + } +} + +/* Lookup helpers. Build all CU line tables on demand, walk each. */ + +int cfree_dwarf_addr_to_line(CfreeDebugInfo* d, uint64_t pc, + const char** file_out, uint32_t* line_out, + uint32_t* col_out) { + /* Return codes: + * 0 — PC has a line entry; outputs filled. + * 1 — PC sits inside a CU's coverage range but no row matched. + * 2 — PC outside every CU's address coverage (e.g. JIT-emitted thunk + * or a frame inside a `.o` linked without `-g`). REPL: "no + * debug info for this frame". 
*/ + u32 i; + int any_in_range = 0; + if (file_out) *file_out = NULL; + if (line_out) *line_out = 0; + if (col_out) *col_out = 0; + if (!d) return 1; + for (i = 0; i < d->ncus; ++i) { + DwLineProgram* lp; + u32 j; + DwLineRow* best = NULL; + uint64_t cu_lo = (uint64_t)-1, cu_hi = 0; + if (!d->lines_built[i]) dw_build_line(d, i); + lp = &d->lines_by_cu[i]; + for (j = 0; j < lp->nrows; ++j) { + DwLineRow* r = &lp->rows[j]; + if (r->address < cu_lo) cu_lo = r->address; + if (r->address > cu_hi) cu_hi = r->address; + if (r->end_sequence) continue; + if (r->address > pc) break; + best = r; + } + if (pc >= cu_lo && pc <= cu_hi) any_in_range = 1; + if (best) { + const char* f = ""; + if (best->file_index < lp->nfile_norm && lp->file_norm) + f = lp->file_norm[best->file_index]; + if (file_out) *file_out = f; + if (line_out) *line_out = best->line; + if (col_out) *col_out = best->column; + return 0; + } + } + return any_in_range ? 1 : 2; +} + +/* file_norm matches user-typed `file` if either it is exactly equal, or it + * ends with `/<file>`. Suffix matching keeps `b util.c:42` working when + * the DWARF file_norm is the absolute path the compiler saw. */ +static int dw_file_matches(const char* file_norm, const char* user, size_t ulen) { + size_t flen; + if (!file_norm) return 0; + if (dw_streq(file_norm, user)) return 1; + flen = strlen(file_norm); + if (flen <= ulen) return 0; + if (file_norm[flen - ulen - 1] != '/') return 0; + return memcmp(file_norm + flen - ulen, user, ulen) == 0; +} + +int cfree_dwarf_line_to_addr(CfreeDebugInfo* d, const char* file, uint32_t line, + uint64_t* pc_out) { + /* Returns: + * 0 — unique match; pc_out filled with that PC. + * 1 — file `file` does not appear in any CU we scanned (per-DWARF.md + * "no data" semantics: caller can format this as "file not + * covered" if it cares to distinguish from a stale line). + * 2 — `file` appears in some CU but no row matches (file, line). 
+ * 3 — ambiguous: more than one distinct PC matches (file, line) via + * suffix. pc_out is filled with the first match so callers that + * don't disambiguate still get a usable PC. Use + * cfree_dwarf_line_to_addr_all to enumerate candidates. */ + /* Ambiguity is keyed on distinct file_norm *paths* matching the + * suffix, not on distinct PCs. Multiple PCs on the same line of the + * same source file are expected (one row per instruction) — they're + * not ambiguity, just line-program granularity. */ + u32 i; + size_t ulen; + const char* first_path = NULL; + uint64_t first_pc = 0; + const char* alt_path = NULL; + int file_seen = 0; + int line_hits = 0; + if (pc_out) *pc_out = 0; + if (!d || !file) return 1; + ulen = strlen(file); + if (ulen == 0) return 1; + for (i = 0; i < d->ncus; ++i) { + DwLineProgram* lp; + u32 j; + if (!d->lines_built[i]) dw_build_line(d, i); + lp = &d->lines_by_cu[i]; + for (j = 0; j < lp->nrows; ++j) { + DwLineRow* r = &lp->rows[j]; + const char* f; + if (r->end_sequence) continue; + if (r->file_index >= lp->nfile_norm || !lp->file_norm) continue; + f = lp->file_norm[r->file_index]; + if (!dw_file_matches(f, file, ulen)) continue; + file_seen = 1; + if (r->line != line) continue; + ++line_hits; + if (!first_path) { + first_path = f; + first_pc = r->address; + } else if (!alt_path && f != first_path && !dw_streq(f, first_path)) { + alt_path = f; + } + } + } + if (pc_out) *pc_out = first_pc; + if (alt_path) return 3; + if (line_hits > 0) return 0; + if (file_seen) return 2; + return 1; +} + +/* Enumerate all distinct candidate (pc, file_norm) pairs for the given + * (file, line) match. Caller-supplied `out` array is filled up to `cap`; + * `*n_out` receives the total candidate count (which may exceed cap, in + * which case only the first `cap` are written). Returns 0 on success + * (including 0 candidates), 1 on invalid args. Intended for REPL + * disambiguation after cfree_dwarf_line_to_addr returns 3. 
*/ +int cfree_dwarf_line_to_addr_all(CfreeDebugInfo* d, const char* file, + uint32_t line, CfreeDwarfLineMatch* out, + uint32_t cap, uint32_t* n_out) { + /* One candidate per distinct file_norm path (not per PC). PC is the + * first matching row's address for that file_norm — i.e. the same PC + * that cfree_dwarf_line_to_addr would have returned for that file. */ + u32 i; + size_t ulen; + uint32_t total = 0; + if (n_out) *n_out = 0; + if (!d || !file) return 1; + ulen = strlen(file); + if (ulen == 0) return 1; + for (i = 0; i < d->ncus; ++i) { + DwLineProgram* lp; + u32 j; + if (!d->lines_built[i]) dw_build_line(d, i); + lp = &d->lines_by_cu[i]; + for (j = 0; j < lp->nrows; ++j) { + DwLineRow* r = &lp->rows[j]; + const char* f; + uint32_t k; + int dup = 0; + if (r->end_sequence) continue; + if (r->line != line) continue; + if (r->file_index >= lp->nfile_norm || !lp->file_norm) continue; + f = lp->file_norm[r->file_index]; + if (!dw_file_matches(f, file, ulen)) continue; + /* Dedupe by file_norm path so the candidate list is one entry per + * source file even if the line has many per-instruction rows. */ + if (out) { + uint32_t lim = total < cap ? total : cap; + for (k = 0; k < lim; ++k) { + if (out[k].file == f || (out[k].file && dw_streq(out[k].file, f))) { + dup = 1; + break; + } + } + } + if (dup) continue; + if (out && total < cap) { + out[total].pc = r->address; + out[total].file = f; + } + ++total; + } + } + if (n_out) *n_out = total; + return 0; +} diff --git a/src/debug/dwarf_loc.c b/src/debug/dwarf_loc.c @@ -0,0 +1,369 @@ +/* dwarf_loc.c — DWARF location-expression evaluator. + * + * Per doc/DWARF.md §4.4: small DWARF stack machine. Supports the ops the + * producer emits: DW_OP_reg0..31, regx, fbreg, addr, call_frame_cfa, plus + * arithmetic. DW_AT_frame_base = DW_OP_call_frame_cfa per §3.6 — the + * caller passes the CFA in via frame->cfa. 
+ */ + +#include <cfree.h> +#include <stdint.h> +#include <string.h> + +#include "core/core.h" +#include "core/heap.h" +#include "debug/dwarf_internal.h" + +/* Tiny stack machine state. */ +typedef struct ExprMachine { + i64 stack[64]; + int sp; /* points to next free slot; top is stack[sp-1] */ + int reg_result; + u32 reg_num; /* if reg_result, holds the register number */ + int stack_value; /* DW_OP_stack_value seen */ +} ExprMachine; + +static int push(ExprMachine* m, i64 v) { + if (m->sp >= (int)(sizeof(m->stack) / sizeof(m->stack[0]))) return 0; + m->stack[m->sp++] = v; + return 1; +} +static int pop(ExprMachine* m, i64* v) { + if (m->sp == 0) return 0; + *v = m->stack[--m->sp]; + return 1; +} + +/* Evaluate either DW_AT_frame_base (when we encounter DW_OP_fbreg) or + * the inlined block; reuses the same machinery. Returns 0 on success. */ +static int eval_one(CfreeDebugInfo* d, const u8* expr, u32 len, + const u8* fb_expr, u32 fb_len, + const CfreeUnwindFrame* frame, ExprMachine* m, + int allow_fbreg) { + u32 off = 0; + while (off < len) { + u8 op = expr[off++]; + if (op >= DW_OP_lit0 && op <= DW_OP_lit0 + 31) { + if (!push(m, op - DW_OP_lit0)) return 1; + } else if (op >= DW_OP_reg0 && op <= DW_OP_reg0 + 31) { + m->reg_result = 1; + m->reg_num = op - DW_OP_reg0; + return 0; + } else if (op >= DW_OP_breg0 && op <= DW_OP_breg0 + 31) { + i64 ofs = dw_sleb(expr, len, &off); + u32 r = op - DW_OP_breg0; + i64 v = (r < 32) ? (i64)frame->regs[r] : 0; + if (!push(m, v + ofs)) return 1; + } else { + switch (op) { + case DW_OP_addr: + /* Address of a global. Address-size depends on CU; assume 8. 
*/ + if (off + 8 > len) return 1; + { + u64 a = dw_u64(expr, len, &off); + if (!push(m, (i64)a)) return 1; + } + break; + case DW_OP_const1u: + if (off + 1 > len) return 1; + if (!push(m, expr[off++])) return 1; + break; + case DW_OP_const1s: + if (off + 1 > len) return 1; + if (!push(m, (i8)expr[off++])) return 1; + break; + case DW_OP_const2u: { + if (!push(m, dw_u16(expr, len, &off))) return 1; + } break; + case DW_OP_const2s: { + u16 v = dw_u16(expr, len, &off); + if (!push(m, (i16)v)) return 1; + } break; + case DW_OP_const4u: { + if (!push(m, dw_u32(expr, len, &off))) return 1; + } break; + case DW_OP_const4s: { + u32 v = dw_u32(expr, len, &off); + if (!push(m, (i32)v)) return 1; + } break; + case DW_OP_const8u: + case DW_OP_const8s: { + u64 v = dw_u64(expr, len, &off); + if (!push(m, (i64)v)) return 1; + } break; + case DW_OP_constu: { + u64 v = dw_uleb(expr, len, &off); + if (!push(m, (i64)v)) return 1; + } break; + case DW_OP_consts: { + i64 v = dw_sleb(expr, len, &off); + if (!push(m, v)) return 1; + } break; + case DW_OP_dup: { + i64 v; + if (m->sp == 0) return 1; + v = m->stack[m->sp - 1]; + if (!push(m, v)) return 1; + } break; + case DW_OP_drop: { + i64 v; + if (!pop(m, &v)) return 1; + } break; + case DW_OP_and: { + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, a & b)) return 1; + } break; + case DW_OP_minus: { + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, a - b)) return 1; + } break; + case DW_OP_mul: { + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, a * b)) return 1; + } break; + case DW_OP_or: { + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, a | b)) return 1; + } break; + case DW_OP_plus: { + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, a + b)) return 1; + } break; + case DW_OP_plus_uconst: { + u64 c = dw_uleb(expr, len, &off); + i64 a; + if (!pop(m, &a)) return 1; + if (!push(m, a + (i64)c)) return 1; + } break; + case DW_OP_shl: 
{ + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, (i64)((u64)a << (b & 63)))) return 1; + } break; + case DW_OP_shr: { + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, (i64)((u64)a >> (b & 63)))) return 1; + } break; + case DW_OP_shra: { + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, a >> (b & 63))) return 1; + } break; + case DW_OP_xor: { + i64 a, b; + if (!pop(m, &b) || !pop(m, &a)) return 1; + if (!push(m, a ^ b)) return 1; + } break; + case DW_OP_regx: { + u64 r = dw_uleb(expr, len, &off); + m->reg_result = 1; + m->reg_num = (u32)r; + return 0; + } + case DW_OP_bregx: { + u64 r = dw_uleb(expr, len, &off); + i64 ofs = dw_sleb(expr, len, &off); + i64 v = (r < 32) ? (i64)frame->regs[r] : 0; + if (!push(m, v + ofs)) return 1; + } break; + case DW_OP_fbreg: { + i64 ofs = dw_sleb(expr, len, &off); + if (!allow_fbreg) return 1; + /* Evaluate frame_base expression to get the CFA-equivalent base. */ + { + ExprMachine fbm; + i64 base = 0; + int rc; + memset(&fbm, 0, sizeof(fbm)); + if (fb_expr && fb_len > 0) { + rc = eval_one(d, fb_expr, fb_len, NULL, 0, frame, &fbm, 0); + if (rc != 0) return rc; + if (fbm.sp > 0) + base = fbm.stack[fbm.sp - 1]; + else if (fbm.reg_result) { + /* Frame base lives in a register — value is reg contents. */ + base = (fbm.reg_num < 32) ? (i64)frame->regs[fbm.reg_num] : 0; + } + } else { + base = (i64)frame->cfa; + } + if (!push(m, base + ofs)) return 1; + } + } break; + case DW_OP_call_frame_cfa: { + if (!push(m, (i64)frame->cfa)) return 1; + } break; + case DW_OP_stack_value: + m->stack_value = 1; + return 0; + default: + /* Unsupported op — give up. */ + return 1; + } + } + } + return 0; +} + +/* Resolve a loclistx index to the active entry for `pc`. + * + * Per DWARF 5: DW_AT_loclists_base on the CU points at the offset_entries + * array within .debug_loclists. 
offset_entries[idx] is a 4-byte value (in + * 32-bit DWARF) giving the byte offset (relative to loclists_base) of the + * matching location list. Each list is a sequence of LLE entries + * terminated by DW_LLE_end_of_list. We recognize at minimum: + * DW_LLE_offset_pair (relative to base address) + * DW_LLE_start_length (absolute) + * DW_LLE_start_end (absolute) + * DW_LLE_default_location + * DW_LLE_base_address (sets the base for offset_pair) + * DW_LLE_base_addressx / DW_LLE_startx_* — degraded (skipped; need + * .debug_addr resolution we don't yet model). + */ +int dw_loclist_resolve(CfreeDebugInfo* d, const DwCu* cu, u64 idx, u64 pc, + const u8** bytes_out, u32* len_out) { + u32 base; + u32 entry_off; + u32 list_off; + u64 base_addr = 0; + if (!d || !cu) return 0; + if (d->loclists.sec_idx == UINT32_MAX || d->loclists.size == 0) return 0; + base = cu->loclists_base; + /* DW_AT_loclists_base points to the start of the offset_entries table + * for the CU (i.e. just past the header). offset_entries[i] is a + * 4-byte (32-bit DWARF) value, the byte offset (relative to base) of + * the matching location list. */ + entry_off = base + (u32)idx * 4u; + if (entry_off + 4 > d->loclists.size) return 0; + { + u32 t = entry_off; + list_off = dw_u32(d->loclists.data, d->loclists.size, &t); + } + /* The entry value is an offset relative to `base`. */ + list_off += base; + if (list_off >= d->loclists.size) return 0; + /* Walk the list. 
*/ + { + u32 off = list_off; + while (off < d->loclists.size) { + u8 lle = dw_u8(d->loclists.data, d->loclists.size, &off); + switch (lle) { + case DW_LLE_end_of_list: + return 0; + case DW_LLE_base_address: { + if (cu->address_size == 8) + base_addr = dw_u64(d->loclists.data, d->loclists.size, &off); + else + base_addr = dw_u32(d->loclists.data, d->loclists.size, &off); + } break; + case DW_LLE_offset_pair: { + u64 lo = dw_uleb(d->loclists.data, d->loclists.size, &off); + u64 hi = dw_uleb(d->loclists.data, d->loclists.size, &off); + u32 elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); + const u8* eb = d->loclists.data + off; + off += elen; + if (pc >= base_addr + lo && pc < base_addr + hi) { + *bytes_out = eb; + *len_out = elen; + return 1; + } + } break; + case DW_LLE_start_end: { + u64 lo, hi; + u32 elen; + const u8* eb; + if (cu->address_size == 8) { + lo = dw_u64(d->loclists.data, d->loclists.size, &off); + hi = dw_u64(d->loclists.data, d->loclists.size, &off); + } else { + lo = dw_u32(d->loclists.data, d->loclists.size, &off); + hi = dw_u32(d->loclists.data, d->loclists.size, &off); + } + elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); + eb = d->loclists.data + off; + off += elen; + if (pc >= lo && pc < hi) { + *bytes_out = eb; + *len_out = elen; + return 1; + } + } break; + case DW_LLE_start_length: { + u64 lo, length; + u32 elen; + const u8* eb; + if (cu->address_size == 8) + lo = dw_u64(d->loclists.data, d->loclists.size, &off); + else + lo = dw_u32(d->loclists.data, d->loclists.size, &off); + length = dw_uleb(d->loclists.data, d->loclists.size, &off); + elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); + eb = d->loclists.data + off; + off += elen; + if (pc >= lo && pc < lo + length) { + *bytes_out = eb; + *len_out = elen; + return 1; + } + } break; + case DW_LLE_default_location: { + u32 elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); + const u8* eb = d->loclists.data + off; + off += elen; + 
*bytes_out = eb; + *len_out = elen; + return 1; + } + case DW_LLE_base_addressx: { + (void)dw_uleb(d->loclists.data, d->loclists.size, &off); + /* unsupported: needs .debug_addr indirection */ + } break; + case DW_LLE_startx_endx: + case DW_LLE_startx_length: { + (void)dw_uleb(d->loclists.data, d->loclists.size, &off); + (void)dw_uleb(d->loclists.data, d->loclists.size, &off); + { + u32 elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); + off += elen; + } + } break; + default: + /* Unknown LLE — stop. */ + return 0; + } + } + } + return 0; +} + +int dw_eval_expr(CfreeDebugInfo* d, const u8* expr, u32 len, const u8* fb_expr, + u32 fb_len, const CfreeUnwindFrame* frame, DwExprResult* out) { + ExprMachine m; + int rc; + memset(&m, 0, sizeof(m)); + out->kind = 3; + out->value = 0; + if (!expr || len == 0 || !frame) return 1; + rc = eval_one(d, expr, len, fb_expr, fb_len, frame, &m, 1); + if (rc != 0) return rc; + if (m.reg_result) { + out->kind = 2; + out->value = m.reg_num; + return 0; + } + if (m.sp == 0) return 1; + if (m.stack_value) { + out->kind = 1; + out->value = (u64)m.stack[m.sp - 1]; + return 0; + } + out->kind = 0; + out->value = (u64)m.stack[m.sp - 1]; + return 0; +} diff --git a/src/debug/dwarf_open.c b/src/debug/dwarf_open.c @@ -0,0 +1,750 @@ +/* dwarf_open.c — open/close, section lookup, primitives, abbrev cache. + * + * Per doc/DWARF.md §4.1: read .debug_abbrev / .debug_info / .debug_line / + * .debug_str / .debug_line_str by section name from the CfreeObjFile. + * Return NULL if any of those mandatory five are missing. 
+ */ + +#include <cfree.h> +#include <stdint.h> +#include <string.h> + +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/util.h" +#include "core/vec.h" +#include "debug/dwarf_internal.h" + +/* ---- section lookup --------------------------------------------------- */ + +void dw_find_section(CfreeDebugInfo* d, const char* name, DwSection* out) { + uint32_t i, n; + out->data = NULL; + out->size = 0; + out->sec_idx = UINT32_MAX; + if (!d->obj) return; + n = cfree_obj_nsections(d->obj); + for (i = 0; i < n; ++i) { + CfreeObjSecInfo info = cfree_obj_section(d->obj, i); + if (info.name && dw_streq(info.name, name)) { + size_t len = 0; + const uint8_t* p = cfree_obj_section_data(d->obj, i, &len); + out->data = p; + out->size = (u32)len; + out->sec_idx = i; + return; + } + } +} + +/* ---- byte-stream primitives ------------------------------------------- */ + +/* On EOF we return zero / empty. The decoder will detect malformed input + * via length checks elsewhere; for the consumer we just want to not + * crash on truncated bytes. 
/* Byte-stream readers. Each takes the section bytes (`base`, `size`) and a
 * cursor `*off`, returns the decoded value and advances the cursor. None of
 * them reads outside [base, base + size); truncated input decodes as zero
 * or the empty string. */

/* True when `n` bytes can be read at `off`. Written as a subtraction so a
 * cursor close to UINT32_MAX cannot wrap the check. */
static int dw_can_read(u32 size, u32 off, u32 n) {
  return off <= size && size - off >= n;
}

/* Read one byte; returns 0 and leaves the cursor alone at end of input. */
u8 dw_u8(const u8* base, u32 size, u32* off) {
  if (*off >= size) return 0;
  return base[(*off)++];
}

/* Read a little-endian u16; on truncation returns 0 and parks the cursor at
 * `size`. */
u16 dw_u16(const u8* base, u32 size, u32* off) {
  u16 v;
  if (!dw_can_read(size, *off, 2)) {
    *off = size;
    return 0;
  }
  v = (u16)((u16)base[*off] | ((u16)base[*off + 1] << 8));
  *off += 2;
  return v;
}

/* Read a little-endian 24-bit value; truncation behaves as in dw_u16. */
u32 dw_u24(const u8* base, u32 size, u32* off) {
  u32 v;
  if (!dw_can_read(size, *off, 3)) {
    *off = size;
    return 0;
  }
  v = (u32)base[*off] | ((u32)base[*off + 1] << 8) |
      ((u32)base[*off + 2] << 16);
  *off += 3;
  return v;
}

/* Read a little-endian u32; truncation behaves as in dw_u16. */
u32 dw_u32(const u8* base, u32 size, u32* off) {
  u32 v;
  if (!dw_can_read(size, *off, 4)) {
    *off = size;
    return 0;
  }
  v = (u32)base[*off] | ((u32)base[*off + 1] << 8) |
      ((u32)base[*off + 2] << 16) | ((u32)base[*off + 3] << 24);
  *off += 4;
  return v;
}

/* Read a little-endian u64; truncation behaves as in dw_u16. */
u64 dw_u64(const u8* base, u32 size, u32* off) {
  u64 v = 0;
  u32 i;
  if (!dw_can_read(size, *off, 8)) {
    *off = size;
    return 0;
  }
  for (i = 0; i < 8; ++i) v |= (u64)base[*off + i] << (8 * i);
  *off += 8;
  return v;
}

/* Decode an unsigned LEB128 value. The whole encoding is consumed even when
 * it is longer than 10 bytes (bits beyond 64 are dropped), so the cursor
 * stays in sync with the following field. Stops at end of input. */
u64 dw_uleb(const u8* base, u32 size, u32* off) {
  u64 v = 0;
  unsigned shift = 0;
  while (*off < size) {
    u8 b = base[(*off)++];
    if (shift < 64) {
      v |= (u64)(b & 0x7f) << shift;
      shift += 7;
    }
    if (!(b & 0x80)) break;
  }
  return v;
}

/* Decode a signed LEB128 value. Bits are accumulated in an unsigned word and
 * sign-extended with an unsigned mask, so no signed value is ever shifted
 * into or past the sign bit. Over-long encodings are consumed in full, as in
 * dw_uleb. */
i64 dw_sleb(const u8* base, u32 size, u32* off) {
  u64 v = 0;
  unsigned shift = 0;
  u8 b = 0;
  while (*off < size) {
    b = base[(*off)++];
    if (shift < 64) {
      v |= (u64)(b & 0x7f) << shift;
      shift += 7;
    }
    if (!(b & 0x80)) break;
  }
  /* Sign-extend from the last group when it did not already reach bit 63. */
  if (shift < 64 && (b & 0x40)) v |= ~(u64)0 << shift;
  return (i64)v;
}

/* Read a NUL-terminated string and advance past its terminator. Returns ""
 * when the cursor is already at or past the end, or when no terminator
 * exists before `size` (the cursor is then parked at `size`), so the result
 * is always safe to pass to string functions. */
const char* dw_cstr(const u8* base, u32 size, u32* off) {
  u32 start = *off;
  u32 end = start;
  if (start >= size) return "";
  while (end < size && base[end] != 0) end++;
  if (end == size) {
    *off = size;
    return "";
  }
  *off = end + 1; /* consume terminator */
  return (const char*)base + start;
}

/* ---- string interning ---- */
------------------------------------------------- */ + +const char* dw_intern(CfreeDebugInfo* d, const char* s, size_t len) { + Sym sym = pool_intern(d->c->global, s, len); + return pool_str(d->c->global, sym, NULL); +} + +/* Resolve a .debug_str offset. */ +const char* dw_str(CfreeDebugInfo* d, u32 offset) { + if (offset >= d->str.size) return ""; + return (const char*)(d->str.data + offset); +} + +/* Resolve a .debug_line_str offset. */ +const char* dw_line_str(CfreeDebugInfo* d, u32 offset) { + if (offset >= d->line_str.size) return ""; + return (const char*)(d->line_str.data + offset); +} + +/* Resolve a strx index via .debug_str_offsets + cu->str_offsets_base. */ +const char* dw_strx(CfreeDebugInfo* d, const DwCu* cu, u64 idx) { + /* DW5 .debug_str_offsets has a header per contribution: + * unit_length (4 or 12), version (2), padding (2), then entries. + * cu->str_offsets_base points past the header to the first entry. + * If the base attribute is absent we fall back to base=0+8 (assume 32-bit + * header at start). 
*/ + u32 base = cu->str_offsets_base; + u32 ent_size = 4; + u32 entry_off = base + (u32)idx * ent_size; + u32 str_off; + if (entry_off + ent_size > d->str_offsets.size) return ""; + { + u32 tmp = entry_off; + str_off = dw_u32(d->str_offsets.data, d->str_offsets.size, &tmp); + } + return dw_str(d, str_off); +} + +/* ---- abbrev parsing --------------------------------------------------- */ + +static void abbrev_parse_table(CfreeDebugInfo* d, u32 offset, + DwAbbrevTable* t) { + u32 off = offset; + t->cu_abbrev_offset = offset; + t->abbrevs = NULL; + t->nabbrevs = 0; + t->cap = 0; + for (;;) { + u64 code; + DwAbbrev a; + DwAbbrevAttr* attrs = NULL; + u32 nattrs = 0, attrs_cap = 0; + if (off >= d->abbrev.size) break; + code = dw_uleb(d->abbrev.data, d->abbrev.size, &off); + if (code == 0) break; /* end-of-table marker */ + a.code = code; + a.tag = (u32)dw_uleb(d->abbrev.data, d->abbrev.size, &off); + a.has_children = dw_u8(d->abbrev.data, d->abbrev.size, &off); + a.attrs = NULL; + a.nattrs = 0; + /* Read (attr, form) pairs until (0,0). */ + for (;;) { + u32 at = (u32)dw_uleb(d->abbrev.data, d->abbrev.size, &off); + u32 fm = (u32)dw_uleb(d->abbrev.data, d->abbrev.size, &off); + i64 ic = 0; + if (at == 0 && fm == 0) break; + if (fm == DW_FORM_implicit_const) { + ic = dw_sleb(d->abbrev.data, d->abbrev.size, &off); + } + if (nattrs == attrs_cap) { + u32 ncap = attrs_cap ? attrs_cap * 2 : 4; + DwAbbrevAttr* na = (DwAbbrevAttr*)d->h->realloc( + d->h, attrs, attrs_cap * sizeof(*attrs), ncap * sizeof(*attrs), + _Alignof(DwAbbrevAttr)); + if (!na) { + if (attrs) d->h->free(d->h, attrs, attrs_cap * sizeof(*attrs)); + attrs = NULL; + attrs_cap = 0; + nattrs = 0; + break; + } + attrs = na; + attrs_cap = ncap; + } + attrs[nattrs].attr = at; + attrs[nattrs].form = fm; + attrs[nattrs].implicit_const = ic; + nattrs++; + } + a.attrs = attrs; + a.nattrs = nattrs; + if (t->nabbrevs == t->cap) { + u32 ncap = t->cap ? 
t->cap * 2 : 8; + DwAbbrev* na = (DwAbbrev*)d->h->realloc( + d->h, t->abbrevs, t->cap * sizeof(*t->abbrevs), + ncap * sizeof(*t->abbrevs), _Alignof(DwAbbrev)); + if (!na) break; + t->abbrevs = na; + t->cap = ncap; + } + t->abbrevs[t->nabbrevs++] = a; + } +} + +DwAbbrevTable* dw_abbrev_get(CfreeDebugInfo* d, u32 offset) { + u32 i; + DwAbbrevTable* t; + for (i = 0; i < d->nabbrevs; ++i) { + if (d->abbrevs[i].cu_abbrev_offset == offset) return &d->abbrevs[i]; + } + if (d->nabbrevs == d->abbrevs_cap) { + u32 ncap = d->abbrevs_cap ? d->abbrevs_cap * 2 : 4; + DwAbbrevTable* na = (DwAbbrevTable*)d->h->realloc( + d->h, d->abbrevs, d->abbrevs_cap * sizeof(*d->abbrevs), + ncap * sizeof(*d->abbrevs), _Alignof(DwAbbrevTable)); + if (!na) return NULL; + d->abbrevs = na; + d->abbrevs_cap = ncap; + } + t = &d->abbrevs[d->nabbrevs++]; + abbrev_parse_table(d, offset, t); + return t; +} + +DwAbbrev* dw_abbrev_lookup(DwAbbrevTable* t, u64 code) { + u32 i; + if (!t) return NULL; + for (i = 0; i < t->nabbrevs; ++i) { + if (t->abbrevs[i].code == code) return &t->abbrevs[i]; + } + return NULL; +} + +/* ---- CU header parsing ----------------------------------------------- */ + +u32 dw_cu_parse_header(CfreeDebugInfo* d, u32 off, DwCu* cu) { + u32 start = off; + u32 unit_length; + u32 hdr_after_len_off; + cu->hdr_offset = start; + cu->is_64bit = 0; + unit_length = dw_u32(d->info.data, d->info.size, &off); + if (unit_length == 0xffffffffu) { + /* DWARF64 — initial length followed by 8-byte length. We don't + * fully support DWARF64 ourselves, but skip the unit. */ + cu->is_64bit = 1; + cu->hdr_length = 0; + cu->unit_total_size = 0; + /* Skip past CU. 
*/ + { + u64 ulen = dw_u64(d->info.data, d->info.size, &off); + cu->unit_total_size = 12 + (u32)ulen; + } + return start + cu->unit_total_size; + } + cu->hdr_length = unit_length; + cu->unit_total_size = 4 + unit_length; + hdr_after_len_off = off; /* points just past unit_length */ + cu->version = (u8)dw_u16(d->info.data, d->info.size, &off); + if (cu->version >= 5) { + cu->unit_type = dw_u8(d->info.data, d->info.size, &off); + cu->address_size = dw_u8(d->info.data, d->info.size, &off); + cu->abbrev_offset = dw_u32(d->info.data, d->info.size, &off); + } else { + /* DW4 layout: abbrev_offset, address_size. */ + cu->unit_type = 0; + cu->abbrev_offset = dw_u32(d->info.data, d->info.size, &off); + cu->address_size = dw_u8(d->info.data, d->info.size, &off); + } + cu->die_start_off = off; + cu->str_offsets_base = 0; + cu->addr_base = 0; + cu->loclists_base = 0; + cu->rnglists_base = 0; + cu->stmt_list = 0; + cu->has_stmt_list = 0; + cu->comp_dir = ""; + cu->name = ""; + /* Resolve abbrev table now (cheap & idempotent). */ + { + DwAbbrevTable* t = dw_abbrev_get(d, cu->abbrev_offset); + cu->abbrev_table_idx = (u32)(t ? (t - d->abbrevs) : 0); + } + (void)hdr_after_len_off; + return start + cu->unit_total_size; +} + +/* Read the CU root DIE to capture base attributes (str_offsets_base, + * addr_base, stmt_list, name, comp_dir). Restores no state — leaves the + * CU in its parsed-header form. */ +static void cu_read_root_attrs(CfreeDebugInfo* d, DwCu* cu) { + u32 off = cu->die_start_off; + u64 code; + DwAbbrev* ab; + DwAttrValue v; + u32 i; + DwAbbrevTable* t = &d->abbrevs[cu->abbrev_table_idx]; + if (off >= d->info.size) return; + code = dw_uleb(d->info.data, d->info.size, &off); + if (code == 0) return; + ab = dw_abbrev_lookup(t, code); + if (!ab) return; + /* First pass: pull str_offsets_base if present (so subsequent strx + * resolutions work). 
*/ + for (i = 0; i < ab->nattrs; ++i) { + DwAbbrevAttr* aa = &ab->attrs[i]; + if (aa->attr == DW_AT_str_offsets_base) { + u32 tmp = off; + /* Skip preceding attrs to locate this attr's payload — easier + * to do a full pass and remember offsets. We re-scan instead. */ + (void)tmp; + break; + } + } + /* Two-pass scan: do skipping reads, but capture base attrs. We must + * be careful: dw_read_form for strx forms uses cu->str_offsets_base, + * so we read in two passes. */ + off = cu->die_start_off; + (void)dw_uleb(d->info.data, d->info.size, &off); /* re-skip code */ + /* Pass 1: only read str_offsets_base / addr_base (forms that don't + * themselves need those bases). */ + for (i = 0; i < ab->nattrs; ++i) { + DwAbbrevAttr* aa = &ab->attrs[i]; + if (aa->attr == DW_AT_str_offsets_base || aa->attr == DW_AT_addr_base || + aa->attr == DW_AT_loclists_base || aa->attr == DW_AT_rnglists_base) { + dw_read_form(d, cu, aa->form, aa->implicit_const, &off, &v); + if (aa->attr == DW_AT_str_offsets_base) + cu->str_offsets_base = (u32)v.u; + else if (aa->attr == DW_AT_addr_base) + cu->addr_base = (u32)v.u; + else if (aa->attr == DW_AT_loclists_base) + cu->loclists_base = (u32)v.u; + else if (aa->attr == DW_AT_rnglists_base) + cu->rnglists_base = (u32)v.u; + } else { + dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); + } + } + /* Pass 2: read remaining attrs (stmt_list, name, comp_dir). */ + off = cu->die_start_off; + (void)dw_uleb(d->info.data, d->info.size, &off); + for (i = 0; i < ab->nattrs; ++i) { + DwAbbrevAttr* aa = &ab->attrs[i]; + if (aa->attr == DW_AT_stmt_list) { + dw_read_form(d, cu, aa->form, aa->implicit_const, &off, &v); + cu->stmt_list = (u32)v.u; + cu->has_stmt_list = 1; + } else if (aa->attr == DW_AT_name) { + dw_read_form(d, cu, aa->form, aa->implicit_const, &off, &v); + cu->name = v.str ? v.str : ""; + } else if (aa->attr == DW_AT_comp_dir) { + dw_read_form(d, cu, aa->form, aa->implicit_const, &off, &v); + cu->comp_dir = v.str ? 
v.str : ""; + } else { + dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); + } + } +} + +void dw_parse_all_cus(CfreeDebugInfo* d) { + u32 off = 0; + while (off < d->info.size) { + DwCu cu; + u32 next = dw_cu_parse_header(d, off, &cu); + if (next <= off) break; + if (cu.is_64bit) { + off = next; + continue; + } + if (cu.version < 2 || cu.version > 5) { + off = next; + continue; + } + if (d->ncus == d->cus_cap) { + u32 ncap = d->cus_cap ? d->cus_cap * 2 : 4; + DwCu* na = + (DwCu*)d->h->realloc(d->h, d->cus, d->cus_cap * sizeof(*d->cus), + ncap * sizeof(*d->cus), _Alignof(DwCu)); + if (!na) break; + d->cus = na; + d->cus_cap = ncap; + } + d->cus[d->ncus++] = cu; + /* Capture root attrs now. */ + cu_read_root_attrs(d, &d->cus[d->ncus - 1]); + off = next; + } +} + +DwCu* dw_cu_at_die_offset(CfreeDebugInfo* d, u32 die_offset) { + u32 i; + for (i = 0; i < d->ncus; ++i) { + DwCu* cu = &d->cus[i]; + if (die_offset >= cu->hdr_offset && + die_offset < cu->hdr_offset + cu->unit_total_size) { + return cu; + } + } + return NULL; +} + +/* ---- form decoding ---------------------------------------------------- */ + +void dw_read_form(CfreeDebugInfo* d, const DwCu* cu, u32 form, + i64 implicit_const, u32* off, DwAttrValue* out) { + out->form = form; + out->u = 0; + out->s = 0; + out->str = ""; + out->block = NULL; + out->block_len = 0; + switch (form) { + case DW_FORM_addr: + if (cu->address_size == 8) + out->u = dw_u64(d->info.data, d->info.size, off); + else + out->u = dw_u32(d->info.data, d->info.size, off); + break; + case DW_FORM_data1: + case DW_FORM_ref1: + case DW_FORM_flag: + case DW_FORM_strx1: + case DW_FORM_addrx1: + out->u = dw_u8(d->info.data, d->info.size, off); + out->s = (i64)(i8)out->u; + if (form == DW_FORM_strx1) out->str = dw_strx(d, cu, out->u); + break; + case DW_FORM_data2: + case DW_FORM_ref2: + case DW_FORM_strx2: + case DW_FORM_addrx2: + out->u = dw_u16(d->info.data, d->info.size, off); + out->s = (i64)(i16)out->u; + if (form == DW_FORM_strx2) 
out->str = dw_strx(d, cu, out->u); + break; + case DW_FORM_strx3: + case DW_FORM_addrx3: + out->u = dw_u24(d->info.data, d->info.size, off); + if (form == DW_FORM_strx3) out->str = dw_strx(d, cu, out->u); + break; + case DW_FORM_data4: + case DW_FORM_ref4: + case DW_FORM_strx4: + case DW_FORM_addrx4: + out->u = dw_u32(d->info.data, d->info.size, off); + out->s = (i64)(i32)out->u; + if (form == DW_FORM_strx4) out->str = dw_strx(d, cu, out->u); + break; + case DW_FORM_data8: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + case DW_FORM_ref_sup8: + out->u = dw_u64(d->info.data, d->info.size, off); + out->s = (i64)out->u; + break; + case DW_FORM_data16: + /* Skip 16 bytes; not commonly needed. */ + *off += 16; + break; + case DW_FORM_sdata: + out->s = dw_sleb(d->info.data, d->info.size, off); + out->u = (u64)out->s; + break; + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + out->u = dw_uleb(d->info.data, d->info.size, off); + if (form == DW_FORM_strx) out->str = dw_strx(d, cu, out->u); + break; + case DW_FORM_string: + out->str = dw_cstr(d->info.data, d->info.size, off); + break; + case DW_FORM_strp: + out->u = dw_u32(d->info.data, d->info.size, off); + out->str = dw_str(d, (u32)out->u); + break; + case DW_FORM_line_strp: + out->u = dw_u32(d->info.data, d->info.size, off); + out->str = dw_line_str(d, (u32)out->u); + break; + case DW_FORM_strp_sup: + case DW_FORM_ref_sup4: + out->u = dw_u32(d->info.data, d->info.size, off); + break; + case DW_FORM_sec_offset: + out->u = dw_u32(d->info.data, d->info.size, off); + break; + case DW_FORM_ref_addr: + /* DWARF 5: 4 bytes for 32-bit DWARF (we don't support DWARF64). 
*/ + out->u = dw_u32(d->info.data, d->info.size, off); + break; + case DW_FORM_flag_present: + out->u = 1; + break; + case DW_FORM_implicit_const: + out->s = implicit_const; + out->u = (u64)implicit_const; + break; + case DW_FORM_block1: { + u32 n = dw_u8(d->info.data, d->info.size, off); + out->block = d->info.data + *off; + out->block_len = n; + out->u = n; + *off += n; + } break; + case DW_FORM_block2: { + u32 n = dw_u16(d->info.data, d->info.size, off); + out->block = d->info.data + *off; + out->block_len = n; + out->u = n; + *off += n; + } break; + case DW_FORM_block4: { + u32 n = dw_u32(d->info.data, d->info.size, off); + out->block = d->info.data + *off; + out->block_len = n; + out->u = n; + *off += n; + } break; + case DW_FORM_block: + case DW_FORM_exprloc: { + u32 n = (u32)dw_uleb(d->info.data, d->info.size, off); + out->block = d->info.data + *off; + out->block_len = n; + out->u = n; + *off += n; + } break; + case DW_FORM_indirect: { + u32 ifrm = (u32)dw_uleb(d->info.data, d->info.size, off); + dw_read_form(d, cu, ifrm, 0, off, out); + } break; + default: + /* Unknown form — best effort: skip nothing. 
*/ + break; + } +} + +void dw_skip_form(CfreeDebugInfo* d, const DwCu* cu, u32 form, + i64 implicit_const, u32* off) { + DwAttrValue tmp; + dw_read_form(d, cu, form, implicit_const, off, &tmp); +} + +/* ---- DIE iteration ---------------------------------------------------- */ + +int dw_read_die(CfreeDebugInfo* d, const DwCu* cu, u32* off, DwDie* out) { + u64 code; + out->die_off = *off; + if (*off >= d->info.size || *off >= cu->hdr_offset + cu->unit_total_size) { + out->abbrev_code = 0; + out->abbrev = NULL; + out->attrs_off = *off; + return 0; + } + code = dw_uleb(d->info.data, d->info.size, off); + out->abbrev_code = code; + out->attrs_off = *off; + out->next_sibling_off = 0; + if (code == 0) { + out->abbrev = NULL; + return 0; + } + out->abbrev = dw_abbrev_lookup(&d->abbrevs[cu->abbrev_table_idx], code); + return 1; +} + +void dw_skip_die_attrs(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, + u32* off) { + u32 i; + if (!die->abbrev) return; + for (i = 0; i < die->abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die->abbrev->attrs[i]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, off); + } +} + +void dw_skip_die_subtree(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, + u32* off) { + if (!die->abbrev) return; + dw_skip_die_attrs(d, cu, die, off); + if (die->abbrev->has_children) { + for (;;) { + DwDie child; + if (!dw_read_die(d, cu, off, &child)) break; + dw_skip_die_subtree(d, cu, &child, off); + } + } +} + +int dw_die_attr(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, u32 attr, + DwAttrValue* out) { + u32 off = die->attrs_off; + u32 i; + if (!die->abbrev) return 0; + for (i = 0; i < die->abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die->abbrev->attrs[i]; + if (aa->attr == attr) { + dw_read_form(d, cu, aa->form, aa->implicit_const, &off, out); + return 1; + } + dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); + } + return 0; +} + +/* ---- public open/close ----------------------------------------------- */ + +CfreeDebugInfo* 
cfree_dwarf_open(CfreeCompiler* c, const CfreeObjFile* obj) { + Heap* h; + CfreeDebugInfo* d; + if (!c || !obj) return NULL; + h = (Heap*)c->env->heap; + d = (CfreeDebugInfo*)h->alloc(h, sizeof(*d), _Alignof(CfreeDebugInfo)); + if (!d) return NULL; + memset(d, 0, sizeof(*d)); + d->c = c; + d->h = h; + d->obj = obj; + + dw_find_section(d, ".debug_abbrev", &d->abbrev); + dw_find_section(d, ".debug_info", &d->info); + dw_find_section(d, ".debug_line", &d->line); + dw_find_section(d, ".debug_str", &d->str); + dw_find_section(d, ".debug_line_str", &d->line_str); + dw_find_section(d, ".debug_str_offsets", &d->str_offsets); + dw_find_section(d, ".debug_addr", &d->addr); + dw_find_section(d, ".debug_loclists", &d->loclists); + dw_find_section(d, ".debug_rnglists", &d->rnglists); + dw_find_section(d, ".eh_frame", &d->eh_frame); + dw_find_section(d, ".debug_aranges", &d->aranges); + + if (d->abbrev.sec_idx == UINT32_MAX || d->info.sec_idx == UINT32_MAX || + d->line.sec_idx == UINT32_MAX || d->str.sec_idx == UINT32_MAX || + d->line_str.sec_idx == UINT32_MAX) { + cfree_dwarf_close(d); + return NULL; + } + + /* str_offsets_base default: in the absence of DW_AT_str_offsets_base, the + * offsets section starts with an 8-byte header (uniform for DW5). */ + dw_parse_all_cus(d); + if (d->ncus == 0) { + cfree_dwarf_close(d); + return NULL; + } + + /* Allocate per-CU lazy line-program state. 
*/ + if (d->ncus) { + d->lines_by_cu = (DwLineProgram*)h->alloc( + h, d->ncus * sizeof(DwLineProgram), _Alignof(DwLineProgram)); + d->lines_built = (u8*)h->alloc(h, d->ncus, 1); + if (!d->lines_by_cu || !d->lines_built) { + cfree_dwarf_close(d); + return NULL; + } + memset(d->lines_by_cu, 0, d->ncus * sizeof(DwLineProgram)); + memset(d->lines_built, 0, d->ncus); + } + + return d; +} + +static void free_subprog(Heap* h, DwSubprog* sp) { + if (sp->params) h->free(h, sp->params, sp->nparams * sizeof(DwLocal)); + if (sp->locals) h->free(h, sp->locals, sp->nlocals * sizeof(DwLocal)); +} + +void cfree_dwarf_close(CfreeDebugInfo* d) { + Heap* h; + u32 i; + if (!d) return; + h = d->h; + for (i = 0; i < d->nabbrevs; ++i) { + u32 j; + DwAbbrevTable* t = &d->abbrevs[i]; + for (j = 0; j < t->nabbrevs; ++j) { + if (t->abbrevs[j].attrs) + h->free(h, t->abbrevs[j].attrs, + t->abbrevs[j].nattrs * sizeof(DwAbbrevAttr)); + } + if (t->abbrevs) h->free(h, t->abbrevs, t->cap * sizeof(DwAbbrev)); + } + if (d->abbrevs) + h->free(h, d->abbrevs, d->abbrevs_cap * sizeof(DwAbbrevTable)); + if (d->cus) h->free(h, d->cus, d->cus_cap * sizeof(DwCu)); + + if (d->lines_by_cu) { + for (i = 0; i < d->ncus; ++i) { + DwLineProgram* lp = &d->lines_by_cu[i]; + if (lp->rows) h->free(h, lp->rows, lp->cap * sizeof(DwLineRow)); + if (lp->files) h->free(h, lp->files, lp->nfiles * sizeof(DwLineFile)); + if (lp->dirs) h->free(h, lp->dirs, lp->ndirs * sizeof(const char*)); + if (lp->file_norm) + h->free(h, lp->file_norm, lp->nfile_norm * sizeof(const char*)); + } + h->free(h, d->lines_by_cu, d->ncus * sizeof(DwLineProgram)); + } + if (d->lines_built) h->free(h, d->lines_built, d->ncus); + + for (i = 0; i < d->nsubs; ++i) free_subprog(h, &d->subs[i]); + if (d->subs) h->free(h, d->subs, d->subs_cap * sizeof(DwSubprog)); + + for (i = 0; i < d->ntypes; ++i) { + CfreeDwarfType* t = d->types_by_off[i]; + if (!t) continue; + if (t->fields) h->free(h, t->fields, t->nfields * sizeof(DwField)); + if (t->evals) 
h->free(h, t->evals, t->nevals * sizeof(DwEnumVal)); + h->free(h, t, sizeof(*t)); + } + if (d->types_by_off) + h->free(h, d->types_by_off, d->types_cap * sizeof(CfreeDwarfType*)); + if (d->types_off) h->free(h, d->types_off, d->types_cap * sizeof(u32)); + + if (d->globals) h->free(h, d->globals, d->globals_cap * sizeof(DwLocal)); + + h->free(h, d, sizeof(*d)); +} diff --git a/src/debug/dwarf_query.c b/src/debug/dwarf_query.c @@ -0,0 +1,370 @@ +/* dwarf_query.c — public cfree_dwarf_* query entry points. + * + * Implements the consumer half of doc/DWARF.md: + * subprogram_at / func_at, var_at, vars_at_*, param_iter_*, loc_read. + */ + +#include <cfree.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "core/core.h" +#include "core/heap.h" +#include "debug/dwarf_internal.h" + +int cfree_dwarf_subprogram_at(CfreeDebugInfo* d, uint64_t pc, + CfreeDwarfSubprogram* out) { + DwSubprog* sp; + if (!d || !out) return 1; + memset(out, 0, sizeof(*out)); + sp = dw_find_subprog(d, pc); + if (!sp) return 1; + out->name = sp->name ? sp->name : ""; + out->low_pc = sp->low_pc; + out->high_pc = sp->high_pc; + out->decl_file = sp->decl_file ? 
sp->decl_file : ""; + out->decl_line = sp->decl_line; + out->inlined = sp->inlined; + return 0; +} + +int cfree_dwarf_func_at(CfreeDebugInfo* d, uint64_t pc, const char** name_out, + uint64_t* low_out, uint64_t* high_out) { + CfreeDwarfSubprogram sp; + if (cfree_dwarf_subprogram_at(d, pc, &sp) != 0) return 1; + if (name_out) *name_out = sp.name; + if (low_out) *low_out = sp.low_pc; + if (high_out) *high_out = sp.high_pc; + return 0; +} + +/* ---- variable resolution -------------------------------------------- */ + +static void fill_varloc(CfreeDebugInfo* d, u32 cu_idx, const DwLocal* v, u64 pc, + CfreeDwarfVarLoc* out) { + const u8* lbytes = v->loc; + u32 llen = v->loc_len; + memset(out, 0, sizeof(*out)); + out->kind = CFREE_DLOC_EXPR; + out->byte_size = 0; + out->type = NULL; + if (v->type_die_offset) { + out->type = dw_type_from_die(d, cu_idx, v->type_die_offset); + if (out->type) out->byte_size = out->type->byte_size; + } + /* If the variable was emitted with a loclistx, resolve it now. The + * resolved bytes get the same single-op fast-path treatment below. */ + if (v->has_loclist && cu_idx < d->ncus) { + const u8* lb = NULL; + u32 ll = 0; + if (dw_loclist_resolve(d, &d->cus[cu_idx], v->loclist_index, pc, &lb, + &ll)) { + lbytes = lb; + llen = ll; + } else { + /* No active entry for this PC — variable is currently unavailable. */ + out->kind = CFREE_DLOC_EXPR; + out->v.expr.bytes = NULL; + out->v.expr.len = 0; + return; + } + } + /* Inspect the loc bytes — if it's a single op of a recognized form, + * we expose the structured kind so callers can fast-path. Otherwise + * we surface the raw bytes as EXPR. 
*/ + if (lbytes && llen > 0) { + const u8* e = lbytes; + if (llen == 1 && e[0] >= DW_OP_reg0 && e[0] <= DW_OP_reg0 + 31) { + out->kind = CFREE_DLOC_REG; + out->v.reg = e[0] - DW_OP_reg0; + return; + } + if (e[0] == DW_OP_regx) { + u32 off = 1; + u64 r = dw_uleb(e, llen, &off); + if (off == llen) { + out->kind = CFREE_DLOC_REG; + out->v.reg = (u32)r; + return; + } + } + if (e[0] == DW_OP_fbreg) { + u32 off = 1; + i64 ofs = dw_sleb(e, llen, &off); + if (off == llen) { + out->kind = CFREE_DLOC_FRAME_OFS; + out->v.frame_ofs = (i32)ofs; + return; + } + } + if (e[0] == DW_OP_addr && llen == 9) { + u32 off = 1; + out->kind = CFREE_DLOC_GLOBAL; + out->v.global = dw_u64(e, llen, &off); + return; + } + /* Fallback: opaque expression bytes. */ + out->kind = CFREE_DLOC_EXPR; + out->v.expr.bytes = lbytes; + out->v.expr.len = llen; + return; + } + /* No location at all — leave kind=EXPR with NULL/0. */ + out->kind = CFREE_DLOC_EXPR; + out->v.expr.bytes = NULL; + out->v.expr.len = 0; +} + +int cfree_dwarf_var_at(CfreeDebugInfo* d, uint64_t pc, const char* name, + CfreeDwarfVarLoc* out) { + /* Return codes: + * 0 — found; *out filled. + * 1 — invalid args, or `pc` lies inside a known subprogram but no + * variable named `name` is visible there (the user typo case). + * 2 — `pc` is not covered by any subprogram (no debug info for this + * frame). REPL: "no debug info for this frame". Globals are + * still consulted before returning 2 so a name lookup against a + * global from a -g-less frame still resolves. */ + DwSubprog* sp; + u32 i; + if (!d || !name || !out) return 1; + memset(out, 0, sizeof(*out)); + sp = dw_find_subprog(d, pc); + if (sp) { + dw_build_locals(d, sp); + /* Deepest scope first: walk locals from end (innermost blocks added + * after enclosing). 
*/ + for (i = sp->nlocals; i > 0; --i) { + DwLocal* v = &sp->locals[i - 1]; + if (!v->name || !dw_streq(v->name, name)) continue; + if (v->has_scope && (pc < v->scope_lo || pc >= v->scope_hi)) continue; + fill_varloc(d, sp->cu_idx, v, pc, out); + return 0; + } + /* Then params. */ + for (i = 0; i < sp->nparams; ++i) { + DwLocal* v = &sp->params[i]; + if (!v->name || !dw_streq(v->name, name)) continue; + fill_varloc(d, sp->cu_idx, v, pc, out); + return 0; + } + } + /* Globals. */ + dw_build_globals(d); + for (i = 0; i < d->nglobals; ++i) { + DwLocal* v = &d->globals[i]; + if (!v->name || !dw_streq(v->name, name)) continue; + fill_varloc(d, 0, v, pc, out); + return 0; + } + return sp ? 1 : 2; +} + +int cfree_dwarf_loc_read(CfreeDebugInfo* d, const CfreeDwarfVarLoc* loc, + const CfreeUnwindFrame* frame, CfreeJitSession* sess, + void* dst, size_t cap, size_t* read_out) { + size_t want; + if (read_out) *read_out = 0; + if (!d || !loc || !frame || !dst) return 1; + want = loc->byte_size ? loc->byte_size : cap; + if (want > cap) want = cap; + switch (loc->kind) { + case CFREE_DLOC_REG: { + uint64_t v = (loc->v.reg < 32) ? frame->regs[loc->v.reg] : 0; + size_t n = want > sizeof(v) ? sizeof(v) : want; + memcpy(dst, &v, n); + if (read_out) *read_out = n; + return 0; + } + case CFREE_DLOC_FRAME_OFS: { + uint64_t addr = frame->cfa + (uint64_t)(int64_t)loc->v.frame_ofs; + if (!sess) return 1; + if (cfree_jit_session_read_mem(sess, addr, dst, want) != 0) return 1; + if (read_out) *read_out = want; + return 0; + } + case CFREE_DLOC_GLOBAL: { + uint64_t addr = loc->v.global; + if (!sess) return 1; + if (cfree_jit_session_read_mem(sess, addr, dst, want) != 0) return 1; + if (read_out) *read_out = want; + return 0; + } + case CFREE_DLOC_EXPR: { + /* Evaluate. We don't have direct access to the variable's + * subprogram's frame_base here — caller-supplied frame must already + * carry the right CFA. 
The expression itself may be DW_OP_call_frame_cfa + * + DW_OP_consts + DW_OP_plus, etc. */ + DwExprResult r; + if (loc->v.expr.bytes == NULL || loc->v.expr.len == 0) return 1; + if (dw_eval_expr(d, loc->v.expr.bytes, (u32)loc->v.expr.len, NULL, 0, + frame, &r) != 0) + return 1; + if (r.kind == 0) { + if (!sess) return 1; + if (cfree_jit_session_read_mem(sess, r.value, dst, want) != 0) return 1; + if (read_out) *read_out = want; + return 0; + } else if (r.kind == 1) { + size_t n = want > sizeof(r.value) ? sizeof(r.value) : want; + memcpy(dst, &r.value, n); + if (read_out) *read_out = n; + return 0; + } else if (r.kind == 2) { + u64 v = (r.value < 32) ? frame->regs[r.value] : 0; + size_t n = want > sizeof(v) ? sizeof(v) : want; + memcpy(dst, &v, n); + if (read_out) *read_out = n; + return 0; + } + return 1; + } + } + return 1; +} + +/* ---- vars_at_* iterator --------------------------------------------- */ + +struct CfreeDwarfVarIter { + CfreeDebugInfo* d; + DwSubprog* sp; + u64 pc; + u32 mask; + u32 phase; /* 0 = locals, 1 = params, 2 = globals, 3 = done */ + u32 idx; +}; + +CfreeDwarfVarIter* cfree_dwarf_vars_at_new(CfreeDebugInfo* d, uint64_t pc, + uint32_t mask) { + CfreeDwarfVarIter* it; + if (!d) return NULL; + it = (CfreeDwarfVarIter*)d->h->alloc(d->h, sizeof(*it), + _Alignof(CfreeDwarfVarIter)); + if (!it) return NULL; + it->d = d; + it->pc = pc; + it->mask = mask; + it->sp = dw_find_subprog(d, pc); + if (it->sp) dw_build_locals(d, it->sp); + it->phase = 0; + it->idx = it->sp ? 
it->sp->nlocals : 0; + return it; +} + +int cfree_dwarf_vars_at_next(CfreeDwarfVarIter* it, CfreeDwarfVar* out) { + if (!it || !out) return 0; + for (;;) { + switch (it->phase) { + case 0: { + if (!(it->mask & (1u << CFREE_DVR_LOCAL))) { + it->phase = 1; + it->idx = 0; + break; + } + if (it->idx == 0) { + it->phase = 1; + it->idx = 0; + break; + } + { + DwLocal* v = &it->sp->locals[--it->idx]; + if (v->has_scope && (it->pc < v->scope_lo || it->pc >= v->scope_hi)) + break; + out->name = v->name ? v->name : ""; + out->role = CFREE_DVR_LOCAL; + fill_varloc(it->d, it->sp->cu_idx, v, it->pc, &out->loc); + return 1; + } + } + case 1: { + if (!it->sp || !(it->mask & (1u << CFREE_DVR_ARG))) { + it->phase = 2; + it->idx = 0; + break; + } + if (it->idx >= it->sp->nparams) { + it->phase = 2; + it->idx = 0; + break; + } + { + DwLocal* v = &it->sp->params[it->idx++]; + out->name = v->name ? v->name : ""; + out->role = CFREE_DVR_ARG; + fill_varloc(it->d, it->sp->cu_idx, v, it->pc, &out->loc); + return 1; + } + } + case 2: { + if (!(it->mask & (1u << CFREE_DVR_GLOBAL))) { + it->phase = 3; + break; + } + dw_build_globals(it->d); + if (it->idx >= it->d->nglobals) { + it->phase = 3; + break; + } + { + DwLocal* v = &it->d->globals[it->idx++]; + out->name = v->name ? 
v->name : ""; + out->role = CFREE_DVR_GLOBAL; + fill_varloc(it->d, 0, v, it->pc, &out->loc); + return 1; + } + } + default: + return 0; + } + } +} + +void cfree_dwarf_vars_at_free(CfreeDwarfVarIter* it) { + if (!it) return; + it->d->h->free(it->d->h, it, sizeof(*it)); +} + +/* ---- param_iter_* --------------------------------------------------- */ + +struct CfreeDwarfParamIter { + CfreeDebugInfo* d; + DwSubprog* sp; + u64 pc; + u32 idx; +}; + +CfreeDwarfParamIter* cfree_dwarf_param_iter_new(CfreeDebugInfo* d, + uint64_t pc) { + CfreeDwarfParamIter* it; + DwSubprog* sp; + if (!d) return NULL; + sp = dw_find_subprog(d, pc); + if (!sp) return NULL; + dw_build_locals(d, sp); + it = (CfreeDwarfParamIter*)d->h->alloc(d->h, sizeof(*it), + _Alignof(CfreeDwarfParamIter)); + if (!it) return NULL; + it->d = d; + it->sp = sp; + it->pc = pc; + it->idx = 0; + return it; +} + +int cfree_dwarf_param_iter_next(CfreeDwarfParamIter* it, CfreeDwarfVar* out) { + if (!it || !out) return 0; + if (it->idx >= it->sp->nparams) return 0; + { + DwLocal* v = &it->sp->params[it->idx++]; + out->name = v->name ? v->name : ""; + out->role = CFREE_DVR_ARG; + fill_varloc(it->d, it->sp->cu_idx, v, it->pc, &out->loc); + } + return 1; +} + +void cfree_dwarf_param_iter_free(CfreeDwarfParamIter* it) { + if (!it) return; + it->d->h->free(it->d->h, it, sizeof(*it)); +} diff --git a/src/debug/dwarf_type.c b/src/debug/dwarf_type.c @@ -0,0 +1,509 @@ +/* dwarf_type.c — type DIE → CfreeDwarfType resolution. + * + * Builds CfreeDwarfType records on demand from DW_TAG_base_type, + * DW_TAG_pointer_type, DW_TAG_array_type, struct/union/enum, typedef, + * and qualifier-types (const/volatile/restrict transparent to inner). 
+ */ + +#include <cfree.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "core/core.h" +#include "core/heap.h" +#include "core/util.h" +#include "debug/dwarf_internal.h" + +static CfreeDwarfType* type_cache_get(CfreeDebugInfo* d, u32 die_offset) { + u32 i; + for (i = 0; i < d->ntypes; ++i) { + if (d->types_off[i] == die_offset) return d->types_by_off[i]; + } + return NULL; +} + +static void type_cache_put(CfreeDebugInfo* d, u32 die_offset, + CfreeDwarfType* t) { + if (d->ntypes == d->types_cap) { + u32 ncap = d->types_cap ? d->types_cap * 2 : 16; + CfreeDwarfType** na = (CfreeDwarfType**)d->h->realloc( + d->h, d->types_by_off, d->types_cap * sizeof(*d->types_by_off), + ncap * sizeof(*d->types_by_off), _Alignof(CfreeDwarfType*)); + u32* no = + (u32*)d->h->realloc(d->h, d->types_off, d->types_cap * sizeof(u32), + ncap * sizeof(u32), _Alignof(u32)); + if (!na || !no) return; + d->types_by_off = na; + d->types_off = no; + d->types_cap = ncap; + } + d->types_by_off[d->ntypes] = t; + d->types_off[d->ntypes] = die_offset; + d->ntypes++; +} + +static CfreeDwarfType* type_alloc(CfreeDebugInfo* d) { + CfreeDwarfType* t = + (CfreeDwarfType*)d->h->alloc(d->h, sizeof(*t), _Alignof(CfreeDwarfType)); + if (!t) return NULL; + memset(t, 0, sizeof(*t)); + t->name = ""; + return t; +} + +CfreeDwarfType* dw_void_type(CfreeDebugInfo* d) { + CfreeDwarfType* t = type_cache_get(d, 0); + if (t) return t; + t = type_alloc(d); + if (!t) return NULL; + t->kind = DTK_VOID; + type_cache_put(d, 0, t); + return t; +} + +/* Walk struct/union children for fields, or enum children for values. 
*/ +static void walk_struct_fields(CfreeDebugInfo* d, DwCu* cu, u32* off, + CfreeDwarfType* t) { + DwField* fields = NULL; + u32 nfields = 0, cap = 0; + for (;;) { + DwDie die; + if (!dw_read_die(d, cu, off, &die)) break; + if (die.abbrev->tag == DW_TAG_member) { + DieAttrPack p; + dw_die_pack(d, cu, &die, &p); + /* skip past die's attrs */ + { + u32 i; + for (i = 0; i < die.abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die.abbrev->attrs[i]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, off); + } + } + if (nfields == cap) { + u32 ncap = cap ? cap * 2 : 4; + DwField* na = + (DwField*)d->h->realloc(d->h, fields, cap * sizeof(*fields), + ncap * sizeof(*fields), _Alignof(DwField)); + if (!na) break; + fields = na; + cap = ncap; + } + fields[nfields].name = p.name ? p.name : ""; + fields[nfields].byte_offset = p.has_byte_offset ? p.byte_offset : 0; + fields[nfields].bit_offset = p.has_bit_offset ? p.bit_offset : 0; + fields[nfields].bit_size = p.has_bit_size ? p.bit_size : 0; + fields[nfields].type = + p.has_type + ? dw_type_from_die(d, (u32)(cu - d->cus), p.type_die_offset) + : dw_void_type(d); + nfields++; + if (die.abbrev->has_children) { + for (;;) { + DwDie c; + if (!dw_read_die(d, cu, off, &c)) break; + dw_skip_die_subtree(d, cu, &c, off); + } + } + } else { + dw_skip_die_subtree(d, cu, &die, off); + } + } + t->fields = fields; + t->nfields = nfields; +} + +static void walk_enum_values(CfreeDebugInfo* d, DwCu* cu, u32* off, + CfreeDwarfType* t) { + DwEnumVal* evs = NULL; + u32 nev = 0, cap = 0; + for (;;) { + DwDie die; + if (!dw_read_die(d, cu, off, &die)) break; + if (die.abbrev->tag == DW_TAG_enumerator) { + DieAttrPack p; + dw_die_pack(d, cu, &die, &p); + { + u32 i; + for (i = 0; i < die.abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die.abbrev->attrs[i]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, off); + } + } + if (nev == cap) { + u32 ncap = cap ? 
cap * 2 : 4; + DwEnumVal* na = + (DwEnumVal*)d->h->realloc(d->h, evs, cap * sizeof(*evs), + ncap * sizeof(*evs), _Alignof(DwEnumVal)); + if (!na) break; + evs = na; + cap = ncap; + } + evs[nev].name = p.name ? p.name : ""; + evs[nev].value = p.has_const_value ? p.const_value : 0; + nev++; + if (die.abbrev->has_children) { + for (;;) { + DwDie c; + if (!dw_read_die(d, cu, off, &c)) break; + dw_skip_die_subtree(d, cu, &c, off); + } + } + } else { + dw_skip_die_subtree(d, cu, &die, off); + } + } + t->evals = evs; + t->nevals = nev; +} + +/* For DW_TAG_array_type: child DW_TAG_subrange_type carries upper_bound / + * count. */ +static void walk_array_subrange(CfreeDebugInfo* d, DwCu* cu, u32* off, + CfreeDwarfType* t) { + for (;;) { + DwDie die; + if (!dw_read_die(d, cu, off, &die)) break; + if (die.abbrev->tag == DW_TAG_subrange_type) { + DieAttrPack p; + dw_die_pack(d, cu, &die, &p); + { + u32 i; + for (i = 0; i < die.abbrev->nattrs; ++i) { + DwAbbrevAttr* aa = &die.abbrev->attrs[i]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, off); + } + } + if (p.has_array_count) t->element_count = p.array_count; + if (die.abbrev->has_children) { + for (;;) { + DwDie c; + if (!dw_read_die(d, cu, off, &c)) break; + dw_skip_die_subtree(d, cu, &c, off); + } + } + } else { + dw_skip_die_subtree(d, cu, &die, off); + } + } +} + +CfreeDwarfType* dw_type_from_die(CfreeDebugInfo* d, u32 cu_idx, + u32 die_offset) { + DwCu* cu; + DwDie die; + u32 off; + CfreeDwarfType* t; + DieAttrPack p; + if (die_offset == 0) return dw_void_type(d); + t = type_cache_get(d, die_offset); + if (t) return t; + /* Resolve CU containing the DIE. */ + cu = dw_cu_at_die_offset(d, die_offset); + if (!cu) { + if (cu_idx < d->ncus) + cu = &d->cus[cu_idx]; + else + return dw_void_type(d); + } + off = die_offset; + if (!dw_read_die(d, cu, &off, &die)) return dw_void_type(d); + if (!die.abbrev) return dw_void_type(d); + dw_die_pack(d, cu, &die, &p); + /* Allocate before recursing — break cycles by interning early. 
*/ + t = type_alloc(d); + if (!t) return dw_void_type(d); + t->die_offset = die_offset; + type_cache_put(d, die_offset, t); + + switch (die.abbrev->tag) { + case DW_TAG_base_type: + t->kind = DTK_BASE; + t->name = p.name ? p.name : ""; + t->byte_size = p.byte_size; + t->base_encoding = p.base_encoding; + break; + case DW_TAG_pointer_type: + case DW_TAG_reference_type: + t->kind = DTK_PTR; + t->byte_size = p.has_byte_size ? p.byte_size : 8; + t->name = ""; + t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), + p.type_die_offset) + : dw_void_type(d); + break; + case DW_TAG_typedef: + t->kind = DTK_TYPEDEF; + t->name = p.name ? p.name : ""; + t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), + p.type_die_offset) + : dw_void_type(d); + if (t->inner) t->byte_size = t->inner->byte_size; + break; + case DW_TAG_const_type: + case DW_TAG_volatile_type: + case DW_TAG_restrict_type: + t->kind = (die.abbrev->tag == DW_TAG_const_type) ? DTK_CONST + : (die.abbrev->tag == DW_TAG_volatile_type) ? DTK_VOLATILE + : DTK_RESTRICT; + t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), + p.type_die_offset) + : dw_void_type(d); + if (t->inner) { + t->byte_size = t->inner->byte_size; + t->name = t->inner->name; + } + break; + case DW_TAG_array_type: + t->kind = DTK_ARRAY; + t->name = ""; + t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), + p.type_die_offset) + : dw_void_type(d); + if (die.abbrev->has_children) { + u32 cur = off; + /* Skip attrs (already read into p). */ + u32 ii; + for (ii = 0; ii < die.abbrev->nattrs; ++ii) { + DwAbbrevAttr* aa = &die.abbrev->attrs[ii]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, &cur); + } + walk_array_subrange(d, cu, &cur, t); + } + if (t->inner && t->element_count) + t->byte_size = t->inner->byte_size * t->element_count; + break; + case DW_TAG_structure_type: + case DW_TAG_class_type: + t->kind = DTK_STRUCT; + t->name = p.name ? 
p.name : ""; + t->byte_size = p.byte_size; + if (die.abbrev->has_children) { + u32 cur = off; + u32 ii; + for (ii = 0; ii < die.abbrev->nattrs; ++ii) { + DwAbbrevAttr* aa = &die.abbrev->attrs[ii]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, &cur); + } + walk_struct_fields(d, cu, &cur, t); + } + break; + case DW_TAG_union_type: + t->kind = DTK_UNION; + t->name = p.name ? p.name : ""; + t->byte_size = p.byte_size; + if (die.abbrev->has_children) { + u32 cur = off; + u32 ii; + for (ii = 0; ii < die.abbrev->nattrs; ++ii) { + DwAbbrevAttr* aa = &die.abbrev->attrs[ii]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, &cur); + } + walk_struct_fields(d, cu, &cur, t); + } + break; + case DW_TAG_enumeration_type: + t->kind = DTK_ENUM; + t->name = p.name ? p.name : ""; + t->byte_size = p.byte_size; + t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), + p.type_die_offset) + : dw_void_type(d); + if (die.abbrev->has_children) { + u32 cur = off; + u32 ii; + for (ii = 0; ii < die.abbrev->nattrs; ++ii) { + DwAbbrevAttr* aa = &die.abbrev->attrs[ii]; + dw_skip_form(d, cu, aa->form, aa->implicit_const, &cur); + } + walk_enum_values(d, cu, &cur, t); + } + break; + case DW_TAG_subroutine_type: + t->kind = DTK_FUNC; + t->name = ""; + t->inner = p.has_type ? 
dw_type_from_die(d, (u32)(cu - d->cus), + p.type_die_offset) + : dw_void_type(d); + break; + default: + t->kind = DTK_VOID; + break; + } + return t; +} + +/* ---- public type-info accessors -------------------------------------- */ + +static CfreeDwarfTypeKind map_kind(const CfreeDwarfType* t) { + if (!t) return CFREE_DT_VOID; + switch (t->kind) { + case DTK_VOID: + return CFREE_DT_VOID; + case DTK_PTR: + return CFREE_DT_PTR; + case DTK_ARRAY: + return CFREE_DT_ARRAY; + case DTK_STRUCT: + return CFREE_DT_STRUCT; + case DTK_UNION: + return CFREE_DT_UNION; + case DTK_ENUM: + return CFREE_DT_ENUM; + case DTK_TYPEDEF: + return CFREE_DT_TYPEDEF; + case DTK_FUNC: + return CFREE_DT_FUNC; + case DTK_CONST: + case DTK_VOLATILE: + case DTK_RESTRICT: + return t->inner ? map_kind(t->inner) : CFREE_DT_VOID; + case DTK_BASE: + switch (t->base_encoding) { + case DW_ATE_boolean: + return CFREE_DT_BOOL; + case DW_ATE_float: + case DW_ATE_complex_float: + return CFREE_DT_FLOAT; + case DW_ATE_signed_char: + return CFREE_DT_CHAR; + case DW_ATE_unsigned_char: + return CFREE_DT_CHAR; + case DW_ATE_unsigned: + case DW_ATE_address: + case DW_ATE_UTF: + return CFREE_DT_UINT; + case DW_ATE_signed: + return CFREE_DT_SINT; + default: + return CFREE_DT_UINT; + } + } + return CFREE_DT_VOID; +} + +CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType* t) { + CfreeDwarfTypeInfo info; + memset(&info, 0, sizeof(info)); + info.name = ""; + if (!t) { + info.kind = CFREE_DT_VOID; + return info; + } + info.kind = map_kind(t); + info.byte_size = t->byte_size; + info.name = t->name ? t->name : ""; + info.element_count = t->element_count; + /* For TYPEDEF/PTR/ARRAY: expose inner. For BASE_CHAR map signedness. 
*/ + switch (t->kind) { + case DTK_BASE: + if (t->base_encoding == DW_ATE_signed_char) + info.kind = CFREE_DT_SINT; + else if (t->base_encoding == DW_ATE_unsigned_char) + info.kind = CFREE_DT_UINT; + break; + case DTK_PTR: + case DTK_ARRAY: + case DTK_TYPEDEF: + case DTK_FUNC: + info.inner = t->inner; + break; + case DTK_CONST: + case DTK_VOLATILE: + case DTK_RESTRICT: + /* Transparent: report inner directly. */ + if (t->inner) { + return cfree_dwarf_type_info(t->inner); + } + break; + default: + break; + } + return info; +} + +/* Field iterator. */ +struct CfreeDwarfFieldIter { + CfreeDebugInfo* d; + const CfreeDwarfType* t; + u32 idx; +}; + +CfreeDwarfFieldIter* cfree_dwarf_field_iter_new(CfreeDebugInfo* d, + const CfreeDwarfType* t) { + CfreeDwarfFieldIter* it; + if (!d || !t) return NULL; + it = (CfreeDwarfFieldIter*)d->h->alloc(d->h, sizeof(*it), + _Alignof(CfreeDwarfFieldIter)); + if (!it) return NULL; + it->d = d; + /* Look through typedef / qualifiers to the underlying aggregate. */ + while (t && (t->kind == DTK_TYPEDEF || t->kind == DTK_CONST || + t->kind == DTK_VOLATILE || t->kind == DTK_RESTRICT)) + t = t->inner; + it->t = t; + it->idx = 0; + return it; +} + +int cfree_dwarf_field_iter_next(CfreeDwarfFieldIter* it, CfreeDwarfField* out) { + const CfreeDwarfType* t; + if (!it || !out || !it->t) return 0; + t = it->t; + if (t->kind != DTK_STRUCT && t->kind != DTK_UNION) return 0; + if (it->idx >= t->nfields) return 0; + { + DwField* f = &t->fields[it->idx++]; + out->name = f->name ? 
f->name : ""; + out->byte_offset = f->byte_offset; + out->bit_offset = f->bit_offset; + out->bit_size = f->bit_size; + out->type = f->type; + } + return 1; +} + +void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter* it) { + if (!it) return; + it->d->h->free(it->d->h, it, sizeof(*it)); +} + +struct CfreeDwarfEnumIter { + CfreeDebugInfo* d; + const CfreeDwarfType* t; + u32 idx; +}; + +CfreeDwarfEnumIter* cfree_dwarf_enum_iter_new(CfreeDebugInfo* d, + const CfreeDwarfType* t) { + CfreeDwarfEnumIter* it; + if (!d || !t) return NULL; + it = (CfreeDwarfEnumIter*)d->h->alloc(d->h, sizeof(*it), + _Alignof(CfreeDwarfEnumIter)); + if (!it) return NULL; + it->d = d; + while (t && (t->kind == DTK_TYPEDEF || t->kind == DTK_CONST || + t->kind == DTK_VOLATILE || t->kind == DTK_RESTRICT)) + t = t->inner; + it->t = t; + it->idx = 0; + return it; +} + +int cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter* it, CfreeDwarfEnumVal* out) { + const CfreeDwarfType* t; + if (!it || !out || !it->t) return 0; + t = it->t; + if (t->kind != DTK_ENUM) return 0; + if (it->idx >= t->nevals) return 0; + out->name = t->evals[it->idx].name ? t->evals[it->idx].name : ""; + out->value = t->evals[it->idx].value; + it->idx++; + return 1; +} + +void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter* it) { + if (!it) return; + it->d->h->free(it->d->h, it, sizeof(*it)); +} diff --git a/src/dwarf/dwarf_cfi.c b/src/dwarf/dwarf_cfi.c @@ -1,437 +0,0 @@ -/* dwarf_cfi.c — CFI machine + cfree_dwarf_unwind_step. - * - * Per doc/DWARF.md §4.5: walk .eh_frame from the highest-address end - * (CIEs first), run the FDE program for the FDE whose - * (initial_location, address_range) covers frame->pc. Output mutates - * frame->pc, frame->cfa, and caller-saved register slots. - * - * Status: minimal Phase-4 implementation. Decodes the FDE that covers - * `frame->pc` and applies a small subset of CFA opcodes sufficient for - * the aarch64 frame-pointer prologues the producer emits today. 
Returns - * 1 (no caller info) if no FDE matches or the section is empty — - * callers must treat 1 as "stack bottom" per the API contract. - */ - -#include <cfree.h> -#include <stdint.h> -#include <string.h> - -#include "core/core.h" -#include "core/heap.h" -#include "dwarf/dwarf_internal.h" - -/* DW_CFA opcodes (subset). */ -#define DW_CFA_advance_loc 0x40 -#define DW_CFA_offset 0x80 -#define DW_CFA_restore 0xc0 -#define DW_CFA_nop 0x00 -#define DW_CFA_set_loc 0x01 -#define DW_CFA_advance_loc1 0x02 -#define DW_CFA_advance_loc2 0x03 -#define DW_CFA_advance_loc4 0x04 -#define DW_CFA_offset_extended 0x05 -#define DW_CFA_restore_extended 0x06 -#define DW_CFA_undefined 0x07 -#define DW_CFA_same_value 0x08 -#define DW_CFA_register 0x09 -#define DW_CFA_remember_state 0x0a -#define DW_CFA_restore_state 0x0b -#define DW_CFA_def_cfa 0x0c -#define DW_CFA_def_cfa_register 0x0d -#define DW_CFA_def_cfa_offset 0x0e -#define DW_CFA_def_cfa_expression 0x0f -#define DW_CFA_expression 0x10 -#define DW_CFA_offset_extended_sf 0x11 -#define DW_CFA_def_cfa_sf 0x12 -#define DW_CFA_def_cfa_offset_sf 0x13 -#define DW_CFA_val_offset 0x14 -#define DW_CFA_val_offset_sf 0x15 -#define DW_CFA_val_expression 0x16 - -/* DW_EH_PE encoding bits */ -#define DW_EH_PE_absptr 0x00 -#define DW_EH_PE_omit 0xff -#define DW_EH_PE_uleb128 0x01 -#define DW_EH_PE_udata2 0x02 -#define DW_EH_PE_udata4 0x03 -#define DW_EH_PE_udata8 0x04 -#define DW_EH_PE_sleb128 0x09 -#define DW_EH_PE_sdata2 0x0a -#define DW_EH_PE_sdata4 0x0b -#define DW_EH_PE_sdata8 0x0c -#define DW_EH_PE_pcrel 0x10 -#define DW_EH_PE_textrel 0x20 -#define DW_EH_PE_datarel 0x30 -#define DW_EH_PE_funcrel 0x40 -#define DW_EH_PE_aligned 0x50 - -#define CFI_REG_MAX 32 - -typedef struct CfiRule { - /* 0=undefined, 1=offset(cfa+N), 2=register(R), 3=same_value */ - u8 kind; - i64 offset; - u32 reg; -} CfiRule; - -typedef struct CfiState { - /* CFA: cfa = regs[reg] + offset (kind 0), or expression (kind 1). 
*/ - int cfa_kind; /* 0 = reg+offset; 1 = expression (unhandled) */ - u32 cfa_reg; - i64 cfa_offset; - CfiRule rules[CFI_REG_MAX]; - i32 code_align; - i32 data_align; - u32 return_reg; -} CfiState; - -static u64 read_eh_ptr(const u8* base, u32 size, u32* off, u8 enc) { - u64 v = 0; - switch (enc & 0x0f) { - case DW_EH_PE_absptr: - case DW_EH_PE_udata8: - v = dw_u64(base, size, off); - break; - case DW_EH_PE_uleb128: - v = dw_uleb(base, size, off); - break; - case DW_EH_PE_udata2: - v = dw_u16(base, size, off); - break; - case DW_EH_PE_udata4: - v = dw_u32(base, size, off); - break; - case DW_EH_PE_sleb128: - v = (u64)dw_sleb(base, size, off); - break; - case DW_EH_PE_sdata2: - v = (u64)(i64)(i16)dw_u16(base, size, off); - break; - case DW_EH_PE_sdata4: - v = (u64)(i64)(i32)dw_u32(base, size, off); - break; - case DW_EH_PE_sdata8: - v = (u64)dw_u64(base, size, off); - break; - default: - break; - } - return v; -} - -static void run_cfi(const u8* prog, u32 plen, CfiState* st, u64* loc, - u64 stop_pc) { - u32 off = 0; - while (off < plen) { - u8 op = prog[off++]; - u8 hi = op & 0xc0; - u8 lo = op & 0x3f; - if (hi == DW_CFA_advance_loc) { - *loc += (u64)lo * (u64)st->code_align; - if (*loc > stop_pc) return; - continue; - } - if (hi == DW_CFA_offset) { - u64 fac = dw_uleb(prog, plen, &off); - if (lo < CFI_REG_MAX) { - st->rules[lo].kind = 1; - st->rules[lo].offset = (i64)fac * (i64)st->data_align; - } - continue; - } - if (hi == DW_CFA_restore) { - if (lo < CFI_REG_MAX) st->rules[lo].kind = 0; - continue; - } - switch (op) { - case DW_CFA_nop: - break; - case DW_CFA_advance_loc1: { - u8 v = dw_u8(prog, plen, &off); - *loc += (u64)v * (u64)st->code_align; - if (*loc > stop_pc) return; - } break; - case DW_CFA_advance_loc2: { - u16 v = dw_u16(prog, plen, &off); - *loc += (u64)v * (u64)st->code_align; - if (*loc > stop_pc) return; - } break; - case DW_CFA_advance_loc4: { - u32 v = dw_u32(prog, plen, &off); - *loc += (u64)v * (u64)st->code_align; - if (*loc > stop_pc) 
return; - } break; - case DW_CFA_set_loc: - *loc = dw_u64(prog, plen, &off); - if (*loc > stop_pc) return; - break; - case DW_CFA_def_cfa: { - u64 r = dw_uleb(prog, plen, &off); - u64 o = dw_uleb(prog, plen, &off); - st->cfa_kind = 0; - st->cfa_reg = (u32)r; - st->cfa_offset = (i64)o; - } break; - case DW_CFA_def_cfa_register: { - u64 r = dw_uleb(prog, plen, &off); - st->cfa_reg = (u32)r; - } break; - case DW_CFA_def_cfa_offset: { - u64 o = dw_uleb(prog, plen, &off); - st->cfa_offset = (i64)o; - } break; - case DW_CFA_def_cfa_sf: { - u64 r = dw_uleb(prog, plen, &off); - i64 o = dw_sleb(prog, plen, &off); - st->cfa_kind = 0; - st->cfa_reg = (u32)r; - st->cfa_offset = o * st->data_align; - } break; - case DW_CFA_def_cfa_offset_sf: { - i64 o = dw_sleb(prog, plen, &off); - st->cfa_offset = o * st->data_align; - } break; - case DW_CFA_offset_extended: { - u64 r = dw_uleb(prog, plen, &off); - u64 fac = dw_uleb(prog, plen, &off); - if (r < CFI_REG_MAX) { - st->rules[r].kind = 1; - st->rules[r].offset = (i64)fac * (i64)st->data_align; - } - } break; - case DW_CFA_offset_extended_sf: { - u64 r = dw_uleb(prog, plen, &off); - i64 fac = dw_sleb(prog, plen, &off); - if (r < CFI_REG_MAX) { - st->rules[r].kind = 1; - st->rules[r].offset = fac * st->data_align; - } - } break; - case DW_CFA_register: { - u64 r1 = dw_uleb(prog, plen, &off); - u64 r2 = dw_uleb(prog, plen, &off); - if (r1 < CFI_REG_MAX) { - st->rules[r1].kind = 2; - st->rules[r1].reg = (u32)r2; - } - } break; - case DW_CFA_undefined: { - u64 r = dw_uleb(prog, plen, &off); - if (r < CFI_REG_MAX) st->rules[r].kind = 0; - } break; - case DW_CFA_same_value: { - u64 r = dw_uleb(prog, plen, &off); - if (r < CFI_REG_MAX) st->rules[r].kind = 3; - } break; - case DW_CFA_remember_state: - case DW_CFA_restore_state: - /* Not modelled — would need a state stack. Best-effort: skip. 
*/ - break; - case DW_CFA_def_cfa_expression: { - u64 n = dw_uleb(prog, plen, &off); - off += (u32)n; - st->cfa_kind = 1; /* expression — we can't evaluate without frame */ - } break; - case DW_CFA_expression: - case DW_CFA_val_expression: { - (void)dw_uleb(prog, plen, &off); - { - u64 n = dw_uleb(prog, plen, &off); - off += (u32)n; - } - } break; - case DW_CFA_val_offset: { - (void)dw_uleb(prog, plen, &off); - (void)dw_uleb(prog, plen, &off); - } break; - case DW_CFA_val_offset_sf: { - (void)dw_uleb(prog, plen, &off); - (void)dw_sleb(prog, plen, &off); - } break; - default: - return; /* unknown opcode — bail */ - } - } -} - -int cfree_dwarf_unwind_step(CfreeDebugInfo* d, CfreeUnwindFrame* frame) { - u32 off; - if (!d || !frame) return 1; - if (d->eh_frame.sec_idx == UINT32_MAX || d->eh_frame.size == 0) return 1; - /* Sweep .eh_frame entries, locating the FDE that covers frame->pc. */ - off = 0; - while (off < d->eh_frame.size) { - u32 length = dw_u32(d->eh_frame.data, d->eh_frame.size, &off); - u32 entry_end; - u32 cie_id_off = off; - u32 cie_id; - if (length == 0) break; /* terminator */ - if (length == 0xffffffffu) return 1; /* 64-bit eh_frame unsupported */ - entry_end = off + length; - cie_id = dw_u32(d->eh_frame.data, d->eh_frame.size, &off); - if (cie_id == 0) { - /* CIE — skip body; we'll re-read on demand when its FDEs reference it. */ - off = entry_end; - continue; - } - { - /* FDE: cie_id is a backwards offset to the CIE. */ - u32 cie_pointer_pos = cie_id_off; /* offset of the cie_id field */ - u32 cie_start = cie_pointer_pos - cie_id; - u32 cie_off, cie_len, cie_ver; - const char* aug; - u8 fde_pe = DW_EH_PE_absptr; - i32 code_align; - i32 data_align; - u32 return_reg; - u32 cie_id_at_cie; - u32 cie_aug_data_len = 0; - u8 has_aug_data = 0; - u32 cie_inst_off, cie_inst_end; - u64 fde_pc; - u64 fde_range; - CfiState st; - - /* Parse CIE header. 
*/ - cie_off = cie_start; - cie_len = dw_u32(d->eh_frame.data, d->eh_frame.size, &cie_off); - (void)cie_len; - cie_id_at_cie = dw_u32(d->eh_frame.data, d->eh_frame.size, &cie_off); - (void)cie_id_at_cie; /* should be 0 */ - cie_ver = dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); - if (cie_ver != 1 && cie_ver != 3 && cie_ver != 4) { - off = entry_end; - continue; - } - aug = dw_cstr(d->eh_frame.data, d->eh_frame.size, &cie_off); - if (cie_ver == 4) { - (void)dw_u8(d->eh_frame.data, d->eh_frame.size, - &cie_off); /* address_size */ - (void)dw_u8(d->eh_frame.data, d->eh_frame.size, - &cie_off); /* segment_size */ - } - code_align = (i32)dw_uleb(d->eh_frame.data, d->eh_frame.size, &cie_off); - data_align = (i32)dw_sleb(d->eh_frame.data, d->eh_frame.size, &cie_off); - if (cie_ver == 1) { - return_reg = dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); - } else { - return_reg = (u32)dw_uleb(d->eh_frame.data, d->eh_frame.size, &cie_off); - } - /* Parse augmentation. */ - { - const char* a = aug; - if (a && a[0] == 'z') { - cie_aug_data_len = - (u32)dw_uleb(d->eh_frame.data, d->eh_frame.size, &cie_off); - has_aug_data = 1; - (void)cie_aug_data_len; - a++; - while (*a) { - switch (*a) { - case 'R': - fde_pe = dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); - break; - case 'P': { - u8 enc = dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); - (void)read_eh_ptr(d->eh_frame.data, d->eh_frame.size, &cie_off, - enc); - } break; - case 'L': - (void)dw_u8(d->eh_frame.data, d->eh_frame.size, &cie_off); - break; - case 'S': - case 'B': - break; - default: - break; - } - a++; - } - } else if (a && a[0] != 0) { - /* Unknown augmentation chars without 'z' — bail. */ - off = entry_end; - continue; - } - } - cie_inst_off = cie_off; - /* CIE body extends to entry_start of CIE plus 4 + cie_len. We already - * consumed length+id, so the upper bound is cie_start + 4 + cie_len. 
*/ - cie_inst_end = cie_start + 4 + cie_len; - (void)has_aug_data; - - /* Run CIE initial instructions. */ - memset(&st, 0, sizeof(st)); - st.code_align = code_align; - st.data_align = data_align; - st.return_reg = return_reg; - run_cfi(d->eh_frame.data + cie_inst_off, - cie_inst_end > cie_inst_off ? cie_inst_end - cie_inst_off : 0, - &st, &(u64){0}, ~(u64)0); - - /* Parse FDE pc, range. */ - { - u32 pc_off = off; - fde_pc = read_eh_ptr(d->eh_frame.data, d->eh_frame.size, &off, fde_pe); - if ((fde_pe & 0xf0) == DW_EH_PE_pcrel) { - /* pcrel: address is relative to the location of the encoded - * pointer itself within the section. We interpret as offset from - * pc_off. The runtime address is unknown to us absent a base — - * for an unrelocated obj, just keep the relative value. */ - fde_pc += pc_off; /* relative-to-section-offset best-effort */ - } - fde_range = read_eh_ptr(d->eh_frame.data, d->eh_frame.size, &off, - fde_pe & 0x0f); - } - /* Skip FDE augmentation data if CIE's z aug was set. */ - if (has_aug_data) { - u64 aug_len = dw_uleb(d->eh_frame.data, d->eh_frame.size, &off); - off += (u32)aug_len; - } - if (frame->pc < fde_pc || frame->pc >= fde_pc + fde_range) { - off = entry_end; - continue; - } - /* Run FDE instructions up to frame->pc. */ - { - u64 loc = fde_pc; - u32 fde_inst_off = off; - u32 fde_inst_end = entry_end; - run_cfi(d->eh_frame.data + fde_inst_off, - fde_inst_end > fde_inst_off ? fde_inst_end - fde_inst_off : 0, - &st, &loc, frame->pc); - } - /* Compute caller frame. */ - if (st.cfa_kind != 0 || st.cfa_reg >= 32) return 1; - { - u64 cfa = frame->regs[st.cfa_reg] + (u64)st.cfa_offset; - u32 r; - u64 ret_addr = 0; - /* For each register with a rule, we'd read CFA-relative memory to - * recover its caller value. Without a memory provider we can't - * actually load — leave registers as-is and just update cfa/pc. - * The return address sits in the rule for st.return_reg. If - * undefined, we're at the bottom. 
*/ - if (st.return_reg < CFI_REG_MAX && st.rules[st.return_reg].kind == 1) { - /* ret_addr = *(cfa + offset) — but we have no JIT session here. - * Caller-supplied frames typically include enough register state - * that the harness already captured x30. We treat "undefined" - * as bottom-of-stack. */ - ret_addr = 0; - } else if (st.return_reg < 32 && st.rules[st.return_reg].kind == 2) { - ret_addr = frame->regs[st.rules[st.return_reg].reg]; - } else { - return 1; /* bottom of stack */ - } - frame->cfa = cfa; - frame->pc = ret_addr; - for (r = 0; r < 32; ++r) { - /* Without memory access we can't load offset rules; leave the - * register value unchanged (best-effort). */ - (void)r; - } - } - return 0; - } - } - return 1; -} diff --git a/src/dwarf/dwarf_die.c b/src/dwarf/dwarf_die.c @@ -1,431 +0,0 @@ -/* dwarf_die.c — DIE walker: subprogram collection, locals, globals. - * - * Per doc/DWARF.md §4.3: streaming walker over .debug_info keyed off the - * abbrev table; collects subprograms, lexical_blocks, formal_parameters, - * variables. Cross-CU refs land later when needed. - */ - -#include <cfree.h> -#include <stddef.h> -#include <stdint.h> -#include <string.h> - -#include "core/core.h" -#include "core/heap.h" -#include "core/util.h" -#include "dwarf/dwarf_internal.h" - -/* ---- subprogram + lexical_block walk --------------------------------- */ - -static void pack_init(DieAttrPack* p) { memset(p, 0, sizeof(*p)); } - -/* Read all attributes of a DIE into pack `p`; updates *off to past attrs. 
*/ -static void read_pack(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, - DieAttrPack* p, u32* off) { - u32 i; - if (!die->abbrev) return; - for (i = 0; i < die->abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die->abbrev->attrs[i]; - DwAttrValue v; - dw_read_form(d, cu, aa->form, aa->implicit_const, off, &v); - switch (aa->attr) { - case DW_AT_name: - p->name = v.str; - break; - case DW_AT_low_pc: - p->low_pc = v.u; - p->has_low_pc = 1; - break; - case DW_AT_high_pc: - p->high_pc_value = v.u; - p->high_pc_form = aa->form; - p->has_high_pc = 1; - break; - case DW_AT_type: - /* Local CU offset: ref* forms are CU-relative; ref_addr is - * .debug_info-absolute. */ - if (aa->form == DW_FORM_ref_addr) - p->type_die_offset = (u32)v.u; - else - p->type_die_offset = cu->hdr_offset + (u32)v.u; - p->has_type = 1; - break; - case DW_AT_decl_file: - p->decl_file = (u32)v.u; - break; - case DW_AT_decl_line: - p->decl_line = (u32)v.u; - break; - case DW_AT_location: - if (aa->form == DW_FORM_loclistx) { - p->has_loclist = 1; - p->loclist_index = v.u; - } else if (aa->form == DW_FORM_exprloc || aa->form == DW_FORM_block || - aa->form == DW_FORM_block1 || aa->form == DW_FORM_block2 || - aa->form == DW_FORM_block4) { - p->loc_block = v.block; - p->loc_block_len = v.block_len; - } else if (aa->form == DW_FORM_sec_offset) { - /* Reference into .debug_loclists — not supported in Phase 5 - * baseline. */ - p->has_loclist = 1; - p->loclist_index = v.u; - } - break; - case DW_AT_frame_base: - p->fb_block = v.block; - p->fb_block_len = v.block_len; - break; - case DW_AT_const_value: - p->const_value = v.s; - p->has_const_value = 1; - break; - case DW_AT_data_member_location: - if (aa->form == DW_FORM_exprloc || aa->form == DW_FORM_block || - aa->form == DW_FORM_block1 || aa->form == DW_FORM_block2 || - aa->form == DW_FORM_block4) { - /* Best effort: evaluate a single DW_OP_plus_uconst form by - * peeking. 
*/ - if (v.block && v.block_len > 0 && v.block[0] == DW_OP_plus_uconst) { - u32 t = 1; - p->byte_offset = (u32)dw_uleb(v.block, v.block_len, &t); - p->has_byte_offset = 1; - } - } else { - p->byte_offset = (u32)v.u; - p->has_byte_offset = 1; - } - break; - case DW_AT_byte_size: - p->byte_size = (u32)v.u; - p->has_byte_size = 1; - break; - case DW_AT_bit_size: - p->bit_size = (u32)v.u; - p->has_bit_size = 1; - break; - case DW_AT_bit_offset: - case DW_AT_data_bit_offset: - p->bit_offset = (u32)v.u; - p->has_bit_offset = 1; - break; - case DW_AT_encoding: - p->base_encoding = (u32)v.u; - p->has_encoding = 1; - break; - case DW_AT_count: - case DW_AT_upper_bound: - p->array_count = (u32)v.u; - if (aa->attr == DW_AT_upper_bound) p->array_count++; - p->has_array_count = 1; - break; - } - } -} - -/* Append a subprogram (or skip if its bounds aren't useful). */ -static void push_subprog(CfreeDebugInfo* d, DwSubprog* sp) { - if (d->nsubs == d->subs_cap) { - u32 ncap = d->subs_cap ? d->subs_cap * 2 : 8; - DwSubprog* na = - (DwSubprog*)d->h->realloc(d->h, d->subs, d->subs_cap * sizeof(*d->subs), - ncap * sizeof(*d->subs), _Alignof(DwSubprog)); - if (!na) return; - d->subs = na; - d->subs_cap = ncap; - } - d->subs[d->nsubs++] = *sp; -} - -/* Walk a DIE subtree, collecting subprograms. */ -static void walk_for_subs(CfreeDebugInfo* d, u32 cu_idx, u32* off) { - DwCu* cu = &d->cus[cu_idx]; - for (;;) { - DwDie die; - if (!dw_read_die(d, cu, off, &die)) return; - if (die.abbrev->tag == DW_TAG_subprogram || - die.abbrev->tag == DW_TAG_inlined_subroutine) { - DieAttrPack p; - DwSubprog sp; - u32 saved_off; - pack_init(&p); - saved_off = *off; - read_pack(d, cu, &die, &p, off); - memset(&sp, 0, sizeof(sp)); - sp.name = p.name ? 
p.name : ""; - sp.low_pc = p.low_pc; - if (p.has_high_pc) { - if (p.high_pc_form == DW_FORM_addr) - sp.high_pc = p.high_pc_value; - else - sp.high_pc = p.low_pc + p.high_pc_value; - } else { - sp.high_pc = p.low_pc; - } - sp.decl_line = p.decl_line; - /* Resolve decl_file via the CU's line program. */ - sp.decl_file = ""; - if (p.decl_file != 0 && cu->has_stmt_list) { - DwLineProgram* lp; - if (!d->lines_built[cu_idx]) dw_build_line(d, cu_idx); - lp = &d->lines_by_cu[cu_idx]; - if (lp->nfile_norm && p.decl_file < lp->nfile_norm) - sp.decl_file = lp->file_norm[p.decl_file]; - } - sp.cu_idx = cu_idx; - sp.die_offset = die.die_off; - sp.frame_base = p.fb_block; - sp.frame_base_len = p.fb_block_len; - sp.inlined = (die.abbrev->tag == DW_TAG_inlined_subroutine); - if (p.has_low_pc && sp.high_pc > sp.low_pc) - push_subprog(d, &sp); - else if (die.abbrev->tag == DW_TAG_subprogram && p.name) - push_subprog(d, &sp); /* declaration-only OK */ - (void)saved_off; - /* Recurse into children for nested subprograms / inlines. */ - if (die.abbrev->has_children) { - walk_for_subs(d, cu_idx, off); - } - } else if (die.abbrev->has_children) { - /* Skip attrs, then descend. */ - u32 i; - for (i = 0; i < die.abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die.abbrev->attrs[i]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, off); - } - walk_for_subs(d, cu_idx, off); - } else { - u32 i; - for (i = 0; i < die.abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die.abbrev->attrs[i]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, off); - } - } - } -} - -void dw_build_subs(CfreeDebugInfo* d) { - u32 i; - if (d->subs_built) return; - d->subs_built = 1; - for (i = 0; i < d->ncus; ++i) { - DwCu* cu = &d->cus[i]; - u32 off = cu->die_start_off; - /* The root DIE is the CU itself — recurse into it. 
*/ - DwDie root; - if (!dw_read_die(d, cu, &off, &root)) continue; - /* Skip root attrs */ - { - u32 j; - for (j = 0; j < root.abbrev->nattrs; ++j) { - DwAbbrevAttr* aa = &root.abbrev->attrs[j]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); - } - } - if (root.abbrev->has_children) walk_for_subs(d, i, &off); - } -} - -DwSubprog* dw_find_subprog(CfreeDebugInfo* d, u64 pc) { - u32 i; - dw_build_subs(d); - for (i = 0; i < d->nsubs; ++i) { - DwSubprog* sp = &d->subs[i]; - if (sp->low_pc <= pc && pc < sp->high_pc) return sp; - } - return NULL; -} - -/* ---- locals + parameters --------------------------------------------- */ - -typedef struct LocalCtx { - CfreeDebugInfo* d; - u32 cu_idx; - DwLocal* params; - u32 nparams, params_cap; - DwLocal* locals; - u32 nlocals, locals_cap; -} LocalCtx; - -static void push_param(LocalCtx* x, DwLocal* v) { - if (x->nparams == x->params_cap) { - u32 ncap = x->params_cap ? x->params_cap * 2 : 4; - DwLocal* na = (DwLocal*)x->d->h->realloc( - x->d->h, x->params, x->params_cap * sizeof(*x->params), - ncap * sizeof(*x->params), _Alignof(DwLocal)); - if (!na) return; - x->params = na; - x->params_cap = ncap; - } - x->params[x->nparams++] = *v; -} -static void push_local(LocalCtx* x, DwLocal* v) { - if (x->nlocals == x->locals_cap) { - u32 ncap = x->locals_cap ? 
x->locals_cap * 2 : 4; - DwLocal* na = (DwLocal*)x->d->h->realloc( - x->d->h, x->locals, x->locals_cap * sizeof(*x->locals), - ncap * sizeof(*x->locals), _Alignof(DwLocal)); - if (!na) return; - x->locals = na; - x->locals_cap = ncap; - } - x->locals[x->nlocals++] = *v; -} - -static void walk_subprog_body(LocalCtx* x, u32* off, u64 scope_lo, u64 scope_hi, - u32 scope_die_off, u8 has_scope) { - CfreeDebugInfo* d = x->d; - DwCu* cu = &d->cus[x->cu_idx]; - for (;;) { - DwDie die; - if (!dw_read_die(d, cu, off, &die)) return; - if (die.abbrev->tag == DW_TAG_formal_parameter || - die.abbrev->tag == DW_TAG_variable) { - DieAttrPack p; - DwLocal v; - pack_init(&p); - read_pack(d, cu, &die, &p, off); - memset(&v, 0, sizeof(v)); - v.name = p.name ? p.name : ""; - v.die_offset = die.die_off; - v.type_die_offset = p.has_type ? p.type_die_offset : 0; - v.scope_lo = scope_lo; - v.scope_hi = scope_hi; - v.scope_offset = scope_die_off; - v.has_scope = has_scope; - v.loc = p.loc_block; - v.loc_len = p.loc_block_len; - v.has_loclist = p.has_loclist; - v.loclist_index = p.loclist_index; - v.is_param = (die.abbrev->tag == DW_TAG_formal_parameter); - v.is_global = 0; - if (v.is_param) - push_param(x, &v); - else - push_local(x, &v); - if (die.abbrev->has_children) - walk_subprog_body(x, off, scope_lo, scope_hi, scope_die_off, has_scope); - } else if (die.abbrev->tag == DW_TAG_lexical_block) { - DieAttrPack p; - pack_init(&p); - read_pack(d, cu, &die, &p, off); - { - u64 lo = p.has_low_pc ? p.low_pc : scope_lo; - u64 hi = p.has_high_pc - ? (p.high_pc_form == DW_FORM_addr ? 
p.high_pc_value - : lo + p.high_pc_value) - : scope_hi; - if (die.abbrev->has_children) - walk_subprog_body(x, off, lo, hi, die.die_off, 1); - } - } else { - u32 i; - for (i = 0; i < die.abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die.abbrev->attrs[i]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, off); - } - if (die.abbrev->has_children) - walk_subprog_body(x, off, scope_lo, scope_hi, scope_die_off, has_scope); - } - } -} - -void dw_build_locals(CfreeDebugInfo* d, DwSubprog* sp) { - LocalCtx x; - DwCu* cu; - u32 off; - DwDie die; - if (sp->cached_locals) return; - sp->cached_locals = 1; - cu = &d->cus[sp->cu_idx]; - off = sp->die_offset; - if (!dw_read_die(d, cu, &off, &die)) return; - if (!die.abbrev || !die.abbrev->has_children) return; - /* Skip subprog attrs */ - { - u32 i; - for (i = 0; i < die.abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die.abbrev->attrs[i]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); - } - } - memset(&x, 0, sizeof(x)); - x.d = d; - x.cu_idx = sp->cu_idx; - walk_subprog_body(&x, &off, sp->low_pc, sp->high_pc, sp->die_offset, 1); - sp->params = x.params; - sp->nparams = x.nparams; - sp->locals = x.locals; - sp->nlocals = x.nlocals; -} - -/* ---- globals --------------------------------------------------------- */ - -void dw_build_globals(CfreeDebugInfo* d) { - u32 i; - if (d->globals_built) return; - d->globals_built = 1; - for (i = 0; i < d->ncus; ++i) { - DwCu* cu = &d->cus[i]; - u32 off = cu->die_start_off; - DwDie root; - if (!dw_read_die(d, cu, &off, &root)) continue; - { - u32 j; - for (j = 0; j < root.abbrev->nattrs; ++j) { - DwAbbrevAttr* aa = &root.abbrev->attrs[j]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); - } - } - if (!root.abbrev->has_children) continue; - /* Walk only top-level children of the CU; collect DW_TAG_variable. 
*/ - for (;;) { - DwDie die; - if (!dw_read_die(d, cu, &off, &die)) break; - if (die.abbrev->tag == DW_TAG_variable) { - DieAttrPack p; - DwLocal v; - pack_init(&p); - read_pack(d, cu, &die, &p, &off); - memset(&v, 0, sizeof(v)); - v.name = p.name ? p.name : ""; - v.die_offset = die.die_off; - v.type_die_offset = p.has_type ? p.type_die_offset : 0; - v.loc = p.loc_block; - v.loc_len = p.loc_block_len; - v.has_loclist = p.has_loclist; - v.loclist_index = p.loclist_index; - v.is_param = 0; - v.is_global = 1; - if (d->nglobals == d->globals_cap) { - u32 ncap = d->globals_cap ? d->globals_cap * 2 : 8; - DwLocal* na = (DwLocal*)d->h->realloc( - d->h, d->globals, d->globals_cap * sizeof(*d->globals), - ncap * sizeof(*d->globals), _Alignof(DwLocal)); - if (!na) break; - d->globals = na; - d->globals_cap = ncap; - } - d->globals[d->nglobals++] = v; - if (die.abbrev->has_children) { - /* Skip children. */ - for (;;) { - DwDie c; - if (!dw_read_die(d, cu, &off, &c)) break; - dw_skip_die_subtree(d, cu, &c, &off); - } - } - } else { - dw_skip_die_subtree(d, cu, &die, &off); - } - } - } -} - -/* Public accessor for the type module: read attrs given die. */ -void dw_die_pack(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, - DieAttrPack* p) { - u32 off = die->attrs_off; - pack_init(p); - read_pack(d, cu, die, p, &off); -} diff --git a/src/dwarf/dwarf_internal.h b/src/dwarf/dwarf_internal.h @@ -1,622 +0,0 @@ -#ifndef CFREE_DWARF_INTERNAL_H -#define CFREE_DWARF_INTERNAL_H - -/* DWARF 5 consumer — internal types. - * - * This module reads DWARF bytes out of a CfreeObjFile and answers the - * cfree_dwarf_* queries. It does NOT include src/debug/ — the public - * DWARF wire format is the only contract between producer and consumer - * (per doc/DWARF.md §7). 
- */ - -#include <cfree.h> - -#include "core/core.h" -#include "core/heap.h" - -/* ---- DWARF 5 constants (subset we use) --------------------------------- */ - -/* DW_TAG */ -#define DW_TAG_array_type 0x01 -#define DW_TAG_class_type 0x02 -#define DW_TAG_enumeration_type 0x04 -#define DW_TAG_formal_parameter 0x05 -#define DW_TAG_lexical_block 0x0b -#define DW_TAG_member 0x0d -#define DW_TAG_pointer_type 0x0f -#define DW_TAG_reference_type 0x10 -#define DW_TAG_compile_unit 0x11 -#define DW_TAG_structure_type 0x13 -#define DW_TAG_subroutine_type 0x15 -#define DW_TAG_typedef 0x16 -#define DW_TAG_union_type 0x17 -#define DW_TAG_inheritance 0x1c -#define DW_TAG_inlined_subroutine 0x1d -#define DW_TAG_subrange_type 0x21 -#define DW_TAG_base_type 0x24 -#define DW_TAG_const_type 0x26 -#define DW_TAG_enumerator 0x28 -#define DW_TAG_subprogram 0x2e -#define DW_TAG_variable 0x34 -#define DW_TAG_volatile_type 0x35 -#define DW_TAG_restrict_type 0x37 - -/* DW_AT */ -#define DW_AT_sibling 0x01 -#define DW_AT_location 0x02 -#define DW_AT_name 0x03 -#define DW_AT_byte_size 0x0b -#define DW_AT_bit_offset 0x0c /* DWARF 3/4; DW5 uses data_bit_offset */ -#define DW_AT_bit_size 0x0d -#define DW_AT_stmt_list 0x10 -#define DW_AT_low_pc 0x11 -#define DW_AT_high_pc 0x12 -#define DW_AT_language 0x13 -#define DW_AT_comp_dir 0x1b -#define DW_AT_const_value 0x1c -#define DW_AT_upper_bound 0x2f -#define DW_AT_producer 0x25 -#define DW_AT_prototyped 0x27 -#define DW_AT_start_scope 0x2c -#define DW_AT_bit_stride 0x2e -#define DW_AT_count 0x37 -#define DW_AT_data_member_location 0x38 -#define DW_AT_decl_file 0x3a -#define DW_AT_decl_line 0x3b -#define DW_AT_declaration 0x3c -#define DW_AT_encoding 0x3e -#define DW_AT_external 0x3f -#define DW_AT_frame_base 0x40 -#define DW_AT_specification 0x47 -#define DW_AT_type 0x49 -#define DW_AT_ranges 0x55 -#define DW_AT_data_bit_offset 0x6b -#define DW_AT_str_offsets_base 0x72 -#define DW_AT_addr_base 0x73 -#define DW_AT_rnglists_base 0x74 -#define 
DW_AT_loclists_base 0x8c - -/* DW_FORM */ -#define DW_FORM_addr 0x01 -#define DW_FORM_block2 0x03 -#define DW_FORM_block4 0x04 -#define DW_FORM_data2 0x05 -#define DW_FORM_data4 0x06 -#define DW_FORM_data8 0x07 -#define DW_FORM_string 0x08 -#define DW_FORM_block 0x09 -#define DW_FORM_block1 0x0a -#define DW_FORM_data1 0x0b -#define DW_FORM_flag 0x0c -#define DW_FORM_sdata 0x0d -#define DW_FORM_strp 0x0e -#define DW_FORM_udata 0x0f -#define DW_FORM_ref_addr 0x10 -#define DW_FORM_ref1 0x11 -#define DW_FORM_ref2 0x12 -#define DW_FORM_ref4 0x13 -#define DW_FORM_ref8 0x14 -#define DW_FORM_ref_udata 0x15 -#define DW_FORM_indirect 0x16 -#define DW_FORM_sec_offset 0x17 -#define DW_FORM_exprloc 0x18 -#define DW_FORM_flag_present 0x19 -#define DW_FORM_strx 0x1a -#define DW_FORM_addrx 0x1b -#define DW_FORM_ref_sup4 0x1c -#define DW_FORM_strp_sup 0x1d -#define DW_FORM_data16 0x1e -#define DW_FORM_line_strp 0x1f -#define DW_FORM_ref_sig8 0x20 -#define DW_FORM_implicit_const 0x21 -#define DW_FORM_loclistx 0x22 -#define DW_FORM_rnglistx 0x23 -#define DW_FORM_ref_sup8 0x24 -#define DW_FORM_strx1 0x26 -#define DW_FORM_strx2 0x27 -#define DW_FORM_strx3 0x28 -#define DW_FORM_strx4 0x29 -#define DW_FORM_addrx1 0x2a -#define DW_FORM_addrx2 0x2b -#define DW_FORM_addrx3 0x2c -#define DW_FORM_addrx4 0x2d - -/* DW_LNS / DW_LNE */ -#define DW_LNS_copy 0x01 -#define DW_LNS_advance_pc 0x02 -#define DW_LNS_advance_line 0x03 -#define DW_LNS_set_file 0x04 -#define DW_LNS_set_column 0x05 -#define DW_LNS_negate_stmt 0x06 -#define DW_LNS_set_basic_block 0x07 -#define DW_LNS_const_add_pc 0x08 -#define DW_LNS_fixed_advance_pc 0x09 -#define DW_LNS_set_prologue_end 0x0a -#define DW_LNS_set_epilogue_begin 0x0b -#define DW_LNS_set_isa 0x0c - -#define DW_LNE_end_sequence 0x01 -#define DW_LNE_set_address 0x02 -#define DW_LNE_set_discriminator 0x04 - -#define DW_LNCT_path 0x01 -#define DW_LNCT_directory_index 0x02 -#define DW_LNCT_timestamp 0x03 -#define DW_LNCT_size 0x04 -#define DW_LNCT_MD5 0x05 - -/* 
DW_OP — subset (per DWARF.md §4.4) */ -#define DW_OP_addr 0x03 -#define DW_OP_const1u 0x08 -#define DW_OP_const1s 0x09 -#define DW_OP_const2u 0x0a -#define DW_OP_const2s 0x0b -#define DW_OP_const4u 0x0c -#define DW_OP_const4s 0x0d -#define DW_OP_const8u 0x0e -#define DW_OP_const8s 0x0f -#define DW_OP_constu 0x10 -#define DW_OP_consts 0x11 -#define DW_OP_dup 0x12 -#define DW_OP_drop 0x13 -#define DW_OP_and 0x1a -#define DW_OP_minus 0x1c -#define DW_OP_mul 0x1e -#define DW_OP_or 0x21 -#define DW_OP_plus 0x22 -#define DW_OP_plus_uconst 0x23 -#define DW_OP_shl 0x24 -#define DW_OP_shr 0x25 -#define DW_OP_shra 0x26 -#define DW_OP_xor 0x27 -#define DW_OP_lit0 0x30 -#define DW_OP_reg0 0x50 -#define DW_OP_breg0 0x70 -#define DW_OP_regx 0x90 -#define DW_OP_fbreg 0x91 -#define DW_OP_bregx 0x92 -#define DW_OP_call_frame_cfa 0x9c -#define DW_OP_stack_value 0x9f - -/* DW_ATE encodings */ -#define DW_ATE_address 0x01 -#define DW_ATE_boolean 0x02 -#define DW_ATE_complex_float 0x03 -#define DW_ATE_float 0x04 -#define DW_ATE_signed 0x05 -#define DW_ATE_signed_char 0x06 -#define DW_ATE_unsigned 0x07 -#define DW_ATE_unsigned_char 0x08 -#define DW_ATE_UTF 0x10 - -/* DW_LANG */ -#define DW_LANG_C 0x02 -#define DW_LANG_C89 0x01 -#define DW_LANG_C99 0x0c -#define DW_LANG_C11 0x1d -#define DW_LANG_C17 0x2c - -/* DW_CHILDREN */ -#define DW_CHILDREN_no 0x00 -#define DW_CHILDREN_yes 0x01 - -/* ---- Section & byte slice helpers ------------------------------------- */ - -typedef struct DwSection { - const u8* data; - u32 size; - u32 sec_idx; /* 0-based section index, or UINT32_MAX if missing */ -} DwSection; - -/* ---- Abbrev table ---- */ - -typedef struct DwAbbrevAttr { - u32 attr; /* DW_AT_* */ - u32 form; /* DW_FORM_* */ - i64 implicit_const; /* for DW_FORM_implicit_const */ -} DwAbbrevAttr; - -typedef struct DwAbbrev { - u64 code; /* abbrev code; 0 if unused slot */ - u32 tag; /* DW_TAG_* */ - u8 has_children; - u32 nattrs; - DwAbbrevAttr* attrs; /* heap-allocated */ -} DwAbbrev; - 
-typedef struct DwAbbrevTable { - u32 cu_abbrev_offset; /* offset into .debug_abbrev */ - /* Dense map: code → index (or 0 if absent). For typical small tables we - * keep them in a sorted array searched linearly. */ - DwAbbrev* abbrevs; - u32 nabbrevs; - u32 cap; -} DwAbbrevTable; - -/* ---- Compilation unit ---- */ - -typedef struct DwCu { - u32 hdr_offset; /* offset of CU header in .debug_info */ - u32 hdr_length; /* length of unit_length bytes (after the size field itself) - */ - u32 unit_total_size; /* hdr_length + length-field size (4 for 32-bit init) */ - u32 die_start_off; /* offset where the first DIE starts (in .debug_info) */ - u8 version; - u8 address_size; - u8 unit_type; - u8 is_64bit; /* DWARF64? */ - u32 abbrev_offset; /* into .debug_abbrev */ - u32 str_offsets_base; - u32 addr_base; - u32 loclists_base; - u32 rnglists_base; - u32 stmt_list; /* DW_AT_stmt_list value (offset into .debug_line) */ - u8 has_stmt_list; - const char* comp_dir; - const char* name; - /* Index of abbrev table in dbg->abbrevs */ - u32 abbrev_table_idx; -} DwCu; - -/* ---- Materialized DIEs (we cache only what we need) ---- */ - -/* A reference into .debug_info (compilation-unit relative). We store CU - * index plus offset-from-CU-header so we can resolve cross-CU later. 
*/ -typedef struct DwDieRef { - u32 cu_idx; - u32 die_offset; /* absolute offset into .debug_info bytes */ -} DwDieRef; - -/* ---- Type cache ---- */ - -typedef enum DwTypeKind { - DTK_VOID, - DTK_BASE, /* maps to SINT/UINT/BOOL/FLOAT/CHAR by encoding */ - DTK_PTR, - DTK_ARRAY, - DTK_STRUCT, - DTK_UNION, - DTK_ENUM, - DTK_TYPEDEF, - DTK_FUNC, - DTK_CONST, /* alias to inner */ - DTK_VOLATILE, - DTK_RESTRICT, -} DwTypeKind; - -typedef struct DwField { - const char* name; - u32 byte_offset; - u32 bit_offset; - u32 bit_size; - struct CfreeDwarfType* type; -} DwField; - -typedef struct DwEnumVal { - const char* name; - i64 value; -} DwEnumVal; - -struct CfreeDwarfType { - DwTypeKind kind; - u32 byte_size; - const char* name; - u32 element_count; - u32 die_offset; /* origin DIE for cycle-detection / dedup */ - /* DT_PTR/ARRAY/TYPEDEF/CONST/VOLATILE/RESTRICT/FUNC: inner type */ - struct CfreeDwarfType* inner; - /* Base type encoding (DW_ATE_*) — used to derive SINT/UINT/CHAR/BOOL/FLOAT */ - u32 base_encoding; - /* STRUCT/UNION fields */ - DwField* fields; - u32 nfields; - /* ENUM values */ - DwEnumVal* evals; - u32 nevals; -}; - -/* ---- Line program decoded matrix ---- */ - -typedef struct DwLineRow { - u64 address; - u32 file_index; - u32 line; - u32 column; - u8 is_stmt; - u8 end_sequence; -} DwLineRow; - -typedef struct DwLineFile { - const char* path; /* interned in our string table */ - u32 dir_index; -} DwLineFile; - -typedef struct DwLineProgram { - /* Per-CU line program decoding state. We materialize all rows into a - * single rows array for fast lookup. */ - DwLineRow* rows; - u32 nrows; - u32 cap; - /* File table (file_index 0 is the CU primary in DW5). */ - DwLineFile* files; - u32 nfiles; - const char** dirs; - u32 ndirs; - /* Cached fully-qualified path per file, lazily built. 
*/ - const char** file_norm; - u32 nfile_norm; -} DwLineProgram; - -/* ---- Subprogram descriptor (cached) ---- */ - -typedef struct DwLocal { - const char* name; - u32 die_offset; - u32 type_die_offset; - u64 scope_lo; /* PCs at which the var is in scope. */ - u64 scope_hi; /* (low_pc, high_pc) of nearest enclosing block. */ - u32 scope_offset; /* offset of the lexical_block DIE; 0 = subprog scope */ - u8 has_scope; - /* Location form: either an exprloc or a loclistx index. */ - const u8* loc; - u32 loc_len; - u8 has_loclist; - u64 loclist_index; - /* Role: ARG vs LOCAL. */ - u8 is_param; - /* For globals only: the global variable role. */ - u8 is_global; -} DwLocal; - -typedef struct DwSubprog { - const char* name; - u64 low_pc; - u64 high_pc; - const char* decl_file; - u32 decl_line; - u32 cu_idx; - u32 die_offset; /* offset of the subprogram DIE */ - /* Frame base — DW_AT_frame_base exprloc bytes (or NULL). */ - const u8* frame_base; - u32 frame_base_len; - /* Cached params and locals (lazily). */ - DwLocal* params; - u32 nparams; - DwLocal* locals; - u32 nlocals; - u8 inlined; - u8 cached_locals; -} DwSubprog; - -/* ---- The main consumer state ---- */ - -typedef struct DwString { - Sym sym; /* interned in compiler->global pool */ -} DwString; - -struct CfreeDebugInfo { - CfreeCompiler* c; - Heap* h; - const CfreeObjFile* obj; - - /* Sections */ - DwSection abbrev; - DwSection info; - DwSection line; - DwSection str; - DwSection line_str; - DwSection str_offsets; - DwSection addr; - DwSection loclists; - DwSection rnglists; - DwSection eh_frame; - DwSection aranges; - - /* Abbrev tables (one per unique abbrev_offset we've seen). */ - DwAbbrevTable* abbrevs; - u32 nabbrevs; - u32 abbrevs_cap; - - /* CUs */ - DwCu* cus; - u32 ncus; - u32 cus_cap; - - /* Line programs by CU index (parallel to cus). Each lazily built. */ - DwLineProgram* lines_by_cu; - u8* lines_built; /* parallel; 0 = not yet decoded */ - - /* Subprograms (sorted by low_pc on first build). 
*/ - DwSubprog* subs; - u32 nsubs; - u32 subs_cap; - u8 subs_built; - - /* Type cache: DIE-offset → CfreeDwarfType*. */ - CfreeDwarfType** types_by_off; /* parallel arrays */ - u32* types_off; - u32 ntypes; - u32 types_cap; - - /* Globals (top-level DW_TAG_variable in any CU). */ - DwLocal* globals; - u32 nglobals; - u32 globals_cap; - u8 globals_built; -}; - -/* ---- API between the dwarf_*.c files ---------------------------------- */ - -/* Section lookup by name. Sets out->data/size; sec_idx = UINT32_MAX if missing. - */ -void dw_find_section(CfreeDebugInfo* d, const char* name, DwSection* out); - -/* Read primitives. Each returns the new offset on success and panics on EOF. */ -u8 dw_u8(const u8* base, u32 size, u32* off); -u16 dw_u16(const u8* base, u32 size, u32* off); -u32 dw_u24(const u8* base, u32 size, u32* off); -u32 dw_u32(const u8* base, u32 size, u32* off); -u64 dw_u64(const u8* base, u32 size, u32* off); -u64 dw_uleb(const u8* base, u32 size, u32* off); -i64 dw_sleb(const u8* base, u32 size, u32* off); -const char* dw_cstr(const u8* base, u32 size, u32* off); - -/* Abbrev parsing: ensure (and return) the abbrev table for `offset`. */ -DwAbbrevTable* dw_abbrev_get(CfreeDebugInfo* d, u32 offset); -DwAbbrev* dw_abbrev_lookup(DwAbbrevTable* t, u64 code); - -/* Parse the CU header at offset `off` in .debug_info into `cu`. - * Returns the offset of the next CU header. */ -u32 dw_cu_parse_header(CfreeDebugInfo* d, u32 off, DwCu* cu); - -/* Skim every CU and populate dbg->cus. */ -void dw_parse_all_cus(CfreeDebugInfo* d); - -/* Open the .debug_str_offsets table indexed by str_offsets_base. */ -const char* dw_str(CfreeDebugInfo* d, u32 offset); -const char* dw_line_str(CfreeDebugInfo* d, u32 offset); -const char* dw_strx(CfreeDebugInfo* d, const DwCu* cu, u64 idx); - -/* Skip one attribute value of `form` size. *off is updated. 
*/ -void dw_skip_form(CfreeDebugInfo* d, const DwCu* cu, u32 form, - i64 implicit_const, u32* off); - -/* Read attribute value into a typed accumulator. Caller picks which getter. */ -typedef struct DwAttrValue { - u32 form; - /* Values for various forms — only one slot is meaningful per form. */ - u64 u; /* udata, addr, ref (CU-relative offset for local refs) */ - i64 s; /* sdata */ - const char* str; /* strp/string/strx/line_strp resolved cstring */ - const u8* block; /* exprloc/block bytes */ - u32 block_len; -} DwAttrValue; - -/* Read attr value at *off using `form`. Updates *off. */ -void dw_read_form(CfreeDebugInfo* d, const DwCu* cu, u32 form, - i64 implicit_const, u32* off, DwAttrValue* out); - -/* DIE iteration helpers. */ -typedef struct DwDie { - u64 abbrev_code; - DwAbbrev* abbrev; /* NULL if abbrev_code==0 (null entry) */ - u32 die_off; /* offset of this DIE itself in .debug_info */ - u32 attrs_off; /* where attribute encodings start */ - u32 next_sibling_off; /* lazily computed */ -} DwDie; - -/* Read one DIE header at *off. Updates *off to point past the abbrev code, - * to the start of the attribute area. Returns 1 on success, 0 if this is a - * null-entry (terminates a sibling chain). */ -int dw_read_die(CfreeDebugInfo* d, const DwCu* cu, u32* off, DwDie* out); - -/* Skip a DIE's attribute area, advancing *off past it. */ -void dw_skip_die_attrs(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, u32* off); - -/* Skip an entire DIE subtree (including children), starting at attrs_off. - * On entry, *off == die->attrs_off. On exit, *off is past the children - * terminator (if has_children) or just past the attrs (if no children). */ -void dw_skip_die_subtree(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, - u32* off); - -/* Lookup an attribute on `die` by attr code. Returns 1 if found and fills - * *out; 0 otherwise. Restartable (rewinds the cursor). 
*/ -int dw_die_attr(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, u32 attr, - DwAttrValue* out); - -/* String interning into the compiler's global pool. */ -const char* dw_intern(CfreeDebugInfo* d, const char* s, size_t len); - -/* Inline strcmp/strlen — libcfree avoids a runtime libc dep beyond the - * tightly-controlled allowlist (test/lib_deps.allowlist). */ -static inline int dw_streq(const char* a, const char* b) { - if (!a || !b) return 0; - while (*a && *b && *a == *b) { - a++; - b++; - } - return *a == 0 && *b == 0; -} -static inline size_t dw_strlen(const char* s) { - size_t n = 0; - if (!s) return 0; - while (s[n]) n++; - return n; -} - -/* DIE attribute pack — shared between dwarf_die.c and dwarf_type.c. */ -typedef struct DieAttrPack { - const char* name; - u64 low_pc; - u64 high_pc_value; - u32 high_pc_form; - u8 has_low_pc; - u8 has_high_pc; - u32 type_die_offset; - u8 has_type; - u32 decl_file; - u32 decl_line; - const u8* loc_block; - u32 loc_block_len; - u8 has_loclist; - u64 loclist_index; - const u8* fb_block; - u32 fb_block_len; - i64 const_value; - u8 has_const_value; - u32 byte_offset; - u8 has_byte_offset; - u32 byte_size; - u8 has_byte_size; - u32 bit_size; - u8 has_bit_size; - u32 bit_offset; - u8 has_bit_offset; - u32 base_encoding; - u8 has_encoding; - u32 array_count; - u8 has_array_count; - u8 inlined; -} DieAttrPack; - -void dw_die_pack(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, DieAttrPack* p); - -/* Subprograms */ -void dw_build_subs(CfreeDebugInfo* d); -DwSubprog* dw_find_subprog(CfreeDebugInfo* d, u64 pc); -void dw_build_locals(CfreeDebugInfo* d, DwSubprog* sp); - -/* Globals */ -void dw_build_globals(CfreeDebugInfo* d); - -/* Line program */ -void dw_build_line(CfreeDebugInfo* d, u32 cu_idx); - -/* Type DIE → CfreeDwarfType*. die_offset is absolute offset in .debug_info. 
*/ -CfreeDwarfType* dw_type_from_die(CfreeDebugInfo* d, u32 cu_idx, u32 die_offset); -CfreeDwarfType* dw_void_type(CfreeDebugInfo* d); - -/* Loc-expr evaluator. Evaluates `expr` of length `len` in the context of - * `frame` (regs, cfa) and `frame_base_expr` (the subprog's DW_AT_frame_base - * expression — typically just DW_OP_call_frame_cfa). Returns 0 on success; - * fills *result with the location kind plus value. */ -typedef struct DwExprResult { - /* result_kind: 0 = address (memory), 1 = value-on-stack (DW_OP_stack_value), - * 2 = register, 3 = unsupported. */ - int kind; - u64 value; /* address if kind=0; literal if kind=1; reg# if kind=2 */ -} DwExprResult; - -int dw_eval_expr(CfreeDebugInfo* d, const u8* expr, u32 len, const u8* fb_expr, - u32 fb_len, const CfreeUnwindFrame* frame, DwExprResult* out); - -/* CU lookup helpers. */ -DwCu* dw_cu_at_die_offset(CfreeDebugInfo* d, u32 die_offset); - -/* Resolve a DW_FORM_loclistx into the matching location list entry for - * `pc`. Returns 1 and fills bytes/len on success; 0 if the section is - * absent, the index is bad, or no entry covers `pc`. */ -int dw_loclist_resolve(CfreeDebugInfo* d, const DwCu* cu, u64 idx, u64 pc, - const u8** bytes, u32* len); - -#endif diff --git a/src/dwarf/dwarf_line.c b/src/dwarf/dwarf_line.c @@ -1,611 +0,0 @@ -/* dwarf_line.c — DWARF 5 line-number-program decoder. - * - * Per doc/DWARF.md §4.2: walk .debug_line for the CU's stmt_list, build - * a row matrix, and index it for addr→line and (file, line)→addr lookup. 
- */ - -#include <cfree.h> -#include <stddef.h> -#include <stdint.h> -#include <string.h> - -#include "core/core.h" -#include "core/heap.h" -#include "core/util.h" -#include "dwarf/dwarf_internal.h" - -typedef struct LineState { - u64 address; - u32 op_index; - u32 file; - u32 line; - u32 column; - u8 is_stmt; - u8 basic_block; - u8 end_sequence; - u8 prologue_end; - u8 epilogue_begin; - u32 isa; - u32 discriminator; -} LineState; - -typedef struct LineHdr { - u32 unit_length; - u8 version; - u8 address_size; - u8 segment_selector_size; - u32 header_length; - u8 min_inst_len; - u8 max_ops_per_inst; - u8 default_is_stmt; - i8 line_base; - u8 line_range; - u8 opcode_base; - u8 std_opcode_lengths[12]; /* version 5 has 12 standard opcodes */ -} LineHdr; - -static void rows_push(CfreeDebugInfo* d, DwLineProgram* lp, - const LineState* st) { - DwLineRow* r; - if (lp->nrows == lp->cap) { - u32 ncap = lp->cap ? lp->cap * 2 : 32; - DwLineRow* na = (DwLineRow*)d->h->realloc( - d->h, lp->rows, lp->cap * sizeof(*lp->rows), ncap * sizeof(*lp->rows), - _Alignof(DwLineRow)); - if (!na) return; - lp->rows = na; - lp->cap = ncap; - } - r = &lp->rows[lp->nrows++]; - r->address = st->address; - r->file_index = st->file; - r->line = st->line; - r->column = st->column; - r->is_stmt = st->is_stmt; - r->end_sequence = st->end_sequence; -} - -static void state_init(LineState* st, u8 default_is_stmt) { - st->address = 0; - st->op_index = 0; - st->file = 1; - st->line = 1; - st->column = 0; - st->is_stmt = default_is_stmt; - st->basic_block = 0; - st->end_sequence = 0; - st->prologue_end = 0; - st->epilogue_begin = 0; - st->isa = 0; - st->discriminator = 0; -} - -/* Read a DW5 file-or-dir entry-format header. - * On entry: *off points at format_count. - * Returns the number of (content_type, form) pairs. Caller must read - * the format pairs before calling read_entries(). 
*/ -typedef struct EntryFmt { - u32 content_type; - u32 form; -} EntryFmt; - -static u32 read_format(const u8* base, u32 size, u32* off, EntryFmt* fmt, - u32 max) { - u32 n = dw_u8(base, size, off); - u32 i; - if (n > max) n = max; - for (i = 0; i < n; ++i) { - fmt[i].content_type = (u32)dw_uleb(base, size, off); - fmt[i].form = (u32)dw_uleb(base, size, off); - } - return n; -} - -/* Wrapper around dw_read_form that reads from .debug_line bytes via a - * pseudo-CU configured with the line-program address size. */ -static void read_lp_form(CfreeDebugInfo* d, u32 form, u8 addr_size, u32* off, - DwAttrValue* out) { - /* This reads from .debug_line, not .debug_info — we duplicate the - * minimal subset we need (line_strp, strp, udata, data1/2/4/8, string). */ - out->form = form; - out->u = 0; - out->str = ""; - out->block = NULL; - out->block_len = 0; - switch (form) { - case DW_FORM_string: - out->str = dw_cstr(d->line.data, d->line.size, off); - break; - case DW_FORM_strp: - out->u = dw_u32(d->line.data, d->line.size, off); - out->str = dw_str(d, (u32)out->u); - break; - case DW_FORM_line_strp: - out->u = dw_u32(d->line.data, d->line.size, off); - out->str = dw_line_str(d, (u32)out->u); - break; - case DW_FORM_data1: - out->u = dw_u8(d->line.data, d->line.size, off); - break; - case DW_FORM_data2: - out->u = dw_u16(d->line.data, d->line.size, off); - break; - case DW_FORM_data4: - out->u = dw_u32(d->line.data, d->line.size, off); - break; - case DW_FORM_data8: - out->u = dw_u64(d->line.data, d->line.size, off); - break; - case DW_FORM_udata: - out->u = dw_uleb(d->line.data, d->line.size, off); - break; - case DW_FORM_sdata: - (void)dw_sleb(d->line.data, d->line.size, off); - break; - case DW_FORM_data16: - *off += 16; - break; - case DW_FORM_block: - case DW_FORM_exprloc: { - u32 n = (u32)dw_uleb(d->line.data, d->line.size, off); - out->block = d->line.data + *off; - out->block_len = n; - *off += n; - } break; - case DW_FORM_block1: { - u32 n = dw_u8(d->line.data, 
d->line.size, off); - out->block = d->line.data + *off; - out->block_len = n; - *off += n; - } break; - case DW_FORM_flag: - out->u = dw_u8(d->line.data, d->line.size, off); - break; - default: - /* Unknown form — heuristic: skip 0 bytes. Caller may read garbage. */ - (void)addr_size; - break; - } -} - -/* Build a fully-qualified path for file_index in lp. */ -static const char* build_file_norm(CfreeDebugInfo* d, DwLineProgram* lp, - u32 idx) { - const char* path; - const char* dir; - u32 dir_idx; - size_t plen, dlen; - char buf[4096]; - size_t pos = 0; - if (idx >= lp->nfiles) return ""; - path = lp->files[idx].path; - if (!path) path = ""; - dir_idx = lp->files[idx].dir_index; - dir = (dir_idx < lp->ndirs) ? lp->dirs[dir_idx] : ""; - plen = strlen(path); - dlen = strlen(dir); - /* If path is already absolute (starts with /), return as-is. */ - if (plen > 0 && path[0] == '/') return path; - if (dlen > 0) { - if (dlen >= sizeof(buf) - 2) return path; /* fallback */ - memcpy(buf, dir, dlen); - pos = dlen; - if (buf[pos - 1] != '/') buf[pos++] = '/'; - } - if (pos + plen >= sizeof(buf)) return path; - memcpy(buf + pos, path, plen); - pos += plen; - buf[pos] = 0; - return dw_intern(d, buf, pos); -} - -void dw_build_line(CfreeDebugInfo* d, u32 cu_idx) { - DwCu* cu; - DwLineProgram* lp; - u32 off; - u32 stmt_off; - LineHdr h; - u32 unit_end; - u32 prog_start; - EntryFmt dir_fmt[8]; - EntryFmt file_fmt[8]; - u32 ndir_fmt, nfile_fmt; - u32 ndirs_count, nfiles_count; - u32 i; - LineState st; - - if (cu_idx >= d->ncus) return; - if (d->lines_built[cu_idx]) return; - d->lines_built[cu_idx] = 1; - - cu = &d->cus[cu_idx]; - lp = &d->lines_by_cu[cu_idx]; - if (!cu->has_stmt_list) return; - stmt_off = cu->stmt_list; - if (stmt_off >= d->line.size) return; - - off = stmt_off; - h.unit_length = dw_u32(d->line.data, d->line.size, &off); - if (h.unit_length == 0xffffffffu) return; /* DWARF64 not supported */ - unit_end = off + h.unit_length; - h.version = (u8)dw_u16(d->line.data, 
d->line.size, &off); - if (h.version != 5) { - /* DW4/3 layout differs. We only support DW5. */ - return; - } - h.address_size = dw_u8(d->line.data, d->line.size, &off); - h.segment_selector_size = dw_u8(d->line.data, d->line.size, &off); - h.header_length = dw_u32(d->line.data, d->line.size, &off); - prog_start = off + h.header_length; - h.min_inst_len = dw_u8(d->line.data, d->line.size, &off); - h.max_ops_per_inst = dw_u8(d->line.data, d->line.size, &off); - h.default_is_stmt = dw_u8(d->line.data, d->line.size, &off); - h.line_base = (i8)dw_u8(d->line.data, d->line.size, &off); - h.line_range = dw_u8(d->line.data, d->line.size, &off); - h.opcode_base = dw_u8(d->line.data, d->line.size, &off); - if (h.line_range == 0) h.line_range = 1; - /* Read standard opcode lengths (opcode_base - 1 of them). */ - { - u32 j; - u32 cnt = h.opcode_base ? h.opcode_base - 1u : 0u; - if (cnt > sizeof(h.std_opcode_lengths)) cnt = sizeof(h.std_opcode_lengths); - for (j = 0; j < cnt; ++j) - h.std_opcode_lengths[j] = dw_u8(d->line.data, d->line.size, &off); - /* Skip any extra opcode-length bytes the header claims. */ - if (h.opcode_base > 1u + sizeof(h.std_opcode_lengths)) { - off += (h.opcode_base - 1u) - (u32)sizeof(h.std_opcode_lengths); - } - } - - /* directories[] */ - ndir_fmt = read_format(d->line.data, d->line.size, &off, dir_fmt, 8); - ndirs_count = (u32)dw_uleb(d->line.data, d->line.size, &off); - if (ndirs_count > 0) { - lp->dirs = (const char**)d->h->alloc( - d->h, ndirs_count * sizeof(const char*), _Alignof(const char*)); - if (lp->dirs) { - lp->ndirs = ndirs_count; - memset(lp->dirs, 0, ndirs_count * sizeof(const char*)); - } - } - for (i = 0; i < ndirs_count; ++i) { - u32 j; - DwAttrValue v; - const char* path = ""; - for (j = 0; j < ndir_fmt; ++j) { - read_lp_form(d, dir_fmt[j].form, h.address_size, &off, &v); - if (dir_fmt[j].content_type == DW_LNCT_path) { - path = v.str ? 
v.str : ""; - } - } - if (lp->dirs && i < lp->ndirs) lp->dirs[i] = path; - } - - /* file_names[] */ - nfile_fmt = read_format(d->line.data, d->line.size, &off, file_fmt, 8); - nfiles_count = (u32)dw_uleb(d->line.data, d->line.size, &off); - if (nfiles_count > 0) { - lp->files = (DwLineFile*)d->h->alloc( - d->h, nfiles_count * sizeof(DwLineFile), _Alignof(DwLineFile)); - if (lp->files) { - lp->nfiles = nfiles_count; - memset(lp->files, 0, nfiles_count * sizeof(DwLineFile)); - } - } - for (i = 0; i < nfiles_count; ++i) { - u32 j; - DwAttrValue v; - const char* path = ""; - u32 dir_index = 0; - for (j = 0; j < nfile_fmt; ++j) { - read_lp_form(d, file_fmt[j].form, h.address_size, &off, &v); - if (file_fmt[j].content_type == DW_LNCT_path) - path = v.str ? v.str : ""; - else if (file_fmt[j].content_type == DW_LNCT_directory_index) - dir_index = (u32)v.u; - } - if (lp->files && i < lp->nfiles) { - lp->files[i].path = path; - lp->files[i].dir_index = dir_index; - } - } - - /* Build per-file normalized path cache lazily on first query. 
*/ - if (lp->nfiles) { - lp->file_norm = (const char**)d->h->alloc( - d->h, lp->nfiles * sizeof(const char*), _Alignof(const char*)); - if (lp->file_norm) { - lp->nfile_norm = lp->nfiles; - for (i = 0; i < lp->nfiles; ++i) lp->file_norm[i] = NULL; - } - } - - /* program */ - off = prog_start; - state_init(&st, h.default_is_stmt); - while (off < unit_end) { - u8 op = dw_u8(d->line.data, d->line.size, &off); - if (op == 0) { - /* extended opcode */ - u64 elen = dw_uleb(d->line.data, d->line.size, &off); - u32 eop_off = off; - u8 eop; - if (elen == 0 || off + elen > d->line.size) break; - eop = dw_u8(d->line.data, d->line.size, &off); - switch (eop) { - case DW_LNE_end_sequence: - st.end_sequence = 1; - rows_push(d, lp, &st); - state_init(&st, h.default_is_stmt); - break; - case DW_LNE_set_address: - if (h.address_size == 8) - st.address = dw_u64(d->line.data, d->line.size, &off); - else - st.address = dw_u32(d->line.data, d->line.size, &off); - st.op_index = 0; - break; - case DW_LNE_set_discriminator: - st.discriminator = (u32)dw_uleb(d->line.data, d->line.size, &off); - break; - default: - /* Skip unknown extended opcode body. */ - off = eop_off + (u32)elen; - break; - } - /* Sync to the declared end of the extended opcode. 
*/ - off = eop_off + (u32)elen; - } else if (op < h.opcode_base) { - /* standard opcode */ - switch (op) { - case DW_LNS_copy: - rows_push(d, lp, &st); - st.basic_block = 0; - st.prologue_end = 0; - st.epilogue_begin = 0; - st.discriminator = 0; - break; - case DW_LNS_advance_pc: { - u64 adv = dw_uleb(d->line.data, d->line.size, &off); - st.address += adv * h.min_inst_len; - } break; - case DW_LNS_advance_line: { - i64 adv = dw_sleb(d->line.data, d->line.size, &off); - st.line = (u32)((i64)st.line + adv); - } break; - case DW_LNS_set_file: - st.file = (u32)dw_uleb(d->line.data, d->line.size, &off); - break; - case DW_LNS_set_column: - st.column = (u32)dw_uleb(d->line.data, d->line.size, &off); - break; - case DW_LNS_negate_stmt: - st.is_stmt = !st.is_stmt; - break; - case DW_LNS_set_basic_block: - st.basic_block = 1; - break; - case DW_LNS_const_add_pc: { - u8 adj = (u8)(255 - h.opcode_base); - u8 op_adv = (u8)(adj / h.line_range); - st.address += op_adv * h.min_inst_len; - } break; - case DW_LNS_fixed_advance_pc: - st.address += dw_u16(d->line.data, d->line.size, &off); - st.op_index = 0; - break; - case DW_LNS_set_prologue_end: - st.prologue_end = 1; - break; - case DW_LNS_set_epilogue_begin: - st.epilogue_begin = 1; - break; - case DW_LNS_set_isa: - st.isa = (u32)dw_uleb(d->line.data, d->line.size, &off); - break; - default: { - /* Unknown standard opcode: skip its operands per - * std_opcode_lengths. */ - u32 nops = (op - 1u) < sizeof(h.std_opcode_lengths) - ? 
h.std_opcode_lengths[op - 1] - : 0; - u32 j; - for (j = 0; j < nops; ++j) - (void)dw_uleb(d->line.data, d->line.size, &off); - } break; - } - } else { - /* special opcode */ - u32 adj = (u32)(op - h.opcode_base); - u32 op_adv = adj / h.line_range; - i32 line_inc = (i32)h.line_base + (i32)(adj % h.line_range); - st.address += op_adv * h.min_inst_len; - st.line = (u32)((i32)st.line + line_inc); - rows_push(d, lp, &st); - st.basic_block = 0; - st.prologue_end = 0; - st.epilogue_begin = 0; - st.discriminator = 0; - } - } - - /* Build file_norm lazily. */ - if (lp->file_norm) { - for (i = 0; i < lp->nfiles; ++i) { - lp->file_norm[i] = build_file_norm(d, lp, i); - } - } -} - -/* Lookup helpers. Build all CU line tables on demand, walk each. */ - -int cfree_dwarf_addr_to_line(CfreeDebugInfo* d, uint64_t pc, - const char** file_out, uint32_t* line_out, - uint32_t* col_out) { - /* Return codes: - * 0 — PC has a line entry; outputs filled. - * 1 — PC sits inside a CU's coverage range but no row matched. - * 2 — PC outside every CU's address coverage (e.g. JIT-emitted thunk - * or a frame inside a `.o` linked without `-g`). REPL: "no - * debug info for this frame". 
*/ - u32 i; - int any_in_range = 0; - if (file_out) *file_out = NULL; - if (line_out) *line_out = 0; - if (col_out) *col_out = 0; - if (!d) return 1; - for (i = 0; i < d->ncus; ++i) { - DwLineProgram* lp; - u32 j; - DwLineRow* best = NULL; - uint64_t cu_lo = (uint64_t)-1, cu_hi = 0; - if (!d->lines_built[i]) dw_build_line(d, i); - lp = &d->lines_by_cu[i]; - for (j = 0; j < lp->nrows; ++j) { - DwLineRow* r = &lp->rows[j]; - if (r->address < cu_lo) cu_lo = r->address; - if (r->address > cu_hi) cu_hi = r->address; - if (r->end_sequence) continue; - if (r->address > pc) break; - best = r; - } - if (pc >= cu_lo && pc <= cu_hi) any_in_range = 1; - if (best) { - const char* f = ""; - if (best->file_index < lp->nfile_norm && lp->file_norm) - f = lp->file_norm[best->file_index]; - if (file_out) *file_out = f; - if (line_out) *line_out = best->line; - if (col_out) *col_out = best->column; - return 0; - } - } - return any_in_range ? 1 : 2; -} - -/* file_norm matches user-typed `file` if either it is exactly equal, or it - * ends with `/<file>`. Suffix matching keeps `b util.c:42` working when - * the DWARF file_norm is the absolute path the compiler saw. */ -static int dw_file_matches(const char* file_norm, const char* user, size_t ulen) { - size_t flen; - if (!file_norm) return 0; - if (dw_streq(file_norm, user)) return 1; - flen = strlen(file_norm); - if (flen <= ulen) return 0; - if (file_norm[flen - ulen - 1] != '/') return 0; - return memcmp(file_norm + flen - ulen, user, ulen) == 0; -} - -int cfree_dwarf_line_to_addr(CfreeDebugInfo* d, const char* file, uint32_t line, - uint64_t* pc_out) { - /* Returns: - * 0 — unique match; pc_out filled with that PC. - * 1 — file `file` does not appear in any CU we scanned (per-DWARF.md - * "no data" semantics: caller can format this as "file not - * covered" if it cares to distinguish from a stale line). - * 2 — `file` appears in some CU but no row matches (file, line). 
- * 3 — ambiguous: more than one distinct PC matches (file, line) via - * suffix. pc_out is filled with the first match so callers that - * don't disambiguate still get a usable PC. Use - * cfree_dwarf_line_to_addr_all to enumerate candidates. */ - /* Ambiguity is keyed on distinct file_norm *paths* matching the - * suffix, not on distinct PCs. Multiple PCs on the same line of the - * same source file are expected (one row per instruction) — they're - * not ambiguity, just line-program granularity. */ - u32 i; - size_t ulen; - const char* first_path = NULL; - uint64_t first_pc = 0; - const char* alt_path = NULL; - int file_seen = 0; - int line_hits = 0; - if (pc_out) *pc_out = 0; - if (!d || !file) return 1; - ulen = strlen(file); - if (ulen == 0) return 1; - for (i = 0; i < d->ncus; ++i) { - DwLineProgram* lp; - u32 j; - if (!d->lines_built[i]) dw_build_line(d, i); - lp = &d->lines_by_cu[i]; - for (j = 0; j < lp->nrows; ++j) { - DwLineRow* r = &lp->rows[j]; - const char* f; - if (r->end_sequence) continue; - if (r->file_index >= lp->nfile_norm || !lp->file_norm) continue; - f = lp->file_norm[r->file_index]; - if (!dw_file_matches(f, file, ulen)) continue; - file_seen = 1; - if (r->line != line) continue; - ++line_hits; - if (!first_path) { - first_path = f; - first_pc = r->address; - } else if (!alt_path && f != first_path && !dw_streq(f, first_path)) { - alt_path = f; - } - } - } - if (pc_out) *pc_out = first_pc; - if (alt_path) return 3; - if (line_hits > 0) return 0; - if (file_seen) return 2; - return 1; -} - -/* Enumerate all distinct candidate (pc, file_norm) pairs for the given - * (file, line) match. Caller-supplied `out` array is filled up to `cap`; - * `*n_out` receives the total candidate count (which may exceed cap, in - * which case only the first `cap` are written). Returns 0 on success - * (including 0 candidates), 1 on invalid args. Intended for REPL - * disambiguation after cfree_dwarf_line_to_addr returns 3. 
*/ -int cfree_dwarf_line_to_addr_all(CfreeDebugInfo* d, const char* file, - uint32_t line, CfreeDwarfLineMatch* out, - uint32_t cap, uint32_t* n_out) { - /* One candidate per distinct file_norm path (not per PC). PC is the - * first matching row's address for that file_norm — i.e. the same PC - * that cfree_dwarf_line_to_addr would have returned for that file. */ - u32 i; - size_t ulen; - uint32_t total = 0; - if (n_out) *n_out = 0; - if (!d || !file) return 1; - ulen = strlen(file); - if (ulen == 0) return 1; - for (i = 0; i < d->ncus; ++i) { - DwLineProgram* lp; - u32 j; - if (!d->lines_built[i]) dw_build_line(d, i); - lp = &d->lines_by_cu[i]; - for (j = 0; j < lp->nrows; ++j) { - DwLineRow* r = &lp->rows[j]; - const char* f; - uint32_t k; - int dup = 0; - if (r->end_sequence) continue; - if (r->line != line) continue; - if (r->file_index >= lp->nfile_norm || !lp->file_norm) continue; - f = lp->file_norm[r->file_index]; - if (!dw_file_matches(f, file, ulen)) continue; - /* Dedupe by file_norm path so the candidate list is one entry per - * source file even if the line has many per-instruction rows. */ - if (out) { - uint32_t lim = total < cap ? total : cap; - for (k = 0; k < lim; ++k) { - if (out[k].file == f || (out[k].file && dw_streq(out[k].file, f))) { - dup = 1; - break; - } - } - } - if (dup) continue; - if (out && total < cap) { - out[total].pc = r->address; - out[total].file = f; - } - ++total; - } - } - if (n_out) *n_out = total; - return 0; -} diff --git a/src/dwarf/dwarf_loc.c b/src/dwarf/dwarf_loc.c @@ -1,380 +0,0 @@ -/* dwarf_loc.c — DWARF location-expression evaluator. - * - * Per doc/DWARF.md §4.4: small DWARF stack machine. Supports the ops the - * producer emits: DW_OP_reg0..31, regx, fbreg, addr, call_frame_cfa, plus - * arithmetic. DW_AT_frame_base = DW_OP_call_frame_cfa per §3.6 — the - * caller passes the CFA in via frame->cfa. 
- */ - -#include <cfree.h> -#include <stdint.h> -#include <string.h> - -#include "core/core.h" -#include "core/heap.h" -#include "dwarf/dwarf_internal.h" - -/* Tiny stack machine state. */ -typedef struct ExprMachine { - i64 stack[64]; - int sp; /* points to next free slot; top is stack[sp-1] */ - int reg_result; - u32 reg_num; /* if reg_result, holds the register number */ - int stack_value; /* DW_OP_stack_value seen */ -} ExprMachine; - -static int push(ExprMachine* m, i64 v) { - if (m->sp >= (int)(sizeof(m->stack) / sizeof(m->stack[0]))) return 0; - m->stack[m->sp++] = v; - return 1; -} -static int pop(ExprMachine* m, i64* v) { - if (m->sp == 0) return 0; - *v = m->stack[--m->sp]; - return 1; -} - -/* Evaluate either DW_AT_frame_base (when we encounter DW_OP_fbreg) or - * the inlined block; reuses the same machinery. Returns 0 on success. */ -static int eval_one(CfreeDebugInfo* d, const u8* expr, u32 len, - const u8* fb_expr, u32 fb_len, - const CfreeUnwindFrame* frame, ExprMachine* m, - int allow_fbreg) { - u32 off = 0; - while (off < len) { - u8 op = expr[off++]; - if (op >= DW_OP_lit0 && op <= DW_OP_lit0 + 31) { - if (!push(m, op - DW_OP_lit0)) return 1; - } else if (op >= DW_OP_reg0 && op <= DW_OP_reg0 + 31) { - m->reg_result = 1; - m->reg_num = op - DW_OP_reg0; - return 0; - } else if (op >= DW_OP_breg0 && op <= DW_OP_breg0 + 31) { - i64 ofs = dw_sleb(expr, len, &off); - u32 r = op - DW_OP_breg0; - i64 v = (r < 32) ? (i64)frame->regs[r] : 0; - if (!push(m, v + ofs)) return 1; - } else { - switch (op) { - case DW_OP_addr: - /* Address of a global. Address-size depends on CU; assume 8. 
*/ - if (off + 8 > len) return 1; - { - u64 a = dw_u64(expr, len, &off); - if (!push(m, (i64)a)) return 1; - } - break; - case DW_OP_const1u: - if (off + 1 > len) return 1; - if (!push(m, expr[off++])) return 1; - break; - case DW_OP_const1s: - if (off + 1 > len) return 1; - if (!push(m, (i8)expr[off++])) return 1; - break; - case DW_OP_const2u: { - if (!push(m, dw_u16(expr, len, &off))) return 1; - } break; - case DW_OP_const2s: { - u16 v = dw_u16(expr, len, &off); - if (!push(m, (i16)v)) return 1; - } break; - case DW_OP_const4u: { - if (!push(m, dw_u32(expr, len, &off))) return 1; - } break; - case DW_OP_const4s: { - u32 v = dw_u32(expr, len, &off); - if (!push(m, (i32)v)) return 1; - } break; - case DW_OP_const8u: - case DW_OP_const8s: { - u64 v = dw_u64(expr, len, &off); - if (!push(m, (i64)v)) return 1; - } break; - case DW_OP_constu: { - u64 v = dw_uleb(expr, len, &off); - if (!push(m, (i64)v)) return 1; - } break; - case DW_OP_consts: { - i64 v = dw_sleb(expr, len, &off); - if (!push(m, v)) return 1; - } break; - case DW_OP_dup: { - i64 v; - if (m->sp == 0) return 1; - v = m->stack[m->sp - 1]; - if (!push(m, v)) return 1; - } break; - case DW_OP_drop: { - i64 v; - if (!pop(m, &v)) return 1; - } break; - case DW_OP_and: { - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, a & b)) return 1; - } break; - case DW_OP_minus: { - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, a - b)) return 1; - } break; - case DW_OP_mul: { - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, a * b)) return 1; - } break; - case DW_OP_or: { - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, a | b)) return 1; - } break; - case DW_OP_plus: { - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, a + b)) return 1; - } break; - case DW_OP_plus_uconst: { - u64 c = dw_uleb(expr, len, &off); - i64 a; - if (!pop(m, &a)) return 1; - if (!push(m, a + (i64)c)) return 1; - } break; - case DW_OP_shl: 
{ - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, (i64)((u64)a << (b & 63)))) return 1; - } break; - case DW_OP_shr: { - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, (i64)((u64)a >> (b & 63)))) return 1; - } break; - case DW_OP_shra: { - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, a >> (b & 63))) return 1; - } break; - case DW_OP_xor: { - i64 a, b; - if (!pop(m, &b) || !pop(m, &a)) return 1; - if (!push(m, a ^ b)) return 1; - } break; - case DW_OP_regx: { - u64 r = dw_uleb(expr, len, &off); - m->reg_result = 1; - m->reg_num = (u32)r; - return 0; - } - case DW_OP_bregx: { - u64 r = dw_uleb(expr, len, &off); - i64 ofs = dw_sleb(expr, len, &off); - i64 v = (r < 32) ? (i64)frame->regs[r] : 0; - if (!push(m, v + ofs)) return 1; - } break; - case DW_OP_fbreg: { - i64 ofs = dw_sleb(expr, len, &off); - if (!allow_fbreg) return 1; - /* Evaluate frame_base expression to get the CFA-equivalent base. */ - { - ExprMachine fbm; - i64 base = 0; - int rc; - memset(&fbm, 0, sizeof(fbm)); - if (fb_expr && fb_len > 0) { - rc = eval_one(d, fb_expr, fb_len, NULL, 0, frame, &fbm, 0); - if (rc != 0) return rc; - if (fbm.sp > 0) - base = fbm.stack[fbm.sp - 1]; - else if (fbm.reg_result) { - /* Frame base lives in a register — value is reg contents. */ - base = (fbm.reg_num < 32) ? (i64)frame->regs[fbm.reg_num] : 0; - } - } else { - base = (i64)frame->cfa; - } - if (!push(m, base + ofs)) return 1; - } - } break; - case DW_OP_call_frame_cfa: { - if (!push(m, (i64)frame->cfa)) return 1; - } break; - case DW_OP_stack_value: - m->stack_value = 1; - return 0; - default: - /* Unsupported op — give up. */ - return 1; - } - } - } - return 0; -} - -/* DWARF 5 .debug_loclists entry tags. 
*/ -#define DW_LLE_end_of_list 0x00 -#define DW_LLE_base_addressx 0x01 -#define DW_LLE_startx_endx 0x02 -#define DW_LLE_startx_length 0x03 -#define DW_LLE_offset_pair 0x04 -#define DW_LLE_default_location 0x05 -#define DW_LLE_base_address 0x06 -#define DW_LLE_start_end 0x07 -#define DW_LLE_start_length 0x08 - -/* Resolve a loclistx index to the active entry for `pc`. - * - * Per DWARF 5: DW_AT_loclists_base on the CU points at the offset_entries - * array within .debug_loclists. offset_entries[idx] is a 4-byte value (in - * 32-bit DWARF) giving the byte offset (relative to loclists_base) of the - * matching location list. Each list is a sequence of LLE entries - * terminated by DW_LLE_end_of_list. We recognize at minimum: - * DW_LLE_offset_pair (relative to base address) - * DW_LLE_start_length (absolute) - * DW_LLE_start_end (absolute) - * DW_LLE_default_location - * DW_LLE_base_address (sets the base for offset_pair) - * DW_LLE_base_addressx / DW_LLE_startx_* — degraded (skipped; need - * .debug_addr resolution we don't yet model). - */ -int dw_loclist_resolve(CfreeDebugInfo* d, const DwCu* cu, u64 idx, u64 pc, - const u8** bytes_out, u32* len_out) { - u32 base; - u32 entry_off; - u32 list_off; - u64 base_addr = 0; - if (!d || !cu) return 0; - if (d->loclists.sec_idx == UINT32_MAX || d->loclists.size == 0) return 0; - base = cu->loclists_base; - /* DW_AT_loclists_base points to the start of the offset_entries table - * for the CU (i.e. just past the header). offset_entries[i] is a - * 4-byte (32-bit DWARF) value, the byte offset (relative to base) of - * the matching location list. */ - entry_off = base + (u32)idx * 4u; - if (entry_off + 4 > d->loclists.size) return 0; - { - u32 t = entry_off; - list_off = dw_u32(d->loclists.data, d->loclists.size, &t); - } - /* The entry value is an offset relative to `base`. */ - list_off += base; - if (list_off >= d->loclists.size) return 0; - /* Walk the list. 
*/ - { - u32 off = list_off; - while (off < d->loclists.size) { - u8 lle = dw_u8(d->loclists.data, d->loclists.size, &off); - switch (lle) { - case DW_LLE_end_of_list: - return 0; - case DW_LLE_base_address: { - if (cu->address_size == 8) - base_addr = dw_u64(d->loclists.data, d->loclists.size, &off); - else - base_addr = dw_u32(d->loclists.data, d->loclists.size, &off); - } break; - case DW_LLE_offset_pair: { - u64 lo = dw_uleb(d->loclists.data, d->loclists.size, &off); - u64 hi = dw_uleb(d->loclists.data, d->loclists.size, &off); - u32 elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); - const u8* eb = d->loclists.data + off; - off += elen; - if (pc >= base_addr + lo && pc < base_addr + hi) { - *bytes_out = eb; - *len_out = elen; - return 1; - } - } break; - case DW_LLE_start_end: { - u64 lo, hi; - u32 elen; - const u8* eb; - if (cu->address_size == 8) { - lo = dw_u64(d->loclists.data, d->loclists.size, &off); - hi = dw_u64(d->loclists.data, d->loclists.size, &off); - } else { - lo = dw_u32(d->loclists.data, d->loclists.size, &off); - hi = dw_u32(d->loclists.data, d->loclists.size, &off); - } - elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); - eb = d->loclists.data + off; - off += elen; - if (pc >= lo && pc < hi) { - *bytes_out = eb; - *len_out = elen; - return 1; - } - } break; - case DW_LLE_start_length: { - u64 lo, length; - u32 elen; - const u8* eb; - if (cu->address_size == 8) - lo = dw_u64(d->loclists.data, d->loclists.size, &off); - else - lo = dw_u32(d->loclists.data, d->loclists.size, &off); - length = dw_uleb(d->loclists.data, d->loclists.size, &off); - elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); - eb = d->loclists.data + off; - off += elen; - if (pc >= lo && pc < lo + length) { - *bytes_out = eb; - *len_out = elen; - return 1; - } - } break; - case DW_LLE_default_location: { - u32 elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); - const u8* eb = d->loclists.data + off; - off += elen; - 
*bytes_out = eb; - *len_out = elen; - return 1; - } - case DW_LLE_base_addressx: { - (void)dw_uleb(d->loclists.data, d->loclists.size, &off); - /* unsupported: needs .debug_addr indirection */ - } break; - case DW_LLE_startx_endx: - case DW_LLE_startx_length: { - (void)dw_uleb(d->loclists.data, d->loclists.size, &off); - (void)dw_uleb(d->loclists.data, d->loclists.size, &off); - { - u32 elen = (u32)dw_uleb(d->loclists.data, d->loclists.size, &off); - off += elen; - } - } break; - default: - /* Unknown LLE — stop. */ - return 0; - } - } - } - return 0; -} - -int dw_eval_expr(CfreeDebugInfo* d, const u8* expr, u32 len, const u8* fb_expr, - u32 fb_len, const CfreeUnwindFrame* frame, DwExprResult* out) { - ExprMachine m; - int rc; - memset(&m, 0, sizeof(m)); - out->kind = 3; - out->value = 0; - if (!expr || len == 0 || !frame) return 1; - rc = eval_one(d, expr, len, fb_expr, fb_len, frame, &m, 1); - if (rc != 0) return rc; - if (m.reg_result) { - out->kind = 2; - out->value = m.reg_num; - return 0; - } - if (m.sp == 0) return 1; - if (m.stack_value) { - out->kind = 1; - out->value = (u64)m.stack[m.sp - 1]; - return 0; - } - out->kind = 0; - out->value = (u64)m.stack[m.sp - 1]; - return 0; -} diff --git a/src/dwarf/dwarf_open.c b/src/dwarf/dwarf_open.c @@ -1,750 +0,0 @@ -/* dwarf_open.c — open/close, section lookup, primitives, abbrev cache. - * - * Per doc/DWARF.md §4.1: read .debug_abbrev / .debug_info / .debug_line / - * .debug_str / .debug_line_str by section name from the CfreeObjFile. - * Return NULL if any of those mandatory five are missing. 
- */ - -#include <cfree.h> -#include <stdint.h> -#include <string.h> - -#include "core/core.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/util.h" -#include "core/vec.h" -#include "dwarf/dwarf_internal.h" - -/* ---- section lookup --------------------------------------------------- */ - -void dw_find_section(CfreeDebugInfo* d, const char* name, DwSection* out) { - uint32_t i, n; - out->data = NULL; - out->size = 0; - out->sec_idx = UINT32_MAX; - if (!d->obj) return; - n = cfree_obj_nsections(d->obj); - for (i = 0; i < n; ++i) { - CfreeObjSecInfo info = cfree_obj_section(d->obj, i); - if (info.name && dw_streq(info.name, name)) { - size_t len = 0; - const uint8_t* p = cfree_obj_section_data(d->obj, i, &len); - out->data = p; - out->size = (u32)len; - out->sec_idx = i; - return; - } - } -} - -/* ---- byte-stream primitives ------------------------------------------- */ - -/* On EOF we return zero / empty. The decoder will detect malformed input - * via length checks elsewhere; for the consumer we just want to not - * crash on truncated bytes. 
*/ - -u8 dw_u8(const u8* base, u32 size, u32* off) { - if (*off >= size) return 0; - return base[(*off)++]; -} -u16 dw_u16(const u8* base, u32 size, u32* off) { - u16 v; - if (*off + 2 > size) { - *off = size; - return 0; - } - v = (u16)base[*off] | ((u16)base[*off + 1] << 8); - *off += 2; - return v; -} -u32 dw_u24(const u8* base, u32 size, u32* off) { - u32 v; - if (*off + 3 > size) { - *off = size; - return 0; - } - v = (u32)base[*off] | ((u32)base[*off + 1] << 8) | - ((u32)base[*off + 2] << 16); - *off += 3; - return v; -} -u32 dw_u32(const u8* base, u32 size, u32* off) { - u32 v; - if (*off + 4 > size) { - *off = size; - return 0; - } - v = (u32)base[*off] | ((u32)base[*off + 1] << 8) | - ((u32)base[*off + 2] << 16) | ((u32)base[*off + 3] << 24); - *off += 4; - return v; -} -u64 dw_u64(const u8* base, u32 size, u32* off) { - u64 v; - if (*off + 8 > size) { - *off = size; - return 0; - } - v = (u64)base[*off] | ((u64)base[*off + 1] << 8) | - ((u64)base[*off + 2] << 16) | ((u64)base[*off + 3] << 24) | - ((u64)base[*off + 4] << 32) | ((u64)base[*off + 5] << 40) | - ((u64)base[*off + 6] << 48) | ((u64)base[*off + 7] << 56); - *off += 8; - return v; -} -u64 dw_uleb(const u8* base, u32 size, u32* off) { - u64 v = 0; - int shift = 0; - while (*off < size) { - u8 b = base[(*off)++]; - v |= ((u64)(b & 0x7f)) << shift; - if (!(b & 0x80)) break; - shift += 7; - if (shift > 63) break; - } - return v; -} -i64 dw_sleb(const u8* base, u32 size, u32* off) { - i64 v = 0; - int shift = 0; - u8 b = 0; - while (*off < size) { - b = base[(*off)++]; - v |= ((i64)(b & 0x7f)) << shift; - shift += 7; - if (!(b & 0x80)) break; - if (shift > 63) break; - } - if (shift < 64 && (b & 0x40)) { - v |= -((i64)1 << shift); - } - return v; -} -const char* dw_cstr(const u8* base, u32 size, u32* off) { - const char* s = (const char*)base + *off; - while (*off < size && base[*off] != 0) (*off)++; - if (*off < size) (*off)++; /* consume terminator */ - return s; -} - -/* ---- string interning 
------------------------------------------------- */ - -const char* dw_intern(CfreeDebugInfo* d, const char* s, size_t len) { - Sym sym = pool_intern(d->c->global, s, len); - return pool_str(d->c->global, sym, NULL); -} - -/* Resolve a .debug_str offset. */ -const char* dw_str(CfreeDebugInfo* d, u32 offset) { - if (offset >= d->str.size) return ""; - return (const char*)(d->str.data + offset); -} - -/* Resolve a .debug_line_str offset. */ -const char* dw_line_str(CfreeDebugInfo* d, u32 offset) { - if (offset >= d->line_str.size) return ""; - return (const char*)(d->line_str.data + offset); -} - -/* Resolve a strx index via .debug_str_offsets + cu->str_offsets_base. */ -const char* dw_strx(CfreeDebugInfo* d, const DwCu* cu, u64 idx) { - /* DW5 .debug_str_offsets has a header per contribution: - * unit_length (4 or 12), version (2), padding (2), then entries. - * cu->str_offsets_base points past the header to the first entry. - * If the base attribute is absent we fall back to base=0+8 (assume 32-bit - * header at start). 
*/ - u32 base = cu->str_offsets_base; - u32 ent_size = 4; - u32 entry_off = base + (u32)idx * ent_size; - u32 str_off; - if (entry_off + ent_size > d->str_offsets.size) return ""; - { - u32 tmp = entry_off; - str_off = dw_u32(d->str_offsets.data, d->str_offsets.size, &tmp); - } - return dw_str(d, str_off); -} - -/* ---- abbrev parsing --------------------------------------------------- */ - -static void abbrev_parse_table(CfreeDebugInfo* d, u32 offset, - DwAbbrevTable* t) { - u32 off = offset; - t->cu_abbrev_offset = offset; - t->abbrevs = NULL; - t->nabbrevs = 0; - t->cap = 0; - for (;;) { - u64 code; - DwAbbrev a; - DwAbbrevAttr* attrs = NULL; - u32 nattrs = 0, attrs_cap = 0; - if (off >= d->abbrev.size) break; - code = dw_uleb(d->abbrev.data, d->abbrev.size, &off); - if (code == 0) break; /* end-of-table marker */ - a.code = code; - a.tag = (u32)dw_uleb(d->abbrev.data, d->abbrev.size, &off); - a.has_children = dw_u8(d->abbrev.data, d->abbrev.size, &off); - a.attrs = NULL; - a.nattrs = 0; - /* Read (attr, form) pairs until (0,0). */ - for (;;) { - u32 at = (u32)dw_uleb(d->abbrev.data, d->abbrev.size, &off); - u32 fm = (u32)dw_uleb(d->abbrev.data, d->abbrev.size, &off); - i64 ic = 0; - if (at == 0 && fm == 0) break; - if (fm == DW_FORM_implicit_const) { - ic = dw_sleb(d->abbrev.data, d->abbrev.size, &off); - } - if (nattrs == attrs_cap) { - u32 ncap = attrs_cap ? attrs_cap * 2 : 4; - DwAbbrevAttr* na = (DwAbbrevAttr*)d->h->realloc( - d->h, attrs, attrs_cap * sizeof(*attrs), ncap * sizeof(*attrs), - _Alignof(DwAbbrevAttr)); - if (!na) { - if (attrs) d->h->free(d->h, attrs, attrs_cap * sizeof(*attrs)); - attrs = NULL; - attrs_cap = 0; - nattrs = 0; - break; - } - attrs = na; - attrs_cap = ncap; - } - attrs[nattrs].attr = at; - attrs[nattrs].form = fm; - attrs[nattrs].implicit_const = ic; - nattrs++; - } - a.attrs = attrs; - a.nattrs = nattrs; - if (t->nabbrevs == t->cap) { - u32 ncap = t->cap ? 
t->cap * 2 : 8; - DwAbbrev* na = (DwAbbrev*)d->h->realloc( - d->h, t->abbrevs, t->cap * sizeof(*t->abbrevs), - ncap * sizeof(*t->abbrevs), _Alignof(DwAbbrev)); - if (!na) break; - t->abbrevs = na; - t->cap = ncap; - } - t->abbrevs[t->nabbrevs++] = a; - } -} - -DwAbbrevTable* dw_abbrev_get(CfreeDebugInfo* d, u32 offset) { - u32 i; - DwAbbrevTable* t; - for (i = 0; i < d->nabbrevs; ++i) { - if (d->abbrevs[i].cu_abbrev_offset == offset) return &d->abbrevs[i]; - } - if (d->nabbrevs == d->abbrevs_cap) { - u32 ncap = d->abbrevs_cap ? d->abbrevs_cap * 2 : 4; - DwAbbrevTable* na = (DwAbbrevTable*)d->h->realloc( - d->h, d->abbrevs, d->abbrevs_cap * sizeof(*d->abbrevs), - ncap * sizeof(*d->abbrevs), _Alignof(DwAbbrevTable)); - if (!na) return NULL; - d->abbrevs = na; - d->abbrevs_cap = ncap; - } - t = &d->abbrevs[d->nabbrevs++]; - abbrev_parse_table(d, offset, t); - return t; -} - -DwAbbrev* dw_abbrev_lookup(DwAbbrevTable* t, u64 code) { - u32 i; - if (!t) return NULL; - for (i = 0; i < t->nabbrevs; ++i) { - if (t->abbrevs[i].code == code) return &t->abbrevs[i]; - } - return NULL; -} - -/* ---- CU header parsing ----------------------------------------------- */ - -u32 dw_cu_parse_header(CfreeDebugInfo* d, u32 off, DwCu* cu) { - u32 start = off; - u32 unit_length; - u32 hdr_after_len_off; - cu->hdr_offset = start; - cu->is_64bit = 0; - unit_length = dw_u32(d->info.data, d->info.size, &off); - if (unit_length == 0xffffffffu) { - /* DWARF64 — initial length followed by 8-byte length. We don't - * fully support DWARF64 ourselves, but skip the unit. */ - cu->is_64bit = 1; - cu->hdr_length = 0; - cu->unit_total_size = 0; - /* Skip past CU. 
*/ - { - u64 ulen = dw_u64(d->info.data, d->info.size, &off); - cu->unit_total_size = 12 + (u32)ulen; - } - return start + cu->unit_total_size; - } - cu->hdr_length = unit_length; - cu->unit_total_size = 4 + unit_length; - hdr_after_len_off = off; /* points just past unit_length */ - cu->version = (u8)dw_u16(d->info.data, d->info.size, &off); - if (cu->version >= 5) { - cu->unit_type = dw_u8(d->info.data, d->info.size, &off); - cu->address_size = dw_u8(d->info.data, d->info.size, &off); - cu->abbrev_offset = dw_u32(d->info.data, d->info.size, &off); - } else { - /* DW4 layout: abbrev_offset, address_size. */ - cu->unit_type = 0; - cu->abbrev_offset = dw_u32(d->info.data, d->info.size, &off); - cu->address_size = dw_u8(d->info.data, d->info.size, &off); - } - cu->die_start_off = off; - cu->str_offsets_base = 0; - cu->addr_base = 0; - cu->loclists_base = 0; - cu->rnglists_base = 0; - cu->stmt_list = 0; - cu->has_stmt_list = 0; - cu->comp_dir = ""; - cu->name = ""; - /* Resolve abbrev table now (cheap & idempotent). */ - { - DwAbbrevTable* t = dw_abbrev_get(d, cu->abbrev_offset); - cu->abbrev_table_idx = (u32)(t ? (t - d->abbrevs) : 0); - } - (void)hdr_after_len_off; - return start + cu->unit_total_size; -} - -/* Read the CU root DIE to capture base attributes (str_offsets_base, - * addr_base, stmt_list, name, comp_dir). Restores no state — leaves the - * CU in its parsed-header form. */ -static void cu_read_root_attrs(CfreeDebugInfo* d, DwCu* cu) { - u32 off = cu->die_start_off; - u64 code; - DwAbbrev* ab; - DwAttrValue v; - u32 i; - DwAbbrevTable* t = &d->abbrevs[cu->abbrev_table_idx]; - if (off >= d->info.size) return; - code = dw_uleb(d->info.data, d->info.size, &off); - if (code == 0) return; - ab = dw_abbrev_lookup(t, code); - if (!ab) return; - /* First pass: pull str_offsets_base if present (so subsequent strx - * resolutions work). 
*/ - for (i = 0; i < ab->nattrs; ++i) { - DwAbbrevAttr* aa = &ab->attrs[i]; - if (aa->attr == DW_AT_str_offsets_base) { - u32 tmp = off; - /* Skip preceding attrs to locate this attr's payload — easier - * to do a full pass and remember offsets. We re-scan instead. */ - (void)tmp; - break; - } - } - /* Two-pass scan: do skipping reads, but capture base attrs. We must - * be careful: dw_read_form for strx forms uses cu->str_offsets_base, - * so we read in two passes. */ - off = cu->die_start_off; - (void)dw_uleb(d->info.data, d->info.size, &off); /* re-skip code */ - /* Pass 1: only read str_offsets_base / addr_base (forms that don't - * themselves need those bases). */ - for (i = 0; i < ab->nattrs; ++i) { - DwAbbrevAttr* aa = &ab->attrs[i]; - if (aa->attr == DW_AT_str_offsets_base || aa->attr == DW_AT_addr_base || - aa->attr == DW_AT_loclists_base || aa->attr == DW_AT_rnglists_base) { - dw_read_form(d, cu, aa->form, aa->implicit_const, &off, &v); - if (aa->attr == DW_AT_str_offsets_base) - cu->str_offsets_base = (u32)v.u; - else if (aa->attr == DW_AT_addr_base) - cu->addr_base = (u32)v.u; - else if (aa->attr == DW_AT_loclists_base) - cu->loclists_base = (u32)v.u; - else if (aa->attr == DW_AT_rnglists_base) - cu->rnglists_base = (u32)v.u; - } else { - dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); - } - } - /* Pass 2: read remaining attrs (stmt_list, name, comp_dir). */ - off = cu->die_start_off; - (void)dw_uleb(d->info.data, d->info.size, &off); - for (i = 0; i < ab->nattrs; ++i) { - DwAbbrevAttr* aa = &ab->attrs[i]; - if (aa->attr == DW_AT_stmt_list) { - dw_read_form(d, cu, aa->form, aa->implicit_const, &off, &v); - cu->stmt_list = (u32)v.u; - cu->has_stmt_list = 1; - } else if (aa->attr == DW_AT_name) { - dw_read_form(d, cu, aa->form, aa->implicit_const, &off, &v); - cu->name = v.str ? v.str : ""; - } else if (aa->attr == DW_AT_comp_dir) { - dw_read_form(d, cu, aa->form, aa->implicit_const, &off, &v); - cu->comp_dir = v.str ? 
v.str : ""; - } else { - dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); - } - } -} - -void dw_parse_all_cus(CfreeDebugInfo* d) { - u32 off = 0; - while (off < d->info.size) { - DwCu cu; - u32 next = dw_cu_parse_header(d, off, &cu); - if (next <= off) break; - if (cu.is_64bit) { - off = next; - continue; - } - if (cu.version < 2 || cu.version > 5) { - off = next; - continue; - } - if (d->ncus == d->cus_cap) { - u32 ncap = d->cus_cap ? d->cus_cap * 2 : 4; - DwCu* na = - (DwCu*)d->h->realloc(d->h, d->cus, d->cus_cap * sizeof(*d->cus), - ncap * sizeof(*d->cus), _Alignof(DwCu)); - if (!na) break; - d->cus = na; - d->cus_cap = ncap; - } - d->cus[d->ncus++] = cu; - /* Capture root attrs now. */ - cu_read_root_attrs(d, &d->cus[d->ncus - 1]); - off = next; - } -} - -DwCu* dw_cu_at_die_offset(CfreeDebugInfo* d, u32 die_offset) { - u32 i; - for (i = 0; i < d->ncus; ++i) { - DwCu* cu = &d->cus[i]; - if (die_offset >= cu->hdr_offset && - die_offset < cu->hdr_offset + cu->unit_total_size) { - return cu; - } - } - return NULL; -} - -/* ---- form decoding ---------------------------------------------------- */ - -void dw_read_form(CfreeDebugInfo* d, const DwCu* cu, u32 form, - i64 implicit_const, u32* off, DwAttrValue* out) { - out->form = form; - out->u = 0; - out->s = 0; - out->str = ""; - out->block = NULL; - out->block_len = 0; - switch (form) { - case DW_FORM_addr: - if (cu->address_size == 8) - out->u = dw_u64(d->info.data, d->info.size, off); - else - out->u = dw_u32(d->info.data, d->info.size, off); - break; - case DW_FORM_data1: - case DW_FORM_ref1: - case DW_FORM_flag: - case DW_FORM_strx1: - case DW_FORM_addrx1: - out->u = dw_u8(d->info.data, d->info.size, off); - out->s = (i64)(i8)out->u; - if (form == DW_FORM_strx1) out->str = dw_strx(d, cu, out->u); - break; - case DW_FORM_data2: - case DW_FORM_ref2: - case DW_FORM_strx2: - case DW_FORM_addrx2: - out->u = dw_u16(d->info.data, d->info.size, off); - out->s = (i64)(i16)out->u; - if (form == DW_FORM_strx2) 
out->str = dw_strx(d, cu, out->u); - break; - case DW_FORM_strx3: - case DW_FORM_addrx3: - out->u = dw_u24(d->info.data, d->info.size, off); - if (form == DW_FORM_strx3) out->str = dw_strx(d, cu, out->u); - break; - case DW_FORM_data4: - case DW_FORM_ref4: - case DW_FORM_strx4: - case DW_FORM_addrx4: - out->u = dw_u32(d->info.data, d->info.size, off); - out->s = (i64)(i32)out->u; - if (form == DW_FORM_strx4) out->str = dw_strx(d, cu, out->u); - break; - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - case DW_FORM_ref_sup8: - out->u = dw_u64(d->info.data, d->info.size, off); - out->s = (i64)out->u; - break; - case DW_FORM_data16: - /* Skip 16 bytes; not commonly needed. */ - *off += 16; - break; - case DW_FORM_sdata: - out->s = dw_sleb(d->info.data, d->info.size, off); - out->u = (u64)out->s; - break; - case DW_FORM_udata: - case DW_FORM_ref_udata: - case DW_FORM_strx: - case DW_FORM_addrx: - case DW_FORM_loclistx: - case DW_FORM_rnglistx: - out->u = dw_uleb(d->info.data, d->info.size, off); - if (form == DW_FORM_strx) out->str = dw_strx(d, cu, out->u); - break; - case DW_FORM_string: - out->str = dw_cstr(d->info.data, d->info.size, off); - break; - case DW_FORM_strp: - out->u = dw_u32(d->info.data, d->info.size, off); - out->str = dw_str(d, (u32)out->u); - break; - case DW_FORM_line_strp: - out->u = dw_u32(d->info.data, d->info.size, off); - out->str = dw_line_str(d, (u32)out->u); - break; - case DW_FORM_strp_sup: - case DW_FORM_ref_sup4: - out->u = dw_u32(d->info.data, d->info.size, off); - break; - case DW_FORM_sec_offset: - out->u = dw_u32(d->info.data, d->info.size, off); - break; - case DW_FORM_ref_addr: - /* DWARF 5: 4 bytes for 32-bit DWARF (we don't support DWARF64). 
*/ - out->u = dw_u32(d->info.data, d->info.size, off); - break; - case DW_FORM_flag_present: - out->u = 1; - break; - case DW_FORM_implicit_const: - out->s = implicit_const; - out->u = (u64)implicit_const; - break; - case DW_FORM_block1: { - u32 n = dw_u8(d->info.data, d->info.size, off); - out->block = d->info.data + *off; - out->block_len = n; - out->u = n; - *off += n; - } break; - case DW_FORM_block2: { - u32 n = dw_u16(d->info.data, d->info.size, off); - out->block = d->info.data + *off; - out->block_len = n; - out->u = n; - *off += n; - } break; - case DW_FORM_block4: { - u32 n = dw_u32(d->info.data, d->info.size, off); - out->block = d->info.data + *off; - out->block_len = n; - out->u = n; - *off += n; - } break; - case DW_FORM_block: - case DW_FORM_exprloc: { - u32 n = (u32)dw_uleb(d->info.data, d->info.size, off); - out->block = d->info.data + *off; - out->block_len = n; - out->u = n; - *off += n; - } break; - case DW_FORM_indirect: { - u32 ifrm = (u32)dw_uleb(d->info.data, d->info.size, off); - dw_read_form(d, cu, ifrm, 0, off, out); - } break; - default: - /* Unknown form — best effort: skip nothing. 
*/ - break; - } -} - -void dw_skip_form(CfreeDebugInfo* d, const DwCu* cu, u32 form, - i64 implicit_const, u32* off) { - DwAttrValue tmp; - dw_read_form(d, cu, form, implicit_const, off, &tmp); -} - -/* ---- DIE iteration ---------------------------------------------------- */ - -int dw_read_die(CfreeDebugInfo* d, const DwCu* cu, u32* off, DwDie* out) { - u64 code; - out->die_off = *off; - if (*off >= d->info.size || *off >= cu->hdr_offset + cu->unit_total_size) { - out->abbrev_code = 0; - out->abbrev = NULL; - out->attrs_off = *off; - return 0; - } - code = dw_uleb(d->info.data, d->info.size, off); - out->abbrev_code = code; - out->attrs_off = *off; - out->next_sibling_off = 0; - if (code == 0) { - out->abbrev = NULL; - return 0; - } - out->abbrev = dw_abbrev_lookup(&d->abbrevs[cu->abbrev_table_idx], code); - return 1; -} - -void dw_skip_die_attrs(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, - u32* off) { - u32 i; - if (!die->abbrev) return; - for (i = 0; i < die->abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die->abbrev->attrs[i]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, off); - } -} - -void dw_skip_die_subtree(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, - u32* off) { - if (!die->abbrev) return; - dw_skip_die_attrs(d, cu, die, off); - if (die->abbrev->has_children) { - for (;;) { - DwDie child; - if (!dw_read_die(d, cu, off, &child)) break; - dw_skip_die_subtree(d, cu, &child, off); - } - } -} - -int dw_die_attr(CfreeDebugInfo* d, const DwCu* cu, DwDie* die, u32 attr, - DwAttrValue* out) { - u32 off = die->attrs_off; - u32 i; - if (!die->abbrev) return 0; - for (i = 0; i < die->abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die->abbrev->attrs[i]; - if (aa->attr == attr) { - dw_read_form(d, cu, aa->form, aa->implicit_const, &off, out); - return 1; - } - dw_skip_form(d, cu, aa->form, aa->implicit_const, &off); - } - return 0; -} - -/* ---- public open/close ----------------------------------------------- */ - -CfreeDebugInfo* 
cfree_dwarf_open(CfreeCompiler* c, const CfreeObjFile* obj) { - Heap* h; - CfreeDebugInfo* d; - if (!c || !obj) return NULL; - h = (Heap*)c->env->heap; - d = (CfreeDebugInfo*)h->alloc(h, sizeof(*d), _Alignof(CfreeDebugInfo)); - if (!d) return NULL; - memset(d, 0, sizeof(*d)); - d->c = c; - d->h = h; - d->obj = obj; - - dw_find_section(d, ".debug_abbrev", &d->abbrev); - dw_find_section(d, ".debug_info", &d->info); - dw_find_section(d, ".debug_line", &d->line); - dw_find_section(d, ".debug_str", &d->str); - dw_find_section(d, ".debug_line_str", &d->line_str); - dw_find_section(d, ".debug_str_offsets", &d->str_offsets); - dw_find_section(d, ".debug_addr", &d->addr); - dw_find_section(d, ".debug_loclists", &d->loclists); - dw_find_section(d, ".debug_rnglists", &d->rnglists); - dw_find_section(d, ".eh_frame", &d->eh_frame); - dw_find_section(d, ".debug_aranges", &d->aranges); - - if (d->abbrev.sec_idx == UINT32_MAX || d->info.sec_idx == UINT32_MAX || - d->line.sec_idx == UINT32_MAX || d->str.sec_idx == UINT32_MAX || - d->line_str.sec_idx == UINT32_MAX) { - cfree_dwarf_close(d); - return NULL; - } - - /* str_offsets_base default: in the absence of DW_AT_str_offsets_base, the - * offsets section starts with an 8-byte header (uniform for DW5). */ - dw_parse_all_cus(d); - if (d->ncus == 0) { - cfree_dwarf_close(d); - return NULL; - } - - /* Allocate per-CU lazy line-program state. 
*/ - if (d->ncus) { - d->lines_by_cu = (DwLineProgram*)h->alloc( - h, d->ncus * sizeof(DwLineProgram), _Alignof(DwLineProgram)); - d->lines_built = (u8*)h->alloc(h, d->ncus, 1); - if (!d->lines_by_cu || !d->lines_built) { - cfree_dwarf_close(d); - return NULL; - } - memset(d->lines_by_cu, 0, d->ncus * sizeof(DwLineProgram)); - memset(d->lines_built, 0, d->ncus); - } - - return d; -} - -static void free_subprog(Heap* h, DwSubprog* sp) { - if (sp->params) h->free(h, sp->params, sp->nparams * sizeof(DwLocal)); - if (sp->locals) h->free(h, sp->locals, sp->nlocals * sizeof(DwLocal)); -} - -void cfree_dwarf_close(CfreeDebugInfo* d) { - Heap* h; - u32 i; - if (!d) return; - h = d->h; - for (i = 0; i < d->nabbrevs; ++i) { - u32 j; - DwAbbrevTable* t = &d->abbrevs[i]; - for (j = 0; j < t->nabbrevs; ++j) { - if (t->abbrevs[j].attrs) - h->free(h, t->abbrevs[j].attrs, - t->abbrevs[j].nattrs * sizeof(DwAbbrevAttr)); - } - if (t->abbrevs) h->free(h, t->abbrevs, t->cap * sizeof(DwAbbrev)); - } - if (d->abbrevs) - h->free(h, d->abbrevs, d->abbrevs_cap * sizeof(DwAbbrevTable)); - if (d->cus) h->free(h, d->cus, d->cus_cap * sizeof(DwCu)); - - if (d->lines_by_cu) { - for (i = 0; i < d->ncus; ++i) { - DwLineProgram* lp = &d->lines_by_cu[i]; - if (lp->rows) h->free(h, lp->rows, lp->cap * sizeof(DwLineRow)); - if (lp->files) h->free(h, lp->files, lp->nfiles * sizeof(DwLineFile)); - if (lp->dirs) h->free(h, lp->dirs, lp->ndirs * sizeof(const char*)); - if (lp->file_norm) - h->free(h, lp->file_norm, lp->nfile_norm * sizeof(const char*)); - } - h->free(h, d->lines_by_cu, d->ncus * sizeof(DwLineProgram)); - } - if (d->lines_built) h->free(h, d->lines_built, d->ncus); - - for (i = 0; i < d->nsubs; ++i) free_subprog(h, &d->subs[i]); - if (d->subs) h->free(h, d->subs, d->subs_cap * sizeof(DwSubprog)); - - for (i = 0; i < d->ntypes; ++i) { - CfreeDwarfType* t = d->types_by_off[i]; - if (!t) continue; - if (t->fields) h->free(h, t->fields, t->nfields * sizeof(DwField)); - if (t->evals) 
h->free(h, t->evals, t->nevals * sizeof(DwEnumVal)); - h->free(h, t, sizeof(*t)); - } - if (d->types_by_off) - h->free(h, d->types_by_off, d->types_cap * sizeof(CfreeDwarfType*)); - if (d->types_off) h->free(h, d->types_off, d->types_cap * sizeof(u32)); - - if (d->globals) h->free(h, d->globals, d->globals_cap * sizeof(DwLocal)); - - h->free(h, d, sizeof(*d)); -} diff --git a/src/dwarf/dwarf_query.c b/src/dwarf/dwarf_query.c @@ -1,370 +0,0 @@ -/* dwarf_query.c — public cfree_dwarf_* query entry points. - * - * Implements the consumer half of doc/DWARF.md: - * subprogram_at / func_at, var_at, vars_at_*, param_iter_*, loc_read. - */ - -#include <cfree.h> -#include <stddef.h> -#include <stdint.h> -#include <string.h> - -#include "core/core.h" -#include "core/heap.h" -#include "dwarf/dwarf_internal.h" - -int cfree_dwarf_subprogram_at(CfreeDebugInfo* d, uint64_t pc, - CfreeDwarfSubprogram* out) { - DwSubprog* sp; - if (!d || !out) return 1; - memset(out, 0, sizeof(*out)); - sp = dw_find_subprog(d, pc); - if (!sp) return 1; - out->name = sp->name ? sp->name : ""; - out->low_pc = sp->low_pc; - out->high_pc = sp->high_pc; - out->decl_file = sp->decl_file ? 
sp->decl_file : ""; - out->decl_line = sp->decl_line; - out->inlined = sp->inlined; - return 0; -} - -int cfree_dwarf_func_at(CfreeDebugInfo* d, uint64_t pc, const char** name_out, - uint64_t* low_out, uint64_t* high_out) { - CfreeDwarfSubprogram sp; - if (cfree_dwarf_subprogram_at(d, pc, &sp) != 0) return 1; - if (name_out) *name_out = sp.name; - if (low_out) *low_out = sp.low_pc; - if (high_out) *high_out = sp.high_pc; - return 0; -} - -/* ---- variable resolution -------------------------------------------- */ - -static void fill_varloc(CfreeDebugInfo* d, u32 cu_idx, const DwLocal* v, u64 pc, - CfreeDwarfVarLoc* out) { - const u8* lbytes = v->loc; - u32 llen = v->loc_len; - memset(out, 0, sizeof(*out)); - out->kind = CFREE_DLOC_EXPR; - out->byte_size = 0; - out->type = NULL; - if (v->type_die_offset) { - out->type = dw_type_from_die(d, cu_idx, v->type_die_offset); - if (out->type) out->byte_size = out->type->byte_size; - } - /* If the variable was emitted with a loclistx, resolve it now. The - * resolved bytes get the same single-op fast-path treatment below. */ - if (v->has_loclist && cu_idx < d->ncus) { - const u8* lb = NULL; - u32 ll = 0; - if (dw_loclist_resolve(d, &d->cus[cu_idx], v->loclist_index, pc, &lb, - &ll)) { - lbytes = lb; - llen = ll; - } else { - /* No active entry for this PC — variable is currently unavailable. */ - out->kind = CFREE_DLOC_EXPR; - out->v.expr.bytes = NULL; - out->v.expr.len = 0; - return; - } - } - /* Inspect the loc bytes — if it's a single op of a recognized form, - * we expose the structured kind so callers can fast-path. Otherwise - * we surface the raw bytes as EXPR. 
*/ - if (lbytes && llen > 0) { - const u8* e = lbytes; - if (llen == 1 && e[0] >= DW_OP_reg0 && e[0] <= DW_OP_reg0 + 31) { - out->kind = CFREE_DLOC_REG; - out->v.reg = e[0] - DW_OP_reg0; - return; - } - if (e[0] == DW_OP_regx) { - u32 off = 1; - u64 r = dw_uleb(e, llen, &off); - if (off == llen) { - out->kind = CFREE_DLOC_REG; - out->v.reg = (u32)r; - return; - } - } - if (e[0] == DW_OP_fbreg) { - u32 off = 1; - i64 ofs = dw_sleb(e, llen, &off); - if (off == llen) { - out->kind = CFREE_DLOC_FRAME_OFS; - out->v.frame_ofs = (i32)ofs; - return; - } - } - if (e[0] == DW_OP_addr && llen == 9) { - u32 off = 1; - out->kind = CFREE_DLOC_GLOBAL; - out->v.global = dw_u64(e, llen, &off); - return; - } - /* Fallback: opaque expression bytes. */ - out->kind = CFREE_DLOC_EXPR; - out->v.expr.bytes = lbytes; - out->v.expr.len = llen; - return; - } - /* No location at all — leave kind=EXPR with NULL/0. */ - out->kind = CFREE_DLOC_EXPR; - out->v.expr.bytes = NULL; - out->v.expr.len = 0; -} - -int cfree_dwarf_var_at(CfreeDebugInfo* d, uint64_t pc, const char* name, - CfreeDwarfVarLoc* out) { - /* Return codes: - * 0 — found; *out filled. - * 1 — invalid args, or `pc` lies inside a known subprogram but no - * variable named `name` is visible there (the user typo case). - * 2 — `pc` is not covered by any subprogram (no debug info for this - * frame). REPL: "no debug info for this frame". Globals are - * still consulted before returning 2 so a name lookup against a - * global from a -g-less frame still resolves. */ - DwSubprog* sp; - u32 i; - if (!d || !name || !out) return 1; - memset(out, 0, sizeof(*out)); - sp = dw_find_subprog(d, pc); - if (sp) { - dw_build_locals(d, sp); - /* Deepest scope first: walk locals from end (innermost blocks added - * after enclosing). 
*/ - for (i = sp->nlocals; i > 0; --i) { - DwLocal* v = &sp->locals[i - 1]; - if (!v->name || !dw_streq(v->name, name)) continue; - if (v->has_scope && (pc < v->scope_lo || pc >= v->scope_hi)) continue; - fill_varloc(d, sp->cu_idx, v, pc, out); - return 0; - } - /* Then params. */ - for (i = 0; i < sp->nparams; ++i) { - DwLocal* v = &sp->params[i]; - if (!v->name || !dw_streq(v->name, name)) continue; - fill_varloc(d, sp->cu_idx, v, pc, out); - return 0; - } - } - /* Globals. */ - dw_build_globals(d); - for (i = 0; i < d->nglobals; ++i) { - DwLocal* v = &d->globals[i]; - if (!v->name || !dw_streq(v->name, name)) continue; - fill_varloc(d, 0, v, pc, out); - return 0; - } - return sp ? 1 : 2; -} - -int cfree_dwarf_loc_read(CfreeDebugInfo* d, const CfreeDwarfVarLoc* loc, - const CfreeUnwindFrame* frame, CfreeJitSession* sess, - void* dst, size_t cap, size_t* read_out) { - size_t want; - if (read_out) *read_out = 0; - if (!d || !loc || !frame || !dst) return 1; - want = loc->byte_size ? loc->byte_size : cap; - if (want > cap) want = cap; - switch (loc->kind) { - case CFREE_DLOC_REG: { - uint64_t v = (loc->v.reg < 32) ? frame->regs[loc->v.reg] : 0; - size_t n = want > sizeof(v) ? sizeof(v) : want; - memcpy(dst, &v, n); - if (read_out) *read_out = n; - return 0; - } - case CFREE_DLOC_FRAME_OFS: { - uint64_t addr = frame->cfa + (uint64_t)(int64_t)loc->v.frame_ofs; - if (!sess) return 1; - if (cfree_jit_session_read_mem(sess, addr, dst, want) != 0) return 1; - if (read_out) *read_out = want; - return 0; - } - case CFREE_DLOC_GLOBAL: { - uint64_t addr = loc->v.global; - if (!sess) return 1; - if (cfree_jit_session_read_mem(sess, addr, dst, want) != 0) return 1; - if (read_out) *read_out = want; - return 0; - } - case CFREE_DLOC_EXPR: { - /* Evaluate. We don't have direct access to the variable's - * subprogram's frame_base here — caller-supplied frame must already - * carry the right CFA. 
The expression itself may be DW_OP_call_frame_cfa - * + DW_OP_consts + DW_OP_plus, etc. */ - DwExprResult r; - if (loc->v.expr.bytes == NULL || loc->v.expr.len == 0) return 1; - if (dw_eval_expr(d, loc->v.expr.bytes, (u32)loc->v.expr.len, NULL, 0, - frame, &r) != 0) - return 1; - if (r.kind == 0) { - if (!sess) return 1; - if (cfree_jit_session_read_mem(sess, r.value, dst, want) != 0) return 1; - if (read_out) *read_out = want; - return 0; - } else if (r.kind == 1) { - size_t n = want > sizeof(r.value) ? sizeof(r.value) : want; - memcpy(dst, &r.value, n); - if (read_out) *read_out = n; - return 0; - } else if (r.kind == 2) { - u64 v = (r.value < 32) ? frame->regs[r.value] : 0; - size_t n = want > sizeof(v) ? sizeof(v) : want; - memcpy(dst, &v, n); - if (read_out) *read_out = n; - return 0; - } - return 1; - } - } - return 1; -} - -/* ---- vars_at_* iterator --------------------------------------------- */ - -struct CfreeDwarfVarIter { - CfreeDebugInfo* d; - DwSubprog* sp; - u64 pc; - u32 mask; - u32 phase; /* 0 = locals, 1 = params, 2 = globals, 3 = done */ - u32 idx; -}; - -CfreeDwarfVarIter* cfree_dwarf_vars_at_new(CfreeDebugInfo* d, uint64_t pc, - uint32_t mask) { - CfreeDwarfVarIter* it; - if (!d) return NULL; - it = (CfreeDwarfVarIter*)d->h->alloc(d->h, sizeof(*it), - _Alignof(CfreeDwarfVarIter)); - if (!it) return NULL; - it->d = d; - it->pc = pc; - it->mask = mask; - it->sp = dw_find_subprog(d, pc); - if (it->sp) dw_build_locals(d, it->sp); - it->phase = 0; - it->idx = it->sp ? 
it->sp->nlocals : 0; - return it; -} - -int cfree_dwarf_vars_at_next(CfreeDwarfVarIter* it, CfreeDwarfVar* out) { - if (!it || !out) return 0; - for (;;) { - switch (it->phase) { - case 0: { - if (!(it->mask & (1u << CFREE_DVR_LOCAL))) { - it->phase = 1; - it->idx = 0; - break; - } - if (it->idx == 0) { - it->phase = 1; - it->idx = 0; - break; - } - { - DwLocal* v = &it->sp->locals[--it->idx]; - if (v->has_scope && (it->pc < v->scope_lo || it->pc >= v->scope_hi)) - break; - out->name = v->name ? v->name : ""; - out->role = CFREE_DVR_LOCAL; - fill_varloc(it->d, it->sp->cu_idx, v, it->pc, &out->loc); - return 1; - } - } - case 1: { - if (!it->sp || !(it->mask & (1u << CFREE_DVR_ARG))) { - it->phase = 2; - it->idx = 0; - break; - } - if (it->idx >= it->sp->nparams) { - it->phase = 2; - it->idx = 0; - break; - } - { - DwLocal* v = &it->sp->params[it->idx++]; - out->name = v->name ? v->name : ""; - out->role = CFREE_DVR_ARG; - fill_varloc(it->d, it->sp->cu_idx, v, it->pc, &out->loc); - return 1; - } - } - case 2: { - if (!(it->mask & (1u << CFREE_DVR_GLOBAL))) { - it->phase = 3; - break; - } - dw_build_globals(it->d); - if (it->idx >= it->d->nglobals) { - it->phase = 3; - break; - } - { - DwLocal* v = &it->d->globals[it->idx++]; - out->name = v->name ? 
v->name : ""; - out->role = CFREE_DVR_GLOBAL; - fill_varloc(it->d, 0, v, it->pc, &out->loc); - return 1; - } - } - default: - return 0; - } - } -} - -void cfree_dwarf_vars_at_free(CfreeDwarfVarIter* it) { - if (!it) return; - it->d->h->free(it->d->h, it, sizeof(*it)); -} - -/* ---- param_iter_* --------------------------------------------------- */ - -struct CfreeDwarfParamIter { - CfreeDebugInfo* d; - DwSubprog* sp; - u64 pc; - u32 idx; -}; - -CfreeDwarfParamIter* cfree_dwarf_param_iter_new(CfreeDebugInfo* d, - uint64_t pc) { - CfreeDwarfParamIter* it; - DwSubprog* sp; - if (!d) return NULL; - sp = dw_find_subprog(d, pc); - if (!sp) return NULL; - dw_build_locals(d, sp); - it = (CfreeDwarfParamIter*)d->h->alloc(d->h, sizeof(*it), - _Alignof(CfreeDwarfParamIter)); - if (!it) return NULL; - it->d = d; - it->sp = sp; - it->pc = pc; - it->idx = 0; - return it; -} - -int cfree_dwarf_param_iter_next(CfreeDwarfParamIter* it, CfreeDwarfVar* out) { - if (!it || !out) return 0; - if (it->idx >= it->sp->nparams) return 0; - { - DwLocal* v = &it->sp->params[it->idx++]; - out->name = v->name ? v->name : ""; - out->role = CFREE_DVR_ARG; - fill_varloc(it->d, it->sp->cu_idx, v, it->pc, &out->loc); - } - return 1; -} - -void cfree_dwarf_param_iter_free(CfreeDwarfParamIter* it) { - if (!it) return; - it->d->h->free(it->d->h, it, sizeof(*it)); -} diff --git a/src/dwarf/dwarf_type.c b/src/dwarf/dwarf_type.c @@ -1,509 +0,0 @@ -/* dwarf_type.c — type DIE → CfreeDwarfType resolution. - * - * Builds CfreeDwarfType records on demand from DW_TAG_base_type, - * DW_TAG_pointer_type, DW_TAG_array_type, struct/union/enum, typedef, - * and qualifier-types (const/volatile/restrict transparent to inner). 
- */ - -#include <cfree.h> -#include <stddef.h> -#include <stdint.h> -#include <string.h> - -#include "core/core.h" -#include "core/heap.h" -#include "core/util.h" -#include "dwarf/dwarf_internal.h" - -static CfreeDwarfType* type_cache_get(CfreeDebugInfo* d, u32 die_offset) { - u32 i; - for (i = 0; i < d->ntypes; ++i) { - if (d->types_off[i] == die_offset) return d->types_by_off[i]; - } - return NULL; -} - -static void type_cache_put(CfreeDebugInfo* d, u32 die_offset, - CfreeDwarfType* t) { - if (d->ntypes == d->types_cap) { - u32 ncap = d->types_cap ? d->types_cap * 2 : 16; - CfreeDwarfType** na = (CfreeDwarfType**)d->h->realloc( - d->h, d->types_by_off, d->types_cap * sizeof(*d->types_by_off), - ncap * sizeof(*d->types_by_off), _Alignof(CfreeDwarfType*)); - u32* no = - (u32*)d->h->realloc(d->h, d->types_off, d->types_cap * sizeof(u32), - ncap * sizeof(u32), _Alignof(u32)); - if (!na || !no) return; - d->types_by_off = na; - d->types_off = no; - d->types_cap = ncap; - } - d->types_by_off[d->ntypes] = t; - d->types_off[d->ntypes] = die_offset; - d->ntypes++; -} - -static CfreeDwarfType* type_alloc(CfreeDebugInfo* d) { - CfreeDwarfType* t = - (CfreeDwarfType*)d->h->alloc(d->h, sizeof(*t), _Alignof(CfreeDwarfType)); - if (!t) return NULL; - memset(t, 0, sizeof(*t)); - t->name = ""; - return t; -} - -CfreeDwarfType* dw_void_type(CfreeDebugInfo* d) { - CfreeDwarfType* t = type_cache_get(d, 0); - if (t) return t; - t = type_alloc(d); - if (!t) return NULL; - t->kind = DTK_VOID; - type_cache_put(d, 0, t); - return t; -} - -/* Walk struct/union children for fields, or enum children for values. 
*/ -static void walk_struct_fields(CfreeDebugInfo* d, DwCu* cu, u32* off, - CfreeDwarfType* t) { - DwField* fields = NULL; - u32 nfields = 0, cap = 0; - for (;;) { - DwDie die; - if (!dw_read_die(d, cu, off, &die)) break; - if (die.abbrev->tag == DW_TAG_member) { - DieAttrPack p; - dw_die_pack(d, cu, &die, &p); - /* skip past die's attrs */ - { - u32 i; - for (i = 0; i < die.abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die.abbrev->attrs[i]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, off); - } - } - if (nfields == cap) { - u32 ncap = cap ? cap * 2 : 4; - DwField* na = - (DwField*)d->h->realloc(d->h, fields, cap * sizeof(*fields), - ncap * sizeof(*fields), _Alignof(DwField)); - if (!na) break; - fields = na; - cap = ncap; - } - fields[nfields].name = p.name ? p.name : ""; - fields[nfields].byte_offset = p.has_byte_offset ? p.byte_offset : 0; - fields[nfields].bit_offset = p.has_bit_offset ? p.bit_offset : 0; - fields[nfields].bit_size = p.has_bit_size ? p.bit_size : 0; - fields[nfields].type = - p.has_type - ? dw_type_from_die(d, (u32)(cu - d->cus), p.type_die_offset) - : dw_void_type(d); - nfields++; - if (die.abbrev->has_children) { - for (;;) { - DwDie c; - if (!dw_read_die(d, cu, off, &c)) break; - dw_skip_die_subtree(d, cu, &c, off); - } - } - } else { - dw_skip_die_subtree(d, cu, &die, off); - } - } - t->fields = fields; - t->nfields = nfields; -} - -static void walk_enum_values(CfreeDebugInfo* d, DwCu* cu, u32* off, - CfreeDwarfType* t) { - DwEnumVal* evs = NULL; - u32 nev = 0, cap = 0; - for (;;) { - DwDie die; - if (!dw_read_die(d, cu, off, &die)) break; - if (die.abbrev->tag == DW_TAG_enumerator) { - DieAttrPack p; - dw_die_pack(d, cu, &die, &p); - { - u32 i; - for (i = 0; i < die.abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die.abbrev->attrs[i]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, off); - } - } - if (nev == cap) { - u32 ncap = cap ? 
cap * 2 : 4; - DwEnumVal* na = - (DwEnumVal*)d->h->realloc(d->h, evs, cap * sizeof(*evs), - ncap * sizeof(*evs), _Alignof(DwEnumVal)); - if (!na) break; - evs = na; - cap = ncap; - } - evs[nev].name = p.name ? p.name : ""; - evs[nev].value = p.has_const_value ? p.const_value : 0; - nev++; - if (die.abbrev->has_children) { - for (;;) { - DwDie c; - if (!dw_read_die(d, cu, off, &c)) break; - dw_skip_die_subtree(d, cu, &c, off); - } - } - } else { - dw_skip_die_subtree(d, cu, &die, off); - } - } - t->evals = evs; - t->nevals = nev; -} - -/* For DW_TAG_array_type: child DW_TAG_subrange_type carries upper_bound / - * count. */ -static void walk_array_subrange(CfreeDebugInfo* d, DwCu* cu, u32* off, - CfreeDwarfType* t) { - for (;;) { - DwDie die; - if (!dw_read_die(d, cu, off, &die)) break; - if (die.abbrev->tag == DW_TAG_subrange_type) { - DieAttrPack p; - dw_die_pack(d, cu, &die, &p); - { - u32 i; - for (i = 0; i < die.abbrev->nattrs; ++i) { - DwAbbrevAttr* aa = &die.abbrev->attrs[i]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, off); - } - } - if (p.has_array_count) t->element_count = p.array_count; - if (die.abbrev->has_children) { - for (;;) { - DwDie c; - if (!dw_read_die(d, cu, off, &c)) break; - dw_skip_die_subtree(d, cu, &c, off); - } - } - } else { - dw_skip_die_subtree(d, cu, &die, off); - } - } -} - -CfreeDwarfType* dw_type_from_die(CfreeDebugInfo* d, u32 cu_idx, - u32 die_offset) { - DwCu* cu; - DwDie die; - u32 off; - CfreeDwarfType* t; - DieAttrPack p; - if (die_offset == 0) return dw_void_type(d); - t = type_cache_get(d, die_offset); - if (t) return t; - /* Resolve CU containing the DIE. */ - cu = dw_cu_at_die_offset(d, die_offset); - if (!cu) { - if (cu_idx < d->ncus) - cu = &d->cus[cu_idx]; - else - return dw_void_type(d); - } - off = die_offset; - if (!dw_read_die(d, cu, &off, &die)) return dw_void_type(d); - if (!die.abbrev) return dw_void_type(d); - dw_die_pack(d, cu, &die, &p); - /* Allocate before recursing — break cycles by interning early. 
*/ - t = type_alloc(d); - if (!t) return dw_void_type(d); - t->die_offset = die_offset; - type_cache_put(d, die_offset, t); - - switch (die.abbrev->tag) { - case DW_TAG_base_type: - t->kind = DTK_BASE; - t->name = p.name ? p.name : ""; - t->byte_size = p.byte_size; - t->base_encoding = p.base_encoding; - break; - case DW_TAG_pointer_type: - case DW_TAG_reference_type: - t->kind = DTK_PTR; - t->byte_size = p.has_byte_size ? p.byte_size : 8; - t->name = ""; - t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), - p.type_die_offset) - : dw_void_type(d); - break; - case DW_TAG_typedef: - t->kind = DTK_TYPEDEF; - t->name = p.name ? p.name : ""; - t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), - p.type_die_offset) - : dw_void_type(d); - if (t->inner) t->byte_size = t->inner->byte_size; - break; - case DW_TAG_const_type: - case DW_TAG_volatile_type: - case DW_TAG_restrict_type: - t->kind = (die.abbrev->tag == DW_TAG_const_type) ? DTK_CONST - : (die.abbrev->tag == DW_TAG_volatile_type) ? DTK_VOLATILE - : DTK_RESTRICT; - t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), - p.type_die_offset) - : dw_void_type(d); - if (t->inner) { - t->byte_size = t->inner->byte_size; - t->name = t->inner->name; - } - break; - case DW_TAG_array_type: - t->kind = DTK_ARRAY; - t->name = ""; - t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), - p.type_die_offset) - : dw_void_type(d); - if (die.abbrev->has_children) { - u32 cur = off; - /* Skip attrs (already read into p). */ - u32 ii; - for (ii = 0; ii < die.abbrev->nattrs; ++ii) { - DwAbbrevAttr* aa = &die.abbrev->attrs[ii]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, &cur); - } - walk_array_subrange(d, cu, &cur, t); - } - if (t->inner && t->element_count) - t->byte_size = t->inner->byte_size * t->element_count; - break; - case DW_TAG_structure_type: - case DW_TAG_class_type: - t->kind = DTK_STRUCT; - t->name = p.name ? 
p.name : ""; - t->byte_size = p.byte_size; - if (die.abbrev->has_children) { - u32 cur = off; - u32 ii; - for (ii = 0; ii < die.abbrev->nattrs; ++ii) { - DwAbbrevAttr* aa = &die.abbrev->attrs[ii]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, &cur); - } - walk_struct_fields(d, cu, &cur, t); - } - break; - case DW_TAG_union_type: - t->kind = DTK_UNION; - t->name = p.name ? p.name : ""; - t->byte_size = p.byte_size; - if (die.abbrev->has_children) { - u32 cur = off; - u32 ii; - for (ii = 0; ii < die.abbrev->nattrs; ++ii) { - DwAbbrevAttr* aa = &die.abbrev->attrs[ii]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, &cur); - } - walk_struct_fields(d, cu, &cur, t); - } - break; - case DW_TAG_enumeration_type: - t->kind = DTK_ENUM; - t->name = p.name ? p.name : ""; - t->byte_size = p.byte_size; - t->inner = p.has_type ? dw_type_from_die(d, (u32)(cu - d->cus), - p.type_die_offset) - : dw_void_type(d); - if (die.abbrev->has_children) { - u32 cur = off; - u32 ii; - for (ii = 0; ii < die.abbrev->nattrs; ++ii) { - DwAbbrevAttr* aa = &die.abbrev->attrs[ii]; - dw_skip_form(d, cu, aa->form, aa->implicit_const, &cur); - } - walk_enum_values(d, cu, &cur, t); - } - break; - case DW_TAG_subroutine_type: - t->kind = DTK_FUNC; - t->name = ""; - t->inner = p.has_type ? 
dw_type_from_die(d, (u32)(cu - d->cus), - p.type_die_offset) - : dw_void_type(d); - break; - default: - t->kind = DTK_VOID; - break; - } - return t; -} - -/* ---- public type-info accessors -------------------------------------- */ - -static CfreeDwarfTypeKind map_kind(const CfreeDwarfType* t) { - if (!t) return CFREE_DT_VOID; - switch (t->kind) { - case DTK_VOID: - return CFREE_DT_VOID; - case DTK_PTR: - return CFREE_DT_PTR; - case DTK_ARRAY: - return CFREE_DT_ARRAY; - case DTK_STRUCT: - return CFREE_DT_STRUCT; - case DTK_UNION: - return CFREE_DT_UNION; - case DTK_ENUM: - return CFREE_DT_ENUM; - case DTK_TYPEDEF: - return CFREE_DT_TYPEDEF; - case DTK_FUNC: - return CFREE_DT_FUNC; - case DTK_CONST: - case DTK_VOLATILE: - case DTK_RESTRICT: - return t->inner ? map_kind(t->inner) : CFREE_DT_VOID; - case DTK_BASE: - switch (t->base_encoding) { - case DW_ATE_boolean: - return CFREE_DT_BOOL; - case DW_ATE_float: - case DW_ATE_complex_float: - return CFREE_DT_FLOAT; - case DW_ATE_signed_char: - return CFREE_DT_CHAR; - case DW_ATE_unsigned_char: - return CFREE_DT_CHAR; - case DW_ATE_unsigned: - case DW_ATE_address: - case DW_ATE_UTF: - return CFREE_DT_UINT; - case DW_ATE_signed: - return CFREE_DT_SINT; - default: - return CFREE_DT_UINT; - } - } - return CFREE_DT_VOID; -} - -CfreeDwarfTypeInfo cfree_dwarf_type_info(const CfreeDwarfType* t) { - CfreeDwarfTypeInfo info; - memset(&info, 0, sizeof(info)); - info.name = ""; - if (!t) { - info.kind = CFREE_DT_VOID; - return info; - } - info.kind = map_kind(t); - info.byte_size = t->byte_size; - info.name = t->name ? t->name : ""; - info.element_count = t->element_count; - /* For TYPEDEF/PTR/ARRAY: expose inner. For BASE_CHAR map signedness. 
*/ - switch (t->kind) { - case DTK_BASE: - if (t->base_encoding == DW_ATE_signed_char) - info.kind = CFREE_DT_SINT; - else if (t->base_encoding == DW_ATE_unsigned_char) - info.kind = CFREE_DT_UINT; - break; - case DTK_PTR: - case DTK_ARRAY: - case DTK_TYPEDEF: - case DTK_FUNC: - info.inner = t->inner; - break; - case DTK_CONST: - case DTK_VOLATILE: - case DTK_RESTRICT: - /* Transparent: report inner directly. */ - if (t->inner) { - return cfree_dwarf_type_info(t->inner); - } - break; - default: - break; - } - return info; -} - -/* Field iterator. */ -struct CfreeDwarfFieldIter { - CfreeDebugInfo* d; - const CfreeDwarfType* t; - u32 idx; -}; - -CfreeDwarfFieldIter* cfree_dwarf_field_iter_new(CfreeDebugInfo* d, - const CfreeDwarfType* t) { - CfreeDwarfFieldIter* it; - if (!d || !t) return NULL; - it = (CfreeDwarfFieldIter*)d->h->alloc(d->h, sizeof(*it), - _Alignof(CfreeDwarfFieldIter)); - if (!it) return NULL; - it->d = d; - /* Look through typedef / qualifiers to the underlying aggregate. */ - while (t && (t->kind == DTK_TYPEDEF || t->kind == DTK_CONST || - t->kind == DTK_VOLATILE || t->kind == DTK_RESTRICT)) - t = t->inner; - it->t = t; - it->idx = 0; - return it; -} - -int cfree_dwarf_field_iter_next(CfreeDwarfFieldIter* it, CfreeDwarfField* out) { - const CfreeDwarfType* t; - if (!it || !out || !it->t) return 0; - t = it->t; - if (t->kind != DTK_STRUCT && t->kind != DTK_UNION) return 0; - if (it->idx >= t->nfields) return 0; - { - DwField* f = &t->fields[it->idx++]; - out->name = f->name ? 
f->name : ""; - out->byte_offset = f->byte_offset; - out->bit_offset = f->bit_offset; - out->bit_size = f->bit_size; - out->type = f->type; - } - return 1; -} - -void cfree_dwarf_field_iter_free(CfreeDwarfFieldIter* it) { - if (!it) return; - it->d->h->free(it->d->h, it, sizeof(*it)); -} - -struct CfreeDwarfEnumIter { - CfreeDebugInfo* d; - const CfreeDwarfType* t; - u32 idx; -}; - -CfreeDwarfEnumIter* cfree_dwarf_enum_iter_new(CfreeDebugInfo* d, - const CfreeDwarfType* t) { - CfreeDwarfEnumIter* it; - if (!d || !t) return NULL; - it = (CfreeDwarfEnumIter*)d->h->alloc(d->h, sizeof(*it), - _Alignof(CfreeDwarfEnumIter)); - if (!it) return NULL; - it->d = d; - while (t && (t->kind == DTK_TYPEDEF || t->kind == DTK_CONST || - t->kind == DTK_VOLATILE || t->kind == DTK_RESTRICT)) - t = t->inner; - it->t = t; - it->idx = 0; - return it; -} - -int cfree_dwarf_enum_iter_next(CfreeDwarfEnumIter* it, CfreeDwarfEnumVal* out) { - const CfreeDwarfType* t; - if (!it || !out || !it->t) return 0; - t = it->t; - if (t->kind != DTK_ENUM) return 0; - if (it->idx >= t->nevals) return 0; - out->name = t->evals[it->idx].name ? t->evals[it->idx].name : ""; - out->value = t->evals[it->idx].value; - it->idx++; - return 1; -} - -void cfree_dwarf_enum_iter_free(CfreeDwarfEnumIter* it) { - if (!it) return; - it->d->h->free(it->d->h, it, sizeof(*it)); -} diff --git a/src/link/link_jit.c b/src/link/link_jit.c @@ -556,7 +556,7 @@ void* cfree_jit_lookup(CfreeJit* jit, const char* name) { /* ---- inspector entries ---- */ /* True if `name` (NUL-terminated) is a debug section the DWARF consumer - * (src/dwarf/dwarf_open.c) might read. Everything else is skipped. */ + * (src/debug/dwarf_open.c) might read. Everything else is skipped. */ static int jit_view_is_debug_name(const char* name) { if (!name) return 0; if (name[0] == '.' 
&& name[1] == 'd' && name[2] == 'e' && name[3] == 'b' && diff --git a/test/debug/roundtrip_unit.c b/test/debug/roundtrip_unit.c @@ -20,6 +20,7 @@ #include <string.h> #include "core/core.h" +#include "core/pool.h" #include "debug/debug.h" #include "obj/obj.h"