kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit df67c1c69c18c95b176eaf6dc0c256e3e1046428
parent 2ebefab9c911e0a289223a435d8debe7c9240449
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 27 May 2026 10:58:13 -0700

driver: add strings tool and objdump --dwarf

strings: new multitool command that scans raw file bytes for printable
runs (-n/--bytes, -t radix, -f, stdin). Registered alongside the other
binutils-style tools.

objdump --dwarf[=info,abbrev,line,str]: dump the raw DWARF structure of
an object's .debug_* sections. Backed by a new public structural-
enumeration API (cfree_dwarf_{cu,die,attr,abbrev,abbrev_attr,line,str}
iterators) in dwarf.h, implemented in src/debug/dwarf_dump.c over the
existing internal decoders. Symbolic DW_TAG/DW_AT/DW_FORM rendering and
all display logic stay in the driver; the library hands back numeric
codes.

Fix a latent non-idempotency in dw_parse_all_cus: it appended CUs on
every call, so a second invocation (now reachable via the enumeration
iterators after cfree_dwarf_open) duplicated the whole CU table. Guard
on an already-built table, matching dw_build_subs/globals.

Tests: test/strings (4 cases); a committed DWARF5 fixture object with
golden output for objdump --dwarf (test/objdump/dwarf); and iterator
coverage in test/dwarf/dwarf_test.c (fixture regenerable via
CFREE_DWARF_WRITE_FIXTURE).

Diffstat:
M Makefile | 3 +++
M driver/driver.h | 2 ++
M driver/main.c | 4 ++++
M driver/objdump.c | 334 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
A driver/strings.c | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/cfree/config.h | 1 +
M include/cfree/dwarf.h | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M mk/config.mk | 1 +
M src/core/config_assert.c | 1 +
A src/debug/dwarf_dump.c | 481 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/debug/dwarf_open.c | 4 ++++
M test/dwarf/dwarf_test.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/objdump/dwarf/cases/01-dwarf-all.expected | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/objdump/dwarf/cases/01-dwarf-all.sh | 8 ++++++++
A test/objdump/dwarf/cases/dwarf.o | 0
A test/strings/cases/01-default-min.expected | 2 ++
A test/strings/cases/01-default-min.sh | 5 +++++
A test/strings/cases/02-min-length.expected | 1 +
A test/strings/cases/02-min-length.sh | 4 ++++
A test/strings/cases/03-offset-hex.expected | 2 ++
A test/strings/cases/03-offset-hex.sh | 4 ++++
A test/strings/cases/04-stdin.expected | 2 ++
A test/strings/cases/04-stdin.sh | 2 ++
A test/strings/run.sh | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/test.mk | 6 +++++-
25 files changed, 1648 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile @@ -336,6 +336,9 @@ endif ifeq ($(CFREE_TOOL_ADDR2LINE_ENABLED),1) DRIVER_TOOL_SRCS += driver/addr2line.c endif +ifeq ($(CFREE_TOOL_STRINGS_ENABLED),1) +DRIVER_TOOL_SRCS += driver/strings.c +endif DRIVER_SRCS += $(sort $(DRIVER_TOOL_SRCS)) ifneq ($(filter 1,$(CFREE_TOOL_CC_ENABLED) $(CFREE_TOOL_CHECK_ENABLED) $(CFREE_TOOL_CPP_ENABLED) $(CFREE_TOOL_AS_ENABLED) $(CFREE_TOOL_DBG_ENABLED) $(CFREE_TOOL_RUN_ENABLED)),) DRIVER_SRCS += driver/cflags.c diff --git a/driver/driver.h b/driver/driver.h @@ -52,6 +52,7 @@ int driver_emu(int argc, char** argv); int driver_nm(int argc, char** argv); int driver_size(int argc, char** argv); int driver_addr2line(int argc, char** argv); +int driver_strings(int argc, char** argv); /* Per-tool help printers. Write a multi-section help text to stdout and * return. The tool entry-points call these when invoked with no args, -h, @@ -73,6 +74,7 @@ void driver_help_emu(void); void driver_help_nm(void); void driver_help_size(void); void driver_help_addr2line(void); +void driver_help_strings(void); /* Multi-call top-level help (`cfree`, `cfree -h`, `cfree --help`, * `cfree help`). 
Lists each tool with a one-line summary and explains diff --git a/driver/main.c b/driver/main.c @@ -84,6 +84,10 @@ static const DriverToolDesc driver_tools[] = { {"addr2line", driver_addr2line, driver_help_addr2line, "Translate addresses to file:line using debug info"}, #endif +#if CFREE_TOOL_STRINGS_ENABLED + {"strings", driver_strings, driver_help_strings, + "Print printable character sequences found in a file"}, +#endif {NULL, NULL, NULL, NULL}, }; diff --git a/driver/objdump.c b/driver/objdump.c @@ -1,8 +1,10 @@ #include <cfree/archive.h> #include <cfree/core.h> #include <cfree/disasm.h> +#include <cfree/dwarf.h> #include <cfree/object.h> #include <stdio.h> +#include <string.h> #include "driver.h" @@ -24,10 +26,20 @@ typedef struct ObjdumpOpts { int r; /* -r: relocations */ int s; /* -s: hex section contents */ int p; /* -p / --private-headers: PE optional header + data dirs */ + unsigned dwarf; /* --dwarf: bitmask of OBJDUMP_DWARF_* (0 = off) */ const char* j[MAX_J_FILTERS]; int nj; } ObjdumpOpts; +/* --dwarf section selectors. */ +#define OBJDUMP_DWARF_INFO 0x1u +#define OBJDUMP_DWARF_ABBREV 0x2u +#define OBJDUMP_DWARF_LINE 0x4u +#define OBJDUMP_DWARF_STR 0x8u +#define OBJDUMP_DWARF_ALL \ + (OBJDUMP_DWARF_INFO | OBJDUMP_DWARF_ABBREV | OBJDUMP_DWARF_LINE | \ + OBJDUMP_DWARF_STR) + static void objdump_usage(void) { driver_errf(OBJDUMP_TOOL, "%.*s", CFREE_SLICE_ARG(CFREE_SLICE_LIT( @@ -77,6 +89,9 @@ void driver_help_objdump(void) { " Print PE optional header, data directories,\n" " and per-DLL import lists (PE images only)\n" " -x Aggregate: -f -h -r -t\n" + " --dwarf[=LIST] Dump DWARF debug sections. LIST is a comma-\n" + " separated subset of info, abbrev, line, str;\n" + " bare --dwarf dumps all four.\n" "\n" "FILTERS\n" " -j NAME Restrict output to the named section. 
" @@ -1022,8 +1037,272 @@ static void dump_file_header(CfreeObjFile* f, const char* label) { (void)label; } -static void dump_obj(const CfreeDisasmContext* dctx, const char* label, - CfreeObjFile* f, const ObjdumpOpts* opts) { +/* ---- DWARF structural dump (`--dwarf`) ---- + * + * Pulls the raw .debug_info / .debug_abbrev / .debug_line / .debug_str + * structure out via the cfree_dwarf_*_iter API and formats it. The library + * hands back numeric DWARF codes; the symbolic-name tables below live here + * (display logic stays in the driver). Unknown codes fall back to hex. */ + +static const char* dw_tag_name(uint32_t tag) { + switch (tag) { + case 0x01: return "DW_TAG_array_type"; + case 0x04: return "DW_TAG_enumeration_type"; + case 0x05: return "DW_TAG_formal_parameter"; + case 0x0b: return "DW_TAG_lexical_block"; + case 0x0d: return "DW_TAG_member"; + case 0x0f: return "DW_TAG_pointer_type"; + case 0x11: return "DW_TAG_compile_unit"; + case 0x13: return "DW_TAG_structure_type"; + case 0x15: return "DW_TAG_subroutine_type"; + case 0x16: return "DW_TAG_typedef"; + case 0x17: return "DW_TAG_union_type"; + case 0x18: return "DW_TAG_unspecified_parameters"; + case 0x1d: return "DW_TAG_inlined_subroutine"; + case 0x21: return "DW_TAG_subrange_type"; + case 0x24: return "DW_TAG_base_type"; + case 0x26: return "DW_TAG_const_type"; + case 0x28: return "DW_TAG_enumerator"; + case 0x2e: return "DW_TAG_subprogram"; + case 0x34: return "DW_TAG_variable"; + case 0x35: return "DW_TAG_volatile_type"; + case 0x37: return "DW_TAG_restrict_type"; + case 0x3b: return "DW_TAG_unspecified_type"; + default: return NULL; + } +} + +static const char* dw_at_name(uint32_t at) { + switch (at) { + case 0x01: return "DW_AT_sibling"; + case 0x02: return "DW_AT_location"; + case 0x03: return "DW_AT_name"; + case 0x0b: return "DW_AT_byte_size"; + case 0x0d: return "DW_AT_bit_size"; + case 0x10: return "DW_AT_stmt_list"; + case 0x11: return "DW_AT_low_pc"; + case 0x12: return "DW_AT_high_pc"; + 
case 0x13: return "DW_AT_language"; + case 0x1b: return "DW_AT_comp_dir"; + case 0x1c: return "DW_AT_const_value"; + case 0x25: return "DW_AT_producer"; + case 0x27: return "DW_AT_prototyped"; + case 0x2f: return "DW_AT_upper_bound"; + case 0x34: return "DW_AT_artificial"; + case 0x37: return "DW_AT_count"; + case 0x38: return "DW_AT_data_member_location"; + case 0x39: return "DW_AT_decl_column"; + case 0x3a: return "DW_AT_decl_file"; + case 0x3b: return "DW_AT_decl_line"; + case 0x3c: return "DW_AT_declaration"; + case 0x3e: return "DW_AT_encoding"; + case 0x3f: return "DW_AT_external"; + case 0x40: return "DW_AT_frame_base"; + case 0x49: return "DW_AT_type"; + case 0x6e: return "DW_AT_linkage_name"; + case 0x88: return "DW_AT_alignment"; + default: return NULL; + } +} + +static const char* dw_form_name(uint32_t form) { + switch (form) { + case 0x01: return "DW_FORM_addr"; + case 0x05: return "DW_FORM_data2"; + case 0x06: return "DW_FORM_data4"; + case 0x07: return "DW_FORM_data8"; + case 0x08: return "DW_FORM_string"; + case 0x09: return "DW_FORM_block"; + case 0x0b: return "DW_FORM_data1"; + case 0x0c: return "DW_FORM_flag"; + case 0x0d: return "DW_FORM_sdata"; + case 0x0e: return "DW_FORM_strp"; + case 0x0f: return "DW_FORM_udata"; + case 0x10: return "DW_FORM_ref_addr"; + case 0x11: return "DW_FORM_ref1"; + case 0x12: return "DW_FORM_ref2"; + case 0x13: return "DW_FORM_ref4"; + case 0x14: return "DW_FORM_ref8"; + case 0x15: return "DW_FORM_ref_udata"; + case 0x17: return "DW_FORM_sec_offset"; + case 0x18: return "DW_FORM_exprloc"; + case 0x19: return "DW_FORM_flag_present"; + case 0x1a: return "DW_FORM_strx"; + case 0x1b: return "DW_FORM_addrx"; + case 0x1f: return "DW_FORM_line_strp"; + case 0x21: return "DW_FORM_implicit_const"; + case 0x25: return "DW_FORM_strx1"; + default: return NULL; + } +} + +/* Print a symbolic DWARF code or, when unknown, its hex value. 
*/ +static void dw_emit_code(const char* name, uint32_t val) { + if (name) + driver_printf("%s", name); + else + driver_printf("0x%x", val); +} + +static void dw_emit_attr_value(const CfreeDwarfAttr* a) { + switch (a->form_class) { + case CFREE_DWARF_FC_STRING: + driver_printf("\"%.*s\"", CFREE_SLICE_ARG(a->str)); + break; + case CFREE_DWARF_FC_SDATA: + driver_printf("%lld", (long long)a->s); + break; + case CFREE_DWARF_FC_FLAG: + driver_printf("%s", a->u ? "true" : "false"); + break; + case CFREE_DWARF_FC_BLOCK: { + uint32_t i; + driver_printf("%u byte block:", a->block_len); + for (i = 0; i < a->block_len; ++i) + driver_printf(" %02x", a->block[i]); + break; + } + case CFREE_DWARF_FC_ADDR: + case CFREE_DWARF_FC_REF: + case CFREE_DWARF_FC_UDATA: + default: + driver_printf("0x%llx", (unsigned long long)a->u); + break; + } +} + +static void dump_dwarf_die_attrs(CfreeDebugInfo* dbg, uint32_t die_offset, + uint32_t depth) { + CfreeDwarfAttrIter* ai = NULL; + CfreeDwarfAttr a; + uint32_t k; + if (cfree_dwarf_attr_iter_new(dbg, die_offset, &ai) != CFREE_OK) return; + while (cfree_dwarf_attr_iter_next(ai, &a) == CFREE_ITER_ITEM) { + for (k = 0; k <= depth + 1; ++k) driver_printf(" "); + dw_emit_code(dw_at_name(a.attr), a.attr); + driver_printf(" ("); + dw_emit_code(dw_form_name(a.form), a.form); + driver_printf(") = "); + dw_emit_attr_value(&a); + driver_printf("\n"); + } + cfree_dwarf_attr_iter_free(ai); +} + +static void dump_dwarf_info(CfreeDebugInfo* dbg) { + CfreeDwarfCuIter* cui = NULL; + CfreeDwarfDieIter* dii = NULL; + CfreeDwarfCu cu; + CfreeDwarfDie die; + driver_printf(".debug_info contents:\n"); + if (cfree_dwarf_cu_iter_new(dbg, &cui) == CFREE_OK) { + while (cfree_dwarf_cu_iter_next(cui, &cu) == CFREE_ITER_ITEM) { + driver_printf( + " Compilation Unit @ offset 0x%x: version %u, abbrev_offset 0x%x, " + "addr_size %u, length 0x%x\n", + cu.offset, (unsigned)cu.version, cu.abbrev_offset, + (unsigned)cu.address_size, cu.length); + } + 
cfree_dwarf_cu_iter_free(cui); + } + if (cfree_dwarf_die_iter_new(dbg, &dii) != CFREE_OK) return; + while (cfree_dwarf_die_iter_next(dii, &die) == CFREE_ITER_ITEM) { + uint32_t k; + for (k = 0; k <= die.depth; ++k) driver_printf(" "); + driver_printf("<0x%x> ", die.offset); + dw_emit_code(dw_tag_name(die.tag), die.tag); + driver_printf("\n"); + dump_dwarf_die_attrs(dbg, die.offset, die.depth); + } + cfree_dwarf_die_iter_free(dii); + driver_printf("\n"); +} + +static void dump_dwarf_abbrev(CfreeDebugInfo* dbg) { + CfreeDwarfAbbrevIter* it = NULL; + CfreeDwarfAbbrev ab; + uint32_t cur_table = 0xffffffffu; + driver_printf(".debug_abbrev contents:\n"); + if (cfree_dwarf_abbrev_iter_new(dbg, &it) != CFREE_OK) return; + while (cfree_dwarf_abbrev_iter_next(it, &ab) == CFREE_ITER_ITEM) { + CfreeDwarfAbbrevAttrIter* ait = NULL; + CfreeDwarfAbbrevAttr aa; + if (ab.table_offset != cur_table) { + cur_table = ab.table_offset; + driver_printf(" Abbrev table @ offset 0x%x:\n", cur_table); + } + driver_printf(" [%llu] ", (unsigned long long)ab.code); + dw_emit_code(dw_tag_name(ab.tag), ab.tag); + driver_printf(" %s\n", ab.has_children ? 
"[has children]" : "[no children]"); + if (cfree_dwarf_abbrev_attr_iter_new(dbg, ab.table_offset, ab.code, + &ait) != CFREE_OK) + continue; + while (cfree_dwarf_abbrev_attr_iter_next(ait, &aa) == CFREE_ITER_ITEM) { + driver_printf(" "); + dw_emit_code(dw_at_name(aa.attr), aa.attr); + driver_printf(" "); + dw_emit_code(dw_form_name(aa.form), aa.form); + driver_printf("\n"); + } + cfree_dwarf_abbrev_attr_iter_free(ait); + } + cfree_dwarf_abbrev_iter_free(it); + driver_printf("\n"); +} + +static void dump_dwarf_line(CfreeDebugInfo* dbg) { + CfreeDwarfCuIter* cui = NULL; + CfreeDwarfCu cu; + driver_printf(".debug_line contents:\n"); + if (cfree_dwarf_cu_iter_new(dbg, &cui) != CFREE_OK) return; + while (cfree_dwarf_cu_iter_next(cui, &cu) == CFREE_ITER_ITEM) { + CfreeDwarfLineIter* li = NULL; + CfreeDwarfLineRow row; + if (cfree_dwarf_line_iter_new(dbg, cu.offset, &li) != CFREE_OK) continue; + driver_printf(" CU @ offset 0x%x:\n", cu.offset); + driver_printf(" %-18s %-6s %-6s %-4s %s\n", "Address", "File", "Line", + "Col", "Flags"); + while (cfree_dwarf_line_iter_next(li, &row) == CFREE_ITER_ITEM) { + driver_printf(" 0x%016llx %-6u %-6u %-4u %s%s\n", + (unsigned long long)row.address, row.file_index, row.line, + row.column, row.is_stmt ? "stmt " : "", + row.end_sequence ? 
"end_seq" : ""); + } + cfree_dwarf_line_iter_free(li); + } + cfree_dwarf_cu_iter_free(cui); + driver_printf("\n"); +} + +static void dump_dwarf_str(CfreeDebugInfo* dbg) { + CfreeDwarfStrIter* it = NULL; + CfreeDwarfStr s; + driver_printf(".debug_str contents:\n"); + if (cfree_dwarf_str_iter_new(dbg, &it) != CFREE_OK) return; + while (cfree_dwarf_str_iter_next(it, &s) == CFREE_ITER_ITEM) { + driver_printf(" 0x%x \"%.*s\"\n", s.offset, CFREE_SLICE_ARG(s.str)); + } + cfree_dwarf_str_iter_free(it); + driver_printf("\n"); +} + +static void dump_dwarf(const CfreeContext* ctx, CfreeObjFile* f, + const ObjdumpOpts* opts) { + CfreeDebugInfo* dbg = NULL; + if (cfree_dwarf_open(ctx, f, &dbg) != CFREE_OK || !dbg) { + driver_errf(OBJDUMP_TOOL, "no DWARF debug info found"); + return; + } + if (opts->dwarf & OBJDUMP_DWARF_INFO) dump_dwarf_info(dbg); + if (opts->dwarf & OBJDUMP_DWARF_ABBREV) dump_dwarf_abbrev(dbg); + if (opts->dwarf & OBJDUMP_DWARF_LINE) dump_dwarf_line(dbg); + if (opts->dwarf & OBJDUMP_DWARF_STR) dump_dwarf_str(dbg); + cfree_dwarf_free(dbg); +} + +static void dump_obj(const CfreeContext* ctx, const CfreeDisasmContext* dctx, + const char* label, CfreeObjFile* f, + const ObjdumpOpts* opts) { CfreeTarget target = cfree_obj_target(f); CfreeObjFmt fmt = cfree_obj_fmt(f); @@ -1040,6 +1319,7 @@ static void dump_obj(const CfreeDisasmContext* dctx, const char* label, if (opts->s) dump_hex(f, opts); if (opts->d || opts->D) dump_disasm(dctx, f, opts); if (opts->r) dump_relocs(f, opts); + if (opts->dwarf) dump_dwarf(ctx, f, opts); } static int dump_archive(const char* path, const CfreeSlice* input, @@ -1082,7 +1362,7 @@ static int dump_archive(const char* path, const CfreeSlice* input, CFREE_SLICE_ARG(cfree_slice_cstr(label))); continue; } - dump_obj(dctx, label, f, opts); + dump_obj(ctx, dctx, label, f, opts); cfree_obj_free(f); } cfree_ar_iter_free(it); @@ -1171,6 +1451,39 @@ static int parse_long_flag(const char* arg, ObjdumpOpts* o) { return 0; } +/* Match one 
comma-separated --dwarf section selector against `tok` of + * length `n`. Returns the OBJDUMP_DWARF_* bit, or 0 if unrecognized. */ +static unsigned dwarf_sel_bit(const char* tok, size_t n) { + if (n == 4 && memcmp(tok, "info", 4) == 0) return OBJDUMP_DWARF_INFO; + if (n == 6 && memcmp(tok, "abbrev", 6) == 0) return OBJDUMP_DWARF_ABBREV; + if (n == 4 && memcmp(tok, "line", 4) == 0) return OBJDUMP_DWARF_LINE; + if (n == 3 && memcmp(tok, "str", 3) == 0) return OBJDUMP_DWARF_STR; + return 0; +} + +/* Parse a `--dwarf` / `--dwarf=sec,sec` argument into o->dwarf. Returns 1 + * if `arg` was a dwarf flag (handled), 0 if not, -1 on a bad selector. */ +static int parse_dwarf_flag(const char* arg, ObjdumpOpts* o) { + const char *list, *p; + if (driver_streq(arg, "--dwarf")) { + o->dwarf = OBJDUMP_DWARF_ALL; + return 1; + } + if (strncmp(arg, "--dwarf=", 8) != 0) return 0; + list = arg + 8; + for (p = list; *p;) { + const char* start = p; + unsigned bit; + while (*p && *p != ',') ++p; + bit = dwarf_sel_bit(start, (size_t)(p - start)); + if (!bit) return -1; + o->dwarf |= bit; + if (*p == ',') ++p; + } + if (!o->dwarf) return -1; + return 1; +} + int driver_objdump(int argc, char** argv) { DriverEnv env; ObjdumpOpts opts = {0}; @@ -1213,6 +1526,17 @@ int driver_objdump(int argc, char** argv) { opts.j[opts.nj++] = argv[++i]; continue; } + { + int dr = parse_dwarf_flag(a, &opts); + if (dr < 0) { + driver_errf(OBJDUMP_TOOL, "%.*s", + CFREE_SLICE_ARG(CFREE_SLICE_LIT( + "--dwarf sections: info, abbrev, line, str"))); + rc = 2; + goto done; + } + if (dr > 0) continue; + } if (parse_long_flag(a, &opts)) continue; if (parse_short_flags(a, &opts) != 0) { objdump_usage(); @@ -1222,7 +1546,7 @@ int driver_objdump(int argc, char** argv) { } saw_op = opts.f || opts.h || opts.t || opts.d || opts.D || opts.r || opts.s || - opts.p; + opts.p || opts.dwarf != 0; if (!saw_op) { /* Default = -h -t (matches the prior behavior). 
*/ opts.h = 1; opts.t = 1; @@ -1305,7 +1629,7 @@ int driver_objdump(int argc, char** argv) { rc = 1; } } else { - dump_obj(dctx_p, a, f, &opts); + dump_obj(&ctx, dctx_p, a, f, &opts); if (opts.p && bin == CFREE_BIN_PE) { dump_pe_private(a, input.data, input.len); } diff --git a/driver/strings.c b/driver/strings.c @@ -0,0 +1,260 @@ +#include <cfree/core.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include "driver.h" +#include "env.h" + +/* `cfree strings` — print sequences of printable characters found in a + * file. Unlike most tools here it is format-agnostic: it scans the raw + * bytes of any input (objects, archives, or arbitrary data), matching + * GNU strings' default whole-file behaviour. All display logic lives + * here; the driver env supplies the path/stdin byte loaders. */ + +#define STRINGS_TOOL "strings" + +#define STRINGS_DEFAULT_MIN 4 + +typedef enum StringsRadix { + STRINGS_RAD_NONE, /* default: no offset prefix */ + STRINGS_RAD_OCT, /* -t o */ + STRINGS_RAD_DEC, /* -t d */ + STRINGS_RAD_HEX, /* -t x */ +} StringsRadix; + +typedef struct StringsOpts { + size_t min_len; /* -n N / --bytes=N / -N ; default STRINGS_DEFAULT_MIN */ + StringsRadix radix; /* -t {o,d,x} */ + int print_filename; /* -f / --print-file-name */ +} StringsOpts; + +/* A byte is "printable" for the purposes of a run if it is a graphic + * ASCII character (0x20..0x7e) or a horizontal tab, matching GNU + * strings' default (non-locale) classification. 
*/ +static int strings_is_printable(uint8_t c) { + return (c >= 0x20 && c <= 0x7e) || c == '\t'; +} + +static void strings_emit(const char* path, const StringsOpts* opts, + size_t offset, const uint8_t* s, size_t len) { + if (opts->print_filename && path) driver_printf("%s: ", path); + switch (opts->radix) { + case STRINGS_RAD_OCT: + driver_printf("%7llo ", (unsigned long long)offset); + break; + case STRINGS_RAD_DEC: + driver_printf("%7llu ", (unsigned long long)offset); + break; + case STRINGS_RAD_HEX: + driver_printf("%7llx ", (unsigned long long)offset); + break; + default: + break; + } + driver_printf("%.*s\n", (int)len, (const char*)s); +} + +/* Scan `data` for printable runs of at least opts->min_len bytes. */ +static void strings_scan(const char* path, const StringsOpts* opts, + const uint8_t* data, size_t size) { + size_t i = 0; + size_t run_start = 0; + size_t run_len = 0; + for (i = 0; i < size; ++i) { + if (strings_is_printable(data[i])) { + if (run_len == 0) run_start = i; + ++run_len; + continue; + } + if (run_len >= opts->min_len) { + strings_emit(path, opts, run_start, data + run_start, run_len); + } + run_len = 0; + } + if (run_len >= opts->min_len) { + strings_emit(path, opts, run_start, data + run_start, run_len); + } +} + +void driver_help_strings(void) { + driver_printf( + "%.*s", + CFREE_SLICE_ARG(CFREE_SLICE_LIT( + "cfree strings — print printable character sequences in a file\n" + "\n" + "USAGE\n" + " cfree strings [OPTIONS] [FILE...]\n" + "\n" + "DESCRIPTION\n" + " Scans the raw bytes of each input and prints runs of at least\n" + " N printable characters (default 4). Works on any file, not\n" + " just object files. With no FILE, or with `-`, reads stdin.\n" + "\n" + "OPTIONS\n" + " -n N, --bytes=N minimum run length (default 4)\n" + " -N shorthand for -n N (e.g. 
-8)\n" + " -t {o,d,x} print the byte offset of each string in\n" + " octal / decimal / hexadecimal\n" + " -f, --print-file-name\n" + " print the input file name before each string\n" + " -h, --help show this help\n" + "\n" + "EXIT CODES\n" + " 0 success 1 I/O error 2 bad usage\n"))); +} + +/* Parse the argument of -n / --bytes=. Returns 0 and stores the value on + * success, or non-zero on a malformed/zero value. */ +static int strings_parse_min(const char* s, size_t* out) { + size_t v = 0; + if (!s || !*s) return 1; + for (; *s; ++s) { + if (*s < '0' || *s > '9') return 1; + v = v * 10u + (size_t)(*s - '0'); + } + if (v == 0) return 1; + *out = v; + return 0; +} + +int driver_strings(int argc, char** argv) { + DriverEnv env; + StringsOpts opts; + int i, rc = 1, any_input = 0; + + if (driver_argv_wants_help(argc, argv, 1)) { + driver_help_strings(); + return 0; + } + + memset(&opts, 0, sizeof opts); + opts.min_len = STRINGS_DEFAULT_MIN; + opts.radix = STRINGS_RAD_NONE; + driver_env_init(&env); + + /* First pass: options. 
*/ + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + if (driver_streq(a, "-n") || driver_streq(a, "--bytes")) { + if (i + 1 >= argc || strings_parse_min(argv[++i], &opts.min_len) != 0) { + driver_errf(STRINGS_TOOL, "option %s requires a positive integer", a); + rc = 2; + goto done; + } + continue; + } + if (strncmp(a, "--bytes=", 8) == 0) { + if (strings_parse_min(a + 8, &opts.min_len) != 0) { + driver_errf(STRINGS_TOOL, "invalid --bytes value: %s", a + 8); + rc = 2; + goto done; + } + continue; + } + if (driver_streq(a, "-t")) { + const char* v; + if (i + 1 >= argc) { + driver_errf(STRINGS_TOOL, "option -t requires o, d, or x"); + rc = 2; + goto done; + } + v = argv[++i]; + if (driver_streq(v, "o")) { + opts.radix = STRINGS_RAD_OCT; + } else if (driver_streq(v, "d")) { + opts.radix = STRINGS_RAD_DEC; + } else if (driver_streq(v, "x")) { + opts.radix = STRINGS_RAD_HEX; + } else { + driver_errf(STRINGS_TOOL, "invalid -t radix: %s (want o, d, or x)", v); + rc = 2; + goto done; + } + continue; + } + if (driver_streq(a, "-f") || driver_streq(a, "--print-file-name")) { + opts.print_filename = 1; + continue; + } + /* `-<digits>` is GNU shorthand for `-n <digits>`. */ + if (a[0] == '-' && a[1] >= '0' && a[1] <= '9') { + if (strings_parse_min(a + 1, &opts.min_len) != 0) { + driver_errf(STRINGS_TOOL, "invalid length: %s", a); + rc = 2; + goto done; + } + continue; + } + if (driver_streq(a, "-")) { + any_input = 1; /* stdin, handled in second pass */ + continue; + } + if (a[0] == '-' && a[1] != '\0') { + driver_errf(STRINGS_TOOL, "unknown option: %s", a); + rc = 2; + goto done; + } + any_input = 1; + } + + /* No file operands: scan stdin. */ + if (!any_input) { + uint8_t* buf = NULL; + size_t n = 0; + if (!driver_read_stdin(&env, &buf, &n)) { + driver_errf(STRINGS_TOOL, "failed to read stdin"); + rc = 1; + goto done; + } + strings_scan(NULL, &opts, buf, n); + driver_free(&env, buf, n); + rc = 0; + goto done; + } + + /* Second pass: inputs, in argv order. 
*/ + for (i = 1; i < argc; ++i) { + const char* a = argv[i]; + /* Skip the options consumed above (and their values). */ + if (driver_streq(a, "-n") || driver_streq(a, "--bytes") || + driver_streq(a, "-t")) { + ++i; + continue; + } + if (strncmp(a, "--bytes=", 8) == 0 || + driver_streq(a, "-f") || driver_streq(a, "--print-file-name") || + (a[0] == '-' && a[1] >= '0' && a[1] <= '9')) { + continue; + } + if (driver_streq(a, "-")) { + uint8_t* buf = NULL; + size_t n = 0; + if (!driver_read_stdin(&env, &buf, &n)) { + driver_errf(STRINGS_TOOL, "failed to read stdin"); + rc = 1; + goto done; + } + strings_scan(opts.print_filename ? "{standard input}" : NULL, &opts, buf, + n); + driver_free(&env, buf, n); + continue; + } + { + DriverLoad ld = {0}; + CfreeSlice input; + if (driver_load_bytes(&env.file_io, STRINGS_TOOL, a, &ld, &input) != 0) { + rc = 1; + goto done; + } + strings_scan(a, &opts, input.data, input.len); + driver_release_bytes(&env.file_io, &ld); + } + } + + rc = 0; +done: + driver_env_fini(&env); + return rc; +} diff --git a/include/cfree/config.h b/include/cfree/config.h @@ -90,5 +90,6 @@ #define CFREE_TOOL_NM_ENABLED 1 #define CFREE_TOOL_SIZE_ENABLED 1 #define CFREE_TOOL_ADDR2LINE_ENABLED 1 +#define CFREE_TOOL_STRINGS_ENABLED 1 #endif /* CFREE_CONFIG_H */ diff --git a/include/cfree/dwarf.h b/include/cfree/dwarf.h @@ -184,4 +184,149 @@ CFREE_API CfreeIterResult cfree_dwarf_param_iter_next(CfreeDwarfParamIter*, CfreeDwarfVar* out); CFREE_API void cfree_dwarf_param_iter_free(CfreeDwarfParamIter*); +/* ---- Structural enumeration (objdump --dwarf / dwarfdump) ------------- + * + * Unlike the PC/name-keyed query API above, these iterators expose the raw + * structure of the .debug_info / .debug_abbrev / .debug_line / .debug_str + * sections for tools that dump DWARF. Values are numeric DWARF codes + * (DW_TAG_*, DW_AT_*, DW_FORM_*); rendering them symbolically is the + * caller's job. 
All iterators borrow from the CfreeDebugInfo and are + * invalidated when it is freed. */ + +/* .debug_info compilation-unit headers, in section order. */ +typedef struct CfreeDwarfCuIter CfreeDwarfCuIter; +typedef struct CfreeDwarfCu { + uint32_t offset; /* offset of the CU header in .debug_info */ + uint32_t length; /* unit_length (bytes after the length field) */ + uint16_t version; + uint8_t address_size; + uint8_t unit_type; /* DW_UT_* (0 for pre-DWARF5 units) */ + uint32_t abbrev_offset; /* into .debug_abbrev */ + bool is_64bit; /* DWARF64 initial-length form */ +} CfreeDwarfCu; + +CFREE_API CfreeStatus cfree_dwarf_cu_iter_new(CfreeDebugInfo*, + CfreeDwarfCuIter** out); +CFREE_API CfreeIterResult cfree_dwarf_cu_iter_next(CfreeDwarfCuIter*, + CfreeDwarfCu* out); +CFREE_API void cfree_dwarf_cu_iter_free(CfreeDwarfCuIter*); + +/* .debug_info DIE tree, depth-first across every CU. `depth` 0 marks a CU + * root DIE; `cu_offset` ties each DIE back to its owning CU header. */ +typedef struct CfreeDwarfDieIter CfreeDwarfDieIter; +typedef struct CfreeDwarfDie { + uint32_t offset; /* absolute offset of the DIE in .debug_info */ + uint32_t cu_offset; /* owning CU header offset */ + uint32_t tag; /* DW_TAG_* */ + uint32_t depth; /* nesting level; 0 = CU root */ + bool has_children; +} CfreeDwarfDie; + +CFREE_API CfreeStatus cfree_dwarf_die_iter_new(CfreeDebugInfo*, + CfreeDwarfDieIter** out); +CFREE_API CfreeIterResult cfree_dwarf_die_iter_next(CfreeDwarfDieIter*, + CfreeDwarfDie* out); +CFREE_API void cfree_dwarf_die_iter_free(CfreeDwarfDieIter*); + +/* Decoded attributes of a single DIE, identified by its .debug_info + * offset (as returned in CfreeDwarfDie::offset). `form_class` says which + * value slot is meaningful. 
*/ +typedef enum CfreeDwarfFormClass { + CFREE_DWARF_FC_UNKNOWN, + CFREE_DWARF_FC_UDATA, /* unsigned constant in `u` */ + CFREE_DWARF_FC_SDATA, /* signed constant in `s` */ + CFREE_DWARF_FC_ADDR, /* address in `u` */ + CFREE_DWARF_FC_REF, /* CU-relative or section DIE offset in `u` */ + CFREE_DWARF_FC_FLAG, /* boolean in `u` (0/1) */ + CFREE_DWARF_FC_STRING, /* resolved string in `str` */ + CFREE_DWARF_FC_BLOCK, /* raw bytes in `block`/`block_len` */ +} CfreeDwarfFormClass; + +typedef struct CfreeDwarfAttrIter CfreeDwarfAttrIter; +typedef struct CfreeDwarfAttr { + uint32_t attr; /* DW_AT_* */ + uint32_t form; /* DW_FORM_* */ + CfreeDwarfFormClass form_class; + uint64_t u; + int64_t s; + CfreeSlice str; + const uint8_t* block; + uint32_t block_len; +} CfreeDwarfAttr; + +CFREE_API CfreeStatus cfree_dwarf_attr_iter_new(CfreeDebugInfo*, + uint32_t die_offset, + CfreeDwarfAttrIter** out); +CFREE_API CfreeIterResult cfree_dwarf_attr_iter_next(CfreeDwarfAttrIter*, + CfreeDwarfAttr* out); +CFREE_API void cfree_dwarf_attr_iter_free(CfreeDwarfAttrIter*); + +/* .debug_abbrev: one item per abbreviation declaration. `table_offset` + * identifies the owning abbrev table (shared by CUs). */ +typedef struct CfreeDwarfAbbrevIter CfreeDwarfAbbrevIter; +typedef struct CfreeDwarfAbbrev { + uint32_t table_offset; + uint64_t code; + uint32_t tag; + bool has_children; +} CfreeDwarfAbbrev; + +CFREE_API CfreeStatus cfree_dwarf_abbrev_iter_new(CfreeDebugInfo*, + CfreeDwarfAbbrevIter** out); +CFREE_API CfreeIterResult cfree_dwarf_abbrev_iter_next(CfreeDwarfAbbrevIter*, + CfreeDwarfAbbrev* out); +CFREE_API void cfree_dwarf_abbrev_iter_free(CfreeDwarfAbbrevIter*); + +/* Attribute specifications of one abbrev (keyed by table_offset + code + * from CfreeDwarfAbbrev above). 
*/ +typedef struct CfreeDwarfAbbrevAttrIter CfreeDwarfAbbrevAttrIter; +typedef struct CfreeDwarfAbbrevAttr { + uint32_t attr; + uint32_t form; + int64_t implicit_const; /* meaningful only for DW_FORM_implicit_const */ +} CfreeDwarfAbbrevAttr; + +CFREE_API CfreeStatus cfree_dwarf_abbrev_attr_iter_new( + CfreeDebugInfo*, uint32_t table_offset, uint64_t code, + CfreeDwarfAbbrevAttrIter** out); +CFREE_API CfreeIterResult cfree_dwarf_abbrev_attr_iter_next( + CfreeDwarfAbbrevAttrIter*, CfreeDwarfAbbrevAttr* out); +CFREE_API void cfree_dwarf_abbrev_attr_iter_free(CfreeDwarfAbbrevAttrIter*); + +/* .debug_line decoded rows for one CU (by CU header offset). */ +typedef struct CfreeDwarfLineIter CfreeDwarfLineIter; +typedef struct CfreeDwarfLineRow { + uint64_t address; + uint32_t file_index; + uint32_t line; + uint32_t column; + bool is_stmt; + bool end_sequence; +} CfreeDwarfLineRow; + +CFREE_API CfreeStatus cfree_dwarf_line_iter_new(CfreeDebugInfo*, + uint32_t cu_offset, + CfreeDwarfLineIter** out); +CFREE_API CfreeIterResult cfree_dwarf_line_iter_next(CfreeDwarfLineIter*, + CfreeDwarfLineRow* out); +CFREE_API void cfree_dwarf_line_iter_free(CfreeDwarfLineIter*); + +/* Resolve a row's file_index to its path within the same CU. */ +CFREE_API CfreeStatus cfree_dwarf_line_file(CfreeDebugInfo*, uint32_t cu_offset, + uint32_t file_index, + CfreeSlice* out); + +/* .debug_str raw string table, in offset order. 
*/ +typedef struct CfreeDwarfStrIter CfreeDwarfStrIter; +typedef struct CfreeDwarfStr { + uint32_t offset; + CfreeSlice str; +} CfreeDwarfStr; + +CFREE_API CfreeStatus cfree_dwarf_str_iter_new(CfreeDebugInfo*, + CfreeDwarfStrIter** out); +CFREE_API CfreeIterResult cfree_dwarf_str_iter_next(CfreeDwarfStrIter*, + CfreeDwarfStr* out); +CFREE_API void cfree_dwarf_str_iter_free(CfreeDwarfStrIter*); + #endif diff --git a/mk/config.mk b/mk/config.mk @@ -48,3 +48,4 @@ CFREE_TOOL_EMU_ENABLED := $(call cfg_flag,CFREE_TOOL_EMU_ENABLED) CFREE_TOOL_NM_ENABLED := $(call cfg_flag,CFREE_TOOL_NM_ENABLED) CFREE_TOOL_SIZE_ENABLED := $(call cfg_flag,CFREE_TOOL_SIZE_ENABLED) CFREE_TOOL_ADDR2LINE_ENABLED := $(call cfg_flag,CFREE_TOOL_ADDR2LINE_ENABLED) +CFREE_TOOL_STRINGS_ENABLED := $(call cfg_flag,CFREE_TOOL_STRINGS_ENABLED) diff --git a/src/core/config_assert.c b/src/core/config_assert.c @@ -57,6 +57,7 @@ CFREE_ASSERT_BOOL(CFREE_TOOL_EMU_ENABLED); CFREE_ASSERT_BOOL(CFREE_TOOL_NM_ENABLED); CFREE_ASSERT_BOOL(CFREE_TOOL_SIZE_ENABLED); CFREE_ASSERT_BOOL(CFREE_TOOL_ADDR2LINE_ENABLED); +CFREE_ASSERT_BOOL(CFREE_TOOL_STRINGS_ENABLED); #undef CFREE_ASSERT_BOOL diff --git a/src/debug/dwarf_dump.c b/src/debug/dwarf_dump.c @@ -0,0 +1,481 @@ +/* dwarf_dump.c — structural enumeration of DWARF sections. + * + * Implements the cfree_dwarf_*_iter API used by `objdump --dwarf` and + * similar dumpers. Unlike the PC/name-keyed query API, these walk the raw + * .debug_info / .debug_abbrev / .debug_line / .debug_str structure and + * hand back numeric DWARF codes; symbolic rendering is the caller's job. + * + * The iterators are thin cursors over the consumer state already built by + * dwarf_open.c (CUs, abbrev tables) and dwarf_line.c (decoded line rows). 
+ */ + +#include <cfree/dwarf.h> +#include <stddef.h> +#include <stdint.h> + +#include "core/core.h" +#include "core/heap.h" +#include "debug/dwarf_defs.h" +#include "debug/dwarf_internal.h" + +/* ---- shared helpers --------------------------------------------------- */ + +#define DW_ITER_ALLOC(d, T) \ + ((T*)(d)->h->alloc((d)->h, sizeof(T), _Alignof(T))) + +static void dw_iter_free(CfreeDebugInfo* d, void* p, size_t n) { + if (p) d->h->free(d->h, p, n); +} + +/* Find the CU index whose header sits at `cu_offset`, or UINT32_MAX. */ +static u32 dw_cu_index_at(CfreeDebugInfo* d, u32 cu_offset) { + u32 i; + for (i = 0; i < d->ncus; ++i) { + if (d->cus[i].hdr_offset == cu_offset) return i; + } + return UINT32_MAX; +} + +/* ---- CU iterator ------------------------------------------------------ */ + +struct CfreeDwarfCuIter { + CfreeDebugInfo* d; + u32 idx; +}; + +CfreeStatus cfree_dwarf_cu_iter_new(CfreeDebugInfo* d, CfreeDwarfCuIter** out) { + CfreeDwarfCuIter* it; + if (!out) return CFREE_INVALID; + *out = NULL; + if (!d) return CFREE_INVALID; + dw_parse_all_cus(d); + it = DW_ITER_ALLOC(d, CfreeDwarfCuIter); + if (!it) return CFREE_NOMEM; + it->d = d; + it->idx = 0; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_dwarf_cu_iter_next(CfreeDwarfCuIter* it, + CfreeDwarfCu* out) { + DwCu* cu; + if (!it || !out) return CFREE_ITER_ERROR; + if (it->idx >= it->d->ncus) return CFREE_ITER_END; + cu = &it->d->cus[it->idx++]; + out->offset = cu->hdr_offset; + out->length = cu->hdr_length; + out->version = cu->version; + out->address_size = cu->address_size; + out->unit_type = cu->unit_type; + out->abbrev_offset = cu->abbrev_offset; + out->is_64bit = cu->is_64bit != 0; + return CFREE_ITER_ITEM; +} + +void cfree_dwarf_cu_iter_free(CfreeDwarfCuIter* it) { + if (it) dw_iter_free(it->d, it, sizeof(*it)); +} + +/* ---- DIE iterator (depth-first across all CUs) ------------------------ */ + +struct CfreeDwarfDieIter { + CfreeDebugInfo* d; + u32 cu_idx; /* current CU */ + 
u32 off; /* cursor into .debug_info */ + u32 depth; /* current nesting level */ +}; + +CfreeStatus cfree_dwarf_die_iter_new(CfreeDebugInfo* d, + CfreeDwarfDieIter** out) { + CfreeDwarfDieIter* it; + if (!out) return CFREE_INVALID; + *out = NULL; + if (!d) return CFREE_INVALID; + dw_parse_all_cus(d); + it = DW_ITER_ALLOC(d, CfreeDwarfDieIter); + if (!it) return CFREE_NOMEM; + it->d = d; + it->cu_idx = 0; + it->depth = 0; + it->off = d->ncus ? d->cus[0].die_start_off : 0; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_dwarf_die_iter_next(CfreeDwarfDieIter* it, + CfreeDwarfDie* out) { + CfreeDebugInfo* d; + if (!it || !out) return CFREE_ITER_ERROR; + d = it->d; + for (;;) { + DwCu* cu; + DwDie die; + if (it->cu_idx >= d->ncus) return CFREE_ITER_END; + cu = &d->cus[it->cu_idx]; + if (!dw_read_die(d, cu, &it->off, &die)) { + /* Null entry or end-of-CU: close one level, or advance to the next + * CU when we have unwound back past the root. */ + if (it->depth > 0) { + it->depth--; + continue; + } + it->cu_idx++; + it->depth = 0; + if (it->cu_idx < d->ncus) it->off = d->cus[it->cu_idx].die_start_off; + continue; + } + out->offset = die.die_off; + out->cu_offset = cu->hdr_offset; + out->tag = die.abbrev ? 
die.abbrev->tag : 0; + out->depth = it->depth; + out->has_children = die.abbrev && die.abbrev->has_children; + dw_skip_die_attrs(d, cu, &die, &it->off); + if (die.abbrev && die.abbrev->has_children) it->depth++; + return CFREE_ITER_ITEM; + } +} + +void cfree_dwarf_die_iter_free(CfreeDwarfDieIter* it) { + if (it) dw_iter_free(it->d, it, sizeof(*it)); +} + +/* ---- DIE attribute iterator ------------------------------------------- */ + +struct CfreeDwarfAttrIter { + CfreeDebugInfo* d; + const DwCu* cu; + DwAbbrev* abbrev; + u32 attr_idx; /* index into abbrev->attrs */ + u32 off; /* running cursor in .debug_info */ +}; + +static CfreeDwarfFormClass dw_form_class(u32 form) { + switch (form) { + case DW_FORM_string: + case DW_FORM_strp: + case DW_FORM_line_strp: + case DW_FORM_strx: + case DW_FORM_strx1: + case DW_FORM_strx2: + case DW_FORM_strx3: + case DW_FORM_strx4: + return CFREE_DWARF_FC_STRING; + case DW_FORM_addr: + case DW_FORM_addrx: + case DW_FORM_addrx1: + case DW_FORM_addrx2: + case DW_FORM_addrx3: + case DW_FORM_addrx4: + return CFREE_DWARF_FC_ADDR; + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: + case DW_FORM_ref_addr: + case DW_FORM_ref_sig8: + case DW_FORM_ref_sup4: + case DW_FORM_ref_sup8: + return CFREE_DWARF_FC_REF; + case DW_FORM_sdata: + return CFREE_DWARF_FC_SDATA; + case DW_FORM_flag: + case DW_FORM_flag_present: + return CFREE_DWARF_FC_FLAG; + case DW_FORM_block: + case DW_FORM_block1: + case DW_FORM_block2: + case DW_FORM_block4: + case DW_FORM_exprloc: + case DW_FORM_data16: + return CFREE_DWARF_FC_BLOCK; + case DW_FORM_data1: + case DW_FORM_data2: + case DW_FORM_data4: + case DW_FORM_data8: + case DW_FORM_udata: + case DW_FORM_sec_offset: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + case DW_FORM_implicit_const: + return CFREE_DWARF_FC_UDATA; + default: + return CFREE_DWARF_FC_UNKNOWN; + } +} + +CfreeStatus cfree_dwarf_attr_iter_new(CfreeDebugInfo* d, uint32_t die_offset, + 
CfreeDwarfAttrIter** out) { + CfreeDwarfAttrIter* it; + DwCu* cu; + DwDie die; + u32 off; + if (!out) return CFREE_INVALID; + *out = NULL; + if (!d) return CFREE_INVALID; + dw_parse_all_cus(d); + cu = dw_cu_at_die_offset(d, die_offset); + if (!cu) return CFREE_NOT_FOUND; + off = die_offset; + if (!dw_read_die(d, cu, &off, &die) || !die.abbrev) return CFREE_NOT_FOUND; + it = DW_ITER_ALLOC(d, CfreeDwarfAttrIter); + if (!it) return CFREE_NOMEM; + it->d = d; + it->cu = cu; + it->abbrev = die.abbrev; + it->attr_idx = 0; + it->off = die.attrs_off; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_dwarf_attr_iter_next(CfreeDwarfAttrIter* it, + CfreeDwarfAttr* out) { + DwAbbrevAttr* spec; + DwAttrValue v; + if (!it || !out) return CFREE_ITER_ERROR; + if (it->attr_idx >= it->abbrev->nattrs) return CFREE_ITER_END; + spec = &it->abbrev->attrs[it->attr_idx++]; + dw_read_form(it->d, it->cu, spec->form, spec->implicit_const, &it->off, &v); + out->attr = spec->attr; + out->form = spec->form; + out->form_class = dw_form_class(spec->form); + out->u = v.u; + out->s = v.s; + out->str = v.str ? cfree_slice_cstr(v.str) : CFREE_SLICE_NULL; + out->block = v.block; + out->block_len = v.block_len; + return CFREE_ITER_ITEM; +} + +void cfree_dwarf_attr_iter_free(CfreeDwarfAttrIter* it) { + if (it) dw_iter_free(it->d, it, sizeof(*it)); +} + +/* ---- abbrev iterators ------------------------------------------------- */ + +struct CfreeDwarfAbbrevIter { + CfreeDebugInfo* d; + u32 tbl; /* index into d->abbrevs */ + u32 slot; /* index into table->abbrevs */ +}; + +/* Ensure every CU's abbrev table is materialized before walking them. 
*/ +static void dw_ensure_abbrevs(CfreeDebugInfo* d) { + u32 i; + dw_parse_all_cus(d); + for (i = 0; i < d->ncus; ++i) dw_abbrev_get(d, d->cus[i].abbrev_offset); +} + +CfreeStatus cfree_dwarf_abbrev_iter_new(CfreeDebugInfo* d, + CfreeDwarfAbbrevIter** out) { + CfreeDwarfAbbrevIter* it; + if (!out) return CFREE_INVALID; + *out = NULL; + if (!d) return CFREE_INVALID; + dw_ensure_abbrevs(d); + it = DW_ITER_ALLOC(d, CfreeDwarfAbbrevIter); + if (!it) return CFREE_NOMEM; + it->d = d; + it->tbl = 0; + it->slot = 0; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_dwarf_abbrev_iter_next(CfreeDwarfAbbrevIter* it, + CfreeDwarfAbbrev* out) { + CfreeDebugInfo* d; + if (!it || !out) return CFREE_ITER_ERROR; + d = it->d; + while (it->tbl < d->nabbrevs) { + DwAbbrevTable* t = &d->abbrevs[it->tbl]; + if (it->slot >= t->nabbrevs) { + it->tbl++; + it->slot = 0; + continue; + } + { + DwAbbrev* a = &t->abbrevs[it->slot++]; + if (a->code == 0) continue; /* unused slot */ + out->table_offset = t->cu_abbrev_offset; + out->code = a->code; + out->tag = a->tag; + out->has_children = a->has_children != 0; + return CFREE_ITER_ITEM; + } + } + return CFREE_ITER_END; +} + +void cfree_dwarf_abbrev_iter_free(CfreeDwarfAbbrevIter* it) { + if (it) dw_iter_free(it->d, it, sizeof(*it)); +} + +struct CfreeDwarfAbbrevAttrIter { + CfreeDebugInfo* d; + DwAbbrev* abbrev; + u32 idx; +}; + +CfreeStatus cfree_dwarf_abbrev_attr_iter_new(CfreeDebugInfo* d, + uint32_t table_offset, + uint64_t code, + CfreeDwarfAbbrevAttrIter** out) { + CfreeDwarfAbbrevAttrIter* it; + DwAbbrevTable* t; + DwAbbrev* a; + if (!out) return CFREE_INVALID; + *out = NULL; + if (!d) return CFREE_INVALID; + dw_ensure_abbrevs(d); + t = dw_abbrev_get(d, table_offset); + if (!t) return CFREE_NOT_FOUND; + a = dw_abbrev_lookup(t, code); + if (!a) return CFREE_NOT_FOUND; + it = DW_ITER_ALLOC(d, CfreeDwarfAbbrevAttrIter); + if (!it) return CFREE_NOMEM; + it->d = d; + it->abbrev = a; + it->idx = 0; + *out = it; + return CFREE_OK; +} + 
+CfreeIterResult cfree_dwarf_abbrev_attr_iter_next(CfreeDwarfAbbrevAttrIter* it, + CfreeDwarfAbbrevAttr* out) { + DwAbbrevAttr* aa; + if (!it || !out) return CFREE_ITER_ERROR; + if (it->idx >= it->abbrev->nattrs) return CFREE_ITER_END; + aa = &it->abbrev->attrs[it->idx++]; + out->attr = aa->attr; + out->form = aa->form; + out->implicit_const = aa->implicit_const; + return CFREE_ITER_ITEM; +} + +void cfree_dwarf_abbrev_attr_iter_free(CfreeDwarfAbbrevAttrIter* it) { + if (it) dw_iter_free(it->d, it, sizeof(*it)); +} + +/* ---- line row iterator ------------------------------------------------ */ + +struct CfreeDwarfLineIter { + CfreeDebugInfo* d; + u32 cu_idx; + u32 row; +}; + +CfreeStatus cfree_dwarf_line_iter_new(CfreeDebugInfo* d, uint32_t cu_offset, + CfreeDwarfLineIter** out) { + CfreeDwarfLineIter* it; + u32 cu_idx; + if (!out) return CFREE_INVALID; + *out = NULL; + if (!d) return CFREE_INVALID; + dw_parse_all_cus(d); + cu_idx = dw_cu_index_at(d, cu_offset); + if (cu_idx == UINT32_MAX) return CFREE_NOT_FOUND; + if (!d->cus[cu_idx].has_stmt_list) return CFREE_NOT_FOUND; + if (!d->lines_built[cu_idx]) dw_build_line(d, cu_idx); + it = DW_ITER_ALLOC(d, CfreeDwarfLineIter); + if (!it) return CFREE_NOMEM; + it->d = d; + it->cu_idx = cu_idx; + it->row = 0; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_dwarf_line_iter_next(CfreeDwarfLineIter* it, + CfreeDwarfLineRow* out) { + DwLineProgram* lp; + DwLineRow* r; + if (!it || !out) return CFREE_ITER_ERROR; + lp = &it->d->lines_by_cu[it->cu_idx]; + if (it->row >= lp->nrows) return CFREE_ITER_END; + r = &lp->rows[it->row++]; + out->address = r->address; + out->file_index = r->file_index; + out->line = r->line; + out->column = r->column; + out->is_stmt = r->is_stmt != 0; + out->end_sequence = r->end_sequence != 0; + return CFREE_ITER_ITEM; +} + +void cfree_dwarf_line_iter_free(CfreeDwarfLineIter* it) { + if (it) dw_iter_free(it->d, it, sizeof(*it)); +} + +CfreeStatus cfree_dwarf_line_file(CfreeDebugInfo* d, 
uint32_t cu_offset, + uint32_t file_index, CfreeSlice* out) { + u32 cu_idx; + DwLineProgram* lp; + const char* path = NULL; + if (!out) return CFREE_INVALID; + *out = CFREE_SLICE_NULL; + if (!d) return CFREE_INVALID; + dw_parse_all_cus(d); + cu_idx = dw_cu_index_at(d, cu_offset); + if (cu_idx == UINT32_MAX) return CFREE_NOT_FOUND; + if (!d->cus[cu_idx].has_stmt_list) return CFREE_NOT_FOUND; + if (!d->lines_built[cu_idx]) dw_build_line(d, cu_idx); + lp = &d->lines_by_cu[cu_idx]; + if (file_index < lp->nfile_norm && lp->file_norm[file_index]) + path = lp->file_norm[file_index]; + else if (file_index < lp->nfiles) + path = lp->files[file_index].path; + if (!path) return CFREE_NOT_FOUND; + *out = cfree_slice_cstr(path); + return CFREE_OK; +} + +/* ---- .debug_str iterator ---------------------------------------------- */ + +struct CfreeDwarfStrIter { + CfreeDebugInfo* d; + u32 off; +}; + +CfreeStatus cfree_dwarf_str_iter_new(CfreeDebugInfo* d, + CfreeDwarfStrIter** out) { + CfreeDwarfStrIter* it; + if (!out) return CFREE_INVALID; + *out = NULL; + if (!d) return CFREE_INVALID; + it = DW_ITER_ALLOC(d, CfreeDwarfStrIter); + if (!it) return CFREE_NOMEM; + it->d = d; + it->off = 0; + *out = it; + return CFREE_OK; +} + +CfreeIterResult cfree_dwarf_str_iter_next(CfreeDwarfStrIter* it, + CfreeDwarfStr* out) { + CfreeDebugInfo* d; + const u8* base; + u32 size, start, n; + if (!it || !out) return CFREE_ITER_ERROR; + d = it->d; + base = d->str.data; + size = d->str.size; + if (!base || it->off >= size) return CFREE_ITER_END; + start = it->off; + n = 0; + while (it->off < size && base[it->off] != 0) { + it->off++; + n++; + } + if (it->off < size) it->off++; /* step over the NUL */ + out->offset = start; + out->str.s = (const char*)(base + start); + out->str.len = n; + return CFREE_ITER_ITEM; +} + +void cfree_dwarf_str_iter_free(CfreeDwarfStrIter* it) { + if (it) dw_iter_free(it->d, it, sizeof(*it)); +} diff --git a/src/debug/dwarf_open.c b/src/debug/dwarf_open.c @@ -394,6 +394,10 
@@ static void cu_read_root_attrs(CfreeDebugInfo* d, DwCu* cu) { void dw_parse_all_cus(CfreeDebugInfo* d) { u32 off = 0; + /* Idempotent: a successful cfree_dwarf_open already populates d->cus, and + * the structural-enumeration iterators call this again. Re-parsing would + * append duplicate CUs, so bail once the table is built. */ + if (d->ncus) return; while (off < d->info.size) { DwCu cu; u32 next = dw_cu_parse_header(d, off, &cu); diff --git a/test/dwarf/dwarf_test.c b/test/dwarf/dwarf_test.c @@ -707,6 +707,164 @@ static void build_debug_sections(ByteBuf* abbrev, ByteBuf* info, ByteBuf* line, } } +/* ---- structural enumeration (objdump --dwarf) ----------------------- */ + +static void run_dump_tests(CfreeDebugInfo* di) { + /* CU iterator: the fixture has exactly one DWARF5 CU at offset 0. */ + uint32_t root_off = 0; + int have_root = 0; + { + CfreeDwarfCuIter* it = NULL; + CfreeDwarfCu cu; + int ncu = 0; + EXPECT(cfree_dwarf_cu_iter_new(di, &it) == CFREE_OK, "cu_iter_new failed"); + while (cfree_dwarf_cu_iter_next(it, &cu) == CFREE_ITER_ITEM) { + if (ncu == 0) { + EXPECT(cu.offset == 0, "cu offset expected 0, got 0x%x", cu.offset); + EXPECT(cu.version == 5, "cu version expected 5, got %u", + (unsigned)cu.version); + EXPECT(cu.address_size == 8, "cu addr_size expected 8, got %u", + (unsigned)cu.address_size); + } + ncu++; + } + EXPECT(ncu == 1, "expected 1 CU, got %d", ncu); + cfree_dwarf_cu_iter_free(it); + } + + /* DIE iterator: first DIE is the CU root (depth 0, compile_unit). We + * should also encounter a subprogram and a base_type along the way. 
*/ + { + CfreeDwarfDieIter* it = NULL; + CfreeDwarfDie die; + int ndie = 0, saw_subprog = 0, saw_base = 0; + EXPECT(cfree_dwarf_die_iter_new(di, &it) == CFREE_OK, + "die_iter_new failed"); + while (cfree_dwarf_die_iter_next(it, &die) == CFREE_ITER_ITEM) { + if (ndie == 0) { + EXPECT(die.depth == 0, "root die depth expected 0, got %u", die.depth); + EXPECT(die.tag == DW_TAG_compile_unit, + "root die tag expected compile_unit (0x11), got 0x%x", die.tag); + root_off = die.offset; + have_root = 1; + } + if (die.tag == DW_TAG_subprogram) saw_subprog = 1; + if (die.tag == DW_TAG_base_type) saw_base = 1; + ndie++; + } + EXPECT(ndie >= 4, "expected several DIEs, got %d", ndie); + EXPECT(saw_subprog, "expected a DW_TAG_subprogram DIE"); + EXPECT(saw_base, "expected a DW_TAG_base_type DIE"); + cfree_dwarf_die_iter_free(it); + } + + /* Attribute iterator on the CU root: the fixture's compile_unit carries + * DW_AT_low_pc == 0x1000 and a string-valued DW_AT_name. */ + if (have_root) { + CfreeDwarfAttrIter* it = NULL; + CfreeDwarfAttr a; + int saw_low_pc = 0, saw_string = 0; + EXPECT(cfree_dwarf_attr_iter_new(di, root_off, &it) == CFREE_OK, + "attr_iter_new failed"); + while (cfree_dwarf_attr_iter_next(it, &a) == CFREE_ITER_ITEM) { + if (a.attr == DW_AT_low_pc) { + saw_low_pc = 1; + EXPECT(a.u == 0x1000, "root low_pc expected 0x1000, got 0x%llx", + (unsigned long long)a.u); + } + if (a.form_class == CFREE_DWARF_FC_STRING && a.str.len) saw_string = 1; + } + EXPECT(saw_low_pc, "root DIE missing DW_AT_low_pc"); + EXPECT(saw_string, "root DIE missing a string-valued attribute"); + cfree_dwarf_attr_iter_free(it); + } + + /* Abbrev iterator + per-abbrev attribute specs. 
*/ + { + CfreeDwarfAbbrevIter* it = NULL; + CfreeDwarfAbbrev ab; + int nabbrev = 0, saw_cu_abbrev = 0; + uint32_t cu_code_tbl = 0; + uint64_t cu_code = 0; + EXPECT(cfree_dwarf_abbrev_iter_new(di, &it) == CFREE_OK, + "abbrev_iter_new failed"); + while (cfree_dwarf_abbrev_iter_next(it, &ab) == CFREE_ITER_ITEM) { + if (ab.tag == DW_TAG_compile_unit) { + saw_cu_abbrev = 1; + cu_code_tbl = ab.table_offset; + cu_code = ab.code; + EXPECT(ab.has_children, "compile_unit abbrev should have children"); + } + nabbrev++; + } + EXPECT(nabbrev >= 5, "expected several abbrevs, got %d", nabbrev); + EXPECT(saw_cu_abbrev, "expected a compile_unit abbrev"); + cfree_dwarf_abbrev_iter_free(it); + + if (saw_cu_abbrev) { + CfreeDwarfAbbrevAttrIter* ait = NULL; + CfreeDwarfAbbrevAttr aa; + int saw_name_spec = 0; + EXPECT(cfree_dwarf_abbrev_attr_iter_new(di, cu_code_tbl, cu_code, &ait) == + CFREE_OK, + "abbrev_attr_iter_new failed"); + while (cfree_dwarf_abbrev_attr_iter_next(ait, &aa) == CFREE_ITER_ITEM) { + if (aa.attr == DW_AT_name) saw_name_spec = 1; + } + EXPECT(saw_name_spec, "compile_unit abbrev missing DW_AT_name spec"); + cfree_dwarf_abbrev_attr_iter_free(ait); + } + } + + /* Line iterator: a row at address 0x1000 (line 10), and its file_index + * resolves to a path containing test.c. 
*/ + { + CfreeDwarfLineIter* it = NULL; + CfreeDwarfLineRow row; + int saw_1000 = 0; + uint32_t file_at_1000 = 0; + EXPECT(cfree_dwarf_line_iter_new(di, 0, &it) == CFREE_OK, + "line_iter_new failed"); + while (cfree_dwarf_line_iter_next(it, &row) == CFREE_ITER_ITEM) { + if (row.address == 0x1000 && !row.end_sequence) { + saw_1000 = 1; + file_at_1000 = row.file_index; + EXPECT(row.line == 10, "row at 0x1000 expected line 10, got %u", + row.line); + } + } + EXPECT(saw_1000, "expected a line row at address 0x1000"); + cfree_dwarf_line_iter_free(it); + + if (saw_1000) { + CfreeSlice path = CFREE_SLICE_NULL; + EXPECT(cfree_dwarf_line_file(di, 0, file_at_1000, &path) == CFREE_OK, + "line_file lookup failed"); + EXPECT(path.s && strstr(path.s, "test.c") != NULL, + "line file should contain test.c, got %.*s", + CFREE_SLICE_ARG(path)); + } + } + + /* .debug_str iterator: the fixture interns these known strings. */ + { + CfreeDwarfStrIter* it = NULL; + CfreeDwarfStr s; + int saw_int = 0, saw_point = 0, saw_testc = 0; + EXPECT(cfree_dwarf_str_iter_new(di, &it) == CFREE_OK, + "str_iter_new failed"); + while (cfree_dwarf_str_iter_next(it, &s) == CFREE_ITER_ITEM) { + if (cfree_slice_eq_cstr(s.str, "int")) saw_int = 1; + if (cfree_slice_eq_cstr(s.str, "Point")) saw_point = 1; + if (cfree_slice_eq_cstr(s.str, "test.c")) saw_testc = 1; + } + EXPECT(saw_int && saw_point && saw_testc, + "expected int/Point/test.c in .debug_str (%d/%d/%d)", saw_int, + saw_point, saw_testc); + cfree_dwarf_str_iter_free(it); + } +} + /* ---- main ----------------------------------------------------------- */ static void run_tests(CfreeDebugInfo* di) { @@ -1015,6 +1173,21 @@ int main(void) { const uint8_t* obj_bytes = cfree_writer_mem_bytes(w, &obj_len); fprintf(stderr, "built obj: %zu bytes\n", obj_len); + /* Optional: dump the synthesized DWARF-bearing ELF to a file so the + * objdump driver test has a real fixture (cc -g is the usual source, but + * this keeps the fixture reproducible from a known 
byte layout). */ + { + const char* dump = getenv("CFREE_DWARF_WRITE_FIXTURE"); + if (dump && obj_bytes) { + FILE* fp = fopen(dump, "wb"); + if (fp) { + fwrite(obj_bytes, 1, obj_len, fp); + fclose(fp); + fprintf(stderr, "wrote fixture: %s\n", dump); + } + } + } + /* Re-open via the public API. */ CfreeSlice in; memset(&in, 0, sizeof in); @@ -1031,6 +1204,7 @@ int main(void) { "cfree_dwarf_open failed"); if (di) { run_tests(di); + run_dump_tests(di); cfree_dwarf_free(di); } cfree_obj_free(obj); diff --git a/test/objdump/dwarf/cases/01-dwarf-all.expected b/test/objdump/dwarf/cases/01-dwarf-all.expected @@ -0,0 +1,139 @@ +dwarf.o: file format elf64-arm64 + +.debug_info contents: + Compilation Unit @ offset 0x0: version 5, abbrev_offset 0x0, addr_size 8, length 0xce + <0xc> DW_TAG_compile_unit + DW_AT_name (DW_FORM_strp) = "test.c" + DW_AT_comp_dir (DW_FORM_strp) = "/proj" + DW_AT_language (DW_FORM_udata) = 0x1d + DW_AT_stmt_list (DW_FORM_sec_offset) = 0x0 + DW_AT_low_pc (DW_FORM_addr) = 0x1000 + DW_AT_high_pc (DW_FORM_data8) = 0x8 + <0x2a> DW_TAG_base_type + DW_AT_name (DW_FORM_strp) = "int" + DW_AT_byte_size (DW_FORM_data1) = 0x4 + DW_AT_encoding (DW_FORM_data1) = 0x5 + <0x31> DW_TAG_pointer_type + DW_AT_byte_size (DW_FORM_data1) = 0x8 + DW_AT_type (DW_FORM_ref4) = 0x2a + <0x37> DW_TAG_typedef + DW_AT_name (DW_FORM_strp) = "my_int" + DW_AT_type (DW_FORM_ref4) = 0x2a + <0x40> DW_TAG_array_type + DW_AT_type (DW_FORM_ref4) = 0x2a + <0x45> DW_TAG_subrange_type + DW_AT_count (DW_FORM_data1) = 0x4 + <0x48> DW_TAG_structure_type + DW_AT_name (DW_FORM_strp) = "Point" + DW_AT_byte_size (DW_FORM_data1) = 0x8 + <0x4e> DW_TAG_member + DW_AT_name (DW_FORM_strp) = "x_field" + DW_AT_type (DW_FORM_ref4) = 0x2a + DW_AT_data_member_location (DW_FORM_udata) = 0x0 + <0x58> DW_TAG_member + DW_AT_name (DW_FORM_strp) = "y_field" + DW_AT_type (DW_FORM_ref4) = 0x2a + DW_AT_data_member_location (DW_FORM_udata) = 0x4 + <0x63> DW_TAG_subprogram + DW_AT_name (DW_FORM_strp) = "test_main" + 
DW_AT_low_pc (DW_FORM_addr) = 0x1000 + DW_AT_high_pc (DW_FORM_data8) = 0x8 + DW_AT_frame_base (DW_FORM_exprloc) = 1 byte block: 9c + DW_AT_decl_file (DW_FORM_udata) = 0x1 + DW_AT_decl_line (DW_FORM_udata) = 0x9 + <0x7c> DW_TAG_variable + DW_AT_name (DW_FORM_strp) = "x" + DW_AT_type (DW_FORM_ref4) = 0x2a + DW_AT_location (DW_FORM_exprloc) = 2 byte block: 91 70 + <0x88> DW_TAG_variable + DW_AT_name (DW_FORM_strp) = "y" + DW_AT_type (DW_FORM_ref4) = 0x2a + DW_AT_location (DW_FORM_exprloc) = 2 byte block: 91 78 + <0x94> DW_TAG_formal_parameter + DW_AT_name (DW_FORM_strp) = "arg" + DW_AT_type (DW_FORM_ref4) = 0x2a + DW_AT_location (DW_FORM_exprloc) = 1 byte block: 50 + <0x9f> DW_TAG_variable + DW_AT_name (DW_FORM_strp) = "my_int" + DW_AT_type (DW_FORM_ref4) = 0x31 + DW_AT_location (DW_FORM_exprloc) = 2 byte block: 91 68 + <0xab> DW_TAG_variable + DW_AT_name (DW_FORM_strp) = "Point" + DW_AT_type (DW_FORM_ref4) = 0x37 + DW_AT_location (DW_FORM_exprloc) = 2 byte block: 91 60 + <0xb7> DW_TAG_variable + DW_AT_name (DW_FORM_strp) = "x_field" + DW_AT_type (DW_FORM_ref4) = 0x40 + DW_AT_location (DW_FORM_exprloc) = 2 byte block: 91 40 + <0xc3> DW_TAG_variable + DW_AT_name (DW_FORM_strp) = "y_field" + DW_AT_type (DW_FORM_ref4) = 0x48 + DW_AT_location (DW_FORM_exprloc) = 3 byte block: 91 b8 7f + +.debug_abbrev contents: + Abbrev table @ offset 0x0: + [1] DW_TAG_compile_unit [has children] + DW_AT_name DW_FORM_strp + DW_AT_comp_dir DW_FORM_strp + DW_AT_language DW_FORM_udata + DW_AT_stmt_list DW_FORM_sec_offset + DW_AT_low_pc DW_FORM_addr + DW_AT_high_pc DW_FORM_data8 + [2] DW_TAG_subprogram [has children] + DW_AT_name DW_FORM_strp + DW_AT_low_pc DW_FORM_addr + DW_AT_high_pc DW_FORM_data8 + DW_AT_frame_base DW_FORM_exprloc + DW_AT_decl_file DW_FORM_udata + DW_AT_decl_line DW_FORM_udata + [3] DW_TAG_base_type [no children] + DW_AT_name DW_FORM_strp + DW_AT_byte_size DW_FORM_data1 + DW_AT_encoding DW_FORM_data1 + [4] DW_TAG_variable [no children] + DW_AT_name DW_FORM_strp + 
DW_AT_type DW_FORM_ref4 + DW_AT_location DW_FORM_exprloc + [5] DW_TAG_formal_parameter [no children] + DW_AT_name DW_FORM_strp + DW_AT_type DW_FORM_ref4 + DW_AT_location DW_FORM_exprloc + [6] DW_TAG_pointer_type [no children] + DW_AT_byte_size DW_FORM_data1 + DW_AT_type DW_FORM_ref4 + [7] DW_TAG_typedef [no children] + DW_AT_name DW_FORM_strp + DW_AT_type DW_FORM_ref4 + [8] DW_TAG_array_type [has children] + DW_AT_type DW_FORM_ref4 + [9] DW_TAG_subrange_type [no children] + DW_AT_count DW_FORM_data1 + [10] DW_TAG_structure_type [has children] + DW_AT_name DW_FORM_strp + DW_AT_byte_size DW_FORM_data1 + [11] DW_TAG_member [no children] + DW_AT_name DW_FORM_strp + DW_AT_type DW_FORM_ref4 + DW_AT_data_member_location DW_FORM_udata + +.debug_line contents: + CU @ offset 0x0: + Address File Line Col Flags + 0x0000000000001000 0 10 0 stmt + 0x0000000000001004 0 11 0 stmt + 0x0000000000001004 0 11 0 stmt end_seq + +.debug_str contents: + 0x0 "" + 0x1 "test.c" + 0x8 "/proj" + 0xe "test_main" + 0x18 "int" + 0x1c "x" + 0x1e "y" + 0x20 "arg" + 0x24 "my_int" + 0x2b "Point" + 0x31 "x_field" + 0x39 "y_field" + diff --git a/test/objdump/dwarf/cases/01-dwarf-all.sh b/test/objdump/dwarf/cases/01-dwarf-all.sh @@ -0,0 +1,8 @@ +# `objdump --dwarf` over a committed DWARF5 fixture (a hand-built ELF with +# .debug_info / .debug_abbrev / .debug_line / .debug_str; see +# test/dwarf/dwarf_test.c, regenerable via CFREE_DWARF_WRITE_FIXTURE). The +# usual source is `cc -g`, but a committed fixture keeps this golden stable +# and decoupled from the code generator. Copied into the sandbox under a +# fixed name so the file-format label is deterministic. +cp "$(dirname "$0")/dwarf.o" dwarf.o +"$CFREE" objdump --dwarf dwarf.o diff --git a/test/objdump/dwarf/cases/dwarf.o b/test/objdump/dwarf/cases/dwarf.o Binary files differ. 
diff --git a/test/strings/cases/01-default-min.expected b/test/strings/cases/01-default-min.expected @@ -0,0 +1,2 @@ +Hello +longword12 diff --git a/test/strings/cases/01-default-min.sh b/test/strings/cases/01-default-min.sh @@ -0,0 +1,5 @@ +# Default minimum run length is 4: "ab" and "xy" (2 chars each) are +# dropped; "Hello" and "longword12" survive. NUL and control bytes +# terminate a run. +printf 'ab\000Hello\000\001\002longword12\000xy' > f.bin +"$CFREE" strings f.bin diff --git a/test/strings/cases/02-min-length.expected b/test/strings/cases/02-min-length.expected @@ -0,0 +1 @@ +longword12 diff --git a/test/strings/cases/02-min-length.sh b/test/strings/cases/02-min-length.sh @@ -0,0 +1,4 @@ +# -n 6 raises the threshold: "Hello" (5) is now dropped too, leaving +# only "longword12" (10). +printf 'ab\000Hello\000\001\002longword12\000xy' > f.bin +"$CFREE" strings -n 6 f.bin diff --git a/test/strings/cases/03-offset-hex.expected b/test/strings/cases/03-offset-hex.expected @@ -0,0 +1,2 @@ + 3 Hello + b longword12 diff --git a/test/strings/cases/03-offset-hex.sh b/test/strings/cases/03-offset-hex.sh @@ -0,0 +1,4 @@ +# -t x prefixes each string with its hex byte offset. "Hello" starts at +# offset 3, "longword12" at offset 11 (0xb). +printf 'ab\000Hello\000\001\002longword12\000xy' > f.bin +"$CFREE" strings -t x f.bin diff --git a/test/strings/cases/04-stdin.expected b/test/strings/cases/04-stdin.expected @@ -0,0 +1,2 @@ +tiny +pipedinput diff --git a/test/strings/cases/04-stdin.sh b/test/strings/cases/04-stdin.sh @@ -0,0 +1,2 @@ +# With no file operand, strings reads stdin. +printf 'tiny\000\001pipedinput\000zz' | "$CFREE" strings diff --git a/test/strings/run.sh b/test/strings/run.sh @@ -0,0 +1,69 @@ +#!/bin/sh +# Driver-level `cfree strings` test harness. Same shape as test/strip/run.sh. + +set -u + +script_dir=$(cd "$(dirname "$0")" && pwd) +repo_root=$(cd "$script_dir/../.." 
&& pwd) +cases_dir="$script_dir/cases" + +CFREE="${CFREE:-$repo_root/build/cfree}" +export CFREE + +if [ ! -x "$CFREE" ]; then + echo "strings-driver: cfree binary not found at $CFREE" >&2 + exit 2 +fi + +work_root=$(mktemp -d "${TMPDIR:-/tmp}/cfree-strings-test.XXXXXX") +trap 'rm -rf "$work_root"' EXIT + +pass=0 +fail=0 +failures= + +for sh in "$cases_dir"/*.sh; do + [ -e "$sh" ] || continue + name=$(basename "${sh%.sh}") + expected="${sh%.sh}.expected" + actual="$work_root/$name.actual" + + if [ ! -e "$expected" ]; then + printf 'FAIL %s (missing %s)\n' "$name" "$(basename "$expected")" + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + sandbox="$work_root/$name" + mkdir -p "$sandbox" + ( cd "$sandbox" && sh "$sh" ) > "$actual" 2>&1 + case_rc=$? + + if [ "$case_rc" -ne 0 ]; then + printf 'FAIL %s (script exit=%d)\n' "$name" "$case_rc" + diff -u "$expected" "$actual" || true + fail=$((fail + 1)) + failures="$failures $name" + continue + fi + + if diff -u "$expected" "$actual" >/dev/null 2>&1; then + printf 'PASS %s\n' "$name" + pass=$((pass + 1)) + else + printf 'FAIL %s\n' "$name" + diff -u "$expected" "$actual" || true + cp "$actual" "$cases_dir/$name.actual" 2>/dev/null || true + fail=$((fail + 1)) + failures="$failures $name" + fi +done + +total=$((pass + fail)) +if [ "$fail" -gt 0 ]; then + printf '\nstrings-driver: failures:%s\n' "$failures" + printf 'strings-driver: %d/%d passed\n' "$pass" "$total" + exit 1 +fi +printf '\nstrings-driver: %d/%d passed\n' "$pass" "$total" diff --git a/test/test.mk b/test/test.mk @@ -47,6 +47,7 @@ TEST_TARGETS = \ test-driver-cc \ test-driver-objcopy \ test-driver-objdump \ + test-driver-strings \ test-driver-strip \ test-dwarf \ test-elf \ @@ -110,7 +111,7 @@ DEFAULT_TEST_TARGETS = \ test: $(DEFAULT_TEST_TARGETS) -test-driver: test-driver-cc test-driver-ar test-driver-strip test-driver-objcopy test-driver-objdump +test-driver: test-driver-cc test-driver-ar test-driver-strip test-driver-objcopy 
test-driver-objdump test-driver-strings test-driver-cc: bin @CFREE=$(abspath $(BIN)) sh test/driver/run.sh @@ -184,6 +185,9 @@ test-driver-objcopy: bin test-driver-objdump: bin @CFREE=$(abspath $(BIN)) sh test/objdump/run.sh +test-driver-strings: bin + @CFREE=$(abspath $(BIN)) sh test/strings/run.sh + # DWARF consumer unit test: builds a hand-crafted DWARF-bearing ELF in # memory and exercises every cfree_dwarf_* entry. It reaches into the # internal object builder to synthesize the fixture, so link individual