commit 21ac717e705ac204fb507e83fa04f467397a494a
parent b3f14e4ecb76fed2930be28041e7e68a692b073f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 5 Jun 2026 09:49:29 -0700
Implement kit symbolize (BACKTRACE L3a/WS5)
Add `kit symbolize`, the hosted batching symbolizer that matches kit's
actual backtrace artifact: it reads the raw "#N 0x<hex>" stream that
__kit_print_backtrace emits, finds the address on each line, resolves it
through the existing DWARF reader, and rewrites the line in place as
#0 0x401136 bt_leaf at addr2line_prog.c:51:3
preserving the "#N" framing that addr2line structurally can't keep.
Lines with no 0x<hex> token pass through verbatim, so a mixed panic log
(banner + frames + register dump) survives unharmed.
addr2line stays a faithful GNU/LLVM clone (bare addresses in, file:line
out); its output contract is unchanged. The DWARF-open + func/line core
is factored into driver/lib/dwarfsym.c, shared by both tools, which now
differ only in formatting. symbolize is a non-default (GROUP_OTHER) tool
like mc/disas; it takes a single -e today, with multiple -e images or a
module map (for libc.so frames that need their own load slide) as the
natural extension.
Testing: test/rt/addr2line.sh (make target test-rt-backtrace) grows a
second lane per arch/opt — it pipes the captured stream into
`kit symbolize -e <exe>` and asserts each frame keeps its "#N 0x.." form
with "<func> at file:line" appended. aa64/x64/rv64 at O0/O1; the x64/O1
lane stays the documented pre-existing -g -O1 compiler red (cc aborts
before producing a binary, so it never reaches symbolization).
Diffstat:
12 files changed, 466 insertions(+), 83 deletions(-)
diff --git a/doc/plan/BACKTRACE.md b/doc/plan/BACKTRACE.md
@@ -1,6 +1,6 @@
# Plan: stack-trace builtins & runtime backtrace
-## Status — 2026-06-05 — L1 + L2 + L3a shipped (WS1–WS4); WS5/L3b remaining
+## Status — 2026-06-05 — L1 + L2 + L3a + `kit symbolize` shipped (WS1–WS4 + WS5 symbolizer); WS5/L3c + L3b remaining
L3a (WS4) is now shipped on top of L1/L2:
@@ -21,16 +21,30 @@ L3a (WS4) is now shipped on top of L1/L2:
through the same weak sink (printf-free). Pulling `__kit_assert_fail` therefore
also pulls `print_backtrace.o` → `backtrace.o` from the archive — the intended
wiring.
-- **Symbolization** is out-of-process via the existing `kit addr2line` — verified
- round-trip: a static non-PIE ELF prints its own trace at runtime, the captured
- addresses feed straight to `kit addr2line -f -e <image>`, resolving
- `bt_leaf`/`bt_mid`/`bt_root`/`test_main` (outer no-`-g` frames show `??`).
+- **Symbolization** is out-of-process via two hosted tools that share one
+ DWARF-open + func/line core (`driver/lib/dwarfsym.c`):
+ - `kit addr2line` — the faithful GNU/LLVM clone (bare addresses in,
+ `file:line` out), unchanged in contract.
+ - `kit symbolize` (`driver/cmd/symbolize.c`, **shipped**) — reads the raw
+ `#N 0x<hex>` stream `__kit_print_backtrace` emits, finds the address on each
+ line, resolves it through the same DWARF reader, and rewrites the line in
+ place as `#0 0x401136 bt_leaf at addr2line_prog.c:51:3`, keeping the `#N`
+ framing addr2line structurally can't. Lines with no address pass through
+ verbatim. A single `-e <image>` today; multi-`-e`/module-map (for `libc.so`
+ frames that need their own load slide) is the natural extension.
+ Verified round-trip: a static non-PIE ELF prints its own trace at runtime, and
+ the captured addresses resolve `bt_leaf`/`bt_mid`/`bt_root`/`test_main` through
+ both `kit addr2line -f -e <image>` and `kit symbolize -e <image>` (outer
+ no-`-g` frames show `??`).
Tests (L3a): `test/rt/cases/print_backtrace.c` (in-process parse of the emitted
`#N 0xADDR` lines, aa64/x64/rv64 under exec, exit 42) and `test/rt/addr2line.sh`
-+ `test/rt/addr2line_prog.c` (the `kit addr2line` round-trip, make target
-`test-rt-backtrace`). `test/rt/smoke.c` also includes `<kit/backtrace.h>` so the
-header compiles on every rt-header target.
++ `test/rt/addr2line_prog.c` (the symbolization round-trip, make target
+`test-rt-backtrace`). The round-trip script runs the captured stream through
+**both** lanes per arch/opt: `kit addr2line -f` over the bare addresses, and
+`kit symbolize` over the raw `#N 0xADDR` stream (asserting the `#N` framing is
+preserved and `<func> at file:line` is appended). `test/rt/smoke.c` also
+includes `<kit/backtrace.h>` so the header compiles on every rt-header target.
**Opt coverage — the backtrace path passes at O0 *and* O1 on all three arches.**
The rt-runtime corpus (`test/rt/run.sh`) and the addr2line round-trip
@@ -89,9 +103,13 @@ Nothing in L1/L2/L3a is outstanding. What's left is the rest of L3:
- ~~**WS4 — L3a:**~~ **done** (see Status) — `__kit_print_backtrace()` + weak
`__kit_backtrace_write` sink + assert-path hook + `kit addr2line` round-trip.
-- **WS5 — L3c (recommended next):** tool-side auto-backtrace in `kit run`/`kit
- emu`/`dbg` fault handlers (reuses the existing DWARF reader + `dbg bt`; never
- crosses into rt).
+- ~~**WS5 — `kit symbolize`:**~~ **done** (see Status) — the hosted batching
+ symbolizer that reads the `#N 0x<hex>` stream and annotates it in place,
+ sharing `driver/lib/dwarfsym.c` with `addr2line`. Tested by the second lane of
+ `test/rt/addr2line.sh`.
+- **WS5 — L3c (tool-side auto-backtrace, recommended next):** auto-print a
+ symbolized trace from `kit run`/`kit emu`/`dbg` fault handlers (reuses the
+ existing DWARF reader + `dbg bt`; never crosses into rt).
- **L3b:** in-process self-symbolization (hosted-only `libkit_bt.a`); deferred
until a concrete consumer needs in-binary symbolized panics.
@@ -285,9 +303,11 @@ shipping **L3a now**, leaving L3b/L3c as documented extensions.
via `__kit_backtrace`, and writes raw lines (`#0 0x401136`, …) to a
host-provided sink (the weak `__kit_backtrace_write(const char*, size_t)` the
host or `_start` wires to `write(2)`; freestanding default is a no-op).
- Symbolization is a separate step through the **existing** `kit addr2line` tool
- (a thin batching `kit symbolize` remains a possible future convenience). Zero
- new symbolization code, fully freestanding, matches how minimal panic handlers
+ Symbolization is a separate hosted step through either `kit addr2line` (bare
+ addresses) or `kit symbolize` (the raw `#N 0x<hex>` stream, annotated in place
+ — **shipped**; see Status). Both share the DWARF-open + func/line core in
+ `driver/lib/dwarfsym.c` and reuse the existing reader, so the freestanding
+ image carries zero new symbolization code, matching how minimal panic handlers
work in the wild.
- **L3b — in-process self-symbolization (hosted-only).** A trimmed line/func
@@ -308,9 +328,11 @@ shipping **L3a now**, leaving L3b/L3c as documented extensions.
### Tests (L3)
- L3a [done]: `test/rt/addr2line.sh` (+ `addr2line_prog.c`) runs a kit-compiled
- program that prints its own trace, then pipes the captured addresses through
- `kit addr2line -f`, asserting `bt_leaf`/`bt_mid`/`bt_root`/`test_main` appear
- (make target `test-rt-backtrace`, aa64/x64/rv64). In-process companion:
+ program that prints its own trace, then symbolizes the captured stream two
+ ways — `kit addr2line -f` over the bare addresses, and `kit symbolize` over the
+ raw `#N 0xADDR` stream (asserting the `#N` framing survives and `<func> at
+ file:line` is appended) — checking `bt_leaf`/`bt_mid`/`bt_root`/`test_main`
+ appear (make target `test-rt-backtrace`, aa64/x64/rv64). In-process companion:
`test/rt/cases/print_backtrace.c` parses the emitted `#N 0xADDR` lines.
- L3c: an `kit emu` fault test asserting a symbolized frame line on stderr.
@@ -325,8 +347,11 @@ shipping **L3a now**, leaving L3b/L3c as documented extensions.
to WS4 — it needs the L3 print fn).
4. **WS4 — L3a** raw print (`__kit_print_backtrace` + weak `__kit_backtrace_write`
sink) + `kit addr2line` round-trip; wire the assert hook. ✅ done.
-5. **WS5 — L3c** tool-side auto-backtrace (optional, parallelizable). ⏳ remaining (next).
-6. **L3b** deferred until a consumer needs in-binary symbolized panics.
+5. **WS5 — `kit symbolize`** hosted batching symbolizer over the `#N 0x<hex>`
+ stream, sharing `driver/lib/dwarfsym.c` with `addr2line`; second lane of
+ `test/rt/addr2line.sh`. ✅ done.
+6. **WS5 — L3c** tool-side auto-backtrace (optional, parallelizable). ⏳ remaining (next).
+7. **L3b** deferred until a consumer needs in-binary symbolized panics.
## Open questions
diff --git a/driver/cmd/addr2line.c b/driver/cmd/addr2line.c
@@ -6,6 +6,7 @@
#include <string.h>
#include "driver.h"
+#include "dwarfsym.h"
#define A2L_TOOL "addr2line"
@@ -27,33 +28,27 @@ static void a2l_strip_basename(const char** path_ptr) {
if (slash) *path_ptr = slash + 1;
}
-static void a2l_translate(KitDebugInfo* dwarf, uint64_t addr,
+static void a2l_translate(DriverDwarfSym* sym, uint64_t addr,
const A2lOpts* opts) {
- KitSlice file;
- uint32_t line = 0, col = 0;
- KitSlice func;
- uint64_t func_lo = 0, func_hi = 0;
- KitStatus st = kit_dwarf_addr_to_line(dwarf, addr, &file, &line, &col);
- int have_func = 0;
+ DriverSymLoc loc;
- if (opts->functions) {
- if (kit_dwarf_func_at(dwarf, addr, &func, &func_lo, &func_hi) == KIT_OK)
- have_func = 1;
- }
+ /* Share the DWARF open + func/line queries with `kit symbolize`; the two
+ * tools differ only in how they format the result. */
+ driver_dwarfsym_lookup(sym, addr, opts->functions, &loc);
if (opts->show_addr) driver_printf("0x%llx", (unsigned long long)addr);
if (opts->pretty) {
if (opts->show_addr) driver_printf(": ");
- if (have_func)
- driver_printf("%.*s at ", (int)func.len, func.s);
+ if (loc.have_func)
+ driver_printf("%.*s at ", (int)loc.func.len, loc.func.s);
else if (opts->functions)
driver_printf("?? at ");
- if (st == KIT_OK) {
- const char* f = file.s;
+ if (loc.have_line) {
+ const char* f = loc.file.s;
if (opts->basenames) a2l_strip_basename(&f);
- driver_printf("%s:%u", f, line);
- if (col) driver_printf(":%u", col);
+ driver_printf("%s:%u", f, loc.line);
+ if (loc.col) driver_printf(":%u", loc.col);
} else {
driver_printf("??:0");
}
@@ -65,17 +60,17 @@ static void a2l_translate(KitDebugInfo* dwarf, uint64_t addr,
if (opts->show_addr) driver_printf(": ");
if (opts->functions) {
- if (have_func)
- driver_printf("%.*s\n", (int)func.len, func.s);
+ if (loc.have_func)
+ driver_printf("%.*s\n", (int)loc.func.len, loc.func.s);
else
driver_printf("??\n");
}
- if (st == KIT_OK) {
- const char* f = file.s;
+ if (loc.have_line) {
+ const char* f = loc.file.s;
if (opts->basenames) a2l_strip_basename(&f);
- driver_printf("%s:%u", f, line);
- if (col) driver_printf(":%u", col);
+ driver_printf("%s:%u", f, loc.line);
+ if (loc.col) driver_printf(":%u", loc.col);
} else {
driver_printf("??:0");
}
@@ -106,13 +101,9 @@ void driver_help_addr2line(void) {
int driver_addr2line(int argc, char** argv) {
DriverEnv env;
- KitContext ctx;
A2lOpts opts;
- KitObjFile* of = NULL;
- KitDebugInfo* dwarf = NULL;
- DriverLoad ld = {0};
- KitSlice input;
- int i, rc = 1;
+ DriverDwarfSym sym;
+ int i, rc = 1, opened = 0;
int stdin_addr_count = 0;
if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
@@ -122,7 +113,6 @@ int driver_addr2line(int argc, char** argv) {
memset(&opts, 0, sizeof opts);
driver_env_init(&env);
- ctx = driver_env_to_context(&env);
for (i = 1; i < argc; ++i) {
const char* a = argv[i];
@@ -164,23 +154,10 @@ int driver_addr2line(int argc, char** argv) {
goto done;
}
- if (driver_load_bytes(&env.file_io, A2L_TOOL, opts.exe_path, &ld, &input) !=
- 0) {
- rc = 1;
- goto done;
- }
-
- {
- KitSlice name_slice = kit_slice_cstr(opts.exe_path);
- if (kit_obj_open(&ctx, name_slice, &input, &of) != KIT_OK) {
- driver_errf(A2L_TOOL, "%s: not a recognized object file", opts.exe_path);
- rc = 1;
- goto done;
- }
- }
-
- if (kit_dwarf_open(&ctx, of, &dwarf) != KIT_OK) {
- driver_errf(A2L_TOOL, "%s: no debug info available", opts.exe_path);
+ /* open() memsets `sym` before any fallible step, so once we are past this
+ * point driver_dwarfsym_close is always safe — even on a partial failure. */
+ opened = 1;
+ if (driver_dwarfsym_open(&sym, &env, A2L_TOOL, opts.exe_path) != 0) {
rc = 1;
goto done;
}
@@ -197,7 +174,7 @@ int driver_addr2line(int argc, char** argv) {
}
{
uint64_t addr = (uint64_t)strtoull(a, NULL, 16);
- a2l_translate(dwarf, addr, &opts);
+ a2l_translate(&sym, addr, &opts);
stdin_addr_count++;
}
}
@@ -209,7 +186,7 @@ int driver_addr2line(int argc, char** argv) {
if (n <= 0) break;
{
uint64_t addr = (uint64_t)strtoull(line, NULL, 16);
- a2l_translate(dwarf, addr, &opts);
+ a2l_translate(&sym, addr, &opts);
}
}
}
@@ -217,9 +194,7 @@ int driver_addr2line(int argc, char** argv) {
rc = 0;
done:
- if (dwarf) kit_dwarf_free(dwarf);
- if (of) kit_obj_free(of);
- if (ld.loaded) driver_release_bytes(&env.file_io, &ld);
+ if (opened) driver_dwarfsym_close(&sym);
driver_env_fini(&env);
return rc;
}
diff --git a/driver/cmd/symbolize.c b/driver/cmd/symbolize.c
@@ -0,0 +1,205 @@
+#include <kit/core.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "driver.h"
+#include "dwarfsym.h"
+
+/* kit symbolize — annotate a __kit_print_backtrace stream in place.
+ *
+ * Where `addr2line` is a faithful clone of the GNU/LLVM "addresses in,
+ * file:line out" contract, `symbolize` matches kit's actual backtrace
+ * artifact: it reads the raw "#N 0x<hex>" lines that __kit_print_backtrace
+ * writes (rt/lib/stack/print_backtrace.c), finds the address on each line,
+ * resolves it through the same DWARF reader (kit_dwarf_func_at +
+ * kit_dwarf_addr_to_line, via driver/lib/dwarfsym), and rewrites the line as
+ *
+ * #0 0x401136 bt_leaf at addr2line_prog.c:51:3
+ *
+ * preserving the original "#N" framing that addr2line structurally can't keep.
+ * Lines with no recognizable 0x<hex> token pass through verbatim, so a mixed
+ * log (banner + frames) survives unharmed. See doc/plan/BACKTRACE.md (WS5). */
+
+#define SYM_TOOL "symbolize"
+
+/* Command-line options for `kit symbolize`, filled in by the argument loop
+ * in driver_symbolize. */
+typedef struct SymOpts {
+ const char* exe_path; /* -e FILE: image whose debug info is queried */
+ int basenames; /* --basenames */
+} SymOpts;
+
+/* Find the first "0x"/"0X"-prefixed hex run in [s, s+len) and decode it.
+ * Returns 1 and stores the value in *out when a token with at least one hex
+ * digit, whose value fits in 64 bits, is found; else returns 0 and leaves
+ * *out untouched. Works on a non-NUL-terminated span (the line is a slice of
+ * the stdin buffer), so it never reads past `len`.
+ *
+ * A token whose value does not fit in a uint64_t (more than 16 significant
+ * hex digits) is not an address that can be resolved. Rather than letting the
+ * shift silently drop the high bits and annotating the line with whatever the
+ * truncated value happens to hit, the token is skipped and the scan resumes
+ * after it. Leading zeros do not count toward the limit. */
+static int sym_line_addr(const char* s, size_t len, uint64_t* out) {
+  size_t i;
+  for (i = 0; i + 1 < len; ++i) {
+    uint64_t v = 0;
+    size_t j;
+    int any = 0;
+    int overflow = 0;
+
+    if (s[i] != '0' || (s[i + 1] != 'x' && s[i + 1] != 'X')) continue;
+
+    for (j = i + 2; j < len; ++j) {
+      char c = s[j];
+      unsigned d;
+      if (c >= '0' && c <= '9')
+        d = (unsigned)(c - '0');
+      else if (c >= 'a' && c <= 'f')
+        d = (unsigned)(c - 'a') + 10u;
+      else if (c >= 'A' && c <= 'F')
+        d = (unsigned)(c - 'A') + 10u;
+      else
+        break;
+      /* A non-zero top nibble is about to be shifted out: the value no
+       * longer fits in 64 bits. Keep consuming digits so the whole token is
+       * skipped as a unit. */
+      if (v >> 60) overflow = 1;
+      v = (v << 4) | (uint64_t)d;
+      any = 1;
+    }
+
+    if (any && !overflow) {
+      *out = v;
+      return 1;
+    }
+    /* Oversized token: resume right after its digits (the loop's ++i lands
+     * on j). A bare "0x" with no digits just advances by one byte. */
+    if (overflow) i = j - 1;
+  }
+  return 0;
+}
+
+/* Append the annotation for one resolved address to the current output line:
+ * a single space, then "<func> at <file>:<line>[:<col>]". A missing function
+ * is rendered as "??" and a missing line entry as "??:0", the same
+ * placeholders a2l_translate's pretty path prints, so both tools show an
+ * unresolved frame the same way. No newline is written here.
+ *
+ *   loc  - lookup result; have_func / have_line select which half is real.
+ *   opts - only `basenames` is consulted (strip the directory part). */
+static void sym_emit_annotation(const DriverSymLoc* loc, const SymOpts* opts) {
+  const char* path;
+
+  /* Separator between the echoed frame text and the annotation. */
+  driver_printf(" ");
+
+  /* Function half. */
+  if (!loc->have_func)
+    driver_printf("?? at ");
+  else
+    driver_printf("%.*s at ", (int)loc->func.len, loc->func.s);
+
+  /* Location half. */
+  if (!loc->have_line) {
+    driver_printf("??:0");
+    return;
+  }
+
+  /* NOTE(review): printing with %s assumes file.s is NUL-terminated. */
+  path = opts->basenames ? driver_basename(loc->file.s) : loc->file.s;
+  driver_printf("%s:%u", path, loc->line);
+  if (loc->col != 0) driver_printf(":%u", loc->col);
+}
+
+/* Print the `kit symbolize` help text (synopsis, options, and a description
+ * of the line-rewriting behavior) through driver_printf as a single literal. */
+void driver_help_symbolize(void) {
+ driver_printf(
+ "%.*s",
+ KIT_SLICE_ARG(KIT_SLICE_LIT(
+ "kit symbolize — annotate a kit backtrace stream with func at "
+ "file:line\n"
+ "\n"
+ "USAGE\n"
+ " <prog that prints a backtrace> | kit symbolize -e FILE\n"
+ " kit symbolize -e FILE < backtrace.txt\n"
+ "\n"
+ "OPTIONS\n"
+ " -e FILE object file with debug info (required)\n"
+ " --basenames strip directory from file paths\n"
+ " -h, --help show this help\n"
+ "\n"
+ "Reads the raw \"#N 0x<hex>\" lines that __kit_print_backtrace "
+ "writes\n"
+ "on standard input, resolves the address on each line via the "
+ "image's\n"
+ "debug info, and rewrites the line as\n"
+ " #0 0x401136 bt_leaf at file.c:51:3\n"
+ "keeping the original \"#N\" framing. Lines with no 0x<hex> address "
+ "pass\n"
+ "through unchanged. For a static non-PIE image the runtime "
+ "addresses\n"
+ "equal the link-time addresses, so no load-bias adjustment is "
+ "needed.\n")));
+}
+
+/* Entry point for `kit symbolize`.
+ *
+ * Parses `-e FILE` / `--basenames`, opens FILE's debug info, reads standard
+ * input into a buffer, and echoes it line by line, appending the annotation
+ * from sym_emit_annotation to every line on which sym_line_addr finds an
+ * address.
+ *
+ * Returns 0 on success (and when only help is printed), 2 for a command-line
+ * error, and 1 when the image cannot be opened or stdin cannot be read. */
+int driver_symbolize(int argc, char** argv) {
+ DriverEnv env;
+ SymOpts opts;
+ DriverDwarfSym sym;
+ uint8_t* data = NULL;
+ size_t size = 0;
+ int i, rc = 1, opened = 0;
+ size_t pos;
+
+ /* No arguments at all is treated like a help request (exit 0). */
+ if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
+ driver_help_symbolize();
+ return 0;
+ }
+
+ memset(&opts, 0, sizeof opts);
+ driver_env_init(&env);
+
+ for (i = 1; i < argc; ++i) {
+ const char* a = argv[i];
+ if (driver_streq(a, "-e")) {
+ if (i + 1 >= argc) {
+ driver_errf(SYM_TOOL, "-e requires a path");
+ rc = 2;
+ goto done;
+ }
+ opts.exe_path = argv[++i];
+ continue;
+ }
+ if (driver_streq(a, "--basenames")) {
+ opts.basenames = 1;
+ continue;
+ }
+ if (a[0] == '-' && a[1] != '\0') {
+ driver_errf(SYM_TOOL, "unknown option: %s", a);
+ rc = 2;
+ goto done;
+ }
+ /* Anything left is positional (including a lone "-"); the tool takes no
+ * positional arguments, the stream always comes from stdin. */
+ driver_errf(SYM_TOOL, "unexpected argument: %s", a);
+ rc = 2;
+ goto done;
+ }
+
+ if (!opts.exe_path) {
+ driver_errf(SYM_TOOL, "no object file specified (-e FILE)");
+ rc = 2;
+ goto done;
+ }
+
+ /* open() memsets `sym` before any fallible step, so once we are past this
+ * point driver_dwarfsym_close is always safe — even on a partial failure. */
+ opened = 1;
+ /* rc is still 1 on this path; open() has already printed the diagnostic. */
+ if (driver_dwarfsym_open(&sym, &env, SYM_TOOL, opts.exe_path) != 0) goto done;
+
+ if (!driver_read_stdin(&env, &data, &size)) {
+ driver_errf(SYM_TOOL, "failed to read backtrace stream from stdin");
+ goto done;
+ }
+
+ /* Emit each line verbatim, appending an annotation when it carries an
+ * address. Split on '\n'; a final line without a trailing newline is still
+ * processed (and printed without one). */
+ pos = 0;
+ while (pos < size) {
+ size_t start = pos;
+ size_t end = start;
+ uint64_t addr = 0;
+ while (end < size && data[end] != '\n') ++end;
+
+ /* NOTE(review): the precision is cast to int, so this assumes a single
+ * line never exceeds INT_MAX bytes — confirm that is acceptable. */
+ driver_printf("%.*s", (int)(end - start), (const char*)(data + start));
+ if (sym_line_addr((const char*)(data + start), end - start, &addr)) {
+ DriverSymLoc loc;
+ /* want_func = 1: the annotation always includes the function half. */
+ driver_dwarfsym_lookup(&sym, addr, 1, &loc);
+ sym_emit_annotation(&loc, &opts);
+ }
+ if (end < size) {
+ driver_printf("\n");
+ pos = end + 1;
+ } else {
+ pos = end; /* last line had no terminator */
+ }
+ }
+
+ rc = 0;
+
+done:
+ /* Release in reverse order of acquisition; `data` and `opened` are only set
+ * once the corresponding resource may exist. */
+ if (data) driver_free(&env, data, size);
+ if (opened) driver_dwarfsym_close(&sym);
+ driver_env_fini(&env);
+ return rc;
+}
diff --git a/driver/driver.h b/driver/driver.h
@@ -56,6 +56,7 @@ int driver_emu(int argc, char** argv);
int driver_nm(int argc, char** argv);
int driver_size(int argc, char** argv);
int driver_addr2line(int argc, char** argv);
+int driver_symbolize(int argc, char** argv);
int driver_strings(int argc, char** argv);
int driver_cas(int argc, char** argv);
int driver_pkg(int argc, char** argv);
@@ -97,6 +98,7 @@ void driver_help_emu(void);
void driver_help_nm(void);
void driver_help_size(void);
void driver_help_addr2line(void);
+void driver_help_symbolize(void);
void driver_help_strings(void);
void driver_help_cas(void);
void driver_help_pkg(void);
diff --git a/driver/lib/dwarfsym.c b/driver/lib/dwarfsym.c
@@ -0,0 +1,70 @@
+#include "dwarfsym.h"
+
+#include <string.h>
+
+#include "driver.h"
+
+/* Load `path` and open its object + DWARF reader into `s`.
+ *
+ * `s` is zeroed before anything can fail, so every exit path leaves it in a
+ * state driver_dwarfsym_close can be called on. Steps run in order (load
+ * bytes, open object, open DWARF) and stop at the first failure; the object
+ * and DWARF failures are reported here through driver_errf(tool, ...).
+ *
+ * Returns 0 on success, 1 on any failure. */
+int driver_dwarfsym_open(DriverDwarfSym* s, DriverEnv* env, const char* tool,
+                         const char* path) {
+  KitSlice bytes;
+  int rc = 1;
+
+  memset(s, 0, sizeof *s);
+  s->env = env;
+  s->ctx = driver_env_to_context(env);
+
+  if (driver_load_bytes(&env->file_io, tool, path, &s->ld, &bytes) != 0) {
+    /* No message here; `tool` was handed to the loader, which presumably
+     * reports its own failure. */
+  } else if (kit_obj_open(&s->ctx, kit_slice_cstr(path), &bytes, &s->of) !=
+             KIT_OK) {
+    driver_errf(tool, "%s: not a recognized object file", path);
+  } else if (kit_dwarf_open(&s->ctx, s->of, &s->dwarf) != KIT_OK) {
+    driver_errf(tool, "%s: no debug info available", path);
+  } else {
+    rc = 0;
+  }
+
+  return rc;
+}
+
+/* Release everything driver_dwarfsym_open acquired, in reverse order: the
+ * DWARF reader, then the object, then the loaded bytes. Safe on NULL, on a
+ * partially opened `s`, and when called more than once: each member is reset
+ * once released. The caller-owned `env` is not touched beyond using its
+ * file_io for the release. */
+void driver_dwarfsym_close(DriverDwarfSym* s) {
+  if (!s) return;
+  if (s->dwarf) {
+    kit_dwarf_free(s->dwarf);
+    s->dwarf = NULL;
+  }
+  if (s->of) {
+    kit_obj_free(s->of);
+    s->of = NULL;
+  }
+  if (s->env && s->ld.loaded) {
+    driver_release_bytes(&s->env->file_io, &s->ld);
+    /* Return the load record to the zeroed "nothing loaded" state open()
+     * starts from, so a second close cannot release the same bytes twice
+     * whether or not driver_release_bytes clears `loaded` itself. */
+    memset(&s->ld, 0, sizeof s->ld);
+  }
+}
+
+/* Resolve `addr` against the opened DWARF reader and report the result in
+ * `out`.
+ *
+ * `out` is zeroed first, so an unresolved half stays have_* == 0 with empty
+ * fields. The line query always runs; the function-name query runs only when
+ * `want_func` is non-zero. Results are copied into `out` only when the
+ * corresponding query returns KIT_OK. */
+void driver_dwarfsym_lookup(DriverDwarfSym* s, uint64_t addr, int want_func,
+                            DriverSymLoc* out) {
+  KitSlice src_file;
+  KitSlice fn_name;
+  uint32_t src_line = 0, src_col = 0;
+  uint64_t fn_lo = 0, fn_hi = 0;
+
+  memset(out, 0, sizeof *out);
+
+  /* file:line[:col] half. */
+  if (kit_dwarf_addr_to_line(s->dwarf, addr, &src_file, &src_line,
+                             &src_col) == KIT_OK) {
+    out->file = src_file;
+    out->line = src_line;
+    out->col = src_col;
+    out->have_line = 1;
+  }
+
+  /* Function-name half, only on request. The [lo, hi) range the reader
+   * returns is not part of DriverSymLoc and is discarded. */
+  if (!want_func) return;
+  if (kit_dwarf_func_at(s->dwarf, addr, &fn_name, &fn_lo, &fn_hi) != KIT_OK)
+    return;
+
+  out->func = fn_name;
+  out->have_func = 1;
+}
diff --git a/driver/lib/dwarfsym.h b/driver/lib/dwarfsym.h
@@ -0,0 +1,61 @@
+#ifndef KIT_DRIVER_DWARFSYM_H
+#define KIT_DRIVER_DWARFSYM_H
+
+#include <kit/core.h>
+#include <kit/dwarf.h>
+#include <kit/object.h>
+#include <stdint.h>
+
+#include "env.h"
+
+/* Shared address -> symbol core for the `addr2line` and `symbolize` tools.
+ *
+ * A DriverDwarfSym holds one image's loaded bytes plus its opened DWARF
+ * reader, so a tool opens the object once and translates many addresses.
+ * driver_dwarfsym_lookup runs the same kit_dwarf_func_at /
+ * kit_dwarf_addr_to_line queries both tools share and reports the result in a
+ * DriverSymLoc; each tool then formats that however it likes (addr2line one
+ * line per address; symbolize annotating a backtrace stream in place).
+ *
+ * Addresses are image-relative, matching the kit_dwarf_* contract. The of /
+ * dwarf borrow from the loaded bytes, so driver_dwarfsym_close frees in the
+ * reverse order (dwarf, then object, then the byte buffer). */
+
+typedef struct DriverDwarfSym {
+ DriverEnv* env; /* caller-owned environment; its file_io releases `ld` */
+ KitContext ctx; /* built from `env` by driver_env_to_context in open */
+ KitObjFile* of; /* object opened over the loaded bytes; NULL until opened */
+ KitDebugInfo* dwarf; /* DWARF reader opened from `of`; NULL until opened */
+ DriverLoad ld; /* load record filled in by driver_load_bytes */
+} DriverDwarfSym;
+
+/* Load `path` and open its DWARF, keyed off the (already-initialized) `env`.
+ * Returns 0 on success. On failure emits a diagnostic via driver_errf(tool,...)
+ * and returns 1; the caller must still call driver_dwarfsym_close to release
+ * any partial state. */
+int driver_dwarfsym_open(DriverDwarfSym* s, DriverEnv* env, const char* tool,
+ const char* path);
+
+/* Release the DWARF reader, object, and byte buffer. Idempotent; the `env`
+ * itself is owned by the caller and is left untouched. */
+void driver_dwarfsym_close(DriverDwarfSym* s);
+
+/* The resolved location for one address. `have_func` / `have_line` say which
+ * fields are valid; an unresolved query returns with both clear (and the
+ * slices empty). `func`, `file`, `line`, and `col` mirror kit_dwarf_func_at /
+ * kit_dwarf_addr_to_line outputs. */
+typedef struct DriverSymLoc {
+ int have_func; /* 1 when `func` is valid */
+ KitSlice func; /* function name from kit_dwarf_func_at */
+ int have_line; /* 1 when `file`, `line`, and `col` are valid */
+ KitSlice file; /* source file from kit_dwarf_addr_to_line */
+ uint32_t line; /* source line from kit_dwarf_addr_to_line */
+ uint32_t col; /* source column; the formatters omit it when 0 */
+} DriverSymLoc;
+
+/* Translate one image-relative address. `want_func` gates the (separate)
+ * DWARF function-name query, so callers that never print a name skip that
+ * work. */
+void driver_dwarfsym_lookup(DriverDwarfSym* s, uint64_t addr, int want_func,
+ DriverSymLoc* out);
+
+#endif /* KIT_DRIVER_DWARFSYM_H */
diff --git a/driver/main.c b/driver/main.c
@@ -119,6 +119,11 @@ static const DriverToolDesc driver_tools[] = {
"Translate addresses to file:line using debug info",
DRIVER_GROUP_TOOLCHAIN},
#endif
+#if KIT_TOOL_SYMBOLIZE_ENABLED
+ {"symbolize", driver_symbolize, driver_help_symbolize,
+ "Annotate a kit backtrace stream with func at file:line",
+ DRIVER_GROUP_OTHER},
+#endif
#if KIT_TOOL_STRINGS_ENABLED
{"strings", driver_strings, driver_help_strings,
"Print printable character sequences found in a file",
diff --git a/include/kit/config.h b/include/kit/config.h
@@ -122,6 +122,7 @@
#define KIT_TOOL_NM_ENABLED 1
#define KIT_TOOL_SIZE_ENABLED 1
#define KIT_TOOL_ADDR2LINE_ENABLED 1
+#define KIT_TOOL_SYMBOLIZE_ENABLED 1
#define KIT_TOOL_STRINGS_ENABLED 1
#define KIT_TOOL_CAS_ENABLED 1
#define KIT_TOOL_PKG_ENABLED 1
diff --git a/mk/driver_srcs.mk b/mk/driver_srcs.mk
@@ -37,6 +37,7 @@ DRIVER_TOOL_SRCS = \
$(call tool-cmd,NM,nm) \
$(call tool-cmd,SIZE,size) \
$(call tool-cmd,ADDR2LINE,addr2line) \
+ $(call tool-cmd,SYMBOLIZE,symbolize) \
$(call tool-cmd,STRINGS,strings) \
$(call tool-cmd,CAS,cas) \
$(call tool-cmd,PKG,pkg) \
@@ -70,6 +71,7 @@ DRIVER_SRCS += $(call need-any,AR RANLIB STRIP DBG RUN BUILD_EXE BUILD_LIB BUILD
DRIVER_SRCS += $(call need-any,RUN,driver/lib/wasm_run.c)
DRIVER_SRCS += $(call need-any,CC CHECK BUILD_EXE BUILD_LIB BUILD_OBJ,driver/lib/compile_engine.c)
DRIVER_SRCS += $(call need-any,CAS PKG,driver/lib/dist_host.c)
+DRIVER_SRCS += $(call need-any,ADDR2LINE SYMBOLIZE,driver/lib/dwarfsym.c)
DRIVER_SRCS := $(sort $(DRIVER_SRCS))
DRIVER_OBJS = $(patsubst driver/%.c,$(BUILD_DIR)/driver/%.o,$(DRIVER_SRCS))
diff --git a/mk/test.mk b/mk/test.mk
@@ -582,9 +582,11 @@ test-rt-runtime: bin $(RT_RUNTIME_DEPS) $(LINK_EXE_RUNNER)
@bash test/rt/run.sh
# L3a backtrace round-trip: run a kit-compiled program that prints its own
-# backtrace, then symbolize the captured addresses with `kit addr2line`. Same
-# per-arch deps as test-rt-runtime (each arch's linux rt archive + the Path-E
-# link runner). See test/rt/addr2line.sh and doc/plan/BACKTRACE.md (L3a).
+# backtrace, then symbolize the captured addresses two ways — `kit addr2line`
+# (bare addresses) and `kit symbolize` (the raw "#N 0xADDR" stream, annotated
+# in place). Same per-arch deps as test-rt-runtime (each arch's linux rt
+# archive + the Path-E link runner). See test/rt/addr2line.sh and
+# doc/plan/BACKTRACE.md (L3a / WS5).
test-rt-backtrace: bin $(RT_RUNTIME_DEPS) $(LINK_EXE_RUNNER)
@bash test/rt/addr2line.sh
diff --git a/src/core/config_assert.c b/src/core/config_assert.c
@@ -59,6 +59,7 @@ KIT_ASSERT_BOOL(KIT_TOOL_EMU_ENABLED);
KIT_ASSERT_BOOL(KIT_TOOL_NM_ENABLED);
KIT_ASSERT_BOOL(KIT_TOOL_SIZE_ENABLED);
KIT_ASSERT_BOOL(KIT_TOOL_ADDR2LINE_ENABLED);
+KIT_ASSERT_BOOL(KIT_TOOL_SYMBOLIZE_ENABLED);
KIT_ASSERT_BOOL(KIT_TOOL_STRINGS_ENABLED);
KIT_ASSERT_BOOL(KIT_TOOL_CAS_ENABLED);
KIT_ASSERT_BOOL(KIT_TOOL_PKG_ENABLED);
@@ -115,6 +116,8 @@ _Static_assert(!KIT_TOOL_SIZE_ENABLED || KIT_AR_ENABLED,
"KIT_TOOL_SIZE_ENABLED requires ar support");
_Static_assert(!KIT_TOOL_ADDR2LINE_ENABLED || KIT_DWARF_ENABLED,
"KIT_TOOL_ADDR2LINE_ENABLED requires DWARF support");
+_Static_assert(!KIT_TOOL_SYMBOLIZE_ENABLED || KIT_DWARF_ENABLED,
+ "KIT_TOOL_SYMBOLIZE_ENABLED requires DWARF support");
_Static_assert(!KIT_TOOL_CAS_ENABLED || KIT_CAS_ENABLED,
"KIT_TOOL_CAS_ENABLED requires CAS support");
_Static_assert(!KIT_TOOL_PKG_ENABLED || KIT_PKG_ENABLED,
diff --git a/test/rt/addr2line.sh b/test/rt/addr2line.sh
@@ -1,12 +1,19 @@
#!/usr/bin/env bash
-# test/rt/addr2line.sh — L3a backtrace round-trip.
+# test/rt/addr2line.sh — L3a backtrace round-trip (addr2line + symbolize).
#
# Compiles test/rt/addr2line_prog.c (which prints its own backtrace via
# __kit_print_backtrace), links a static non-PIE ELF, runs it to capture the
-# raw "#N 0xADDR" lines, then feeds those addresses to `kit addr2line -f` and
-# asserts the expected function names (bt_leaf / bt_mid / bt_root / test_main)
-# appear — proving the freestanding capture/print path emits addresses the
-# hosted DWARF tools resolve. See doc/plan/BACKTRACE.md (L3a).
+# raw "#N 0xADDR" lines, then symbolizes those addresses two ways, asserting
+# the expected function names (bt_leaf / bt_mid / bt_root / test_main) appear:
+#
+# * `kit addr2line -f` — the GNU-style "addresses in, file:line out" lane,
+# fed the bare addresses grepped from the stream.
+# * `kit symbolize` — fed the raw "#N 0xADDR" stream verbatim; it must
+# keep the "#N" framing and append "<func> at file:line" in place (the
+# "#0 0x401136 bt_leaf at addr2line_prog.c:51:3" shape).
+#
+# Together they prove the freestanding capture/print path emits addresses the
+# hosted DWARF tools resolve. See doc/plan/BACKTRACE.md (L3a / WS5).
#
# The per-arch wiring mirrors test/rt/run.sh's lane R: each arch maps to an
# <arch>-linux exec tuple, the matching build/rt/<triple>/libkit_rt.a, a clang
@@ -89,7 +96,7 @@ clang_extra_flags() {
WANT_FUNCS="bt_leaf bt_mid bt_root test_main"
run_one() { # <arch> <opt>
- local arch="$1" opt="$2" name="$1/O$2 round-trip"
+ local arch="$1" opt="$2" name="$1/O$2 round-trip" sym_name="$1/O$2 symbolize"
local triple rtlib extra work obj exe start_obj
triple="$(arch_triple "$arch")" || { not_ok "$name" "unknown arch"; return; }
rtlib="$(rt_archive "$arch")"
@@ -128,7 +135,7 @@ run_one() { # <arch> <opt>
not_ok "$name" "$work/run.diag"; return
fi
- # Captured backtrace lines -> addresses -> addr2line -f.
+ # --- Lane 1: kit addr2line -f over the bare captured addresses. ---
local addrs
addrs="$(grep -oE '0x[0-9a-fA-F]+' "$work/run.out" 2>/dev/null | tr '\n' ' ')"
if [ -z "$addrs" ]; then
@@ -147,9 +154,34 @@ run_one() { # <arch> <opt>
printf 'addr2line missing function(s):%s\naddresses: %s\nsymbolized:\n' \
"$missing" "$addrs" > "$work/sym.diag"
cat "$work/sym.out" >> "$work/sym.diag" 2>/dev/null
- not_ok "$name" "$work/sym.diag"; return
+ not_ok "$name" "$work/sym.diag"
+ else
+ ok "$name"
+ fi
+
+ # --- Lane 2: kit symbolize annotates the raw "#N 0xADDR" stream in place. ---
+ # The whole captured stream is piped in verbatim; symbolize must keep the
+ # "#N 0x<hex>" framing and append "<func> at file:line[:col]" on each frame
+ # line (lines without an address pass through untouched).
+ "$KIT" symbolize -e "$exe" <"$work/run.out" >"$work/symz.out" 2>"$work/symz.err"
+
+ local sym_missing=""
+ for fn in $WANT_FUNCS; do
+ # Require BOTH the preserved framing and the appended symbolization:
+ # #<n> 0x<hex> <fn> at <file>:<line>
+ grep -qE "^#[0-9]+ 0x[0-9a-fA-F]+ ${fn} at .+:[0-9]+" "$work/symz.out" \
+ || sym_missing="$sym_missing $fn"
+ done
+ if [ -n "$sym_missing" ]; then
+ printf 'symbolize missing annotated frame(s):%s\nstream:\n' \
+ "$sym_missing" > "$work/symz.diag"
+ cat "$work/run.out" >> "$work/symz.diag" 2>/dev/null
+ printf '\nsymbolized:\n' >> "$work/symz.diag"
+ cat "$work/symz.out" >> "$work/symz.diag" 2>/dev/null
+ not_ok "$sym_name" "$work/symz.diag"
+ else
+ ok "$sym_name"
fi
- ok "$name"
}
for arch in ${KIT_RT_RUNTIME_ARCHES:-aa64 x64 rv64}; do