commit b3f14e4ecb76fed2930be28041e7e68a692b073f
parent 5a7a304b5a1d6dd328cc662759c895feebb5340c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 5 Jun 2026 09:26:10 -0700
Implement __kit_print_backtrace & assert hook (BACKTRACE L3a/WS4)
Ship L3a of the backtrace roadmap: raw backtrace print + out-of-process
symbolization, plus the deferred assert-path hook.
- rt/lib/stack/print_backtrace.c: __kit_print_backtrace walks via
__kit_backtrace(skip=1) and writes "#N 0x<hex>" lines (hand-rolled
fmt, no printf) to the weak no-op __kit_backtrace_write sink. Both
decls in rt/include/kit/backtrace.h; added to RT_BASE_SRCS.
- rt/lib/assert/assert.c: __kit_assert_fail emits a banner +
__kit_print_backtrace() before __builtin_trap(), via the same sink.
- Resolves the L3a open question: weak no-op sink default (host/_start
overrides to write(2)), so freestanding images still link.
Tests (aa64/x64/rv64, O0 + O1):
- test/rt/cases/print_backtrace.c: in-process parse of the emitted lines.
- test/rt/addr2line.sh + addr2line_prog.c: kit addr2line round-trip
(make target test-rt-backtrace).
- test/rt/run.sh now sweeps O0+O1 (KIT_RT_OPT_LEVELS); smoke.c includes
<kit/backtrace.h>.
The backtrace path passes at O0 and O1 on all three arches. Sweeping O1
surfaced two unrelated pre-existing bugs, left RED (not skipped) and
logged in doc/plan/TODO.md:
- x86-64 -g -O1 + 4-operand register-pinned syscall asm aborts the
compiler (too many memory asm operands); fails test-rt-backtrace x64/O1.
- setjmp/longjmp miscompiled at -O1 on every arch (second-return value
not observed); fails test-rt-runtime setjmp_runtime/O1.
Diffstat:
12 files changed, 682 insertions(+), 40 deletions(-)
diff --git a/doc/plan/BACKTRACE.md b/doc/plan/BACKTRACE.md
@@ -1,6 +1,53 @@
# Plan: stack-trace builtins & runtime backtrace
-## Status — 2026-06-05 — L1 + L2 shipped (WS1–WS3); L3 remaining
+## Status — 2026-06-05 — L1 + L2 + L3a shipped (WS1–WS4); WS5/L3b remaining
+
+L3a (WS4) is now shipped on top of L1/L2:
+
+- **L3a print** `__kit_print_backtrace` — `rt/lib/stack/print_backtrace.c` walks
+ via `__kit_backtrace(buf, 64, skip=1)` (skip hides the print frame, so `#0` is
+ the caller) and writes one raw `#N 0x<hex>` line per frame to the **weak**
+ `__kit_backtrace_write(const char*, size_t)` sink. Integer/hex formatting is
+ hand-rolled (no printf/libc pulled into the panic path); the address uses
+ `uintptr_t` so it is not truncated on LLP64. Declared in
+ `rt/include/kit/backtrace.h`; added to `RT_BASE_SRCS`.
+- **Output sink (open question resolved):** weak no-op `__kit_backtrace_write`
+ default, so freestanding images that never wire a sink still link; the host /
+ `_start` overrides it to route bytes to `write(2)` (or a UART). Chosen over a
+ mandatory explicit-sink param to keep freestanding builds link-clean.
+- **Assert hook (deferred from L2)** — `rt/lib/assert/assert.c::__kit_assert_fail`
+ now emits a `kit: assertion failed: <expr>, file <file>, line <line>, function
+ <func>` banner then `__kit_print_backtrace()` before `__builtin_trap()`, all
+ through the same weak sink (printf-free). Pulling `__kit_assert_fail` therefore
+ also pulls `print_backtrace.o` → `backtrace.o` from the archive — the intended
+ wiring.
+- **Symbolization** is out-of-process via the existing `kit addr2line` — verified
+ round-trip: a static non-PIE ELF prints its own trace at runtime, the captured
+ addresses feed straight to `kit addr2line -f -e <image>`, resolving
+ `bt_leaf`/`bt_mid`/`bt_root`/`test_main` (outer no-`-g` frames show `??`).
+
+Tests (L3a): `test/rt/cases/print_backtrace.c` (in-process parse of the emitted
+`#N 0xADDR` lines, aa64/x64/rv64 under exec, exit 42) and `test/rt/addr2line.sh`
++ `test/rt/addr2line_prog.c` (the `kit addr2line` round-trip, make target
+`test-rt-backtrace`). `test/rt/smoke.c` also includes `<kit/backtrace.h>` so the
+header compiles on every rt-header target.
+
+**Opt coverage — the backtrace path passes at O0 *and* O1 on all three arches.**
+The rt-runtime corpus (`test/rt/run.sh`) and the addr2line round-trip
+(`test/rt/addr2line.sh`) now sweep both opt levels (`KIT_RT_OPT_LEVELS`), so
+`backtrace_capture` (L2) and `print_backtrace` (L3a) are exercised against
+optimized callers — all green at O0/O1. Sweeping O1 also surfaced **two
+unrelated, pre-existing kit bugs**, left red (not skipped) and logged in
+doc/plan/TODO.md: (1) **x86-64 `-g -O1` + the 4-operand register-pinned syscall
+idiom** aborts the compiler (`too many memory asm operands`,
+`src/arch/x64/native.c:4014`) — this is why the `x64/O1` lane of
+`test-rt-backtrace` is red, though x64/O1 backtrace correctness is still proven
+by `print_backtrace`/`backtrace_capture` (no asm); (2) **setjmp/longjmp is
+miscompiled at `-O1`** on every arch (`setjmp_runtime/O1` returns 1, not 42 —
+the second-return value isn't observed), failing `test-rt-runtime`.
+
+Remaining: **WS5 (L3c)** tool-side auto-backtrace and **L3b** in-process
+self-symbolization.
Implemented and tested through L2:
@@ -38,23 +85,18 @@ D/R/E/J/C lanes at O0/O1, `test/toy/cases/154_frame_return_address.toy`.
### Remaining tasks (L3)
-Nothing in L1/L2 is outstanding. What's left is all of L3 — symbolize & print:
-
-- **WS4 — L3a (recommended next):** `__kit_print_backtrace()` in rt — walk via
- `__kit_backtrace`, write raw `#N 0xADDR` lines to a weak
- `__kit_backtrace_write` sink, symbolize out-of-process via `kit addr2line`.
- Then wire the **assert-path hook** (`rt/lib/assert/assert.c::__kit_assert_fail`
- → `__kit_print_backtrace()` before `__builtin_trap()`) — deferred from L2
- because it calls this L3 function. The end-to-end round-trip is already proven
- manually (static + dynamic, aa64/x64/rv64 — see Status); WS4 packages it as a
- shipped `__kit_print_backtrace` + test.
-- **WS5 — L3c:** tool-side auto-backtrace in `kit run`/`kit emu`/`dbg` fault
- handlers (reuses the existing DWARF reader + `dbg bt`; never crosses into rt).
+Nothing in L1/L2/L3a is outstanding. What's left is the rest of L3:
+
+- ~~**WS4 — L3a:**~~ **done** (see Status) — `__kit_print_backtrace()` + weak
+ `__kit_backtrace_write` sink + assert-path hook + `kit addr2line` round-trip.
+- **WS5 — L3c (recommended next):** tool-side auto-backtrace in `kit run`/`kit
+ emu`/`dbg` fault handlers (reuses the existing DWARF reader + `dbg bt`; never
+ crosses into rt).
- **L3b:** in-process self-symbolization (hosted-only `libkit_bt.a`); deferred
until a concrete consumer needs in-binary symbolized panics.
-One open question remains (the L3a output sink — see Open questions); the others
-listed below were resolved while building L1/L2.
+All Open-questions items are now resolved (the L3a output sink chose the weak
+default — see Open questions).
## Overview
@@ -217,10 +259,10 @@ to the target pointer width automatically — no offset table, no `#ifdef` casca
- `mk/rt.mk` — added `rt/lib/stack/backtrace.c` to `RT_BASE_SRCS` (built for
every variant; `rt/lib/stack/` already compiled the Windows chkstk helper).
-- **Assert-path hook — deferred to L3 (WS4):** making
- `rt/lib/assert/assert.c::__kit_assert_fail` print a backtrace before
- `__builtin_trap()` needs the L3 `__kit_print_backtrace()`, so it lands with
- WS4, not here.
+- **Assert-path hook — landed in WS4 (was deferred):**
+ `rt/lib/assert/assert.c::__kit_assert_fail` now emits a banner +
+ `__kit_print_backtrace()` before `__builtin_trap()`. It needed the L3
+ `__kit_print_backtrace()`, so it shipped with WS4 rather than L2.
### Tests (L2) [done]
@@ -238,14 +280,15 @@ This is where the freestanding boundary bites: turning an address into
sub-options, ordered by how cleanly they respect that boundary. Recommend
shipping **L3a now**, leaving L3b/L3c as documented extensions.
-- **L3a — raw print + out-of-process symbolization (recommended default).**
- `__kit_print_backtrace()` lives in rt, walks via `__kit_backtrace`, and writes
- raw lines (`#0 0x401136`, …) to a host-provided sink (a weak
- `__kit_backtrace_write(const char*, size_t)` the host or `_start` wires to
- `write(2)`; freestanding default is a no-op). Symbolization is then a separate
- step through the **existing** `kit addr2line` tool (or a thin new `kit
- symbolize` that batches). Zero new symbolization code, fully freestanding,
- matches how minimal panic handlers work in the wild.
+- **L3a — raw print + out-of-process symbolization (shipped — WS4).**
+ `__kit_print_backtrace()` lives in rt (`rt/lib/stack/print_backtrace.c`), walks
+ via `__kit_backtrace`, and writes raw lines (`#0 0x401136`, …) to a
+ host-provided sink (the weak `__kit_backtrace_write(const char*, size_t)` the
+ host or `_start` wires to `write(2)`; freestanding default is a no-op).
+ Symbolization is a separate step through the **existing** `kit addr2line` tool
+ (a thin batching `kit symbolize` remains a possible future convenience). Zero
+ new symbolization code, fully freestanding, matches how minimal panic handlers
+ work in the wild.
- **L3b — in-process self-symbolization (hosted-only).** A trimmed line/func
reader (reusing `kit_dwarf_addr_to_line` + `kit_dwarf_func_at`) linked into a
@@ -264,8 +307,11 @@ shipping **L3a now**, leaving L3b/L3c as documented extensions.
### Tests (L3)
-- L3a: smoke test piping captured addresses through `kit addr2line`, asserting
- the expected function names appear.
+- L3a [done]: `test/rt/addr2line.sh` (+ `addr2line_prog.c`) runs a kit-compiled
+ program that prints its own trace, then pipes the captured addresses through
+ `kit addr2line -f`, asserting `bt_leaf`/`bt_mid`/`bt_root`/`test_main` appear
+ (make target `test-rt-backtrace`, aa64/x64/rv64). In-process companion:
+ `test/rt/cases/print_backtrace.c` parses the emitted `#N 0xADDR` lines.
- L3c: an `kit emu` fault test asserting a symbolized frame line on stderr.
---
@@ -278,15 +324,21 @@ shipping **L3a now**, leaving L3b/L3c as documented extensions.
3. **WS3 — L2 `__kit_backtrace`** in rt + capture test. ✅ done (assert-hook moved
to WS4 — it needs the L3 print fn).
4. **WS4 — L3a** raw print (`__kit_print_backtrace` + weak `__kit_backtrace_write`
- sink) + `kit addr2line` round-trip; wire the assert hook. ⏳ remaining (next).
-5. **WS5 — L3c** tool-side auto-backtrace (optional, parallelizable). ⏳ remaining.
+ sink) + `kit addr2line` round-trip; wire the assert hook. ✅ done.
+5. **WS5 — L3c** tool-side auto-backtrace (optional, parallelizable). ⏳ remaining (next).
6. **L3b** deferred until a consumer needs in-binary symbolized panics.
## Open questions
-- **Output sink for L3a (open):** weak `__kit_backtrace_write` vs. requiring the
- host to pass a sink explicitly. Weak-symbol default keeps freestanding builds
- linking. Resolve in WS4.
+None outstanding.
+
+Resolved in WS4:
+
+- ~~**Output sink for L3a:**~~ weak `__kit_backtrace_write` (no-op default) vs.
+ requiring the host to pass a sink explicitly. **Chose the weak default** — it
+ keeps freestanding builds linking with no sink, and a host / `_start`
+ overrides it to route bytes to `write(2)` or a UART. (Resolved while building
+ WS4.)
Resolved while building L1/L2:
diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md
@@ -80,6 +80,114 @@ Found while building a freestanding Linux backtrace demo (it needed `write`/
`exit`); worked around with inline asm. The toy frontend maps `@syscall` to the
same `INTRIN_NONE` and likewise can't lower it (`test/toy/cases/unsupported_syscall`).
+## Inline asm: kit rejects machine-specific register constraints (e.g. x86 `"=a"`)
+
+kit's inline-asm lowering only recognizes the **architecture-neutral** register
+classes `r` (general), `f`/`x`/`w` (FP/SIMD), plus `i` (immediate input), `m`
+(memory), and matching digits (`0`–`9`). Any GCC *machine* constraint letter —
+most commonly x86's `"=a"`/`"a"` (the canonical `syscall`/`cpuid`/`rdtsc` idiom),
+but also `b`/`c`/`d`/`S`/`D`/`q`/`Q`, etc. — is rejected with
+`KitCg: unsupported asm output constraint` (or `… input constraint`). GCC accepts
+`"=a"` on x86; kit does not.
+
+Root cause is target-independent and lives in the **CG layer**, not a backend:
+`api_asm_is_reg_constraint` (`src/cg/asm.c:41`) returns true only for
+`r`/`f`/`x`/`w`, and the output/input constraint handlers
+(`src/cg/asm.c:195` / `:234`) `compiler_panic` on anything else. Because this is
+the front-of-pipe lowering (before any arch backend, before any opt pass), the
+rejection is identical on **every arch and every opt level**. Verified matrix
+(x86_64/aarch64/riscv64 × O0/O1): `"=a"` output → all 6 fail with the same
+message; the register-pinned workaround → all 6 compile.
+
+Workaround (what the rest of the tree uses, e.g. `test/link/harness/start.c` and
+`test/rt/addr2line_prog.c`): pin to a hard register with a GNU local register
+variable bound to a plain `r`/`+r` constraint, never the machine letter:
+
+```c
+/* NOT supported: __asm__("syscall" : "=a"(ret) : "a"(n) : ...); */
+register long rax __asm__("rax") = n; /* hard-register pin */
+__asm__ volatile("syscall" : "+r"(rax) : : "rcx", "r11", "memory");
+return rax; /* read result back from rax */
+```
+
+The hard-register pin (`AsmConstraint.reg`) rides alongside the `r` operand and
+selects the exact register, so this is fully general. A proper fix would map the
+common machine constraint letters to their register class + a hard-register pin
+(at least x86 `a`/`b`/`c`/`d`/`S`/`D` → `r`+pin) so stock GCC/Clang syscall and
+cpuid snippets compile unchanged. Found writing the freestanding backtrace
+round-trip program (`test/rt/addr2line_prog.c`), whose x86-64 `write` syscall
+first used `"=a"` and had to be rewritten to the register-pinned form
+(doc/plan/BACKTRACE.md, WS4).
+
+## x86-64 inline asm: `-g -O1` + a 4-operand register idiom → `too many memory asm operands` (compiler abort)
+
+A register-pinned inline-asm syscall (4 operands: `rax`/`rdi`/`rsi`/`rdx` via GNU
+local register variables + an `"r"`/`"+r"` constraint, the only syscall idiom kit
+accepts — see the entry above) aborts the compiler **only on x86-64 at `-O1` with
+`-g`**. The bracket is exact: `-g -O0` OK, `-O1` (no `-g`) OK, `-O0` OK, and
+aarch64/riscv64 compile it fine at `-g -O1`; only x64 + O1 + -g fails. The
+message is `fatal: x64 inline asm: too many memory asm operands`.
+
+Minimal repro (`kit cc -target x86_64-linux-gnu -g -O1 -c`):
+
+```c
+static long w(int fd, const char* b, unsigned long n) {
+ register long rax __asm__("rax") = 1, rdi __asm__("rdi") = fd;
+ register long rsi __asm__("rsi") = (long)b, rdx __asm__("rdx") = (long)n;
+ __asm__ volatile("syscall" : "+r"(rax) : "r"(rdi), "r"(rsi), "r"(rdx)
+ : "rcx", "r11", "memory");
+ return rax;
+}
+```
+
+Root cause: the x64 asm lowering stages a *memory-resident* `"r"` operand into a
+scratch register before the asm, but the scratch pool is only **two** registers
+(`X64_TMP_INT` / `X64_TMP_INT2`), and `src/arch/x64/native.c:4014` panics on the
+third. At `-O1 -g` the four pinned `register long` operands are left stack-
+resident at the asm point (the GNU `register asm` hint binds the operand but does
+not pin residency across statements; the `-g` location tracking perturbs the
+allocator into spilling), so 3+ need staging and it trips. `-O0` keeps them in
+registers, so `ntmp` stays ≤ 2. Fix: when an `"r"` operand carries a hard-
+register pin, load it straight into that pinned register instead of a shared
+scratch temp (no temp needed at all); failing that, stage through more than two
+scratch regs. **Secondary:** the fatal itself does not exit cleanly — under the
+ASan host build `compiler_panic`'s `longjmp` (`src/core/core.c:179`) SEGVs, so the
+diagnostic becomes a SIGABRT/SEGV instead of a clean `fatal:` exit. Found writing
+the WS4 backtrace round-trip (`test/rt/addr2line_prog.c`), whose x86-64 `write`
+sink is exactly this idiom; surfaced by sweeping that test at `-O1` per
+doc/plan/BACKTRACE.md — left red (`test-rt-backtrace`, `x64/O1` lane).
+
+## setjmp/longjmp miscompiled at `-O1`: the longjmp'd `setjmp` return value is wrong
+
+A textbook setjmp/longjmp round-trip returns the right answer at `-O0` but the
+wrong one at `-O1` on **all three** native arches (aa64/x64/rv64) — a wrong-
+answer miscompile, not a crash. `test/rt/cases/setjmp_runtime.c` exits 42 at O0
+and **1** at O1: the `int rc = setjmp(env)` value observed after the `longjmp`
+is not `1`. (`marker` is `volatile`, so the test is well-formed — the bad read is
+the `setjmp` result itself, not the local.)
+
+Minimal repro (`kit cc -O1`, run it):
+
+```c
+#include <setjmp.h>
+int test_main(void) {
+ jmp_buf env;
+ volatile int marker = 11;
+ int rc = setjmp(env); /* O1: second return value not observed */
+ if (rc == 0) { marker = 31; longjmp(env, 1); }
+ return (marker == 31 && rc == 1) ? 42 : 1; /* O0: 42, O1: 1 */
+}
+```
+
+Classic "`setjmp` not modeled as returns-twice": the optimizer treats the call as
+returning once, so the SSA value for `rc` (and anything else live across the
+`setjmp`) is folded/cached to the first-return value rather than reloaded on the
+`longjmp` re-entry. Target-independent (fails on every arch), so the fix is in the
+opt/IR layer — mark `setjmp`-family calls returns-twice (force a reload of values
+live across them; pin them to memory), as GCC/Clang do. Found sweeping the rt
+runtime corpus at O0+O1 for the WS4 backtrace work (doc/plan/BACKTRACE.md); left
+red (`test-rt-runtime`, `setjmp_runtime/O1`).
+
## `-no-pie` does not produce a non-PIE (ET_EXEC) executable
`-no-pie` sets `o->target.pic = KIT_PIC_NONE` (`driver/cmd/cc.c:1185`) but does
diff --git a/mk/rt.mk b/mk/rt.mk
@@ -232,6 +232,7 @@ RT_BASE_SRCS = \
rt/lib/atomic/atomic_freestanding.c \
rt/lib/cache/clear_cache.c \
rt/lib/stack/backtrace.c \
+ rt/lib/stack/print_backtrace.c \
rt/lib/kit/ifunc_init.c
RT_COMPILER_SRCS = \
diff --git a/mk/test.mk b/mk/test.mk
@@ -106,6 +106,7 @@ TEST_TARGETS = \
test-pp-ok \
test-rt-headers \
test-rt-runtime \
+ test-rt-backtrace \
test-link-x64 \
test-rv64-inline \
test-rv64-jit \
@@ -166,6 +167,7 @@ DEFAULT_TEST_TARGETS = \
test-libc \
test-link-x64 \
test-rt-runtime \
+ test-rt-backtrace \
test-bounce \
bootstrap \
test-bootstrap-toy
@@ -579,6 +581,13 @@ LINK_EXE_RUNNER = build/test/link-exe-runner
test-rt-runtime: bin $(RT_RUNTIME_DEPS) $(LINK_EXE_RUNNER)
@bash test/rt/run.sh
+# L3a backtrace round-trip: run a kit-compiled program that prints its own
+# backtrace, then symbolize the captured addresses with `kit addr2line`. Same
+# per-arch deps as test-rt-runtime (each arch's linux rt archive + the Path-E
+# link runner). See test/rt/addr2line.sh and doc/plan/BACKTRACE.md (L3a).
+test-rt-backtrace: bin $(RT_RUNTIME_DEPS) $(LINK_EXE_RUNNER)
+ @bash test/rt/addr2line.sh
+
# Test harness binaries shared by test-elf and test-link.
# Declared as Make targets (not built by the run.sh scripts) so they pick
# up libkit.a changes deterministically.
diff --git a/rt/include/kit/backtrace.h b/rt/include/kit/backtrace.h
@@ -19,6 +19,8 @@
#ifndef KIT_BACKTRACE_H
#define KIT_BACKTRACE_H
+#include <stddef.h> /* size_t — a freestanding header */
+
/* Fill buf[0..max) with return addresses, innermost first, and return the
* number written. The walk starts at __kit_backtrace's own frame, so with
* skip == 0 buf[0] is the return address into the direct caller of
@@ -34,4 +36,27 @@
* `skip` is treated as 0; non-positive `max` returns 0. */
int __kit_backtrace(void** buf, int max, int skip);
+/* Walk the current call stack and emit one raw frame line per return address,
+ * innermost caller first, to __kit_backtrace_write in the form
+ *
+ * #0 0x<hex>\n
+ * #1 0x<hex>\n
+ * ...
+ *
+ * __kit_print_backtrace's own frame is skipped, so #0 is the return address
+ * into its direct caller (the site that asked for a trace). The address is the
+ * unsymbolized return address; pipe it to `kit addr2line -e <image>` to recover
+ * `func at file:line`. Like __kit_backtrace this is a freestanding frame-pointer
+ * walk — no libc, no DWARF, no printf — so it is safe to call from a crash or
+ * panic handler. See doc/plan/BACKTRACE.md (L3a). */
+void __kit_print_backtrace(void);
+
+/* Output sink for __kit_print_backtrace (and the freestanding assert handler).
+ * It is a WEAK no-op by default, so a freestanding image that never wires a
+ * sink still links and runs; a host start file or runtime overrides it to route
+ * the bytes somewhere visible — typically write(2, buf, len) to stderr, or a
+ * UART on bare metal. `buf` is not NUL-terminated; exactly `len` bytes are
+ * valid. */
+void __kit_backtrace_write(const char* buf, size_t len);
+
#endif /* KIT_BACKTRACE_H */
diff --git a/rt/lib/assert/assert.c b/rt/lib/assert/assert.c
@@ -2,13 +2,53 @@
//
// SPDX-License-Identifier: 0BSD
//===----------------------------------------------------------------------===//
+//
+// __kit_assert_fail is the freestanding target of a failed assert(). It emits a
+// human-readable banner and then a raw backtrace, both through the weak
+// __kit_backtrace_write sink (see rt/include/kit/backtrace.h), before trapping.
+// When no sink is wired the banner and trace are discarded, so this stays
+// link-clean and printf-free in a bare freestanding image; a host that wires
+// the sink (typically write(2) to stderr) gets a diagnosable trap.
+
+#include <kit/backtrace.h>
+#include <stddef.h>
+
+/* Send the NUL-terminated string `s` to the __kit_backtrace_write sink. The
+ * length is measured by hand (no strlen) and the terminator itself is not
+ * sent; a null `s` emits nothing. */
+static void bt_emit(const char* s) {
+  const char* end = s;
+  if (s == NULL) return;
+  while (*end != '\0') end++;
+  __kit_backtrace_write(s, (size_t)(end - s));
+}
+
+/* Write `v` in decimal, with a leading '-' when negative, to the
+ * __kit_backtrace_write sink. Digits are produced least-significant first
+ * into the tail of a scratch buffer, so the finished text is the suffix that
+ * starts at `pos`. The magnitude is taken in unsigned arithmetic. */
+static void bt_emit_int(int v) {
+  char text[12]; /* "-2147483648" is 11 bytes; no NUL is stored */
+  const int cap = (int)sizeof text;
+  int pos = cap;
+  unsigned mag = (unsigned)v;
+
+  if (v < 0) mag = 0u - mag;
+  for (;;) {
+    pos--;
+    text[pos] = (char)('0' + (int)(mag % 10u));
+    mag /= 10u;
+    if (mag == 0u || pos <= 0) break;
+  }
+  if (v < 0 && pos > 0) {
+    pos--;
+    text[pos] = '-';
+  }
+  __kit_backtrace_write(text + pos, (size_t)(cap - pos));
+}
__attribute__((weak)) void __kit_assert_fail(const char* expr, const char* file,
int line, const char* func) {
- (void)expr;
- (void)file;
- (void)line;
- (void)func;
+ bt_emit("kit: assertion failed: ");
+ bt_emit(expr ? expr : "(unknown)");
+ if (file) {
+ bt_emit(", file ");
+ bt_emit(file);
+ }
+ bt_emit(", line ");
+ bt_emit_int(line);
+ if (func) {
+ bt_emit(", function ");
+ bt_emit(func);
+ }
+ bt_emit("\n");
+ __kit_print_backtrace();
__builtin_trap();
for (;;) {
}
diff --git a/rt/lib/stack/print_backtrace.c b/rt/lib/stack/print_backtrace.c
@@ -0,0 +1,71 @@
+/*
+ * __kit_print_backtrace -- raw (unsymbolized) backtrace print, built on the
+ * freestanding __kit_backtrace frame-pointer walk. See doc/plan/BACKTRACE.md
+ * (L3a) and rt/include/kit/backtrace.h for the contract.
+ *
+ * Each captured return address is written as a "#<n> 0x<hex>\n" line to the
+ * weak __kit_backtrace_write sink. Symbolization is deliberately out of
+ * process: pipe the addresses to `kit addr2line -e <image>`. The integer
+ * formatting is done by hand so the panic path drags in no printf/libc and
+ * stays usable from a crash handler.
+ */
+#include <kit/backtrace.h>
+#include <stdint.h>
+
+/* Default sink: drop every byte. The definition is weak so that a host start
+ * file or runtime can supply its own __kit_backtrace_write (write(2), a serial
+ * port, ...) while an image that never wires one up still links and runs. */
+__attribute__((weak)) void __kit_backtrace_write(const char* buf, size_t len) {
+  (void)len;
+  (void)buf;
+}
+
+#define KIT_BT_PRINT_MAX 64
+
+/* Render one trace line, "#<idx> 0x<hex>\n", into `out` and return how many
+ * bytes were stored (no NUL terminator is written). Worst case is 31 bytes:
+ * '#', 10 decimal digits for a 32-bit idx, " 0x", 16 hex digits for a 64-bit
+ * address, and '\n'; the caller passes a 48-byte buffer. The address is held
+ * in a uintptr_t so it keeps full pointer width even on an LLP64 target,
+ * where `unsigned long` is narrower than a pointer. */
+static int bt_format_frame(char* out, unsigned idx, void* addr) {
+  static const char hexd[] = "0123456789abcdef";
+  char dec[10];
+  char* p = out;
+  uintptr_t value = (uintptr_t)addr;
+  int ndec = 0;
+  int top = (int)(sizeof(uintptr_t) * 8) - 4;
+
+  *p++ = '#';
+  /* Decimal index: peel digits least-significant first, then replay them in
+   * reverse so they read most-significant first. */
+  for (;;) {
+    dec[ndec++] = (char)('0' + (int)(idx % 10u));
+    idx /= 10u;
+    if (idx == 0u) break;
+  }
+  while (ndec > 0) *p++ = dec[--ndec];
+
+  *p++ = ' ';
+  *p++ = '0';
+  *p++ = 'x';
+  /* Hex address without leading zeros: lower `top` to the highest non-zero
+   * nibble (stopping at nibble 0 so that address 0 prints as "0x0"), then
+   * emit every nibble from there down. */
+  while (top > 0 && ((value >> top) & 0xfu) == 0u) top -= 4;
+  for (; top >= 0; top -= 4) *p++ = hexd[(unsigned)((value >> top) & 0xfu)];
+
+  *p++ = '\n';
+  return (int)(p - out);
+}
+
+/* Capture up to KIT_BT_PRINT_MAX return addresses and hand each one to the
+ * __kit_backtrace_write sink as its own "#<n> 0x<hex>\n" line. */
+void __kit_print_backtrace(void) {
+  void* pcs[KIT_BT_PRINT_MAX];
+  char text[48];
+  int idx = 0;
+  /* Passing skip == 1 drops this function's own frame, which makes line #0
+   * the return address into whoever called __kit_print_backtrace. */
+  const int count = __kit_backtrace(pcs, KIT_BT_PRINT_MAX, 1);
+
+  while (idx < count) {
+    const int nbytes = bt_format_frame(text, (unsigned)idx, pcs[idx]);
+    __kit_backtrace_write(text, (size_t)nbytes);
+    idx++;
+  }
+}
diff --git a/test/rt/addr2line.sh b/test/rt/addr2line.sh
@@ -0,0 +1,164 @@
+#!/usr/bin/env bash
+# test/rt/addr2line.sh — L3a backtrace round-trip.
+#
+# Compiles test/rt/addr2line_prog.c (which prints its own backtrace via
+# __kit_print_backtrace), links a static non-PIE ELF, runs it to capture the
+# raw "#N 0xADDR" lines, then feeds those addresses to `kit addr2line -f` and
+# asserts the expected function names (bt_leaf / bt_mid / bt_root / test_main)
+# appear — proving the freestanding capture/print path emits addresses the
+# hosted DWARF tools resolve. See doc/plan/BACKTRACE.md (L3a).
+#
+# The per-arch wiring mirrors test/rt/run.sh's lane R: each arch maps to an
+# <arch>-linux exec tuple, the matching build/rt/<triple>/libkit_rt.a, a clang
+# freestanding start.o, a link via link-exe-runner, and an exec via
+# exec_target. Because the image is a static non-PIE ELF, the return addresses
+# printed at run time equal its link-time addresses, so they pipe straight to
+# addr2line. Anything missing (rt archive, exec runner, clang start.o) is
+# reported as a SKIP rather than a not_ok; skips still count as failures unless
+# KIT_TEST_ALLOW_SKIP=1 is set, in which case the script exits 0 with skips.
+
+set -u
+
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+PROG_SRC="$ROOT/test/rt/addr2line_prog.c"
+BUILD_DIR="$ROOT/build/test/rt-addr2line"
+KIT="$ROOT/build/kit"
+LINK_EXE_RUNNER="$ROOT/build/test/link-exe-runner"
+START_SRC="$ROOT/test/link/harness/start.c"
+
+export KIT_KIT_DIR="$ROOT/test/lib"
+# shellcheck source=../lib/kit_sh_kit.sh
+. "$ROOT/test/lib/kit_sh_kit.sh"
+kit_report_init
+[ "${KIT_TEST_ALLOW_SKIP:-0}" = 1 ] || KIT_SKIP_IS_FAILURE=1
+
+mkdir -p "$BUILD_DIR"
+
+if [ ! -x "$KIT" ]; then
+ skip_test "kit" "kit driver missing at $KIT -- run \`make bin\` first"
+ kit_summary test-rt-addr2line
+ kit_exit
+fi
+if [ ! -x "$LINK_EXE_RUNNER" ]; then
+ skip_test "link-exe-runner" "missing at $LINK_EXE_RUNNER -- run \`make test-rt-runtime\`"
+ kit_summary test-rt-addr2line
+ kit_exit
+fi
+
+# exec_target wiring (same host-detection knobs test/rt/run.sh exports).
+have_qemu=0
+QEMU_BIN="$(command -v qemu-aarch64-static 2>/dev/null || command -v qemu-aarch64 2>/dev/null || true)"
+[ -n "$QEMU_BIN" ] && have_qemu=1
+have_podman=0
+command -v podman >/dev/null 2>&1 && have_podman=1
+arch_raw="$(uname -m 2>/dev/null || true)"
+is_aarch64=0
+if [ "$(uname -s 2>/dev/null)" = "Linux" ]; then
+ { [ "$arch_raw" = "aarch64" ] || [ "$arch_raw" = "arm64" ]; } && is_aarch64=1
+fi
+export have_qemu QEMU_BIN have_podman is_aarch64
+EXEC_TARGET_MOUNT_ROOT="$BUILD_DIR"
+export EXEC_TARGET_MOUNT_ROOT
+# shellcheck source=../lib/exec_target.sh
+. "$ROOT/test/lib/exec_target.sh"
+
+arch_triple() {
+ case "$1" in
+ aa64) echo "aarch64-linux-gnu" ;;
+ x64) echo "x86_64-linux-gnu" ;;
+ rv64) echo "riscv64-linux-gnu" ;;
+ *) return 1 ;;
+ esac
+}
+rt_archive() {
+ case "$1" in
+ aa64) echo "$ROOT/build/rt/aarch64-linux/libkit_rt.a" ;;
+ x64) echo "$ROOT/build/rt/x86_64-linux/libkit_rt.a" ;;
+ rv64) echo "$ROOT/build/rt/riscv64-linux/libkit_rt.a" ;;
+ *) return 1 ;;
+ esac
+}
+clang_extra_flags() {
+ case "$1" in
+ rv64) echo "-march=rv64gc" ;;
+ *) echo "" ;;
+ esac
+}
+
+# Functions the backtrace must symbolize to, innermost first. _start may show
+# as ?? (start.o is built without -g), so it is not required.
+WANT_FUNCS="bt_leaf bt_mid bt_root test_main"
+
+run_one() { # <arch> <opt>
+ local arch="$1" opt="$2" name="$1/O$2 round-trip"
+ local triple rtlib extra work obj exe start_obj
+ triple="$(arch_triple "$arch")" || { not_ok "$name" "unknown arch"; return; }
+ rtlib="$(rt_archive "$arch")"
+ extra="$(clang_extra_flags "$arch")"
+ work="$BUILD_DIR/$arch/O$opt"
+ mkdir -p "$work"
+ obj="$work/prog.o"; exe="$work/prog.exe"; start_obj="$work/start.o"
+
+ if [ ! -f "$rtlib" ]; then
+ skip_test "$name" "runtime archive missing at $rtlib"; return
+ fi
+ if ! exec_target_supported "$arch"; then
+ skip_test "$name" "no execution runner"; return
+ fi
+ if ! clang --target="$triple" $extra -O1 -ffreestanding -fno-stack-protector \
+ -fno-PIC -fno-pie -c "$START_SRC" -o "$start_obj" \
+ >"$work/start.out" 2>"$work/start.err"; then
+ skip_test "$name" "clang cannot build start.o for $triple"; return
+ fi
+
+ # -g so addr2line has DWARF; static non-PIE so runtime addr == link addr.
+ # The chain is @[.noinline] + non-tail, so the frames survive at O1 too.
+ if ! "$KIT" cc -target "$triple" -g -O"$opt" -Werror -c "$PROG_SRC" -o "$obj" \
+ >"$work/cc.out" 2>"$work/cc.err"; then
+ not_ok "$name" "$work/cc.err"; return
+ fi
+ if ! KIT_TEST_ARCH="$arch" "$LINK_EXE_RUNNER" -o "$exe" "$obj" "$start_obj" \
+ --archive "$rtlib" >"$work/link.out" 2>"$work/link.err"; then
+ not_ok "$name" "$work/link.err"; return
+ fi
+
+ exec_target_run "$arch" "$exe" "$work/run.out" "$work/run.err"
+ if [ "$RUN_RC" -ne 42 ]; then
+ printf 'expected exit 42, got %s\n' "$RUN_RC" > "$work/run.diag"
+ cat "$work/run.err" >> "$work/run.diag" 2>/dev/null
+ not_ok "$name" "$work/run.diag"; return
+ fi
+
+ # Captured backtrace lines -> addresses -> addr2line -f.
+ local addrs
+ addrs="$(grep -oE '0x[0-9a-fA-F]+' "$work/run.out" 2>/dev/null | tr '\n' ' ')"
+ if [ -z "$addrs" ]; then
+ printf 'no "#N 0xADDR" lines captured; run.out was:\n' > "$work/sym.diag"
+ cat "$work/run.out" >> "$work/sym.diag" 2>/dev/null
+ not_ok "$name" "$work/sym.diag"; return
+ fi
+ # shellcheck disable=SC2086
+ "$KIT" addr2line -f -e "$exe" $addrs >"$work/sym.out" 2>"$work/sym.err"
+
+ local fn missing=""
+ for fn in $WANT_FUNCS; do
+ grep -qw "$fn" "$work/sym.out" || missing="$missing $fn"
+ done
+ if [ -n "$missing" ]; then
+ printf 'addr2line missing function(s):%s\naddresses: %s\nsymbolized:\n' \
+ "$missing" "$addrs" > "$work/sym.diag"
+ cat "$work/sym.out" >> "$work/sym.diag" 2>/dev/null
+ not_ok "$name" "$work/sym.diag"; return
+ fi
+ ok "$name"
+}
+
+for arch in ${KIT_RT_RUNTIME_ARCHES:-aa64 x64 rv64}; do
+ case "$arch" in
+ aa64|x64|rv64)
+ for opt in ${KIT_RT_OPT_LEVELS:-0 1}; do run_one "$arch" "$opt"; done ;;
+ *) not_ok "$arch" "unknown arch" ;;
+ esac
+done
+
+kit_summary test-rt-addr2line
+kit_exit
diff --git a/test/rt/addr2line_prog.c b/test/rt/addr2line_prog.c
@@ -0,0 +1,56 @@
+/* L3a backtrace round-trip program (driven by test/rt/addr2line.sh).
+ *
+ * Wires the weak __kit_backtrace_write sink to a freestanding write(2) and
+ * calls __kit_print_backtrace() from the bottom of a named, @[.noinline] call
+ * chain. The emitted "#N 0xADDR" lines go to stdout; the harness pipes the
+ * addresses to `kit addr2line -f` and checks that bt_leaf / bt_mid / bt_root /
+ * test_main appear. Built as a static non-PIE ELF, so the return addresses
+ * printed at run time equal the link-time addresses addr2line resolves.
+ *
+ * Linux-only (the rt runtime test tuples are all *-linux). write(2) is syscall
+ * 64 in the generic table used by aarch64 and riscv64, and 1 on x86-64. */
+#include <kit/backtrace.h>
+
+static long bt_write(int fd, const char* buf, unsigned long len) { /* raw write syscall; returns the kernel result */
+#if defined(__aarch64__)
+ register long x8 __asm__("x8") = 64; /* sys_write: the syscall number is passed in x8 */
+ register long x0 __asm__("x0") = fd; /* first argument in, result out ("+r" below) */
+ register long x1 __asm__("x1") = (long)buf;
+ register long x2 __asm__("x2") = (long)len;
+ __asm__ volatile("svc #0" : "+r"(x0) : "r"(x8), "r"(x1), "r"(x2) : "memory");
+ return x0;
+#elif defined(__x86_64__)
+ register long rax __asm__("rax") = 1; /* sys_write: the syscall number is passed in rax */
+ register long rdi __asm__("rdi") = fd;
+ register long rsi __asm__("rsi") = (long)buf;
+ register long rdx __asm__("rdx") = (long)len;
+ __asm__ volatile("syscall"
+ : "+r"(rax) /* rax carries the number in and the result out */
+ : "r"(rdi), "r"(rsi), "r"(rdx)
+ : "rcx", "r11", "memory"); /* rcx and r11 are declared clobbered by the instruction */
+ return rax;
+#elif defined(__riscv) && __riscv_xlen == 64
+ register long a7 __asm__("a7") = 64; /* sys_write: the syscall number is passed in a7 */
+ register long a0 __asm__("a0") = fd; /* first argument in, result out ("+r" below) */
+ register long a1 __asm__("a1") = (long)buf;
+ register long a2 __asm__("a2") = (long)len;
+ __asm__ volatile("ecall" : "+r"(a0) : "r"(a7), "r"(a1), "r"(a2) : "memory");
+ return a0;
+#else
+#error "addr2line_prog.c: unsupported architecture"
+#endif
+}
+
+/* Strong definition that replaces rt's weak default sink: bytes go to fd 1. */
+void __kit_backtrace_write(const char* buf, size_t len) {
+  (void)bt_write(1, buf, (unsigned long)len); /* syscall result is not inspected */
+}
+
+__attribute__((noinline)) int bt_leaf(void) {
+  __kit_print_backtrace(); /* innermost frame: the trace is emitted from here */
+  return 1;
+}
+__attribute__((noinline)) int bt_mid(void) { return 1 + bt_leaf(); }
+__attribute__((noinline)) int bt_root(void) { return 1 + bt_mid(); }
+
+int test_main(void) { return bt_root() != 3 ? 1 : 42; /* 42 only when all three levels ran */ }
diff --git a/test/rt/cases/print_backtrace.c b/test/rt/cases/print_backtrace.c
@@ -0,0 +1,95 @@
+/* __kit_print_backtrace format test (the in-process half of the L3a round-trip).
+ *
+ * Overrides the weak __kit_backtrace_write sink to capture the emitted bytes,
+ * calls __kit_print_backtrace() from the bottom of a known @[.noinline]
+ * recursion, then parses the captured "#N 0xADDR" lines in process. Asserts:
+ * the sink was called and ends on a line boundary; every line is "#<i> 0x<hex>"
+ * with a sequential index and a non-zero address; and the recursive frames
+ * share a return address (so the printed chain really followed the frame
+ * links). The address text is exactly what `kit addr2line -e <image>` consumes,
+ * so a clean parse here is the in-process complement of test/rt/addr2line.sh.
+ * Exits 42. Runs under test/rt/run.sh across the aa64/x64/rv64 tuples. */
+#include <kit/backtrace.h>
+
+#define DEPTH 6 /* argument of the outermost recurse() call */
+#define CAP 1024 /* size of the capture buffer g_buf, in bytes */
+#define MAXLINES 128 /* capacity of the parsed idx[]/addr[] arrays in test_main */
+
+static char g_buf[CAP]; /* bytes captured by the overridden __kit_backtrace_write */
+static int g_len; /* number of valid bytes currently in g_buf */
+
+/* Strong override of the weak rt sink: append the bytes to g_buf, dropping
+ * whatever no longer fits. Types mirror the header (size_t) for -Werror. */
+void __kit_backtrace_write(const char* buf, size_t len) {
+  size_t pos = 0;
+  while (pos < len && g_len < CAP) g_buf[g_len++] = buf[pos++];
+}
+
+/* Each level does work after its recursive call, so no call is a tail call and
+ * every level keeps a live frame; noinline keeps the chain intact at higher -O. */
+__attribute__((noinline)) static int recurse(int n) {
+  if (n <= 0) {
+    __kit_print_backtrace(); /* deepest level: the trace is emitted from here */
+    return 0;
+  }
+  int below = recurse(n - 1);
+  return below + 1;
+}
+
+int test_main(void) { /* returns 42 on success, otherwise the number of the failed check */
+ int idx[MAXLINES]; /* frame number parsed from each line */
+ unsigned long addr[MAXLINES]; /* address parsed from each line */
+ int cnt = 0; /* lines recorded so far */
+ int i = 0; /* read cursor into g_buf */
+
+ recurse(DEPTH); /* prints from the bottom of the recursion into g_buf via the sink */
+
+ if (g_len <= 0) return 1; /* the sink must have been called */
+ if (g_buf[g_len - 1] != '\n') return 2; /* every line is terminated */
+
+ /* Parse each "#<dec> 0x<hex>\n" line strictly, recording index and address. */
+ while (i < g_len) {
+ int v = 0; /* decimal frame number of this line */
+ unsigned long a = 0; /* hex address of this line */
+ if (g_buf[i] != '#') return 3; /* a line must start with '#' */
+ i++;
+ if (i >= g_len || g_buf[i] < '0' || g_buf[i] > '9') return 4; /* need >= 1 decimal digit */
+ while (i < g_len && g_buf[i] >= '0' && g_buf[i] <= '9')
+ v = v * 10 + (g_buf[i++] - '0');
+ if (i + 2 >= g_len || g_buf[i] != ' ' || g_buf[i + 1] != '0' ||
+ g_buf[i + 2] != 'x')
+ return 5; /* the index must be followed by " 0x" */
+ i += 3;
+ if (i >= g_len || g_buf[i] == '\n') return 6; /* need >= 1 hex digit */
+ while (i < g_len && g_buf[i] != '\n') {
+ char c = g_buf[i];
+ int h;
+ if (c >= '0' && c <= '9')
+ h = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ h = c - 'a' + 10;
+ else
+ return 7; /* only digits and lowercase a-f are accepted */
+ a = (a << 4) | (unsigned long)h;
+ i++;
+ }
+ if (i >= g_len || g_buf[i] != '\n') return 8; /* the line ran off the buffer unterminated */
+ i++; /* consume the newline */
+ if (cnt >= MAXLINES) break; /* arrays full: stop parsing; this line is not recorded */
+ idx[cnt] = v;
+ addr[cnt] = a;
+ cnt++;
+ }
+
+ /* The chain holds at least the recurse() frames plus test_main. */
+ if (cnt < DEPTH + 1) return 9;
+ for (i = 0; i < cnt; i++) {
+ if (idx[i] != i) return 10; /* frame numbers are sequential from 0 */
+ if (addr[i] == 0) return 11; /* a real frame never has a null retaddr */
+ }
+ /* #1..#DEPTH all return to the single recursive call site, so consecutive
+ * recursive frames share a return address. (#0 is the print call site.) */
+ if (addr[1] != addr[2]) return 12;
+
+ return 42; /* every check passed */
+}
diff --git a/test/rt/run.sh b/test/rt/run.sh
@@ -113,9 +113,15 @@ kit_lane_R() {
kit_skip "$name" "clang cannot build start.o for $triple"; return
fi
+ # Opt axis: KIT_OPT is the corpus opt level ("0"/"1"/…); "-" is the
+ # no-axis sentinel. Pass -O<level> only for a real level so the FP-chain
+ # walk is exercised against both unoptimized and optimized callers.
+ local optflag=""
+ case "$KIT_OPT" in 0|1|2|3|s|z) optflag="-O$KIT_OPT" ;; esac
+
local obj="$KIT_WORK/$KIT_BASE.o"
local exe="$KIT_WORK/$KIT_BASE.exe"
- if ! "$KIT" cc -target "$triple" -Werror -c "$KIT_SRC" -o "$obj" \
+ if ! "$KIT" cc -target "$triple" $optflag -Werror -c "$KIT_SRC" -o "$obj" \
>"$KIT_WORK/cc.out" 2>"$KIT_WORK/cc.err"; then
kit_fail "$name" "compile (see $KIT_WORK/cc.err)"; return
fi
@@ -144,9 +150,13 @@ for arch in $ARCHES; do
esac
done
+# Opt axis: sweep O0 and O1 so the runtime cases (notably the FP-chain
+# backtrace walk) are exercised against both unoptimized and optimized
+# callers. Override with KIT_RT_OPT_LEVELS (e.g. "0" for a faster smoke).
KIT_LABEL=test-rt-runtime KIT_BUILD_DIR="$BUILD_DIR" \
KIT_CORPUS_GLOBS="$CASES_DIR/*.c" KIT_CORPUS_EXT=c KIT_SIDECAR_DIR="$CASES_DIR" \
- KIT_LANES="R" KIT_OPT_LEVELS="" KIT_TUPLES="$TUPLES" KIT_TARGETS_EXT="" \
+ KIT_LANES="R" KIT_OPT_LEVELS="${KIT_RT_OPT_LEVELS:-0 1}" KIT_TUPLES="$TUPLES" \
+ KIT_TARGETS_EXT="" \
KIT_PARALLELIZABLE="${KIT_RT_PARALLEL:-1}" kit_corpus_run
kit_summary test-rt-runtime
diff --git a/test/rt/smoke.c b/test/rt/smoke.c
@@ -28,6 +28,7 @@
#include <float.h>
#include <iso646.h>
+#include <kit/backtrace.h>
#include <kit/coro.h>
#include <limits.h>
#include <setjmp.h>
@@ -212,11 +213,21 @@ static int kit_atomic_ok(void) {
return 1;
}
+/* kit/backtrace: shows the capture and print entry points compile and resolve.
+   Compile-only: smoke.c is never linked against a libkit_rt, so no walk runs. */
+static int kit_backtrace_compiles(void) {
+  void* pcs[8];
+  int captured = __kit_backtrace(pcs, 8, 1);
+  __kit_print_backtrace();
+  return captured;
+}
+
/* Reference everything so -Wunused-* stays quiet. */
int kit_smoke_ok(void) {
(void)aligned_buf;
if (0) kit_trap();
if (0) (void)kit_setjmp_compiles(0);
if (0) (void)kit_coro_compiles();
+ if (0) (void)kit_backtrace_compiles();
return sum_n(3, 1, 2, 3) == 6 && kit_atomic_ok();
}