kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit b3f14e4ecb76fed2930be28041e7e68a692b073f
parent 5a7a304b5a1d6dd328cc662759c895feebb5340c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri,  5 Jun 2026 09:26:10 -0700

Implement __kit_print_backtrace & assert hook (BACKTRACE L3a/WS4)

Ship L3a of the backtrace roadmap: raw backtrace print + out-of-process
symbolization, plus the deferred assert-path hook.

- rt/lib/stack/print_backtrace.c: __kit_print_backtrace walks via
  __kit_backtrace(skip=1) and writes "#N 0x<hex>" lines (hand-rolled
  fmt, no printf) to the weak no-op __kit_backtrace_write sink. Both
  decls in rt/include/kit/backtrace.h; added to RT_BASE_SRCS.
- rt/lib/assert/assert.c: __kit_assert_fail emits a banner +
  __kit_print_backtrace() before __builtin_trap(), via the same sink.
- Resolves the L3a open question: weak no-op sink default (host/_start
  overrides to write(2)), so freestanding images still link.

Tests (aa64/x64/rv64, O0 + O1):
- test/rt/cases/print_backtrace.c: in-process parse of the emitted lines.
- test/rt/addr2line.sh + addr2line_prog.c: kit addr2line round-trip
  (make target test-rt-backtrace).
- test/rt/run.sh now sweeps O0+O1 (KIT_RT_OPT_LEVELS); smoke.c includes
  <kit/backtrace.h>.

The backtrace path passes at O0 and O1 on all three arches. Sweeping O1
surfaced two unrelated pre-existing bugs, left RED (not skipped) and
logged in doc/plan/TODO.md:
- x86-64 -g -O1 + 4-operand register-pinned syscall asm aborts the
  compiler (too many memory asm operands); fails test-rt-backtrace x64/O1.
- setjmp/longjmp miscompiled at -O1 on every arch (second-return value
  not observed); fails test-rt-runtime setjmp_runtime/O1.

Diffstat:
M doc/plan/BACKTRACE.md | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
M doc/plan/TODO.md | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M mk/rt.mk | 1 +
M mk/test.mk | 9 +++++++++
M rt/include/kit/backtrace.h | 25 +++++++++++++++++++++++++
M rt/lib/assert/assert.c | 48 ++++++++++++++++++++++++++++++++++++++++++++----
A rt/lib/stack/print_backtrace.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/rt/addr2line.sh | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/rt/addr2line_prog.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/rt/cases/print_backtrace.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/rt/run.sh | 14 ++++++++++++--
M test/rt/smoke.c | 11 +++++++++++
12 files changed, 682 insertions(+), 40 deletions(-)

diff --git a/doc/plan/BACKTRACE.md b/doc/plan/BACKTRACE.md @@ -1,6 +1,53 @@ # Plan: stack-trace builtins & runtime backtrace -## Status — 2026-06-05 — L1 + L2 shipped (WS1–WS3); L3 remaining +## Status — 2026-06-05 — L1 + L2 + L3a shipped (WS1–WS4); WS5/L3b remaining + +L3a (WS4) is now shipped on top of L1/L2: + +- **L3a print** `__kit_print_backtrace` — `rt/lib/stack/print_backtrace.c` walks + via `__kit_backtrace(buf, 64, skip=1)` (skip hides the print frame, so `#0` is + the caller) and writes one raw `#N 0x<hex>` line per frame to the **weak** + `__kit_backtrace_write(const char*, size_t)` sink. Integer/hex formatting is + hand-rolled (no printf/libc pulled into the panic path); the address uses + `uintptr_t` so it is not truncated on LLP64. Declared in + `rt/include/kit/backtrace.h`; added to `RT_BASE_SRCS`. +- **Output sink (open question resolved):** weak no-op `__kit_backtrace_write` + default, so freestanding images that never wire a sink still link; the host / + `_start` overrides it to route bytes to `write(2)` (or a UART). Chosen over a + mandatory explicit-sink param to keep freestanding builds link-clean. +- **Assert hook (deferred from L2)** — `rt/lib/assert/assert.c::__kit_assert_fail` + now emits a `kit: assertion failed: <expr>, file <file>, line <line>, function + <func>` banner then `__kit_print_backtrace()` before `__builtin_trap()`, all + through the same weak sink (printf-free). Pulling `__kit_assert_fail` therefore + also pulls `print_backtrace.o` → `backtrace.o` from the archive — the intended + wiring. +- **Symbolization** is out-of-process via the existing `kit addr2line` — verified + round-trip: a static non-PIE ELF prints its own trace at runtime, the captured + addresses feed straight to `kit addr2line -f -e <image>`, resolving + `bt_leaf`/`bt_mid`/`bt_root`/`test_main` (outer no-`-g` frames show `??`). 
+ +Tests (L3a): `test/rt/cases/print_backtrace.c` (in-process parse of the emitted +`#N 0xADDR` lines, aa64/x64/rv64 under exec, exit 42) and `test/rt/addr2line.sh` ++ `test/rt/addr2line_prog.c` (the `kit addr2line` round-trip, make target +`test-rt-backtrace`). `test/rt/smoke.c` also includes `<kit/backtrace.h>` so the +header compiles on every rt-header target. + +**Opt coverage — the backtrace path passes at O0 *and* O1 on all three arches.** +The rt-runtime corpus (`test/rt/run.sh`) and the addr2line round-trip +(`test/rt/addr2line.sh`) now sweep both opt levels (`KIT_RT_OPT_LEVELS`), so +`backtrace_capture` (L2) and `print_backtrace` (L3a) are exercised against +optimized callers — all green at O0/O1. Sweeping O1 also surfaced **two +unrelated, pre-existing kit bugs**, left red (not skipped) and logged in +doc/plan/TODO.md: (1) **x86-64 `-g -O1` + the 4-operand register-pinned syscall +idiom** aborts the compiler (`too many memory asm operands`, +`src/arch/x64/native.c:4014`) — this is why the `x64/O1` lane of +`test-rt-backtrace` is red, though x64/O1 backtrace correctness is still proven +by `print_backtrace`/`backtrace_capture` (no asm); (2) **setjmp/longjmp is +miscompiled at `-O1`** on every arch (`setjmp_runtime/O1` returns 1, not 42 — +the second-return value isn't observed), failing `test-rt-runtime`. + +Remaining: **WS5 (L3c)** tool-side auto-backtrace and **L3b** in-process +self-symbolization. Implemented and tested through L2: @@ -38,23 +85,18 @@ D/R/E/J/C lanes at O0/O1, `test/toy/cases/154_frame_return_address.toy`. ### Remaining tasks (L3) -Nothing in L1/L2 is outstanding. What's left is all of L3 — symbolize & print: - -- **WS4 — L3a (recommended next):** `__kit_print_backtrace()` in rt — walk via - `__kit_backtrace`, write raw `#N 0xADDR` lines to a weak - `__kit_backtrace_write` sink, symbolize out-of-process via `kit addr2line`. 
- Then wire the **assert-path hook** (`rt/lib/assert/assert.c::__kit_assert_fail` - → `__kit_print_backtrace()` before `__builtin_trap()`) — deferred from L2 - because it calls this L3 function. The end-to-end round-trip is already proven - manually (static + dynamic, aa64/x64/rv64 — see Status); WS4 packages it as a - shipped `__kit_print_backtrace` + test. -- **WS5 — L3c:** tool-side auto-backtrace in `kit run`/`kit emu`/`dbg` fault - handlers (reuses the existing DWARF reader + `dbg bt`; never crosses into rt). +Nothing in L1/L2/L3a is outstanding. What's left is the rest of L3: + +- ~~**WS4 — L3a:**~~ **done** (see Status) — `__kit_print_backtrace()` + weak + `__kit_backtrace_write` sink + assert-path hook + `kit addr2line` round-trip. +- **WS5 — L3c (recommended next):** tool-side auto-backtrace in `kit run`/`kit + emu`/`dbg` fault handlers (reuses the existing DWARF reader + `dbg bt`; never + crosses into rt). - **L3b:** in-process self-symbolization (hosted-only `libkit_bt.a`); deferred until a concrete consumer needs in-binary symbolized panics. -One open question remains (the L3a output sink — see Open questions); the others -listed below were resolved while building L1/L2. +All Open-questions items are now resolved (the L3a output sink chose the weak +default — see Open questions). ## Overview @@ -217,10 +259,10 @@ to the target pointer width automatically — no offset table, no `#ifdef` casca - `mk/rt.mk` — added `rt/lib/stack/backtrace.c` to `RT_BASE_SRCS` (built for every variant; `rt/lib/stack/` already compiled the Windows chkstk helper). -- **Assert-path hook — deferred to L3 (WS4):** making - `rt/lib/assert/assert.c::__kit_assert_fail` print a backtrace before - `__builtin_trap()` needs the L3 `__kit_print_backtrace()`, so it lands with - WS4, not here. +- **Assert-path hook — landed in WS4 (was deferred):** + `rt/lib/assert/assert.c::__kit_assert_fail` now emits a banner + + `__kit_print_backtrace()` before `__builtin_trap()`. 
It needed the L3 + `__kit_print_backtrace()`, so it shipped with WS4 rather than L2. ### Tests (L2) [done] @@ -238,14 +280,15 @@ This is where the freestanding boundary bites: turning an address into sub-options, ordered by how cleanly they respect that boundary. Recommend shipping **L3a now**, leaving L3b/L3c as documented extensions. -- **L3a — raw print + out-of-process symbolization (recommended default).** - `__kit_print_backtrace()` lives in rt, walks via `__kit_backtrace`, and writes - raw lines (`#0 0x401136`, …) to a host-provided sink (a weak - `__kit_backtrace_write(const char*, size_t)` the host or `_start` wires to - `write(2)`; freestanding default is a no-op). Symbolization is then a separate - step through the **existing** `kit addr2line` tool (or a thin new `kit - symbolize` that batches). Zero new symbolization code, fully freestanding, - matches how minimal panic handlers work in the wild. +- **L3a — raw print + out-of-process symbolization (shipped — WS4).** + `__kit_print_backtrace()` lives in rt (`rt/lib/stack/print_backtrace.c`), walks + via `__kit_backtrace`, and writes raw lines (`#0 0x401136`, …) to a + host-provided sink (the weak `__kit_backtrace_write(const char*, size_t)` the + host or `_start` wires to `write(2)`; freestanding default is a no-op). + Symbolization is a separate step through the **existing** `kit addr2line` tool + (a thin batching `kit symbolize` remains a possible future convenience). Zero + new symbolization code, fully freestanding, matches how minimal panic handlers + work in the wild. - **L3b — in-process self-symbolization (hosted-only).** A trimmed line/func reader (reusing `kit_dwarf_addr_to_line` + `kit_dwarf_func_at`) linked into a @@ -264,8 +307,11 @@ shipping **L3a now**, leaving L3b/L3c as documented extensions. ### Tests (L3) -- L3a: smoke test piping captured addresses through `kit addr2line`, asserting - the expected function names appear. 
+- L3a [done]: `test/rt/addr2line.sh` (+ `addr2line_prog.c`) runs a kit-compiled + program that prints its own trace, then pipes the captured addresses through + `kit addr2line -f`, asserting `bt_leaf`/`bt_mid`/`bt_root`/`test_main` appear + (make target `test-rt-backtrace`, aa64/x64/rv64). In-process companion: + `test/rt/cases/print_backtrace.c` parses the emitted `#N 0xADDR` lines. - L3c: an `kit emu` fault test asserting a symbolized frame line on stderr. --- @@ -278,15 +324,21 @@ shipping **L3a now**, leaving L3b/L3c as documented extensions. 3. **WS3 — L2 `__kit_backtrace`** in rt + capture test. ✅ done (assert-hook moved to WS4 — it needs the L3 print fn). 4. **WS4 — L3a** raw print (`__kit_print_backtrace` + weak `__kit_backtrace_write` - sink) + `kit addr2line` round-trip; wire the assert hook. ⏳ remaining (next). -5. **WS5 — L3c** tool-side auto-backtrace (optional, parallelizable). ⏳ remaining. + sink) + `kit addr2line` round-trip; wire the assert hook. ✅ done. +5. **WS5 — L3c** tool-side auto-backtrace (optional, parallelizable). ⏳ remaining (next). 6. **L3b** deferred until a consumer needs in-binary symbolized panics. ## Open questions -- **Output sink for L3a (open):** weak `__kit_backtrace_write` vs. requiring the - host to pass a sink explicitly. Weak-symbol default keeps freestanding builds - linking. Resolve in WS4. +None outstanding. + +Resolved in WS4: + +- ~~**Output sink for L3a:**~~ weak `__kit_backtrace_write` (no-op default) vs. + requiring the host to pass a sink explicitly. **Chose the weak default** — it + keeps freestanding builds linking with no sink, and a host / `_start` + overrides it to route bytes to `write(2)` or a UART. (Resolved while building + WS4.) Resolved while building L1/L2: diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md @@ -80,6 +80,114 @@ Found while building a freestanding Linux backtrace demo (it needed `write`/ `exit`); worked around with inline asm. 
The toy frontend maps `@syscall` to the same `INTRIN_NONE` and likewise can't lower it (`test/toy/cases/unsupported_syscall`). +## Inline asm: kit rejects machine-specific register constraints (e.g. x86 `"=a"`) + +kit's inline-asm lowering only recognizes the **architecture-neutral** register +classes `r` (general), `f`/`x`/`w` (FP/SIMD), plus `i` (immediate input), `m` +(memory), and matching digits (`0`–`9`). Any GCC *machine* constraint letter — +most commonly x86's `"=a"`/`"a"` (the canonical `syscall`/`cpuid`/`rdtsc` idiom), +but also `b`/`c`/`d`/`S`/`D`/`q`/`Q`, etc. — is rejected with +`KitCg: unsupported asm output constraint` (or `… input constraint`). GCC accepts +`"=a"` on x86; kit does not. + +Root cause is target-independent and lives in the **CG layer**, not a backend: +`api_asm_is_reg_constraint` (`src/cg/asm.c:41`) returns true only for +`r`/`f`/`x`/`w`, and the output/input constraint handlers +(`src/cg/asm.c:195` / `:234`) `compiler_panic` on anything else. Because this is +the front-of-pipe lowering (before any arch backend, before any opt pass), the +rejection is identical on **every arch and every opt level**. Verified matrix +(x86_64/aarch64/riscv64 × O0/O1): `"=a"` output → all 6 fail with the same +message; the register-pinned workaround → all 6 compile. + +Workaround (what the rest of the tree uses, e.g. `test/link/harness/start.c` and +`test/rt/addr2line_prog.c`): pin to a hard register with a GNU local register +variable bound to a plain `r`/`+r` constraint, never the machine letter: + +```c +/* NOT supported: __asm__("syscall" : "=a"(ret) : "a"(n) : ...); */ +register long rax __asm__("rax") = n; /* hard-register pin */ +__asm__ volatile("syscall" : "+r"(rax) : : "rcx", "r11", "memory"); +return rax; /* read result back from rax */ +``` + +The hard-register pin (`AsmConstraint.reg`) rides alongside the `r` operand and +selects the exact register, so this is fully general. 
A proper fix would map the +common machine constraint letters to their register class + a hard-register pin +(at least x86 `a`/`b`/`c`/`d`/`S`/`D` → `r`+pin) so stock GCC/Clang syscall and +cpuid snippets compile unchanged. Found writing the freestanding backtrace +round-trip program (`test/rt/addr2line_prog.c`), whose x86-64 `write` syscall +first used `"=a"` and had to be rewritten to the register-pinned form +(doc/plan/BACKTRACE.md, WS4). + +## x86-64 inline asm: `-g -O1` + a 4-operand register idiom → `too many memory asm operands` (compiler abort) + +A register-pinned inline-asm syscall (4 operands: `rax`/`rdi`/`rsi`/`rdx` via GNU +local register variables + an `"r"`/`"+r"` constraint, the only syscall idiom kit +accepts — see the entry above) aborts the compiler **only on x86-64 at `-O1` with +`-g`**. The bracket is exact: `-g -O0` OK, `-O1` (no `-g`) OK, `-O0` OK, and +aarch64/riscv64 compile it fine at `-g -O1`; only x64 + O1 + -g fails. The +message is `fatal: x64 inline asm: too many memory asm operands`. + +Minimal repro (`kit cc -target x86_64-linux-gnu -g -O1 -c`): + +```c +static long w(int fd, const char* b, unsigned long n) { + register long rax __asm__("rax") = 1, rdi __asm__("rdi") = fd; + register long rsi __asm__("rsi") = (long)b, rdx __asm__("rdx") = (long)n; + __asm__ volatile("syscall" : "+r"(rax) : "r"(rdi), "r"(rsi), "r"(rdx) + : "rcx", "r11", "memory"); + return rax; +} +``` + +Root cause: the x64 asm lowering stages a *memory-resident* `"r"` operand into a +scratch register before the asm, but the scratch pool is only **two** registers +(`X64_TMP_INT` / `X64_TMP_INT2`), and `src/arch/x64/native.c:4014` panics on the +third. At `-O1 -g` the four pinned `register long` operands are left stack- +resident at the asm point (the GNU `register asm` hint binds the operand, it does +not pin residency across statements; the `-g` location tracking perturbs the +allocator into spilling), so 3+ need staging and it trips. 
`-O0` keeps them in +registers, so `ntmp` stays ≤ 2. Fix: when an `"r"` operand carries a hard- +register pin, load it straight into that pinned register instead of a shared +scratch temp (no temp needed at all); failing that, stage through more than two +scratch regs. **Secondary:** the fatal itself does not exit cleanly — under the +ASan host build `compiler_panic`'s `longjmp` (`src/core/core.c:179`) SEGVs, so the +diagnostic becomes a SIGABRT/SEGV instead of a clean `fatal:` exit. Found writing +the WS4 backtrace round-trip (`test/rt/addr2line_prog.c`), whose x86-64 `write` +sink is exactly this idiom; surfaced by sweeping that test at `-O1` per +doc/plan/BACKTRACE.md — left red (`test-rt-backtrace`, `x64/O1` lane). + +## setjmp/longjmp miscompiled at `-O1`: the longjmp'd `setjmp` return value is wrong + +A textbook setjmp/longjmp round-trip returns the right answer at `-O0` but the +wrong one at `-O1` on **all three** native arches (aa64/x64/rv64) — a wrong- +answer miscompile, not a crash. `test/rt/cases/setjmp_runtime.c` exits 42 at O0 +and **1** at O1: the `int rc = setjmp(env)` value observed after the `longjmp` +is not `1`. (`marker` is `volatile`, so the test is well-formed — the bad read is +the `setjmp` result itself, not the local.) + +Minimal repro (`kit cc -O1`, run it): + +```c +#include <setjmp.h> +int test_main(void) { + jmp_buf env; + volatile int marker = 11; + int rc = setjmp(env); /* O1: second return value not observed */ + if (rc == 0) { marker = 31; longjmp(env, 1); } + return (marker == 31 && rc == 1) ? 42 : 1; /* O0: 42, O1: 1 */ +} +``` + +Classic "`setjmp` not modeled as returns-twice": the optimizer treats the call as +returning once, so the SSA value for `rc` (and anything else live across the +`setjmp`) is folded/cached to the first-return value rather than reloaded on the +`longjmp` re-entry. 
Target-independent (fails on every arch), so the fix is in the +opt/IR layer — mark `setjmp`-family calls returns-twice (force a reload of values +live across them; pin them to memory), as GCC/Clang do. Found sweeping the rt +runtime corpus at O0+O1 for the WS4 backtrace work (doc/plan/BACKTRACE.md); left +red (`test-rt-runtime`, `setjmp_runtime/O1`). + ## `-no-pie` does not produce a non-PIE (ET_EXEC) executable `-no-pie` sets `o->target.pic = KIT_PIC_NONE` (`driver/cmd/cc.c:1185`) but does diff --git a/mk/rt.mk b/mk/rt.mk @@ -232,6 +232,7 @@ RT_BASE_SRCS = \ rt/lib/atomic/atomic_freestanding.c \ rt/lib/cache/clear_cache.c \ rt/lib/stack/backtrace.c \ + rt/lib/stack/print_backtrace.c \ rt/lib/kit/ifunc_init.c RT_COMPILER_SRCS = \ diff --git a/mk/test.mk b/mk/test.mk @@ -106,6 +106,7 @@ TEST_TARGETS = \ test-pp-ok \ test-rt-headers \ test-rt-runtime \ + test-rt-backtrace \ test-link-x64 \ test-rv64-inline \ test-rv64-jit \ @@ -166,6 +167,7 @@ DEFAULT_TEST_TARGETS = \ test-libc \ test-link-x64 \ test-rt-runtime \ + test-rt-backtrace \ test-bounce \ bootstrap \ test-bootstrap-toy @@ -579,6 +581,13 @@ LINK_EXE_RUNNER = build/test/link-exe-runner test-rt-runtime: bin $(RT_RUNTIME_DEPS) $(LINK_EXE_RUNNER) @bash test/rt/run.sh +# L3a backtrace round-trip: run a kit-compiled program that prints its own +# backtrace, then symbolize the captured addresses with `kit addr2line`. Same +# per-arch deps as test-rt-runtime (each arch's linux rt archive + the Path-E +# link runner). See test/rt/addr2line.sh and doc/plan/BACKTRACE.md (L3a). +test-rt-backtrace: bin $(RT_RUNTIME_DEPS) $(LINK_EXE_RUNNER) + @bash test/rt/addr2line.sh + # Test harness binaries shared by test-elf and test-link. # Declared as Make targets (not built by the run.sh scripts) so they pick # up libkit.a changes deterministically. 
diff --git a/rt/include/kit/backtrace.h b/rt/include/kit/backtrace.h @@ -19,6 +19,8 @@ #ifndef KIT_BACKTRACE_H #define KIT_BACKTRACE_H +#include <stddef.h> /* size_t — a freestanding header */ + /* Fill buf[0..max) with return addresses, innermost first, and return the * number written. The walk starts at __kit_backtrace's own frame, so with * skip == 0 buf[0] is the return address into the direct caller of @@ -34,4 +36,27 @@ * `skip` is treated as 0; non-positive `max` returns 0. */ int __kit_backtrace(void** buf, int max, int skip); +/* Walk the current call stack and emit one raw frame line per return address, + * innermost caller first, to __kit_backtrace_write in the form + * + * #0 0x<hex>\n + * #1 0x<hex>\n + * ... + * + * __kit_print_backtrace's own frame is skipped, so #0 is the return address + * into its direct caller (the site that asked for a trace). The address is the + * unsymbolized return address; pipe it to `kit addr2line -e <image>` to recover + * `func at file:line`. Like __kit_backtrace this is a freestanding frame-pointer + * walk — no libc, no DWARF, no printf — so it is safe to call from a crash or + * panic handler. See doc/plan/BACKTRACE.md (L3a). */ +void __kit_print_backtrace(void); + +/* Output sink for __kit_print_backtrace (and the freestanding assert handler). + * It is a WEAK no-op by default, so a freestanding image that never wires a + * sink still links and runs; a host start file or runtime overrides it to route + * the bytes somewhere visible — typically write(2, buf, len) to stderr, or a + * UART on bare metal. `buf` is not NUL-terminated; exactly `len` bytes are + * valid. 
*/ +void __kit_backtrace_write(const char* buf, size_t len); + #endif /* KIT_BACKTRACE_H */ diff --git a/rt/lib/assert/assert.c b/rt/lib/assert/assert.c @@ -2,13 +2,53 @@ // // SPDX-License-Identifier: 0BSD //===----------------------------------------------------------------------===// +// +// __kit_assert_fail is the freestanding target of a failed assert(). It emits a +// human-readable banner and then a raw backtrace, both through the weak +// __kit_backtrace_write sink (see rt/include/kit/backtrace.h), before trapping. +// When no sink is wired the banner and trace are discarded, so this stays +// link-clean and printf-free in a bare freestanding image; a host that wires +// the sink (typically write(2) to stderr) gets a diagnosable trap. + +#include <kit/backtrace.h> +#include <stddef.h> + +static void bt_emit(const char* s) { + size_t n = 0; + if (!s) return; + while (s[n]) n++; + __kit_backtrace_write(s, n); +} + +static void bt_emit_int(int v) { + char buf[12]; /* "-2147483648" + slack; we pass an explicit length */ + int k = (int)sizeof buf; + int neg = v < 0; + unsigned u = neg ? 0u - (unsigned)v : (unsigned)v; + do { + buf[--k] = (char)('0' + (int)(u % 10u)); + u /= 10u; + } while (u && k > 0); + if (neg && k > 0) buf[--k] = '-'; + __kit_backtrace_write(buf + k, (size_t)((int)sizeof buf - k)); +} __attribute__((weak)) void __kit_assert_fail(const char* expr, const char* file, int line, const char* func) { - (void)expr; - (void)file; - (void)line; - (void)func; + bt_emit("kit: assertion failed: "); + bt_emit(expr ? 
expr : "(unknown)"); + if (file) { + bt_emit(", file "); + bt_emit(file); + } + bt_emit(", line "); + bt_emit_int(line); + if (func) { + bt_emit(", function "); + bt_emit(func); + } + bt_emit("\n"); + __kit_print_backtrace(); __builtin_trap(); for (;;) { } diff --git a/rt/lib/stack/print_backtrace.c b/rt/lib/stack/print_backtrace.c @@ -0,0 +1,71 @@ +/* + * __kit_print_backtrace -- raw (unsymbolized) backtrace print, built on the + * freestanding __kit_backtrace frame-pointer walk. See doc/plan/BACKTRACE.md + * (L3a) and rt/include/kit/backtrace.h for the contract. + * + * Each captured return address is written as a "#<n> 0x<hex>\n" line to the + * weak __kit_backtrace_write sink. Symbolization is deliberately out of + * process: pipe the addresses to `kit addr2line -e <image>`. The integer + * formatting is done by hand so the panic path drags in no printf/libc and + * stays usable from a crash handler. + */ +#include <kit/backtrace.h> +#include <stdint.h> + +/* Weak default sink: discard. A host start file / runtime overrides this to + * route the bytes to write(2), a serial port, etc. Keeping it weak means a + * freestanding image that never wires a sink still links and runs. */ +__attribute__((weak)) void __kit_backtrace_write(const char* buf, size_t len) { + (void)buf; + (void)len; +} + +#define KIT_BT_PRINT_MAX 64 + +/* Format "#<idx> 0x<hex>\n" into `out` and return the byte count. `out` must + * hold the longest line: '#' + up to 10 decimal digits (u32 idx) + " 0x" + up + * to 16 hex digits (64-bit pointer) + '\n' == 31 bytes; the caller's line[48] + * is comfortably large. The hex address uses uintptr_t so it is not truncated + * on an LLP64 target, where `unsigned long` would be too narrow. 
*/ +static int bt_format_frame(char* out, unsigned idx, void* addr) { + static const char hexd[] = "0123456789abcdef"; + char digits[10]; + uintptr_t a = (uintptr_t)addr; + int started = 0, shift, k = 0, j = 0; + + out[k++] = '#'; + do { + digits[j++] = (char)('0' + (int)(idx % 10u)); + idx /= 10u; + } while (idx); + while (j > 0) out[k++] = digits[--j]; + + out[k++] = ' '; + out[k++] = '0'; + out[k++] = 'x'; + /* Most-significant nibble first; suppress leading zeros but always emit at + * least the final nibble so an address of 0 still prints as "0x0". */ + for (shift = (int)(sizeof(uintptr_t) * 8) - 4; shift >= 0; shift -= 4) { + unsigned nib = (unsigned)((a >> shift) & 0xfu); + if (nib != 0u || started || shift == 0) { + out[k++] = hexd[nib]; + started = 1; + } + } + out[k++] = '\n'; + return k; +} + +void __kit_print_backtrace(void) { + void* frames[KIT_BT_PRINT_MAX]; + char line[48]; + int n, i; + + /* skip == 1 hides __kit_print_backtrace's own frame, so frame #0 is the + * return address into its direct caller. */ + n = __kit_backtrace(frames, KIT_BT_PRINT_MAX, 1); + for (i = 0; i < n; i++) { + int len = bt_format_frame(line, (unsigned)i, frames[i]); + __kit_backtrace_write(line, (size_t)len); + } +} diff --git a/test/rt/addr2line.sh b/test/rt/addr2line.sh @@ -0,0 +1,164 @@ +#!/usr/bin/env bash +# test/rt/addr2line.sh — L3a backtrace round-trip. +# +# Compiles test/rt/addr2line_prog.c (which prints its own backtrace via +# __kit_print_backtrace), links a static non-PIE ELF, runs it to capture the +# raw "#N 0xADDR" lines, then feeds those addresses to `kit addr2line -f` and +# asserts the expected function names (bt_leaf / bt_mid / bt_root / test_main) +# appear — proving the freestanding capture/print path emits addresses the +# hosted DWARF tools resolve. See doc/plan/BACKTRACE.md (L3a). 
+# +# The per-arch wiring mirrors test/rt/run.sh's lane R: each arch maps to an +# <arch>-linux exec tuple, the matching build/rt/<triple>/libkit_rt.a, a clang +# freestanding start.o, a link via link-exe-runner, and an exec via +# exec_target. Because the image is a static non-PIE ELF, the return addresses +# printed at run time equal its link-time addresses, so they pipe straight to +# addr2line. Anything missing (rt archive, exec runner, clang start.o) is a +# SKIP, not a failure. Set KIT_TEST_ALLOW_SKIP=1 to exit 0 with skips. + +set -u + +ROOT="$(cd "$(dirname "$0")/../.." && pwd)" +PROG_SRC="$ROOT/test/rt/addr2line_prog.c" +BUILD_DIR="$ROOT/build/test/rt-addr2line" +KIT="$ROOT/build/kit" +LINK_EXE_RUNNER="$ROOT/build/test/link-exe-runner" +START_SRC="$ROOT/test/link/harness/start.c" + +export KIT_KIT_DIR="$ROOT/test/lib" +# shellcheck source=../lib/kit_sh_kit.sh +. "$ROOT/test/lib/kit_sh_kit.sh" +kit_report_init +[ "${KIT_TEST_ALLOW_SKIP:-0}" = 1 ] || KIT_SKIP_IS_FAILURE=1 + +mkdir -p "$BUILD_DIR" + +if [ ! -x "$KIT" ]; then + skip_test "kit" "kit driver missing at $KIT -- run \`make bin\` first" + kit_summary test-rt-addr2line + kit_exit +fi +if [ ! -x "$LINK_EXE_RUNNER" ]; then + skip_test "link-exe-runner" "missing at $LINK_EXE_RUNNER -- run \`make test-rt-runtime\`" + kit_summary test-rt-addr2line + kit_exit +fi + +# exec_target wiring (same host-detection knobs test/rt/run.sh exports). +have_qemu=0 +QEMU_BIN="$(command -v qemu-aarch64-static 2>/dev/null || command -v qemu-aarch64 2>/dev/null || true)" +[ -n "$QEMU_BIN" ] && have_qemu=1 +have_podman=0 +command -v podman >/dev/null 2>&1 && have_podman=1 +arch_raw="$(uname -m 2>/dev/null || true)" +is_aarch64=0 +if [ "$(uname -s 2>/dev/null)" = "Linux" ]; then + { [ "$arch_raw" = "aarch64" ] || [ "$arch_raw" = "arm64" ]; } && is_aarch64=1 +fi +export have_qemu QEMU_BIN have_podman is_aarch64 +EXEC_TARGET_MOUNT_ROOT="$BUILD_DIR" +export EXEC_TARGET_MOUNT_ROOT +# shellcheck source=../lib/exec_target.sh +. 
"$ROOT/test/lib/exec_target.sh" + +arch_triple() { + case "$1" in + aa64) echo "aarch64-linux-gnu" ;; + x64) echo "x86_64-linux-gnu" ;; + rv64) echo "riscv64-linux-gnu" ;; + *) return 1 ;; + esac +} +rt_archive() { + case "$1" in + aa64) echo "$ROOT/build/rt/aarch64-linux/libkit_rt.a" ;; + x64) echo "$ROOT/build/rt/x86_64-linux/libkit_rt.a" ;; + rv64) echo "$ROOT/build/rt/riscv64-linux/libkit_rt.a" ;; + *) return 1 ;; + esac +} +clang_extra_flags() { + case "$1" in + rv64) echo "-march=rv64gc" ;; + *) echo "" ;; + esac +} + +# Functions the backtrace must symbolize to, innermost first. _start may show +# as ?? (start.o is built without -g), so it is not required. +WANT_FUNCS="bt_leaf bt_mid bt_root test_main" + +run_one() { # <arch> <opt> + local arch="$1" opt="$2" name="$1/O$2 round-trip" + local triple rtlib extra work obj exe start_obj + triple="$(arch_triple "$arch")" || { not_ok "$name" "unknown arch"; return; } + rtlib="$(rt_archive "$arch")" + extra="$(clang_extra_flags "$arch")" + work="$BUILD_DIR/$arch/O$opt" + mkdir -p "$work" + obj="$work/prog.o"; exe="$work/prog.exe"; start_obj="$work/start.o" + + if [ ! -f "$rtlib" ]; then + skip_test "$name" "runtime archive missing at $rtlib"; return + fi + if ! exec_target_supported "$arch"; then + skip_test "$name" "no execution runner"; return + fi + if ! clang --target="$triple" $extra -O1 -ffreestanding -fno-stack-protector \ + -fno-PIC -fno-pie -c "$START_SRC" -o "$start_obj" \ + >"$work/start.out" 2>"$work/start.err"; then + skip_test "$name" "clang cannot build start.o for $triple"; return + fi + + # -g so addr2line has DWARF; static non-PIE so runtime addr == link addr. + # The chain is @[.noinline] + non-tail, so the frames survive at O1 too. + if ! "$KIT" cc -target "$triple" -g -O"$opt" -Werror -c "$PROG_SRC" -o "$obj" \ + >"$work/cc.out" 2>"$work/cc.err"; then + not_ok "$name" "$work/cc.err"; return + fi + if ! 
KIT_TEST_ARCH="$arch" "$LINK_EXE_RUNNER" -o "$exe" "$obj" "$start_obj" \ + --archive "$rtlib" >"$work/link.out" 2>"$work/link.err"; then + not_ok "$name" "$work/link.err"; return + fi + + exec_target_run "$arch" "$exe" "$work/run.out" "$work/run.err" + if [ "$RUN_RC" -ne 42 ]; then + printf 'expected exit 42, got %s\n' "$RUN_RC" > "$work/run.diag" + cat "$work/run.err" >> "$work/run.diag" 2>/dev/null + not_ok "$name" "$work/run.diag"; return + fi + + # Captured backtrace lines -> addresses -> addr2line -f. + local addrs + addrs="$(grep -oE '0x[0-9a-fA-F]+' "$work/run.out" 2>/dev/null | tr '\n' ' ')" + if [ -z "$addrs" ]; then + printf 'no "#N 0xADDR" lines captured; run.out was:\n' > "$work/sym.diag" + cat "$work/run.out" >> "$work/sym.diag" 2>/dev/null + not_ok "$name" "$work/sym.diag"; return + fi + # shellcheck disable=SC2086 + "$KIT" addr2line -f -e "$exe" $addrs >"$work/sym.out" 2>"$work/sym.err" + + local fn missing="" + for fn in $WANT_FUNCS; do + grep -qw "$fn" "$work/sym.out" || missing="$missing $fn" + done + if [ -n "$missing" ]; then + printf 'addr2line missing function(s):%s\naddresses: %s\nsymbolized:\n' \ + "$missing" "$addrs" > "$work/sym.diag" + cat "$work/sym.out" >> "$work/sym.diag" 2>/dev/null + not_ok "$name" "$work/sym.diag"; return + fi + ok "$name" +} + +for arch in ${KIT_RT_RUNTIME_ARCHES:-aa64 x64 rv64}; do + case "$arch" in + aa64|x64|rv64) + for opt in ${KIT_RT_OPT_LEVELS:-0 1}; do run_one "$arch" "$opt"; done ;; + *) not_ok "$arch" "unknown arch" ;; + esac +done + +kit_summary test-rt-addr2line +kit_exit diff --git a/test/rt/addr2line_prog.c b/test/rt/addr2line_prog.c @@ -0,0 +1,56 @@ +/* L3a backtrace round-trip program (driven by test/rt/addr2line.sh). + * + * Wires the weak __kit_backtrace_write sink to a freestanding write(2) and + * calls __kit_print_backtrace() from the bottom of a named, @[.noinline] call + * chain. 
The emitted "#N 0xADDR" lines go to stdout; the harness pipes the
+ * addresses to `kit addr2line -f` and checks that bt_leaf / bt_mid / bt_root /
+ * test_main appear. Built as a static non-PIE ELF, so the return addresses
+ * printed at run time equal the link-time addresses addr2line resolves.
+ *
+ * Linux-only (the rt runtime test tuples are all *-linux); the syscall numbers
+ * are the generic-unified-ABI write(2) for each arch (x86-64 uses its own). */
+#include <kit/backtrace.h>
+/*
+ * bt_write: issue a raw write syscall (no libc) for `len` bytes at `buf` on
+ * descriptor `fd`. The preprocessor picks one of three variants (aarch64,
+ * x86-64, 64-bit RISC-V); any other target is a compile-time #error. Each
+ * variant pins the syscall number and the three arguments to fixed registers
+ * through local register variables, executes the trap instruction, and
+ * returns whatever the result register (x0 / rax / a0) holds afterwards --
+ * presumably the byte count or a negative errno per the Linux raw-syscall
+ * convention; confirm against the kernel ABI. "memory" is listed as a clobber
+ * on every variant, presumably so pending stores to `buf` are completed
+ * before the trap.
+ */
+static long bt_write(int fd, const char* buf, unsigned long len) {
+#if defined(__aarch64__)
+  register long x8 __asm__("x8") = 64; /* sys_write */
+  register long x0 __asm__("x0") = fd; /* in: fd; out: syscall result ("+r") */
+  register long x1 __asm__("x1") = (long)buf;
+  register long x2 __asm__("x2") = (long)len;
+  __asm__ volatile("svc #0" : "+r"(x0) : "r"(x8), "r"(x1), "r"(x2) : "memory");
+  return x0;
+#elif defined(__x86_64__)
+  register long rax __asm__("rax") = 1; /* sys_write; also receives the result */
+  register long rdi __asm__("rdi") = fd;
+  register long rsi __asm__("rsi") = (long)buf;
+  register long rdx __asm__("rdx") = (long)len;
+  __asm__ volatile("syscall"
+                   : "+r"(rax)
+                   : "r"(rdi), "r"(rsi), "r"(rdx)
+                   : "rcx", "r11", "memory"); /* rcx/r11 declared clobbered */
+  return rax;
+#elif defined(__riscv) && __riscv_xlen == 64
+  register long a7 __asm__("a7") = 64; /* sys_write */
+  register long a0 __asm__("a0") = fd; /* in: fd; out: syscall result ("+r") */
+  register long a1 __asm__("a1") = (long)buf;
+  register long a2 __asm__("a2") = (long)len;
+  __asm__ volatile("ecall" : "+r"(a0) : "r"(a7), "r"(a1), "r"(a2) : "memory");
+  return a0;
+#else
+#error "addr2line_prog.c: unsupported architecture"
+#endif
+}
+
+/* Override the weak rt default: route the backtrace bytes to stdout. 
*/
+void __kit_backtrace_write(const char* buf, size_t len) {
+  /* A write may accept fewer bytes than requested, so keep issuing it until
+   * the whole buffer is out. A zero or negative result ends the attempt: the
+   * sink returns void, so there is nowhere to report the failure, and looping
+   * on it could spin forever. */
+  while (len > 0) {
+    long n = bt_write(1, buf, (unsigned long)len);
+    if (n <= 0) return;
+    buf += n;
+    len -= (size_t)n;
+  }
+}
+
+__attribute__((noinline)) int bt_leaf(void) {
+  __kit_print_backtrace();
+  return 1;
+}
+__attribute__((noinline)) int bt_mid(void) { return bt_leaf() + 1; }
+__attribute__((noinline)) int bt_root(void) { return bt_mid() + 1; }
+
+int test_main(void) { return bt_root() == 3 ? 42 : 1; }
diff --git a/test/rt/cases/print_backtrace.c b/test/rt/cases/print_backtrace.c
@@ -0,0 +1,95 @@
+/* __kit_print_backtrace format test (the in-process half of the L3a round-trip).
+ *
+ * Overrides the weak __kit_backtrace_write sink to capture the emitted bytes,
+ * calls __kit_print_backtrace() from the bottom of a known @[.noinline]
+ * recursion, then parses the captured "#N 0xADDR" lines in process. Asserts:
+ * the sink was called and ends on a line boundary; every line is "#<i> 0x<hex>"
+ * with a sequential index and a non-zero address; and the recursive frames
+ * share a return address (so the printed chain really followed the frame
+ * links). The address text is exactly what `kit addr2line -e <image>` consumes,
+ * so a clean parse here is the in-process complement of test/rt/addr2line.sh.
+ * Exits 42. Runs under test/rt/run.sh across the aa64/x64/rv64 tuples. */
+#include <kit/backtrace.h>
+
+#define DEPTH 6
+#define CAP 1024
+#define MAXLINES 128
+
+static char g_buf[CAP];
+static int g_len;
+
+/* Override the weak rt default: capture the raw bytes instead of discarding.
+ * The signature must match the header's (size_t) under -Werror. */
+void __kit_backtrace_write(const char* buf, size_t len) {
+  size_t i;
+  for (i = 0; i < len && g_len < CAP; i++) g_buf[g_len++] = buf[i];
+}
+
+/* Non-tail recursion (work after the call) so every level keeps a live frame,
+ * and noinline so the chain survives if the harness opt level rises. 
*/
+__attribute__((noinline)) static int recurse(int n) {
+  if (n > 0) {
+    int r = recurse(n - 1);
+    return r + 1;
+  }
+  __kit_print_backtrace();
+  return 0;
+}
+
+/* Drives the test: prints a backtrace from the bottom of a DEPTH-deep
+ * recursion, then validates the text captured in g_buf. Returns 42 on
+ * success; any other value names the first check that failed (1-2 capture,
+ * 3-8 line syntax, 9 too few lines, 10 index out of sequence, 11 null
+ * address, 12 recursive frames disagree). */
+int test_main(void) {
+  int idx[MAXLINES];
+  unsigned long addr[MAXLINES];
+  int cnt = 0;
+  int i = 0;
+
+  recurse(DEPTH);
+
+  if (g_len <= 0) return 1;               /* the sink must have been called */
+  if (g_buf[g_len - 1] != '\n') return 2; /* every line is terminated */
+
+  /* Parse each "#<dec> 0x<hex>\n" line strictly, recording index and address. */
+  while (i < g_len) {
+    int v = 0;
+    unsigned long a = 0;
+    if (g_buf[i] != '#') return 3;
+    i++;
+    if (i >= g_len || g_buf[i] < '0' || g_buf[i] > '9') return 4;
+    while (i < g_len && g_buf[i] >= '0' && g_buf[i] <= '9')
+      v = v * 10 + (g_buf[i++] - '0');
+    if (i + 2 >= g_len || g_buf[i] != ' ' || g_buf[i + 1] != '0' ||
+        g_buf[i + 2] != 'x')
+      return 5;
+    i += 3;
+    if (i >= g_len || g_buf[i] == '\n') return 6; /* need >= 1 hex digit */
+    while (i < g_len && g_buf[i] != '\n') {
+      char c = g_buf[i];
+      int h;
+      if (c >= '0' && c <= '9')
+        h = c - '0';
+      else if (c >= 'a' && c <= 'f')
+        h = c - 'a' + 10;
+      else
+        return 7;
+      a = (a << 4) | (unsigned long)h;
+      i++;
+    }
+    if (i >= g_len || g_buf[i] != '\n') return 8;
+    i++; /* consume the newline */
+    if (cnt >= MAXLINES) break;
+    idx[cnt] = v;
+    addr[cnt] = a;
+    cnt++;
+  }
+
+  /* The chain holds at least the recurse() frames plus test_main. */
+  if (cnt < DEPTH + 1) return 9;
+  for (i = 0; i < cnt; i++) {
+    if (idx[i] != i) return 10;  /* frame numbers are sequential from 0 */
+    if (addr[i] == 0) return 11; /* a real frame never has a null retaddr */
+  }
+  /* #1..#DEPTH all return to the single recursive call site, so every one of
+   * them must carry the same return address, not just the first pair. (#0 is
+   * the print call site.) cnt >= DEPTH + 1 was checked above, so addr[DEPTH]
+   * is a parsed entry. */
+  for (i = 2; i <= DEPTH; i++) {
+    if (addr[i] != addr[1]) return 12;
+  }
+
+  return 42;
+}
diff --git a/test/rt/run.sh b/test/rt/run.sh
@@ -113,9 +113,15 @@ kit_lane_R() {
     kit_skip "$name" "clang cannot build start.o for $triple"; return
   fi
+  # Opt axis: KIT_OPT is the corpus opt level ("0"/"1"/…); "-" is the
+  # no-axis sentinel. Pass -O<level> only for a real level so the FP-chain
+  # walk is exercised against both unoptimized and optimized callers.
+  local optflag=""
+  case "$KIT_OPT" in 0|1|2|3|s|z) optflag="-O$KIT_OPT" ;; esac
+
   local obj="$KIT_WORK/$KIT_BASE.o"
   local exe="$KIT_WORK/$KIT_BASE.exe"
-  if ! "$KIT" cc -target "$triple" -Werror -c "$KIT_SRC" -o "$obj" \
+  if ! "$KIT" cc -target "$triple" $optflag -Werror -c "$KIT_SRC" -o "$obj" \
       >"$KIT_WORK/cc.out" 2>"$KIT_WORK/cc.err"; then
     kit_fail "$name" "compile (see $KIT_WORK/cc.err)"; return
   fi
@@ -144,9 +150,13 @@ for arch in $ARCHES; do
   esac
 done
+# Opt axis: sweep O0 and O1 so the runtime cases (notably the FP-chain
+# backtrace walk) are exercised against both unoptimized and optimized
+# callers. Override with KIT_RT_OPT_LEVELS (e.g. "0" for a faster smoke).
 KIT_LABEL=test-rt-runtime KIT_BUILD_DIR="$BUILD_DIR" \
   KIT_CORPUS_GLOBS="$CASES_DIR/*.c" KIT_CORPUS_EXT=c KIT_SIDECAR_DIR="$CASES_DIR" \
-  KIT_LANES="R" KIT_OPT_LEVELS="" KIT_TUPLES="$TUPLES" KIT_TARGETS_EXT="" \
+  KIT_LANES="R" KIT_OPT_LEVELS="${KIT_RT_OPT_LEVELS:-0 1}" KIT_TUPLES="$TUPLES" \
+  KIT_TARGETS_EXT="" \
   KIT_PARALLELIZABLE="${KIT_RT_PARALLEL:-1}" kit_corpus_run
 kit_summary test-rt-runtime
diff --git a/test/rt/smoke.c b/test/rt/smoke.c
@@ -28,6 +28,7 @@
 #include <float.h>
 #include <iso646.h>
+#include <kit/backtrace.h>
 #include <kit/coro.h>
 #include <limits.h>
 #include <setjmp.h>
@@ -212,11 +213,21 @@ static int kit_atomic_ok(void) {
   return 1;
 }
+/* kit/backtrace: the capture/print surface compiles and resolves. Compile-only
+   -- smoke.c never links against a libkit_rt, so the actual walk never runs. 
*/
+static int kit_backtrace_compiles(void) {
+  void* pcs[8];
+  int depth;
+
+  /* Capture into an 8-slot buffer, then exercise the print entry point; the
+   * capture result is returned only so the call is not dead. */
+  depth = __kit_backtrace(pcs, 8, 1);
+  __kit_print_backtrace();
+  return depth;
+}
+
 /* Reference everything so -Wunused-* stays quiet. */
 int kit_smoke_ok(void) {
   (void)aligned_buf;
   if (0) kit_trap();
   if (0) (void)kit_setjmp_compiles(0);
   if (0) (void)kit_coro_compiles();
+  if (0) (void)kit_backtrace_compiles();
   return sum_n(3, 1, 2, 3) == 6 && kit_atomic_ok();
 }