boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 4cd7ae1656d42c902f4e371502ea984b539bd9cf
parent 01a04a7e5bb40b5987cfb63291b193535e09d7b6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 29 Apr 2026 18:46:17 -0700

p1pp: tracepoint machinery

Diffstat:
MP1/P1pp.P1pp | 93+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mcc/cc.scm | 38++++++++++++++++++++++++++++++++++----
Adocs/DEBUG.md | 160+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mdocs/TCC-TODO.md | 44+++-----------------------------------------
Mscripts/m1-symbols.py | 112++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
5 files changed, 396 insertions(+), 51 deletions(-)

diff --git a/P1/P1pp.P1pp b/P1/P1pp.P1pp @@ -1436,6 +1436,99 @@ }) # ========================================================================= +# Tracepoint +# ========================================================================= +# +# %trace(tag_addr, tag_len) — emit a runtime stderr probe at the call +# site. Prints `[trace @0xHEX TAG]\n` to stderr, where 0xHEX is the +# runtime address of the instruction immediately following the trace's +# call sequence (the address of `:@here` in this site's expansion) and +# TAG is the byte string at [tag_addr..tag_addr+tag_len). +# +# `tag_addr` is a label reference token (e.g. `&cc__str_3`) — the +# caller is responsible for emitting the bytes at that label. cc.scm's +# --cc-trace-emit interns the mangled function name through the +# regular string pool, which already pads each entry to an 8-byte +# multiple, so the next item past the tag stays aligned. `tag_len` is +# the *logical* byte count to print (without trailing NUL or pad). +# +# To map a printed address back to source, disassemble the ELF +# (`scripts/disasm-elf.sh`) and locate the printed address. cc.scm +# guarantees that each function's first instruction *is* a trace call, +# so the printed address falls on a known function-entry boundary. +# +# Clobbers: a0..a2, ra, t0..t2 (per %call ABI). Use only inside a %fn +# body where the caller has already spilled live argument regs (or +# doesn't need them past the trace point). +%macro trace(tag_addr, tag_len) + :@here + %la(a0, &@here) + %la(a1, tag_addr) + %li(a2, tag_len) + %call(&libp1pp__trace) +%endm + +# libp1pp__trace(addr=a0, tag_addr=a1, tag_len=a2) — print +# "[trace @0xHEX TAG]\n" to stderr. 
+%fn(libp1pp__trace, 32, { + %st(s0, sp, 0) + %st(s1, sp, 8) + %st(s2, sp, 16) + %st(s3, sp, 24) + %mov(s0, a0) + %mov(s1, a1) + %mov(s2, a2) + + %la(a0, &libp1pp__trace_pre) + %li(a1, 8) + %call(&eprint) + + %la(a0, &libp1pp__num_buf) + %mov(a1, s0) + %call(&fmt_hex) + %mov(s3, a0) + %la(a0, &libp1pp__num_buf) + %mov(a1, s3) + %call(&eprint) + + %la(a0, &libp1pp__trace_sep) + %li(a1, 1) + %call(&eprint) + + %mov(a0, s1) + %mov(a1, s2) + %call(&eprint) + + %la(a0, &libp1pp__trace_post) + %li(a1, 2) + %call(&eprint) + + %ld(s0, sp, 0) + %ld(s1, sp, 8) + %ld(s2, sp, 16) + %ld(s3, sp, 24) +}) + +# Tracepoint message fragments. M0 implicitly NUL-terminates every +# `"..."` literal, so each blob is (visible bytes + 1 NUL) + hex +# padding to land on a 16- or 8-byte boundary, keeping the next +# file-scope label 8-aligned (aarch64 LDR / 4-byte inline-data loads +# fault otherwise. +# eprint reads only the leading visible-byte count (8, 1, 2); the +# trailing NUL + hex zeros are pad. +# +# trace_pre = "[trace @" 8 visible + 1 NUL + 7 pad = 16 bytes +# trace_sep = " " 1 visible + 1 NUL + 6 pad = 8 bytes +# trace_post = ']' '\n' 2 visible 6 pad = 8 bytes +# +# trace_post avoids `"]"` because that would inject a NUL between +# the `]` and the `\n` we need adjacent — raw hex bytes give us +# exact placement. +:libp1pp__trace_pre "[trace @" '00000000000000' +:libp1pp__trace_sep " " '000000000000' +:libp1pp__trace_post '5d' '0a' '000000000000' + +# ========================================================================= # Internal data # ========================================================================= diff --git a/cc/cc.scm b/cc/cc.scm @@ -240,6 +240,17 @@ (define (debug-log-off!) (set! %debug-log-enabled #f)) (define (debug-log? ) %debug-log-enabled) +;; --cc-trace-emit: if on, cg-fn-end injects a `%trace(MANGLED)` line +;; at the top of each emitted function body (right after the prologue's +;; argument-spill, so the macro is free to clobber a0..a3). 
Pairs with +;; libp1pp's %trace macro + libp1pp__trace runtime helper to produce a +;; stderr line per function entry, with the runtime address of the +;; first body instruction. See P1/P1pp.P1pp's "Tracepoint" section. +(define %trace-emit-enabled #f) +(define (trace-emit-on!) (set! %trace-emit-enabled #t)) +(define (trace-emit-off!) (set! %trace-emit-enabled #f)) +(define (trace-emit?) %trace-emit-enabled) + (define (debug-log msg . irritants) (cond (%debug-log-enabled @@ -3019,6 +3030,21 @@ (buf-push! tb ", {\n") ;; prologue + body, drained byte-for-byte (buf-drain! tb (cg-prologue-buf cg)) + ;; --cc-trace-emit: emit `%trace(&LBL, LEN)` between prologue (which + ;; spilled live argument regs to slots) and body, so the macro can + ;; freely clobber a0..a2. The mangled name rides through the + ;; regular string pool — cg-intern-string emits it with a trailing + ;; NUL and pads to 8-byte alignment, so the next data label stays + ;; aligned. We pass the *logical* byte length (no NUL) so the + ;; runtime print stops at the actual end of the name. + (cond + ((trace-emit?) + (let ((tag-lbl (cg-intern-string cg mangled))) + (buf-push! tb "%trace(&") + (buf-push! tb tag-lbl) + (buf-push! tb ", ") + (buf-push! tb (%n (bytevector-length mangled))) + (buf-push! tb ")\n")))) (buf-drain! tb (cg-fn-buf cg)) ;; ret block: ≤8B → a0; 9–16B → a0+a1; >16B sret → a0 = saved sret ptr. (buf-push! tb "::ret\n") @@ -6652,7 +6678,8 @@ ;; cc/main.scm — driver. Argv, file I/O, ties phases together. ;; -------------------------------------------------------------------- -;; CLI: cc [--cc-debug] <input.c> <output.P1pp> +;; CLI: cc [--cc-debug] [--cc-trace-emit] [--lib=PFX] +;; <input.c> <output.P1pp> ;; ;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1] ;; is the catm'd compiler source path, argv[2..] are the user-facing @@ -6717,12 +6744,15 @@ (define (cc-main av) (let* ((raw (cdr (cdr av))) (dbg (%cc-flag? 
raw "--cc-debug")) - (a1 (%cc-strip-flag raw "--cc-debug"))) + (a1 (%cc-strip-flag raw "--cc-debug")) + (tr (%cc-flag? a1 "--cc-trace-emit")) + (a2 (%cc-strip-flag a1 "--cc-trace-emit"))) (cond (dbg (debug-log-on!))) - (let-values (((lib-prefix args) (%cc-take-lib a1))) + (cond (tr (trace-emit-on!))) + (let-values (((lib-prefix args) (%cc-take-lib a2))) (cond ((or (null? args) (null? (cdr args))) - (die #f "usage: cc [--cc-debug] [--lib=PFX] <input.c> <output.P1pp>"))) + (die #f "usage: cc [--cc-debug] [--cc-trace-emit] [--lib=PFX] <input.c> <output.P1pp>"))) (let* ((in-path (car args)) (out-path (car (cdr args))) (lib? (cond ((= 0 (bytevector-length lib-prefix)) #f) diff --git a/docs/DEBUG.md b/docs/DEBUG.md @@ -0,0 +1,160 @@ +# DEBUG + +Debugging facilities for the cc / P1pp / M1 / hex2 pipeline. Each +section: what the tool does, how to turn it on, what you get back. + +## Tracepoints (`%trace` / `--cc-trace-emit`) + +Per-function-entry stderr probe. With `CC_TRACE_EMIT=1` set, cc.scm +injects a `%trace(&LBL, LEN)` between each function's prologue and +body. At runtime each entry prints one line: + +``` +[trace @601a34 main] +``` + +The hex is the runtime address of the instruction immediately after +the trace's call sequence (i.e. the first instruction of the body +proper). The trailing word is the mangled function name, interned +through cc's regular string pool. + +Build + run: + +```sh +CC_TRACE_EMIT=1 sh scripts/run-tests.sh --suite cc --arch aarch64 007-call-with-args +# stderr: +# [trace @6019fc main] +# [trace @6018fc g] +``` + +For a built ELF outside the test runner: + +```sh +make tcc-boot2 ARCH=aarch64 CC_TRACE_EMIT=1 +./build/aarch64/tcc-boot2/tcc-boot2 -version 2>trace.log +``` + +Cost: ~6 instructions + one call per traced function. 
Off by default; +the `%trace` macro itself lives in [P1/P1pp.P1pp](../P1/P1pp.P1pp) +(§Tracepoint) and can also be invoked manually — drop a +`%trace(&label, len)` into any `combined.M1pp` snapshot under +`build/$ARCH/.work/<src>/`, re-run the m1pp/M0/hex2 stages, and bisect +by stderr position. + +To map an address back to its function, see the lookup tool below. + +## Address → label lookup (`m1-symbols.py lookup`) + +Resolves a runtime address (e.g. from a `%trace` line) to its +enclosing function. Reads the label map straight out of `prog.hex2` +and finds the largest label address `<= target`, skipping M1pp's +mangled macro-locals (`:@name` → `:name__N`) so a trace address +resolves to the *function* containing it, not the trace's own +`:@here`. + +```sh +# Pass the ELF; the tool reads <ELF>.workdir to find prog.hex2. +scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/007-call-with-args 0x6019fc 0x6018fc +# 0x6019fc main+0x24 +# 0x6018fc g+0x2c + +# Pipe the trace log through it. +./build/aarch64/tests/cc/007-call-with-args 2>&1 \ + | grep -oE '@[0-9a-f]+' | tr -d @ \ + | scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/007-call-with-args +``` + +Other input modes: `--hex2 <prog.hex2>` (skip the sidecar lookup) or +`--map <file>` (use a pre-built map from `m1-symbols.py map`). Pass +`--include-macro-locals` to see the closest label even when it's a +`name__N` artifact — useful when you want to know which trace site +fired vs. which function it sits in. + +Output is `0xADDR\tLABEL+0xN`, tab-separated, one per line. + +## Disassembly (`disasm-elf.sh`) + +llvm-objdump wrapper that handles two quirks of our seed ELF: oversized +ph_memsz (truncated to ph_filesz on a temp copy) and the absent section +table (replaced by labels injected from `prog.hex2`). 
The output has +real `<funcname>:` headers and `<PT_LOAD#0+0xNNN>` xrefs rewritten to +`<label+offset>`: + +```sh +scripts/disasm-elf.sh build/aarch64/tests/cc/007-call-with-args +# 0000000000601a34 <main>: +# 601a34: ... +# 601a40: ldr w17, ... <libp1pp__trace+0x0> +``` + +Defaults to `-d` (text). Pass `-D` for data + text. `--start-address` +defaults to `e_entry` (skipping the on-disk ELF header bytes); override +with your own `--start-address=` to see the header. The +`<elf>.workdir` sidecar must exist for label annotation; it's written +automatically by `boot-build-p1*.sh`. Set `NO_LABELS=1` to disable +annotation. + +## cc.scm phase tracing (`CC_DEBUG=1`) + +cc.scm has a sticky `(debug-log ...)` channel for between-phase heap +usage. Toggle with `CC_DEBUG=1` (boot-build-cc.sh) or `--cc-debug` +(direct invocation). One stderr line per phase: + +```sh +CC_DEBUG=1 sh scripts/run-tests.sh --suite cc --arch aarch64 007-call-with-args +# [cc] lex ... heap=... +# [cc] pp ... heap=... +# ... +``` + +Most useful when bisecting an OOM or watching where parse memory +balloons. Independent of `CC_TRACE_EMIT` — combine freely. + +## Pipeline intermediates (`build/$ARCH/.work/`) + +Every P1pp/P1 build leaves its intermediates next to the ELF: + +``` +build/$ARCH/.work/<src-path>/ + combined.M1pp # backend + frontend + libp1pp + user TU, catm'd + expanded.M1 # m1pp output, ready for M0 + prog.hex2 # M0 output (hex2 with labels) + linked.hex2 # ELF header + prog.hex2 + cc.log # cc.scm stderr (if --cc-debug or trace-emit) + p1pp.log # m1pp/M0/hex2 stderr +``` + +Each ELF also gets a one-line `<elf>.workdir` sidecar pointing at this +directory — that's how `disasm-elf.sh` and `m1-symbols.py lookup +--elf` find `prog.hex2`. On a failing build the runner prints the +partial-intermediates path; on a passing build the files stay around +for inspection. 
+ +To re-run a single intermediate stage by hand: edit the file in place +and invoke the next tool directly (`build/$ARCH/tools/M0 +prog.hex2.M0`, etc.). Useful for poking `%trace` calls into +`combined.M1pp` without recompiling cc.scm. + +## End-to-end debugging recipe + +A typical "binary segfaults, where?" loop: + +```sh +# 1. Build with traces. +CC_TRACE_EMIT=1 make build/aarch64/tests/cc/myprog ARCH=aarch64 + +# 2. Run; capture trace. +./build/aarch64/tests/cc/myprog 2>trace.log; echo "exit=$?" + +# 3. Last trace line shows the last function entered before the crash. +tail -1 trace.log +# [trace @601b40 parse_decl] + +# 4. (Optional) Resolve all addresses to functions for context. +grep -oE '@[0-9a-f]+' trace.log | tr -d @ \ + | scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/myprog + +# 5. Inspect the disassembly around the crash site. +scripts/disasm-elf.sh build/aarch64/tests/cc/myprog \ + | grep -B 2 -A 20 '<parse_decl>:' +``` diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md @@ -279,47 +279,9 @@ decl complete with parse heap at ~31 MB on the 1612-line cut. ## Tracepoint instrumentation (`%trace` / `--cc-trace-emit`) -For debugging a built ELF that segfaults at startup, the cheapest -"how far did we get?" probe is the libp1pp `%trace(tag)` macro -(defined in [P1/P1pp.P1pp](../P1/P1pp.P1pp), §Tracepoint). At each -call site it emits one stderr line of the form: - -``` -[trace @0x000000000060013c] -``` - -where the address is the runtime location of the instruction -immediately following the trace's call sequence. To map an address -back to a function: disassemble (`scripts/disasm-elf.sh <elf>`) and -locate the line at that PC — the surrounding `:scope__*` label and -the `%fn(...)` it sits in identify the source. - -The `tag` argument is purely source-level documentation (`grep` for -`%trace(<tag>)` in your inputs / in the expanded `.M1`). 
Embedding -the tag bytes in the runtime print would require length-dependent -padding to keep the next instruction aligned; we skip that. - -`cc.scm --cc-trace-emit` (or `CC_TRACE_EMIT=1` to -`scripts/boot-build-cc.sh`) injects a `%trace(<mangled>)` line at the -top of every emitted function body, between argument-spill and body. -With this on, every call into compiled C produces a stderr line — -the printed address falls on a known function-entry boundary, so the -disassembly lookup is one-step. - -End-to-end on tcc-boot2: - -``` -make tcc-boot2 ARCH=aarch64 CC_TRACE_EMIT=1 -./build/aarch64/tcc-boot2/tcc-boot2 -version 2> trace.log -scripts/disasm-elf.sh build/aarch64/tcc-boot2/tcc-boot2 \ - | grep -A 2 "<address from last trace.log line>" -``` - -Cost per probe: ~6 instrs + the `libp1pp__trace` call. Free-form -manual `%trace(tag)` inserts work too — drop them into any -`build/$ARCH/.work/<src-path>/combined.M1pp` snapshot (path is also -recorded in the sidecar `<elf>.workdir` next to each binary), re-run -the m1pp/M0/hex2 stages, and bisect by stderr position. +See [DEBUG.md](DEBUG.md) — `CC_TRACE_EMIT=1` injects per-function-entry +stderr probes; `m1-symbols.py lookup` resolves the printed addresses +back to functions. ## Expected next-tier blockers (downstream of cc.scm) diff --git a/scripts/m1-symbols.py b/scripts/m1-symbols.py @@ -22,6 +22,14 @@ Subcommands: Read llvm-objdump output on stdin; inject "ADDR <LABEL>:" headers and rewrite "<PT_LOAD#0+0xNNN>" xrefs to "<LABEL>" or "<LABEL+N>". Writes annotated output to stdout. + + lookup [--elf ELF | --hex2 HEX2 | --map MAP] ADDR [ADDR ...] + For each address, print "0xADDR LABEL+0xN" (or just "LABEL" when + delta=0). Pairs with `%trace`'s stderr output: paste a trace + address and get the enclosing function. With --elf, locates the + sibling <ELF>.workdir sidecar (written by boot-build-p1*.sh) to + find prog.hex2, so you don't have to know the .work path. 
+ Reads addresses from stdin if none are passed positionally. """ import argparse @@ -80,9 +88,13 @@ def load_map(path): return by_addr -def nearest_label(by_addr, sorted_addrs, target): +def nearest_label(by_addr, sorted_addrs, target, skip=None): """Find label whose address is the largest <= target. Returns - (label, offset) or None.""" + (label, offset) or None. If `skip` is a callable, addresses whose + only labels are all skip(name)==True are walked past — useful for + filtering M1pp's `:@name`→`:name__N` macro-locals so a trace + address resolves to its enclosing function rather than the + macro-internal `:@here`.""" # Binary search for rightmost addr <= target. lo, hi = 0, len(sorted_addrs) while lo < hi: @@ -91,10 +103,24 @@ def nearest_label(by_addr, sorted_addrs, target): lo = mid + 1 else: hi = mid - if lo == 0: - return None - addr = sorted_addrs[lo - 1] - return by_addr[addr][0], target - addr + while lo > 0: + addr = sorted_addrs[lo - 1] + names = by_addr[addr] + keep = [n for n in names if not (skip and skip(n))] if skip else names + if keep: + return keep[0], target - addr + lo -= 1 + return None + + +# Matches M1pp's `:@name`→`:name__N` rewrite. The `__\d+` tail is +# unique to that mangling, since user labels can't legally end with +# `__<digits>` (only the M1pp expansion counter produces them). +MACRO_LOCAL_RE = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*__\d+$') + + +def is_macro_local(name): + return bool(MACRO_LOCAL_RE.match(name)) # Matches ` 600078: f94003e0 ldr ...` — leading spaces, hex addr, colon. 
@@ -144,6 +170,57 @@ def cmd_annotate(args): print(line) +def _resolve_hex2_from_elf(elf_path): + """Mirror disasm-elf.sh's <elf>.workdir sidecar lookup.""" + import os + sidecar = elf_path + '.workdir' + if not os.path.exists(sidecar): + sys.exit(f"m1-symbols: no {sidecar} sidecar — rebuild the ELF " + f"with boot-build-p1*.sh to generate it") + with open(sidecar) as f: + workdir = f.read().strip() + if not os.path.isabs(workdir): + repo_root = os.path.abspath( + os.path.join(os.path.dirname(__file__), '..')) + workdir = os.path.join(repo_root, workdir) + hex2 = os.path.join(workdir, 'prog.hex2') + if not os.path.exists(hex2): + sys.exit(f"m1-symbols: {sidecar} -> {workdir}, but no prog.hex2 there") + return hex2 + + +def _build_map_from_args(args): + """Resolve --elf / --hex2 / --map into the {addr: [labels]} dict.""" + if args.map: + return load_map(args.map) + hex2 = args.hex2 or _resolve_hex2_from_elf(args.elf) + by_addr = {} + for off, name in parse_hex2(hex2, ref_size=args.ref_size): + by_addr.setdefault(args.base + args.header + off, []).append(name) + return by_addr + + +def cmd_lookup(args): + by_addr = _build_map_from_args(args) + sorted_addrs = sorted(by_addr) + skip = None if args.include_macro_locals else is_macro_local + addrs = args.addrs or [line.strip() for line in sys.stdin if line.strip()] + for raw in addrs: + try: + target = int(raw, 0) if raw.lower().startswith('0x') \ + else int(raw, 16) + except ValueError: + print(f"0x{raw}\t<bad address>") + continue + hit = nearest_label(by_addr, sorted_addrs, target, skip=skip) + if hit is None: + print(f"0x{target:x}\t<no label <= addr>") + continue + label, delta = hit + suffix = f"+0x{delta:x}" if delta else "" + print(f"0x{target:x}\t{label}{suffix}") + + def main(): ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -160,6 +237,29 @@ def main(): p_an.add_argument('map') p_an.set_defaults(func=cmd_annotate) + p_lk = sub.add_parser('lookup', + 
help='resolve addrs to nearest preceding label') + src = p_lk.add_mutually_exclusive_group(required=True) + src.add_argument('--elf', help='ELF path; uses <ELF>.workdir sidecar ' + 'to find prog.hex2') + src.add_argument('--hex2', help='prog.hex2 path') + src.add_argument('--map', help='pre-built address->label map ' + '(from `m1-symbols.py map`)') + p_lk.add_argument('--base', type=lambda s: int(s, 0), default=0x600000) + p_lk.add_argument('--header', type=lambda s: int(s, 0), default=0x78) + p_lk.add_argument('--ref-size', type=int, default=4) + p_lk.add_argument('--include-macro-locals', action='store_true', + help='include M1pp-mangled local labels ' + '(`:@name` → `:name__N`) when picking the ' + 'nearest preceding label. Off by default ' + 'so trace addresses resolve to the ' + 'enclosing function, not the macro internal ' + ':@here.') + p_lk.add_argument('addrs', nargs='*', + help='addresses (hex, with or without 0x prefix); ' + 'reads stdin one per line if omitted') + p_lk.set_defaults(func=cmd_lookup) + args = ap.parse_args() args.func(args)