commit 4cd7ae1656d42c902f4e371502ea984b539bd9cf
parent 01a04a7e5bb40b5987cfb63291b193535e09d7b6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 29 Apr 2026 18:46:17 -0700
p1pp: tracepoint machinery
Diffstat:
| M | P1/P1pp.P1pp | | | 93 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | cc/cc.scm | | | 38 | ++++++++++++++++++++++++++++++++++---- |
| A | docs/DEBUG.md | | | 160 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | docs/TCC-TODO.md | | | 44 | +++----------------------------------------- |
| M | scripts/m1-symbols.py | | | 112 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
5 files changed, 396 insertions(+), 51 deletions(-)
diff --git a/P1/P1pp.P1pp b/P1/P1pp.P1pp
@@ -1436,6 +1436,99 @@
})
# =========================================================================
+# Tracepoint
+# =========================================================================
+#
+# %trace(tag_addr, tag_len) — emit a runtime stderr probe at the call
+# site. Prints `[trace @0xHEX TAG]\n` to stderr, where 0xHEX is the
+# runtime address of the first instruction of the trace's own call
+# sequence (the address of `:@here` in this site's expansion) and
+# TAG is the byte string at [tag_addr..tag_addr+tag_len).
+# NOTE(review): the sample output in docs/DEBUG.md shows the address
+# without a "0x" prefix — confirm against fmt_hex's actual format.
+#
+# `tag_addr` is a label reference token (e.g. `&cc__str_3`) — the
+# caller is responsible for emitting the bytes at that label. cc.scm's
+# --cc-trace-emit interns the mangled function name through the
+# regular string pool, which already pads each entry to an 8-byte
+# multiple, so the next item past the tag stays aligned. `tag_len` is
+# the *logical* byte count to print (without trailing NUL or pad).
+#
+# To map a printed address back to source, disassemble the ELF
+# (`scripts/disasm-elf.sh`) and locate the printed address. cc.scm's
+# --cc-trace-emit places a trace call right after each function's
+# prologue, so the printed address sits just past a function entry.
+#
+# Clobbers: a0..a2, ra, t0..t2 (per %call ABI). Use only inside a %fn
+# body where the caller has already spilled live argument regs (or
+# doesn't need them past the trace point).
+%macro trace(tag_addr, tag_len)
+ :@here
+ %la(a0, &@here)
+ %la(a1, tag_addr)
+ %li(a2, tag_len)
+ %call(&libp1pp__trace)
+%endm
+
+# libp1pp__trace(addr=a0, tag_addr=a1, tag_len=a2) — print
+# "[trace @0xHEX TAG]\n" to stderr.
+#
+# The line is written as five eprint calls, in order: the 8-byte
+# prefix (libp1pp__trace_pre), the address as formatted by fmt_hex
+# into libp1pp__num_buf, the 1-byte separator (libp1pp__trace_sep),
+# the caller's tag bytes, and the 2-byte suffix (libp1pp__trace_post).
+# The three arguments are parked in s0..s2 across those calls; the a0
+# value returned by fmt_hex is kept in s3 and handed to eprint as its
+# second argument (presumably the formatted length — confirm against
+# fmt_hex). s0..s3 are saved to and restored from the 32-byte frame.
+%fn(libp1pp__trace, 32, {
+ %st(s0, sp, 0)
+ %st(s1, sp, 8)
+ %st(s2, sp, 16)
+ %st(s3, sp, 24)
+ %mov(s0, a0)
+ %mov(s1, a1)
+ %mov(s2, a2)
+
+ %la(a0, &libp1pp__trace_pre)
+ %li(a1, 8)
+ %call(&eprint)
+
+ %la(a0, &libp1pp__num_buf)
+ %mov(a1, s0)
+ %call(&fmt_hex)
+ %mov(s3, a0)
+ %la(a0, &libp1pp__num_buf)
+ %mov(a1, s3)
+ %call(&eprint)
+
+ %la(a0, &libp1pp__trace_sep)
+ %li(a1, 1)
+ %call(&eprint)
+
+ %mov(a0, s1)
+ %mov(a1, s2)
+ %call(&eprint)
+
+ %la(a0, &libp1pp__trace_post)
+ %li(a1, 2)
+ %call(&eprint)
+
+ %ld(s0, sp, 0)
+ %ld(s1, sp, 8)
+ %ld(s2, sp, 16)
+ %ld(s3, sp, 24)
+})
+
+# Tracepoint message fragments. M0 implicitly NUL-terminates every
+# `"..."` literal, so each blob is (visible bytes + 1 NUL) + hex
+# padding to land on a 16- or 8-byte boundary, keeping the next
+# file-scope label 8-aligned (aarch64 LDR / 4-byte inline-data loads
+# fault otherwise).
+# eprint reads only the leading visible-byte count (8, 1, 2); the
+# trailing NUL + hex zeros are pad.
+#
+# trace_pre  = "[trace @"   8 visible + 1 NUL + 7 pad = 16 bytes
+# trace_sep  = " "          1 visible + 1 NUL + 6 pad =  8 bytes
+# trace_post = ']' '\n'     2 visible         + 6 pad =  8 bytes
+#
+# trace_post avoids `"]"` because that would inject a NUL between
+# the `]` and the `\n` we need adjacent — raw hex bytes give us
+# exact placement.
+:libp1pp__trace_pre "[trace @" '00000000000000'
+:libp1pp__trace_sep " " '000000000000'
+:libp1pp__trace_post '5d' '0a' '000000000000'
+
+# =========================================================================
# Internal data
# =========================================================================
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -240,6 +240,17 @@
(define (debug-log-off!) (set! %debug-log-enabled #f))
(define (debug-log? ) %debug-log-enabled)
+;; --cc-trace-emit: if on, cg-fn-end injects a `%trace(&LBL, LEN)` line
+;; at the top of each emitted function body (right after the prologue's
+;; argument-spill, so the macro is free to clobber a0..a2). Pairs with
+;; libp1pp's %trace macro + libp1pp__trace runtime helper to produce a
+;; stderr line per function entry, with the runtime address of the
+;; trace site itself. See P1/P1pp.P1pp's "Tracepoint" section.
+;;
+;; The flag is a process-wide toggle: off by default, flipped by
+;; trace-emit-on! / trace-emit-off!, and read through trace-emit?.
+(define %trace-emit-enabled #f)
+(define (trace-emit-on!) (set! %trace-emit-enabled #t))
+(define (trace-emit-off!) (set! %trace-emit-enabled #f))
+(define (trace-emit?) %trace-emit-enabled)
+
(define (debug-log msg . irritants)
(cond
(%debug-log-enabled
@@ -3019,6 +3030,21 @@
(buf-push! tb ", {\n")
;; prologue + body, drained byte-for-byte
(buf-drain! tb (cg-prologue-buf cg))
+ ;; --cc-trace-emit: emit `%trace(&LBL, LEN)` between prologue (which
+ ;; spilled live argument regs to slots) and body, so the macro can
+ ;; freely clobber a0..a2. The mangled name rides through the
+ ;; regular string pool — cg-intern-string emits it with a trailing
+ ;; NUL and pads to 8-byte alignment, so the next data label stays
+ ;; aligned. We pass the *logical* byte length (no NUL) so the
+ ;; runtime print stops at the actual end of the name.
+ (cond
+ ((trace-emit?)
+ (let ((tag-lbl (cg-intern-string cg mangled)))
+ (buf-push! tb "%trace(&")
+ (buf-push! tb tag-lbl)
+ (buf-push! tb ", ")
+ (buf-push! tb (%n (bytevector-length mangled)))
+ (buf-push! tb ")\n"))))
(buf-drain! tb (cg-fn-buf cg))
;; ret block: ≤8B → a0; 9–16B → a0+a1; >16B sret → a0 = saved sret ptr.
(buf-push! tb "::ret\n")
@@ -6652,7 +6678,8 @@
;; cc/main.scm — driver. Argv, file I/O, ties phases together.
;; --------------------------------------------------------------------
-;; CLI: cc [--cc-debug] <input.c> <output.P1pp>
+;; CLI: cc [--cc-debug] [--cc-trace-emit] [--lib=PFX]
+;; <input.c> <output.P1pp>
;;
;; scheme1 passes (argv) as a list of bvs; argv[0] is "scheme1", argv[1]
;; is the catm'd compiler source path, argv[2..] are the user-facing
@@ -6717,12 +6744,15 @@
(define (cc-main av)
(let* ((raw (cdr (cdr av)))
(dbg (%cc-flag? raw "--cc-debug"))
- (a1 (%cc-strip-flag raw "--cc-debug")))
+ (a1 (%cc-strip-flag raw "--cc-debug"))
+ (tr (%cc-flag? a1 "--cc-trace-emit"))
+ (a2 (%cc-strip-flag a1 "--cc-trace-emit")))
(cond (dbg (debug-log-on!)))
- (let-values (((lib-prefix args) (%cc-take-lib a1)))
+ (cond (tr (trace-emit-on!)))
+ (let-values (((lib-prefix args) (%cc-take-lib a2)))
(cond
((or (null? args) (null? (cdr args)))
- (die #f "usage: cc [--cc-debug] [--lib=PFX] <input.c> <output.P1pp>")))
+ (die #f "usage: cc [--cc-debug] [--cc-trace-emit] [--lib=PFX] <input.c> <output.P1pp>")))
(let* ((in-path (car args))
(out-path (car (cdr args)))
(lib? (cond ((= 0 (bytevector-length lib-prefix)) #f)
diff --git a/docs/DEBUG.md b/docs/DEBUG.md
@@ -0,0 +1,160 @@
+# DEBUG
+
+Debugging facilities for the cc / P1pp / M1 / hex2 pipeline. Each
+section: what the tool does, how to turn it on, what you get back.
+
+## Tracepoints (`%trace` / `--cc-trace-emit`)
+
+Per-function-entry stderr probe. With `CC_TRACE_EMIT=1` set, cc.scm
+injects a `%trace(&LBL, LEN)` between each function's prologue and
+body. At runtime each entry prints one line:
+
+```
+[trace @601a34 main]
+```
+
+The hex is the runtime address of the trace site itself: the first
+instruction of the trace's call sequence, just past the prologue.
+The trailing word is the mangled function name, interned
+through cc's regular string pool.
+
+Build + run:
+
+```sh
+CC_TRACE_EMIT=1 sh scripts/run-tests.sh --suite cc --arch aarch64 007-call-with-args
+# stderr:
+# [trace @6019fc main]
+# [trace @6018fc g]
+```
+
+For a built ELF outside the test runner:
+
+```sh
+make tcc-boot2 ARCH=aarch64 CC_TRACE_EMIT=1
+./build/aarch64/tcc-boot2/tcc-boot2 -version 2>trace.log
+```
+
+Cost: ~6 instructions + one call per traced function. Off by default;
+the `%trace` macro itself lives in [P1/P1pp.P1pp](../P1/P1pp.P1pp)
+(§Tracepoint) and can also be invoked manually — drop a
+`%trace(&label, len)` into any `combined.M1pp` snapshot under
+`build/$ARCH/.work/<src>/`, re-run the m1pp/M0/hex2 stages, and bisect
+by stderr position.
+
+To map an address back to its function, see the lookup tool below.
+
+## Address → label lookup (`m1-symbols.py lookup`)
+
+Resolves a runtime address (e.g. from a `%trace` line) to its
+enclosing function. Reads the label map straight out of `prog.hex2`
+and finds the largest label address `<= target`, skipping M1pp's
+mangled macro-locals (`:@name` → `:name__N`) so a trace address
+resolves to the *function* containing it, not the trace's own
+`:@here`.
+
+```sh
+# Pass the ELF; the tool reads <ELF>.workdir to find prog.hex2.
+scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/007-call-with-args 0x6019fc 0x6018fc
+# 0x6019fc main+0x24
+# 0x6018fc g+0x2c
+
+# Pipe the trace log through it.
+./build/aarch64/tests/cc/007-call-with-args 2>&1 \
+ | grep -oE '@[0-9a-f]+' | tr -d @ \
+ | scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/007-call-with-args
+```
+
+Other input modes: `--hex2 <prog.hex2>` (skip the sidecar lookup) or
+`--map <file>` (use a pre-built map from `m1-symbols.py map`). Pass
+`--include-macro-locals` to see the closest label even when it's a
+`name__N` artifact — useful when you want to know which trace site
+fired vs. which function it sits in.
+
+Output is `0xADDR\tLABEL+0xN`, tab-separated, one per line.
+
+## Disassembly (`disasm-elf.sh`)
+
+llvm-objdump wrapper that handles two quirks of our seed ELF: oversized
+ph_memsz (truncated to ph_filesz on a temp copy) and the absent section
+table (replaced by labels injected from `prog.hex2`). The output has
+real `<funcname>:` headers and `<PT_LOAD#0+0xNNN>` xrefs rewritten to
+`<label+offset>`:
+
+```sh
+scripts/disasm-elf.sh build/aarch64/tests/cc/007-call-with-args
+# 0000000000601a34 <main>:
+# 601a34: ...
+# 601a40: ldr w17, ... <libp1pp__trace+0x0>
+```
+
+Defaults to `-d` (text). Pass `-D` for data + text. `--start-address`
+defaults to `e_entry` (skipping the on-disk ELF header bytes); override
+with your own `--start-address=` to see the header. The
+`<elf>.workdir` sidecar must exist for label annotation; it's written
+automatically by `boot-build-p1*.sh`. Set `NO_LABELS=1` to disable
+annotation.
+
+## cc.scm phase tracing (`CC_DEBUG=1`)
+
+cc.scm has a sticky `(debug-log ...)` channel for between-phase heap
+usage. Toggle with `CC_DEBUG=1` (boot-build-cc.sh) or `--cc-debug`
+(direct invocation). One stderr line per phase:
+
+```sh
+CC_DEBUG=1 sh scripts/run-tests.sh --suite cc --arch aarch64 007-call-with-args
+# [cc] lex ... heap=...
+# [cc] pp ... heap=...
+# ...
+```
+
+Most useful when bisecting an OOM or watching where parse memory
+balloons. Independent of `CC_TRACE_EMIT` — combine freely.
+
+## Pipeline intermediates (`build/$ARCH/.work/`)
+
+Every P1pp/P1 build leaves its intermediates next to the ELF:
+
+```
+build/$ARCH/.work/<src-path>/
+ combined.M1pp # backend + frontend + libp1pp + user TU, catm'd
+ expanded.M1 # m1pp output, ready for M0
+ prog.hex2 # M0 output (hex2 with labels)
+ linked.hex2 # ELF header + prog.hex2
+ cc.log # cc.scm stderr (if --cc-debug or trace-emit)
+ p1pp.log # m1pp/M0/hex2 stderr
+```
+
+Each ELF also gets a one-line `<elf>.workdir` sidecar pointing at this
+directory — that's how `disasm-elf.sh` and `m1-symbols.py lookup
+--elf` find `prog.hex2`. On a failing build the runner prints the
+partial-intermediates path; on a passing build the files stay around
+for inspection.
+
+To re-run a single intermediate stage by hand: edit the file in place
+and invoke the next tool directly (`build/$ARCH/tools/M0
+prog.hex2.M0`, etc.). Useful for poking `%trace` calls into
+`combined.M1pp` without recompiling cc.scm.
+
+## End-to-end debugging recipe
+
+A typical "binary segfaults, where?" loop:
+
+```sh
+# 1. Build with traces.
+CC_TRACE_EMIT=1 make build/aarch64/tests/cc/myprog ARCH=aarch64
+
+# 2. Run; capture trace.
+./build/aarch64/tests/cc/myprog 2>trace.log; echo "exit=$?"
+
+# 3. Last trace line shows the last function entered before the crash.
+tail -1 trace.log
+# [trace @601b40 parse_decl]
+
+# 4. (Optional) Resolve all addresses to functions for context.
+grep -oE '@[0-9a-f]+' trace.log | tr -d @ \
+ | scripts/m1-symbols.py lookup --elf build/aarch64/tests/cc/myprog
+
+# 5. Inspect the disassembly around the crash site.
+scripts/disasm-elf.sh build/aarch64/tests/cc/myprog \
+ | grep -B 2 -A 20 '<parse_decl>:'
+```
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -279,47 +279,9 @@ decl complete with parse heap at ~31 MB on the 1612-line cut.
## Tracepoint instrumentation (`%trace` / `--cc-trace-emit`)
-For debugging a built ELF that segfaults at startup, the cheapest
-"how far did we get?" probe is the libp1pp `%trace(tag)` macro
-(defined in [P1/P1pp.P1pp](../P1/P1pp.P1pp), §Tracepoint). At each
-call site it emits one stderr line of the form:
-
-```
-[trace @0x000000000060013c]
-```
-
-where the address is the runtime location of the instruction
-immediately following the trace's call sequence. To map an address
-back to a function: disassemble (`scripts/disasm-elf.sh <elf>`) and
-locate the line at that PC — the surrounding `:scope__*` label and
-the `%fn(...)` it sits in identify the source.
-
-The `tag` argument is purely source-level documentation (`grep` for
-`%trace(<tag>)` in your inputs / in the expanded `.M1`). Embedding
-the tag bytes in the runtime print would require length-dependent
-padding to keep the next instruction aligned; we skip that.
-
-`cc.scm --cc-trace-emit` (or `CC_TRACE_EMIT=1` to
-`scripts/boot-build-cc.sh`) injects a `%trace(<mangled>)` line at the
-top of every emitted function body, between argument-spill and body.
-With this on, every call into compiled C produces a stderr line —
-the printed address falls on a known function-entry boundary, so the
-disassembly lookup is one-step.
-
-End-to-end on tcc-boot2:
-
-```
-make tcc-boot2 ARCH=aarch64 CC_TRACE_EMIT=1
-./build/aarch64/tcc-boot2/tcc-boot2 -version 2> trace.log
-scripts/disasm-elf.sh build/aarch64/tcc-boot2/tcc-boot2 \
- | grep -A 2 "<address from last trace.log line>"
-```
-
-Cost per probe: ~6 instrs + the `libp1pp__trace` call. Free-form
-manual `%trace(tag)` inserts work too — drop them into any
-`build/$ARCH/.work/<src-path>/combined.M1pp` snapshot (path is also
-recorded in the sidecar `<elf>.workdir` next to each binary), re-run
-the m1pp/M0/hex2 stages, and bisect by stderr position.
+See [DEBUG.md](DEBUG.md) — `CC_TRACE_EMIT=1` injects per-function-entry
+stderr probes; `m1-symbols.py lookup` resolves the printed addresses
+back to functions.
## Expected next-tier blockers (downstream of cc.scm)
diff --git a/scripts/m1-symbols.py b/scripts/m1-symbols.py
@@ -22,6 +22,14 @@ Subcommands:
Read llvm-objdump output on stdin; inject "ADDR <LABEL>:" headers and
rewrite "<PT_LOAD#0+0xNNN>" xrefs to "<LABEL>" or "<LABEL+N>". Writes
annotated output to stdout.
+
+ lookup (--elf ELF | --hex2 HEX2 | --map MAP) [ADDR ...]
+ For each address, print "0xADDR LABEL+0xN" (or just "LABEL" when
+ delta=0). Pairs with `%trace`'s stderr output: paste a trace
+ address and get the enclosing function. With --elf, locates the
+ sibling <ELF>.workdir sidecar (written by boot-build-p1*.sh) to
+ find prog.hex2, so you don't have to know the .work path.
+ Reads addresses from stdin if none are passed positionally.
"""
import argparse
@@ -80,9 +88,13 @@ def load_map(path):
return by_addr
-def nearest_label(by_addr, sorted_addrs, target):
+def nearest_label(by_addr, sorted_addrs, target, skip=None):
"""Find label whose address is the largest <= target. Returns
- (label, offset) or None."""
+ (label, offset) or None. If `skip` is a callable, addresses whose
+ only labels are all skip(name)==True are walked past — useful for
+ filtering M1pp's `:@name`→`:name__N` macro-locals so a trace
+ address resolves to its enclosing function rather than the
+ macro-internal `:@here`."""
# Binary search for rightmost addr <= target.
lo, hi = 0, len(sorted_addrs)
while lo < hi:
@@ -91,10 +103,24 @@ def nearest_label(by_addr, sorted_addrs, target):
lo = mid + 1
else:
hi = mid
- if lo == 0:
- return None
- addr = sorted_addrs[lo - 1]
- return by_addr[addr][0], target - addr
+ while lo > 0:
+ addr = sorted_addrs[lo - 1]
+ names = by_addr[addr]
+ keep = [n for n in names if not (skip and skip(n))] if skip else names
+ if keep:
+ return keep[0], target - addr
+ lo -= 1
+ return None
+
+
+# Matches M1pp's `:@name`→`:name__N` rewrite. The `__\d+` tail is
+# unique to that mangling, since user labels can't legally end with
+# `__<digits>` (only the M1pp expansion counter produces them).
+MACRO_LOCAL_RE = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*__\d+$')
+
+
+def is_macro_local(name):
+ """Return True if `name` has the `name__N` shape of MACRO_LOCAL_RE.
+
+ Used as the `skip` predicate for nearest_label so lookups walk
+ past M1pp-mangled macro-local labels.
+ """
+ return bool(MACRO_LOCAL_RE.match(name))
# Matches ` 600078: f94003e0 ldr ...` — leading spaces, hex addr, colon.
@@ -144,6 +170,57 @@ def cmd_annotate(args):
print(line)
+def _resolve_hex2_from_elf(elf_path):
+ """Mirror disasm-elf.sh's <elf>.workdir sidecar lookup.
+
+ Reads the work-directory path stored in `<elf_path>.workdir` and
+ returns the path of the `prog.hex2` inside it. Exits the process
+ with a message (via sys.exit) if the sidecar or the prog.hex2 file
+ does not exist.
+ """
+ import os
+ sidecar = elf_path + '.workdir'
+ if not os.path.exists(sidecar):
+ sys.exit(f"m1-symbols: no {sidecar} sidecar — rebuild the ELF "
+ f"with boot-build-p1*.sh to generate it")
+ with open(sidecar) as f:
+ workdir = f.read().strip()
+ # A relative workdir is resolved against the parent of this script's
+ # directory (treated as the repo root), not the caller's cwd.
+ if not os.path.isabs(workdir):
+ repo_root = os.path.abspath(
+ os.path.join(os.path.dirname(__file__), '..'))
+ workdir = os.path.join(repo_root, workdir)
+ hex2 = os.path.join(workdir, 'prog.hex2')
+ if not os.path.exists(hex2):
+ sys.exit(f"m1-symbols: {sidecar} -> {workdir}, but no prog.hex2 there")
+ return hex2
+
+
+def _build_map_from_args(args):
+ """Resolve --elf / --hex2 / --map into the {addr: [labels]} dict.
+
+ With --map the pre-built map is loaded as-is and --base, --header
+ and --ref-size are not consulted. Otherwise prog.hex2 (given
+ directly, or found through the ELF's .workdir sidecar) is parsed
+ and every label offset is rebased to `base + header + offset`.
+ """
+ if args.map:
+ return load_map(args.map)
+ hex2 = args.hex2 or _resolve_hex2_from_elf(args.elf)
+ by_addr = {}
+ # Several labels can share one address, hence the list per key.
+ for off, name in parse_hex2(hex2, ref_size=args.ref_size):
+ by_addr.setdefault(args.base + args.header + off, []).append(name)
+ return by_addr
+
+
+def cmd_lookup(args):
+ by_addr = _build_map_from_args(args)
+ sorted_addrs = sorted(by_addr)
+ skip = None if args.include_macro_locals else is_macro_local
+ addrs = args.addrs or [line.strip() for line in sys.stdin if line.strip()]
+ for raw in addrs:
+ try:
+ target = int(raw, 0) if raw.lower().startswith('0x') \
+ else int(raw, 16)
+ except ValueError:
+ print(f"0x{raw}\t<bad address>")
+ continue
+ hit = nearest_label(by_addr, sorted_addrs, target, skip=skip)
+ if hit is None:
+ print(f"0x{target:x}\t<no label <= addr>")
+ continue
+ label, delta = hit
+ suffix = f"+0x{delta:x}" if delta else ""
+ print(f"0x{target:x}\t{label}{suffix}")
+
+
def main():
ap = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
@@ -160,6 +237,29 @@ def main():
p_an.add_argument('map')
p_an.set_defaults(func=cmd_annotate)
+ p_lk = sub.add_parser('lookup',
+ help='resolve addrs to nearest preceding label')
+ src = p_lk.add_mutually_exclusive_group(required=True)
+ src.add_argument('--elf', help='ELF path; uses <ELF>.workdir sidecar '
+ 'to find prog.hex2')
+ src.add_argument('--hex2', help='prog.hex2 path')
+ src.add_argument('--map', help='pre-built address->label map '
+ '(from `m1-symbols.py map`)')
+ p_lk.add_argument('--base', type=lambda s: int(s, 0), default=0x600000)
+ p_lk.add_argument('--header', type=lambda s: int(s, 0), default=0x78)
+ p_lk.add_argument('--ref-size', type=int, default=4)
+ p_lk.add_argument('--include-macro-locals', action='store_true',
+ help='include M1pp-mangled local labels '
+ '(`:@name` → `:name__N`) when picking the '
+ 'nearest preceding label. Off by default '
+ 'so trace addresses resolve to the '
+ 'enclosing function, not the macro internal '
+ ':@here.')
+ p_lk.add_argument('addrs', nargs='*',
+ help='addresses (hex, with or without 0x prefix); '
+ 'reads stdin one per line if omitted')
+ p_lk.set_defaults(func=cmd_lookup)
+
args = ap.parse_args()
args.func(args)