commit fef118f0953af066d31d0a5b886bc08be8d06f0b
parent 6f4189daceee7ae5a24b181d1b83a1ce80e9d0fd
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 28 Apr 2026 18:43:27 -0700
tcc-boot2: trace pipeline phases and inventory unresolved symbols
The tcc-boot2 build runs cat→m1pp→M0→catm→hex2-0 in one container
invocation, and hex2-0 fails silently when it hits an unresolved
label, leaving no breadcrumb pointing at which tool actually choked.
Two small diagnostic aids, plus a Makefile default that switches the first one on:
- boot-build-p1pp.sh gains a trace() helper gated on P1PP_TRACE=1.
Each pipeline stage prints a one-line marker with the output
path's byte size; missing markers point at the failed phase.
Off by default so the test suite stays quiet.
- The Makefile's tcc-boot2 rule sets P1PP_TRACE=1 by default —
long-running build, the trace is worth its row of output.
- boot-undef.sh runs a comm-style set difference over `&ref` and
`:def` lines in linked.hex2 and prints the unresolved set in
full, since native hex2 only reports the first miss before
bailing.
docs/LIBC.txt is the boot-undef.sh output for the current
tcc.flat.P1pp build. 41 entries — every one of them a genuine libc
external (no cc__-prefixed leftovers), the surface area we have to
provide before tcc-boot2 can link.
Diffstat:
4 files changed, 117 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
@@ -239,7 +239,7 @@ $(TCC_BOOT2_P1PPS): build/%/tcc-boot2/tcc.flat.P1pp: \
$(TCC_BOOT2_BINS): build/%/tcc-boot2/tcc-boot2: \
build/%/tcc-boot2/tcc.flat.P1pp $(P1PP_BUILD_DEPS)
- $(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $< $@
+ $(call PODMAN,$*) env P1PP_TRACE=1 sh scripts/boot-build-p1pp.sh $< $@
# --- Native tools (opt-in dev-loop helpers) -------------------------------
diff --git a/docs/LIBC.txt b/docs/LIBC.txt
@@ -0,0 +1,41 @@
+__assert_fail
+abort
+atoi
+close
+errno
+execvp
+exit
+fclose
+fdopen
+fflush
+fopen
+fprintf
+fputc
+fputs
+fread
+free
+fseek
+ftell
+fwrite
+lseek
+malloc
+memmove
+open
+printf
+qsort
+read
+realloc
+remove
+snprintf
+sprintf
+strchr
+strcpy
+strncmp
+strrchr
+strstr
+strtof
+strtol
+strtoul
+strtoull
+unlink
+vsnprintf
diff --git a/scripts/boot-build-p1pp.sh b/scripts/boot-build-p1pp.sh
@@ -20,10 +20,20 @@
## code-size tax (~a few KB).
##
## Env: ARCH=aarch64|amd64|riscv64
+## P1PP_TRACE=1 — print a one-line marker (phase, output size, output
+## path) after each pipeline stage completes, so the first
+## missing marker names the failing tool. Off by default
+## to keep test runs quiet.
## Usage: boot-build-p1pp.sh <src> <out>
set -eu
+trace() {
+ [ "${P1PP_TRACE:-0}" = "1" ] || return 0
+ label=$1; path=$2
+ sz=$(wc -c < "$path" 2>/dev/null || echo "?")
+ printf '[p1pp %s] %s (%s bytes) %s\n' "$ARCH" "$label" "$sz" "$path" >&2
+}
+
: "${ARCH:?ARCH must be set}"
[ "$#" -eq 2 ] || { echo "usage: ARCH=<arch> $0 <src> <out>" >&2; exit 2; }
@@ -41,12 +51,17 @@ WORK=build/$ARCH/.work/$NAME
mkdir -p "$WORK" "$(dirname "$OUT")"
cat "$BACKEND" "$FRONTEND" "$LIBP1PP" "$SRC" > /tmp/combined.M1pp
+trace "cat: combined" /tmp/combined.M1pp
"$M1PP_BIN" /tmp/combined.M1pp /tmp/expanded.M1
+trace "m1pp: expanded" /tmp/expanded.M1
"$TOOLS/M0" /tmp/expanded.M1 /tmp/prog.hex2
+trace "M0: prog.hex2" /tmp/prog.hex2
cp "$ELF_HDR" /tmp/elf.hex2
"$TOOLS/catm" /tmp/linked.hex2 /tmp/elf.hex2 /tmp/prog.hex2
+trace "catm: linked" /tmp/linked.hex2
"$TOOLS/hex2-0" /tmp/linked.hex2 /tmp/prog.bin
+trace "hex2-0: out" /tmp/prog.bin
cp /tmp/combined.M1pp "$WORK/combined.M1pp"
cp /tmp/expanded.M1 "$WORK/expanded.M1"
diff --git a/scripts/boot-undef.sh b/scripts/boot-undef.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+## scripts/boot-undef.sh — list M1/hex2 references with no matching definition.
+##
+## Cheap-and-cheerful linker diagnostic for the live boot pipeline. linked.hex2
+## (the ELF header plus M0's output, joined by catm) is asm-style: `:label`
+## defines, `&label` references. A symbol with refs but no def is unresolved —
+## the same thing the hex2 linker trips on, except hex2-0 fails silently and
+## native hex2 reports only the first miss before bailing, so this dumps the
+## full list.
+##
+## Defaults to the linked.hex2 produced by the most recent
+## `make tcc-boot2 ARCH=<arch>` build. Run that first if missing.
+##
+## Caveats:
+## - Reads post-m1pp / post-M0 output, so %la(...) macro args are already
+## expanded. Running this on the raw .P1pp would miss them.
+## - m1pp rewrites local labels (@body, @end, ...) to per-expansion suffixed
+## names, so they appear under both refs and defs naturally.
+##
+## Usage:
+## scripts/boot-undef.sh [--arch <aarch64|amd64|riscv64>] [<linked.hex2>]
+
+set -eu
+
+ARCH=aarch64
+LINKED=
+while [ $# -gt 0 ]; do
+ case "$1" in
+ --arch) ARCH=$2; shift 2 ;;
+ -h|--help) sed -n 's/^## \{0,1\}//p' "$0"; exit 0 ;;
+ --) shift; break ;;
+ -*) echo "unknown arg: $1" >&2; exit 2 ;;
+ *) LINKED=$1; shift ;;
+ esac
+done
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+: "${LINKED:=$ROOT/build/$ARCH/.work/tcc.flat/linked.hex2}"
+
+[ -r "$LINKED" ] || {
+ echo "missing $LINKED" >&2
+ echo " run: make tcc-boot2 ARCH=$ARCH" >&2
+ exit 1
+}
+
+REFS=$(mktemp)
+DEFS=$(mktemp)
+trap 'rm -f "$REFS" "$DEFS"' EXIT
+
+grep -oE '&[a-zA-Z_][a-zA-Z_0-9]*' "$LINKED" | cut -c2- | sort -u > "$REFS"
+grep -oE '^:[a-zA-Z_][a-zA-Z_0-9]*' "$LINKED" | cut -c2- | sort -u > "$DEFS"
+
+UNDEF=$(comm -23 "$REFS" "$DEFS")
+NREF=$(wc -l < "$REFS" | tr -d ' ')
+NDEF=$(wc -l < "$DEFS" | tr -d ' ')
+NUND=$(printf '%s\n' "$UNDEF" | grep -c . || true)
+
+printf '[boot-undef %s] %s\n' "$ARCH" "$LINKED" >&2
+printf ' refs=%s defs=%s undef=%s\n' "$NREF" "$NDEF" "$NUND" >&2
+
+[ "$NUND" -eq 0 ] || printf '%s\n' "$UNDEF"