commit cdce36fdc3fcc9b36301ebb36784a8d36208ea10
parent 46357be005c8c1f148a395e799779dd19dbaafaf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 4 May 2026 09:34:21 -0700
boot3 drop host cc for amd64/riscv64
Diffstat:
3 files changed, 93 insertions(+), 62 deletions(-)
diff --git a/scripts/boot3.sh b/scripts/boot3.sh
@@ -35,19 +35,17 @@
## build/$ARCH/vendor/mes-libc/libc.flat.c — flattened mes-libc TU
##
## ─── Inputs (binaries from prior stages) ──────────────────────────────
-## build/$ARCH/boot0/catm — built by scripts/boot0.sh
-## build/$ARCH/boot1/{M1pp, hex2pp} — built by scripts/boot1.sh
+## build/$ARCH/boot1/{M1pp, hex2pp, catm} — built by scripts/boot1.sh
## build/$ARCH/boot2/scheme1 — built by scripts/boot2.sh
##
## ─── Tools ────────────────────────────────────────────────────────────
## In container: scratch + busybox (no libc, no /etc, no resolver).
-## On host: cross-assembler for {start.S, sys_stubs.S}:
-## aarch64/amd64 -> $HOST_CC -target ...
-## riscv64 -> boot2-alpine-gcc:riscv64 image
-## (built on demand from
-## scripts/Containerfile.alpine-gcc)
-## These two .S files are the only host-side build step
-## in stage 3; tcc-boot2's codegen does not accept .S.
+## On host: aarch64 only — cross-assembler for {start.S,
+## sys_stubs.S} via $HOST_CC -target aarch64-linux-gnu.
+## tcc 0.9.26 has no aarch64 assembler (no arm64-asm.c),
+## so .S inputs are pre-compiled host-side. amd64 and
+## riscv64 have CONFIG_TCC_ASM in their backends and feed
+## .S to tcc-boot2 (stage B) and tcc-tcc (stage D) — no host tool.
##
## ─── Outputs ──────────────────────────────────────────────────────────
## build/$ARCH/boot3/tcc-boot2 — cc.scm-built tcc (compile 1)
@@ -66,19 +64,16 @@ ARCH=$1
case "$ARCH" in
aarch64) PLATFORM=linux/arm64;
TCC_TARGET=ARM64;
- HOST_CC_TARGET=aarch64-linux-gnu;
LIB_HELPER_SRC=lib-arm64.c;
LIB_HELPER_OBJ=lib-arm64.o;
LIB_HELPER_DEFINES="-D HAVE_CONFIG_H=1 -D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1" ;;
amd64) PLATFORM=linux/amd64;
TCC_TARGET=X86_64;
- HOST_CC_TARGET=x86_64-linux-gnu;
LIB_HELPER_SRC=va_list.c;
LIB_HELPER_OBJ=va_list.o;
LIB_HELPER_DEFINES="-D TCC_TARGET_X86_64=1" ;;
riscv64) PLATFORM=linux/riscv64;
TCC_TARGET=RISCV64;
- HOST_CC_TARGET=;
LIB_HELPER_SRC=lib-arm64.c;
LIB_HELPER_OBJ=lib-arm64.o;
LIB_HELPER_DEFINES="-D HAVE_CONFIG_H=1 -D TCC_TARGET_RISCV64=1" ;;
@@ -89,8 +84,6 @@ ROOT=$(cd "$(dirname "$0")/.." && pwd)
cd "$ROOT"
IMAGE=boot2-scratch:$ARCH
-ALPINE_IMAGE=boot2-alpine-gcc:$ARCH
-BOOT0=build/$ARCH/boot0
BOOT1=build/$ARCH/boot1
BOOT2=build/$ARCH/boot2
OUT=build/$ARCH/boot3
@@ -109,7 +102,7 @@ if ! podman image exists "$IMAGE"; then
fi
# ── prerequisite: prior-stage binaries ────────────────────────────────
-[ -x "$BOOT0/catm" ] || { echo "[boot3 $ARCH] missing $BOOT0/catm (run scripts/boot0.sh $ARCH)" >&2; exit 1; }
+[ -x "$BOOT1/catm" ] || { echo "[boot3 $ARCH] missing $BOOT1/catm (run scripts/boot1.sh $ARCH)" >&2; exit 1; }
[ -x "$BOOT1/M1pp" ] || { echo "[boot3 $ARCH] missing $BOOT1/M1pp (run scripts/boot1.sh $ARCH)" >&2; exit 1; }
[ -x "$BOOT1/hex2pp" ] || { echo "[boot3 $ARCH] missing $BOOT1/hex2pp (run scripts/boot1.sh $ARCH)" >&2; exit 1; }
[ -x "$BOOT2/scheme1" ] || { echo "[boot3 $ARCH] missing $BOOT2/scheme1 (run scripts/boot2.sh $ARCH)" >&2; exit 1; }
@@ -125,7 +118,7 @@ rm -rf "$STAGE"
mkdir -p "$STAGE/in" "$STAGE/in/tcc-include" "$STAGE/out" "$OUT"
# Prior-stage binaries
-cp "$BOOT0/catm" "$STAGE/in/catm"
+cp "$BOOT1/catm" "$STAGE/in/catm"
cp "$BOOT1/M1pp" "$STAGE/in/M1pp"
cp "$BOOT1/hex2pp" "$STAGE/in/hex2pp"
cp "$BOOT2/scheme1" "$STAGE/in/scheme1"
@@ -160,28 +153,21 @@ cp "$LIBC_FLAT" "$STAGE/in/libc.flat.c"
# -I resolves stdarg.h etc. Recursive cp keeps directory layout.
cp -R "$TCC_DIR/include/." "$STAGE/in/tcc-include/"
-# ── HOST cross-assembly of start.o + sys_stubs.o ──────────────────────
-# tcc-boot2's codegen does not accept .S. Two-line shim in host tooling.
-host_asm() {
- out=$1; src=$2
- if [ "$ARCH" = "riscv64" ]; then
- if ! podman image exists "$ALPINE_IMAGE"; then
- echo "[boot3 $ARCH] building $ALPINE_IMAGE"
- podman build --platform "$PLATFORM" -t "$ALPINE_IMAGE" \
- -f scripts/Containerfile.alpine-gcc scripts/
- fi
- podman run --rm --pull=never --platform "$PLATFORM" \
- -v "$ROOT/$STAGE:/work" -w /work "$ALPINE_IMAGE" \
- cc -c -o "$out" -x assembler "$src"
- else
- $HOST_CC -target "$HOST_CC_TARGET" -c -o "$ROOT/$STAGE/$out" \
- -x assembler "$ROOT/$STAGE/$src"
- fi
-}
-
-echo "[boot3 $ARCH] cross-asm: start.S + sys_stubs.S -> .o (host)"
-host_asm in/start.o in/start.S
-host_asm in/sys_stubs.o in/sys_stubs.S
+# ── HOST cross-assembly of start.o + sys_stubs.o (aarch64 only) ───────
+# tcc 0.9.26's aarch64 backend has no assembler (no arm64-asm.c), so
+# .S inputs are pre-compiled host-side. amd64 and riscv64 backends ship
+# CONFIG_TCC_ASM and assemble .S directly inside the container in
+# stages B+D (build_asm, run with tcc-boot2 and then tcc-tcc).
+if [ "$ARCH" = "aarch64" ]; then
+ echo "[boot3 $ARCH] cross-asm: start.S + sys_stubs.S -> .o (host)"
+ $HOST_CC -target aarch64-linux-gnu -c \
+ -o "$ROOT/$STAGE/in/start.o" -x assembler "$ROOT/$STAGE/in/start.S"
+ $HOST_CC -target aarch64-linux-gnu -c \
+ -o "$ROOT/$STAGE/in/sys_stubs.o" -x assembler "$ROOT/$STAGE/in/sys_stubs.S"
+ ASM_BUILD_NEEDED=0
+else
+ ASM_BUILD_NEEDED=1
+fi
# ── run the full Stage A + B + C + D pipeline in one container ────────
# Stage A: cc.scm bundle, libc.P1pp + tcc.flat.P1pp via scheme1 + cc.scm,
@@ -195,6 +181,7 @@ podman run --rm -i --pull=never --platform "$PLATFORM" \
-e LIB_HELPER_SRC="$LIB_HELPER_SRC" \
-e LIB_HELPER_OBJ="$LIB_HELPER_OBJ" \
-e LIB_HELPER_DEFINES="$LIB_HELPER_DEFINES" \
+ -e ASM_BUILD_NEEDED="$ASM_BUILD_NEEDED" \
-v "$ROOT/$STAGE:/work" -w /work "$IMAGE" \
sh -eu -s <<'CONTAINER'
IN=/work/in
@@ -211,15 +198,29 @@ $IN/scheme1 /tmp/cc-bundled.scm --lib=libc__ $IN/libc.flat.c /tmp/libc.P1pp
$IN/scheme1 /tmp/cc-bundled.scm --lib=tcc__ $IN/tcc.flat.c /tmp/tcc.flat.P1pp
# ── Stage A.4: M1pp + hex2pp pipeline -> tcc-boot2 ELF ────────────────
-cat $IN/backend.M1pp $IN/frontend.M1pp $IN/libp1pp.P1pp \
- $IN/entry-libc.P1pp /tmp/libc.P1pp /tmp/tcc.flat.P1pp $IN/elf-end.P1pp \
- > /tmp/combined.M1pp
+$IN/catm /tmp/combined.M1pp \
+ $IN/backend.M1pp $IN/frontend.M1pp $IN/libp1pp.P1pp \
+ $IN/entry-libc.P1pp /tmp/libc.P1pp /tmp/tcc.flat.P1pp $IN/elf-end.P1pp
$IN/M1pp /tmp/combined.M1pp /tmp/expanded.hex2pp
-cat $IN/ELF.hex2 /tmp/expanded.hex2pp > /tmp/linked.hex2pp
+$IN/catm /tmp/linked.hex2pp $IN/ELF.hex2 /tmp/expanded.hex2pp
$IN/hex2pp -B 0x600000 /tmp/linked.hex2pp $OUT/tcc-boot2
-chmod +x $OUT/tcc-boot2
# ── Stage B: tcc-boot2 builds helper objects ──────────────────────────
+# build_asm produces start.o + sys_stubs.o into $workdir. amd64/riscv64
+# assemble .S in-container via tcc's CONFIG_TCC_ASM (no -include flag —
+# the asm parser doesn't accept C typedefs from va_list_shim.h's
+# transitive stdarg.h include). aarch64 has no in-tcc assembler, so the
+# host-built .o is copied through from $IN.
+build_asm() {
+ cc=$1; workdir=$2
+ if [ "$ASM_BUILD_NEEDED" = "1" ]; then
+ "$cc" -nostdlib -c -o "$workdir/start.o" "$IN/start.S"
+ "$cc" -nostdlib -c -o "$workdir/sys_stubs.o" "$IN/sys_stubs.S"
+ else
+ cp "$IN/start.o" "$workdir/start.o"
+ cp "$IN/sys_stubs.o" "$workdir/sys_stubs.o"
+ fi
+}
build_helpers() {
cc=$1; workdir=$2
"$cc" -nostdlib -I "$TCC_INC" -c -o "$workdir/mem.o" $IN/mem.c
@@ -230,19 +231,23 @@ build_helpers() {
-c -o "$workdir/$LIB_HELPER_OBJ" "$IN/$LIB_HELPER_SRC"
}
mkdir -p /tmp/stage2 /tmp/stage3
+build_asm $OUT/tcc-boot2 /tmp/stage2
build_helpers $OUT/tcc-boot2 /tmp/stage2
# ── Stage C: tcc-boot2 -> tcc-tcc ─────────────────────────────────────
$OUT/tcc-boot2 -nostdlib -I "$TCC_INC" -include $IN/va_list_shim.h \
- $IN/start.o $IN/sys_stubs.o /tmp/stage2/mem.o /tmp/stage2/libc.o \
+ /tmp/stage2/start.o /tmp/stage2/sys_stubs.o \
+ /tmp/stage2/mem.o /tmp/stage2/libc.o \
/tmp/stage2/$LIB_HELPER_OBJ \
$IN/tcc.flat.c -o $OUT/tcc-tcc
chmod +x $OUT/tcc-tcc
# ── Stage D: tcc-tcc rebuilds helpers, links tcc-tcc-tcc ──────────────
+build_asm $OUT/tcc-tcc /tmp/stage3
build_helpers $OUT/tcc-tcc /tmp/stage3
$OUT/tcc-tcc -nostdlib -I "$TCC_INC" -include $IN/va_list_shim.h \
- $IN/start.o $IN/sys_stubs.o /tmp/stage3/mem.o /tmp/stage3/libc.o \
+ /tmp/stage3/start.o /tmp/stage3/sys_stubs.o \
+ /tmp/stage3/mem.o /tmp/stage3/libc.o \
/tmp/stage3/$LIB_HELPER_OBJ \
$IN/tcc.flat.c -o $OUT/tcc-tcc-tcc
chmod +x $OUT/tcc-tcc-tcc
diff --git a/tcc-libc/riscv64/start.S b/tcc-libc/riscv64/start.S
@@ -1,20 +1,42 @@
/* tcc-libc entry stub — riscv64 sibling of tcc-libc/aarch64/start.S.
* Linux brings argc at [sp] and argv at sp+8 on entry. Call
* __libc_init(argc, argv) so `environ` is set, then main(argc, argv),
- * then exit with main's return value. */
+ * then exit with main's return value.
+ *
+ * Built by two assemblers from the same source: GAS (host alpine-gcc
+ * for the Makefile harness path) and tcc 0.9.26's riscv64-asm.c (in
+ * scripts/boot3.sh, scratch container). They agree on most mnemonics
+ * but diverge on load/store memory syntax: GAS uses `ld rd, off(rs)`,
+ * tcc-asm uses the 3-operand `ld rd, rs, off`. The LD8/ST8 macros below
+ * branch on __TINYC__ to keep one source of truth.
+ *
+ * `jal ra, sym` and `jalr zero, ra, 0` are the canonical 2/3-operand
+ * forms both assemblers accept (GAS's `call` / `ret` / `j` pseudos are
+ * not in tcc-asm), so callsites use those directly. */
+
+/* tcc-asm's S-type operand order takes the base register first, opposite of
+ * GAS: `sd base, src, off` vs GAS's `sd src, off(base)`. Hide the
+ * difference behind ST8(src, base, off). */
+#ifdef __TINYC__
+# define LD8(rd, base, off) ld rd, base, off
+# define ST8(src, base, off) sd base, src, off
+#else
+# define LD8(rd, base, off) ld rd, off(base)
+# define ST8(src, base, off) sd src, off(base)
+#endif
.globl _start
_start:
- ld a0, 0(sp) /* argc */
+ LD8(a0, sp, 0) /* argc */
addi a1, sp, 8 /* argv */
addi sp, sp, -16 /* save argc/argv across __libc_init */
- sd a0, 0(sp)
- sd a1, 8(sp)
- call __libc_init
- ld a0, 0(sp)
- ld a1, 8(sp)
+ ST8(a0, sp, 0)
+ ST8(a1, sp, 8)
+ jal ra, __libc_init
+ LD8(a0, sp, 0)
+ LD8(a1, sp, 8)
addi sp, sp, 16
- call main
+ jal ra, main
/* main's return is in a0 — feed it to exit(2). */
li a7, 93 /* NR_exit */
ecall
diff --git a/tcc-libc/riscv64/sys_stubs.S b/tcc-libc/riscv64/sys_stubs.S
@@ -12,7 +12,10 @@
* sys_open/sys_unlink shuffle args because Linux's openat/unlinkat
* take an AT_FDCWD prefix that the libp1pp-compatible wrappers
* don't surface to callers.
- */
+ *
+ * tcc 0.9.26's riscv64-asm.c lacks the GAS `ret` / `j` pseudos, so
+ * callsites use the canonical `jalr zero, ra, 0` / `jal zero, label`
+ * forms — both assemblers accept them. */
.globl sys_read, sys_write, sys_close, sys_open
.globl sys_lseek, sys_brk, sys_unlink, sys_exit
@@ -20,17 +23,17 @@
sys_read:
li a7, 63
ecall
- ret
+ jalr zero, ra, 0
sys_write:
li a7, 64
ecall
- ret
+ jalr zero, ra, 0
sys_close:
li a7, 57
ecall
- ret
+ jalr zero, ra, 0
sys_open:
/* (path, flags, mode) -> openat(AT_FDCWD, path, flags, mode) */
@@ -40,17 +43,17 @@ sys_open:
li a0, -100 /* AT_FDCWD */
li a7, 56
ecall
- ret
+ jalr zero, ra, 0
sys_lseek:
li a7, 62
ecall
- ret
+ jalr zero, ra, 0
sys_brk:
li a7, 214
ecall
- ret
+ jalr zero, ra, 0
sys_unlink:
/* (path) -> unlinkat(AT_FDCWD, path, 0) */
@@ -59,10 +62,11 @@ sys_unlink:
li a2, 0 /* flags */
li a7, 35
ecall
- ret
+ jalr zero, ra, 0
sys_exit:
li a7, 93
ecall
/* unreachable */
-1: j 1b
+sys_exit_spin:
+ jal zero, sys_exit_spin