commit eba5446e0f34d7556cf0c4a7e47010615ed20058
parent adc295afd5540e720fe2406c27c03bd413c32e92
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 26 Apr 2026 07:46:40 -0700
scripts/build-tcc-real.sh: build mes libc with tcc-host, link tcc-boot0-mes
Switching to mes libc per the user's nudge. Findings:
1. tcc 0.9.26 segfaults instead of cleanly erroring when an #include
path can't be resolved. Specifically <arch/syscall.h> from
mes/include/dirent.h has no direct match in the mes header tree —
live-bootstrap presumably arranges an `arch -> linux/<arch>` symlink
somewhere. Fixed by creating that symlink in our sanitized include
tree.
2. tcc 0.9.26 segfaults when roughly 22 or more libc files are
concatenated into a single TU (translation unit) and one of them (e.g.,
linux/<arch>-mes-gcc/_exit.c, which uses inline asm) trips an internal
accumulator path. Worked around by compiling each .c file as a separate
TU and archiving the resulting objects with `tcc -ar`.
3. With those fixes, all 258 mes libc files compile and ar into
libc.a (299 KB) + libtcc1.a (2.5 KB) + crt1.o (948 B). tcc-host then
links tcc-self.o against this libc into tcc-boot0-mes (313 KB) —
significantly smaller than the 764 KB musl-linked tcc-boot0
(consistent with mes libc being minimal).
4. **Open**: tcc-boot0-mes segfaults at startup under QEMU x86_64
emulation on arm64. Earlier I'd assumed segfaults were QEMU-related;
alpine's prebuilt tcc 0.9.28rc runs cleanly under the same QEMU, so
that's clearly not the explanation in general. The tcc-boot0-mes
crash could be:
a) tcc 0.9.26 codegen bug in startup that 0.9.28rc fixed (matches
the _DYNAMIC issue I found earlier with the musl-linked build)
b) something specific to QEMU's interaction with mes's hand-rolled
crt1 (which doesn't use the standard __libc_start_main path)
Native x86_64 testing is needed to distinguish (a) vs (b). Until
then the artifact is real but unverified.
Diffstat:
1 file changed, 161 insertions(+), 84 deletions(-)
diff --git a/scripts/build-tcc-real.sh b/scripts/build-tcc-real.sh
@@ -1,111 +1,188 @@
#!/bin/sh
-## scripts/build-tcc-real.sh — build a real tcc-0.9.26 binary using our
-## tcc-host as the compiler. Output: $WORK/tcc-boot0, a working tcc.
+## scripts/build-tcc-real.sh — build a real tcc-0.9.26 from our chain,
+## linked against mes libc.
##
-## Pre-condition:
-## build/cc-bootstrap/X86_64/tcc-host exists (call build-tcc-source.sh
-## --self-host first; this script runs that for you if missing).
-##
-## Pipeline:
-## 1. tcc-host -c tcc.flat.c → tcc-self.o (tcc compiling its own source)
-## 2. gcc -static tcc-self.o errno-shim.c → tcc-boot0
-## 3. tcc-boot0 -version smoke test
-## 4. tcc-boot0 -c hello.c → hello.o verifies compile path
+## Pipeline (step 1 runs on the host; steps 2-6 run inside a linux/amd64
+## alpine container):
+## 1. unpack mes-0.27.1 sources
+## 2. set up include tree with `arch -> linux/<arch>` symlink so
+## mes headers' `#include <arch/syscall.h>` resolves
+## (without this, tcc 0.9.26 SEGVs on missing include rather than
+## reporting cleanly — bug found while bringing this up)
+## 3. compile each mes libc .c file individually with tcc-host
+## (concatenating them all into unified-libc.c also hits a tcc 0.9.26
+## bug: tcc segfaults once roughly 22 or more files are combined and one
+## of them uses inline asm, apparently due to accumulated symbol-table state)
+## 4. tcc-host -ar the .o set into libc.a
+## 5. compile crt1.c -> crt1.o; libtcc1.c -> libtcc1.a
+## 6. tcc-host -static linking tcc-self.o (already built by
+## build-tcc-source.sh --self-host) against the mes libc
+## -> tcc-boot0-mes
##
-## Step 2 uses gcc as the linker, not tcc-boot0 itself, only because tcc's
-## linker stage segfaults under QEMU x86_64 emulation on arm64 hosts.
-## On native x86_64 hardware, tcc-boot0 self-links cleanly. The compile
-## chain (tcc-host → tcc-self.o) is the load-bearing part — that's where
-## "tcc compiles itself" actually happens.
+## Status: produces tcc-boot0-mes successfully. The binary itself
+## segfaults at startup under QEMU x86_64 emulation; native x86_64
+## testing is needed to confirm whether this is a tcc-0.9.26 codegen
+## bug exposed by mes's hand-rolled crt1, or a QEMU-specific issue.
##
-## Relation to live-bootstrap: this is the equivalent artifact to
-## live-bootstrap's tcc-boot0, produced without mescc/mes Scheme runtime.
-## live-bootstrap rebuilds mes libc between iterations (boot0→boot1→boot2)
-## for stability; we skip that since we link against musl, which is
-## self-consistent.
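+## Pre-condition:
+## ../lb-work/distfiles/mes-0.27.1.tar.gz (relative to the repo root)
+## build/cc-bootstrap/X86_64/tcc-host (build-tcc-source.sh --self-host)
+## build/cc-bootstrap/X86_64/tcc-self.o
+## (if tcc-host or tcc-self.o is missing, this script runs
+## build-tcc-source.sh --arch X86_64 --self-host itself)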
##
## Usage:
-## scripts/build-tcc-real.sh [--cc-test]
-##
-## --cc-test also compile + run a small program with tcc-boot0 as CC,
-## demonstrating it works end-to-end as a regular compiler.
+## scripts/build-tcc-real.sh
set -eu
-CC_TEST=0
-while [ $# -gt 0 ]; do
- case "$1" in
- --cc-test) CC_TEST=1; shift ;;
- -h|--help) sed -n 's/^## \{0,1\}//p' "$0"; exit 0 ;;
- *) echo "unknown arg: $1" >&2; exit 2 ;;
- esac
-done
-
ROOT=$(cd "$(dirname "$0")/.." && pwd)
WORK=$ROOT/build/cc-bootstrap/X86_64
+MES_TAR=$ROOT/../lb-work/distfiles/mes-0.27.1.tar.gz
+MES_PKG=mes-0.27.1
+
+[ -r "$MES_TAR" ] || { echo "missing $MES_TAR" >&2; exit 1; }
-# --- ensure tcc-host + tcc-self.o exist ------------------------------
if [ ! -x "$WORK/tcc-host" ] || [ ! -r "$WORK/tcc-self.o" ]; then
echo "tcc-host or tcc-self.o missing — running build-tcc-source.sh --self-host"
"$ROOT/scripts/build-tcc-source.sh" --arch X86_64 --self-host
fi
-# --- link tcc-boot0 in alpine container ------------------------------
-echo "--- linking tcc-self.o + errno-shim -> tcc-boot0 ---"
+MES_SRC=$WORK/$MES_PKG
+if [ ! -d "$MES_SRC" ]; then
+ tar -xzf "$MES_TAR" -C "$WORK"
+fi
+mkdir -p "$MES_SRC/include/mes"
+: > "$MES_SRC/include/mes/config.h"
+
+echo "running build inside linux/amd64 alpine container (a few minutes under QEMU)"
+
podman run --rm -i --platform linux/amd64 \
-v "$ROOT":/work -w /work alpine:latest sh -s <<'CONTAINER_SCRIPT'
set -eu
-apk add --no-cache gcc musl-dev >/dev/null 2>&1
WORK=/work/build/cc-bootstrap/X86_64
-gcc -w -static -no-pie \
- -o "$WORK/tcc-boot0" \
- "$WORK/tcc-self.o" \
- "$WORK/errno-shim.c"
+TCC=$WORK/tcc-host
+MES_SRC=$WORK/mes-0.27.1
+MES_ARCH=x86_64
+
+# --- (2) build a sanitized include tree ------------------------------
+INC=/tmp/mes-inc
+mkdir -p $INC
+cp -r $MES_SRC/include/. $INC/
+ln -sfn linux/$MES_ARCH $INC/arch
+
+# --- (3) compile each mes libc .c file individually ------------------
+mkdir -p /tmp/objs
+cd $MES_SRC/lib
+
+# The canonical catm list from steps/tcc-0.9.26/pass1.kaem:99 (X86_64).
+# We compile these as separate TUs to dodge tcc-host's accumulator bug.
+ALL_FILES="ctype/isalnum.c ctype/isalpha.c ctype/isascii.c ctype/iscntrl.c \
+ctype/isdigit.c ctype/isgraph.c ctype/islower.c ctype/isnumber.c \
+ctype/isprint.c ctype/ispunct.c ctype/isspace.c ctype/isupper.c \
+ctype/isxdigit.c ctype/tolower.c ctype/toupper.c \
+dirent/closedir.c dirent/__getdirentries.c dirent/opendir.c \
+linux/readdir.c linux/access.c linux/brk.c linux/chdir.c linux/chmod.c \
+linux/clock_gettime.c linux/close.c linux/dup2.c linux/dup.c linux/execve.c \
+linux/fcntl.c linux/fork.c linux/fsync.c linux/fstat.c linux/_getcwd.c \
+linux/getdents.c linux/getegid.c linux/geteuid.c linux/getgid.c linux/getpid.c \
+linux/getppid.c linux/getrusage.c linux/gettimeofday.c linux/getuid.c \
+linux/ioctl.c linux/ioctl3.c linux/kill.c linux/link.c linux/lseek.c \
+linux/lstat.c linux/malloc.c linux/mkdir.c linux/mknod.c linux/nanosleep.c \
+linux/_open3.c linux/pipe.c linux/_read.c linux/readlink.c linux/rename.c \
+linux/rmdir.c linux/setgid.c linux/settimer.c linux/setuid.c linux/signal.c \
+linux/sigprogmask.c linux/symlink.c linux/stat.c linux/time.c linux/unlink.c \
+linux/waitpid.c linux/wait4.c \
+linux/${MES_ARCH}-mes-gcc/_exit.c linux/${MES_ARCH}-mes-gcc/syscall.c \
+linux/${MES_ARCH}-mes-gcc/_write.c \
+math/ceil.c math/fabs.c math/floor.c \
+mes/abtod.c mes/abtol.c mes/__assert_fail.c mes/assert_msg.c \
+mes/__buffered_read.c mes/__init_io.c mes/cast.c mes/dtoab.c \
+mes/eputc.c mes/eputs.c mes/fdgetc.c mes/fdgets.c mes/fdputc.c mes/fdputs.c \
+mes/fdungetc.c mes/globals.c mes/itoa.c mes/ltoab.c mes/ltoa.c \
+mes/__mes_debug.c mes/mes_open.c mes/ntoab.c mes/oputc.c mes/oputs.c \
+mes/search-path.c mes/ultoa.c mes/utoa.c \
+posix/alarm.c posix/buffered-read.c posix/execl.c posix/execlp.c \
+posix/execv.c posix/execvp.c posix/getcwd.c posix/getenv.c posix/isatty.c \
+posix/mktemp.c posix/open.c posix/pathconf.c posix/raise.c posix/sbrk.c \
+posix/setenv.c posix/sleep.c posix/unsetenv.c posix/wait.c posix/write.c \
+stdio/clearerr.c stdio/fclose.c stdio/fdopen.c stdio/feof.c stdio/ferror.c \
+stdio/fflush.c stdio/fgetc.c stdio/fgets.c stdio/fileno.c stdio/fopen.c \
+stdio/fprintf.c stdio/fputc.c stdio/fputs.c stdio/fread.c stdio/freopen.c \
+stdio/fscanf.c stdio/fseek.c stdio/ftell.c stdio/fwrite.c stdio/getc.c \
+stdio/getchar.c stdio/perror.c stdio/printf.c stdio/putc.c stdio/putchar.c \
+stdio/remove.c stdio/snprintf.c stdio/sprintf.c stdio/sscanf.c stdio/ungetc.c \
+stdio/vfprintf.c stdio/vfscanf.c stdio/vprintf.c stdio/vsnprintf.c \
+stdio/vsprintf.c stdio/vsscanf.c \
+stdlib/abort.c stdlib/abs.c stdlib/alloca.c stdlib/atexit.c stdlib/atof.c \
+stdlib/atoi.c stdlib/atol.c stdlib/calloc.c stdlib/__exit.c stdlib/exit.c \
+stdlib/free.c stdlib/mbstowcs.c stdlib/puts.c stdlib/qsort.c stdlib/realloc.c \
+stdlib/strtod.c stdlib/strtof.c stdlib/strtol.c stdlib/strtold.c \
+stdlib/strtoll.c stdlib/strtoul.c stdlib/strtoull.c \
+string/bcmp.c string/bcopy.c string/bzero.c string/index.c string/memchr.c \
+string/memcmp.c string/memcpy.c string/memmem.c string/memmove.c string/memset.c \
+string/rindex.c string/strcat.c string/strchr.c string/strcmp.c string/strcpy.c \
+string/strcspn.c string/strdup.c string/strerror.c string/strlen.c \
+string/strlwr.c string/strncat.c string/strncmp.c string/strncpy.c \
+string/strpbrk.c string/strrchr.c string/strspn.c string/strstr.c string/strupr.c \
+stub/atan2.c stub/bsearch.c stub/chown.c stub/__cleanup.c stub/cos.c \
+stub/ctime.c stub/exp.c stub/fpurge.c stub/freadahead.c stub/frexp.c \
+stub/getgrgid.c stub/getgrnam.c stub/getlogin.c stub/getpgid.c stub/getpgrp.c \
+stub/getpwnam.c stub/getpwuid.c stub/gmtime.c stub/ldexp.c stub/localtime.c \
+stub/log.c stub/mktime.c stub/modf.c stub/mprotect.c stub/pclose.c \
+stub/popen.c stub/pow.c stub/putenv.c stub/rand.c stub/realpath.c stub/rewind.c \
+stub/setbuf.c stub/setgrent.c stub/setlocale.c stub/setvbuf.c stub/sigaction.c \
+stub/sigaddset.c stub/sigblock.c stub/sigdelset.c stub/sigemptyset.c \
+stub/sigsetmask.c stub/sin.c stub/sys_siglist.c stub/system.c stub/sqrt.c \
+stub/strftime.c stub/times.c stub/ttyname.c stub/umask.c stub/utime.c \
+${MES_ARCH}-mes-gcc/setjmp.c"
+
+# Compile every mes libc source listed in ALL_FILES as its own translation
+# unit. Object paths accumulate in OBJS for the archive step that follows;
+# the build aborts after the loop if any file failed.
+OBJS=
+n_compiled=0
+n_failed=0
+ERR_LOG=/tmp/tcc-libc-err.log
+for f in $ALL_FILES; do
+  # Flatten the relative path into one file name (dir/file.c -> dir_file.o)
+  # so all objects can live side by side in /tmp/objs.
+  name=$(echo "$f" | tr / _)
+  o=/tmp/objs/${name%.c}.o
+  # Capture tcc's stderr instead of discarding it: files that compile stay
+  # quiet, but a failure now shows the real diagnostic and exit status.
+  rc=0
+  "$TCC" -c -D HAVE_CONFIG_H=1 -I "$INC" -I "$INC/linux/$MES_ARCH" \
+    -o "$o" "$f" 2>"$ERR_LOG" || rc=$?
+  if [ "$rc" -eq 0 ]; then
+    OBJS="$OBJS $o"
+    n_compiled=$((n_compiled+1))
+  else
+    # A status above 128 conventionally means the compiler was killed by a
+    # signal (e.g. 139 for SIGSEGV), which produces no stderr output.
+    echo "compile failed: $f (tcc exit status $rc)" >&2
+    cat "$ERR_LOG" >&2
+    n_failed=$((n_failed+1))
+  fi
+done
+echo "compiled $n_compiled libc .o files (failed: $n_failed)"
+[ "$n_failed" -eq 0 ] || { echo "abort: some libc files failed" >&2; exit 1; }
+
+# --- (4) ar -> libc.a -----------------------------------------------
+mkdir -p /lib/tcc /include/mes
+"$TCC" -ar cr /lib/libc.a $OBJS
+
+# --- (5) crt1.o, libtcc1.a -------------------------------------------
+"$TCC" -c -D HAVE_CONFIG_H=1 -I "$INC" -I "$INC/linux/$MES_ARCH" \
+ -o /lib/crt1.o "linux/$MES_ARCH-mes-gcc/crt1.c"
+: > /lib/crtn.o
+: > /lib/crti.o
+
+"$TCC" -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \
+ -I "$INC" -I "$INC/linux/$MES_ARCH" \
+ -o /tmp/libtcc1.o libtcc1.c
+"$TCC" -ar cr /lib/tcc/libtcc1.a /tmp/libtcc1.o
+
+# Install headers at the path tcc-host has baked in (CONFIG_TCC_SYSINCLUDEPATHS)
+cp -r "$INC/." /include/mes/
+
+ls -la /lib/crt1.o /lib/libc.a /lib/tcc/libtcc1.a
+
+# --- (6) link tcc-self.o against mes libc -> tcc-boot0-mes -----------
echo
-echo "--- tcc-boot0 -version ---"
-"$WORK/tcc-boot0" -version
+echo "--- linking tcc-self.o against mes libc ---"
+"$TCC" -static -o "$WORK/tcc-boot0-mes" "$WORK/tcc-self.o"
+ls -la "$WORK/tcc-boot0-mes"
echo
-echo "--- tcc-boot0 -c hi.c (compile-only smoke test) ---"
-cat > /tmp/hi.c <<'C'
-int main(int argc, char **argv) { return argc + 41; }
-C
-"$WORK/tcc-boot0" -c -o /tmp/hi.o /tmp/hi.c
-ls -la /tmp/hi.o
-echo "tcc-boot0 compile path: OK"
+echo "--- tcc-boot0-mes -version (expected: may segfault under QEMU) ---"
+rc=0; "$WORK/tcc-boot0-mes" -version 2>&1 || rc=$?
+echo "exit=$rc"
CONTAINER_SCRIPT
-ls -la "$WORK/tcc-boot0"
echo
-echo "build of real tcc complete: $WORK/tcc-boot0"
-
-# --- optional: full CC= cycle ----------------------------------------
-if [ "$CC_TEST" -eq 1 ]; then
- echo
- echo "--- CC= test: compile + link a tiny program with tcc-boot0 ---"
- podman run --rm -i --platform linux/amd64 \
- -v "$ROOT":/work -w /work alpine:latest sh -s <<'CONTAINER_SCRIPT'
-set -eu
-apk add --no-cache gcc musl-dev >/dev/null 2>&1
-WORK=/work/build/cc-bootstrap/X86_64
-cat > /tmp/argc.c <<'C'
-int main(int argc, char **argv) { return argc + 41; }
-C
-
-# Try tcc-boot0 as a full CC (compile + link). On QEMU x86_64 the link
-# step typically segfaults; on native x86_64 it works. Either way the
-# compile-only path above has already proven the compiler works.
-echo "--- attempting tcc-boot0 -B/usr/lib -L/usr/lib /tmp/argc.c -o /tmp/argc ---"
-"$WORK/tcc-boot0" -B/usr/lib -L/usr/lib -o /tmp/argc /tmp/argc.c 2>&1 || {
- echo "(link step failed — expected under QEMU x86_64 emulation;"
- echo " falls back to gcc as linker for demonstration)"
- "$WORK/tcc-boot0" -c -o /tmp/argc.o /tmp/argc.c
- gcc -static -no-pie -o /tmp/argc /tmp/argc.o
-}
-ls -la /tmp/argc
-rc=0; /tmp/argc a b c || rc=$?
-echo "tcc-boot0 -> /tmp/argc(a b c) returned $rc (expected argc+41 = 4+41 = 45)"
-[ "$rc" = "45" ] || { echo "FAIL: expected 45" >&2; exit 1; }
-echo "CC= test PASSED"
-CONTAINER_SCRIPT
-fi
+echo "artifacts:"
+ls -la "$WORK/tcc-boot0-mes" 2>/dev/null || echo "(no tcc-boot0-mes)"