boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 495942511e1e7cd5aa46d018bc40f72dfc2338a5
parent 21076563f70aed6d68f32a4a79d6ed992e2d552f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed,  6 May 2026 20:47:07 -0700

headers: consolidate flatten-time includes under bootprep/headers

The host-preprocessor flatten step pulled headers from two sibling
trees (cc/include/ for the bulk, bootprep/include/ for stdarg.h alone)
even though both serve the same consumer and neither is touched by
cc.scm. Collapse them into bootprep/headers/, drop the now-redundant
-I bootprep/include from both flatten scripts, refresh the stdarg
shim's header comment (the old one referenced mes/include which we
deleted in the libc.c collapse), and rewrite docs/LIBC.md so the
Layout / Build / Notes sections describe the single-file
vendor/mes-libc/libc.c reality instead of the pre-collapse multi-
subdir tree, including per-fixture script paths that have since
moved to tests/.

Diffstat:
Rcc/include/alloca.h -> bootprep/headers/alloca.h | 0
Rcc/include/assert.h -> bootprep/headers/assert.h | 0
Rcc/include/ctype.h -> bootprep/headers/ctype.h | 0
Rcc/include/endian.h -> bootprep/headers/endian.h | 0
Rcc/include/errno.h -> bootprep/headers/errno.h | 0
Rcc/include/fcntl.h -> bootprep/headers/fcntl.h | 0
Rcc/include/inttypes.h -> bootprep/headers/inttypes.h | 0
Rcc/include/limits.h -> bootprep/headers/limits.h | 0
Rcc/include/math.h -> bootprep/headers/math.h | 0
Rcc/include/setjmp.h -> bootprep/headers/setjmp.h | 0
Abootprep/headers/stdarg.h | 43+++++++++++++++++++++++++++++++++++++++++++
Rcc/include/stddef.h -> bootprep/headers/stddef.h | 0
Rcc/include/stdint.h -> bootprep/headers/stdint.h | 0
Rcc/include/stdio.h -> bootprep/headers/stdio.h | 0
Rcc/include/stdlib.h -> bootprep/headers/stdlib.h | 0
Rcc/include/string.h -> bootprep/headers/string.h | 0
Rcc/include/sys/stat.h -> bootprep/headers/sys/stat.h | 0
Rcc/include/sys/time.h -> bootprep/headers/sys/time.h | 0
Rcc/include/sys/types.h -> bootprep/headers/sys/types.h | 0
Rcc/include/time.h -> bootprep/headers/time.h | 0
Rcc/include/unistd.h -> bootprep/headers/unistd.h | 0
Dbootprep/include/stdarg.h | 39---------------------------------------
Mbootprep/libc-flatten.sh | 3+--
Mbootprep/stage1-flatten.sh | 14+++++++-------
Mdocs/LIBC.md | 343+++++++++++++++++++++++++++++--------------------------------------------------
25 files changed, 177 insertions(+), 265 deletions(-)

diff --git a/cc/include/alloca.h b/bootprep/headers/alloca.h diff --git a/cc/include/assert.h b/bootprep/headers/assert.h diff --git a/cc/include/ctype.h b/bootprep/headers/ctype.h diff --git a/cc/include/endian.h b/bootprep/headers/endian.h diff --git a/cc/include/errno.h b/bootprep/headers/errno.h diff --git a/cc/include/fcntl.h b/bootprep/headers/fcntl.h diff --git a/cc/include/inttypes.h b/bootprep/headers/inttypes.h diff --git a/cc/include/limits.h b/bootprep/headers/limits.h diff --git a/cc/include/math.h b/bootprep/headers/math.h diff --git a/cc/include/setjmp.h b/bootprep/headers/setjmp.h diff --git a/bootprep/headers/stdarg.h b/bootprep/headers/stdarg.h @@ -0,0 +1,43 @@ +/* boot2 stdarg.h — flatten-time stdarg shim used by host `cc -E` in + * bootprep/stage1-flatten.sh (tcc.flat.c) and bootprep/libc-flatten.sh + * (libc.flat.c). Both pass -nostdinc and -I bootprep/headers, so this + * is what tcc's source and vendor/mes-libc/libc.c see for <stdarg.h> + * when the host preprocessor flattens them. + * + * Routes va_* through __builtin_va_*, so the resulting flat.c + * compiles cleanly under both cc.scm (which recognizes + * __builtin_va_list and __builtin_va_start/arg/end as builtins) + * and stock gcc/clang (where they're native). + * + * Distinct from build/<arch>/vendor/tcc/stdarg-bridge.h, which is + * generated from tcc-0.9.26's own include/stdarg.h (patched) and + * prepended to libc.flat.c so that tcc itself — which has no + * __builtin_va_* keywords — can compile through the flattened libc + * by mapping __builtin_va_* onto its native __va_* intrinsics. 
+ */ +#ifndef __MES_STDARG_H +#define __MES_STDARG_H 1 + +typedef __builtin_va_list va_list; + +#define va_start(v, l) __builtin_va_start((v), (l)) +#define va_end(v) __builtin_va_end((v)) +#define va_arg(v, t) __builtin_va_arg((v), t) +#define va_arg8(ap, type) va_arg((ap), type) +#define va_copy(d, s) __builtin_va_copy((d), (s)) + +/* mes/include/stdarg.h forward-declares the v* family here (instead + * of in <stdio.h>); tcc.c calls vsnprintf without ever including + * <stdio.h>, so dropping mes's stdarg.h in favor of this shim must + * still leak these prototypes. FILE and size_t come from a prior + * include in mes-libc TUs; tcc.c works because it includes + * <sys/types.h> for size_t and uses (FILE*) implicitly. */ +int vexec (char const *file_name, va_list ap); +int vfprintf (FILE *stream, char const *template, va_list ap); +int vfscanf (FILE *stream, char const *template, va_list ap); +int vprintf (char const *format, va_list ap); +int vsprintf (char *str, char const *format, va_list ap); +int vsnprintf(char *str, size_t size, char const *format, va_list ap); +int vsscanf (char const *s, char const *template, va_list ap); + +#endif /* __MES_STDARG_H */ diff --git a/cc/include/stddef.h b/bootprep/headers/stddef.h diff --git a/cc/include/stdint.h b/bootprep/headers/stdint.h diff --git a/cc/include/stdio.h b/bootprep/headers/stdio.h diff --git a/cc/include/stdlib.h b/bootprep/headers/stdlib.h diff --git a/cc/include/string.h b/bootprep/headers/string.h diff --git a/cc/include/sys/stat.h b/bootprep/headers/sys/stat.h diff --git a/cc/include/sys/time.h b/bootprep/headers/sys/time.h diff --git a/cc/include/sys/types.h b/bootprep/headers/sys/types.h diff --git a/cc/include/time.h b/bootprep/headers/time.h diff --git a/cc/include/unistd.h b/bootprep/headers/unistd.h diff --git a/bootprep/include/stdarg.h b/bootprep/include/stdarg.h @@ -1,39 +0,0 @@ -/* boot2 stdarg.h — shadows mes/include/stdarg.h for both flatten - * paths (bootprep/{stage1,libc}-flatten.sh both 
have -I on the - * containing dir ahead of mes's include tree). Routes va_* through - * __builtin_va_*, so tcc.flat.c and libc.flat.c compile cleanly - * under both our cc.scm (which recognizes __builtin_va_list and - * __builtin_va_start/arg/end) and stock gcc/clang (where they're - * native). - * - * Mes's stdarg.h has a similar __builtin-routed branch but only - * activates under __riscv. We can't set -D __riscv at flatten time - * without also flipping setjmp.h and tcc-internal arch logic, so we - * shadow the whole header instead. - */ -#ifndef __MES_STDARG_H -#define __MES_STDARG_H 1 - -typedef __builtin_va_list va_list; - -#define va_start(v, l) __builtin_va_start((v), (l)) -#define va_end(v) __builtin_va_end((v)) -#define va_arg(v, t) __builtin_va_arg((v), t) -#define va_arg8(ap, type) va_arg((ap), type) -#define va_copy(d, s) __builtin_va_copy((d), (s)) - -/* mes/include/stdarg.h forward-declares the v* family here (instead - * of in <stdio.h>); tcc.c calls vsnprintf without ever including - * <stdio.h>, so dropping mes's stdarg.h in favor of this shim must - * still leak these prototypes. FILE and size_t come from a prior - * include in mes-libc TUs; tcc.c works because it includes - * <sys/types.h> for size_t and uses (FILE*) implicitly. 
*/ -int vexec (char const *file_name, va_list ap); -int vfprintf (FILE *stream, char const *template, va_list ap); -int vfscanf (FILE *stream, char const *template, va_list ap); -int vprintf (char const *format, va_list ap); -int vsprintf (char *str, char const *format, va_list ap); -int vsnprintf(char *str, size_t size, char const *format, va_list ap); -int vsscanf (char const *s, char const *template, va_list ap); - -#endif /* __MES_STDARG_H */ diff --git a/bootprep/libc-flatten.sh b/bootprep/libc-flatten.sh @@ -50,7 +50,7 @@ VENDOR=$ROOT/vendor/mes-libc WORK=$ROOT/build/$ARCH/vendor/mes-libc STAGE=$WORK/libc-stage FLAT=$WORK/libc.flat.c -SYS_INCLUDE=$ROOT/cc/include +SYS_INCLUDE=$ROOT/bootprep/headers [ -d "$VENDOR" ] || { echo "missing $VENDOR" >&2; exit 1; } [ -d "$SYS_INCLUDE" ] || { echo "missing $SYS_INCLUDE" >&2; exit 1; } @@ -74,7 +74,6 @@ BRIDGE=$ROOT/build/$ARCH/vendor/tcc/stdarg-bridge.h "$HOST_CC" -E -P \ -nostdinc \ - -I "$ROOT/bootprep/include" \ -I "$SYS_INCLUDE" \ -D HAVE_CONFIG_H=0 \ -D __linux__=1 \ diff --git a/bootprep/stage1-flatten.sh b/bootprep/stage1-flatten.sh @@ -51,17 +51,18 @@ esac # simple-patches, copied in # for in-tree builds # vendor/tcc/patches/ — our own tcc patches -# vendor/mes-libc/include/ — vendored mes-libc headers -# (byte-identical to upstream -# mes/include) -# bootprep/include/ — our own header shim, wins -# -I priority for stdarg.h +# bootprep/headers/ — our hand-rolled libc headers used at +# flatten time by host `cc -E -nostdinc`. +# stdarg.h here routes va_* through +# __builtin_va_*; the rest are minimal +# stubs so tcc.c parses through the host +# preprocessor. ROOT=$(cd "$(dirname "$0")/.." 
&& pwd) WORK=$ROOT/build/$BOOT_ARCH/vendor/tcc DISTFILES=$ROOT/vendor/tcc LB_PATCHES=$ROOT/vendor/tcc/patches-lb OUR_PATCHES=$ROOT/vendor/tcc/patches -SYS_INCLUDE=$ROOT/cc/include +SYS_INCLUDE=$ROOT/bootprep/headers TCC_TAR=$DISTFILES/0.9.26.tar.gz TCC_PKG=tcc-0.9.26-1147-gee75a10c @@ -309,7 +310,6 @@ FLAT=$WORK/tcc.flat.c -nostdinc \ -I "$SRC" \ -I "$WORK/mes-overlay" \ - -I "$ROOT/bootprep/include" \ -I "$SYS_INCLUDE" \ -D __linux__=1 \ -D __${CPP_ARCH}__=1 \ diff --git a/docs/LIBC.md b/docs/LIBC.md @@ -15,49 +15,54 @@ phases: Until Phase B lands, tcc-boot2 can only run paths that don't need to link external archives — `-version`, parse-only smokes. -Strategy in one sentence: **vendor a curated subset of mes libc as -source, patch a handful of small things, replace mes's per-arch -inline-asm syscall wrappers with one hand-written file -(`boot2-syscall.c`) that calls our P1pp labelled `sys_*` entry -points, then build it three different ways: as P1pp linked into -tcc-boot2 (Phase A), as ELF object files via tcc-boot2 itself -(Phase B1), and tcc's own `lib/libtcc1.c` via tcc-boot2 (Phase B2).** - -Anchors: mes source lives at `../mes/lib/`. P1pp syscall block is at -[P1/P1pp.P1pp:986-1058](../P1/P1pp.P1pp). cc.scm's C linkage rule is -commit `6488cca`. Live-bootstrap's reference catm command is the long -line in `../live-bootstrap/steps/tcc-0.9.26/pass1.kaem` (search for -`unified-libc.c`). +Strategy in one sentence: **maintain a single hand-collapsed libc +source (`vendor/mes-libc/libc.c`, started from mes-libc 0.24) whose +syscall layer calls our P1pp labelled `sys_*` entry points directly, +then build it three different ways: as P1pp linked into tcc-boot2 +(Phase A), as an ELF archive via tcc-boot2 itself (Phase B1), and +tcc's own `lib/libtcc1.c` via tcc-boot2 (Phase B2).** + +Anchor: P1pp syscall block at +[P1/P1pp.P1pp:986-1058](../P1/P1pp.P1pp). 
The header comment in +[`vendor/mes-libc/libc.c`](../vendor/mes-libc/libc.c) is the +authoritative manifest of what's provided / required / ordered. ## Layout ``` vendor/mes-libc/ -├── ctype/ string/ stdlib/ stdio/ posix/ mes/ (verbatim from -│ └── *.c ../mes/lib/) -├── linux/ -│ └── malloc.c (only file kept; the others are replaced -│ by boot2-syscall.c) -├── include/ (verbatim copy of ../mes/include/, plus an -│ empty mes/config.h shim) -├── patches/ (literal-block .before/.after pairs) -├── boot2-syscall.c (our hand-written replacement for mes's -│ per-arch inline-asm syscall.c + glue) -├── unified-libc.c (#include's every .c above; host -E flattens) -└── LICENSE (mes's COPYING; libc subset is GPLv3+) - -scripts/ -├── libc-flatten.sh host (no boot- prefix); stage + patch + -E -└── boot-build-cc.sh container; cc.scm → libc.P1pp (CC_LIB=PFX - selects cc.scm --lib= mode; see §Linking) +├── libc.c single-file libc (~1000 lines). Includes +│ syscall wrappers (_read/_write/.../brk), +│ FILE-globals, malloc/free, printf family, +│ ctype, string ops, __libc_init, __assert_fail. +└── LICENSE mes's COPYING; libc subset is GPLv3+. + +bootprep/ +├── libc-flatten.sh host: cc -E -nostdinc -I bootprep/headers +│ libc.c → build/$ARCH/vendor/mes-libc/libc.flat.c. +│ Prepends build/$ARCH/vendor/tcc/stdarg-bridge.h +│ (see docs/TCC.md) so tcc itself can later +│ compile through the flattened libc. +└── headers/ hand-rolled libc headers consumed at flatten + time by the host preprocessor; `stdarg.h` + here routes va_* through __builtin_va_*. + +tests/ +├── build-cc.sh container: cc.scm <flat.c> → P1pp +│ (CC_LIB=PFX selects --lib= mode; see §Linking) +└── build-p1pp.sh container: catm + M1pp → P1pp/M1/asm/ELF. 
P1/ -├── entry-libc.P1pp :p1_main wrapper (calls __libc_init, main) -└── elf-end.P1pp single :ELF_end terminator label +├── entry-libc.P1pp :p1_main wrapper (calls __libc_init, main) +└── elf-end.P1pp single :ELF_end terminator label -tests/cc-libc/ targeted fixtures for cc.scm + libc TDD +tests/cc-libc/ targeted fixtures for cc.scm + libc TDD ``` +Memory helpers (`memcpy`, `memmove`, `memset`, `memcmp`) come from +`tcc/cc/mem.c`, not from `libc.c` — they're shared with `-nostdlib` +test/runtime paths. + ## Phase A — link tcc-boot2 ### P1pp syscall wrappers @@ -80,122 +85,52 @@ identical across arches. Acceptance fixture: `tests/p1/sys_calls.P1pp` exercises all three on every arch via `make test SUITE=p1`. -### Vendored manifest - -Verbatim copies from `../mes/lib/`: - -``` -ctype/ isalnum.c isalpha.c isascii.c iscntrl.c isdigit.c - isgraph.c islower.c isnumber.c isprint.c ispunct.c - isspace.c isupper.c isxdigit.c tolower.c toupper.c - -string/ memchr.c memcmp.c memcpy.c memmem.c memmove.c memset.c - strcat.c strchr.c strcmp.c strcpy.c strcspn.c strdup.c - strerror.c strlen.c strncat.c strncmp.c strncpy.c - strpbrk.c strrchr.c strspn.c strstr.c strupr.c - -stdlib/ abort.c atoi.c atol.c calloc.c exit.c __exit.c free.c - puts.c qsort.c realloc.c strtof.c strtol.c strtoll.c - strtoul.c strtoull.c - -stdio/ clearerr.c fclose.c fdopen.c feof.c ferror.c fflush.c - fgetc.c fgets.c fileno.c fopen.c fprintf.c fputc.c - fputs.c fread.c fseek.c ftell.c fwrite.c getc.c - perror.c printf.c putc.c remove.c snprintf.c sprintf.c - ungetc.c vfprintf.c vprintf.c vsnprintf.c vsprintf.c - -linux/ malloc.c (only) - -posix/ buffered-read.c execvp.c getcwd.c getenv.c open.c - sbrk.c write.c - -mes/ abtol.c __assert_fail.c __buffered_read.c cast.c dtoab.c - eputc.c eputs.c fdgetc.c fdgets.c fdputc.c fdputs.c - fdungetc.c globals.c __init_io.c itoa.c ltoa.c ltoab.c - __mes_debug.c mes_open.c ntoab.c oputc.c oputs.c - search-path.c ultoa.c utoa.c -``` - -Linux syscall-touching files 
(`brk`, `close`, `lseek`, `_open3`, -`_read`, `unlink`) are replaced by `boot2-syscall.c` directly so we -don't drag in mes's `_sys_callN` indirection. `linux/malloc.c` stays -— it's a free-list allocator on top of `brk()`, no syscall plumbing -of its own. - -Headers: `vendor/mes-libc/include/` is a verbatim copy of -`../mes/include/`, plus an empty `mes/config.h` shim (no-op for -`HAVE_CONFIG_H` consumers). - -### Patches - -`vendor/mes-libc/patches/*.{before,after}` are literal-block pairs -applied by `bootprep/libc-flatten.sh` (same `apply_simple_patch` -shape `stage1-flatten.sh` uses for tcc): - -| patch | target | reason | -|------------------------|-------------------------|-----------------------------------------------------------------------| -| malloc-max-align | linux/malloc.c | `sizeof(max_align_t)` → `16`. cc.scm has no `max_align_t`. | -| strstr-drop-mman | string/strstr.c | drop unused `#include <sys/mman.h>`. | -| libmini-write-proto | include/mes/lib-mini.h | `void __init_io ();` → typed prototype. cc.scm rejects empty-arg-list redecls when followed by a typed definition. | -| libmini-write-proto2 | include/mes/lib-mini.h | same fix for `ssize_t _write ();`. | -| lib-mes-debug-proto | include/mes/lib.h | same fix for `__mes_debug ()` and `__ungetc_init ()`. | -| ntoab-inline-defined | mes/ntoab.c | drop a helper macro that wraps `defined()` (UB per ISO C; clang `-Wexpansion-to-defined`). The mes author's FIXME comment already had the inlined form. | - -`mes/globals.c` is intentionally **not** patched — it must stay plain -int globals (no TLS). - -`stdio/{snprintf,sprintf,vsprintf,fprintf,printf}.c`'s `#if __GNUC__ -&& __x86_64__ && !SYSTEM_LIBC` blocks evaluate to zero under our -defines (no `__GNUC__`); confirm by grep on `libc.flat.c` if in doubt. - -### boot2-syscall.c - -The only file we author. Provides: - -- `_read`, `_write`, `_open3`, `close`, `_lseek`, `lseek`, `brk`, - `unlink`, `_exit` — thin C wrappers around our `sys_*` P1pp labels. 
- Public posix functions (`read`, `write`, `open`, …) come from - `posix/*.c` on top. -- `__libc_init(argc, argv)` — populates `environ` from argv's NULL - terminator at process entry. Without this, tcc's first `getenv()` - in `tcc_new()` dereferences a NULL `environ` and segfaults. -- Real `stdin` / `stdout` / `stderr` symbols (`(FILE*)0/1/2`). - mes's `<stdio.h>` defines these as macros; we `#undef` them after - libc's includes and define globals so client code can use canonical - `extern FILE *stdout; fputs(s, stdout);`. -- ENOSYS stubs for libc-internal symbols transitively pulled in by - the manifest but not exercised by the tcc-boot2 path: `access`, - `assert_msg`, `execve`, `fsync`, `raise`, `rmdir`, `stat`, - `_getcwd`, `strtod`. Replace with real wrappers if any surfaces in - a real workload. +### What `libc.c` provides + +The header comment in [`vendor/mes-libc/libc.c`](../vendor/mes-libc/libc.c) +is the source of truth; in summary: + +- **syscalls** — `_read _write _open3 close lseek brk unlink _exit + raise abort` plus `environ getenv __libc_init` and ENOSYS stubs + for `access execve fsync rmdir stat strtod`. Each thin C wrapper + calls the matching `sys_*` P1pp label. +- **I/O** — `stdin / stdout / stderr` as real `FILE*` symbols (FILE + is a long-typed alias for the fd), plus `fopen fdopen fclose + fflush fseek ftell remove fread fwrite fputs fputc fgetc puts + strdup`, the printf family (`fprintf printf snprintf sprintf + vfprintf vsnprintf vprintf vsprintf`). +- **stdlib** — `malloc free realloc qsort exit atoi strtol strtoul + strtoull strtof`. Allocator is a brk-backed free list. +- **string** — `strlen strcmp strcpy strncmp strncpy strchr strrchr + strstr strcat strdup memmem`. +- **ctype** — `isdigit islower isnumber isspace isxdigit toupper`. +- **assertions** — `__assert_fail`. + +`mem*` (`memcpy memmove memset memcmp`) come from `tcc/cc/mem.c`, +not `libc.c` — they're also linked into the `-nostdlib` tcc +runtime and tests. 
+ +`__libc_init(argc, argv)` walks argv's NULL terminator to populate +`environ` so the first `getenv()` doesn't dereference a NULL +environment pointer. `P1/entry-libc.P1pp` calls it ahead of `main`. ### Build -`bootprep/libc-flatten.sh --arch <a>` (host): - -1. Stage `vendor/mes-libc/` to `build/$ARCH/vendor/mes-libc/libc-stage/` - so patching is non-destructive. -2. `ln -sfn linux/$MES_ARCH include/arch` so mes's `<arch/...>` - includes resolve through the canonical `<sys/stat.h>` chain. -3. Apply patches on the staged copy. -4. `host_cc -E -nostdinc -I include -I . -D __linux__=1 - -D __${MES_ARCH}__=1 -D __riscv_xlen=64 unified-libc.c - → build/$ARCH/vendor/mes-libc/libc.flat.c` (~52 KB, ~2400 lines). +`bootprep/libc-flatten.sh --arch <a>` (host): stages +`vendor/mes-libc/libc.c` to `build/$ARCH/vendor/mes-libc/libc-stage/`, +then runs `host_cc -E -P -nostdinc -I bootprep/headers +-D __linux__=1 -D __${MES_ARCH}__=1 -D __riscv_xlen=64 +-D HAVE_CONFIG_H=0 -D inline=` against it and prepends +`build/$ARCH/vendor/tcc/stdarg-bridge.h` (guarded by `#ifndef CCSCM`) +to produce `build/$ARCH/vendor/mes-libc/libc.flat.c` (~27 KB). `MES_ARCH` mapping is `aarch64→riscv64`, `amd64→x86_64`, -`riscv64→riscv64`. mes ships no aarch64 headers; the riscv64 set -suffices because nothing in our flatten ends up referencing arch- -specific syscall numbers or struct stat layouts (boot2-syscall.c -goes around them). - -`-I` order is **`include` before `include/linux/$MES_ARCH`** — -reverse breaks `<signal.h>` resolution because the per-arch tree -holds a partial `signal.h` that shadows the canonical `typedef long -stack_t;`. +`riscv64→riscv64` — `bootprep/headers/` is arch-agnostic, so the +mapping only feeds the `__${MES_ARCH}__` predefine that gates a few +preprocessor branches inside `libc.c`. -`scripts/boot-build-cc.sh` (container) then runs `cc.scm` over -`libc.flat.c` to produce `build/$ARCH/vendor/mes-libc/libc.P1pp` -(~520 KB, ~21 K lines). 
+`tests/build-cc.sh` (container) then runs `cc.scm` over +`libc.flat.c` to produce `build/$ARCH/vendor/mes-libc/libc.P1pp`. ### Linking — catm chain @@ -218,13 +153,13 @@ build/$ARCH/vendor/mes-libc/libc.P1pp # cc.scm --lib=libc__ → libc__cc__st P1/elf-end.P1pp # :ELF_end ``` -`scripts/boot-build-p1pp.sh` already cats its inputs in front of -the M1pp expander, so the catm chain is just its source-list -arguments. Both the tcc-boot2 link rule (Makefile) and the cc-libc -test suite (boot-run-tests.sh) compose this way; the tcc-boot2 -client uses prefix `tcc__`, every cc-libc fixture uses `app__`. +`tests/build-p1pp.sh` already cats its inputs in front of the M1pp +expander, so the catm chain is just its source-list arguments. Both +the tcc-boot2 link rule (Makefile) and the cc-libc test suite +(`tests/run-suite.sh`) compose this way; the tcc-boot2 client uses +prefix `tcc__`, every cc-libc fixture uses `app__`. -`__libc_init` (`vendor/mes-libc/boot2-syscall.c`) walks argv's +`__libc_init` (defined in `vendor/mes-libc/libc.c`) walks argv's NULL terminator to populate `environ`; it must run before any libc function that reads the environment. That's why the entry fragment calls it ahead of `main`. @@ -296,8 +231,8 @@ Per-fixture artefacts: When triaging a failure, the catm'd source the M1pp expander sees lives at `build/$ARCH/.work/tests/cc-libc/<name>/combined.M1pp` -(boot-build-p1pp.sh copies it there alongside the rest of the per- -stage scratch outputs; the path is also recorded in the sidecar +(`tests/build-p1pp.sh` copies it there alongside the rest of the +per-stage scratch outputs; the path is also recorded in the sidecar `<elf>.workdir` next to the binary). Grep that for the symbol or sequence in question. @@ -312,63 +247,56 @@ help here — that one is linked into tcc-boot2 itself in P1pp form. The archives are tcc-boot2's *output* world. Build them with tcc-boot2 itself, mirroring live-bootstrap's -`pass1.kaem` (search for `unified-libc.o` and `libtcc1.o`). 
- -### B1. libc.a from the same vendored sources +`pass1.kaem` (search for `libtcc1.o`). -Reuse `vendor/mes-libc/unified-libc.c` from Phase A. Compile with -tcc-boot2 (per arch). Add `scripts/boot-build-libc-archive.sh` (boot-* -because tcc-boot2 itself runs in the container): +### B1. libc.a from the same vendored source -```sh -TCC_BOOT2=build/$ARCH/tcc-boot2 -$TCC_BOOT2 -c -D HAVE_CONFIG_H=1 \ - -I vendor/mes-libc/include \ - -I vendor/mes-libc/include/linux/$MES_ARCH \ - -o build/$ARCH/libc.o \ - vendor/mes-libc/unified-libc.c -$TCC_BOOT2 -ar cr build/$ARCH/libc.a build/$ARCH/libc.o -``` +Reuse `vendor/mes-libc/libc.c` (i.e. the *unflattened* source — +or the already-flattened `libc.flat.c`, since it was preprocessed +through our own `bootprep/headers/`). Compile with tcc-boot2 per +arch. The exact tcc invocation will need to match whatever set of +predefines `libc.c` expects (see the `bootprep/libc-flatten.sh` +flags above). Archive with `tcc -ar cr libc.a libc.o`. Install at `$LIBDIR/libc.a` where `$LIBDIR` is whatever `CONFIG_TCC_CRTPREFIX` was baked into tcc-boot2 (default `build/$ARCH/sysroot/lib`; align with the `-D CONFIG_TCC_CRTPREFIX` -in the Makefile). Also produce `crt1.o` from -`vendor/mes-libc/linux/$MES_ARCH-mes-gcc/crt1.c` if the link needs -one — for static binaries with our hand-written `_start` it can be -skipped; check by linking the smoke test below. +in the Makefile). Also produce `crt1.o` if the link needs one — for +static binaries with the existing `_start` (`tcc/libc/$ARCH/start.S`) +it can be skipped; check by linking the smoke test below. The chicken-and-egg concern is moot: tcc-boot2's codegen for P1-64 targets does not emit `__divdi3`-class calls when compiling -mes libc (long-long is native register width on X86_64 / aarch64 / -riscv64, and `HAVE_FLOAT` paths in mes libc are dead under -`HAVE_CONFIG_H=1` with our config). So building libc.a needs no -prior libtcc1.a. 
+the libc (long-long is native register width on X86_64 / aarch64 / +riscv64). So building libc.a needs no prior libtcc1.a. ### B2. libtcc1.a from upstream tcc The file is already vendored implicitly via `stage1-flatten.sh` (it's `tcc-0.9.26-1147-gee75a10c/lib/libtcc1.c` inside the tarball). -Add `scripts/boot-build-libtcc1.sh`: +The compile uses tcc's own bundled headers from +`tcc-0.9.26-1147-gee75a10c/include/` (stdarg, stddef, stdbool, +float, varargs); no `bootprep/headers/` involvement at this point. + +Sketch: ```sh TCC_BOOT2=build/$ARCH/tcc-boot2/tcc-boot2 TCC_SRC=build/$ARCH/vendor/tcc/tcc-0.9.26-1147-gee75a10c $TCC_BOOT2 -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \ - -I vendor/mes-libc/include \ - -I vendor/mes-libc/include/linux/$MES_ARCH \ + -I "$TCC_SRC" -I "$TCC_SRC/include" \ -o build/$ARCH/libtcc1.o \ $TCC_SRC/lib/libtcc1.c -# riscv64 also pulls in lib-arm64.c per upstream: -if [ "$ARCH" = riscv64 ]; then +# aarch64 also pulls in lib-arm64.c per upstream: +if [ "$ARCH" = aarch64 ]; then $TCC_BOOT2 -c ... -o build/$ARCH/lib-arm64.o $TCC_SRC/lib/lib-arm64.c EXTRA=build/$ARCH/lib-arm64.o fi $TCC_BOOT2 -ar cr build/$ARCH/libtcc1.a build/$ARCH/libtcc1.o $EXTRA ``` -Install at `$LIBDIR/tcc/libtcc1.a` (matches tcc.flat.c line 11234 -`tcc_add_support(s1, "libtcc1.a")` against `tcc_lib_path`). +Install at `$LIBDIR/tcc/libtcc1.a` (matches `tcc_add_support(s1, +"libtcc1.a")` against `tcc_lib_path`). ### B3. Wire into the Makefile @@ -405,45 +333,26 @@ That's tracked in [TCC.md](TCC.md), not here. ## Out of scope -- **Threading, locale, dynamic linker, IEEE-754 math.** tcc-mes - defines have `HAVE_FLOAT` / `HAVE_SETJMP` off; the fp paths in mes - libc compile but are dead code under those defines. +- **Threading, locale, dynamic linker, IEEE-754 math.** `libc.c`'s + fp paths (`strtof`, etc.) are present but the rest of the boot + pipeline doesn't exercise them seriously. - **errno from threads.** `errno` is a single global int. 
cc.scm has no TLS; tcc-boot2 is single-threaded. ## Notes for the engineer -- If a mes file pulls in a header path we don't have, the right move - is almost always to copy the matching `mes/include/` header - verbatim — don't write a substitute. +- `libc.c` is a single editable file; if a fixture surfaces a + missing routine, add it directly there rather than reaching for + upstream mes-libc. - cc.scm's `--cc-debug` flag prints per-phase heap usage on stderr. - libc.flat.c is small (~52 KB after flatten) so heap should be - flat; if it isn't, that's a cc.scm bug, not a libc bug. -- The existing `vendor/seed/` layout is `<tool>/<arch>/...`. mes-libc - is per-arch only via headers; the .c manifest is arch-agnostic. - Layout `vendor/mes-libc/{ctype,string,...}/` flat, with - `vendor/mes-libc/include/linux/<arch>/` per-arch. - -### cc.scm bugs surfaced by Phase A - -The four link-composition issues — string alignment, per-TU label -namespacing, missing library mode, missing :ELF_end suppression — -are now fixed in cc.scm itself (string alignment is emitted with -`.align 8`; the other three are gated on the `--lib=PFX` -flag, see §Linking). Remaining issues: - -- **Empty-arg-list redecl rejection.** mes headers' K&R-style `f();` - followed by a typed definition fails with `redecl: type mismatch`. - Workaround: vendored-header patches that prototype the offenders - (see [§Patches](#patches)). -- **Tentative-vs-initialized merge (suspected).** `mes/globals.c`'s - `int __stdout;` (tentative) plus `mes/mes_open.c`'s `int __stdout - = STDOUT;` (initialized) seems to resolve to the tentative zero - rather than the initializer in a single TU. Symptom is - cc-libc/06-puts; under investigation. -- **Varargs lowering (suspected).** `printf("got %d\n", 42)` pulls - 100 instead of 42 from `va_arg`. Symptom is cc-libc/05-printf-int; - not yet diagnosed. -- **Floating-point literals.** cc.scm rejects `0.0`. 
Use `(double) 0` - or similar non-literal forms (one site in boot2-syscall.c's - `strtod` stub). + `libc.flat.c` is ~27 KB so heap should be flat; if it isn't, + that's a cc.scm bug, not a libc bug. +- Layout: `vendor/mes-libc/libc.c` is arch-agnostic. The only + per-arch input feeding the libc build is the `__${MES_ARCH}__` + predefine that `bootprep/libc-flatten.sh` passes through. +- The flatten-time stdarg shim (`bootprep/headers/stdarg.h`) and + the per-arch tcc stdarg bridge (generated at + `build/<arch>/vendor/tcc/stdarg-bridge.h`, prepended to + `libc.flat.c`) are *both* required and serve different consumers + — see the comment at the top of `bootprep/headers/stdarg.h` and + [docs/TCC.md](TCC.md).