boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 39fab2ed7106d643242b35b0fb553b5f659eb5d9
parent cdadeb225ec44876ca034f3552b7a0675a1598f3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 22:38:14 -0700

libp1pp: promote mem* to canonical compiler-builtin runtime

Rename libp1pp__memcpy / _memcmp / _memset to plain memcpy / memcmp /
memset and add memmove. Drop the same four entries from the mes-libc
flatten so libp1pp owns the symbols across every build chain — cc.scm
+ libp1pp, cc-libc (libp1pp + libc), tcc-cc, and tcc-gcc all resolve
bare extern memcpy against one source. tcc-cc/mem.c gains memcmp and
is linked into the gcc-built tcc-gcc binary so its libc.flat.c stays
mem*-free without breaking that path. Update %memcpy_call and the
scheme1 call sites; rename and reframe 129-extern-libp1pp ->
129-extern-mem-builtins.

cc 178/1 -> 179/0; cc-libc, tcc-cc, tcc-gcc unchanged at parity.

Diffstat:
M Makefile | 2 ++
M P1/P1pp.P1pp | 59 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
M docs/LIBP1PP.md | 13 ++++++++++++-
M docs/TCC-TODO.md | 28 +++++++++++++---------------
M scheme1/scheme1.P1pp | 16 ++++++++--------
M scripts/build-tcc-gcc.sh | 2 ++
M tcc-cc/mem.c | 16 +++++++++++++++-
D tests/cc/129-extern-libp1pp.c | 66 ------------------------------------------------------------------
A tests/cc/129-extern-mem-builtins.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R tests/cc/129-extern-libp1pp.expected-exit -> tests/cc/129-extern-mem-builtins.expected-exit | 0
M vendor/mes-libc/unified-libc.c | 7 +++----
11 files changed, 165 insertions(+), 106 deletions(-)

diff --git a/Makefile b/Makefile @@ -380,6 +380,8 @@ $(TCC_CC_START): tcc-cc/$(TCC_CC_ARCH)/start.S # its ARM64 libtcc1 (lib-arm64.o) doesn't define them — upstream # expects libc to. The tcc-cc suite links -nostdlib, so we compile # this fallback with tcc-boot2 itself and link it alongside start.o. +# memcmp lives here too, as a plain compiler-builtin for fixtures +# that reach it via `extern int memcmp(...)`. $(TCC_CC_MEM): tcc-cc/mem.c \ build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \ build/$(TCC_CC_ARCH)/.image diff --git a/P1/P1pp.P1pp b/P1/P1pp.P1pp @@ -289,7 +289,7 @@ %li(a2, n_imm) %mov(a1, src_reg) %mov(a0, dst_reg) - %call(&libp1pp__memcpy) + %call(&memcpy) %endm # ========================================================================= @@ -890,13 +890,15 @@ # Memory and strings # ========================================================================= -# libp1pp__memcpy(dst=a0, src=a1, n=a2) -> dst (a0) +# memcpy(dst=a0, src=a1, n=a2) -> dst (a0) # Leaf. Copies n bytes from src to dst. No overlap support where -# dst > src && dst < src + n. Internal name so a libc that defines its -# own :memcpy can be linked alongside libp1pp without duplicate-label -# errors at hex2++ time. cc.scm's %memcpy_call macro and scheme1 -# bind this prefixed name directly. -:libp1pp__memcpy +# dst > src && dst < src + n; use memmove for that case. These mem* +# entries are the canonical compiler-builtin runtime — every build +# process in this tree (cc.scm + libp1pp + libc, tcc-cc, tcc-gcc) +# resolves bare `extern memcpy` against this implementation. The +# vendored mes-libc is flattened with its own memcpy/memmove/memset/ +# memcmp omitted so the symbols are not duplicated at hex2++ time. +:memcpy .scope %mov(a3, a0) %li(t0, 0) @@ -913,8 +915,43 @@ %ret .endscope -# libp1pp__memset(dst=a0, byte=a1, n=a2) -> dst (a0) -:libp1pp__memset +# memmove(dst=a0, src=a1, n=a2) -> dst (a0) +# Leaf. Like memcpy but tolerates overlap by picking the safe direction. 
+:memmove +.scope + %mov(a3, a0) + %beq(a0, a1, &.done) + %beqz(a2, &.done) + %bltu(a0, a1, &.fwd) + # dst > src: copy from the high end down so an overlap that would + # clobber a yet-unread src byte is harmless. + %mov(t0, a2) + :.bwd_loop + %addi(t0, t0, -1) + %add(t1, a1, t0) + %lb(t1, t1, 0) + %add(t2, a3, t0) + %sb(t1, t2, 0) + %bnez(t0, &.bwd_loop) + %b(&.done) + :.fwd + # dst < src: forward copy is safe. + %li(t0, 0) + :.fwd_loop + %beq(t0, a2, &.done) + %add(t1, a1, t0) + %lb(t1, t1, 0) + %add(t2, a3, t0) + %sb(t1, t2, 0) + %addi(t0, t0, 1) + %b(&.fwd_loop) + :.done + %mov(a0, a3) + %ret +.endscope + +# memset(dst=a0, byte=a1, n=a2) -> dst (a0) +:memset .scope %mov(a3, a0) %li(t0, 0) @@ -929,8 +966,8 @@ %ret .endscope -# libp1pp__memcmp(a=a0, b=a1, n=a2) -> -1/0/1 (a0) -:libp1pp__memcmp +# memcmp(a=a0, b=a1, n=a2) -> -1/0/1 (a0) +:memcmp .scope %li(t0, 0) :.loop diff --git a/docs/LIBP1PP.md b/docs/LIBP1PP.md @@ -345,10 +345,21 @@ body in `.scope` / `.endscope` if they want scope-local dotted labels. ### Byte-buffer primitives memcpy(dst, src, n) -> dst + memmove(dst, src, n) -> dst memset(dst, byte, n) -> dst memcmp(a, b, n) -> sign # -1 / 0 / 1 -`memcpy` does not support overlapping ranges where `dst > src && dst < src + n`. +These four entries are the **canonical compiler-builtin mem* runtime** +for every build chain in this tree. cc.scm + libp1pp, cc-libc (libp1pp ++ libc), tcc-cc, and tcc-gcc all resolve bare `extern memcpy` against +libp1pp here; the vendored mes-libc is flattened with its own copies +omitted so the symbols are not duplicated at hex2++ time, and the +gcc-built tcc-gcc binary links `tcc-cc/mem.c` for the same reason. + +`memcpy` does not support overlapping ranges where `dst > src && dst < src + n`; +use `memmove` for overlap. + +`memmove` picks the safe direction based on `dst` vs `src`. `memset` stores only the low 8 bits of `byte`. 
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md @@ -48,6 +48,13 @@ build/aarch64/tcc-boot2/tcc-boot2 \ ./build/aarch64/tests/tcc-cc/NAME ``` +`mem.o` is the compiler-builtin mem* runtime — `memcpy/memmove/memset` +that tcc emits direct calls to for struct copies and bulk init, plus +`memcmp` for fixtures that reach it via bare `extern int memcmp(...)`. +The tcc-gcc sibling supplies the equivalent four symbols by compiling +mes-libc's `string/{memcpy,memmove,memset,memcmp}.c` into its runtime +archive. + The result is compared against the same `.expected` and `.expected-exit` files used by the regular `cc` suite. The suite is aarch64-only today because it needs generated binaries to run natively @@ -62,22 +69,16 @@ NAMES='002-arith 007-call-with-args' make test SUITE=tcc-cc ## Latest Result ```text -make test SUITE=tcc-cc cc.scm-built tcc-boot2: 176 passed, 2 failed -scripts/run-gcc-libc-flat-tcc.sh gcc-built tcc-gcc: 176 passed, 2 failed +make test SUITE=tcc-cc cc.scm-built tcc-boot2: 178 passed, 1 failed +scripts/run-gcc-libc-flat-tcc.sh gcc-built tcc-gcc: 178 passed, 1 failed ``` -Exact parity. Both paths fail on the same two fixtures, neither of -which is a cc.scm bug: +Exact parity. The single remaining failure is the same on both paths +and is not a cc.scm bug: - **`200-lex-char-type`** — upstream tcc 0.9.26 bug (also fails under the gcc-built control). Fixing it requires a `simple-patches/` patch against tcc itself. -- **`129-extern-libp1pp`** — linkage-only failure. The fixture extern's - `libp1pp__memcpy` / `_memcmp` / `_memset` (the namespaced public - entry points from libp1pp), which neither tcc-cc nor the gcc-libc-flat - control link against. The fixture is a regression test for the cc.scm - `extern`-passthrough rule, not for tcc; running it against - tcc-built binaries is out of suite scope. 
The path from earlier results to here: @@ -87,6 +88,7 @@ The path from earlier results to here: | 163/15 | added `tcc-cc/mem.c` runtime; cleared the `mem*` undefined-symbol cluster | | 175/3 | cc.scm migration to M1pp + hex2++ pipeline (dotted local labels, `.scope`/`.endscope`, `.align` directives, bare-hex string emission) cleared the entire `assert fail: 0@12051` cluster (14 fixtures) plus a hex2pp.P1 BSS-overlap fix that unblocked the tcc-boot2 link itself for inputs >1 MiB | | 176/2 | ternary-arms common-type fix in `cg-ifelse-merge` cleared `220-const-promote` (was: arm 1's type leaked through as the result type, truncating wider arm 2 to 32-bit; tcc's `gen_opic` sign-extension idiom hit this) | +| 178/1 | reframed mem* as compiler builtins supplied by the build process: renamed libp1pp's `libp1pp__memcpy` / `_memcmp` / `_memset` to plain `memcpy` / `memcmp` / `memset` and added `memmove`; dropped mes-libc's `string/memcpy.c` / `memmove.c` / `memset.c` / `memcmp.c` from `unified-libc.c` so the symbols are not duplicated; added `memcmp` to `tcc-cc/mem.c` and linked it into the gcc-built tcc-gcc binary; updated and renamed the regression fixture (`129-extern-libp1pp` → `129-extern-mem-builtins`) to extern the plain names. Cleared the fixture on every path (cc, cc-libc, tcc-cc, tcc-gcc). | ## Host Baseline @@ -152,10 +154,6 @@ suite shouldn't depend on: ## Next steps The cc.scm path matches the gcc baseline; further `tcc-cc` progress is -gated on upstream tcc bugs, not on our compiler. Options when those -become priorities: +gated on upstream tcc bugs, not on our compiler. - Backport tcc's `200-lex-char-type` fix as a `simple-patches/` entry. -- Either move `tests/cc/129-extern-libp1pp.c` out of the directories - that `tcc-cc` runs against, or wire libp1pp into the tcc-cc link - set (mirrors what the cc-libc suite does). 
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp @@ -881,7 +881,7 @@ %mov(a0, t2) %la(a1, name_label) %li(a2, len) - %call(&libp1pp__memcmp) + %call(&memcmp) %bnez(a0, &.bad) %li(a0, value) %mkfix(a0, a0) @@ -2989,7 +2989,7 @@ %ld(a0, t1, %SYMENT.name_ptr) %ldl(a1, name_ptr) %ldl(a2, name_len) - %call(&libp1pp__memcmp) + %call(&memcmp) %beqz(a0, &.found) :.next @@ -3014,7 +3014,7 @@ %call(&alloc_bytes_main) %ldl(a1, name_ptr) %ldl(a2, name_len) - %call(&libp1pp__memcpy) ; returns dst in a0 = stable copy + %call(&memcpy) ; returns dst in a0 = stable copy %ldl(t0, idx) %symtab_entry(t1, t0, t2) @@ -3390,7 +3390,7 @@ %cdr(t0, t0) %stl(t0, args) - %call(&libp1pp__memcpy) + %call(&memcpy) %b(&.copy_loop) :.copy_done @@ -3425,7 +3425,7 @@ %ldl(a2, len) ; len %heap_ld(t0, a0, %BV.data) ; dst = bv.data %mov(a0, t0) - %call(&libp1pp__memcpy) + %call(&memcpy) %ldl(a0, bv) }) @@ -3985,7 +3985,7 @@ %ldl(a0, new_data_ptr) %heap_ld(a1, t0, %BV.data) ; old data ptr %ldl(a2, raw) - %call(&libp1pp__memcpy) + %call(&memcpy) %ldl(t0, bv) %ldl(t1, new_data_ptr) @@ -4812,7 +4812,7 @@ %add(a0, a0, t1) ; dst = data + old_len %ldl(a1, src) %ldl(a2, n) - %call(&libp1pp__memcpy) + %call(&memcpy) # hdr = (old_len + n) << 8 | HDR.BV. HDR.BV is 0. %ldl(t0, old_len) @@ -5731,7 +5731,7 @@ %ld(a1, t1, 0) %heap_ld(t1, t0, %BV.hdr) %shri(a2, t1, 8) - %call(&libp1pp__memcpy) + %call(&memcpy) # cell = cons(bv, NIL); append to list head/tail. 
%ldl(a0, bv) diff --git a/scripts/build-tcc-gcc.sh b/scripts/build-tcc-gcc.sh @@ -30,11 +30,13 @@ HARNESS=tcc-gcc/$ARCH mkdir -p "$(dirname "$OUT")" gcc -static -nodefaultlibs -nostartfiles -fno-stack-protector \ + -fno-builtin \ -Wno-implicit-function-declaration \ -Wno-builtin-declaration-mismatch \ -Wno-incompatible-pointer-types \ -Wno-int-conversion \ -e _start \ "$HARNESS/start.S" "$HARNESS/sys_stubs.c" \ + tcc-cc/mem.c \ "$TCC_FLAT" "$LIBC_FLAT" \ -lgcc -o "$OUT" diff --git a/tcc-cc/mem.c b/tcc-cc/mem.c @@ -2,7 +2,9 @@ struct copies and bulk zero-init past its inline thresholds. tcc's own libtcc1 (lib-arm64.o) does not define these — upstream assumes they come from libc, but the tcc-cc suite links with - -nostdlib so we ship them here. */ + -nostdlib so we ship them here. memcmp lives here too as a plain + compiler-builtin for fixtures that reach it via `extern int + memcmp(...)`. */ typedef unsigned long size_t; @@ -33,3 +35,15 @@ void *memset(void *dst, int c, size_t n) { while (n--) *d++ = b; return dst; } + +int memcmp(const void *a, const void *b, size_t n) { + const unsigned char *p = (const unsigned char *)a; + const unsigned char *q = (const unsigned char *)b; + while (n--) { + unsigned char x = *p++; + unsigned char y = *q++; + if (x < y) return -1; + if (x > y) return 1; + } + return 0; +} diff --git a/tests/cc/129-extern-libp1pp.c b/tests/cc/129-extern-libp1pp.c @@ -1,66 +0,0 @@ -/* Calls into libp1pp routines via plain C `extern` declarations. The - * libp1pp side provides `:libp1pp__memcpy`, `:libp1pp__memcmp`, and - * `:libp1pp__memset` as bare-name labels (see P1/P1pp.P1pp). For these - * to link, cc.scm must NOT prefix `extern`-but-not-defined-here symbols - * with its `cc__` namespace — bare-name extern decls should pass through. 
- * - * The `libp1pp__` prefix keeps these freestanding helpers from clashing - * with libc's own `:memcpy` / `:memcmp` / `:memset` when a translation - * unit is linked against the mes-libc chain (cc-libc / tcc-boot2). - * Plain tests/cc fixtures stay freestanding and bind the prefixed names - * directly. Do not test stdio/libc string APIs here. - */ - -extern void *libp1pp__memcpy(void *, const void *, unsigned long); -extern int libp1pp__memcmp(const void *, const void *, unsigned long); -extern void *libp1pp__memset(void *, int, unsigned long); - -int test_memcpy(void) { - char buf[8]; - libp1pp__memcpy(buf, "abcdefg", 8); - if (buf[0] != 'a') return 1; - if (buf[3] != 'd') return 2; - if (buf[6] != 'g') return 3; - if (buf[7] != 0) return 4; - return 0; -} - -int test_memcmp(void) { - if (libp1pp__memcmp("hello", "hello", 5) != 0) return 1; - if (libp1pp__memcmp("hello", "help!", 5) == 0) return 2; - if (libp1pp__memcmp("a", "b", 1) == 0) return 3; - return 0; -} - -int test_memset(void) { - char buf[6]; - libp1pp__memset(buf, 'X', 5); - buf[5] = 0; - if (buf[0] != 'X') return 1; - if (buf[2] != 'X') return 2; - if (buf[4] != 'X') return 3; - if (buf[5] != 0) return 4; - return 0; -} - -int test_extern_then_define(void) { - /* If a function is declared extern AND later defined here in the - * same TU, the definition's `cc__` prefix takes precedence — the - * scope-bind! merge sets defined?=#t, the call resolves to the - * local definition rather than the bare libp1pp symbol. */ - extern int helper_local(int); /* declared local */ - return helper_local(7); /* should call the cc__helper_local below */ -} - -int helper_local(int x) { - return x == 7 ? 
0 : 99; -} - -int main(int argc, char **argv) { - int r; - if ((r = test_memcpy())) return 20 + r; - if ((r = test_memcmp())) return 30 + r; - if ((r = test_memset())) return 40 + r; - if ((r = test_extern_then_define())) return 50 + r; - return 0; -} diff --git a/tests/cc/129-extern-mem-builtins.c b/tests/cc/129-extern-mem-builtins.c @@ -0,0 +1,62 @@ +/* Calls into mem* compiler-builtin helpers via plain C `extern` + * declarations. memcpy / memcmp / memset are treated as compiler + * builtins: every build process in this tree (cc.scm + libp1pp, + * cc-libc, tcc-cc, tcc-gcc) supplies an implementation, so a TU can + * reach them via bare `extern` decls without per-suite plumbing. The + * test exists to lock cc.scm's `extern`-passthrough rule: bare-name + * extern decls must NOT get prefixed with cc.scm's `cc__` namespace. + */ + +extern void *memcpy(void *, const void *, unsigned long); +extern int memcmp(const void *, const void *, unsigned long); +extern void *memset(void *, int, unsigned long); + +int test_memcpy(void) { + char buf[8]; + memcpy(buf, "abcdefg", 8); + if (buf[0] != 'a') return 1; + if (buf[3] != 'd') return 2; + if (buf[6] != 'g') return 3; + if (buf[7] != 0) return 4; + return 0; +} + +int test_memcmp(void) { + if (memcmp("hello", "hello", 5) != 0) return 1; + if (memcmp("hello", "help!", 5) == 0) return 2; + if (memcmp("a", "b", 1) == 0) return 3; + return 0; +} + +int test_memset(void) { + char buf[6]; + memset(buf, 'X', 5); + buf[5] = 0; + if (buf[0] != 'X') return 1; + if (buf[2] != 'X') return 2; + if (buf[4] != 'X') return 3; + if (buf[5] != 0) return 4; + return 0; +} + +int test_extern_then_define(void) { + /* If a function is declared extern AND later defined here in the + * same TU, the definition's `cc__` prefix takes precedence — the + * scope-bind! merge sets defined?=#t, the call resolves to the + * local definition rather than the bare external symbol. 
*/ + extern int helper_local(int); /* declared local */ + return helper_local(7); /* should call the cc__helper_local below */ +} + +int helper_local(int x) { + return x == 7 ? 0 : 99; +} + +int main(int argc, char **argv) { + int r; + if ((r = test_memcpy())) return 20 + r; + if ((r = test_memcmp())) return 30 + r; + if ((r = test_memset())) return 40 + r; + if ((r = test_extern_then_define())) return 50 + r; + return 0; +} diff --git a/tests/cc/129-extern-libp1pp.expected-exit b/tests/cc/129-extern-mem-builtins.expected-exit diff --git a/vendor/mes-libc/unified-libc.c b/vendor/mes-libc/unified-libc.c @@ -40,12 +40,11 @@ #include "ctype/toupper.c" /* ---- string ----------------------------------------------------- */ +/* memcpy/memmove/memset/memcmp are compiler-builtin runtime supplied + by libp1pp (see P1/P1pp.P1pp). Omit mes-libc's copies so the + symbols are not duplicated at hex2++ link time. */ #include "string/memchr.c" -#include "string/memcmp.c" -#include "string/memcpy.c" #include "string/memmem.c" -#include "string/memmove.c" -#include "string/memset.c" #include "string/strcat.c" #include "string/strchr.c" #include "string/strcmp.c"