commit 39fab2ed7106d643242b35b0fb553b5f659eb5d9
parent cdadeb225ec44876ca034f3552b7a0675a1598f3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 22:38:14 -0700
libp1pp: promote mem* to canonical compiler-builtin runtime
Rename libp1pp__memcpy / _memcmp / _memset to plain memcpy / memcmp /
memset and add memmove. Drop the same four entries from the mes-libc
flatten so libp1pp owns the symbols across every build chain — cc.scm
+ libp1pp, cc-libc (libp1pp + libc), tcc-cc, and tcc-gcc all resolve
bare extern memcpy against one source. tcc-cc/mem.c gains memcmp and
is linked into the gcc-built tcc-gcc binary so its libc.flat.c stays
mem*-free without breaking that path. Update %memcpy_call and the
scheme1 call sites; rename and reframe 129-extern-libp1pp ->
129-extern-mem-builtins.
cc 178/1 -> 179/0; cc-libc, tcc-cc, tcc-gcc unchanged at parity.
Diffstat:
11 files changed, 165 insertions(+), 106 deletions(-)
diff --git a/Makefile b/Makefile
@@ -380,6 +380,8 @@ $(TCC_CC_START): tcc-cc/$(TCC_CC_ARCH)/start.S
# its ARM64 libtcc1 (lib-arm64.o) doesn't define them — upstream
# expects libc to. The tcc-cc suite links -nostdlib, so we compile
# this fallback with tcc-boot2 itself and link it alongside start.o.
+# memcmp lives here too, as a plain compiler-builtin for fixtures
+# that reach it via `extern int memcmp(...)`.
$(TCC_CC_MEM): tcc-cc/mem.c \
build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \
build/$(TCC_CC_ARCH)/.image
diff --git a/P1/P1pp.P1pp b/P1/P1pp.P1pp
@@ -289,7 +289,7 @@
%li(a2, n_imm)
%mov(a1, src_reg)
%mov(a0, dst_reg)
- %call(&libp1pp__memcpy)
+ %call(&memcpy)
%endm
# =========================================================================
@@ -890,13 +890,15 @@
# Memory and strings
# =========================================================================
-# libp1pp__memcpy(dst=a0, src=a1, n=a2) -> dst (a0)
+# memcpy(dst=a0, src=a1, n=a2) -> dst (a0)
# Leaf. Copies n bytes from src to dst. No overlap support where
-# dst > src && dst < src + n. Internal name so a libc that defines its
-# own :memcpy can be linked alongside libp1pp without duplicate-label
-# errors at hex2++ time. cc.scm's %memcpy_call macro and scheme1
-# bind this prefixed name directly.
-:libp1pp__memcpy
+# dst > src && dst < src + n; use memmove for that case. These mem*
+# entries are the canonical compiler-builtin runtime for every chain
+# linked via hex2++ (cc.scm + libp1pp, cc-libc, tcc-boot2); mes-libc
+# is flattened without its own memcpy/memmove/memset/memcmp so the
+# labels are not duplicated there. Outputs that cannot link libp1pp
+# (gcc-built tcc-gcc, tcc-cc fixtures) use tcc-cc/mem.c instead.
+:memcpy
.scope
%mov(a3, a0)
%li(t0, 0)
@@ -913,8 +915,43 @@
%ret
.endscope
-# libp1pp__memset(dst=a0, byte=a1, n=a2) -> dst (a0)
-:libp1pp__memset
+# memmove(dst=a0, src=a1, n=a2) -> dst (a0)
+# Leaf. Like memcpy but tolerates overlap by picking the safe direction:
+# forward when dst < src, backward (high byte first) when dst > src.
+# Returns at once when dst == src or n == 0. a3 keeps the original dst
+# for the return value; t0 is the byte index, t1/t2 are scratch.
+:memmove
+.scope
+ %mov(a3, a0)
+ %beq(a0, a1, &.done)
+ # The n == 0 exit is required, not just a shortcut: the backward
+ # loop decrements t0 before testing it, so it must start at n >= 1.
+ %beqz(a2, &.done)
+ %bltu(a0, a1, &.fwd)
+ # dst > src: copy from the high end down so each src byte is read
+ # before a store into the overlapping dst range can overwrite it.
+ %mov(t0, a2)
+ :.bwd_loop
+ # t0 runs n-1 .. 0; the byte at index t0 is moved, then t0 is tested.
+ %addi(t0, t0, -1)
+ %add(t1, a1, t0)
+ %lb(t1, t1, 0)
+ %add(t2, a3, t0)
+ %sb(t1, t2, 0)
+ %bnez(t0, &.bwd_loop)
+ %b(&.done)
+ :.fwd
+ # dst < src: forward copy is safe. t0 runs 0 .. n-1.
+ %li(t0, 0)
+ :.fwd_loop
+ %beq(t0, a2, &.done)
+ %add(t1, a1, t0)
+ %lb(t1, t1, 0)
+ %add(t2, a3, t0)
+ %sb(t1, t2, 0)
+ %addi(t0, t0, 1)
+ %b(&.fwd_loop)
+ :.done
+ # Hand back the original dst saved on entry.
+ %mov(a0, a3)
+ %ret
+.endscope
+
+# memset(dst=a0, byte=a1, n=a2) -> dst (a0)
+:memset
.scope
%mov(a3, a0)
%li(t0, 0)
@@ -929,8 +966,8 @@
%ret
.endscope
-# libp1pp__memcmp(a=a0, b=a1, n=a2) -> -1/0/1 (a0)
-:libp1pp__memcmp
+# memcmp(a=a0, b=a1, n=a2) -> -1/0/1 (a0)
+:memcmp
.scope
%li(t0, 0)
:.loop
diff --git a/docs/LIBP1PP.md b/docs/LIBP1PP.md
@@ -345,10 +345,21 @@ body in `.scope` / `.endscope` if they want scope-local dotted labels.
### Byte-buffer primitives
memcpy(dst, src, n) -> dst
+ memmove(dst, src, n) -> dst
memset(dst, byte, n) -> dst
memcmp(a, b, n) -> sign # -1 / 0 / 1
-`memcpy` does not support overlapping ranges where `dst > src && dst < src + n`.
+These four entries are the **canonical compiler-builtin `mem*` runtime**
+for every chain that links libp1pp: cc.scm + libp1pp, cc-libc (libp1pp
+plus libc), and the cc.scm-built tcc-boot2 behind tcc-cc resolve bare
+`extern memcpy` here, and the vendored mes-libc is flattened with its own
+copies omitted so the symbols are not duplicated at hex2++ time. The
+gcc-built tcc-gcc binary cannot link libp1pp and uses `tcc-cc/mem.c` instead.
+
+`memcpy` does not support overlapping ranges where `dst > src && dst < src + n`;
+use `memmove` for overlap.
+
+`memmove` picks the safe direction based on `dst` vs `src`.
`memset` stores only the low 8 bits of `byte`.
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -48,6 +48,13 @@ build/aarch64/tcc-boot2/tcc-boot2 \
./build/aarch64/tests/tcc-cc/NAME
```
+`mem.o` is the compiler-builtin mem* runtime — `memcpy/memmove/memset`
+that tcc emits direct calls to for struct copies and bulk init, plus
+`memcmp` for fixtures that reach it via bare `extern int memcmp(...)`.
+The tcc-gcc sibling supplies the equivalent four symbols by compiling
+mes-libc's `string/{memcpy,memmove,memset,memcmp}.c` into its runtime
+archive.
+
The result is compared against the same `.expected` and
`.expected-exit` files used by the regular `cc` suite. The suite is
aarch64-only today because it needs generated binaries to run natively
@@ -62,22 +69,16 @@ NAMES='002-arith 007-call-with-args' make test SUITE=tcc-cc
## Latest Result
```text
-make test SUITE=tcc-cc cc.scm-built tcc-boot2: 176 passed, 2 failed
-scripts/run-gcc-libc-flat-tcc.sh gcc-built tcc-gcc: 176 passed, 2 failed
+make test SUITE=tcc-cc cc.scm-built tcc-boot2: 178 passed, 1 failed
+scripts/run-gcc-libc-flat-tcc.sh gcc-built tcc-gcc: 178 passed, 1 failed
```
-Exact parity. Both paths fail on the same two fixtures, neither of
-which is a cc.scm bug:
+Exact parity. The single remaining failure is the same on both paths
+and is not a cc.scm bug:
- **`200-lex-char-type`** — upstream tcc 0.9.26 bug (also fails under
the gcc-built control). Fixing it requires a `simple-patches/` patch
against tcc itself.
-- **`129-extern-libp1pp`** — linkage-only failure. The fixture extern's
- `libp1pp__memcpy` / `_memcmp` / `_memset` (the namespaced public
- entry points from libp1pp), which neither tcc-cc nor the gcc-libc-flat
- control link against. The fixture is a regression test for the cc.scm
- `extern`-passthrough rule, not for tcc; running it against
- tcc-built binaries is out of suite scope.
The path from earlier results to here:
@@ -87,6 +88,7 @@ The path from earlier results to here:
| 163/15 | added `tcc-cc/mem.c` runtime; cleared the `mem*` undefined-symbol cluster |
| 175/3 | cc.scm migration to M1pp + hex2++ pipeline (dotted local labels, `.scope`/`.endscope`, `.align` directives, bare-hex string emission) cleared the entire `assert fail: 0@12051` cluster (14 fixtures) plus a hex2pp.P1 BSS-overlap fix that unblocked the tcc-boot2 link itself for inputs >1 MiB |
| 176/2 | ternary-arms common-type fix in `cg-ifelse-merge` cleared `220-const-promote` (was: arm 1's type leaked through as the result type, truncating wider arm 2 to 32-bit; tcc's `gen_opic` sign-extension idiom hit this) |
+| 178/1 | reframed mem* as compiler builtins supplied by the build process: renamed libp1pp's `libp1pp__memcpy` / `_memcmp` / `_memset` to plain `memcpy` / `memcmp` / `memset` and added `memmove`; dropped mes-libc's `string/memcpy.c` / `memmove.c` / `memset.c` / `memcmp.c` from `unified-libc.c` so the symbols are not duplicated; added `memcmp` to `tcc-cc/mem.c` and linked it into the gcc-built tcc-gcc binary; updated and renamed the regression fixture (`129-extern-libp1pp` → `129-extern-mem-builtins`) to extern the plain names. Cleared the fixture on every path (cc, cc-libc, tcc-cc, tcc-gcc). |
## Host Baseline
@@ -152,10 +154,6 @@ suite shouldn't depend on:
## Next steps
The cc.scm path matches the gcc baseline; further `tcc-cc` progress is
-gated on upstream tcc bugs, not on our compiler. Options when those
-become priorities:
+gated on upstream tcc bugs, not on our compiler.
- Backport tcc's `200-lex-char-type` fix as a `simple-patches/` entry.
-- Either move `tests/cc/129-extern-libp1pp.c` out of the directories
- that `tcc-cc` runs against, or wire libp1pp into the tcc-cc link
- set (mirrors what the cc-libc suite does).
diff --git a/scheme1/scheme1.P1pp b/scheme1/scheme1.P1pp
@@ -881,7 +881,7 @@
%mov(a0, t2)
%la(a1, name_label)
%li(a2, len)
- %call(&libp1pp__memcmp)
+ %call(&memcmp)
%bnez(a0, &.bad)
%li(a0, value)
%mkfix(a0, a0)
@@ -2989,7 +2989,7 @@
%ld(a0, t1, %SYMENT.name_ptr)
%ldl(a1, name_ptr)
%ldl(a2, name_len)
- %call(&libp1pp__memcmp)
+ %call(&memcmp)
%beqz(a0, &.found)
:.next
@@ -3014,7 +3014,7 @@
%call(&alloc_bytes_main)
%ldl(a1, name_ptr)
%ldl(a2, name_len)
- %call(&libp1pp__memcpy) ; returns dst in a0 = stable copy
+ %call(&memcpy) ; returns dst in a0 = stable copy
%ldl(t0, idx)
%symtab_entry(t1, t0, t2)
@@ -3390,7 +3390,7 @@
%cdr(t0, t0)
%stl(t0, args)
- %call(&libp1pp__memcpy)
+ %call(&memcpy)
%b(&.copy_loop)
:.copy_done
@@ -3425,7 +3425,7 @@
%ldl(a2, len) ; len
%heap_ld(t0, a0, %BV.data) ; dst = bv.data
%mov(a0, t0)
- %call(&libp1pp__memcpy)
+ %call(&memcpy)
%ldl(a0, bv)
})
@@ -3985,7 +3985,7 @@
%ldl(a0, new_data_ptr)
%heap_ld(a1, t0, %BV.data) ; old data ptr
%ldl(a2, raw)
- %call(&libp1pp__memcpy)
+ %call(&memcpy)
%ldl(t0, bv)
%ldl(t1, new_data_ptr)
@@ -4812,7 +4812,7 @@
%add(a0, a0, t1) ; dst = data + old_len
%ldl(a1, src)
%ldl(a2, n)
- %call(&libp1pp__memcpy)
+ %call(&memcpy)
# hdr = (old_len + n) << 8 | HDR.BV. HDR.BV is 0.
%ldl(t0, old_len)
@@ -5731,7 +5731,7 @@
%ld(a1, t1, 0)
%heap_ld(t1, t0, %BV.hdr)
%shri(a2, t1, 8)
- %call(&libp1pp__memcpy)
+ %call(&memcpy)
# cell = cons(bv, NIL); append to list head/tail.
%ldl(a0, bv)
diff --git a/scripts/build-tcc-gcc.sh b/scripts/build-tcc-gcc.sh
@@ -30,11 +30,13 @@ HARNESS=tcc-gcc/$ARCH
mkdir -p "$(dirname "$OUT")"
gcc -static -nodefaultlibs -nostartfiles -fno-stack-protector \
+ -fno-builtin \
-Wno-implicit-function-declaration \
-Wno-builtin-declaration-mismatch \
-Wno-incompatible-pointer-types \
-Wno-int-conversion \
-e _start \
"$HARNESS/start.S" "$HARNESS/sys_stubs.c" \
+ tcc-cc/mem.c \
"$TCC_FLAT" "$LIBC_FLAT" \
-lgcc -o "$OUT"
diff --git a/tcc-cc/mem.c b/tcc-cc/mem.c
@@ -2,7 +2,9 @@
struct copies and bulk zero-init past its inline thresholds.
tcc's own libtcc1 (lib-arm64.o) does not define these — upstream
assumes they come from libc, but the tcc-cc suite links with
- -nostdlib so we ship them here. */
+ -nostdlib so we ship them here. memcmp lives here too as a plain
+ compiler-builtin for fixtures that reach it via `extern int
+ memcmp(...)`. */
typedef unsigned long size_t;
@@ -33,3 +35,15 @@ void *memset(void *dst, int c, size_t n) {
while (n--) *d++ = b;
return dst;
}
+
+/* Compare the first n bytes of a and b as unsigned char values.
+ * Returns -1 if the first differing byte of a is lower, 1 if it is
+ * higher, and 0 when all n bytes match (including n == 0). */
+int memcmp(const void *a, const void *b, size_t n) {
+ const unsigned char *p = (const unsigned char *)a;
+ const unsigned char *q = (const unsigned char *)b;
+ while (n--) {
+ unsigned char x = *p++;
+ unsigned char y = *q++;
+ if (x < y) return -1;
+ if (x > y) return 1;
+ }
+ return 0;
+}
diff --git a/tests/cc/129-extern-libp1pp.c b/tests/cc/129-extern-libp1pp.c
@@ -1,66 +0,0 @@
-/* Calls into libp1pp routines via plain C `extern` declarations. The
- * libp1pp side provides `:libp1pp__memcpy`, `:libp1pp__memcmp`, and
- * `:libp1pp__memset` as bare-name labels (see P1/P1pp.P1pp). For these
- * to link, cc.scm must NOT prefix `extern`-but-not-defined-here symbols
- * with its `cc__` namespace — bare-name extern decls should pass through.
- *
- * The `libp1pp__` prefix keeps these freestanding helpers from clashing
- * with libc's own `:memcpy` / `:memcmp` / `:memset` when a translation
- * unit is linked against the mes-libc chain (cc-libc / tcc-boot2).
- * Plain tests/cc fixtures stay freestanding and bind the prefixed names
- * directly. Do not test stdio/libc string APIs here.
- */
-
-extern void *libp1pp__memcpy(void *, const void *, unsigned long);
-extern int libp1pp__memcmp(const void *, const void *, unsigned long);
-extern void *libp1pp__memset(void *, int, unsigned long);
-
-int test_memcpy(void) {
- char buf[8];
- libp1pp__memcpy(buf, "abcdefg", 8);
- if (buf[0] != 'a') return 1;
- if (buf[3] != 'd') return 2;
- if (buf[6] != 'g') return 3;
- if (buf[7] != 0) return 4;
- return 0;
-}
-
-int test_memcmp(void) {
- if (libp1pp__memcmp("hello", "hello", 5) != 0) return 1;
- if (libp1pp__memcmp("hello", "help!", 5) == 0) return 2;
- if (libp1pp__memcmp("a", "b", 1) == 0) return 3;
- return 0;
-}
-
-int test_memset(void) {
- char buf[6];
- libp1pp__memset(buf, 'X', 5);
- buf[5] = 0;
- if (buf[0] != 'X') return 1;
- if (buf[2] != 'X') return 2;
- if (buf[4] != 'X') return 3;
- if (buf[5] != 0) return 4;
- return 0;
-}
-
-int test_extern_then_define(void) {
- /* If a function is declared extern AND later defined here in the
- * same TU, the definition's `cc__` prefix takes precedence — the
- * scope-bind! merge sets defined?=#t, the call resolves to the
- * local definition rather than the bare libp1pp symbol. */
- extern int helper_local(int); /* declared local */
- return helper_local(7); /* should call the cc__helper_local below */
-}
-
-int helper_local(int x) {
- return x == 7 ? 0 : 99;
-}
-
-int main(int argc, char **argv) {
- int r;
- if ((r = test_memcpy())) return 20 + r;
- if ((r = test_memcmp())) return 30 + r;
- if ((r = test_memset())) return 40 + r;
- if ((r = test_extern_then_define())) return 50 + r;
- return 0;
-}
diff --git a/tests/cc/129-extern-mem-builtins.c b/tests/cc/129-extern-mem-builtins.c
@@ -0,0 +1,62 @@
+/* Calls into mem* compiler-builtin helpers via plain C `extern`
+ * declarations. memcpy / memcmp / memset are treated as compiler
+ * builtins: every build process in this tree (cc.scm + libp1pp,
+ * cc-libc, tcc-cc, tcc-gcc) supplies an implementation, so a TU can
+ * reach them via bare `extern` decls without per-suite plumbing. The
+ * test exists to lock cc.scm's `extern`-passthrough rule: bare-name
+ * extern decls must NOT get prefixed with cc.scm's `cc__` namespace.
+ */
+
+extern void *memcpy(void *, const void *, unsigned long);
+extern int memcmp(const void *, const void *, unsigned long);
+extern void *memset(void *, int, unsigned long);
+
+/* Copies the 8-byte literal (7 characters plus NUL) through the extern
+ * memcpy and spot-checks bytes 0, 3, 6 and the terminator at 7.
+ * Returns 0 on success, otherwise the number of the failing check. */
+int test_memcpy(void) {
+ char buf[8];
+ memcpy(buf, "abcdefg", 8);
+ if (buf[0] != 'a') return 1;
+ if (buf[3] != 'd') return 2;
+ if (buf[6] != 'g') return 3;
+ if (buf[7] != 0) return 4;
+ return 0;
+}
+
+/* Checks the extern memcmp for equal and unequal inputs. Only zero vs.
+ * nonzero is asserted; the sign of a nonzero result is not checked.
+ * Returns 0 on success, otherwise the number of the failing check. */
+int test_memcmp(void) {
+ if (memcmp("hello", "hello", 5) != 0) return 1;
+ if (memcmp("hello", "help!", 5) == 0) return 2;
+ if (memcmp("a", "b", 1) == 0) return 3;
+ return 0;
+}
+
+/* Fills the first 5 bytes of a 6-byte buffer with 'X' through the
+ * extern memset, terminates it by hand, and spot-checks bytes 0, 2, 4
+ * and the terminator. Returns 0 on success, otherwise the number of
+ * the failing check. */
+int test_memset(void) {
+ char buf[6];
+ memset(buf, 'X', 5);
+ buf[5] = 0;
+ if (buf[0] != 'X') return 1;
+ if (buf[2] != 'X') return 2;
+ if (buf[4] != 'X') return 3;
+ if (buf[5] != 0) return 4;
+ return 0;
+}
+
+/* Declares helper_local with a block-scope extern, then calls it; the
+ * definition appears later in this translation unit. Returns whatever
+ * helper_local(7) returns, which is 0 when the local definition is
+ * the one reached. */
+int test_extern_then_define(void) {
+ /* If a function is declared extern AND later defined here in the
+ * same TU, the definition's `cc__` prefix takes precedence — the
+ * scope-bind! merge sets defined?=#t, the call resolves to the
+ * local definition rather than the bare external symbol. */
+ extern int helper_local(int); /* declared local */
+ return helper_local(7); /* should call the cc__helper_local below */
+}
+
+/* Target of the extern-then-define check: returns 0 when called with
+ * 7 and 99 for any other argument. */
+int helper_local(int x) {
+ return x == 7 ? 0 : 99;
+}
+
+/* Runs the four checks in order and stops at the first failure. The
+ * exit code is the group base (20 memcpy, 30 memcmp, 40 memset,
+ * 50 extern-then-define) plus the failing check's return value; 0 means
+ * every check passed. argc and argv are not used. */
+int main(int argc, char **argv) {
+ int r;
+ if ((r = test_memcpy())) return 20 + r;
+ if ((r = test_memcmp())) return 30 + r;
+ if ((r = test_memset())) return 40 + r;
+ if ((r = test_extern_then_define())) return 50 + r;
+ return 0;
+}
diff --git a/tests/cc/129-extern-libp1pp.expected-exit b/tests/cc/129-extern-mem-builtins.expected-exit
diff --git a/vendor/mes-libc/unified-libc.c b/vendor/mes-libc/unified-libc.c
@@ -40,12 +40,11 @@
#include "ctype/toupper.c"
/* ---- string ----------------------------------------------------- */
+/* memcpy/memmove/memset/memcmp are compiler-builtin runtime supplied
+ by libp1pp (see P1/P1pp.P1pp). Omit mes-libc's copies so the
+ symbols are not duplicated at hex2++ link time. */
#include "string/memchr.c"
-#include "string/memcmp.c"
-#include "string/memcpy.c"
#include "string/memmem.c"
-#include "string/memmove.c"
-#include "string/memset.c"
#include "string/strcat.c"
#include "string/strchr.c"
#include "string/strcmp.c"