commit d411ecb83f52cf356983e9a3a877e54ace7849ae
parent 9dfc688cb237a9e09da34062faf775820a01c67c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 2 May 2026 08:29:19 -0700
cc/cg: struct/union `=` must memcpy whole aggregate
cg-assign emitted a single 8-byte load+store for every type, so any
struct/union assignment of size > 8 bytes silently dropped fields at
offset >= 8. SValue is 64 bytes and tcc's vswap() does three struct
copies, so every vswap was a partial no-op: only the first 8 bytes
swapped, leaving SValue.r (offset 16) etc. unchanged. That made
gv2(rc1, rc2)'s `vswap; gv(rc1); vswap; gv(rc2)` sequence load only
vtop[0], leaving vtop[-1] at VT_LOCAL|VT_LVAL, and the
arm64-gen.c assert(vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST) at
the top of every binop fired across 30 tcc-cc fixtures.
Fix: route the parser's `=` operator through a new cg-assign-struct
when the lhs type is struct/union. cg-assign-struct emits %memcpy_call
between the lvalue and the rhs, then pushes lhs back so the
assignment expression has a result for parse-expr-stmt's trailing
cg-pop. The scalar path is unchanged.
A struct rhs may show up as a frame *rvalue* (not just an lvalue) —
the comma operator forces rval! on its right-hand subexpression and
function/ternary spill paths produce the same shape. The existing
%cg-emit-addr-of refuses non-lvalues, so cg-assign-struct goes
through a new sibling %cg-emit-addr-of-any that lea's the frame slot
regardless of the lval flag.
tcc-cc (aarch64) improves from 14 passed/162 failed to 148 passed/30 failed
on the ARM64-targeted tcc-boot2; all `vtop[-1].r < VT_CONST` failures clear.
docs/TCC-TODO.md refreshed with the new pass count, regrouped failure
clusters, and updated next-debug pointers.
Tests:
333-struct-assign-big direct + pointer-indexed swap of a
40-byte struct with sub-word fields past
offset 8 (the SValue/vswap shape).
334-struct-assign-rval-rhs `y = (e, a)` — comma-rval'd struct on the
rhs, exercising %cg-emit-addr-of-any.
Without the helper, cc.scm aborts with
"cg-emit-addr-of: not an lvalue".
cc 177/177, cc-cg 58/58, cc-pp 44/44, cc-libc 17/17 stay green.
Diffstat:
6 files changed, 212 insertions(+), 97 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -3362,6 +3362,40 @@
(%cg-emit-addr-of cg dst 't2)
(%cg-emit-byte-copy cg 't2 't0 't1 sz)))
+;; Struct/union `=` assignment: pop src (lvalue or frame rvalue), pop
+;; dst (must be an lvalue), copy dst's ctype-size bytes src -> dst,
+;; then push dst back so the assignment expression leaves a result
+;; for the parser to consume (e.g. parse-expr-stmt's trailing cg-pop).
+;; Distinct from cg-copy-struct, whose initializer caller needs no result.
+(define (cg-assign-struct cg)
+ (let* ((src (cg-pop cg)) ; rhs is on top of the vstack
+ (dst (cg-pop cg)) ; lhs sits directly beneath it
+ (sty (opnd-type dst)) ; the copy size comes from the lhs type
+ (sz (ctype-size sty)))
+ (cond ((not (opnd-lval? dst)) (die #f "cg-assign-struct: dst not lvalue"))) ; reject a non-assignable lhs
+ ;; A struct rhs may show up as either a frame lvalue (named local
+ ;; slot, *p deref, callee return-slot) or a frame rvalue (anonymous
+ ;; slot from a temp spill). Either way its storage address is the
+ ;; copy source, so take it via %cg-emit-addr-of-any, which does not
+ ;; enforce %cg-emit-addr-of's lval-only contract.
+ (%cg-emit-addr-of-any cg src 't0) ; t0 <- address of src
+ (%cg-emit-addr-of cg dst 't2) ; t2 <- address of dst
+ (%cg-emit-byte-copy cg 't2 't0 't1 sz) ; same argument order as cg-copy-struct's call
+ (cg-push cg dst))) ; result of the assignment expression is the lhs
+
+(define (%cg-emit-addr-of-any cg op reg) ; emit code putting op's address in reg, lvalue or not
+ (let ((reg-bv (%cg-reg->bv reg))) ; only the lea clause below uses this form of reg
+ (pmatch op
+ (($ opnd? (kind frame) (lval? #t) (ext ,off))
+ (guard (%cg-indirect? cg off)) ; frame lvalue whose slot %cg-indirect? flags
+ (%cg-emit-ld-slot cg reg off)) ; presumably the slot holds the address, so load it -- TODO confirm
+ (($ opnd? (kind frame) (ext ,off)) ; every other frame opnd; the lval flag is not inspected
+ (%cg-emit-lea-slot cg reg-bv (%cg-slot-expr cg off))) ; lea of the frame slot itself
+ (($ opnd? (kind global) (ext ,lbl)) ; globals are matched without checking lval?
+ (%cg-emit-la cg reg lbl))
+ (else (die #f "cg-emit-addr-of-any: unsupported opnd" ; any other opnd kind is rejected
+ (opnd-kind op) (opnd-lval? op))))))
+
;; Struct copy: defer to libp1pp memcpy via %memcpy_call. dst-reg and
;; src-reg hold the addresses; size is the byte count. tmp-reg is no
;; longer needed by this helper (kept in the signature so existing
@@ -6707,8 +6741,23 @@
(cg-pop (ps-cg ps))
(parse-expr-bp ps rb) (rval! ps))
((eq? op 'assign)
- (parse-expr-bp ps rb) (rval! ps)
- (cg-assign (ps-cg ps)))
+ ;; Struct/union assignment must memcpy the whole
+ ;; aggregate. The scalar cg-assign path loads/stores
+ ;; via a single 8-byte register, dropping any field at
+ ;; offset >= 8. Detect via the lhs (already on the
+ ;; vstack) and route to cg-assign-struct, which keeps
+ ;; rhs as an lvalue and emits a memcpy.
+ (let* ((lhs-top (cg-top (ps-cg ps)))
+ (lk (cond ((and (opnd? lhs-top) (opnd-lval? lhs-top))
+ (ctype-kind (opnd-type lhs-top)))
+ (else #f))))
+ (cond
+ ((or (eq? lk 'struct) (eq? lk 'union))
+ (parse-expr-bp ps rb)
+ (cg-assign-struct (ps-cg ps)))
+ (else
+ (parse-expr-bp ps rb) (rval! ps)
+ (cg-assign (ps-cg ps))))))
((compound-op op)
(let ((b (compound-op op)))
(cg-dup (ps-cg ps))
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -74,17 +74,19 @@ make test SUITE=tcc-cc
Result:
```text
-14 passed, 162 failed
+148 passed, 30 failed
```
-(176 fixtures total. The fixture set has grown since the previous
-snapshot; the same 14 fixtures still pass. `068-main-noret` was
-retired earlier as part of the host-baseline cleanup;
-`134-decl-define-in-ifdef` was added as a regression for the
-`pps-cond-stack` promotion fix in cc.scm. Newer additions in the
-2xx/3xx ranges exercise lex/preproc/codegen edges; only
-`220-const-promote` from that range currently makes it past compile
-on the cc.scm-built path, and it then exits wrong.)
+(178 fixtures total. The big jump from the previous `14 passed, 162
+failed` snapshot came from fixing one cc.scm bug: cg-assign treated
+`=` as scalar (8-byte load+store) for every type, so any struct/union
+assignment of size > 8 bytes silently dropped fields at offset ≥ 8.
+`SValue` is 64 bytes and `vswap()` does three struct copies, so every
+vswap was a partial no-op; with that fixed, the dominant
+`vtop[-1].r < VT_CONST` cluster turned green. The fix routes
+struct/union `=` through a new `cg-assign-struct` that emits a memcpy
+(see `tests/cc/333-struct-assign-big.c`, plus `334-struct-assign-rval-rhs.c`
+for the comma-operator rval-of-struct rhs path).)
Raw run log:
@@ -92,7 +94,7 @@ Raw run log:
build/aarch64/.work/tests/tcc-cc/full-run.log
```
-Most `store(...); assert fail: 0` lines are now prefixed with a
+The remaining `assert fail: 0` lines are still prefixed with a
`vfprintf: skipping second: l` line. That's mes-libc's `vfprintf`
warning that it ignored a second `l` length-modifier in tcc's
`%lld` format strings (vendor/mes-libc/stdio/vfprintf.c:89). The
@@ -100,40 +102,17 @@ warning is benign noise from the cc.scm-built tcc-boot2's runtime
libc — not from the failing fixture itself — and it appears in a
fixture's tcc.log because we capture tcc-boot2's stderr there.
-Passing fixtures:
-
-```text
-000-empty-main
-000-return-argc
-001-return-argc
-002-add-const
-003-local-assign
-004-if-else
-005-while-break
-012-comparison
-014-mul-paren
-018-sext-narrow
-026-sizeof-expr
-049-init-scalar-global
-072-enum-const
-134-decl-define-in-ifdef
-```
-
Failure groups from per-fixture `tcc.log` files:
| group | count | examples |
|------:|------:|----------|
-| plain segfault during compile/link | 73 | `006-call-no-args`, `008-pointer-deref`, `011-struct`, `054-init-struct-desig`, `310-tag-shadow-inner-scope` |
-| `store(...); assert fail: 0`, then segfault | 54 | `002-arith`, `004-inc-dec`, `020-switch`, `133-for-continue` |
-| `assert fail: vtop[-1].r < VT_CONST && vtop[0].r < VT_CONST`, then segfault | 30 | `007-call-with-args`, `013-call`, `015-variadic`, `024-globals`, `076-vararg-recv`, `240-parse-const-shortcircuit`, `290-parse-const-ternary-shortcircuit` |
-| compile succeeds, generated program exits wrong | 3 | `019-zext-narrow`, `101-char-escapes`, `220-const-promote` |
-| `too many field init` diagnostic | 1 | `001-kitchen-sink` |
-| `field not found` diagnostic | 1 | `331-fs-compound-struct-addr` |
-
-The shape is unchanged: 159 of 162 failures happen before the
-generated fixture binary runs. The dominant problem is still the
-compiled `tcc-boot2` while it is compiling/linking C input, not the
-runtime behavior of most generated test binaries.
+| `assert fail: 0`, then segfault | 14 | `001-kitchen-sink`, `003-compound`, `013-call`, `019-static`, `027-void-call`, `071-fnptr-call`, `082-union-basic`, `117-compound-literal`, `118-const-expr`, `127-string-escapes`, `129-extern-libp1pp`, `131-vararg-mixed`, `200-lex-char-type`, `250-stringize-punct` |
+| `tcc: error: undefined symbol 'memmove'` | 10 | `084-struct-assign`, `109-typedef-anon`, `111-struct-ret-1word` … `116-struct-ret-vararg`, `333-struct-assign-big`, `334-struct-assign-rval-rhs` |
+| `tcc: error: undefined symbol 'memset'` | 5 | `032-local-struct-desig`, `096-fwd-struct`, `099-init-zero-tail`, `108-typedef-fnptr`, `125-anon-union` |
+| compile succeeds, generated program exits wrong | 1 | `220-const-promote` |
+
+29 of 30 failures still happen before the generated fixture binary
+runs.
One failure is not cc.scm miscompilation — it reproduces on the
gcc-built control (see Host Baseline below):
@@ -142,58 +121,43 @@ gcc-built control (see Host Baseline below):
This is an upstream tcc bug and would need a `simple-patches/`
patch to fix. It caps the achievable cc.scm-built result at
-`175 passed, 1 failed` until tcc itself is patched.
-
-The const-expression short-circuit pair (`240-parse-const-shortcircuit`
-and `290-parse-const-ternary-shortcircuit`) used to also be capped by
-upstream tcc — `gen_opic` raised `division by zero in constant` from
-`1 || (1/0)` and `1 ? 7 : 1/0` because `expr_land`/`expr_lor`/
-`expr_cond` bumped `nocode_wanted` around the unevaluated arm but
-left `const_wanted` set, so const-folding still aborted. The
-`const-divzero-shortcircuit-int` simple-patch in
-`scripts/simple-patches/tcc-0.9.26/` gates that error on
-`!nocode_wanted`. Both fixtures now pass on the gcc-built control;
-on the cc.scm-built path they shifted into the dominant
-`vtop[-1].r < VT_CONST` cluster (cc.scm-side miscompile), which is
-why this fix didn't move the cc.scm-built pass count.
-
-Two diagnostics are new on this snapshot and look like targeted
-miscompiles rather than missing categories:
-
-- `001-kitchen-sink:29: too many field init` — this fixture parses
- on the gcc-built tcc, so cc.scm-built tcc has a corrupted
- field-init counter on this aggregate shape.
-- `331-fs-compound-struct-addr:3: field not found: x` — file-scope
- compound literal (`&(struct point){3,4}`) loses its struct type
- on the cc.scm-built path; the gcc-built control accepts it.
-
-Working hypothesis: our compiler is miscompiling tcc itself. In this
-suite, `tcc-boot2` is a tcc binary produced by `cc.scm`; the failures
-look like that produced tcc is executing bad compiler/codegen logic and
-therefore emitting bad code, asserting, or crashing while compiling the
-fixtures. The host baseline below rules out the fixtures and expected
-files as the main source of the failures.
+`177 passed, 1 failed` until tcc itself is patched.
+
+The `memmove`/`memset` cluster is a libc gap, not a cc.scm bug:
+tcc emits calls to `memmove`/`memset` for struct copies and bulk
+zero-init that exceed its inline thresholds, and the linked
+mes-libc TU does not export those symbols. Adding them to the
+flattened libc would clear all 15 fixtures in one shot.
+
+Working hypothesis for the remaining `assert fail: 0` cluster: our
+compiler is still miscompiling tcc itself in narrower spots. In this
+suite, `tcc-boot2` is a tcc binary produced by `cc.scm`; the
+remaining failures look like that cc.scm-built tcc executing
+miscompiled compiler/codegen logic and therefore asserting while
+compiling fixtures. The host baseline below rules out the fixtures
+and expected files as the cause.
A stronger control is to compile the same ARM64 `tcc.flat.c` with
Alpine gcc and use that gcc-built tcc to run the same `tests/cc`
fixtures:
```text
-gcc-built ARM64 tcc.flat.c (libc + tcc hdrs): 175 passed, 1 failed
-cc.scm-built ARM64 tcc-boot2: 14 passed, 162 failed
+gcc-built ARM64 tcc.flat.c (libc + tcc hdrs): 177 passed, 1 failed
+cc.scm-built ARM64 tcc-boot2: 148 passed, 30 failed
```
The gcc-built control's only remaining failure (`200-lex-char-type`)
is an upstream tcc bug, not cc.scm miscompilation, and it caps the
-achievable cc.scm result at 175/176 until tcc itself is patched.
-Aside from those, the gcc-built control is green: it links the
+achievable cc.scm result at 177/178 until tcc itself is patched.
+Aside from that, the gcc-built control is green: it links the
flattened tcc against `libc.flat.c` + libtcc1 + a tiny mes-libc
string runtime, and passes `-I tcc/include` so the bundled
`<stdarg.h>` resolves under `-nostdlib`. Run with
`scripts/run-gcc-libc-flat-tcc.sh`. This proves the fixtures and the
-flattened tcc source are coherent end-to-end, so the remaining 159
-cc.scm-only failures are evidence that our compiler is miscompiling
-tcc.
+flattened tcc source are coherent end-to-end, so the remaining 29
+cc.scm-only failures are evidence that our compiler is still
+miscompiling tcc in some places (or, for the `mem*` cluster, that
+our libc is missing symbols tcc emits).
## Host Baseline
@@ -226,7 +190,7 @@ Current result:
```text
tcc version 0.9.26 (AArch64 Linux)
-175 passed, 1 failed
+177 passed, 1 failed
```
The only remaining failure (`200-lex-char-type`) is an upstream tcc
@@ -272,22 +236,21 @@ NAMES='125-anon-union 132-tentative-bss-sizing' \
## Next Debug Targets
-Start with the earliest minimal failures in each dominant group:
-
-- `002-arith`: first `store(...); assert fail: 0` case. This is a
- small arithmetic fixture and is likely the highest-leverage entry
- point into the ARM64 tcc codegen path compiled by `cc.scm`.
-- `007-call-with-args`: first clear `vtop[-1].r < VT_CONST` assertion
- on an ordinary call with arguments.
-- `006-call-no-args`: first plain segfault with a very small source.
-- `001-kitchen-sink`: only remaining `too many field init` case;
- good lens on aggregate-init counter miscompile.
-- `331-fs-compound-struct-addr`: only remaining `field not found`
- case; lens on file-scope compound-literal type tracking.
-- `019-zext-narrow`, `101-char-escapes`, `220-const-promote`: the
- three current failures where `tcc-boot2` successfully emits and
- links a binary but the binary returns the wrong status — closest
- to "isolated codegen miscompile."
+Start with the earliest minimal failures in each remaining group:
+
+- `003-compound`: small fixture in the `assert fail: 0` cluster;
+ good entry point for finding which tcc function is still being
+ miscompiled.
+- `013-call`, `027-void-call`, `071-fnptr-call`: short call-site
+ failures — likely a different code path than the struct-copy fix
+ that cleared the vtop cluster.
+- `220-const-promote`: only remaining "compile succeeds, exits wrong"
+ case — closest to "isolated codegen miscompile."
+- `mem*` cluster (`032-local-struct-desig`, `084-struct-assign`,
+ `111-struct-ret-1word`, …): not a cc.scm bug — tcc emits calls
+ to `memset`/`memmove` for struct init/copy past inline thresholds,
+ and the linked mes-libc TU does not export those symbols. Adding
+ them clears all 15 fixtures in one shot.
Keep using `make test SUITE=cc ARCH=aarch64 NAMES=...` as the control
path for fixture semantics, and `make test SUITE=tcc-cc NAMES=...` as
diff --git a/tests/cc/333-struct-assign-big.c b/tests/cc/333-struct-assign-big.c
@@ -0,0 +1,70 @@
+/* Struct/union assignment must memcpy the whole aggregate, not just
+ * the first 8 bytes. cg-assign's scalar path emitted one 8-byte
+ * load + store, dropping every field at offset >= 8 — which silently
+ * broke vswap() in tcc.flat.c (SValue is 64 bytes), surfacing as the
+ * vtop[-1].r < VT_CONST assertion in arm64-gen.c during tcc-cc.
+ *
+ * Exercises:
+ * - direct struct lvalue = lvalue assignment
+ * - assignment through pointer indices (vtop[0] = vtop[-1] shape)
+ * - tmp = src; src = dst; dst = tmp three-way swap
+ * - fields at offsets 0, 4, 8, 16+ — including a sub-word field
+ * past offset 8 (mirrors SValue.r at offset 16)
+ */
+
+struct Big {
+ int a; /* a and b fill the first 8 bytes, all the old scalar `=` path copied */
+ int b;
+ unsigned short r; /* sub-word fields starting at offset 8 */
+ unsigned short r2;
+ long c[2]; /* presumably offset 16 under an LP64 layout -- confirm per target */
+ void *sym; /* presumably makes the struct 40 bytes under LP64 -- confirm per target */
+};
+
+static struct Big buf[4]; /* main only touches buf[0] and buf[1], via swap2(&buf[1]) */
+
+static void swap2(struct Big *p) { /* swap p[0] with p[-1]; p must not point at the first element */
+ struct Big tmp;
+ tmp = p[0]; /* three whole-struct assignments: the vswap-style shape this fixture targets */
+ p[0] = p[-1];
+ p[-1] = tmp;
+}
+
+int main(int argc, char **argv) {
+ /* Direct lvalue = lvalue struct assignment; exit codes 1-7 name the mismatching field of y. */
+ struct Big x;
+ struct Big y;
+ x.a = 0xA1; x.b = 0xA2; x.r = 0xA3; x.r2 = 0xA4;
+ x.c[0] = 0xA5; x.c[1] = 0xA6; x.sym = (void *)0xA7;
+ y = x;
+ if (y.a != 0xA1) return 1;
+ if (y.b != 0xA2) return 2;
+ if (y.r != 0xA3) return 3;
+ if (y.r2 != 0xA4) return 4;
+ if (y.c[0] != 0xA5) return 5;
+ if (y.c[1] != 0xA6) return 6;
+ if (y.sym != (void *)0xA7) return 7;
+
+ /* Pointer-indexed struct swap (tcc's vswap pattern); exit codes 11-17 check buf[0], 21-27 check buf[1]. */
+ buf[0].a = 0x01; buf[0].b = 0x02; buf[0].r = 0x03; buf[0].r2 = 0x04;
+ buf[0].c[0] = 0x05; buf[0].c[1] = 0x06; buf[0].sym = (void *)0x07;
+ buf[1].a = 0x11; buf[1].b = 0x12; buf[1].r = 0x13; buf[1].r2 = 0x14;
+ buf[1].c[0] = 0x15; buf[1].c[1] = 0x16; buf[1].sym = (void *)0x17;
+ swap2(&buf[1]);
+ if (buf[0].a != 0x11) return 11;
+ if (buf[0].b != 0x12) return 12;
+ if (buf[0].r != 0x13) return 13;
+ if (buf[0].r2 != 0x14) return 14;
+ if (buf[0].c[0] != 0x15) return 15;
+ if (buf[0].c[1] != 0x16) return 16;
+ if (buf[0].sym != (void *)0x17) return 17;
+ if (buf[1].a != 0x01) return 21;
+ if (buf[1].b != 0x02) return 22;
+ if (buf[1].r != 0x03) return 23;
+ if (buf[1].r2 != 0x04) return 24;
+ if (buf[1].c[0] != 0x05) return 25;
+ if (buf[1].c[1] != 0x06) return 26;
+ if (buf[1].sym != (void *)0x07) return 27;
+
+ return 0;
+}
diff --git a/tests/cc/333-struct-assign-big.expected-exit b/tests/cc/333-struct-assign-big.expected-exit
@@ -0,0 +1 @@
+0
diff --git a/tests/cc/334-struct-assign-rval-rhs.c b/tests/cc/334-struct-assign-rval-rhs.c
@@ -0,0 +1,31 @@
+/* Regression: struct `=` rhs may be a struct *rvalue* (frame opnd
+ * with lval? = #f), not just an lvalue. The comma operator forces
+ * rval! on its right-hand subexpression, so `y = (e, s);` lands a
+ * struct rvalue on the vstack as the rhs of cg-assign-struct.
+ *
+ * Before the addr-of-any fix, cg-assign-struct used %cg-emit-addr-of
+ * which dies on any non-lvalue, so cc.scm aborted with
+ * "cg-emit-addr-of: not an lvalue" before emitting any code. The
+ * fix routes the src through %cg-emit-addr-of-any, which computes
+ * the address of a frame slot regardless of the lval flag.
+ *
+ * Struct kept ≤ 8 bytes so the underlying cg-load-on-struct
+ * truncation (separate bug) doesn't mask the addr-of regression
+ * with a wrong-bytes failure.
+ */
+
+struct Pair { int a; int b; }; /* two ints only, so the struct stays within 8 bytes when int is 4 bytes */
+
+static int side_effect(int x) { return x; } /* identity despite the name; main calls it as the discarded lhs of a comma expression */
+
+int main(int argc, char **argv) {
+ struct Pair a;
+ struct Pair y;
+ a.a = 11; a.b = 22;
+ /* Comma operator: lhs `side_effect(0)` is discarded; cc.scm rval!'s the
+ * rhs `a` into a struct rvalue, which is then assigned to `y`. */
+ y = (side_effect(0), a);
+ if (y.a != 11) return 1; /* exit 1: first field did not arrive */
+ if (y.b != 22) return 2; /* exit 2: second field did not arrive */
+ return 0;
+}
diff --git a/tests/cc/334-struct-assign-rval-rhs.expected-exit b/tests/cc/334-struct-assign-rval-rhs.expected-exit
@@ -0,0 +1 @@
+0