boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit a73a78dfa060ba0a69f92bf8e1fd941ac8dc61dd
parent 8e2de46571802726e1a2f0c3d9d7e6d422bb53be
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 23:50:08 -0700

cc: split aggregate args across two ABI slots on call + receive

cc.scm was treating every parameter as a single 8-byte ABI slot on
both the call and receive sides. AAPCS hands a 9..16-byte aggregate
in two consecutive arg positions (regs or stack words); cc.scm dropped
the second word, so any callee with a struct-by-value param wider than
8 bytes saw its second half clobbered by the next argument or by
uninitialized stack.

Surfaced earlier while debugging the tcc-tcc self-host: tcc.flat.c's
expr_cond hands CType structs by value through helpers like
gen_op / gen_cast. tcc-boot2 happened not to exercise the broken
path on its own (locals only, not params), so it self-hosted, but
any user code with struct-by-value params miscompiled silently.

Receive side (cg-fn-begin/v): allocate a slot sized to the aggregate,
spill ⌈size/8⌉ consecutive ABI positions into successive 8-byte chunks,
and advance idx by that count.

Call side (cg-call): when the arg is an aggregate >8B, stage its
address once and chunk-load 8 bytes at a time into successive arg
regs / stack words; account for the extra ABI slot in the
outgoing-stack-arg footprint.

>16B aggregates would normally pass by reference per AAPCS — left
unimplemented; cc.scm dies cleanly if user code hits it.

Regression locked by tests/cc/337-struct-by-value-arg.

cc 180/0 -> 181/0; tcc-cc 180/1 unchanged at parity with tcc-gcc;
tcc-libc 17/0; all other suites unchanged.

Diffstat:
M cc/cc.scm | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
A tests/cc/337-struct-by-value-arg.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 118 insertions(+), 11 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -3015,12 +3015,44 @@ (let* ((p (car ps)) (nm (car p)) (ty (cdr p)) - (off (cg-alloc-slot cg 8 8)) + ;; AAPCS: 9..16B aggregates ride two consecutive arg + ;; positions (regs or stack slots), wider-than-16B + ;; aggregates would normally pass by reference — not + ;; supported here yet. + (n (%cg-param-reg-count ty)) + (sz (cond ((%cg-param-aggregate? ty) + (align-up (ctype-size ty) 8)) + (else 8))) + (al (cond ((%cg-param-aggregate? ty) + (max 8 (ctype-align ty))) + (else 8))) + (off (cg-alloc-slot cg sz al)) (psym (%sym nm 'param #f ty off #t))) - (spill (+ idx sret-shift) off) - (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out) + (let chunk ((i 0)) + (cond ((>= i n) 0) + (else + (spill (+ idx sret-shift i) (+ off (* i 8))) + (chunk (+ i 1))))) + (walk (cdr ps) (+ idx n) (cons (cons nm psym) out) (or first-slot off)))))))) +;; Number of consecutive ABI slots (regs or stack words) consumed by a +;; parameter of TY. Aggregates ≤16B take ⌈size/8⌉; everything else 1. +(define (%cg-param-reg-count ty) + (cond + ((%cg-param-aggregate? ty) + (let ((sz (ctype-size ty))) + (cond + ((> sz 16) + (die #f "cg: aggregate arg/param >16B not supported" sz)) + ((> sz 8) 2) + (else 1)))) + (else 1))) + +(define (%cg-param-aggregate? ty) + (let ((k (ctype-kind ty))) + (or (eq? k 'struct) (eq? k 'union)))) + (define (cg-fn-end cg) ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain! ;; (memcpy, no allocation). Header/footer pieces go through buf-push! @@ -3779,16 +3811,51 @@ (cond ((null? xs) 0) (else - (let ((abi (+ idx sret-shift))) + (let* ((arg (car xs)) + (aty (opnd-type arg)) + (n (%cg-param-reg-count aty))) (cond - ((< abi 4) - (%cg-load-opnd-into cg (car xs) (%reg-by-idx abi)) - (stage (cdr xs) (+ idx 1))) + ;; Aggregate >8B: load both halves into successive arg + ;; regs / stack slots. Stage the struct's address in t0 + ;; once and chunk-load 8 bytes at a time. + ((and (%cg-param-aggregate? 
aty) (> n 1)) + (%cg-emit-addr-of cg arg 't0) + (let chunk ((i 0)) + (cond + ((>= i n) 0) + (else + (let ((tabi (+ idx sret-shift i))) + (cond + ((< tabi 4) + (%cg-emit-many cg + (list "%ld(" + (%cg-reg->bv (%reg-by-idx tabi)) + ", t0, " (%n (* i 8)) ")\n"))) + (else + (%cg-emit-many cg + (list "%ld(t1, t0, " + (%n (* i 8)) ")\n")) + (%cg-emit-st cg 't1 'sp (* 8 (- tabi 4)))))) + (chunk (+ i 1))))) + (stage (cdr xs) (+ idx n))) (else - (%cg-load-opnd-into cg (car xs) 't0) - (%cg-emit-st cg 't0 'sp (* 8 (- abi 4))) - (stage (cdr xs) (+ idx 1)))))))) - (let ((sa (if sret? (max 0 (- arity 3)) (max 0 (- arity 4))))) + (let ((abi (+ idx sret-shift))) + (cond + ((< abi 4) + (%cg-load-opnd-into cg arg (%reg-by-idx abi)) + (stage (cdr xs) (+ idx 1))) + (else + (%cg-load-opnd-into cg arg 't0) + (%cg-emit-st cg 't0 'sp (* 8 (- abi 4))) + (stage (cdr xs) (+ idx 1))))))))))) + ;; Stack-arg footprint accounts for the extra ABI slot any + ;; >8B-aggregate arg consumed beyond its single-position cousin. + (let* ((nabi (let count ((xs args) (n sret-shift)) + (cond ((null? xs) n) + (else (count (cdr xs) + (+ n (%cg-param-reg-count + (opnd-type (car xs))))))))) + (sa (max 0 (- nabi 4)))) (cond ((> sa 0) (%cg-bump-outgoing! cg sa)) (else 0))) (cond (sret? diff --git a/tests/cc/337-struct-by-value-arg.c b/tests/cc/337-struct-by-value-arg.c @@ -0,0 +1,40 @@ +/* Struct-by-value parameter passing. aarch64 AAPCS hands a 9..16-byte + * aggregate in two consecutive arg registers (or two stack slots if + * neither fits); cc.scm has to mirror that on both the call and + * receive sides. Until this works, every callee with a wider-than-8B + * struct param sees the second word truncated, which silently + * miscompiles tcc.flat.c's CType-passing helpers and any user code + * with similar shapes. 
+ */ + +struct Pair { long a; long b; }; + +static int probe(struct Pair x, struct Pair y, long ea1, long ea2, + long eb1, long eb2) +{ + if (x.a != ea1) return 1; + if (x.b != ea2) return 2; + if (y.a != eb1) return 3; + if (y.b != eb2) return 4; + return 0; +} + +static int probe_after_int(int prefix, struct Pair p, long ea, long eb) +{ + if (prefix != 99) return 5; + if (p.a != ea) return 6; + if (p.b != eb) return 7; + return 0; +} + +int main(void) +{ + struct Pair a; a.a = 10; a.b = 20; + struct Pair b; b.a = 30; b.b = 40; + + int r; + if ((r = probe(a, b, 10, 20, 30, 40))) return 10 + r; + if ((r = probe_after_int(99, a, 10, 20))) return 30 + r; + + return 0; +}