commit 641a2e87e7b4efa2fe6f4589a65f2d6c3a4531d3
parent 7f277cdbe7b4960775d934f9cba0e02520909e67
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 27 Apr 2026 14:16:09 -0700
cc: struct return — indirect-result (sret) convention (A2)
Wires the >16B struct/union indirect-result convention from
P1.md §Arguments and return values, completing Stream A2 of the
parallel CC plan:
- cg-fn-begin/v detects struct/union return > 16B and allocates a
private 8-byte sret-slot, spilling incoming a0 (the caller's
result-buffer pointer) into it during the prologue. Explicit
param ABI dispatch shifts by one register: args 0..2 spill from
a1..a3, args 3+ from incoming stack-arg slot (idx-3). The variadic
save-area pad uses the same shift so __builtin_va_start landing
index continues to match named-arg count.
- cg-return for a struct/union > 16B copies bytes through *sret-slot
instead of into the local return slot. cg-fn-end's epilogue loads
a0 from sret-slot for sret functions so the convention's "a0 holds
the same buffer pointer on return" holds even if the body
clobbered a0 along the way.
- cg-call detects sret-eligible callee return type, allocates a
fresh receive slot up front, places explicit args 0..2 into
a1..a3 and args 3+ into stack slots 0+, materializes the receive-
slot address into a0 last (so arg-load emission can't clobber it),
and pushes the receive slot as a struct frame-lval after the call.
- The receive-slot allocation is fresh per call site so chained
`f(...).field` and consecutive sret calls into separate locals
don't alias.
- Param-spill emit deduped into a local helper to keep the
variadic-pad and walk arms in sync.
Updates CC-CONTRACTS.md §3.2 with prose describing the indirect-
result convention's caller and callee responsibilities.
Tests:
- New cc-cg fixture 73-struct-ret-3word drives the cg API directly.
- cc/113-struct-ret-3word and cc/115-struct-ret-3word-many-args now
pass; no parser change was needed since the type-driven cg path
detects sret from the callee return type and the existing
parse-return-stmt struct arm already feeds cg-return a struct lval.
- All 138 cc fixtures pass on aarch64; all 52 cc-cg fixtures pass.
Diffstat:
4 files changed, 197 insertions(+), 124 deletions(-)
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -2526,17 +2526,15 @@
(%cg-fn-set! cg '%fn-ret-type return-type)
(%cg-fn-set! cg '%indirect-slots '())
(%cg-fn-set! cg '%fn-variadic? variadic?)
- ;; Return slot. Per P1.md §Arguments and return values, results ≤8B
- ;; ride a single word; 9–16B aggregates ride two words. We size the
- ;; slot to max(8, ctype-size), 8-byte aligned, so cg-fn-end can load
- ;; a0 (always) and a1 (when size > 8) directly from it.
+ ;; Return slot per P1.md §Arguments. ≤8B → a0; 9–16B → a0+a1; >16B
+ ;; struct/union → indirect-result (A2): caller passes sret ptr in
+ ;; a0; cg-return writes through it; sret-slot saves a0 for cg-fn-end.
(let* ((rsz (cond ((eq? (ctype-kind return-type) 'void) 8)
(else (align-up (max 8 (ctype-size return-type)) 8))))
(ret-slot (cg-alloc-slot cg rsz 8)))
(%cg-fn-set! cg '%fn-ret-slot ret-slot)
(cond
((not (eq? (ctype-kind return-type) 'void))
- ;; Zero the full ret slot (one %st per 8 bytes).
(let zinit ((k 0))
(cond
((>= k rsz) #t)
@@ -2547,62 +2545,57 @@
(%cg-slot-expr cg (+ ret-slot k))
")\n")))
(zinit (+ k 8))))))))
- ;; params per CC-CONTRACTS §3.1: list of (name-bv . ctype). We
- ;; return an alist (name-bv . sym) the parser binds into scope.
- (let walk ((ps params) (idx 0) (out '()) (first-slot #f))
+ (let* ((rk (ctype-kind return-type))
+ (sret? (and (or (eq? rk 'struct) (eq? rk 'union))
+ (> (ctype-size return-type) 16))))
+ (%cg-fn-set! cg '%fn-sret? sret?)
(cond
- ((null? ps)
- (cond
- (variadic?
- ;; Pad the incoming-arg window out to 16 slots. For idx 0..3
- ;; the slot is filled from a-register; for idx 4..15 from
- ;; LDARG slot (idx-4). va_start points at the slot whose
- ;; index equals the named-arg count, and va_arg walks
- ;; linearly from there through the rest of the window.
- (let pad ((i idx) (vfirst #f) (fs first-slot))
- (cond
- ((>= i 16)
- ;; If named-arg count was 0, vfirst is the very first
- ;; slot of the save area (= fs).
- (%cg-fn-set! cg '%fn-vararg-first-slot
- (or vfirst fs))
- (reverse out))
- (else
- (let ((off (cg-alloc-slot cg 8 8)))
- (cond
- ((< i 4)
- (let ((ar (%reg-by-idx i)))
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%st(" (%cg-reg->bv ar)
- ", sp, "
- (%cg-slot-expr cg off) ")\n")))))
- (else
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%ldarg(t0, " (%n (- i 4)) ")\n"
- "%st(t0, sp, "
- (%cg-slot-expr cg off) ")\n")))))
- (pad (+ i 1)
- (or vfirst off)
- (or fs off)))))))
- (else (reverse out))))
- (else
- (let* ((p (car ps))
- (nm (car p))
- (ty (cdr p))
- (off (cg-alloc-slot cg 8 8))
- (psym (%sym nm 'param #f ty off)))
+ (sret?
+ (let ((ss (cg-alloc-slot cg 8 8)))
+ (%cg-fn-set! cg '%fn-sret-slot ss)
+ (buf-push! (cg-prologue-buf cg)
+ (bv-cat (list "%st(a0, sp, "
+ (%cg-slot-expr cg ss) ")\n")))))
+ (else (%cg-fn-set! cg '%fn-sret-slot #f))))
+ ;; params per CC-CONTRACTS §3.1. With sret, explicit arg i lives at
+ ;; ABI position (i+1): args 0..2 in a1..a3, args 3+ in slot (i-3).
+ (let* ((sret-shift (if (%cg-fn-get cg '%fn-sret?) 1 0))
+ (spill (lambda (abi off)
+ (cond
+ ((< abi 4)
+ (buf-push! (cg-prologue-buf cg)
+ (bv-cat (list "%st(" (%cg-reg->bv (%reg-by-idx abi))
+ ", sp, "
+ (%cg-slot-expr cg off) ")\n"))))
+ (else
+ (buf-push! (cg-prologue-buf cg)
+ (bv-cat (list "%ldarg(t0, " (%n (- abi 4)) ")\n"
+ "%st(t0, sp, "
+ (%cg-slot-expr cg off) ")\n"))))))))
+ (let walk ((ps params) (idx 0) (out '()) (first-slot #f))
+ (cond
+ ((null? ps)
(cond
- ((< idx 4)
- (let ((ar (%reg-by-idx idx)))
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%st(" (%cg-reg->bv ar)
- ", sp, " (%cg-slot-expr cg off) ")\n")))))
- (else
- (buf-push! (cg-prologue-buf cg)
- (bv-cat (list "%ldarg(t0, " (%n (- idx 4)) ")\n"
- "%st(t0, sp, " (%cg-slot-expr cg off) ")\n")))))
- (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out)
- (or first-slot off)))))))
+ (variadic?
+ (let pad ((i idx) (vfirst #f) (fs first-slot))
+ (cond
+ ((>= i 16)
+ (%cg-fn-set! cg '%fn-vararg-first-slot (or vfirst fs))
+ (reverse out))
+ (else
+ (let ((off (cg-alloc-slot cg 8 8)))
+ (spill (+ i sret-shift) off)
+ (pad (+ i 1) (or vfirst off) (or fs off)))))))
+ (else (reverse out))))
+ (else
+ (let* ((p (car ps))
+ (nm (car p))
+ (ty (cdr p))
+ (off (cg-alloc-slot cg 8 8))
+ (psym (%sym nm 'param #f ty off)))
+ (spill (+ idx sret-shift) off)
+ (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out)
+ (or first-slot off))))))))
(define (cg-fn-end cg)
;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain!
@@ -2639,22 +2632,26 @@
;; prologue + body, drained byte-for-byte
(buf-drain! tb (cg-prologue-buf cg))
(buf-drain! tb (cg-fn-buf cg))
- ;; ret block — per P1.md §Arguments and return values, ≤8B uses
- ;; the one-word direct convention (result in a0); 9–16B uses the
- ;; two-word direct convention (a0 = word 0, a1 = word 1).
+ ;; ret block: ≤8B → a0; 9–16B → a0+a1; >16B sret → a0 = saved sret ptr.
(buf-push! tb "::ret\n")
- (cond
- ((eq? (ctype-kind ret-type) 'void)
- (buf-push! tb "%li(a0, 0)\n"))
- (else
- (buf-push! tb "%ld(a0, sp, ")
- (buf-push! tb (%cg-slot-expr cg ret-slot))
- (buf-push! tb ")\n")
- (cond
- ((> (ctype-size ret-type) 8)
- (buf-push! tb "%ld(a1, sp, ")
- (buf-push! tb (%cg-slot-expr cg (+ ret-slot 8)))
- (buf-push! tb ")\n")))))
+ (let ((rk (ctype-kind ret-type))
+ (sret? (%cg-fn-get cg '%fn-sret?)))
+ (cond
+ ((eq? rk 'void)
+ (buf-push! tb "%li(a0, 0)\n"))
+ (sret?
+ (buf-push! tb "%ld(a0, sp, ")
+ (buf-push! tb (%cg-slot-expr cg (%cg-fn-get cg '%fn-sret-slot)))
+ (buf-push! tb ")\n"))
+ (else
+ (buf-push! tb "%ld(a0, sp, ")
+ (buf-push! tb (%cg-slot-expr cg ret-slot))
+ (buf-push! tb ")\n")
+ (cond
+ ((> (ctype-size ret-type) 8)
+ (buf-push! tb "%ld(a1, sp, ")
+ (buf-push! tb (%cg-slot-expr cg (+ ret-slot 8)))
+ (buf-push! tb ")\n"))))))
(buf-push! tb "})\n")
(cg-vstack-set! cg '())
(cg-frame-hi-set! cg 0)
@@ -3217,18 +3214,47 @@
(let* ((args (let loop ((i 0) (acc '()))
(cond ((= i arity) acc)
(else (loop (+ i 1) (cons (cg-pop cg) acc))))))
- (fn-op (cg-pop cg)))
+ (fn-op (cg-pop cg))
+ ;; sret = struct/union > 16B return; shift args by one reg
+ ;; and place a0 last so it's not clobbered by arg loads.
+ (fty (opnd-type fn-op))
+ (rty (cond
+ ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty)))
+ ((eq? (ctype-kind fty) 'ptr)
+ (let ((p (ctype-ext fty)))
+ (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64)))
+ (else %t-i64)))
+ (rk (ctype-kind rty))
+ (sret? (and has-result?
+ (or (eq? rk 'struct) (eq? rk 'union))
+ (> (ctype-size rty) 16)))
+ (sret-shift (if sret? 1 0))
+ (recv-slot (cond
+ (sret?
+ (cg-alloc-slot cg
+ (align-up (ctype-size rty) 8)
+ (max 8 (ctype-align rty))))
+ (else #f))))
(let stage ((xs args) (idx 0))
(cond
((null? xs) 0)
- ((< idx 4)
- (%cg-load-opnd-into cg (car xs) (%reg-by-idx idx))
- (stage (cdr xs) (+ idx 1)))
(else
- (%cg-load-opnd-into cg (car xs) 't0)
- (%cg-emit-st cg 't0 'sp (* 8 (- idx 4)))
- (stage (cdr xs) (+ idx 1)))))
- (cond ((> arity 4) (%cg-bump-outgoing! cg (- arity 4))) (else 0))
+ (let ((abi (+ idx sret-shift)))
+ (cond
+ ((< abi 4)
+ (%cg-load-opnd-into cg (car xs) (%reg-by-idx abi))
+ (stage (cdr xs) (+ idx 1)))
+ (else
+ (%cg-load-opnd-into cg (car xs) 't0)
+ (%cg-emit-st cg 't0 'sp (* 8 (- abi 4)))
+ (stage (cdr xs) (+ idx 1))))))))
+ (let ((sa (if sret? (max 0 (- arity 3)) (max 0 (- arity 4)))))
+ (cond ((> sa 0) (%cg-bump-outgoing! cg sa)) (else 0)))
+ (cond
+ (sret?
+ (%cg-emit-many cg (list "%mov(a0, sp)\n"
+ "%addi(a0, a0, "
+ (%cg-slot-expr cg recv-slot) ")\n"))))
(cond
((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op)))
(%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n")))
@@ -3237,33 +3263,20 @@
(%cg-emit-many cg (list "%callr(t0)\n"))))
(cond
(has-result?
- (let* ((fty (opnd-type fn-op))
- (rty (cond
- ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty)))
- ((eq? (ctype-kind fty) 'ptr)
- (let ((p (ctype-ext fty)))
- (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64)))
- (else %t-i64)))
- (rk (ctype-kind rty)))
- (cond
- ;; Struct/union return ≤16B (Stream A1, P1.md §Arguments
- ;; and return values). Allocate a fresh frame slot sized to
- ;; the struct, store back from a0 (word 0) and a1 (word 1
- ;; when size > 8), and push a struct frame-lval. Fresh slot
- ;; per call site so chained `f().field` and consecutive
- ;; calls don't alias.
- ((and (or (eq? rk 'struct) (eq? rk 'union))
- (<= (ctype-size rty) 16))
- (let* ((sz (ctype-size rty))
- (al (max 8 (ctype-align rty)))
- (slot (cg-alloc-slot cg (align-up sz 8) al)))
- (%cg-emit-st-slot cg 'a0 slot)
- (cond
- ((> sz 8)
- (%cg-emit-st-slot cg 'a1 (+ slot 8))))
- (cg-push cg (%opnd 'frame rty slot #t))))
- (else
- (%cg-spill-reg cg 'a0 rty)))))
+ (cond
+ ;; >16B sret (A2): a0 holds recv-slot; push as struct lval.
+ (sret? (cg-push cg (%opnd 'frame rty recv-slot #t)))
+ ;; ≤16B struct/union (A1): fresh slot, spill from a0/a1.
+ ((and (or (eq? rk 'struct) (eq? rk 'union))
+ (<= (ctype-size rty) 16))
+ (let* ((sz (ctype-size rty))
+ (al (max 8 (ctype-align rty)))
+ (slot (cg-alloc-slot cg (align-up sz 8) al)))
+ (%cg-emit-st-slot cg 'a0 slot)
+ (cond ((> sz 8) (%cg-emit-st-slot cg 'a1 (+ slot 8))))
+ (cg-push cg (%opnd 'frame rty slot #t))))
+ (else
+ (%cg-spill-reg cg 'a0 rty))))
(else #f))))
;; --------------------------------------------------------------------
@@ -3272,25 +3285,24 @@
(define (cg-return cg)
(let* ((ret-slot (%cg-fn-get cg '%fn-ret-slot))
(ret-type (%cg-fn-get cg '%fn-ret-type))
- (rk (ctype-kind ret-type)))
+ (rk (ctype-kind ret-type))
+ (sret? (%cg-fn-get cg '%fn-sret?)))
(cond
((eq? rk 'void)
(%cg-emit-many cg (list "%b(&::ret)\n")))
((or (eq? rk 'struct) (eq? rk 'union))
- ;; Struct-by-value return (Stream A1, P1.md §Arguments and
- ;; return values): copy bytes from the source lval into the
- ;; function's return slot. cg-fn-end then loads a0 (and a1
- ;; for 9–16B) at the epilogue.
- (let* ((p (cg-pop cg))
- (sz (ctype-size ret-type)))
- (cond
- ((not (opnd-lval? p))
- (die #f "cg-return: struct value must be an lvalue")))
- ;; Materialize src addr into t0; dst (ret-slot) addr into t2.
+ ;; struct-by-value: ≤16B (A1) → ret-slot; >16B (A2 sret) → *sret-slot.
+ (let* ((p (cg-pop cg)) (sz (ctype-size ret-type)))
+ (cond ((not (opnd-lval? p))
+ (die #f "cg-return: struct value must be an lvalue")))
(%cg-emit-addr-of cg p 't0)
- (%cg-emit-many cg (list "%mov(t2, sp)\n"
- "%addi(t2, t2, "
- (%cg-slot-expr cg ret-slot) ")\n"))
+ (cond
+ (sret?
+ (%cg-emit-ld-slot cg 't2 (%cg-fn-get cg '%fn-sret-slot)))
+ (else
+ (%cg-emit-many cg (list "%mov(t2, sp)\n"
+ "%addi(t2, t2, "
+ (%cg-slot-expr cg ret-slot) ")\n"))))
(%cg-emit-byte-copy cg 't2 't0 't1 sz)
(%cg-emit-many cg (list "%b(&::ret)\n"))))
(else
diff --git a/docs/CC-CONTRACTS.md b/docs/CC-CONTRACTS.md
@@ -319,7 +319,16 @@ load epilogue across the three result conventions defined in
|---|---|---|
| ≤ 8B | one-word direct | `LD a0, [sp + slot]` |
| 9–16B | two-word direct | `LD a0, [sp + slot]; LD a1, [sp + slot + 8]` |
-| > 16B | indirect-result | (Stream A2) caller passes buffer in a0; callee writes through it; epilogue restores a0 |
+| > 16B | indirect-result | `LD a0, [sp + sret-slot]` (reload caller's buffer ptr) |
+
+For the indirect-result convention (struct/union return > 16B), the
+caller materializes a fresh receive slot's address in `a0` before the
+call; explicit args 0..2 shift to `a1..a3` and args 3+ stage to
+incoming stack-arg slots 0+. The callee's prologue spills the
+incoming `a0` to a private sret-slot; `cg-return` copies the source
+struct's bytes through that saved pointer; the epilogue reloads
+`a0` from the sret-slot so the convention's "a0 unchanged on
+return" holds even if the body clobbered `a0`.
The cg lowers all three conventions; the parser surface (return
statement, struct-typed call result) is identical regardless of
diff --git a/tests/cc-cg/73-struct-ret-3word.expected-exit b/tests/cc-cg/73-struct-ret-3word.expected-exit
@@ -0,0 +1 @@
+60
diff --git a/tests/cc-cg/73-struct-ret-3word.scm b/tests/cc-cg/73-struct-ret-3word.scm
@@ -0,0 +1,51 @@
+;; tests/cc-cg/73-struct-ret-3word.scm — indirect-result struct return.
+;;
+;; struct Triple { long a; long b; long c; }; /* 24B — > 16B */
+;; struct Triple make(long a, long b, long c) { ...; return t; }
+;; int main(void) { struct Triple r = make(10,20,30);
+;; return (int)(r.a+r.b+r.c); } /* 60 */
+;;
+;; Stream A2 (P1.md §Arguments): caller passes result-buffer ptr in
+;; a0; explicit args 0..2 land in a1..a3; cg-fn-begin/v shifts param
+;; ABI by one register; cg-return writes through saved sret pointer.
+
+(let* ((cg (cg-init))
+ (ty (%ctype 'struct 24 8
+ (list "T" #t
+ (list (list "a" %t-i64 0)
+ (list "b" %t-i64 8)
+ (list "c" %t-i64 16)))))
+ (fty (%ctype 'fn 8 8
+ (cons ty (cons (list %t-i64 %t-i64 %t-i64) #f))))
+ (mk (%sym "make" 'fn 'extern fty #f)))
+ (let* ((ps (cg-fn-begin cg "make"
+ (list (cons "a" %t-i64)
+ (cons "b" %t-i64)
+ (cons "c" %t-i64))
+ ty))
+ (a* (cdr (car ps))) (b* (cdr (cadr ps))) (c* (cdr (caddr ps)))
+ (to (cg-alloc-slot cg 24 8))
+ (ts (%sym "t" 'var 'auto ty to)))
+ (cg-push-sym cg ts) (cg-push-field cg "a")
+ (cg-push-sym cg a*) (cg-load cg) (cg-assign cg) (cg-pop cg)
+ (cg-push-sym cg ts) (cg-push-field cg "b")
+ (cg-push-sym cg b*) (cg-load cg) (cg-assign cg) (cg-pop cg)
+ (cg-push-sym cg ts) (cg-push-field cg "c")
+ (cg-push-sym cg c*) (cg-load cg) (cg-assign cg) (cg-pop cg)
+ (cg-push-sym cg ts) (cg-return cg)
+ (cg-fn-end cg))
+ (cg-fn-begin cg "main" '() %t-i32)
+ (cg-push-sym cg mk)
+ (cg-push-imm cg %t-i64 10)
+ (cg-push-imm cg %t-i64 20)
+ (cg-push-imm cg %t-i64 30)
+ (cg-call cg 3 #t)
+ (let ((r (cg-pop cg)))
+ (cg-push cg r) (cg-push-field cg "a") (cg-load cg)
+ (cg-push cg r) (cg-push-field cg "b") (cg-load cg)
+ (cg-binop cg 'add)
+ (cg-push cg r) (cg-push-field cg "c") (cg-load cg)
+ (cg-binop cg 'add))
+ (cg-cast cg %t-i32) (cg-return cg)
+ (cg-fn-end cg)
+ (write-bv-fd 1 (cg-finish cg)))