boot2

Playing with the boostrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 641a2e87e7b4efa2fe6f4589a65f2d6c3a4531d3
parent 7f277cdbe7b4960775d934f9cba0e02520909e67
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 27 Apr 2026 14:16:09 -0700

cc: struct return — indirect-result (sret) convention (A2)

Wires the >16B struct/union indirect-result convention from
P1.md §Arguments and return values, completing Stream A2 of the
parallel CC plan:

- cg-fn-begin/v detects struct/union return > 16B and allocates a
  private 8-byte sret-slot, spilling incoming a0 (the caller's
  result-buffer pointer) into it during the prologue. Explicit
  param ABI dispatch shifts by one register: args 0..2 spill from
  a1..a3, args 3+ from incoming stack-arg slot (idx-3). The variadic
  save-area pad uses the same shift so __builtin_va_start landing
  index continues to match named-arg count.
- cg-return for a struct/union > 16B copies bytes through *sret-slot
  instead of into the local return slot. cg-fn-end's epilogue loads
  a0 from sret-slot for sret functions so the convention's "a0 holds
  the same buffer pointer on return" holds even if the body
  clobbered a0 along the way.
- cg-call detects sret-eligible callee return type, allocates a
  fresh receive slot up front, places explicit args 0..2 into
  a1..a3 and args 3+ into stack slots 0+, materializes the receive-
  slot address into a0 last (so arg-load emission can't clobber it),
  and pushes the receive slot as a struct frame-lval after the call.
- The receive-slot allocation is fresh per call site so chained
  `f(...).field` and consecutive sret calls into separate locals
  don't alias.
- Param-spill emit deduped into a local helper to keep the
  variadic-pad and walk arms in sync.

Updates CC-CONTRACTS.md §3.2 with prose describing the indirect-
result convention's caller and callee responsibilities.

Tests:
- New cc-cg fixture 73-struct-ret-3word drives the cg API directly.
- cc/113-struct-ret-3word and cc/115-struct-ret-3word-many-args now
  pass; no parser change was needed since the type-driven cg path
  detects sret from the callee return type and the existing
  parse-return-stmt struct arm already feeds cg-return a struct lval.
- All 138 cc fixtures pass on aarch64; all 52 cc-cg fixtures pass.

Diffstat:
Mcc/cc.scm | 258+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Mdocs/CC-CONTRACTS.md | 11++++++++++-
Atests/cc-cg/73-struct-ret-3word.expected-exit | 1+
Atests/cc-cg/73-struct-ret-3word.scm | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 197 insertions(+), 124 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -2526,17 +2526,15 @@ (%cg-fn-set! cg '%fn-ret-type return-type) (%cg-fn-set! cg '%indirect-slots '()) (%cg-fn-set! cg '%fn-variadic? variadic?) - ;; Return slot. Per P1.md §Arguments and return values, results ≤8B - ;; ride a single word; 9–16B aggregates ride two words. We size the - ;; slot to max(8, ctype-size), 8-byte aligned, so cg-fn-end can load - ;; a0 (always) and a1 (when size > 8) directly from it. + ;; Return slot per P1.md §Arguments. ≤8B → a0; 9–16B → a0+a1; >16B + ;; struct/union → indirect-result (A2): caller passes sret ptr in + ;; a0; cg-return writes through it; sret-slot saves a0 for cg-fn-end. (let* ((rsz (cond ((eq? (ctype-kind return-type) 'void) 8) (else (align-up (max 8 (ctype-size return-type)) 8)))) (ret-slot (cg-alloc-slot cg rsz 8))) (%cg-fn-set! cg '%fn-ret-slot ret-slot) (cond ((not (eq? (ctype-kind return-type) 'void)) - ;; Zero the full ret slot (one %st per 8 bytes). (let zinit ((k 0)) (cond ((>= k rsz) #t) @@ -2547,62 +2545,57 @@ (%cg-slot-expr cg (+ ret-slot k)) ")\n"))) (zinit (+ k 8)))))))) - ;; params per CC-CONTRACTS §3.1: list of (name-bv . ctype). We - ;; return an alist (name-bv . sym) the parser binds into scope. - (let walk ((ps params) (idx 0) (out '()) (first-slot #f)) + (let* ((rk (ctype-kind return-type)) + (sret? (and (or (eq? rk 'struct) (eq? rk 'union)) + (> (ctype-size return-type) 16)))) + (%cg-fn-set! cg '%fn-sret? sret?) (cond - ((null? ps) - (cond - (variadic? - ;; Pad the incoming-arg window out to 16 slots. For idx 0..3 - ;; the slot is filled from a-register; for idx 4..15 from - ;; LDARG slot (idx-4). va_start points at the slot whose - ;; index equals the named-arg count, and va_arg walks - ;; linearly from there through the rest of the window. - (let pad ((i idx) (vfirst #f) (fs first-slot)) - (cond - ((>= i 16) - ;; If named-arg count was 0, vfirst is the very first - ;; slot of the save area (= fs). - (%cg-fn-set! cg '%fn-vararg-first-slot - (or vfirst fs)) - (reverse out)) - (else - (let ((off (cg-alloc-slot cg 8 8))) - (cond - ((< i 4) - (let ((ar (%reg-by-idx i))) - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%st(" (%cg-reg->bv ar) - ", sp, " - (%cg-slot-expr cg off) ")\n"))))) - (else - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%ldarg(t0, " (%n (- i 4)) ")\n" - "%st(t0, sp, " - (%cg-slot-expr cg off) ")\n"))))) - (pad (+ i 1) - (or vfirst off) - (or fs off))))))) - (else (reverse out)))) - (else - (let* ((p (car ps)) - (nm (car p)) - (ty (cdr p)) - (off (cg-alloc-slot cg 8 8)) - (psym (%sym nm 'param #f ty off))) + (sret? + (let ((ss (cg-alloc-slot cg 8 8))) + (%cg-fn-set! cg '%fn-sret-slot ss) + (buf-push! (cg-prologue-buf cg) + (bv-cat (list "%st(a0, sp, " + (%cg-slot-expr cg ss) ")\n"))))) + (else (%cg-fn-set! cg '%fn-sret-slot #f)))) + ;; params per CC-CONTRACTS §3.1. With sret, explicit arg i lives at + ;; ABI position (i+1): args 0..2 in a1..a3, args 3+ in slot (i-3). + (let* ((sret-shift (if (%cg-fn-get cg '%fn-sret?) 1 0)) + (spill (lambda (abi off) + (cond + ((< abi 4) + (buf-push! (cg-prologue-buf cg) + (bv-cat (list "%st(" (%cg-reg->bv (%reg-by-idx abi)) + ", sp, " + (%cg-slot-expr cg off) ")\n")))) + (else + (buf-push! (cg-prologue-buf cg) + (bv-cat (list "%ldarg(t0, " (%n (- abi 4)) ")\n" + "%st(t0, sp, " + (%cg-slot-expr cg off) ")\n")))))))) + (let walk ((ps params) (idx 0) (out '()) (first-slot #f)) + (cond + ((null? ps) (cond - ((< idx 4) - (let ((ar (%reg-by-idx idx))) - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%st(" (%cg-reg->bv ar) - ", sp, " (%cg-slot-expr cg off) ")\n"))))) - (else - (buf-push! (cg-prologue-buf cg) - (bv-cat (list "%ldarg(t0, " (%n (- idx 4)) ")\n" - "%st(t0, sp, " (%cg-slot-expr cg off) ")\n"))))) - (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out) - (or first-slot off))))))) + (variadic? + (let pad ((i idx) (vfirst #f) (fs first-slot)) + (cond + ((>= i 16) + (%cg-fn-set! cg '%fn-vararg-first-slot (or vfirst fs)) + (reverse out)) + (else + (let ((off (cg-alloc-slot cg 8 8))) + (spill (+ i sret-shift) off) + (pad (+ i 1) (or vfirst off) (or fs off))))))) + (else (reverse out)))) + (else + (let* ((p (car ps)) + (nm (car p)) + (ty (cdr p)) + (off (cg-alloc-slot cg 8 8)) + (psym (%sym nm 'param #f ty off))) + (spill (+ idx sret-shift) off) + (walk (cdr ps) (+ idx 1) (cons (cons nm psym) out) + (or first-slot off)))))))) (define (cg-fn-end cg) ;; Drain prologue-buf and fn-buf directly into cg-text via buf-drain! @@ -2639,22 +2632,26 @@ ;; prologue + body, drained byte-for-byte (buf-drain! tb (cg-prologue-buf cg)) (buf-drain! tb (cg-fn-buf cg)) - ;; ret block — per P1.md §Arguments and return values, ≤8B uses - ;; the one-word direct convention (result in a0); 9–16B uses the - ;; two-word direct convention (a0 = word 0, a1 = word 1). + ;; ret block: ≤8B → a0; 9–16B → a0+a1; >16B sret → a0 = saved sret ptr. (buf-push! tb "::ret\n") - (cond - ((eq? (ctype-kind ret-type) 'void) - (buf-push! tb "%li(a0, 0)\n")) - (else - (buf-push! tb "%ld(a0, sp, ") - (buf-push! tb (%cg-slot-expr cg ret-slot)) - (buf-push! tb ")\n") - (cond - ((> (ctype-size ret-type) 8) - (buf-push! tb "%ld(a1, sp, ") - (buf-push! tb (%cg-slot-expr cg (+ ret-slot 8))) - (buf-push! tb ")\n"))))) + (let ((rk (ctype-kind ret-type)) + (sret? (%cg-fn-get cg '%fn-sret?))) + (cond + ((eq? rk 'void) + (buf-push! tb "%li(a0, 0)\n")) + (sret? + (buf-push! tb "%ld(a0, sp, ") + (buf-push! tb (%cg-slot-expr cg (%cg-fn-get cg '%fn-sret-slot))) + (buf-push! tb ")\n")) + (else + (buf-push! tb "%ld(a0, sp, ") + (buf-push! tb (%cg-slot-expr cg ret-slot)) + (buf-push! tb ")\n") + (cond + ((> (ctype-size ret-type) 8) + (buf-push! tb "%ld(a1, sp, ") + (buf-push! tb (%cg-slot-expr cg (+ ret-slot 8))) + (buf-push! tb ")\n")))))) (buf-push! tb "})\n") (cg-vstack-set! cg '()) (cg-frame-hi-set! cg 0) @@ -3217,18 +3214,47 @@ (let* ((args (let loop ((i 0) (acc '())) (cond ((= i arity) acc) (else (loop (+ i 1) (cons (cg-pop cg) acc)))))) - (fn-op (cg-pop cg))) + (fn-op (cg-pop cg)) + ;; sret = struct/union > 16B return; shift args by one reg + ;; and place a0 last so it's not clobbered by arg loads. + (fty (opnd-type fn-op)) + (rty (cond + ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty))) + ((eq? (ctype-kind fty) 'ptr) + (let ((p (ctype-ext fty))) + (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64))) + (else %t-i64))) + (rk (ctype-kind rty)) + (sret? (and has-result? + (or (eq? rk 'struct) (eq? rk 'union)) + (> (ctype-size rty) 16))) + (sret-shift (if sret? 1 0)) + (recv-slot (cond + (sret? + (cg-alloc-slot cg + (align-up (ctype-size rty) 8) + (max 8 (ctype-align rty)))) + (else #f)))) (let stage ((xs args) (idx 0)) (cond ((null? xs) 0) - ((< idx 4) - (%cg-load-opnd-into cg (car xs) (%reg-by-idx idx)) - (stage (cdr xs) (+ idx 1))) (else - (%cg-load-opnd-into cg (car xs) 't0) - (%cg-emit-st cg 't0 'sp (* 8 (- idx 4))) - (stage (cdr xs) (+ idx 1))))) - (cond ((> arity 4) (%cg-bump-outgoing! cg (- arity 4))) (else 0)) + (let ((abi (+ idx sret-shift))) + (cond + ((< abi 4) + (%cg-load-opnd-into cg (car xs) (%reg-by-idx abi)) + (stage (cdr xs) (+ idx 1))) + (else + (%cg-load-opnd-into cg (car xs) 't0) + (%cg-emit-st cg 't0 'sp (* 8 (- abi 4))) + (stage (cdr xs) (+ idx 1)))))))) + (let ((sa (if sret? (max 0 (- arity 3)) (max 0 (- arity 4))))) + (cond ((> sa 0) (%cg-bump-outgoing! cg sa)) (else 0))) + (cond + (sret? + (%cg-emit-many cg (list "%mov(a0, sp)\n" + "%addi(a0, a0, " + (%cg-slot-expr cg recv-slot) ")\n")))) (cond ((and (eq? (opnd-kind fn-op) 'global) (not (opnd-lval? fn-op))) (%cg-emit-many cg (list "%call(&" (opnd-ext fn-op) ")\n"))) @@ -3237,33 +3263,20 @@ (%cg-emit-many cg (list "%callr(t0)\n")))) (cond (has-result? - (let* ((fty (opnd-type fn-op)) - (rty (cond - ((eq? (ctype-kind fty) 'fn) (car (ctype-ext fty))) - ((eq? (ctype-kind fty) 'ptr) - (let ((p (ctype-ext fty))) - (if (eq? (ctype-kind p) 'fn) (car (ctype-ext p)) %t-i64))) - (else %t-i64))) - (rk (ctype-kind rty))) - (cond - ;; Struct/union return ≤16B (Stream A1, P1.md §Arguments - ;; and return values). Allocate a fresh frame slot sized to - ;; the struct, store back from a0 (word 0) and a1 (word 1 - ;; when size > 8), and push a struct frame-lval. Fresh slot - ;; per call site so chained `f().field` and consecutive - ;; calls don't alias. - ((and (or (eq? rk 'struct) (eq? rk 'union)) - (<= (ctype-size rty) 16)) - (let* ((sz (ctype-size rty)) - (al (max 8 (ctype-align rty))) - (slot (cg-alloc-slot cg (align-up sz 8) al))) - (%cg-emit-st-slot cg 'a0 slot) - (cond - ((> sz 8) - (%cg-emit-st-slot cg 'a1 (+ slot 8)))) - (cg-push cg (%opnd 'frame rty slot #t)))) - (else - (%cg-spill-reg cg 'a0 rty))))) + (cond + ;; >16B sret (A2): a0 holds recv-slot; push as struct lval. + (sret? (cg-push cg (%opnd 'frame rty recv-slot #t))) + ;; ≤16B struct/union (A1): fresh slot, spill from a0/a1. + ((and (or (eq? rk 'struct) (eq? rk 'union)) + (<= (ctype-size rty) 16)) + (let* ((sz (ctype-size rty)) + (al (max 8 (ctype-align rty))) + (slot (cg-alloc-slot cg (align-up sz 8) al))) + (%cg-emit-st-slot cg 'a0 slot) + (cond ((> sz 8) (%cg-emit-st-slot cg 'a1 (+ slot 8)))) + (cg-push cg (%opnd 'frame rty slot #t)))) + (else + (%cg-spill-reg cg 'a0 rty)))) (else #f)))) ;; -------------------------------------------------------------------- @@ -3272,25 +3285,24 @@ (define (cg-return cg) (let* ((ret-slot (%cg-fn-get cg '%fn-ret-slot)) (ret-type (%cg-fn-get cg '%fn-ret-type)) - (rk (ctype-kind ret-type))) + (rk (ctype-kind ret-type)) + (sret? (%cg-fn-get cg '%fn-sret?))) (cond ((eq? rk 'void) (%cg-emit-many cg (list "%b(&::ret)\n"))) ((or (eq? rk 'struct) (eq? rk 'union)) - ;; Struct-by-value return (Stream A1, P1.md §Arguments and - ;; return values): copy bytes from the source lval into the - ;; function's return slot. cg-fn-end then loads a0 (and a1 - ;; for 9–16B) at the epilogue. - (let* ((p (cg-pop cg)) - (sz (ctype-size ret-type))) - (cond - ((not (opnd-lval? p)) - (die #f "cg-return: struct value must be an lvalue"))) - ;; Materialize src addr into t0; dst (ret-slot) addr into t2. + ;; struct-by-value: ≤16B (A1) → ret-slot; >16B (A2 sret) → *sret-slot. + (let* ((p (cg-pop cg)) (sz (ctype-size ret-type))) + (cond ((not (opnd-lval? p)) + (die #f "cg-return: struct value must be an lvalue"))) (%cg-emit-addr-of cg p 't0) - (%cg-emit-many cg (list "%mov(t2, sp)\n" - "%addi(t2, t2, " - (%cg-slot-expr cg ret-slot) ")\n")) + (cond + (sret? + (%cg-emit-ld-slot cg 't2 (%cg-fn-get cg '%fn-sret-slot))) + (else + (%cg-emit-many cg (list "%mov(t2, sp)\n" + "%addi(t2, t2, " + (%cg-slot-expr cg ret-slot) ")\n")))) (%cg-emit-byte-copy cg 't2 't0 't1 sz) (%cg-emit-many cg (list "%b(&::ret)\n")))) (else diff --git a/docs/CC-CONTRACTS.md b/docs/CC-CONTRACTS.md @@ -319,7 +319,16 @@ load epilogue across the three result conventions defined in |---|---|---| | ≤ 8B | one-word direct | `LD a0, [sp + slot]` | | 9–16B | two-word direct | `LD a0, [sp + slot]; LD a1, [sp + slot + 8]` | -| > 16B | indirect-result | (Stream A2) caller passes buffer in a0; callee writes through it; epilogue restores a0 | +| > 16B | indirect-result | `LD a0, [sp + sret-slot]` (reload caller's buffer ptr) | + +For the indirect-result convention (struct/union return > 16B), the +caller materializes a fresh receive slot's address in `a0` before the +call; explicit args 0..2 shift to `a1..a3` and args 3+ stage to +incoming stack-arg slots 0+. The callee's prologue spills the +incoming `a0` to a private sret-slot; `cg-return` copies the source +struct's bytes through that saved pointer; the epilogue reloads +`a0` from the sret-slot so the convention's "a0 unchanged on +return" holds even if the body clobbered `a0`. The cg lowers all three conventions; the parser surface (return statement, struct-typed call result) is identical regardless of diff --git a/tests/cc-cg/73-struct-ret-3word.expected-exit b/tests/cc-cg/73-struct-ret-3word.expected-exit @@ -0,0 +1 @@ +60 diff --git a/tests/cc-cg/73-struct-ret-3word.scm b/tests/cc-cg/73-struct-ret-3word.scm @@ -0,0 +1,51 @@ +;; tests/cc-cg/73-struct-ret-3word.scm — indirect-result struct return. +;; +;; struct Triple { long a; long b; long c; }; /* 24B — > 16B */ +;; struct Triple make(long a, long b, long c) { ...; return t; } +;; int main(void) { struct Triple r = make(10,20,30); +;; return (int)(r.a+r.b+r.c); } /* 60 */ +;; +;; Stream A2 (P1.md §Arguments): caller passes result-buffer ptr in +;; a0; explicit args 0..2 land in a1..a3; cg-fn-begin/v shifts param +;; ABI by one register; cg-return writes through saved sret pointer. + +(let* ((cg (cg-init)) + (ty (%ctype 'struct 24 8 + (list "T" #t + (list (list "a" %t-i64 0) + (list "b" %t-i64 8) + (list "c" %t-i64 16))))) + (fty (%ctype 'fn 8 8 + (cons ty (cons (list %t-i64 %t-i64 %t-i64) #f)))) + (mk (%sym "make" 'fn 'extern fty #f))) + (let* ((ps (cg-fn-begin cg "make" + (list (cons "a" %t-i64) + (cons "b" %t-i64) + (cons "c" %t-i64)) + ty)) + (a* (cdr (car ps))) (b* (cdr (cadr ps))) (c* (cdr (caddr ps))) + (to (cg-alloc-slot cg 24 8)) + (ts (%sym "t" 'var 'auto ty to))) + (cg-push-sym cg ts) (cg-push-field cg "a") + (cg-push-sym cg a*) (cg-load cg) (cg-assign cg) (cg-pop cg) + (cg-push-sym cg ts) (cg-push-field cg "b") + (cg-push-sym cg b*) (cg-load cg) (cg-assign cg) (cg-pop cg) + (cg-push-sym cg ts) (cg-push-field cg "c") + (cg-push-sym cg c*) (cg-load cg) (cg-assign cg) (cg-pop cg) + (cg-push-sym cg ts) (cg-return cg) + (cg-fn-end cg)) + (cg-fn-begin cg "main" '() %t-i32) + (cg-push-sym cg mk) + (cg-push-imm cg %t-i64 10) + (cg-push-imm cg %t-i64 20) + (cg-push-imm cg %t-i64 30) + (cg-call cg 3 #t) + (let ((r (cg-pop cg))) + (cg-push cg r) (cg-push-field cg "a") (cg-load cg) + (cg-push cg r) (cg-push-field cg "b") (cg-load cg) + (cg-binop cg 'add) + (cg-push cg r) (cg-push-field cg "c") (cg-load cg) + (cg-binop cg 'add)) + (cg-cast cg %t-i32) (cg-return cg) + (cg-fn-end cg) + (write-bv-fd 1 (cg-finish cg)))