commit cf009085124f0bab052ea6f5201030ae727c7d5f parent 242fc8d4057cf42576e05f634c2707e8ebc8bfdc Author: Ryan Sepassi <rsepassi@gmail.com> Date: Mon, 11 May 2026 07:54:05 -0700 parse: aggregate-by-value params and returns — 2-reg DIRECT and INDIRECT copy cg_call now passes struct/union args through as borrowed lvalues so the backend can read each ABI part from src_offset (or pass the address for INDIRECT/byval), and allocates a hidden return slot for aggregate returns. cg_ret mirrors this for the callee side. to_rvalue treats aggregates like arrays (no scalar load), and the implicit-fallthrough return skips the zero-value push when the return type is aggregate. release_arg_storage no longer recycles aggregate-typed OPK_LOCAL slots into the scalar spill pool. New CORPUS rows under §6.5.2.2 and §6.8.6.4 cover 1-reg / 2-reg / large variants for both directions plus the take(mk()) chain. Diffstat:
21 files changed, 226 insertions(+), 19 deletions(-)
diff --git a/src/cg/cg.c b/src/cg/cg.c @@ -393,13 +393,21 @@ static SValue* pick_victim(CG* g, u8 cls) { /* Release the resources owned by a single in-flight CGABIValue arg * after the call has returned: REG storage goes back to the reg pool, - * LOCAL storage (only produced by spill_avs_victim) returns its slot - * to the per-class spill-slot pool. IMM and other kinds carry no - * runtime ownership and need nothing. */ + * LOCAL storage produced by spill_avs_victim returns its slot to the + * per-class spill-slot pool. IMM and other kinds carry no runtime + * ownership and need nothing. + * + * Aggregate-typed OPK_LOCAL is a borrowed lvalue — the slot belongs to + * a user local or a stable byval/return frame slot — and must NOT + * return to the spill pool (size and class mismatch corrupt it). The + * scalar-vs-aggregate type check is the discriminator since + * spill_avs_victim only ever spills scalar-typed REG storage. */ static void release_arg_storage(CG* g, const Operand* st) { if (st->kind == OPK_REG) { g->target->free_reg(g->target, st->v.reg, st->cls); } else if (st->kind == OPK_LOCAL) { + const Type* t = st->type; + if (t && (t->kind == TY_STRUCT || t->kind == TY_UNION)) return; return_spill_slot(g, st->v.frame_slot, st->cls); } } @@ -1096,18 +1104,17 @@ void cg_call(CG* g, u32 nargs, const Type* fn_type) { g->avs_in_flight_n = nargs; /* Pop args in reverse so we can fill avs[i] in declaration order. - * Lvalues materialize into a register through force_reg (which also - * frees an old INDIRECT base); OPK_IMM and OPK_REG pass through so - * the call sees the same operand. */ + * Scalar lvalues materialize into a register through force_reg (which + * also frees an old INDIRECT base); OPK_IMM and OPK_REG pass through + * so the call sees the same operand. Aggregate args (struct/union) + * stay as lvalues — the backend reads each ABI part from + * &storage + part->src_offset (DIRECT) or passes the address + * itself (INDIRECT/byval). 
The parser is expected to have left an + * OPK_LOCAL/GLOBAL/INDIRECT on the value stack for them. */ for (u32 i = 0; i < nargs; ++i) { u32 idx = nargs - 1u - i; SValue arg = pop(g); ensure_reg(g, &arg); - /* Variadic callees: idx >= nparams indexes into the trailing `...`, - * which has no entry in fn.params or abi->params. The type comes - * from the argument itself (already promoted by the parser per - * §6.5.2.2 ¶6) and the per-arg ABI is left NULL so the backend - * synthesizes a one-part DIRECT classification on the spot. */ int is_vararg = (idx >= abi->nparams); const Type* aty; if (is_vararg) { @@ -1117,7 +1124,24 @@ void cg_call(CG* g, u32 nargs, const Type* fn_type) { } avs[idx].type = aty; avs[idx].abi = is_vararg ? NULL : &abi->params[idx]; - avs[idx].storage = is_lvalue(&arg.op) ? force_reg(g, &arg, aty) : arg.op; + int is_aggregate = aty && (aty->kind == TY_STRUCT || aty->kind == TY_UNION); + if (is_aggregate) { + if (!is_lvalue(&arg.op)) { + compiler_panic(g->c, g->cur_loc, + "cg_call: aggregate arg requires an lvalue source " + "(got operand kind %d)", + (int)arg.op.kind); + } + /* Stamp the operand's type with the aggregate type so + * release_arg_storage recognizes this as a borrowed lvalue and + * leaves the slot alone. */ + Operand st = arg.op; + st.type = aty; + avs[idx].storage = st; + } else { + avs[idx].storage = + is_lvalue(&arg.op) ? 
force_reg(g, &arg, aty) : arg.op; + } } SValue callee = pop(g); @@ -1138,9 +1162,28 @@ void cg_call(CG* g, u32 nargs, const Type* fn_type) { desc.flags = CG_CALL_NONE; desc.ret.type = ret_ty; desc.ret.abi = &abi->ret; + int ret_is_aggregate = + has_result && (ret_ty->kind == TY_STRUCT || ret_ty->kind == TY_UNION); + FrameSlot ret_slot = FRAME_SLOT_NONE; if (has_result) { - Reg r = alloc_reg_or_spill(g, type_class(ret_ty), ret_ty); - desc.ret.storage = op_reg(r, ret_ty); + if (ret_is_aggregate) { + /* Caller-side home for the return: INDIRECT (sret) writes through + * the hidden destination pointer into this slot; DIRECT multi-part + * has the backend store each return register at part->src_offset + * within it. Either way the parser receives an OPK_LOCAL lvalue. */ + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.type = ret_ty; + fsd.size = abi_sizeof(g->abi, ret_ty); + fsd.align = abi_alignof(g->abi, ret_ty); + fsd.kind = FS_LOCAL; + fsd.flags = FSF_ADDR_TAKEN; + ret_slot = g->target->frame_slot(g->target, &fsd); + desc.ret.storage = op_local(ret_slot, ret_ty); + } else { + Reg r = alloc_reg_or_spill(g, type_class(ret_ty), ret_ty); + desc.ret.storage = op_reg(r, ret_ty); + } } T->call(T, &desc); @@ -1180,11 +1223,29 @@ void cg_ret(CG* g, int has_value) { { SValue v = pop(g); const Type* rty = g->fn_ret_type; - Operand ret_op = force_reg(g, &v, rty); + int is_aggregate = rty && (rty->kind == TY_STRUCT || rty->kind == TY_UNION); CGABIValue av; memset(&av, 0, sizeof av); av.type = rty; av.abi = &abi->ret; + if (is_aggregate) { + /* Aggregate return: backend reads parts from the source lvalue + * (DIRECT) or memcpys it through the sret pointer (INDIRECT). 
*/ + if (!is_lvalue(&v.op)) { + compiler_panic(g->c, g->cur_loc, + "cg_ret: aggregate return requires an lvalue source " + "(got operand kind %d)", + (int)v.op.kind); + } + av.storage = v.op; + av.storage.type = rty; + T->ret(T, &av); + /* No register/spill obligation to release — the source slot is + * borrowed and the underlying lvalue's owner (e.g. the function's + * local) cleans up at func_end. */ + return; + } + Operand ret_op = force_reg(g, &v, rty); av.storage = ret_op; T->ret(T, &av); release(g, &v); diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -1619,6 +1619,11 @@ static void to_rvalue(Parser* p) { cg_addr(p->cg); return; } + /* Aggregates do not load into a single scratch register — they are + * consumed by cg_call/cg_ret/struct-copy as addressable storage. The + * value stack already holds an lvalue (LOCAL/GLOBAL/INDIRECT) or the + * call return's hidden slot lvalue; leave it alone. */ + if (t->kind == TY_STRUCT || t->kind == TY_UNION) return; } cg_load(p->cg); } @@ -5401,10 +5406,15 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, parse_compound_stmt(p); /* Implicit fall-through return: emit a return so the function's epilogue - * always has a tail to chain into. For non-void functions this returns - * a zero value, which is undefined behavior at the language level but - * a useful safety belt against trailing-fall-through. */ - if (fn_ty->fn.ret && fn_ty->fn.ret->kind != TY_VOID) { + * always has a tail to chain into. For non-void scalar returns this + * returns a zero value, which is undefined behavior at the language + * level but a useful safety belt against trailing-fall-through. + * Aggregate returns can't synthesize a typed zero rvalue (no scalar + * source), so emit a bare ret — the epilogue still runs and the + * return value is whatever was last written into the destination + * (UB by the same token). 
*/ + if (fn_ty->fn.ret && fn_ty->fn.ret->kind != TY_VOID && + fn_ty->fn.ret->kind != TY_STRUCT && fn_ty->fn.ret->kind != TY_UNION) { cg_push_int(p->cg, 0, fn_ty->fn.ret); cg_ret(p->cg, 1); } else { diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md @@ -182,6 +182,22 @@ here for completeness once they're real cases. | `6_5_56_compound_literal_struct` | · | `struct S s = (struct S){.a=20,.b=22}; return s.a+s.b;` — struct compound literal with designated init | 42 | | `6_5_57_unsigned_wrap_add` | ★ | `unsigned x=0xFFFFFFFFU; x+=1; return (int)(x & 0xff);` — unsigned addition wraps modulo 2^32 | 0 | +## §6.5.2.2 Aggregate function arguments + +Aggregate-by-value argument passing. The classification boundary on all +three ABIs is 16 bytes: aggregates ≤16B go through `ABI_ARG_DIRECT` in 1 +or 2 GPRs, larger ones through `ABI_ARG_INDIRECT` with a caller-side +copy (BYVAL). Each row exercises one cell of the (1-reg / 2-reg / copy) +× (homogeneous / mixed-field) matrix. + +| Case | Status | Body | Expected | +|---|---|---|---| +| `6_5_2_2_01_struct_param_1reg` | ★ | `struct S{int a,b;}; int take(struct S s){return s.a+s.b;}` — 8-byte struct, 1-part DIRECT | 42 | +| `6_5_2_2_02_struct_param_2reg` | ★ | `struct S{long a,b;}; long take(struct S s){return s.a+s.b;}` — 16-byte struct, 2-part DIRECT | 42 | +| `6_5_2_2_03_struct_param_2reg_mixed` | ★ | `struct S{int a,b,c,d;};` — 16-byte struct of four ints; 2-part DIRECT with sub-8B chunking | 42 | +| `6_5_2_2_04_struct_param_large` | ★ | `struct S{int a[8];};` — 32-byte struct, INDIRECT/BYVAL caller copy | 42 | +| `6_5_2_2_05_struct_param_with_scalars` | ★ | `int take(int pre, struct S s, int post)` — 2-reg struct between scalar args; arg-cursor accounting | 42 | + ## §6.6 Constant expressions | Case | Status | Body | Expected | @@ -405,6 +421,20 @@ cover compound typedef targets. 
| `6_8_27_label_on_null_stmt` | ★ | `end: ;` — label applied to a null statement | 42 | | `6_8_28_return_narrow_convert` | ★ | `unsigned char narrow(int x){return x;}` — 298 & 0xff = 42; narrowing on return | 42 | +## §6.8.6.4 Aggregate return values + +Aggregate-by-value return. Mirrors §6.5.2.2: ≤16B uses `ABI_ARG_DIRECT` +in 1 or 2 return registers; larger uses `ABI_ARG_INDIRECT` with the +caller passing a hidden destination pointer (sret) that the callee +memcpys into before `ret`. + +| Case | Status | Body | Expected | +|---|---|---|---| +| `6_8_6_4_01_struct_return_1reg` | ★ | `struct S{int a,b;}; struct S mk(void){...}` — 8-byte struct returned in one reg | 42 | +| `6_8_6_4_02_struct_return_2reg` | ★ | `struct S{long a,b;}; struct S mk(void){...}` — 16-byte struct returned across two regs | 42 | +| `6_8_6_4_03_struct_return_large` | ★ | `struct S{int a[8];}; struct S mk(void){...}` — 32-byte struct returned via sret pointer | 42 | +| `6_8_6_4_04_struct_return_call_chain` | ★ | `take(mk())` — 2-reg return immediately fed as 2-reg DIRECT arg with no named local | 42 | + ## §6.9 External definitions | Case | Status | Body | Expected | diff --git a/test/parse/cases/6_5_2_2_01_struct_param_1reg.c b/test/parse/cases/6_5_2_2_01_struct_param_1reg.c @@ -0,0 +1,9 @@ +/* 8-byte struct passed by value — fits in one GPR on all targets + * (AAPCS64: x0; SysV-x64: rdi; RV64: a0). Exercises DIRECT classification + * with a single ABI part loaded from the caller's source lvalue. 
*/ +struct S { int a, b; }; +int take(struct S s) { return s.a + s.b; } +int test_main(void) { + struct S s = {20, 22}; + return take(s); +} diff --git a/test/parse/cases/6_5_2_2_01_struct_param_1reg.expected b/test/parse/cases/6_5_2_2_01_struct_param_1reg.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_5_2_2_02_struct_param_2reg.c b/test/parse/cases/6_5_2_2_02_struct_param_2reg.c @@ -0,0 +1,10 @@ +/* 16-byte struct passed by value — splits across two GPRs on all targets + * (AAPCS64: x0,x1; SysV-x64: rdi,rsi; RV64: a0,a1). The DIRECT + * classification carries two ABI parts; the backend loads each part from + * the caller's source local at src_offset 0 and 8. */ +struct S { long a, b; }; +long take(struct S s) { return s.a + s.b; } +int test_main(void) { + struct S s = {20L, 22L}; + return (int)take(s); +} diff --git a/test/parse/cases/6_5_2_2_02_struct_param_2reg.expected b/test/parse/cases/6_5_2_2_02_struct_param_2reg.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_5_2_2_03_struct_param_2reg_mixed.c b/test/parse/cases/6_5_2_2_03_struct_param_2reg_mixed.c @@ -0,0 +1,9 @@ +/* 16-byte struct of four ints — 2-reg DIRECT path with sub-8B chunking. + * Field values are chosen so a+b+c+d == 42 but a+b+a+b == 34: if part 1 + * is dropped or aliased to part 0 the sum collapses to 34. */ +struct S { int a, b, c, d; }; +int take(struct S s) { return s.a + s.b + s.c + s.d; } +int test_main(void) { + struct S s = {5, 12, 10, 15}; + return take(s); +} diff --git a/test/parse/cases/6_5_2_2_03_struct_param_2reg_mixed.expected b/test/parse/cases/6_5_2_2_03_struct_param_2reg_mixed.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_5_2_2_04_struct_param_large.c b/test/parse/cases/6_5_2_2_04_struct_param_large.c @@ -0,0 +1,10 @@ +/* 32-byte struct (>16B) — INDIRECT/byval on all three ABIs. 
The caller + * makes a copy in its frame and passes the address; the callee receives + * the pointer in the first int-arg register and treats its home slot as + * a pointer-to-copy. */ +struct S { int a[8]; }; +int take(struct S s) { return s.a[0] + s.a[7]; } +int test_main(void) { + struct S s = {{20, 0, 0, 0, 0, 0, 0, 22}}; + return take(s); +} diff --git a/test/parse/cases/6_5_2_2_04_struct_param_large.expected b/test/parse/cases/6_5_2_2_04_struct_param_large.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_5_2_2_05_struct_param_with_scalars.c b/test/parse/cases/6_5_2_2_05_struct_param_with_scalars.c @@ -0,0 +1,11 @@ +/* Interleaved scalar/aggregate args. The 2-reg struct consumes the next + * two int-arg slots and shifts the trailing scalar; this exercises the + * arg-cursor accounting around a multi-part DIRECT argument. */ +struct S { long a, b; }; +int take(int pre, struct S s, int post) { + return pre + (int)s.a + (int)s.b + post; +} +int test_main(void) { + struct S s = {10L, 11L}; + return take(10, s, 11); +} diff --git a/test/parse/cases/6_5_2_2_05_struct_param_with_scalars.expected b/test/parse/cases/6_5_2_2_05_struct_param_with_scalars.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_8_6_4_01_struct_return_1reg.c b/test/parse/cases/6_8_6_4_01_struct_return_1reg.c @@ -0,0 +1,12 @@ +/* Return an 8-byte struct — DIRECT, one part returned in the first + * int-return register (x0/rax/a0). Caller stores the return reg into + * its destination slot before per-field access. 
*/ +struct S { int a, b; }; +struct S mk(void) { + struct S s = {20, 22}; + return s; +} +int test_main(void) { + struct S r = mk(); + return r.a + r.b; +} diff --git a/test/parse/cases/6_8_6_4_01_struct_return_1reg.expected b/test/parse/cases/6_8_6_4_01_struct_return_1reg.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_8_6_4_02_struct_return_2reg.c b/test/parse/cases/6_8_6_4_02_struct_return_2reg.c @@ -0,0 +1,12 @@ +/* Return a 16-byte struct — DIRECT, two parts returned across the first + * two int-return registers (x0,x1 / rax,rdx / a0,a1). Callee loads parts + * from its local; caller stores them back into a destination slot. */ +struct S { long a, b; }; +struct S mk(void) { + struct S s = {20L, 22L}; + return s; +} +int test_main(void) { + struct S r = mk(); + return (int)(r.a + r.b); +} diff --git a/test/parse/cases/6_8_6_4_02_struct_return_2reg.expected b/test/parse/cases/6_8_6_4_02_struct_return_2reg.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_8_6_4_03_struct_return_large.c b/test/parse/cases/6_8_6_4_03_struct_return_large.c @@ -0,0 +1,12 @@ +/* Return a 32-byte struct — INDIRECT/sret. The caller passes a hidden + * destination pointer in the sret register (x8 / rdi / a0); the callee + * memcpys the return value into [sret_ptr] before returning. */ +struct S { int a[8]; }; +struct S mk(void) { + struct S s = {{20, 0, 0, 0, 0, 0, 0, 22}}; + return s; +} +int test_main(void) { + struct S r = mk(); + return r.a[0] + r.a[7]; +} diff --git a/test/parse/cases/6_8_6_4_03_struct_return_large.expected b/test/parse/cases/6_8_6_4_03_struct_return_large.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/6_8_6_4_04_struct_return_call_chain.c b/test/parse/cases/6_8_6_4_04_struct_return_call_chain.c @@ -0,0 +1,12 @@ +/* The callee returns a 2-reg struct that is immediately fed as a 2-reg + * struct argument to the next call — no intervening named local. 
The + * caller's destination slot for mk() is the same slot that take() reads + * its byval source from, so cg must keep the slot pinned across both + * sides of the call boundary. */ +struct S { long a, b; }; +struct S mk(void) { + struct S s = {20L, 22L}; + return s; +} +int take(struct S s) { return (int)(s.a + s.b); } +int test_main(void) { return take(mk()); } diff --git a/test/parse/cases/6_8_6_4_04_struct_return_call_chain.expected b/test/parse/cases/6_8_6_4_04_struct_return_call_chain.expected @@ -0,0 +1 @@ +42