kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f2d3e01ce9089ff005e5f0542dda5288a416dbab
parent a1d47efeb5b8d5d363e1b04559ab2180db66add7
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 11 May 2026 11:27:44 -0700

cg/arch: handle OPK_INDIRECT in call-arg + return paths

`line[i].loc` lowers to an OPK_INDIRECT source operand; the aa64/x64/rv64
backends only knew how to read OPK_IMM/REG/LOCAL and panicked. Extend the
INT/FP/BYVAL arg branches plus the call return-receive and *_ret paths to
load (or, for return-receive, store) each ABI part at
[base + ind.ofs + src_offset]. cg already
populates the new CGABIValue.size field, which the backends now need for
the sret memcpy whose source is OPK_INDIRECT (no frame slot to query).

Parse tests cover small / two-part / byval struct args, FP-field args,
indirect ret-receive, DIRECT struct return, and sret struct return.

Diffstat:
Msrc/arch/aarch64.c | 198+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Msrc/arch/arch.h | 10++++++++++
Msrc/arch/rv64.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Msrc/arch/x64.c | 103++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Atest/parse/cases/call_indirect_arg_fp_field.c | 13+++++++++++++
Atest/parse/cases/call_indirect_arg_fp_field.expected | 1+
Atest/parse/cases/call_indirect_arg_struct_byval.c | 16++++++++++++++++
Atest/parse/cases/call_indirect_arg_struct_byval.expected | 1+
Atest/parse/cases/call_indirect_arg_struct_field.c | 21+++++++++++++++++++++
Atest/parse/cases/call_indirect_arg_struct_field.expected | 1+
Atest/parse/cases/call_indirect_arg_struct_field_two_parts.c | 17+++++++++++++++++
Atest/parse/cases/call_indirect_arg_struct_field_two_parts.expected | 1+
Atest/parse/cases/call_indirect_ret_into_indirect.c | 16++++++++++++++++
Atest/parse/cases/call_indirect_ret_into_indirect.expected | 1+
Atest/parse/cases/call_indirect_ret_struct_byval.c | 15+++++++++++++++
Atest/parse/cases/call_indirect_ret_struct_byval.expected | 1+
Atest/parse/cases/call_indirect_ret_struct_direct.c | 15+++++++++++++++
Atest/parse/cases/call_indirect_ret_struct_direct.expected | 1+
18 files changed, 476 insertions(+), 72 deletions(-)

diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c @@ -33,6 +33,7 @@ #include "arch/aa64_asm.h" #include "arch/aa64_isa.h" +#include "arch/aa64_regs.h" #include "arch/arch.h" #include "core/arena.h" #include "obj/obj.h" @@ -958,6 +959,33 @@ static const Reg* aa_clobbers(CGTarget* t, RegClass c, u32* n) { (void)n; aa_panic(t, "clobbers"); } + +static int aa_resolve_reg_name(CGTarget* t, Sym name, Reg* out, + RegClass* cls_out) { + (void)t; + size_t len = 0; + const char* s = pool_str(t->c->global, name, &len); + if (!s || !len) return 1; + /* pool_str does not guarantee NUL-termination; copy into a small buffer. */ + char buf[8]; + if (len >= sizeof buf) return 1; + memcpy(buf, s, len); + buf[len] = '\0'; + u32 dwarf; + if (aa64_register_index(buf, &dwarf) != 0) return 1; + if (dwarf <= 30u) { /* x0..x30 */ + if (out) *out = (Reg)dwarf; + if (cls_out) *cls_out = RC_INT; + return 0; + } + if (dwarf >= 64u && dwarf <= 95u) { /* v0..v31 */ + if (out) *out = (Reg)(dwarf - 64u); + if (cls_out) *cls_out = RC_FP; + return 0; + } + /* sp/pc and others — not allocatable, treat as unresolvable. */ + return 1; +} static void aa_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma) { AAImpl* a = impl_of(t); @@ -2049,6 +2077,11 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, AASlot* s = slot_get(a, av->storage.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad byval slot"); emit32(t->mc, aa64_sub_imm(1, dst_reg, 29, s->off, 0)); + } else if (av->storage.kind == OPK_INDIRECT) { + /* BYVAL from an indirect lvalue: pass the address `base + ind.ofs` + * itself in the arg register. 
*/ + emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f, + av->storage.v.ind.ofs); } else { compiler_panic(t->c, a->loc, "aarch64 call: INDIRECT arg storage kind %d unsupported", @@ -2091,6 +2124,22 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, emit32(t->mc, aa64_ldur(sidx, dst_reg, 29, off)); break; } + case OPK_INDIRECT: { + /* Source is `[base + ind.ofs]`. Load each part from + * `[base, ind.ofs + part->src_offset]`. The cg layer hands out + * INDIRECT base regs from the callee-saved pool (x19..x28), + * which is disjoint from arg regs (x0..x7) and the x9 scratch, + * so the base survives every iteration of the part loop. */ + Operand src; + memset(&src, 0, sizeof src); + src.kind = OPK_INDIRECT; + src.v.ind.base = av->storage.v.ind.base; + src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; + i32 off; + u32 base = addr_base(t, src, &off, /*tmp=*/9); + emit32(t->mc, aa64_ldur(sidx, dst_reg, base, off)); + break; + } default: compiler_panic(t->c, a->loc, "aarch64 call: arg storage kind %d unsupported", @@ -2110,6 +2159,17 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, emit32(t->mc, aa64_fmov_reg(type, dst_reg, reg_num(av->storage))); break; } + case OPK_INDIRECT: { + Operand src; + memset(&src, 0, sizeof src); + src.kind = OPK_INDIRECT; + src.v.ind.base = av->storage.v.ind.base; + src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; + i32 off; + u32 base = addr_base(t, src, &off, /*tmp=*/9); + emit32(t->mc, aa64_ldur_fp(sidx, dst_reg, base, off)); + break; + } default: compiler_panic(t->c, a->loc, "aarch64 call: FP arg storage kind %d unsupported", @@ -2124,6 +2184,21 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, emit32(t->mc, aa64_stur_fp(sidx, reg_num(av->storage), 31, (i32)*stack_off)); break; + case OPK_INDIRECT: { + /* No direct mem-to-mem on aa64: route through a caller-saved + * scratch FP reg (v16) to avoid clobbering v0..v7 already + * loaded with earlier FP args. 
*/ + Operand src; + memset(&src, 0, sizeof src); + src.kind = OPK_INDIRECT; + src.v.ind.base = av->storage.v.ind.base; + src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; + i32 off; + u32 base = addr_base(t, src, &off, /*tmp=*/9); + emit32(t->mc, aa64_ldur_fp(sidx, /*Vt=*/16u, base, off)); + emit32(t->mc, aa64_stur_fp(sidx, /*Vt=*/16u, 31, (i32)*stack_off)); + break; + } default: compiler_panic( t->c, a->loc, @@ -2214,15 +2289,24 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) { u32 type = (p->size == 8) ? 1u : 0u; emit32(mc, aa64_fmov_reg(type, reg_num(rs), src_reg)); } - } else if (rs.kind == OPK_LOCAL) { - AASlot* s = slot_get(a, rs.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad ret slot"); + } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (rs.kind == OPK_LOCAL) { + AASlot* s = slot_get(a, rs.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad ret slot"); + base_reg = 29; + base_off = -(i32)s->off; + } else { + base_reg = rs.v.ind.base & 0x1f; + base_off = rs.v.ind.ofs; + } u32 sidx = size_idx_for_bytes(p->size); - i32 off = -(i32)s->off + (i32)p->src_offset; + i32 off = base_off + (i32)p->src_offset; if (p->cls == ABI_CLASS_INT) { - emit32(mc, aa64_stur(sidx, src_reg, 29, off)); + emit32(mc, aa64_stur(sidx, src_reg, base_reg, off)); } else { - emit32(mc, aa64_stur_fp(sidx, src_reg, 29, off)); + emit32(mc, aa64_stur_fp(sidx, src_reg, base_reg, off)); } } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { /* Void return placeholder — nothing to do. */ @@ -2274,6 +2358,41 @@ static void aa_ret(CGTarget* t, const CGABIValue* val) { emit32(mc, aa64_str_uimm(0, 9, 8, i)); i += 1; } + } else if (val->storage.kind == OPK_INDIRECT) { + /* sret memcpy from `[base + ind.ofs]` into [x8]. cg populates + * `val->size` with the aggregate byte count. 
*/ + u32 nbytes = val->size; + if (!nbytes) { + compiler_panic(t->c, a->loc, + "aarch64 ret indirect: missing aggregate size"); + } + if (a->sret_ptr_slot != FRAME_SLOT_NONE) { + AASlot* sp = slot_get(a, a->sret_ptr_slot); + if (sp) emit32(mc, aa64_ldur(3, 8, 29, -(i32)sp->off)); + } + u32 base_reg = val->storage.v.ind.base & 0x1f; + i32 base_off = val->storage.v.ind.ofs; + u32 i = 0; + while (i + 8 <= nbytes) { + emit32(mc, aa64_ldur(3, 9, base_reg, base_off + (i32)i)); + emit32(mc, aa64_str_uimm(3, 9, 8, i)); + i += 8; + } + while (i + 4 <= nbytes) { + emit32(mc, aa64_ldur(2, 9, base_reg, base_off + (i32)i)); + emit32(mc, aa64_str_uimm(2, 9, 8, i)); + i += 4; + } + while (i + 2 <= nbytes) { + emit32(mc, aa64_ldur(1, 9, base_reg, base_off + (i32)i)); + emit32(mc, aa64_str_uimm(1, 9, 8, i)); + i += 2; + } + while (i < nbytes) { + emit32(mc, aa64_ldur(0, 9, base_reg, base_off + (i32)i)); + emit32(mc, aa64_str_uimm(0, 9, 8, i)); + i += 1; + } } else { compiler_panic(t->c, a->loc, "aarch64 ret indirect: storage kind %d unsupported", @@ -2290,21 +2409,32 @@ static void aa_ret(CGTarget* t, const CGABIValue* val) { } else if (val->storage.kind == OPK_IMM) { u32 sf = type_is_64(val->storage.type) ? 1u : 0u; emit_load_imm(mc, sf, /*Rd=*/0, val->storage.v.imm); - } else if (val->storage.kind == OPK_LOCAL) { - /* DIRECT return whose source is a local: load each part into - * x0/x1 (or v0/v1) per the ABI classification. Used for - * small structs returned in registers. */ - AASlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad local slot"); + } else if (val->storage.kind == OPK_LOCAL || + val->storage.kind == OPK_INDIRECT) { + /* DIRECT return whose source is a local or an indirect lvalue: + * load each part into x0/x1 (or v0/v1) per the ABI classification. + * cg hands out INDIRECT base regs from x19..x28, disjoint from the + * x0/x1 (v0/v1) return regs, so the base survives the part loop. 
*/ + u32 base_reg; + i32 base_off; + if (val->storage.kind == OPK_LOCAL) { + AASlot* s = slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad local slot"); + base_reg = 29; /* fp */ + base_off = -(i32)s->off; + } else { + base_reg = val->storage.v.ind.base & 0x1f; + base_off = val->storage.v.ind.ofs; + } const ABIArgInfo* ri = val->abi; for (u16 i = 0; i < (ri ? ri->nparts : 0); ++i) { const ABIArgPart* pt = &ri->parts[i]; u32 sidx = size_idx_for_bytes(pt->size); - i32 off = -(i32)s->off + (i32)pt->src_offset; + i32 off = base_off + (i32)pt->src_offset; if (pt->cls == ABI_CLASS_INT) { - emit32(mc, aa64_ldur(sidx, /*Rt=*/i, 29, off)); + emit32(mc, aa64_ldur(sidx, /*Rt=*/i, base_reg, off)); } else if (pt->cls == ABI_CLASS_FP) { - emit32(mc, aa64_ldur_fp(sidx, /*Rt=*/i, 29, off)); + emit32(mc, aa64_ldur_fp(sidx, /*Rt=*/i, base_reg, off)); } else { compiler_panic(t->c, a->loc, "aarch64 ret: ret part cls %d unimpl", (int)pt->cls); @@ -2539,8 +2669,6 @@ static inline u32 aa64_stlxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) { return (sf64 ? 0xC800FC00u : 0x8800FC00u) | ((Rs & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); } -static inline u32 aa64_dmb_ish(void) { return 0xD5033BBFu; } -static inline u32 aa64_clrex(void) { return 0xD5033F5Fu; } /* CBNZ Rt, imm19 */ static inline u32 aa64_cbnz(u32 sf64, u32 Rt) { return 0x35000000u | (sf64 << 31) | (Rt & 0x1f); @@ -2789,7 +2917,7 @@ static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, /* L_fail: clear monitor; ok = 0 */ mc->label_place(mc, L_fail); - emit32(mc, aa64_clrex()); + emit32(mc, aa64_clrex(AA64_BARRIER_OPT_SY)); emit_load_imm(mc, 0, reg_num(ok), 0); mc->label_place(mc, L_done); @@ -2800,7 +2928,7 @@ static void aa_fence(CGTarget* t, MemOrder o) { /* Conservative: full-system DMB ISH for any release/acquire/seq_cst. * RELAXED fence is a no-op. 
*/ if (o == MO_RELAXED) return; - emit32(t->mc, aa64_dmb_ish()); + emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH)); } /* ---- intrinsics ---- */ @@ -3106,11 +3234,32 @@ static void aa_asm_block(CGTarget* t, const char* tmpl, * source text and dispatching each line through the standalone * aa64_asm_insn parser. * - * Register-name clobbers (Sym entries like "x0") are not yet routed - * to the aarch64 RA — Track B's cg_inline_asm is responsible for - * marking the RA's clobber set before/after this call. v1 trusts - * that contract; the binder runs the template, the RA plumbing lands - * with Track B. */ + * cg_inline_asm has already spilled any live SValues bound to + * physical regs named in `clobs` (via target->resolve_reg_name). Here + * we additionally bump the callee-save high-water marks so the + * prologue saves/restores any callee-saved reg the asm body trashes + * even when no SValue ever used it. */ + AAImpl* a_impl = impl_of(t); + for (u32 i = 0; i < nc; ++i) { + Reg phys; + RegClass cls; + if (aa_resolve_reg_name(t, clobs[i], &phys, &cls) != 0) continue; + if (cls == RC_INT) { + u32 idx = (u32)phys; + RegPool* p = &a_impl->int_pool; + if (idx >= p->base && idx < (u32)(p->base + p->nregs)) { + u32 off = idx - p->base + 1u; + if (off > p->hwm) p->hwm = off; + } + } else if (cls == RC_FP) { + u32 idx = (u32)phys; + RegPool* p = &a_impl->fp_pool; + if (idx >= p->base && idx < (u32)(p->base + p->nregs)) { + u32 off = idx - p->base + 1u; + if (off > p->hwm) p->hwm = off; + } + } + } AA64Asm* a = aa64_asm_open(t->c); aa64_inline_bind(a, outs, no, oo, ins, ni, io, clobs, nc); aa64_asm_run_template(a, t->mc, tmpl); @@ -3198,6 +3347,7 @@ CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->intrinsic = aa_intrinsic; t->asm_block = aa_asm_block; + t->resolve_reg_name = aa_resolve_reg_name; t->set_loc = aa_set_loc; t->finalize = aa_finalize; diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -291,6 +291,10 @@ typedef struct CGABIValue { storage; /* 
address for indirect/byval/sret, REG/IMM for simple values */ const CGABIPart* parts; u32 nparts; + /* Aggregate byte size of `type`, populated by cg for struct/union args + * and returns. Backends need this to memcpy through OPK_INDIRECT byval + * sources where no frame-slot size is available; left 0 for scalars. */ + u32 size; } CGABIValue; typedef struct CGParamDesc { @@ -620,6 +624,12 @@ struct CGTarget { u32 nout, Operand* out_ops, const AsmConstraint* ins, u32 nin, const Operand* in_ops, const Sym* clobbers, u32 nclob); + /* Resolve a register-name clobber Sym (e.g. "x20", "v8") to its physical + * Reg + RegClass. Returns 0 on success, nonzero if `name` is not a + * register (e.g. "memory", "cc", or an unknown identifier). Used by + * cg_inline_asm to spill SValues bound to clobbered regs. Optional — + * backends that leave it NULL accept all named clobbers as no-ops. */ + int (*resolve_reg_name)(CGTarget*, Sym name, Reg* out, RegClass* cls_out); /* ---- source-location tracking ---- * Sets the SrcLoc inherited by subsequent emit-side calls (binop/load/...). 
diff --git a/src/arch/rv64.c b/src/arch/rv64.c @@ -1644,6 +1644,15 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, emit_load_imm(mc, 1, dst_reg, (i64)off); emit32(mc, rv_add(dst_reg, RV_S0, dst_reg)); } + } else if (av->storage.kind == OPK_INDIRECT) { + u32 base = av->storage.v.ind.base & 0x1fu; + i32 off = av->storage.v.ind.ofs; + if (off >= -2048 && off <= 2047) { + emit32(mc, rv_addi(dst_reg, base, off)); + } else { + emit_load_imm(mc, 1, dst_reg, (i64)off); + emit32(mc, rv_add(dst_reg, base, dst_reg)); + } } else { compiler_panic(t->c, a->loc, "rv64 call: INDIRECT storage kind %d NYI", @@ -1687,6 +1696,14 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, emit32(mc, enc_int_load(sz, 0, dst_reg, RV_S0, off)); break; } + case OPK_INDIRECT: { + /* cg holds INDIRECT base regs in s2..s11, disjoint from arg + * regs a0..a7 and the t0 stack-arg scratch. */ + u32 base = av->storage.v.ind.base & 0x1fu; + i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; + emit32(mc, enc_int_load(sz, 0, dst_reg, base, off)); + break; + } default: compiler_panic(t->c, a->loc, "rv64 call: storage kind %d NYI", @@ -1707,6 +1724,13 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, emit32(mc, rv_fsgnj(fmt, freg, r, r)); break; } + case OPK_INDIRECT: { + u32 base = av->storage.v.ind.base & 0x1fu; + i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; + emit32(mc, (sz == 8) ? 
rv_fld(freg, base, off) + : rv_flw(freg, base, off)); + break; + } default: compiler_panic(t->c, a->loc, "rv64 call: FP storage kind %d NYI", (int)av->storage.kind); @@ -1717,6 +1741,20 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, if (sz == 8) emit32(mc, rv_fsd(reg_num(av->storage), RV_SP, (i32)*stack_off)); else emit32(mc, rv_fsw(reg_num(av->storage), RV_SP, (i32)*stack_off)); break; + case OPK_INDIRECT: { + /* Route through ft0 — it is in {ft0..ft7}, caller-saved + * scratch outside the cg fs2..fs11 pool. */ + u32 base = av->storage.v.ind.base & 0x1fu; + i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset; + if (sz == 8) { + emit32(mc, rv_fld(/*ft0=*/0u, base, off)); + emit32(mc, rv_fsd(/*ft0=*/0u, RV_SP, (i32)*stack_off)); + } else { + emit32(mc, rv_flw(/*ft0=*/0u, base, off)); + emit32(mc, rv_fsw(/*ft0=*/0u, RV_SP, (i32)*stack_off)); + } + break; + } default: compiler_panic(t->c, a->loc, "rv64 call: FP stack-arg NYI"); } @@ -1795,15 +1833,24 @@ static void rv_call(CGTarget* t, const CGCallDesc* d) { u32 fmt = (p->size == 8) ? 
RV_FMT_D : RV_FMT_S; emit32(mc, rv_fsgnj(fmt, reg_num(rs), src_reg, src_reg)); } - } else if (rs.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, rs.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad ret slot"); - i32 off = -(i32)s->off + (i32)p->src_offset; + } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (rs.kind == OPK_LOCAL) { + RvSlot* s = slot_get(a, rs.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad ret slot"); + base_reg = RV_S0; + base_off = -(i32)s->off; + } else { + base_reg = rs.v.ind.base & 0x1fu; + base_off = rs.v.ind.ofs; + } + i32 off = base_off + (i32)p->src_offset; if (p->cls == ABI_CLASS_INT) { - emit32(mc, enc_int_store(p->size, src_reg, RV_S0, off)); + emit32(mc, enc_int_store(p->size, src_reg, base_reg, off)); } else { - if (p->size == 8) emit32(mc, rv_fsd(src_reg, RV_S0, off)); - else emit32(mc, rv_fsw(src_reg, RV_S0, off)); + if (p->size == 8) emit32(mc, rv_fsd(src_reg, base_reg, off)); + else emit32(mc, rv_fsw(src_reg, base_reg, off)); } } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { /* void return placeholder — nothing to do. */ @@ -1822,37 +1869,51 @@ static void rv_ret(CGTarget* t, const CGABIValue* val) { const ABIArgInfo* ri = val->abi; if (ri && ri->kind == ABI_ARG_INDIRECT) { /* sret: reload destination pointer from sret_ptr_slot into t0, - * then memcpy from val->storage (must be OPK_LOCAL) into [t0]. */ - if (val->storage.kind != OPK_LOCAL) { + * then memcpy from val->storage into [t0]. 
*/ + u32 src_base; + i32 src_base_off; + u32 nbytes; + if (val->storage.kind == OPK_LOCAL) { + RvSlot* s = slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad sret slot"); + src_base = RV_S0; + src_base_off = -(i32)s->off; + nbytes = s->size; + } else if (val->storage.kind == OPK_INDIRECT) { + src_base = val->storage.v.ind.base & 0x1fu; + src_base_off = val->storage.v.ind.ofs; + nbytes = val->size; + if (!nbytes) { + compiler_panic(t->c, a->loc, + "rv64 ret indirect: missing aggregate size"); + } + } else { compiler_panic(t->c, a->loc, "rv64 ret indirect: storage kind %d NYI", (int)val->storage.kind); } - RvSlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad sret slot"); RvSlot* sp = (a->sret_ptr_slot != FRAME_SLOT_NONE) ? slot_get(a, a->sret_ptr_slot) : NULL; if (sp) emit32(mc, rv_ld(RV_T0, RV_S0, -(i32)sp->off)); - u32 nbytes = s->size; u32 i = 0; while (i + 8 <= nbytes) { - emit32(mc, rv_ld(RV_T1, RV_S0, -(i32)s->off + (i32)i)); + emit32(mc, rv_ld(RV_T1, src_base, src_base_off + (i32)i)); emit32(mc, rv_sd(RV_T1, RV_T0, (i32)i)); i += 8; } while (i + 4 <= nbytes) { - emit32(mc, rv_lwu(RV_T1, RV_S0, -(i32)s->off + (i32)i)); + emit32(mc, rv_lwu(RV_T1, src_base, src_base_off + (i32)i)); emit32(mc, rv_sw(RV_T1, RV_T0, (i32)i)); i += 4; } while (i + 2 <= nbytes) { - emit32(mc, rv_lhu(RV_T1, RV_S0, -(i32)s->off + (i32)i)); + emit32(mc, rv_lhu(RV_T1, src_base, src_base_off + (i32)i)); emit32(mc, rv_sh(RV_T1, RV_T0, (i32)i)); i += 2; } while (i < nbytes) { - emit32(mc, rv_lbu(RV_T1, RV_S0, -(i32)s->off + (i32)i)); + emit32(mc, rv_lbu(RV_T1, src_base, src_base_off + (i32)i)); emit32(mc, rv_sb(RV_T1, RV_T0, (i32)i)); i += 1; } @@ -1867,20 +1928,30 @@ static void rv_ret(CGTarget* t, const CGABIValue* val) { } else if (val->storage.kind == OPK_IMM) { u32 sf = type_is_64(val->storage.type) ? 
1u : 0u; emit_load_imm(mc, sf, RV_A0, val->storage.v.imm); - } else if (val->storage.kind == OPK_LOCAL) { - RvSlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad local slot"); + } else if (val->storage.kind == OPK_LOCAL || + val->storage.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (val->storage.kind == OPK_LOCAL) { + RvSlot* s = slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad local slot"); + base_reg = RV_S0; + base_off = -(i32)s->off; + } else { + base_reg = val->storage.v.ind.base & 0x1fu; + base_off = val->storage.v.ind.ofs; + } const ABIArgInfo* ri2 = val->abi; u32 nir = 0, nfr = 0; for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) { const ABIArgPart* pt = &ri2->parts[i]; - i32 off = -(i32)s->off + (i32)pt->src_offset; + i32 off = base_off + (i32)pt->src_offset; if (pt->cls == ABI_CLASS_INT) { - emit32(mc, enc_int_load(pt->size, 0, RV_A0 + nir++, RV_S0, off)); + emit32(mc, enc_int_load(pt->size, 0, RV_A0 + nir++, base_reg, off)); } else if (pt->cls == ABI_CLASS_FP) { u32 freg = 10u + nfr++; - if (pt->size == 8) emit32(mc, rv_fld(freg, RV_S0, off)); - else emit32(mc, rv_flw(freg, RV_S0, off)); + if (pt->size == 8) emit32(mc, rv_fld(freg, base_reg, off)); + else emit32(mc, rv_flw(freg, base_reg, off)); } else { compiler_panic(t->c, a->loc, "rv64 ret: part cls %d unimpl", (int)pt->cls); diff --git a/src/arch/x64.c b/src/arch/x64.c @@ -1756,6 +1756,9 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, XSlot* s = slot_get(a, av->storage.v.frame_slot); if (!s) compiler_panic(t->c, a->loc, "x64 call: bad byval slot"); emit_lea(t->mc, dst_reg, X64_RBP, -(i32)s->off); + } else if (av->storage.kind == OPK_INDIRECT) { + emit_lea(t->mc, dst_reg, av->storage.v.ind.base & 0xFu, + av->storage.v.ind.ofs); } else { compiler_panic(t->c, a->loc, "x64 call: INDIRECT arg storage kind %d unsupported", @@ -1793,6 +1796,14 @@ static void 
emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, -(i32)s->off + (i32)pt->src_offset); break; } + case OPK_INDIRECT: { + /* cg holds INDIRECT base regs in {RBX, R10, R12..R15}, disjoint + * from arg regs (RDI/RSI/RDX/RCX/R8/R9) and the RAX scratch, so + * the base survives across the part loop. */ + emit_mov_load(t->mc, sz, 0, dst_reg, av->storage.v.ind.base & 0xFu, + av->storage.v.ind.ofs + (i32)pt->src_offset); + break; + } default: compiler_panic(t->c, a->loc, "x64 call: arg storage kind %d unsupported", @@ -1804,14 +1815,16 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, } } else if (pt->cls == ABI_CLASS_FP) { int to_stack = (*next_fp >= 8); + u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; if (!to_stack) { u32 dst_x = (*next_fp)++; if (av->storage.kind == OPK_REG) { u32 sx = av->storage.v.reg & 0xFu; - if (sx != dst_x) { - u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; - emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx); - } + if (sx != dst_x) emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx); + } else if (av->storage.kind == OPK_INDIRECT) { + emit_sse_load(t->mc, prefix2, 0x10, dst_x, + av->storage.v.ind.base & 0xFu, + av->storage.v.ind.ofs + (i32)pt->src_offset); } else { compiler_panic(t->c, a->loc, "x64 call: FP arg storage kind %d unsupported", @@ -1819,9 +1832,16 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int, } } else { if (av->storage.kind == OPK_REG) { - u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; emit_sse_store(t->mc, prefix2, 0x11, av->storage.v.reg & 0xFu, X64_RSP, (i32)*stack_off); + } else if (av->storage.kind == OPK_INDIRECT) { + /* Load through xmm15 (scratch — last in g_fp_order so cg won't + * have it live mid-call) then store. 
*/ + emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15, + av->storage.v.ind.base & 0xFu, + av->storage.v.ind.ofs + (i32)pt->src_offset); + emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP, + (i32)*stack_off); } else { compiler_panic(t->c, a->loc, "x64 call: FP stack-arg storage kind %d unsupported", @@ -1913,15 +1933,24 @@ static void x_call(CGTarget* t, const CGCallDesc* d) { u32 dr = rs.v.reg & 0xFu; if (dr != src_reg) emit_sse_rr(mc, prefix2, 0x10, dr, src_reg); } - } else if (rs.kind == OPK_LOCAL) { - XSlot* s = slot_get(a, rs.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 call: bad ret slot"); - i32 off = -(i32)s->off + (i32)p->src_offset; + } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { + u32 base_reg; + i32 base_off; + if (rs.kind == OPK_LOCAL) { + XSlot* s = slot_get(a, rs.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 call: bad ret slot"); + base_reg = X64_RBP; + base_off = -(i32)s->off; + } else { + base_reg = rs.v.ind.base & 0xFu; + base_off = rs.v.ind.ofs; + } + i32 off = base_off + (i32)p->src_offset; if (p->cls == ABI_CLASS_INT) { - emit_mov_store(mc, p->size, src_reg, X64_RBP, off); + emit_mov_store(mc, p->size, src_reg, base_reg, off); } else { u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3; - emit_sse_store(mc, prefix2, 0x11, src_reg, X64_RBP, off); + emit_sse_store(mc, prefix2, 0x11, src_reg, base_reg, off); } } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) { /* void ret placeholder — nothing to do. */ @@ -1941,36 +1970,50 @@ static void x_ret(CGTarget* t, const CGABIValue* val) { const ABIArgInfo* ri = val->abi; if (ri && ri->kind == ABI_ARG_INDIRECT) { /* sret: reload destination pointer into rdi, memcpy source into [rdi]. 
*/ - if (val->storage.kind != OPK_LOCAL) { + u32 src_base; + i32 src_base_off; + u32 nbytes; + if (val->storage.kind == OPK_LOCAL) { + XSlot* s = slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad sret slot"); + src_base = X64_RBP; + src_base_off = -(i32)s->off; + nbytes = s->size; + } else if (val->storage.kind == OPK_INDIRECT) { + src_base = val->storage.v.ind.base & 0xFu; + src_base_off = val->storage.v.ind.ofs; + nbytes = val->size; + if (!nbytes) { + compiler_panic(t->c, a->loc, + "x64 ret indirect: missing aggregate size"); + } + } else { compiler_panic(t->c, a->loc, "x64 ret indirect: storage kind %d unsupported", (int)val->storage.kind); } - XSlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad sret slot"); if (a->sret_ptr_slot != FRAME_SLOT_NONE) { XSlot* sp = slot_get(a, a->sret_ptr_slot); if (sp) emit_mov_load(mc, 8, 0, X64_RDI, X64_RBP, -(i32)sp->off); } - u32 nbytes = s->size; u32 i = 0; while (i + 8 <= nbytes) { - emit_mov_load(mc, 8, 0, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); + emit_mov_load(mc, 8, 0, X64_RAX, src_base, src_base_off + (i32)i); emit_mov_store(mc, 8, X64_RAX, X64_RDI, (i32)i); i += 8; } while (i + 4 <= nbytes) { - emit_mov_load(mc, 4, 0, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); + emit_mov_load(mc, 4, 0, X64_RAX, src_base, src_base_off + (i32)i); emit_mov_store(mc, 4, X64_RAX, X64_RDI, (i32)i); i += 4; } while (i + 2 <= nbytes) { - emit_mov_load(mc, 2, 0, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); + emit_mov_load(mc, 2, 0, X64_RAX, src_base, src_base_off + (i32)i); emit_mov_store(mc, 2, X64_RAX, X64_RDI, (i32)i); i += 2; } while (i < nbytes) { - emit_mov_load(mc, 1, 0, X64_RAX, X64_RBP, -(i32)s->off + (i32)i); + emit_mov_load(mc, 1, 0, X64_RAX, src_base, src_base_off + (i32)i); emit_mov_store(mc, 1, X64_RAX, X64_RDI, (i32)i); i += 1; } @@ -1989,23 +2032,33 @@ static void x_ret(CGTarget* t, const CGABIValue* val) { } else if (val->storage.kind 
== OPK_IMM) { int w = type_is_64(val->storage.type) ? 1 : 0; emit_load_imm(mc, w, X64_RAX, val->storage.v.imm); - } else if (val->storage.kind == OPK_LOCAL) { + } else if (val->storage.kind == OPK_LOCAL || + val->storage.kind == OPK_INDIRECT) { /* DIRECT struct return: load each part into rax/rdx or xmm0/xmm1. */ - XSlot* s = slot_get(a, val->storage.v.frame_slot); - if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad local slot"); + u32 base_reg; + i32 base_off; + if (val->storage.kind == OPK_LOCAL) { + XSlot* s = slot_get(a, val->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad local slot"); + base_reg = X64_RBP; + base_off = -(i32)s->off; + } else { + base_reg = val->storage.v.ind.base & 0xFu; + base_off = val->storage.v.ind.ofs; + } const ABIArgInfo* ri2 = val->abi; u32 next_int_ret = 0, next_fp_ret = 0; static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX}; for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) { const ABIArgPart* pt = &ri2->parts[i]; - i32 off = -(i32)s->off + (i32)pt->src_offset; + i32 off = base_off + (i32)pt->src_offset; if (pt->cls == ABI_CLASS_INT) { emit_mov_load(mc, pt->size, 0, ret_int_regs[next_int_ret++], - X64_RBP, off); + base_reg, off); } else if (pt->cls == ABI_CLASS_FP) { u8 prefix2 = (pt->size == 8) ? 0xF2 : 0xF3; emit_sse_load(mc, prefix2, 0x10, (u32)(X64_XMM0 + next_fp_ret++), - X64_RBP, off); + base_reg, off); } else { compiler_panic(t->c, a->loc, "x64 ret: ret part cls %d unimpl", (int)pt->cls); diff --git a/test/parse/cases/call_indirect_arg_fp_field.c b/test/parse/cases/call_indirect_arg_fp_field.c @@ -0,0 +1,13 @@ +/* FP-class arg lowered from an OPK_INDIRECT source. 
*/ +typedef struct { double v; int x; } Row; + +int sink(double d) { return (int)d; } + +int call_fp(Row* row, int i) { return sink(row[i].v); } + +int test_main(void) { + Row rows[2]; + rows[0].v = 42.0; rows[0].x = 0; + rows[1].v = 99.0; rows[1].x = 0; + return call_fp(rows, 0); +} diff --git a/test/parse/cases/call_indirect_arg_fp_field.expected b/test/parse/cases/call_indirect_arg_fp_field.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/call_indirect_arg_struct_byval.c b/test/parse/cases/call_indirect_arg_struct_byval.c @@ -0,0 +1,16 @@ +/* Big struct (>16 bytes) passed via ABI_ARG_INDIRECT (byval) from an + * OPK_INDIRECT source. The caller must compute `base + ind.ofs` and + * pass that pointer in the first int-arg slot. */ +typedef struct { int a[8]; } Big; +typedef struct { Big big; int x; } Row; + +int sink(Big b) { return b.a[0] + b.a[7]; } + +int call_big(Row* row, int i) { return sink(row[i].big); } + +int test_main(void) { + Row rows[2]; + rows[0].big.a[0] = 20; rows[0].big.a[7] = 22; rows[0].x = 0; + rows[1].big.a[0] = 99; rows[1].big.a[7] = 99; rows[1].x = 0; + return call_big(rows, 0); +} diff --git a/test/parse/cases/call_indirect_arg_struct_byval.expected b/test/parse/cases/call_indirect_arg_struct_byval.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/call_indirect_arg_struct_field.c b/test/parse/cases/call_indirect_arg_struct_field.c @@ -0,0 +1,21 @@ +/* Pass a struct field reached through an indexed pointer as a function + * argument. `line[i].loc` lowers to an OPK_INDIRECT source operand — + * the backend's call-arg lowering must load each ABI part from + * `[base, ind.ofs + part->src_offset]`, not panic. + * + * Loc is 8 bytes (one INT ABI part on aa64/x64/rv64), so this exercises + * the single-part DIRECT path; the multi-part variant lives in + * call_indirect_arg_struct_field_two_parts.c. 
*/ +typedef struct { unsigned a; unsigned b; } Loc; +typedef struct { Loc loc; int x; } Line; + +int sink(Loc l) { return (int)l.a + (int)l.b; } + +int call_loc(Line* line, int i) { return sink(line[i].loc); } + +int test_main(void) { + Line lines[2]; + lines[0].loc.a = 40u; lines[0].loc.b = 2u; lines[0].x = 0; + lines[1].loc.a = 99u; lines[1].loc.b = 99u; lines[1].x = 0; + return call_loc(lines, 0); +} diff --git a/test/parse/cases/call_indirect_arg_struct_field.expected b/test/parse/cases/call_indirect_arg_struct_field.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/call_indirect_arg_struct_field_two_parts.c b/test/parse/cases/call_indirect_arg_struct_field_two_parts.c @@ -0,0 +1,17 @@ +/* Two-part DIRECT struct (16 bytes, two INT parts) passed via an + * OPK_INDIRECT source operand. Exercises the multi-part load path: + * each part must read from the same base register at distinct + * `src_offset`s, so the base must survive across iterations. */ +typedef struct { unsigned long a; unsigned long b; } Pair; +typedef struct { Pair p; int x; } Row; + +int sink(Pair v) { return (int)(v.a + v.b); } + +int call_pair(Row* row, int i) { return sink(row[i].p); } + +int test_main(void) { + Row rows[2]; + rows[0].p.a = 30; rows[0].p.b = 12; rows[0].x = 0; + rows[1].p.a = 99; rows[1].p.b = 99; rows[1].x = 0; + return call_pair(rows, 0); +} diff --git a/test/parse/cases/call_indirect_arg_struct_field_two_parts.expected b/test/parse/cases/call_indirect_arg_struct_field_two_parts.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/call_indirect_ret_into_indirect.c b/test/parse/cases/call_indirect_ret_into_indirect.c @@ -0,0 +1,16 @@ +/* The destination of a call's return value is an OPK_INDIRECT lvalue: + * `row[i].v = make_int()`. The call's return-receive must store the + * return register through the indirect destination. 
*/ +typedef struct { int v; int x; } Row; + +int make_int(void) { return 42; } + +void store_ret(Row* row, int i) { row[i].v = make_int(); } + +int test_main(void) { + Row rows[2]; + rows[0].v = 0; rows[0].x = 0; + rows[1].v = 0; rows[1].x = 0; + store_ret(rows, 0); + return rows[0].v; +} diff --git a/test/parse/cases/call_indirect_ret_into_indirect.expected b/test/parse/cases/call_indirect_ret_into_indirect.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/call_indirect_ret_struct_byval.c b/test/parse/cases/call_indirect_ret_struct_byval.c @@ -0,0 +1,15 @@ +/* Return a big struct (>16B) via sret from an OPK_INDIRECT source. + * The callee must memcpy from `[base + ind.ofs]` through the sret + * pointer, not assume the source is OPK_LOCAL. */ +typedef struct { int a[8]; } Big; +typedef struct { Big big; int x; } Row; + +Big pick(Row* row, int i) { return row[i].big; } + +int test_main(void) { + Row rows[2]; + rows[0].big.a[0] = 20; rows[0].big.a[7] = 22; rows[0].x = 0; + rows[1].big.a[0] = 99; rows[1].big.a[7] = 99; rows[1].x = 0; + Big b = pick(rows, 0); + return b.a[0] + b.a[7]; +} diff --git a/test/parse/cases/call_indirect_ret_struct_byval.expected b/test/parse/cases/call_indirect_ret_struct_byval.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/call_indirect_ret_struct_direct.c b/test/parse/cases/call_indirect_ret_struct_direct.c @@ -0,0 +1,15 @@ +/* Return a struct field reached through an indexed pointer. The + * returned aggregate is DIRECT (fits in registers), so the callee + * must load each return part from the OPK_INDIRECT source. 
*/ +typedef struct { unsigned a; unsigned b; } Loc; +typedef struct { Loc loc; int x; } Line; + +Loc pick(Line* line, int i) { return line[i].loc; } + +int test_main(void) { + Line lines[2]; + lines[0].loc.a = 40u; lines[0].loc.b = 2u; lines[0].x = 0; + lines[1].loc.a = 99u; lines[1].loc.b = 99u; lines[1].x = 0; + Loc l = pick(lines, 0); + return (int)l.a + (int)l.b; +} diff --git a/test/parse/cases/call_indirect_ret_struct_direct.expected b/test/parse/cases/call_indirect_ret_struct_direct.expected @@ -0,0 +1 @@ +42