commit 5ae1703a745e575657fee65c946f9e1f241d4a3a
parent b871959ddf3e4188f507041b2f9f7181d2662750
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 15:17:20 -0700
cg/aa64: implement Groups G, H, I (calls, control flow, alloca)
- Calls: FP binops (FADD/FSUB/FMUL/FDIV), addr_of for OPK_GLOBAL via
ADRP+ADD, indirect calls (BLR Xn), multi-part returns (HFA into
LOCAL storage), and memcpy-on-entry for ABI_ARG_INDIRECT params.
- Control flow: SCOPE_LOOP / SCOPE_BLOCK as bookkeeping over the
caller-driven label_place/jump, with break_to/continue_to forwarding
to the recorded labels.
- Alloca: SUB SP by an aligned const or runtime size, return
SP + max_outgoing via a placeholder ADD patched at func_end. For
has_alloca functions, restore SP from FP at the epilogue.
Diffstat:
| M | src/arch/aarch64.c | | | 328 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------ |
1 file changed, 279 insertions(+), 49 deletions(-)
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -153,6 +153,12 @@ static inline u32 aa64_fmov_d_x(u32 Rd, u32 Rn) /* GPR→FP, double */
static inline u32 aa64_fmov_x_d(u32 Rd, u32 Rn) /* FP→GPR, double */
{ return 0x9E660000u | ((Rn&0x1f)<<5) | (Rd&0x1f); }
+/* Encode SUB (extended register): 64-bit, extend option UXTX, shift
+ * amount 0. In this form register number 31 in Rd/Rn names SP, whereas
+ * the shifted-register SUB reads Rd=31 as ZR — so this is the encoding
+ * used to lower SP by a register amount during alloca. */
+static inline u32 aa64_sub_extreg_x_uxtx(u32 Rd, u32 Rn, u32 Rm)
+{
+    const u32 opcode  = 0xCB206000u;
+    const u32 rm_bits = (Rm & 0x1f) << 16;
+    const u32 rn_bits = (Rn & 0x1f) << 5;
+    const u32 rd_bits = Rd & 0x1f;
+    return opcode | rm_bits | rn_bits | rd_bits;
+}
+
/* SUBS shifted register (Rd=ZR encodes CMP). */
static inline u32 aa64_subs_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm)
{ return 0x6B000000u | (sf<<31) | ((Rm&0x1f)<<16) | ((Rn&0x1f)<<5) | (Rd&0x1f); }
@@ -168,6 +174,16 @@ static inline u32 aa64_csinc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond)
static inline u32 aa64_cset(u32 sf, u32 Rd, u32 cond)
{ return aa64_csinc(sf, Rd, 31u, 31u, cond ^ 1u); }
+/* Scalar FADD / FSUB / FMUL / FDIV. type: 0=S (float), 1=D (double).
+ * The four encodings differ only in the 2-bit field at bits 13:12
+ * (0=FMUL, 1=FDIV, 2=FADD, 3=FSUB), so they share one private builder. */
+static inline u32 aa64_fp_arith2(u32 opcode, u32 type, u32 Rd, u32 Rn, u32 Rm)
+{
+    u32 w = 0x1E200800u | (opcode << 12);
+    w |= (type & 3) << 22;
+    w |= (Rm & 0x1f) << 16;
+    w |= (Rn & 0x1f) << 5;
+    w |= (Rd & 0x1f);
+    return w;
+}
+static inline u32 aa64_fadd(u32 type, u32 Rd, u32 Rn, u32 Rm)
+{
+    return aa64_fp_arith2(2u, type, Rd, Rn, Rm);
+}
+static inline u32 aa64_fsub(u32 type, u32 Rd, u32 Rn, u32 Rm)
+{
+    return aa64_fp_arith2(3u, type, Rd, Rn, Rm);
+}
+static inline u32 aa64_fmul(u32 type, u32 Rd, u32 Rn, u32 Rm)
+{
+    return aa64_fp_arith2(0u, type, Rd, Rn, Rm);
+}
+static inline u32 aa64_fdiv(u32 type, u32 Rd, u32 Rn, u32 Rm)
+{
+    return aa64_fp_arith2(1u, type, Rd, Rn, Rm);
+}
+
/* SBFM / UBFM / BFM (bitfield move family).
* sf opc(2) 100110 N immr(6) imms(6) Rn(5) Rd(5)
* opc: 00=SBFM, 01=BFM, 10=UBFM. N must equal sf. */
@@ -198,6 +214,8 @@ typedef struct AAScope {
u8 pad[2];
MCLabel else_label; /* SCOPE_IF: false branch target / end-of-then */
MCLabel end_label; /* SCOPE_IF: join point past the whole if/else */
+ Label break_label; /* SCOPE_LOOP/BLOCK: explicit break target */
+ Label continue_label;/* SCOPE_LOOP: explicit continue target */
} AAScope;
typedef struct AAImpl {
@@ -234,6 +252,15 @@ typedef struct AAImpl {
AAScope* scopes;
u32 nscopes;
u32 scopes_cap;
+
+ /* alloca: each call emits an `ADD result, SP, #0` placeholder; at
+ * func_end the imm12 is patched with the final max_outgoing. Tracks
+ * (instruction pos, dst reg) for each placeholder. has_alloca also
+ * triggers SP-from-FP restoration in the epilogue. */
+ u8 has_alloca;
+ struct AAAllocaPatch { u32 pos; u32 dst_reg; }* add_patches;
+ u32 nadd_patches;
+ u32 add_patches_cap;
} AAImpl;
static AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; }
@@ -405,6 +432,8 @@ static void aa_func_begin(CGTarget* t, const CGFuncDesc* fd)
a->used_fp = 0;
a->nslots = 0;
a->nscopes = 0;
+ a->has_alloca = 0;
+ a->nadd_patches= 0;
a->sret_ptr_slot = FRAME_SLOT_NONE;
a->epilogue_label = mc->label_new(mc);
@@ -454,6 +483,19 @@ static void aa_func_end(CGTarget* t)
* branches land here. */
mc->label_place(mc, a->epilogue_label);
+ /* If the body called alloca, SP may sit below the locals area.
+ * Restore SP from FP before reloading callee-saves, since those use
+ * SP-relative offsets. */
+ if (a->has_alloca) {
+ if (fp_lr_off <= 0xfff) {
+ emit32(mc, aa64_sub_imm(1, /*Rd=SP*/31, /*Rn=*/29, fp_lr_off, 0));
+ } else {
+ compiler_panic(t->c, a->loc,
+ "aarch64: has_alloca + fp_lr_off %u out of imm12 range",
+ fp_lr_off);
+ }
+ }
+
/* Restore FP saves, then INT saves, then fp/lr, then add sp + ret. */
for (i32 i = (i32)n_fp_pairs - 1; i >= 0; --i) {
u32 r0 = 8u + (u32)i * 2u;
@@ -526,6 +568,19 @@ overflow:
patch32(obj, sec, pos + i*4u, words[i]);
}
+    /* Patch each alloca's `ADD dst, SP, #0` placeholder with the final
+     * max_outgoing offset, now that the high-water mark is known.
+     *
+     * The imm12 range check exists only for the placeholder's immediate
+     * field, so it is applied only when at least one placeholder was
+     * recorded; a function that never called alloca has nothing to patch
+     * and must not be rejected by this limit. */
+    if (a->nadd_patches > 0 && a->max_outgoing > 0xfff) {
+        compiler_panic(t->c, a->loc,
+            "aarch64: max_outgoing %u out of imm12 range for alloca patch",
+            a->max_outgoing);
+    }
+    for (u32 i = 0; i < a->nadd_patches; ++i) {
+        u32 dr   = a->add_patches[i].dst_reg;
+        /* Same instruction as the placeholder, now with the real offset. */
+        u32 word = aa64_add_imm(1, dr, /*Rn=SP*/31, a->max_outgoing, 0);
+        patch32(obj, sec, a->add_patches[i].pos, word);
+    }
+
/* Define the function symbol. */
u32 end = mc->pos(mc);
obj_symbol_define(obj, a->fd->sym, sec,
@@ -608,18 +663,40 @@ static void aa_param(CGTarget* t, const CGParamDesc* p)
if (ai->kind == ABI_ARG_IGNORE) return;
if (ai->kind == ABI_ARG_INDIRECT) {
- /* Caller passes a pointer to the data. Pointer comes in next
- * INT arg reg; store it into the home slot (which holds the
- * pointer-sized address). */
+ /* Caller passes a pointer to a copy. Materialize that pointer
+ * into a scratch reg, then memcpy `s->size` bytes from there
+ * into the slot — so subsequent LOCAL_op(slot) reads/writes the
+ * struct contents directly, not the pointer. */
+ u32 ptr_reg;
if (a->next_param_int < 8) {
- u32 reg = a->next_param_int++;
- emit32(t->mc, aa64_stur(3, reg, 29, -(i32)s->off));
+ ptr_reg = a->next_param_int++;
} else {
- /* Pointer on stack — load and store. */
u32 caller_off = a->next_param_stack;
a->next_param_stack += 8;
emit32(t->mc, aa64_ldur(3, 9, 29, (i32)(16 + caller_off)));
- emit32(t->mc, aa64_stur(3, 9, 29, -(i32)s->off));
+ ptr_reg = 9;
+ }
+        /* Copy `s->size` bytes from [ptr_reg] into the slot at
+         * [x29 - s->off], widest chunks first (8, 4, 2, then 1 byte),
+         * using x10 as the transfer register. */
+        u32 nbytes = s->size;
+        i32 dst_base = -(i32)s->off;
+        /* LDUR/STUR encode a signed 9-bit byte offset (-256..255). Every
+         * access below starts at source offset i and slot offset
+         * dst_base + i with i < nbytes, so check both ends up front and
+         * fail loudly instead of emitting a truncated offset.
+         * NOTE(review): assumes aa64_ldur/aa64_stur are raw encoders that
+         * do not range-check themselves — confirm. */
+        if (nbytes > 0 &&
+            (nbytes > 256u || dst_base < -256 ||
+             dst_base + (i32)nbytes > 256)) {
+            compiler_panic(t->c, a->loc,
+                "aarch64 param: indirect copy of %u bytes at fp%+d out of imm9 range",
+                nbytes, (int)dst_base);
+        }
+        u32 i = 0;
+        /* sidx is the load/store size index: 3=8B, 2=4B, 1=2B, 0=1B. */
+        for (u32 sidx = 4; sidx-- > 0; ) {
+            u32 width = 1u << sidx;
+            while (i + width <= nbytes) {
+                emit32(t->mc, aa64_ldur(sidx, 10, ptr_reg, (i32)i));
+                emit32(t->mc, aa64_stur(sidx, 10, 29, dst_base + (i32)i));
+                i += width;
+            }
         }
return;
}
@@ -751,12 +828,16 @@ static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d)
a->scopes_cap = ncap;
}
AAScope* sc = &a->scopes[a->nscopes];
- sc->kind = (u8)d->kind;
- sc->has_else = 0;
- sc->else_label = t->mc->label_new(t->mc);
- sc->end_label = t->mc->label_new(t->mc);
+ sc->kind = (u8)d->kind;
+ sc->has_else = 0;
+ sc->else_label = 0;
+ sc->end_label = 0;
+ sc->break_label = d->break_label;
+ sc->continue_label = d->continue_label;
if (d->kind == SCOPE_IF) {
+ sc->else_label = t->mc->label_new(t->mc);
+ sc->end_label = t->mc->label_new(t->mc);
/* Test cond against zero, branch to else_label on EQ (false). */
u32 sf = type_is_64(d->cond.type) ? 1u : 0u;
u32 rn = force_reg_int(t, d->cond, sf, 9);
@@ -764,8 +845,11 @@ static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d)
emit32(t->mc, aa64_b_cond(0x0u /*EQ*/));
t->mc->emit_label_ref(t->mc, sc->else_label,
R_AARCH64_CONDBR19, 4, 0);
+ } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) {
+ /* Structured loop/block: bookkeep only. The caller drives
+ * label_place + jump itself; break_to/continue_to forward to the
+ * recorded labels. No instructions emitted here. */
} else {
- /* BLOCK / LOOP not yet exercised by the corpus. */
compiler_panic(t->c, a->loc,
"aarch64 scope_begin: kind %d not yet implemented", (int)d->kind);
}
@@ -798,15 +882,37 @@ static void aa_scope_end(CGTarget* t, CGScope s)
(unsigned)s);
}
AAScope* sc = &a->scopes[s - 1];
- if (sc->kind == SCOPE_IF && !sc->has_else) {
- /* No else body — false-branch lands at scope_end. */
- t->mc->label_place(t->mc, sc->else_label);
+ if (sc->kind == SCOPE_IF) {
+ if (!sc->has_else) {
+ /* No else body — false-branch lands at scope_end. */
+ t->mc->label_place(t->mc, sc->else_label);
+ }
+ t->mc->label_place(t->mc, sc->end_label);
+ }
+ /* SCOPE_LOOP / SCOPE_BLOCK: caller has already placed the break_label. */
+}
+
+/* Jump to the break target of scope `s`.
+ * `s` is a 1-based scope handle: it must not be CG_SCOPE_NONE and must
+ * not exceed the number of scopes currently recorded, otherwise this
+ * panics. The jump goes to the break_label stored on the scope entry;
+ * that label is supplied by the caller, not created here. */
+static void aa_break_to(CGTarget* t, CGScope s)
+{
+ AAImpl* a = impl_of(t);
+ /* Reject the "no scope" handle and handles past the live scope count. */
+ if (s == CG_SCOPE_NONE || s > a->nscopes) {
+ compiler_panic(t->c, a->loc, "aarch64 break_to: bad scope %u",
+ (unsigned)s);
}
- t->mc->label_place(t->mc, sc->end_label);
+ /* Handles are 1-based, so entry s lives at index s - 1. */
+ AAScope* sc = &a->scopes[s - 1];
+ aa_jump(t, sc->break_label);
}
-static void aa_break_to (CGTarget* t, CGScope s) { (void)s; aa_panic(t, "break_to"); }
-static void aa_continue_to(CGTarget* t, CGScope s) { (void)s; aa_panic(t, "continue_to"); }
+/* Jump to the continue target of scope `s`.
+ * `s` is a 1-based scope handle; CG_SCOPE_NONE or a handle beyond the
+ * recorded scope count is reported via compiler_panic. The target is the
+ * continue_label stored on the scope entry. */
+static void aa_continue_to(CGTarget* t, CGScope s)
+{
+    AAImpl* impl = impl_of(t);
+    int handle_ok = (s != CG_SCOPE_NONE) && !(s > impl->nscopes);
+
+    if (!handle_ok) {
+        compiler_panic(t->c, impl->loc, "aarch64 continue_to: bad scope %u",
+                       (unsigned)s);
+    }
+    /* Handles are 1-based: entry s is stored at index s - 1. */
+    aa_jump(t, impl->scopes[s - 1].continue_label);
+}
/* ---- data movement ---- */
@@ -972,6 +1078,23 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv)
}
return;
}
+ if (lv.kind == OPK_GLOBAL) {
+ /* ADRP Xd, sym ; ADD Xd, Xd, #:lo12:sym (with addend baked into both
+ * relocations). Used to materialize a function or data pointer. */
+ u32 rd = reg_num(dst);
+ u32 sec = t->mc->section_id;
+ u32 adrp_pos = t->mc->pos(t->mc);
+ emit32(t->mc, aa64_adrp_base(rd));
+ t->mc->emit_reloc_at(t->mc, sec, adrp_pos,
+ R_AARCH64_ADR_PREL_PG_HI21,
+ lv.v.global.sym, lv.v.global.addend, 0, 0);
+ u32 add_pos = t->mc->pos(t->mc);
+ emit32(t->mc, aa64_add_imm(1, rd, rd, 0, 0));
+ t->mc->emit_reloc_at(t->mc, sec, add_pos,
+ R_AARCH64_ADD_ABS_LO12_NC,
+ lv.v.global.sym, lv.v.global.addend, 0, 0);
+ return;
+ }
aa_panic(t, "addr_of");
}
@@ -1144,6 +1267,29 @@ static u32 force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch)
static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, Operand b_op)
{
MCEmitter* mc = t->mc;
+
+ /* FP binops route through scalar FADD/FSUB/FMUL/FDIV. */
+ if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) {
+ if (a_op.kind != OPK_REG || b_op.kind != OPK_REG || dst.cls != RC_FP) {
+ compiler_panic(t->c, impl_of(t)->loc,
+ "aarch64 binop: FP op requires REG operands");
+ }
+ u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
+ u32 rd = reg_num(dst);
+ u32 rn = reg_num(a_op);
+ u32 rm = reg_num(b_op);
+ u32 w;
+ switch (op) {
+ case BO_FADD: w = aa64_fadd(type, rd, rn, rm); break;
+ case BO_FSUB: w = aa64_fsub(type, rd, rn, rm); break;
+ case BO_FMUL: w = aa64_fmul(type, rd, rn, rm); break;
+ case BO_FDIV: w = aa64_fdiv(type, rd, rn, rm); break;
+ default: w = 0; break; /* unreachable */
+ }
+ emit32(mc, w);
+ return;
+ }
+
u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
u32 rn = force_reg_int(t, a_op, sf, 9);
@@ -1434,16 +1580,19 @@ static void aa_call(CGTarget* t, const CGCallDesc* d)
u32 needed = (stack_off + 15u) & ~15u;
if (needed > a->max_outgoing) a->max_outgoing = needed;
- /* BL <callee> — direct only. */
- if (d->callee.kind != OPK_GLOBAL) {
+ /* Direct (BL <sym>) vs. indirect (BLR Xn). */
+ if (d->callee.kind == OPK_GLOBAL) {
+ u32 bl_pos = mc->pos(mc);
+ emit32(mc, aa64_bl_base());
+ mc->emit_reloc_at(mc, mc->section_id, bl_pos,
+ R_AARCH64_CALL26, d->callee.v.global.sym,
+ d->callee.v.global.addend, 0, 0);
+ } else if (d->callee.kind == OPK_REG) {
+ emit32(mc, aa64_blr(reg_num(d->callee)));
+ } else {
compiler_panic(t->c, a->loc,
- "aarch64 call: indirect call not yet supported");
+ "aarch64 call: callee kind %d unsupported", (int)d->callee.kind);
}
- u32 bl_pos = mc->pos(mc);
- emit32(mc, aa64_bl_base());
- mc->emit_reloc_at(mc, mc->section_id, bl_pos,
- R_AARCH64_CALL26, d->callee.v.global.sym,
- d->callee.v.global.addend, 0, 0);
/* Receive return value. */
const ABIArgInfo* ri = &d->abi->ret;
@@ -1451,37 +1600,54 @@ static void aa_call(CGTarget* t, const CGCallDesc* d)
/* Nothing to copy — sret was placed directly into the dst slot. */
return;
}
- /* DIRECT scalar in our coverage: a single INT or FP part placed in
- * x0 / v0. Move into ret_storage. */
if (ri->nparts == 0) return;
- const ABIArgPart* p0 = &ri->parts[0];
+
Operand rs = d->ret.storage;
- if (p0->cls == ABI_CLASS_INT) {
- u32 sf = (p0->size == 8) ? 1u : 0u;
- if (rs.kind == OPK_REG) {
- emit32(mc, aa64_mov_reg(sf, reg_num(rs), 0));
- } else if (rs.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, rs.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad ret slot");
- u32 sidx = size_idx_for_bytes(p0->size);
- emit32(mc, aa64_stur(sidx, 0, 29, -(i32)s->off));
+ /* Walk parts; INT parts come from x0, x1, ...; FP parts from v0, v1, .... */
+ u32 next_int_ret = 0, next_fp_ret = 0;
+ for (u16 i = 0; i < ri->nparts; ++i) {
+ const ABIArgPart* p = &ri->parts[i];
+ u32 src_reg;
+ if (p->cls == ABI_CLASS_INT) {
+ src_reg = next_int_ret++;
+ } else if (p->cls == ABI_CLASS_FP) {
+ src_reg = next_fp_ret++;
+ } else {
+ compiler_panic(t->c, a->loc,
+ "aarch64 call: ret part cls %d unimpl", (int)p->cls);
}
- } else if (p0->cls == ABI_CLASS_FP) {
- u32 type = (p0->size == 8) ? 1u : 0u;
+
if (rs.kind == OPK_REG) {
- emit32(mc, aa64_fmov_reg(type, reg_num(rs), 0));
+ if (ri->nparts != 1) {
+ compiler_panic(t->c, a->loc,
+ "aarch64 call: REG ret_storage with %u parts", (unsigned)ri->nparts);
+ }
+ if (p->cls == ABI_CLASS_INT) {
+ u32 sf = (p->size == 8) ? 1u : 0u;
+ emit32(mc, aa64_mov_reg(sf, reg_num(rs), src_reg));
+ } else {
+ u32 type = (p->size == 8) ? 1u : 0u;
+ emit32(mc, aa64_fmov_reg(type, reg_num(rs), src_reg));
+ }
} else if (rs.kind == OPK_LOCAL) {
AASlot* s = slot_get(a, rs.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad ret slot");
- u32 sidx = size_idx_for_bytes(p0->size);
- emit32(mc, aa64_stur_fp(sidx, 0, 29, -(i32)s->off));
+ u32 sidx = size_idx_for_bytes(p->size);
+ i32 off = -(i32)s->off + (i32)p->src_offset;
+ if (p->cls == ABI_CLASS_INT) {
+ emit32(mc, aa64_stur(sidx, src_reg, 29, off));
+ } else {
+ emit32(mc, aa64_stur_fp(sidx, src_reg, 29, off));
+ }
+ } else if (rs.kind == OPK_IMM && rs.type
+ && rs.type->kind == TY_VOID) {
+ /* Void return placeholder — nothing to do. */
+ } else {
+ compiler_panic(t->c, a->loc,
+ "aarch64 call: ret_storage kind %d unsupported",
+ (int)rs.kind);
}
}
- /* Multi-part returns: not exercised yet. */
- if (ri->nparts > 1) {
- compiler_panic(t->c, a->loc,
- "aarch64 call: multi-part return not yet supported");
- }
}
/* Materialize the return value, then branch to the function epilogue. */
@@ -1571,7 +1737,71 @@ static void aa_ret(CGTarget* t, const CGABIValue* val)
(void)bpos;
}
-static void aa_alloca_ (CGTarget* t, Operand d, Operand s, u32 a) { (void)d;(void)s;(void)a; aa_panic(t, "alloca"); }
+/* Dynamic stack allocation: lower SP by the requested size, rounded up
+ * to a 16-byte multiple, and produce the new block's address in `d`.
+ *
+ * The outgoing-argument area (max_outgoing bytes, 16-aligned) occupies
+ * the bottom of the stack and the alloca block sits directly above it,
+ * so the block's address is SP + max_outgoing. That value is only final
+ * at func_end; here a placeholder `ADD dst, SP, #0` is emitted and its
+ * position recorded so func_end can rewrite the immediate.
+ *
+ *   d     - destination operand; must be OPK_REG.
+ *   sz    - byte count; OPK_IMM (non-negative, at most 0xfff after
+ *           rounding) or OPK_REG.
+ *   align - requested alignment; anything above 16 is rejected. */
+static void aa_alloca_(CGTarget* t, Operand d, Operand sz, u32 align)
+{
+    AAImpl*    impl = impl_of(t);
+    MCEmitter* em   = t->mc;
+
+    if (d.kind != OPK_REG) {
+        compiler_panic(t->c, impl->loc, "aarch64 alloca: dst must be REG");
+    }
+    /* A 16-aligned SP lowered by a 16-byte multiple stays 16-aligned, so
+     * the result gets that alignment for free. A stricter request would
+     * need an extra mask on the result; refuse it so it gets noticed. */
+    if (align > 16) {
+        compiler_panic(t->c, impl->loc,
+            "aarch64 alloca: align %u > 16 not yet supported", align);
+    }
+
+    switch (sz.kind) {
+    case OPK_IMM: {
+        i64 requested = sz.v.imm;
+        if (requested < 0) {
+            compiler_panic(t->c, impl->loc, "aarch64 alloca: negative size");
+        }
+        u64 bytes = ((u64)requested + 15u) & ~(u64)15u;
+        if (bytes == 0) {
+            bytes = 16;   /* keep SP changing */
+        }
+        if (bytes > 0xfffu) {
+            compiler_panic(t->c, impl->loc,
+                "aarch64 alloca: const size %llu too large for v1",
+                (unsigned long long)bytes);
+        }
+        /* sub sp, sp, #bytes */
+        emit32(em, aa64_sub_imm(1, /*Rd=SP*/31, /*Rn=SP*/31, (u32)bytes, 0));
+        break;
+    }
+    case OPK_REG: {
+        /* Round the runtime size up to a 16-byte multiple in x9, then
+         * `sub sp, sp, x9` via the extended-register form (the one in
+         * which Rd/Rn=31 mean SP). */
+        u32 sz_reg = reg_num(sz);
+        emit32(em, aa64_add_imm(1, 9, sz_reg, 15u, 0)); /* x9 = size+15 */
+        emit32(em, aa64_ubfm(1, 9, 9, 4, 63));          /* lsr x9, x9, #4 */
+        emit32(em, aa64_ubfm(1, 9, 9, 60, 59));         /* lsl x9, x9, #4 */
+        emit32(em, aa64_sub_extreg_x_uxtx(/*SP*/31, /*SP*/31, 9));
+        break;
+    }
+    default:
+        compiler_panic(t->c, impl->loc,
+            "aarch64 alloca: size kind %d unsupported", (int)sz.kind);
+        break;
+    }
+
+    /* Make room for one more patch record, doubling the arena-backed
+     * array (starting at 4 entries) when it is full. */
+    if (impl->nadd_patches == impl->add_patches_cap) {
+        u32 grown = (impl->add_patches_cap == 0) ? 4
+                                                 : impl->add_patches_cap * 2;
+        struct AAAllocaPatch* fresh = arena_array(t->c->tu,
+                                                  struct AAAllocaPatch, grown);
+        if (impl->add_patches) {
+            memcpy(fresh, impl->add_patches,
+                   sizeof(*fresh) * impl->nadd_patches);
+        }
+        impl->add_patches     = fresh;
+        impl->add_patches_cap = grown;
+    }
+
+    /* Record where the placeholder lands, then emit it:
+     * ADD dst, SP, #<max_outgoing>, immediate filled in at func_end. */
+    u32 dst_reg = reg_num(d);
+    struct AAAllocaPatch* rec = &impl->add_patches[impl->nadd_patches];
+    rec->pos     = em->pos(em);
+    rec->dst_reg = dst_reg;
+    impl->nadd_patches++;
+    emit32(em, aa64_add_imm(1, dst_reg, /*Rn=SP*/31, 0, 0));
+    impl->has_alloca = 1;
+}
static void aa_va_start_(CGTarget* t, Operand a) { (void)a; aa_panic(t, "va_start"); }
static void aa_va_arg_ (CGTarget* t, Operand d, Operand a, const Type* ty) { (void)d;(void)a;(void)ty; aa_panic(t, "va_arg"); }
static void aa_va_end_ (CGTarget* t, Operand a) { (void)a; aa_panic(t, "va_end"); }