commit c89c0ddd48dfe37accf41decefb7ec977713edab
parent 042552da5134e5ade4b7183f419ef24220a873ea
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 27 May 2026 08:09:37 -0700
opt: route incoming params straight into their allocated register
bind_param now receives the destination NativeLoc the allocator chose for each
parameter instead of a mandatory frame home. A register-allocated scalar param
is moved directly from its incoming arg register (or loaded from the stack)
into its hard register; address-taken / aggregate / spilled params still go to
a frame slot. This removes the store-to-home + reload-into-PReg round trip
that every register-allocated parameter previously incurred. The
IR_PARAM_DECL marker now emits nothing (the value is placed at entry by
bind_param), and the param-home bookkeeping (allocate_param_home /
local_home_for_preg / param_home_by_preg) is deleted.
Incoming arg registers are not in the allocable set, so a register destination
never aliases an incoming arg register and the per-param moves need no ordering.
Verified O0==O1 on register-pressure int/fp and address-taken programs; full
toy suite 1333 pass / 0 fail.
Diffstat:
3 files changed, 59 insertions(+), 66 deletions(-)
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -2515,7 +2515,7 @@ static void aa_finalize(NativeTarget* t) {
}
static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
- NativeFrameSlot home);
+ NativeLoc dst);
/* Caller-saved allocables come first so the allocator prefers them (lower
* spill_cost); callee-saved x19..x28 / v8..v15 are appended and only chosen
@@ -2745,14 +2745,22 @@ NativeTarget* aa64_native_target_new(Compiler* c, ObjBuilder* obj,
return t;
}
+ /* Place the incoming parameter into `dst`: a hard register (the common
+ * register-allocated scalar case -> a single arg-reg move, or a stack load
+ * straight into the register), a frame slot (address-taken / aggregate /
+ * spilled), or nowhere (unused). ABI_ARG_INDIRECT params need a frame dst (any
+ * other kind panics). Arg registers are never allocable, so a reg dst never aliases one. */
static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
- NativeFrameSlot home) {
+ NativeLoc dst) {
AANativeTarget* a = aa_of(t);
const ABIFuncInfo* abi = abi_cg_func_info(t->c->abi, a->func->fn_type);
const ABIArgInfo* ai =
p->index < abi->nparams ? &abi->params[p->index] : NULL;
+ int to_reg = dst.kind == NATIVE_LOC_REG;
if (!ai || ai->kind == ABI_ARG_IGNORE) return;
if (ai->kind == ABI_ARG_INDIRECT) {
+ NativeAddr d_addr, from;
+ AggregateAccess access;
NativeLoc src =
aa_reg_loc(p->type, NATIVE_REG_INT,
a->next_param_int < 8u ? a->next_param_int++ : AA_TMP0);
@@ -2765,12 +2773,12 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
aa_emit_mem(a, 1, src, saddr, aa_mem_for_type(t, p->type, 8));
a->next_param_stack += 8u;
}
- NativeAddr dst, from;
- AggregateAccess access;
- memset(&dst, 0, sizeof dst);
- dst.base_kind = NATIVE_ADDR_BASE_FRAME;
- dst.base.frame = home;
- dst.base_type = p->type;
+ if (dst.kind != NATIVE_LOC_FRAME)
+ aa_panic(a, "indirect parameter requires a frame destination");
+ memset(&d_addr, 0, sizeof d_addr);
+ d_addr.base_kind = NATIVE_ADDR_BASE_FRAME;
+ d_addr.base.frame = dst.v.frame;
+ d_addr.base_type = p->type;
memset(&from, 0, sizeof from);
from.base_kind = NATIVE_ADDR_BASE_REG;
from.base.reg = src.v.reg;
@@ -2779,21 +2787,25 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
access.type = p->type;
access.size = p->size ? p->size : (u32)cg_type_size(t->c, p->type);
access.align = p->align ? p->align : type_align32(t, p->type);
- aa_copy_bytes(t, dst, from, access);
+ aa_copy_bytes(t, d_addr, from, access);
return;
}
for (u32 i = 0; i < ai->nparts; ++i) {
const ABIArgPart* part = &ai->parts[i];
NativeAllocClass cls =
part->cls == ABI_CLASS_FP ? NATIVE_REG_FP : NATIVE_REG_INT;
+ int reg_dst = to_reg && (NativeAllocClass)dst.cls == cls;
NativeLoc src;
if (cls == NATIVE_REG_FP && a->next_param_fp < 8u) {
src = aa_reg_loc(p->type, cls, a->next_param_fp++);
} else if (cls == NATIVE_REG_INT && a->next_param_int < 8u) {
src = aa_reg_loc(p->type, cls, a->next_param_int++);
} else {
- src = aa_reg_loc(p->type, cls, cls == NATIVE_REG_FP ? 16u : AA_TMP0);
+ /* Stack-passed part: load straight into the dst register when possible,
+ * otherwise a scratch for the store-to-frame path. */
+ Reg tmp = reg_dst ? (Reg)dst.v.reg : (cls == NATIVE_REG_FP ? 16u : AA_TMP0);
NativeAddr saddr;
+ src = aa_reg_loc(p->type, cls, tmp);
a->next_param_stack =
align_up_u32(a->next_param_stack, aa_part_stack_align(part));
memset(&saddr, 0, sizeof saddr);
@@ -2804,16 +2816,31 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p,
aa_emit_mem(a, 1, src, saddr, aa_mem_for_type(t, p->type, part->size));
a->next_param_stack += aa_part_stack_size(part);
}
- aa_store_part(t, aa_stack_loc(p->type, home, (i32)part->src_offset), src, 0,
- part->size);
+ if (dst.kind == NATIVE_LOC_NONE) {
+ /* Unused parameter: only the ABI cursor advances. */
+ } else if (to_reg) {
+ NativeLoc d = aa_reg_loc(dst.type ? dst.type : p->type,
+ (NativeAllocClass)dst.cls, (Reg)dst.v.reg);
+ if (!(src.kind == NATIVE_LOC_REG && src.v.reg == d.v.reg &&
+ (NativeAllocClass)src.cls == (NativeAllocClass)d.cls))
+ aa_move(t, d, src);
+ } else {
+ aa_store_part(t, aa_stack_loc(p->type, dst.v.frame, (i32)part->src_offset),
+ src, 0, part->size);
+ }
}
a->incoming_stack_size = align_up_u32(a->next_param_stack, 16u);
}
static void aa_bind_param(NativeDirectTarget* d, const CGParamDesc* p,
CGLocal local, NativeDirectLocal* l) {
+ NativeLoc dst;
(void)local;
- aa_bind_native_param(d->native, p, l->home);
+ memset(&dst, 0, sizeof dst);
+ dst.kind = NATIVE_LOC_FRAME;
+ dst.type = p->type;
+ dst.v.frame = l->home;
+ aa_bind_native_param(d->native, p, dst);
}
static const char* aa_no_tail(NativeDirectTarget* d, const CGCallDesc* call) {
diff --git a/src/arch/native_target.h b/src/arch/native_target.h
@@ -282,7 +282,14 @@ struct NativeTarget {
void (*func_end)(NativeTarget*);
NativeFrameSlot (*frame_slot)(NativeTarget*, const NativeFrameSlotDesc*);
- void (*bind_param)(NativeTarget*, const CGParamDesc*, NativeFrameSlot home);
+ /* Place the incoming parameter into `dst`. The caller (which has run register
+ * allocation) chooses the destination: a hard register (NATIVE_LOC_REG) for a
+ * register-allocated scalar param, a frame slot (NATIVE_LOC_FRAME) for an
+ * address-taken / spilled / aggregate param. NATIVE_LOC_NONE means the param
+ * is unused and only the ABI register/stack cursor must advance. Incoming arg
+ * registers are never allocable, so reg destinations never alias an incoming
+ * arg register and ordering across params is unconstrained. */
+ void (*bind_param)(NativeTarget*, const CGParamDesc*, NativeLoc dst);
MCLabel (*label_new)(NativeTarget*);
void (*label_place)(NativeTarget*, MCLabel);
diff --git a/src/opt/pass_native_emit.c b/src/opt/pass_native_emit.c
@@ -22,7 +22,6 @@ typedef struct NativeEmitCtx {
Func* f;
NativeTarget* target;
NativeFrameSlot* slot_map;
- NativeFrameSlot* param_home_by_preg;
MCLabel* labels;
u8* label_placed;
u32 max_outgoing;
@@ -527,35 +526,6 @@ static CGParamDesc semantic_param_desc(const IRParam* p) {
return out;
}
-static NativeFrameSlot local_home_for_preg(Func* f, PReg preg) {
- for (u32 i = 0; i < f->nlocals; ++i) {
- IRLocal* l = &f->locals[i];
- if (l->storage.kind == CG_LOCAL_STORAGE_REG &&
- (PReg)l->storage.v.reg == preg && l->home_slot)
- return l->home_slot;
- }
- return NATIVE_FRAME_SLOT_NONE;
-}
-
-static NativeFrameSlot allocate_param_home(NativeEmitCtx* e, const IRParam* p) {
- NativeFrameSlot opt_home = NATIVE_FRAME_SLOT_NONE;
- NativeFrameSlotDesc d;
- if (p->storage.kind == CG_LOCAL_STORAGE_REG)
- opt_home = local_home_for_preg(e->f, (PReg)p->storage.v.reg);
- if (opt_home) return map_slot(e, opt_home, p->loc);
- memset(&d, 0, sizeof d);
- d.type = p->type;
- d.name = p->name;
- d.loc = p->loc;
- d.size = p->size ? p->size : type_size_or(e->c, p->type, 8u);
- d.align = p->align ? p->align : type_align_or(e->c, p->type, 8u);
- d.kind = NATIVE_FRAME_SLOT_PARAM;
- if (p->flags & CG_LOCAL_ADDR_TAKEN) d.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN;
- if (p->flags & CG_LOCAL_MEMORY_REQUIRED)
- d.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED;
- return e->target->frame_slot(e->target, &d);
-}
-
static NativeLoc loc_for_preg(NativeEmitCtx* e, PReg preg, CfreeCgTypeId type,
SrcLoc loc) {
u8 kind = opt_preg_alloc_kind(e->f, preg);
@@ -569,35 +539,24 @@ static NativeLoc loc_for_preg(NativeEmitCtx* e, PReg preg, CfreeCgTypeId type,
}
static void bind_params(NativeEmitCtx* e) {
- u32 nregs = opt_reg_count(e->f);
- e->param_home_by_preg =
- arena_zarray(e->f->arena, NativeFrameSlot, nregs ? nregs : 1u);
for (u32 i = 0; i < e->f->nparams; ++i) {
IRParam* p = &e->f->params[i];
CGParamDesc sd = semantic_param_desc(p);
- NativeFrameSlot home = allocate_param_home(e, p);
- if (p->storage.kind == CG_LOCAL_STORAGE_REG && p->storage.v.reg < nregs)
- e->param_home_by_preg[p->storage.v.reg] = home;
- if (p->storage.kind == CG_LOCAL_STORAGE_FRAME)
- home = map_slot(e, p->storage.v.frame_slot, p->loc);
- if (e->target->bind_param) e->target->bind_param(e->target, &sd, home);
+ NativeLoc dst;
+ if (p->storage.kind == CG_LOCAL_STORAGE_REG)
+ dst = loc_for_preg(e, (PReg)p->storage.v.reg, p->type, p->loc);
+ else
+ dst = loc_frame(p->type, class_for_type(e, p->type),
+ map_slot(e, p->storage.v.frame_slot, p->loc));
+ if (e->target->bind_param) e->target->bind_param(e->target, &sd, dst);
}
}
+/* The parameter value is placed into its allocated location by bind_param at
+ * function entry; the IR_PARAM_DECL marker emits nothing. */
static void emit_param_decl(NativeEmitCtx* e, Inst* in) {
- IRParamDeclAux* aux = (IRParamDeclAux*)in->extra.aux;
- NativeFrameSlot home;
- NativeLoc src, dst;
- MemAccess mem;
- if (!aux || aux->desc.storage.kind != CG_LOCAL_STORAGE_REG) return;
- PReg preg = (PReg)aux->desc.storage.v.reg;
- if (!preg || preg >= opt_reg_count(e->f)) return;
- home = e->param_home_by_preg ? e->param_home_by_preg[preg] : 0u;
- if (!home) return;
- src = loc_frame(aux->desc.type, class_for_type(e, aux->desc.type), home);
- dst = loc_for_preg(e, preg, aux->desc.type, in->loc);
- mem = mem_for_type(e->c, aux->desc.type);
- write_loc(e, dst, src, mem, in->loc);
+ (void)e;
+ (void)in;
}
static NativeFrameSlot temp_slot(NativeEmitCtx* e, CfreeCgTypeId type,