kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit c89c0ddd48dfe37accf41decefb7ec977713edab
parent 042552da5134e5ade4b7183f419ef24220a873ea
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 27 May 2026 08:09:37 -0700

opt: route incoming params straight into their allocated register

bind_param now receives the destination NativeLoc the allocator chose for each
parameter instead of a mandatory frame home. A register-allocated scalar param
is moved directly from its incoming arg register (or loaded from the stack)
into its hard register; address-taken / aggregate / spilled params still go to
a frame slot. This removes the store-to-home + reload-into-PReg round trip that
every parameter previously incurred. The IR_PARAM_DECL marker now emits nothing
(the value is placed at entry by bind_param), and the param-home bookkeeping
(allocate_param_home / local_home_for_preg / param_home_by_preg) is deleted.

Incoming arg registers are not in the allocable set, so a register destination
never aliases an incoming arg register and the per-param moves need no ordering.

Verified O0==O1 on register-pressure int/fp and address-taken programs; full
toy suite 1333 pass / 0 fail.

Diffstat:
M src/arch/aa64/native.c | 53 ++++++++++++++++++++++++++++++++++++++++-------------
M src/arch/native_target.h | 9 ++++++++-
M src/opt/pass_native_emit.c | 63 +++++++++++----------------------------------------------------
3 files changed, 59 insertions(+), 66 deletions(-)

diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c @@ -2515,7 +2515,7 @@ static void aa_finalize(NativeTarget* t) { } static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p, - NativeFrameSlot home); + NativeLoc dst); /* Caller-saved allocables come first so the allocator prefers them (lower * spill_cost); callee-saved x19..x28 / v8..v15 are appended and only chosen @@ -2745,14 +2745,22 @@ NativeTarget* aa64_native_target_new(Compiler* c, ObjBuilder* obj, return t; } +/* Place the incoming parameter into `dst`: a hard register (the common + * register-allocated scalar case -> a single arg-reg move, or a stack load + * straight into the register), a frame slot (address-taken / aggregate / + * spilled), or nowhere (unused). Incoming arg registers are never allocable, + * so a register dst never aliases an incoming arg register. */ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p, - NativeFrameSlot home) { + NativeLoc dst) { AANativeTarget* a = aa_of(t); const ABIFuncInfo* abi = abi_cg_func_info(t->c->abi, a->func->fn_type); const ABIArgInfo* ai = p->index < abi->nparams ? &abi->params[p->index] : NULL; + int to_reg = dst.kind == NATIVE_LOC_REG; if (!ai || ai->kind == ABI_ARG_IGNORE) return; if (ai->kind == ABI_ARG_INDIRECT) { + NativeAddr d_addr, from; + AggregateAccess access; NativeLoc src = aa_reg_loc(p->type, NATIVE_REG_INT, a->next_param_int < 8u ? 
a->next_param_int++ : AA_TMP0); @@ -2765,12 +2773,12 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p, aa_emit_mem(a, 1, src, saddr, aa_mem_for_type(t, p->type, 8)); a->next_param_stack += 8u; } - NativeAddr dst, from; - AggregateAccess access; - memset(&dst, 0, sizeof dst); - dst.base_kind = NATIVE_ADDR_BASE_FRAME; - dst.base.frame = home; - dst.base_type = p->type; + if (dst.kind != NATIVE_LOC_FRAME) + aa_panic(a, "indirect parameter requires a frame destination"); + memset(&d_addr, 0, sizeof d_addr); + d_addr.base_kind = NATIVE_ADDR_BASE_FRAME; + d_addr.base.frame = dst.v.frame; + d_addr.base_type = p->type; memset(&from, 0, sizeof from); from.base_kind = NATIVE_ADDR_BASE_REG; from.base.reg = src.v.reg; @@ -2779,21 +2787,25 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p, access.type = p->type; access.size = p->size ? p->size : (u32)cg_type_size(t->c, p->type); access.align = p->align ? p->align : type_align32(t, p->type); - aa_copy_bytes(t, dst, from, access); + aa_copy_bytes(t, d_addr, from, access); return; } for (u32 i = 0; i < ai->nparts; ++i) { const ABIArgPart* part = &ai->parts[i]; NativeAllocClass cls = part->cls == ABI_CLASS_FP ? NATIVE_REG_FP : NATIVE_REG_INT; + int reg_dst = to_reg && (NativeAllocClass)dst.cls == cls; NativeLoc src; if (cls == NATIVE_REG_FP && a->next_param_fp < 8u) { src = aa_reg_loc(p->type, cls, a->next_param_fp++); } else if (cls == NATIVE_REG_INT && a->next_param_int < 8u) { src = aa_reg_loc(p->type, cls, a->next_param_int++); } else { - src = aa_reg_loc(p->type, cls, cls == NATIVE_REG_FP ? 16u : AA_TMP0); + /* Stack-passed part: load straight into the dst register when possible, + * otherwise a scratch for the store-to-frame path. */ + Reg tmp = reg_dst ? (Reg)dst.v.reg : (cls == NATIVE_REG_FP ? 
16u : AA_TMP0); NativeAddr saddr; + src = aa_reg_loc(p->type, cls, tmp); a->next_param_stack = align_up_u32(a->next_param_stack, aa_part_stack_align(part)); memset(&saddr, 0, sizeof saddr); @@ -2804,16 +2816,31 @@ static void aa_bind_native_param(NativeTarget* t, const CGParamDesc* p, aa_emit_mem(a, 1, src, saddr, aa_mem_for_type(t, p->type, part->size)); a->next_param_stack += aa_part_stack_size(part); } - aa_store_part(t, aa_stack_loc(p->type, home, (i32)part->src_offset), src, 0, - part->size); + if (dst.kind == NATIVE_LOC_NONE) { + /* Unused parameter: only the ABI cursor advances. */ + } else if (to_reg) { + NativeLoc d = aa_reg_loc(dst.type ? dst.type : p->type, + (NativeAllocClass)dst.cls, (Reg)dst.v.reg); + if (!(src.kind == NATIVE_LOC_REG && src.v.reg == d.v.reg && + (NativeAllocClass)src.cls == (NativeAllocClass)d.cls)) + aa_move(t, d, src); + } else { + aa_store_part(t, aa_stack_loc(p->type, dst.v.frame, (i32)part->src_offset), + src, 0, part->size); + } } a->incoming_stack_size = align_up_u32(a->next_param_stack, 16u); } static void aa_bind_param(NativeDirectTarget* d, const CGParamDesc* p, CGLocal local, NativeDirectLocal* l) { + NativeLoc dst; (void)local; - aa_bind_native_param(d->native, p, l->home); + memset(&dst, 0, sizeof dst); + dst.kind = NATIVE_LOC_FRAME; + dst.type = p->type; + dst.v.frame = l->home; + aa_bind_native_param(d->native, p, dst); } static const char* aa_no_tail(NativeDirectTarget* d, const CGCallDesc* call) { diff --git a/src/arch/native_target.h b/src/arch/native_target.h @@ -282,7 +282,14 @@ struct NativeTarget { void (*func_end)(NativeTarget*); NativeFrameSlot (*frame_slot)(NativeTarget*, const NativeFrameSlotDesc*); - void (*bind_param)(NativeTarget*, const CGParamDesc*, NativeFrameSlot home); + /* Place the incoming parameter into `dst`. 
The caller (which has run register + * allocation) chooses the destination: a hard register (NATIVE_LOC_REG) for a + * register-allocated scalar param, a frame slot (NATIVE_LOC_FRAME) for an + * address-taken / spilled / aggregate param. NATIVE_LOC_NONE means the param + * is unused and only the ABI register/stack cursor must advance. Incoming arg + * registers are never allocable, so reg destinations never alias an incoming + * arg register and ordering across params is unconstrained. */ + void (*bind_param)(NativeTarget*, const CGParamDesc*, NativeLoc dst); MCLabel (*label_new)(NativeTarget*); void (*label_place)(NativeTarget*, MCLabel); diff --git a/src/opt/pass_native_emit.c b/src/opt/pass_native_emit.c @@ -22,7 +22,6 @@ typedef struct NativeEmitCtx { Func* f; NativeTarget* target; NativeFrameSlot* slot_map; - NativeFrameSlot* param_home_by_preg; MCLabel* labels; u8* label_placed; u32 max_outgoing; @@ -527,35 +526,6 @@ static CGParamDesc semantic_param_desc(const IRParam* p) { return out; } -static NativeFrameSlot local_home_for_preg(Func* f, PReg preg) { - for (u32 i = 0; i < f->nlocals; ++i) { - IRLocal* l = &f->locals[i]; - if (l->storage.kind == CG_LOCAL_STORAGE_REG && - (PReg)l->storage.v.reg == preg && l->home_slot) - return l->home_slot; - } - return NATIVE_FRAME_SLOT_NONE; -} - -static NativeFrameSlot allocate_param_home(NativeEmitCtx* e, const IRParam* p) { - NativeFrameSlot opt_home = NATIVE_FRAME_SLOT_NONE; - NativeFrameSlotDesc d; - if (p->storage.kind == CG_LOCAL_STORAGE_REG) - opt_home = local_home_for_preg(e->f, (PReg)p->storage.v.reg); - if (opt_home) return map_slot(e, opt_home, p->loc); - memset(&d, 0, sizeof d); - d.type = p->type; - d.name = p->name; - d.loc = p->loc; - d.size = p->size ? p->size : type_size_or(e->c, p->type, 8u); - d.align = p->align ? 
p->align : type_align_or(e->c, p->type, 8u); - d.kind = NATIVE_FRAME_SLOT_PARAM; - if (p->flags & CG_LOCAL_ADDR_TAKEN) d.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN; - if (p->flags & CG_LOCAL_MEMORY_REQUIRED) - d.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED; - return e->target->frame_slot(e->target, &d); -} - static NativeLoc loc_for_preg(NativeEmitCtx* e, PReg preg, CfreeCgTypeId type, SrcLoc loc) { u8 kind = opt_preg_alloc_kind(e->f, preg); @@ -569,35 +539,24 @@ static NativeLoc loc_for_preg(NativeEmitCtx* e, PReg preg, CfreeCgTypeId type, } static void bind_params(NativeEmitCtx* e) { - u32 nregs = opt_reg_count(e->f); - e->param_home_by_preg = - arena_zarray(e->f->arena, NativeFrameSlot, nregs ? nregs : 1u); for (u32 i = 0; i < e->f->nparams; ++i) { IRParam* p = &e->f->params[i]; CGParamDesc sd = semantic_param_desc(p); - NativeFrameSlot home = allocate_param_home(e, p); - if (p->storage.kind == CG_LOCAL_STORAGE_REG && p->storage.v.reg < nregs) - e->param_home_by_preg[p->storage.v.reg] = home; - if (p->storage.kind == CG_LOCAL_STORAGE_FRAME) - home = map_slot(e, p->storage.v.frame_slot, p->loc); - if (e->target->bind_param) e->target->bind_param(e->target, &sd, home); + NativeLoc dst; + if (p->storage.kind == CG_LOCAL_STORAGE_REG) + dst = loc_for_preg(e, (PReg)p->storage.v.reg, p->type, p->loc); + else + dst = loc_frame(p->type, class_for_type(e, p->type), + map_slot(e, p->storage.v.frame_slot, p->loc)); + if (e->target->bind_param) e->target->bind_param(e->target, &sd, dst); } } +/* The parameter value is placed into its allocated location by bind_param at + * function entry; the IR_PARAM_DECL marker emits nothing. 
*/ static void emit_param_decl(NativeEmitCtx* e, Inst* in) { - IRParamDeclAux* aux = (IRParamDeclAux*)in->extra.aux; - NativeFrameSlot home; - NativeLoc src, dst; - MemAccess mem; - if (!aux || aux->desc.storage.kind != CG_LOCAL_STORAGE_REG) return; - PReg preg = (PReg)aux->desc.storage.v.reg; - if (!preg || preg >= opt_reg_count(e->f)) return; - home = e->param_home_by_preg ? e->param_home_by_preg[preg] : 0u; - if (!home) return; - src = loc_frame(aux->desc.type, class_for_type(e, aux->desc.type), home); - dst = loc_for_preg(e, preg, aux->desc.type, in->loc); - mem = mem_for_type(e->c, aux->desc.type); - write_loc(e, dst, src, mem, in->loc); + (void)e; + (void)in; } static NativeFrameSlot temp_slot(NativeEmitCtx* e, CfreeCgTypeId type,