commit 9bd61e8158c1156c1282a4265d7d31a53e5dfbd3
parent 1b065cf837d4d5833314562416492fa11d90b786
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 27 May 2026 12:38:56 -0700
opt: emit param_decls into a dedicated prologue block
When a function body begins with a loop, the body's first block is the
loop header and the back-edge targets it. With the IR_PARAM_DECL phantom
defs living in that block, two things miscompiled at -O1:
- liveness/regalloc read each param_decl as a per-iteration redefinition,
so an induction variable carried in a parameter register looked dead
across the loop and its register was reused in the body; and
- the emitter never places a label for the entry block, so the back-edge
into it resolved to displacement 0 and became a branch-to-self.
Both produced infinite loops (e.g. the matrix benchmark's freematrix at
-O1). Emit the param_decls into a dedicated prologue entry block that
falls through to the body so the loop header is never the entry block.
The prologue emits no code (param_decls are markers, the fall-through is
free, and the entry label is elided), and jump_cleanup keeps it separate
only when the body's first block is actually a loop header, so it is free
in the common case.
test-opt + test-toy: 1022 pass, 0 fail. Fixes the matrix -O1 hang.
Diffstat:
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/src/opt/cg_ir_lower.c b/src/opt/cg_ir_lower.c
@@ -350,9 +350,26 @@ static void make_blocks(CgIrLower* l, const u32* label_place) {
static void emit_param_decls(CgIrLower* l) {
if (!l->f->nparams || l->f->entry >= l->f->nblocks) return;
+ /* Emit the IR_PARAM_DECL phantom defs into a dedicated prologue block that
+ * falls through to the body, and make it the function entry. This keeps the
+ * parameter defs out of the body's first block, which matters when the body
+ * begins with a loop: that first block is then the loop header and the
+ * back-edge targets it. With the param_decls in the header, liveness reads
+ * each parameter as redefined every iteration (killing the liveness of an
+ * induction variable carried in a parameter register), and because the entry
+ * block's label is not placed by the emitter the back-edge resolves to a
+ * branch-to-self. Both miscompile loop-first functions at -O1. The prologue
+ * block emits no code (param_decls are markers, the fall-through is free, and
+ * the entry label is elided), so this is free in the common case. */
+ u32 prologue = ir_block_new(l->f);
+ l->f->entry = prologue;
+ ir_note_emit(l->f, prologue);
+ for (u32 i = l->f->emit_order_n - 1u; i > 0; --i)
+ l->f->emit_order[i] = l->f->emit_order[i - 1u];
+ l->f->emit_order[0] = prologue;
for (u32 i = 0; i < l->f->nparams; ++i) {
IRParam* p = &l->f->params[i];
- Inst* in = ir_emit(l->f, l->f->entry, IR_PARAM_DECL);
+ Inst* in = ir_emit(l->f, prologue, IR_PARAM_DECL);
IRParamDeclAux* aux = arena_znew(l->f->arena, IRParamDeclAux);
in->loc = p->loc;
in->type = p->type;