commit ef59b5c177f8397efc36282cc508a49977c974f6
parent 57f1dd7b4466ca7780e7231a501980836d83c462
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 12 May 2026 07:25:32 -0700
Implement OPT1 lowering and allocation
Diffstat:
| A | doc/OPT1.md | | | 80 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/opt/ir.h | | | 42 | ++++++++++++++++++++++++++++++++++++++++++ |
| M | src/opt/opt.c | | | 78 | ++++++++++++++++++++++++++++++++++++++++++++++++------------------------------ |
| A | src/opt/pass_lower.c | | | 670 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | test/opt/opt_test.c | | | 495 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | test/opt/run.sh | | | 5 | +++++ |
| M | test/test.mk | | | 11 | ++++++++++- |
7 files changed, 1350 insertions(+), 31 deletions(-)
diff --git a/doc/OPT1.md b/doc/OPT1.md
@@ -0,0 +1,80 @@
+# OPT1 Checklist
+
+This is the focused implementation checklist for the first production `-O1`
+slice. The fuller design in `doc/OPT.md` is the source of truth for pass
+semantics, phase order, and exit criteria, especially:
+
+- `doc/OPT.md` §3.3, "Lowering and Allocation"
+- `doc/OPT.md` §4.1, "`-O1` Minimal Schedule"
+- `doc/OPT.md` Phase A, "Production `-O1` Lowering"
+
+If this checklist is ambiguous, follow `doc/OPT.md`; do not reduce scope or
+substitute a behaviorally similar shortcut without updating both documents.
+
+- [x] Set up `test/opt/` as the first task. Add IR-level and mock-target
+ red-green tests before implementation:
+ - `opt_liveness_branch`
+ - `opt_regalloc_priority`
+ - `opt_rewrite_spill_use_def`
+ - `opt_emit_no_virtual_alloc`
+ These tests must assert pass/rewrite shape, not just executable output.
+- [x] Add the liveness data model: block `live_in`/`live_out`, value live
+ ranges, loop depth/frequency, and complete use/def walkers.
+- [x] Implement simple `opt_regalloc(..., false)`: priority allocation by
+ tied hard-reg needs, frequency, live length, then stable id.
+- [x] Add the rewrite pass: map virtual regs to hard regs or `FS_SPILL`
+ slots, inserting reloads/stores for spilled uses and defs.
+- [x] Make `opt_emit` stop relying on wrapped-target `alloc_reg` for virtual
+ values after rewrite.
+- [ ] Fill in target-aware `opt_machinize`/`opt_combine`, starting with
+ AArch64 ABI/call constraints, noop move deletion, and safe single-use
+ folds. Partially complete: AArch64 register pools/call-clobber handling
+ and noop physical move deletion are implemented; general safe single-use
+ folds remain future work.
+- [x] Add focused `-O1` tests for branch liveness, call-clobber preservation,
+ spill pressure, inline asm tied/fixed registers, and post-rewrite DCE.
+- [ ] Review AArch64 backend internal scratch-register usage and decide how
+ those backend scratch conventions should interact with `opt` register
+ allocation.
+
+## Completion Notes
+
+- Implemented in `src/opt/pass_lower.c`, `src/opt/opt.c`, and
+ `src/opt/ir.h`.
+- Added `test/opt/opt_test.c`, `test/opt/run.sh`, and `make test-opt`.
+- `-O1` now runs CFG, machinize, liveness, simple allocation/rewrite,
+ combine, DCE, then post-rewrite emit.
+- Focused opt tests currently cover:
+ `opt_liveness_branch`, `opt_regalloc_priority`,
+ `opt_rewrite_spill_use_def`, `opt_emit_no_virtual_alloc`,
+ `opt_call_clobber_preservation`, `opt_spill_pressure`,
+ `opt_inline_asm_tied_fixed_regs`, and `opt_post_rewrite_dce`.
+- Validation run after implementation:
+ - `make test-opt` passed with 23 checks.
+ - `CFREE_OPT_LEVELS=1 CFREE_TEST_PATHS=D make test-cg` passed
+ 196 cases with 0 failures.
+ - Targeted `h06`, `h07`, and `q05` `CFREE_TEST_PATHS=E` runs passed.
+
+## Deviations
+
+- `opt_combine` is intentionally narrow: it removes noop physical copies but
+ does not yet implement a broader safe single-use fold framework.
+- `opt_dce` is conservative post-rewrite cleanup, currently covering `IR_NOP`
+ and empty non-side-effecting instructions rather than full dead-definition
+ elimination.
+- AArch64 allocation uses a conservative hard-register pool and explicit
+ caller-saved save/restore around calls. This is enough for the current `-O1`
+ slice but should be reconciled with backend-owned scratch-register
+ conventions.
+- x64 and RV64 register pools exist only as initial placeholders and have not
+ received the same targeted validation as AArch64.
+
+## Remaining Todos
+
+- Finish the general safe single-use fold portion of `opt_combine`.
+- Expand post-rewrite DCE into true dead-definition elimination once the
+ rewritten IR has enough precise side-effect and use/def coverage.
+- Review AArch64 backend internal scratch usage and decide the long-term
+ contract between backend scratch registers and opt register allocation.
+- Add targeted x64/RV64 `-O1` lowering/allocation tests before treating those
+ targets as production-ready for OPT1.
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -244,8 +244,38 @@ typedef struct Block {
u32 npreds;
u32 succ[2];
u8 nsucc;
+ u8 loop_depth;
+ u16 pad;
+ u32 frequency;
+ u64* live_in;
+ u64* live_out;
+ u64* live_use;
+ u64* live_def;
} Block;
+typedef enum OptAllocKind {
+ OPT_ALLOC_NONE,
+ OPT_ALLOC_HARD,
+ OPT_ALLOC_SPILL,
+} OptAllocKind;
+
+typedef struct OptValInfo {
+ u32 first_pos;
+ u32 last_pos;
+ u32 live_length;
+ u32 frequency;
+ i32 tied_hard_reg; /* -1 means no fixed/tied physical register need. */
+ Reg hard_reg;
+ FrameSlot spill_slot;
+ u8 alloc_kind; /* OptAllocKind */
+ u8 cls; /* RegClass */
+ u8 pad[2];
+} OptValInfo;
+
+#define OPT_REG_CLASSES 3u
+#define OPT_MAX_HARD_REGS 32u
+#define OPT_MAX_SCRATCH_REGS 4u
+
typedef struct Func {
Arena* arena;
CGFuncDesc desc; /* preserved for level-1 replay func_begin */
@@ -283,6 +313,18 @@ typedef struct Func {
* skips them. */
u32* emit_order;
u32 emit_order_n, emit_order_cap;
+
+ Target opt_target;
+ u8 opt_has_target;
+ u8 opt_rewritten;
+ u16 opt_live_words;
+ u32 opt_position_count;
+ OptValInfo* val_info; /* indexed by Val, length nvals */
+
+ Reg opt_hard_regs[OPT_REG_CLASSES][OPT_MAX_HARD_REGS];
+ u32 opt_hard_reg_count[OPT_REG_CLASSES];
+ Reg opt_scratch_regs[OPT_REG_CLASSES][OPT_MAX_SCRATCH_REGS];
+ u32 opt_scratch_reg_count[OPT_REG_CLASSES];
} Func;
/* ---- API ---- */
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -4,10 +4,9 @@
* coincide with their IR counterparts (Reg ↔ Val, label ↔ block id,
* vslot ↔ IR FrameSlot, vscope ↔ scope_aux index — doc/OPT.md §5.1).
*
- * Phase 3 (current): on func_end the wrapper optionally runs
- * opt_build_cfg + opt_build_ssa (level 2; output discarded), then
- * replays each Func into the wrapped target. Level 1 skips the SSA
- * passes; in both cases replay is the level-1 1:1 walk.
+ * OPT1: level 1 records the CGTarget stream, runs the minimal backend
+ * lowering schedule, rewrites virtual regs to hard regs/spill slots,
+ * and emits the rewritten IR into the wrapped target.
*
* Methods the wrapper rejects under unbounded virtuals:
* - clobbers / spill_reg / reload_reg are CG -O0 register-pressure
@@ -739,7 +738,8 @@ static void w_set_loc(CGTarget* t, SrcLoc loc) {
* ============================================================ */
typedef struct ReplayCtx {
- OptImpl* o;
+ Compiler* c;
+ Func* f;
CGTarget* tgt;
Reg* val_to_reg;
FrameSlot* slot_map;
@@ -747,14 +747,16 @@ typedef struct ReplayCtx {
CGScope* scope_map;
u8* val_alloced;
u8* block_label_placed;
+ u8 identity_regs;
} ReplayCtx;
static Reg val_to_target_reg(ReplayCtx* r, Val v) {
- Func* f = r->o->f;
+ Func* f = r->f;
if (v == VAL_NONE) return REG_NONE;
+ if (r->identity_regs) return (Reg)v;
if (v >= f->nvals) {
SrcLoc loc = {0, 0, 0};
- compiler_panic(r->o->c, loc, "opt replay: Val %u out of range", v);
+ compiler_panic(r->c, loc, "opt replay: Val %u out of range", v);
}
if (!r->val_alloced[v]) {
r->val_to_reg[v] =
@@ -766,9 +768,9 @@ static Reg val_to_target_reg(ReplayCtx* r, Val v) {
static FrameSlot slot_to_target(ReplayCtx* r, FrameSlot vs) {
if (vs == FRAME_SLOT_NONE) return FRAME_SLOT_NONE;
- if (vs >= r->o->f->nframe_slots + 1u) {
+ if (vs >= r->f->nframe_slots + 1u) {
SrcLoc loc = {0, 0, 0};
- compiler_panic(r->o->c, loc, "opt replay: vslot %u out of range",
+ compiler_panic(r->c, loc, "opt replay: vslot %u out of range",
(unsigned)vs);
}
return r->slot_map[vs];
@@ -809,7 +811,7 @@ static CGABIValue xlat_abivalue(ReplayCtx* r, const CGABIValue* in,
}
static Label ensure_label(ReplayCtx* r, u32 b) {
- if (b >= r->o->f->nblocks) return LABEL_NONE;
+ if (b >= r->f->nblocks) return LABEL_NONE;
if (r->label_map[b] == LABEL_NONE) {
r->label_map[b] = r->tgt->label_new(r->tgt);
}
@@ -819,7 +821,7 @@ static Label ensure_label(ReplayCtx* r, u32 b) {
static void ensure_label_placed(ReplayCtx* r, u32 b) {
if (r->block_label_placed[b]) return;
r->block_label_placed[b] = 1;
- if (b == r->o->f->entry) return;
+ if (b == r->f->entry) return;
Label l = ensure_label(r, b);
r->tgt->label_place(r->tgt, l);
}
@@ -841,13 +843,13 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
Operand* in_ops_ = NULL;
Operand* out_ops_ = NULL;
if (aux->nin) {
- in_ops_ = arena_array(r->o->f->arena, Operand, aux->nin);
+ in_ops_ = arena_array(r->f->arena, Operand, aux->nin);
for (u32 k = 0; k < aux->nin; ++k) {
in_ops_[k] = xlat_op(r, aux->in_ops[k]);
}
}
if (aux->nout) {
- out_ops_ = arena_array(r->o->f->arena, Operand, aux->nout);
+ out_ops_ = arena_array(r->f->arena, Operand, aux->nout);
for (u32 k = 0; k < aux->nout; ++k) {
out_ops_[k] = xlat_op(r, aux->out_ops[k]);
}
@@ -956,11 +958,11 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
cd.callee = xlat_op(r, cd.callee);
CGABIValue* args = NULL;
if (cd.nargs) {
- args = arena_array(r->o->f->arena, CGABIValue, cd.nargs);
+ args = arena_array(r->f->arena, CGABIValue, cd.nargs);
for (u32 k = 0; k < cd.nargs; ++k) {
CGABIPart* parts =
aux->desc.args[k].nparts
- ? arena_array(r->o->f->arena, CGABIPart,
+ ? arena_array(r->f->arena, CGABIPart,
aux->desc.args[k].nparts)
: NULL;
args[k] = xlat_abivalue(r, &aux->desc.args[k], parts);
@@ -971,14 +973,14 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
}
CGABIPart* ret_parts =
cd.ret.nparts
- ? arena_array(r->o->f->arena, CGABIPart, cd.ret.nparts)
+ ? arena_array(r->f->arena, CGABIPart, cd.ret.nparts)
: NULL;
cd.ret = xlat_abivalue(r, &aux->desc.ret, ret_parts);
w->call(w, &cd);
break;
}
case IR_BR: {
- Block* bl = &r->o->f->blocks[b];
+ Block* bl = &r->f->blocks[b];
if (bl->nsucc < 1) break;
Label l = ensure_label(r, bl->succ[0]);
w->jump(w, l);
@@ -987,7 +989,7 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
case IR_CMP_BRANCH: {
Operand a = xlat_op(r, in->opnds[0]);
Operand bo = xlat_op(r, in->opnds[1]);
- Block* bl = &r->o->f->blocks[b];
+ Block* bl = &r->f->blocks[b];
Label taken = ensure_label(r, bl->succ[0]);
w->cmp_branch(w, (CmpOp)in->extra.imm, a, bo, taken);
break;
@@ -999,7 +1001,7 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
} else {
CGABIPart* parts =
aux->val.nparts
- ? arena_array(r->o->f->arena, CGABIPart, aux->val.nparts)
+ ? arena_array(r->f->arena, CGABIPart, aux->val.nparts)
: NULL;
CGABIValue v = xlat_abivalue(r, &aux->val, parts);
w->ret(w, &v);
@@ -1114,9 +1116,9 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
case IR_INTRINSIC: {
IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
Operand* dsts =
- aux->ndst ? arena_array(r->o->f->arena, Operand, aux->ndst) : NULL;
+ aux->ndst ? arena_array(r->f->arena, Operand, aux->ndst) : NULL;
Operand* args =
- aux->narg ? arena_array(r->o->f->arena, Operand, aux->narg) : NULL;
+ aux->narg ? arena_array(r->f->arena, Operand, aux->narg) : NULL;
for (u32 k = 0; k < aux->ndst; ++k) dsts[k] = xlat_op(r, aux->dsts[k]);
for (u32 k = 0; k < aux->narg; ++k) args[k] = xlat_op(r, aux->args[k]);
w->intrinsic(w, aux->kind, dsts, aux->ndst, args, aux->narg);
@@ -1126,7 +1128,7 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
}
static void replay_block(ReplayCtx* r, u32 b) {
- Func* f = r->o->f;
+ Func* f = r->f;
if (b >= f->nblocks) return;
ensure_label_placed(r, b);
Block* bl = &f->blocks[b];
@@ -1135,13 +1137,12 @@ static void replay_block(ReplayCtx* r, u32 b) {
}
}
-static void replay_func(OptImpl* o) {
- Func* f = o->f;
- CGTarget* w = o->target;
-
+static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
ReplayCtx r;
- r.o = o;
+ r.c = c;
+ r.f = f;
r.tgt = w;
+ r.identity_regs = identity ? 1u : 0u;
u32 nv = f->nvals ? f->nvals : 1u;
r.val_to_reg = arena_zarray(f->arena, Reg, nv);
for (u32 i = 0; i < nv; ++i) r.val_to_reg[i] = REG_NONE;
@@ -1195,18 +1196,35 @@ static void replay_func(OptImpl* o) {
w->func_end(w);
}
+static void replay_func(OptImpl* o) {
+ replay_func_to(o->c, o->f, o->target, 0);
+}
+
+void opt_emit(Compiler* c, Func* f, CGTarget* target) {
+ replay_func_to(c, f, target, 1);
+}
+
/* ---- func_end: optionally run dry-run passes; replay; reset ---- */
static void w_func_end(CGTarget* t) {
OptImpl* o = impl_of(t);
if (!o->f) return;
- if (o->level >= 2) {
+ if (o->level == 1) {
+ opt_build_cfg(o->f);
+ opt_machinize(o->f, o->c->target);
+ opt_live_info(o->f);
+ opt_regalloc(o->f, 0);
+ opt_combine(o->f);
+ opt_dce(o->f);
+ opt_emit(o->c, o->f, o->target);
+ } else if (o->level >= 2) {
opt_build_cfg(o->f);
opt_build_ssa(o->f);
+ replay_func(o);
+ } else {
+ replay_func(o);
}
-
- replay_func(o);
o->f = NULL;
o->cur = 0;
}
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -0,0 +1,670 @@
+#include "opt/opt.h"
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/core.h"
+
+static u32 type_size_fallback(const Func* f, const Type* t) {
+ if (!t) return f->opt_target.ptr_size ? f->opt_target.ptr_size : 8u;
+ switch ((TypeKind)t->kind) {
+ case TY_BOOL:
+ case TY_CHAR:
+ case TY_SCHAR:
+ case TY_UCHAR:
+ return 1;
+ case TY_SHORT:
+ case TY_USHORT:
+ return 2;
+ case TY_INT:
+ case TY_UINT:
+ case TY_FLOAT:
+ return 4;
+ case TY_LONG:
+ case TY_ULONG:
+ case TY_LLONG:
+ case TY_ULLONG:
+ case TY_DOUBLE:
+ case TY_PTR:
+ return 8;
+ case TY_INT128:
+ case TY_UINT128:
+ case TY_LDOUBLE:
+ return 16;
+ default:
+ return f->opt_target.ptr_size ? f->opt_target.ptr_size : 8u;
+ }
+}
+
+static u32 bit_words(u32 nvals) { return (nvals + 63u) / 64u; }
+
+static void bit_set(u64* bits, Val v) { bits[v / 64u] |= 1ull << (v % 64u); }
+static int bit_has(const u64* bits, Val v) {
+ return (bits[v / 64u] & (1ull << (v % 64u))) != 0;
+}
+static int bit_or_changed(u64* dst, const u64* src, u32 n) {
+ int changed = 0;
+ for (u32 i = 0; i < n; ++i) {
+ u64 old = dst[i];
+ dst[i] |= src[i];
+ changed |= dst[i] != old;
+ }
+ return changed;
+}
+static int bit_copy_changed(u64* dst, const u64* src, u32 n) {
+ int changed = 0;
+ for (u32 i = 0; i < n; ++i) {
+ changed |= dst[i] != src[i];
+ dst[i] = src[i];
+ }
+ return changed;
+}
+
+typedef void (*OperandWalkFn)(Func*, Inst*, Operand*, int is_def, void*);
+
+static int val_in_inst_defs(const Inst* in, Val v) {
+ if (v == VAL_NONE) return 0;
+ if (in->def == v) return 1;
+ for (u32 i = 0; i < in->ndefs; ++i)
+ if (in->defs[i] == v) return 1;
+ return 0;
+}
+
+static void walk_operand(Func* f, Inst* in, Operand* op, int is_def,
+ OperandWalkFn fn, void* ctx) {
+ if (!op) return;
+ if (op->kind == OPK_REG) {
+ fn(f, in, op, is_def, ctx);
+ } else if (op->kind == OPK_INDIRECT) {
+ Operand base = *op;
+ base.kind = OPK_REG;
+ base.cls = RC_INT;
+ base.v.reg = op->v.ind.base;
+ fn(f, in, &base, 0, ctx);
+ op->v.ind.base = base.v.reg;
+ }
+}
+
+static void walk_abivalue(Func* f, Inst* in, CGABIValue* v, int storage_def,
+ OperandWalkFn fn, void* ctx) {
+ walk_operand(f, in, &v->storage, storage_def, fn, ctx);
+ for (u32 i = 0; i < v->nparts; ++i)
+ walk_operand(f, in, (Operand*)&v->parts[i].op, storage_def, fn, ctx);
+}
+
+static void walk_inst_operands(Func* f, Inst* in, OperandWalkFn fn, void* ctx) {
+ for (u32 i = 0; i < in->nopnds; ++i) {
+ int is_def = 0;
+ if (in->opnds[i].kind == OPK_REG)
+ is_def = i == 0 && val_in_inst_defs(in, (Val)in->opnds[i].v.reg);
+ if (!is_def) walk_operand(f, in, &in->opnds[i], 0, fn, ctx);
+ }
+ for (u32 i = 0; i < in->nopnds; ++i) {
+ int is_def = 0;
+ if (in->opnds[i].kind == OPK_REG)
+ is_def = i == 0 && val_in_inst_defs(in, (Val)in->opnds[i].v.reg);
+ if (is_def) walk_operand(f, in, &in->opnds[i], 1, fn, ctx);
+ }
+
+ switch ((IROp)in->op) {
+ case IR_CALL: {
+ IRCallAux* aux = (IRCallAux*)in->extra.aux;
+ if (!aux) break;
+ walk_operand(f, in, &aux->desc.callee, 0, fn, ctx);
+ for (u32 i = 0; i < aux->desc.nargs; ++i)
+ walk_abivalue(f, in, (CGABIValue*)&aux->desc.args[i], 0, fn, ctx);
+ walk_abivalue(f, in, &aux->desc.ret, 1, fn, ctx);
+ break;
+ }
+ case IR_RET: {
+ IRRetAux* aux = (IRRetAux*)in->extra.aux;
+ if (aux && aux->present) walk_abivalue(f, in, &aux->val, 0, fn, ctx);
+ break;
+ }
+ case IR_SCOPE_BEGIN: {
+ IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
+ if (aux) walk_operand(f, in, &aux->desc.cond, 0, fn, ctx);
+ break;
+ }
+ case IR_ASM_BLOCK: {
+ IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
+ if (!aux) break;
+ for (u32 i = 0; i < aux->nin; ++i)
+ walk_operand(f, in, &aux->in_ops[i], 0, fn, ctx);
+ for (u32 i = 0; i < aux->nout; ++i)
+ walk_operand(f, in, &aux->out_ops[i], 1, fn, ctx);
+ break;
+ }
+ case IR_INTRINSIC: {
+ IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
+ if (!aux) break;
+ for (u32 i = 0; i < aux->narg; ++i)
+ walk_operand(f, in, &aux->args[i], 0, fn, ctx);
+ for (u32 i = 0; i < aux->ndst; ++i)
+ walk_operand(f, in, &aux->dsts[i], 1, fn, ctx);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+typedef struct UseDefCtx {
+ u64* use;
+ u64* def;
+ u32 pos;
+ u32 freq;
+} UseDefCtx;
+
+static void collect_use_def(Func* f, Inst* in, Operand* op, int is_def,
+ void* arg) {
+ (void)in;
+ UseDefCtx* c = (UseDefCtx*)arg;
+ if (op->kind != OPK_REG) return;
+ Val v = (Val)op->v.reg;
+ if (v == VAL_NONE || v >= f->nvals) return;
+ if (!f->val_info) return;
+ OptValInfo* vi = &f->val_info[v];
+ if (vi->first_pos == 0 || c->pos < vi->first_pos) vi->first_pos = c->pos;
+ if (c->pos > vi->last_pos) vi->last_pos = c->pos;
+ vi->frequency += c->freq ? c->freq : 1u;
+ vi->cls = f->val_cls[v];
+ if (is_def) {
+ bit_set(c->def, v);
+ } else {
+ if (!bit_has(c->def, v)) bit_set(c->use, v);
+ }
+}
+
+typedef struct BitsCtx {
+ u64* use;
+ u64* def;
+} BitsCtx;
+
+static void collect_bits(Func* f, Inst* in, Operand* op, int is_def,
+ void* arg) {
+ (void)in;
+ BitsCtx* c = (BitsCtx*)arg;
+ if (op->kind != OPK_REG) return;
+ Val v = (Val)op->v.reg;
+ if (v == VAL_NONE || v >= f->nvals) return;
+ if (is_def)
+ bit_set(c->def, v);
+ else
+ bit_set(c->use, v);
+}
+
+void opt_machinize(Func* f, Target t) {
+ f->opt_target = t;
+ f->opt_has_target = 1;
+ for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
+ f->opt_hard_reg_count[c] = 0;
+ f->opt_scratch_reg_count[c] = 0;
+ }
+
+ switch (t.arch) {
+ case CFREE_ARCH_ARM_64: {
+ static const Reg ints[] = {13, 14, 15};
+ static const Reg fps[] = {17, 18, 19, 20, 21, 22, 23};
+ for (u32 i = 0; i < sizeof ints / sizeof ints[0]; ++i)
+ f->opt_hard_regs[RC_INT][f->opt_hard_reg_count[RC_INT]++] = ints[i];
+ for (u32 i = 0; i < sizeof fps / sizeof fps[0]; ++i)
+ f->opt_hard_regs[RC_FP][f->opt_hard_reg_count[RC_FP]++] = fps[i];
+ f->opt_scratch_regs[RC_INT][0] = 16;
+ f->opt_scratch_regs[RC_INT][1] = 17;
+ f->opt_scratch_reg_count[RC_INT] = 2;
+ f->opt_scratch_regs[RC_FP][0] = 24;
+ f->opt_scratch_regs[RC_FP][1] = 25;
+ f->opt_scratch_reg_count[RC_FP] = 2;
+ break;
+ }
+ case CFREE_ARCH_X86_64: {
+ static const Reg ints[] = {10};
+ static const Reg fps[] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ for (u32 i = 0; i < sizeof ints / sizeof ints[0]; ++i)
+ f->opt_hard_regs[RC_INT][f->opt_hard_reg_count[RC_INT]++] = ints[i];
+ for (u32 i = 0; i < sizeof fps / sizeof fps[0]; ++i)
+ f->opt_hard_regs[RC_FP][f->opt_hard_reg_count[RC_FP]++] = fps[i];
+ f->opt_scratch_regs[RC_INT][0] = 11;
+ f->opt_scratch_reg_count[RC_INT] = 1;
+ f->opt_scratch_regs[RC_FP][0] = 0;
+ f->opt_scratch_reg_count[RC_FP] = 1;
+ break;
+ }
+ case CFREE_ARCH_RV64: {
+ for (Reg r = 28; r <= 31; ++r)
+ f->opt_hard_regs[RC_INT][f->opt_hard_reg_count[RC_INT]++] = r;
+ for (Reg r = 28; r <= 31; ++r)
+ f->opt_hard_regs[RC_FP][f->opt_hard_reg_count[RC_FP]++] = r;
+ f->opt_scratch_regs[RC_INT][0] = 5;
+ f->opt_scratch_regs[RC_INT][1] = 6;
+ f->opt_scratch_reg_count[RC_INT] = 2;
+ f->opt_scratch_regs[RC_FP][0] = 0;
+ f->opt_scratch_reg_count[RC_FP] = 1;
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static void build_loop_depth(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ f->blocks[b].loop_depth = 0;
+ f->blocks[b].frequency = 1;
+ }
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 s = 0; s < bl->nsucc; ++s) {
+ u32 h = bl->succ[s];
+ if (h > b || h >= f->nblocks) continue;
+ for (u32 k = h; k <= b; ++k)
+ if (f->blocks[k].loop_depth < 31) ++f->blocks[k].loop_depth;
+ }
+ }
+ for (u32 b = 0; b < f->nblocks; ++b)
+ f->blocks[b].frequency = 1u << f->blocks[b].loop_depth;
+}
+
+void opt_live_info(Func* f) {
+ f->opt_live_words = (u16)bit_words(f->nvals);
+ f->opt_position_count = 0;
+ u32 words = f->opt_live_words;
+ if (!f->val_info) f->val_info = arena_zarray(f->arena, OptValInfo, f->nvals);
+ for (u32 v = 0; v < f->nvals; ++v) {
+ f->val_info[v].first_pos = 0;
+ f->val_info[v].last_pos = 0;
+ f->val_info[v].live_length = 0;
+ f->val_info[v].frequency = 0;
+ f->val_info[v].tied_hard_reg = -1;
+ f->val_info[v].hard_reg = REG_NONE;
+ f->val_info[v].spill_slot = FRAME_SLOT_NONE;
+ f->val_info[v].alloc_kind = OPT_ALLOC_NONE;
+ f->val_info[v].cls = f->val_cls[v];
+ }
+ build_loop_depth(f);
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ bl->live_in = arena_zarray(f->arena, u64, words);
+ bl->live_out = arena_zarray(f->arena, u64, words);
+ bl->live_use = arena_zarray(f->arena, u64, words);
+ bl->live_def = arena_zarray(f->arena, u64, words);
+ UseDefCtx c;
+ memset(&c, 0, sizeof c);
+ c.use = bl->live_use;
+ c.def = bl->live_def;
+ c.freq = bl->frequency;
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ c.pos = ++f->opt_position_count;
+ walk_inst_operands(f, &bl->insts[i], collect_use_def, &c);
+ }
+ }
+ int changed;
+ do {
+ changed = 0;
+ for (u32 bi = f->nblocks; bi > 0; --bi) {
+ u32 b = bi - 1u;
+ Block* bl = &f->blocks[b];
+ u64* new_out = arena_zarray(f->arena, u64, words);
+ u64* new_in = arena_zarray(f->arena, u64, words);
+ for (u32 s = 0; s < bl->nsucc; ++s) {
+ u32 t = bl->succ[s];
+ if (t < f->nblocks) bit_or_changed(new_out, f->blocks[t].live_in, words);
+ }
+ for (u32 w = 0; w < words; ++w)
+ new_in[w] = bl->live_use[w] | (new_out[w] & ~bl->live_def[w]);
+ changed |= bit_copy_changed(bl->live_out, new_out, words);
+ changed |= bit_copy_changed(bl->live_in, new_in, words);
+ }
+ } while (changed);
+
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (Val v = 1; v < f->nvals; ++v) {
+ if (bit_has(bl->live_in, v) || bit_has(bl->live_out, v)) {
+ OptValInfo* vi = &f->val_info[v];
+ if (vi->first_pos == 0) vi->first_pos = 1;
+ if (vi->last_pos < f->opt_position_count)
+ vi->last_pos = f->opt_position_count;
+ vi->frequency += bl->frequency;
+ }
+ }
+ }
+ for (Val v = 1; v < f->nvals; ++v) {
+ OptValInfo* vi = &f->val_info[v];
+ if (vi->first_pos && vi->last_pos >= vi->first_pos)
+ vi->live_length = vi->last_pos - vi->first_pos + 1u;
+ }
+}
+
+static int ranges_overlap(const OptValInfo* a, const OptValInfo* b) {
+ if (!a->first_pos || !b->first_pos) return 0;
+ return a->first_pos <= b->last_pos && b->first_pos <= a->last_pos;
+}
+
+static int val_higher_priority(Func* f, Val a, Val b) {
+ OptValInfo* av = &f->val_info[a];
+ OptValInfo* bv = &f->val_info[b];
+ int at = av->tied_hard_reg >= 0;
+ int bt = bv->tied_hard_reg >= 0;
+ if (at != bt) return at > bt;
+ if (av->frequency != bv->frequency) return av->frequency > bv->frequency;
+ if (av->live_length != bv->live_length) return av->live_length < bv->live_length;
+ return a < b;
+}
+
+static int hard_conflicts(Func* f, Val* assigned, u32 nassigned, Val v, Reg r) {
+ for (u32 i = 0; i < nassigned; ++i) {
+ Val ov = assigned[i];
+ if (f->val_info[ov].alloc_kind != OPT_ALLOC_HARD) continue;
+ if (f->val_info[ov].hard_reg != r) continue;
+ if (ranges_overlap(&f->val_info[ov], &f->val_info[v])) return 1;
+ }
+ return 0;
+}
+
+static FrameSlot spill_slot_for(Func* f, Val v) {
+ if (f->val_info[v].spill_slot != FRAME_SLOT_NONE)
+ return f->val_info[v].spill_slot;
+ FrameSlotDesc d;
+ memset(&d, 0, sizeof d);
+ d.type = f->val_type[v];
+ d.size = type_size_fallback(f, f->val_type[v]);
+ d.align = d.size >= 8 ? 8 : d.size;
+ d.kind = FS_SPILL;
+ f->val_info[v].spill_slot = ir_frame_slot_new(f, &d);
+ return f->val_info[v].spill_slot;
+}
+
+typedef struct RewriteList {
+ Inst* data;
+ u32 n;
+ u32 cap;
+} RewriteList;
+
+static Inst* list_push(Func* f, RewriteList* l, IROp op) {
+ if (l->n == l->cap) {
+ u32 ncap = l->cap ? l->cap * 2u : 16u;
+ Inst* nb = arena_zarray(f->arena, Inst, ncap);
+ if (l->data) memcpy(nb, l->data, sizeof(Inst) * l->n);
+ l->data = nb;
+ l->cap = ncap;
+ }
+ Inst* in = &l->data[l->n++];
+ memset(in, 0, sizeof *in);
+ in->op = (u16)op;
+ return in;
+}
+
+static MemAccess spill_mem(Func* f, Val v) {
+ MemAccess m;
+ memset(&m, 0, sizeof m);
+ m.type = f->val_type[v];
+ m.size = type_size_fallback(f, f->val_type[v]);
+ m.align = m.size >= 8 ? 8 : m.size;
+ m.alias.kind = ALIAS_LOCAL;
+ m.alias.v.local_id = (i32)spill_slot_for(f, v);
+ return m;
+}
+
+static Operand spill_addr(Func* f, Val v) {
+ Operand o;
+ memset(&o, 0, sizeof o);
+ o.kind = OPK_LOCAL;
+ o.cls = RC_INT;
+ o.type = f->val_type[v];
+ o.v.frame_slot = spill_slot_for(f, v);
+ return o;
+}
+
+static Operand hard_operand(Func* f, Val v) {
+ Operand o;
+ memset(&o, 0, sizeof o);
+ o.kind = OPK_REG;
+ o.cls = f->val_info[v].cls;
+ o.type = f->val_type[v];
+ o.v.reg = f->val_info[v].hard_reg;
+ return o;
+}
+
+static void append_store_val(Func* f, RewriteList* out, Val v) {
+ Inst* st = list_push(f, out, IR_STORE);
+ st->opnds = arena_array(f->arena, Operand, 2);
+ st->opnds[0] = spill_addr(f, v);
+ st->opnds[1] = hard_operand(f, v);
+ st->nopnds = 2;
+ st->extra.mem = spill_mem(f, v);
+}
+
+static void append_load_val(Func* f, RewriteList* out, Val v) {
+ Inst* ld = list_push(f, out, IR_LOAD);
+ ld->opnds = arena_array(f->arena, Operand, 2);
+ ld->opnds[0] = hard_operand(f, v);
+ ld->opnds[1] = spill_addr(f, v);
+ ld->nopnds = 2;
+ ld->extra.mem = spill_mem(f, v);
+}
+
+static Reg scratch_for(Func* f, u8 cls, u32* next) {
+ u32 n = f->opt_scratch_reg_count[cls];
+ if (!n) return REG_NONE;
+ Reg r = f->opt_scratch_regs[cls][*next % n];
+ ++*next;
+ return r;
+}
+
+typedef struct RewriteCtx {
+ RewriteList* before;
+ RewriteList* after;
+ u32 next_scratch[OPT_REG_CLASSES];
+} RewriteCtx;
+
+static void rewrite_one_operand(Func* f, Inst* owner, Operand* op, int is_def,
+ void* arg) {
+ (void)owner;
+ RewriteCtx* c = (RewriteCtx*)arg;
+ if (op->kind != OPK_REG) return;
+ Val v = (Val)op->v.reg;
+ if (v == VAL_NONE || v >= f->nvals) return;
+ OptValInfo* vi = &f->val_info[v];
+ if (vi->alloc_kind == OPT_ALLOC_HARD) {
+ op->v.reg = vi->hard_reg;
+ return;
+ }
+ if (vi->alloc_kind != OPT_ALLOC_SPILL) return;
+ u8 cls = vi->cls;
+ Reg scratch = scratch_for(f, cls, &c->next_scratch[cls]);
+ op->v.reg = scratch;
+ if (!is_def) {
+ Inst* ld = list_push(f, c->before, IR_LOAD);
+ ld->opnds = arena_array(f->arena, Operand, 2);
+ ld->opnds[0] = *op;
+ ld->opnds[1] = spill_addr(f, v);
+ ld->nopnds = 2;
+ ld->extra.mem = spill_mem(f, v);
+ } else {
+ Inst* st = list_push(f, c->after, IR_STORE);
+ st->opnds = arena_array(f->arena, Operand, 2);
+ st->opnds[0] = spill_addr(f, v);
+ st->opnds[1] = *op;
+ st->nopnds = 2;
+ st->extra.mem = spill_mem(f, v);
+ }
+}
+
+static void rewrite_func(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ u64** live_after = arena_array(f->arena, u64*, bl->ninsts ? bl->ninsts : 1u);
+ u64* live = arena_zarray(f->arena, u64, f->opt_live_words);
+ for (u32 w = 0; w < f->opt_live_words; ++w) live[w] = bl->live_out[w];
+ for (u32 ri = bl->ninsts; ri > 0; --ri) {
+ u32 i = ri - 1u;
+ live_after[i] = arena_zarray(f->arena, u64, f->opt_live_words);
+ for (u32 w = 0; w < f->opt_live_words; ++w) live_after[i][w] = live[w];
+ u64* use = arena_zarray(f->arena, u64, f->opt_live_words);
+ u64* def = arena_zarray(f->arena, u64, f->opt_live_words);
+ BitsCtx bc = {use, def};
+ walk_inst_operands(f, &bl->insts[i], collect_bits, &bc);
+ for (u32 w = 0; w < f->opt_live_words; ++w)
+ live[w] = (live[w] & ~def[w]) | use[w];
+ }
+
+ RewriteList out;
+ memset(&out, 0, sizeof out);
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst in = bl->insts[i];
+ u64* def = arena_zarray(f->arena, u64, f->opt_live_words);
+ BitsCtx db = {arena_zarray(f->arena, u64, f->opt_live_words), def};
+ walk_inst_operands(f, &in, collect_bits, &db);
+ RewriteList before, after;
+ memset(&before, 0, sizeof before);
+ memset(&after, 0, sizeof after);
+ RewriteCtx ctx;
+ memset(&ctx, 0, sizeof ctx);
+ ctx.before = &before;
+ ctx.after = &after;
+ walk_inst_operands(f, &in, rewrite_one_operand, &ctx);
+ for (u32 k = 0; k < before.n; ++k) {
+ Inst* dst = list_push(f, &out, (IROp)before.data[k].op);
+ *dst = before.data[k];
+ }
+ if ((IROp)in.op == IR_CALL) {
+ for (Val v = 1; v < f->nvals; ++v) {
+ if (!bit_has(live_after[i], v) || bit_has(def, v)) continue;
+ if (f->val_info[v].alloc_kind == OPT_ALLOC_HARD) append_store_val(f, &out, v);
+ }
+ }
+ Inst* dst = list_push(f, &out, (IROp)in.op);
+ *dst = in;
+ if ((IROp)in.op == IR_CALL) {
+ for (Val v = 1; v < f->nvals; ++v) {
+ if (!bit_has(live_after[i], v) || bit_has(def, v)) continue;
+ if (f->val_info[v].alloc_kind == OPT_ALLOC_HARD) append_load_val(f, &out, v);
+ }
+ }
+ for (u32 k = 0; k < after.n; ++k) {
+ Inst* ad = list_push(f, &out, (IROp)after.data[k].op);
+ *ad = after.data[k];
+ }
+ }
+ bl->insts = out.data;
+ bl->ninsts = out.n;
+ bl->cap = out.cap;
+ }
+ f->opt_rewritten = 1;
+}
+
+void opt_regalloc(Func* f, int allow_live_range_split) {
+ (void)allow_live_range_split;
+ if (!f->val_info) opt_live_info(f);
+ Val* order = arena_array(f->arena, Val, f->nvals ? f->nvals : 1u);
+ u32 norder = 0;
+ for (Val v = 1; v < f->nvals; ++v)
+ if (f->val_info[v].first_pos) order[norder++] = v;
+ for (u32 i = 1; i < norder; ++i) {
+ Val key = order[i];
+ u32 j = i;
+ while (j > 0 && val_higher_priority(f, key, order[j - 1u])) {
+ order[j] = order[j - 1u];
+ --j;
+ }
+ order[j] = key;
+ }
+ Val* assigned = arena_array(f->arena, Val, norder ? norder : 1u);
+ u32 nassigned = 0;
+ for (u32 i = 0; i < norder; ++i) {
+ Val v = order[i];
+ OptValInfo* vi = &f->val_info[v];
+ u8 cls = vi->cls;
+ if (vi->tied_hard_reg >= 0 &&
+ !hard_conflicts(f, assigned, nassigned, v, (Reg)vi->tied_hard_reg)) {
+ vi->alloc_kind = OPT_ALLOC_HARD;
+ vi->hard_reg = (Reg)vi->tied_hard_reg;
+ assigned[nassigned++] = v;
+ continue;
+ }
+ int found = 0;
+ for (u32 r = 0; r < f->opt_hard_reg_count[cls]; ++r) {
+ Reg hr = f->opt_hard_regs[cls][r];
+ if (!hard_conflicts(f, assigned, nassigned, v, hr)) {
+ vi->alloc_kind = OPT_ALLOC_HARD;
+ vi->hard_reg = hr;
+ assigned[nassigned++] = v;
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ vi->alloc_kind = OPT_ALLOC_SPILL;
+ vi->spill_slot = spill_slot_for(f, v);
+ }
+ }
+ rewrite_func(f);
+}
+
+void opt_combine(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ u32 w = 0;
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if ((IROp)in->op == IR_COPY && in->nopnds == 2 &&
+ in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG &&
+ in->opnds[0].v.reg == in->opnds[1].v.reg) {
+ continue;
+ }
+ bl->insts[w++] = *in;
+ }
+ bl->ninsts = w;
+ }
+}
+
+static int side_effecting(IROp op) {
+ switch (op) {
+ case IR_STORE:
+ case IR_AGG_COPY:
+ case IR_AGG_SET:
+ case IR_BITFIELD_STORE:
+ case IR_CALL:
+ case IR_BR:
+ case IR_CONDBR:
+ case IR_CMP_BRANCH:
+ case IR_RET:
+ case IR_SCOPE_BEGIN:
+ case IR_SCOPE_ELSE:
+ case IR_SCOPE_END:
+ case IR_BREAK_TO:
+ case IR_CONTINUE_TO:
+ case IR_VA_START:
+ case IR_VA_END:
+ case IR_VA_COPY:
+ case IR_SETJMP:
+ case IR_LONGJMP:
+ case IR_ATOMIC_STORE:
+ case IR_ATOMIC_RMW:
+ case IR_ATOMIC_CAS:
+ case IR_FENCE:
+ case IR_ASM_BLOCK:
+ case IR_INTRINSIC:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+void opt_dce(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ u32 w = 0;
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if ((IROp)in->op == IR_NOP) continue;
+ if (!side_effecting((IROp)in->op) && in->def == VAL_NONE &&
+ in->ndefs == 0 && in->nopnds == 0)
+ continue;
+ bl->insts[w++] = *in;
+ }
+ bl->ninsts = w;
+ }
+}
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -0,0 +1,495 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cfree.h>
+
+#include "abi/abi.h"
+#include "arch/arch.h"
+#include "core/core.h"
+#include "core/heap.h"
+#include "opt/ir.h"
+#include "opt/opt.h"
+#include "type/type.h"
+
+static void* h_alloc(CfreeHeap* h, size_t n, size_t a) {
+ (void)h;
+ (void)a;
+ return n ? malloc(n) : NULL;
+}
+static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+ (void)h;
+ (void)o;
+ (void)a;
+ return realloc(p, n);
+}
+static void h_free(CfreeHeap* h, void* p, size_t n) {
+ (void)h;
+ (void)n;
+ free(p);
+}
+static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL};
+
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+ const char* fmt, va_list ap) {
+ static const char* names[] = {"note", "warning", "error", "fatal"};
+ (void)s;
+ (void)loc;
+ fprintf(stderr, "%s: ", names[k]);
+ vfprintf(stderr, fmt, ap);
+ fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+static int g_fails;
+static int g_checks;
+
+#define EXPECT(cond, ...) \
+ do { \
+ ++g_checks; \
+ if (!(cond)) { \
+ ++g_fails; \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ } \
+ } while (0)
+
+typedef struct TestCtx {
+ Compiler cc;
+ Compiler* c;
+ const Type* i32;
+ const Type* i64;
+} TestCtx;
+
+static void tc_init(TestCtx* tc) {
+ CfreeEnv env;
+ memset(&env, 0, sizeof env);
+ env.heap = &g_heap;
+ env.diag = &g_diag;
+ env.now = -1;
+
+ CfreeTarget tgt;
+ memset(&tgt, 0, sizeof tgt);
+ tgt.arch = CFREE_ARCH_ARM_64;
+ tgt.os = CFREE_OS_LINUX;
+ tgt.obj = CFREE_OBJ_ELF;
+ tgt.ptr_size = 8;
+ tgt.ptr_align = 8;
+
+ static CfreeEnv s_env;
+ s_env = env;
+ compiler_init(&tc->cc, tgt, &s_env);
+ tc->c = &tc->cc;
+ tc->i32 = type_prim(tc->c->global, TY_INT);
+ tc->i64 = type_prim(tc->c->global, TY_LLONG);
+}
+
+static void tc_fini(TestCtx* tc) { compiler_fini(&tc->cc); }
+
+static Operand op_reg_(Reg r, const Type* ty) {
+ Operand o;
+ memset(&o, 0, sizeof o);
+ o.kind = OPK_REG;
+ o.cls = RC_INT;
+ o.type = ty;
+ o.v.reg = r;
+ return o;
+}
+
+static Operand op_imm_(i64 v, const Type* ty) {
+ Operand o;
+ memset(&o, 0, sizeof o);
+ o.kind = OPK_IMM;
+ o.cls = RC_INT;
+ o.type = ty;
+ o.v.imm = v;
+ return o;
+}
+
+static Func* new_func(TestCtx* tc) {
+ CGFuncDesc fd;
+ memset(&fd, 0, sizeof fd);
+ fd.fn_type = type_func(tc->c->global, tc->i32, NULL, 0, 0);
+ Func* f = ir_func_new(tc->c, &fd);
+ f->entry = ir_block_new(f);
+ ir_note_emit(f, f->entry);
+ return f;
+}
+
+static Val add_val(Func* f, const Type* ty) {
+ return ir_alloc_val(f, ty, RC_INT);
+}
+
+static Inst* emit_load_imm(Func* f, u32 b, Val dst, const Type* ty, i64 imm) {
+ Inst* in = ir_emit(f, b, IR_LOAD_IMM);
+ in->opnds = arena_array(f->arena, Operand, 1);
+ in->opnds[0] = op_reg_(dst, ty);
+ in->nopnds = 1;
+ in->def = dst;
+ in->type = ty;
+ in->extra.imm = imm;
+ f->val_def_block[dst] = b;
+ f->val_def_inst[dst] = f->blocks[b].ninsts - 1u;
+ return in;
+}
+
+static Inst* emit_copy(Func* f, u32 b, Val dst, Val src, const Type* ty) {
+ Inst* in = ir_emit(f, b, IR_COPY);
+ in->opnds = arena_array(f->arena, Operand, 2);
+ in->opnds[0] = op_reg_(dst, ty);
+ in->opnds[1] = op_reg_(src, ty);
+ in->nopnds = 2;
+ in->def = dst;
+ in->type = ty;
+ f->val_def_block[dst] = b;
+ f->val_def_inst[dst] = f->blocks[b].ninsts - 1u;
+ return in;
+}
+
+static Inst* emit_binop(Func* f, u32 b, Val dst, Val a, Val c, const Type* ty) {
+ Inst* in = ir_emit(f, b, IR_BINOP);
+ in->opnds = arena_array(f->arena, Operand, 3);
+ in->opnds[0] = op_reg_(dst, ty);
+ in->opnds[1] = op_reg_(a, ty);
+ in->opnds[2] = op_reg_(c, ty);
+ in->nopnds = 3;
+ in->def = dst;
+ in->type = ty;
+ in->extra.imm = BO_IADD;
+ f->val_def_block[dst] = b;
+ f->val_def_inst[dst] = f->blocks[b].ninsts - 1u;
+ return in;
+}
+
+static Inst* emit_call_void(Func* f, u32 b) {
+ Inst* in = ir_emit(f, b, IR_CALL);
+ IRCallAux* aux = arena_znew(f->arena, IRCallAux);
+ in->extra.aux = aux;
+ return in;
+}
+
+static int bit_has(const u64* bits, Val v) {
+ return (bits[v / 64u] & (1ull << (v % 64u))) != 0;
+}
+
+static void setup_one_int_reg(Func* f) {
+ f->opt_hard_regs[RC_INT][0] = 19;
+ f->opt_hard_reg_count[RC_INT] = 1;
+ f->opt_scratch_regs[RC_INT][0] = 9;
+ f->opt_scratch_regs[RC_INT][1] = 10;
+ f->opt_scratch_reg_count[RC_INT] = 2;
+}
+
+static void opt_liveness_branch(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ u32 b0 = f->entry;
+ u32 b1 = ir_block_new(f);
+ u32 b2 = ir_block_new(f);
+ ir_note_emit(f, b1);
+ ir_note_emit(f, b2);
+ Val v = add_val(f, tc.i32);
+ Val c = add_val(f, tc.i32);
+ Val t = add_val(f, tc.i32);
+ Val u = add_val(f, tc.i32);
+ emit_load_imm(f, b0, v, tc.i32, 7);
+ emit_load_imm(f, b0, c, tc.i32, 1);
+ Inst* br = ir_emit(f, b0, IR_CMP_BRANCH);
+ br->opnds = arena_array(f->arena, Operand, 2);
+ br->opnds[0] = op_reg_(c, tc.i32);
+ br->opnds[1] = op_imm_(0, tc.i32);
+ br->nopnds = 2;
+ br->extra.imm = CMP_NE;
+ f->blocks[b0].succ[0] = b1;
+ f->blocks[b0].succ[1] = b2;
+ f->blocks[b0].nsucc = 2;
+ emit_copy(f, b1, t, v, tc.i32);
+ ir_emit(f, b1, IR_RET);
+ emit_copy(f, b2, u, v, tc.i32);
+ ir_emit(f, b2, IR_RET);
+
+ opt_build_cfg(f);
+ opt_live_info(f);
+
+ EXPECT(bit_has(f->blocks[b0].live_out, v), "v%u live_out of branch block",
+ v);
+ EXPECT(bit_has(f->blocks[b1].live_in, v), "v%u live_in true block", v);
+ EXPECT(bit_has(f->blocks[b2].live_in, v), "v%u live_in false block", v);
+ EXPECT(!bit_has(f->blocks[b1].live_out, v), "v%u dies in true block", v);
+ tc_fini(&tc);
+}
+
+static void opt_regalloc_priority(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ setup_one_int_reg(f);
+ Val pinned = add_val(f, tc.i32);
+ Val hot = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, pinned, tc.i32, 1);
+ emit_load_imm(f, f->entry, hot, tc.i32, 2);
+ emit_binop(f, f->entry, out, pinned, hot, tc.i32);
+ ir_emit(f, f->entry, IR_RET);
+ opt_build_cfg(f);
+ opt_live_info(f);
+ f->val_info[pinned].tied_hard_reg = 19;
+ f->val_info[hot].frequency += 1000;
+ opt_regalloc(f, 0);
+ EXPECT(f->val_info[pinned].alloc_kind == OPT_ALLOC_HARD,
+ "tied value should get a hard register");
+ EXPECT(f->val_info[pinned].hard_reg == 19,
+ "tied value should get hard r19, got r%u",
+ (unsigned)f->val_info[pinned].hard_reg);
+ EXPECT(f->val_info[hot].alloc_kind == OPT_ALLOC_SPILL,
+ "overlapping untied value should spill under one-reg pressure");
+ tc_fini(&tc);
+}
+
+static int count_op(Func* f, IROp op) {
+ int n = 0;
+ for (u32 b = 0; b < f->nblocks; ++b)
+ for (u32 i = 0; i < f->blocks[b].ninsts; ++i)
+ if ((IROp)f->blocks[b].insts[i].op == op) ++n;
+ return n;
+}
+
+static void opt_rewrite_spill_use_def(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ setup_one_int_reg(f);
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ Val c = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, a, tc.i32, 1);
+ emit_load_imm(f, f->entry, b, tc.i32, 2);
+ emit_binop(f, f->entry, c, a, b, tc.i32);
+ ir_emit(f, f->entry, IR_RET);
+ opt_build_cfg(f);
+ opt_live_info(f);
+ opt_regalloc(f, 0);
+ EXPECT(f->opt_rewritten, "regalloc should rewrite pseudos");
+ EXPECT(count_op(f, IR_STORE) >= 1, "spill def should insert store");
+ EXPECT(count_op(f, IR_LOAD) >= 1, "spill use should insert reload");
+ int saw_spill_slot = 0;
+ for (u32 i = 0; i < f->nframe_slots; ++i)
+ if (f->frame_slots[i].kind == FS_SPILL) saw_spill_slot = 1;
+ EXPECT(saw_spill_slot, "rewrite should allocate FS_SPILL slot");
+ tc_fini(&tc);
+}
+
+static void opt_call_clobber_preservation(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ setup_one_int_reg(f);
+ Val live = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, live, tc.i32, 11);
+ emit_call_void(f, f->entry);
+ emit_copy(f, f->entry, out, live, tc.i32);
+ ir_emit(f, f->entry, IR_RET);
+ opt_build_cfg(f);
+ opt_live_info(f);
+ opt_regalloc(f, 0);
+
+ Block* b = &f->blocks[f->entry];
+ int saw_call_save_restore = 0;
+ for (u32 i = 1; i + 1 < b->ninsts; ++i) {
+ if ((IROp)b->insts[i].op == IR_CALL &&
+ (IROp)b->insts[i - 1u].op == IR_STORE &&
+ (IROp)b->insts[i + 1u].op == IR_LOAD) {
+ saw_call_save_restore = 1;
+ }
+ }
+ EXPECT(saw_call_save_restore,
+ "live hard reg across call should be stored before and loaded after");
+ tc_fini(&tc);
+}
+
+static void opt_spill_pressure(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ setup_one_int_reg(f);
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ Val c = add_val(f, tc.i32);
+ Val d = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, a, tc.i32, 1);
+ emit_load_imm(f, f->entry, b, tc.i32, 2);
+ emit_load_imm(f, f->entry, c, tc.i32, 3);
+ emit_binop(f, f->entry, d, a, b, tc.i32);
+ emit_binop(f, f->entry, d, d, c, tc.i32);
+ ir_emit(f, f->entry, IR_RET);
+ opt_build_cfg(f);
+ opt_live_info(f);
+ opt_regalloc(f, 0);
+
+ int spills = 0;
+ for (Val v = 1; v < f->nvals; ++v)
+ if (f->val_info[v].alloc_kind == OPT_ALLOC_SPILL) ++spills;
+ EXPECT(spills >= 2, "one hard reg should force multiple spills, got %d",
+ spills);
+ EXPECT(count_op(f, IR_LOAD) >= 2, "spilled uses should reload");
+ EXPECT(count_op(f, IR_STORE) >= 2, "spilled defs should store");
+ tc_fini(&tc);
+}
+
+static void opt_inline_asm_tied_fixed_regs(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ setup_one_int_reg(f);
+ Val tied = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, tied, tc.i32, 5);
+
+ Inst* in = ir_emit(f, f->entry, IR_ASM_BLOCK);
+ IRAsmAux* aux = arena_znew(f->arena, IRAsmAux);
+ aux->tmpl = "add %0, %0, #1";
+ aux->nout = 1;
+ aux->nin = 1;
+ aux->outs = arena_array(f->arena, AsmConstraint, 1);
+ aux->ins = arena_array(f->arena, AsmConstraint, 1);
+ aux->out_ops = arena_array(f->arena, Operand, 1);
+ aux->in_ops = arena_array(f->arena, Operand, 1);
+ memset(aux->outs, 0, sizeof(AsmConstraint));
+ memset(aux->ins, 0, sizeof(AsmConstraint));
+ aux->outs[0].str = "+r";
+ aux->outs[0].dir = ASM_INOUT;
+ aux->outs[0].type = tc.i32;
+ aux->ins[0].str = "0";
+ aux->ins[0].dir = ASM_IN;
+ aux->ins[0].type = tc.i32;
+ aux->out_ops[0] = op_reg_(tied, tc.i32);
+ aux->in_ops[0] = op_reg_(tied, tc.i32);
+ in->extra.aux = aux;
+ ir_emit(f, f->entry, IR_RET);
+
+ opt_build_cfg(f);
+ opt_live_info(f);
+ f->val_info[tied].tied_hard_reg = 19;
+ opt_regalloc(f, 0);
+ aux = (IRAsmAux*)in->extra.aux;
+ EXPECT(f->val_info[tied].alloc_kind == OPT_ALLOC_HARD,
+ "tied asm val should allocate hard");
+ EXPECT(f->val_info[tied].hard_reg == 19,
+ "tied asm val should get r19");
+ EXPECT(aux->out_ops[0].v.reg == 19 && aux->in_ops[0].v.reg == 19,
+ "asm tied operands should rewrite to the fixed hard reg");
+ tc_fini(&tc);
+}
+
+static void opt_post_rewrite_dce(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ setup_one_int_reg(f);
+ Val a = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, a, tc.i32, 1);
+ emit_copy(f, f->entry, a, a, tc.i32);
+ ir_emit(f, f->entry, IR_NOP);
+ ir_emit(f, f->entry, IR_RET);
+ opt_build_cfg(f);
+ opt_live_info(f);
+ opt_regalloc(f, 0);
+ opt_combine(f);
+ opt_dce(f);
+ EXPECT(count_op(f, IR_COPY) == 0, "noop physical copy should be removed");
+ EXPECT(count_op(f, IR_NOP) == 0, "post-rewrite DCE should remove nops");
+ tc_fini(&tc);
+}
+
+typedef struct MockEmit {
+ CGTarget base;
+ int alloc_calls;
+ int load_imm_calls;
+ Reg last_dst;
+} MockEmit;
+
+static Reg me_alloc(CGTarget* t, RegClass cls, const Type* ty) {
+ (void)cls;
+ (void)ty;
+ MockEmit* m = (MockEmit*)t;
+ ++m->alloc_calls;
+ return 1000u + (Reg)m->alloc_calls;
+}
+static void me_func_begin(CGTarget* t, const CGFuncDesc* d) {
+ (void)t;
+ (void)d;
+}
+static void me_func_end(CGTarget* t) { (void)t; }
+static void me_load_imm(CGTarget* t, Operand dst, i64 imm) {
+ (void)imm;
+ MockEmit* m = (MockEmit*)t;
+ ++m->load_imm_calls;
+ m->last_dst = dst.v.reg;
+}
+static void me_ret(CGTarget* t, const CGABIValue* v) {
+ (void)t;
+ (void)v;
+}
+static FrameSlot me_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+ (void)t;
+ (void)d;
+ static FrameSlot next = 1;
+ return next++;
+}
+static void me_set_loc(CGTarget* t, SrcLoc loc) {
+ (void)t;
+ (void)loc;
+}
+
+static void mock_emit_init(MockEmit* m, Compiler* c) {
+ memset(m, 0, sizeof *m);
+ m->base.c = c;
+ m->base.func_begin = me_func_begin;
+ m->base.func_end = me_func_end;
+ m->base.alloc_reg = me_alloc;
+ m->base.frame_slot = me_frame_slot;
+ m->base.load_imm = me_load_imm;
+ m->base.ret = me_ret;
+ m->base.set_loc = me_set_loc;
+}
+
+static void opt_emit_no_virtual_alloc(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ setup_one_int_reg(f);
+ Val a = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, a, tc.i32, 42);
+ ir_emit(f, f->entry, IR_RET);
+ opt_build_cfg(f);
+ opt_live_info(f);
+ opt_regalloc(f, 0);
+ MockEmit m;
+ mock_emit_init(&m, tc.c);
+ opt_emit(tc.c, f, &m.base);
+ EXPECT(m.alloc_calls == 0, "opt_emit should not allocate virtual regs");
+ EXPECT(m.load_imm_calls == 1, "expected one emitted load_imm");
+ EXPECT(m.last_dst == 19, "emitted hard dst should be r19, got r%u",
+ (unsigned)m.last_dst);
+ tc_fini(&tc);
+}
+
+int main(void) {
+ opt_liveness_branch();
+ opt_regalloc_priority();
+ opt_rewrite_spill_use_def();
+ opt_call_clobber_preservation();
+ opt_spill_pressure();
+ opt_inline_asm_tied_fixed_regs();
+ opt_post_rewrite_dce();
+ opt_emit_no_virtual_alloc();
+ if (g_fails) {
+ fprintf(stderr, "opt tests: %d failed (%d checks)\n", g_fails, g_checks);
+ return 1;
+ }
+ printf("opt tests: PASS (%d checks)\n", g_checks);
+ return 0;
+}
diff --git a/test/opt/run.sh b/test/opt/run.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+"$ROOT/build/test/opt_test"
diff --git a/test/test.mk b/test/test.mk
@@ -29,7 +29,7 @@
# parse_asm / cfree_disasm_iter_* are still stubs; the harness builds
# and runs end-to-end so the wiring stays exercised. See doc/ASM.md.
-.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
+.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-lib-deps
@@ -177,6 +177,15 @@ test-link: lib $(ROUNDTRIP_BIN) $(ROUNDTRIP_BIN_MACHO) $(LINK_EXE_RUNNER) $(JIT_
test-cg: lib $(ROUNDTRIP_BIN) $(LINK_EXE_RUNNER) $(JIT_RUNNER)
bash test/cg/run.sh
+OPT_TEST_BIN = build/test/opt_test
+
+test-opt: $(OPT_TEST_BIN)
+ bash test/opt/run.sh
+
+$(OPT_TEST_BIN): test/opt/opt_test.c $(LIB_AR)
+ @mkdir -p $(dir $@)
+ $(CC) $(DRIVER_CFLAGS) -Isrc test/opt/opt_test.c $(LIB_AR) -o $@
+
test-parse: lib $(PARSE_RUNNER) $(ROUNDTRIP_BIN) $(LINK_EXE_RUNNER) $(JIT_RUNNER)
bash test/parse/run.sh