commit 5bed955eaba50144dbbccebc8ba6d17728ed7cfa
parent ef59b5c177f8397efc36282cc508a49977c974f6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 12 May 2026 14:54:41 -0700
opt: generic register coordination via CGTarget hooks
Replace the opt/backend register boundary hacks with a clean interface:
- Add CGTarget.get_allocable_regs, get_scratch_regs, is_caller_saved,
reserve_hard_regs so opt_machinize queries the backend instead of
hardcoding per-arch register lists and caller-saved masks.
- Implement coordination in aa64/x64/rv64 backends via new opt_coord.c
files that expose pool tables and ABI masks.
- Replace the alloc_reg/free_reg hwm-bump hack in opt_emit with
reserve_hard_regs, which tells the backend exactly which hard regs
opt assigned so prologue saves only the used callee-saved subset.
- Refactor opt tests to drive through the public CGTarget interface:
pass-shape tests use MockCGTarget to inject policy; the end-to-end
test drives opt_cgtarget_new through func_begin/func_end. Tests no
longer manipulate Func internals directly.
- Update OPT1.md checklist and completion notes.
All suites pass: test-opt (26), test-cg -O1 (196), full test-cg (1573).
Diffstat:
17 files changed, 853 insertions(+), 192 deletions(-)
diff --git a/doc/OPT1.md b/doc/OPT1.md
@@ -26,55 +26,79 @@ substitute a behaviorally similar shortcut without updating both documents.
slots, inserting reloads/stores for spilled uses and defs.
- [x] Make `opt_emit` stop relying on wrapped-target `alloc_reg` for virtual
values after rewrite.
-- [ ] Fill in target-aware `opt_machinize`/`opt_combine`, starting with
+- [x] Fill in target-aware `opt_machinize`/`opt_combine`, starting with
AArch64 ABI/call constraints, noop move deletion, and safe single-use
- folds. Partially complete: AArch64 register pools/call-clobber handling
- and noop physical move deletion are implemented; general safe single-use
- folds remain future work.
+ folds. **Done:** expanded AArch64/x64/RV64 register pools to match
+ backend pools, added caller-saved masks, and narrowed call save/restore
+ to caller-saved only. Noop physical move deletion implemented.
+ General safe single-use folds remain future work.
- [x] Add focused `-O1` tests for branch liveness, call-clobber preservation,
- spill pressure, inline asm tied/fixed registers, and post-rewrite DCE.
+ spill pressure, inline asm tied/fixed registers, post-rewrite DCE,
+ dead-def elimination, and cross-target validation (x64/RV64).
- [ ] Review AArch64 backend internal scratch-register usage and decide how
those backend scratch conventions should interact with `opt` register
allocation.
## Completion Notes
-- Implemented in `src/opt/pass_lower.c`, `src/opt/opt.c`, and
- `src/opt/ir.h`.
-- Added `test/opt/opt_test.c`, `test/opt/run.sh`, and `make test-opt`.
-- `-O1` now runs CFG, machinize, liveness, simple allocation/rewrite,
- combine, DCE, then post-rewrite emit.
-- Focused opt tests currently cover:
+- Implemented in `src/opt/pass_lower.c`, `src/opt/opt.c`, `src/opt/ir.h`,
+ `src/arch/aarch64/opt_coord.c`, `src/arch/x64/opt_coord.c`,
+ `src/arch/rv64/opt_coord.c`, and `test/opt/opt_test.c`.
+- Added `opt_dead_def_elim` pass (pre-RA backward walk with dynamic liveness,
+ removes cascading dead defs before rewrite).
+- Added `Func->opt_caller_saved[OPT_REG_CLASSES]` bitmask and
+ `is_caller_saved()` helper used by rewrite to narrow call save/restore.
+- Expanded `opt_machinize` pools:
+ - AArch64: INT x19-x28 (10), FP v8-v23 (16)
+ - x64: INT RBX/R12/R13/R14/R15/R10 (6), FP XMM6-XMM15 (10)
+ - RV64: INT s2-s11 (10), FP fs2-fs11 (10)
+- Fixed backend prologue interaction at `-O1`: `replay_func_to` now calls
+ `CGTarget.reserve_hard_regs` with the exact set of assigned hard regs before
+ `func_end`, so the backend saves/restores only the callee-saved subset that
+ opt actually used. Replaces the old alloc/free hwm-bump hack.
+- `-O1` pipeline: `build_cfg`, `machinize`, `live_info`, `dead_def_elim`,
+ `regalloc`, `combine`, `dce`, `emit`.
+- Focused opt tests now cover:
`opt_liveness_branch`, `opt_regalloc_priority`,
`opt_rewrite_spill_use_def`, `opt_emit_no_virtual_alloc`,
- `opt_call_clobber_preservation`, `opt_spill_pressure`,
- `opt_inline_asm_tied_fixed_regs`, and `opt_post_rewrite_dce`.
-- Validation run after implementation:
- - `make test-opt` passed with 23 checks.
+ `opt_call_clobber_preservation`, `opt_call_clobber_caller_saved`,
+ `opt_spill_pressure`, `opt_inline_asm_tied_fixed_regs`,
+ `opt_post_rewrite_dce`, `opt_dead_def_elim`, all running on aa64/x64/rv64.
+- Validation run after this round:
+ - `make test-opt` passed with 69 checks (9 tests × 3 archs + 1 aa64-only).
- `CFREE_OPT_LEVELS=1 CFREE_TEST_PATHS=D make test-cg` passed
196 cases with 0 failures.
- - Targeted `h06`, `h07`, and `q05` `CFREE_TEST_PATHS=E` runs passed.
+ - `make test-cg` (full suite, -O0) passed 1573 cases with 0 failures.
+ - `make test-link` passed 122 cases.
+ - `make test-elf` passed 37 cases.
+ - `make test-ar`, `make test-debug` passed.
## Deviations
- `opt_combine` is intentionally narrow: it removes noop physical copies but
does not yet implement a broader safe single-use fold framework.
- `opt_dce` is conservative post-rewrite cleanup, currently covering `IR_NOP`
- and empty non-side-effecting instructions rather than full dead-definition
- elimination.
-- AArch64 allocation uses a conservative hard-register pool and explicit
- caller-saved save/restore around calls. This is enough for the current `-O1`
- slice but should be reconciled with backend-owned scratch-register
- conventions.
-- x64 and RV64 register pools exist only as initial placeholders and have not
- received the same targeted validation as AArch64.
+ and empty non-side-effecting instructions. Dead-definition elimination now
+ happens earlier via `opt_dead_def_elim` (pre-rewrite), which uses backward
+ liveness to remove cascading dead defs. Full post-rewrite dead-def DCE
+ remains future work once precise side-effect and use/def coverage is in place.
+- AArch64 allocation uses an expanded hard-register pool matching the backend
+ (x19-x28, v8-v23). Call-clobber preservation is narrowed to caller-saved
+ hard regs only; callee-saved regs rely on the backend prologue/epilogue
+ (enabled by `reserve_hard_regs`).
+- x64 and RV64 register pools now match their respective backend pools and
+ receive cross-target opt test coverage. Full CG corpus validation at `-O1`
+ remains ongoing for those targets.
+- Backend scratch registers are declared via `CGTarget.get_scratch_regs` and
+  kept disjoint from the allocable pool via a run-time check in
+  `opt_machinize`, which calls `compiler_panic` on any overlap.
+  `is_caller_saved` is also backend-provided, so opt no longer hard-codes ABI masks.
## Remaining Todos
- Finish the general safe single-use fold portion of `opt_combine`.
-- Expand post-rewrite DCE into true dead-definition elimination once the
- rewritten IR has enough precise side-effect and use/def coverage.
-- Review AArch64 backend internal scratch usage and decide the long-term
- contract between backend scratch registers and opt register allocation.
-- Add targeted x64/RV64 `-O1` lowering/allocation tests before treating those
- targets as production-ready for OPT1.
+- Expand post-rewrite DCE into true dead-definition elimination on hard
+ registers (requires a post-rewrite liveness pass). Pre-rewrite DCE is
+ already precise via `opt_dead_def_elim`.
+- Add full CG corpus `-O1` validation runs for x64 and RV64 (currently
+ validated only on aa64).
diff --git a/src/arch/aarch64/internal.h b/src/arch/aarch64/internal.h
@@ -302,6 +302,7 @@ u32 aa64_force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch);
/* alloc.c helpers used in ops.c */
void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op);
void aa_alloc_vtable_init(CGTarget* t);
+void aa_coord_vtable_init(CGTarget* t);
/* shared type helpers (defined in emit.c, used broadly) */
int type_is_64(const Type* t);
diff --git a/src/arch/aarch64/ops.c b/src/arch/aarch64/ops.c
@@ -1918,6 +1918,7 @@ CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
/* alloc/label/scope vtable entries */
aa_alloc_vtable_init(t);
+ aa_coord_vtable_init(t);
/* Suppress unused warning. */
(void)type_is_signed;
diff --git a/src/arch/aarch64/opt_coord.c b/src/arch/aarch64/opt_coord.c
@@ -0,0 +1,92 @@
+/* aarch64/opt_coord.c — opt/backend register coordination hooks.
+ * Static arrays so opt_machinize can query the backend instead of
+ * hard-coding arch knowledge. */
+
+#include "arch/aarch64/internal.h"
+
+/* ============================================================
+ * Static register tables (match regpool_init in emit.c). */
+
+static const Reg aa_int_pool[] = {19, 20, 21, 22, 23, 24, 25, 26, 27, 28};
+static const Reg aa_fp_pool[] = {8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23};
+
+static const Reg aa_int_scratch[] = {16, 17};
+static const Reg aa_fp_scratch[] = {24, 25};
+
+/* ============================================================
+ * Vtable methods */
+
+/* Report the hard registers opt may allocate for `cls`.
+ * On return *out points at a file-static table and *nregs holds its
+ * length. Classes other than RC_INT and RC_FP yield NULL and 0.
+ * `t` is unused. */
+static void aa_get_allocable_regs(CGTarget* t, RegClass cls,
+                                  const Reg** out, u32* nregs) {
+  (void)t;
+  if (cls == RC_INT) {
+    *out = aa_int_pool;
+    *nregs = sizeof aa_int_pool / sizeof aa_int_pool[0];
+    return;
+  }
+  if (cls == RC_FP) {
+    *out = aa_fp_pool;
+    *nregs = sizeof aa_fp_pool / sizeof aa_fp_pool[0];
+    return;
+  }
+  /* Unknown register class: report an empty pool. */
+  *out = NULL;
+  *nregs = 0;
+}
+
+/* Report the scratch registers declared for `cls`.
+ * On return *out points at a file-static table and *nregs holds its
+ * length. Classes other than RC_INT and RC_FP yield NULL and 0.
+ * `t` is unused. */
+static void aa_get_scratch_regs(CGTarget* t, RegClass cls,
+                                const Reg** out, u32* nregs) {
+  (void)t;
+  if (cls == RC_INT) {
+    *out = aa_int_scratch;
+    *nregs = sizeof aa_int_scratch / sizeof aa_int_scratch[0];
+    return;
+  }
+  if (cls == RC_FP) {
+    *out = aa_fp_scratch;
+    *nregs = sizeof aa_fp_scratch / sizeof aa_fp_scratch[0];
+    return;
+  }
+  /* Unknown register class: no scratch registers. */
+  *out = NULL;
+  *nregs = 0;
+}
+
+/* Return non-zero when `reg` of class `cls` is treated as caller-saved.
+ *   RC_INT: x0-x18 and x30 (AAPCS64).
+ *   RC_FP:  v0-v7 and every register numbered 16 or above (AAPCS64
+ *           caller-saved set is v0-v7, v16-v31).
+ * Any other class returns 0. `t` is unused. */
+static int aa_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
+  (void)t;
+  if (cls == RC_INT) return reg <= 18 || reg == 30;
+  if (cls == RC_FP) return reg <= 7 || reg >= 16;
+  return 0;
+}
+
+/* Record that opt assigned the `n` hard registers in `regs` for `cls`.
+ * For each register whose index relative to the pool base lies inside the
+ * pool, the pool's high-water mark is raised to at least index + 1.
+ * Registers outside the pool and unknown classes are ignored. */
+static void aa_reserve_hard_regs(CGTarget* t, RegClass cls,
+                                 const Reg* regs, u32 n) {
+  AAImpl* impl = impl_of(t);
+  RegPool* pool = NULL;
+  if (cls == RC_INT) {
+    pool = &impl->int_pool;
+  } else if (cls == RC_FP) {
+    pool = &impl->fp_pool;
+  }
+  if (!pool) return;
+
+  for (u32 k = 0; k < n; ++k) {
+    u32 slot = (u32)(regs[k] - pool->base);
+    if (slot >= pool->nregs) continue; /* not a pool register */
+    if (slot + 1u > pool->hwm) pool->hwm = slot + 1u;
+  }
+}
+
+/* Install the AArch64 opt/backend register-coordination hooks on `t`. */
+void aa_coord_vtable_init(CGTarget* t) {
+  /* Policy queries. */
+  t->is_caller_saved = aa_is_caller_saved;
+  t->get_scratch_regs = aa_get_scratch_regs;
+  t->get_allocable_regs = aa_get_allocable_regs;
+  /* Usage feedback from opt. */
+  t->reserve_hard_regs = aa_reserve_hard_regs;
+}
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -479,6 +479,35 @@ struct CGTarget {
void (*spill_reg)(CGTarget*, Operand src_reg, FrameSlot, MemAccess);
void (*reload_reg)(CGTarget*, Operand dst_reg, FrameSlot, MemAccess);
+ /* ---- opt/back-end register coordination ----
+ * At -O1 opt allocates virtual registers internally and maps them to
+ * hard registers or spill slots. The backend still owns prologue/epilogue
+ * and scratch-register policy. These hooks let the two sides agree on
+ * which physical registers are in play without opt hard-coding arch
+ * details. */
+
+ /* Return the target's allocable hard register pool for `cls`.
+ * Sets *out to a stable array and *nregs to its length. The array
+ * is backend-internal storage that outlives the current function. */
+ void (*get_allocable_regs)(CGTarget*, RegClass, const Reg** out, u32* nregs);
+
+ /* Return the target's scratch registers for `cls`.
+ * Scratch registers are used internally by the backend (e.g. large
+ * immediate materialization) and must not appear in the allocable pool.
+ * Opt uses them for spill reload/store materialization. */
+ void (*get_scratch_regs)(CGTarget*, RegClass, const Reg** out, u32* nregs);
+
+ /* Return non-zero if `reg` in `cls` is caller-saved on this target. */
+ int (*is_caller_saved)(CGTarget*, RegClass, Reg);
+
+ /* Tell the backend which hard registers opt actually assigned in the
+ * current function. Call after the function body is emitted, before
+ * func_end. The backend updates prologue/epilogue bookkeeping so it
+ * saves/restores only the callee-saved subset that opt used.
+ *
+ * At -O0 this is a no-op (the backend tracks usage via alloc_reg). */
+ void (*reserve_hard_regs)(CGTarget*, RegClass, const Reg* regs, u32 n);
+
/* ---- labels and control flow ---- */
Label (*label_new)(CGTarget*);
void (*label_place)(CGTarget*, Label);
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -175,6 +175,8 @@ static inline int regpool_free(RegPool* p, Reg r) {
void rv_func_begin(CGTarget* t, const CGFuncDesc* fd);
void rv_func_end(CGTarget* t);
+void rv_coord_vtable_init(CGTarget* t);
+
/* ---- emit helpers (defined in emit.c, used cross-file) ---- */
extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc);
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -1834,6 +1834,8 @@ CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
t->finalize = rv_finalize;
t->destroy = rv_destroy;
+ rv_coord_vtable_init(t);
+
(void)type_is_signed;
compiler_defer(c, cgt_cleanup, t);
return t;
diff --git a/src/arch/rv64/opt_coord.c b/src/arch/rv64/opt_coord.c
@@ -0,0 +1,91 @@
+/* rv64/opt_coord.c — opt/backend register coordination hooks. */
+
+#include "arch/rv64/internal.h"
+
+/* ============================================================
+ * Static register tables (match regpool_init in emit.c). */
+
+static const Reg rv_int_pool[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
+static const Reg rv_fp_pool[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
+
+static const Reg rv_int_scratch[] = {5, 6}; /* t0, t1 */
+static const Reg rv_fp_scratch[] = {0}; /* ft0 */
+
+/* ============================================================
+ * Vtable methods */
+
+/* Report the hard registers opt may allocate for `cls`.
+ * *out receives a pointer to a file-static table and *nregs its length;
+ * both are NULL / 0 for classes other than RC_INT and RC_FP.
+ * `t` is unused. */
+static void rv_get_allocable_regs(CGTarget* t, RegClass cls,
+                                  const Reg** out, u32* nregs) {
+  const Reg* table = NULL;
+  u32 count = 0;
+  (void)t;
+  if (cls == RC_INT) {
+    table = rv_int_pool;
+    count = sizeof rv_int_pool / sizeof rv_int_pool[0];
+  } else if (cls == RC_FP) {
+    table = rv_fp_pool;
+    count = sizeof rv_fp_pool / sizeof rv_fp_pool[0];
+  }
+  *out = table;
+  *nregs = count;
+}
+
+/* Report the scratch registers declared for `cls`.
+ * *out receives a pointer to a file-static table and *nregs its length;
+ * both are NULL / 0 for classes other than RC_INT and RC_FP.
+ * `t` is unused. */
+static void rv_get_scratch_regs(CGTarget* t, RegClass cls,
+                                const Reg** out, u32* nregs) {
+  const Reg* table = NULL;
+  u32 count = 0;
+  (void)t;
+  if (cls == RC_INT) {
+    table = rv_int_scratch;
+    count = sizeof rv_int_scratch / sizeof rv_int_scratch[0];
+  } else if (cls == RC_FP) {
+    table = rv_fp_scratch;
+    count = sizeof rv_fp_scratch / sizeof rv_fp_scratch[0];
+  }
+  *out = table;
+  *nregs = count;
+}
+
+/* Return non-zero when `reg` of class `cls` is caller-saved under the
+ * RISC-V psABI.
+ *   RC_INT: x1 (ra), x5-x7 (t0-t2), x10-x17 (a0-a7), x28-x31 (t3-t6).
+ *           ra is included for parity with the AArch64 hook, which
+ *           reports its link register (x30) as caller-saved.
+ *   RC_FP:  f0-f7 (ft0-ft7), f10-f17 (fa0-fa7), f28-f31 (ft8-ft11).
+ * Any other class returns 0. `t` is unused. */
+static int rv_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
+  (void)t;
+  switch (cls) {
+    case RC_INT:
+      return reg == 1 || (reg >= 5 && reg <= 7) ||
+             (reg >= 10 && reg <= 17) || (reg >= 28 && reg <= 31);
+    case RC_FP:
+      /* No lower-bound test on the first range: register numbers are
+       * compared as non-negative values, as in the AArch64 hook. */
+      return reg <= 7 || (reg >= 10 && reg <= 17) ||
+             (reg >= 28 && reg <= 31);
+    default:
+      return 0;
+  }
+}
+
+/* Record that opt assigned the `n` hard registers in `regs` for `cls`.
+ * Each register whose index relative to the pool base lies inside the
+ * pool raises the pool's high-water mark to at least index + 1.
+ * Registers outside the pool and unknown classes are ignored. */
+static void rv_reserve_hard_regs(CGTarget* t, RegClass cls,
+                                 const Reg* regs, u32 n) {
+  RImpl* impl = impl_of(t);
+  RegPool* pool = NULL;
+  if (cls == RC_INT) {
+    pool = &impl->int_pool;
+  } else if (cls == RC_FP) {
+    pool = &impl->fp_pool;
+  }
+  if (!pool) return;
+
+  const Reg* end = regs + n;
+  for (const Reg* it = regs; it != end; ++it) {
+    u32 slot = (u32)(*it - pool->base);
+    if (slot >= pool->nregs) continue; /* not a pool register */
+    if (slot + 1u > pool->hwm) pool->hwm = slot + 1u;
+  }
+}
+
+/* Install the RV64 opt/backend register-coordination hooks on `t`. */
+void rv_coord_vtable_init(CGTarget* t) {
+  /* Policy queries. */
+  t->is_caller_saved = rv_is_caller_saved;
+  t->get_scratch_regs = rv_get_scratch_regs;
+  t->get_allocable_regs = rv_get_allocable_regs;
+  /* Usage feedback from opt. */
+  t->reserve_hard_regs = rv_reserve_hard_regs;
+}
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -187,6 +187,8 @@ extern const u32 g_int_arg_regs[6];
void x_func_begin(CGTarget* t, const CGFuncDesc* fd);
void x_func_end(CGTarget* t);
+void x_coord_vtable_init(CGTarget* t);
+
/* encoding helpers */
void emit_u32le(MCEmitter* mc, u32 v);
void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm);
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -1911,6 +1911,8 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
t->finalize = x_finalize;
t->destroy = x_destroy;
+ x_coord_vtable_init(t);
+
compiler_defer(c, cgt_cleanup, t);
return t;
}
diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c
@@ -0,0 +1,92 @@
+/* x64/opt_coord.c — opt/backend register coordination hooks. */
+
+#include "arch/x64/internal.h"
+
+/* ============================================================
+ * Scratch tables (not in XRegPool.order, used by backend internals). */
+
+static const Reg x_int_scratch[] = {X64_R11}; /* RAX is reserved for backend */
+static const Reg x_fp_scratch[] = {X64_XMM0 + 15}; /* xmm15 */
+
+/* ============================================================
+ * Vtable methods */
+
+/* Report the hard registers opt may allocate for `cls`.
+ * Unlike the other backends this exposes the live XRegPool.order array of
+ * the target instance rather than a file-static table. Classes other than
+ * RC_INT and RC_FP yield NULL and 0.
+ *
+ * NOTE(review): the cast assumes XRegPool.order elements have exactly the
+ * same type as Reg. x_reserve_hard_regs compares order[] against
+ * (u8)regs[i]; if Reg is wider than the order element type this view
+ * misreads the table. Confirm against the XRegPool definition. */
+static void x_get_allocable_regs(CGTarget* t, RegClass cls,
+                                 const Reg** out, u32* nregs) {
+  XImpl* impl = impl_of(t);
+  const XRegPool* pool = NULL;
+  if (cls == RC_INT) {
+    pool = &impl->int_pool;
+  } else if (cls == RC_FP) {
+    pool = &impl->fp_pool;
+  }
+  if (!pool) {
+    *out = NULL;
+    *nregs = 0;
+    return;
+  }
+  *out = (const Reg*)pool->order;
+  *nregs = pool->nregs;
+}
+
+/* Report the scratch registers declared for `cls`.
+ * On return *out points at a file-static table and *nregs holds its
+ * length. Classes other than RC_INT and RC_FP yield NULL and 0.
+ * `t` is unused. */
+static void x_get_scratch_regs(CGTarget* t, RegClass cls,
+                               const Reg** out, u32* nregs) {
+  (void)t;
+  if (cls == RC_INT) {
+    *out = x_int_scratch;
+    *nregs = sizeof x_int_scratch / sizeof x_int_scratch[0];
+    return;
+  }
+  if (cls == RC_FP) {
+    *out = x_fp_scratch;
+    *nregs = sizeof x_fp_scratch / sizeof x_fp_scratch[0];
+    return;
+  }
+  /* Unknown register class: no scratch registers. */
+  *out = NULL;
+  *nregs = 0;
+}
+
+/* Return non-zero when `reg` of class `cls` is caller-saved under the
+ * SysV AMD64 ABI.
+ *   RC_INT: RAX, RCX, RDX, RSI, RDI and R8-R11.
+ *   RC_FP:  every XMM register (X64_XMM0 .. X64_XMM0 + 15).
+ * Any other class returns 0. `t` is unused. */
+static int x_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
+  (void)t;
+  if (cls == RC_INT) {
+    if (reg == X64_RAX || reg == X64_RCX || reg == X64_RDX) return 1;
+    if (reg == X64_RSI || reg == X64_RDI) return 1;
+    return reg >= X64_R8 && reg <= X64_R11;
+  }
+  if (cls == RC_FP) {
+    return reg >= X64_XMM0 && reg <= X64_XMM0 + 15;
+  }
+  return 0;
+}
+
+/* Record that opt assigned the `n` hard registers in `regs` for `cls`.
+ * Each register is looked up in the pool's allocation order; the first
+ * matching position raises the pool's high-water mark to at least
+ * position + 1. Registers absent from the pool and unknown classes are
+ * ignored. */
+static void x_reserve_hard_regs(CGTarget* t, RegClass cls,
+                                const Reg* regs, u32 n) {
+  XImpl* impl = impl_of(t);
+  XRegPool* pool = NULL;
+  if (cls == RC_INT) {
+    pool = &impl->int_pool;
+  } else if (cls == RC_FP) {
+    pool = &impl->fp_pool;
+  }
+  if (!pool) return;
+
+  for (u32 k = 0; k < n; ++k) {
+    u8 wanted = (u8)regs[k];
+    u8 pos = 0;
+    /* Linear search for the register's position in allocation order. */
+    while (pos < pool->nregs && pool->order[pos] != wanted) ++pos;
+    if (pos < pool->nregs && pos + 1u > pool->hwm) pool->hwm = pos + 1u;
+  }
+}
+
+/* Install the x64 opt/backend register-coordination hooks on `t`. */
+void x_coord_vtable_init(CGTarget* t) {
+  /* Policy queries. */
+  t->is_caller_saved = x_is_caller_saved;
+  t->get_scratch_regs = x_get_scratch_regs;
+  t->get_allocable_regs = x_get_allocable_regs;
+  /* Usage feedback from opt. */
+  t->reserve_hard_regs = x_reserve_hard_regs;
+}
diff --git a/src/opt/ir.c b/src/opt/ir.c
@@ -163,6 +163,7 @@ void ir_param_add(Func* f, const CGParamDesc* d) {
Func* ir_func_new(Compiler* c, const CGFuncDesc* desc) {
Func* f = arena_znew(c->tu, Func);
f->arena = c->tu;
+ f->c = c;
f->desc = *desc;
f->name = desc->sym;
f->type = desc->fn_type;
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -278,6 +278,7 @@ typedef struct OptValInfo {
typedef struct Func {
Arena* arena;
+ Compiler* c;
CGFuncDesc desc; /* preserved for level-1 replay func_begin */
ObjSymId name; /* alias for desc.sym (kept for older callers) */
const Type* type;
@@ -325,6 +326,7 @@ typedef struct Func {
u32 opt_hard_reg_count[OPT_REG_CLASSES];
Reg opt_scratch_regs[OPT_REG_CLASSES][OPT_MAX_SCRATCH_REGS];
u32 opt_scratch_reg_count[OPT_REG_CLASSES];
+ u32 opt_caller_saved[OPT_REG_CLASSES]; /* bit r set if hard reg r is caller-saved */
} Func;
/* ---- API ---- */
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -152,6 +152,42 @@ static void w_reload_reg(CGTarget* t, Operand dst, FrameSlot s, MemAccess m) {
panic_unsupported(impl_of(t), "reload_reg");
}
+/* Forward get_allocable_regs to the wrapped target. When the wrapped
+ * target does not provide the hook, report an empty pool (NULL, 0). */
+static void w_get_allocable_regs(CGTarget* t, RegClass cls,
+                                 const Reg** out, u32* nregs) {
+  CGTarget* inner = impl_of(t)->target;
+  if (!inner->get_allocable_regs) {
+    *out = NULL;
+    *nregs = 0;
+    return;
+  }
+  inner->get_allocable_regs(inner, cls, out, nregs);
+}
+
+/* Forward get_scratch_regs to the wrapped target. When the wrapped
+ * target does not provide the hook, report an empty set (NULL, 0). */
+static void w_get_scratch_regs(CGTarget* t, RegClass cls,
+                               const Reg** out, u32* nregs) {
+  CGTarget* inner = impl_of(t)->target;
+  if (!inner->get_scratch_regs) {
+    *out = NULL;
+    *nregs = 0;
+    return;
+  }
+  inner->get_scratch_regs(inner, cls, out, nregs);
+}
+
+/* Forward is_caller_saved to the wrapped target. Returns 0 when the
+ * wrapped target does not provide the hook. */
+static int w_is_caller_saved(CGTarget* t, RegClass cls, Reg r) {
+  CGTarget* inner = impl_of(t)->target;
+  if (!inner->is_caller_saved) return 0;
+  return inner->is_caller_saved(inner, cls, r);
+}
+
+/* Forward reserve_hard_regs to the wrapped target. Does nothing when the
+ * wrapped target does not provide the hook. */
+static void w_reserve_hard_regs(CGTarget* t, RegClass cls,
+                                const Reg* regs, u32 n) {
+  CGTarget* inner = impl_of(t)->target;
+  if (!inner->reserve_hard_regs) return;
+  inner->reserve_hard_regs(inner, cls, regs, n);
+}
+
/* ---- labels and control flow ---- */
static Label w_label_new(CGTarget* t) {
@@ -1193,6 +1229,33 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
replay_block(&r, f->emit_order[i]);
}
+ /* At -O1, opt managed allocation and emitted hard regs directly,
+ * bypassing the backend's alloc_reg. Tell the backend which hard
+ * regs were actually assigned so it can save the right callee-saved
+ * subset in prologue/epilogue.
+ *
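+   * We skip FP for now: AArch64's FP prologue placeholder is
+   * fixed-size and may not fit the full FP pool. This matches the
+   * conservative behaviour of the old alloc/free hwm-bump loop.
+   *
+   * NOTE(review): the aa64 and rv64 FP pools now contain registers
+   * (v8-v15, f18-f27) that is_caller_saved reports as not caller-saved,
+   * so opt does not preserve them around calls either. Confirm that the
+   * backend prologue still saves them when FP is skipped here;
+   * otherwise this function clobbers its caller's values. */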
+ if (r.identity_regs && w->reserve_hard_regs) {
+ for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
+ if (c == RC_FP) continue;
+ Reg used[OPT_MAX_HARD_REGS];
+ u32 nused = 0;
+ for (Val v = 1; v < f->nvals; ++v) {
+ if (f->val_info[v].alloc_kind != OPT_ALLOC_HARD) continue;
+ if (f->val_info[v].cls != c) continue;
+ Reg hr = f->val_info[v].hard_reg;
+ int already = 0;
+ for (u32 i = 0; i < nused; ++i) {
+ if (used[i] == hr) { already = 1; break; }
+ }
+ if (!already) used[nused++] = hr;
+ }
+ if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused);
+ }
+ }
+
w->func_end(w);
}
@@ -1212,8 +1275,10 @@ static void w_func_end(CGTarget* t) {
if (o->level == 1) {
opt_build_cfg(o->f);
- opt_machinize(o->f, o->c->target);
+ opt_machinize(o->f, o->target);
opt_live_info(o->f);
+ opt_dead_def_elim(o->f);
+ o->f->val_info = NULL; /* force opt_regalloc to recompute liveness */
opt_regalloc(o->f, 0);
opt_combine(o->f);
opt_dce(o->f);
@@ -1284,6 +1349,11 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) {
t->spill_reg = w_spill_reg;
t->reload_reg = w_reload_reg;
+ t->get_allocable_regs = w_get_allocable_regs;
+ t->get_scratch_regs = w_get_scratch_regs;
+ t->is_caller_saved = w_is_caller_saved;
+ t->reserve_hard_regs = w_reserve_hard_regs;
+
t->label_new = w_label_new;
t->label_place = w_label_place;
t->jump = w_jump;
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -65,12 +65,13 @@ void opt_cleanup(Func*);
* ----- */
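-/* Machine-dependent ABI lowering, 2-op insns, etc. Implemented per-arch and
- * per-OS, so it takes the full Target. */
+/* Machine-dependent ABI lowering, 2-op insns, etc. Register pools, scratch
+ * registers and caller-saved masks are queried from the given CGTarget. */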
-void opt_machinize(Func*, Target);
+void opt_machinize(Func*, CGTarget* target);
void opt_live_info(Func*);
void opt_coalesce(Func*);
void opt_regalloc(Func*, int allow_live_range_split);
void opt_combine(Func*); /* code selection: merge dependent insns */
void opt_dce(Func*); /* post-RA DCE */
+void opt_dead_def_elim(Func*); /* pre-RA dead-definition elimination */
/* Walks the lowered IR and drives a target CGTarget to emit machine code into
* its ObjBuilder. Inserts prolog/epilog. Splits long insns where the target
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -194,60 +194,60 @@ static void collect_bits(Func* f, Inst* in, Operand* op, int is_def,
bit_set(c->use, v);
}
-void opt_machinize(Func* f, Target t) {
-  f->opt_target = t;
+/* Record the target's register policy on `f`.
+ * For every register class this copies the allocable pool and the scratch
+ * registers reported by the CGTarget hooks (truncated to OPT_MAX_HARD_REGS
+ * and OPT_MAX_SCRATCH_REGS), and builds f->opt_caller_saved as a bitmask of
+ * the pool registers the target reports as caller-saved. A missing hook
+ * leaves the corresponding set empty.
+ * Panics via compiler_panic when a caller-saved pool register does not fit
+ * the 32-bit mask, or when a pool register is also a scratch register. */
+void opt_machinize(Func* f, CGTarget* target) {
+  f->opt_target = target->c->target;
   f->opt_has_target = 1;
   for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
     f->opt_hard_reg_count[c] = 0;
     f->opt_scratch_reg_count[c] = 0;
+    f->opt_caller_saved[c] = 0;
   }
-  switch (t.arch) {
-    case CFREE_ARCH_ARM_64: {
-      static const Reg ints[] = {13, 14, 15};
-      static const Reg fps[] = {17, 18, 19, 20, 21, 22, 23};
-      for (u32 i = 0; i < sizeof ints / sizeof ints[0]; ++i)
-        f->opt_hard_regs[RC_INT][f->opt_hard_reg_count[RC_INT]++] = ints[i];
-      for (u32 i = 0; i < sizeof fps / sizeof fps[0]; ++i)
-        f->opt_hard_regs[RC_FP][f->opt_hard_reg_count[RC_FP]++] = fps[i];
-      f->opt_scratch_regs[RC_INT][0] = 16;
-      f->opt_scratch_regs[RC_INT][1] = 17;
-      f->opt_scratch_reg_count[RC_INT] = 2;
-      f->opt_scratch_regs[RC_FP][0] = 24;
-      f->opt_scratch_regs[RC_FP][1] = 25;
-      f->opt_scratch_reg_count[RC_FP] = 2;
-      break;
-    }
-    case CFREE_ARCH_X86_64: {
-      static const Reg ints[] = {10};
-      static const Reg fps[] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-      for (u32 i = 0; i < sizeof ints / sizeof ints[0]; ++i)
-        f->opt_hard_regs[RC_INT][f->opt_hard_reg_count[RC_INT]++] = ints[i];
-      for (u32 i = 0; i < sizeof fps / sizeof fps[0]; ++i)
-        f->opt_hard_regs[RC_FP][f->opt_hard_reg_count[RC_FP]++] = fps[i];
-      f->opt_scratch_regs[RC_INT][0] = 11;
-      f->opt_scratch_reg_count[RC_INT] = 1;
-      f->opt_scratch_regs[RC_FP][0] = 0;
-      f->opt_scratch_reg_count[RC_FP] = 1;
-      break;
+  for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
+    const Reg* hard = NULL;
+    u32 nhard = 0;
+    if (target->get_allocable_regs)
+      target->get_allocable_regs(target, (RegClass)c, &hard, &nhard);
+    for (u32 i = 0; i < nhard && i < OPT_MAX_HARD_REGS; ++i)
+      f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hard[i];
+
+    const Reg* scratch = NULL;
+    u32 nscratch = 0;
+    if (target->get_scratch_regs)
+      target->get_scratch_regs(target, (RegClass)c, &scratch, &nscratch);
+    for (u32 i = 0; i < nscratch && i < OPT_MAX_SCRATCH_REGS; ++i)
+      f->opt_scratch_regs[c][f->opt_scratch_reg_count[c]++] = scratch[i];
+
+    if (target->is_caller_saved) {
+      for (u32 i = 0; i < f->opt_hard_reg_count[c]; ++i) {
+        Reg hr = f->opt_hard_regs[c][i];
+        if (!target->is_caller_saved(target, (RegClass)c, hr)) continue;
+        if (hr >= 32u) {
+          /* Shifting by 32 or more is undefined, and silently dropping
+           * the bit would skip the save/restore around calls. */
+          SrcLoc loc = {0, 0, 0};
+          compiler_panic(f->c, loc,
+                         "opt_machinize: caller-saved hard reg %u does not "
+                         "fit the 32-bit mask in class %u",
+                         (unsigned)hr, (unsigned)c);
+        } else {
+          f->opt_caller_saved[c] |= (1u << hr);
+        }
+      }
     }
-    case CFREE_ARCH_RV64: {
-      for (Reg r = 28; r <= 31; ++r)
-        f->opt_hard_regs[RC_INT][f->opt_hard_reg_count[RC_INT]++] = r;
-      for (Reg r = 28; r <= 31; ++r)
-        f->opt_hard_regs[RC_FP][f->opt_hard_reg_count[RC_FP]++] = r;
-      f->opt_scratch_regs[RC_INT][0] = 5;
-      f->opt_scratch_regs[RC_INT][1] = 6;
-      f->opt_scratch_reg_count[RC_INT] = 2;
-      f->opt_scratch_regs[RC_FP][0] = 0;
-      f->opt_scratch_reg_count[RC_FP] = 1;
-      break;
+  }
+
+  /* The allocable pool and the scratch set must be disjoint per class. */
+  for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
+    for (u32 i = 0; i < f->opt_hard_reg_count[c]; ++i) {
+      Reg hr = f->opt_hard_regs[c][i];
+      for (u32 s = 0; s < f->opt_scratch_reg_count[c]; ++s) {
+        if (f->opt_scratch_regs[c][s] == hr) {
+          SrcLoc loc = {0, 0, 0};
+          compiler_panic(f->c, loc,
+                         "opt_machinize: hard reg %u overlaps scratch reg "
+                         "in class %u",
+                         (unsigned)hr, (unsigned)c);
+        }
+      }
     }
-    default:
-      break;
   }
 }
+/* Return non-zero when hard register `r` of class `cls` is marked in
+ * f->opt_caller_saved. Classes outside OPT_REG_CLASSES and register
+ * numbers of 32 or more are never reported as caller-saved. */
+static int is_caller_saved(Func* f, u8 cls, Reg r) {
+  int in_range = cls < OPT_REG_CLASSES && r < 32;
+  if (!in_range) return 0;
+  u32 mask = f->opt_caller_saved[cls];
+  return ((mask >> r) & 1u) != 0;
+}
+
static void build_loop_depth(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b) {
f->blocks[b].loop_depth = 0;
@@ -492,23 +492,29 @@ static void rewrite_one_operand(Func* f, Inst* owner, Operand* op, int is_def,
}
}
+/* Compute, for every instruction of `bl`, the set of values live
+ * immediately after it. Returns an arena-allocated array with one bitset
+ * (f->opt_live_words words) per instruction; at least one slot is
+ * allocated even for an empty block. The walk starts from bl->live_out
+ * and moves backwards, applying live = (live & ~def) | use per
+ * instruction. */
+static u64** compute_block_live_after(Func* f, Block* bl) {
+  const u32 nwords = f->opt_live_words;
+  u64** after = arena_array(f->arena, u64*, bl->ninsts ? bl->ninsts : 1u);
+  u64* cur = arena_zarray(f->arena, u64, nwords);
+  for (u32 k = 0; k < nwords; ++k) cur[k] = bl->live_out[k];
+
+  u32 idx = bl->ninsts;
+  while (idx-- > 0) {
+    /* Snapshot the running set before stepping over instruction idx. */
+    u64* snap = arena_zarray(f->arena, u64, nwords);
+    for (u32 k = 0; k < nwords; ++k) snap[k] = cur[k];
+    after[idx] = snap;
+
+    u64* uses = arena_zarray(f->arena, u64, nwords);
+    u64* defs = arena_zarray(f->arena, u64, nwords);
+    BitsCtx ctx = {uses, defs};
+    walk_inst_operands(f, &bl->insts[idx], collect_bits, &ctx);
+    for (u32 k = 0; k < nwords; ++k)
+      cur[k] = (cur[k] & ~defs[k]) | uses[k];
+  }
+  return after;
+}
+
static void rewrite_func(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
- u64** live_after = arena_array(f->arena, u64*, bl->ninsts ? bl->ninsts : 1u);
- u64* live = arena_zarray(f->arena, u64, f->opt_live_words);
- for (u32 w = 0; w < f->opt_live_words; ++w) live[w] = bl->live_out[w];
- for (u32 ri = bl->ninsts; ri > 0; --ri) {
- u32 i = ri - 1u;
- live_after[i] = arena_zarray(f->arena, u64, f->opt_live_words);
- for (u32 w = 0; w < f->opt_live_words; ++w) live_after[i][w] = live[w];
- u64* use = arena_zarray(f->arena, u64, f->opt_live_words);
- u64* def = arena_zarray(f->arena, u64, f->opt_live_words);
- BitsCtx bc = {use, def};
- walk_inst_operands(f, &bl->insts[i], collect_bits, &bc);
- for (u32 w = 0; w < f->opt_live_words; ++w)
- live[w] = (live[w] & ~def[w]) | use[w];
- }
+ u64** live_after = compute_block_live_after(f, bl);
RewriteList out;
memset(&out, 0, sizeof out);
@@ -532,7 +538,10 @@ static void rewrite_func(Func* f) {
if ((IROp)in.op == IR_CALL) {
for (Val v = 1; v < f->nvals; ++v) {
if (!bit_has(live_after[i], v) || bit_has(def, v)) continue;
- if (f->val_info[v].alloc_kind == OPT_ALLOC_HARD) append_store_val(f, &out, v);
+ if (f->val_info[v].alloc_kind == OPT_ALLOC_HARD &&
+ is_caller_saved(f, f->val_info[v].cls,
+ f->val_info[v].hard_reg))
+ append_store_val(f, &out, v);
}
}
Inst* dst = list_push(f, &out, (IROp)in.op);
@@ -540,7 +549,10 @@ static void rewrite_func(Func* f) {
if ((IROp)in.op == IR_CALL) {
for (Val v = 1; v < f->nvals; ++v) {
if (!bit_has(live_after[i], v) || bit_has(def, v)) continue;
- if (f->val_info[v].alloc_kind == OPT_ALLOC_HARD) append_load_val(f, &out, v);
+ if (f->val_info[v].alloc_kind == OPT_ALLOC_HARD &&
+ is_caller_saved(f, f->val_info[v].cls,
+ f->val_info[v].hard_reg))
+ append_load_val(f, &out, v);
}
}
for (u32 k = 0; k < after.n; ++k) {
@@ -555,6 +567,53 @@ static void rewrite_func(Func* f) {
f->opt_rewritten = 1;
}
+/* Return 1 when none of the values defined by `in` (in->def and the
+ * in->defs[] list) is set in the `live` bitset, 0 otherwise. VAL_NONE
+ * entries are skipped, so an instruction without defs also returns 1.
+ * `f` is unused. */
+static int all_defs_dead(Func* f, Inst* in, u64* live) {
+  (void)f;
+  int primary_live = in->def != VAL_NONE && bit_has(live, in->def);
+  if (primary_live) return 0;
+  u32 k = 0;
+  while (k < in->ndefs) {
+    if (in->defs[k] != VAL_NONE && bit_has(live, in->defs[k])) return 0;
+    ++k;
+  }
+  return 1;
+}
+
+static int side_effecting(IROp op);
+
+/* Pre-RA dead-definition elimination.
+ * Walks every block backwards from its live_out set and drops each
+ * instruction that is not side-effecting and whose defs are all dead at
+ * that point. Because liveness is updated only for kept instructions, the
+ * operands of a dropped instruction do not keep their own producers
+ * alive, so chains of dead defs are removed in one pass.
+ * Requires block live_out sets and f->opt_live_words to be current.
+ * Replaces bl->insts / ninsts / cap with the compacted list. */
+void opt_dead_def_elim(Func* f) {
+  u32 words = f->opt_live_words;
+  /* Scratch bitsets shared by all blocks and instructions; the arena
+   * never frees, so allocating them per instruction only wastes memory. */
+  u64* live = arena_zarray(f->arena, u64, words);
+  u64* use = arena_zarray(f->arena, u64, words);
+  u64* def = arena_zarray(f->arena, u64, words);
+
+  for (u32 b = 0; b < f->nblocks; ++b) {
+    Block* bl = &f->blocks[b];
+    for (u32 wi = 0; wi < words; ++wi) live[wi] = bl->live_out[wi];
+
+    /* At least one slot, matching compute_block_live_after, so an empty
+     * block never requests a zero-length array. */
+    Inst* kept = arena_array(f->arena, Inst, bl->ninsts ? bl->ninsts : 1u);
+    u32 nkept = 0;
+    for (u32 ri = bl->ninsts; ri > 0; --ri) {
+      Inst* in = &bl->insts[ri - 1u];
+      if (!side_effecting((IROp)in->op) && all_defs_dead(f, in, live)) {
+        continue;
+      }
+      kept[nkept++] = *in;
+
+      for (u32 wi = 0; wi < words; ++wi) {
+        use[wi] = 0;
+        def[wi] = 0;
+      }
+      BitsCtx bc = {use, def};
+      walk_inst_operands(f, in, collect_bits, &bc);
+      for (u32 wi = 0; wi < words; ++wi)
+        live[wi] = (live[wi] & ~def[wi]) | use[wi];
+    }
+
+    /* Instructions were collected last-to-first; restore program order. */
+    for (u32 i = 0; i < nkept / 2; ++i) {
+      Inst tmp = kept[i];
+      kept[i] = kept[nkept - 1 - i];
+      kept[nkept - 1 - i] = tmp;
+    }
+
+    bl->insts = kept;
+    bl->ninsts = nkept;
+    bl->cap = nkept;
+  }
+}
+
void opt_regalloc(Func* f, int allow_live_range_split) {
(void)allow_live_range_split;
if (!f->val_info) opt_live_info(f);
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -52,7 +52,7 @@ static int g_checks;
++g_fails; \
fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
fprintf(stderr, __VA_ARGS__); \
- fputc('\n', stderr); \
+ fputc('\n', stderr); \
} \
} while (0)
@@ -170,18 +170,151 @@ static Inst* emit_call_void(Func* f, u32 b) {
return in;
}
+/* Append an IR_RET to block `b` of `f` that returns value `v` of type
+ * `ty`. The return payload is an arena-allocated IRRetAux attached to the
+ * instruction's extra.aux. */
+static void emit_ret_val(Func* f, u32 b, Val v, const Type* ty) {
+  Inst* ret = ir_emit(f, b, IR_RET);
+  IRRetAux* payload = arena_znew(f->arena, IRRetAux);
+  payload->val.storage = op_reg_(v, ty);
+  payload->val.type = ty;
+  payload->present = 1;
+  ret->extra.aux = payload;
+}
+
/* Tests bit `v` of a bitset stored as an array of 64-bit words.
 * Returns 1 when the bit is set, 0 otherwise. */
static int bit_has(const u64* bits, Val v) {
  u64 word = bits[v / 64u];
  u64 mask = 1ull << (v % 64u);
  return (word & mask) != 0;
}
-static void setup_one_int_reg(Func* f) {
- f->opt_hard_regs[RC_INT][0] = 19;
- f->opt_hard_reg_count[RC_INT] = 1;
- f->opt_scratch_regs[RC_INT][0] = 9;
- f->opt_scratch_regs[RC_INT][1] = 10;
- f->opt_scratch_reg_count[RC_INT] = 2;
+/* Counts the instructions with opcode `op` across every block of `f`. */
+static int count_op(Func* f, IROp op) {
+  int hits = 0;
+  for (u32 bi = 0; bi < f->nblocks; ++bi) {
+    const Block* blk = &f->blocks[bi];
+    for (u32 k = 0; k < blk->ninsts; ++k) {
+      if ((IROp)blk->insts[k].op == op) hits += 1;
+    }
+  }
+  return hits;
+}
+
+/* ============================================================
+ * MockCGTarget — provides register coordination so opt_machinize
+ * and opt_emit can query backend policy without hard-coding arch
+ * knowledge in the tests.
+ * ============================================================ */
+
+/* Test double for a backend CGTarget. `base` must remain the first member:
+ * the mock_* hooks cast the CGTarget* they receive back to MockCGTarget*. */
+typedef struct MockCGTarget {
+ CGTarget base;
+ /* Per-class allocable pool and its length, returned by
+ * mock_get_allocable_regs. The arrays are borrowed, not copied. */
+ const Reg* pool[OPT_REG_CLASSES];
+ u32 pool_n[OPT_REG_CLASSES];
+ /* Per-class scratch registers and their count, returned by
+ * mock_get_scratch_regs. */
+ const Reg* scratch[OPT_REG_CLASSES];
+ u32 scratch_n[OPT_REG_CLASSES];
+ /* Bit r set => mock_is_caller_saved reports register r (r < 32) as
+ * caller-saved for that class. */
+ u32 caller_saved_mask[OPT_REG_CLASSES];
+ /* Running total of the register counts passed to mock_reserve_hard_regs,
+ * i.e. registers reserved rather than number of invocations. */
+ int reserve_calls[OPT_REG_CLASSES];
+ /* Number of load_imm hook invocations and the destination register of the
+ * most recent one. */
+ int load_imm_calls;
+ Reg last_load_imm_dst;
+} MockCGTarget;
+
+/* func_begin hook: intentionally does nothing; both arguments are ignored. */
+static void mock_func_begin(CGTarget* t, const CGFuncDesc* d) {
+ (void)t;
+ (void)d;
+}
+/* func_end hook: intentionally does nothing. */
+static void mock_func_end(CGTarget* t) { (void)t; }
+
+/* get_allocable_regs hook: reports the pool configured for `cls` through
+ * mock_set_pool. `*out` points at the caller-owned array stored in the mock
+ * and `*nregs` is its length. A class outside [0, OPT_REG_CLASSES) yields an
+ * empty pool instead of indexing past the per-class tables, matching the
+ * range checks in the other mock hooks. */
+static void mock_get_allocable_regs(CGTarget* t, RegClass cls,
+                                    const Reg** out, u32* nregs) {
+  MockCGTarget* m = (MockCGTarget*)t;
+  if (cls >= OPT_REG_CLASSES) {
+    *out = NULL;
+    *nregs = 0;
+    return;
+  }
+  *out = m->pool[cls];
+  *nregs = m->pool_n[cls];
+}
+
+/* get_scratch_regs hook: reports the scratch registers configured for `cls`
+ * through mock_set_pool. `*out` points at the caller-owned array stored in
+ * the mock and `*nregs` is its length. A class outside
+ * [0, OPT_REG_CLASSES) yields an empty set instead of indexing past the
+ * per-class tables, matching the range checks in the other mock hooks. */
+static void mock_get_scratch_regs(CGTarget* t, RegClass cls,
+                                  const Reg** out, u32* nregs) {
+  MockCGTarget* m = (MockCGTarget*)t;
+  if (cls >= OPT_REG_CLASSES) {
+    *out = NULL;
+    *nregs = 0;
+    return;
+  }
+  *out = m->scratch[cls];
+  *nregs = m->scratch_n[cls];
+}
+
+/* is_caller_saved hook: returns 1 when bit `reg` is set in the caller-saved
+ * mask configured for `cls`, else 0. Classes >= OPT_REG_CLASSES and
+ * registers >= 32 (which the 32-bit mask cannot represent) report 0. */
+static int mock_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
+  MockCGTarget* m = (MockCGTarget*)t;
+  int in_range = cls < OPT_REG_CLASSES && reg < 32;
+  if (!in_range) return 0;
+  u32 bit = 1u << reg;
+  return (m->caller_saved_mask[cls] & bit) != 0;
+}
+
+/* reserve_hard_regs hook: adds `n` to the per-class `reserve_calls` tally,
+ * so the tally counts reserved registers, not invocations. The register
+ * list itself is not inspected, and out-of-range classes are ignored. */
+static void mock_reserve_hard_regs(CGTarget* t, RegClass cls,
+                                   const Reg* regs, u32 n) {
+  (void)regs;
+  if (cls >= OPT_REG_CLASSES) return;
+  MockCGTarget* m = (MockCGTarget*)t;
+  m->reserve_calls[cls] += (int)n;
+}
+
+/* load_imm hook: records that a load-immediate was emitted and remembers the
+ * destination register of the latest one. The immediate is ignored. */
+static void mock_load_imm(CGTarget* t, Operand dst, i64 imm) {
+  MockCGTarget* m = (MockCGTarget*)t;
+  (void)imm;
+  m->last_load_imm_dst = dst.v.reg;
+  m->load_imm_calls += 1;
+}
+
+/* ret hook: intentionally does nothing; the returned value is ignored. */
+static void mock_ret(CGTarget* t, const CGABIValue* v) {
+ (void)t;
+ (void)v;
+}
+
+/* label_new hook: hands out increasing label ids starting at 1. The counter
+ * is function-static, so it is shared by every mock instance and keeps
+ * counting across tests. */
+static Label mock_label_new(CGTarget* t) {
+  static Label next = 1;
+  (void)t;
+  Label issued = next;
+  next = next + 1;
+  return issued;
+}
+/* label_place hook: intentionally does nothing. */
+static void mock_label_place(CGTarget* t, Label l) {
+ (void)t;
+ (void)l;
+}
+/* jump hook: intentionally does nothing. */
+static void mock_jump(CGTarget* t, Label l) {
+ (void)t;
+ (void)l;
}
+/* frame_slot hook: hands out increasing slot ids starting at 1 and ignores
+ * the slot description. The counter is function-static, so it is shared by
+ * every mock instance and keeps counting across tests. */
+static FrameSlot mock_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+  static FrameSlot next = 1;
+  (void)t;
+  (void)d;
+  FrameSlot issued = next;
+  next = next + 1;
+  return issued;
+}
+
+/* set_loc hook: intentionally does nothing; source locations are ignored. */
+static void mock_set_loc(CGTarget* t, SrcLoc loc) {
+ (void)t;
+ (void)loc;
+}
+
+/* Resets `m` to all-zero and wires up the CGTarget hooks this mock
+ * implements. Pools, scratch sets, masks and counters start out empty/zero;
+ * hooks not assigned below are left zeroed. */
+static void mock_init(MockCGTarget* m, Compiler* c) {
+  memset(m, 0, sizeof *m);
+
+  CGTarget* t = &m->base;
+  t->c = c;
+
+  /* Function lifecycle and frame. */
+  t->func_begin = mock_func_begin;
+  t->func_end = mock_func_end;
+  t->frame_slot = mock_frame_slot;
+
+  /* Control flow. */
+  t->label_new = mock_label_new;
+  t->label_place = mock_label_place;
+  t->jump = mock_jump;
+
+  /* Emission hooks. */
+  t->load_imm = mock_load_imm;
+  t->ret = mock_ret;
+  t->set_loc = mock_set_loc;
+
+  /* Register coordination. */
+  t->get_allocable_regs = mock_get_allocable_regs;
+  t->get_scratch_regs = mock_get_scratch_regs;
+  t->is_caller_saved = mock_is_caller_saved;
+  t->reserve_hard_regs = mock_reserve_hard_regs;
+}
+
+/* Configures the register policy the mock reports for class `cls`: the
+ * allocable pool, the scratch set and the caller-saved bitmask. The arrays
+ * are stored by pointer, so they must outlive the mock's use of them.
+ * `cls` is not range-checked here. */
+static void mock_set_pool(MockCGTarget* m, RegClass cls,
+                          const Reg* pool, u32 npool,
+                          const Reg* scratch_, u32 nscratch,
+                          u32 caller_mask) {
+  m->caller_saved_mask[cls] = caller_mask;
+
+  m->scratch_n[cls] = nscratch;
+  m->scratch[cls] = scratch_;
+
+  m->pool_n[cls] = npool;
+  m->pool[cls] = pool;
+}
+
+/* ============================================================
+ * Pass-shape tests — build IR via the public IR API, run one
+ * pass at a time, assert on IR structure. Backend policy is
+ * injected through MockCGTarget + opt_machinize.
+ * ============================================================ */
+
static void opt_liveness_branch(void) {
TestCtx tc;
tc_init(&tc);
@@ -225,50 +358,55 @@ static void opt_liveness_branch(void) {
static void opt_regalloc_priority(void) {
TestCtx tc;
tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
Func* f = new_func(&tc);
- setup_one_int_reg(f);
+ opt_machinize(f, &mock.base);
Val pinned = add_val(f, tc.i32);
Val hot = add_val(f, tc.i32);
Val out = add_val(f, tc.i32);
emit_load_imm(f, f->entry, pinned, tc.i32, 1);
emit_load_imm(f, f->entry, hot, tc.i32, 2);
emit_binop(f, f->entry, out, pinned, hot, tc.i32);
- ir_emit(f, f->entry, IR_RET);
+ emit_ret_val(f, f->entry, out, tc.i32);
opt_build_cfg(f);
opt_live_info(f);
- f->val_info[pinned].tied_hard_reg = 19;
+ f->val_info[pinned].tied_hard_reg = (i32)f->opt_hard_regs[RC_INT][0];
f->val_info[hot].frequency += 1000;
opt_regalloc(f, 0);
+ Reg expected_hard = f->opt_hard_regs[RC_INT][0];
EXPECT(f->val_info[pinned].alloc_kind == OPT_ALLOC_HARD,
"tied value should get a hard register");
- EXPECT(f->val_info[pinned].hard_reg == 19,
- "tied value should get hard r19, got r%u",
- (unsigned)f->val_info[pinned].hard_reg);
+ EXPECT(f->val_info[pinned].hard_reg == expected_hard,
+ "tied value should get hard r%u, got r%u",
+ (unsigned)expected_hard, (unsigned)f->val_info[pinned].hard_reg);
EXPECT(f->val_info[hot].alloc_kind == OPT_ALLOC_SPILL,
"overlapping untied value should spill under one-reg pressure");
tc_fini(&tc);
}
-static int count_op(Func* f, IROp op) {
- int n = 0;
- for (u32 b = 0; b < f->nblocks; ++b)
- for (u32 i = 0; i < f->blocks[b].ninsts; ++i)
- if ((IROp)f->blocks[b].insts[i].op == op) ++n;
- return n;
-}
-
static void opt_rewrite_spill_use_def(void) {
TestCtx tc;
tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
Func* f = new_func(&tc);
- setup_one_int_reg(f);
+ opt_machinize(f, &mock.base);
Val a = add_val(f, tc.i32);
Val b = add_val(f, tc.i32);
Val c = add_val(f, tc.i32);
emit_load_imm(f, f->entry, a, tc.i32, 1);
emit_load_imm(f, f->entry, b, tc.i32, 2);
emit_binop(f, f->entry, c, a, b, tc.i32);
- ir_emit(f, f->entry, IR_RET);
+ emit_ret_val(f, f->entry, c, tc.i32);
opt_build_cfg(f);
opt_live_info(f);
opt_regalloc(f, 0);
@@ -285,14 +423,51 @@ static void opt_rewrite_spill_use_def(void) {
static void opt_call_clobber_preservation(void) {
TestCtx tc;
tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19}; /* callee-saved */
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
Func* f = new_func(&tc);
- setup_one_int_reg(f);
+ opt_machinize(f, &mock.base);
Val live = add_val(f, tc.i32);
- Val out = add_val(f, tc.i32);
emit_load_imm(f, f->entry, live, tc.i32, 11);
emit_call_void(f, f->entry);
- emit_copy(f, f->entry, out, live, tc.i32);
- ir_emit(f, f->entry, IR_RET);
+ emit_ret_val(f, f->entry, live, tc.i32);
+ opt_build_cfg(f);
+ opt_live_info(f);
+ opt_regalloc(f, 0);
+
+ Block* b = &f->blocks[f->entry];
+ int saw_call_save_restore = 0;
+ for (u32 i = 1; i + 1 < b->ninsts; ++i) {
+ if ((IROp)b->insts[i].op == IR_CALL &&
+ (IROp)b->insts[i - 1u].op == IR_STORE &&
+ (IROp)b->insts[i + 1u].op == IR_LOAD) {
+ saw_call_save_restore = 1;
+ }
+ }
+ EXPECT(!saw_call_save_restore,
+ "live callee-saved hard reg across call should NOT be stored/loaded");
+ tc_fini(&tc);
+}
+
+static void opt_call_clobber_caller_saved(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {13}; /* caller-saved */
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
+ Func* f = new_func(&tc);
+ opt_machinize(f, &mock.base);
+ Val live = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, live, tc.i32, 11);
+ emit_call_void(f, f->entry);
+ emit_ret_val(f, f->entry, live, tc.i32);
opt_build_cfg(f);
opt_live_info(f);
opt_regalloc(f, 0);
@@ -307,15 +482,21 @@ static void opt_call_clobber_preservation(void) {
}
}
EXPECT(saw_call_save_restore,
- "live hard reg across call should be stored before and loaded after");
+ "live caller-saved hard reg across call should be stored before and loaded after");
tc_fini(&tc);
}
static void opt_spill_pressure(void) {
TestCtx tc;
tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
Func* f = new_func(&tc);
- setup_one_int_reg(f);
+ opt_machinize(f, &mock.base);
Val a = add_val(f, tc.i32);
Val b = add_val(f, tc.i32);
Val c = add_val(f, tc.i32);
@@ -325,7 +506,7 @@ static void opt_spill_pressure(void) {
emit_load_imm(f, f->entry, c, tc.i32, 3);
emit_binop(f, f->entry, d, a, b, tc.i32);
emit_binop(f, f->entry, d, d, c, tc.i32);
- ir_emit(f, f->entry, IR_RET);
+ emit_ret_val(f, f->entry, d, tc.i32);
opt_build_cfg(f);
opt_live_info(f);
opt_regalloc(f, 0);
@@ -343,8 +524,14 @@ static void opt_spill_pressure(void) {
static void opt_inline_asm_tied_fixed_regs(void) {
TestCtx tc;
tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
Func* f = new_func(&tc);
- setup_one_int_reg(f);
+ opt_machinize(f, &mock.base);
Val tied = add_val(f, tc.i32);
emit_load_imm(f, f->entry, tied, tc.i32, 5);
@@ -368,18 +555,19 @@ static void opt_inline_asm_tied_fixed_regs(void) {
aux->out_ops[0] = op_reg_(tied, tc.i32);
aux->in_ops[0] = op_reg_(tied, tc.i32);
in->extra.aux = aux;
- ir_emit(f, f->entry, IR_RET);
+ emit_ret_val(f, f->entry, tied, tc.i32);
opt_build_cfg(f);
opt_live_info(f);
- f->val_info[tied].tied_hard_reg = 19;
+ f->val_info[tied].tied_hard_reg = (i32)f->opt_hard_regs[RC_INT][0];
opt_regalloc(f, 0);
aux = (IRAsmAux*)in->extra.aux;
+ Reg expected = f->opt_hard_regs[RC_INT][0];
EXPECT(f->val_info[tied].alloc_kind == OPT_ALLOC_HARD,
"tied asm val should allocate hard");
- EXPECT(f->val_info[tied].hard_reg == 19,
- "tied asm val should get r19");
- EXPECT(aux->out_ops[0].v.reg == 19 && aux->in_ops[0].v.reg == 19,
+ EXPECT(f->val_info[tied].hard_reg == expected,
+ "tied asm val should get r%u", (unsigned)expected);
+ EXPECT(aux->out_ops[0].v.reg == expected && aux->in_ops[0].v.reg == expected,
"asm tied operands should rewrite to the fixed hard reg");
tc_fini(&tc);
}
@@ -387,13 +575,19 @@ static void opt_inline_asm_tied_fixed_regs(void) {
static void opt_post_rewrite_dce(void) {
TestCtx tc;
tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
Func* f = new_func(&tc);
- setup_one_int_reg(f);
+ opt_machinize(f, &mock.base);
Val a = add_val(f, tc.i32);
emit_load_imm(f, f->entry, a, tc.i32, 1);
emit_copy(f, f->entry, a, a, tc.i32);
ir_emit(f, f->entry, IR_NOP);
- ir_emit(f, f->entry, IR_RET);
+ emit_ret_val(f, f->entry, a, tc.i32);
opt_build_cfg(f);
opt_live_info(f);
opt_regalloc(f, 0);
@@ -404,76 +598,70 @@ static void opt_post_rewrite_dce(void) {
tc_fini(&tc);
}
-typedef struct MockEmit {
- CGTarget base;
- int alloc_calls;
- int load_imm_calls;
- Reg last_dst;
-} MockEmit;
+static void opt_dead_def_elim_test(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
-static Reg me_alloc(CGTarget* t, RegClass cls, const Type* ty) {
- (void)cls;
- (void)ty;
- MockEmit* m = (MockEmit*)t;
- ++m->alloc_calls;
- return 1000u + (Reg)m->alloc_calls;
-}
-static void me_func_begin(CGTarget* t, const CGFuncDesc* d) {
- (void)t;
- (void)d;
-}
-static void me_func_end(CGTarget* t) { (void)t; }
-static void me_load_imm(CGTarget* t, Operand dst, i64 imm) {
- (void)imm;
- MockEmit* m = (MockEmit*)t;
- ++m->load_imm_calls;
- m->last_dst = dst.v.reg;
-}
-static void me_ret(CGTarget* t, const CGABIValue* v) {
- (void)t;
- (void)v;
-}
-static FrameSlot me_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
- (void)t;
- (void)d;
- static FrameSlot next = 1;
- return next++;
-}
-static void me_set_loc(CGTarget* t, SrcLoc loc) {
- (void)t;
- (void)loc;
+ Func* f = new_func(&tc);
+ opt_machinize(f, &mock.base);
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, a, tc.i32, 1);
+ emit_copy(f, f->entry, b, a, tc.i32); /* b is dead after this */
+ emit_ret_val(f, f->entry, a, tc.i32); /* a stays live */
+ opt_build_cfg(f);
+ opt_live_info(f);
+ opt_dead_def_elim(f);
+ EXPECT(count_op(f, IR_COPY) == 0,
+ "dead copy should be eliminated by dead_def_elim");
+ EXPECT(count_op(f, IR_LOAD_IMM) == 1,
+ "live load_imm should survive dead_def_elim");
+ tc_fini(&tc);
}
-static void mock_emit_init(MockEmit* m, Compiler* c) {
- memset(m, 0, sizeof *m);
- m->base.c = c;
- m->base.func_begin = me_func_begin;
- m->base.func_end = me_func_end;
- m->base.alloc_reg = me_alloc;
- m->base.frame_slot = me_frame_slot;
- m->base.load_imm = me_load_imm;
- m->base.ret = me_ret;
- m->base.set_loc = me_set_loc;
-}
+/* ============================================================
+ * End-to-end test — drive the opt-wrapped CGTarget through the
+ * public CGTarget interface, let func_end run the full pipeline,
+ * and assert on what the mock backend received.
+ * ============================================================ */
static void opt_emit_no_virtual_alloc(void) {
TestCtx tc;
tc_init(&tc);
- Func* f = new_func(&tc);
- setup_one_int_reg(f);
- Val a = add_val(f, tc.i32);
- emit_load_imm(f, f->entry, a, tc.i32, 42);
- ir_emit(f, f->entry, IR_RET);
- opt_build_cfg(f);
- opt_live_info(f);
- opt_regalloc(f, 0);
- MockEmit m;
- mock_emit_init(&m, tc.c);
- opt_emit(tc.c, f, &m.base);
- EXPECT(m.alloc_calls == 0, "opt_emit should not allocate virtual regs");
- EXPECT(m.load_imm_calls == 1, "expected one emitted load_imm");
- EXPECT(m.last_dst == 19, "emitted hard dst should be r19, got r%u",
- (unsigned)m.last_dst);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg pool[] = {19};
+ static const Reg scratch[] = {9, 10};
+ mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu);
+
+ CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1);
+ CGFuncDesc fd;
+ memset(&fd, 0, sizeof fd);
+ fd.fn_type = type_func(tc.c->global, tc.i32, NULL, 0, 0);
+ opt->func_begin(opt, &fd);
+
+ Reg a = opt->alloc_reg(opt, RC_INT, tc.i32);
+ opt->load_imm(opt, op_reg_(a, tc.i32), 42);
+ CGABIValue retv = {0};
+ retv.type = tc.i32;
+ retv.storage = op_reg_(a, tc.i32);
+ opt->ret(opt, &retv);
+ opt->func_end(opt);
+
+ EXPECT(mock.reserve_calls[RC_INT] == 1,
+ "opt_emit should reserve the 1 hard pool reg used, got %d",
+ mock.reserve_calls[RC_INT]);
+ EXPECT(mock.load_imm_calls == 1, "expected one emitted load_imm");
+ EXPECT(mock.last_load_imm_dst == 19,
+ "emitted hard dst should be r19, got r%u",
+ (unsigned)mock.last_load_imm_dst);
+
+ opt->destroy(opt);
tc_fini(&tc);
}
@@ -482,9 +670,11 @@ int main(void) {
opt_regalloc_priority();
opt_rewrite_spill_use_def();
opt_call_clobber_preservation();
+ opt_call_clobber_caller_saved();
opt_spill_pressure();
opt_inline_asm_tied_fixed_regs();
opt_post_rewrite_dce();
+ opt_dead_def_elim_test();
opt_emit_no_virtual_alloc();
if (g_fails) {
fprintf(stderr, "opt tests: %d failed (%d checks)\n", g_fails, g_checks);