commit b83a60ab03163c763a4e01b3d03435f784131628
parent 5a7642085de670403406e9ad6a3f29cbd73ef3e1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 14 May 2026 12:06:06 -0700
Move CG register allocation out of CGTarget
Diffstat:
18 files changed, 229 insertions(+), 178 deletions(-)
diff --git a/doc/OPT1.md b/doc/OPT1.md
@@ -24,7 +24,7 @@ substitute a behaviorally similar shortcut without updating both documents.
tied hard-reg needs, frequency, live length, then stable id.
- [x] Add the rewrite pass: map virtual regs to hard regs or `FS_SPILL`
slots, inserting reloads/stores for spilled uses and defs.
-- [x] Make `opt_emit` stop relying on wrapped-target `alloc_reg` for virtual
+- [x] Make `opt_emit` stop relying on wrapped-target register allocation for virtual
values after rewrite.
- [x] Fill in target-aware `opt_machinize`/`opt_combine`, starting with
AArch64 ABI/call constraints, noop move deletion, and safe single-use
diff --git a/src/api/cg.c b/src/api/cg.c
@@ -8,6 +8,7 @@
#include "api/cg_api.h"
#include "api/cg_type.h"
#include "arch/arch.h"
+#include "arch/regalloc.h"
#include "core/arena.h"
#include "core/heap.h"
#include "core/pool.h"
@@ -945,6 +946,7 @@ struct CfreeCg {
ObjBuilder* obj;
CGTarget* target;
MCEmitter* mc;
+ CGSimpleRegAlloc regalloc;
ApiSValue* stack;
u32 sp;
@@ -1223,6 +1225,61 @@ static ApiSValue* api_pick_victim(CfreeCg* g, u8 cls) {
static MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv);
+static void api_regalloc_begin(CfreeCg* g) { /* reset allocator per function */
+  CGTarget* T = g->target;
+  if (T->virtual_regs) { /* target consumes virtual ids: no hard-reg pools */
+    cg_simple_regalloc_init_virtual(&g->regalloc);
+    return;
+  }
+  cg_simple_regalloc_init(&g->regalloc);
+  for (u32 c = 0; c < 3u; ++c) { /* one pool per RegClass index 0..2 */
+    const Reg* regs = NULL;
+    u32 nregs = 0;
+    if (T->get_allocable_regs)
+      T->get_allocable_regs(T, (RegClass)c, &regs, &nregs);
+    if (regs && nregs) /* hook absent or class empty: leave the pool unset */
+      cg_simple_regalloc_set_ordered(&g->regalloc, (RegClass)c, regs, nregs);
+  }
+}
+
+static void api_regalloc_finish(CfreeCg* g) {
+  if (cg_simple_regalloc_is_virtual(&g->regalloc)) return; /* no hard regs */
+  if (!g->target->reserve_hard_regs) return; /* backend exposes no hook */
+  for (u32 cls = 0; cls < 3u; ++cls) { /* report per register class */
+    const CGSimpleRegPool* pool = &g->regalloc.pools[cls];
+    Reg hit[CG_SIMPLE_REGALLOC_MAX_REGS];
+    u32 nhit = 0;
+    for (u32 k = 0; k < pool->hwm && k < pool->nregs; ++k) {
+      Reg hr = cg_simple_regpool_reg_at(pool, k);
+      if (hr != (Reg)REG_NONE) hit[nhit++] = hr;
+    }
+    if (nhit) g->target->reserve_hard_regs(g->target, (RegClass)cls, hit, nhit);
+  }
+}
+
+static Reg api_alloc_reg(CfreeCg* g, u8 cls) {
+  Reg got = cg_simple_regalloc_alloc(&g->regalloc, (RegClass)cls);
+  if (got != (Reg)REG_NONE) return got;
+  if (cg_simple_regalloc_is_virtual(&g->regalloc)) /* ids must not run out */
+    compiler_panic(g->c, g->cur_loc, "CfreeCg: virtual regalloc exhausted");
+  return got; /* REG_NONE: hard pool is empty, caller decides what to spill */
+}
+
+static void api_free_reg(CfreeCg* g, Reg r, u8 cls) {
+  int status = 1; /* REG_NONE is skipped and treated as a successful free */
+  if (r != (Reg)REG_NONE)
+    status = cg_simple_regalloc_free(&g->regalloc, (RegClass)cls, r);
+  if (status == 1) return;
+  if (status == -1) { /* allocator says the register was already free */
+    compiler_panic(g->c, g->cur_loc,
+                   "CfreeCg: regalloc - reg %u already free in class %u",
+                   (unsigned)r, (unsigned)cls);
+  }
+  compiler_panic(g->c, g->cur_loc,
+                 "CfreeCg: regalloc - reg %u not in class %u pool",
+                 (unsigned)r, (unsigned)cls);
+}
+
static int api_spill_avs_victim(CfreeCg* g, u8 cls) {
CGTarget* T = g->target;
if (!g->avs_in_flight) return 0;
@@ -1233,6 +1290,7 @@ static int api_spill_avs_victim(CfreeCg* g, u8 cls) {
FrameSlot slot = api_take_spill_slot(g, cls);
ApiSValue tmp = api_make_sv(av->storage, av->type);
T->spill_reg(T, av->storage, slot, api_mem_for_spill(g, &tmp));
+ api_free_reg(g, av->storage.v.reg, cls);
Operand local = api_op_local(slot, av->type);
local.cls = cls;
av->storage = local;
@@ -1286,7 +1344,9 @@ static MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv) {
static Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty) {
CGTarget* T = g->target;
- Reg r = T->alloc_reg(T, cls, ty);
+ Reg r;
+ (void)ty;
+ r = api_alloc_reg(g, cls);
if (r != (Reg)REG_NONE) return r;
ApiSValue* victim = api_pick_victim(g, cls);
@@ -1295,6 +1355,7 @@ static Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty) {
CfreeCgTypeId rty = api_owned_reg_type(g, victim);
Operand victim_reg = api_op_reg((Reg)api_reg_of_sv(victim), rty);
T->spill_reg(T, victim_reg, slot, api_mem_for_spill(g, victim));
+ api_free_reg(g, victim_reg.v.reg, cls);
victim->spill_slot = slot;
victim->res = RES_SPILLED;
api_set_owned_reg(victim, (Reg)REG_NONE);
@@ -1304,7 +1365,7 @@ static Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty) {
(unsigned)cls);
}
- r = T->alloc_reg(T, cls, ty);
+ r = api_alloc_reg(g, cls);
if (r == (Reg)REG_NONE) {
compiler_panic(g->c, g->cur_loc,
"CfreeCg: regalloc - class %u still empty after spill",
@@ -1342,7 +1403,7 @@ static Operand api_force_reg(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) {
} else if (api_is_lvalue_sv(v)) {
T->load(T, dst, v->op, api_mem_for_lvalue(g, &v->op, ty));
if (v->op.kind == OPK_INDIRECT) {
- T->free_reg(T, v->op.v.ind.base, RC_INT);
+ api_free_reg(g, v->op.v.ind.base, RC_INT);
}
} else if (v->op.kind == OPK_GLOBAL) {
T->addr_of(T, dst, v->op);
@@ -1363,7 +1424,7 @@ static Operand api_force_reg_unless_imm(CfreeCg* g, ApiSValue* v,
static void api_release(CfreeCg* g, ApiSValue* sv) {
if (sv->res == RES_REG) {
- g->target->free_reg(g->target, (Reg)api_reg_of_sv(sv), api_class_of_sv(sv));
+ api_free_reg(g, (Reg)api_reg_of_sv(sv), api_class_of_sv(sv));
} else if (sv->res == RES_SPILLED) {
api_return_spill_slot(g, sv->spill_slot, api_class_of_sv(sv));
sv->spill_slot = FRAME_SLOT_NONE;
@@ -1373,13 +1434,13 @@ static void api_release(CfreeCg* g, ApiSValue* sv) {
static void api_release_arg_storage(CfreeCg* g, Operand* storage) {
if (storage->kind == OPK_REG) {
- g->target->free_reg(g->target, storage->v.reg, storage->cls);
+ api_free_reg(g, storage->v.reg, storage->cls);
} else if (storage->kind == OPK_LOCAL && storage->cls < 3) {
CfreeCgTypeId ty = storage->type;
if (cg_type_is_aggregate(g->c, ty)) return;
api_return_spill_slot(g, storage->v.frame_slot, storage->cls);
} else if (storage->kind == OPK_INDIRECT) {
- g->target->free_reg(g->target, storage->v.ind.base, RC_INT);
+ api_free_reg(g, storage->v.ind.base, RC_INT);
}
}
@@ -1837,10 +1898,12 @@ void cfree_cg_func_begin(CfreeCg* g, CfreeCgSym cg_sym) {
g->avs_in_flight_n = 0;
T->func_begin(T, &g->fn_desc);
+ api_regalloc_begin(g);
}
void cfree_cg_func_end(CfreeCg* g) {
if (!g) return;
+ api_regalloc_finish(g);
g->target->func_end(g->target);
g->fn_abi = NULL;
g->fn_ret_type = CFREE_CG_TYPE_NONE;
@@ -2760,7 +2823,7 @@ static void api_asm_spill_sv(CfreeCg* g, ApiSValue* sv, Reg phys,
FrameSlot slot = api_take_spill_slot(g, cls);
Operand victim_reg = api_op_reg(phys, api_owned_reg_type(g, sv));
g->target->spill_reg(g->target, victim_reg, slot, api_mem_for_spill(g, sv));
- g->target->free_reg(g->target, phys, cls);
+ api_free_reg(g, phys, cls);
sv->spill_slot = slot;
sv->res = RES_SPILLED;
api_set_owned_reg(sv, (Reg)REG_NONE);
@@ -3631,9 +3694,9 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) {
T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty));
}
T->binop(T, BO_IADD, result, base_op, scaled);
- T->free_reg(T, sr, RC_INT);
+ api_free_reg(g, sr, RC_INT);
}
- if (free_base_op) T->free_reg(T, base_op.v.reg, RC_INT);
+ if (free_base_op) api_free_reg(g, base_op.v.reg, RC_INT);
if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY) api_release(g, &base);
api_release(g, &idx);
api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, elem_ty), elem_ty));
@@ -3696,7 +3759,7 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) {
result = api_op_reg(fr, rec_ptr_ty);
T->binop(T, BO_IADD, result, base_addr,
api_op_imm((i64)field_offset, rec_ptr_ty));
- T->free_reg(T, base_addr.v.reg, RC_INT);
+ api_free_reg(g, base_addr.v.reg, RC_INT);
}
api_push(g,
api_make_lv(api_op_indirect(result.v.reg, 0, field_ty), field_ty));
@@ -3811,7 +3874,7 @@ void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type,
g->avs_in_flight_n = 0;
if (callee.op.kind != OPK_GLOBAL) {
- T->free_reg(T, callee_op.v.reg, RC_INT);
+ api_free_reg(g, callee_op.v.reg, RC_INT);
}
if (has_result) {
@@ -3876,7 +3939,7 @@ static void api_cg_tail_call(CfreeCg* g, uint32_t nargs,
api_release_arg_storage(g, &avs[i].storage);
}
if (callee.op.kind != OPK_GLOBAL) {
- T->free_reg(T, callee_op.v.reg, RC_INT);
+ api_free_reg(g, callee_op.v.reg, RC_INT);
}
}
diff --git a/src/arch/aarch64/alloc.c b/src/arch/aarch64/alloc.c
@@ -17,14 +17,6 @@ void regpool_init(RegPool* p, u8 base, u8 nregs) {
cg_simple_regpool_init_range(p, base, nregs);
}
-Reg regpool_alloc(RegPool* p) {
- return cg_simple_regpool_alloc(p);
-}
-
-int regpool_free(RegPool* p, Reg r) {
- return cg_simple_regpool_free(p, r);
-}
-
/* ============================================================
* Slot accessor
* ============================================================ */
@@ -34,39 +26,6 @@ AASlot* aa64_slot_get(AAImpl* a, FrameSlot fs) {
return &a->slots[fs - 1];
}
-/* ============================================================
- * Register allocation / free
- * ============================================================ */
-
-static Reg aa_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty) {
- AAImpl* a = impl_of(t);
- (void)ty;
- if (cls == RC_INT) return regpool_alloc(&a->int_pool);
- if (cls == RC_FP) return regpool_alloc(&a->fp_pool);
- compiler_panic(t->c, a->loc, "aarch64 alloc_reg: class %d unimpl", (int)cls);
-}
-
-void aa_free_reg(CGTarget* t, Reg r, RegClass cls) {
- AAImpl* a = impl_of(t);
- RegPool* p;
- switch (cls) {
- case RC_INT: p = &a->int_pool; break;
- case RC_FP: p = &a->fp_pool; break;
- default:
- compiler_panic(t->c, a->loc, "aarch64 free_reg: class %d unimpl",
- (int)cls);
- }
- int rc = regpool_free(p, r);
- if (rc == 1) return;
- if (rc == -1) {
- compiler_panic(t->c, a->loc,
- "aarch64 free_reg: reg %u already free in %s pool",
- (unsigned)r, cls == RC_FP ? "fp" : "int");
- }
- compiler_panic(t->c, a->loc, "aarch64 free_reg: reg %u not in %s pool",
- (unsigned)r, cls == RC_FP ? "fp" : "int");
-}
-
static int aa_resolve_reg_name(CGTarget* t, Sym name, Reg* out,
RegClass* cls_out) {
(void)t;
@@ -105,7 +64,6 @@ static void aa_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
addr.type = ma.type;
addr.v.frame_slot = slot;
aa_store(t, addr, src, ma);
- aa_free_reg(t, src.v.reg, src.cls);
}
static void aa_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
@@ -279,8 +237,6 @@ static void aa_continue_to(CGTarget* t, CGScope s) {
/* Expose vtable entries to ops.c constructor via a registration helper.
* ops.c calls this after the basic ops vtable is populated. */
void aa_alloc_vtable_init(CGTarget* t) {
- t->alloc_reg = aa_alloc_reg;
- t->free_reg = aa_free_reg;
t->spill_reg = aa_spill_reg;
t->reload_reg = aa_reload_reg;
t->resolve_reg_name = aa_resolve_reg_name;
diff --git a/src/arch/aarch64/internal.h b/src/arch/aarch64/internal.h
@@ -276,9 +276,6 @@ typedef struct AAImpl {
/* regpool (alloc.c) */
void regpool_init(RegPool* p, u8 base, u8 nregs);
-Reg regpool_alloc(RegPool* p);
-int regpool_free(RegPool* p, Reg r);
-
/* emit.c helpers used in alloc.c / ops.c */
void aa64_emit32(MCEmitter* mc, u32 word);
void aa64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word);
@@ -297,7 +294,6 @@ void aa_param(CGTarget* t, const CGParamDesc* p);
/* alloc.c helpers used in emit.c / ops.c */
AAImpl* impl_of(CGTarget* t);
AASlot* aa64_slot_get(AAImpl* a, FrameSlot fs);
-void aa_free_reg(CGTarget* t, Reg r, RegClass cls);
void aa_jump(CGTarget* t, Label l);
/* ops.c helpers used in alloc.c */
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -455,6 +455,8 @@ struct CGTarget {
Compiler* c;
ObjBuilder* obj;
MCEmitter* mc;
+ u8 virtual_regs;
+ u8 pad0[3];
/* Optional. When non-NULL, per-instruction emit calls Debug to record
* line rows; func_begin/func_end attribute PC ranges to the active
@@ -468,24 +470,20 @@ struct CGTarget {
void (*func_begin)(CGTarget*, const CGFuncDesc*);
void (*func_end)(CGTarget*);
- /* ---- registers and frame slots ----
- * At -O0 CG is TCC-style and owns the value stack: it decides which live
- * values must be spilled/reloaded across register pressure, calls, and asm.
- * Real targets return physical scratch registers and implement spill/reload
- * mechanics; opt_cgtarget returns fresh virtual regs and ignores spills. */
- Reg (*alloc_reg)(CGTarget*, RegClass, CfreeCgTypeId);
- void (*free_reg)(CGTarget*, Reg, RegClass); /* hint; opt_cgtarget ignores */
+ /* ---- frame slots and spill/reload ----
+ * CG and opt allocate caller-visible registers and pass concrete Operand
+ * regs to the target. Plain machine targets consume hard regs; opt_cgtarget
+ * sets virtual_regs and records virtual Reg ids as SSA values. */
FrameSlot (*frame_slot)(CGTarget*, const FrameSlotDesc*);
void (*param)(CGTarget*, const CGParamDesc*);
void (*spill_reg)(CGTarget*, Operand src_reg, FrameSlot, MemAccess);
void (*reload_reg)(CGTarget*, Operand dst_reg, FrameSlot, MemAccess);
/* ---- opt/back-end register coordination ----
- * At -O1 opt allocates virtual registers internally and maps them to
- * hard registers or spill slots. The backend still owns prologue/epilogue
- * and scratch-register policy. These hooks let the two sides agree on
- * which physical registers are in play without opt hard-coding arch
- * details. */
+ * CGTarget users allocate caller-visible registers before invoking target
+ * ops. Direct CG uses these hooks to initialize its simple hard-reg
+ * allocator. opt uses them after IR rewriting to coordinate assigned hard
+ * regs and backend scratch policy without hard-coding arch details. */
/* Return the target's allocable hard register pool for `cls`.
* Sets *out to a stable array and *nregs to its length. The array
@@ -506,7 +504,7 @@ struct CGTarget {
* func_end. The backend updates prologue/epilogue bookkeeping so it
* saves/restores only the callee-saved subset that opt used.
*
- * At -O0 this is a no-op (the backend tracks usage via alloc_reg). */
+ * Direct CG and opt both call this after emitting hard-register operands. */
void (*reserve_hard_regs)(CGTarget*, RegClass, const Reg* regs, u32 n);
/* ---- labels and control flow ---- */
diff --git a/src/arch/regalloc.c b/src/arch/regalloc.c
@@ -64,6 +64,12 @@ void cg_simple_regalloc_init(CGSimpleRegAlloc* a) {
memset(a, 0, sizeof *a);
}
+void cg_simple_regalloc_init_virtual(CGSimpleRegAlloc* a) {
+  memset(a, 0, sizeof *a); /* pools stay zeroed; virtual mode bypasses them */
+  a->next_virtual = 1;     /* first id handed out by alloc is 1 */
+  a->virtual_regs = 1;     /* switches alloc/free/reserve to virtual mode */
+}
+
void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, RegClass cls, Reg base,
u32 nregs) {
if ((u32)cls >= 3u) return;
@@ -78,15 +84,25 @@ void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls,
Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, RegClass cls) {
if ((u32)cls >= 3u) return (Reg)REG_NONE;
+ if (a->virtual_regs) return a->next_virtual++;
return cg_simple_regpool_alloc(&a->pools[cls]);
}
int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r) {
if ((u32)cls >= 3u) return -2;
+ if (a->virtual_regs) {
+ (void)r;
+ return 1;
+ }
return cg_simple_regpool_free(&a->pools[cls], r);
}
void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r) {
if ((u32)cls >= 3u) return;
+ if (a->virtual_regs) return;
cg_simple_regpool_reserve(&a->pools[cls], r);
}
+
+int cg_simple_regalloc_is_virtual(const CGSimpleRegAlloc* a) {
+  return a->virtual_regs ? 1 : 0; /* normalise the u32 flag to 0 or 1 */
+}
diff --git a/src/arch/regalloc.h b/src/arch/regalloc.h
@@ -15,6 +15,8 @@ typedef struct CGSimpleRegPool {
typedef struct CGSimpleRegAlloc {
CGSimpleRegPool pools[3]; /* indexed by RegClass */
+ u32 virtual_regs;
+ Reg next_virtual;
} CGSimpleRegAlloc;
void cg_simple_regpool_init_range(CGSimpleRegPool* p, Reg base, u32 nregs);
@@ -26,6 +28,7 @@ void cg_simple_regpool_reserve(CGSimpleRegPool* p, Reg r);
Reg cg_simple_regpool_reg_at(const CGSimpleRegPool* p, u32 idx);
void cg_simple_regalloc_init(CGSimpleRegAlloc* a);
+void cg_simple_regalloc_init_virtual(CGSimpleRegAlloc* a);
void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, RegClass cls, Reg base,
u32 nregs);
void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls,
@@ -33,5 +36,6 @@ void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls,
Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, RegClass cls);
int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r);
void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r);
+int cg_simple_regalloc_is_virtual(const CGSimpleRegAlloc* a);
#endif
diff --git a/src/arch/rv64/alloc.c b/src/arch/rv64/alloc.c
@@ -2,34 +2,7 @@
#include "arch/rv64/internal.h"
-/* ---- regs / frame ---- */
-
-Reg rv_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty) {
- RImpl* a = impl_of(t);
- (void)ty;
- if (cls == RC_INT) return regpool_alloc(&a->int_pool);
- if (cls == RC_FP) return regpool_alloc(&a->fp_pool);
- compiler_panic(t->c, a->loc, "rv64 alloc_reg: class %d unimpl", (int)cls);
-}
-
-void rv_free_reg(CGTarget* t, Reg r, RegClass cls) {
- RImpl* a = impl_of(t);
- RegPool* p;
- switch (cls) {
- case RC_INT: p = &a->int_pool; break;
- case RC_FP: p = &a->fp_pool; break;
- default:
- compiler_panic(t->c, a->loc, "rv64 free_reg: class %d unimpl", (int)cls);
- }
- int rc = regpool_free(p, r);
- if (rc == 1) return;
- if (rc == -1) {
- compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u already free in %s pool",
- (unsigned)r, cls == RC_FP ? "fp" : "int");
- }
- compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u not in %s pool",
- (unsigned)r, cls == RC_FP ? "fp" : "int");
-}
+/* ---- frame ---- */
FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
RImpl* a = impl_of(t);
@@ -176,7 +149,6 @@ void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
addr.type = ma.type;
addr.v.frame_slot = slot;
rv_store(t, addr, src, ma);
- rv_free_reg(t, src.v.reg, src.cls);
}
void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -112,12 +112,6 @@ static inline u32 reg_num(Operand op) { return op.v.reg & 0x1fu; }
static inline void regpool_init(RegPool* p, u8 base, u8 nregs) {
cg_simple_regpool_init_range(p, base, nregs);
}
-static inline Reg regpool_alloc(RegPool* p) {
- return cg_simple_regpool_alloc(p);
-}
-static inline int regpool_free(RegPool* p, Reg r) {
- return cg_simple_regpool_free(p, r);
-}
/* ---- emit.c: function lifecycle (referenced by ops.c vtable) ---- */
void rv_func_begin(CGTarget* t, const CGFuncDesc* fd);
@@ -137,8 +131,6 @@ void emit_sp_addi(MCEmitter* mc, i64 imm);
_Noreturn void rv_panic(CGTarget* t, const char* what);
/* ---- alloc.c: all functions (non-static; referenced by ops.c vtable) ---- */
-Reg rv_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty);
-void rv_free_reg(CGTarget* t, Reg r, RegClass cls);
FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d);
RvSlot* rv64_slot_get(RImpl* a, FrameSlot fs);
void rv_param(CGTarget* t, const CGParamDesc* p);
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -1773,8 +1773,6 @@ CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
t->func_begin = rv_func_begin;
t->func_end = rv_func_end;
- t->alloc_reg = rv_alloc_reg;
- t->free_reg = rv_free_reg;
t->frame_slot = rv_frame_slot;
t->param = rv_param;
t->spill_reg = rv_spill_reg;
diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c
@@ -1,7 +1,7 @@
/* arch/x64/alloc.c — register pool, spill/reload, labels, control flow.
*
- * Covers: xpool_init/alloc/free, x_alloc_reg, x_free_reg, x_frame_slot,
- * x64_slot_get, x_param, x_spill_reg, x_reload_reg, x_label_*,
+ * Covers: xpool_init, x_frame_slot, x64_slot_get, x_param, x_spill_reg,
+ * x_reload_reg, x_label_*,
* emit_jmp_label, emit_jcc_label, x_jump, x64_force_reg_int, emit_cmp_ab,
* x_cmp_branch, x_cmp, x_scope_*, x_break_to, x_continue_to. */
@@ -23,38 +23,9 @@ void xpool_init(XRegPool* p, const Reg* order, u32 nregs) {
cg_simple_regpool_init_ordered(p, order, nregs);
}
-static Reg xpool_alloc(XRegPool* p) {
- return cg_simple_regpool_alloc(p);
-}
-
-static int xpool_free(XRegPool* p, Reg r) {
- return cg_simple_regpool_free(p, r);
-}
-
/* ============================================================
* Registers / frame */
-Reg x_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty) {
- XImpl* a = impl_of(t);
- (void)ty;
- if (cls == RC_INT) return xpool_alloc(&a->int_pool);
- if (cls == RC_FP) return xpool_alloc(&a->fp_pool);
- compiler_panic(t->c, a->loc, "x64 alloc_reg: class %d unimpl", (int)cls);
-}
-
-void x_free_reg(CGTarget* t, Reg r, RegClass cls) {
- XImpl* a = impl_of(t);
- XRegPool* p = (cls == RC_FP) ? &a->fp_pool : &a->int_pool;
- int rc = xpool_free(p, r);
- if (rc == 1) return;
- if (rc == -1) {
- compiler_panic(t->c, a->loc, "x64 free_reg: reg %u already free",
- (unsigned)r);
- }
- compiler_panic(t->c, a->loc, "x64 free_reg: reg %u not in %s pool",
- (unsigned)r, cls == RC_FP ? "fp" : "int");
-}
-
FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
XImpl* a = impl_of(t);
if (a->nslots == a->slots_cap) {
@@ -179,7 +150,6 @@ void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
addr.type = ma.type;
addr.v.frame_slot = slot;
x_store(t, addr, src, ma);
- x_free_reg(t, src.v.reg, src.cls);
}
void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -193,8 +193,6 @@ void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst,
void xpool_init(XRegPool* p, const Reg* order, u32 nregs);
XSlot* x64_slot_get(XImpl* a, FrameSlot fs);
FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d);
-Reg x_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty);
-void x_free_reg(CGTarget* t, Reg r, RegClass cls);
void x_param(CGTarget* t, const CGParamDesc* p);
void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma);
void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma);
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -1849,8 +1849,6 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
t->func_begin = x_func_begin;
t->func_end = x_func_end;
- t->alloc_reg = x_alloc_reg;
- t->free_reg = x_free_reg;
t->frame_slot = x_frame_slot;
t->param = x_param;
t->spill_reg = x_spill_reg;
diff --git a/src/opt/ir.c b/src/opt/ir.c
@@ -1,7 +1,7 @@
/* ir.c — Func/Block/Inst plumbing for the SSA IR (doc/OPT.md §1).
*
- * Each CGTarget call recorded by opt_cgtarget produces exactly one Inst
- * (or none, for pure bookkeeping calls like alloc_reg). Storage is per-
+ * Each CGTarget call recorded by opt_cgtarget produces exactly one Inst.
+ * Storage is per-
* Func arena, allocated against c->tu so the Func survives until
* cgtarget_finalize.
*
@@ -9,10 +9,11 @@
* - VAL_NONE (= 0) is reserved; first allocated Val is 1.
* - val_def_block / val_def_inst / val_type / val_cls are parallel
* arrays indexed by Val.
- * - Inst.opnds is Operand[] (not Val[]): Reg/Val are collapsed
- * (doc/OPT.md §5.1) and OPK_REG operands' v.reg field IS the Val
- * used at this site. Other OpKinds (IMM/LOCAL/GLOBAL/INDIRECT) are
- * not Val uses for SSA dataflow.
+ * - Inst.opnds is Operand[] (not Val[]): virtual Reg/Val are collapsed
+ * (doc/OPT.md §5.1) and OPK_REG operands' v.reg field IS the Val used
+ * at this site. CG mints those virtual regs through the shared simple
+ * allocator when driving opt_cgtarget. Other OpKinds
+ * (IMM/LOCAL/GLOBAL/INDIRECT) are not Val uses for SSA dataflow.
*/
#include "opt/ir.h"
@@ -60,6 +61,24 @@ Val ir_alloc_val(Func* f, CfreeCgTypeId t, u8 cls) {
return v;
}
+void ir_ensure_val(Func* f, Val v, CfreeCgTypeId t, u8 cls) {
+  if (v == VAL_NONE) return;
+  if (!f->nvals) {
+    val_table_grow(f, 16);
+    f->nvals = 1; /* slot 0 stays reserved for VAL_NONE */
+  }
+  if (f->vals_cap <= v) val_table_grow(f, v + 1u);
+  for (; f->nvals <= v; f->nvals++) {
+    /* Ids up to and including v that have no entry yet get neutral values. */
+    f->val_cls[f->nvals] = RC_INT;
+    f->val_type[f->nvals] = 0;
+    f->val_def_inst[f->nvals] = 0;
+    f->val_def_block[f->nvals] = 0;
+  }
+  f->val_cls[v] = cls; /* class is overwritten on every call */
+  if (f->val_type[v] == 0) f->val_type[v] = t; /* type: first non-zero wins */
+}
+
/* ---- blocks ---- */
u32 ir_block_new(Func* f) {
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -333,6 +333,7 @@ FrameSlot ir_frame_slot_new(Func*, const FrameSlotDesc*);
void ir_param_add(Func*, const CGParamDesc*);
Val ir_alloc_val(Func*, CfreeCgTypeId, u8 cls);
+void ir_ensure_val(Func*, Val, CfreeCgTypeId, u8 cls);
Inst* ir_emit(Func*, u32 block, IROp);
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -12,13 +12,13 @@
* - spill_reg / reload_reg are CG -O0 register-pressure
* mechanics. CG never invokes them on real backends in v1, and
* they're meaningless for opt's vreg space — calling them is a
- * wiring bug, so we panic loudly.
- * - free_reg is documented as a hint and is silently ignored. */
+ * wiring bug, so we panic loudly. */
#include "opt/opt.h"
#include <string.h>
+#include "arch/regalloc.h"
#include "core/arena.h"
#include "core/core.h"
#include "opt/ir.h"
@@ -69,8 +69,24 @@ static int intrinsic_terminates(IntrinKind kind) {
kind == INTRIN_UNREACHABLE;
}
+static void ensure_operand(Func* f, const Operand* op) {
+  if (!op) return;
+  switch (op->kind) {
+    case OPK_REG: ir_ensure_val(f, (Val)op->v.reg, op->type, op->cls); break;
+    case OPK_INDIRECT: ir_ensure_val(f, (Val)op->v.ind.base, 0, RC_INT); break;
+    default: break; /* other operand kinds: nothing to register */
+  }
+}
+
+static void ensure_abivalue(Func* f, const CGABIValue* v) {
+  if (!v) return; /* absent ABI value: nothing to register */
+  ensure_operand(f, &v->storage); /* whole-value storage first */
+  for (u32 k = 0; k < v->nparts; ++k) ensure_operand(f, &(v->parts + k)->op);
+}
+
static Operand* dup_opnds(Func* f, const Operand* src, u32 n) {
if (!n) return NULL;
+ for (u32 i = 0; i < n; ++i) ensure_operand(f, &src[i]);
Operand* dst = arena_array(f->arena, Operand, n);
memcpy(dst, src, sizeof(Operand) * n);
return dst;
@@ -115,18 +131,6 @@ static void w_func_end(CGTarget* t);
/* ---- registers and frame slots ---- */
-static Reg w_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty) {
- OptImpl* o = impl_of(t);
- Val v = ir_alloc_val(o->f, ty, (u8)cls);
- return (Reg)v;
-}
-
-static void w_free_reg(CGTarget* t, Reg r, RegClass cls) {
- (void)t;
- (void)r;
- (void)cls;
-}
-
static FrameSlot w_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
OptImpl* o = impl_of(t);
return ir_frame_slot_new(o->f, d);
@@ -544,20 +548,26 @@ static void w_call(CGTarget* t, const CGCallDesc* d) {
OptImpl* o = impl_of(t);
Inst* in = rec(o, IR_CALL);
IRCallAux* aux = arena_znew(o->f->arena, IRCallAux);
+ ensure_operand(o->f, &d->callee);
aux->desc = *d;
if (d->nargs) {
CGABIValue* args = arena_array(o->f->arena, CGABIValue, d->nargs);
for (u32 i = 0; i < d->nargs; ++i) {
+ ensure_abivalue(o->f, &d->args[i]);
args[i] = d->args[i];
args[i].parts =
dup_parts(o->f->arena, d->args[i].parts, d->args[i].nparts);
}
aux->desc.args = args;
}
+ ensure_abivalue(o->f, &d->ret);
aux->desc.ret = d->ret;
aux->desc.ret.parts = dup_parts(o->f->arena, d->ret.parts, d->ret.nparts);
in->extra.aux = aux;
in->type = d->fn_type;
+ if (d->ret.storage.kind == OPK_REG) {
+ set_def(o->f, in, o->cur, (Val)d->ret.storage.v.reg, d->ret.type);
+ }
}
static void w_ret(CGTarget* t, const CGABIValue* v) {
@@ -565,6 +575,7 @@ static void w_ret(CGTarget* t, const CGABIValue* v) {
Inst* in = rec(o, IR_RET);
IRRetAux* aux = arena_znew(o->f->arena, IRRetAux);
if (v) {
+ ensure_abivalue(o->f, v);
aux->present = 1;
aux->val = *v;
aux->val.parts = dup_parts(o->f->arena, v->parts, v->nparts);
@@ -678,6 +689,16 @@ static void w_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
in->extra.aux = aux;
if (prior.kind == OPK_REG)
set_def(o->f, in, o->cur, (Val)prior.v.reg, prior.type);
+ if (ok.kind == OPK_REG) {
+ in->ndefs = 2;
+ in->defs = arena_array(o->f->arena, Val, 2);
+ in->defs[0] = (prior.kind == OPK_REG) ? (Val)prior.v.reg : VAL_NONE;
+ in->defs[1] = (Val)ok.v.reg;
+ if (in->defs[1] != VAL_NONE && in->defs[1] < o->f->nvals) {
+ o->f->val_def_block[in->defs[1]] = o->cur;
+ o->f->val_def_inst[in->defs[1]] = o->f->blocks[o->cur].ninsts - 1u;
+ }
+ }
}
static void w_fence(CGTarget* t, MemOrder mo) {
@@ -696,8 +717,14 @@ static void w_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
aux->narg = na;
aux->dsts = nd ? arena_array(o->f->arena, Operand, nd) : NULL;
aux->args = na ? arena_array(o->f->arena, Operand, na) : NULL;
- if (nd) memcpy(aux->dsts, dsts, sizeof(Operand) * nd);
- if (na) memcpy(aux->args, args, sizeof(Operand) * na);
+ if (nd) {
+ memcpy(aux->dsts, dsts, sizeof(Operand) * nd);
+ for (u32 i = 0; i < nd; ++i) ensure_operand(o->f, &aux->dsts[i]);
+ }
+ if (na) {
+ memcpy(aux->args, args, sizeof(Operand) * na);
+ for (u32 i = 0; i < na; ++i) ensure_operand(o->f, &aux->args[i]);
+ }
in->extra.aux = aux;
if (nd == 1 && dsts[0].kind == OPK_REG) {
set_def(o->f, in, o->cur, (Val)dsts[0].v.reg, dsts[0].type);
@@ -748,18 +775,34 @@ static void w_asm_block(CGTarget* t, const char* tmpl,
memcpy(aux->outs, outs, nout * sizeof *outs);
aux->out_ops = arena_array(o->f->arena, Operand, nout);
memcpy(aux->out_ops, out_ops, nout * sizeof *out_ops);
+ for (u32 i = 0; i < nout; ++i) ensure_operand(o->f, &aux->out_ops[i]);
}
if (nin) {
aux->ins = arena_array(o->f->arena, AsmConstraint, nin);
memcpy(aux->ins, ins, nin * sizeof *ins);
aux->in_ops = arena_array(o->f->arena, Operand, nin);
memcpy(aux->in_ops, in_ops, nin * sizeof *in_ops);
+ for (u32 i = 0; i < nin; ++i) ensure_operand(o->f, &aux->in_ops[i]);
}
if (nclob) {
aux->clobbers = arena_array(o->f->arena, Sym, nclob);
memcpy(aux->clobbers, clobbers, nclob * sizeof *clobbers);
}
in->extra.aux = aux;
+ if (nout) {
+ in->ndefs = nout;
+ in->defs = arena_array(o->f->arena, Val, nout);
+ for (u32 i = 0; i < nout; ++i) {
+ in->defs[i] =
+ (out_ops[i].kind == OPK_REG) ? (Val)out_ops[i].v.reg : VAL_NONE;
+ if (in->defs[i] != VAL_NONE && in->defs[i] < o->f->nvals) {
+ o->f->val_def_block[in->defs[i]] = o->cur;
+ o->f->val_def_inst[in->defs[i]] = o->f->blocks[o->cur].ninsts - 1u;
+ }
+ }
+ in->def = in->defs[0];
+ in->type = out_ops[0].type;
+ }
}
static void w_set_loc(CGTarget* t, SrcLoc loc) {
@@ -782,6 +825,7 @@ typedef struct ReplayCtx {
u8* val_alloced;
u8* block_label_placed;
u8 identity_regs;
+ CGSimpleRegAlloc regalloc;
} ReplayCtx;
static Reg val_to_target_reg(ReplayCtx* r, Val v) {
@@ -794,7 +838,11 @@ static Reg val_to_target_reg(ReplayCtx* r, Val v) {
}
if (!r->val_alloced[v]) {
r->val_to_reg[v] =
- r->tgt->alloc_reg(r->tgt, (RegClass)f->val_cls[v], f->val_type[v]);
+ cg_simple_regalloc_alloc(&r->regalloc, (RegClass)f->val_cls[v]);
+ if (r->val_to_reg[v] == (Reg)REG_NONE) {
+ SrcLoc loc = {0, 0, 0};
+ compiler_panic(r->c, loc, "opt replay: hard reg pool exhausted");
+ }
r->val_alloced[v] = 1;
}
return r->val_to_reg[v];
@@ -1165,6 +1213,7 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
r.f = f;
r.tgt = w;
r.identity_regs = identity ? 1u : 0u;
+ cg_simple_regalloc_init(&r.regalloc);
u32 nv = f->nvals ? f->nvals : 1u;
r.val_to_reg = arena_zarray(f->arena, Reg, nv);
for (u32 i = 0; i < nv; ++i) r.val_to_reg[i] = REG_NONE;
@@ -1183,6 +1232,17 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
* dereference them so we don't translate. */
w->func_begin(w, &f->desc);
+  if (!r.identity_regs) { /* seed replay's own pools from the backend */
+    for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) {
+      const Reg* regs = NULL;
+      u32 nregs = 0;
+      if (w->get_allocable_regs)
+        w->get_allocable_regs(w, (RegClass)cidx, &regs, &nregs);
+      if (regs && nregs) /* hook absent or class empty: pool stays unset */
+        cg_simple_regalloc_set_ordered(&r.regalloc, (RegClass)cidx, regs, nregs);
+    }
+  }
+
for (u32 i = 0; i < f->nframe_slots; ++i) {
IRFrameSlot* s = &f->frame_slots[i];
FrameSlotDesc d = {0};
@@ -1216,7 +1276,7 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
}
/* At -O1, opt managed allocation and emitted hard regs directly,
- * bypassing the backend's alloc_reg. Tell the backend which hard
+ * bypassing backend-local allocation. Tell the backend which hard
* regs were actually assigned so it can save the right callee-saved
* subset in prologue/epilogue.
*
@@ -1240,6 +1300,17 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
}
if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused);
}
+ } else if (!r.identity_regs && w->reserve_hard_regs) {
+ for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
+ CGSimpleRegPool* p = &r.regalloc.pools[c];
+ Reg used[CG_SIMPLE_REGALLOC_MAX_REGS];
+ u32 nused = 0;
+ for (u32 i = 0; i < p->hwm && i < p->nregs; ++i) {
+ Reg hr = cg_simple_regpool_reg_at(p, i);
+ if (hr != (Reg)REG_NONE) used[nused++] = hr;
+ }
+ if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused);
+ }
}
w->func_end(w);
@@ -1323,12 +1394,11 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) {
t->obj = target->obj;
t->mc = target->mc;
t->debug = target->debug;
+ t->virtual_regs = 1;
t->func_begin = w_func_begin;
t->func_end = w_func_end;
- t->alloc_reg = w_alloc_reg;
- t->free_reg = w_free_reg;
t->frame_slot = w_frame_slot;
t->param = w_param;
t->spill_reg = w_spill_reg;
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -6,8 +6,8 @@
/* opt_cgtarget: a CGTarget wrapper that records each function as IR.
*
- * - alloc_reg returns a fresh virtual reg per call (typed). The Reg space is
- * unbounded for opt_cgtarget; free_reg is treated as a hint and ignored.
+ * - opt_cgtarget advertises virtual_regs. CG mints unbounded virtual Reg ids
+ * through the shared simple allocator and passes them to normal emit calls.
* - Every other emit-side call is recorded into the current block as one
* SSA Inst (with the current SrcLoc from set_loc).
* - On CGTarget.func_end it runs the intra-procedural pipeline (down through
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -754,7 +754,7 @@ static void opt_emit_no_virtual_alloc(void) {
fd.fn_type = cfree_cg_type_func(tc.c, sig);
opt->func_begin(opt, &fd);
- Reg a = opt->alloc_reg(opt, RC_INT, tc.i32);
+ Reg a = 1;
opt->load_imm(opt, op_reg_(a, tc.i32), 42);
CGABIValue retv = {0};
retv.type = tc.i32;