kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit b83a60ab03163c763a4e01b3d03435f784131628
parent 5a7642085de670403406e9ad6a3f29cbd73ef3e1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 12:06:06 -0700

Move CG register allocation out of CGTarget

Diffstat:
M doc/OPT1.md | 2 +-
M src/api/cg.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
M src/arch/aarch64/alloc.c | 44 --------------------------------------------
M src/arch/aarch64/internal.h | 4 ----
M src/arch/arch.h | 24 +++++++++++-------------
M src/arch/regalloc.c | 16 ++++++++++++++++
M src/arch/regalloc.h | 4 ++++
M src/arch/rv64/alloc.c | 30 +-----------------------------
M src/arch/rv64/internal.h | 8 --------
M src/arch/rv64/ops.c | 2 --
M src/arch/x64/alloc.c | 34 ++--------------------------------
M src/arch/x64/internal.h | 2 --
M src/arch/x64/ops.c | 2 --
M src/opt/ir.c | 31 +++++++++++++++++++++++++------
M src/opt/ir.h | 1 +
M src/opt/opt.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M src/opt/opt.h | 4 ++--
M test/opt/opt_test.c | 2 +-
18 files changed, 229 insertions(+), 178 deletions(-)

diff --git a/doc/OPT1.md b/doc/OPT1.md @@ -24,7 +24,7 @@ substitute a behaviorally similar shortcut without updating both documents. tied hard-reg needs, frequency, live length, then stable id. - [x] Add the rewrite pass: map virtual regs to hard regs or `FS_SPILL` slots, inserting reloads/stores for spilled uses and defs. -- [x] Make `opt_emit` stop relying on wrapped-target `alloc_reg` for virtual +- [x] Make `opt_emit` stop relying on wrapped-target register allocation for virtual values after rewrite. - [x] Fill in target-aware `opt_machinize`/`opt_combine`, starting with AArch64 ABI/call constraints, noop move deletion, and safe single-use diff --git a/src/api/cg.c b/src/api/cg.c @@ -8,6 +8,7 @@ #include "api/cg_api.h" #include "api/cg_type.h" #include "arch/arch.h" +#include "arch/regalloc.h" #include "core/arena.h" #include "core/heap.h" #include "core/pool.h" @@ -945,6 +946,7 @@ struct CfreeCg { ObjBuilder* obj; CGTarget* target; MCEmitter* mc; + CGSimpleRegAlloc regalloc; ApiSValue* stack; u32 sp; @@ -1223,6 +1225,61 @@ static ApiSValue* api_pick_victim(CfreeCg* g, u8 cls) { static MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv); +static void api_regalloc_begin(CfreeCg* g) { + CGTarget* T = g->target; + if (T->virtual_regs) { + cg_simple_regalloc_init_virtual(&g->regalloc); + return; + } + cg_simple_regalloc_init(&g->regalloc); + for (u32 c = 0; c < 3u; ++c) { + const Reg* regs = NULL; + u32 nregs = 0; + if (T->get_allocable_regs) + T->get_allocable_regs(T, (RegClass)c, &regs, &nregs); + if (regs && nregs) + cg_simple_regalloc_set_ordered(&g->regalloc, (RegClass)c, regs, nregs); + } +} + +static void api_regalloc_finish(CfreeCg* g) { + if (cg_simple_regalloc_is_virtual(&g->regalloc)) return; + if (!g->target->reserve_hard_regs) return; + for (u32 c = 0; c < 3u; ++c) { + CGSimpleRegPool* p = &g->regalloc.pools[c]; + Reg used[CG_SIMPLE_REGALLOC_MAX_REGS]; + u32 nused = 0; + for (u32 i = 0; i < p->hwm && i < p->nregs; ++i) { + Reg r = 
cg_simple_regpool_reg_at(p, i); + if (r != (Reg)REG_NONE) used[nused++] = r; + } + if (nused) g->target->reserve_hard_regs(g->target, (RegClass)c, used, nused); + } +} + +static Reg api_alloc_reg(CfreeCg* g, u8 cls) { + Reg r = cg_simple_regalloc_alloc(&g->regalloc, (RegClass)cls); + if (r == (Reg)REG_NONE && cg_simple_regalloc_is_virtual(&g->regalloc)) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: virtual regalloc exhausted"); + } + return r; +} + +static void api_free_reg(CfreeCg* g, Reg r, u8 cls) { + int rc; + if (r == (Reg)REG_NONE) return; + rc = cg_simple_regalloc_free(&g->regalloc, (RegClass)cls, r); + if (rc == 1) return; + if (rc == -1) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: regalloc - reg %u already free in class %u", + (unsigned)r, (unsigned)cls); + } + compiler_panic(g->c, g->cur_loc, + "CfreeCg: regalloc - reg %u not in class %u pool", + (unsigned)r, (unsigned)cls); +} + static int api_spill_avs_victim(CfreeCg* g, u8 cls) { CGTarget* T = g->target; if (!g->avs_in_flight) return 0; @@ -1233,6 +1290,7 @@ static int api_spill_avs_victim(CfreeCg* g, u8 cls) { FrameSlot slot = api_take_spill_slot(g, cls); ApiSValue tmp = api_make_sv(av->storage, av->type); T->spill_reg(T, av->storage, slot, api_mem_for_spill(g, &tmp)); + api_free_reg(g, av->storage.v.reg, cls); Operand local = api_op_local(slot, av->type); local.cls = cls; av->storage = local; @@ -1286,7 +1344,9 @@ static MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv) { static Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty) { CGTarget* T = g->target; - Reg r = T->alloc_reg(T, cls, ty); + Reg r; + (void)ty; + r = api_alloc_reg(g, cls); if (r != (Reg)REG_NONE) return r; ApiSValue* victim = api_pick_victim(g, cls); @@ -1295,6 +1355,7 @@ static Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty) { CfreeCgTypeId rty = api_owned_reg_type(g, victim); Operand victim_reg = api_op_reg((Reg)api_reg_of_sv(victim), rty); T->spill_reg(T, victim_reg, slot, 
api_mem_for_spill(g, victim)); + api_free_reg(g, victim_reg.v.reg, cls); victim->spill_slot = slot; victim->res = RES_SPILLED; api_set_owned_reg(victim, (Reg)REG_NONE); @@ -1304,7 +1365,7 @@ static Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty) { (unsigned)cls); } - r = T->alloc_reg(T, cls, ty); + r = api_alloc_reg(g, cls); if (r == (Reg)REG_NONE) { compiler_panic(g->c, g->cur_loc, "CfreeCg: regalloc - class %u still empty after spill", @@ -1342,7 +1403,7 @@ static Operand api_force_reg(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) { } else if (api_is_lvalue_sv(v)) { T->load(T, dst, v->op, api_mem_for_lvalue(g, &v->op, ty)); if (v->op.kind == OPK_INDIRECT) { - T->free_reg(T, v->op.v.ind.base, RC_INT); + api_free_reg(g, v->op.v.ind.base, RC_INT); } } else if (v->op.kind == OPK_GLOBAL) { T->addr_of(T, dst, v->op); @@ -1363,7 +1424,7 @@ static Operand api_force_reg_unless_imm(CfreeCg* g, ApiSValue* v, static void api_release(CfreeCg* g, ApiSValue* sv) { if (sv->res == RES_REG) { - g->target->free_reg(g->target, (Reg)api_reg_of_sv(sv), api_class_of_sv(sv)); + api_free_reg(g, (Reg)api_reg_of_sv(sv), api_class_of_sv(sv)); } else if (sv->res == RES_SPILLED) { api_return_spill_slot(g, sv->spill_slot, api_class_of_sv(sv)); sv->spill_slot = FRAME_SLOT_NONE; @@ -1373,13 +1434,13 @@ static void api_release(CfreeCg* g, ApiSValue* sv) { static void api_release_arg_storage(CfreeCg* g, Operand* storage) { if (storage->kind == OPK_REG) { - g->target->free_reg(g->target, storage->v.reg, storage->cls); + api_free_reg(g, storage->v.reg, storage->cls); } else if (storage->kind == OPK_LOCAL && storage->cls < 3) { CfreeCgTypeId ty = storage->type; if (cg_type_is_aggregate(g->c, ty)) return; api_return_spill_slot(g, storage->v.frame_slot, storage->cls); } else if (storage->kind == OPK_INDIRECT) { - g->target->free_reg(g->target, storage->v.ind.base, RC_INT); + api_free_reg(g, storage->v.ind.base, RC_INT); } } @@ -1837,10 +1898,12 @@ void cfree_cg_func_begin(CfreeCg* g, 
CfreeCgSym cg_sym) { g->avs_in_flight_n = 0; T->func_begin(T, &g->fn_desc); + api_regalloc_begin(g); } void cfree_cg_func_end(CfreeCg* g) { if (!g) return; + api_regalloc_finish(g); g->target->func_end(g->target); g->fn_abi = NULL; g->fn_ret_type = CFREE_CG_TYPE_NONE; @@ -2760,7 +2823,7 @@ static void api_asm_spill_sv(CfreeCg* g, ApiSValue* sv, Reg phys, FrameSlot slot = api_take_spill_slot(g, cls); Operand victim_reg = api_op_reg(phys, api_owned_reg_type(g, sv)); g->target->spill_reg(g->target, victim_reg, slot, api_mem_for_spill(g, sv)); - g->target->free_reg(g->target, phys, cls); + api_free_reg(g, phys, cls); sv->spill_slot = slot; sv->res = RES_SPILLED; api_set_owned_reg(sv, (Reg)REG_NONE); @@ -3631,9 +3694,9 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) { T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty)); } T->binop(T, BO_IADD, result, base_op, scaled); - T->free_reg(T, sr, RC_INT); + api_free_reg(g, sr, RC_INT); } - if (free_base_op) T->free_reg(T, base_op.v.reg, RC_INT); + if (free_base_op) api_free_reg(g, base_op.v.reg, RC_INT); if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY) api_release(g, &base); api_release(g, &idx); api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, elem_ty), elem_ty)); @@ -3696,7 +3759,7 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) { result = api_op_reg(fr, rec_ptr_ty); T->binop(T, BO_IADD, result, base_addr, api_op_imm((i64)field_offset, rec_ptr_ty)); - T->free_reg(T, base_addr.v.reg, RC_INT); + api_free_reg(g, base_addr.v.reg, RC_INT); } api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, field_ty), field_ty)); @@ -3811,7 +3874,7 @@ void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type, g->avs_in_flight_n = 0; if (callee.op.kind != OPK_GLOBAL) { - T->free_reg(T, callee_op.v.reg, RC_INT); + api_free_reg(g, callee_op.v.reg, RC_INT); } if (has_result) { @@ -3876,7 +3939,7 @@ static void api_cg_tail_call(CfreeCg* g, uint32_t nargs, api_release_arg_storage(g, 
&avs[i].storage); } if (callee.op.kind != OPK_GLOBAL) { - T->free_reg(T, callee_op.v.reg, RC_INT); + api_free_reg(g, callee_op.v.reg, RC_INT); } } diff --git a/src/arch/aarch64/alloc.c b/src/arch/aarch64/alloc.c @@ -17,14 +17,6 @@ void regpool_init(RegPool* p, u8 base, u8 nregs) { cg_simple_regpool_init_range(p, base, nregs); } -Reg regpool_alloc(RegPool* p) { - return cg_simple_regpool_alloc(p); -} - -int regpool_free(RegPool* p, Reg r) { - return cg_simple_regpool_free(p, r); -} - /* ============================================================ * Slot accessor * ============================================================ */ @@ -34,39 +26,6 @@ AASlot* aa64_slot_get(AAImpl* a, FrameSlot fs) { return &a->slots[fs - 1]; } -/* ============================================================ - * Register allocation / free - * ============================================================ */ - -static Reg aa_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty) { - AAImpl* a = impl_of(t); - (void)ty; - if (cls == RC_INT) return regpool_alloc(&a->int_pool); - if (cls == RC_FP) return regpool_alloc(&a->fp_pool); - compiler_panic(t->c, a->loc, "aarch64 alloc_reg: class %d unimpl", (int)cls); -} - -void aa_free_reg(CGTarget* t, Reg r, RegClass cls) { - AAImpl* a = impl_of(t); - RegPool* p; - switch (cls) { - case RC_INT: p = &a->int_pool; break; - case RC_FP: p = &a->fp_pool; break; - default: - compiler_panic(t->c, a->loc, "aarch64 free_reg: class %d unimpl", - (int)cls); - } - int rc = regpool_free(p, r); - if (rc == 1) return; - if (rc == -1) { - compiler_panic(t->c, a->loc, - "aarch64 free_reg: reg %u already free in %s pool", - (unsigned)r, cls == RC_FP ? "fp" : "int"); - } - compiler_panic(t->c, a->loc, "aarch64 free_reg: reg %u not in %s pool", - (unsigned)r, cls == RC_FP ? 
"fp" : "int"); -} - static int aa_resolve_reg_name(CGTarget* t, Sym name, Reg* out, RegClass* cls_out) { (void)t; @@ -105,7 +64,6 @@ static void aa_spill_reg(CGTarget* t, Operand src, FrameSlot slot, addr.type = ma.type; addr.v.frame_slot = slot; aa_store(t, addr, src, ma); - aa_free_reg(t, src.v.reg, src.cls); } static void aa_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, @@ -279,8 +237,6 @@ static void aa_continue_to(CGTarget* t, CGScope s) { /* Expose vtable entries to ops.c constructor via a registration helper. * ops.c calls this after the basic ops vtable is populated. */ void aa_alloc_vtable_init(CGTarget* t) { - t->alloc_reg = aa_alloc_reg; - t->free_reg = aa_free_reg; t->spill_reg = aa_spill_reg; t->reload_reg = aa_reload_reg; t->resolve_reg_name = aa_resolve_reg_name; diff --git a/src/arch/aarch64/internal.h b/src/arch/aarch64/internal.h @@ -276,9 +276,6 @@ typedef struct AAImpl { /* regpool (alloc.c) */ void regpool_init(RegPool* p, u8 base, u8 nregs); -Reg regpool_alloc(RegPool* p); -int regpool_free(RegPool* p, Reg r); - /* emit.c helpers used in alloc.c / ops.c */ void aa64_emit32(MCEmitter* mc, u32 word); void aa64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word); @@ -297,7 +294,6 @@ void aa_param(CGTarget* t, const CGParamDesc* p); /* alloc.c helpers used in emit.c / ops.c */ AAImpl* impl_of(CGTarget* t); AASlot* aa64_slot_get(AAImpl* a, FrameSlot fs); -void aa_free_reg(CGTarget* t, Reg r, RegClass cls); void aa_jump(CGTarget* t, Label l); /* ops.c helpers used in alloc.c */ diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -455,6 +455,8 @@ struct CGTarget { Compiler* c; ObjBuilder* obj; MCEmitter* mc; + u8 virtual_regs; + u8 pad0[3]; /* Optional. 
When non-NULL, per-instruction emit calls Debug to record * line rows; func_begin/func_end attribute PC ranges to the active @@ -468,24 +470,20 @@ struct CGTarget { void (*func_begin)(CGTarget*, const CGFuncDesc*); void (*func_end)(CGTarget*); - /* ---- registers and frame slots ---- - * At -O0 CG is TCC-style and owns the value stack: it decides which live - * values must be spilled/reloaded across register pressure, calls, and asm. - * Real targets return physical scratch registers and implement spill/reload - * mechanics; opt_cgtarget returns fresh virtual regs and ignores spills. */ - Reg (*alloc_reg)(CGTarget*, RegClass, CfreeCgTypeId); - void (*free_reg)(CGTarget*, Reg, RegClass); /* hint; opt_cgtarget ignores */ + /* ---- frame slots and spill/reload ---- + * CG and opt allocate caller-visible registers and pass concrete Operand + * regs to the target. Plain machine targets consume hard regs; opt_cgtarget + * sets virtual_regs and records virtual Reg ids as SSA values. */ FrameSlot (*frame_slot)(CGTarget*, const FrameSlotDesc*); void (*param)(CGTarget*, const CGParamDesc*); void (*spill_reg)(CGTarget*, Operand src_reg, FrameSlot, MemAccess); void (*reload_reg)(CGTarget*, Operand dst_reg, FrameSlot, MemAccess); /* ---- opt/back-end register coordination ---- - * At -O1 opt allocates virtual registers internally and maps them to - * hard registers or spill slots. The backend still owns prologue/epilogue - * and scratch-register policy. These hooks let the two sides agree on - * which physical registers are in play without opt hard-coding arch - * details. */ + * CGTarget users allocate caller-visible registers before invoking target + * ops. Direct CG uses these hooks to initialize its simple hard-reg + * allocator. opt uses them after IR rewriting to coordinate assigned hard + * regs and backend scratch policy without hard-coding arch details. */ /* Return the target's allocable hard register pool for `cls`. 
* Sets *out to a stable array and *nregs to its length. The array @@ -506,7 +504,7 @@ struct CGTarget { * func_end. The backend updates prologue/epilogue bookkeeping so it * saves/restores only the callee-saved subset that opt used. * - * At -O0 this is a no-op (the backend tracks usage via alloc_reg). */ + * Direct CG and opt both call this after emitting hard-register operands. */ void (*reserve_hard_regs)(CGTarget*, RegClass, const Reg* regs, u32 n); /* ---- labels and control flow ---- */ diff --git a/src/arch/regalloc.c b/src/arch/regalloc.c @@ -64,6 +64,12 @@ void cg_simple_regalloc_init(CGSimpleRegAlloc* a) { memset(a, 0, sizeof *a); } +void cg_simple_regalloc_init_virtual(CGSimpleRegAlloc* a) { + memset(a, 0, sizeof *a); + a->virtual_regs = 1; + a->next_virtual = 1; +} + void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, RegClass cls, Reg base, u32 nregs) { if ((u32)cls >= 3u) return; @@ -78,15 +84,25 @@ void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls, Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, RegClass cls) { if ((u32)cls >= 3u) return (Reg)REG_NONE; + if (a->virtual_regs) return a->next_virtual++; return cg_simple_regpool_alloc(&a->pools[cls]); } int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r) { if ((u32)cls >= 3u) return -2; + if (a->virtual_regs) { + (void)r; + return 1; + } return cg_simple_regpool_free(&a->pools[cls], r); } void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r) { if ((u32)cls >= 3u) return; + if (a->virtual_regs) return; cg_simple_regpool_reserve(&a->pools[cls], r); } + +int cg_simple_regalloc_is_virtual(const CGSimpleRegAlloc* a) { + return a->virtual_regs != 0; +} diff --git a/src/arch/regalloc.h b/src/arch/regalloc.h @@ -15,6 +15,8 @@ typedef struct CGSimpleRegPool { typedef struct CGSimpleRegAlloc { CGSimpleRegPool pools[3]; /* indexed by RegClass */ + u32 virtual_regs; + Reg next_virtual; } CGSimpleRegAlloc; void 
cg_simple_regpool_init_range(CGSimpleRegPool* p, Reg base, u32 nregs); @@ -26,6 +28,7 @@ void cg_simple_regpool_reserve(CGSimpleRegPool* p, Reg r); Reg cg_simple_regpool_reg_at(const CGSimpleRegPool* p, u32 idx); void cg_simple_regalloc_init(CGSimpleRegAlloc* a); +void cg_simple_regalloc_init_virtual(CGSimpleRegAlloc* a); void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, RegClass cls, Reg base, u32 nregs); void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls, @@ -33,5 +36,6 @@ void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls, Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, RegClass cls); int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r); void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r); +int cg_simple_regalloc_is_virtual(const CGSimpleRegAlloc* a); #endif diff --git a/src/arch/rv64/alloc.c b/src/arch/rv64/alloc.c @@ -2,34 +2,7 @@ #include "arch/rv64/internal.h" -/* ---- regs / frame ---- */ - -Reg rv_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty) { - RImpl* a = impl_of(t); - (void)ty; - if (cls == RC_INT) return regpool_alloc(&a->int_pool); - if (cls == RC_FP) return regpool_alloc(&a->fp_pool); - compiler_panic(t->c, a->loc, "rv64 alloc_reg: class %d unimpl", (int)cls); -} - -void rv_free_reg(CGTarget* t, Reg r, RegClass cls) { - RImpl* a = impl_of(t); - RegPool* p; - switch (cls) { - case RC_INT: p = &a->int_pool; break; - case RC_FP: p = &a->fp_pool; break; - default: - compiler_panic(t->c, a->loc, "rv64 free_reg: class %d unimpl", (int)cls); - } - int rc = regpool_free(p, r); - if (rc == 1) return; - if (rc == -1) { - compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u already free in %s pool", - (unsigned)r, cls == RC_FP ? "fp" : "int"); - } - compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u not in %s pool", - (unsigned)r, cls == RC_FP ? 
"fp" : "int"); -} +/* ---- frame ---- */ FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d) { RImpl* a = impl_of(t); @@ -176,7 +149,6 @@ void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot, addr.type = ma.type; addr.v.frame_slot = slot; rv_store(t, addr, src, ma); - rv_free_reg(t, src.v.reg, src.cls); } void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -112,12 +112,6 @@ static inline u32 reg_num(Operand op) { return op.v.reg & 0x1fu; } static inline void regpool_init(RegPool* p, u8 base, u8 nregs) { cg_simple_regpool_init_range(p, base, nregs); } -static inline Reg regpool_alloc(RegPool* p) { - return cg_simple_regpool_alloc(p); -} -static inline int regpool_free(RegPool* p, Reg r) { - return cg_simple_regpool_free(p, r); -} /* ---- emit.c: function lifecycle (referenced by ops.c vtable) ---- */ void rv_func_begin(CGTarget* t, const CGFuncDesc* fd); @@ -137,8 +131,6 @@ void emit_sp_addi(MCEmitter* mc, i64 imm); _Noreturn void rv_panic(CGTarget* t, const char* what); /* ---- alloc.c: all functions (non-static; referenced by ops.c vtable) ---- */ -Reg rv_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty); -void rv_free_reg(CGTarget* t, Reg r, RegClass cls); FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d); RvSlot* rv64_slot_get(RImpl* a, FrameSlot fs); void rv_param(CGTarget* t, const CGParamDesc* p); diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -1773,8 +1773,6 @@ CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->func_begin = rv_func_begin; t->func_end = rv_func_end; - t->alloc_reg = rv_alloc_reg; - t->free_reg = rv_free_reg; t->frame_slot = rv_frame_slot; t->param = rv_param; t->spill_reg = rv_spill_reg; diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c @@ -1,7 +1,7 @@ /* arch/x64/alloc.c — register pool, spill/reload, labels, control flow. 
* - * Covers: xpool_init/alloc/free, x_alloc_reg, x_free_reg, x_frame_slot, - * x64_slot_get, x_param, x_spill_reg, x_reload_reg, x_label_*, + * Covers: xpool_init, x_frame_slot, x64_slot_get, x_param, x_spill_reg, + * x_reload_reg, x_label_*, * emit_jmp_label, emit_jcc_label, x_jump, x64_force_reg_int, emit_cmp_ab, * x_cmp_branch, x_cmp, x_scope_*, x_break_to, x_continue_to. */ @@ -23,38 +23,9 @@ void xpool_init(XRegPool* p, const Reg* order, u32 nregs) { cg_simple_regpool_init_ordered(p, order, nregs); } -static Reg xpool_alloc(XRegPool* p) { - return cg_simple_regpool_alloc(p); -} - -static int xpool_free(XRegPool* p, Reg r) { - return cg_simple_regpool_free(p, r); -} - /* ============================================================ * Registers / frame */ -Reg x_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty) { - XImpl* a = impl_of(t); - (void)ty; - if (cls == RC_INT) return xpool_alloc(&a->int_pool); - if (cls == RC_FP) return xpool_alloc(&a->fp_pool); - compiler_panic(t->c, a->loc, "x64 alloc_reg: class %d unimpl", (int)cls); -} - -void x_free_reg(CGTarget* t, Reg r, RegClass cls) { - XImpl* a = impl_of(t); - XRegPool* p = (cls == RC_FP) ? &a->fp_pool : &a->int_pool; - int rc = xpool_free(p, r); - if (rc == 1) return; - if (rc == -1) { - compiler_panic(t->c, a->loc, "x64 free_reg: reg %u already free", - (unsigned)r); - } - compiler_panic(t->c, a->loc, "x64 free_reg: reg %u not in %s pool", - (unsigned)r, cls == RC_FP ? 
"fp" : "int"); -} - FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) { XImpl* a = impl_of(t); if (a->nslots == a->slots_cap) { @@ -179,7 +150,6 @@ void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, addr.type = ma.type; addr.v.frame_slot = slot; x_store(t, addr, src, ma); - x_free_reg(t, src.v.reg, src.cls); } void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -193,8 +193,6 @@ void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst, void xpool_init(XRegPool* p, const Reg* order, u32 nregs); XSlot* x64_slot_get(XImpl* a, FrameSlot fs); FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d); -Reg x_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty); -void x_free_reg(CGTarget* t, Reg r, RegClass cls); void x_param(CGTarget* t, const CGParamDesc* p); void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma); void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma); diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -1849,8 +1849,6 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->func_begin = x_func_begin; t->func_end = x_func_end; - t->alloc_reg = x_alloc_reg; - t->free_reg = x_free_reg; t->frame_slot = x_frame_slot; t->param = x_param; t->spill_reg = x_spill_reg; diff --git a/src/opt/ir.c b/src/opt/ir.c @@ -1,7 +1,7 @@ /* ir.c — Func/Block/Inst plumbing for the SSA IR (doc/OPT.md §1). * - * Each CGTarget call recorded by opt_cgtarget produces exactly one Inst - * (or none, for pure bookkeeping calls like alloc_reg). Storage is per- + * Each CGTarget call recorded by opt_cgtarget produces exactly one Inst. + * Storage is per- * Func arena, allocated against c->tu so the Func survives until * cgtarget_finalize. * @@ -9,10 +9,11 @@ * - VAL_NONE (= 0) is reserved; first allocated Val is 1. * - val_def_block / val_def_inst / val_type / val_cls are parallel * arrays indexed by Val. 
- * - Inst.opnds is Operand[] (not Val[]): Reg/Val are collapsed - * (doc/OPT.md §5.1) and OPK_REG operands' v.reg field IS the Val - * used at this site. Other OpKinds (IMM/LOCAL/GLOBAL/INDIRECT) are - * not Val uses for SSA dataflow. + * - Inst.opnds is Operand[] (not Val[]): virtual Reg/Val are collapsed + * (doc/OPT.md §5.1) and OPK_REG operands' v.reg field IS the Val used + * at this site. CG mints those virtual regs through the shared simple + * allocator when driving opt_cgtarget. Other OpKinds + * (IMM/LOCAL/GLOBAL/INDIRECT) are not Val uses for SSA dataflow. */ #include "opt/ir.h" @@ -60,6 +61,24 @@ Val ir_alloc_val(Func* f, CfreeCgTypeId t, u8 cls) { return v; } +void ir_ensure_val(Func* f, Val v, CfreeCgTypeId t, u8 cls) { + if (v == VAL_NONE) return; + if (f->nvals == 0) { + val_table_grow(f, 16); + f->nvals = 1; /* reserve slot 0 for VAL_NONE */ + } + if (v >= f->vals_cap) val_table_grow(f, v + 1u); + while (f->nvals <= v) { + f->val_def_block[f->nvals] = 0; + f->val_def_inst[f->nvals] = 0; + f->val_type[f->nvals] = 0; + f->val_cls[f->nvals] = RC_INT; + f->nvals++; + } + if (!f->val_type[v]) f->val_type[v] = t; + f->val_cls[v] = cls; +} + /* ---- blocks ---- */ u32 ir_block_new(Func* f) { diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -333,6 +333,7 @@ FrameSlot ir_frame_slot_new(Func*, const FrameSlotDesc*); void ir_param_add(Func*, const CGParamDesc*); Val ir_alloc_val(Func*, CfreeCgTypeId, u8 cls); +void ir_ensure_val(Func*, Val, CfreeCgTypeId, u8 cls); Inst* ir_emit(Func*, u32 block, IROp); diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -12,13 +12,13 @@ * - spill_reg / reload_reg are CG -O0 register-pressure * mechanics. CG never invokes them on real backends in v1, and * they're meaningless for opt's vreg space — calling them is a - * wiring bug, so we panic loudly. - * - free_reg is documented as a hint and is silently ignored. */ + * wiring bug, so we panic loudly. 
*/ #include "opt/opt.h" #include <string.h> +#include "arch/regalloc.h" #include "core/arena.h" #include "core/core.h" #include "opt/ir.h" @@ -69,8 +69,24 @@ static int intrinsic_terminates(IntrinKind kind) { kind == INTRIN_UNREACHABLE; } +static void ensure_operand(Func* f, const Operand* op) { + if (!op) return; + if (op->kind == OPK_REG) { + ir_ensure_val(f, (Val)op->v.reg, op->type, op->cls); + } else if (op->kind == OPK_INDIRECT) { + ir_ensure_val(f, (Val)op->v.ind.base, 0, RC_INT); + } +} + +static void ensure_abivalue(Func* f, const CGABIValue* v) { + if (!v) return; + ensure_operand(f, &v->storage); + for (u32 i = 0; i < v->nparts; ++i) ensure_operand(f, &v->parts[i].op); +} + static Operand* dup_opnds(Func* f, const Operand* src, u32 n) { if (!n) return NULL; + for (u32 i = 0; i < n; ++i) ensure_operand(f, &src[i]); Operand* dst = arena_array(f->arena, Operand, n); memcpy(dst, src, sizeof(Operand) * n); return dst; @@ -115,18 +131,6 @@ static void w_func_end(CGTarget* t); /* ---- registers and frame slots ---- */ -static Reg w_alloc_reg(CGTarget* t, RegClass cls, CfreeCgTypeId ty) { - OptImpl* o = impl_of(t); - Val v = ir_alloc_val(o->f, ty, (u8)cls); - return (Reg)v; -} - -static void w_free_reg(CGTarget* t, Reg r, RegClass cls) { - (void)t; - (void)r; - (void)cls; -} - static FrameSlot w_frame_slot(CGTarget* t, const FrameSlotDesc* d) { OptImpl* o = impl_of(t); return ir_frame_slot_new(o->f, d); @@ -544,20 +548,26 @@ static void w_call(CGTarget* t, const CGCallDesc* d) { OptImpl* o = impl_of(t); Inst* in = rec(o, IR_CALL); IRCallAux* aux = arena_znew(o->f->arena, IRCallAux); + ensure_operand(o->f, &d->callee); aux->desc = *d; if (d->nargs) { CGABIValue* args = arena_array(o->f->arena, CGABIValue, d->nargs); for (u32 i = 0; i < d->nargs; ++i) { + ensure_abivalue(o->f, &d->args[i]); args[i] = d->args[i]; args[i].parts = dup_parts(o->f->arena, d->args[i].parts, d->args[i].nparts); } aux->desc.args = args; } + ensure_abivalue(o->f, &d->ret); aux->desc.ret = 
d->ret; aux->desc.ret.parts = dup_parts(o->f->arena, d->ret.parts, d->ret.nparts); in->extra.aux = aux; in->type = d->fn_type; + if (d->ret.storage.kind == OPK_REG) { + set_def(o->f, in, o->cur, (Val)d->ret.storage.v.reg, d->ret.type); + } } static void w_ret(CGTarget* t, const CGABIValue* v) { @@ -565,6 +575,7 @@ static void w_ret(CGTarget* t, const CGABIValue* v) { Inst* in = rec(o, IR_RET); IRRetAux* aux = arena_znew(o->f->arena, IRRetAux); if (v) { + ensure_abivalue(o->f, v); aux->present = 1; aux->val = *v; aux->val.parts = dup_parts(o->f->arena, v->parts, v->nparts); @@ -678,6 +689,16 @@ static void w_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, in->extra.aux = aux; if (prior.kind == OPK_REG) set_def(o->f, in, o->cur, (Val)prior.v.reg, prior.type); + if (ok.kind == OPK_REG) { + in->ndefs = 2; + in->defs = arena_array(o->f->arena, Val, 2); + in->defs[0] = (prior.kind == OPK_REG) ? (Val)prior.v.reg : VAL_NONE; + in->defs[1] = (Val)ok.v.reg; + if (in->defs[1] != VAL_NONE && in->defs[1] < o->f->nvals) { + o->f->val_def_block[in->defs[1]] = o->cur; + o->f->val_def_inst[in->defs[1]] = o->f->blocks[o->cur].ninsts - 1u; + } + } } static void w_fence(CGTarget* t, MemOrder mo) { @@ -696,8 +717,14 @@ static void w_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, aux->narg = na; aux->dsts = nd ? arena_array(o->f->arena, Operand, nd) : NULL; aux->args = na ? 
arena_array(o->f->arena, Operand, na) : NULL; - if (nd) memcpy(aux->dsts, dsts, sizeof(Operand) * nd); - if (na) memcpy(aux->args, args, sizeof(Operand) * na); + if (nd) { + memcpy(aux->dsts, dsts, sizeof(Operand) * nd); + for (u32 i = 0; i < nd; ++i) ensure_operand(o->f, &aux->dsts[i]); + } + if (na) { + memcpy(aux->args, args, sizeof(Operand) * na); + for (u32 i = 0; i < na; ++i) ensure_operand(o->f, &aux->args[i]); + } in->extra.aux = aux; if (nd == 1 && dsts[0].kind == OPK_REG) { set_def(o->f, in, o->cur, (Val)dsts[0].v.reg, dsts[0].type); @@ -748,18 +775,34 @@ static void w_asm_block(CGTarget* t, const char* tmpl, memcpy(aux->outs, outs, nout * sizeof *outs); aux->out_ops = arena_array(o->f->arena, Operand, nout); memcpy(aux->out_ops, out_ops, nout * sizeof *out_ops); + for (u32 i = 0; i < nout; ++i) ensure_operand(o->f, &aux->out_ops[i]); } if (nin) { aux->ins = arena_array(o->f->arena, AsmConstraint, nin); memcpy(aux->ins, ins, nin * sizeof *ins); aux->in_ops = arena_array(o->f->arena, Operand, nin); memcpy(aux->in_ops, in_ops, nin * sizeof *in_ops); + for (u32 i = 0; i < nin; ++i) ensure_operand(o->f, &aux->in_ops[i]); } if (nclob) { aux->clobbers = arena_array(o->f->arena, Sym, nclob); memcpy(aux->clobbers, clobbers, nclob * sizeof *clobbers); } in->extra.aux = aux; + if (nout) { + in->ndefs = nout; + in->defs = arena_array(o->f->arena, Val, nout); + for (u32 i = 0; i < nout; ++i) { + in->defs[i] = + (out_ops[i].kind == OPK_REG) ? 
(Val)out_ops[i].v.reg : VAL_NONE; + if (in->defs[i] != VAL_NONE && in->defs[i] < o->f->nvals) { + o->f->val_def_block[in->defs[i]] = o->cur; + o->f->val_def_inst[in->defs[i]] = o->f->blocks[o->cur].ninsts - 1u; + } + } + in->def = in->defs[0]; + in->type = out_ops[0].type; + } } static void w_set_loc(CGTarget* t, SrcLoc loc) { @@ -782,6 +825,7 @@ typedef struct ReplayCtx { u8* val_alloced; u8* block_label_placed; u8 identity_regs; + CGSimpleRegAlloc regalloc; } ReplayCtx; static Reg val_to_target_reg(ReplayCtx* r, Val v) { @@ -794,7 +838,11 @@ static Reg val_to_target_reg(ReplayCtx* r, Val v) { } if (!r->val_alloced[v]) { r->val_to_reg[v] = - r->tgt->alloc_reg(r->tgt, (RegClass)f->val_cls[v], f->val_type[v]); + cg_simple_regalloc_alloc(&r->regalloc, (RegClass)f->val_cls[v]); + if (r->val_to_reg[v] == (Reg)REG_NONE) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(r->c, loc, "opt replay: hard reg pool exhausted"); + } r->val_alloced[v] = 1; } return r->val_to_reg[v]; @@ -1165,6 +1213,7 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { r.f = f; r.tgt = w; r.identity_regs = identity ? 1u : 0u; + cg_simple_regalloc_init(&r.regalloc); u32 nv = f->nvals ? f->nvals : 1u; r.val_to_reg = arena_zarray(f->arena, Reg, nv); for (u32 i = 0; i < nv; ++i) r.val_to_reg[i] = REG_NONE; @@ -1183,6 +1232,17 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { * dereference them so we don't translate. 
*/ w->func_begin(w, &f->desc); + if (!r.identity_regs) { + for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) { + const Reg* regs = NULL; + u32 nregs = 0; + if (w->get_allocable_regs) + w->get_allocable_regs(w, (RegClass)cidx, &regs, &nregs); + if (regs && nregs) + cg_simple_regalloc_set_ordered(&r.regalloc, (RegClass)cidx, regs, nregs); + } + } + for (u32 i = 0; i < f->nframe_slots; ++i) { IRFrameSlot* s = &f->frame_slots[i]; FrameSlotDesc d = {0}; @@ -1216,7 +1276,7 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { } /* At -O1, opt managed allocation and emitted hard regs directly, - * bypassing the backend's alloc_reg. Tell the backend which hard + * bypassing backend-local allocation. Tell the backend which hard * regs were actually assigned so it can save the right callee-saved * subset in prologue/epilogue. * @@ -1240,6 +1300,17 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { } if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused); } + } else if (!r.identity_regs && w->reserve_hard_regs) { + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { + CGSimpleRegPool* p = &r.regalloc.pools[c]; + Reg used[CG_SIMPLE_REGALLOC_MAX_REGS]; + u32 nused = 0; + for (u32 i = 0; i < p->hwm && i < p->nregs; ++i) { + Reg hr = cg_simple_regpool_reg_at(p, i); + if (hr != (Reg)REG_NONE) used[nused++] = hr; + } + if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused); + } } w->func_end(w); @@ -1323,12 +1394,11 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) { t->obj = target->obj; t->mc = target->mc; t->debug = target->debug; + t->virtual_regs = 1; t->func_begin = w_func_begin; t->func_end = w_func_end; - t->alloc_reg = w_alloc_reg; - t->free_reg = w_free_reg; t->frame_slot = w_frame_slot; t->param = w_param; t->spill_reg = w_spill_reg; diff --git a/src/opt/opt.h b/src/opt/opt.h @@ -6,8 +6,8 @@ /* opt_cgtarget: a CGTarget wrapper that records each function as IR. 
* - * - alloc_reg returns a fresh virtual reg per call (typed). The Reg space is - * unbounded for opt_cgtarget; free_reg is treated as a hint and ignored. + * - opt_cgtarget advertises virtual_regs. CG mints unbounded virtual Reg ids + * through the shared simple allocator and passes them to normal emit calls. * - Every other emit-side call is recorded into the current block as one * SSA Inst (with the current SrcLoc from set_loc). * - On CGTarget.func_end it runs the intra-procedural pipeline (down through diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -754,7 +754,7 @@ static void opt_emit_no_virtual_alloc(void) { fd.fn_type = cfree_cg_type_func(tc.c, sig); opt->func_begin(opt, &fd); - Reg a = opt->alloc_reg(opt, RC_INT, tc.i32); + Reg a = 1; opt->load_imm(opt, op_reg_(a, tc.i32), 42); CGABIValue retv = {0}; retv.type = tc.i32;