kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 3f5436216eb8b140100c44135c7ae268dd04781e
parent 71ff7998381c7bd7718d0b7e1c7e38d4ef613d59
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 12:06:07 -0700

Clean up O1 preg namespace

Diffstat:
Mdoc/MIR_RA_REPORT.md | 8++++----
Mdoc/OPT.md | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Mdoc/PERF.md | 4++--
Ddoc/VIRTUAL_REGS.md | 113-------------------------------------------------------------------------------
Msrc/opt/ir.c | 15++-------------
Msrc/opt/ir.h | 8++++----
Msrc/opt/opt.c | 73++++++++++++++++++++++++++++++++++++-------------------------------------
Msrc/opt/opt.h | 28++++++++++++++--------------
Msrc/opt/opt_internal.h | 25+++++++++++++++++++++++++
Msrc/opt/opt_util.c | 4++--
Msrc/opt/pass_analysis.c | 11+++++++----
Msrc/opt/pass_emit.c | 33+++++++++++++++++----------------
Msrc/opt/pass_live.c | 448++++++++++++++++++++++++++++++++++++++++---------------------------------------
Msrc/opt/pass_lower.c | 292++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mtest/opt/opt_test.c | 105++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mtest/opt/phase0_guardrails.sh | 4++--
16 files changed, 610 insertions(+), 657 deletions(-)

diff --git a/doc/MIR_RA_REPORT.md b/doc/MIR_RA_REPORT.md @@ -283,7 +283,7 @@ typedef struct OptLiveRange { typedef struct OptLiveRangeSet { OptLiveRange* ranges; u32 nranges; - u32* first_range_by_val; + u32* first_range_by_preg; u32 point_count; } OptLiveRangeSet; ``` @@ -557,7 +557,7 @@ a clean fast implementation over shims for old pass boundaries. - [x] `opt.ranges` - [x] `opt.range_points` - [x] `opt.range.point_visits` - - [x] `opt.range.value_scans` + - [x] `opt.range.preg_scans` - [x] `opt.range.live_words_touched` - [x] `opt.alloc.used_loc_words` - [x] `opt.alloc.hard_loc_words` @@ -796,8 +796,8 @@ Phase 8 cleanup status: trailing zero words, and bound copy/ior/and-not work by active words rather than the function's maximum value id. - [x] Reduce `opt.live_ranges.regalloc` on spill-heavy wide functions. - Current `opt.range.value_scans` is linear, so investigate live-word - touches, range insertion/compression, final per-value summaries, and + Current `opt.range.preg_scans` is linear, so investigate live-word + touches, range insertion/compression, final per-preg summaries, and live-across-call bookkeeping in `src/opt/pass_live.c`. - [x] Reduce `opt.rewrite.live_words_touched`. Rewrite no longer stores `Block.live_after`, but its rolling live set still grows faster than diff --git a/doc/OPT.md b/doc/OPT.md @@ -42,7 +42,8 @@ parse -> cg -> opt_cgtarget { record Func IR } -O2 func_end/finalize: build_cfg - block_cloning / addr_xform setup + build_reg_ssa + block_cloning build_ssa gvn copy_prop @@ -363,6 +364,7 @@ recording, CFG, machinize, liveness, allocation, combine, DCE, or emission. ``` build_cfg +build_reg_ssa if no address-transform candidates: block_cloning build_ssa @@ -415,20 +417,32 @@ Current implementation subset: build_cfg jump_cleanup(CFG) build_cfg +verify(o2-pre-ssa-cfg) +build_reg_ssa +verify(o2-reg-ssa) block_cloning -ssa_dce -copy_cleanup +verify(o2-block-clone-cfg) build_ssa +verify(o2-ssa) ssa_dce copy_cleanup +verify(o2-pre-addr-cleanup) addr_xform +verify(o2-addr-xform-ssa) ssa_dce +verify(o2-addr-xform-dce) copy_cleanup +verify(o2-addr-xform-copy-cleanup) +ssa_dce +verify(o2-copy-dce) make_conventional_ssa +verify(o2-conventional-ssa) undo_ssa copy_cleanup +verify(o2-undo-copy-cleanup) build_cfg -machinize ... opt_emit +verify(o2-undo-ssa) +shared O1 lowering pipeline (machinize ... jump_cleanup ... opt_emit) ``` Every CFG or value-use mutation above is followed by the relevant verifier @@ -467,9 +481,9 @@ The optimizer is no longer just a stub: Current behavior: - Level `1` records IR, builds CFG, machinizes, builds loop frequencies, - computes liveness, runs the current priority allocator/rewrite path, - combines, runs post-RA DCE, cleans jumps/layout, and emits through the - wrapped target. + computes PReg liveness and live ranges, runs the current priority + allocator/rewrite path over mutable PRegs, combines, runs post-RA DCE, + cleans jumps/layout, and emits through the wrapped target. - Level `2` has its own scheduler entry point. It now converts mutable pseudo-registers to SSA values, builds real mem2reg SSA, verifies phi/use-def invariants, runs the first small O2 transforms, lowers phis through @@ -501,9 +515,9 @@ Current behavior: Temporary defensive/pessimizing checks to lift: -- [ ] O2 register-backed locals/params are forced to frame storage before - mem2reg. Lift this when mutable pseudo-register SSA represents source-local - storage identity directly instead of relying on frame-slot promotion. +- [ ] O2 scalar locals/params that would be register-backed at O1 are still + initially recorded as frame slots for mem2reg. Lift this when source-local + storage identity can be represented directly through pseudo-register SSA. - [ ] O2 functions containing `IR_ASM_BLOCK` route through the non-SSA lowering path. Lift this when inline-asm tied operands, inout operands, early-clobbers, fixed-register constraints, and clobber liveness are modeled @@ -534,7 +548,9 @@ the exact pass being introduced, then expand through the CG corpus. Status: implemented as the public level-1 path. Keep extending tests and target-specific legality checks here, but do not make O1 depend on SSA/value -passes. +passes. The final O1 namespace is mutable `PReg`: `ir_ensure_preg` grows only +the pseudo-register metadata table, and O1 liveness, allocation, rewrite, and +metrics are PReg-indexed. Goal: make `-O1` stop replaying and emit through the optimized backend path without SSA value passes. @@ -560,9 +576,8 @@ Keep the allocator simple but not naive: Exit criteria: -- `CFREE_TOY_OPT_LEVELS="0 1" make test-toy` passes for targeted AArch64 - cases. -- Add focused allocation cases for call-clobber saves, stack spills, tied +- [x] `CFREE_TOY_OPT_LEVELS="0 1" make test-toy` passes for targeted cases. +- [x] Focused allocation cases cover call-clobber saves, stack spills, tied hard regs from inline asm, and values live through branches. ### Phase B - O2 Analysis Substrate @@ -612,6 +627,12 @@ Exit criteria: ### Phase C - Full Allocation Infrastructure +Status: pending quality work. The current priority allocator/rewrite path is +shared by O1 and O2 lowering and accepts the O2 live-range-splitting policy +bit, but coalescing and splitting are not implemented yet. Several small O2 +SSA cleanup passes landed first; this phase remains the allocation-quality +milestone before heavier value/memory optimization. + Goal: bring `-O2` allocation quality up to MIR's model before value passes start changing code aggressively. @@ -627,7 +648,7 @@ Exit criteria: - `-O1` remains green. - `-O2` can use the same lowering path with coalescing/splitting enabled, even - if all SSA value passes are still disabled. + while preserving the current small SSA cleanup passes. ### Phase D - SSA-Backed Value and Memory Passes @@ -641,15 +662,16 @@ Implement in order: 2. [x] `opt_copy_cleanup` 3. [x] `opt_block_cloning` 4. [x] `opt_addr_xform` -5. `opt_gvn` -6. `opt_copy_prop` -7. `opt_dse` -8. `opt_licm` -9. `opt_pressure_relief` -10. `opt_make_conventional_ssa` -11. `opt_ssa_combine` -12. `opt_undo_ssa` -13. `opt_jump_opt` +5. [ ] scalar-only `opt_gvn` +6. [ ] `opt_copy_prop` +7. [ ] memory-aware `opt_gvn` / redundant load elimination design and tests +8. [ ] `opt_dse` +9. [ ] `opt_licm` +10. [ ] `opt_pressure_relief` +11. [x] `opt_make_conventional_ssa` +12. [ ] `opt_ssa_combine` +13. [x] `opt_undo_ssa` +14. [ ] full O2 `opt_jump_opt` beyond the shared O1 jump cleanup Do not batch these into one landing. Each pass needs a pass-local corpus case that fails red without the pass or its bug fix. @@ -665,7 +687,7 @@ after scalar GVN is green and before `opt_gvn` starts rewriting `IR_LOAD` or reasoning across `IR_STORE`, calls, atomics, fences, volatile operations, TLS, globals, or unknown memory. -Exit criteria: +Completed pre-GVN slice exit criteria: - [x] `make test-opt` - [x] Focused toy O2 branch/join/address-memory subset: @@ -675,7 +697,17 @@ Exit criteria: - [x] Disassembly sanity check shows narrow churn: existing `10_pointer_addr_deref` removes separate address materialization in O2, while the new branch/join/address fixture has only minor register-choice churn. -- [x] Pass-local dump tests prove each later intended rewrite fires. +- [x] Pass-local dump tests prove the implemented SSA DCE, copy cleanup, block + cloning, and address-folding rewrites fire. + +Remaining Phase D exit criteria: + +- [ ] Scalar GVN pass-local dump tests prove redundant pure expressions are + replaced and constant branches fold. +- [ ] Memory numbering and alias invalidation are documented before any memory + GVN or redundant-load rewrite lands. +- [ ] DSE, LICM, pressure relief, SSA combine, and full O2 jump optimization + each have focused red-green tests before they enter the default O2 schedule. ### Phase E - Inlining and Cleanup @@ -753,8 +785,9 @@ src/opt/ pass_jump.c jump optimization pass_machinize.c target ABI and machine-form lowering pass_live.c liveness, pressure, live ranges - pass_coalesce.c move collection and coalescing - pass_regalloc.c assignment, rewrite, splitting + pass_lower.c current assignment/rewrite implementation + pass_coalesce.c move collection and coalescing (split out when implemented) + pass_regalloc.c assignment, rewrite, splitting (split out when warranted) pass_combine.c target-aware code selection pass_emit.c drive wrapped CGTarget pass_inline.c inlining and cleanup @@ -770,9 +803,10 @@ specifics belong behind `Target`, `TargetABI`, or explicit helper hooks. - No VLAs and no global optimizer state. MIR uses generator context; cfree should hang all pass state off `Compiler`, `Func`, `FuncSet`, or explicit pass contexts. -- `Reg` and `Val` identity remains valid while recording. Lowering may map - Vals to new physical registers or stack slots, but that mapping belongs to - allocation state. +- `PReg` names mutable virtual-register storage before reg-SSA. `Val` names + SSA values after pseudo-register SSA construction. Lowering may map PRegs or + post-SSA Vals to physical registers or stack slots, but that mapping belongs + to allocation state. - Frame slots remain frame slots until a pass proves promotion is legal. - Calls are memory barriers unless alias information proves otherwise. - Inline asm is side-effecting and may pin hard regs; the allocator and diff --git a/doc/PERF.md b/doc/PERF.md @@ -40,11 +40,11 @@ Top-level `cfree run --time` scopes: Important O1 counters: -- `opt.funcs`, `opt.blocks`, `opt.insts`, `opt.vals` +- `opt.funcs`, `opt.blocks`, `opt.insts`, `opt.pregs` - `opt.live.block_bytes`, `opt.live.active_words`, `opt.live.bitset_words_touched` - `opt.ranges`, `opt.range_points`, `opt.range_raw_points`, - `opt.range.point_visits`, `opt.range.value_scans`, + `opt.range.point_visits`, `opt.range.preg_scans`, `opt.range.live_words_touched` - `opt.alloc.used_loc_words`, `opt.alloc.hard_loc_words`, `opt.alloc.stack_loc_words`, `opt.alloc.stack_slots` diff --git a/doc/VIRTUAL_REGS.md b/doc/VIRTUAL_REGS.md @@ -1,113 +0,0 @@ -# Mutable Virtual Registers - -## Issue - -`opt_cgtarget` used to record `OPK_REG` operands as if the virtual register -number was also the IR value number. The old contract was effectively: - -```text -virtual Reg id == SSA-ish Val id -``` - -That is too strong for the CG layer. CG virtual registers should be legal -mutable pseudo registers: a register names a persistent storage location, and -instructions may read and write that location many times. - -The x64 jump-table bug exposed the mismatch. Switch lowering computes: - -```text -idx = selector - min -if idx > span - 1 goto default -addr = table[idx] -indirect_branch addr -``` - -In the optimized x64 path, delayed materialization and address lowering could -reuse the same virtual register name for different program-point values. Since -opt interpreted that register name as one value identity, the table scale could -use a stale source instead of the checked index. O0/direct x64 mechanics were -not the problem; the broken piece was the optimized IR recording model. - -Register-backed CG locals did not have the same failure mode because they -carried separate local storage identity through `IRLocal`/`source_local`. Plain -virtual temporaries did not; they were only `Val`s. - -## Desired Model - -Use a MIR-style split: - -```text -virtual Reg id == mutable pseudo-register storage -Val id == one produced value/version -``` - -At CG/recording level, `OPK_REG` should mean a mutable pseudo register. It is -valid to emit destructive operations such as: - -```text -r = r + 1 -r = f(r) -``` - -Optimization passes that require SSA may build SSA values from these mutable -pseudos and later destroy SSA. O1 should not require SSA. - -## O1 Plan - -Teach the non-SSA opt path to reason about mutable virtual registers directly: - -1. Keep a virtual-register namespace distinct from `Val`. -2. Record each instruction's input and output operands explicitly. -3. Run liveness over mutable pseudo registers: - - input operands are uses - - output operands are defs/kills - - memory base/index registers are uses - - calls add ABI hard-reg clobbers and argument uses -4. Register allocation assigns each pseudo register to a hard register or spill - slot based on those live ranges. -5. Rewriting replaces pseudo registers with their assigned locations. - -This is the same broad model MIR uses in its `-O1` path: mutable pseudo -registers plus classic dataflow, without building SSA. - -## O2 Plan - -SSA becomes an optimization representation, not the base virtual-register -semantics. - -1. Build SSA from mutable pseudo registers after CFG construction. -2. Insert phis at joins for pseudos with multiple reaching definitions. -3. Rename each pseudo-register definition to a fresh `Val`. -4. Rewrite uses to the reaching `Val`. -5. Run SSA optimizations. -6. Lower out of SSA into copies/mutable pseudos before normal register - allocation and emission. - -## Migration Notes - -- Do not add new CG restrictions that make virtual registers single-assignment. -- Do not fix individual stale-register cases by assuming virtual registers are - immutable values. -- Register-backed locals and params need careful handling because they already - have storage identity. The final model should make that identity explicit - instead of relying on the virtual register number also being a value number. - -## Current Implementation - -The optimizer now has an explicit pseudo-register namespace: - -- `PReg` names mutable CG virtual-register storage. -- `Val` names SSA values produced by SSA construction and optimization. -- `Func.preg_type` / `Func.preg_cls` are dense metadata tables indexed by - `PReg`; `Func.val_type` / `Func.val_cls` remain indexed by `Val`. -- During initial recording and O1 lowering, `OPK_REG` operands carry `PReg` - ids. O2 calls `opt_build_reg_ssa` to insert register phis and rewrite - register operands/defs to `Val` ids before mem2reg SSA. - -There is one transitional compatibility layer: `ir_ensure_preg` reserves -matching shadow `Val` slots for old O1 liveness/allocation arrays that are -still indexed by the numeric operand id. Those slots are not the ownership -model for CG virtual registers; the pseudo-register table is. - -Remaining pessimizing guardrails are tracked in `doc/OPT.md` so they can be -lifted after the dependent O2 modeling work lands. diff --git a/src/opt/ir.c b/src/opt/ir.c @@ -11,9 +11,6 @@ * arrays indexed by Val. * - preg_type / preg_cls are parallel arrays indexed by mutable CG virtual * Reg id. Before opt_build_reg_ssa, OPK_REG operands name these pseudos. - * - O1/non-SSA lowering still uses shadow Val slots for allocation bitsets - * until the whole allocator is renamed from Val to PReg. Those slots are - * reserved by ir_ensure_preg; SSA values allocated later remain distinct. */ #include "opt/ir.h" @@ -110,12 +107,6 @@ void ir_ensure_preg(Func* f, PReg r, CfreeCgTypeId t, u8 cls) { } if (!f->preg_type[r]) f->preg_type[r] = t; f->preg_cls[r] = cls; - - /* Transitional compatibility: O1 allocation/liveness arrays are still - * indexed by the numeric operand id. Reserve matching Val slots so those - * passes keep their existing bounds and metadata while the source of truth - * is the pseudo-register table above. */ - ir_ensure_val(f, (Val)r, t, cls); } PReg ir_alloc_preg(Func* f, CfreeCgTypeId t, u8 cls) { @@ -155,8 +146,7 @@ void ir_block_set_nsucc(Func* f, u32 block, u32 n) { bl = &f->blocks[block]; if (n > bl->succ_cap) { u32* nb = arena_zarray(f->arena, u32, n); - if (bl->succ && bl->nsucc) - memcpy(nb, bl->succ, sizeof(u32) * bl->nsucc); + if (bl->succ && bl->nsucc) memcpy(nb, bl->succ, sizeof(u32) * bl->nsucc); bl->succ = nb; bl->succ_cap = n; } @@ -174,8 +164,7 @@ void ir_note_emit(Func* f, u32 block) { if (f->emit_order_n == f->emit_order_cap) { u32 ncap = f->emit_order_cap ? f->emit_order_cap * 2u : 8u; u32* nb = arena_array(f->arena, u32, ncap); - if (f->emit_order) - memcpy(nb, f->emit_order, sizeof(u32) * f->emit_order_n); + if (f->emit_order) memcpy(nb, f->emit_order, sizeof(u32) * f->emit_order_n); f->emit_order = nb; f->emit_order_cap = ncap; } diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -327,7 +327,7 @@ typedef enum OptAllocKind { OPT_ALLOC_SPILL, } OptAllocKind; -typedef struct OptValInfo { +typedef struct OptPRegInfo { u32 first_pos; u32 last_pos; u32 live_length; @@ -343,8 +343,8 @@ typedef struct OptValInfo { u8 alloc_kind; /* OptAllocKind */ u8 cls; /* RegClass */ u8 pad[2]; - u32 forbidden_hard_regs; /* bit r means Val may not allocate hard reg r. */ -} OptValInfo; + u32 forbidden_hard_regs; /* bit r means PReg may not allocate hard reg r. */ +} OptPRegInfo; typedef enum OptUseKind { OPT_USE_OPERAND, @@ -436,7 +436,7 @@ typedef struct Func { u64 opt_rewrite_live_words_touched; u64 opt_dde_live_words_touched; InstId next_inst_id; - OptValInfo* val_info; /* indexed by Val, length nvals */ + OptPRegInfo* preg_info; /* indexed by the current allocation reg namespace */ OptUse* opt_uses; u32 opt_nuses, opt_uses_cap; diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -57,12 +57,10 @@ static Inst* rec(OptImpl* o, IROp op) { static void set_preg_def(Func* f, Inst* in, u32 block, PReg r, CfreeCgTypeId t) { + (void)f; + (void)block; in->def = (Val)r; in->type = t; - if (r != PREG_NONE && (Val)r < f->nvals) { - f->val_def_block[(Val)r] = block; - f->val_def_inst[(Val)r] = f->blocks[block].ninsts - 1u; - } } static int intrinsic_terminates(IntrinKind kind) { @@ -442,8 +440,6 @@ static CGLocalStorage w_param(CGTarget* t, const CGParamDesc* d) { in->opnds[0].type = d->type; in->opnds[0].v.reg = st.v.reg; in->nopnds = 1; - o->f->val_def_block[st.v.reg] = o->cur; - o->f->val_def_inst[st.v.reg] = o->f->blocks[o->cur].ninsts - 1u; } return st; } @@ -808,7 +804,8 @@ static void w_load_imm(CGTarget* t, Operand dst, i64 imm) { in->opnds = dup_opnds(o->f, ops, 1); in->nopnds = 1; in->extra.imm = imm; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_load_const(CGTarget* t, Operand dst, ConstBytes cb) { @@ -823,7 +820,8 @@ static void w_load_const(CGTarget* t, Operand dst, ConstBytes cb) { memcpy(bytes, cb.bytes, cb.size); in->extra.cbytes.bytes = bytes; } - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_copy(CGTarget* t, Operand dst, Operand src) { @@ -832,7 +830,8 @@ static void w_copy(CGTarget* t, Operand dst, Operand src) { Operand ops[2] = {dst, src}; in->opnds = dup_opnds(o->f, ops, 2); in->nopnds = 2; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_load(CGTarget* t, Operand dst, Operand addr, MemAccess m) { @@ -842,7 +841,8 @@ static void w_load(CGTarget* t, Operand dst, Operand addr, MemAccess m) { in->opnds = dup_opnds(o->f, ops, 2); in->nopnds = 2; in->extra.mem = m; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_store(CGTarget* t, Operand addr, Operand src, MemAccess m) { @@ -860,7 +860,8 @@ static void w_addr_of(CGTarget* t, Operand dst, Operand lv) { Operand ops[2] = {dst, lv}; in->opnds = dup_opnds(o->f, ops, 2); in->nopnds = 2; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { @@ -873,7 +874,8 @@ static void w_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) { aux->sym = sym; aux->addend = addend; in->extra.aux = aux; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_copy_bytes(CGTarget* t, Operand dst, Operand src, @@ -910,7 +912,8 @@ static void w_bitfield_load(CGTarget* t, Operand dst, Operand record, IRBitFieldAux* aux = arena_znew(o->f->arena, IRBitFieldAux); aux->access = bf; in->extra.aux = aux; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_bitfield_store(CGTarget* t, Operand record, Operand src, @@ -934,7 +937,8 @@ static void w_binop(CGTarget* t, BinOp op, Operand dst, Operand a, Operand b) { in->opnds = dup_opnds(o->f, ops, 3); in->nopnds = 3; in->extra.imm = (i64)op; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_unop(CGTarget* t, UnOp op, Operand dst, Operand a) { @@ -944,7 +948,8 @@ static void w_unop(CGTarget* t, UnOp op, Operand dst, Operand a) { in->opnds = dup_opnds(o->f, ops, 2); in->nopnds = 2; in->extra.imm = (i64)op; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { @@ -954,7 +959,8 @@ static void w_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { in->opnds = dup_opnds(o->f, ops, 3); in->nopnds = 3; in->extra.imm = (i64)op; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { @@ -964,7 +970,8 @@ static void w_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { in->opnds = dup_opnds(o->f, ops, 2); in->nopnds = 2; in->extra.imm = (i64)k; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } /* ---- calls / return ---- */ @@ -1056,7 +1063,8 @@ static void w_alloca_(CGTarget* t, Operand dst, Operand size, u32 align) { in->opnds = dup_opnds(o->f, ops, 2); in->nopnds = 2; in->extra.imm = (i64)align; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_va_start_(CGTarget* t, Operand ap) { @@ -1074,7 +1082,8 @@ static void w_va_arg_(CGTarget* t, Operand dst, Operand ap, CfreeCgTypeId ty) { in->opnds = dup_opnds(o->f, ops, 2); in->nopnds = 2; in->extra.aux = (void*)(uintptr_t)ty; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_va_end_(CGTarget* t, Operand ap) { @@ -1104,7 +1113,8 @@ static void w_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess m, aux->mem = m; aux->mo = mo; in->extra.aux = aux; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_atomic_store(CGTarget* t, Operand addr, Operand src, MemAccess m, @@ -1132,7 +1142,8 @@ static void w_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr, aux->mo = mo; aux->op = (u8)op; in->extra.aux = aux; - if (dst.kind == OPK_REG) set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); + if (dst.kind == OPK_REG) + set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type); } static void w_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, @@ -1155,10 +1166,6 @@ static void w_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr, in->defs = arena_array(o->f->arena, Val, 2); in->defs[0] = (prior.kind == OPK_REG) ? (Val)prior.v.reg : VAL_NONE; in->defs[1] = (Val)ok.v.reg; - if (in->defs[1] != VAL_NONE && in->defs[1] < o->f->nvals) { - o->f->val_def_block[in->defs[1]] = o->cur; - o->f->val_def_inst[in->defs[1]] = o->f->blocks[o->cur].ninsts - 1u; - } } } @@ -1194,10 +1201,6 @@ static void w_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, in->defs = arena_array(o->f->arena, Val, nd); for (u32 i = 0; i < nd; ++i) { in->defs[i] = (dsts[i].kind == OPK_REG) ? (Val)dsts[i].v.reg : VAL_NONE; - if (in->defs[i] != VAL_NONE && in->defs[i] < o->f->nvals) { - o->f->val_def_block[in->defs[i]] = o->cur; - o->f->val_def_inst[in->defs[i]] = o->f->blocks[o->cur].ninsts - 1u; - } } in->def = in->defs[0]; in->type = dsts[0].type; @@ -1254,10 +1257,6 @@ static void w_asm_block(CGTarget* t, const char* tmpl, for (u32 i = 0; i < nout; ++i) { in->defs[i] = (out_ops[i].kind == OPK_REG) ? (Val)out_ops[i].v.reg : VAL_NONE; - if (in->defs[i] != VAL_NONE && in->defs[i] < o->f->nvals) { - o->f->val_def_block[in->defs[i]] = o->cur; - o->f->val_def_inst[in->defs[i]] = o->f->blocks[o->cur].ninsts - 1u; - } } in->def = in->defs[0]; in->type = out_ops[0].type; @@ -1287,9 +1286,9 @@ static int inst_spill_local(Func* f, const Inst* in, u32 op_idx) { static u64 func_spill_alloc_count(Func* f) { u64 n = 0; - if (!f || !f->val_info) return 0; - for (Val v = 1; v < f->nvals; ++v) - if (f->val_info[v].alloc_kind == OPT_ALLOC_SPILL) ++n; + if (!f || !f->preg_info) return 0; + for (PReg r = 1; r < opt_reg_count(f); ++r) + if (f->preg_info[r].alloc_kind == OPT_ALLOC_SPILL) ++n; return n; } @@ -1325,7 +1324,7 @@ static void opt_run_lowering_pipeline(OptImpl* o, const char* total_scope, metrics_count(o->c, "opt.funcs", 1); metrics_count(o->c, "opt.blocks", o->f->nblocks); metrics_count(o->c, "opt.insts", func_inst_count(o->f)); - metrics_count(o->c, "opt.vals", o->f->nvals); + metrics_count(o->c, "opt.pregs", o->f->npregs); metrics_scope_begin(o->c, "opt.build_cfg"); opt_build_cfg(o->f); opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG); diff --git a/src/opt/opt.h b/src/opt/opt.h @@ -104,7 +104,7 @@ typedef struct OptLiveInfo { #define OPT_RANGE_NONE ((u32)~0u) typedef struct OptLiveRange { - Val val; + PReg preg; u32 start; u32 end; u32 next; @@ -119,20 +119,20 @@ typedef struct OptLiveRangeSet { OptLiveRange* ranges; u32 nranges; u32 cap; - u32* first_range_by_val; - u32* live_length_by_val; - u32* use_freq_by_val; - u32* def_freq_by_val; - u32* live_block_freq_by_val; - u32* live_across_call_freq_by_val; - u32* spill_cost_by_val; + u32* first_range_by_preg; + u32* live_length_by_preg; + u32* use_freq_by_preg; + u32* def_freq_by_preg; + u32* live_block_freq_by_preg; + u32* live_across_call_freq_by_preg; + u32* spill_cost_by_preg; u32 point_count; u32 raw_point_count; - u32 max_ranges_per_val; + u32 max_ranges_per_preg; u32 max_live_length; u32 whole_block_spans; u64 range_point_visits; - u64 value_scans; + u64 preg_scans; u64 live_words_touched; } OptLiveRangeSet; @@ -149,12 +149,12 @@ typedef struct OptLoc { FrameSlot spill_slot; } OptLoc; -typedef void (*OptBitsetIterFn)(Val, void*); +typedef void (*OptBitsetIterFn)(PReg, void*); void opt_bitset_clear(OptBitset*); -void opt_bitset_set(OptBitset*, Val); -void opt_bitset_clear_bit(OptBitset*, Val); -int opt_bitset_has(const OptBitset*, Val); +void opt_bitset_set(OptBitset*, PReg); +void opt_bitset_clear_bit(OptBitset*, PReg); +int opt_bitset_has(const OptBitset*, PReg); int opt_bitset_copy(OptBitset*, const OptBitset*); int opt_bitset_union(OptBitset*, const OptBitset*); int opt_bitset_union_and_not(OptBitset*, const OptBitset*, const OptBitset*); diff --git a/src/opt/opt_internal.h b/src/opt/opt_internal.h @@ -54,6 +54,31 @@ typedef enum OptAnalysisFlag { typedef void (*OptOperandWalkFn)(Func*, Inst*, Operand*, int is_def, void*); +static inline u32 opt_reg_count(const Func* f) { + if (!f) return 0; + if (f->opt_reg_ssa) return f->nvals; + /* Real recorded O1 functions have npregs > 1. The nvals fallback keeps + * standalone pass tests that hand-build Val-shaped IR working without + * making ir_ensure_preg grow the Val table. */ + return f->npregs > 1 ? f->npregs : f->nvals; +} + +static inline int opt_reg_valid(const Func* f, PReg r) { + return r != PREG_NONE && r != 0 && r < opt_reg_count(f); +} + +static inline CfreeCgTypeId opt_reg_type(const Func* f, PReg r) { + if (!opt_reg_valid(f, r)) return 0; + if (f->opt_reg_ssa || f->npregs <= 1) return f->val_type[(Val)r]; + return f->preg_type[r]; +} + +static inline u8 opt_reg_cls(const Func* f, PReg r) { + if (!opt_reg_valid(f, r)) return RC_INT; + if (f->opt_reg_ssa || f->npregs <= 1) return f->val_cls[(Val)r]; + return f->preg_cls[r]; +} + void opt_analysis_mark_valid(Func*, u32 flags); void opt_analysis_invalidate(Func*, u32 flags); int opt_analysis_has(Func*, u32 flags); diff --git a/src/opt/opt_util.c b/src/opt/opt_util.c @@ -29,8 +29,8 @@ void opt_walk_operand(Func* f, Inst* in, Operand* op, int is_def, base.kind = OPK_REG; base.cls = RC_INT; base.v.reg = op->v.ind.base; - if ((Val)base.v.reg < f->nvals && f->val_type[(Val)base.v.reg]) - base.type = f->val_type[(Val)base.v.reg]; + if ((PReg)base.v.reg < opt_reg_count(f) && opt_reg_type(f, base.v.reg)) + base.type = opt_reg_type(f, base.v.reg); fn(f, in, &base, 0, ctx); op->v.ind.base = base.v.reg; } diff --git a/src/opt/pass_analysis.c b/src/opt/pass_analysis.c @@ -335,8 +335,9 @@ static void verify_operand(Func* f, Inst* in, Operand* op, int is_def, const char* stage = (const char*)arg; if (op->kind != OPK_REG) return; Val v = (Val)op->v.reg; - if (v == VAL_NONE || v >= f->nvals) - opt_fail(f, stage, is_def ? "bad def val" : "bad use val", v, f->nvals); + u32 nregs = verify_stage_is_ssa(stage) ? f->nvals : opt_reg_count(f); + if (v == VAL_NONE || v >= nregs) + opt_fail(f, stage, is_def ? "bad def val" : "bad use val", v, nregs); if (!verify_stage_is_ssa(stage)) return; if (op->cls != f->val_cls[v]) opt_fail(f, stage, is_def ? "def class mismatch" : "use class mismatch", v, @@ -356,14 +357,16 @@ static void verify_values(Func* f, const char* stage) { if (seen_inst[in->id]) opt_fail(f, stage, "duplicate inst id", in->id, b); seen_inst[in->id] = 1; if (in->def != VAL_NONE) { - if (in->def >= f->nvals) opt_fail(f, stage, "bad inst def", b, i); + u32 ndefs = verify_stage_is_ssa(stage) ? f->nvals : opt_reg_count(f); + if (in->def >= ndefs) opt_fail(f, stage, "bad inst def", b, i); if (verify_stage_is_ssa(stage) && (IROp)in->op == IR_PHI && in->type && f->val_type[in->def] && in->type != f->val_type[in->def]) opt_fail(f, stage, "inst def type mismatch", in->def, b); } for (u32 d = 0; d < in->ndefs; ++d) { Val v = in->defs[d]; - if (v == VAL_NONE || v >= f->nvals) + u32 ndefs = verify_stage_is_ssa(stage) ? f->nvals : opt_reg_count(f); + if (v == VAL_NONE || v >= ndefs) opt_fail(f, stage, "bad inst multi-def", b, d); } opt_walk_inst_operands(f, in, verify_operand, (void*)stage); diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c @@ -54,20 +54,20 @@ static CGLocalStorage xlat_storage(ReplayCtx* r, CGLocalStorage st, CfreeCgTypeId ty) { (void)ty; if (st.kind == CG_LOCAL_STORAGE_REG) { - Val v = (Val)st.v.reg; - if (r->identity_regs && r->f->opt_rewritten && v < r->f->nvals && - r->f->val_info) { - OptValInfo* vi = &r->f->val_info[v]; + PReg pr = (PReg)st.v.reg; + if (r->identity_regs && r->f->opt_rewritten && pr != 0 && + pr < opt_reg_count(r->f) && r->f->preg_info) { + OptPRegInfo* vi = &r->f->preg_info[pr]; if (vi->alloc_kind == OPT_ALLOC_HARD) { st.v.reg = vi->hard_reg; } else if (vi->alloc_kind == OPT_ALLOC_SPILL) { st.kind = CG_LOCAL_STORAGE_FRAME; st.v.frame_slot = slot_to_target(r, vi->spill_slot); } else { - st.v.reg = val_to_target_reg(r, v); + st.v.reg = val_to_target_reg(r, (Val)pr); } } else { - st.v.reg = val_to_target_reg(r, v); + st.v.reg = val_to_target_reg(r, (Val)pr); } } else { st.v.frame_slot = slot_to_target(r, st.v.frame_slot); @@ -77,11 +77,11 @@ static CGLocalStorage xlat_storage(ReplayCtx* r, CGLocalStorage st, static int replay_reg_storage_unused(ReplayCtx* r, CGLocalStorage st) { if (!r || st.kind != CG_LOCAL_STORAGE_REG) return 0; - if (!(r->identity_regs && r->f->opt_rewritten && r->f->val_info)) return 0; - Val v = (Val)st.v.reg; - if (v == VAL_NONE || v >= r->f->nvals) return 0; - return r->f->val_info[v].alloc_kind == OPT_ALLOC_NONE || - r->f->val_info[v].use_freq == 0; + if (!(r->identity_regs && r->f->opt_rewritten && r->f->preg_info)) return 0; + PReg pr = (PReg)st.v.reg; + if (pr == 0 || pr >= opt_reg_count(r->f)) return 0; + return r->f->preg_info[pr].alloc_kind == OPT_ALLOC_NONE || + r->f->preg_info[pr].use_freq == 0; } static Operand xlat_op(ReplayCtx* r, Operand op) { @@ -597,7 +597,8 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) { replay_planned_call(r, aux); break; } - compiler_panic(r->c, in->loc, "opt replay: call has no supported call plan%s%s", + compiler_panic(r->c, in->loc, + "opt replay: call has no supported call plan%s%s", plan_reason ? ": " : "", plan_reason ? plan_reason : ""); break; } @@ -827,13 +828,13 @@ static void collect_replayed_abivalue_regs(const CGABIValue* v, RegClass cls, static void collect_replayed_param_regs(Func* f, RegClass cls, Reg* used, u32* nused, u32 cap) { - if (!f->opt_rewritten || !f->val_info) return; + if (!f->opt_rewritten || !f->preg_info) return; for (u32 i = 0; i < f->nparams; ++i) { IRParam* p = &f->params[i]; if (p->storage.kind != CG_LOCAL_STORAGE_REG) continue; - Val v = (Val)p->storage.v.reg; - if (v == VAL_NONE || v >= f->nvals) continue; - OptValInfo* vi = &f->val_info[v]; + PReg pr = (PReg)p->storage.v.reg; + if (pr == 0 || pr >= opt_reg_count(f)) continue; + OptPRegInfo* vi = &f->preg_info[pr]; if (vi->alloc_kind != OPT_ALLOC_HARD || vi->cls != cls) continue; add_unique_reg(used, nused, cap, vi->hard_reg); } diff --git a/src/opt/pass_live.c b/src/opt/pass_live.c @@ -6,7 +6,7 @@ #include "opt/opt.h" #include "opt/opt_internal.h" -static u32 opt_bit_words(u32 nvals) { return (nvals + 63u) / 64u; } +static u32 opt_bit_words(u32 nregs) { return (nregs + 63u) / 64u; } static void opt_bitset_init(Arena* arena, OptBitset* bs, u32 nwords) { memset(bs, 0, sizeof *bs); @@ -26,27 +26,27 @@ void opt_bitset_clear(OptBitset* bs) { bs->active_words = 0; } -void opt_bitset_set(OptBitset* bs, Val v) { +void opt_bitset_set(OptBitset* bs, PReg r) { if (!bs || !bs->words) return; - u32 w = v / 64u; + u32 w = r / 64u; if (w >= bs->nwords) return; - bs->words[w] |= 1ull << (v % 64u); + bs->words[w] |= 1ull << (r % 64u); if (bs->active_words <= w) bs->active_words = w + 1u; } -void opt_bitset_clear_bit(OptBitset* bs, Val v) { +void opt_bitset_clear_bit(OptBitset* bs, PReg r) { if (!bs || !bs->words) return; - u32 w = v / 64u; + u32 w = r / 64u; if (w >= bs->active_words) return; - bs->words[w] &= ~(1ull << (v % 64u)); + bs->words[w] &= ~(1ull << (r % 64u)); if (w + 1u == bs->active_words && bs->words[w] == 0) opt_bitset_trim(bs); } -int opt_bitset_has(const OptBitset* bs, Val v) { +int opt_bitset_has(const OptBitset* bs, PReg r) { if (!bs || !bs->words) return 0; - u32 w = v / 64u; + u32 w = r / 64u; if (w >= bs->active_words) return 0; - return (bs->words[w] & (1ull << (v % 64u))) != 0; + return (bs->words[w] & (1ull << (r % 64u))) != 0; } int opt_bitset_copy(OptBitset* dst, const OptBitset* src) { @@ -122,17 +122,17 @@ static void live_collect_use_def(Func* f, Inst* in, Operand* op, int is_def, (void)in; LiveUseDefCtx* c = (LiveUseDefCtx*)arg; if (op->kind != OPK_REG) return; - Val v = (Val)op->v.reg; - if (v == VAL_NONE || v >= f->nvals) return; + PReg r = (PReg)op->v.reg; + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return; if (is_def) { - opt_bitset_set(c->def, v); - } else if (!opt_bitset_has(c->def, v)) { - opt_bitset_set(c->use, v); + opt_bitset_set(c->def, r); + } else if (!opt_bitset_has(c->def, r)) { + opt_bitset_set(c->use, r); } } -static void live_count_bit(Val v, void* arg) { - (void)v; +static void live_count_bit(PReg r, void* arg) { + (void)r; ++*(u64*)arg; } @@ -201,7 +201,7 @@ void opt_live_blocks(Func* f, OptLiveInfo* live) { memset(live, 0, sizeof *live); live->arena = f->arena; live->f = f; - live->words = opt_bit_words(f->nvals); + live->words = opt_bit_words(opt_reg_count(f)); f->opt_live_words = (u16)live->words; live->blocks = arena_zarray(f->arena, OptBlockLive, f->nblocks ? f->nblocks : 1u); @@ -256,10 +256,10 @@ static void dump_write(Writer* w, const char* s) { cfree_writer_write(w, s, strlen(s)); } -static void dump_bit(Val v, void* arg) { +static void dump_bit(PReg r, void* arg) { Writer* w = (Writer*)arg; char buf[32]; - snprintf(buf, sizeof buf, " v%u", (unsigned)v); + snprintf(buf, sizeof buf, " r%u", (unsigned)r); dump_write(w, buf); } @@ -288,24 +288,24 @@ void opt_live_dump_blocks(Func* f, const OptLiveInfo* live, Writer* w) { typedef struct RangeBuildCtx { Func* f; OptLiveRangeSet* ranges; - u32* last_range_by_val; - u32* open_end_by_val; - u32* open_gen_by_val; + u32* last_range_by_preg; + u32* open_end_by_preg; + u32* open_gen_by_preg; u32 open_gen; - Val* open_vals; - u32 nopen_vals; - u32 open_vals_cap; - Val* range_vals; - u32 nrange_vals; - u32 range_vals_cap; + PReg* open_pregs; + u32 nopen_pregs; + u32 open_pregs_cap; + PReg* range_pregs; + u32 nrange_pregs; + u32 range_pregs_cap; u32* raw_points; u32 nraw_points; u32 raw_points_cap; } RangeBuildCtx; typedef struct RangeInstRefs { - Val* uses; - Val* defs; + PReg* uses; + PReg* defs; u32 nuses; u32 ndefs; u32 use_cap; @@ -315,18 +315,20 @@ typedef struct RangeInstRefs { u32 gen; } RangeInstRefs; -typedef struct RangeLiveVals { +typedef struct RangeLivePregs { Func* f; - Val* vals; - u32 nvals; + PReg* regs; + u32 nregs; u32 cap; - u32* pos_by_val; -} RangeLiveVals; + u32* pos_by_preg; +} RangeLivePregs; static void range_refs_init(Func* f, RangeInstRefs* refs) { memset(refs, 0, sizeof *refs); - refs->use_mark = arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); - refs->def_mark = arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + refs->use_mark = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + refs->def_mark = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); refs->gen = 1; } @@ -335,84 +337,87 @@ static void range_refs_reset(Func* f, RangeInstRefs* refs) { refs->ndefs = 0; ++refs->gen; if (refs->gen) return; - memset(refs->use_mark, 0, - sizeof(refs->use_mark[0]) * (f->nvals ? f->nvals : 1u)); - memset(refs->def_mark, 0, - sizeof(refs->def_mark[0]) * (f->nvals ? f->nvals : 1u)); + memset( + refs->use_mark, 0, + sizeof(refs->use_mark[0]) * (opt_reg_count(f) ? opt_reg_count(f) : 1u)); + memset( + refs->def_mark, 0, + sizeof(refs->def_mark[0]) * (opt_reg_count(f) ? opt_reg_count(f) : 1u)); refs->gen = 1; } -static void range_refs_push_use(Func* f, RangeInstRefs* refs, Val v) { - if (refs->use_mark[v] == refs->gen) return; - refs->use_mark[v] = refs->gen; +static void range_refs_push_use(Func* f, RangeInstRefs* refs, PReg r) { + if (refs->use_mark[r] == refs->gen) return; + refs->use_mark[r] = refs->gen; if (refs->nuses == refs->use_cap) { u32 ncap = refs->use_cap ? refs->use_cap * 2u : 8u; - Val* nv = arena_array(f->arena, Val, ncap); - if (refs->uses) memcpy(nv, refs->uses, sizeof(refs->uses[0]) * refs->nuses); - refs->uses = nv; + PReg* nr = arena_array(f->arena, PReg, ncap); + if (refs->uses) memcpy(nr, refs->uses, sizeof(refs->uses[0]) * refs->nuses); + refs->uses = nr; refs->use_cap = ncap; } - refs->uses[refs->nuses++] = v; + refs->uses[refs->nuses++] = r; } -static void range_refs_push_def(Func* f, RangeInstRefs* refs, Val v) { - if (refs->def_mark[v] == refs->gen) return; - refs->def_mark[v] = refs->gen; +static void range_refs_push_def(Func* f, RangeInstRefs* refs, PReg r) { + if (refs->def_mark[r] == refs->gen) return; + refs->def_mark[r] = refs->gen; if (refs->ndefs == refs->def_cap) { u32 ncap = refs->def_cap ? refs->def_cap * 2u : 4u; - Val* nv = arena_array(f->arena, Val, ncap); - if (refs->defs) memcpy(nv, refs->defs, sizeof(refs->defs[0]) * refs->ndefs); - refs->defs = nv; + PReg* nr = arena_array(f->arena, PReg, ncap); + if (refs->defs) memcpy(nr, refs->defs, sizeof(refs->defs[0]) * refs->ndefs); + refs->defs = nr; refs->def_cap = ncap; } - refs->defs[refs->ndefs++] = v; + refs->defs[refs->ndefs++] = r; } -static int range_refs_has_def(const RangeInstRefs* refs, Val v) { - return refs->def_mark[v] == refs->gen; +static int range_refs_has_def(const RangeInstRefs* refs, PReg r) { + return refs->def_mark[r] == refs->gen; } -static void range_live_vals_init(Func* f, RangeLiveVals* live) { +static void range_live_pregs_init(Func* f, RangeLivePregs* live) { memset(live, 0, sizeof *live); live->f = f; - live->pos_by_val = arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + live->pos_by_preg = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); } -static void range_live_vals_clear(RangeLiveVals* live) { - for (u32 i = 0; i < live->nvals; ++i) live->pos_by_val[live->vals[i]] = 0; - live->nvals = 0; +static void range_live_pregs_clear(RangeLivePregs* live) { + for (u32 i = 0; i < live->nregs; ++i) live->pos_by_preg[live->regs[i]] = 0; + live->nregs = 0; } -static void range_live_vals_add(RangeLiveVals* live, Val v) { +static void range_live_pregs_add(RangeLivePregs* live, PReg r) { Func* f = live->f; - if (v == VAL_NONE || v >= f->nvals) return; - if (live->pos_by_val[v]) return; - if (live->nvals == live->cap) { + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return; + if (live->pos_by_preg[r]) return; + if (live->nregs == live->cap) { u32 ncap = live->cap ? live->cap * 2u : 64u; - Val* nv = arena_array(f->arena, Val, ncap); - if (live->vals) memcpy(nv, live->vals, sizeof(live->vals[0]) * live->nvals); - live->vals = nv; + PReg* nr = arena_array(f->arena, PReg, ncap); + if (live->regs) memcpy(nr, live->regs, sizeof(live->regs[0]) * live->nregs); + live->regs = nr; live->cap = ncap; } - live->vals[live->nvals] = v; - live->pos_by_val[v] = live->nvals + 1u; - ++live->nvals; + live->regs[live->nregs] = r; + live->pos_by_preg[r] = live->nregs + 1u; + ++live->nregs; } -static void range_live_vals_remove(RangeLiveVals* live, Val v) { - if (v == VAL_NONE || v >= live->f->nvals) return; - u32 pos = live->pos_by_val[v]; +static void range_live_pregs_remove(RangeLivePregs* live, PReg r) { + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(live->f)) return; + u32 pos = live->pos_by_preg[r]; if (!pos) return; u32 idx = pos - 1u; - Val last = live->vals[live->nvals - 1u]; - live->vals[idx] = last; - live->pos_by_val[last] = idx + 1u; - live->pos_by_val[v] = 0; - --live->nvals; + PReg last = live->regs[live->nregs - 1u]; + live->regs[idx] = last; + live->pos_by_preg[last] = idx + 1u; + live->pos_by_preg[r] = 0; + --live->nregs; } -static void range_live_vals_add_bit(Val v, void* arg) { - range_live_vals_add((RangeLiveVals*)arg, v); +static void range_live_pregs_add_bit(PReg r, void* arg) { + range_live_pregs_add((RangeLivePregs*)arg, r); } static void range_collect_bits(Func* f, Inst* in, Operand* op, int is_def, @@ -420,12 +425,12 @@ static void range_collect_bits(Func* f, Inst* in, Operand* op, int is_def, (void)in; RangeInstRefs* refs = (RangeInstRefs*)arg; if (op->kind != OPK_REG) return; - Val v = (Val)op->v.reg; - if (v == VAL_NONE || v >= f->nvals) return; + PReg r = (PReg)op->v.reg; + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return; if (is_def) - range_refs_push_def(f, refs, v); + range_refs_push_def(f, refs, r); else - range_refs_push_use(f, refs, v); + range_refs_push_use(f, refs, r); } static void range_push_raw_point(RangeBuildCtx* c, u32 p) { @@ -440,64 +445,65 @@ static void range_push_raw_point(RangeBuildCtx* c, u32 p) { c->raw_points[c->nraw_points++] = p; } -static void range_push_open_val(RangeBuildCtx* c, Val v) { - if (c->nopen_vals == c->open_vals_cap) { - u32 ncap = c->open_vals_cap ? c->open_vals_cap * 2u : 64u; - Val* nv = arena_array(c->f->arena, Val, ncap); - if (c->open_vals) - memcpy(nv, c->open_vals, sizeof(c->open_vals[0]) * c->nopen_vals); - c->open_vals = nv; - c->open_vals_cap = ncap; +static void range_push_open_preg(RangeBuildCtx* c, PReg r) { + if (c->nopen_pregs == c->open_pregs_cap) { + u32 ncap = c->open_pregs_cap ? c->open_pregs_cap * 2u : 64u; + PReg* nr = arena_array(c->f->arena, PReg, ncap); + if (c->open_pregs) + memcpy(nr, c->open_pregs, sizeof(c->open_pregs[0]) * c->nopen_pregs); + c->open_pregs = nr; + c->open_pregs_cap = ncap; } - c->open_vals[c->nopen_vals++] = v; + c->open_pregs[c->nopen_pregs++] = r; } -static void range_push_range_val(RangeBuildCtx* c, Val v) { - if (c->nrange_vals == c->range_vals_cap) { - u32 ncap = c->range_vals_cap ? c->range_vals_cap * 2u : 64u; - Val* nv = arena_array(c->f->arena, Val, ncap); - if (c->range_vals) - memcpy(nv, c->range_vals, sizeof(c->range_vals[0]) * c->nrange_vals); - c->range_vals = nv; - c->range_vals_cap = ncap; +static void range_push_range_preg(RangeBuildCtx* c, PReg r) { + if (c->nrange_pregs == c->range_pregs_cap) { + u32 ncap = c->range_pregs_cap ? c->range_pregs_cap * 2u : 64u; + PReg* nr = arena_array(c->f->arena, PReg, ncap); + if (c->range_pregs) + memcpy(nr, c->range_pregs, sizeof(c->range_pregs[0]) * c->nrange_pregs); + c->range_pregs = nr; + c->range_pregs_cap = ncap; } - c->range_vals[c->nrange_vals++] = v; + c->range_pregs[c->nrange_pregs++] = r; } static void range_block_reset(RangeBuildCtx* c) { - c->nopen_vals = 0; + c->nopen_pregs = 0; ++c->open_gen; if (c->open_gen) return; - memset(c->open_gen_by_val, 0, - sizeof(c->open_gen_by_val[0]) * (c->f->nvals ? c->f->nvals : 1u)); + memset(c->open_gen_by_preg, 0, + sizeof(c->open_gen_by_preg[0]) * + (opt_reg_count(c->f) ? opt_reg_count(c->f) : 1u)); c->open_gen = 1; } -static int range_is_open(const RangeBuildCtx* c, Val v) { - return v < c->f->nvals && c->open_gen_by_val[v] == c->open_gen; +static int range_is_open(const RangeBuildCtx* c, PReg r) { + return r < opt_reg_count(c->f) && c->open_gen_by_preg[r] == c->open_gen; } -static u32 range_open_end(const RangeBuildCtx* c, Val v) { - return range_is_open(c, v) ? c->open_end_by_val[v] : OPT_RANGE_NONE; +static u32 range_open_end(const RangeBuildCtx* c, PReg r) { + return range_is_open(c, r) ? c->open_end_by_preg[r] : OPT_RANGE_NONE; } -static void range_set_open_end(RangeBuildCtx* c, Val v, u32 end) { - if (v == VAL_NONE || v >= c->f->nvals) return; - if (!range_is_open(c, v)) { - c->open_gen_by_val[v] = c->open_gen; - range_push_open_val(c, v); +static void range_set_open_end(RangeBuildCtx* c, PReg r, u32 end) { + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return; + if (!range_is_open(c, r)) { + c->open_gen_by_preg[r] = c->open_gen; + range_push_open_preg(c, r); } - c->open_end_by_val[v] = end; + c->open_end_by_preg[r] = end; } -static void range_close_open_end(RangeBuildCtx* c, Val v) { - if (v == VAL_NONE || v >= c->f->nvals) return; - c->open_gen_by_val[v] = 0; +static void range_close_open_end(RangeBuildCtx* c, PReg r) { + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return; + c->open_gen_by_preg[r] = 0; } -static void range_append(RangeBuildCtx* c, Val v, u32 start, u32 end, u32 block, - int whole_block) { - if (v == VAL_NONE || v >= c->f->nvals) return; +static void range_append(RangeBuildCtx* c, PReg r, u32 start, u32 end, + u32 block, int whole_block) { + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return; if (end <= start) end = start + 1u; OptLiveRangeSet* ranges = c->ranges; if (ranges->nranges == ranges->cap) { @@ -509,21 +515,21 @@ static void range_append(RangeBuildCtx* c, Val v, u32 start, u32 end, u32 block, ranges->cap = ncap; } u32 idx = ranges->nranges++; - OptLiveRange* r = &ranges->ranges[idx]; - memset(r, 0, sizeof *r); - r->val = v; - r->start = start; - r->end = end; - r->next = OPT_RANGE_NONE; - r->block = block; - r->whole_block = whole_block ? 1u : 0u; - if (ranges->first_range_by_val[v] == OPT_RANGE_NONE) { - ranges->first_range_by_val[v] = idx; - range_push_range_val(c, v); + OptLiveRange* lr = &ranges->ranges[idx]; + memset(lr, 0, sizeof *lr); + lr->preg = r; + lr->start = start; + lr->end = end; + lr->next = OPT_RANGE_NONE; + lr->block = block; + lr->whole_block = whole_block ? 1u : 0u; + if (ranges->first_range_by_preg[r] == OPT_RANGE_NONE) { + ranges->first_range_by_preg[r] = idx; + range_push_range_preg(c, r); } else { - ranges->ranges[c->last_range_by_val[v]].next = idx; + ranges->ranges[c->last_range_by_preg[r]].next = idx; } - c->last_range_by_val[v] = idx; + c->last_range_by_preg[r] = idx; range_push_raw_point(c, start); range_push_raw_point(c, end); if (whole_block) ++ranges->whole_block_spans; @@ -534,9 +540,9 @@ typedef struct RangeOpenCtx { u32 raw_end; } RangeOpenCtx; -static void range_open_at_end(Val v, void* arg) { +static void range_open_at_end(PReg r, void* arg) { RangeOpenCtx* c = (RangeOpenCtx*)arg; - range_set_open_end(c->build, v, c->raw_end); + range_set_open_end(c->build, r, c->raw_end); } typedef struct RangeCloseCtx { @@ -545,18 +551,18 @@ typedef struct RangeCloseCtx { u32 block; } RangeCloseCtx; -static void range_close_def(Val v, void* arg) { +static void range_close_def(PReg r, void* arg) { RangeCloseCtx* c = (RangeCloseCtx*)arg; RangeBuildCtx* b = c->build; - if (v >= b->f->nvals) return; - u32 open_end = range_open_end(b, v); + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(b->f)) return; + u32 open_end = range_open_end(b, r); if (open_end != OPT_RANGE_NONE) { - range_append(b, v, c->start, open_end, c->block, 0); - range_close_open_end(b, v); + range_append(b, r, c->start, open_end, c->block, 0); + range_close_open_end(b, r); } else { - range_append(b, v, c->start, c->start + 1u, c->block, 0); + range_append(b, r, c->start, c->start + 1u, c->block, 0); } - b->ranges->def_freq_by_val[v] += b->f->blocks[c->block].frequency; + b->ranges->def_freq_by_preg[r] += b->f->blocks[c->block].frequency; } typedef struct RangeUseCtx { @@ -565,12 +571,12 @@ typedef struct RangeUseCtx { u32 block; } RangeUseCtx; -static void range_open_use(Val v, void* arg) { +static void range_open_use(PReg r, void* arg) { RangeUseCtx* c = (RangeUseCtx*)arg; RangeBuildCtx* b = c->build; - if (v >= b->f->nvals) return; - if (!range_is_open(b, v)) range_set_open_end(b, v, c->end); - b->ranges->use_freq_by_val[v] += b->f->blocks[c->block].frequency; + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(b->f)) return; + if (!range_is_open(b, r)) range_set_open_end(b, r, c->end); + b->ranges->use_freq_by_preg[r] += b->f->blocks[c->block].frequency; } typedef struct RangeBlockFreqCtx { @@ -578,9 +584,9 @@ typedef struct RangeBlockFreqCtx { u32 freq; } RangeBlockFreqCtx; -static void range_add_block_freq(Val v, void* arg) { +static void range_add_block_freq(PReg r, void* arg) { RangeBlockFreqCtx* c = (RangeBlockFreqCtx*)arg; - c->ranges->live_block_freq_by_val[v] += c->freq; + c->ranges->live_block_freq_by_preg[r] += c->freq; } typedef struct RangeCallCtx { @@ -590,19 +596,19 @@ typedef struct RangeCallCtx { u32 freq; } RangeCallCtx; -static void range_add_live_across_call(Val v, void* arg) { +static void range_add_live_across_call(PReg r, void* arg) { RangeCallCtx* c = (RangeCallCtx*)arg; - if (v == VAL_NONE || v >= c->f->nvals) return; - if (range_refs_has_def(c->refs, v)) return; - c->ranges->live_across_call_freq_by_val[v] += c->freq; + if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return; + if (range_refs_has_def(c->refs, r)) return; + c->ranges->live_across_call_freq_by_preg[r] += c->freq; } -static void range_live_vals_update_before(RangeLiveVals* live, - const RangeInstRefs* refs) { +static void range_live_pregs_update_before(RangeLivePregs* live, + const RangeInstRefs* refs) { for (u32 i = 0; i < refs->ndefs; ++i) - range_live_vals_remove(live, refs->defs[i]); + range_live_pregs_remove(live, refs->defs[i]); for (u32 i = 0; i < refs->nuses; ++i) - range_live_vals_add(live, refs->uses[i]); + range_live_pregs_add(live, refs->uses[i]); } static int u32_cmp(const void* a, const void* b) { @@ -642,7 +648,7 @@ static void range_compress_points(OptLiveRangeSet* ranges, if (r->end <= r->start) r->end = r->start + 1u; u32 len = r->end - r->start; ranges->range_point_visits += len; - ranges->live_length_by_val[r->val] += len; + ranges->live_length_by_preg[r->preg] += len; if (ranges->max_live_length < len) ranges->max_live_length = len; } } @@ -653,34 +659,38 @@ void opt_live_ranges_build(Func* f, const OptLiveInfo* live, if (!f || !live) return; ranges->arena = f->arena; ranges->f = f; - ranges->first_range_by_val = - arena_array(f->arena, u32, f->nvals ? f->nvals : 1u); - ranges->live_length_by_val = - arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); - ranges->use_freq_by_val = - arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); - ranges->def_freq_by_val = - arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); - ranges->live_block_freq_by_val = - arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); - ranges->live_across_call_freq_by_val = - arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); - ranges->spill_cost_by_val = - arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); - memset(ranges->first_range_by_val, 0xff, - sizeof(ranges->first_range_by_val[0]) * (f->nvals ? f->nvals : 1u)); + ranges->first_range_by_preg = + arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + ranges->live_length_by_preg = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + ranges->use_freq_by_preg = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + ranges->def_freq_by_preg = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + ranges->live_block_freq_by_preg = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + ranges->live_across_call_freq_by_preg = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + ranges->spill_cost_by_preg = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + memset(ranges->first_range_by_preg, 0xff, + sizeof(ranges->first_range_by_preg[0]) * + (opt_reg_count(f) ? opt_reg_count(f) : 1u)); RangeBuildCtx build; memset(&build, 0, sizeof build); build.f = f; build.ranges = ranges; - build.last_range_by_val = - arena_array(f->arena, u32, f->nvals ? f->nvals : 1u); - build.open_end_by_val = arena_array(f->arena, u32, f->nvals ? f->nvals : 1u); - build.open_gen_by_val = arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + build.last_range_by_preg = + arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + build.open_end_by_preg = + arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); + build.open_gen_by_preg = + arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); build.open_gen = 1; - memset(build.last_range_by_val, 0xff, - sizeof(build.last_range_by_val[0]) * (f->nvals ? f->nvals : 1u)); + memset(build.last_range_by_preg, 0xff, + sizeof(build.last_range_by_preg[0]) * + (opt_reg_count(f) ? opt_reg_count(f) : 1u)); u32* block_base = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); u32 raw = 0; @@ -694,20 +704,20 @@ void opt_live_ranges_build(Func* f, const OptLiveInfo* live, opt_bitset_init(f->arena, &block_live, live->words); RangeInstRefs refs; range_refs_init(f, &refs); - RangeLiveVals live_vals; - range_live_vals_init(f, &live_vals); + RangeLivePregs live_pregs; + range_live_pregs_init(f, &live_pregs); for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; const OptBlockLive* lb = &live->blocks[b]; range_block_reset(&build); - range_live_vals_clear(&live_vals); + range_live_pregs_clear(&live_pregs); u32 raw_start = block_base[b]; u32 raw_end = raw_start + (bl->ninsts ? bl->ninsts : 1u); RangeOpenCtx open_ctx = {&build, raw_end}; ranges->live_words_touched += lb->live_out.active_words; opt_bitset_iter_set(&lb->live_out, range_open_at_end, &open_ctx); - opt_bitset_iter_set(&lb->live_out, range_live_vals_add_bit, &live_vals); + opt_bitset_iter_set(&lb->live_out, range_live_pregs_add_bit, &live_pregs); RangeBlockFreqCtx freq_ctx = {ranges, bl->frequency}; ranges->live_words_touched += block_live.nwords < lb->live_in.active_words @@ -732,9 +742,9 @@ void opt_live_ranges_build(Func* f, const OptLiveInfo* live, if ((IROp)in->op == IR_CALL) { RangeCallCtx call_ctx = {f, ranges, &refs, bl->frequency}; - ranges->live_words_touched += live_vals.nvals; - for (u32 k = 0; k < live_vals.nvals; ++k) - range_add_live_across_call(live_vals.vals[k], &call_ctx); + ranges->live_words_touched += live_pregs.nregs; + for (u32 k = 0; k < live_pregs.nregs; ++k) + range_add_live_across_call(live_pregs.regs[k], &call_ctx); } RangeCloseCtx close_ctx = {&build, raw_start + i, b}; @@ -743,39 +753,39 @@ void opt_live_ranges_build(Func* f, const OptLiveInfo* live, RangeUseCtx use_ctx = {&build, raw_start + i + 1u, b}; for (u32 k = 0; k < refs.nuses; ++k) range_open_use(refs.uses[k], &use_ctx); - range_live_vals_update_before(&live_vals, &refs); + range_live_pregs_update_before(&live_pregs, &refs); } - for (u32 oi = 0; oi < build.nopen_vals; ++oi) { - Val v = build.open_vals[oi]; - if (!range_is_open(&build, v)) continue; - u32 open_end = range_open_end(&build, v); + for (u32 oi = 0; oi < build.nopen_pregs; ++oi) { + PReg r = build.open_pregs[oi]; + if (!range_is_open(&build, r)) continue; + u32 open_end = range_open_end(&build, r); if (open_end == OPT_RANGE_NONE) continue; int whole_block = open_end == raw_end && - !opt_bitset_has(&lb->live_use, v) && - !opt_bitset_has(&lb->live_def, v); - range_append(&build, v, raw_start, open_end, b, whole_block); - range_close_open_end(&build, v); + !opt_bitset_has(&lb->live_use, r) && + !opt_bitset_has(&lb->live_def, r); + range_append(&build, r, raw_start, open_end, b, whole_block); + range_close_open_end(&build, r); } - ranges->value_scans += build.nopen_vals; + ranges->preg_scans += build.nopen_pregs; } range_compress_points(ranges, &build); - for (u32 vi = 0; vi < build.nrange_vals; ++vi) { - Val v = build.range_vals[vi]; + for (u32 vi = 0; vi < build.nrange_pregs; ++vi) { + PReg r = build.range_pregs[vi]; u32 nranges = 0; - for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; - r = ranges->ranges[r].next) + for (u32 ri = ranges->first_range_by_preg[r]; ri != OPT_RANGE_NONE; + ri = ranges->ranges[ri].next) ++nranges; - if (ranges->max_ranges_per_val < nranges) - ranges->max_ranges_per_val = nranges; - ranges->spill_cost_by_val[v] = (ranges->use_freq_by_val[v] * 2u) + - ranges->def_freq_by_val[v] + - ranges->live_across_call_freq_by_val[v] + - ranges->live_block_freq_by_val[v]; + if (ranges->max_ranges_per_preg < nranges) + ranges->max_ranges_per_preg = nranges; + ranges->spill_cost_by_preg[r] = (ranges->use_freq_by_preg[r] * 2u) + + ranges->def_freq_by_preg[r] + + ranges->live_across_call_freq_by_preg[r] + + ranges->live_block_freq_by_preg[r]; } - ranges->value_scans += build.nrange_vals; + ranges->preg_scans += build.nrange_pregs; } void opt_live_dump_ranges(Func* f, const OptLiveRangeSet* ranges, Writer* w) { @@ -788,15 +798,15 @@ void opt_live_dump_ranges(Func* f, const OptLiveRangeSet* ranges, Writer* w) { (unsigned)ranges->raw_point_count, (unsigned)ranges->whole_block_spans); dump_write(w, buf); - for (Val v = 1; v < ranges->f->nvals; ++v) { - if (ranges->first_range_by_val[v] == OPT_RANGE_NONE) continue; - snprintf(buf, sizeof buf, "v%u len=%u spill=%u:", (unsigned)v, - (unsigned)ranges->live_length_by_val[v], - (unsigned)ranges->spill_cost_by_val[v]); + for (PReg r = 1; r < opt_reg_count(ranges->f); ++r) { + if (ranges->first_range_by_preg[r] == OPT_RANGE_NONE) continue; + snprintf(buf, sizeof buf, "r%u len=%u spill=%u:", (unsigned)r, + (unsigned)ranges->live_length_by_preg[r], + (unsigned)ranges->spill_cost_by_preg[r]); dump_write(w, buf); - for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; - r = ranges->ranges[r].next) { - const OptLiveRange* lr = &ranges->ranges[r]; + for (u32 ri = ranges->first_range_by_preg[r]; ri != OPT_RANGE_NONE; + ri = ranges->ranges[ri].next) { + const OptLiveRange* lr = &ranges->ranges[ri]; snprintf(buf, sizeof buf, " [%u,%u)b%u%s", (unsigned)lr->start, (unsigned)lr->end, (unsigned)lr->block, lr->whole_block ? "*" : ""); diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -44,13 +44,13 @@ static u32 type_size_fallback(const Func* f, CfreeCgTypeId t) { } } -static u32 bit_words(u32 nvals) { return (nvals + 63u) / 64u; } +static u32 bit_words(u32 npregs) { return (npregs + 63u) / 64u; } -static void bit_set(u64* bits, Val v) { bits[v / 64u] |= 1ull << (v % 64u); } -static void bit_clear(u64* bits, Val v) { +static void bit_set(u64* bits, PReg v) { bits[v / 64u] |= 1ull << (v % 64u); } +static void bit_clear(u64* bits, PReg v) { bits[v / 64u] &= ~(1ull << (v % 64u)); } -static int bit_has(const u64* bits, Val v) { +static int bit_has(const u64* bits, PReg v) { return (bits[v / 64u] & (1ull << (v % 64u))) != 0; } @@ -60,8 +60,8 @@ typedef struct BitsCtx { } BitsCtx; typedef struct InstRefs { - Val* uses; - Val* defs; + PReg* uses; + PReg* defs; u32 nuses; u32 ndefs; u32 use_cap; @@ -73,8 +73,8 @@ static void collect_bits(Func* f, Inst* in, Operand* op, int is_def, (void)in; BitsCtx* c = (BitsCtx*)arg; if (op->kind != OPK_REG) return; - Val v = (Val)op->v.reg; - if (v == VAL_NONE || v >= f->nvals) return; + PReg v = (PReg)op->v.reg; + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return; if (is_def) bit_set(c->def, v); else @@ -86,17 +86,17 @@ static void refs_reset(InstRefs* refs) { refs->ndefs = 0; } -static void refs_push(Func* f, Val** vals, u32* nvals, u32* cap, Val v) { - for (u32 i = 0; i < *nvals; ++i) - if ((*vals)[i] == v) return; - if (*nvals == *cap) { +static void refs_push(Func* f, PReg** pregs, u32* npregs, u32* cap, PReg v) { + for (u32 i = 0; i < *npregs; ++i) + if ((*pregs)[i] == v) return; + if (*npregs == *cap) { u32 ncap = *cap ? *cap * 2u : 8u; - Val* nv = arena_array(f->arena, Val, ncap); - if (*vals) memcpy(nv, *vals, sizeof((*vals)[0]) * *nvals); - *vals = nv; + PReg* nv = arena_array(f->arena, PReg, ncap); + if (*pregs) memcpy(nv, *pregs, sizeof((*pregs)[0]) * *npregs); + *pregs = nv; *cap = ncap; } - (*vals)[(*nvals)++] = v; + (*pregs)[(*npregs)++] = v; } static void refs_collect(Func* f, Inst* in, Operand* op, int is_def, @@ -104,15 +104,15 @@ static void refs_collect(Func* f, Inst* in, Operand* op, int is_def, (void)in; InstRefs* refs = (InstRefs*)arg; if (op->kind != OPK_REG) return; - Val v = (Val)op->v.reg; - if (v == VAL_NONE || v >= f->nvals) return; + PReg v = (PReg)op->v.reg; + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return; if (is_def) refs_push(f, &refs->defs, &refs->ndefs, &refs->def_cap, v); else refs_push(f, &refs->uses, &refs->nuses, &refs->use_cap, v); } -static int refs_has_def(const InstRefs* refs, Val v) { +static int refs_has_def(const InstRefs* refs, PReg v) { for (u32 i = 0; i < refs->ndefs; ++i) if (refs->defs[i] == v) return 1; return 0; @@ -126,15 +126,15 @@ static void live_update_refs_before(u64* live, const InstRefs* refs) { static u32 live_update_refs_before_active(u64* live, u32 active_words, u32 nwords, const InstRefs* refs) { for (u32 i = 0; i < refs->ndefs; ++i) { - Val v = refs->defs[i]; - if (v == VAL_NONE) continue; + PReg v = refs->defs[i]; + if (v == PREG_NONE || v == 0) continue; u32 w = v / 64u; if (w < active_words) live[w] &= ~(1ull << (v % 64u)); } while (active_words && live[active_words - 1u] == 0) --active_words; for (u32 i = 0; i < refs->nuses; ++i) { - Val v = refs->uses[i]; - if (v == VAL_NONE) continue; + PReg v = refs->uses[i]; + if (v == PREG_NONE || v == 0) continue; u32 w = v / 64u; if (w >= nwords) continue; live[w] |= 1ull << (v % 64u); @@ -143,29 +143,30 @@ static u32 live_update_refs_before_active(u64* live, u32 active_words, return active_words; } -static void forbid_val_reg(Func* f, Val v, u8 cls, Reg r) { - if (v == VAL_NONE || v >= f->nvals || cls >= OPT_REG_CLASSES || r >= 32) +static void forbid_preg_reg(Func* f, PReg v, u8 cls, Reg r) { + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f) || + cls >= OPT_REG_CLASSES || r >= 32) return; - if (f->val_cls[v] != cls) return; - f->val_info[v].forbidden_hard_regs |= 1u << r; + if (opt_reg_cls(f, v) != cls) return; + f->preg_info[v].forbidden_hard_regs |= 1u << r; } static void apply_fixed_asm_operand(Func* f, Operand* op, i32 fixed, u8 fixed_cls) { if (!op || op->kind != OPK_REG || fixed < 0) return; - Val v = (Val)op->v.reg; - if (v == VAL_NONE || v >= f->nvals) return; - if (fixed_cls >= OPT_REG_CLASSES || f->val_cls[v] != fixed_cls) { + PReg v = (PReg)op->v.reg; + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return; + if (fixed_cls >= OPT_REG_CLASSES || opt_reg_cls(f, v) != fixed_cls) { SrcLoc loc = {0, 0, 0}; compiler_panic(f->c, loc, "opt asm: fixed register class mismatch"); } - f->val_info[v].tied_hard_reg = fixed; + f->preg_info[v].tied_hard_reg = fixed; } static void apply_asm_register_constraints(Func* f, Inst* in, u64* use, u64* def, u64* live_after) { IRAsmAux* aux = (IRAsmAux*)in->extra.aux; - if (!aux || !f->val_info) return; + if (!aux || !f->preg_info) return; for (u32 i = 0; i < aux->nout; ++i) { i32 fixed = aux->out_fixed_regs ? aux->out_fixed_regs[i] : -1; @@ -183,11 +184,11 @@ static void apply_asm_register_constraints(Func* f, Inst* in, u64* use, if (!mask) continue; for (Reg r = 0; r < 32; ++r) { if ((mask & (1u << r)) == 0) continue; - for (Val v = 1; v < f->nvals; ++v) { + for (PReg v = 1; v < opt_reg_count(f); ++v) { if (!bit_has(use, v) && !bit_has(def, v) && !(live_after && bit_has(live_after, v))) continue; - forbid_val_reg(f, v, (u8)cls, r); + forbid_preg_reg(f, v, (u8)cls, r); } } } @@ -208,7 +209,7 @@ static int phys_arg_reg_for_index(Func* f, u8 cls, u32 abi_index, Reg* out) { static int hard_available(Func* f, u8 cls, Reg r); static void apply_param_incoming_register_hazards(Func* f) { - if (!f || !f->val_info || !f->desc.abi || !f->nparams) return; + if (!f || !f->preg_info || !f->desc.abi || !f->nparams) return; Reg incoming_regs[64]; u8 incoming_cls[64]; u8 has_incoming[64]; @@ -260,19 +261,19 @@ static void apply_param_incoming_register_hazards(Func* f) { for (u32 i = 0; i < nparams; ++i) { IRParam* p = &f->params[i]; if (p->storage.kind != CG_LOCAL_STORAGE_REG) continue; - Val v = (Val)p->storage.v.reg; - if (v == VAL_NONE || v >= f->nvals) continue; - u8 cls = f->val_info[v].cls; + PReg v = (PReg)p->storage.v.reg; + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) continue; + u8 cls = f->preg_info[v].cls; if (has_incoming[i] && incoming_cls[i] == cls && - f->val_info[v].tied_hard_reg < 0 && - f->val_info[v].live_across_call_freq == 0 && + f->preg_info[v].tied_hard_reg < 0 && + f->preg_info[v].live_across_call_freq == 0 && hard_available(f, cls, incoming_regs[i]) && incoming_regs[i] < 32 && - (f->val_info[v].forbidden_hard_regs & (1u << incoming_regs[i])) == 0) { - f->val_info[v].tied_hard_reg = (i32)incoming_regs[i]; + (f->preg_info[v].forbidden_hard_regs & (1u << incoming_regs[i])) == 0) { + f->preg_info[v].tied_hard_reg = (i32)incoming_regs[i]; } for (u32 j = i + 1u; j < nparams; ++j) { if (!has_incoming[j] || incoming_cls[j] != cls) continue; - forbid_val_reg(f, v, cls, incoming_regs[j]); + forbid_preg_reg(f, v, cls, incoming_regs[j]); } } } @@ -298,17 +299,17 @@ static const CGPhysRegInfo* phys_info_for(Func* f, u8 cls, Reg r) { return NULL; } -static FrameSlot spill_slot_for(Func* f, Val v) { - if (f->val_info[v].spill_slot != FRAME_SLOT_NONE) - return f->val_info[v].spill_slot; +static FrameSlot spill_slot_for(Func* f, PReg v) { + if (f->preg_info[v].spill_slot != FRAME_SLOT_NONE) + return f->preg_info[v].spill_slot; FrameSlotDesc d; memset(&d, 0, sizeof d); - d.type = f->val_type[v]; - d.size = type_size_fallback(f, f->val_type[v]); + d.type = opt_reg_type(f, v); + d.size = type_size_fallback(f, opt_reg_type(f, v)); d.align = d.size >= 8 ? 8 : d.size; d.kind = FS_SPILL; - f->val_info[v].spill_slot = ir_frame_slot_new(f, &d); - return f->val_info[v].spill_slot; + f->preg_info[v].spill_slot = ir_frame_slot_new(f, &d); + return f->preg_info[v].spill_slot; } static u32 loc_bit_words(u32 nbits) { return (nbits + 63u) / 64u; } @@ -316,7 +317,7 @@ static u32 loc_bit_words(u32 nbits) { return (nbits + 63u) / 64u; } static u32 hard_loc_bit(u8 cls, Reg r) { return ((u32)cls * 32u) + (u32)r; } typedef struct OptAllocCandidate { - Val v; + PReg v; u32 spill_cost; u32 live_length; u8 tied; @@ -354,7 +355,7 @@ typedef struct OptAllocator { } OptAllocator; static u32 hard_reg_alloc_score(Func* f, const OptAllocator* a, - const OptValInfo* vi, Reg hr) { + const OptPRegInfo* vi, Reg hr) { const CGPhysRegInfo* pi = phys_info_for(f, vi->cls, hr); u32 score = pi ? pi->use_cost : 0; if (vi->live_across_call_freq) { @@ -455,9 +456,9 @@ static void alloc_interval_insert(Func* f, AllocIntervalVec* v, u32 start, } static int alloc_ranges_overlap_vec(OptAllocator* a, - const OptLiveRangeSet* ranges, Val v, + const OptLiveRangeSet* ranges, PReg v, const AllocIntervalVec* vec, u64* visits) { - for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; + for (u32 r = ranges->first_range_by_preg[v]; r != OPT_RANGE_NONE; r = ranges->ranges[r].next) { const OptLiveRange* lr = &ranges->ranges[r]; u32 end = lr->end < a->point_count ? lr->end : a->point_count; @@ -469,9 +470,9 @@ static int alloc_ranges_overlap_vec(OptAllocator* a, } static void alloc_mark_vec(Func* f, OptAllocator* a, - const OptLiveRangeSet* ranges, Val v, + const OptLiveRangeSet* ranges, PReg v, AllocIntervalVec* vec, u64* marks) { - for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; + for (u32 r = ranges->first_range_by_preg[v]; r != OPT_RANGE_NONE; r = ranges->ranges[r].next) { const OptLiveRange* lr = &ranges->ranges[r]; u32 end = lr->end < a->point_count ? lr->end : a->point_count; @@ -481,29 +482,29 @@ static void alloc_mark_vec(Func* f, OptAllocator* a, } } -static void opt_init_val_info_from_ranges(Func* f, - const OptLiveRangeSet* ranges) { - OptValInfo* old = f->val_info; - OptValInfo* info = - arena_zarray(f->arena, OptValInfo, f->nvals ? f->nvals : 1u); - for (Val v = 0; v < f->nvals; ++v) { +static void opt_init_preg_info_from_ranges(Func* f, + const OptLiveRangeSet* ranges) { + OptPRegInfo* old = f->preg_info; + OptPRegInfo* info = arena_zarray(f->arena, OptPRegInfo, + opt_reg_count(f) ? opt_reg_count(f) : 1u); + for (PReg v = 0; v < opt_reg_count(f); ++v) { i32 tied = old ? old[v].tied_hard_reg : -1; u32 forbidden = old ? old[v].forbidden_hard_regs : 0; u32 old_frequency = old ? old[v].frequency : 0; - OptValInfo* vi = &info[v]; + OptPRegInfo* vi = &info[v]; vi->tied_hard_reg = tied; vi->hard_reg = REG_NONE; vi->spill_slot = FRAME_SLOT_NONE; vi->alloc_kind = OPT_ALLOC_NONE; - vi->cls = f->val_cls[v]; + vi->cls = opt_reg_cls(f, v); vi->forbidden_hard_regs = forbidden; - if (!ranges || v == VAL_NONE || - ranges->first_range_by_val[v] == OPT_RANGE_NONE) { + if (!ranges || v == PREG_NONE || v == 0 || + ranges->first_range_by_preg[v] == OPT_RANGE_NONE) { continue; } u32 first = (u32)~0u; u32 last = 0; - for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; + for (u32 r = ranges->first_range_by_preg[v]; r != OPT_RANGE_NONE; r = ranges->ranges[r].next) { const OptLiveRange* lr = &ranges->ranges[r]; if (lr->start < first) first = lr->start; @@ -511,16 +512,16 @@ static void opt_init_val_info_from_ranges(Func* f, } vi->first_pos = first == (u32)~0u ? 0 : first + 1u; vi->last_pos = last; - vi->live_length = ranges->live_length_by_val[v]; - vi->use_freq = ranges->use_freq_by_val[v]; - vi->def_freq = ranges->def_freq_by_val[v]; - vi->live_block_freq = ranges->live_block_freq_by_val[v]; - vi->live_across_call_freq = ranges->live_across_call_freq_by_val[v]; - vi->spill_cost = ranges->spill_cost_by_val[v]; + vi->live_length = ranges->live_length_by_preg[v]; + vi->use_freq = ranges->use_freq_by_preg[v]; + vi->def_freq = ranges->def_freq_by_preg[v]; + vi->live_block_freq = ranges->live_block_freq_by_preg[v]; + vi->live_across_call_freq = ranges->live_across_call_freq_by_preg[v]; + vi->spill_cost = ranges->spill_cost_by_preg[v]; vi->frequency = vi->spill_cost; if (old_frequency > vi->frequency) vi->frequency = old_frequency; } - f->val_info = info; + f->preg_info = info; } static void bits_clear(u64* bits, u32 words) { @@ -568,7 +569,7 @@ static void opt_apply_asm_constraints_from_live(Func* f, if (!has_asm) return; u32 words = live_info ? live_info->words : f->opt_live_words; - if (!words) words = bit_words(f->nvals); + if (!words) words = bit_words(opt_reg_count(f)); f->opt_live_words = (u16)words; u64* live = arena_zarray(f->arena, u64, words ? words : 1u); @@ -591,10 +592,10 @@ static void opt_apply_asm_constraints_from_live(Func* f, } } -static int spill_slot_compatible(Func* f, FrameSlot fs, Val v) { +static int spill_slot_compatible(Func* f, FrameSlot fs, PReg v) { if (fs == FRAME_SLOT_NONE || fs > f->nframe_slots) return 0; IRFrameSlot* s = &f->frame_slots[fs - 1u]; - u32 size = type_size_fallback(f, f->val_type[v]); + u32 size = type_size_fallback(f, opt_reg_type(f, v)); u32 align = size >= 8 ? 8 : size; if (s->kind != FS_SPILL) return 0; if (s->size < size) return 0; @@ -603,21 +604,21 @@ static int spill_slot_compatible(Func* f, FrameSlot fs, Val v) { } static int alloc_hard_conflicts(OptAllocator* a, const OptLiveRangeSet* ranges, - Val v, u32 bit) { + PReg v, u32 bit) { if (bit >= a->hard_loc_bits) return 1; return alloc_ranges_overlap_vec(a, ranges, v, &a->hard_used_locs[bit], &a->hard_point_visits); } static int alloc_stack_conflicts(OptAllocator* a, const OptLiveRangeSet* ranges, - Val v, u32 stack_idx) { + PReg v, u32 stack_idx) { if (stack_idx >= a->stack_slot_count) return 1; return alloc_ranges_overlap_vec(a, ranges, v, &a->stack_used_locs[stack_idx], &a->stack_point_visits); } static void alloc_mark_hard_loc(OptAllocator* a, const OptLiveRangeSet* ranges, - Val v, u32 bit, Func* f) { + PReg v, u32 bit, Func* f) { if (bit >= a->hard_loc_bits) return; alloc_mark_vec(f, a, ranges, v, &a->hard_used_locs[bit], &a->hard_mark_points); @@ -640,15 +641,15 @@ static void alloc_grow_stack_locs(Func* f, OptAllocator* a, u32 need_slots) { } static void alloc_mark_stack_loc(OptAllocator* a, const OptLiveRangeSet* ranges, - Val v, u32 stack_idx, Func* f) { + PReg v, u32 stack_idx, Func* f) { if (stack_idx >= a->stack_slot_count) return; alloc_mark_vec(f, a, ranges, v, &a->stack_used_locs[stack_idx], &a->stack_mark_points); } static void alloc_assign_hard(Func* f, OptAllocator* a, - const OptLiveRangeSet* ranges, Val v, Reg r) { - OptValInfo* vi = &f->val_info[v]; + const OptLiveRangeSet* ranges, PReg v, Reg r) { + OptPRegInfo* vi = &f->preg_info[v]; vi->alloc_kind = OPT_ALLOC_HARD; vi->hard_reg = r; a->locs[v].kind = OPT_LOC_HARD; @@ -659,8 +660,8 @@ static void alloc_assign_hard(Func* f, OptAllocator* a, } static void alloc_assign_stack(Func* f, OptAllocator* a, - const OptLiveRangeSet* ranges, Val v) { - OptValInfo* vi = &f->val_info[v]; + const OptLiveRangeSet* ranges, PReg v) { + OptPRegInfo* vi = &f->preg_info[v]; u32 stack_idx = a->stack_slot_count; FrameSlot slot = FRAME_SLOT_NONE; for (u32 i = 0; i < a->stack_slot_count; ++i) { @@ -693,10 +694,11 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges, a->point_count = ranges->point_count ? ranges->point_count : 1u; a->hard_loc_bits = OPT_REG_CLASSES * 32u; u32 ncands = 0; - for (Val v = 1; v < f->nvals; ++v) - if (ranges->first_range_by_val[v] != OPT_RANGE_NONE) ++ncands; + for (PReg v = 1; v < opt_reg_count(f); ++v) + if (ranges->first_range_by_preg[v] != OPT_RANGE_NONE) ++ncands; a->hard_loc_words = loc_bit_words(a->hard_loc_bits); - a->locs = arena_zarray(f->arena, OptLoc, f->nvals ? f->nvals : 1u); + a->locs = + arena_zarray(f->arena, OptLoc, opt_reg_count(f) ? opt_reg_count(f) : 1u); a->hard_used_locs = arena_zarray(f->arena, AllocIntervalVec, a->hard_loc_bits ? a->hard_loc_bits : 1u); a->stack_slots = NULL; @@ -706,9 +708,9 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges, OptAllocCandidate* cands = arena_array(f->arena, OptAllocCandidate, ncands ? ncands : 1u); u32 n = 0; - for (Val v = 1; v < f->nvals; ++v) { - if (ranges->first_range_by_val[v] == OPT_RANGE_NONE) continue; - OptValInfo* vi = &f->val_info[v]; + for (PReg v = 1; v < opt_reg_count(f); ++v) { + if (ranges->first_range_by_preg[v] == OPT_RANGE_NONE) continue; + OptPRegInfo* vi = &f->preg_info[v]; cands[n].v = v; cands[n].spill_cost = vi->frequency ? vi->frequency : vi->spill_cost; cands[n].live_length = vi->live_length; @@ -719,8 +721,8 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges, alloc_sort_candidates(cands, n); for (u32 i = 0; i < n; ++i) { - Val v = cands[i].v; - OptValInfo* vi = &f->val_info[v]; + PReg v = cands[i].v; + OptPRegInfo* vi = &f->preg_info[v]; u8 cls = vi->cls; if (vi->tied_hard_reg >= 0) { Reg fixed = (Reg)vi->tied_hard_reg; @@ -851,38 +853,38 @@ static void out_prepend_inst(Func* f, RewriteOut* out, const Inst* in) { *dst = *in; } -static MemAccess spill_mem(Func* f, Val v) { +static MemAccess spill_mem(Func* f, PReg v) { MemAccess m; memset(&m, 0, sizeof m); - m.type = f->val_type[v]; - m.size = type_size_fallback(f, f->val_type[v]); + m.type = opt_reg_type(f, v); + m.size = type_size_fallback(f, opt_reg_type(f, v)); m.align = m.size >= 8 ? 8 : m.size; m.alias.kind = ALIAS_LOCAL; m.alias.v.local_id = (i32)spill_slot_for(f, v); return m; } -static Operand spill_addr(Func* f, Val v) { +static Operand spill_addr(Func* f, PReg v) { Operand o; memset(&o, 0, sizeof o); o.kind = OPK_LOCAL; o.cls = RC_INT; - o.type = f->val_type[v]; + o.type = opt_reg_type(f, v); o.v.frame_slot = spill_slot_for(f, v); return o; } -static Operand hard_operand(Func* f, Val v) { +static Operand hard_operand(Func* f, PReg v) { Operand o; memset(&o, 0, sizeof o); o.kind = OPK_REG; - o.cls = f->val_info[v].cls; - o.type = f->val_type[v]; - o.v.reg = f->val_info[v].hard_reg; + o.cls = f->preg_info[v].cls; + o.type = opt_reg_type(f, v); + o.v.reg = f->preg_info[v].hard_reg; return o; } -static void append_store_val(Func* f, RewriteList* out, Val v) { +static void append_store_preg(Func* f, RewriteList* out, PReg v) { Inst* st = list_push(f, out, IR_STORE); st->opnds = arena_array(f->arena, Operand, 2); st->opnds[0] = spill_addr(f, v); @@ -891,7 +893,7 @@ static void append_store_val(Func* f, RewriteList* out, Val v) { st->extra.mem = spill_mem(f, v); } -static void append_load_val(Func* f, RewriteList* out, Val v) { +static void append_load_preg(Func* f, RewriteList* out, PReg v) { Inst* ld = list_push(f, out, IR_LOAD); ld->opnds = arena_array(f->arena, Operand, 2); ld->opnds[0] = hard_operand(f, v); @@ -918,9 +920,9 @@ static void rewrite_one_operand(Func* f, Inst* owner, Operand* op, int is_def, void* arg) { RewriteCtx* c = (RewriteCtx*)arg; if (op->kind != OPK_REG) return; - Val v = (Val)op->v.reg; - if (v == VAL_NONE || v >= f->nvals) return; - OptValInfo* vi = &f->val_info[v]; + PReg v = (PReg)op->v.reg; + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return; + OptPRegInfo* vi = &f->preg_info[v]; if (vi->alloc_kind == OPT_ALLOC_HARD) { op->v.reg = vi->hard_reg; return; @@ -954,9 +956,9 @@ static void rewrite_one_operand(Func* f, Inst* owner, Operand* op, int is_def, static void rewrite_call_arg_operand(Func* f, Operand* op) { if (!op || op->kind != OPK_REG) return; - Val v = (Val)op->v.reg; - if (v == VAL_NONE || v >= f->nvals) return; - OptValInfo* vi = &f->val_info[v]; + PReg v = (PReg)op->v.reg; + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return; + OptPRegInfo* vi = &f->preg_info[v]; if (vi->alloc_kind == OPT_ALLOC_HARD) { op->v.reg = vi->hard_reg; } else if (vi->alloc_kind == OPT_ALLOC_SPILL) { @@ -995,31 +997,31 @@ typedef struct RewriteCallSaveCtx { int emit_restore; } RewriteCallSaveCtx; -static void rewrite_call_save_one(Val v, void* arg) { +static void rewrite_call_save_one(PReg v, void* arg) { RewriteCallSaveCtx* c = (RewriteCallSaveCtx*)arg; Func* f = c->f; - if (v == VAL_NONE || v >= f->nvals) return; + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return; if (c->refs && refs_has_def(c->refs, v)) return; - if (f->val_info[v].alloc_kind != OPT_ALLOC_HARD) return; - u8 cls = f->val_info[v].cls; - Reg hr = f->val_info[v].hard_reg; + if (f->preg_info[v].alloc_kind != OPT_ALLOC_HARD) return; + u8 cls = f->preg_info[v].cls; + Reg hr = f->preg_info[v].hard_reg; if (cls >= OPT_REG_CLASSES || hr >= 32u) return; if ((opt_call_clobber_mask_for(f, c->call, cls) & (1u << hr)) == 0) return; if (c->emit_restore) - append_load_val(f, c->out, v); + append_load_preg(f, c->out, v); else - append_store_val(f, c->out, v); + append_store_preg(f, c->out, v); } static void append_live_call_saves(Func* f, RewriteList* out, const Inst* call, const u64* live_after, u32 live_active_words, const InstRefs* refs, - const Val* call_save_vals, - u32 ncall_save_vals, int emit_restore) { + const PReg* call_save_pregs, + u32 ncall_save_pregs, int emit_restore) { RewriteCallSaveCtx ctx = {f, out, refs, call, emit_restore}; - f->opt_rewrite_live_words_touched += ncall_save_vals; - for (u32 i = 0; i < ncall_save_vals; ++i) { - Val v = call_save_vals[i]; + f->opt_rewrite_live_words_touched += ncall_save_pregs; + for (u32 i = 0; i < ncall_save_pregs; ++i) { + PReg v = call_save_pregs[i]; u32 w = v / 64u; if (w >= live_active_words) continue; if (!bit_has(live_after, v)) continue; @@ -1027,36 +1029,36 @@ static void append_live_call_saves(Func* f, RewriteList* out, const Inst* call, } } -static Val* rewrite_collect_call_save_vals(Func* f, u32* count_out) { +static PReg* rewrite_collect_call_save_pregs(Func* f, u32* count_out) { u32 n = 0; - for (Val v = 1; v < f->nvals; ++v) { - OptValInfo* vi = &f->val_info[v]; + for (PReg v = 1; v < opt_reg_count(f); ++v) { + OptPRegInfo* vi = &f->preg_info[v]; if (vi->alloc_kind != OPT_ALLOC_HARD) continue; if (!vi->live_across_call_freq) continue; ++n; } - Val* vals = arena_array(f->arena, Val, n ? n : 1u); + PReg* pregs = arena_array(f->arena, PReg, n ? n : 1u); u32 w = 0; - for (Val v = 1; v < f->nvals; ++v) { - OptValInfo* vi = &f->val_info[v]; + for (PReg v = 1; v < opt_reg_count(f); ++v) { + OptPRegInfo* vi = &f->preg_info[v]; if (vi->alloc_kind != OPT_ALLOC_HARD) continue; if (!vi->live_across_call_freq) continue; - vals[w++] = v; + pregs[w++] = v; } *count_out = n; - return vals; + return pregs; } static void rewrite_func(Func* f, const OptLiveInfo* live_info) { u32 words = live_info ? live_info->words : f->opt_live_words; - if (!words) words = bit_words(f->nvals); + if (!words) words = bit_words(opt_reg_count(f)); f->opt_live_words = (u16)words; f->opt_rewrite_inserted_insts = 0; f->opt_rewrite_live_words_touched = 0; u64* live = arena_zarray(f->arena, u64, words ? words : 1u); - u32 ncall_save_vals = 0; - Val* call_save_vals = rewrite_collect_call_save_vals(f, &ncall_save_vals); + u32 ncall_save_pregs = 0; + PReg* call_save_pregs = rewrite_collect_call_save_pregs(f, &ncall_save_pregs); InstRefs refs; memset(&refs, 0, sizeof refs); u32 live_active_words = 0; @@ -1116,9 +1118,9 @@ static void rewrite_func(Func* f, const OptLiveInfo* live_info) { } if ((IROp)in.op == IR_CALL) { append_live_call_saves(f, &call_saves, &in, live, live_active_words, - &refs, call_save_vals, ncall_save_vals, 0); + &refs, call_save_pregs, ncall_save_pregs, 0); append_live_call_saves(f, &call_restores, &in, live, live_active_words, - &refs, call_save_vals, ncall_save_vals, 1); + &refs, call_save_pregs, ncall_save_pregs, 1); } out_prepend_list_reverse(f, &out, &after); @@ -1144,8 +1146,8 @@ static void rewrite_dump_write(Writer* w, const char* s) { void opt_rewrite_dump(Func* f, Writer* w) { if (!f || !w) return; char buf[96]; - snprintf(buf, sizeof buf, "rewrite blocks=%u vals=%u rewritten=%u\n", - (unsigned)f->nblocks, (unsigned)f->nvals, + snprintf(buf, sizeof buf, "rewrite blocks=%u pregs=%u rewritten=%u\n", + (unsigned)f->nblocks, (unsigned)opt_reg_count(f), (unsigned)f->opt_rewritten); rewrite_dump_write(w, buf); for (u32 b = 0; b < f->nblocks; ++b) { @@ -1163,10 +1165,12 @@ void opt_rewrite_dump(Func* f, Writer* w) { } static int all_defs_dead(Func* f, Inst* in, u64* live) { - (void)f; - if (in->def != VAL_NONE && bit_has(live, in->def)) return 0; - for (u32 i = 0; i < in->ndefs; ++i) - if (in->defs[i] != VAL_NONE && bit_has(live, in->defs[i])) return 0; + if (in->def != 0 && in->def < opt_reg_count(f) && bit_has(live, in->def)) + return 0; + for (u32 i = 0; i < in->ndefs; ++i) { + PReg r = (PReg)in->defs[i]; + if (r != 0 && r < opt_reg_count(f) && bit_has(live, r)) return 0; + } return 1; } @@ -1226,14 +1230,14 @@ void opt_regalloc(Func* f, int allow_live_range_split) { opt_live_blocks(f, &live); OptLiveRangeSet ranges; opt_live_ranges_build(f, &live, &ranges); - opt_init_val_info_from_ranges(f, &ranges); + opt_init_preg_info_from_ranges(f, &ranges); opt_apply_asm_constraints_from_live(f, &live); apply_param_incoming_register_hazards(f); metrics_count(f->c, "opt.live_words", f->opt_live_words); metrics_count(f->c, "opt.ranges", ranges.nranges); metrics_count(f->c, "opt.range_points", ranges.point_count); metrics_count(f->c, "opt.range_raw_points", ranges.raw_point_count); - metrics_count(f->c, "opt.range_max_per_val", ranges.max_ranges_per_val); + metrics_count(f->c, "opt.range_max_per_preg", ranges.max_ranges_per_preg); metrics_count(f->c, "opt.range_max_length", ranges.max_live_length); metrics_count(f->c, "opt.range_whole_block_spans", ranges.whole_block_spans); metrics_count(f->c, "opt.live.bitset_words_touched", @@ -1242,7 +1246,7 @@ void opt_regalloc(Func* f, int allow_live_range_split) { metrics_count(f->c, "opt.live.dataflow_block_visits", live.dataflow_block_visits); metrics_count(f->c, "opt.range.point_visits", ranges.range_point_visits); - metrics_count(f->c, "opt.range.value_scans", ranges.value_scans); + metrics_count(f->c, "opt.range.preg_scans", ranges.preg_scans); metrics_count(f->c, "opt.range.live_words_touched", ranges.live_words_touched); metrics_count(f->c, "opt.conflict_bytes", 0); diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -421,22 +421,23 @@ static void expect_ir_dump_eq(Func* f, const char* expected, cfree_writer_close(w); } -static void ensure_test_val_info(Func* f) { - if (f->val_info) return; - f->val_info = arena_zarray(f->arena, OptValInfo, f->nvals ? f->nvals : 1u); - for (Val v = 0; v < f->nvals; ++v) { - f->val_info[v].tied_hard_reg = -1; - f->val_info[v].hard_reg = REG_NONE; - f->val_info[v].spill_slot = FRAME_SLOT_NONE; - f->val_info[v].cls = f->val_cls[v]; +static void ensure_test_preg_info(Func* f) { + if (f->preg_info) return; + u32 nregs = opt_reg_count(f); + f->preg_info = arena_zarray(f->arena, OptPRegInfo, nregs ? nregs : 1u); + for (PReg r = 0; r < nregs; ++r) { + f->preg_info[r].tied_hard_reg = -1; + f->preg_info[r].hard_reg = REG_NONE; + f->preg_info[r].spill_slot = FRAME_SLOT_NONE; + f->preg_info[r].cls = opt_reg_cls(f, r); } } static int ranges_overlap(const OptLiveRangeSet* ranges, Val a, Val b) { - for (u32 ar = ranges->first_range_by_val[a]; ar != OPT_RANGE_NONE; + for (u32 ar = ranges->first_range_by_preg[a]; ar != OPT_RANGE_NONE; ar = ranges->ranges[ar].next) { const OptLiveRange* ra = &ranges->ranges[ar]; - for (u32 br = ranges->first_range_by_val[b]; br != OPT_RANGE_NONE; + for (u32 br = ranges->first_range_by_preg[b]; br != OPT_RANGE_NONE; br = ranges->ranges[br].next) { const OptLiveRange* rb = &ranges->ranges[br]; if (ra->start < rb->end && rb->start < ra->end) return 1; @@ -479,7 +480,7 @@ static u32 count_uses_of(Func* f, Val v) { static u32 live_range_count_for(const OptLiveRangeSet* ranges, Val v) { u32 n = 0; - for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; + for (u32 r = ranges->first_range_by_preg[v]; r != OPT_RANGE_NONE; r = ranges->ranges[r].next) ++n; return n; @@ -1342,11 +1343,11 @@ static void opt_regalloc_prefers_caller_saved_for_non_call_value(void) { opt_build_loop_tree(f); opt_regalloc(f, 0); - EXPECT(f->val_info[v].alloc_kind == OPT_ALLOC_HARD, + EXPECT(f->preg_info[v].alloc_kind == OPT_ALLOC_HARD, "single live value should allocate a hard register"); - EXPECT(f->val_info[v].hard_reg == 2, + EXPECT(f->preg_info[v].hard_reg == 2, "non-call-crossing value should prefer caller-saved r2, got r%u", - (unsigned)f->val_info[v].hard_reg); + (unsigned)f->preg_info[v].hard_reg); tc_fini(&tc); } @@ -1533,24 +1534,24 @@ static void opt_live_ranges_phase2(void) { OptLiveRangeSet ranges; opt_live_ranges_build(f, &live, &ranges); - EXPECT(ranges.first_range_by_val[loop_v] != OPT_RANGE_NONE, + EXPECT(ranges.first_range_by_preg[loop_v] != OPT_RANGE_NONE, "loop value should have a live range"); - EXPECT(ranges.first_range_by_val[call_live] != OPT_RANGE_NONE, + EXPECT(ranges.first_range_by_preg[call_live] != OPT_RANGE_NONE, "call-live value should have a live range"); EXPECT(live_range_count_for(&ranges, loop_v) >= 2, "loop-carried value should have ranges in multiple blocks"); - EXPECT(ranges.live_length_by_val[loop_v] != 0, + EXPECT(ranges.live_length_by_preg[loop_v] != 0, "loop value should record live length"); - EXPECT(ranges.spill_cost_by_val[loop_v] != 0, + EXPECT(ranges.spill_cost_by_preg[loop_v] != 0, "loop value should record spill cost"); - EXPECT(ranges.live_across_call_freq_by_val[call_live] != 0, + EXPECT(ranges.live_across_call_freq_by_preg[call_live] != 0, "call-live value should record live-across-call frequency"); EXPECT(ranges.point_count != 0, "range builder should compress points"); EXPECT(ranges.point_count <= ranges.raw_point_count, "compressed point count should not exceed raw point count"); EXPECT(ranges.whole_block_spans != 0, "range builder should record whole-block spans"); - EXPECT(ranges.max_ranges_per_val >= live_range_count_for(&ranges, loop_v), + EXPECT(ranges.max_ranges_per_preg >= live_range_count_for(&ranges, loop_v), "range metrics should record max ranges per value"); CfreeWriter* w = NULL; @@ -1560,8 +1561,8 @@ static void opt_live_ranges_phase2(void) { const unsigned char* bytes = cfree_writer_mem_bytes(w, &len); EXPECT(bytes_contains(bytes, len, "ranges total="), "range dump should include summary"); - EXPECT(bytes_contains(bytes, len, "v"), - "range dump should include value rows"); + EXPECT(bytes_contains(bytes, len, "r"), + "range dump should include preg rows"); cfree_writer_close(w); tc_fini(&tc); } @@ -1595,11 +1596,11 @@ static void opt_range_liveness_linear(void) { (void)ib; (void)ic; (void)ir; - EXPECT(ranges.first_range_by_val[a] != OPT_RANGE_NONE, + EXPECT(ranges.first_range_by_preg[a] != OPT_RANGE_NONE, "a should get a live range"); - EXPECT(ranges.first_range_by_val[bv] != OPT_RANGE_NONE, + EXPECT(ranges.first_range_by_preg[bv] != OPT_RANGE_NONE, "b should get a live range"); - EXPECT(ranges.first_range_by_val[c] != OPT_RANGE_NONE, + EXPECT(ranges.first_range_by_preg[c] != OPT_RANGE_NONE, "c should get a live range"); EXPECT(ranges_overlap(&ranges, a, bv), "a and b ranges should overlap"); EXPECT(ranges_overlap(&ranges, a, c), @@ -1645,11 +1646,11 @@ static void opt_interference_branch_disjoint(void) { "branch-local values v%u and v%u should not overlap", a, b); opt_regalloc(f, 0); - EXPECT(f->val_info[a].alloc_kind == OPT_ALLOC_HARD, + EXPECT(f->preg_info[a].alloc_kind == OPT_ALLOC_HARD, "then value should get the one hard register"); - EXPECT(f->val_info[b].alloc_kind == OPT_ALLOC_HARD, + EXPECT(f->preg_info[b].alloc_kind == OPT_ALLOC_HARD, "else value should share the one hard register"); - EXPECT(f->val_info[a].hard_reg == f->val_info[b].hard_reg, + EXPECT(f->preg_info[a].hard_reg == f->preg_info[b].hard_reg, "disjoint branch values should share a hard register"); tc_fini(&tc); } @@ -1714,9 +1715,9 @@ static void opt_loop_frequency_weights_ranges(void) { opt_live_blocks(f, &live); opt_live_ranges_build(f, &live, &ranges); - EXPECT(ranges.use_freq_by_val[loop_v] > ranges.use_freq_by_val[exit_v], + EXPECT(ranges.use_freq_by_preg[loop_v] > ranges.use_freq_by_preg[exit_v], "loop-used value should have higher weighted use frequency"); - EXPECT(ranges.spill_cost_by_val[loop_v] > ranges.spill_cost_by_val[exit_v], + EXPECT(ranges.spill_cost_by_preg[loop_v] > ranges.spill_cost_by_preg[exit_v], "loop-used value should have higher spill cost"); tc_fini(&tc); } @@ -1741,9 +1742,9 @@ static void opt_live_across_call_frequency(void) { opt_live_blocks(f, &live_info); opt_live_ranges_build(f, &live_info, &ranges); - EXPECT(ranges.live_across_call_freq_by_val[live_val] > 0, + EXPECT(ranges.live_across_call_freq_by_preg[live_val] > 0, "live value should be marked live across call"); - EXPECT(ranges.live_across_call_freq_by_val[dead] == 0, + EXPECT(ranges.live_across_call_freq_by_preg[dead] == 0, "dead value should not be marked live across call"); tc_fini(&tc); } @@ -1812,7 +1813,7 @@ static void opt_cfg_prunes_unreachable(void) { EXPECT(f->emit_order_n == 2, "emit_order should skip unreachable block, got %u", (unsigned)f->emit_order_n); - EXPECT(ranges.first_range_by_val[dead_val] == OPT_RANGE_NONE, + EXPECT(ranges.first_range_by_preg[dead_val] == OPT_RANGE_NONE, "unreachable value should not get a live range"); tc_fini(&tc); } @@ -2664,17 +2665,17 @@ static void opt_regalloc_priority(void) { emit_ret_val(f, f->entry, out, tc.i32); opt_build_cfg(f); opt_build_loop_tree(f); - ensure_test_val_info(f); - f->val_info[pinned].tied_hard_reg = (i32)f->opt_hard_regs[RC_INT][0]; - f->val_info[hot].frequency += 1000; + ensure_test_preg_info(f); + f->preg_info[pinned].tied_hard_reg = (i32)f->opt_hard_regs[RC_INT][0]; + f->preg_info[hot].frequency += 1000; opt_regalloc(f, 0); Reg expected_hard = f->opt_hard_regs[RC_INT][0]; - EXPECT(f->val_info[pinned].alloc_kind == OPT_ALLOC_HARD, + EXPECT(f->preg_info[pinned].alloc_kind == OPT_ALLOC_HARD, "tied value should get a hard register"); - EXPECT(f->val_info[pinned].hard_reg == expected_hard, + EXPECT(f->preg_info[pinned].hard_reg == expected_hard, "tied value should get hard r%u, got r%u", (unsigned)expected_hard, - (unsigned)f->val_info[pinned].hard_reg); - EXPECT(f->val_info[hot].alloc_kind == OPT_ALLOC_SPILL, + (unsigned)f->preg_info[pinned].hard_reg); + EXPECT(f->preg_info[hot].alloc_kind == OPT_ALLOC_SPILL, "overlapping untied value should spill under one-reg pressure"); tc_fini(&tc); } @@ -2707,11 +2708,11 @@ static void opt_range_regalloc_no_conflicts_and_stack_reuse(void) { "range allocator should record allocated stack slots"); EXPECT(f->opt_alloc_stack_loc_words <= f->opt_used_loc_words, "stack occupancy should be reported separately"); - EXPECT(f->val_info[a].alloc_kind == OPT_ALLOC_SPILL, + EXPECT(f->preg_info[a].alloc_kind == OPT_ALLOC_SPILL, "no hard regs should force v%u to stack", (unsigned)a); - EXPECT(f->val_info[b].alloc_kind == OPT_ALLOC_SPILL, + EXPECT(f->preg_info[b].alloc_kind == OPT_ALLOC_SPILL, "no hard regs should force v%u to stack", (unsigned)b); - EXPECT(f->val_info[a].spill_slot == f->val_info[b].spill_slot, + EXPECT(f->preg_info[a].spill_slot == f->preg_info[b].spill_slot, "disjoint stack ranges should reuse a spill slot"); tc_fini(&tc); } @@ -2852,7 +2853,7 @@ static void opt_spill_pressure(void) { int spills = 0; for (Val v = 1; v < f->nvals; ++v) - if (f->val_info[v].alloc_kind == OPT_ALLOC_SPILL) ++spills; + if (f->preg_info[v].alloc_kind == OPT_ALLOC_SPILL) ++spills; EXPECT(spills >= 2, "one hard reg should force multiple spills, got %d", spills); EXPECT(count_op(f, IR_LOAD) >= 2, "spilled uses should reload"); @@ -2898,14 +2899,14 @@ static void opt_inline_asm_tied_fixed_regs(void) { opt_build_cfg(f); opt_build_loop_tree(f); - ensure_test_val_info(f); - f->val_info[tied].tied_hard_reg = (i32)f->opt_hard_regs[RC_INT][0]; + ensure_test_preg_info(f); + f->preg_info[tied].tied_hard_reg = (i32)f->opt_hard_regs[RC_INT][0]; opt_regalloc(f, 0); aux = (IRAsmAux*)in->extra.aux; Reg expected = f->opt_hard_regs[RC_INT][0]; - EXPECT(f->val_info[tied].alloc_kind == OPT_ALLOC_HARD, + EXPECT(f->preg_info[tied].alloc_kind == OPT_ALLOC_HARD, "tied asm val should allocate hard"); - EXPECT(f->val_info[tied].hard_reg == expected, "tied asm val should get r%u", + EXPECT(f->preg_info[tied].hard_reg == expected, "tied asm val should get r%u", (unsigned)expected); EXPECT(aux->out_ops[0].v.reg == expected && aux->in_ops[0].v.reg == expected, "asm tied operands should rewrite to the fixed hard reg"); @@ -2967,11 +2968,11 @@ static void opt_inline_asm_constraints_and_clobbers(void) { opt_regalloc(f, 0); aux = (IRAsmAux*)in->extra.aux; - EXPECT(f->val_info[fixed].alloc_kind == OPT_ALLOC_HARD && - f->val_info[fixed].hard_reg == 19, + EXPECT(f->preg_info[fixed].alloc_kind == OPT_ALLOC_HARD && + f->preg_info[fixed].hard_reg == 19, "fixed asm input should allocate x19"); - EXPECT(f->val_info[live].alloc_kind != OPT_ALLOC_HARD || - f->val_info[live].hard_reg != 20, + EXPECT(f->preg_info[live].alloc_kind != OPT_ALLOC_HARD || + f->preg_info[live].hard_reg != 20, "value live across asm clobber should not allocate clobbered x20"); EXPECT(aux->in_ops[0].kind == OPK_REG && aux->in_ops[0].v.reg == 19, "fixed asm input should rewrite to x19"); diff --git a/test/opt/phase0_guardrails.sh b/test/opt/phase0_guardrails.sh @@ -243,11 +243,11 @@ check_metrics() { opt.ranges \ opt.range_points \ opt.range_raw_points \ - opt.range_max_per_val \ + opt.range_max_per_preg \ opt.range_max_length \ opt.range_whole_block_spans \ opt.range.point_visits \ - opt.range.value_scans \ + opt.range.preg_scans \ opt.range.live_words_touched \ opt.dde.live_words_touched \ opt.alloc.used_loc_words \