kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 0dbab6ba4f57c50e1ff6cd5a36e59dc21bd2dfdb
parent 0849cc998838c260588811ad254b1aa8a5a8823c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 14:35:34 -0700

opt: model asm and call hard-reg effects

Diffstat:
Msrc/opt/ir.h | 15+++++++++++----
Msrc/opt/pass_lower.c | 172++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mtest/opt/opt_test.c | 197+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 374 insertions(+), 10 deletions(-)

diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -104,6 +104,10 @@ typedef struct IRBitFieldAux { BitFieldAccess access; } IRBitFieldAux; +#define OPT_REG_CLASSES 3u +#define OPT_MAX_HARD_REGS 32u +#define OPT_MAX_SCRATCH_REGS 4u + typedef struct IRGepAux { CfreeCgTypeId base_type; u32 nindices; @@ -170,6 +174,12 @@ typedef struct IRAsmAux { Operand* out_ops; /* nout slots; the wrapped target may fill in REG location */ Operand* in_ops; /* nin slots; recorded by w_asm_block, xlat'd at replay */ u32 nout, nin, nclob; + /* Filled by opt_machinize from backend register-name resolution. */ + u32 clobber_mask[OPT_REG_CLASSES]; + i32* out_fixed_regs; /* nout, -1 when unconstrained */ + i32* in_fixed_regs; /* nin, -1 when unconstrained */ + u8* out_fixed_cls; /* RegClass, parallel to out_fixed_regs */ + u8* in_fixed_cls; /* RegClass, parallel to in_fixed_regs */ } IRAsmAux; typedef struct IRIntrinAux { @@ -271,12 +281,9 @@ typedef struct OptValInfo { u8 alloc_kind; /* OptAllocKind */ u8 cls; /* RegClass */ u8 pad[2]; + u32 forbidden_hard_regs; /* bit r means Val may not allocate hard reg r. */ } OptValInfo; -#define OPT_REG_CLASSES 3u -#define OPT_MAX_HARD_REGS 32u -#define OPT_MAX_SCRATCH_REGS 4u - typedef struct Func { Arena* arena; Compiler* c; diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -4,6 +4,7 @@ #include "core/arena.h" #include "core/core.h" +#include "core/pool.h" enum { OPT_CG_TYPE_SEG_SHIFT = 6, @@ -235,10 +236,124 @@ static void copy_bits(u64* dst, const u64* src, u32 words) { for (u32 w = 0; w < words; ++w) dst[w] = src[w]; } +static void forbid_val_reg(Func* f, Val v, u8 cls, Reg r) { + if (v == VAL_NONE || v >= f->nvals || cls >= OPT_REG_CLASSES || r >= 32) + return; + if (f->val_cls[v] != cls) return; + f->val_info[v].forbidden_hard_regs |= 1u << r; +} + +static void apply_fixed_asm_operand(Func* f, Operand* op, i32 fixed, + u8 fixed_cls) { + if (!op || op->kind != OPK_REG || fixed < 0) return; + Val v = (Val)op->v.reg; + if (v == VAL_NONE || v >= f->nvals) return; + if (fixed_cls >= OPT_REG_CLASSES || f->val_cls[v] != fixed_cls) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, "opt asm: fixed register class mismatch"); + } + f->val_info[v].tied_hard_reg = fixed; +} + +static void apply_asm_register_constraints(Func* f, Inst* in, u64* use, + u64* def, u64* live_after) { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux || !f->val_info) return; + + for (u32 i = 0; i < aux->nout; ++i) { + i32 fixed = aux->out_fixed_regs ? aux->out_fixed_regs[i] : -1; + u8 cls = aux->out_fixed_cls ? aux->out_fixed_cls[i] : 0; + apply_fixed_asm_operand(f, &aux->out_ops[i], fixed, cls); + } + for (u32 i = 0; i < aux->nin; ++i) { + i32 fixed = aux->in_fixed_regs ? aux->in_fixed_regs[i] : -1; + u8 cls = aux->in_fixed_cls ? aux->in_fixed_cls[i] : 0; + apply_fixed_asm_operand(f, &aux->in_ops[i], fixed, cls); + } + + for (u32 cls = 0; cls < OPT_REG_CLASSES; ++cls) { + u32 mask = aux->clobber_mask[cls]; + if (!mask) continue; + for (Reg r = 0; r < 32; ++r) { + if ((mask & (1u << r)) == 0) continue; + for (Val v = 1; v < f->nvals; ++v) { + if (!bit_has(use, v) && !bit_has(def, v) && + !(live_after && bit_has(live_after, v))) + continue; + forbid_val_reg(f, v, (u8)cls, r); + } + } + } +} + static int mem_observable(const MemAccess* m) { return (m->flags & (MF_VOLATILE | MF_ATOMIC)) != 0; } +static const char* asm_constraint_body(const char* s) { + if (!s) return ""; + if (s[0] == '=' && s[1] == '&') return s + 2; + if (s[0] == '=' || s[0] == '+' || s[0] == '&') return s + 1; + return s; +} + +static int asm_resolve_fixed_constraint(Func* f, CGTarget* target, + const char* constraint, + Reg* reg_out, RegClass* cls_out) { + const char* body = asm_constraint_body(constraint); + if (!target->resolve_reg_name) return 0; + if (body[0] != '{') return 0; + const char* end = body + 1; + while (*end && *end != '}') ++end; + if (*end != '}' || end == body + 1) return 0; + Sym name = pool_intern(f->c->global, body + 1, (size_t)(end - body - 1)); + return target->resolve_reg_name(target, name, reg_out, cls_out) == 0; +} + +static void asm_prepare_constraints(Func* f, CGTarget* target, IRAsmAux* aux) { + if (!aux) return; + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) aux->clobber_mask[c] = 0; + + if (aux->nout && !aux->out_fixed_regs) { + aux->out_fixed_regs = arena_array(f->arena, i32, aux->nout); + aux->out_fixed_cls = arena_zarray(f->arena, u8, aux->nout); + for (u32 i = 0; i < aux->nout; ++i) aux->out_fixed_regs[i] = -1; + } + if (aux->nin && !aux->in_fixed_regs) { + aux->in_fixed_regs = arena_array(f->arena, i32, aux->nin); + aux->in_fixed_cls = arena_zarray(f->arena, u8, aux->nin); + for (u32 i = 0; i < aux->nin; ++i) aux->in_fixed_regs[i] = -1; + } + + if (target->resolve_reg_name) { + for (u32 i = 0; i < aux->nclob; ++i) { + Reg r; + RegClass cls; + if (target->resolve_reg_name(target, aux->clobbers[i], &r, &cls) != 0) + continue; + if ((u32)cls < OPT_REG_CLASSES && r < 32) + aux->clobber_mask[cls] |= 1u << r; + } + } + + for (u32 i = 0; i < aux->nout; ++i) { + Reg r; + RegClass cls; + if (asm_resolve_fixed_constraint(f, target, aux->outs[i].str, &r, &cls)) { + aux->out_fixed_regs[i] = (i32)r; + aux->out_fixed_cls[i] = (u8)cls; + } + } + for (u32 i = 0; i < aux->nin; ++i) { + Reg r; + RegClass cls; + if (asm_resolve_fixed_constraint(f, target, aux->ins[i].str, &r, &cls)) { + aux->in_fixed_regs[i] = (i32)r; + aux->in_fixed_cls[i] = (u8)cls; + } + } +} + void opt_machinize(Func* f, CGTarget* target) { f->opt_target = target->c->target; f->opt_has_target = 1; @@ -286,6 +401,15 @@ void opt_machinize(Func* f, CGTarget* target) { } } } + + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op == IR_ASM_BLOCK) + asm_prepare_constraints(f, target, (IRAsmAux*)in->extra.aux); + } + } } static int is_caller_saved(Func* f, u8 cls, Reg r) { @@ -465,6 +589,7 @@ void opt_live_info(Func* f) { f->val_info[v].spill_slot = FRAME_SLOT_NONE; f->val_info[v].alloc_kind = OPT_ALLOC_NONE; f->val_info[v].cls = f->val_cls[v]; + f->val_info[v].forbidden_hard_regs = 0; } for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; @@ -557,6 +682,8 @@ void opt_live_info(Func* f) { f->val_info[v].live_across_call_freq += bl->frequency; } } + if ((IROp)in->op == IR_ASM_BLOCK) + apply_asm_register_constraints(f, in, use, def, after); for (u32 w = 0; w < words; ++w) live[w] = (live[w] & ~def[w]) | use[w]; @@ -605,6 +732,13 @@ static int hard_conflicts(Func* f, Val* assigned, u32 nassigned, Val v, Reg r) { return 0; } +static int hard_available(Func* f, u8 cls, Reg r) { + if (cls >= OPT_REG_CLASSES) return 0; + for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) + if (f->opt_hard_regs[cls][i] == r) return 1; + return 0; +} + static FrameSlot spill_slot_for(Func* f, Val v) { if (f->val_info[v].spill_slot != FRAME_SLOT_NONE) return f->val_info[v].spill_slot; @@ -703,7 +837,6 @@ typedef struct RewriteCtx { static void rewrite_one_operand(Func* f, Inst* owner, Operand* op, int is_def, void* arg) { - (void)owner; RewriteCtx* c = (RewriteCtx*)arg; if (op->kind != OPK_REG) return; Val v = (Val)op->v.reg; @@ -716,6 +849,12 @@ static void rewrite_one_operand(Func* f, Inst* owner, Operand* op, int is_def, if (vi->alloc_kind != OPT_ALLOC_SPILL) return; u8 cls = vi->cls; Reg scratch = scratch_for(f, cls, &c->next_scratch[cls]); + if (scratch == (Reg)REG_NONE) { + SrcLoc loc = owner ? owner->loc : (SrcLoc){0, 0, 0}; + compiler_panic(f->c, loc, + "opt rewrite: no scratch register for spilled class %u", + (unsigned)cls); + } op->v.reg = scratch; if (!is_def) { Inst* ld = list_push(f, c->before, IR_LOAD); @@ -859,16 +998,35 @@ void opt_regalloc(Func* f, int allow_live_range_split) { Val v = order[i]; OptValInfo* vi = &f->val_info[v]; u8 cls = vi->cls; - if (vi->tied_hard_reg >= 0 && - !hard_conflicts(f, assigned, nassigned, v, (Reg)vi->tied_hard_reg)) { + if (vi->tied_hard_reg >= 0) { + Reg fixed = (Reg)vi->tied_hard_reg; + if (!hard_available(f, cls, fixed)) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, + "opt regalloc: fixed hard reg %u unavailable in class %u", + (unsigned)fixed, (unsigned)cls); + } + if (fixed < 32 && (vi->forbidden_hard_regs & (1u << fixed))) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, + "opt regalloc: fixed hard reg %u is clobbered", + (unsigned)fixed); + } + if (hard_conflicts(f, assigned, nassigned, v, fixed)) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, + "opt regalloc: conflicting fixed hard reg %u", + (unsigned)fixed); + } vi->alloc_kind = OPT_ALLOC_HARD; - vi->hard_reg = (Reg)vi->tied_hard_reg; + vi->hard_reg = fixed; assigned[nassigned++] = v; continue; } int found = 0; for (u32 r = 0; r < f->opt_hard_reg_count[cls]; ++r) { Reg hr = f->opt_hard_regs[cls][r]; + if (hr < 32 && (vi->forbidden_hard_regs & (1u << hr))) continue; if (!hard_conflicts(f, assigned, nassigned, v, hr)) { vi->alloc_kind = OPT_ALLOC_HARD; vi->hard_reg = hr; @@ -1396,7 +1554,7 @@ static void hard_def_abivalue(HardRegSet* def, const CGABIValue* v) { for (u32 i = 0; i < v->nparts; ++i) hard_def_operand(def, &v->parts[i].op); } -static void hard_inst_use_def(const Inst* in, HardRegSet* use, +static void hard_inst_use_def(Func* f, const Inst* in, HardRegSet* use, HardRegSet* def) { memset(use, 0, sizeof *use); memset(def, 0, sizeof *def); @@ -1441,6 +1599,8 @@ static void hard_inst_use_def(const Inst* in, HardRegSet* use, for (u32 i = 0; i < aux->desc.nargs; ++i) hard_use_abivalue(use, &aux->desc.args[i]); hard_def_abivalue(def, &aux->desc.ret); + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) + def->cls[c] |= f->opt_caller_saved[c]; break; } case IR_CMP_BRANCH: @@ -1525,7 +1685,7 @@ void opt_dce(Func* f) { Inst* in = &bl->insts[i]; HardRegSet use, def; if ((IROp)in->op == IR_NOP) continue; - hard_inst_use_def(in, &use, &def); + hard_inst_use_def(f, in, &use, &def); if (!inst_has_side_effect(f, in) && !hard_empty(&def) && !hard_intersects(&def, &live)) { continue; diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -10,6 +10,7 @@ #include "arch/regalloc.h" #include "core/core.h" #include "core/heap.h" +#include "core/pool.h" #include "opt/ir.h" #include "opt/opt.h" @@ -30,12 +31,14 @@ static void h_free(CfreeHeap* h, void* p, size_t n) { free(p); } static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL}; +static int g_suppress_expected_panic_diag; static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, const char* fmt, va_list ap) { static const char* names[] = {"note", "warning", "error", "fatal"}; (void)s; (void)loc; + if (g_suppress_expected_panic_diag && k == CFREE_DIAG_FATAL) return; fprintf(stderr, "%s: ", names[k]); vfprintf(stderr, fmt, ap); fputc('\n', stderr); @@ -288,6 +291,21 @@ static int val_conflicts(const Func* f, Val a, Val b) { return bit_has(bits, b); } +static int expect_panic(Compiler* c, void (*fn)(void*), void* arg) { + PanicSave saved; + int panicked = 0; + compiler_panic_save(c, &saved); + g_suppress_expected_panic_diag++; + if (setjmp(c->panic)) { + panicked = 1; + } else { + fn(arg); + } + g_suppress_expected_panic_diag--; + compiler_panic_restore(c, &saved); + return panicked; +} + static int count_op(Func* f, IROp op) { int n = 0; for (u32 b = 0; b < f->nblocks; ++b) @@ -347,6 +365,25 @@ static void mock_reserve_hard_regs(CGTarget* t, RegClass cls, (void)regs; } +static int mock_resolve_reg_name(CGTarget* t, Sym name, Reg* out, + RegClass* cls_out) { + size_t len = 0; + const char* s = pool_str(t->c->global, name, &len); + if (!s || len < 2) return 1; + if ((s[0] != 'r' && s[0] != 'x' && s[0] != 'v') || + s[1] < '0' || s[1] > '9') + return 1; + u32 n = 0; + for (size_t i = 1; i < len; ++i) { + if (s[i] < '0' || s[i] > '9') return 1; + n = n * 10u + (u32)(s[i] - '0'); + } + if (n >= 32u) return 1; + if (out) *out = (Reg)n; + if (cls_out) *cls_out = (s[0] == 'v') ? RC_FP : RC_INT; + return 0; +} + static void mock_load_imm(CGTarget* t, Operand dst, i64 imm) { (void)imm; MockCGTarget* m = (MockCGTarget*)t; @@ -401,6 +438,7 @@ static void mock_init(MockCGTarget* m, Compiler* c) { m->base.get_scratch_regs = mock_get_scratch_regs; m->base.is_caller_saved = mock_is_caller_saved; m->base.reserve_hard_regs = mock_reserve_hard_regs; + m->base.resolve_reg_name = mock_resolve_reg_name; } static void mock_set_pool(MockCGTarget* m, RegClass cls, @@ -1131,6 +1169,73 @@ static void opt_inline_asm_tied_fixed_regs(void) { tc_fini(&tc); } +static void opt_inline_asm_constraints_and_clobbers(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + mock_init(&mock, tc.c); + static const Reg pool[] = {19, 20}; + static const Reg scratch[] = {9, 10}; + mock_set_pool(&mock, RC_INT, pool, 2, scratch, 2, 0); + + Func* f = new_func(&tc); + Val fixed = add_val(f, tc.i32); + Val live = add_val(f, tc.i32); + Val out = add_val(f, tc.i32); + emit_load_imm(f, f->entry, fixed, tc.i32, 5); + emit_load_imm(f, f->entry, live, tc.i32, 7); + + Inst* in = ir_emit(f, f->entry, IR_ASM_BLOCK); + IRAsmAux* aux = arena_znew(f->arena, IRAsmAux); + aux->tmpl = "add %0, %1, #1"; + aux->nout = 1; + aux->nin = 1; + aux->nclob = 1; + aux->outs = arena_array(f->arena, AsmConstraint, 1); + aux->ins = arena_array(f->arena, AsmConstraint, 1); + aux->out_ops = arena_array(f->arena, Operand, 1); + aux->in_ops = arena_array(f->arena, Operand, 1); + aux->clobbers = arena_array(f->arena, Sym, 1); + memset(aux->outs, 0, sizeof(AsmConstraint)); + memset(aux->ins, 0, sizeof(AsmConstraint)); + aux->outs[0].str = "=r"; + aux->outs[0].dir = ASM_OUT; + aux->outs[0].type = tc.i32; + aux->ins[0].str = "{x19}"; + aux->ins[0].dir = ASM_IN; + aux->ins[0].type = tc.i32; + aux->out_ops[0] = op_reg_(out, tc.i32); + aux->in_ops[0] = op_reg_(fixed, tc.i32); + aux->clobbers[0] = pool_intern_cstr(tc.c->global, "x20"); + in->extra.aux = aux; + in->ndefs = 1; + in->defs = arena_array(f->arena, Val, 1); + in->defs[0] = out; + in->def = out; + f->val_def_block[out] = f->entry; + f->val_def_inst[out] = f->blocks[f->entry].ninsts - 1u; + + emit_binop(f, f->entry, out, out, live, tc.i32); + emit_ret_val(f, f->entry, out, tc.i32); + + opt_machinize(f, &mock.base); + opt_build_cfg(f); + opt_build_loop_tree(f); + opt_live_info(f); + opt_regalloc(f, 0); + aux = (IRAsmAux*)in->extra.aux; + + EXPECT(f->val_info[fixed].alloc_kind == OPT_ALLOC_HARD && + f->val_info[fixed].hard_reg == 19, + "fixed asm input should allocate x19"); + EXPECT(f->val_info[live].alloc_kind != OPT_ALLOC_HARD || + f->val_info[live].hard_reg != 20, + "value live across asm clobber should not allocate clobbered x20"); + EXPECT(aux->in_ops[0].kind == OPK_REG && aux->in_ops[0].v.reg == 19, + "fixed asm input should rewrite to x19"); + tc_fini(&tc); +} + static void opt_post_rewrite_dce(void) { TestCtx tc; tc_init(&tc); @@ -1158,6 +1263,95 @@ static void opt_post_rewrite_dce(void) { tc_fini(&tc); } +static void opt_dce_call_clobbers_hard_regs(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + mock_init(&mock, tc.c); + static const Reg pool[] = {10}; + static const Reg scratch[] = {9}; + mock_set_pool(&mock, RC_INT, pool, 1, scratch, 1, 1u << 10); + + Func* f = new_func(&tc); + opt_machinize(f, &mock.base); + f->opt_rewritten = 1; + Inst* li = ir_emit(f, f->entry, IR_LOAD_IMM); + li->opnds = arena_array(f->arena, Operand, 1); + li->opnds[0] = op_reg_(10, tc.i32); + li->nopnds = 1; + li->extra.imm = 7; + emit_call_void(f, f->entry); + Inst* add = ir_emit(f, f->entry, IR_BINOP); + add->opnds = arena_array(f->arena, Operand, 3); + add->opnds[0] = op_reg_(19, tc.i32); + add->opnds[1] = op_reg_(10, tc.i32); + add->opnds[2] = op_imm_(1, tc.i32); + add->nopnds = 3; + add->extra.imm = BO_IADD; + emit_ret_val(f, f->entry, 19, tc.i32); + + opt_dce(f); + EXPECT(count_op(f, IR_LOAD_IMM) == 0, + "pre-call caller-saved def should not satisfy a post-call use"); + + Func* g = new_func(&tc); + opt_machinize(g, &mock.base); + g->opt_rewritten = 1; + li = ir_emit(g, g->entry, IR_LOAD_IMM); + li->opnds = arena_array(g->arena, Operand, 1); + li->opnds[0] = op_reg_(10, tc.i32); + li->nopnds = 1; + li->extra.imm = 9; + Inst* call = emit_call_void(g, g->entry); + IRCallAux* caux = (IRCallAux*)call->extra.aux; + CGABIValue* args = arena_zarray(g->arena, CGABIValue, 1); + args[0].storage = op_reg_(10, tc.i32); + caux->desc.nargs = 1; + caux->desc.args = args; + emit_ret_val(g, g->entry, 10, tc.i32); + + opt_dce(g); + EXPECT(count_op(g, IR_LOAD_IMM) == 1, + "hard-reg call argument should keep its reaching def"); + tc_fini(&tc); +} + +typedef struct NoScratchCtx { + Func* f; +} NoScratchCtx; + +static void run_no_scratch_regalloc(void* arg) { + NoScratchCtx* ctx = (NoScratchCtx*)arg; + opt_regalloc(ctx->f, 0); +} + +static void opt_regalloc_spill_requires_scratch(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + mock_init(&mock, tc.c); + static const Reg pool[] = {19}; + mock_set_pool(&mock, RC_INT, pool, 1, NULL, 0, 0); + + Func* f = new_func(&tc); + opt_machinize(f, &mock.base); + Val a = add_val(f, tc.i32); + Val b = add_val(f, tc.i32); + Val c = add_val(f, tc.i32); + emit_load_imm(f, f->entry, a, tc.i32, 1); + emit_load_imm(f, f->entry, b, tc.i32, 2); + emit_binop(f, f->entry, c, a, b, tc.i32); + emit_ret_val(f, f->entry, c, tc.i32); + opt_build_cfg(f); + opt_build_loop_tree(f); + opt_live_info(f); + + NoScratchCtx nctx = {f}; + EXPECT(expect_panic(tc.c, run_no_scratch_regalloc, &nctx), + "spilling without a scratch register should panic"); + tc_fini(&tc); +} + static void opt_combine_spill_peeps(void) { TestCtx tc; tc_init(&tc); @@ -1619,7 +1813,10 @@ int main(void) { opt_call_clobber_caller_saved(); opt_spill_pressure(); opt_inline_asm_tied_fixed_regs(); + opt_inline_asm_constraints_and_clobbers(); opt_post_rewrite_dce(); + opt_dce_call_clobbers_hard_regs(); + opt_regalloc_spill_requires_scratch(); opt_combine_spill_peeps(); opt_combine_single_use_copy_and_imm(); opt_combine_keeps_unsafe_and_multiuse_defs();