kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ae82f93f8ba736c4ba664969e703abbc4c27cce6
parent 20712035bb5304ff0edae2d9285f1a6ff1890c34
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 12 May 2026 15:10:58 -0700

Refactor setjmp longjmp as intrinsics

Diffstat:
M src/arch/aarch64/alloc.c | 7 -------
M src/arch/aarch64/ops.c | 3 ---
M src/arch/arch.h | 32 ++++++++++++--------------------
M src/arch/rv64/alloc.c | 6 ------
M src/arch/rv64/internal.h | 1 -
M src/arch/rv64/ops.c | 4 ----
M src/arch/x64/alloc.c | 7 +------
M src/arch/x64/internal.h | 1 -
M src/arch/x64/ops.c | 4 ----
M src/cg/cg.c | 30 ++++++++++++++++++++++++------
M src/cg/cg.h | 9 ++-------
M src/opt/ir.h | 4 ----
M src/opt/opt.c | 64 +++++++++++++++++++---------------------------------------------
M src/opt/pass_cfg.c | 17 ++++++++---------
M src/opt/pass_lower.c | 2 --
15 files changed, 66 insertions(+), 125 deletions(-)

diff --git a/src/arch/aarch64/alloc.c b/src/arch/aarch64/alloc.c @@ -80,12 +80,6 @@ void aa_free_reg(CGTarget* t, Reg r, RegClass cls) { (unsigned)r, cls == RC_FP ? "fp" : "int"); } -static const Reg* aa_clobbers(CGTarget* t, RegClass c, u32* n) { - (void)c; - (void)n; - compiler_panic(t->c, impl_of(t)->loc, "aarch64: clobbers not implemented"); -} - static int aa_resolve_reg_name(CGTarget* t, Sym name, Reg* out, RegClass* cls_out) { (void)t; @@ -299,7 +293,6 @@ static void aa_continue_to(CGTarget* t, CGScope s) { void aa_alloc_vtable_init(CGTarget* t) { t->alloc_reg = aa_alloc_reg; t->free_reg = aa_free_reg; - t->clobbers = aa_clobbers; t->spill_reg = aa_spill_reg; t->reload_reg = aa_reload_reg; t->resolve_reg_name = aa_resolve_reg_name; diff --git a/src/arch/aarch64/ops.c b/src/arch/aarch64/ops.c @@ -1900,9 +1900,6 @@ CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->va_end_ = aa_va_end_; t->va_copy_ = aa_va_copy_; - t->setjmp_ = NULL; - t->longjmp_ = NULL; - t->atomic_load = aa_atomic_load; t->atomic_store = aa_atomic_store; t->atomic_rmw = aa_atomic_rmw; diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -108,7 +108,7 @@ typedef enum MemOrder { } MemOrder; /* Compiler-intrinsic kinds dispatched through CGTarget.intrinsic and carried - * on IR_INTRINSIC (extra.imm = IntrinKind). The set is bounded: a backend + * on IR_INTRINSIC via IRIntrinAux.kind. The set is bounded: a backend * must know each one to choose inline-vs-libcall. Hint intrinsics * (EXPECT/UNREACHABLE/TRAP/PREFETCH/ASSUME_ALIGNED) ride the same dispatch: * the backend decides whether they emit an instruction or a no-op. @@ -116,10 +116,9 @@ typedef enum MemOrder { * Not every C builtin lives here. Parser-evaluated builtins * (__builtin_offsetof, __builtin_constant_p, __builtin_choose_expr, * __builtin_types_compatible_p) fold at parse and never reach IR. Builtins - * that already have dedicated CGTarget methods (alloca, va_*, setjmp/longjmp, - * atomics) keep them. 
Returns-twice routines like __cfree_setjmp on real - * arches stay magic external symbol names so opt's call-site analysis sees a - * normal IR_CALL. */ + * that already have dedicated CGTarget methods (alloca, va_*, atomics) keep + * them. Returns-twice and no-return control intrinsics use this dispatch so + * opt can preserve their CFG effects without growing backend vtable hooks. */ typedef enum IntrinKind { INTRIN_NONE = 0, @@ -143,6 +142,10 @@ typedef enum IntrinKind { INTRIN_UNREACHABLE, INTRIN_TRAP, + /* non-local control */ + INTRIN_SETJMP, + INTRIN_LONGJMP, + /* checked arith — multi-result (value, overflow_flag) */ INTRIN_ADD_OVERFLOW, INTRIN_SUB_OVERFLOW, @@ -475,7 +478,6 @@ struct CGTarget { void (*free_reg)(CGTarget*, Reg, RegClass); /* hint; opt_cgtarget ignores */ FrameSlot (*frame_slot)(CGTarget*, const FrameSlotDesc*); void (*param)(CGTarget*, const CGParamDesc*); - const Reg* (*clobbers)(CGTarget*, RegClass, u32* nregs); void (*spill_reg)(CGTarget*, Operand src_reg, FrameSlot, MemAccess); void (*reload_reg)(CGTarget*, Operand dst_reg, FrameSlot, MemAccess); @@ -601,18 +603,6 @@ struct CGTarget { void (*va_end_)(CGTarget*, Operand ap_addr); void (*va_copy_)(CGTarget*, Operand dst_ap_addr, Operand src_ap_addr); - /* ---- setjmp / longjmp ---- - * Optional. Real backends leave these NULL: the parser lowers <setjmp.h>'s - * setjmp to a normal call to __cfree_setjmp and opt recognizes the symbol - * by name as returns-twice. The WASM backend implements them via the - * exception-handling proposal so that a longjmp can unwind across WASM - * frames (which lack a saveable native SP). - * - * setjmp pops &buf, returns i32 in `dst` (0 on direct return, nonzero on - * longjmp). longjmp pops &buf and val; control does not return. 
*/ - void (*setjmp_)(CGTarget*, Operand dst /*REG, i32*/, Operand buf_addr); - void (*longjmp_)(CGTarget*, Operand buf_addr, Operand val); - /* ---- atomics ---- */ void (*atomic_load)(CGTarget*, Operand dst /*REG*/, Operand addr, MemAccess, MemOrder); @@ -629,8 +619,8 @@ struct CGTarget { * Typed dispatch for builtins whose lowering is backend-relevant * (inline-vs-libcall, inline sequence selection) or whose semantics opt * cares about (hint pattern matching, exhaustiveness). The IR carries - * IR_INTRINSIC + extra.imm = IntrinKind; the wrapped target receives the - * same call at lowering time with materialized operands. + * IR_INTRINSIC + IRIntrinAux.kind; the wrapped target receives the same call + * at lowering time with materialized operands. * * Operand shapes by IntrinKind: * POPCOUNT/CTZ/CLZ/BSWAP* : dsts[0] REG result; args[0] REG input @@ -640,6 +630,8 @@ struct CGTarget { * ASSUME_ALIGNED : dsts[0] REG; args = (ptr, align [, offset]) * EXPECT : dsts[0] REG; args = (val, expected) * UNREACHABLE / TRAP : dsts none; args none + * SETJMP : dsts[0] REG i32 result; args = (&buf) + * LONGJMP : dsts none; args = (&buf, val); no return * ADD/SUB/MUL_OVERFLOW : dsts[0] REG result, dsts[1] REG i1 overflow; * args = (a, b) * diff --git a/src/arch/rv64/alloc.c b/src/arch/rv64/alloc.c @@ -163,12 +163,6 @@ void rv_param(CGTarget* t, const CGParamDesc* p) { } } -const Reg* rv_clobbers(CGTarget* t, RegClass c, u32* n) { - (void)c; - (void)n; - rv_panic(t, "clobbers"); -} - void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma) { RImpl* a = impl_of(t); diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -194,7 +194,6 @@ void rv_free_reg(CGTarget* t, Reg r, RegClass cls); FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d); RvSlot* rv64_slot_get(RImpl* a, FrameSlot fs); void rv_param(CGTarget* t, const CGParamDesc* p); -const Reg* rv_clobbers(CGTarget* t, RegClass c, u32* n); void rv_spill_reg(CGTarget* t, Operand src, 
FrameSlot slot, MemAccess ma); void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma); Label rv_label_new(CGTarget* t); diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -1777,7 +1777,6 @@ CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->free_reg = rv_free_reg; t->frame_slot = rv_frame_slot; t->param = rv_param; - t->clobbers = rv_clobbers; t->spill_reg = rv_spill_reg; t->reload_reg = rv_reload_reg; @@ -1818,9 +1817,6 @@ CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->va_end_ = rv_va_end_; t->va_copy_ = rv_va_copy_; - t->setjmp_ = NULL; - t->longjmp_ = NULL; - t->atomic_load = rv_atomic_load; t->atomic_store = rv_atomic_store; t->atomic_rmw = rv_atomic_rmw; diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c @@ -1,7 +1,7 @@ /* arch/x64/alloc.c — register pool, spill/reload, labels, control flow. * * Covers: xpool_init/alloc/free, x_alloc_reg, x_free_reg, x_frame_slot, - * x64_slot_get, x_param, x_clobbers, x_spill_reg, x_reload_reg, x_label_*, + * x64_slot_get, x_param, x_spill_reg, x_reload_reg, x_label_*, * emit_jmp_label, emit_jcc_label, x_jump, x64_force_reg_int, emit_cmp_ab, * x_cmp_branch, x_cmp, x_scope_*, x_break_to, x_continue_to. 
*/ @@ -184,11 +184,6 @@ void x_param(CGTarget* t, const CGParamDesc* p) { } } -const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n) { - (void)c; - (void)n; - x_panic(t, "clobbers"); -} void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma) { XImpl* a = impl_of(t); diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -237,7 +237,6 @@ FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d); Reg x_alloc_reg(CGTarget* t, RegClass cls, const Type* ty); void x_free_reg(CGTarget* t, Reg r, RegClass cls); void x_param(CGTarget* t, const CGParamDesc* p); -const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n); void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma); void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma); Label x_label_new(CGTarget* t); diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -1854,7 +1854,6 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->free_reg = x_free_reg; t->frame_slot = x_frame_slot; t->param = x_param; - t->clobbers = x_clobbers; t->spill_reg = x_spill_reg; t->reload_reg = x_reload_reg; @@ -1895,9 +1894,6 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->va_end_ = x_va_end_; t->va_copy_ = x_va_copy_; - t->setjmp_ = NULL; - t->longjmp_ = NULL; - t->atomic_load = x_atomic_load; t->atomic_store = x_atomic_store; t->atomic_rmw = x_atomic_rmw; diff --git a/src/cg/cg.c b/src/cg/cg.c @@ -31,9 +31,9 @@ * the value stack runs out of victims; this lets calls with more * reg-class args than the pool size can hold lower correctly. * - * Aggregates, atomics, asm/setjmp/intrinsic methods are placeholders - * pending their corpus rows. The interface in cg.h is the commitment; - * this file fills in the slice that's exercised today. */ + * Some aggregate and backend-specific intrinsic cases are still limited by + * their corpus rows. 
The interface in cg.h is the commitment; this file fills + * in the slice that's exercised today. */ #include "cg/cg.h" @@ -1356,7 +1356,7 @@ void cg_ret(CG* g, int has_value) { } /* ============================================================ - * alloca / variadics / setjmp / atomics — placeholders + * alloca / variadics / setjmp / atomics * ============================================================ */ void cg_alloca(CG* g) { @@ -1415,10 +1415,28 @@ void cg_va_copy_(CG* g) { release(g, &dst); } void cg_setjmp(CG* g) { - compiler_panic(g->c, g->cur_loc, "cg_setjmp: not in v1 slice"); + CGTarget* T = g->target; + SValue buf = pop(g); + Operand buf_op = force_reg(g, &buf, sv_type(&buf)); + const Type* int_ty = type_prim(g->pool, TY_INT); + Reg dst_r = alloc_reg_or_spill(g, RC_INT, int_ty); + Operand dst = op_reg(dst_r, int_ty); + T->intrinsic(T, INTRIN_SETJMP, &dst, 1u, &buf_op, 1u); + release(g, &buf); + push(g, make_sv(dst, int_ty)); } void cg_longjmp(CG* g) { - compiler_panic(g->c, g->cur_loc, "cg_longjmp: not in v1 slice"); + CGTarget* T = g->target; + SValue val = pop(g); + SValue buf = pop(g); + Operand args[2]; + args[0] = force_reg(g, &buf, sv_type(&buf)); + args[1] = (val.op.kind == OPK_IMM || val.op.kind == OPK_REG) + ? val.op + : force_reg(g, &val, sv_type(&val)); + T->intrinsic(T, INTRIN_LONGJMP, NULL, 0u, args, 2u); + release(g, &val); + release(g, &buf); } /* Atomics. The parser pushes the address as a pointer rvalue (typed `T*`) * and any value operands as plain rvalues; cg pops them, materializes diff --git a/src/cg/cg.h b/src/cg/cg.h @@ -106,13 +106,8 @@ void cg_va_end_(CG*); /* pop &ap */ void cg_va_copy_(CG*); /* pop &dst, &src */ /* ----- setjmp / longjmp ----- - * On real arches these are NOT emitted: the parser lowers <setjmp.h>'s setjmp - * to a normal extern call to __cfree_setjmp; opt recognizes the symbol by name - * as returns-twice (no inlining across; values defined before the call are not - * GVN-merged with values defined after). 
On WASM the parser instead emits - * cg_setjmp/cg_longjmp, which forward to CGTarget.setjmp/CGTarget.longjmp; the - * WASM backend lowers via the exception-handling proposal. - * + * Intrinsic lowering for targets that cannot use a plain libc call. Real + * native arches generally parse <setjmp.h>'s setjmp as a normal call. * cg_setjmp pops &buf and pushes i32 (0 on direct return, nonzero on longjmp). * cg_longjmp pops &buf and val; does not return. */ void cg_setjmp(CG*); diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -73,10 +73,6 @@ typedef enum IROp { IR_VA_END, /* opnds = [ap] */ IR_VA_COPY, /* opnds = [dst, src] */ - /* setjmp/longjmp. */ - IR_SETJMP, /* opnds = [dst REG, buf] */ - IR_LONGJMP, /* opnds = [buf, val]; (terminator-like, no fallthrough) */ - /* Atomics. */ IR_ATOMIC_LOAD, /* opnds = [dst, addr]; extra.aux = IRAtomicAux */ IR_ATOMIC_STORE, /* opnds = [addr, src]; extra.aux = IRAtomicAux */ diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -9,7 +9,7 @@ * and emits the rewritten IR into the wrapped target. * * Methods the wrapper rejects under unbounded virtuals: - * - clobbers / spill_reg / reload_reg are CG -O0 register-pressure + * - spill_reg / reload_reg are CG -O0 register-pressure * mechanics. CG never invokes them on real backends in v1, and * they're meaningless for opt's vreg space — calling them is a * wiring bug, so we panic loudly. 
@@ -64,6 +64,11 @@ static void set_def(Func* f, Inst* in, u32 block, Val v, const Type* t) { } } +static int intrinsic_terminates(IntrinKind kind) { + return kind == INTRIN_LONGJMP || kind == INTRIN_TRAP || + kind == INTRIN_UNREACHABLE; +} + static Operand* dup_opnds(Func* f, const Operand* src, u32 n) { if (!n) return NULL; Operand* dst = arena_array(f->arena, Operand, n); @@ -75,8 +80,13 @@ static int cur_terminated(OptImpl* o) { Block* b = &o->f->blocks[o->cur]; if (b->nsucc > 0) return 1; if (b->ninsts == 0) return 0; - IROp last = (IROp)b->insts[b->ninsts - 1].op; - return last == IR_RET || last == IR_LONGJMP; + Inst* last = &b->insts[b->ninsts - 1]; + if ((IROp)last->op == IR_RET) return 1; + if ((IROp)last->op == IR_INTRINSIC) { + IRIntrinAux* aux = (IRIntrinAux*)last->extra.aux; + return aux && intrinsic_terminates(aux->kind); + } + return 0; } static void set_cur(OptImpl* o, u32 b) { @@ -134,11 +144,6 @@ static void w_param(CGTarget* t, const CGParamDesc* d) { ir_param_add(o->f, &copy); } -static const Reg* w_clobbers(CGTarget* t, RegClass cls, u32* nregs) { - (void)cls; - (void)nregs; - panic_unsupported(impl_of(t), "clobbers"); -} static void w_spill_reg(CGTarget* t, Operand src, FrameSlot s, MemAccess m) { (void)src; (void)s; @@ -562,7 +567,7 @@ static void w_ret(CGTarget* t, const CGABIValue* v) { after_terminator(o); } -/* ---- alloca / variadics / setjmp / atomics / fence / intrinsic ---- */ +/* ---- alloca / variadics / atomics / fence / intrinsic ---- */ static void w_alloca_(CGTarget* t, Operand dst, Operand size, u32 align) { OptImpl* o = impl_of(t); @@ -608,26 +613,6 @@ static void w_va_copy_(CGTarget* t, Operand dst, Operand src) { in->nopnds = 2; } -static void w_setjmp_(CGTarget* t, Operand dst, Operand buf) { - OptImpl* o = impl_of(t); - Inst* in = rec(o, IR_SETJMP); - Operand ops[2] = {dst, buf}; - in->opnds = dup_opnds(o->f, ops, 2); - in->nopnds = 2; - if (dst.kind == OPK_REG) set_def(o->f, in, o->cur, (Val)dst.v.reg, dst.type); -} - 
-static void w_longjmp_(CGTarget* t, Operand buf, Operand val) { - OptImpl* o = impl_of(t); - Inst* in = rec(o, IR_LONGJMP); - Operand ops[2] = {buf, val}; - in->opnds = dup_opnds(o->f, ops, 2); - in->nopnds = 2; - Block* cb = &o->f->blocks[o->cur]; - cb->nsucc = 0; - after_terminator(o); -} - static void w_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess m, MemOrder mo) { OptImpl* o = impl_of(t); @@ -723,6 +708,11 @@ static void w_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd, in->def = in->defs[0]; in->type = dsts[0].type; } + if (intrinsic_terminates(kind)) { + Block* cb = &o->f->blocks[o->cur]; + cb->nsucc = 0; + after_terminator(o); + } } static void w_asm_block(CGTarget* t, const char* tmpl, @@ -1101,18 +1091,6 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) { w->va_copy_(w, a, src); break; } - case IR_SETJMP: { - Operand dst = xlat_op(r, in->opnds[0]); - Operand buf = xlat_op(r, in->opnds[1]); - w->setjmp_(w, dst, buf); - break; - } - case IR_LONGJMP: { - Operand buf = xlat_op(r, in->opnds[0]); - Operand val = xlat_op(r, in->opnds[1]); - w->longjmp_(w, buf, val); - break; - } case IR_ATOMIC_LOAD: { Operand dst = xlat_op(r, in->opnds[0]); Operand addr = xlat_op(r, in->opnds[1]); @@ -1345,7 +1323,6 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) { t->free_reg = w_free_reg; t->frame_slot = w_frame_slot; t->param = w_param; - t->clobbers = w_clobbers; t->spill_reg = w_spill_reg; t->reload_reg = w_reload_reg; @@ -1391,9 +1368,6 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) { t->va_end_ = w_va_end_; t->va_copy_ = w_va_copy_; - t->setjmp_ = target->setjmp_ ? w_setjmp_ : NULL; - t->longjmp_ = target->longjmp_ ? 
w_longjmp_ : NULL; - t->atomic_load = w_atomic_load; t->atomic_store = w_atomic_store; t->atomic_rmw = w_atomic_rmw; diff --git a/src/opt/pass_cfg.c b/src/opt/pass_cfg.c @@ -6,16 +6,14 @@ * IR_CONDBR — 2 succs ([true, false]) * IR_CMP_BRANCH — 2 succs ([taken, fallthrough]) * IR_RET — 0 succs - * IR_LONGJMP — 0 succs - * IR_INTRINSIC TRAP/UNREACHABLE — 0 succs + * IR_INTRINSIC LONGJMP/TRAP/UNREACHABLE — 0 succs * IR_BREAK_TO / IR_CONTINUE_TO — 0 succs (control transferred to * the scope's break/continue label, * which is a successor encoded on * the IRScopeAux; pass populates * succ from there) * - * IR_SETJMP is a control barrier: the recorder splits its block but - * IR_SETJMP itself falls through. pass_cfg sees it as a normal inst. + * INTRIN_SETJMP falls through, so pass_cfg sees it as a normal inst. * * For scope ops the wrapper's recording assigns succ[] at emit time * (since it owns the vlabel→block_id mapping). pass_cfg trusts that @@ -36,13 +34,15 @@ static int is_terminator(const Inst* in) { case IR_CONDBR: case IR_CMP_BRANCH: case IR_RET: - case IR_LONGJMP: case IR_BREAK_TO: case IR_CONTINUE_TO: return 1; - case IR_INTRINSIC: - return in->extra.imm == INTRIN_TRAP || - in->extra.imm == INTRIN_UNREACHABLE; + case IR_INTRINSIC: { + IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; + return aux && (aux->kind == INTRIN_LONGJMP || + aux->kind == INTRIN_TRAP || + aux->kind == INTRIN_UNREACHABLE); + } default: return 0; } @@ -71,7 +71,6 @@ void opt_build_cfg(Func* f) { } switch ((IROp)last->op) { case IR_RET: - case IR_LONGJMP: bl->nsucc = 0; break; case IR_INTRINSIC: diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -698,8 +698,6 @@ static int side_effecting(IROp op) { case IR_VA_START: case IR_VA_END: case IR_VA_COPY: - case IR_SETJMP: - case IR_LONGJMP: case IR_ATOMIC_STORE: case IR_ATOMIC_RMW: case IR_ATOMIC_CAS: