kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ab11c06f26a5bd56cb96a76ff54c04d6ecbd44ea
parent 2bdeb46f9c551570bc731997aad31fc6991e5449
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 26 May 2026 17:53:25 -0700

ir,opt,cg: define local-static-data IR ops, restructure opt pipeline boundary

Add four new IR ops — IR_LOCAL_STATIC_DATA_{BEGIN,WRITE,LABEL_ADDR,END} — so
functions can embed compile-time data (e.g. jump tables) directly within their
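code. Restructure the opt/cg boundary: opt.h keeps opt_cgtarget_new() (now
taking a CgTarget) and adds opt_func_from_cg_ir(), which takes a completed
CgIrFunc recording as input; lowering and emission move to the NativeTarget
entry points opt_machinize_native() and opt_emit_native(), and the
opt_inline()/opt_cleanup() declarations are removed. cgtarget.h adds
supports_label_table() so switch lowering can decide whether jump-table
dispatch is viable per target. config.h also enables the AA64 and Wasm
backends and the optimizer pipeline.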

Diffstat:
M include/cfree/config.h | 6 +++---
M src/cg/cgtarget.h | 34 ++++++++++++++++++++++------------
M src/cg/internal.h | 8 +++++---
M src/opt/ir.h | 233 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/opt/opt.h | 61 ++++++++++++-------------------------------------------------
M src/opt/opt_internal.h | 2 --
6 files changed, 268 insertions(+), 76 deletions(-)

diff --git a/include/cfree/config.h b/include/cfree/config.h @@ -24,10 +24,10 @@ */ /* Backend architectures. */ -#define CFREE_ARCH_AA64_ENABLED 0 +#define CFREE_ARCH_AA64_ENABLED 1 #define CFREE_ARCH_X64_ENABLED 0 #define CFREE_ARCH_RV64_ENABLED 0 -#define CFREE_ARCH_WASM_ENABLED 0 +#define CFREE_ARCH_WASM_ENABLED 1 #define CFREE_ARCH_C_TARGET_ENABLED 1 /* Object/image formats. Each gates emit + read + link-image paths and @@ -54,7 +54,7 @@ /* Optimizer pipeline. -O0/direct codegen is always available; -O1 and above * require this flag and the matching src/opt sources. */ -#define CFREE_OPT_ENABLED 0 +#define CFREE_OPT_ENABLED 1 /* Optional library subsystems. These are kept separate from driver tool flags: * libcfree embedders care mostly about whether a public subsystem and its diff --git a/src/cg/cgtarget.h b/src/cg/cgtarget.h @@ -405,8 +405,7 @@ struct CgTarget { /* ---- locals ---- */ CGLocal (*local)(CgTarget*, const CGLocalDesc*); - void (*local_addr)(CgTarget*, Operand dst, const CGLocalDesc*, - CGLocal); + void (*local_addr)(CgTarget*, Operand dst, const CGLocalDesc*, CGLocal); CGLocal (*param)(CgTarget*, const CGParamDesc*); /* ---- labels and control flow ---- */ @@ -432,6 +431,15 @@ struct CgTarget { * cg_lower_switch_default. */ void (*switch_)(CgTarget*, const CGSwitchDesc*); + /* Optional. When non-NULL and it returns 0, the target cannot realize a + * jump-table dispatch built from a rodata table of code-label addresses + * (Wasm: linear memory holds no code addresses and there is no computed + * branch). cfree_cg_switch then routes dense/forced-table plans through + * `switch_` (e.g. br_table) instead of the label-table + indirect_branch + * lowering. NULL means the label-table path is supported (every native + * arch). */ + int (*supports_label_table)(CgTarget*); + /* Indirect branch primitive: transfer control to the address in * `addr` (an OPK_LOCAL holding a function-local label address). 
* @@ -443,8 +451,8 @@ struct CgTarget { * can resolve to. Backends use it for branch-target hardening (BTI, * PAC, x86 CFG, IBT) and opt uses it to build the CFG; opt requires * ntargets > 0. */ - void (*indirect_branch)(CgTarget*, Operand addr, - const Label* valid_targets, u32 ntargets); + void (*indirect_branch)(CgTarget*, Operand addr, const Label* valid_targets, + u32 ntargets); /* Materialize the runtime address of a function-local label into * `dst`. The label must already exist (label_new); it does not @@ -473,8 +481,8 @@ struct CgTarget { * returned target-specific message before reaching object-data emission. Lets * targets that cannot resolve function-local label addresses in * static-data initializers (e.g. the Wasm backend) fail with a - * recognizable, target-prefixed diagnostic. The returned string must remain valid for - * the lifetime of the panic call (string literals are typical). */ + * recognizable, target-prefixed diagnostic. The returned string must remain + * valid for the lifetime of the panic call (string literals are typical). */ const char* (*data_label_addr_unsupported_msg)(CgTarget*); /* ---- structured control flow ---- @@ -509,7 +517,8 @@ struct CgTarget { * backend chooses the TLS model (LE/IE/LD/GD) from c->target and the * symbol's visibility. Subsequent accesses go through OPK_INDIRECT on the * resulting pointer; this lets opt hoist the materialization via LICM. 
*/ - void (*tls_addr_of)(CgTarget*, Operand dst /*LOCAL*/, ObjSymId sym, i64 addend); + void (*tls_addr_of)(CgTarget*, Operand dst /*LOCAL*/, ObjSymId sym, + i64 addend); void (*copy_bytes)(CgTarget*, Operand dst_addr, Operand src_addr, AggregateAccess); void (*set_bytes)(CgTarget*, Operand dst_addr, Operand byte_value, @@ -584,9 +593,10 @@ struct CgTarget { MemOrder); void (*atomic_rmw)(CgTarget*, AtomicOp, Operand dst /*LOCAL: prior value*/, Operand addr, Operand val, MemAccess, MemOrder); - void (*atomic_cas)(CgTarget*, Operand prior /*LOCAL*/, Operand ok /*LOCAL, i1*/, - Operand addr, Operand expected, Operand desired, MemAccess, - MemOrder success, MemOrder failure); + void (*atomic_cas)(CgTarget*, Operand prior /*LOCAL*/, + Operand ok /*LOCAL, i1*/, Operand addr, Operand expected, + Operand desired, MemAccess, MemOrder success, + MemOrder failure); void (*fence)(CgTarget*, MemOrder); /* ---- compiler intrinsics ---- @@ -606,8 +616,8 @@ struct CgTarget { * UNREACHABLE / TRAP : dsts none; args none * SETJMP : dsts[0] LOCAL i32 result; args = (&buf) * LONGJMP : dsts none; args = (&buf, val); no return - * ADD/SUB/MUL_OVERFLOW : dsts[0] LOCAL result, dsts[1] LOCAL i1 overflow; - * args = (a, b) + * ADD/SUB/MUL_OVERFLOW : dsts[0] LOCAL result, dsts[1] LOCAL i1 + * overflow; args = (a, b) * * Backends that lack an inline sequence for a given kind may emit a * normal IR_CALL-shaped sequence to a runtime entry (e.g. 
memcpy) — the diff --git a/src/cg/internal.h b/src/cg/internal.h @@ -361,8 +361,8 @@ Operand api_op_imm(i64 v, CfreeCgTypeId ty); Operand api_op_local(CGLocal r, CfreeCgTypeId ty); Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty); Operand api_op_indirect(CGLocal base, i32 ofs, CfreeCgTypeId ty); -Operand api_op_indirect_indexed(CGLocal base, CGLocal index, u8 log2_scale, i32 ofs, - CfreeCgTypeId ty); +Operand api_op_indirect_indexed(CGLocal base, CGLocal index, u8 log2_scale, + i32 ofs, CfreeCgTypeId ty); u8 api_residency_for(const Operand* o); ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty); ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty); @@ -373,7 +373,7 @@ ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, int a_owned, int b_owned); ApiSValue api_make_sv_with_local_ownership(Operand op, CfreeCgTypeId ty, - int owned); + int owned); CfreeCgTypeId api_sv_type(const ApiSValue* sv); int api_operand_can_address(const Operand* o); int api_sv_op_is(const ApiSValue* sv, OpKind kind); @@ -457,6 +457,8 @@ CGLocal api_f128_temp_local(CfreeCg* g, CfreeCgTypeId ty); u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes); void api_store_f128_bytes(CfreeCg* g, CGLocal local, CfreeCgTypeId ty, const u8 bytes[16]); +void api_wide16_sext_imm_bytes(CfreeCg* g, i64 imm, u8 bytes[16]); +ApiSValue api_make_wide16_int_const(CfreeCg* g, i64 value, CfreeCgTypeId ty); void api_encode_binary128_from_double(CfreeCg* g, double value, u8 out[16]); ApiSValue api_make_f128_const(CfreeCg* g, double value, CfreeCgTypeId ty); ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v, diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -1,10 +1,225 @@ #ifndef CFREE_IR_H #define CFREE_IR_H -#include "arch/arch.h" +#include "arch/native_target.h" #include "core/arena.h" #include "core/core.h" +/* Optimizer-private physical/virtual operand model. 
+ * + * The semantic CG API now exposes only CGLocal/Label/CgTarget concepts. + * O1 still needs a mutable pseudo-register and frame-slot view for liveness, + * allocation, and MIR. Keep those names local to src/opt by remapping the old + * optimizer tokens after including the semantic headers. Do not move these + * fields back into cg/cgtarget.h. */ +typedef NativeFrameSlot OptFrameSlot; +#define FrameSlot OptFrameSlot +#define FRAME_SLOT_NONE NATIVE_FRAME_SLOT_NONE + +typedef NativeFrameSlotKind OptFrameSlotKind; +#define FrameSlotKind OptFrameSlotKind +#define FS_LOCAL NATIVE_FRAME_SLOT_LOCAL +#define FS_PARAM NATIVE_FRAME_SLOT_PARAM +#define FS_SPILL NATIVE_FRAME_SLOT_SPILL +#define FS_ALLOCA NATIVE_FRAME_SLOT_ALLOCA +#define FS_OUTGOING NATIVE_FRAME_SLOT_OUTGOING +#define FS_SAVE NATIVE_FRAME_SLOT_SAVE + +typedef NativeFrameSlotFlag OptFrameSlotFlag; +#define FSF_ADDR_TAKEN NATIVE_FRAME_SLOT_ADDR_TAKEN +#define FSF_MEMORY_REQUIRED NATIVE_FRAME_SLOT_MEMORY_REQUIRED +#define FSF_FIXED_OFFSET NATIVE_FRAME_SLOT_FIXED_OFFSET +#define FSF_VOLATILE (1u << 8) + +typedef NativeFrameSlotDesc OptFrameSlotDesc; +#define FrameSlotDesc OptFrameSlotDesc + +typedef NativeKnownFrameDesc OptCGKnownFrameDesc; +#define CGKnownFrameDesc OptCGKnownFrameDesc + +typedef NativeAllocClass RegClass; +#define RC_INT NATIVE_REG_INT +#define RC_FP NATIVE_REG_FP +#define RC_VEC NATIVE_REG_VEC + +#define CG_REG_ALLOCABLE NATIVE_REG_ALLOCABLE +#define CG_REG_CALLER_SAVED NATIVE_REG_CALLER_SAVED +#define CG_REG_CALLEE_SAVED NATIVE_REG_CALLEE_SAVED +#define CG_REG_ARG NATIVE_REG_ARG +#define CG_REG_RET NATIVE_REG_RET +#define CG_REG_RESERVED NATIVE_REG_RESERVED + +typedef NativePhysRegInfo OptCGPhysRegInfo; +#define CGPhysRegInfo OptCGPhysRegInfo + +typedef enum OptOperandKind { + OPT_OPK_IMM = OPK_IMM, + OPT_OPK_LOCAL = OPK_LOCAL, + OPT_OPK_GLOBAL = OPK_GLOBAL, + OPT_OPK_INDIRECT = OPK_INDIRECT, + OPT_OPK_REG = 0xf0u, +} OptOperandKind; +#define OPK_REG OPT_OPK_REG + +typedef struct OptOperand 
{ + u8 kind; + u8 cls; + u8 pad[2]; + CfreeCgTypeId type; + union { + i64 imm; + Reg reg; + FrameSlot frame_slot; + CGLocal local; + struct { + ObjSymId sym; + i64 addend; + } global; + struct { + Reg base; + Reg index; + u8 log2_scale; + i32 ofs; + } ind; + } v; +} OptOperand; +#define Operand OptOperand + +typedef enum OptCGLocalStorageKind { + CG_LOCAL_STORAGE_FRAME, + CG_LOCAL_STORAGE_REG, +} OptCGLocalStorageKind; + +typedef struct OptCGLocalStorage { + u8 kind; + u8 pad[3]; + union { + Reg reg; + FrameSlot frame_slot; + } v; +} OptCGLocalStorage; +#define CGLocalStorage OptCGLocalStorage +#define CGLocalStorageKind OptCGLocalStorageKind + +typedef struct OptCGABIPart { + Operand op; + u32 offset; +} OptCGABIPart; +#define CGABIPart OptCGABIPart + +typedef struct OptCGABIValue { + CfreeCgTypeId type; + const ABIArgInfo* abi; + Operand storage; + CGABIPart* parts; + u32 nparts; +} OptCGABIValue; +#define CGABIValue OptCGABIValue + +typedef enum OptCGCallPlanMoveKind { + CG_CALL_PLAN_REG, + CG_CALL_PLAN_STACK, + CG_CALL_PLAN_TAIL_STACK, + CG_CALL_PLAN_SRC_VALUE, + CG_CALL_PLAN_SRC_ADDR, +} OptCGCallPlanMoveKind; + +typedef struct OptCGCallPlanMove { + Operand src; + Operand dst; + MemAccess mem; + u32 src_offset; + u32 stack_offset; + Reg dst_reg; + u8 src_kind; + u8 dst_kind; + u8 cls; + u8 pad; +} OptCGCallPlanMove; +#define CGCallPlanMove OptCGCallPlanMove + +typedef struct OptCGCallPlanRet { + Operand dst; + MemAccess mem; + u32 dst_offset; + Reg src_reg; + u8 cls; + u8 pad[3]; +} OptCGCallPlanRet; +#define CGCallPlanRet OptCGCallPlanRet + +typedef struct OptCGCallPlan { + Operand callee; + CGCallPlanMove* args; + CGCallPlanRet* rets; + u32 nargs; + u32 nrets; + u32 stack_arg_size; + u32 clobber_mask[3]; + u32 return_mask[3]; + u16 flags; + u8 has_sret; + u8 is_variadic; +} OptCGCallPlan; +#define CGCallPlan OptCGCallPlan + +typedef struct OptCGParamDesc { + u32 index; + Sym name; + CfreeCgTypeId type; + u32 size; + u32 align; + u32 flags; + SrcLoc loc; + 
CGLocalStorage storage; + const ABIArgInfo* abi; + const CGABIPart* incoming; + u32 nincoming; +} OptCGParamDesc; +#define CGParamDesc OptCGParamDesc + +typedef struct OptCGCallDesc { + CfreeCgTypeId fn_type; + Operand callee; + CGABIValue* args; + CGABIValue ret; + u32 nargs; + u16 flags; + u8 tail_policy; + u8 pad; + CfreeCgInlinePolicy inline_policy; + const ABIFuncInfo* abi; +} OptCGCallDesc; +#define CGCallDesc OptCGCallDesc + +typedef struct OptCGFuncDesc { + ObjSymId sym; + ObjSecId text_section_id; + ObjGroupId group_id; + CfreeCgTypeId fn_type; + const CfreeCgTypeId* result_types; + const CGParamDesc* params; + u32 nresults; + u32 nparams; + SrcLoc loc; + u32 flags; + CfreeCgInlinePolicy inline_policy; + u8 atomize; + u8 pad[3]; + const ABIFuncInfo* abi; +} OptCGFuncDesc; +#define CGFuncDesc OptCGFuncDesc + +typedef struct OptCGScopeDesc { + u8 kind; + u8 pad[3]; + Label break_label; + Label continue_label; + Operand cond; + CfreeCgTypeId result_type; +} OptCGScopeDesc; +#define CGScopeDesc OptCGScopeDesc + /* SSA value id. VAL_NONE=0 is reserved as a sentinel. Recorded CG virtual * registers live in Func's pseudo-register table; before pseudo-reg SSA, * OPK_REG operands carry those mutable Reg ids. After pseudo-reg SSA, OPK_REG @@ -73,12 +288,16 @@ typedef enum IROp { IR_INDIRECT_BRANCH, /* opnds[0] = addr REG; extra.aux = IRIndirectAux. succ[0..nvalid) = the valid target blocks. */ IR_LOAD_LABEL_ADDR, /* opnds[0] dst REG; extra.imm = target block id. */ - IR_RET, /* extra.aux = IRRetAux* (NULL for void). */ - IR_SCOPE_BEGIN, /* extra.aux = IRScopeAux. */ - IR_SCOPE_ELSE, /* extra.imm = scope id (Val). */ - IR_SCOPE_END, /* extra.imm = scope id (Val). */ - IR_BREAK_TO, /* extra.imm = scope id (Val). */ - IR_CONTINUE_TO, /* extra.imm = scope id (Val). 
*/ + IR_LOCAL_STATIC_DATA_BEGIN, /* extra.aux = CgIrLocalStaticBeginAux */ + IR_LOCAL_STATIC_DATA_WRITE, /* extra.aux = CgIrLocalStaticWriteAux */ + IR_LOCAL_STATIC_DATA_LABEL_ADDR, /* extra.aux = CgIrLocalStaticLabelAux */ + IR_LOCAL_STATIC_DATA_END, + IR_RET, /* extra.aux = IRRetAux* (NULL for void). */ + IR_SCOPE_BEGIN, /* extra.aux = IRScopeAux. */ + IR_SCOPE_ELSE, /* extra.imm = scope id (Val). */ + IR_SCOPE_END, /* extra.imm = scope id (Val). */ + IR_BREAK_TO, /* extra.imm = scope id (Val). */ + IR_CONTINUE_TO, /* extra.imm = scope id (Val). */ /* alloca / variadics. */ IR_ALLOCA, /* opnds = [dst REG, size]; extra.imm = align */ diff --git a/src/opt/opt.h b/src/opt/opt.h @@ -2,31 +2,15 @@ #define CFREE_OPT_H #include "arch/arch.h" +#include "arch/native_target.h" +#include "cg/ir.h" #include "opt/ir.h" -/* opt_cgtarget: a CGTarget wrapper that records each function as IR. - * - * - opt_cgtarget advertises virtual_regs. CG mints unbounded virtual Reg ids - * through the shared simple allocator and passes them to normal emit calls. - * - Every other emit-side call is recorded into the current block as one - * SSA Inst (with the current SrcLoc from set_loc). - * - On CGTarget.func_end, level 1 immediately runs the lowering pipeline and - * emits; level 2 retains the raw Func in a per-TU set. - * - On CGTarget.finalize, level 2 runs inter-procedural passes (inlining), - * then for each Func runs O2 cleanup/pre-lowering and machinize → live → - * coalesce → RA → combine → DCE → prolog/epilog → translate, driving the - * wrapped target CGTarget. - * - * No machine code is in `obj` until the driver calls cgtarget_finalize. - * Drivers must call it before reading `obj` or invoking debug_emit. - * - * Owns `target` and frees it via cgtarget_free(target) on its own destroy. - * - * level: - * 0 — caller should not use opt_cgtarget at all (drive target directly). - * 1 — minimal: combine + DCE during lowering. No SSA passes. No inlining. - * 2 — full pipeline below. 
Inlining enabled. */ -CGTarget* opt_cgtarget_new(Compiler*, CGTarget* target, int level); +/* O1 input boundary: semantic cg/ir.h is recorded once, then lowered into the + * optimizer-private Func/PReg view. During the O2 cutover window every + * opt_level >= 1 is normalized internally to this O1 path. */ +CgTarget* opt_cgtarget_new(Compiler*, CgTarget* target, int level); +Func* opt_func_from_cg_ir(Compiler*, const CgIrFunc*); /* ----- intra-procedural passes (run per retained Func at finalize on -O2) * ----- */ @@ -59,28 +43,9 @@ void opt_ssa_combine(Func*); void opt_undo_ssa(Func*); void opt_jump_opt(Func*); -/* ----- inter-procedural passes (run on the whole Func set at finalize) ----- - */ -typedef struct FuncSet FuncSet; - -/* Walks the call graph bottom-up. For each caller, inlines callees that fit - * the size/heuristic budget, marks the caller dirty, and queues it for - * opt_cleanup. SCCs (mutual recursion) are skipped for v1. - * - * Iteration count is bounded by `max_iters` (driver knob `-finline-iters=N`, - * default 1; cap is enforced by opt_cgtarget). */ -void opt_inline(FuncSet*, int max_iters); - -/* Full O2 pre-lowering cleanup: CFG cleanup, pseudo-reg SSA, mem2reg SSA, - * value/memory/loop passes, conventional SSA lowering, SSA destruction, and - * jump optimization. */ -void opt_cleanup(Func*); - -/* ----- lowering / backend prep (per Func, run before driving target CGTarget) +/* ----- lowering / backend prep (per Func, run before NativeTarget emission) * ----- */ -/* Machine-dependent ABI lowering, 2-op insns, etc. Implemented per-arch and - * per-OS, so it takes the full Target. 
*/ -void opt_machinize(Func*, CGTarget* target); +void opt_machinize_native(Func*, NativeTarget* target); void opt_build_loop_tree(Func*); typedef struct OptBitset { @@ -180,15 +145,13 @@ void opt_dce(Func*); /* post-RA DCE */ void opt_dead_def_elim(Func*); /* pre-RA dead-definition elimination */ void opt_dead_def_elim_with_live(Func*, const OptLiveInfo*); -/* Walks the lowered IR and drives a target CGTarget to emit machine code into - * its ObjBuilder. Inserts prolog/epilog. Splits long insns where the target - * needs. Stamps each emitted insn's SrcLoc onto target via CGTarget.set_loc. */ -void opt_emit(Compiler*, Func*, CGTarget* target); +/* Walks the lowered MIR and drives the physical native backend. */ +void opt_emit_native(Compiler*, Func*, NativeTarget* target); /* When set, the wrapper writes a textual dump of each function's recorded * tape to `w` on func_end, immediately before replay. Pass `w == NULL` to * disable. The format is line-oriented and stable enough for golden-file * diffs but otherwise unspecified. No-op if `t` is not an opt_cgtarget. */ -void opt_set_dump_writer(CGTarget* t, Writer* w); +void opt_set_dump_writer(CgTarget* t, Writer* w); #endif diff --git a/src/opt/opt_internal.h b/src/opt/opt_internal.h @@ -169,6 +169,4 @@ int opt_block_live_out_has_phys_reg(Func*, const OptHardBlockLive*, u32 block, const Operand*); void opt_coalesce_ranges(Func*, const OptLiveRangeSet*); -void opt_replay(Compiler*, Func*, CGTarget* target); - #endif