commit ab11c06f26a5bd56cb96a76ff54c04d6ecbd44ea
parent 2bdeb46f9c551570bc731997aad31fc6991e5449
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 26 May 2026 17:53:25 -0700
ir,opt,cg: define local-static-data IR ops, restructure opt pipeline boundary
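Add four new IR ops — IR_LOCAL_STATIC_DATA_{BEGIN,WRITE,LABEL_ADDR,END} — so
functions can embed compile-time data (e.g. jump tables) directly within their
code. Restructure the opt/cg boundary: opt.h retypes opt_cgtarget_new() onto
CgTarget and adds opt_func_from_cg_ir(), which takes a completed CgIrFunc
recording as input; the lowering/emit entry points become
opt_machinize_native()/opt_emit_native() on NativeTarget, and the
opt_inline/opt_cleanup/opt_replay declarations are removed. cgtarget.h adds
supports_label_table() so switch lowering can decide whether jump-table
dispatch is viable per target. Also enables the AA64 and Wasm backends and
the optimizer in config.h, and declares the wide16 integer-constant helpers
in cg/internal.h.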
Diffstat:
6 files changed, 268 insertions(+), 76 deletions(-)
diff --git a/include/cfree/config.h b/include/cfree/config.h
@@ -24,10 +24,10 @@
*/
/* Backend architectures. */
-#define CFREE_ARCH_AA64_ENABLED 0
+#define CFREE_ARCH_AA64_ENABLED 1
#define CFREE_ARCH_X64_ENABLED 0
#define CFREE_ARCH_RV64_ENABLED 0
-#define CFREE_ARCH_WASM_ENABLED 0
+#define CFREE_ARCH_WASM_ENABLED 1
#define CFREE_ARCH_C_TARGET_ENABLED 1
/* Object/image formats. Each gates emit + read + link-image paths and
@@ -54,7 +54,7 @@
/* Optimizer pipeline. -O0/direct codegen is always available; -O1 and above
* require this flag and the matching src/opt sources. */
-#define CFREE_OPT_ENABLED 0
+#define CFREE_OPT_ENABLED 1
/* Optional library subsystems. These are kept separate from driver tool flags:
* libcfree embedders care mostly about whether a public subsystem and its
diff --git a/src/cg/cgtarget.h b/src/cg/cgtarget.h
@@ -405,8 +405,7 @@ struct CgTarget {
/* ---- locals ---- */
CGLocal (*local)(CgTarget*, const CGLocalDesc*);
- void (*local_addr)(CgTarget*, Operand dst, const CGLocalDesc*,
- CGLocal);
+ void (*local_addr)(CgTarget*, Operand dst, const CGLocalDesc*, CGLocal);
CGLocal (*param)(CgTarget*, const CGParamDesc*);
/* ---- labels and control flow ---- */
@@ -432,6 +431,15 @@ struct CgTarget {
* cg_lower_switch_default. */
void (*switch_)(CgTarget*, const CGSwitchDesc*);
+ /* Optional. When non-NULL and it returns 0, the target cannot realize a
+ * jump-table dispatch built from a rodata table of code-label addresses
+ * (Wasm: linear memory holds no code addresses and there is no computed
+ * branch). cfree_cg_switch then routes dense/forced-table plans through
+ * `switch_` (e.g. br_table) instead of the label-table + indirect_branch
+ * lowering. NULL means the label-table path is supported (every native
+ * arch). */
+ int (*supports_label_table)(CgTarget*);
+
/* Indirect branch primitive: transfer control to the address in
* `addr` (an OPK_LOCAL holding a function-local label address).
*
@@ -443,8 +451,8 @@ struct CgTarget {
* can resolve to. Backends use it for branch-target hardening (BTI,
* PAC, x86 CFG, IBT) and opt uses it to build the CFG; opt requires
* ntargets > 0. */
- void (*indirect_branch)(CgTarget*, Operand addr,
- const Label* valid_targets, u32 ntargets);
+ void (*indirect_branch)(CgTarget*, Operand addr, const Label* valid_targets,
+ u32 ntargets);
/* Materialize the runtime address of a function-local label into
* `dst`. The label must already exist (label_new); it does not
@@ -473,8 +481,8 @@ struct CgTarget {
* returned target-specific message before reaching object-data emission. Lets
* targets that cannot resolve function-local label addresses in
* static-data initializers (e.g. the Wasm backend) fail with a
- * recognizable, target-prefixed diagnostic. The returned string must remain valid for
- * the lifetime of the panic call (string literals are typical). */
+ * recognizable, target-prefixed diagnostic. The returned string must remain
+ * valid for the lifetime of the panic call (string literals are typical). */
const char* (*data_label_addr_unsupported_msg)(CgTarget*);
/* ---- structured control flow ----
@@ -509,7 +517,8 @@ struct CgTarget {
* backend chooses the TLS model (LE/IE/LD/GD) from c->target and the
* symbol's visibility. Subsequent accesses go through OPK_INDIRECT on the
* resulting pointer; this lets opt hoist the materialization via LICM. */
- void (*tls_addr_of)(CgTarget*, Operand dst /*LOCAL*/, ObjSymId sym, i64 addend);
+ void (*tls_addr_of)(CgTarget*, Operand dst /*LOCAL*/, ObjSymId sym,
+ i64 addend);
void (*copy_bytes)(CgTarget*, Operand dst_addr, Operand src_addr,
AggregateAccess);
void (*set_bytes)(CgTarget*, Operand dst_addr, Operand byte_value,
@@ -584,9 +593,10 @@ struct CgTarget {
MemOrder);
void (*atomic_rmw)(CgTarget*, AtomicOp, Operand dst /*LOCAL: prior value*/,
Operand addr, Operand val, MemAccess, MemOrder);
- void (*atomic_cas)(CgTarget*, Operand prior /*LOCAL*/, Operand ok /*LOCAL, i1*/,
- Operand addr, Operand expected, Operand desired, MemAccess,
- MemOrder success, MemOrder failure);
+ void (*atomic_cas)(CgTarget*, Operand prior /*LOCAL*/,
+ Operand ok /*LOCAL, i1*/, Operand addr, Operand expected,
+ Operand desired, MemAccess, MemOrder success,
+ MemOrder failure);
void (*fence)(CgTarget*, MemOrder);
/* ---- compiler intrinsics ----
@@ -606,8 +616,8 @@ struct CgTarget {
* UNREACHABLE / TRAP : dsts none; args none
* SETJMP : dsts[0] LOCAL i32 result; args = (&buf)
* LONGJMP : dsts none; args = (&buf, val); no return
- * ADD/SUB/MUL_OVERFLOW : dsts[0] LOCAL result, dsts[1] LOCAL i1 overflow;
- * args = (a, b)
+ * ADD/SUB/MUL_OVERFLOW : dsts[0] LOCAL result, dsts[1] LOCAL i1
+ * overflow; args = (a, b)
*
* Backends that lack an inline sequence for a given kind may emit a
* normal IR_CALL-shaped sequence to a runtime entry (e.g. memcpy) — the
diff --git a/src/cg/internal.h b/src/cg/internal.h
@@ -361,8 +361,8 @@ Operand api_op_imm(i64 v, CfreeCgTypeId ty);
Operand api_op_local(CGLocal r, CfreeCgTypeId ty);
Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty);
Operand api_op_indirect(CGLocal base, i32 ofs, CfreeCgTypeId ty);
-Operand api_op_indirect_indexed(CGLocal base, CGLocal index, u8 log2_scale, i32 ofs,
- CfreeCgTypeId ty);
+Operand api_op_indirect_indexed(CGLocal base, CGLocal index, u8 log2_scale,
+ i32 ofs, CfreeCgTypeId ty);
u8 api_residency_for(const Operand* o);
ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty);
ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty);
@@ -373,7 +373,7 @@ ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty,
ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty,
int a_owned, int b_owned);
ApiSValue api_make_sv_with_local_ownership(Operand op, CfreeCgTypeId ty,
- int owned);
+ int owned);
CfreeCgTypeId api_sv_type(const ApiSValue* sv);
int api_operand_can_address(const Operand* o);
int api_sv_op_is(const ApiSValue* sv, OpKind kind);
@@ -457,6 +457,8 @@ CGLocal api_f128_temp_local(CfreeCg* g, CfreeCgTypeId ty);
u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes);
void api_store_f128_bytes(CfreeCg* g, CGLocal local, CfreeCgTypeId ty,
const u8 bytes[16]);
+void api_wide16_sext_imm_bytes(CfreeCg* g, i64 imm, u8 bytes[16]);
+ApiSValue api_make_wide16_int_const(CfreeCg* g, i64 value, CfreeCgTypeId ty);
void api_encode_binary128_from_double(CfreeCg* g, double value, u8 out[16]);
ApiSValue api_make_f128_const(CfreeCg* g, double value, CfreeCgTypeId ty);
ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v,
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -1,10 +1,225 @@
#ifndef CFREE_IR_H
#define CFREE_IR_H
-#include "arch/arch.h"
+#include "arch/native_target.h"
#include "core/arena.h"
#include "core/core.h"
+/* Optimizer-private physical/virtual operand model.
+ *
+ * The semantic CG API now exposes only CGLocal/Label/CgTarget concepts.
+ * O1 still needs a mutable pseudo-register and frame-slot view for liveness,
+ * allocation, and MIR. Keep those names local to src/opt by remapping the old
+ * optimizer tokens after including the semantic headers. Do not move these
+ * fields back into cg/cgtarget.h. */
+typedef NativeFrameSlot OptFrameSlot;
+#define FrameSlot OptFrameSlot
+#define FRAME_SLOT_NONE NATIVE_FRAME_SLOT_NONE
+
+typedef NativeFrameSlotKind OptFrameSlotKind;
+#define FrameSlotKind OptFrameSlotKind
+#define FS_LOCAL NATIVE_FRAME_SLOT_LOCAL
+#define FS_PARAM NATIVE_FRAME_SLOT_PARAM
+#define FS_SPILL NATIVE_FRAME_SLOT_SPILL
+#define FS_ALLOCA NATIVE_FRAME_SLOT_ALLOCA
+#define FS_OUTGOING NATIVE_FRAME_SLOT_OUTGOING
+#define FS_SAVE NATIVE_FRAME_SLOT_SAVE
+
+typedef NativeFrameSlotFlag OptFrameSlotFlag; /* opt-side name for the native frame-slot flag type */
+#define FSF_ADDR_TAKEN NATIVE_FRAME_SLOT_ADDR_TAKEN /* this and the next two FSF_* names alias native flag names */
+#define FSF_MEMORY_REQUIRED NATIVE_FRAME_SLOT_MEMORY_REQUIRED
+#define FSF_FIXED_OFFSET NATIVE_FRAME_SLOT_FIXED_OFFSET
+#define FSF_VOLATILE (1u << 8) /* literal bit with no NATIVE_FRAME_SLOT_* alias. NOTE(review): confirm bit 8 does not collide with the native flag values */
+
+typedef NativeFrameSlotDesc OptFrameSlotDesc;
+#define FrameSlotDesc OptFrameSlotDesc
+
+typedef NativeKnownFrameDesc OptCGKnownFrameDesc;
+#define CGKnownFrameDesc OptCGKnownFrameDesc
+
+typedef NativeAllocClass RegClass;
+#define RC_INT NATIVE_REG_INT
+#define RC_FP NATIVE_REG_FP
+#define RC_VEC NATIVE_REG_VEC
+
+#define CG_REG_ALLOCABLE NATIVE_REG_ALLOCABLE
+#define CG_REG_CALLER_SAVED NATIVE_REG_CALLER_SAVED
+#define CG_REG_CALLEE_SAVED NATIVE_REG_CALLEE_SAVED
+#define CG_REG_ARG NATIVE_REG_ARG
+#define CG_REG_RET NATIVE_REG_RET
+#define CG_REG_RESERVED NATIVE_REG_RESERVED
+
+typedef NativePhysRegInfo OptCGPhysRegInfo;
+#define CGPhysRegInfo OptCGPhysRegInfo
+
+typedef enum OptOperandKind { /* operand kinds as seen by the optimizer */
+ OPT_OPK_IMM = OPK_IMM, /* the first four mirror the semantic OPK_* values one-to-one */
+ OPT_OPK_LOCAL = OPK_LOCAL,
+ OPT_OPK_GLOBAL = OPK_GLOBAL,
+ OPT_OPK_INDIRECT = OPK_INDIRECT,
+ OPT_OPK_REG = 0xf0u, /* literal value, not derived from an OPK_* name. NOTE(review): assumes no semantic OPK_* value equals 0xf0 — confirm */
+} OptOperandKind;
+#define OPK_REG OPT_OPK_REG /* keeps the old OPK_REG spelling usable after this point */
+
+typedef struct OptOperand { /* optimizer-private operand; `Operand` is remapped to it by the #define below */
+ u8 kind; /* NOTE(review): presumably an OptOperandKind value — confirm */
+ u8 cls; /* NOTE(review): presumably a RegClass (RC_INT/RC_FP/RC_VEC) — confirm */
+ u8 pad[2]; /* explicit padding bytes */
+ CfreeCgTypeId type;
+ union { /* NOTE(review): active member presumably selected by `kind` — confirm */
+ i64 imm;
+ Reg reg;
+ FrameSlot frame_slot;
+ CGLocal local;
+ struct {
+ ObjSymId sym;
+ i64 addend;
+ } global;
+ struct { /* base and index are Reg ids here, not CGLocal */
+ Reg base;
+ Reg index;
+ u8 log2_scale;
+ i32 ofs;
+ } ind;
+ } v;
+} OptOperand;
+#define Operand OptOperand /* after this point the token `Operand` expands to OptOperand */
+
+typedef enum OptCGLocalStorageKind {
+ CG_LOCAL_STORAGE_FRAME,
+ CG_LOCAL_STORAGE_REG,
+} OptCGLocalStorageKind;
+
+typedef struct OptCGLocalStorage {
+ u8 kind;
+ u8 pad[3];
+ union {
+ Reg reg;
+ FrameSlot frame_slot;
+ } v;
+} OptCGLocalStorage;
+#define CGLocalStorage OptCGLocalStorage
+#define CGLocalStorageKind OptCGLocalStorageKind
+
+typedef struct OptCGABIPart {
+ Operand op;
+ u32 offset;
+} OptCGABIPart;
+#define CGABIPart OptCGABIPart
+
+typedef struct OptCGABIValue {
+ CfreeCgTypeId type;
+ const ABIArgInfo* abi;
+ Operand storage;
+ CGABIPart* parts;
+ u32 nparts;
+} OptCGABIValue;
+#define CGABIValue OptCGABIValue
+
+typedef enum OptCGCallPlanMoveKind { /* NOTE(review): one enum, but OptCGCallPlanMove has both src_kind and dst_kind — confirm which field uses which values */
+ CG_CALL_PLAN_REG, /* implicit value 0; the following enumerators count up from here */
+ CG_CALL_PLAN_STACK,
+ CG_CALL_PLAN_TAIL_STACK,
+ CG_CALL_PLAN_SRC_VALUE, /* NOTE(review): the SRC_* pair presumably applies to src_kind only — confirm */
+ CG_CALL_PLAN_SRC_ADDR,
+} OptCGCallPlanMoveKind;
+
+typedef struct OptCGCallPlanMove {
+ Operand src;
+ Operand dst;
+ MemAccess mem;
+ u32 src_offset;
+ u32 stack_offset;
+ Reg dst_reg;
+ u8 src_kind;
+ u8 dst_kind;
+ u8 cls;
+ u8 pad;
+} OptCGCallPlanMove;
+#define CGCallPlanMove OptCGCallPlanMove
+
+typedef struct OptCGCallPlanRet {
+ Operand dst;
+ MemAccess mem;
+ u32 dst_offset;
+ Reg src_reg;
+ u8 cls;
+ u8 pad[3];
+} OptCGCallPlanRet;
+#define CGCallPlanRet OptCGCallPlanRet
+
+typedef struct OptCGCallPlan { /* call description built from CGCallPlanMove args and CGCallPlanRet results */
+ Operand callee;
+ CGCallPlanMove* args; /* NOTE(review): presumably `nargs` entries — confirm */
+ CGCallPlanRet* rets; /* NOTE(review): presumably `nrets` entries — confirm */
+ u32 nargs;
+ u32 nrets;
+ u32 stack_arg_size;
+ u32 clobber_mask[3]; /* NOTE(review): 3 entries — presumably one per RegClass (RC_INT, RC_FP, RC_VEC); confirm */
+ u32 return_mask[3]; /* NOTE(review): same indexing as clobber_mask, presumably — confirm */
+ u16 flags;
+ u8 has_sret;
+ u8 is_variadic;
+} OptCGCallPlan;
+#define CGCallPlan OptCGCallPlan /* keeps the old CGCallPlan spelling usable after this point */
+
+typedef struct OptCGParamDesc {
+ u32 index;
+ Sym name;
+ CfreeCgTypeId type;
+ u32 size;
+ u32 align;
+ u32 flags;
+ SrcLoc loc;
+ CGLocalStorage storage;
+ const ABIArgInfo* abi;
+ const CGABIPart* incoming;
+ u32 nincoming;
+} OptCGParamDesc;
+#define CGParamDesc OptCGParamDesc
+
+typedef struct OptCGCallDesc {
+ CfreeCgTypeId fn_type;
+ Operand callee;
+ CGABIValue* args;
+ CGABIValue ret;
+ u32 nargs;
+ u16 flags;
+ u8 tail_policy;
+ u8 pad;
+ CfreeCgInlinePolicy inline_policy;
+ const ABIFuncInfo* abi;
+} OptCGCallDesc;
+#define CGCallDesc OptCGCallDesc
+
+typedef struct OptCGFuncDesc {
+ ObjSymId sym;
+ ObjSecId text_section_id;
+ ObjGroupId group_id;
+ CfreeCgTypeId fn_type;
+ const CfreeCgTypeId* result_types;
+ const CGParamDesc* params;
+ u32 nresults;
+ u32 nparams;
+ SrcLoc loc;
+ u32 flags;
+ CfreeCgInlinePolicy inline_policy;
+ u8 atomize;
+ u8 pad[3];
+ const ABIFuncInfo* abi;
+} OptCGFuncDesc;
+#define CGFuncDesc OptCGFuncDesc
+
+typedef struct OptCGScopeDesc {
+ u8 kind;
+ u8 pad[3];
+ Label break_label;
+ Label continue_label;
+ Operand cond;
+ CfreeCgTypeId result_type;
+} OptCGScopeDesc;
+#define CGScopeDesc OptCGScopeDesc
+
/* SSA value id. VAL_NONE=0 is reserved as a sentinel. Recorded CG virtual
* registers live in Func's pseudo-register table; before pseudo-reg SSA,
* OPK_REG operands carry those mutable Reg ids. After pseudo-reg SSA, OPK_REG
@@ -73,12 +288,16 @@ typedef enum IROp {
IR_INDIRECT_BRANCH, /* opnds[0] = addr REG; extra.aux = IRIndirectAux.
succ[0..nvalid) = the valid target blocks. */
IR_LOAD_LABEL_ADDR, /* opnds[0] dst REG; extra.imm = target block id. */
- IR_RET, /* extra.aux = IRRetAux* (NULL for void). */
- IR_SCOPE_BEGIN, /* extra.aux = IRScopeAux. */
- IR_SCOPE_ELSE, /* extra.imm = scope id (Val). */
- IR_SCOPE_END, /* extra.imm = scope id (Val). */
- IR_BREAK_TO, /* extra.imm = scope id (Val). */
- IR_CONTINUE_TO, /* extra.imm = scope id (Val). */
+ IR_LOCAL_STATIC_DATA_BEGIN, /* extra.aux = CgIrLocalStaticBeginAux */
+ IR_LOCAL_STATIC_DATA_WRITE, /* extra.aux = CgIrLocalStaticWriteAux */
+ IR_LOCAL_STATIC_DATA_LABEL_ADDR, /* extra.aux = CgIrLocalStaticLabelAux */
+ IR_LOCAL_STATIC_DATA_END, /* NOTE(review): payload not documented here; presumably closes the matching _BEGIN — confirm */
+ IR_RET, /* extra.aux = IRRetAux* (NULL for void). */
+ IR_SCOPE_BEGIN, /* extra.aux = IRScopeAux. */
+ IR_SCOPE_ELSE, /* extra.imm = scope id (Val). */
+ IR_SCOPE_END, /* extra.imm = scope id (Val). */
+ IR_BREAK_TO, /* extra.imm = scope id (Val). */
+ IR_CONTINUE_TO, /* extra.imm = scope id (Val). */
/* alloca / variadics. */
IR_ALLOCA, /* opnds = [dst REG, size]; extra.imm = align */
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -2,31 +2,15 @@
#define CFREE_OPT_H
#include "arch/arch.h"
+#include "arch/native_target.h"
+#include "cg/ir.h"
#include "opt/ir.h"
-/* opt_cgtarget: a CGTarget wrapper that records each function as IR.
- *
- * - opt_cgtarget advertises virtual_regs. CG mints unbounded virtual Reg ids
- * through the shared simple allocator and passes them to normal emit calls.
- * - Every other emit-side call is recorded into the current block as one
- * SSA Inst (with the current SrcLoc from set_loc).
- * - On CGTarget.func_end, level 1 immediately runs the lowering pipeline and
- * emits; level 2 retains the raw Func in a per-TU set.
- * - On CGTarget.finalize, level 2 runs inter-procedural passes (inlining),
- * then for each Func runs O2 cleanup/pre-lowering and machinize → live →
- * coalesce → RA → combine → DCE → prolog/epilog → translate, driving the
- * wrapped target CGTarget.
- *
- * No machine code is in `obj` until the driver calls cgtarget_finalize.
- * Drivers must call it before reading `obj` or invoking debug_emit.
- *
- * Owns `target` and frees it via cgtarget_free(target) on its own destroy.
- *
- * level:
- * 0 — caller should not use opt_cgtarget at all (drive target directly).
- * 1 — minimal: combine + DCE during lowering. No SSA passes. No inlining.
- * 2 — full pipeline below. Inlining enabled. */
-CGTarget* opt_cgtarget_new(Compiler*, CGTarget* target, int level);
+/* O1 input boundary: semantic cg/ir.h is recorded once, then lowered into the
+ * optimizer-private Func/PReg view. During the O2 cutover window every
+ * opt_level >= 1 is normalized internally to this O1 path. */
+CgTarget* opt_cgtarget_new(Compiler*, CgTarget* target, int level);
+Func* opt_func_from_cg_ir(Compiler*, const CgIrFunc*);
/* ----- intra-procedural passes (run per retained Func at finalize on -O2)
* ----- */
@@ -59,28 +43,9 @@ void opt_ssa_combine(Func*);
void opt_undo_ssa(Func*);
void opt_jump_opt(Func*);
-/* ----- inter-procedural passes (run on the whole Func set at finalize) -----
- */
-typedef struct FuncSet FuncSet;
-
-/* Walks the call graph bottom-up. For each caller, inlines callees that fit
- * the size/heuristic budget, marks the caller dirty, and queues it for
- * opt_cleanup. SCCs (mutual recursion) are skipped for v1.
- *
- * Iteration count is bounded by `max_iters` (driver knob `-finline-iters=N`,
- * default 1; cap is enforced by opt_cgtarget). */
-void opt_inline(FuncSet*, int max_iters);
-
-/* Full O2 pre-lowering cleanup: CFG cleanup, pseudo-reg SSA, mem2reg SSA,
- * value/memory/loop passes, conventional SSA lowering, SSA destruction, and
- * jump optimization. */
-void opt_cleanup(Func*);
-
-/* ----- lowering / backend prep (per Func, run before driving target CGTarget)
+/* ----- lowering / backend prep (per Func, run before NativeTarget emission)
* ----- */
-/* Machine-dependent ABI lowering, 2-op insns, etc. Implemented per-arch and
- * per-OS, so it takes the full Target. */
-void opt_machinize(Func*, CGTarget* target);
+void opt_machinize_native(Func*, NativeTarget* target);
void opt_build_loop_tree(Func*);
typedef struct OptBitset {
@@ -180,15 +145,13 @@ void opt_dce(Func*); /* post-RA DCE */
void opt_dead_def_elim(Func*); /* pre-RA dead-definition elimination */
void opt_dead_def_elim_with_live(Func*, const OptLiveInfo*);
-/* Walks the lowered IR and drives a target CGTarget to emit machine code into
- * its ObjBuilder. Inserts prolog/epilog. Splits long insns where the target
- * needs. Stamps each emitted insn's SrcLoc onto target via CGTarget.set_loc. */
-void opt_emit(Compiler*, Func*, CGTarget* target);
+/* Walks the lowered MIR and drives the physical native backend. */
+void opt_emit_native(Compiler*, Func*, NativeTarget* target);
/* When set, the wrapper writes a textual dump of each function's recorded
* tape to `w` on func_end, immediately before replay. Pass `w == NULL` to
* disable. The format is line-oriented and stable enough for golden-file
* diffs but otherwise unspecified. No-op if `t` is not an opt_cgtarget. */
-void opt_set_dump_writer(CGTarget* t, Writer* w);
+void opt_set_dump_writer(CgTarget* t, Writer* w);
#endif
diff --git a/src/opt/opt_internal.h b/src/opt/opt_internal.h
@@ -169,6 +169,4 @@ int opt_block_live_out_has_phys_reg(Func*, const OptHardBlockLive*, u32 block,
const Operand*);
void opt_coalesce_ranges(Func*, const OptLiveRangeSet*);
-void opt_replay(Compiler*, Func*, CGTarget* target);
-
#endif