kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 6042aecbf851abe32a7a0428bda3d13c803fb0cb
parent fe074537ddf0db080abc3cc1080993eaf382cc7d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 26 May 2026 09:32:45 -0700

Cut over Cg to semantic CgTarget

Diffstat:
M doc/CGTARGET.md | 29 +++++++++++++++--------------
M driver/env.c | 7 +++++++
M include/cfree/core.h | 4 ++++
M src/arch/arch.h | 970 +------------------------------------------------------------------------------
M src/arch/c_target/emit.c | 1169 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M src/arch/c_target/internal.h | 49 ++++++++++++++++++++++---------------------------
M src/arch/c_target/target.c | 266 +++++++++++++++++++------------------------------------------------------------
M src/arch/cgtarget.c | 48 +++++++++++++++++++++---------------------------
M src/arch/check_target.c | 272 +++++++++++++++++++------------------------------------------------------------
M src/arch/regalloc.c | 12 ++++++------
M src/arch/regalloc.h | 28 +++++++++++++++++-----------
M src/arch/registry.c | 2 +-
M src/cg/arith.c | 318 ++++++++++++++------------------------------------------------------------------
M src/cg/asm.c | 115 +++++++++++++++++++++++--------------------------------------------------------
M src/cg/atomic.c | 66 +++++++++++++++++++++---------------------------------------------
M src/cg/call.c | 359 ++++++++++++++++++++++++++++++-------------------------------------------------
A src/cg/cgtarget.h | 679 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/cg/control.c | 214 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M src/cg/data.c | 150 +++++++++----------------------------------------------------------------------
M src/cg/internal.h | 113 ++++++++++++++++++++++++++-----------------------------------------------------
M src/cg/local.c | 35 +++++------------------------------
M src/cg/memory.c | 459 +++++++++++++++++++++++++++++++++++++------------------------------------------
M src/cg/session.c | 48 ++++++++++++++++--------------------------------
M src/cg/type.c | 2 +-
M src/cg/value.c | 518 +++++++++++++++++++------------------------------------------------------------
M src/cg/wide.c | 71 ++++++++++++++++++++++++++++++++++++++++-------------------------------
M src/core/core.c | 7 +++++++
M test/test.mk | 7 ++++---
28 files changed, 2328 insertions(+), 3689 deletions(-)

diff --git a/doc/CGTARGET.md b/doc/CGTARGET.md @@ -73,7 +73,7 @@ recorded directly as clean lowered-CG IR: - immediates, globals, locals, and indirect addresses; - labels and structured scopes; - target-data-layout-specific memory accesses; -- ABI-shaped calls and returns; +- calls and returns as typed semantic locals, including multi-result returns; - aggregate, bitfield, atomic, vararg, intrinsic, and inline-asm operations; - sticky source locations. @@ -107,7 +107,7 @@ The semantic API still includes operations such as: - `binop`, `unop`, `cmp`, and `convert`; - labels, branches, switches, label-address materialization, indirect branches, and structured scopes; -- `call` and `ret` using ABI-shaped descriptors; +- `call` and `ret` using local-only semantic descriptors; - `alloca_`, `va_*`, atomics, fences, intrinsics, inline asm, and source location tracking. @@ -206,10 +206,10 @@ typedef struct NativeLocal { `NativeDirectTarget` then uses local greedy helpers: ```c -Reg materialize(NativeDirectTarget *, Operand op, RegClass cls); -Reg ensure_writable_reg(NativeDirectTarget *, CGLocal dst, RegClass cls); +Reg materialize(NativeDirectTarget *, Operand op, NativeAllocClass cls); +Reg ensure_writable_reg(NativeDirectTarget *, CGLocal dst, NativeAllocClass cls); void flush_local(NativeDirectTarget *, CGLocal local); -void spill_one(NativeDirectTarget *, RegClass cls); +void spill_one(NativeDirectTarget *, NativeAllocClass cls); ``` The cache policy can be simple: @@ -397,8 +397,8 @@ This interface owns the machine-level concerns removed from semantic Static register-file metadata belongs in `NativeRegInfo`: ```c -typedef struct NativeRegClassInfo { - RegClass cls; +typedef struct NativeAllocClassInfo { + NativeAllocClass cls; const Reg *allocable; u32 nallocable; @@ -414,16 +414,17 @@ typedef struct NativeRegClassInfo { u32 arg_mask; u32 ret_mask; u32 reserved_mask; -} NativeRegClassInfo; +} NativeAllocClassInfo; typedef struct NativeRegInfo { - const 
NativeRegClassInfo *classes; + const NativeAllocClassInfo *classes; u32 nclasses; int (*resolve_name)(const NativeRegInfo *, Sym name, Reg *out, - RegClass *cls_out); - const char *(*debug_name)(const NativeRegInfo *, RegClass cls, Reg reg); - u32 (*dwarf_reg)(const NativeRegInfo *, RegClass cls, Reg reg); + NativeAllocClass *cls_out); + const char *(*debug_name)(const NativeRegInfo *, NativeAllocClass cls, + Reg reg); + u32 (*dwarf_reg)(const NativeRegInfo *, NativeAllocClass cls, Reg reg); } NativeRegInfo; ``` @@ -432,7 +433,7 @@ assembly dialects later affect name resolution, the callback can take a small dialect context. Call-specific answers should not be static register metadata when they depend -on ABI, calling convention, variadic state, sret, vector ABI, or attributes. +on ABI, calling convention, variadic state, vector ABI, or attributes. Those belong to native call planning: ```text @@ -459,7 +460,7 @@ It still provides: - lvalue/rvalue conversion; - aggregate, bitfield, call, switch, computed-goto, vararg, alloca, and inline asm lowering; -- construction of ABI-shaped `CGCallDesc` and `CGABIValue` records; +- construction of local-only `CGCallDesc` records; - delayed semantic patterns such as delayed compares for branches; - a single diagnostic point for misuse of the public CG API; - a convenient frontend interface for simple non-C producers. diff --git a/driver/env.c b/driver/env.c @@ -1456,6 +1456,13 @@ void driver_logf(const char* fmt, ...) { fputc('\n', stderr); } +void cfree_debug_printf(const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + void driver_printf(const char* fmt, ...) 
{ va_list ap; va_start(ap, fmt); diff --git a/include/cfree/core.h b/include/cfree/core.h @@ -324,6 +324,10 @@ CFREE_API CfreeSlice cfree_compiler_file_name(CfreeCompiler*, uint32_t file_id); CFREE_API CfreeSym cfree_sym_intern(CfreeCompiler*, CfreeSlice); CFREE_API CfreeSlice cfree_sym_str(CfreeCompiler*, CfreeSym); +/* Optional host-provided debug stream for temporary libcfree instrumentation. + * libcfree supplies a weak no-op fallback; hosted binaries may override it. */ +CFREE_API void cfree_debug_printf(const char* fmt, ...); + CFREE_API CfreeStatus cfree_writer_mem(CfreeHeap*, CfreeWriter** out); CFREE_API const uint8_t* cfree_writer_mem_bytes(CfreeWriter*, size_t* len_out); diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -6,539 +6,22 @@ #include <cfree/disasm.h> #include "abi/abi.h" +#include "cg/cgtarget.h" #include "core/core.h" #include "obj/obj.h" -/* Forward-declared so CGTarget can carry an optional Debug* without +/* Forward-declared so CgTarget can carry an optional Debug* without * pulling debug/debug.h into every translation unit that includes arch.h. * Per doc/DWARF.md §3.2 the backend gets exactly one new dependency on * Debug: this forward decl plus debug_emit_row (declared by the few * backend TUs that actually emit line rows). */ typedef struct Debug Debug; -/* Reg is wide enough for opt_cgtarget to hand out unbounded virtual registers - * (one per defined value). Target backends use only a small subset. */ +/* Native-only register id. The semantic CgTarget surface uses CGLocal; this + * remains here for MC/native helpers and disabled native backends. */ typedef u32 Reg; #define REG_NONE 0xffffffffu -/* Vector / SIMD forward compat: vector ops will arrive as new variants in - * the BinOp, UnOp, CmpOp, ConvKind families. 
Backend switches over these - * enums must use `default:` (unreachable / panic) rather than exhaustive - * case lists, so adding a new variant later does not silently mis-handle on - * backends that haven't been taught about it. Vector loads/stores reuse the - * existing load/store methods with vector-typed Operands and appropriate - * MemAccess. RegClass may grow mask/predicate subclasses (e.g. AVX-512 - * k-regs, SVE predicate regs) — Operand.cls is u8 and has room. */ -typedef enum RegClass { - RC_INT, - RC_FP, - RC_VEC, -} RegClass; - -typedef enum BinOp { - BO_IADD, - BO_ISUB, - BO_IMUL, - BO_SDIV, - BO_UDIV, - BO_SREM, - BO_UREM, - BO_FADD, - BO_FSUB, - BO_FMUL, - BO_FDIV, - BO_AND, - BO_OR, - BO_XOR, - BO_SHL, - BO_SHR_S, - BO_SHR_U, -} BinOp; - -typedef enum UnOp { - UO_NEG, - UO_FNEG, - UO_NOT, /* logical: 0/1 */ - UO_BNOT, /* bitwise ~ */ -} UnOp; - -typedef enum CmpOp { - CMP_EQ, - CMP_NE, - CMP_LT_S, - CMP_LE_S, - CMP_GT_S, - CMP_GE_S, - CMP_LT_U, - CMP_LE_U, - CMP_GT_U, - CMP_GE_U, - CMP_LT_F, - CMP_LE_F, - CMP_GT_F, - CMP_GE_F, -} CmpOp; - -typedef enum ConvKind { - CV_SEXT, - CV_ZEXT, - CV_TRUNC, - CV_ITOF_S, - CV_ITOF_U, - CV_FTOI_S, - CV_FTOI_U, - CV_FEXT, - CV_FTRUNC, - CV_BITCAST, -} ConvKind; - -typedef enum AtomicOp { - AO_XCHG, - AO_ADD, - AO_SUB, - AO_AND, - AO_OR, - AO_XOR, - AO_NAND, -} AtomicOp; - -typedef enum MemOrder { - MO_RELAXED, - MO_CONSUME, - MO_ACQUIRE, - MO_RELEASE, - MO_ACQ_REL, - MO_SEQ_CST, -} MemOrder; - -/* Compiler-intrinsic kinds dispatched through CGTarget.intrinsic and carried - * on IR_INTRINSIC via IRIntrinAux.kind. The set is bounded: a backend - * must know each one to choose inline-vs-libcall. Hint intrinsics - * (EXPECT/UNREACHABLE/TRAP/PREFETCH/ASSUME_ALIGNED) ride the same dispatch: - * the backend decides whether they emit an instruction or a no-op. - * - * Not every C builtin lives here. 
Parser-evaluated builtins - * (__builtin_offsetof, __builtin_constant_p, __builtin_choose_expr, - * __builtin_types_compatible_p) fold at parse and never reach IR. Builtins - * that already have dedicated CGTarget methods (alloca, va_*, atomics) keep - * them. Returns-twice and no-return control intrinsics use this dispatch so - * opt can preserve their CFG effects without growing backend vtable hooks. */ -typedef enum IntrinKind { - INTRIN_NONE = 0, - - /* bit ops */ - INTRIN_POPCOUNT, - INTRIN_CTZ, - INTRIN_CLZ, - INTRIN_BSWAP16, - INTRIN_BSWAP32, - INTRIN_BSWAP64, - - /* memory */ - INTRIN_MEMCPY, - INTRIN_MEMMOVE, - INTRIN_MEMSET, - INTRIN_PREFETCH, - INTRIN_ASSUME_ALIGNED, - - /* hints */ - INTRIN_EXPECT, - INTRIN_UNREACHABLE, - INTRIN_TRAP, - - /* non-local control */ - INTRIN_SETJMP, - INTRIN_LONGJMP, - - /* checked arith — multi-result (value, overflow_flag) */ - INTRIN_SADD_OVERFLOW, - INTRIN_UADD_OVERFLOW, - INTRIN_SSUB_OVERFLOW, - INTRIN_USUB_OVERFLOW, - INTRIN_SMUL_OVERFLOW, - INTRIN_UMUL_OVERFLOW, -} IntrinKind; - -typedef enum OpKind { - OPK_IMM, - OPK_REG, - OPK_LOCAL, /* frame-relative; v.frame_slot identifies the slot */ - OPK_GLOBAL, /* address: symbol+addend, not a load */ - OPK_INDIRECT, /* [reg + ofs] */ -} OpKind; - -typedef u32 FrameSlot; -#define FRAME_SLOT_NONE 0u - -typedef enum FrameSlotKind { - FS_LOCAL, - FS_PARAM, - FS_SPILL, - FS_SRET, - FS_ALLOCA, -} FrameSlotKind; - -typedef enum FrameSlotFlag { - FSF_NONE = 0, - FSF_ADDR_TAKEN = 1u << 0, - FSF_VOLATILE = 1u << 1, -} FrameSlotFlag; - -typedef struct FrameSlotDesc { - CfreeCgTypeId type; - Sym name; - SrcLoc loc; - u32 size; - u32 align; - u8 kind; /* FrameSlotKind */ - u8 pad; - u16 flags; /* FrameSlotFlag */ -} FrameSlotDesc; - -typedef enum CGLocalFlag { - CG_LOCAL_NONE = 0, - CG_LOCAL_ADDR_TAKEN = 1u << 0, - CG_LOCAL_MEMORY_REQUIRED = 1u << 1, -} CGLocalFlag; - -typedef struct CGLocalDesc { - CfreeCgTypeId type; - Sym name; - SrcLoc loc; - u32 size; - u32 align; - u32 flags; /* 
CGLocalFlag */ -} CGLocalDesc; - -typedef enum CGLocalStorageKind { - CG_LOCAL_STORAGE_FRAME, - CG_LOCAL_STORAGE_REG, -} CGLocalStorageKind; - -typedef struct CGLocalStorage { - u8 kind; /* CGLocalStorageKind */ - u8 pad[3]; - union { - FrameSlot frame_slot; - Reg reg; - } v; -} CGLocalStorage; - -typedef enum MemFlag { - MF_NONE = 0, - MF_VOLATILE = 1u << 0, - MF_ATOMIC = 1u << 1, - MF_RESTRICT = 1u << 2, - MF_READONLY = 1u << 3, - MF_WRITEONLY = 1u << 4, - MF_UNALIGNED = 1u << 5, -} MemFlag; - -typedef enum AliasKind { - ALIAS_UNKNOWN, - ALIAS_LOCAL, - ALIAS_GLOBAL, - ALIAS_PARAM, - ALIAS_HEAP, - ALIAS_STRING, -} AliasKind; - -typedef struct AliasRoot { - u8 kind; /* AliasKind */ - u8 pad[3]; - union { - i32 local_id; - ObjSymId global; - u32 param_idx; - Sym string_id; - } v; -} AliasRoot; - -typedef struct MemAccess { - CfreeCgTypeId type; /* codegen object type accessed */ - u32 size; /* ABI byte size of this access */ - u32 align; /* known byte alignment; 0 means unknown */ - u16 flags; /* MemFlag */ - u16 addr_space; - AliasRoot alias; -} MemAccess; - -typedef struct ConstBytes { - CfreeCgTypeId type; - const u8* bytes; /* ABI representation, little/big endian per target */ - u32 size; - u32 align; -} ConstBytes; - -typedef struct AggregateAccess { - CfreeCgTypeId type; - u32 size; - u32 align; - MemAccess mem; -} AggregateAccess; - -typedef struct BitFieldAccess { - CfreeCgTypeId field_type; - MemAccess storage; - u32 storage_offset; /* byte offset from record base */ - u16 bit_offset; /* target-endian bit offset within storage unit */ - u16 bit_width; /* may be 0 for zero-width layout barriers */ - u8 signed_; - u8 pad[3]; -} BitFieldAccess; - -typedef struct Operand { - u8 kind; - u8 cls; /* RegClass */ - u16 pad; - CfreeCgTypeId type; - union { - i64 imm; - Reg reg; - FrameSlot frame_slot; - struct { - ObjSymId sym; - i64 addend; - } global; - struct { - Reg base; - Reg index; /* REG_NONE when no index operand */ - u8 log2_scale; /* 0..3 -> 1/2/4/8 
bytes; ignored when index == REG_NONE */ - i32 ofs; - } ind; - } v; -} Operand; - -typedef enum CGABIPartFlag { - CG_ABI_PART_NONE = 0, - CG_ABI_PART_SRET = 1u << 0, - CG_ABI_PART_BYVAL = 1u << 1, - CG_ABI_PART_INDIRECT = 1u << 2, -} CGABIPartFlag; - -typedef struct CGABIPart { - const ABIArgPart* abi_part; - Operand op; - u32 src_offset; - u32 size; - u16 flags; /* CGABIPartFlag */ - u16 pad; -} CGABIPart; - -typedef struct CGABIValue { - CfreeCgTypeId type; - const ABIArgInfo* abi; - Operand - storage; /* address for indirect/byval/sret, REG/IMM for simple values */ - const CGABIPart* parts; - u32 nparts; - /* Aggregate byte size of `type`, populated by cg for struct/union args - * and returns. Backends need this to memcpy through OPK_INDIRECT byval - * sources where no frame-slot size is available; left 0 for scalars. */ - u32 size; -} CGABIValue; - -typedef struct CGParamDesc { - u32 index; - Sym name; - CfreeCgTypeId type; - u32 size; - u32 align; - u32 flags; /* CGLocalFlag */ - CGLocalStorage storage; - const ABIArgInfo* abi; - const CGABIPart* incoming; - u32 nincoming; - SrcLoc loc; -} CGParamDesc; - -/* text_section_id and group_id are per-function so that -ffunction-sections, - * __attribute__((section)) on functions, and COMDAT for C11 inline-with- - * external-definition all work with no extra plumbing. Decl.section_id already - * carries the user's request; CG/decl decides the section name policy - * (default .text, vs .text.<sym> under -ffunction-sections, vs explicit - * attribute). The backend just writes to the named section. */ -/* Phase 2 attribute-derived hints. The backends are free to ignore these; - * they exist so the parser can communicate _Noreturn / __attribute__ - * info down to CG without forcing every backend to consult the Decl. 
*/ -typedef enum CGFuncDescFlag { - CGFD_NONE = 0, - CGFD_NORETURN = 1u << 0, -} CGFuncDescFlag; - -typedef struct CGFuncDesc { - ObjSymId sym; - ObjSecId text_section_id; - ObjGroupId group_id; /* OBJ_GROUP_NONE if none */ - CfreeCgTypeId fn_type; - const ABIFuncInfo* abi; - const CGParamDesc* params; - u32 nparams; - SrcLoc loc; - u32 flags; /* CGFuncDescFlag */ - CfreeCgInlinePolicy inline_policy; - u8 atomize; - u8 pad[3]; -} CGFuncDesc; - -typedef struct CGKnownFrameDesc { - const FrameSlotDesc* slots; - u32 nslots; - u32 max_outgoing; - u8 has_alloca; - u8 has_call; - u8 may_omit_frame; - u8 pad; -} CGKnownFrameDesc; - -typedef enum CGCallFlag { - CG_CALL_NONE = 0, - /* Sibling call. The target emits the caller's epilogue, transfers - * control to the callee (B/JUMP26 for direct, BR Xn for indirect), and - * does NOT emit a return-style continuation. CG will not invoke - * target->ret afterwards. - * - * Realizability is verified before this flag is set: CG only sets it after - * tail_call_unrealizable_reason() returns NULL for the same desc and frame - * state, so the target can emit the sibling call unconditionally. The - * target may assert/compiler_panic if the flag is set on an unrealizable - * desc, but that is an internal-consistency check — fallback and - * diagnostics for unrealizable tail calls are CG's responsibility, not the - * target's. - * - * When the desc has an sret return (abi->has_sret), a tail call forwards - * the function's own incoming sret pointer to the callee's sret slot rather - * than pointing at a fresh local; ret.storage is the void sentinel and must - * not be used. CG's return-shape precondition guarantees the forwarded - * pointer matches the callee's expectation. 
*/ - CG_CALL_TAIL = 1u << 0, -} CGCallFlag; - -typedef struct CGCallDesc { - CfreeCgTypeId fn_type; - const ABIFuncInfo* abi; - Operand callee; - const CGABIValue* args; - u32 nargs; - u16 flags; /* CGCallFlag */ - u8 tail_policy; /* CfreeCgTailPolicy; meaningful when CG_CALL_TAIL is set. - * The opt recorder accepts every tail and preserves this so - * the replay can pick: emit tail (realizable), fall back to - * call+ret (ALLOWED), or diagnose (MUST). */ - u8 pad; - CfreeCgInlinePolicy inline_policy; - CGABIValue ret; -} CGCallDesc; - -typedef enum CGPhysRegFlag { - CG_REG_ALLOCABLE = 1u << 0, - CG_REG_CALLER_SAVED = 1u << 1, - CG_REG_CALLEE_SAVED = 1u << 2, - CG_REG_ARG = 1u << 3, - CG_REG_RET = 1u << 4, - CG_REG_TEMP_PREFERRED = 1u << 5, - CG_REG_PLATFORM = 1u << 6, - CG_REG_RESERVED = 1u << 7, -} CGPhysRegFlag; - -typedef struct CGPhysRegInfo { - Reg reg; - u8 cls; /* RegClass */ - u8 abi_index; /* arg/ret order when applicable, otherwise 0xff */ - u16 flags; /* CGPhysRegFlag */ - u16 save_cost; - u16 use_cost; -} CGPhysRegInfo; - -typedef enum CGCallPlanLocKind { - CG_CALL_PLAN_REG, - CG_CALL_PLAN_STACK, - /* Stack argument for a sibling call. The slot is addressed where the - * caller's stack pointer will be after this frame is restored. 
*/ - CG_CALL_PLAN_TAIL_STACK, - CG_CALL_PLAN_IGNORE, -} CGCallPlanLocKind; - -typedef enum CGCallPlanSrcKind { - CG_CALL_PLAN_SRC_VALUE, - CG_CALL_PLAN_SRC_ADDR, -} CGCallPlanSrcKind; - -typedef struct CGCallPlanMove { - Operand src; - u8 dst_kind; /* CGCallPlanLocKind */ - u8 src_kind; /* CGCallPlanSrcKind */ - u8 cls; /* RegClass for register destinations */ - u8 pad; - Reg dst_reg; - u32 src_offset; - u32 stack_offset; - MemAccess mem; -} CGCallPlanMove; - -typedef struct CGCallPlanRet { - Operand dst; - u8 cls; - Reg src_reg; - u32 dst_offset; - MemAccess mem; -} CGCallPlanRet; - -#define CG_CALL_PLAN_REG_CLASSES 3u - -typedef struct CGCallPlan { - CGCallPlanMove* args; - u32 nargs; - CGCallPlanRet* rets; - u32 nrets; - Operand callee; - u32 clobber_mask[CG_CALL_PLAN_REG_CLASSES]; - u32 return_mask[CG_CALL_PLAN_REG_CLASSES]; - u32 stack_arg_size; - u8 variadic_fp_count; - u8 is_variadic; - u8 has_sret; - u8 pad; - u16 flags; /* CGCallFlag */ - u16 pad2; -} CGCallPlan; - -typedef u32 Label; -#define LABEL_NONE 0 - -typedef enum ScopeKind { - SCOPE_BLOCK, /* break exits forward */ - SCOPE_LOOP, /* break exits forward; continue uses explicit target */ - SCOPE_IF, /* cond consumed at scope_begin */ -} ScopeKind; - -typedef u32 CGScope; -#define CG_SCOPE_NONE 0u - -typedef struct CGScopeDesc { - u8 kind; /* ScopeKind */ - u8 pad[3]; - Label break_label; /* explicit target for break; LABEL_NONE => target creates - one */ - Label continue_label; /* explicit target for continue; LABEL_NONE for - non-loops */ - Operand cond; /* SCOPE_IF condition; ignored otherwise */ - CfreeCgTypeId result_type; /* reserved for structured expression results */ -} CGScopeDesc; - -typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir; - -typedef struct AsmConstraint { - const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... 
*/ - Sym name; /* GCC `[name]` symbolic operand; 0 if absent */ - CfreeCgTypeId type; /* codegen type of the bound expression (output lvalue or - input rvalue). Drives RegClass + width for the - binder. NULL only for hand-built test constraints - (binder falls back to a 64-bit int default). */ - u8 dir; /* AsmDir */ - u8 pad[3]; -} AsmConstraint; - typedef u32 MCLabel; #define MC_LABEL_NONE 0u @@ -643,420 +126,12 @@ struct MCEmitter { void (*destroy)(MCEmitter*); }; -typedef struct CGSwitchCase { - /* Bit pattern matched against the selector; interpreted using - * selector_type's width and signedness (signed comparison uses - * sign-extension to selector_type's width). */ - u64 value; - Label label; -} CGSwitchCase; - -typedef struct CGSwitchDesc { - Operand selector; /* OPK_REG or OPK_IMM */ - CfreeCgTypeId selector_type; - Label default_label; /* LABEL_NONE means "fall through past the switch" */ - const CGSwitchCase* cases; - u32 ncases; - u8 hint; /* CfreeCgSwitchHint */ - u8 opt_level; /* 0/1/2; reads policy in cg_lower_switch_default */ - u8 pad[2]; -} CGSwitchDesc; - -typedef struct CGLocalStaticDataDesc { - ObjSymId sym; - CfreeCgTypeId type; - CfreeCgDataDefAttrs attrs; - u32 align; -} CGLocalStaticDataDesc; - -typedef struct CGTarget CGTarget; -struct CGTarget { - /* Typed IR lowering context. Subclasses extend. */ - Compiler* c; - ObjBuilder* obj; - MCEmitter* mc; - u8 virtual_regs; - u8 pad0[3]; - - /* Optional. When non-NULL, per-instruction emit calls Debug to record - * line rows; func_begin/func_end attribute PC ranges to the active - * subprogram. cg_new sets this from its Debug* argument; the cg_test - * harness sets it directly when it constructs a CGTarget+Debug pair as - * the parser stand-in. NULL means -g is off and the backend skips all - * Debug fanout. */ - Debug* debug; - - /* ---- function lifecycle ---- */ - void (*func_begin)(CGTarget*, const CGFuncDesc*); - void (*func_end)(CGTarget*); - - /* Symbol-aliasing hook. 
Optional (may be NULL). cg invokes this from - * cfree_cg_alias after the obj symbol-table mirror is wired so the - * backend can emit any out-of-band representation it needs — e.g. the - * C-source target writes - * `T alias_sym(...) __attribute__((alias("target")));` - * because the alias relationship isn't expressible by sharing a - * (section, value) pair the way a relocatable object can. Native - * machine-code backends don't need this hook because obj_symbol_define - * already aliases the bytes. `type` is the alias's CG type (function - * or object), needed by the C target to render the prototype. */ - void (*alias)(CGTarget*, ObjSymId alias_sym, ObjSymId target_sym, - CfreeCgTypeId type); - - /* Optional fast path for optimized emitters that know all frame slots and - * outgoing call area needs before body emission. `out_slots`, when non-NULL, - * has `frame->nslots` entries and receives target FrameSlot ids in order. */ - void (*func_begin_known_frame)(CGTarget*, const CGFuncDesc*, - const CGKnownFrameDesc*, FrameSlot* out_slots); - - /* ---- frame slots and spill/reload ---- - * CG and opt allocate caller-visible registers and pass concrete Operand - * regs to the target. Plain machine targets consume hard regs; opt_cgtarget - * sets virtual_regs and records virtual Reg ids as SSA values. */ - FrameSlot (*frame_slot)(CGTarget*, const FrameSlotDesc*); - CGLocalStorage (*local)(CGTarget*, const CGLocalDesc*); - void (*local_addr)(CGTarget*, Operand dst, const CGLocalDesc*, - CGLocalStorage); - CGLocalStorage (*param)(CGTarget*, const CGParamDesc*); - void (*spill_reg)(CGTarget*, Operand src_reg, FrameSlot, MemAccess); - void (*reload_reg)(CGTarget*, Operand dst_reg, FrameSlot, MemAccess); - - /* ---- opt/back-end register coordination ---- - * CGTarget users allocate caller-visible registers before invoking target - * ops. Direct CG uses these hooks to initialize its simple hard-reg - * allocator. 
opt uses them after IR rewriting to coordinate assigned hard - * regs and backend scratch policy without hard-coding arch details. */ - - /* Return the target's allocable hard register pool for `cls`. - * Sets *out to a stable array and *nregs to its length. The array - * is backend-internal storage that outlives the current function. */ - void (*get_allocable_regs)(CGTarget*, RegClass, const Reg** out, u32* nregs); - - /* Return the target's physical register file for `cls`. Unlike - * get_allocable_regs, this is descriptive metadata: callers filter by - * CGPhysRegFlag instead of assuming the list is already an allocation - * policy. */ - void (*get_phys_regs)(CGTarget*, RegClass, const CGPhysRegInfo** out, - u32* nregs); - - /* Return the target's scratch registers for `cls`. - * Scratch registers are used internally by the backend (e.g. large - * immediate materialization) and must not appear in the allocable pool. - * Opt uses them for spill reload/store materialization. */ - void (*get_scratch_regs)(CGTarget*, RegClass, const Reg** out, u32* nregs); - - /* Return non-zero if `reg` in `cls` is caller-saved on this target. */ - int (*is_caller_saved)(CGTarget*, RegClass, Reg); - - u32 (*call_clobber_mask)(CGTarget*, const CGCallDesc*, RegClass); - u32 (*return_reg_mask)(CGTarget*, const ABIFuncInfo*, RegClass); - u32 (*callee_save_mask)(CGTarget*, RegClass); - - /* Tell the backend which hard registers opt is going to assign in the next - * function before func_begin reserves its prologue placeholder. Backends use - * this only as a sizing hint; reserve_hard_regs remains the authoritative - * per-function preservation hook. */ - void (*plan_hard_regs)(CGTarget*, RegClass, const Reg* regs, u32 n); - - /* Tell the backend which hard registers opt actually assigned in the - * current function. Call after the function body is emitted, before - * func_end. 
The backend updates prologue/epilogue bookkeeping so it - * saves/restores only the callee-saved subset that opt used. - * - * Direct CG and opt both call this after emitting hard-register operands. */ - void (*reserve_hard_regs)(CGTarget*, RegClass, const Reg* regs, u32 n); - - /* Return the outgoing stack argument area needed by this call after target - * ABI routing/alignment. Optional; only needed by known-frame emitters. */ - u32 (*call_stack_size)(CGTarget*, const CGCallDesc*); - - /* ---- labels and control flow ---- */ - Label (*label_new)(CGTarget*); - void (*label_place)(CGTarget*, Label); - /* Translate a CGTarget-visible Label to the underlying MCEmitter - * MCLabel id. For direct CG backends Label IS the MCLabel id so this - * is an identity function (optional — NULL is treated as identity). - * The opt wrapper overrides this to look up the IR block's - * pre-allocated MCLabel; cfree_cg_data_label_addr needs the stable - * MCLabel id at IR-recording time, before opt has built its - * per-function label_map. */ - MCLabel (*cg_label_to_mc_label)(CGTarget*, Label); - void (*jump)(CGTarget*, Label); - /* Fused compare-and-branch. cg's preferred form: avoids materializing 0/1 - * for a normal `if (a < b)`. For an arbitrary i1 in a register, callers - * synthesize cmp_branch(CMP_NE, val, IMM_ZERO, label). */ - void (*cmp_branch)(CGTarget*, CmpOp, Operand a, Operand b, Label); - - /* Structured switch dispatch. - * - * Optional: when NULL, cg's shared `cg_lower_switch_default` runs and - * lowers in terms of cmp_branch / jump / indirect_branch / data ops — - * the path every native arch uses. Backends override switch_ only when - * they can express the construct natively: the C-source target emits - * `switch (val) { case V: goto L_V; ... default: goto L_def; }`; a - * future WASM target would emit `br_table`. 
- * - * The descriptor carries the full structured form (selector + paired - * cases + default + frontend hint); density policy lives in - * cg_lower_switch_default. */ - void (*switch_)(CGTarget*, const CGSwitchDesc*); - - /* Indirect branch primitive: transfer control to the address in - * `addr_reg` (an OPK_REG holding a function-local label address). - * - * Required on every native arch and used by: - * - cfree_cg_computed_goto for direct-threaded dispatch - * - opt-level jump-table lowerings of IR_SWITCH (when implemented) - * - * `valid_targets[0..ntargets)` is the closed set of labels the address - * can resolve to. Backends use it for branch-target hardening (BTI, - * PAC, x86 CFG, IBT) and opt uses it to build the CFG; opt requires - * ntargets > 0. */ - void (*indirect_branch)(CGTarget*, Operand addr_reg, - const Label* valid_targets, u32 ntargets); - - /* Materialize the runtime address of a function-local label into - * `dst_reg`. The label must already exist (label_new); it does not - * need to be placed yet. Backends emit the arch's PC-relative load: - * x86_64 `lea L(%rip), %r`, aarch64 `adr X, L`, riscv `auipc/addi`. - * - * The resulting pointer is a function-local label address (per the - * public cfree_cg_push_label_addr contract) and must only be consumed - * by indirect_branch inside the defining function's activation. */ - void (*load_label_addr)(CGTarget*, Operand dst_reg, Label label); - - /* Optional source-backend hook for function-local static data definitions - * that need function label scope, currently used for C `&&label` - * dispatch-table initializers. Returning non-zero from begin means the - * target consumes bytes/zeros/label addresses until end; ordinary object - * data emission is skipped for that definition. */ - int (*local_static_data_begin)(CGTarget*, const CGLocalStaticDataDesc*); - /* data == NULL means append len zero bytes. 
*/ - void (*local_static_data_write)(CGTarget*, const u8* data, u64 len); - void (*local_static_data_label_addr)(CGTarget*, Label target, i64 addend, - u32 width, u32 address_space); - void (*local_static_data_end)(CGTarget*); - - /* Optional. When non-NULL, cfree_cg_data_label_addr panics with the - * returned wasm-style message before reaching the MCEmitter. Lets - * targets that cannot resolve function-local label addresses in - * static-data initializers (e.g. the Wasm backend) fail with a - * recognizable, target-prefixed diagnostic instead of an MCEmitter - * "bad label" assertion. The returned string must remain valid for - * the lifetime of the panic call (string literals are typical). */ - const char* (*data_label_addr_unsupported_msg)(CGTarget*); - - /* ---- structured control flow ---- - * Mirrors CG's scope ops. CG passes explicit break/continue targets so C - * `for` continues can land on the increment expression rather than the loop - * header. Real backends shim these onto label_new/label_place/jump. - * The WASM backend consumes them natively to emit block/loop/if with - * structurally-bounded br targets, which is what gives WASM its CFI. - * - * For SCOPE_IF, `cond` is the i1 operand; ignored for BLOCK/LOOP. - * `result_type` is reserved for if-as-expression on WASM (NULL for the - * statement case used by C); other backends ignore it. 
*/ - CGScope (*scope_begin)(CGTarget*, const CGScopeDesc*); - void (*scope_else)(CGTarget*, CGScope); - void (*scope_end)(CGTarget*, CGScope); - void (*break_to)(CGTarget*, CGScope); - void (*continue_to)(CGTarget*, CGScope); - - /* ---- data movement (split, no overloading) ---- */ - void (*load_imm)(CGTarget*, Operand dst /*REG*/, i64 imm); - void (*load_const)(CGTarget*, Operand dst /*REG*/, ConstBytes); - void (*copy)(CGTarget*, Operand dst /*REG*/, Operand src /*REG*/); - void (*load)(CGTarget*, Operand dst /*REG*/, - Operand addr /*LOCAL|GLOBAL|INDIRECT*/, MemAccess); - void (*store)(CGTarget*, Operand addr /*LOCAL|GLOBAL|INDIRECT*/, - Operand src /*REG|IMM*/, MemAccess); - void (*addr_of)(CGTarget*, Operand dst /*REG*/, - Operand lv /*LOCAL|GLOBAL|INDIRECT*/); - /* Materializes the address of a thread-local symbol into `dst`. Distinct - * from addr_of because TLS resolution can be a multi-instruction sequence - * or a runtime call (e.g. GD model), not a cheap addressing mode. The - * backend chooses the TLS model (LE/IE/LD/GD) from c->target and the - * symbol's visibility. Subsequent accesses go through OPK_INDIRECT on the - * resulting pointer; this lets opt hoist the materialization via LICM. */ - void (*tls_addr_of)(CGTarget*, Operand dst /*REG*/, ObjSymId sym, i64 addend); - void (*copy_bytes)(CGTarget*, Operand dst_addr, Operand src_addr, - AggregateAccess); - void (*set_bytes)(CGTarget*, Operand dst_addr, Operand byte_value, - AggregateAccess); - void (*bitfield_load)(CGTarget*, Operand dst /*REG*/, Operand record_addr, - BitFieldAccess); - void (*bitfield_store)(CGTarget*, Operand record_addr, - Operand src /*REG|IMM*/, BitFieldAccess); - - /* ---- arithmetic, compare, convert ---- - * binop/unop/cmp accept OPK_REG or OPK_IMM in source operand positions - * (`a`, `b`); `dst` is always OPK_REG. 
The backend chooses between an - * imm-form encoding and materializing the literal into a scratch - * register based on whether the value fits the instruction's imm - * field. FP binops and UO_FNEG require REG sources — FP literals reach the - * value stack through load_const into OPK_REG. cg and opt's machinize/emit - * both rely on this contract to pass small constants through without - * burning a value-stack register on materialization. */ - void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/, Operand a /*REG|IMM*/, - Operand b /*REG|IMM*/); - void (*unop)(CGTarget*, UnOp, Operand dst /*REG*/, Operand a /*REG|IMM*/); - void (*cmp)(CGTarget*, CmpOp, Operand dst /*REG*/, Operand a /*REG|IMM*/, - Operand b /*REG|IMM*/); /* materialize 0/1 */ - void (*convert)(CGTarget*, ConvKind, Operand dst, Operand src); - - /* ---- calls / return ---- - * CGCallDesc carries the type-checked signature, inspectable ABI - * classification, source operands, and the already-materialized ABI parts - * for direct, indirect/byval, sret, split, and multi-register values. - * `callee.kind == OPK_GLOBAL` is direct; any other kind is indirect. */ - void (*call)(CGTarget*, const CGCallDesc*); - /* Pure query: can `d` be emitted as a sibling (tail) call on this target, - * given the current frame state? Returns NULL if yes; otherwise a short, - * static, human-readable string naming the blocker, used verbatim in the - * musttail diagnostic. Must not emit code and must not abort. - * - * Realizable means the target can transfer control to the callee while - * reusing (and tearing down) the current frame, with the callee's outgoing - * argument area and return mechanism fitting the space the caller itself - * received. 
CG guarantees the return shapes already match (a frontend - * precondition), so neither sret nor variadic is an inherent blocker: - * - sret: realized by forwarding the function's own incoming sret pointer - * to the callee's sret slot — sound because both sides return the same - * type via sret. Not a blocker on its own. - * - variadic: realizable when the callee's (variadic) argument area fits - * the caller's incoming parameter area on this arch. It is a blocker - * only where the realization cannot survive the frame teardown — e.g. - * wasm packs varargs into a caller-frame buffer that a sibling call - * would dangle. - * Typical blockers are therefore stack/argument areas that exceed the - * caller's incoming space, or arch-specific constraints like the wasm - * vararg buffer. A target may also return a "not yet implemented" reason - * for shapes whose codegen it has not built yet; that is honest and safe. - * - * CG owns the tail policy: it calls this first and only sets CG_CALL_TAIL - * when it returns NULL, so a NULL result must guarantee a later call() with - * CG_CALL_TAIL can emit the sibling call. May itself be NULL, meaning the - * target supports no tail calls at all. */ - const char* (*tail_call_unrealizable_reason)(CGTarget*, const CGCallDesc*); - void (*plan_call)(CGTarget*, const CGCallDesc*, CGCallPlan* out); - void (*load_call_arg)(CGTarget*, Operand dst, const CGCallPlanMove*); - void (*store_call_arg)(CGTarget*, const CGCallPlanMove*); - void (*store_call_ret)(CGTarget*, const CGCallPlanRet*, Operand src); - void (*emit_call_plan)(CGTarget*, const CGCallPlan*); - void (*ret)(CGTarget*, const CGABIValue* val_or_null); - - /* ---- alloca ---- - * Dynamic stack allocation. `size` is i64 bytes; `align` is the required - * alignment of the returned pointer. Backend grows the (linear-memory or - * native) shadow stack, returns the pointer in `dst`. v1 only emits this - * via __builtin_alloca; C VLAs are not parsed (__STDC_NO_VLA__). 
*/ - void (*alloca_)(CGTarget*, Operand dst /*REG*/, Operand size, u32 align); - - /* ---- variadics ---- - * va_list type is per-arch (defined in <stdarg.h>); these methods - * implement the four C macros after builtin substitution. ap is always - * passed as &ap; on SysV x86-64 the backend manages the register-save - * area, on WASM the backend walks the spilled-args memory. */ - void (*va_start_)(CGTarget*, Operand ap_addr); - void (*va_arg_)(CGTarget*, Operand dst /*REG*/, Operand ap_addr, - CfreeCgTypeId t); - void (*va_end_)(CGTarget*, Operand ap_addr); - void (*va_copy_)(CGTarget*, Operand dst_ap_addr, Operand src_ap_addr); - - /* ---- atomics ---- */ - void (*atomic_load)(CGTarget*, Operand dst /*REG*/, Operand addr, MemAccess, - MemOrder); - void (*atomic_store)(CGTarget*, Operand addr, Operand src, MemAccess, - MemOrder); - void (*atomic_rmw)(CGTarget*, AtomicOp, Operand dst /*REG: prior value*/, - Operand addr, Operand val, MemAccess, MemOrder); - void (*atomic_cas)(CGTarget*, Operand prior /*REG*/, Operand ok /*REG, i1*/, - Operand addr, Operand expected, Operand desired, MemAccess, - MemOrder success, MemOrder failure); - void (*fence)(CGTarget*, MemOrder); - - /* ---- compiler intrinsics ---- - * Typed dispatch for builtins whose lowering is backend-relevant - * (inline-vs-libcall, inline sequence selection) or whose semantics opt - * cares about (hint pattern matching, exhaustiveness). The IR carries - * IR_INTRINSIC + IRIntrinAux.kind; the wrapped target receives the same call - * at lowering time with materialized operands. 
- * - * Operand shapes by IntrinKind: - * POPCOUNT/CTZ/CLZ/BSWAP* : dsts[0] REG result; args[0] REG input - * MEMCPY/MEMMOVE : dsts none; args = (dst_addr, src_addr, n) - * MEMSET : dsts none; args = (dst_addr, byte, n) - * PREFETCH : dsts none; args = (addr [, rw [, locality]]) - * ASSUME_ALIGNED : dsts[0] REG; args = (ptr, align [, offset]) - * EXPECT : dsts[0] REG; args = (val, expected) - * UNREACHABLE / TRAP : dsts none; args none - * SETJMP : dsts[0] REG i32 result; args = (&buf) - * LONGJMP : dsts none; args = (&buf, val); no return - * ADD/SUB/MUL_OVERFLOW : dsts[0] REG result, dsts[1] REG i1 overflow; - * args = (a, b) - * - * Backends that lack an inline sequence for a given kind may emit a - * normal IR_CALL-shaped sequence to a runtime entry (e.g. memcpy) — the - * IR records intent, the backend chooses mechanism. Hint kinds may be - * lowered as no-ops where the arch has nothing to emit. */ - void (*intrinsic)(CGTarget*, IntrinKind, Operand* dsts, u32 ndst, - const Operand* args, u32 narg); - - /* ---- inline asm ---- - * Per-arch constraint binding + template assembly, packaged as one block. - * ins[i] are pre-evaluated input operands. - * out_ops[i] is filled by the arch with the location holding the result - * for outs[i]; the caller (cg) reads them out after the call. - * "=&r" early-clobber outputs must be allocated disjoint from any input. - * opt_cgtarget records this as a single IR_ASM_BLOCK; the wrapped target - * receives the same call at lowering time with materialized operands. */ - void (*asm_block)(CGTarget*, const char* tmpl, const AsmConstraint* outs, - u32 nout, Operand* out_ops, const AsmConstraint* ins, - u32 nin, const Operand* in_ops, const Sym* clobbers, - u32 nclob); - /* Resolve a register-name clobber Sym (e.g. "x20", "v8") to its physical - * Reg + RegClass. Returns 0 on success, nonzero if `name` is not a - * register (e.g. "memory", "cc", or an unknown identifier). 
Used by - * cg_inline_asm to spill SValues bound to clobbered regs. Optional — - * backends that leave it NULL accept all named clobbers as no-ops. */ - int (*resolve_reg_name)(CGTarget*, Sym name, Reg* out, RegClass* cls_out); - - /* Optional: handle a top-level `__asm__("...")` block (file scope, not - * inside a function). Backends that leave this NULL fall back to the - * generic asm-parser path through CfreeCg.mc. Wasm overrides this to - * diagnose-and-fail since the wasm module has no native asm parser. */ - void (*file_scope_asm)(CGTarget*, const char* src, size_t len); - - /* ---- source-location tracking ---- - * Sets the SrcLoc inherited by subsequent emit-side calls (binop/load/...). - * opt_cgtarget stamps it on every recorded Inst; target CGTargets forward it - * to MCEmitter for Debug line emission. Sticky until the next set_loc. */ - void (*set_loc)(CGTarget*, SrcLoc); - - /* ---- end-of-TU hook ---- - * No-op for plain target CGTargets. opt_cgtarget runs cross-function passes - * (inlining + cleanup) and lowers all buffered IR functions into the - * wrapped target CGTarget. Drivers must call this after the last func_end and - * before reading from `obj` or calling debug_emit. */ - void (*finalize)(CGTarget*); - - void (*destroy)(CGTarget*); -}; - -/* Shared switch lowering. cg's cfree_cg_switch installs this as the - * default target->switch_ behavior; opt's pass_emit calls it when - * replaying IR_SWITCH against a backend that doesn't override switch_. - * Emits a cmp-and-branch chain over (target->cmp_branch + target->jump) - * — fast at -O0 and the input shape an opt-level jump-table rewrite - * starts from. */ -void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* desc); - /* Construct the right target/emitter pair for c->target. */ MCEmitter* mc_new(Compiler*, ObjBuilder*); void mc_free(MCEmitter*); /* Per-function context helpers. 
Backends call mc_begin_function from - * their CGTarget func_begin (after computing the post-alignment function + * their CgTarget func_begin (after computing the post-alignment function * start) and mc_end_function from func_end. The pair sets / clears * MCEmitter.cur_func_* — the metadata that emit_label_data_reloc reads * to resolve deferred intra-function label fixups in data sections. */ @@ -1068,10 +143,6 @@ void mc_end_function(MCEmitter*); * No-op when no functions called cfi_startproc. Idempotent. */ void mc_emit_eh_frame(MCEmitter*); -CGTarget* cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*); -void cgtarget_finalize(CGTarget*); -void cgtarget_free(CGTarget*); - /* Construct the MCEmitter + (optionally) Debug pair that a machine-code * CGBackend's `make` typically needs. On success, sets *out_mc to a fresh * MCEmitter; sets *out_debug to a Debug producer (and wires mc->debug) when @@ -1086,11 +157,11 @@ CfreeStatus cg_mc_debug_new(Compiler*, ObjBuilder*, const CfreeCodeOptions*, * base + (index << log2_scale) into `scratch` and returns a plain * OPK_INDIRECT(scratch, ofs). Otherwise returns `addr` unchanged. The caller * supplies the scratch register from its scratch pool. */ -Operand arch_lower_indexed(CGTarget*, Operand addr, Reg scratch); +Operand arch_lower_indexed(CgTarget*, Operand addr, Reg scratch); /* ---- Disassembler hook ---- * Bytes -> records, not frontend-driven lowering, so this is a separate - * hook from CGTarget/MCEmitter. The internal implementation may share + * hook from CgTarget/MCEmitter. The internal implementation may share * encoding tables with the per-arch backend (sequencing concern, not an * interface concern). Constructed for c->target. * @@ -1240,17 +311,6 @@ typedef struct ArchDbgOps { int (*is_call)(const ArchDbgInsn* insn); } ArchDbgOps; -/* A CGBackend is the unit the registry hands out: "give me a CGTarget for - * this Compiler + ObjBuilder + emit options." 
Machine-code archs expose one - * (which internally creates MCEmitter + optional Debug then builds the - * arch-specific CGTarget); c_target exposes one (which reads - * opts->c_source_writer). Callers never branch on backend kind — they - * just call backend->make(...). */ -typedef struct CGBackend { - const char* name; - CGTarget* (*make)(Compiler*, ObjBuilder*, const CfreeCodeOptions*); -} CGBackend; - typedef struct ArchImpl { /* First field, so `(const CGBackend*)&arch_impl_x` is the arch's backend * view. Every machine-code arch is a CGBackend by composition; c_target @@ -1260,10 +320,10 @@ typedef struct ArchImpl { CfreeArchKind kind; const char* name; - /* Low-level CGTarget constructor: caller supplies the MCEmitter. Tests use + /* Low-level CgTarget constructor: caller supplies the MCEmitter. Tests use * this directly via the cgtarget_new() wrapper; the arch's `backend.make` * also calls it after creating an MCEmitter internally. */ - CGTarget* (*cgtarget_new)(Compiler*, ObjBuilder*, MCEmitter*); + CgTarget* (*cgtarget_new)(Compiler*, ObjBuilder*, MCEmitter*); ArchAsm* (*asm_new)(Compiler*); ArchDisasm* (*disasm_new)(Compiler*); int (*apply_label_fixup)(Compiler*, const ArchLabelFixup*); @@ -1296,18 +356,6 @@ typedef struct ArchImpl { const ArchImpl* arch_lookup(CfreeArchKind); const ArchImpl* arch_for_compiler(const Compiler*); -/* Pick the right CGBackend for a session given the compiler's target arch - * and the per-emit CodeOptions. Returns &arch_for_compiler(c)->backend for - * normal machine-code emission, or &cg_backend_c_target when opts requests - * C-source emission. Returns NULL when no backend in this build can serve - * the request — callers should treat that the same as any other - * unsupported-target outcome (CFREE_UNSUPPORTED). - * - * This is the only entry point session-level emission code needs; the - * #if CFREE_ARCH_*_ENABLED gating lives entirely inside the registry. 
*/ -const CGBackend* cg_backend_for_session(const Compiler*, - const CfreeCodeOptions*); - ArchDisasm* arch_disasm_new(Compiler*); u32 arch_disasm_decode(ArchDisasm*, const u8* bytes, size_t len, u64 vaddr, CfreeInsn* out); diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c @@ -1,4 +1,4 @@ -/* C-source emission for the CGTarget vtable. See doc/CBACKEND.md. +/* C-source emission for the CgTarget vtable. See doc/CBACKEND.md. * * Output strategy * --------------- @@ -10,12 +10,11 @@ * * c_finalize flushes a tiny prologue + body to the writer. * - * Register declaration is lazy: every operand emit goes through c_ensure_reg, - * which the first time it sees a Reg id appends a declaration to decls keyed - * on the Operand's source type. Frame slots are declared eagerly when CG - * calls c_frame_slot. */ + * Local declaration is lazy: every operand emit goes through c_ensure_local, + * which appends one declaration for each semantic local. */ #include <stdio.h> +#include <string.h> #include "arch/c_target/internal.h" #include "cg/type.h" @@ -29,7 +28,24 @@ static void c_ensure_typedef(CTarget* t, CfreeCgTypeId tid); static const char* c_typedef_name(CTarget* t, CfreeCgTypeId tid); static const char* c_typename(CTarget* t, CfreeCgTypeId type); +static CfreeCgTypeId c_local_type_or_panic(CTarget* t, CGLocal local); +static void c_ensure_tuple_typedef(CTarget* t, const CfreeCgTypeId* types, + u32 ntypes); +static void c_emit_tuple_type_name(CTarget* t, CBuf* b, + const CfreeCgTypeId* types, u32 ntypes); +static Operand c_op_local(CGLocal local, CfreeCgTypeId type); +static int c_type_is_aggregate(CTarget* t, CfreeCgTypeId type); +static int c_type_is_bool(CTarget* t, CfreeCgTypeId type); +static int c_type_is_ptr(CTarget* t, CfreeCgTypeId type); static int c_operand_is_ptr_typed(CTarget* t, Operand op); +static void c_emit_addr_deref(CTarget* t, Operand addr, + CfreeCgTypeId access_type); +static void c_emit_copy_addr(CTarget* t, Operand addr); +CGLocal 
c_local(CgTarget* T, const CGLocalDesc* d); +static void c_ensure_forward_decl_with_results(CTarget* t, ObjSymId sym, + CfreeCgTypeId fn_type, + const CfreeCgTypeId* result_types, + u32 nresults); /* Private accessor on ObjBuilder (defined in obj/obj.c, not in obj.h). * Same forward-decl trick as obj_tls.c uses. */ ObjSymId obj_tlv_bootstrap_get(const ObjBuilder*); @@ -50,7 +66,7 @@ void c_writer_puts(CTarget* t, const char* s) { c_writer_write(t, s, n); } -/* === Reg / type emission === */ +/* === CLocal / type emission === */ static const char* c_int_type_name_for_width(u32 width, int signed_) { switch (width) { @@ -63,6 +79,8 @@ static const char* c_int_type_name_for_width(u32 width, int signed_) { return signed_ ? "int32_t" : "uint32_t"; case 64: return signed_ ? "int64_t" : "uint64_t"; + case 128: + return signed_ ? "__int128" : "unsigned __int128"; default: return NULL; } @@ -299,33 +317,65 @@ void c_emit_type(CTarget* t, CBuf* b, CfreeCgTypeId type) { cbuf_puts(b, c_typename(t, type)); } -void c_reg_name(Reg r, char* out, size_t cap) { - size_t i = 0; - if (cap == 0) return; - if (cap > 1) out[i++] = 'v'; - char tmp[16]; - size_t n = 0; - u32 v = (u32)r; - if (v == 0) { - tmp[n++] = '0'; - } else { - while (v) { - tmp[n++] = (char)('0' + (v % 10)); - v /= 10; - } +static CfreeCgTypeId c_local_type_or_panic(CTarget* t, CGLocal local) { + if ((u32)local < t->local_cap && t->local_declared[local] && + t->local_type[local]) { + return t->local_type[local]; } - while (n && i + 1 < cap) out[i++] = tmp[--n]; - out[i] = '\0'; + compiler_panic(t->c, t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: unknown local type for v%u", (unsigned)local); + return CFREE_CG_TYPE_NONE; +} + +static void c_emit_tuple_type_name(CTarget* t, CBuf* b, + const CfreeCgTypeId* types, u32 ntypes) { + (void)t; + cbuf_puts(b, "__cfree_tuple"); + cbuf_put_u64(b, (u64)ntypes); + for (u32 i = 0; i < ntypes; ++i) { + cbuf_putc(b, '_'); + cbuf_put_u64(b, (u64)api_unalias_type(t->c, types[i])); + } +} + +static void c_ensure_tuple_typedef(CTarget* t, const CfreeCgTypeId* types, + u32 ntypes) { + if (ntypes <= 1) return; + cbuf_puts(&t->typedefs, "#ifndef __cfree_tuple_guard_"); + c_emit_tuple_type_name(t, &t->typedefs, types, ntypes); + cbuf_puts(&t->typedefs, "\n#define __cfree_tuple_guard_"); + c_emit_tuple_type_name(t, &t->typedefs, types, ntypes); + cbuf_puts(&t->typedefs, "\ntypedef struct "); + c_emit_tuple_type_name(t, &t->typedefs, types, ntypes); + cbuf_puts(&t->typedefs, " {"); + for (u32 i = 0; i < ntypes; ++i) { + cbuf_putc(&t->typedefs, ' '); + c_emit_type(t, &t->typedefs, types[i]); + cbuf_puts(&t->typedefs, " f"); + cbuf_put_u64(&t->typedefs, (u64)i); + cbuf_putc(&t->typedefs, ';'); + } + cbuf_puts(&t->typedefs, " } "); + c_emit_tuple_type_name(t, &t->typedefs, types, ntypes); + cbuf_puts(&t->typedefs, ";\n#endif\n"); +} + +static Operand c_op_local(CGLocal local, CfreeCgTypeId type) { + Operand op; + memset(&op, 0, sizeof op); + op.kind = OPK_LOCAL; + op.type = type; + op.v.local = local; + return op; } -static void c_slot_name(FrameSlot s, char* out, size_t cap) { +void c_local_name(CLocal r, char* out, size_t cap) { size_t i = 0; if (cap == 0) return; - const char* prefix = "slot_"; - while (*prefix && i + 1 < cap) out[i++] = *prefix++; + if (cap > 1) out[i++] = 'v'; char tmp[16]; size_t n = 0; - u32 v = (u32)s; + u32 v = (u32)r; if (v == 0) { tmp[n++] = '0'; } else { @@ -338,39 +388,24 @@ static void c_slot_name(FrameSlot s, char* out, size_t cap) { out[i] = '\0'; } -static void c_grow_reg_table(CTarget* t, u32 needed) { 
+static void c_grow_local_table(CTarget* t, u32 needed) { Heap* h = t->c->ctx->heap; - u32 newcap = t->reg_cap ? t->reg_cap : 16; + u32 newcap = t->local_cap ? t->local_cap : 16; while (newcap < needed) newcap *= 2; - u8* nd = (u8*)h->realloc(h, t->reg_declared, t->reg_cap, newcap, 1); + u8* nd = (u8*)h->realloc(h, t->local_declared, t->local_cap, newcap, 1); CfreeCgTypeId* nt = (CfreeCgTypeId*)h->realloc( - h, t->reg_type, t->reg_cap * sizeof(CfreeCgTypeId), + h, t->local_type, t->local_cap * sizeof(CfreeCgTypeId), newcap * sizeof(CfreeCgTypeId), _Alignof(CfreeCgTypeId)); if ((!nd && newcap) || (!nt && newcap)) { compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: out of memory"); } - for (u32 i = t->reg_cap; i < newcap; ++i) { + for (u32 i = t->local_cap; i < newcap; ++i) { nd[i] = 0; nt[i] = 0; } - t->reg_declared = nd; - t->reg_type = nt; - t->reg_cap = newcap; -} - -static void c_grow_slot_table(CTarget* t, u32 needed) { - Heap* h = t->c->ctx->heap; - u32 newcap = t->slot_cap ? t->slot_cap : 8; - while (newcap < needed) newcap *= 2; - CfreeCgTypeId* nt = (CfreeCgTypeId*)h->realloc( - h, t->slot_type, t->slot_cap * sizeof(CfreeCgTypeId), - newcap * sizeof(CfreeCgTypeId), _Alignof(CfreeCgTypeId)); - if (!nt && newcap) { - compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: out of memory"); - } - for (u32 i = t->slot_cap; i < newcap; ++i) nt[i] = 0; - t->slot_type = nt; - t->slot_cap = newcap; + t->local_declared = nd; + t->local_type = nt; + t->local_cap = newcap; } /* Emit the trailing `__attribute__((unused)) = INIT;` for a local decl of @@ -384,21 +419,30 @@ static void c_emit_zero_init(CTarget* t, CfreeCgTypeId ty) { : " __attribute__((unused)) = 0;\n"); } -void c_ensure_reg(CTarget* t, Reg r, CfreeCgTypeId type, RegClass cls) { - (void)cls; - if (r == (Reg)REG_NONE) { +void c_ensure_local(CTarget* t, CLocal r, CfreeCgTypeId type) { + if (r == (CLocal)CG_LOCAL_NONE) { compiler_panic(t->c, (SrcLoc){0, 0, 0}, - "C target: REG_NONE reached emission"); + "C 
target: CG_LOCAL_NONE reached emission"); + } + if ((u32)r >= t->local_cap) c_grow_local_table(t, (u32)r + 1u); + if (t->local_declared[r]) { + if (type && api_unalias_type(t->c, t->local_type[r]) != + api_unalias_type(t->c, type)) { + compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: local v%u used with inconsistent type " + "(declared %u, used %u)", + (unsigned)r, (unsigned)api_unalias_type(t->c, t->local_type[r]), + (unsigned)api_unalias_type(t->c, type)); + } + return; } - if ((u32)r >= t->reg_cap) c_grow_reg_table(t, (u32)r + 1u); - if (t->reg_declared[r]) return; - t->reg_declared[r] = 1; - t->reg_type[r] = type; + t->local_declared[r] = 1; + t->local_type[r] = type; cbuf_puts(&t->decls, " "); c_emit_type(t, &t->decls, type); cbuf_puts(&t->decls, " "); char buf[24]; - c_reg_name(r, buf, sizeof buf); + c_local_name(r, buf, sizeof buf); cbuf_puts(&t->decls, buf); /* Zero-init kills -Wsometimes-uninitialized for control flow clang can't * reason through; the host C compiler DSEs the init when a real @@ -423,9 +467,9 @@ static void c_emit_imm_literal(CTarget* t, i64 v) { * `addr_mode` helper in the machine-code backends so all targets share a * single in-backend view of `base [+ index << log2_scale] + ofs`. 
*/ typedef struct CAddrMode { - Reg base; - Reg index; /* REG_NONE when no index operand */ - u8 log2_scale; /* meaningful only when index != REG_NONE */ + CLocal base; + CLocal index; /* CG_LOCAL_NONE when no index operand */ + u8 log2_scale; /* meaningful only when index != CG_LOCAL_NONE */ i32 ofs; } CAddrMode; @@ -445,11 +489,11 @@ static CAddrMode c_addr_mode(Operand addr) { static void c_emit_indirect_addr_expr(CTarget* t, CAddrMode m) { char rbuf[24]; cbuf_puts(&t->body, "(char*)"); - c_reg_name(m.base, rbuf, sizeof rbuf); + c_local_name(m.base, rbuf, sizeof rbuf); cbuf_puts(&t->body, rbuf); - if (m.index != REG_NONE) { + if (m.index != CG_LOCAL_NONE) { cbuf_puts(&t->body, " + (uintptr_t)"); - c_reg_name(m.index, rbuf, sizeof rbuf); + c_local_name(m.index, rbuf, sizeof rbuf); cbuf_puts(&t->body, rbuf); cbuf_puts(&t->body, " * "); /* Spell as the explicit 1/2/4/8 literal corresponding to log2_scale. @@ -467,7 +511,7 @@ static void c_emit_indirect_addr_expr(CTarget* t, CAddrMode m) { * copy_bytes/set_bytes, inline asm). */ static void c_assert_no_index(CTarget* t, Operand addr, const char* where) { if (addr.kind != OPK_INDIRECT) return; - if (addr.v.ind.index == REG_NONE) return; + if (addr.v.ind.index == CG_LOCAL_NONE) return; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; compiler_panic(t->c, loc, "C target: %.*s: indexed OPK_INDIRECT not allowed here", @@ -477,11 +521,6 @@ static void c_assert_no_index(CTarget* t, Operand addr, const char* where) { void c_emit_operand(CTarget* t, Operand op) { char buf[24]; switch (op.kind) { - case OPK_REG: - c_ensure_reg(t, op.v.reg, op.type, (RegClass)op.cls); - c_reg_name(op.v.reg, buf, sizeof buf); - cbuf_puts(&t->body, buf); - return; case OPK_IMM: if (op.type == CFREE_CG_TYPE_NONE) { /* Untyped IMM (e.g. memset byte value): emit the literal raw. 
*/ @@ -497,24 +536,9 @@ void c_emit_operand(CTarget* t, Operand op) { } return; case OPK_LOCAL: { - /* If the operand's type matches the slot's declared type (the common - * case), emit `slot_N` directly. Otherwise, deref the slot's address - * cast to a pointer of the operand type so we read it as the type CG - * actually wants. */ - c_slot_name(op.v.frame_slot, buf, sizeof buf); - u32 idx = (u32)op.v.frame_slot - 1u; - CfreeCgTypeId slot_ty = - (idx < t->nslots) ? t->slot_type[idx] : (CfreeCgTypeId)0; - if (op.type == 0 || slot_ty == 0 || - api_unalias_type(t->c, op.type) == api_unalias_type(t->c, slot_ty)) { - cbuf_puts(&t->body, buf); - } else { - cbuf_puts(&t->body, "(*("); - c_emit_type(t, &t->body, op.type); - cbuf_puts(&t->body, "*)&"); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, ")"); - } + c_ensure_local(t, op.v.local, op.type); + c_local_name(op.v.local, buf, sizeof buf); + cbuf_puts(&t->body, buf); return; } case OPK_INDIRECT: { @@ -580,13 +604,6 @@ void c_emit_operand(CTarget* t, Operand op) { } } -/* CG's value stack reuses Reg ids across different operand types (e.g. a - * register first written as void* may later receive a bool result). The C - * declaration is fixed at first sighting, so every write must explicitly - * cast the RHS back to the declared type. For pointer-typed destinations we - * insert a uintptr_t bridge to avoid -Wint-conversion when the RHS happens - * to be an integer expression; float destinations skip the bridge since - * pointer/float mixing doesn't occur in cfree IR. */ static int c_type_is_float(CTarget* t, CfreeCgTypeId type) { if (type == CFREE_CG_TYPE_NONE) return 0; const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, type)); @@ -599,46 +616,21 @@ static int c_types_equiv(CTarget* t, CfreeCgTypeId a, CfreeCgTypeId b) { return api_unalias_type(t->c, a) == api_unalias_type(t->c, b); } -/* CG type that the C expression for `op` will produce when emitted via - * c_emit_operand. 
Differs from op.type only when reg-id reuse caused the - * reg's first-sighting decl type to lock to something else: in that case - * the emitted `vN` has the declared type, and a downstream cast back to - * op.type is still needed (the uintptr_t bridge handles that). */ -static CfreeCgTypeId c_operand_emit_type(CTarget* t, Operand op) { - if (op.kind == OPK_REG && (u32)op.v.reg < t->reg_cap && - t->reg_declared[op.v.reg]) { - return t->reg_type[op.v.reg]; - } - if (op.kind == OPK_LOCAL) { - u32 idx = (u32)op.v.frame_slot - 1u; - if (idx < t->nslots) { - /* c_emit_operand emits `slot_N` (slot_type) when op.type matches, - * otherwise a deref-cast to op.type. */ - if (op.type == 0 || api_unalias_type(t->c, op.type) == - api_unalias_type(t->c, t->slot_type[idx])) { - return t->slot_type[idx]; - } - } - } - return op.type; -} - -/* Emit " vN = (DECL_T)(uintptr_t)(" or, for float regs, " vN = (DECL_T)(". - * Caller must then emit the RHS expression and call c_emit_reg_assign_close. +/* Emit " vN = " plus any cast needed for a C assignment expression. + * Caller must then emit the RHS expression and call c_emit_local_assign_close. * * `rhs_ty` is the CG type the RHS expression will produce (or 0 if unknown). - * When it matches the reg's declared type, the cast wrappers are elided — - * `(DECL_T)(uintptr_t)(...)` is a defensive bridge for reg-id reuse across - * types, and emitting it when types match buries the actual code in noise. - * The outer `(...)` parens are kept so the closer's `);` stays balanced. */ -static void c_emit_reg_assign_open(CTarget* t, Reg r, CfreeCgTypeId rhs_ty) { - if ((u32)r >= t->reg_cap || !t->reg_declared[r]) { + * Pointer/int crossings bridge through uintptr_t to keep host-C diagnostics + * quiet. The outer `(...)` parens are kept so the closer's `);` stays + * balanced. 
*/ +static void c_emit_local_assign_open(CTarget* t, CLocal r, CfreeCgTypeId rhs_ty) { + if ((u32)r >= t->local_cap || !t->local_declared[r]) { compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, - "C target: assign to undeclared reg v%u", (unsigned)r); + "C target: assign to undeclared local v%u", (unsigned)r); } - CfreeCgTypeId decl = t->reg_type[r]; + CfreeCgTypeId decl = t->local_type[r]; char buf[24]; - c_reg_name(r, buf, sizeof buf); + c_local_name(r, buf, sizeof buf); cbuf_puts(&t->body, " "); cbuf_puts(&t->body, buf); cbuf_puts(&t->body, " = "); @@ -646,14 +638,15 @@ static void c_emit_reg_assign_open(CTarget* t, Reg r, CfreeCgTypeId rhs_ty) { cbuf_putc(&t->body, '('); c_emit_type(t, &t->body, decl); cbuf_putc(&t->body, ')'); - if (!c_type_is_float(t, decl)) { + if (!c_type_is_float(t, decl) && + (!rhs_ty || c_type_is_ptr(t, decl) || c_type_is_ptr(t, rhs_ty))) { cbuf_puts(&t->body, "(uintptr_t)"); } } cbuf_puts(&t->body, "("); } -static void c_emit_reg_assign_close(CTarget* t) { cbuf_puts(&t->body, ");\n"); } +static void c_emit_local_assign_close(CTarget* t) { cbuf_puts(&t->body, ");\n"); } void c_emit_operand_signed(CTarget* t, Operand op, int signed_) { u32 w = c_int_width_for_signedness(t, op.type); @@ -667,17 +660,14 @@ void c_emit_operand_signed(CTarget* t, Operand op, int signed_) { c_emit_operand(t, op); return; } - /* If the operand's C declaration is pointer-typed (reg-id reuse across - * types), bridge through uintptr_t so the narrow int cast doesn't trip - * -Wvoid-pointer-to-int-cast. */ int via_uptr = c_operand_is_ptr_typed(t, op); - /* CG ints are width-only; the C target declares every int reg/slot/IMM + /* CG ints are width-only; the C target declares every int local/IMM * as the signed `int{W}_t` of its width. So when `signed_` is true and * the operand's emit-width matches `w`, the explicit cast is redundant * with what c_emit_operand already produces. 
Skipping it cuts the * ubiquitous `((int32_t)((int32_t)23))` double-cast down to one. */ if (!via_uptr && signed_) { - CfreeCgTypeId et = c_operand_emit_type(t, op); + CfreeCgTypeId et = op.type; if (c_int_width_for_signedness(t, et) == w) { c_emit_operand(t, op); return; @@ -700,41 +690,39 @@ static int c_type_is_ptr(CTarget* t, CfreeCgTypeId type) { return ty && ty->kind == CFREE_CG_TYPE_PTR; } -/* Returns 1 if `op`'s emitted C expression has pointer type. Beyond the - * obvious `op.type` check, CG can hand us a reg whose first-sighting type - * was `void*` paired with an integer-typed operand later (the IR reuses ids - * across types). The C declaration is locked at first sighting, so the - * lvalue we'd emit is still a `void*`. */ +static int c_type_is_bool(CTarget* t, CfreeCgTypeId type) { + if (type == CFREE_CG_TYPE_NONE) return 0; + const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, type)); + return ty && ty->kind == CFREE_CG_TYPE_BOOL; +} + +static int c_type_is_aggregate(CTarget* t, CfreeCgTypeId type) { + if (type == CFREE_CG_TYPE_NONE) return 0; + const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, type)); + return ty && + (ty->kind == CFREE_CG_TYPE_RECORD || ty->kind == CFREE_CG_TYPE_ARRAY); +} + static int c_operand_is_ptr_typed(CTarget* t, Operand op) { if (c_type_is_ptr(t, op.type)) return 1; - if (op.kind == OPK_REG && (u32)op.v.reg < t->reg_cap && - t->reg_declared[op.v.reg]) { - if (c_type_is_ptr(t, t->reg_type[op.v.reg])) return 1; - } - if (op.kind == OPK_LOCAL) { - u32 idx = (u32)op.v.frame_slot - 1u; - if (idx < t->nslots && c_type_is_ptr(t, t->slot_type[idx])) return 1; - } return 0; } /* Emit `(target_ty)(uintptr_t)(op)` (or `(target_ty)(op)` for float - * target_ty). Used when the caller knows the C expression type they want and - * the source operand may have been declared with a different type (CG reuses - * reg ids across types). Without the bridge, gcc trips -Wint-conversion. 
- * - * When the operand's emit-type already matches target_ty (the common case - * when reg-id reuse hasn't happened), drop the wrappers — they add noise - * for no semantic gain. */ + * target_ty). Used when the caller needs a specific C expression type. + * Pointer/int crossings bridge through uintptr_t. */ static void c_emit_operand_as(CTarget* t, Operand op, CfreeCgTypeId target_ty) { - if (c_types_equiv(t, c_operand_emit_type(t, op), target_ty)) { + if (c_types_equiv(t, op.type, target_ty)) { c_emit_operand(t, op); return; } cbuf_puts(&t->body, "("); c_emit_type(t, &t->body, target_ty); cbuf_puts(&t->body, ")"); - if (!c_type_is_float(t, target_ty)) cbuf_puts(&t->body, "(uintptr_t)"); + if (!c_type_is_float(t, target_ty) && + (!op.type || c_type_is_ptr(t, op.type) || c_type_is_ptr(t, target_ty))) { + cbuf_puts(&t->body, "(uintptr_t)"); + } cbuf_puts(&t->body, "("); c_emit_operand(t, op); cbuf_puts(&t->body, ")"); @@ -776,26 +764,19 @@ static void c_emit_operand_arith_signed(CTarget* t, Operand op, int signed_) { /* Emit a C lvalue expression for an addr operand (OPK_LOCAL / OPK_GLOBAL / * OPK_INDIRECT) using `access_type` as the access type. The result is the - * full `*(T*)(...)` dereference (or the C variable directly when the access - * type matches the underlying slot/symbol). */ + * full `*(T*)(...)` dereference, or the C variable directly when the access + * type matches the underlying local/global object. */ static void c_emit_addr_deref(CTarget* t, Operand addr, CfreeCgTypeId access_type) { char buf[24]; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; switch (addr.kind) { case OPK_LOCAL: { - /* slot_N is a typed C variable. If access_type matches the slot's - * declared type, emit `slot_N` directly. Otherwise, treat the slot as - * raw storage and deref through `*(access_ty*)&slot_N` so the access - * sees the requested type (CG can store/load a slot with a wider or - * differently typed view than the declared one). 
*/ - c_slot_name(addr.v.frame_slot, buf, sizeof buf); - u32 idx = (u32)addr.v.frame_slot - 1u; - CfreeCgTypeId slot_ty = - (idx < t->nslots) ? t->slot_type[idx] : (CfreeCgTypeId)0; - if (access_type == 0 || slot_ty == 0 || + c_ensure_local(t, addr.v.local, addr.type); + c_local_name(addr.v.local, buf, sizeof buf); + if (access_type == 0 || addr.type == 0 || api_unalias_type(t->c, access_type) == - api_unalias_type(t->c, slot_ty)) { + api_unalias_type(t->c, addr.type)) { cbuf_puts(&t->body, buf); } else { cbuf_puts(&t->body, "(*("); @@ -822,17 +803,15 @@ static void c_emit_addr_deref(CTarget* t, Operand addr, } case OPK_INDIRECT: { CAddrMode m = c_addr_mode(addr); - /* Ensure the base reg is declared. We can't readily look up its type - * post-hoc, so reuse whatever it was first declared with. */ - if ((u32)m.base >= t->reg_cap || !t->reg_declared[m.base]) { + if ((u32)m.base >= t->local_cap || !t->local_declared[m.base]) { compiler_panic(t->c, loc, - "C target: indirect on undeclared base reg v%u", + "C target: indirect on undeclared base local v%u", (unsigned)m.base); } - if (m.index != REG_NONE && - ((u32)m.index >= t->reg_cap || !t->reg_declared[m.index])) { + if (m.index != CG_LOCAL_NONE && + ((u32)m.index >= t->local_cap || !t->local_declared[m.index])) { compiler_panic(t->c, loc, - "C target: indirect on undeclared index reg v%u", + "C target: indirect on undeclared index local v%u", (unsigned)m.index); } cbuf_puts(&t->body, "(*("); @@ -858,8 +837,10 @@ static void c_emit_lvalue_addr(CTarget* t, Operand lv, CfreeCgTypeId dst_type) { case OPK_LOCAL: cbuf_puts(&t->body, "(("); c_emit_type(t, &t->body, dst_type); - cbuf_puts(&t->body, ")&"); - c_slot_name(lv.v.frame_slot, buf, sizeof buf); + cbuf_puts(&t->body, ")"); + cbuf_puts(&t->body, "&"); + c_ensure_local(t, lv.v.local, lv.type); + c_local_name(lv.v.local, buf, sizeof buf); cbuf_puts(&t->body, buf); cbuf_puts(&t->body, ")"); return; @@ -972,14 +953,27 @@ void c_emit_prologue(CTarget* t) { /* Write `RetT 
name(P0, P1, ...)` (without trailing `;` or `{`) to `b`. */ static void c_emit_func_signature(CTarget* t, CBuf* b, const char* name, - CfreeCgTypeId fn_type) { + CfreeCgTypeId fn_type, + const CfreeCgTypeId* result_types, + u32 nresults) { CfreeCgTypeId ret_type = cg_type_func_ret_id(t->c, fn_type); const CgType* fty = cg_type_get(t->c, api_unalias_type(t->c, fn_type)); SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; if (!fty || fty->kind != CFREE_CG_TYPE_FUNC) { compiler_panic(t->c, loc, "C target: fn_type is not a function type"); } - c_emit_type(t, b, ret_type); + if (!result_types) { + nresults = cg_type_is_void(t->c, ret_type) ? 0u : 1u; + result_types = &ret_type; + } + if (nresults == 0) { + cbuf_puts(b, "void"); + } else if (nresults == 1) { + c_emit_type(t, b, result_types[0]); + } else { + c_ensure_tuple_typedef(t, result_types, nresults); + c_emit_tuple_type_name(t, b, result_types, nresults); + } cbuf_puts(b, " "); cbuf_puts(b, name); cbuf_puts(b, "("); @@ -1000,19 +994,19 @@ static void c_emit_func_signature(CTarget* t, CBuf* b, const char* name, cbuf_puts(b, ")"); } -void c_func_begin(CGTarget* T, const CGFuncDesc* fd) { +void c_func_begin(CgTarget* T, const CGFuncDesc* fd) { CTarget* t = (CTarget*)T; c_emit_prologue(t); t->cur_fn = fd; cbuf_reset(&t->decls); - for (u32 i = 0; i < t->reg_cap; ++i) { - t->reg_declared[i] = 0; - t->reg_type[i] = 0; + for (u32 i = 0; i < t->local_cap; ++i) { + t->local_declared[i] = 0; + t->local_type[i] = 0; } - t->nslots = 0; t->next_label = 0; + t->next_local = 0; t->next_tmp = 0; t->nscopes = 0; t->last_was_terminator = 0; @@ -1023,14 +1017,19 @@ void c_func_begin(CGTarget* T, const CGFuncDesc* fd) { /* Forward-declare so out-of-order callers and same-TU references find the * prototype regardless of definition order. 
*/ - c_ensure_forward_decl(t, fd->sym, fd->fn_type); + c_ensure_forward_decl_with_results(t, fd->sym, fd->fn_type, fd->result_types, + fd->nresults); - c_emit_func_signature(t, &t->body, name, fd->fn_type); + c_emit_func_signature(t, &t->body, name, fd->fn_type, fd->result_types, + fd->nresults); cbuf_puts(&t->body, " {\n"); t->fn_body_start = t->body.len; } -void c_ensure_forward_decl(CTarget* t, ObjSymId sym, CfreeCgTypeId fn_type) { +static void c_ensure_forward_decl_with_results(CTarget* t, ObjSymId sym, + CfreeCgTypeId fn_type, + const CfreeCgTypeId* result_types, + u32 nresults) { Heap* h = t->c->ctx->heap; if ((u32)sym >= t->sym_forwarded_cap) { u32 newcap = t->sym_forwarded_cap ? t->sym_forwarded_cap : 16; @@ -1049,7 +1048,8 @@ void c_ensure_forward_decl(CTarget* t, ObjSymId sym, CfreeCgTypeId fn_type) { const char* name = c_sym_name(t, sym); const ObjSym* os = obj_symbol_get(t->obj, sym); if ((os && (os->kind == SK_FUNC || os->kind == SK_IFUNC)) || fn_type != 0) { - c_emit_func_signature(t, &t->forwards, name, fn_type); + c_emit_func_signature(t, &t->forwards, name, fn_type, result_types, + nresults); cbuf_puts(&t->forwards, ";\n"); } else { if (os && os->bind == SB_LOCAL) @@ -1068,7 +1068,11 @@ void c_ensure_forward_decl(CTarget* t, ObjSymId sym, CfreeCgTypeId fn_type) { } } -void c_func_end(CGTarget* T) { +void c_ensure_forward_decl(CTarget* t, ObjSymId sym, CfreeCgTypeId fn_type) { + c_ensure_forward_decl_with_results(t, sym, fn_type, NULL, 0); +} + +void c_func_end(CgTarget* T) { CTarget* t = (CTarget*)T; size_t splice_at = t->fn_body_start; size_t body_after = t->body.len; @@ -1098,65 +1102,41 @@ void c_func_end(CGTarget* T) { t->cur_fn = NULL; } -/* === frame_slot, param === */ - -FrameSlot c_frame_slot(CGTarget* T, const FrameSlotDesc* fsd) { - CTarget* t = (CTarget*)T; - if (t->nslots + 1u >= t->slot_cap) c_grow_slot_table(t, t->nslots + 2u); - /* Slot ids start at 1 (FRAME_SLOT_NONE == 0). 
*/ - FrameSlot id = (FrameSlot)(t->nslots + 1u); - t->slot_type[t->nslots] = fsd->type; - t->nslots += 1u; - - cbuf_puts(&t->decls, " "); - c_emit_type(t, &t->decls, fsd->type); - cbuf_puts(&t->decls, " "); - char buf[24]; - c_slot_name(id, buf, sizeof buf); - cbuf_puts(&t->decls, buf); - /* See c_ensure_reg — same zero-init reasoning. */ - c_emit_zero_init(t, fsd->type); - return id; -} +/* === locals, params === */ -CGLocalStorage c_param(CGTarget* T, const CGParamDesc* pd) { +CGLocal c_param(CgTarget* T, const CGParamDesc* pd) { CTarget* t = (CTarget*)T; - CGLocalStorage st = pd->storage; - /* Allocate a frame slot for the param, then emit "slot_N = pN;". */ - FrameSlotDesc fsd; - fsd.type = pd->type; - fsd.name = pd->name; - fsd.loc = pd->loc; - fsd.size = pd->size; - fsd.align = pd->align; - fsd.kind = FS_PARAM; - fsd.pad = 0; - fsd.flags = 0; - if (pd->flags & CG_LOCAL_ADDR_TAKEN) fsd.flags |= FSF_ADDR_TAKEN; - FrameSlot slot = c_frame_slot(T, &fsd); + CGLocalDesc d; + memset(&d, 0, sizeof d); + d.type = pd->type; + d.name = pd->name; + d.loc = pd->loc; + d.size = pd->size; + d.align = pd->align; + d.flags = pd->flags; + CGLocal local = c_local(T, &d); + c_ensure_local(t, local, pd->type); char buf[24]; - c_slot_name(slot, buf, sizeof buf); + c_local_name(local, buf, sizeof buf); cbuf_puts(&t->body, " "); cbuf_puts(&t->body, buf); cbuf_puts(&t->body, " = p"); cbuf_put_u64(&t->body, (u64)pd->index); cbuf_puts(&t->body, ";\n"); - st.kind = CG_LOCAL_STORAGE_FRAME; - st.v.frame_slot = slot; - return st; + return local; } /* === load_imm, copy, binop === */ -void c_load_imm(CGTarget* T, Operand dst, i64 imm) { +void c_load_imm(CgTarget* T, Operand dst, i64 imm) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: load_imm dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: load_imm dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* The literal is emitted bare; its C type is `long long`. We can drop * the bridge cast iff the bare assignment compiles cleanly: * - integer dst: imm must fit in dst's signed range (else @@ -1174,22 +1154,22 @@ void c_load_imm(CGTarget* T, Operand dst, i64 imm) { } else { can_drop_bridge = 0; } - c_emit_reg_assign_open(t, dst.v.reg, + c_emit_local_assign_open(t, dst.v.local, can_drop_bridge ? dst.type : (CfreeCgTypeId)0); c_emit_imm_literal(t, imm); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } -void c_copy(CGTarget* T, Operand dst, Operand src) { +void c_copy(CgTarget* T, Operand dst, Operand src) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: copy dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: copy dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg, c_operand_emit_type(t, src)); + c_ensure_local(t, dst.v.local, dst.type); + c_emit_local_assign_open(t, dst.v.local, src.type); c_emit_operand(t, src); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } static const char* binop_to_c(BinOp op) { @@ -1250,24 +1230,24 @@ static BinSignCast binop_sign_kind(BinOp op, int* lhs_signed_out) { } } -void c_binop(CGTarget* T, BinOp op, Operand dst, Operand a, Operand b) { +void c_binop(CgTarget* T, BinOp op, Operand dst, Operand a, Operand b) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; const char* sym = binop_to_c(op); if (!sym) { compiler_panic(t->c, loc, "C target: unknown binop %d", (int)op); } - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: binop dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: binop dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* Pointer operands get cast to uintptr_t inside c_emit_operand_arith, * so the binop's C result type is `uintptr_t`, not the original pointer * type. Keep the bridge when dst or either operand is pointer-typed so * the assignment back to a pointer dst doesn't trip -Wint-conversion. */ int has_ptr = c_operand_is_ptr_typed(t, dst) || c_operand_is_ptr_typed(t, a) || c_operand_is_ptr_typed(t, b); - c_emit_reg_assign_open(t, dst.v.reg, has_ptr ? (CfreeCgTypeId)0 : dst.type); + c_emit_local_assign_open(t, dst.v.local, has_ptr ? (CfreeCgTypeId)0 : dst.type); int lhs_signed = 1; BinSignCast bsc = binop_sign_kind(op, &lhs_signed); switch (bsc) { @@ -1300,18 +1280,18 @@ void c_binop(CGTarget* T, BinOp op, Operand dst, Operand a, Operand b) { c_emit_operand(t, b); break; } - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } /* ===== unop ===== */ -void c_unop(CGTarget* T, UnOp op, Operand dst, Operand a) { +void c_unop(CgTarget* T, UnOp op, Operand dst, Operand a) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: unop dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: unop dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); const char* sym = NULL; switch (op) { case UO_NEG: @@ -1327,10 +1307,10 @@ void c_unop(CGTarget* T, UnOp op, Operand dst, Operand a) { default: compiler_panic(t->c, loc, "C target: unknown unop %d", (int)op); } - c_emit_reg_assign_open(t, dst.v.reg, dst.type); + c_emit_local_assign_open(t, dst.v.local, dst.type); cbuf_puts(&t->body, sym); c_emit_operand(t, a); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } /* ===== compare ops ===== */ @@ -1399,21 +1379,21 @@ static void c_emit_cmp_operands(CTarget* t, CmpOp op, Operand a, Operand b) { } } -void c_cmp(CGTarget* T, CmpOp op, Operand dst, Operand a, Operand b) { +void c_cmp(CgTarget* T, CmpOp op, Operand dst, Operand a, Operand b) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: cmp dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: cmp dst must be LOCAL"); } if (!cmp_to_c(op)) { compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* Compare result is C `int` (0/1); assigning to integer dst.type narrows * implicitly without -Wall complaint. 
*/ - c_emit_reg_assign_open(t, dst.v.reg, dst.type); + c_emit_local_assign_open(t, dst.v.local, dst.type); c_emit_cmp_operands(t, op, a, b); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } /* ===== labels, jump, cmp_branch ===== */ @@ -1438,13 +1418,13 @@ static void c_label_name(Label l, char* out, size_t cap) { out[i] = '\0'; } -Label c_label_new(CGTarget* T) { +Label c_label_new(CgTarget* T) { CTarget* t = (CTarget*)T; t->next_label += 1; return (Label)t->next_label; } -void c_label_place(CGTarget* T, Label l) { +void c_label_place(CgTarget* T, Label l) { CTarget* t = (CTarget*)T; char buf[24]; c_label_name(l, buf, sizeof buf); @@ -1470,7 +1450,7 @@ static const char* c_scope_kw_for_label(CTarget* t, Label l) { return NULL; } -void c_jump(CGTarget* T, Label l) { +void c_jump(CgTarget* T, Label l) { CTarget* t = (CTarget*)T; if (t->last_was_terminator) return; const char* kw = c_scope_kw_for_label(t, l); @@ -1488,7 +1468,7 @@ void c_jump(CGTarget* T, Label l) { t->last_was_terminator = 1; } -void c_cmp_branch(CGTarget* T, CmpOp op, Operand a, Operand b, Label l) { +void c_cmp_branch(CgTarget* T, CmpOp op, Operand a, Operand b, Label l) { CTarget* t = (CTarget*)T; if (t->last_was_terminator) return; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; @@ -1539,7 +1519,7 @@ static void c_grow_scopes(CTarget* t, u32 needed) { t->scopes_cap = newcap; } -CGScope c_scope_begin(CGTarget* T, const CGScopeDesc* d) { +CGScope c_scope_begin(CgTarget* T, const CGScopeDesc* d) { CTarget* t = (CTarget*)T; if (t->nscopes + 1u >= t->scopes_cap) c_grow_scopes(t, t->nscopes + 2u); u32 idx = t->nscopes; @@ -1568,14 +1548,14 @@ CGScope c_scope_begin(CGTarget* T, const CGScopeDesc* d) { return (CGScope)(idx + 1u); } -void c_scope_else(CGTarget* T, CGScope s) { +void c_scope_else(CgTarget* T, CGScope s) { (void)T; (void)s; /* Public API doesn't emit SCOPE_IF; if it ever does, the frontend is * responsible for placing the else label and the break_label itself. 
*/ } -void c_scope_end(CGTarget* T, CGScope s) { +void c_scope_end(CgTarget* T, CGScope s) { CTarget* t = (CTarget*)T; if (s == 0 || (u32)s > t->nscopes) { compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, @@ -1598,7 +1578,7 @@ void c_scope_end(CGTarget* T, CGScope s) { t->nscopes -= 1u; } -void c_break_to(CGTarget* T, CGScope s) { +void c_break_to(CgTarget* T, CGScope s) { CTarget* t = (CTarget*)T; if (s == 0 || (u32)s > t->nscopes) { compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, @@ -1607,7 +1587,7 @@ void c_break_to(CGTarget* T, CGScope s) { c_jump(T, t->scopes[s - 1u].break_label); } -void c_continue_to(CGTarget* T, CGScope s) { +void c_continue_to(CgTarget* T, CGScope s) { CTarget* t = (CTarget*)T; if (s == 0 || (u32)s > t->nscopes) { compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, @@ -1635,7 +1615,7 @@ static void c_emit_case_value(CTarget* t, CfreeCgTypeId sel_ty, u64 v) { cbuf_puts(&t->body, ":"); } -void c_switch_(CGTarget* T, const CGSwitchDesc* d) { +void c_switch_(CgTarget* T, const CGSwitchDesc* d) { CTarget* t = (CTarget*)T; /* gcc/clang ignore strategy hints and pick their own dispatch shape. */ (void)d->hint; @@ -1676,22 +1656,22 @@ void c_switch_(CGTarget* T, const CGSwitchDesc* d) { * the current function, and `goto *p;` jumps to such an address. This * is the lowering every cc1-like backend uses (and what the toy * frontend ultimately compiles to via the C target). */ -void c_load_label_addr(CGTarget* T, Operand dst, Label l) { +void c_load_label_addr(CgTarget* T, Operand dst, Label l) { CTarget* t = (CTarget*)T; char buf[24]; - if (dst.kind != OPK_REG) { + if (dst.kind != OPK_LOCAL) { compiler_panic(t->c, t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}, - "C target: load_label_addr dst must be REG"); + "C target: load_label_addr dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0); + c_ensure_local(t, dst.v.local, dst.type); + c_emit_local_assign_open(t, dst.v.local, (CfreeCgTypeId)0); cbuf_puts(&t->body, "(void*)&&"); c_label_name(l, buf, sizeof buf); cbuf_puts(&t->body, buf); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } -void c_indirect_branch(CGTarget* T, Operand addr, const Label* valid_targets, +void c_indirect_branch(CgTarget* T, Operand addr, const Label* valid_targets, u32 ntargets) { CTarget* t = (CTarget*)T; (void)valid_targets; @@ -1749,7 +1729,7 @@ static void c_grow_local_static_entries(CTarget* t, u32 want) { t->local_static_entries_cap = newcap; } -int c_local_static_data_begin(CGTarget* T, const CGLocalStaticDataDesc* desc) { +int c_local_static_data_begin(CgTarget* T, const CGLocalStaticDataDesc* desc) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; if (!t->cur_fn) { @@ -1804,7 +1784,7 @@ int c_local_static_data_begin(CGTarget* T, const CGLocalStaticDataDesc* desc) { return 1; } -void c_local_static_data_write(CGTarget* T, const u8* data, u64 len) { +void c_local_static_data_write(CgTarget* T, const u8* data, u64 len) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; if (!t->local_static_active || !len) return; @@ -1820,7 +1800,7 @@ void c_local_static_data_write(CGTarget* T, const u8* data, u64 len) { t->local_static_offset += len; } -void c_local_static_data_label_addr(CGTarget* T, Label target, i64 addend, +void c_local_static_data_label_addr(CgTarget* T, Label target, i64 addend, u32 width, u32 address_space) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; @@ -1876,7 +1856,7 @@ static void c_emit_local_static_label_expr(CTarget* t, cbuf_puts(&t->decls, ")"); } -void c_local_static_data_end(CGTarget* T) { +void c_local_static_data_end(CgTarget* T) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; if (!t->local_static_active) return; @@ -1919,65 +1899,47 @@ void c_local_static_data_end(CGTarget* T) { /* ===== local, local_addr ===== */ -CGLocalStorage c_local(CGTarget* T, const CGLocalDesc* d) { - /* Map every local to a frame slot. virtual_regs=1 means we never spill, - * and at the C level frame slots and regs both look like plain locals, - * so there's nothing to gain from picking REG storage for non-addressable - * locals. The uniform FRAME mapping also lets local_addr just emit - * `&slot_N` without bookkeeping. */ - FrameSlotDesc fsd; - fsd.type = d->type; - fsd.name = d->name; - fsd.loc = d->loc; - fsd.size = d->size; - fsd.align = d->align; - fsd.kind = FS_LOCAL; - fsd.pad = 0; - fsd.flags = 0; - if (d->flags & CG_LOCAL_ADDR_TAKEN) fsd.flags |= FSF_ADDR_TAKEN; - FrameSlot slot = c_frame_slot(T, &fsd); - CGLocalStorage st; - st.kind = CG_LOCAL_STORAGE_FRAME; - st.pad[0] = st.pad[1] = st.pad[2] = 0; - st.v.frame_slot = slot; - return st; -} - -void c_local_addr(CGTarget* T, Operand dst, const CGLocalDesc* d, - CGLocalStorage s) { +CGLocal c_local(CgTarget* T, const CGLocalDesc* d) { + CTarget* t = (CTarget*)T; + t->next_local += 1u; + if (t->next_local == CG_LOCAL_NONE) { + compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: semantic local id exhausted"); + return CG_LOCAL_NONE; + } + c_ensure_local(t, (CGLocal)t->next_local, d->type); + return (CGLocal)t->next_local; +} + +void c_local_addr(CgTarget* T, Operand dst, const CGLocalDesc* d, + CGLocal s) { CTarget* t = (CTarget*)T; (void)d; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: local_addr dst must be REG"); - } - if (s.kind != CG_LOCAL_STORAGE_FRAME) { - compiler_panic(t->c, loc, "C target: local_addr expects frame storage"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: local_addr dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); + c_ensure_local(t, s, d->type); char buf[24]; - /* RHS is `&slot_N` — a pointer to the slot's storage type. dst.type is - * declared as void* / a typed pointer; leave the bridge to the caller's - * hint logic. We pass 0 so the bridge stays — `&slot_N` would otherwise - * be `RECORD_TYPE*` and not match dst.type (typically void*). */ - c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0); + c_emit_local_assign_open(t, dst.v.local, (CfreeCgTypeId)0); cbuf_puts(&t->body, "&"); - c_slot_name(s.v.frame_slot, buf, sizeof buf); + c_local_name(s, buf, sizeof buf); cbuf_puts(&t->body, buf); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } /* ===== convert ===== */ -void c_convert(CGTarget* T, ConvKind k, Operand dst, Operand src) { +void c_convert(CgTarget* T, ConvKind k, Operand dst, Operand src) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: convert dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: convert dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); char buf[24]; - c_reg_name(dst.v.reg, buf, sizeof buf); + c_local_name(dst.v.local, buf, sizeof buf); if (k == CV_BITCAST) { /* Same-size reinterpretation. 
Use __builtin_memcpy through a temp so @@ -2000,6 +1962,17 @@ void c_convert(CGTarget* T, ConvKind k, Operand dst, Operand src) { return; } + if (c_type_is_bool(t, dst.type)) { + c_emit_local_assign_open(t, dst.v.local, dst.type); + cbuf_puts(&t->body, "("); + c_emit_type(t, &t->body, dst.type); + cbuf_puts(&t->body, ")("); + c_emit_operand(t, src); + cbuf_puts(&t->body, " != 0)"); + c_emit_local_assign_close(t); + return; + } + /* Integer and float conversions: a C cast does the right thing once the * source is first cast to the appropriate signedness (for SEXT/ZEXT and * ITOF_S/U / FTOI_S/U). */ @@ -2016,7 +1989,7 @@ void c_convert(CGTarget* T, ConvKind k, Operand dst, Operand src) { } /* The cast `(dst.type)(src)` produces a value of dst.type. */ - c_emit_reg_assign_open(t, dst.v.reg, dst.type); + c_emit_local_assign_open(t, dst.v.local, dst.type); cbuf_puts(&t->body, "("); c_emit_type(t, &t->body, dst.type); cbuf_puts(&t->body, ")"); @@ -2033,33 +2006,73 @@ void c_convert(CGTarget* T, ConvKind k, Operand dst, Operand src) { /* TRUNC / FTOI / ITOF / FEXT / FTRUNC: rely on C cast semantics. */ c_emit_operand(t, src); } - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } /* === call === */ -/* Emit an argument expression for a call. The CGABIValue's `type` is the - * source (front-end) type; for aggregates the storage is by-address. We - * dereference such addresses to recover the value the C callee expects. - * - * The CG also routes scalar-typed values through OPK_LOCAL (frame-slot - * params land there in this target). c_emit_operand already does the right - * thing for OPK_LOCAL when the slot's declared type matches; for aggregate - * args coming from a frame slot of the aggregate type, the slot was - * declared with the aggregate type, and `slot_N` works as a struct value. 
- */ -static void c_emit_call_arg(CTarget* t, const CGABIValue* v) { - Operand op = v->storage; - /* If the source type is an aggregate but the storage is some address - * carrier, the emission already produces the deref via c_emit_operand's - * OPK_LOCAL/INDIRECT paths. CGABIValue.type carries the front-end type; - * use that to override the operand type so the deref expression uses the - * aggregate type. */ - op.type = v->type; - c_emit_operand(t, op); +static CfreeCgTypeId c_call_arg_type(CTarget* t, const CgType* fty, + const CGCallDesc* d, u32 i) { + if (i < fty->func.nparams) return fty->func.params[i].type; + return c_local_type_or_panic(t, d->args[i]); } -const char* c_tail_call_unrealizable_reason(CGTarget* T, +static void c_emit_call_arg(CTarget* t, const CgType* fty, + const CGCallDesc* d, u32 i) { + CfreeCgTypeId ty = c_call_arg_type(t, fty, d, i); + c_ensure_local(t, d->args[i], ty); + c_emit_operand(t, c_op_local(d->args[i], ty)); +} + +static void c_emit_call_expr(CTarget* t, const CgType* fty, + const CGCallDesc* d, + const CfreeCgTypeId* result_types) { + if (d->callee.kind == OPK_GLOBAL) { + c_ensure_forward_decl_with_results( + t, d->callee.v.global.sym, d->fn_type, + d->nresults > 1 ? 
result_types : NULL, d->nresults); + cbuf_puts(&t->body, c_sym_name(t, d->callee.v.global.sym)); + } else if (d->callee.kind == OPK_LOCAL) { + cbuf_puts(&t->body, "(("); + if (d->nresults > 1) { + c_emit_tuple_type_name(t, &t->body, result_types, d->nresults); + cbuf_puts(&t->body, " (*)("); + if (fty->func.nparams == 0 && !fty->func.abi_variadic) { + cbuf_puts(&t->body, "void"); + } else { + for (u32 i = 0; i < fty->func.nparams; ++i) { + if (i > 0) cbuf_puts(&t->body, ", "); + c_emit_type(t, &t->body, fty->func.params[i].type); + } + if (fty->func.abi_variadic) { + if (fty->func.nparams > 0) cbuf_puts(&t->body, ", "); + cbuf_puts(&t->body, "..."); + } + } + cbuf_puts(&t->body, ")"); + } else { + const char* fp = c_typedef_name(t, d->fn_type); + c_ensure_typedef(t, d->fn_type); + cbuf_puts(&t->body, fp); + } + cbuf_puts(&t->body, ")"); + c_emit_operand(t, d->callee); + cbuf_puts(&t->body, ")"); + } else { + compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: callee kind %d not supported", + (int)d->callee.kind); + } + + cbuf_puts(&t->body, "("); + for (u32 i = 0; i < d->nargs; ++i) { + if (i > 0) cbuf_puts(&t->body, ", "); + c_emit_call_arg(t, fty, d, i); + } + cbuf_puts(&t->body, ")"); +} + +const char* c_tail_call_unrealizable_reason(CgTarget* T, const CGCallDesc* d) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; @@ -2088,132 +2101,114 @@ const char* c_tail_call_unrealizable_reason(CGTarget* T, return NULL; } -void c_call(CGTarget* T, const CGCallDesc* d) { +void c_call(CgTarget* T, const CGCallDesc* d) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; + Heap* h = t->c->ctx->heap; const CgType* fty = cg_type_get(t->c, api_unalias_type(t->c, d->fn_type)); if (!fty || fty->kind != CFREE_CG_TYPE_FUNC) { compiler_panic(t->c, loc, "C target: call: bad fn_type"); } CfreeCgTypeId ret_type = fty->func.ret; - int fn_returns = !cg_type_is_void(t->c, ret_type); int is_tail = (d->flags & CG_CALL_TAIL) != 0; - /* When tail-called, CG sets ret.storage to a void IMM and will not emit a - * separate ret. Clang's C statement attribute gives us a checked musttail - * lowering while still spelling the source-level value forwarding exactly. */ + CfreeCgTypeId* result_types = NULL; + if (d->nresults > 1) { + result_types = (CfreeCgTypeId*)h->alloc( + h, sizeof(CfreeCgTypeId) * d->nresults, _Alignof(CfreeCgTypeId)); + if (!result_types) { + compiler_panic(t->c, loc, "C target: out of memory"); + return; + } + for (u32 i = 0; i < d->nresults; ++i) + result_types[i] = c_local_type_or_panic(t, d->results[i]); + c_ensure_tuple_typedef(t, result_types, d->nresults); + } - /* === Emit the LHS / open the call statement === */ if (is_tail) { cbuf_puts(&t->body, " __attribute__((musttail)) return "); - } else if (fn_returns) { - Operand rs = d->ret.storage; - if (rs.kind == OPK_REG) { - c_ensure_reg(t, rs.v.reg, ret_type, (RegClass)rs.cls); - /* Callee returns ret_type; the assignment is direct. */ - c_emit_reg_assign_open(t, rs.v.reg, ret_type); - } else if (rs.kind == OPK_LOCAL) { - char buf[24]; - c_slot_name(rs.v.frame_slot, buf, sizeof buf); - cbuf_puts(&t->body, " "); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, " = "); - } else if (rs.kind == OPK_INDIRECT) { - cbuf_puts(&t->body, " "); - c_emit_addr_deref(t, rs, ret_type); - cbuf_puts(&t->body, " = "); - } else if (rs.kind == OPK_IMM && cg_type_is_void(t->c, rs.type)) { - /* Result discarded by CG; emit a statement-only call. 
*/ - cbuf_puts(&t->body, " "); - } else { - compiler_panic(t->c, loc, - "C target: call ret storage kind %d not supported", - (int)rs.kind); - } - } else { + c_emit_call_expr(t, fty, d, result_types); + cbuf_puts(&t->body, ";\n"); + t->last_was_terminator = 1; + } else if (d->nresults == 0) { cbuf_puts(&t->body, " "); - } - - /* === Emit the callee expression === */ - if (d->callee.kind == OPK_GLOBAL) { - c_ensure_forward_decl(t, d->callee.v.global.sym, d->fn_type); - cbuf_puts(&t->body, c_sym_name(t, d->callee.v.global.sym)); - } else if (d->callee.kind == OPK_REG) { - /* Indirect call: cast the (void*) reg to the right function-pointer - * type and call. The typedef machinery makes a function-pointer typedef - * for d->fn_type available as __ty_<id>. */ - const char* fp = c_typedef_name(t, d->fn_type); - c_ensure_typedef(t, d->fn_type); - cbuf_puts(&t->body, "(("); - cbuf_puts(&t->body, fp); - cbuf_puts(&t->body, ")"); - c_emit_operand(t, d->callee); - cbuf_puts(&t->body, ")"); - } else { - compiler_panic(t->c, loc, "C target: callee kind %d not supported", - (int)d->callee.kind); - } - - /* === Emit args === */ - cbuf_puts(&t->body, "("); - for (u32 i = 0; i < d->nargs; ++i) { - if (i > 0) cbuf_puts(&t->body, ", "); - c_emit_call_arg(t, &d->args[i]); - } - cbuf_puts(&t->body, ")"); - - /* === Close the statement === */ - if (!is_tail && fn_returns && d->ret.storage.kind == OPK_REG) { - c_emit_reg_assign_close(t); + c_emit_call_expr(t, fty, d, result_types); + cbuf_puts(&t->body, ";\n"); + } else if (d->nresults == 1) { + c_ensure_local(t, d->results[0], ret_type); + c_emit_local_assign_open(t, d->results[0], ret_type); + c_emit_call_expr(t, fty, d, result_types); + c_emit_local_assign_close(t); } else { + char tmp[32]; + u32 tmp_id = t->next_tmp++; + snprintf(tmp, sizeof tmp, "__cfree_call_%u", (unsigned)tmp_id); + cbuf_puts(&t->body, " "); + c_emit_tuple_type_name(t, &t->body, result_types, d->nresults); + cbuf_putc(&t->body, ' '); + cbuf_puts(&t->body, tmp); + 
cbuf_puts(&t->body, " = "); + c_emit_call_expr(t, fty, d, result_types); cbuf_puts(&t->body, ";\n"); + for (u32 i = 0; i < d->nresults; ++i) { + c_ensure_local(t, d->results[i], result_types[i]); + c_emit_local_assign_open(t, d->results[i], result_types[i]); + cbuf_puts(&t->body, tmp); + cbuf_puts(&t->body, ".f"); + cbuf_put_u64(&t->body, (u64)i); + c_emit_local_assign_close(t); + } } - if (is_tail) t->last_was_terminator = 1; + if (result_types) h->free(h, result_types, + sizeof(CfreeCgTypeId) * d->nresults); } /* === load / store === */ -void c_load(CGTarget* T, Operand dst, Operand addr, MemAccess m) { +void c_load(CgTarget* T, Operand dst, Operand addr, MemAccess m) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: load dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: load dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); CfreeCgTypeId access_ty = m.type ? m.type : dst.type; + if (c_type_is_aggregate(t, access_ty) && !c_type_is_aggregate(t, dst.type)) + access_ty = dst.type; /* The deref `*(access_ty*)addr` produces a value of access_ty. */ - c_emit_reg_assign_open(t, dst.v.reg, access_ty); + c_emit_local_assign_open(t, dst.v.local, access_ty); c_emit_addr_deref(t, addr, access_ty); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } -void c_store(CGTarget* T, Operand addr, Operand src, MemAccess m) { +void c_store(CgTarget* T, Operand addr, Operand src, MemAccess m) { CTarget* t = (CTarget*)T; CfreeCgTypeId access_ty = m.type ? m.type : src.type; + if (c_type_is_aggregate(t, access_ty) && !c_type_is_aggregate(t, src.type)) + access_ty = src.type; cbuf_puts(&t->body, " "); c_emit_addr_deref(t, addr, access_ty); - /* CG reuses reg ids across types; the src reg's C declaration may differ - * from access_ty. 
c_emit_operand_as bridges through uintptr_t so int/ptr + /* c_emit_operand_as bridges int/ptr crossings through uintptr_t so * roundtrips don't trip `-Wint-conversion`. */ cbuf_puts(&t->body, " = "); c_emit_operand_as(t, src, access_ty); cbuf_puts(&t->body, ";\n"); } -void c_addr_of(CGTarget* T, Operand dst, Operand lv) { +void c_addr_of(CgTarget* T, Operand dst, Operand lv) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: addr_of dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: addr_of dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* `c_emit_lvalue_addr` casts its output to dst.type already. */ - c_emit_reg_assign_open(t, dst.v.reg, dst.type); + c_emit_local_assign_open(t, dst.v.local, dst.type); c_emit_lvalue_addr(t, lv, dst.type); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } -void c_ret(CGTarget* T, const CGABIValue* val) { +void c_ret(CgTarget* T, const CGLocal* values, u32 nvalues) { CTarget* t = (CTarget*)T; /* Already-terminated block: this ret is unreachable (the frontend's * defensive `return 0;` epilogue lands here right after a user return). */ @@ -2222,35 +2217,46 @@ void c_ret(CGTarget* T, const CGABIValue* val) { * a non-void function that's unreachable; emitting a bare `return;` would * trip -Wreturn-type. Spell it as `__builtin_unreachable()` so the host C * compiler sees the path is dead without us inventing a fake value. 
*/ - if (!val && t->cur_fn) { - CfreeCgTypeId rt = cg_type_func_ret_id(t->c, t->cur_fn->fn_type); - if (rt && !cg_type_is_void(t->c, rt)) { + if (nvalues == 0 && t->cur_fn) { + if (t->cur_fn->nresults != 0) { cbuf_puts(&t->body, " __builtin_unreachable();\n"); t->last_was_terminator = 1; return; } } cbuf_puts(&t->body, " return"); - if (val) { + if (nvalues == 1) { cbuf_puts(&t->body, " "); - /* The function's declared C return type and the operand's underlying C - * type may differ: CG reuses Reg ids across types, so the C local backing - * `val->storage` was declared at first sighting and may not match. For - * scalar return types, bridge through (ret_type)(uintptr_t)(op) so - * -Wint-conversion doesn't trip; for aggregates we trust c_emit_operand - * (it already deref-casts via the slot type). */ CfreeCgTypeId ret_type = t->cur_fn - ? cg_type_func_ret_id(t->c, t->cur_fn->fn_type) + ? t->cur_fn->result_types[0] : (CfreeCgTypeId)0; const CgType* rty = ret_type ? cg_type_get(t->c, api_unalias_type(t->c, ret_type)) : NULL; int is_aggregate = rty && (rty->kind == CFREE_CG_TYPE_RECORD || rty->kind == CFREE_CG_TYPE_ARRAY); if (ret_type && !is_aggregate) { - c_emit_operand_as(t, val->storage, ret_type); + CfreeCgTypeId value_ty = c_local_type_or_panic(t, values[0]); + c_emit_operand_as(t, c_op_local(values[0], value_ty), ret_type); } else { - c_emit_operand(t, val->storage); + c_emit_operand(t, c_op_local(values[0], ret_type)); + } + } else if (nvalues > 1) { + const CfreeCgTypeId* result_types = + t->cur_fn ? t->cur_fn->result_types : NULL; + if (!result_types || t->cur_fn->nresults != nvalues) { + compiler_panic(t->c, t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: multi-return shape mismatch"); + return; + } + c_ensure_tuple_typedef(t, result_types, nvalues); + cbuf_puts(&t->body, " ("); + c_emit_tuple_type_name(t, &t->body, result_types, nvalues); + cbuf_puts(&t->body, "){"); + for (u32 i = 0; i < nvalues; ++i) { + if (i > 0) cbuf_puts(&t->body, ", "); + c_emit_operand(t, c_op_local(values[i], result_types[i])); } + cbuf_puts(&t->body, "}"); } cbuf_puts(&t->body, ";\n"); t->last_was_terminator = 1; @@ -2271,7 +2277,7 @@ void c_ret(CGTarget* T, const CGABIValue* val) { * * The emitted decl serves as the alias definition AND a forward prototype * for callers, so we mark sym_forwarded to dedup against a later c_call. */ -void c_alias(CGTarget* T, ObjSymId alias_sym, ObjSymId target_sym, +void c_alias(CgTarget* T, ObjSymId alias_sym, ObjSymId target_sym, CfreeCgTypeId type) { CTarget* t = (CTarget*)T; Heap* h = t->c->ctx->heap; @@ -2297,7 +2303,7 @@ void c_alias(CGTarget* T, ObjSymId alias_sym, ObjSymId target_sym, if (t->c->target.obj != CFREE_OBJ_MACHO) { /* Attribute form. Works for both function and object aliases on ELF * and PE/COFF. */ - c_emit_func_signature(t, &t->forwards, alias_name, type); + c_emit_func_signature(t, &t->forwards, alias_name, type, NULL, 0); cbuf_puts(&t->forwards, " __attribute__((alias(\""); cbuf_puts(&t->forwards, target_name); cbuf_puts(&t->forwards, "\")));\n"); @@ -2314,7 +2320,7 @@ void c_alias(CGTarget* T, ObjSymId alias_sym, ObjSymId target_sym, * via c_func_begin). Also dedup that. */ c_ensure_forward_decl(t, target_sym, type); /* `static`? No — alias must be externally visible. 
*/ - c_emit_func_signature(t, &t->forwards, alias_name, type); + c_emit_func_signature(t, &t->forwards, alias_name, type, NULL, 0); cbuf_puts(&t->forwards, " { "); CfreeCgTypeId ret_type = cg_type_func_ret_id(t->c, type); if (!cg_type_is_void(t->c, ret_type)) cbuf_puts(&t->forwards, "return "); @@ -2381,7 +2387,7 @@ static const char* c_overflow_builtin(IntrinKind k) { } } -void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, +void c_intrinsic(CgTarget* T, IntrinKind k, Operand* dsts, u32 ndst, const Operand* args, u32 narg) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; @@ -2402,22 +2408,22 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, return; } case INTRIN_ASSUME_ALIGNED: { - /* dsts[0] is the result reg (pointer); args = (ptr, align [, ofs]) */ + /* dsts[0] is the result local (pointer); args = (ptr, align [, ofs]) */ if (ndst != 1) { compiler_panic(t->c, loc, "C target: assume_aligned: expected 1 dst, got %u", (unsigned)ndst); } - c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); + c_ensure_local(t, dsts[0].v.local, dsts[0].type); /* Returns void*; bridge to dst pointer type. */ - c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0); + c_emit_local_assign_open(t, dsts[0].v.local, (CfreeCgTypeId)0); cbuf_puts(&t->body, "__builtin_assume_aligned("); for (u32 i = 0; i < narg; ++i) { if (i > 0) cbuf_puts(&t->body, ", "); c_emit_operand(t, args[i]); } cbuf_puts(&t->body, ")"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); return; } case INTRIN_EXPECT: { @@ -2427,15 +2433,15 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, "C target: expect: bad shape (ndst=%u narg=%u)", (unsigned)ndst, (unsigned)narg); } - c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); + c_ensure_local(t, dsts[0].v.local, dsts[0].type); /* Returns `long`; dst.type may be a narrower int — keep the bridge. 
*/ - c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0); + c_emit_local_assign_open(t, dsts[0].v.local, (CfreeCgTypeId)0); cbuf_puts(&t->body, "__builtin_expect((long)"); c_emit_operand(t, args[0]); cbuf_puts(&t->body, ", (long)"); c_emit_operand(t, args[1]); cbuf_puts(&t->body, ")"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); return; } case INTRIN_POPCOUNT: @@ -2455,15 +2461,15 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, compiler_panic(t->c, loc, "C target: bit-intrin width %u unsupported", (unsigned)w); } - c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); + c_ensure_local(t, dsts[0].v.local, dsts[0].type); /* __builtin_popcount/ctz/clz return `int`; bswap returns its input * type. Narrow to dst.type via the bridge. */ - c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0); + c_emit_local_assign_open(t, dsts[0].v.local, (CfreeCgTypeId)0); cbuf_puts(&t->body, fn); cbuf_puts(&t->body, "("); c_emit_operand(t, args[0]); cbuf_puts(&t->body, ")"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); return; } case INTRIN_MEMCPY: @@ -2478,7 +2484,7 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, for (u32 i = 0; i < narg; ++i) { if (i > 0) cbuf_puts(&t->body, ", "); /* The pointer operands (dst for all three; src for mem{cpy,move}) - * may be typed as a plain integer reg when they come from address + * may be typed as a plain integer local when they come from address * arithmetic, which the C target declares as int64_t. The builtins * take void*, so cast explicitly to avoid -Wint-conversion. */ int is_ptr_arg = @@ -2495,12 +2501,12 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, case INTRIN_USUB_OVERFLOW: case INTRIN_SMUL_OVERFLOW: case INTRIN_UMUL_OVERFLOW: { - /* dsts[0] = value reg, dsts[1] = i1 overflow flag. + /* dsts[0] = value local, dsts[1] = i1 overflow flag. 
* * Signedness comes from the intrinsic kind, but cfree's CG int type * is width-only and the C target declares every result as a signed * fixed-width (int{8,16,32,64}_t). __builtin_*_overflow keys its - * overflow check on the result type, so passing the signed reg + * overflow check on the result type, so passing the signed local * directly makes a UADD test as if it were signed and miss true * unsigned overflow. Wrap the call in a block with a scratch result * of the right signedness and copy it back through the int/uint @@ -2512,11 +2518,11 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, (k == INTRIN_UADD_OVERFLOW || k == INTRIN_USUB_OVERFLOW || k == INTRIN_UMUL_OVERFLOW); const char* fn = c_overflow_builtin(k); - c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); - c_ensure_reg(t, dsts[1].v.reg, dsts[1].type, (RegClass)dsts[1].cls); + c_ensure_local(t, dsts[0].v.local, dsts[0].type); + c_ensure_local(t, dsts[1].v.local, dsts[1].type); char vbuf[24], obuf[24]; - c_reg_name(dsts[0].v.reg, vbuf, sizeof vbuf); - c_reg_name(dsts[1].v.reg, obuf, sizeof obuf); + c_local_name(dsts[0].v.local, vbuf, sizeof vbuf); + c_local_name(dsts[1].v.local, obuf, sizeof obuf); u32 w = c_int_width_for_signedness(t, dsts[0].type); const char* sty = c_int_type_name_for_width(w, !is_unsigned); if (!sty) { @@ -2552,13 +2558,13 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, if (ndst != 1 || narg != 1) { compiler_panic(t->c, loc, "C target: setjmp: bad shape"); } - c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); + c_ensure_local(t, dsts[0].v.local, dsts[0].type); /* setjmp returns `int`; bridge to dst.type. 
*/ - c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0); + c_emit_local_assign_open(t, dsts[0].v.local, (CfreeCgTypeId)0); cbuf_puts(&t->body, "setjmp(*(jmp_buf*)("); c_emit_operand(t, args[0]); cbuf_puts(&t->body, "))"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); return; } case INTRIN_LONGJMP: { @@ -2579,15 +2585,15 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, /* === alloca === */ -void c_alloca(CGTarget* T, Operand dst, Operand size, u32 align) { +void c_alloca(CgTarget* T, Operand dst, Operand size, u32 align) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: alloca dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: alloca dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* __builtin_alloca returns `void*`; dst.type is typically void* too. */ - c_emit_reg_assign_open(t, dst.v.reg, dst.type); + c_emit_local_assign_open(t, dst.v.local, dst.type); if (align > 1) { /* gcc has __builtin_alloca_with_align taking bits, not bytes. */ cbuf_puts(&t->body, "__builtin_alloca_with_align("); @@ -2600,16 +2606,16 @@ void c_alloca(CGTarget* T, Operand dst, Operand size, u32 align) { c_emit_operand(t, size); cbuf_puts(&t->body, ")"); } - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } /* === varargs === * * The C-target va_list is the host toolchain's `va_list` from <stdarg.h>. - * The first arg of all va_* is `ap_addr` — the address of the va_list slot. + * The first arg of all va_* is `ap_addr` - the address of the va_list local. * We deref to get the va_list lvalue C's macros expect. */ -void c_va_start(CGTarget* T, Operand ap_addr) { +void c_va_start(CgTarget* T, Operand ap_addr) { CTarget* t = (CTarget*)T; t->need_stdarg = 1; /* va_start needs the "last named parameter". 
CG doesn't pass that to the @@ -2631,14 +2637,14 @@ void c_va_start(CGTarget* T, Operand ap_addr) { cbuf_puts(&t->body, ");\n"); } -void c_va_end(CGTarget* T, Operand ap_addr) { +void c_va_end(CgTarget* T, Operand ap_addr) { CTarget* t = (CTarget*)T; cbuf_puts(&t->body, " __builtin_va_end(*(va_list*)("); c_emit_operand(t, ap_addr); cbuf_puts(&t->body, "));\n"); } -void c_va_copy(CGTarget* T, Operand dst_addr, Operand src_addr) { +void c_va_copy(CgTarget* T, Operand dst_addr, Operand src_addr) { CTarget* t = (CTarget*)T; cbuf_puts(&t->body, " __builtin_va_copy(*(va_list*)("); c_emit_operand(t, dst_addr); @@ -2647,26 +2653,26 @@ void c_va_copy(CGTarget* T, Operand dst_addr, Operand src_addr) { cbuf_puts(&t->body, "));\n"); } -void c_va_arg(CGTarget* T, Operand dst, Operand ap_addr, CfreeCgTypeId ty) { +void c_va_arg(CgTarget* T, Operand dst, Operand ap_addr, CfreeCgTypeId ty) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: va_arg dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: va_arg dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* __builtin_va_arg yields a value of `ty`. 
*/ - c_emit_reg_assign_open(t, dst.v.reg, ty); + c_emit_local_assign_open(t, dst.v.local, ty); cbuf_puts(&t->body, "__builtin_va_arg(*(va_list*)("); c_emit_operand(t, ap_addr); cbuf_puts(&t->body, "), "); c_emit_type(t, &t->body, ty); cbuf_puts(&t->body, ")"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } /* === copy_bytes / set_bytes === */ -void c_copy_bytes(CGTarget* T, Operand dst_addr, Operand src_addr, +void c_copy_bytes(CgTarget* T, Operand dst_addr, Operand src_addr, AggregateAccess m) { CTarget* t = (CTarget*)T; c_assert_no_index(t, dst_addr, "copy_bytes dst"); @@ -2675,23 +2681,59 @@ void c_copy_bytes(CGTarget* T, Operand dst_addr, Operand src_addr, * int64_t); __builtin_memcpy takes void*, so cast to avoid * -Wint-conversion. */ cbuf_puts(&t->body, " __builtin_memcpy((void*)"); - c_emit_operand(t, dst_addr); + c_emit_copy_addr(t, dst_addr); cbuf_puts(&t->body, ", (void*)"); - c_emit_operand(t, src_addr); + c_emit_copy_addr(t, src_addr); cbuf_puts(&t->body, ", "); cbuf_put_u64(&t->body, (u64)m.size); cbuf_puts(&t->body, ");\n"); } -void c_set_bytes(CGTarget* T, Operand dst_addr, Operand byte_value, +static void c_emit_copy_addr(CTarget* t, Operand addr) { + char buf[24]; + SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; + switch (addr.kind) { + case OPK_LOCAL: + c_ensure_local(t, addr.v.local, addr.type); + if (c_operand_is_ptr_typed(t, addr)) { + c_emit_operand(t, addr); + } else { + cbuf_putc(&t->body, '&'); + c_local_name(addr.v.local, buf, sizeof buf); + cbuf_puts(&t->body, buf); + } + return; + case OPK_GLOBAL: { + obj_sym_mark_referenced(t->obj, addr.v.global.sym); + cbuf_puts(&t->body, "((char*)&"); + cbuf_puts(&t->body, c_sym_name(t, addr.v.global.sym)); + if (addr.v.global.addend != 0) { + cbuf_puts(&t->body, " + "); + cbuf_put_i64(&t->body, addr.v.global.addend); + } + cbuf_putc(&t->body, ')'); + return; + } + case OPK_INDIRECT: + c_emit_indirect_addr_expr(t, c_addr_mode(addr)); + return; + default: + compiler_panic(t->c, loc, + "C target: copy_bytes address operand kind %d not " + "supported", + (int)addr.kind); + } +} + +void c_set_bytes(CgTarget* T, Operand dst_addr, Operand byte_value, AggregateAccess m) { CTarget* t = (CTarget*)T; c_assert_no_index(t, dst_addr, "set_bytes dst"); - /* dst may be a plain integer reg from address arithmetic (declared + /* dst may be a plain integer local from address arithmetic (declared * int64_t); __builtin_memset takes void*, so cast to avoid * -Wint-conversion. */ cbuf_puts(&t->body, " __builtin_memset((void*)"); - c_emit_operand(t, dst_addr); + c_emit_copy_addr(t, dst_addr); cbuf_puts(&t->body, ", (int)"); c_emit_operand(t, byte_value); cbuf_puts(&t->body, ", "); @@ -2705,19 +2747,19 @@ void c_set_bytes(CGTarget* T, Operand dst_addr, Operand byte_value, * during c_emit_data, and tls_addr_of spells `((char*)&name + addend)` with * the requested pointer type. The host C compiler picks the TLS model. 
*/ -void c_tls_addr_of(CGTarget* T, Operand dst, ObjSymId sym, i64 addend); +void c_tls_addr_of(CgTarget* T, Operand dst, ObjSymId sym, i64 addend); -void c_tls_addr_of(CGTarget* T, Operand dst, ObjSymId sym, i64 addend) { +void c_tls_addr_of(CgTarget* T, Operand dst, ObjSymId sym, i64 addend) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: tls_addr_of dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: tls_addr_of dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); const char* nm = c_sym_name(t, sym); /* RHS spells `(char*)&sym + addend` — pointer type that may not match * dst.type; keep the bridge to cast through cleanly. */ - c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0); + c_emit_local_assign_open(t, dst.v.local, (CfreeCgTypeId)0); cbuf_puts(&t->body, "((char*)&"); cbuf_puts(&t->body, nm); if (addend != 0) { @@ -2725,7 +2767,7 @@ void c_tls_addr_of(CGTarget* T, Operand dst, ObjSymId sym, i64 addend) { cbuf_put_i64(&t->body, addend); } cbuf_puts(&t->body, ")"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } /* === bitfields === @@ -2737,8 +2779,8 @@ void c_tls_addr_of(CGTarget* T, Operand dst, ObjSymId sym, i64 addend) { * usual address-deref path), which sidesteps the C bitfield ABI ambiguity * entirely. */ -void c_bitfield_load(CGTarget* T, Operand dst, Operand addr, BitFieldAccess bf); -void c_bitfield_store(CGTarget* T, Operand addr, Operand src, +void c_bitfield_load(CgTarget* T, Operand dst, Operand addr, BitFieldAccess bf); +void c_bitfield_store(CgTarget* T, Operand addr, Operand src, BitFieldAccess bf); /* Returns the unsigned C integer type matching the storage-unit byte size. 
*/ @@ -2766,7 +2808,8 @@ static void c_emit_lvalue_addr_expr_raw(CTarget* t, Operand addr) { switch (addr.kind) { case OPK_LOCAL: cbuf_putc(&t->body, '&'); - c_slot_name(addr.v.frame_slot, buf, sizeof buf); + c_ensure_local(t, addr.v.local, addr.type); + c_local_name(addr.v.local, buf, sizeof buf); cbuf_puts(&t->body, buf); return; case OPK_GLOBAL: { @@ -2783,9 +2826,9 @@ static void c_emit_lvalue_addr_expr_raw(CTarget* t, Operand addr) { } case OPK_INDIRECT: { CAddrMode m = c_addr_mode(addr); - if ((u32)m.base >= t->reg_cap || !t->reg_declared[m.base]) { + if ((u32)m.base >= t->local_cap || !t->local_declared[m.base]) { compiler_panic(t->c, loc, - "C target: bitfield on undeclared base reg v%u", + "C target: bitfield on undeclared base local v%u", (unsigned)m.base); } cbuf_putc(&t->body, '('); @@ -2815,22 +2858,22 @@ static void c_bf_storage_lvalue(CTarget* t, Operand addr, BitFieldAccess bf, cbuf_puts(&t->body, "))"); } -void c_bitfield_load(CGTarget* T, Operand dst, Operand addr, +void c_bitfield_load(CgTarget* T, Operand dst, Operand addr, BitFieldAccess bf) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: bitfield_load dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: bitfield_load dst must be LOCAL"); } c_assert_no_index(t, addr, "bitfield_load"); if (bf.bit_width == 0) { /* Zero-width — layout barrier only; nothing to load. Emit a no-op - * assignment so the dst reg still gets a defined value. */ - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + * assignment so the dst local still gets a defined value. */ + c_ensure_local(t, dst.v.local, dst.type); /* RHS is the literal 0 (int); narrowing to dst.type is fine. 
*/ - c_emit_reg_assign_open(t, dst.v.reg, dst.type); + c_emit_local_assign_open(t, dst.v.local, dst.type); cbuf_puts(&t->body, "0"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); return; } const char* sty = c_bf_storage_type(bf.storage.size); @@ -2838,10 +2881,10 @@ void c_bitfield_load(CGTarget* T, Operand dst, Operand addr, compiler_panic(t->c, loc, "C target: bitfield storage size %u unsupported", (unsigned)bf.storage.size); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* RHS is the storage-width int from the mask/shift expression; bridge * to dst.type so any signedness/width adjustment is explicit. */ - c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0); + c_emit_local_assign_open(t, dst.v.local, (CfreeCgTypeId)0); /* For signed bitfields, sign-extend via the standard shift-up / arith-shift- * down trick on a signed integer of the storage width. For unsigned, mask * the extracted bits. @@ -2886,10 +2929,10 @@ void c_bitfield_load(CGTarget* T, Operand dst, Operand addr, } cbuf_puts(&t->body, ")"); } - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } -void c_bitfield_store(CGTarget* T, Operand addr, Operand src, +void c_bitfield_store(CgTarget* T, Operand addr, Operand src, BitFieldAccess bf) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; @@ -2949,7 +2992,7 @@ void c_bitfield_store(CGTarget* T, Operand addr, Operand src, * so we pass the template through and emit the constraint+operand pairs in * order. 
*/ -void c_asm_block(CGTarget* T, const char* tmpl, const AsmConstraint* outs, +void c_asm_block(CgTarget* T, const char* tmpl, const AsmConstraint* outs, u32 no, Operand* oo, const AsmConstraint* ins, u32 ni, const Operand* io, const Sym* clobs, u32 nc); @@ -2978,7 +3021,7 @@ static void c_emit_c_string_literal(CBuf* b, const char* s) { cbuf_putc(b, '"'); } -void c_asm_block(CGTarget* T, const char* tmpl, const AsmConstraint* outs, +void c_asm_block(CgTarget* T, const char* tmpl, const AsmConstraint* outs, u32 no, Operand* oo, const AsmConstraint* ins, u32 ni, const Operand* io, const Sym* clobs, u32 nc) { CTarget* t = (CTarget*)T; @@ -2997,12 +3040,12 @@ void c_asm_block(CGTarget* T, const char* tmpl, const AsmConstraint* outs, } c_emit_c_string_literal(&t->body, outs[i].str ? outs[i].str : ""); cbuf_puts(&t->body, "("); - /* Outputs must be an lvalue. OPK_REG → the C reg is a plain local; this + /* Outputs must be an lvalue. OPK_LOCAL is a plain C local; this * works directly. OPK_LOCAL / OPK_INDIRECT also produce lvalues. */ - if (oo[i].kind == OPK_REG) { - c_ensure_reg(t, oo[i].v.reg, oo[i].type, (RegClass)oo[i].cls); + if (oo[i].kind == OPK_LOCAL) { + c_ensure_local(t, oo[i].v.local, oo[i].type); char rb[24]; - c_reg_name(oo[i].v.reg, rb, sizeof rb); + c_local_name(oo[i].v.local, rb, sizeof rb); cbuf_puts(&t->body, rb); } else { c_emit_addr_deref(t, oo[i], oo[i].type); @@ -3046,20 +3089,20 @@ void c_asm_block(CGTarget* T, const char* tmpl, const AsmConstraint* outs, * * Used by CG for non-integer literal pushes (mainly floats — * `cfree_cg_push_float`). Bytes are the target's ABI encoding of the value; we - * copy them into the dst reg via a static const byte array and __builtin_memcpy + * copy them into the dst local via a static const byte array and __builtin_memcpy * so any host C compiler sees the same bit pattern. 
*/ -void c_load_const(CGTarget* T, Operand dst, ConstBytes cb); +void c_load_const(CgTarget* T, Operand dst, ConstBytes cb); -void c_load_const(CGTarget* T, Operand dst, ConstBytes cb) { +void c_load_const(CgTarget* T, Operand dst, ConstBytes cb) { CTarget* t = (CTarget*)T; SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}; - if (dst.kind != OPK_REG) { - compiler_panic(t->c, loc, "C target: load_const dst must be REG"); + if (dst.kind != OPK_LOCAL) { + compiler_panic(t->c, loc, "C target: load_const dst must be LOCAL"); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); char buf[24]; - c_reg_name(dst.v.reg, buf, sizeof buf); + c_local_name(dst.v.local, buf, sizeof buf); u32 id = ++t->next_tmp; cbuf_puts(&t->body, " { static const uint8_t __k"); cbuf_put_u64(&t->body, (u64)id); @@ -3106,25 +3149,25 @@ static const char* c_memorder_token(MemOrder o) { return "__ATOMIC_SEQ_CST"; } -void c_atomic_load(CGTarget* T, Operand dst, Operand addr, MemAccess m, +void c_atomic_load(CgTarget* T, Operand dst, Operand addr, MemAccess m, MemOrder o); -void c_atomic_store(CGTarget* T, Operand addr, Operand src, MemAccess m, +void c_atomic_store(CgTarget* T, Operand addr, Operand src, MemAccess m, MemOrder o); -void c_atomic_rmw(CGTarget* T, AtomicOp op, Operand dst, Operand addr, +void c_atomic_rmw(CgTarget* T, AtomicOp op, Operand dst, Operand addr, Operand val, MemAccess m, MemOrder o); -void c_atomic_cas(CGTarget* T, Operand prior, Operand ok, Operand addr, +void c_atomic_cas(CgTarget* T, Operand prior, Operand ok, Operand addr, Operand expected, Operand desired, MemAccess m, MemOrder so, MemOrder fo); -void c_fence(CGTarget* T, MemOrder o); +void c_fence(CgTarget* T, MemOrder o); -void c_atomic_load(CGTarget* T, Operand dst, Operand addr, MemAccess m, +void c_atomic_load(CgTarget* T, Operand dst, Operand addr, MemAccess m, MemOrder o) { CTarget* t = (CTarget*)T; (void)m; c_assert_no_index(t, addr, "atomic_load"); - 
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* __atomic_load_n returns a value of the pointed-to type (dst.type). */ - c_emit_reg_assign_open(t, dst.v.reg, dst.type); + c_emit_local_assign_open(t, dst.v.local, dst.type); cbuf_puts(&t->body, "__atomic_load_n(("); c_emit_type(t, &t->body, dst.type); cbuf_puts(&t->body, "*)"); @@ -3132,10 +3175,10 @@ void c_atomic_load(CGTarget* T, Operand dst, Operand addr, MemAccess m, cbuf_puts(&t->body, ", "); cbuf_puts(&t->body, c_memorder_token(o)); cbuf_puts(&t->body, ")"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } -void c_atomic_store(CGTarget* T, Operand addr, Operand src, MemAccess m, +void c_atomic_store(CgTarget* T, Operand addr, Operand src, MemAccess m, MemOrder o) { CTarget* t = (CTarget*)T; (void)m; @@ -3171,7 +3214,7 @@ static const char* c_atomic_op_builtin(AtomicOp op) { return NULL; } -void c_atomic_rmw(CGTarget* T, AtomicOp op, Operand dst, Operand addr, +void c_atomic_rmw(CgTarget* T, AtomicOp op, Operand dst, Operand addr, Operand val, MemAccess m, MemOrder o) { CTarget* t = (CTarget*)T; (void)m; @@ -3181,9 +3224,9 @@ void c_atomic_rmw(CGTarget* T, AtomicOp op, Operand dst, Operand addr, if (!fn) { compiler_panic(t->c, loc, "C target: unknown atomic op %d", (int)op); } - c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_ensure_local(t, dst.v.local, dst.type); /* __atomic_fetch_* returns the prior value of the pointed-to type. 
*/ - c_emit_reg_assign_open(t, dst.v.reg, val.type); + c_emit_local_assign_open(t, dst.v.local, val.type); cbuf_puts(&t->body, fn); cbuf_puts(&t->body, "(("); c_emit_type(t, &t->body, val.type); @@ -3194,23 +3237,20 @@ void c_atomic_rmw(CGTarget* T, AtomicOp op, Operand dst, Operand addr, cbuf_puts(&t->body, ", "); cbuf_puts(&t->body, c_memorder_token(o)); cbuf_puts(&t->body, ")"); - c_emit_reg_assign_close(t); + c_emit_local_assign_close(t); } -void c_atomic_cas(CGTarget* T, Operand prior, Operand ok, Operand addr, +void c_atomic_cas(CgTarget* T, Operand prior, Operand ok, Operand addr, Operand expected, Operand desired, MemAccess m, MemOrder so, MemOrder fo) { CTarget* t = (CTarget*)T; (void)m; c_assert_no_index(t, addr, "atomic_cas"); /* gcc's __atomic_compare_exchange_n needs a real lvalue holding the - * expected value (it's updated on failure). We can't use `&prior_reg` - * directly because CG reuses reg ids across types — the C declaration may - * have been locked at a different type. Materialize a fresh scratch local - * typed by `prior.type`, then copy out to the prior reg using the existing - * uintptr_t bridge. */ - c_ensure_reg(t, prior.v.reg, prior.type, (RegClass)prior.cls); - c_ensure_reg(t, ok.v.reg, ok.type, (RegClass)ok.cls); + * expected value because it is updated on failure. Materialize a scratch + * local with the compare type, then copy it out to the prior result. 
*/ + c_ensure_local(t, prior.v.local, prior.type); + c_ensure_local(t, ok.v.local, ok.type); u32 id = ++t->next_tmp; cbuf_puts(&t->body, " { "); c_emit_type(t, &t->body, prior.type); @@ -3220,8 +3260,8 @@ void c_atomic_cas(CGTarget* T, Operand prior, Operand ok, Operand addr, c_emit_operand_as(t, expected, prior.type); cbuf_puts(&t->body, "; "); char ok_name[24], prior_name[24]; - c_reg_name(ok.v.reg, ok_name, sizeof ok_name); - c_reg_name(prior.v.reg, prior_name, sizeof prior_name); + c_local_name(ok.v.local, ok_name, sizeof ok_name); + c_local_name(prior.v.local, prior_name, sizeof prior_name); cbuf_puts(&t->body, ok_name); cbuf_puts(&t->body, " = ("); c_emit_type(t, &t->body, ok.type); @@ -3238,19 +3278,14 @@ void c_atomic_cas(CGTarget* T, Operand prior, Operand ok, Operand addr, cbuf_puts(&t->body, ", "); cbuf_puts(&t->body, c_memorder_token(fo)); cbuf_puts(&t->body, "); "); - /* prior reg = (DECL)(uintptr_t)__cas; */ + /* prior local = __cas; */ cbuf_puts(&t->body, prior_name); - cbuf_puts(&t->body, " = ("); - CfreeCgTypeId decl = t->reg_type[prior.v.reg]; - c_emit_type(t, &t->body, decl); - cbuf_puts(&t->body, ")"); - if (!c_type_is_float(t, decl)) cbuf_puts(&t->body, "(uintptr_t)"); - cbuf_puts(&t->body, "__cas"); + cbuf_puts(&t->body, " = __cas"); cbuf_put_u64(&t->body, (u64)id); cbuf_puts(&t->body, "; }\n"); } -void c_fence(CGTarget* T, MemOrder o) { +void c_fence(CgTarget* T, MemOrder o) { CTarget* t = (CTarget*)T; cbuf_puts(&t->body, " __atomic_thread_fence("); cbuf_puts(&t->body, c_memorder_token(o)); @@ -3289,7 +3324,7 @@ static void cbuf_put_line_filename(CBuf* b, CfreeSlice s) { cbuf_putc(b, '"'); } -void c_set_loc(CGTarget* T, SrcLoc l) { +void c_set_loc(CgTarget* T, SrcLoc l) { CTarget* t = (CTarget*)T; CfreeSlice file; @@ -3422,7 +3457,7 @@ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) { if (os->kind == SK_FUNC || os->kind == SK_IFUNC) return; if (os->kind == SK_SECTION || os->kind == SK_FILE) return; /* On Mach-O, 
obj_tls.c synthesizes `__tlv_bootstrap` as an SK_UNDEF - * extern for the TLV descriptor's first slot. The C target delegates all + * extern for the TLV descriptor's first field. The C target delegates all * TLS lowering to the host compiler via `_Thread_local`, so this * descriptor-time-only symbol has no place in the emitted source. */ if (os->kind == SK_UNDEF && t->c->target.obj == CFREE_OBJ_MACHO) { @@ -3735,7 +3770,7 @@ static void c_emit_data(CTarget* t) { /* === finalize / destroy === */ -void c_finalize(CGTarget* T) { +void c_finalize(CgTarget* T) { CTarget* t = (CTarget*)T; if (t->finalized) return; t->finalized = 1; @@ -3759,7 +3794,7 @@ void c_finalize(CGTarget* T) { if (t->body.len) c_writer_write(t, t->body.data, t->body.len); } -void c_destroy(CGTarget* T) { +void c_destroy(CgTarget* T) { CTarget* t = (CTarget*)T; Heap* h = t->c->ctx->heap; cbuf_fini(&t->forwards); @@ -3778,19 +3813,15 @@ void c_destroy(CGTarget* T) { h->free(h, t->local_static_entries, t->local_static_entries_cap * sizeof(*t->local_static_entries)); } - if (t->reg_declared) h->free(h, t->reg_declared, t->reg_cap); - if (t->reg_type) h->free(h, t->reg_type, t->reg_cap * sizeof(CfreeCgTypeId)); - if (t->slot_type) - h->free(h, t->slot_type, t->slot_cap * sizeof(CfreeCgTypeId)); + if (t->local_declared) h->free(h, t->local_declared, t->local_cap); + if (t->local_type) h->free(h, t->local_type, t->local_cap * sizeof(CfreeCgTypeId)); if (t->scopes) h->free(h, t->scopes, t->scopes_cap * sizeof(CScopeInfo)); - t->reg_declared = NULL; - t->reg_type = NULL; - t->slot_type = NULL; + t->local_declared = NULL; + t->local_type = NULL; t->scopes = NULL; t->local_static_syms = NULL; t->local_static_entries = NULL; - t->reg_cap = 0; - t->slot_cap = 0; + t->local_cap = 0; t->scopes_cap = 0; t->local_static_syms_cap = 0; t->local_static_entries_cap = 0; diff --git a/src/arch/c_target/internal.h b/src/arch/c_target/internal.h @@ -1,18 +1,19 @@ #ifndef CFREE_C_TARGET_INTERNAL_H #define 
CFREE_C_TARGET_INTERNAL_H -/* C-source emission CGTarget. See doc/CBACKEND.md. +/* C-source emission CgTarget. See doc/CBACKEND.md. * - * This target replaces the machine-code CGTarget when CodeOptions.emit_c_source - * is set. It writes target-locked C source to CodeOptions.c_source_writer - * instead of object bytes via MCEmitter. Operates with virtual_regs=1, so CG - * mints fresh Reg ids and never spills. */ + * This target is selected when CodeOptions.emit_c_source is set. It writes + * target-locked C source to CodeOptions.c_source_writer instead of object + * bytes. Operates with semantic temporary locals minted by CG. */ #include <cfree/core.h> -#include "arch/arch.h" +#include "cg/cgtarget.h" #include "core/core.h" +typedef CGLocal CLocal; + /* Heap-backed growable byte buffer. Used for the per-function declaration * and body buffers; CG needs decls at function top but doesn't surface them * before body emission, so we accumulate both and flush at func_end. */ @@ -40,7 +41,7 @@ typedef struct CLocalStaticLabelEntry { } CLocalStaticLabelEntry; typedef struct CTarget { - CGTarget base; + CgTarget base; Compiler* c; ObjBuilder* obj; @@ -104,19 +105,12 @@ typedef struct CTarget { u8 local_static_readonly; u8 pad_local_static; - /* Per-function regdecl tracking: for each Reg id seen, mark whether we - * have already emitted a declaration into `decls`. Sized by reg_cap. - * Grown lazily as new reg ids appear. */ - u8* reg_declared; - CfreeCgTypeId* reg_type; /* type each reg was first seen with */ - u32 reg_cap; - - /* Per-function frame-slot table. The C target invents its own slot ids; - * each slot becomes a `T slot_N;` declaration. slot_type[i] is the CG type - * the slot was declared with. */ - CfreeCgTypeId* slot_type; - u32 slot_cap; - u32 nslots; /* count of slots in current function */ + /* Per-function local-decl tracking: for each local id seen, mark whether we + * have already emitted a declaration into `decls`. Sized by local_cap. 
+ * Grown lazily as new local ids appear. */ + u8* local_declared; + CfreeCgTypeId* local_type; /* declared type for each semantic local */ + u32 local_cap; /* Splice bookmark: byte offset into body where the current function's body * region starts (right after the open brace). func_end uses this to insert @@ -134,8 +128,9 @@ typedef struct CTarget { /* Label minting: ids 1..next_label. 0 is reserved as LABEL_NONE. */ u32 next_label; + u32 next_local; - /* Stack of active scopes. CGScope handles are (slot_index + 1). */ + /* Stack of active scopes. CGScope handles are (scope_index + 1). */ struct CScopeInfo* scopes; u32 scopes_cap; u32 nscopes; @@ -164,18 +159,18 @@ typedef struct CScopeInfo { Label continue_label; } CScopeInfo; -CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w); +CgTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w); /* Helpers shared across emit.c. */ void c_emit_prologue(CTarget* t); -/* Ensure reg `r` (typed `type`, class `cls`) has been declared. */ -void c_ensure_reg(CTarget* t, Reg r, CfreeCgTypeId type, RegClass cls); -/* Get a stable C identifier for reg r. Writes into caller-supplied buf. */ -void c_reg_name(Reg r, char* out, size_t cap); +/* Ensure local `r` (typed `type`) has been declared. */ +void c_ensure_local(CTarget* t, CLocal r, CfreeCgTypeId type); +/* Get a stable C identifier for local r. Writes into caller-supplied buf. */ +void c_local_name(CLocal r, char* out, size_t cap); /* Write the C type for a CG int/float/ptr type to `b`. */ void c_emit_type(CTarget* t, CBuf* b, CfreeCgTypeId type); /* Write operand expression to body (e.g. "v3", "(int32_t)42"). Supports - * OPK_REG / OPK_IMM / OPK_LOCAL / OPK_GLOBAL. INDIRECT is only valid in + * OPK_LOCAL / OPK_IMM / OPK_GLOBAL. INDIRECT is only valid in * lvalue positions and is emitted via load/store/addr_of paths. 
*/ void c_emit_operand(CTarget* t, Operand op); /* Like c_emit_operand but wraps in an explicit signed/unsigned cast of the diff --git a/src/arch/c_target/target.c b/src/arch/c_target/target.c @@ -1,8 +1,8 @@ -/* C-source CGTarget construction and vtable wiring. +/* C-source CgTarget construction and vtable wiring. * * See doc/CBACKEND.md. The C target writes portable, target-locked C source * text to the CfreeWriter passed via CodeOptions.c_source_writer. CG operates - * with virtual_regs=1, so we never run register allocation or spilling. */ + * with semantic temporary locals. */ #include <string.h> @@ -11,184 +11,70 @@ #include "core/core.h" #include "core/heap.h" -/* Forward declarations for all CGTarget methods. Implementations either land +/* Forward declarations for all CgTarget methods. Implementations either land * in emit.c (working) or are stubs that compiler_panic. */ -void c_func_begin(CGTarget*, const CGFuncDesc*); -void c_func_end(CGTarget*); -void c_alias(CGTarget*, ObjSymId, ObjSymId, CfreeCgTypeId); -void c_ret(CGTarget*, const CGABIValue*); -void c_load_imm(CGTarget*, Operand, i64); -void c_copy(CGTarget*, Operand, Operand); -void c_binop(CGTarget*, BinOp, Operand, Operand, Operand); -void c_unop(CGTarget*, UnOp, Operand, Operand); -void c_cmp(CGTarget*, CmpOp, Operand, Operand, Operand); -void c_convert(CGTarget*, ConvKind, Operand, Operand); -void c_call(CGTarget*, const CGCallDesc*); -const char* c_tail_call_unrealizable_reason(CGTarget*, const CGCallDesc*); -void c_load(CGTarget*, Operand, Operand, MemAccess); -void c_store(CGTarget*, Operand, Operand, MemAccess); -void c_addr_of(CGTarget*, Operand, Operand); -CGLocalStorage c_param(CGTarget*, const CGParamDesc*); -CGLocalStorage c_local(CGTarget*, const CGLocalDesc*); -void c_local_addr(CGTarget*, Operand, const CGLocalDesc*, CGLocalStorage); -FrameSlot c_frame_slot(CGTarget*, const FrameSlotDesc*); -Label c_label_new(CGTarget*); -void c_label_place(CGTarget*, Label); -void c_jump(CGTarget*, 
Label); -void c_cmp_branch(CGTarget*, CmpOp, Operand, Operand, Label); -void c_switch_(CGTarget*, const CGSwitchDesc*); -void c_indirect_branch(CGTarget*, Operand, const Label*, u32); -void c_load_label_addr(CGTarget*, Operand, Label); -int c_local_static_data_begin(CGTarget*, const CGLocalStaticDataDesc*); -void c_local_static_data_write(CGTarget*, const u8*, u64); -void c_local_static_data_label_addr(CGTarget*, Label, i64, u32, u32); -void c_local_static_data_end(CGTarget*); -CGScope c_scope_begin(CGTarget*, const CGScopeDesc*); -void c_scope_else(CGTarget*, CGScope); -void c_scope_end(CGTarget*, CGScope); -void c_break_to(CGTarget*, CGScope); -void c_continue_to(CGTarget*, CGScope); -void c_set_loc(CGTarget*, SrcLoc); -void c_finalize(CGTarget*); -void c_destroy(CGTarget*); -void c_intrinsic(CGTarget*, IntrinKind, Operand*, u32, const Operand*, u32); -void c_alloca(CGTarget*, Operand, Operand, u32); -void c_va_start(CGTarget*, Operand); -void c_va_arg(CGTarget*, Operand, Operand, CfreeCgTypeId); -void c_va_end(CGTarget*, Operand); -void c_va_copy(CGTarget*, Operand, Operand); -void c_copy_bytes(CGTarget*, Operand, Operand, AggregateAccess); -void c_set_bytes(CGTarget*, Operand, Operand, AggregateAccess); -void c_load_const(CGTarget*, Operand, ConstBytes); -void c_asm_block(CGTarget*, const char*, const AsmConstraint*, u32, Operand*, +void c_func_begin(CgTarget*, const CGFuncDesc*); +void c_func_end(CgTarget*); +void c_alias(CgTarget*, ObjSymId, ObjSymId, CfreeCgTypeId); +void c_ret(CgTarget*, const CGLocal*, u32); +void c_load_imm(CgTarget*, Operand, i64); +void c_copy(CgTarget*, Operand, Operand); +void c_binop(CgTarget*, BinOp, Operand, Operand, Operand); +void c_unop(CgTarget*, UnOp, Operand, Operand); +void c_cmp(CgTarget*, CmpOp, Operand, Operand, Operand); +void c_convert(CgTarget*, ConvKind, Operand, Operand); +void c_call(CgTarget*, const CGCallDesc*); +const char* c_tail_call_unrealizable_reason(CgTarget*, const CGCallDesc*); +void c_load(CgTarget*, 
Operand, Operand, MemAccess); +void c_store(CgTarget*, Operand, Operand, MemAccess); +void c_addr_of(CgTarget*, Operand, Operand); +CGLocal c_param(CgTarget*, const CGParamDesc*); +CGLocal c_local(CgTarget*, const CGLocalDesc*); +void c_local_addr(CgTarget*, Operand, const CGLocalDesc*, CGLocal); +Label c_label_new(CgTarget*); +void c_label_place(CgTarget*, Label); +void c_jump(CgTarget*, Label); +void c_cmp_branch(CgTarget*, CmpOp, Operand, Operand, Label); +void c_switch_(CgTarget*, const CGSwitchDesc*); +void c_indirect_branch(CgTarget*, Operand, const Label*, u32); +void c_load_label_addr(CgTarget*, Operand, Label); +int c_local_static_data_begin(CgTarget*, const CGLocalStaticDataDesc*); +void c_local_static_data_write(CgTarget*, const u8*, u64); +void c_local_static_data_label_addr(CgTarget*, Label, i64, u32, u32); +void c_local_static_data_end(CgTarget*); +CGScope c_scope_begin(CgTarget*, const CGScopeDesc*); +void c_scope_else(CgTarget*, CGScope); +void c_scope_end(CgTarget*, CGScope); +void c_break_to(CgTarget*, CGScope); +void c_continue_to(CgTarget*, CGScope); +void c_set_loc(CgTarget*, SrcLoc); +void c_finalize(CgTarget*); +void c_destroy(CgTarget*); +void c_intrinsic(CgTarget*, IntrinKind, Operand*, u32, const Operand*, u32); +void c_alloca(CgTarget*, Operand, Operand, u32); +void c_va_start(CgTarget*, Operand); +void c_va_arg(CgTarget*, Operand, Operand, CfreeCgTypeId); +void c_va_end(CgTarget*, Operand); +void c_va_copy(CgTarget*, Operand, Operand); +void c_copy_bytes(CgTarget*, Operand, Operand, AggregateAccess); +void c_set_bytes(CgTarget*, Operand, Operand, AggregateAccess); +void c_load_const(CgTarget*, Operand, ConstBytes); +void c_asm_block(CgTarget*, const char*, const AsmConstraint*, u32, Operand*, const AsmConstraint*, u32, const Operand*, const Sym*, u32); -void c_bitfield_load(CGTarget*, Operand, Operand, BitFieldAccess); -void c_bitfield_store(CGTarget*, Operand, Operand, BitFieldAccess); -void c_tls_addr_of(CGTarget*, Operand, ObjSymId, 
i64); -void c_atomic_load(CGTarget*, Operand, Operand, MemAccess, MemOrder); -void c_atomic_store(CGTarget*, Operand, Operand, MemAccess, MemOrder); -void c_atomic_rmw(CGTarget*, AtomicOp, Operand, Operand, Operand, MemAccess, +void c_bitfield_load(CgTarget*, Operand, Operand, BitFieldAccess); +void c_bitfield_store(CgTarget*, Operand, Operand, BitFieldAccess); +void c_tls_addr_of(CgTarget*, Operand, ObjSymId, i64); +void c_atomic_load(CgTarget*, Operand, Operand, MemAccess, MemOrder); +void c_atomic_store(CgTarget*, Operand, Operand, MemAccess, MemOrder); +void c_atomic_rmw(CgTarget*, AtomicOp, Operand, Operand, Operand, MemAccess, MemOrder); -void c_atomic_cas(CGTarget*, Operand, Operand, Operand, Operand, Operand, +void c_atomic_cas(CgTarget*, Operand, Operand, Operand, Operand, Operand, MemAccess, MemOrder, MemOrder); -void c_fence(CGTarget*, MemOrder); +void c_fence(CgTarget*, MemOrder); -/* Unimplemented stubs panic with the method name in the message — the toy - * harness recognizes "C target: method <name> not implemented" as a graceful - * SKIP rather than a hard FAIL. */ -#define C_UNIMPL(name) \ - compiler_panic( \ - ((CTarget*)t)->c, \ - ((CTarget*)t)->cur_fn ? ((CTarget*)t)->cur_fn->loc : (SrcLoc){0, 0, 0}, \ - "C target: method " name " not implemented") +static void cgt_cleanup(void* arg) { cgtarget_free((CgTarget*)arg); } -static void c_unimpl_spill_reg(CGTarget* t, Operand a, FrameSlot s, - MemAccess m) { - (void)a; - (void)s; - (void)m; - C_UNIMPL("spill_reg"); -} - -static void c_unimpl_reload_reg(CGTarget* t, Operand a, FrameSlot s, - MemAccess m) { - (void)a; - (void)s; - (void)m; - C_UNIMPL("reload_reg"); -} - -/* Register-pool descriptors: virtual_regs=1 means CG skips these, but the - * non-null contract requires a callable. Return empty pools. 
*/ -static void c_no_regs(CGTarget* t, RegClass cls, const Reg** out, u32* n) { - (void)t; - (void)cls; - *out = NULL; - *n = 0; -} - -static void c_no_phys_regs(CGTarget* t, RegClass cls, const CGPhysRegInfo** out, - u32* n) { - (void)t; - (void)cls; - *out = NULL; - *n = 0; -} - -static int c_is_caller_saved(CGTarget* t, RegClass cls, Reg r) { - (void)t; - (void)cls; - (void)r; - return 0; -} - -static u32 c_zero_mask(CGTarget* t, const CGCallDesc* d, RegClass cls) { - (void)t; - (void)d; - (void)cls; - return 0; -} -static u32 c_zero_ret_mask(CGTarget* t, const ABIFuncInfo* f, RegClass cls) { - (void)t; - (void)f; - (void)cls; - return 0; -} -static u32 c_zero_cs_mask(CGTarget* t, RegClass cls) { - (void)t; - (void)cls; - return 0; -} - -static void c_noop_plan_regs(CGTarget* t, RegClass cls, const Reg* r, u32 n) { - (void)t; - (void)cls; - (void)r; - (void)n; -} -static void c_noop_reserve_regs(CGTarget* t, RegClass cls, const Reg* r, - u32 n) { - (void)t; - (void)cls; - (void)r; - (void)n; -} -static u32 c_call_stack_size_zero(CGTarget* t, const CGCallDesc* d) { - (void)t; - (void)d; - return 0; -} - -static void c_unimpl_plan_call(CGTarget* t, const CGCallDesc* d, - CGCallPlan* p) { - (void)d; - (void)p; - C_UNIMPL("plan_call"); -} -static void c_unimpl_load_call_arg(CGTarget* t, Operand d, - const CGCallPlanMove* m) { - (void)d; - (void)m; - C_UNIMPL("load_call_arg"); -} -static void c_unimpl_store_call_arg(CGTarget* t, const CGCallPlanMove* m) { - (void)m; - C_UNIMPL("store_call_arg"); -} -static void c_unimpl_store_call_ret(CGTarget* t, const CGCallPlanRet* r, - Operand s) { - (void)r; - (void)s; - C_UNIMPL("store_call_ret"); -} -static void c_unimpl_emit_call_plan(CGTarget* t, const CGCallPlan* p) { - (void)p; - C_UNIMPL("emit_call_plan"); -} - -static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } - -CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { +CgTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) 
{ CTarget* x = arena_new(c->tu, CTarget); memset(x, 0, sizeof *x); @@ -201,37 +87,19 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { cbuf_init(&x->decls, c->ctx->heap); cbuf_init(&x->body, c->ctx->heap); - CGTarget* t = &x->base; + CgTarget* t = &x->base; t->c = c; t->obj = o; - t->mc = NULL; - t->virtual_regs = 1; /* ---- function lifecycle ---- */ t->func_begin = c_func_begin; - t->func_begin_known_frame = NULL; t->func_end = c_func_end; t->alias = c_alias; - /* ---- frame slots and locals ---- */ - t->frame_slot = c_frame_slot; + /* ---- locals ---- */ t->local = c_local; t->local_addr = c_local_addr; t->param = c_param; - t->spill_reg = c_unimpl_spill_reg; - t->reload_reg = c_unimpl_reload_reg; - - /* ---- regalloc coordination (virtual_regs => mostly inert) ---- */ - t->get_allocable_regs = c_no_regs; - t->get_phys_regs = c_no_phys_regs; - t->get_scratch_regs = c_no_regs; - t->is_caller_saved = c_is_caller_saved; - t->call_clobber_mask = c_zero_mask; - t->return_reg_mask = c_zero_ret_mask; - t->callee_save_mask = c_zero_cs_mask; - t->plan_hard_regs = c_noop_plan_regs; - t->reserve_hard_regs = c_noop_reserve_regs; - t->call_stack_size = c_call_stack_size_zero; /* ---- labels and control flow ---- */ t->label_new = c_label_new; @@ -273,11 +141,6 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { /* ---- calls / return ---- */ t->call = c_call; t->tail_call_unrealizable_reason = c_tail_call_unrealizable_reason; - t->plan_call = c_unimpl_plan_call; - t->load_call_arg = c_unimpl_load_call_arg; - t->store_call_arg = c_unimpl_store_call_arg; - t->store_call_ret = c_unimpl_store_call_ret; - t->emit_call_plan = c_unimpl_emit_call_plan; t->ret = c_ret; /* ---- alloca / varargs ---- */ @@ -297,7 +160,6 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { /* ---- intrinsics / asm ---- */ t->intrinsic = c_intrinsic; t->asm_block = c_asm_block; - t->resolve_reg_name = NULL; t->set_loc = c_set_loc; 
t->finalize = c_finalize; @@ -307,9 +169,9 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { return t; } -static CGTarget* c_target_backend_make(Compiler* c, ObjBuilder* o, +static CgTarget* c_target_backend_make(Compiler* c, ObjBuilder* o, const CfreeCodeOptions* opts) { - /* c_target ignores opt_level, debug_info, and MCEmitter entirely — see + /* c_target ignores opt_level and debug_info entirely — see * doc/CBACKEND.md §"Sequencing with opt". It only needs the writer. */ if (!opts || !opts->c_source_writer) { compiler_panic(c, (SrcLoc){0, 0, 0}, diff --git a/src/arch/cgtarget.c b/src/arch/cgtarget.c @@ -1,4 +1,4 @@ -/* Public CGTarget constructor — dispatches through the registered arch impl. +/* Public CgTarget constructor — dispatches through the registered arch impl. * * The lifecycle helpers (cgtarget_finalize, cgtarget_free) are arch-agnostic * shims over the vtable. */ @@ -10,10 +10,10 @@ #include "cg/type.h" #include "debug/debug.h" -CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { +CgTarget* cgtarget_new(Compiler* c, ObjBuilder* o) { const ArchImpl* arch = arch_for_compiler(c); if (arch && arch->cgtarget_new) { - return arch->cgtarget_new(c, o, m); + return arch->cgtarget_new(c, o, NULL); } { SrcLoc loc = {0, 0, 0}; @@ -22,11 +22,11 @@ CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { } } -void cgtarget_finalize(CGTarget* t) { +void cgtarget_finalize(CgTarget* t) { if (t && t->finalize) t->finalize(t); } -void cgtarget_free(CGTarget* t) { +void cgtarget_free(CgTarget* t) { if (!t) return; /* Arena-backed; nothing to free. */ } @@ -63,15 +63,15 @@ CfreeStatus cg_mc_debug_new(Compiler* c, ObjBuilder* o, /* Default fold for backends without a native indexed addressing mode. 
* - * If `addr` carries an index register (addr.v.ind.index != REG_NONE), + * If `addr` carries an index local (addr.v.ind.index != CG_LOCAL_NONE), * materialize `base + (index << log2_scale)` into `scratch` and return a plain - * OPK_INDIRECT(scratch, ofs) with `index == REG_NONE`. Otherwise return `addr` + * OPK_INDIRECT(scratch, ofs) with `index == CG_LOCAL_NONE`. Otherwise return `addr` * unchanged. * * The caller supplies `scratch` from its own scratch pool and is responsible * for freeing it after the memop completes. The returned operand keeps the * caller's `type` so the backend's load/store sees the same access type. */ -Operand arch_lower_indexed(CGTarget* t, Operand addr, Reg scratch) { +Operand arch_lower_indexed(CgTarget* t, Operand addr, Reg scratch) { Operand scratch_op; Operand shifted; CfreeCgTypeId ty; @@ -81,42 +81,37 @@ Operand arch_lower_indexed(CGTarget* t, Operand addr, Reg scratch) { ty = builtin_id(CFREE_CG_BUILTIN_I64); log2 = addr.v.ind.log2_scale; - scratch_op.kind = OPK_REG; - scratch_op.cls = RC_INT; - scratch_op.pad = 0; + memset(&scratch_op, 0, sizeof scratch_op); + scratch_op.kind = OPK_LOCAL; scratch_op.type = ty; - scratch_op.v.reg = scratch; + scratch_op.v.local = scratch; - shifted.kind = OPK_REG; - shifted.cls = RC_INT; - shifted.pad = 0; + memset(&shifted, 0, sizeof shifted); + shifted.kind = OPK_LOCAL; shifted.type = ty; - shifted.v.reg = addr.v.ind.index; + shifted.v.local = addr.v.ind.index; if (log2 == 0) { /* index * 1: just add the index directly to the base. 
*/ Operand base_op; - base_op.kind = OPK_REG; - base_op.cls = RC_INT; - base_op.pad = 0; + memset(&base_op, 0, sizeof base_op); + base_op.kind = OPK_LOCAL; base_op.type = ty; - base_op.v.reg = addr.v.ind.base; + base_op.v.local = addr.v.ind.base; t->binop(t, BO_IADD, scratch_op, base_op, shifted); } else { /* scratch = index << log2 */ Operand shamt; Operand base_op; + memset(&shamt, 0, sizeof shamt); shamt.kind = OPK_IMM; - shamt.cls = RC_INT; - shamt.pad = 0; shamt.type = ty; shamt.v.imm = (i64)log2; t->binop(t, BO_SHL, scratch_op, shifted, shamt); - base_op.kind = OPK_REG; - base_op.cls = RC_INT; - base_op.pad = 0; + memset(&base_op, 0, sizeof base_op); + base_op.kind = OPK_LOCAL; base_op.type = ty; - base_op.v.reg = addr.v.ind.base; + base_op.v.local = addr.v.ind.base; /* scratch = base + scratch */ t->binop(t, BO_IADD, scratch_op, base_op, scratch_op); } @@ -125,7 +120,6 @@ Operand arch_lower_indexed(CGTarget* t, Operand addr, Reg scratch) { Operand out; memset(&out, 0, sizeof out); out.kind = OPK_INDIRECT; - out.cls = RC_INT; out.type = ty; out.v.ind.base = scratch; out.v.ind.index = REG_NONE; diff --git a/src/arch/check_target.c b/src/arch/check_target.c @@ -4,164 +4,71 @@ #include "core/arena.h" typedef struct CheckTarget { - CGTarget base; + CgTarget base; Compiler* c; ObjBuilder* obj; Label next_label; - FrameSlot next_slot; + CGLocal next_local; CGScope next_scope; } CheckTarget; -static CheckTarget* check_of(CGTarget* t) { return (CheckTarget*)t; } +static CheckTarget* check_of(CgTarget* t) { return (CheckTarget*)t; } -static void check_func_begin(CGTarget* t, const CGFuncDesc* d) { +static void check_func_begin(CgTarget* t, const CGFuncDesc* d) { (void)t; (void)d; } -static void check_func_end(CGTarget* t) { (void)t; } +static void check_func_end(CgTarget* t) { (void)t; } -static void check_alias(CGTarget* t, ObjSymId a, ObjSymId b, CfreeCgTypeId ty) { +static void check_alias(CgTarget* t, ObjSymId a, ObjSymId b, CfreeCgTypeId ty) { (void)t; (void)a; 
(void)b; (void)ty; } -static FrameSlot check_frame_slot(CGTarget* t, const FrameSlotDesc* d) { +static CGLocal check_local_id(CgTarget* t) { CheckTarget* x = check_of(t); - (void)d; - if (++x->next_slot == FRAME_SLOT_NONE) ++x->next_slot; - return x->next_slot; -} - -static CGLocalStorage check_local(CGTarget* t, const CGLocalDesc* d) { - CGLocalStorage s; - FrameSlotDesc fsd; - memset(&s, 0, sizeof s); - memset(&fsd, 0, sizeof fsd); - fsd.type = d ? d->type : CFREE_CG_TYPE_NONE; - fsd.name = d ? d->name : 0; - fsd.loc = d ? d->loc : (SrcLoc){0, 0, 0}; - fsd.size = d ? d->size : 0; - fsd.align = d ? d->align : 0; - fsd.kind = FS_LOCAL; - s.kind = CG_LOCAL_STORAGE_FRAME; - s.v.frame_slot = check_frame_slot(t, &fsd); - return s; -} - -static CGLocalStorage check_param(CGTarget* t, const CGParamDesc* d) { - CGLocalStorage s; - FrameSlotDesc fsd; - memset(&s, 0, sizeof s); - memset(&fsd, 0, sizeof fsd); - fsd.type = d ? d->type : CFREE_CG_TYPE_NONE; - fsd.name = d ? d->name : 0; - fsd.loc = d ? d->loc : (SrcLoc){0, 0, 0}; - fsd.size = d ? d->size : 0; - fsd.align = d ? 
d->align : 0; - fsd.kind = FS_PARAM; - s.kind = CG_LOCAL_STORAGE_FRAME; - s.v.frame_slot = check_frame_slot(t, &fsd); - return s; -} - -static void check_local_addr(CGTarget* t, Operand dst, const CGLocalDesc* d, - CGLocalStorage s) { - (void)t; - (void)dst; - (void)d; - (void)s; -} - -static void check_spill_reload(CGTarget* t, Operand r, FrameSlot s, - MemAccess m) { - (void)t; - (void)r; - (void)s; - (void)m; + if (++x->next_local == CG_LOCAL_NONE) ++x->next_local; + return x->next_local; } -static void check_no_regs(CGTarget* t, RegClass cls, const Reg** out, u32* n) { - (void)t; - (void)cls; - if (out) *out = NULL; - if (n) *n = 0; -} - -static void check_no_phys_regs(CGTarget* t, RegClass cls, - const CGPhysRegInfo** out, u32* n) { - (void)t; - (void)cls; - if (out) *out = NULL; - if (n) *n = 0; -} - -static int check_is_caller_saved(CGTarget* t, RegClass cls, Reg r) { - (void)t; - (void)cls; - (void)r; - return 0; -} - -static u32 check_zero_call_mask(CGTarget* t, const CGCallDesc* d, - RegClass cls) { - (void)t; +static CGLocal check_local(CgTarget* t, const CGLocalDesc* d) { (void)d; - (void)cls; - return 0; + return check_local_id(t); } -static u32 check_zero_ret_mask(CGTarget* t, const ABIFuncInfo* f, - RegClass cls) { - (void)t; - (void)f; - (void)cls; - return 0; -} - -static u32 check_zero_cs_mask(CGTarget* t, RegClass cls) { - (void)t; - (void)cls; - return 0; -} - -static void check_plan_regs(CGTarget* t, RegClass cls, const Reg* regs, u32 n) { - (void)t; - (void)cls; - (void)regs; - (void)n; +static CGLocal check_param(CgTarget* t, const CGParamDesc* d) { + (void)d; + return check_local_id(t); } -static u32 check_call_stack_size(CGTarget* t, const CGCallDesc* d) { +static void check_local_addr(CgTarget* t, Operand dst, const CGLocalDesc* d, + CGLocal s) { (void)t; + (void)dst; (void)d; - return 0; + (void)s; } -static Label check_label_new(CGTarget* t) { +static Label check_label_new(CgTarget* t) { CheckTarget* x = check_of(t); if (++x->next_label 
== LABEL_NONE) ++x->next_label; return x->next_label; } -static void check_label_place(CGTarget* t, Label l) { +static void check_label_place(CgTarget* t, Label l) { (void)t; (void)l; } -static MCLabel check_cg_label_to_mc_label(CGTarget* t, Label l) { - (void)t; - return (MCLabel)l; -} - -static void check_jump(CGTarget* t, Label l) { +static void check_jump(CgTarget* t, Label l) { (void)t; (void)l; } -static void check_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, +static void check_cmp_branch(CgTarget* t, CmpOp op, Operand a, Operand b, Label l) { (void)t; (void)op; @@ -170,12 +77,12 @@ static void check_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, (void)l; } -static void check_switch(CGTarget* t, const CGSwitchDesc* d) { +static void check_switch(CgTarget* t, const CGSwitchDesc* d) { (void)t; (void)d; } -static void check_indirect_branch(CGTarget* t, Operand a, const Label* targets, +static void check_indirect_branch(CgTarget* t, Operand a, const Label* targets, u32 ntargets) { (void)t; (void)a; @@ -183,27 +90,27 @@ static void check_indirect_branch(CGTarget* t, Operand a, const Label* targets, (void)ntargets; } -static void check_load_label_addr(CGTarget* t, Operand dst, Label l) { +static void check_load_label_addr(CgTarget* t, Operand dst, Label l) { (void)t; (void)dst; (void)l; } -static int check_local_static_data_begin(CGTarget* t, +static int check_local_static_data_begin(CgTarget* t, const CGLocalStaticDataDesc* d) { (void)t; (void)d; return 1; } -static void check_local_static_data_write(CGTarget* t, const u8* data, +static void check_local_static_data_write(CgTarget* t, const u8* data, u64 len) { (void)t; (void)data; (void)len; } -static void check_local_static_data_label_addr(CGTarget* t, Label target, +static void check_local_static_data_label_addr(CgTarget* t, Label target, i64 addend, u32 width, u32 address_space) { (void)t; @@ -213,62 +120,62 @@ static void check_local_static_data_label_addr(CGTarget* t, Label target, 
(void)address_space; } -static void check_local_static_data_end(CGTarget* t) { (void)t; } +static void check_local_static_data_end(CgTarget* t) { (void)t; } -static CGScope check_scope_begin(CGTarget* t, const CGScopeDesc* d) { +static CGScope check_scope_begin(CgTarget* t, const CGScopeDesc* d) { CheckTarget* x = check_of(t); (void)d; if (++x->next_scope == CG_SCOPE_NONE) ++x->next_scope; return x->next_scope; } -static void check_scope_else(CGTarget* t, CGScope s) { +static void check_scope_else(CgTarget* t, CGScope s) { (void)t; (void)s; } -static void check_scope_end(CGTarget* t, CGScope s) { +static void check_scope_end(CgTarget* t, CGScope s) { (void)t; (void)s; } -static void check_scope_xfer(CGTarget* t, CGScope s) { +static void check_scope_xfer(CgTarget* t, CGScope s) { (void)t; (void)s; } -static void check_load_imm(CGTarget* t, Operand dst, i64 imm) { +static void check_load_imm(CgTarget* t, Operand dst, i64 imm) { (void)t; (void)dst; (void)imm; } -static void check_load_const(CGTarget* t, Operand dst, ConstBytes c) { +static void check_load_const(CgTarget* t, Operand dst, ConstBytes c) { (void)t; (void)dst; (void)c; } -static void check_copy(CGTarget* t, Operand dst, Operand src) { +static void check_copy(CgTarget* t, Operand dst, Operand src) { (void)t; (void)dst; (void)src; } -static void check_load_store(CGTarget* t, Operand a, Operand b, MemAccess m) { +static void check_load_store(CgTarget* t, Operand a, Operand b, MemAccess m) { (void)t; (void)a; (void)b; (void)m; } -static void check_addr_of(CGTarget* t, Operand dst, Operand src) { +static void check_addr_of(CgTarget* t, Operand dst, Operand src) { (void)t; (void)dst; (void)src; } -static void check_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, +static void check_tls_addr_of(CgTarget* t, Operand dst, ObjSymId sym, i64 addend) { (void)t; (void)dst; @@ -276,7 +183,7 @@ static void check_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, (void)addend; } -static void check_aggregate(CGTarget* 
t, Operand dst, Operand src, +static void check_aggregate(CgTarget* t, Operand dst, Operand src, AggregateAccess a) { (void)t; (void)dst; @@ -284,7 +191,7 @@ static void check_aggregate(CGTarget* t, Operand dst, Operand src, (void)a; } -static void check_bitfield(CGTarget* t, Operand a, Operand b, +static void check_bitfield(CgTarget* t, Operand a, Operand b, BitFieldAccess bf) { (void)t; (void)a; @@ -292,7 +199,7 @@ static void check_bitfield(CGTarget* t, Operand a, Operand b, (void)bf; } -static void check_binop(CGTarget* t, BinOp op, Operand dst, Operand a, +static void check_binop(CgTarget* t, BinOp op, Operand dst, Operand a, Operand b) { (void)t; (void)op; @@ -301,14 +208,14 @@ static void check_binop(CGTarget* t, BinOp op, Operand dst, Operand a, (void)b; } -static void check_unop(CGTarget* t, UnOp op, Operand dst, Operand a) { +static void check_unop(CgTarget* t, UnOp op, Operand dst, Operand a) { (void)t; (void)op; (void)dst; (void)a; } -static void check_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, +static void check_cmp(CgTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { (void)t; (void)op; @@ -317,61 +224,32 @@ static void check_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, (void)b; } -static void check_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) { +static void check_convert(CgTarget* t, ConvKind k, Operand dst, Operand src) { (void)t; (void)k; (void)dst; (void)src; } -static void check_call(CGTarget* t, const CGCallDesc* d) { +static void check_call(CgTarget* t, const CGCallDesc* d) { (void)t; (void)d; } -static void check_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* p) { - (void)t; - (void)d; - if (p) memset(p, 0, sizeof *p); -} - -static void check_load_call_arg(CGTarget* t, Operand dst, - const CGCallPlanMove* m) { - (void)t; - (void)dst; - (void)m; -} - -static void check_store_call_arg(CGTarget* t, const CGCallPlanMove* m) { - (void)t; - (void)m; -} - -static void check_store_call_ret(CGTarget* t, const 
CGCallPlanRet* r, - Operand src) { - (void)t; - (void)r; - (void)src; -} - -static void check_emit_call_plan(CGTarget* t, const CGCallPlan* p) { - (void)t; - (void)p; -} - -static void check_ret(CGTarget* t, const CGABIValue* v) { +static void check_ret(CgTarget* t, const CGLocal* values, u32 nvalues) { (void)t; - (void)v; + (void)values; + (void)nvalues; } -static void check_alloca(CGTarget* t, Operand dst, Operand size, u32 align) { +static void check_alloca(CgTarget* t, Operand dst, Operand size, u32 align) { (void)t; (void)dst; (void)size; (void)align; } -static void check_va_arg(CGTarget* t, Operand dst, Operand ap, +static void check_va_arg(CgTarget* t, Operand dst, Operand ap, CfreeCgTypeId ty) { (void)t; (void)dst; @@ -379,18 +257,18 @@ static void check_va_arg(CGTarget* t, Operand dst, Operand ap, (void)ty; } -static void check_one_operand(CGTarget* t, Operand a) { +static void check_one_operand(CgTarget* t, Operand a) { (void)t; (void)a; } -static void check_two_operands(CGTarget* t, Operand a, Operand b) { +static void check_two_operands(CgTarget* t, Operand a, Operand b) { (void)t; (void)a; (void)b; } -static void check_intrinsic(CGTarget* t, IntrinKind k, Operand* dsts, u32 ndst, +static void check_intrinsic(CgTarget* t, IntrinKind k, Operand* dsts, u32 ndst, const Operand* args, u32 narg) { (void)t; (void)k; @@ -400,7 +278,7 @@ static void check_intrinsic(CGTarget* t, IntrinKind k, Operand* dsts, u32 ndst, (void)narg; } -static void check_asm_block(CGTarget* t, const char* tmpl, +static void check_asm_block(CgTarget* t, const char* tmpl, const AsmConstraint* outs, u32 nout, Operand* out_ops, const AsmConstraint* ins, u32 nin, const Operand* in_ops, const Sym* clobbers, @@ -417,7 +295,7 @@ static void check_asm_block(CGTarget* t, const char* tmpl, (void)nclob; } -static void check_atomic_load(CGTarget* t, Operand dst, Operand addr, +static void check_atomic_load(CgTarget* t, Operand dst, Operand addr, MemAccess m, MemOrder order) { (void)t; (void)dst; 
@@ -426,7 +304,7 @@ static void check_atomic_load(CGTarget* t, Operand dst, Operand addr, (void)order; } -static void check_atomic_store(CGTarget* t, Operand addr, Operand src, +static void check_atomic_store(CgTarget* t, Operand addr, Operand src, MemAccess m, MemOrder order) { (void)t; (void)addr; @@ -435,7 +313,7 @@ static void check_atomic_store(CGTarget* t, Operand addr, Operand src, (void)order; } -static void check_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, +static void check_atomic_rmw(CgTarget* t, AtomicOp op, Operand dst, Operand addr, Operand val, MemAccess m, MemOrder order) { (void)t; @@ -447,7 +325,7 @@ static void check_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, (void)order; } -static void check_atomic_cas(CGTarget* t, Operand prior, Operand ok, +static void check_atomic_cas(CgTarget* t, Operand prior, Operand ok, Operand addr, Operand expected, Operand desired, MemAccess m, MemOrder success, MemOrder failure) { (void)t; @@ -461,24 +339,24 @@ static void check_atomic_cas(CGTarget* t, Operand prior, Operand ok, (void)failure; } -static void check_fence(CGTarget* t, MemOrder order) { +static void check_fence(CgTarget* t, MemOrder order) { (void)t; (void)order; } -static void check_set_loc(CGTarget* t, SrcLoc loc) { +static void check_set_loc(CgTarget* t, SrcLoc loc) { (void)t; (void)loc; } -static void check_finalize(CGTarget* t) { (void)t; } +static void check_finalize(CgTarget* t) { (void)t; } -static void check_destroy(CGTarget* t) { (void)t; } +static void check_destroy(CgTarget* t) { (void)t; } -static CGTarget* check_backend_make(Compiler* c, ObjBuilder* o, +static CgTarget* check_backend_make(Compiler* c, ObjBuilder* o, const CfreeCodeOptions* opts) { CheckTarget* x; - CGTarget* t; + CgTarget* t; (void)opts; x = arena_new(c->tu, CheckTarget); if (!x) return NULL; @@ -488,30 +366,15 @@ static CGTarget* check_backend_make(Compiler* c, ObjBuilder* o, t = &x->base; t->c = c; t->obj = o; - t->virtual_regs = 1; t->func_begin = 
check_func_begin; t->func_end = check_func_end; t->alias = check_alias; - t->frame_slot = check_frame_slot; t->local = check_local; t->local_addr = check_local_addr; t->param = check_param; - t->spill_reg = check_spill_reload; - t->reload_reg = check_spill_reload; - t->get_allocable_regs = check_no_regs; - t->get_phys_regs = check_no_phys_regs; - t->get_scratch_regs = check_no_regs; - t->is_caller_saved = check_is_caller_saved; - t->call_clobber_mask = check_zero_call_mask; - t->return_reg_mask = check_zero_ret_mask; - t->callee_save_mask = check_zero_cs_mask; - t->plan_hard_regs = check_plan_regs; - t->reserve_hard_regs = check_plan_regs; - t->call_stack_size = check_call_stack_size; t->label_new = check_label_new; t->label_place = check_label_place; - t->cg_label_to_mc_label = check_cg_label_to_mc_label; t->jump = check_jump; t->cmp_branch = check_cmp_branch; t->switch_ = check_switch; @@ -542,11 +405,6 @@ static CGTarget* check_backend_make(Compiler* c, ObjBuilder* o, t->cmp = check_cmp; t->convert = check_convert; t->call = check_call; - t->plan_call = check_plan_call; - t->load_call_arg = check_load_call_arg; - t->store_call_arg = check_store_call_arg; - t->store_call_ret = check_store_call_ret; - t->emit_call_plan = check_emit_call_plan; t->ret = check_ret; t->alloca_ = check_alloca; t->va_start_ = check_one_operand; diff --git a/src/arch/regalloc.c b/src/arch/regalloc.c @@ -80,25 +80,25 @@ void cg_simple_regalloc_init_virtual(CGSimpleRegAlloc* a) { a->next_virtual = 1; } -void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, RegClass cls, Reg base, +void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, NativeAllocClass cls, Reg base, u32 nregs) { if ((u32)cls >= 3u) return; cg_simple_regpool_init_range(&a->pools[cls], base, nregs); } -void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls, +void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, NativeAllocClass cls, const Reg* regs, u32 nregs) { if ((u32)cls >= 3u) return; 
cg_simple_regpool_init_ordered(&a->pools[cls], regs, nregs); } -Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, RegClass cls) { +Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, NativeAllocClass cls) { if ((u32)cls >= 3u) return (Reg)REG_NONE; if (a->virtual_regs) return a->next_virtual++; return cg_simple_regpool_alloc(&a->pools[cls]); } -int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r) { +int cg_simple_regalloc_free(CGSimpleRegAlloc* a, NativeAllocClass cls, Reg r) { if ((u32)cls >= 3u) return -2; if (a->virtual_regs) { (void)r; @@ -107,7 +107,7 @@ int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r) { return cg_simple_regpool_free(&a->pools[cls], r); } -void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r) { +void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, NativeAllocClass cls, Reg r) { if ((u32)cls >= 3u) return; if (a->virtual_regs) { if (r != (Reg)REG_NONE && r >= a->next_virtual) a->next_virtual = r + 1u; @@ -116,7 +116,7 @@ void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r) { cg_simple_regpool_reserve(&a->pools[cls], r); } -u32 cg_simple_regalloc_used_regs(const CGSimpleRegAlloc* a, RegClass cls, +u32 cg_simple_regalloc_used_regs(const CGSimpleRegAlloc* a, NativeAllocClass cls, Reg* out, u32 cap) { if ((u32)cls >= 3u || a->virtual_regs) return 0; return cg_simple_regpool_used_regs(&a->pools[cls], out, cap); diff --git a/src/arch/regalloc.h b/src/arch/regalloc.h @@ -5,16 +5,22 @@ #define CG_SIMPLE_REGALLOC_MAX_REGS 32u +typedef enum NativeAllocClass { + NATIVE_REG_INT, + NATIVE_REG_FP, + NATIVE_REG_VEC, +} NativeAllocClass; + typedef struct CGSimpleRegPool { - u32 free; /* bit i set iff reg_at(i) is free */ - u32 used; /* bit i set iff reg_at(i) was allocated/reserved */ - const Reg* order; /* optional ordered hard-reg table */ - Reg base; /* used when order is NULL: reg_at(i) = base + i */ + u32 free; /* bit i set iff entry i is free */ + u32 used; /* bit i set iff 
entry i was allocated/reserved */ + const Reg* order; /* optional ordered physical-id table */ + Reg base; /* used when order is NULL: entry i = base + i */ u32 nregs; } CGSimpleRegPool; typedef struct CGSimpleRegAlloc { - CGSimpleRegPool pools[3]; /* indexed by RegClass */ + CGSimpleRegPool pools[3]; /* indexed by NativeAllocClass */ u32 virtual_regs; Reg next_virtual; } CGSimpleRegAlloc; @@ -30,14 +36,14 @@ u32 cg_simple_regpool_used_regs(const CGSimpleRegPool* p, Reg* out, u32 cap); void cg_simple_regalloc_init(CGSimpleRegAlloc* a); void cg_simple_regalloc_init_virtual(CGSimpleRegAlloc* a); -void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, RegClass cls, Reg base, +void cg_simple_regalloc_set_range(CGSimpleRegAlloc* a, NativeAllocClass cls, Reg base, u32 nregs); -void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, RegClass cls, +void cg_simple_regalloc_set_ordered(CGSimpleRegAlloc* a, NativeAllocClass cls, const Reg* regs, u32 nregs); -Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, RegClass cls); -int cg_simple_regalloc_free(CGSimpleRegAlloc* a, RegClass cls, Reg r); -void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, RegClass cls, Reg r); -u32 cg_simple_regalloc_used_regs(const CGSimpleRegAlloc* a, RegClass cls, +Reg cg_simple_regalloc_alloc(CGSimpleRegAlloc* a, NativeAllocClass cls); +int cg_simple_regalloc_free(CGSimpleRegAlloc* a, NativeAllocClass cls, Reg r); +void cg_simple_regalloc_reserve(CGSimpleRegAlloc* a, NativeAllocClass cls, Reg r); +u32 cg_simple_regalloc_used_regs(const CGSimpleRegAlloc* a, NativeAllocClass cls, Reg* out, u32 cap); int cg_simple_regalloc_is_virtual(const CGSimpleRegAlloc* a); diff --git a/src/arch/registry.c b/src/arch/registry.c @@ -7,7 +7,7 @@ * register file, etc.). * * Conceptually: - * - A CGBackend is "something that can build a CGTarget from a Compiler + + * - A CGBackend is "something that can build a CgTarget from a Compiler + * ObjBuilder + CfreeCodeOptions". 
Machine-code arches and c_target both * qualify. * - An ArchImpl is a CGBackend plus the machine-code metadata (its first diff --git a/src/cg/arith.c b/src/cg/arith.c @@ -28,10 +28,10 @@ static int api_try_fold_int_convert(CfreeCg* g, ConvKind ck, CfreeCgTypeId sty, void api_cg_binop(CfreeCg* g, BinOp iop, u32 flags) { ApiSValue b, a; - CGTarget* T; + CgTarget* T; CfreeCgTypeId ty; Operand ra, rb; - Reg rr; + CGLocal rr; Operand dst; ApiSValue folded_sv; i64 folded; @@ -41,22 +41,6 @@ void api_cg_binop(CfreeCg* g, BinOp iop, u32 flags) { a = api_pop(g); ty = a.type ? a.type : b.type; - if (api_is_i128_type(g->c, ty)) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); - CfreeCgTypeId ps[2]; - ApiSValue args[2]; - const char* name = api_i128_binop_helper(iop); - if (!name) - compiler_panic(g->c, g->cur_loc, "CfreeCg: i128 binop unsupported"); - args[0] = a; - args[1] = b; - ps[0] = i128; - ps[1] = (iop == BO_SHL || iop == BO_SHR_U || iop == BO_SHR_S) ? 
i32 : i128; - api_runtime_call_values(g, name, i128, ps, 2, args); - return; - } - if (!flags && api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) && api_try_fold_int_binop(g, iop, ty, a.op.v.imm, b.op.v.imm, &folded)) { api_release(g, &a); @@ -73,12 +57,12 @@ void api_cg_binop(CfreeCg* g, BinOp iop, u32 flags) { return; } - if (api_type_class(ty) == RC_FP) { - ra = api_force_reg(g, &a, ty); - rb = api_force_reg(g, &b, ty); + if (api_type_is_float(g->c, ty)) { + ra = api_force_local(g, &a, ty); + rb = api_force_local(g, &b, ty); } else { - ra = api_force_reg_unless_imm(g, &a, ty); - rb = api_force_reg_unless_imm(g, &b, ty); + ra = api_force_local_unless_imm(g, &a, ty); + rb = api_force_local_unless_imm(g, &b, ty); } if (api_can_delay_int_arith(g, ty, flags) && @@ -90,11 +74,11 @@ void api_cg_binop(CfreeCg* g, BinOp iop, u32 flags) { } if (api_can_delay_int_arith(g, ty, flags) && - (ra.kind == OPK_REG || rb.kind == OPK_REG) && - (ra.kind == OPK_REG || ra.kind == OPK_IMM) && - (rb.kind == OPK_REG || rb.kind == OPK_IMM)) { - int a_owned = api_sv_owns_operand_reg(&a, &ra); - int b_owned = api_sv_owns_operand_reg(&b, &rb); + (ra.kind == OPK_LOCAL || rb.kind == OPK_LOCAL) && + (ra.kind == OPK_LOCAL || ra.kind == OPK_IMM) && + (rb.kind == OPK_LOCAL || rb.kind == OPK_IMM)) { + int a_owned = api_sv_owns_operand_local(&a, &ra); + int b_owned = api_sv_owns_operand_local(&b, &rb); api_push(g, api_make_arith_binop(iop, ra, rb, ty, a_owned, b_owned)); if (a_owned) a.res = RES_INHERENT; if (b_owned) b.res = RES_INHERENT; @@ -103,8 +87,8 @@ void api_cg_binop(CfreeCg* g, BinOp iop, u32 flags) { return; } - rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - dst = api_op_reg(rr, ty); + rr = api_alloc_temp_local(g, ty); + dst = api_op_local(rr, ty); T->binop(T, iop, dst, ra, rb); api_release(g, &a); api_release(g, &b); @@ -113,10 +97,10 @@ void api_cg_binop(CfreeCg* g, BinOp iop, u32 flags) { void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags) { ApiSValue a; - CGTarget* T; + 
CgTarget* T; CfreeCgTypeId ty; Operand ra; - Reg rr; + CGLocal rr; Operand dst; ApiSValue folded_sv; i64 folded; @@ -125,35 +109,14 @@ void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags) { a = api_pop(g); ty = a.type ? a.type : a.op.type; - if (api_is_i128_type(g->c, ty)) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); - if (iop == UO_NEG || iop == UO_BNOT) { - const char* name = (iop == UO_NEG) ? "__negti2" : "__cfree_notti3"; - api_push(g, a); - api_f128_call_unary(g, name, i128, i128); - return; - } - if (iop == UO_NOT) { - CfreeCgTypeId ps[2] = {i128, i128}; - ApiSValue args[2]; - args[0] = a; - args[1] = api_make_sv(api_op_imm(0, i128), i128); - api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args); - cfree_cg_push_int(g, 0, i32); - api_cg_cmp(g, CMP_EQ); - return; - } - } - if (iop == UO_FNEG) { - if (api_type_class(ty) != RC_FP) { + if (!api_type_is_float(g->c, ty)) { compiler_panic(g->c, g->cur_loc, "CfreeCg: FP negation requires floating operand"); } - ra = api_force_reg(g, &a, ty); - rr = api_alloc_reg_or_spill(g, RC_FP, ty); - dst = api_op_reg(rr, ty); + ra = api_force_local(g, &a, ty); + rr = api_alloc_temp_local(g, ty); + dst = api_op_local(rr, ty); T->unop(T, iop, dst, ra); api_release(g, &a); api_push(g, api_make_sv(dst, ty)); @@ -174,16 +137,16 @@ void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags) { return; } - ra = api_force_reg_unless_imm(g, &a, ty); - if (api_can_delay_int_arith(g, ty, flags) && ra.kind == OPK_REG) { - int a_owned = api_sv_owns_operand_reg(&a, &ra); + ra = api_force_local_unless_imm(g, &a, ty); + if (api_can_delay_int_arith(g, ty, flags) && ra.kind == OPK_LOCAL) { + int a_owned = api_sv_owns_operand_local(&a, &ra); api_push(g, api_make_arith_unop(iop, ra, ty, a_owned)); if (a_owned) a.res = RES_INHERENT; api_release(g, &a); return; } - rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - dst = api_op_reg(rr, ty); + rr = api_alloc_temp_local(g, ty); + 
dst = api_op_local(rr, ty); T->unop(T, iop, dst, ra); api_release(g, &a); api_push(g, api_make_sv(dst, ty)); @@ -191,11 +154,11 @@ void api_cg_unop(CfreeCg* g, UnOp iop, u32 flags) { void api_cg_cmp(CfreeCg* g, CmpOp cop) { ApiSValue b, a; - CGTarget* T; + CgTarget* T; CfreeCgTypeId opty; CfreeCgTypeId i32; Operand ra, rb; - Reg rr; + CGLocal rr; Operand dst; i64 folded; if (!g) return; @@ -205,48 +168,6 @@ void api_cg_cmp(CfreeCg* g, CmpOp cop) { opty = a.type ? a.type : b.type; i32 = builtin_id(CFREE_CG_BUILTIN_I32); - if (api_is_i128_type(g->c, opty)) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId ps[2] = {i128, i128}; - ApiSValue args[2]; - CmpOp icmp = CMP_EQ; - const char* name = - api_i128_cmp_is_unsigned(cop) ? "__cfree_ucmpti2" : "__cfree_cmpti2"; - switch (cop) { - case CMP_EQ: - icmp = CMP_EQ; - break; - case CMP_NE: - icmp = CMP_NE; - break; - case CMP_LT_S: - case CMP_LT_U: - icmp = CMP_LT_S; - break; - case CMP_LE_S: - case CMP_LE_U: - icmp = CMP_LE_S; - break; - case CMP_GT_S: - case CMP_GT_U: - icmp = CMP_GT_S; - break; - case CMP_GE_S: - case CMP_GE_U: - icmp = CMP_GE_S; - break; - default: - icmp = CMP_EQ; - break; - } - args[0] = a; - args[1] = b; - api_runtime_call_values(g, name, i32, ps, 2, args); - cfree_cg_push_int(g, 0, i32); - api_cg_cmp(g, icmp); - return; - } - if (api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) && api_try_fold_int_cmp(g, cop, opty, a.op.v.imm, b.op.v.imm, &folded)) { api_release(g, &a); @@ -255,15 +176,15 @@ void api_cg_cmp(CfreeCg* g, CmpOp cop) { return; } - ra = api_force_reg_unless_imm(g, &a, opty); - rb = api_force_reg_unless_imm(g, &b, opty); - if (api_type_class(opty) != RC_FP) { - api_push(g, api_make_cmp(cop, ra, rb, i32, api_sv_owns_operand_reg(&a, &ra), - api_sv_owns_operand_reg(&b, &rb))); + ra = api_force_local_unless_imm(g, &a, opty); + rb = api_force_local_unless_imm(g, &b, opty); + if (!api_type_is_float(g->c, opty)) { + api_push(g, api_make_cmp(cop, ra, rb, i32, 
api_sv_owns_operand_local(&a, &ra), + api_sv_owns_operand_local(&b, &rb))); return; } - rr = api_alloc_reg_or_spill(g, RC_INT, i32); - dst = api_op_reg(rr, i32); + rr = api_alloc_temp_local(g, i32); + dst = api_op_local(rr, i32); T->cmp(T, cop, dst, ra, rb); api_release(g, &a); api_release(g, &b); @@ -272,11 +193,11 @@ void api_cg_cmp(CfreeCg* g, CmpOp cop) { void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { ApiSValue v; - CGTarget* T; + CgTarget* T; CfreeCgTypeId sty; CfreeCgTypeId dty; Operand src; - Reg rr; + CGLocal rr; Operand dst; if (!g) return; T = g->target; @@ -295,131 +216,6 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { api_push(g, v); return; } - if (api_is_i128_type(g->c, sty) && api_type_is_bool(g->c, dty) && - ck != CV_BITCAST) { - CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); - CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); - CfreeCgTypeId ps[2] = {i128, i128}; - ApiSValue args[2]; - ApiSValue r; - args[0] = v; - args[1] = api_make_sv(api_op_imm(0, i128), i128); - api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args); - cfree_cg_push_int(g, 0, i32); - api_cg_cmp(g, CMP_NE); - r = api_pop(g); - r.type = dty; - r.op.type = dty; - api_push(g, r); - return; - } - if (api_is_i128_type(g->c, dty) && !api_is_i128_type(g->c, sty) && - ck != CV_BITCAST) { - u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); - CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); - FrameSlot slot = api_f128_temp_slot(g, dty); - Operand dst_lv = api_op_local(slot, dty); - if (api_sv_op_is(&v, OPK_IMM)) { - u8 bytes[16]; - u64 lo = (u64)v.op.v.imm; - u64 hi = 0; - if (ck == CV_SEXT && sz <= 8) { - u32 bits = sz * 8u; - u64 mask = bits >= 64u ? ~(u64)0 : ((1ull << bits) - 1ull); - u64 sign = 1ull << (bits - 1u); - u64 u = lo & mask; - if (u & sign) u |= ~mask; - lo = u; - hi = (u & (1ull << 63)) ? 
~(u64)0 : 0; - } - memset(bytes, 0, sizeof bytes); - for (u32 i = 0; i < 8; ++i) { - u32 lo_idx = g->c->target.big_endian ? 15u - i : i; - u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i; - bytes[lo_idx] = (u8)(lo >> (i * 8u)); - bytes[hi_idx] = (u8)(hi >> (i * 8u)); - } - api_store_f128_bytes(g, slot, dty, bytes); - api_release(g, &v); - api_push(g, api_make_lv(dst_lv, dty)); - return; - } - { - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty); - CfreeCgTypeId src_ty = sty; - Operand src = api_force_reg(g, &v, sty); - Operand low = src; - Operand base; - Reg low_tmp = REG_NONE; - Reg ar; - MemAccess ma; - memset(&ma, 0, sizeof ma); - ma.type = i64_ty; - ma.size = 8; - ma.align = 8; - if (sz < 8 && ck == CV_SEXT) { - low_tmp = api_alloc_reg_or_spill(g, RC_INT, i64_ty); - low = api_op_reg(low_tmp, i64_ty); - T->convert(T, CV_SEXT, low, src); - src_ty = i64_ty; - } else { - low.type = i64_ty; - } - if (ck != CV_SEXT && !g->c->target.big_endian) { - T->store(T, dst_lv, low, ma); - if (low_tmp != REG_NONE) api_free_reg(g, low_tmp, RC_INT); - api_release(g, &v); - ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - base = api_op_reg(ar, ptr_ty); - T->addr_of(T, base, dst_lv); - T->store(T, api_op_indirect(ar, 8, i64_ty), api_op_imm(0, i64_ty), ma); - api_free_reg(g, ar, RC_INT); - api_push(g, api_make_lv(dst_lv, dty)); - return; - } - ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - base = api_op_reg(ar, ptr_ty); - T->addr_of(T, base, dst_lv); - T->store(T, api_op_indirect(ar, 0, i64_ty), low, ma); - if (ck == CV_SEXT) { - Reg hr = api_alloc_reg_or_spill(g, RC_INT, i64_ty); - Operand high = api_op_reg(hr, i64_ty); - T->binop(T, BO_SHR_S, high, low, api_op_imm(63, i64_ty)); - T->store(T, api_op_indirect(ar, 8, i64_ty), high, ma); - api_free_reg(g, hr, RC_INT); - } else { - T->store(T, api_op_indirect(ar, 8, i64_ty), api_op_imm(0, i64_ty), ma); - } - if (low_tmp != REG_NONE) api_free_reg(g, low_tmp, RC_INT); - (void)src_ty; - api_free_reg(g, ar, RC_INT); - 
api_release(g, &v); - api_push(g, api_make_lv(dst_lv, dty)); - } - return; - } - if (api_is_i128_type(g->c, sty) && !api_is_i128_type(g->c, dty) && - ck == CV_TRUNC && abi_cg_sizeof(g->c->abi, dty) <= 8) { - Reg rr = api_alloc_reg_or_spill(g, RC_INT, dty); - Operand dst = api_op_reg(rr, dty); - if (api_is_lvalue_sv(&v) || v.op.kind == OPK_LOCAL || - v.op.kind == OPK_INDIRECT || v.op.kind == OPK_GLOBAL) { - ApiSValue lv = v; - lv.lvalue = 1; - T->load(T, dst, lv.op, api_mem_for_lvalue(g, &lv.op, dty)); - } else if (v.op.kind == OPK_IMM) { - T->load_imm(T, dst, v.op.v.imm); - } else if (v.op.kind == OPK_REG) { - Operand src_addr = api_op_indirect(v.op.v.reg, 0, dty); - T->load(T, dst, src_addr, api_mem_for_lvalue(g, &src_addr, dty)); - } else { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: unsupported i128 truncation source"); - } - api_release(g, &v); - api_push(g, api_make_sv(dst, dty)); - return; - } if (api_sv_op_is(&v, OPK_IMM)) { i64 folded; if (api_try_fold_int_convert(g, ck, sty, dty, v.op.v.imm, &folded)) { @@ -428,19 +224,11 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { return; } } - if (ck == CV_BITCAST && - abi_cg_sizeof(g->c->abi, sty) == abi_cg_sizeof(g->c->abi, dst_type) && - api_type_class(sty) == api_type_class(dty)) { - v.type = dty; - v.op.type = dty; - api_push(g, v); - return; - } if (ck == CV_BITCAST && abi_cg_sizeof(g->c->abi, sty) == 16 && abi_cg_sizeof(g->c->abi, dty) == 16 && (api_is_f128_type(g->c, sty) || api_is_f128_type(g->c, dty))) { - FrameSlot slot = api_f128_temp_slot(g, dty); - Operand dst_lv = api_op_local(slot, dty); + CGLocal local = api_f128_temp_local(g, dty); + Operand dst_lv = api_op_local(local, dty); if (api_is_lvalue_sv(&v) || v.op.kind == OPK_LOCAL || v.op.kind == OPK_INDIRECT || v.op.kind == OPK_GLOBAL) { CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty); @@ -459,9 +247,9 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { agg.size = 16; agg.align = 16; 
g->target->copy_bytes(g->target, dst_addr, src_addr, agg); - api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); - } else if (v.op.kind == OPK_REG) { + api_release_temp_local(g, dst_addr.v.local); + api_release_temp_local(g, src_addr.v.local); + } else if (v.op.kind == OPK_LOCAL) { g->target->store(g->target, dst_lv, v.op, api_mem_for_lvalue(g, &dst_lv, sty)); } else if (v.op.kind == OPK_IMM) { @@ -472,7 +260,7 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { u32 idx = g->c->target.big_endian ? 15u - i : i; bytes[idx] = (u8)(lo >> (i * 8u)); } - api_store_f128_bytes(g, slot, dty, bytes); + api_store_f128_bytes(g, local, dty, bytes); } else { compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported 16-byte bitcast source"); @@ -482,9 +270,9 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { return; } - src = api_force_reg(g, &v, sty); - rr = api_alloc_reg_or_spill(g, api_type_class(dty), dty); - dst = api_op_reg(rr, dty); + src = api_force_local(g, &v, sty); + rr = api_alloc_temp_local(g, dty); + dst = api_op_local(rr, dty); T->convert(T, ck, dst, src); api_release(g, &v); api_push(g, api_make_sv(dst, dty)); @@ -871,7 +659,7 @@ int api_intrinsic_is_overflow(CfreeCgIntrinsic intrin) { void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, CfreeCgTypeId result_type) { - CGTarget* T; + CgTarget* T; CfreeCgTypeId rty; CfreeCgTypeId int_ty; IntrinKind kind; @@ -908,7 +696,7 @@ void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, intrin == CFREE_CG_INTRIN_PREFETCH)) { args[idx] = svs[idx].op; } else { - args[idx] = api_force_reg(g, &svs[idx], aty); + args[idx] = api_force_local(g, &svs[idx], aty); } } } @@ -916,14 +704,14 @@ void cfree_cg_intrinsic(CfreeCg* g, CfreeCgIntrinsic intrin, uint32_t nargs, if (api_intrinsic_is_overflow(intrin)) { CfreeCgTypeId vty = rty ? rty : (nargs ? 
api_sv_type(&svs[0]) : int_ty); CfreeCgTypeId bool_ty = builtin_id(CFREE_CG_BUILTIN_BOOL); - Reg rr = api_alloc_reg_or_spill(g, api_type_class(vty), vty); - Reg ok = api_alloc_reg_or_spill(g, RC_INT, bool_ty); - dsts[0] = api_op_reg(rr, vty); - dsts[1] = api_op_reg(ok, bool_ty); + CGLocal rr = api_alloc_temp_local(g, vty); + CGLocal ok = api_alloc_temp_local(g, bool_ty); + dsts[0] = api_op_local(rr, vty); + dsts[1] = api_op_local(ok, bool_ty); ndst = 2; } else if (!api_intrinsic_is_void(intrin) && !cg_type_is_void(g->c, rty)) { - Reg rr = api_alloc_reg_or_spill(g, api_type_class(rty), rty); - dsts[0] = api_op_reg(rr, rty); + CGLocal rr = api_alloc_temp_local(g, rty); + dsts[0] = api_op_local(rr, rty); ndst = 1; } diff --git a/src/cg/asm.c b/src/cg/asm.c @@ -29,20 +29,16 @@ int api_asm_is_early_clobber(const char* s) { return (s[0] == '=' && s[1] == '&') || s[0] == '&'; } -void api_asm_spill_sv(CfreeCg* g, ApiSValue* sv, Reg phys, RegClass cls) { - FrameSlot slot = api_take_spill_slot(g, cls); - Operand victim_reg = api_op_reg(phys, api_owned_reg_type(g, sv)); - g->target->spill_reg(g->target, victim_reg, slot, api_mem_for_spill(g, sv)); - api_free_reg(g, phys, cls); - sv->spill_slot = slot; - sv->res = RES_SPILLED; - api_set_owned_reg(sv, (Reg)REG_NONE); +void api_asm_memory_clobber_sv(CfreeCg* g, ApiSValue* sv, CGLocal local_id) { + (void)g; + (void)sv; + (void)local_id; } void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { static const char* const match_strs[10] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}; - CGTarget* T; + CgTarget* T; Heap* h; CfreeCgTypeId fallback_ty; AsmConstraint* outs; @@ -51,7 +47,7 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { ApiSValue* in_svs; Operand* in_ops; Operand* out_ops; - u8* out_reg_owned; + u8* out_local_owned; const char* tmpl_str; Sym sym_memory; int has_memory_clobber; @@ -80,7 +76,7 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { in_svs = NULL; in_ops = 
NULL; out_ops = NULL; - out_reg_owned = NULL; + out_local_owned = NULL; if (noutputs) { outs = (AsmConstraint*)h->alloc(h, sizeof(*outs) * noutputs, @@ -104,8 +100,8 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { out_ops = (Operand*)h->alloc(h, sizeof(*out_ops) * noutputs, _Alignof(Operand)); memset(out_ops, 0, sizeof(*out_ops) * noutputs); - out_reg_owned = (u8*)h->alloc(h, noutputs, 1); - memset(out_reg_owned, 0, noutputs); + out_local_owned = (u8*)h->alloc(h, noutputs, 1); + memset(out_local_owned, 0, noutputs); } total_inputs = ninputs + ninout; @@ -137,7 +133,7 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { for (u32 i = 0; i < total_inputs; ++i) { u32 idx = total_inputs - 1u - i; in_svs[idx] = api_pop(g); - api_ensure_reg(g, &in_svs[idx]); + api_ensure_local(g, &in_svs[idx]); } } @@ -151,9 +147,9 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { if (api_asm_is_early_clobber(outs[i].str)) continue; if (body[0] == 'r') { CfreeCgTypeId oty = outs[i].type ? 
outs[i].type : fallback_ty; - Reg r = api_alloc_reg_or_spill(g, api_type_class(oty), oty); - out_ops[i] = api_op_reg(r, oty); - out_reg_owned[i] = 1; + CGLocal r = api_alloc_temp_local(g, oty); + out_ops[i] = api_op_local(r, oty); + out_local_owned[i] = 1; } else { compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported asm output constraint"); @@ -177,17 +173,17 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { continue; } bound = out_ops[matched]; - if (api_sv_op_is(&in_svs[i], OPK_REG) && - in_svs[i].op.v.reg == bound.v.reg) { + if (api_sv_op_is(&in_svs[i], OPK_LOCAL) && + in_svs[i].op.v.local == bound.v.local) { } else if (api_sv_op_is(&in_svs[i], OPK_IMM)) { T->load_imm(T, bound, in_svs[i].op.v.imm); } else { - Operand src = api_force_reg(g, &in_svs[i], ity); + Operand src = api_force_local(g, &in_svs[i], ity); T->copy(T, bound, src); } in_ops[i] = bound; } else if (s[0] == 'r') { - in_ops[i] = api_force_reg(g, &in_svs[i], ity); + in_ops[i] = api_force_local(g, &in_svs[i], ity); } else if (s[0] == 'i') { if (!api_sv_op_is(&in_svs[i], OPK_IMM)) { compiler_panic(g->c, g->cur_loc, @@ -201,8 +197,8 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { CfreeCgTypeId pty = cg_type_ptr_to(g->c, ity ? 
ity : builtin_id(CFREE_CG_BUILTIN_VOID)); Operand dst = api_lvalue_addr(g, &in_svs[i], pty); - in_svs[i].op = api_op_indirect(dst.v.reg, 0, ity); - in_svs[i].res = RES_REG; + in_svs[i].op = api_op_indirect(dst.v.local, 0, ity); + in_svs[i].res = RES_LOCAL; in_ops[i] = in_svs[i].op; } else { compiler_panic(g->c, g->cur_loc, @@ -217,7 +213,7 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { for (u32 i = 0; i < noutputs; ++i) { const char* body; CfreeCgTypeId oty; - Reg r; + CGLocal r; if (!api_asm_is_early_clobber(outs[i].str)) continue; body = api_asm_constraint_body(outs[i].str); if (body[0] != 'r') { @@ -226,16 +222,16 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { continue; } oty = outs[i].type ? outs[i].type : fallback_ty; - r = api_alloc_reg_or_spill(g, api_type_class(oty), oty); + r = api_alloc_temp_local(g, oty); for (u32 k = 0; k < total_inputs; ++k) { - if ((in_ops[k].kind == OPK_REG && in_ops[k].v.reg == r) || + if ((in_ops[k].kind == OPK_LOCAL && in_ops[k].v.local == r) || (in_ops[k].kind == OPK_INDIRECT && in_ops[k].v.ind.base == r)) { compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm early-clobber register collision"); + "CfreeCg: asm early-clobber local collision"); } } - out_ops[i] = api_op_reg(r, oty); - out_reg_owned[i] = 1; + out_ops[i] = api_op_local(r, oty); + out_local_owned[i] = 1; } sym_memory = pool_intern_slice(g->c->global, SLICE_LIT("memory")); @@ -246,39 +242,10 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { if (has_memory_clobber) { for (u32 i = 0; i < g->sp; ++i) { ApiSValue* sv = &g->stack[i]; - Reg phys; - RegClass cls; - if (sv->res != RES_REG) continue; - phys = api_reg_of_sv(sv); - cls = (RegClass)api_class_of_sv(sv); - api_asm_spill_sv(g, sv, phys, cls); - } - } else if (T->resolve_reg_name) { - for (u32 i = 0; i < nclobbers; ++i) { - Reg phys; - RegClass cls; - if (T->resolve_reg_name(T, clobs[i], &phys, &cls) != 0) continue; - for (u32 k = 0; k < noutputs; ++k) { - 
if (out_ops[k].kind == OPK_REG && out_ops[k].cls == cls && - (Reg)out_ops[k].v.reg == phys) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm clobber overlaps output"); - } - } - for (u32 k = 0; k < total_inputs; ++k) { - if (in_ops[k].kind == OPK_REG && in_ops[k].cls == cls && - (Reg)in_ops[k].v.reg == phys) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: asm clobber overlaps input"); - } - } - for (u32 k = 0; k < g->sp; ++k) { - ApiSValue* sv = &g->stack[k]; - if (sv->res != RES_REG) continue; - if (api_class_of_sv(sv) != (u8)cls) continue; - if ((Reg)api_reg_of_sv(sv) != phys) continue; - api_asm_spill_sv(g, sv, phys, cls); - } + CGLocal local_id; + if (sv->res != RES_LOCAL) continue; + local_id = api_local_of_sv(sv); + api_asm_memory_clobber_sv(g, sv, local_id); } } @@ -289,7 +256,7 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { for (u32 i = 0; i < noutputs; ++i) { CfreeCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty; ApiSValue sv = api_make_sv(out_ops[i], oty); - if (!out_reg_owned[i] && sv.res == RES_REG) sv.res = RES_INHERENT; + if (!out_local_owned[i] && sv.res == RES_LOCAL) sv.res = RES_INHERENT; api_push(g, sv); } @@ -299,35 +266,19 @@ void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block) { if (in_svs) h->free(h, in_svs, sizeof(*in_svs) * total_inputs); if (in_ops) h->free(h, in_ops, sizeof(*in_ops) * total_inputs); if (out_ops) h->free(h, out_ops, sizeof(*out_ops) * noutputs); - if (out_reg_owned) h->free(h, out_reg_owned, noutputs); + if (out_local_owned) h->free(h, out_local_owned, noutputs); } void cfree_cg_file_scope_asm(CfreeCg* g, CfreeSlice asm_source) { - AsmLexer* lex; if (!g || !asm_source.s) return; if (g->check_only) return; - /* Target-level override (e.g. wasm) takes precedence — those backends - * have no native asm parser and diagnose explicitly. 
*/ if (g->target && g->target->file_scope_asm) { api_local_const_memory_boundary(g); g->target->file_scope_asm(g->target, asm_source.s, asm_source.len); return; } - /* The C-source backend bypasses MCEmitter entirely; file-scope asm has - * no portable C source equivalent (Phase 4 territory — see - * doc/CBACKEND.md). Panic with the same shape as other unimplemented - * C-target paths so the test harness recognizes the diagnostic. */ - if (!g->mc) { - compiler_panic(g->c, api_no_loc(), - "C target: file-scope asm not yet supported"); - } - api_local_const_memory_boundary(g); - lex = - asm_lex_open_mem(g->c, "<file-scope asm>", asm_source.s, asm_source.len); - if (!lex) - compiler_panic(g->c, api_no_loc(), "CfreeCg: file-scope asm out of memory"); - asm_parse(g->c, lex, g->mc); - asm_lex_close(lex); + compiler_panic(g->c, api_no_loc(), + "CfreeCg: file-scope asm requires target support"); } /* ============================================================ diff --git a/src/cg/atomic.c b/src/cg/atomic.c @@ -37,7 +37,7 @@ void cfree_cg_atomic_load(CfreeCg* g, CfreeCgMemAccess access, ApiSValue ptr; CfreeCgTypeId pty, val_ty; Operand addr, dst; - Reg rr; + CGLocal rr; if (!g) return; api_local_const_memory_boundary(g); ptr = api_pop(g); @@ -45,9 +45,9 @@ void cfree_cg_atomic_load(CfreeCg* g, CfreeCgMemAccess access, val_ty = resolve_type(g->c, access.type); if (!val_ty) val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_load"); api_require_pointer_value(g, "atomic_load pointer", pty); - addr = api_force_reg(g, &ptr, pty); - rr = api_alloc_reg_or_spill(g, api_type_class(val_ty), val_ty); - dst = api_op_reg(rr, val_ty); + addr = api_force_local(g, &ptr, pty); + rr = api_alloc_temp_local(g, val_ty); + dst = api_op_local(rr, val_ty); g->target->atomic_load(g->target, dst, addr, api_mem_for_atomic(g, val_ty), api_map_mem_order(order)); api_release(g, &ptr); @@ -68,8 +68,8 @@ void cfree_cg_atomic_store(CfreeCg* g, CfreeCgMemAccess access, if (!val_ty) val_ty = 
api_atomic_pointee(g, pty, "CfreeCg: atomic_store"); api_require_pointer_value(g, "atomic_store pointer", pty); api_validate_memory_value(g, "atomic_store", val_ty, api_sv_type(&val)); - addr = api_force_reg(g, &ptr, pty); - src = api_sv_op_is_reg_or_imm(&val) ? val.op : api_force_reg(g, &val, val_ty); + addr = api_force_local(g, &ptr, pty); + src = api_sv_op_is_local_or_imm(&val) ? val.op : api_force_local(g, &val, val_ty); g->target->atomic_store(g->target, addr, src, api_mem_for_atomic(g, val_ty), api_map_mem_order(order)); api_release(g, &val); @@ -81,7 +81,7 @@ void cfree_cg_atomic_rmw(CfreeCg* g, CfreeCgMemAccess access, ApiSValue val, ptr; CfreeCgTypeId pty, val_ty; Operand addr, vop, dst; - Reg rr; + CGLocal rr; if (!g) return; api_local_const_memory_boundary(g); val = api_pop(g); @@ -91,10 +91,10 @@ void cfree_cg_atomic_rmw(CfreeCg* g, CfreeCgMemAccess access, if (!val_ty) val_ty = api_atomic_pointee(g, pty, "CfreeCg: atomic_rmw"); api_require_pointer_value(g, "atomic_rmw pointer", pty); api_validate_memory_value(g, "atomic_rmw", val_ty, api_sv_type(&val)); - addr = api_force_reg(g, &ptr, pty); - vop = api_sv_op_is_reg_or_imm(&val) ? val.op : api_force_reg(g, &val, val_ty); - rr = api_alloc_reg_or_spill(g, api_type_class(val_ty), val_ty); - dst = api_op_reg(rr, val_ty); + addr = api_force_local(g, &ptr, pty); + vop = api_sv_op_is_local_or_imm(&val) ? 
val.op : api_force_local(g, &val, val_ty); + rr = api_alloc_temp_local(g, val_ty); + dst = api_op_local(rr, val_ty); g->target->atomic_rmw(g->target, api_map_atomic_op(op), dst, addr, vop, api_mem_for_atomic(g, val_ty), api_map_mem_order(order)); @@ -103,34 +103,13 @@ void cfree_cg_atomic_rmw(CfreeCg* g, CfreeCgMemAccess access, api_push(g, api_make_sv(dst, val_ty)); } -int api_take_dead_owned_reg(ApiSValue* sv, u8 cls, Reg avoid, Reg* out) { - Reg r; - if (sv->res != RES_REG || sv->pinned) return 0; - if (api_class_of_sv(sv) != cls) return 0; - r = api_reg_of_sv(sv); - if (r == (Reg)REG_NONE || r == avoid) return 0; - sv->res = RES_INHERENT; - *out = r; - return 1; -} - -Reg api_alloc_dead_input_or_spill(CfreeCg* g, ApiSValue* a, ApiSValue* b, - ApiSValue* c, u8 cls, CfreeCgTypeId ty, - Reg avoid) { - Reg r; - if (api_take_dead_owned_reg(a, cls, avoid, &r)) return r; - if (api_take_dead_owned_reg(b, cls, avoid, &r)) return r; - if (api_take_dead_owned_reg(c, cls, avoid, &r)) return r; - return api_alloc_reg_or_spill(g, cls, ty); -} - void cfree_cg_atomic_cmpxchg(CfreeCg* g, CfreeCgMemAccess access, CfreeCgMemOrder success, CfreeCgMemOrder failure, int weak) { ApiSValue desired, expected, ptr; CfreeCgTypeId pty, val_ty, bool_ty; Operand addr, exp_op, des_op, prior, ok; - Reg pr, kr; + CGLocal pr, kr; if (!g) return; api_local_const_memory_boundary(g); (void)weak; @@ -145,21 +124,18 @@ void cfree_cg_atomic_cmpxchg(CfreeCg* g, CfreeCgMemAccess access, api_sv_type(&expected)); api_validate_memory_value(g, "atomic_cmpxchg desired", val_ty, api_sv_type(&desired)); - addr = api_force_reg(g, &ptr, pty); - exp_op = api_sv_op_is_reg_or_imm(&expected) + addr = api_force_local(g, &ptr, pty); + exp_op = api_sv_op_is_local_or_imm(&expected) ? expected.op - : api_force_reg(g, &expected, val_ty); - des_op = api_sv_op_is_reg_or_imm(&desired) + : api_force_local(g, &expected, val_ty); + des_op = api_sv_op_is_local_or_imm(&desired) ? 
desired.op - : api_force_reg(g, &desired, val_ty); + : api_force_local(g, &desired, val_ty); bool_ty = builtin_id(CFREE_CG_BUILTIN_BOOL); - pr = api_alloc_dead_input_or_spill(g, &ptr, &expected, &desired, - api_type_class(val_ty), val_ty, - (Reg)REG_NONE); - kr = api_alloc_dead_input_or_spill(g, &ptr, &expected, &desired, RC_INT, - bool_ty, pr); - prior = api_op_reg(pr, val_ty); - ok = api_op_reg(kr, bool_ty); + pr = api_alloc_temp_local(g, val_ty); + kr = api_alloc_temp_local(g, bool_ty); + prior = api_op_local(pr, val_ty); + ok = api_op_local(kr, bool_ty); g->target->atomic_cas(g->target, prior, ok, addr, exp_op, des_op, api_mem_for_atomic(g, val_ty), api_map_mem_order(success), api_map_mem_order(failure)); diff --git a/src/cg/call.c b/src/cg/call.c @@ -1,150 +1,110 @@ #include "cg/internal.h" -CGABIValue* api_alloc_call_args(CfreeCg* g, u32 nargs) { - CGABIValue* avs = NULL; - if (nargs) { - avs = arena_array(g->c->tu, CGABIValue, nargs); - memset(avs, 0, sizeof(CGABIValue) * nargs); - } - g->avs_in_flight = avs; - g->avs_in_flight_n = nargs; - return avs; +static u32 api_func_nparams(CfreeCg* g, CfreeCgTypeId fty) { + const CgType* ty = cg_type_get(g->c, api_unalias_type(g->c, fty)); + if (!ty || ty->kind != CFREE_CG_TYPE_FUNC) return 0; + return ty->func.nparams; } -void api_pack_call_arg(CfreeCg* g, CGABIValue* av, CfreeCgTypeId fty, - const ABIFuncInfo* abi, u32 idx) { - ApiSValue arg = api_pop(g); - int is_vararg = (idx >= abi->nparams); - CfreeCgTypeId aty = is_vararg ? (arg.type ? arg.type : api_sv_type(&arg)) - : cg_type_func_param_id(g->c, fty, idx); - if (!aty) aty = arg.type; - - av->type = aty; - av->abi = is_vararg ? 
NULL : &abi->params[idx]; +CGLocal* api_alloc_call_args(CfreeCg* g, u32 nargs) { + if (!nargs) return NULL; + CGLocal* args = arena_array(g->c->tu, CGLocal, nargs); + memset(args, 0, sizeof(CGLocal) * nargs); + return args; +} - if (api_is_wide16_scalar_type(g->c, aty)) { - ApiSValue lv = api_wide16_materialize_lvalue(g, &arg, aty); - av->storage = lv.op; - av->storage.type = aty; - av->size = 16; - } else if (cg_type_is_aggregate(g->c, aty)) { - api_ensure_reg(g, &arg); - Operand st = arg.op; - if (!api_is_lvalue_sv(&arg) && st.kind == OPK_REG) { - st = api_op_indirect(st.v.reg, 0, aty); +static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg, + CfreeCgTypeId ty) { + if (cg_type_is_aggregate(g->c, ty)) { + if (api_is_lvalue_sv(arg) && arg->op.kind == OPK_LOCAL) { + if (api_unalias_type(g->c, arg->op.type) == api_unalias_type(g->c, ty)) + return arg->op.v.local; } - st.type = aty; - av->storage = st; - av->size = abi_cg_sizeof(g->c->abi, aty); - } else { - api_ensure_reg(g, &arg); - av->storage = (api_is_lvalue_sv(&arg) || arg.op.kind == OPK_GLOBAL) - ? 
api_force_reg(g, &arg, aty) - : arg.op; + CGLocal r = api_alloc_temp_local(g, ty); + Operand dst = api_op_local(r, ty); + MemAccess ma; + memset(&ma, 0, sizeof ma); + ma.type = ty; + ma.size = abi_cg_sizeof(g->c->abi, ty); + ma.align = abi_cg_alignof(g->c->abi, ty); + if (api_is_lvalue_sv(arg) && api_operand_can_address(&arg->op)) { + g->target->load(g->target, dst, arg->op, ma); + } else if (arg->op.kind == OPK_GLOBAL || arg->op.kind == OPK_INDIRECT) { + g->target->load(g->target, dst, arg->op, ma); + } else if (arg->op.kind == OPK_LOCAL) { + g->target->load(g->target, dst, api_op_indirect(arg->op.v.local, 0, ty), + ma); + } else { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: aggregate call argument is not addressable"); + } + return r; + } + CfreeCgTypeId src_ty = api_sv_type(arg); + Operand op = api_force_local_unless_imm(g, arg, src_ty); + if (op.kind == OPK_LOCAL && + api_unalias_type(g->c, op.type) == api_unalias_type(g->c, ty)) { + return op.v.local; } -} -void api_alloc_call_ret_storage(CfreeCg* g, CGTarget* T, CfreeCgTypeId ret_ty, - Operand* out) { - if (api_arg_storage_must_be_addr(g->c, ret_ty)) { - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.type = ret_ty; - fsd.size = abi_cg_sizeof(g->c->abi, ret_ty); - fsd.align = abi_cg_alignof(g->c->abi, ret_ty); - fsd.kind = FS_LOCAL; - fsd.flags = FSF_ADDR_TAKEN; - FrameSlot slot = T->frame_slot(T, &fsd); - *out = api_op_local(slot, ret_ty); + CGLocal r = api_alloc_temp_local(g, ty); + Operand dst = api_op_local(r, ty); + if (op.kind == OPK_IMM) { + g->target->load_imm(g->target, dst, op.v.imm); + } else if (op.kind == OPK_LOCAL) { + g->target->copy(g->target, dst, op); } else { - Reg r = api_alloc_reg_or_spill(g, api_type_class(ret_ty), ret_ty); - *out = api_op_reg(r, ret_ty); + compiler_panic(g->c, g->cur_loc, + "CfreeCg: scalar call argument is not materialized"); } + api_release(g, arg); + return r; +} + +void api_pack_call_arg(CfreeCg* g, CGLocal* out, CfreeCgTypeId fty, u32 idx) { + ApiSValue 
arg = api_pop(g); + u32 nfixed = api_func_nparams(g, fty); + CfreeCgTypeId aty = idx >= nfixed ? api_sv_type(&arg) + : cg_type_func_param_id(g->c, fty, idx); + if (!aty) aty = api_sv_type(&arg); + *out = api_materialize_call_local(g, &arg, aty); +} + +CGLocal api_alloc_call_result(CfreeCg* g, CfreeCgTypeId ret_ty) { + return api_alloc_temp_local(g, ret_ty); } -void api_release_call_args(CfreeCg* g, CGABIValue* avs, u32 nargs) { +void api_release_call_args(CfreeCg* g, CGLocal* args, u32 nargs) { for (u32 i = 0; i < nargs; ++i) { - api_release_arg_storage(g, &avs[i].storage); + if (args[i] != CG_LOCAL_NONE) api_release_temp_local(g, args[i]); } - g->avs_in_flight = NULL; - g->avs_in_flight_n = 0; } -void api_push_call_result(CfreeCg* g, Operand ret_storage, - CfreeCgTypeId ret_ty) { - if (ret_storage.kind == OPK_LOCAL || ret_storage.kind == OPK_GLOBAL || - ret_storage.kind == OPK_INDIRECT) { - api_push(g, api_make_lv(ret_storage, ret_ty)); +void api_push_call_result(CfreeCg* g, CGLocal result, CfreeCgTypeId ret_ty) { + Operand op = api_op_local(result, ret_ty); + if (cg_type_is_aggregate(g->c, ret_ty) || api_is_wide16_scalar_type(g->c, ret_ty)) { + api_push(g, api_make_lv(op, ret_ty)); } else { - api_push(g, api_make_sv(ret_storage, ret_ty)); + api_push(g, api_make_sv(op, ret_ty)); } } -static void api_spill_call_clobbered_stack(CfreeCg* g, const CGCallDesc* d) { - CGTarget* T = g->target; - u32 masks[3]; - if (cg_simple_regalloc_is_virtual(&g->regalloc)) return; - if (!T->call_clobber_mask) return; - for (u32 c = 0; c < 3u; ++c) { - masks[c] = T->call_clobber_mask(T, d, (RegClass)c); - } - for (u32 i = 0; i < g->sp; ++i) { - ApiSValue* sv = &g->stack[i]; - u8 cls; - Reg reg; - FrameSlot slot; - Operand src; - if (sv->res != RES_REG) continue; - cls = api_class_of_sv(sv); - if (cls >= 3u) continue; - reg = api_reg_of_sv(sv); - if (reg == (Reg)REG_NONE || reg >= 32u) continue; - if ((masks[cls] & (1u << reg)) == 0) continue; - if (sv->pinned) { - compiler_panic(g->c, 
g->cur_loc, - "CfreeCg: call clobbers pinned register %u class %u", - (unsigned)reg, (unsigned)cls); - continue; - } - slot = api_take_spill_slot(g, cls); - src = api_op_reg(reg, api_owned_reg_type(g, sv)); - T->spill_reg(T, src, slot, api_mem_for_spill(g, sv)); - api_free_reg(g, reg, cls); - api_set_owned_reg(sv, (Reg)REG_NONE); - sv->spill_slot = slot; - sv->res = RES_SPILLED; - } +static void api_call_clobber_boundary(CfreeCg* g, const CGCallDesc* d) { + (void)g; + (void)d; } -/* Return-ABI compatibility between the enclosing function and a tail callee. - * A tail call only makes sense when the callee's return value flows directly - * out of the caller, so the two return shapes must match. This is a frontend - * precondition; a mismatch is a frontend bug, not a fallback case. */ -static int api_tail_ret_compatible(const ABIArgInfo* caller, - const ABIArgInfo* callee) { - if (!caller || !callee) return 0; - if (caller->kind != callee->kind) return 0; - if (caller->kind == ABI_ARG_IGNORE || caller->kind == ABI_ARG_INDIRECT) - return 1; - if (caller->nparts != callee->nparts) return 0; - for (u16 i = 0; i < caller->nparts; ++i) { - if (caller->parts[i].cls != callee->parts[i].cls) return 0; - if (caller->parts[i].size != callee->parts[i].size) return 0; - } - return 1; +static int api_tail_ret_compatible(CfreeCg* g, CfreeCgTypeId callee_fn_type) { + CfreeCgTypeId callee_ret = cg_type_func_ret_id(g->c, callee_fn_type); + return api_unalias_type(g->c, g->fn_ret_type) == + api_unalias_type(g->c, callee_ret); } -/* Decide whether a requested tail call (policy ALLOWED or MUST) is emitted as - * a sibling call. Returns 1 to emit a tail/terminator, 0 for the ALLOWED - * fallback to an ordinary call. Aborts on a return-shape precondition - * violation (any policy) or an unrealizable MUST tail call. The target - * authors the realizability verdict and, when it blocks, the reason string. 
*/ static int api_tail_decide(CfreeCg* g, const CGCallDesc* desc, CfreeCgTailPolicy policy) { - CGTarget* T = g->target; - const ABIArgInfo* caller_ret = g->fn_abi ? &g->fn_abi->ret : NULL; + CgTarget* T = g->target; const char* reason; - if (!api_tail_ret_compatible(caller_ret, &desc->abi->ret)) { + if (!api_tail_ret_compatible(g, desc->fn_type)) { compiler_panic(g->c, g->cur_loc, "tail call: callee return type is incompatible with the " "enclosing function's return type"); @@ -153,35 +113,49 @@ static int api_tail_decide(CfreeCg* g, const CGCallDesc* desc, reason = T->tail_call_unrealizable_reason ? T->tail_call_unrealizable_reason(T, desc) : "target does not support tail calls"; - if (!reason) return 1; /* realizable */ + if (!reason) return 1; if (policy == CFREE_CG_TAIL_MUST) { compiler_panic(g->c, g->cur_loc, "musttail call not realizable: %s", reason); return 0; } - return 0; /* ALLOWED: fall back to an ordinary call. */ + return 0; } -/* Emit the caller's return for an ALLOWED tail call that fell back to an - * ordinary call. A tail call is a terminator, so the frontend emits no ret of - * its own; the fallback must supply one that forwards the call result. 
*/ static void api_tail_fallback_ret(CfreeCg* g, CfreeCgTypeId ret_ty) { if (cg_type_is_void(g->c, ret_ty)) - g->target->ret(g->target, NULL); + g->target->ret(g->target, NULL, 0); else - cfree_cg_ret(g); /* pops the result pushed by api_push_call_result */ + cfree_cg_ret(g); +} + +static void api_finish_call(CfreeCg* g, CGCallDesc* desc, CGLocal* args, + u32 nargs, Operand callee_op, ApiSValue* callee, + CfreeCgTypeId ret_ty, int has_result, + int want_tail, int emit_tail) { + if (emit_tail) api_temp_locals_finish(g); + if (!emit_tail) api_call_clobber_boundary(g, desc); + g->target->call(g->target, desc); + + api_release_call_args(g, args, nargs); + if (callee && callee->op.kind != OPK_GLOBAL) { + api_release_temp_local(g, callee_op.v.local); + } + if (has_result) api_push_call_result(g, desc->results[0], ret_ty); + if (want_tail && !emit_tail) api_tail_fallback_ret(g, ret_ty); } void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type, CfreeCgCallAttrs attrs) { - CGTarget* T; + CgTarget* T; CfreeCgTypeId fty; - const ABIFuncInfo* abi; CfreeCgTypeId ret_ty; int has_result; - CGABIValue* avs; + CGLocal* args; + CGLocal* results = NULL; CGCallDesc desc; ApiSValue callee; + Operand callee_op; int want_tail; int emit_tail; if (!g) return; @@ -191,7 +165,6 @@ void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type, T = g->target; fty = resolve_type(g->c, fn_type); if (!fty) return; - abi = abi_cg_func_info(g->c->abi, fty); ret_ty = cg_type_func_ret_id(g->c, fty); if (g->sp < (u32)nargs + 1u) { @@ -199,17 +172,17 @@ void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type, return; } - avs = api_alloc_call_args(g, nargs); + args = api_alloc_call_args(g, nargs); for (u32 i = 0; i < nargs; ++i) { u32 idx = nargs - 1u - i; - api_pack_call_arg(g, &avs[idx], fty, abi, idx); + api_pack_call_arg(g, &args[idx], fty, idx); } callee = api_pop(g); - api_ensure_reg(g, &callee); - Operand callee_op = (callee.op.kind == OPK_GLOBAL) - ? 
callee.op - : api_force_reg(g, &callee, fty); + api_ensure_local(g, &callee); + callee_op = (callee.op.kind == OPK_GLOBAL) + ? callee.op + : api_force_local(g, &callee, api_sv_type(&callee)); CfreeCgInlinePolicy inline_policy = attrs.inline_policy; if (inline_policy == CFREE_CG_INLINE_DEFAULT && callee_op.kind == OPK_GLOBAL && callee_op.v.global.addend == 0) { @@ -221,52 +194,35 @@ void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type, memset(&desc, 0, sizeof desc); desc.fn_type = fty; - desc.abi = abi; desc.callee = callee_op; - desc.args = avs; + desc.args = args; desc.nargs = nargs; desc.tail_policy = (u8)attrs.tail; desc.inline_policy = inline_policy; - desc.ret.type = ret_ty; - desc.ret.abi = &abi->ret; emit_tail = want_tail ? api_tail_decide(g, &desc, attrs.tail) : 0; has_result = !emit_tail && !cg_type_is_void(g->c, ret_ty); desc.flags = emit_tail ? CG_CALL_TAIL : CG_CALL_NONE; if (has_result) { - api_alloc_call_ret_storage(g, T, ret_ty, &desc.ret.storage); - } else { - desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); - } - - if (emit_tail) api_regalloc_finish(g); - if (!emit_tail) api_spill_call_clobbered_stack(g, &desc); - T->call(T, &desc); - - api_release_call_args(g, avs, nargs); - - if (callee.op.kind != OPK_GLOBAL) { - api_free_reg(g, callee_op.v.reg, RC_INT); - } - - if (has_result) { - api_push_call_result(g, desc.ret.storage, ret_ty); + results = arena_array(g->c->tu, CGLocal, 1); + results[0] = api_alloc_call_result(g, ret_ty); + desc.results = results; + desc.nresults = 1; } - /* ALLOWED tail call that could not be realized: forward the result via a - * synthesized return so the function still terminates correctly. 
*/ - if (want_tail && !emit_tail) api_tail_fallback_ret(g, ret_ty); + (void)T; + api_finish_call(g, &desc, args, nargs, callee_op, &callee, ret_ty, has_result, + want_tail, emit_tail); } void api_call_symbol_common(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, CfreeCgCallAttrs attrs) { - CGTarget* T; CfreeCgTypeId fty; - const ABIFuncInfo* abi; CfreeCgTypeId ret_ty; int has_result; - CGABIValue* avs; + CGLocal* args; + CGLocal* results = NULL; CGCallDesc desc; Operand callee_op; CfreeCgInlinePolicy inline_policy; @@ -276,19 +232,17 @@ void api_call_symbol_common(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, api_local_const_memory_boundary(g); want_tail = attrs.tail == CFREE_CG_TAIL_ALLOWED || attrs.tail == CFREE_CG_TAIL_MUST; - T = g->target; fty = api_sym_type(g, sym); if (!fty) return; - abi = abi_cg_func_info(g->c->abi, fty); ret_ty = cg_type_func_ret_id(g->c, fty); if (g->sp < nargs) { compiler_panic(g->c, g->cur_loc, "CfreeCg: call stack underflow"); return; } - avs = api_alloc_call_args(g, nargs); + args = api_alloc_call_args(g, nargs); for (u32 i = 0; i < nargs; ++i) { u32 idx = nargs - 1u - i; - api_pack_call_arg(g, &avs[idx], fty, abi, idx); + api_pack_call_arg(g, &args[idx], fty, idx); } callee_op = api_op_global((ObjSymId)sym, 0, cg_type_ptr_to(g->c, fty)); inline_policy = attrs.inline_policy; @@ -299,32 +253,24 @@ void api_call_symbol_common(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, } memset(&desc, 0, sizeof desc); desc.fn_type = fty; - desc.abi = abi; desc.callee = callee_op; - desc.args = avs; + desc.args = args; desc.nargs = nargs; desc.tail_policy = (u8)attrs.tail; desc.inline_policy = inline_policy; - desc.ret.type = ret_ty; - desc.ret.abi = &abi->ret; emit_tail = want_tail ? api_tail_decide(g, &desc, attrs.tail) : 0; has_result = !emit_tail && !cg_type_is_void(g->c, ret_ty); desc.flags = emit_tail ? 
CG_CALL_TAIL : CG_CALL_NONE; if (has_result) { - api_alloc_call_ret_storage(g, T, ret_ty, &desc.ret.storage); - } else { - desc.ret.storage = api_op_imm(0, builtin_id(CFREE_CG_BUILTIN_VOID)); - } - if (emit_tail) api_regalloc_finish(g); - if (!emit_tail) api_spill_call_clobbered_stack(g, &desc); - T->call(T, &desc); - api_release_call_args(g, avs, nargs); - if (has_result) { - api_push_call_result(g, desc.ret.storage, ret_ty); + results = arena_array(g->c->tu, CGLocal, 1); + results[0] = api_alloc_call_result(g, ret_ty); + desc.results = results; + desc.nresults = 1; } - if (want_tail && !emit_tail) api_tail_fallback_ret(g, ret_ty); + api_finish_call(g, &desc, args, nargs, callee_op, NULL, ret_ty, has_result, + want_tail, emit_tail); } void cfree_cg_call_symbol(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, @@ -334,58 +280,29 @@ void cfree_cg_call_symbol(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, void cfree_cg_ret(CfreeCg* g) { ApiSValue v; - CGTarget* T; CfreeCgTypeId rty; - CGABIValue av; Operand ret_op; + CGLocal value; if (!g) return; - T = g->target; rty = g->fn_ret_type; if (cg_type_is_void(g->c, rty)) { - T->ret(T, NULL); + g->target->ret(g->target, NULL, 0); return; } v = api_pop(g); - memset(&av, 0, sizeof av); - av.type = rty; - av.abi = &g->fn_abi->ret; - int is_aggregate = cg_type_is_aggregate(g->c, rty); - if (is_aggregate) { - api_ensure_reg(g, &v); - av.storage = v.op; - if (!api_is_lvalue_sv(&v) && av.storage.kind == OPK_REG) { - av.storage = api_op_indirect(av.storage.v.reg, 0, rty); - } - av.storage.type = rty; - av.size = abi_cg_sizeof(g->c->abi, rty); - T->ret(T, &av); - return; - } - if (api_is_wide16_scalar_type(g->c, rty)) { - ApiSValue lv = api_wide16_materialize_lvalue(g, &v, rty); - av.storage = lv.op; - av.storage.type = rty; - av.size = 16; - T->ret(T, &av); - return; - } - if (api_sv_op_is(&v, OPK_IMM)) { - ret_op = v.op; - ret_op.type = rty; - av.storage = ret_op; - T->ret(T, &av); - api_release(g, &v); - return; + if 
(cg_type_is_aggregate(g->c, rty)) { + value = api_materialize_call_local(g, &v, rty); + } else { + ret_op = api_force_local(g, &v, rty); + value = ret_op.v.local; } - ret_op = api_force_reg(g, &v, rty); - av.storage = ret_op; - T->ret(T, &av); + g->target->ret(g->target, &value, 1); api_release(g, &v); } void cfree_cg_ret_void(CfreeCg* g) { if (!g) return; - g->target->ret(g->target, NULL); + g->target->ret(g->target, NULL, 0); } /* ============================================================ diff --git a/src/cg/cgtarget.h b/src/cg/cgtarget.h @@ -0,0 +1,679 @@ +#ifndef CFREE_CG_CGTARGET_H +#define CFREE_CG_CGTARGET_H + +#include <cfree/cg.h> +#include <cfree/compile.h> + +#include "core/core.h" +#include "obj/obj.h" + +typedef u32 CGLocal; +#define CG_LOCAL_NONE 0u + +/* Vector / SIMD forward compat: vector ops will arrive as new variants in + * the BinOp, UnOp, CmpOp, ConvKind families. Backend switches over these + * enums must use `default:` (unreachable / panic) rather than exhaustive + * case lists, so adding a new variant later does not silently mis-handle on + * backends that haven't been taught about it. Vector loads/stores reuse the + * existing load/store methods with vector-typed Operands and appropriate + * MemAccess. 
*/ + +typedef enum BinOp { + BO_IADD, + BO_ISUB, + BO_IMUL, + BO_SDIV, + BO_UDIV, + BO_SREM, + BO_UREM, + BO_FADD, + BO_FSUB, + BO_FMUL, + BO_FDIV, + BO_AND, + BO_OR, + BO_XOR, + BO_SHL, + BO_SHR_S, + BO_SHR_U, +} BinOp; + +typedef enum UnOp { + UO_NEG, + UO_FNEG, + UO_NOT, /* logical: 0/1 */ + UO_BNOT, /* bitwise ~ */ +} UnOp; + +typedef enum CmpOp { + CMP_EQ, + CMP_NE, + CMP_LT_S, + CMP_LE_S, + CMP_GT_S, + CMP_GE_S, + CMP_LT_U, + CMP_LE_U, + CMP_GT_U, + CMP_GE_U, + CMP_LT_F, + CMP_LE_F, + CMP_GT_F, + CMP_GE_F, +} CmpOp; + +typedef enum ConvKind { + CV_SEXT, + CV_ZEXT, + CV_TRUNC, + CV_ITOF_S, + CV_ITOF_U, + CV_FTOI_S, + CV_FTOI_U, + CV_FEXT, + CV_FTRUNC, + CV_BITCAST, +} ConvKind; + +typedef enum AtomicOp { + AO_XCHG, + AO_ADD, + AO_SUB, + AO_AND, + AO_OR, + AO_XOR, + AO_NAND, +} AtomicOp; + +typedef enum MemOrder { + MO_RELAXED, + MO_CONSUME, + MO_ACQUIRE, + MO_RELEASE, + MO_ACQ_REL, + MO_SEQ_CST, +} MemOrder; + +/* Compiler-intrinsic kinds dispatched through CgTarget.intrinsic and carried + * on IR_INTRINSIC via IRIntrinAux.kind. The set is bounded: a backend + * must know each one to choose inline-vs-libcall. Hint intrinsics + * (EXPECT/UNREACHABLE/TRAP/PREFETCH/ASSUME_ALIGNED) ride the same dispatch: + * the backend decides whether they emit an instruction or a no-op. + * + * Not every C builtin lives here. Parser-evaluated builtins + * (__builtin_offsetof, __builtin_constant_p, __builtin_choose_expr, + * __builtin_types_compatible_p) fold at parse and never reach IR. Builtins + * that already have dedicated CgTarget methods (alloca, va_*, atomics) keep + * them. Returns-twice and no-return control intrinsics use this dispatch so + * opt can preserve their CFG effects without growing backend vtable hooks. 
*/ +typedef enum IntrinKind { + INTRIN_NONE = 0, + + /* bit ops */ + INTRIN_POPCOUNT, + INTRIN_CTZ, + INTRIN_CLZ, + INTRIN_BSWAP16, + INTRIN_BSWAP32, + INTRIN_BSWAP64, + + /* memory */ + INTRIN_MEMCPY, + INTRIN_MEMMOVE, + INTRIN_MEMSET, + INTRIN_PREFETCH, + INTRIN_ASSUME_ALIGNED, + + /* hints */ + INTRIN_EXPECT, + INTRIN_UNREACHABLE, + INTRIN_TRAP, + + /* non-local control */ + INTRIN_SETJMP, + INTRIN_LONGJMP, + + /* checked arith — multi-result (value, overflow_flag) */ + INTRIN_SADD_OVERFLOW, + INTRIN_UADD_OVERFLOW, + INTRIN_SSUB_OVERFLOW, + INTRIN_USUB_OVERFLOW, + INTRIN_SMUL_OVERFLOW, + INTRIN_UMUL_OVERFLOW, +} IntrinKind; + +typedef enum OpKind { + OPK_IMM, + OPK_LOCAL, /* typed semantic local */ + OPK_GLOBAL, /* address: symbol+addend, not a load */ + OPK_INDIRECT, /* [local + ofs], with optional indexed local */ +} OpKind; + +typedef enum CGLocalFlag { + CG_LOCAL_FLAG_NONE = 0, + CG_LOCAL_ADDR_TAKEN = 1u << 0, + CG_LOCAL_MEMORY_REQUIRED = 1u << 1, +} CGLocalFlag; + +typedef struct CGLocalDesc { + CfreeCgTypeId type; + Sym name; + SrcLoc loc; + u32 size; + u32 align; + u32 flags; /* CGLocalFlag */ +} CGLocalDesc; + +typedef enum MemFlag { + MF_NONE = 0, + MF_VOLATILE = 1u << 0, + MF_ATOMIC = 1u << 1, + MF_RESTRICT = 1u << 2, + MF_READONLY = 1u << 3, + MF_WRITEONLY = 1u << 4, + MF_UNALIGNED = 1u << 5, +} MemFlag; + +typedef enum AliasKind { + ALIAS_UNKNOWN, + ALIAS_LOCAL, + ALIAS_GLOBAL, + ALIAS_PARAM, + ALIAS_HEAP, + ALIAS_STRING, +} AliasKind; + +typedef struct AliasRoot { + u8 kind; /* AliasKind */ + u8 pad[3]; + union { + i32 local_id; + ObjSymId global; + u32 param_idx; + Sym string_id; + } v; +} AliasRoot; + +typedef struct MemAccess { + CfreeCgTypeId type; /* codegen object type accessed */ + u32 size; /* ABI byte size of this access */ + u32 align; /* known byte alignment; 0 means unknown */ + u16 flags; /* MemFlag */ + u16 addr_space; + AliasRoot alias; +} MemAccess; + +typedef struct ConstBytes { + CfreeCgTypeId type; + const u8* bytes; /* ABI 
representation, little/big endian per target */ + u32 size; + u32 align; +} ConstBytes; + +typedef struct AggregateAccess { + CfreeCgTypeId type; + u32 size; + u32 align; + MemAccess mem; +} AggregateAccess; + +typedef struct BitFieldAccess { + CfreeCgTypeId field_type; + MemAccess storage; + u32 storage_offset; /* byte offset from record base */ + u16 bit_offset; /* target-endian bit offset within storage unit */ + u16 bit_width; /* may be 0 for zero-width layout barriers */ + u8 signed_; + u8 pad[3]; +} BitFieldAccess; + +typedef struct Operand { + u8 kind; + u8 pad[3]; + CfreeCgTypeId type; + union { + i64 imm; + CGLocal local; + struct { + ObjSymId sym; + i64 addend; + } global; + struct { + CGLocal base; + CGLocal index; /* CG_LOCAL_NONE when no index operand */ + u8 log2_scale; /* 0..3 -> 1/2/4/8 bytes; ignored when no index */ + i32 ofs; + } ind; + } v; +} Operand; + +typedef struct CGParamDesc { + u32 index; + Sym name; + CfreeCgTypeId type; + u32 size; + u32 align; + u32 flags; /* CGLocalFlag */ + SrcLoc loc; +} CGParamDesc; + +/* text_section_id and group_id are per-function so that -ffunction-sections, + * __attribute__((section)) on functions, and COMDAT for C11 inline-with- + * external-definition all work with no extra plumbing. Decl.section_id already + * carries the user's request; CG/decl decides the section name policy + * (default .text, vs .text.<sym> under -ffunction-sections, vs explicit + * attribute). The backend just writes to the named section. */ +/* Phase 2 attribute-derived hints. The backends are free to ignore these; + * they exist so the parser can communicate _Noreturn / __attribute__ + * info down to CG without forcing every backend to consult the Decl. 
*/ +typedef enum CGFuncDescFlag { + CGFD_NONE = 0, + CGFD_NORETURN = 1u << 0, +} CGFuncDescFlag; + +typedef struct CGFuncDesc { + ObjSymId sym; + ObjSecId text_section_id; + ObjGroupId group_id; /* OBJ_GROUP_NONE if none */ + CfreeCgTypeId fn_type; + const CfreeCgTypeId* result_types; + const CGParamDesc* params; + u32 nresults; + u32 nparams; + SrcLoc loc; + u32 flags; /* CGFuncDescFlag */ + CfreeCgInlinePolicy inline_policy; + u8 atomize; + u8 pad[3]; +} CGFuncDesc; + +typedef enum CGCallFlag { + CG_CALL_NONE = 0, + /* Sibling call. The target emits a tail-position call and does NOT emit a + * return-style continuation. CG will not invoke target->ret afterwards. + * + * Realizability is verified before this flag is set: CG only sets it after + * tail_call_unrealizable_reason() returns NULL for the same desc and call + * state, so the target can emit the sibling call unconditionally. The + * target may assert/compiler_panic if the flag is set on an unrealizable + * desc, but that is an internal-consistency check — fallback and + * diagnostics for unrealizable tail calls are CG's responsibility, not the + * target's. */ + CG_CALL_TAIL = 1u << 0, +} CGCallFlag; + +typedef struct CGCallDesc { + CfreeCgTypeId fn_type; + Operand callee; + const CGLocal* args; + const CGLocal* results; + u32 nargs; + u32 nresults; + u16 flags; /* CGCallFlag */ + u8 tail_policy; /* CfreeCgTailPolicy; meaningful when CG_CALL_TAIL is set. + * The opt recorder accepts every tail and preserves this so + * the replay can pick: emit tail (realizable), fall back to + * call+ret (ALLOWED), or diagnose (MUST). 
*/ + u8 pad; + CfreeCgInlinePolicy inline_policy; +} CGCallDesc; + +typedef u32 Label; +#define LABEL_NONE 0 + +typedef enum ScopeKind { + SCOPE_BLOCK, /* break exits forward */ + SCOPE_LOOP, /* break exits forward; continue uses explicit target */ + SCOPE_IF, /* cond consumed at scope_begin */ +} ScopeKind; + +typedef u32 CGScope; +#define CG_SCOPE_NONE 0u + +typedef struct CGScopeDesc { + u8 kind; /* ScopeKind */ + u8 pad[3]; + Label break_label; /* explicit target for break; LABEL_NONE => target creates + one */ + Label continue_label; /* explicit target for continue; LABEL_NONE for + non-loops */ + Operand cond; /* SCOPE_IF condition; ignored otherwise */ + CfreeCgTypeId result_type; /* reserved for structured expression results */ +} CGScopeDesc; + +typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir; + +typedef struct AsmConstraint { + const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... */ + Sym name; /* GCC `[name]` symbolic operand; 0 if absent */ + CfreeCgTypeId type; /* codegen type of the bound expression (output lvalue or + input rvalue). Drives type width for the binder. + NULL only for hand-built test constraints (binder + falls back to a 64-bit int default). */ + u8 dir; /* AsmDir */ + u8 pad[3]; +} AsmConstraint; + +typedef struct CGSwitchCase { + /* Bit pattern matched against the selector; interpreted using + * selector_type's width and signedness (signed comparison uses + * sign-extension to selector_type's width). 
*/ + u64 value; + Label label; +} CGSwitchCase; + +typedef struct CGSwitchDesc { + Operand selector; /* OPK_LOCAL or OPK_IMM */ + CfreeCgTypeId selector_type; + Label default_label; /* LABEL_NONE means "fall through past the switch" */ + const CGSwitchCase* cases; + u32 ncases; + u8 hint; /* CfreeCgSwitchHint */ + u8 opt_level; /* 0/1/2; reads policy in cg_lower_switch_default */ + u8 pad[2]; +} CGSwitchDesc; + +typedef struct CGLocalStaticDataDesc { + ObjSymId sym; + CfreeCgTypeId type; + CfreeCgDataDefAttrs attrs; + u32 align; +} CGLocalStaticDataDesc; + +typedef struct CgTarget CgTarget; +struct CgTarget { + /* Typed IR lowering context. Subclasses extend. */ + Compiler* c; + ObjBuilder* obj; + + /* ---- function lifecycle ---- */ + void (*func_begin)(CgTarget*, const CGFuncDesc*); + void (*func_end)(CgTarget*); + + /* Symbol-aliasing hook. Optional (may be NULL). cg invokes this from + * cfree_cg_alias after the obj symbol-table mirror is wired so the + * backend can emit any out-of-band representation it needs — e.g. the + * C-source target writes + * `T alias_sym(...) __attribute__((alias("target")));` + * because the alias relationship isn't expressible by sharing a + * (section, value) pair the way a relocatable object can. Native + * machine-code backends don't need this hook because obj_symbol_define + * already aliases the bytes. `type` is the alias's CG type (function + * or object), needed by the C target to render the prototype. */ + void (*alias)(CgTarget*, ObjSymId alias_sym, ObjSymId target_sym, + CfreeCgTypeId type); + + /* ---- locals ---- */ + CGLocal (*local)(CgTarget*, const CGLocalDesc*); + void (*local_addr)(CgTarget*, Operand dst, const CGLocalDesc*, + CGLocal); + CGLocal (*param)(CgTarget*, const CGParamDesc*); + + /* ---- labels and control flow ---- */ + Label (*label_new)(CgTarget*); + void (*label_place)(CgTarget*, Label); + void (*jump)(CgTarget*, Label); + /* Fused compare-and-branch. 
cg's preferred form: avoids materializing 0/1 + * for a normal `if (a < b)`. For an arbitrary i1 in a local, callers + * synthesize cmp_branch(CMP_NE, val, IMM_ZERO, label). */ + void (*cmp_branch)(CgTarget*, CmpOp, Operand a, Operand b, Label); + + /* Structured switch dispatch. + * + * Optional: when NULL, cg's shared `cg_lower_switch_default` runs and + * lowers in terms of cmp_branch / jump / indirect_branch / data ops — + * the path every native arch uses. Backends override switch_ only when + * they can express the construct natively: the C-source target emits + * `switch (val) { case V: goto L_V; ... default: goto L_def; }`; a + * future WASM target would emit `br_table`. + * + * The descriptor carries the full structured form (selector + paired + * cases + default + frontend hint); density policy lives in + * cg_lower_switch_default. */ + void (*switch_)(CgTarget*, const CGSwitchDesc*); + + /* Indirect branch primitive: transfer control to the address in + * `addr` (an OPK_LOCAL holding a function-local label address). + * + * Required on every native arch and used by: + * - cfree_cg_computed_goto for direct-threaded dispatch + * - opt-level jump-table lowerings of IR_SWITCH (when implemented) + * + * `valid_targets[0..ntargets)` is the closed set of labels the address + * can resolve to. Backends use it for branch-target hardening (BTI, + * PAC, x86 CFG, IBT) and opt uses it to build the CFG; opt requires + * ntargets > 0. */ + void (*indirect_branch)(CgTarget*, Operand addr, + const Label* valid_targets, u32 ntargets); + + /* Materialize the runtime address of a function-local label into + * `dst`. The label must already exist (label_new); it does not + * need to be placed yet. Backends emit the target's relative address + * materialization: + * x86_64 `lea L(%rip), %r`, aarch64 `adr X, L`, riscv `auipc/addi`. 
+ * + * The resulting pointer is a function-local label address (per the + * public cfree_cg_push_label_addr contract) and must only be consumed + * by indirect_branch inside the defining function's activation. */ + void (*load_label_addr)(CgTarget*, Operand dst, Label label); + + /* Optional source-backend hook for function-local static data definitions + * that need function label scope, currently used for C `&&label` + * dispatch-table initializers. Returning non-zero from begin means the + * target consumes bytes/zeros/label addresses until end; ordinary object + * data emission is skipped for that definition. */ + int (*local_static_data_begin)(CgTarget*, const CGLocalStaticDataDesc*); + /* data == NULL means append len zero bytes. */ + void (*local_static_data_write)(CgTarget*, const u8* data, u64 len); + void (*local_static_data_label_addr)(CgTarget*, Label target, i64 addend, + u32 width, u32 address_space); + void (*local_static_data_end)(CgTarget*); + + /* Optional. When non-NULL, cfree_cg_data_label_addr panics with the + * returned target-specific message before reaching object-data emission. Lets + * targets that cannot resolve function-local label addresses in + * static-data initializers (e.g. the Wasm backend) fail with a + * recognizable, target-prefixed diagnostic. The returned string must remain valid for + * the lifetime of the panic call (string literals are typical). */ + const char* (*data_label_addr_unsupported_msg)(CgTarget*); + + /* ---- structured control flow ---- + * Mirrors CG's scope ops. CG passes explicit break/continue targets so C + * `for` continues can land on the increment expression rather than the loop + * header. Real backends shim these onto label_new/label_place/jump. + * The WASM backend consumes them natively to emit block/loop/if with + * structurally-bounded br targets, which is what gives WASM its CFI. + * + * For SCOPE_IF, `cond` is the i1 operand; ignored for BLOCK/LOOP. 
+ * `result_type` is reserved for if-as-expression on WASM (NULL for the + * statement case used by C); other backends ignore it. */ + CGScope (*scope_begin)(CgTarget*, const CGScopeDesc*); + void (*scope_else)(CgTarget*, CGScope); + void (*scope_end)(CgTarget*, CGScope); + void (*break_to)(CgTarget*, CGScope); + void (*continue_to)(CgTarget*, CGScope); + + /* ---- data movement (split, no overloading) ---- */ + void (*load_imm)(CgTarget*, Operand dst /*LOCAL*/, i64 imm); + void (*load_const)(CgTarget*, Operand dst /*LOCAL*/, ConstBytes); + void (*copy)(CgTarget*, Operand dst /*LOCAL*/, Operand src /*LOCAL*/); + void (*load)(CgTarget*, Operand dst /*LOCAL*/, + Operand addr /*LOCAL|GLOBAL|INDIRECT*/, MemAccess); + void (*store)(CgTarget*, Operand addr /*LOCAL|GLOBAL|INDIRECT*/, + Operand src /*LOCAL|IMM*/, MemAccess); + void (*addr_of)(CgTarget*, Operand dst /*LOCAL*/, + Operand lv /*LOCAL|GLOBAL|INDIRECT*/); + /* Materializes the address of a thread-local symbol into `dst`. Distinct + * from addr_of because TLS resolution can be a multi-instruction sequence + * or a runtime call (e.g. GD model), not a cheap addressing mode. The + * backend chooses the TLS model (LE/IE/LD/GD) from c->target and the + * symbol's visibility. Subsequent accesses go through OPK_INDIRECT on the + * resulting pointer; this lets opt hoist the materialization via LICM. */ + void (*tls_addr_of)(CgTarget*, Operand dst /*LOCAL*/, ObjSymId sym, i64 addend); + void (*copy_bytes)(CgTarget*, Operand dst_addr, Operand src_addr, + AggregateAccess); + void (*set_bytes)(CgTarget*, Operand dst_addr, Operand byte_value, + AggregateAccess); + void (*bitfield_load)(CgTarget*, Operand dst /*LOCAL*/, Operand record_addr, + BitFieldAccess); + void (*bitfield_store)(CgTarget*, Operand record_addr, + Operand src /*LOCAL|IMM*/, BitFieldAccess); + + /* ---- arithmetic, compare, convert ---- + * binop/unop/cmp accept OPK_LOCAL or OPK_IMM in source operand positions + * (`a`, `b`); `dst` is always OPK_LOCAL. 
The backend chooses between an + * imm-form encoding and materializing the literal into a scratch + * local based on whether the value fits the instruction's imm + * field. FP binops and UO_FNEG require local sources — FP literals reach the + * value stack through load_const into OPK_LOCAL. cg and opt's machinize/emit + * both rely on this contract to pass small constants through without + * burning a value-stack local on materialization. */ + void (*binop)(CgTarget*, BinOp, Operand dst /*LOCAL*/, + Operand a /*LOCAL|IMM*/, Operand b /*LOCAL|IMM*/); + void (*unop)(CgTarget*, UnOp, Operand dst /*LOCAL*/, Operand a /*LOCAL|IMM*/); + void (*cmp)(CgTarget*, CmpOp, Operand dst /*LOCAL*/, Operand a /*LOCAL|IMM*/, + Operand b /*LOCAL|IMM*/); /* materialize 0/1 */ + void (*convert)(CgTarget*, ConvKind, Operand dst, Operand src); + + /* ---- calls / return ---- + * CGCallDesc carries the type-checked signature, semantic callee operand, + * local arguments, and local result destinations. The semantic target does + * not expose calling-convention lowering; native targets derive physical + * argument/return placement from fn_type and local metadata internally. + * Multiple results are semantic multi-results, not ABI split parts. */ + void (*call)(CgTarget*, const CGCallDesc*); + /* Pure query: can `d` be emitted as a sibling (tail) call on this target, + * given the current target state? Returns NULL if yes; otherwise a short, + * static, human-readable string naming the blocker, used verbatim in the + * musttail diagnostic. Must not emit code and must not abort. + * + * Realizable means the target can transfer control to the callee while + * preserving the source-level call/return semantics of this function. CG + * verifies type compatibility before setting CG_CALL_TAIL; target-specific + * blockers such as variadic lowering, frame teardown constraints, or + * unavailable tail-call support are reported here. 
+ * + * CG owns the tail policy: it calls this first and only sets CG_CALL_TAIL + * when it returns NULL, so a NULL result must guarantee a later call() with + * CG_CALL_TAIL can emit the sibling call. May itself be NULL, meaning the + * target supports no tail calls at all. */ + const char* (*tail_call_unrealizable_reason)(CgTarget*, const CGCallDesc*); + void (*ret)(CgTarget*, const CGLocal* values, u32 nvalues); + + /* ---- alloca ---- + * Dynamic stack allocation. `size` is i64 bytes; `align` is the required + * alignment of the returned pointer. Backend grows the (linear-memory or + * native) shadow stack, returns the pointer in `dst`. v1 only emits this + * via __builtin_alloca; C VLAs are not parsed (__STDC_NO_VLA__). */ + void (*alloca_)(CgTarget*, Operand dst /*LOCAL*/, Operand size, u32 align); + + /* ---- variadics ---- + * va_list type is per-arch (defined in <stdarg.h>); these methods + * implement the four C macros after builtin substitution. ap is always + * passed as &ap. 
*/ + void (*va_start_)(CgTarget*, Operand ap_addr); + void (*va_arg_)(CgTarget*, Operand dst /*LOCAL*/, Operand ap_addr, + CfreeCgTypeId t); + void (*va_end_)(CgTarget*, Operand ap_addr); + void (*va_copy_)(CgTarget*, Operand dst_ap_addr, Operand src_ap_addr); + + /* ---- atomics ---- */ + void (*atomic_load)(CgTarget*, Operand dst /*LOCAL*/, Operand addr, MemAccess, + MemOrder); + void (*atomic_store)(CgTarget*, Operand addr, Operand src, MemAccess, + MemOrder); + void (*atomic_rmw)(CgTarget*, AtomicOp, Operand dst /*LOCAL: prior value*/, + Operand addr, Operand val, MemAccess, MemOrder); + void (*atomic_cas)(CgTarget*, Operand prior /*LOCAL*/, Operand ok /*LOCAL, i1*/, + Operand addr, Operand expected, Operand desired, MemAccess, + MemOrder success, MemOrder failure); + void (*fence)(CgTarget*, MemOrder); + + /* ---- compiler intrinsics ---- + * Typed dispatch for builtins whose lowering is backend-relevant + * (inline-vs-libcall, inline sequence selection) or whose semantics opt + * cares about (hint pattern matching, exhaustiveness). The IR carries + * IR_INTRINSIC + IRIntrinAux.kind; the wrapped target receives the same call + * at lowering time with materialized operands. + * + * Operand shapes by IntrinKind: + * POPCOUNT/CTZ/CLZ/BSWAP* : dsts[0] LOCAL result; args[0] LOCAL input + * MEMCPY/MEMMOVE : dsts none; args = (dst_addr, src_addr, n) + * MEMSET : dsts none; args = (dst_addr, byte, n) + * PREFETCH : dsts none; args = (addr [, rw [, locality]]) + * ASSUME_ALIGNED : dsts[0] LOCAL; args = (ptr, align [, offset]) + * EXPECT : dsts[0] LOCAL; args = (val, expected) + * UNREACHABLE / TRAP : dsts none; args none + * SETJMP : dsts[0] LOCAL i32 result; args = (&buf) + * LONGJMP : dsts none; args = (&buf, val); no return + * ADD/SUB/MUL_OVERFLOW : dsts[0] LOCAL result, dsts[1] LOCAL i1 overflow; + * args = (a, b) + * + * Backends that lack an inline sequence for a given kind may emit a + * normal IR_CALL-shaped sequence to a runtime entry (e.g. 
memcpy) — the + * IR records intent, the backend chooses mechanism. Hint kinds may be + * lowered as no-ops where the arch has nothing to emit. */ + void (*intrinsic)(CgTarget*, IntrinKind, Operand* dsts, u32 ndst, + const Operand* args, u32 narg); + + /* ---- inline asm ---- + * Per-arch constraint binding + template assembly, packaged as one block. + * ins[i] are pre-evaluated input operands. + * out_ops[i] is filled by the arch with the location holding the result + * for outs[i]; the caller (cg) reads them out after the call. + * "=&r" early-clobber outputs must be allocated disjoint from any input. + * opt_cgtarget records this as a single IR_ASM_BLOCK; the wrapped target + * receives the same call at lowering time with materialized operands. */ + void (*asm_block)(CgTarget*, const char* tmpl, const AsmConstraint* outs, + u32 nout, Operand* out_ops, const AsmConstraint* ins, + u32 nin, const Operand* in_ops, const Sym* clobbers, + u32 nclob); + + /* Optional: handle a top-level `__asm__("...")` block (file scope, not + * inside a function). Backends that leave this NULL fall back to the + * generic asm-parser path through CfreeCg.mc. Wasm overrides this to + * diagnose-and-fail since the wasm module has no native asm parser. */ + void (*file_scope_asm)(CgTarget*, const char* src, size_t len); + + /* ---- source-location tracking ---- + * Sets the SrcLoc inherited by subsequent emit-side calls (binop/load/...). + * opt_cgtarget stamps it on every recorded Inst. Sticky until the next + * set_loc. */ + void (*set_loc)(CgTarget*, SrcLoc); + + /* ---- end-of-TU hook ---- + * No-op for plain target CGTargets. opt_cgtarget runs cross-function passes + * (inlining + cleanup) and lowers all buffered IR functions into the + * wrapped target CgTarget. Drivers must call this after the last func_end and + * before reading from `obj` or calling debug_emit. */ + void (*finalize)(CgTarget*); + + void (*destroy)(CgTarget*); +}; + +/* Shared switch lowering. 
cg's cfree_cg_switch installs this as the + * default target->switch_ behavior; opt's pass_emit calls it when + * replaying IR_SWITCH against a backend that doesn't override switch_. + * Emits a cmp-and-branch chain over (target->cmp_branch + target->jump) + * — fast at -O0 and the input shape an opt-level jump-table rewrite + * starts from. */ +void cg_lower_switch_default(CgTarget* t, const CGSwitchDesc* desc); + +CgTarget* cgtarget_new(Compiler*, ObjBuilder*); +void cgtarget_finalize(CgTarget*); +void cgtarget_free(CgTarget*); + +/* A CGBackend is the unit the registry hands out: "give me a CgTarget for + * this Compiler + ObjBuilder + emit options." */ +typedef struct CGBackend { + const char* name; + CgTarget* (*make)(Compiler*, ObjBuilder*, const CfreeCodeOptions*); +} CGBackend; + +/* Pick the right CGBackend for a session given the compiler's target arch + * and the per-emit CodeOptions. Returns NULL when no backend in this build can + * serve the request. */ +const CGBackend* cg_backend_for_session(const Compiler*, + const CfreeCodeOptions*); + +#endif diff --git a/src/cg/control.c b/src/cg/control.c @@ -20,7 +20,7 @@ void cfree_cg_jump(CfreeCg* g, CfreeCgLabel label) { void api_branch_if(CfreeCg* g, ApiSValue* v, int branch_when_true, Label label) { - CGTarget* T; + CgTarget* T; CfreeCgTypeId ty; if (!g) return; api_local_const_control_boundary(g); @@ -52,7 +52,7 @@ void api_branch_if(CfreeCg* g, ApiSValue* v, int branch_when_true, return; } { - Operand a = api_force_reg(g, v, ty); + Operand a = api_force_local(g, v, ty); Operand zero = api_op_imm(0, ty); T->cmp_branch(T, branch_when_true ? 
CMP_NE : CMP_EQ, a, zero, label); api_release(g, v); @@ -73,7 +73,7 @@ void cfree_cg_branch_false(CfreeCg* g, CfreeCgLabel label) { api_branch_if(g, &v, 0, (Label)label); } -void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* d) { +void cg_lower_switch_default(CgTarget* t, const CGSwitchDesc* d) { /* Cmp-and-branch chain: one cmp_branch per case, then jump to * default (or fall through if LABEL_NONE). The fallback shape; the * frontend-facing cfree_cg_switch picks chain vs. jump-table up @@ -100,13 +100,13 @@ void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* d) { #define CG_SWITCH_TABLE_MAX_SPAN_O1 4096u #define CG_SWITCH_TABLE_DENSITY_RECIP_O1 4u /* ncases * recip >= span */ -typedef enum CGSwitchClass { - CG_SWITCH_CLS_CHAIN, - CG_SWITCH_CLS_TABLE, -} CGSwitchClass; +typedef enum CGSwitchPlanKind { + CG_SWITCH_PLAN_CHAIN, + CG_SWITCH_PLAN_TABLE, +} CGSwitchPlanKind; typedef struct CGSwitchPlan { - CGSwitchClass cls; + CGSwitchPlanKind kind; i64 vmin; u64 span; } CGSwitchPlan; @@ -146,7 +146,7 @@ static int cg_switch_extents(Compiler* c, const CGSwitchDesc* d, i64* out_vmin, static CGSwitchPlan cg_plan_switch(CfreeCg* g, const CGSwitchDesc* d) { CGSwitchPlan plan; - plan.cls = CG_SWITCH_CLS_CHAIN; + plan.kind = CG_SWITCH_PLAN_CHAIN; plan.vmin = 0; plan.span = 0; if (d->ncases == 0) return plan; @@ -158,7 +158,7 @@ static CGSwitchPlan cg_plan_switch(CfreeCg* g, const CGSwitchDesc* d) { * out of bounds — a forced hint shouldn't blow up code size on a * misshapen switch. */ if (plan.span > CG_SWITCH_TABLE_MAX_SPAN_O1) return plan; - plan.cls = CG_SWITCH_CLS_TABLE; + plan.kind = CG_SWITCH_PLAN_TABLE; return plan; } /* TARGET_DEFAULT: O0 keeps the chain; O1+ runs the density check. 
*/ @@ -168,7 +168,7 @@ static CGSwitchPlan cg_plan_switch(CfreeCg* g, const CGSwitchDesc* d) { if (plan.span > CG_SWITCH_TABLE_MAX_SPAN_O1) return plan; if (plan.span > (u64)d->ncases * CG_SWITCH_TABLE_DENSITY_RECIP_O1) return plan; - plan.cls = CG_SWITCH_CLS_TABLE; + plan.kind = CG_SWITCH_PLAN_TABLE; return plan; } @@ -230,7 +230,7 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d, cfree_cg_zext(g, i64_ty); } - /* 5. Build the dense label[] slot array and emit the rodata table. */ + /* 5. Build the dense label[] table and emit the rodata table. */ labels = (Label*)h->alloc(h, (size_t)plan->span * sizeof *labels, _Alignof(Label)); if (!labels) compiler_panic(c, g->cur_loc, "cfree_cg_switch: oom"); @@ -239,14 +239,14 @@ static void cg_emit_switch_table(CfreeCg* g, const CGSwitchDesc* d, for (i = 0; i < d->ncases; ++i) { i64 vi = (width == 64u) ? (i64)d->cases[i].value : api_sign_extend_width(d->cases[i].value, width); - u64 slot = (u64)(vi - plan->vmin); - labels[slot] = d->cases[i].label; + u64 table_index = (u64)(vi - plan->vmin); + labels[table_index] = d->cases[i].label; } table_sym = api_emit_label_table(g, labels, (u32)plan->span); h->free(h, labels, (size_t)plan->span * sizeof *labels); if (table_sym == OBJ_SYM_NONE) { /* api_emit_label_table panics on real failure; this only fires if - * a future caller asks for a 0-slot table (which cg_plan_switch + * a future caller asks for a 0-entry table (which cg_plan_switch * already rules out). */ compiler_panic(c, g->cur_loc, "cfree_cg_switch: table emission failed"); return; @@ -324,10 +324,10 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) { * statement and forces opt_level=0; route everything to it * unchanged. Native + opt targets both flow through the planner. */ native_switch_override = (g->target->switch_ && g->opt_level == 0); - plan = native_switch_override ? (CGSwitchPlan){CG_SWITCH_CLS_CHAIN, 0, 0} + plan = native_switch_override ? 
(CGSwitchPlan){CG_SWITCH_PLAN_CHAIN, 0, 0} : cg_plan_switch(g, &desc); - if (plan.cls == CG_SWITCH_CLS_TABLE) { + if (plan.kind == CG_SWITCH_PLAN_TABLE) { /* Selector stays on the value stack; cg_emit_switch_table consumes * it via cg-API ops so the path also records cleanly under opt. */ metrics_count(g->c, "cg.switch.table", 1); @@ -335,7 +335,7 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) { } else { metrics_count(g->c, "cg.switch.chain", 1); selector = api_pop(g); - desc.selector = api_force_reg_unless_imm(g, &selector, desc.selector_type); + desc.selector = api_force_local_unless_imm(g, &selector, desc.selector_type); if (g->target->switch_) { g->target->switch_(g->target, &desc); } else { @@ -352,13 +352,13 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) { void cfree_cg_push_label_addr(CfreeCg* g, CfreeCgLabel label, CfreeCgTypeId ptr_type) { CfreeCgTypeId ty; - Reg r; + CGLocal r; Operand dst; if (!g) return; ty = resolve_type(g->c, ptr_type); if (!ty) ty = cg_type_ptr_to(g->c, builtin_id(CFREE_CG_BUILTIN_VOID)); - r = api_alloc_reg_or_spill(g, RC_INT, ty); - dst = api_op_reg(r, ty); + r = api_alloc_temp_local(g, ty); + dst = api_op_local(r, ty); g->target->load_label_addr(g->target, dst, (Label)label); api_push(g, api_make_sv(dst, ty)); } @@ -377,7 +377,7 @@ void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets, api_local_const_control_boundary(g); target = api_pop(g); target_ty = api_sv_type(&target); - target_op = api_force_reg(g, &target, target_ty); + target_op = api_force_local(g, &target, target_ty); g->target->indirect_branch(g->target, target_op, (const Label*)valid_targets, ntargets); api_release(g, &target); @@ -399,29 +399,29 @@ CfreeCgScope api_scope_handle(u32 idx, u32 generation) { ApiCgScope* api_scope_from_handle(CfreeCg* g, CfreeCgScope scope, int require_top, const char* who) { - u32 slot; + u32 scope_index; u32 generation; ApiCgScope* s; if (!g || scope == 0) return NULL; - slot = ((u32)scope & 0xffu); + 
scope_index = ((u32)scope & 0xffu); generation = ((u32)scope >> 8); - if (slot == 0 || slot > API_CG_MAX_SCOPES) { + if (scope_index == 0 || scope_index > API_CG_MAX_SCOPES) { compiler_panic(g->c, g->cur_loc, "%.*s: invalid scope handle", SLICE_ARG(slice_from_cstr(who))); return NULL; } - slot--; - if (slot >= g->nscopes) { + scope_index--; + if (scope_index >= g->nscopes) { compiler_panic(g->c, g->cur_loc, "%.*s: stale scope handle", SLICE_ARG(slice_from_cstr(who))); return NULL; } - if (require_top && slot + 1u != g->nscopes) { + if (require_top && scope_index + 1u != g->nscopes) { compiler_panic(g->c, g->cur_loc, "%.*s: non-LIFO scope end", SLICE_ARG(slice_from_cstr(who))); return NULL; } - s = &g->scopes[slot]; + s = &g->scopes[scope_index]; if (!s->active || s->generation != generation) { compiler_panic(g->c, g->cur_loc, "%.*s: stale scope handle", SLICE_ARG(slice_from_cstr(who))); @@ -438,10 +438,10 @@ void api_scope_store_result(CfreeCg* g, ApiCgScope* s, ApiSValue* result) { Operand dst; Operand src; if (!api_scope_has_result(s)) return; - dst = api_op_local(s->result_slot, s->result_type); - src = api_sv_op_is_reg_or_imm(result) + dst = api_op_local(s->result_local, s->result_type); + src = api_sv_op_is_local_or_imm(result) ? 
result->op - : api_force_reg(g, result, s->result_type); + : api_force_local(g, result, s->result_type); g->target->store(g->target, dst, src, api_mem_for_lvalue(g, &dst, s->result_type)); api_release(g, result); @@ -450,11 +450,11 @@ void api_scope_store_result(CfreeCg* g, ApiCgScope* s, ApiSValue* result) { void api_scope_push_result(CfreeCg* g, ApiCgScope* s) { Operand dst; Operand src; - Reg r; + CGLocal r; if (!api_scope_has_result(s)) return; - r = api_alloc_reg_or_spill(g, api_type_class(s->result_type), s->result_type); - dst = api_op_reg(r, s->result_type); - src = api_op_local(s->result_slot, s->result_type); + r = api_alloc_temp_local(g, s->result_type); + dst = api_op_local(r, s->result_type); + src = api_op_local(s->result_local, s->result_type); g->target->load(g->target, dst, src, api_mem_for_lvalue(g, &src, s->result_type)); api_push(g, api_make_sv(dst, s->result_type)); @@ -495,15 +495,15 @@ static CfreeCgScope api_scope_begin_kind(CfreeCg* g, u8 kind, d.result_type = s->result_type; target_scope = g->target->scope_begin(g->target, &d); s->target_scope = target_scope; - s->result_slot = FRAME_SLOT_NONE; + s->result_local = CG_LOCAL_NONE; if (api_scope_has_result(s)) { - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.type = s->result_type; - fsd.size = abi_cg_sizeof(g->c->abi, result_type); - fsd.align = abi_cg_alignof(g->c->abi, result_type); - fsd.kind = FS_LOCAL; - s->result_slot = g->target->frame_slot(g->target, &fsd); + CGLocalDesc ld; + memset(&ld, 0, sizeof ld); + ld.type = s->result_type; + ld.size = abi_cg_sizeof(g->c->abi, result_type); + ld.align = abi_cg_alignof(g->c->abi, result_type); + ld.flags = CG_LOCAL_MEMORY_REQUIRED; + s->result_local = g->target->local(g->target, &ld); } return api_scope_handle(idx, s->generation); @@ -655,10 +655,10 @@ void cfree_cg_continue_false(CfreeCg* g, CfreeCgScope scope) { void cfree_cg_alloca(CfreeCg* g, uint32_t align, CfreeCgTypeId result_ptr_type) { ApiSValue sz; - CGTarget* T; + CgTarget* 
T; CfreeCgTypeId pty; Operand sz_op; - Reg rr; + CGLocal rr; Operand dst; if (!g) return; T = g->target; @@ -666,9 +666,9 @@ void cfree_cg_alloca(CfreeCg* g, uint32_t align, pty = resolve_type(g->c, result_ptr_type); if (!pty) pty = cg_type_ptr_to(g->c, builtin_id(CFREE_CG_BUILTIN_VOID)); sz_op = api_sv_op_is(&sz, OPK_IMM) ? sz.op - : api_force_reg(g, &sz, api_sv_type(&sz)); - rr = api_alloc_reg_or_spill(g, RC_INT, pty); - dst = api_op_reg(rr, pty); + : api_force_local(g, &sz, api_sv_type(&sz)); + rr = api_alloc_temp_local(g, pty); + dst = api_op_local(rr, pty); T->alloca_(T, dst, sz_op, align ? align : 16); api_release(g, &sz); api_push(g, api_make_sv(dst, pty)); @@ -676,31 +676,31 @@ void cfree_cg_alloca(CfreeCg* g, uint32_t align, void cfree_cg_vararg_start(CfreeCg* g) { ApiSValue ap; - CGTarget* T; + CgTarget* T; Operand ap_op; if (!g) return; T = g->target; ap = api_pop(g); - ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); + ap_op = api_force_local(g, &ap, api_sv_type(&ap)); T->va_start_(T, ap_op); api_release(g, &ap); } void cfree_cg_vararg_next(CfreeCg* g, CfreeCgTypeId type) { ApiSValue ap; - CGTarget* T; + CgTarget* T; CfreeCgTypeId ty; Operand ap_op; - Reg rr; + CGLocal rr; Operand dst; if (!g) return; T = g->target; ty = resolve_type(g->c, type); if (!ty) return; ap = api_pop(g); - ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); - rr = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - dst = api_op_reg(rr, ty); + ap_op = api_force_local(g, &ap, api_sv_type(&ap)); + rr = api_alloc_temp_local(g, ty); + dst = api_op_local(rr, ty); T->va_arg_(T, dst, ap_op, ty); api_release(g, &ap); api_push(g, api_make_sv(dst, ty)); @@ -708,26 +708,26 @@ void cfree_cg_vararg_next(CfreeCg* g, CfreeCgTypeId type) { void cfree_cg_vararg_end(CfreeCg* g) { ApiSValue ap; - CGTarget* T; + CgTarget* T; Operand ap_op; if (!g) return; T = g->target; ap = api_pop(g); - ap_op = api_force_reg(g, &ap, api_sv_type(&ap)); + ap_op = api_force_local(g, &ap, api_sv_type(&ap)); 
T->va_end_(T, ap_op); api_release(g, &ap); } void cfree_cg_vararg_copy(CfreeCg* g) { ApiSValue src, dst; - CGTarget* T; + CgTarget* T; Operand src_op, dst_op; if (!g) return; T = g->target; src = api_pop(g); dst = api_pop(g); - src_op = api_force_reg(g, &src, api_sv_type(&src)); - dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); + src_op = api_force_local(g, &src, api_sv_type(&src)); + dst_op = api_force_local(g, &dst, api_sv_type(&dst)); T->va_copy_(T, dst_op, src_op); api_release(g, &src); api_release(g, &dst); @@ -740,14 +740,14 @@ void cfree_cg_vararg_copy(CfreeCg* g) { void cfree_cg_memcpy(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access, CfreeCgMemAccess src_access) { ApiSValue src, dst; - CGTarget* T; + CgTarget* T; AggregateAccess agg; Operand dst_op, src_op; if (!g) return; api_local_const_memory_boundary(g); (void)src_access; if (size > UINT32_MAX) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: memcpy size exceeds CGTarget"); + compiler_panic(g->c, g->cur_loc, "CfreeCg: memcpy size exceeds CgTarget"); return; } T = g->target; @@ -755,8 +755,8 @@ void cfree_cg_memcpy(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access, dst = api_pop(g); api_require_pointer_value(g, "memcpy destination", api_sv_type(&dst)); api_require_pointer_value(g, "memcpy source", api_sv_type(&src)); - dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); - src_op = api_force_reg(g, &src, api_sv_type(&src)); + dst_op = api_force_local(g, &dst, api_sv_type(&dst)); + src_op = api_force_local(g, &src, api_sv_type(&src)); memset(&agg, 0, sizeof agg); agg.size = (u32)size; agg.align = dst_access.align ? 
dst_access.align : (u32)size; @@ -774,15 +774,15 @@ void cfree_cg_memmove(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access, (void)dst_access; (void)src_access; if (size > INT64_MAX) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: memmove size exceeds CGTarget"); + compiler_panic(g->c, g->cur_loc, "CfreeCg: memmove size exceeds CgTarget"); return; } src = api_pop(g); dst = api_pop(g); api_require_pointer_value(g, "memmove destination", api_sv_type(&dst)); api_require_pointer_value(g, "memmove source", api_sv_type(&src)); - args[0] = api_force_reg(g, &dst, api_sv_type(&dst)); - args[1] = api_force_reg(g, &src, api_sv_type(&src)); + args[0] = api_force_local(g, &dst, api_sv_type(&dst)); + args[1] = api_force_local(g, &src, api_sv_type(&src)); args[2] = api_op_imm((i64)size, builtin_id(CFREE_CG_BUILTIN_I64)); g->target->intrinsic(g->target, INTRIN_MEMMOVE, NULL, 0, args, 3); api_release(g, &dst); @@ -792,19 +792,19 @@ void cfree_cg_memmove(CfreeCg* g, uint64_t size, CfreeCgMemAccess dst_access, void cfree_cg_memset(CfreeCg* g, uint8_t val, uint64_t size, CfreeCgMemAccess dst_access) { ApiSValue dst; - CGTarget* T; + CgTarget* T; AggregateAccess agg; Operand dst_op, byte_val; if (!g) return; api_local_const_memory_boundary(g); if (size > UINT32_MAX) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: memset size exceeds CGTarget"); + compiler_panic(g->c, g->cur_loc, "CfreeCg: memset size exceeds CgTarget"); return; } T = g->target; dst = api_pop(g); api_require_pointer_value(g, "memset destination", api_sv_type(&dst)); - dst_op = api_force_reg(g, &dst, api_sv_type(&dst)); + dst_op = api_force_local(g, &dst, api_sv_type(&dst)); byte_val = api_op_imm((i64)val, CFREE_CG_TYPE_NONE); memset(&agg, 0, sizeof agg); agg.size = (u32)size; @@ -815,13 +815,13 @@ void cfree_cg_memset(CfreeCg* g, uint8_t val, uint64_t size, void cfree_cg_index(CfreeCg* g, uint64_t offset) { ApiSValue idx, base; - CGTarget* T; + CgTarget* T; CfreeCgTypeId base_ty, base_ptr_ty, elem_ty, idx_ty; const 
CgType* base_info; u32 elemsz; int free_base_op = 0; Operand base_op, idx_op, result; - Reg rr; + CGLocal rr; if (!g) return; if (offset > INT64_MAX) { compiler_panic(g->c, g->cur_loc, "CfreeCg: index offset too large"); @@ -830,7 +830,7 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) { T = g->target; idx = api_pop(g); base = api_pop(g); - api_ensure_reg(g, &base); + api_ensure_local(g, &base); base_ty = api_sv_type(&base); base_info = cg_type_get(g->c, base_ty); if (base_info && base_info->kind == CFREE_CG_TYPE_PTR) { @@ -849,29 +849,29 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) { idx_ty = idx.type ? idx.type : idx.op.type; if (!idx_ty) idx_ty = builtin_id(CFREE_CG_BUILTIN_I32); if (base_info && base_info->kind == CFREE_CG_TYPE_ARRAY) { - rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); - base_op = api_op_reg(rr, base_ptr_ty); + rr = api_alloc_temp_local(g, base_ptr_ty); + base_op = api_op_local(rr, base_ptr_ty); T->addr_of(T, base_op, base.op); api_release(g, &base); free_base_op = 1; } else { - base_op = api_force_reg(g, &base, base_ptr_ty); + base_op = api_force_local(g, &base, base_ptr_ty); } - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - rr = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); - result = api_op_reg(rr, base_ptr_ty); + idx_op = api_force_local_unless_imm(g, &idx, idx_ty); + rr = api_alloc_temp_local(g, base_ptr_ty); + result = api_op_local(rr, base_ptr_ty); if (idx_op.kind == OPK_IMM) { i64 total_offset = idx_op.v.imm * (i64)elemsz + (i64)offset; T->binop(T, BO_IADD, result, base_op, api_op_imm(total_offset, base_ptr_ty)); } else { - Reg sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); - Operand scaled = api_op_reg(sr, idx_ty); + CGLocal sr = api_alloc_temp_local(g, idx_ty); + Operand scaled = api_op_local(sr, idx_ty); /* Allocating `scaled` can materialize a delayed index expression into a - * fresh virtual register under opt. Refresh idx_op so the multiply uses + * fresh virtual local under opt. 
Refresh idx_op so the multiply uses * the materialized value, not the pre-materialization source operand. */ - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - if (idx.op.kind == OPK_REG) idx_op = idx.op; + idx_op = api_force_local_unless_imm(g, &idx, idx_ty); + if (idx.op.kind == OPK_LOCAL) idx_op = idx.op; T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)elemsz, idx_ty)); if (offset > 0) { T->binop(T, BO_IADD, scaled, scaled, api_op_imm((i64)offset, idx_ty)); @@ -881,32 +881,32 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) { u32 idx_sz = (u32)abi_cg_sizeof(g->c->abi, idx_ty); u32 ptr_sz = (u32)abi_cg_sizeof(g->c->abi, base_ptr_ty); if (idx_sz && ptr_sz && idx_sz > ptr_sz) { - Reg narrow_r = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); - Operand narrow = api_op_reg(narrow_r, base_ptr_ty); + CGLocal narrow_r = api_alloc_temp_local(g, base_ptr_ty); + Operand narrow = api_op_local(narrow_r, base_ptr_ty); T->convert(T, CV_TRUNC, narrow, scaled); T->binop(T, BO_IADD, result, base_op, narrow); - api_free_reg(g, narrow_r, RC_INT); + api_release_temp_local(g, narrow_r); } else if (idx_sz && ptr_sz && idx_sz < ptr_sz) { - Reg wide_r = api_alloc_reg_or_spill(g, RC_INT, base_ptr_ty); - Operand wide = api_op_reg(wide_r, base_ptr_ty); + CGLocal wide_r = api_alloc_temp_local(g, base_ptr_ty); + Operand wide = api_op_local(wide_r, base_ptr_ty); T->convert(T, CV_ZEXT, wide, scaled); T->binop(T, BO_IADD, result, base_op, wide); - api_free_reg(g, wide_r, RC_INT); + api_release_temp_local(g, wide_r); } else { T->binop(T, BO_IADD, result, base_op, scaled); } - api_free_reg(g, sr, RC_INT); + api_release_temp_local(g, sr); } - if (free_base_op) api_free_reg(g, base_op.v.reg, RC_INT); + if (free_base_op) api_release_temp_local(g, base_op.v.local); if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY) api_release(g, &base); api_release(g, &idx); - api_push(g, api_make_lv(api_op_indirect(result.v.reg, 0, elem_ty), elem_ty)); + api_push(g, 
api_make_lv(api_op_indirect(result.v.local, 0, elem_ty), elem_ty)); } void cfree_cg_field(CfreeCg* g, uint32_t field_index) { ApiSValue base; - CGTarget* T; + CgTarget* T; CfreeCgTypeId rec_ty; CfreeCgTypeId base_ty; CfreeCgTypeId field_ty; @@ -917,11 +917,11 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) { u32 field_offset; int base_is_lvalue; Operand result; - Reg rr; + CGLocal rr; if (!g) return; T = g->target; base = api_pop(g); - api_ensure_reg(g, &base); + api_ensure_local(g, &base); base_ty = api_sv_type(&base); base_is_lvalue = api_is_lvalue_sv(&base); if (base_is_lvalue) { @@ -984,19 +984,19 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) { return; } if (!base_is_lvalue) { - Operand base_op = api_force_reg(g, &base, rec_ptr_ty); + Operand base_op = api_force_local(g, &base, rec_ptr_ty); if (field_offset == 0) { result = base_op; base.res = RES_INHERENT; } else { - rr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); - result = api_op_reg(rr, rec_ptr_ty); + rr = api_alloc_temp_local(g, rec_ptr_ty); + result = api_op_local(rr, rec_ptr_ty); T->binop(T, BO_IADD, result, base_op, api_op_imm((i64)field_offset, rec_ptr_ty)); api_release(g, &base); } api_push(g, - api_make_lv(api_op_indirect(result.v.reg, 0, field_ty), field_ty)); + api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty)); } else if (base.op.kind == OPK_GLOBAL) { result = api_op_global(base.op.v.global.sym, @@ -1009,21 +1009,21 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) { api_push(g, api_make_lv(result, field_ty)); } else { Operand base_addr; - rr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); - base_addr = api_op_reg(rr, rec_ptr_ty); + rr = api_alloc_temp_local(g, rec_ptr_ty); + base_addr = api_op_local(rr, rec_ptr_ty); T->addr_of(T, base_addr, base.op); api_release(g, &base); if (field_offset == 0) { result = base_addr; } else { - Reg fr = api_alloc_reg_or_spill(g, RC_INT, rec_ptr_ty); - result = api_op_reg(fr, rec_ptr_ty); + CGLocal fr = 
api_alloc_temp_local(g, rec_ptr_ty); + result = api_op_local(fr, rec_ptr_ty); T->binop(T, BO_IADD, result, base_addr, api_op_imm((i64)field_offset, rec_ptr_ty)); - api_free_reg(g, base_addr.v.reg, RC_INT); + api_release_temp_local(g, base_addr.v.local); } api_push(g, - api_make_lv(api_op_indirect(result.v.reg, 0, field_ty), field_ty)); + api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty)); } } diff --git a/src/cg/data.c b/src/cg/data.c @@ -437,9 +437,8 @@ void cfree_cg_data_addr(CfreeCg* g, CfreeCgSym target, int64_t addend, void cfree_cg_data_label_addr(CfreeCg* g, CfreeCgLabel target, int64_t addend, uint32_t width, uint32_t address_space) { u8 pad[8]; - RelocKind rk; - u32 data_offset; - MCLabel ml; + (void)target; + (void)addend; (void)address_space; if (!g) return; if (!width || width > sizeof(pad)) { @@ -461,57 +460,9 @@ void cfree_cg_data_label_addr(CfreeCg* g, CfreeCgLabel target, int64_t addend, return; } } - if (!g->mc) { - /* The C-source target has no MCEmitter and can't emit a relocation - * that resolves to an intra-function label address: GCC's `&&L` - * operator only works in function-local-static initializers, and - * the data path here writes to a file-scope C object. Silently - * writing zeros would produce a binary that jumps to NULL at - * runtime — fail loudly instead. Tests that intentionally exercise - * this can opt out via a `.cbackend.skip` file. */ - compiler_panic(g->c, g->cur_loc, - "cfree_cg_data_label_addr: --emit=c (C-source target) " - "does not support intra-function label addresses in " - "data sections (GCC's &&L is only valid in " - "function-local-static initializers)"); - return; - } - /* Resolve the cfree_cg_label to a stable MCLabel. Under direct CG - * the two ids coincide; under opt the wrapper pre-allocated an - * MCLabel at w_label_new time and stashed it on the IR block. 
*/ - if (g->target->cg_label_to_mc_label) { - ml = g->target->cg_label_to_mc_label(g->target, (Label)target); - } else { - ml = (MCLabel)target; - } - if (ml == MC_LABEL_NONE) { - compiler_panic(g->c, g->cur_loc, - "cfree_cg_data_label_addr: label has no MCLabel"); - return; - } - rk = api_data_reloc_kind(/*pcrel=*/0, width); - if (rk == R_NONE) { - compiler_panic(g->c, g->cur_loc, - "cfree_cg_data_label_addr: unsupported width %u", - (unsigned)width); - return; - } - memset(pad, 0, sizeof pad); - if (g->data_tls_collect) { - compiler_panic(g->c, g->cur_loc, - "cfree_cg_data_label_addr: TLS label-address data is not " - "supported"); - return; - } - data_offset = g->data_base + (u32)g->data_size; - obj_write(g->obj, g->data_sec, pad, width); - g->data_size += width; - /* The MCEmitter resolves this to obj_reloc(data_sec, data_offset, kind, - * cur_func_sym, label_offset - cur_func_start + addend) at label - * placement time. cur_func_sym/cur_func_start were set by the backend - * func_begin; at -O1 they're set when pass_emit replays the function. */ - g->mc->emit_label_data_reloc(g->mc, g->data_sec, data_offset, ml, rk, width, - addend); + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_label_addr: target does not support " + "intra-function label addresses in data sections"); } void cfree_cg_data_pcrel(CfreeCg* g, CfreeCgSym target, int64_t addend, @@ -651,85 +602,16 @@ void cfree_cg_data_end(CfreeCg* g) { g->data_retain = 0; } -/* Emit a function-local jump-table of `n` label addresses into .rodata - * and return the anonymous ObjSymId backing it. Each slot is one pointer - * wide. Used by api_cg_switch_lower_table to materialize the dispatch - * table inline during cg recording — under direct CG the reloc resolves - * immediately when the label is later placed; under opt, the - * label-data reloc is queued on the MCEmitter and resolved at replay - * time once the wrapped backend's func_begin has set cur_func_sym. 
The - * helper does not register the symbol with CfreeCg's sym table; - * callers wire its CfreeCg type via api_remember_sym so subsequent - * cfree_cg_push_symbol_addr + load(scale=ptr_size) can address it as - * an array of pointers. */ +/* Source targets with a native switch form should override target->switch_. + * The old machine-code jump-table path is intentionally not part of the + * semantic CgTarget cutover. */ ObjSymId api_emit_label_table(CfreeCg* g, const Label* labels, u32 n) { - Compiler* c; - ObjBuilder* ob; - MCEmitter* mc; - CGTarget* T; - u32 ptrsz; - u32 ptral; - Sym sec_name; - ObjSecId sec; - u32 base; - RelocKind rk; - u8 pad[8]; - char name_buf[40]; - StrBuf name_sb; - Sym anon; - ObjSymId sym; - u32 i; - if (!g || !labels || !n) return OBJ_SYM_NONE; - c = g->c; - ob = g->obj; - mc = g->mc; - T = g->target; - if (!mc) { - compiler_panic(c, g->cur_loc, - "api_emit_label_table: requires MCEmitter " - "(unsupported with --emit=c)"); - return OBJ_SYM_NONE; - } - ptrsz = (u32)c->target.ptr_size; - ptral = (u32)c->target.ptr_align; - if (ptrsz != 4u && ptrsz != 8u) { - compiler_panic(c, g->cur_loc, - "api_emit_label_table: unsupported ptr_size %u", - (unsigned)ptrsz); - return OBJ_SYM_NONE; - } - rk = api_data_reloc_kind(/*pcrel=*/0, ptrsz); - if (rk == R_NONE) { - compiler_panic(c, g->cur_loc, - "api_emit_label_table: unsupported reloc width %u", - (unsigned)ptrsz); - return OBJ_SYM_NONE; - } - sec_name = pool_intern_slice(c->global, SLICE_LIT(".rodata")); - sec = obj_section(ob, sec_name, SEC_RODATA, SF_ALLOC, ptral); - base = obj_align_to(ob, sec, ptral); - /* Write `n` placeholder slots first; emit_label_data_reloc later - * patches each one inline (via obj_patch) at label-place time. */ - memset(pad, 0, sizeof pad); - for (i = 0; i < n; ++i) { - obj_write(ob, sec, pad, ptrsz); - } - for (i = 0; i < n; ++i) { - MCLabel ml = T->cg_label_to_mc_label ? 
T->cg_label_to_mc_label(T, labels[i]) - : (MCLabel)labels[i]; - if (ml == MC_LABEL_NONE) { - compiler_panic(c, g->cur_loc, - "api_emit_label_table: label has no MCLabel"); - return OBJ_SYM_NONE; - } - mc->emit_label_data_reloc(mc, sec, base + i * ptrsz, ml, rk, ptrsz, - /*extra_addend=*/0); - } - strbuf_init(&name_sb, name_buf, sizeof name_buf); - strbuf_put_slice(&name_sb, SLICE_LIT(".Lcfree_jt.")); - strbuf_put_u64(&name_sb, g->rodata_counter++); - anon = pool_intern_slice(c->global, (Slice){.s = strbuf_cstr(&name_sb), - .len = strbuf_len(&name_sb)}); - sym = obj_symbol(ob, anon, SB_LOCAL, SK_OBJ, sec, base, (u64)n * ptrsz); - return sym; + (void)labels; + (void)n; + if (g) { + compiler_panic(g->c, g->cur_loc, + "api_emit_label_table: target does not support semantic " + "label tables"); + } + return OBJ_SYM_NONE; } diff --git a/src/cg/internal.h b/src/cg/internal.h @@ -8,10 +8,7 @@ #include <string.h> #include "abi/abi.h" -#include "arch/arch.h" -#include "arch/regalloc.h" #include "asm/asm.h" -#include "asm/asm_lex.h" #include "cg/type.h" #include "core/arena.h" #include "core/heap.h" @@ -22,15 +19,13 @@ #include "debug/debug.h" #include "obj/obj.h" -typedef struct CGTarget CGTarget; -typedef struct MCEmitter MCEmitter; +typedef struct CgTarget CgTarget; typedef uint32_t ObjSymId; typedef enum SResidency { RES_INHERENT, - RES_REG, - RES_SPILLED, - RES_FIXED_REG, + RES_LOCAL, + RES_FIXED_LOCAL, } SResidency; typedef enum ApiSValueKind { @@ -77,7 +72,6 @@ typedef struct ApiSValue { u8 pinned; u8 lvalue; u8 bitfield_lvalue; - FrameSlot spill_slot; CfreeCgLocal source_local; } ApiSValue; @@ -88,7 +82,7 @@ typedef struct ApiCgScope { Label continue_lbl; CGScope target_scope; CfreeCgTypeId result_type; - FrameSlot result_slot; + CGLocal result_local; u32 generation; u8 active; u8 pad[3]; @@ -107,7 +101,7 @@ typedef struct ApiSourceLocal { CfreeCgLocalAttrs attrs; SrcLoc loc; CGLocalDesc desc; - CGLocalStorage storage; + CGLocal storage; i64 const_value; u32 
param_index; u8 kind; @@ -118,10 +112,8 @@ typedef struct ApiSourceLocal { struct CfreeCg { Compiler* c; ObjBuilder* obj; - CGTarget* target; - MCEmitter* mc; + CgTarget* target; Debug* debug; - CGSimpleRegAlloc regalloc; ApiSValue* stack; u32 sp; @@ -131,17 +123,8 @@ struct CfreeCg { u32 nlocals; u32 locals_cap; - struct { - FrameSlot* free; - u32 n; - u32 cap; - } slot_pools[3]; - - CGABIValue* avs_in_flight; - u32 avs_in_flight_n; - CfreeCgTypeId fn_ret_type; - const ABIFuncInfo* fn_abi; + CfreeCgTypeId fn_result_types[1]; SrcLoc cur_loc; CGFuncDesc fn_desc; @@ -226,7 +209,7 @@ const char* api_sym_cstr(CfreeCg* g, CfreeSym sym); int api_asm_parse_match_index(const char* s); const char* api_asm_constraint_body(const char* s); int api_asm_is_early_clobber(const char* s); -void api_asm_spill_sv(CfreeCg* g, ApiSValue* sv, Reg phys, RegClass cls); +void api_asm_memory_clobber_sv(CfreeCg* g, ApiSValue* sv, CGLocal local); void cfree_cg_inline_asm(CfreeCg* g, CfreeCgInlineAsm asm_block); void cfree_cg_file_scope_asm(CfreeCg* g, CfreeSlice asm_source); MemAccess api_mem_for_atomic(CfreeCg* g, CfreeCgTypeId val_ty); @@ -239,22 +222,15 @@ void cfree_cg_atomic_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgMemOrder order); void cfree_cg_atomic_rmw(CfreeCg* g, CfreeCgMemAccess access, CfreeCgAtomicOp op, CfreeCgMemOrder order); -int api_take_dead_owned_reg(ApiSValue* sv, u8 cls, Reg avoid, Reg* out); -Reg api_alloc_dead_input_or_spill(CfreeCg* g, ApiSValue* a, ApiSValue* b, - ApiSValue* c, u8 cls, CfreeCgTypeId ty, - Reg avoid); void cfree_cg_atomic_cmpxchg(CfreeCg* g, CfreeCgMemAccess access, CfreeCgMemOrder success, CfreeCgMemOrder failure, int weak); void cfree_cg_atomic_fence(CfreeCg* g, CfreeCgMemOrder order); -CGABIValue* api_alloc_call_args(CfreeCg* g, u32 nargs); -void api_pack_call_arg(CfreeCg* g, CGABIValue* av, CfreeCgTypeId fty, - const ABIFuncInfo* abi, u32 idx); -void api_alloc_call_ret_storage(CfreeCg* g, CGTarget* T, CfreeCgTypeId ret_ty, - Operand* out); 
-void api_release_call_args(CfreeCg* g, CGABIValue* avs, u32 nargs); -void api_push_call_result(CfreeCg* g, Operand ret_storage, - CfreeCgTypeId ret_ty); +CGLocal* api_alloc_call_args(CfreeCg* g, u32 nargs); +void api_pack_call_arg(CfreeCg* g, CGLocal* out, CfreeCgTypeId fty, u32 idx); +CGLocal api_alloc_call_result(CfreeCg* g, CfreeCgTypeId ret_ty); +void api_release_call_args(CfreeCg* g, CGLocal* args, u32 nargs); +void api_push_call_result(CfreeCg* g, CGLocal result, CfreeCgTypeId ret_ty); void cfree_cg_call(CfreeCg* g, uint32_t nargs, CfreeCgTypeId fn_type, CfreeCgCallAttrs attrs); void api_call_symbol_common(CfreeCg* g, CfreeCgSym sym, uint32_t nargs, @@ -329,7 +305,7 @@ int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty, CfreeCgLocal api_local_handle(u32 index); int api_grow_locals(CfreeCg* g, u32 want); ApiSourceLocal* api_local_from_handle(CfreeCg* g, CfreeCgLocal local); -CGLocalStorage api_frame_local_storage(CfreeCg* g, const CGLocalDesc* d); +CGLocal api_frame_local_storage(CfreeCg* g, const CGLocalDesc* d); CfreeCgLocal cfree_cg_local(CfreeCg* g, CfreeCgTypeId type, CfreeCgLocalAttrs attrs); CfreeCgLocal cfree_cg_param(CfreeCg* g, uint32_t index, CfreeCgTypeId type, @@ -339,11 +315,9 @@ void cfree_cg_push_float(CfreeCg* g, double value, CfreeCgTypeId type); void cfree_cg_push_null(CfreeCg* g, CfreeCgTypeId ptr_type); CfreeCgSym cfree_cg_const_data(CfreeCg* g, const uint8_t* data, size_t len, uint32_t align, CfreeCgTypeId pointee_type); -void api_push_frame_lvalue(CfreeCg* g, FrameSlot slot, CfreeCgTypeId type); -void api_push_source_frame_lvalue(CfreeCg* g, CfreeCgLocal local, - FrameSlot slot, CfreeCgTypeId type); -void api_push_source_reg_lvalue(CfreeCg* g, CfreeCgLocal local, Reg reg, - CfreeCgTypeId type); +void api_push_local_lvalue(CfreeCg* g, CGLocal local, CfreeCgTypeId type); +void api_push_source_local_lvalue(CfreeCg* g, CfreeCgLocal local, + CGLocal storage, CfreeCgTypeId type); void cfree_cg_push_local(CfreeCg* g, CfreeCgLocal 
local); void cfree_cg_push_local_addr(CfreeCg* g, CfreeCgLocal local); void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend); @@ -379,17 +353,15 @@ CfreeCgDecl api_sym_attrs(CfreeCg* g, CfreeCgSym sym); int api_sym_is_tls(CfreeCg* g, CfreeCgSym sym); RelocKind api_data_reloc_kind(int pcrel, uint32_t width); SrcLoc api_no_loc(void); -u8 api_type_class(CfreeCgTypeId ty); +int api_type_is_float(Compiler* c, CfreeCgTypeId ty); int api_is_f128_type(Compiler* c, CfreeCgTypeId ty); int api_is_i128_type(Compiler* c, CfreeCgTypeId ty); int api_is_wide16_scalar_type(Compiler* c, CfreeCgTypeId ty); -int api_arg_storage_must_be_addr(Compiler* c, CfreeCgTypeId ty); Operand api_op_imm(i64 v, CfreeCgTypeId ty); -Operand api_op_reg(Reg r, CfreeCgTypeId ty); -Operand api_op_local(FrameSlot s, CfreeCgTypeId ty); +Operand api_op_local(CGLocal r, CfreeCgTypeId ty); Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty); -Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty); -Operand api_op_indirect_indexed(Reg base, Reg index, u8 log2_scale, i32 ofs, +Operand api_op_indirect(CGLocal base, i32 ofs, CfreeCgTypeId ty); +Operand api_op_indirect_indexed(CGLocal base, CGLocal index, u8 log2_scale, i32 ofs, CfreeCgTypeId ty); u8 api_residency_for(const Operand* o); ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty); @@ -400,29 +372,23 @@ ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, int a_owned); ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, int a_owned, int b_owned); -ApiSValue api_make_sv_with_reg_ownership(Operand op, CfreeCgTypeId ty, +ApiSValue api_make_sv_with_local_ownership(Operand op, CfreeCgTypeId ty, int owned); CfreeCgTypeId api_sv_type(const ApiSValue* sv); int api_operand_can_address(const Operand* o); int api_sv_op_is(const ApiSValue* sv, OpKind kind); -int api_sv_op_is_reg_or_imm(const ApiSValue* sv); +int api_sv_op_is_local_or_imm(const ApiSValue* sv); int api_is_lvalue_sv(const 
ApiSValue* sv); void api_stack_grow(CfreeCg* g, u32 want); void api_push(CfreeCg* g, ApiSValue v); ApiSValue api_pop(CfreeCg* g); -u8 api_class_of_sv(const ApiSValue* sv); -Reg api_reg_of_sv(const ApiSValue* sv); -void api_set_owned_reg(ApiSValue* sv, Reg r); -CfreeCgTypeId api_owned_reg_type(CfreeCg* g, const ApiSValue* sv); -void api_take_spill_slot_alloc(CfreeCg* g, u8 cls, FrameSlot* out); -FrameSlot api_take_spill_slot(CfreeCg* g, u8 cls); -void api_return_spill_slot(CfreeCg* g, FrameSlot s, u8 cls); -ApiSValue* api_pick_victim(CfreeCg* g, u8 cls); -void api_regalloc_begin(CfreeCg* g); -void api_regalloc_finish(CfreeCg* g); -Reg api_alloc_reg(CfreeCg* g, u8 cls); -void api_free_reg(CfreeCg* g, Reg r, u8 cls); -int api_spill_avs_victim(CfreeCg* g, u8 cls); +CGLocal api_local_of_sv(const ApiSValue* sv); +void api_set_owned_local(ApiSValue* sv, CGLocal r); +CfreeCgTypeId api_owned_local_type(CfreeCg* g, const ApiSValue* sv); +void api_temp_locals_begin(CfreeCg* g); +void api_temp_locals_finish(CfreeCg* g); +CGLocal api_alloc_temp_local(CfreeCg* g, CfreeCgTypeId ty); +void api_release_temp_local(CfreeCg* g, CGLocal r); MemAccess api_mem_for_lvalue(CfreeCg* g, const Operand* lv, CfreeCgTypeId ty); MemAccess api_mem_from_access(CfreeCg* g, const Operand* lv, CfreeCgMemAccess access); @@ -433,22 +399,17 @@ void api_require_scalar_mem_type(CfreeCg* g, const char* who, CfreeCgTypeId ty); void api_require_pointer_value(CfreeCg* g, const char* who, CfreeCgTypeId ty); void api_validate_memory_value(CfreeCg* g, const char* who, CfreeCgTypeId access_ty, CfreeCgTypeId value_ty); -MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv); -void api_release_operand_reg(CfreeCg* g, Operand op); -int api_sv_owns_operand_reg(const ApiSValue* sv, const Operand* op); +void api_release_operand_local(CfreeCg* g, Operand op); +int api_sv_owns_operand_local(const ApiSValue* sv, const Operand* op); void api_release_cmp(CfreeCg* g, ApiSValue* sv); void api_release_arith(CfreeCg* g, 
ApiSValue* sv); void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst); void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst); int api_arith_rhs_reusable(const ApiSValue* sv); -int api_materialize_cmp_victim(CfreeCg* g, u8 cls); -int api_materialize_arith_victim(CfreeCg* g, u8 cls); -Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty); -void api_ensure_reg(CfreeCg* g, ApiSValue* sv); -Operand api_force_reg(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty); -Operand api_force_reg_unless_imm(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty); +void api_ensure_local(CfreeCg* g, ApiSValue* sv); +Operand api_force_local(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty); +Operand api_force_local_unless_imm(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty); void api_release(CfreeCg* g, ApiSValue* sv); -void api_release_arg_storage(CfreeCg* g, Operand* storage); BinOp api_map_int_binop(CfreeCgIntBinOp op); BinOp api_map_fp_binop(CfreeCgFpBinOp op); UnOp api_map_int_unop(CfreeCgIntUnOp op); @@ -492,9 +453,9 @@ int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, ApiSValue* a, ApiSValue* b, ApiSValue* out); int api_try_fold_unary_chain(ApiSValue* a, UnOp op, CfreeCgTypeId ty, ApiSValue* out); -FrameSlot api_f128_temp_slot(CfreeCg* g, CfreeCgTypeId ty); +CGLocal api_f128_temp_local(CfreeCg* g, CfreeCgTypeId ty); u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes); -void api_store_f128_bytes(CfreeCg* g, FrameSlot slot, CfreeCgTypeId ty, +void api_store_f128_bytes(CfreeCg* g, CGLocal local, CfreeCgTypeId ty, const u8 bytes[16]); void api_encode_binary128_from_double(CfreeCg* g, double value, u8 out[16]); ApiSValue api_make_f128_const(CfreeCg* g, double value, CfreeCgTypeId ty); diff --git a/src/cg/local.c b/src/cg/local.c @@ -4,9 +4,7 @@ int api_local_requires_memory(CfreeCg* g, CfreeCgTypeId ty, CfreeCgLocalAttrs attrs) { (void)attrs; /* Aggregates (records, arrays), wide16 (f128/i128), vararg state, and any - * non-scalar type must live 
in memory. All scalar locals are placed on the - * frame at CG time; opt is responsible for promoting non-escaped scalars - * back into PRegs. */ + * non-scalar type must live in memory. */ if (api_is_wide16_scalar_type(g->c, ty)) return 1; return !(cg_type_is_int(g->c, ty) || cg_type_is_float(g->c, ty) || cg_type_is_ptr(g->c, ty)); @@ -48,27 +46,15 @@ ApiSourceLocal* api_local_from_handle(CfreeCg* g, CfreeCgLocal local) { return &g->locals[index]; } -CGLocalStorage api_frame_local_storage(CfreeCg* g, const CGLocalDesc* d) { - FrameSlotDesc fsd; - CGLocalStorage st; - memset(&fsd, 0, sizeof fsd); - fsd.type = d->type; - fsd.name = d->name; - fsd.loc = d->loc; - fsd.size = d->size; - fsd.align = d->align; - fsd.kind = FS_LOCAL; - if (d->flags & CG_LOCAL_ADDR_TAKEN) fsd.flags |= FSF_ADDR_TAKEN; - st.kind = CG_LOCAL_STORAGE_FRAME; - st.v.frame_slot = g->target->frame_slot(g->target, &fsd); - return st; +CGLocal api_frame_local_storage(CfreeCg* g, const CGLocalDesc* d) { + return g->target->local(g->target, d); } CfreeCgLocal cfree_cg_local(CfreeCg* g, CfreeCgTypeId type, CfreeCgLocalAttrs attrs) { CfreeCgTypeId ty; CGLocalDesc desc; - CGLocalStorage storage; + CGLocal storage; ApiSourceLocal* rec; CfreeCgLocal handle; if (!g) return CFREE_CG_LOCAL_NONE; @@ -89,10 +75,6 @@ CfreeCgLocal cfree_cg_local(CfreeCg* g, CfreeCgTypeId type, storage = g->target->local(g->target, &desc); else storage = api_frame_local_storage(g, &desc); - if (storage.kind == CG_LOCAL_STORAGE_REG) { - cg_simple_regalloc_reserve(&g->regalloc, (RegClass)api_type_class(ty), - storage.v.reg); - } rec = &g->locals[g->nlocals++]; memset(rec, 0, sizeof *rec); rec->type = ty; @@ -112,7 +94,7 @@ CfreeCgLocal cfree_cg_param(CfreeCg* g, uint32_t index, CfreeCgTypeId type, CGParamDesc pd; ApiSourceLocal* rec; CfreeCgLocal handle; - CGLocalStorage storage; + CGLocal storage; u32 size; u32 align; if (!g) return CFREE_CG_LOCAL_NONE; @@ -134,15 +116,8 @@ CfreeCgLocal cfree_cg_param(CfreeCg* g, uint32_t index, 
CfreeCgTypeId type, pd.align = align; if (api_local_requires_memory(g, ty, attrs)) pd.flags |= CG_LOCAL_MEMORY_REQUIRED; - if (g->fn_abi && index < g->fn_abi->nparams) { - pd.abi = &g->fn_abi->params[index]; - } pd.loc = g->cur_loc; storage = g->target->param(g->target, &pd); - if (storage.kind == CG_LOCAL_STORAGE_REG) { - cg_simple_regalloc_reserve(&g->regalloc, (RegClass)api_type_class(ty), - storage.v.reg); - } rec = &g->locals[g->nlocals++]; memset(rec, 0, sizeof *rec); diff --git a/src/cg/memory.c b/src/cg/memory.c @@ -10,14 +10,14 @@ void cfree_cg_push_int(CfreeCg* g, uint64_t value, CfreeCgTypeId type) { void cfree_cg_push_float(CfreeCg* g, double value, CfreeCgTypeId type) { CfreeCgTypeId ty; - CGTarget* T; + CgTarget* T; ConstBytes cb; union { double d; float f; uint8_t b[8]; } u; - Reg r; + CGLocal r; Operand dst; if (!g) return; ty = resolve_type(g->c, type); @@ -35,8 +35,8 @@ void cfree_cg_push_float(CfreeCg* g, double value, CfreeCgTypeId type) { else u.d = value; cb.bytes = u.b; - r = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - dst = api_op_reg(r, ty); + r = api_alloc_temp_local(g, ty); + dst = api_op_local(r, ty); T->load_const(T, dst, cb); api_push(g, api_make_sv(dst, ty)); } @@ -89,27 +89,17 @@ CfreeCgSym cfree_cg_const_data(CfreeCg* g, const uint8_t* data, size_t len, return (CfreeCgSym)sym; } -void api_push_frame_lvalue(CfreeCg* g, FrameSlot slot, CfreeCgTypeId type) { +void api_push_local_lvalue(CfreeCg* g, CGLocal local, CfreeCgTypeId type) { if (!g) return; - api_push(g, api_make_lv(api_op_local(slot, type), type)); + api_push(g, api_make_lv(api_op_local(local, type), type)); } -void api_push_source_frame_lvalue(CfreeCg* g, CfreeCgLocal local, - FrameSlot slot, CfreeCgTypeId type) { +void api_push_source_local_lvalue(CfreeCg* g, CfreeCgLocal source_local, + CGLocal storage, CfreeCgTypeId type) { ApiSValue sv; if (!g) return; - sv = api_make_lv(api_op_local(slot, type), type); - sv.source_local = local; - api_push(g, sv); -} - -void 
api_push_source_reg_lvalue(CfreeCg* g, CfreeCgLocal local, Reg reg, - CfreeCgTypeId type) { - ApiSValue sv; - if (!g) return; - sv = api_make_lv(api_op_reg(reg, type), type); - sv.res = RES_FIXED_REG; - sv.source_local = local; + sv = api_make_lv(api_op_local(storage, type), type); + sv.source_local = source_local; api_push(g, sv); } @@ -118,14 +108,7 @@ void cfree_cg_push_local(CfreeCg* g, CfreeCgLocal local) { if (!g) return; rec = api_local_from_handle(g, local); if (!rec) return; - if (rec->storage.kind == CG_LOCAL_STORAGE_REG) { - api_push_source_reg_lvalue(g, local, rec->storage.v.reg, rec->type); - } else if (rec->kind == API_SOURCE_LOCAL_AUTO) { - api_push_source_frame_lvalue(g, local, rec->storage.v.frame_slot, - rec->type); - } else { - api_push_frame_lvalue(g, rec->storage.v.frame_slot, rec->type); - } + api_push_source_local_lvalue(g, local, rec->storage, rec->type); } void cfree_cg_push_local_addr(CfreeCg* g, CfreeCgLocal local) { @@ -141,8 +124,8 @@ void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend) { if (!ty) ty = builtin_id(CFREE_CG_BUILTIN_VOID); ptr_ty = cg_type_ptr_to(g->c, ty); if (api_sym_is_tls(g, sym)) { - Reg r = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - Operand dst = api_op_reg(r, ptr_ty); + CGLocal r = api_alloc_temp_local(g, ptr_ty); + Operand dst = api_op_local(r, ptr_ty); g->target->tls_addr_of(g->target, dst, (ObjSymId)sym, addend); api_push(g, api_make_sv(dst, ptr_ty)); } else { @@ -160,14 +143,14 @@ void cfree_cg_push_symbol_addr(CfreeCg* g, CfreeCgSym sym, int64_t addend) { * 2. lvalue base, scaled index -> take the lvalue's address, then form an * indexed OPK_INDIRECT and emit a single memop. * 3. pointer-rvalue base, no index -> for OPK_GLOBAL fold the offset into - * the addend; otherwise use [reg + offset]. + * the addend; otherwise use [local + offset]. * 4. pointer-rvalue base, scaled index -> form OPK_INDIRECT against the - * pointer register with index/scale. + * pointer local with index/scale. 
* * Scale normalization (ea.scale != 0): * {1,2,4,8} -> log2_scale 0..3 on OPK_INDIRECT. - * otherwise -> compute index*scale into a fresh register, fold into base, - * dispatch with index = REG_NONE. + * otherwise -> compute index*scale into a fresh local, fold into base, + * dispatch with index = CG_LOCAL_NONE. * ============================================================ */ /* log2 of a {1,2,4,8} scale, else -1. */ @@ -190,38 +173,50 @@ static int scale_to_log2(uint32_t scale) { * load/store. `addr` may be OPK_LOCAL, OPK_GLOBAL, or OPK_INDIRECT. The * `offset` is folded into the operand; the indexed form is encoded directly. * If the index path requires arithmetic on a global/local, the address is - * first lowered into a register via T->addr_of and then combined. + * first lowered into a local via T->addr_of and then combined. * - * The caller owns `index_reg` (REG_NONE if no index). On return, - * *out_owned_base is set to the register that the caller must free after the - * memop completes (REG_NONE if no new register was allocated). The returned - * operand's index register is freed by the caller separately when applicable; + * The caller owns `index_local` (CG_LOCAL_NONE if no index). On return, + * *out_owned_base is set to the local that the caller must free after the + * memop completes (CG_LOCAL_NONE if no new local was allocated). The returned + * operand's index local is freed by the caller separately when applicable; * this helper does not free it. */ static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset, - Reg index, u8 log2_scale, - CfreeCgTypeId access_ty, - Reg* out_owned_base) { - CGTarget* T = g->target; + CGLocal index, u8 log2_scale, + CfreeCgTypeId access_ty, + int addr_is_pointer_value, + CGLocal* out_owned_base) { + CgTarget* T = g->target; CfreeCgTypeId base_ty = cg_type_is_ptr(g->c, addr.type) ? 
addr.type : cg_type_ptr_to(g->c, access_ty); - *out_owned_base = REG_NONE; + *out_owned_base = CG_LOCAL_NONE; - if (index == REG_NONE) { + if (index == CG_LOCAL_NONE) { /* No index: fold offset into the operand directly. */ if (addr.kind == OPK_LOCAL) { - Operand r; + if (addr_is_pointer_value) { + if (offset >= INT32_MIN && offset <= INT32_MAX) { + return api_op_indirect(addr.v.local, (i32)offset, access_ty); + } + { + CGLocal br = api_alloc_temp_local(g, base_ty); + Operand base_reg = api_op_local(br, base_ty); + T->binop(T, BO_IADD, base_reg, api_op_local(addr.v.local, base_ty), + api_op_imm(offset, base_ty)); + *out_owned_base = br; + return api_op_indirect(br, 0, access_ty); + } + } if (offset == 0) { - r = addr; - r.type = access_ty; - return r; + (void)access_ty; + return addr; } - /* Local frame slots have no native displacement encoding in OPK_LOCAL; - * materialize the base address into a register and apply the offset. */ + /* OPK_LOCAL has no displacement field; materialize the base address into + * a local and apply the offset. */ { - Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty); - Operand base_reg = api_op_reg(br, base_ty); + CGLocal br = api_alloc_temp_local(g, base_ty); + Operand base_reg = api_op_local(br, base_ty); T->addr_of(T, base_reg, addr); if (offset >= INT32_MIN && offset <= INT32_MAX) { *out_owned_base = br; @@ -246,9 +241,9 @@ static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset, } /* Offset too large for i32 displacement; materialize. 
*/ { - Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty); - Operand base_reg = api_op_reg(br, base_ty); - T->copy(T, base_reg, api_op_reg(addr.v.ind.base, base_ty)); + CGLocal br = api_alloc_temp_local(g, base_ty); + Operand base_reg = api_op_local(br, base_ty); + T->copy(T, base_reg, api_op_local(addr.v.ind.base, base_ty)); T->binop(T, BO_IADD, base_reg, base_reg, api_op_imm(offset, base_ty)); *out_owned_base = br; return api_op_indirect_indexed(br, addr.v.ind.index, @@ -256,43 +251,34 @@ static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset, access_ty); } } - /* OPK_REG (pointer rvalue) */ - if (offset >= INT32_MIN && offset <= INT32_MAX) { - return api_op_indirect(addr.v.reg, (i32)offset, access_ty); - } - { - Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty); - Operand base_reg = api_op_reg(br, base_ty); - T->binop(T, BO_IADD, base_reg, api_op_reg(addr.v.reg, base_ty), - api_op_imm(offset, base_ty)); - *out_owned_base = br; - return api_op_indirect(br, 0, access_ty); - } } - /* Indexed form. addr must be reduced to a base register first when it is - * not already an OPK_INDIRECT with a free index slot. */ - if (addr.kind == OPK_INDIRECT && addr.v.ind.index == REG_NONE && + /* Indexed form. addr must be reduced to a base local first when it is + * not already an OPK_INDIRECT with room for an index local. */ + if (addr.kind == OPK_INDIRECT && addr.v.ind.index == CG_LOCAL_NONE && offset == 0) { /* Reuse existing INDIRECT base; add index and scale. The displacement * stays whatever the operand already had. 
*/ return api_op_indirect_indexed(addr.v.ind.base, index, log2_scale, addr.v.ind.ofs, access_ty); } - if (addr.kind == OPK_INDIRECT && addr.v.ind.index == REG_NONE) { + if (addr.kind == OPK_INDIRECT && addr.v.ind.index == CG_LOCAL_NONE) { i64 sum = (i64)addr.v.ind.ofs + offset; if (sum >= INT32_MIN && sum <= INT32_MAX) { return api_op_indirect_indexed(addr.v.ind.base, index, log2_scale, (i32)sum, access_ty); } } - /* Otherwise, materialize addr into a register and then build the indexed + /* Otherwise, materialize addr into a local and then build the indexed * operand around it. */ { - Reg br = api_alloc_reg_or_spill(g, RC_INT, base_ty); - Operand base_reg = api_op_reg(br, base_ty); - if (addr.kind == OPK_REG) { - T->copy(T, base_reg, api_op_reg(addr.v.reg, base_ty)); + CGLocal br = api_alloc_temp_local(g, base_ty); + Operand base_reg = api_op_local(br, base_ty); + if (addr.kind == OPK_LOCAL) { + if (addr_is_pointer_value) + T->copy(T, base_reg, api_op_local(addr.v.local, base_ty)); + else + T->addr_of(T, base_reg, addr); } else { T->addr_of(T, base_reg, addr); } @@ -310,20 +296,20 @@ static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset, } /* Pop the index operand for a scaled-index memop. Returns the index in a - * freshly allocated register that the caller owns and must free after the + * freshly allocated local that the caller owns and must free after the * memop. Handles the scale-not-in-{1,2,4,8} case by computing index*scale. * * On return: * *out_log2 = log2_scale (0..3) if scale was normalized to one of {1,2,4,8} * or to 0 if we materialized the scaled value (log2=0). 
*/ -static Reg pop_and_normalize_index(CfreeCg* g, uint32_t scale, u8* out_log2) { +static CGLocal pop_and_normalize_index(CfreeCg* g, uint32_t scale, u8* out_log2) { ApiSValue idx; CfreeCgTypeId idx_ty; int lg2; Operand idx_op; - CGTarget* T = g->target; - Reg sr; + CgTarget* T = g->target; + CGLocal sr; Operand scaled; idx = api_pop(g); @@ -333,32 +319,32 @@ static Reg pop_and_normalize_index(CfreeCg* g, uint32_t scale, u8* out_log2) { lg2 = scale_to_log2(scale); if (lg2 >= 0) { *out_log2 = (u8)lg2; - /* Always allocate a fresh register so the caller has unambiguous + /* Always allocate a fresh local so the caller has unambiguous * ownership; copy the index value in. */ - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); - scaled = api_op_reg(sr, idx_ty); + idx_op = api_force_local_unless_imm(g, &idx, idx_ty); + sr = api_alloc_temp_local(g, idx_ty); + scaled = api_op_local(sr, idx_ty); if (idx_op.kind == OPK_IMM) { T->load_imm(T, scaled, idx_op.v.imm); } else { /* Re-fetch in case alloc materialized a delayed expression. */ - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - if (idx.op.kind == OPK_REG) idx_op = idx.op; + idx_op = api_force_local_unless_imm(g, &idx, idx_ty); + if (idx.op.kind == OPK_LOCAL) idx_op = idx.op; T->copy(T, scaled, idx_op); } api_release(g, &idx); return sr; } - /* Non-power-of-two scale: materialize index*scale into a fresh register. */ - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - sr = api_alloc_reg_or_spill(g, RC_INT, idx_ty); - scaled = api_op_reg(sr, idx_ty); + /* Non-power-of-two scale: materialize index*scale into a fresh local. 
*/ + idx_op = api_force_local_unless_imm(g, &idx, idx_ty); + sr = api_alloc_temp_local(g, idx_ty); + scaled = api_op_local(sr, idx_ty); if (idx_op.kind == OPK_IMM) { T->load_imm(T, scaled, idx_op.v.imm * (i64)scale); } else { - idx_op = api_force_reg_unless_imm(g, &idx, idx_ty); - if (idx.op.kind == OPK_REG) idx_op = idx.op; + idx_op = api_force_local_unless_imm(g, &idx, idx_ty); + if (idx.op.kind == OPK_LOCAL) idx_op = idx.op; T->binop(T, BO_IMUL, scaled, idx_op, api_op_imm((i64)scale, idx_ty)); } api_release(g, &idx); @@ -382,8 +368,19 @@ static BitFieldAccess bf_from_access(CfreeCg* g, CfreeCgMemAccess access, return bf; } +static int api_sv_local_storage_is_aggregate(CfreeCg* g, const ApiSValue* sv) { + ApiSourceLocal* rec; + if (!sv || sv->op.kind != OPK_LOCAL || + sv->source_local == CFREE_CG_LOCAL_NONE) { + return 0; + } + rec = api_local_from_handle(g, sv->source_local); + return rec && rec->storage == sv->op.v.local && + cg_type_is_aggregate(g->c, rec->type); +} + /* Pop the base for a memop; populate `*base_addr` with an operand the backend - * can consume (LOCAL/GLOBAL/INDIRECT for lvalue forms, or REG holding a + * can consume (LOCAL/GLOBAL/INDIRECT for lvalue forms, or LOCAL holding a * pointer for rvalue forms). Returns 1 if `base` is an lvalue, 0 otherwise. 
* * Sets `*source_local_out` to the lvalue's source_local handle when applicable @@ -395,14 +392,14 @@ static BitFieldAccess bf_from_access(CfreeCg* g, CfreeCgMemAccess access, void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { ApiSValue base; - CGTarget* T; + CgTarget* T; CfreeCgTypeId ty; CfreeCgTypeId access_ty; - Reg owned_base = REG_NONE; - Reg owned_index = REG_NONE; + CGLocal owned_base = CG_LOCAL_NONE; + CGLocal owned_index = CG_LOCAL_NONE; u8 log2_scale = 0; Operand mem_op; - Reg dst_r; + CGLocal dst_r; Operand dst; int is_lvalue; int is_bitfield; @@ -479,45 +476,45 @@ void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { return; } - /* Source-local register lvalue (param in a hard reg): plain no-EA load - * returns the register value directly. */ + /* Scalar local lvalue: plain no-EA load returns the local value directly. + * Aggregate locals are storage; even offset-zero field accesses must go + * through the memory path so the access type controls the dereference instead + * of treating the whole aggregate as a scalar value. */ if (!has_index && !is_bitfield && ea.offset == 0 && is_lvalue && - base.source_local != CFREE_CG_LOCAL_NONE && base.op.kind == OPK_REG) { - Operand val = base.op; - val.type = ty; - base.op = val; - base.type = ty; + base.source_local != CFREE_CG_LOCAL_NONE && base.op.kind == OPK_LOCAL && + !api_sv_local_storage_is_aggregate(g, &base) && + !cg_type_is_aggregate(g->c, api_sv_type(&base)) && + !cg_type_is_aggregate(g->c, ty) && + api_unalias_type(g->c, api_sv_type(&base)) == api_unalias_type(g->c, ty)) { base.lvalue = 0; - base.res = RES_FIXED_REG; + base.res = RES_FIXED_LOCAL; api_push(g, base); return; } /* Wide-16 scalar lvalue load: keep the addressable storage as the value. * For fields at a fixed offset, fold the EA into a new lvalue operand instead - * of asking the backend for a single 16-byte register load. */ + * of asking the backend for a single 16-byte local load. 
*/ if (!has_index && !is_bitfield && is_lvalue && api_is_wide16_scalar_type(g->c, ty)) { if (ea.offset == 0) { - base.type = ty; - base.op.type = ty; api_push(g, base); return; } if (!api_operand_can_address(&base.op)) { CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base)); Operand addr = api_lvalue_addr(g, &base, pty); - mem_op = fold_ea_into_operand(g, addr, ea.offset, REG_NONE, 0, ty, + mem_op = fold_ea_into_operand(g, addr, ea.offset, CG_LOCAL_NONE, 0, ty, 1, &owned_base); - if (owned_base == REG_NONE) - owned_base = addr.v.reg; - else if (owned_base != addr.v.reg) - api_free_reg(g, addr.v.reg, RC_INT); + if (owned_base == CG_LOCAL_NONE) + owned_base = addr.v.local; + else if (owned_base != addr.v.local) + api_release_temp_local(g, addr.v.local); } else { - mem_op = fold_ea_into_operand(g, base.op, ea.offset, REG_NONE, 0, ty, - &owned_base); + mem_op = fold_ea_into_operand(g, base.op, ea.offset, CG_LOCAL_NONE, 0, ty, + 0, &owned_base); } - if (mem_op.kind == OPK_INDIRECT && owned_base == REG_NONE && + if (mem_op.kind == OPK_INDIRECT && owned_base == CG_LOCAL_NONE && base.op.kind == OPK_INDIRECT) { base.res = RES_INHERENT; } @@ -527,38 +524,38 @@ void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { } /* Compute the memop operand. Lvalue bases preserve named-storage operands; - * pointer rvalues use the register holding the address. */ + * pointer rvalues use the local holding the address. */ if (is_lvalue) { if (!api_operand_can_address(&base.op)) { - /* Source-local in a hard register but we need to compute an EA: take + /* Source-local needs an EA: take * the lvalue's address first. */ CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base)); Operand addr = api_lvalue_addr(g, &base, pty); mem_op = fold_ea_into_operand(g, addr, ea.offset, owned_index, log2_scale, - access_ty, &owned_base); - /* `addr` is an owned register from api_lvalue_addr. 
*/ - if (owned_base == REG_NONE) - owned_base = addr.v.reg; - else if (owned_base != addr.v.reg) - api_free_reg(g, addr.v.reg, RC_INT); + access_ty, 1, &owned_base); + /* `addr` is an owned local from api_lvalue_addr. */ + if (owned_base == CG_LOCAL_NONE) + owned_base = addr.v.local; + else if (owned_base != addr.v.local) + api_release_temp_local(g, addr.v.local); } else { /* The lvalue carries its own operand; fold the EA into it. */ mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index, - log2_scale, access_ty, &owned_base); + log2_scale, access_ty, 0, &owned_base); } } else if (base.kind == SV_OPERAND && base.op.kind == OPK_GLOBAL) { /* Pointer-rvalue OPK_GLOBAL: fold the EA directly against the global * (matching the lvalue OPK_GLOBAL path) so the backend can emit a single * PC-relative or absolute access. */ mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index, - log2_scale, access_ty, &owned_base); + log2_scale, access_ty, 0, &owned_base); } else { - /* Pointer rvalue: ensure the address is in a register and treat that as + /* Pointer rvalue: ensure the address is in a local and treat that as * the base. */ CfreeCgTypeId pty = api_sv_type(&base); - Operand ptr_op = api_force_reg(g, &base, pty); + Operand ptr_op = api_force_local(g, &base, pty); mem_op = fold_ea_into_operand(g, ptr_op, ea.offset, owned_index, log2_scale, - access_ty, &owned_base); + access_ty, 1, &owned_base); } /* Mutate source-local tracking. 
Any EA-shaped load through a tracked local @@ -570,38 +567,38 @@ void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { if (is_bitfield) { BitFieldAccess bf = bf_from_access(g, access, access_ty, &mem_op); - Reg rr = api_alloc_reg_or_spill(g, RC_INT, access_ty); - dst = api_op_reg(rr, access_ty); + CGLocal rr = api_alloc_temp_local(g, access_ty); + dst = api_op_local(rr, access_ty); T->bitfield_load(T, dst, mem_op, bf); } else { - dst_r = api_alloc_reg_or_spill(g, api_type_class(access_ty), access_ty); - dst = api_op_reg(dst_r, access_ty); + dst_r = api_alloc_temp_local(g, access_ty); + dst = api_op_local(dst_r, access_ty); T->load(T, dst, mem_op, api_mem_from_access(g, &mem_op, access)); } - /* Release the base lvalue/rvalue and any owned registers. */ + /* Release the base lvalue/rvalue and any owned locals. */ if (is_lvalue) { - /* If the original lvalue's operand was OPK_INDIRECT, its base register + /* If the original lvalue's operand was OPK_INDIRECT, its base local * was owned by the lvalue and is still in mem_op.v.ind.base when we did * not allocate a new owned_base. Free that base when no new owned_base * shadows it. */ - if (base.op.kind == OPK_INDIRECT && owned_base == REG_NONE) { - /* mem_op uses the same base register as base.op; free it via the + if (base.op.kind == OPK_INDIRECT && owned_base == CG_LOCAL_NONE) { + /* mem_op uses the same base local as base.op; free it via the * lvalue release. */ } api_release(g, &base); } else { - /* For rvalue-pointer bases, the register holding the pointer was the - * owned reg of `base`; api_release will free it unless the EA folding + /* For rvalue-pointer bases, the local holding the pointer was the + * owned local of `base`; api_release will free it unless the EA folding * already absorbed it into mem_op. 
The fold_ea_into_operand path for - * REG returns either OPK_INDIRECT(addr.v.reg, ofs) (no new owned_base) + * LOCAL returns either OPK_INDIRECT(addr.v.local, ofs) (no new owned_base) * or a freshly allocated owned_base. In either case api_release(&base) - * frees the pointer register; that is fine because we already issued + * frees the pointer local; that is fine because we already issued * the memop. */ api_release(g, &base); } - if (owned_base != REG_NONE) api_free_reg(g, owned_base, RC_INT); - if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT); + if (owned_base != CG_LOCAL_NONE) api_release_temp_local(g, owned_base); + if (owned_index != CG_LOCAL_NONE) api_release_temp_local(g, owned_index); api_push(g, api_make_sv(dst, access_ty)); } @@ -622,18 +619,17 @@ void cfree_cg_addr(CfreeCg* g) { void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { ApiSValue base, rv; - CGTarget* T; + CgTarget* T; CfreeCgTypeId ty; CfreeCgTypeId access_ty; Operand src; - Reg owned_base = REG_NONE; - Reg owned_index = REG_NONE; + CGLocal owned_base = CG_LOCAL_NONE; + CGLocal owned_index = CG_LOCAL_NONE; u8 log2_scale = 0; Operand mem_op; int is_lvalue; int is_bitfield; int has_index; - int scalar_aggregate_store = 0; if (!g) return; T = g->target; if (access.flags & CFREE_CG_MEM_VOLATILE) api_local_const_memory_boundary(g); @@ -669,7 +665,6 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { * the access type itself is aggregate. Scalar stores at an offset into an * aggregate lvalue are field-stores under the EA model and fall through to * the scalar store path. 
*/ - (void)scalar_aggregate_store; if (!has_index && !is_bitfield && ea.offset == 0 && (cg_type_is_aggregate(g->c, ty) || cg_type_is_aggregate(g->c, api_sv_type(&rv)))) { @@ -714,11 +709,11 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { dst_addr = api_lvalue_addr(g, &base, ptr_ty); dst_addr_owned = 1; } else { - dst_addr = api_force_reg(g, &base, api_sv_type(&base)); + dst_addr = api_force_local(g, &base, api_sv_type(&base)); dst_addr_owned = 0; } if (src_ptr_rvalue) { - src_addr = api_force_reg(g, &rv, api_sv_type(&rv)); + src_addr = api_force_local(g, &rv, api_sv_type(&rv)); src_addr_owned = 0; } else { src_addr = api_lvalue_addr(g, &rv, ptr_ty); @@ -728,8 +723,8 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { agg.size = access_size; agg.align = access.align ? access.align : abi_cg_alignof(g->c->abi, ty); T->copy_bytes(T, dst_addr, src_addr, agg); - if (dst_addr_owned) api_free_reg(g, dst_addr.v.reg, RC_INT); - if (src_addr_owned) api_free_reg(g, src_addr.v.reg, RC_INT); + if (dst_addr_owned) api_release_temp_local(g, dst_addr.v.local); + if (src_addr_owned) api_release_temp_local(g, src_addr.v.local); api_release(g, &base); api_release(g, &rv); return; @@ -757,15 +752,15 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { if (ea.offset == 0) { dst_addr = base.op; } else { - dst_addr = fold_ea_into_operand(g, base.op, ea.offset, REG_NONE, 0, - ty, &owned_base); - dst_addr_owned = owned_base != REG_NONE; + dst_addr = fold_ea_into_operand(g, base.op, ea.offset, CG_LOCAL_NONE, 0, + ty, 0, &owned_base); + dst_addr_owned = owned_base != CG_LOCAL_NONE; } } else if (is_lvalue) { dst_addr = api_lvalue_addr(g, &base, ptr_ty); dst_addr_owned = 1; } else { - dst_addr = api_force_reg(g, &base, api_sv_type(&base)); + dst_addr = api_force_local(g, &base, api_sv_type(&base)); } if (rv.op.kind == OPK_LOCAL) { src_addr = rv.op; @@ -778,12 +773,11 @@ void cfree_cg_store(CfreeCg* g, 
CfreeCgMemAccess access, CfreeCgEffAddr ea) { agg.align = access.align ? access.align : 16; T->copy_bytes(T, dst_addr, src_addr, agg); if (dst_addr_owned) { - api_free_reg(g, + api_release_temp_local(g, dst_addr.kind == OPK_INDIRECT ? dst_addr.v.ind.base - : dst_addr.v.reg, - RC_INT); + : dst_addr.v.local); } - if (src_addr_owned) api_free_reg(g, src_addr.v.reg, RC_INT); + if (src_addr_owned) api_release_temp_local(g, src_addr.v.local); } else if (rv.op.kind == OPK_IMM) { u8 bytes[16]; u64 lo = (u64)rv.op.v.imm; @@ -796,32 +790,32 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { bytes[hi_idx] = (u8)(hi >> (i * 8u)); } if (base.op.kind == OPK_LOCAL) { - api_store_f128_bytes(g, base.op.v.frame_slot, ty, bytes); + api_store_f128_bytes(g, base.op.v.local, ty, bytes); } else { - FrameSlot slot = api_f128_temp_slot(g, ty); - ApiSValue tmp = api_make_lv(api_op_local(slot, ty), ty); + CGLocal local = api_f128_temp_local(g, ty); + ApiSValue tmp = api_make_lv(api_op_local(local, ty), ty); CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); Operand dst_addr; Operand src_addr; int dst_addr_owned = 0; AggregateAccess agg; - api_store_f128_bytes(g, slot, ty, bytes); + api_store_f128_bytes(g, local, ty, bytes); if (is_lvalue) { dst_addr = api_lvalue_addr(g, &base, ptr_ty); dst_addr_owned = 1; } else { - dst_addr = api_force_reg(g, &base, api_sv_type(&base)); + dst_addr = api_force_local(g, &base, api_sv_type(&base)); } src_addr = api_lvalue_addr(g, &tmp, ptr_ty); memset(&agg, 0, sizeof agg); agg.size = 16; agg.align = access.align ? 
access.align : 16; T->copy_bytes(T, dst_addr, src_addr, agg); - if (dst_addr_owned) api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); + if (dst_addr_owned) api_release_temp_local(g, dst_addr.v.local); + api_release_temp_local(g, src_addr.v.local); } } else { - src = api_force_reg(g, &rv, ty); + src = api_force_local(g, &rv, ty); T->store(T, base.op, src, api_mem_from_access(g, &base.op, access)); } api_release(g, &base); @@ -832,34 +826,38 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { /* General EA-shaped scalar / bit-field store. Resolve the EA into a * single operand the backend can consume. */ - /* Compute the source operand first so its register lifetime doesn't + /* Compute the source operand first so its local lifetime doesn't * overlap any EA-arith we issue. */ - api_ensure_reg(g, &rv); - if (api_sv_op_is_reg_or_imm(&rv)) { + api_ensure_local(g, &rv); + if (api_sv_op_is_local_or_imm(&rv)) { src = rv.op; } else { - src = api_force_reg(g, &rv, api_sv_type(&rv)); + src = api_force_local(g, &rv, api_sv_type(&rv)); } - /* Source-local register-resident lvalue, plain no-EA store: just copy - * into the bound hard register. This must run before the general EA path; - * otherwise api_lvalue_addr would unnecessarily home the local and mark its - * address taken. */ + /* Scalar local-resident lvalue, plain no-EA store: just copy into the local. + * Aggregate locals are storage; field stores into them need the normal memory + * path so offset-zero fields still use the scalar access type. 
*/ if (!has_index && !is_bitfield && ea.offset == 0 && is_lvalue && - base.source_local != CFREE_CG_LOCAL_NONE && base.op.kind == OPK_REG) { + base.source_local != CFREE_CG_LOCAL_NONE && base.op.kind == OPK_LOCAL && + !api_sv_local_storage_is_aggregate(g, &base) && + !cg_type_is_aggregate(g->c, api_sv_type(&base)) && + !cg_type_is_aggregate(g->c, ty) && + api_unalias_type(g->c, api_sv_type(&base)) == api_unalias_type(g->c, ty)) { Operand dst = base.op; - dst.type = ty; if (src.kind == OPK_IMM) { T->load_imm(T, dst, src.v.imm); - api_local_const_store(g, base.source_local, access, src.v.imm); + if (base.source_local != CFREE_CG_LOCAL_NONE) + api_local_const_store(g, base.source_local, access, src.v.imm); } else { - if (src.kind != OPK_REG) src = api_force_reg(g, &rv, ty); - if (src.v.reg != dst.v.reg || src.cls != dst.cls) T->copy(T, dst, src); - api_local_const_clear(api_local_from_handle(g, base.source_local)); + if (src.kind != OPK_LOCAL) src = api_force_local(g, &rv, ty); + if (src.v.local != dst.v.local) T->copy(T, dst, src); + if (base.source_local != CFREE_CG_LOCAL_NONE) + api_local_const_clear(api_local_from_handle(g, base.source_local)); } api_release(g, &base); api_release(g, &rv); - if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT); + if (owned_index != CG_LOCAL_NONE) api_release_temp_local(g, owned_index); return; } @@ -868,24 +866,24 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base)); Operand addr = api_lvalue_addr(g, &base, pty); mem_op = fold_ea_into_operand(g, addr, ea.offset, owned_index, log2_scale, - access_ty, &owned_base); - if (owned_base == REG_NONE) - owned_base = addr.v.reg; - else if (owned_base != addr.v.reg) - api_free_reg(g, addr.v.reg, RC_INT); + access_ty, 1, &owned_base); + if (owned_base == CG_LOCAL_NONE) + owned_base = addr.v.local; + else if (owned_base != addr.v.local) + api_release_temp_local(g, addr.v.local); } else { 
mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index, - log2_scale, access_ty, &owned_base); + log2_scale, access_ty, 0, &owned_base); } } else if (base.kind == SV_OPERAND && base.op.kind == OPK_GLOBAL) { /* Pointer-rvalue OPK_GLOBAL: fold EA directly. */ mem_op = fold_ea_into_operand(g, base.op, ea.offset, owned_index, - log2_scale, access_ty, &owned_base); + log2_scale, access_ty, 0, &owned_base); } else { CfreeCgTypeId pty = api_sv_type(&base); - Operand ptr_op = api_force_reg(g, &base, pty); + Operand ptr_op = api_force_local(g, &base, pty); mem_op = fold_ea_into_operand(g, ptr_op, ea.offset, owned_index, log2_scale, - access_ty, &owned_base); + access_ty, 1, &owned_base); } /* Source-local tracking. Only the plain no-EA scalar-to-scalar store can @@ -914,8 +912,8 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { api_release(g, &base); api_release(g, &rv); - if (owned_base != REG_NONE) api_free_reg(g, owned_base, RC_INT); - if (owned_index != REG_NONE) api_free_reg(g, owned_index, RC_INT); + if (owned_base != CG_LOCAL_NONE) api_release_temp_local(g, owned_base); + if (owned_index != CG_LOCAL_NONE) api_release_temp_local(g, owned_index); } /* ============================================================ @@ -926,37 +924,23 @@ void cfree_cg_dup(CfreeCg* g) { ApiSValue v, dup; ApiSValue* top; CfreeCgTypeId ty; - Reg r; + CGLocal r; Operand dst; if (!g || g->sp == 0) return; top = &g->stack[g->sp - 1]; - api_ensure_reg(g, top); + api_ensure_local(g, top); v = *top; - if (v.res != RES_REG) { - if (v.res == RES_FIXED_REG && !api_is_lvalue_sv(&v) && - v.op.kind == OPK_REG) { - ty = api_owned_reg_type(g, &v); - r = api_alloc_reg(g, api_class_of_sv(&v)); - if (r == (Reg)REG_NONE) { - FrameSlot slot = api_take_spill_slot(g, api_class_of_sv(&v)); - Operand src = api_op_reg((Reg)api_reg_of_sv(&v), ty); - g->target->spill_reg(g->target, src, slot, api_mem_for_spill(g, &v)); - g->stack[g->sp - 1].spill_slot = slot; - g->stack[g->sp - 
1].res = RES_SPILLED; - api_set_owned_reg(&g->stack[g->sp - 1], (Reg)REG_NONE); - dup = v; - dup.pinned = 0; - dup.spill_slot = FRAME_SLOT_NONE; - api_push(g, dup); - return; - } - dst = api_op_reg(r, ty); - g->target->copy(g->target, dst, api_op_reg((Reg)api_reg_of_sv(&v), ty)); + if (v.res != RES_LOCAL) { + if (v.res == RES_FIXED_LOCAL && !api_is_lvalue_sv(&v) && + v.op.kind == OPK_LOCAL) { + ty = api_owned_local_type(g, &v); + r = api_alloc_temp_local(g, ty); + dst = api_op_local(r, ty); + g->target->copy(g->target, dst, api_op_local((CGLocal)api_local_of_sv(&v), ty)); dup = v; - api_set_owned_reg(&dup, r); - dup.res = RES_REG; + api_set_owned_local(&dup, r); + dup.res = RES_LOCAL; dup.pinned = 0; - dup.spill_slot = FRAME_SLOT_NONE; dup.source_local = CFREE_CG_LOCAL_NONE; g->stack[g->sp - 1] = dup; api_push(g, v); @@ -966,42 +950,27 @@ void cfree_cg_dup(CfreeCg* g) { return; } top->pinned = 1; - ty = api_owned_reg_type(g, &v); - r = api_alloc_reg(g, api_class_of_sv(&v)); - if (r == (Reg)REG_NONE) { - FrameSlot slot = api_take_spill_slot(g, api_class_of_sv(&v)); - Operand src = api_op_reg((Reg)api_reg_of_sv(&v), ty); - g->target->spill_reg(g->target, src, slot, api_mem_for_spill(g, &v)); - top->pinned = 0; - top->spill_slot = slot; - top->res = RES_SPILLED; - api_set_owned_reg(top, (Reg)REG_NONE); - dup = v; - dup.pinned = 0; - dup.spill_slot = FRAME_SLOT_NONE; - api_push(g, dup); - return; - } - dst = api_op_reg(r, ty); - g->target->copy(g->target, dst, api_op_reg((Reg)api_reg_of_sv(&v), ty)); + ty = api_owned_local_type(g, &v); + r = api_alloc_temp_local(g, ty); + dst = api_op_local(r, ty); + g->target->copy(g->target, dst, api_op_local((CGLocal)api_local_of_sv(&v), ty)); g->stack[g->sp - 1].pinned = 0; dup = v; - api_set_owned_reg(&dup, r); - dup.res = RES_REG; + api_set_owned_local(&dup, r); + dup.res = RES_LOCAL; dup.pinned = 0; - dup.spill_slot = FRAME_SLOT_NONE; api_push(g, dup); } -/* Duplicate the top two stack slots. 
The lower of the two is the deeper +/* Duplicate the top two value-stack entries. The lower of the two is the deeper * element; the higher is TOS. After dup2, the stack contains [a, b, a, b] * where TOS was [..., a, b]. Used to support compound assignment through a * scaled-index lvalue: the frontend duplicates [base, index] so it can * read-modify-write with a single EA expression each side. * - * The current implementation duplicates the two slots one at a time using - * cfree_cg_dup with a rot3 between them so register/operand sharing stays - * correct under the per-slot machinery. */ + * The current implementation duplicates the two entries one at a time using + * cfree_cg_dup with a rot3 between them so local/operand sharing stays + * correct under the per-entry machinery. */ void cfree_cg_dup2(CfreeCg* g) { if (!g || g->sp < 2) return; /* Stack: [..., a, b] diff --git a/src/cg/session.c b/src/cg/session.c @@ -12,7 +12,6 @@ static void cg_free_obj_state(CfreeCg* g) { Heap* h; - u32 i; if (!g) return; h = g->c->ctx->heap; if (g->stack) { @@ -31,13 +30,6 @@ static void cg_free_obj_state(CfreeCg* g) { h->free(h, g->sym_attrs, sizeof(*g->sym_attrs) * g->sym_cap); g->sym_attrs = NULL; } - for (i = 0; i < 3; ++i) { - if (g->slot_pools[i].free) { - h->free(h, g->slot_pools[i].free, - sizeof(FrameSlot) * g->slot_pools[i].cap); - g->slot_pools[i].free = NULL; - } - } if (g->data_tls_collect) { buf_fini(&g->data_tls_bytes); g->data_tls_collect = 0; @@ -52,11 +44,8 @@ static void cg_free_obj_state(CfreeCg* g) { g->nlocals = 0; g->locals_cap = 0; g->sym_cap = 0; - memset(g->slot_pools, 0, sizeof(g->slot_pools)); - g->avs_in_flight = NULL; - g->avs_in_flight_n = 0; g->fn_ret_type = 0; - g->fn_abi = NULL; + g->fn_result_types[0] = 0; memset(&g->fn_desc, 0, sizeof(g->fn_desc)); memset(g->fn_params, 0, sizeof(g->fn_params)); memset(g->scopes, 0, sizeof(g->scopes)); @@ -96,11 +85,11 @@ CfreeStatus cfree_cg_new(CfreeCompiler* c, CfreeCg** cg_out) { CfreeStatus 
cfree_cg_begin_obj(CfreeCg* g, CfreeObjBuilder* out, const CfreeCodeOptions* opts) { CfreeCompiler* c; - CGTarget* target; + CgTarget* target; const CGBackend* backend; int opt_level = opts ? opts->opt_level : 0; if (!g || !g->c || !out) return CFREE_INVALID; - if (g->obj || g->target || g->mc || g->debug) return CFREE_INVALID; + if (g->obj || g->target || g->debug) return CFREE_INVALID; c = (CfreeCompiler*)g->c; if (opt_level < 0 || opt_level > 2) { compiler_panic((Compiler*)c, api_no_loc(), @@ -125,8 +114,7 @@ CfreeStatus cfree_cg_begin_obj(CfreeCg* g, CfreeObjBuilder* out, #endif g->obj = (ObjBuilder*)out; g->target = target; - g->mc = target->mc; - g->debug = target->debug; + g->debug = NULL; g->opt_level = opt_level; g->check_only = (opts && opts->check_only) ? 1u : 0u; g->function_sections = (opts && opts->function_sections) ? 1u : 0u; @@ -138,18 +126,13 @@ CfreeStatus cfree_cg_end_obj(CfreeCg* g) { if (!g) return CFREE_INVALID; if (!g->obj) return CFREE_INVALID; cgtarget_finalize(g->target); - /* Flush buffered CFI into .eh_frame before debug_emit. Needed whether - * or not -g is on. */ - if (g->mc) mc_emit_eh_frame(g->mc); if (g->debug) { debug_emit(g->debug); debug_free(g->debug); } cgtarget_free(g->target); - mc_free(g->mc); g->obj = NULL; g->target = NULL; - g->mc = NULL; g->debug = NULL; cg_free_obj_state(g); return CFREE_OK; @@ -252,11 +235,10 @@ void cfree_cg_func_begin_attrs(CfreeCg* g, CfreeCgSym cg_sym, CfreeCgFuncAttrs begin_attrs) { Compiler* c; ObjBuilder* ob; - CGTarget* T; + CgTarget* T; ObjSymId sym; ObjSecId text_sec; CfreeCgTypeId fty; - const ABIFuncInfo* abi; CfreeCgDecl attrs; Sym sec_name; if (!g) return; @@ -267,7 +249,6 @@ void cfree_cg_func_begin_attrs(CfreeCg* g, CfreeCgSym cg_sym, fty = api_sym_type(g, cg_sym); if (!fty) return; attrs = api_sym_attrs(g, cg_sym); - abi = abi_cg_func_info(c->abi, fty); sec_name = begin_attrs.section ? 
(Sym)begin_attrs.section : (Sym)attrs.as.func.section; @@ -291,7 +272,7 @@ void cfree_cg_func_begin_attrs(CfreeCg* g, CfreeCgSym cg_sym, g->fn_desc.text_section_id = text_sec; g->fn_desc.group_id = OBJ_GROUP_NONE; g->fn_desc.fn_type = fty; - g->fn_desc.abi = abi; + g->fn_desc.result_types = g->fn_result_types; g->fn_desc.loc = g->cur_loc; g->fn_desc.atomize = atomize ? 1u : 0u; if (attrs.as.func.flags & CFREE_CG_FUNC_NORETURN) { @@ -304,19 +285,22 @@ void cfree_cg_func_begin_attrs(CfreeCg* g, CfreeCgSym cg_sym, g->fn_desc.flags |= CGFD_NORETURN; g->fn_ret_type = cg_type_func_ret_id(c, fty); - g->fn_abi = abi; + if (cg_type_is_void(c, g->fn_ret_type)) { + g->fn_desc.nresults = 0; + g->fn_result_types[0] = CFREE_CG_TYPE_NONE; + } else { + g->fn_desc.nresults = 1; + g->fn_result_types[0] = g->fn_ret_type; + } g->nlocals = 0; g->sp = 0; - for (u32 i = 0; i < 3; ++i) g->slot_pools[i].n = 0; - g->avs_in_flight = NULL; - g->avs_in_flight_n = 0; if (g->debug) { DebugTypeId dt = api_debug_type(g, fty); if (dt != DEBUG_TYPE_NONE) debug_func_begin(g->debug, sym, dt, g->cur_loc); } T->func_begin(T, &g->fn_desc); - api_regalloc_begin(g); + api_temp_locals_begin(g); } void cfree_cg_func_begin(CfreeCg* g, CfreeCgSym cg_sym) { @@ -327,11 +311,11 @@ void cfree_cg_func_begin(CfreeCg* g, CfreeCgSym cg_sym) { void cfree_cg_func_end(CfreeCg* g) { if (!g) return; - api_regalloc_finish(g); + api_temp_locals_finish(g); g->target->func_end(g->target); if (g->debug) debug_func_end(g->debug); - g->fn_abi = NULL; g->fn_ret_type = CFREE_CG_TYPE_NONE; + g->fn_result_types[0] = CFREE_CG_TYPE_NONE; g->nscopes = 0; memset(g->scopes, 0, sizeof g->scopes); } diff --git a/src/cg/type.c b/src/cg/type.c @@ -965,5 +965,5 @@ void cg_api_fini(Compiler* c) { /* ============================================================ * CfreeCg: public codegen API implementation * - * Drives CGTarget directly with its own value stack. + * Drives CgTarget directly with its own value stack. 
* ============================================================ */ diff --git a/src/cg/value.c b/src/cg/value.c @@ -1,12 +1,10 @@ #include "cg/internal.h" -u8 api_type_class(CfreeCgTypeId ty) { - if (ty == builtin_id(CFREE_CG_BUILTIN_F32) || - ty == builtin_id(CFREE_CG_BUILTIN_F64) || - ty == builtin_id(CFREE_CG_BUILTIN_F128)) { - return RC_FP; - } - return RC_INT; +int api_type_is_float(Compiler* c, CfreeCgTypeId ty) { + const CgType* cg; + ty = api_unalias_type(c, ty); + cg = cg_type_get(c, ty); + return cg && cg->kind == CFREE_CG_TYPE_FLOAT; } int api_is_f128_type(Compiler* c, CfreeCgTypeId ty) { @@ -27,43 +25,21 @@ int api_is_wide16_scalar_type(Compiler* c, CfreeCgTypeId ty) { return api_is_f128_type(c, ty) || api_is_i128_type(c, ty); } -/* Whether a CGABIValue.storage for `ty` must be an address operand (pointing - * to a memory image of the value) rather than a value operand. Today this is - * driven by the type shape — aggregates and wide16 scalars cannot fit in a - * single Operand. A future refactor will key this off ABIArgInfo so a - * trivial-DIRECT ABI (e.g. for a C-source backend) can keep aggregates as - * value operands. See doc/CBACKEND.md. 
*/ -int api_arg_storage_must_be_addr(Compiler* c, CfreeCgTypeId ty) { - return cg_type_is_aggregate(c, ty) || api_is_wide16_scalar_type(c, ty); -} - Operand api_op_imm(i64 v, CfreeCgTypeId ty) { Operand o; memset(&o, 0, sizeof o); o.kind = OPK_IMM; - o.cls = api_type_class(ty); o.type = ty; o.v.imm = v; return o; } -Operand api_op_reg(Reg r, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_REG; - o.cls = api_type_class(ty); - o.type = ty; - o.v.reg = r; - return o; -} - -Operand api_op_local(FrameSlot s, CfreeCgTypeId ty) { +Operand api_op_local(CGLocal local, CfreeCgTypeId ty) { Operand o; memset(&o, 0, sizeof o); o.kind = OPK_LOCAL; - o.cls = RC_INT; o.type = ty; - o.v.frame_slot = s; + o.v.local = local; return o; } @@ -71,32 +47,29 @@ Operand api_op_global(ObjSymId sym, i64 addend, CfreeCgTypeId ty) { Operand o; memset(&o, 0, sizeof o); o.kind = OPK_GLOBAL; - o.cls = RC_INT; o.type = ty; o.v.global.sym = sym; o.v.global.addend = addend; return o; } -Operand api_op_indirect(Reg base, i32 ofs, CfreeCgTypeId ty) { +Operand api_op_indirect(CGLocal base, i32 ofs, CfreeCgTypeId ty) { Operand o; memset(&o, 0, sizeof o); o.kind = OPK_INDIRECT; - o.cls = RC_INT; o.type = ty; o.v.ind.base = base; - o.v.ind.index = REG_NONE; + o.v.ind.index = CG_LOCAL_NONE; o.v.ind.log2_scale = 0; o.v.ind.ofs = ofs; return o; } -Operand api_op_indirect_indexed(Reg base, Reg index, u8 log2_scale, i32 ofs, +Operand api_op_indirect_indexed(CGLocal base, CGLocal index, u8 log2_scale, i32 ofs, CfreeCgTypeId ty) { Operand o; memset(&o, 0, sizeof o); o.kind = OPK_INDIRECT; - o.cls = RC_INT; o.type = ty; o.v.ind.base = base; o.v.ind.index = index; @@ -106,7 +79,7 @@ Operand api_op_indirect_indexed(Reg base, Reg index, u8 log2_scale, i32 ofs, } u8 api_residency_for(const Operand* o) { - if (o->kind == OPK_REG || o->kind == OPK_INDIRECT) return RES_REG; + if (o->kind == OPK_LOCAL || o->kind == OPK_INDIRECT) return RES_LOCAL; return RES_INHERENT; } @@ -117,7 +90,6 @@ 
ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty) { sv.op = op; sv.type = ty; sv.res = api_residency_for(&op); - sv.spill_slot = FRAME_SLOT_NONE; sv.source_local = CFREE_CG_LOCAL_NONE; return sv; } @@ -140,7 +112,6 @@ ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, CfreeCgTypeId result_ty, sv.delayed.cmp.a_owned = a_owned ? 1u : 0u; sv.delayed.cmp.b_owned = b_owned ? 1u : 0u; sv.res = RES_INHERENT; - sv.spill_slot = FRAME_SLOT_NONE; sv.source_local = CFREE_CG_LOCAL_NONE; return sv; } @@ -156,7 +127,6 @@ ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, sv.delayed.arith.a = a; sv.delayed.arith.a_owned = a_owned ? 1u : 0u; sv.res = RES_INHERENT; - sv.spill_slot = FRAME_SLOT_NONE; sv.source_local = CFREE_CG_LOCAL_NONE; return sv; } @@ -174,15 +144,14 @@ ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, sv.delayed.arith.a_owned = a_owned ? 1u : 0u; sv.delayed.arith.b_owned = b_owned ? 1u : 0u; sv.res = RES_INHERENT; - sv.spill_slot = FRAME_SLOT_NONE; sv.source_local = CFREE_CG_LOCAL_NONE; return sv; } -ApiSValue api_make_sv_with_reg_ownership(Operand op, CfreeCgTypeId ty, +ApiSValue api_make_sv_with_local_ownership(Operand op, CfreeCgTypeId ty, int owned) { ApiSValue sv = api_make_sv(op, ty); - if (op.kind == OPK_REG && !owned) sv.res = RES_FIXED_REG; + if (op.kind == OPK_LOCAL && !owned) sv.res = RES_FIXED_LOCAL; return sv; } @@ -199,15 +168,15 @@ int api_sv_op_is(const ApiSValue* sv, OpKind kind) { return sv->kind == SV_OPERAND && sv->op.kind == kind; } -int api_sv_op_is_reg_or_imm(const ApiSValue* sv) { +int api_sv_op_is_local_or_imm(const ApiSValue* sv) { return sv->kind == SV_OPERAND && - (sv->op.kind == OPK_IMM || sv->op.kind == OPK_REG); + (sv->op.kind == OPK_IMM || sv->op.kind == OPK_LOCAL); } int api_is_lvalue_sv(const ApiSValue* sv) { return sv->lvalue && (sv->bitfield_lvalue || api_operand_can_address(&sv->op) || - (sv->source_local != CFREE_CG_LOCAL_NONE && sv->op.kind == OPK_REG)); + 
(sv->source_local != CFREE_CG_LOCAL_NONE && sv->op.kind == OPK_LOCAL)); } void api_stack_grow(CfreeCg* g, u32 want) { @@ -237,30 +206,23 @@ ApiSValue api_pop(CfreeCg* g) { return g->stack[--g->sp]; } -/* ---- register class helpers ---- */ - -u8 api_class_of_sv(const ApiSValue* sv) { - if (sv->kind == SV_CMP || sv->kind == SV_ARITH) return RC_INT; - if (sv->op.kind == OPK_INDIRECT) return RC_INT; - if (sv->op.kind == OPK_IMM || sv->op.kind == OPK_REG) return sv->op.cls; - return api_type_class(api_sv_type(sv)); -} +/* ---- local helpers ---- */ -Reg api_reg_of_sv(const ApiSValue* sv) { - if (sv->kind == SV_ARITH || sv->kind == SV_CMP) return (Reg)REG_NONE; - if (sv->op.kind == OPK_REG) return sv->op.v.reg; +CGLocal api_local_of_sv(const ApiSValue* sv) { + if (sv->kind == SV_ARITH || sv->kind == SV_CMP) return (CGLocal)CG_LOCAL_NONE; + if (sv->op.kind == OPK_LOCAL) return sv->op.v.local; if (sv->op.kind == OPK_INDIRECT) return sv->op.v.ind.base; - return (Reg)REG_NONE; + return (CGLocal)CG_LOCAL_NONE; } -void api_set_owned_reg(ApiSValue* sv, Reg r) { - if (sv->op.kind == OPK_REG) - sv->op.v.reg = r; +void api_set_owned_local(ApiSValue* sv, CGLocal r) { + if (sv->op.kind == OPK_LOCAL) + sv->op.v.local = r; else if (sv->op.kind == OPK_INDIRECT) sv->op.v.ind.base = r; } -CfreeCgTypeId api_owned_reg_type(CfreeCg* g, const ApiSValue* sv) { +CfreeCgTypeId api_owned_local_type(CfreeCg* g, const ApiSValue* sv) { if (sv->op.kind == OPK_INDIRECT) { CfreeCgTypeId base = sv->type ? sv->type : builtin_id(CFREE_CG_BUILTIN_VOID); @@ -269,132 +231,36 @@ CfreeCgTypeId api_owned_reg_type(CfreeCg* g, const ApiSValue* sv) { return api_sv_type(sv); } -/* ---- spill slot management ---- */ - -void api_take_spill_slot_alloc(CfreeCg* g, u8 cls, FrameSlot* out) { - CGTarget* T = g->target; - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.kind = FS_SPILL; - fsd.size = (cls == RC_FP) ? 
16 : 8; - fsd.align = fsd.size; - *out = T->frame_slot(T, &fsd); -} - -FrameSlot api_take_spill_slot(CfreeCg* g, u8 cls) { - if (cls < 3 && g->slot_pools[cls].n > 0) { - return g->slot_pools[cls].free[--g->slot_pools[cls].n]; - } - FrameSlot s; - api_take_spill_slot_alloc(g, cls, &s); - return s; -} - -void api_return_spill_slot(CfreeCg* g, FrameSlot s, u8 cls) { - Heap* h; - if (s == FRAME_SLOT_NONE) return; - if (cls >= 3) return; - h = g->c->ctx->heap; - if (g->slot_pools[cls].n >= g->slot_pools[cls].cap) { - u32 new_cap = g->slot_pools[cls].cap ? g->slot_pools[cls].cap * 2 : 8; - FrameSlot* nb = (FrameSlot*)h->alloc(h, sizeof(FrameSlot) * new_cap, - _Alignof(FrameSlot)); - if (g->slot_pools[cls].free) { - memcpy(nb, g->slot_pools[cls].free, - sizeof(FrameSlot) * g->slot_pools[cls].n); - h->free(h, g->slot_pools[cls].free, - sizeof(FrameSlot) * g->slot_pools[cls].cap); - } - g->slot_pools[cls].free = nb; - g->slot_pools[cls].cap = new_cap; - } - g->slot_pools[cls].free[g->slot_pools[cls].n++] = s; -} +/* ---- temporary local allocation ---- */ -/* ---- register allocation / spill ---- */ - -ApiSValue* api_pick_victim(CfreeCg* g, u8 cls) { - for (u32 i = 0; i < g->sp; ++i) { - ApiSValue* sv = &g->stack[i]; - if (sv->res != RES_REG || sv->pinned) continue; - if (api_class_of_sv(sv) != cls) continue; - return sv; - } - return NULL; +void api_temp_locals_begin(CfreeCg* g) { + (void)g; } -MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv); -u8 api_type_class(CfreeCgTypeId ty); - -void api_regalloc_begin(CfreeCg* g) { - CGTarget* T = g->target; - if (T->virtual_regs) { - cg_simple_regalloc_init_virtual(&g->regalloc); - return; - } - cg_simple_regalloc_init(&g->regalloc); - for (u32 c = 0; c < 3u; ++c) { - const Reg* regs = NULL; - u32 nregs = 0; - if (T->get_allocable_regs) - T->get_allocable_regs(T, (RegClass)c, &regs, &nregs); - if (regs && nregs) - cg_simple_regalloc_set_ordered(&g->regalloc, (RegClass)c, regs, nregs); - } +void 
api_temp_locals_finish(CfreeCg* g) { + (void)g; } -void api_regalloc_finish(CfreeCg* g) { - if (cg_simple_regalloc_is_virtual(&g->regalloc)) return; - if (!g->target->reserve_hard_regs) return; - for (u32 c = 0; c < 3u; ++c) { - Reg used[CG_SIMPLE_REGALLOC_MAX_REGS]; - u32 nused = cg_simple_regalloc_used_regs(&g->regalloc, (RegClass)c, used, - CG_SIMPLE_REGALLOC_MAX_REGS); - if (nused) - g->target->reserve_hard_regs(g->target, (RegClass)c, used, nused); +CGLocal api_alloc_temp_local(CfreeCg* g, CfreeCgTypeId ty) { + CGLocalDesc d; + CGLocal local; + memset(&d, 0, sizeof d); + d.type = ty; + if (ty) { + d.size = abi_cg_sizeof(g->c->abi, ty); + d.align = abi_cg_alignof(g->c->abi, ty); } -} - -Reg api_alloc_reg(CfreeCg* g, u8 cls) { - Reg r = cg_simple_regalloc_alloc(&g->regalloc, (RegClass)cls); - if (r == (Reg)REG_NONE && cg_simple_regalloc_is_virtual(&g->regalloc)) { - compiler_panic(g->c, g->cur_loc, "CfreeCg: virtual regalloc exhausted"); + local = g->target->local(g->target, &d); + if (local == CG_LOCAL_NONE) { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: target failed to allocate temporary local"); } - return r; + return local; } -void api_free_reg(CfreeCg* g, Reg r, u8 cls) { - int rc; - if (r == (Reg)REG_NONE) return; - rc = cg_simple_regalloc_free(&g->regalloc, (RegClass)cls, r); - if (rc == 1) return; - if (rc == -1) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: regalloc - reg %u already free in class %u", - (unsigned)r, (unsigned)cls); - } - compiler_panic(g->c, g->cur_loc, - "CfreeCg: regalloc - reg %u not in class %u pool", (unsigned)r, - (unsigned)cls); -} - -int api_spill_avs_victim(CfreeCg* g, u8 cls) { - CGTarget* T = g->target; - if (!g->avs_in_flight) return 0; - for (u32 i = 0; i < g->avs_in_flight_n; ++i) { - CGABIValue* av = &g->avs_in_flight[i]; - if (av->storage.kind != OPK_REG) continue; - if (av->storage.cls != cls) continue; - FrameSlot slot = api_take_spill_slot(g, cls); - ApiSValue tmp = api_make_sv(av->storage, av->type); - 
T->spill_reg(T, av->storage, slot, api_mem_for_spill(g, &tmp)); - api_free_reg(g, av->storage.v.reg, cls); - Operand local = api_op_local(slot, av->type); - local.cls = cls; - av->storage = local; - return 1; - } - return 0; +void api_release_temp_local(CfreeCg* g, CGLocal r) { + (void)g; + (void)r; } MemAccess api_mem_for_lvalue(CfreeCg* g, const Operand* lv, CfreeCgTypeId ty) { @@ -406,7 +272,7 @@ MemAccess api_mem_for_lvalue(CfreeCg* g, const Operand* lv, CfreeCgTypeId ty) { m.flags = MF_NONE; if (lv->kind == OPK_LOCAL) { m.alias.kind = (u8)ALIAS_LOCAL; - m.alias.v.local_id = (i32)lv->v.frame_slot; + m.alias.v.local_id = (i32)lv->v.local; } else if (lv->kind == OPK_GLOBAL) { m.alias.kind = (u8)ALIAS_GLOBAL; } else { @@ -493,7 +359,7 @@ void api_validate_memory_value(CfreeCg* g, const char* who, access_size = api_mem_type_size(g, access_ty, who); value_size = api_mem_type_size(g, value_ty, who); if (access_size != value_size || - api_type_class(access_ty) != api_type_class(value_ty)) { + api_type_is_float(g->c, access_ty) != api_type_is_float(g->c, value_ty)) { compiler_panic(g->c, g->cur_loc, "CfreeCg: %.*s value type/size mismatch: access size %u, " "value size %u", @@ -502,34 +368,23 @@ void api_validate_memory_value(CfreeCg* g, const char* who, } } -MemAccess api_mem_for_spill(CfreeCg* g, const ApiSValue* sv) { - CfreeCgTypeId ty = api_owned_reg_type(g, sv); - MemAccess m; - memset(&m, 0, sizeof m); - m.type = ty; - m.size = ty ? abi_cg_sizeof(g->c->abi, ty) : 8; - m.align = ty ? 
abi_cg_alignof(g->c->abi, ty) : 8; - m.alias.kind = (u8)ALIAS_UNKNOWN; - return m; -} - -void api_release_operand_reg(CfreeCg* g, Operand op) { - if (op.kind == OPK_REG) api_free_reg(g, op.v.reg, op.cls); +void api_release_operand_local(CfreeCg* g, Operand op) { + if (op.kind == OPK_LOCAL) + api_release_temp_local(g, op.v.local); } -int api_sv_owns_operand_reg(const ApiSValue* sv, const Operand* op) { - return sv->res == RES_REG && op->kind == OPK_REG && sv->op.kind == OPK_REG && - sv->op.v.reg == op->v.reg && sv->op.cls == op->cls; +int api_sv_owns_operand_local(const ApiSValue* sv, const Operand* op) { + return sv->res == RES_LOCAL && op->kind == OPK_LOCAL && sv->op.kind == OPK_LOCAL && + sv->op.v.local == op->v.local; } void api_release_cmp(CfreeCg* g, ApiSValue* sv) { - if (sv->delayed.cmp.a_owned) api_release_operand_reg(g, sv->delayed.cmp.a); + if (sv->delayed.cmp.a_owned) api_release_operand_local(g, sv->delayed.cmp.a); if (sv->delayed.cmp.b_owned && - (sv->delayed.cmp.b.kind != OPK_REG || sv->delayed.cmp.a.kind != OPK_REG || - sv->delayed.cmp.b.v.reg != sv->delayed.cmp.a.v.reg || - sv->delayed.cmp.b.cls != sv->delayed.cmp.a.cls || + (sv->delayed.cmp.b.kind != OPK_LOCAL || sv->delayed.cmp.a.kind != OPK_LOCAL || + sv->delayed.cmp.b.v.local != sv->delayed.cmp.a.v.local || !sv->delayed.cmp.a_owned)) { - api_release_operand_reg(g, sv->delayed.cmp.b); + api_release_operand_local(g, sv->delayed.cmp.b); } memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); @@ -540,14 +395,13 @@ void api_release_cmp(CfreeCg* g, ApiSValue* sv) { void api_release_arith(CfreeCg* g, ApiSValue* sv) { if (sv->delayed.arith.a_owned) - api_release_operand_reg(g, sv->delayed.arith.a); + api_release_operand_local(g, sv->delayed.arith.a); if (sv->delayed.arith.b_owned && - (sv->delayed.arith.b.kind != OPK_REG || - sv->delayed.arith.a.kind != OPK_REG || - sv->delayed.arith.b.v.reg != sv->delayed.arith.a.v.reg || - 
sv->delayed.arith.b.cls != sv->delayed.arith.a.cls || + (sv->delayed.arith.b.kind != OPK_LOCAL || + sv->delayed.arith.a.kind != OPK_LOCAL || + sv->delayed.arith.b.v.local != sv->delayed.arith.a.v.local || !sv->delayed.arith.a_owned)) { - api_release_operand_reg(g, sv->delayed.arith.b); + api_release_operand_local(g, sv->delayed.arith.b); } memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); @@ -559,15 +413,13 @@ void api_release_arith(CfreeCg* g, ApiSValue* sv) { void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst) { g->target->cmp(g->target, sv->delayed.cmp.op, dst, sv->delayed.cmp.a, sv->delayed.cmp.b); - if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && - (sv->delayed.cmp.a.v.reg != dst.v.reg || - sv->delayed.cmp.a.cls != dst.cls)) { - api_release_operand_reg(g, sv->delayed.cmp.a); + if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_LOCAL && + sv->delayed.cmp.a.v.local != dst.v.local) { + api_release_operand_local(g, sv->delayed.cmp.a); } - if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && - (sv->delayed.cmp.b.v.reg != dst.v.reg || - sv->delayed.cmp.b.cls != dst.cls)) { - api_release_operand_reg(g, sv->delayed.cmp.b); + if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_LOCAL && + sv->delayed.cmp.b.v.local != dst.v.local) { + api_release_operand_local(g, sv->delayed.cmp.b); } memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); @@ -576,7 +428,7 @@ void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst) { sv->kind = SV_OPERAND; sv->op = dst; sv->type = dst.type; - sv->res = RES_REG; + sv->res = RES_LOCAL; sv->lvalue = 0; } @@ -588,15 +440,13 @@ void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst) { g->target->binop(g->target, sv->delayed.arith.bin_op, dst, sv->delayed.arith.a, sv->delayed.arith.b); } - if (sv->delayed.arith.a_owned && 
sv->delayed.arith.a.kind == OPK_REG && - (sv->delayed.arith.a.v.reg != dst.v.reg || - sv->delayed.arith.a.cls != dst.cls)) { - api_release_operand_reg(g, sv->delayed.arith.a); + if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_LOCAL && + sv->delayed.arith.a.v.local != dst.v.local) { + api_release_operand_local(g, sv->delayed.arith.a); } - if (sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_REG && - (sv->delayed.arith.b.v.reg != dst.v.reg || - sv->delayed.arith.b.cls != dst.cls)) { - api_release_operand_reg(g, sv->delayed.arith.b); + if (sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_LOCAL && + sv->delayed.arith.b.v.local != dst.v.local) { + api_release_operand_local(g, sv->delayed.arith.b); } memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); @@ -605,7 +455,7 @@ void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst) { sv->kind = SV_OPERAND; sv->op = dst; sv->type = dst.type; - sv->res = RES_REG; + sv->res = RES_LOCAL; sv->lvalue = 0; } @@ -623,105 +473,21 @@ int api_arith_rhs_reusable(const ApiSValue* sv) { } } -int api_materialize_cmp_victim(CfreeCg* g, u8 cls) { - if (cls != RC_INT) return 0; - for (u32 i = 0; i < g->sp; ++i) { - ApiSValue* sv = &g->stack[i]; - Operand dst; - if (sv->kind != SV_CMP || sv->pinned) continue; - if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && - sv->delayed.cmp.a.cls == RC_INT) { - dst = api_op_reg(sv->delayed.cmp.a.v.reg, api_sv_type(sv)); - } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && - sv->delayed.cmp.b.cls == RC_INT) { - dst = api_op_reg(sv->delayed.cmp.b.v.reg, api_sv_type(sv)); - } else { - continue; - } - api_materialize_cmp_to(g, sv, dst); - return 1; - } - return 0; -} - -int api_materialize_arith_victim(CfreeCg* g, u8 cls) { - if (cls != RC_INT) return 0; - for (u32 i = 0; i < g->sp; ++i) { - ApiSValue* sv = &g->stack[i]; - Operand dst; - if 
(sv->kind != SV_ARITH || sv->pinned) continue; - if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && - sv->delayed.arith.a.cls == RC_INT) { - dst = api_op_reg(sv->delayed.arith.a.v.reg, api_sv_type(sv)); - } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned && - sv->delayed.arith.b.kind == OPK_REG && - sv->delayed.arith.b.cls == RC_INT) { - dst = api_op_reg(sv->delayed.arith.b.v.reg, api_sv_type(sv)); - } else { - continue; - } - api_materialize_arith_to(g, sv, dst); - return 1; - } - return 0; -} - -Reg api_alloc_reg_or_spill(CfreeCg* g, u8 cls, CfreeCgTypeId ty) { - CGTarget* T = g->target; - Reg r; - (void)ty; - r = api_alloc_reg(g, cls); - if (r != (Reg)REG_NONE) return r; - - ApiSValue* victim = api_pick_victim(g, cls); - if (!victim && api_materialize_cmp_victim(g, cls)) { - r = api_alloc_reg(g, cls); - if (r != (Reg)REG_NONE) return r; - victim = api_pick_victim(g, cls); - } - if (!victim && api_materialize_arith_victim(g, cls)) { - r = api_alloc_reg(g, cls); - if (r != (Reg)REG_NONE) return r; - victim = api_pick_victim(g, cls); - } - if (victim) { - FrameSlot slot = api_take_spill_slot(g, cls); - CfreeCgTypeId rty = api_owned_reg_type(g, victim); - Operand victim_reg = api_op_reg((Reg)api_reg_of_sv(victim), rty); - T->spill_reg(T, victim_reg, slot, api_mem_for_spill(g, victim)); - api_free_reg(g, victim_reg.v.reg, cls); - victim->spill_slot = slot; - victim->res = RES_SPILLED; - api_set_owned_reg(victim, (Reg)REG_NONE); - } else if (!api_spill_avs_victim(g, cls)) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: regalloc - no spillable victim (class %u)", - (unsigned)cls); - } - - r = api_alloc_reg(g, cls); - if (r == (Reg)REG_NONE) { - compiler_panic(g->c, g->cur_loc, - "CfreeCg: regalloc - class %u still empty after spill", - (unsigned)cls); - } - return r; -} - -void api_ensure_reg(CfreeCg* g, ApiSValue* sv) { +void api_ensure_local(CfreeCg* g, ApiSValue* sv) { if (sv->kind == SV_CMP) { CfreeCgTypeId ty = 
api_sv_type(sv); Operand dst; - if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_REG && - sv->delayed.cmp.a.cls == RC_INT) { - dst = api_op_reg(sv->delayed.cmp.a.v.reg, ty); - } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_REG && - sv->delayed.cmp.b.cls == RC_INT) { - dst = api_op_reg(sv->delayed.cmp.b.v.reg, ty); + if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_LOCAL && + api_unalias_type(g->c, sv->delayed.cmp.a.type) == + api_unalias_type(g->c, ty)) { + dst = api_op_local(sv->delayed.cmp.a.v.local, ty); + } else if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_LOCAL && + api_unalias_type(g->c, sv->delayed.cmp.b.type) == + api_unalias_type(g->c, ty)) { + dst = api_op_local(sv->delayed.cmp.b.v.local, ty); } else { - Reg r = api_alloc_reg_or_spill( - g, RC_INT, ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); - dst = api_op_reg(r, ty); + CGLocal r = api_alloc_temp_local(g, ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); + dst = api_op_local(r, ty); } api_materialize_cmp_to(g, sv, dst); return; @@ -729,72 +495,55 @@ void api_ensure_reg(CfreeCg* g, ApiSValue* sv) { if (sv->kind == SV_ARITH) { CfreeCgTypeId ty = api_sv_type(sv); Operand dst; - if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_REG && - sv->delayed.arith.a.cls == RC_INT) { - dst = api_op_reg(sv->delayed.arith.a.v.reg, ty); + if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_LOCAL && + api_unalias_type(g->c, sv->delayed.arith.a.type) == + api_unalias_type(g->c, ty)) { + dst = api_op_local(sv->delayed.arith.a.v.local, ty); } else if (api_arith_rhs_reusable(sv) && sv->delayed.arith.b_owned && - sv->delayed.arith.b.kind == OPK_REG && - sv->delayed.arith.b.cls == RC_INT) { - dst = api_op_reg(sv->delayed.arith.b.v.reg, ty); + sv->delayed.arith.b.kind == OPK_LOCAL && + api_unalias_type(g->c, sv->delayed.arith.b.type) == + api_unalias_type(g->c, ty)) { + dst = api_op_local(sv->delayed.arith.b.v.local, ty); } else { - Reg r = 
api_alloc_reg_or_spill( - g, RC_INT, ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); - dst = api_op_reg(r, ty); + CGLocal r = api_alloc_temp_local(g, ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); + dst = api_op_local(r, ty); } api_materialize_arith_to(g, sv, dst); return; } - if (sv->res != RES_SPILLED) return; - CGTarget* T = g->target; - u8 cls = api_class_of_sv(sv); - CfreeCgTypeId ty = api_owned_reg_type(g, sv); - Reg r = api_alloc_reg_or_spill(g, cls, - ty ? ty : builtin_id(CFREE_CG_BUILTIN_I32)); - T->reload_reg(T, api_op_reg(r, ty), sv->spill_slot, api_mem_for_spill(g, sv)); - api_return_spill_slot(g, sv->spill_slot, cls); - sv->spill_slot = FRAME_SLOT_NONE; - if (sv->op.kind == OPK_INDIRECT) { - sv->op.v.ind.base = r; - } else { - sv->op = api_op_reg(r, api_sv_type(sv)); - } - sv->res = RES_REG; + return; } -Operand api_force_reg(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) { - CGTarget* T = g->target; +Operand api_force_local(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) { + CgTarget* T = g->target; ty = api_unalias_type(g->c, ty); - api_ensure_reg(g, v); - if (v->op.kind == OPK_REG) { - if (ty) { - v->op.type = ty; - v->type = ty; - } + api_ensure_local(g, v); + if (v->op.kind == OPK_LOCAL && !api_is_lvalue_sv(v)) { return v->op; } - Reg r = api_alloc_reg_or_spill(g, api_type_class(ty), ty); - Operand dst = api_op_reg(r, ty); + CGLocal r = api_alloc_temp_local(g, ty); + Operand dst = api_op_local(r, ty); if (v->op.kind == OPK_IMM) { T->load_imm(T, dst, v->op.v.imm); } else if (api_is_lvalue_sv(v)) { T->load(T, dst, v->op, api_mem_for_lvalue(g, &v->op, ty)); if (v->op.kind == OPK_INDIRECT) { - api_free_reg(g, v->op.v.ind.base, RC_INT); + api_release_temp_local(g, v->op.v.ind.base); } } else if (v->op.kind == OPK_GLOBAL) { T->addr_of(T, dst, v->op); } else { compiler_panic(g->c, g->cur_loc, - "CfreeCg: cannot force operand to register"); + "CfreeCg: cannot force operand to local"); } v->op = dst; - v->res = RES_REG; + v->res = RES_LOCAL; return dst; } -Operand 
api_force_reg_unless_imm(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) { +Operand api_force_local_unless_imm(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) { if (api_sv_op_is(v, OPK_IMM)) return v->op; - return api_force_reg(g, v, ty); + return api_force_local(g, v, ty); } void api_release(CfreeCg* g, ApiSValue* sv) { @@ -802,27 +551,12 @@ void api_release(CfreeCg* g, ApiSValue* sv) { api_release_cmp(g, sv); } else if (sv->kind == SV_ARITH) { api_release_arith(g, sv); - } else if (sv->res == RES_REG) { - api_free_reg(g, (Reg)api_reg_of_sv(sv), api_class_of_sv(sv)); - } else if (sv->res == RES_SPILLED) { - api_return_spill_slot(g, sv->spill_slot, api_class_of_sv(sv)); - sv->spill_slot = FRAME_SLOT_NONE; + } else if (sv->res == RES_LOCAL) { + api_release_temp_local(g, (CGLocal)api_local_of_sv(sv)); } sv->res = RES_INHERENT; } -void api_release_arg_storage(CfreeCg* g, Operand* storage) { - if (storage->kind == OPK_REG) { - api_free_reg(g, storage->v.reg, storage->cls); - } else if (storage->kind == OPK_LOCAL && storage->cls < 3) { - CfreeCgTypeId ty = storage->type; - if (api_arg_storage_must_be_addr(g->c, ty)) return; - api_return_spill_slot(g, storage->v.frame_slot, storage->cls); - } else if (storage->kind == OPK_INDIRECT) { - api_free_reg(g, storage->v.ind.base, RC_INT); - } -} - /* ---- BinOp / UnOp / CmpOp mapping ---- */ BinOp api_map_int_binop(CfreeCgIntBinOp op) { @@ -1227,22 +961,22 @@ void api_local_const_address_taken(CfreeCg* g, CfreeCgLocal local) { } Operand api_lvalue_addr(CfreeCg* g, ApiSValue* v, CfreeCgTypeId pty) { - CGTarget* T; + CgTarget* T; ApiSourceLocal* rec; - Reg r; + CGLocal r; Operand dst; api_local_const_address_taken(g, v->source_local); - api_ensure_reg(g, v); + api_ensure_local(g, v); if (!api_is_lvalue_sv(v)) { compiler_panic(g->c, g->cur_loc, "CfreeCg: addr operand is not an lvalue"); } T = g->target; - r = api_alloc_reg_or_spill(g, RC_INT, pty); - dst = api_op_reg(r, pty); + r = api_alloc_temp_local(g, pty); + dst = api_op_local(r, 
pty); rec = v->source_local != CFREE_CG_LOCAL_NONE ? api_local_from_handle(g, v->source_local) : NULL; - if (rec && rec->storage.kind == CG_LOCAL_STORAGE_REG && T->local_addr) + if (rec && rec->storage != CG_LOCAL_NONE && T->local_addr) T->local_addr(T, dst, &rec->desc, rec->storage); else T->addr_of(T, dst, v->op); @@ -1347,8 +1081,8 @@ int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) { - *out = api_make_sv_with_reg_ownership(a->op, ty, - api_sv_owns_operand_reg(a, &a->op)); + *out = api_make_sv_with_local_ownership(a->op, ty, + api_sv_owns_operand_local(a, &a->op)); a->res = RES_INHERENT; return 1; } @@ -1375,8 +1109,8 @@ int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, (op == BO_IADD || op == BO_IMUL || op == BO_OR || op == BO_XOR || op == BO_AND) && api_op_is_int_identity(g, op, ty, a->op.v.imm)) { - *out = api_make_sv_with_reg_ownership(b->op, ty, - api_sv_owns_operand_reg(b, &b->op)); + *out = api_make_sv_with_local_ownership(b->op, ty, + api_sv_owns_operand_local(b, &b->op)); b->res = RES_INHERENT; return 1; } @@ -1400,7 +1134,7 @@ int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 folded; BinOp result_op; if (a->kind != SV_ARITH || a->delayed.arith.kind != API_DELAYED_BINOP || - a->delayed.arith.a.kind != OPK_REG || + a->delayed.arith.a.kind != OPK_LOCAL || a->delayed.arith.b.kind != OPK_IMM || b->kind != SV_OPERAND || b->op.kind != OPK_IMM) { return 0; @@ -1462,7 +1196,7 @@ int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, return 0; } if (api_op_is_int_identity(g, result_op, ty, folded)) { - *out = api_make_sv_with_reg_ownership(a->delayed.arith.a, ty, + *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty, a->delayed.arith.a_owned); a->delayed.arith.a_owned = 0; memset(&a->delayed.arith.a, 0, sizeof 
a->delayed.arith.a); @@ -1482,10 +1216,10 @@ int api_try_fold_unary_chain(ApiSValue* a, UnOp op, CfreeCgTypeId ty, ApiSValue* out) { if (op != UO_BNOT || a->kind != SV_ARITH || a->delayed.arith.kind != API_DELAYED_UNOP || - a->delayed.arith.un_op != UO_BNOT || a->delayed.arith.a.kind != OPK_REG) { + a->delayed.arith.un_op != UO_BNOT || a->delayed.arith.a.kind != OPK_LOCAL) { return 0; } - *out = api_make_sv_with_reg_ownership(a->delayed.arith.a, ty, + *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty, a->delayed.arith.a_owned); a->delayed.arith.a_owned = 0; memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); diff --git a/src/cg/wide.c b/src/cg/wide.c @@ -1,14 +1,13 @@ #include "cg/internal.h" -FrameSlot api_f128_temp_slot(CfreeCg* g, CfreeCgTypeId ty) { - FrameSlotDesc fsd; - memset(&fsd, 0, sizeof fsd); - fsd.type = ty; - fsd.size = 16; - fsd.align = 16; - fsd.kind = FS_LOCAL; - fsd.flags = FSF_ADDR_TAKEN; - return g->target->frame_slot(g->target, &fsd); +CGLocal api_f128_temp_local(CfreeCg* g, CfreeCgTypeId ty) { + CGLocalDesc d; + memset(&d, 0, sizeof d); + d.type = ty; + d.size = 16; + d.align = 16; + d.flags = CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED; + return g->target->local(g->target, &d); } u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes) { @@ -20,25 +19,25 @@ u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes) { return v; } -void api_store_f128_bytes(CfreeCg* g, FrameSlot slot, CfreeCgTypeId ty, +void api_store_f128_bytes(CfreeCg* g, CGLocal local, CfreeCgTypeId ty, const u8 bytes[16]) { CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Reg ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); - Operand base = api_op_reg(ar, ptr_ty); + CGLocal ar = api_alloc_temp_local(g, ptr_ty); + Operand base = api_op_local(ar, ptr_ty); MemAccess ma; memset(&ma, 0, sizeof ma); ma.type = i64_ty; ma.size = 8; ma.align = 8; - g->target->addr_of(g->target, base, 
api_op_local(slot, ty)); + g->target->addr_of(g->target, base, api_op_local(local, ty)); g->target->store(g->target, api_op_indirect(ar, 0, i64_ty), api_op_imm((i64)api_u64_from_target_bytes(g, bytes), i64_ty), ma); g->target->store( g->target, api_op_indirect(ar, 8, i64_ty), api_op_imm((i64)api_u64_from_target_bytes(g, bytes + 8), i64_ty), ma); - api_free_reg(g, ar, RC_INT); + api_release_temp_local(g, ar); } void api_encode_binary128_from_double(CfreeCg* g, double value, u8 out[16]) { @@ -96,24 +95,34 @@ void api_encode_binary128_from_double(CfreeCg* g, double value, u8 out[16]) { ApiSValue api_make_f128_const(CfreeCg* g, double value, CfreeCgTypeId ty) { u8 bytes[16]; - FrameSlot slot; + CGLocal local; api_encode_binary128_from_double(g, value, bytes); - slot = api_f128_temp_slot(g, ty); - api_store_f128_bytes(g, slot, ty, bytes); - return api_make_lv(api_op_local(slot, ty), ty); + local = api_f128_temp_local(g, ty); + api_store_f128_bytes(g, local, ty, bytes); + return api_make_lv(api_op_local(local, ty), ty); } ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty) { - if (v->op.kind == OPK_LOCAL || v->op.kind == OPK_INDIRECT) { - v->type = ty; - v->op.type = ty; + if (v->op.kind == OPK_LOCAL && + api_unalias_type(g->c, v->op.type) == api_unalias_type(g->c, ty)) { + v->lvalue = 1; + return *v; + } + if (v->op.kind == OPK_INDIRECT) { + ApiSValue out = *v; + out.type = ty; + out.op.type = ty; + out.lvalue = 1; + return out; + } + if (v->op.kind == OPK_LOCAL) { v->lvalue = 1; return *v; } if (v->op.kind == OPK_GLOBAL) { - FrameSlot slot = api_f128_temp_slot(g, ty); - Operand dst_lv = api_op_local(slot, ty); + CGLocal local = api_f128_temp_local(g, ty); + Operand dst_lv = api_op_local(local, ty); Operand dst_addr; Operand src_addr; AggregateAccess agg; @@ -125,13 +134,13 @@ ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v, agg.size = 16; agg.align = 16; g->target->copy_bytes(g->target, dst_addr, src_addr, agg); - 
api_free_reg(g, dst_addr.v.reg, RC_INT); - api_free_reg(g, src_addr.v.reg, RC_INT); + api_release_temp_local(g, dst_addr.v.local); + api_release_temp_local(g, src_addr.v.local); return api_make_lv(dst_lv, ty); } - if (v->op.kind == OPK_REG) { - FrameSlot slot = api_f128_temp_slot(g, ty); - Operand dst = api_op_local(slot, ty); + if (v->op.kind == OPK_LOCAL) { + CGLocal local = api_f128_temp_local(g, ty); + Operand dst = api_op_local(local, ty); g->target->store(g->target, dst, v->op, api_mem_for_lvalue(g, &dst, ty)); return api_make_lv(dst, ty); } @@ -143,9 +152,9 @@ ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v, u32 idx = g->c->target.big_endian ? 15u - i : i; bytes[idx] = (u8)(lo >> (i * 8u)); } - FrameSlot slot = api_f128_temp_slot(g, ty); - api_store_f128_bytes(g, slot, ty, bytes); - return api_make_lv(api_op_local(slot, ty), ty); + CGLocal local = api_f128_temp_local(g, ty); + api_store_f128_bytes(g, local, ty, bytes); + return api_make_lv(api_op_local(local, ty), ty); } compiler_panic( g->c, g->cur_loc, diff --git a/src/core/core.c b/src/core/core.c @@ -16,6 +16,13 @@ _Static_assert(sizeof(jmp_buf) <= COMPILER_PANIC_BYTES, "Compiler panic save buffer is too small for jmp_buf"); +#if defined(__GNUC__) || defined(__clang__) || defined(__cfree__) +__attribute__((weak)) +#endif +void cfree_debug_printf(const char* fmt, ...) { + (void)fmt; +} + SourceManager* source_new(Compiler*); void source_free(SourceManager*); diff --git a/test/test.mk b/test/test.mk @@ -118,10 +118,11 @@ test-driver-cc: bin # frontend exercises both the existing backends and the C backend. # Together they prove the CGTarget seam is frontend-agnostic. # Unimplemented CGTarget methods report as SKIP; see doc/CBACKEND.md. 
+CFREE_CBACKEND_TEST_JOBS ?= $(if $(CFREE_TEST_JOBS),$(CFREE_TEST_JOBS),1) test-cbackend: bin - @CFREE_TEST_PATHS=C CFREE_TEST_ALLOW_SKIP=1 sh test/parse/run.sh - @CFREE_TEST_PATHS=C CFREE=$(abspath $(BIN)) sh test/toy/run.sh - @CFREE_TEST_PATHS=C CFREE=$(abspath $(BIN)) bash test/wasm/run.sh + @CFREE_TEST_JOBS=$(CFREE_CBACKEND_TEST_JOBS) CFREE_TEST_PATHS=C CFREE_TEST_ALLOW_SKIP=1 sh test/parse/run.sh + @CFREE_TEST_JOBS=$(CFREE_CBACKEND_TEST_JOBS) CFREE_TEST_PATHS=C CFREE=$(abspath $(BIN)) sh test/toy/run.sh + @CFREE_TEST_JOBS=$(CFREE_CBACKEND_TEST_JOBS) CFREE_TEST_PATHS=C CFREE=$(abspath $(BIN)) bash test/wasm/run.sh # test-wasm-toy: opt-in Toy -> Wasm -> JIT roundtrip. Runs the toy corpus # under the W path (compile -target wasm32-none, then `cfree run` the .wasm,