kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

cgtarget.h (33533B)


      1 #ifndef KIT_CG_CGTARGET_H
      2 #define KIT_CG_CGTARGET_H
      3 
      4 #include <kit/cg.h>
      5 #include <kit/compile.h>
      6 
      7 #include "core/core.h"
      8 #include "obj/obj.h"
      9 
     10 typedef u32 CGLocal; /* handle naming one semantic local (cf. OPK_LOCAL) */
     11 #define CG_LOCAL_NONE 0u /* sentinel: no local (void result, absent index) */
     12 
     13 /* Vector / SIMD forward compat: vector ops will arrive as new variants in
     14  * the BinOp, UnOp, CmpOp, ConvKind families. Backend switches over these
     15  * enums must use `default:` (unreachable / panic) rather than exhaustive
     16  * case lists, so adding a new variant later does not silently mis-handle on
     17  * backends that haven't been taught about it. Vector loads/stores reuse the
     18  * existing load/store methods with vector-typed Operands and appropriate
     19  * MemAccess. */
     20 
     21 /* Integer/float binary ops. Edge-case semantics are fully defined (no undefined
     22  * behavior) in doc/IR.md: iadd/isub/imul (and UO_NEG) wrap modulo 2^width;
     23  * sdiv/udiv/srem/urem and the shifts have a portable default plus an opt-in
     24  * target-defined mode selected per instruction via CgIrInstFlag (src/cg/ir.h).
     25  * FP ops are strict IEEE-754 in the target's default rounding/exception
     26  * environment; there is no FP remainder op (the frontend calls fmod). */
     27 typedef enum BinOp {
     28   BO_IADD, /* iadd/isub/imul wrap modulo 2^width */
     29   BO_ISUB,
     30   BO_IMUL,
     31   BO_SDIV, /* div/rem: portable default, or opt-in target-defined mode */
     32   BO_UDIV,
     33   BO_SREM,
     34   BO_UREM,
     35   BO_FADD, /* FP ops: strict IEEE-754; there is no FP remainder op */
     36   BO_FSUB,
     37   BO_FMUL,
     38   BO_FDIV,
     39   BO_AND,
     40   BO_OR,
     41   BO_XOR,
     42   BO_SHL, /* shifts: portable default, or opt-in target-defined mode */
     43   BO_SHR_S,
     44   BO_SHR_U,
     45 } BinOp;
     46 
     47 typedef enum UnOp {
     48   UO_NEG,  /* wraps modulo 2^width, like iadd/isub/imul */
     49   UO_FNEG, /* FP; requires a local (non-immediate) source */
     50   UO_NOT,  /* logical: 0/1 */
     51   UO_BNOT, /* bitwise ~  */
     52 } UnOp;
     53 
     54 /* Compares producing i1. The 10 integer members (CMP_EQ..CMP_GE_U) are total
     55  * and 1:1 with KitCgIntCmpOp; on integers CMP_EQ/CMP_NE are plain equality.
     56  *
     57  * The 12 floating-point members form a disjoint block laid out *after* the
     58  * integer block, in the same order as the public KitCgFpCmpOp, and are
     59  * IEEE-complete: each predicate encodes ordered (NaN -> false) vs unordered
     60  * (NaN -> true) explicitly, so the distinction reaches every backend. The
     61  * identity used throughout the backends is unordered-R == NOT(ordered-not-R)
     62  * (e.g. ULT == !(OGE), UNE == !(OEQ)). CMP_OEQ_F is the FP boundary: an op is a
     63  * floating compare iff op >= CMP_OEQ_F. */
     64 typedef enum CmpOp {
     65   CMP_EQ, /* integer block CMP_EQ..CMP_GE_U; EQ/NE are plain equality */
     66   CMP_NE,
     67   CMP_LT_S,
     68   CMP_LE_S,
     69   CMP_GT_S,
     70   CMP_GE_S,
     71   CMP_LT_U,
     72   CMP_LE_U,
     73   CMP_GT_U,
     74   CMP_GE_U,
     75   /* Ordered FP relationals (NaN -> false). */
     76   CMP_OEQ_F, /* FP boundary: op is a floating compare iff op >= CMP_OEQ_F */
     77   CMP_ONE_F,
     78   CMP_OLT_F,
     79   CMP_OLE_F,
     80   CMP_OGT_F,
     81   CMP_OGE_F,
     82   /* Unordered FP relationals (NaN -> true). */
     83   CMP_UEQ_F, /* == !(CMP_ONE_F) */
     84   CMP_UNE_F, /* == !(CMP_OEQ_F) */
     85   CMP_ULT_F, /* == !(CMP_OGE_F) */
     86   CMP_ULE_F, /* == !(CMP_OGT_F) */
     87   CMP_UGT_F, /* == !(CMP_OLE_F) */
     88   CMP_UGE_F, /* == !(CMP_OLT_F) */
     89 } CmpOp;
     90 
     91 /* Conversions. Widths must order correctly (sext/zext widen, trunc narrows,
     92  * bitcast preserves byte size). itof, fext, and ftrunc round to nearest-even;
     93  * ftoi_s/ftoi_u round toward zero with a portable saturating out-of-range
     94  * default (NaN -> 0) and an opt-in target-defined mode
     95  * (CG_IR_INST_TARGET_FPTOINT_EDGES in src/cg/ir.h). Full rules in doc/IR.md. */
     96 typedef enum ConvKind {
     97   CV_SEXT,    /* widens */
     98   CV_ZEXT,    /* widens */
     99   CV_TRUNC,   /* narrows */
    100   CV_ITOF_S,  /* rounds to nearest-even */
    101   CV_ITOF_U,  /* rounds to nearest-even */
    102   CV_FTOI_S,  /* rounds toward zero; saturating default, NaN -> 0 */
    103   CV_FTOI_U,  /* rounds toward zero; saturating default, NaN -> 0 */
    104   CV_FEXT,    /* rounds to nearest-even */
    105   CV_FTRUNC,  /* rounds to nearest-even */
    106   CV_BITCAST, /* preserves byte size */
    107 } ConvKind;
    108 
    109 /* Atomic op kinds (KitCgAtomicOp) and memory orders (KitCgMemOrder) come
    110  * straight from the public API. Which orders are legal depends on the atomic
    111  * op: load excludes release/acq_rel; store excludes acquire/consume/acq_rel;
    112  * CAS failure order is one of relaxed/consume/acquire/seq_cst and no stronger
    113  * than success. See the Atomics edge-case rules in doc/IR.md (mirrored by
    114  * kit_cg_atomic_is_legal). */
    115 
    116 /* Compiler-intrinsic kinds dispatched through CgTarget.intrinsic and carried
    117  * on IR_INTRINSIC via IRIntrinAux.kind. The set is bounded: a backend
    118  * must know each one to choose inline-vs-libcall. Hint intrinsics
    119  * (EXPECT/TRAP/PREFETCH/ASSUME_ALIGNED) ride the same dispatch:
    120  * the backend decides whether they emit an instruction or a no-op.
    121  * `unreachable` is NOT here: it is a first-class control terminator with
    122  * its own CgTarget hook (see below), not an intrinsic.
    123  *
    124  * Not every C builtin lives here. Parser-evaluated builtins
    125  * (__builtin_offsetof, __builtin_constant_p, __builtin_choose_expr,
    126  * __builtin_types_compatible_p) fold at parse and never reach IR. Builtins
    127  * that already have dedicated CgTarget methods (alloca, va_*, atomics) keep
    128  * them. Returns-twice and no-return control intrinsics use this dispatch so
    129  * opt can preserve their CFG effects without growing backend vtable hooks. */
    130 typedef enum IntrinKind {
    131   INTRIN_NONE = 0,
    132 
    133   /* bit ops */
    134   INTRIN_POPCOUNT,
    135   INTRIN_CTZ,
    136   INTRIN_CLZ,
    137   INTRIN_BSWAP,
    138 
    139   /* memory. memcpy/memset are the dedicated copy_bytes/set_bytes hooks
    140    * (kit_cg_memcpy/_memset); only memmove flows through the intrinsic path. */
    141   INTRIN_MEMMOVE,
    142   INTRIN_PREFETCH,       /* hint: backend may emit an instruction or a no-op */
    143   INTRIN_ASSUME_ALIGNED, /* hint: backend may emit an instruction or a no-op */
    144 
    145   /* hints (backend decides whether they emit an instruction or a no-op) */
    146   INTRIN_EXPECT,
    147   INTRIN_TRAP, /* expression-level: does not terminate the block */
    148 
    149   /* OS trap: args[0] is the syscall number, args[1..6] are integer/pointer
    150    * payloads; dsts[0] receives the target long result. */
    151   INTRIN_SYSCALL,
    152 
    153   /* non-local control (dispatched here so opt can keep their CFG effects) */
    154   INTRIN_SETJMP,
    155   INTRIN_LONGJMP,
    156 
    157   /* checked arith — multi-result (value, overflow_flag) */
    158   INTRIN_SADD_OVERFLOW,
    159   INTRIN_UADD_OVERFLOW,
    160   INTRIN_SSUB_OVERFLOW,
    161   INTRIN_USUB_OVERFLOW,
    162   INTRIN_SMUL_OVERFLOW,
    163   INTRIN_UMUL_OVERFLOW,
    164 
    165   /* baremetal CPU control — single-instruction, no operands unless noted.
    166    * dsts/args empty except IRQ_SAVE (dsts[0] = saved interrupt state) and
    167    * IRQ_RESTORE (args[0] = state to restore). Privileged forms (WFI/WFE/SEV
    168    * and the IRQ family) trap at user level; backends still emit the one
    169    * instruction and frontends gate any runtime use behind a capability test. */
    170   INTRIN_CPU_NOP,
    171   INTRIN_CPU_YIELD,
    172   INTRIN_WFI,
    173   INTRIN_WFE,
    174   INTRIN_SEV,
    175   INTRIN_ISB,
    176   INTRIN_DMB,
    177   INTRIN_DSB,
    178   INTRIN_IRQ_SAVE,    /* dsts[0] = saved interrupt state */
    179   INTRIN_IRQ_RESTORE, /* args[0] = state to restore */
    180   INTRIN_IRQ_ENABLE,
    181   INTRIN_IRQ_DISABLE,
    182 
    183   /* frame-pointer-chain introspection — value-producing, single immediate
    184    * operand (the constant level). args[0] is the level (OPK_IMM); dsts[0] is
    185    * the void* result. Lowered as an unrolled FP walk; modeled as an ordinary
    186    * frame-dependent memory read (IR_INTRINSIC is already conservatively
    187    * side-effecting in opt, so it is never hoisted, CSE'd, or eliminated). */
    188   INTRIN_FRAME_ADDRESS,
    189   INTRIN_RETURN_ADDRESS,
    190 } IntrinKind;
    191 
    192 typedef enum OpKind { /* discriminates Operand.v; stored in Operand.kind */
    193   OPK_IMM,      /* immediate value (Operand.v.imm) */
    194   OPK_LOCAL,    /* typed semantic local */
    195   OPK_GLOBAL,   /* address: symbol+addend, not a load */
    196   OPK_INDIRECT, /* [local + ofs], with optional indexed local */
    197 } OpKind;
    198 
    199 typedef enum CGLocalFlag { /* bits for CGLocalDesc.flags / CGParamDesc.flags */
    200   CG_LOCAL_FLAG_NONE = 0,
    201   CG_LOCAL_ADDR_TAKEN = 1u << 0,
    202   CG_LOCAL_MEMORY_REQUIRED = 1u << 1,
    203 } CGLocalFlag;
    204 
    205 typedef struct CGLocalDesc { /* input to CgTarget.local / local_addr */
    206   KitCgTypeId type;
    207   Sym name;
    208   SrcLoc loc;
    209   u32 size;  /* presumably bytes -- TODO confirm */
    210   u32 align; /* presumably bytes -- TODO confirm */
    211   u32 flags; /* CGLocalFlag */
    212 } CGLocalDesc;
    213 
    214 typedef enum MemFlag { /* bits for MemAccess.flags (a u16 field) */
    215   MF_NONE = 0,
    216   MF_VOLATILE = 1u << 0,
    217   MF_ATOMIC = 1u << 1,
    218   MF_RESTRICT = 1u << 2,
    219   MF_READONLY = 1u << 3,
    220   MF_WRITEONLY = 1u << 4,
    221   MF_UNALIGNED = 1u << 5,
    222 } MemFlag;
    223 
    224 typedef enum AliasKind { /* stored in AliasRoot.kind */
    225   ALIAS_UNKNOWN,
    226   ALIAS_LOCAL,  /* presumably pairs with AliasRoot.v.local_id -- confirm */
    227   ALIAS_GLOBAL, /* presumably pairs with AliasRoot.v.global -- confirm */
    228   ALIAS_PARAM,  /* presumably pairs with AliasRoot.v.param_idx -- confirm */
    229   ALIAS_HEAP,
    230   ALIAS_STRING, /* presumably pairs with AliasRoot.v.string_id -- confirm */
    231 } AliasKind;
    232 
    233 typedef struct AliasRoot { /* embedded in MemAccess.alias */
    234   u8 kind; /* AliasKind */
    235   u8 pad[3];
    236   union { /* NOTE(review): active member presumably selected by `kind` */
    237     i32 local_id; /* NOTE(review): signed, while the CGLocal handle is u32 */
    238     ObjSymId global;
    239     u32 param_idx;
    240     Sym string_id;
    241   } v;
    242 } AliasRoot;
    243 
    244 typedef struct MemAccess { /* one memory access, as passed to load/store */
    245   KitCgTypeId type; /* codegen object type accessed */
    246   u32 size;         /* ABI byte size of this access (storage-unit size for a
    247                      * bit-field) */
    248   u32 align;        /* known byte alignment; 0 means unknown */
    249   u16 flags;        /* MemFlag */
    250   u16 addr_space;
    251   /* Bit-field rider: when bf_width != 0 this access is a bit-field, so `load`
    252    * extracts (shift+mask+extend) and `store` inserts (read-modify-write) within
    253    * the storage unit described by {type,size}. The CgTarget impls translate
    254    * this to the physical NativeTarget bitfield_load/store (or the recorder IR
    255    * op); the semantic CgTarget no longer carries a separate bit-field method.
    256    */
    257   u16 bf_offset; /* target-endian bit offset within the storage unit */
    258   u16 bf_width;  /* 0 => not a bit-field access */
    259   u8 bf_signed;  /* signed extraction on load */
    260   u8 bf_pad[3];
    261   AliasRoot alias; /* see AliasRoot / AliasKind */
    262 } MemAccess;
    263 
    264 typedef struct ConstBytes { /* constant payload taken by CgTarget.load_const */
    265   KitCgTypeId type;
    266   const u8* bytes; /* ABI representation, little/big endian per target */
    267   u32 size;  /* presumably the length of `bytes` -- TODO confirm */
    268   u32 align;
    269 } ConstBytes;
    270 
    271 typedef struct AggregateAccess { /* taken by copy_bytes / set_bytes */
    272   KitCgTypeId type;
    273   u32 size;
    274   u32 align;
    275   MemAccess mem;
    276 } AggregateAccess;
    277 
    278 typedef struct BitFieldAccess { /* rebuilt from a MemAccess by bf_from_mem */
    279   KitCgTypeId field_type;
    280   MemAccess storage;  /* storage-unit access; bf_from_mem clears its bf_* rider */
    281   u32 storage_offset; /* byte offset from record base */
    282   u16 bit_offset;     /* target-endian bit offset within storage unit */
    283   u16 bit_width;      /* may be 0 for zero-width layout barriers */
    284   u8 signed_;
    285   u8 pad[3];
    286 } BitFieldAccess;
    287 
    288 /* Expand a bit-field MemAccess (bf_width != 0, as carried by the generic
    289  * load/store) into the BitFieldAccess a CgTarget impl works with. {m.type,
    290  * m.size} describe the storage unit; the bf_* rider supplies the geometry. */
    291 static inline BitFieldAccess bf_from_mem(MemAccess m) {
    292   /* Storage-unit view: the same access with the bit-field rider cleared. */
    293   MemAccess unit = m;
    294   unit.bf_offset = 0;
    295   unit.bf_width = 0;
    296   unit.bf_signed = 0;
    297   return (BitFieldAccess){.field_type = m.type,
    298                           .storage = unit,
    299                           .bit_offset = m.bf_offset,
    300                           .bit_width = m.bf_width,
    301                           .signed_ = m.bf_signed};
    302 }
    303 
    304 typedef struct Operand {
    305   u8 kind; /* OpKind */
    306   u8 pad[3];
    307   KitCgTypeId type;
    308   union {
    309     i64 imm;       /* OPK_IMM */
    310     CGLocal local; /* OPK_LOCAL */
    311     struct {       /* OPK_GLOBAL: address symbol+addend, not a load */
    312       ObjSymId sym;
    313       i64 addend;
    314     } global;
    315     struct { /* OPK_INDIRECT: [base + ofs], optionally indexed */
    316       CGLocal base;
    317       CGLocal index; /* CG_LOCAL_NONE when no index operand */
    318       u8 log2_scale; /* 0..3 -> 1/2/4/8 bytes; ignored when no index */
    319       i32 ofs;
    320     } ind;
    321   } v;
    322 } Operand;
    323 
    324 typedef struct CGParamDesc { /* input to CgTarget.param; CGFuncDesc.params */
    325   u32 index;
    326   Sym name;
    327   KitCgTypeId type;
    328   u32 size;
    329   u32 align;
    330   u32 flags; /* CGLocalFlag */
    331   SrcLoc loc;
    332 } CGParamDesc;
    333 
    334 /* CGFuncDesc's text_section_id and group_id are per-function so that
    335  * -ffunction-sections, __attribute__((section)) on functions, and COMDAT for
    336  * C11 inline-with-external-definition all work with no extra plumbing.
    337  * Decl.section_id already carries the user's request; CG/decl decides the
    338  * section name policy (default .text, vs .text.<sym> under -ffunction-sections,
    339  * vs explicit attribute). The backend just writes to the named section. */
    340 /* Phase 2 attribute-derived hints. The backends are free to ignore these;
    341  * they exist so the parser can communicate _Noreturn / __attribute__
    342  * info down to CG without forcing every backend to consult the Decl. */
    343 typedef enum CGFuncDescFlag { /* bits for CGFuncDesc.flags */
    344   CGFD_NONE = 0,
    345   CGFD_NORETURN = 1u << 0, /* from _Noreturn / __attribute__; may be ignored */
    346 } CGFuncDescFlag;
    347 
    348 typedef struct CGFuncDesc { /* input to CgTarget.func_begin */
    349   ObjSymId sym;
    350   ObjSecId text_section_id; /* per-function; CG/decl picks the section */
    351   ObjGroupId group_id; /* OBJ_GROUP_NONE if none */
    352   KitCgTypeId fn_type;
    353   KitCgTypeId result_type; /* KIT_CG_TYPE_NONE/void == no result */
    354   const CGParamDesc* params; /* presumably nparams entries -- confirm */
    355   u32 nparams;
    356   SrcLoc loc;
    357   u32 flags; /* CGFuncDescFlag */
    358   KitCgInlinePolicy inline_policy;
    359   u16 sym_bind; /* SymBind */
    360   u16 sym_kind; /* SymKind */
    361   u8 sym_vis;   /* SymVis */
    362   u8 atomize;   /* NOTE(review): meaning is not documented in this header */
    363   u8 pad[2];
    364 } CGFuncDesc;
    365 
    366 typedef enum CGCallFlag { /* bits for CGCallDesc.flags */
    367   CG_CALL_NONE = 0,
    368   /* Sibling call. The target emits a tail-position call and does NOT emit a
    369    * return-style continuation. CG will not invoke target->ret afterwards.
    370    *
    371    * Realizability is verified before this flag is set: CG only sets it after
    372    * tail_call_unrealizable_reason() returns NULL for the same desc and call
    373    * state, so the target can emit the sibling call unconditionally. The
    374    * target may assert/compiler_panic if the flag is set on an unrealizable
    375    * desc, but that is an internal-consistency check — fallback and
    376    * diagnostics for unrealizable tail calls are CG's responsibility, not the
    377    * target's. */
    378   CG_CALL_TAIL = 1u << 0,
    379 } CGCallFlag;
    380 
    381 typedef struct CGCallDesc { /* input to CgTarget.call and the tail-call query */
    382   KitCgTypeId fn_type; /* type-checked signature */
    383   Operand callee;      /* semantic callee operand */
    384   const CGLocal* args; /* local arguments; presumably nargs entries */
    385   CGLocal result; /* CG_LOCAL_NONE == void callee (no result) */
    386   u32 nargs;
    387   u16 flags;      /* CGCallFlag */
    388   u8 tail_policy; /* KitCgTailPolicy; meaningful when CG_CALL_TAIL is set.
    389                    * The opt recorder accepts every tail and preserves this so
    390                    * the replay can pick: emit tail (realizable), fall back to
    391                    * call+ret (ALLOWED), or diagnose (MUST). */
    392   u8 pad;
    393   KitCgInlinePolicy inline_policy;
    394 } CGCallDesc;
    395 
    396 typedef u32 Label; /* function-local label handle (label_new/label_place) */
    397 #define LABEL_NONE 0u /* sentinel; unsigned like CG_LOCAL_NONE/CG_SCOPE_NONE */
    398 
    399 typedef enum ScopeKind { /* stored in CGScopeDesc.kind */
    400   SCOPE_BLOCK, /* break exits forward */
    401   SCOPE_LOOP,  /* break exits forward; continue uses explicit target */
    402 } ScopeKind;
    403 
    404 typedef u32 CGScope; /* handle returned by scope_begin, taken by scope_end */
    405 #define CG_SCOPE_NONE 0u
    406 
    407 typedef struct CGScopeDesc { /* input to CgTarget.scope_begin */
    408   u8 kind; /* ScopeKind */
    409   u8 pad[3];
    410   Label break_label; /* explicit target for break; LABEL_NONE => target creates
    411                         one */
    412   Label continue_label;    /* explicit target for continue; LABEL_NONE for
    413                               non-loops */
    414   KitCgTypeId result_type; /* reserved for structured expression results */
    415 } CGScopeDesc;
    416 
    417 typedef struct AsmConstraint {
    418   const char* str;  /* GCC-style: "r", "=&r", "+m", "i", "0" ... */
    419   Sym name;         /* GCC `[name]` symbolic operand; 0 if absent */
    420   KitCgTypeId type; /* codegen type of the bound expression (output lvalue or
    421                        input rvalue); drives type width for the binder. Left
    422                        unset ("NULL"; presumably KIT_CG_TYPE_NONE -- confirm) only
    423                        for hand-built tests (binder defaults to a 64-bit int). */
    424   Sym reg;          /* Explicit hard-register name ("r10"/"x8"/...) this operand
    425                        must occupy — a GNU local register variable bound as an
    426                        operand; 0 = unconstrained. Only the target's register
    427                        file resolves the name to a physical register. */
    428   u8 dir;           /* KitCgAsmDir */
    429   u8 pad[3];
    430 } AsmConstraint;
    431 
    432 typedef struct CGSwitchCase { /* one entry of CGSwitchDesc.cases */
    433   /* Bit pattern matched against the selector; interpreted using
    434    * CGSwitchDesc.selector_type's width and signedness (signed comparison
    435    * uses sign-extension to selector_type's width). */
    436   u64 value;
    437   Label label; /* presumably the branch target when `value` matches */
    438 } CGSwitchCase;
    439 
    440 typedef struct CGSwitchDesc { /* input to CgTarget.switch_ */
    441   Operand selector; /* OPK_LOCAL or OPK_IMM */
    442   KitCgTypeId selector_type;
    443   Label default_label; /* LABEL_NONE means "fall through past the switch" */
    444   const CGSwitchCase* cases; /* presumably ncases entries -- confirm */
    445   u32 ncases;
    446   u8 hint;      /* KitCgSwitchHint */
    447   u8 opt_level; /* 0/1/2; consulted by the policy in cg_lower_switch_default */
    448   u8 pad[2];
    449 } CGSwitchDesc;
    450 
    451 typedef struct CGLocalStaticDataDesc { /* input to local_static_data_begin */
    452   ObjSymId sym;
    453   KitCgTypeId type;
    454   KitCgDataDefAttrs attrs;
    455   u32 align;
    456 } CGLocalStaticDataDesc;
    457 
    458 typedef enum CGDebugLocKind { /* stored in CGDebugLoc.kind */
    459   CG_DEBUG_LOC_NONE,
    460   CG_DEBUG_LOC_FRAME,  /* presumably pairs with CGDebugLoc.v.frame_ofs */
    461   CG_DEBUG_LOC_REG,    /* presumably pairs with CGDebugLoc.v.reg */
    462   CG_DEBUG_LOC_GLOBAL, /* presumably pairs with CGDebugLoc.v.global */
    463 } CGDebugLocKind;
    464 
    465 typedef struct CGDebugLoc { /* out-parameter of CgTarget.local_debug_loc */
    466   u8 kind; /* CGDebugLocKind */
    467   u8 pad[3];
    468   union {
    469     /* Offset in the same target-defined frame-base coordinate system that the
    470      * target/debugger pair uses to materialize frame-relative variables. CG
    471      * treats this as opaque target data and only maps it into the debug
    472      * producer's generic frame-location form. */
    473     i32 frame_ofs;
    474     u32 reg; /* NOTE(review): register numbering scheme is not stated here */
    475     ObjSymId global;
    476   } v;
    477 } CGDebugLoc;
    478 
    479 /* Forward-declared (same as arch/mc.h) so a CgTarget can carry an optional
    480  * Debug producer without this header depending on debug/debug.h. */
    481 typedef struct Debug Debug;
    482 
    483 typedef struct CgFinishPolicy { /* held by value in CgTarget.finish_policy */
    484   u8 output_kind;          /* KitCgOutputKind */
    485   u8 interposition_policy; /* KitCgInterpositionPolicy */
    486   u8 pad[2];
    487   const ObjSymId* preserved_symbols; /* presumably npreserved_symbols entries */
    488   u32 npreserved_symbols;
    489 } CgFinishPolicy;
    490 
    491 typedef struct CgTarget CgTarget;
    492 struct CgTarget {
    493   /* Typed IR lowering context. Subclasses extend. */
    494   Compiler* c;
    495   ObjBuilder* obj;
    496 
    497   /* Optional DWARF producer, created by the backend's `make` when
    498    * opts->debug_info is set (else NULL). The session reads this back into
    499    * its own g->debug to drive func/line/emit; the backend's MCEmitter
    500    * shares the same object for line-row emission. */
    501   Debug* debug;
    502 
    503   CgFinishPolicy finish_policy;
    504 
    505   /* ---- function lifecycle ---- */
    506   void (*func_begin)(CgTarget*, const CGFuncDesc*);
    507   void (*func_end)(CgTarget*);
    508 
    509   /* Symbol-aliasing hook. Optional (may be NULL). cg invokes this from
    510    * kit_cg_alias after the obj symbol-table mirror is wired so the
    511    * backend can emit any out-of-band representation it needs — e.g. the
    512    * C-source target writes
    513    *   `T alias_sym(...) __attribute__((alias("target")));`
    514    * because the alias relationship isn't expressible by sharing a
    515    * (section, value) pair the way a relocatable object can. Native
    516    * machine-code backends don't need this hook because obj_symbol_define
    517    * already aliases the bytes. `type` is the alias's CG type (function
    518    * or object), needed by the C target to render the prototype. */
    519   void (*alias)(CgTarget*, ObjSymId alias_sym, ObjSymId target_sym,
    520                 KitCgTypeId type);
    521 
    522   /* ---- locals ---- */
    523   CGLocal (*local)(CgTarget*, const CGLocalDesc*);
    524   void (*local_addr)(CgTarget*, Operand dst, const CGLocalDesc*, CGLocal);
    525   CGLocal (*param)(CgTarget*, const CGParamDesc*);
    526   /* Optional debug-info query after function frame layout is finalized.
    527    * Targets return a target-authored location for semantic local storage; CG
    528    * owns deciding which source locals/params get emitted and translating the
    529    * target-neutral CGDebugLoc into the debug producer API. */
    530   int (*local_debug_loc)(CgTarget*, CGLocal, CGDebugLoc*);
    531 
    532   /* ---- labels and control flow ---- */
    533   Label (*label_new)(CgTarget*);
    534   void (*label_place)(CgTarget*, Label);
    535   void (*jump)(CgTarget*, Label);
    536   /* Fused compare-and-branch. cg's preferred form: avoids materializing 0/1
    537    * for a normal `if (a < b)`. For an arbitrary i1 in a local, callers
    538    * synthesize cmp_branch(CMP_NE, val, IMM_ZERO, label). */
    539   void (*cmp_branch)(CgTarget*, CmpOp, Operand a, Operand b, Label);
    540 
    541   /* Structured switch dispatch.
    542    *
    543    * Optional: when NULL, cg's shared `cg_lower_switch_default` runs and
    544    * lowers in terms of cmp_branch / jump / indirect_branch / data ops —
    545    * the path every native arch uses. Backends override switch_ only when
    546    * they can express the construct natively: the C-source target emits
    547    * `switch (val) { case V: goto L_V; ... default: goto L_def; }`; a
    548    * future WASM target would emit `br_table`.
    549    *
    550    * The descriptor carries the full structured form (selector + paired
    551    * cases + default + frontend hint); density policy lives in
    552    * cg_lower_switch_default. */
    553   void (*switch_)(CgTarget*, const CGSwitchDesc*);
    554 
    555   /* Optional. When non-NULL and it returns 0, the target cannot realize a
    556    * jump-table dispatch built from a rodata table of code-label addresses
    557    * (Wasm: linear memory holds no code addresses and there is no computed
    558    * branch). kit_cg_switch then routes dense/forced-table plans through
    559    * `switch_` (e.g. br_table) instead of the label-table + indirect_branch
    560    * lowering. NULL means the label-table path is supported (every native
    561    * arch). */
    562   int (*supports_label_table)(CgTarget*);
    563 
    564   /* Indirect branch primitive: transfer control to the address in
    565    * `addr` (an OPK_LOCAL holding a function-local label address).
    566    *
    567    * Required on every native arch and used by:
    568    *   - kit_cg_computed_goto for direct-threaded dispatch
    569    *   - opt-level jump-table lowerings of IR_SWITCH (when implemented)
    570    *
    571    * `valid_targets[0..ntargets)` is the closed set of labels the address
    572    * can resolve to. Backends use it for branch-target hardening (BTI,
    573    * PAC, x86 CFG, IBT) and opt uses it to build the CFG; opt requires
    574    * ntargets > 0. */
    575   void (*indirect_branch)(CgTarget*, Operand addr, const Label* valid_targets,
    576                           u32 ntargets);
    577 
    578   /* Materialize the runtime address of a function-local label into
    579    * `dst`. The label must already exist (label_new); it does not
    580    * need to be placed yet. Backends emit the target's relative address
    581    * materialization:
    582    * x86_64 `lea L(%rip), %r`, aarch64 `adr X, L`, riscv `auipc/addi`.
    583    *
    584    * The resulting pointer is a function-local label address (per the
    585    * public kit_cg_push_label_addr contract) and must only be consumed
    586    * by indirect_branch inside the defining function's activation. */
    587   void (*load_label_addr)(CgTarget*, Operand dst, Label label);
    588 
    589   /* Optional source-backend hook for function-local static data definitions
    590    * that need function label scope, currently used for C `&&label`
    591    * dispatch-table initializers. Returning non-zero from begin means the
    592    * target consumes bytes/zeros/label addresses until end; ordinary object
    593    * data emission is skipped for that definition. */
    594   int (*local_static_data_begin)(CgTarget*, const CGLocalStaticDataDesc*);
    595   /* data == NULL means append len zero bytes. */
    596   void (*local_static_data_write)(CgTarget*, const u8* data, u64 len);
    597   void (*local_static_data_label_addr)(CgTarget*, Label target, i64 addend,
    598                                        u32 width, u32 address_space);
    599   void (*local_static_data_end)(CgTarget*);
    600 
    601   /* Optional. When non-NULL, kit_cg_data_label_addr panics with the
    602    * returned target-specific message before reaching object-data emission. Lets
    603    * targets that cannot resolve function-local label addresses in
    604    * static-data initializers (e.g. the Wasm backend) fail with a
    605    * recognizable, target-prefixed diagnostic. The returned string must remain
    606    * valid for the lifetime of the panic call (string literals are typical). */
    607   const char* (*data_label_addr_unsupported_msg)(CgTarget*);
    608 
    609   /* ---- structured control flow ----
    610    * Mirrors CG's scope ops. CG passes explicit break/continue targets so C
    611    * `for` continues can land on the increment expression rather than the loop
    612    * header. Real backends shim these onto label_new/label_place/jump.
    613    * The WASM backend consumes them natively to emit block/loop with
    614    * structurally-bounded br targets, which is what gives WASM its CFI.
    615    *
    616    * `result_type` is reserved for structured expression results on WASM (NULL
    617    * for the statement case used by C); other backends ignore it. */
    618   CGScope (*scope_begin)(CgTarget*, const CGScopeDesc*);
    619   void (*scope_end)(CgTarget*, CGScope);
    620   void (*break_to)(CgTarget*, CGScope);
    621   void (*continue_to)(CgTarget*, CGScope);
    622 
    623   /* ---- data movement (split, no overloading) ---- */
    624   void (*load_imm)(CgTarget*, Operand dst /*LOCAL*/, i64 imm);
    625   void (*load_const)(CgTarget*, Operand dst /*LOCAL*/, ConstBytes);
    626   void (*copy)(CgTarget*, Operand dst /*LOCAL*/, Operand src /*LOCAL*/);
    627   void (*load)(CgTarget*, Operand dst /*LOCAL*/,
    628                Operand addr /*LOCAL|GLOBAL|INDIRECT*/, MemAccess);
    629   void (*store)(CgTarget*, Operand addr /*LOCAL|GLOBAL|INDIRECT*/,
    630                 Operand src /*LOCAL|IMM*/, MemAccess);
    631   void (*addr_of)(CgTarget*, Operand dst /*LOCAL*/,
    632                   Operand lv /*LOCAL|GLOBAL|INDIRECT*/);
    633   /* Materializes the address of a thread-local symbol into `dst`. Distinct
    634    * from addr_of because TLS resolution can be a multi-instruction sequence
    635    * or a runtime call (e.g. GD model), not a cheap addressing mode. The
    636    * backend chooses the TLS model (LE/IE/LD/GD) from c->target and the
    637    * symbol's visibility. Subsequent accesses go through OPK_INDIRECT on the
    638    * resulting pointer; this lets opt hoist the materialization via LICM. */
    639   void (*tls_addr_of)(CgTarget*, Operand dst /*LOCAL*/, ObjSymId sym,
    640                       i64 addend);
    641   void (*copy_bytes)(CgTarget*, Operand dst_addr, Operand src_addr,
    642                      AggregateAccess);
    643   void (*set_bytes)(CgTarget*, Operand dst_addr, Operand byte_value,
    644                     AggregateAccess);
    645   /* Bit-fields are not a separate CgTarget method: a bit-field load/store rides
    646    * the generic `load`/`store` above with a bit-field MemAccess (bf_width !=
    647    * 0). Each CgTarget impl translates it (NativeDirectTarget -> NativeTarget's
    648    * bitfield_load/store; IrRecorder -> CG_IR_BITFIELD_LOAD/STORE). */
    649 
    650   /* ---- arithmetic, compare, convert ----
    651    * binop/unop/cmp accept OPK_LOCAL or OPK_IMM in source operand positions
    652    * (`a`, `b`); `dst` is always OPK_LOCAL. The backend chooses between an
    653    * imm-form encoding and materializing the literal into a scratch
    654    * local based on whether the value fits the instruction's imm
    655    * field. FP binops and UO_FNEG require local sources — FP literals reach the
    656    * value stack through load_const into OPK_LOCAL. cg and opt's machinize/emit
    657    * both rely on this contract to pass small constants through without
    658    * burning a value-stack local on materialization. */
    659   void (*binop)(CgTarget*, BinOp, Operand dst /*LOCAL*/,
    660                 Operand a /*LOCAL|IMM*/, Operand b /*LOCAL|IMM*/);
    661   void (*unop)(CgTarget*, UnOp, Operand dst /*LOCAL*/, Operand a /*LOCAL|IMM*/);
    662   void (*cmp)(CgTarget*, CmpOp, Operand dst /*LOCAL*/, Operand a /*LOCAL|IMM*/,
    663               Operand b /*LOCAL|IMM*/); /* materialize 0/1 */
    664   void (*convert)(CgTarget*, ConvKind, Operand dst, Operand src);
    665 
    666   /* ---- calls / return ----
    667    * CGCallDesc carries the type-checked signature, semantic callee operand,
    668    * local arguments, and local result destinations. The semantic target does
    669    * not expose calling-convention lowering; native targets derive physical
    670    * argument/return placement from fn_type and local metadata internally.
    671    * `result` is the single local destination, or CG_LOCAL_NONE for void. */
    672   void (*call)(CgTarget*, const CGCallDesc*);
    673   /* Pure query: can `d` be emitted as a sibling (tail) call on this target,
    674    * given the current target state? Returns NULL if yes; otherwise a short,
    675    * static, human-readable string naming the blocker, used verbatim in the
    676    * musttail diagnostic. Must not emit code and must not abort.
    677    *
    678    * Realizable means the target can transfer control to the callee while
    679    * preserving the source-level call/return semantics of this function. CG
    680    * verifies type compatibility before setting CG_CALL_TAIL; target-specific
    681    * blockers such as variadic lowering, frame teardown constraints, or
    682    * unavailable tail-call support are reported here.
    683    *
    684    * CG owns the tail policy: it calls this first and only sets CG_CALL_TAIL
    685    * when it returns NULL, so a NULL result must guarantee a later call() with
    686    * CG_CALL_TAIL can emit the sibling call. May itself be NULL, meaning the
    687    * target supports no tail calls at all. */
    688   const char* (*tail_call_unrealizable_reason)(CgTarget*, const CGCallDesc*);
    689   /* Return from the function. `value` is the single returned local, or
    690    * CG_LOCAL_NONE for a void return. */
    691   void (*ret)(CgTarget*, CGLocal value);
    692   /* Control terminator marking statically-unreachable code (the C
    693    * __builtin_unreachable point). Like ret/jump it ends the current basic
    694    * block: no fall-through successor is implied. Backends typically emit a
    695    * trap instruction (brk/ud2/ebreak), a Wasm `unreachable`, or a
    696    * `__builtin_unreachable()` in the C-source target; an interpreter faults.
    697    * Distinct from INTRIN_TRAP, which is an expression-level intrinsic that
    698    * does not terminate the block. */
    699   void (*unreachable)(CgTarget*);
    700 
    701   /* ---- alloca ----
    702    * Dynamic stack allocation. `size` is i64 bytes; `align` is the required
    703    * alignment of the returned pointer. Backend grows the (linear-memory or
    704    * native) shadow stack, returns the pointer in `dst`. v1 only emits this
    705    * via __builtin_alloca; C VLAs are not parsed (__STDC_NO_VLA__). */
    706   void (*alloca_)(CgTarget*, Operand dst /*LOCAL*/, Operand size, u32 align);
    707 
    708   /* ---- variadics ----
    709    * va_list type is per-arch (defined in <stdarg.h>); these methods
    710    * implement the four C macros after builtin substitution. Every va_list
    711    * operand is passed by address (each `*ap_addr` argument is `&ap`). */
    712   void (*va_start_)(CgTarget*, Operand ap_addr);
    713   void (*va_arg_)(CgTarget*, Operand dst /*LOCAL*/, Operand ap_addr,
    714                   KitCgTypeId t);
    715   void (*va_end_)(CgTarget*, Operand ap_addr);
    716   void (*va_copy_)(CgTarget*, Operand dst_ap_addr, Operand src_ap_addr);
    717 
    718   /* ---- atomics ---- */
    719   void (*atomic_load)(CgTarget*, Operand dst /*LOCAL*/, Operand addr, MemAccess,
    720                       KitCgMemOrder);
    721   void (*atomic_store)(CgTarget*, Operand addr, Operand src, MemAccess,
    722                        KitCgMemOrder);
    723   void (*atomic_rmw)(CgTarget*, KitCgAtomicOp,
    724                      Operand dst /*LOCAL: prior value*/, Operand addr,
    725                      Operand val, MemAccess, KitCgMemOrder);
    726   void (*atomic_cas)(CgTarget*, Operand prior /*LOCAL*/,
    727                      Operand ok /*LOCAL, i1*/, Operand addr, Operand expected,
    728                      Operand desired, MemAccess, KitCgMemOrder success,
    729                      KitCgMemOrder failure);
    730   void (*fence)(CgTarget*, KitCgMemOrder);
    731 
    732   /* ---- compiler intrinsics ----
    733    * Typed dispatch for builtins whose lowering is backend-relevant
    734    * (inline-vs-libcall, inline sequence selection) or whose semantics opt
    735    * cares about (hint pattern matching, exhaustiveness). The IR carries
    736    * IR_INTRINSIC + IRIntrinAux.kind; the wrapped target receives the same call
    737    * at lowering time with materialized operands.
    738    *
    739    * Operand shapes by IntrinKind:
    740    *   POPCOUNT/CTZ/CLZ/BSWAP*  : dsts[0] LOCAL result; args[0] LOCAL input
    741    *   MEMCPY/MEMMOVE           : dsts none; args = (dst_addr, src_addr, n)
    742    *   MEMSET                   : dsts none; args = (dst_addr, byte, n)
    743    *   PREFETCH                 : dsts none; args = (addr [, rw [, locality]])
    744    *   ASSUME_ALIGNED           : dsts[0] LOCAL; args = (ptr, align [, offset])
    745    *   EXPECT                   : dsts[0] LOCAL; args = (val, expected)
    746    *   TRAP                     : dsts none; args none
    747    *   SETJMP                   : dsts[0] LOCAL i32 result; args = (&buf)
    748    *   LONGJMP                  : dsts none; args = (&buf, val); no return
    749    *   ADD/SUB/MUL_OVERFLOW     : dsts[0] LOCAL result, dsts[1] LOCAL i1
    750    *                              overflow flag; args = (a, b)
    751    *
    752    * Backends that lack an inline sequence for a given kind may emit a
    753    * normal IR_CALL-shaped sequence to a runtime entry (e.g. memcpy) — the
    754    * IR records intent, the backend chooses mechanism. Hint kinds may be
    755    * lowered as no-ops where the arch has nothing to emit. */
    756   void (*intrinsic)(CgTarget*, IntrinKind, Operand* dsts, u32 ndst,
    757                     const Operand* args, u32 narg);
    758 
    759   /* ---- inline asm ----
    760    * Per-arch constraint binding + template assembly, packaged as one block.
    761    *   ins[i] are pre-evaluated input operands.
    762    *   out_ops[i] is filled by the arch with the location holding the result
    763    *     for outs[i]; the caller (cg) reads them out after the call.
    764    *   "=&r" early-clobber outputs must be allocated disjoint from any input.
    765    * opt_cgtarget records this as a single IR_ASM_BLOCK; the wrapped target
    766    * receives the same call at lowering time with materialized operands. */
    767   int (*asm_is_reg_constraint)(CgTarget*, const char* constraint);
    768   void (*asm_block)(CgTarget*, const char* tmpl, const AsmConstraint* outs,
    769                     u32 nout, Operand* out_ops, const AsmConstraint* ins,
    770                     u32 nin, const Operand* in_ops, const Sym* clobbers,
    771                     u32 nclob, u32 clobber_abi_sets);
    772 
    773   /* Optional: handle a top-level `__asm__("...")` block (file scope, not
    774    * inside a function). Backends that leave this NULL fall back to the
    775    * generic asm-parser path through KitCg.mc. Wasm overrides this to
    776    * diagnose-and-fail since the wasm module has no native asm parser. */
    777   void (*file_scope_asm)(CgTarget*, const char* src, size_t len);
    778 
    779   /* ---- source-location tracking ----
    780    * Sets the SrcLoc inherited by subsequent emit-side calls (binop/load/...).
    781    * opt_cgtarget stamps it on every recorded Inst. Sticky until the next
    782    * set_loc. */
    783   void (*set_loc)(CgTarget*, SrcLoc);
    784 
    785   /* ---- end-of-TU hook ----
    786    * No-op for plain target CgTargets. opt_cgtarget runs cross-function passes
    787    * (inlining + cleanup) and lowers all buffered IR functions into the
    788    * wrapped target CgTarget. Drivers must call this after the last func_end and
    789    * before reading from `obj` or calling debug_emit. */
    790   void (*finalize)(CgTarget*);
    791 
    792   void (*destroy)(CgTarget*);
    793 };
    794 
    795 /* Shared switch lowering. cg's kit_cg_switch installs this as the
    796  * default target->switch_ behavior; opt's pass_emit calls it when
    797  * replaying IR_SWITCH against a backend that doesn't override switch_.
    798  * Lowers `desc` on target `t` as a compare-and-branch chain built from
    799  * target->cmp_branch + target->jump. That chain is fast at -O0 and is the
    800  * input shape an opt-level jump-table rewrite starts from. */
    801 void cg_lower_switch_default(CgTarget* t, const CGSwitchDesc* desc);
    802 
    /* Free-function entry points over CgTarget. Only the prototypes are
     * declared here. NOTE(review): the behaviors listed below are inferred
     * from the names and signatures alone -- confirm against the definitions.
     *   cgtarget_new               : returns a CgTarget* for a Compiler +
     *                                ObjBuilder pair.
     *   cgtarget_set_finish_policy : receives the CgFinishPolicy by const
     *                                pointer.
     *   cgtarget_finalize          : presumably dispatches the end-of-TU
     *                                `finalize` hook of the target.
     *   cgtarget_free              : presumably dispatches the target's
     *                                `destroy` hook. */
    803 CgTarget* cgtarget_new(Compiler*, ObjBuilder*);
    804 void cgtarget_set_finish_policy(CgTarget*, const CgFinishPolicy*);
    805 void cgtarget_finalize(CgTarget*);
    806 void cgtarget_free(CgTarget*);
    807 
    808 /* A CGBackend is the unit the registry hands out: "give me a CgTarget for
    809  * this Compiler + ObjBuilder + emit options." `name` is the backend's name
         * string; `make` is the factory that takes those three inputs (the emit
         * options as a const KitCodeOptions*) and returns the CgTarget*. */
    810 typedef struct CGBackend {
    811   const char* name;
    812   CgTarget* (*make)(Compiler*, ObjBuilder*, const KitCodeOptions*);
    813 } CGBackend;
    814 
    815 /* Pick the right CGBackend for a session given the compiler's target arch
    816  * and the per-emit KitCodeOptions. Returns NULL when no backend in this
    817  * build can serve the request, so callers must check the result. */
    818 const CGBackend* cg_backend_for_session(const Compiler*, const KitCodeOptions*);
    819 
    820 /* Human-readable arch name for diagnostics. The result is independent of the
    821  * compiled-in backends, so it can name a target whose backend is disabled. */
    822 const char* arch_kind_name(KitArchKind);
    823 
    824 #endif