cgtarget.h (33533B)
1 #ifndef KIT_CG_CGTARGET_H 2 #define KIT_CG_CGTARGET_H 3 4 #include <kit/cg.h> 5 #include <kit/compile.h> 6 7 #include "core/core.h" 8 #include "obj/obj.h" 9 10 typedef u32 CGLocal; 11 #define CG_LOCAL_NONE 0u 12 13 /* Vector / SIMD forward compat: vector ops will arrive as new variants in 14 * the BinOp, UnOp, CmpOp, ConvKind families. Backend switches over these 15 * enums must use `default:` (unreachable / panic) rather than exhaustive 16 * case lists, so adding a new variant later does not silently mis-handle on 17 * backends that haven't been taught about it. Vector loads/stores reuse the 18 * existing load/store methods with vector-typed Operands and appropriate 19 * MemAccess. */ 20 21 /* Integer/float binary ops. Edge-case semantics are fully defined (no undefined 22 * behavior) in doc/IR.md: iadd/isub/imul (and UO_NEG) wrap modulo 2^width; 23 * sdiv/udiv/srem/urem and the shifts have a portable default plus an opt-in 24 * target-defined mode selected per instruction via CgIrInstFlag (src/cg/ir.h). 25 * FP ops are strict IEEE-754 in the target's default rounding/exception 26 * environment; there is no FP remainder op (the frontend calls fmod). */ 27 typedef enum BinOp { 28 BO_IADD, 29 BO_ISUB, 30 BO_IMUL, 31 BO_SDIV, 32 BO_UDIV, 33 BO_SREM, 34 BO_UREM, 35 BO_FADD, 36 BO_FSUB, 37 BO_FMUL, 38 BO_FDIV, 39 BO_AND, 40 BO_OR, 41 BO_XOR, 42 BO_SHL, 43 BO_SHR_S, 44 BO_SHR_U, 45 } BinOp; 46 47 typedef enum UnOp { 48 UO_NEG, 49 UO_FNEG, 50 UO_NOT, /* logical: 0/1 */ 51 UO_BNOT, /* bitwise ~ */ 52 } UnOp; 53 54 /* Compares producing i1. The 10 integer members (CMP_EQ..CMP_GE_U) are total 55 * and 1:1 with KitCgIntCmpOp; on integers CMP_EQ/CMP_NE are plain equality. 56 * 57 * The 12 floating-point members form a disjoint block laid out *after* the 58 * integer block, in the same order as the public KitCgFpCmpOp, and are 59 * IEEE-complete: each predicate encodes ordered (NaN -> false) vs unordered 60 * (NaN -> true) explicitly, so the distinction reaches every backend. The 61 * identity used throughout the backends is unordered-R == NOT(ordered-not-R) 62 * (e.g. ULT == !(OGE), UNE == !(OEQ)). CMP_OEQ_F is the FP boundary: an op is a 63 * floating compare iff op >= CMP_OEQ_F. */ 64 typedef enum CmpOp { 65 CMP_EQ, 66 CMP_NE, 67 CMP_LT_S, 68 CMP_LE_S, 69 CMP_GT_S, 70 CMP_GE_S, 71 CMP_LT_U, 72 CMP_LE_U, 73 CMP_GT_U, 74 CMP_GE_U, 75 /* Ordered FP relationals (NaN -> false). */ 76 CMP_OEQ_F, 77 CMP_ONE_F, 78 CMP_OLT_F, 79 CMP_OLE_F, 80 CMP_OGT_F, 81 CMP_OGE_F, 82 /* Unordered FP relationals (NaN -> true). */ 83 CMP_UEQ_F, 84 CMP_UNE_F, 85 CMP_ULT_F, 86 CMP_ULE_F, 87 CMP_UGT_F, 88 CMP_UGE_F, 89 } CmpOp; 90 91 /* Conversions. Widths must order correctly (sext/zext widen, trunc narrows, 92 * bitcast preserves byte size). itof, fext, and ftrunc round to nearest-even; 93 * ftoi_s/ftoi_u round toward zero with a portable saturating out-of-range 94 * default (NaN -> 0) and an opt-in target-defined mode 95 * (CG_IR_INST_TARGET_FPTOINT_EDGES in src/cg/ir.h). Full rules in doc/IR.md. */ 96 typedef enum ConvKind { 97 CV_SEXT, 98 CV_ZEXT, 99 CV_TRUNC, 100 CV_ITOF_S, 101 CV_ITOF_U, 102 CV_FTOI_S, 103 CV_FTOI_U, 104 CV_FEXT, 105 CV_FTRUNC, 106 CV_BITCAST, 107 } ConvKind; 108 109 /* Atomic op kinds (KitCgAtomicOp) and memory orders (KitCgMemOrder) come 110 * straight from the public API. Which orders are legal depends on the atomic 111 * op: load excludes release/acq_rel; store excludes acquire/consume/acq_rel; 112 * CAS failure order is one of relaxed/consume/acquire/seq_cst and no stronger 113 * than success. See the Atomics edge-case rules in doc/IR.md (mirrored by 114 * kit_cg_atomic_is_legal). */ 115 116 /* Compiler-intrinsic kinds dispatched through CgTarget.intrinsic and carried 117 * on IR_INTRINSIC via IRIntrinAux.kind. The set is bounded: a backend 118 * must know each one to choose inline-vs-libcall. Hint intrinsics 119 * (EXPECT/TRAP/PREFETCH/ASSUME_ALIGNED) ride the same dispatch: 120 * the backend decides whether they emit an instruction or a no-op. 121 * `unreachable` is NOT here: it is a first-class control terminator with 122 * its own CgTarget hook (see below), not an intrinsic. 123 * 124 * Not every C builtin lives here. Parser-evaluated builtins 125 * (__builtin_offsetof, __builtin_constant_p, __builtin_choose_expr, 126 * __builtin_types_compatible_p) fold at parse and never reach IR. Builtins 127 * that already have dedicated CgTarget methods (alloca, va_*, atomics) keep 128 * them. Returns-twice and no-return control intrinsics use this dispatch so 129 * opt can preserve their CFG effects without growing backend vtable hooks. */ 130 typedef enum IntrinKind { 131 INTRIN_NONE = 0, 132 133 /* bit ops */ 134 INTRIN_POPCOUNT, 135 INTRIN_CTZ, 136 INTRIN_CLZ, 137 INTRIN_BSWAP, 138 139 /* memory. memcpy/memset are the dedicated copy_bytes/set_bytes hooks 140 * (kit_cg_memcpy/_memset); only memmove flows through the intrinsic path. */ 141 INTRIN_MEMMOVE, 142 INTRIN_PREFETCH, 143 INTRIN_ASSUME_ALIGNED, 144 145 /* hints */ 146 INTRIN_EXPECT, 147 INTRIN_TRAP, 148 149 /* OS trap: args[0] is the syscall number, args[1..6] are integer/pointer 150 * payloads; dsts[0] receives the target long result. */ 151 INTRIN_SYSCALL, 152 153 /* non-local control */ 154 INTRIN_SETJMP, 155 INTRIN_LONGJMP, 156 157 /* checked arith — multi-result (value, overflow_flag) */ 158 INTRIN_SADD_OVERFLOW, 159 INTRIN_UADD_OVERFLOW, 160 INTRIN_SSUB_OVERFLOW, 161 INTRIN_USUB_OVERFLOW, 162 INTRIN_SMUL_OVERFLOW, 163 INTRIN_UMUL_OVERFLOW, 164 165 /* baremetal CPU control — single-instruction, no operands unless noted. 166 * dsts/args empty except IRQ_SAVE (dsts[0] = saved interrupt state) and 167 * IRQ_RESTORE (args[0] = state to restore). Privileged forms (WFI/WFE/SEV 168 * and the IRQ family) trap at user level; backends still emit the one 169 * instruction and frontends gate any runtime use behind a capability test. */ 170 INTRIN_CPU_NOP, 171 INTRIN_CPU_YIELD, 172 INTRIN_WFI, 173 INTRIN_WFE, 174 INTRIN_SEV, 175 INTRIN_ISB, 176 INTRIN_DMB, 177 INTRIN_DSB, 178 INTRIN_IRQ_SAVE, 179 INTRIN_IRQ_RESTORE, 180 INTRIN_IRQ_ENABLE, 181 INTRIN_IRQ_DISABLE, 182 183 /* frame-pointer-chain introspection — value-producing, single immediate 184 * operand (the constant level). args[0] is the level (OPK_IMM); dsts[0] is 185 * the void* result. Lowered as an unrolled FP walk; modeled as an ordinary 186 * frame-dependent memory read (IR_INTRINSIC is already conservatively 187 * side-effecting in opt, so it is never hoisted, CSE'd, or eliminated). */ 188 INTRIN_FRAME_ADDRESS, 189 INTRIN_RETURN_ADDRESS, 190 } IntrinKind; 191 192 typedef enum OpKind { 193 OPK_IMM, 194 OPK_LOCAL, /* typed semantic local */ 195 OPK_GLOBAL, /* address: symbol+addend, not a load */ 196 OPK_INDIRECT, /* [local + ofs], with optional indexed local */ 197 } OpKind; 198 199 typedef enum CGLocalFlag { 200 CG_LOCAL_FLAG_NONE = 0, 201 CG_LOCAL_ADDR_TAKEN = 1u << 0, 202 CG_LOCAL_MEMORY_REQUIRED = 1u << 1, 203 } CGLocalFlag; 204 205 typedef struct CGLocalDesc { 206 KitCgTypeId type; 207 Sym name; 208 SrcLoc loc; 209 u32 size; 210 u32 align; 211 u32 flags; /* CGLocalFlag */ 212 } CGLocalDesc; 213 214 typedef enum MemFlag { 215 MF_NONE = 0, 216 MF_VOLATILE = 1u << 0, 217 MF_ATOMIC = 1u << 1, 218 MF_RESTRICT = 1u << 2, 219 MF_READONLY = 1u << 3, 220 MF_WRITEONLY = 1u << 4, 221 MF_UNALIGNED = 1u << 5, 222 } MemFlag; 223 224 typedef enum AliasKind { 225 ALIAS_UNKNOWN, 226 ALIAS_LOCAL, 227 ALIAS_GLOBAL, 228 ALIAS_PARAM, 229 ALIAS_HEAP, 230 ALIAS_STRING, 231 } AliasKind; 232 233 typedef struct AliasRoot { 234 u8 kind; /* AliasKind */ 235 u8 pad[3]; 236 union { 237 i32 local_id; 238 ObjSymId global; 239 u32 param_idx; 240 Sym string_id; 241 } v; 242 } AliasRoot; 243 244 typedef struct MemAccess { 245 KitCgTypeId type; /* codegen object type accessed */ 246 u32 size; /* ABI byte size of this access (storage-unit size for a 247 * bit-field) */ 248 u32 align; /* known byte alignment; 0 means unknown */ 249 u16 flags; /* MemFlag */ 250 u16 addr_space; 251 /* Bit-field rider: when bf_width != 0 this access is a bit-field, so `load` 252 * extracts (shift+mask+extend) and `store` inserts (read-modify-write) within 253 * the storage unit described by {type,size}. The CgTarget impls translate 254 * this to the physical NativeTarget bitfield_load/store (or the recorder IR 255 * op); the semantic CgTarget no longer carries a separate bit-field method. 256 */ 257 u16 bf_offset; /* target-endian bit offset within the storage unit */ 258 u16 bf_width; /* 0 => not a bit-field access */ 259 u8 bf_signed; /* signed extraction on load */ 260 u8 bf_pad[3]; 261 AliasRoot alias; 262 } MemAccess; 263 264 typedef struct ConstBytes { 265 KitCgTypeId type; 266 const u8* bytes; /* ABI representation, little/big endian per target */ 267 u32 size; 268 u32 align; 269 } ConstBytes; 270 271 typedef struct AggregateAccess { 272 KitCgTypeId type; 273 u32 size; 274 u32 align; 275 MemAccess mem; 276 } AggregateAccess; 277 278 typedef struct BitFieldAccess { 279 KitCgTypeId field_type; 280 MemAccess storage; 281 u32 storage_offset; /* byte offset from record base */ 282 u16 bit_offset; /* target-endian bit offset within storage unit */ 283 u16 bit_width; /* may be 0 for zero-width layout barriers */ 284 u8 signed_; 285 u8 pad[3]; 286 } BitFieldAccess; 287 288 /* Reconstruct the BitFieldAccess a CgTarget impl needs from the bit-field 289 * MemAccess that rides the generic load/store (bf_width != 0). The storage unit 290 * is {m.type, m.size}; the bit geometry is the bf_* rider. */ 291 static inline BitFieldAccess bf_from_mem(MemAccess m) { 292 BitFieldAccess bf = {0}; 293 bf.field_type = m.type; 294 bf.storage = m; 295 bf.storage.bf_offset = 0; 296 bf.storage.bf_width = 0; 297 bf.storage.bf_signed = 0; 298 bf.bit_offset = m.bf_offset; 299 bf.bit_width = m.bf_width; 300 bf.signed_ = m.bf_signed; 301 return bf; 302 } 303 304 typedef struct Operand { 305 u8 kind; 306 u8 pad[3]; 307 KitCgTypeId type; 308 union { 309 i64 imm; 310 CGLocal local; 311 struct { 312 ObjSymId sym; 313 i64 addend; 314 } global; 315 struct { 316 CGLocal base; 317 CGLocal index; /* CG_LOCAL_NONE when no index operand */ 318 u8 log2_scale; /* 0..3 -> 1/2/4/8 bytes; ignored when no index */ 319 i32 ofs; 320 } ind; 321 } v; 322 } Operand; 323 324 typedef struct CGParamDesc { 325 u32 index; 326 Sym name; 327 KitCgTypeId type; 328 u32 size; 329 u32 align; 330 u32 flags; /* CGLocalFlag */ 331 SrcLoc loc; 332 } CGParamDesc; 333 334 /* text_section_id and group_id are per-function so that -ffunction-sections, 335 * __attribute__((section)) on functions, and COMDAT for C11 inline-with- 336 * external-definition all work with no extra plumbing. Decl.section_id already 337 * carries the user's request; CG/decl decides the section name policy 338 * (default .text, vs .text.<sym> under -ffunction-sections, vs explicit 339 * attribute). The backend just writes to the named section. */ 340 /* Phase 2 attribute-derived hints. The backends are free to ignore these; 341 * they exist so the parser can communicate _Noreturn / __attribute__ 342 * info down to CG without forcing every backend to consult the Decl. */ 343 typedef enum CGFuncDescFlag { 344 CGFD_NONE = 0, 345 CGFD_NORETURN = 1u << 0, 346 } CGFuncDescFlag; 347 348 typedef struct CGFuncDesc { 349 ObjSymId sym; 350 ObjSecId text_section_id; 351 ObjGroupId group_id; /* OBJ_GROUP_NONE if none */ 352 KitCgTypeId fn_type; 353 KitCgTypeId result_type; /* KIT_CG_TYPE_NONE/void == no result */ 354 const CGParamDesc* params; 355 u32 nparams; 356 SrcLoc loc; 357 u32 flags; /* CGFuncDescFlag */ 358 KitCgInlinePolicy inline_policy; 359 u16 sym_bind; /* SymBind */ 360 u16 sym_kind; /* SymKind */ 361 u8 sym_vis; /* SymVis */ 362 u8 atomize; 363 u8 pad[2]; 364 } CGFuncDesc; 365 366 typedef enum CGCallFlag { 367 CG_CALL_NONE = 0, 368 /* Sibling call. The target emits a tail-position call and does NOT emit a 369 * return-style continuation. CG will not invoke target->ret afterwards. 370 * 371 * Realizability is verified before this flag is set: CG only sets it after 372 * tail_call_unrealizable_reason() returns NULL for the same desc and call 373 * state, so the target can emit the sibling call unconditionally. The 374 * target may assert/compiler_panic if the flag is set on an unrealizable 375 * desc, but that is an internal-consistency check — fallback and 376 * diagnostics for unrealizable tail calls are CG's responsibility, not the 377 * target's. */ 378 CG_CALL_TAIL = 1u << 0, 379 } CGCallFlag; 380 381 typedef struct CGCallDesc { 382 KitCgTypeId fn_type; 383 Operand callee; 384 const CGLocal* args; 385 CGLocal result; /* CG_LOCAL_NONE == void callee (no result) */ 386 u32 nargs; 387 u16 flags; /* CGCallFlag */ 388 u8 tail_policy; /* KitCgTailPolicy; meaningful when CG_CALL_TAIL is set. 389 * The opt recorder accepts every tail and preserves this so 390 * the replay can pick: emit tail (realizable), fall back to 391 * call+ret (ALLOWED), or diagnose (MUST). */ 392 u8 pad; 393 KitCgInlinePolicy inline_policy; 394 } CGCallDesc; 395 396 typedef u32 Label; 397 #define LABEL_NONE 0 398 399 typedef enum ScopeKind { 400 SCOPE_BLOCK, /* break exits forward */ 401 SCOPE_LOOP, /* break exits forward; continue uses explicit target */ 402 } ScopeKind; 403 404 typedef u32 CGScope; 405 #define CG_SCOPE_NONE 0u 406 407 typedef struct CGScopeDesc { 408 u8 kind; /* ScopeKind */ 409 u8 pad[3]; 410 Label break_label; /* explicit target for break; LABEL_NONE => target creates 411 one */ 412 Label continue_label; /* explicit target for continue; LABEL_NONE for 413 non-loops */ 414 KitCgTypeId result_type; /* reserved for structured expression results */ 415 } CGScopeDesc; 416 417 typedef struct AsmConstraint { 418 const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... */ 419 Sym name; /* GCC `[name]` symbolic operand; 0 if absent */ 420 KitCgTypeId type; /* codegen type of the bound expression (output lvalue or 421 input rvalue). Drives type width for the binder. 422 NULL only for hand-built test constraints (binder 423 falls back to a 64-bit int default). */ 424 Sym reg; /* Explicit hard-register name ("r10"/"x8"/...) this operand 425 must occupy — a GNU local register variable bound as an 426 operand; 0 = unconstrained. Only the target's register 427 file resolves the name to a physical register. */ 428 u8 dir; /* KitCgAsmDir */ 429 u8 pad[3]; 430 } AsmConstraint; 431 432 typedef struct CGSwitchCase { 433 /* Bit pattern matched against the selector; interpreted using 434 * selector_type's width and signedness (signed comparison uses 435 * sign-extension to selector_type's width). */ 436 u64 value; 437 Label label; 438 } CGSwitchCase; 439 440 typedef struct CGSwitchDesc { 441 Operand selector; /* OPK_LOCAL or OPK_IMM */ 442 KitCgTypeId selector_type; 443 Label default_label; /* LABEL_NONE means "fall through past the switch" */ 444 const CGSwitchCase* cases; 445 u32 ncases; 446 u8 hint; /* KitCgSwitchHint */ 447 u8 opt_level; /* 0/1/2; reads policy in cg_lower_switch_default */ 448 u8 pad[2]; 449 } CGSwitchDesc; 450 451 typedef struct CGLocalStaticDataDesc { 452 ObjSymId sym; 453 KitCgTypeId type; 454 KitCgDataDefAttrs attrs; 455 u32 align; 456 } CGLocalStaticDataDesc; 457 458 typedef enum CGDebugLocKind { 459 CG_DEBUG_LOC_NONE, 460 CG_DEBUG_LOC_FRAME, 461 CG_DEBUG_LOC_REG, 462 CG_DEBUG_LOC_GLOBAL, 463 } CGDebugLocKind; 464 465 typedef struct CGDebugLoc { 466 u8 kind; /* CGDebugLocKind */ 467 u8 pad[3]; 468 union { 469 /* Offset in the same target-defined frame-base coordinate system that the 470 * target/debugger pair uses to materialize frame-relative variables. CG 471 * treats this as opaque target data and only maps it into the debug 472 * producer's generic frame-location form. */ 473 i32 frame_ofs; 474 u32 reg; 475 ObjSymId global; 476 } v; 477 } CGDebugLoc; 478 479 /* Forward-declared (same as arch/mc.h) so a CgTarget can carry an optional 480 * Debug producer without this header depending on debug/debug.h. */ 481 typedef struct Debug Debug; 482 483 typedef struct CgFinishPolicy { 484 u8 output_kind; /* KitCgOutputKind */ 485 u8 interposition_policy; /* KitCgInterpositionPolicy */ 486 u8 pad[2]; 487 const ObjSymId* preserved_symbols; 488 u32 npreserved_symbols; 489 } CgFinishPolicy; 490 491 typedef struct CgTarget CgTarget; 492 struct CgTarget { 493 /* Typed IR lowering context. Subclasses extend. */ 494 Compiler* c; 495 ObjBuilder* obj; 496 497 /* Optional DWARF producer, created by the backend's `make` when 498 * opts->debug_info is set (else NULL). The session reads this back into 499 * its own g->debug to drive func/line/emit; the backend's MCEmitter 500 * shares the same object for line-row emission. */ 501 Debug* debug; 502 503 CgFinishPolicy finish_policy; 504 505 /* ---- function lifecycle ---- */ 506 void (*func_begin)(CgTarget*, const CGFuncDesc*); 507 void (*func_end)(CgTarget*); 508 509 /* Symbol-aliasing hook. Optional (may be NULL). cg invokes this from 510 * kit_cg_alias after the obj symbol-table mirror is wired so the 511 * backend can emit any out-of-band representation it needs — e.g. the 512 * C-source target writes 513 * `T alias_sym(...) __attribute__((alias("target")));` 514 * because the alias relationship isn't expressible by sharing a 515 * (section, value) pair the way a relocatable object can. Native 516 * machine-code backends don't need this hook because obj_symbol_define 517 * already aliases the bytes. `type` is the alias's CG type (function 518 * or object), needed by the C target to render the prototype. */ 519 void (*alias)(CgTarget*, ObjSymId alias_sym, ObjSymId target_sym, 520 KitCgTypeId type); 521 522 /* ---- locals ---- */ 523 CGLocal (*local)(CgTarget*, const CGLocalDesc*); 524 void (*local_addr)(CgTarget*, Operand dst, const CGLocalDesc*, CGLocal); 525 CGLocal (*param)(CgTarget*, const CGParamDesc*); 526 /* Optional debug-info query after function frame layout is finalized. 527 * Targets return a target-authored location for semantic local storage; CG 528 * owns deciding which source locals/params get emitted and translating the 529 * target-neutral CGDebugLoc into the debug producer API. */ 530 int (*local_debug_loc)(CgTarget*, CGLocal, CGDebugLoc*); 531 532 /* ---- labels and control flow ---- */ 533 Label (*label_new)(CgTarget*); 534 void (*label_place)(CgTarget*, Label); 535 void (*jump)(CgTarget*, Label); 536 /* Fused compare-and-branch. cg's preferred form: avoids materializing 0/1 537 * for a normal `if (a < b)`. For an arbitrary i1 in a local, callers 538 * synthesize cmp_branch(CMP_NE, val, IMM_ZERO, label). */ 539 void (*cmp_branch)(CgTarget*, CmpOp, Operand a, Operand b, Label); 540 541 /* Structured switch dispatch. 542 * 543 * Optional: when NULL, cg's shared `cg_lower_switch_default` runs and 544 * lowers in terms of cmp_branch / jump / indirect_branch / data ops — 545 * the path every native arch uses. Backends override switch_ only when 546 * they can express the construct natively: the C-source target emits 547 * `switch (val) { case V: goto L_V; ... default: goto L_def; }`; a 548 * future WASM target would emit `br_table`. 549 * 550 * The descriptor carries the full structured form (selector + paired 551 * cases + default + frontend hint); density policy lives in 552 * cg_lower_switch_default. */ 553 void (*switch_)(CgTarget*, const CGSwitchDesc*); 554 555 /* Optional. When non-NULL and it returns 0, the target cannot realize a 556 * jump-table dispatch built from a rodata table of code-label addresses 557 * (Wasm: linear memory holds no code addresses and there is no computed 558 * branch). kit_cg_switch then routes dense/forced-table plans through 559 * `switch_` (e.g. br_table) instead of the label-table + indirect_branch 560 * lowering. NULL means the label-table path is supported (every native 561 * arch). */ 562 int (*supports_label_table)(CgTarget*); 563 564 /* Indirect branch primitive: transfer control to the address in 565 * `addr` (an OPK_LOCAL holding a function-local label address). 566 * 567 * Required on every native arch and used by: 568 * - kit_cg_computed_goto for direct-threaded dispatch 569 * - opt-level jump-table lowerings of IR_SWITCH (when implemented) 570 * 571 * `valid_targets[0..ntargets)` is the closed set of labels the address 572 * can resolve to. Backends use it for branch-target hardening (BTI, 573 * PAC, x86 CFG, IBT) and opt uses it to build the CFG; opt requires 574 * ntargets > 0. */ 575 void (*indirect_branch)(CgTarget*, Operand addr, const Label* valid_targets, 576 u32 ntargets); 577 578 /* Materialize the runtime address of a function-local label into 579 * `dst`. The label must already exist (label_new); it does not 580 * need to be placed yet. Backends emit the target's relative address 581 * materialization: 582 * x86_64 `lea L(%rip), %r`, aarch64 `adr X, L`, riscv `auipc/addi`. 583 * 584 * The resulting pointer is a function-local label address (per the 585 * public kit_cg_push_label_addr contract) and must only be consumed 586 * by indirect_branch inside the defining function's activation. */ 587 void (*load_label_addr)(CgTarget*, Operand dst, Label label); 588 589 /* Optional source-backend hook for function-local static data definitions 590 * that need function label scope, currently used for C `&&label` 591 * dispatch-table initializers. Returning non-zero from begin means the 592 * target consumes bytes/zeros/label addresses until end; ordinary object 593 * data emission is skipped for that definition. */ 594 int (*local_static_data_begin)(CgTarget*, const CGLocalStaticDataDesc*); 595 /* data == NULL means append len zero bytes. */ 596 void (*local_static_data_write)(CgTarget*, const u8* data, u64 len); 597 void (*local_static_data_label_addr)(CgTarget*, Label target, i64 addend, 598 u32 width, u32 address_space); 599 void (*local_static_data_end)(CgTarget*); 600 601 /* Optional. When non-NULL, kit_cg_data_label_addr panics with the 602 * returned target-specific message before reaching object-data emission. Lets 603 * targets that cannot resolve function-local label addresses in 604 * static-data initializers (e.g. the Wasm backend) fail with a 605 * recognizable, target-prefixed diagnostic. The returned string must remain 606 * valid for the lifetime of the panic call (string literals are typical). */ 607 const char* (*data_label_addr_unsupported_msg)(CgTarget*); 608 609 /* ---- structured control flow ---- 610 * Mirrors CG's scope ops. CG passes explicit break/continue targets so C 611 * `for` continues can land on the increment expression rather than the loop 612 * header. Real backends shim these onto label_new/label_place/jump. 613 * The WASM backend consumes them natively to emit block/loop with 614 * structurally-bounded br targets, which is what gives WASM its CFI. 615 * 616 * `result_type` is reserved for structured expression results on WASM (NULL 617 * for the statement case used by C); other backends ignore it. */ 618 CGScope (*scope_begin)(CgTarget*, const CGScopeDesc*); 619 void (*scope_end)(CgTarget*, CGScope); 620 void (*break_to)(CgTarget*, CGScope); 621 void (*continue_to)(CgTarget*, CGScope); 622 623 /* ---- data movement (split, no overloading) ---- */ 624 void (*load_imm)(CgTarget*, Operand dst /*LOCAL*/, i64 imm); 625 void (*load_const)(CgTarget*, Operand dst /*LOCAL*/, ConstBytes); 626 void (*copy)(CgTarget*, Operand dst /*LOCAL*/, Operand src /*LOCAL*/); 627 void (*load)(CgTarget*, Operand dst /*LOCAL*/, 628 Operand addr /*LOCAL|GLOBAL|INDIRECT*/, MemAccess); 629 void (*store)(CgTarget*, Operand addr /*LOCAL|GLOBAL|INDIRECT*/, 630 Operand src /*LOCAL|IMM*/, MemAccess); 631 void (*addr_of)(CgTarget*, Operand dst /*LOCAL*/, 632 Operand lv /*LOCAL|GLOBAL|INDIRECT*/); 633 /* Materializes the address of a thread-local symbol into `dst`. Distinct 634 * from addr_of because TLS resolution can be a multi-instruction sequence 635 * or a runtime call (e.g. GD model), not a cheap addressing mode. The 636 * backend chooses the TLS model (LE/IE/LD/GD) from c->target and the 637 * symbol's visibility. Subsequent accesses go through OPK_INDIRECT on the 638 * resulting pointer; this lets opt hoist the materialization via LICM. */ 639 void (*tls_addr_of)(CgTarget*, Operand dst /*LOCAL*/, ObjSymId sym, 640 i64 addend); 641 void (*copy_bytes)(CgTarget*, Operand dst_addr, Operand src_addr, 642 AggregateAccess); 643 void (*set_bytes)(CgTarget*, Operand dst_addr, Operand byte_value, 644 AggregateAccess); 645 /* Bit-fields are not a separate CgTarget method: a bit-field load/store rides 646 * the generic `load`/`store` above with a bit-field MemAccess (bf_width != 647 * 0). Each CgTarget impl translates it (NativeDirectTarget -> NativeTarget's 648 * bitfield_load/store; IrRecorder -> CG_IR_BITFIELD_LOAD/STORE). */ 649 650 /* ---- arithmetic, compare, convert ---- 651 * binop/unop/cmp accept OPK_LOCAL or OPK_IMM in source operand positions 652 * (`a`, `b`); `dst` is always OPK_LOCAL. The backend chooses between an 653 * imm-form encoding and materializing the literal into a scratch 654 * local based on whether the value fits the instruction's imm 655 * field. FP binops and UO_FNEG require local sources — FP literals reach the 656 * value stack through load_const into OPK_LOCAL. cg and opt's machinize/emit 657 * both rely on this contract to pass small constants through without 658 * burning a value-stack local on materialization. */ 659 void (*binop)(CgTarget*, BinOp, Operand dst /*LOCAL*/, 660 Operand a /*LOCAL|IMM*/, Operand b /*LOCAL|IMM*/); 661 void (*unop)(CgTarget*, UnOp, Operand dst /*LOCAL*/, Operand a /*LOCAL|IMM*/); 662 void (*cmp)(CgTarget*, CmpOp, Operand dst /*LOCAL*/, Operand a /*LOCAL|IMM*/, 663 Operand b /*LOCAL|IMM*/); /* materialize 0/1 */ 664 void (*convert)(CgTarget*, ConvKind, Operand dst, Operand src); 665 666 /* ---- calls / return ---- 667 * CGCallDesc carries the type-checked signature, semantic callee operand, 668 * local arguments, and local result destinations. The semantic target does 669 * not expose calling-convention lowering; native targets derive physical 670 * argument/return placement from fn_type and local metadata internally. 671 * `result` is the single local destination, or CG_LOCAL_NONE for void. */ 672 void (*call)(CgTarget*, const CGCallDesc*); 673 /* Pure query: can `d` be emitted as a sibling (tail) call on this target, 674 * given the current target state? Returns NULL if yes; otherwise a short, 675 * static, human-readable string naming the blocker, used verbatim in the 676 * musttail diagnostic. Must not emit code and must not abort. 677 * 678 * Realizable means the target can transfer control to the callee while 679 * preserving the source-level call/return semantics of this function. CG 680 * verifies type compatibility before setting CG_CALL_TAIL; target-specific 681 * blockers such as variadic lowering, frame teardown constraints, or 682 * unavailable tail-call support are reported here. 683 * 684 * CG owns the tail policy: it calls this first and only sets CG_CALL_TAIL 685 * when it returns NULL, so a NULL result must guarantee a later call() with 686 * CG_CALL_TAIL can emit the sibling call. May itself be NULL, meaning the 687 * target supports no tail calls at all. */ 688 const char* (*tail_call_unrealizable_reason)(CgTarget*, const CGCallDesc*); 689 /* Return from the function. `value` is the single returned local, or 690 * CG_LOCAL_NONE for a void return. */ 691 void (*ret)(CgTarget*, CGLocal value); 692 /* Control terminator marking statically-unreachable code (the C 693 * __builtin_unreachable point). Like ret/jump it ends the current basic 694 * block: no fall-through successor is implied. Backends typically emit a 695 * trap instruction (brk/ud2/ebreak), a Wasm `unreachable`, or a 696 * `__builtin_unreachable()` in the C-source target; an interpreter faults. 697 * Distinct from INTRIN_TRAP, which is an expression-level intrinsic that 698 * does not terminate the block. */ 699 void (*unreachable)(CgTarget*); 700 701 /* ---- alloca ---- 702 * Dynamic stack allocation. `size` is i64 bytes; `align` is the required 703 * alignment of the returned pointer. Backend grows the (linear-memory or 704 * native) shadow stack, returns the pointer in `dst`. v1 only emits this 705 * via __builtin_alloca; C VLAs are not parsed (__STDC_NO_VLA__). */ 706 void (*alloca_)(CgTarget*, Operand dst /*LOCAL*/, Operand size, u32 align); 707 708 /* ---- variadics ---- 709 * va_list type is per-arch (defined in <stdarg.h>); these methods 710 * implement the four C macros after builtin substitution. ap is always 711 * passed as &ap. */ 712 void (*va_start_)(CgTarget*, Operand ap_addr); 713 void (*va_arg_)(CgTarget*, Operand dst /*LOCAL*/, Operand ap_addr, 714 KitCgTypeId t); 715 void (*va_end_)(CgTarget*, Operand ap_addr); 716 void (*va_copy_)(CgTarget*, Operand dst_ap_addr, Operand src_ap_addr); 717 718 /* ---- atomics ---- */ 719 void (*atomic_load)(CgTarget*, Operand dst /*LOCAL*/, Operand addr, MemAccess, 720 KitCgMemOrder); 721 void (*atomic_store)(CgTarget*, Operand addr, Operand src, MemAccess, 722 KitCgMemOrder); 723 void (*atomic_rmw)(CgTarget*, KitCgAtomicOp, 724 Operand dst /*LOCAL: prior value*/, Operand addr, 725 Operand val, MemAccess, KitCgMemOrder); 726 void (*atomic_cas)(CgTarget*, Operand prior /*LOCAL*/, 727 Operand ok /*LOCAL, i1*/, Operand addr, Operand expected, 728 Operand desired, MemAccess, KitCgMemOrder success, 729 KitCgMemOrder failure); 730 void (*fence)(CgTarget*, KitCgMemOrder); 731 732 /* ---- compiler intrinsics ---- 733 * Typed dispatch for builtins whose lowering is backend-relevant 734 * (inline-vs-libcall, inline sequence selection) or whose semantics opt 735 * cares about (hint pattern matching, exhaustiveness). The IR carries 736 * IR_INTRINSIC + IRIntrinAux.kind; the wrapped target receives the same call 737 * at lowering time with materialized operands. 738 * 739 * Operand shapes by IntrinKind: 740 * POPCOUNT/CTZ/CLZ/BSWAP* : dsts[0] LOCAL result; args[0] LOCAL input 741 * MEMCPY/MEMMOVE : dsts none; args = (dst_addr, src_addr, n) 742 * MEMSET : dsts none; args = (dst_addr, byte, n) 743 * PREFETCH : dsts none; args = (addr [, rw [, locality]]) 744 * ASSUME_ALIGNED : dsts[0] LOCAL; args = (ptr, align [, offset]) 745 * EXPECT : dsts[0] LOCAL; args = (val, expected) 746 * TRAP : dsts none; args none 747 * SETJMP : dsts[0] LOCAL i32 result; args = (&buf) 748 * LONGJMP : dsts none; args = (&buf, val); no return 749 * ADD/SUB/MUL_OVERFLOW : dsts[0] LOCAL result, dsts[1] LOCAL i1 750 * overflow; args = (a, b) 751 * 752 * Backends that lack an inline sequence for a given kind may emit a 753 * normal IR_CALL-shaped sequence to a runtime entry (e.g. memcpy) — the 754 * IR records intent, the backend chooses mechanism. Hint kinds may be 755 * lowered as no-ops where the arch has nothing to emit. */ 756 void (*intrinsic)(CgTarget*, IntrinKind, Operand* dsts, u32 ndst, 757 const Operand* args, u32 narg); 758 759 /* ---- inline asm ---- 760 * Per-arch constraint binding + template assembly, packaged as one block. 761 * ins[i] are pre-evaluated input operands. 762 * out_ops[i] is filled by the arch with the location holding the result 763 * for outs[i]; the caller (cg) reads them out after the call. 764 * "=&r" early-clobber outputs must be allocated disjoint from any input. 765 * opt_cgtarget records this as a single IR_ASM_BLOCK; the wrapped target 766 * receives the same call at lowering time with materialized operands. */ 767 int (*asm_is_reg_constraint)(CgTarget*, const char* constraint); 768 void (*asm_block)(CgTarget*, const char* tmpl, const AsmConstraint* outs, 769 u32 nout, Operand* out_ops, const AsmConstraint* ins, 770 u32 nin, const Operand* in_ops, const Sym* clobbers, 771 u32 nclob, u32 clobber_abi_sets); 772 773 /* Optional: handle a top-level `__asm__("...")` block (file scope, not 774 * inside a function). Backends that leave this NULL fall back to the 775 * generic asm-parser path through KitCg.mc. Wasm overrides this to 776 * diagnose-and-fail since the wasm module has no native asm parser. */ 777 void (*file_scope_asm)(CgTarget*, const char* src, size_t len); 778 779 /* ---- source-location tracking ---- 780 * Sets the SrcLoc inherited by subsequent emit-side calls (binop/load/...). 781 * opt_cgtarget stamps it on every recorded Inst. Sticky until the next 782 * set_loc. */ 783 void (*set_loc)(CgTarget*, SrcLoc); 784 785 /* ---- end-of-TU hook ---- 786 * No-op for plain target CGTargets. opt_cgtarget runs cross-function passes 787 * (inlining + cleanup) and lowers all buffered IR functions into the 788 * wrapped target CgTarget. Drivers must call this after the last func_end and 789 * before reading from `obj` or calling debug_emit. */ 790 void (*finalize)(CgTarget*); 791 792 void (*destroy)(CgTarget*); 793 }; 794 795 /* Shared switch lowering. cg's kit_cg_switch installs this as the 796 * default target->switch_ behavior; opt's pass_emit calls it when 797 * replaying IR_SWITCH against a backend that doesn't override switch_. 798 * Emits a cmp-and-branch chain over (target->cmp_branch + target->jump) 799 * — fast at -O0 and the input shape an opt-level jump-table rewrite 800 * starts from. */ 801 void cg_lower_switch_default(CgTarget* t, const CGSwitchDesc* desc); 802 803 CgTarget* cgtarget_new(Compiler*, ObjBuilder*); 804 void cgtarget_set_finish_policy(CgTarget*, const CgFinishPolicy*); 805 void cgtarget_finalize(CgTarget*); 806 void cgtarget_free(CgTarget*); 807 808 /* A CGBackend is the unit the registry hands out: "give me a CgTarget for 809 * this Compiler + ObjBuilder + emit options." */ 810 typedef struct CGBackend { 811 const char* name; 812 CgTarget* (*make)(Compiler*, ObjBuilder*, const KitCodeOptions*); 813 } CGBackend; 814 815 /* Pick the right CGBackend for a session given the compiler's target arch 816 * and the per-emit CodeOptions. Returns NULL when no backend in this build can 817 * serve the request. */ 818 const CGBackend* cg_backend_for_session(const Compiler*, const KitCodeOptions*); 819 820 /* Human-readable arch name for diagnostics, independent of which backends 821 * are compiled in (so it can name a target whose backend is disabled). */ 822 const char* arch_kind_name(KitArchKind); 823 824 #endif