native_target.h (27597B)
1 #ifndef KIT_ARCH_NATIVE_TARGET_H 2 #define KIT_ARCH_NATIVE_TARGET_H 3 4 #include <string.h> 5 6 #include "arch/mc.h" 7 #include "cg/cgtarget.h" 8 #include "cg/type.h" 9 #include "core/core.h" 10 #include "core/slice.h" /* Slice, for resolve_name */ 11 12 /* NativeTarget is the physical native-emission contract. It is driven after 13 * semantic CG has been either direct-lowered by NativeDirectTarget or recorded, 14 * optimized, machinized, and allocated. It must not speak in semantic CGLocal 15 * ids except where a descriptor is carried for diagnostics or ABI queries. */ 16 17 typedef u32 NativeFrameSlot; 18 #define NATIVE_FRAME_SLOT_NONE 0u 19 20 typedef enum NativeFrameSlotKind { 21 NATIVE_FRAME_SLOT_LOCAL, 22 NATIVE_FRAME_SLOT_PARAM, 23 NATIVE_FRAME_SLOT_SPILL, 24 NATIVE_FRAME_SLOT_ALLOCA, 25 NATIVE_FRAME_SLOT_OUTGOING, 26 NATIVE_FRAME_SLOT_SAVE, 27 } NativeFrameSlotKind; 28 29 typedef enum NativeFrameSlotFlag { 30 NATIVE_FRAME_SLOT_NONE_FLAG = 0, 31 NATIVE_FRAME_SLOT_ADDR_TAKEN = 1u << 0, 32 NATIVE_FRAME_SLOT_MEMORY_REQUIRED = 1u << 1, 33 NATIVE_FRAME_SLOT_FIXED_OFFSET = 1u << 2, 34 } NativeFrameSlotFlag; 35 36 typedef struct NativeFrameSlotDesc { 37 KitCgTypeId type; 38 Sym name; 39 SrcLoc loc; 40 u32 size; 41 u32 align; 42 i32 fixed_offset; 43 u8 kind; /* NativeFrameSlotKind */ 44 u8 pad; 45 u16 flags; /* NativeFrameSlotFlag */ 46 } NativeFrameSlotDesc; 47 48 typedef struct NativeKnownFrameDesc { 49 const NativeFrameSlotDesc* slots; 50 u32 nslots; 51 u32 max_outgoing; 52 u32 align; 53 /* Callee-saved hard registers the allocator assigned, one bitmask per 54 * NativeAllocClass (indexed by class id). The backend reserves a save slot 55 * and emits the prologue save / epilogue restore for each — equivalent to a 56 * reserve_callee_saves() call, but folded into the known-frame setup so the 57 * full frame is fixed before the prologue is emitted. NULL / 0 means none. */ 58 const u32* callee_saved_used; 59 u32 ncallee_classes; 60 /* Union of the clobber register names of every inline-asm block in the body. 61 * Inline-asm clobbers are invisible to the operand scan that builds 62 * callee_saved_used, so the optimizer forwards the raw names here and the 63 * backend resolves them with its own clobber parser, folding the callee-saved 64 * ones into its save set (applying its ABI predicate, which excludes the 65 * frame pointer and keeps any reserved-but-callee-saved scratch such as x64 66 * rbx). The prologue/epilogue then preserve them, so the asm hook needs no 67 * per-block spill — which on the known-frame path would request a frame slot 68 * after the frame is already final. NULL / 0 when the body contains no inline 69 * asm. */ 70 const Sym* asm_clobbers; 71 u32 nasm_clobbers; 72 /* Union of KitCgAsmClobberAbiSet bits over the body's inline-asm blocks: an 73 * arch-neutral "clobbers the whole caller/callee-saved set" the backend 74 * expands against its own register file, alongside the named asm_clobbers. */ 75 u32 asm_clobber_abi_sets; 76 /* Whether the function body contains a dynamic alloca. The backend needs this 77 * up front (before the body) to decide prologue/epilogue form, since with a 78 * known frame the slim-epilogue eligibility is settled at func_begin. */ 79 u8 has_alloca; 80 /* Whether the body has an operation that needs a backend-internal scratch 81 * spill slot — on aa64, an atomic read-modify-write, whose retry loop spills 82 * one scratch register. The backend reserves the slot up front so the body 83 * never grows the frame after the prologue. */ 84 u8 needs_scratch_spill; 85 /* Whether the function is a leaf — its body contains no call of any kind 86 * (regular or sibling/tail). A leaf does not clobber the return-address 87 * register or the stack below sp through a call, so backends can omit the 88 * saved-frame record entirely (rv64 leaf tier) or skip the stack reservation 89 * and keep locals in the red zone (x64 SysV red-zone tier) — but ONLY when 90 * `has_asm` is also clear (see below). Conservatively false whenever any 91 * IR_CALL is present. */ 92 u8 is_leaf; 93 /* Whether the body contains an inline-asm block. Inline asm can clobber the 94 * return-address register (rv64 ra) or write into the red zone / make a call 95 * (x64) without the optimizer modelling it, so the frame-eliding 96 * leaf/red-zone tiers must NOT fire when this is set — even for an 97 * otherwise-leaf function. The single-pass and fat known-frame shapes always 98 * save the return address and reserve their stack, so they are unaffected. */ 99 u8 has_asm; 100 /* Whether the body reads its own frame-pointer chain via 101 * __builtin_frame_address / __builtin_return_address (INTRIN_FRAME_ADDRESS / 102 * INTRIN_RETURN_ADDRESS). Such a function must keep a valid frame record and 103 * frame pointer, so the frameless-leaf tier (rv64 slim_prologue, which emits 104 * no prologue and never anchors s0) must NOT fire. aa64/x64 keep the frame 105 * record in every prologue shape, so they ignore this flag. */ 106 u8 reads_frame; 107 } NativeKnownFrameDesc; 108 109 typedef enum NativeAllocClass { 110 NATIVE_REG_INT, 111 NATIVE_REG_FP, 112 NATIVE_REG_VEC, 113 } NativeAllocClass; 114 115 typedef enum NativeRegFlag { 116 NATIVE_REG_NONE = 0, 117 NATIVE_REG_ALLOCABLE = 1u << 0, 118 NATIVE_REG_CALLER_SAVED = 1u << 1, 119 NATIVE_REG_CALLEE_SAVED = 1u << 2, 120 NATIVE_REG_ARG = 1u << 3, 121 NATIVE_REG_RET = 1u << 4, 122 NATIVE_REG_RESERVED = 1u << 5, 123 NATIVE_REG_TEMP_PREFERRED = 1u << 6, 124 } NativeRegFlag; 125 126 typedef struct NativePhysRegInfo { 127 Reg reg; 128 u8 cls; /* NativeAllocClass */ 129 u8 abi_index; /* 0xff when not an ordered ABI arg/ret register */ 130 u16 flags; /* NativeRegFlag */ 131 u16 spill_cost; 132 u16 copy_cost; 133 } NativePhysRegInfo; 134 135 typedef struct NativeAllocClassInfo { 136 u8 cls; /* NativeAllocClass */ 137 u8 pad[3]; 138 139 const Reg* allocable; 140 u32 nallocable; 141 142 const Reg* scratch; 143 u32 nscratch; 144 145 const NativePhysRegInfo* phys; 146 u32 nphys; 147 148 u32 caller_saved_mask; 149 u32 callee_saved_mask; 150 u32 arg_mask; 151 u32 ret_mask; 152 u32 reserved_mask; 153 } NativeAllocClassInfo; 154 155 typedef struct NativeRegInfo NativeRegInfo; 156 struct NativeRegInfo { 157 const NativeAllocClassInfo* classes; 158 u32 nclasses; 159 160 /* Map a register name to its (Reg, class). `name` is the raw spelling 161 * ("rax", "x8", "a7"); the caller resolves any Sym to its bytes first so this 162 * stays pool-free. Returns 0 on success, non-zero for a non-register name. */ 163 int (*resolve_name)(const NativeRegInfo*, Slice name, Reg* out, 164 NativeAllocClass* cls_out); 165 /* True when (cls, reg) is a valid hard-register home for an inline-asm value 166 * operand. This is intentionally separate from allocator availability: 167 * syscall idioms need ABI registers such as x8/a7, while stack/frame, zero, 168 * link, platform, and backend scratch registers must stay unavailable even if 169 * the assembler can name them. */ 170 int (*asm_operand_reg_ok)(const NativeRegInfo*, NativeAllocClass cls, 171 Reg reg); 172 /* Optional target-specific register-constraint parser for inline asm. The 173 * input is the constraint body after generic modifiers ('=', '+', '&') have 174 * been stripped. Return non-zero only for constraints that name a register 175 * class; set fixed_out to REG_NONE for a free class or to a physical register 176 * when the constraint hard-wires the operand (x86 "a" -> rax). Set 177 * allowed_mask_out to 0 for the whole class, or a physical-register bitmask 178 * when the constraint names a restricted class subset. */ 179 int (*asm_constraint_reg)(const NativeRegInfo*, const char* body, 180 NativeAllocClass* cls_out, Reg* fixed_out, 181 u32* allowed_mask_out); 182 const char* (*debug_name)(const NativeRegInfo*, NativeAllocClass, Reg); 183 u32 (*dwarf_reg)(const NativeRegInfo*, NativeAllocClass, Reg); 184 }; 185 186 typedef enum NativeLocKind { 187 NATIVE_LOC_NONE, 188 NATIVE_LOC_REG, 189 NATIVE_LOC_FRAME, 190 NATIVE_LOC_STACK, 191 NATIVE_LOC_IMM, 192 NATIVE_LOC_GLOBAL, 193 NATIVE_LOC_ADDR, 194 } NativeLocKind; 195 196 typedef enum NativeAddrBaseKind { 197 NATIVE_ADDR_BASE_NONE, 198 NATIVE_ADDR_BASE_REG, 199 NATIVE_ADDR_BASE_FRAME, 200 NATIVE_ADDR_BASE_FRAME_VALUE, 201 NATIVE_ADDR_BASE_GLOBAL, 202 } NativeAddrBaseKind; 203 204 typedef enum NativeAddrIndexKind { 205 NATIVE_ADDR_INDEX_NONE, 206 NATIVE_ADDR_INDEX_REG, 207 NATIVE_ADDR_INDEX_FRAME_VALUE, 208 } NativeAddrIndexKind; 209 210 typedef enum NativeImmUse { 211 NATIVE_IMM_MOVE, 212 NATIVE_IMM_BINOP, 213 NATIVE_IMM_CMP, 214 NATIVE_IMM_ADDR_OFFSET, 215 } NativeImmUse; 216 217 typedef struct NativeAddr { 218 u8 base_kind; /* NativeAddrBaseKind */ 219 u8 cls; /* NativeAllocClass for base value */ 220 u8 index_kind; /* NativeAddrIndexKind */ 221 u8 index_cls; /* NativeAllocClass for index value */ 222 u8 log2_scale; 223 u8 pad[3]; 224 KitCgTypeId base_type; 225 KitCgTypeId index_type; 226 union { 227 Reg reg; 228 NativeFrameSlot frame; 229 struct { 230 ObjSymId sym; 231 i64 addend; 232 } global; 233 } base; 234 union { 235 Reg reg; 236 NativeFrameSlot frame; 237 } index; 238 i32 offset; 239 } NativeAddr; 240 241 typedef struct NativeLoc { 242 u8 kind; /* NativeLocKind */ 243 u8 cls; /* NativeAllocClass for register-like locations */ 244 u8 pad[2]; 245 KitCgTypeId type; 246 union { 247 Reg reg; 248 NativeFrameSlot frame; 249 struct { 250 NativeFrameSlot slot; 251 i32 offset; 252 } stack; 253 i64 imm; 254 struct { 255 ObjSymId sym; 256 i64 addend; 257 } global; 258 NativeAddr addr; 259 } v; 260 } NativeLoc; 261 262 typedef struct NativeInst NativeInst; 263 264 typedef enum NativePatchKind { 265 NATIVE_PATCH_FRAME_SIZE, 266 NATIVE_PATCH_MAX_OUTGOING, 267 NATIVE_PATCH_ARCH = 0x1000, 268 } NativePatchKind; 269 270 typedef struct NativePatch { 271 u32 kind; /* NativePatchKind or arch-private */ 272 u32 section_id; 273 u32 offset; 274 u32 width; 275 i64 addend; 276 u64 value; 277 } NativePatch; 278 279 typedef struct NativeFramePatchState { 280 u32 max_outgoing; 281 u32 max_align; 282 } NativeFramePatchState; 283 284 #define NATIVE_CALL_PLAN_CLASSES 3u 285 286 /* A semantic machine operation, enough for the target to report the physical 287 * registers its encoding clobbers as a side effect (e.g. x86 idiv writes 288 * rax/rdx, variable shifts use cl). Built by the optimizer from an instruction; 289 * the descriptor keeps the backend from depending on the optimizer IR. */ 290 typedef enum NativeMachineOpKind { 291 NATIVE_MOP_BINOP, 292 NATIVE_MOP_VA_START, 293 NATIVE_MOP_VA_ARG, 294 NATIVE_MOP_ATOMIC_CAS, 295 NATIVE_MOP_ATOMIC_RMW, 296 NATIVE_MOP_INTRINSIC, 297 /* A thread-local address materialization (IR_TLS_ADDR_OF). On targets whose 298 * TLS access model uses fixed scratch/result registers or a resolver-thunk 299 * call (e.g. Mach-O TLV descriptors → x0/x16/x17/lr), the encoding clobbers 300 * those regs even though the IR op only declares its destination. Targets 301 * whose TLS sequence touches only the destination register (ELF Local-Exec) 302 * report no clobbers. */ 303 NATIVE_MOP_TLS_ADDR, 304 } NativeMachineOpKind; 305 306 typedef struct NativeMachineOp { 307 u8 kind; /* NativeMachineOpKind */ 308 u8 binop; /* BinOp, when kind == NATIVE_MOP_BINOP */ 309 u8 intrin; /* IntrinKind, when kind == NATIVE_MOP_INTRINSIC */ 310 u8 second_is_reg; /* binop's second operand is a register (not an immediate) 311 */ 312 u8 result_is_fp; /* result lands in an FP register (e.g. va_arg of a double) 313 */ 314 } NativeMachineOp; 315 316 typedef struct NativeCallDesc { 317 KitCgTypeId fn_type; 318 NativeLoc callee; 319 const NativeLoc* args; 320 const NativeLoc* results; 321 u32 nargs; 322 u32 nresults; 323 u16 flags; /* CGCallFlag */ 324 u8 tail_policy; /* KitCgTailPolicy */ 325 u8 pad; 326 KitCgInlinePolicy inline_policy; 327 } NativeCallDesc; 328 329 typedef enum NativeCallPlanMoveKind { 330 NATIVE_CALL_MOVE_NONE, 331 NATIVE_CALL_MOVE_VALUE, 332 NATIVE_CALL_MOVE_ADDR, 333 } NativeCallPlanMoveKind; 334 335 typedef struct NativeCallPlanMove { 336 NativeLoc src; 337 NativeLoc dst; 338 MemAccess mem; 339 u8 src_kind; /* NativeCallPlanMoveKind */ 340 u8 dst_kind; /* NativeLocKind */ 341 u8 pad[2]; 342 } NativeCallPlanMove; 343 344 typedef struct NativeCallPlanRet { 345 NativeLoc src; 346 NativeLoc dst; 347 MemAccess mem; 348 } NativeCallPlanRet; 349 350 typedef struct NativeCallPlan { 351 NativeLoc callee; 352 NativeCallPlanMove* args; 353 NativeCallPlanRet* rets; 354 u32 nargs; 355 u32 nrets; 356 u32 stack_arg_size; 357 u32 clobber_mask[NATIVE_CALL_PLAN_CLASSES]; 358 u32 return_mask[NATIVE_CALL_PLAN_CLASSES]; 359 u16 flags; /* CGCallFlag */ 360 u8 has_sret; 361 u8 is_variadic; 362 } NativeCallPlan; 363 364 typedef struct NativeTarget NativeTarget; 365 struct NativeTarget { 366 Compiler* c; 367 ObjBuilder* obj; 368 MCEmitter* mc; 369 const NativeRegInfo* regs; 370 371 NativeAllocClass (*class_for_type)(NativeTarget*, KitCgTypeId); 372 int (*imm_legal)(NativeTarget*, NativeImmUse, u32 op, KitCgTypeId, i64); 373 int (*addr_legal)(NativeTarget*, const NativeAddr*, MemAccess); 374 /* Optional. Report the physical registers the target's encoding of `op` 375 * clobbers as a side effect (not its declared operands/results), one bitmask 376 * per NativeAllocClass. The optimizer keeps values live ACROSS the 377 * instruction out of these registers, so the backend may use them freely (x86 378 * idiv writes rax/rdx; a variable shift uses cl; atomics use rax/rcx/rdx). 379 * Return non-zero if any register is clobbered, 0 otherwise (the common, 380 * unconstrained case). NULL means no instruction clobbers fixed registers 381 * (aa64/rv64). */ 382 int (*machine_op_clobbers)(NativeTarget*, const NativeMachineOp* op, 383 u32 clobber_mask[NATIVE_CALL_PLAN_CLASSES]); 384 385 void (*func_begin)(NativeTarget*, const CGFuncDesc*); 386 void (*func_begin_known_frame)(NativeTarget*, const CGFuncDesc*, 387 const NativeKnownFrameDesc*, 388 NativeFrameSlot* out_slots); 389 void (*note_frame_state)(NativeTarget*, const NativeFramePatchState*); 390 /* Optional. Called once after func_begin and before frame-slot mapping, with 391 * the set of callee-saved hard registers the allocator assigned (one bitmask 392 * per NativeAllocClass, indexed by class id). The target reserves save slots 393 * and emits the prologue save / epilogue restore for each. Register 394 * allocation is complete before emission, so the caller knows the full set 395 * up front. */ 396 void (*reserve_callee_saves)(NativeTarget*, const u32* used_by_class, 397 u32 nclasses); 398 /* Optional live-ABI caller/callee-saved register masks for a class. Static 399 * NativeAllocClassInfo masks describe the target register file, but some 400 * targets vary preservation rules by OS ABI (x64 SysV vs Win64 XMM regs). 401 * The optimizer and direct emission use these to keep allocation, call 402 * clobbers, and prologue save sets aligned with the selected ABI. NULL falls 403 * back to NativeAllocClassInfo.{caller,callee}_saved_mask. */ 404 u32 (*caller_saved_mask)(NativeTarget*, NativeAllocClass); 405 u32 (*callee_saved_mask)(NativeTarget*, NativeAllocClass); 406 /* Optional. When set, the optimizer emit path calls this once — after 407 * func_begin, reserve_callee_saves, and frame-slot mapping, but before the 408 * body — to emit a minimal, exact-size prologue in place (no reserved NOP 409 * region). Frame-size immediates are still patched in func_end, since the 410 * final frame size isn't known until body emission allocates its temporaries. 411 * Backends that leave this NULL fall back to the single-pass 412 * reserve-and-patch prologue used by NativeDirectTarget. Gated by 413 * `emit_minimal_prologue`, which the optimizer emit path sets before 414 * func_begin so func_begin can skip the reserved region. */ 415 void (*emit_prologue)(NativeTarget*); 416 u8 emit_minimal_prologue; 417 /* Bytes of stack-passed arguments the fixed parameters of this function 418 * signature use (the part beyond the register arg pools). Sets *variadic to 419 * whether the signature is variadic and *nparams to the fixed parameter 420 * count. Used to decide tail-call (sibling) realizability: the callee's 421 * outgoing stack args must fit the area the caller itself received. Either 422 * out-pointer may be NULL. May itself be NULL. */ 423 u32 (*signature_stack_bytes)(NativeTarget*, KitCgTypeId fn_type, 424 int* variadic, u32* nparams); 425 /* Pure query: the outgoing stack-argument bytes a call with this descriptor 426 * uses, rounded to the ABI's outgoing-area alignment. Reads only fn_type, 427 * flags, nargs, and each args[i].type — never argument *locations* — so the 428 * optimizer can call it in a frame-planning pre-pass, before any argument 429 * marshalling is emitted, to size the outgoing area. Must equal the 430 * stack_arg_size plan_call computes for the same descriptor. May be NULL. */ 431 u32 (*call_stack_bytes)(NativeTarget*, const NativeCallDesc*); 432 /* Integer hardware zero register, if the ISA has one (aa64 wzr/xzr, rv64 433 * x0). When `has_store_zero_reg` is set, the emit path stores a constant 0 434 * straight from `store_zero_reg` instead of materializing 0 into a scratch 435 * with a mov/movz first. */ 436 u8 has_store_zero_reg; 437 Reg store_zero_reg; 438 void (*func_end)(NativeTarget*); 439 440 NativeFrameSlot (*frame_slot)(NativeTarget*, const NativeFrameSlotDesc*); 441 /* Optional post-finalization query for a native frame slot's debug location. 442 * Each arch owns the frame layout math and returns the coordinate system its 443 * debugger/unwinder path can materialize. */ 444 int (*frame_slot_debug_loc)(NativeTarget*, NativeFrameSlot, CGDebugLoc*); 445 /* Place the incoming parameter into `dst`. The caller (which has run register 446 * allocation) chooses the destination: a hard register (NATIVE_LOC_REG) for a 447 * register-allocated scalar param, a frame slot (NATIVE_LOC_FRAME) for an 448 * address-taken / spilled / aggregate param. NATIVE_LOC_NONE means the param 449 * is unused and only the ABI register/stack cursor must advance. Incoming arg 450 * registers are never allocable, so reg destinations never alias an incoming 451 * arg register and ordering across params is unconstrained. */ 452 void (*bind_param)(NativeTarget*, const CGParamDesc*, NativeLoc dst); 453 /* Optional. Called once by the optimizer emit path after the last bind_param, 454 * before the body. Lets a backend that defers register-destination param 455 * binds (to resolve them as a parallel copy, since the allocator may rotate 456 * params across the incoming arg registers — a permutation the naive 457 * per-param move order cannot realize) flush them now. Backends that bind 458 * eagerly leave this NULL. */ 459 void (*bind_params_end)(NativeTarget*); 460 461 MCLabel (*label_new)(NativeTarget*); 462 void (*label_place)(NativeTarget*, MCLabel); 463 void (*jump)(NativeTarget*, MCLabel); 464 void (*cmp_branch)(NativeTarget*, CmpOp, NativeLoc a, NativeLoc b, 465 MCLabel target); 466 void (*indirect_branch)(NativeTarget*, NativeLoc addr, 467 const MCLabel* valid_targets, u32 ntargets); 468 void (*load_label_addr)(NativeTarget*, NativeLoc dst, MCLabel target); 469 470 void (*emit)(NativeTarget*, const NativeInst*); 471 /* All instruction-emission hooks require caller-selected legal physical 472 * operands. In particular, dst values are NATIVE_LOC_REG, arithmetic sources 473 * are NATIVE_LOC_REG or target-legal immediates, and memory base/index 474 * registers in NativeAddr must already be materialized. NativeTarget may 475 * validate and assert, but it must not allocate registers. */ 476 void (*move)(NativeTarget*, NativeLoc dst_reg, NativeLoc src_reg); 477 void (*load_imm)(NativeTarget*, NativeLoc dst_reg, i64 imm); 478 void (*load_const)(NativeTarget*, NativeLoc dst_reg, ConstBytes); 479 void (*load_addr)(NativeTarget*, NativeLoc dst_reg, NativeAddr addr); 480 void (*load)(NativeTarget*, NativeLoc dst_reg, NativeAddr addr, MemAccess); 481 void (*store)(NativeTarget*, NativeAddr addr, NativeLoc src_reg, MemAccess); 482 void (*tls_addr_of)(NativeTarget*, NativeLoc dst_reg, ObjSymId sym, 483 i64 addend); 484 void (*copy_bytes)(NativeTarget*, NativeAddr dst, NativeAddr src, 485 AggregateAccess); 486 void (*set_bytes)(NativeTarget*, NativeAddr dst, NativeLoc byte_value, 487 AggregateAccess); 488 void (*bitfield_load)(NativeTarget*, NativeLoc dst_reg, 489 NativeAddr record_addr, BitFieldAccess); 490 void (*bitfield_store)(NativeTarget*, NativeAddr record_addr, 491 NativeLoc src_reg, BitFieldAccess); 492 void (*binop)(NativeTarget*, BinOp, NativeLoc dst_reg, NativeLoc a_reg, 493 NativeLoc b_reg_or_imm); 494 void (*unop)(NativeTarget*, UnOp, NativeLoc dst_reg, NativeLoc src_reg); 495 void (*cmp)(NativeTarget*, CmpOp, NativeLoc dst_reg, NativeLoc a_reg, 496 NativeLoc b_reg_or_imm); 497 void (*convert)(NativeTarget*, ConvKind, NativeLoc dst_reg, 498 NativeLoc src_reg); 499 void (*alloca_)(NativeTarget*, NativeLoc dst_reg, NativeLoc size_reg, 500 u32 align); 501 502 void (*spill)(NativeTarget*, NativeLoc src_reg, NativeFrameSlot, MemAccess); 503 void (*reload)(NativeTarget*, NativeLoc dst_reg, NativeFrameSlot, MemAccess); 504 505 void (*plan_call)(NativeTarget*, const NativeCallDesc*, NativeCallPlan*); 506 void (*emit_call)(NativeTarget*, const NativeCallPlan*); 507 /* `value` is the single returned local's location, or NULL for a void 508 * return. out_rets/out_nrets describe the ABI parts of that one value. */ 509 void (*plan_ret)(NativeTarget*, const CGFuncDesc*, const NativeLoc* value, 510 NativeCallPlanRet** out_rets, u32* out_nrets); 511 void (*ret)(NativeTarget*); 512 513 void (*atomic_load)(NativeTarget*, NativeLoc dst, NativeAddr addr, MemAccess, 514 KitCgMemOrder); 515 void (*atomic_store)(NativeTarget*, NativeAddr addr, NativeLoc src, MemAccess, 516 KitCgMemOrder); 517 void (*atomic_rmw)(NativeTarget*, KitCgAtomicOp, NativeLoc dst, 518 NativeAddr addr, NativeLoc val, MemAccess, KitCgMemOrder); 519 void (*atomic_cas)(NativeTarget*, NativeLoc prior, NativeLoc ok, 520 NativeAddr addr, NativeLoc expected, NativeLoc desired, 521 MemAccess, KitCgMemOrder success, KitCgMemOrder failure); 522 void (*fence)(NativeTarget*, KitCgMemOrder); 523 /* Variadic support. The optimizer passes the va_list pointer opaquely as a 524 * NativeLoc (a register or memory location holding the address of the 525 * va_list object); va_arg additionally receives the argument type and a 526 * destination location for the fetched value. All va_list layout knowledge 527 * (pointer ABI vs register-save-area ABI, field offsets, sizes) lives behind 528 * these hooks, which query the target ABI -- the optimizer makes no layout 529 * assumptions. */ 530 void (*va_start_)(NativeTarget*, NativeLoc ap_ptr); 531 void (*va_arg_)(NativeTarget*, NativeLoc dst, NativeLoc ap_ptr, 532 KitCgTypeId type); 533 void (*va_end_)(NativeTarget*, NativeLoc ap_ptr); 534 void (*va_copy_)(NativeTarget*, NativeLoc dst_ap_ptr, NativeLoc src_ap_ptr); 535 void (*intrinsic)(NativeTarget*, IntrinKind, const NativeLoc* dsts, u32 ndst, 536 const NativeLoc* args, u32 narg); 537 void (*asm_block)(NativeTarget*, const char* tmpl, const AsmConstraint* outs, 538 u32 nout, NativeLoc* out_locs, const AsmConstraint* ins, 539 u32 nin, const NativeLoc* in_locs, const Sym* clobbers, 540 u32 nclob); 541 void (*file_scope_asm)(NativeTarget*, const char* src, size_t len); 542 void (*patch_add)(NativeTarget*, const NativePatch*); 543 void (*patch_apply)(NativeTarget*); 544 void (*trap)(NativeTarget*); 545 void (*set_loc)(NativeTarget*, SrcLoc); 546 void (*finalize)(NativeTarget*); 547 void (*destroy)(NativeTarget*); 548 }; 549 550 static inline const NativeAllocClassInfo* 551 native_target_class_info(const NativeTarget* t, NativeAllocClass cls) { 552 if (!t || !t->regs) return NULL; 553 for (u32 i = 0; i < t->regs->nclasses; ++i) { 554 const NativeAllocClassInfo* ci = &t->regs->classes[i]; 555 if ((NativeAllocClass)ci->cls == cls) return ci; 556 } 557 return NULL; 558 } 559 560 static inline u32 native_target_caller_saved_mask(NativeTarget* t, 561 NativeAllocClass cls) { 562 const NativeAllocClassInfo* ci; 563 if (t && t->caller_saved_mask) return t->caller_saved_mask(t, cls); 564 ci = native_target_class_info(t, cls); 565 return ci ? ci->caller_saved_mask : 0u; 566 } 567 568 static inline u32 native_target_callee_saved_mask(NativeTarget* t, 569 NativeAllocClass cls) { 570 const NativeAllocClassInfo* ci; 571 if (t && t->callee_saved_mask) return t->callee_saved_mask(t, cls); 572 ci = native_target_class_info(t, cls); 573 return ci ? ci->callee_saved_mask : 0u; 574 } 575 576 static inline NativeLoc native_loc_none(void) { 577 NativeLoc loc; 578 memset(&loc, 0, sizeof loc); 579 loc.kind = NATIVE_LOC_NONE; 580 return loc; 581 } 582 583 /* Target-neutral location constructors and scalar queries. These are 584 * byte-identical across the native backends, so they live here as the single 585 * source of truth. (loc_reg's register mask differs per arch and stays 586 * per-backend.) */ 587 static inline NativeLoc native_loc_reg(KitCgTypeId type, NativeAllocClass cls, 588 Reg reg) { 589 NativeLoc loc; 590 memset(&loc, 0, sizeof loc); 591 loc.kind = NATIVE_LOC_REG; 592 loc.cls = (u8)cls; 593 loc.type = type; 594 loc.v.reg = reg; 595 return loc; 596 } 597 598 static inline NativeLoc native_loc_stack(KitCgTypeId type, NativeFrameSlot slot, 599 i32 offset) { 600 NativeLoc loc; 601 memset(&loc, 0, sizeof loc); 602 loc.kind = NATIVE_LOC_STACK; 603 loc.cls = NATIVE_REG_INT; 604 loc.type = type; 605 loc.v.stack.slot = slot; 606 loc.v.stack.offset = offset; 607 return loc; 608 } 609 610 static inline int native_loc_is_fp(NativeLoc loc) { 611 return (NativeAllocClass)loc.cls == NATIVE_REG_FP; 612 } 613 614 /* Scalar size/align, clamped to a usable register-sized default. Shared by the 615 * backends whose scalars are at most pointer-width (x64, rv64); aa64 keeps its 616 * own size query because it asserts on over-wide scalars. */ 617 static inline u32 native_type_size(NativeTarget* t, KitCgTypeId type) { 618 u64 n = type ? cg_type_size(t->c, type) : 8u; 619 if (n == 0) n = 8u; 620 return (u32)n; 621 } 622 623 static inline u32 native_type_align(NativeTarget* t, KitCgTypeId type) { 624 u64 n = type ? cg_type_align(t->c, type) : 8u; 625 if (n == 0) n = 1u; 626 if (n > 16u) n = 16u; 627 return (u32)n; 628 } 629 630 static inline MemAccess native_mem_for_type(NativeTarget* t, KitCgTypeId type, 631 u32 size) { 632 MemAccess m; 633 memset(&m, 0, sizeof m); 634 m.type = type; 635 m.size = size ? size : native_type_size(t, type); 636 m.align = native_type_align(t, type); 637 return m; 638 } 639 640 /* FP register class for a scalar type: a float value lives in an FP register 641 * only when the hardware float ABI has a register that wide. flen comes from 642 * the target float ABI (SINGLE->4, DOUBLE->8, SOFT->0); the DEFAULT/unset 643 * sentinel maps to the pointer width, which preserves the historical "FP iff 644 * float and <= 8 bytes" behavior for lp64d / x86-64 and yields the correct 645 * rv32 soft-double result (double is 8 bytes > flen=4 on ilp32f, > 0 on ilp32, 646 * so it is INT-class and never bit-cast through an FP register via fmv.d.x). 647 * aa64 keeps its own (same predicate, distinct mem helper). */ 648 static inline NativeAllocClass native_class_for_type_fp_le8(NativeTarget* t, 649 KitCgTypeId type) { 650 u32 flen; 651 switch (t->c->target.float_abi) { 652 case KIT_FLOAT_ABI_SINGLE: flen = 4u; break; 653 case KIT_FLOAT_ABI_DOUBLE: flen = 8u; break; 654 case KIT_FLOAT_ABI_SOFT: flen = 0u; break; 655 default: flen = t->c->target.ptr_size; break; /* DEFAULT: historical */ 656 } 657 if (type && flen && cg_type_is_float(t->c, type) && 658 cg_type_size(t->c, type) <= flen) 659 return NATIVE_REG_FP; 660 return NATIVE_REG_INT; 661 } 662 663 #endif