kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

native_target.h (27597B)


      1 #ifndef KIT_ARCH_NATIVE_TARGET_H
      2 #define KIT_ARCH_NATIVE_TARGET_H
      3 
      4 #include <string.h>
      5 
      6 #include "arch/mc.h"
      7 #include "cg/cgtarget.h"
      8 #include "cg/type.h"
      9 #include "core/core.h"
     10 #include "core/slice.h" /* Slice, for resolve_name */
     11 
     12 /* NativeTarget is the physical native-emission contract. It is driven after
     13  * semantic CG has been either direct-lowered by NativeDirectTarget or recorded,
     14  * optimized, machinized, and allocated. It must not speak in semantic CGLocal
     15  * ids except where a descriptor is carried for diagnostics or ABI queries. */
     16 
/* Handle naming one native frame slot. The frame_slot hook returns one, and
 * spill/reload, NativeLoc and NativeAddr carry them. NATIVE_FRAME_SLOT_NONE
 * (0) is the "no slot" sentinel. */
typedef u32 NativeFrameSlot;
#define NATIVE_FRAME_SLOT_NONE 0u
     19 
     20 typedef enum NativeFrameSlotKind {
     21   NATIVE_FRAME_SLOT_LOCAL,
     22   NATIVE_FRAME_SLOT_PARAM,
     23   NATIVE_FRAME_SLOT_SPILL,
     24   NATIVE_FRAME_SLOT_ALLOCA,
     25   NATIVE_FRAME_SLOT_OUTGOING,
     26   NATIVE_FRAME_SLOT_SAVE,
     27 } NativeFrameSlotKind;
     28 
     29 typedef enum NativeFrameSlotFlag {
     30   NATIVE_FRAME_SLOT_NONE_FLAG = 0,
     31   NATIVE_FRAME_SLOT_ADDR_TAKEN = 1u << 0,
     32   NATIVE_FRAME_SLOT_MEMORY_REQUIRED = 1u << 1,
     33   NATIVE_FRAME_SLOT_FIXED_OFFSET = 1u << 2,
     34 } NativeFrameSlotFlag;
     35 
     36 typedef struct NativeFrameSlotDesc {
     37   KitCgTypeId type;
     38   Sym name;
     39   SrcLoc loc;
     40   u32 size;
     41   u32 align;
     42   i32 fixed_offset;
     43   u8 kind; /* NativeFrameSlotKind */
     44   u8 pad;
     45   u16 flags; /* NativeFrameSlotFlag */
     46 } NativeFrameSlotDesc;
     47 
     48 typedef struct NativeKnownFrameDesc {
     49   const NativeFrameSlotDesc* slots;
     50   u32 nslots;
     51   u32 max_outgoing;
     52   u32 align;
     53   /* Callee-saved hard registers the allocator assigned, one bitmask per
     54    * NativeAllocClass (indexed by class id). The backend reserves a save slot
     55    * and emits the prologue save / epilogue restore for each — equivalent to a
     56    * reserve_callee_saves() call, but folded into the known-frame setup so the
     57    * full frame is fixed before the prologue is emitted. NULL / 0 means none. */
     58   const u32* callee_saved_used;
     59   u32 ncallee_classes;
     60   /* Union of the clobber register names of every inline-asm block in the body.
     61    * Inline-asm clobbers are invisible to the operand scan that builds
     62    * callee_saved_used, so the optimizer forwards the raw names here and the
     63    * backend resolves them with its own clobber parser, folding the callee-saved
     64    * ones into its save set (applying its ABI predicate, which excludes the
     65    * frame pointer and keeps any reserved-but-callee-saved scratch such as x64
     66    * rbx). The prologue/epilogue then preserve them, so the asm hook needs no
     67    * per-block spill — which on the known-frame path would request a frame slot
     68    * after the frame is already final. NULL / 0 when the body contains no inline
     69    * asm. */
     70   const Sym* asm_clobbers;
     71   u32 nasm_clobbers;
     72   /* Union of KitCgAsmClobberAbiSet bits over the body's inline-asm blocks: an
     73    * arch-neutral "clobbers the whole caller/callee-saved set" the backend
     74    * expands against its own register file, alongside the named asm_clobbers. */
     75   u32 asm_clobber_abi_sets;
     76   /* Whether the function body contains a dynamic alloca. The backend needs this
     77    * up front (before the body) to decide prologue/epilogue form, since with a
     78    * known frame the slim-epilogue eligibility is settled at func_begin. */
     79   u8 has_alloca;
     80   /* Whether the body has an operation that needs a backend-internal scratch
     81    * spill slot — on aa64, an atomic read-modify-write, whose retry loop spills
     82    * one scratch register. The backend reserves the slot up front so the body
     83    * never grows the frame after the prologue. */
     84   u8 needs_scratch_spill;
     85   /* Whether the function is a leaf — its body contains no call of any kind
     86    * (regular or sibling/tail). A leaf does not clobber the return-address
     87    * register or the stack below sp through a call, so backends can omit the
     88    * saved-frame record entirely (rv64 leaf tier) or skip the stack reservation
     89    * and keep locals in the red zone (x64 SysV red-zone tier) — but ONLY when
     90    * `has_asm` is also clear (see below). Conservatively false whenever any
     91    * IR_CALL is present. */
     92   u8 is_leaf;
     93   /* Whether the body contains an inline-asm block. Inline asm can clobber the
     94    * return-address register (rv64 ra) or write into the red zone / make a call
     95    * (x64) without the optimizer modelling it, so the frame-eliding
     96    * leaf/red-zone tiers must NOT fire when this is set — even for an
     97    * otherwise-leaf function. The single-pass and fat known-frame shapes always
     98    * save the return address and reserve their stack, so they are unaffected. */
     99   u8 has_asm;
    100   /* Whether the body reads its own frame-pointer chain via
    101    * __builtin_frame_address / __builtin_return_address (INTRIN_FRAME_ADDRESS /
    102    * INTRIN_RETURN_ADDRESS). Such a function must keep a valid frame record and
    103    * frame pointer, so the frameless-leaf tier (rv64 slim_prologue, which emits
    104    * no prologue and never anchors s0) must NOT fire. aa64/x64 keep the frame
    105    * record in every prologue shape, so they ignore this flag. */
    106   u8 reads_frame;
    107 } NativeKnownFrameDesc;
    108 
    109 typedef enum NativeAllocClass {
    110   NATIVE_REG_INT,
    111   NATIVE_REG_FP,
    112   NATIVE_REG_VEC,
    113 } NativeAllocClass;
    114 
    115 typedef enum NativeRegFlag {
    116   NATIVE_REG_NONE = 0,
    117   NATIVE_REG_ALLOCABLE = 1u << 0,
    118   NATIVE_REG_CALLER_SAVED = 1u << 1,
    119   NATIVE_REG_CALLEE_SAVED = 1u << 2,
    120   NATIVE_REG_ARG = 1u << 3,
    121   NATIVE_REG_RET = 1u << 4,
    122   NATIVE_REG_RESERVED = 1u << 5,
    123   NATIVE_REG_TEMP_PREFERRED = 1u << 6,
    124 } NativeRegFlag;
    125 
    126 typedef struct NativePhysRegInfo {
    127   Reg reg;
    128   u8 cls;       /* NativeAllocClass */
    129   u8 abi_index; /* 0xff when not an ordered ABI arg/ret register */
    130   u16 flags;    /* NativeRegFlag */
    131   u16 spill_cost;
    132   u16 copy_cost;
    133 } NativePhysRegInfo;
    134 
    135 typedef struct NativeAllocClassInfo {
    136   u8 cls; /* NativeAllocClass */
    137   u8 pad[3];
    138 
    139   const Reg* allocable;
    140   u32 nallocable;
    141 
    142   const Reg* scratch;
    143   u32 nscratch;
    144 
    145   const NativePhysRegInfo* phys;
    146   u32 nphys;
    147 
    148   u32 caller_saved_mask;
    149   u32 callee_saved_mask;
    150   u32 arg_mask;
    151   u32 ret_mask;
    152   u32 reserved_mask;
    153 } NativeAllocClassInfo;
    154 
    155 typedef struct NativeRegInfo NativeRegInfo;
    156 struct NativeRegInfo {
    157   const NativeAllocClassInfo* classes;
    158   u32 nclasses;
    159 
    160   /* Map a register name to its (Reg, class). `name` is the raw spelling
    161    * ("rax", "x8", "a7"); the caller resolves any Sym to its bytes first so this
    162    * stays pool-free. Returns 0 on success, non-zero for a non-register name. */
    163   int (*resolve_name)(const NativeRegInfo*, Slice name, Reg* out,
    164                       NativeAllocClass* cls_out);
    165   /* True when (cls, reg) is a valid hard-register home for an inline-asm value
    166    * operand. This is intentionally separate from allocator availability:
    167    * syscall idioms need ABI registers such as x8/a7, while stack/frame, zero,
    168    * link, platform, and backend scratch registers must stay unavailable even if
    169    * the assembler can name them. */
    170   int (*asm_operand_reg_ok)(const NativeRegInfo*, NativeAllocClass cls,
    171                             Reg reg);
    172   /* Optional target-specific register-constraint parser for inline asm. The
    173    * input is the constraint body after generic modifiers ('=', '+', '&') have
    174    * been stripped. Return non-zero only for constraints that name a register
    175    * class; set fixed_out to REG_NONE for a free class or to a physical register
    176    * when the constraint hard-wires the operand (x86 "a" -> rax). Set
    177    * allowed_mask_out to 0 for the whole class, or a physical-register bitmask
    178    * when the constraint names a restricted class subset. */
    179   int (*asm_constraint_reg)(const NativeRegInfo*, const char* body,
    180                             NativeAllocClass* cls_out, Reg* fixed_out,
    181                             u32* allowed_mask_out);
    182   const char* (*debug_name)(const NativeRegInfo*, NativeAllocClass, Reg);
    183   u32 (*dwarf_reg)(const NativeRegInfo*, NativeAllocClass, Reg);
    184 };
    185 
    186 typedef enum NativeLocKind {
    187   NATIVE_LOC_NONE,
    188   NATIVE_LOC_REG,
    189   NATIVE_LOC_FRAME,
    190   NATIVE_LOC_STACK,
    191   NATIVE_LOC_IMM,
    192   NATIVE_LOC_GLOBAL,
    193   NATIVE_LOC_ADDR,
    194 } NativeLocKind;
    195 
    196 typedef enum NativeAddrBaseKind {
    197   NATIVE_ADDR_BASE_NONE,
    198   NATIVE_ADDR_BASE_REG,
    199   NATIVE_ADDR_BASE_FRAME,
    200   NATIVE_ADDR_BASE_FRAME_VALUE,
    201   NATIVE_ADDR_BASE_GLOBAL,
    202 } NativeAddrBaseKind;
    203 
    204 typedef enum NativeAddrIndexKind {
    205   NATIVE_ADDR_INDEX_NONE,
    206   NATIVE_ADDR_INDEX_REG,
    207   NATIVE_ADDR_INDEX_FRAME_VALUE,
    208 } NativeAddrIndexKind;
    209 
    210 typedef enum NativeImmUse {
    211   NATIVE_IMM_MOVE,
    212   NATIVE_IMM_BINOP,
    213   NATIVE_IMM_CMP,
    214   NATIVE_IMM_ADDR_OFFSET,
    215 } NativeImmUse;
    216 
    217 typedef struct NativeAddr {
    218   u8 base_kind;  /* NativeAddrBaseKind */
    219   u8 cls;        /* NativeAllocClass for base value */
    220   u8 index_kind; /* NativeAddrIndexKind */
    221   u8 index_cls;  /* NativeAllocClass for index value */
    222   u8 log2_scale;
    223   u8 pad[3];
    224   KitCgTypeId base_type;
    225   KitCgTypeId index_type;
    226   union {
    227     Reg reg;
    228     NativeFrameSlot frame;
    229     struct {
    230       ObjSymId sym;
    231       i64 addend;
    232     } global;
    233   } base;
    234   union {
    235     Reg reg;
    236     NativeFrameSlot frame;
    237   } index;
    238   i32 offset;
    239 } NativeAddr;
    240 
    241 typedef struct NativeLoc {
    242   u8 kind; /* NativeLocKind */
    243   u8 cls;  /* NativeAllocClass for register-like locations */
    244   u8 pad[2];
    245   KitCgTypeId type;
    246   union {
    247     Reg reg;
    248     NativeFrameSlot frame;
    249     struct {
    250       NativeFrameSlot slot;
    251       i32 offset;
    252     } stack;
    253     i64 imm;
    254     struct {
    255       ObjSymId sym;
    256       i64 addend;
    257     } global;
    258     NativeAddr addr;
    259   } v;
    260 } NativeLoc;
    261 
/* Instruction type that is only forward-declared in this header; the emit
 * hook receives it by pointer. */
typedef struct NativeInst NativeInst;
    263 
    264 typedef enum NativePatchKind {
    265   NATIVE_PATCH_FRAME_SIZE,
    266   NATIVE_PATCH_MAX_OUTGOING,
    267   NATIVE_PATCH_ARCH = 0x1000,
    268 } NativePatchKind;
    269 
    270 typedef struct NativePatch {
    271   u32 kind; /* NativePatchKind or arch-private */
    272   u32 section_id;
    273   u32 offset;
    274   u32 width;
    275   i64 addend;
    276   u64 value;
    277 } NativePatch;
    278 
    279 typedef struct NativeFramePatchState {
    280   u32 max_outgoing;
    281   u32 max_align;
    282 } NativeFramePatchState;
    283 
/* Length of the per-class register-mask arrays (NativeCallPlan.clobber_mask /
 * return_mask and the machine_op_clobbers output). It equals the number of
 * NativeAllocClass enumerators in this header; keep the two in sync. */
#define NATIVE_CALL_PLAN_CLASSES 3u
    285 
    286 /* A semantic machine operation, enough for the target to report the physical
    287  * registers its encoding clobbers as a side effect (e.g. x86 idiv writes
    288  * rax/rdx, variable shifts use cl). Built by the optimizer from an instruction;
    289  * the descriptor keeps the backend from depending on the optimizer IR. */
    290 typedef enum NativeMachineOpKind {
    291   NATIVE_MOP_BINOP,
    292   NATIVE_MOP_VA_START,
    293   NATIVE_MOP_VA_ARG,
    294   NATIVE_MOP_ATOMIC_CAS,
    295   NATIVE_MOP_ATOMIC_RMW,
    296   NATIVE_MOP_INTRINSIC,
    297   /* A thread-local address materialization (IR_TLS_ADDR_OF). On targets whose
    298    * TLS access model uses fixed scratch/result registers or a resolver-thunk
    299    * call (e.g. Mach-O TLV descriptors → x0/x16/x17/lr), the encoding clobbers
    300    * those regs even though the IR op only declares its destination. Targets
    301    * whose TLS sequence touches only the destination register (ELF Local-Exec)
    302    * report no clobbers. */
    303   NATIVE_MOP_TLS_ADDR,
    304 } NativeMachineOpKind;
    305 
    306 typedef struct NativeMachineOp {
    307   u8 kind;          /* NativeMachineOpKind */
    308   u8 binop;         /* BinOp, when kind == NATIVE_MOP_BINOP */
    309   u8 intrin;        /* IntrinKind, when kind == NATIVE_MOP_INTRINSIC */
    310   u8 second_is_reg; /* binop's second operand is a register (not an immediate)
    311                      */
    312   u8 result_is_fp;  /* result lands in an FP register (e.g. va_arg of a double)
    313                      */
    314 } NativeMachineOp;
    315 
    316 typedef struct NativeCallDesc {
    317   KitCgTypeId fn_type;
    318   NativeLoc callee;
    319   const NativeLoc* args;
    320   const NativeLoc* results;
    321   u32 nargs;
    322   u32 nresults;
    323   u16 flags;      /* CGCallFlag */
    324   u8 tail_policy; /* KitCgTailPolicy */
    325   u8 pad;
    326   KitCgInlinePolicy inline_policy;
    327 } NativeCallDesc;
    328 
    329 typedef enum NativeCallPlanMoveKind {
    330   NATIVE_CALL_MOVE_NONE,
    331   NATIVE_CALL_MOVE_VALUE,
    332   NATIVE_CALL_MOVE_ADDR,
    333 } NativeCallPlanMoveKind;
    334 
    335 typedef struct NativeCallPlanMove {
    336   NativeLoc src;
    337   NativeLoc dst;
    338   MemAccess mem;
    339   u8 src_kind; /* NativeCallPlanMoveKind */
    340   u8 dst_kind; /* NativeLocKind */
    341   u8 pad[2];
    342 } NativeCallPlanMove;
    343 
    344 typedef struct NativeCallPlanRet {
    345   NativeLoc src;
    346   NativeLoc dst;
    347   MemAccess mem;
    348 } NativeCallPlanRet;
    349 
    350 typedef struct NativeCallPlan {
    351   NativeLoc callee;
    352   NativeCallPlanMove* args;
    353   NativeCallPlanRet* rets;
    354   u32 nargs;
    355   u32 nrets;
    356   u32 stack_arg_size;
    357   u32 clobber_mask[NATIVE_CALL_PLAN_CLASSES];
    358   u32 return_mask[NATIVE_CALL_PLAN_CLASSES];
    359   u16 flags; /* CGCallFlag */
    360   u8 has_sret;
    361   u8 is_variadic;
    362 } NativeCallPlan;
    363 
/* The native-emission interface of one backend: a few context handles and
 * state bytes plus a table of hooks (function pointers). Every hook takes the
 * NativeTarget itself as its first argument. Hooks whose comment says
 * "Optional" or "May be NULL" can be left NULL; for the rest this header does
 * not state whether NULL is permitted — NOTE(review): confirm per backend. */
typedef struct NativeTarget NativeTarget;
struct NativeTarget {
  /* Context handles. In this header, `c` is read by the native_type_* and
   * native_class_for_type_fp_le8 helpers and `regs` by
   * native_target_class_info. */
  Compiler* c;
  ObjBuilder* obj;
  MCEmitter* mc;
  const NativeRegInfo* regs;

  /* Classification and legality queries. NOTE(review): the int results are
   * presumably boolean (non-zero = legal) — confirm. */
  NativeAllocClass (*class_for_type)(NativeTarget*, KitCgTypeId);
  int (*imm_legal)(NativeTarget*, NativeImmUse, u32 op, KitCgTypeId, i64);
  int (*addr_legal)(NativeTarget*, const NativeAddr*, MemAccess);
  /* Optional. Report the physical registers the target's encoding of `op`
   * clobbers as a side effect (not its declared operands/results), one bitmask
   * per NativeAllocClass. The optimizer keeps values live ACROSS the
   * instruction out of these registers, so the backend may use them freely (x86
   * idiv writes rax/rdx; a variable shift uses cl; atomics use rax/rcx/rdx).
   * Return non-zero if any register is clobbered, 0 otherwise (the common,
   * unconstrained case). NULL means no instruction clobbers fixed registers
   * (aa64/rv64). */
  int (*machine_op_clobbers)(NativeTarget*, const NativeMachineOp* op,
                             u32 clobber_mask[NATIVE_CALL_PLAN_CLASSES]);

  /* Function entry: either the plain form, or the known-frame form that
   * receives the complete frame description up front. */
  void (*func_begin)(NativeTarget*, const CGFuncDesc*);
  void (*func_begin_known_frame)(NativeTarget*, const CGFuncDesc*,
                                 const NativeKnownFrameDesc*,
                                 NativeFrameSlot* out_slots);
  void (*note_frame_state)(NativeTarget*, const NativeFramePatchState*);
  /* Optional. Called once after func_begin and before frame-slot mapping, with
   * the set of callee-saved hard registers the allocator assigned (one bitmask
   * per NativeAllocClass, indexed by class id). The target reserves save slots
   * and emits the prologue save / epilogue restore for each. Register
   * allocation is complete before emission, so the caller knows the full set
   * up front. */
  void (*reserve_callee_saves)(NativeTarget*, const u32* used_by_class,
                               u32 nclasses);
  /* Optional live-ABI caller/callee-saved register masks for a class. Static
   * NativeAllocClassInfo masks describe the target register file, but some
   * targets vary preservation rules by OS ABI (x64 SysV vs Win64 XMM regs).
   * The optimizer and direct emission use these to keep allocation, call
   * clobbers, and prologue save sets aligned with the selected ABI. NULL falls
   * back to NativeAllocClassInfo.{caller,callee}_saved_mask. */
  u32 (*caller_saved_mask)(NativeTarget*, NativeAllocClass);
  u32 (*callee_saved_mask)(NativeTarget*, NativeAllocClass);
  /* Optional. When set, the optimizer emit path calls this once — after
   * func_begin, reserve_callee_saves, and frame-slot mapping, but before the
   * body — to emit a minimal, exact-size prologue in place (no reserved NOP
   * region). Frame-size immediates are still patched in func_end, since the
   * final frame size isn't known until body emission allocates its temporaries.
   * Backends that leave this NULL fall back to the single-pass
   * reserve-and-patch prologue used by NativeDirectTarget. Gated by
   * `emit_minimal_prologue`, which the optimizer emit path sets before
   * func_begin so func_begin can skip the reserved region. */
  void (*emit_prologue)(NativeTarget*);
  u8 emit_minimal_prologue;
  /* Bytes of stack-passed arguments the fixed parameters of this function
   * signature use (the part beyond the register arg pools). Sets *variadic to
   * whether the signature is variadic and *nparams to the fixed parameter
   * count. Used to decide tail-call (sibling) realizability: the callee's
   * outgoing stack args must fit the area the caller itself received. Either
   * out-pointer may be NULL. May itself be NULL. */
  u32 (*signature_stack_bytes)(NativeTarget*, KitCgTypeId fn_type,
                               int* variadic, u32* nparams);
  /* Pure query: the outgoing stack-argument bytes a call with this descriptor
   * uses, rounded to the ABI's outgoing-area alignment. Reads only fn_type,
   * flags, nargs, and each args[i].type — never argument *locations* — so the
   * optimizer can call it in a frame-planning pre-pass, before any argument
   * marshalling is emitted, to size the outgoing area. Must equal the
   * stack_arg_size plan_call computes for the same descriptor. May be NULL. */
  u32 (*call_stack_bytes)(NativeTarget*, const NativeCallDesc*);
  /* Integer hardware zero register, if the ISA has one (aa64 wzr/xzr, rv64
   * x0). When `has_store_zero_reg` is set, the emit path stores a constant 0
   * straight from `store_zero_reg` instead of materializing 0 into a scratch
   * with a mov/movz first. */
  u8 has_store_zero_reg;
  Reg store_zero_reg;
  void (*func_end)(NativeTarget*);

  /* Create a frame slot from its description and return its handle. */
  NativeFrameSlot (*frame_slot)(NativeTarget*, const NativeFrameSlotDesc*);
  /* Optional post-finalization query for a native frame slot's debug location.
   * Each arch owns the frame layout math and returns the coordinate system its
   * debugger/unwinder path can materialize. */
  int (*frame_slot_debug_loc)(NativeTarget*, NativeFrameSlot, CGDebugLoc*);
  /* Place the incoming parameter into `dst`. The caller (which has run register
   * allocation) chooses the destination: a hard register (NATIVE_LOC_REG) for a
   * register-allocated scalar param, a frame slot (NATIVE_LOC_FRAME) for an
   * address-taken / spilled / aggregate param. NATIVE_LOC_NONE means the param
   * is unused and only the ABI register/stack cursor must advance. Incoming arg
   * registers are never allocable, so reg destinations never alias an incoming
   * arg register and ordering across params is unconstrained. */
  void (*bind_param)(NativeTarget*, const CGParamDesc*, NativeLoc dst);
  /* Optional. Called once by the optimizer emit path after the last bind_param,
   * before the body. Lets a backend that defers register-destination param
   * binds (to resolve them as a parallel copy, since the allocator may rotate
   * params across the incoming arg registers — a permutation the naive
   * per-param move order cannot realize) flush them now. Backends that bind
   * eagerly leave this NULL. */
  void (*bind_params_end)(NativeTarget*);

  /* Labels and control transfer. */
  MCLabel (*label_new)(NativeTarget*);
  void (*label_place)(NativeTarget*, MCLabel);
  void (*jump)(NativeTarget*, MCLabel);
  void (*cmp_branch)(NativeTarget*, CmpOp, NativeLoc a, NativeLoc b,
                     MCLabel target);
  void (*indirect_branch)(NativeTarget*, NativeLoc addr,
                          const MCLabel* valid_targets, u32 ntargets);
  void (*load_label_addr)(NativeTarget*, NativeLoc dst, MCLabel target);

  void (*emit)(NativeTarget*, const NativeInst*);
  /* All instruction-emission hooks require caller-selected legal physical
   * operands. In particular, dst values are NATIVE_LOC_REG, arithmetic sources
   * are NATIVE_LOC_REG or target-legal immediates, and memory base/index
   * registers in NativeAddr must already be materialized. NativeTarget may
   * validate and assert, but it must not allocate registers. */
  void (*move)(NativeTarget*, NativeLoc dst_reg, NativeLoc src_reg);
  void (*load_imm)(NativeTarget*, NativeLoc dst_reg, i64 imm);
  void (*load_const)(NativeTarget*, NativeLoc dst_reg, ConstBytes);
  void (*load_addr)(NativeTarget*, NativeLoc dst_reg, NativeAddr addr);
  void (*load)(NativeTarget*, NativeLoc dst_reg, NativeAddr addr, MemAccess);
  void (*store)(NativeTarget*, NativeAddr addr, NativeLoc src_reg, MemAccess);
  void (*tls_addr_of)(NativeTarget*, NativeLoc dst_reg, ObjSymId sym,
                      i64 addend);
  void (*copy_bytes)(NativeTarget*, NativeAddr dst, NativeAddr src,
                     AggregateAccess);
  void (*set_bytes)(NativeTarget*, NativeAddr dst, NativeLoc byte_value,
                    AggregateAccess);
  void (*bitfield_load)(NativeTarget*, NativeLoc dst_reg,
                        NativeAddr record_addr, BitFieldAccess);
  void (*bitfield_store)(NativeTarget*, NativeAddr record_addr,
                         NativeLoc src_reg, BitFieldAccess);
  void (*binop)(NativeTarget*, BinOp, NativeLoc dst_reg, NativeLoc a_reg,
                NativeLoc b_reg_or_imm);
  void (*unop)(NativeTarget*, UnOp, NativeLoc dst_reg, NativeLoc src_reg);
  void (*cmp)(NativeTarget*, CmpOp, NativeLoc dst_reg, NativeLoc a_reg,
              NativeLoc b_reg_or_imm);
  void (*convert)(NativeTarget*, ConvKind, NativeLoc dst_reg,
                  NativeLoc src_reg);
  void (*alloca_)(NativeTarget*, NativeLoc dst_reg, NativeLoc size_reg,
                  u32 align);

  /* Transfer between a register location and a frame slot. */
  void (*spill)(NativeTarget*, NativeLoc src_reg, NativeFrameSlot, MemAccess);
  void (*reload)(NativeTarget*, NativeLoc dst_reg, NativeFrameSlot, MemAccess);

  /* Calls are two-step: plan_call fills a NativeCallPlan from the descriptor,
   * emit_call consumes a plan. */
  void (*plan_call)(NativeTarget*, const NativeCallDesc*, NativeCallPlan*);
  void (*emit_call)(NativeTarget*, const NativeCallPlan*);
  /* `value` is the single returned local's location, or NULL for a void
   * return. out_rets/out_nrets describe the ABI parts of that one value. */
  void (*plan_ret)(NativeTarget*, const CGFuncDesc*, const NativeLoc* value,
                   NativeCallPlanRet** out_rets, u32* out_nrets);
  void (*ret)(NativeTarget*);

  /* Atomic memory operations and fences, each taking a KitCgMemOrder. */
  void (*atomic_load)(NativeTarget*, NativeLoc dst, NativeAddr addr, MemAccess,
                      KitCgMemOrder);
  void (*atomic_store)(NativeTarget*, NativeAddr addr, NativeLoc src, MemAccess,
                       KitCgMemOrder);
  void (*atomic_rmw)(NativeTarget*, KitCgAtomicOp, NativeLoc dst,
                     NativeAddr addr, NativeLoc val, MemAccess, KitCgMemOrder);
  void (*atomic_cas)(NativeTarget*, NativeLoc prior, NativeLoc ok,
                     NativeAddr addr, NativeLoc expected, NativeLoc desired,
                     MemAccess, KitCgMemOrder success, KitCgMemOrder failure);
  void (*fence)(NativeTarget*, KitCgMemOrder);
  /* Variadic support. The optimizer passes the va_list pointer opaquely as a
   * NativeLoc (a register or memory location holding the address of the
   * va_list object); va_arg additionally receives the argument type and a
   * destination location for the fetched value. All va_list layout knowledge
   * (pointer ABI vs register-save-area ABI, field offsets, sizes) lives behind
   * these hooks, which query the target ABI -- the optimizer makes no layout
   * assumptions. */
  void (*va_start_)(NativeTarget*, NativeLoc ap_ptr);
  void (*va_arg_)(NativeTarget*, NativeLoc dst, NativeLoc ap_ptr,
                  KitCgTypeId type);
  void (*va_end_)(NativeTarget*, NativeLoc ap_ptr);
  void (*va_copy_)(NativeTarget*, NativeLoc dst_ap_ptr, NativeLoc src_ap_ptr);
  void (*intrinsic)(NativeTarget*, IntrinKind, const NativeLoc* dsts, u32 ndst,
                    const NativeLoc* args, u32 narg);
  /* Inline asm: the template, output and input constraints with their
   * locations, and the clobber names. */
  void (*asm_block)(NativeTarget*, const char* tmpl, const AsmConstraint* outs,
                    u32 nout, NativeLoc* out_locs, const AsmConstraint* ins,
                    u32 nin, const NativeLoc* in_locs, const Sym* clobbers,
                    u32 nclob);
  void (*file_scope_asm)(NativeTarget*, const char* src, size_t len);
  /* Patch registration/application, trap emission, source-location tracking,
   * and end-of-life hooks. NOTE(review): call-order requirements between
   * finalize and destroy are not stated here — confirm with the driver. */
  void (*patch_add)(NativeTarget*, const NativePatch*);
  void (*patch_apply)(NativeTarget*);
  void (*trap)(NativeTarget*);
  void (*set_loc)(NativeTarget*, SrcLoc);
  void (*finalize)(NativeTarget*);
  void (*destroy)(NativeTarget*);
};
    549 
    550 static inline const NativeAllocClassInfo*
    551 native_target_class_info(const NativeTarget* t, NativeAllocClass cls) {
    552   if (!t || !t->regs) return NULL;
    553   for (u32 i = 0; i < t->regs->nclasses; ++i) {
    554     const NativeAllocClassInfo* ci = &t->regs->classes[i];
    555     if ((NativeAllocClass)ci->cls == cls) return ci;
    556   }
    557   return NULL;
    558 }
    559 
    560 static inline u32 native_target_caller_saved_mask(NativeTarget* t,
    561                                                   NativeAllocClass cls) {
    562   const NativeAllocClassInfo* ci;
    563   if (t && t->caller_saved_mask) return t->caller_saved_mask(t, cls);
    564   ci = native_target_class_info(t, cls);
    565   return ci ? ci->caller_saved_mask : 0u;
    566 }
    567 
    568 static inline u32 native_target_callee_saved_mask(NativeTarget* t,
    569                                                   NativeAllocClass cls) {
    570   const NativeAllocClassInfo* ci;
    571   if (t && t->callee_saved_mask) return t->callee_saved_mask(t, cls);
    572   ci = native_target_class_info(t, cls);
    573   return ci ? ci->callee_saved_mask : 0u;
    574 }
    575 
    576 static inline NativeLoc native_loc_none(void) {
    577   NativeLoc loc;
    578   memset(&loc, 0, sizeof loc);
    579   loc.kind = NATIVE_LOC_NONE;
    580   return loc;
    581 }
    582 
    583 /* Target-neutral location constructors and scalar queries. These are
    584  * byte-identical across the native backends, so they live here as the single
    585  * source of truth. (loc_reg's register mask differs per arch and stays
    586  * per-backend.) */
    587 static inline NativeLoc native_loc_reg(KitCgTypeId type, NativeAllocClass cls,
    588                                        Reg reg) {
    589   NativeLoc loc;
    590   memset(&loc, 0, sizeof loc);
    591   loc.kind = NATIVE_LOC_REG;
    592   loc.cls = (u8)cls;
    593   loc.type = type;
    594   loc.v.reg = reg;
    595   return loc;
    596 }
    597 
    598 static inline NativeLoc native_loc_stack(KitCgTypeId type, NativeFrameSlot slot,
    599                                          i32 offset) {
    600   NativeLoc loc;
    601   memset(&loc, 0, sizeof loc);
    602   loc.kind = NATIVE_LOC_STACK;
    603   loc.cls = NATIVE_REG_INT;
    604   loc.type = type;
    605   loc.v.stack.slot = slot;
    606   loc.v.stack.offset = offset;
    607   return loc;
    608 }
    609 
    610 static inline int native_loc_is_fp(NativeLoc loc) {
    611   return (NativeAllocClass)loc.cls == NATIVE_REG_FP;
    612 }
    613 
    614 /* Scalar size/align, clamped to a usable register-sized default. Shared by the
    615  * backends whose scalars are at most pointer-width (x64, rv64); aa64 keeps its
    616  * own size query because it asserts on over-wide scalars. */
    617 static inline u32 native_type_size(NativeTarget* t, KitCgTypeId type) {
    618   u64 n = type ? cg_type_size(t->c, type) : 8u;
    619   if (n == 0) n = 8u;
    620   return (u32)n;
    621 }
    622 
    623 static inline u32 native_type_align(NativeTarget* t, KitCgTypeId type) {
    624   u64 n = type ? cg_type_align(t->c, type) : 8u;
    625   if (n == 0) n = 1u;
    626   if (n > 16u) n = 16u;
    627   return (u32)n;
    628 }
    629 
    630 static inline MemAccess native_mem_for_type(NativeTarget* t, KitCgTypeId type,
    631                                             u32 size) {
    632   MemAccess m;
    633   memset(&m, 0, sizeof m);
    634   m.type = type;
    635   m.size = size ? size : native_type_size(t, type);
    636   m.align = native_type_align(t, type);
    637   return m;
    638 }
    639 
    640 /* FP register class for a scalar type: a float value lives in an FP register
    641  * only when the hardware float ABI has a register that wide. flen comes from
    642  * the target float ABI (SINGLE->4, DOUBLE->8, SOFT->0); the DEFAULT/unset
    643  * sentinel maps to the pointer width, which preserves the historical "FP iff
    644  * float and <= 8 bytes" behavior for lp64d / x86-64 and yields the correct
    645  * rv32 soft-double result (double is 8 bytes > flen=4 on ilp32f, > 0 on ilp32,
    646  * so it is INT-class and never bit-cast through an FP register via fmv.d.x).
    647  * aa64 keeps its own (same predicate, distinct mem helper). */
    648 static inline NativeAllocClass native_class_for_type_fp_le8(NativeTarget* t,
    649                                                             KitCgTypeId type) {
    650   u32 flen;
    651   switch (t->c->target.float_abi) {
    652     case KIT_FLOAT_ABI_SINGLE: flen = 4u; break;
    653     case KIT_FLOAT_ABI_DOUBLE: flen = 8u; break;
    654     case KIT_FLOAT_ABI_SOFT: flen = 0u; break;
    655     default: flen = t->c->target.ptr_size; break; /* DEFAULT: historical */
    656   }
    657   if (type && flen && cg_type_is_float(t->c, type) &&
    658       cg_type_size(t->c, type) <= flen)
    659     return NATIVE_REG_FP;
    660   return NATIVE_REG_INT;
    661 }
    662 
    663 #endif