kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

native_direct_target.c (74862B)


      1 #include "cg/native_direct_target.h"
      2 
      3 /* NativeDirectTarget is intentionally single-pass: semantic CG calls are
      4  * lowered immediately to NativeTarget operations, MCEmitter owns label fixups,
      5  * and function-end calls note_frame_state()/patch_apply() let the native
      6  * backend patch deferred frame/prologue details after max outgoing space is
      7  * known. Direct lowering currently forwards final frame state but does not
      8  * author generic NativePatch records itself.
      9  *
     10  * Remaining direct/backend cutover work: stack arguments, tail/musttail,
     11  * varargs, typed inline-asm register/memory bindings and outputs,
     12  * label-address data and computed gotos, records/sret/large aggregates, FP and
     13  * rounding conversions, fuller scalar intrinsics, and production-grade atomic
     14  * RMW/CAS lowering. */
     15 
     16 #include <string.h>
     17 
     18 #include "abi/abi.h"
     19 #include "cg/native_asm.h"
     20 #include "cg/type.h"
     21 #include "core/arena.h"
     22 #include "core/pool.h"
     23 #include "core/slice.h"
     24 
     25 #define NATIVE_DIRECT_MAGIC 0x4e445447u
     26 
     27 static NativeDirectTarget* nd_of(CgTarget* t) { return (NativeDirectTarget*)t; }
     28 
     29 static _Noreturn void nd_panic(NativeDirectTarget* d, const char* what) {
     30   compiler_panic(d->base.c, d->loc, "native direct target: %s", what);
     31 }
     32 
     33 static void* nd_arena(NativeDirectTarget* d, size_t size, size_t align) {
     34   void* p = arena_zalloc(d->base.c->tu, size, align);
     35   if (!p) nd_panic(d, "out of memory");
     36   return p;
     37 }
     38 
     39 /* Pick a transient NativeLoc array for one op: the on-struct buffer when N
     40  * fits, else a one-shot arena allocation. N == 0 yields NULL. */
     41 static NativeLoc* nd_loc_buf(NativeDirectTarget* d, NativeLoc* buf, u32 cap,
     42                              u32 n) {
     43   if (!n) return NULL;
     44   if (n <= cap) return buf;
     45   return nd_arena(d, sizeof(NativeLoc) * n, _Alignof(NativeLoc));
     46 }
     47 
     48 static void nd_grow_locals(NativeDirectTarget* d, u32 want) {
     49   NativeDirectLocal* next;
     50   u32 cap;
     51   if (d->locals_cap >= want) return;
     52   cap = d->locals_cap ? d->locals_cap : 32u;
     53   while (cap < want) cap *= 2u;
     54   next = nd_arena(d, sizeof(*next) * cap, _Alignof(NativeDirectLocal));
     55   if (d->locals) memcpy(next, d->locals, sizeof(*next) * d->nlocals);
     56   d->locals = next;
     57   d->locals_cap = cap;
     58 }
     59 
     60 static void nd_grow_labels(NativeDirectTarget* d, u32 want) {
     61   MCLabel* next;
     62   u32 cap;
     63   if (d->labels_cap >= want) return;
     64   cap = d->labels_cap ? d->labels_cap : 32u;
     65   while (cap < want) cap *= 2u;
     66   next = nd_arena(d, sizeof(*next) * cap, _Alignof(MCLabel));
     67   if (d->labels) memcpy(next, d->labels, sizeof(*next) * d->labels_cap);
     68   d->labels = next;
     69   d->labels_cap = cap;
     70 }
     71 
     72 static void nd_grow_scopes(NativeDirectTarget* d, u32 want) {
     73   NativeDirectScope* next;
     74   u32 cap;
     75   if (d->scopes_cap >= want) return;
     76   cap = d->scopes_cap ? d->scopes_cap : 16u;
     77   while (cap < want) cap *= 2u;
     78   next = nd_arena(d, sizeof(*next) * cap, _Alignof(NativeDirectScope));
     79   if (d->scopes) memcpy(next, d->scopes, sizeof(*next) * d->nscopes);
     80   d->scopes = next;
     81   d->scopes_cap = cap;
     82 }
     83 
     84 static NativeDirectLocal* nd_local(NativeDirectTarget* d, CGLocal local) {
     85   if (local == CG_LOCAL_NONE || local > d->nlocals)
     86     nd_panic(d, "bad semantic local");
     87   return &d->locals[local - 1u];
     88 }
     89 
     90 static NativeAllocClass nd_class_for_type(NativeDirectTarget* d,
     91                                           KitCgTypeId type) {
     92   if (d->native && d->native->class_for_type)
     93     return d->native->class_for_type(d->native, type);
     94   return NATIVE_REG_INT;
     95 }
     96 
     97 static const NativeAllocClassInfo* nd_class_info(NativeDirectTarget* d,
     98                                                  NativeAllocClass cls) {
     99   const NativeAllocClassInfo* ci = (u32)cls < 3u ? d->class_info[cls] : NULL;
    100   if (!ci) nd_panic(d, "target has no requested register class");
    101   return ci;
    102 }
    103 
    104 /* Register-location constructor is shared as native_loc_reg in
    105  * native_target.h (arg order: type, cls, reg). */
    106 
    107 static void nd_flush_local(NativeDirectTarget* d, CGLocal local);
    108 static Reg nd_cache_reg_for(NativeDirectTarget* d, CGLocal local,
    109                             KitCgTypeId access_type);
    110 static Reg nd_pick_cache_victim(NativeDirectTarget* d, NativeAllocClass cls);
    111 
    112 static u32 nd_callee_saved_mask(NativeDirectTarget* d, NativeAllocClass cls) {
    113   return native_target_callee_saved_mask(d->native, cls);
    114 }
    115 
    116 static u32 nd_caller_saved_mask(NativeDirectTarget* d, NativeAllocClass cls) {
    117   return native_target_caller_saved_mask(d->native, cls);
    118 }
    119 
    120 static void nd_note_reg_used(NativeDirectTarget* d, NativeAllocClass cls,
    121                              Reg reg) {
    122   if ((u32)cls >= 3u || reg >= 32u) return;
    123   if (nd_callee_saved_mask(d, cls) & (1u << reg))
    124     d->callee_saved_used[cls] |= 1u << reg;
    125 }
    126 
    127 static Reg nd_scratch_acquire(NativeDirectTarget* d, NativeAllocClass cls) {
    128   const NativeAllocClassInfo* ci = nd_class_info(d, cls);
    129   const Reg* regs = ci->scratch;
    130   u32 nregs = ci->nscratch;
    131   /* Prefer a register that is neither pinned (scratch_used) nor caching a live
    132    * local (reg_owner). */
    133   for (u32 pass = 0; pass < 2u; ++pass) {
    134     for (u32 i = 0; i < nregs; ++i) {
    135       Reg r = regs[i];
    136       if (r >= 32u) continue;
    137       if ((d->scratch_used[cls] & (1u << r)) == 0 &&
    138           d->reg_owner[cls][r] == CG_LOCAL_NONE) {
    139         d->scratch_used[cls] |= 1u << r;
    140         nd_note_reg_used(d, cls, r);
    141         return r;
    142       }
    143     }
    144     regs = ci->allocable;
    145     nregs = ci->nallocable;
    146   }
    147   /* Under pressure, evict the LRU non-pinned cached local (spilling it to its
    148    * home) and reuse its register as a scratch temporary. */
    149   {
    150     Reg r = nd_pick_cache_victim(d, cls);
    151     if (r != REG_NONE) {
    152       nd_flush_local(d, d->reg_owner[cls][r]);
    153       d->scratch_used[cls] |= 1u << r;
    154       nd_note_reg_used(d, cls, r);
    155       return r;
    156     }
    157   }
    158   nd_panic(d, "out of scratch registers");
    159 }
    160 
    161 static void nd_scratch_release(NativeDirectTarget* d, NativeAllocClass cls,
    162                                Reg reg) {
    163   if (reg < 32u) d->scratch_used[cls] &= ~(1u << reg);
    164 }
    165 
    166 static NativeFrameSlot nd_alloc_frame_slot(NativeDirectTarget* d,
    167                                            const NativeFrameSlotDesc* desc) {
    168   NativeFrameSlot slot = NATIVE_FRAME_SLOT_NONE;
    169   if (d->native && d->native->frame_slot)
    170     slot = d->native->frame_slot(d->native, desc);
    171   else
    172     nd_panic(d, "target does not allocate frame slots");
    173   if (slot == NATIVE_FRAME_SLOT_NONE)
    174     nd_panic(d, "frame slot allocation failed");
    175   return slot;
    176 }
    177 
    178 static NativeFrameSlotDesc nd_slot_desc_local(const CGLocalDesc* in) {
    179   NativeFrameSlotDesc out;
    180   memset(&out, 0, sizeof out);
    181   out.type = in->type;
    182   out.name = in->name;
    183   out.loc = in->loc;
    184   out.size = in->size;
    185   out.align = in->align;
    186   out.kind = NATIVE_FRAME_SLOT_LOCAL;
    187   if (in->flags & CG_LOCAL_ADDR_TAKEN)
    188     out.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN;
    189   if (in->flags & CG_LOCAL_MEMORY_REQUIRED)
    190     out.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED;
    191   return out;
    192 }
    193 
    194 static NativeFrameSlotDesc nd_slot_desc_param(const CGParamDesc* in) {
    195   NativeFrameSlotDesc out;
    196   memset(&out, 0, sizeof out);
    197   out.type = in->type;
    198   out.name = in->name;
    199   out.loc = in->loc;
    200   out.size = in->size;
    201   out.align = in->align;
    202   out.kind = NATIVE_FRAME_SLOT_PARAM;
    203   if (in->flags & CG_LOCAL_ADDR_TAKEN)
    204     out.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN;
    205   if (in->flags & CG_LOCAL_MEMORY_REQUIRED)
    206     out.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED;
    207   return out;
    208 }
    209 
    210 static CGLocal nd_alloc_local(NativeDirectTarget* d, const CGLocalDesc* desc) {
    211   NativeDirectLocal* l;
    212   NativeFrameSlotDesc fsd;
    213   CGLocal id;
    214   nd_grow_locals(d, d->nlocals + 1u);
    215   id = d->nlocals + 1u;
    216   l = &d->locals[d->nlocals++];
    217   memset(l, 0, sizeof *l);
    218   l->type = desc->type;
    219   l->size = desc->size;
    220   l->align = desc->align;
    221   l->flags = desc->flags;
    222   l->reg = REG_NONE;
    223   l->address_taken = (desc->flags & CG_LOCAL_ADDR_TAKEN) != 0;
    224   l->memory_required = (desc->flags & CG_LOCAL_MEMORY_REQUIRED) != 0;
    225   l->cls = (u8)nd_class_for_type(d, desc->type);
    226   fsd = nd_slot_desc_local(desc);
    227   l->home = nd_alloc_frame_slot(d, &fsd);
    228   return id;
    229 }
    230 
    231 static MCLabel nd_mc_label(NativeDirectTarget* d, Label label) {
    232   if (label == LABEL_NONE || label > d->nlabels || !d->labels[label])
    233     nd_panic(d, "bad label");
    234   return d->labels[label];
    235 }
    236 
    237 static Label nd_label_new_raw(NativeDirectTarget* d) {
    238   Label id;
    239   if (!d->native || !d->native->label_new)
    240     nd_panic(d, "target does not allocate labels");
    241   id = d->nlabels + 1u;
    242   nd_grow_labels(d, id + 1u);
    243   d->labels[id] = d->native->label_new(d->native);
    244   d->nlabels = id;
    245   return id;
    246 }
    247 
    248 static NativeLoc nd_loc_frame(NativeDirectTarget* d, CGLocal local,
    249                               KitCgTypeId type) {
    250   NativeDirectLocal* l = nd_local(d, local);
    251   NativeLoc out;
    252   memset(&out, 0, sizeof out);
    253   out.kind = NATIVE_LOC_FRAME;
    254   out.cls = l->cls;
    255   out.type = type ? type : l->type;
    256   out.v.frame = l->home;
    257   return out;
    258 }
    259 
    260 static NativeLoc nd_loc_imm(i64 imm, KitCgTypeId type) {
    261   NativeLoc out;
    262   memset(&out, 0, sizeof out);
    263   out.kind = NATIVE_LOC_IMM;
    264   out.type = type;
    265   out.v.imm = imm;
    266   return out;
    267 }
    268 
    269 static NativeLoc nd_loc_global(ObjSymId sym, i64 addend, KitCgTypeId type) {
    270   NativeLoc out;
    271   memset(&out, 0, sizeof out);
    272   out.kind = NATIVE_LOC_GLOBAL;
    273   out.type = type;
    274   out.v.global.sym = sym;
    275   out.v.global.addend = addend;
    276   return out;
    277 }
    278 
    279 static NativeLoc nd_loc_operand(NativeDirectTarget* d, Operand op) {
    280   switch ((OpKind)op.kind) {
    281     case OPK_IMM:
    282       return nd_loc_imm(op.v.imm, op.type);
    283     case OPK_LOCAL:
    284       return nd_loc_frame(d, op.v.local, op.type);
    285     case OPK_GLOBAL:
    286       return nd_loc_global(op.v.global.sym, op.v.global.addend, op.type);
    287     case OPK_INDIRECT: {
    288       NativeDirectLocal* bl = nd_local(d, op.v.ind.base);
    289       NativeLoc out;
    290       memset(&out, 0, sizeof out);
    291       out.kind = NATIVE_LOC_ADDR;
    292       out.type = op.type;
    293       out.v.addr.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE;
    294       out.v.addr.base.frame = bl->home;
    295       out.v.addr.cls = bl->cls;
    296       out.v.addr.base_type = bl->type;
    297       if (op.v.ind.index != CG_LOCAL_NONE) {
    298         NativeDirectLocal* il = nd_local(d, op.v.ind.index);
    299         out.v.addr.index_kind = NATIVE_ADDR_INDEX_FRAME_VALUE;
    300         out.v.addr.index.frame = il->home;
    301         out.v.addr.index_cls = il->cls;
    302         out.v.addr.index_type = il->type;
    303       }
    304       out.v.addr.log2_scale = op.v.ind.log2_scale;
    305       out.v.addr.offset = op.v.ind.ofs;
    306       return out;
    307     }
    308     default:
    309       nd_panic(d, "bad operand kind");
    310   }
    311 }
    312 
    313 static NativeAddr nd_addr_storage(NativeDirectTarget* d, Operand op) {
    314   NativeAddr out;
    315   memset(&out, 0, sizeof out);
    316   switch ((OpKind)op.kind) {
    317     case OPK_LOCAL: {
    318       /* The local's home is addressed directly (a memory access reads/writes
    319        * the frame slot itself, e.g. by-value aggregate field extraction). This
    320        * is not pointer aliasing, but it does read the home, so a cached value
    321        * must be made current: spill if dirty and drop the entry. */
    322       NativeDirectLocal* l;
    323       nd_flush_local(d, op.v.local);
    324       l = nd_local(d, op.v.local);
    325       out.base_kind = NATIVE_ADDR_BASE_FRAME;
    326       out.base.frame = l->home;
    327       out.cls = l->cls;
    328       out.base_type = l->type;
    329       return out;
    330     }
    331     case OPK_GLOBAL:
    332       out.base_kind = NATIVE_ADDR_BASE_GLOBAL;
    333       out.base.global.sym = op.v.global.sym;
    334       out.base.global.addend = op.v.global.addend;
    335       out.base_type = op.type;
    336       return out;
    337     case OPK_INDIRECT: {
    338       NativeDirectLocal* bl = nd_local(d, op.v.ind.base);
    339       Reg br = nd_cache_reg_for(d, op.v.ind.base, bl->type);
    340       out.cls = bl->cls;
    341       out.base_type = bl->type;
    342       if (br != REG_NONE) {
    343         out.base_kind = NATIVE_ADDR_BASE_REG;
    344         out.base.reg = br;
    345         d->scratch_used[bl->cls] |= 1u
    346                                     << br; /* pin; unpinned at temps release */
    347       } else {
    348         out.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE;
    349         out.base.frame = bl->home;
    350       }
    351       if (op.v.ind.index != CG_LOCAL_NONE) {
    352         NativeDirectLocal* il = nd_local(d, op.v.ind.index);
    353         Reg ir = nd_cache_reg_for(d, op.v.ind.index, il->type);
    354         out.index_cls = il->cls;
    355         out.index_type = il->type;
    356         if (ir != REG_NONE) {
    357           out.index_kind = NATIVE_ADDR_INDEX_REG;
    358           out.index.reg = ir;
    359           d->scratch_used[il->cls] |= 1u << ir;
    360         } else {
    361           out.index_kind = NATIVE_ADDR_INDEX_FRAME_VALUE;
    362           out.index.frame = il->home;
    363         }
    364       }
    365       out.log2_scale = op.v.ind.log2_scale;
    366       out.offset = op.v.ind.ofs;
    367       return out;
    368     }
    369     default:
    370       nd_panic(d, "operand is not addressable storage");
    371   }
    372 }
    373 
    374 static NativeAddr nd_addr_pointer(NativeDirectTarget* d, Operand op) {
    375   NativeAddr out;
    376   memset(&out, 0, sizeof out);
    377   switch ((OpKind)op.kind) {
    378     case OPK_LOCAL: {
    379       NativeDirectLocal* l = nd_local(d, op.v.local);
    380       out.cls = l->cls;
    381       out.base_type = l->type;
    382       if (cg_type_is_ptr(d->base.c, op.type)) {
    383         /* Pointer value lives in the local: use its live register if cached
    384          * (a dirty cached pointer is a valid base), else load from the home. */
    385         Reg r = nd_cache_reg_for(d, op.v.local, l->type);
    386         if (r != REG_NONE) {
    387           out.base_kind = NATIVE_ADDR_BASE_REG;
    388           out.base.reg = r;
    389           d->scratch_used[l->cls] |= 1u << r;
    390         } else {
    391           out.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE;
    392           out.base.frame = l->home;
    393         }
    394       } else {
    395         /* The local's home is addressed directly; make it current first (see
    396          * nd_addr_storage OPK_LOCAL). */
    397         nd_flush_local(d, op.v.local);
    398         out.base_kind = NATIVE_ADDR_BASE_FRAME;
    399         out.base.frame = l->home;
    400       }
    401       return out;
    402     }
    403     case OPK_GLOBAL:
    404       out.base_kind = NATIVE_ADDR_BASE_GLOBAL;
    405       out.base.global.sym = op.v.global.sym;
    406       out.base.global.addend = op.v.global.addend;
    407       out.base_type = op.type;
    408       return out;
    409     case OPK_INDIRECT:
    410       return nd_addr_storage(d, op);
    411     default:
    412       nd_panic(d, "operand is not a pointer address");
    413   }
    414 }
    415 
    /* Panic with message NAME unless D has a backend attached and that backend
     * provides the hook MEMBER. D is evaluated more than once. */
    #define ND_REQUIRE_NATIVE(d, member, name)          \
      do {                                              \
        if (!((d)->native && (d)->native->member)) {    \
          nd_panic((d), (name));                        \
        }                                               \
      } while (0)
    420 
    421 typedef struct NdAddrTemps {
    422   Reg base;
    423   Reg index;
    424   NativeAllocClass base_cls;
    425   NativeAllocClass index_cls;
    426 } NdAddrTemps;
    427 
    428 static void nd_addr_temps_release(NativeDirectTarget* d,
    429                                   const NdAddrTemps* temps);
    430 
    431 static MemAccess nd_scalar_mem(KitCgTypeId type, u32 size, u32 align) {
    432   MemAccess mem;
    433   memset(&mem, 0, sizeof mem);
    434   mem.type = type;
    435   mem.size = size;
    436   mem.align = align;
    437   return mem;
    438 }
    439 
    440 static MemAccess nd_type_mem(NativeDirectTarget* d, KitCgTypeId type) {
    441   u64 size;
    442   if (!type) type = builtin_id(KIT_CG_BUILTIN_I64);
    443   size = cg_type_size(d->base.c, type);
    444   if (size > 0xffffffffu) nd_panic(d, "scalar type is too large");
    445   return nd_scalar_mem(type, (u32)size, cg_type_align(d->base.c, type));
    446 }
    447 
    448 static void nd_barrier(NativeDirectTarget* d, u32 flags) {
    449   if (d->ops && d->ops->barrier) d->ops->barrier(d, flags);
    450 }
    451 
    452 static void nd_load_frame_to_reg(NativeDirectTarget* d, NativeLoc dst,
    453                                  NativeFrameSlot frame, KitCgTypeId type) {
    454   NativeAddr addr;
    455   MemAccess mem;
    456   memset(&addr, 0, sizeof addr);
    457   addr.base_kind = NATIVE_ADDR_BASE_FRAME;
    458   addr.base.frame = frame;
    459   addr.base_type = type;
    460   mem = nd_type_mem(d, type);
    461   ND_REQUIRE_NATIVE(d, load, "target does not emit loads");
    462   d->native->load(d->native, dst, addr, mem);
    463 }
    464 
    465 static void nd_store_reg_to_frame(NativeDirectTarget* d, NativeFrameSlot frame,
    466                                   KitCgTypeId type, NativeLoc src) {
    467   NativeAddr addr;
    468   MemAccess mem;
    469   memset(&addr, 0, sizeof addr);
    470   addr.base_kind = NATIVE_ADDR_BASE_FRAME;
    471   addr.base.frame = frame;
    472   addr.base_type = type;
    473   mem = nd_type_mem(d, type);
    474   ND_REQUIRE_NATIVE(d, store, "target does not emit stores");
    475   d->native->store(d->native, addr, src, mem);
    476 }
    477 
    478 static void nd_copy_to_reg(NativeDirectTarget* d, NativeLoc dst, NativeLoc src);
    479 static void nd_release_materialized(NativeDirectTarget* d, NativeLoc loc);
    480 static void nd_store_operand_from_reg(NativeDirectTarget* d, Operand dst,
    481                                       NativeLoc src);
    482 
    483 /* --- Local register cache (write-back, basic-block-scoped) ---------------- *
    484  * Only scalar, non-address-taken locals are cached, and only in caller-saved
    485  * allocable registers. Entries are created solely by pure-compute destinations
    486  * (nd_dst_reg/nd_dst_writeback) and are always dirty; reads hit a live entry or
    487  * fall back to a frame load without creating one. nd_flush_all spills and
    488  * empties the cache at the top of every non-pure-compute op, so the cache only
    489  * survives across straight-line runs of compute ops. Caching prefers the
    490  * register-file caller-saved mask; if the live OS ABI treats one of those
    491  * registers as callee-saved, nd_note_reg_used reports it to the backend before
    492  * the deferred prologue is patched. */
    493 
    494 static int nd_local_cacheable(NativeDirectTarget* d,
    495                               const NativeDirectLocal* l) {
    496   return !l->address_taken && !l->memory_required && l->size != 0 &&
    497          l->size <= (u32)d->base.c->target.ptr_size;
    498 }
    499 
    500 /* If LOCAL is currently cached and the access reads it at its cached (storage)
    501  * width, return its live register; else REG_NONE. Used by the address builders
    502  * to point an address at a base/index local's live register instead of reading
    503  * a possibly-stale frame home. Base/index reads are always of the local's own
    504  * type, so the width check is trivially met for that use; the value-read width
    505  * hazard is handled separately in nd_materialize_operand. */
    506 /* Stamp a cache touch (def/read/addressing use) for LRU victim selection. */
    507 static void nd_touch_local(NativeDirectTarget* d, NativeDirectLocal* l) {
    508   l->last_use = ++d->use_tick;
    509 }
    510 
    511 static Reg nd_cache_reg_for(NativeDirectTarget* d, CGLocal local,
    512                             KitCgTypeId access_type) {
    513   NativeDirectLocal* l = nd_local(d, local);
    514   if (l->reg == REG_NONE) return REG_NONE;
    515   if (!nd_local_cacheable(d, l)) return REG_NONE;
    516   if (access_type && access_type != l->type) return REG_NONE;
    517   nd_touch_local(d, l);
    518   return l->reg;
    519 }
    520 
    521 /* Pick the least-recently-used non-pinned cached local in CLS as a spill victim
    522  * (its register can then be reused). REG_NONE if every owned reg is pinned.
    523  * Pressure is real in Design B (the cache survives across memory ops), so an
    524  * arbitrary victim would thrash a hot local; LRU keeps the live set resident.
    525  */
    526 static Reg nd_pick_cache_victim(NativeDirectTarget* d, NativeAllocClass cls) {
    527   const NativeAllocClassInfo* ci = nd_class_info(d, cls);
    528   Reg best = REG_NONE;
    529   u32 best_use = 0;
    530   for (u32 i = 0; i < ci->nallocable; ++i) {
    531     Reg r = ci->allocable[i];
    532     CGLocal owner;
    533     if (r >= 32u) continue;
    534     owner = d->reg_owner[cls][r];
    535     if (owner == CG_LOCAL_NONE) continue;
    536     if (d->scratch_used[cls] & (1u << r)) continue; /* pinned: never a victim */
    537     if (best == REG_NONE || nd_local(d, owner)->last_use < best_use) {
    538       best = r;
    539       best_use = nd_local(d, owner)->last_use;
    540     }
    541   }
    542   return best;
    543 }
    544 
    545 /* Pick a caller-saved allocable register to cache a local in: a free one, else
    546  * evict the LRU non-pinned cached local. REG_NONE means use the frame-only
    547  * path. */
    548 static Reg nd_cache_alloc(NativeDirectTarget* d, NativeAllocClass cls) {
    549   const NativeAllocClassInfo* ci = nd_class_info(d, cls);
    550   u32 caller = nd_caller_saved_mask(d, cls);
    551   Reg victim;
    552   for (u32 i = 0; i < ci->nallocable; ++i) {
    553     Reg r = ci->allocable[i];
    554     if (r >= 32u) continue;
    555     if ((caller & (1u << r)) && d->reg_owner[cls][r] == CG_LOCAL_NONE &&
    556         (d->scratch_used[cls] & (1u << r)) == 0) {
    557       nd_note_reg_used(d, cls, r);
    558       return r;
    559     }
    560   }
    561   victim = nd_pick_cache_victim(d, cls);
    562   if (victim != REG_NONE && (caller & (1u << victim))) {
    563     nd_flush_local(d, d->reg_owner[cls][victim]);
    564     nd_note_reg_used(d, cls, victim);
    565     return victim;
    566   }
    567   return REG_NONE;
    568 }
    569 
    570 /* Append LOCAL to the tail of the cached-locals list (O(1)). Called only on the
    571  * REG_NONE -> reg transition in nd_dst_reg. */
    572 static void nd_cache_link(NativeDirectTarget* d, CGLocal local) {
    573   i32 idx = (i32)(local - 1u);
    574   i32 prev = d->cache_tail;
    575   d->locals[idx].cache_next = -1;
    576   d->locals[idx].cache_prev = prev;
    577   if (prev >= 0)
    578     d->locals[prev].cache_next = idx;
    579   else
    580     d->cache_head = idx;
    581   d->cache_tail = idx;
    582   d->ncached++;
    583 }
    584 
    585 /* Remove LOCAL (which must currently be cached) from the cached-locals list. */
    586 static void nd_cache_unlink(NativeDirectTarget* d, CGLocal local) {
    587   i32 idx = (i32)(local - 1u);
    588   i32 next = d->locals[idx].cache_next;
    589   i32 prev = d->locals[idx].cache_prev;
    590   if (next >= 0)
    591     d->locals[next].cache_prev = prev;
    592   else
    593     d->cache_tail = prev;
    594   if (prev >= 0)
    595     d->locals[prev].cache_next = next;
    596   else
    597     d->cache_head = next;
    598   d->ncached--;
    599 }
    600 
    601 /* Write a cached local back to its home (if dirty) and drop the entry. Safe to
    602  * call on an uncached local. */
    603 static void nd_flush_local(NativeDirectTarget* d, CGLocal local) {
    604   NativeDirectLocal* l = nd_local(d, local);
    605   if (l->reg == REG_NONE) return;
    606   if (l->dirty)
    607     nd_store_reg_to_frame(
    608         d, l->home, l->type,
    609         native_loc_reg(l->type, (NativeAllocClass)l->cls, l->reg));
    610   nd_cache_unlink(d, local);
    611   d->reg_owner[l->cls][l->reg] = CG_LOCAL_NONE;
    612   l->reg = REG_NONE;
    613   l->dirty = 0;
    614 }
    615 
    616 /* Drop a cache entry without writing it back, for when a store supersedes the
    617  * cached value. */
    618 static void nd_invalidate_local(NativeDirectTarget* d, CGLocal local) {
    619   NativeDirectLocal* l = nd_local(d, local);
    620   if (l->reg == REG_NONE) return;
    621   nd_cache_unlink(d, local);
    622   d->reg_owner[l->cls][l->reg] = CG_LOCAL_NONE;
    623   l->reg = REG_NONE;
    624   l->dirty = 0;
    625 }
    626 
    627 /* Spill the whole cache to memory and empty it. The list is sorted ascending,
    628  * so this spills in the same order as the former O(nlocals) index scan. */
    629 static void nd_flush_all(NativeDirectTarget* d) {
    630   while (d->cache_head >= 0) nd_flush_local(d, (CGLocal)(d->cache_head + 1));
    631 }
    632 
    633 static NativeAddr nd_addr_materialize(NativeDirectTarget* d, NativeAddr in,
    634                                       NdAddrTemps* temps, MemAccess mem) {
    635   NativeAddr out = in;
    636   memset(temps, 0, sizeof *temps);
    637   temps->base = REG_NONE;
    638   temps->index = REG_NONE;
    639   /* A base/index that arrives already in a register is a pinned live cache reg
    640    * (the addr builders are the only producers of REG-kind storage addresses).
    641    * Record it so the temps release unpins it afterward — without storing or
    642    * invalidating, leaving the cache entry intact. */
    643   if (out.base_kind == NATIVE_ADDR_BASE_REG) {
    644     temps->base = out.base.reg;
    645     temps->base_cls = (NativeAllocClass)out.cls;
    646   }
    647   if (out.index_kind == NATIVE_ADDR_INDEX_REG) {
    648     temps->index = out.index.reg;
    649     temps->index_cls = (NativeAllocClass)out.index_cls;
    650   }
    651   if (out.base_kind == NATIVE_ADDR_BASE_FRAME_VALUE) {
    652     NativeAllocClass cls = (NativeAllocClass)out.cls;
    653     Reg r = nd_scratch_acquire(d, cls);
    654     NativeLoc dst = native_loc_reg(out.base_type, cls, r);
    655     nd_load_frame_to_reg(d, dst, out.base.frame, out.base_type);
    656     out.base_kind = NATIVE_ADDR_BASE_REG;
    657     out.base.reg = r;
    658     temps->base = r;
    659     temps->base_cls = cls;
    660   }
    661   if (out.index_kind == NATIVE_ADDR_INDEX_FRAME_VALUE) {
    662     NativeAllocClass cls = (NativeAllocClass)out.index_cls;
    663     Reg r = nd_scratch_acquire(d, cls);
    664     NativeLoc dst = native_loc_reg(out.index_type, cls, r);
    665     nd_load_frame_to_reg(d, dst, out.index.frame, out.index_type);
    666     out.index_kind = NATIVE_ADDR_INDEX_REG;
    667     out.index.reg = r;
    668     temps->index = r;
    669     temps->index_cls = cls;
    670   }
    671   if (d->native && d->native->addr_legal &&
    672       !d->native->addr_legal(d->native, &out, mem)) {
    673     NativeAllocClass cls = NATIVE_REG_INT;
    674     Reg r = nd_scratch_acquire(d, cls);
    675     NativeLoc dst = native_loc_reg(
    676         out.base_type ? out.base_type : builtin_id(KIT_CG_BUILTIN_I64), cls, r);
    677     ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses");
    678     d->native->load_addr(d->native, dst, out);
    679     nd_addr_temps_release(d, temps);
    680     memset(temps, 0, sizeof *temps);
    681     temps->base = r;
    682     temps->index = REG_NONE;
    683     temps->base_cls = cls;
    684     memset(&out, 0, sizeof out);
    685     out.base_kind = NATIVE_ADDR_BASE_REG;
    686     out.base.reg = r;
    687     out.cls = (u8)cls;
    688     out.base_type = dst.type;
    689     if (d->native && d->native->addr_legal &&
    690         !d->native->addr_legal(d->native, &out, mem))
    691       nd_panic(d, "native address is not legal");
    692   }
    693   return out;
    694 }
    695 
    696 static void nd_addr_temps_release(NativeDirectTarget* d,
    697                                   const NdAddrTemps* temps) {
    698   if (temps->base != REG_NONE)
    699     nd_scratch_release(d, temps->base_cls, temps->base);
    700   if (temps->index != REG_NONE)
    701     nd_scratch_release(d, temps->index_cls, temps->index);
    702 }
    703 
    704 static NativeLoc nd_materialize_loc(NativeDirectTarget* d, NativeLoc src,
    705                                     NativeAllocClass cls, KitCgTypeId type) {
    706   Reg r;
    707   NativeLoc dst;
    708   if (src.kind == NATIVE_LOC_REG) return src;
    709   r = nd_scratch_acquire(d, cls);
    710   dst = native_loc_reg(type ? type : src.type, cls, r);
    711   nd_copy_to_reg(d, dst, src);
    712   return dst;
    713 }
    714 
    715 static void nd_copy_to_reg(NativeDirectTarget* d, NativeLoc dst,
    716                            NativeLoc src) {
    717   if (dst.kind != NATIVE_LOC_REG) nd_panic(d, "copy destination is not a reg");
    718   switch ((NativeLocKind)src.kind) {
    719     case NATIVE_LOC_REG:
    720       if (src.v.reg != dst.v.reg || src.cls != dst.cls) {
    721         ND_REQUIRE_NATIVE(d, move, "target does not emit register moves");
    722         d->native->move(d->native, dst, src);
    723       }
    724       break;
    725     case NATIVE_LOC_FRAME:
    726       nd_load_frame_to_reg(d, dst, src.v.frame, dst.type);
    727       break;
    728     case NATIVE_LOC_STACK: {
    729       NativeAddr addr;
    730       MemAccess mem = nd_type_mem(d, dst.type);
    731       memset(&addr, 0, sizeof addr);
    732       addr.base_kind = NATIVE_ADDR_BASE_FRAME;
    733       addr.base.frame = src.v.stack.slot;
    734       addr.base_type = dst.type;
    735       addr.offset = src.v.stack.offset;
    736       ND_REQUIRE_NATIVE(d, load, "target does not emit loads");
    737       d->native->load(d->native, dst, addr, mem);
    738       break;
    739     }
    740     case NATIVE_LOC_IMM:
    741       ND_REQUIRE_NATIVE(d, load_imm, "target does not emit immediates");
    742       d->native->load_imm(d->native, dst, src.v.imm);
    743       break;
    744     case NATIVE_LOC_GLOBAL: {
    745       NativeAddr addr;
    746       memset(&addr, 0, sizeof addr);
    747       addr.base_kind = NATIVE_ADDR_BASE_GLOBAL;
    748       addr.base.global.sym = src.v.global.sym;
    749       addr.base.global.addend = src.v.global.addend;
    750       addr.base_type = dst.type;
    751       ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses");
    752       d->native->load_addr(d->native, dst, addr);
    753       break;
    754     }
    755     case NATIVE_LOC_ADDR: {
    756       NdAddrTemps temps;
    757       MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size,
    758                                     d->base.c->target.ptr_align);
    759       NativeAddr addr = nd_addr_materialize(d, src.v.addr, &temps, mem);
    760       ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses");
    761       d->native->load_addr(d->native, dst, addr);
    762       nd_addr_temps_release(d, &temps);
    763       break;
    764     }
    765     default:
    766       nd_panic(d, "cannot materialize native location");
    767   }
    768 }
    769 
    770 static void nd_write_loc(NativeDirectTarget* d, NativeLoc dst, NativeLoc src,
    771                          MemAccess mem) {
    772   switch ((NativeLocKind)dst.kind) {
    773     case NATIVE_LOC_REG:
    774       nd_copy_to_reg(d, dst, src);
    775       break;
    776     case NATIVE_LOC_FRAME: {
    777       NativeLoc val =
    778           nd_materialize_loc(d, src, (NativeAllocClass)dst.cls, dst.type);
    779       nd_store_reg_to_frame(d, dst.v.frame, dst.type, val);
    780       nd_release_materialized(d, val);
    781       break;
    782     }
    783     case NATIVE_LOC_STACK: {
    784       NativeAddr addr;
    785       NativeLoc val =
    786           nd_materialize_loc(d, src, (NativeAllocClass)dst.cls, dst.type);
    787       memset(&addr, 0, sizeof addr);
    788       addr.base_kind = NATIVE_ADDR_BASE_FRAME;
    789       addr.base.frame = dst.v.stack.slot;
    790       addr.base_type = dst.type;
    791       addr.offset = dst.v.stack.offset;
    792       ND_REQUIRE_NATIVE(d, store, "target does not emit stores");
    793       d->native->store(d->native, addr, val, mem);
    794       nd_release_materialized(d, val);
    795       break;
    796     }
    797     case NATIVE_LOC_ADDR: {
    798       NdAddrTemps temps;
    799       NativeAddr addr = nd_addr_materialize(d, dst.v.addr, &temps, mem);
    800       NativeAllocClass cls = nd_class_for_type(d, src.type);
    801       NativeLoc val = nd_materialize_loc(d, src, cls, src.type);
    802       ND_REQUIRE_NATIVE(d, store, "target does not emit stores");
    803       d->native->store(d->native, addr, val, mem);
    804       nd_release_materialized(d, val);
    805       nd_addr_temps_release(d, &temps);
    806       break;
    807     }
    808     default:
    809       nd_panic(d, "unsupported write destination");
    810   }
    811 }
    812 
    813 static void nd_release_materialized(NativeDirectTarget* d, NativeLoc loc) {
    814   if (loc.kind == NATIVE_LOC_REG)
    815     nd_scratch_release(d, (NativeAllocClass)loc.cls, loc.v.reg);
    816 }
    817 
    818 /* Spill cached locals that back an INDIRECT operand's address before it is read
    819  * from their frame homes. Compute ops normally receive only LOCAL/IMM/GLOBAL
    820  * operands; this keeps the rare INDIRECT case correct without flushing all. */
    821 static void nd_flush_operand_addr_locals(NativeDirectTarget* d, Operand op) {
    822   if (op.kind != OPK_INDIRECT) return;
    823   nd_flush_local(d, op.v.ind.base);
    824   if (op.v.ind.index != CG_LOCAL_NONE) nd_flush_local(d, op.v.ind.index);
    825 }
    826 
    827 static NativeLoc nd_materialize_operand(NativeDirectTarget* d, Operand op) {
    828   NativeAllocClass cls = nd_class_for_type(d, op.type);
    829   if (op.kind == OPK_LOCAL) {
    830     NativeDirectLocal* l = nd_local(d, op.v.local);
    831     if (l->reg != REG_NONE && op.type == l->type && nd_local_cacheable(d, l)) {
    832       /* Cache hit: pin and reuse the live register, no reload. */
    833       d->scratch_used[l->cls] |= 1u << l->reg;
    834       nd_touch_local(d, l);
    835       return native_loc_reg(op.type, (NativeAllocClass)l->cls, l->reg);
    836     }
    837     /* A live entry under a different access width must reach memory before we
    838      * bypass the cache for this access. */
    839     if (l->reg != REG_NONE) nd_flush_local(d, op.v.local);
    840   }
    841   nd_flush_operand_addr_locals(d, op);
    842   return nd_materialize_loc(d, nd_loc_operand(d, op), cls, op.type);
    843 }
    844 
    845 static NativeLoc nd_dst_scratch(NativeDirectTarget* d, Operand dst) {
    846   NativeAllocClass cls = nd_class_for_type(d, dst.type);
    847   Reg r = nd_scratch_acquire(d, cls);
    848   return native_loc_reg(dst.type, cls, r);
    849 }
    850 
    851 /* Arithmetic/compare RHS: keep a constant operand as an immediate when the
    852  * target can encode it for `use` (so no scratch register is spent
    853  * materializing it), mirroring the optimizer's operand_imm_or_reg. Falls back
    854  * to a register when there is no imm_legal hook (e.g. a recording mock target)
    855  * or the constant is not target-legal for this op. */
    856 static NativeLoc nd_rhs_imm_or_reg(NativeDirectTarget* d, NativeImmUse use,
    857                                    u32 sub, Operand b) {
    858   if (b.kind == OPK_IMM && d->native->imm_legal &&
    859       d->native->imm_legal(d->native, use, sub, b.type, b.v.imm))
    860     return nd_loc_imm(b.v.imm, b.type);
    861   return nd_materialize_operand(d, b);
    862 }
    863 
    864 /* Register a pure-compute op writes its result into. For a cacheable local that
    865  * is the local's cache register (reused or freshly allocated), pinned for the
    866  * instruction; nd_dst_writeback then marks it dirty without storing. Otherwise
    867  * a scratch temporary that nd_dst_writeback spills to the frame home. */
    868 static NativeLoc nd_dst_reg(NativeDirectTarget* d, Operand dst) {
    869   if (dst.kind == OPK_LOCAL) {
    870     NativeDirectLocal* l = nd_local(d, dst.v.local);
    871     if (dst.type == l->type && nd_local_cacheable(d, l)) {
    872       Reg r = l->reg;
    873       if (r == REG_NONE) {
    874         r = nd_cache_alloc(d, (NativeAllocClass)l->cls);
    875         if (r != REG_NONE) {
    876           d->reg_owner[l->cls][r] = dst.v.local;
    877           l->reg = r;
    878           nd_cache_link(d, dst.v.local);
    879         }
    880       }
    881       if (r != REG_NONE) {
    882         d->scratch_used[l->cls] |= 1u << r; /* pin for the instruction */
    883         nd_touch_local(d, l);
    884         return native_loc_reg(dst.type, (NativeAllocClass)l->cls, r);
    885       }
    886     }
    887   }
    888   return nd_dst_scratch(d, dst);
    889 }
    890 
    891 static void nd_dst_writeback(NativeDirectTarget* d, Operand dst, NativeLoc dr) {
    892   if (dst.kind == OPK_LOCAL) {
    893     NativeDirectLocal* l = nd_local(d, dst.v.local);
    894     if (dr.kind == NATIVE_LOC_REG && l->reg == dr.v.reg &&
    895         dst.type == l->type && nd_local_cacheable(d, l)) {
    896       l->dirty = 1;
    897       d->scratch_used[l->cls] &= ~(1u << dr.v.reg); /* unpin, keep cached */
    898       return;
    899     }
    900     /* Bypassing the cache: drop any stale entry, then spill to the home. */
    901     if (l->reg != REG_NONE) nd_invalidate_local(d, dst.v.local);
    902   }
    903   nd_store_operand_from_reg(d, dst, dr);
    904   nd_release_materialized(d, dr);
    905 }
    906 
    907 static void nd_store_operand_from_reg(NativeDirectTarget* d, Operand dst,
    908                                       NativeLoc src) {
    909   if (dst.kind != OPK_LOCAL) nd_panic(d, "destination is not a semantic local");
    910   /* This writes SRC to the local's frame home, bypassing the value cache (the
    911    * result was produced in a scratch reg, e.g. a load / address-of). Any live
    912    * cache entry for the local is now stale and must be dropped — the home write
    913    * supersedes it. Drop without storing; storing back would clobber the new
    914    * home value. Runs after SRC is produced, so a dst that was its own address
    915    * base has already been consumed. */
    916   {
    917     NativeDirectLocal* l = nd_local(d, dst.v.local);
    918     if (l->reg != REG_NONE) nd_invalidate_local(d, dst.v.local);
    919     nd_store_reg_to_frame(d, l->home, dst.type, src);
    920   }
    921 }
    922 
    923 static void nd_func_begin(CgTarget* t, const CGFuncDesc* fd) {
    924   NativeDirectTarget* d = nd_of(t);
    925   d->func = fd;
    926   d->nlocals = 0;
    927   d->nlabels = 0;
    928   d->nscopes = 0;
    929   d->max_outgoing = 0;
    930   d->use_tick = 0;
    931   d->cache_head = -1;
    932   d->cache_tail = -1;
    933   d->ncached = 0;
    934   memset(d->scratch_used, 0, sizeof d->scratch_used);
    935   memset(d->callee_saved_used, 0, sizeof d->callee_saved_used);
    936   memset(d->reg_owner, 0, sizeof d->reg_owner);
    937   if (d->native && d->native->func_begin) d->native->func_begin(d->native, fd);
    938 }
    939 
    940 static void nd_func_end(CgTarget* t) {
    941   NativeDirectTarget* d = nd_of(t);
    942   NativeFramePatchState frame;
    943   u32 ncallee_classes = 0;
    944   memset(&frame, 0, sizeof frame);
    945   frame.max_outgoing = d->max_outgoing;
    946   for (u32 cls = 0; cls < 3u; ++cls) {
    947     if (d->callee_saved_used[cls]) ncallee_classes = cls + 1u;
    948   }
    949   if (ncallee_classes) {
    950     if (!d->native || !d->native->reserve_callee_saves)
    951       nd_panic(d, "target cannot preserve callee-saved scratch registers");
    952     d->native->reserve_callee_saves(d->native, d->callee_saved_used,
    953                                     ncallee_classes);
    954   }
    955   if (d->native && d->native->note_frame_state)
    956     d->native->note_frame_state(d->native, &frame);
    957   if (d->native && d->native->patch_apply) d->native->patch_apply(d->native);
    958   if (d->native && d->native->func_end) d->native->func_end(d->native);
    959   d->func = NULL;
    960 }
    961 
    962 static void nd_alias(CgTarget* t, ObjSymId alias_sym, ObjSymId target_sym,
    963                      KitCgTypeId type) {
    964   (void)t;
    965   (void)alias_sym;
    966   (void)target_sym;
    967   (void)type;
    968 }
    969 
    970 static CGLocal nd_local_new(CgTarget* t, const CGLocalDesc* desc) {
    971   return nd_alloc_local(nd_of(t), desc);
    972 }
    973 
    974 static void nd_local_addr(CgTarget* t, Operand dst, const CGLocalDesc* desc,
    975                           CGLocal local) {
    976   NativeDirectTarget* d = nd_of(t);
    977   NativeDirectLocal* l = nd_local(d, local);
    978   Operand lv;
    979   (void)desc;
    980   /* Targeted flush: only this local escapes. Spill+drop its entry so the home
    981    * is authoritative for the address computation, then mark it uncacheable. The
    982    * rest of the cache is unaffected (other cached locals stay non-escaped). */
    983   nd_flush_local(d, local);
    984   l->address_taken = 1;
    985   l->flags |= CG_LOCAL_ADDR_TAKEN;
    986   memset(&lv, 0, sizeof lv);
    987   lv.kind = OPK_LOCAL;
    988   lv.type = l->type;
    989   lv.v.local = local;
    990   {
    991     NativeLoc reg = nd_dst_scratch(d, dst);
    992     ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses");
    993     d->native->load_addr(d->native, reg, nd_addr_storage(d, lv));
    994     nd_store_operand_from_reg(d, dst, reg);
    995     nd_release_materialized(d, reg);
    996   }
    997 }
    998 
    999 static CGLocal nd_param(CgTarget* t, const CGParamDesc* desc) {
   1000   NativeDirectTarget* d = nd_of(t);
   1001   NativeDirectLocal* l;
   1002   NativeFrameSlotDesc fsd;
   1003   CGLocal id;
   1004   nd_grow_locals(d, d->nlocals + 1u);
   1005   id = d->nlocals + 1u;
   1006   l = &d->locals[d->nlocals++];
   1007   memset(l, 0, sizeof *l);
   1008   l->type = desc->type;
   1009   l->size = desc->size;
   1010   l->align = desc->align;
   1011   l->flags = desc->flags;
   1012   l->reg = REG_NONE;
   1013   l->address_taken = (desc->flags & CG_LOCAL_ADDR_TAKEN) != 0;
   1014   l->memory_required = (desc->flags & CG_LOCAL_MEMORY_REQUIRED) != 0;
   1015   l->cls = (u8)nd_class_for_type(d, desc->type);
   1016   fsd = nd_slot_desc_param(desc);
   1017   l->home = nd_alloc_frame_slot(d, &fsd);
   1018   if (d->ops && d->ops->bind_param) d->ops->bind_param(d, desc, id, l);
   1019   return id;
   1020 }
   1021 
   1022 static int nd_local_debug_loc(CgTarget* t, CGLocal local, CGDebugLoc* out) {
   1023   NativeDirectTarget* d = nd_of(t);
   1024   NativeDirectLocal* l;
   1025   if (!out) return 0;
   1026   memset(out, 0, sizeof *out);
   1027   if (!d->native || !d->native->frame_slot_debug_loc) return 0;
   1028   l = nd_local(d, local);
   1029   if (l->home == NATIVE_FRAME_SLOT_NONE) return 0;
   1030   return d->native->frame_slot_debug_loc(d->native, l->home, out);
   1031 }
   1032 
   1033 static Label nd_label_new(CgTarget* t) { return nd_label_new_raw(nd_of(t)); }
   1034 
   1035 static void nd_label_place(CgTarget* t, Label label) {
   1036   NativeDirectTarget* d = nd_of(t);
   1037   nd_flush_all(d);
   1038   ND_REQUIRE_NATIVE(d, label_place, "target does not place labels");
   1039   d->native->label_place(d->native, nd_mc_label(d, label));
   1040 }
   1041 
   1042 static void nd_jump(CgTarget* t, Label label) {
   1043   NativeDirectTarget* d = nd_of(t);
   1044   nd_flush_all(d);
   1045   ND_REQUIRE_NATIVE(d, jump, "target does not emit jumps");
   1046   d->native->jump(d->native, nd_mc_label(d, label));
   1047 }
   1048 
   1049 static void nd_cmp_branch(CgTarget* t, CmpOp op, Operand a, Operand b,
   1050                           Label label) {
   1051   NativeDirectTarget* d = nd_of(t);
   1052   NativeLoc ar, br;
   1053   nd_flush_all(d);
   1054   ar = nd_materialize_operand(d, a);
   1055   br = nd_rhs_imm_or_reg(d, NATIVE_IMM_CMP, (u32)op, b);
   1056   ND_REQUIRE_NATIVE(d, cmp_branch, "target does not emit compare branches");
   1057   d->native->cmp_branch(d->native, op, ar, br, nd_mc_label(d, label));
   1058   nd_release_materialized(d, br);
   1059   nd_release_materialized(d, ar);
   1060 }
   1061 
   1062 static void nd_switch(CgTarget* t, const CGSwitchDesc* desc) {
   1063   nd_flush_all(nd_of(t));
   1064   cg_lower_switch_default(t, desc);
   1065 }
   1066 
   1067 static void nd_indirect_branch(CgTarget* t, Operand addr,
   1068                                const Label* valid_targets, u32 ntargets) {
   1069   NativeDirectTarget* d = nd_of(t);
   1070   MCLabel* native_targets;
   1071   NativeLoc addr_reg;
   1072   nd_flush_all(d);
   1073   addr_reg = nd_materialize_operand(d, addr);
   1074   ND_REQUIRE_NATIVE(d, indirect_branch,
   1075                     "target does not emit indirect branches");
   1076   native_targets =
   1077       ntargets == 0 ? NULL
   1078       : ntargets <= ND_LBL_BUF
   1079           ? d->lblbuf
   1080           : nd_arena(d, sizeof(*native_targets) * ntargets, _Alignof(MCLabel));
   1081   for (u32 i = 0; i < ntargets; ++i)
   1082     native_targets[i] = nd_mc_label(d, valid_targets[i]);
   1083   d->native->indirect_branch(d->native, addr_reg, native_targets, ntargets);
   1084   nd_release_materialized(d, addr_reg);
   1085 }
   1086 
   1087 static void nd_load_label_addr(CgTarget* t, Operand dst, Label label) {
   1088   NativeDirectTarget* d = nd_of(t);
   1089   NativeLoc reg;
   1090   nd_flush_all(d);
   1091   reg = nd_dst_scratch(d, dst);
   1092   ND_REQUIRE_NATIVE(d, load_label_addr,
   1093                     "target does not materialize label addresses");
   1094   d->native->load_label_addr(d->native, reg, nd_mc_label(d, label));
   1095   nd_store_operand_from_reg(d, dst, reg);
   1096   nd_release_materialized(d, reg);
   1097 }
   1098 
   1099 static int nd_local_static_data_begin(CgTarget* t,
   1100                                       const CGLocalStaticDataDesc* desc) {
   1101   NativeDirectTarget* d = nd_of(t);
   1102   Sym name;
   1103   SecKind kind;
   1104   u16 flags;
   1105   if (!d->native || !d->native->mc || !desc) return 0;
   1106   if (d->local_static_active) nd_panic(d, "nested local static data");
   1107   if (desc->attrs.section) {
   1108     name = (Sym)desc->attrs.section;
   1109     kind =
   1110         (desc->attrs.flags & KIT_CG_DATADEF_READONLY) ? SEC_RODATA : SEC_DATA;
   1111     flags = (desc->attrs.flags & KIT_CG_DATADEF_READONLY)
   1112                 ? SF_ALLOC
   1113                 : (SF_ALLOC | SF_WRITE);
   1114   } else if (desc->attrs.flags & KIT_CG_DATADEF_READONLY) {
   1115     name = pool_intern_slice(t->c->global, SLICE_LIT(".rodata"));
   1116     kind = SEC_RODATA;
   1117     flags = SF_ALLOC;
   1118   } else {
   1119     name = pool_intern_slice(t->c->global, SLICE_LIT(".data"));
   1120     kind = SEC_DATA;
   1121     flags = SF_ALLOC | SF_WRITE;
   1122   }
   1123   d->local_static_sec =
   1124       obj_section(t->obj, name, kind, flags, desc->align ? desc->align : 1u);
   1125   d->local_static_base =
   1126       obj_align_to(t->obj, d->local_static_sec, desc->align ? desc->align : 1u);
   1127   d->local_static_size = 0;
   1128   d->local_static_sym = desc->sym;
   1129   d->local_static_active = 1;
   1130   return 1;
   1131 }
   1132 
   1133 static void nd_local_static_data_write(CgTarget* t, const u8* data, u64 len) {
   1134   NativeDirectTarget* d = nd_of(t);
   1135   u8 zero[64];
   1136   u64 orig_len = len;
   1137   if (!d->local_static_active || !len) return;
   1138   if (data) {
   1139     obj_write(t->obj, d->local_static_sec, data, (size_t)len);
   1140   } else {
   1141     memset(zero, 0, sizeof zero);
   1142     while (len >= sizeof zero) {
   1143       obj_write(t->obj, d->local_static_sec, zero, sizeof zero);
   1144       len -= sizeof zero;
   1145     }
   1146     if (len) obj_write(t->obj, d->local_static_sec, zero, (size_t)len);
   1147   }
   1148   d->local_static_size += (u32)orig_len;
   1149 }
   1150 
   1151 static void nd_local_static_data_label_addr(CgTarget* t, Label target,
   1152                                             i64 addend, u32 width,
   1153                                             u32 address_space) {
   1154   NativeDirectTarget* d = nd_of(t);
   1155   u32 off;
   1156   u8 zero[8];
   1157   RelocKind kind;
   1158   (void)address_space;
   1159   if (!d->local_static_active)
   1160     nd_panic(d, "label address outside local static data");
   1161   /* A jump-table / label-address slot is one target pointer wide: 8 bytes
   1162    * (R_ABS64) on a 64-bit target, 4 bytes (R_ABS32) on rv32/ELFCLASS32. */
   1163   if (width == 8u)
   1164     kind = R_ABS64;
   1165   else if (width == 4u)
   1166     kind = R_ABS32;
   1167   else {
   1168     nd_panic(d, "unsupported local static label address width");
   1169     return;
   1170   }
   1171   memset(zero, 0, sizeof zero);
   1172   off = d->local_static_base + d->local_static_size;
   1173   obj_write(t->obj, d->local_static_sec, zero, width);
   1174   d->native->mc->emit_label_data_reloc(d->native->mc, d->local_static_sec, off,
   1175                                        nd_mc_label(d, target), kind, width,
   1176                                        addend);
   1177   d->local_static_size += width;
   1178 }
   1179 
   1180 static void nd_local_static_data_end(CgTarget* t) {
   1181   NativeDirectTarget* d = nd_of(t);
   1182   if (!d->local_static_active) return;
   1183   obj_symbol_define_live(t->obj, d->local_static_sym, d->local_static_sec,
   1184                          d->local_static_base, d->local_static_size);
   1185   d->local_static_active = 0;
   1186   d->local_static_sec = OBJ_SEC_NONE;
   1187   d->local_static_sym = OBJ_SYM_NONE;
   1188   d->local_static_base = 0;
   1189   d->local_static_size = 0;
   1190 }
   1191 
   1192 static const char* nd_data_label_addr_unsupported_msg(CgTarget* t) {
   1193   (void)t;
   1194   return NULL;
   1195 }
   1196 
   1197 static CGScope nd_scope_begin(CgTarget* t, const CGScopeDesc* desc) {
   1198   NativeDirectTarget* d = nd_of(t);
   1199   NativeDirectScope* s;
   1200   CGScope id;
   1201   nd_grow_scopes(d, d->nscopes + 1u);
   1202   id = d->nscopes + 1u;
   1203   s = &d->scopes[d->nscopes++];
   1204   memset(s, 0, sizeof *s);
   1205   s->kind = desc->kind;
   1206   s->owns_break = desc->break_label == LABEL_NONE;
   1207   s->break_label = desc->break_label ? desc->break_label : nd_label_new_raw(d);
   1208   s->continue_label = desc->continue_label;
   1209   if (desc->kind == SCOPE_LOOP && s->continue_label == LABEL_NONE)
   1210     s->continue_label = nd_label_new_raw(d);
   1211   return id;
   1212 }
   1213 
   1214 static NativeDirectScope* nd_scope(NativeDirectTarget* d, CGScope scope) {
   1215   if (scope == CG_SCOPE_NONE || scope > d->nscopes) nd_panic(d, "bad scope");
   1216   return &d->scopes[scope - 1u];
   1217 }
   1218 
   1219 static void nd_scope_end(CgTarget* t, CGScope scope) {
   1220   NativeDirectTarget* d = nd_of(t);
   1221   NativeDirectScope* s = nd_scope(d, scope);
   1222   if (s->owns_break) nd_label_place(t, s->break_label);
   1223 }
   1224 
   1225 static void nd_break_to(CgTarget* t, CGScope scope) {
   1226   nd_jump(t, nd_scope(nd_of(t), scope)->break_label);
   1227 }
   1228 
   1229 static void nd_continue_to(CgTarget* t, CGScope scope) {
   1230   NativeDirectScope* s = nd_scope(nd_of(t), scope);
   1231   if (s->continue_label == LABEL_NONE)
   1232     nd_panic(nd_of(t), "continue_to on scope without continue label");
   1233   nd_jump(t, s->continue_label);
   1234 }
   1235 
   1236 static int nd_is_wide64_int(NativeDirectTarget* d, KitCgTypeId ty);
   1237 static int nd_is_soft_double(NativeDirectTarget* d, KitCgTypeId ty);
   1238 
   1239 static void nd_load_imm(CgTarget* t, Operand dst, i64 imm) {
   1240   NativeDirectTarget* d = nd_of(t);
   1241   NativeLoc reg;
   1242   if (nd_is_wide64_int(d, dst.type))
   1243     nd_panic(d,
   1244              "64-bit integer immediate reached the backend un-lowered "
   1245              "(cg should materialize it as two 32-bit lanes)");
   1246   reg = nd_dst_reg(d, dst);
   1247   ND_REQUIRE_NATIVE(d, load_imm, "target does not emit immediates");
   1248   d->native->load_imm(d->native, reg, imm);
   1249   nd_dst_writeback(d, dst, reg);
   1250 }
   1251 
   1252 static void nd_load_const(CgTarget* t, Operand dst, ConstBytes cbytes) {
   1253   NativeDirectTarget* d = nd_of(t);
   1254   NativeLoc reg;
   1255   if (nd_is_wide64_int(d, dst.type) || nd_is_soft_double(d, dst.type))
   1256     nd_panic(d,
   1257              "8-byte constant reached the backend un-lowered (cg should "
   1258              "materialize it as two 32-bit lanes)");
   1259   reg = nd_dst_reg(d, dst);
   1260   ND_REQUIRE_NATIVE(d, load_const, "target does not emit byte constants");
   1261   d->native->load_const(d->native, reg, cbytes);
   1262   nd_dst_writeback(d, dst, reg);
   1263 }
   1264 
   1265 static void nd_copy(CgTarget* t, Operand dst, Operand src) {
   1266   NativeDirectTarget* d = nd_of(t);
   1267   u64 size = dst.type ? cg_type_size(t->c, dst.type) : 0;
   1268   if (size > (u64)t->c->target.ptr_size) {
   1269     NdAddrTemps dt, st;
   1270     AggregateAccess access;
   1271     /* Aggregate copy: addresses are built cache-aware (a directly-addressed
   1272      * cached local is flushed in nd_addr_storage), so no whole-cache flush. */
   1273     memset(&access, 0, sizeof access);
   1274     access.type = dst.type;
   1275     access.size = (u32)size;
   1276     access.align =
   1277         dst.type ? cg_type_align(t->c, dst.type) : (u32)t->c->target.ptr_align;
   1278     access.mem.type = dst.type;
   1279     access.mem.size = access.size;
   1280     access.mem.align = access.align;
   1281     NativeAddr da =
   1282         nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, access.mem);
   1283     NativeAddr sa =
   1284         nd_addr_materialize(d, nd_addr_storage(d, src), &st, access.mem);
   1285     ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes");
   1286     d->native->copy_bytes(d->native, da, sa, access);
   1287     nd_addr_temps_release(d, &st);
   1288     nd_addr_temps_release(d, &dt);
   1289     return;
   1290   }
   1291   NativeLoc val = nd_materialize_operand(d, src);
   1292   NativeLoc dr = nd_dst_reg(d, dst);
   1293   nd_copy_to_reg(d, dr, val);
   1294   nd_dst_writeback(d, dst, dr);
   1295   nd_release_materialized(d, val);
   1296 }
   1297 
   1298 /* Bit-fields ride the generic load/store (mem.bf_width != 0); this impl
   1299  * translates them to the physical NativeTarget bitfield_load/store below. */
   1300 static void nd_bitfield_load(CgTarget* t, Operand dst, Operand record_addr,
   1301                              BitFieldAccess access);
   1302 static void nd_bitfield_store(CgTarget* t, Operand record_addr, Operand src,
   1303                               BitFieldAccess access);
   1304 
   1305 static void nd_load(CgTarget* t, Operand dst, Operand addr, MemAccess mem) {
   1306   NativeDirectTarget* d = nd_of(t);
   1307   NdAddrTemps temps;
   1308   u64 size;
   1309   if (mem.bf_width != 0) {
   1310     nd_bitfield_load(t, dst, addr, bf_from_mem(mem));
   1311     return;
   1312   }
   1313   size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0);
   1314   /* No value-cache flush: only escaped (address-taken / memory-required) locals
   1315    * can be aliased through a pointer, and those are never cached. A volatile
   1316    * access may observe memory and needs the cache made authoritative first. */
   1317   if (mem.flags & MF_VOLATILE) {
   1318     nd_flush_all(d);
   1319     nd_barrier(d,
   1320                NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE);
   1321   }
   1322   NativeAddr naddr =
   1323       nd_addr_materialize(d, nd_addr_storage(d, addr), &temps, mem);
   1324   if (size > (u64)t->c->target.ptr_size) {
   1325     NdAddrTemps dt;
   1326     AggregateAccess access;
   1327     memset(&access, 0, sizeof access);
   1328     access.type = mem.type ? mem.type : dst.type;
   1329     access.size = (u32)size;
   1330     access.align = mem.align;
   1331     access.mem = mem;
   1332     NativeAddr da = nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, mem);
   1333     ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes");
   1334     d->native->copy_bytes(d->native, da, naddr, access);
   1335     nd_addr_temps_release(d, &dt);
   1336     nd_addr_temps_release(d, &temps);
   1337     return;
   1338   }
   1339   NativeLoc reg = nd_dst_scratch(d, dst);
   1340   ND_REQUIRE_NATIVE(d, load, "target does not emit loads");
   1341   d->native->load(d->native, reg, naddr, mem);
   1342   nd_store_operand_from_reg(d, dst, reg);
   1343   nd_release_materialized(d, reg);
   1344   nd_addr_temps_release(d, &temps);
   1345 }
   1346 
   1347 static void nd_store(CgTarget* t, Operand addr, Operand src, MemAccess mem) {
   1348   NativeDirectTarget* d = nd_of(t);
   1349   NdAddrTemps temps;
   1350   u64 size;
   1351   if (mem.bf_width != 0) {
   1352     nd_bitfield_store(t, addr, src, bf_from_mem(mem));
   1353     return;
   1354   }
   1355   size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0);
   1356   /* No value-cache flush (see nd_load): a store through a pointer cannot alias
   1357    * a cached non-escaped local. The store target is foreign memory, so there is
   1358    * no dst local entry to invalidate; SRC is read via nd_materialize_operand.
   1359    */
   1360   if (mem.flags & MF_VOLATILE) {
   1361     nd_flush_all(d);
   1362     nd_barrier(d,
   1363                NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE);
   1364   }
   1365   NativeAddr naddr =
   1366       nd_addr_materialize(d, nd_addr_storage(d, addr), &temps, mem);
   1367   if (size > (u64)t->c->target.ptr_size) {
   1368     NdAddrTemps st;
   1369     AggregateAccess access;
   1370     memset(&access, 0, sizeof access);
   1371     access.type = mem.type ? mem.type : src.type;
   1372     access.size = (u32)size;
   1373     access.align = mem.align;
   1374     access.mem = mem;
   1375     NativeAddr sa = nd_addr_materialize(d, nd_addr_storage(d, src), &st, mem);
   1376     ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes");
   1377     d->native->copy_bytes(d->native, naddr, sa, access);
   1378     nd_addr_temps_release(d, &st);
   1379     nd_addr_temps_release(d, &temps);
   1380     return;
   1381   }
   1382   NativeLoc val = nd_materialize_operand(d, src);
   1383   ND_REQUIRE_NATIVE(d, store, "target does not emit stores");
   1384   d->native->store(d->native, naddr, val, mem);
   1385   nd_release_materialized(d, val);
   1386   nd_addr_temps_release(d, &temps);
   1387 }
   1388 
   1389 static void nd_addr_of(CgTarget* t, Operand dst, Operand lv) {
   1390   NativeDirectTarget* d = nd_of(t);
   1391   NdAddrTemps temps;
   1392   MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size,
   1393                                 d->base.c->target.ptr_align);
   1394   NativeAddr naddr;
   1395   /* Targeted: only an OPK_LOCAL lvalue escapes here — flush+mark just that
   1396    * local (its home becomes the authoritative address source). An INDIRECT
   1397    * lvalue's address is computed from base/index, which nd_addr_storage now
   1398    * reads from the cache directly; a GLOBAL needs nothing. The dst home write
   1399    * is handled by nd_store_operand_from_reg's invalidation. */
   1400   if (lv.kind == OPK_LOCAL) {
   1401     NativeDirectLocal* l = nd_local(d, lv.v.local);
   1402     nd_flush_local(d, lv.v.local);
   1403     l->address_taken = 1;
   1404     l->flags |= CG_LOCAL_ADDR_TAKEN;
   1405   }
   1406   naddr = nd_addr_materialize(d, nd_addr_storage(d, lv), &temps, mem);
   1407   NativeLoc reg = nd_dst_scratch(d, dst);
   1408   ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses");
   1409   d->native->load_addr(d->native, reg, naddr);
   1410   nd_store_operand_from_reg(d, dst, reg);
   1411   nd_release_materialized(d, reg);
   1412   nd_addr_temps_release(d, &temps);
   1413 }
   1414 
   1415 static void nd_tls_addr_of(CgTarget* t, Operand dst, ObjSymId sym, i64 addend) {
   1416   NativeDirectTarget* d = nd_of(t);
   1417   NativeLoc reg;
   1418   nd_flush_all(d);
   1419   reg = nd_dst_scratch(d, dst);
   1420   ND_REQUIRE_NATIVE(d, tls_addr_of,
   1421                     "target does not materialize TLS addresses");
   1422   d->native->tls_addr_of(d->native, reg, sym, addend);
   1423   nd_store_operand_from_reg(d, dst, reg);
   1424   nd_release_materialized(d, reg);
   1425 }
   1426 
   1427 static void nd_copy_bytes(CgTarget* t, Operand dst_addr, Operand src_addr,
   1428                           AggregateAccess access) {
   1429   NativeDirectTarget* d = nd_of(t);
   1430   NdAddrTemps dt, st;
   1431   NativeAddr dst;
   1432   /* Pointer-target memory; addresses are cache-aware. No whole-cache flush. */
   1433   dst = nd_addr_materialize(d, nd_addr_pointer(d, dst_addr), &dt, access.mem);
   1434   NativeAddr src =
   1435       nd_addr_materialize(d, nd_addr_pointer(d, src_addr), &st, access.mem);
   1436   ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes");
   1437   d->native->copy_bytes(d->native, dst, src, access);
   1438   nd_addr_temps_release(d, &st);
   1439   nd_addr_temps_release(d, &dt);
   1440 }
   1441 
   1442 static void nd_set_bytes(CgTarget* t, Operand dst_addr, Operand byte_value,
   1443                          AggregateAccess access) {
   1444   NativeDirectTarget* d = nd_of(t);
   1445   NdAddrTemps temps;
   1446   NativeAddr dst;
   1447   NativeLoc byte;
   1448   /* Pointer-target memory; addresses are cache-aware. No whole-cache flush. */
   1449   dst =
   1450       nd_addr_materialize(d, nd_addr_pointer(d, dst_addr), &temps, access.mem);
   1451   byte = nd_materialize_operand(d, byte_value);
   1452   ND_REQUIRE_NATIVE(d, set_bytes, "target does not set bytes");
   1453   d->native->set_bytes(d->native, dst, byte, access);
   1454   nd_release_materialized(d, byte);
   1455   nd_addr_temps_release(d, &temps);
   1456 }
   1457 
   1458 static void nd_bitfield_load(CgTarget* t, Operand dst, Operand record_addr,
   1459                              BitFieldAccess access) {
   1460   NativeDirectTarget* d = nd_of(t);
   1461   NdAddrTemps temps;
   1462   NativeAddr addr;
   1463   NativeLoc reg;
   1464   /* Record (pointer-target) memory; addresses are cache-aware. The dst home
   1465    * write is handled by nd_store_operand_from_reg's invalidation. */
   1466   addr = nd_addr_materialize(d, nd_addr_storage(d, record_addr), &temps,
   1467                              access.storage);
   1468   reg = nd_dst_scratch(d, dst);
   1469   ND_REQUIRE_NATIVE(d, bitfield_load, "target does not load bitfields");
   1470   d->native->bitfield_load(d->native, reg, addr, access);
   1471   nd_store_operand_from_reg(d, dst, reg);
   1472   nd_release_materialized(d, reg);
   1473   nd_addr_temps_release(d, &temps);
   1474 }
   1475 
   1476 static void nd_bitfield_store(CgTarget* t, Operand record_addr, Operand src,
   1477                               BitFieldAccess access) {
   1478   NativeDirectTarget* d = nd_of(t);
   1479   NdAddrTemps temps;
   1480   NativeAddr addr;
   1481   NativeLoc val;
   1482   /* Record (pointer-target) memory; addresses are cache-aware, SRC reads the
   1483    * cache. No whole-cache flush. */
   1484   addr = nd_addr_materialize(d, nd_addr_storage(d, record_addr), &temps,
   1485                              access.storage);
   1486   val = nd_materialize_operand(d, src);
   1487   ND_REQUIRE_NATIVE(d, bitfield_store, "target does not store bitfields");
   1488   d->native->bitfield_store(d->native, addr, val, access);
   1489   nd_release_materialized(d, val);
   1490   nd_addr_temps_release(d, &temps);
   1491 }
   1492 
   1493 /* Last line of defense against an unlowered split-scalar/soft-float op reaching
   1494  * the machine backend. The cg-layer gates in src/cg/arith.c route split i64
   1495  * mul/div/shift and all soft-double arith/convert/compare to runtime calls; if
   1496  * one escapes, the native backend would silently emit wrong code. */
   1497 static int nd_is_split_wide8_scalar(NativeDirectTarget* d, KitCgTypeId ty) {
   1498   return abi_cg_scalar_split_lane_size(d->base.c->abi, ty) == 4u &&
   1499          native_type_size(d->native, ty) == 8u;
   1500 }
   1501 
   1502 static int nd_is_wide64_int(NativeDirectTarget* d, KitCgTypeId ty) {
   1503   if (!nd_is_split_wide8_scalar(d, ty)) return 0;
   1504   if (kit_cg_type_int_width((KitCompiler*)d->base.c, ty) == 0) return 0;
   1505   return 1;
   1506 }
   1507 
   1508 static int nd_is_soft_double(NativeDirectTarget* d, KitCgTypeId ty) {
   1509   if (!nd_is_split_wide8_scalar(d, ty)) return 0;
   1510   return kit_cg_type_float_width((KitCompiler*)d->base.c, ty) == 64;
   1511 }
   1512 
   1513 static void nd_binop(CgTarget* t, BinOp op, Operand dst, Operand a, Operand b) {
   1514   NativeDirectTarget* d = nd_of(t);
   1515   NativeLoc ar;
   1516   NativeLoc br;
   1517   NativeLoc dr;
   1518   /* No split-lane 8-byte value reaches a single-register op: the cg layer
   1519    * lowers i64 add/sub/and/or/xor to inline 2-word lane sequences and
   1520    * mul/div/rem/shift to __*di3 runtime calls (src/cg/arith.c). Anything that
   1521    * slips through here would silently compute only the low word, so fail
   1522    * loudly instead. */
   1523   if (nd_is_wide64_int(d, a.type) || nd_is_wide64_int(d, dst.type)) {
   1524     nd_panic(d,
   1525              "64-bit integer arithmetic reached the backend un-lowered "
   1526              "(cg should emit a 2-word lane sequence or a __*di3 runtime call)");
   1527   }
   1528   if (nd_is_soft_double(d, a.type) || nd_is_soft_double(d, dst.type)) {
   1529     nd_panic(d,
   1530              "soft-float double arithmetic reached the backend un-lowered "
   1531              "(should be a __*df3 runtime call)");
   1532   }
   1533   ar = nd_materialize_operand(d, a);
   1534   br = nd_rhs_imm_or_reg(d, NATIVE_IMM_BINOP, (u32)op, b);
   1535   dr = nd_dst_reg(d, dst);
   1536   ND_REQUIRE_NATIVE(d, binop, "target does not emit binary ops");
   1537   d->native->binop(d->native, op, dr, ar, br);
   1538   nd_dst_writeback(d, dst, dr);
   1539   nd_release_materialized(d, br);
   1540   nd_release_materialized(d, ar);
   1541 }
   1542 
   1543 static void nd_unop(CgTarget* t, UnOp op, Operand dst, Operand a) {
   1544   NativeDirectTarget* d = nd_of(t);
   1545   NativeLoc ar;
   1546   NativeLoc dr;
   1547   /* i64 neg/bnot stay inline as register pairs, and soft-double FNEG stays
   1548    * inline as a high-word sign-bit flip (v1), so both are allowlisted. Any
   1549    * OTHER soft-double unop reaching the backend is an unlowered escape. */
   1550   if (nd_is_wide64_int(d, a.type) || nd_is_wide64_int(d, dst.type)) {
   1551     nd_panic(d,
   1552              "64-bit integer unary op reached the backend un-lowered "
   1553              "(cg should emit a 2-word lane sequence)");
   1554   }
   1555   if (op != UO_FNEG &&
   1556       (nd_is_soft_double(d, a.type) || nd_is_soft_double(d, dst.type))) {
   1557     nd_panic(d, "soft-float double unary op reached the backend un-lowered");
   1558   }
   1559   ar = nd_materialize_operand(d, a);
   1560   dr = nd_dst_reg(d, dst);
   1561   ND_REQUIRE_NATIVE(d, unop, "target does not emit unary ops");
   1562   d->native->unop(d->native, op, dr, ar);
   1563   nd_dst_writeback(d, dst, dr);
   1564   nd_release_materialized(d, ar);
   1565 }
   1566 
   1567 static void nd_cmp(CgTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
   1568   NativeDirectTarget* d = nd_of(t);
   1569   NativeLoc ar;
   1570   NativeLoc br;
   1571   NativeLoc dr;
   1572   /* i64 compares are lowered to inline 2-word lane sequences and soft-double
   1573    * compares to __*df2 runtime calls (src/cg/arith.c); neither reaches a single
   1574    * GPR compare here. */
   1575   if (nd_is_wide64_int(d, a.type) || nd_is_wide64_int(d, b.type)) {
   1576     nd_panic(d,
   1577              "64-bit integer compare reached the backend un-lowered "
   1578              "(cg should emit a 2-word lane sequence)");
   1579   }
   1580   if (nd_is_soft_double(d, a.type) || nd_is_soft_double(d, b.type)) {
   1581     nd_panic(d,
   1582              "soft-float double compare reached the backend un-lowered "
   1583              "(should be a __*df2 runtime call)");
   1584   }
   1585   ar = nd_materialize_operand(d, a);
   1586   br = nd_rhs_imm_or_reg(d, NATIVE_IMM_CMP, (u32)op, b);
   1587   dr = nd_dst_reg(d, dst);
   1588   ND_REQUIRE_NATIVE(d, cmp, "target does not emit compares");
   1589   d->native->cmp(d->native, op, dr, ar, br);
   1590   nd_dst_writeback(d, dst, dr);
   1591   nd_release_materialized(d, br);
   1592   nd_release_materialized(d, ar);
   1593 }
   1594 
   1595 static void nd_convert(CgTarget* t, ConvKind op, Operand dst, Operand src) {
   1596   NativeDirectTarget* d = nd_of(t);
   1597   NativeLoc sr;
   1598   NativeLoc dr;
   1599   /* i64<->i32 sext/zext/trunc are lowered to inline lane ops (src/cg/arith.c
   1600    * api_try_wide8_convert) and i64<->float / soft-double conversions to runtime
   1601    * calls; none reaches a single-register convert here. */
   1602   if (nd_is_wide64_int(d, src.type) || nd_is_wide64_int(d, dst.type)) {
   1603     nd_panic(d,
   1604              "64-bit integer conversion reached the backend un-lowered "
   1605              "(cg should emit a 2-word lane sequence or a runtime call)");
   1606   }
   1607   if (nd_is_soft_double(d, src.type) || nd_is_soft_double(d, dst.type)) {
   1608     nd_panic(d,
   1609              "soft-float double conversion reached the backend un-lowered "
   1610              "(should be a runtime call)");
   1611   }
   1612   sr = nd_materialize_operand(d, src);
   1613   dr = nd_dst_reg(d, dst);
   1614   ND_REQUIRE_NATIVE(d, convert, "target does not emit converts");
   1615   d->native->convert(d->native, op, dr, sr);
   1616   nd_dst_writeback(d, dst, dr);
   1617   nd_release_materialized(d, sr);
   1618 }
   1619 
   1620 static void nd_call(CgTarget* t, const CGCallDesc* desc) {
   1621   NativeDirectTarget* d = nd_of(t);
   1622   NativeCallPlan plan;
   1623   NativeCallDesc nd;
   1624   NativeLoc* args;
   1625   NativeLoc* results;
   1626   NativeLoc callee_tmp;
   1627   int release_callee_tmp = 0;
   1628   nd_flush_all(d);
   1629   nd_barrier(d, NATIVE_DIRECT_BARRIER_CALL | NATIVE_DIRECT_BARRIER_MEMORY);
   1630   memset(&plan, 0, sizeof plan);
   1631   memset(&nd, 0, sizeof nd);
   1632   memset(&callee_tmp, 0, sizeof callee_tmp);
   1633   u32 nresults = desc->result != CG_LOCAL_NONE ? 1u : 0u;
   1634   args = nd_loc_buf(d, d->argbuf, ND_ARG_BUF, desc->nargs);
   1635   results = nd_loc_buf(d, d->retbuf, ND_RET_BUF, nresults);
   1636   for (u32 i = 0; i < desc->nargs; ++i)
   1637     args[i] = nd_loc_frame(d, desc->args[i], 0);
   1638   if (nresults) results[0] = nd_loc_frame(d, desc->result, 0);
   1639   nd.fn_type = desc->fn_type;
   1640   nd.callee = nd_loc_operand(d, desc->callee);
   1641   if (nd.callee.kind == NATIVE_LOC_FRAME) {
   1642     callee_tmp = nd_materialize_loc(d, nd.callee,
   1643                                     (NativeAllocClass)nd.callee.cls,
   1644                                     nd.callee.type);
   1645     nd.callee = callee_tmp;
   1646     release_callee_tmp = 1;
   1647   }
   1648   nd.args = args;
   1649   nd.results = results;
   1650   nd.nargs = desc->nargs;
   1651   nd.nresults = nresults;
   1652   nd.flags = desc->flags;
   1653   nd.tail_policy = desc->tail_policy;
   1654   nd.inline_policy = desc->inline_policy;
   1655 
   1656   if (d->ops && d->ops->plan_call)
   1657     d->ops->plan_call(d, &nd, &plan);
   1658   else {
   1659     ND_REQUIRE_NATIVE(d, plan_call, "target does not plan calls");
   1660     d->native->plan_call(d->native, &nd, &plan);
   1661   }
   1662   if (plan.stack_arg_size > d->max_outgoing)
   1663     d->max_outgoing = plan.stack_arg_size;
   1664   for (u32 i = 0; i < plan.nargs; ++i)
   1665     nd_write_loc(d, plan.args[i].dst, plan.args[i].src, plan.args[i].mem);
   1666   if (d->ops && d->ops->emit_call)
   1667     d->ops->emit_call(d, &plan);
   1668   else {
   1669     ND_REQUIRE_NATIVE(d, emit_call, "target does not emit calls");
   1670     d->native->emit_call(d->native, &plan);
   1671   }
   1672   for (u32 i = 0; i < plan.nrets; ++i)
   1673     nd_write_loc(d, plan.rets[i].dst, plan.rets[i].src, plan.rets[i].mem);
   1674   if (release_callee_tmp)
   1675     nd_scratch_release(d, (NativeAllocClass)callee_tmp.cls,
   1676                        callee_tmp.v.reg);
   1677 }
   1678 
   1679 static const char* nd_tail_call_unrealizable_reason(CgTarget* t,
   1680                                                     const CGCallDesc* desc) {
   1681   NativeDirectTarget* d = nd_of(t);
   1682   if (d->ops && d->ops->tail_call_unrealizable_reason)
   1683     return d->ops->tail_call_unrealizable_reason(d, desc);
   1684   return "target does not expose direct tail-call lowering";
   1685 }
   1686 
   1687 static void nd_ret(CgTarget* t, CGLocal value) {
   1688   NativeDirectTarget* d = nd_of(t);
   1689   NativeLoc loc;
   1690   const NativeLoc* locp = NULL;
   1691   NativeCallPlanRet* rets = NULL;
   1692   u32 nrets = 0;
   1693   nd_flush_all(d);
   1694   if (d->ops && d->ops->emit_ret) {
   1695     d->ops->emit_ret(d, value);
   1696     return;
   1697   }
   1698   if (value != CG_LOCAL_NONE) {
   1699     loc = nd_loc_frame(d, value, 0);
   1700     locp = &loc;
   1701   }
   1702   ND_REQUIRE_NATIVE(d, plan_ret, "target does not plan returns");
   1703   d->native->plan_ret(d->native, d->func, locp, &rets, &nrets);
   1704   for (u32 i = 0; i < nrets; ++i)
   1705     nd_write_loc(d, rets[i].dst, rets[i].src, rets[i].mem);
   1706   ND_REQUIRE_NATIVE(d, ret, "target does not emit returns");
   1707   d->native->ret(d->native);
   1708 }
   1709 
   1710 static void nd_unreachable(CgTarget* t) {
   1711   NativeDirectTarget* d = nd_of(t);
   1712   nd_flush_all(d);
   1713   ND_REQUIRE_NATIVE(d, trap, "target does not emit traps");
   1714   d->native->trap(d->native);
   1715 }
   1716 
   1717 static void nd_alloca(CgTarget* t, Operand dst, Operand size, u32 align) {
   1718   NativeDirectTarget* d = nd_of(t);
   1719   NativeLoc sr, dr;
   1720   nd_flush_all(d);
   1721   sr = nd_materialize_operand(d, size);
   1722   dr = nd_dst_scratch(d, dst);
   1723   ND_REQUIRE_NATIVE(d, alloca_, "target does not emit alloca");
   1724   d->native->alloca_(d->native, dr, sr, align);
   1725   nd_store_operand_from_reg(d, dst, dr);
   1726   nd_release_materialized(d, dr);
   1727   nd_release_materialized(d, sr);
   1728 }
   1729 
   1730 static void nd_va_start(CgTarget* t, Operand ap_addr) {
   1731   NativeDirectTarget* d = nd_of(t);
   1732   nd_flush_all(d);
   1733   if (!d->ops || !d->ops->va_start_)
   1734     nd_panic(d, "target does not emit va_start");
   1735   d->ops->va_start_(d, ap_addr);
   1736 }
   1737 
   1738 static void nd_va_arg(CgTarget* t, Operand dst, Operand ap_addr,
   1739                       KitCgTypeId type) {
   1740   NativeDirectTarget* d = nd_of(t);
   1741   nd_flush_all(d);
   1742   if (!d->ops || !d->ops->va_arg_) nd_panic(d, "target does not emit va_arg");
   1743   d->ops->va_arg_(d, dst, ap_addr, type);
   1744 }
   1745 
   1746 static void nd_va_end(CgTarget* t, Operand ap_addr) {
   1747   NativeDirectTarget* d = nd_of(t);
   1748   nd_flush_all(d);
   1749   if (!d->ops || !d->ops->va_end_) nd_panic(d, "target does not emit va_end");
   1750   d->ops->va_end_(d, ap_addr);
   1751 }
   1752 
   1753 static void nd_va_copy(CgTarget* t, Operand dst_ap_addr, Operand src_ap_addr) {
   1754   NativeDirectTarget* d = nd_of(t);
   1755   nd_flush_all(d);
   1756   if (!d->ops || !d->ops->va_copy_) nd_panic(d, "target does not emit va_copy");
   1757   d->ops->va_copy_(d, dst_ap_addr, src_ap_addr);
   1758 }
   1759 
   1760 static void nd_atomic_load(CgTarget* t, Operand dst, Operand addr,
   1761                            MemAccess mem, KitCgMemOrder order) {
   1762   NativeDirectTarget* d = nd_of(t);
   1763   NdAddrTemps temps;
   1764   nd_flush_all(d);
   1765   nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC);
   1766   NativeAddr naddr =
   1767       nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem);
   1768   NativeLoc dr = nd_dst_scratch(d, dst);
   1769   ND_REQUIRE_NATIVE(d, atomic_load, "target does not emit atomic loads");
   1770   d->native->atomic_load(d->native, dr, naddr, mem, order);
   1771   nd_store_operand_from_reg(d, dst, dr);
   1772   nd_release_materialized(d, dr);
   1773   nd_addr_temps_release(d, &temps);
   1774 }
   1775 
   1776 static void nd_atomic_store(CgTarget* t, Operand addr, Operand src,
   1777                             MemAccess mem, KitCgMemOrder order) {
   1778   NativeDirectTarget* d = nd_of(t);
   1779   NdAddrTemps temps;
   1780   nd_flush_all(d);
   1781   nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC);
   1782   NativeAddr naddr =
   1783       nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem);
   1784   NativeLoc sr = nd_materialize_operand(d, src);
   1785   ND_REQUIRE_NATIVE(d, atomic_store, "target does not emit atomic stores");
   1786   d->native->atomic_store(d->native, naddr, sr, mem, order);
   1787   nd_release_materialized(d, sr);
   1788   nd_addr_temps_release(d, &temps);
   1789 }
   1790 
   1791 static void nd_atomic_rmw(CgTarget* t, KitCgAtomicOp op, Operand dst,
   1792                           Operand addr, Operand val, MemAccess mem,
   1793                           KitCgMemOrder order) {
   1794   NativeDirectTarget* d = nd_of(t);
   1795   NdAddrTemps temps;
   1796   nd_flush_all(d);
   1797   nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC);
   1798   NativeAddr naddr =
   1799       nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem);
   1800   NativeLoc vr = nd_materialize_operand(d, val);
   1801   NativeLoc dr = nd_dst_scratch(d, dst);
   1802   ND_REQUIRE_NATIVE(d, atomic_rmw, "target does not emit atomic rmw");
   1803   d->native->atomic_rmw(d->native, op, dr, naddr, vr, mem, order);
   1804   nd_store_operand_from_reg(d, dst, dr);
   1805   nd_release_materialized(d, dr);
   1806   nd_release_materialized(d, vr);
   1807   nd_addr_temps_release(d, &temps);
   1808 }
   1809 
   1810 static void nd_atomic_cas(CgTarget* t, Operand prior, Operand ok, Operand addr,
   1811                           Operand expected, Operand desired, MemAccess mem,
   1812                           KitCgMemOrder success, KitCgMemOrder failure) {
   1813   NativeDirectTarget* d = nd_of(t);
   1814   NdAddrTemps temps;
   1815   nd_flush_all(d);
   1816   nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC);
   1817   NativeAddr naddr =
   1818       nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem);
   1819   NativeLoc er = nd_materialize_operand(d, expected);
   1820   NativeLoc dr = nd_materialize_operand(d, desired);
   1821   NativeLoc pr = nd_dst_scratch(d, prior);
   1822   NativeLoc kr = nd_dst_scratch(d, ok);
   1823   ND_REQUIRE_NATIVE(d, atomic_cas,
   1824                     "target does not emit atomic compare-exchange");
   1825   d->native->atomic_cas(d->native, pr, kr, naddr, er, dr, mem, success,
   1826                         failure);
   1827   nd_store_operand_from_reg(d, prior, pr);
   1828   nd_store_operand_from_reg(d, ok, kr);
   1829   nd_release_materialized(d, kr);
   1830   nd_release_materialized(d, pr);
   1831   nd_release_materialized(d, dr);
   1832   nd_release_materialized(d, er);
   1833   nd_addr_temps_release(d, &temps);
   1834 }
   1835 
   1836 static void nd_fence(CgTarget* t, KitCgMemOrder order) {
   1837   NativeDirectTarget* d = nd_of(t);
   1838   nd_flush_all(d);
   1839   ND_REQUIRE_NATIVE(d, fence, "target does not emit fences");
   1840   d->native->fence(d->native, order);
   1841 }
   1842 
   1843 static void nd_intrinsic(CgTarget* t, IntrinKind kind, Operand* dsts, u32 ndst,
   1844                          const Operand* args, u32 narg) {
   1845   NativeDirectTarget* d = nd_of(t);
   1846   NativeLoc* ndsts = nd_loc_buf(d, d->retbuf, ND_RET_BUF, ndst);
   1847   NativeLoc* nargs = nd_loc_buf(d, d->argbuf, ND_ARG_BUF, narg);
   1848   nd_flush_all(d);
   1849   ND_REQUIRE_NATIVE(d, intrinsic, "target does not emit compiler intrinsics");
   1850   for (u32 i = 0; i < ndst; ++i) ndsts[i] = nd_dst_scratch(d, dsts[i]);
   1851   for (u32 i = 0; i < narg; ++i) {
   1852     nargs[i] = args[i].kind == OPK_IMM ? nd_loc_operand(d, args[i])
   1853                                        : nd_materialize_operand(d, args[i]);
   1854   }
   1855   d->native->intrinsic(d->native, kind, ndsts, ndst, nargs, narg);
   1856   for (u32 i = 0; i < ndst; ++i) {
   1857     nd_store_operand_from_reg(d, dsts[i], ndsts[i]);
   1858     nd_release_materialized(d, ndsts[i]);
   1859   }
   1860   for (u32 i = 0; i < narg; ++i) nd_release_materialized(d, nargs[i]);
   1861 }
   1862 
   1863 static void nd_asm_block(CgTarget* t, const char* tmpl,
   1864                          const AsmConstraint* outs, u32 nout, Operand* out_ops,
   1865                          const AsmConstraint* ins, u32 nin,
   1866                          const Operand* in_ops, const Sym* clobbers, u32 nclob,
   1867                          u32 clobber_abi_sets) {
   1868   NativeDirectTarget* d = nd_of(t);
   1869   nd_flush_all(d);
   1870   nd_barrier(d,
   1871              NATIVE_DIRECT_BARRIER_INLINE_ASM | NATIVE_DIRECT_BARRIER_MEMORY);
   1872   if (d->ops && d->ops->asm_block) {
   1873     d->ops->asm_block(d, tmpl, outs, nout, out_ops, ins, nin, in_ops, clobbers,
   1874                       nclob, clobber_abi_sets);
   1875     return;
   1876   }
   1877   nd_panic(d, "target does not emit inline asm");
   1878 }
   1879 
   1880 static int nd_asm_is_reg_constraint(CgTarget* t, const char* constraint) {
   1881   NativeDirectTarget* d = nd_of(t);
   1882   return native_asm_constraint_is_reg(d->native, constraint);
   1883 }
   1884 
   1885 static void nd_file_scope_asm(CgTarget* t, const char* src, size_t len) {
   1886   NativeDirectTarget* d = nd_of(t);
   1887   ND_REQUIRE_NATIVE(d, file_scope_asm, "target does not emit file-scope asm");
   1888   d->native->file_scope_asm(d->native, src, len);
   1889 }
   1890 
   1891 static void nd_set_loc(CgTarget* t, SrcLoc loc) {
   1892   NativeDirectTarget* d = nd_of(t);
   1893   d->loc = loc;
   1894   if (d->native && d->native->set_loc) d->native->set_loc(d->native, loc);
   1895 }
   1896 
   1897 static void nd_finalize(CgTarget* t) {
   1898   NativeDirectTarget* d = nd_of(t);
   1899   if (d->native && d->native->finalize) d->native->finalize(d->native);
   1900 }
   1901 
   1902 static void nd_destroy(CgTarget* t) {
   1903   NativeDirectTarget* d = nd_of(t);
   1904   if (d->native && d->native->destroy) d->native->destroy(d->native);
   1905 }
   1906 
   1907 CgTarget* native_direct_target_new(Compiler* c, ObjBuilder* obj,
   1908                                    const NativeDirectTargetConfig* cfg) {
   1909   NativeDirectTarget* d;
   1910   if (!c || !cfg || !cfg->native)
   1911     compiler_panic(c, (SrcLoc){0, 0, 0},
   1912                    "native_direct_target_new: missing native target");
   1913   d = arena_znew(c->tu, NativeDirectTarget);
   1914   if (!d) return NULL;
   1915   d->base.c = c;
   1916   d->base.obj = obj;
   1917   d->magic = NATIVE_DIRECT_MAGIC;
   1918   d->native = cfg->native;
   1919   d->ops = cfg->ops;
   1920   d->user = cfg->user;
   1921 
   1922   /* Resolve register/class info once; it is constant for the program. */
   1923   d->reg_info = cfg->native ? cfg->native->regs : NULL;
   1924   for (u32 i = 0; i < 3u; ++i) d->class_info[i] = NULL;
   1925   if (d->reg_info) {
   1926     const NativeRegInfo* ri = d->reg_info;
   1927     for (u32 i = 0; i < ri->nclasses; ++i) {
   1928       u32 cls = ri->classes[i].cls;
   1929       if (cls < 3u) d->class_info[cls] = &ri->classes[i];
   1930     }
   1931   }
   1932 
   1933   d->base.func_begin = nd_func_begin;
   1934   d->base.func_end = nd_func_end;
   1935   d->base.alias = nd_alias;
   1936   d->base.local = nd_local_new;
   1937   d->base.local_addr = nd_local_addr;
   1938   d->base.param = nd_param;
   1939   d->base.local_debug_loc = nd_local_debug_loc;
   1940   d->base.label_new = nd_label_new;
   1941   d->base.label_place = nd_label_place;
   1942   d->base.jump = nd_jump;
   1943   d->base.cmp_branch = nd_cmp_branch;
   1944   d->base.switch_ = nd_switch;
   1945   d->base.indirect_branch = nd_indirect_branch;
   1946   d->base.load_label_addr = nd_load_label_addr;
   1947   d->base.local_static_data_begin = nd_local_static_data_begin;
   1948   d->base.local_static_data_write = nd_local_static_data_write;
   1949   d->base.local_static_data_label_addr = nd_local_static_data_label_addr;
   1950   d->base.local_static_data_end = nd_local_static_data_end;
   1951   d->base.data_label_addr_unsupported_msg = nd_data_label_addr_unsupported_msg;
   1952   d->base.scope_begin = nd_scope_begin;
   1953   d->base.scope_end = nd_scope_end;
   1954   d->base.break_to = nd_break_to;
   1955   d->base.continue_to = nd_continue_to;
   1956   d->base.load_imm = nd_load_imm;
   1957   d->base.load_const = nd_load_const;
   1958   d->base.copy = nd_copy;
   1959   d->base.load = nd_load;
   1960   d->base.store = nd_store;
   1961   d->base.addr_of = nd_addr_of;
   1962   d->base.tls_addr_of = nd_tls_addr_of;
   1963   d->base.copy_bytes = nd_copy_bytes;
   1964   d->base.set_bytes = nd_set_bytes;
   1965   d->base.binop = nd_binop;
   1966   d->base.unop = nd_unop;
   1967   d->base.cmp = nd_cmp;
   1968   d->base.convert = nd_convert;
   1969   d->base.call = nd_call;
   1970   d->base.tail_call_unrealizable_reason = nd_tail_call_unrealizable_reason;
   1971   d->base.ret = nd_ret;
   1972   d->base.unreachable = nd_unreachable;
   1973   d->base.alloca_ = nd_alloca;
   1974   d->base.va_start_ = nd_va_start;
   1975   d->base.va_arg_ = nd_va_arg;
   1976   d->base.va_end_ = nd_va_end;
   1977   d->base.va_copy_ = nd_va_copy;
   1978   d->base.atomic_load = nd_atomic_load;
   1979   d->base.atomic_store = nd_atomic_store;
   1980   d->base.atomic_rmw = nd_atomic_rmw;
   1981   d->base.atomic_cas = nd_atomic_cas;
   1982   d->base.fence = nd_fence;
   1983   d->base.intrinsic = nd_intrinsic;
   1984   d->base.asm_is_reg_constraint = nd_asm_is_reg_constraint;
   1985   d->base.asm_block = nd_asm_block;
   1986   d->base.file_scope_asm = nd_file_scope_asm;
   1987   d->base.set_loc = nd_set_loc;
   1988   d->base.finalize = nd_finalize;
   1989   d->base.destroy = nd_destroy;
   1990   return &d->base;
   1991 }
   1992 
   1993 NativeTarget* native_direct_target_native(CgTarget* t) {
   1994   NativeDirectTarget* d = t ? nd_of(t) : NULL;
   1995   return d && d->magic == NATIVE_DIRECT_MAGIC ? d->native : NULL;
   1996 }