kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

memory.c (24333B)


      1 #include "cg/internal.h"
      2 
      3 void kit_cg_push_int(KitCg* g, uint64_t value, KitCgTypeId type) {
      4   KitCgTypeId ty;
      5   if (!g) return;
      6   ty = resolve_type(g->c, type);
      7   if (!ty) return;
      8   /* A 16-byte scalar immediate cannot be represented by the 64-bit op.v.imm
      9    * alone; materialize it into addressable storage with both lanes
     10    * sign-extended so no downstream consumer sees an undefined high half. */
     11   if (api_is_wide16_scalar_type(g->c, ty)) {
     12     api_push(g, api_make_wide16_int_const(g, (i64)value, ty));
     13     return;
     14   }
     15   /* Split-lane 8-byte int: the 64-bit value fits in op.v.imm, but the value is
     16    * memory-resident, so materialize it as two 32-bit lanes. */
     17   if (api_is_wide8_scalar_type(g->c, ty)) {
     18     api_push(g, api_make_wide8_int_const(g, (i64)value, ty));
     19     return;
     20   }
     21   api_push(g, api_make_sv(api_op_imm((i64)value, ty), ty));
     22 }
     23 
     24 void kit_cg_push_float(KitCg* g, double value, KitCgTypeId type) {
     25   KitCgTypeId ty;
     26   CgTarget* T;
     27   ConstBytes cb;
     28   union {
     29     double d;
     30     float f;
     31     uint8_t b[8];
     32   } u;
     33   CGLocal r;
     34   Operand dst;
     35   if (!g) return;
     36   ty = resolve_type(g->c, type);
     37   if (!ty) return;
     38   if (api_is_f128_type(g->c, ty)) {
     39     api_push(g, api_make_f128_const(g, value, ty));
     40     return;
     41   }
     42   /* Split-lane double: the 8-byte value is memory-resident, so materialize the
     43    * IEEE-754 binary64 pattern as two 32-bit lanes. */
     44   if (api_is_wide8_scalar_type(g->c, ty)) {
     45     union {
     46       double d;
     47       u64 u;
     48     } bits;
     49     bits.d = value;
     50     api_push(g, api_make_wide8_const_bits(g, bits.u, ty));
     51     return;
     52   }
     53   T = g->target;
     54   cb.type = ty;
     55   cb.size = (u32)abi_cg_sizeof(g->c->abi, type);
     56   cb.align = (u32)abi_cg_alignof(g->c->abi, type);
     57   if (ty == builtin_id(KIT_CG_BUILTIN_F32))
     58     u.f = (float)value;
     59   else
     60     u.d = value;
     61   cb.bytes = u.b;
     62   r = api_alloc_temp_local(g, ty);
     63   dst = api_op_local(r, ty);
     64   T->load_const(T, dst, cb);
     65   api_push(g, api_make_sv(dst, ty));
     66 }
     67 
     68 void kit_cg_push_null(KitCg* g, KitCgTypeId ptr_type) {
     69   KitCgTypeId ty;
     70   if (!g) return;
     71   ty = resolve_type(g->c, ptr_type);
     72   if (!ty) return;
     73   api_push(g, api_make_sv(api_op_imm(0, ty), ty));
     74 }
     75 
     76 static int api_const_data_can_defer(const KitCg* g) {
     77   if (!g || g->opt_level < 1) return 0;
     78   if (g->fn_ret_type == KIT_CG_TYPE_NONE) return 0;
     79   if (g->data_sym != OBJ_SYM_NONE || g->data_sec != OBJ_SEC_NONE) return 0;
     80   if (g->data_local_static_target || g->data_discard ||
     81       g->data_tls_collect) {
     82     return 0;
     83   }
     84   return g->target && g->target->local_static_data_begin &&
     85          g->target->local_static_data_write &&
     86          g->target->local_static_data_end;
     87 }
     88 
     89 static int api_const_data_emit_deferred(KitCg* g, ObjSymId sym,
     90                                         KitCgTypeId type, const uint8_t* data,
     91                                         size_t len, uint32_t align) {
     92   CGLocalStaticDataDesc desc;
     93   KitCgDataDefAttrs data_attrs;
     94   if (!api_const_data_can_defer(g)) return 0;
     95   memset(&data_attrs, 0, sizeof data_attrs);
     96   data_attrs.flags = KIT_CG_DATADEF_FUNCTION_LOCAL | KIT_CG_DATADEF_READONLY;
     97   data_attrs.align = align;
     98   memset(&desc, 0, sizeof desc);
     99   desc.sym = sym;
    100   desc.type = type;
    101   desc.attrs = data_attrs;
    102   desc.align = align;
    103   if (!g->target->local_static_data_begin(g->target, &desc)) return 0;
    104   if (len) g->target->local_static_data_write(g->target, data, (u64)len);
    105   g->target->local_static_data_end(g->target);
    106   return 1;
    107 }
    108 
    109 KitCgSym kit_cg_const_data(KitCg* g, const uint8_t* data, size_t len,
    110                            uint32_t align, KitCgTypeId pointee_type) {
    111   Compiler* c;
    112   ObjBuilder* ob;
    113   KitCgTypeId pty;
    114   Sym sec_name;
    115   ObjSecId sec;
    116   u32 base;
    117   char name_buf[32];
    118   StrBuf name_sb;
    119   Sym anon_name;
    120   ObjSymId sym;
    121   KitCgDecl attrs;
    122   int defer;
    123   if (!g) return KIT_CG_SYM_NONE;
    124   c = g->c;
    125   ob = g->obj;
    126   pty = resolve_type(c, pointee_type);
    127   if (!pty) return KIT_CG_SYM_NONE;
    128   align = align ? align : (u32)abi_cg_alignof(c->abi, pointee_type);
    129   sec_name = pool_intern_slice(c->global, SLICE_LIT(".rodata"));
    130   strbuf_init(&name_sb, name_buf, sizeof(name_buf));
    131   strbuf_put_slice(&name_sb, SLICE_LIT(".Lkit_ro."));
    132   strbuf_put_u64(&name_sb, g->rodata_counter++);
    133   anon_name = pool_intern_slice(
    134       c->global,
    135       (Slice){.s = strbuf_cstr(&name_sb), .len = strbuf_len(&name_sb)});
    136   defer = api_const_data_can_defer(g);
    137   sym = defer ? obj_symbol_defer(ob, anon_name, SB_LOCAL, SV_DEFAULT, SK_OBJ,
    138                                  (u64)len)
    139               : obj_symbol(ob, anon_name, SB_LOCAL, SK_OBJ, OBJ_SEC_NONE, 0,
    140                            (u64)len);
    141   if (sym == OBJ_SYM_NONE)
    142     compiler_panic(c, g->cur_loc, "kit_cg_const_data: symbol failed");
    143   memset(&attrs, 0, sizeof(attrs));
    144   attrs.kind = KIT_CG_DECL_OBJECT;
    145   attrs.sym.bind = KIT_SB_LOCAL;
    146   attrs.sym.visibility = KIT_CG_VIS_DEFAULT;
    147   attrs.as.object.flags = KIT_CG_OBJ_READONLY;
    148   api_remember_sym(g, sym, pty, attrs);
    149   if (defer && api_const_data_emit_deferred(g, sym, pty, data, len, align))
    150     return (KitCgSym)sym;
    151   sec = obj_section(ob, sec_name, SEC_RODATA, SF_ALLOC, align);
    152   base = obj_align_to(ob, sec, align);
    153   obj_write(ob, sec, data, len);
    154   obj_symbol_define_live(ob, sym, sec, base, (u64)len);
    155   return (KitCgSym)sym;
    156 }
    157 
    158 void api_push_local_lvalue(KitCg* g, CGLocal local, KitCgTypeId type) {
    159   if (!g) return;
    160   api_push(g, api_make_lv(api_op_local(local, type), type));
    161 }
    162 
    163 void api_push_source_local_lvalue(KitCg* g, KitCgLocal source_local,
    164                                   CGLocal storage, KitCgTypeId type) {
    165   ApiSValue sv;
    166   if (!g) return;
    167   sv = api_make_lv(api_op_local(storage, type), type);
    168   sv.source_local = source_local;
    169   api_push(g, sv);
    170 }
    171 
    172 void kit_cg_push_local(KitCg* g, KitCgLocal local) {
    173   ApiSourceLocal* rec;
    174   if (!g) return;
    175   rec = api_local_from_handle(g, local);
    176   if (!rec) return;
    177   api_push_source_local_lvalue(g, local, rec->storage, rec->type);
    178 }
    179 
    180 void kit_cg_push_local_addr(KitCg* g, KitCgLocal local) {
    181   kit_cg_push_local(g, local);
    182   kit_cg_addr(g);
    183 }
    184 
    185 void kit_cg_push_symbol_addr(KitCg* g, KitCgSym sym, int64_t addend) {
    186   KitCgTypeId ty;
    187   KitCgTypeId ptr_ty;
    188   if (!g) return;
    189   ty = api_sym_type(g, sym);
    190   if (!ty) ty = builtin_id(KIT_CG_BUILTIN_VOID);
    191   ptr_ty = cg_type_ptr_to(g->c, ty);
    192   if (api_sym_is_tls(g, sym)) {
    193     CGLocal r = api_alloc_temp_local(g, ptr_ty);
    194     Operand dst = api_op_local(r, ptr_ty);
    195     g->target->tls_addr_of(g->target, dst, (ObjSymId)sym, addend);
    196     api_push(g, api_make_sv(dst, ptr_ty));
    197   } else {
    198     api_push(g,
    199              api_make_sv(api_op_global((ObjSymId)sym, addend, ptr_ty), ptr_ty));
    200   }
    201 }
    202 
    203 static int api_sv_local_storage_is_aggregate(KitCg* g, const ApiSValue* sv) {
    204   ApiSourceLocal* rec;
    205   if (!sv || sv->op.kind != OPK_LOCAL ||
    206       sv->source_local == KIT_CG_LOCAL_NONE) {
    207     return 0;
    208   }
    209   rec = api_local_from_handle(g, sv->source_local);
    210   return rec && rec->storage == sv->op.v.local &&
    211          cg_type_is_aggregate(g->c, rec->type);
    212 }
    213 
    214 /* Retype an already-addressable place operand to the access type — the way the
    215  * old offset-0 EA fold did. GLOBAL/INDIRECT carry the access type into the
    216  * operand; a bare LOCAL keeps its own type (the memop's MemAccess carries the
    217  * access width). */
    218 static Operand place_operand_for_access(Operand op, KitCgTypeId access_ty) {
    219   switch (op.kind) {
    220     case OPK_GLOBAL:
    221       return api_op_global(op.v.global.sym, op.v.global.addend, access_ty);
    222     case OPK_INDIRECT:
    223       return api_op_indirect_indexed(op.v.ind.base, op.v.ind.index,
    224                                      op.v.ind.log2_scale, op.v.ind.ofs,
    225                                      access_ty);
    226     default:
    227       return op;
    228   }
    229 }
    230 
    231 /* Load a VALUE from the PLACE on TOS. The place encodes the full address (built
    232  * by push_local / deref / field / elem); there is no EA rider. Strict: the
    233  * operand must be a PLACE — a pointer VALUE must be `deref`'d first. */
    234 void kit_cg_load(KitCg* g, KitCgMemAccess access) {
    235   ApiSValue base;
    236   CgTarget* T;
    237   KitCgTypeId ty;
    238   KitCgTypeId access_ty;
    239   CGLocal owned_base = CG_LOCAL_NONE;
    240   Operand mem_op;
    241   CGLocal dst_r;
    242   Operand dst;
    243   int is_bitfield;
    244   if (!g) return;
    245   T = g->target;
    246   if (access.flags & KIT_CG_MEM_VOLATILE) api_local_const_memory_boundary(g);
    247 
    248   base = api_pop(g);
    249   if (!api_is_lvalue_sv(&base)) {
    250     compiler_panic(g->c, g->cur_loc,
    251                    "KitCg: load requires a place; deref the pointer first");
    252   }
    253   is_bitfield = api_sv_is_bitfield(&base);
    254 
    255   /* Aggregate place: an aggregate-typed access returns the place itself; a
    256    * scalar access reads a scalar sub-object and falls through. */
    257   if (!is_bitfield && cg_type_is_aggregate(g->c, api_sv_type(&base))) {
    258     ty = api_mem_access_type(g, access, api_sv_type(&base), "load");
    259     if (cg_type_is_aggregate(g->c, ty)) {
    260       u32 access_size = api_mem_type_size(g, ty, "load");
    261       u32 lvalue_size = api_mem_type_size(g, api_sv_type(&base), "load");
    262       if (access_size != lvalue_size) {
    263         compiler_panic(g->c, g->cur_loc,
    264                        "KitCg: load aggregate type/size mismatch: access "
    265                        "size %u, lvalue size %u",
    266                        (unsigned)access_size, (unsigned)lvalue_size);
    267       }
    268       api_push(g, base);
    269       return;
    270     }
    271     /* scalar access from an aggregate place: fall through */
    272   }
    273 
    274   ty = api_mem_access_type(g, access, api_sv_type(&base), "load");
    275   access_ty = ty;
    276 
    277   if (!is_bitfield) api_require_scalar_mem_type(g, "load", access_ty);
    278 
    279   /* Source-local constant load. */
    280   if (!is_bitfield && base.source_local != KIT_CG_LOCAL_NONE &&
    281       api_local_const_load(g, base.source_local, access, &dst)) {
    282     api_release(g, &base);
    283     api_push(g, api_make_sv(dst, dst.type));
    284     return;
    285   }
    286 
    287   /* Scalar local place: the value already lives in the local; hand it back
    288    * directly without a memory access. */
    289   if (!is_bitfield && base.source_local != KIT_CG_LOCAL_NONE &&
    290       base.op.kind == OPK_LOCAL &&
    291       !api_sv_local_storage_is_aggregate(g, &base) &&
    292       !cg_type_is_aggregate(g->c, api_sv_type(&base)) &&
    293       !cg_type_is_aggregate(g->c, ty) &&
    294       api_unalias_type(g->c, api_sv_type(&base)) ==
    295           api_unalias_type(g->c, ty)) {
    296     base.lvalue = 0;
    297     base.res = RES_FIXED_LOCAL;
    298     api_push(g, base);
    299     return;
    300   }
    301 
    302   /* Resolve the place into a single backend memop operand. */
    303   if (!api_operand_can_address(&base.op)) {
    304     KitCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base));
    305     Operand addr = api_lvalue_addr(g, &base, pty);
    306     mem_op = api_op_indirect(addr.v.local, 0, access_ty);
    307     owned_base = addr.v.local;
    308   } else {
    309     mem_op = place_operand_for_access(base.op, access_ty);
    310   }
    311 
    312   if (base.source_local != KIT_CG_LOCAL_NONE) {
    313     api_local_const_clear(api_local_from_handle(g, base.source_local));
    314   }
    315 
    316   dst_r = api_alloc_temp_local(g, access_ty);
    317   dst = api_op_local(dst_r, access_ty);
    318   if (is_bitfield) {
    319     /* A bit-field load rides the generic `load` with a bit-field MemAccess; the
    320      * CgTarget impl extracts/extends within the storage unit. */
    321     T->load(T, dst, mem_op, api_mem_for_bitfield(g, &base, &mem_op, access_ty));
    322   } else {
    323     T->load(T, dst, mem_op, api_mem_from_access(g, &mem_op, access));
    324   }
    325 
    326   api_release(g, &base);
    327   if (owned_base != CG_LOCAL_NONE) api_release_temp_local(g, owned_base);
    328   api_push(g, api_make_sv(dst, access_ty));
    329 }
    330 
    331 /* VALUE(ptr) -> PLACE: the explicit pointer->place transition. The produced
    332  * place is *(ptr + offset bytes). Strict on kind: the operand must be a pointer
    333  * VALUE, never a PLACE — the caller turns a place into a pointer with `addr`
    334  * first. */
    335 void kit_cg_deref(KitCg* g, int64_t offset) {
    336   ApiSValue v;
    337   KitCgTypeId pty;
    338   KitCgTypeId pointee;
    339   Operand ptr;
    340   if (!g) return;
    341   v = api_pop(g);
    342   pty = api_sv_type(&v);
    343   if (api_is_lvalue_sv(&v) || !cg_type_is_ptr(g->c, pty)) {
    344     compiler_panic(g->c, g->cur_loc,
    345                    "KitCg: deref requires a pointer value, not a place");
    346   }
    347   pointee = cg_type_pointee(g->c, pty);
    348   if (!pointee) pointee = builtin_id(KIT_CG_BUILTIN_VOID);
    349   /* A symbol address derefs to a global place, preserving direct
    350    * (PC-relative/absolute) addressing rather than materializing the address. */
    351   if (v.kind == SV_OPERAND && v.op.kind == OPK_GLOBAL) {
    352     api_push(g,
    353              api_make_lv(api_op_global(v.op.v.global.sym,
    354                                        v.op.v.global.addend + offset, pointee),
    355                          pointee));
    356     return;
    357   }
    358   ptr = api_force_local(g, &v, pty);
    359   if (offset >= INT32_MIN && offset <= INT32_MAX) {
    360     api_push(g, api_make_lv(api_op_indirect(ptr.v.local, (i32)offset, pointee),
    361                             pointee));
    362   } else {
    363     CGLocal r = api_alloc_temp_local(g, pty);
    364     Operand p2 = api_op_local(r, pty);
    365     g->target->binop(g->target, BO_IADD, p2, ptr, api_op_imm(offset, pty));
    366     api_push(g, api_make_lv(api_op_indirect(r, 0, pointee), pointee));
    367   }
    368 }
    369 
    370 void kit_cg_addr(KitCg* g) {
    371   ApiSValue v;
    372   KitCgTypeId pty;
    373   Operand dst;
    374   if (!g) return;
    375   v = api_pop(g);
    376   pty = cg_type_ptr_to(g->c, api_sv_type(&v));
    377   if (v.source_local != KIT_CG_LOCAL_NONE)
    378     api_local_const_address_taken(g, v.source_local);
    379   dst = api_lvalue_addr(g, &v, pty);
    380   api_release(g, &v);
    381   api_push(g, api_make_sv(dst, pty));
    382 }
    383 
    384 /* Store the VALUE on TOS into the PLACE beneath it. Stack: [place, value] ->
    385  * []. The place encodes the full address; there is no EA rider. Strict: the
    386  * destination must be a PLACE — a pointer VALUE must be `deref`'d first. */
    387 void kit_cg_store(KitCg* g, KitCgMemAccess access) {
    388   ApiSValue base, rv;
    389   CgTarget* T;
    390   KitCgTypeId ty;
    391   KitCgTypeId access_ty;
    392   Operand src;
    393   CGLocal owned_base = CG_LOCAL_NONE;
    394   Operand mem_op;
    395   int is_lvalue;
    396   int is_bitfield;
    397   if (!g) return;
    398   T = g->target;
    399   if (access.flags & KIT_CG_MEM_VOLATILE) api_local_const_memory_boundary(g);
    400 
    401   /* Stack: [base, value] - pop value, then base. */
    402   rv = api_pop(g);
    403   base = api_pop(g);
    404   is_lvalue = api_is_lvalue_sv(&base);
    405 
    406   if (!is_lvalue) {
    407     compiler_panic(g->c, g->cur_loc,
    408                    "KitCg: store requires a place destination; deref first");
    409     return;
    410   }
    411   is_bitfield = api_sv_is_bitfield(&base);
    412 
    413   ty = api_mem_access_type(g, access, api_sv_type(&base), "store");
    414   access_ty = ty;
    415 
    416   /* Aggregate store: memcpy through the source place. */
    417   if (!is_bitfield && (cg_type_is_aggregate(g->c, ty) ||
    418                        cg_type_is_aggregate(g->c, api_sv_type(&rv)))) {
    419     KitCgTypeId ptr_ty;
    420     Operand dst_addr, src_addr;
    421     int dst_addr_owned;
    422     int src_addr_owned;
    423     int src_ptr_rvalue;
    424     AggregateAccess agg;
    425     u32 src_size;
    426     u32 dst_size = cg_type_is_aggregate(g->c, ty)
    427                        ? api_mem_type_size(g, ty, "store")
    428                        : api_mem_type_size(g, api_sv_type(&base), "store");
    429     u32 access_size = cg_type_is_aggregate(g->c, ty)
    430                           ? api_mem_type_size(g, ty, "store")
    431                           : dst_size;
    432     src_ptr_rvalue =
    433         !api_is_lvalue_sv(&rv) && cg_type_is_ptr(g->c, api_sv_type(&rv));
    434     src_size = src_ptr_rvalue ? access_size
    435                               : api_mem_type_size(g, api_sv_type(&rv), "store");
    436     if (!api_is_lvalue_sv(&rv) && !src_ptr_rvalue) {
    437       compiler_panic(g->c, g->cur_loc,
    438                      "KitCg: aggregate store source is not an lvalue");
    439     }
    440     if (access_size != dst_size || access_size != src_size) {
    441       compiler_panic(g->c, g->cur_loc,
    442                      "KitCg: store aggregate type/size mismatch: access "
    443                      "size %u, destination size %u, value size %u",
    444                      (unsigned)access_size, (unsigned)dst_size,
    445                      (unsigned)src_size);
    446     }
    447     if (base.source_local != KIT_CG_LOCAL_NONE) {
    448       api_local_const_clear(api_local_from_handle(g, base.source_local));
    449     } else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL ||
    450                (access.flags & KIT_CG_MEM_VOLATILE)) {
    451       api_local_const_memory_boundary(g);
    452     }
    453     ptr_ty = cg_type_ptr_to(g->c, ty);
    454     dst_addr = api_lvalue_addr(g, &base, ptr_ty);
    455     dst_addr_owned = 1;
    456     if (src_ptr_rvalue) {
    457       src_addr = api_force_local(g, &rv, api_sv_type(&rv));
    458       src_addr_owned = 0;
    459     } else {
    460       src_addr = api_lvalue_addr(g, &rv, ptr_ty);
    461       src_addr_owned = 1;
    462     }
    463     memset(&agg, 0, sizeof agg);
    464     agg.size = access_size;
    465     agg.align = access.align ? access.align : abi_cg_alignof(g->c->abi, ty);
    466     T->copy_bytes(T, dst_addr, src_addr, agg);
    467     if (dst_addr_owned) api_release_temp_local(g, dst_addr.v.local);
    468     if (src_addr_owned) api_release_temp_local(g, src_addr.v.local);
    469     api_release(g, &base);
    470     api_release(g, &rv);
    471     return;
    472   }
    473 
    474   if (!is_bitfield) api_validate_memory_value(g, "store", ty, api_sv_type(&rv));
    475 
    476   /* A 16-byte scalar immediate (an i128 small constant) only carries 64 bits in
    477    * op.v.imm; materialize it into both sign-extended lanes so the general store
    478    * path moves a correct 16-byte value rather than load_imm'ing the low lane
    479    * and leaving the high half as garbage. */
    480   if (!is_bitfield && api_sv_op_is(&rv, OPK_IMM) &&
    481       api_is_wide16_scalar_type(g->c, ty)) {
    482     rv = api_make_wide16_int_const(g, rv.op.v.imm, ty);
    483   }
    484   /* Same for a split-lane 8-byte immediate: lower it to a 2-lane memory value
    485    * so the store moves a full 64-bit value rather than only the low word. */
    486   if (!is_bitfield && api_sv_op_is(&rv, OPK_IMM) &&
    487       api_is_wide8_scalar_type(g->c, ty)) {
    488     rv = api_make_wide8_int_const(g, rv.op.v.imm, ty);
    489   }
    490 
    491   /* General scalar / bit-field store. Compute the source operand first so its
    492    * local lifetime doesn't overlap any addressing arith. */
    493   api_ensure_local(g, &rv);
    494   if (api_sv_op_is_local_or_imm(&rv)) {
    495     src = rv.op;
    496   } else {
    497     src = api_force_local(g, &rv, api_sv_type(&rv));
    498   }
    499 
    500   /* Scalar local-resident place, plain store: copy into the local. */
    501   if (!is_bitfield && base.source_local != KIT_CG_LOCAL_NONE &&
    502       base.op.kind == OPK_LOCAL &&
    503       !api_sv_local_storage_is_aggregate(g, &base) &&
    504       !cg_type_is_aggregate(g->c, api_sv_type(&base)) &&
    505       !cg_type_is_aggregate(g->c, ty) &&
    506       api_unalias_type(g->c, api_sv_type(&base)) ==
    507           api_unalias_type(g->c, ty)) {
    508     Operand dst = base.op;
    509     if (src.kind == OPK_IMM) {
    510       T->load_imm(T, dst, src.v.imm);
    511       if (base.source_local != KIT_CG_LOCAL_NONE)
    512         api_local_const_store(g, base.source_local, access, src.v.imm);
    513     } else {
    514       if (src.kind != OPK_LOCAL) src = api_force_local(g, &rv, ty);
    515       if (src.v.local != dst.v.local) T->copy(T, dst, src);
    516       if (base.source_local != KIT_CG_LOCAL_NONE)
    517         api_local_const_clear(api_local_from_handle(g, base.source_local));
    518     }
    519     api_release(g, &base);
    520     api_release(g, &rv);
    521     return;
    522   }
    523 
    524   /* Resolve the place into a single backend memop operand. */
    525   if (!api_operand_can_address(&base.op)) {
    526     KitCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base));
    527     Operand addr = api_lvalue_addr(g, &base, pty);
    528     mem_op = api_op_indirect(addr.v.local, 0, access_ty);
    529     owned_base = addr.v.local;
    530   } else {
    531     mem_op = place_operand_for_access(base.op, access_ty);
    532   }
    533 
    534   /* Source-local tracking: only a plain scalar-to-scalar store can fold into a
    535    * tracked constant; everything else clears tracking. */
    536   if (base.source_local != KIT_CG_LOCAL_NONE) {
    537     if (!is_bitfield && src.kind == OPK_IMM) {
    538       api_local_const_store(g, base.source_local, access, src.v.imm);
    539     } else {
    540       api_local_const_clear(api_local_from_handle(g, base.source_local));
    541     }
    542   } else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL ||
    543              (access.flags & KIT_CG_MEM_VOLATILE)) {
    544     api_local_const_memory_boundary(g);
    545   }
    546 
    547   if (is_bitfield) {
    548     /* A bit-field store rides the generic `store` with a bit-field MemAccess;
    549      * the CgTarget impl does the read-modify-write insert. */
    550     T->store(T, mem_op, src,
    551              api_mem_for_bitfield(g, &base, &mem_op, access_ty));
    552   } else {
    553     T->store(T, mem_op, src, api_mem_from_access(g, &mem_op, access));
    554   }
    555 
    556   api_release(g, &base);
    557   api_release(g, &rv);
    558   if (owned_base != CG_LOCAL_NONE) api_release_temp_local(g, owned_base);
    559 }
    560 
    561 /* ============================================================
    562  * Stack manipulation
    563  * ============================================================ */
    564 
    565 void kit_cg_dup(KitCg* g) {
    566   ApiSValue v, dup;
    567   ApiSValue* top;
    568   KitCgTypeId ty;
    569   CGLocal r;
    570   Operand dst;
    571   if (!g || g->sp == 0) return;
    572   top = &g->stack[g->sp - 1];
    573   api_ensure_local(g, top);
    574   v = *top;
    575   if (v.res != RES_LOCAL) {
    576     if (v.res == RES_FIXED_LOCAL && !api_is_lvalue_sv(&v) &&
    577         v.op.kind == OPK_LOCAL) {
    578       ty = api_owned_local_type(g, &v);
    579       r = api_alloc_temp_local(g, ty);
    580       dst = api_op_local(r, ty);
    581       g->target->copy(g->target, dst,
    582                       api_op_local((CGLocal)api_local_of_sv(&v), ty));
    583       dup = v;
    584       api_set_owned_local(&dup, r);
    585       dup.res = RES_LOCAL;
    586       dup.pinned = 0;
    587       dup.source_local = KIT_CG_LOCAL_NONE;
    588       g->stack[g->sp - 1] = dup;
    589       api_push(g, v);
    590       return;
    591     }
    592     api_push(g, v);
    593     return;
    594   }
    595   top->pinned = 1;
    596   ty = api_owned_local_type(g, &v);
    597   r = api_alloc_temp_local(g, ty);
    598   dst = api_op_local(r, ty);
    599   g->target->copy(g->target, dst,
    600                   api_op_local((CGLocal)api_local_of_sv(&v), ty));
    601   g->stack[g->sp - 1].pinned = 0;
    602   dup = v;
    603   api_set_owned_local(&dup, r);
    604   dup.res = RES_LOCAL;
    605   dup.pinned = 0;
    606   api_push(g, dup);
    607 }
    608 
    609 /* Duplicate the top two value-stack entries. The lower of the two is the deeper
    610  * element; the higher is TOS. After dup2, the stack contains [a, b, a, b]
    611  * where TOS was [..., a, b]. Used to support compound assignment through a
    612  * scaled-index lvalue: the frontend duplicates [base, index] so it can
    613  * read-modify-write with a single EA expression each side.
    614  *
    615  * The current implementation duplicates the two entries one at a time using
    616  * kit_cg_dup with a rot3 between them so local/operand sharing stays
    617  * correct under the per-entry machinery. */
    618 void kit_cg_dup2(KitCg* g) {
    619   if (!g || g->sp < 2) return;
    620   /* Stack: [..., a, b]
    621    * Step 1: dup the lower (a). We push under TOS by first swapping. */
    622   kit_cg_swap(g); /* [..., b, a] */
    623   kit_cg_dup(g);  /* [..., b, a, a] */
    624   kit_cg_rot3(g); /* [..., a, a, b] */
    625   kit_cg_dup(g);  /* [..., a, a, b, b] */
    626   /* Now: [..., a, a, b, b]; we want [..., a, b, a, b]. */
    627   /* swap middle two: this is the [..., x, a, b, y]-shaped rotation. We
    628    * implement it by rot3 then swap. */
    629   /* Current: ..., a, a, b, b   indices (from top): 0=b, 1=b, 2=a, 3=a
    630    *
    631    * Want: ..., a, b, a, b. Difference: positions 1 (b) and 2 (a) should
    632    * swap. We accomplish that by:
    633    *   rot3       : [..., a, b, b, a]   (rotate top 3 forward)
    634    *   swap       : [..., a, b, a, b]
    635    */
    636   kit_cg_rot3(g);
    637   kit_cg_swap(g);
    638 }
    639 
    640 void kit_cg_swap(KitCg* g) {
    641   ApiSValue tmp;
    642   if (!g || g->sp < 2) return;
    643   tmp = g->stack[g->sp - 1];
    644   g->stack[g->sp - 1] = g->stack[g->sp - 2];
    645   g->stack[g->sp - 2] = tmp;
    646 }
    647 
    648 void kit_cg_drop(KitCg* g) {
    649   ApiSValue v;
    650   if (!g) return;
    651   v = api_pop(g);
    652   api_release(g, &v);
    653 }
    654 
    655 int kit_cg_top_const_int(KitCg* g, int64_t* out_value) {
    656   ApiSValue* v;
    657   KitCgTypeId ty;
    658   u32 width;
    659   if (!g || !out_value || !g->sp) return 0;
    660   v = &g->stack[g->sp - 1u];
    661   if (v->kind != SV_OPERAND || v->op.kind != OPK_IMM) return 0;
    662   ty = api_sv_type(v);
    663   if (!api_foldable_int_like_type(g->c, ty, &width)) return 0;
    664   *out_value = api_fold_result(g->c, ty, (u64)v->op.v.imm, width);
    665   return 1;
    666 }
    667 
    668 void kit_cg_rot3(KitCg* g) {
    669   ApiSValue a, b, c;
    670   if (!g || g->sp < 3) return;
    671   a = g->stack[g->sp - 3];
    672   b = g->stack[g->sp - 2];
    673   c = g->stack[g->sp - 1];
    674   g->stack[g->sp - 3] = b;
    675   g->stack[g->sp - 2] = c;
    676   g->stack[g->sp - 1] = a;
    677 }
    678 
    679 /* ============================================================
    680  * Arithmetic / compare / convert
    681  * ============================================================ */
    682 
    683 const char* api_i128_binop_helper(BinOp op);
    684 int api_i128_cmp_is_unsigned(CmpOp op);
    685 void api_cg_cmp(KitCg* g, CmpOp cop);
    686 void api_f128_call_unary(KitCg* g, const char* name, KitCgTypeId ret,
    687                          KitCgTypeId param);