kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

c_emit.c (144331B)


      1 /* C-source emission core. See doc/CBACKEND.md.
      2  *
      3  * Output strategy
      4  * ---------------
      5  * Each function buffers two CBufs while CG walks the body:
      6  *   decls — variable declarations: "  long long v3;\n"
      7  *   body  — TU-wide running output; we accumulate signature/body/closing-brace
      8  *           across all functions; func_end splices decls in after the open
      9  *           brace using the recorded fn_body_start bookmark.
     10  *
     11  * c_emit_finalize flushes a tiny prologue + body to the writer.
     12  *
     13  * Local declaration is lazy: every operand emit goes through c_ensure_local,
     14  * which appends one declaration for each semantic local. */
     15 
     16 #include "arch/c_target/c_emit.h"
     17 
     18 #include <stdio.h>
     19 #include <string.h>
     20 
     21 #include "cg/type.h"
     22 #include "core/arena.h"
     23 #include "core/core.h"
     24 #include "core/heap.h"
     25 #include "core/pool.h"
     26 #include "core/slice.h"
     27 #include "obj/format.h"
     28 #include "obj/obj.h"
     29 
     30 /* Forward decls. */
     31 static void c_ensure_typedef(CTarget* t, KitCgTypeId tid);
     32 static const char* c_typedef_name(CTarget* t, KitCgTypeId tid);
     33 static const char* c_typename(CTarget* t, KitCgTypeId type);
     34 static KitCgTypeId c_local_type_or_panic(CTarget* t, CGLocal local);
     35 static Operand c_op_local(CGLocal local, KitCgTypeId type);
     36 static int c_type_is_aggregate(CTarget* t, KitCgTypeId type);
     37 static int c_type_is_bool(CTarget* t, KitCgTypeId type);
     38 static int c_type_is_ptr(CTarget* t, KitCgTypeId type);
     39 static int c_operand_is_ptr_typed(CTarget* t, Operand op);
     40 static void c_emit_addr_deref(CTarget* t, Operand addr,
     41                               KitCgTypeId access_type);
     42 static void c_emit_copy_addr(CTarget* t, Operand addr);
     43 CGLocal c_emit_local(CTarget* t, const CGLocalDesc* d);
     44 /* Private accessor on ObjBuilder (defined in obj/obj.c, not in obj.h).
     45  * Same forward-decl trick as obj_tls.c uses. */
     46 ObjSymId obj_tlv_bootstrap_get(const ObjBuilder*);
     47 
     48 /* === Target state === */
     49 
     50 void c_emit_target_init(CTarget* t, Compiler* c, ObjBuilder* o, KitWriter* w) {
     51   memset(t, 0, sizeof *t);
     52   t->c = c;
     53   t->obj = o;
     54   t->w = w;
     55   cbuf_init(&t->forwards, c->ctx->heap);
     56   cbuf_init(&t->typedefs, c->ctx->heap);
     57   cbuf_init(&t->data_defs, c->ctx->heap);
     58   cbuf_init(&t->decls, c->ctx->heap);
     59   cbuf_init(&t->body, c->ctx->heap);
     60 }
     61 
     62 CTarget* c_emit_target_new(Compiler* c, ObjBuilder* o, KitWriter* w) {
     63   CTarget* t = arena_new(c->tu, CTarget);
     64   if (!t) return NULL;
     65   c_emit_target_init(t, c, o, w);
     66   return t;
     67 }
     68 
     69 /* === Writer helpers === */
     70 
     71 void c_writer_write(CTarget* t, const void* data, size_t n) {
     72   KitStatus st = kit_writer_write(t->w, data, n);
     73   if (st != KIT_OK) {
     74     SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
     75     compiler_panic(t->c, loc, "C target: writer error %d", (int)st);
     76   }
     77 }
     78 
     79 void c_writer_puts(CTarget* t, const char* s) {
     80   size_t n = 0;
     81   while (s[n]) ++n;
     82   c_writer_write(t, s, n);
     83 }
     84 
     85 /* === CLocal / type emission === */
     86 
     87 static const char* c_int_type_name_for_width(u32 width, int signed_) {
     88   switch (width) {
     89     case 1:
     90     case 8:
     91       return signed_ ? "int8_t" : "uint8_t";
     92     case 16:
     93       return signed_ ? "int16_t" : "uint16_t";
     94     case 32:
     95       return signed_ ? "int32_t" : "uint32_t";
     96     case 64:
     97       return signed_ ? "int64_t" : "uint64_t";
     98     case 128:
     99       return signed_ ? "__int128" : "unsigned __int128";
    100     default:
    101       return NULL;
    102   }
    103 }
    104 
    105 /* Returns the integer width for sign-aware emission. 0 if the type isn't a
    106  * fixed-width integer (float, ptr, void, aggregate). */
    107 static u32 c_int_width_for_signedness(CTarget* t, KitCgTypeId type) {
    108   if (type == KIT_CG_TYPE_NONE) return 0;
    109   KitCgTypeId u = api_unalias_type(t->c, type);
    110   const CgType* ty = cg_type_get(t->c, u);
    111   if (!ty) return 0;
    112   if (ty->kind == KIT_CG_TYPE_INT) return ty->integer.width;
    113   if (ty->kind == KIT_CG_TYPE_BOOL) return 32; /* bool maps to int32_t */
    114   return 0;
    115 }
    116 
    117 /* === Typedef machinery ===
    118  *
    119  * Composite types (records, arrays, function types) are emitted as opaque
    120  * byte-storage typedefs in a TU-wide typedefs section. The typedef name is
    121  * `__ty_<id>` keyed on the unaliased type id; this is stable for the
    122  * compiler instance.
    123  *
    124  * For records and arrays the typedef wraps a single `_Alignas(A) uint8_t
    125  * raw[N];` member, so all field/element access is mediated by the existing
    126  * `(*(T*)((char*)addr + ofs))` path. This sidesteps any ABI ambiguity (C
    127  * bitfield rules, array decay, packed/aligned attribute interactions) and
    128  * keeps types orthogonal to access patterns.
    129  *
    130  * For function types we emit a function-pointer typedef `R (*__ty_N)(...)`,
    131  * used for indirect calls and function-pointer-typed values. */
    132 
    133 static void c_grow_type_state(CTarget* t, u32 needed) {
    134   Heap* h = t->c->ctx->heap;
    135   u32 newcap = t->type_state_cap ? t->type_state_cap : 32;
    136   while (newcap < needed) newcap *= 2;
    137   u8* nd = (u8*)h->realloc(h, t->type_state, t->type_state_cap, newcap, 1);
    138   if (!nd && newcap) {
    139     compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: out of memory");
    140   }
    141   for (u32 i = t->type_state_cap; i < newcap; ++i) nd[i] = 0;
    142   t->type_state = nd;
    143   t->type_state_cap = newcap;
    144 }
    145 
    146 static const char* c_typedef_name(CTarget* t, KitCgTypeId tid) {
    147   KitCgTypeId u = api_unalias_type(t->c, tid);
    148   char buf[32];
    149   int n = snprintf(buf, sizeof buf, "__ty_%u", (unsigned)u);
    150   Sym s = pool_intern_slice(t->c->global, (Slice){.s = buf, .len = (size_t)n});
    151   return pool_slice(t->c->global, s).s;
    152 }
    153 
    154 /* Forward decl. */
    155 static void c_emit_typedef_for_func(CTarget* t, KitCgTypeId tid,
    156                                     const CgType* ty);
    157 
    158 static void c_ensure_typedef(CTarget* t, KitCgTypeId tid) {
    159   KitCgTypeId u = api_unalias_type(t->c, tid);
    160   if ((u32)u >= t->type_state_cap) c_grow_type_state(t, (u32)u + 1u);
    161   if (t->type_state[u] >= 2) return;
    162   if (t->type_state[u] == 1) return; /* cyclic — emit forward-only */
    163   t->type_state[u] = 1;
    164   const CgType* ty = cg_type_get(t->c, u);
    165   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
    166   if (!ty)
    167     compiler_panic(t->c, loc, "C target: unknown type id %u", (unsigned)u);
    168   switch (ty->kind) {
    169     case KIT_CG_TYPE_FUNC:
    170       c_emit_typedef_for_func(t, u, ty);
    171       break;
    172     case KIT_CG_TYPE_RECORD: {
    173       /* Recurse on field types so any composite-typed field has its
    174        * typedef emitted first. (Records-by-value are accessed only via
    175        * pointer arithmetic in kit CG, but emitting deps first keeps the
    176        * output readable and stable.) */
    177       for (u32 i = 0; i < ty->record.nfields; ++i) {
    178         if (!(ty->record.fields[i].flags & KIT_CG_FIELD_BITFIELD)) {
    179           KitCgTypeId ft = ty->record.fields[i].type;
    180           KitCgTypeId ftu = api_unalias_type(t->c, ft);
    181           const CgType* fty = cg_type_get(t->c, ftu);
    182           if (fty && (fty->kind == KIT_CG_TYPE_RECORD ||
    183                       fty->kind == KIT_CG_TYPE_ARRAY ||
    184                       fty->kind == KIT_CG_TYPE_FUNC)) {
    185             c_ensure_typedef(t, ftu);
    186           }
    187         }
    188       }
    189       cbuf_puts(&t->typedefs, "typedef struct { _Alignas(");
    190       cbuf_put_u64(&t->typedefs, (u64)cg_type_align(t->c, u));
    191       cbuf_puts(&t->typedefs, ") uint8_t raw[");
    192       cbuf_put_u64(&t->typedefs, cg_type_size(t->c, u));
    193       cbuf_puts(&t->typedefs, "]; } __ty_");
    194       cbuf_put_u64(&t->typedefs, (u64)u);
    195       cbuf_puts(&t->typedefs, ";\n");
    196       break;
    197     }
    198     case KIT_CG_TYPE_ARRAY: {
    199       KitCgTypeId eu = api_unalias_type(t->c, ty->array.elem);
    200       const CgType* ety = cg_type_get(t->c, eu);
    201       if (ety &&
    202           (ety->kind == KIT_CG_TYPE_RECORD || ety->kind == KIT_CG_TYPE_ARRAY ||
    203            ety->kind == KIT_CG_TYPE_FUNC)) {
    204         c_ensure_typedef(t, eu);
    205       }
    206       cbuf_puts(&t->typedefs, "typedef struct { _Alignas(");
    207       cbuf_put_u64(&t->typedefs, (u64)cg_type_align(t->c, u));
    208       cbuf_puts(&t->typedefs, ") uint8_t raw[");
    209       cbuf_put_u64(&t->typedefs, cg_type_size(t->c, u));
    210       cbuf_puts(&t->typedefs, "]; } __ty_");
    211       cbuf_put_u64(&t->typedefs, (u64)u);
    212       cbuf_puts(&t->typedefs, ";\n");
    213       break;
    214     }
    215     default:
    216       compiler_panic(t->c, loc,
    217                      "C target: c_ensure_typedef on non-composite kind %d",
    218                      (int)ty->kind);
    219   }
    220   t->type_state[u] = 2;
    221 }
    222 
    223 static void c_emit_typedef_for_func(CTarget* t, KitCgTypeId tid,
    224                                     const CgType* ty) {
    225   /* Emit recursively for return and param types if they're composites. */
    226   KitCgTypeId ret = api_unalias_type(t->c, cg_func_ret_type(ty));
    227   const CgType* rty = cg_type_get(t->c, ret);
    228   if (rty &&
    229       (rty->kind == KIT_CG_TYPE_RECORD || rty->kind == KIT_CG_TYPE_ARRAY ||
    230        rty->kind == KIT_CG_TYPE_FUNC)) {
    231     c_ensure_typedef(t, ret);
    232   }
    233   for (u32 i = 0; i < ty->func.nparams; ++i) {
    234     KitCgTypeId pt = api_unalias_type(t->c, ty->func.params[i].type);
    235     const CgType* pty = cg_type_get(t->c, pt);
    236     if (pty &&
    237         (pty->kind == KIT_CG_TYPE_RECORD || pty->kind == KIT_CG_TYPE_ARRAY ||
    238          pty->kind == KIT_CG_TYPE_FUNC)) {
    239       c_ensure_typedef(t, pt);
    240     }
    241   }
    242   cbuf_puts(&t->typedefs, "typedef ");
    243   cbuf_puts(&t->typedefs, c_typename(t, cg_func_ret_type(ty)));
    244   cbuf_puts(&t->typedefs, " (*__ty_");
    245   cbuf_put_u64(&t->typedefs, (u64)tid);
    246   cbuf_puts(&t->typedefs, ")(");
    247   if (ty->func.nparams == 0 && !ty->func.abi_variadic) {
    248     cbuf_puts(&t->typedefs, "void");
    249   } else {
    250     for (u32 i = 0; i < ty->func.nparams; ++i) {
    251       if (i > 0) cbuf_puts(&t->typedefs, ", ");
    252       cbuf_puts(&t->typedefs, c_typename(t, ty->func.params[i].type));
    253     }
    254     if (ty->func.abi_variadic) {
    255       if (ty->func.nparams > 0) cbuf_puts(&t->typedefs, ", ");
    256       cbuf_puts(&t->typedefs, "...");
    257     }
    258   }
    259   cbuf_puts(&t->typedefs, ");\n");
    260 }
    261 
    262 static const char* c_int_type_for_width_panic(CTarget* t, u32 width,
    263                                               int signed_) {
    264   const char* s = c_int_type_name_for_width(width, signed_);
    265   if (!s) {
    266     SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
    267     compiler_panic(t->c, loc, "C target: int width %u not yet supported",
    268                    (unsigned)width);
    269   }
    270   return s;
    271 }
    272 
    273 static const char* c_float_type_name(u32 width) {
    274   switch (width) {
    275     case 32:
    276       return "float";
    277     case 64:
    278       return "double";
    279     case 80:
    280     case 128:
    281       return "long double";
    282     default:
    283       return NULL;
    284   }
    285 }
    286 
    287 /* Returns the C type name for a CG type id. Scalars map to fixed-width
    288  * <stdint.h> types or float/double/long double; pointers collapse to void*;
    289  * composites (records/arrays/funcs) emit an opaque-storage typedef on first
    290  * sighting and return the typedef name. */
    291 static const char* c_typename(CTarget* t, KitCgTypeId type) {
    292   KitCgTypeId resolved = api_unalias_type(t->c, type);
    293   const CgType* ty = cg_type_get(t->c, resolved);
    294   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
    295   if (!ty) {
    296     compiler_panic(t->c, loc, "C target: unknown type id %u", (unsigned)type);
    297   }
    298   switch (ty->kind) {
    299     case KIT_CG_TYPE_VOID:
    300       return "void";
    301     case KIT_CG_TYPE_BOOL:
    302       return "int32_t";
    303     case KIT_CG_TYPE_INT:
    304       return c_int_type_for_width_panic(t, ty->integer.width, 1);
    305     case KIT_CG_TYPE_FLOAT: {
    306       const char* s = c_float_type_name(ty->fp.width);
    307       if (!s) {
    308         compiler_panic(t->c, loc, "C target: fp width %u not yet supported",
    309                        (unsigned)ty->fp.width);
    310       }
    311       return s;
    312     }
    313     case KIT_CG_TYPE_PTR:
    314       return "void*";
    315     case KIT_CG_TYPE_ENUM:
    316       /* CG enums are width-only; treat as their underlying integer base. */
    317       return c_typename(t, ty->enum_.base);
    318     case KIT_CG_TYPE_VARARG_STATE:
    319       t->need_stdarg = 1;
    320       return "va_list";
    321     case KIT_CG_TYPE_RECORD:
    322     case KIT_CG_TYPE_ARRAY:
    323     case KIT_CG_TYPE_FUNC:
    324       c_ensure_typedef(t, resolved);
    325       return c_typedef_name(t, resolved);
    326     default:
    327       compiler_panic(t->c, loc, "C target: type kind %d not yet supported",
    328                      (int)ty->kind);
    329   }
    330 }
    331 
    332 void c_emit_type(CTarget* t, CBuf* b, KitCgTypeId type) {
    333   cbuf_puts(b, c_typename(t, type));
    334 }
    335 
    336 static KitCgTypeId c_local_type_or_panic(CTarget* t, CGLocal local) {
    337   if ((u32)local < t->local_cap && t->local_declared[local] &&
    338       t->local_type[local]) {
    339     return t->local_type[local];
    340   }
    341   compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
    342                  "C target: unknown local type for v%u", (unsigned)local);
    343   return KIT_CG_TYPE_NONE;
    344 }
    345 
    346 static Operand c_op_local(CGLocal local, KitCgTypeId type) {
    347   Operand op;
    348   memset(&op, 0, sizeof op);
    349   op.kind = OPK_LOCAL;
    350   op.type = type;
    351   op.v.local = local;
    352   return op;
    353 }
    354 
    355 void c_local_name(CLocal r, char* out, size_t cap) {
    356   size_t i = 0;
    357   if (cap == 0) return;
    358   if (cap > 1) out[i++] = 'v';
    359   char tmp[16];
    360   size_t n = 0;
    361   u32 v = (u32)r;
    362   if (v == 0) {
    363     tmp[n++] = '0';
    364   } else {
    365     while (v) {
    366       tmp[n++] = (char)('0' + (v % 10));
    367       v /= 10;
    368     }
    369   }
    370   while (n && i + 1 < cap) out[i++] = tmp[--n];
    371   out[i] = '\0';
    372 }
    373 
    374 static void c_grow_local_table(CTarget* t, u32 needed) {
    375   Heap* h = t->c->ctx->heap;
    376   u32 newcap = t->local_cap ? t->local_cap : 16;
    377   while (newcap < needed) newcap *= 2;
    378   u8* nd = (u8*)h->realloc(h, t->local_declared, t->local_cap, newcap, 1);
    379   KitCgTypeId* nt = (KitCgTypeId*)h->realloc(
    380       h, t->local_type, t->local_cap * sizeof(KitCgTypeId),
    381       newcap * sizeof(KitCgTypeId), _Alignof(KitCgTypeId));
    382   if ((!nd && newcap) || (!nt && newcap)) {
    383     compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: out of memory");
    384   }
    385   for (u32 i = t->local_cap; i < newcap; ++i) {
    386     nd[i] = 0;
    387     nt[i] = 0;
    388   }
    389   t->local_declared = nd;
    390   t->local_type = nt;
    391   t->local_cap = newcap;
    392 }
    393 
    394 /* Emit the trailing `__attribute__((unused)) = INIT;` for a local decl of
    395  * type `ty`. Scalars get `= 0` (readable); aggregates get `= {0}` (the only
    396  * form that compiles for record/array). va_list also takes `= {0}`: the host's
    397  * <stdarg.h> va_list is an aggregate (struct/array) on common ABIs (aarch64,
    398  * x86-64 SysV) where `= 0` is invalid, and `= {0}` is also valid for the
    399  * pointer form (e.g. Apple), so it is the portable choice. */
    400 static void c_emit_zero_init(CTarget* t, KitCgTypeId ty) {
    401   const CgType* cgt = ty ? cg_type_get(t->c, api_unalias_type(t->c, ty)) : NULL;
    402   int braced = cgt && (cgt->kind == KIT_CG_TYPE_RECORD ||
    403                        cgt->kind == KIT_CG_TYPE_ARRAY ||
    404                        cgt->kind == KIT_CG_TYPE_VARARG_STATE);
    405   cbuf_puts(&t->decls, braced ? " __attribute__((unused)) = {0};\n"
    406                               : " __attribute__((unused)) = 0;\n");
    407 }
    408 
    409 void c_ensure_local(CTarget* t, CLocal r, KitCgTypeId type) {
    410   if (r == (CLocal)CG_LOCAL_NONE) {
    411     compiler_panic(t->c, (SrcLoc){0, 0, 0},
    412                    "C target: CG_LOCAL_NONE reached emission");
    413   }
    414   if ((u32)r >= t->local_cap) c_grow_local_table(t, (u32)r + 1u);
    415   if (t->local_declared[r]) {
    416     if (type && api_unalias_type(t->c, t->local_type[r]) !=
    417                     api_unalias_type(t->c, type)) {
    418       compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
    419                      "C target: local v%u used with inconsistent type "
    420                      "(declared %u, used %u)",
    421                      (unsigned)r,
    422                      (unsigned)api_unalias_type(t->c, t->local_type[r]),
    423                      (unsigned)api_unalias_type(t->c, type));
    424     }
    425     return;
    426   }
    427   t->local_declared[r] = 1;
    428   t->local_type[r] = type;
    429   cbuf_puts(&t->decls, "  ");
    430   c_emit_type(t, &t->decls, type);
    431   cbuf_puts(&t->decls, " ");
    432   char buf[24];
    433   c_local_name(r, buf, sizeof buf);
    434   cbuf_puts(&t->decls, buf);
    435   /* Zero-init kills -Wsometimes-uninitialized for control flow clang can't
    436    * reason through; the host C compiler DSEs the init when a real
    437    * assignment dominates. Scalars get `= 0`, aggregates `= {0}`. */
    438   c_emit_zero_init(t, type);
    439 }
    440 
    441 /* Emit a signed-int64 literal. INT64_MIN can't be written directly: clang
    442  * treats `-9223372036854775808` as `-(9223372036854775808)` with the inner
    443  * literal too large for any signed type, which trips
    444  * -Wimplicitly-unsigned-literal. The standard workaround is
    445  * `(-9223372036854775807LL - 1)`. */
    446 static void c_emit_imm_literal(CTarget* t, i64 v) {
    447   if (v == (i64)((u64)1u << 63u)) {
    448     cbuf_puts(&t->body, "(-9223372036854775807LL - 1)");
    449     return;
    450   }
    451   cbuf_put_i64(&t->body, v);
    452 }
    453 
    454 /* Address-mode tuple decoded from an OPK_INDIRECT operand. Mirrors the
    455  * `addr_mode` helper in the machine-code backends so all targets share a
    456  * single in-backend view of `base [+ index << log2_scale] + ofs`. */
    457 typedef struct CAddrMode {
    458   CLocal base;
    459   CLocal index;  /* CG_LOCAL_NONE when no index operand */
    460   u8 log2_scale; /* meaningful only when index != CG_LOCAL_NONE */
    461   i32 ofs;
    462 } CAddrMode;
    463 
    464 static CAddrMode c_addr_mode(Operand addr) {
    465   CAddrMode m;
    466   m.base = addr.v.ind.base;
    467   m.index = addr.v.ind.index;
    468   m.log2_scale = addr.v.ind.log2_scale;
    469   m.ofs = addr.v.ind.ofs;
    470   return m;
    471 }
    472 
    473 /* Emit `(char*)base [+ (uintptr_t)index * (1u << log2_scale)] [+ ofs]` into
    474  * the body, with each optional term suppressed when absent. Used by every
    475  * OPK_INDIRECT renderer; the caller wraps it with the appropriate
    476  * `(*(T*)(...))` or `((T)(...))` cast. */
    477 static void c_emit_indirect_addr_expr(CTarget* t, CAddrMode m) {
    478   char rbuf[24];
    479   cbuf_puts(&t->body, "(char*)");
    480   c_local_name(m.base, rbuf, sizeof rbuf);
    481   cbuf_puts(&t->body, rbuf);
    482   if (m.index != CG_LOCAL_NONE) {
    483     cbuf_puts(&t->body, " + (uintptr_t)");
    484     c_local_name(m.index, rbuf, sizeof rbuf);
    485     cbuf_puts(&t->body, rbuf);
    486     cbuf_puts(&t->body, " * ");
    487     /* Spell as the explicit 1/2/4/8 literal corresponding to log2_scale.
    488      * log2_scale is normalized to {0,1,2,3} by cg. */
    489     cbuf_put_u64(&t->body, (u64)(1u << m.log2_scale));
    490   }
    491   if (m.ofs != 0) {
    492     cbuf_puts(&t->body, " + ");
    493     cbuf_put_i64(&t->body, (i64)m.ofs);
    494   }
    495 }
    496 
    497 /* Assert that `addr`, if OPK_INDIRECT, has no index operand. Used by paths
    498  * the cg layer guarantees never carry the indexed shape (bitfield, atomics,
    499  * copy_bytes/set_bytes, inline asm). */
    500 static void c_assert_no_index(CTarget* t, Operand addr, const char* where) {
    501   if (addr.kind != OPK_INDIRECT) return;
    502   if (addr.v.ind.index == CG_LOCAL_NONE) return;
    503   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
    504   compiler_panic(t->c, loc,
    505                  "C target: %.*s: indexed OPK_INDIRECT not allowed here",
    506                  SLICE_ARG(slice_from_cstr(where)));
    507 }
    508 
    509 void c_emit_operand(CTarget* t, Operand op) {
    510   char buf[24];
    511   switch (op.kind) {
    512     case OPK_IMM:
    513       if (op.type == KIT_CG_TYPE_NONE) {
    514         /* Untyped IMM (e.g. memset byte value): emit the literal raw. */
    515         cbuf_putc(&t->body, '(');
    516         c_emit_imm_literal(t, op.v.imm);
    517         cbuf_putc(&t->body, ')');
    518       } else {
    519         cbuf_puts(&t->body, "((");
    520         c_emit_type(t, &t->body, op.type);
    521         cbuf_puts(&t->body, ")");
    522         c_emit_imm_literal(t, op.v.imm);
    523         cbuf_puts(&t->body, ")");
    524       }
    525       return;
    526     case OPK_LOCAL: {
    527       c_ensure_local(t, op.v.local, op.type);
    528       c_local_name(op.v.local, buf, sizeof buf);
    529       cbuf_puts(&t->body, buf);
    530       return;
    531     }
    532     case OPK_INDIRECT: {
    533       /* Used by call paths to pass aggregates by-address: the operand's type
    534        * is the aggregate, the storage is `base + index*scale + ofs`. Emit the
    535        * deref as a value expression. */
    536       cbuf_puts(&t->body, "(*(");
    537       c_emit_type(t, &t->body, op.type);
    538       cbuf_puts(&t->body, "*)(");
    539       c_emit_indirect_addr_expr(t, c_addr_mode(op));
    540       cbuf_puts(&t->body, "))");
    541       return;
    542     }
    543     case OPK_GLOBAL: {
    544       /* OPK_GLOBAL carries `&sym + addend`. How we spell it depends on
    545        * op.type:
    546        *   - pointer/scalar/void: the value IS the address, so cast through
    547        *     `((T)((char*)sym + addend))`.
    548        *   - aggregate (RECORD/ARRAY): the symbol's storage is an aggregate
    549        *     value; emit `(*(T*)((char*)sym + addend))` so the deref reads
    550        *     the aggregate value (used by call args that pass struct
    551        *     by-value via a global initialized buffer). */
    552       obj_sym_mark_referenced(t->obj, op.v.global.sym);
    553       const char* nm = c_sym_name(t, op.v.global.sym);
    554       const CgType* gty =
    555           (op.type != KIT_CG_TYPE_NONE)
    556               ? cg_type_get(t->c, api_unalias_type(t->c, op.type))
    557               : NULL;
    558       int is_aggregate = gty && (gty->kind == KIT_CG_TYPE_RECORD ||
    559                                  gty->kind == KIT_CG_TYPE_ARRAY);
    560       if (is_aggregate) {
    561         cbuf_puts(&t->body, "(*(");
    562         c_emit_type(t, &t->body, op.type);
    563         cbuf_puts(&t->body, "*)((char*)&");
    564         cbuf_puts(&t->body, nm);
    565         if (op.v.global.addend != 0) {
    566           cbuf_puts(&t->body, " + ");
    567           cbuf_put_i64(&t->body, op.v.global.addend);
    568         }
    569         cbuf_puts(&t->body, "))");
    570       } else {
    571         cbuf_puts(&t->body, "((");
    572         if (op.type != KIT_CG_TYPE_NONE) {
    573           c_emit_type(t, &t->body, op.type);
    574         } else {
    575           cbuf_puts(&t->body, "void*");
    576         }
    577         cbuf_puts(&t->body, ")((char*)&");
    578         cbuf_puts(&t->body, nm);
    579         if (op.v.global.addend != 0) {
    580           cbuf_puts(&t->body, " + ");
    581           cbuf_put_i64(&t->body, op.v.global.addend);
    582         }
    583         cbuf_puts(&t->body, "))");
    584       }
    585       return;
    586     }
    587     default: {
    588       SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
    589       compiler_panic(t->c, loc, "C target: operand kind %d not yet supported",
    590                      (int)op.kind);
    591     }
    592   }
    593 }
    594 
    595 static int c_type_is_float(CTarget* t, KitCgTypeId type) {
    596   if (type == KIT_CG_TYPE_NONE) return 0;
    597   const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, type));
    598   return ty && ty->kind == KIT_CG_TYPE_FLOAT;
    599 }
    600 
    601 /* True iff a and b name the same CG type after alias resolution. */
    602 static int c_types_equiv(CTarget* t, KitCgTypeId a, KitCgTypeId b) {
    603   if (a == 0 || b == 0) return 0;
    604   return api_unalias_type(t->c, a) == api_unalias_type(t->c, b);
    605 }
    606 
    607 /* Emit "  vN = " plus any cast needed for a C assignment expression.
    608  * Caller must then emit the RHS expression and call c_emit_local_assign_close.
    609  *
    610  * `rhs_ty` is the CG type the RHS expression will produce (or 0 if unknown).
    611  * Pointer/int crossings bridge through uintptr_t to keep host-C diagnostics
    612  * quiet. The outer `(...)` parens are kept so the closer's `);` stays
    613  * balanced. */
    614 static void c_emit_local_assign_open(CTarget* t, CLocal r, KitCgTypeId rhs_ty) {
    615   if ((u32)r >= t->local_cap || !t->local_declared[r]) {
    616     compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
    617                    "C target: assign to undeclared local v%u", (unsigned)r);
    618   }
    619   KitCgTypeId decl = t->local_type[r];
    620   char buf[24];
    621   c_local_name(r, buf, sizeof buf);
    622   cbuf_puts(&t->body, "  ");
    623   cbuf_puts(&t->body, buf);
    624   cbuf_puts(&t->body, " = ");
    625   if (!c_types_equiv(t, rhs_ty, decl)) {
    626     cbuf_putc(&t->body, '(');
    627     c_emit_type(t, &t->body, decl);
    628     cbuf_putc(&t->body, ')');
    629     if (!c_type_is_float(t, decl) &&
    630         (!rhs_ty || c_type_is_ptr(t, decl) || c_type_is_ptr(t, rhs_ty))) {
    631       cbuf_puts(&t->body, "(uintptr_t)");
    632     }
    633   }
    634   cbuf_puts(&t->body, "(");
    635 }
    636 
    637 static void c_emit_local_assign_close(CTarget* t) {
    638   cbuf_puts(&t->body, ");\n");
    639 }
    640 
    641 void c_emit_operand_signed(CTarget* t, Operand op, int signed_) {
    642   u32 w = c_int_width_for_signedness(t, op.type);
    643   if (w == 0) {
    644     /* Not an integer — emit without sign cast. */
    645     c_emit_operand(t, op);
    646     return;
    647   }
    648   const char* tn = c_int_type_name_for_width(w, signed_);
    649   if (!tn) {
    650     c_emit_operand(t, op);
    651     return;
    652   }
    653   int via_uptr = c_operand_is_ptr_typed(t, op);
    654   /* CG ints are width-only; the C target declares every int local/IMM
    655    * as the signed `int{W}_t` of its width. So when `signed_` is true and
    656    * the operand's emit-width matches `w`, the explicit cast is redundant
    657    * with what c_emit_operand already produces. Skipping it cuts the
    658    * ubiquitous `((int32_t)((int32_t)23))` double-cast down to one. */
    659   if (!via_uptr && signed_) {
    660     KitCgTypeId et = op.type;
    661     if (c_int_width_for_signedness(t, et) == w) {
    662       c_emit_operand(t, op);
    663       return;
    664     }
    665   }
    666   cbuf_puts(&t->body, "((");
    667   cbuf_puts(&t->body, tn);
    668   cbuf_puts(&t->body, ")");
    669   if (via_uptr) {
    670     cbuf_puts(&t->body, "(uintptr_t)");
    671   }
    672   c_emit_operand(t, op);
    673   cbuf_puts(&t->body, ")");
    674 }
    675 
    676 /* Returns 1 if `type` is a pointer (or void*). */
    677 static int c_type_is_ptr(CTarget* t, KitCgTypeId type) {
    678   if (type == KIT_CG_TYPE_NONE) return 0;
    679   const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, type));
    680   return ty && ty->kind == KIT_CG_TYPE_PTR;
    681 }
    682 
    683 static int c_type_is_bool(CTarget* t, KitCgTypeId type) {
    684   if (type == KIT_CG_TYPE_NONE) return 0;
    685   const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, type));
    686   return ty && ty->kind == KIT_CG_TYPE_BOOL;
    687 }
    688 
    689 static int c_type_is_aggregate(CTarget* t, KitCgTypeId type) {
    690   if (type == KIT_CG_TYPE_NONE) return 0;
    691   const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, type));
    692   return ty &&
    693          (ty->kind == KIT_CG_TYPE_RECORD || ty->kind == KIT_CG_TYPE_ARRAY);
    694 }
    695 
    696 static int c_operand_is_ptr_typed(CTarget* t, Operand op) {
    697   if (c_type_is_ptr(t, op.type)) return 1;
    698   return 0;
    699 }
    700 
    701 /* Emit `(target_ty)(uintptr_t)(op)` (or `(target_ty)(op)` for float
    702  * target_ty). Used when the caller needs a specific C expression type.
    703  * Pointer/int crossings bridge through uintptr_t. */
    704 static void c_emit_operand_as(CTarget* t, Operand op, KitCgTypeId target_ty) {
    705   if (c_types_equiv(t, op.type, target_ty)) {
    706     c_emit_operand(t, op);
    707     return;
    708   }
    709   cbuf_puts(&t->body, "(");
    710   c_emit_type(t, &t->body, target_ty);
    711   cbuf_puts(&t->body, ")");
    712   if (!c_type_is_float(t, target_ty) &&
    713       (!op.type || c_type_is_ptr(t, op.type) || c_type_is_ptr(t, target_ty))) {
    714     cbuf_puts(&t->body, "(uintptr_t)");
    715   }
    716   cbuf_puts(&t->body, "(");
    717   c_emit_operand(t, op);
    718   cbuf_puts(&t->body, ")");
    719 }
    720 
    721 /* Emit an operand for use in a C binary arithmetic expression. Pointer-typed
    722  * operands are cast to uintptr_t so C arithmetic semantics apply uniformly
    723  * (kit IR carries byte offsets, not C-pointer-arith scaled indices). */
    724 static void c_emit_operand_arith(CTarget* t, Operand op) {
    725   if (c_operand_is_ptr_typed(t, op)) {
    726     cbuf_puts(&t->body, "((uintptr_t)");
    727     if (op.kind == OPK_IMM) {
    728       c_emit_imm_literal(t, op.v.imm);
    729     } else {
    730       c_emit_operand(t, op);
    731     }
    732     cbuf_puts(&t->body, ")");
    733     return;
    734   }
    735   c_emit_operand(t, op);
    736 }
    737 
    738 /* Same, but applies the requested signedness when the operand is an integer
    739  * (used for SDIV/UDIV/SREM/UREM/SHR_S/SHR_U). Pointer operands always go
    740  * through the uintptr_t cast regardless of the requested signedness. */
    741 static void c_emit_operand_arith_signed(CTarget* t, Operand op, int signed_) {
    742   if (c_operand_is_ptr_typed(t, op)) {
    743     cbuf_puts(&t->body, "((uintptr_t)");
    744     if (op.kind == OPK_IMM) {
    745       c_emit_imm_literal(t, op.v.imm);
    746     } else {
    747       c_emit_operand(t, op);
    748     }
    749     cbuf_puts(&t->body, ")");
    750     return;
    751   }
    752   c_emit_operand_signed(t, op, signed_);
    753 }
    754 
    755 /* Emit a C lvalue expression for an addr operand (OPK_LOCAL / OPK_GLOBAL /
    756  * OPK_INDIRECT) using `access_type` as the access type. The result is the
    757  * full `*(T*)(...)` dereference, or the C variable directly when the access
    758  * type matches the underlying local/global object. */
    759 static void c_emit_addr_deref(CTarget* t, Operand addr,
    760                               KitCgTypeId access_type) {
    761   char buf[24];
    762   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
    763   switch (addr.kind) {
    764     case OPK_LOCAL: {
    765       c_ensure_local(t, addr.v.local, addr.type);
    766       c_local_name(addr.v.local, buf, sizeof buf);
    767       if (access_type == 0 || addr.type == 0 ||
    768           api_unalias_type(t->c, access_type) ==
    769               api_unalias_type(t->c, addr.type)) {
    770         cbuf_puts(&t->body, buf);
    771       } else {
    772         cbuf_puts(&t->body, "(*(");
    773         c_emit_type(t, &t->body, access_type);
    774         cbuf_puts(&t->body, "*)&");
    775         cbuf_puts(&t->body, buf);
    776         cbuf_puts(&t->body, ")");
    777       }
    778       return;
    779     }
    780     case OPK_GLOBAL: {
    781       obj_sym_mark_referenced(t->obj, addr.v.global.sym);
    782       const char* nm = c_sym_name(t, addr.v.global.sym);
    783       cbuf_puts(&t->body, "(*(");
    784       c_emit_type(t, &t->body, access_type);
    785       cbuf_puts(&t->body, "*)((char*)&");
    786       cbuf_puts(&t->body, nm);
    787       if (addr.v.global.addend != 0) {
    788         cbuf_puts(&t->body, " + ");
    789         cbuf_put_i64(&t->body, addr.v.global.addend);
    790       }
    791       cbuf_puts(&t->body, "))");
    792       return;
    793     }
    794     case OPK_INDIRECT: {
    795       CAddrMode m = c_addr_mode(addr);
    796       if ((u32)m.base >= t->local_cap || !t->local_declared[m.base]) {
    797         compiler_panic(t->c, loc,
    798                        "C target: indirect on undeclared base local v%u",
    799                        (unsigned)m.base);
    800       }
    801       if (m.index != CG_LOCAL_NONE &&
    802           ((u32)m.index >= t->local_cap || !t->local_declared[m.index])) {
    803         compiler_panic(t->c, loc,
    804                        "C target: indirect on undeclared index local v%u",
    805                        (unsigned)m.index);
    806       }
    807       cbuf_puts(&t->body, "(*(");
    808       c_emit_type(t, &t->body, access_type);
    809       cbuf_puts(&t->body, "*)(");
    810       c_emit_indirect_addr_expr(t, m);
    811       cbuf_puts(&t->body, "))");
    812       return;
    813     }
    814     default:
    815       compiler_panic(t->c, loc,
    816                      "C target: addr-deref on operand kind %d not supported",
    817                      (int)addr.kind);
    818   }
    819 }
    820 
    821 /* Emit a C address-of expression for a lvalue operand. Output is a pointer
    822  * value (cast to dst_type). */
    823 static void c_emit_lvalue_addr(CTarget* t, Operand lv, KitCgTypeId dst_type) {
    824   char buf[24];
    825   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
    826   switch (lv.kind) {
    827     case OPK_LOCAL:
    828       cbuf_puts(&t->body, "((");
    829       c_emit_type(t, &t->body, dst_type);
    830       cbuf_puts(&t->body, ")");
    831       cbuf_puts(&t->body, "&");
    832       c_ensure_local(t, lv.v.local, lv.type);
    833       c_local_name(lv.v.local, buf, sizeof buf);
    834       cbuf_puts(&t->body, buf);
    835       cbuf_puts(&t->body, ")");
    836       return;
    837     case OPK_GLOBAL: {
    838       obj_sym_mark_referenced(t->obj, lv.v.global.sym);
    839       const char* nm = c_sym_name(t, lv.v.global.sym);
    840       cbuf_puts(&t->body, "((");
    841       c_emit_type(t, &t->body, dst_type);
    842       cbuf_puts(&t->body, ")((char*)&");
    843       cbuf_puts(&t->body, nm);
    844       if (lv.v.global.addend != 0) {
    845         cbuf_puts(&t->body, " + ");
    846         cbuf_put_i64(&t->body, lv.v.global.addend);
    847       }
    848       cbuf_puts(&t->body, ")");
    849       cbuf_puts(&t->body, ")");
    850       return;
    851     }
    852     case OPK_INDIRECT: {
    853       cbuf_puts(&t->body, "((");
    854       c_emit_type(t, &t->body, dst_type);
    855       cbuf_puts(&t->body, ")(");
    856       c_emit_indirect_addr_expr(t, c_addr_mode(lv));
    857       cbuf_puts(&t->body, "))");
    858       return;
    859     }
    860     default:
    861       compiler_panic(t->c, loc,
    862                      "C target: addr-of on operand kind %d not supported",
    863                      (int)lv.kind);
    864   }
    865 }
    866 
    867 /* === Symbol name lookup === */
    868 
    869 const char* c_sym_name(CTarget* t, ObjSymId sym) {
    870   const ObjSym* os = obj_symbol_get(t->obj, sym);
    871   if (!os) {
    872     compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: unknown ObjSymId %u",
    873                    (unsigned)sym);
    874   }
    875   Slice nm = pool_slice(t->c->global, os->name);
    876   const char* s = nm.s;
    877   size_t n = nm.len;
    878   /* Linker symbols carry the active object format's C-mangle prefix (a leading
    879    * underscore on Mach-O); the host C compiler will re-add it on its own, so
    880    * strip when re-emitting source. */
    881   obj_format_demangle_c(t->c, &s, &n);
    882   /* Sanitize for C identifier rules: assemblers accept '.', '$', etc. in
    883    * symbol names; C does not. Replace each illegal byte with '_' and prepend
    884    * '_' if the first char isn't alpha/underscore. Local syms (SB_LOCAL) get
    885    * renamed freely since they have no cross-TU contract. Globals are assumed
    886    * to come in with C-safe names; if they don't, we still rewrite — the
    887    * resulting symbol won't link against other TUs that use the asm spelling,
    888    * but kit-produced code uses the rewritten spelling consistently. */
    889   int needs_rewrite = 0;
    890   if (n == 0) {
    891     return s;
    892   }
    893   if (!((s[0] >= 'a' && s[0] <= 'z') || (s[0] >= 'A' && s[0] <= 'Z') ||
    894         s[0] == '_')) {
    895     needs_rewrite = 1;
    896   } else {
    897     for (size_t i = 0; i < n; ++i) {
    898       char ch = s[i];
    899       if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
    900             (ch >= '0' && ch <= '9') || ch == '_')) {
    901         needs_rewrite = 1;
    902         break;
    903       }
    904     }
    905   }
    906   if (!needs_rewrite) return s;
    907   char buf[256];
    908   size_t cap = sizeof(buf) - 1u;
    909   size_t out = 0;
    910   int first_alpha = (s[0] >= 'a' && s[0] <= 'z') ||
    911                     (s[0] >= 'A' && s[0] <= 'Z') || s[0] == '_';
    912   if (!first_alpha && out < cap) buf[out++] = '_';
    913   for (size_t i = 0; i < n && out < cap; ++i) {
    914     char ch = s[i];
    915     int ok = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
    916              (ch >= '0' && ch <= '9') || ch == '_';
    917     buf[out++] = ok ? ch : '_';
    918   }
    919   buf[out] = '\0';
    920   Sym interned = pool_intern_slice(t->c->global, (Slice){.s = buf, .len = out});
    921   return pool_slice(t->c->global, interned).s;
    922 }
    923 
    924 /* === Prologue / finalize === */
    925 
    926 void c_emit_prologue(CTarget* t) {
    927   if (t->prologue_emitted) return;
    928   t->prologue_emitted = 1;
    929   c_writer_puts(t,
    930                 "/* generated by kit --emit=c */\n"
    931                 "#include <stdint.h>\n"
    932                 "#include <stdalign.h>\n");
    933   /* Other headers are decided at finalize so include lines remain
    934    * deterministic regardless of when the type was first referenced.
    935    * Writer flushes are not stream-buffered, so we keep prologue compact and
    936    * tack the rest on at c_emit_finalize. */
    937   c_writer_puts(t, "\n");
    938 }
    939 
    940 /* === func_begin / func_end === */
    941 
    942 /* Write `RetT name(P0, P1, ...)` (without trailing `;` or `{`) to `b`. */
    943 static void c_emit_func_signature(CTarget* t, CBuf* b, const char* name,
    944                                   KitCgTypeId fn_type) {
    945   KitCgTypeId ret_type = cg_type_func_ret_id(t->c, fn_type);
    946   const CgType* fty = cg_type_get(t->c, api_unalias_type(t->c, fn_type));
    947   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
    948   if (!fty || fty->kind != KIT_CG_TYPE_FUNC) {
    949     compiler_panic(t->c, loc, "C target: fn_type is not a function type");
    950   }
    951   if (cg_type_is_void(t->c, ret_type)) {
    952     cbuf_puts(b, "void");
    953   } else {
    954     c_emit_type(t, b, ret_type);
    955   }
    956   cbuf_puts(b, " ");
    957   cbuf_puts(b, name);
    958   cbuf_puts(b, "(");
    959   if (fty->func.nparams == 0 && !fty->func.abi_variadic) {
    960     cbuf_puts(b, "void");
    961   } else {
    962     for (u32 i = 0; i < fty->func.nparams; ++i) {
    963       if (i > 0) cbuf_puts(b, ", ");
    964       c_emit_type(t, b, fty->func.params[i].type);
    965       cbuf_puts(b, " p");
    966       cbuf_put_u64(b, (u64)i);
    967     }
    968     if (fty->func.abi_variadic) {
    969       if (fty->func.nparams > 0) cbuf_puts(b, ", ");
    970       cbuf_puts(b, "...");
    971     }
    972   }
    973   cbuf_puts(b, ")");
    974 }
    975 
    976 void c_emit_func_begin(CTarget* t, const CGFuncDesc* fd) {
    977   c_emit_prologue(t);
    978 
    979   t->cur_fn = fd;
    980   cbuf_reset(&t->decls);
    981   for (u32 i = 0; i < t->local_cap; ++i) {
    982     t->local_declared[i] = 0;
    983     t->local_type[i] = 0;
    984   }
    985   t->next_label = 0;
    986   t->next_local = 0;
    987   t->next_tmp = 0;
    988   t->nscopes = 0;
    989   t->last_was_terminator = 0;
    990   t->have_emitted_loc = 0;
    991   t->emitted_loc = (SrcLoc){0, 0, 0};
    992 
    993   const char* name = c_sym_name(t, fd->sym);
    994 
    995   /* Forward-declare so out-of-order callers and same-TU references find the
    996    * prototype regardless of definition order. */
    997   c_ensure_forward_decl(t, fd->sym, fd->fn_type);
    998 
    999   c_emit_func_signature(t, &t->body, name, fd->fn_type);
   1000   cbuf_puts(&t->body, " {\n");
   1001   t->fn_body_start = t->body.len;
   1002 }
   1003 
   1004 void c_ensure_forward_decl(CTarget* t, ObjSymId sym, KitCgTypeId fn_type) {
   1005   Heap* h = t->c->ctx->heap;
   1006   if ((u32)sym >= t->sym_forwarded_cap) {
   1007     u32 newcap = t->sym_forwarded_cap ? t->sym_forwarded_cap : 16;
   1008     while (newcap <= (u32)sym) newcap *= 2;
   1009     u8* nd =
   1010         (u8*)h->realloc(h, t->sym_forwarded, t->sym_forwarded_cap, newcap, 1);
   1011     if (!nd && newcap) {
   1012       compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: out of memory");
   1013     }
   1014     for (u32 i = t->sym_forwarded_cap; i < newcap; ++i) nd[i] = 0;
   1015     t->sym_forwarded = nd;
   1016     t->sym_forwarded_cap = newcap;
   1017   }
   1018   if (t->sym_forwarded[sym]) return;
   1019   t->sym_forwarded[sym] = 1;
   1020   const char* name = c_sym_name(t, sym);
   1021   const ObjSym* os = obj_symbol_get(t->obj, sym);
   1022   if ((os && (os->kind == SK_FUNC || os->kind == SK_IFUNC)) || fn_type != 0) {
   1023     c_emit_func_signature(t, &t->forwards, name, fn_type);
   1024     cbuf_puts(&t->forwards, ";\n");
   1025   } else {
   1026     if (os && os->bind == SB_LOCAL)
   1027       cbuf_puts(&t->forwards, "static ");
   1028     else
   1029       cbuf_puts(&t->forwards, "extern ");
   1030     if (os && os->section_id != OBJ_SEC_NONE) {
   1031       const Section* sec = obj_section_get(t->obj, os->section_id);
   1032       if (sec->kind == SEC_RODATA) cbuf_puts(&t->forwards, "const ");
   1033     }
   1034     cbuf_puts(&t->forwards, "struct __kit_data_");
   1035     cbuf_puts(&t->forwards, name);
   1036     cbuf_puts(&t->forwards, " ");
   1037     cbuf_puts(&t->forwards, name);
   1038     cbuf_puts(&t->forwards, ";\n");
   1039   }
   1040 }
   1041 
   1042 void c_emit_func_end(CTarget* t) {
   1043   size_t splice_at = t->fn_body_start;
   1044   size_t body_after = t->body.len;
   1045   size_t fn_body_len = body_after - splice_at;
   1046   Heap* h = t->c->ctx->heap;
   1047 
   1048   u8* tmp = NULL;
   1049   if (fn_body_len) {
   1050     tmp = (u8*)h->alloc(h, fn_body_len, 1);
   1051     if (!tmp) {
   1052       compiler_panic(t->c, t->cur_fn->loc, "C target: out of memory");
   1053     }
   1054     for (size_t i = 0; i < fn_body_len; ++i) {
   1055       tmp[i] = t->body.data[splice_at + i];
   1056     }
   1057   }
   1058 
   1059   t->body.len = splice_at;
   1060   if (t->decls.len)
   1061     cbuf_putn(&t->body, (const char*)t->decls.data, t->decls.len);
   1062   if (tmp) {
   1063     cbuf_putn(&t->body, (const char*)tmp, fn_body_len);
   1064     h->free(h, tmp, fn_body_len);
   1065   }
   1066   cbuf_puts(&t->body, "}\n\n");
   1067 
   1068   t->cur_fn = NULL;
   1069 }
   1070 
   1071 /* === locals, params === */
   1072 
   1073 void c_emit_param_bind(CTarget* t, CGLocal local, KitCgTypeId type, u32 index) {
   1074   char buf[24];
   1075   c_ensure_local(t, local, type);
   1076   c_local_name(local, buf, sizeof buf);
   1077   cbuf_puts(&t->body, "  ");
   1078   cbuf_puts(&t->body, buf);
   1079   cbuf_puts(&t->body, " = p");
   1080   cbuf_put_u64(&t->body, (u64)index);
   1081   cbuf_puts(&t->body, ";\n");
   1082 }
   1083 
   1084 CGLocal c_emit_param(CTarget* t, const CGParamDesc* pd) {
   1085   CGLocalDesc d;
   1086   memset(&d, 0, sizeof d);
   1087   d.type = pd->type;
   1088   d.name = pd->name;
   1089   d.loc = pd->loc;
   1090   d.size = pd->size;
   1091   d.align = pd->align;
   1092   d.flags = pd->flags;
   1093   CGLocal local = c_emit_local(t, &d);
   1094   c_emit_param_bind(t, local, pd->type, pd->index);
   1095   return local;
   1096 }
   1097 
   1098 /* === load_imm, copy, binop === */
   1099 
   1100 void c_emit_load_imm(CTarget* t, Operand dst, i64 imm) {
   1101   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1102   if (dst.kind != OPK_LOCAL) {
   1103     compiler_panic(t->c, loc, "C target: load_imm dst must be LOCAL");
   1104   }
   1105   c_ensure_local(t, dst.v.local, dst.type);
   1106   /* The literal is emitted bare; its C type is `long long`. We can drop
   1107    * the bridge cast iff the bare assignment compiles cleanly:
   1108    *   - integer dst: imm must fit in dst's signed range (else
   1109    *     -Wconstant-conversion). 64-bit dst always fits.
   1110    *   - pointer dst: only `0` (null pointer constant) is safe; any other
   1111    *     literal trips -Wint-conversion.
   1112    * Otherwise keep the bridge. */
   1113   u32 w = c_int_width_for_signedness(t, dst.type);
   1114   int can_drop_bridge;
   1115   if (w > 0) {
   1116     can_drop_bridge = (w >= 64) || (imm >= -((i64)1 << (w - 1)) &&
   1117                                     imm <= (((i64)1 << (w - 1)) - 1));
   1118   } else if (c_type_is_ptr(t, dst.type)) {
   1119     can_drop_bridge = (imm == 0);
   1120   } else {
   1121     can_drop_bridge = 0;
   1122   }
   1123   c_emit_local_assign_open(t, dst.v.local,
   1124                            can_drop_bridge ? dst.type : (KitCgTypeId)0);
   1125   c_emit_imm_literal(t, imm);
   1126   c_emit_local_assign_close(t);
   1127 }
   1128 
   1129 void c_emit_copy(CTarget* t, Operand dst, Operand src) {
   1130   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1131   if (dst.kind != OPK_LOCAL) {
   1132     compiler_panic(t->c, loc, "C target: copy dst must be LOCAL");
   1133   }
   1134   c_ensure_local(t, dst.v.local, dst.type);
   1135   c_emit_local_assign_open(t, dst.v.local, src.type);
   1136   c_emit_operand(t, src);
   1137   c_emit_local_assign_close(t);
   1138 }
   1139 
   1140 static const char* binop_to_c(BinOp op) {
   1141   switch (op) {
   1142     case BO_IADD:
   1143     case BO_FADD:
   1144       return "+";
   1145     case BO_ISUB:
   1146     case BO_FSUB:
   1147       return "-";
   1148     case BO_IMUL:
   1149     case BO_FMUL:
   1150       return "*";
   1151     case BO_SDIV:
   1152     case BO_UDIV:
   1153     case BO_FDIV:
   1154       return "/";
   1155     case BO_SREM:
   1156     case BO_UREM:
   1157       return "%";
   1158     case BO_AND:
   1159       return "&";
   1160     case BO_OR:
   1161       return "|";
   1162     case BO_XOR:
   1163       return "^";
   1164     case BO_SHL:
   1165       return "<<";
   1166     case BO_SHR_S:
   1167     case BO_SHR_U:
   1168       return ">>";
   1169   }
   1170   return NULL;
   1171 }
   1172 
   1173 /* For BinOp `op`, decide how to sign-cast the operands. Returns 0 for "no
   1174  * cast", 1 for "cast both to signed", 2 for "cast both to unsigned", 3 for
   1175  * "cast lhs only (signedness `lhs_signed`)" (used for shifts). */
   1176 typedef enum { BSC_NONE, BSC_SIGNED, BSC_UNSIGNED, BSC_SHIFT_LHS } BinSignCast;
   1177 
   1178 static BinSignCast binop_sign_kind(BinOp op, int* lhs_signed_out) {
   1179   *lhs_signed_out = 1;
   1180   switch (op) {
   1181     case BO_SDIV:
   1182     case BO_SREM:
   1183       return BSC_SIGNED;
   1184     case BO_UDIV:
   1185     case BO_UREM:
   1186       return BSC_UNSIGNED;
   1187     case BO_SHR_S:
   1188       *lhs_signed_out = 1;
   1189       return BSC_SHIFT_LHS;
   1190     case BO_SHR_U:
   1191       *lhs_signed_out = 0;
   1192       return BSC_SHIFT_LHS;
   1193     default:
   1194       return BSC_NONE;
   1195   }
   1196 }
   1197 
   1198 void c_emit_binop(CTarget* t, BinOp op, Operand dst, Operand a, Operand b) {
   1199   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1200   const char* sym = binop_to_c(op);
   1201   if (!sym) {
   1202     compiler_panic(t->c, loc, "C target: unknown binop %d", (int)op);
   1203   }
   1204   if (dst.kind != OPK_LOCAL) {
   1205     compiler_panic(t->c, loc, "C target: binop dst must be LOCAL");
   1206   }
   1207   c_ensure_local(t, dst.v.local, dst.type);
   1208   /* Pointer operands get cast to uintptr_t inside c_emit_operand_arith,
   1209    * so the binop's C result type is `uintptr_t`, not the original pointer
   1210    * type. Keep the bridge when dst or either operand is pointer-typed so
   1211    * the assignment back to a pointer dst doesn't trip -Wint-conversion. */
   1212   int has_ptr = c_operand_is_ptr_typed(t, dst) ||
   1213                 c_operand_is_ptr_typed(t, a) || c_operand_is_ptr_typed(t, b);
   1214   c_emit_local_assign_open(t, dst.v.local, has_ptr ? (KitCgTypeId)0 : dst.type);
   1215   int lhs_signed = 1;
   1216   BinSignCast bsc = binop_sign_kind(op, &lhs_signed);
   1217   switch (bsc) {
   1218     case BSC_NONE:
   1219       c_emit_operand_arith(t, a);
   1220       cbuf_puts(&t->body, " ");
   1221       cbuf_puts(&t->body, sym);
   1222       cbuf_puts(&t->body, " ");
   1223       c_emit_operand_arith(t, b);
   1224       break;
   1225     case BSC_SIGNED:
   1226       c_emit_operand_arith_signed(t, a, 1);
   1227       cbuf_puts(&t->body, " ");
   1228       cbuf_puts(&t->body, sym);
   1229       cbuf_puts(&t->body, " ");
   1230       c_emit_operand_arith_signed(t, b, 1);
   1231       break;
   1232     case BSC_UNSIGNED:
   1233       c_emit_operand_arith_signed(t, a, 0);
   1234       cbuf_puts(&t->body, " ");
   1235       cbuf_puts(&t->body, sym);
   1236       cbuf_puts(&t->body, " ");
   1237       c_emit_operand_arith_signed(t, b, 0);
   1238       break;
   1239     case BSC_SHIFT_LHS:
   1240       c_emit_operand_arith_signed(t, a, lhs_signed);
   1241       cbuf_puts(&t->body, " ");
   1242       cbuf_puts(&t->body, sym);
   1243       cbuf_puts(&t->body, " ");
   1244       c_emit_operand(t, b);
   1245       break;
   1246   }
   1247   c_emit_local_assign_close(t);
   1248 }
   1249 
   1250 /* ===== unop ===== */
   1251 
   1252 void c_emit_unop(CTarget* t, UnOp op, Operand dst, Operand a) {
   1253   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1254   if (dst.kind != OPK_LOCAL) {
   1255     compiler_panic(t->c, loc, "C target: unop dst must be LOCAL");
   1256   }
   1257   c_ensure_local(t, dst.v.local, dst.type);
   1258   const char* sym = NULL;
   1259   switch (op) {
   1260     case UO_NEG:
   1261     case UO_FNEG:
   1262       sym = "-";
   1263       break;
   1264     case UO_NOT:
   1265       sym = "!";
   1266       break;
   1267     case UO_BNOT:
   1268       sym = "~";
   1269       break;
   1270     default:
   1271       compiler_panic(t->c, loc, "C target: unknown unop %d", (int)op);
   1272   }
   1273   c_emit_local_assign_open(t, dst.v.local, dst.type);
   1274   cbuf_puts(&t->body, sym);
   1275   c_emit_operand(t, a);
   1276   c_emit_local_assign_close(t);
   1277 }
   1278 
   1279 /* ===== compare ops ===== */
   1280 
   1281 /* The single C operator for ops that lower to one relational/equality
   1282  * expression: all integer ops, plus the FP predicates whose plain C operator
   1283  * already has the right NaN behavior (<,<=,>,>= and == are ordered: false on
   1284  * NaN; != is unordered: true on NaN). The remaining FP predicates need a
   1285  * compound expression and are handled in c_emit_cmp_operands; they return NULL
   1286  * here. No `default:` so -Wswitch flags any unhandled enumerator. */
   1287 static const char* cmp_to_c(CmpOp op) {
   1288   switch (op) {
   1289     case CMP_EQ:
   1290     case CMP_OEQ_F:
   1291       return "==";
   1292     case CMP_NE:
   1293     case CMP_UNE_F:
   1294       return "!=";
   1295     case CMP_LT_S:
   1296     case CMP_LT_U:
   1297     case CMP_OLT_F:
   1298       return "<";
   1299     case CMP_LE_S:
   1300     case CMP_LE_U:
   1301     case CMP_OLE_F:
   1302       return "<=";
   1303     case CMP_GT_S:
   1304     case CMP_GT_U:
   1305     case CMP_OGT_F:
   1306       return ">";
   1307     case CMP_GE_S:
   1308     case CMP_GE_U:
   1309     case CMP_OGE_F:
   1310       return ">=";
   1311     /* Compound FP predicates — no single C operator (see c_emit_cmp_operands).
   1312      */
   1313     case CMP_ONE_F:
   1314     case CMP_UEQ_F:
   1315     case CMP_ULT_F:
   1316     case CMP_ULE_F:
   1317     case CMP_UGT_F:
   1318     case CMP_UGE_F:
   1319       return NULL;
   1320   }
   1321   return NULL;
   1322 }
   1323 
   1324 /* The 6 FP predicates with no single C operator: built from compound ordered
   1325  * comparisons (no isnan(); host must not be built with -ffast-math). */
   1326 static int cmp_is_fp_compound(CmpOp op) {
   1327   return op == CMP_ONE_F || op == CMP_UEQ_F || op == CMP_ULT_F ||
   1328          op == CMP_ULE_F || op == CMP_UGT_F || op == CMP_UGE_F;
   1329 }
   1330 
   1331 /* Returns 1 if cmp op needs unsigned operand cast. -1 if signed. 0 if no cast
   1332  * (EQ/NE — sign doesn't matter for integer equality at the same width — and
   1333  * float compares). */
   1334 static int cmp_signedness(CmpOp op) {
   1335   switch (op) {
   1336     case CMP_LT_S:
   1337     case CMP_LE_S:
   1338     case CMP_GT_S:
   1339     case CMP_GE_S:
   1340       return -1;
   1341     case CMP_LT_U:
   1342     case CMP_LE_U:
   1343     case CMP_GT_U:
   1344     case CMP_GE_U:
   1345       return 1;
   1346     default:
   1347       return 0;
   1348   }
   1349 }
   1350 
   1351 /* Emit one ordered comparison `<a> opstr <b>` (no signedness cast — FP). */
   1352 static void c_emit_fp_rel(CTarget* t, Operand a, const char* opstr, Operand b) {
   1353   c_emit_operand_arith(t, a);
   1354   cbuf_puts(&t->body, " ");
   1355   cbuf_puts(&t->body, opstr);
   1356   cbuf_puts(&t->body, " ");
   1357   c_emit_operand_arith(t, b);
   1358 }
   1359 
   1360 static void c_emit_cmp_operands(CTarget* t, CmpOp op, Operand a, Operand b) {
   1361   /* The 6 FP predicates without a single C operator. Composed from ordered
   1362    * comparisons via unordered-R == !(ordered-not-R); ONE/UEQ from a<b / a>b.
   1363    * Each `!(...)` / `(...)` wraps the full cast-bearing comparison. */
   1364   switch (op) {
   1365     case CMP_UGE_F: /* !(OLT) */
   1366       cbuf_puts(&t->body, "!(");
   1367       c_emit_fp_rel(t, a, "<", b);
   1368       cbuf_puts(&t->body, ")");
   1369       return;
   1370     case CMP_UGT_F: /* !(OLE) */
   1371       cbuf_puts(&t->body, "!(");
   1372       c_emit_fp_rel(t, a, "<=", b);
   1373       cbuf_puts(&t->body, ")");
   1374       return;
   1375     case CMP_ULE_F: /* !(OGT) */
   1376       cbuf_puts(&t->body, "!(");
   1377       c_emit_fp_rel(t, a, ">", b);
   1378       cbuf_puts(&t->body, ")");
   1379       return;
   1380     case CMP_ULT_F: /* !(OGE) */
   1381       cbuf_puts(&t->body, "!(");
   1382       c_emit_fp_rel(t, a, ">=", b);
   1383       cbuf_puts(&t->body, ")");
   1384       return;
   1385     case CMP_ONE_F: /* ordered & !=: a<b || a>b */
   1386       cbuf_puts(&t->body, "(");
   1387       c_emit_fp_rel(t, a, "<", b);
   1388       cbuf_puts(&t->body, " || ");
   1389       c_emit_fp_rel(t, a, ">", b);
   1390       cbuf_puts(&t->body, ")");
   1391       return;
   1392     case CMP_UEQ_F: /* unordered | ==: !(a<b) && !(a>b) */
   1393       cbuf_puts(&t->body, "(!(");
   1394       c_emit_fp_rel(t, a, "<", b);
   1395       cbuf_puts(&t->body, ") && !(");
   1396       c_emit_fp_rel(t, a, ">", b);
   1397       cbuf_puts(&t->body, "))");
   1398       return;
   1399     default:
   1400       break; /* integer ops + single-operator FP fall through */
   1401   }
   1402   int sg = cmp_signedness(op);
   1403   if (sg == 0) {
   1404     c_emit_operand_arith(t, a);
   1405     cbuf_puts(&t->body, " ");
   1406     cbuf_puts(&t->body, cmp_to_c(op));
   1407     cbuf_puts(&t->body, " ");
   1408     c_emit_operand_arith(t, b);
   1409   } else {
   1410     int signed_ = (sg < 0);
   1411     c_emit_operand_arith_signed(t, a, signed_);
   1412     cbuf_puts(&t->body, " ");
   1413     cbuf_puts(&t->body, cmp_to_c(op));
   1414     cbuf_puts(&t->body, " ");
   1415     c_emit_operand_arith_signed(t, b, signed_);
   1416   }
   1417 }
   1418 
   1419 void c_emit_cmp(CTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
   1420   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1421   if (dst.kind != OPK_LOCAL) {
   1422     compiler_panic(t->c, loc, "C target: cmp dst must be LOCAL");
   1423   }
   1424   if (!cmp_to_c(op) && !cmp_is_fp_compound(op)) {
   1425     compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op);
   1426   }
   1427   c_ensure_local(t, dst.v.local, dst.type);
   1428   /* Compare result is C `int` (0/1); assigning to integer dst.type narrows
   1429    * implicitly without -Wall complaint. The result of a `!(...)` / `||` / `&&`
   1430    * compound FP predicate is already an int 0/1. */
   1431   c_emit_local_assign_open(t, dst.v.local, dst.type);
   1432   c_emit_cmp_operands(t, op, a, b);
   1433   c_emit_local_assign_close(t);
   1434 }
   1435 
   1436 /* ===== labels, jump, cmp_branch ===== */
   1437 
   1438 static void c_label_name(Label l, char* out, size_t cap) {
   1439   size_t i = 0;
   1440   if (cap == 0) return;
   1441   const char* p = "L";
   1442   while (*p && i + 1 < cap) out[i++] = *p++;
   1443   char tmp[16];
   1444   size_t n = 0;
   1445   u32 v = (u32)l;
   1446   if (v == 0) {
   1447     tmp[n++] = '0';
   1448   } else {
   1449     while (v) {
   1450       tmp[n++] = (char)('0' + (v % 10));
   1451       v /= 10;
   1452     }
   1453   }
   1454   while (n && i + 1 < cap) out[i++] = tmp[--n];
   1455   out[i] = '\0';
   1456 }
   1457 
   1458 Label c_emit_label_new(CTarget* t) {
   1459   t->next_label += 1;
   1460   return (Label)t->next_label;
   1461 }
   1462 
   1463 void c_emit_label_place(CTarget* t, Label l) {
   1464   char buf[24];
   1465   c_label_name(l, buf, sizeof buf);
   1466   /* `Lk: __attribute__((unused));` — empty stmt keeps it valid at end-of-block,
   1467    * and the attribute silences -Wunused-label when the goto got folded away. */
   1468   cbuf_puts(&t->body, " ");
   1469   cbuf_puts(&t->body, buf);
   1470   cbuf_puts(&t->body, ": __attribute__((unused));\n");
   1471   t->last_was_terminator = 0;
   1472 }
   1473 
   1474 /* If `l` is the innermost structured scope's break/continue label, return
   1475  * the C keyword that exits/iterates that scope (a literal `break` or
   1476  * `continue`). NULL means "fall back to goto." Matches only the innermost
   1477  * scope because C `break`/`continue` only escape the nearest enclosing
   1478  * loop/switch — outer-scope targets must stay as goto. */
   1479 static const char* c_scope_kw_for_label(CTarget* t, Label l) {
   1480   if (t->nscopes == 0) return NULL;
   1481   const CScopeInfo* s = &t->scopes[t->nscopes - 1u];
   1482   if (!s->structured) return NULL;
   1483   if (l == s->break_label) return "break";
   1484   if (l == s->continue_label) return "continue";
   1485   return NULL;
   1486 }
   1487 
   1488 void c_emit_jump(CTarget* t, Label l) {
   1489   if (t->last_was_terminator) return;
   1490   const char* kw = c_scope_kw_for_label(t, l);
   1491   if (kw) {
   1492     cbuf_puts(&t->body, "  ");
   1493     cbuf_puts(&t->body, kw);
   1494     cbuf_puts(&t->body, ";\n");
   1495   } else {
   1496     char buf[24];
   1497     c_label_name(l, buf, sizeof buf);
   1498     cbuf_puts(&t->body, "  goto ");
   1499     cbuf_puts(&t->body, buf);
   1500     cbuf_puts(&t->body, ";\n");
   1501   }
   1502   t->last_was_terminator = 1;
   1503 }
   1504 
   1505 void c_emit_cmp_branch(CTarget* t, CmpOp op, Operand a, Operand b, Label l) {
   1506   if (t->last_was_terminator) return;
   1507   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1508   if (!cmp_to_c(op) && !cmp_is_fp_compound(op)) {
   1509     compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op);
   1510   }
   1511   const char* kw = c_scope_kw_for_label(t, l);
   1512   cbuf_puts(&t->body, "  if (");
   1513   c_emit_cmp_operands(t, op, a, b);
   1514   if (kw) {
   1515     cbuf_puts(&t->body, ") ");
   1516     cbuf_puts(&t->body, kw);
   1517     cbuf_puts(&t->body, ";\n");
   1518   } else {
   1519     char buf[24];
   1520     c_label_name(l, buf, sizeof buf);
   1521     cbuf_puts(&t->body, ") goto ");
   1522     cbuf_puts(&t->body, buf);
   1523     cbuf_puts(&t->body, ";\n");
   1524   }
   1525 }
   1526 
/* ===== scopes =====
 *
 * SCOPE_LOOP maps to C's `for (;;) { ... }`. CG places the continue label
 * just before `scope_begin` and the break label just before `scope_end`
 * (see src/cg/control.c:208,253). The C target leaves those label
 * placements in the body: the continue label sits just before
 * `for (;;) {`, and the break label sits at the end of the for-body, just
 * ahead of the `break;` and closing `}` that `c_emit_scope_end` emits. Any
 * `goto continue_lbl` or `goto break_lbl` issued from a nested scope (e.g.
 * a nested loop's `continue` targeting this outer loop) therefore still
 * resolves. Inside the `for` body, `c_emit_jump` and `c_emit_cmp_branch`
 * translate jumps whose target is the *innermost* scope's break/continue
 * label into `break;` / `continue;`; outer-scope targets fall back to
 * `goto`. The redundant `Lk: ;` adjacent to the `for` is cosmetic;
 * gcc/clang fold it. */
   1540 
   1541 static void c_grow_scopes(CTarget* t, u32 needed) {
   1542   Heap* h = t->c->ctx->heap;
   1543   u32 newcap = t->scopes_cap ? t->scopes_cap : 8;
   1544   while (newcap < needed) newcap *= 2;
   1545   CScopeInfo* ns = (CScopeInfo*)h->realloc(
   1546       h, t->scopes, t->scopes_cap * sizeof(CScopeInfo),
   1547       newcap * sizeof(CScopeInfo), _Alignof(CScopeInfo));
   1548   if (!ns && newcap) {
   1549     compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: out of memory");
   1550   }
   1551   t->scopes = ns;
   1552   t->scopes_cap = newcap;
   1553 }
   1554 
   1555 CGScope c_emit_scope_begin(CTarget* t, const CGScopeDesc* d) {
   1556   if (t->nscopes + 1u >= t->scopes_cap) c_grow_scopes(t, t->nscopes + 2u);
   1557   u32 idx = t->nscopes;
   1558   t->scopes[idx].kind = d->kind;
   1559   t->scopes[idx].break_label = d->break_label;
   1560   t->scopes[idx].continue_label = d->continue_label;
   1561   t->scopes[idx].structured = 0;
   1562   t->nscopes += 1u;
   1563   if (d->kind == SCOPE_LOOP) {
   1564     cbuf_puts(&t->body, "  for (;;) {\n");
   1565     t->scopes[idx].structured = 1;
   1566     t->last_was_terminator = 0;
   1567     return (CGScope)(idx + 1u);
   1568   }
   1569   return (CGScope)(idx + 1u);
   1570 }
   1571 
   1572 void c_emit_scope_end(CTarget* t, CGScope s) {
   1573   if (s == 0 || (u32)s > t->nscopes) {
   1574     compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
   1575                    "C target: scope_end on invalid handle");
   1576   }
   1577   u32 idx = (u32)s - 1u;
   1578   if (t->scopes[idx].structured) {
   1579     /* CG places break_label just before scope_end, so the label sits
   1580      * inside the for-body. Anything that lands on it (including a
   1581      * `goto break_lbl` from a nested scope's labeled break) needs to
   1582      * exit the for — without an explicit `break;`, fall-through would
   1583      * iterate again. Always emit; if the body already terminated the
   1584      * defensive break is dead but harmless. */
   1585     cbuf_puts(&t->body, "  break;\n");
   1586     cbuf_puts(&t->body, "  }\n");
   1587     /* The closing brace is not a terminator; control can fall through it
   1588      * (e.g., off the end of a void function). */
   1589     t->last_was_terminator = 0;
   1590   }
   1591   t->nscopes -= 1u;
   1592 }
   1593 
   1594 void c_emit_break_to(CTarget* t, CGScope s) {
   1595   if (s == 0 || (u32)s > t->nscopes) {
   1596     compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
   1597                    "C target: break_to on invalid handle");
   1598   }
   1599   c_emit_jump(t, t->scopes[s - 1u].break_label);
   1600 }
   1601 
   1602 void c_emit_continue_to(CTarget* t, CGScope s) {
   1603   if (s == 0 || (u32)s > t->nscopes) {
   1604     compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
   1605                    "C target: continue_to on invalid handle");
   1606   }
   1607   c_emit_jump(t, t->scopes[s - 1u].continue_label);
   1608 }
   1609 
   1610 /* ===== switch dispatch ===== */
   1611 
   1612 /* Emit `case <value>:`. For an int32_t selector the bare literal is
   1613  * already the right type, so we skip the cast; for wider/narrower
   1614  * integers we wrap in `(T)` so the case constant matches the switch
   1615  * value's promoted type (avoids -Wswitch warnings on narrower
   1616  * selectors). */
   1617 static void c_emit_case_value(CTarget* t, KitCgTypeId sel_ty, u64 v) {
   1618   u32 w = c_int_width_for_signedness(t, sel_ty);
   1619   cbuf_puts(&t->body, "    case ");
   1620   if (w != 0 && w != 32) {
   1621     cbuf_putc(&t->body, '(');
   1622     c_emit_type(t, &t->body, sel_ty);
   1623     cbuf_puts(&t->body, ")");
   1624   }
   1625   c_emit_imm_literal(t, (i64)v);
   1626   cbuf_puts(&t->body, ":");
   1627 }
   1628 
   1629 void c_emit_switch_(
   1630     CTarget* t, const CGSwitchDesc* d) { /* gcc/clang ignore strategy hints and
   1631                                             pick their own dispatch shape. */
   1632   (void)d->hint;
   1633   if (t->last_was_terminator) return;
   1634   cbuf_puts(&t->body, "  switch (");
   1635   c_emit_operand(t, d->selector);
   1636   cbuf_puts(&t->body, ") {\n");
   1637   for (u32 i = 0; i < d->ncases; ++i) {
   1638     char buf[24];
   1639     c_label_name(d->cases[i].label, buf, sizeof buf);
   1640     c_emit_case_value(t, d->selector.type, d->cases[i].value);
   1641     cbuf_puts(&t->body, " goto ");
   1642     cbuf_puts(&t->body, buf);
   1643     cbuf_puts(&t->body, ";\n");
   1644   }
   1645   cbuf_puts(&t->body, "    default: ");
   1646   if (d->default_label != (Label)LABEL_NONE) {
   1647     char buf[24];
   1648     c_label_name(d->default_label, buf, sizeof buf);
   1649     cbuf_puts(&t->body, "goto ");
   1650     cbuf_puts(&t->body, buf);
   1651     cbuf_puts(&t->body, ";\n");
   1652   } else {
   1653     /* No default supplied — the kit IR's contract for that case is
   1654      * "if no case matches, fall through." `break;` does exactly that
   1655      * inside the for-wrapper around structured scopes. */
   1656     cbuf_puts(&t->body, "break;\n");
   1657   }
   1658   cbuf_puts(&t->body, "  }\n");
   1659   /* The switch always transfers control (every arm jumps or breaks).
   1660    * Mark as terminator so any frontend-emitted defensive jump after
   1661    * dispatch is dropped. */
   1662   t->last_was_terminator = 1;
   1663 }
   1664 
   1665 /* ===== load_label_addr / indirect_branch =====
   1666  * GCC computed-goto extension: `&&L` is the address of label L within
   1667  * the current function, and `goto *p;` jumps to such an address. This
   1668  * is the lowering every cc1-like backend uses (and what the toy
   1669  * frontend ultimately compiles to via the C target). */
   1670 void c_emit_load_label_addr(CTarget* t, Operand dst, Label l) {
   1671   char buf[24];
   1672   if (dst.kind != OPK_LOCAL) {
   1673     compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
   1674                    "C target: load_label_addr dst must be LOCAL");
   1675   }
   1676   c_ensure_local(t, dst.v.local, dst.type);
   1677   c_emit_local_assign_open(t, dst.v.local, (KitCgTypeId)0);
   1678   cbuf_puts(&t->body, "(void*)&&");
   1679   c_label_name(l, buf, sizeof buf);
   1680   cbuf_puts(&t->body, buf);
   1681   c_emit_local_assign_close(t);
   1682 }
   1683 
   1684 void c_emit_indirect_branch(CTarget* t, Operand addr,
   1685                             const Label* valid_targets, u32 ntargets) {
   1686   (void)valid_targets;
   1687   (void)ntargets;
   1688   if (t->last_was_terminator) return;
   1689   cbuf_puts(&t->body, "  goto *");
   1690   c_emit_operand(t, addr);
   1691   cbuf_puts(&t->body, ";\n");
   1692   t->last_was_terminator = 1;
   1693 }
   1694 
   1695 /* ===== function-local static label-address data ===== */
   1696 
   1697 static int c_is_local_static_sym(CTarget* t, ObjSymId sym) {
   1698   for (u32 i = 0; i < t->local_static_nsyms; ++i) {
   1699     if (t->local_static_syms[i] == sym) return 1;
   1700   }
   1701   return 0;
   1702 }
   1703 
   1704 static void c_mark_local_static_sym(CTarget* t, ObjSymId sym) {
   1705   Heap* h = t->c->ctx->heap;
   1706   if (sym == OBJ_SYM_NONE || c_is_local_static_sym(t, sym)) return;
   1707   if (t->local_static_nsyms + 1u > t->local_static_syms_cap) {
   1708     u32 oldcap = t->local_static_syms_cap;
   1709     u32 newcap = oldcap ? oldcap * 2u : 16u;
   1710     ObjSymId* ns = (ObjSymId*)h->realloc(
   1711         h, t->local_static_syms, oldcap * sizeof(*t->local_static_syms),
   1712         newcap * sizeof(*t->local_static_syms), _Alignof(ObjSymId));
   1713     if (!ns) {
   1714       compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
   1715                      "C target: out of memory");
   1716     }
   1717     t->local_static_syms = ns;
   1718     t->local_static_syms_cap = newcap;
   1719   }
   1720   t->local_static_syms[t->local_static_nsyms++] = sym;
   1721 }
   1722 
   1723 static void c_grow_local_static_entries(CTarget* t, u32 want) {
   1724   Heap* h = t->c->ctx->heap;
   1725   if (want <= t->local_static_entries_cap) return;
   1726   u32 oldcap = t->local_static_entries_cap;
   1727   u32 newcap = oldcap ? oldcap * 2u : 8u;
   1728   while (newcap < want) newcap *= 2u;
   1729   CLocalStaticLabelEntry* ne = (CLocalStaticLabelEntry*)h->realloc(
   1730       h, t->local_static_entries, oldcap * sizeof(*t->local_static_entries),
   1731       newcap * sizeof(*t->local_static_entries),
   1732       _Alignof(CLocalStaticLabelEntry));
   1733   if (!ne) {
   1734     compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
   1735                    "C target: out of memory");
   1736   }
   1737   t->local_static_entries = ne;
   1738   t->local_static_entries_cap = newcap;
   1739 }
   1740 
   1741 int c_emit_can_local_static_data(CTarget* t,
   1742                                  const CGLocalStaticDataDesc* desc) {
   1743   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1744   const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, desc->type));
   1745   if (!ty) {
   1746     compiler_panic(t->c, loc, "C target: unknown local static type %u",
   1747                    (unsigned)desc->type);
   1748   }
   1749   if (ty->kind == KIT_CG_TYPE_ARRAY) {
   1750     ty = cg_type_get(t->c, api_unalias_type(t->c, ty->array.elem));
   1751   }
   1752   return ty && ty->kind == KIT_CG_TYPE_PTR;
   1753 }
   1754 
   1755 int c_emit_local_static_data_begin(CTarget* t,
   1756                                    const CGLocalStaticDataDesc* desc) {
   1757   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1758   if (!t->cur_fn) {
   1759     compiler_panic(t->c, loc,
   1760                    "C target: function-local static data outside function");
   1761   }
   1762   if (t->local_static_active) {
   1763     compiler_panic(t->c, loc,
   1764                    "C target: nested function-local static data definition");
   1765   }
   1766   const CgType* ty = cg_type_get(t->c, api_unalias_type(t->c, desc->type));
   1767   if (!ty) {
   1768     compiler_panic(t->c, loc, "C target: unknown local static type %u",
   1769                    (unsigned)desc->type);
   1770   }
   1771 
   1772   u64 count = 1;
   1773   int is_array = 0;
   1774   KitCgTypeId elem = desc->type;
   1775   if (ty->kind == KIT_CG_TYPE_ARRAY) {
   1776     is_array = 1;
   1777     count = ty->array.count;
   1778     elem = ty->array.elem;
   1779     ty = cg_type_get(t->c, api_unalias_type(t->c, elem));
   1780   }
   1781   if (!c_emit_can_local_static_data(t, desc)) {
   1782     return 0;
   1783   }
   1784   if (count > UINT32_MAX) {
   1785     compiler_panic(t->c, loc, "C target: local static pointer table too large");
   1786   }
   1787 
   1788   c_grow_local_static_entries(t, (u32)count);
   1789   for (u32 i = 0; i < (u32)count; ++i) {
   1790     t->local_static_entries[i].label = LABEL_NONE;
   1791     t->local_static_entries[i].addend = 0;
   1792     t->local_static_entries[i].has_label = 0;
   1793   }
   1794   t->local_static_nentries = (u32)count;
   1795   t->local_static_sym = desc->sym;
   1796   t->local_static_type = desc->type;
   1797   t->local_static_count = count;
   1798   t->local_static_offset = 0;
   1799   t->local_static_ptr_width = (u32)cg_type_size(t->c, elem);
   1800   t->local_static_align =
   1801       desc->align ? desc->align : cg_type_align(t->c, desc->type);
   1802   t->local_static_active = 1;
   1803   t->local_static_is_array = (u8)is_array;
   1804   t->local_static_readonly =
   1805       (desc->attrs.flags & KIT_CG_DATADEF_READONLY) ? 1u : 0u;
   1806   c_mark_local_static_sym(t, desc->sym);
   1807   return 1;
   1808 }
   1809 
   1810 void c_emit_local_static_data_write(CTarget* t, const u8* data, u64 len) {
   1811   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1812   if (!t->local_static_active || !len) return;
   1813   if (data) {
   1814     for (u64 i = 0; i < len; ++i) {
   1815       if (data[i] != 0) {
   1816         compiler_panic(t->c, loc,
   1817                        "C target: function-local static label table supports "
   1818                        "only zero bytes and label addresses");
   1819       }
   1820     }
   1821   }
   1822   t->local_static_offset += len;
   1823 }
   1824 
   1825 void c_emit_local_static_data_label_addr(CTarget* t, Label target, i64 addend,
   1826                                          u32 width, u32 address_space) {
   1827   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1828   (void)address_space;
   1829   if (!t->local_static_active) {
   1830     compiler_panic(t->c, loc,
   1831                    "C target: label address outside local static data");
   1832   }
   1833   if (width != t->local_static_ptr_width) {
   1834     compiler_panic(t->c, loc,
   1835                    "C target: label address width %u does not match pointer "
   1836                    "width %u",
   1837                    (unsigned)width, (unsigned)t->local_static_ptr_width);
   1838   }
   1839   if ((t->local_static_offset % t->local_static_ptr_width) != 0) {
   1840     compiler_panic(t->c, loc,
   1841                    "C target: unaligned label address in local static data");
   1842   }
   1843   u64 idx = t->local_static_offset / t->local_static_ptr_width;
   1844   if (idx >= t->local_static_count) {
   1845     compiler_panic(t->c, loc,
   1846                    "C target: too many local static label table entries");
   1847   }
   1848   CLocalStaticLabelEntry* e = &t->local_static_entries[(u32)idx];
   1849   if (e->has_label) {
   1850     compiler_panic(t->c, loc,
   1851                    "C target: duplicate local static label table entry");
   1852   }
   1853   e->label = target;
   1854   e->addend = addend;
   1855   e->has_label = 1;
   1856   t->local_static_offset += width;
   1857 }
   1858 
   1859 static void c_emit_local_static_label_expr(CTarget* t,
   1860                                            const CLocalStaticLabelEntry* e) {
   1861   char lbuf[24];
   1862   if (!e->has_label) {
   1863     cbuf_puts(&t->decls, "(void*)0");
   1864     return;
   1865   }
   1866   if (e->addend == 0) {
   1867     cbuf_puts(&t->decls, "&&");
   1868     c_label_name(e->label, lbuf, sizeof lbuf);
   1869     cbuf_puts(&t->decls, lbuf);
   1870     return;
   1871   }
   1872   cbuf_puts(&t->decls, "(void*)((char*)&&");
   1873   c_label_name(e->label, lbuf, sizeof lbuf);
   1874   cbuf_puts(&t->decls, lbuf);
   1875   cbuf_puts(&t->decls, " + ");
   1876   cbuf_put_i64(&t->decls, e->addend);
   1877   cbuf_puts(&t->decls, ")");
   1878 }
   1879 
   1880 void c_emit_local_static_data_end(CTarget* t) {
   1881   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1882   if (!t->local_static_active) return;
   1883   u64 total_size = t->local_static_count * t->local_static_ptr_width;
   1884   if (t->local_static_offset > total_size) {
   1885     compiler_panic(t->c, loc,
   1886                    "C target: local static initializer exceeds object size");
   1887   }
   1888   const char* nm = c_sym_name(t, t->local_static_sym);
   1889   cbuf_puts(&t->decls, "  static __attribute__((unused)) ");
   1890   cbuf_puts(&t->decls, "_Alignas(");
   1891   cbuf_put_u64(&t->decls, t->local_static_align ? t->local_static_align : 1);
   1892   cbuf_puts(&t->decls, ") void* ");
   1893   if (t->local_static_readonly) cbuf_puts(&t->decls, "const ");
   1894   cbuf_puts(&t->decls, nm);
   1895   if (t->local_static_is_array) {
   1896     cbuf_puts(&t->decls, "[");
   1897     cbuf_put_u64(&t->decls, t->local_static_count);
   1898     cbuf_puts(&t->decls, "]");
   1899   }
   1900   cbuf_puts(&t->decls, " = {");
   1901   for (u32 i = 0; i < t->local_static_nentries; ++i) {
   1902     if (i > 0) cbuf_putc(&t->decls, ',');
   1903     if ((i & 3u) == 0) cbuf_puts(&t->decls, "\n    ");
   1904     c_emit_local_static_label_expr(t, &t->local_static_entries[i]);
   1905   }
   1906   cbuf_puts(&t->decls, "\n  };\n");
   1907 
   1908   t->local_static_active = 0;
   1909   t->local_static_sym = OBJ_SYM_NONE;
   1910   t->local_static_type = KIT_CG_TYPE_NONE;
   1911   t->local_static_count = 0;
   1912   t->local_static_offset = 0;
   1913   t->local_static_ptr_width = 0;
   1914   t->local_static_align = 0;
   1915   t->local_static_nentries = 0;
   1916   t->local_static_is_array = 0;
   1917   t->local_static_readonly = 0;
   1918 }
   1919 
   1920 /* ===== local, local_addr ===== */
   1921 
   1922 CGLocal c_emit_local(CTarget* t, const CGLocalDesc* d) {
   1923   t->next_local += 1u;
   1924   if (t->next_local == CG_LOCAL_NONE) {
   1925     compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
   1926                    "C target: semantic local id exhausted");
   1927     return CG_LOCAL_NONE;
   1928   }
   1929   c_ensure_local(t, (CGLocal)t->next_local, d->type);
   1930   return (CGLocal)t->next_local;
   1931 }
   1932 
   1933 void c_emit_local_addr(CTarget* t, Operand dst, const CGLocalDesc* d,
   1934                        CGLocal s) {
   1935   (void)d;
   1936   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1937   if (dst.kind != OPK_LOCAL) {
   1938     compiler_panic(t->c, loc, "C target: local_addr dst must be LOCAL");
   1939   }
   1940   c_ensure_local(t, dst.v.local, dst.type);
   1941   c_ensure_local(t, s, d->type);
   1942   char buf[24];
   1943   c_emit_local_assign_open(t, dst.v.local, (KitCgTypeId)0);
   1944   cbuf_puts(&t->body, "&");
   1945   c_local_name(s, buf, sizeof buf);
   1946   cbuf_puts(&t->body, buf);
   1947   c_emit_local_assign_close(t);
   1948 }
   1949 
   1950 /* ===== convert ===== */
   1951 
   1952 void c_emit_convert(CTarget* t, ConvKind k, Operand dst, Operand src) {
   1953   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   1954   if (dst.kind != OPK_LOCAL) {
   1955     compiler_panic(t->c, loc, "C target: convert dst must be LOCAL");
   1956   }
   1957   c_ensure_local(t, dst.v.local, dst.type);
   1958   char buf[24];
   1959   c_local_name(dst.v.local, buf, sizeof buf);
   1960 
   1961   if (k == CV_BITCAST) {
   1962     /* Same-size reinterpretation. Use __builtin_memcpy through a temp so
   1963      * neither aliasing nor representation assumptions creep in. The temp
   1964      * lives in its own `{ ... }` block, so no name collision tracking. */
   1965     u32 id = ++t->next_tmp;
   1966     cbuf_puts(&t->body, "  { ");
   1967     c_emit_type(t, &t->body, src.type);
   1968     cbuf_puts(&t->body, " __bc");
   1969     cbuf_put_u64(&t->body, (u64)id);
   1970     cbuf_puts(&t->body, " = ");
   1971     c_emit_operand(t, src);
   1972     cbuf_puts(&t->body, "; __builtin_memcpy(&");
   1973     cbuf_puts(&t->body, buf);
   1974     cbuf_puts(&t->body, ", &__bc");
   1975     cbuf_put_u64(&t->body, (u64)id);
   1976     cbuf_puts(&t->body, ", sizeof __bc");
   1977     cbuf_put_u64(&t->body, (u64)id);
   1978     cbuf_puts(&t->body, "); }\n");
   1979     return;
   1980   }
   1981 
   1982   if (c_type_is_bool(t, dst.type)) {
   1983     c_emit_local_assign_open(t, dst.v.local, dst.type);
   1984     cbuf_puts(&t->body, "(");
   1985     c_emit_type(t, &t->body, dst.type);
   1986     cbuf_puts(&t->body, ")(");
   1987     c_emit_operand(t, src);
   1988     cbuf_puts(&t->body, " != 0)");
   1989     c_emit_local_assign_close(t);
   1990     return;
   1991   }
   1992 
   1993   /* Integer and float conversions: a C cast does the right thing once the
   1994    * source is first cast to the appropriate signedness (for SEXT/ZEXT and
   1995    * ITOF_S/U / FTOI_S/U). */
   1996   int src_signed = 1;
   1997   switch (k) {
   1998     case CV_ZEXT:
   1999     case CV_ITOF_U:
   2000     case CV_FTOI_U:
   2001       src_signed = 0;
   2002       break;
   2003     default:
   2004       src_signed = 1;
   2005       break;
   2006   }
   2007 
   2008   /* The cast `(dst.type)(src)` produces a value of dst.type. */
   2009   c_emit_local_assign_open(t, dst.v.local, dst.type);
   2010   cbuf_puts(&t->body, "(");
   2011   c_emit_type(t, &t->body, dst.type);
   2012   cbuf_puts(&t->body, ")");
   2013   if (k == CV_SEXT || k == CV_ZEXT) {
   2014     c_emit_operand_signed(t, src, src_signed);
   2015   } else if (k == CV_TRUNC && c_operand_is_ptr_typed(t, src)) {
   2016     /* Casting a pointer directly to a narrower integer trips
   2017      * -Wvoid-pointer-to-int-cast (and -Wpointer-to-int-cast). Bridge
   2018      * through uintptr_t. */
   2019     cbuf_puts(&t->body, "((uintptr_t)");
   2020     c_emit_operand(t, src);
   2021     cbuf_puts(&t->body, ")");
   2022   } else {
   2023     /* TRUNC / FTOI / ITOF / FEXT / FTRUNC: rely on C cast semantics. */
   2024     c_emit_operand(t, src);
   2025   }
   2026   c_emit_local_assign_close(t);
   2027 }
   2028 
   2029 /* === call === */
   2030 
   2031 static KitCgTypeId c_call_arg_type(CTarget* t, const CgType* fty,
   2032                                    const CGCallDesc* d, u32 i) {
   2033   if (i < fty->func.nparams) return fty->func.params[i].type;
   2034   return c_local_type_or_panic(t, d->args[i]);
   2035 }
   2036 
   2037 static void c_emit_call_arg(CTarget* t, const CgType* fty, const CGCallDesc* d,
   2038                             u32 i) {
   2039   KitCgTypeId ty = c_call_arg_type(t, fty, d, i);
   2040   c_ensure_local(t, d->args[i], ty);
   2041   c_emit_operand(t, c_op_local(d->args[i], ty));
   2042 }
   2043 
   2044 /* Render call operand `i`, optionally cast to unsigned __int128 first (used by
   2045  * the unsigned i128 helpers below). */
   2046 static void c_emit_ti_operand(CTarget* t, const CgType* fty,
   2047                               const CGCallDesc* d, u32 i, int as_unsigned) {
   2048   if (as_unsigned) cbuf_puts(&t->body, "(unsigned __int128)(");
   2049   c_emit_call_arg(t, fty, d, i);
   2050   if (as_unsigned) cbuf_puts(&t->body, ")");
   2051 }
   2052 
   2053 /* The CG arithmetic layer (src/cg/arith.c) lowers 128-bit integer operations
   2054  * into calls to runtime helpers: compiler-rt-standard names for mul/div/mod/
   2055  * shift/neg, and __kit_*-prefixed ones for add/sub/bitwise/not/extend/compare
   2056  * — operations that real toolchains inline (so have no compiler-rt symbol), or
   2057  * that use kit's own -1/0/1 compare convention. A C compiler has native
   2058  * __int128, so the portable C backend re-expresses every such call as a native
   2059  * operator: the emitted source then needs neither kit's runtime nor the host's
   2060  * compiler-rt builtins. Returns 1 if it emitted the intrinsic, 0 to fall
   2061  * through to a normal call. */
   2062 static int c_try_emit_ti_intrinsic(CTarget* t, const CgType* fty,
   2063                                    const CGCallDesc* d) {
   2064   if (d->callee.kind != OPK_GLOBAL) return 0;
   2065   const char* n = c_sym_name(t, d->callee.v.global.sym);
   2066   if (!n) return 0;
   2067 
   2068   /* Symmetric binary ops over two i128 operands: (a) OP (b). `u` casts both
   2069    * operands to unsigned __int128 first (unsigned divide/remainder). */
   2070   static const struct {
   2071     const char* name;
   2072     const char* op;
   2073     int u;
   2074   } kBin[] = {
   2075       {"__kit_addti3", "+", 0}, {"__kit_subti3", "-", 0},
   2076       {"__multi3", "*", 0},     {"__kit_andti3", "&", 0},
   2077       {"__kit_orti3", "|", 0},  {"__kit_xorti3", "^", 0},
   2078       {"__divti3", "/", 0},     {"__modti3", "%", 0},
   2079       {"__udivti3", "/", 1},    {"__umodti3", "%", 1},
   2080   };
   2081   if (d->nargs == 2) {
   2082     for (size_t i = 0; i < sizeof kBin / sizeof kBin[0]; ++i) {
   2083       if (strcmp(n, kBin[i].name) != 0) continue;
   2084       cbuf_puts(&t->body, "(");
   2085       c_emit_ti_operand(t, fty, d, 0, kBin[i].u);
   2086       cbuf_puts(&t->body, " ");
   2087       cbuf_puts(&t->body, kBin[i].op);
   2088       cbuf_puts(&t->body, " ");
   2089       c_emit_ti_operand(t, fty, d, 1, kBin[i].u);
   2090       cbuf_puts(&t->body, ")");
   2091       return 1;
   2092     }
   2093   }
   2094 
   2095   /* Shifts: (value) OP (count). The count is a plain int operand, never cast;
   2096    * logical right shift takes an unsigned value. */
   2097   if (d->nargs == 2) {
   2098     const char* sop = NULL;
   2099     int uval = 0;
   2100     if (strcmp(n, "__ashlti3") == 0) {
   2101       sop = "<<";
   2102     } else if (strcmp(n, "__ashrti3") == 0) {
   2103       sop = ">>";
   2104     } else if (strcmp(n, "__lshrti3") == 0) {
   2105       sop = ">>";
   2106       uval = 1;
   2107     }
   2108     if (sop) {
   2109       cbuf_puts(&t->body, "(");
   2110       c_emit_ti_operand(t, fty, d, 0, uval);
   2111       cbuf_puts(&t->body, " ");
   2112       cbuf_puts(&t->body, sop);
   2113       cbuf_puts(&t->body, " ");
   2114       c_emit_call_arg(t, fty, d, 1);
   2115       cbuf_puts(&t->body, ")");
   2116       return 1;
   2117     }
   2118   }
   2119 
   2120   /* Unary ops and i64 -> i128 widening. */
   2121   if (d->nargs == 1) {
   2122     const char* uop = NULL;
   2123     if (strcmp(n, "__negti2") == 0)
   2124       uop = "-";
   2125     else if (strcmp(n, "__kit_notti3") == 0)
   2126       uop = "~";
   2127     if (uop) {
   2128       cbuf_puts(&t->body, "(");
   2129       cbuf_puts(&t->body, uop);
   2130       cbuf_puts(&t->body, "(");
   2131       c_emit_call_arg(t, fty, d, 0);
   2132       cbuf_puts(&t->body, "))");
   2133       return 1;
   2134     }
   2135     if (strcmp(n, "__kit_sext64ti") == 0) {
   2136       cbuf_puts(&t->body, "((__int128)(int64_t)(");
   2137       c_emit_call_arg(t, fty, d, 0);
   2138       cbuf_puts(&t->body, "))");
   2139       return 1;
   2140     }
   2141     if (strcmp(n, "__kit_zext64ti") == 0) {
   2142       cbuf_puts(&t->body, "((unsigned __int128)(uint64_t)(");
   2143       c_emit_call_arg(t, fty, d, 0);
   2144       cbuf_puts(&t->body, "))");
   2145       return 1;
   2146     }
   2147   }
   2148 
   2149   /* Compare: kit's helpers return -1/0/1 (the CG layer compares the result
   2150    * against zero), so reproduce that sign convention with native operators. */
   2151   if (d->nargs == 2) {
   2152     int usign = -1;
   2153     if (strcmp(n, "__kit_cmpti2") == 0)
   2154       usign = 0;
   2155     else if (strcmp(n, "__kit_ucmpti2") == 0)
   2156       usign = 1;
   2157     if (usign >= 0) {
   2158       cbuf_puts(&t->body, "(");
   2159       c_emit_ti_operand(t, fty, d, 0, usign);
   2160       cbuf_puts(&t->body, " < ");
   2161       c_emit_ti_operand(t, fty, d, 1, usign);
   2162       cbuf_puts(&t->body, " ? -1 : (");
   2163       c_emit_ti_operand(t, fty, d, 0, usign);
   2164       cbuf_puts(&t->body, " > ");
   2165       c_emit_ti_operand(t, fty, d, 1, usign);
   2166       cbuf_puts(&t->body, " ? 1 : 0))");
   2167       return 1;
   2168     }
   2169   }
   2170   return 0;
   2171 }
   2172 
   2173 static void c_emit_call_expr(CTarget* t, const CgType* fty,
   2174                              const CGCallDesc* d) {
   2175   if (c_try_emit_ti_intrinsic(t, fty, d)) return;
   2176   if (d->callee.kind == OPK_GLOBAL) {
   2177     c_ensure_forward_decl(t, d->callee.v.global.sym, d->fn_type);
   2178     cbuf_puts(&t->body, c_sym_name(t, d->callee.v.global.sym));
   2179   } else if (d->callee.kind == OPK_LOCAL) {
   2180     const char* fp = c_typedef_name(t, d->fn_type);
   2181     cbuf_puts(&t->body, "((");
   2182     c_ensure_typedef(t, d->fn_type);
   2183     cbuf_puts(&t->body, fp);
   2184     cbuf_puts(&t->body, ")");
   2185     c_emit_operand(t, d->callee);
   2186     cbuf_puts(&t->body, ")");
   2187   } else {
   2188     compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
   2189                    "C target: callee kind %d not supported",
   2190                    (int)d->callee.kind);
   2191   }
   2192 
   2193   cbuf_puts(&t->body, "(");
   2194   for (u32 i = 0; i < d->nargs; ++i) {
   2195     if (i > 0) cbuf_puts(&t->body, ", ");
   2196     c_emit_call_arg(t, fty, d, i);
   2197   }
   2198   cbuf_puts(&t->body, ")");
   2199 }
   2200 
   2201 const char* c_emit_tail_call_unrealizable_reason(CTarget* t,
   2202                                                  const CGCallDesc* d) {
   2203   return c_emit_tail_call_unrealizable_reason_for(t, t->cur_fn, d);
   2204 }
   2205 
   2206 const char* c_emit_tail_call_unrealizable_reason_for(
   2207     CTarget* t, const CGFuncDesc* caller_fd, const CGCallDesc* d) {
   2208   SrcLoc loc = caller_fd ? caller_fd->loc : (SrcLoc){0, 0, 0};
   2209   const CgType* fty = cg_type_get(t->c, api_unalias_type(t->c, d->fn_type));
   2210   if (!fty || fty->kind != KIT_CG_TYPE_FUNC) {
   2211     compiler_panic(t->c, loc, "C target: tail call: bad fn_type");
   2212   }
   2213   const CgType* caller =
   2214       caller_fd ? cg_type_get(t->c, api_unalias_type(t->c, caller_fd->fn_type))
   2215                 : NULL;
   2216   if (!caller || caller->kind != KIT_CG_TYPE_FUNC) {
   2217     compiler_panic(t->c, loc, "C target: tail call outside function");
   2218   }
   2219   if (caller->func.abi_variadic) {
   2220     return "C target: caller variadic tail call not yet supported by clang "
   2221            "musttail";
   2222   }
   2223   if (fty->func.abi_variadic) {
   2224     return "C target: variadic tail call not yet supported by clang musttail";
   2225   }
   2226   if (caller->func.nparams != fty->func.nparams) {
   2227     return "C target: tail call with differing parameter counts not yet "
   2228            "supported by clang musttail";
   2229   }
   2230   return NULL;
   2231 }
   2232 
   2233 void c_emit_call(CTarget* t, const CGCallDesc* d) {
   2234   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2235 
   2236   const CgType* fty = cg_type_get(t->c, api_unalias_type(t->c, d->fn_type));
   2237   if (!fty || fty->kind != KIT_CG_TYPE_FUNC) {
   2238     compiler_panic(t->c, loc, "C target: call: bad fn_type");
   2239   }
   2240   KitCgTypeId ret_type = cg_func_ret_type(fty);
   2241   int is_tail = (d->flags & CG_CALL_TAIL) != 0;
   2242 
   2243   if (is_tail) {
   2244     cbuf_puts(&t->body, "  __attribute__((musttail)) return ");
   2245     c_emit_call_expr(t, fty, d);
   2246     cbuf_puts(&t->body, ";\n");
   2247     t->last_was_terminator = 1;
   2248   } else if (d->result == CG_LOCAL_NONE) {
   2249     cbuf_puts(&t->body, "  ");
   2250     c_emit_call_expr(t, fty, d);
   2251     cbuf_puts(&t->body, ";\n");
   2252   } else {
   2253     c_ensure_local(t, d->result, ret_type);
   2254     c_emit_local_assign_open(t, d->result, ret_type);
   2255     c_emit_call_expr(t, fty, d);
   2256     c_emit_local_assign_close(t);
   2257   }
   2258 }
   2259 
   2260 /* === load / store === */
   2261 
   2262 void c_emit_load(CTarget* t, Operand dst, Operand addr, MemAccess m) {
   2263   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2264   if (dst.kind != OPK_LOCAL) {
   2265     compiler_panic(t->c, loc, "C target: load dst must be LOCAL");
   2266   }
   2267   c_ensure_local(t, dst.v.local, dst.type);
   2268   KitCgTypeId access_ty = m.type ? m.type : dst.type;
   2269   if (c_type_is_aggregate(t, access_ty) && !c_type_is_aggregate(t, dst.type))
   2270     access_ty = dst.type;
   2271   /* The deref `*(access_ty*)addr` produces a value of access_ty. */
   2272   c_emit_local_assign_open(t, dst.v.local, access_ty);
   2273   c_emit_addr_deref(t, addr, access_ty);
   2274   c_emit_local_assign_close(t);
   2275 }
   2276 
   2277 void c_emit_store(CTarget* t, Operand addr, Operand src, MemAccess m) {
   2278   KitCgTypeId access_ty = m.type ? m.type : src.type;
   2279   if (c_type_is_aggregate(t, access_ty) && !c_type_is_aggregate(t, src.type))
   2280     access_ty = src.type;
   2281   cbuf_puts(&t->body, "  ");
   2282   c_emit_addr_deref(t, addr, access_ty);
   2283   /* c_emit_operand_as bridges int/ptr crossings through uintptr_t so
   2284    * roundtrips don't trip `-Wint-conversion`. */
   2285   cbuf_puts(&t->body, " = ");
   2286   c_emit_operand_as(t, src, access_ty);
   2287   cbuf_puts(&t->body, ";\n");
   2288 }
   2289 
   2290 void c_emit_addr_of(CTarget* t, Operand dst, Operand lv) {
   2291   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2292   if (dst.kind != OPK_LOCAL) {
   2293     compiler_panic(t->c, loc, "C target: addr_of dst must be LOCAL");
   2294   }
   2295   c_ensure_local(t, dst.v.local, dst.type);
   2296   /* `c_emit_lvalue_addr` casts its output to dst.type already. */
   2297   c_emit_local_assign_open(t, dst.v.local, dst.type);
   2298   c_emit_lvalue_addr(t, lv, dst.type);
   2299   c_emit_local_assign_close(t);
   2300 }
   2301 
   2302 void c_emit_ret(CTarget* t, CGLocal value) {
   2303   /* Already-terminated block: this ret is unreachable (the frontend's
   2304    * defensive `return 0;` epilogue lands here right after a user return). */
   2305   if (t->last_was_terminator) return;
   2306   /* CG emits a defensive void-return epilogue at the end of every function. For
   2307    * a non-void function that's unreachable; emitting a bare `return;` would
   2308    * trip -Wreturn-type. Spell it as `__builtin_unreachable()` so the host C
   2309    * compiler sees the path is dead without us inventing a fake value. */
   2310   if (value == CG_LOCAL_NONE && t->cur_fn) {
   2311     if (t->cur_fn->result_type != KIT_CG_TYPE_NONE) {
   2312       cbuf_puts(&t->body, "  __builtin_unreachable();\n");
   2313       t->last_was_terminator = 1;
   2314       return;
   2315     }
   2316   }
   2317   cbuf_puts(&t->body, "  return");
   2318   if (value != CG_LOCAL_NONE) {
   2319     cbuf_puts(&t->body, " ");
   2320     KitCgTypeId ret_type = t->cur_fn ? t->cur_fn->result_type : (KitCgTypeId)0;
   2321     const CgType* rty =
   2322         ret_type ? cg_type_get(t->c, api_unalias_type(t->c, ret_type)) : NULL;
   2323     int is_aggregate = rty && (rty->kind == KIT_CG_TYPE_RECORD ||
   2324                                rty->kind == KIT_CG_TYPE_ARRAY);
   2325     if (ret_type && !is_aggregate) {
   2326       KitCgTypeId value_ty = c_local_type_or_panic(t, value);
   2327       c_emit_operand_as(t, c_op_local(value, value_ty), ret_type);
   2328     } else {
   2329       c_emit_operand(t, c_op_local(value, ret_type));
   2330     }
   2331   }
   2332   cbuf_puts(&t->body, ";\n");
   2333   t->last_was_terminator = 1;
   2334 }
   2335 
   2336 /* === unreachable ===
   2337  * Control terminator for statically-unreachable code (the C
   2338  * __builtin_unreachable point). Ends the basic block; emit the host
   2339  * compiler's `__builtin_unreachable()` so it sees the path is dead. */
   2340 void c_emit_unreachable(CTarget* t) {
   2341   if (t->last_was_terminator) return;
   2342   cbuf_puts(&t->body, "  __builtin_unreachable();\n");
   2343   t->last_was_terminator = 1;
   2344 }
   2345 
   2346 /* === alias ===
   2347  * `kit_cg_alias` makes alias_sym refer to target_sym's body. In obj-file
   2348  * land that's two ObjSyms sharing a (section_id, value); in C source we
   2349  * have to spell it out:
   2350  *
   2351  *   ELF/PE   → `Ret alias(args) __attribute__((alias("target")));`
   2352  *              Single definition, true aliasing, &alias == &target.
   2353  *   Mach-O   → emit a thunk `Ret alias(args) { return target(args); }`.
   2354  *              Clang on Darwin rejects __attribute__((alias)) outright,
   2355  *              so we fall back to a wrapper. Loses the `&alias==&target`
   2356  *              identity but preserves call-through semantics, which is
   2357  *              all the kit-emitted code path needs.
   2358  *
   2359  * The emitted decl serves as the alias definition AND a forward prototype
   2360  * for callers, so we mark sym_forwarded to dedup against a later c_call. */
   2361 void c_emit_alias(CTarget* t, ObjSymId alias_sym, ObjSymId target_sym,
   2362                   KitCgTypeId type) {
   2363   Heap* h = t->c->ctx->heap;
   2364   if ((u32)alias_sym >= t->sym_forwarded_cap) {
   2365     u32 newcap = t->sym_forwarded_cap ? t->sym_forwarded_cap : 16;
   2366     while (newcap <= (u32)alias_sym) newcap *= 2;
   2367     u8* nd =
   2368         (u8*)h->realloc(h, t->sym_forwarded, t->sym_forwarded_cap, newcap, 1);
   2369     if (!nd && newcap) {
   2370       compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: out of memory");
   2371     }
   2372     for (u32 i = t->sym_forwarded_cap; i < newcap; ++i) nd[i] = 0;
   2373     t->sym_forwarded = nd;
   2374     t->sym_forwarded_cap = newcap;
   2375   }
   2376   if (t->sym_forwarded[alias_sym]) return;
   2377   t->sym_forwarded[alias_sym] = 1;
   2378   const char* alias_name = c_sym_name(t, alias_sym);
   2379   const char* target_name = c_sym_name(t, target_sym);
   2380   const CgType* fty = cg_type_get(t->c, api_unalias_type(t->c, type));
   2381   int is_func = fty && fty->kind == KIT_CG_TYPE_FUNC;
   2382 
   2383   const ObjFormatImpl* fmt = obj_format_lookup(t->c->target.obj);
   2384   if (!fmt || !fmt->alias_via_thunk) {
   2385     /* Attribute form. Works for both function and object aliases on ELF
   2386      * and PE/COFF. */
   2387     c_emit_func_signature(t, &t->forwards, alias_name, type);
   2388     cbuf_puts(&t->forwards, " __attribute__((alias(\"");
   2389     cbuf_puts(&t->forwards, target_name);
   2390     cbuf_puts(&t->forwards, "\")));\n");
   2391     return;
   2392   }
   2393 
   2394   /* Mach-O thunk fallback. Functions only for v1 — object aliases on
   2395    * Darwin would need a more elaborate scheme (see doc/CBACKEND.md). */
   2396   if (!is_func) {
   2397     compiler_panic(t->c, (SrcLoc){0, 0, 0},
   2398                    "C target: object alias on Mach-O not yet supported");
   2399   }
   2400   /* Forward prototype for the target (its full definition lands separately
   2401    * via c_func_begin). Also dedup that. */
   2402   c_ensure_forward_decl(t, target_sym, type);
   2403   /* `static`? No — alias must be externally visible. */
   2404   c_emit_func_signature(t, &t->forwards, alias_name, type);
   2405   cbuf_puts(&t->forwards, " { ");
   2406   KitCgTypeId ret_type = cg_type_func_ret_id(t->c, type);
   2407   if (!cg_type_is_void(t->c, ret_type)) cbuf_puts(&t->forwards, "return ");
   2408   cbuf_puts(&t->forwards, target_name);
   2409   cbuf_puts(&t->forwards, "(");
   2410   for (u32 i = 0; i < fty->func.nparams; ++i) {
   2411     if (i > 0) cbuf_puts(&t->forwards, ", ");
   2412     cbuf_puts(&t->forwards, "p");
   2413     cbuf_put_u64(&t->forwards, (u64)i);
   2414   }
   2415   cbuf_puts(&t->forwards, "); }\n");
   2416 }
   2417 
   2418 /* === intrinsic ===
   2419  *
   2420  * All kit IntrinKinds map onto gcc/clang `__builtin_*` builtins, which
   2421  * the host C compiler then turns into the appropriate sequence (inline op,
   2422  * libcall, runtime CAS, etc.). This is exactly the seam the doc described:
   2423  * kit records intent, the downstream toolchain picks the mechanism.
   2424  *
   2425  * Operand shapes follow arch.h §IntrinKind. */
   2426 
   2427 static const char* c_bitop_builtin(IntrinKind k, u32 width) {
   2428   switch (k) {
   2429     case INTRIN_POPCOUNT:
   2430       if (width == 32) return "__builtin_popcount";
   2431       if (width == 64) return "__builtin_popcountll";
   2432       if (width == 16 || width == 8) return "__builtin_popcount";
   2433       return NULL;
   2434     case INTRIN_CTZ:
   2435       if (width == 32) return "__builtin_ctz";
   2436       if (width == 64) return "__builtin_ctzll";
   2437       if (width == 16 || width == 8) return "__builtin_ctz";
   2438       return NULL;
   2439     case INTRIN_CLZ:
   2440       if (width == 32) return "__builtin_clz";
   2441       if (width == 64) return "__builtin_clzll";
   2442       if (width == 16 || width == 8) return "__builtin_clz";
   2443       return NULL;
   2444     case INTRIN_BSWAP:
   2445       if (width == 16) return "__builtin_bswap16";
   2446       if (width == 32) return "__builtin_bswap32";
   2447       if (width == 64) return "__builtin_bswap64";
   2448       return NULL;
   2449     default:
   2450       return NULL;
   2451   }
   2452 }
   2453 
   2454 static const char* c_overflow_builtin(IntrinKind k) {
   2455   switch (k) {
   2456     case INTRIN_SADD_OVERFLOW:
   2457     case INTRIN_UADD_OVERFLOW:
   2458       return "__builtin_add_overflow";
   2459     case INTRIN_SSUB_OVERFLOW:
   2460     case INTRIN_USUB_OVERFLOW:
   2461       return "__builtin_sub_overflow";
   2462     case INTRIN_SMUL_OVERFLOW:
   2463     case INTRIN_UMUL_OVERFLOW:
   2464       return "__builtin_mul_overflow";
   2465     default:
   2466       return NULL;
   2467   }
   2468 }
   2469 
   2470 void c_emit_intrinsic(CTarget* t, IntrinKind k, Operand* dsts, u32 ndst,
   2471                       const Operand* args, u32 narg) {
   2472   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2473   switch (k) {
   2474     case INTRIN_TRAP:
   2475       cbuf_puts(&t->body, "  __builtin_trap();\n");
   2476       return;
   2477     case INTRIN_PREFETCH: {
   2478       cbuf_puts(&t->body, "  __builtin_prefetch(");
   2479       for (u32 i = 0; i < narg; ++i) {
   2480         if (i > 0) cbuf_puts(&t->body, ", ");
   2481         c_emit_operand(t, args[i]);
   2482       }
   2483       cbuf_puts(&t->body, ");\n");
   2484       return;
   2485     }
   2486     case INTRIN_ASSUME_ALIGNED: {
   2487       /* dsts[0] is the result local (pointer); args = (ptr, align [, ofs]) */
   2488       if (ndst != 1) {
   2489         compiler_panic(t->c, loc,
   2490                        "C target: assume_aligned: expected 1 dst, got %u",
   2491                        (unsigned)ndst);
   2492       }
   2493       c_ensure_local(t, dsts[0].v.local, dsts[0].type);
   2494       /* Returns void*; bridge to dst pointer type. */
   2495       c_emit_local_assign_open(t, dsts[0].v.local, (KitCgTypeId)0);
   2496       cbuf_puts(&t->body, "__builtin_assume_aligned(");
   2497       for (u32 i = 0; i < narg; ++i) {
   2498         if (i > 0) cbuf_puts(&t->body, ", ");
   2499         c_emit_operand(t, args[i]);
   2500       }
   2501       cbuf_puts(&t->body, ")");
   2502       c_emit_local_assign_close(t);
   2503       return;
   2504     }
   2505     case INTRIN_EXPECT: {
   2506       /* dsts[0] = __builtin_expect(args[0], args[1]) but typed via long. */
   2507       if (ndst != 1 || narg != 2) {
   2508         compiler_panic(t->c, loc,
   2509                        "C target: expect: bad shape (ndst=%u narg=%u)",
   2510                        (unsigned)ndst, (unsigned)narg);
   2511       }
   2512       c_ensure_local(t, dsts[0].v.local, dsts[0].type);
   2513       /* Returns `long`; dst.type may be a narrower int — keep the bridge. */
   2514       c_emit_local_assign_open(t, dsts[0].v.local, (KitCgTypeId)0);
   2515       cbuf_puts(&t->body, "__builtin_expect((long)");
   2516       c_emit_operand(t, args[0]);
   2517       cbuf_puts(&t->body, ", (long)");
   2518       c_emit_operand(t, args[1]);
   2519       cbuf_puts(&t->body, ")");
   2520       c_emit_local_assign_close(t);
   2521       return;
   2522     }
   2523     case INTRIN_POPCOUNT:
   2524     case INTRIN_CTZ:
   2525     case INTRIN_CLZ:
   2526     case INTRIN_BSWAP: {
   2527       if (ndst != 1 || narg != 1) {
   2528         compiler_panic(t->c, loc,
   2529                        "C target: bit-intrin: bad shape (ndst=%u narg=%u)",
   2530                        (unsigned)ndst, (unsigned)narg);
   2531       }
   2532       /* bswap width is determined by the result type (in bytes -> bit-width
   2533        * bucket, matching the old per-width intrinsic split). The other bit
   2534        * ops keep deriving width from the operand. */
   2535       u32 w;
   2536       if (k == INTRIN_BSWAP) {
   2537         u32 bytes = (u32)cg_type_size(t->c, dsts[0].type);
   2538         w = bytes <= 2 ? 16u : (bytes <= 4 ? 32u : 64u);
   2539       } else {
   2540         w = c_int_width_for_signedness(t, args[0].type);
   2541       }
   2542       const char* fn = c_bitop_builtin(k, w);
   2543       if (!fn) {
   2544         compiler_panic(t->c, loc, "C target: bit-intrin width %u unsupported",
   2545                        (unsigned)w);
   2546       }
   2547       c_ensure_local(t, dsts[0].v.local, dsts[0].type);
   2548       /* __builtin_popcount/ctz/clz return `int`; bswap returns its input
   2549        * type. Narrow to dst.type via the bridge. */
   2550       c_emit_local_assign_open(t, dsts[0].v.local, (KitCgTypeId)0);
   2551       cbuf_puts(&t->body, fn);
   2552       cbuf_puts(&t->body, "(");
   2553       c_emit_operand(t, args[0]);
   2554       cbuf_puts(&t->body, ")");
   2555       c_emit_local_assign_close(t);
   2556       return;
   2557     }
   2558     case INTRIN_MEMMOVE: {
   2559       cbuf_puts(&t->body, "  __builtin_memmove(");
   2560       for (u32 i = 0; i < narg; ++i) {
   2561         if (i > 0) cbuf_puts(&t->body, ", ");
   2562         /* The pointer operands (dst and src) may be typed as a plain integer
   2563          * local when they come from address arithmetic, which the C target
   2564          * declares as int64_t. __builtin_memmove takes void*, so cast
   2565          * explicitly to avoid -Wint-conversion. */
   2566         int is_ptr_arg = (i == 0) || (i == 1);
   2567         if (is_ptr_arg) cbuf_puts(&t->body, "(void*)");
   2568         c_emit_operand(t, args[i]);
   2569       }
   2570       cbuf_puts(&t->body, ");\n");
   2571       return;
   2572     }
   2573     case INTRIN_SADD_OVERFLOW:
   2574     case INTRIN_UADD_OVERFLOW:
   2575     case INTRIN_SSUB_OVERFLOW:
   2576     case INTRIN_USUB_OVERFLOW:
   2577     case INTRIN_SMUL_OVERFLOW:
   2578     case INTRIN_UMUL_OVERFLOW: {
   2579       /* dsts[0] = value local, dsts[1] = i1 overflow flag.
   2580        *
   2581        * Signedness comes from the intrinsic kind, but kit's CG int type
   2582        * is width-only and the C target declares every result as a signed
   2583        * fixed-width (int{8,16,32,64}_t). __builtin_*_overflow keys its
   2584        * overflow check on the result type, so passing the signed local
   2585        * directly makes a UADD test as if it were signed and miss true
   2586        * unsigned overflow. Wrap the call in a block with a scratch result
   2587        * of the right signedness and copy it back through the int/uint
   2588        * bridge. */
   2589       if (ndst != 2 || narg != 2) {
   2590         compiler_panic(t->c, loc, "C target: overflow-intrin: bad shape");
   2591       }
   2592       int is_unsigned =
   2593           (k == INTRIN_UADD_OVERFLOW || k == INTRIN_USUB_OVERFLOW ||
   2594            k == INTRIN_UMUL_OVERFLOW);
   2595       const char* fn = c_overflow_builtin(k);
   2596       c_ensure_local(t, dsts[0].v.local, dsts[0].type);
   2597       c_ensure_local(t, dsts[1].v.local, dsts[1].type);
   2598       char vbuf[24], obuf[24];
   2599       c_local_name(dsts[0].v.local, vbuf, sizeof vbuf);
   2600       c_local_name(dsts[1].v.local, obuf, sizeof obuf);
   2601       u32 w = c_int_width_for_signedness(t, dsts[0].type);
   2602       const char* sty = c_int_type_name_for_width(w, !is_unsigned);
   2603       if (!sty) {
   2604         compiler_panic(t->c, loc,
   2605                        "C target: overflow-intrin: unsupported width %u",
   2606                        (unsigned)w);
   2607       }
   2608       cbuf_puts(&t->body, "  { ");
   2609       cbuf_puts(&t->body, sty);
   2610       cbuf_puts(&t->body, " __ovsc; ");
   2611       cbuf_puts(&t->body, obuf);
   2612       cbuf_puts(&t->body, " = (");
   2613       c_emit_type(t, &t->body, dsts[1].type);
   2614       cbuf_puts(&t->body, ")");
   2615       cbuf_puts(&t->body, fn);
   2616       cbuf_puts(&t->body, "((");
   2617       cbuf_puts(&t->body, sty);
   2618       cbuf_puts(&t->body, ")");
   2619       c_emit_operand(t, args[0]);
   2620       cbuf_puts(&t->body, ", (");
   2621       cbuf_puts(&t->body, sty);
   2622       cbuf_puts(&t->body, ")");
   2623       c_emit_operand(t, args[1]);
   2624       cbuf_puts(&t->body, ", &__ovsc); ");
   2625       cbuf_puts(&t->body, vbuf);
   2626       cbuf_puts(&t->body, " = (");
   2627       c_emit_type(t, &t->body, dsts[0].type);
   2628       cbuf_puts(&t->body, ")__ovsc; }\n");
   2629       return;
   2630     }
   2631     case INTRIN_SETJMP: {
   2632       t->need_setjmp = 1;
   2633       if (ndst != 1 || narg != 1) {
   2634         compiler_panic(t->c, loc, "C target: setjmp: bad shape");
   2635       }
   2636       c_ensure_local(t, dsts[0].v.local, dsts[0].type);
   2637       /* setjmp returns `int`; bridge to dst.type. */
   2638       c_emit_local_assign_open(t, dsts[0].v.local, (KitCgTypeId)0);
   2639       cbuf_puts(&t->body, "setjmp(*(jmp_buf*)(");
   2640       c_emit_operand(t, args[0]);
   2641       cbuf_puts(&t->body, "))");
   2642       c_emit_local_assign_close(t);
   2643       return;
   2644     }
   2645     case INTRIN_LONGJMP: {
   2646       t->need_setjmp = 1;
   2647       cbuf_puts(&t->body, "  longjmp(*(jmp_buf*)(");
   2648       c_emit_operand(t, args[0]);
   2649       cbuf_puts(&t->body, "), (int)");
   2650       c_emit_operand(t, args[1]);
   2651       cbuf_puts(&t->body, ");\n");
   2652       return;
   2653     }
   2654     case INTRIN_FRAME_ADDRESS:
   2655     case INTRIN_RETURN_ADDRESS: {
   2656       /* Forward straight to the host compiler's builtin. dsts[0] is the void*
   2657        * result; args[0] is the constant level. The builtin requires a bare
   2658        * integer constant, so emit the level as a plain decimal (not via
   2659        * c_emit_operand, which wraps IMMs in a cast). */
   2660       char nbuf[24];
   2661       unsigned level =
   2662           (narg >= 1 && args[0].kind == OPK_IMM) ? (unsigned)args[0].v.imm : 0u;
   2663       if (ndst != 1) {
   2664         compiler_panic(t->c, loc,
   2665                        "C target: frame/return address: expected 1 dst, got %u",
   2666                        (unsigned)ndst);
   2667       }
   2668       snprintf(nbuf, sizeof nbuf, "%u", level);
   2669       c_ensure_local(t, dsts[0].v.local, dsts[0].type);
   2670       c_emit_local_assign_open(t, dsts[0].v.local, (KitCgTypeId)0);
   2671       cbuf_puts(&t->body, k == INTRIN_FRAME_ADDRESS
   2672                               ? "__builtin_frame_address("
   2673                               : "__builtin_return_address(");
   2674       cbuf_puts(&t->body, nbuf);
   2675       cbuf_puts(&t->body, ")");
   2676       c_emit_local_assign_close(t);
   2677       return;
   2678     }
   2679     case INTRIN_SYSCALL:
   2680       compiler_panic(t->c, loc, "C target: syscall intrinsic not supported");
   2681       return;
   2682     case INTRIN_NONE:
   2683     default:
   2684       compiler_panic(t->c, loc, "C target: intrinsic kind %d not handled",
   2685                      (int)k);
   2686   }
   2687 }
   2688 
   2689 /* === alloca === */
   2690 
   2691 void c_emit_alloca(CTarget* t, Operand dst, Operand size, u32 align) {
   2692   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2693   if (dst.kind != OPK_LOCAL) {
   2694     compiler_panic(t->c, loc, "C target: alloca dst must be LOCAL");
   2695   }
   2696   c_ensure_local(t, dst.v.local, dst.type);
   2697   /* __builtin_alloca returns `void*`; dst.type is typically void* too. */
   2698   c_emit_local_assign_open(t, dst.v.local, dst.type);
   2699   if (align > 1) {
   2700     /* gcc has __builtin_alloca_with_align taking bits, not bytes. */
   2701     cbuf_puts(&t->body, "__builtin_alloca_with_align(");
   2702     c_emit_operand(t, size);
   2703     cbuf_puts(&t->body, ", ");
   2704     cbuf_put_u64(&t->body, (u64)align * 8u);
   2705     cbuf_puts(&t->body, ")");
   2706   } else {
   2707     cbuf_puts(&t->body, "__builtin_alloca(");
   2708     c_emit_operand(t, size);
   2709     cbuf_puts(&t->body, ")");
   2710   }
   2711   c_emit_local_assign_close(t);
   2712 }
   2713 
   2714 /* === varargs ===
   2715  *
   2716  * The C-target va_list is the host toolchain's `va_list` from <stdarg.h>.
   2717  * The first arg of all va_* is `ap_addr` - the address of the va_list local.
   2718  * We deref to get the va_list lvalue C's macros expect. */
   2719 
   2720 void c_emit_va_start(CTarget* t, Operand ap_addr) {
   2721   t->need_stdarg = 1;
   2722   /* va_start needs the "last named parameter". CG doesn't pass that to the
   2723    * backend; gcc/clang accept any non-modified ident here for variadic
   2724    * compatibility — feed the synthesized parameter name `p<nparams-1>` from
   2725    * the enclosing function. */
   2726   const CGFuncDesc* fd = t->cur_fn;
   2727   SrcLoc loc = fd ? fd->loc : (SrcLoc){0, 0, 0};
   2728   if (!fd) compiler_panic(t->c, loc, "C target: va_start outside function");
   2729   const CgType* fty = cg_type_get(t->c, api_unalias_type(t->c, fd->fn_type));
   2730   if (!fty || fty->kind != KIT_CG_TYPE_FUNC || fty->func.nparams == 0) {
   2731     compiler_panic(t->c, loc,
   2732                    "C target: va_start in non-variadic function shape");
   2733   }
   2734   cbuf_puts(&t->body, "  __builtin_va_start(*(va_list*)(");
   2735   c_emit_operand(t, ap_addr);
   2736   cbuf_puts(&t->body, "), p");
   2737   cbuf_put_u64(&t->body, (u64)(fty->func.nparams - 1u));
   2738   cbuf_puts(&t->body, ");\n");
   2739 }
   2740 
   2741 void c_emit_va_end(CTarget* t, Operand ap_addr) {
   2742   cbuf_puts(&t->body, "  __builtin_va_end(*(va_list*)(");
   2743   c_emit_operand(t, ap_addr);
   2744   cbuf_puts(&t->body, "));\n");
   2745 }
   2746 
   2747 void c_emit_va_copy(CTarget* t, Operand dst_addr, Operand src_addr) {
   2748   cbuf_puts(&t->body, "  __builtin_va_copy(*(va_list*)(");
   2749   c_emit_operand(t, dst_addr);
   2750   cbuf_puts(&t->body, "), *(va_list*)(");
   2751   c_emit_operand(t, src_addr);
   2752   cbuf_puts(&t->body, "));\n");
   2753 }
   2754 
   2755 void c_emit_va_arg(CTarget* t, Operand dst, Operand ap_addr, KitCgTypeId ty) {
   2756   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2757   if (dst.kind != OPK_LOCAL) {
   2758     compiler_panic(t->c, loc, "C target: va_arg dst must be LOCAL");
   2759   }
   2760   c_ensure_local(t, dst.v.local, dst.type);
   2761   /* __builtin_va_arg yields a value of `ty`. */
   2762   c_emit_local_assign_open(t, dst.v.local, ty);
   2763   cbuf_puts(&t->body, "__builtin_va_arg(*(va_list*)(");
   2764   c_emit_operand(t, ap_addr);
   2765   cbuf_puts(&t->body, "), ");
   2766   c_emit_type(t, &t->body, ty);
   2767   cbuf_puts(&t->body, ")");
   2768   c_emit_local_assign_close(t);
   2769 }
   2770 
   2771 /* === copy_bytes / set_bytes === */
   2772 
   2773 void c_emit_copy_bytes(CTarget* t, Operand dst_addr, Operand src_addr,
   2774                        AggregateAccess m) {
   2775   c_assert_no_index(t, dst_addr, "copy_bytes dst");
   2776   c_assert_no_index(t, src_addr, "copy_bytes src");
   2777   /* dst/src may be plain integer regs from address arithmetic (declared
   2778    * int64_t); __builtin_memcpy takes void*, so cast to avoid
   2779    * -Wint-conversion. */
   2780   cbuf_puts(&t->body, "  __builtin_memcpy((void*)");
   2781   c_emit_copy_addr(t, dst_addr);
   2782   cbuf_puts(&t->body, ", (void*)");
   2783   c_emit_copy_addr(t, src_addr);
   2784   cbuf_puts(&t->body, ", ");
   2785   cbuf_put_u64(&t->body, (u64)m.size);
   2786   cbuf_puts(&t->body, ");\n");
   2787 }
   2788 
   2789 static void c_emit_copy_addr(CTarget* t, Operand addr) {
   2790   char buf[24];
   2791   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2792   switch (addr.kind) {
   2793     case OPK_LOCAL:
   2794       c_ensure_local(t, addr.v.local, addr.type);
   2795       if (c_operand_is_ptr_typed(t, addr)) {
   2796         c_emit_operand(t, addr);
   2797       } else {
   2798         cbuf_putc(&t->body, '&');
   2799         c_local_name(addr.v.local, buf, sizeof buf);
   2800         cbuf_puts(&t->body, buf);
   2801       }
   2802       return;
   2803     case OPK_GLOBAL: {
   2804       obj_sym_mark_referenced(t->obj, addr.v.global.sym);
   2805       cbuf_puts(&t->body, "((char*)&");
   2806       cbuf_puts(&t->body, c_sym_name(t, addr.v.global.sym));
   2807       if (addr.v.global.addend != 0) {
   2808         cbuf_puts(&t->body, " + ");
   2809         cbuf_put_i64(&t->body, addr.v.global.addend);
   2810       }
   2811       cbuf_putc(&t->body, ')');
   2812       return;
   2813     }
   2814     case OPK_INDIRECT:
   2815       c_emit_indirect_addr_expr(t, c_addr_mode(addr));
   2816       return;
   2817     default:
   2818       compiler_panic(t->c, loc,
   2819                      "C target: copy_bytes address operand kind %d not "
   2820                      "supported",
   2821                      (int)addr.kind);
   2822   }
   2823 }
   2824 
   2825 void c_emit_set_bytes(CTarget* t, Operand dst_addr, Operand byte_value,
   2826                       AggregateAccess m) {
   2827   c_assert_no_index(t, dst_addr, "set_bytes dst");
   2828   /* dst may be a plain integer local from address arithmetic (declared
   2829    * int64_t); __builtin_memset takes void*, so cast to avoid
   2830    * -Wint-conversion. */
   2831   cbuf_puts(&t->body, "  __builtin_memset((void*)");
   2832   c_emit_copy_addr(t, dst_addr);
   2833   cbuf_puts(&t->body, ", (int)");
   2834   c_emit_operand(t, byte_value);
   2835   cbuf_puts(&t->body, ", ");
   2836   cbuf_put_u64(&t->body, (u64)m.size);
   2837   cbuf_puts(&t->body, ");\n");
   2838 }
   2839 
   2840 /* === TLS ===
   2841  *
   2842  * Thread-local data is emitted as `_Thread_local _Alignas(A) uint8_t name[N];`
   2843  * during c_emit_data, and tls_addr_of spells `((char*)&name + addend)` with
   2844  * the requested pointer type. The host C compiler picks the TLS model. */
   2845 
   2846 void c_emit_tls_addr_of(CTarget* t, Operand dst, ObjSymId sym, i64 addend);
   2847 
   2848 void c_emit_tls_addr_of(CTarget* t, Operand dst, ObjSymId sym, i64 addend) {
   2849   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2850   if (dst.kind != OPK_LOCAL) {
   2851     compiler_panic(t->c, loc, "C target: tls_addr_of dst must be LOCAL");
   2852   }
   2853   c_ensure_local(t, dst.v.local, dst.type);
   2854   const char* nm = c_sym_name(t, sym);
   2855   /* RHS spells `(char*)&sym + addend` — pointer type that may not match
   2856    * dst.type; keep the bridge to cast through cleanly. */
   2857   c_emit_local_assign_open(t, dst.v.local, (KitCgTypeId)0);
   2858   cbuf_puts(&t->body, "((char*)&");
   2859   cbuf_puts(&t->body, nm);
   2860   if (addend != 0) {
   2861     cbuf_puts(&t->body, " + ");
   2862     cbuf_put_i64(&t->body, addend);
   2863   }
   2864   cbuf_puts(&t->body, ")");
   2865   c_emit_local_assign_close(t);
   2866 }
   2867 
   2868 /* === bitfields ===
   2869  *
   2870  * kit CG flattens bitfields to (storage_type, byte_offset, bit_offset,
   2871  * bit_width) at the access boundary, so the C target never sees a C-level
   2872  * bitfield declaration. We extract/insert via explicit mask+shift on the
   2873  * underlying storage unit (a fixed-width unsigned int loaded through the
   2874  * usual address-deref path), which sidesteps the C bitfield ABI ambiguity
   2875  * entirely. */
   2876 
   2877 void c_emit_bitfield_load(CTarget* t, Operand dst, Operand addr,
   2878                           BitFieldAccess bf);
   2879 void c_emit_bitfield_store(CTarget* t, Operand addr, Operand src,
   2880                            BitFieldAccess bf);
   2881 
   2882 /* Returns the unsigned C integer type matching the storage-unit byte size. */
   2883 static const char* c_bf_storage_type(u32 size) {
   2884   switch (size) {
   2885     case 1:
   2886       return "uint8_t";
   2887     case 2:
   2888       return "uint16_t";
   2889     case 4:
   2890       return "uint32_t";
   2891     case 8:
   2892       return "uint64_t";
   2893     default:
   2894       return NULL;
   2895   }
   2896 }
   2897 
   2898 /* Spell an address expression for a backend-addressable lvalue operand.
   2899  * Unlike c_emit_operand, this never reads the object value; it materializes
   2900  * the address of the local/global/indirect storage itself. */
   2901 static void c_emit_lvalue_addr_expr_raw(CTarget* t, Operand addr) {
   2902   char buf[24];
   2903   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2904   switch (addr.kind) {
   2905     case OPK_LOCAL:
   2906       cbuf_putc(&t->body, '&');
   2907       c_ensure_local(t, addr.v.local, addr.type);
   2908       c_local_name(addr.v.local, buf, sizeof buf);
   2909       cbuf_puts(&t->body, buf);
   2910       return;
   2911     case OPK_GLOBAL: {
   2912       obj_sym_mark_referenced(t->obj, addr.v.global.sym);
   2913       const char* nm = c_sym_name(t, addr.v.global.sym);
   2914       cbuf_puts(&t->body, "((char*)&");
   2915       cbuf_puts(&t->body, nm);
   2916       if (addr.v.global.addend != 0) {
   2917         cbuf_puts(&t->body, " + ");
   2918         cbuf_put_i64(&t->body, addr.v.global.addend);
   2919       }
   2920       cbuf_putc(&t->body, ')');
   2921       return;
   2922     }
   2923     case OPK_INDIRECT: {
   2924       CAddrMode m = c_addr_mode(addr);
   2925       if ((u32)m.base >= t->local_cap || !t->local_declared[m.base]) {
   2926         compiler_panic(t->c, loc,
   2927                        "C target: bitfield on undeclared base local v%u",
   2928                        (unsigned)m.base);
   2929       }
   2930       cbuf_putc(&t->body, '(');
   2931       c_emit_indirect_addr_expr(t, m);
   2932       cbuf_putc(&t->body, ')');
   2933       return;
   2934     }
   2935     default:
   2936       compiler_panic(t->c, loc,
   2937                      "C target: bitfield address on operand kind %d not "
   2938                      "supported",
   2939                      (int)addr.kind);
   2940   }
   2941 }
   2942 
   2943 /* Spell `*(uintN_t*)((char*)addr + bf.storage_offset)` into the body. */
   2944 static void c_bf_storage_lvalue(CTarget* t, Operand addr, BitFieldAccess bf,
   2945                                 const char* storage_ty) {
   2946   cbuf_puts(&t->body, "(*(");
   2947   cbuf_puts(&t->body, storage_ty);
   2948   cbuf_puts(&t->body, "*)((char*)");
   2949   c_emit_lvalue_addr_expr_raw(t, addr);
   2950   if (bf.storage_offset != 0) {
   2951     cbuf_puts(&t->body, " + ");
   2952     cbuf_put_u64(&t->body, (u64)bf.storage_offset);
   2953   }
   2954   cbuf_puts(&t->body, "))");
   2955 }
   2956 
   2957 void c_emit_bitfield_load(CTarget* t, Operand dst, Operand addr,
   2958                           BitFieldAccess bf) {
   2959   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   2960   if (dst.kind != OPK_LOCAL) {
   2961     compiler_panic(t->c, loc, "C target: bitfield_load dst must be LOCAL");
   2962   }
   2963   c_assert_no_index(t, addr, "bitfield_load");
   2964   if (bf.bit_width == 0) {
   2965     /* Zero-width — layout barrier only; nothing to load. Emit a no-op
   2966      * assignment so the dst local still gets a defined value. */
   2967     c_ensure_local(t, dst.v.local, dst.type);
   2968     /* RHS is the literal 0 (int); narrowing to dst.type is fine. */
   2969     c_emit_local_assign_open(t, dst.v.local, dst.type);
   2970     cbuf_puts(&t->body, "0");
   2971     c_emit_local_assign_close(t);
   2972     return;
   2973   }
   2974   const char* sty = c_bf_storage_type(bf.storage.size);
   2975   if (!sty) {
   2976     compiler_panic(t->c, loc, "C target: bitfield storage size %u unsupported",
   2977                    (unsigned)bf.storage.size);
   2978   }
   2979   c_ensure_local(t, dst.v.local, dst.type);
   2980   /* RHS is the storage-width int from the mask/shift expression; bridge
   2981    * to dst.type so any signedness/width adjustment is explicit. */
   2982   c_emit_local_assign_open(t, dst.v.local, (KitCgTypeId)0);
   2983   /* For signed bitfields, sign-extend via the standard shift-up / arith-shift-
   2984    * down trick on a signed integer of the storage width. For unsigned, mask
   2985    * the extracted bits.
   2986    *
   2987    * Storage is little-endian-bit-indexed on every kit-supported target
   2988    * (LSB-first within a storage unit on x86_64/aarch64/rv64). */
   2989   u32 sw = bf.storage.size * 8u;
   2990   if (bf.signed_) {
   2991     /* (int_storage_t)((storage << shl) >> shr) where:
   2992      *   shl = sw - bit_width - bit_offset
   2993      *   shr = sw - bit_width
   2994      * Then cast to dst type. */
   2995     u32 shl = sw - (u32)bf.bit_width - (u32)bf.bit_offset;
   2996     u32 shr = sw - (u32)bf.bit_width;
   2997     cbuf_puts(&t->body, "(((int");
   2998     cbuf_put_u64(&t->body, (u64)sw);
   2999     cbuf_puts(&t->body, "_t)(");
   3000     c_bf_storage_lvalue(t, addr, bf, sty);
   3001     cbuf_puts(&t->body, " << ");
   3002     cbuf_put_u64(&t->body, (u64)shl);
   3003     cbuf_puts(&t->body, ")) >> ");
   3004     cbuf_put_u64(&t->body, (u64)shr);
   3005     cbuf_puts(&t->body, ")");
   3006   } else {
   3007     /* ((storage >> bit_offset) & ((1u << bit_width) - 1)) */
   3008     u64 mask = (bf.bit_width >= 64) ? ~(u64)0 : (((u64)1 << bf.bit_width) - 1u);
   3009     cbuf_puts(&t->body, "((");
   3010     c_bf_storage_lvalue(t, addr, bf, sty);
   3011     cbuf_puts(&t->body, " >> ");
   3012     cbuf_put_u64(&t->body, (u64)bf.bit_offset);
   3013     cbuf_puts(&t->body, ") & (");
   3014     cbuf_puts(&t->body, sty);
   3015     cbuf_puts(&t->body, ")0x");
   3016     static const char hex[] = "0123456789abcdef";
   3017     int started = 0;
   3018     for (int sh = 60; sh >= 0; sh -= 4) {
   3019       u32 nib = (u32)((mask >> sh) & 0xfu);
   3020       if (nib || started || sh == 0) {
   3021         cbuf_putc(&t->body, hex[nib]);
   3022         started = 1;
   3023       }
   3024     }
   3025     cbuf_puts(&t->body, ")");
   3026   }
   3027   c_emit_local_assign_close(t);
   3028 }
   3029 
   3030 void c_emit_bitfield_store(CTarget* t, Operand addr, Operand src,
   3031                            BitFieldAccess bf) {
   3032   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   3033   c_assert_no_index(t, addr, "bitfield_store");
   3034   if (bf.bit_width == 0) return; /* zero-width: no-op */
   3035   const char* sty = c_bf_storage_type(bf.storage.size);
   3036   if (!sty) {
   3037     compiler_panic(t->c, loc, "C target: bitfield storage size %u unsupported",
   3038                    (unsigned)bf.storage.size);
   3039   }
   3040   u64 mask = (bf.bit_width >= 64) ? ~(u64)0 : (((u64)1 << bf.bit_width) - 1u);
   3041   /* *(uintN_t*)p = (*(uintN_t*)p & ~(mask << bit_offset)) |
   3042    *               (((uintN_t)src & mask) << bit_offset); */
   3043   cbuf_puts(&t->body, "  ");
   3044   c_bf_storage_lvalue(t, addr, bf, sty);
   3045   cbuf_puts(&t->body, " = (");
   3046   c_bf_storage_lvalue(t, addr, bf, sty);
   3047   cbuf_puts(&t->body, " & ~((");
   3048   cbuf_puts(&t->body, sty);
   3049   cbuf_puts(&t->body, ")0x");
   3050   static const char hex[] = "0123456789abcdef";
   3051   int started = 0;
   3052   for (int sh = 60; sh >= 0; sh -= 4) {
   3053     u32 nib = (u32)((mask >> sh) & 0xfu);
   3054     if (nib || started || sh == 0) {
   3055       cbuf_putc(&t->body, hex[nib]);
   3056       started = 1;
   3057     }
   3058   }
   3059   cbuf_puts(&t->body, " << ");
   3060   cbuf_put_u64(&t->body, (u64)bf.bit_offset);
   3061   cbuf_puts(&t->body, ")) | ((((");
   3062   cbuf_puts(&t->body, sty);
   3063   cbuf_puts(&t->body, ")");
   3064   c_emit_operand(t, src);
   3065   cbuf_puts(&t->body, ") & (");
   3066   cbuf_puts(&t->body, sty);
   3067   cbuf_puts(&t->body, ")0x");
   3068   started = 0;
   3069   for (int sh = 60; sh >= 0; sh -= 4) {
   3070     u32 nib = (u32)((mask >> sh) & 0xfu);
   3071     if (nib || started || sh == 0) {
   3072       cbuf_putc(&t->body, hex[nib]);
   3073       started = 1;
   3074     }
   3075   }
   3076   cbuf_puts(&t->body, ") << ");
   3077   cbuf_put_u64(&t->body, (u64)bf.bit_offset);
   3078   cbuf_puts(&t->body, ");\n");
   3079 }
   3080 
   3081 /* === inline asm ===
   3082  *
   3083  * Re-serialize kit's asm-block IR (template + constraint-bound operands +
   3084  * clobbers) as GCC extended asm. The kit CG already speaks GCC-style
   3085  * constraint strings ("r", "=r", "+m", "[name]constraint", matching "0"...),
   3086  * so we pass the template through and emit the constraint+operand pairs in
   3087  * order. */
   3088 
/* Prototype for the asm-block emitter; the definition follows the static
 * helpers below.
 *   tmpl      - asm template text (the definition treats NULL as "")
 *   outs/oo   - `no` output constraints and their operands, index-paired
 *   ins/io    - `ni` input constraints and their operands, index-paired
 *   clobs     - `nc` interned clobber names */
void c_emit_asm_block(CTarget* t, const char* tmpl, const AsmConstraint* outs,
                      u32 no, Operand* oo, const AsmConstraint* ins, u32 ni,
                      const Operand* io, const Sym* clobs, u32 nc);
   3092 
   3093 static void c_emit_c_string_literal(CBuf* b, const char* s) {
   3094   cbuf_putc(b, '"');
   3095   for (; *s; ++s) {
   3096     char ch = *s;
   3097     if (ch == '"' || ch == '\\') {
   3098       cbuf_putc(b, '\\');
   3099       cbuf_putc(b, ch);
   3100     } else if (ch == '\n') {
   3101       cbuf_puts(b, "\\n");
   3102     } else if (ch == '\r') {
   3103       cbuf_puts(b, "\\r");
   3104     } else if (ch == '\t') {
   3105       cbuf_puts(b, "\\t");
   3106     } else if ((unsigned char)ch < 0x20 || (unsigned char)ch >= 0x7f) {
   3107       static const char hex[] = "0123456789abcdef";
   3108       cbuf_puts(b, "\\x");
   3109       cbuf_putc(b, hex[((unsigned char)ch >> 4) & 0xfu]);
   3110       cbuf_putc(b, hex[(unsigned char)ch & 0xfu]);
   3111     } else {
   3112       cbuf_putc(b, ch);
   3113     }
   3114   }
   3115   cbuf_putc(b, '"');
   3116 }
   3117 
   3118 /* "__kit_ao<i>" / "__kit_ai<i>": a unique name for the register temporary that
   3119  * carries a hard-register-pinned output/input operand. */
   3120 static void c_asm_reg_temp_name(char* out, size_t cap, int is_out, u32 idx) {
   3121   const char* pfx = is_out ? "__kit_ao" : "__kit_ai";
   3122   size_t i = 0;
   3123   char tmp[16];
   3124   size_t n = 0;
   3125   u32 v = idx;
   3126   while (*pfx && i + 1 < cap) out[i++] = *pfx++;
   3127   if (!v) tmp[n++] = '0';
   3128   while (v) {
   3129     tmp[n++] = (char)('0' + v % 10);
   3130     v /= 10;
   3131   }
   3132   while (n && i + 1 < cap) out[i++] = tmp[--n];
   3133   out[i] = '\0';
   3134 }
   3135 
   3136 /* Emit an asm output operand's lvalue expression (a plain local, or a
   3137  * dereferenced address for OPK_INDIRECT). Usable as both lvalue and rvalue. */
   3138 static void c_emit_asm_out_lvalue(CTarget* t, Operand op) {
   3139   if (op.kind == OPK_LOCAL) {
   3140     char rb[24];
   3141     c_ensure_local(t, op.v.local, op.type);
   3142     c_local_name(op.v.local, rb, sizeof rb);
   3143     cbuf_puts(&t->body, rb);
   3144   } else {
   3145     c_emit_addr_deref(t, op, op.type);
   3146   }
   3147 }
   3148 
   3149 void c_emit_asm_block(CTarget* t, const char* tmpl, const AsmConstraint* outs,
   3150                       u32 no, Operand* oo, const AsmConstraint* ins, u32 ni,
   3151                       const Operand* io, const Sym* clobs, u32 nc) {
   3152   char nm[24];
   3153   for (u32 i = 0; i < no; ++i) c_assert_no_index(t, oo[i], "asm_block out");
   3154   for (u32 i = 0; i < ni; ++i) c_assert_no_index(t, io[i], "asm_block in");
   3155 
   3156   /* GNU local register variables (AsmConstraint.reg): a target backend resolves
   3157    * the pin to a physical register, but the portable C backend has no register
   3158    * names to bind — so re-emit each pinned operand as a faithful
   3159    * `register T v __asm__("reg")` temporary (scoped in a block) and let the
   3160    * host compiler honor the binding. Dormant unless a frontend marks an
   3161    * operand; only the C frontend does, for register variables. */
   3162   int any_pin = 0;
   3163   for (u32 i = 0; i < no; ++i)
   3164     if (outs[i].reg) any_pin = 1;
   3165   for (u32 i = 0; i < ni; ++i)
   3166     if (ins[i].reg) any_pin = 1;
   3167 
   3168   if (any_pin) {
   3169     cbuf_puts(&t->body, "  {\n");
   3170     for (u32 i = 0; i < ni; ++i) {
   3171       if (!ins[i].reg) continue;
   3172       c_asm_reg_temp_name(nm, sizeof nm, 0, i);
   3173       cbuf_puts(&t->body, "    register ");
   3174       c_emit_type(t, &t->body, io[i].type);
   3175       cbuf_puts(&t->body, " ");
   3176       cbuf_puts(&t->body, nm);
   3177       cbuf_puts(&t->body, " __asm__(");
   3178       c_emit_c_string_literal(&t->body, pool_slice(t->c->global, ins[i].reg).s);
   3179       cbuf_puts(&t->body, ") = ");
   3180       c_emit_operand(t, io[i]);
   3181       cbuf_puts(&t->body, ";\n");
   3182     }
   3183     for (u32 i = 0; i < no; ++i) {
   3184       if (!outs[i].reg) continue;
   3185       c_asm_reg_temp_name(nm, sizeof nm, 1, i);
   3186       cbuf_puts(&t->body, "    register ");
   3187       c_emit_type(t, &t->body, oo[i].type);
   3188       cbuf_puts(&t->body, " ");
   3189       cbuf_puts(&t->body, nm);
   3190       cbuf_puts(&t->body, " __asm__(");
   3191       c_emit_c_string_literal(&t->body,
   3192                               pool_slice(t->c->global, outs[i].reg).s);
   3193       cbuf_puts(&t->body, ")");
   3194       if (outs[i].dir == KIT_CG_ASM_INOUT) {
   3195         cbuf_puts(&t->body, " = ");
   3196         c_emit_asm_out_lvalue(t, oo[i]);
   3197       }
   3198       cbuf_puts(&t->body, ";\n");
   3199     }
   3200   }
   3201 
   3202   cbuf_puts(&t->body, any_pin ? "    __asm__ __volatile__ ("
   3203                               : "  __asm__ __volatile__ (");
   3204   c_emit_c_string_literal(&t->body, tmpl ? tmpl : "");
   3205   /* Outputs. */
   3206   cbuf_puts(&t->body, " : ");
   3207   for (u32 i = 0; i < no; ++i) {
   3208     if (i > 0) cbuf_puts(&t->body, ", ");
   3209     if (outs[i].name) {
   3210       cbuf_puts(&t->body, "[");
   3211       cbuf_puts(&t->body, pool_slice(t->c->global, outs[i].name).s);
   3212       cbuf_puts(&t->body, "] ");
   3213     }
   3214     c_emit_c_string_literal(&t->body, outs[i].str ? outs[i].str : "");
   3215     cbuf_puts(&t->body, "(");
   3216     /* Outputs must be an lvalue. OPK_LOCAL is a plain C local; this
   3217      * works directly. OPK_LOCAL / OPK_INDIRECT also produce lvalues. A pinned
   3218      * output names its register temporary instead. */
   3219     if (outs[i].reg) {
   3220       c_asm_reg_temp_name(nm, sizeof nm, 1, i);
   3221       cbuf_puts(&t->body, nm);
   3222     } else {
   3223       c_emit_asm_out_lvalue(t, oo[i]);
   3224     }
   3225     cbuf_puts(&t->body, ")");
   3226   }
   3227   /* Inputs. kit synthesizes a matching `"N"` input for every ASM_INOUT
   3228    * output (so its IR sees a fresh read), but gcc treats `+r` outputs as
   3229    * already serving the read role and rejects a redundant matching input.
   3230    * Drop those synthesized matches when the referenced output is `+`-tied. */
   3231   cbuf_puts(&t->body, " : ");
   3232   int emitted_any = 0;
   3233   for (u32 i = 0; i < ni; ++i) {
   3234     const char* cs = ins[i].str ? ins[i].str : "";
   3235     if (cs[0] >= '0' && cs[0] <= '9') {
   3236       u32 idx = (u32)(cs[0] - '0');
   3237       if (idx < no && outs[idx].str && outs[idx].str[0] == '+') continue;
   3238     }
   3239     if (emitted_any) cbuf_puts(&t->body, ", ");
   3240     emitted_any = 1;
   3241     if (ins[i].name) {
   3242       cbuf_puts(&t->body, "[");
   3243       cbuf_puts(&t->body, pool_slice(t->c->global, ins[i].name).s);
   3244       cbuf_puts(&t->body, "] ");
   3245     }
   3246     c_emit_c_string_literal(&t->body, cs);
   3247     cbuf_puts(&t->body, "(");
   3248     if (ins[i].reg) {
   3249       c_asm_reg_temp_name(nm, sizeof nm, 0, i);
   3250       cbuf_puts(&t->body, nm);
   3251     } else {
   3252       c_emit_operand(t, io[i]);
   3253     }
   3254     cbuf_puts(&t->body, ")");
   3255   }
   3256   /* Clobbers. */
   3257   cbuf_puts(&t->body, " : ");
   3258   for (u32 i = 0; i < nc; ++i) {
   3259     if (i > 0) cbuf_puts(&t->body, ", ");
   3260     c_emit_c_string_literal(&t->body, pool_slice(t->c->global, clobs[i]).s);
   3261   }
   3262   cbuf_puts(&t->body, ");\n");
   3263 
   3264   if (any_pin) {
   3265     for (u32 i = 0; i < no; ++i) {
   3266       if (!outs[i].reg) continue;
   3267       c_asm_reg_temp_name(nm, sizeof nm, 1, i);
   3268       cbuf_puts(&t->body, "    ");
   3269       c_emit_asm_out_lvalue(t, oo[i]);
   3270       cbuf_puts(&t->body, " = ");
   3271       cbuf_puts(&t->body, nm);
   3272       cbuf_puts(&t->body, ";\n");
   3273     }
   3274     cbuf_puts(&t->body, "  }\n");
   3275   }
   3276 }
   3277 
   3278 /* === load_const ===
   3279  *
   3280  * Used by CG for non-integer literal pushes (mainly floats —
   3281  * `kit_cg_push_float`). Bytes are the target's ABI encoding of the value; we
   3282  * copy them into the dst local via a static const byte array and
   3283  * __builtin_memcpy so any host C compiler sees the same bit pattern. */
   3284 
   3285 void c_emit_load_const(CTarget* t, Operand dst, ConstBytes cb);
   3286 
   3287 void c_emit_load_const(CTarget* t, Operand dst, ConstBytes cb) {
   3288   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   3289   if (dst.kind != OPK_LOCAL) {
   3290     compiler_panic(t->c, loc, "C target: load_const dst must be LOCAL");
   3291   }
   3292   c_ensure_local(t, dst.v.local, dst.type);
   3293   char buf[24];
   3294   c_local_name(dst.v.local, buf, sizeof buf);
   3295   u32 id = ++t->next_tmp;
   3296   cbuf_puts(&t->body, "  { static const uint8_t __k");
   3297   cbuf_put_u64(&t->body, (u64)id);
   3298   cbuf_puts(&t->body, "[");
   3299   cbuf_put_u64(&t->body, (u64)cb.size);
   3300   cbuf_puts(&t->body, "] = {");
   3301   static const char hex[] = "0123456789abcdef";
   3302   for (u32 i = 0; i < cb.size; ++i) {
   3303     if (i > 0) cbuf_putc(&t->body, ',');
   3304     cbuf_puts(&t->body, "0x");
   3305     cbuf_putc(&t->body, hex[(cb.bytes[i] >> 4) & 0xfu]);
   3306     cbuf_putc(&t->body, hex[cb.bytes[i] & 0xfu]);
   3307   }
   3308   cbuf_puts(&t->body, "}; __builtin_memcpy(&");
   3309   cbuf_puts(&t->body, buf);
   3310   cbuf_puts(&t->body, ", __k");
   3311   cbuf_put_u64(&t->body, (u64)id);
   3312   cbuf_puts(&t->body, ", ");
   3313   cbuf_put_u64(&t->body, (u64)cb.size);
   3314   cbuf_puts(&t->body, "); }\n");
   3315 }
   3316 
   3317 /* === atomics ===
   3318  *
   3319  * Lowered to gcc/clang's `__atomic_*` generic builtins. The host compiler
   3320  * picks the inline sequence vs. libcall and applies the requested memory
   3321  * order. kit's KitCgMemOrder enum aligns 1-1 with the `__ATOMIC_*` constants.
   3322  */
   3323 
   3324 static const char* c_memorder_token(KitCgMemOrder o) {
   3325   switch (o) {
   3326     case KIT_CG_MO_RELAXED:
   3327       return "__ATOMIC_RELAXED";
   3328     case KIT_CG_MO_CONSUME:
   3329       return "__ATOMIC_CONSUME";
   3330     case KIT_CG_MO_ACQUIRE:
   3331       return "__ATOMIC_ACQUIRE";
   3332     case KIT_CG_MO_RELEASE:
   3333       return "__ATOMIC_RELEASE";
   3334     case KIT_CG_MO_ACQ_REL:
   3335       return "__ATOMIC_ACQ_REL";
   3336     case KIT_CG_MO_SEQ_CST:
   3337       return "__ATOMIC_SEQ_CST";
   3338   }
   3339   return "__ATOMIC_SEQ_CST";
   3340 }
   3341 
/* Prototypes for the atomic emitters defined right below. */

/* dst = atomic load through addr, with memory order o. */
void c_emit_atomic_load(CTarget* t, Operand dst, Operand addr, MemAccess m,
                        KitCgMemOrder o);
/* Atomic store of src through addr, with memory order o. */
void c_emit_atomic_store(CTarget* t, Operand addr, Operand src, MemAccess m,
                         KitCgMemOrder o);
/* dst = result of the read-modify-write builtin for `op` applied to addr
 * and val. */
void c_emit_atomic_rmw(CTarget* t, KitCgAtomicOp op, Operand dst, Operand addr,
                       Operand val, MemAccess m, KitCgMemOrder o);
/* Compare-and-swap: `ok` receives the builtin's result, `prior` the scratch
 * expected value after the call; so/fo are the success/failure orders. */
void c_emit_atomic_cas(CTarget* t, Operand prior, Operand ok, Operand addr,
                       Operand expected, Operand desired, MemAccess m,
                       KitCgMemOrder so, KitCgMemOrder fo);
/* Thread fence with memory order o. */
void c_emit_fence(CTarget* t, KitCgMemOrder o);
   3352 
   3353 void c_emit_atomic_load(CTarget* t, Operand dst, Operand addr, MemAccess m,
   3354                         KitCgMemOrder o) {
   3355   (void)m;
   3356   c_assert_no_index(t, addr, "atomic_load");
   3357   c_ensure_local(t, dst.v.local, dst.type);
   3358   /* __atomic_load_n returns a value of the pointed-to type (dst.type). */
   3359   c_emit_local_assign_open(t, dst.v.local, dst.type);
   3360   cbuf_puts(&t->body, "__atomic_load_n((");
   3361   c_emit_type(t, &t->body, dst.type);
   3362   cbuf_puts(&t->body, "*)");
   3363   c_emit_operand(t, addr);
   3364   cbuf_puts(&t->body, ", ");
   3365   cbuf_puts(&t->body, c_memorder_token(o));
   3366   cbuf_puts(&t->body, ")");
   3367   c_emit_local_assign_close(t);
   3368 }
   3369 
   3370 void c_emit_atomic_store(CTarget* t, Operand addr, Operand src, MemAccess m,
   3371                          KitCgMemOrder o) {
   3372   (void)m;
   3373   c_assert_no_index(t, addr, "atomic_store");
   3374   cbuf_puts(&t->body, "  __atomic_store_n((");
   3375   c_emit_type(t, &t->body, src.type);
   3376   cbuf_puts(&t->body, "*)");
   3377   c_emit_operand(t, addr);
   3378   cbuf_puts(&t->body, ", ");
   3379   c_emit_operand_as(t, src, src.type);
   3380   cbuf_puts(&t->body, ", ");
   3381   cbuf_puts(&t->body, c_memorder_token(o));
   3382   cbuf_puts(&t->body, ");\n");
   3383 }
   3384 
   3385 static const char* c_atomic_op_builtin(KitCgAtomicOp op) {
   3386   switch (op) {
   3387     case KIT_CG_ATOMIC_XCHG:
   3388       return "__atomic_exchange_n";
   3389     case KIT_CG_ATOMIC_ADD:
   3390       return "__atomic_fetch_add";
   3391     case KIT_CG_ATOMIC_SUB:
   3392       return "__atomic_fetch_sub";
   3393     case KIT_CG_ATOMIC_AND:
   3394       return "__atomic_fetch_and";
   3395     case KIT_CG_ATOMIC_OR:
   3396       return "__atomic_fetch_or";
   3397     case KIT_CG_ATOMIC_XOR:
   3398       return "__atomic_fetch_xor";
   3399     case KIT_CG_ATOMIC_NAND:
   3400       return "__atomic_fetch_nand";
   3401   }
   3402   return NULL;
   3403 }
   3404 
   3405 void c_emit_atomic_rmw(CTarget* t, KitCgAtomicOp op, Operand dst, Operand addr,
   3406                        Operand val, MemAccess m, KitCgMemOrder o) {
   3407   (void)m;
   3408   SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
   3409   c_assert_no_index(t, addr, "atomic_rmw");
   3410   const char* fn = c_atomic_op_builtin(op);
   3411   if (!fn) {
   3412     compiler_panic(t->c, loc, "C target: unknown atomic op %d", (int)op);
   3413   }
   3414   c_ensure_local(t, dst.v.local, dst.type);
   3415   /* __atomic_fetch_* returns the prior value of the pointed-to type. */
   3416   c_emit_local_assign_open(t, dst.v.local, val.type);
   3417   cbuf_puts(&t->body, fn);
   3418   cbuf_puts(&t->body, "((");
   3419   c_emit_type(t, &t->body, val.type);
   3420   cbuf_puts(&t->body, "*)");
   3421   c_emit_operand(t, addr);
   3422   cbuf_puts(&t->body, ", ");
   3423   c_emit_operand_as(t, val, val.type);
   3424   cbuf_puts(&t->body, ", ");
   3425   cbuf_puts(&t->body, c_memorder_token(o));
   3426   cbuf_puts(&t->body, ")");
   3427   c_emit_local_assign_close(t);
   3428 }
   3429 
   3430 void c_emit_atomic_cas(CTarget* t, Operand prior, Operand ok, Operand addr,
   3431                        Operand expected, Operand desired, MemAccess m,
   3432                        KitCgMemOrder so, KitCgMemOrder fo) {
   3433   (void)m;
   3434   c_assert_no_index(t, addr, "atomic_cas");
   3435   /* gcc's __atomic_compare_exchange_n needs a real lvalue holding the
   3436    * expected value because it is updated on failure. Materialize a scratch
   3437    * local with the compare type, then copy it out to the prior result. */
   3438   c_ensure_local(t, prior.v.local, prior.type);
   3439   c_ensure_local(t, ok.v.local, ok.type);
   3440   u32 id = ++t->next_tmp;
   3441   cbuf_puts(&t->body, "  { ");
   3442   c_emit_type(t, &t->body, prior.type);
   3443   cbuf_puts(&t->body, " __cas");
   3444   cbuf_put_u64(&t->body, (u64)id);
   3445   cbuf_puts(&t->body, " = ");
   3446   c_emit_operand_as(t, expected, prior.type);
   3447   cbuf_puts(&t->body, "; ");
   3448   char ok_name[24], prior_name[24];
   3449   c_local_name(ok.v.local, ok_name, sizeof ok_name);
   3450   c_local_name(prior.v.local, prior_name, sizeof prior_name);
   3451   cbuf_puts(&t->body, ok_name);
   3452   cbuf_puts(&t->body, " = (");
   3453   c_emit_type(t, &t->body, ok.type);
   3454   cbuf_puts(&t->body, ")__atomic_compare_exchange_n((");
   3455   c_emit_type(t, &t->body, prior.type);
   3456   cbuf_puts(&t->body, "*)");
   3457   c_emit_operand(t, addr);
   3458   cbuf_puts(&t->body, ", &__cas");
   3459   cbuf_put_u64(&t->body, (u64)id);
   3460   cbuf_puts(&t->body, ", ");
   3461   c_emit_operand_as(t, desired, prior.type);
   3462   cbuf_puts(&t->body, ", 0, ");
   3463   cbuf_puts(&t->body, c_memorder_token(so));
   3464   cbuf_puts(&t->body, ", ");
   3465   cbuf_puts(&t->body, c_memorder_token(fo));
   3466   cbuf_puts(&t->body, "); ");
   3467   /* prior local = __cas; */
   3468   cbuf_puts(&t->body, prior_name);
   3469   cbuf_puts(&t->body, " = __cas");
   3470   cbuf_put_u64(&t->body, (u64)id);
   3471   cbuf_puts(&t->body, "; }\n");
   3472 }
   3473 
   3474 void c_emit_fence(CTarget* t, KitCgMemOrder o) {
   3475   cbuf_puts(&t->body, "  __atomic_thread_fence(");
   3476   cbuf_puts(&t->body, c_memorder_token(o));
   3477   cbuf_puts(&t->body, ");\n");
   3478 }
   3479 
   3480 /* === set_loc === */
   3481 
   3482 static void cbuf_put_line_filename(CBuf* b, KitSlice s) {
   3483   size_t i;
   3484   cbuf_putc(b, '"');
   3485   for (i = 0; i < s.len; ++i) {
   3486     {
   3487       unsigned char ch = (unsigned char)s.s[i];
   3488       switch (ch) {
   3489         case '\\':
   3490         case '"':
   3491           cbuf_putc(b, '\\');
   3492           cbuf_putc(b, (char)ch);
   3493           break;
   3494         case '\n':
   3495           cbuf_puts(b, "\\n");
   3496           break;
   3497         case '\r':
   3498           cbuf_puts(b, "\\r");
   3499           break;
   3500         case '\t':
   3501           cbuf_puts(b, "\\t");
   3502           break;
   3503         default:
   3504           cbuf_putc(b, (char)ch);
   3505           break;
   3506       }
   3507     }
   3508   }
   3509   cbuf_putc(b, '"');
   3510 }
   3511 
   3512 void c_emit_set_loc(CTarget* t, SrcLoc l) {
   3513   KitSlice file;
   3514 
   3515   if (!t->cur_fn || l.file_id == 0 || l.line == 0) return;
   3516   if (t->have_emitted_loc && t->emitted_loc.file_id == l.file_id &&
   3517       t->emitted_loc.line == l.line) {
   3518     return;
   3519   }
   3520 
   3521   file = kit_compiler_file_name(t->c, l.file_id);
   3522   if (!file.len) return;
   3523 
   3524   cbuf_puts(&t->body, "#line ");
   3525   cbuf_put_u64(&t->body, (u64)l.line);
   3526   cbuf_putc(&t->body, ' ');
   3527   cbuf_put_line_filename(&t->body, file);
   3528   cbuf_putc(&t->body, '\n');
   3529 
   3530   t->emitted_loc = l;
   3531   t->have_emitted_loc = 1;
   3532 }
   3533 
   3534 /* === data emission ===
   3535  *
   3536  * Walks the ObjBuilder's symbol table at finalize and emits a C declaration
   3537  * for every data object — defined or extern. Bytes are emitted verbatim as a
   3538  * `uint8_t name[N] = { 0x.., ... }` initializer. Relocations targeting bytes
   3539  * inside a defined symbol are spelled as runtime fixups in a constructor; this
   3540  * covers both same-TU and cross-TU references uniformly and avoids the C
   3541  * static-initializer restrictions on non-constant addresses.
   3542  *
   3543  * The host C compiler re-applies the Mach-O leading-underscore on link, so the
   3544  * C source uses the kit linker name minus the `_` prefix (matching
   3545  * c_sym_name elsewhere in this file). */
   3546 
   3547 static int c_is_data_section(const Section* sec) {
   3548   if (!sec) return 0;
   3549   switch (sec->kind) {
   3550     case SEC_DATA:
   3551     case SEC_RODATA:
   3552     case SEC_BSS:
   3553       return 1;
   3554     case SEC_OTHER:
   3555       /* User-named sections holding allocated data (e.g. `.text.hot` would
   3556        * be EXEC, but a custom data section is just SF_ALLOC). */
   3557       return (sec->flags & SF_ALLOC) && !(sec->flags & SF_EXEC);
   3558     default:
   3559       return 0;
   3560   }
   3561 }
   3562 
   3563 static void c_emit_link_attrs(CBuf* b, const ObjSym* os) {
   3564   if (os->bind == SB_WEAK) cbuf_puts(b, "__attribute__((weak)) ");
   3565   if (os->vis == SV_HIDDEN) {
   3566     cbuf_puts(b, "__attribute__((visibility(\"hidden\"))) ");
   3567   } else if (os->vis == SV_PROTECTED) {
   3568     cbuf_puts(b, "__attribute__((visibility(\"protected\"))) ");
   3569   }
   3570 }
   3571 
/* Copy `len` bytes of section `sec`, starting at byte offset `ofs`, into
 * `out`. The Section stores its bytes in a chunked Buf, so the read is
 * delegated to buf_read, which does the splice across chunks. */
static void c_read_section_bytes(const Section* sec, u32 ofs, u8* out,
                                 size_t len) {
  buf_read(&sec->bytes, ofs, out, len);
}
   3578 
   3579 static void c_emit_data_bytes(CBuf* b, const u8* bytes, size_t n) {
   3580   cbuf_puts(b, " = {");
   3581   for (size_t i = 0; i < n; ++i) {
   3582     if (i > 0) cbuf_putc(b, ',');
   3583     if ((i & 15u) == 0) cbuf_puts(b, "\n    ");
   3584     cbuf_puts(b, "0x");
   3585     static const char hex[] = "0123456789abcdef";
   3586     cbuf_putc(b, hex[(bytes[i] >> 4) & 0xfu]);
   3587     cbuf_putc(b, hex[bytes[i] & 0xfu]);
   3588   }
   3589   cbuf_puts(b, "\n  }");
   3590 }
   3591 
   3592 /* Mach-O TLS support: the user-visible SK_TLS symbol is a 24-byte TLV
   3593  * descriptor in __DATA,__thread_vars, and the actual initial bytes live in
   3594  * a synthesized `<name>$tlv$init` sym in __thread_data (or __thread_bss).
   3595  * The descriptor carries an R_ABS64 reloc at offset +16 pointing at that
   3596  * init sym. For C-source emission we don't care about the descriptor at all
   3597  * — we just emit `_Thread_local` with the init sym's bytes and let the host
   3598  * C compiler synthesize whatever TLV plumbing it needs. */
   3599 
   3600 /* Find the data init sym referenced by a Mach-O TLS descriptor at
   3601  * `desc_base` in section `desc_sec`. Looks for an R_ABS64 reloc at
   3602  * `desc_base + 16`. Returns OBJ_SYM_NONE if not found. */
   3603 static ObjSymId c_macho_tls_find_init(CTarget* t, ObjSecId desc_sec,
   3604                                       u32 desc_base) {
   3605   u32 total = obj_reloc_total(t->obj);
   3606   for (u32 i = 0; i < total; ++i) {
   3607     const Reloc* r = obj_reloc_at(t->obj, i);
   3608     if (r->section_id != desc_sec) continue;
   3609     if (r->offset != desc_base + 16u) continue;
   3610     return r->sym;
   3611   }
   3612   return OBJ_SYM_NONE;
   3613 }
   3614 
   3615 /* Returns 1 if the section is __DATA,__thread_vars (the descriptor section
   3616  * on Mach-O). Compared by interned Sym id. */
   3617 static int c_sec_name_is_macho_tvars(CTarget* t, const Section* sec) {
   3618   if (!sec) return 0;
   3619   Sym tvars =
   3620       pool_intern_slice(t->c->global, SLICE_LIT("__DATA,__thread_vars"));
   3621   return sec->name == tvars;
   3622 }
   3623 
   3624 /* Returns 1 if any relocation falls into the half-open range [base, base+size)
   3625  * of section `sec_id` (i.e. patches the bytes of this symbol). */
   3626 static int c_sym_has_relocs(CTarget* t, ObjSecId sec_id, u32 base, u32 size) {
   3627   u32 total = obj_reloc_total(t->obj);
   3628   for (u32 i = 0; i < total; ++i) {
   3629     const Reloc* r = obj_reloc_at(t->obj, i);
   3630     if (r->section_id != sec_id) continue;
   3631     if (r->offset >= base && r->offset < base + size) return 1;
   3632   }
   3633   return 0;
   3634 }
   3635 
   3636 /* Emit one data symbol: extern declaration if undef, otherwise the full
   3637  * definition with bytes. Function symbols are skipped — those go through the
   3638  * forwards path. */
   3639 static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) {
   3640   if (c_is_local_static_sym(t, id)) return;
   3641   if (os->kind == SK_FUNC || os->kind == SK_IFUNC) return;
   3642   if (os->kind == SK_SECTION || os->kind == SK_FILE) return;
   3643   /* On descriptor-model TLS targets (Mach-O), obj_tls.c synthesizes
   3644    * `__tlv_bootstrap` as an SK_UNDEF extern for the TLV descriptor's first
   3645    * field. The C target delegates all TLS lowering to the host compiler via
   3646    * `_Thread_local`, so this descriptor-time-only symbol has no place in the
   3647    * emitted source. */
   3648   if (os->kind == SK_UNDEF && obj_format_tls_via_descriptor(t->c)) {
   3649     const ObjBuilder* ob = t->obj;
   3650     if (id == obj_tlv_bootstrap_get(ob)) return;
   3651   }
   3652   const char* nm = c_sym_name(t, id);
   3653   CBuf* b = &t->data_defs;
   3654   /* SK_TLS user-visible syms need a _Thread_local prefix. On ELF the sym
   3655    * lives in .tdata/.tbss with the right bytes/size and our normal data
   3656    * path handles them once we set the qualifier. On Mach-O the user-visible
   3657    * sym is the 24-byte TLV descriptor — its bytes are not the user's data,
   3658    * so we can't faithfully reproduce it in C; bail to a SKIP. */
   3659   int is_tls = (os->kind == SK_TLS);
   3660 
   3661   /* Extern (undefined) data — only declare if referenced. We can't readily
   3662    * distinguish "referenced as data" from "referenced as func address" here,
   3663    * so declare it as `extern uint8_t name[];` only if it was actually
   3664    * referenced from somewhere; otherwise it'd produce unused warnings. The
   3665    * obj symbol's `referenced` bit is exactly the right signal. */
   3666   /* SK_TLS with no defining section = extern TLS — falls through to the
   3667    * undef branch below with the `_Thread_local` qualifier. */
   3668   /* Extern: SK_UNDEF, or any other defined-kind sym that the producer
   3669    * marked as having no defining section (the C frontend uses SK_OBJ +
   3670    * section=NONE for `extern T x __attribute__((weak));`). */
   3671   int is_extern = (os->kind == SK_UNDEF) ||
   3672                   (os->kind != SK_COMMON && os->section_id == OBJ_SEC_NONE);
   3673   if (is_extern) {
   3674     /* Always declare extern data syms in C source: the host cc tolerates
   3675      * unused externs, and the ObjSym::referenced bit isn't reliably set on
   3676      * syms the C target only addresses by writing the name into the source
   3677      * (no relocation gets emitted against them).
   3678      *
   3679      * Weak externs need different attributes per object format: on Mach-O
   3680      * the `weak` attribute requires a definition; the right spelling for an
   3681      * undefined weak ref is `__attribute__((weak_import))`. On ELF/PE the
   3682      * existing `weak` attribute works as expected. */
   3683     if (os->bind == SB_WEAK) {
   3684       const ObjFormatImpl* fmt = obj_format_lookup(t->c->target.obj);
   3685       const char* weak_attr = (fmt && fmt->weak_undef_attr)
   3686                                   ? fmt->weak_undef_attr
   3687                                   : "weak";
   3688       cbuf_puts(b, "__attribute__((");
   3689       cbuf_puts(b, weak_attr);
   3690       cbuf_puts(b, ")) ");
   3691     }
   3692     if (os->vis == SV_HIDDEN) {
   3693       cbuf_puts(b, "__attribute__((visibility(\"hidden\"))) ");
   3694     } else if (os->vis == SV_PROTECTED) {
   3695       cbuf_puts(b, "__attribute__((visibility(\"protected\"))) ");
   3696     }
   3697     cbuf_puts(b, "extern ");
   3698     if (is_tls) cbuf_puts(b, "_Thread_local ");
   3699     cbuf_puts(b, "uint8_t ");
   3700     cbuf_puts(b, nm);
   3701     cbuf_puts(b, "[];\n");
   3702     return;
   3703   }
   3704   if (is_tls && obj_format_tls_via_descriptor(t->c)) {
   3705     /* Mach-O splits TLS across two object-file symbols (see obj_tls.c): the
   3706      * user-visible sym is a 24-byte TLV descriptor in
   3707      * __DATA,__thread_vars; the actual initial bytes live in a synthesized
   3708      * `<name>$tlv$init` sym in __DATA,__thread_data (or __thread_bss). For
   3709      * C source emission we don't need either of those — `_Thread_local`
   3710      * delegates to the host C compiler, which builds its own descriptor.
   3711      *
   3712      * We use the descriptor sym as the carrier (its name is what user code
   3713      * references) and pull the initial bytes/size/alignment from the init
   3714      * sym, found via the R_ABS64 reloc at descriptor offset +16. The init
   3715      * sym is skipped in its own iteration. */
   3716     const Section* desc_sec = obj_section_get(t->obj, os->section_id);
   3717     if (c_sec_name_is_macho_tvars(t, desc_sec)) {
   3718       ObjSymId init_id =
   3719           c_macho_tls_find_init(t, os->section_id, (u32)os->value);
   3720       if (init_id == OBJ_SYM_NONE) {
   3721         compiler_panic(t->c, (SrcLoc){0, 0, 0},
   3722                        "C target: Mach-O TLS descriptor missing init reloc");
   3723       }
   3724       const ObjSym* init_os = obj_symbol_get(t->obj, init_id);
   3725       if (!init_os || init_os->section_id == OBJ_SEC_NONE) {
   3726         compiler_panic(t->c, (SrcLoc){0, 0, 0},
   3727                        "C target: Mach-O TLS init sym not defined");
   3728       }
   3729       const Section* init_sec = obj_section_get(t->obj, init_os->section_id);
   3730       u32 init_base = (u32)init_os->value;
   3731       u32 init_size = (u32)init_os->size;
   3732       /* TLS data with relocations would need the constructor-fixup path
   3733        * (and we'd have to rewrite the reloc target's section/offset to
   3734        * the descriptor's name in the emitted C). No test currently
   3735        * exercises this; surface it as a clear panic-as-skip if we hit it. */
   3736       if (c_sym_has_relocs(t, init_os->section_id, init_base, init_size)) {
   3737         compiler_panic(t->c, (SrcLoc){0, 0, 0},
   3738                        "C target: Mach-O TLS with pointer init not yet "
   3739                        "supported");
   3740       }
   3741       if (os->bind == SB_LOCAL) cbuf_puts(b, "static ");
   3742       cbuf_puts(b, "_Thread_local ");
   3743       c_emit_link_attrs(b, os);
   3744       cbuf_puts(b, "__attribute__((unused)) ");
   3745       cbuf_puts(b, "_Alignas(");
   3746       cbuf_put_u64(b, init_sec->align ? init_sec->align : 1);
   3747       cbuf_puts(b, ") uint8_t ");
   3748       cbuf_puts(b, nm);
   3749       cbuf_puts(b, "[");
   3750       cbuf_put_u64(b, init_size ? init_size : 1);
   3751       cbuf_puts(b, "]");
   3752       if (init_sec->kind == SEC_BSS || init_sec->sem == SSEM_NOBITS ||
   3753           init_size == 0) {
   3754         cbuf_puts(b, ";\n");
   3755       } else {
   3756         Heap* h = t->c->ctx->heap;
   3757         u8* bytes = (u8*)h->alloc(h, init_size, 1);
   3758         if (!bytes) {
   3759           compiler_panic(t->c, (SrcLoc){0, 0, 0},
   3760                          "C target: oom on TLS init bytes");
   3761         }
   3762         c_read_section_bytes(init_sec, init_base, bytes, init_size);
   3763         c_emit_data_bytes(b, bytes, init_size);
   3764         h->free(h, bytes, init_size);
   3765         cbuf_puts(b, ";\n");
   3766       }
   3767       return;
   3768     }
   3769     /* Not the descriptor: this is the synthesized `<name>$tlv$init` data
   3770      * sym (or a __thread_ptrs entry). The descriptor case above already
   3771      * emitted the user-facing _Thread_local; nothing more to do. */
   3772     return;
   3773   }
   3774   if (os->kind == SK_COMMON) {
   3775     /* Common — uninitialized, with explicit alignment. Emit as
   3776      * tentative-definition (`uint8_t name[size];` at file scope), which C
   3777      * treats as a common-style definition under -fcommon. */
   3778     cbuf_puts(b, "__attribute__((unused)) _Alignas(");
   3779     cbuf_put_u64(b, os->common_align ? os->common_align : 1);
   3780     cbuf_puts(b, ") uint8_t ");
   3781     cbuf_puts(b, nm);
   3782     cbuf_puts(b, "[");
   3783     cbuf_put_u64(b, os->size);
   3784     cbuf_puts(b, "];\n");
   3785     return;
   3786   }
   3787   if (os->section_id == OBJ_SEC_NONE) return;
   3788   const Section* sec = obj_section_get(t->obj, os->section_id);
   3789   if (!c_is_data_section(sec)) return;
   3790   u32 base = (u32)os->value;
   3791   u32 size = (u32)os->size;
   3792   u32 nrelocs = 0;
   3793   u32 total_relocs = obj_reloc_total(t->obj);
   3794   for (u32 i = 0; i < total_relocs; ++i) {
   3795     const Reloc* r = obj_reloc_at(t->obj, i);
   3796     if (r->section_id == os->section_id && r->offset >= base &&
   3797         r->offset < base + size) {
   3798       nrelocs++;
   3799     }
   3800   }
   3801 
   3802   Heap* h = t->c->ctx->heap;
   3803   const Reloc** rs = NULL;
   3804   if (nrelocs) {
   3805     rs = (const Reloc**)h->alloc(h, nrelocs * sizeof(const Reloc*), 1);
   3806     u32 j = 0;
   3807     for (u32 i = 0; i < total_relocs; ++i) {
   3808       const Reloc* r = obj_reloc_at(t->obj, i);
   3809       if (r->section_id == os->section_id && r->offset >= base &&
   3810           r->offset < base + size) {
   3811         rs[j++] = r;
   3812       }
   3813     }
   3814     for (u32 i = 1; i < nrelocs; ++i) {
   3815       const Reloc* tmp = rs[i];
   3816       u32 k = i;
   3817       while (k > 0 && rs[k - 1]->offset > tmp->offset) {
   3818         rs[k] = rs[k - 1];
   3819         k--;
   3820       }
   3821       rs[k] = tmp;
   3822     }
   3823   }
   3824 
   3825   cbuf_puts(b, "struct ");
   3826   if (nrelocs > 0) cbuf_puts(b, "__attribute__((packed)) ");
   3827   cbuf_puts(b, "__kit_data_");
   3828   cbuf_puts(b, nm);
   3829   cbuf_puts(b, " {\n");
   3830 
   3831   if (nrelocs == 0) {
   3832     cbuf_puts(b, "  uint8_t raw[");
   3833     cbuf_put_u64(b, size ? size : 1);
   3834     cbuf_puts(b, "];\n");
   3835   } else {
   3836     u32 cur = base;
   3837     for (u32 i = 0; i < nrelocs; ++i) {
   3838       const Reloc* r = rs[i];
   3839       if (r->offset > cur) {
   3840         cbuf_puts(b, "  uint8_t chunk_");
   3841         cbuf_put_u64(b, i);
   3842         cbuf_puts(b, "[");
   3843         cbuf_put_u64(b, r->offset - cur);
   3844         cbuf_puts(b, "];\n");
   3845       }
   3846       u32 width = (r->kind == R_ABS32) ? 4 : 8;
   3847       const char* ty = (width == 4) ? "uint32_t" : "void*";
   3848       cbuf_puts(b, "  ");
   3849       cbuf_puts(b, ty);
   3850       cbuf_puts(b, " ptr_");
   3851       cbuf_put_u64(b, i);
   3852       cbuf_puts(b, ";\n");
   3853       cur = r->offset + width;
   3854     }
   3855     if (cur < base + size) {
   3856       cbuf_puts(b, "  uint8_t chunk_");
   3857       cbuf_put_u64(b, nrelocs);
   3858       cbuf_puts(b, "[");
   3859       cbuf_put_u64(b, base + size - cur);
   3860       cbuf_puts(b, "];\n");
   3861     }
   3862   }
   3863   cbuf_puts(b, "};\n");
   3864 
   3865   if (os->bind == SB_LOCAL) cbuf_puts(b, "static ");
   3866   if (is_tls) cbuf_puts(b, "_Thread_local ");
   3867   c_emit_link_attrs(b, os);
   3868   cbuf_puts(b, "__attribute__((unused)) ");
   3869 
   3870   int is_ro = (sec->kind == SEC_RODATA);
   3871   if (is_ro) cbuf_puts(b, "const ");
   3872 
   3873   cbuf_puts(b, "_Alignas(");
   3874   cbuf_put_u64(b, sec->align ? sec->align : 1);
   3875   cbuf_puts(b, ") struct __kit_data_");
   3876   cbuf_puts(b, nm);
   3877   cbuf_puts(b, " ");
   3878   cbuf_puts(b, nm);
   3879 
   3880   if (sec->kind == SEC_BSS || sec->sem == SSEM_NOBITS) {
   3881     cbuf_puts(b, ";\n");
   3882   } else if (size == 0) {
   3883     cbuf_puts(b, " = {{0}};\n");
   3884   } else {
   3885     cbuf_puts(b, " = {\n");
   3886     u8* bytes = (u8*)h->alloc(h, size, 1);
   3887     c_read_section_bytes(sec, base, bytes, size);
   3888 
   3889     if (nrelocs == 0) {
   3890       cbuf_puts(b, "  .raw = {");
   3891       for (u32 i = 0; i < size; ++i) {
   3892         if (i > 0) cbuf_puts(b, ", ");
   3893         cbuf_put_u64(b, bytes[i]);
   3894       }
   3895       cbuf_puts(b, "}\n");
   3896     } else {
   3897       u32 cur = base;
   3898       for (u32 i = 0; i < nrelocs; ++i) {
   3899         const Reloc* r = rs[i];
   3900         if (r->offset > cur) {
   3901           cbuf_puts(b, "  .chunk_");
   3902           cbuf_put_u64(b, i);
   3903           cbuf_puts(b, " = {");
   3904           for (u32 k = 0; k < r->offset - cur; ++k) {
   3905             if (k > 0) cbuf_puts(b, ", ");
   3906             cbuf_put_u64(b, bytes[cur - base + k]);
   3907           }
   3908           cbuf_puts(b, "},\n");
   3909         }
   3910 
   3911         u32 width = (r->kind == R_ABS32) ? 4 : 8;
   3912         c_ensure_forward_decl(t, r->sym, 0);
   3913         const char* tgt = c_sym_name(t, r->sym);
   3914         const char* cast = (width == 4) ? "(uint32_t)(uintptr_t)" : "(void*)";
   3915 
   3916         cbuf_puts(b, "  .ptr_");
   3917         cbuf_put_u64(b, i);
   3918         cbuf_puts(b, " = ");
   3919         cbuf_puts(b, cast);
   3920         cbuf_puts(b, "((char*)&");
   3921         cbuf_puts(b, tgt);
   3922         if (r->addend != 0) {
   3923           cbuf_puts(b, " + ");
   3924           cbuf_put_i64(b, r->addend);
   3925         }
   3926         cbuf_puts(b, "),\n");
   3927         cur = r->offset + width;
   3928       }
   3929       if (cur < base + size) {
   3930         cbuf_puts(b, "  .chunk_");
   3931         cbuf_put_u64(b, nrelocs);
   3932         cbuf_puts(b, " = {");
   3933         for (u32 k = 0; k < base + size - cur; ++k) {
   3934           if (k > 0) cbuf_puts(b, ", ");
   3935           cbuf_put_u64(b, bytes[cur - base + k]);
   3936         }
   3937         cbuf_puts(b, "}\n");
   3938       }
   3939     }
   3940     h->free(h, bytes, size);
   3941     cbuf_puts(b, "};\n");
   3942   }
   3943 
   3944   if (nrelocs) h->free(h, (void*)rs, nrelocs * sizeof(const Reloc*));
   3945 }
   3946 
   3947 /* Re-emit a file-scope `__asm__("...")` block at TU scope. The CG layer hands
   3948  * us the de-escaped assembly text (real newlines); re-quote it as a single C
   3949  * string literal so the host C compiler assembles it. Lands in data_defs, which
   3950  * finalize flushes at file scope before any function body. */
   3951 void c_emit_file_scope_asm(CTarget* t, const char* src, size_t len) {
   3952   CBuf* b = &t->data_defs;
   3953   cbuf_puts(b, "__asm__(\"");
   3954   for (size_t i = 0; i < len; ++i) {
   3955     char ch = src[i];
   3956     switch (ch) {
   3957       case '\\':
   3958         cbuf_puts(b, "\\\\");
   3959         break;
   3960       case '"':
   3961         cbuf_puts(b, "\\\"");
   3962         break;
   3963       case '\n':
   3964         cbuf_puts(b, "\\n");
   3965         break;
   3966       case '\t':
   3967         cbuf_puts(b, "\\t");
   3968         break;
   3969       case '\r':
   3970         cbuf_puts(b, "\\r");
   3971         break;
   3972       default:
   3973         cbuf_putc(b, ch);
   3974         break;
   3975     }
   3976   }
   3977   cbuf_puts(b, "\");\n");
   3978 }
   3979 
   3980 static void c_emit_data(CTarget* t) {
   3981   ObjSymIter* it = obj_symiter_new(t->obj);
   3982   if (!it) return;
   3983   ObjSymEntry e;
   3984   while (obj_symiter_next(it, &e)) {
   3985     if (!e.sym) continue;
   3986     c_emit_data_symbol(t, e.id, e.sym);
   3987   }
   3988   obj_symiter_free(it);
   3989 }
   3990 
   3991 /* === finalize / destroy === */
   3992 
   3993 void c_emit_finalize(CTarget* t) {
   3994   if (t->finalized) return;
   3995   t->finalized = 1;
   3996   c_emit_prologue(t);
   3997   if (t->need_stdarg) c_writer_puts(t, "#include <stdarg.h>\n");
   3998   if (t->need_setjmp) c_writer_puts(t, "#include <setjmp.h>\n");
   3999   if (t->need_stdarg || t->need_setjmp) c_writer_puts(t, "\n");
   4000   if (t->typedefs.len) {
   4001     c_writer_write(t, t->typedefs.data, t->typedefs.len);
   4002     c_writer_puts(t, "\n");
   4003   }
   4004   c_emit_data(t);
   4005   if (t->forwards.len) {
   4006     c_writer_write(t, t->forwards.data, t->forwards.len);
   4007     c_writer_puts(t, "\n");
   4008   }
   4009   if (t->data_defs.len) {
   4010     c_writer_write(t, t->data_defs.data, t->data_defs.len);
   4011     c_writer_puts(t, "\n");
   4012   }
   4013   if (t->body.len) c_writer_write(t, t->body.data, t->body.len);
   4014 }
   4015 
   4016 void c_emit_destroy(CTarget* t) {
   4017   Heap* h = t->c->ctx->heap;
   4018   cbuf_fini(&t->forwards);
   4019   cbuf_fini(&t->typedefs);
   4020   cbuf_fini(&t->data_defs);
   4021   cbuf_fini(&t->decls);
   4022   cbuf_fini(&t->body);
   4023   if (t->sym_forwarded) h->free(h, t->sym_forwarded, t->sym_forwarded_cap);
   4024   t->sym_forwarded = NULL;
   4025   t->sym_forwarded_cap = 0;
   4026   if (t->local_static_syms) {
   4027     h->free(h, t->local_static_syms,
   4028             t->local_static_syms_cap * sizeof(*t->local_static_syms));
   4029   }
   4030   if (t->local_static_entries) {
   4031     h->free(h, t->local_static_entries,
   4032             t->local_static_entries_cap * sizeof(*t->local_static_entries));
   4033   }
   4034   if (t->local_declared) h->free(h, t->local_declared, t->local_cap);
   4035   if (t->local_type)
   4036     h->free(h, t->local_type, t->local_cap * sizeof(KitCgTypeId));
   4037   if (t->scopes) h->free(h, t->scopes, t->scopes_cap * sizeof(CScopeInfo));
   4038   t->local_declared = NULL;
   4039   t->local_type = NULL;
   4040   t->scopes = NULL;
   4041   t->local_static_syms = NULL;
   4042   t->local_static_entries = NULL;
   4043   t->local_cap = 0;
   4044   t->scopes_cap = 0;
   4045   t->local_static_syms_cap = 0;
   4046   t->local_static_entries_cap = 0;
   4047   t->local_static_nsyms = 0;
   4048   t->local_static_nentries = 0;
   4049 }