kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

asm_emit.c (46767B)


      1 #include <kit/asm_emit.h>
      2 #include <kit/disasm.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 
      6 #include "arch/arch.h"
      7 #include "core/arena.h"
      8 #include "core/buf.h"
      9 #include "core/core.h"
     10 #include "core/heap.h"
     11 #include "core/pool.h"
     12 #include "core/slice.h"
     13 #include "obj/obj.h"
     14 
     15 #define ASM_BYTES_PER_LINE 16u
     16 
     17 static KitStatus w_str(Writer* w, const char* s) {
     18   return kit_writer_write(w, s, strlen(s));
     19 }
     20 
     21 static KitStatus w_newline(Writer* w) { return kit_writer_write(w, "\n", 1); }
     22 
     23 static KitStatus w_hex_byte(Writer* w, u8 v) {
     24   static const char H[] = "0123456789abcdef";
     25   char buf[2];
     26   buf[0] = H[(v >> 4) & 0xfu];
     27   buf[1] = H[v & 0xfu];
     28   return kit_writer_write(w, buf, 2);
     29 }
     30 
     31 static KitStatus w_dec(Writer* w, u64 v) {
     32   char buf[32];
     33   u32 i = sizeof(buf);
     34   if (v == 0) return kit_writer_write(w, "0", 1);
     35   buf[--i] = '\0';
     36   while (v) {
     37     buf[--i] = (char)('0' + (v % 10));
     38     v /= 10;
     39   }
     40   return kit_writer_write(w, buf + i, sizeof(buf) - i - 1);
     41 }
     42 
     43 static KitStatus w_sym(Writer* w, Compiler* c, Sym name) {
     44   Slice s;
     45   if (!name) return w_str(w, ".L0");
     46   s = pool_slice(c->global, name);
     47   return kit_writer_write(w, s.s, s.len);
     48 }
     49 
/* One symbol definition inside a section, reduced to the fields label
 * emission needs. Filled by collect_labels and sorted by `offset`. */
typedef struct {
  u32 offset; /* symbol value narrowed to u32; the sort key (cmp_labels) */
  Sym name;   /* interned name; never 0 here (collect_labels skips those) */
  u16 bind;   /* binding; emit_label tests it against SB_GLOBAL / SB_WEAK */
  u16 kind;   /* symbol kind; handed to the syntax's sym_type hook */
  u64 size;   /* copied from the object symbol's size */
} SymLabel;
     57 
     58 static int cmp_labels(const void* va, const void* vb) {
     59   const SymLabel* a = (const SymLabel*)va;
     60   const SymLabel* b = (const SymLabel*)vb;
     61   if (a->offset < b->offset) return -1;
     62   if (a->offset > b->offset) return 1;
     63   return 0;
     64 }
     65 
     66 static SymLabel* collect_labels(Compiler* c, ObjBuilder* ob, ObjSecId sec_id,
     67                                 u32* nlabels_out) {
     68   ObjSymIter* it = obj_symiter_new(ob);
     69   SymLabel* labels = NULL;
     70   u32 n = 0, cap = 0;
     71 
     72   *nlabels_out = 0;
     73   if (!it) return NULL;
     74 
     75   for (;;) {
     76     ObjSymEntry e;
     77     const ObjSym* sym;
     78     if (!obj_symiter_next(it, &e)) break;
     79     sym = e.sym;
     80     if (!sym || sym->removed) continue;
     81     if (sym->section_id != sec_id) continue;
     82     if (sym->kind == SK_SECTION || sym->kind == SK_FILE) continue;
     83     if (!sym->name) continue;
     84     /* RISC-V `.LpcrelHi` anchors are codegen-internal labels on AUIPC
     85      * instructions, used only as the target of a paired `%pcrel_lo`
     86      * relocation. Many share the one name (one per AUIPC), so emitting them
     87      * verbatim defines the same label repeatedly and breaks re-assembly. The
     88      * symbolizer replaces each with a unique synthesized anchor label
     89      * (emit_anchor / ref_anchor), so suppress the originals here. */
     90     {
     91       Slice nm = pool_slice(c->global, sym->name);
     92       if (slice_eq_cstr(nm, ".LpcrelHi")) continue;
     93     }
     94 
     95     if (n == cap) {
     96       u32 ncap = cap ? cap * 2 : 8;
     97       SymLabel* nl = arena_array(c->tu, SymLabel, ncap);
     98       if (!nl) break;
     99       if (labels) memcpy(nl, labels, cap * sizeof(SymLabel));
    100       labels = nl;
    101       cap = ncap;
    102     }
    103     labels[n].offset = (u32)sym->value;
    104     labels[n].name = sym->name;
    105     labels[n].bind = sym->bind;
    106     labels[n].kind = sym->kind;
    107     labels[n].size = sym->size;
    108     ++n;
    109   }
    110   obj_symiter_free(it);
    111 
    112   if (n > 0) qsort(labels, n, sizeof(SymLabel), cmp_labels);
    113   *nlabels_out = n;
    114   return labels;
    115 }
    116 
    117 /* ---- Object-format-specific directive syntax (AsmSyntax vtable) --------
    118  *
    119  * The directives whose spelling differs by object format hang off this tiny
    120  * vtable, selected by c->target.obj (asm_syntax_for, defined below). Selecting
    121  * by format — not arch — is correct: an x64-ELF and an aa64-ELF `.s` use the
    122  * same `.type`/`.size`/`.section` directives. Everything else (.globl, .comm,
    123  * labels, data directives, disassembled instructions) is format-neutral and
    124  * stays in the shared emit path. The Mach-O methods make cc -S output
    125  * clang/llvm-mc-acceptable; the ELF methods are the historical spelling. */
/* Context handed to every AsmSyntax hook: where to write, and the compiler
 * whose global pool resolves interned names. */
typedef struct {
  Writer* w;   /* destination of the emitted assembly text */
  Compiler* c; /* hooks read c->global to look up symbol/section names */
} AsmSynCtx;
    130 
/* Per-object-format directive vtable. One static instance exists per format
 * and asm_syntax_for selects among them. The hooks write directly to x->w;
 * the void hooks report no status of their own. */
typedef struct AsmSyntax {
  KitObjFmt obj;    /* object format this table spells directives for */
  const char* name; /* short format name ("elf", "macho", "coff") */
  /* Section-switch directive for `sec`: returns 1 = emitted, 0 = skip it. */
  int (*section_header)(const AsmSynCtx* x, const Section* sec);
  /* Symbol type annotation at a definition (ELF `.type`; Mach-O none). */
  void (*sym_type)(const AsmSynCtx* x, Sym name, u16 sym_kind);
  /* Symbol size after a function body (ELF `.size`; Mach-O none). */
  void (*sym_size)(const AsmSynCtx* x, Sym name);
  /* Section alignment directive for a power-of-two byte alignment. */
  void (*align)(const AsmSynCtx* x, u32 byte_align);
} AsmSyntax;
    143 
    144 static KitStatus emit_label(const AsmSynCtx* x, const AsmSyntax* syn,
    145                             const SymLabel* lbl) {
    146   Writer* w = x->w;
    147   if (lbl->bind == SB_GLOBAL || lbl->bind == SB_WEAK) {
    148     w_str(w, "        .globl  ");
    149     w_sym(w, x->c, lbl->name);
    150     w_newline(w);
    151   }
    152   syn->sym_type(x, lbl->name, lbl->kind);
    153   w_sym(w, x->c, lbl->name);
    154   w_str(w, ":");
    155   return w_newline(w);
    156 }
    157 
    158 /* Emit `.comm`/`.lcomm` for tentative (common) symbols. These live in no output
    159  * section — the linker allocates .bss space at link time — so the section walk
    160  * never sees them; without this a `cc -S` that references a tentative global
    161  * (`int x;` at file scope) re-assembles to an undefined reference. Global
    162  * commons use `.comm name, size, align`; local ones `.lcomm`. */
    163 static KitStatus emit_common_symbols(Writer* w, Compiler* c, ObjBuilder* ob) {
    164   ObjSymIter* it = obj_symiter_new(ob);
    165   KitStatus st = KIT_OK;
    166   if (!it) return KIT_NOMEM;
    167   for (;;) {
    168     ObjSymEntry e;
    169     const ObjSym* sym;
    170     if (!obj_symiter_next(it, &e)) break;
    171     sym = e.sym;
    172     if (!sym || sym->removed || sym->kind != SK_COMMON || !sym->name) continue;
    173     st = w_str(w,
    174                sym->bind == SB_LOCAL ? "        .lcomm  " : "        .comm   ");
    175     if (st != KIT_OK) break;
    176     st = w_sym(w, c, sym->name);
    177     if (st != KIT_OK) break;
    178     st = w_str(w, ", ");
    179     if (st != KIT_OK) break;
    180     st = w_dec(w, sym->size);
    181     if (st != KIT_OK) break;
    182     if (sym->common_align > 1) {
    183       st = w_str(w, ", ");
    184       if (st != KIT_OK) break;
    185       st = w_dec(w, sym->common_align);
    186       if (st != KIT_OK) break;
    187     }
    188     st = w_newline(w);
    189     if (st != KIT_OK) break;
    190   }
    191   obj_symiter_free(it);
    192   return st;
    193 }
    194 
    195 static KitStatus emit_size_directives(const AsmSynCtx* x, const AsmSyntax* syn,
    196                                       ObjBuilder* ob, ObjSecId sec_id) {
    197   ObjSymIter* it = obj_symiter_new(ob);
    198   if (!it) return KIT_NOMEM;
    199 
    200   for (;;) {
    201     ObjSymEntry e;
    202     const ObjSym* sym;
    203     if (!obj_symiter_next(it, &e)) break;
    204     sym = e.sym;
    205     if (!sym || sym->removed) continue;
    206     if (sym->section_id != sec_id) continue;
    207     if (sym->kind != SK_FUNC) continue;
    208     if (sym->size == 0) continue;
    209     syn->sym_size(x, sym->name);
    210   }
    211   obj_symiter_free(it);
    212   return kit_writer_status((KitWriter*)x->w);
    213 }
    214 
    215 /* GNU-as flag letters for a named (SEC_OTHER) section's `, "flags"` operand.
    216  * The assembler's .section parser (src/asm/asm.c) is the inverse mapping. */
    217 static void w_secflags(Writer* w, u16 flags) {
    218   if (flags & SF_ALLOC) w_str(w, "a");
    219   if (flags & SF_WRITE) w_str(w, "w");
    220   if (flags & SF_EXEC) w_str(w, "x");
    221   if (flags & SF_MERGE) w_str(w, "M");
    222   if (flags & SF_STRINGS) w_str(w, "S");
    223   if (flags & SF_TLS) w_str(w, "T");
    224   if (flags & SF_RETAIN) w_str(w, "R");
    225 }
    226 
    227 /* log2 of a power-of-two byte alignment (>=1 → 0). */
    228 static u32 align_log2(u32 a) {
    229   u32 n = 0;
    230   if (a < 2) return 0;
    231   while ((a & 1u) == 0u && n < 31u) {
    232     a >>= 1;
    233     ++n;
    234   }
    235   return n;
    236 }
    237 
    238 /* ---- ELF directive syntax: the historical spelling (unchanged) ----------
    239  *
    240  * Returns 0 to skip a section cc -S does not round-trip (TLS variants,
    241  * SEC_DEBUG). SEC_OTHER (a global in a named section, e.g.
    242  * __attribute__((section(...)))) emits the real name plus its flags/type/
    243  * entsize in GNU-as syntax so the label and bytes survive re-assembly. */
    244 /* Emit `.section name, "flags", @type[, entsize]` (the GNU-as named-section
    245  * form). Used for SEC_OTHER and for any canonical-kind section whose name or
    246  * flags can't be reproduced by the bare `.text`/`.section .rodata` builtins. */
    247 static int elf_named_section(const AsmSynCtx* x, const Section* sec) {
    248   Writer* w = x->w;
    249   Slice nm = pool_slice(x->c->global, sec->name);
    250   if (nm.len == 0) return 0;
    251   w_str(w, "        .section\t");
    252   kit_writer_write(w, nm.s, nm.len);
    253   w_str(w, ", \"");
    254   w_secflags(w, sec->flags);
    255   w_str(w, "\", ");
    256   w_str(w, sec->sem == SSEM_NOBITS ? "@nobits" : "@progbits");
    257   if ((sec->flags & SF_MERGE) || sec->entsize) {
    258     w_str(w, ", ");
    259     w_dec(w, (u64)(sec->entsize ? sec->entsize : 1));
    260   }
    261   w_newline(w);
    262   return 1;
    263 }
    264 
    265 /* Does this canonical-kind section round-trip through its bare builtin
    266  * directive? Only if its name is exactly the canonical spelling and it carries
    267  * no flags that the builtin can't express (MERGE/STRINGS/RETAIN/entsize). A
    268  * `.rodata.foo.merge` mergeable-string section, for instance, must be spelled
    269  * in full or the linker won't merge/GC it the way the direct object does. */
    270 static int sec_is_canonical(const AsmSynCtx* x, const Section* sec,
    271                             const char* canon) {
    272   Slice nm = pool_slice(x->c->global, sec->name);
    273   if (sec->flags & (SF_MERGE | SF_STRINGS | SF_RETAIN)) return 0;
    274   if (sec->entsize) return 0;
    275   return slice_eq_cstr(nm, canon);
    276 }
    277 
    278 static int elf_section_header(const AsmSynCtx* x, const Section* sec) {
    279   Writer* w = x->w;
    280   if (sec->flags & SF_TLS) return 0;
    281   switch (sec->kind) {
    282     case SEC_TEXT:
    283       if (!sec_is_canonical(x, sec, ".text")) return elf_named_section(x, sec);
    284       w_str(w, "        .text");
    285       w_newline(w);
    286       return 1;
    287     case SEC_RODATA:
    288       if (!sec_is_canonical(x, sec, ".rodata"))
    289         return elf_named_section(x, sec);
    290       w_str(w, "        .section\t.rodata");
    291       w_newline(w);
    292       return 1;
    293     case SEC_DATA:
    294       if (!sec_is_canonical(x, sec, ".data")) return elf_named_section(x, sec);
    295       w_str(w, "        .section\t.data");
    296       w_newline(w);
    297       return 1;
    298     case SEC_BSS:
    299       if (!sec_is_canonical(x, sec, ".bss")) return elf_named_section(x, sec);
    300       w_str(w, "        .section\t.bss");
    301       w_newline(w);
    302       return 1;
    303     case SEC_OTHER:
    304       return elf_named_section(x, sec);
    305     default:
    306       return 0;
    307   }
    308 }
    309 
    310 static void elf_sym_type(const AsmSynCtx* x, Sym name, u16 kind) {
    311   const char* t = NULL;
    312   if (kind == SK_FUNC)
    313     t = ", @function";
    314   else if (kind == SK_OBJ || kind == SK_COMMON || kind == SK_TLS)
    315     t = ", @object";
    316   if (!t) return;
    317   w_str(x->w, "        .type   ");
    318   w_sym(x->w, x->c, name);
    319   w_str(x->w, t);
    320   w_newline(x->w);
    321 }
    322 
    323 static void elf_sym_size(const AsmSynCtx* x, Sym name) {
    324   w_str(x->w, "        .size   ");
    325   w_sym(x->w, x->c, name);
    326   w_str(x->w, ", .-");
    327   w_sym(x->w, x->c, name);
    328   w_newline(x->w);
    329 }
    330 
    331 static void elf_align(const AsmSynCtx* x, u32 byte_align) {
    332   w_str(x->w, "        .align  ");
    333   w_dec(x->w, (u64)byte_align);
    334   w_newline(x->w);
    335 }
    336 
    337 /* ---- Mach-O directive syntax: clang/llvm-mc-acceptable spelling ---------- */
    338 
    339 static int macho_section_header(const AsmSynCtx* x, const Section* sec) {
    340   Writer* w = x->w;
    341   if (sec->flags & SF_TLS) return 0; /* TLS not round-tripped today */
    342   switch (sec->kind) {
    343     case SEC_TEXT:
    344       w_str(w, "        .text"); /* Mach-O builtin */
    345       w_newline(w);
    346       return 1;
    347     case SEC_RODATA:
    348       w_str(w, "        .section\t");
    349       w_str(w, obj_macho_canon_secname(SEC_RODATA)); /* __TEXT,__const */
    350       w_newline(w);
    351       return 1;
    352     case SEC_DATA:
    353       w_str(w, "        .section\t");
    354       w_str(w, obj_macho_canon_secname(SEC_DATA)); /* __DATA,__data */
    355       w_newline(w);
    356       return 1;
    357     case SEC_BSS:
    358       /* clang accepts the `.bss` builtin; the shared zero-range path fills it
    359        * (avoids `.zerofill`'s per-symbol operand syntax). */
    360       w_str(w, "        .bss");
    361       w_newline(w);
    362       return 1;
    363     case SEC_OTHER: {
    364       Slice nm = pool_slice(x->c->global, sec->name);
    365       if (nm.len == 0) return 0;
    366       w_str(w, "        .section\t");
    367       if (memchr(nm.s, ',', nm.len)) {
    368         /* Already "__SEG,__sect" (codegen interns eh_frame this way on
    369          * Mach-O). Emit bare — no ELF `, "flags", @progbits` suffix. */
    370         kit_writer_write(w, nm.s, nm.len);
    371       } else {
    372         /* Defensive: a non-comma name on a Mach-O target. Spell it the way
    373          * the writer's name_to_seg_sect would (canonical kind, else
    374          * __DATA,<name-without-dot>) so text and binary agree. */
    375         const char* canon = obj_macho_canon_secname(sec->kind);
    376         if (canon) {
    377           w_str(w, canon);
    378         } else {
    379           w_str(w, "__DATA,");
    380           if (nm.s[0] == '.')
    381             kit_writer_write(w, nm.s + 1, nm.len - 1);
    382           else
    383             kit_writer_write(w, nm.s, nm.len);
    384         }
    385       }
    386       w_newline(w);
    387       return 1;
    388     }
    389     default:
    390       return 0;
    391   }
    392 }
    393 
    394 static void macho_sym_type(const AsmSynCtx* x, Sym name, u16 kind) {
    395   (void)x;
    396   (void)name;
    397   (void)kind; /* Mach-O derives symbol kind from the symbol table */
    398 }
    399 
    400 static void macho_sym_size(const AsmSynCtx* x, Sym name) {
    401   (void)x;
    402   (void)name; /* Mach-O has no `.size` */
    403 }
    404 
    405 static void macho_align(const AsmSynCtx* x, u32 byte_align) {
    406   /* Mach-O `.align`/`.p2align` are log2; cc -S emits `.p2align` so clang and
    407    * kit-as read it identically. */
    408   w_str(x->w, "        .p2align ");
    409   w_dec(x->w, (u64)align_log2(byte_align));
    410   w_newline(x->w);
    411 }
    412 
/* Per-format directive tables. Initializers are positional, in AsmSyntax
 * field order: obj, name, section_header, sym_type, sym_size, align. */
static const AsmSyntax g_asm_syntax_elf = {
    KIT_OBJ_ELF,  "elf",        elf_section_header,
    elf_sym_type, elf_sym_size, elf_align,
};
static const AsmSyntax g_asm_syntax_macho = {
    KIT_OBJ_MACHO,  "macho",        macho_section_header,
    macho_sym_type, macho_sym_size, macho_align,
};
/* COFF text emission is not yet exercised by the cc -S lanes; alias the ELF
 * directive spelling for now (TODO COFF: .def/.scl/.type/.endef; COFF
 * `.section name, "flags"` has its own form). The seam exists so COFF is
 * pluggable without touching the printer. */
static const AsmSyntax g_asm_syntax_coff = {
    KIT_OBJ_COFF, "coff",       elf_section_header,
    elf_sym_type, elf_sym_size, elf_align,
};
    429 
    430 static const AsmSyntax* asm_syntax_for(KitObjFmt fmt) {
    431   switch (fmt) {
    432     case KIT_OBJ_MACHO:
    433       return &g_asm_syntax_macho;
    434     case KIT_OBJ_COFF:
    435       return &g_asm_syntax_coff;
    436     case KIT_OBJ_ELF:
    437     default:
    438       return &g_asm_syntax_elf; /* WASM has no textual-asm path */
    439   }
    440 }
    441 
    442 /* Emit a run of raw `.byte` lines for [start, end). */
    443 static KitStatus emit_raw_bytes(Writer* w, const u8* data, u32 start, u32 end) {
    444   u32 off;
    445   for (off = start; off < end; off += ASM_BYTES_PER_LINE) {
    446     u32 rem = end - off;
    447     u32 n = rem < ASM_BYTES_PER_LINE ? rem : ASM_BYTES_PER_LINE;
    448     u32 j;
    449     KitStatus st;
    450     st = w_str(w, "        .byte   0x");
    451     if (st != KIT_OK) return st;
    452     st = w_hex_byte(w, data[off]);
    453     if (st != KIT_OK) return st;
    454     for (j = 1; j < n; ++j) {
    455       st = w_str(w, ", 0x");
    456       if (st != KIT_OK) return st;
    457       st = w_hex_byte(w, data[off + j]);
    458       if (st != KIT_OK) return st;
    459     }
    460     st = w_newline(w);
    461     if (st != KIT_OK) return st;
    462   }
    463   return KIT_OK;
    464 }
    465 
    466 /* A reloc kind whose data field carries a symbol value reproducible by an
    467  * integer directive: maps to (directive, byte width, PC-relative?). The
    468  * assembler emits the matching R_ABS{32,64} for `.word`/`.quad SYM+addend` and
    469  * R_PC{32,64} for `.long`/`.quad SYM - .` (emit_int_directive), so the
    470  * round-tripped relocation matches codegen's. `*pcrel` selects the `SYM - .`
    471  * spelling (built by build_data_symref). Returns 0 for kinds with no
    472  * integer-directive spelling (caller keeps the raw bytes). */
    473 static int data_reloc_directive(u16 kind, const char** dir, u32* width,
    474                                 int* pcrel) {
    475   *pcrel = 0;
    476   switch (kind) {
    477     case R_ABS64:
    478       *dir = "        .quad   ";
    479       *width = 8;
    480       return 1;
    481     case R_PC64:
    482       *dir = "        .quad   ";
    483       *width = 8;
    484       *pcrel = 1;
    485       return 1;
    486     case R_ABS32:
    487       *dir = "        .word   ";
    488       *width = 4;
    489       return 1;
    490     case R_PC32:
    491       *dir = "        .long   ";
    492       *width = 4;
    493       *pcrel = 1;
    494       return 1;
    495     default:
    496       return 0;
    497   }
    498 }
    499 
    500 static KitStatus emit_zero_range(Writer* w, u32 size) {
    501   KitStatus st;
    502   if (size == 0) return KIT_OK;
    503   st = w_str(w, "        .zero   ");
    504   if (st != KIT_OK) return st;
    505   st = w_dec(w, (u64)size);
    506   if (st != KIT_OK) return st;
    507   return w_newline(w);
    508 }
    509 
    510 /* ---- Phase 2 symbolization: reloc-driven operand substitution ----------
    511  *
    512  * `cc -S` must be re-assemblable. The disassembler renders relocated operands
    513  * numerically (e.g. `bl 0x10`, `adrp x16, 0x0`, `ldr w8, [x16]`), which would
    514  * branch to the wrong place or load from address 0 on re-assembly. Here we
    515  * consult the section's relocation table and rewrite the covered operand into
    516  * the relocation-operator syntax the assembler parses (the inverse of
    517  * src/arch/aa64/asm.c's parse_reloc_mod). See doc/TESTING.md.
    518  *
    519  * Operand text is rewritten in place rather than re-rendered from decoded
    520  * fields, so the register names the disassembler produced are preserved and
    521  * this layer stays free of per-arch register-naming knowledge. The reloc
    522  * kind alone selects the modifier and the operand shape to patch. */
    523 
/* One relocation of a section, flattened for the symbolizer. Built by
 * collect_relocs, which sorts the array by `offset`. */
typedef struct {
  u32 offset; /* relocation offset (copied from Reloc.offset); the sort key */
  u16 kind;   /* relocation kind (copied from Reloc.kind) */
  Sym sym;    /* target symbol's name, or 0 when the symbol lookup fails */
  i64 addend; /* relocation addend as stored in the object */
} SecReloc;
    530 
    531 static int cmp_secreloc(const void* va, const void* vb) {
    532   const SecReloc* a = (const SecReloc*)va;
    533   const SecReloc* b = (const SecReloc*)vb;
    534   if (a->offset < b->offset) return -1;
    535   if (a->offset > b->offset) return 1;
    536   return 0;
    537 }
    538 
    539 static SecReloc* collect_relocs(Compiler* c, ObjBuilder* ob, ObjSecId sec_id,
    540                                 u32* n_out) {
    541   u32 total = obj_reloc_total(ob);
    542   u32 n = 0, cap = 0, i;
    543   SecReloc* arr = NULL;
    544 
    545   *n_out = 0;
    546   for (i = 0; i < total; ++i) {
    547     const Reloc* r = obj_reloc_at(ob, i);
    548     const ObjSym* s;
    549     if (!r || r->removed) continue;
    550     if (r->section_id != sec_id) continue;
    551     if (n == cap) {
    552       u32 ncap = cap ? cap * 2 : 8;
    553       SecReloc* na = arena_array(c->tu, SecReloc, ncap);
    554       if (!na) break;
    555       if (arr) memcpy(na, arr, cap * sizeof(SecReloc));
    556       arr = na;
    557       cap = ncap;
    558     }
    559     s = obj_symbol_get(ob, r->sym);
    560     arr[n].offset = r->offset;
    561     arr[n].kind = r->kind;
    562     arr[n].sym = s ? s->name : (Sym)0;
    563     arr[n].addend = r->addend;
    564     ++n;
    565   }
    566   if (n > 1) qsort(arr, n, sizeof(SecReloc), cmp_secreloc);
    567   *n_out = n;
    568   return arr;
    569 }
    570 
    571 /* First relocation whose offset lies within instruction [off, off+len). */
    572 static const SecReloc* reloc_in_range(const SecReloc* r, u32 n, u32 off,
    573                                       u32 len) {
    574   u32 i;
    575   for (i = 0; i < n; ++i)
    576     if (r[i].offset >= off && r[i].offset < off + len) return &r[i];
    577   return NULL;
    578 }
    579 
    580 /* The reloc-kind → operand-syntax mapping now lives in the arch backend
    581  * (ArchAsmOps.reloc_operand, src/arch/<arch>/asm.c), reached via
    582  * arch_reloc_operand(). This keeps the printer arch-agnostic and format-aware:
    583  * aarch64 ELF spells `:lo12:sym`, Mach-O spells `sym@PAGEOFF`. */
    584 
    585 /* A `.L`-prefixed name is an assembler-local label (e.g. `.Lkit_ro.0`,
    586  * `.Lkit_jt.0`): the assembler's lexer accepts it as an identifier. Other
    587  * `.`-prefixed names (section symbols like `.text`, `.rodata`) are not yet
    588  * re-assemblable as operands, so the symbolizer keeps the numeric form. */
    589 static int sym_is_assemblable(Slice s) {
    590   if (s.len == 0) return 0;
    591   if (s.s[0] != '.') return 1;
    592   return s.len >= 2 && s.s[1] == 'L';
    593 }
    594 
    595 /* Build "<prefix><sym>[+/-addend]<suffix>" into buf. Returns length, or -1 if
    596  * the symbol has no usable name (anonymous, or a `.`-prefixed section symbol
    597  * the assembler's expression parser does not accept). The modifier is a prefix
    598  * (ELF `:lo12:sym`) or a suffix (Mach-O `sym@PAGEOFF`), per the arch/format;
    599  * an addend lands before the suffix (`sym+8@PAGEOFF`), which both clang and
    600  * kit-as parse. */
    601 static int build_symref(char* buf, u32 cap, Compiler* c,
    602                         const ArchRelocOperand* ro, Sym name, i64 addend) {
    603   Slice s;
    604   u32 p = 0, i;
    605   if (!name) return -1;
    606   s = pool_slice(c->global, name);
    607   if (!sym_is_assemblable(s)) return -1;
    608   /* Undo any instruction-encoding addend bias so the printed offset is the
    609    * symbol offset (x86-64 rel32 relocs store addend-4; the assembler re-applies
    610    * the -4, so emit `sym` for a stored -4). */
    611   addend += ro->addend_bias;
    612   for (i = 0; ro->prefix[i] && p + 1 < cap; ++i) buf[p++] = ro->prefix[i];
    613   for (i = 0; i < s.len && p + 1 < cap; ++i) buf[p++] = s.s[i];
    614   if (addend != 0) {
    615     char num[24];
    616     u32 nl = 0;
    617     u64 mag = addend < 0 ? (u64)(-(addend)) : (u64)addend;
    618     if (p + 1 < cap) buf[p++] = addend < 0 ? '-' : '+';
    619     do {
    620       num[nl++] = (char)('0' + (u32)(mag % 10));
    621       mag /= 10;
    622     } while (mag && nl < sizeof(num));
    623     while (nl && p + 1 < cap) buf[p++] = num[--nl];
    624   }
    625   for (i = 0; ro->suffix[i] && p + 1 < cap; ++i) buf[p++] = ro->suffix[i];
    626   buf[p] = '\0';
    627   return (int)p;
    628 }
    629 
    630 /* Position of the "(%rip)" substring in [ops, ops+olen), or -1. */
    631 static i32 find_rip(const char* ops, u32 olen) {
    632   u32 i;
    633   if (olen < 6) return -1;
    634   for (i = 0; i + 6 <= olen; ++i)
    635     if (memcmp(ops + i, "(%rip)", 6) == 0) return (i32)i;
    636   return -1;
    637 }
    638 
    639 /* Write `ops` with the relocated operand rewritten to `symref`. The surgery
    640  * site is chosen from the operand text first: an x86-64 `disp(%rip)` operand
    641  * always takes RIP surgery (insert sym before the displacement), regardless of
    642  * `surg`. Otherwise `surg` selects: TAIL replaces the last comma-separated
    643  * component (or the whole operand if there is no comma — branch targets); MEM
    644  * rewrites the offset inside [...] (aarch64 ldst). */
    645 static KitStatus w_symbolized(Writer* w, const char* ops, u32 olen,
    646                               const char* symref, ArchRelocSurg surg) {
    647   i32 rip = find_rip(ops, olen);
    648   if (rip >= 0) surg = ARCH_RELOC_SURG_RIP;
    649   if (surg == ARCH_RELOC_SURG_RIP) {
    650     /* `[disp](%rip)[, ...]` -> `symref(%rip)[, ...]`: replace any numeric
    651      * displacement immediately before `(%rip)` with symref. */
    652     i32 ds = rip; /* start of the displacement run before the '(' */
    653     KitStatus st;
    654     while (ds > 0) {
    655       char ch = ops[ds - 1];
    656       if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
    657           (ch >= 'A' && ch <= 'F') || ch == 'x' || ch == '-' || ch == '+')
    658         --ds;
    659       else
    660         break;
    661     }
    662     st = kit_writer_write(w, ops, (u32)ds); /* text before the displacement */
    663     if (st != KIT_OK) return st;
    664     st = w_str(w, symref);
    665     if (st != KIT_OK) return st;
    666     return kit_writer_write(w, ops + rip, olen - (u32)rip); /* "(%rip)..." */
    667   }
    668   if (surg == ARCH_RELOC_SURG_TAIL) {
    669     i32 last_comma = -1;
    670     u32 i;
    671     for (i = 0; i < olen; ++i)
    672       if (ops[i] == ',') last_comma = (i32)i;
    673     if (last_comma < 0) return w_str(w, symref);
    674     {
    675       KitStatus st = kit_writer_write(w, ops, (u32)last_comma);
    676       if (st != KIT_OK) return st;
    677       st = w_str(w, ", ");
    678       if (st != KIT_OK) return st;
    679       return w_str(w, symref);
    680     }
    681   }
    682   if (surg == ARCH_RELOC_SURG_RV_LO12) {
    683     /* RISC-V low-half: a `disp(base)` memory form rewrites the displacement;
    684      * a register-immediate form appends the modifier as a new operand. The
    685      * memory form is recognized by a trailing `(...)` group. */
    686     i32 lp = -1, rp = -1;
    687     u32 i;
    688     for (i = 0; i < olen; ++i) {
    689       if (ops[i] == '(')
    690         lp = (i32)i;
    691       else if (ops[i] == ')')
    692         rp = (i32)i;
    693     }
    694     if (lp >= 0 && rp > lp && (u32)(rp + 1) == olen) {
    695       /* `..., <disp>(base)` -> `..., symref(base)`: replace the displacement
    696        * run that ends immediately before the '('. */
    697       i32 ds = lp; /* start of the displacement run */
    698       KitStatus st;
    699       while (ds > 0) {
    700         char ch = ops[ds - 1];
    701         if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
    702             (ch >= 'A' && ch <= 'F') || ch == 'x' || ch == '-' || ch == '+')
    703           --ds;
    704         else
    705           break;
    706       }
    707       st = kit_writer_write(w, ops, (u32)ds); /* "..., " before the disp */
    708       if (st != KIT_OK) return st;
    709       st = w_str(w, symref);
    710       if (st != KIT_OK) return st;
    711       return kit_writer_write(w, ops + lp, olen - (u32)lp); /* "(base)" */
    712     }
    713     /* Register-immediate (e.g. `mv rd, rs`): append symref as a new operand. */
    714     {
    715       KitStatus st = kit_writer_write(w, ops, olen);
    716       if (st != KIT_OK) return st;
    717       st = w_str(w, ", ");
    718       if (st != KIT_OK) return st;
    719       return w_str(w, symref);
    720     }
    721   }
    722   /* SURG_MEM: keep the base register, set the offset to symref. */
    723   {
    724     i32 lb = -1, rb = -1, base_end;
    725     u32 i;
    726     KitStatus st;
    727     for (i = 0; i < olen; ++i) {
    728       if (ops[i] == '[')
    729         lb = (i32)i;
    730       else if (ops[i] == ']')
    731         rb = (i32)i;
    732     }
    733     if (lb < 0 || rb < 0 || rb < lb) /* unexpected shape; emit verbatim */
    734       return kit_writer_write(w, ops, olen);
    735     base_end = lb + 1;
    736     while (base_end < rb && ops[base_end] != ',') ++base_end;
    737     st = kit_writer_write(w, ops, (u32)(base_end)); /* "...[Rn" */
    738     if (st != KIT_OK) return st;
    739     st = w_str(w, ", ");
    740     if (st != KIT_OK) return st;
    741     st = w_str(w, symref);
    742     if (st != KIT_OK) return st;
    743     st = w_str(w, "]");
    744     if (st != KIT_OK) return st;
    745     /* trailing text after the close bracket (e.g. nothing for reloc'd ldst) */
    746     if ((u32)(rb + 1) < olen)
    747       return kit_writer_write(w, ops + rb + 1, olen - (u32)(rb + 1));
    748     return KIT_OK;
    749   }
    750 }
    751 
    752 /* ---- Phase 2 symbolization: intra-function branch labels ---------------
    753  *
    754  * Branches that stay within the section carry no relocation — the
    755  * disassembler renders the resolved target numerically (`b 0x60`). The
    756  * assembler rejects a numeric branch target, so re-assembly needs a label.
    757  * We pre-scan the section for such branch targets, synthesize a local label
    758  * at each, and rewrite the branch operand to reference it.
    759  *
    760  * Synthesized names are `Lcf_<secidx>_<hexoff>` — deliberately not `.L`
    761  * prefixed, since the assembler's expression parser does not currently accept
    762  * `.`-led identifiers as operands. The names are unique within the file. */
    763 
    764 static int cmp_u32(const void* va, const void* vb) {
    765   u32 a = *(const u32*)va, b = *(const u32*)vb;
    766   if (a < b) return -1;
    767   if (a > b) return 1;
    768   return 0;
    769 }
    770 
    771 /* Which mnemonics are intra-section local branches (target codegen resolved in
    772  * place, no relocation) is arch-specific: routed through arch_is_local_branch
    773  * (ArchAsmOps.is_local_branch). The disassembler renders such a target
    774  * numerically; we synthesize a label at it so the operand re-assembles. */
    775 
    776 /* Parse the trailing `0x<hex>` branch-target operand (the last comma-separated
    777  * component). Returns 1 and the value on success. */
    778 static int parse_hex_tail(KitSlice ops, u64* out) {
    779   i32 start = 0, p;
    780   u64 v = 0;
    781   u32 i;
    782   int any = 0;
    783   for (i = 0; i < ops.len; ++i)
    784     if (ops.s[i] == ',') start = (i32)i + 1;
    785   while (start < (i32)ops.len && (ops.s[start] == ' ' || ops.s[start] == '\t'))
    786     ++start;
    787   if (start + 2 > (i32)ops.len || ops.s[start] != '0' ||
    788       (ops.s[start + 1] | 32) != 'x')
    789     return 0;
    790   for (p = start + 2; p < (i32)ops.len; ++p) {
    791     char c = ops.s[p];
    792     u32 d;
    793     if (c >= '0' && c <= '9')
    794       d = (u32)(c - '0');
    795     else if ((c | 32) >= 'a' && (c | 32) <= 'f')
    796       d = (u32)((c | 32) - 'a' + 10);
    797     else
    798       break;
    799     v = v * 16 + d;
    800     any = 1;
    801   }
    802   while (p < (i32)ops.len && ops.s[p] == ' ') ++p;
    803   if (!any || p != (i32)ops.len) return 0;
    804   *out = v;
    805   return 1;
    806 }
    807 
    808 /* Parse a local-branch target from the operand tail into an absolute,
    809  * section-relative offset. Two render forms occur:
    810  *   "0x<hex>" — an absolute target (the printer had the instruction's address)
    811  *   "#<dec>"  — a PC-relative displacement, emitted when the printer had no
    812  *               address context, i.e. the instruction sits at section offset 0
    813  *               (a branch as the first instruction of .text, which -O1 leaf
    814  *               functions can produce). The absolute target is inst_off + disp.
    815  * Returns 1 and sets *out to the absolute target. */
    816 static int parse_branch_tail(KitSlice ops, u32 inst_off, u64* out) {
    817   i32 start = 0, p;
    818   i64 v = 0;
    819   int neg = 0, any = 0;
    820   u32 i;
    821   if (parse_hex_tail(ops, out)) return 1;
    822   for (i = 0; i < ops.len; ++i)
    823     if (ops.s[i] == ',') start = (i32)i + 1;
    824   while (start < (i32)ops.len && (ops.s[start] == ' ' || ops.s[start] == '\t'))
    825     ++start;
    826   if (start >= (i32)ops.len || ops.s[start] != '#') return 0;
    827   p = start + 1;
    828   if (p < (i32)ops.len && (ops.s[p] == '-' || ops.s[p] == '+'))
    829     neg = (ops.s[p++] == '-');
    830   for (; p < (i32)ops.len; ++p) {
    831     char c = ops.s[p];
    832     if (c < '0' || c > '9') break;
    833     v = v * 10 + (c - '0');
    834     any = 1;
    835   }
    836   while (p < (i32)ops.len && ops.s[p] == ' ') ++p;
    837   if (!any || p != (i32)ops.len) return 0;
    838   if (neg) v = -v;
    839   *out = (u64)((i64)inst_off + v);
    840   return 1;
    841 }
    842 
/* Read-only per-section state handed to the operand/label helpers below. */
typedef struct {
  Compiler* c;            /* compiler handle; used for pool and arch lookups */
  u32 secidx;             /* section index, embedded in synthesized label names */
  const SecReloc* relocs; /* the section's relocations; find_anchor_for_lo12
                           * walks them expecting ascending offsets */
  u32 nrelocs;            /* number of entries in `relocs` */
  const SymLabel* labels; /* symbol labels defined in this section */
  u32 nlabels;            /* number of entries in `labels` */
  const u32* btargets;    /* offsets of local-branch targets needing a label */
  u32 nbt;                /* number of entries in `btargets` */
} EmitCtx;
    853 
    854 static u32 fmt_u64(char* buf, u32 p, u32 cap, u64 v, u32 base) {
    855   char tmp[24];
    856   u32 n = 0;
    857   do {
    858     u32 d = (u32)(v % base);
    859     tmp[n++] = (char)(d < 10 ? '0' + d : 'a' + d - 10);
    860     v /= base;
    861   } while (v && n < sizeof tmp);
    862   while (n && p + 1 < cap) buf[p++] = tmp[--n];
    863   return p;
    864 }
    865 
    866 /* Synthesized label spelling, shared by definition and reference sites. */
    867 static u32 fmt_synth_label(char* buf, u32 cap, u32 secidx, u32 off) {
    868   u32 p = 0;
    869   const char* pre = "Lcf_";
    870   u32 i;
    871   for (i = 0; pre[i] && p + 1 < cap; ++i) buf[p++] = pre[i];
    872   p = fmt_u64(buf, p, cap, secidx, 10);
    873   if (p + 1 < cap) buf[p++] = '_';
    874   p = fmt_u64(buf, p, cap, off, 16);
    875   buf[p] = '\0';
    876   return p;
    877 }
    878 
    879 /* Synthesized hi/lo anchor label spelling (RISC-V `%pcrel_hi`/`%pcrel_lo`
    880  * pairing). The high-half reloc defines `.Lpcrel_hi_<secidx>_<off>` at its
    881  * AUIPC; the paired low-half reloc references it. `.L`-prefixed so the
    882  * assembler's lexer accepts it and the linker treats it as local. */
    883 static u32 fmt_anchor_label(char* buf, u32 cap, u32 secidx, u32 off) {
    884   u32 p = 0;
    885   const char* pre = ".Lpcrel_hi_";
    886   u32 i;
    887   for (i = 0; pre[i] && p + 1 < cap; ++i) buf[p++] = pre[i];
    888   p = fmt_u64(buf, p, cap, secidx, 10);
    889   if (p + 1 < cap) buf[p++] = '_';
    890   p = fmt_u64(buf, p, cap, off, 16);
    891   buf[p] = '\0';
    892   return p;
    893 }
    894 
    895 /* The offset of the high-half (anchor-emitting) relocation paired with a
    896  * low-half reloc at `lo_off`: the nearest preceding reloc whose
    897  * ArchRelocOperand sets emit_anchor. kit's codegen always emits the AUIPC
    898  * immediately before its paired ADDI/load, so the nearest preceding anchor is
    899  * the correct one. Returns 1 and *hi_off on success. */
    900 static int find_anchor_for_lo12(const EmitCtx* x, u32 lo_off, u32* hi_off) {
    901   u32 i;
    902   int found = 0;
    903   u32 best = 0;
    904   for (i = 0; i < x->nrelocs; ++i) {
    905     ArchRelocOperand ro = {0}; /* zero emit_anchor/ref_anchor: arches that
    906                                 * don't set them must read as 0 (rv64-only) */
    907     if (x->relocs[i].offset >= lo_off) break;
    908     if (arch_reloc_operand(x->c, x->relocs[i].kind, &ro) && ro.emit_anchor) {
    909       best = x->relocs[i].offset;
    910       found = 1;
    911     }
    912   }
    913   if (found && hi_off) *hi_off = best;
    914   return found;
    915 }
    916 
    917 /* Non-dot symbol name defined at `off`, or NULL. Such a symbol is used as the
    918  * branch label directly (no synthesized label needed). */
    919 static Sym symbol_at(const EmitCtx* x, u32 off) {
    920   u32 i;
    921   for (i = 0; i < x->nlabels; ++i) {
    922     if (x->labels[i].offset == off && x->labels[i].name) {
    923       Slice s = pool_slice(x->c->global, x->labels[i].name);
    924       if (sym_is_assemblable(s)) return x->labels[i].name;
    925     }
    926   }
    927   return (Sym)0;
    928 }
    929 
    930 /* Label name for a branch target offset: an existing symbol if one is defined
    931  * there, else the synthesized `Lcf_...` name. */
    932 static u32 build_label_name(char* buf, u32 cap, const EmitCtx* x, u32 off) {
    933   Sym sym = symbol_at(x, off);
    934   if (sym) {
    935     Slice s = pool_slice(x->c->global, sym);
    936     u32 p = 0, i;
    937     for (i = 0; i < s.len && p + 1 < cap; ++i) buf[p++] = s.s[i];
    938     buf[p] = '\0';
    939     return p;
    940   }
    941   return fmt_synth_label(buf, cap, x->secidx, off);
    942 }
    943 
    944 static int is_btarget(const EmitCtx* x, u32 off) {
    945   u32 i;
    946   for (i = 0; i < x->nbt; ++i)
    947     if (x->btargets[i] == off) return 1;
    948   return 0;
    949 }
    950 
    951 /* Append `off` to a dynamic, deduplicated anchor array (arena-grown). */
    952 static void anchor_add(Compiler* c, u32** arr, u32* n, u32* cap, u32 off) {
    953   u32 j;
    954   for (j = 0; j < *n; ++j)
    955     if ((*arr)[j] == off) return;
    956   if (*n == *cap) {
    957     u32 nc = *cap ? *cap * 2 : 8;
    958     u32* na = arena_array(c->tu, u32, nc);
    959     if (!na) return;
    960     if (*arr) memcpy(na, *arr, *cap * sizeof(u32));
    961     *arr = na;
    962     *cap = nc;
    963   }
    964   (*arr)[(*n)++] = off;
    965 }
    966 
    967 /* Pre-scan: collect in-section branch targets of un-relocated local branches,
    968  * so cc -S synthesizes a label there and the branch re-assembles. Code-location
    969  * references that must survive a re-encoding assembler (switch jump-table
    970  * entries, `&&label` address-takes) are NOT handled here — codegen emits them
    971  * as relocations against per-block local symbols (mc_label_symbol), so the
    972  * normal reloc-operand path symbolizes them and the target label is a real
    973  * symbol. */
    974 static u32* collect_branch_targets(Compiler* c, ArchDisasm* dasm,
    975                                    const SecReloc* relocs, u32 nrelocs,
    976                                    const u8* data, u32 total, u32* n_out) {
    977   u32* arr = NULL;
    978   u32 n = 0, cap = 0, off = 0;
    979 
    980   *n_out = 0;
    981   while (off < total) {
    982     KitInsn insn;
    983     u32 nb = arch_disasm_decode(dasm, data + off, total - off, (u64)off, &insn);
    984     u64 tgt;
    985     if (nb == 0) {
    986       off += 1;
    987       continue;
    988     }
    989     if (!reloc_in_range(relocs, nrelocs, off, nb) &&
    990         arch_is_local_branch(c, insn.mnemonic) &&
    991         parse_branch_tail(insn.operands, off, &tgt) && tgt < total) {
    992       anchor_add(c, &arr, &n, &cap, (u32)tgt);
    993     }
    994     off += nb;
    995   }
    996 
    997   if (n > 1) qsort(arr, n, sizeof(u32), cmp_u32);
    998   *n_out = n;
    999   return arr;
   1000 }
   1001 
   1002 /* Emit an instruction's operands, symbolizing a covering relocation or an
   1003  * intra-section branch target when present. */
   1004 static KitStatus emit_operands(Writer* w, const EmitCtx* x, const KitInsn* insn,
   1005                                u32 off) {
   1006   const SecReloc* r;
   1007   if (!insn->operands.len) return KIT_OK;
   1008   r = reloc_in_range(x->relocs, x->nrelocs, off, insn->nbytes);
   1009   if (r) {
   1010     ArchRelocOperand ro = {0}; /* zero emit_anchor/ref_anchor: arches that
   1011                                 * don't set them must read as 0 (rv64-only) */
   1012     if (arch_reloc_operand(x->c, r->kind, &ro)) {
   1013       char symref[256];
   1014       /* A low-half reloc (RISC-V `%pcrel_lo`) names the paired high-half's
   1015        * synthesized anchor label, not the reloc's own (`.LpcrelHi`) symbol. */
   1016       if (ro.ref_anchor) {
   1017         u32 hi_off;
   1018         if (find_anchor_for_lo12(x, off, &hi_off)) {
   1019           char name[256];
   1020           u32 p = 0, i;
   1021           for (i = 0; ro.prefix[i] && p + 1 < sizeof name; ++i)
   1022             name[p++] = ro.prefix[i];
   1023           p += fmt_anchor_label(name + p, (u32)sizeof name - p, x->secidx,
   1024                                 hi_off);
   1025           for (i = 0; ro.suffix[i] && p + 1 < sizeof name; ++i)
   1026             name[p++] = ro.suffix[i];
   1027           name[p] = '\0';
   1028           return w_symbolized(w, insn->operands.s, insn->operands.len, name,
   1029                               ro.surg);
   1030         }
   1031         /* No anchor found (unexpected): fall through to keep numeric. */
   1032       } else if (build_symref(symref, sizeof symref, x->c, &ro, r->sym,
   1033                               r->addend) >= 0) {
   1034         return w_symbolized(w, insn->operands.s, insn->operands.len, symref,
   1035                             ro.surg);
   1036       }
   1037     }
   1038   } else if (arch_is_local_branch(x->c, insn->mnemonic)) {
   1039     u64 tgt;
   1040     if (parse_branch_tail(insn->operands, off, &tgt) &&
   1041         is_btarget(x, (u32)tgt)) {
   1042       char name[256];
   1043       build_label_name(name, sizeof name, x, (u32)tgt);
   1044       return w_symbolized(w, insn->operands.s, insn->operands.len, name,
   1045                           ARCH_RELOC_SURG_TAIL);
   1046     }
   1047   }
   1048   return kit_writer_write(w, insn->operands.s, insn->operands.len);
   1049 }
   1050 
   1051 /* Emit a data range, rendering any covered relocation as a symbolic integer
   1052  * directive (`.quad sym+addend`) so cc -S | as reproduces the data relocation
   1053  * table — switch jump tables (`.quad .Lcfblk.*` against per-block local
   1054  * symbols) and any other relocated rodata/data. A reloc kind with no
   1055  * integer-directive form, or a target the assembler can't spell, falls back to
   1056  * raw `.byte`; the dropped reloc then surfaces in the round-trip's reloc
   1057  * comparison. `relocs` is the section's relocation list, sorted by offset. */
   1058 static KitStatus emit_data_range(Writer* w, Compiler* c, const u8* data,
   1059                                  u32 start, u32 end, const SecReloc* relocs,
   1060                                  u32 nrelocs) {
   1061   u32 off = start;
   1062   while (off < end) {
   1063     const SecReloc* r = NULL;
   1064     u32 next = end;
   1065     u32 i;
   1066     /* Find a reloc starting at `off`, and the offset of the next reloc that
   1067      * starts strictly after `off` (which bounds the raw-byte run). */
   1068     for (i = 0; i < nrelocs; ++i) {
   1069       if (relocs[i].offset == off) {
   1070         r = &relocs[i];
   1071       } else if (relocs[i].offset > off && relocs[i].offset < next) {
   1072         next = relocs[i].offset;
   1073       }
   1074     }
   1075     if (r) {
   1076       const char* dir;
   1077       u32 width;
   1078       int pcrel;
   1079       char symref[256];
   1080       /* Data relocations spell the bare symbol (`.quad sym+addend`): no
   1081        * page/lo12-style operand modifier on either format. A PC-relative
   1082        * reloc adds a trailing ` - .` (location counter) so the assembler
   1083        * re-derives R_PC{32,64} instead of an absolute reloc. */
   1084       ArchRelocOperand bare = {ARCH_RELOC_SURG_NONE, "", "", 0, 0, 0};
   1085       if (data_reloc_directive(r->kind, &dir, &width, &pcrel) &&
   1086           off + width <= end &&
   1087           build_symref(symref, sizeof symref, c, &bare, r->sym, r->addend) >=
   1088               0) {
   1089         KitStatus st = w_str(w, dir);
   1090         if (st != KIT_OK) return st;
   1091         st = w_str(w, symref);
   1092         if (st != KIT_OK) return st;
   1093         if (pcrel) {
   1094           st = w_str(w, " - .");
   1095           if (st != KIT_OK) return st;
   1096         }
   1097         st = w_newline(w);
   1098         if (st != KIT_OK) return st;
   1099         off += width;
   1100         continue;
   1101       }
   1102       /* Unsupported kind / unspellable target: keep raw bytes for this slot
   1103        * (advance to the next reloc boundary so we don't re-handle it). */
   1104     }
   1105     if (next <= off) next = end;
   1106     {
   1107       KitStatus st = emit_raw_bytes(w, data, off, next);
   1108       if (st != KIT_OK) return st;
   1109     }
   1110     off = next;
   1111   }
   1112   return KIT_OK;
   1113 }
   1114 
/* Disassemble data[start, end) and write one assembly line per instruction.
 *
 * w     - output writer.
 * x     - per-section context (relocations, labels, branch targets).
 * dasm  - disassembler; its decode buffers are reused between calls, which
 *         is why the statement order below matters.
 * data  - flattened section bytes; start/end are offsets into it.
 *
 * Per instruction, in order: an undecodable byte becomes a `.byte` line; a
 * relocated instruction the arch fuses with its successor becomes a single
 * `call`/`tail` line; a high-half reloc gets an anchor label line first; then
 * the mnemonic and (symbolized) operands are written.
 *
 * Returns the first non-OK writer status, or KIT_OK. */
static KitStatus emit_disasm_range(Writer* w, const EmitCtx* x,
                                   ArchDisasm* dasm, const u8* data, u32 start,
                                   u32 end) {
  u32 off = start;
  KitStatus st;

  while (off < end) {
    KitInsn insn;
    u64 vaddr = (u64)off; /* the section offset doubles as the address */
    u32 n = arch_disasm_decode(dasm, data + off, end - off, vaddr, &insn);

    /* Decode failure: emit the byte literally and resume one byte on. */
    if (n == 0) {
      st = w_str(w, "        .byte   0x");
      if (st != KIT_OK) return st;
      st = w_hex_byte(w, data[off]);
      if (st != KIT_OK) return st;
      st = w_newline(w);
      if (st != KIT_OK) return st;
      off += 1;
      continue;
    }

    /* Call-pair fusion (RISC-V R_RV_CALL): a reloc on this instruction whose
     * arch fuses it with the FOLLOWING instruction into a single `call`/`tail
     * sym` pseudo. Probe the partner for the call-vs-tail decision, emit one
     * line, and skip both. Decoding the partner reuses the disassembler's
     * buffers (clobbering `insn`), so build the symref first and re-decode
     * `insn` when the pair is not fused. */
    {
      const SecReloc* cr = reloc_in_range(x->relocs, x->nrelocs, off, n);
      char symref[256];
      ArchRelocOperand bare = {ARCH_RELOC_SURG_TAIL, "", "", 0, 0, 0};
      /* The partner must lie inside this range (off + n < end). */
      if (cr && off + n < end &&
          build_symref(symref, sizeof symref, x->c, &bare, cr->sym,
                       cr->addend) >= 0) {
        KitInsn partner;
        u32 pn = arch_disasm_decode(dasm, data + off + n, end - (off + n),
                                    (u64)(off + n), &partner);
        const char* mn = NULL;
        if (pn && arch_reloc_call_pair(x->c, cr->kind, partner.mnemonic,
                                       partner.operands, &mn)) {
          st = w_str(w, "\t");
          if (st != KIT_OK) return st;
          st = w_str(w, mn);
          if (st != KIT_OK) return st;
          st = w_str(w, "\t");
          if (st != KIT_OK) return st;
          st = w_str(w, symref);
          if (st != KIT_OK) return st;
          st = w_newline(w);
          if (st != KIT_OK) return st;
          off += n + pn; /* consume both halves of the pair */
          continue;
        }
        /* Not fused: the partner probe clobbered `insn`; re-decode it. */
        (void)arch_disasm_decode(dasm, data + off, end - off, vaddr, &insn);
      }
    }

    /* A high-half reloc (RISC-V AUIPC `%pcrel_hi`/`%got_pcrel_hi`) needs a
     * unique local anchor label here so the paired `%pcrel_lo` can name it. */
    {
      const SecReloc* hr = reloc_in_range(x->relocs, x->nrelocs, off, n);
      if (hr) {
        ArchRelocOperand ro = {
            0}; /* zero emit_anchor/ref_anchor: arches that
                 * don't set them must read as 0 (rv64-only) */
        if (arch_reloc_operand(x->c, hr->kind, &ro) && ro.emit_anchor) {
          char name[256];
          /* Keyed by this instruction's offset; emit_operands rebuilds the
           * same spelling from the offset find_anchor_for_lo12 returns. */
          fmt_anchor_label(name, sizeof name, x->secidx, off);
          st = w_str(w, name);
          if (st != KIT_OK) return st;
          st = w_str(w, ":");
          if (st != KIT_OK) return st;
          st = w_newline(w);
          if (st != KIT_OK) return st;
        }
      }
    }

    /* Instruction line: tab, mnemonic, then tab + operands when present. */
    st = w_str(w, "\t");
    if (st != KIT_OK) return st;
    {
      /* De-alias a relocated `mv rd, rs` — an ADDI whose %pcrel_lo/%lo
       * immediate the disassembler aliased to `mv` because the encoded imm is
       * 0 — to the canonical `addi rd, rs, %lo(...)`. The RV_LO12 surgery in
       * emit_operands appends the `%lo(...)` as the third operand, and a
       * 3-operand `mv` is non-standard (clang rejects it). */
      KitSlice mn = insn.mnemonic;
      if (mn.len == 2 && mn.s[0] == 'm' && mn.s[1] == 'v') {
        const SecReloc* lr = reloc_in_range(x->relocs, x->nrelocs, off, n);
        ArchRelocOperand ro = {0};
        if (lr && arch_reloc_operand(x->c, lr->kind, &ro) &&
            ro.surg == ARCH_RELOC_SURG_RV_LO12) {
          mn.s = "addi";
          mn.len = 4;
        }
      }
      st = kit_writer_write(w, mn.s, mn.len);
      if (st != KIT_OK) return st;
    }
    if (insn.operands.len) {
      st = w_str(w, "\t");
      if (st != KIT_OK) return st;
      st = emit_operands(w, x, &insn, off);
      if (st != KIT_OK) return st;
    }
    st = w_newline(w);
    if (st != KIT_OK) return st;

    off += n;
  }
  return KIT_OK;
}
   1229 
/* Render the contents of an object builder as assembly text.
 *
 * builder - object builder to dump (used as an ObjBuilder).
 * out_w   - destination writer.
 *
 * For every section from index 1 up that is present, not removed and accepted
 * by the syntax's section_header hook, this writes the header, alignment,
 * labels interleaved with the section body, and size directives. Executable
 * sections are disassembled; other non-BSS sections are written as data;
 * BSS sections as zero fill. Common symbols are emitted last.
 *
 * Returns KIT_INVALID when either argument is NULL, otherwise
 * kit_writer_status(out_w). The statuses returned by the individual emit
 * helpers are not inspected here.
 * NOTE(review): presumably the writer records a failure that
 * kit_writer_status then reports — confirm against the writer. */
KitStatus kit_obj_builder_emit_asm(KitObjBuilder* builder, KitWriter* out_w) {
  ObjBuilder* ob = (ObjBuilder*)builder;
  Compiler* c;
  Writer* w;
  const AsmSyntax* syn;
  AsmSynCtx sx;
  u32 nsec, i;

  if (!ob || !out_w) return KIT_INVALID;

  c = obj_compiler(ob);
  w = (Writer*)out_w;
  syn = asm_syntax_for(c->target.obj);
  /* Only the w and c fields of the syntax context are set here. */
  sx.w = w;
  sx.c = c;
  nsec = obj_section_count(ob);

  /* Section index 0 is never visited. */
  for (i = 1; i < nsec; ++i) {
    const Section* sec = obj_section_get(ob, (ObjSecId)i);
    SymLabel* labels;
    u32 nlabels, total, off, li;
    ArchDisasm* dasm;
    const u8* flat_data;
    u8* heap_data;
    SecReloc* relocs;
    u32 nrelocs;
    u32* btargets;
    u32 nbt, bi;
    EmitCtx ctx;

    if (!sec || sec->removed) continue;
    if (!syn->section_header(&sx, sec)) continue;

    labels = collect_labels(c, ob, (ObjSecId)i, &nlabels);

    if (sec->align > 1) syn->align(&sx, sec->align);

    /* Section length: BSS has no stored bytes, only a size. */
    if (sec->kind == SEC_BSS) {
      total = sec->bss_size;
    } else {
      total = sec->bytes.total;
    }

    dasm = NULL;
    flat_data = NULL;
    heap_data = NULL;
    relocs = NULL;
    nrelocs = 0;
    btargets = NULL;
    nbt = 0;
    bi = 0;

    if (total > 0 && (sec->flags & SF_EXEC)) {
      /* Code: flatten the bytes, then pre-scan for local branch targets. */
      Heap* heap;
      dasm = arch_disasm_new(c);
      relocs = collect_relocs(c, ob, (ObjSecId)i, &nrelocs);
      heap = c->ctx->heap;
      heap_data = (u8*)heap->alloc(heap, total, 1);
      /* NOTE(review): if this allocation fails, flat_data stays NULL and the
       * loop below writes labels but no bytes for the section, with no error
       * raised here — confirm this is the intended best-effort behaviour. */
      if (heap_data) {
        buf_flatten(&sec->bytes, heap_data);
        flat_data = heap_data;
        if (dasm)
          btargets = collect_branch_targets(c, dasm, relocs, nrelocs, flat_data,
                                            total, &nbt);
      }
    } else if (total > 0 && sec->kind != SEC_BSS) {
      /* Data: flatten the bytes; relocations become symbolic directives. */
      Heap* heap = c->ctx->heap;
      relocs = collect_relocs(c, ob, (ObjSecId)i, &nrelocs);
      heap_data = (u8*)heap->alloc(heap, total, 1);
      if (heap_data) {
        buf_flatten(&sec->bytes, heap_data);
        flat_data = heap_data;
      }
    }

    ctx.c = c;
    ctx.secidx = i;
    ctx.relocs = relocs;
    ctx.nrelocs = nrelocs;
    ctx.labels = labels;
    ctx.nlabels = nlabels;
    ctx.btargets = btargets;
    ctx.nbt = nbt;

    off = 0;
    li = 0;

    /* Walk the section, alternating between the labels due at `off` and the
     * run of bytes up to the next label or branch target. */
    while (off < total || li < nlabels) {
      while (li < nlabels && labels[li].offset == off) {
        emit_label(&sx, syn, &labels[li]);
        ++li;
      }
      /* Synthesized branch-target label, unless a real symbol sits here. */
      if (nbt && is_btarget(&ctx, off) && !symbol_at(&ctx, off)) {
        char name[256];
        fmt_synth_label(name, sizeof name, i, off);
        w_str(w, name);
        w_str(w, ":");
        w_newline(w);
      }

      /* End of section reached: any label still pending lies beyond `total`
       * and is not emitted. */
      if (off >= total) break;

      {
        /* `next` ends the current run: the nearest label or branch target
         * after `off`, or the end of the section. */
        u32 next = total;
        if (li < nlabels && labels[li].offset > off &&
            labels[li].offset < total)
          next = labels[li].offset;
        /* btargets is sorted ascending, so `bi` only ever moves forward. */
        while (bi < nbt && btargets[bi] <= off) ++bi;
        if (bi < nbt && btargets[bi] < next) next = btargets[bi];

        if (sec->kind == SEC_BSS) {
          emit_zero_range(w, next - off);
        } else if ((sec->flags & SF_EXEC) && dasm && flat_data) {
          emit_disasm_range(w, &ctx, dasm, flat_data, off, next);
        } else if (flat_data) {
          /* Also reached for an executable section with no disassembler. */
          emit_data_range(w, c, flat_data, off, next, relocs, nrelocs);
        }
        off = next;
      }
    }

    emit_size_directives(&sx, syn, ob, (ObjSecId)i);

    /* Release the per-section disassembler and flattened copy. */
    if (dasm) arch_disasm_free(dasm);
    if (heap_data) c->ctx->heap->free(c->ctx->heap, heap_data, total);

    w_newline(w);
  }

  emit_common_symbols(w, c, ob);

  return kit_writer_status(out_w);
}