asm_emit.c (46767B)
1 #include <kit/asm_emit.h> 2 #include <kit/disasm.h> 3 #include <stdlib.h> 4 #include <string.h> 5 6 #include "arch/arch.h" 7 #include "core/arena.h" 8 #include "core/buf.h" 9 #include "core/core.h" 10 #include "core/heap.h" 11 #include "core/pool.h" 12 #include "core/slice.h" 13 #include "obj/obj.h" 14 15 #define ASM_BYTES_PER_LINE 16u 16 17 static KitStatus w_str(Writer* w, const char* s) { 18 return kit_writer_write(w, s, strlen(s)); 19 } 20 21 static KitStatus w_newline(Writer* w) { return kit_writer_write(w, "\n", 1); } 22 23 static KitStatus w_hex_byte(Writer* w, u8 v) { 24 static const char H[] = "0123456789abcdef"; 25 char buf[2]; 26 buf[0] = H[(v >> 4) & 0xfu]; 27 buf[1] = H[v & 0xfu]; 28 return kit_writer_write(w, buf, 2); 29 } 30 31 static KitStatus w_dec(Writer* w, u64 v) { 32 char buf[32]; 33 u32 i = sizeof(buf); 34 if (v == 0) return kit_writer_write(w, "0", 1); 35 buf[--i] = '\0'; 36 while (v) { 37 buf[--i] = (char)('0' + (v % 10)); 38 v /= 10; 39 } 40 return kit_writer_write(w, buf + i, sizeof(buf) - i - 1); 41 } 42 43 static KitStatus w_sym(Writer* w, Compiler* c, Sym name) { 44 Slice s; 45 if (!name) return w_str(w, ".L0"); 46 s = pool_slice(c->global, name); 47 return kit_writer_write(w, s.s, s.len); 48 } 49 50 typedef struct { 51 u32 offset; 52 Sym name; 53 u16 bind; 54 u16 kind; 55 u64 size; 56 } SymLabel; 57 58 static int cmp_labels(const void* va, const void* vb) { 59 const SymLabel* a = (const SymLabel*)va; 60 const SymLabel* b = (const SymLabel*)vb; 61 if (a->offset < b->offset) return -1; 62 if (a->offset > b->offset) return 1; 63 return 0; 64 } 65 66 static SymLabel* collect_labels(Compiler* c, ObjBuilder* ob, ObjSecId sec_id, 67 u32* nlabels_out) { 68 ObjSymIter* it = obj_symiter_new(ob); 69 SymLabel* labels = NULL; 70 u32 n = 0, cap = 0; 71 72 *nlabels_out = 0; 73 if (!it) return NULL; 74 75 for (;;) { 76 ObjSymEntry e; 77 const ObjSym* sym; 78 if (!obj_symiter_next(it, &e)) break; 79 sym = e.sym; 80 if (!sym || sym->removed) continue; 81 if (sym->section_id != sec_id) continue; 82 if (sym->kind == SK_SECTION || sym->kind == SK_FILE) continue; 83 if (!sym->name) continue; 84 /* RISC-V `.LpcrelHi` anchors are codegen-internal labels on AUIPC 85 * instructions, used only as the target of a paired `%pcrel_lo` 86 * relocation. Many share the one name (one per AUIPC), so emitting them 87 * verbatim defines the same label repeatedly and breaks re-assembly. The 88 * symbolizer replaces each with a unique synthesized anchor label 89 * (emit_anchor / ref_anchor), so suppress the originals here. */ 90 { 91 Slice nm = pool_slice(c->global, sym->name); 92 if (slice_eq_cstr(nm, ".LpcrelHi")) continue; 93 } 94 95 if (n == cap) { 96 u32 ncap = cap ? cap * 2 : 8; 97 SymLabel* nl = arena_array(c->tu, SymLabel, ncap); 98 if (!nl) break; 99 if (labels) memcpy(nl, labels, cap * sizeof(SymLabel)); 100 labels = nl; 101 cap = ncap; 102 } 103 labels[n].offset = (u32)sym->value; 104 labels[n].name = sym->name; 105 labels[n].bind = sym->bind; 106 labels[n].kind = sym->kind; 107 labels[n].size = sym->size; 108 ++n; 109 } 110 obj_symiter_free(it); 111 112 if (n > 0) qsort(labels, n, sizeof(SymLabel), cmp_labels); 113 *nlabels_out = n; 114 return labels; 115 } 116 117 /* ---- Object-format-specific directive syntax (AsmSyntax vtable) -------- 118 * 119 * The directives whose spelling differs by object format hang off this tiny 120 * vtable, selected by c->target.obj (asm_syntax_for, defined below). Selecting 121 * by format — not arch — is correct: an x64-ELF and an aa64-ELF `.s` use the 122 * same `.type`/`.size`/`.section` directives. Everything else (.globl, .comm, 123 * labels, data directives, disassembled instructions) is format-neutral and 124 * stays in the shared emit path. The Mach-O methods make cc -S output 125 * clang/llvm-mc-acceptable; the ELF methods are the historical spelling. */ 126 typedef struct { 127 Writer* w; 128 Compiler* c; 129 } AsmSynCtx; 130 131 typedef struct AsmSyntax { 132 KitObjFmt obj; 133 const char* name; 134 /* Section-switch directive for `sec`: returns 1 = emitted, 0 = skip it. */ 135 int (*section_header)(const AsmSynCtx* x, const Section* sec); 136 /* Symbol type annotation at a definition (ELF `.type`; Mach-O none). */ 137 void (*sym_type)(const AsmSynCtx* x, Sym name, u16 sym_kind); 138 /* Symbol size after a function body (ELF `.size`; Mach-O none). */ 139 void (*sym_size)(const AsmSynCtx* x, Sym name); 140 /* Section alignment directive for a power-of-two byte alignment. */ 141 void (*align)(const AsmSynCtx* x, u32 byte_align); 142 } AsmSyntax; 143 144 static KitStatus emit_label(const AsmSynCtx* x, const AsmSyntax* syn, 145 const SymLabel* lbl) { 146 Writer* w = x->w; 147 if (lbl->bind == SB_GLOBAL || lbl->bind == SB_WEAK) { 148 w_str(w, " .globl "); 149 w_sym(w, x->c, lbl->name); 150 w_newline(w); 151 } 152 syn->sym_type(x, lbl->name, lbl->kind); 153 w_sym(w, x->c, lbl->name); 154 w_str(w, ":"); 155 return w_newline(w); 156 } 157 158 /* Emit `.comm`/`.lcomm` for tentative (common) symbols. These live in no output 159 * section — the linker allocates .bss space at link time — so the section walk 160 * never sees them; without this a `cc -S` that references a tentative global 161 * (`int x;` at file scope) re-assembles to an undefined reference. Global 162 * commons use `.comm name, size, align`; local ones `.lcomm`. */ 163 static KitStatus emit_common_symbols(Writer* w, Compiler* c, ObjBuilder* ob) { 164 ObjSymIter* it = obj_symiter_new(ob); 165 KitStatus st = KIT_OK; 166 if (!it) return KIT_NOMEM; 167 for (;;) { 168 ObjSymEntry e; 169 const ObjSym* sym; 170 if (!obj_symiter_next(it, &e)) break; 171 sym = e.sym; 172 if (!sym || sym->removed || sym->kind != SK_COMMON || !sym->name) continue; 173 st = w_str(w, 174 sym->bind == SB_LOCAL ? " .lcomm " : " .comm "); 175 if (st != KIT_OK) break; 176 st = w_sym(w, c, sym->name); 177 if (st != KIT_OK) break; 178 st = w_str(w, ", "); 179 if (st != KIT_OK) break; 180 st = w_dec(w, sym->size); 181 if (st != KIT_OK) break; 182 if (sym->common_align > 1) { 183 st = w_str(w, ", "); 184 if (st != KIT_OK) break; 185 st = w_dec(w, sym->common_align); 186 if (st != KIT_OK) break; 187 } 188 st = w_newline(w); 189 if (st != KIT_OK) break; 190 } 191 obj_symiter_free(it); 192 return st; 193 } 194 195 static KitStatus emit_size_directives(const AsmSynCtx* x, const AsmSyntax* syn, 196 ObjBuilder* ob, ObjSecId sec_id) { 197 ObjSymIter* it = obj_symiter_new(ob); 198 if (!it) return KIT_NOMEM; 199 200 for (;;) { 201 ObjSymEntry e; 202 const ObjSym* sym; 203 if (!obj_symiter_next(it, &e)) break; 204 sym = e.sym; 205 if (!sym || sym->removed) continue; 206 if (sym->section_id != sec_id) continue; 207 if (sym->kind != SK_FUNC) continue; 208 if (sym->size == 0) continue; 209 syn->sym_size(x, sym->name); 210 } 211 obj_symiter_free(it); 212 return kit_writer_status((KitWriter*)x->w); 213 } 214 215 /* GNU-as flag letters for a named (SEC_OTHER) section's `, "flags"` operand. 216 * The assembler's .section parser (src/asm/asm.c) is the inverse mapping. */ 217 static void w_secflags(Writer* w, u16 flags) { 218 if (flags & SF_ALLOC) w_str(w, "a"); 219 if (flags & SF_WRITE) w_str(w, "w"); 220 if (flags & SF_EXEC) w_str(w, "x"); 221 if (flags & SF_MERGE) w_str(w, "M"); 222 if (flags & SF_STRINGS) w_str(w, "S"); 223 if (flags & SF_TLS) w_str(w, "T"); 224 if (flags & SF_RETAIN) w_str(w, "R"); 225 } 226 227 /* log2 of a power-of-two byte alignment (>=1 → 0). */ 228 static u32 align_log2(u32 a) { 229 u32 n = 0; 230 if (a < 2) return 0; 231 while ((a & 1u) == 0u && n < 31u) { 232 a >>= 1; 233 ++n; 234 } 235 return n; 236 } 237 238 /* ---- ELF directive syntax: the historical spelling (unchanged) ---------- 239 * 240 * Returns 0 to skip a section cc -S does not round-trip (TLS variants, 241 * SEC_DEBUG). SEC_OTHER (a global in a named section, e.g. 242 * __attribute__((section(...)))) emits the real name plus its flags/type/ 243 * entsize in GNU-as syntax so the label and bytes survive re-assembly. */ 244 /* Emit `.section name, "flags", @type[, entsize]` (the GNU-as named-section 245 * form). Used for SEC_OTHER and for any canonical-kind section whose name or 246 * flags can't be reproduced by the bare `.text`/`.section .rodata` builtins. */ 247 static int elf_named_section(const AsmSynCtx* x, const Section* sec) { 248 Writer* w = x->w; 249 Slice nm = pool_slice(x->c->global, sec->name); 250 if (nm.len == 0) return 0; 251 w_str(w, " .section\t"); 252 kit_writer_write(w, nm.s, nm.len); 253 w_str(w, ", \""); 254 w_secflags(w, sec->flags); 255 w_str(w, "\", "); 256 w_str(w, sec->sem == SSEM_NOBITS ? "@nobits" : "@progbits"); 257 if ((sec->flags & SF_MERGE) || sec->entsize) { 258 w_str(w, ", "); 259 w_dec(w, (u64)(sec->entsize ? sec->entsize : 1)); 260 } 261 w_newline(w); 262 return 1; 263 } 264 265 /* Does this canonical-kind section round-trip through its bare builtin 266 * directive? Only if its name is exactly the canonical spelling and it carries 267 * no flags that the builtin can't express (MERGE/STRINGS/RETAIN/entsize). A 268 * `.rodata.foo.merge` mergeable-string section, for instance, must be spelled 269 * in full or the linker won't merge/GC it the way the direct object does. */ 270 static int sec_is_canonical(const AsmSynCtx* x, const Section* sec, 271 const char* canon) { 272 Slice nm = pool_slice(x->c->global, sec->name); 273 if (sec->flags & (SF_MERGE | SF_STRINGS | SF_RETAIN)) return 0; 274 if (sec->entsize) return 0; 275 return slice_eq_cstr(nm, canon); 276 } 277 278 static int elf_section_header(const AsmSynCtx* x, const Section* sec) { 279 Writer* w = x->w; 280 if (sec->flags & SF_TLS) return 0; 281 switch (sec->kind) { 282 case SEC_TEXT: 283 if (!sec_is_canonical(x, sec, ".text")) return elf_named_section(x, sec); 284 w_str(w, " .text"); 285 w_newline(w); 286 return 1; 287 case SEC_RODATA: 288 if (!sec_is_canonical(x, sec, ".rodata")) 289 return elf_named_section(x, sec); 290 w_str(w, " .section\t.rodata"); 291 w_newline(w); 292 return 1; 293 case SEC_DATA: 294 if (!sec_is_canonical(x, sec, ".data")) return elf_named_section(x, sec); 295 w_str(w, " .section\t.data"); 296 w_newline(w); 297 return 1; 298 case SEC_BSS: 299 if (!sec_is_canonical(x, sec, ".bss")) return elf_named_section(x, sec); 300 w_str(w, " .section\t.bss"); 301 w_newline(w); 302 return 1; 303 case SEC_OTHER: 304 return elf_named_section(x, sec); 305 default: 306 return 0; 307 } 308 } 309 310 static void elf_sym_type(const AsmSynCtx* x, Sym name, u16 kind) { 311 const char* t = NULL; 312 if (kind == SK_FUNC) 313 t = ", @function"; 314 else if (kind == SK_OBJ || kind == SK_COMMON || kind == SK_TLS) 315 t = ", @object"; 316 if (!t) return; 317 w_str(x->w, " .type "); 318 w_sym(x->w, x->c, name); 319 w_str(x->w, t); 320 w_newline(x->w); 321 } 322 323 static void elf_sym_size(const AsmSynCtx* x, Sym name) { 324 w_str(x->w, " .size "); 325 w_sym(x->w, x->c, name); 326 w_str(x->w, ", .-"); 327 w_sym(x->w, x->c, name); 328 w_newline(x->w); 329 } 330 331 static void elf_align(const AsmSynCtx* x, u32 byte_align) { 332 w_str(x->w, " .align "); 333 w_dec(x->w, (u64)byte_align); 334 w_newline(x->w); 335 } 336 337 /* ---- Mach-O directive syntax: clang/llvm-mc-acceptable spelling ---------- */ 338 339 static int macho_section_header(const AsmSynCtx* x, const Section* sec) { 340 Writer* w = x->w; 341 if (sec->flags & SF_TLS) return 0; /* TLS not round-tripped today */ 342 switch (sec->kind) { 343 case SEC_TEXT: 344 w_str(w, " .text"); /* Mach-O builtin */ 345 w_newline(w); 346 return 1; 347 case SEC_RODATA: 348 w_str(w, " .section\t"); 349 w_str(w, obj_macho_canon_secname(SEC_RODATA)); /* __TEXT,__const */ 350 w_newline(w); 351 return 1; 352 case SEC_DATA: 353 w_str(w, " .section\t"); 354 w_str(w, obj_macho_canon_secname(SEC_DATA)); /* __DATA,__data */ 355 w_newline(w); 356 return 1; 357 case SEC_BSS: 358 /* clang accepts the `.bss` builtin; the shared zero-range path fills it 359 * (avoids `.zerofill`'s per-symbol operand syntax). */ 360 w_str(w, " .bss"); 361 w_newline(w); 362 return 1; 363 case SEC_OTHER: { 364 Slice nm = pool_slice(x->c->global, sec->name); 365 if (nm.len == 0) return 0; 366 w_str(w, " .section\t"); 367 if (memchr(nm.s, ',', nm.len)) { 368 /* Already "__SEG,__sect" (codegen interns eh_frame this way on 369 * Mach-O). Emit bare — no ELF `, "flags", @progbits` suffix. */ 370 kit_writer_write(w, nm.s, nm.len); 371 } else { 372 /* Defensive: a non-comma name on a Mach-O target. Spell it the way 373 * the writer's name_to_seg_sect would (canonical kind, else 374 * __DATA,<name-without-dot>) so text and binary agree. */ 375 const char* canon = obj_macho_canon_secname(sec->kind); 376 if (canon) { 377 w_str(w, canon); 378 } else { 379 w_str(w, "__DATA,"); 380 if (nm.s[0] == '.') 381 kit_writer_write(w, nm.s + 1, nm.len - 1); 382 else 383 kit_writer_write(w, nm.s, nm.len); 384 } 385 } 386 w_newline(w); 387 return 1; 388 } 389 default: 390 return 0; 391 } 392 } 393 394 static void macho_sym_type(const AsmSynCtx* x, Sym name, u16 kind) { 395 (void)x; 396 (void)name; 397 (void)kind; /* Mach-O derives symbol kind from the symbol table */ 398 } 399 400 static void macho_sym_size(const AsmSynCtx* x, Sym name) { 401 (void)x; 402 (void)name; /* Mach-O has no `.size` */ 403 } 404 405 static void macho_align(const AsmSynCtx* x, u32 byte_align) { 406 /* Mach-O `.align`/`.p2align` are log2; cc -S emits `.p2align` so clang and 407 * kit-as read it identically. */ 408 w_str(x->w, " .p2align "); 409 w_dec(x->w, (u64)align_log2(byte_align)); 410 w_newline(x->w); 411 } 412 413 static const AsmSyntax g_asm_syntax_elf = { 414 KIT_OBJ_ELF, "elf", elf_section_header, 415 elf_sym_type, elf_sym_size, elf_align, 416 }; 417 static const AsmSyntax g_asm_syntax_macho = { 418 KIT_OBJ_MACHO, "macho", macho_section_header, 419 macho_sym_type, macho_sym_size, macho_align, 420 }; 421 /* COFF text emission is not yet exercised by the cc -S lanes; alias the ELF 422 * directive spelling for now (TODO COFF: .def/.scl/.type/.endef; COFF 423 * `.section name, "flags"` has its own form). The seam exists so COFF is 424 * pluggable without touching the printer. */ 425 static const AsmSyntax g_asm_syntax_coff = { 426 KIT_OBJ_COFF, "coff", elf_section_header, 427 elf_sym_type, elf_sym_size, elf_align, 428 }; 429 430 static const AsmSyntax* asm_syntax_for(KitObjFmt fmt) { 431 switch (fmt) { 432 case KIT_OBJ_MACHO: 433 return &g_asm_syntax_macho; 434 case KIT_OBJ_COFF: 435 return &g_asm_syntax_coff; 436 case KIT_OBJ_ELF: 437 default: 438 return &g_asm_syntax_elf; /* WASM has no textual-asm path */ 439 } 440 } 441 442 /* Emit a run of raw `.byte` lines for [start, end). */ 443 static KitStatus emit_raw_bytes(Writer* w, const u8* data, u32 start, u32 end) { 444 u32 off; 445 for (off = start; off < end; off += ASM_BYTES_PER_LINE) { 446 u32 rem = end - off; 447 u32 n = rem < ASM_BYTES_PER_LINE ? rem : ASM_BYTES_PER_LINE; 448 u32 j; 449 KitStatus st; 450 st = w_str(w, " .byte 0x"); 451 if (st != KIT_OK) return st; 452 st = w_hex_byte(w, data[off]); 453 if (st != KIT_OK) return st; 454 for (j = 1; j < n; ++j) { 455 st = w_str(w, ", 0x"); 456 if (st != KIT_OK) return st; 457 st = w_hex_byte(w, data[off + j]); 458 if (st != KIT_OK) return st; 459 } 460 st = w_newline(w); 461 if (st != KIT_OK) return st; 462 } 463 return KIT_OK; 464 } 465 466 /* A reloc kind whose data field carries a symbol value reproducible by an 467 * integer directive: maps to (directive, byte width, PC-relative?). The 468 * assembler emits the matching R_ABS{32,64} for `.word`/`.quad SYM+addend` and 469 * R_PC{32,64} for `.long`/`.quad SYM - .` (emit_int_directive), so the 470 * round-tripped relocation matches codegen's. `*pcrel` selects the `SYM - .` 471 * spelling (built by build_data_symref). Returns 0 for kinds with no 472 * integer-directive spelling (caller keeps the raw bytes). */ 473 static int data_reloc_directive(u16 kind, const char** dir, u32* width, 474 int* pcrel) { 475 *pcrel = 0; 476 switch (kind) { 477 case R_ABS64: 478 *dir = " .quad "; 479 *width = 8; 480 return 1; 481 case R_PC64: 482 *dir = " .quad "; 483 *width = 8; 484 *pcrel = 1; 485 return 1; 486 case R_ABS32: 487 *dir = " .word "; 488 *width = 4; 489 return 1; 490 case R_PC32: 491 *dir = " .long "; 492 *width = 4; 493 *pcrel = 1; 494 return 1; 495 default: 496 return 0; 497 } 498 } 499 500 static KitStatus emit_zero_range(Writer* w, u32 size) { 501 KitStatus st; 502 if (size == 0) return KIT_OK; 503 st = w_str(w, " .zero "); 504 if (st != KIT_OK) return st; 505 st = w_dec(w, (u64)size); 506 if (st != KIT_OK) return st; 507 return w_newline(w); 508 } 509 510 /* ---- Phase 2 symbolization: reloc-driven operand substitution ---------- 511 * 512 * `cc -S` must be re-assemblable. The disassembler renders relocated operands 513 * numerically (e.g. `bl 0x10`, `adrp x16, 0x0`, `ldr w8, [x16]`), which would 514 * branch to the wrong place or load from address 0 on re-assembly. Here we 515 * consult the section's relocation table and rewrite the covered operand into 516 * the relocation-operator syntax the assembler parses (the inverse of 517 * src/arch/aa64/asm.c's parse_reloc_mod). See doc/TESTING.md. 518 * 519 * Operand text is rewritten in place rather than re-rendered from decoded 520 * fields, so the register names the disassembler produced are preserved and 521 * this layer stays free of per-arch register-naming knowledge. The reloc 522 * kind alone selects the modifier and the operand shape to patch. */ 523 524 typedef struct { 525 u32 offset; 526 u16 kind; 527 Sym sym; 528 i64 addend; 529 } SecReloc; 530 531 static int cmp_secreloc(const void* va, const void* vb) { 532 const SecReloc* a = (const SecReloc*)va; 533 const SecReloc* b = (const SecReloc*)vb; 534 if (a->offset < b->offset) return -1; 535 if (a->offset > b->offset) return 1; 536 return 0; 537 } 538 539 static SecReloc* collect_relocs(Compiler* c, ObjBuilder* ob, ObjSecId sec_id, 540 u32* n_out) { 541 u32 total = obj_reloc_total(ob); 542 u32 n = 0, cap = 0, i; 543 SecReloc* arr = NULL; 544 545 *n_out = 0; 546 for (i = 0; i < total; ++i) { 547 const Reloc* r = obj_reloc_at(ob, i); 548 const ObjSym* s; 549 if (!r || r->removed) continue; 550 if (r->section_id != sec_id) continue; 551 if (n == cap) { 552 u32 ncap = cap ? cap * 2 : 8; 553 SecReloc* na = arena_array(c->tu, SecReloc, ncap); 554 if (!na) break; 555 if (arr) memcpy(na, arr, cap * sizeof(SecReloc)); 556 arr = na; 557 cap = ncap; 558 } 559 s = obj_symbol_get(ob, r->sym); 560 arr[n].offset = r->offset; 561 arr[n].kind = r->kind; 562 arr[n].sym = s ? s->name : (Sym)0; 563 arr[n].addend = r->addend; 564 ++n; 565 } 566 if (n > 1) qsort(arr, n, sizeof(SecReloc), cmp_secreloc); 567 *n_out = n; 568 return arr; 569 } 570 571 /* First relocation whose offset lies within instruction [off, off+len). */ 572 static const SecReloc* reloc_in_range(const SecReloc* r, u32 n, u32 off, 573 u32 len) { 574 u32 i; 575 for (i = 0; i < n; ++i) 576 if (r[i].offset >= off && r[i].offset < off + len) return &r[i]; 577 return NULL; 578 } 579 580 /* The reloc-kind → operand-syntax mapping now lives in the arch backend 581 * (ArchAsmOps.reloc_operand, src/arch/<arch>/asm.c), reached via 582 * arch_reloc_operand(). This keeps the printer arch-agnostic and format-aware: 583 * aarch64 ELF spells `:lo12:sym`, Mach-O spells `sym@PAGEOFF`. */ 584 585 /* A `.L`-prefixed name is an assembler-local label (e.g. `.Lkit_ro.0`, 586 * `.Lkit_jt.0`): the assembler's lexer accepts it as an identifier. Other 587 * `.`-prefixed names (section symbols like `.text`, `.rodata`) are not yet 588 * re-assemblable as operands, so the symbolizer keeps the numeric form. */ 589 static int sym_is_assemblable(Slice s) { 590 if (s.len == 0) return 0; 591 if (s.s[0] != '.') return 1; 592 return s.len >= 2 && s.s[1] == 'L'; 593 } 594 595 /* Build "<prefix><sym>[+/-addend]<suffix>" into buf. Returns length, or -1 if 596 * the symbol has no usable name (anonymous, or a `.`-prefixed section symbol 597 * the assembler's expression parser does not accept). The modifier is a prefix 598 * (ELF `:lo12:sym`) or a suffix (Mach-O `sym@PAGEOFF`), per the arch/format; 599 * an addend lands before the suffix (`sym+8@PAGEOFF`), which both clang and 600 * kit-as parse. */ 601 static int build_symref(char* buf, u32 cap, Compiler* c, 602 const ArchRelocOperand* ro, Sym name, i64 addend) { 603 Slice s; 604 u32 p = 0, i; 605 if (!name) return -1; 606 s = pool_slice(c->global, name); 607 if (!sym_is_assemblable(s)) return -1; 608 /* Undo any instruction-encoding addend bias so the printed offset is the 609 * symbol offset (x86-64 rel32 relocs store addend-4; the assembler re-applies 610 * the -4, so emit `sym` for a stored -4). */ 611 addend += ro->addend_bias; 612 for (i = 0; ro->prefix[i] && p + 1 < cap; ++i) buf[p++] = ro->prefix[i]; 613 for (i = 0; i < s.len && p + 1 < cap; ++i) buf[p++] = s.s[i]; 614 if (addend != 0) { 615 char num[24]; 616 u32 nl = 0; 617 u64 mag = addend < 0 ? (u64)(-(addend)) : (u64)addend; 618 if (p + 1 < cap) buf[p++] = addend < 0 ? '-' : '+'; 619 do { 620 num[nl++] = (char)('0' + (u32)(mag % 10)); 621 mag /= 10; 622 } while (mag && nl < sizeof(num)); 623 while (nl && p + 1 < cap) buf[p++] = num[--nl]; 624 } 625 for (i = 0; ro->suffix[i] && p + 1 < cap; ++i) buf[p++] = ro->suffix[i]; 626 buf[p] = '\0'; 627 return (int)p; 628 } 629 630 /* Position of the "(%rip)" substring in [ops, ops+olen), or -1. */ 631 static i32 find_rip(const char* ops, u32 olen) { 632 u32 i; 633 if (olen < 6) return -1; 634 for (i = 0; i + 6 <= olen; ++i) 635 if (memcmp(ops + i, "(%rip)", 6) == 0) return (i32)i; 636 return -1; 637 } 638 639 /* Write `ops` with the relocated operand rewritten to `symref`. The surgery 640 * site is chosen from the operand text first: an x86-64 `disp(%rip)` operand 641 * always takes RIP surgery (insert sym before the displacement), regardless of 642 * `surg`. Otherwise `surg` selects: TAIL replaces the last comma-separated 643 * component (or the whole operand if there is no comma — branch targets); MEM 644 * rewrites the offset inside [...] (aarch64 ldst). */ 645 static KitStatus w_symbolized(Writer* w, const char* ops, u32 olen, 646 const char* symref, ArchRelocSurg surg) { 647 i32 rip = find_rip(ops, olen); 648 if (rip >= 0) surg = ARCH_RELOC_SURG_RIP; 649 if (surg == ARCH_RELOC_SURG_RIP) { 650 /* `[disp](%rip)[, ...]` -> `symref(%rip)[, ...]`: replace any numeric 651 * displacement immediately before `(%rip)` with symref. */ 652 i32 ds = rip; /* start of the displacement run before the '(' */ 653 KitStatus st; 654 while (ds > 0) { 655 char ch = ops[ds - 1]; 656 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || 657 (ch >= 'A' && ch <= 'F') || ch == 'x' || ch == '-' || ch == '+') 658 --ds; 659 else 660 break; 661 } 662 st = kit_writer_write(w, ops, (u32)ds); /* text before the displacement */ 663 if (st != KIT_OK) return st; 664 st = w_str(w, symref); 665 if (st != KIT_OK) return st; 666 return kit_writer_write(w, ops + rip, olen - (u32)rip); /* "(%rip)..." */ 667 } 668 if (surg == ARCH_RELOC_SURG_TAIL) { 669 i32 last_comma = -1; 670 u32 i; 671 for (i = 0; i < olen; ++i) 672 if (ops[i] == ',') last_comma = (i32)i; 673 if (last_comma < 0) return w_str(w, symref); 674 { 675 KitStatus st = kit_writer_write(w, ops, (u32)last_comma); 676 if (st != KIT_OK) return st; 677 st = w_str(w, ", "); 678 if (st != KIT_OK) return st; 679 return w_str(w, symref); 680 } 681 } 682 if (surg == ARCH_RELOC_SURG_RV_LO12) { 683 /* RISC-V low-half: a `disp(base)` memory form rewrites the displacement; 684 * a register-immediate form appends the modifier as a new operand. The 685 * memory form is recognized by a trailing `(...)` group. */ 686 i32 lp = -1, rp = -1; 687 u32 i; 688 for (i = 0; i < olen; ++i) { 689 if (ops[i] == '(') 690 lp = (i32)i; 691 else if (ops[i] == ')') 692 rp = (i32)i; 693 } 694 if (lp >= 0 && rp > lp && (u32)(rp + 1) == olen) { 695 /* `..., <disp>(base)` -> `..., symref(base)`: replace the displacement 696 * run that ends immediately before the '('. */ 697 i32 ds = lp; /* start of the displacement run */ 698 KitStatus st; 699 while (ds > 0) { 700 char ch = ops[ds - 1]; 701 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || 702 (ch >= 'A' && ch <= 'F') || ch == 'x' || ch == '-' || ch == '+') 703 --ds; 704 else 705 break; 706 } 707 st = kit_writer_write(w, ops, (u32)ds); /* "..., " before the disp */ 708 if (st != KIT_OK) return st; 709 st = w_str(w, symref); 710 if (st != KIT_OK) return st; 711 return kit_writer_write(w, ops + lp, olen - (u32)lp); /* "(base)" */ 712 } 713 /* Register-immediate (e.g. `mv rd, rs`): append symref as a new operand. */ 714 { 715 KitStatus st = kit_writer_write(w, ops, olen); 716 if (st != KIT_OK) return st; 717 st = w_str(w, ", "); 718 if (st != KIT_OK) return st; 719 return w_str(w, symref); 720 } 721 } 722 /* SURG_MEM: keep the base register, set the offset to symref. */ 723 { 724 i32 lb = -1, rb = -1, base_end; 725 u32 i; 726 KitStatus st; 727 for (i = 0; i < olen; ++i) { 728 if (ops[i] == '[') 729 lb = (i32)i; 730 else if (ops[i] == ']') 731 rb = (i32)i; 732 } 733 if (lb < 0 || rb < 0 || rb < lb) /* unexpected shape; emit verbatim */ 734 return kit_writer_write(w, ops, olen); 735 base_end = lb + 1; 736 while (base_end < rb && ops[base_end] != ',') ++base_end; 737 st = kit_writer_write(w, ops, (u32)(base_end)); /* "...[Rn" */ 738 if (st != KIT_OK) return st; 739 st = w_str(w, ", "); 740 if (st != KIT_OK) return st; 741 st = w_str(w, symref); 742 if (st != KIT_OK) return st; 743 st = w_str(w, "]"); 744 if (st != KIT_OK) return st; 745 /* trailing text after the close bracket (e.g. nothing for reloc'd ldst) */ 746 if ((u32)(rb + 1) < olen) 747 return kit_writer_write(w, ops + rb + 1, olen - (u32)(rb + 1)); 748 return KIT_OK; 749 } 750 } 751 752 /* ---- Phase 2 symbolization: intra-function branch labels --------------- 753 * 754 * Branches that stay within the section carry no relocation — the 755 * disassembler renders the resolved target numerically (`b 0x60`). The 756 * assembler rejects a numeric branch target, so re-assembly needs a label. 757 * We pre-scan the section for such branch targets, synthesize a local label 758 * at each, and rewrite the branch operand to reference it. 759 * 760 * Synthesized names are `Lcf_<secidx>_<hexoff>` — deliberately not `.L` 761 * prefixed, since the assembler's expression parser does not currently accept 762 * `.`-led identifiers as operands. The names are unique within the file. */ 763 764 static int cmp_u32(const void* va, const void* vb) { 765 u32 a = *(const u32*)va, b = *(const u32*)vb; 766 if (a < b) return -1; 767 if (a > b) return 1; 768 return 0; 769 } 770 771 /* Which mnemonics are intra-section local branches (target codegen resolved in 772 * place, no relocation) is arch-specific: routed through arch_is_local_branch 773 * (ArchAsmOps.is_local_branch). The disassembler renders such a target 774 * numerically; we synthesize a label at it so the operand re-assembles. */ 775 776 /* Parse the trailing `0x<hex>` branch-target operand (the last comma-separated 777 * component). Returns 1 and the value on success. */ 778 static int parse_hex_tail(KitSlice ops, u64* out) { 779 i32 start = 0, p; 780 u64 v = 0; 781 u32 i; 782 int any = 0; 783 for (i = 0; i < ops.len; ++i) 784 if (ops.s[i] == ',') start = (i32)i + 1; 785 while (start < (i32)ops.len && (ops.s[start] == ' ' || ops.s[start] == '\t')) 786 ++start; 787 if (start + 2 > (i32)ops.len || ops.s[start] != '0' || 788 (ops.s[start + 1] | 32) != 'x') 789 return 0; 790 for (p = start + 2; p < (i32)ops.len; ++p) { 791 char c = ops.s[p]; 792 u32 d; 793 if (c >= '0' && c <= '9') 794 d = (u32)(c - '0'); 795 else if ((c | 32) >= 'a' && (c | 32) <= 'f') 796 d = (u32)((c | 32) - 'a' + 10); 797 else 798 break; 799 v = v * 16 + d; 800 any = 1; 801 } 802 while (p < (i32)ops.len && ops.s[p] == ' ') ++p; 803 if (!any || p != (i32)ops.len) return 0; 804 *out = v; 805 return 1; 806 } 807 808 /* Parse a local-branch target from the operand tail into an absolute, 809 * section-relative offset. Two render forms occur: 810 * "0x<hex>" — an absolute target (the printer had the instruction's address) 811 * "#<dec>" — a PC-relative displacement, emitted when the printer had no 812 * address context, i.e. the instruction sits at section offset 0 813 * (a branch as the first instruction of .text, which -O1 leaf 814 * functions can produce). The absolute target is inst_off + disp. 815 * Returns 1 and sets *out to the absolute target. */ 816 static int parse_branch_tail(KitSlice ops, u32 inst_off, u64* out) { 817 i32 start = 0, p; 818 i64 v = 0; 819 int neg = 0, any = 0; 820 u32 i; 821 if (parse_hex_tail(ops, out)) return 1; 822 for (i = 0; i < ops.len; ++i) 823 if (ops.s[i] == ',') start = (i32)i + 1; 824 while (start < (i32)ops.len && (ops.s[start] == ' ' || ops.s[start] == '\t')) 825 ++start; 826 if (start >= (i32)ops.len || ops.s[start] != '#') return 0; 827 p = start + 1; 828 if (p < (i32)ops.len && (ops.s[p] == '-' || ops.s[p] == '+')) 829 neg = (ops.s[p++] == '-'); 830 for (; p < (i32)ops.len; ++p) { 831 char c = ops.s[p]; 832 if (c < '0' || c > '9') break; 833 v = v * 10 + (c - '0'); 834 any = 1; 835 } 836 while (p < (i32)ops.len && ops.s[p] == ' ') ++p; 837 if (!any || p != (i32)ops.len) return 0; 838 if (neg) v = -v; 839 *out = (u64)((i64)inst_off + v); 840 return 1; 841 } 842 843 typedef struct { 844 Compiler* c; 845 u32 secidx; 846 const SecReloc* relocs; 847 u32 nrelocs; 848 const SymLabel* labels; 849 u32 nlabels; 850 const u32* btargets; 851 u32 nbt; 852 } EmitCtx; 853 854 static u32 fmt_u64(char* buf, u32 p, u32 cap, u64 v, u32 base) { 855 char tmp[24]; 856 u32 n = 0; 857 do { 858 u32 d = (u32)(v % base); 859 tmp[n++] = (char)(d < 10 ? '0' + d : 'a' + d - 10); 860 v /= base; 861 } while (v && n < sizeof tmp); 862 while (n && p + 1 < cap) buf[p++] = tmp[--n]; 863 return p; 864 } 865 866 /* Synthesized label spelling, shared by definition and reference sites. */ 867 static u32 fmt_synth_label(char* buf, u32 cap, u32 secidx, u32 off) { 868 u32 p = 0; 869 const char* pre = "Lcf_"; 870 u32 i; 871 for (i = 0; pre[i] && p + 1 < cap; ++i) buf[p++] = pre[i]; 872 p = fmt_u64(buf, p, cap, secidx, 10); 873 if (p + 1 < cap) buf[p++] = '_'; 874 p = fmt_u64(buf, p, cap, off, 16); 875 buf[p] = '\0'; 876 return p; 877 } 878 879 /* Synthesized hi/lo anchor label spelling (RISC-V `%pcrel_hi`/`%pcrel_lo` 880 * pairing). The high-half reloc defines `.Lpcrel_hi_<secidx>_<off>` at its 881 * AUIPC; the paired low-half reloc references it. `.L`-prefixed so the 882 * assembler's lexer accepts it and the linker treats it as local. */ 883 static u32 fmt_anchor_label(char* buf, u32 cap, u32 secidx, u32 off) { 884 u32 p = 0; 885 const char* pre = ".Lpcrel_hi_"; 886 u32 i; 887 for (i = 0; pre[i] && p + 1 < cap; ++i) buf[p++] = pre[i]; 888 p = fmt_u64(buf, p, cap, secidx, 10); 889 if (p + 1 < cap) buf[p++] = '_'; 890 p = fmt_u64(buf, p, cap, off, 16); 891 buf[p] = '\0'; 892 return p; 893 } 894 895 /* The offset of the high-half (anchor-emitting) relocation paired with a 896 * low-half reloc at `lo_off`: the nearest preceding reloc whose 897 * ArchRelocOperand sets emit_anchor. kit's codegen always emits the AUIPC 898 * immediately before its paired ADDI/load, so the nearest preceding anchor is 899 * the correct one. Returns 1 and *hi_off on success. */ 900 static int find_anchor_for_lo12(const EmitCtx* x, u32 lo_off, u32* hi_off) { 901 u32 i; 902 int found = 0; 903 u32 best = 0; 904 for (i = 0; i < x->nrelocs; ++i) { 905 ArchRelocOperand ro = {0}; /* zero emit_anchor/ref_anchor: arches that 906 * don't set them must read as 0 (rv64-only) */ 907 if (x->relocs[i].offset >= lo_off) break; 908 if (arch_reloc_operand(x->c, x->relocs[i].kind, &ro) && ro.emit_anchor) { 909 best = x->relocs[i].offset; 910 found = 1; 911 } 912 } 913 if (found && hi_off) *hi_off = best; 914 return found; 915 } 916 917 /* Non-dot symbol name defined at `off`, or NULL. Such a symbol is used as the 918 * branch label directly (no synthesized label needed). */ 919 static Sym symbol_at(const EmitCtx* x, u32 off) { 920 u32 i; 921 for (i = 0; i < x->nlabels; ++i) { 922 if (x->labels[i].offset == off && x->labels[i].name) { 923 Slice s = pool_slice(x->c->global, x->labels[i].name); 924 if (sym_is_assemblable(s)) return x->labels[i].name; 925 } 926 } 927 return (Sym)0; 928 } 929 930 /* Label name for a branch target offset: an existing symbol if one is defined 931 * there, else the synthesized `Lcf_...` name. */ 932 static u32 build_label_name(char* buf, u32 cap, const EmitCtx* x, u32 off) { 933 Sym sym = symbol_at(x, off); 934 if (sym) { 935 Slice s = pool_slice(x->c->global, sym); 936 u32 p = 0, i; 937 for (i = 0; i < s.len && p + 1 < cap; ++i) buf[p++] = s.s[i]; 938 buf[p] = '\0'; 939 return p; 940 } 941 return fmt_synth_label(buf, cap, x->secidx, off); 942 } 943 944 static int is_btarget(const EmitCtx* x, u32 off) { 945 u32 i; 946 for (i = 0; i < x->nbt; ++i) 947 if (x->btargets[i] == off) return 1; 948 return 0; 949 } 950 951 /* Append `off` to a dynamic, deduplicated anchor array (arena-grown). */ 952 static void anchor_add(Compiler* c, u32** arr, u32* n, u32* cap, u32 off) { 953 u32 j; 954 for (j = 0; j < *n; ++j) 955 if ((*arr)[j] == off) return; 956 if (*n == *cap) { 957 u32 nc = *cap ? *cap * 2 : 8; 958 u32* na = arena_array(c->tu, u32, nc); 959 if (!na) return; 960 if (*arr) memcpy(na, *arr, *cap * sizeof(u32)); 961 *arr = na; 962 *cap = nc; 963 } 964 (*arr)[(*n)++] = off; 965 } 966 967 /* Pre-scan: collect in-section branch targets of un-relocated local branches, 968 * so cc -S synthesizes a label there and the branch re-assembles. Code-location 969 * references that must survive a re-encoding assembler (switch jump-table 970 * entries, `&&label` address-takes) are NOT handled here — codegen emits them 971 * as relocations against per-block local symbols (mc_label_symbol), so the 972 * normal reloc-operand path symbolizes them and the target label is a real 973 * symbol. */ 974 static u32* collect_branch_targets(Compiler* c, ArchDisasm* dasm, 975 const SecReloc* relocs, u32 nrelocs, 976 const u8* data, u32 total, u32* n_out) { 977 u32* arr = NULL; 978 u32 n = 0, cap = 0, off = 0; 979 980 *n_out = 0; 981 while (off < total) { 982 KitInsn insn; 983 u32 nb = arch_disasm_decode(dasm, data + off, total - off, (u64)off, &insn); 984 u64 tgt; 985 if (nb == 0) { 986 off += 1; 987 continue; 988 } 989 if (!reloc_in_range(relocs, nrelocs, off, nb) && 990 arch_is_local_branch(c, insn.mnemonic) && 991 parse_branch_tail(insn.operands, off, &tgt) && tgt < total) { 992 anchor_add(c, &arr, &n, &cap, (u32)tgt); 993 } 994 off += nb; 995 } 996 997 if (n > 1) qsort(arr, n, sizeof(u32), cmp_u32); 998 *n_out = n; 999 return arr; 1000 } 1001 1002 /* Emit an instruction's operands, symbolizing a covering relocation or an 1003 * intra-section branch target when present. */ 1004 static KitStatus emit_operands(Writer* w, const EmitCtx* x, const KitInsn* insn, 1005 u32 off) { 1006 const SecReloc* r; 1007 if (!insn->operands.len) return KIT_OK; 1008 r = reloc_in_range(x->relocs, x->nrelocs, off, insn->nbytes); 1009 if (r) { 1010 ArchRelocOperand ro = {0}; /* zero emit_anchor/ref_anchor: arches that 1011 * don't set them must read as 0 (rv64-only) */ 1012 if (arch_reloc_operand(x->c, r->kind, &ro)) { 1013 char symref[256]; 1014 /* A low-half reloc (RISC-V `%pcrel_lo`) names the paired high-half's 1015 * synthesized anchor label, not the reloc's own (`.LpcrelHi`) symbol. */ 1016 if (ro.ref_anchor) { 1017 u32 hi_off; 1018 if (find_anchor_for_lo12(x, off, &hi_off)) { 1019 char name[256]; 1020 u32 p = 0, i; 1021 for (i = 0; ro.prefix[i] && p + 1 < sizeof name; ++i) 1022 name[p++] = ro.prefix[i]; 1023 p += fmt_anchor_label(name + p, (u32)sizeof name - p, x->secidx, 1024 hi_off); 1025 for (i = 0; ro.suffix[i] && p + 1 < sizeof name; ++i) 1026 name[p++] = ro.suffix[i]; 1027 name[p] = '\0'; 1028 return w_symbolized(w, insn->operands.s, insn->operands.len, name, 1029 ro.surg); 1030 } 1031 /* No anchor found (unexpected): fall through to keep numeric. */ 1032 } else if (build_symref(symref, sizeof symref, x->c, &ro, r->sym, 1033 r->addend) >= 0) { 1034 return w_symbolized(w, insn->operands.s, insn->operands.len, symref, 1035 ro.surg); 1036 } 1037 } 1038 } else if (arch_is_local_branch(x->c, insn->mnemonic)) { 1039 u64 tgt; 1040 if (parse_branch_tail(insn->operands, off, &tgt) && 1041 is_btarget(x, (u32)tgt)) { 1042 char name[256]; 1043 build_label_name(name, sizeof name, x, (u32)tgt); 1044 return w_symbolized(w, insn->operands.s, insn->operands.len, name, 1045 ARCH_RELOC_SURG_TAIL); 1046 } 1047 } 1048 return kit_writer_write(w, insn->operands.s, insn->operands.len); 1049 } 1050 1051 /* Emit a data range, rendering any covered relocation as a symbolic integer 1052 * directive (`.quad sym+addend`) so cc -S | as reproduces the data relocation 1053 * table — switch jump tables (`.quad .Lcfblk.*` against per-block local 1054 * symbols) and any other relocated rodata/data. A reloc kind with no 1055 * integer-directive form, or a target the assembler can't spell, falls back to 1056 * raw `.byte`; the dropped reloc then surfaces in the round-trip's reloc 1057 * comparison. `relocs` is the section's relocation list, sorted by offset. */ 1058 static KitStatus emit_data_range(Writer* w, Compiler* c, const u8* data, 1059 u32 start, u32 end, const SecReloc* relocs, 1060 u32 nrelocs) { 1061 u32 off = start; 1062 while (off < end) { 1063 const SecReloc* r = NULL; 1064 u32 next = end; 1065 u32 i; 1066 /* Find a reloc starting at `off`, and the offset of the next reloc that 1067 * starts strictly after `off` (which bounds the raw-byte run). */ 1068 for (i = 0; i < nrelocs; ++i) { 1069 if (relocs[i].offset == off) { 1070 r = &relocs[i]; 1071 } else if (relocs[i].offset > off && relocs[i].offset < next) { 1072 next = relocs[i].offset; 1073 } 1074 } 1075 if (r) { 1076 const char* dir; 1077 u32 width; 1078 int pcrel; 1079 char symref[256]; 1080 /* Data relocations spell the bare symbol (`.quad sym+addend`): no 1081 * page/lo12-style operand modifier on either format. A PC-relative 1082 * reloc adds a trailing ` - .` (location counter) so the assembler 1083 * re-derives R_PC{32,64} instead of an absolute reloc. */ 1084 ArchRelocOperand bare = {ARCH_RELOC_SURG_NONE, "", "", 0, 0, 0}; 1085 if (data_reloc_directive(r->kind, &dir, &width, &pcrel) && 1086 off + width <= end && 1087 build_symref(symref, sizeof symref, c, &bare, r->sym, r->addend) >= 1088 0) { 1089 KitStatus st = w_str(w, dir); 1090 if (st != KIT_OK) return st; 1091 st = w_str(w, symref); 1092 if (st != KIT_OK) return st; 1093 if (pcrel) { 1094 st = w_str(w, " - ."); 1095 if (st != KIT_OK) return st; 1096 } 1097 st = w_newline(w); 1098 if (st != KIT_OK) return st; 1099 off += width; 1100 continue; 1101 } 1102 /* Unsupported kind / unspellable target: keep raw bytes for this slot 1103 * (advance to the next reloc boundary so we don't re-handle it). */ 1104 } 1105 if (next <= off) next = end; 1106 { 1107 KitStatus st = emit_raw_bytes(w, data, off, next); 1108 if (st != KIT_OK) return st; 1109 } 1110 off = next; 1111 } 1112 return KIT_OK; 1113 } 1114 1115 static KitStatus emit_disasm_range(Writer* w, const EmitCtx* x, 1116 ArchDisasm* dasm, const u8* data, u32 start, 1117 u32 end) { 1118 u32 off = start; 1119 KitStatus st; 1120 1121 while (off < end) { 1122 KitInsn insn; 1123 u64 vaddr = (u64)off; 1124 u32 n = arch_disasm_decode(dasm, data + off, end - off, vaddr, &insn); 1125 1126 if (n == 0) { 1127 st = w_str(w, " .byte 0x"); 1128 if (st != KIT_OK) return st; 1129 st = w_hex_byte(w, data[off]); 1130 if (st != KIT_OK) return st; 1131 st = w_newline(w); 1132 if (st != KIT_OK) return st; 1133 off += 1; 1134 continue; 1135 } 1136 1137 /* Call-pair fusion (RISC-V R_RV_CALL): a reloc on this instruction whose 1138 * arch fuses it with the FOLLOWING instruction into a single `call`/`tail 1139 * sym` pseudo. Probe the partner for the call-vs-tail decision, emit one 1140 * line, and skip both. Decoding the partner reuses the disassembler's 1141 * buffers (clobbering `insn`), so build the symref first and re-decode 1142 * `insn` when the pair is not fused. */ 1143 { 1144 const SecReloc* cr = reloc_in_range(x->relocs, x->nrelocs, off, n); 1145 char symref[256]; 1146 ArchRelocOperand bare = {ARCH_RELOC_SURG_TAIL, "", "", 0, 0, 0}; 1147 if (cr && off + n < end && 1148 build_symref(symref, sizeof symref, x->c, &bare, cr->sym, 1149 cr->addend) >= 0) { 1150 KitInsn partner; 1151 u32 pn = arch_disasm_decode(dasm, data + off + n, end - (off + n), 1152 (u64)(off + n), &partner); 1153 const char* mn = NULL; 1154 if (pn && arch_reloc_call_pair(x->c, cr->kind, partner.mnemonic, 1155 partner.operands, &mn)) { 1156 st = w_str(w, "\t"); 1157 if (st != KIT_OK) return st; 1158 st = w_str(w, mn); 1159 if (st != KIT_OK) return st; 1160 st = w_str(w, "\t"); 1161 if (st != KIT_OK) return st; 1162 st = w_str(w, symref); 1163 if (st != KIT_OK) return st; 1164 st = w_newline(w); 1165 if (st != KIT_OK) return st; 1166 off += n + pn; 1167 continue; 1168 } 1169 /* Not fused: the partner probe clobbered `insn`; re-decode it. */ 1170 (void)arch_disasm_decode(dasm, data + off, end - off, vaddr, &insn); 1171 } 1172 } 1173 1174 /* A high-half reloc (RISC-V AUIPC `%pcrel_hi`/`%got_pcrel_hi`) needs a 1175 * unique local anchor label here so the paired `%pcrel_lo` can name it. */ 1176 { 1177 const SecReloc* hr = reloc_in_range(x->relocs, x->nrelocs, off, n); 1178 if (hr) { 1179 ArchRelocOperand ro = { 1180 0}; /* zero emit_anchor/ref_anchor: arches that 1181 * don't set them must read as 0 (rv64-only) */ 1182 if (arch_reloc_operand(x->c, hr->kind, &ro) && ro.emit_anchor) { 1183 char name[256]; 1184 fmt_anchor_label(name, sizeof name, x->secidx, off); 1185 st = w_str(w, name); 1186 if (st != KIT_OK) return st; 1187 st = w_str(w, ":"); 1188 if (st != KIT_OK) return st; 1189 st = w_newline(w); 1190 if (st != KIT_OK) return st; 1191 } 1192 } 1193 } 1194 1195 st = w_str(w, "\t"); 1196 if (st != KIT_OK) return st; 1197 { 1198 /* De-alias a relocated `mv rd, rs` — an ADDI whose %pcrel_lo/%lo 1199 * immediate the disassembler aliased to `mv` because the encoded imm is 1200 * 0 — to the canonical `addi rd, rs, %lo(...)`. The RV_LO12 surgery in 1201 * emit_operands appends the `%lo(...)` as the third operand, and a 1202 * 3-operand `mv` is non-standard (clang rejects it). */ 1203 KitSlice mn = insn.mnemonic; 1204 if (mn.len == 2 && mn.s[0] == 'm' && mn.s[1] == 'v') { 1205 const SecReloc* lr = reloc_in_range(x->relocs, x->nrelocs, off, n); 1206 ArchRelocOperand ro = {0}; 1207 if (lr && arch_reloc_operand(x->c, lr->kind, &ro) && 1208 ro.surg == ARCH_RELOC_SURG_RV_LO12) { 1209 mn.s = "addi"; 1210 mn.len = 4; 1211 } 1212 } 1213 st = kit_writer_write(w, mn.s, mn.len); 1214 if (st != KIT_OK) return st; 1215 } 1216 if (insn.operands.len) { 1217 st = w_str(w, "\t"); 1218 if (st != KIT_OK) return st; 1219 st = emit_operands(w, x, &insn, off); 1220 if (st != KIT_OK) return st; 1221 } 1222 st = w_newline(w); 1223 if (st != KIT_OK) return st; 1224 1225 off += n; 1226 } 1227 return KIT_OK; 1228 } 1229 1230 KitStatus kit_obj_builder_emit_asm(KitObjBuilder* builder, KitWriter* out_w) { 1231 ObjBuilder* ob = (ObjBuilder*)builder; 1232 Compiler* c; 1233 Writer* w; 1234 const AsmSyntax* syn; 1235 AsmSynCtx sx; 1236 u32 nsec, i; 1237 1238 if (!ob || !out_w) return KIT_INVALID; 1239 1240 c = obj_compiler(ob); 1241 w = (Writer*)out_w; 1242 syn = asm_syntax_for(c->target.obj); 1243 sx.w = w; 1244 sx.c = c; 1245 nsec = obj_section_count(ob); 1246 1247 for (i = 1; i < nsec; ++i) { 1248 const Section* sec = obj_section_get(ob, (ObjSecId)i); 1249 SymLabel* labels; 1250 u32 nlabels, total, off, li; 1251 ArchDisasm* dasm; 1252 const u8* flat_data; 1253 u8* heap_data; 1254 SecReloc* relocs; 1255 u32 nrelocs; 1256 u32* btargets; 1257 u32 nbt, bi; 1258 EmitCtx ctx; 1259 1260 if (!sec || sec->removed) continue; 1261 if (!syn->section_header(&sx, sec)) continue; 1262 1263 labels = collect_labels(c, ob, (ObjSecId)i, &nlabels); 1264 1265 if (sec->align > 1) syn->align(&sx, sec->align); 1266 1267 if (sec->kind == SEC_BSS) { 1268 total = sec->bss_size; 1269 } else { 1270 total = sec->bytes.total; 1271 } 1272 1273 dasm = NULL; 1274 flat_data = NULL; 1275 heap_data = NULL; 1276 relocs = NULL; 1277 nrelocs = 0; 1278 btargets = NULL; 1279 nbt = 0; 1280 bi = 0; 1281 1282 if (total > 0 && (sec->flags & SF_EXEC)) { 1283 Heap* heap; 1284 dasm = arch_disasm_new(c); 1285 relocs = collect_relocs(c, ob, (ObjSecId)i, &nrelocs); 1286 heap = c->ctx->heap; 1287 heap_data = (u8*)heap->alloc(heap, total, 1); 1288 if (heap_data) { 1289 buf_flatten(&sec->bytes, heap_data); 1290 flat_data = heap_data; 1291 if (dasm) 1292 btargets = collect_branch_targets(c, dasm, relocs, nrelocs, flat_data, 1293 total, &nbt); 1294 } 1295 } else if (total > 0 && sec->kind != SEC_BSS) { 1296 Heap* heap = c->ctx->heap; 1297 relocs = collect_relocs(c, ob, (ObjSecId)i, &nrelocs); 1298 heap_data = (u8*)heap->alloc(heap, total, 1); 1299 if (heap_data) { 1300 buf_flatten(&sec->bytes, heap_data); 1301 flat_data = heap_data; 1302 } 1303 } 1304 1305 ctx.c = c; 1306 ctx.secidx = i; 1307 ctx.relocs = relocs; 1308 ctx.nrelocs = nrelocs; 1309 ctx.labels = labels; 1310 ctx.nlabels = nlabels; 1311 ctx.btargets = btargets; 1312 ctx.nbt = nbt; 1313 1314 off = 0; 1315 li = 0; 1316 1317 while (off < total || li < nlabels) { 1318 while (li < nlabels && labels[li].offset == off) { 1319 emit_label(&sx, syn, &labels[li]); 1320 ++li; 1321 } 1322 /* Synthesized branch-target label, unless a real symbol sits here. */ 1323 if (nbt && is_btarget(&ctx, off) && !symbol_at(&ctx, off)) { 1324 char name[256]; 1325 fmt_synth_label(name, sizeof name, i, off); 1326 w_str(w, name); 1327 w_str(w, ":"); 1328 w_newline(w); 1329 } 1330 1331 if (off >= total) break; 1332 1333 { 1334 u32 next = total; 1335 if (li < nlabels && labels[li].offset > off && 1336 labels[li].offset < total) 1337 next = labels[li].offset; 1338 while (bi < nbt && btargets[bi] <= off) ++bi; 1339 if (bi < nbt && btargets[bi] < next) next = btargets[bi]; 1340 1341 if (sec->kind == SEC_BSS) { 1342 emit_zero_range(w, next - off); 1343 } else if ((sec->flags & SF_EXEC) && dasm && flat_data) { 1344 emit_disasm_range(w, &ctx, dasm, flat_data, off, next); 1345 } else if (flat_data) { 1346 emit_data_range(w, c, flat_data, off, next, relocs, nrelocs); 1347 } 1348 off = next; 1349 } 1350 } 1351 1352 emit_size_directives(&sx, syn, ob, (ObjSecId)i); 1353 1354 if (dasm) arch_disasm_free(dasm); 1355 if (heap_data) c->ctx->heap->free(c->ctx->heap, heap_data, total); 1356 1357 w_newline(w); 1358 } 1359 1360 emit_common_symbols(w, c, ob); 1361 1362 return kit_writer_status(out_w); 1363 }