kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

asm.c (48215B)


      1 /* GNU-as compatible assembler driver — arch-agnostic.
      2  *
      3  * Reads tokens from an AsmLexer, dispatches directives, manages labels and
      4  * section state, and forwards mnemonic lines to the per-arch instruction
      5  * parser.  Output goes through MCEmitter against an ObjBuilder.
      6  *
      7  * AsmLexer quirks worked around here:
      8  *   - `#` is both the immediate marker in asm and the token used for
      9  *     preprocessed-assembler line markers.
     10  *     `#` at BOL is a cpp linemarker → skip to next newline; elsewhere
     11  *     the per-arch parser treats it as the immediate prefix.
     12  *   - composite mnemonics (`b.eq`, `b.ne`, ...) arrive as IDENT '.' IDENT
     13  *     and are reassembled before dispatch.
     14  *   - `.text` etc. arrive as PUNCT('.') + IDENT and are stitched here.
     15  *
     16  * Symbol bookkeeping: a Sym→ObjSymId map records the symbols introduced
     17  * by labels, `.globl`, and operand references so a forward reference
     18  * (`b foo` before `foo:`) shares one symbol with its later definition.
     19  * A second Sym→AsmEqu map carries `.set`/`.equ` constants. */
     20 
     21 #include "asm/asm.h"
     22 
     23 #include <stdarg.h>
     24 #include <string.h>
     25 
     26 #include "arch/arch.h"
     27 #include "asm/asm_helpers.h"
     28 #include "asm/asm_lex.h"
     29 #include "core/arena.h"
     30 #include "core/hashmap.h"
     31 #include "core/heap.h"
     32 #include "core/pool.h"
     33 #include "core/slice.h"
     34 #include "obj/obj.h"
     35 #include "obj/reloc_apply.h"
     36 
/* Driver lookup tables keyed by interned name (Sym):
 *   SymSecMap: section name → ObjSecId (see ensure_section_ex)
 *   SymSymMap: symbol name  → ObjSymId (see intern_sym) */
HASHMAP_DEFINE(SymSecMap, Sym, ObjSecId, hash_u32);
HASHMAP_DEFINE(SymSymMap, Sym, ObjSymId, hash_u32);
     39 
/* Value bound to a name in equ_map: either a plain constant or `sym + value`.
 * parse_primary expands an identifier found in the map to this value instead
 * of interning it as a symbol. */
typedef struct AsmEqu {
  i64 value;    /* the constant, or the offset added to `sym` */
  ObjSymId sym; /* nonzero when value is `sym + offset` */
  u8 has_sym;   /* selects the `sym + value` form over the plain constant */
  u8 pad[3];
} AsmEqu;
HASHMAP_DEFINE(SymEquMap, Sym, AsmEqu, hash_u32);
     47 
     48 struct AsmDriver {
     49   Compiler* c;
     50   AsmLexer* lex;
     51   MCEmitter* mc;
     52   ObjBuilder* ob;
     53   Pool* pool;
     54   Heap* heap;
     55 
     56   AsmTok cur;
     57   int has_cur;
     58 
     59   /* OBJ_SEC_NONE until first emit / explicit `.text` etc. */
     60   ObjSecId cur_sec;
     61 
     62   SymSecMap sec_map;
     63   SymSymMap sym_map;
     64   SymEquMap equ_map;
     65 
     66   Sym n_text, n_data, n_rodata, n_bss;
     67 
     68   ArchAsm* arch_asm;
     69 };
     70 
     71 /* ---- token plumbing ---- */
     72 
     73 static AsmTok d_peek(AsmDriver* d) {
     74   if (!d->has_cur) {
     75     d->cur = asm_lex_next(d->lex);
     76     d->has_cur = 1;
     77   }
     78   return d->cur;
     79 }
     80 
     81 static AsmTok d_next(AsmDriver* d) {
     82   AsmTok t = d_peek(d);
     83   d->has_cur = 0;
     84   return t;
     85 }
     86 
     87 static int d_is_eol(AsmDriver* d) {
     88   AsmTok t = d_peek(d);
     89   return t.kind == ASM_TOK_NEWLINE || t.kind == ASM_TOK_EOF;
     90 }
     91 
     92 static void d_skip_to_eol(AsmDriver* d) {
     93   while (!d_is_eol(d)) (void)d_next(d);
     94 }
     95 
     96 static void d_eat_eol(AsmDriver* d) {
     97   AsmTok t = d_peek(d);
     98   if (t.kind == ASM_TOK_NEWLINE) (void)d_next(d);
     99 }
    100 
    101 static SrcLoc d_loc(AsmDriver* d) {
    102   if (d->has_cur) return d->cur.loc;
    103   return asm_lex_loc(d->lex);
    104 }
    105 
    106 _Noreturn static void d_panicf(AsmDriver* d, const char* fmt, ...) {
    107   va_list ap;
    108   va_start(ap, fmt);
    109   compiler_panicv(d->c, d_loc(d), fmt, ap);
    110   /* unreachable; va_end omitted because compiler_panicv is _Noreturn */
    111 }
    112 
    113 /* ---- spelling helpers ---- */
    114 
    115 static const char* asm_str(AsmDriver* d, Sym s, size_t* nout) {
    116   Slice sl = pool_slice(d->pool, s);
    117   if (nout) *nout = sl.len;
    118   return sl.s;
    119 }
    120 
    121 static int sym_eq(AsmDriver* d, Sym s, const char* lit) {
    122   size_t n = 0;
    123   const char* p = asm_str(d, s, &n);
    124   size_t i;
    125   if (!p) return 0;
    126   for (i = 0; i < n; ++i) {
    127     if (!lit[i] || p[i] != lit[i]) return 0;
    128   }
    129   return lit[n] == '\0';
    130 }
    131 
    132 static int starts_with(AsmDriver* d, Sym s, const char* prefix) {
    133   size_t n = 0;
    134   const char* p = asm_str(d, s, &n);
    135   size_t i;
    136   if (!p) return 0;
    137   for (i = 0; prefix[i]; ++i) {
    138     if (i >= n || p[i] != prefix[i]) return 0;
    139   }
    140   return 1;
    141 }
    142 
    143 /* ---- section management ---- */
    144 
    145 static ObjSecId ensure_section_ex(AsmDriver* d, Sym name, SecKind kind, u16 sem,
    146                                   u16 flags, u32 align) {
    147   ObjSecId* hit = SymSecMap_get(&d->sec_map, name);
    148   ObjSecId id;
    149   if (hit) return *hit;
    150   id = obj_section_ex(d->ob, name, kind, sem, flags, align, 0, OBJ_SEC_NONE, 0);
    151   SymSecMap_set(&d->sec_map, name, id);
    152   return id;
    153 }
    154 
    155 static ObjSecId ensure_section(AsmDriver* d, Sym name, SecKind kind, u16 flags,
    156                                u32 align) {
    157   /* A .bss section is NOBITS: it stores no bytes, only a size. Create it that
    158    * way (codegen does the same via obj_section_ex) so the ELF emitter writes
    159    * SHT_NOBITS and `.zero`/labels track bss_size, not a byte buffer — matching
    160    * `cc -c` so the round-tripped object isn't a writable-but-loaded .bss. */
    161   return ensure_section_ex(d, name, kind,
    162                            kind == SEC_BSS ? SSEM_NOBITS : SSEM_PROGBITS, flags,
    163                            align);
    164 }
    165 
    166 static void set_section(AsmDriver* d, Sym name, SecKind kind, u16 flags,
    167                         u32 align) {
    168   ObjSecId id = ensure_section(d, name, kind, flags, align);
    169   d->cur_sec = id;
    170   d->mc->set_section(d->mc, id);
    171 }
    172 
    173 /* ---- symbol management ---- */
    174 
    175 static ObjSymId intern_sym(AsmDriver* d, Sym name) {
    176   ObjSymId* hit = SymSymMap_get(&d->sym_map, name);
    177   if (hit) return *hit;
    178   ObjSymId id = obj_symbol_find(d->ob, name);
    179   if (id == OBJ_SYM_NONE) {
    180     id = obj_symbol_ex(d->ob, name, SB_LOCAL, SV_DEFAULT, SK_NOTYPE,
    181                        OBJ_SEC_NONE, 0, 0, 0);
    182   }
    183   SymSymMap_set(&d->sym_map, name, id);
    184   return id;
    185 }
    186 
    187 static ObjSym* sym_mut(AsmDriver* d, ObjSymId id) {
    188   /* obj.h gives us a const view via obj_symbol_get; the underlying
    189    * record lives in the builder's arena and is safe to mutate
    190    * pre-finalize.  Wrapping the cast keeps the const-stripping in
    191    * one place. */
    192   return (ObjSym*)obj_symbol_get(d->ob, id);
    193 }
    194 
    195 /* GNU `as` makes any symbol that is referenced but neither defined nor
    196  * declared `.local` an undefined *global* (a local UNDEF is meaningless
    197  * in ELF and won't pull a member out of an archive at link time).
    198  * intern_sym mints every new symbol SB_LOCAL/SK_NOTYPE, so after the
    199  * parse we promote the ones that stayed undefined to global SK_UNDEF.
    200  * Defined locals (labels), `.local` decls, and absolute/common symbols
    201  * are left untouched. */
    202 static void promote_undef_externs(AsmDriver* d) {
    203   ObjSymIter* it = obj_symiter_new(d->ob);
    204   ObjSymEntry e;
    205   while (obj_symiter_next(it, &e)) {
    206     /* The iterator visits tombstoned slots too (see obj.h). Deferred
    207      * anonymous const-data / jump-table symbols (obj_symbol_defer) sit as
    208      * LOCAL/SK_OBJ/no-section tombstones until opt_whole_module_finalize
    209      * materializes them — which, when a file-scope `asm` block (e.g. a
    210      * FreeBSD header's `.symver`) replays before that finalize step, is
    211      * *after* this pass runs. Promoting them here would resurface them as
    212      * defined GLOBALs and collide at link (`duplicate definition of global
    213      * symbol '.Lkit_ro.0'`), so skip tombstones like every other consumer. */
    214     if (e.sym->removed) continue;
    215     if (e.sym->section_id != OBJ_SEC_NONE) continue; /* defined here */
    216     if (e.sym->bind != SB_LOCAL) continue;
    217     if (e.sym->kind == SK_ABS || e.sym->kind == SK_COMMON) continue;
    218     obj_symbol_set_bind(d->ob, e.id, SB_GLOBAL);
    219     sym_mut(d, e.id)->kind = (u16)SK_UNDEF;
    220   }
    221   obj_symiter_free(it);
    222 }
    223 
    224 /* ---- expression evaluator (constants + sym ± const) ---- */
    225 
/* Result of evaluating an operand expression: a constant (`sym` is
 * OBJ_SYM_NONE), a `sym + value` reference, or the bare location counter. */
typedef struct AsmExpr {
  ObjSymId sym; /* base symbol, or OBJ_SYM_NONE for a pure constant */
  i64 value;    /* constant value, or the addend applied to `sym` */
  u8 is_here; /* the location-counter token `.` (no sym, no value yet) */
  u8 pcrel;   /* `sym - .`: emit a PC-relative data reloc instead of absolute */
} AsmExpr;
    232 
    233 static AsmExpr expr_c(i64 v) {
    234   AsmExpr e = {OBJ_SYM_NONE, v, 0, 0};
    235   return e;
    236 }
    237 static AsmExpr expr_s(ObjSymId s, i64 v) {
    238   AsmExpr e = {s, v, 0, 0};
    239   return e;
    240 }
    241 static AsmExpr expr_here(void) {
    242   AsmExpr e = {OBJ_SYM_NONE, 0, 1, 0};
    243   return e;
    244 }
    245 
    246 static int tok_is_punct(AsmTok t, u32 p) {
    247   return t.kind == ASM_TOK_PUNCT && t.v.punct == p;
    248 }
    249 
    250 static i64 lit_to_i64(AsmDriver* d, Sym spelling) {
    251   size_t n = 0;
    252   const char* p = asm_str(d, spelling, &n);
    253   u64 v = 0;
    254   int base = 10;
    255   size_t i = 0;
    256   if (!p || !n) return 0;
    257   if (n >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
    258     base = 16;
    259     i = 2;
    260   } else if (n >= 2 && p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) {
    261     base = 2;
    262     i = 2;
    263   } else if (n >= 1 && p[0] == '0') {
    264     base = 8;
    265     i = 1;
    266   }
    267   for (; i < n; ++i) {
    268     char c = p[i];
    269     u32 dv;
    270     if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break;
    271     if (c >= '0' && c <= '9')
    272       dv = (u32)(c - '0');
    273     else if (c >= 'a' && c <= 'f')
    274       dv = 10 + (u32)(c - 'a');
    275     else if (c >= 'A' && c <= 'F')
    276       dv = 10 + (u32)(c - 'A');
    277     else
    278       d_panicf(d, "asm: bad digit in integer literal");
    279     if (dv >= (u32)base) d_panicf(d, "asm: digit out of base");
    280     v = v * (u64)base + dv;
    281   }
    282   return (i64)v;
    283 }
    284 
    285 static AsmExpr parse_expr(AsmDriver*);
    286 static AsmExpr parse_unary(AsmDriver*);
    287 
    288 static AsmExpr parse_primary(AsmDriver* d) {
    289   AsmTok t = d_peek(d);
    290   if (t.kind == ASM_TOK_NUM) {
    291     (void)d_next(d);
    292     return expr_c(lit_to_i64(d, t.spelling));
    293   }
    294   if (t.kind == ASM_TOK_IDENT) {
    295     (void)d_next(d);
    296     AsmEqu* eq = SymEquMap_get(&d->equ_map, t.v.ident);
    297     if (eq) {
    298       if (eq->has_sym) return expr_s(eq->sym, eq->value);
    299       return expr_c(eq->value);
    300     }
    301     return expr_s(intern_sym(d, t.v.ident), 0);
    302   }
    303   if (tok_is_punct(t, '(')) {
    304     (void)d_next(d);
    305     AsmExpr e = parse_expr(d);
    306     AsmTok cl = d_peek(d);
    307     if (!tok_is_punct(cl, ')')) d_panicf(d, "asm: expected ')'");
    308     (void)d_next(d);
    309     return e;
    310   }
    311   /* Lone `.` is the location counter (used in `sym - .` PC-relative data). */
    312   if (tok_is_punct(t, '.')) {
    313     (void)d_next(d);
    314     return expr_here();
    315   }
    316   d_panicf(d, "asm: expected expression");
    317 }
    318 
    319 static AsmExpr parse_unary(AsmDriver* d) {
    320   AsmTok t = d_peek(d);
    321   if (tok_is_punct(t, '-')) {
    322     (void)d_next(d);
    323     AsmExpr e = parse_unary(d);
    324     if (e.sym) d_panicf(d, "asm: unary '-' on symbol");
    325     /* Unsigned negate so `$-9223372036854775808` (negating INT64_MIN) is
    326      * well-defined 2's-complement, not signed-overflow UB. */
    327     return expr_c((i64)(0u - (u64)e.value));
    328   }
    329   if (tok_is_punct(t, '+')) {
    330     (void)d_next(d);
    331     return parse_unary(d);
    332   }
    333   if (tok_is_punct(t, '~')) {
    334     (void)d_next(d);
    335     AsmExpr e = parse_unary(d);
    336     if (e.sym) d_panicf(d, "asm: unary '~' on symbol");
    337     return expr_c(~e.value);
    338   }
    339   return parse_primary(d);
    340 }
    341 
    342 static AsmExpr parse_mul(AsmDriver* d) {
    343   AsmExpr a = parse_unary(d);
    344   for (;;) {
    345     AsmTok t = d_peek(d);
    346     if (!tok_is_punct(t, '*') && !tok_is_punct(t, '/') && !tok_is_punct(t, '%'))
    347       return a;
    348     u32 op = t.v.punct;
    349     (void)d_next(d);
    350     AsmExpr b = parse_unary(d);
    351     if (a.sym || b.sym || a.is_here || b.is_here)
    352       d_panicf(d, "asm: '*/%%' on symbolic operand");
    353     if (op == '*')
    354       a.value *= b.value;
    355     else if (op == '/') {
    356       if (!b.value) d_panicf(d, "asm: division by zero");
    357       a.value /= b.value;
    358     } else {
    359       if (!b.value) d_panicf(d, "asm: modulo by zero");
    360       a.value %= b.value;
    361     }
    362   }
    363 }
    364 
    365 static AsmExpr parse_add(AsmDriver* d) {
    366   AsmExpr a = parse_mul(d);
    367   for (;;) {
    368     AsmTok t = d_peek(d);
    369     if (!tok_is_punct(t, '+') && !tok_is_punct(t, '-')) return a;
    370     u32 op = t.v.punct;
    371     (void)d_next(d);
    372     AsmExpr b = parse_mul(d);
    373     /* `sym - .`: a PC-relative data reference. `.` is the location of the
    374      * field being emitted, so the relocation's P equals its own offset and the
    375      * RELA addend stays `a.value` (typically 0). */
    376     if (op == '-' && b.is_here) {
    377       if (!a.sym) d_panicf(d, "asm: '- .' requires a symbol operand");
    378       a.pcrel = 1;
    379       continue;
    380     }
    381     if (a.is_here || b.is_here)
    382       d_panicf(d, "asm: '.' location counter only valid as `sym - .`");
    383     if (op == '+') {
    384       if (a.sym && b.sym) d_panicf(d, "asm: cannot add two symbols");
    385       if (b.sym) {
    386         a.sym = b.sym;
    387         a.value += b.value;
    388       } else
    389         a.value += b.value;
    390     } else {
    391       if (b.sym) d_panicf(d, "asm: cannot subtract symbol from constant");
    392       a.value -= b.value;
    393     }
    394   }
    395 }
    396 
    397 static AsmExpr parse_shift(AsmDriver* d) {
    398   AsmExpr a = parse_add(d);
    399   for (;;) {
    400     AsmTok t = d_peek(d);
    401     if (!tok_is_punct(t, ASM_P_SHL) && !tok_is_punct(t, ASM_P_SHR)) return a;
    402     u32 op = t.v.punct;
    403     (void)d_next(d);
    404     AsmExpr b = parse_add(d);
    405     if (a.sym || b.sym) d_panicf(d, "asm: shift on symbolic operand");
    406     if (op == ASM_P_SHL)
    407       a.value = (i64)((u64)a.value << (b.value & 63));
    408     else
    409       a.value = a.value >> (b.value & 63);
    410   }
    411 }
    412 
    413 static AsmExpr parse_band(AsmDriver* d) {
    414   AsmExpr a = parse_shift(d);
    415   for (;;) {
    416     AsmTok t = d_peek(d);
    417     if (!tok_is_punct(t, '&')) return a;
    418     (void)d_next(d);
    419     AsmExpr b = parse_shift(d);
    420     if (a.sym || b.sym) d_panicf(d, "asm: '&' on symbolic operand");
    421     a.value &= b.value;
    422   }
    423 }
    424 
    425 static AsmExpr parse_bxor(AsmDriver* d) {
    426   AsmExpr a = parse_band(d);
    427   for (;;) {
    428     AsmTok t = d_peek(d);
    429     if (!tok_is_punct(t, '^')) return a;
    430     (void)d_next(d);
    431     AsmExpr b = parse_band(d);
    432     if (a.sym || b.sym) d_panicf(d, "asm: '^' on symbolic operand");
    433     a.value ^= b.value;
    434   }
    435 }
    436 
    437 static AsmExpr parse_bor(AsmDriver* d) {
    438   AsmExpr a = parse_bxor(d);
    439   for (;;) {
    440     AsmTok t = d_peek(d);
    441     if (!tok_is_punct(t, '|')) return a;
    442     (void)d_next(d);
    443     AsmExpr b = parse_bxor(d);
    444     if (a.sym || b.sym) d_panicf(d, "asm: '|' on symbolic operand");
    445     a.value |= b.value;
    446   }
    447 }
    448 
    449 static AsmExpr parse_expr(AsmDriver* d) { return parse_bor(d); }
    450 
    451 /* ---- public helpers exposed to per-arch parser ---- */
    452 
    453 AsmTok asm_driver_peek(AsmDriver* d) { return d_peek(d); }
    454 AsmTok asm_driver_next(AsmDriver* d) { return d_next(d); }
    455 int asm_driver_at_eol(AsmDriver* d) { return d_is_eol(d); }
    456 SrcLoc asm_driver_loc(AsmDriver* d) { return d_loc(d); }
    457 MCEmitter* asm_driver_mc(AsmDriver* d) { return d->mc; }
    458 ObjBuilder* asm_driver_ob(AsmDriver* d) { return d->ob; }
    459 Compiler* asm_driver_compiler(AsmDriver* d) { return d->c; }
    460 Pool* asm_driver_pool(AsmDriver* d) { return d->pool; }
    461 
    462 _Noreturn void asm_driver_panic(AsmDriver* d, const char* fmt, ...) {
    463   va_list ap;
    464   va_start(ap, fmt);
    465   compiler_panicv(d->c, d_loc(d), fmt, ap);
    466 }
    467 
    468 ObjSymId asm_driver_intern_sym(AsmDriver* d, Sym name) {
    469   return intern_sym(d, name);
    470 }
    471 
    472 ObjSecId asm_driver_cur_section(AsmDriver* d) {
    473   if (d->cur_sec == OBJ_SEC_NONE) {
    474     if (!d->n_text) d->n_text = pool_intern_slice(d->pool, SLICE_LIT(".text"));
    475     d->cur_sec =
    476         ensure_section(d, d->n_text, SEC_TEXT, (u16)(SF_ALLOC | SF_EXEC), 4);
    477     d->mc->set_section(d->mc, d->cur_sec);
    478   }
    479   return d->cur_sec;
    480 }
    481 
    482 int asm_driver_eat_comma(AsmDriver* d) {
    483   AsmTok t = d_peek(d);
    484   if (tok_is_punct(t, ',')) {
    485     (void)d_next(d);
    486     return 1;
    487   }
    488   return 0;
    489 }
    490 
    491 int asm_driver_eat_punct(AsmDriver* d, u32 p) {
    492   AsmTok t = d_peek(d);
    493   if (tok_is_punct(t, p)) {
    494     (void)d_next(d);
    495     return 1;
    496   }
    497   /* `#` arrives as ASM_TOK_HASH from the C lexer; accept it as the
    498    * immediate-prefix punctuator here. */
    499   if (p == '#' && t.kind == ASM_TOK_HASH) {
    500     (void)d_next(d);
    501     return 1;
    502   }
    503   return 0;
    504 }
    505 
    506 void asm_driver_expect_punct(AsmDriver* d, u32 p, const char* what) {
    507   if (!asm_driver_eat_punct(d, p))
    508     d_panicf(d, "asm: expected '%.*s' (%.*s)", SLICE_ARG(SLICE_LIT("punct")),
    509              SLICE_ARG(slice_from_cstr(what)));
    510 }
    511 
    512 i64 asm_driver_parse_const(AsmDriver* d) {
    513   AsmExpr e = parse_expr(d);
    514   if (e.sym) d_panicf(d, "asm: constant expression expected");
    515   return e.value;
    516 }
    517 
    518 void asm_driver_parse_sym_expr(AsmDriver* d, ObjSymId* sym_out, i64* off_out) {
    519   AsmExpr e = parse_expr(d);
    520   *sym_out = e.sym;
    521   *off_out = e.value;
    522 }
    523 
    524 int asm_driver_tok_is_punct(AsmTok t, u32 p) {
    525   if (tok_is_punct(t, p)) return 1;
    526   /* `#` arrives as ASM_TOK_HASH from the C lexer. */
    527   if (p == '#' && t.kind == ASM_TOK_HASH) return 1;
    528   return 0;
    529 }
    530 
    531 /* ---- string-literal decoding ---- */
    532 
    533 static void decode_string(AsmDriver* d, Sym spelling, u8** out, u32* nout) {
    534   size_t n = 0;
    535   const char* p = asm_str(d, spelling, &n);
    536   /* Skip any encoding prefix (L/u/u8/U). */
    537   while (n && (*p == 'L' || *p == 'u' || *p == 'U' || *p == '8')) {
    538     ++p;
    539     --n;
    540   }
    541   if (n < 2 || p[0] != '"' || p[n - 1] != '"')
    542     d_panicf(d, "asm: malformed string literal");
    543   size_t cap = n;
    544   u8* buf = (u8*)d->heap->alloc(d->heap, cap ? cap : 1, 1);
    545   u32 k = 0;
    546   for (size_t i = 1; i + 1 < n; ++i) {
    547     char c = p[i];
    548     if (c != '\\') {
    549       buf[k++] = (u8)c;
    550       continue;
    551     }
    552     ++i;
    553     if (i + 1 >= n) break;
    554     char e = p[i];
    555     switch (e) {
    556       case 'n':
    557         buf[k++] = '\n';
    558         break;
    559       case 't':
    560         buf[k++] = '\t';
    561         break;
    562       case 'r':
    563         buf[k++] = '\r';
    564         break;
    565       case '\\':
    566         buf[k++] = '\\';
    567         break;
    568       case '"':
    569         buf[k++] = '"';
    570         break;
    571       case '\'':
    572         buf[k++] = '\'';
    573         break;
    574       case '0':
    575         buf[k++] = 0;
    576         break;
    577       case 'b':
    578         buf[k++] = 8;
    579         break;
    580       case 'f':
    581         buf[k++] = 12;
    582         break;
    583       case 'v':
    584         buf[k++] = 11;
    585         break;
    586       case 'a':
    587         buf[k++] = 7;
    588         break;
    589       case 'x': {
    590         u32 v = 0;
    591         int dn = 0;
    592         while (i + 2 < n) {
    593           char h = p[i + 1];
    594           int dv;
    595           if (h >= '0' && h <= '9')
    596             dv = h - '0';
    597           else if (h >= 'a' && h <= 'f')
    598             dv = 10 + (h - 'a');
    599           else if (h >= 'A' && h <= 'F')
    600             dv = 10 + (h - 'A');
    601           else
    602             break;
    603           v = v * 16 + (u32)dv;
    604           ++i;
    605           if (++dn >= 2) break;
    606         }
    607         buf[k++] = (u8)v;
    608         break;
    609       }
    610       default:
    611         if (e >= '0' && e <= '7') {
    612           u32 v = (u32)(e - '0');
    613           int dn = 1;
    614           while (dn < 3 && i + 2 < n) {
    615             char h = p[i + 1];
    616             if (h < '0' || h > '7') break;
    617             v = v * 8 + (u32)(h - '0');
    618             ++i;
    619             ++dn;
    620           }
    621           buf[k++] = (u8)v;
    622         } else {
    623           buf[k++] = (u8)e;
    624         }
    625         break;
    626     }
    627   }
    628   *out = buf;
    629   *nout = k;
    630 }
    631 
    632 /* ---- directives ---- */
    633 
    634 static Sym expect_ident(AsmDriver* d, const char* what) {
    635   AsmTok t = d_peek(d);
    636   if (t.kind != ASM_TOK_IDENT)
    637     d_panicf(d, "asm: %.*s: expected identifier",
    638              SLICE_ARG(slice_from_cstr(what)));
    639   (void)d_next(d);
    640   return t.v.ident;
    641 }
    642 
    643 static void emit_le(AsmDriver* d, u64 v, u32 width) {
    644   u8 buf[8];
    645   for (u32 i = 0; i < width; ++i) buf[i] = (u8)(v >> (8 * i));
    646   (void)asm_driver_cur_section(d);
    647   d->mc->emit_bytes(d->mc, buf, width);
    648 }
    649 
    650 static void emit_int_directive(AsmDriver* d, u32 width) {
    651   for (;;) {
    652     AsmExpr e = parse_expr(d);
    653     if (e.sym) {
    654       RelocKind k;
    655       if (e.pcrel) {
    656         /* `sym - .`: PC-relative data. Only the 32/64-bit widths codegen
    657          * emits via kit_cg_data_pcrel are supported. */
    658         if (width == 4)
    659           k = R_PC32;
    660         else if (width == 8)
    661           k = R_PC64;
    662         else
    663           d_panicf(d, "asm: PC-relative `sym - .` needs .long/.quad");
    664       } else if (width == 4)
    665         k = R_ABS32;
    666       else if (width == 8)
    667         k = R_ABS64;
    668       else
    669         d_panicf(d, "asm: symbolic .byte/.hword not supported");
    670       (void)asm_driver_cur_section(d);
    671       u32 ofs = d->mc->pos(d->mc);
    672       /* Write the addend into the data field, not zero. Mach-O relocations
    673        * carry the addend implicitly in the relocated field (REL form); writing
    674        * zero loses it (every `.quad sym+N` would resolve to sym+0 — a switch
    675        * jump table dispatching into hyperspace). codegen pre-writes the addend
    676        * the same way. On ELF (RELA) the linker overwrites the field with S+A,
    677        * so the pre-written value is harmless there. */
    678       emit_le(d, (u64)e.value, width);
    679       d->mc->emit_reloc_at(d->mc, d->cur_sec, ofs, k, e.sym, e.value, 1, 0);
    680     } else {
    681       emit_le(d, (u64)e.value, width);
    682     }
    683     if (!asm_driver_eat_comma(d)) break;
    684   }
    685 }
    686 
    687 static void do_directive(AsmDriver* d, Sym name) {
    688   if (sym_eq(d, name, "text")) {
    689     if (!d->n_text) d->n_text = pool_intern_slice(d->pool, SLICE_LIT(".text"));
    690     set_section(d, d->n_text, SEC_TEXT, (u16)(SF_ALLOC | SF_EXEC), 4);
    691     d_skip_to_eol(d);
    692     return;
    693   }
    694   if (sym_eq(d, name, "data")) {
    695     if (!d->n_data) d->n_data = pool_intern_slice(d->pool, SLICE_LIT(".data"));
    696     set_section(d, d->n_data, SEC_DATA, (u16)(SF_ALLOC | SF_WRITE), 8);
    697     d_skip_to_eol(d);
    698     return;
    699   }
    700   if (sym_eq(d, name, "rodata")) {
    701     if (!d->n_rodata)
    702       d->n_rodata = pool_intern_slice(d->pool, SLICE_LIT(".rodata"));
    703     set_section(d, d->n_rodata, SEC_RODATA, (u16)SF_ALLOC, 8);
    704     d_skip_to_eol(d);
    705     return;
    706   }
    707   if (sym_eq(d, name, "bss")) {
    708     if (!d->n_bss) d->n_bss = pool_intern_slice(d->pool, SLICE_LIT(".bss"));
    709     set_section(d, d->n_bss, SEC_BSS, (u16)(SF_ALLOC | SF_WRITE), 8);
    710     d_skip_to_eol(d);
    711     return;
    712   }
    713   if (sym_eq(d, name, "section")) {
    714     Sym sname = 0;
    715     AsmTok t = d_peek(d);
    716     if (t.kind == ASM_TOK_STR) {
    717       size_t n = 0;
    718       const char* p = asm_str(d, t.spelling, &n);
    719       if (n >= 2 && p[0] == '"')
    720         sname = pool_intern_slice(d->pool, (Slice){.s = p + 1, .len = n - 2});
    721       (void)d_next(d);
    722     } else if (t.kind == ASM_TOK_IDENT || tok_is_punct(t, '.')) {
    723       /* A bare section name. The lexer breaks a dotted name like
    724        * `.rodata.toy.merge` into PUNCT('.')+IDENT segments (the `.`+digit
    725        * identifier rule does not glue `.`+letter), so reassemble the full
    726        * dotted spelling by consuming each adjacent `.segment`. Stops at the
    727        * `, "flags"` operands (the next token is then a comma). */
    728       char buf[128];
    729       size_t bn = 0;
    730       int leading_dot = tok_is_punct(t, '.');
    731       if (leading_dot) {
    732         (void)d_next(d);
    733         buf[bn++] = '.';
    734       }
    735       AsmTok id = d_next(d);
    736       if (id.kind != ASM_TOK_IDENT) d_panicf(d, "asm: .section: bad name");
    737       for (;;) {
    738         size_t ni = 0;
    739         const char* nm = asm_str(d, id.spelling, &ni);
    740         if (bn + ni >= sizeof buf) d_panicf(d, "asm: .section: name too long");
    741         for (size_t i = 0; i < ni; ++i) buf[bn++] = nm[i];
    742         /* Glue a following `.<ident>` (or `.<num>`) segment, no whitespace. */
    743         if (!tok_is_punct(d_peek(d), '.')) break;
    744         (void)d_next(d); /* '.' */
    745         AsmTok seg = d_peek(d);
    746         if (seg.kind != ASM_TOK_IDENT && seg.kind != ASM_TOK_NUM) {
    747           /* A lone trailing '.' is not part of the name; put it back is not
    748            * supported, but section names never end in '.', so this is a
    749            * malformed directive. */
    750           d_panicf(d, "asm: .section: bad name");
    751         }
    752         if (bn + 1 >= sizeof buf) d_panicf(d, "asm: .section: name too long");
    753         buf[bn++] = '.';
    754         id = d_next(d);
    755       }
    756       sname = pool_intern_slice(d->pool, (Slice){.s = buf, .len = bn});
    757     } else {
    758       d_panicf(d, "asm: .section: expected name");
    759     }
    760     SecKind kind = SEC_OTHER;
    761     u16 sem = SSEM_PROGBITS;
    762     u16 flags = 0;
    763     u32 entsize = 0;
    764     int have_flags = 0;
    765     int macho_2pos = 0;
    766 
    767     /* Mach-O `.section segname,sectname[,type[,attrs]]`: the token after the
    768      * first comma is a bare sectname IDENT (vs GNU's "flags" STRING). kit as
    769      * parses the dialect of its target only (no hybrid), so this branch is
    770      * gated on the Mach-O object format. Rebuild the comma-joined "seg,sect"
    771      * name that the Mach-O writer's name_to_seg_sect splits back. */
    772     if (d->c->target.obj == KIT_OBJ_MACHO && tok_is_punct(d_peek(d), ',')) {
    773       (void)d_next(d); /* eat ',' */
    774       AsmTok sect = d_next(d);
    775       size_t sgn = 0, scn = 0;
    776       const char* sgp;
    777       const char* scp;
    778       char buf[128];
    779       if (sect.kind != ASM_TOK_IDENT)
    780         d_panicf(d, "asm: .section: expected Mach-O sectname after ','");
    781       sgp = asm_str(d, sname, &sgn);
    782       scp = asm_str(d, sect.v.ident, &scn);
    783       if (sgn + 1 + scn >= sizeof buf)
    784         d_panicf(d, "asm: .section: name too long");
    785       memcpy(buf, sgp, sgn);
    786       buf[sgn] = ',';
    787       memcpy(buf + sgn + 1, scp, scn);
    788       sname =
    789           pool_intern_slice(d->pool, (Slice){.s = buf, .len = sgn + 1 + scn});
    790       macho_2pos = 1;
    791       /* Optional trailing Mach-O type/attribute fields (regular,
    792        * cstring_literals, …): accept and consume; map the few affecting
    793        * flags. */
    794       while (asm_driver_eat_comma(d)) {
    795         AsmTok ty = d_peek(d);
    796         if (ty.kind == ASM_TOK_IDENT) {
    797           size_t tn = 0;
    798           const char* tp = asm_str(d, ty.v.ident, &tn);
    799           if (tn == 16 && memcmp(tp, "cstring_literals", 16) == 0)
    800             flags |= SF_STRINGS;
    801           (void)d_next(d);
    802         } else if (ty.kind == ASM_TOK_NUM) {
    803           (void)d_next(d);
    804         } else {
    805           break;
    806         }
    807       }
    808     }
    809 
    810     /* Optional GNU-as operands: , "flags" [, @type [, entsize]]. The emitter
    811      * (src/api/asm_emit.c) writes these for SEC_OTHER named sections; parse
    812      * them back so a global's section flags/entsize round-trip faithfully. */
    813     if (!macho_2pos && asm_driver_eat_comma(d)) {
    814       AsmTok ft = d_peek(d);
    815       if (ft.kind == ASM_TOK_STR) {
    816         size_t fn = 0;
    817         const char* fp = asm_str(d, ft.spelling, &fn);
    818         size_t fi;
    819         for (fi = 0; fp && fi < fn; ++fi) {
    820           switch (fp[fi]) {
    821             case 'a':
    822               flags |= SF_ALLOC;
    823               break;
    824             case 'w':
    825               flags |= SF_WRITE;
    826               break;
    827             case 'x':
    828               flags |= SF_EXEC;
    829               break;
    830             case 'M':
    831               flags |= SF_MERGE;
    832               break;
    833             case 'S':
    834               flags |= SF_STRINGS;
    835               break;
    836             case 'T':
    837               flags |= SF_TLS;
    838               break;
    839             case 'R':
    840               flags |= SF_RETAIN;
    841               break;
    842             default:
    843               break; /* surrounding quotes / unknown letters */
    844           }
    845         }
    846         have_flags = 1;
    847         (void)d_next(d);
    848         if (asm_driver_eat_comma(d)) {
    849           AsmTok ty = d_peek(d);
    850           Sym tag = 0;
    851           if (tok_is_punct(ty, '@')) {
    852             (void)d_next(d);
    853             AsmTok ti = d_next(d); /* the @type ident (progbits/nobits/note) */
    854             if (ti.kind == ASM_TOK_IDENT) tag = ti.v.ident;
    855           } else if (ty.kind == ASM_TOK_IDENT) {
    856             tag = d_next(d).v.ident;
    857           }
    858           if (tag) {
    859             if (sym_eq(d, tag, "note")) {
    860               sem = SSEM_NOTE;
    861             } else if (sym_eq(d, tag, "nobits")) {
    862               sem = SSEM_NOBITS;
    863             } else if (sym_eq(d, tag, "init_array")) {
    864               sem = SSEM_INIT_ARRAY;
    865             } else if (sym_eq(d, tag, "fini_array")) {
    866               sem = SSEM_FINI_ARRAY;
    867             } else if (sym_eq(d, tag, "preinit_array")) {
    868               sem = SSEM_PREINIT_ARRAY;
    869             }
    870           }
    871           if (asm_driver_eat_comma(d)) {
    872             AsmTok es = d_peek(d);
    873             if (es.kind == ASM_TOK_NUM) {
    874               entsize = (u32)lit_to_i64(d, es.spelling);
    875               (void)d_next(d);
    876             }
    877           }
    878         }
    879       }
    880     }
    881 
    882     {
    883       size_t nn = 0;
    884       const char* p = asm_str(d, sname, &nn);
    885       if (macho_2pos) {
    886         /* Canonical Apple seg,sect → SecKind via the shared inverse of the
    887          * writer's name_to_seg_sect; unrecognized spellings (e.g.
    888          * __TEXT,__eh_frame) fall back to SEC_OTHER. Flags are derived from the
    889          * resolved kind: every Mach-O section is allocated (so SEC_OTHER keeps
    890          * SF_ALLOC), and __TEXT,__cstring additionally carries SF_STRINGS. The
    891          * comma name is preserved so the writer round-trips seg/sect verbatim. */
    892         if (!obj_macho_seckind_for_secname(p, nn, &kind)) kind = SEC_OTHER;
    893         switch (kind) {
    894           case SEC_TEXT:
    895             flags |= (u16)(SF_ALLOC | SF_EXEC);
    896             break;
    897           case SEC_RODATA:
    898             flags |= (u16)SF_ALLOC;
    899             if (nn == 16 && memcmp(p, "__TEXT,__cstring", 16) == 0)
    900               flags |= (u16)SF_STRINGS;
    901             break;
    902           case SEC_DATA:
    903           case SEC_BSS:
    904             flags |= (u16)(SF_ALLOC | SF_WRITE);
    905             break;
    906           case SEC_DEBUG:
    907             break;
    908           default:
    909             flags |= (u16)SF_ALLOC;
    910             break;
    911         }
    912       } else if (have_flags) {
    913         /* Explicit flags: a canonical name keeps its kind; any other name is a
    914          * SEC_OTHER named section (matching codegen for section(...) globals).
    915          */
    916         if (p && nn == 5 && memcmp(p, ".text", 5) == 0)
    917           kind = SEC_TEXT;
    918         else if (p && nn == 7 && memcmp(p, ".rodata", 7) == 0)
    919           kind = SEC_RODATA;
    920         else if (p && nn == 5 && memcmp(p, ".data", 5) == 0)
    921           kind = SEC_DATA;
    922         else if (p && nn == 4 && memcmp(p, ".bss", 4) == 0)
    923           kind = SEC_BSS;
    924         else
    925           kind = SEC_OTHER;
    926       } else if (p) {
    927         /* No flag string: infer kind+flags from a canonical name prefix. */
    928         if (nn >= 5 && memcmp(p, ".text", 5) == 0) {
    929           kind = SEC_TEXT;
    930           flags = (u16)(SF_ALLOC | SF_EXEC);
    931         } else if (nn >= 7 && memcmp(p, ".rodata", 7) == 0) {
    932           kind = SEC_RODATA;
    933           flags = (u16)SF_ALLOC;
    934         } else if (nn >= 5 && memcmp(p, ".data", 5) == 0) {
    935           kind = SEC_DATA;
    936           flags = (u16)(SF_ALLOC | SF_WRITE);
    937         } else if (nn >= 4 && memcmp(p, ".bss", 4) == 0) {
    938           kind = SEC_BSS;
    939           flags = (u16)(SF_ALLOC | SF_WRITE);
    940         }
    941       }
    942     }
    943     if (kind == SEC_BSS) sem = SSEM_NOBITS;
    944     if (sem == SSEM_NOTE) kind = SEC_OTHER;
    945 
    946     /* Consume any remaining operands (e.g. ,unique,N or group fields). */
    947     d_skip_to_eol(d);
    948     {
    949       ObjSecId sid = ensure_section_ex(d, sname, kind, sem, flags, 1);
    950       if (entsize) obj_section_set_entsize(d->ob, sid, entsize);
    951       d->cur_sec = sid;
    952       d->mc->set_section(d->mc, sid);
    953     }
    954     return;
    955   }
    956   if (sym_eq(d, name, "globl") || sym_eq(d, name, "global")) {
    957     Sym n = expect_ident(d, ".globl");
    958     sym_mut(d, intern_sym(d, n))->bind = (u16)SB_GLOBAL;
    959     d_skip_to_eol(d);
    960     return;
    961   }
    962   if (sym_eq(d, name, "local")) {
    963     Sym n = expect_ident(d, ".local");
    964     sym_mut(d, intern_sym(d, n))->bind = (u16)SB_LOCAL;
    965     d_skip_to_eol(d);
    966     return;
    967   }
    968   /* `.weak_definition` is the Mach-O spelling for a weak *defined* symbol
    969    * (clang rejects GNU `.weak` on Mach-O). It pairs with a `.globl`; kit
    970    * collapses both to SB_WEAK, which the Mach-O emitter turns into
    971    * N_EXT|N_WEAK_DEF and ELF into STB_WEAK. */
    972   if (sym_eq(d, name, "weak") || sym_eq(d, name, "weak_definition")) {
    973     Sym n = expect_ident(d, ".weak");
    974     sym_mut(d, intern_sym(d, n))->bind = (u16)SB_WEAK;
    975     d_skip_to_eol(d);
    976     return;
    977   }
    978   if (sym_eq(d, name, "hidden")) {
    979     Sym n = expect_ident(d, ".hidden");
    980     sym_mut(d, intern_sym(d, n))->vis = (u8)SV_HIDDEN;
    981     d_skip_to_eol(d);
    982     return;
    983   }
    984   if (sym_eq(d, name, "protected")) {
    985     Sym n = expect_ident(d, ".protected");
    986     sym_mut(d, intern_sym(d, n))->vis = (u8)SV_PROTECTED;
    987     d_skip_to_eol(d);
    988     return;
    989   }
    990   if (sym_eq(d, name, "internal")) {
    991     Sym n = expect_ident(d, ".internal");
    992     sym_mut(d, intern_sym(d, n))->vis = (u8)SV_INTERNAL;
    993     d_skip_to_eol(d);
    994     return;
    995   }
    996   if (sym_eq(d, name, "type")) {
    997     Sym n = expect_ident(d, ".type");
    998     ObjSymId id = intern_sym(d, n);
    999     if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .type: expected ','");
   1000     AsmTok t = d_next(d);
   1001     Sym tag = 0;
   1002     if (tok_is_punct(t, '@') || tok_is_punct(t, '%')) {
   1003       AsmTok ti = d_next(d);
   1004       if (ti.kind != ASM_TOK_IDENT) d_panicf(d, "asm: .type: tag");
   1005       tag = ti.v.ident;
   1006     } else if (t.kind == ASM_TOK_IDENT) {
   1007       tag = t.v.ident;
   1008     } else if (t.kind == ASM_TOK_STR) {
   1009       size_t sn = 0;
   1010       const char* sp = asm_str(d, t.spelling, &sn);
   1011       if (sn >= 2 && sp[0] == '"' && sp[sn - 1] == '"')
   1012         tag = pool_intern_slice(d->pool, (Slice){.s = sp + 1, .len = sn - 2});
   1013     } else {
   1014       d_panicf(d, "asm: .type: tag");
   1015     }
   1016     if (tag && sym_eq(d, tag, "function"))
   1017       sym_mut(d, id)->kind = (u16)SK_FUNC;
   1018     else if (tag && sym_eq(d, tag, "object"))
   1019       sym_mut(d, id)->kind = (u16)SK_OBJ;
   1020     else if (tag && sym_eq(d, tag, "tls_object"))
   1021       sym_mut(d, id)->kind = (u16)SK_TLS;
   1022     else if (tag && sym_eq(d, tag, "gnu_indirect_function"))
   1023       sym_mut(d, id)->kind = (u16)SK_IFUNC;
   1024     d_skip_to_eol(d);
   1025     return;
   1026   }
   1027   if (sym_eq(d, name, "size")) {
   1028     Sym n = expect_ident(d, ".size");
   1029     ObjSymId id = intern_sym(d, n);
   1030     if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .size: expected ','");
   1031     /* Recognize `. - NAME`. */
   1032     AsmTok t = d_peek(d);
   1033     i64 sz = 0;
   1034     if (tok_is_punct(t, '.')) {
   1035       (void)d_next(d);
   1036       if (tok_is_punct(d_peek(d), '-')) {
   1037         (void)d_next(d);
   1038         AsmTok rid = d_peek(d);
   1039         if (rid.kind == ASM_TOK_IDENT && rid.v.ident == n) {
   1040           (void)d_next(d);
   1041           const ObjSym* os = obj_symbol_get(d->ob, id);
   1042           if (os && os->section_id == d->cur_sec)
   1043             sz = (i64)d->mc->pos(d->mc) - (i64)os->value;
   1044         }
   1045       }
   1046     } else {
   1047       AsmExpr e = parse_expr(d);
   1048       if (!e.sym) sz = e.value;
   1049     }
   1050     if (sz < 0) sz = 0;
   1051     sym_mut(d, id)->size = (u64)sz;
   1052     d_skip_to_eol(d);
   1053     return;
   1054   }
   1055   if (sym_eq(d, name, "byte")) {
   1056     emit_int_directive(d, 1);
   1057     d_skip_to_eol(d);
   1058     return;
   1059   }
   1060   if (sym_eq(d, name, "hword") || sym_eq(d, name, "short") ||
   1061       sym_eq(d, name, "2byte")) {
   1062     emit_int_directive(d, 2);
   1063     d_skip_to_eol(d);
   1064     return;
   1065   }
   1066   if (sym_eq(d, name, "word") || sym_eq(d, name, "long") ||
   1067       sym_eq(d, name, "int") || sym_eq(d, name, "4byte")) {
   1068     emit_int_directive(d, 4);
   1069     d_skip_to_eol(d);
   1070     return;
   1071   }
   1072   if (sym_eq(d, name, "quad") || sym_eq(d, name, "8byte") ||
   1073       sym_eq(d, name, "dword") || sym_eq(d, name, "xword")) {
   1074     emit_int_directive(d, 8);
   1075     d_skip_to_eol(d);
   1076     return;
   1077   }
   1078   /* .inst WORD[, WORD...] — emit raw 32-bit instruction word(s), little-endian
   1079    * (AArch64/RISC-V are fixed 4-byte). This is how `cc -S` round-trips an
   1080    * instruction the disassembler can't decode yet (`.inst 0x<word>`); silently
   1081    * dropping it — the old behavior — deletes the instruction and miscompiles.
   1082    * Matches GNU as / llvm-mc, which emit the word. */
   1083   if (sym_eq(d, name, "inst")) {
   1084     (void)asm_driver_cur_section(d);
   1085     for (;;) {
   1086       i64 v = asm_driver_parse_const(d);
   1087       emit_le(d, (u64)v, 4);
   1088       if (!asm_driver_eat_comma(d)) break;
   1089     }
   1090     d_skip_to_eol(d);
   1091     return;
   1092   }
   1093   if (sym_eq(d, name, "ascii") || sym_eq(d, name, "asciz") ||
   1094       sym_eq(d, name, "string")) {
   1095     int term = !sym_eq(d, name, "ascii");
   1096     for (;;) {
   1097       AsmTok t = d_peek(d);
   1098       if (t.kind != ASM_TOK_STR)
   1099         d_panicf(d, "asm: .ascii/.string: expected string");
   1100       (void)d_next(d);
   1101       u8* buf = NULL;
   1102       u32 n = 0;
   1103       decode_string(d, t.spelling, &buf, &n);
   1104       (void)asm_driver_cur_section(d);
   1105       d->mc->emit_bytes(d->mc, buf, n);
   1106       if (term) emit_le(d, 0, 1);
   1107       d->heap->free(d->heap, buf, n);
   1108       if (!asm_driver_eat_comma(d)) break;
   1109     }
   1110     d_skip_to_eol(d);
   1111     return;
   1112   }
   1113   if (sym_eq(d, name, "zero") || sym_eq(d, name, "skip") ||
   1114       sym_eq(d, name, "space")) {
   1115     i64 n = asm_driver_parse_const(d);
   1116     i64 fill = 0;
   1117     if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
   1118     if (n > 0) {
   1119       (void)asm_driver_cur_section(d);
   1120       d->mc->emit_fill(d->mc, (size_t)n, (u8)fill);
   1121     }
   1122     d_skip_to_eol(d);
   1123     return;
   1124   }
   1125   if (sym_eq(d, name, "fill")) {
   1126     i64 n = asm_driver_parse_const(d);
   1127     i64 size = 1, val = 0;
   1128     if (asm_driver_eat_comma(d)) size = asm_driver_parse_const(d);
   1129     if (asm_driver_eat_comma(d)) val = asm_driver_parse_const(d);
   1130     if (size < 1 || size > 8) d_panicf(d, "asm: .fill: size out of range");
   1131     (void)asm_driver_cur_section(d);
   1132     for (i64 i = 0; i < n; ++i) emit_le(d, (u64)val, (u32)size);
   1133     d_skip_to_eol(d);
   1134     return;
   1135   }
   1136   if (sym_eq(d, name, "align") || sym_eq(d, name, "balign")) {
   1137     i64 a = asm_driver_parse_const(d);
   1138     i64 fill = 0;
   1139     if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
   1140     if (a <= 0 || (a & (a - 1))) d_panicf(d, "asm: .align: not a power of 2");
   1141     (void)asm_driver_cur_section(d);
   1142     d->mc->emit_align(d->mc, (u32)a, (u8)fill);
   1143     d_skip_to_eol(d);
   1144     return;
   1145   }
   1146   if (sym_eq(d, name, "p2align")) {
   1147     i64 lg = asm_driver_parse_const(d);
   1148     i64 fill = 0;
   1149     if (asm_driver_eat_comma(d)) fill = asm_driver_parse_const(d);
   1150     if (lg < 0 || lg > 16) d_panicf(d, "asm: .p2align: out of range");
   1151     (void)asm_driver_cur_section(d);
   1152     d->mc->emit_align(d->mc, 1u << (u32)lg, (u8)fill);
   1153     d_skip_to_eol(d);
   1154     return;
   1155   }
   1156   if (sym_eq(d, name, "set") || sym_eq(d, name, "equ")) {
   1157     Sym n = expect_ident(d, ".set");
   1158     if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .set: expected ','");
   1159     AsmExpr e = parse_expr(d);
   1160     AsmEqu eq;
   1161     eq.value = e.value;
   1162     eq.sym = e.sym;
   1163     eq.has_sym = e.sym ? 1 : 0;
   1164     eq.pad[0] = eq.pad[1] = eq.pad[2] = 0;
   1165     SymEquMap_set(&d->equ_map, n, eq);
   1166     d_skip_to_eol(d);
   1167     return;
   1168   }
   1169 
   1170   /* .comm/.lcomm NAME, SIZE[, ALIGN] — declare a common symbol. Previously
   1171    * skipped, which silently produced no symbol and reserved no space. Model
   1172    * both as SK_COMMON (the linker allocates .bss space); .comm is global,
   1173    * .lcomm local. */
   1174   if (sym_eq(d, name, "comm") || sym_eq(d, name, "lcomm")) {
   1175     int is_local = sym_eq(d, name, "lcomm");
   1176     Sym nm = expect_ident(d, ".comm");
   1177     i64 size = 0, align = 1;
   1178     if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .comm: expected ','");
   1179     size = asm_driver_parse_const(d);
   1180     if (asm_driver_eat_comma(d)) align = asm_driver_parse_const(d);
   1181     if (size < 0) size = 0;
   1182     if (align < 1) align = 1;
   1183     {
   1184       ObjSym* s = sym_mut(d, intern_sym(d, nm));
   1185       s->kind = (u16)SK_COMMON;
   1186       s->bind = (u16)(is_local ? SB_LOCAL : SB_GLOBAL);
   1187       s->size = (u64)size;
   1188       s->common_align = (u64)align;
   1189     }
   1190     d_skip_to_eol(d);
   1191     return;
   1192   }
   1193   /* .uleb128/.sleb128 VALUE[, VALUE...] — emit LEB128-encoded bytes.
   1194    * Previously skipped, which emitted nothing and corrupted any hand-written
   1195    * DWARF / exception tables that follow. */
   1196   if (sym_eq(d, name, "uleb128") || sym_eq(d, name, "sleb128")) {
   1197     int sgn = sym_eq(d, name, "sleb128");
   1198     (void)asm_driver_cur_section(d);
   1199     for (;;) {
   1200       i64 v = asm_driver_parse_const(d);
   1201       u8 buf[16];
   1202       u32 n = 0;
   1203       if (sgn) {
   1204         int more = 1;
   1205         while (more) {
   1206           u8 b = (u8)((u64)v & 0x7fu);
   1207           v >>= 7; /* arithmetic right shift keeps the sign */
   1208           if ((v == 0 && !(b & 0x40u)) || (v == -1 && (b & 0x40u)))
   1209             more = 0;
   1210           else
   1211             b |= 0x80u;
   1212           buf[n++] = b;
   1213         }
   1214       } else {
   1215         u64 uv = (u64)v;
   1216         do {
   1217           u8 b = (u8)(uv & 0x7fu);
   1218           uv >>= 7;
   1219           if (uv) b |= 0x80u;
   1220           buf[n++] = b;
   1221         } while (uv);
   1222       }
   1223       d->mc->emit_bytes(d->mc, buf, n);
   1224       if (!asm_driver_eat_comma(d)) break;
   1225     }
   1226     d_skip_to_eol(d);
   1227     return;
   1228   }
   1229 
   1230   /* CFI block + accepted-but-ignored directives.  Keep parser
   1231    * forward-progress without aborting the whole TU. */
   1232   if (starts_with(d, name, "cfi_") || sym_eq(d, name, "file") ||
   1233       sym_eq(d, name, "loc") || sym_eq(d, name, "ident") ||
   1234       sym_eq(d, name, "popsection") || sym_eq(d, name, "pushsection") ||
   1235       sym_eq(d, name, "previous") ||
   1236       sym_eq(d, name, "subsections_via_symbols") || sym_eq(d, name, "macro") ||
   1237       sym_eq(d, name, "endm") || sym_eq(d, name, "if") ||
   1238       sym_eq(d, name, "endif") || sym_eq(d, name, "else") ||
   1239       sym_eq(d, name, "include") ||
   1240       /* RISC-V `.option rvc/norvc/relax/norelax/push/pop/...`: kit's own
   1241        * cc -S emits `.option norvc`/`.option norelax` to pin its fixed
   1242        * instruction layout (see rv64_file_prologue). kit-as never compresses
   1243        * or relaxes, so it already honors these implicitly — accept and ignore
   1244        * rather than treat as an unknown directive. */
   1245       sym_eq(d, name, "option")) {
   1246     d_skip_to_eol(d);
   1247     return;
   1248   }
   1249 
   1250   /* Unknown directive — recover. */
   1251   d_skip_to_eol(d);
   1252 }
   1253 
   1254 /* ---- same-section branch relaxation ----
   1255  *
   1256  * The per-arch parser emits a relocation for every symbolic branch target,
   1257  * even one that resolves within the current section (a forward reference like
   1258  * `b .Lfoo` is only known to be local once `.Lfoo:` is seen). GNU as / llvm-mc
   1259  * — and kit's own codegen — instead resolve such intra-section branches at
   1260  * assembly time: compute the displacement, patch the instruction, and emit no
   1261  * relocation. Matching that is what makes `cc -S | as` reproduce `cc -c`'s
   1262  * .text relocation table for control-flow-bearing code (the L1 round-trip
   1263  * lane; see doc/TESTING.md).
   1264  *
   1265  * We relax only PC-relative *branch* relocations (never CALL26 — a call keeps
   1266  * its relocation on both sides) whose target is a symbol defined in the same
   1267  * section, with local binding, and not a function entry. The "local" guard
   1268  * matches GNU as (a global symbol may be interposed, so its branch keeps the
   1269  * relocation); the "not a function" guard matches kit codegen, which keeps
   1270  * the relocation for an intra-file call/tail-call to a function symbol while
   1271  * resolving branches to internal labels.
   1272  *
   1273  * Restricted to the AArch64 branch kinds for now — the round-trip vertical
   1274  * slice is aa64; rv64/x64 keep their current behavior. */
   1275 static int is_relaxable_branch_kind(u16 kind) {
   1276   switch (kind) {
   1277     case R_AARCH64_JUMP26:
   1278     case R_AARCH64_CONDBR19:
   1279     case R_AARCH64_TSTBR14:
   1280     case R_AARCH64_ADR_PREL_LO21: /* adr to a local code label (&&label) */
   1281       return 1;
   1282     default:
   1283       return 0;
   1284   }
   1285 }
   1286 
   1287 static void relax_local_branches(AsmDriver* d) {
   1288   u32 total = obj_reloc_total(d->ob), i;
   1289   for (i = 0; i < total; ++i) {
   1290     const Reloc* r = obj_reloc_at(d->ob, i);
   1291     const ObjSym* tgt;
   1292     Section* sec;
   1293     u8 insn[4];
   1294     if (!r || r->removed) continue;
   1295     if (!is_relaxable_branch_kind(r->kind)) continue;
   1296     tgt = obj_symbol_get(d->ob, r->sym);
   1297     if (!tgt) continue;
   1298     if (tgt->section_id != r->section_id) continue; /* cross-section / undef */
   1299     if (tgt->bind != SB_LOCAL) continue;            /* preemptible; keep */
   1300     if (tgt->kind == SK_FUNC) continue;             /* call/tail-call; keep */
   1301     sec = (Section*)obj_section_get(d->ob, r->section_id);
   1302     if (!sec) continue;
   1303     if ((u64)r->offset + 4 > sec->bytes.total) continue;
   1304     buf_read(&sec->bytes, r->offset, insn, 4);
   1305     /* Section-relative S and P make the base cancel: disp = S + A - P. */
   1306     link_reloc_apply(d->c, (RelocKind)r->kind, insn, tgt->value, r->addend,
   1307                      r->offset);
   1308     buf_patch(&sec->bytes, r->offset, insn, 4);
   1309     ((Reloc*)r)->removed = 1;
   1310   }
   1311 }
   1312 
   1313 /* ---- driver loop ---- */
   1314 
   1315 static void process_label(AsmDriver* d, Sym name) {
   1316   ObjSymId id = intern_sym(d, name);
   1317   (void)asm_driver_cur_section(d);
   1318   const ObjSym* os = obj_symbol_get(d->ob, id);
   1319   if (os && os->section_id != OBJ_SEC_NONE)
   1320     d_panicf(d, "asm: symbol defined twice");
   1321   obj_symbol_define(d->ob, id, d->cur_sec, (u64)d->mc->pos(d->mc), 0);
   1322   /* Promote SK_UNDEF (forward ref via reloc) to SK_NOTYPE so it's a
   1323    * real defined symbol; explicit `.type SYM, @function` will refine. */
   1324   if (os && os->kind == SK_UNDEF) sym_mut(d, id)->kind = (u16)SK_NOTYPE;
   1325 }
   1326 
   1327 static Sym maybe_compose_mnemonic(AsmDriver* d, Sym head) {
   1328   /* Loops to accept multi-dot mnemonics like RISC-V's `fcvt.w.s` /
   1329    * `amoadd.d` — peel one `.ident` per pass, intern the joined token,
   1330    * and stop when the next token isn't a touching dot. */
   1331   for (;;) {
   1332     AsmTok t = d_peek(d);
   1333     if (!tok_is_punct(t, '.')) return head;
   1334     if (t.flags & ASM_TF_HAS_SPACE) return head;
   1335     (void)d_next(d);
   1336     AsmTok rest = d_next(d);
   1337     if (rest.kind != ASM_TOK_IDENT)
   1338       d_panicf(d, "asm: composite mnemonic: expected ident");
   1339     size_t hn = 0, rn = 0;
   1340     const char* hp = asm_str(d, head, &hn);
   1341     const char* rp = asm_str(d, rest.v.ident, &rn);
   1342     size_t n = hn + 1 + rn;
   1343     if (n >= 64) d_panicf(d, "asm: mnemonic too long");
   1344     char buf[64];
   1345     for (size_t i = 0; i < hn; ++i) buf[i] = hp[i];
   1346     buf[hn] = '.';
   1347     for (size_t i = 0; i < rn; ++i) buf[hn + 1 + i] = rp[i];
   1348     head = pool_intern_slice(d->pool, (Slice){.s = buf, .len = n});
   1349   }
   1350 }
   1351 
   1352 /* ---- inline-asm driver constructor ----
   1353  *
   1354  * Inline-asm template walkers (per-arch) re-lex pre-substituted source
   1355  * text through the same per-mnemonic parsers used by the standalone .s
   1356  * driver.  This constructor builds a minimally-initialized AsmDriver
   1357  * around a caller-supplied memory-backed AsmLexer + MCEmitter.
   1358  *
   1359  * The driver does not own the AsmLexer or MCEmitter, does not allocate a
   1360  * default section (inline asm emits into whatever section the wrapping
   1361  * cg has selected on its MCEmitter), and skips the standalone driver's
   1362  * per-arch handle (`d->arch_asm`) — the caller has already opened its own
   1363  * arch asm handle to thread per-block bound state through. */
   1364 AsmDriver* asm_driver_open_inline(Compiler* c, MCEmitter* mc, AsmLexer* lex) {
   1365   Heap* heap = (Heap*)c->ctx->heap;
   1366   AsmDriver* d = (AsmDriver*)heap->alloc(heap, sizeof *d, _Alignof(AsmDriver));
   1367   memset(d, 0, sizeof *d);
   1368   d->c = c;
   1369   d->lex = lex;
   1370   d->mc = mc;
   1371   d->ob = mc->obj;
   1372   d->pool = c->global;
   1373   d->heap = heap;
   1374   /* The MCEmitter's section is whatever cg has set; do not override it.
   1375    * cur_sec == OBJ_SEC_NONE means "ask the MCEmitter on demand" — we use
   1376    * mc->section_id directly via asm_driver_cur_section's lazy init for
   1377    * standalone, but inline asm should never reach that path because the
   1378    * MCEmitter already has its section. Pre-seed cur_sec from the
   1379    * MCEmitter so emit_reloc_at calls get the right section id. */
   1380   d->cur_sec = mc->section_id;
   1381   SymSecMap_init(&d->sec_map, heap);
   1382   SymSymMap_init(&d->sym_map, heap);
   1383   SymEquMap_init(&d->equ_map, heap);
   1384   d->arch_asm = NULL; /* caller owns its own arch asm handle */
   1385   return d;
   1386 }
   1387 
   1388 void asm_driver_close_inline(AsmDriver* d) {
   1389   if (!d) return;
   1390   SymSecMap_fini(&d->sec_map);
   1391   SymSymMap_fini(&d->sym_map);
   1392   SymEquMap_fini(&d->equ_map);
   1393   Heap* heap = d->heap;
   1394   heap->free(heap, d, sizeof *d);
   1395 }
   1396 
   1397 void asm_parse(Compiler* c, AsmLexer* l, MCEmitter* mc) {
   1398   AsmDriver d;
   1399   memset(&d, 0, sizeof d);
   1400   d.c = c;
   1401   d.lex = l;
   1402   d.mc = mc;
   1403   d.ob = mc->obj;
   1404   d.pool = c->global;
   1405   d.heap = (Heap*)c->ctx->heap;
   1406   d.cur_sec = OBJ_SEC_NONE;
   1407   SymSecMap_init(&d.sec_map, d.heap);
   1408   SymSymMap_init(&d.sym_map, d.heap);
   1409   SymEquMap_init(&d.equ_map, d.heap);
   1410   {
   1411     const ArchImpl* arch = arch_for_compiler(c);
   1412     if (!arch || !arch->asm_new) {
   1413       SrcLoc loc = asm_lex_loc(l);
   1414       compiler_panic(c, loc, "asm_parse: unsupported target arch %d",
   1415                      (int)c->target.arch);
   1416     }
   1417     d.arch_asm = arch->asm_new(c);
   1418   }
   1419 
   1420   for (;;) {
   1421     AsmTok t = d_peek(&d);
   1422     if (t.kind == ASM_TOK_EOF) break;
   1423     if (t.kind == ASM_TOK_NEWLINE) {
   1424       (void)d_next(&d);
   1425       continue;
   1426     }
   1427     if (t.kind == ASM_TOK_HASH) {
   1428       /* cpp-style linemarker; skip the whole line. */
   1429       d_skip_to_eol(&d);
   1430       continue;
   1431     }
   1432     if (tok_is_punct(t, '.')) {
   1433       (void)d_next(&d);
   1434       AsmTok id = d_next(&d);
   1435       if (id.kind != ASM_TOK_IDENT)
   1436         d_panicf(&d, "asm: expected directive name after '.'");
   1437       do_directive(&d, id.v.ident);
   1438       d_eat_eol(&d);
   1439       continue;
   1440     }
   1441     if (t.kind == ASM_TOK_IDENT) {
   1442       Sym head = t.v.ident;
   1443       (void)d_next(&d);
   1444       AsmTok nxt = d_peek(&d);
   1445       if (tok_is_punct(nxt, ':')) {
   1446         (void)d_next(&d);
   1447         process_label(&d, head);
   1448         continue;
   1449       }
   1450       Sym mnemonic = maybe_compose_mnemonic(&d, head);
   1451       d.arch_asm->insn(d.arch_asm, &d, mnemonic);
   1452       d_skip_to_eol(&d);
   1453       continue;
   1454     }
   1455     /* Anything else: recover by skipping the line. */
   1456     d_skip_to_eol(&d);
   1457   }
   1458 
   1459   promote_undef_externs(&d);
   1460   relax_local_branches(&d);
   1461 
   1462   if (d.arch_asm && d.arch_asm->destroy) d.arch_asm->destroy(d.arch_asm);
   1463   SymSecMap_fini(&d.sec_map);
   1464   SymSymMap_fini(&d.sym_map);
   1465   SymEquMap_fini(&d.equ_map);
   1466 }