kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

asm.c (52855B)


      1 /* RV64 assembler — descriptor-table driven.
      2  *
      3  * Mnemonic → Rv64InsnDesc via rv64_asm_find; operand parsing dispatches
      4  * on the format kind. The descriptor's `match` field already carries
      5  * the funct3/funct7/opcode bits; the parser only needs to fill in the
      6  * register operands and immediate.
      7  *
      8  * Aliases (li, mv, ret, jr, j, nop, sext.w, beqz, bnez) are recognized
      9  * by their alias rows in the descriptor table and rewritten to the
     10  * canonical encoding here. Inline rv_* encoders in isa.h remain the
     11  * hot path for codegen; the assembler uses them to assemble the
     12  * machine word once it has the operand values. */
     13 
     14 #include "arch/rv64/asm.h"
     15 
     16 #include <string.h>
     17 
     18 #include "arch/rv64/isa.h"
     19 #include "arch/rv64/regs.h"
     20 #include "arch/rv64/rv64.h"
     21 #include "asm/asm_helpers.h"
     22 #include "core/arena.h"
     23 #include "core/pool.h"
     24 #include "core/slice.h"
     25 #include "core/strbuf.h"
     26 #include "obj/obj.h"
     27 
/* Per-assembler state for the RV64 backend: the generic ArchAsm header, the
 * owning compiler, and the operand lists bound for one inline-asm statement. */
struct Rv64Asm {
  ArchAsm base; /* generic assembler part; first member of the struct */
  Compiler* c;

  /* Inline-asm bound state (set by rv64_inline_bind, cleared otherwise).
   * Operand indexing per GCC convention: 0..nout-1 are outputs, then
   * nout..nout+nin-1 are inputs. Templates address into this combined
   * list via %N / %zN / %aN / %w[name] / %x[name]. */
  const AsmConstraint* outs; /* output constraints; presumably nout entries */
  Operand* out_ops;          /* output operands (non-const, unlike in_ops) */
  const AsmConstraint* ins;  /* input constraints; presumably nin entries */
  const Operand* in_ops;     /* input operands */
  const Sym* clobbers;       /* clobber names; presumably nclob entries */
  u32 nout;                  /* number of outputs */
  u32 nin;                   /* number of inputs */
  u32 nclob;                 /* number of clobbers */
};

typedef struct Rv64Asm Rv64Asm;
     47 
/* Relocation modifier on a 12-bit immediate offset (`%lo`/`%pcrel_lo`).
 * RV_MEMMOD_NONE means a plain numeric displacement in `disp`. */
typedef enum RvMemMod {
  RV_MEMMOD_NONE = 0, /* no modifier: the offset is a plain constant */
  RV_MEMMOD_LO,       /* offset written as %lo(sym) */
  RV_MEMMOD_PCREL_LO, /* offset written as %pcrel_lo(label) */
} RvMemMod;
     55 
/* A parsed `offset(base)` memory operand. The offset is either a numeric
 * displacement (`disp`, with mod == RV_MEMMOD_NONE) or a relocation-modified
 * symbol reference (`mod`, `sym`, `off`), in which case `disp` stays 0. */
typedef struct Rv64Mem {
  i32 disp;     /* numeric displacement; 0 when a modifier is present */
  u32 base;     /* base integer register number */
  RvMemMod mod; /* reloc modifier on the offset, or RV_MEMMOD_NONE */
  ObjSymId sym; /* symbol when mod != NONE */
  i64 off;      /* addend when mod != NONE */
} Rv64Mem;
     63 
     64 static int sym_to_cstr(AsmDriver* d, Sym s, char* out, size_t cap) {
     65   Slice sl = pool_slice(asm_driver_pool(d), s);
     66   if (!sl.s || sl.len >= cap) return 0;
     67   memcpy(out, sl.s, sl.len);
     68   out[sl.len] = '\0';
     69   return 1;
     70 }
     71 
     72 /* True if `s` begins with the NUL-terminated literal `pfx` (length-explicit).
     73  */
     74 static bool slice_has_prefix_cstr(Slice s, const char* pfx, size_t n) {
     75   return s.len >= n && memcmp(s.s, pfx, n) == 0;
     76 }
     77 
     78 static int rv_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, int* fp_out) {
     79   char name[16];
     80   uint32_t dwarf = 0;
     81   if (!sym_to_cstr(d, s, name, sizeof name)) return 0;
     82   if (rv64_register_index(name, &dwarf) != 0) return 0;
     83   if (reg_out) *reg_out = dwarf & 31u;
     84   if (fp_out) *fp_out = dwarf >= 32u;
     85   return 1;
     86 }
     87 
     88 static u32 parse_reg(AsmDriver* d, int* fp_out) {
     89   AsmTok t = asm_driver_next(d);
     90   u32 r;
     91   if (t.kind != ASM_TOK_IDENT || !rv_reg_from_name(d, t.v.ident, &r, fp_out))
     92     asm_driver_panic(d, "rv64 asm: bad register");
     93   return r;
     94 }
     95 
     96 static u32 parse_xreg(AsmDriver* d) {
     97   int fp = 0;
     98   u32 r = parse_reg(d, &fp);
     99   if (fp) asm_driver_panic(d, "rv64 asm: expected integer register");
    100   return r;
    101 }
    102 
    103 static u32 parse_freg(AsmDriver* d) {
    104   int fp = 0;
    105   u32 r = parse_reg(d, &fp);
    106   if (!fp) asm_driver_panic(d, "rv64 asm: expected float register");
    107   return r;
    108 }
    109 
    110 static void expect_comma(AsmDriver* d) {
    111   if (!asm_driver_eat_comma(d)) asm_driver_panic(d, "rv64 asm: expected ','");
    112 }
    113 
/* Position of a `%mod(sym)` relocation operand: the 20-bit upper field of
 * lui/auipc, or a 12-bit I-type (addi/load) or S-type (store) immediate.
 * rv_mod_to_reloc uses this to pick which modifier names are legal and which
 * relocation kind they map to. */
typedef enum RvModPos {
  RV_MODPOS_HI20, /* U-type upper immediate: %hi / %pcrel_hi / %got_pcrel_hi */
  RV_MODPOS_LO_I, /* I-type 12-bit immediate: %lo / %pcrel_lo */
  RV_MODPOS_LO_S, /* S-type 12-bit immediate: %lo / %pcrel_lo */
} RvModPos;
    121 
    122 /* Map a relocation-modifier name (`hi`, `lo`, `pcrel_hi`, `pcrel_lo`,
    123  * `got_pcrel_hi`) to the RelocKind appropriate for `pos`. Panics on a name
    124  * that is not valid at this operand position. */
    125 static RelocKind rv_mod_to_reloc(AsmDriver* d, Slice name, RvModPos pos) {
    126   if (pos == RV_MODPOS_HI20) {
    127     if (slice_eq_cstr(name, "hi")) return R_RV_HI20;
    128     if (slice_eq_cstr(name, "pcrel_hi")) return R_RV_PCREL_HI20;
    129     if (slice_eq_cstr(name, "got_pcrel_hi")) return R_RV_GOT_HI20;
    130   } else {
    131     int store = (pos == RV_MODPOS_LO_S);
    132     if (slice_eq_cstr(name, "lo")) return store ? R_RV_LO12_S : R_RV_LO12_I;
    133     if (slice_eq_cstr(name, "pcrel_lo"))
    134       return store ? R_RV_PCREL_LO12_S : R_RV_PCREL_LO12_I;
    135   }
    136   asm_driver_panic(d, "rv64 asm: relocation modifier not valid here");
    137 }
    138 
    139 /* If the next token is `%`, parse a `%mod(sym{+off})` relocation operand,
    140  * emit the relocation at the current emit position (where the about-to-be-
    141  * returned instruction word will land), and return 1. The caller encodes a
    142  * zero placeholder in the immediate field. Returns 0 if there is no modifier
    143  * (leaving the stream untouched for normal constant parsing). A leading `%`
    144  * is unambiguous here: modulo is infix and never starts an operand. */
    145 static int rv_parse_mod_reloc(AsmDriver* d, RvModPos pos, ObjSymId* sym_out,
    146                               i64* off_out, RelocKind* kind_out) {
    147   if (!asm_driver_tok_is_punct(asm_driver_peek(d), '%')) return 0;
    148   (void)asm_driver_next(d); /* eat '%' */
    149   AsmTok name = asm_driver_next(d);
    150   if (name.kind != ASM_TOK_IDENT)
    151     asm_driver_panic(d, "rv64 asm: expected relocation modifier name");
    152   Slice nm = pool_slice(asm_driver_pool(d), name.v.ident);
    153   asm_driver_expect_punct(d, '(', "'(' after relocation modifier");
    154   ObjSymId sym = OBJ_SYM_NONE;
    155   i64 off = 0;
    156   asm_driver_parse_sym_expr(d, &sym, &off);
    157   asm_driver_expect_punct(d, ')', "')' after %mod(sym)");
    158   RelocKind k = rv_mod_to_reloc(d, nm, pos);
    159   if (sym_out) *sym_out = sym;
    160   if (off_out) *off_out = off;
    161   if (kind_out) *kind_out = k;
    162   return 1;
    163 }
    164 
    165 /* Parse a RISC-V rounding-mode mnemonic (the comma is already consumed) into
    166  * its 3-bit funct3 value. cc -S emits this suffix on fcvt/fsqrt when the mode
    167  * isn't the default `dyn`, so the round-trip (and clang) re-encode the exact
    168  * mode rather than guessing a default. */
    169 static u32 rv_parse_rm_name(AsmDriver* d) {
    170   AsmTok t = asm_driver_next(d);
    171   Slice s;
    172   if (t.kind != ASM_TOK_IDENT)
    173     asm_driver_panic(d, "rv64 asm: expected rounding mode");
    174   s = pool_slice(asm_driver_pool(d), t.v.ident);
    175   if (slice_eq_cstr(s, "rne")) return 0u;
    176   if (slice_eq_cstr(s, "rtz")) return 1u;
    177   if (slice_eq_cstr(s, "rdn")) return 2u;
    178   if (slice_eq_cstr(s, "rup")) return 3u;
    179   if (slice_eq_cstr(s, "rmm")) return 4u;
    180   if (slice_eq_cstr(s, "dyn")) return 7u;
    181   asm_driver_panic(d, "rv64 asm: unknown rounding mode");
    182 }
    183 
    184 /* Emit a relocation for a U-type / I-type immediate `%mod(sym)` operand at
    185  * the current instruction position; returns 1 if one was present. */
    186 static int rv_emit_imm_mod_reloc(AsmDriver* d, RvModPos pos) {
    187   ObjSymId sym;
    188   i64 off;
    189   RelocKind k;
    190   if (!rv_parse_mod_reloc(d, pos, &sym, &off, &k)) return 0;
    191   MCEmitter* mc = asm_driver_mc(d);
    192   mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), k, sym, off, 0, 0);
    193   return 1;
    194 }
    195 
    196 static Rv64Mem parse_mem(AsmDriver* d) {
    197   Rv64Mem m;
    198   m.disp = 0;
    199   m.mod = RV_MEMMOD_NONE;
    200   m.sym = OBJ_SYM_NONE;
    201   m.off = 0;
    202   if (asm_driver_tok_is_punct(asm_driver_peek(d), '%')) {
    203     /* `%lo(sym)(base)` / `%pcrel_lo(label)(base)` — record the modifier; the
    204      * load/store caller emits the I- or S-type relocation. */
    205     ObjSymId sym;
    206     i64 off;
    207     RelocKind k;
    208     (void)rv_parse_mod_reloc(d, RV_MODPOS_LO_I, &sym, &off, &k);
    209     m.mod = (k == R_RV_PCREL_LO12_I) ? RV_MEMMOD_PCREL_LO : RV_MEMMOD_LO;
    210     m.sym = sym;
    211     m.off = off;
    212   } else {
    213     m.disp = (i32)asm_driver_parse_const(d);
    214   }
    215   asm_driver_expect_punct(d, '(', "'(' in rv64 memory operand");
    216   m.base = parse_xreg(d);
    217   asm_driver_expect_punct(d, ')', "')' in rv64 memory operand");
    218   return m;
    219 }
    220 
    221 /* Emit the I/S-type relocation recorded by parse_mem for a `%lo`/`%pcrel_lo`
    222  * memory offset, picking the S-type variant for stores. */
    223 static void rv_emit_mem_mod_reloc(AsmDriver* d, const Rv64Mem* m,
    224                                   int is_store) {
    225   if (m->mod == RV_MEMMOD_NONE) return;
    226   RelocKind k = (m->mod == RV_MEMMOD_PCREL_LO)
    227                     ? (is_store ? R_RV_PCREL_LO12_S : R_RV_PCREL_LO12_I)
    228                     : (is_store ? R_RV_LO12_S : R_RV_LO12_I);
    229   MCEmitter* mc = asm_driver_mc(d);
    230   mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), k, m->sym, m->off, 0, 0);
    231 }
    232 
    233 /* Fence pred/succ parser — accepts a string like "rw" / "iorw" / "0" /
    234  * a numeric literal. Returns the 4-bit mask: bit3=i, bit2=o, bit1=r,
    235  * bit0=w. */
    236 static u32 parse_fence_mask(AsmDriver* d) {
    237   AsmTok t = asm_driver_peek(d);
    238   if (t.kind == ASM_TOK_NUM) {
    239     (void)asm_driver_next(d);
    240     return (u32)asm_driver_parse_const(d) & 0xfu;
    241   }
    242   if (t.kind == ASM_TOK_IDENT) {
    243     char name[8];
    244     AsmTok tt = asm_driver_next(d);
    245     if (!sym_to_cstr(d, tt.v.ident, name, sizeof name))
    246       asm_driver_panic(d, "rv64 asm: bad fence mask");
    247     u32 mask = 0;
    248     for (const char* p = name; *p; ++p) {
    249       switch (*p) {
    250         case 'i':
    251           mask |= 8u;
    252           break;
    253         case 'o':
    254           mask |= 4u;
    255           break;
    256         case 'r':
    257           mask |= 2u;
    258           break;
    259         case 'w':
    260           mask |= 1u;
    261           break;
    262         default:
    263           asm_driver_panic(d, "rv64 asm: bad fence char");
    264       }
    265     }
    266     return mask;
    267   }
    268   asm_driver_panic(d, "rv64 asm: bad fence operand");
    269 }
    270 
    271 /* Field overlay onto a descriptor's `match` word.
    272  *
    273  * For most formats the descriptor's match already pins opcode +
    274  * funct3 + funct7. We OR in the per-operand fields. For shift-imm and
    275  * AMO families the layouts diverge from the basic R/I templates — we
    276  * handle those explicitly below. */
    277 
    278 static u32 enc_r(u32 match, u32 rd, u32 rs1, u32 rs2) {
    279   return match | ((rs2 & 0x1fu) << 20) | ((rs1 & 0x1fu) << 15) |
    280          ((rd & 0x1fu) << 7);
    281 }
    282 static u32 enc_i(u32 match, u32 rd, u32 rs1, i32 imm12) {
    283   return match | (((u32)imm12 & 0xfffu) << 20) | ((rs1 & 0x1fu) << 15) |
    284          ((rd & 0x1fu) << 7);
    285 }
    286 static u32 enc_s(u32 match, u32 rs2, u32 rs1, i32 imm12) {
    287   u32 ui = (u32)imm12 & 0xfffu;
    288   return match | ((ui >> 5) << 25) | ((rs2 & 0x1fu) << 20) |
    289          ((rs1 & 0x1fu) << 15) | ((ui & 0x1fu) << 7);
    290 }
    291 static u32 enc_b(u32 match, u32 rs1, u32 rs2, i32 imm13) {
    292   u32 ui = (u32)imm13;
    293   return match | (((ui >> 12) & 1u) << 31) | (((ui >> 5) & 0x3fu) << 25) |
    294          ((rs2 & 0x1fu) << 20) | ((rs1 & 0x1fu) << 15) |
    295          (((ui >> 1) & 0xfu) << 8) | (((ui >> 11) & 1u) << 7);
    296 }
    297 static u32 enc_u(u32 match, u32 rd, u32 imm20) {
    298   return match | ((imm20 & 0xfffffu) << 12) | ((rd & 0x1fu) << 7);
    299 }
    300 static u32 enc_j(u32 match, u32 rd, i32 imm21) {
    301   u32 ui = (u32)imm21;
    302   return match | (((ui >> 20) & 1u) << 31) | (((ui >> 1) & 0x3ffu) << 21) |
    303          (((ui >> 11) & 1u) << 20) | (((ui >> 12) & 0xffu) << 12) |
    304          ((rd & 0x1fu) << 7);
    305 }
    306 static u32 enc_r4(u32 match, u32 rd, u32 rs1, u32 rs2, u32 rs3, u32 rm) {
    307   return match | ((rs3 & 0x1fu) << 27) | ((rs2 & 0x1fu) << 20) |
    308          ((rs1 & 0x1fu) << 15) | ((rm & 0x7u) << 12) | ((rd & 0x1fu) << 7);
    309 }
    310 
    311 /* RV64I shift-imm: shamt6 occupies bits 25:20; funct6 already in match. */
    312 static u32 enc_ishift(u32 match, u32 rd, u32 rs1, u32 shamt) {
    313   return match | ((shamt & 0x3fu) << 20) | ((rs1 & 0x1fu) << 15) |
    314          ((rd & 0x1fu) << 7);
    315 }
    316 /* RV32 word shift-imm: shamt5 occupies bits 24:20 (funct7 already pinned). */
    317 static u32 enc_ishiftw(u32 match, u32 rd, u32 rs1, u32 shamt) {
    318   return match | ((shamt & 0x1fu) << 20) | ((rs1 & 0x1fu) << 15) |
    319          ((rd & 0x1fu) << 7);
    320 }
    321 /* AMO: aq/rl bits 26/25 — we accept them as optional .aq/.rl suffixes
    322  * on the mnemonic. For now mnemonics arrive bare. */
    323 static u32 enc_amo(u32 match, u32 aq, u32 rl, u32 rd, u32 rs1, u32 rs2) {
    324   return match | ((aq & 1u) << 26) | ((rl & 1u) << 25) | ((rs2 & 0x1fu) << 20) |
    325          ((rs1 & 0x1fu) << 15) | ((rd & 0x1fu) << 7);
    326 }
    327 
    328 static u32 c_reg3(AsmDriver* d, u32 r) {
    329   if (r < 8u || r > 15u)
    330     asm_driver_panic(d,
    331                      "rv64 asm: compressed register must be x8..x15/f8..f15");
    332   return r - 8u;
    333 }
    334 
    335 static u32 enc_c_ci(u32 match, u32 rd, i32 imm) {
    336   u32 u = (u32)imm & 0x3fu;
    337   return match | (((u >> 5) & 1u) << 12) | ((rd & 0x1fu) << 7) |
    338          ((u & 0x1fu) << 2);
    339 }
    340 
    341 static u32 enc_c_cr(u32 match, u32 rd_rs1, u32 rs2) {
    342   return match | ((rd_rs1 & 0x1fu) << 7) | ((rs2 & 0x1fu) << 2);
    343 }
    344 
    345 static u32 enc_c_addi16sp(u32 match, i32 imm) {
    346   u32 u = (u32)imm & 0x3ffu;
    347   return match | (((u >> 9) & 1u) << 12) | (((u >> 4) & 1u) << 6) |
    348          (((u >> 6) & 1u) << 5) | (((u >> 7) & 3u) << 3) |
    349          (((u >> 5) & 1u) << 2);
    350 }
    351 
    352 static u32 enc_c_addi4spn(u32 match, u32 rd3, u32 imm) {
    353   u32 enc = (((imm >> 4) & 3u) << 6) | (((imm >> 6) & 0xfu) << 2) |
    354             (((imm >> 2) & 1u) << 1) | ((imm >> 3) & 1u);
    355   return match | ((enc & 0xffu) << 5) | ((rd3 & 7u) << 2);
    356 }
    357 
    358 static u32 enc_c_lwld(u32 match, u32 rd3, u32 rs1_3, u32 off, int wide64) {
    359   if (wide64) {
    360     return match | (((off >> 3) & 7u) << 10) | ((rs1_3 & 7u) << 7) |
    361            (((off >> 6) & 3u) << 5) | ((rd3 & 7u) << 2);
    362   }
    363   return match | (((off >> 3) & 7u) << 10) | ((rs1_3 & 7u) << 7) |
    364          (((off >> 2) & 1u) << 6) | (((off >> 6) & 1u) << 5) |
    365          ((rd3 & 7u) << 2);
    366 }
    367 
    368 static u32 enc_c_swld(u32 match, u32 rs2_3, u32 rs1_3, u32 off, int wide64) {
    369   return enc_c_lwld(match, rs2_3, rs1_3, off, wide64);
    370 }
    371 
    372 static u32 enc_c_lwsp(u32 match, u32 rd, u32 off, int wide64) {
    373   if (wide64) {
    374     return match | (((off >> 5) & 1u) << 12) | ((rd & 0x1fu) << 7) |
    375            (((off >> 3) & 3u) << 5) | (((off >> 6) & 7u) << 2);
    376   }
    377   return match | (((off >> 5) & 1u) << 12) | ((rd & 0x1fu) << 7) |
    378          (((off >> 2) & 7u) << 4) | (((off >> 6) & 3u) << 2);
    379 }
    380 
    381 static u32 enc_c_swsp(u32 match, u32 rs2, u32 off, int wide64) {
    382   u32 imm6;
    383   if (wide64)
    384     imm6 = (((off >> 3) & 7u) << 3) | ((off >> 6) & 7u);
    385   else
    386     imm6 = (((off >> 2) & 0xfu) << 2) | ((off >> 6) & 3u);
    387   return match | ((imm6 & 0x3fu) << 7) | ((rs2 & 0x1fu) << 2);
    388 }
    389 
    390 static u32 enc_c_cb_imm(u32 match, u32 rs1_3, i32 imm) {
    391   u32 u = (u32)imm & 0x1ffu;
    392   return match | (((u >> 8) & 1u) << 12) | (((u >> 3) & 3u) << 10) |
    393          ((rs1_3 & 7u) << 7) | (((u >> 6) & 3u) << 5) | (((u >> 1) & 3u) << 3) |
    394          (((u >> 5) & 1u) << 2);
    395 }
    396 
    397 static u32 enc_c_cb_alu_imm(u32 match, u32 rd3, i32 imm) {
    398   u32 u = (u32)imm & 0x3fu;
    399   return match | (((u >> 5) & 1u) << 12) | ((rd3 & 7u) << 7) |
    400          ((u & 0x1fu) << 2);
    401 }
    402 
    403 static u32 enc_c_cj(u32 match, i32 imm) {
    404   u32 u = (u32)imm & 0xfffu;
    405   return match | (((u >> 11) & 1u) << 12) | (((u >> 4) & 1u) << 11) |
    406          (((u >> 8) & 3u) << 9) | (((u >> 10) & 1u) << 8) |
    407          (((u >> 6) & 1u) << 7) | (((u >> 7) & 1u) << 6) |
    408          (((u >> 1) & 7u) << 3) | (((u >> 5) & 1u) << 2);
    409 }
    410 
    411 /* Parse a branch/jump target operand. With a symbolic target (a label), emit
    412  * the relocation at the current position — which is exactly where the caller
    413  * is about to write this instruction word — and return 0 as the placeholder
    414  * immediate. With a bare constant, return it as the PC-relative byte
    415  * displacement (preserving the existing numeric-offset corpus behavior). */
    416 static i32 rv_reloc_target(AsmDriver* d, RelocKind kind) {
    417   ObjSymId sym = OBJ_SYM_NONE;
    418   i64 off = 0;
    419   asm_driver_parse_sym_expr(d, &sym, &off);
    420   if (sym != OBJ_SYM_NONE) {
    421     MCEmitter* mc = asm_driver_mc(d);
    422     mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), kind, sym, off, 0, 0);
    423     return 0;
    424   }
    425   return (i32)off;
    426 }
    427 
    428 /* Per-format parser — reads the operand list off the driver and returns
    429  * the encoded 32-bit word, given the matched descriptor. */
    430 static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
    431   u32 m = desc->match;
    432   u32 rd = 0, rs1 = 0, rs2 = 0;
    433   i32 imm = 0;
    434   Rv64Mem mem;
    435 
    436   switch ((Rv64Format)desc->fmt) {
    437     case RV64_FMT_R:
    438       /* Two-operand aliases: snez/neg/negw — rd, rs (rs1=x0). */
    439       if (desc->flags & RV64_ASMFL_ALIAS) {
    440         rd = parse_xreg(d);
    441         expect_comma(d);
    442         rs2 = parse_xreg(d);
    443         return enc_r(m, rd, 0u, rs2);
    444       }
    445       rd = parse_xreg(d);
    446       expect_comma(d);
    447       rs1 = parse_xreg(d);
    448       expect_comma(d);
    449       rs2 = parse_xreg(d);
    450       return enc_r(m, rd, rs1, rs2);
    451 
    452     case RV64_FMT_R4: {
    453       u32 rs3;
    454       rd = parse_freg(d);
    455       expect_comma(d);
    456       rs1 = parse_freg(d);
    457       expect_comma(d);
    458       rs2 = parse_freg(d);
    459       expect_comma(d);
    460       rs3 = parse_freg(d);
    461       return enc_r4(m, rd, rs1, rs2, rs3, 0x7u);
    462     }
    463 
    464     case RV64_FMT_I:
    465       /* Aliases first. `li` is handled earlier by rv64_emit_pseudo (it may
    466        * need a multi-word expansion), so it never reaches here. */
    467       if (desc->flags & RV64_ASMFL_ALIAS) {
    468         if (slice_eq_cstr(desc->mnemonic, "mv")) {
    469           /* Standard two-operand `mv rd, rs` = `addi rd, rs, 0`. (A %pcrel_lo
    470            * low-half is emitted as the canonical `addi rd, rs, %pcrel_lo(L)`,
    471            * not a non-standard 3-operand `mv`, so it lands in the ADDI path
    472            * below — matching clang.) */
    473           rd = parse_xreg(d);
    474           expect_comma(d);
    475           rs1 = parse_xreg(d);
    476           return enc_i(m, rd, rs1, 0);
    477         }
    478         if (slice_eq_cstr(desc->mnemonic, "sext.w")) {
    479           rd = parse_xreg(d);
    480           expect_comma(d);
    481           rs1 = parse_xreg(d);
    482           return enc_i(m, rd, rs1, 0);
    483         }
    484         if (slice_eq_cstr(desc->mnemonic, "seqz") ||
    485             slice_eq_cstr(desc->mnemonic, "not")) {
    486           rd = parse_xreg(d);
    487           expect_comma(d);
    488           rs1 = parse_xreg(d);
    489           /* match already has imm12 + funct3 + op pinned. */
    490           return m | ((rs1 & 0x1fu) << 15) | ((rd & 0x1fu) << 7);
    491         }
    492       }
    493       rd = parse_xreg(d);
    494       expect_comma(d);
    495       rs1 = parse_xreg(d);
    496       expect_comma(d);
    497       /* `addi rd, rs1, %lo(sym)` / `%pcrel_lo(label)` → R_RV_LO12_I. */
    498       if (rv_emit_imm_mod_reloc(d, RV_MODPOS_LO_I)) return enc_i(m, rd, rs1, 0);
    499       imm = (i32)asm_driver_parse_const(d);
    500       return enc_i(m, rd, rs1, imm);
    501 
    502     case RV64_FMT_I_SHIFT:
    503       rd = parse_xreg(d);
    504       expect_comma(d);
    505       rs1 = parse_xreg(d);
    506       expect_comma(d);
    507       return enc_ishift(m, rd, rs1, (u32)asm_driver_parse_const(d));
    508 
    509     case RV64_FMT_I_SHIFTW:
    510       rd = parse_xreg(d);
    511       expect_comma(d);
    512       rs1 = parse_xreg(d);
    513       expect_comma(d);
    514       return enc_ishiftw(m, rd, rs1, (u32)asm_driver_parse_const(d));
    515 
    516     case RV64_FMT_U:
    517       rd = parse_xreg(d);
    518       expect_comma(d);
    519       /* `lui rd, %hi(sym)` → R_RV_HI20; `auipc rd, %pcrel_hi(sym)` →
    520        * R_RV_PCREL_HI20 (or %got_pcrel_hi → R_RV_GOT_HI20). */
    521       if (rv_emit_imm_mod_reloc(d, RV_MODPOS_HI20)) return enc_u(m, rd, 0);
    522       imm = (i32)asm_driver_parse_const(d);
    523       /* LUI/AUIPC immediate is the upper-20 value: the input is interpreted
    524        * as the literal 20-bit value (already shifted-out form). */
    525       return enc_u(m, rd, (u32)imm);
    526 
    527     case RV64_FMT_J:
    528       /* `j label` / `jal rd, label` accept a symbolic target (R_RV_JAL) or a
    529        * bare numeric displacement. */
    530       if ((desc->flags & RV64_ASMFL_ALIAS) &&
    531           slice_eq_cstr(desc->mnemonic, "j")) {
    532         return enc_j(m, 0u, rv_reloc_target(d, R_RV_JAL));
    533       }
    534       rd = parse_xreg(d);
    535       expect_comma(d);
    536       return enc_j(m, rd, rv_reloc_target(d, R_RV_JAL));
    537 
    538     case RV64_FMT_B:
    539       /* `beq rs1, rs2, label` (and beqz/bnez aliases) accept a symbolic target
    540        * (R_RV_BRANCH) or a bare numeric displacement. */
    541       if (desc->flags & RV64_ASMFL_ALIAS) {
    542         /* beqz / bnez: rs, off. */
    543         rs1 = parse_xreg(d);
    544         expect_comma(d);
    545         return enc_b(m, rs1, 0u, rv_reloc_target(d, R_RV_BRANCH));
    546       }
    547       rs1 = parse_xreg(d);
    548       expect_comma(d);
    549       rs2 = parse_xreg(d);
    550       expect_comma(d);
    551       return enc_b(m, rs1, rs2, rv_reloc_target(d, R_RV_BRANCH));
    552 
    553     case RV64_FMT_LOAD:
    554       rd = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
    555       expect_comma(d);
    556       mem = parse_mem(d);
    557       rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/0);
    558       return enc_i(m, rd, mem.base, mem.disp);
    559 
    560     case RV64_FMT_FP_LOAD:
    561       rd = parse_freg(d);
    562       expect_comma(d);
    563       mem = parse_mem(d);
    564       rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/0);
    565       return enc_i(m, rd, mem.base, mem.disp);
    566 
    567     case RV64_FMT_STORE:
    568       rs2 = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
    569       expect_comma(d);
    570       mem = parse_mem(d);
    571       rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/1);
    572       return enc_s(m, rs2, mem.base, mem.disp);
    573 
    574     case RV64_FMT_FP_STORE:
    575       rs2 = parse_freg(d);
    576       expect_comma(d);
    577       mem = parse_mem(d);
    578       rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/1);
    579       return enc_s(m, rs2, mem.base, mem.disp);
    580 
    581     case RV64_FMT_JALR:
    582       if ((desc->flags & RV64_ASMFL_ALIAS) &&
    583           slice_eq_cstr(desc->mnemonic, "jr")) {
    584         rs1 = parse_xreg(d);
    585         return enc_i(m, 0u, rs1, 0);
    586       }
    587       rd = parse_xreg(d);
    588       if (!asm_driver_eat_comma(d)) {
    589         if (slice_eq_cstr(desc->mnemonic, "jalr"))
    590           return enc_i(m, RV_RA, rd, 0);
    591         asm_driver_panic(d, "rv64 asm: expected ','");
    592       }
    593       /* Accept both `jalr rd, imm(rs1)` and `jalr rd, rs1, imm`. */
    594       {
    595         AsmTok t = asm_driver_peek(d);
    596         if (t.kind == ASM_TOK_IDENT) {
    597           /* register first → register form */
    598           rs1 = parse_xreg(d);
    599           if (asm_driver_eat_comma(d)) {
    600             imm = (i32)asm_driver_parse_const(d);
    601           } else {
    602             imm = 0;
    603           }
    604           return enc_i(m, rd, rs1, imm);
    605         }
    606       }
    607       mem = parse_mem(d);
    608       return enc_i(m, rd, mem.base, mem.disp);
    609 
    610     case RV64_FMT_FENCE: {
    611       u32 pred, succ;
    612       pred = parse_fence_mask(d);
    613       expect_comma(d);
    614       succ = parse_fence_mask(d);
    615       return m | (pred << 24) | (succ << 20);
    616     }
    617 
    618     case RV64_FMT_SYSTEM:
    619       /* No operands. nop/ret/ecall/ebreak. */
    620       return m;
    621 
    622     case RV64_FMT_FP_RM:
    623       rd = parse_freg(d);
    624       expect_comma(d);
    625       rs1 = parse_freg(d);
    626       expect_comma(d);
    627       rs2 = parse_freg(d);
    628       /* Use DYN(=7) rounding mode by default. */
    629       return enc_r(m | (0x7u << 12), rd, rs1, rs2);
    630 
    631     case RV64_FMT_FP_R:
    632       if (desc->flags & RV64_ASMFL_FP) {
    633         rd = parse_freg(d);
    634       } else {
    635         rd = parse_xreg(d);
    636       }
    637       expect_comma(d);
    638       rs1 = parse_freg(d);
    639       expect_comma(d);
    640       rs2 = parse_freg(d);
    641       return enc_r(m, rd, rs1, rs2);
    642 
    643     case RV64_FMT_FP_CVT:
    644       if (desc->flags & RV64_ASMFL_FP) {
    645         rd = parse_freg(d);
    646         expect_comma(d);
    647         /* Source: integer reg for fcvt.s.w etc (no FP flag would
    648          * indicate); but since we have ASMFL_FP set on dest, source may
    649          * be either. Disambiguate by mnemonic. */
    650         if (slice_has_prefix_cstr(desc->mnemonic, "fcvt.s.", 7) &&
    651             (desc->mnemonic.s[7] == 'w' || desc->mnemonic.s[7] == 'l')) {
    652           rs1 = parse_xreg(d);
    653         } else if (slice_has_prefix_cstr(desc->mnemonic, "fcvt.d.", 7) &&
    654                    (desc->mnemonic.s[7] == 'w' || desc->mnemonic.s[7] == 'l')) {
    655           rs1 = parse_xreg(d);
    656         } else if (slice_eq_cstr(desc->mnemonic, "fmv.w.x") ||
    657                    slice_eq_cstr(desc->mnemonic, "fmv.d.x")) {
    658           rs1 = parse_xreg(d);
    659         } else {
    660           rs1 = parse_freg(d);
    661         }
    662       } else {
    663         rd = parse_xreg(d);
    664         expect_comma(d);
    665         rs1 = parse_freg(d);
    666       }
    667       /* match encodes rs2 (type selector); OR in rd/rs1 and the rounding mode.
    668        * An explicit `, <rm>` suffix (cc -S emits it for non-default modes, and
    669        * clang/gas accept it) takes precedence; otherwise the rm is fixed per
    670        * conversion family (mirrors the rv_fcvt_* encoders in isa.h, the codegen
    671        * source of truth): fp->int truncates (RTZ=1); int->fp and fp->fp use the
    672        * default DYN=7; fmv bit-moves carry no rounding (rm=0). */
    673       {
    674         u32 funct7 = (m >> 25) & 0x7fu;
    675         u32 rm;
    676         if (asm_driver_eat_comma(d)) {
    677           rm = rv_parse_rm_name(d);
    678         } else {
    679           switch (funct7) {
    680             case 0x60:   /* fcvt.{w,wu,l,lu}.s */
    681             case 0x61:   /* fcvt.{w,wu,l,lu}.d */
    682               rm = 0x1u; /* RTZ */
    683               break;
    684             case 0x70: /* fmv.x.w */
    685             case 0x71: /* fmv.x.d */
    686             case 0x78: /* fmv.w.x */
    687             case 0x79: /* fmv.d.x */
    688               rm = 0x0u;
    689               break;
    690             default: /* int->fp (0x68/0x69) and fp<->fp (0x20/0x21): DYN */
    691               rm = 0x7u;
    692               break;
    693           }
    694         }
    695         return m | (rm << 12) | ((rs1 & 0x1fu) << 15) | ((rd & 0x1fu) << 7);
    696       }
    697 
    698     case RV64_FMT_AMO:
    699       rd = parse_xreg(d);
    700       expect_comma(d);
    701       rs2 = parse_xreg(d);
    702       expect_comma(d);
    703       asm_driver_expect_punct(d, '(', "'(' in rv64 amo operand");
    704       rs1 = parse_xreg(d);
    705       asm_driver_expect_punct(d, ')', "')' in rv64 amo operand");
    706       return enc_amo(m, 0u, 0u, rd, rs1, rs2);
    707 
    708     case RV64_FMT_LR:
    709       rd = parse_xreg(d);
    710       expect_comma(d);
    711       asm_driver_expect_punct(d, '(', "'(' in rv64 lr operand");
    712       rs1 = parse_xreg(d);
    713       asm_driver_expect_punct(d, ')', "')' in rv64 lr operand");
    714       return enc_amo(m, 0u, 0u, rd, rs1, 0u);
    715 
    716     case RV64_FMT_CSR: {
    717       i32 csr;
    718       rd = parse_xreg(d);
    719       expect_comma(d);
    720       csr = (i32)asm_driver_parse_const(d);
    721       expect_comma(d);
    722       rs1 = parse_xreg(d);
    723       return enc_i(m, rd, rs1, csr);
    724     }
    725 
    726     case RV64_FMT_CSRI: {
    727       i32 csr;
    728       rd = parse_xreg(d);
    729       expect_comma(d);
    730       csr = (i32)asm_driver_parse_const(d);
    731       expect_comma(d);
    732       u32 uimm = (u32)asm_driver_parse_const(d) & 0x1fu;
    733       return enc_i(m, rd, uimm, csr);
    734     }
    735 
    736     case RV64_FMT_CR:
    737       if (slice_eq_cstr(desc->mnemonic, "c.jr") ||
    738           slice_eq_cstr(desc->mnemonic, "c.jalr")) {
    739         rs1 = parse_xreg(d);
    740         return enc_c_cr(m, rs1, 0u);
    741       }
    742       rd = parse_xreg(d);
    743       expect_comma(d);
    744       rs2 = parse_xreg(d);
    745       return enc_c_cr(m, rd, rs2);
    746 
    747     case RV64_FMT_CI:
    748       if (slice_eq_cstr(desc->mnemonic, "c.lwsp") ||
    749           slice_eq_cstr(desc->mnemonic, "c.ldsp") ||
    750           slice_eq_cstr(desc->mnemonic, "c.fldsp")) {
    751         rd = slice_eq_cstr(desc->mnemonic, "c.fldsp") ? parse_freg(d)
    752                                                       : parse_xreg(d);
    753         expect_comma(d);
    754         mem = parse_mem(d);
    755         if (mem.base != RV_SP)
    756           asm_driver_panic(d, "rv64 asm: compressed stack load needs sp base");
    757         return enc_c_lwsp(m, rd, (u32)mem.disp,
    758                           !slice_eq_cstr(desc->mnemonic, "c.lwsp"));
    759       }
    760       rd = parse_xreg(d);
    761       expect_comma(d);
    762       imm = (i32)asm_driver_parse_const(d);
    763       if (slice_eq_cstr(desc->mnemonic, "c.lui") && ((u32)imm & 0xfffu) == 0)
    764         imm >>= 12;
    765       if (slice_eq_cstr(desc->mnemonic, "c.addi16sp")) {
    766         if (rd != RV_SP)
    767           asm_driver_panic(d, "rv64 asm: c.addi16sp needs sp destination");
    768         return enc_c_addi16sp(m, imm);
    769       }
    770       return enc_c_ci(m, rd, imm);
    771 
    772     case RV64_FMT_CSS:
    773       rs2 = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
    774       expect_comma(d);
    775       mem = parse_mem(d);
    776       if (mem.base != RV_SP)
    777         asm_driver_panic(d, "rv64 asm: compressed stack store needs sp base");
    778       return enc_c_swsp(m, rs2, (u32)mem.disp,
    779                         !slice_eq_cstr(desc->mnemonic, "c.swsp"));
    780 
    781     case RV64_FMT_CIW:
    782       rd = parse_xreg(d);
    783       expect_comma(d);
    784       rs1 = parse_xreg(d);
    785       expect_comma(d);
    786       if (rs1 != RV_SP)
    787         asm_driver_panic(d, "rv64 asm: c.addi4spn needs sp source");
    788       imm = (i32)asm_driver_parse_const(d);
    789       return enc_c_addi4spn(m, c_reg3(d, rd), (u32)imm);
    790 
    791     case RV64_FMT_CL:
    792       rd = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
    793       expect_comma(d);
    794       mem = parse_mem(d);
    795       return enc_c_lwld(m, c_reg3(d, rd), c_reg3(d, mem.base), (u32)mem.disp,
    796                         !slice_eq_cstr(desc->mnemonic, "c.lw"));
    797 
    798     case RV64_FMT_CS:
    799       rs2 = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
    800       expect_comma(d);
    801       mem = parse_mem(d);
    802       return enc_c_swld(m, c_reg3(d, rs2), c_reg3(d, mem.base), (u32)mem.disp,
    803                         !slice_eq_cstr(desc->mnemonic, "c.sw"));
    804 
    805     case RV64_FMT_CA:
    806       rd = parse_xreg(d);
    807       expect_comma(d);
    808       rs2 = parse_xreg(d);
    809       return m | (c_reg3(d, rd) << 7) | (c_reg3(d, rs2) << 2);
    810 
    811     case RV64_FMT_CB:
    812       rs1 = parse_xreg(d);
    813       expect_comma(d);
    814       imm = (i32)asm_driver_parse_const(d);
    815       if (slice_eq_cstr(desc->mnemonic, "c.beqz") ||
    816           slice_eq_cstr(desc->mnemonic, "c.bnez")) {
    817         return enc_c_cb_imm(m, c_reg3(d, rs1), imm);
    818       }
    819       return enc_c_cb_alu_imm(m, c_reg3(d, rs1), imm);
    820 
    821     case RV64_FMT_CJ:
    822       imm = (i32)asm_driver_parse_const(d);
    823       return enc_c_cj(m, imm);
    824 
    825     case RV64_FMT_C_NONE:
    826       return m;
    827 
    828     default:
    829       asm_driver_panic(d, "rv64 asm: unsupported format");
    830   }
    831 }
    832 
    833 /* ============================================================
    834  * Multi-word pseudo-instruction expansion.
    835  *
    836  * call/tail/la/lla expand to a PC-relative AUIPC + (JALR | ADDI) pair;
    837  * `li` with a constant that does not fit a 12-bit signed immediate
    838  * expands to an LUI/ADDI(W)/SLLI chain (no relocations). Each 32-bit
    839  * word goes out through rv64_emit32 — the same path assemble_one's
    840  * single-word result uses — and relocations are attached via
    841  * mc->emit_reloc_at at the appropriate word offset. */
    842 
    843 /* 12-bit signed immediate range check for li short-circuit. */
    844 static bool rv_fits_i12(i64 v) { return v >= -2048 && v <= 2047; }
    845 
    846 /* Sign-extend the low 12 bits of v. */
    847 static i64 rv_sext12(i64 v) {
    848   return (i64)((((u64)v & 0xfffu) ^ 0x800u)) - 0x800;
    849 }
    850 
    851 /* Emit an AUIPC rd,0 + a R_RV_PCREL_HI20(sym) reloc, then create a local
    852  * `.LpcrelHi` anchor at the AUIPC offset and return that anchor symbol so
    853  * the paired low-half reloc can reference it. Mirrors native.c's
    854  * rv_emit_global_addr (the non-GOT branch). */
    855 static ObjSymId rv_emit_pcrel_hi(AsmDriver* d, u32 rd, ObjSymId sym,
    856                                  i64 addend) {
    857   MCEmitter* mc = asm_driver_mc(d);
    858   ObjBuilder* obj = asm_driver_ob(d);
    859   Compiler* c = asm_driver_compiler(d);
    860   u32 sec = mc->section_id;
    861   u32 ap = mc->pos(mc);
    862   rv64_emit32(mc, rv_auipc(rd, 0));
    863   mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0);
    864   Sym an = pool_intern_slice(c->global, SLICE_LIT(".LpcrelHi"));
    865   return obj_symbol(obj, an, SB_LOCAL, SK_OBJ, sec, (u64)ap, 0);
    866 }
    867 
    868 /* call/tail: AUIPC <link>,0 + JALR <rd>,<link>,0 with one R_RV_CALL reloc
    869  * at the AUIPC. `link` is the register the AUIPC materializes into and the
    870  * JALR's base; `rd` is the JALR link-register (ra for call, zero for
    871  * tail). The linker patches both words from the single R_RV_CALL reloc. */
    872 static void rv_emit_call_pseudo(AsmDriver* d, u32 link, u32 rd) {
    873   MCEmitter* mc = asm_driver_mc(d);
    874   ObjSymId sym = OBJ_SYM_NONE;
    875   i64 off = 0;
    876   asm_driver_parse_sym_expr(d, &sym, &off);
    877   if (sym == OBJ_SYM_NONE)
    878     asm_driver_panic(d, "rv64 asm: call/tail target must be a symbol");
    879   u32 sec = mc->section_id;
    880   u32 ap = mc->pos(mc);
    881   rv64_emit32(mc, rv_auipc(link, 0));
    882   rv64_emit32(mc, rv_jalr(rd, link, 0));
    883   mc->emit_reloc_at(mc, sec, ap, R_RV_CALL, sym, off, 0, 0);
    884 }
    885 
    886 /* la/lla rd, sym: AUIPC rd,%pcrel_hi(sym) + ADDI rd,rd,%pcrel_lo(anchor).
    887  * kit's static Local-Exec model has no GOT, so `la` == `lla`. */
    888 static void rv_emit_la_pseudo(AsmDriver* d) {
    889   MCEmitter* mc = asm_driver_mc(d);
    890   u32 rd = parse_xreg(d);
    891   expect_comma(d);
    892   ObjSymId sym = OBJ_SYM_NONE;
    893   i64 off = 0;
    894   asm_driver_parse_sym_expr(d, &sym, &off);
    895   if (sym == OBJ_SYM_NONE)
    896     asm_driver_panic(d, "rv64 asm: la/lla target must be a symbol");
    897   ObjSymId anchor = rv_emit_pcrel_hi(d, rd, sym, off);
    898   u32 sec = mc->section_id;
    899   u32 lp = mc->pos(mc);
    900   rv64_emit32(mc, rv_addi(rd, rd, 0));
    901   mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
    902 }
    903 
    904 /* LUI immediate that sign-extends to a negative 32-bit value: bit 19 of
    905  * the 20-bit field is set, i.e. Hi20 >= 0x80000. */
    906 #define RV_LUI_HI20_SIGN 0x80000LL
    907 
    908 /* Materialize a 64-bit constant into `rd` via the LLVM RISCVMatInt
    909  * sequence: for values fitting a signed 32-bit range, LUI + ADDI/ADDIW;
    910  * otherwise a recursive top-down hi20/lo12 split with SLLI shifts that
    911  * absorb trailing zeros. No relocations.
    912  *
    913  * After an LUI, the low-half add uses ADDIW only when the LUI value is
    914  * negative in 32-bit form (Hi20 >= RV_LUI_HI20_SIGN): there the add must
    915  * wrap in 32-bit arithmetic and re-sign-extend to land in range. When the
    916  * LUI value is non-negative in its low 32 bits, plain ADDI keeps the
    917  * 64-bit result correct (matching LLVM's generateInstSeqImpl). */
    918 static void rv_emit_li_value(MCEmitter* mc, u32 rd, i64 val) {
    919   if (val >= -2147483648LL && val <= 2147483647LL) {
    920     i64 hi20 = ((val + 0x800) >> 12) & 0xfffffLL;
    921     i64 lo12 = rv_sext12(val);
    922     if (hi20) rv64_emit32(mc, rv_lui(rd, (u32)hi20));
    923     if (lo12 || hi20 == 0) {
    924       u32 src = hi20 ? rd : (u32)RV_ZERO;
    925       if (hi20 >= RV_LUI_HI20_SIGN)
    926         rv64_emit32(mc, rv_addiw(rd, src, (i32)lo12));
    927       else
    928         rv64_emit32(mc, rv_addi(rd, src, (i32)lo12));
    929     }
    930     return;
    931   }
    932   /* >32-bit: split off the low 12 bits, recurse on the (shifted) high
    933    * part, then SLLI back and ADD the low bits. The subtraction is done in
    934    * unsigned space so it cannot signed-overflow at the int64 extremes
    935    * (e.g. val=INT64_MAX, lo12=-1); the result has its low 12 bits clear,
    936    * and the arithmetic right shift recovers the sign-extended high part. */
    937   i64 lo12 = rv_sext12(val);
    938   i64 hi = (i64)((u64)val - (u64)lo12) >> 12;
    939   u32 shift = 12;
    940   /* Absorb trailing zeros of the high part into the shift amount. */
    941   while ((hi & 1) == 0) {
    942     hi >>= 1;
    943     ++shift;
    944   }
    945   rv_emit_li_value(mc, rd, hi);
    946   rv64_emit32(mc, rv_slli(rd, rd, shift));
    947   if (lo12) rv64_emit32(mc, rv_addi(rd, rd, (i32)lo12));
    948 }
    949 
    950 /* Dispatch a multi-word pseudo. Returns true if it consumed the operands
    951  * and emitted its expansion; false to fall through to the single-word
    952  * path. `li` is handled here only when its immediate exceeds the 12-bit
    953  * signed range the alias row encodes directly. */
    954 static bool rv64_emit_pseudo(AsmDriver* d, const Rv64InsnDesc* desc) {
    955   MCEmitter* mc = asm_driver_mc(d);
    956   if (desc->fmt == RV64_FMT_PSEUDO) {
    957     if (slice_eq_cstr(desc->mnemonic, "call")) {
    958       rv_emit_call_pseudo(d, RV_RA, RV_RA);
    959       return true;
    960     }
    961     if (slice_eq_cstr(desc->mnemonic, "tail")) {
    962       /* Standard RISC-V `tail` materializes the address into t1 (x6). kit
    963        * codegen uses t0 for its own tail-call temp, so a `cc -S`-fused
    964        * `tail sym` re-assembles to t1 not t0 — execution-equivalent (both are
    965        * caller-saved temps clobbered by the tail jump; cross-exec still
    966        * matches), only the byte image differs on tail-call cases. Keeping the
    967        * assembler's `tail` standard preserves clang/gas interop. */
    968       rv_emit_call_pseudo(d, RV_T1, RV_ZERO);
    969       return true;
    970     }
    971     /* la / lla — identical PC-relative expansion in kit. */
    972     rv_emit_la_pseudo(d);
    973     return true;
    974   }
    975   if ((desc->flags & RV64_ASMFL_ALIAS) && slice_eq_cstr(desc->mnemonic, "li")) {
    976     /* Peek the immediate without consuming the destination register: the
    977      * single-word alias path re-parses both. We commit to the multi-word
    978      * path only for out-of-range constants, leaving the existing 12-bit
    979      * fast path (and its golden behavior) untouched. */
    980     u32 rd = parse_xreg(d);
    981     expect_comma(d);
    982     i64 imm = asm_driver_parse_const(d);
    983     if (rv_fits_i12(imm)) {
    984       rv64_emit32(mc, rv_addi(rd, RV_ZERO, (i32)imm));
    985     } else {
    986       rv_emit_li_value(mc, rd, imm);
    987     }
    988     return true;
    989   }
    990   return false;
    991 }
    992 
    993 static void rv64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) {
    994   MCEmitter* mc = asm_driver_mc(d);
    995   const Rv64InsnDesc* desc;
    996   (void)base;
    997   (void)asm_driver_cur_section(d);
    998   desc = rv64_asm_find(pool_slice(asm_driver_pool(d), mnemonic));
    999   if (!desc) asm_driver_panic(d, "rv64 asm: unsupported instruction");
   1000   if (rv64_emit_pseudo(d, desc)) return;
   1001   if (desc->flags & RV64_ASMFL_C16)
   1002     rv64_emit16(mc, assemble_one(d, desc));
   1003   else
   1004     rv64_emit32(mc, assemble_one(d, desc));
   1005 }
   1006 
   1007 static void rv64_arch_asm_destroy(ArchAsm* base) { (void)base; }
   1008 
   1009 /* ---- textual-assembly operand syntax (printer <-> parser) ----------------
   1010  *
   1011  * Inverse of the `.s` parsers above (rv_parse_mod_reloc / rv_reloc_target and
   1012  * the call/la pseudo expanders): how a relocated rv64 operand is spelled in
   1013  * `cc -S` so the same text re-assembles under kit-as. RISC-V uses the same
   1014  * `%hi`/`%lo`/`%pcrel_hi`/`%pcrel_lo` operator syntax on every object format,
   1015  * so `fmt` is unused. See ArchAsmOps and src/api/asm_emit.c. */
   1016 static int rv64_reloc_operand(u16 kind, KitObjFmt fmt, ArchRelocOperand* out) {
   1017   (void)fmt;
   1018   out->prefix = "";
   1019   out->suffix = "";
   1020   out->addend_bias = 0;
   1021   out->emit_anchor = 0;
   1022   out->ref_anchor = 0;
   1023   switch (kind) {
   1024     case R_RV_PCREL_HI20:
   1025       out->surg = ARCH_RELOC_SURG_TAIL;
   1026       out->prefix = "%pcrel_hi(";
   1027       out->suffix = ")";
   1028       out->emit_anchor = 1; /* define a unique anchor label at this AUIPC */
   1029       return 1;
   1030     case R_RV_GOT_HI20:
   1031       out->surg = ARCH_RELOC_SURG_TAIL;
   1032       out->prefix = "%got_pcrel_hi(";
   1033       out->suffix = ")";
   1034       out->emit_anchor = 1;
   1035       return 1;
   1036     case R_RV_PCREL_LO12_I:
   1037     case R_RV_PCREL_LO12_S:
   1038       out->surg = ARCH_RELOC_SURG_RV_LO12;
   1039       out->prefix = "%pcrel_lo(";
   1040       out->suffix = ")";
   1041       out->ref_anchor = 1; /* references the preceding AUIPC's anchor label */
   1042       return 1;
   1043     case R_RV_HI20:
   1044       out->surg = ARCH_RELOC_SURG_TAIL;
   1045       out->prefix = "%hi(";
   1046       out->suffix = ")";
   1047       return 1;
   1048     case R_RV_LO12_I:
   1049     case R_RV_LO12_S:
   1050       out->surg = ARCH_RELOC_SURG_RV_LO12;
   1051       out->prefix = "%lo(";
   1052       out->suffix = ")";
   1053       return 1;
   1054     case R_RV_BRANCH:
   1055     case R_RV_JAL:
   1056       out->surg = ARCH_RELOC_SURG_TAIL;
   1057       return 1;
   1058     default:
   1059       return 0; /* R_ABS*, R_RV_RVC_*, R_RV_RELAX, TLS, ... → keep numeric */
   1060   }
   1061 }
   1062 
   1063 /* Intra-section local branches whose target codegen resolved in place (no
   1064  * relocation): the disassembler renders the target numerically, so cc -S
   1065  * synthesizes a label there. `j`/`jal x0` are JAL aliases; the conditional
   1066  * branches are B-type. `call`/`tail` are excluded — they carry R_RV_CALL. */
   1067 static int rv64_is_local_branch(KitSlice m) {
   1068   if (m.len == 1 && m.s[0] == 'j') return 1;
   1069   if (m.len == 3 && memcmp(m.s, "jal", 3) == 0) return 1;
   1070   if (m.len == 3 && memcmp(m.s, "beq", 3) == 0) return 1;
   1071   if (m.len == 3 && memcmp(m.s, "bne", 3) == 0) return 1;
   1072   if (m.len == 3 && memcmp(m.s, "blt", 3) == 0) return 1;
   1073   if (m.len == 3 && memcmp(m.s, "bge", 3) == 0) return 1;
   1074   if (m.len == 4 && memcmp(m.s, "bltu", 4) == 0) return 1;
   1075   if (m.len == 4 && memcmp(m.s, "bgeu", 4) == 0) return 1;
   1076   if (m.len == 4 && memcmp(m.s, "beqz", 4) == 0) return 1;
   1077   if (m.len == 4 && memcmp(m.s, "bnez", 4) == 0) return 1;
   1078   if (m.len == 4 && memcmp(m.s, "blez", 4) == 0) return 1;
   1079   if (m.len == 4 && memcmp(m.s, "bgez", 4) == 0) return 1;
   1080   if (m.len == 4 && memcmp(m.s, "bltz", 4) == 0) return 1;
   1081   if (m.len == 4 && memcmp(m.s, "bgtz", 4) == 0) return 1;
   1082   if (m.len == 6 && memcmp(m.s, "c.beqz", 6) == 0) return 1;
   1083   if (m.len == 6 && memcmp(m.s, "c.bnez", 6) == 0) return 1;
   1084   if (m.len == 3 && memcmp(m.s, "c.j", 3) == 0) return 1;
   1085   return 0;
   1086 }
   1087 
   1088 /* R_RV_CALL fuses an AUIPC+JALR pair into a single `call`/`tail sym` pseudo
   1089  * (the canonical `.s` spelling the assembler re-expands to the same pair +
   1090  * reloc). The reloc sits on the AUIPC; the JALR partner carries no reloc. A
   1091  * tail call links into x0 (the JALR's rd is `zero`); a regular call links into
   1092  * ra. We read that from the partner JALR's disassembled text. */
   1093 static int rv64_reloc_call_pair(u16 kind, KitSlice pair_mnemonic,
   1094                                 KitSlice pair_ops, const char** mnemonic_out) {
   1095   if (kind != R_RV_CALL) return 0;
   1096   /* The partner JALR links into ra (regular call) or x0 (tail). The
   1097    * disassembler renders the x0-link, zero-immediate form as the `jr rs`
   1098    * alias, and the ra form as `jalr ra, 0(ra)`. So a `jr` partner is always a
   1099    * tail; a `jalr` partner is a tail iff its link register is `zero`. */
   1100   if (pair_mnemonic.len == 2 && memcmp(pair_mnemonic.s, "jr", 2) == 0) {
   1101     *mnemonic_out = "tail";
   1102     return 1;
   1103   }
   1104   if (pair_mnemonic.len == 4 && memcmp(pair_mnemonic.s, "jalr", 4) == 0) {
   1105     if (pair_ops.len >= 4 && memcmp(pair_ops.s, "zero", 4) == 0)
   1106       *mnemonic_out = "tail";
   1107     else
   1108       *mnemonic_out = "call";
   1109     return 1;
   1110   }
   1111   return 0;
   1112 }
   1113 
   1114 const ArchAsmOps rv64_asm_ops = {
   1115     .reloc_operand = rv64_reloc_operand,
   1116     .is_local_branch = rv64_is_local_branch,
   1117     .reloc_call_pair = rv64_reloc_call_pair,
   1118 };
   1119 
   1120 ArchAsm* rv64_arch_asm_new(Compiler* c) {
   1121   Rv64Asm* a = arena_new(c->tu, Rv64Asm);
   1122   memset(a, 0, sizeof *a);
   1123   a->base.insn = rv64_arch_asm_insn;
   1124   a->base.destroy = rv64_arch_asm_destroy;
   1125   a->c = c;
   1126   return &a->base;
   1127 }
   1128 
   1129 /* ============================================================
   1130  * Inline-asm template walker (parallel to aa64 asm.c §"inline-asm
   1131  * template walker"). The walker substitutes %N / %[name] / %% / %a%w%x
   1132  * placeholders into a per-line StrBuf, then re-lexes each line through
   1133  * rv64_arch_asm_insn for assembly. Statement separators recognised are
   1134  * '\n' and ';' (outside parens / quoted strings).
   1135  * ============================================================ */
   1136 
   1137 Rv64Asm* rv64_asm_open(Compiler* c) {
   1138   Rv64Asm* a = arena_new(c->tu, Rv64Asm);
   1139   memset(a, 0, sizeof *a);
   1140   a->base.insn = rv64_arch_asm_insn;
   1141   a->base.destroy = rv64_arch_asm_destroy;
   1142   a->c = c;
   1143   return a;
   1144 }
   1145 
   1146 void rv64_asm_close(Rv64Asm* a) { (void)a; }
   1147 
   1148 void rv64_inline_bind(Rv64Asm* a, const AsmConstraint* outs, u32 nout,
   1149                       Operand* out_ops, const AsmConstraint* ins, u32 nin,
   1150                       const Operand* in_ops, const Sym* clobbers, u32 nclob) {
   1151   a->outs = outs;
   1152   a->out_ops = out_ops;
   1153   a->ins = ins;
   1154   a->in_ops = in_ops;
   1155   a->clobbers = clobbers;
   1156   a->nout = nout;
   1157   a->nin = nin;
   1158   a->nclob = nclob;
   1159 }
   1160 
   1161 /* Per-line rendered buffer cap. Inline asm rarely emits more than a
   1162  * handful of insns per block; one substituted line fits comfortably.
   1163  * Truncation panics — the operator grammar should never grow a single
   1164  * line beyond this without a deliberate reason. */
   1165 #define RV64_INLINE_LINE_CAP 1024
   1166 
   1167 _Noreturn static void inline_panic(Rv64Asm* a, const char* msg) {
   1168   SrcLoc loc = {0, 0, 0};
   1169   compiler_panic(a->c, loc, "rv64 inline asm: %.*s",
   1170                  SLICE_ARG(slice_from_cstr(msg)));
   1171 }
   1172 
   1173 /* Render a 5-bit integer register number using its canonical psABI name. */
   1174 static void render_xreg(StrBuf* sb, u32 reg) {
   1175   const char* nm = rv64_register_name(reg & 0x1fu);
   1176   if (!nm) {
   1177     strbuf_putc(sb, 'x');
   1178     if ((reg & 0x1fu) >= 10u)
   1179       strbuf_putc(sb, (char)('0' + ((reg & 0x1fu) / 10u)));
   1180     strbuf_putc(sb, (char)('0' + ((reg & 0x1fu) % 10u)));
   1181     return;
   1182   }
   1183   strbuf_puts(sb, nm);
   1184 }
   1185 
   1186 /* Render an FP register by its canonical psABI name (e.g., fa0). */
   1187 static void render_freg(StrBuf* sb, u32 reg) {
   1188   const char* nm = rv64_register_name(32u + (reg & 0x1fu));
   1189   if (!nm) {
   1190     strbuf_putc(sb, 'f');
   1191     if ((reg & 0x1fu) >= 10u)
   1192       strbuf_putc(sb, (char)('0' + ((reg & 0x1fu) / 10u)));
   1193     strbuf_putc(sb, (char)('0' + ((reg & 0x1fu) % 10u)));
   1194     return;
   1195   }
   1196   strbuf_puts(sb, nm);
   1197 }
   1198 
   1199 /* Render a signed 64-bit integer. Inline asm immediates appear bare in
   1200  * RISC-V (no '#' prefix), matching the standalone .s parser. */
   1201 static void render_imm(StrBuf* sb, i64 v) { strbuf_put_i64(sb, v); }
   1202 
   1203 /* Render addressing form `disp(base)`. */
   1204 static void render_indirect(Rv64Asm* a, StrBuf* sb, Reg base, i32 ofs) {
   1205   (void)a;
   1206   if (ofs != 0)
   1207     strbuf_put_i64(sb, (i64)ofs);
   1208   else
   1209     strbuf_putc(sb, '0');
   1210   strbuf_putc(sb, '(');
   1211   render_xreg(sb, (u32)base);
   1212   strbuf_putc(sb, ')');
   1213 }
   1214 
   1215 /* Resolve operand index → render into sb. form:
   1216  *   0 = default (per-kind),
   1217  *   1 = %wN (width hint; on rv64 same as default xreg form),
   1218  *   2 = %xN (force 64-bit reg form — identical to default for rv64),
   1219  *   3 = %aN (memory addressing form).
   1220  *   4 = %zN (RISC-V GCC: emits "zero" if operand is imm 0, else reg). */
   1221 static void render_operand(Rv64Asm* a, StrBuf* sb, u32 idx, int form) {
   1222   u32 ntot = a->nout + a->nin;
   1223   if (idx >= ntot) inline_panic(a, "operand index out of range");
   1224   const Operand* op =
   1225       (idx < a->nout) ? &a->out_ops[idx] : &a->in_ops[idx - a->nout];
   1226   switch (form) {
   1227     case 1: /* %wN — accept any reg/imm; rv64 has no narrower spelling. */
   1228     case 2: /* %xN — same. */
   1229       if (op->kind == RV64_INLINE_OPK_REG) {
   1230         if (op->pad[0] == RV64_INLINE_OPCLS_FP)
   1231           render_freg(sb, (u32)op->v.local);
   1232         else
   1233           render_xreg(sb, (u32)op->v.local);
   1234         return;
   1235       }
   1236       if (op->kind == OPK_IMM) {
   1237         render_imm(sb, op->v.imm);
   1238         return;
   1239       }
   1240       inline_panic(a, "%w/%x on unsupported operand kind");
   1241     case 3: /* %aN — memory addressing form */
   1242       if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand");
   1243       if (op->v.ind.index != CG_LOCAL_NONE)
   1244         inline_panic(a,
   1245                      "%a on indexed memory operand: rv64 inline asm "
   1246                      "requires base+disp only");
   1247       render_indirect(a, sb, (Reg)op->v.ind.base, op->v.ind.ofs);
   1248       return;
   1249     case 4: /* %zN — zero-or-reg */
   1250       if (op->kind == OPK_IMM && op->v.imm == 0) {
   1251         strbuf_puts(sb, "zero");
   1252         return;
   1253       }
   1254       if (op->kind == RV64_INLINE_OPK_REG) {
   1255         if (op->pad[0] == RV64_INLINE_OPCLS_FP)
   1256           render_freg(sb, (u32)op->v.local);
   1257         else
   1258           render_xreg(sb, (u32)op->v.local);
   1259         return;
   1260       }
   1261       inline_panic(a, "%z on unsupported operand kind");
   1262     default:
   1263       break;
   1264   }
   1265   switch (op->kind) {
   1266     case RV64_INLINE_OPK_REG:
   1267       if (op->pad[0] == RV64_INLINE_OPCLS_FP)
   1268         render_freg(sb, (u32)op->v.local);
   1269       else
   1270         render_xreg(sb, (u32)op->v.local);
   1271       return;
   1272     case OPK_IMM:
   1273       render_imm(sb, op->v.imm);
   1274       return;
   1275     case OPK_INDIRECT:
   1276       if (op->v.ind.index != CG_LOCAL_NONE)
   1277         inline_panic(a,
   1278                      "indexed memory operand in inline asm: rv64 requires "
   1279                      "base+disp only");
   1280       render_indirect(a, sb, (Reg)op->v.ind.base, op->v.ind.ofs);
   1281       return;
   1282     default:
   1283       inline_panic(a, "unsupported operand kind for %N");
   1284   }
   1285 }
   1286 
   1287 /* Resolve a `%[name]` operand by looking up `needle` against the
   1288  * constraint.name fields on the combined outs+ins list. Returns the
   1289  * combined index, or (u32)-1 on miss. */
   1290 static u32 lookup_named(Rv64Asm* a, Sym needle) {
   1291   for (u32 k = 0; k < a->nout; ++k) {
   1292     if (a->outs[k].name == needle) return k;
   1293   }
   1294   for (u32 k = 0; k < a->nin; ++k) {
   1295     if (a->ins[k].name == needle) return a->nout + k;
   1296   }
   1297   return (u32)-1;
   1298 }
   1299 
   1300 /* Lex one line of substituted asm and dispatch via rv64_arch_asm_insn. */
   1301 static void run_one_line(Rv64Asm* a, MCEmitter* mc, const char* text,
   1302                          size_t len) {
   1303   /* Skip blank lines. */
   1304   size_t i;
   1305   for (i = 0; i < len; ++i) {
   1306     if (text[i] != ' ' && text[i] != '\t') break;
   1307   }
   1308   if (i == len) return;
   1309 
   1310   AsmLexer* lx = asm_lex_open_mem(a->c, "<inline-asm>", text, len);
   1311   AsmDriver* d = asm_driver_open_inline(a->c, mc, lx);
   1312 
   1313   /* The first non-trivial token must be the mnemonic identifier. */
   1314   AsmTok t = asm_driver_peek(d);
   1315   while (t.kind == ASM_TOK_NEWLINE) {
   1316     (void)asm_driver_next(d);
   1317     t = asm_driver_peek(d);
   1318   }
   1319   if (t.kind == ASM_TOK_EOF) {
   1320     asm_driver_close_inline(d);
   1321     asm_lex_close(lx);
   1322     return;
   1323   }
   1324   if (t.kind != ASM_TOK_IDENT)
   1325     inline_panic(a, "expected mnemonic at start of inline asm line");
   1326   (void)asm_driver_next(d);
   1327   Sym mn = t.v.ident;
   1328   /* Compose `fcvt.s.w` etc. — rv64 has dotted mnemonics; the standalone
   1329    * lexer already strings them together as a single IDENT in most paths.
   1330    * Mirror the aa64 composite handling for safety. */
   1331   AsmTok dot = asm_driver_peek(d);
   1332   while (asm_driver_tok_is_punct(dot, '.')) {
   1333     (void)asm_driver_next(d);
   1334     AsmTok rest = asm_driver_next(d);
   1335     if (rest.kind != ASM_TOK_IDENT)
   1336       inline_panic(a, "composite mnemonic: expected ident after '.'");
   1337     Slice hsl = pool_slice(asm_driver_pool(d), mn);
   1338     Slice rsl = pool_slice(asm_driver_pool(d), rest.v.ident);
   1339     size_t hn = hsl.len, rn = rsl.len;
   1340     char buf[64];
   1341     if (hn + 1 + rn >= sizeof buf)
   1342       inline_panic(a, "composite mnemonic too long");
   1343     for (size_t k = 0; k < hn; ++k) buf[k] = hsl.s[k];
   1344     buf[hn] = '.';
   1345     for (size_t k = 0; k < rn; ++k) buf[hn + 1 + k] = rsl.s[k];
   1346     mn = pool_intern_slice(asm_driver_pool(d),
   1347                            (Slice){.s = buf, .len = hn + 1 + rn});
   1348     dot = asm_driver_peek(d);
   1349   }
   1350   rv64_arch_asm_insn(&a->base, d, mn);
   1351   asm_driver_close_inline(d);
   1352   asm_lex_close(lx);
   1353 }
   1354 
   1355 /* Substitute placeholders into one line's StrBuf, then dispatch. */
   1356 static void render_and_run_line(Rv64Asm* a, MCEmitter* mc, StrBuf* sb,
   1357                                 const char* start, const char* end) {
   1358   strbuf_reset(sb);
   1359   for (const char* p = start; p < end; ++p) {
   1360     char c = *p;
   1361     if (c != '%') {
   1362       strbuf_putc(sb, c);
   1363       continue;
   1364     }
   1365     /* Placeholder. */
   1366     if (p + 1 >= end) inline_panic(a, "trailing '%' in template");
   1367     char n = *(p + 1);
   1368     if (n == '%') {
   1369       strbuf_putc(sb, '%');
   1370       ++p;
   1371       continue;
   1372     }
   1373     if (n == '[') {
   1374       const char* nbeg = p + 2;
   1375       const char* nend = nbeg;
   1376       while (nend < end && *nend != ']') ++nend;
   1377       if (nend == end) inline_panic(a, "unterminated %[name]");
   1378       size_t nlen = (size_t)(nend - nbeg);
   1379       Sym needle =
   1380           pool_intern_slice(a->c->global, (Slice){.s = nbeg, .len = nlen});
   1381       u32 idx = lookup_named(a, needle);
   1382       if (idx == (u32)-1)
   1383         inline_panic(a, "%[name] does not match any constraint");
   1384       p = nend; /* loop's ++p steps past the ']' */
   1385       render_operand(a, sb, idx, 0);
   1386       continue;
   1387     }
   1388     int form = 0; /* 0=default, 1=w, 2=x, 3=a, 4=z */
   1389     if (n == 'w' || n == 'x' || n == 'a' || n == 'z') {
   1390       form = (n == 'w') ? 1 : (n == 'x') ? 2 : (n == 'a') ? 3 : 4;
   1391       ++p;
   1392       if (p + 1 >= end) inline_panic(a, "trailing '%' modifier in template");
   1393       n = *(p + 1);
   1394     }
   1395     if (n == '[') {
   1396       const char* nbeg = p + 2;
   1397       const char* nend = nbeg;
   1398       while (nend < end && *nend != ']') ++nend;
   1399       if (nend == end) inline_panic(a, "unterminated %[name]");
   1400       size_t nlen = (size_t)(nend - nbeg);
   1401       Sym needle =
   1402           pool_intern_slice(a->c->global, (Slice){.s = nbeg, .len = nlen});
   1403       u32 idx = lookup_named(a, needle);
   1404       if (idx == (u32)-1)
   1405         inline_panic(a, "%[name] does not match any constraint");
   1406       p = nend;
   1407       render_operand(a, sb, idx, form);
   1408       continue;
   1409     }
   1410     if (n < '0' || n > '9') inline_panic(a, "expected digit after '%'");
   1411     u32 idx = (u32)(n - '0');
   1412     ++p;
   1413     /* GCC syntax permits up to two digits (%0..%99). */
   1414     if (p + 1 < end && *(p + 1) >= '0' && *(p + 1) <= '9') {
   1415       idx = idx * 10 + (u32)(*(p + 1) - '0');
   1416       ++p;
   1417     }
   1418     render_operand(a, sb, idx, form);
   1419   }
   1420   if (sb->truncated) inline_panic(a, "inline asm line buffer overflow");
   1421   run_one_line(a, mc, strbuf_cstr(sb), strbuf_len(sb));
   1422 }
   1423 
   1424 void rv64_asm_run_template(Rv64Asm* a, MCEmitter* mc, const char* tmpl) {
   1425   if (!tmpl || !*tmpl) return;
   1426 
   1427   char buf[RV64_INLINE_LINE_CAP];
   1428   StrBuf sb;
   1429   strbuf_init(&sb, buf, sizeof buf);
   1430 
   1431   /* Walk tmpl, splitting on '\n' and ';'. Track paren depth and quote
   1432    * state so that a literal ';' inside `( ... )` (memory operand) or a
   1433    * quoted string is not mistaken for a statement separator. RISC-V uses
   1434    * `disp(base)` for memory, hence we track parens. */
   1435   const char* line_start = tmpl;
   1436   int paren = 0;
   1437   char quote = 0;
   1438   for (const char* p = tmpl;; ++p) {
   1439     char c = *p;
   1440     if (c == '\0') {
   1441       render_and_run_line(a, mc, &sb, line_start, p);
   1442       break;
   1443     }
   1444     if (quote) {
   1445       if (c == '\\' && *(p + 1)) {
   1446         ++p;
   1447         continue;
   1448       }
   1449       if (c == quote) quote = 0;
   1450       continue;
   1451     }
   1452     if (c == '"' || c == '\'') {
   1453       quote = c;
   1454       continue;
   1455     }
   1456     if (c == '(') {
   1457       ++paren;
   1458       continue;
   1459     }
   1460     if (c == ')') {
   1461       if (paren) --paren;
   1462       continue;
   1463     }
   1464     if (paren == 0 && (c == '\n' || c == ';')) {
   1465       render_and_run_line(a, mc, &sb, line_start, p);
   1466       line_start = p + 1;
   1467     }
   1468   }
   1469 }