kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

asm.c (67902B)


      1 #include "arch/x64/asm.h"
      2 
      3 #include <string.h>
      4 
      5 #include "arch/x64/emit.h"
      6 #include "arch/x64/regs.h"
      7 #include "asm/asm_helpers.h"
      8 #include "core/arena.h"
      9 #include "core/pool.h"
     10 #include "core/slice.h"
     11 #include "core/strbuf.h"
     12 
/* Per-compilation x64 assembler state.  None of the fields below are read
 * in this part of the file, so the per-field notes are inferred from the
 * names and types only. */
struct X64Asm {
  ArchAsm base; /* first member — presumably the generic ArchAsm "base class";
                   TODO confirm against the ArchAsm users */
  Compiler* c;

  /* NOTE(review): looks like the operand lists of the inline-asm statement
   * being lowered (constraints plus their bound operands) — confirm. */
  const AsmConstraint* outs;
  Operand* out_ops;
  const AsmConstraint* ins;
  const Operand* in_ops;
  const Sym* clobbers;
  /* Presumably the element counts of outs / ins / clobbers — confirm. */
  u32 nout;
  u32 nin;
  u32 nclob;
};
     26 
/* Syntactic class of a parsed AT&T operand (stored in X64AsmOperand.kind). */
typedef enum X64AsmOperandKind {
  X64_ASM_OP_REG,     /* %reg general-purpose register */
  X64_ASM_OP_XMM,     /* %xmmN */
  X64_ASM_OP_IMM,     /* $const */
  X64_ASM_OP_MEM,     /* disp(base,index,scale), sym(%rip), %fs:/%gs: forms */
  X64_ASM_OP_IND_REG, /* *%reg (indirect branch target) */
} X64AsmOperandKind;
     34 
/* One parsed instruction operand.  parse_operand() zero-fills the struct
 * and then sets only the fields relevant to `kind`. */
typedef struct X64AsmOperand {
  u8 kind;         /* X64AsmOperandKind */
  u8 width;        /* REG: operand size in bytes (1/2/4/8); XMM: 16 */
  u8 reg;          /* REG / XMM / IND_REG: hardware register number */
  u8 base;         /* MEM: base register number (ignored when no_base) */
  u8 high8;        /* REG: legacy high-byte register (ah/ch/dh/bh) */
  u8 seg;          /* MEM: segment-override prefix byte (0x64 fs / 0x65 gs),
                      0 when absent */
  u8 no_base;      /* MEM: segment-prefixed absolute, no base register */
  u8 index;        /* MEM SIB: index register (valid when has_index) */
  u8 scale;        /* MEM SIB: log2 of scale ∈ {0,1,2,3} → 1/2/4/8 */
  u8 has_index;    /* MEM: SIB index present */
  u8 rip_relative; /* MEM: bare (%rip)/disp(%rip) form */
  u8 has_reloc;    /* MEM: symbolic disp carries a relocation */
  u8 pad[3];
  i64 imm;         /* IMM: immediate value */
  i32 disp;        /* MEM: literal displacement */
  RelocKind reloc_kind; /* MEM: reloc on the disp32 (PC32 / REX_GOTPCRELX) */
  ObjSymId reloc_sym;   /* MEM: relocated symbol */
  i64 reloc_off;        /* MEM: user addend on the symbol */
} X64AsmOperand;
     55 
     56 static int x64_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, u32* width_out,
     57                              u32* high8_out) {
     58   Slice sl = pool_slice(asm_driver_pool(d), s);
     59   char buf[16];
     60   u32 reg;
     61   u32 width = 8;
     62   Slice q;
     63   if (!sl.s || sl.len < 2 || sl.len >= sizeof buf) return 0;
     64   memcpy(buf, sl.s, sl.len);
     65   buf[sl.len] = '\0';
     66   q = slice_from_cstr(buf);
     67   if (slice_eq_cstr(q, "ah") || slice_eq_cstr(q, "ch") ||
     68       slice_eq_cstr(q, "dh") || slice_eq_cstr(q, "bh")) {
     69     static const u32 high_map[4] = {4u, 5u, 6u, 7u};
     70     const char* names = "acdb";
     71     for (u32 i = 0; i < 4u; ++i) {
     72       if (buf[0] == names[i]) {
     73         if (reg_out) *reg_out = high_map[i];
     74         if (width_out) *width_out = 1;
     75         if (high8_out) *high8_out = 1;
     76         return 1;
     77       }
     78     }
     79   }
     80   if (x64_register_hw_index(buf, &reg) != 0) return 0;
     81   if (reg > 15u) return 0;
     82   if (slice_eq_cstr(q, "al") || slice_eq_cstr(q, "cl") ||
     83       slice_eq_cstr(q, "dl") || slice_eq_cstr(q, "bl") ||
     84       slice_eq_cstr(q, "spl") || slice_eq_cstr(q, "bpl") ||
     85       slice_eq_cstr(q, "sil") || slice_eq_cstr(q, "dil") ||
     86       buf[sl.len - 1] == 'b') {
     87     width = 1;
     88   } else if (slice_eq_cstr(q, "ax") || slice_eq_cstr(q, "cx") ||
     89              slice_eq_cstr(q, "dx") || slice_eq_cstr(q, "bx") ||
     90              slice_eq_cstr(q, "sp") || slice_eq_cstr(q, "bp") ||
     91              slice_eq_cstr(q, "si") || slice_eq_cstr(q, "di") ||
     92              buf[sl.len - 1] == 'w') {
     93     width = 2;
     94   } else if (buf[sl.len - 1] == 'd' || buf[0] == 'e') {
     95     width = 4;
     96   }
     97   if (reg_out) *reg_out = reg;
     98   if (width_out) *width_out = width;
     99   if (high8_out) *high8_out = 0;
    100   return 1;
    101 }
    102 
    103 static int x64_xmm_from_name(AsmDriver* d, Sym s, u32* reg_out) {
    104   Slice sl = pool_slice(asm_driver_pool(d), s);
    105   const char* p = sl.s;
    106   size_t n = sl.len;
    107   u32 reg = 0;
    108   if (!p || n < 4 || n > 5) return 0;
    109   if (p[0] != 'x' || p[1] != 'm' || p[2] != 'm') return 0;
    110   for (size_t i = 3; i < n; ++i) {
    111     if (p[i] < '0' || p[i] > '9') return 0;
    112     reg = reg * 10u + (u32)(p[i] - '0');
    113   }
    114   if (reg > 15u) return 0;
    115   if (reg_out) *reg_out = reg;
    116   return 1;
    117 }
    118 
    119 static int x64_segment_prefix_from_name(AsmDriver* d, Sym s, u8* prefix_out) {
    120   Slice sl = pool_slice(asm_driver_pool(d), s);
    121   const char* p = sl.s;
    122   size_t n = sl.len;
    123   if (!p || n != 2) return 0;
    124   if (p[0] == 'f' && p[1] == 's') {
    125     if (prefix_out) *prefix_out = 0x64;
    126     return 1;
    127   }
    128   if (p[0] == 'g' && p[1] == 's') {
    129     if (prefix_out) *prefix_out = 0x65;
    130     return 1;
    131   }
    132   return 0;
    133 }
    134 
    135 static void expect_comma(AsmDriver* d);
    136 
    137 static u32 parse_reg(AsmDriver* d, u32* width_out, u32* high8_out) {
    138   AsmTok t;
    139   u32 reg;
    140   if (!asm_driver_eat_punct(d, '%'))
    141     asm_driver_panic(d, "x64 asm: expected register");
    142   t = asm_driver_next(d);
    143   if (t.kind != ASM_TOK_IDENT ||
    144       !x64_reg_from_name(d, t.v.ident, &reg, width_out, high8_out)) {
    145     asm_driver_panic(d, "x64 asm: bad register");
    146   }
    147   return reg;
    148 }
    149 
    150 /* True if the symbol names the instruction pointer ("rip"). */
    151 static int x64_ident_is_rip(AsmDriver* d, Sym s) {
    152   Slice sl = pool_slice(asm_driver_pool(d), s);
    153   return sl.s && sl.len == 3 && sl.s[0] == 'r' && sl.s[1] == 'i' &&
    154          sl.s[2] == 'p';
    155 }
    156 
    157 /* Convert a SIB scale literal (1/2/4/8) to its log2 (0/1/2/3). */
    158 static u32 x64_scale_to_log2(AsmDriver* d, i64 scale) {
    159   switch (scale) {
    160     case 1:
    161       return 0u;
    162     case 2:
    163       return 1u;
    164     case 4:
    165       return 2u;
    166     case 8:
    167       return 3u;
    168     default:
    169       asm_driver_panic(d, "x64 asm: memory scale must be 1, 2, 4, or 8");
    170   }
    171 }
    172 
/* Parse the body of a memory operand once the leading '(' has been
 * consumed: '%base[,%index,scale])', '%rip)', or ',%index,scale)'.
 * Fills base/index/scale/has_index/rip_relative on `op` and eats the
 * closing ')'.
 *
 * `op` is only written, never cleared: fields not mentioned by the parsed
 * form keep whatever the caller put there.  Every syntax error panics via
 * asm_driver_panic(). */
static void parse_mem_paren_body(AsmDriver* d, X64AsmOperand* op) {
  AsmTok t = asm_driver_peek(d);
  if (asm_driver_tok_is_punct(t, '%')) {
    /* Peek the register name to detect the RIP-relative form. */
    AsmTok ident;
    (void)asm_driver_next(d); /* consume the '%' that was peeked */
    ident = asm_driver_next(d);
    if (ident.kind != ASM_TOK_IDENT)
      asm_driver_panic(d, "x64 asm: bad register");
    if (x64_ident_is_rip(d, ident.v.ident)) {
      /* '(%rip)' takes no index/scale: only the closing paren may follow. */
      op->rip_relative = 1;
      asm_driver_expect_punct(d, ')', "')' in x64 memory operand");
      return;
    }
    {
      /* Ordinary base register; its width and high-byte flag are not
       * requested (NULL out-pointers). */
      u32 reg = 0;
      if (!x64_reg_from_name(d, ident.v.ident, &reg, NULL, NULL))
        asm_driver_panic(d, "x64 asm: bad register");
      op->base = (u8)reg;
    }
    /* Optional ',%index,scale'. */
    if (asm_driver_eat_comma(d)) {
      op->index = (u8)parse_reg(d, NULL, NULL);
      op->has_index = 1;
      /* The scale is mandatory once an index is given. */
      expect_comma(d);
      op->scale = (u8)x64_scale_to_log2(d, asm_driver_parse_const(d));
    }
    asm_driver_expect_punct(d, ')', "')' in x64 memory operand");
    return;
  }
  /* Index-only form: '(,%index,scale)' — base omitted. */
  if (asm_driver_eat_comma(d)) {
    op->no_base = 1;
    op->index = (u8)parse_reg(d, NULL, NULL);
    op->has_index = 1;
    expect_comma(d);
    op->scale = (u8)x64_scale_to_log2(d, asm_driver_parse_const(d));
    asm_driver_expect_punct(d, ')', "')' in x64 memory operand");
    return;
  }
  /* Neither '%' nor ',' after '(' — e.g. an empty '()'. */
  asm_driver_panic(d, "x64 asm: expected register in memory operand");
}
    219 
    220 /* Consume an optional `@MOD` relocation suffix after a symbol and return the
    221  * RelocKind it selects, or `dflt` when no suffix is present. */
    222 static RelocKind x64_parse_reloc_suffix(AsmDriver* d, RelocKind dflt) {
    223   if (!asm_driver_tok_is_punct(asm_driver_peek(d), '@')) return dflt;
    224   (void)asm_driver_next(d); /* '@' */
    225   AsmTok n = asm_driver_next(d);
    226   if (n.kind != ASM_TOK_IDENT)
    227     asm_driver_panic(d, "x64 asm: expected relocation name after '@'");
    228   Slice s = pool_slice(asm_driver_pool(d), n.v.ident);
    229   if (slice_eq_cstr(s, "PLT")) return R_X64_PLT32;
    230   if (slice_eq_cstr(s, "GOTPCREL")) return R_X64_REX_GOTPCRELX;
    231   if (slice_eq_cstr(s, "GOTPCRELX")) return R_X64_GOTPCRELX;
    232   asm_driver_panic(d, "x64 asm: unsupported relocation suffix");
    233 }
    234 
/* Parse one AT&T-syntax operand and return it by value.  The first token
 * selects the form:
 *   '*'  → X64_ASM_OP_IND_REG   (*%reg)
 *   '$'  → X64_ASM_OP_IMM       ($const)
 *   '%'  → segment-prefixed memory (%fs:/%gs:), XMM register, or GPR
 *   else → X64_ASM_OP_MEM       ([disp | sym[@MOD]] '(' ... ')')
 * The result starts zero-filled, so fields a form does not set read as 0.
 * Any syntax error panics via asm_driver_panic(). */
static X64AsmOperand parse_operand(AsmDriver* d) {
  X64AsmOperand op;
  AsmTok t;
  memset(&op, 0, sizeof op);
  /* `t` is the look-ahead token; it stays valid for the '%' and '(' tests
   * below because the failed eat_punct calls consume nothing. */
  t = asm_driver_peek(d);
  if (asm_driver_eat_punct(d, '*')) {
    op.kind = X64_ASM_OP_IND_REG;
    op.reg = (u8)parse_reg(d, NULL, NULL);
    return op;
  }
  if (asm_driver_eat_punct(d, '$')) {
    op.kind = X64_ASM_OP_IMM;
    op.imm = asm_driver_parse_const(d);
    return op;
  }
  if (asm_driver_tok_is_punct(t, '%')) {
    u32 width = 8;
    u32 high8 = 0;
    AsmTok ident;
    (void)asm_driver_next(d); /* consume '%' */
    ident = asm_driver_next(d);
    if (ident.kind != ASM_TOK_IDENT)
      asm_driver_panic(d, "x64 asm: bad register");
    if (x64_segment_prefix_from_name(d, ident.v.ident, &op.seg)) {
      /* %fs:disp, %fs:(%base) or %fs:disp(%base).  Only a plain base
       * register is accepted inside the parentheses here (no index, no
       * %rip). */
      asm_driver_expect_punct(d, ':', "':' after x64 segment register");
      op.kind = X64_ASM_OP_MEM;
      if (!asm_driver_tok_is_punct(asm_driver_peek(d), '('))
        op.disp = (i32)asm_driver_parse_const(d); /* truncated to 32 bits */
      if (asm_driver_eat_punct(d, '(')) {
        op.base = (u8)parse_reg(d, NULL, NULL);
        asm_driver_expect_punct(d, ')', "')' in x64 memory operand");
      } else {
        /* Bare %seg:disp — absolute address, no base register. */
        op.no_base = 1;
      }
      return op;
    }
    /* `width` doubles as scratch for the XMM register number here. */
    if (x64_xmm_from_name(d, ident.v.ident, &width)) {
      op.kind = X64_ASM_OP_XMM;
      op.reg = (u8)width;
      op.width = 16;
      return op;
    }
    {
      u32 reg = 0;
      if (!x64_reg_from_name(d, ident.v.ident, &reg, &width, &high8))
        asm_driver_panic(d, "x64 asm: bad register");
      op.kind = X64_ASM_OP_REG;
      op.reg = (u8)reg;
    }
    op.width = (u8)width;
    op.high8 = (u8)high8;
    return op;
  }
  /* Everything else is a memory operand: optional displacement, then a
   * mandatory parenthesised register part. */
  op.kind = X64_ASM_OP_MEM;
  op.disp = 0;
  if (!asm_driver_tok_is_punct(t, '(')) {
    /* A symbolic displacement (`sym(%rip)`, `sym@GOTPCREL(%rip)`) becomes a
     * relocation; a numeric displacement stays literal. */
    if (asm_driver_peek(d).kind == ASM_TOK_IDENT) {
      asm_driver_parse_sym_expr(d, &op.reloc_sym, &op.reloc_off);
      /* Without an @MOD suffix the relocation kind defaults to R_PC32. */
      op.reloc_kind = x64_parse_reloc_suffix(d, R_PC32);
      op.has_reloc = 1;
    } else {
      op.disp = (i32)asm_driver_parse_const(d); /* truncated to 32 bits */
    }
  }
  asm_driver_expect_punct(d, '(', "'(' in x64 memory operand");
  parse_mem_paren_body(d, &op);
  /* Symbolic displacements are only supported in RIP-relative form. */
  if (op.has_reloc && !op.rip_relative)
    asm_driver_panic(d,
                     "x64 asm: symbolic memory displacement requires (%rip)");
  return op;
}
    308 
    309 /* Emit the relocation a symbolic `(%rip)` memory operand carries, if any. The
    310  * disp32 field is the last 4 bytes of the instruction except for an immediate
    311  * store, where `trailing` immediate bytes follow it. R_X86_64_PC32-style
    312  * relocs use addend (off - 4 - trailing) so S+A-P yields the rip-relative
    313  * displacement to the end of the instruction. */
    314 static void x64_emit_mem_reloc(AsmDriver* d, MCEmitter* mc,
    315                                const X64AsmOperand* m, u32 trailing) {
    316   if (!m->has_reloc) return;
    317   u32 disp_pos = mc->pos(mc) - 4u - trailing;
    318   mc->emit_reloc_at(mc, asm_driver_cur_section(d), disp_pos, m->reloc_kind,
    319                     m->reloc_sym, m->reloc_off - 4 - (i64)trailing, 1, 0);
    320 }
    321 
    322 static u32 x64_pack_rex_mem_operand(u8* out, int w, u32 reg,
    323                                     X64AsmOperand mem) {
    324   /* RIP-relative carries no base/index registers (rm=101, no SIB). */
    325   if (mem.rip_relative) return x64_pack_rex(out, w, reg, 0, 0u);
    326   /* SIB forms supply REX.X from the index register (and REX.B from base
    327    * unless the base is omitted in the index-only form). */
    328   if (mem.has_index)
    329     return x64_pack_rex(out, w, reg, mem.index, mem.no_base ? 0u : mem.base);
    330   return x64_pack_rex(out, w, reg, 0, mem.no_base ? 0u : mem.base);
    331 }
    332 
    333 static u32 x64_pack_mem_operand(u8* out, u32 reg, X64AsmOperand mem) {
    334   if (mem.rip_relative) return x64_pack_mem_rip(out, reg, mem.disp);
    335   if (mem.has_index) {
    336     /* Index-only form (no base): mod=00 with SIB.base=101 → disp32. */
    337     if (mem.no_base) {
    338       out[0] = x64_modrm(0u, reg, X64_MODRM_RM_SIB);
    339       out[1] = x64_sib(mem.scale, mem.index, X64_SIB_NO_BASE);
    340       return 2u + x64_put_u32le(out + 2, (u32)mem.disp);
    341     }
    342     return x64_pack_mem_sib(out, reg, mem.base, mem.index, mem.scale, mem.disp);
    343   }
    344   if (mem.no_base) {
    345     out[0] = x64_modrm(0u, reg, X64_MODRM_RM_SIB);
    346     out[1] = x64_sib(0u, X64_SIB_NO_INDEX, X64_SIB_NO_BASE);
    347     return 2u + x64_put_u32le(out + 2, (u32)mem.disp);
    348   }
    349   return x64_pack_mem(out, reg, mem.base, mem.disp);
    350 }
    351 
    352 /* reg ← mem with an explicit single-byte opcode (e.g. 0x8B MOV, 0x8D LEA).
    353  * Routes the full memory-operand variety (plain / SIB / RIP / segment)
    354  * through the shared pack helpers. */
    355 static void emit_reg_mem_operand(AsmDriver* d, MCEmitter* mc, u32 size, u8 opc,
    356                                  u32 dst, X64AsmOperand src) {
    357   u8 buf[16];
    358   u32 n = 0;
    359   if (size == 2u) buf[n++] = X64_OPSIZE_PFX;
    360   if (src.seg) buf[n++] = src.seg;
    361   n += x64_pack_rex_mem_operand(buf + n, size == 8u, dst, src);
    362   buf[n++] = opc;
    363   n += x64_pack_mem_operand(buf + n, dst, src);
    364   mc->emit_bytes(mc, buf, n);
    365   x64_emit_mem_reloc(d, mc, &src, 0);
    366 }
    367 
/* MOV load: register `dst` ← memory `src` at operand size `size` bytes.
 * Thin wrapper that selects X64_OPC_MOV_R_RM for emit_reg_mem_operand(). */
static void emit_mov_load_operand(AsmDriver* d, MCEmitter* mc, u32 size,
                                  u32 dst, X64AsmOperand src) {
  emit_reg_mem_operand(d, mc, size, X64_OPC_MOV_R_RM, dst, src);
}
    372 
    373 /* reg → mem store with an explicit reg-to-r/m opcode.  Used by MOV
    374  * (0x89/0x88) and the ALU /r stores (ADD 0x01, OR 0x09, AND 0x21,
    375  * SUB 0x29, XOR 0x31, CMP 0x39).  The register operand occupies the
    376  * ModR/M reg field; the memory operand the r/m field. */
    377 static void emit_reg_store_operand(AsmDriver* d, MCEmitter* mc, u32 size,
    378                                    u8 opc, u32 src, X64AsmOperand dst,
    379                                    int force_rex) {
    380   u8 buf[16];
    381   u32 n = 0;
    382   if (size == 2u) buf[n++] = X64_OPSIZE_PFX;
    383   if (dst.seg) buf[n++] = dst.seg;
    384   if (force_rex)
    385     n += x64_pack_rex_force(buf + n, size == 8u, src, 0,
    386                             dst.no_base ? 0u : dst.base);
    387   else
    388     n += x64_pack_rex_mem_operand(buf + n, size == 8u, src, dst);
    389   buf[n++] = opc;
    390   n += x64_pack_mem_operand(buf + n, src, dst);
    391   mc->emit_bytes(mc, buf, n);
    392   x64_emit_mem_reloc(d, mc, &dst, 0);
    393 }
    394 
/* MOV store: memory `dst` ← register `src`.  Picks the byte opcode
 * (X64_OPC_MOV_RM_R8) when `size` is 1 and X64_OPC_MOV_RM_R otherwise, then
 * defers to emit_reg_store_operand(); `force_rex` is passed through. */
static void emit_mov_store_operand(AsmDriver* d, MCEmitter* mc, u32 size,
                                   u32 src, X64AsmOperand dst, int force_rex) {
  emit_reg_store_operand(d, mc, size,
                         size == 1u ? X64_OPC_MOV_RM_R8 : X64_OPC_MOV_RM_R, src,
                         dst, force_rex);
}
    401 
/* imm → mem store via a /digit opcode (group-1 ALU 0x80/0x81/0x83, or MOV
 * C6/C7).  `sub` is the /digit placed in the ModR/M reg field.
 * `opc8`/`opc32` select the 8-bit-immediate vs 32-bit-immediate
 * (sign-extended) opcode; pass equal values when the encoding has no imm8
 * short form (e.g. MOV).  `allow_i8` permits the short form, which is then
 * used whenever the immediate fits in a signed byte.
 *
 * Immediate size emitted: 1 byte when `size` is 1 or the short form is
 * used, 2 bytes for a 16-bit operand, 4 bytes otherwise.  Panics when the
 * immediate needs the long form, does not fit in 32 bits and `size` is not
 * 1.  The trailing immediate length is forwarded to x64_emit_mem_reloc()
 * so a relocated disp32 is located correctly. */
static void emit_rm_imm_store_operand(AsmDriver* d, MCEmitter* mc, u32 size,
                                      u8 opc8, u8 opc32, u32 sub,
                                      X64AsmOperand dst, i64 imm,
                                      int allow_i8) {
  u8 buf[16];
  u32 n = 0;
  int use_i8 = allow_i8 && imm_fits_i8(imm);
  if (!use_i8 && !imm_fits_i32(imm) && size != 1u)
    asm_driver_panic(d, "x64 asm: immediate out of range");
  if (size == 2u) buf[n++] = X64_OPSIZE_PFX;
  if (dst.seg) buf[n++] = dst.seg;
  /* The ModR/M reg field holds an opcode digit, not a register, so it
   * contributes nothing to REX (reg argument 0). */
  n += x64_pack_rex_mem_operand(buf + n, size == 8u, 0, dst);
  buf[n++] = use_i8 ? opc8 : opc32;
  n += x64_pack_mem_operand(buf + n, sub, dst);
  u32 trailing;
  if (size == 1u) {
    /* Byte operand: always a single immediate byte (value truncated). */
    buf[n++] = (u8)imm;
    trailing = 1u;
  } else if (use_i8) {
    buf[n++] = (u8)(i8)imm;
    trailing = 1u;
  } else if (size == 2u) {
    /* 16-bit operand size: a 2-byte immediate (under the 0x66 prefix). */
    u16 v = (u16)imm;
    buf[n++] = (u8)v;
    buf[n++] = (u8)(v >> 8);
    trailing = 2u;
  } else {
    n += x64_put_u32le(buf + n, (u32)(i32)imm);
    trailing = 4u;
  }
  mc->emit_bytes(mc, buf, n);
  x64_emit_mem_reloc(d, mc, &dst, trailing);
}
    441 
    442 static void expect_comma(AsmDriver* d) {
    443   if (!asm_driver_eat_comma(d)) asm_driver_panic(d, "x64 asm: expected ','");
    444 }
    445 
    446 static void emit_indirect_branch(MCEmitter* mc, u32 sub, u32 reg) {
    447   u8 op = 0xff;
    448   emit_rex(mc, 0, 0, 0, reg);
    449   mc->emit_bytes(mc, &op, 1);
    450   {
    451     u8 mr = modrm(3u, sub, reg);
    452     mc->emit_bytes(mc, &mr, 1);
    453   }
    454 }
    455 
/* Flush `n` pre-encoded bytes to the emitter.  Pure forwarding wrapper
 * around mc->emit_bytes(). */
static void emit_packed(MCEmitter* mc, const u8* bytes, u32 n) {
  mc->emit_bytes(mc, bytes, n);
}
    459 
    460 static int byte_reg_needs_rex(const X64AsmOperand* op) {
    461   return op && !op->high8 && op->reg >= 4u;
    462 }
    463 
    464 static void reject_high8_with_rex(AsmDriver* d, const X64AsmOperand* a,
    465                                   const X64AsmOperand* b) {
    466   if ((a && a->high8 && byte_reg_needs_rex(b)) ||
    467       (b && b->high8 && byte_reg_needs_rex(a))) {
    468     asm_driver_panic(d, "x64 asm: high-byte register cannot use REX");
    469   }
    470 }
    471 
    472 static __attribute__((unused)) void emit_movb_rr_operand(AsmDriver* d,
    473                                                          MCEmitter* mc,
    474                                                          X64AsmOperand dst,
    475                                                          X64AsmOperand src) {
    476   u8 ob = 0x88;
    477   reject_high8_with_rex(d, &dst, &src);
    478   if (byte_reg_needs_rex(&dst) || byte_reg_needs_rex(&src))
    479     emit_rex_force(mc, 0, src.reg, 0, dst.reg);
    480   else
    481     emit_rex(mc, 0, src.reg, 0, dst.reg);
    482   mc->emit_bytes(mc, &ob, 1);
    483   {
    484     u8 mr = modrm(3u, src.reg, dst.reg);
    485     mc->emit_bytes(mc, &mr, 1);
    486   }
    487 }
    488 
    489 static __attribute__((unused)) void emit_movb_store_operand(AsmDriver* d,
    490                                                             MCEmitter* mc,
    491                                                             X64AsmOperand src,
    492                                                             X64AsmOperand dst) {
    493   if (src.high8) {
    494     u8 ob = 0x88;
    495     if (dst.no_base || dst.base >= 8u)
    496       asm_driver_panic(d, "x64 asm: high-byte register cannot use REX");
    497     if (dst.seg) mc->emit_bytes(mc, &dst.seg, 1);
    498     mc->emit_bytes(mc, &ob, 1);
    499     emit_mem_operand(mc, src.reg, dst.base, dst.disp);
    500     return;
    501   }
    502   emit_mov_store_operand(d, mc, 1, src.reg, dst, 1);
    503 }
    504 
    505 static __attribute__((unused)) void emit_rm_imm(AsmDriver* d, MCEmitter* mc,
    506                                                 u32 width, u8 opc, u32 sub,
    507                                                 X64AsmOperand dst, i32 imm,
    508                                                 int imm32) {
    509   u8 buf[16];
    510   u32 n = 0;
    511   if (dst.kind != X64_ASM_OP_REG && dst.kind != X64_ASM_OP_MEM)
    512     asm_driver_panic(d, "x64 asm: expected register or memory destination");
    513   if (width == 2u) buf[n++] = X64_OPSIZE_PFX;
    514   if (dst.kind == X64_ASM_OP_REG) {
    515     n += x64_pack_rex(buf + n, width == 8u, 0, 0, dst.reg);
    516     buf[n++] = opc;
    517     buf[n++] = x64_modrm(3u, sub, dst.reg);
    518   } else {
    519     n += x64_pack_rex(buf + n, width == 8u, 0, 0, dst.base);
    520     buf[n++] = opc;
    521     n += x64_pack_mem(buf + n, sub, dst.base, dst.disp);
    522   }
    523   if (imm32)
    524     n += x64_put_u32le(buf + n, (u32)imm);
    525   else
    526     buf[n++] = (u8)(i8)imm;
    527   emit_packed(mc, buf, n);
    528 }
    529 
    530 static __attribute__((unused)) void emit_rm_op(AsmDriver* d, MCEmitter* mc,
    531                                                u32 width, u8 opc, u32 sub,
    532                                                X64AsmOperand dst) {
    533   u8 buf[16];
    534   u32 n = 0;
    535   if (dst.kind != X64_ASM_OP_REG && dst.kind != X64_ASM_OP_MEM)
    536     asm_driver_panic(d, "x64 asm: expected register or memory operand");
    537   if (width == 2u) buf[n++] = X64_OPSIZE_PFX;
    538   if (dst.kind == X64_ASM_OP_REG) {
    539     n += x64_pack_rex(buf + n, width == 8u, 0, 0, dst.reg);
    540     buf[n++] = opc;
    541     buf[n++] = x64_modrm(3u, sub, dst.reg);
    542   } else {
    543     n += x64_pack_rex(buf + n, width == 8u, 0, 0, dst.base);
    544     buf[n++] = opc;
    545     n += x64_pack_mem(buf + n, sub, dst.base, dst.disp);
    546   }
    547   emit_packed(mc, buf, n);
    548 }
    549 
/* Emit a two-byte-opcode (X64_OPC_TWOBYTE escape + `opcode2`) instruction
 * of the form `op r/m, reg`: `dst` goes in the ModR/M reg field, `src`
 * (register or memory) in r/m.
 *
 *   width      8 sets REX.W; other values leave it clear
 *   force_rex  request a REX prefix even when no extension bit is set;
 *              only honoured for a register source
 *   prefix     optional byte emitted first (0 = none)
 *
 * Panics if `src` is neither a register nor memory.  A relocation carried
 * by a memory source is attached after the bytes are emitted. */
static __attribute__((unused)) void emit_reg_rm_twobyte(
    AsmDriver* d, MCEmitter* mc, u32 width, u8 opcode2, u32 dst,
    X64AsmOperand src, int force_rex, u8 prefix) {
  u8 buf[16];
  u32 n = 0;
  if (src.kind != X64_ASM_OP_REG && src.kind != X64_ASM_OP_MEM)
    asm_driver_panic(d, "x64 asm: expected register or memory source");
  if (prefix) buf[n++] = prefix;
  if (src.kind == X64_ASM_OP_REG) {
    if (force_rex)
      n += x64_pack_rex_force(buf + n, width == 8u, dst, 0, src.reg);
    else
      n += x64_pack_rex(buf + n, width == 8u, dst, 0, src.reg);
    buf[n++] = X64_OPC_TWOBYTE;
    buf[n++] = opcode2;
    buf[n++] = x64_modrm(3u, dst, src.reg);
  } else {
    /* Route the full memory-operand variety (plain / SIB-indexed / RIP /
     * segment) through the shared pack helpers so a SIB index register is
     * preserved (e.g. `movzbl (%rcx,%rsi,1), %edx`). */
    if (src.seg) buf[n++] = src.seg;
    n += x64_pack_rex_mem_operand(buf + n, width == 8u, dst, src);
    buf[n++] = X64_OPC_TWOBYTE;
    buf[n++] = opcode2;
    n += x64_pack_mem_operand(buf + n, dst, src);
  }
  emit_packed(mc, buf, n);
  if (src.kind == X64_ASM_OP_MEM) x64_emit_mem_reloc(d, mc, &src, 0);
}
    579 
    580 /* ====================================================================
    581  * Descriptor-driven mnemonic dispatch.
    582  *
    583  * The disassembler's `x64_insn_table` (src/arch/x64/isa.c) lists every
    584  * encoding kit emits with its X64Format.  We reuse the SAME table for
    585  * the assembler: linear-scan to find the row whose mnemonic matches the
    586  * user's AT&T spelling (after stripping the size suffix b/w/l/q), then
    587  * dispatch to a per-format parser that consumes the operands and calls
    588  * the existing `emit_*` helpers in emit.c.
    589  *
    590  * The width comes from the suffix (or the row's width flags); per-format
    591  * parsers receive it via a small X64ParseCtx so they can pick the right
    592  * emit overload (e.g., MOV r,r at 32 vs 64 bits).
    593  *
    594  * Note: a single mnemonic ("mov") has multiple table rows for different
    595  * formats (MOV_RI, ALU_RR, MOV_RM_LOAD).  We return the FIRST row that
    596  * matches the mnemonic + width filter; per-format parsers that need a
    597  * different row (e.g., MOV imm→reg uses MOV_RI but our scan may have
    598  * returned ALU_RR first) fall through to operand-kind dispatch and
    599  * select the correct emit helper directly.  Phase 1 of this refactor
    600  * only exercises the mnemonics asm.c handled before; richer disambiguation
    601  * lands in follow-ups. */
    602 
/* Size-suffix codes stored in X64MnInfo.width.  Each non-zero code equals
 * the operand size in bytes named by the AT&T suffix letter (b/w/l/q);
 * X64_SFX_NONE means the mnemonic carried no size letter. */
#define X64_SFX_NONE 0u
#define X64_SFX_B 1u
#define X64_SFX_W 2u
#define X64_SFX_L 4u
#define X64_SFX_Q 8u
    608 
/* Result of parse_mnemonic(): the user's mnemonic split into the spelling
 * used for the instruction-table lookup, an optional operand-size suffix
 * and an optional condition code. */
typedef struct X64MnInfo {
  char base[16]; /* stripped mnemonic (table-spelling) */
  u32 base_len;  /* number of significant bytes in `base` */
  u32 width; /* X64_SFX_* — 0 if mnemonic carries no size letter */
  u32 cc;    /* condition nibble for jcc/cmovcc/setcc, or 16 if none */
} X64MnInfo;
    615 
    616 /* Parse the user-supplied mnemonic into (root, width, cc).  Handles:
    617  *   - trailing size letter (b/w/l/q) when the table mnemonic has none
    618  *   - jXX → ("j", cc)
    619  *   - cmovXX[q|l|w|b] → ("cmov", cc, width)
    620  *   - setXX → ("set", cc)
    621  *   - exact-match mnemonics carried verbatim (movslq, movzbl, ud2, ...) */
    622 static int parse_mnemonic(const char* s, size_t n, X64MnInfo* out) {
    623   static const struct {
    624     const char* name;
    625     u8 cc;
    626   } kCC[] = {
    627       /* Two-letter codes first so e.g. "ne" beats "n" if we ever add it. */
    628       {"ae", X64_CC_AE}, {"be", X64_CC_BE}, {"ge", X64_CC_GE},
    629       {"le", X64_CC_LE}, {"ne", X64_CC_NE}, {"no", X64_CC_NO},
    630       {"np", X64_CC_NP}, {"ns", X64_CC_NS}, {"a", X64_CC_A},
    631       {"b", X64_CC_B},   {"e", X64_CC_E},   {"g", X64_CC_G},
    632       {"l", X64_CC_L},   {"o", X64_CC_O},   {"p", X64_CC_P},
    633       {"s", X64_CC_S},
    634   };
    635   out->base_len = 0;
    636   out->width = X64_SFX_NONE;
    637   out->cc = 16u;
    638   if (n == 0 || n >= sizeof out->base) return 0;
    639   memcpy(out->base, s, n);
    640   out->base[n] = '\0';
    641 
    642   /* Exact-match mnemonics that carry their own width letters or are
    643    * already canonical table spellings. */
    644   if (n >= 6 && memcmp(s, "movslq", 6) == 0) {
    645     memcpy(out->base, "movslq", 6);
    646     out->base_len = 6;
    647     out->width = X64_SFX_Q;
    648     return 1;
    649   }
    650   if (n >= 6 && (memcmp(s, "movzbl", 6) == 0 || memcmp(s, "movzwl", 6) == 0 ||
    651                  memcmp(s, "movsbl", 6) == 0 || memcmp(s, "movswl", 6) == 0 ||
    652                  memcmp(s, "movzbq", 6) == 0 || memcmp(s, "movzwq", 6) == 0 ||
    653                  memcmp(s, "movsbq", 6) == 0 || memcmp(s, "movswq", 6) == 0)) {
    654     memcpy(out->base, s, 6);
    655     out->base_len = 6;
    656     return 1;
    657   }
    658   if (n == 3 && memcmp(s, "ud2", 3) == 0) {
    659     out->base_len = 3;
    660     return 1;
    661   }
    662   if (n == 3 && memcmp(s, "nop", 3) == 0) {
    663     out->base_len = 3;
    664     return 1;
    665   }
    666   if (n == 3 && memcmp(s, "ret", 3) == 0) {
    667     out->base_len = 3;
    668     return 1;
    669   }
    670   /* "syscall" ends in 'l' — return early so the generic size-suffix
    671    * stripper below does not mistake it for a movl-style width letter. */
    672   if (n == 7 && memcmp(s, "syscall", 7) == 0) {
    673     out->base_len = 7;
    674     return 1;
    675   }
    676 
    677   /* Indirect-branch spellings carry an explicit 'q' suffix that must be
    678    * preserved — the BR_RM rows in the table are keyed on "jmpq"/"callq". */
    679   if (n == 4 && memcmp(s, "call", 4) == 0) {
    680     memcpy(out->base, "callq", 5);
    681     out->base[5] = '\0';
    682     out->base_len = 5;
    683     out->width = X64_SFX_Q;
    684     return 1;
    685   }
    686   if (n == 4 && memcmp(s, "jmpq", 4) == 0) {
    687     out->base_len = 4;
    688     out->width = X64_SFX_Q;
    689     return 1;
    690   }
    691   if (n == 5 && memcmp(s, "callq", 5) == 0) {
    692     out->base_len = 5;
    693     out->width = X64_SFX_Q;
    694     return 1;
    695   }
    696 
    697   /* CMOVcc: cmov<cc>[suffix].  Strip optional trailing q/l/w/b first. */
    698   if (n >= 5 && memcmp(s, "cmov", 4) == 0) {
    699     size_t after = 4;
    700     size_t tail = n;
    701     char last = s[n - 1];
    702     if (last == 'b' || last == 'w' || last == 'l' || last == 'q') {
    703       out->width = (last == 'b')   ? X64_SFX_B
    704                    : (last == 'w') ? X64_SFX_W
    705                    : (last == 'l') ? X64_SFX_L
    706                                    : X64_SFX_Q;
    707       tail = n - 1;
    708     }
    709     if (tail > after) {
    710       Slice cc = {{s + after}, tail - after};
    711       for (size_t i = 0; i < sizeof kCC / sizeof kCC[0]; ++i) {
    712         if (slice_eq_cstr(cc, kCC[i].name)) {
    713           out->cc = kCC[i].cc;
    714           memcpy(out->base, "cmov", 4);
    715           out->base[4] = '\0';
    716           out->base_len = 4;
    717           return 1;
    718         }
    719       }
    720     }
    721   }
    722 
    723   /* SETcc: set<cc>. */
    724   if (n > 3 && memcmp(s, "set", 3) == 0) {
    725     Slice cc = {{s + 3}, n - 3};
    726     for (size_t i = 0; i < sizeof kCC / sizeof kCC[0]; ++i) {
    727       if (slice_eq_cstr(cc, kCC[i].name)) {
    728         out->cc = kCC[i].cc;
    729         memcpy(out->base, "set", 3);
    730         out->base[3] = '\0';
    731         out->base_len = 3;
    732         return 1;
    733       }
    734     }
    735   }
    736 
    737   /* Jcc: j<cc> — but NOT "jmp" / "jmpq" (handled above). */
    738   if (n > 1 && s[0] == 'j' && !(n >= 3 && s[1] == 'm' && s[2] == 'p')) {
    739     Slice cc = {{s + 1}, n - 1};
    740     for (size_t i = 0; i < sizeof kCC / sizeof kCC[0]; ++i) {
    741       if (slice_eq_cstr(cc, kCC[i].name)) {
    742         out->cc = kCC[i].cc;
    743         out->base[0] = 'j';
    744         out->base[1] = '\0';
    745         out->base_len = 1;
    746         return 1;
    747       }
    748     }
    749   }
    750 
    751   /* Generic: strip trailing size letter b/w/l/q. */
    752   {
    753     char last = s[n - 1];
    754     if (last == 'b' || last == 'w' || last == 'l' || last == 'q') {
    755       out->width = (last == 'b')   ? X64_SFX_B
    756                    : (last == 'w') ? X64_SFX_W
    757                    : (last == 'l') ? X64_SFX_L
    758                                    : X64_SFX_Q;
    759       out->base_len = (u32)(n - 1);
    760       out->base[out->base_len] = '\0';
    761       return 1;
    762     }
    763   }
    764 
    765   out->base_len = (u32)n;
    766   return 1;
    767 }
    768 
    769 /* Width implied by a descriptor row, given the mnemonic's parsed width. */
    770 static u32 row_implied_width(const X64InsnDesc* d) {
    771   if (d->flags & X64_ASMFL_FORCE_W64) return 8u;
    772   if (d->flags & X64_ASMFL_BYTE) return 1u;
    773   if (d->flags & X64_ASMFL_W16) return 2u;
    774   if (d->flags & X64_ASMFL_W_FROM_REX) return 0u; /* any */
    775   if (d->leg_pfx == X64_PFX_66) return 2u;
    776   return 0u; /* any */
    777 }
    778 
    779 /* Linear scan for the first table row whose mnemonic matches `info->base`
    780  * AND whose width filter is compatible.  Returns NULL on miss. */
    781 static const X64InsnDesc* find_mnemonic_row(const X64MnInfo* info) {
    782   u32 want_w = info->width; /* 0 = any */
    783   Slice base = {{info->base}, info->base_len};
    784   for (u32 i = 0; i < x64_insn_table_n; ++i) {
    785     const X64InsnDesc* d = &x64_insn_table[i];
    786     if (!slice_eq(d->mnemonic, base)) continue;
    787     if (want_w != 0) {
    788       u32 rw = row_implied_width(d);
    789       if (rw != 0 && rw != want_w) continue;
    790     }
    791     return d;
    792   }
    793   return NULL;
    794 }
    795 
/* Per-format parse context.  One of these is handed to every parse_*
 * routine; it bundles the token source, the byte sink and the table row
 * that was selected for the current mnemonic. */
typedef struct X64ParseCtx {
  /* Assembler driver: operands are parsed from it and errors reported to it. */
  AsmDriver* d;
  /* Machine-code emitter that receives the encoded bytes and relocations. */
  MCEmitter* mc;
  /* Instruction-table row chosen for the mnemonic. */
  const X64InsnDesc* desc;
  u32 width; /* 1/2/4/8 — derived from suffix or row */
  u32 cc;    /* condition nibble (jcc/cmovcc/setcc); 16 if unused */
} X64ParseCtx;
    804 
    805 /* w-bit for emit_rex / emit_alu_rr / emit_mov_rr etc. */
    806 static int width_to_w(u32 w) { return w == 8u ? 1 : 0; }
    807 
    808 /* ---- per-format parsers ---- */
    809 
    810 static void parse_nullary(X64ParseCtx* p) {
    811   /* nop / ret / ud2 / leave / cltd / cqto. */
    812   u8 buf[4];
    813   u32 n = 0;
    814   if (p->desc->leg_pfx) buf[n++] = p->desc->leg_pfx;
    815   if (p->desc->rex_w_req == X64_W_REQ_1) buf[n++] = X64_REX_BASE | X64_REX_W;
    816   for (u32 i = 0; i < p->desc->opc_len; ++i) buf[n++] = p->desc->opc[i];
    817   if (p->desc->opc_len >= 1u) {
    818     p->mc->emit_bytes(p->mc, buf, n);
    819     return;
    820   }
    821   asm_driver_panic(p->d, "x64 asm: nullary form not implemented");
    822 }
    823 
    824 static void parse_br_rm(X64ParseCtx* p) {
    825   /* jmpq *%reg or callq *%reg.  /digit picks sub (2 = call, 4 = jmp). */
    826   X64AsmOperand op = parse_operand(p->d);
    827   if (op.kind != X64_ASM_OP_IND_REG)
    828     asm_driver_panic(p->d, "x64 asm: indirect branch form");
    829   emit_indirect_branch(p->mc, p->desc->modrm_reg, op.reg);
    830 }
    831 
    832 /* Look up the ALU_RM_IMM8 row for a given mnemonic root; the /digit
    833  * picks the operation (0=add, 1=or, 4=and, 5=sub, 6=xor, 7=cmp). */
    834 static const X64InsnDesc* find_alu_imm_row(Slice root) {
    835   for (u32 i = 0; i < x64_insn_table_n; ++i) {
    836     const X64InsnDesc* d = &x64_insn_table[i];
    837     if (d->fmt != X64_FMT_ALU_RM_IMM8) continue;
    838     if (!slice_eq(d->mnemonic, root)) continue;
    839     return d;
    840   }
    841   return NULL;
    842 }
    843 
    844 static void parse_alu_rr(X64ParseCtx* p) {
    845   /* op src, dst  in AT&T.  Row's opc[0] is the ALU opcode (0x01/0x09/...
    846    * 0x31/0x85/0x89).  The byte/16-bit forms are handled by the
    847    * existing emit.c helpers for w=0/w=1 + size suffix; here phase-1
    848    * supports only the regular 32/64 forms used by the prior asm.c. */
    849   X64AsmOperand src;
    850   X64AsmOperand dst;
    851   src = parse_operand(p->d);
    852   expect_comma(p->d);
    853   dst = parse_operand(p->d);
    854 
    855   /* Immediate source → not an ALU_RR encoding.  Redirect to the
    856    * ALU_RM_IMM row for this mnemonic. */
    857   if (src.kind == X64_ASM_OP_IMM &&
    858       (dst.kind == X64_ASM_OP_REG || dst.kind == X64_ASM_OP_MEM)) {
    859     const X64InsnDesc* imm_row = find_alu_imm_row(p->desc->mnemonic);
    860     if (!imm_row) asm_driver_panic(p->d, "x64 asm: no alu-imm row");
    861     if (dst.kind == X64_ASM_OP_MEM) {
    862       emit_rm_imm_store_operand(p->d, p->mc, p->width, X64_OPC_ALU_IMM8,
    863                                 X64_OPC_ALU_IMM32, imm_row->modrm_reg, dst,
    864                                 src.imm, 1);
    865       return;
    866     }
    867     /* Stack-pointer adjustments (`add/sub $imm, %rsp`, 64-bit) always use the
    868      * imm32 form in codegen — the prologue and alloca patch a fixed-width
    869      * placeholder, so they never shrink to imm8 even for a small frame. Match
    870      * that here so `cc -S | as` reproduces codegen's bytes exactly; %rsp is a
    871      * reserved register, so codegen never emits an imm8 ALU op against it. */
    872     if (dst.reg == X64_RSP && p->width == 8u && imm_fits_i32(src.imm))
    873       emit_alu_imm32(p->mc, 1, imm_row->modrm_reg, dst.reg, (i32)src.imm);
    874     else if (imm_fits_i8(src.imm))
    875       emit_alu_imm8(p->mc, width_to_w(p->width), imm_row->modrm_reg, dst.reg,
    876                     (i8)src.imm);
    877     else if (imm_fits_i32(src.imm))
    878       emit_alu_imm32(p->mc, width_to_w(p->width), imm_row->modrm_reg, dst.reg,
    879                      (i32)src.imm);
    880     else
    881       asm_driver_panic(p->d, "x64 asm: alu-imm out of range");
    882     return;
    883   }
    884 
    885   if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_REG) {
    886     u8 op = p->desc->opc[0];
    887     if (p->width == 2u) {
    888       u8 pfx = X64_OPSIZE_PFX;
    889       p->mc->emit_bytes(p->mc, &pfx, 1);
    890     }
    891     if (op == 0x89u) {
    892       /* MOV r/m, r — phase-1 keeps the existing helper. */
    893       emit_mov_rr(p->mc, width_to_w(p->width), dst.reg, src.reg);
    894       return;
    895     }
    896     if (op == 0x88u) {
    897       emit_movb_rr_operand(p->d, p->mc, dst, src);
    898       return;
    899     }
    900     if (op == 0x88u) {
    901       /* MOV r/m8, r8 — byte form (preserved from prior asm.c). */
    902       u8 ob = 0x88;
    903       emit_rex(p->mc, 0, src.reg, 0, dst.reg);
    904       p->mc->emit_bytes(p->mc, &ob, 1);
    905       {
    906         u8 mr = modrm(3u, src.reg, dst.reg);
    907         p->mc->emit_bytes(p->mc, &mr, 1);
    908       }
    909       return;
    910     }
    911     /* xor/test/and/... — emit_alu_rr handles the generic shape. */
    912     emit_alu_rr(p->mc, width_to_w(p->width), op, dst.reg, src.reg);
    913     return;
    914   }
    915   /* MOV r, r/m goes through MOV_RM_LOAD; MOV r, m goes through
    916    * MOV_RM_LOAD (load) or ALU_RR with mem dst (store).  We handle the
    917    * store side here only when the mnemonic is "mov" (opc 0x89). */
    918   if (p->desc->opc[0] == 0x89u && src.kind == X64_ASM_OP_REG &&
    919       dst.kind == X64_ASM_OP_MEM) {
    920     if (p->width == 1u)
    921       emit_movb_store_operand(p->d, p->mc, src, dst);
    922     else
    923       emit_mov_store_operand(p->d, p->mc, p->width, src.reg, dst, 0);
    924     return;
    925   }
    926   if (p->desc->opc[0] == 0x89u && src.kind == X64_ASM_OP_MEM &&
    927       dst.kind == X64_ASM_OP_REG) {
    928     emit_mov_load_operand(p->d, p->mc, p->width, dst.reg, src);
    929     return;
    930   }
    931   /* ALU reg → mem store (add/or/and/sub/xor/cmp %reg, mem): the reg-to-r/m
    932    * /r opcode (opc[0]) with a memory ModR/M.  The byte form clears the
    933    * opcode's W bit (e.g. ADD r/m,r 0x01 → r/m8,r8 0x00). */
    934   if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_MEM) {
    935     u8 op = p->width == 1u ? (u8)(p->desc->opc[0] & ~1u) : p->desc->opc[0];
    936     emit_reg_store_operand(p->d, p->mc, p->width, op, src.reg, dst,
    937                            p->width == 1u && byte_reg_needs_rex(&src));
    938     return;
    939   }
    940   asm_driver_panic(p->d, "x64 asm: unsupported alu_rr form");
    941 }
    942 
    943 static void parse_mov_ri(X64ParseCtx* p) {
    944   X64AsmOperand src;
    945   X64AsmOperand dst;
    946   src = parse_operand(p->d);
    947   expect_comma(p->d);
    948   dst = parse_operand(p->d);
    949   if (src.kind != X64_ASM_OP_IMM ||
    950       (dst.kind != X64_ASM_OP_REG && dst.kind != X64_ASM_OP_MEM))
    951     asm_driver_panic(p->d, "x64 asm: mov-imm form");
    952   /* MOV $imm → mem: C6 /0 (byte) or C7 /0 (32/64 sign-extended imm32). */
    953   if (dst.kind == X64_ASM_OP_MEM) {
    954     if (p->width != 8u && !imm_fits_i32(src.imm))
    955       asm_driver_panic(p->d, "x64 asm: mov immediate out of range");
    956     emit_rm_imm_store_operand(
    957         p->d, p->mc, p->width, X64_OPC_MOV_RM_IMM8,
    958         p->width == 1u ? X64_OPC_MOV_RM_IMM8 : X64_OPC_MOV_RM_IMM32,
    959         X64_MOV_RM_IMM_SUB, dst, src.imm, 0);
    960     return;
    961   }
    962   if (p->width != 4u && p->width != 8u)
    963     asm_driver_panic(p->d, "x64 asm: mov imm only supports l/q forms");
    964   x64_emit_load_imm(p->mc, p->width == 8u ? 1 : 0, dst.reg, src.imm);
    965 }
    966 
    967 static void parse_mov_rm_load(X64ParseCtx* p) {
    968   /* MOV r, r/m (0x8B) or LEA r, m (0x8D).  AT&T order is src, dst.
    969    * Phase-1 covers reg-reg, reg←mem (load) and lea. */
    970   X64AsmOperand src;
    971   X64AsmOperand dst;
    972   src = parse_operand(p->d);
    973   expect_comma(p->d);
    974   dst = parse_operand(p->d);
    975   if (p->desc->opc[0] == 0x8Du) {
    976     if (src.kind != X64_ASM_OP_MEM || dst.kind != X64_ASM_OP_REG)
    977       asm_driver_panic(p->d, "x64 asm: lea form");
    978     emit_reg_mem_operand(p->d, p->mc, p->width, X64_OPC_LEA, dst.reg, src);
    979     return;
    980   }
    981   if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) {
    982     emit_mov_load_operand(p->d, p->mc, p->width, dst.reg, src);
    983     return;
    984   }
    985   if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_REG) {
    986     if (p->width == 2u) {
    987       u8 pfx = X64_OPSIZE_PFX;
    988       p->mc->emit_bytes(p->mc, &pfx, 1);
    989     }
    990     emit_mov_rr(p->mc, width_to_w(p->width), dst.reg, src.reg);
    991     return;
    992   }
    993   asm_driver_panic(p->d, "x64 asm: mov-load form");
    994 }
    995 
    996 static void parse_movsxd(X64ParseCtx* p) {
    997   X64AsmOperand src;
    998   X64AsmOperand dst;
    999   src = parse_operand(p->d);
   1000   expect_comma(p->d);
   1001   dst = parse_operand(p->d);
   1002   if (dst.kind != X64_ASM_OP_REG)
   1003     asm_driver_panic(p->d, "x64 asm: movslq form");
   1004   if (src.kind == X64_ASM_OP_REG) {
   1005     emit_extend_rr(p->mc, 1, 1, 4, dst.reg, src.reg);
   1006   } else if (src.kind == X64_ASM_OP_MEM) {
   1007     u8 buf[16];
   1008     u32 n = x64_mov_rm_load_pack((X64MovRMLoad){.w = 1,
   1009                                                 .opc0 = X64_OPC_MOVSXD,
   1010                                                 .dst = dst.reg,
   1011                                                 .base = src.base,
   1012                                                 .disp = src.disp},
   1013                                  buf);
   1014     emit_packed(p->mc, buf, n);
   1015   } else {
   1016     asm_driver_panic(p->d, "x64 asm: movslq source");
   1017   }
   1018 }
   1019 
   1020 static void parse_alu_rm_imm(X64ParseCtx* p) {
   1021   X64AsmOperand src;
   1022   X64AsmOperand dst;
   1023   src = parse_operand(p->d);
   1024   expect_comma(p->d);
   1025   dst = parse_operand(p->d);
   1026   if (src.kind != X64_ASM_OP_IMM || dst.kind != X64_ASM_OP_REG)
   1027     asm_driver_panic(p->d, "x64 asm: alu-imm form");
   1028   if (imm_fits_i8(src.imm))
   1029     emit_alu_imm8(p->mc, width_to_w(p->width), p->desc->modrm_reg, dst.reg,
   1030                   (i8)src.imm);
   1031   else if (imm_fits_i32(src.imm))
   1032     emit_alu_imm32(p->mc, width_to_w(p->width), p->desc->modrm_reg, dst.reg,
   1033                    (i32)src.imm);
   1034   else
   1035     asm_driver_panic(p->d, "x64 asm: alu-imm out of range");
   1036 }
   1037 
   1038 static void parse_cmovcc(X64ParseCtx* p) {
   1039   X64AsmOperand src;
   1040   X64AsmOperand dst;
   1041   src = parse_operand(p->d);
   1042   expect_comma(p->d);
   1043   dst = parse_operand(p->d);
   1044   if (src.kind != X64_ASM_OP_REG || dst.kind != X64_ASM_OP_REG)
   1045     asm_driver_panic(p->d, "x64 asm: cmovcc form");
   1046   {
   1047     u8 op[2] = {0x0f, (u8)(0x40u | (p->cc & 0xfu))};
   1048     if (p->width == 2u) {
   1049       u8 pfx = X64_OPSIZE_PFX;
   1050       p->mc->emit_bytes(p->mc, &pfx, 1);
   1051     }
   1052     emit_rex(p->mc, width_to_w(p->width), dst.reg, 0, src.reg);
   1053     p->mc->emit_bytes(p->mc, op, 2);
   1054     emit_rm_reg(p->mc, dst.reg, src.reg);
   1055   }
   1056 }
   1057 
   1058 static void parse_push_pop(X64ParseCtx* p) {
   1059   X64AsmOperand op = parse_operand(p->d);
   1060   u8 base = p->desc->opc[0];
   1061   u8 ob;
   1062   if (op.kind != X64_ASM_OP_REG)
   1063     asm_driver_panic(p->d, "x64 asm: push/pop register");
   1064   emit_rex(p->mc, 0, 0, 0, op.reg);
   1065   ob = (u8)(base | (op.reg & 7u));
   1066   p->mc->emit_bytes(p->mc, &ob, 1);
   1067 }
   1068 
   1069 static void parse_movzx_movsx(X64ParseCtx* p) {
   1070   X64AsmOperand src = parse_operand(p->d);
   1071   X64AsmOperand dst;
   1072   expect_comma(p->d);
   1073   dst = parse_operand(p->d);
   1074   if (dst.kind != X64_ASM_OP_REG)
   1075     asm_driver_panic(p->d, "x64 asm: movx dst register");
   1076   /* REX.W follows the destination register width: `movsbq …, %rcx` (64-bit)
   1077    * needs REX.W; `movsbl …, %ecx` (32-bit) does not. The disassembler spells
   1078    * the q/l form from REX.W, so honoring dst width here round-trips it. */
   1079   emit_reg_rm_twobyte(
   1080       p->d, p->mc, dst.width == 8u ? 8u : 4u, p->desc->opc[1], dst.reg, src,
   1081       p->desc->opc[1] == X64_OPC_MOVZX_B || p->desc->opc[1] == X64_OPC_MOVSX_B,
   1082       0);
   1083 }
   1084 
   1085 static void parse_imul_rr(X64ParseCtx* p) {
   1086   X64AsmOperand src = parse_operand(p->d);
   1087   X64AsmOperand dst;
   1088   if (src.kind == X64_ASM_OP_IMM) {
   1089     X64AsmOperand real_src;
   1090     expect_comma(p->d);
   1091     real_src = parse_operand(p->d);
   1092     expect_comma(p->d);
   1093     dst = parse_operand(p->d);
   1094     if (dst.kind != X64_ASM_OP_REG)
   1095       asm_driver_panic(p->d, "x64 asm: imul dst register");
   1096     if (real_src.kind == X64_ASM_OP_REG) {
   1097       if (imm_fits_i8(src.imm))
   1098         emit_imul_imm8(p->mc, width_to_w(p->width), dst.reg, real_src.reg,
   1099                        (i8)src.imm);
   1100       else if (imm_fits_i32(src.imm))
   1101         emit_imul_imm32(p->mc, width_to_w(p->width), dst.reg, real_src.reg,
   1102                         (i32)src.imm);
   1103       else
   1104         asm_driver_panic(p->d, "x64 asm: imul imm out of range");
   1105       return;
   1106     }
   1107     if (real_src.kind == X64_ASM_OP_MEM) {
   1108       u8 buf[16];
   1109       u32 n = 0;
   1110       int imm32 = !imm_fits_i8(src.imm);
   1111       if (imm32 && !imm_fits_i32(src.imm))
   1112         asm_driver_panic(p->d, "x64 asm: imul imm out of range");
   1113       n += x64_pack_rex(buf + n, width_to_w(p->width), dst.reg, 0,
   1114                         real_src.base);
   1115       buf[n++] = imm32 ? X64_OPC_IMUL_IMM32 : X64_OPC_IMUL_IMM8;
   1116       n += x64_pack_mem(buf + n, dst.reg, real_src.base, real_src.disp);
   1117       if (imm32)
   1118         n += x64_put_u32le(buf + n, (u32)(i32)src.imm);
   1119       else
   1120         buf[n++] = (u8)(i8)src.imm;
   1121       emit_packed(p->mc, buf, n);
   1122       return;
   1123     }
   1124     asm_driver_panic(p->d, "x64 asm: imul source");
   1125   }
   1126   expect_comma(p->d);
   1127   dst = parse_operand(p->d);
   1128   if (dst.kind != X64_ASM_OP_REG)
   1129     asm_driver_panic(p->d, "x64 asm: imul dst register");
   1130   emit_reg_rm_twobyte(p->d, p->mc, p->width, X64_OPC_IMUL_2B, dst.reg, src, 0,
   1131                       0);
   1132 }
   1133 
   1134 static void parse_imul_rri(X64ParseCtx* p) {
   1135   X64AsmOperand imm = parse_operand(p->d);
   1136   X64AsmOperand src;
   1137   X64AsmOperand dst;
   1138   if (imm.kind != X64_ASM_OP_IMM) asm_driver_panic(p->d, "x64 asm: imul imm");
   1139   expect_comma(p->d);
   1140   src = parse_operand(p->d);
   1141   expect_comma(p->d);
   1142   dst = parse_operand(p->d);
   1143   if (dst.kind != X64_ASM_OP_REG)
   1144     asm_driver_panic(p->d, "x64 asm: imul dst register");
   1145   if (src.kind == X64_ASM_OP_REG) {
   1146     if (p->desc->opc[0] == X64_OPC_IMUL_IMM8 || imm_fits_i8(imm.imm))
   1147       emit_imul_imm8(p->mc, width_to_w(p->width), dst.reg, src.reg,
   1148                      (i8)imm.imm);
   1149     else if (imm_fits_i32(imm.imm))
   1150       emit_imul_imm32(p->mc, width_to_w(p->width), dst.reg, src.reg,
   1151                       (i32)imm.imm);
   1152     else
   1153       asm_driver_panic(p->d, "x64 asm: imul imm out of range");
   1154     return;
   1155   }
   1156   if (src.kind == X64_ASM_OP_MEM) {
   1157     u8 buf[16];
   1158     u32 n = 0;
   1159     int imm32 = !(p->desc->opc[0] == X64_OPC_IMUL_IMM8 || imm_fits_i8(imm.imm));
   1160     if (imm32 && !imm_fits_i32(imm.imm))
   1161       asm_driver_panic(p->d, "x64 asm: imul imm out of range");
   1162     n += x64_pack_rex(buf + n, width_to_w(p->width), dst.reg, 0, src.base);
   1163     buf[n++] = imm32 ? X64_OPC_IMUL_IMM32 : X64_OPC_IMUL_IMM8;
   1164     n += x64_pack_mem(buf + n, dst.reg, src.base, src.disp);
   1165     if (imm32)
   1166       n += x64_put_u32le(buf + n, (u32)(i32)imm.imm);
   1167     else
   1168       buf[n++] = (u8)(i8)imm.imm;
   1169     emit_packed(p->mc, buf, n);
   1170     return;
   1171   }
   1172   asm_driver_panic(p->d, "x64 asm: imul source");
   1173 }
   1174 
   1175 static void parse_f7_rm(X64ParseCtx* p) {
   1176   X64AsmOperand op = parse_operand(p->d);
   1177   emit_rm_op(p->d, p->mc, p->width, X64_OPC_F7, p->desc->modrm_reg, op);
   1178 }
   1179 
   1180 static void parse_shift(X64ParseCtx* p) {
   1181   X64AsmOperand src = parse_operand(p->d);
   1182   X64AsmOperand dst;
   1183   expect_comma(p->d);
   1184   dst = parse_operand(p->d);
   1185   if (src.kind == X64_ASM_OP_REG && src.reg == X64_RCX && src.width == 1u) {
   1186     emit_rm_op(p->d, p->mc, p->width, X64_OPC_SHIFT_CL, p->desc->modrm_reg,
   1187                dst);
   1188     return;
   1189   }
   1190   if (src.kind != X64_ASM_OP_IMM) asm_driver_panic(p->d, "x64 asm: shift imm");
   1191   emit_rm_imm(p->d, p->mc, p->width, X64_OPC_SHIFT_IMM, p->desc->modrm_reg, dst,
   1192               (i32)src.imm, 0);
   1193 }
   1194 
   1195 static void parse_rel32_branch(X64ParseCtx* p) {
   1196   ObjSymId sym = OBJ_SYM_NONE;
   1197   i64 off = 0;
   1198   u32 disp_pos;
   1199   if (p->desc->fmt == X64_FMT_JCC_REL32) {
   1200     u8 op[2] = {0x0f, (u8)(0x80u | (p->cc & 0xfu))};
   1201     p->mc->emit_bytes(p->mc, op, 2);
   1202   } else {
   1203     u8 op = (p->desc->fmt == X64_FMT_CALL_REL32) ? X64_OPC_CALL_REL32
   1204                                                  : X64_OPC_JMP_REL32;
   1205     p->mc->emit_bytes(p->mc, &op, 1);
   1206   }
   1207   disp_pos = p->mc->pos(p->mc);
   1208   emit_u32le(p->mc, 0);
   1209   asm_driver_parse_sym_expr(p->d, &sym, &off);
   1210   if (sym == OBJ_SYM_NONE)
   1211     asm_driver_panic(p->d, "x64 asm: symbolic branch target required");
   1212   /* A `@PLT` suffix forces the PLT32 reloc (the default for `call`); plain
   1213    * `jmp sym` uses PC32. */
   1214   RelocKind dflt = p->desc->fmt == X64_FMT_CALL_REL32 ? R_X64_PLT32 : R_PC32;
   1215   RelocKind k = x64_parse_reloc_suffix(p->d, dflt);
   1216   if (k != R_X64_PLT32 && k != R_PC32)
   1217     asm_driver_panic(p->d, "x64 asm: only @PLT is valid on a branch target");
   1218   p->mc->emit_reloc_at(p->mc, asm_driver_cur_section(p->d), disp_pos, k, sym,
   1219                        off - 4, 1, 0);
   1220 }
   1221 
   1222 static void parse_setcc(X64ParseCtx* p) {
   1223   X64AsmOperand dst = parse_operand(p->d);
   1224   if (dst.kind == X64_ASM_OP_REG) {
   1225     if (dst.high8) {
   1226       u8 op[2] = {0x0f, (u8)(0x90u | (p->cc & 0xfu))};
   1227       p->mc->emit_bytes(p->mc, op, 2);
   1228       emit_rm_reg(p->mc, 0, dst.reg);
   1229     } else {
   1230       emit_setcc(p->mc, p->cc, dst.reg);
   1231     }
   1232     return;
   1233   }
   1234   if (dst.kind == X64_ASM_OP_MEM) {
   1235     u8 buf[16];
   1236     u32 n = x64_pack_rex(buf, 0, 0, 0, dst.base);
   1237     buf[n++] = X64_OPC_TWOBYTE;
   1238     buf[n++] = (u8)(X64_OPC_SETCC_BASE | (p->cc & 0xfu));
   1239     n += x64_pack_mem(buf + n, 0, dst.base, dst.disp);
   1240     emit_packed(p->mc, buf, n);
   1241     return;
   1242   }
   1243   asm_driver_panic(p->d, "x64 asm: setcc destination");
   1244 }
   1245 
/* Shared parser for the SSE_RR / SSE_LOAD / SSE_STORE formats:
 * `op src, dst` in AT&T order.
 *
 * Cases, tested in this order:
 *   1. opc[1] == 0x2c (cvttsd2si / cvttss2si): XMM or memory → GPR.
 *   2. opc[1] == 0x2a (cvtsi2sd / cvtsi2ss):   GPR or memory → XMM.
 *   3. movsd / movss with an XMM source and memory destination: the row's
 *      load opcode 0x10 is swapped for the store opcode 0x11.
 *   4. movaps with an XMM source and memory destination: 0x28 → 0x29.
 *   5. Everything else: destination must be XMM; source is XMM or memory.
 *
 * NOTE(review): the emit_sse_load / emit_sse_store calls receive only the
 * memory operand's base and disp; index / rip-relative / reloc fields are
 * not forwarded — confirm how such operands are meant to be handled. */
static void parse_sse_rr(X64ParseCtx* p) {
  X64AsmOperand src = parse_operand(p->d);
  X64AsmOperand dst;
  /* The conversion rows are recognised by their second opcode byte. */
  int cvt_to_int = p->desc->opc[1] == 0x2cu;
  int cvt_from_int = p->desc->opc[1] == 0x2au;
  expect_comma(p->d);
  dst = parse_operand(p->d);
  if (cvt_to_int) {
    /* cvttsd2si/cvttss2si XMM/m -> GPR: REX.W follows the GPR destination
     * width (`%rdx` = 64-bit, `%edx` = 32-bit), not the mnemonic — these rows
     * carry no size suffix. */
    if (dst.kind != X64_ASM_OP_REG)
      asm_driver_panic(p->d, "x64 asm: cvtt dst register");
    u32 gpr_w = dst.width == 8u ? 8u : 4u;
    if (src.kind == X64_ASM_OP_XMM)
      emit_sse_rr_w(p->mc, p->desc->leg_pfx, p->desc->opc[1], width_to_w(gpr_w),
                    dst.reg, src.reg);
    else if (src.kind == X64_ASM_OP_MEM)
      emit_reg_rm_twobyte(p->d, p->mc, gpr_w, p->desc->opc[1], dst.reg, src, 0,
                          p->desc->leg_pfx);
    else
      asm_driver_panic(p->d, "x64 asm: cvtt source");
    return;
  }
  if (cvt_from_int) {
    /* cvtsi2sd/cvtsi2ss GPR/m -> XMM: REX.W follows the GPR source width. */
    if (dst.kind != X64_ASM_OP_XMM)
      asm_driver_panic(p->d, "x64 asm: cvtsi dst xmm");
    if (src.kind == X64_ASM_OP_REG) {
      u32 gpr_w = src.width == 8u ? 8u : 4u;
      emit_sse_rr_w(p->mc, p->desc->leg_pfx, p->desc->opc[1], width_to_w(gpr_w),
                    dst.reg, src.reg);
    } else if (src.kind == X64_ASM_OP_MEM)
      /* NOTE(review): no width is passed on the memory-source path, so a
       * 64-bit integer source presumably cannot be requested here — confirm. */
      emit_sse_load(p->mc, p->desc->leg_pfx, p->desc->opc[1], dst.reg, src.base,
                    src.disp);
    else
      asm_driver_panic(p->d, "x64 asm: cvtsi source");
    return;
  }
  /* Scalar move to memory: same prefix, store opcode 0x11 instead of 0x10. */
  if (dst.kind == X64_ASM_OP_MEM && src.kind == X64_ASM_OP_XMM &&
      p->desc->opc[1] == 0x10u &&
      (slice_eq_cstr(p->desc->mnemonic, "movsd") ||
       slice_eq_cstr(p->desc->mnemonic, "movss"))) {
    emit_sse_store(p->mc, p->desc->leg_pfx, 0x11, src.reg, dst.base, dst.disp);
    return;
  }
  /* movaps to memory: store opcode 0x29 instead of the load opcode 0x28. */
  if (dst.kind == X64_ASM_OP_MEM && src.kind == X64_ASM_OP_XMM &&
      p->desc->opc[1] == 0x28u && slice_eq_cstr(p->desc->mnemonic, "movaps")) {
    emit_sse_store(p->mc, p->desc->leg_pfx, 0x29, src.reg, dst.base, dst.disp);
    return;
  }
  /* Generic XMM-destination forms. */
  if (dst.kind != X64_ASM_OP_XMM)
    asm_driver_panic(p->d, "x64 asm: sse dst xmm");
  if (src.kind == X64_ASM_OP_XMM)
    emit_sse_rr(p->mc, p->desc->leg_pfx, p->desc->opc[1], dst.reg, src.reg);
  else if (src.kind == X64_ASM_OP_MEM)
    emit_sse_load(p->mc, p->desc->leg_pfx, p->desc->opc[1], dst.reg, src.base,
                  src.disp);
  else
    asm_driver_panic(p->d, "x64 asm: sse source");
}
   1307 
   1308 static void parse_bswap(X64ParseCtx* p) {
   1309   X64AsmOperand reg = parse_operand(p->d);
   1310   u8 op[2];
   1311   if (reg.kind != X64_ASM_OP_REG) asm_driver_panic(p->d, "x64 asm: bswap reg");
   1312   emit_rex(p->mc, width_to_w(p->width), 0, 0, reg.reg);
   1313   op[0] = 0x0f;
   1314   op[1] = (u8)(0xc8u | (reg.reg & 7u));
   1315   p->mc->emit_bytes(p->mc, op, 2);
   1316 }
   1317 
   1318 static void parse_bs_popcnt(X64ParseCtx* p) {
   1319   X64AsmOperand src = parse_operand(p->d);
   1320   X64AsmOperand dst;
   1321   expect_comma(p->d);
   1322   dst = parse_operand(p->d);
   1323   if (dst.kind != X64_ASM_OP_REG)
   1324     asm_driver_panic(p->d, "x64 asm: bit-scan dst register");
   1325   emit_reg_rm_twobyte(p->d, p->mc, p->width, p->desc->opc[1], dst.reg, src, 0,
   1326                       p->desc->leg_pfx);
   1327 }
   1328 
   1329 static void parse_atomic(X64ParseCtx* p) {
   1330   X64AsmOperand src = parse_operand(p->d);
   1331   X64AsmOperand dst;
   1332   u8 buf[16];
   1333   u32 n = 0;
   1334   expect_comma(p->d);
   1335   dst = parse_operand(p->d);
   1336   if (src.kind != X64_ASM_OP_REG ||
   1337       (dst.kind != X64_ASM_OP_REG && dst.kind != X64_ASM_OP_MEM))
   1338     asm_driver_panic(p->d, "x64 asm: atomic form");
   1339   n += x64_pack_rex(buf + n, width_to_w(p->width), src.reg, 0,
   1340                     dst.kind == X64_ASM_OP_REG ? dst.reg : dst.base);
   1341   if (p->desc->opc_len == 2) {
   1342     buf[n++] = X64_OPC_TWOBYTE;
   1343     buf[n++] = p->desc->opc[1];
   1344   } else {
   1345     buf[n++] = p->desc->opc[0];
   1346   }
   1347   if (dst.kind == X64_ASM_OP_REG)
   1348     buf[n++] = x64_modrm(3u, src.reg, dst.reg);
   1349   else
   1350     n += x64_pack_mem(buf + n, src.reg, dst.base, dst.disp);
   1351   emit_packed(p->mc, buf, n);
   1352 }
   1353 
   1354 static void parse_nop_multi(X64ParseCtx* p) {
   1355   u8 nop6[6] = {X64_NOP6_BYTE0, X64_NOP6_BYTE1, X64_NOP6_BYTE2,
   1356                 X64_NOP6_BYTE3, X64_NOP6_BYTE4, X64_NOP6_BYTE5};
   1357   p->mc->emit_bytes(p->mc, nop6, sizeof nop6);
   1358 }
   1359 
   1360 static void parse_and_emit_for_format(X64ParseCtx* p) {
   1361   switch ((X64Format)p->desc->fmt) {
   1362     case X64_FMT_NULLARY:
   1363       parse_nullary(p);
   1364       return;
   1365     case X64_FMT_NOP_MULTI:
   1366       parse_nop_multi(p);
   1367       return;
   1368     case X64_FMT_PUSH_POP:
   1369       parse_push_pop(p);
   1370       return;
   1371     case X64_FMT_BR_RM:
   1372       parse_br_rm(p);
   1373       return;
   1374     case X64_FMT_ALU_RR:
   1375       parse_alu_rr(p);
   1376       return;
   1377     case X64_FMT_MOV_RI:
   1378       parse_mov_ri(p);
   1379       return;
   1380     case X64_FMT_MOV_RM_LOAD:
   1381       parse_mov_rm_load(p);
   1382       return;
   1383     case X64_FMT_MOVZX_MOVSX:
   1384       parse_movzx_movsx(p);
   1385       return;
   1386     case X64_FMT_MOVSXD:
   1387       parse_movsxd(p);
   1388       return;
   1389     case X64_FMT_ALU_RM_IMM8:
   1390     case X64_FMT_ALU_RM_IMM32:
   1391       parse_alu_rm_imm(p);
   1392       return;
   1393     case X64_FMT_CMOVCC_RR:
   1394       parse_cmovcc(p);
   1395       return;
   1396     case X64_FMT_IMUL_RR:
   1397       parse_imul_rr(p);
   1398       return;
   1399     case X64_FMT_IMUL_RRI:
   1400       parse_imul_rri(p);
   1401       return;
   1402     case X64_FMT_F7_RM:
   1403       parse_f7_rm(p);
   1404       return;
   1405     case X64_FMT_SHIFT_IMM:
   1406     case X64_FMT_SHIFT_CL:
   1407       parse_shift(p);
   1408       return;
   1409     case X64_FMT_JCC_REL32:
   1410     case X64_FMT_JMP_REL32:
   1411     case X64_FMT_CALL_REL32:
   1412       parse_rel32_branch(p);
   1413       return;
   1414     case X64_FMT_SETCC_RM:
   1415       parse_setcc(p);
   1416       return;
   1417     case X64_FMT_SSE_RR:
   1418     case X64_FMT_SSE_LOAD:
   1419     case X64_FMT_SSE_STORE:
   1420       parse_sse_rr(p);
   1421       return;
   1422     case X64_FMT_BSWAP:
   1423       parse_bswap(p);
   1424       return;
   1425     case X64_FMT_BS:
   1426     case X64_FMT_POPCNT:
   1427       parse_bs_popcnt(p);
   1428       return;
   1429     case X64_FMT_XADD_MEM:
   1430     case X64_FMT_XCHG_MEM:
   1431     case X64_FMT_CMPXCHG_MEM:
   1432       parse_atomic(p);
   1433       return;
   1434     default:
   1435       asm_driver_panic(p->d, "x64 asm: format not implemented");
   1436   }
   1437 }
   1438 
   1439 /* Width letter (b/w/l/q) → width in bytes.  Falls back to row-implied
   1440  * width if the suffix is absent. */
   1441 static u32 width_from_info(const X64MnInfo* info, const X64InsnDesc* d) {
   1442   if (info->width != 0) return info->width;
   1443   {
   1444     u32 rw = row_implied_width(d);
   1445     return rw ? rw : 4u;
   1446   }
   1447 }
   1448 
   1449 static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) {
   1450   X64Asm* a = (X64Asm*)base;
   1451   MCEmitter* mc = asm_driver_mc(d);
   1452   Slice mnsl = pool_slice(asm_driver_pool(d), mnemonic);
   1453   const char* p = mnsl.s;
   1454   size_t n = mnsl.len;
   1455   X64MnInfo info;
   1456   const X64InsnDesc* desc;
   1457   X64ParseCtx ctx;
   1458   (void)a;
   1459   (void)asm_driver_cur_section(d);
   1460 
   1461   if (!p || !parse_mnemonic(p, n, &info))
   1462     asm_driver_panic(d, "x64 asm: bad mnemonic");
   1463 
   1464   if (n == 4 && memcmp(p, "lock", 4) == 0) {
   1465     AsmTok next;
   1466     u8 pfx = 0xf0;
   1467     mc->emit_bytes(mc, &pfx, 1);
   1468     next = asm_driver_next(d);
   1469     if (next.kind != ASM_TOK_IDENT)
   1470       asm_driver_panic(d, "x64 asm: lock requires an instruction");
   1471     x64_arch_asm_insn(base, d, next.v.ident);
   1472     return;
   1473   }
   1474 
   1475   /* Special case: imm→reg "mov" still spelled "movl"/"movq" but the
   1476    * generic scan returns ALU_RR (0x89) first.  When we see a "$"
   1477    * immediate as the first operand, we want MOV_RI instead.  Defer this
   1478    * disambiguation to parse_alu_rr would force pre-parsing operands;
   1479    * simpler is to special-case MOV here. */
   1480   if (info.base_len == 3 && memcmp(info.base, "mov", 3) == 0) {
   1481     /* Peek for leading '$' → immediate form. */
   1482     AsmTok t = asm_driver_peek(d);
   1483     if (asm_driver_tok_is_punct(t, '$')) {
   1484       /* Find the MOV_RI row. */
   1485       for (u32 i = 0; i < x64_insn_table_n; ++i) {
   1486         const X64InsnDesc* dr = &x64_insn_table[i];
   1487         if (dr->fmt == X64_FMT_MOV_RI && slice_eq_cstr(dr->mnemonic, "mov")) {
   1488           ctx.d = d;
   1489           ctx.mc = mc;
   1490           ctx.desc = dr;
   1491           ctx.width = info.width ? info.width : 4u;
   1492           ctx.cc = info.cc;
   1493           parse_mov_ri(&ctx);
   1494           return;
   1495         }
   1496       }
   1497     }
   1498     /* For mov reg,mem and mov mem,reg we need MOV_RM_LOAD (0x8B) for
   1499      * the load side.  Easiest: pre-parse src; if mem and dst is reg →
   1500      * MOV_RM_LOAD.  Doing so re-uses the AT&T parser cleanly. */
   1501     {
   1502       X64AsmOperand src = parse_operand(d);
   1503       expect_comma(d);
   1504       {
   1505         X64AsmOperand dst = parse_operand(d);
   1506         u32 w = info.width ? info.width : 4u;
   1507         if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_REG) {
   1508           if (w == 1u) {
   1509             /* MOV r/m8, r8 — opcode 0x88. */
   1510             emit_movb_rr_operand(d, mc, dst, src);
   1511             return;
   1512           }
   1513           if (w == 2u) {
   1514             u8 pfx = X64_OPSIZE_PFX;
   1515             mc->emit_bytes(mc, &pfx, 1);
   1516           }
   1517           emit_mov_rr(mc, width_to_w(w), dst.reg, src.reg);
   1518           return;
   1519         }
   1520         if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_MEM) {
   1521           if (w == 1u)
   1522             emit_movb_store_operand(d, mc, src, dst);
   1523           else
   1524             emit_mov_store_operand(d, mc, w, src.reg, dst, 0);
   1525           return;
   1526         }
   1527         if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) {
   1528           emit_mov_load_operand(d, mc, w, dst.reg, src);
   1529           return;
   1530         }
   1531         /* movd/movq between a GPR and an XMM register: 66 [REX.W] 0F 6E (to
   1532          * xmm) / 7E (to gpr). The xmm is always the ModRM.reg field, the gpr
   1533          * the r/m; movq sets REX.W (w==8), movd does not (w==4). */
   1534         if ((src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_XMM) ||
   1535             (src.kind == X64_ASM_OP_XMM && dst.kind == X64_ASM_OP_REG)) {
   1536           int to_xmm = (dst.kind == X64_ASM_OP_XMM);
   1537           u32 xmm = to_xmm ? dst.reg : src.reg;
   1538           u32 gpr = to_xmm ? src.reg : dst.reg;
   1539           emit_sse_rr_w(mc, X64_OPSIZE_PFX, to_xmm ? 0x6Eu : 0x7Eu,
   1540                         width_to_w(w), xmm, gpr);
   1541           return;
   1542         }
   1543         asm_driver_panic(d, "x64 asm: mov form");
   1544       }
   1545     }
   1546   }
   1547 
   1548   desc = find_mnemonic_row(&info);
   1549   if (!desc) asm_driver_panic(d, "x64 asm: unknown mnemonic");
   1550 
   1551   /* If the user wrote an indirect branch (`*%reg`), prefer the BR_RM row
   1552    * over the rel32 row that may sort first in the table. */
   1553   if (desc->fmt == X64_FMT_CALL_REL32 || desc->fmt == X64_FMT_JMP_REL32) {
   1554     AsmTok t = asm_driver_peek(d);
   1555     if (asm_driver_tok_is_punct(t, '*')) {
   1556       for (u32 i = 0; i < x64_insn_table_n; ++i) {
   1557         const X64InsnDesc* dr = &x64_insn_table[i];
   1558         if (dr->fmt != X64_FMT_BR_RM) continue;
   1559         if (!slice_eq(dr->mnemonic, (Slice){{info.base}, info.base_len}))
   1560           continue;
   1561         desc = dr;
   1562         break;
   1563       }
   1564     }
   1565   }
   1566 
   1567   ctx.d = d;
   1568   ctx.mc = mc;
   1569   ctx.desc = desc;
   1570   ctx.width = width_from_info(&info, desc);
   1571   ctx.cc = info.cc;
   1572   parse_and_emit_for_format(&ctx);
   1573 }
   1574 
   1575 static void x64_arch_asm_destroy(ArchAsm* base) {
   1576   x64_asm_close((X64Asm*)base);
   1577 }
   1578 
   1579 X64Asm* x64_asm_open(Compiler* c) {
   1580   X64Asm* a = arena_new(c->tu, X64Asm);
   1581   memset(a, 0, sizeof *a);
   1582   a->base.insn = x64_arch_asm_insn;
   1583   a->base.destroy = x64_arch_asm_destroy;
   1584   a->c = c;
   1585   return a;
   1586 }
   1587 
   1588 void x64_asm_close(X64Asm* a) { (void)a; }
   1589 
   1590 /* ---- cc -S symbolization hooks (printer <-> this parser) ------------------
   1591  *
   1592  * Inverse of the operand-syntax this parser accepts (parse_rel32_branch,
   1593  * x64_parse_reloc_suffix): how the printer spells a relocated x64 operand so it
   1594  * re-assembles. x64 relocs store addend-4 (rel32 bias), so addend_bias=4 makes
   1595  * the printed offset the symbol offset. R_PC32 covers BOTH a branch target and
   1596  * a RIP-relative lea/mov, so surgery is chosen from the operand text by the
   1597  * printer (an `(%rip)` operand uses RIP surgery); we just supply the modifier.
   1598  * Calls (R_X64_PLT32) print as a bare symbol — both kit-as (call default) and
   1599  * clang resolve a same-TU callee, so execution matches regardless of the exact
   1600  * reloc kind each assembler picks. */
   1601 static int x64_reloc_operand(u16 kind, KitObjFmt fmt, ArchRelocOperand* out) {
   1602   const char* suffix;
   1603   (void)fmt; /* x64 cc -S cross-targets ELF; one spelling */
   1604   switch (kind) {
   1605     case R_PC32:      /* jmp/jcc target, or RIP-relative lea/mov */
   1606     case R_X64_PLT32: /* call target -> bare symbol */
   1607       suffix = "";
   1608       break;
   1609     case R_X64_GOTPCREL:
   1610     case R_X64_GOTPCRELX:
   1611     case R_X64_REX_GOTPCRELX:
   1612       suffix = "@GOTPCREL"; /* RIP-relative GOT load */
   1613       break;
   1614     default:
   1615       return 0; /* data (R_ABS*) via emit_data_range; TLS/etc. unsymbolized */
   1616   }
   1617   out->surg =
   1618       ARCH_RELOC_SURG_TAIL; /* promoted to RIP by the printer if (%rip) */
   1619   out->prefix = "";
   1620   out->suffix = suffix;
   1621   out->addend_bias = 4;
   1622   return 1;
   1623 }
   1624 
   1625 /* Intra-section local branches whose target codegen resolved in place (no
   1626  * relocation): jmp and the Jcc family. Excludes call (always relocated) and
   1627  * indirect/register-form jumps (no numeric target to relabel). */
   1628 static int x64_is_local_branch(KitSlice m) {
   1629   static const char* const br[] = {
   1630       "jmp",
   1631       "jo",
   1632       "jno",
   1633       "jb",
   1634       "jae",
   1635       "je",
   1636       "jne",
   1637       "jbe",
   1638       "ja",
   1639       "js",
   1640       "jns",
   1641       "jp",
   1642       "jnp",
   1643       "jl",
   1644       "jge",
   1645       "jle",
   1646       "jg",
   1647       /* aliases the disassembler may not emit but harmless to accept */
   1648       "jz",
   1649       "jnz",
   1650       "jc",
   1651       "jnc",
   1652   };
   1653   u32 i;
   1654   for (i = 0; i < sizeof br / sizeof br[0]; ++i) {
   1655     size_t n = strlen(br[i]);
   1656     if (m.len == (u32)n && memcmp(m.s, br[i], n) == 0) return 1;
   1657   }
   1658   return 0;
   1659 }
   1660 
/* x64 ArchAsmOps table: exposes the two cc -S symbolization callbacks defined
 * in this file (relocated-operand spelling and local-branch recognition). */
const ArchAsmOps x64_asm_ops = {
    .reloc_operand = x64_reloc_operand,
    .is_local_branch = x64_is_local_branch,
};
   1665 
   1666 ArchAsm* x64_arch_asm_new(Compiler* c) { return &x64_asm_open(c)->base; }
   1667 
   1668 void x64_inline_bind(X64Asm* a, const AsmConstraint* outs, u32 nout,
   1669                      Operand* out_ops, const AsmConstraint* ins, u32 nin,
   1670                      const Operand* in_ops, const Sym* clobbers, u32 nclob) {
   1671   a->outs = outs;
   1672   a->out_ops = out_ops;
   1673   a->ins = ins;
   1674   a->in_ops = in_ops;
   1675   a->clobbers = clobbers;
   1676   a->nout = nout;
   1677   a->nin = nin;
   1678   a->nclob = nclob;
   1679 }
   1680 
/* Size in bytes of the stack buffer that holds one rendered inline-asm line;
 * a line that does not fit is reported as a buffer overflow. */
#define X64_INLINE_LINE_CAP 1024
   1682 
   1683 _Noreturn static void inline_panic(X64Asm* a, const char* msg) {
   1684   SrcLoc loc = {0, 0, 0};
   1685   compiler_panic(a->c, loc, "x64 inline asm: %.*s",
   1686                  SLICE_ARG(slice_from_cstr(msg)));
   1687 }
   1688 
/* Width selector for x64_reg_spelling: matches the operand-modifier
 * forms recognised by the template walker. */
#define X64_REG_WIDTH_64 0 /* rax … r15 */
#define X64_REG_WIDTH_32 1 /* eax … r15d */
#define X64_REG_WIDTH_8 2  /* al … r15b (low byte) */
#define X64_REG_WIDTH_16 3 /* ax … r15w */
#define X64_REG_WIDTH_H8 4 /* ah/ch/dh/bh (registers 0..3 only) */
   1696 
   1697 static void render_xmm(StrBuf* sb, u32 reg) {
   1698   strbuf_putc(sb, '%');
   1699   strbuf_puts(sb, "xmm");
   1700   reg &= 15u;
   1701   if (reg >= 10u) strbuf_putc(sb, (char)('0' + (reg / 10u)));
   1702   strbuf_putc(sb, (char)('0' + (reg % 10u)));
   1703 }
   1704 
   1705 static const char* x64_reg_spelling(u32 reg, int width) {
   1706   static const char* r64[16] = {
   1707       "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
   1708       "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
   1709   };
   1710   static const char* r32[16] = {
   1711       "eax", "ecx", "edx",  "ebx",  "esp",  "ebp",  "esi",  "edi",
   1712       "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
   1713   };
   1714   static const char* r8[16] = {
   1715       "al",  "cl",  "dl",   "bl",   "spl",  "bpl",  "sil",  "dil",
   1716       "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",
   1717   };
   1718   static const char* r16[16] = {
   1719       "ax",  "cx",  "dx",   "bx",   "sp",   "bp",   "si",   "di",
   1720       "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w",
   1721   };
   1722   static const char* rh8[4] = {"ah", "ch", "dh", "bh"};
   1723   if (width == X64_REG_WIDTH_H8) return reg < 4u ? rh8[reg] : NULL;
   1724   if (width == X64_REG_WIDTH_16) return r16[reg & 15u];
   1725   if (width == X64_REG_WIDTH_8) return r8[reg & 15u];
   1726   if (width == X64_REG_WIDTH_32) return r32[reg & 15u];
   1727   return r64[reg & 15u];
   1728 }
   1729 
   1730 static int x64_type_prefers_32(KitCgTypeId type) {
   1731   if (type == 0) return 0;
   1732   return !type_is_64(type);
   1733 }
   1734 
   1735 static void render_reg(StrBuf* sb, u32 reg, int width) {
   1736   const char* name = x64_reg_spelling(reg, width);
   1737   strbuf_putc(sb, '%');
   1738   if (name) strbuf_puts(sb, name);
   1739 }
   1740 
   1741 static void render_imm(StrBuf* sb, i64 v) {
   1742   strbuf_putc(sb, '$');
   1743   strbuf_put_i64(sb, v);
   1744 }
   1745 
   1746 static void render_indirect(StrBuf* sb, Reg base, i32 ofs) {
   1747   if (ofs) strbuf_put_i64(sb, (i64)ofs);
   1748   strbuf_putc(sb, '(');
   1749   render_reg(sb, (u32)base, X64_REG_WIDTH_64);
   1750   strbuf_putc(sb, ')');
   1751 }
   1752 
/* Operand-modifier forms used by the template walker.  The letter is the
 * character written between '%' and the operand reference in the template. */
#define X64_FORM_DEFAULT 0 /* no modifier — width chosen from operand type */
#define X64_FORM_W 1 /* %w — 16-bit */
#define X64_FORM_X 2 /* %x — 64-bit */
#define X64_FORM_A 3 /* %a — address / memory */
#define X64_FORM_B 4 /* %b — 8-bit (byte) register */
#define X64_FORM_K 5 /* %k — 32-bit */
#define X64_FORM_H 6 /* %h — high 8-bit register (a/c/d/b only) */
   1761 
   1762 static char x64_size_suffix_for_operand(X64Asm* a, u32 idx) {
   1763   u32 ntot = a->nout + a->nin;
   1764   const Operand* op;
   1765   u32 size;
   1766   if (idx >= ntot) inline_panic(a, "operand index out of range");
   1767   op = (idx < a->nout) ? &a->out_ops[idx] : &a->in_ops[idx - a->nout];
   1768   if (op->type)
   1769     size = type_byte_size(op->type);
   1770   else if (op->kind == OPK_IMM)
   1771     size = 4;
   1772   else
   1773     size = 8;
   1774   switch (size) {
   1775     case 1:
   1776       return 'b';
   1777     case 2:
   1778       return 'w';
   1779     case 4:
   1780       return 'l';
   1781     case 8:
   1782       return 'q';
   1783     default:
   1784       inline_panic(a, "%z requires a scalar 1/2/4/8-byte operand");
   1785   }
   1786 }
   1787 
   1788 static void render_operand(X64Asm* a, StrBuf* sb, u32 idx, int form) {
   1789   u32 ntot = a->nout + a->nin;
   1790   const Operand* op;
   1791   if (idx >= ntot) inline_panic(a, "operand index out of range");
   1792   op = (idx < a->nout) ? &a->out_ops[idx] : &a->in_ops[idx - a->nout];
   1793   if (form == X64_FORM_A) {
   1794     if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand");
   1795     if (op->v.ind.index != REG_NONE)
   1796       inline_panic(a, "inline asm: indexed addressing not supported");
   1797     render_indirect(sb, op->v.ind.base, op->v.ind.ofs);
   1798     return;
   1799   }
   1800   if ((form == X64_FORM_B || form == X64_FORM_H) &&
   1801       op->kind != X64_INLINE_OPK_REG) {
   1802     inline_panic(a, "byte-register modifier requires a register operand");
   1803   }
   1804   if (op->kind == X64_INLINE_OPK_REG) {
   1805     int width;
   1806     if (op->pad[0] == X64_INLINE_OPCLS_FP) {
   1807       render_xmm(sb, (u32)op->v.local);
   1808       return;
   1809     }
   1810     if (form == X64_FORM_B)
   1811       width = X64_REG_WIDTH_8;
   1812     else if (form == X64_FORM_H) {
   1813       if (op->v.local > X64_RBX) {
   1814         inline_panic(a, "%h modifier requires ax/cx/dx/bx register");
   1815       }
   1816       width = X64_REG_WIDTH_H8;
   1817     } else if (form == X64_FORM_W)
   1818       width = X64_REG_WIDTH_16;
   1819     else if (form == X64_FORM_K)
   1820       width = X64_REG_WIDTH_32;
   1821     else if (form == X64_FORM_X)
   1822       width = X64_REG_WIDTH_64;
   1823     else
   1824       width =
   1825           x64_type_prefers_32(op->type) ? X64_REG_WIDTH_32 : X64_REG_WIDTH_64;
   1826     render_reg(sb, (u32)op->v.local, width);
   1827     return;
   1828   }
   1829   if (op->kind == OPK_IMM) {
   1830     render_imm(sb, op->v.imm);
   1831     return;
   1832   }
   1833   if (op->kind == OPK_INDIRECT) {
   1834     if (op->v.ind.index != REG_NONE)
   1835       inline_panic(a, "inline asm: indexed addressing not supported");
   1836     render_indirect(sb, op->v.ind.base, op->v.ind.ofs);
   1837     return;
   1838   }
   1839   inline_panic(a, "unsupported operand kind");
   1840 }
   1841 
   1842 static u32 find_named_operand(X64Asm* a, const char* name, size_t len) {
   1843   Sym needle = pool_intern_slice(a->c->global, (Slice){.s = name, .len = len});
   1844   u32 i;
   1845   for (i = 0; i < a->nout; ++i) {
   1846     if (a->outs[i].name == needle) return i;
   1847   }
   1848   for (i = 0; i < a->nin; ++i) {
   1849     if (a->ins[i].name == needle) return a->nout + i;
   1850   }
   1851   inline_panic(a, "%[name] does not match any constraint");
   1852 }
   1853 
   1854 static void run_one_line(X64Asm* a, MCEmitter* mc, const char* text,
   1855                          size_t len) {
   1856   size_t i;
   1857   AsmLexer* lx;
   1858   AsmDriver* d;
   1859   AsmTok t;
   1860   for (i = 0; i < len; ++i) {
   1861     if (text[i] != ' ' && text[i] != '\t') break;
   1862   }
   1863   if (i == len) return;
   1864   lx = asm_lex_open_mem(a->c, "<inline-asm>", text, len);
   1865   d = asm_driver_open_inline(a->c, mc, lx);
   1866   t = asm_driver_peek(d);
   1867   while (t.kind == ASM_TOK_NEWLINE || t.kind == ASM_TOK_HASH) {
   1868     (void)asm_driver_next(d);
   1869     if (t.kind == ASM_TOK_HASH) {
   1870       while (!asm_driver_at_eol(d)) (void)asm_driver_next(d);
   1871     }
   1872     t = asm_driver_peek(d);
   1873   }
   1874   if (t.kind == ASM_TOK_EOF) {
   1875     asm_driver_close_inline(d);
   1876     asm_lex_close(lx);
   1877     return;
   1878   }
   1879   if (t.kind != ASM_TOK_IDENT) inline_panic(a, "expected mnemonic");
   1880   (void)asm_driver_next(d);
   1881   x64_arch_asm_insn(&a->base, d, t.v.ident);
   1882   asm_driver_close_inline(d);
   1883   asm_lex_close(lx);
   1884 }
   1885 
   1886 static void render_and_run_line(X64Asm* a, MCEmitter* mc, StrBuf* sb,
   1887                                 const char* start, const char* end) {
   1888   strbuf_reset(sb);
   1889   for (const char* p = start; p < end; ++p) {
   1890     char c = *p;
   1891     char n;
   1892     int form = 0;
   1893     if (c != '%') {
   1894       strbuf_putc(sb, c);
   1895       continue;
   1896     }
   1897     if (p + 1 >= end) inline_panic(a, "trailing '%' in template");
   1898     n = *(p + 1);
   1899     if (n == '%') {
   1900       strbuf_putc(sb, '%');
   1901       ++p;
   1902       continue;
   1903     }
   1904     if (n == 'w' || n == 'x' || n == 'a' || n == 'b' || n == 'k' || n == 'h' ||
   1905         n == 'z') {
   1906       form = (n == 'w')   ? X64_FORM_W
   1907              : (n == 'x') ? X64_FORM_X
   1908              : (n == 'a') ? X64_FORM_A
   1909              : (n == 'b') ? X64_FORM_B
   1910              : (n == 'k') ? X64_FORM_K
   1911              : (n == 'h') ? X64_FORM_H
   1912                           : -1;
   1913       ++p;
   1914       if (p + 1 >= end) inline_panic(a, "trailing '%' modifier");
   1915       n = *(p + 1);
   1916     }
   1917     if (n == '[') {
   1918       const char* nbeg = p + 2;
   1919       const char* nend = nbeg;
   1920       u32 idx;
   1921       while (nend < end && *nend != ']') ++nend;
   1922       if (nend == end) inline_panic(a, "unterminated %[name]");
   1923       idx = find_named_operand(a, nbeg, (size_t)(nend - nbeg));
   1924       p = nend;
   1925       if (form == -1)
   1926         strbuf_putc(sb, x64_size_suffix_for_operand(a, idx));
   1927       else
   1928         render_operand(a, sb, idx, form);
   1929       continue;
   1930     }
   1931     if (n < '0' || n > '9') inline_panic(a, "expected digit after '%'");
   1932     {
   1933       u32 idx = (u32)(n - '0');
   1934       ++p;
   1935       if (p + 1 < end && *(p + 1) >= '0' && *(p + 1) <= '9') {
   1936         idx = idx * 10u + (u32)(*(p + 1) - '0');
   1937         ++p;
   1938       }
   1939       if (form == -1)
   1940         strbuf_putc(sb, x64_size_suffix_for_operand(a, idx));
   1941       else
   1942         render_operand(a, sb, idx, form);
   1943     }
   1944   }
   1945   if (sb->truncated) inline_panic(a, "inline asm line buffer overflow");
   1946   run_one_line(a, mc, strbuf_cstr(sb), strbuf_len(sb));
   1947 }
   1948 
   1949 void x64_asm_run_template(X64Asm* a, MCEmitter* mc, const char* tmpl) {
   1950   char buf[X64_INLINE_LINE_CAP];
   1951   StrBuf sb;
   1952   const char* line_start;
   1953   int bracket = 0;
   1954   char quote = 0;
   1955   if (!tmpl || !*tmpl) return;
   1956   strbuf_init(&sb, buf, sizeof buf);
   1957   line_start = tmpl;
   1958   for (const char* p = tmpl;; ++p) {
   1959     char c = *p;
   1960     if (c == '\0') {
   1961       render_and_run_line(a, mc, &sb, line_start, p);
   1962       break;
   1963     }
   1964     if (quote) {
   1965       if (c == '\\' && *(p + 1)) {
   1966         ++p;
   1967         continue;
   1968       }
   1969       if (c == quote) quote = 0;
   1970       continue;
   1971     }
   1972     if (c == '"' || c == '\'') {
   1973       quote = c;
   1974       continue;
   1975     }
   1976     if (c == '[') {
   1977       ++bracket;
   1978       continue;
   1979     }
   1980     if (c == ']') {
   1981       if (bracket) --bracket;
   1982       continue;
   1983     }
   1984     if (bracket == 0 && (c == '\n' || c == ';')) {
   1985       render_and_run_line(a, mc, &sb, line_start, p);
   1986       line_start = p + 1;
   1987     }
   1988   }
   1989 }