boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

arm64-asm.c (63965B)


      1 /*************************************************************/
      2 /*
      3  *  ARM64 (AArch64) assembler for TCC — phase 2.
      4  *
      5  *  Phase 1 covered the in-tree .S surface (mov/add/ldr/str/ldp/stp/
      6  *  b/bl/ret/svc, register and simple immediate operands only). Phase 2
      7  *  broadens to roughly riscv64-asm.c parity:
      8  *    DP-imm:  add/sub/cmp/cmn (+set-flags), and/orr/eor/tst (logical),
      9  *             movz/movn/movk, sbfm/ubfm/bfm + lsl/lsr/asr/sxtb/sxth/
     10  *             sxtw/uxtb/uxth aliases.
     11  *    DP-reg:  add/sub/adds/subs (shifted + extended), cmp/cmn/neg/mvn
     12  *             aliases, and/orr/eor/bic/orn (shifted), lslv/lsrv/asrv/rorv
     13  *             (with lsl/lsr/asr/ror reg aliases), mul/mneg/madd/msub +
     14  *             smull/umull family, csel/csinc/csinv/csneg + cset/cinc
     15  *             aliases, sdiv/udiv.
     16  *    Mem:     ldr/str/ldrb/ldrh/ldrsb/ldrsh/ldrsw/strb/strh + register-
     17  *             offset and pre/post-indexed forms; ldp/stp full forms.
     18  *    Branch:  b/bl/ret/br/blr + b.cond/cbz/cbnz/tbz/tbnz (in-section),
     19  *             plus full SVC/BRK/HLT/HVC/SMC/HINT (nop/yield/wfe/wfi…).
     20  *    Pseudo:  ldr Xn, =imm64 → arm64_movimm chain; ldr Xn, =sym →
     21  *             4× MOVW_UABS_G* reloc chain.
     22  *
     23  *  Inline-__asm__ constraint plumbing remains stubbed in the riscv64-asm
     24  *  shape; .S input + top-level __asm__("…") works, constraint-driven
     25  *  asm gen is phase 3.
     26  *
     27  *  arm64_movimm() and arm64_encode_bimm64() live as static helpers in
     28  *  arm64-gen.c — under ONE_SOURCE both arm64-gen.c and this file are
     29  *  pulled into one TU (tcc.h includes them sequentially under the
     30  *  TCC_TARGET_ARM64 block), so we call them directly.
     31  */
     32 
     33 #ifdef TARGET_DEFS_ONLY
     34 
/* Declarations exported when this file is included with TARGET_DEFS_ONLY. */
#define CONFIG_TCC_ASM
/* NOTE(review): presumably the count of register names x0..x30 plus sp/zr —
   confirm against the users of NB_ASM_REGS. */
#define NB_ASM_REGS 32

/* Raw little-endian emitters, defined in the non-TARGET_DEFS_ONLY half of
   this file. */
ST_FUNC void g(int c);
ST_FUNC void gen_le16(int c);
ST_FUNC void gen_le32(int c);
     41 
     42 /*************************************************************/
     43 #else
     44 /*************************************************************/
     45 #define USING_GLOBALS
     46 #include "tcc.h"
     47 
     48 ST_FUNC void g(int c)
     49 {
     50     int ind1;
     51     if (nocode_wanted)
     52         return;
     53     ind1 = ind + 1;
     54     if (ind1 > cur_text_section->data_allocated)
     55         section_realloc(cur_text_section, ind1);
     56     cur_text_section->data[ind] = c;
     57     ind = ind1;
     58 }
     59 
     60 ST_FUNC void gen_le16(int i)
     61 {
     62     g(i);
     63     g(i >> 8);
     64 }
     65 
     66 ST_FUNC void gen_le32(int i)
     67 {
     68     int ind1;
     69     if (nocode_wanted)
     70         return;
     71     ind1 = ind + 4;
     72     if (ind1 > cur_text_section->data_allocated)
     73         section_realloc(cur_text_section, ind1);
     74     cur_text_section->data[ind++] = i & 0xff;
     75     cur_text_section->data[ind++] = (i >> 8) & 0xff;
     76     cur_text_section->data[ind++] = (i >> 16) & 0xff;
     77     cur_text_section->data[ind++] = (i >> 24) & 0xff;
     78 }
     79 
     80 ST_FUNC void gen_expr32(ExprValue *pe)
     81 {
     82     if (pe->pcrel) {
     83         /* `.long sym - .` style — emit R_AARCH64_PREL32. asm_expr_sum
     84            biased pe->v by +4 (x86 PC32 convention); subtract it back. */
     85         greloca(cur_text_section, pe->sym, ind, R_AARCH64_PREL32, pe->v - 4);
     86         gen_le32(0);
     87     } else if (pe->sym) {
     88         greloca(cur_text_section, pe->sym, ind, R_AARCH64_ABS32, pe->v);
     89         gen_le32(0);
     90     } else {
     91         gen_le32(pe->v);
     92     }
     93 }
     94 
     95 ST_FUNC void gen_expr64(ExprValue *pe)
     96 {
     97     if (pe->pcrel) {
     98         /* `.quad sym - .` / `.quad sym - sym2` (sym2 same-section): the
     99            asm_expr_sum pcrel branch left pe->v biased by +4 (x86 PC32
    100            convention). Compensate so the addend matches gas's
    101            R_AARCH64_PREL64 emission. */
    102         greloca(cur_text_section, pe->sym, ind, R_AARCH64_PREL64, pe->v - 4);
    103         gen_le32(0);
    104         gen_le32(0);
    105     } else if (pe->sym) {
    106         greloca(cur_text_section, pe->sym, ind, R_AARCH64_ABS64, pe->v);
    107         gen_le32(0);
    108         gen_le32(0);
    109     } else {
    110         gen_le32((uint32_t)pe->v);
    111         gen_le32((uint32_t)((uint64_t)pe->v >> 32));
    112     }
    113 }
    114 
    115 /* ---- operand model ------------------------------------------------ */
    116 
    117 #define OP_NONE     0
    118 #define OP_REG      (1 << 0)
    119 #define OP_IMM      (1 << 1)
    120 #define OP_MEM      (1 << 2)
    121 #define OP_COND     (1 << 3)
    122 #define OP_LITERAL  (1 << 4)   /* `=imm` / `=sym` after ldr */
    123 
    124 /* Shift kinds (also the 2-bit shift field for shifted-reg ops). */
    125 #define SH_LSL  0
    126 #define SH_LSR  1
    127 #define SH_ASR  2
    128 #define SH_ROR  3
    129 #define SH_NONE 4
    130 
    131 /* Extend kinds (also the 3-bit option field for extended-reg ops). */
    132 #define EXT_UXTB 0
    133 #define EXT_UXTH 1
    134 #define EXT_UXTW 2
    135 #define EXT_UXTX 3
    136 #define EXT_SXTB 4
    137 #define EXT_SXTH 5
    138 #define EXT_SXTW 6
    139 #define EXT_SXTX 7
    140 #define EXT_NONE 8
    141 
    142 /* Memory addressing modes. */
    143 #define IDX_OFFSET   0   /* [Xn] / [Xn, #imm]              */
    144 #define IDX_PREIDX   1   /* [Xn, #imm]!                    */
    145 #define IDX_POSTIDX  2   /* [Xn], #imm                     */
    146 #define IDX_REGOFF   3   /* [Xn, Xm{,LSL #s|UXTW #s|…}]    */
    147 
/* One parsed instruction operand. `kind` selects which group of fields is
 * meaningful: register fields for OP_REG, memory fields for OP_MEM, `cond`
 * for OP_COND, and `e` for OP_IMM / OP_LITERAL (and for the displacement of
 * an OP_MEM operand). */
typedef struct AArch64Op {
    uint32_t kind;           /* one of the OP_* values */
    uint8_t  reg;            /* register number (also Rt for OP_MEM Rt forms) */
    uint8_t  is_w;           /* 1 = W-form, 0 = X-form */
    uint8_t  is_sp;          /* 1 if textual form was sp/wsp (vs zr) */

    /* Shifted/extended register operand decorations. */
    uint8_t  shift_kind;     /* SH_*; SH_NONE if none */
    uint8_t  shift_amt;
    uint8_t  ext_kind;       /* EXT_*; EXT_NONE if none */
    uint8_t  ext_amt;        /* shift after extend; 0 if absent */

    /* Memory operand fields (kind == OP_MEM). */
    uint8_t  base;           /* base register number */
    uint8_t  base_is_sp;     /* 1 if the base was written as sp */
    uint8_t  idx_reg;        /* index register number (IDX_REGOFF only) */
    uint8_t  idx_is_w;       /* 1 if the index register was a W register */
    uint8_t  indexing;       /* IDX_* */
    uint8_t  mem_ext_kind;   /* EXT_*; EXT_NONE for plain LSL or no shift */
    uint8_t  mem_ext_amt;    /* lsl/extend amount on register-offset form */
    uint8_t  mem_has_shift;  /* 1 if [Xn, Xm, lsl #s] / extend present */

    uint8_t  cond;           /* condition code 0..15 */
    ExprValue e;             /* immediate value or label expression */
} AArch64Op;
    173 
    174 /* ---- forward declarations ---------------------------------------- */
    175 
    176 static int arm64_parse_reg(int t, uint8_t *preg, uint8_t *pis_w, uint8_t *pis_sp);
    177 
    178 /* ---- token classification helpers -------------------------------- */
    179 
    180 /* Translate a token to a 4-bit cond code, or -1 if not a cond. */
    181 static int tok_to_cond(int t)
    182 {
    183     if (t >= TOK_ASM_eq && t <= TOK_ASM_nv)
    184         return t - TOK_ASM_eq;
    185     if (t == TOK_ASM_hs) return 2;   /* alias of cs */
    186     if (t == TOK_ASM_lo) return 3;   /* alias of cc */
    187     return -1;
    188 }
    189 
    190 static int tok_to_shift(int t)
    191 {
    192     if (t == TOK_ASM_lsl) return SH_LSL;
    193     if (t == TOK_ASM_lsr) return SH_LSR;
    194     if (t == TOK_ASM_asr) return SH_ASR;
    195     if (t == TOK_ASM_ror) return SH_ROR;
    196     return -1;
    197 }
    198 
    199 static int tok_to_extend(int t)
    200 {
    201     if (t == TOK_ASM_uxtb) return EXT_UXTB;
    202     if (t == TOK_ASM_uxth) return EXT_UXTH;
    203     if (t == TOK_ASM_uxtw) return EXT_UXTW;
    204     if (t == TOK_ASM_uxtx) return EXT_UXTX;
    205     if (t == TOK_ASM_sxtb) return EXT_SXTB;
    206     if (t == TOK_ASM_sxth) return EXT_SXTH;
    207     if (t == TOK_ASM_sxtw) return EXT_SXTW;
    208     if (t == TOK_ASM_sxtx) return EXT_SXTX;
    209     return -1;
    210 }
    211 
    212 /* Recognise a register-name token. Returns 1 if matched. */
    213 static int arm64_parse_reg(int t, uint8_t *preg, uint8_t *pis_w, uint8_t *pis_sp)
    214 {
    215     if (t >= TOK_ASM_x0 && t <= TOK_ASM_x30) {
    216         *preg = t - TOK_ASM_x0; *pis_w = 0; *pis_sp = 0; return 1;
    217     }
    218     if (t == TOK_ASM_sp)  { *preg = 31; *pis_w = 0; *pis_sp = 1; return 1; }
    219     if (t == TOK_ASM_xzr) { *preg = 31; *pis_w = 0; *pis_sp = 0; return 1; }
    220     if (t >= TOK_ASM_w0 && t <= TOK_ASM_w30) {
    221         *preg = t - TOK_ASM_w0; *pis_w = 1; *pis_sp = 0; return 1;
    222     }
    223     if (t == TOK_ASM_wsp) { *preg = 31; *pis_w = 1; *pis_sp = 1; return 1; }
    224     if (t == TOK_ASM_wzr) { *preg = 31; *pis_w = 1; *pis_sp = 0; return 1; }
    225     if (t == TOK_ASM_lr)  { *preg = 30; *pis_w = 0; *pis_sp = 0; return 1; }
    226     if (t == TOK_ASM_fp)  { *preg = 29; *pis_w = 0; *pis_sp = 0; return 1; }
    227     if (t == TOK_ASM_ip0) { *preg = 16; *pis_w = 0; *pis_sp = 0; return 1; }
    228     if (t == TOK_ASM_ip1) { *preg = 17; *pis_w = 0; *pis_sp = 0; return 1; }
    229     return 0;
    230 }
    231 
    232 ST_FUNC int asm_parse_regvar(int t)
    233 {
    234     uint8_t r, w, sp;
    235     if (arm64_parse_reg(t, &r, &w, &sp))
    236         return r;
    237     return -1;
    238 }
    239 
    240 static void asm_skip_comma(void)
    241 {
    242     if (tok == ',') next();
    243     else expect("','");
    244 }
    245 
    246 static void asm_skip_hash(void)
    247 {
    248     if (tok == '#') next();
    249 }
    250 
    251 static int at_end_of_insn(void)
    252 {
    253     return tok == ';' || tok == TOK_LINEFEED || tok == TOK_EOF;
    254 }
    255 
    256 /* Parse `, lsl #n` or `, lsr #n` etc. after a register operand.
    257  * On entry the leading comma has been consumed; tok is the first
    258  * token of the shift specifier. */
    259 static void parse_reg_shift_or_extend(TCCState *s1, AArch64Op *op)
    260 {
    261     int sh, ex;
    262     ExprValue e;
    263     if ((sh = tok_to_shift(tok)) >= 0) {
    264         next();
    265         e.v = 0; e.sym = NULL; e.pcrel = 0;
    266         if (!at_end_of_insn() && tok != ',') {
    267             asm_skip_hash();
    268             asm_expr(s1, &e);
    269         }
    270         if (e.sym)
    271             tcc_error("shift amount must be a constant");
    272         op->shift_kind = sh;
    273         op->shift_amt  = (uint8_t)e.v;
    274         return;
    275     }
    276     if ((ex = tok_to_extend(tok)) >= 0) {
    277         next();
    278         e.v = 0; e.sym = NULL; e.pcrel = 0;
    279         if (!at_end_of_insn() && tok != ',') {
    280             if (tok == '#') asm_skip_hash();
    281             asm_expr(s1, &e);
    282         }
    283         if (e.sym)
    284             tcc_error("extend amount must be a constant");
    285         op->ext_kind = ex;
    286         op->ext_amt  = (uint8_t)e.v;
    287         return;
    288     }
    289     expect("shift / extend specifier");
    290 }
    291 
    292 /* Parse [Xn], [Xn, #imm], [Xn, #imm]!, [Xn], #imm, [Xn, Xm{,extend}]. */
    293 static void parse_mem(TCCState *s1, AArch64Op *op)
    294 {
    295     uint8_t r, w, sp;
    296     skip('[');
    297     if (!arm64_parse_reg(tok, &r, &w, &sp) || w)
    298         expect("64-bit base register");
    299     op->kind          = OP_MEM;
    300     op->base          = r;
    301     op->base_is_sp    = sp;
    302     op->indexing      = IDX_OFFSET;
    303     op->mem_ext_kind  = EXT_NONE;
    304     op->mem_ext_amt   = 0;
    305     op->mem_has_shift = 0;
    306     op->idx_reg       = 0;
    307     op->idx_is_w      = 0;
    308     op->e.v = 0; op->e.sym = NULL; op->e.pcrel = 0;
    309     next();
    310     if (tok == ',') {
    311         next();
    312         if (arm64_parse_reg(tok, &r, &w, &sp) && !sp) {
    313             /* register-offset form */
    314             op->indexing = IDX_REGOFF;
    315             op->idx_reg  = r;
    316             op->idx_is_w = w;
    317             next();
    318             if (tok == ',') {
    319                 next();
    320                 /* Either lsl #imm, or one of the extend keywords (uxtw/sxtw/sxtx). */
    321                 int sh = tok_to_shift(tok);
    322                 int ex = tok_to_extend(tok);
    323                 if (sh == SH_LSL) {
    324                     next();
    325                     asm_skip_hash();
    326                     {
    327                         ExprValue e = {0};
    328                         asm_expr(s1, &e);
    329                         if (e.sym) tcc_error("lsl amount must be constant");
    330                         op->mem_ext_kind  = EXT_NONE;
    331                         op->mem_ext_amt   = (uint8_t)e.v;
    332                         op->mem_has_shift = 1;
    333                     }
    334                 } else if (ex >= 0) {
    335                     next();
    336                     op->mem_ext_kind = (uint8_t)ex;
    337                     op->mem_ext_amt  = 0;
    338                     op->mem_has_shift = 1;
    339                     if (tok == '#') {
    340                         next();
    341                         {
    342                             ExprValue e = {0};
    343                             asm_expr(s1, &e);
    344                             if (e.sym) tcc_error("extend amount must be constant");
    345                             op->mem_ext_amt = (uint8_t)e.v;
    346                         }
    347                     }
    348                 } else {
    349                     expect("lsl / extend specifier");
    350                 }
    351             }
    352         } else {
    353             asm_skip_hash();
    354             asm_expr(s1, &op->e);
    355         }
    356     }
    357     skip(']');
    358     if (tok == '!') {
    359         next();
    360         op->indexing = IDX_PREIDX;
    361     } else if (tok == ',' && op->indexing == IDX_OFFSET
    362                && op->e.v == 0 && op->e.sym == NULL) {
    363         /* post-indexed form: [Xn], #imm — only if no in-bracket disp. */
    364         next();
    365         asm_skip_hash();
    366         asm_expr(s1, &op->e);
    367         op->indexing = IDX_POSTIDX;
    368     }
    369 }
    370 
    371 /* Parse one operand. */
    372 static void parse_operand(TCCState *s1, AArch64Op *op)
    373 {
    374     uint8_t r, w, sp;
    375     int c;
    376 
    377     op->kind        = OP_NONE;
    378     op->reg         = 0;
    379     op->is_w        = 0;
    380     op->is_sp       = 0;
    381     op->shift_kind  = SH_NONE;
    382     op->shift_amt   = 0;
    383     op->ext_kind    = EXT_NONE;
    384     op->ext_amt     = 0;
    385     op->cond        = 0;
    386     op->e.v = 0; op->e.sym = NULL; op->e.pcrel = 0;
    387 
    388     if (arm64_parse_reg(tok, &r, &w, &sp)) {
    389         op->kind = OP_REG;
    390         op->reg = r; op->is_w = w; op->is_sp = sp;
    391         next();
    392         return;
    393     }
    394     if ((c = tok_to_cond(tok)) >= 0) {
    395         op->kind = OP_COND;
    396         op->cond = (uint8_t)c;
    397         next();
    398         return;
    399     }
    400     if (tok == '[') {
    401         parse_mem(s1, op);
    402         return;
    403     }
    404     if (tok == '=') {
    405         /* ldr Xn, =imm  or  ldr Xn, =sym */
    406         next();
    407         asm_expr(s1, &op->e);
    408         op->kind = OP_LITERAL;
    409         return;
    410     }
    411     asm_skip_hash();
    412     asm_expr(s1, &op->e);
    413     op->kind = OP_IMM;
    414 }
    415 
    416 /* ---- bit-field encoding helper ---------------------------------- */
    417 /* arm64_encode_bimm64() is provided by arm64-gen.c (static, same TU). */
    418 
    419 /* ---- encoders ----------------------------------------------------- */
    420 
    421 static uint32_t sf_bit(int is_w) { return is_w ? 0u : (1u << 31); }
    422 
    423 /* ADD/SUB (immediate). is_sub flips the polarity; set_flags sets the S bit. */
    424 static void emit_addsub_imm(int rd, int rn, int64_t imm, int is_w,
    425                             int is_sub, int set_flags)
    426 {
    427     uint32_t op;
    428     if (imm < 0) {
    429         imm = -imm;
    430         is_sub = !is_sub;
    431     }
    432     op = 0x11000000u;                         /* ADD imm  base */
    433     op |= sf_bit(is_w);
    434     if (is_sub)    op |= (1u << 30);
    435     if (set_flags) op |= (1u << 29);
    436     if (imm >= 0 && imm < 4096) {
    437         gen_le32(op | (((uint32_t)imm) << 10) | (rn << 5) | rd);
    438     } else if (imm >= 0 && (imm & 0xfff) == 0 && (imm >> 12) < 4096) {
    439         gen_le32(op | (1u << 22) | (((uint32_t)(imm >> 12)) << 10) | (rn << 5) | rd);
    440     } else {
    441         tcc_error("add/sub immediate out of range");
    442     }
    443 }
    444 
    445 /* ADD/SUB (shifted register). */
    446 static void emit_addsub_reg(int rd, int rn, int rm, int is_w,
    447                             int is_sub, int set_flags,
    448                             int shift_kind, int shift_amt)
    449 {
    450     uint32_t op = 0x0b000000u;                /* base ADD shift-reg */
    451     op |= sf_bit(is_w);
    452     if (is_sub)    op |= (1u << 30);
    453     if (set_flags) op |= (1u << 29);
    454     if (shift_kind == SH_NONE) shift_kind = SH_LSL;
    455     if (shift_kind == SH_ROR)
    456         tcc_error("add/sub: ROR shift not allowed");
    457     if (shift_amt < 0 || shift_amt > (is_w ? 31 : 63))
    458         tcc_error("add/sub: shift amount out of range");
    459     op |= (uint32_t)shift_kind << 22;
    460     op |= (rm << 16) | ((uint32_t)shift_amt << 10) | (rn << 5) | rd;
    461     gen_le32(op);
    462 }
    463 
    464 /* ADD/SUB (extended register). */
    465 static void emit_addsub_ext(int rd, int rn, int rm, int is_w,
    466                             int is_sub, int set_flags,
    467                             int ext_kind, int ext_amt)
    468 {
    469     uint32_t op = 0x0b200000u;                /* base ADD extended-reg */
    470     op |= sf_bit(is_w);
    471     if (is_sub)    op |= (1u << 30);
    472     if (set_flags) op |= (1u << 29);
    473     if (ext_amt < 0 || ext_amt > 4)
    474         tcc_error("add/sub extend: shift out of range");
    475     op |= ((uint32_t)ext_kind & 7u) << 13;
    476     op |= (rm << 16) | ((uint32_t)ext_amt << 10) | (rn << 5) | rd;
    477     gen_le32(op);
    478 }
    479 
    480 /* Logical (immediate): AND/ORR/EOR/ANDS. */
    481 static void emit_log_imm(int rd, int rn, uint64_t imm, int is_w, int op2)
    482 {
    483     /* op2: 0=AND, 1=ORR, 2=EOR, 3=ANDS */
    484     uint32_t insn;
    485     int e;
    486     uint64_t v = is_w ? (imm | imm << 32) : imm;     /* widen for encoder */
    487     e = arm64_encode_bimm64(v);
    488     if (e < 0)
    489         tcc_error("logical immediate not encodable");
    490     insn = 0x12000000u | sf_bit(is_w) | ((uint32_t)op2 << 29) |
    491            ((uint32_t)e << 10) | (rn << 5) | rd;
    492     /* arm64_encode_bimm64 sets bit12 (=N) appropriately for 64-bit;
    493        for 32-bit ops the N bit must be clear, but the widened value
    494        above forces a 32-bit pattern with N=0 already.            */
    495     gen_le32(insn);
    496 }
    497 
    498 /* Logical (shifted register): AND/ORR/EOR/ANDS with optional invert (BIC/ORN/EON/BICS). */
    499 static void emit_log_reg(int rd, int rn, int rm, int is_w,
    500                          int op2, int invert, int shift_kind, int shift_amt)
    501 {
    502     uint32_t op = 0x0a000000u | sf_bit(is_w) | ((uint32_t)op2 << 29);
    503     if (shift_kind == SH_NONE) shift_kind = SH_LSL;
    504     if (shift_amt < 0 || shift_amt > (is_w ? 31 : 63))
    505         tcc_error("logical: shift amount out of range");
    506     op |= (uint32_t)shift_kind << 22;
    507     if (invert) op |= (1u << 21);
    508     op |= (rm << 16) | ((uint32_t)shift_amt << 10) | (rn << 5) | rd;
    509     gen_le32(op);
    510 }
    511 
    512 /* MOVZ/MOVN/MOVK (single 16-bit hword + LSL). */
    513 static void emit_movw(int rd, int hw_imm, int hw_shift, int is_w, int op2)
    514 {
    515     /* op2: 0=MOVN, 2=MOVZ, 3=MOVK */
    516     uint32_t op;
    517     if (hw_imm < 0 || hw_imm > 0xffff)
    518         tcc_error("movz/movn/movk: imm16 out of range");
    519     if ((hw_shift & 0xf) != 0 || hw_shift < 0 || hw_shift > (is_w ? 16 : 48))
    520         tcc_error("movz/movn/movk: shift must be 0/16/32/48");
    521     op = 0x12800000u | sf_bit(is_w) | ((uint32_t)op2 << 29) |
    522          (((uint32_t)hw_shift / 16) << 21) |
    523          ((uint32_t)hw_imm << 5) | rd;
    524     gen_le32(op);
    525 }
    526 
    527 /* SBFM/BFM/UBFM. */
    528 static void emit_bfm(int rd, int rn, int immr, int imms, int is_w, int op2)
    529 {
    530     /* op2: 0=SBFM, 1=BFM, 2=UBFM */
    531     int width = is_w ? 31 : 63;
    532     uint32_t op;
    533     if (immr < 0 || immr > width || imms < 0 || imms > width)
    534         tcc_error("bfm: bit positions out of range");
    535     op = 0x13000000u | sf_bit(is_w) | ((uint32_t)op2 << 29);
    536     if (!is_w) op |= (1u << 22);              /* N bit follows sf in 64-bit forms */
    537     op |= ((uint32_t)immr << 16) | ((uint32_t)imms << 10) | (rn << 5) | rd;
    538     gen_le32(op);
    539 }
    540 
    541 /* Variable-shift (LSLV/LSRV/ASRV/RORV). op2: 8=LSLV, 9=LSRV, 10=ASRV, 11=RORV. */
    542 static void emit_shift_reg(int rd, int rn, int rm, int is_w, int op2)
    543 {
    544     uint32_t op = 0x1ac02000u | sf_bit(is_w) | (rm << 16) |
    545                   ((uint32_t)(op2 & 0xf) << 10) | (rn << 5) | rd;
    546     gen_le32(op);
    547 }
    548 
    549 /* SDIV/UDIV. is_signed=1 => SDIV. */
    550 static void emit_div(int rd, int rn, int rm, int is_w, int is_signed)
    551 {
    552     uint32_t op = 0x1ac00800u | sf_bit(is_w) | (rm << 16) |
    553                   (rn << 5) | rd;
    554     if (is_signed) op |= (1u << 10);
    555     gen_le32(op);
    556 }
    557 
    558 /* MADD/MSUB (32 or 64 bit). is_sub flips the o0 bit. */
    559 static void emit_madd(int rd, int rn, int rm, int ra, int is_w, int is_sub)
    560 {
    561     uint32_t op = 0x1b000000u | sf_bit(is_w) | (rm << 16) |
    562                   (ra << 10) | (rn << 5) | rd;
    563     if (is_sub) op |= (1u << 15);
    564     gen_le32(op);
    565 }
    566 
    567 /* SMADDL/UMADDL/SMSUBL/UMSUBL/SMULL/UMULL (long mul). */
    568 static void emit_madd_long(int rd, int rn, int rm, int ra,
    569                            int is_unsigned, int is_sub)
    570 {
    571     uint32_t op = 0x9b200000u | (rm << 16) | (ra << 10) | (rn << 5) | rd;
    572     if (is_unsigned) op |= (1u << 23);
    573     if (is_sub)      op |= (1u << 15);
    574     gen_le32(op);
    575 }
    576 
    577 /* SMULH / UMULH. */
    578 static void emit_mulh(int rd, int rn, int rm, int is_unsigned)
    579 {
    580     uint32_t op = 0x9b407c00u | (rm << 16) | (rn << 5) | rd;
    581     if (is_unsigned) op |= (1u << 23);
    582     gen_le32(op);
    583 }
    584 
    585 /* CSEL/CSINC/CSINV/CSNEG. op2: 00=SEL, 01=INC, 10=INV, 11=NEG (bit15 invert, bit10 inc/neg). */
    586 static void emit_csel(int rd, int rn, int rm, int cond, int is_w,
    587                       int invert, int inc_neg)
    588 {
    589     uint32_t op = 0x1a800000u | sf_bit(is_w) | (rm << 16) |
    590                   ((uint32_t)(cond & 0xf) << 12) |
    591                   (rn << 5) | rd;
    592     if (invert)  op |= (1u << 30);
    593     if (inc_neg) op |= (1u << 10);
    594     gen_le32(op);
    595 }
    596 
    597 /* LDR/STR (immediate, unsigned offset / unscaled / pre-/post-indexed).
    598  * size: 0=byte,1=halfword,2=word,3=dword. opc encodes load/store/sign:
    599  *   STR=0, LDR=1, LDRSx 64-target=2, LDRSx 32-target=3 (size<3 only).
    600  * For size=3 + opc=0/1 = STR/LDR X.
    601  */
    602 static void emit_ldst_imm(int opc, int size, int rt, int rn,
    603                           int64_t imm, int indexing)
    604 {
    605     uint32_t op;
    606     if (indexing == IDX_OFFSET) {
    607         /* unsigned-offset, scaled by 1<<size, range 0..4095 */
    608         int64_t scale = (int64_t)1 << size;
    609         int64_t scaled;
    610         if (imm < 0 || (imm & (scale - 1)))
    611             tcc_error("ldr/str: immediate offset must be unsigned & scaled");
    612         scaled = imm >> size;
    613         if (scaled < 0 || scaled > 4095)
    614             tcc_error("ldr/str: unsigned offset out of range");
    615         op = 0x39000000u | ((uint32_t)size << 30) | ((uint32_t)opc << 22) |
    616              (((uint32_t)scaled) << 10) | (rn << 5) | rt;
    617         gen_le32(op);
    618         return;
    619     }
    620     /* signed 9-bit forms: post-index, pre-index, or unscaled (LDUR/STUR). */
    621     if (imm < -256 || imm > 255)
    622         tcc_error("ldr/str: signed-9 offset out of range");
    623     op = 0x38000000u | ((uint32_t)size << 30) | ((uint32_t)opc << 22) |
    624          (((uint32_t)imm & 0x1ff) << 12) | (rn << 5) | rt;
    625     if      (indexing == IDX_POSTIDX) op |= (1u << 10);
    626     else if (indexing == IDX_PREIDX)  op |= (3u << 10);
    627     /* IDX_OFFSET with unscaled (LDUR/STUR) leaves these bits zero. */
    628     gen_le32(op);
    629 }
    630 
    631 /* LDR/STR (register offset, with optional shift/extend). */
    632 static void emit_ldst_reg(int opc, int size, int rt, int rn, int rm,
    633                           int idx_is_w, int ext_kind, int ext_amt,
    634                           int has_shift)
    635 {
    636     uint32_t option;
    637     uint32_t s_bit = 0;
    638     if (ext_kind == EXT_NONE) {
    639         /* implicit LSL — option=011 (UXTX) for X-form, 010 (UXTW) for W-form */
    640         option = idx_is_w ? 2 : 3;
    641         if (has_shift) {
    642             if (ext_amt != 0 && ext_amt != size)
    643                 tcc_error("ldr/str: lsl amount must be 0 or %d", size);
    644             if (ext_amt == size && size > 0) s_bit = 1;
    645         }
    646     } else {
    647         option = (uint32_t)ext_kind & 7u;
    648         if (has_shift && ext_amt != 0) {
    649             if (ext_amt != size)
    650                 tcc_error("ldr/str: extend shift must be 0 or %d", size);
    651             s_bit = 1;
    652         }
    653     }
    654     {
    655         uint32_t op = 0x38200800u | ((uint32_t)size << 30) | ((uint32_t)opc << 22) |
    656                       (rm << 16) | (option << 13) | (s_bit << 12) |
    657                       (rn << 5) | rt;
    658         gen_le32(op);
    659     }
    660 }
    661 
    662 /* LDP/STP (X-form / W-form). */
    663 static void emit_ldst_pair(int is_load, int rt1, int rt2, int rn,
    664                            int64_t imm, int indexing, int is_w)
    665 {
    666     uint32_t op = 0x28000000u | ((uint32_t)(is_w ? 0 : 2) << 30);
    667     int shift = is_w ? 2 : 3;
    668     int64_t scaled;
    669     if (imm & ((1 << shift) - 1))
    670         tcc_error("ldp/stp: misaligned offset");
    671     scaled = imm >> shift;
    672     if (scaled < -64 || scaled > 63)
    673         tcc_error("ldp/stp: offset out of range");
    674     if (is_load) op |= (1u << 22);
    675     if      (indexing == IDX_POSTIDX) op |= (1u << 23);
    676     else if (indexing == IDX_PREIDX)  op |= (3u << 23);
    677     else                              op |= (2u << 23);
    678     op |= (((uint32_t)scaled & 0x7f) << 15) |
    679           (rt2 << 10) | (rn << 5) | rt1;
    680     gen_le32(op);
    681 }
    682 
    683 /* B / BL with a label or in-section offset. */
    684 static void emit_branch_imm(AArch64Op *op, int is_call)
    685 {
    686     uint32_t base = is_call ? 0x94000000u : 0x14000000u;
    687     Sym *sym = op->e.sym;
    688     if (sym && sym->r == cur_text_section->sh_num
    689         && !(sym->type.t & VT_EXTERN)) {
    690         int64_t target = (int64_t)sym->jnext + (int64_t)op->e.v;
    691         int64_t off = target - ind;
    692         if (off & 3) tcc_error("branch target not 4-byte aligned");
    693         off >>= 2;
    694         if (off < -(1 << 25) || off >= (1 << 25))
    695             tcc_error("branch target out of range");
    696         gen_le32(base | (((uint32_t)off) & 0x03ffffffu));
    697     } else if (sym) {
    698         int reloc = is_call ? R_AARCH64_CALL26 : R_AARCH64_JUMP26;
    699         greloca(cur_text_section, sym, ind, reloc, op->e.v);
    700         gen_le32(base);
    701     } else {
    702         int64_t off = (int64_t)op->e.v;
    703         if (off & 3) tcc_error("branch target not 4-byte aligned");
    704         off >>= 2;
    705         if (off < -(1 << 25) || off >= (1 << 25))
    706             tcc_error("branch target out of range");
    707         gen_le32(base | (((uint32_t)off) & 0x03ffffffu));
    708     }
    709 }
    710 
    711 /* Resolve a branch-target operand to either a same-section in-range
    712  * immediate offset (returns 1, *poff is set) or a relocation against a
    713  * not-yet-defined symbol (returns 0, *psym set, *poff = original addend).
    714  * Forward references to a label not yet seen produce a fresh symbol
    715  * with r=0 (SHN_UNDEF); the linker fixes them up via R_AARCH64_CONDBR19
    716  * / R_AARCH64_TSTBR14. */
    717 static int sec_branch_offset_or_reloc(AArch64Op *op, int64_t *poff, Sym **psym)
    718 {
    719     Sym *sym = op->e.sym;
    720     int64_t off;
    721     if (sym && sym->r == cur_text_section->sh_num
    722         && !(sym->type.t & VT_EXTERN)) {
    723         off = (int64_t)sym->jnext + (int64_t)op->e.v - (int64_t)ind;
    724         if (off & 3) tcc_error("branch target not 4-byte aligned");
    725         *poff = off;
    726         *psym = NULL;
    727         return 1;
    728     }
    729     if (sym) {
    730         *psym = sym;
    731         *poff = (int64_t)op->e.v;
    732         return 0;
    733     }
    734     off = (int64_t)op->e.v;
    735     if (off & 3) tcc_error("branch target not 4-byte aligned");
    736     *poff = off;
    737     *psym = NULL;
    738     return 1;
    739 }
    740 
    741 /* B.cond (in-section + forward-ref / extern via R_AARCH64_CONDBR19). */
    742 static void emit_branch_cond(int cond, AArch64Op *target)
    743 {
    744     int64_t off; Sym *sym;
    745     uint32_t base = 0x54000000u | (uint32_t)cond;
    746     if (sec_branch_offset_or_reloc(target, &off, &sym)) {
    747         int64_t imm = off >> 2;
    748         if (imm < -(1 << 18) || imm >= (1 << 18))
    749             tcc_error("b.cond: target out of 19-bit range");
    750         gen_le32(base | (((uint32_t)imm & 0x7ffffu) << 5));
    751     } else {
    752         greloca(cur_text_section, sym, ind, R_AARCH64_CONDBR19, off);
    753         gen_le32(base);
    754     }
    755 }
    756 
    757 /* CBZ/CBNZ — same imm19 layout as b.cond, so reuses CONDBR19. */
    758 static void emit_branch_cmp(int rt, AArch64Op *target, int is_w, int op_cbnz)
    759 {
    760     int64_t off; Sym *sym;
    761     uint32_t base = 0x34000000u | sf_bit(is_w) | rt;
    762     if (op_cbnz) base |= (1u << 24);
    763     if (sec_branch_offset_or_reloc(target, &off, &sym)) {
    764         int64_t imm = off >> 2;
    765         if (imm < -(1 << 18) || imm >= (1 << 18))
    766             tcc_error("cbz/cbnz: target out of 19-bit range");
    767         gen_le32(base | (((uint32_t)imm & 0x7ffffu) << 5));
    768     } else {
    769         greloca(cur_text_section, sym, ind, R_AARCH64_CONDBR19, off);
    770         gen_le32(base);
    771     }
    772 }
    773 
    774 /* TBZ/TBNZ. bit_pos in 0..63 (bit5 = b5, bits 4..0 = b40). */
    775 static void emit_branch_test(int rt, int bit_pos, AArch64Op *target, int op_tbnz)
    776 {
    777     int64_t off; Sym *sym;
    778     int b5;
    779     uint32_t base;
    780     if (bit_pos < 0 || bit_pos > 63)
    781         tcc_error("tbz/tbnz: bit position out of range");
    782     b5 = (bit_pos >> 5) & 1;
    783     base = 0x36000000u | ((uint32_t)b5 << 31) |
    784            (((uint32_t)bit_pos & 0x1fu) << 19) | rt;
    785     if (op_tbnz) base |= (1u << 24);
    786     if (sec_branch_offset_or_reloc(target, &off, &sym)) {
    787         int64_t imm = off >> 2;
    788         if (imm < -(1 << 13) || imm >= (1 << 13))
    789             tcc_error("tbz/tbnz: target out of 14-bit range");
    790         gen_le32(base | (((uint32_t)imm & 0x3fffu) << 5));
    791     } else {
    792         greloca(cur_text_section, sym, ind, R_AARCH64_TSTBR14, off);
    793         gen_le32(base);
    794     }
    795 }
    796 
    /* Register-indirect branch through Rn.  op2 picks the opcode:
     * 0 = BR, 1 = BLR, 2 = RET.  Callers must pass op2 in 0..2. */
    static void emit_branch_reg(int rn, int op2)
    {
        static const uint32_t opcode_for[3] = {
            0xd61f0000u,    /* BR  */
            0xd63f0000u,    /* BLR */
            0xd65f0000u     /* RET */
        };
        uint32_t insn = opcode_for[op2];

        insn |= (rn << 5);
        gen_le32(insn);
    }
    805 
    /* Exception-generating instructions (SVC/HVC/SMC/BRK/HLT): `base` is the
     * opcode with a zero immediate, `imm` is the 16-bit payload placed at
     * bits 20:5.  Values outside 0..0xffff are an error. */
    static void emit_excgen(uint32_t base, int64_t imm)
    {
        if (!(imm >= 0 && imm <= 0xffff))
            tcc_error("svc/brk imm out of range");
        gen_le32(base | ((uint32_t)imm << 5));
    }
    813 
    814 /* Lower `ldr Xn, =imm64` and `ldr Xn, =sym` to a movz/movk chain. */
    815 static void emit_ldr_literal(int rd, AArch64Op *src)
    816 {
    817     if (src->e.sym) {
    818         Sym *sym = src->e.sym;
    819         /* MOVW_UABS_G0_NC, then G1_NC, G2_NC, G3 — full 64-bit address. */
    820         greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G0_NC, src->e.v);
    821         gen_le32(0xd2800000u | rd);                          /* movz */
    822         greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G1_NC, src->e.v);
    823         gen_le32(0xf2a00000u | rd);                          /* movk lsl#16 */
    824         greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G2_NC, src->e.v);
    825         gen_le32(0xf2c00000u | rd);                          /* movk lsl#32 */
    826         greloca(cur_text_section, sym, ind, R_AARCH64_MOVW_UABS_G3,    src->e.v);
    827         gen_le32(0xf2e00000u | rd);                          /* movk lsl#48 */
    828     } else {
    829         arm64_movimm(rd, (uint64_t)src->e.v);
    830     }
    831 }
    832 
    833 /* ---- mnemonic dispatch helpers ----------------------------------- */
    834 
    835 static void need_xreg(AArch64Op *op, const char *what)
    836 {
    837     if (op->kind != OP_REG || op->is_w)
    838         tcc_error("%s: expected 64-bit register", what);
    839 }
    840 
    841 static void need_reg(AArch64Op *op, const char *what)
    842 {
    843     if (op->kind != OP_REG)
    844         tcc_error("%s: expected register", what);
    845 }
    846 
    847 static void check_size_match(AArch64Op *a, AArch64Op *b, const char *what)
    848 {
    849     if (a->is_w != b->is_w)
    850         tcc_error("%s: register size mismatch", what);
    851 }
    852 
    853 /* Lookup table: addsub mnemonics → (is_sub, set_flags). */
    854 static int is_sub_token(int t)
    855 {
    856     return t == TOK_ASM_sub || t == TOK_ASM_subs ||
    857            t == TOK_ASM_cmp || t == TOK_ASM_neg  || t == TOK_ASM_negs;
    858 }
    859 
    860 static int sets_flags_token(int t)
    861 {
    862     return t == TOK_ASM_adds || t == TOK_ASM_subs ||
    863            t == TOK_ASM_cmp  || t == TOK_ASM_cmn  || t == TOK_ASM_negs;
    864 }
    865 
    866 /* Logical op2 from token (for plain logical-imm/reg). */
    867 static int log_op2(int t)
    868 {
    869     switch (t) {
    870     case TOK_ASM_and:                       return 0;
    871     case TOK_ASM_orr: case TOK_ASM_mvn:     return 1;
    872     case TOK_ASM_eor:                       return 2;
    873     case TOK_ASM_ands: case TOK_ASM_tst:    return 3;
    874     case TOK_ASM_bic:                       return 0;
    875     case TOK_ASM_orn:                       return 1;
    876     case TOK_ASM_eon:                       return 2;
    877     case TOK_ASM_bics:                      return 3;
    878     }
    879     return -1;
    880 }
    881 
    882 static int log_inverts(int t)
    883 {
    884     return t == TOK_ASM_bic || t == TOK_ASM_orn ||
    885            t == TOK_ASM_eon || t == TOK_ASM_bics ||
    886            t == TOK_ASM_mvn;
    887 }
    888 
    889 /* ---- per-mnemonic handlers --------------------------------------- */
    890 
    891 static void do_addsub(TCCState *s1, int token)
    892 {
    893     AArch64Op a, b, c;
    894     int is_sub  = is_sub_token(token);
    895     int set_flg = sets_flags_token(token);
    896     int has_dst = !(token == TOK_ASM_cmp || token == TOK_ASM_cmn);
    897     int is_neg  = (token == TOK_ASM_neg || token == TOK_ASM_negs);
    898 
    899     parse_operand(s1, &a);
    900     asm_skip_comma();
    901     parse_operand(s1, &b);
    902     if (!has_dst) {
    903         /* cmp/cmn: a=Rn, b=Rm/imm; encode subs/adds with rd=31 */
    904         c = b;
    905         b = a;
    906         a.kind = OP_REG; a.reg = 31; a.is_w = b.is_w; a.is_sp = 0;
    907     } else if (is_neg) {
    908         /* neg/negs Rd, Rm[, shift]:  sub Rd, xzr, Rm ... */
    909         c = b;
    910         b.kind = OP_REG; b.reg = 31; b.is_w = a.is_w; b.is_sp = 0;
    911     } else {
    912         asm_skip_comma();
    913         parse_operand(s1, &c);
    914     }
    915 
    916     need_reg(&a, "add/sub");
    917     need_reg(&b, "add/sub");
    918     check_size_match(&a, &b, "add/sub");
    919 
    920     if (c.kind == OP_REG) {
    921         check_size_match(&a, &c, "add/sub");
    922         if (tok == ',') {
    923             next();
    924             parse_reg_shift_or_extend(s1, &c);
    925         }
    926         if (c.ext_kind != EXT_NONE) {
    927             emit_addsub_ext(a.reg, b.reg, c.reg, a.is_w, is_sub, set_flg,
    928                             c.ext_kind, c.ext_amt);
    929         } else if (a.is_sp || b.is_sp) {
    930             /* sp uses the extended-reg encoding with default UXTX/UXTW. */
    931             int ext = a.is_w ? EXT_UXTW : EXT_UXTX;
    932             emit_addsub_ext(a.reg, b.reg, c.reg, a.is_w, is_sub, set_flg,
    933                             ext, c.shift_amt);
    934         } else {
    935             emit_addsub_reg(a.reg, b.reg, c.reg, a.is_w, is_sub, set_flg,
    936                             c.shift_kind, c.shift_amt);
    937         }
    938     } else if (c.kind == OP_IMM && !c.e.sym) {
    939         if (tok == ',') {
    940             /* allow `, lsl #12` after the immediate */
    941             next();
    942             if (tok == TOK_ASM_lsl) {
    943                 next();
    944                 asm_skip_hash();
    945                 {
    946                     ExprValue se = {0};
    947                     asm_expr(s1, &se);
    948                     if (se.v == 12) c.e.v <<= 12;
    949                     else if (se.v != 0) tcc_error("add/sub: lsl must be 0 or 12");
    950                 }
    951             } else {
    952                 expect("lsl");
    953             }
    954         }
    955         emit_addsub_imm(a.reg, b.reg, (int64_t)c.e.v, a.is_w, is_sub, set_flg);
    956     } else {
    957         tcc_error("add/sub: unsupported operand");
    958     }
    959 }
    960 
    961 static void do_logical(TCCState *s1, int token)
    962 {
    963     AArch64Op a, b, c;
    964     int op2  = log_op2(token);
    965     int invt = log_inverts(token);
    966     int has_dst = !(token == TOK_ASM_tst);
    967     int is_mvn  = (token == TOK_ASM_mvn);
    968 
    969     parse_operand(s1, &a);
    970     asm_skip_comma();
    971     parse_operand(s1, &b);
    972     if (!has_dst) {
    973         /* tst Rn, Op2  => ands xzr, Rn, Op2 */
    974         c = b;
    975         b = a;
    976         a.kind = OP_REG; a.reg = 31; a.is_w = b.is_w; a.is_sp = 0;
    977     } else if (is_mvn) {
    978         /* mvn Rd, Rm => orn Rd, xzr, Rm */
    979         c = b;
    980         b.kind = OP_REG; b.reg = 31; b.is_w = a.is_w; b.is_sp = 0;
    981     } else {
    982         asm_skip_comma();
    983         parse_operand(s1, &c);
    984     }
    985 
    986     need_reg(&a, "logical");
    987     need_reg(&b, "logical");
    988     check_size_match(&a, &b, "logical");
    989 
    990     if (c.kind == OP_REG) {
    991         check_size_match(&a, &c, "logical");
    992         if (tok == ',') {
    993             next();
    994             parse_reg_shift_or_extend(s1, &c);
    995             if (c.ext_kind != EXT_NONE)
    996                 tcc_error("logical: extend not supported");
    997         }
    998         emit_log_reg(a.reg, b.reg, c.reg, a.is_w, op2, invt,
    999                      c.shift_kind, c.shift_amt);
   1000     } else if (c.kind == OP_IMM && !c.e.sym) {
   1001         if (invt)
   1002             tcc_error("logical: invert form requires a register");
   1003         emit_log_imm(a.reg, b.reg, (uint64_t)c.e.v, a.is_w, op2);
   1004     } else {
   1005         tcc_error("logical: unsupported operand");
   1006     }
   1007 }
   1008 
   1009 static void do_movw(TCCState *s1, int token)
   1010 {
   1011     AArch64Op a, b;
   1012     int op2 = (token == TOK_ASM_movn) ? 0 :
   1013               (token == TOK_ASM_movz) ? 2 : 3;            /* movk */
   1014     int hw_shift = 0;
   1015     parse_operand(s1, &a);
   1016     asm_skip_comma();
   1017     parse_operand(s1, &b);
   1018     need_reg(&a, "movz/movn/movk");
   1019     if (b.kind != OP_IMM)
   1020         tcc_error("movz/movn/movk: expected immediate");
   1021     if (b.e.sym)
   1022         tcc_error("movz/movn/movk: symbol immediate not supported");
   1023     if (tok == ',') {
   1024         next();
   1025         if (tok == TOK_ASM_lsl) {
   1026             next();
   1027             asm_skip_hash();
   1028             {
   1029                 ExprValue se = {0};
   1030                 asm_expr(s1, &se);
   1031                 hw_shift = (int)se.v;
   1032             }
   1033         } else {
   1034             expect("lsl");
   1035         }
   1036     }
   1037     emit_movw(a.reg, (int)b.e.v, hw_shift, a.is_w, op2);
   1038 }
   1039 
   1040 /* mov — phase 2 expanded handler. */
   1041 static void do_mov(TCCState *s1)
   1042 {
   1043     AArch64Op a, b;
   1044     parse_operand(s1, &a);
   1045     asm_skip_comma();
   1046     parse_operand(s1, &b);
   1047     if (a.kind != OP_REG)
   1048         tcc_error("mov: destination must be a register");
   1049     if (b.kind == OP_REG) {
   1050         check_size_match(&a, &b, "mov");
   1051         if (a.is_sp || b.is_sp) {
   1052             /* mov sp/Rn, sp/Rn  =>  add Rd, Rn, #0 */
   1053             emit_addsub_imm(a.reg, b.reg, 0, a.is_w, 0, 0);
   1054         } else {
   1055             /* mov Rd, Rm  =>  orr Rd, xzr, Rm */
   1056             emit_log_reg(a.reg, 31, b.reg, a.is_w, 1 /*ORR*/, 0,
   1057                          SH_LSL, 0);
   1058         }
   1059     } else if (b.kind == OP_IMM) {
   1060         if (b.e.sym)
   1061             tcc_error("mov: symbol immediate not supported (use ldr =sym)");
   1062         if (a.is_sp)
   1063             tcc_error("mov sp, #imm: use add");
   1064         if (a.is_w) {
   1065             uint64_t v = (uint32_t)b.e.v;
   1066             uint32_t insn;
   1067             int e;
   1068             /* try movz w(r),#x;  movn w(r),#~x;  orr w(r), wzr, #imm */
   1069             if (!(v & ~0xffffull)) {
   1070                 gen_le32(0x52800000u | a.reg | ((uint32_t)v << 5));
   1071                 return;
   1072             }
   1073             if (!((~v & 0xffffffffu) & ~0xffffull)) {
   1074                 gen_le32(0x12800000u | a.reg | ((uint32_t)(~v & 0xffff) << 5));
   1075                 return;
   1076             }
   1077             e = arm64_encode_bimm64((v & 0xffffffffu) | (v << 32));
   1078             if (e >= 0) {
   1079                 insn = 0x320003e0u | a.reg | ((uint32_t)e << 10);
   1080                 gen_le32(insn);
   1081                 return;
   1082             }
   1083             tcc_error("mov w#imm: value not encodable");
   1084         } else {
   1085             arm64_movimm(a.reg, (uint64_t)b.e.v);
   1086         }
   1087     } else {
   1088         tcc_error("mov: unsupported source operand");
   1089     }
   1090 }
   1091 
   1092 /* shift-imm aliases (lsl/lsr/asr/ror imm) and shift-reg aliases.
   1093  * Detected when the third operand is OP_IMM (alias to bfm) or OP_REG
   1094  * (alias to lslv/lsrv/asrv/rorv). */
   1095 static void do_shift(TCCState *s1, int token)
   1096 {
   1097     AArch64Op a, b, c;
   1098     int sh = (token == TOK_ASM_lsl) ? SH_LSL :
   1099              (token == TOK_ASM_lsr) ? SH_LSR :
   1100              (token == TOK_ASM_asr) ? SH_ASR : SH_ROR;
   1101     parse_operand(s1, &a);
   1102     asm_skip_comma();
   1103     parse_operand(s1, &b);
   1104     asm_skip_comma();
   1105     parse_operand(s1, &c);
   1106     need_reg(&a, "shift");
   1107     need_reg(&b, "shift");
   1108     check_size_match(&a, &b, "shift");
   1109     if (c.kind == OP_REG) {
   1110         check_size_match(&a, &c, "shift");
   1111         emit_shift_reg(a.reg, b.reg, c.reg, a.is_w,
   1112                        sh == SH_LSL ? 8 :
   1113                        sh == SH_LSR ? 9 :
   1114                        sh == SH_ASR ? 10 : 11);
   1115         return;
   1116     }
   1117     if (c.kind != OP_IMM || c.e.sym)
   1118         tcc_error("shift: expected reg or immediate");
   1119     {
   1120         int width = a.is_w ? 32 : 64;
   1121         int shamt = (int)c.e.v;
   1122         int immr, imms, op2;
   1123         if (sh == SH_ROR) {
   1124             /* extr Rd, Rn, Rn, #imm — encode via EXTR (we don't have a
   1125                separate emitter, so error out for now) */
   1126             tcc_error("ror imm: not supported (use rorv)");
   1127         }
   1128         if (shamt < 0 || shamt >= width)
   1129             tcc_error("shift amount out of range");
   1130         if (sh == SH_LSL) {
   1131             immr = (-shamt) & (width - 1);
   1132             imms = (width - 1) - shamt;
   1133             op2  = 2;          /* UBFM */
   1134         } else if (sh == SH_LSR) {
   1135             immr = shamt;
   1136             imms = width - 1;
   1137             op2  = 2;          /* UBFM */
   1138         } else /* SH_ASR */ {
   1139             immr = shamt;
   1140             imms = width - 1;
   1141             op2  = 0;          /* SBFM */
   1142         }
   1143         emit_bfm(a.reg, b.reg, immr, imms, a.is_w, op2);
   1144     }
   1145 }
   1146 
   1147 /* Bitfield mnemonic handler (sbfm/ubfm/bfm). */
   1148 static void do_bfm(TCCState *s1, int token)
   1149 {
   1150     AArch64Op a, b, c, d;
   1151     int op2 = (token == TOK_ASM_sbfm) ? 0 :
   1152               (token == TOK_ASM_bfm)  ? 1 : 2;
   1153     parse_operand(s1, &a);
   1154     asm_skip_comma();
   1155     parse_operand(s1, &b);
   1156     asm_skip_comma();
   1157     parse_operand(s1, &c);
   1158     asm_skip_comma();
   1159     parse_operand(s1, &d);
   1160     need_reg(&a, "bfm");
   1161     need_reg(&b, "bfm");
   1162     check_size_match(&a, &b, "bfm");
   1163     if (c.kind != OP_IMM || d.kind != OP_IMM || c.e.sym || d.e.sym)
   1164         tcc_error("bfm: immr/imms must be constants");
   1165     emit_bfm(a.reg, b.reg, (int)c.e.v, (int)d.e.v, a.is_w, op2);
   1166 }
   1167 
   1168 /* Sign/zero-extend aliases (sxtb/sxth/sxtw/uxtb/uxth) when used as mnemonics. */
   1169 static void do_extend_alias(TCCState *s1, int token)
   1170 {
   1171     AArch64Op a, b;
   1172     int immr = 0, imms, op2, is_w;
   1173     parse_operand(s1, &a);
   1174     asm_skip_comma();
   1175     parse_operand(s1, &b);
   1176     need_reg(&a, "ext alias");
   1177     need_reg(&b, "ext alias");
   1178     /* sxtw is only valid for X-form. uxtb/uxth/sxtb/sxth follow Rd. */
   1179     is_w = a.is_w;
   1180     switch (token) {
   1181     case TOK_ASM_sxtb: imms = 7;  op2 = 0; break;
   1182     case TOK_ASM_sxth: imms = 15; op2 = 0; break;
   1183     case TOK_ASM_sxtw: imms = 31; op2 = 0; is_w = 0; break;
   1184     case TOK_ASM_uxtb: imms = 7;  op2 = 2; break;
   1185     case TOK_ASM_uxth: imms = 15; op2 = 2; break;
   1186     default: tcc_error("internal: bad extend alias"); return;
   1187     }
   1188     emit_bfm(a.reg, b.reg, immr, imms, is_w, op2);
   1189 }
   1190 
   1191 /* mul/mneg/madd/msub family. */
   1192 static void do_mul(TCCState *s1, int token)
   1193 {
   1194     AArch64Op a, b, c, d;
   1195     int is_sub = (token == TOK_ASM_msub || token == TOK_ASM_mneg);
   1196     int has_ra = (token == TOK_ASM_madd || token == TOK_ASM_msub);
   1197     parse_operand(s1, &a);
   1198     asm_skip_comma();
   1199     parse_operand(s1, &b);
   1200     asm_skip_comma();
   1201     parse_operand(s1, &c);
   1202     if (has_ra) {
   1203         asm_skip_comma();
   1204         parse_operand(s1, &d);
   1205         need_reg(&d, "madd/msub");
   1206     } else {
   1207         d.kind = OP_REG; d.reg = 31; d.is_w = a.is_w; d.is_sp = 0;
   1208     }
   1209     need_reg(&a, "mul"); need_reg(&b, "mul"); need_reg(&c, "mul");
   1210     check_size_match(&a, &b, "mul");
   1211     check_size_match(&a, &c, "mul");
   1212     emit_madd(a.reg, b.reg, c.reg, d.reg, a.is_w, is_sub);
   1213 }
   1214 
   1215 /* smull/umull/smnegl/umnegl/smaddl/umaddl/smsubl/umsubl. */
   1216 static void do_mul_long(TCCState *s1, int token)
   1217 {
   1218     AArch64Op a, b, c, d;
   1219     int is_unsigned, is_sub, has_ra;
   1220     is_unsigned = (token == TOK_ASM_umull || token == TOK_ASM_umnegl ||
   1221                    token == TOK_ASM_umaddl || token == TOK_ASM_umsubl);
   1222     is_sub      = (token == TOK_ASM_smnegl || token == TOK_ASM_umnegl ||
   1223                    token == TOK_ASM_smsubl || token == TOK_ASM_umsubl);
   1224     has_ra      = (token == TOK_ASM_smaddl || token == TOK_ASM_umaddl ||
   1225                    token == TOK_ASM_smsubl || token == TOK_ASM_umsubl);
   1226     parse_operand(s1, &a);
   1227     asm_skip_comma();
   1228     parse_operand(s1, &b);
   1229     asm_skip_comma();
   1230     parse_operand(s1, &c);
   1231     if (has_ra) {
   1232         asm_skip_comma();
   1233         parse_operand(s1, &d);
   1234         need_xreg(&d, "smaddl/umaddl");
   1235     } else {
   1236         d.kind = OP_REG; d.reg = 31; d.is_w = 0; d.is_sp = 0;
   1237     }
   1238     need_xreg(&a, "smull/umull");
   1239     if (b.kind != OP_REG || !b.is_w)
   1240         tcc_error("smull/umull: source must be W");
   1241     if (c.kind != OP_REG || !c.is_w)
   1242         tcc_error("smull/umull: source must be W");
   1243     emit_madd_long(a.reg, b.reg, c.reg, d.reg, is_unsigned, is_sub);
   1244 }
   1245 
   1246 /* CSEL/CSINC/CSINV/CSNEG and aliases (cset/cinc/cinv/cneg/csetm). */
   1247 static void do_csel(TCCState *s1, int token)
   1248 {
   1249     AArch64Op a, b, c, d;
   1250     int invert  = (token == TOK_ASM_csinv || token == TOK_ASM_csneg ||
   1251                    token == TOK_ASM_csetm || token == TOK_ASM_cinv);
   1252     int inc_neg = (token == TOK_ASM_csinc || token == TOK_ASM_csneg ||
   1253                    token == TOK_ASM_cset  || token == TOK_ASM_csetm ||
   1254                    token == TOK_ASM_cinc  || token == TOK_ASM_cneg);
   1255     int alias_dst_only = (token == TOK_ASM_cset || token == TOK_ASM_csetm);
   1256     int alias_two_src  = (token == TOK_ASM_cinc || token == TOK_ASM_cinv ||
   1257                           token == TOK_ASM_cneg);
   1258 
   1259     parse_operand(s1, &a);
   1260     asm_skip_comma();
   1261     if (alias_dst_only) {
   1262         /* cset Rd, cond  =>  csinc Rd, xzr, xzr, !cond */
   1263         parse_operand(s1, &d);             /* d = cond */
   1264         need_reg(&a, "cset/csetm");
   1265         if (d.kind != OP_COND) tcc_error("cset: expected cond");
   1266         b.kind = OP_REG; b.reg = 31; b.is_w = a.is_w; b.is_sp = 0;
   1267         c = b;
   1268         emit_csel(a.reg, b.reg, c.reg, d.cond ^ 1, a.is_w, invert, inc_neg);
   1269         return;
   1270     }
   1271     parse_operand(s1, &b);
   1272     asm_skip_comma();
   1273     if (alias_two_src) {
   1274         /* cinc Rd, Rn, cond  =>  csinc Rd, Rn, Rn, !cond  (also cinv/cneg) */
   1275         parse_operand(s1, &d);
   1276         if (d.kind != OP_COND) tcc_error("cinc/cinv/cneg: expected cond");
   1277         need_reg(&a, "cinc"); need_reg(&b, "cinc");
   1278         check_size_match(&a, &b, "cinc");
   1279         emit_csel(a.reg, b.reg, b.reg, d.cond ^ 1, a.is_w, invert, inc_neg);
   1280         return;
   1281     }
   1282     parse_operand(s1, &c);
   1283     asm_skip_comma();
   1284     parse_operand(s1, &d);
   1285     need_reg(&a, "csel"); need_reg(&b, "csel"); need_reg(&c, "csel");
   1286     if (d.kind != OP_COND) tcc_error("csel: expected cond");
   1287     check_size_match(&a, &b, "csel"); check_size_match(&a, &c, "csel");
   1288     emit_csel(a.reg, b.reg, c.reg, d.cond, a.is_w, invert, inc_neg);
   1289 }
   1290 
   1291 /* sdiv/udiv. */
   1292 static void do_div(TCCState *s1, int token)
   1293 {
   1294     AArch64Op a, b, c;
   1295     parse_operand(s1, &a); asm_skip_comma();
   1296     parse_operand(s1, &b); asm_skip_comma();
   1297     parse_operand(s1, &c);
   1298     need_reg(&a, "div"); need_reg(&b, "div"); need_reg(&c, "div");
   1299     check_size_match(&a, &b, "div"); check_size_match(&a, &c, "div");
   1300     emit_div(a.reg, b.reg, c.reg, a.is_w, token == TOK_ASM_sdiv);
   1301 }
   1302 
   1303 /* Generic ldr/str dispatcher. opc/size encode the variant. */
   1304 static void do_ldst(TCCState *s1, int token, int opc, int size)
   1305 {
   1306     AArch64Op a, b;
   1307     parse_operand(s1, &a);
   1308     asm_skip_comma();
   1309     parse_operand(s1, &b);
   1310     need_reg(&a, "ldr/str");
   1311 
   1312     /* `ldr Xn, =imm` / `ldr Xn, =sym` */
   1313     if (b.kind == OP_LITERAL) {
   1314         if (token != TOK_ASM_ldr)
   1315             tcc_error("=literal only valid with ldr");
   1316         emit_ldr_literal(a.reg, &b);
   1317         return;
   1318     }
   1319     if (b.kind != OP_MEM)
   1320         tcc_error("ldr/str: expected memory operand");
   1321     if (b.indexing == IDX_REGOFF) {
   1322         emit_ldst_reg(opc, size, a.reg, b.base, b.idx_reg,
   1323                       b.idx_is_w, b.mem_ext_kind, b.mem_ext_amt,
   1324                       b.mem_has_shift);
   1325         return;
   1326     }
   1327     if (b.e.sym)
   1328         tcc_error("ldr/str: symbolic offset not supported");
   1329     emit_ldst_imm(opc, size, a.reg, b.base, (int64_t)b.e.v, b.indexing);
   1330 }
   1331 
   1332 static void do_ldp_stp(TCCState *s1, int is_load)
   1333 {
   1334     AArch64Op a, b, c;
   1335     parse_operand(s1, &a);
   1336     asm_skip_comma();
   1337     parse_operand(s1, &b);
   1338     asm_skip_comma();
   1339     parse_operand(s1, &c);
   1340     need_reg(&a, "ldp/stp"); need_reg(&b, "ldp/stp");
   1341     check_size_match(&a, &b, "ldp/stp");
   1342     if (c.kind != OP_MEM)
   1343         tcc_error("ldp/stp: expected memory operand");
   1344     if (c.indexing == IDX_REGOFF)
   1345         tcc_error("ldp/stp: register-offset not supported");
   1346     if (c.e.sym)
   1347         tcc_error("ldp/stp: symbolic offset not supported");
   1348     emit_ldst_pair(is_load, a.reg, b.reg, c.base,
   1349                    (int64_t)c.e.v, c.indexing, a.is_w);
   1350 }
   1351 
   1352 /* CBZ/CBNZ. */
   1353 static void do_cbz(TCCState *s1, int token)
   1354 {
   1355     AArch64Op a, b;
   1356     parse_operand(s1, &a); asm_skip_comma();
   1357     parse_operand(s1, &b);
   1358     need_reg(&a, "cbz/cbnz");
   1359     if (b.kind != OP_IMM)
   1360         tcc_error("cbz/cbnz: expected label");
   1361     emit_branch_cmp(a.reg, &b, a.is_w, token == TOK_ASM_cbnz);
   1362 }
   1363 
   1364 /* TBZ/TBNZ. */
   1365 static void do_tbz(TCCState *s1, int token)
   1366 {
   1367     AArch64Op a, b, c;
   1368     parse_operand(s1, &a); asm_skip_comma();
   1369     parse_operand(s1, &b); asm_skip_comma();
   1370     parse_operand(s1, &c);
   1371     need_reg(&a, "tbz/tbnz");
   1372     if (b.kind != OP_IMM || b.e.sym)
   1373         tcc_error("tbz/tbnz: expected bit constant");
   1374     if (c.kind != OP_IMM)
   1375         tcc_error("tbz/tbnz: expected label");
   1376     emit_branch_test(a.reg, (int)b.e.v, &c, token == TOK_ASM_tbnz);
   1377 }
   1378 
   /* Emit a HINT-space instruction.  hint_arg is split into CRm (its bits
    * 6:3, placed at instruction bits 11:8) and op2 (its bits 2:0, placed at
    * instruction bits 7:5) on top of the 0xd503201f base word.  `token` is
    * accepted for symmetry with the other handlers and is not used. */
   static void do_hint(int token, int hint_arg)
   {
       uint32_t insn = 0xd503201fu;
       uint32_t crm_field = (uint32_t)((hint_arg >> 3) & 0xf);
       uint32_t op2_field = (uint32_t)(hint_arg & 7);

       (void)token;
       insn |= crm_field << 8;
       insn |= op2_field << 5;
       gen_le32(insn);
   }
   1388 
   1389 /* Map a token to a DSB/DMB barrier-option CRm value, or -1 if not one. */
   1390 static int tok_to_barrier_crm(int t)
   1391 {
   1392     if (t == TOK_ASM_sy)    return 0xf;
   1393     if (t == TOK_ASM_ish)   return 0xb;
   1394     if (t == TOK_ASM_ishst) return 0xa;
   1395     if (t == TOK_ASM_ishld) return 0x9;
   1396     if (t == TOK_ASM_nsh)   return 0x7;
   1397     if (t == TOK_ASM_nshst) return 0x6;
   1398     if (t == TOK_ASM_nshld) return 0x5;
   1399     if (t == TOK_ASM_osh)   return 0x3;
   1400     if (t == TOK_ASM_oshst) return 0x2;
   1401     if (t == TOK_ASM_oshld) return 0x1;
   1402     return -1;
   1403 }
   1404 
   1405 /* DSB/DMB/ISB. Accepts either a named CRm (sy/ish/ishst/...) or `#imm`. */
   1406 static void do_barrier(TCCState *s1, int token)
   1407 {
   1408     int crm = 0xf;     /* default sy */
   1409     if (!at_end_of_insn()) {
   1410         int named = tok_to_barrier_crm(tok);
   1411         if (named >= 0) {
   1412             crm = named;
   1413             next();
   1414         } else {
   1415             AArch64Op a;
   1416             parse_operand(s1, &a);
   1417             if (a.kind == OP_IMM && !a.e.sym) crm = (int)a.e.v & 0xf;
   1418             else tcc_error("dsb/dmb/isb: expected scope name or #imm");
   1419         }
   1420     }
   1421     {
   1422         uint32_t base = 0xd503309fu;        /* DSB sy */
   1423         if (token == TOK_ASM_dmb) base = 0xd50330bfu;
   1424         if (token == TOK_ASM_isb) base = 0xd50330dfu;
   1425         gen_le32((base & 0xfffff0ffu) | ((uint32_t)crm << 8));
   1426     }
   1427 }
   1428 
   1429 /* ---- system-register access (MRS / MSR / MSR-imm) ---------------- */
   1430 
   /* Compare two NUL-terminated strings for equality, folding ASCII 'A'..'Z'
    * onto 'a'..'z' (system-register names appear in both cases, e.g.
    * `CurrentEL` vs `sctlr_el1`).  Returns 1 when equal, 0 otherwise. */
   static int ci_streq(const char *a, const char *b)
   {
       for (; *a != '\0' && *b != '\0'; a++, b++) {
           int lhs = (unsigned char)*a;
           int rhs = (unsigned char)*b;

           if (lhs >= 'A' && lhs <= 'Z')
               lhs += 'a' - 'A';
           if (rhs >= 'A' && rhs <= 'Z')
               rhs += 'a' - 'A';
           if (lhs != rhs)
               return 0;
       }
       /* Equal only if both strings ended together. */
       return *a == '\0' && *b == '\0';
   }
   1444 
   /* Sysreg name -> 15-bit encoding, laid out exactly as bits 19:5 of the
    * MRS/MSR instruction word (the L bit at 21 is not part of it):
    *   bit 14     = o0 (1 for op0=3, 0 for op0=2)
    *   bits 13:11 = op1
    *   bits 10:7  = CRn
    *   bits 6:3   = CRm
    *   bits 2:0   = op2
    * The value is shifted left by 5 when it is merged into an instruction.
    */
   #define SR_ENC(o0, op1, CRn, CRm, op2) \
       (((o0) << 14) | ((op1) << 11) | ((CRn) << 7) | ((CRm) << 3) | (op2))
   1455 
   /* One row of the system-register name table. */
   struct sysreg_entry {
       const char *name;   /* register name as matched by lookup_sysreg() */
       uint32_t enc;       /* SR_ENC() value for that register */
   };
   1460 
   1461 /* The set used by seed-kernel/kernel.S. Extend as needed. */
   1462 static const struct sysreg_entry sysregs[] = {
   1463     /* EL1 read-only. */
   1464     { "currentel",  SR_ENC(1, 0, 4, 2, 2) },
   1465     /* EL1. */
   1466     { "sctlr_el1",  SR_ENC(1, 0, 1, 0, 0) },
   1467     { "cpacr_el1",  SR_ENC(1, 0, 1, 0, 2) },
   1468     { "ttbr0_el1",  SR_ENC(1, 0, 2, 0, 0) },
   1469     { "tcr_el1",    SR_ENC(1, 0, 2, 0, 2) },
   1470     { "spsr_el1",   SR_ENC(1, 0, 4, 0, 0) },
   1471     { "elr_el1",    SR_ENC(1, 0, 4, 0, 1) },
   1472     { "sp_el0",     SR_ENC(1, 0, 4, 1, 0) },
   1473     { "esr_el1",    SR_ENC(1, 0, 5, 2, 0) },
   1474     { "far_el1",    SR_ENC(1, 0, 6, 0, 0) },
   1475     { "mair_el1",   SR_ENC(1, 0, 10, 2, 0) },
   1476     { "vbar_el1",   SR_ENC(1, 0, 12, 0, 0) },
   1477     /* EL2. */
   1478     { "hcr_el2",    SR_ENC(1, 4, 1, 1, 0) },
   1479     { "spsr_el2",   SR_ENC(1, 4, 4, 0, 0) },
   1480     { "elr_el2",    SR_ENC(1, 4, 4, 0, 1) },
   1481     { "sp_el1",     SR_ENC(1, 4, 4, 1, 0) },
   1482 };
   1483 
   1484 static int lookup_sysreg(const char *name, uint32_t *out)
   1485 {
   1486     int i;
   1487     for (i = 0; i < (int)(sizeof(sysregs) / sizeof(sysregs[0])); i++) {
   1488         if (ci_streq(sysregs[i].name, name)) {
   1489             *out = sysregs[i].enc;
   1490             return 1;
   1491         }
   1492     }
   1493     return 0;
   1494 }
   1495 
   1496 /* Parse the textual sysreg token at `tok` (an identifier — daifset/daifclr
   1497  * tokens are handled separately by the MSR-imm path).  Errors if not
   1498  * recognised.  Consumes the token. */
   1499 static uint32_t parse_sysreg_or_die(const char *what)
   1500 {
   1501     uint32_t enc;
   1502     const char *name;
   1503     if (tok < TOK_IDENT)
   1504         tcc_error("%s: expected system register name", what);
   1505     name = get_tok_str(tok, NULL);
   1506     if (!lookup_sysreg(name, &enc))
   1507         tcc_error("%s: unknown system register `%s`", what, name);
   1508     next();
   1509     return enc;
   1510 }
   1511 
   /* Emit MRS (is_read != 0) or MSR (register form).  `enc` is an SR_ENC()
    * value and lands in instruction bits 19:5; `rt` is the general-purpose
    * register number in bits 4:0. */
   static void emit_msr_mrs(int is_read, int rt, uint32_t enc)
   {
       uint32_t insn;

       if (is_read)
           insn = 0xd5300000u;
       else
           insn = 0xd5100000u;
       insn |= (enc & 0x7fffu) << 5;
       insn |= (uint32_t)(rt & 0x1f);
       gen_le32(insn);
   }
   1517 
   /* Emit MSR <pstatefield>, #imm for the two supported fields:
    * DAIFClr when `is_clr` is non-zero, DAIFSet otherwise.
    *
    * `imm` must lie in 0..15, otherwise tcc_error() is raised.  The value
    * is merged into bits [11:8] of the fixed base word and the result is
    * written with gen_le32(). */
   static void emit_msr_pstate(int is_clr, int imm)
   {
       uint32_t insn;

       if (!(imm >= 0 && imm <= 15))
           tcc_error("msr daifset/daifclr: imm out of 0..15");

       if (is_clr)
           insn = 0xd50340ffu;
       else
           insn = 0xd50340dfu;

       insn |= ((uint32_t)imm & 0xf) << 8;
       gen_le32(insn);
   }
   1526 
   1527 static void do_mrs(TCCState *s1)
   1528 {
   1529     AArch64Op a;
   1530     uint32_t enc;
   1531     parse_operand(s1, &a);
   1532     asm_skip_comma();
   1533     need_xreg(&a, "mrs");
   1534     enc = parse_sysreg_or_die("mrs");
   1535     emit_msr_mrs(1, a.reg, enc);
   1536 }
   1537 
   1538 static void do_msr(TCCState *s1)
   1539 {
   1540     /* Two forms:
   1541      *   msr <sysreg>, Xt
   1542      *   msr DAIFSet|DAIFClr, #imm4
   1543      * Disambiguated by the first operand: tok-based PSTATE keyword vs
   1544      * a generic identifier name lookup. */
   1545     if (tok == TOK_ASM_daifset || tok == TOK_ASM_daifclr) {
   1546         int is_clr = (tok == TOK_ASM_daifclr);
   1547         AArch64Op imm;
   1548         next();
   1549         asm_skip_comma();
   1550         asm_skip_hash();
   1551         parse_operand(s1, &imm);
   1552         if (imm.kind != OP_IMM || imm.e.sym)
   1553             tcc_error("msr daifset/daifclr: expected #imm");
   1554         emit_msr_pstate(is_clr, (int)imm.e.v);
   1555         return;
   1556     }
   1557     {
   1558         AArch64Op a;
   1559         uint32_t enc = parse_sysreg_or_die("msr");
   1560         asm_skip_comma();
   1561         parse_operand(s1, &a);
   1562         need_xreg(&a, "msr");
   1563         emit_msr_mrs(0, a.reg, enc);
   1564     }
   1565 }
   1566 
   1567 /* ---- SYS-instruction aliases (IC / TLBI) ------------------------- */
   1568 
   /* Emit a SYS instruction word.
    *
    * The field layout is the same as MRS/MSR, but op0 is fixed at 0b01:
    * the base word 0xd5080000 has bit 19 set and bit 20 clear.  Bit 21 (L)
    * is also clear, which selects SYS (write) rather than SYSL (read);
    * only SYS is emitted here.
    *
    * Each argument is masked to its field width before being merged:
    * op1 -> bits [18:16], CRn -> [15:12], CRm -> [11:8], op2 -> [7:5],
    * rt -> [4:0].  The word is written with gen_le32().
    */
   static void emit_sys_alias(int op1, int CRn, int CRm, int op2, int rt)
   {
       uint32_t insn = 0xd5080000u;            /* SYS, L=0, op0=0b01 */

       insn |= (uint32_t)(op1 & 7) << 16;
       insn |= (uint32_t)(CRn & 0xf) << 12;
       insn |= (uint32_t)(CRm & 0xf) << 8;
       insn |= (uint32_t)(op2 & 7) << 5;
       insn |= (uint32_t)(rt & 0x1f);
       gen_le32(insn);
   }
   1582 
   /* One row of a SYS-alias table: an operation name plus the four SYS
    * encoding fields it expands to.  Field order matters because the
    * tables use positional initialisers. */
   struct sys_alias {
       const char *name;   /* operation name, stored lowercased */
       uint8_t op1;        /* SYS op1 field */
       uint8_t CRn;        /* SYS CRn field */
       uint8_t CRm;        /* SYS CRm field */
       uint8_t op2;        /* SYS op2 field */
       uint8_t needs_xt;   /* 1 if this op takes an Xt register operand */
   };
   1588 
   1589 static const struct sys_alias ic_aliases[] = {
   1590     { "iallu",   0, 7, 5, 0, 0 },
   1591     { "ialluis", 0, 7, 1, 0, 0 },
   1592     { "ivau",    3, 7, 5, 1, 1 },
   1593 };
   1594 
   1595 static const struct sys_alias tlbi_aliases[] = {
   1596     { "vmalle1",   0, 8, 7, 0, 0 },
   1597     { "vmalle1is", 0, 8, 3, 0, 0 },
   1598     { "alle1",     4, 8, 7, 4, 0 },
   1599     { "alle1is",   4, 8, 3, 4, 0 },
   1600 };
   1601 
   1602 static const struct sys_alias *
   1603 lookup_sys_alias(const struct sys_alias *table, int n, const char *name)
   1604 {
   1605     int i;
   1606     for (i = 0; i < n; i++)
   1607         if (ci_streq(table[i].name, name)) return &table[i];
   1608     return NULL;
   1609 }
   1610 
   1611 static void do_sys_alias_mnemonic(TCCState *s1, int is_tlbi)
   1612 {
   1613     const struct sys_alias *tbl = is_tlbi ? tlbi_aliases : ic_aliases;
   1614     int n = is_tlbi ? (int)(sizeof(tlbi_aliases) / sizeof(tlbi_aliases[0]))
   1615                     : (int)(sizeof(ic_aliases)   / sizeof(ic_aliases[0]));
   1616     const char *name;
   1617     const struct sys_alias *e;
   1618     int rt = 31;
   1619     if (tok < TOK_IDENT)
   1620         tcc_error("%s: expected operation name", is_tlbi ? "tlbi" : "ic");
   1621     name = get_tok_str(tok, NULL);
   1622     e = lookup_sys_alias(tbl, n, name);
   1623     if (!e)
   1624         tcc_error("%s: unknown operation `%s`", is_tlbi ? "tlbi" : "ic", name);
   1625     next();
   1626     if (e->needs_xt) {
   1627         AArch64Op r;
   1628         asm_skip_comma();
   1629         parse_operand(s1, &r);
   1630         need_xreg(&r, is_tlbi ? "tlbi" : "ic");
   1631         rt = r.reg;
   1632     }
   1633     emit_sys_alias(e->op1, e->CRn, e->CRm, e->op2, rt);
   1634 }
   1635 
   1636 /* ---- top-level dispatch ----------------------------------------- */
   1637 
   1638 ST_FUNC void asm_opcode(TCCState *s1, int token)
   1639 {
   1640     AArch64Op a, b;
   1641 
   1642     switch (token) {
   1643 
   1644     case TOK_ASM_mov:
   1645         do_mov(s1);
   1646         return;
   1647 
   1648     case TOK_ASM_movz:
   1649     case TOK_ASM_movn:
   1650     case TOK_ASM_movk:
   1651         do_movw(s1, token);
   1652         return;
   1653 
   1654     case TOK_ASM_add:
   1655     case TOK_ASM_adds:
   1656     case TOK_ASM_sub:
   1657     case TOK_ASM_subs:
   1658     case TOK_ASM_cmp:
   1659     case TOK_ASM_cmn:
   1660     case TOK_ASM_neg:
   1661     case TOK_ASM_negs:
   1662         do_addsub(s1, token);
   1663         return;
   1664 
   1665     case TOK_ASM_and:
   1666     case TOK_ASM_orr:
   1667     case TOK_ASM_eor:
   1668     case TOK_ASM_ands:
   1669     case TOK_ASM_tst:
   1670     case TOK_ASM_bic:
   1671     case TOK_ASM_orn:
   1672     case TOK_ASM_eon:
   1673     case TOK_ASM_bics:
   1674     case TOK_ASM_mvn:
   1675         do_logical(s1, token);
   1676         return;
   1677 
   1678     case TOK_ASM_sbfm:
   1679     case TOK_ASM_ubfm:
   1680     case TOK_ASM_bfm:
   1681         do_bfm(s1, token);
   1682         return;
   1683 
   1684     case TOK_ASM_lsl:
   1685     case TOK_ASM_lsr:
   1686     case TOK_ASM_asr:
   1687     case TOK_ASM_ror:
   1688         do_shift(s1, token);
   1689         return;
   1690 
   1691     case TOK_ASM_sxtb:
   1692     case TOK_ASM_sxth:
   1693     case TOK_ASM_sxtw:
   1694     case TOK_ASM_uxtb:
   1695     case TOK_ASM_uxth:
   1696         do_extend_alias(s1, token);
   1697         return;
   1698 
   1699     case TOK_ASM_lslv:
   1700     case TOK_ASM_lsrv:
   1701     case TOK_ASM_asrv:
   1702     case TOK_ASM_rorv: {
   1703         AArch64Op p, q, r;
   1704         parse_operand(s1, &p); asm_skip_comma();
   1705         parse_operand(s1, &q); asm_skip_comma();
   1706         parse_operand(s1, &r);
   1707         need_reg(&p, "lslv"); need_reg(&q, "lslv"); need_reg(&r, "lslv");
   1708         check_size_match(&p, &q, "lslv"); check_size_match(&p, &r, "lslv");
   1709         emit_shift_reg(p.reg, q.reg, r.reg, p.is_w,
   1710                        token == TOK_ASM_lslv ? 8 :
   1711                        token == TOK_ASM_lsrv ? 9 :
   1712                        token == TOK_ASM_asrv ? 10 : 11);
   1713         return;
   1714     }
   1715 
   1716     case TOK_ASM_mul:
   1717     case TOK_ASM_mneg:
   1718     case TOK_ASM_madd:
   1719     case TOK_ASM_msub:
   1720         do_mul(s1, token);
   1721         return;
   1722 
   1723     case TOK_ASM_smull:
   1724     case TOK_ASM_umull:
   1725     case TOK_ASM_smnegl:
   1726     case TOK_ASM_umnegl:
   1727     case TOK_ASM_smaddl:
   1728     case TOK_ASM_umaddl:
   1729     case TOK_ASM_smsubl:
   1730     case TOK_ASM_umsubl:
   1731         do_mul_long(s1, token);
   1732         return;
   1733 
   1734     case TOK_ASM_smulh:
   1735     case TOK_ASM_umulh: {
   1736         AArch64Op p, q, r;
   1737         parse_operand(s1, &p); asm_skip_comma();
   1738         parse_operand(s1, &q); asm_skip_comma();
   1739         parse_operand(s1, &r);
   1740         need_xreg(&p, "smulh"); need_xreg(&q, "smulh"); need_xreg(&r, "smulh");
   1741         emit_mulh(p.reg, q.reg, r.reg, token == TOK_ASM_umulh);
   1742         return;
   1743     }
   1744 
   1745     case TOK_ASM_sdiv:
   1746     case TOK_ASM_udiv:
   1747         do_div(s1, token);
   1748         return;
   1749 
   1750     case TOK_ASM_csel:
   1751     case TOK_ASM_csinc:
   1752     case TOK_ASM_csinv:
   1753     case TOK_ASM_csneg:
   1754     case TOK_ASM_cset:
   1755     case TOK_ASM_csetm:
   1756     case TOK_ASM_cinc:
   1757     case TOK_ASM_cinv:
   1758     case TOK_ASM_cneg:
   1759         do_csel(s1, token);
   1760         return;
   1761 
   1762     /* ----- loads / stores ----- */
   1763     case TOK_ASM_ldr:   do_ldst(s1, token, 1, 3); return;
   1764     case TOK_ASM_str:   do_ldst(s1, token, 0, 3); return;
   1765     case TOK_ASM_ldrb:  do_ldst(s1, token, 1, 0); return;
   1766     case TOK_ASM_strb:  do_ldst(s1, token, 0, 0); return;
   1767     case TOK_ASM_ldrh:  do_ldst(s1, token, 1, 1); return;
   1768     case TOK_ASM_strh:  do_ldst(s1, token, 0, 1); return;
   1769     case TOK_ASM_ldrsb: do_ldst(s1, token, 2, 0); return;   /* opc=2 (X-target) */
   1770     case TOK_ASM_ldrsh: do_ldst(s1, token, 2, 1); return;
   1771     case TOK_ASM_ldrsw: do_ldst(s1, token, 2, 2); return;
   1772 
   1773     case TOK_ASM_ldp:   do_ldp_stp(s1, 1); return;
   1774     case TOK_ASM_stp:   do_ldp_stp(s1, 0); return;
   1775 
   1776     /* ----- branches ----- */
   1777     case TOK_ASM_b:
   1778     case TOK_ASM_bl:
   1779         parse_operand(s1, &a);
   1780         if (a.kind != OP_IMM)
   1781             tcc_error("b/bl: expected label or immediate");
   1782         emit_branch_imm(&a, token == TOK_ASM_bl);
   1783         return;
   1784 
   1785     case TOK_ASM_br:
   1786     case TOK_ASM_blr:
   1787         parse_operand(s1, &a);
   1788         need_xreg(&a, "br/blr");
   1789         emit_branch_reg(a.reg, token == TOK_ASM_br ? 0 : 1);
   1790         return;
   1791 
   1792     case TOK_ASM_ret:
   1793         if (!at_end_of_insn()) {
   1794             parse_operand(s1, &a);
   1795             need_xreg(&a, "ret");
   1796             emit_branch_reg(a.reg, 2);
   1797         } else {
   1798             emit_branch_reg(30, 2);
   1799         }
   1800         return;
   1801 
   1802     case TOK_ASM_cbz:
   1803     case TOK_ASM_cbnz:
   1804         do_cbz(s1, token);
   1805         return;
   1806 
   1807     case TOK_ASM_tbz:
   1808     case TOK_ASM_tbnz:
   1809         do_tbz(s1, token);
   1810         return;
   1811 
   1812     /* ----- system ----- */
   1813     case TOK_ASM_svc:
   1814         parse_operand(s1, &a);
   1815         if (a.kind != OP_IMM || a.e.sym)
   1816             tcc_error("svc: expected immediate");
   1817         emit_excgen(0xd4000001u, (int64_t)a.e.v);
   1818         return;
   1819     case TOK_ASM_hvc:
   1820         parse_operand(s1, &a);
   1821         if (a.kind != OP_IMM || a.e.sym) tcc_error("hvc imm");
   1822         emit_excgen(0xd4000002u, (int64_t)a.e.v); return;
   1823     case TOK_ASM_smc:
   1824         parse_operand(s1, &a);
   1825         if (a.kind != OP_IMM || a.e.sym) tcc_error("smc imm");
   1826         emit_excgen(0xd4000003u, (int64_t)a.e.v); return;
   1827     case TOK_ASM_brk:
   1828         parse_operand(s1, &a);
   1829         if (a.kind != OP_IMM || a.e.sym) tcc_error("brk imm");
   1830         emit_excgen(0xd4200000u, (int64_t)a.e.v); return;
   1831     case TOK_ASM_hlt:
   1832         parse_operand(s1, &a);
   1833         if (a.kind != OP_IMM || a.e.sym) tcc_error("hlt imm");
   1834         emit_excgen(0xd4400000u, (int64_t)a.e.v); return;
   1835 
   1836     case TOK_ASM_nop:    do_hint(token, 0);  return;
   1837     case TOK_ASM_yield:  do_hint(token, 1);  return;
   1838     case TOK_ASM_wfe:    do_hint(token, 2);  return;
   1839     case TOK_ASM_wfi:    do_hint(token, 3);  return;
   1840     case TOK_ASM_sev:    do_hint(token, 4);  return;
   1841     case TOK_ASM_sevl:   do_hint(token, 5);  return;
   1842     case TOK_ASM_hint:
   1843         parse_operand(s1, &a);
   1844         if (a.kind != OP_IMM || a.e.sym) tcc_error("hint imm");
   1845         do_hint(token, (int)a.e.v);
   1846         return;
   1847 
   1848     case TOK_ASM_dmb:
   1849     case TOK_ASM_dsb:
   1850     case TOK_ASM_isb:
   1851         do_barrier(s1, token);
   1852         return;
   1853 
   1854     case TOK_ASM_mrs:
   1855         do_mrs(s1);
   1856         return;
   1857     case TOK_ASM_msr:
   1858         do_msr(s1);
   1859         return;
   1860     case TOK_ASM_eret:
   1861         gen_le32(0xd69f03e0u);
   1862         return;
   1863     case TOK_ASM_ic:
   1864         do_sys_alias_mnemonic(s1, 0);
   1865         return;
   1866     case TOK_ASM_tlbi:
   1867         do_sys_alias_mnemonic(s1, 1);
   1868         return;
   1869 
   1870     /* ----- conditional-branch family (b.eq..b.nv + aliases) ----- */
   1871     default:
   1872         if (token >= TOK_ASM_b_eq && token <= TOK_ASM_b_nv) {
   1873             int cond = token - TOK_ASM_b_eq;
   1874             parse_operand(s1, &a);
   1875             if (a.kind != OP_IMM)
   1876                 tcc_error("b.cond: expected label");
   1877             emit_branch_cond(cond, &a);
   1878             return;
   1879         }
   1880         if (token == TOK_ASM_b_hs || token == TOK_ASM_b_lo) {
   1881             int cond = (token == TOK_ASM_b_hs) ? 2 : 3;
   1882             parse_operand(s1, &a);
   1883             if (a.kind != OP_IMM)
   1884                 tcc_error("b.cond: expected label");
   1885             emit_branch_cond(cond, &a);
   1886             return;
   1887         }
   1888         expect("known instruction");
   1889     }
   1890 }
   1891 
   1892 /* ---- inline-asm constraint plumbing — phase-1 stubs --------------- */
   1893 
   1894 ST_FUNC void subst_asm_operand(CString *add_str, SValue *sv, int modifier)
   1895 {
   1896     tcc_error("ARM64 inline asm operands not implemented yet");
   1897 }
   1898 
   1899 ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
   1900                           int nb_outputs, int is_output,
   1901                           uint8_t *clobber_regs, int out_reg)
   1902 {
   1903 }
   1904 
   1905 ST_FUNC void asm_compute_constraints(ASMOperand *operands,
   1906                                      int nb_operands, int nb_outputs,
   1907                                      const uint8_t *clobber_regs,
   1908                                      int *pout_reg)
   1909 {
   1910 }
   1911 
   1912 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
   1913 {
   1914     int reg;
   1915     TokenSym *ts;
   1916 
   1917     if (!strcmp(str, "memory") ||
   1918         !strcmp(str, "cc") ||
   1919         !strcmp(str, "flags"))
   1920         return;
   1921     ts = tok_alloc(str, strlen(str));
   1922     reg = asm_parse_regvar(ts->tok);
   1923     if (reg == -1)
   1924         tcc_error("invalid clobber register '%s'", str);
   1925     clobber_regs[reg] = 1;
   1926 }
   1927 
   1928 /*************************************************************/
   1929 #endif /* ndef TARGET_DEFS_ONLY */