isa.h (61761B)
#ifndef KIT_ARCH_AA64_ISA_H
#define KIT_ARCH_AA64_ISA_H

/* AArch64 ISA descriptors — single source of truth for every instruction
 * the encoder, decoder, and disassembler all need to agree on.
 *
 * Each format declares:
 *   - A field struct naming every encoded bitfield.
 *   - {pack, unpack} pure functions that round-trip through a u32 word.
 *   - A {family_match, family_mask} pair identifying the format.
 *   - Per-instruction inline wrappers that bake in the opc bits and
 *     return the encoded word; callers emit it via MCEmitter.
 *
 * A descriptor table at the bottom (aa64_insn_table) maps mnemonic →
 * (match, mask, AA64Format), so the disassembler matches a raw word with
 *   for (i=0; i<N; ++i) if ((word & desc[i].mask) == desc[i].match) ...
 * and then calls the format's unpack to recover the operand fields.
 *
 * Conventions:
 *   - sf = 0 selects the 32-bit (W) form, sf = 1 selects 64-bit (X).
 *   - Reg values are the raw 5-bit encoding (0..30 + 31 for ZR/SP).
 *   - All wrappers take Rd first, then Rn, Rm, Ra, matching the
 *     destination-first operand order of A64 assembly syntax as written
 *     in the Arm Architecture Reference Manual.
 *
 * New instructions land as one entry in the table and (typically) one
 * inline wrapper in the relevant format section. */

#include "core/core.h"
#include "core/slice.h"
#include "core/strbuf.h"

/* ---- common register names ----
 * ZR and SP share encoding 31; which one a slot means depends on the
 * instruction, so both names map to the same value. */
#define AA64_ZR 31u /* WZR / XZR */
#define AA64_SP 31u /* SP at Rd/Rn slot */
#define AA64_LR 30u /* X30 / link register */

/* ---- format kinds ----
 * One enumerator per encoding format described in this header. The
 * values are implicit (declaration order), so new kinds should be
 * appended rather than inserted.
 * NOTE(review): confirm nothing persists these ordinals before reordering. */
typedef enum AA64Format {
  AA64_FMT_MOVEWIDE,    /* move-wide immediate (MOVN / MOVZ / MOVK) */
  AA64_FMT_LOG_SR,      /* logical, shifted register */
  AA64_FMT_ADDSUB_SR,   /* add/sub, shifted register */
  AA64_FMT_DP3,         /* data-processing, 3 source */
  AA64_FMT_DP2,         /* data-processing, 2 source */
  AA64_FMT_CONDSEL,     /* conditional select (CSEL / CSINC / aliases) */
  AA64_FMT_BR_REG,      /* unconditional branch (register) */
  AA64_FMT_PCREL_ADR,   /* PC-relative ADR / ADRP */
  AA64_FMT_ADDSUB_IMM,  /* add/sub, immediate */
  AA64_FMT_LDST_UIMM,   /* load/store, unsigned 12-bit immediate offset */
  AA64_FMT_LDSTP_PRE,   /* load/store pair, pre-indexed */
  AA64_FMT_LDSTP_SOFF,  /* load/store pair, signed-offset */
  AA64_FMT_LDSTP_POST,  /* load/store pair, post-indexed */
  AA64_FMT_LDST_SIMM9,  /* load/store, unscaled 9-bit signed offset
                           (LDUR / STUR, V=0 and V=1) */
  AA64_FMT_BR_IMM,      /* unconditional branch (immediate) — B / BL */
  AA64_FMT_BR_COND,     /* B.cond (imm19) */
  AA64_FMT_CB,          /* compare-and-branch (CBZ / CBNZ) */
  AA64_FMT_EXCEPT,      /* exception generation (BRK / SVC / HVC / ...) */
  AA64_FMT_HINT,        /* hint (NOP / YIELD / ...) */
  AA64_FMT_BARRIER,     /* memory barrier (DMB / DSB / ISB / CLREX) */
  AA64_FMT_DP1,         /* data-processing, 1 source (RBIT/REV/REV16/CLZ) */
  AA64_FMT_BITFIELD,    /* bitfield move (SBFM / UBFM): Rd, Rn, #immr, #imms */
  AA64_FMT_LDST_REGOFF, /* load/store, register offset [Xn, Xm{, LSL #s}] */
  AA64_FMT_FP_DP2,      /* FP data-processing 2-source (FADD/FSUB/FMUL/FDIV) */
  AA64_FMT_FP_DP1,      /* FP data-processing 1-source (FMOV/FNEG/FABS/FSQRT) */
  AA64_FMT_FP_CMP,      /* FP compare (FCMP) */
  AA64_FMT_FP_CVT,      /* FP precision convert (FCVT single<->double) */
  AA64_FMT_FP_INT_CVT,  /* FP<->int convert + FMOV gpr<->fp
                         * (SCVTF/UCVTF/FCVTZS/FCVTZU/FMOV) */
  AA64_FMT_LDST_EXCL,   /* load/store exclusive + acquire/release ordered
                         * (LDXR/LDAXR/STXR/STLXR/LDAR/STLR + b/h) */
  AA64_FMT_LOG_IMM,     /* logical, immediate (AND/ORR/EOR/ANDS #bitmask) */
  AA64_FMT_SYSREG,      /* system-register move (MRS Xt,<reg> / MSR <reg>,Xt) */
} AA64Format;

/* ---- AsmFlags column on AA64InsnDesc ----
 *
 * Per-row metadata that varies across same-format members. Most rows
 * carry 0. When the disassembler matches a row whose ALIAS bit is set,
 * that's the spelling it prints; the assembler also accepts both the
 * alias and the canonical form because both rows live in the table.
 * The three flags are distinct bits, so they can be OR-ed together. */
#define AA64_ASMFL_ALIAS \
  0x01u /* row is an alias (e.g. MOV → ORR Rd, ZR, Rm) \
         */
#define AA64_ASMFL_SF1 0x02u /* 64-bit form only (sf hard-wired) */
#define AA64_ASMFL_NORN \
  0x04u /* hide Rn operand in print (e.g.
RET when Rn=30) */ 87 88 /* ==================================================================== 89 * Move-wide immediate (MOVN / MOVZ / MOVK) 90 * sf opc(2) 100101 hw(2) imm16(16) Rd(5) 91 * 31 30..29 28..23 22..21 20..5 4..0 92 * ==================================================================== */ 93 94 #define AA64_MOVN_OPC 0u 95 #define AA64_MOVZ_OPC 2u 96 #define AA64_MOVK_OPC 3u 97 98 #define AA64_MOVEWIDE_FAMILY_MATCH 0x12800000u 99 #define AA64_MOVEWIDE_FAMILY_MASK 0x1F800000u /* bits 28:23 */ 100 101 typedef struct AA64MoveWide { 102 u32 sf, opc, hw, imm16, Rd; 103 } AA64MoveWide; 104 105 static inline u32 aa64_movewide_pack(AA64MoveWide f) { 106 return ((f.sf & 1u) << 31) | ((f.opc & 3u) << 29) | 107 AA64_MOVEWIDE_FAMILY_MATCH | ((f.hw & 3u) << 21) | 108 ((f.imm16 & 0xffffu) << 5) | (f.Rd & 0x1fu); 109 } 110 111 static inline AA64MoveWide aa64_movewide_unpack(u32 w) { 112 AA64MoveWide f; 113 f.sf = (w >> 31) & 1u; 114 f.opc = (w >> 29) & 3u; 115 f.hw = (w >> 21) & 3u; 116 f.imm16 = (w >> 5) & 0xffffu; 117 f.Rd = w & 0x1fu; 118 return f; 119 } 120 121 static inline u32 aa64_movz(u32 sf, u32 Rd, u32 imm16, u32 hw) { 122 return aa64_movewide_pack((AA64MoveWide){ 123 .sf = sf, .opc = AA64_MOVZ_OPC, .hw = hw, .imm16 = imm16, .Rd = Rd}); 124 } 125 static inline u32 aa64_movn(u32 sf, u32 Rd, u32 imm16, u32 hw) { 126 return aa64_movewide_pack((AA64MoveWide){ 127 .sf = sf, .opc = AA64_MOVN_OPC, .hw = hw, .imm16 = imm16, .Rd = Rd}); 128 } 129 static inline u32 aa64_movk(u32 sf, u32 Rd, u32 imm16, u32 hw) { 130 return aa64_movewide_pack((AA64MoveWide){ 131 .sf = sf, .opc = AA64_MOVK_OPC, .hw = hw, .imm16 = imm16, .Rd = Rd}); 132 } 133 134 /* ==================================================================== 135 * Logical, shifted register (AND / ORR / EOR / ANDS, with N inverting 136 * Rm to BIC / ORN / EON / BICS). 
137 * sf opc(2) 01010 shift(2) N(1) Rm(5) imm6(6) Rn(5) Rd(5) 138 * 31 30..29 28..24 23..22 21 20..16 15..10 9..5 4..0 139 * ==================================================================== */ 140 141 #define AA64_LOG_AND_OPC 0u 142 #define AA64_LOG_ORR_OPC 1u 143 #define AA64_LOG_EOR_OPC 2u 144 #define AA64_LOG_ANDS_OPC 3u 145 146 #define AA64_LOGSR_FAMILY_MATCH 0x0A000000u 147 #define AA64_LOGSR_FAMILY_MASK 0x1F000000u /* bits 28:24 */ 148 149 typedef struct AA64LogSR { 150 u32 sf, opc, shift, N, Rm, imm6, Rn, Rd; 151 } AA64LogSR; 152 153 static inline u32 aa64_logsr_pack(AA64LogSR f) { 154 return ((f.sf & 1u) << 31) | ((f.opc & 3u) << 29) | AA64_LOGSR_FAMILY_MATCH | 155 ((f.shift & 3u) << 22) | ((f.N & 1u) << 21) | ((f.Rm & 0x1fu) << 16) | 156 ((f.imm6 & 0x3fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu); 157 } 158 159 static inline AA64LogSR aa64_logsr_unpack(u32 w) { 160 AA64LogSR f; 161 f.sf = (w >> 31) & 1u; 162 f.opc = (w >> 29) & 3u; 163 f.shift = (w >> 22) & 3u; 164 f.N = (w >> 21) & 1u; 165 f.Rm = (w >> 16) & 0x1fu; 166 f.imm6 = (w >> 10) & 0x3fu; 167 f.Rn = (w >> 5) & 0x1fu; 168 f.Rd = w & 0x1fu; 169 return f; 170 } 171 172 static inline u32 aa64_and(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 173 return aa64_logsr_pack((AA64LogSR){ 174 .sf = sf, .opc = AA64_LOG_AND_OPC, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 175 } 176 static inline u32 aa64_orr(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 177 return aa64_logsr_pack((AA64LogSR){ 178 .sf = sf, .opc = AA64_LOG_ORR_OPC, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 179 } 180 static inline u32 aa64_eor(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 181 return aa64_logsr_pack((AA64LogSR){ 182 .sf = sf, .opc = AA64_LOG_EOR_OPC, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 183 } 184 static inline u32 aa64_orn(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 185 return aa64_logsr_pack((AA64LogSR){ 186 .sf = sf, .opc = AA64_LOG_ORR_OPC, .N = 1, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 187 } 188 189 /* MOV Wd, Wm ≡ ORR Wd, WZR, Wm */ 190 static inline u32 aa64_mov_reg(u32 sf, u32 Rd, u32 Rm) { 
191 return aa64_orr(sf, Rd, AA64_ZR, Rm); 192 } 193 /* MVN Wd, Wm ≡ ORN Wd, WZR, Wm */ 194 static inline u32 aa64_mvn(u32 sf, u32 Rd, u32 Rm) { 195 return aa64_orn(sf, Rd, AA64_ZR, Rm); 196 } 197 198 /* ==================================================================== 199 * Logical, immediate (AND / ORR / EOR / ANDS, bitmask-imm form) 200 * sf opc(2) 100100 N(1) immr(6) imms(6) Rn(5) Rd(5) 201 * 31 30..29 28..23 22 21..16 15..10 9..5 4..0 202 * 203 * N:immr:imms encodes a repeated-pattern bitmask. The encoder 204 * aa64_logimm_encode below computes those fields from a literal value; 205 * this pack just lays the bits out. For 32-bit ops (sf=0), N must be 0; 206 * for 64-bit ops N can be 0 or 1 and selects whether the pattern 207 * element is 64 bits (N=1) or 2..32 bits (N=0). 208 * ==================================================================== */ 209 210 #define AA64_LOGIMM_FAMILY_MATCH 0x12000000u 211 #define AA64_LOGIMM_FAMILY_MASK 0x1F800000u /* bits 28:23 */ 212 213 typedef struct AA64LogImm { 214 u32 sf, opc, N, immr, imms, Rn, Rd; 215 } AA64LogImm; 216 217 static inline u32 aa64_logimm_pack(AA64LogImm f) { 218 return ((f.sf & 1u) << 31) | ((f.opc & 3u) << 29) | AA64_LOGIMM_FAMILY_MATCH | 219 ((f.N & 1u) << 22) | ((f.immr & 0x3fu) << 16) | 220 ((f.imms & 0x3fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu); 221 } 222 223 static inline u32 aa64_and_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr, 224 u32 imms) { 225 return aa64_logimm_pack((AA64LogImm){.sf = sf, 226 .opc = AA64_LOG_AND_OPC, 227 .N = N, 228 .immr = immr, 229 .imms = imms, 230 .Rn = Rn, 231 .Rd = Rd}); 232 } 233 static inline u32 aa64_orr_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr, 234 u32 imms) { 235 return aa64_logimm_pack((AA64LogImm){.sf = sf, 236 .opc = AA64_LOG_ORR_OPC, 237 .N = N, 238 .immr = immr, 239 .imms = imms, 240 .Rn = Rn, 241 .Rd = Rd}); 242 } 243 static inline u32 aa64_eor_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr, 244 u32 imms) { 245 return 
aa64_logimm_pack((AA64LogImm){.sf = sf, 246 .opc = AA64_LOG_EOR_OPC, 247 .N = N, 248 .immr = immr, 249 .imms = imms, 250 .Rn = Rn, 251 .Rd = Rd}); 252 } 253 254 /* Bitmask-immediate predicate + encoder. Returns 1 and writes N/immr/imms 255 * if `imm` is encodable as an AArch64 logical immediate of width 256 * (sf ? 64 : 32); returns 0 otherwise (caller materializes into a 257 * scratch and uses the shifted-register form). 258 * 259 * Algorithm (inverse of ARM ARM "DecodeBitMasks"): an encodable value 260 * is a non-zero, non-all-ones bitmask made of a repeated `size`-bit 261 * element (size ∈ {2,4,8,16,32,64}); within one element the pattern is 262 * a rotation of (0…0 1…1). Find size by detecting the smallest 263 * repeating period; find the rotation that places the 1-run at the 264 * LSB; encode size and ones-count into imms per the standard scheme 265 * (top bits of imms inverted-encode size, low bits are ones-count-1). */ 266 static inline int aa64_logimm_encode(u64 imm, u32 sf, u32* N_out, u32* immr_out, 267 u32* imms_out) { 268 if (!sf) { 269 u64 lo = imm & 0xFFFFFFFFu; 270 u64 hi = imm >> 32; 271 if (hi != 0 && hi != lo) return 0; 272 imm = lo | (lo << 32); 273 } 274 if (imm == 0 || imm == ~(u64)0) return 0; 275 276 u32 size = 64; 277 for (u32 s = 32; s >= 2; s >>= 1) { 278 u64 mask = ((u64)1 << s) - 1u; 279 if ((imm & mask) != ((imm >> s) & mask)) break; 280 size = s; 281 } 282 u64 elt_mask = (size == 64) ? ~(u64)0 : (((u64)1 << size) - 1u); 283 u64 elt = imm & elt_mask; 284 if (elt == 0 || elt == elt_mask) return 0; 285 286 u32 ones = 0; 287 for (u64 x = elt; x; x >>= 1) ones += (u32)(x & 1u); 288 if (ones == 0 || ones >= size) return 0; 289 290 u64 aligned = ((u64)1 << ones) - 1u; 291 u32 rotation = 0xFFFFFFFFu; 292 for (u32 r = 0; r < size; r++) { 293 u64 rotated = 294 r == 0 ? 
elt : (((elt >> r) | (elt << (size - r))) & elt_mask); 295 if (rotated == aligned) { 296 rotation = r; 297 break; 298 } 299 } 300 if (rotation == 0xFFFFFFFFu) return 0; 301 302 if (size == 64) { 303 *N_out = 1u; 304 *imms_out = (ones - 1u) & 0x3Fu; 305 } else { 306 *N_out = 0u; 307 u32 neg_size_shl1 = ((u32)(-(i32)size) << 1) & 0x3Fu; 308 *imms_out = neg_size_shl1 | ((ones - 1u) & 0x3Fu); 309 } 310 *immr_out = rotation ? (size - rotation) & (size - 1u) : 0u; 311 return 1; 312 } 313 314 /* Shift-by-immediate field generators for LSL/LSR/ASR (encoded via 315 * UBFM/SBFM). Predicate: shift < width. The aa64_ubfm / aa64_sbfm 316 * encoders live in aarch64.c; callers pair these (immr, imms) with the 317 * matching pack. */ 318 static inline int aa64_lsl_imm_fields(u32 shift, u32 sf, u32* immr_out, 319 u32* imms_out) { 320 u32 width = sf ? 64u : 32u; 321 if (shift >= width) return 0; 322 *immr_out = (width - shift) & (width - 1u); 323 *imms_out = width - 1u - shift; 324 return 1; 325 } 326 static inline int aa64_lsr_imm_fields(u32 shift, u32 sf, u32* immr_out, 327 u32* imms_out) { 328 u32 width = sf ? 64u : 32u; 329 if (shift >= width) return 0; 330 *immr_out = shift; 331 *imms_out = width - 1u; 332 return 1; 333 } 334 static inline int aa64_asr_imm_fields(u32 shift, u32 sf, u32* immr_out, 335 u32* imms_out) { 336 u32 width = sf ? 
64u : 32u; 337 if (shift >= width) return 0; 338 *immr_out = shift; 339 *imms_out = width - 1u; 340 return 1; 341 } 342 343 /* ==================================================================== 344 * Add/Sub, shifted register (ADD / SUB / ADDS / SUBS) 345 * sf op(1) S(1) 01011 shift(2) 0 Rm(5) imm6(6) Rn(5) Rd(5) 346 * 31 30 29 28..24 23..22 21 20..16 15..10 9..5 4..0 347 * ==================================================================== */ 348 349 #define AA64_ADDSUBSR_FAMILY_MATCH 0x0B000000u 350 #define AA64_ADDSUBSR_FAMILY_MASK 0x1F200000u /* bits 28:24 + bit 21 */ 351 352 typedef struct AA64AddSubSR { 353 u32 sf, op, S, shift, Rm, imm6, Rn, Rd; 354 } AA64AddSubSR; 355 356 static inline u32 aa64_addsubsr_pack(AA64AddSubSR f) { 357 return ((f.sf & 1u) << 31) | ((f.op & 1u) << 30) | ((f.S & 1u) << 29) | 358 AA64_ADDSUBSR_FAMILY_MATCH | ((f.shift & 3u) << 22) | 359 ((f.Rm & 0x1fu) << 16) | ((f.imm6 & 0x3fu) << 10) | 360 ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu); 361 } 362 363 static inline AA64AddSubSR aa64_addsubsr_unpack(u32 w) { 364 AA64AddSubSR f; 365 f.sf = (w >> 31) & 1u; 366 f.op = (w >> 30) & 1u; 367 f.S = (w >> 29) & 1u; 368 f.shift = (w >> 22) & 3u; 369 f.Rm = (w >> 16) & 0x1fu; 370 f.imm6 = (w >> 10) & 0x3fu; 371 f.Rn = (w >> 5) & 0x1fu; 372 f.Rd = w & 0x1fu; 373 return f; 374 } 375 376 static inline u32 aa64_add(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 377 return aa64_addsubsr_pack( 378 (AA64AddSubSR){.sf = sf, .op = 0, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 379 } 380 static inline u32 aa64_sub(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 381 return aa64_addsubsr_pack( 382 (AA64AddSubSR){.sf = sf, .op = 1, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 383 } 384 385 /* NEG Wd, Wm ≡ SUB Wd, WZR, Wm */ 386 static inline u32 aa64_neg(u32 sf, u32 Rd, u32 Rm) { 387 return aa64_sub(sf, Rd, AA64_ZR, Rm); 388 } 389 390 /* ==================================================================== 391 * Data-processing, 3-source (MADD / MSUB / SMULL / UMULL / ...) 
392 * sf op54(2) 11011 op31(3) Rm(5) o0(1) Ra(5) Rn(5) Rd(5) 393 * 31 30..29 28..24 23..21 20..16 15 14..10 9..5 4..0 394 * ==================================================================== */ 395 396 #define AA64_DP3_FAMILY_MATCH 0x1B000000u 397 #define AA64_DP3_FAMILY_MASK 0x1F000000u /* bits 28:24 */ 398 399 typedef struct AA64DP3 { 400 u32 sf, op54, op31, Rm, o0, Ra, Rn, Rd; 401 } AA64DP3; 402 403 static inline u32 aa64_dp3_pack(AA64DP3 f) { 404 return ((f.sf & 1u) << 31) | ((f.op54 & 3u) << 29) | AA64_DP3_FAMILY_MATCH | 405 ((f.op31 & 7u) << 21) | ((f.Rm & 0x1fu) << 16) | ((f.o0 & 1u) << 15) | 406 ((f.Ra & 0x1fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu); 407 } 408 409 static inline AA64DP3 aa64_dp3_unpack(u32 w) { 410 AA64DP3 f; 411 f.sf = (w >> 31) & 1u; 412 f.op54 = (w >> 29) & 3u; 413 f.op31 = (w >> 21) & 7u; 414 f.Rm = (w >> 16) & 0x1fu; 415 f.o0 = (w >> 15) & 1u; 416 f.Ra = (w >> 10) & 0x1fu; 417 f.Rn = (w >> 5) & 0x1fu; 418 f.Rd = w & 0x1fu; 419 return f; 420 } 421 422 static inline u32 aa64_madd(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 Ra) { 423 return aa64_dp3_pack((AA64DP3){ 424 .sf = sf, .op31 = 0, .o0 = 0, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd}); 425 } 426 static inline u32 aa64_msub(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 Ra) { 427 return aa64_dp3_pack((AA64DP3){ 428 .sf = sf, .op31 = 0, .o0 = 1, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd}); 429 } 430 /* MUL Wd, Wn, Wm ≡ MADD Wd, Wn, Wm, WZR */ 431 static inline u32 aa64_mul(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 432 return aa64_madd(sf, Rd, Rn, Rm, AA64_ZR); 433 } 434 435 /* ==================================================================== 436 * Data-processing, 2-source (UDIV / SDIV / LSLV / LSRV / ASRV / RORV) 437 * sf 0 S(1) 11010110 Rm(5) opcode(6) Rn(5) Rd(5) 438 * 31 30 29 28..21 20..16 15..10 9..5 4..0 439 * ==================================================================== */ 440 441 #define AA64_DP2_UDIV_OP 0x02u 442 #define AA64_DP2_SDIV_OP 0x03u 443 #define AA64_DP2_LSLV_OP 0x08u 444 
#define AA64_DP2_LSRV_OP 0x09u 445 #define AA64_DP2_ASRV_OP 0x0Au 446 #define AA64_DP2_RORV_OP 0x0Bu 447 448 #define AA64_DP2_FAMILY_MATCH 0x1AC00000u 449 #define AA64_DP2_FAMILY_MASK 0x5FE00000u /* bit 30 + bits 28:21 */ 450 451 typedef struct AA64DP2 { 452 u32 sf, S, opcode, Rm, Rn, Rd; 453 } AA64DP2; 454 455 static inline u32 aa64_dp2_pack(AA64DP2 f) { 456 return ((f.sf & 1u) << 31) | ((f.S & 1u) << 29) | AA64_DP2_FAMILY_MATCH | 457 ((f.Rm & 0x1fu) << 16) | ((f.opcode & 0x3fu) << 10) | 458 ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu); 459 } 460 461 static inline AA64DP2 aa64_dp2_unpack(u32 w) { 462 AA64DP2 f; 463 f.sf = (w >> 31) & 1u; 464 f.S = (w >> 29) & 1u; 465 f.Rm = (w >> 16) & 0x1fu; 466 f.opcode = (w >> 10) & 0x3fu; 467 f.Rn = (w >> 5) & 0x1fu; 468 f.Rd = w & 0x1fu; 469 return f; 470 } 471 472 static inline u32 aa64_udiv(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 473 return aa64_dp2_pack((AA64DP2){ 474 .sf = sf, .opcode = AA64_DP2_UDIV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 475 } 476 static inline u32 aa64_sdiv(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 477 return aa64_dp2_pack((AA64DP2){ 478 .sf = sf, .opcode = AA64_DP2_SDIV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 479 } 480 static inline u32 aa64_lslv(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 481 return aa64_dp2_pack((AA64DP2){ 482 .sf = sf, .opcode = AA64_DP2_LSLV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 483 } 484 static inline u32 aa64_lsrv(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 485 return aa64_dp2_pack((AA64DP2){ 486 .sf = sf, .opcode = AA64_DP2_LSRV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 487 } 488 static inline u32 aa64_asrv(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 489 return aa64_dp2_pack((AA64DP2){ 490 .sf = sf, .opcode = AA64_DP2_ASRV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 491 } 492 static inline u32 aa64_rorv(u32 sf, u32 Rd, u32 Rn, u32 Rm) { 493 return aa64_dp2_pack((AA64DP2){ 494 .sf = sf, .opcode = AA64_DP2_RORV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd}); 495 } 496 497 /* ==================================================================== 498 * Conditional 
select (CSEL / CSINC / CSINV / CSNEG) 499 * sf op S 11010100 Rm(5) cond(4) op2(2) Rn(5) Rd(5) 500 * 31 30 29 28..21 20..16 15..12 11..10 9..5 4..0 501 * 502 * The integer forms this backend emits keep S=0. Aliases such as CSET 503 * are descriptor-table rows over this same encoding family. */ 504 505 #define AA64_CONDSEL_FAMILY_MATCH 0x1A800000u 506 #define AA64_CONDSEL_FAMILY_MASK 0x1FE00000u /* bits 28:21 fixed */ 507 508 typedef struct AA64CondSel { 509 u32 sf, op, S, Rm, cond, op2, Rn, Rd; 510 } AA64CondSel; 511 512 static inline u32 aa64_condsel_pack(AA64CondSel f) { 513 return ((f.sf & 1u) << 31) | ((f.op & 1u) << 30) | ((f.S & 1u) << 29) | 514 AA64_CONDSEL_FAMILY_MATCH | ((f.Rm & 0x1fu) << 16) | 515 ((f.cond & 0xfu) << 12) | ((f.op2 & 3u) << 10) | 516 ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu); 517 } 518 519 static inline AA64CondSel aa64_condsel_unpack(u32 w) { 520 AA64CondSel f; 521 f.sf = (w >> 31) & 1u; 522 f.op = (w >> 30) & 1u; 523 f.S = (w >> 29) & 1u; 524 f.Rm = (w >> 16) & 0x1fu; 525 f.cond = (w >> 12) & 0xfu; 526 f.op2 = (w >> 10) & 3u; 527 f.Rn = (w >> 5) & 0x1fu; 528 f.Rd = w & 0x1fu; 529 return f; 530 } 531 532 static inline u32 aa64_csel_enc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) { 533 return aa64_condsel_pack((AA64CondSel){.sf = sf, 534 .op = 0, 535 .S = 0, 536 .Rm = Rm, 537 .cond = cond, 538 .op2 = 0, 539 .Rn = Rn, 540 .Rd = Rd}); 541 } 542 static inline u32 aa64_csinc_enc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) { 543 return aa64_condsel_pack((AA64CondSel){.sf = sf, 544 .op = 0, 545 .S = 0, 546 .Rm = Rm, 547 .cond = cond, 548 .op2 = 1, 549 .Rn = Rn, 550 .Rd = Rd}); 551 } 552 static inline u32 aa64_csinv_enc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) { 553 return aa64_condsel_pack((AA64CondSel){.sf = sf, 554 .op = 1, 555 .S = 0, 556 .Rm = Rm, 557 .cond = cond, 558 .op2 = 0, 559 .Rn = Rn, 560 .Rd = Rd}); 561 } 562 static inline u32 aa64_csneg_enc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) { 563 return aa64_condsel_pack((AA64CondSel){.sf = sf, 
564 .op = 1, 565 .S = 0, 566 .Rm = Rm, 567 .cond = cond, 568 .op2 = 1, 569 .Rn = Rn, 570 .Rd = Rd}); 571 } 572 573 /* ==================================================================== 574 * Unconditional branch (register) — BR / BLR / RET 575 * 1101011 opc(4) op2(5)=11111 op3(6)=000000 Rn(5) op4(5)=00000 576 * 31..25 24..21 20..16 15..10 9..5 4..0 577 * ==================================================================== */ 578 579 #define AA64_BR_OP_BR 0u 580 #define AA64_BR_OP_BLR 1u 581 #define AA64_BR_OP_RET 2u 582 583 #define AA64_BR_REG_FAMILY_MATCH 0xD61F0000u 584 #define AA64_BR_REG_FAMILY_MASK \ 585 0xFE1FFC1Fu /* everything fixed except opc + Rn */ 586 587 typedef struct AA64BrReg { 588 u32 opc, Rn; 589 } AA64BrReg; 590 591 static inline u32 aa64_brreg_pack(AA64BrReg f) { 592 return AA64_BR_REG_FAMILY_MATCH | ((f.opc & 0xfu) << 21) | 593 ((f.Rn & 0x1fu) << 5); 594 } 595 596 static inline AA64BrReg aa64_brreg_unpack(u32 w) { 597 AA64BrReg f; 598 f.opc = (w >> 21) & 0xfu; 599 f.Rn = (w >> 5) & 0x1fu; 600 return f; 601 } 602 603 static inline u32 aa64_br(u32 Rn) { 604 return aa64_brreg_pack((AA64BrReg){.opc = AA64_BR_OP_BR, .Rn = Rn}); 605 } 606 static inline u32 aa64_blr(u32 Rn) { 607 return aa64_brreg_pack((AA64BrReg){.opc = AA64_BR_OP_BLR, .Rn = Rn}); 608 } 609 static inline u32 aa64_ret(u32 Rn) { 610 return aa64_brreg_pack((AA64BrReg){.opc = AA64_BR_OP_RET, .Rn = Rn}); 611 } 612 613 /* ==================================================================== 614 * PC-relative addressing (ADR / ADRP) 615 * op(1) immlo(2) 10000 immhi(19) Rd(5) 616 * 31 30..29 28..24 23..5 4..0 617 * 618 * op = 0 → ADR (PC + sign_extend(immhi:immlo)) 619 * op = 1 → ADRP (page(PC) + sign_extend(immhi:immlo) << 12) 620 * 621 * The two immediate halves stay split because the linker's 622 * R_AARCH64_ADR_PREL_PG_HI21 reloc patches them in place; keeping the 623 * field layout symmetric with the encoded word lets reloc-apply code 624 * reuse the same pack/unpack helpers. 
625 * ==================================================================== */ 626 627 #define AA64_ADR_OP_ADR 0u 628 #define AA64_ADR_OP_ADRP 1u 629 630 #define AA64_PCREL_ADR_FAMILY_MATCH 0x10000000u 631 #define AA64_PCREL_ADR_FAMILY_MASK 0x1F000000u /* bits 28:24 */ 632 633 typedef struct AA64PCRelAdr { 634 u32 op, immlo, immhi, Rd; 635 } AA64PCRelAdr; 636 637 static inline u32 aa64_pcrel_adr_pack(AA64PCRelAdr f) { 638 return ((f.op & 1u) << 31) | ((f.immlo & 3u) << 29) | 639 AA64_PCREL_ADR_FAMILY_MATCH | ((f.immhi & 0x7ffffu) << 5) | 640 (f.Rd & 0x1fu); 641 } 642 643 static inline AA64PCRelAdr aa64_pcrel_adr_unpack(u32 w) { 644 AA64PCRelAdr f; 645 f.op = (w >> 31) & 1u; 646 f.immlo = (w >> 29) & 3u; 647 f.immhi = (w >> 5) & 0x7ffffu; 648 f.Rd = w & 0x1fu; 649 return f; 650 } 651 652 static inline u32 aa64_adrp(u32 Rd, u32 immlo, u32 immhi) { 653 return aa64_pcrel_adr_pack((AA64PCRelAdr){ 654 .op = AA64_ADR_OP_ADRP, .immlo = immlo, .immhi = immhi, .Rd = Rd}); 655 } 656 static inline u32 aa64_adr(u32 Rd, u32 immlo, u32 immhi) { 657 return aa64_pcrel_adr_pack((AA64PCRelAdr){ 658 .op = AA64_ADR_OP_ADR, .immlo = immlo, .immhi = immhi, .Rd = Rd}); 659 } 660 661 /* ==================================================================== 662 * Add/Sub, immediate (ADD / SUB / ADDS / SUBS, 12-bit imm with shift) 663 * sf op(1) S(1) 100010 sh(1) imm12(12) Rn(5) Rd(5) 664 * 31 30 29 28..23 22 21..10 9..5 4..0 665 * 666 * sh selects whether imm12 is left-shifted by 12. Used by PLT entries 667 * for `add x16, x16, #lo12(slot)` where sh=0 and imm12 = slot & 0xfff. 
668 * ==================================================================== */ 669 670 #define AA64_ADDSUBIMM_FAMILY_MATCH 0x11000000u 671 #define AA64_ADDSUBIMM_FAMILY_MASK 0x1F000000u /* bits 28:24 */ 672 673 typedef struct AA64AddSubImm { 674 u32 sf, op, S, sh, imm12, Rn, Rd; 675 } AA64AddSubImm; 676 677 static inline u32 aa64_addsubimm_pack(AA64AddSubImm f) { 678 return ((f.sf & 1u) << 31) | ((f.op & 1u) << 30) | ((f.S & 1u) << 29) | 679 AA64_ADDSUBIMM_FAMILY_MATCH | ((f.sh & 1u) << 22) | 680 ((f.imm12 & 0xfffu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu); 681 } 682 683 static inline AA64AddSubImm aa64_addsubimm_unpack(u32 w) { 684 AA64AddSubImm f; 685 f.sf = (w >> 31) & 1u; 686 f.op = (w >> 30) & 1u; 687 f.S = (w >> 29) & 1u; 688 f.sh = (w >> 22) & 1u; 689 f.imm12 = (w >> 10) & 0xfffu; 690 f.Rn = (w >> 5) & 0x1fu; 691 f.Rd = w & 0x1fu; 692 return f; 693 } 694 695 static inline u32 aa64_add_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) { 696 return aa64_addsubimm_pack((AA64AddSubImm){ 697 .sf = sf, .op = 0, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd}); 698 } 699 static inline u32 aa64_sub_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) { 700 return aa64_addsubimm_pack((AA64AddSubImm){ 701 .sf = sf, .op = 1, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd}); 702 } 703 /* SUBS imm — sets flags. Used for CMP imm (Rd=ZR) and for branchless 704 * compares that feed CSET. The 12-bit-shifted form covers 0..0xFFFFF000 705 * stepped by 0x1000; cg_fold collapses literal-only compares upstream, 706 * so this encoder is reached for `x cmp const` and `if (x)` patterns. */ 707 static inline u32 aa64_subs_imm12(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) { 708 return aa64_addsubimm_pack((AA64AddSubImm){ 709 .sf = sf, .op = 1, .S = 1, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd}); 710 } 711 712 /* Predicate: does `imm` fit ADD/SUB/CMP's 12-bit immediate (optionally 713 * left-shifted by 12)? 
On success writes the encoded imm12 and sh and 714 * returns 1; on failure returns 0 and leaves outputs untouched. 715 * 716 * The encoding admits 0..4095 directly (sh=0) and multiples of 4096 up 717 * to 0xFFF000 (sh=1). Negative literals are rejected here — the caller 718 * (e.g. opt's machinize, or a smarter cg) is free to swap ADD ↔ SUB and 719 * retry with the negated literal; the bare predicate keeps the contract 720 * narrow. */ 721 static inline int aa64_addsub_imm_fits(i64 imm, u32* imm12_out, u32* sh_out) { 722 if (imm < 0) return 0; 723 u64 u = (u64)imm; 724 if (u <= 0xFFFu) { 725 *imm12_out = (u32)u; 726 *sh_out = 0; 727 return 1; 728 } 729 if ((u & 0xFFFu) == 0 && (u >> 12) <= 0xFFFu) { 730 *imm12_out = (u32)(u >> 12); 731 *sh_out = 1; 732 return 1; 733 } 734 return 0; 735 } 736 737 /* ==================================================================== 738 * Load/store, unsigned 12-bit immediate offset (LDR / STR, scaled) 739 * size(2) 111 V(1) 01 opc(2) imm12(12) Rn(5) Rt(5) 740 * 31..30 29..27 26 25..24 23..22 21..10 9..5 4..0 741 * 742 * size=11, V=0, opc=01 → LDR (64-bit, integer). imm12 is the byte 743 * offset divided by the access size (8 for LDR Xt), giving a 0..32760 744 * byte range. 745 * 746 * Only the LDR Xt form is needed by the linker today (PLT loads through 747 * x16/x17); the family encoders cover STR and the smaller widths so 748 * future callers can drop in without touching this header. 
749 * ==================================================================== */ 750 751 #define AA64_LDST_SIZE_64 3u 752 #define AA64_LDST_OPC_STR 0u 753 #define AA64_LDST_OPC_LDR 1u 754 755 #define AA64_LDST_UIMM_FAMILY_MATCH 0x39000000u 756 #define AA64_LDST_UIMM_FAMILY_MASK 0x3B000000u /* bits 29:27 + bits 25:24 */ 757 758 typedef struct AA64LdStUimm { 759 u32 size, V, opc, imm12, Rn, Rt; 760 } AA64LdStUimm; 761 762 static inline u32 aa64_ldst_uimm_pack(AA64LdStUimm f) { 763 return ((f.size & 3u) << 30) | AA64_LDST_UIMM_FAMILY_MATCH | 764 ((f.V & 1u) << 26) | ((f.opc & 3u) << 22) | 765 ((f.imm12 & 0xfffu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); 766 } 767 768 static inline AA64LdStUimm aa64_ldst_uimm_unpack(u32 w) { 769 AA64LdStUimm f; 770 f.size = (w >> 30) & 3u; 771 f.V = (w >> 26) & 1u; 772 f.opc = (w >> 22) & 3u; 773 f.imm12 = (w >> 10) & 0xfffu; 774 f.Rn = (w >> 5) & 0x1fu; 775 f.Rt = w & 0x1fu; 776 return f; 777 } 778 779 /* LDR Xt, [Xn, #imm12_scaled]. imm12_scaled is the encoded field — 780 * callers pass `byte_offset >> 3` for the 64-bit form. */ 781 static inline u32 aa64_ldr64_uimm12(u32 Rt, u32 Rn, u32 imm12_scaled) { 782 return aa64_ldst_uimm_pack((AA64LdStUimm){.size = AA64_LDST_SIZE_64, 783 .V = 0, 784 .opc = AA64_LDST_OPC_LDR, 785 .imm12 = imm12_scaled, 786 .Rn = Rn, 787 .Rt = Rt}); 788 } 789 static inline u32 aa64_str64_uimm12(u32 Rt, u32 Rn, u32 imm12_scaled) { 790 return aa64_ldst_uimm_pack((AA64LdStUimm){.size = AA64_LDST_SIZE_64, 791 .V = 0, 792 .opc = AA64_LDST_OPC_STR, 793 .imm12 = imm12_scaled, 794 .Rn = Rn, 795 .Rt = Rt}); 796 } 797 798 /* ---- Scalar floating-point encoders ---- 799 * ftype: 0=single (Sn), 1=double (Dn), 3=half (Hn). The bit layouts match the 800 * FP_* decode rows in isa.c and the aa_* encoders in native.c, so encode and 801 * decode round-trip. The DP2/DP1 `op` and the FP_INT_CVT `opcode` are the 802 * named field values below. 
*/ 803 #define AA64_FP_DP2_FMUL 0x0800u 804 #define AA64_FP_DP2_FDIV 0x1800u 805 #define AA64_FP_DP2_FADD 0x2800u 806 #define AA64_FP_DP2_FSUB 0x3800u 807 #define AA64_FP_DP2_FMAX 0x4800u 808 #define AA64_FP_DP2_FMIN 0x5800u 809 #define AA64_FP_DP2_FNMUL 0x8800u 810 #define AA64_FP_DP1_FMOV 0x4000u 811 #define AA64_FP_DP1_FABS 0xC000u 812 #define AA64_FP_DP1_FNEG 0x14000u 813 #define AA64_FP_DP1_FSQRT 0x1C000u 814 #define AA64_FP_ICVT_SCVTF 0x02u 815 #define AA64_FP_ICVT_UCVTF 0x03u 816 #define AA64_FP_ICVT_FCVTZS 0x18u 817 #define AA64_FP_ICVT_FCVTZU 0x19u 818 #define AA64_FP_ICVT_FMOV_TO_GPR 0x06u /* fmov Rd, Vn */ 819 #define AA64_FP_ICVT_FMOV_TO_FP 0x07u /* fmov Vd, Rn */ 820 821 static inline u32 aa64_fp_dp2(u32 ftype, u32 op, u32 Rd, u32 Rn, u32 Rm) { 822 return 0x1E200000u | ((ftype & 3u) << 22) | op | ((Rm & 0x1fu) << 16) | 823 ((Rn & 0x1fu) << 5) | (Rd & 0x1fu); 824 } 825 static inline u32 aa64_fp_dp1(u32 ftype, u32 op, u32 Rd, u32 Rn) { 826 return 0x1E200000u | ((ftype & 3u) << 22) | op | ((Rn & 0x1fu) << 5) | 827 (Rd & 0x1fu); 828 } 829 static inline u32 aa64_fcmp_reg(u32 ftype, u32 Rn, u32 Rm) { 830 return 0x1E202000u | ((ftype & 3u) << 22) | ((Rm & 0x1fu) << 16) | 831 ((Rn & 0x1fu) << 5); 832 } 833 static inline u32 aa64_fcvt_prec(u32 src_ftype, u32 dst_ftype, u32 Rd, u32 Rn) { 834 return 0x1E204000u | ((src_ftype & 3u) << 22) | (1u << 17) | 835 ((dst_ftype & 3u) << 15) | ((Rn & 0x1fu) << 5) | (Rd & 0x1fu); 836 } 837 static inline u32 aa64_fp_int_cvt(u32 sf, u32 ftype, u32 opcode, u32 Rd, 838 u32 Rn) { 839 return ((sf & 1u) << 31) | 0x1E200000u | ((ftype & 3u) << 22) | 840 ((opcode & 0x1fu) << 16) | ((Rn & 0x1fu) << 5) | (Rd & 0x1fu); 841 } 842 843 /* Bitfield move (opc: 0=SBFM, 1=BFM, 2=UBFM). The N bit tracks sf for the 844 * 32-/64-bit forms. Matches native.c aa_sbfm/aa_ubfm and the BITFIELD row. 
*/ 845 static inline u32 aa64_bitfield(u32 sf, u32 opc, u32 immr, u32 imms, u32 Rd, 846 u32 Rn) { 847 return ((sf & 1u) << 31) | ((opc & 3u) << 29) | 0x13000000u | 848 ((sf & 1u) << 22) | ((immr & 0x3fu) << 16) | ((imms & 0x3fu) << 10) | 849 ((Rn & 0x1fu) << 5) | (Rd & 0x1fu); 850 } 851 852 /* Data-processing (1 source). opcode2 (bits[15:10]): RBIT=0, REV16=1, 853 * REV(32)=2, REV(64)=3, CLZ=4. Matches native.c aa_clz/aa_rbit/aa_rev. */ 854 #define AA64_DP1_RBIT 0x00u 855 #define AA64_DP1_REV16 0x01u 856 #define AA64_DP1_REV32 0x02u 857 #define AA64_DP1_REV64 0x03u 858 #define AA64_DP1_CLZ 0x04u 859 static inline u32 aa64_dp1(u32 sf, u32 opcode2, u32 Rd, u32 Rn) { 860 return ((sf & 1u) << 31) | 0x5AC00000u | ((opcode2 & 0x3fu) << 10) | 861 ((Rn & 0x1fu) << 5) | (Rd & 0x1fu); 862 } 863 864 /* ==================================================================== 865 * Load/store register pair, pre-indexed (STP / LDP, 64-bit form) 866 * opc(2) 101 V(1) 010 L(1) imm7(7) Rt2(5) Rn(5) Rt(5) 867 * 31..30 29..27 26 25..23 22 21..15 14..10 9..5 4..0 868 * 869 * 64-bit integer form fixes opc=10, V=0. L=0 → STP, L=1 → LDP. 870 * imm7 is a signed 7-bit value scaled by 8 (for the 64-bit form): the 871 * encoded field equals `byte_offset / 8`. Callers pass the scaled 872 * value already; the helper masks to 7 bits to handle negative inputs 873 * sign-extended in i32. 
874 * ==================================================================== */ 875 876 #define AA64_LDSTP_PRE_FAMILY_MATCH 0x29800000u 877 #define AA64_LDSTP_PRE_FAMILY_MASK 0x7FC00000u /* bits 30:23 */ 878 879 typedef struct AA64LdStPPre { 880 u32 opc, V, L, imm7, Rt2, Rn, Rt; 881 } AA64LdStPPre; 882 883 static inline u32 aa64_ldstp_pre_pack(AA64LdStPPre f) { 884 return ((f.opc & 3u) << 30) | AA64_LDSTP_PRE_FAMILY_MATCH | 885 ((f.V & 1u) << 26) | ((f.L & 1u) << 22) | ((f.imm7 & 0x7fu) << 15) | 886 ((f.Rt2 & 0x1fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); 887 } 888 889 static inline AA64LdStPPre aa64_ldstp_pre_unpack(u32 w) { 890 AA64LdStPPre f; 891 f.opc = (w >> 30) & 3u; 892 f.V = (w >> 26) & 1u; 893 f.L = (w >> 22) & 1u; 894 f.imm7 = (w >> 15) & 0x7fu; 895 f.Rt2 = (w >> 10) & 0x1fu; 896 f.Rn = (w >> 5) & 0x1fu; 897 f.Rt = w & 0x1fu; 898 return f; 899 } 900 901 /* STP Xt, Xt2, [Xn, #imm7_scaled]! — opc=10 selects the 64-bit form. 902 * imm7_scaled is `byte_offset / 8`; callers pass it pre-scaled (e.g. 903 * -2 for [sp, #-16]!). */ 904 static inline u32 aa64_stp64_pre(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) { 905 return aa64_ldstp_pre_pack((AA64LdStPPre){.opc = 2, 906 .V = 0, 907 .L = 0, 908 .imm7 = (u32)imm7_scaled & 0x7fu, 909 .Rt2 = Rt2, 910 .Rn = Rn, 911 .Rt = Rt}); 912 } 913 static inline u32 aa64_ldp64_pre(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) { 914 return aa64_ldstp_pre_pack((AA64LdStPPre){.opc = 2, 915 .V = 0, 916 .L = 1, 917 .imm7 = (u32)imm7_scaled & 0x7fu, 918 .Rt2 = Rt2, 919 .Rn = Rn, 920 .Rt = Rt}); 921 } 922 923 /* Post-indexed STP/LDP — same field layout as the pre-indexed form, only 924 * bits[25:23] differ (001 vs 011); reuse AA64LdStPPre. Used for the slim 925 * prologue's epilogue restore: `ldp x29,x30,[sp],#16`. 
*/ 926 #define AA64_LDSTP_POST_FAMILY_MATCH 0x28800000u 927 #define AA64_LDSTP_POST_FAMILY_MASK 0x7FC00000u /* bits 30:23 */ 928 929 static inline u32 aa64_ldstp_post_pack(AA64LdStPPre f) { 930 return ((f.opc & 3u) << 30) | AA64_LDSTP_POST_FAMILY_MATCH | 931 ((f.V & 1u) << 26) | ((f.L & 1u) << 22) | ((f.imm7 & 0x7fu) << 15) | 932 ((f.Rt2 & 0x1fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); 933 } 934 935 static inline u32 aa64_stp64_post(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) { 936 return aa64_ldstp_post_pack((AA64LdStPPre){.opc = 2, 937 .V = 0, 938 .L = 0, 939 .imm7 = (u32)imm7_scaled & 0x7fu, 940 .Rt2 = Rt2, 941 .Rn = Rn, 942 .Rt = Rt}); 943 } 944 static inline u32 aa64_ldp64_post(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) { 945 return aa64_ldstp_post_pack((AA64LdStPPre){.opc = 2, 946 .V = 0, 947 .L = 1, 948 .imm7 = (u32)imm7_scaled & 0x7fu, 949 .Rt2 = Rt2, 950 .Rn = Rn, 951 .Rt = Rt}); 952 } 953 954 /* ==================================================================== 955 * Hint instructions (NOP / YIELD / WFE / WFI / SEV / SEVL) 956 * 1101 0101 0000 0011 0010 CRm(4) op2(3) 11111 957 * 31..16 15..12 11..8 7..5 4..0 958 * 959 * NOP encodes CRm=0, op2=0 → 0xD503201F. The full hint family lives 960 * inside the system-instruction space; we only expose NOP today since 961 * that's the only slot the linker fills. 962 * ==================================================================== */ 963 964 #define AA64_HINT_FAMILY_MATCH 0xD503201Fu 965 #define AA64_HINT_FAMILY_MASK 0xFFFFF01Fu /* CRm + op2 vary */ 966 967 /* HINT #N with CRm=0: op2 selects the variant. 
*/ 968 #define AA64_HINT_OP_NOP 0u /* CRm=0, op2=0 */ 969 #define AA64_HINT_OP_YIELD 1u /* CRm=0, op2=1 */ 970 #define AA64_HINT_OP_WFE 2u /* CRm=0, op2=2 */ 971 #define AA64_HINT_OP_WFI 3u /* CRm=0, op2=3 */ 972 #define AA64_HINT_OP_SEV 4u /* CRm=0, op2=4 */ 973 #define AA64_HINT_OP_SEVL 5u /* CRm=0, op2=5 */ 974 975 typedef struct AA64Hint { 976 u32 CRm, op2; 977 } AA64Hint; 978 979 static inline u32 aa64_hint_pack(AA64Hint f) { 980 return AA64_HINT_FAMILY_MATCH | ((f.CRm & 0xfu) << 8) | ((f.op2 & 7u) << 5); 981 } 982 983 static inline u32 aa64_hint(u32 op2) { 984 return aa64_hint_pack((AA64Hint){.CRm = 0, .op2 = op2}); 985 } 986 987 static inline AA64Hint aa64_hint_unpack(u32 w) { 988 AA64Hint f; 989 f.CRm = (w >> 8) & 0xfu; 990 f.op2 = (w >> 5) & 7u; 991 return f; 992 } 993 994 static inline u32 aa64_nop(void) { 995 return aa64_hint_pack((AA64Hint){.CRm = 0, .op2 = AA64_HINT_OP_NOP}); 996 } 997 998 /* ==================================================================== 999 * Memory barriers (DMB / DSB / ISB / CLREX) 1000 * 1101 0101 0000 0011 0011 CRm(4) op2(3) 11111 1001 * 31..16 15..12 11..8 7..5 4..0 1002 * 1003 * Shared encoding family with HINT (which uses bits[15:12]=0010); 1004 * barriers use bits[15:12]=0011. op2 selects the specific instruction: 1005 * CLREX=010 DSB=100 DMB=101 ISB=110 1006 * CRm is the option / domain (SY=15, ISH=11, NSH=7, OSH=3, ...). 1007 * ==================================================================== */ 1008 1009 #define AA64_BARRIER_FAMILY_MATCH 0xD503301Fu 1010 #define AA64_BARRIER_FAMILY_MASK 0xFFFFF01Fu /* CRm + op2 vary */ 1011 1012 #define AA64_BARRIER_OP2_CLREX 2u 1013 #define AA64_BARRIER_OP2_DSB 4u 1014 #define AA64_BARRIER_OP2_DMB 5u 1015 #define AA64_BARRIER_OP2_ISB 6u 1016 1017 /* Common CRm option encodings (ARM ARM C5.1.42). 
*/ 1018 #define AA64_BARRIER_OPT_OSHLD 1u 1019 #define AA64_BARRIER_OPT_OSHST 2u 1020 #define AA64_BARRIER_OPT_OSH 3u 1021 #define AA64_BARRIER_OPT_NSHLD 5u 1022 #define AA64_BARRIER_OPT_NSHST 6u 1023 #define AA64_BARRIER_OPT_NSH 7u 1024 #define AA64_BARRIER_OPT_ISHLD 9u 1025 #define AA64_BARRIER_OPT_ISHST 10u 1026 #define AA64_BARRIER_OPT_ISH 11u 1027 #define AA64_BARRIER_OPT_LD 13u 1028 #define AA64_BARRIER_OPT_ST 14u 1029 #define AA64_BARRIER_OPT_SY 15u 1030 1031 typedef struct AA64Barrier { 1032 u32 CRm, op2; 1033 } AA64Barrier; 1034 1035 static inline u32 aa64_barrier_pack(AA64Barrier f) { 1036 return AA64_BARRIER_FAMILY_MATCH | ((f.CRm & 0xfu) << 8) | 1037 ((f.op2 & 7u) << 5); 1038 } 1039 1040 static inline AA64Barrier aa64_barrier_unpack(u32 w) { 1041 AA64Barrier f; 1042 f.CRm = (w >> 8) & 0xfu; 1043 f.op2 = (w >> 5) & 7u; 1044 return f; 1045 } 1046 1047 static inline u32 aa64_dmb(u32 opt) { 1048 return aa64_barrier_pack( 1049 (AA64Barrier){.CRm = opt, .op2 = AA64_BARRIER_OP2_DMB}); 1050 } 1051 static inline u32 aa64_dsb(u32 opt) { 1052 return aa64_barrier_pack( 1053 (AA64Barrier){.CRm = opt, .op2 = AA64_BARRIER_OP2_DSB}); 1054 } 1055 static inline u32 aa64_isb(u32 opt) { 1056 return aa64_barrier_pack( 1057 (AA64Barrier){.CRm = opt, .op2 = AA64_BARRIER_OP2_ISB}); 1058 } 1059 static inline u32 aa64_clrex(u32 opt) { 1060 return aa64_barrier_pack( 1061 (AA64Barrier){.CRm = opt, .op2 = AA64_BARRIER_OP2_CLREX}); 1062 } 1063 1064 /* ==================================================================== 1065 * Interrupt-mask (DAIF) system register access. Used by the IRQ-control 1066 * intrinsics; privileged at EL0. Only the encodings the backend emits live 1067 * here (they are not registered in the disassembler's mnemonic table). 
1068 * MRS Xt, DAIF : 1101 0101 0011 1011 0100 0010 000 Rt -> 0xD53B4200|Rt 1069 * MSR DAIF, Xt : 1101 0101 0001 1011 0100 0010 000 Rt -> 0xD51B4200|Rt 1070 * MSR DAIFSet, #imm4 : op1=011, op2=110 -> 0xD50340DF | (imm4 << 8) 1071 * MSR DAIFClr, #imm4 : op1=011, op2=111 -> 0xD50340FF | (imm4 << 8) 1072 * imm4 = 0xF masks/unmasks D,A,I,F together. ==================== */ 1073 #define AA64_DAIF_ALL 0xfu 1074 1075 static inline u32 aa64_mrs_daif(u32 rt) { return 0xD53B4200u | (rt & 0x1fu); } 1076 static inline u32 aa64_msr_daif(u32 rt) { return 0xD51B4200u | (rt & 0x1fu); } 1077 static inline u32 aa64_msr_daifset(u32 imm4) { 1078 return 0xD50340DFu | ((imm4 & 0xfu) << 8); 1079 } 1080 static inline u32 aa64_msr_daifclr(u32 imm4) { 1081 return 0xD50340FFu | ((imm4 & 0xfu) << 8); 1082 } 1083 1084 /* ==================================================================== 1085 * Generic system-register move (MRS/MSR register form). A named system 1086 * register is the 15-bit selector op0:op1:CRn:CRm:op2 (op0's high bit is 1087 * fixed by the encoding, so only its low bit is a field). 1088 * MRS Xt, <sysreg> : 1101 0101 0 0 1 op0lo op1 CRn CRm op2 Rt (read, L=1) 1089 * MSR <sysreg>, Xt : 1101 0101 0 0 0 op0lo op1 CRn CRm op2 Rt (write, L=0) 1090 * e.g. TPIDR_EL0 = (op0=3,op1=3,CRn=13,CRm=0,op2=2): 1091 * MSR TPIDR_EL0, X0 -> 0xd51bd040 ; MRS X0, TPIDR_EL0 -> 0xd53bd040. */ 1092 static inline u32 aa64_sysreg_move(int is_read, u32 op0, u32 op1, u32 crn, 1093 u32 crm, u32 op2, u32 rt) { 1094 return 0xd5000000u | (is_read ? (1u << 21) : 0u) | ((op0 & 3u) << 19) | 1095 ((op1 & 7u) << 16) | ((crn & 0xfu) << 12) | ((crm & 0xfu) << 8) | 1096 ((op2 & 7u) << 5) | (rt & 0x1fu); 1097 } 1098 1099 /* System-register move encoding family: MRS (read, L=1) and MSR (write, 1100 * L=0). The disassembler matches these; the register selector op0:op1:CRn: 1101 * CRm:op2 and Rt are decoded from the word. 
op0's high bit is fixed (bit 1102 * 20), so the mask pins bits[31:20] and leaves op0lo/op1/CRn/CRm/op2/Rt. */ 1103 #define AA64_MRS_MATCH 0xd5300000u 1104 #define AA64_MSR_MATCH 0xd5100000u 1105 #define AA64_SYSREG_MOVE_MASK 0xfff00000u 1106 1107 /* Shared system-register name table (single source for the assembler's 1108 * name->selector parse and the disassembler's selector->name print). */ 1109 typedef struct AA64SysRegName { 1110 const char* name; 1111 u8 op0, op1, crn, crm, op2; 1112 } AA64SysRegName; 1113 1114 /* Resolve a system-register name (case-insensitive, length n) to its five 1115 * selector fields. Returns 1 on a hit, 0 otherwise. */ 1116 int aa64_sysreg_by_name(const char* s, size_t n, u32* op0, u32* op1, u32* crn, 1117 u32* crm, u32* op2); 1118 1119 /* Reverse lookup: canonical lowercase name for a selector, or NULL when the 1120 * selector is not in the table (the caller prints the generic Sx_x_Cx_Cx_x 1121 * spelling instead). */ 1122 const char* aa64_sysreg_name(u32 op0, u32 op1, u32 crn, u32 crm, u32 op2); 1123 1124 /* ==================================================================== 1125 * Load/store pair, signed-offset (STP / LDP, no pre/post-increment). 1126 * opc(2) 101 V(1) 010 L(1) imm7 Rt2 Rn Rt (bit 23 = 0) 1127 * 1128 * Mirrors the LDSTP_PRE format with bit 23 cleared; the field layout is 1129 * otherwise identical and the pack/unpack helpers above are reused for 1130 * pre/post/sign-offset via different family-match constants. Codegen 1131 * emits both X (opc=10) and FP-D (opc=01, V=1) variants for callee-save 1132 * spill/reload (`stp x29,x30,[sp,#16]`, `stp d8,d9,[sp,#32]`). 
*/ 1133 1134 #define AA64_LDSTP_SOFF_FAMILY_MATCH 0x29000000u 1135 #define AA64_LDSTP_SOFF_FAMILY_MASK 0x7FC00000u /* bits 30:23 (bit 23 = 0) */ 1136 1137 typedef AA64LdStPPre AA64LdStPSOff; 1138 1139 static inline u32 aa64_ldstp_soff_pack(AA64LdStPSOff f) { 1140 return ((f.opc & 3u) << 30) | AA64_LDSTP_SOFF_FAMILY_MATCH | 1141 ((f.V & 1u) << 26) | ((f.L & 1u) << 22) | ((f.imm7 & 0x7fu) << 15) | 1142 ((f.Rt2 & 0x1fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); 1143 } 1144 1145 static inline AA64LdStPSOff aa64_ldstp_soff_unpack(u32 w) { 1146 AA64LdStPSOff f; 1147 f.opc = (w >> 30) & 3u; 1148 f.V = (w >> 26) & 1u; 1149 f.L = (w >> 22) & 1u; 1150 f.imm7 = (w >> 15) & 0x7fu; 1151 f.Rt2 = (w >> 10) & 0x1fu; 1152 f.Rn = (w >> 5) & 0x1fu; 1153 f.Rt = w & 0x1fu; 1154 return f; 1155 } 1156 1157 /* 64-bit integer STP/LDP, signed offset (no writeback). imm7_scaled is 1158 * byte_offset / 8. Used for the prologue/epilogue frame record and callee-save 1159 * pairs, which address off a fixed base (x17 / fp). */ 1160 static inline u32 aa64_stp64_soff(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) { 1161 return aa64_ldstp_soff_pack((AA64LdStPSOff){.opc = 2, 1162 .V = 0, 1163 .L = 0, 1164 .imm7 = (u32)imm7_scaled & 0x7fu, 1165 .Rt2 = Rt2, 1166 .Rn = Rn, 1167 .Rt = Rt}); 1168 } 1169 static inline u32 aa64_ldp64_soff(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) { 1170 return aa64_ldstp_soff_pack((AA64LdStPSOff){.opc = 2, 1171 .V = 0, 1172 .L = 1, 1173 .imm7 = (u32)imm7_scaled & 0x7fu, 1174 .Rt2 = Rt2, 1175 .Rn = Rn, 1176 .Rt = Rt}); 1177 } 1178 1179 /* ==================================================================== 1180 * Load/store, unscaled 9-bit signed offset (LDUR / STUR, V=0 and V=1). 1181 * size(2) 111 V(1) 00 opc(2) 0 imm9(9) 00 Rn(5) Rt(5) 1182 * 31..30 29..27 26 25..24 23..22 21 20..12 11..10 9..5 4..0 1183 * 1184 * size: 00=B, 01=H, 10=W, 11=X (V=0) — D when V=1 selects FP/SIMD. 
1185 * opc: 00=STR, 01=LDR (sign-extension variants set opc bit 1 for the 1186 * smaller widths; not used by codegen today). */ 1187 1188 #define AA64_LDST_SIMM9_FAMILY_MATCH 0x38000000u 1189 /* bits 29:27 (=111) + bits 25:24 (=00) + bits 11:10 (=00). size, V, opc, 1190 * imm9, Rn, Rt all vary; bit 21 is fixed 0 for this variant. */ 1191 #define AA64_LDST_SIMM9_FAMILY_MASK 0x3B200C00u 1192 1193 typedef struct AA64LdStSimm9 { 1194 u32 size, V, opc, imm9, Rn, Rt; 1195 } AA64LdStSimm9; 1196 1197 static inline u32 aa64_ldst_simm9_pack(AA64LdStSimm9 f) { 1198 return ((f.size & 3u) << 30) | AA64_LDST_SIMM9_FAMILY_MATCH | 1199 ((f.V & 1u) << 26) | ((f.opc & 3u) << 22) | ((f.imm9 & 0x1ffu) << 12) | 1200 ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); 1201 } 1202 1203 static inline AA64LdStSimm9 aa64_ldst_simm9_unpack(u32 w) { 1204 AA64LdStSimm9 f; 1205 f.size = (w >> 30) & 3u; 1206 f.V = (w >> 26) & 1u; 1207 f.opc = (w >> 22) & 3u; 1208 f.imm9 = (w >> 12) & 0x1ffu; 1209 f.Rn = (w >> 5) & 0x1fu; 1210 f.Rt = w & 0x1fu; 1211 return f; 1212 } 1213 1214 /* ==================================================================== 1215 * Load/store, register offset (LDR/STR Rt,[Xn,Rm{,extend{#s}}]). 1216 * size(2) 111 V(1) 00 opc(2) 1 Rm(5) option(3) S(1) 10 Rn(5) Rt(5) 1217 * 31..30 29..27 26 25..24 23..22 21 20..16 15..13 12 11..10 9..5 4..0 1218 * 1219 * option selects the index extend: 010=UXTW, 011=LSL/UXTX, 110=SXTW, 1220 * 111=SXTX. S=1 scales the index by the access size (log2 = size); S=0 1221 * leaves it unscaled. opc/size match the uimm12 form. */ 1222 1223 #define AA64_LDST_REGOFF_FAMILY_MATCH 0x38200800u 1224 /* bits 29:27 (=111), 25:24 (=00), 21 (=1), 11:10 (=10). */ 1225 #define AA64_LDST_REGOFF_FAMILY_MASK 0x3B200C00u 1226 1227 /* Index-extend option encodings. */ 1228 #define AA64_LDST_OPTION_UXTW 2u 1229 #define AA64_LDST_OPTION_LSL 3u /* a.k.a. 
UXTX for 64-bit index */ 1230 #define AA64_LDST_OPTION_SXTW 6u 1231 #define AA64_LDST_OPTION_SXTX 7u 1232 1233 typedef struct AA64LdStRegOff { 1234 u32 size, V, opc, Rm, option, S, Rn, Rt; 1235 } AA64LdStRegOff; 1236 1237 static inline u32 aa64_ldst_regoff_pack(AA64LdStRegOff f) { 1238 return ((f.size & 3u) << 30) | AA64_LDST_REGOFF_FAMILY_MATCH | 1239 ((f.V & 1u) << 26) | ((f.opc & 3u) << 22) | ((f.Rm & 0x1fu) << 16) | 1240 ((f.option & 7u) << 13) | ((f.S & 1u) << 12) | ((f.Rn & 0x1fu) << 5) | 1241 (f.Rt & 0x1fu); 1242 } 1243 1244 static inline AA64LdStRegOff aa64_ldst_regoff_unpack(u32 w) { 1245 AA64LdStRegOff f; 1246 f.size = (w >> 30) & 3u; 1247 f.V = (w >> 26) & 1u; 1248 f.opc = (w >> 22) & 3u; 1249 f.Rm = (w >> 16) & 0x1fu; 1250 f.option = (w >> 13) & 7u; 1251 f.S = (w >> 12) & 1u; 1252 f.Rn = (w >> 5) & 0x1fu; 1253 f.Rt = w & 0x1fu; 1254 return f; 1255 } 1256 1257 /* ==================================================================== 1258 * Load/store, immediate pre/post-index (writeback). 1259 * size(2) 111 V(1) 00 opc(2) 0 imm9(9) idx(2) Rn(5) Rt(5) 1260 * 31..30 29..27 26 25..24 23..22 21 20..12 11..10 9..5 4..0 1261 * 1262 * idx (bits[11:10]) selects: 00=unscaled (LDUR, no writeback — see the 1263 * SIMM9 helpers above), 01=post-index, 11=pre-index. imm9 is the 1264 * unscaled signed byte offset (-256..255). 
*/ 1265 1266 #define AA64_LDST_IDX_POST 1u 1267 #define AA64_LDST_IDX_PRE 3u 1268 1269 typedef struct AA64LdStWBack { 1270 u32 size, V, opc, imm9, idx, Rn, Rt; 1271 } AA64LdStWBack; 1272 1273 static inline u32 aa64_ldst_wback_pack(AA64LdStWBack f) { 1274 return ((f.size & 3u) << 30) | AA64_LDST_SIMM9_FAMILY_MATCH | 1275 ((f.V & 1u) << 26) | ((f.opc & 3u) << 22) | ((f.imm9 & 0x1ffu) << 12) | 1276 ((f.idx & 3u) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); 1277 } 1278 1279 static inline AA64LdStWBack aa64_ldst_wback_unpack(u32 w) { 1280 AA64LdStWBack f; 1281 f.size = (w >> 30) & 3u; 1282 f.V = (w >> 26) & 1u; 1283 f.opc = (w >> 22) & 3u; 1284 f.imm9 = (w >> 12) & 0x1ffu; 1285 f.idx = (w >> 10) & 3u; 1286 f.Rn = (w >> 5) & 0x1fu; 1287 f.Rt = w & 0x1fu; 1288 return f; 1289 } 1290 1291 /* ==================================================================== 1292 * Load/store exclusive (LDXR/STXR + acquire/release variants). 1293 * size(2) 001000 o2(1) L(1) o1(1) Rs(5) o0(1) Rt2(5) Rn(5) Rt(5) 1294 * 31..30 29..24 23 22 21 20..16 15 14..10 9..5 4..0 1295 * 1296 * size: 00=byte,01=half,10=word,11=dword. o1=0 for the LDXR/STXR 1297 * single-register family (CAS sets o1=1 via the CAS pack below). 1298 * LDXR: L=1 o0=0 o2=0 STXR: L=0 o0=0 o2=0 1299 * LDAXR: L=1 o0=1 o2=0 STLXR: L=0 o0=1 o2=0 1300 * LDAR: L=1 o0=1 o2=1 STLR: L=0 o0=1 o2=1 1301 * For LDXR/LDAXR/LDAR/STLR, Rs and Rt2 are unused (encode 11111). */ 1302 1303 #define AA64_LDSTEX_FAMILY_MATCH 0x08000000u 1304 /* bits 29:24 (=001000). 
*/ 1305 #define AA64_LDSTEX_FAMILY_MASK 0x3F000000u 1306 1307 typedef struct AA64LdStEx { 1308 u32 size, o2, L, o1, Rs, o0, Rt2, Rn, Rt; 1309 } AA64LdStEx; 1310 1311 static inline u32 aa64_ldstex_pack(AA64LdStEx f) { 1312 return ((f.size & 3u) << 30) | AA64_LDSTEX_FAMILY_MATCH | 1313 ((f.o2 & 1u) << 23) | ((f.L & 1u) << 22) | ((f.o1 & 1u) << 21) | 1314 ((f.Rs & 0x1fu) << 16) | ((f.o0 & 1u) << 15) | 1315 ((f.Rt2 & 0x1fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu); 1316 } 1317 1318 static inline AA64LdStEx aa64_ldstex_unpack(u32 w) { 1319 AA64LdStEx f; 1320 f.size = (w >> 30) & 3u; 1321 f.o2 = (w >> 23) & 1u; 1322 f.L = (w >> 22) & 1u; 1323 f.o1 = (w >> 21) & 1u; 1324 f.Rs = (w >> 16) & 0x1fu; 1325 f.o0 = (w >> 15) & 1u; 1326 f.Rt2 = (w >> 10) & 0x1fu; 1327 f.Rn = (w >> 5) & 0x1fu; 1328 f.Rt = w & 0x1fu; 1329 return f; 1330 } 1331 1332 /* ==================================================================== 1333 * Compare and swap (CAS / CASA / CASL / CASAL + b/h variants, LSE). 1334 * size(2) 001000 1 L(1) 1 Rs(5) o0(1) 11111 Rn(5) Rt(5) 1335 * 31..30 29..23 . 22 . 20..16 15 14..10 9..5 4..0 1336 * 1337 * CAS: L=0 o0=0 CASA: L=1 o0=0 1338 * CASL: L=0 o0=1 CASAL: L=1 o0=1 1339 * Rt2 (bits[14:10]) is fixed at 11111. */ 1340 1341 #define AA64_CAS_FAMILY_MATCH 0x08a07c00u 1342 /* bits 29:24 (=001000), 23 (=1), 21 (=1), 14:10 (=11111). 
*/ 1343 #define AA64_CAS_FAMILY_MASK 0x3Fa0fc00u 1344 1345 typedef struct AA64Cas { 1346 u32 size, L, Rs, o0, Rn, Rt; 1347 } AA64Cas; 1348 1349 static inline u32 aa64_cas_pack(AA64Cas f) { 1350 return ((f.size & 3u) << 30) | AA64_CAS_FAMILY_MATCH | ((f.L & 1u) << 22) | 1351 ((f.Rs & 0x1fu) << 16) | ((f.o0 & 1u) << 15) | ((f.Rn & 0x1fu) << 5) | 1352 (f.Rt & 0x1fu); 1353 } 1354 1355 static inline AA64Cas aa64_cas_unpack(u32 w) { 1356 AA64Cas f; 1357 f.size = (w >> 30) & 3u; 1358 f.L = (w >> 22) & 1u; 1359 f.Rs = (w >> 16) & 0x1fu; 1360 f.o0 = (w >> 15) & 1u; 1361 f.Rn = (w >> 5) & 0x1fu; 1362 f.Rt = w & 0x1fu; 1363 return f; 1364 } 1365 1366 /* ==================================================================== 1367 * LSE atomic memory operations (SWP / LDADD / LDCLR / LDEOR / LDSET + 1368 * acquire/release variants and b/h widths). 1369 * size(2) 111 V(1) 00 A(1) R(1) 1 Rs(5) o3(1) opc(3) 00 Rn(5) Rt(5) 1370 * 31..30 29..27 26 25..24 23 22 21 20..16 15 14..12 11..10 9..5 4..0 1371 * 1372 * A=acquire (a-suffix), R=release (l-suffix). o3=1 selects SWP (opc=000); 1373 * o3=0 with opc in {000=LDADD,001=LDCLR,010=LDEOR,011=LDSET}. */ 1374 1375 #define AA64_LSE_ATOMIC_FAMILY_MATCH 0x38200000u 1376 /* bits 29:27 (=111), 25:24 (=00), 21 (=1), 11:10 (=00). 
*/ 1377 #define AA64_LSE_ATOMIC_FAMILY_MASK 0x3B200C00u 1378 1379 #define AA64_LSE_OPC_LDADD 0u 1380 #define AA64_LSE_OPC_LDCLR 1u 1381 #define AA64_LSE_OPC_LDEOR 2u 1382 #define AA64_LSE_OPC_LDSET 3u 1383 #define AA64_LSE_OPC_SWP 0u /* paired with o3=1 */ 1384 1385 typedef struct AA64LseAtomic { 1386 u32 size, A, R, Rs, o3, opc, Rn, Rt; 1387 } AA64LseAtomic; 1388 1389 static inline u32 aa64_lse_atomic_pack(AA64LseAtomic f) { 1390 return ((f.size & 3u) << 30) | AA64_LSE_ATOMIC_FAMILY_MATCH | 1391 ((f.A & 1u) << 23) | ((f.R & 1u) << 22) | ((f.Rs & 0x1fu) << 16) | 1392 ((f.o3 & 1u) << 15) | ((f.opc & 7u) << 12) | ((f.Rn & 0x1fu) << 5) | 1393 (f.Rt & 0x1fu); 1394 } 1395 1396 static inline AA64LseAtomic aa64_lse_atomic_unpack(u32 w) { 1397 AA64LseAtomic f; 1398 f.size = (w >> 30) & 3u; 1399 f.A = (w >> 23) & 1u; 1400 f.R = (w >> 22) & 1u; 1401 f.Rs = (w >> 16) & 0x1fu; 1402 f.o3 = (w >> 15) & 1u; 1403 f.opc = (w >> 12) & 7u; 1404 f.Rn = (w >> 5) & 0x1fu; 1405 f.Rt = w & 0x1fu; 1406 return f; 1407 } 1408 1409 /* ==================================================================== 1410 * Unconditional branch (immediate) — B / BL 1411 * op(1) 00101 imm26(26) 1412 * 31 30..26 25..0 1413 * 1414 * op=0 → B, op=1 → BL. imm26 is a signed 26-bit word displacement 1415 * (multiply by 4 to get byte offset). Codegen emits with imm26=0 paired 1416 * with a JUMP26 / CALL26 relocation. 
*/ 1417 1418 #define AA64_BR_IMM_FAMILY_MATCH 0x14000000u 1419 #define AA64_BR_IMM_FAMILY_MASK 0x7C000000u /* bits 30:26 (=00101) */ 1420 1421 typedef struct AA64BrImm { 1422 u32 op, imm26; 1423 } AA64BrImm; 1424 1425 static inline u32 aa64_brimm_pack(AA64BrImm f) { 1426 return ((f.op & 1u) << 31) | AA64_BR_IMM_FAMILY_MATCH | 1427 (f.imm26 & 0x3ffffffu); 1428 } 1429 1430 static inline AA64BrImm aa64_brimm_unpack(u32 w) { 1431 AA64BrImm f; 1432 f.op = (w >> 31) & 1u; 1433 f.imm26 = w & 0x3ffffffu; 1434 return f; 1435 } 1436 1437 static inline u32 aa64_b(u32 imm26) { 1438 return aa64_brimm_pack((AA64BrImm){.op = 0, .imm26 = imm26}); 1439 } 1440 static inline u32 aa64_bl(u32 imm26) { 1441 return aa64_brimm_pack((AA64BrImm){.op = 1, .imm26 = imm26}); 1442 } 1443 1444 /* ==================================================================== 1445 * Conditional branch (immediate) — B.cond 1446 * 0101 0100 imm19(19) 0 cond(4) 1447 * 31..24 23..5 4 3..0 1448 * 1449 * imm19 is a signed 19-bit word displacement; cond is the 4-bit ARM 1450 * condition code (EQ=0, NE=1, ...). */ 1451 1452 #define AA64_BR_COND_FAMILY_MATCH 0x54000000u 1453 #define AA64_BR_COND_FAMILY_MASK \ 1454 0xFF000010u /* bits 31:24 fixed + bit 4 = 0 \ 1455 */ 1456 1457 typedef struct AA64BrCond { 1458 u32 imm19, cond; 1459 } AA64BrCond; 1460 1461 static inline u32 aa64_brcond_pack(AA64BrCond f) { 1462 return AA64_BR_COND_FAMILY_MATCH | ((f.imm19 & 0x7ffffu) << 5) | 1463 (f.cond & 0xfu); 1464 } 1465 1466 static inline AA64BrCond aa64_brcond_unpack(u32 w) { 1467 AA64BrCond f; 1468 f.imm19 = (w >> 5) & 0x7ffffu; 1469 f.cond = w & 0xfu; 1470 return f; 1471 } 1472 1473 /* ==================================================================== 1474 * Compare-and-branch — CBZ / CBNZ 1475 * sf 011010 op(1) imm19(19) Rt(5) 1476 * 31 30..25 24 23..5 4..0 1477 * 1478 * op=0 → CBZ (branch if zero), op=1 → CBNZ. 
*/ 1479 1480 #define AA64_CB_FAMILY_MATCH 0x34000000u 1481 #define AA64_CB_FAMILY_MASK 0x7E000000u /* bits 30:25 (=011010) */ 1482 1483 typedef struct AA64CB { 1484 u32 sf, op, imm19, Rt; 1485 } AA64CB; 1486 1487 static inline u32 aa64_cb_pack(AA64CB f) { 1488 return ((f.sf & 1u) << 31) | AA64_CB_FAMILY_MATCH | ((f.op & 1u) << 24) | 1489 ((f.imm19 & 0x7ffffu) << 5) | (f.Rt & 0x1fu); 1490 } 1491 1492 static inline AA64CB aa64_cb_unpack(u32 w) { 1493 AA64CB f; 1494 f.sf = (w >> 31) & 1u; 1495 f.op = (w >> 24) & 1u; 1496 f.imm19 = (w >> 5) & 0x7ffffu; 1497 f.Rt = w & 0x1fu; 1498 return f; 1499 } 1500 1501 static inline u32 aa64_cbz(u32 sf, u32 Rt, u32 imm19) { 1502 return aa64_cb_pack((AA64CB){.sf = sf, .op = 0, .imm19 = imm19, .Rt = Rt}); 1503 } 1504 static inline u32 aa64_cbnz_imm(u32 sf, u32 Rt, u32 imm19) { 1505 return aa64_cb_pack((AA64CB){.sf = sf, .op = 1, .imm19 = imm19, .Rt = Rt}); 1506 } 1507 1508 /* ==================================================================== 1509 * Exception generation — BRK / SVC / HVC / SMC / HLT / UDF aliases. 1510 * 1101 0100 opc(3) imm16(16) op2(3) LL(2) 1511 * 31..24 23..21 20..5 4..2 1..0 1512 * 1513 * SVC: opc=000, LL=01. BRK: opc=001, LL=00. HVC/SMC/HLT/...: other 1514 * combos. Codegen emits BRK today. 
*/ 1515 1516 #define AA64_EXCEPT_FAMILY_MATCH 0xD4000000u 1517 #define AA64_EXCEPT_FAMILY_MASK 0xFF000000u /* bits 31:24 */ 1518 1519 typedef struct AA64Except { 1520 u32 opc, imm16, op2, LL; 1521 } AA64Except; 1522 1523 static inline u32 aa64_except_pack(AA64Except f) { 1524 return AA64_EXCEPT_FAMILY_MATCH | ((f.opc & 7u) << 21) | 1525 ((f.imm16 & 0xffffu) << 5) | ((f.op2 & 7u) << 2) | (f.LL & 3u); 1526 } 1527 1528 static inline AA64Except aa64_except_unpack(u32 w) { 1529 AA64Except f; 1530 f.opc = (w >> 21) & 7u; 1531 f.imm16 = (w >> 5) & 0xffffu; 1532 f.op2 = (w >> 2) & 7u; 1533 f.LL = w & 3u; 1534 return f; 1535 } 1536 1537 static inline u32 aa64_brk(u32 imm16) { 1538 return aa64_except_pack( 1539 (AA64Except){.opc = 1, .imm16 = imm16, .op2 = 0, .LL = 0}); 1540 } 1541 static inline u32 aa64_svc(u32 imm16) { 1542 return aa64_except_pack( 1543 (AA64Except){.opc = 0, .imm16 = imm16, .op2 = 0, .LL = 1}); 1544 } 1545 1546 /* ==================================================================== 1547 * Disassembler descriptor table. 1548 * ==================================================================== */ 1549 1550 typedef struct AA64InsnDesc { 1551 Slice mnemonic; 1552 u32 match; 1553 u32 mask; 1554 u8 fmt; /* AA64Format */ 1555 u8 flags; /* AA64_ASMFL_* */ 1556 u8 pad[2]; 1557 } AA64InsnDesc; 1558 1559 extern const AA64InsnDesc aa64_insn_table[]; 1560 extern const u32 aa64_insn_table_n; 1561 1562 /* Linear-scan lookup. Returns the matching descriptor or NULL. First 1563 * match wins; ordering in aa64_insn_table.c puts more-specific entries 1564 * before broader ones (so aliases like MOV/MUL/NEG win over their 1565 * canonical ORR/MADD/SUB forms). */ 1566 const AA64InsnDesc* aa64_disasm_find(u32 word); 1567 1568 /* ==================================================================== 1569 * Operand print / parse — one entry per AA64Format. 
*
 * aa64_print_operands renders the operand text (everything after the
 * mnemonic) for `word` into `sb`, using `desc->fmt` to dispatch.
 * Mnemonic itself is in `desc->mnemonic`; the caller writes it before
 * calling this helper. `vaddr` is the instruction's virtual address
 * for PC-relative formats; pass 0 if not known.
 *
 * aa64_parse_operands is the dual: read the operand grammar for the
 * format from `tok` (opaque to phase 2 — declared but unimplemented)
 * and fill `fields_out` (a pointer to the format's field struct).
 * Phase 3 wires `tok` up; for now the function is a forward declaration
 * the assembler can resolve once it's in place. */

struct AA64AsmTok; /* opaque, defined by the phase-3 asm parser */

/* Append the operand text for `word` to `sb`; the mnemonic is not written
 * here. */
void aa64_print_operands(StrBuf* sb, const AA64InsnDesc* desc, u32 word,
                         u64 vaddr);

/* If `word` is an SBFM/UBFM that has a preferred shift-alias disassembly,
 * return its mnemonic ("lsl"/"lsr"/"asr") and write the shift amount to
 * *shift; return NULL otherwise. Shared by the disassembler's mnemonic and
 * operand printers so the alias decision lives in one place. */
const char* aa64_bitfield_shift_alias(u32 word, u32* shift);

/* Preferred SBFM/UBFM extension aliases: sxtb/sxth/sxtw/uxtb/uxth.
 * Returns NULL when the bitfield does not match one of those forms. */
const char* aa64_bitfield_extend_alias(u32 word);

/* Preferred SBFM/UBFM extract aliases: sbfx/ubfx. Writes the least-significant
 * source bit to *lsb and the extracted width to *width when an alias is
 * available. */
const char* aa64_bitfield_extract_alias(u32 word, u32* lsb, u32* width);

/* Returns 1 on success, 0 on parse error. Phase 2 stub returns 0 for
 * every format; phase 3 fills in the bodies. */
int aa64_parse_operands(struct AA64AsmTok* tok, const AA64InsnDesc* desc,
                        void* fields_out);

#endif /* KIT_ARCH_AA64_ISA_H */