arch/aa64: factor encodings into shared ISA descriptors - kit

commit b42c7a606a0ae82b88c902d61b7e0bee646f9c3a
parent 6fd6c34fe44b2dfe41dd0158a2819a725c1afeed
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 13:36:20 -0700

arch/aa64: factor encodings into shared ISA descriptors

Hand-rolled 32-bit opcode words across aa_load_imm/aa_copy/aa_binop/
aa_unop/aa_ret had bit knowledge duplicated at every call site, with
no path for a disassembler to share it. Move the encoding info into
aa64_isa.{h,c}: each format (move-wide, log-shifted, addsub-shifted,
DP3, DP2, branch-register) declares a field struct, pack/unpack
functions, family match/mask constants, and per-instruction inline
wrappers (movz/movn/movk, and/orr/eor/orn/mov_reg/mvn, add/sub/neg,
madd/msub/mul, udiv/sdiv/lslv/lsrv/asrv/rorv, br/blr/ret).

aa64_isa.c carries an aa64_insn_table descriptor table keyed on
(match, mask, AA64Format) so a future ArchDisasm dispatches via
aa64_disasm_find() and recovers operands using the same unpack
functions the encoder packs through. New instructions land as one
descriptor row plus one inline wrapper.

aarch64.c is now free of magic opcode constants; behavior is
preserved (Group A 40/40 across D/R/E/J, Group C unchanged).

Diffstat:
A src/arch/aa64_isa.c  | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/arch/aa64_isa.h  | 361 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/aarch64.c  | 125 +++++++++++++++++++++++++------------------------------------------------------

3 files changed, 476 insertions(+), 85 deletions(-)
diff --git a/src/arch/aa64_isa.c b/src/arch/aa64_isa.c
@@ -0,0 +1,75 @@
+/* AArch64 instruction descriptor table.
+ *
+ * The table mirrors the inline encoders in aa64_isa.h: each row records
+ * (mnemonic, match, mask, format) so the disassembler can identify a raw
+ * 32-bit word with one mask-and-compare and then dispatch on the format
+ * to extract operand fields via the same unpack functions the encoder
+ * uses. Encoder and decoder share the bit knowledge — when an opcode
+ * value or field position changes, both sides update at one site.
+ *
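+ * Mask values include the family mask plus the bits that distinguish a
+ * specific instruction from its siblings in the same family. For the
+ * move-wide and data-processing entries sf (bit 31) is intentionally a
+ * don't-care: each covers both the W and X forms, and the unpacker reads
+ * sf separately when printing operands. The branch-register entries have
+ * no sf field; their masks pin bit 31 along with the other fixed bits. */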
+
+#include "arch/aa64_isa.h"
+
+#include <stddef.h>
+
+/* Descriptor rows, one per mnemonic. A word w belongs to a row when
+ * (w & mask) == match; fmt names the field layout to unpack it with.
+ * The trailing {0,0,0} initializes the explicit pad bytes. */
+const AA64InsnDesc aa64_insn_table[] = {
+    /* Move-wide immediate. Mask: family bits 28:23 + opc bits 30:29. */
+    { "movn", 0x12800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, {0,0,0} },
+    { "movz", 0x52800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, {0,0,0} },
+    { "movk", 0x72800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, {0,0,0} },
+
+    /* Logical, shifted register. Mask: family bits 28:24 + opc 30:29 + N(21).
+     * Shift bits 23:22 stay variable. */
+    { "and",  0x0A000000u, 0x7F200000u, AA64_FMT_LOG_SR, {0,0,0} },
+    { "bic",  0x0A200000u, 0x7F200000u, AA64_FMT_LOG_SR, {0,0,0} },
+    { "orr",  0x2A000000u, 0x7F200000u, AA64_FMT_LOG_SR, {0,0,0} },
+    { "orn",  0x2A200000u, 0x7F200000u, AA64_FMT_LOG_SR, {0,0,0} },
+    { "eor",  0x4A000000u, 0x7F200000u, AA64_FMT_LOG_SR, {0,0,0} },
+    { "eon",  0x4A200000u, 0x7F200000u, AA64_FMT_LOG_SR, {0,0,0} },
+    { "ands", 0x6A000000u, 0x7F200000u, AA64_FMT_LOG_SR, {0,0,0} },
+    { "bics", 0x6A200000u, 0x7F200000u, AA64_FMT_LOG_SR, {0,0,0} },
+
+    /* Add/Sub, shifted register. Mask: family bits 28:24 + bit 21 (fixed 0)
+     * + op(30) + S(29). Shift bits 23:22 stay variable. */
+    { "add",  0x0B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, {0,0,0} },
+    { "adds", 0x2B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, {0,0,0} },
+    { "sub",  0x4B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, {0,0,0} },
+    { "subs", 0x6B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, {0,0,0} },
+
+    /* Data-processing 3-source. Mask: family bits 28:24 + op54(30:29) +
+     * op31(23:21) + o0(15). v1 only emits MADD/MSUB (op31=000); other
+     * variants (SMADDL/UMADDL/SMULH/UMULH) land here as they're added. */
+    { "madd", 0x1B000000u, 0x7FE08000u, AA64_FMT_DP3, {0,0,0} },
+    { "msub", 0x1B008000u, 0x7FE08000u, AA64_FMT_DP3, {0,0,0} },
+
+    /* Data-processing 2-source. Mask: family bits 28:21 + bit 30 (fixed 0)
+     * + S(29) + opcode(15:10). S must be part of the mask (0x7FE0FC00, not
+     * the family-only 0x5FE0FC00): every row here matches S=0, and a word
+     * with S=1 is a different encoding that must not be reported as one
+     * of these mnemonics. */
+    { "udiv", 0x1AC00800u, 0x7FE0FC00u, AA64_FMT_DP2, {0,0,0} },
+    { "sdiv", 0x1AC00C00u, 0x7FE0FC00u, AA64_FMT_DP2, {0,0,0} },
+    { "lslv", 0x1AC02000u, 0x7FE0FC00u, AA64_FMT_DP2, {0,0,0} },
+    { "lsrv", 0x1AC02400u, 0x7FE0FC00u, AA64_FMT_DP2, {0,0,0} },
+    { "asrv", 0x1AC02800u, 0x7FE0FC00u, AA64_FMT_DP2, {0,0,0} },
+    { "rorv", 0x1AC02C00u, 0x7FE0FC00u, AA64_FMT_DP2, {0,0,0} },
+
+    /* Unconditional branch (register). Mask: bits 31:25 + opc(24:21) +
+     * op2(20:16) + op3(15:10) + op4(4:0). Rn (bits 9:5) varies. */
+    { "br",   0xD61F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0,0,0} },
+    { "blr",  0xD63F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0,0,0} },
+    { "ret",  0xD65F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0,0,0} },
+};
+
+/* Number of rows in aa64_insn_table, computed from the array itself so it
+ * tracks additions automatically. */
+const u32 aa64_insn_table_n =
+    (u32)(sizeof(aa64_insn_table) / sizeof(*aa64_insn_table));
+
+/* Walk aa64_insn_table front to back and return the first row whose
+ * masked bits equal its match value, or NULL when no row matches. */
+const AA64InsnDesc* aa64_disasm_find(u32 word)
+{
+    const AA64InsnDesc* row = aa64_insn_table;
+    const AA64InsnDesc* const end = aa64_insn_table + aa64_insn_table_n;
+
+    while (row != end) {
+        if ((word & row->mask) == row->match) {
+            return row;
+        }
+        ++row;
+    }
+    return NULL;
+}
diff --git a/src/arch/aa64_isa.h b/src/arch/aa64_isa.h
@@ -0,0 +1,361 @@
+#ifndef CFREE_ARCH_AA64_ISA_H
+#define CFREE_ARCH_AA64_ISA_H
+
+/* AArch64 ISA descriptors — single source of truth for every instruction
+ * the encoder, decoder, and disassembler all need to agree on.
+ *
+ * Each format declares:
+ *   - A field struct naming every encoded bitfield.
+ *   - {pack, unpack} pure functions that round-trip through a u32 word.
+ *   - A {family_match, family_mask} pair identifying the format.
+ *   - Per-instruction inline wrappers that bake in the opc bits and
+ *     return the encoded word; callers emit it via MCEmitter.
+ *
+ * A descriptor table at the bottom (aa64_insn_table) maps mnemonic →
+ * (match, mask, AA64Format), so the disassembler matches a raw word with
+ *   for (i=0; i<N; ++i) if ((word & desc[i].mask) == desc[i].match) ...
+ * and then calls the format's unpack to recover the operand fields.
+ *
+ * Conventions:
+ *   - sf = 0 selects the 32-bit (W) form, sf = 1 selects 64-bit (X).
+ *   - Reg values are the raw 5-bit encoding (0..30 + 31 for ZR/SP).
+ *   - Register operands are passed Rd first, then Rn, Rm, Ra, matching the
+ *     destination-first operand order of A64 assembly syntax (an
+ *     assembler convention, not an AAPCS rule).
+ *
+ * New instructions land as one entry in the table and (typically) one
+ * inline wrapper in the relevant format section. */
+
+#include "core/core.h"
+
+/* ---- common register names ---- */
+#define AA64_ZR  31u    /* WZR / XZR */
+#define AA64_SP  31u    /* SP at Rd/Rn slot */
+#define AA64_LR  30u    /* X30 / link register */
+
+/* ---- format kinds ---- */
+/* Field layout of an instruction word; selects which unpack function a
+ * descriptor row's operands are recovered with. */
+typedef enum AA64Format {
+    AA64_FMT_MOVEWIDE  = 0,  /* move-wide immediate */
+    AA64_FMT_LOG_SR    = 1,  /* logical, shifted register */
+    AA64_FMT_ADDSUB_SR = 2,  /* add/sub, shifted register */
+    AA64_FMT_DP3       = 3,  /* data-processing, 3 source */
+    AA64_FMT_DP2       = 4,  /* data-processing, 2 source */
+    AA64_FMT_BR_REG    = 5,  /* unconditional branch (register) */
+} AA64Format;
+
+/* ====================================================================
+ * Move-wide immediate (MOVN / MOVZ / MOVK)
+ *   sf  opc(2)  100101  hw(2)  imm16(16)  Rd(5)
+ *   31  30..29  28..23  22..21 20..5      4..0
+ * ==================================================================== */
+
+#define AA64_MOVN_OPC 0u
+#define AA64_MOVZ_OPC 2u
+#define AA64_MOVK_OPC 3u
+
+#define AA64_MOVEWIDE_FAMILY_MATCH 0x12800000u
+#define AA64_MOVEWIDE_FAMILY_MASK  0x1F800000u   /* bits 28:23 */
+
+/* Variable fields of a move-wide immediate word. */
+typedef struct AA64MoveWide {
+    u32 sf;     /* bit 31 */
+    u32 opc;    /* bits 30:29 */
+    u32 hw;     /* bits 22:21 */
+    u32 imm16;  /* bits 20:5 */
+    u32 Rd;     /* bits 4:0 */
+} AA64MoveWide;
+
+/* Build a move-wide word. Starts from the fixed family bits and ORs in each
+ * field after masking it to its encoded width. */
+static inline u32 aa64_movewide_pack(AA64MoveWide f)
+{
+    u32 word = AA64_MOVEWIDE_FAMILY_MATCH;
+
+    word |= (f.sf    & 1u)      << 31;
+    word |= (f.opc   & 3u)      << 29;
+    word |= (f.hw    & 3u)      << 21;
+    word |= (f.imm16 & 0xffffu) << 5;
+    word |= (f.Rd    & 0x1fu);
+    return word;
+}
+
+/* Split a word into move-wide fields. The family bits are not checked;
+ * the caller is expected to have identified the format already. */
+static inline AA64MoveWide aa64_movewide_unpack(u32 w)
+{
+    return (AA64MoveWide){
+        .sf    = (w >> 31) & 1u,
+        .opc   = (w >> 29) & 3u,
+        .hw    = (w >> 21) & 3u,
+        .imm16 = (w >> 5)  & 0xffffu,
+        .Rd    = w & 0x1fu,
+    };
+}
+
+/* MOVZ: move-wide word with opc = AA64_MOVZ_OPC. */
+static inline u32 aa64_movz(u32 sf, u32 Rd, u32 imm16, u32 hw)
+{
+    AA64MoveWide mw = { .sf = sf, .opc = AA64_MOVZ_OPC, .hw = hw, .imm16 = imm16, .Rd = Rd };
+    return aa64_movewide_pack(mw);
+}
+/* MOVN: move-wide word with opc = AA64_MOVN_OPC. */
+static inline u32 aa64_movn(u32 sf, u32 Rd, u32 imm16, u32 hw)
+{
+    AA64MoveWide mw = { .sf = sf, .opc = AA64_MOVN_OPC, .hw = hw, .imm16 = imm16, .Rd = Rd };
+    return aa64_movewide_pack(mw);
+}
+/* MOVK: move-wide word with opc = AA64_MOVK_OPC. */
+static inline u32 aa64_movk(u32 sf, u32 Rd, u32 imm16, u32 hw)
+{
+    AA64MoveWide mw = { .sf = sf, .opc = AA64_MOVK_OPC, .hw = hw, .imm16 = imm16, .Rd = Rd };
+    return aa64_movewide_pack(mw);
+}
+
+/* ====================================================================
+ * Logical, shifted register (AND / ORR / EOR / ANDS, with N inverting
+ * Rm to BIC / ORN / EON / BICS).
+ *   sf  opc(2)  01010  shift(2)  N(1)  Rm(5)  imm6(6)  Rn(5)  Rd(5)
+ *   31  30..29  28..24 23..22    21    20..16 15..10   9..5   4..0
+ * ==================================================================== */
+
+#define AA64_LOG_AND_OPC  0u
+#define AA64_LOG_ORR_OPC  1u
+#define AA64_LOG_EOR_OPC  2u
+#define AA64_LOG_ANDS_OPC 3u
+
+#define AA64_LOGSR_FAMILY_MATCH 0x0A000000u
+#define AA64_LOGSR_FAMILY_MASK  0x1F000000u    /* bits 28:24 */
+
+/* Variable fields of a logical (shifted register) word. */
+typedef struct AA64LogSR {
+    u32 sf;     /* bit 31 */
+    u32 opc;    /* bits 30:29 */
+    u32 shift;  /* bits 23:22 */
+    u32 N;      /* bit 21 */
+    u32 Rm;     /* bits 20:16 */
+    u32 imm6;   /* bits 15:10 */
+    u32 Rn;     /* bits 9:5 */
+    u32 Rd;     /* bits 4:0 */
+} AA64LogSR;
+
+/* Build a logical (shifted register) word: fixed family bits plus every
+ * field masked to its width and shifted to its position. */
+static inline u32 aa64_logsr_pack(AA64LogSR f)
+{
+    u32 word = AA64_LOGSR_FAMILY_MATCH;
+
+    word |= (f.sf    & 1u)    << 31;
+    word |= (f.opc   & 3u)    << 29;
+    word |= (f.shift & 3u)    << 22;
+    word |= (f.N     & 1u)    << 21;
+    word |= (f.Rm    & 0x1fu) << 16;
+    word |= (f.imm6  & 0x3fu) << 10;
+    word |= (f.Rn    & 0x1fu) << 5;
+    word |= (f.Rd    & 0x1fu);
+    return word;
+}
+
+/* Split a word into logical (shifted register) fields. No family check is
+ * performed here. */
+static inline AA64LogSR aa64_logsr_unpack(u32 w)
+{
+    return (AA64LogSR){
+        .sf    = (w >> 31) & 1u,
+        .opc   = (w >> 29) & 3u,
+        .shift = (w >> 22) & 3u,
+        .N     = (w >> 21) & 1u,
+        .Rm    = (w >> 16) & 0x1fu,
+        .imm6  = (w >> 10) & 0x3fu,
+        .Rn    = (w >> 5)  & 0x1fu,
+        .Rd    = w & 0x1fu,
+    };
+}
+
+/* AND Rd, Rn, Rm with shift, imm6 and N all zero. */
+static inline u32 aa64_and(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64LogSR insn = { .sf = sf, .opc = AA64_LOG_AND_OPC, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_logsr_pack(insn);
+}
+/* ORR Rd, Rn, Rm with shift, imm6 and N all zero. */
+static inline u32 aa64_orr(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64LogSR insn = { .sf = sf, .opc = AA64_LOG_ORR_OPC, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_logsr_pack(insn);
+}
+/* EOR Rd, Rn, Rm with shift, imm6 and N all zero. */
+static inline u32 aa64_eor(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64LogSR insn = { .sf = sf, .opc = AA64_LOG_EOR_OPC, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_logsr_pack(insn);
+}
+/* ORN Rd, Rn, Rm: the ORR opc with N set; shift and imm6 are zero. */
+static inline u32 aa64_orn(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64LogSR insn = { .sf = sf, .opc = AA64_LOG_ORR_OPC, .N = 1, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_logsr_pack(insn);
+}
+
+/* Register move, encoded as its ORR form: ORR Rd, ZR, Rm. */
+static inline u32 aa64_mov_reg(u32 sf, u32 Rd, u32 Rm)
+{
+    AA64LogSR orr = { .sf = sf, .opc = AA64_LOG_ORR_OPC, .Rm = Rm, .Rn = AA64_ZR, .Rd = Rd };
+    return aa64_logsr_pack(orr);
+}
+/* Bitwise NOT, encoded as its ORN form: ORN Rd, ZR, Rm. */
+static inline u32 aa64_mvn(u32 sf, u32 Rd, u32 Rm)
+{
+    AA64LogSR orn = { .sf = sf, .opc = AA64_LOG_ORR_OPC, .N = 1, .Rm = Rm, .Rn = AA64_ZR, .Rd = Rd };
+    return aa64_logsr_pack(orn);
+}
+
+/* ====================================================================
+ * Add/Sub, shifted register (ADD / SUB / ADDS / SUBS)
+ *   sf  op(1)  S(1)  01011  shift(2)  0  Rm(5)  imm6(6)  Rn(5)  Rd(5)
+ *   31  30     29    28..24 23..22    21 20..16 15..10   9..5   4..0
+ * ==================================================================== */
+
+#define AA64_ADDSUBSR_FAMILY_MATCH 0x0B000000u
+#define AA64_ADDSUBSR_FAMILY_MASK  0x1F200000u    /* bits 28:24 + bit 21 */
+
+/* Variable fields of an add/sub (shifted register) word. */
+typedef struct AA64AddSubSR {
+    u32 sf;     /* bit 31 */
+    u32 op;     /* bit 30 */
+    u32 S;      /* bit 29 */
+    u32 shift;  /* bits 23:22 */
+    u32 Rm;     /* bits 20:16 */
+    u32 imm6;   /* bits 15:10 */
+    u32 Rn;     /* bits 9:5 */
+    u32 Rd;     /* bits 4:0 */
+} AA64AddSubSR;
+
+/* Build an add/sub (shifted register) word. Bit 21 is never written, so it
+ * stays at the 0 the family match provides. */
+static inline u32 aa64_addsubsr_pack(AA64AddSubSR f)
+{
+    u32 word = AA64_ADDSUBSR_FAMILY_MATCH;
+
+    word |= (f.sf    & 1u)    << 31;
+    word |= (f.op    & 1u)    << 30;
+    word |= (f.S     & 1u)    << 29;
+    word |= (f.shift & 3u)    << 22;
+    word |= (f.Rm    & 0x1fu) << 16;
+    word |= (f.imm6  & 0x3fu) << 10;
+    word |= (f.Rn    & 0x1fu) << 5;
+    word |= (f.Rd    & 0x1fu);
+    return word;
+}
+
+/* Split a word into add/sub (shifted register) fields. No family check is
+ * performed here. */
+static inline AA64AddSubSR aa64_addsubsr_unpack(u32 w)
+{
+    return (AA64AddSubSR){
+        .sf    = (w >> 31) & 1u,
+        .op    = (w >> 30) & 1u,
+        .S     = (w >> 29) & 1u,
+        .shift = (w >> 22) & 3u,
+        .Rm    = (w >> 16) & 0x1fu,
+        .imm6  = (w >> 10) & 0x3fu,
+        .Rn    = (w >> 5)  & 0x1fu,
+        .Rd    = w & 0x1fu,
+    };
+}
+
+/* ADD Rd, Rn, Rm: op = 0, with S, shift and imm6 left zero. */
+static inline u32 aa64_add(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64AddSubSR insn = { .sf = sf, .op = 0, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_addsubsr_pack(insn);
+}
+/* SUB Rd, Rn, Rm: op = 1, with S, shift and imm6 left zero. */
+static inline u32 aa64_sub(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64AddSubSR insn = { .sf = sf, .op = 1, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_addsubsr_pack(insn);
+}
+
+/* Negate, encoded as its SUB form: SUB Rd, ZR, Rm. */
+static inline u32 aa64_neg(u32 sf, u32 Rd, u32 Rm)
+{
+    AA64AddSubSR sub = { .sf = sf, .op = 1, .Rm = Rm, .Rn = AA64_ZR, .Rd = Rd };
+    return aa64_addsubsr_pack(sub);
+}
+
+/* ====================================================================
+ * Data-processing, 3-source (MADD / MSUB / SMULL / UMULL / ...)
+ *   sf  op54(2)  11011  op31(3)  Rm(5)  o0(1)  Ra(5)  Rn(5)  Rd(5)
+ *   31  30..29   28..24 23..21   20..16 15     14..10 9..5   4..0
+ * ==================================================================== */
+
+#define AA64_DP3_FAMILY_MATCH 0x1B000000u
+#define AA64_DP3_FAMILY_MASK  0x1F000000u   /* bits 28:24 */
+
+/* Variable fields of a data-processing (3 source) word. */
+typedef struct AA64DP3 {
+    u32 sf;    /* bit 31 */
+    u32 op54;  /* bits 30:29 */
+    u32 op31;  /* bits 23:21 */
+    u32 Rm;    /* bits 20:16 */
+    u32 o0;    /* bit 15 */
+    u32 Ra;    /* bits 14:10 */
+    u32 Rn;    /* bits 9:5 */
+    u32 Rd;    /* bits 4:0 */
+} AA64DP3;
+
+/* Build a data-processing (3 source) word from the fixed family bits and
+ * the masked, shifted fields. */
+static inline u32 aa64_dp3_pack(AA64DP3 f)
+{
+    u32 word = AA64_DP3_FAMILY_MATCH;
+
+    word |= (f.sf   & 1u)    << 31;
+    word |= (f.op54 & 3u)    << 29;
+    word |= (f.op31 & 7u)    << 21;
+    word |= (f.Rm   & 0x1fu) << 16;
+    word |= (f.o0   & 1u)    << 15;
+    word |= (f.Ra   & 0x1fu) << 10;
+    word |= (f.Rn   & 0x1fu) << 5;
+    word |= (f.Rd   & 0x1fu);
+    return word;
+}
+
+/* Split a word into data-processing (3 source) fields. No family check is
+ * performed here. */
+static inline AA64DP3 aa64_dp3_unpack(u32 w)
+{
+    return (AA64DP3){
+        .sf   = (w >> 31) & 1u,
+        .op54 = (w >> 29) & 3u,
+        .op31 = (w >> 21) & 7u,
+        .Rm   = (w >> 16) & 0x1fu,
+        .o0   = (w >> 15) & 1u,
+        .Ra   = (w >> 10) & 0x1fu,
+        .Rn   = (w >> 5)  & 0x1fu,
+        .Rd   = w & 0x1fu,
+    };
+}
+
+/* MADD Rd, Rn, Rm, Ra: op31 = 0, o0 = 0, op54 left zero. */
+static inline u32 aa64_madd(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 Ra)
+{
+    AA64DP3 insn = { .sf = sf, .op31 = 0, .o0 = 0, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd };
+    return aa64_dp3_pack(insn);
+}
+/* MSUB Rd, Rn, Rm, Ra: op31 = 0, o0 = 1, op54 left zero. */
+static inline u32 aa64_msub(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 Ra)
+{
+    AA64DP3 insn = { .sf = sf, .op31 = 0, .o0 = 1, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd };
+    return aa64_dp3_pack(insn);
+}
+/* Multiply, encoded as its MADD form: MADD Rd, Rn, Rm, ZR. */
+static inline u32 aa64_mul(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64DP3 madd = { .sf = sf, .op31 = 0, .o0 = 0, .Rm = Rm, .Ra = AA64_ZR, .Rn = Rn, .Rd = Rd };
+    return aa64_dp3_pack(madd);
+}
+
+/* ====================================================================
+ * Data-processing, 2-source (UDIV / SDIV / LSLV / LSRV / ASRV / RORV)
+ *   sf  0  S(1)  11010110  Rm(5)  opcode(6)  Rn(5)  Rd(5)
+ *   31  30 29    28..21    20..16 15..10     9..5   4..0
+ * ==================================================================== */
+
+#define AA64_DP2_UDIV_OP  0x02u
+#define AA64_DP2_SDIV_OP  0x03u
+#define AA64_DP2_LSLV_OP  0x08u
+#define AA64_DP2_LSRV_OP  0x09u
+#define AA64_DP2_ASRV_OP  0x0Au
+#define AA64_DP2_RORV_OP  0x0Bu
+
+#define AA64_DP2_FAMILY_MATCH 0x1AC00000u
+#define AA64_DP2_FAMILY_MASK  0x5FE00000u    /* bit 30 + bits 28:21 */
+
+/* Variable fields of a data-processing (2 source) word. */
+typedef struct AA64DP2 {
+    u32 sf;      /* bit 31 */
+    u32 S;       /* bit 29 */
+    u32 opcode;  /* bits 15:10 */
+    u32 Rm;      /* bits 20:16 */
+    u32 Rn;      /* bits 9:5 */
+    u32 Rd;      /* bits 4:0 */
+} AA64DP2;
+
+/* Build a data-processing (2 source) word. Bit 30 is never written, so it
+ * stays at the 0 the family match provides. */
+static inline u32 aa64_dp2_pack(AA64DP2 f)
+{
+    u32 word = AA64_DP2_FAMILY_MATCH;
+
+    word |= (f.sf     & 1u)    << 31;
+    word |= (f.S      & 1u)    << 29;
+    word |= (f.Rm     & 0x1fu) << 16;
+    word |= (f.opcode & 0x3fu) << 10;
+    word |= (f.Rn     & 0x1fu) << 5;
+    word |= (f.Rd     & 0x1fu);
+    return word;
+}
+
+/* Split a word into data-processing (2 source) fields. No family check is
+ * performed here. */
+static inline AA64DP2 aa64_dp2_unpack(u32 w)
+{
+    return (AA64DP2){
+        .sf     = (w >> 31) & 1u,
+        .S      = (w >> 29) & 1u,
+        .Rm     = (w >> 16) & 0x1fu,
+        .opcode = (w >> 10) & 0x3fu,
+        .Rn     = (w >> 5)  & 0x1fu,
+        .Rd     = w & 0x1fu,
+    };
+}
+
+/* UDIV Rd, Rn, Rm: opcode AA64_DP2_UDIV_OP, S left zero. */
+static inline u32 aa64_udiv(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64DP2 insn = { .sf = sf, .opcode = AA64_DP2_UDIV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_dp2_pack(insn);
+}
+/* SDIV Rd, Rn, Rm: opcode AA64_DP2_SDIV_OP, S left zero. */
+static inline u32 aa64_sdiv(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64DP2 insn = { .sf = sf, .opcode = AA64_DP2_SDIV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_dp2_pack(insn);
+}
+/* LSLV Rd, Rn, Rm: opcode AA64_DP2_LSLV_OP, S left zero. */
+static inline u32 aa64_lslv(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64DP2 insn = { .sf = sf, .opcode = AA64_DP2_LSLV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_dp2_pack(insn);
+}
+/* LSRV Rd, Rn, Rm: opcode AA64_DP2_LSRV_OP, S left zero. */
+static inline u32 aa64_lsrv(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64DP2 insn = { .sf = sf, .opcode = AA64_DP2_LSRV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_dp2_pack(insn);
+}
+/* ASRV Rd, Rn, Rm: opcode AA64_DP2_ASRV_OP, S left zero. */
+static inline u32 aa64_asrv(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64DP2 insn = { .sf = sf, .opcode = AA64_DP2_ASRV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_dp2_pack(insn);
+}
+/* RORV Rd, Rn, Rm: opcode AA64_DP2_RORV_OP, S left zero. */
+static inline u32 aa64_rorv(u32 sf, u32 Rd, u32 Rn, u32 Rm)
+{
+    AA64DP2 insn = { .sf = sf, .opcode = AA64_DP2_RORV_OP, .Rm = Rm, .Rn = Rn, .Rd = Rd };
+    return aa64_dp2_pack(insn);
+}
+
+/* ====================================================================
+ * Unconditional branch (register) — BR / BLR / RET
+ *   1101011  opc(4)  op2(5)=11111  op3(6)=000000  Rn(5)  op4(5)=00000
+ *   31..25   24..21  20..16        15..10         9..5   4..0
+ * ==================================================================== */
+
+#define AA64_BR_OP_BR   0u
+#define AA64_BR_OP_BLR  1u
+#define AA64_BR_OP_RET  2u
+
+#define AA64_BR_REG_FAMILY_MATCH 0xD61F0000u
+#define AA64_BR_REG_FAMILY_MASK  0xFE1FFC1Fu   /* everything fixed except opc + Rn */
+
+/* Variable fields of an unconditional branch (register) word. */
+typedef struct AA64BrReg {
+    u32 opc;  /* bits 24:21 */
+    u32 Rn;   /* bits 9:5 */
+} AA64BrReg;
+
+/* Build a branch (register) word: the fixed family bits plus the masked
+ * opc and Rn fields. */
+static inline u32 aa64_brreg_pack(AA64BrReg f)
+{
+    u32 word = AA64_BR_REG_FAMILY_MATCH;
+
+    word |= (f.opc & 0xfu)  << 21;
+    word |= (f.Rn  & 0x1fu) << 5;
+    return word;
+}
+
+/* Split a word into branch (register) fields. No family check is
+ * performed here. */
+static inline AA64BrReg aa64_brreg_unpack(u32 w)
+{
+    return (AA64BrReg){
+        .opc = (w >> 21) & 0xfu,
+        .Rn  = (w >> 5)  & 0x1fu,
+    };
+}
+
+/* BR Rn: branch-register word with opc = AA64_BR_OP_BR. */
+static inline u32 aa64_br(u32 Rn)
+{
+    AA64BrReg insn = { .opc = AA64_BR_OP_BR, .Rn = Rn };
+    return aa64_brreg_pack(insn);
+}
+/* BLR Rn: branch-register word with opc = AA64_BR_OP_BLR. */
+static inline u32 aa64_blr(u32 Rn)
+{
+    AA64BrReg insn = { .opc = AA64_BR_OP_BLR, .Rn = Rn };
+    return aa64_brreg_pack(insn);
+}
+/* RET Rn: branch-register word with opc = AA64_BR_OP_RET. */
+static inline u32 aa64_ret(u32 Rn)
+{
+    AA64BrReg insn = { .opc = AA64_BR_OP_RET, .Rn = Rn };
+    return aa64_brreg_pack(insn);
+}
+
+/* ====================================================================
+ * Disassembler descriptor table.
+ * ==================================================================== */
+
+/* One descriptor row: a word w is this instruction when
+ * (w & mask) == match. */
+typedef struct AA64InsnDesc {
+    const char* mnemonic;  /* instruction name, e.g. "movz" */
+    u32 match;             /* expected value of the masked bits */
+    u32 mask;              /* bits that identify the instruction */
+    u8 fmt;                /* an AA64Format value, stored in one byte */
+    u8 pad[3];             /* explicit padding after fmt */
+} AA64InsnDesc;
+
+extern const AA64InsnDesc aa64_insn_table[];
+extern const u32          aa64_insn_table_n;
+
+/* Linear-scan lookup. Returns the matching descriptor or NULL. First
+ * match wins, so aa64_insn_table (defined in aa64_isa.c) must list
+ * more-specific entries before broader ones in the rare cases where
+ * that matters. */
+const AA64InsnDesc* aa64_disasm_find(u32 word);
+
+#endif
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -14,6 +14,7 @@
  * harness this is enough; full ABI integration arrives with TargetABI. */
 
 #include "arch/arch.h"
+#include "arch/aa64_isa.h"
 #include "core/arena.h"
 #include "obj/obj.h"
 #include "type/type.h"
@@ -143,81 +144,54 @@ static void aa_load_imm(CGTarget* t, Operand dst, i64 imm)
     u32 rd = reg_num(dst);
 
     /* Effective bit-width: 32 unless we're materializing into Xd. The 32-bit
-     * encoding zero-extends the result, so we mask the value to 32 bits when
-     * sf == 0 so a "negative" int constant materializes its low 32 bits
-     * exactly (caller's responsibility to keep that in range). */
+     * encoding zero-extends the result, so we mask to 32 bits when sf==0
+     * so a "negative" int constant materializes its low 32 bits exactly. */
     const u32 nslots = sf ? 4u : 2u;
     u64 v = sf ? (u64)imm : ((u64)imm & 0xffffffffu);
 
-    /* Single MOVZ — fits in one 16-bit slot. */
+    /* Single MOVZ when only one 16-bit slot is non-zero. */
     for (u32 i = 0; i < nslots; ++i) {
-        u64 slot = (v >> (i * 16)) & 0xffffu;
+        u32 slot   = (u32)((v >> (i * 16)) & 0xffffu);
         u64 cleared = v & ~((u64)0xffffu << (i * 16));
         if (slot != 0 && cleared == 0) {
-            u32 word = (sf << 31) | 0x52800000u
-                     | ((u32)i << 21)
-                     | (((u32)slot & 0xffff) << 5) | rd;
-            emit32(mc, word);
-            return;
-        }
-        if (cleared == ~(u64)0 || (sf == 0 && cleared == 0xffffffff00000000ull
-                                   /* unreachable for sf==0 */)) {
-            /* MOVN: ~imm fits in this slot, all other slots are 1s. */
-            u64 inv_slot = (~v >> (i * 16)) & 0xffffu;
-            u32 word = (sf << 31) | 0x12800000u
-                     | ((u32)i << 21)
-                     | (((u32)inv_slot & 0xffff) << 5) | rd;
-            emit32(mc, word);
+            emit32(mc, aa64_movz(sf, rd, slot, i));
             return;
         }
     }
 
-    /* Special-case for sf==0: any 32-bit value can be expressed as MOVN of
-     * the inverted slot when one slot is all-ones in the inverse. We already
-     * handled cleared==0; the sf==0 MOVN case above didn't trigger (its
-     * "all other slots are 1s" check uses 64-bit ones). Try MOVN explicitly. */
-    if (sf == 0) {
-        u64 nv = (~v) & 0xffffffffu;
-        for (u32 i = 0; i < 2; ++i) {
-            u64 slot = (nv >> (i * 16)) & 0xffffu;
-            u64 cleared = nv & ~((u64)0xffffu << (i * 16));
+    /* Single MOVN when one slot of the inverted value covers the rest.
+     * For sf==1 the "rest is all ones" test is over the full 64 bits;
+     * for sf==0 we work in the 32-bit space. */
+    {
+        u64 inv = sf ? ~v : ((~v) & 0xffffffffu);
+        u64 all = sf ? ~(u64)0 : 0xffffffffu;
+        (void)all;
+        for (u32 i = 0; i < nslots; ++i) {
+            u32 slot   = (u32)((inv >> (i * 16)) & 0xffffu);
+            u64 cleared = inv & ~((u64)0xffffu << (i * 16));
             if (cleared == 0) {
-                u32 word = 0x12800000u | ((u32)i << 21)
-                         | (((u32)slot & 0xffff) << 5) | rd;
-                emit32(mc, word);
+                emit32(mc, aa64_movn(sf, rd, slot, i));
                 return;
             }
         }
     }
 
-    /* General path: MOVZ for the lowest non-zero slot, then MOVK for any
-     * other non-zero slot. For sf==0 with imm having both halves nonzero
-     * (e.g. 0xABCD0001), this is MOVZ + MOVK. For sf==1, up to 4 insns. */
+    /* General path: MOVZ the lowest non-zero slot, then MOVK any other
+     * non-zero slot. v==0 matches neither single-instruction branch above,
+     * so nothing is placed here and the MOVZ #0 after the loop handles it. */
     int placed = 0;
     for (u32 i = 0; i < nslots; ++i) {
-        u64 slot = (v >> (i * 16)) & 0xffffu;
+        u32 slot = (u32)((v >> (i * 16)) & 0xffffu);
         if (!placed) {
-            /* First MOVZ (always emit even if slot==0 when v==0; that path
-             * was caught by the single-MOVZ branch above, so here v!=0 and
-             * we skip zero slots until the first nonzero one). */
             if (slot == 0) continue;
-            u32 word = (sf << 31) | 0x52800000u
-                     | ((u32)i << 21)
-                     | (((u32)slot & 0xffff) << 5) | rd;
-            emit32(mc, word);
+            emit32(mc, aa64_movz(sf, rd, slot, i));
             placed = 1;
         } else if (slot != 0) {
-            /* MOVK Wd|Xd, #imm16, lsl #(16*i) */
-            u32 word = (sf << 31) | 0x72800000u
-                     | ((u32)i << 21)
-                     | (((u32)slot & 0xffff) << 5) | rd;
-            emit32(mc, word);
+            emit32(mc, aa64_movk(sf, rd, slot, i));
         }
     }
     if (!placed) {
-        /* v == 0 path, already handled above; defensive emit MOVZ #0. */
-        u32 word = (sf << 31) | 0x52800000u | rd;
-        emit32(mc, word);
+        /* v==0: every slot is zero, so nothing was placed; emit MOVZ #0. */
+        emit32(mc, aa64_movz(sf, rd, 0, 0));
     }
 }
 
@@ -226,13 +200,8 @@ static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb)
 
 static void aa_copy(CGTarget* t, Operand dst, Operand src)
 {
-    /* MOV Wd, Wm  ≡  ORR Wd, WZR, Wm */
-    MCEmitter* mc = t->mc;
     u32 sf = type_is_64(dst.type) ? 1u : 0u;
-    u32 rd = reg_num(dst);
-    u32 rm = reg_num(src);
-    u32 word = (sf << 31) | 0x2a000000u | (rm << 16) | (31u << 5) | rd;
-    emit32(mc, word);
+    emit32(t->mc, aa64_mov_reg(sf, reg_num(dst), reg_num(src)));
 }
 
 static void aa_load    (CGTarget* t, Operand d, Operand a, MemAccess m) { (void)d;(void)a;(void)m; aa_panic(t, "load"); }
@@ -262,21 +231,17 @@ static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a, Operand b)
     }
 
     switch (op) {
-    case BO_IADD: word = (sf << 31) | 0x0b000000u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_ISUB: word = (sf << 31) | 0x4b000000u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_IMUL: /* MADD Wd, Wn, Wm, WZR */
-                  word = (sf << 31) | 0x1b000000u | (rm << 16) | (31u << 10) | (rn << 5) | rd; break;
-    case BO_AND:  word = (sf << 31) | 0x0a000000u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_OR:   word = (sf << 31) | 0x2a000000u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_XOR:  word = (sf << 31) | 0x4a000000u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_SHL:  /* LSLV: data-processing-2-source op2=001000 */
-                  word = (sf << 31) | 0x1ac02000u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_SHR_U:/* LSRV: op2=001001 */
-                  word = (sf << 31) | 0x1ac02400u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_SHR_S:/* ASRV: op2=001010 */
-                  word = (sf << 31) | 0x1ac02800u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_UDIV: word = (sf << 31) | 0x1ac00800u | (rm << 16) | (rn << 5) | rd; break;
-    case BO_SDIV: word = (sf << 31) | 0x1ac00c00u | (rm << 16) | (rn << 5) | rd; break;
+    case BO_IADD:  word = aa64_add (sf, rd, rn, rm); break;
+    case BO_ISUB:  word = aa64_sub (sf, rd, rn, rm); break;
+    case BO_IMUL:  word = aa64_mul (sf, rd, rn, rm); break;
+    case BO_AND:   word = aa64_and (sf, rd, rn, rm); break;
+    case BO_OR:    word = aa64_orr (sf, rd, rn, rm); break;
+    case BO_XOR:   word = aa64_eor (sf, rd, rn, rm); break;
+    case BO_SHL:   word = aa64_lslv(sf, rd, rn, rm); break;
+    case BO_SHR_U: word = aa64_lsrv(sf, rd, rn, rm); break;
+    case BO_SHR_S: word = aa64_asrv(sf, rd, rn, rm); break;
+    case BO_UDIV:  word = aa64_udiv(sf, rd, rn, rm); break;
+    case BO_SDIV:  word = aa64_sdiv(sf, rd, rn, rm); break;
     case BO_SREM:
     case BO_UREM:
     case BO_FADD: case BO_FSUB: case BO_FMUL: case BO_FDIV:
@@ -301,14 +266,8 @@ static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a)
     }
 
     switch (op) {
-    case UO_NEG:
-        /* SUB Wd, WZR, Wn */
-        word = (sf << 31) | 0x4b000000u | (rn << 16) | (31u << 5) | rd;
-        break;
-    case UO_BNOT:
-        /* MVN Wd, Wn  ≡  ORN Wd, WZR, Wn (shift=0) */
-        word = (sf << 31) | 0x2a200000u | (rn << 16) | (31u << 5) | rd;
-        break;
+    case UO_NEG:  word = aa64_neg(sf, rd, rn); break;
+    case UO_BNOT: word = aa64_mvn(sf, rd, rn); break;
     case UO_NOT:
     default:
         compiler_panic(t->c, impl_of(t)->loc,
@@ -329,18 +288,14 @@ static void aa_ret(CGTarget* t, const CGABIValue* val)
     MCEmitter* mc = t->mc;
 
     if (val && val->storage.kind == OPK_REG) {
-        /* MOV W0|X0, src */
         u32 sf = type_is_64(val->storage.type) ? 1u : 0u;
-        u32 rm = reg_num(val->storage);
-        u32 word = (sf << 31) | 0x2a000000u | (rm << 16) | (31u << 5) | 0u;
-        emit32(mc, word);
+        emit32(mc, aa64_mov_reg(sf, /*Rd=*/0, reg_num(val->storage)));
     } else if (val && val->storage.kind == OPK_IMM) {
         /* MOV W0, #imm via load_imm */
         Operand w0 = { OPK_REG, RC_INT, 0, val->storage.type, .v.reg = 0 };
         aa_load_imm(t, w0, val->storage.v.imm);
     }
-    /* RET X30 */
-    emit32(mc, 0xd65f03c0u);
+    emit32(mc, aa64_ret(AA64_LR));
 }
 
 static void aa_alloca_ (CGTarget* t, Operand d, Operand s, u32 a) { (void)d;(void)s;(void)a; aa_panic(t, "alloca"); }

	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README

A	src/arch/aa64_isa.c	\|	75	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/arch/aa64_isa.h	\|	361	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/arch/aarch64.c	\|	125	+++++++++++++++++++++++++------------------------------------------------------