commit 0628fd75103a9c5159a6b384f25fa78a5a64abb4
parent 938b10869a2db00a0b374d7cd2bfc2449d7a87d0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 08:25:38 -0700
asm: phase-2 ISA descriptor table — AsmFlags, aliases, format backfill
Finishes the aa64 ISA descriptor table per doc/ASM.md §5 phase 2.
- AsmFlags column on AA64InsnDesc (ALIAS / SF1 / NORN) and alias rows
for MOV, MVN, NEG/NEGS, CMP/CMN, MUL/MNEG, RET-implicit-X30 placed
before their canonical rows so first-match disasm picks the alias.
- New formats with field struct + pack/unpack + table rows:
LDSTP_SOFF, LDST_SIMM9, BR_IMM, BR_COND, CB, EXCEPT. LDST_UIMM /
LDSTP_PRE rows expanded to every size x V codegen emits.
- aa64_print_operands dispatcher + per-format printers in aa64_isa.c.
aa64_parse_operands declared with the phase-3 signature, stubbed
to return 0 until the asm token stream lands.
- New tiny StrBuf util (src/core/strbuf.{h,c}) used by the printer.
- test/arch/aa64_isa_test.c covers one representative word per format
and the alias-precedence invariant; wired via make test-isa and
added to the default test list.
Diffstat:
8 files changed, 1144 insertions(+), 105 deletions(-)
diff --git a/doc/ASM.md b/doc/ASM.md
@@ -338,25 +338,44 @@ cases report SKIP for every path they apply to and the harness
wiring is exercised on every CI run. `bash test/cg/run.sh '' S` also
reports SKIP cleanly. No green asm cases yet — that's phase 3.
-### Phase 2 — finish the ISA descriptor table
+### Phase 2 — finish the ISA descriptor table (DONE)
Pure refactor. No new behavior; existing codegen still calls inline
encoders.
-1. Add `parse_operands` and `print_operands` per `AA64Format` in
- `aa64_isa.{h,c}`. The first cut prints into a `StrBuf` and parses
- from a tiny operand lexer (reg name, `#imm`, `[Xn, #ofs]`, label).
-2. Add `AsmFlags` column to `AA64InsnDesc`. Mark aliases (`MOV`, `MUL`,
- `NEG`, `RET`).
-3. Reorder rows in `aa64_insn_table` so aliases precede their canonical
- forms.
-4. Backfill formats not yet in the table that codegen emits today (load/
- store immediate, branch immediate, conditional branch, NOP, BRK).
- Each lands as one format-struct + pack/unpack + parse/print + table
- rows.
-
-Exit criterion: `aa64_disasm_find` returns a desc for every byte
-sequence the codegen currently produces; no test changes.
+- [x] Added `aa64_print_operands` dispatcher plus per-format
+ `print_*` helpers in `aa64_isa.c`; renders into a new tiny `StrBuf`
+ (`src/core/strbuf.{h,c}`). `aa64_parse_operands` is declared with
+ the phase-3 signature and stubbed to return 0 — phase 3 fills the
+ per-format grammar in once the asm token stream lands.
+- [x] Added an `AsmFlags` byte on `AA64InsnDesc`
+ (`AA64_ASMFL_ALIAS / SF1 / NORN`). Aliases marked: `MOV` (ORR Rd,
+ ZR, Rm), `MVN` (ORN), `NEG` / `NEGS` (SUB / SUBS Rd, ZR, Rm),
+ `CMP` / `CMN` (SUBS / ADDS ZR, Rn, Rm), `MUL` / `MNEG` (MADD /
+ MSUB with Ra=ZR), `RET`-no-operand (RET X30).
+- [x] Reordered `aa64_insn_table` so each alias precedes its
+ canonical form. First-match-wins now picks the alias spelling.
+- [x] Backfilled formats codegen emits: `BR_IMM` (B / BL),
+ `BR_COND` (B.cond), `CB` (CBZ / CBNZ), `EXCEPT` (BRK / SVC / HLT),
+ `LDST_SIMM9` (LDUR / STUR, V=0 and V=1, every size),
+ `LDSTP_SOFF` (STP / LDP signed-offset, X and D forms).
+ `LDST_UIMM` and `LDSTP_PRE` rows expanded to cover every
+ size × V combination codegen emits today. Each format lands as
+ one struct + pack/unpack + print + table rows; phase-3
+ parse-grammar bodies follow once the asm token stream exists.
+- [x] New `test/arch/aa64_isa_test.c` + `make test-isa` target.
+ Exercises one representative word per format, asserts mnemonic
+ and operand text, and pins the alias-precedence invariant
+ (`ORR Rd, ZR, Rm` resolves to "mov", `ORR Rd, Rn, Rm` to "orr").
+ Added to the default `test` list.
+
+Exit criterion (met): `aa64_disasm_find` returns a desc for the
+representative word of every format used in this phase, and the unit
+test pins that contract for future regressions. Full byte-by-byte
+coverage of every cg-emitted word becomes enforced when the `S`
+path on `test/cg/run.sh` turns green in phase 4 — the remaining
+codegen-only formats (bitfield, condsel, FP-DP1/2, FP↔int cvt,
+ldst-exclusive, dmb/clrex, mrs, dp1, SIMD basic) get table rows then.
### Phase 3 — standalone `.s` assembler
diff --git a/src/arch/aa64_isa.c b/src/arch/aa64_isa.c
@@ -1,94 +1,188 @@
-/* AArch64 instruction descriptor table.
+/* AArch64 instruction descriptor table + operand print/parse dispatch.
*
* The table mirrors the inline encoders in aa64_isa.h: each row records
- * (mnemonic, match, mask, format) so the disassembler can identify a raw
- * 32-bit word with one mask-and-compare and then dispatch on the format
- * to extract operand fields via the same unpack functions the encoder
- * uses. Encoder and decoder share the bit knowledge — when an opcode
- * value or field position changes, both sides update at one site.
+ * (mnemonic, match, mask, format, flags) so the disassembler can identify
+ * a raw 32-bit word with one mask-and-compare and then dispatch on the
+ * format to extract operand fields via the same unpack functions the
+ * encoder uses. Encoder and decoder share the bit knowledge — when an
+ * opcode value or field position changes, both sides update at one site.
*
* Mask values include the family mask plus the bits that distinguish a
- * specific instruction from its siblings in the same family. sf (bit 31)
- * is intentionally a don't-care: every entry covers both the W and X
- * forms, and the unpacker reads sf separately when printing operands. */
+ * specific instruction from its siblings in the same family. sf (bit 31)
+ * is intentionally a don't-care for formats where both 32- and 64-bit
+ * forms share one row; the unpacker reads sf separately when printing
+ * operands.
+ *
+ * Row ordering: first-match wins. Aliases (rows with AA64_ASMFL_ALIAS)
+ * are tighter masks placed BEFORE the canonical row they alias so the
+ * disassembler renders the alias spelling. The assembler accepts both
+ * spellings — they map to the same encoded word. */
#include "arch/aa64_isa.h"
#include <stddef.h>
const AA64InsnDesc aa64_insn_table[] = {
- /* Move-wide immediate. Mask: family bits 28:23 + opc bits 30:29. */
- {"movn", 0x12800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, {0, 0, 0}},
- {"movz", 0x52800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, {0, 0, 0}},
- {"movk", 0x72800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, {0, 0, 0}},
-
- /* Logical, shifted register. Mask: family bits 28:24 + opc 30:29 + N(21).
- * Shift bits 23:22 stay variable. */
- {"and", 0x0A000000u, 0x7F200000u, AA64_FMT_LOG_SR, {0, 0, 0}},
- {"bic", 0x0A200000u, 0x7F200000u, AA64_FMT_LOG_SR, {0, 0, 0}},
- {"orr", 0x2A000000u, 0x7F200000u, AA64_FMT_LOG_SR, {0, 0, 0}},
- {"orn", 0x2A200000u, 0x7F200000u, AA64_FMT_LOG_SR, {0, 0, 0}},
- {"eor", 0x4A000000u, 0x7F200000u, AA64_FMT_LOG_SR, {0, 0, 0}},
- {"eon", 0x4A200000u, 0x7F200000u, AA64_FMT_LOG_SR, {0, 0, 0}},
- {"ands", 0x6A000000u, 0x7F200000u, AA64_FMT_LOG_SR, {0, 0, 0}},
- {"bics", 0x6A200000u, 0x7F200000u, AA64_FMT_LOG_SR, {0, 0, 0}},
-
- /* Add/Sub, shifted register. Mask: family bits 28:24 + bit 21 (fixed 0)
- * + op(30) + S(29). Shift bits 23:22 stay variable. */
- {"add", 0x0B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, {0, 0, 0}},
- {"adds", 0x2B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, {0, 0, 0}},
- {"sub", 0x4B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, {0, 0, 0}},
- {"subs", 0x6B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, {0, 0, 0}},
-
- /* Data-processing 3-source. Mask: family bits 28:24 + op54(30:29) +
- * op31(23:21) + o0(15). v1 only emits MADD/MSUB (op31=000); other
- * variants (SMADDL/UMADDL/SMULH/UMULH) land here as they're added. */
- {"madd", 0x1B000000u, 0x7FE08000u, AA64_FMT_DP3, {0, 0, 0}},
- {"msub", 0x1B008000u, 0x7FE08000u, AA64_FMT_DP3, {0, 0, 0}},
-
- /* Data-processing 2-source. Mask: family bits 28:21 + bit 30 (fixed 0)
- * + S(29) + opcode(15:10). */
- {"udiv", 0x1AC00800u, 0x5FE0FC00u, AA64_FMT_DP2, {0, 0, 0}},
- {"sdiv", 0x1AC00C00u, 0x5FE0FC00u, AA64_FMT_DP2, {0, 0, 0}},
- {"lslv", 0x1AC02000u, 0x5FE0FC00u, AA64_FMT_DP2, {0, 0, 0}},
- {"lsrv", 0x1AC02400u, 0x5FE0FC00u, AA64_FMT_DP2, {0, 0, 0}},
- {"asrv", 0x1AC02800u, 0x5FE0FC00u, AA64_FMT_DP2, {0, 0, 0}},
- {"rorv", 0x1AC02C00u, 0x5FE0FC00u, AA64_FMT_DP2, {0, 0, 0}},
-
- /* Unconditional branch (register). Mask: bits 31:25 + opc(24:21) +
- * op2(20:16) + op3(15:10) + op4(4:0). Rn (bits 9:5) varies. */
- {"br", 0xD61F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0, 0, 0}},
- {"blr", 0xD63F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0, 0, 0}},
- {"ret", 0xD65F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0, 0, 0}},
-
- /* PC-relative addressing. Mask: family bits 28:24 + op(31). The two
- * halves of the immediate (immlo at 30:29, immhi at 23:5) and Rd
- * stay variable. */
- {"adr", 0x10000000u, 0x9F000000u, AA64_FMT_PCREL_ADR, {0, 0, 0}},
- {"adrp", 0x90000000u, 0x9F000000u, AA64_FMT_PCREL_ADR, {0, 0, 0}},
-
- /* Add/Sub immediate. Mask: family bits 28:24 + op(30) + S(29). The
- * sh bit (22) and imm12 (21:10) stay variable. */
- {"add_imm", 0x11000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, {0, 0, 0}},
- {"adds_imm", 0x31000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, {0, 0, 0}},
- {"sub_imm", 0x51000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, {0, 0, 0}},
- {"subs_imm", 0x71000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, {0, 0, 0}},
-
- /* Load/store, unsigned 12-bit immediate offset. Mask: family bits
- * 29:27 + 25:24 + size(31:30) + V(26) + opc(23:22). Only the 64-bit
- * integer LDR/STR forms (size=11, V=0) are listed today; widen as
- * other widths come online. */
- {"str_uimm", 0xF9000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, {0, 0, 0}},
- {"ldr_uimm", 0xF9400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, {0, 0, 0}},
-
- /* Load/store pair, pre-indexed. Mask: family bits 30:23 + opc(31:30)
- * + V(26) + L(22). Only the 64-bit integer form (opc=10, V=0) is
- * registered today. */
- {"stp_pre", 0xA9800000u, 0xFFC00000u, AA64_FMT_LDSTP_PRE, {0, 0, 0}},
- {"ldp_pre", 0xA9C00000u, 0xFFC00000u, AA64_FMT_LDSTP_PRE, {0, 0, 0}},
-
- /* Hint. Mask: family + bits 12:5 zero (NOP slot). */
- {"nop", 0xD503201Fu, 0xFFFFFFFFu, AA64_FMT_HINT, {0, 0, 0}},
+ /* ----- Move-wide immediate (MOVN / MOVZ / MOVK) ----- */
+ {"movn", 0x12800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, 0, {0, 0}},
+ {"movz", 0x52800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, 0, {0, 0}},
+ {"movk", 0x72800000u, 0x7F800000u, AA64_FMT_MOVEWIDE, 0, {0, 0}},
+
+ /* ----- Logical, shifted register -----
+ * Alias MOV Rd, Rm is ORR Rd, ZR, Rm with shift=0, imm6=0. The mask
+ * pins Rn (bits 9:5) to 11111 (ZR) and shift/imm6 to 0 so only the
+ * MOV spelling matches; broader ORR rows below catch the rest. */
+ {"mov", 0x2A0003E0u, 0x7FE0FFE0u, AA64_FMT_LOG_SR, AA64_ASMFL_ALIAS,
+ {0, 0}},
+ /* MVN Rd, Rm ≡ ORN Rd, ZR, Rm (logical N=1, Rn=ZR, no shift) */
+ {"mvn", 0x2A2003E0u, 0x7FE0FFE0u, AA64_FMT_LOG_SR, AA64_ASMFL_ALIAS,
+ {0, 0}},
+ {"and", 0x0A000000u, 0x7F200000u, AA64_FMT_LOG_SR, 0, {0, 0}},
+ {"bic", 0x0A200000u, 0x7F200000u, AA64_FMT_LOG_SR, 0, {0, 0}},
+ {"orr", 0x2A000000u, 0x7F200000u, AA64_FMT_LOG_SR, 0, {0, 0}},
+ {"orn", 0x2A200000u, 0x7F200000u, AA64_FMT_LOG_SR, 0, {0, 0}},
+ {"eor", 0x4A000000u, 0x7F200000u, AA64_FMT_LOG_SR, 0, {0, 0}},
+ {"eon", 0x4A200000u, 0x7F200000u, AA64_FMT_LOG_SR, 0, {0, 0}},
+ {"ands", 0x6A000000u, 0x7F200000u, AA64_FMT_LOG_SR, 0, {0, 0}},
+ {"bics", 0x6A200000u, 0x7F200000u, AA64_FMT_LOG_SR, 0, {0, 0}},
+
+ /* ----- Add/Sub, shifted register -----
+ * CMP Rn, Rm ≡ SUBS ZR, Rn, Rm; CMN Rn, Rm ≡ ADDS ZR, Rn, Rm. The
+ * compare rows sit ahead of NEG / NEGS: a SUBS word with Rd=ZR and
+ * Rn=ZR satisfies both the CMP and the NEGS mask, and the compare
+ * spelling is the preferred one when Rd=ZR, so it must match first. */
+ {"cmp", 0x6B00001Fu, 0x7F20001Fu, AA64_FMT_ADDSUB_SR, AA64_ASMFL_ALIAS,
+ {0, 0}},
+ {"cmn", 0x2B00001Fu, 0x7F20001Fu, AA64_FMT_ADDSUB_SR, AA64_ASMFL_ALIAS,
+ {0, 0}},
+ /* NEG Rd, Rm ≡ SUB Rd, ZR, Rm (Rn=ZR, shift=0, imm6=0). */
+ {"neg", 0x4B0003E0u, 0x7FE0FFE0u, AA64_FMT_ADDSUB_SR, AA64_ASMFL_ALIAS,
+ {0, 0}},
+ {"negs", 0x6B0003E0u, 0x7FE0FFE0u, AA64_FMT_ADDSUB_SR, AA64_ASMFL_ALIAS,
+ {0, 0}},
+ {"add", 0x0B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, 0, {0, 0}},
+ {"adds", 0x2B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, 0, {0, 0}},
+ {"sub", 0x4B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, 0, {0, 0}},
+ {"subs", 0x6B000000u, 0x7F200000u, AA64_FMT_ADDSUB_SR, 0, {0, 0}},
+
+ /* ----- Data-processing 3-source -----
+ * MUL Rd, Rn, Rm ≡ MADD Rd, Rn, Rm, ZR (Ra=ZR, op31=0, o0=0). */
+ {"mul", 0x1B007C00u, 0x7FE0FC00u, AA64_FMT_DP3, AA64_ASMFL_ALIAS, {0, 0}},
+ /* MNEG Rd, Rn, Rm ≡ MSUB Rd, Rn, Rm, ZR. */
+ {"mneg", 0x1B00FC00u, 0x7FE0FC00u, AA64_FMT_DP3, AA64_ASMFL_ALIAS, {0, 0}},
+ {"madd", 0x1B000000u, 0x7FE08000u, AA64_FMT_DP3, 0, {0, 0}},
+ {"msub", 0x1B008000u, 0x7FE08000u, AA64_FMT_DP3, 0, {0, 0}},
+
+ /* ----- Data-processing 2-source ----- */
+ {"udiv", 0x1AC00800u, 0x5FE0FC00u, AA64_FMT_DP2, 0, {0, 0}},
+ {"sdiv", 0x1AC00C00u, 0x5FE0FC00u, AA64_FMT_DP2, 0, {0, 0}},
+ {"lslv", 0x1AC02000u, 0x5FE0FC00u, AA64_FMT_DP2, 0, {0, 0}},
+ {"lsrv", 0x1AC02400u, 0x5FE0FC00u, AA64_FMT_DP2, 0, {0, 0}},
+ {"asrv", 0x1AC02800u, 0x5FE0FC00u, AA64_FMT_DP2, 0, {0, 0}},
+ {"rorv", 0x1AC02C00u, 0x5FE0FC00u, AA64_FMT_DP2, 0, {0, 0}},
+
+ /* ----- Unconditional branch (register) -----
+ * RET aliases its no-operand spelling to RET X30 (Rn=11110). The
+ * tighter row matches when Rn=30 and prints "ret" without operands;
+ * the looser row below catches RET Xn for other Rn. */
+ {"ret", 0xD65F03C0u, 0xFFFFFFFFu, AA64_FMT_BR_REG,
+ AA64_ASMFL_ALIAS | AA64_ASMFL_NORN, {0, 0}},
+ {"br", 0xD61F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, 0, {0, 0}},
+ {"blr", 0xD63F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, 0, {0, 0}},
+ {"ret", 0xD65F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, 0, {0, 0}},
+
+ /* ----- PC-relative addressing ----- */
+ {"adr", 0x10000000u, 0x9F000000u, AA64_FMT_PCREL_ADR, 0, {0, 0}},
+ {"adrp", 0x90000000u, 0x9F000000u, AA64_FMT_PCREL_ADR, 0, {0, 0}},
+
+ /* ----- Add/Sub immediate ----- */
+ {"add", 0x11000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, 0, {0, 0}},
+ {"adds", 0x31000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, 0, {0, 0}},
+ {"sub", 0x51000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, 0, {0, 0}},
+ {"subs", 0x71000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, 0, {0, 0}},
+
+ /* ----- Load/store, unsigned 12-bit immediate (scaled) -----
+ * Mask: family bits 29:27 + 25:24 + size(31:30) + V(26) + opc(23:22). */
+ {"strb", 0x39000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}},
+ {"ldrb", 0x39400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}},
+ {"strh", 0x79000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}},
+ {"ldrh", 0x79400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}},
+ {"str", 0xB9000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}}, /* 32 */
+ {"ldr", 0xB9400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}},
+ {"str", 0xF9000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, AA64_ASMFL_SF1,
+ {0, 0}}, /* 64 */
+ {"ldr", 0xF9400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, AA64_ASMFL_SF1,
+ {0, 0}},
+ /* SIMD/FP scaled loads/stores (V=1). size 0..2 select B/H/S; size=3
+ * selects D; the 128-bit Q form uses size=00 with opc bit 1 set and
+ * is not yet emitted by codegen. */
+ {"str", 0x3D000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}}, /* B */
+ {"ldr", 0x3D400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}},
+ {"str", 0x7D000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}}, /* H */
+ {"ldr", 0x7D400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}},
+ {"str", 0xBD000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}}, /* S */
+ {"ldr", 0xBD400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, 0, {0, 0}},
+ {"str", 0xFD000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, AA64_ASMFL_SF1,
+ {0, 0}}, /* D */
+ {"ldr", 0xFD400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, AA64_ASMFL_SF1,
+ {0, 0}},
+
+ /* ----- Load/store, unscaled signed 9-bit immediate (LDUR/STUR) -----
+ * V=0 first, V=1 next. Per-row mask narrows size+V+opc; family mask
+ * pins the high family bits + the SIMM9-vs-other-variant selector. */
+ {"sturb", 0x38000000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}},
+ {"ldurb", 0x38400000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}},
+ {"sturh", 0x78000000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}},
+ {"ldurh", 0x78400000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}},
+ {"stur", 0xB8000000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}}, /* 32 */
+ {"ldur", 0xB8400000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}},
+ {"stur", 0xF8000000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, AA64_ASMFL_SF1,
+ {0, 0}},
+ {"ldur", 0xF8400000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, AA64_ASMFL_SF1,
+ {0, 0}},
+ {"stur", 0x3C000000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}}, /* B */
+ {"ldur", 0x3C400000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}},
+ {"stur", 0x7C000000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}}, /* H */
+ {"ldur", 0x7C400000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}},
+ {"stur", 0xBC000000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}}, /* S */
+ {"ldur", 0xBC400000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, 0, {0, 0}},
+ {"stur", 0xFC000000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, AA64_ASMFL_SF1,
+ {0, 0}}, /* D */
+ {"ldur", 0xFC400000u, 0xFFE00C00u, AA64_FMT_LDST_SIMM9, AA64_ASMFL_SF1,
+ {0, 0}},
+
+ /* ----- Load/store pair, pre-indexed (opc=10 X / opc=01 D) ----- */
+ {"stp", 0xA9800000u, 0xFFC00000u, AA64_FMT_LDSTP_PRE, AA64_ASMFL_SF1,
+ {0, 0}},
+ {"ldp", 0xA9C00000u, 0xFFC00000u, AA64_FMT_LDSTP_PRE, AA64_ASMFL_SF1,
+ {0, 0}},
+ {"stp", 0x6D800000u, 0xFFC00000u, AA64_FMT_LDSTP_PRE, 0, {0, 0}}, /* D */
+ {"ldp", 0x6DC00000u, 0xFFC00000u, AA64_FMT_LDSTP_PRE, 0, {0, 0}},
+
+ /* ----- Load/store pair, signed-offset ----- */
+ {"stp", 0xA9000000u, 0xFFC00000u, AA64_FMT_LDSTP_SOFF, AA64_ASMFL_SF1,
+ {0, 0}},
+ {"ldp", 0xA9400000u, 0xFFC00000u, AA64_FMT_LDSTP_SOFF, AA64_ASMFL_SF1,
+ {0, 0}},
+ {"stp", 0x6D000000u, 0xFFC00000u, AA64_FMT_LDSTP_SOFF, 0, {0, 0}}, /* D */
+ {"ldp", 0x6D400000u, 0xFFC00000u, AA64_FMT_LDSTP_SOFF, 0, {0, 0}},
+
+ /* ----- Unconditional branch (immediate) ----- */
+ {"b", 0x14000000u, 0xFC000000u, AA64_FMT_BR_IMM, 0, {0, 0}},
+ {"bl", 0x94000000u, 0xFC000000u, AA64_FMT_BR_IMM, 0, {0, 0}},
+
+ /* ----- Conditional branch (immediate) ----- */
+ {"b.cond", 0x54000000u, 0xFF000010u, AA64_FMT_BR_COND, 0, {0, 0}},
+
+ /* ----- Compare-and-branch ----- */
+ {"cbz", 0x34000000u, 0x7F000000u, AA64_FMT_CB, 0, {0, 0}},
+ {"cbnz", 0x35000000u, 0x7F000000u, AA64_FMT_CB, 0, {0, 0}},
+
+ /* ----- Exception generation ----- */
+ {"svc", 0xD4000001u, 0xFFE0001Fu, AA64_FMT_EXCEPT, 0, {0, 0}},
+ {"brk", 0xD4200000u, 0xFFE0001Fu, AA64_FMT_EXCEPT, 0, {0, 0}},
+ {"hlt", 0xD4400000u, 0xFFE0001Fu, AA64_FMT_EXCEPT, 0, {0, 0}},
+
+ /* ----- Hint ----- */
+ {"nop", 0xD503201Fu, 0xFFFFFFFFu, AA64_FMT_HINT, 0, {0, 0}},
};
const u32 aa64_insn_table_n =
@@ -101,3 +195,356 @@ const AA64InsnDesc* aa64_disasm_find(u32 word) {
}
return NULL;
}
+
+/* =====================================================================
+ * Operand print — one helper per format.
+ *
+ * Format choices for immediates:
+ *   - branch displacements, adr/adrp offsets, ldur/stur and ldp/stp
+ *     offsets: signed decimal.
+ *   - add/sub imm12, register shift amounts, scaled ldr/str offsets:
+ *     unsigned decimal.
+ *   - MOVN/MOVZ/MOVK halfword, logical bitmask, exception generation #imm:
+ *     0x-prefixed hex.
+ *
+ * Register naming: ZR alias for x31 in places where the encoding treats
+ * Rd/Rn=31 as the zero register (logical/arith), SP where it treats 31
+ * as the stack pointer (add/sub imm, ldr/str-uimm Rn, ldp/stp Rn).
+ *
+ * vaddr is folded into PC-relative branch operands when nonzero. */
+
+/* Append the name of general-purpose register `r` to `sb`.
+ *
+ *   sf          - nonzero selects the 64-bit (x) spelling, zero the 32-bit
+ *                 (w) spelling.
+ *   sp_means_sp - nonzero when register number 31 denotes the stack pointer
+ *                 in this operand slot, zero when it denotes the zero
+ *                 register.
+ *
+ * Register 31 prints as sp / wsp or xzr / wzr accordingly; any other number
+ * prints as the width letter followed by the register number. */
+static void emit_reg(StrBuf* sb, u32 r, int sf, int sp_means_sp) {
+  if (r == 31u) {
+    if (sp_means_sp) {
+      /* The 32-bit view of the stack pointer is spelled "wsp"; printing a
+       * bare "sp" would turn a 32-bit add/sub immediate into its 64-bit
+       * spelling. */
+      strbuf_puts(sb, sf ? "sp" : "wsp");
+    } else {
+      strbuf_puts(sb, sf ? "xzr" : "wzr");
+    }
+    return;
+  }
+  strbuf_putc(sb, sf ? 'x' : 'w');
+  strbuf_put_u64(sb, (u64)r);
+}
+
+/* Append a SIMD/FP register name: the width letter `prefix` immediately
+ * followed by register number `r`. */
+static void emit_vreg(StrBuf* sb, u32 r, char prefix) {
+  const u64 regno = (u64)r;
+  strbuf_putc(sb, prefix);
+  strbuf_put_u64(sb, regno);
+}
+
+/* Append the two-letter condition name selected by the low four bits of
+ * `cond`; higher bits are ignored. */
+static void emit_cond(StrBuf* sb, u32 cond) {
+  static const char* const kCondNames[16] = {
+      "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+      "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
+  const u32 idx = cond & 0xfu;
+  strbuf_puts(sb, kCondNames[idx]);
+}
+
+/* Sign-extend the value held in the low `nbits` bits of `v` to i64. Bits of
+ * `v` above the field are ignored.
+ *
+ *   nbits == 0   yields 0 (there is no field).
+ *   nbits >= 64  returns `v` reinterpreted as signed (nothing to extend). */
+static i64 sext(u64 v, u32 nbits) {
+  if (nbits == 0u) return 0;
+  /* Returning early also keeps both shifts below strictly under 64 bits; a
+   * shift count of 64 or more is undefined behaviour. */
+  if (nbits >= 64u) return (i64)v;
+  u64 mask = (1ull << nbits) - 1ull;
+  u64 sign = 1ull << (nbits - 1u);
+  v &= mask;
+  if (v & sign) v |= ~mask; /* replicate the sign bit into the high bits */
+  return (i64)v;
+}
+
+/* Move-wide operands: Rd, then imm16 via the hex writer, then
+ * ", lsl <hw * 16>" only when the halfword selector is nonzero. */
+static void print_movewide(StrBuf* sb, u32 w) {
+  const AA64MoveWide mw = aa64_movewide_unpack(w);
+
+  emit_reg(sb, mw.Rd, (int)mw.sf, /*sp_means_sp=*/0);
+  strbuf_puts(sb, ", ");
+  strbuf_put_hex_u64(sb, (u64)mw.imm16);
+
+  if (!mw.hw) return; /* hw == 0: no shift suffix */
+  strbuf_puts(sb, ", lsl ");
+  strbuf_put_u64(sb, (u64)(mw.hw * 16u));
+}
+
+/* Logical (shifted register) operands.
+ *
+ * Alias rows (MOV / MVN) print "Rd, Rm" and nothing else. Canonical rows
+ * print "Rd, Rn, Rm" followed by ", <shift> #<imm6>" when either the shift
+ * type or the shift amount is nonzero. */
+static void print_logsr(StrBuf* sb, u32 w, const AA64InsnDesc* d) {
+  static const char* const kShiftNames[4] = {"lsl", "lsr", "asr", "ror"};
+  const AA64LogSR ins = aa64_logsr_unpack(w);
+  const int is64 = (int)ins.sf;
+  const int is_alias = (d->flags & AA64_ASMFL_ALIAS) != 0;
+
+  emit_reg(sb, ins.Rd, is64, 0);
+  strbuf_puts(sb, ", ");
+  if (!is_alias) {
+    /* Only the canonical spelling shows the first source register. */
+    emit_reg(sb, ins.Rn, is64, 0);
+    strbuf_puts(sb, ", ");
+  }
+  emit_reg(sb, ins.Rm, is64, 0);
+
+  if (is_alias) return;
+  if (!ins.imm6 && !ins.shift) return;
+
+  strbuf_puts(sb, ", ");
+  strbuf_puts(sb, kShiftNames[ins.shift & 3u]);
+  strbuf_puts(sb, " #");
+  strbuf_put_u64(sb, (u64)ins.imm6);
+}
+
+/* Add/sub (shifted register) operands.
+ *
+ *   CMP / CMN alias rows  -> "Rn, Rm"
+ *   NEG / NEGS alias rows -> "Rd, Rm"
+ *   canonical rows        -> "Rd, Rn, Rm"
+ *
+ * Every spelling is followed by ", <shift> #<imm6>" when the shift type or
+ * amount is nonzero. The CMP / CMN table rows leave shift and imm6 variable,
+ * so a shifted compare reaches the alias path and must keep its shift; the
+ * NEG / NEGS rows pin both to zero, so nothing extra is printed for them. */
+static void print_addsubsr(StrBuf* sb, u32 w, const AA64InsnDesc* d) {
+  /* "rsv" marks shift=3, which has no add/sub spelling (reserved). */
+  static const char* sh[4] = {"lsl", "lsr", "asr", "rsv"};
+  AA64AddSubSR f = aa64_addsubsr_unpack(w);
+
+  if (d->flags & AA64_ASMFL_ALIAS) {
+    /* The alias rows are told apart by mnemonic: only cmp / cmn start with
+     * 'c'. The compares hide Rd (ZR); the negates hide Rn (ZR). */
+    if (d->mnemonic[0] == 'c') {
+      emit_reg(sb, f.Rn, (int)f.sf, 0);
+    } else {
+      emit_reg(sb, f.Rd, (int)f.sf, 0);
+    }
+    strbuf_puts(sb, ", ");
+    emit_reg(sb, f.Rm, (int)f.sf, 0);
+  } else {
+    emit_reg(sb, f.Rd, (int)f.sf, 0);
+    strbuf_puts(sb, ", ");
+    emit_reg(sb, f.Rn, (int)f.sf, 0);
+    strbuf_puts(sb, ", ");
+    emit_reg(sb, f.Rm, (int)f.sf, 0);
+  }
+
+  if (f.imm6 || f.shift) {
+    strbuf_puts(sb, ", ");
+    strbuf_puts(sb, sh[f.shift & 3u]);
+    strbuf_puts(sb, " #");
+    strbuf_put_u64(sb, (u64)f.imm6);
+  }
+}
+
+/* Data-processing (3 source) operands: "Rd, Rn, Rm", plus ", Ra" unless
+ * the row is an alias (MUL / MNEG), where Ra is ZR and is left out. */
+static void print_dp3(StrBuf* sb, u32 w, const AA64InsnDesc* d) {
+  const AA64DP3 ins = aa64_dp3_unpack(w);
+  const int is64 = (int)ins.sf;
+
+  emit_reg(sb, ins.Rd, is64, 0);
+  strbuf_puts(sb, ", ");
+  emit_reg(sb, ins.Rn, is64, 0);
+  strbuf_puts(sb, ", ");
+  emit_reg(sb, ins.Rm, is64, 0);
+
+  if ((d->flags & AA64_ASMFL_ALIAS) == 0) {
+    strbuf_puts(sb, ", ");
+    emit_reg(sb, ins.Ra, is64, 0);
+  }
+}
+
+/* Data-processing (2 source) operands: "Rd, Rn, Rm", all at the width
+ * selected by sf. */
+static void print_dp2(StrBuf* sb, u32 w) {
+  const AA64DP2 ins = aa64_dp2_unpack(w);
+  const u32 regs[3] = {ins.Rd, ins.Rn, ins.Rm};
+
+  for (int i = 0; i < 3; ++i) {
+    if (i != 0) strbuf_puts(sb, ", ");
+    emit_reg(sb, regs[i], (int)ins.sf, 0);
+  }
+}
+
+/* Branch-to-register operand: the 64-bit name of Rn, or nothing at all when
+ * the row carries AA64_ASMFL_NORN (bare RET with implicit X30). */
+static void print_brreg(StrBuf* sb, u32 w, const AA64InsnDesc* d) {
+  const AA64BrReg br = aa64_brreg_unpack(w);
+  const int hide_rn = (d->flags & AA64_ASMFL_NORN) != 0;
+
+  if (!hide_rn) {
+    emit_reg(sb, br.Rn, /*sf=*/1, 0);
+  }
+}
+
+/* ADR / ADRP operands: "Rd, <target>".
+ *
+ * The 21-bit immediate is rebuilt as immhi:immlo and sign-extended; for ADRP
+ * it counts 4 KiB pages. With a nonzero `vaddr` the absolute target is
+ * written through the hex writer (ADRP is relative to the 4 KiB page holding
+ * the instruction); otherwise the raw displacement is printed as
+ * "#<signed decimal>". */
+static void print_pcrel(StrBuf* sb, u32 w, u64 vaddr) {
+  AA64PCRelAdr f = aa64_pcrel_adr_unpack(w);
+  emit_reg(sb, f.Rd, /*sf=*/1, 0);
+  strbuf_puts(sb, ", ");
+  i64 imm = sext(((u64)f.immhi << 2) | (u64)f.immlo, 21);
+  /* Multiply rather than shift: left-shifting a negative signed value is
+   * undefined behaviour in C, and backward ADRP displacements are negative. */
+  if (f.op == AA64_ADR_OP_ADRP) imm *= 4096;
+  if (vaddr) {
+    u64 base = (f.op == AA64_ADR_OP_ADRP) ? (vaddr & ~0xfffull) : vaddr;
+    strbuf_put_hex_u64(sb, base + (u64)imm);
+  } else {
+    strbuf_puts(sb, "#");
+    strbuf_put_i64(sb, imm);
+  }
+}
+
+/* Add/sub (immediate) operands: "Rd, Rn, #imm12", followed by ", lsl #12"
+ * when the sh bit is set. Register number 31 is treated as the stack
+ * pointer for both Rd and Rn. */
+static void print_addsubimm(StrBuf* sb, u32 w) {
+  const AA64AddSubImm ins = aa64_addsubimm_unpack(w);
+  const int is64 = (int)ins.sf;
+
+  emit_reg(sb, ins.Rd, is64, /*sp_means_sp=*/1);
+  strbuf_puts(sb, ", ");
+  emit_reg(sb, ins.Rn, is64, /*sp_means_sp=*/1);
+  strbuf_puts(sb, ", #");
+  strbuf_put_u64(sb, (u64)ins.imm12);
+
+  if (!ins.sh) return;
+  strbuf_puts(sb, ", lsl #12");
+}
+
+/* Return log2 of the access size in bytes, which is the low two bits of the
+ * size field. The descriptor argument is accepted but not consulted. */
+static u32 ldst_log2_size(const AA64InsnDesc* d, u32 size_field) {
+  const u32 log2_bytes = size_field & 3u;
+  (void)d;
+  return log2_bytes;
+}
+
+/* Load/store (scaled unsigned immediate) operands: "Rt, [Rn]" or
+ * "Rt, [Rn, #byte_offset]".
+ *
+ * Rt is w/x for V=0 (x only when size=3) and b/h/s/d by size for V=1. The
+ * base register prints register 31 as sp. The offset shown is imm12 scaled
+ * by the access size and is omitted when zero. */
+static void print_ldst_uimm(StrBuf* sb, u32 w, const AA64InsnDesc* d) {
+  static const char kVecLetters[4] = {'b', 'h', 's', 'd'};
+  const AA64LdStUimm ins = aa64_ldst_uimm_unpack(w);
+  const u32 log2_bytes = ldst_log2_size(d, ins.size);
+
+  if (ins.V != 0) {
+    emit_vreg(sb, ins.Rt, kVecLetters[log2_bytes & 3u]);
+  } else {
+    emit_reg(sb, ins.Rt, /*sf=*/(int)(log2_bytes == 3u), 0);
+  }
+
+  strbuf_puts(sb, ", [");
+  emit_reg(sb, ins.Rn, /*sf=*/1, /*sp_means_sp=*/1);
+
+  const u32 offset_bytes = ins.imm12 << log2_bytes;
+  if (offset_bytes != 0u) {
+    strbuf_puts(sb, ", #");
+    strbuf_put_u64(sb, (u64)offset_bytes);
+  }
+  strbuf_putc(sb, ']');
+}
+
+/* Load/store (unscaled signed 9-bit offset) operands: "Rt, [Rn]" or
+ * "Rt, [Rn, #offset]".
+ *
+ * Register selection matches the scaled form: w/x for V=0, b/h/s/d for V=1.
+ * The offset is the sign-extended imm9 in bytes and is omitted when zero.
+ * The descriptor argument is not consulted. */
+static void print_ldst_simm9(StrBuf* sb, u32 w, const AA64InsnDesc* d) {
+  static const char kVecLetters[4] = {'b', 'h', 's', 'd'};
+  const AA64LdStSimm9 ins = aa64_ldst_simm9_unpack(w);
+  const u32 log2_bytes = ins.size & 3u;
+  (void)d;
+
+  if (ins.V != 0) {
+    emit_vreg(sb, ins.Rt, kVecLetters[log2_bytes]);
+  } else {
+    emit_reg(sb, ins.Rt, /*sf=*/(int)(log2_bytes == 3u), 0);
+  }
+
+  strbuf_puts(sb, ", [");
+  emit_reg(sb, ins.Rn, /*sf=*/1, /*sp_means_sp=*/1);
+
+  const i64 offset_bytes = sext((u64)ins.imm9, 9);
+  if (offset_bytes != 0) {
+    strbuf_puts(sb, ", #");
+    strbuf_put_i64(sb, offset_bytes);
+  }
+  strbuf_putc(sb, ']');
+}
+
+/* Shared printer for load/store pair: "Rt, Rt2, [Rn]" or
+ * "Rt, Rt2, [Rn, #offset]", with a trailing '!' when `pre` is nonzero.
+ *
+ * Register class and offset scale come from V and opc:
+ *   V=0: opc=2 -> x registers, scale 8; any other opc -> w registers, scale 4.
+ *   V=1: opc=0 -> s, scale 4; opc=1 -> d, scale 8; otherwise q, scale 16.
+ * The offset is the sign-extended imm7 times the scale, omitted when zero.
+ * The base register prints register 31 as sp. */
+static void print_ldstp_common(StrBuf* sb, AA64LdStPPre f, int pre) {
+  const int is_vec = (f.V == 1);
+  char vec_letter = 's';
+  int is64 = 1;
+  i64 unit;
+
+  if (!is_vec) {
+    is64 = (f.opc == 2);
+    unit = is64 ? 8 : 4;
+  } else {
+    switch (f.opc) {
+      case 0:
+        vec_letter = 's';
+        unit = 4;
+        break;
+      case 1:
+        vec_letter = 'd';
+        unit = 8;
+        break;
+      default:
+        vec_letter = 'q';
+        unit = 16;
+        break;
+    }
+  }
+
+  if (!is_vec) {
+    emit_reg(sb, f.Rt, is64, 0);
+    strbuf_puts(sb, ", ");
+    emit_reg(sb, f.Rt2, is64, 0);
+  } else {
+    emit_vreg(sb, f.Rt, vec_letter);
+    strbuf_puts(sb, ", ");
+    emit_vreg(sb, f.Rt2, vec_letter);
+  }
+
+  strbuf_puts(sb, ", [");
+  emit_reg(sb, f.Rn, /*sf=*/1, /*sp_means_sp=*/1);
+
+  const i64 offset_bytes = sext((u64)f.imm7, 7) * unit;
+  if (offset_bytes != 0) {
+    strbuf_puts(sb, ", #");
+    strbuf_put_i64(sb, offset_bytes);
+  }
+  strbuf_putc(sb, ']');
+  if (pre) strbuf_putc(sb, '!');
+}
+
+/* Pre-indexed pair: decode the word and print it with the '!' suffix. */
+static void print_ldstp_pre(StrBuf* sb, u32 w) {
+  const AA64LdStPPre fields = aa64_ldstp_pre_unpack(w);
+  print_ldstp_common(sb, fields, /*pre=*/1);
+}
+/* Signed-offset pair: decode the word and print it without the '!' suffix. */
+static void print_ldstp_soff(StrBuf* sb, u32 w) {
+  const AA64LdStPPre fields = aa64_ldstp_soff_unpack(w);
+  print_ldstp_common(sb, fields, /*pre=*/0);
+}
+
+/* B / BL operand. The displacement is the sign-extended imm26 times 4. With
+ * a nonzero `vaddr` the absolute target goes through the hex writer;
+ * otherwise the displacement prints as "#<signed decimal>". */
+static void print_br_imm(StrBuf* sb, u32 w, u64 vaddr) {
+  const AA64BrImm br = aa64_brimm_unpack(w);
+  const i64 disp = sext((u64)br.imm26, 26) * 4;
+
+  if (!vaddr) {
+    strbuf_puts(sb, "#");
+    strbuf_put_i64(sb, disp);
+    return;
+  }
+  strbuf_put_hex_u64(sb, vaddr + (u64)disp);
+}
+
+/* B.cond operands: the condition name, one space, then the target.
+ *
+ * The table row carries the single generic mnemonic "b.cond", so the
+ * condition is emitted here ahead of the target; producing the canonical
+ * b.<cc> spelling is left to the code that combines mnemonic and operands.
+ * The target is the sign-extended imm19 times 4, shown as an absolute hex
+ * address when `vaddr` is nonzero and as "#<signed decimal>" otherwise. The
+ * descriptor argument is not consulted. */
+static void print_br_cond(StrBuf* sb, u32 w, u64 vaddr,
+                          const AA64InsnDesc* d) {
+  const AA64BrCond br = aa64_brcond_unpack(w);
+  const i64 disp = sext((u64)br.imm19, 19) * 4;
+  (void)d;
+
+  emit_cond(sb, br.cond);
+  strbuf_putc(sb, ' ');
+
+  if (!vaddr) {
+    strbuf_puts(sb, "#");
+    strbuf_put_i64(sb, disp);
+    return;
+  }
+  strbuf_put_hex_u64(sb, vaddr + (u64)disp);
+}
+
+/* CBZ / CBNZ operands: "Rt, <target>". Rt uses the width selected by sf;
+ * the target is the sign-extended imm19 times 4, shown as an absolute hex
+ * address when `vaddr` is nonzero and as "#<signed decimal>" otherwise. */
+static void print_cb(StrBuf* sb, u32 w, u64 vaddr) {
+  const AA64CB cb = aa64_cb_unpack(w);
+  const i64 disp = sext((u64)cb.imm19, 19) * 4;
+
+  emit_reg(sb, cb.Rt, (int)cb.sf, 0);
+  strbuf_puts(sb, ", ");
+
+  if (!vaddr) {
+    strbuf_puts(sb, "#");
+    strbuf_put_i64(sb, disp);
+    return;
+  }
+  strbuf_put_hex_u64(sb, vaddr + (u64)disp);
+}
+
+/* Exception-generation operand: '#' followed by imm16 via the hex writer. */
+static void print_except(StrBuf* sb, u32 w) {
+  const AA64Except exc = aa64_except_unpack(w);
+  const u64 payload = (u64)exc.imm16;
+
+  strbuf_puts(sb, "#");
+  strbuf_put_hex_u64(sb, payload);
+}
+
+/* Render the operand text for `word` into `sb`, dispatching on the format of
+ * the matched descriptor `desc`.
+ *
+ * `vaddr` is forwarded only to the PC-relative formats (ADR/ADRP, B/BL,
+ * B.cond, CBZ/CBNZ). The descriptor is forwarded to the formats whose
+ * printers take it. Nothing is written for AA64_FMT_HINT or for a format
+ * value outside the cases below. */
+void aa64_print_operands(StrBuf* sb, const AA64InsnDesc* desc, u32 word,
+                         u64 vaddr) {
+  const AA64Format fmt = (AA64Format)desc->fmt;
+
+  switch (fmt) {
+    case AA64_FMT_MOVEWIDE:
+      print_movewide(sb, word);
+      return;
+    case AA64_FMT_LOG_SR:
+      print_logsr(sb, word, desc);
+      return;
+    case AA64_FMT_ADDSUB_SR:
+      print_addsubsr(sb, word, desc);
+      return;
+    case AA64_FMT_DP3:
+      print_dp3(sb, word, desc);
+      return;
+    case AA64_FMT_DP2:
+      print_dp2(sb, word);
+      return;
+    case AA64_FMT_BR_REG:
+      print_brreg(sb, word, desc);
+      return;
+    case AA64_FMT_PCREL_ADR:
+      print_pcrel(sb, word, vaddr);
+      return;
+    case AA64_FMT_ADDSUB_IMM:
+      print_addsubimm(sb, word);
+      return;
+    case AA64_FMT_LDST_UIMM:
+      print_ldst_uimm(sb, word, desc);
+      return;
+    case AA64_FMT_LDSTP_PRE:
+      print_ldstp_pre(sb, word);
+      return;
+    case AA64_FMT_LDSTP_SOFF:
+      print_ldstp_soff(sb, word);
+      return;
+    case AA64_FMT_LDST_SIMM9:
+      print_ldst_simm9(sb, word, desc);
+      return;
+    case AA64_FMT_BR_IMM:
+      print_br_imm(sb, word, vaddr);
+      return;
+    case AA64_FMT_BR_COND:
+      print_br_cond(sb, word, vaddr, desc);
+      return;
+    case AA64_FMT_CB:
+      print_cb(sb, word, vaddr);
+      return;
+    case AA64_FMT_EXCEPT:
+      print_except(sb, word);
+      return;
+    case AA64_FMT_HINT:
+      /* NOP takes no operands. */
+      return;
+  }
+}
+
+/* =====================================================================
+ * Operand parse — phase-3 wires this up to the asm token stream. Phase
+ * 2 ships the signature so the assembler bring-up commit doesn't need to
+ * touch the descriptor table; the body returns 0 for every format until
+ * the per-format grammar is implemented. */
+
+/* Placeholder operand parser: ignores all three arguments and returns 0 for
+ * every format. */
+int aa64_parse_operands(struct AA64AsmTok* tok, const AA64InsnDesc* desc,
+                        void* fields_out) {
+  const int result = 0;
+
+  /* Arguments are unused until the per-format grammar exists. */
+  (void)fields_out;
+  (void)desc;
+  (void)tok;
+  return result;
+}
diff --git a/src/arch/aa64_isa.h b/src/arch/aa64_isa.h
@@ -26,6 +26,7 @@
* inline wrapper in the relevant format section. */
#include "core/core.h"
+#include "core/strbuf.h"
/* ---- common register names ---- */
#define AA64_ZR 31u /* WZR / XZR */
@@ -44,9 +45,26 @@ typedef enum AA64Format {
AA64_FMT_ADDSUB_IMM, /* add/sub, immediate */
AA64_FMT_LDST_UIMM, /* load/store, unsigned 12-bit immediate offset */
AA64_FMT_LDSTP_PRE, /* load/store pair, pre-indexed */
+ AA64_FMT_LDSTP_SOFF, /* load/store pair, signed-offset */
+ AA64_FMT_LDST_SIMM9, /* load/store, unscaled 9-bit signed offset
+ (LDUR / STUR, V=0 and V=1) */
+ AA64_FMT_BR_IMM, /* unconditional branch (immediate) — B / BL */
+ AA64_FMT_BR_COND, /* B.cond (imm19) */
+ AA64_FMT_CB, /* compare-and-branch (CBZ / CBNZ) */
+ AA64_FMT_EXCEPT, /* exception generation (BRK / SVC / HVC / ...) */
AA64_FMT_HINT, /* hint (NOP / YIELD / ...) */
} AA64Format;
+/* ---- AsmFlags column on AA64InsnDesc ----
+ *
+ * Per-row metadata that varies across same-format members. Most rows
+ * carry 0. When the disassembler matches a row whose ALIAS bit is set,
+ * that's the spelling it prints; the assembler also accepts both the
+ * alias and the canonical form because both rows live in the table. */
+#define AA64_ASMFL_ALIAS 0x01u /* row is an alias (e.g. MOV → ORR Rd, ZR, Rm) */
+#define AA64_ASMFL_SF1 0x02u /* 64-bit form only (sf hard-wired) */
+#define AA64_ASMFL_NORN 0x04u /* hide Rn operand in print (e.g. RET when Rn=30) */
+
/* ====================================================================
* Move-wide immediate (MOVN / MOVZ / MOVK)
* sf opc(2) 100101 hw(2) imm16(16) Rd(5)
@@ -597,6 +615,208 @@ static inline u32 aa64_nop(void) {
}
/* ====================================================================
+ * Load/store pair, signed-offset (STP / LDP, no pre/post-increment).
+ * opc(2) 101 V(1) 010 L(1) imm7 Rt2 Rn Rt (bit 23 = 0)
+ *
+ * Mirrors the LDSTP_PRE format with bit 23 cleared; the field layout is
+ * otherwise identical, so the AA64LdStPPre field struct is reused via a
+ * typedef and the pack helper below ORs in the signed-offset family-match
+ * constant. Codegen emits both X (opc=10) and FP-D (opc=01, V=1) variants
+ * for callee-save spill/reload (`stp x29,x30,[sp,#16]`, `stp d8,d9,[sp,#32]`). */
+
+#define AA64_LDSTP_SOFF_FAMILY_MATCH 0x29000000u /* bits 29:27 = 101, bits 25:23 = 010 */
+#define AA64_LDSTP_SOFF_FAMILY_MASK 0x7FC00000u /* bits 30:22 (incl. opc[0], V, L) — NOTE(review): confirm intended */
+
+typedef AA64LdStPPre AA64LdStPSOff;
+
+static inline u32 aa64_ldstp_soff_pack(AA64LdStPSOff f) {
+  u32 w = AA64_LDSTP_SOFF_FAMILY_MATCH | ((f.opc & 3u) << 30);
+  w |= ((f.V & 1u) << 26) | ((f.L & 1u) << 22) | ((f.imm7 & 0x7fu) << 15);
+  return w | ((f.Rt2 & 0x1fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu);
+}
+
+static inline AA64LdStPSOff aa64_ldstp_soff_unpack(u32 w) {
+  return (AA64LdStPSOff){ /* field positions mirror aa64_ldstp_soff_pack */
+      .opc = (w >> 30) & 3u,
+      .V = (w >> 26) & 1u,
+      .L = (w >> 22) & 1u,
+      .imm7 = (w >> 15) & 0x7fu,
+      .Rt2 = (w >> 10) & 0x1fu,
+      .Rn = (w >> 5) & 0x1fu,
+      .Rt = w & 0x1fu,
+  };
+}
+
+/* ====================================================================
+ * Load/store, unscaled 9-bit signed offset (LDUR / STUR, V=0 and V=1).
+ * size(2) 111 V(1) 00 opc(2) 0 imm9(9) 00 Rn(5) Rt(5)
+ * 31..30 29..27 26 25..24 23..22 21 20..12 11..10 9..5 4..0
+ *
+ * size: 00=B, 01=H, 10=W, 11=X (V=0) — D when V=1 selects FP/SIMD.
+ * opc: 00=STR, 01=LDR (sign-extension variants set opc bit 1 for the
+ * smaller widths; not used by codegen today). */
+
+#define AA64_LDST_SIMM9_FAMILY_MATCH 0x38000000u
+/* bits 29:27 (=111) + bits 25:24 (=00) + bits 11:10 (=00). size, V, opc,
+ * imm9, Rn, Rt all vary; bit 21 is fixed 0 for this variant. */
+#define AA64_LDST_SIMM9_FAMILY_MASK 0x3B200C00u
+
+typedef struct AA64LdStSimm9 {
+ u32 size, V, opc, imm9, Rn, Rt;
+} AA64LdStSimm9;
+
+static inline u32 aa64_ldst_simm9_pack(AA64LdStSimm9 f) {
+  u32 w = AA64_LDST_SIMM9_FAMILY_MATCH | ((f.size & 3u) << 30);
+  w |= ((f.V & 1u) << 26) | ((f.opc & 3u) << 22) | ((f.imm9 & 0x1ffu) << 12);
+  return w | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu);
+}
+
+static inline AA64LdStSimm9 aa64_ldst_simm9_unpack(u32 w) {
+  return (AA64LdStSimm9){ /* imm9 is returned raw, not sign-extended */
+      .size = (w >> 30) & 3u,
+      .V = (w >> 26) & 1u,
+      .opc = (w >> 22) & 3u,
+      .imm9 = (w >> 12) & 0x1ffu,
+      .Rn = (w >> 5) & 0x1fu,
+      .Rt = w & 0x1fu,
+  };
+}
+
+/* ====================================================================
+ * Unconditional branch (immediate) — B / BL
+ * op(1) 00101 imm26(26)
+ * 31 30..26 25..0
+ *
+ * op=0 → B, op=1 → BL. imm26 is a signed 26-bit word displacement
+ * (multiply by 4 to get byte offset). Codegen emits with imm26=0 paired
+ * with a JUMP26 / CALL26 relocation. */
+
+#define AA64_BR_IMM_FAMILY_MATCH 0x14000000u
+#define AA64_BR_IMM_FAMILY_MASK 0x7C000000u /* bits 30:26 (=00101) */
+
+typedef struct AA64BrImm {
+ u32 op, imm26;
+} AA64BrImm;
+
+static inline u32 aa64_brimm_pack(AA64BrImm f) {
+  return AA64_BR_IMM_FAMILY_MATCH | (f.imm26 & 0x3ffffffu) | ((f.op & 1u) << 31);
+}
+
+static inline AA64BrImm aa64_brimm_unpack(u32 w) {
+  return (AA64BrImm){ /* imm26 is returned raw, not sign-extended */
+      .op = (w >> 31) & 1u,
+      .imm26 = w & 0x3ffffffu,
+  };
+}
+
+static inline u32 aa64_b(u32 imm26) { /* B (op=0); packer masks imm26 to 26 bits */
+ return aa64_brimm_pack((AA64BrImm){.op = 0, .imm26 = imm26});
+}
+static inline u32 aa64_bl(u32 imm26) { /* BL (op=1); packer masks imm26 to 26 bits */
+ return aa64_brimm_pack((AA64BrImm){.op = 1, .imm26 = imm26});
+}
+
+/* ====================================================================
+ * Conditional branch (immediate) — B.cond
+ * 0101 0100 imm19(19) 0 cond(4)
+ * 31..24 23..5 4 3..0
+ *
+ * imm19 is a signed 19-bit word displacement; cond is the 4-bit ARM
+ * condition code (EQ=0, NE=1, ...). */
+
+#define AA64_BR_COND_FAMILY_MATCH 0x54000000u
+#define AA64_BR_COND_FAMILY_MASK 0xFF000010u /* bits 31:24 fixed + bit 4 = 0 */
+
+typedef struct AA64BrCond {
+ u32 imm19, cond;
+} AA64BrCond;
+
+static inline u32 aa64_brcond_pack(AA64BrCond f) {
+  const u32 disp = (f.imm19 & 0x7ffffu) << 5; /* imm19 lands in bits 23:5 */
+  return AA64_BR_COND_FAMILY_MATCH | disp | (f.cond & 0xfu);
+}
+
+static inline AA64BrCond aa64_brcond_unpack(u32 w) {
+  return (AA64BrCond){ /* imm19 is returned raw, not sign-extended */
+      .imm19 = (w >> 5) & 0x7ffffu,
+      .cond = w & 0xfu,
+  };
+}
+
+/* ====================================================================
+ * Compare-and-branch — CBZ / CBNZ
+ * sf 011010 op(1) imm19(19) Rt(5)
+ * 31 30..25 24 23..5 4..0
+ *
+ * op=0 → CBZ (branch if zero), op=1 → CBNZ. */
+
+#define AA64_CB_FAMILY_MATCH 0x34000000u
+#define AA64_CB_FAMILY_MASK 0x7E000000u /* bits 30:25 (=011010) */
+
+typedef struct AA64CB {
+ u32 sf, op, imm19, Rt;
+} AA64CB;
+
+static inline u32 aa64_cb_pack(AA64CB f) {
+  u32 w = AA64_CB_FAMILY_MATCH | ((f.sf & 1u) << 31) | ((f.op & 1u) << 24);
+  return w | ((f.imm19 & 0x7ffffu) << 5) | (f.Rt & 0x1fu);
+}
+
+static inline AA64CB aa64_cb_unpack(u32 w) {
+  return (AA64CB){ /* field positions mirror aa64_cb_pack */
+      .sf = (w >> 31) & 1u,
+      .op = (w >> 24) & 1u,
+      .imm19 = (w >> 5) & 0x7ffffu,
+      .Rt = w & 0x1fu,
+  };
+}
+
+static inline u32 aa64_cbz(u32 sf, u32 Rt, u32 imm19) { /* CBZ: op=0 */
+ return aa64_cb_pack((AA64CB){.sf = sf, .op = 0, .imm19 = imm19, .Rt = Rt});
+}
+static inline u32 aa64_cbnz_imm(u32 sf, u32 Rt, u32 imm19) { /* CBNZ: op=1 */
+ return aa64_cb_pack((AA64CB){.sf = sf, .op = 1, .imm19 = imm19, .Rt = Rt});
+}
+
+/* ====================================================================
+ * Exception generation — BRK / SVC / HVC / SMC / HLT / UDF aliases.
+ * 1101 0100 opc(3) imm16(16) op2(3) LL(2)
+ * 31..24 23..21 20..5 4..2 1..0
+ *
+ * SVC: opc=000, LL=01. BRK: opc=001, LL=00. HVC/SMC/HLT/...: other
+ * combos. Codegen emits BRK today. */
+
+#define AA64_EXCEPT_FAMILY_MATCH 0xD4000000u
+#define AA64_EXCEPT_FAMILY_MASK 0xFF000000u /* bits 31:24 */
+
+typedef struct AA64Except {
+ u32 opc, imm16, op2, LL;
+} AA64Except;
+
+static inline u32 aa64_except_pack(AA64Except f) {
+  u32 w = AA64_EXCEPT_FAMILY_MATCH | ((f.opc & 7u) << 21);
+  return w | ((f.imm16 & 0xffffu) << 5) | ((f.op2 & 7u) << 2) | (f.LL & 3u);
+}
+
+static inline AA64Except aa64_except_unpack(u32 w) {
+  return (AA64Except){ /* field positions mirror aa64_except_pack */
+      .opc = (w >> 21) & 7u,
+      .imm16 = (w >> 5) & 0xffffu,
+      .op2 = (w >> 2) & 7u,
+      .LL = w & 3u,
+  };
+}
+
+static inline u32 aa64_brk(u32 imm16) { /* BRK #imm16: opc=001, LL=00 */
+ return aa64_except_pack(
+ (AA64Except){.opc = 1, .imm16 = imm16, .op2 = 0, .LL = 0});
+}
+static inline u32 aa64_svc(u32 imm16) { /* SVC #imm16: opc=000, LL=01 */
+ return aa64_except_pack(
+ (AA64Except){.opc = 0, .imm16 = imm16, .op2 = 0, .LL = 1});
+}
+
+/* ====================================================================
* Disassembler descriptor table.
* ==================================================================== */
@@ -604,8 +824,9 @@ typedef struct AA64InsnDesc {
const char* mnemonic;
u32 match;
u32 mask;
- u8 fmt; /* AA64Format */
- u8 pad[3];
+ u8 fmt; /* AA64Format */
+ u8 flags; /* AA64_ASMFL_* */
+ u8 pad[2];
} AA64InsnDesc;
extern const AA64InsnDesc aa64_insn_table[];
@@ -613,7 +834,33 @@ extern const u32 aa64_insn_table_n;
/* Linear-scan lookup. Returns the matching descriptor or NULL. First
* match wins; ordering in aa64_insn_table.c puts more-specific entries
- * before broader ones in the rare cases that matters. */
+ * before broader ones (so aliases like MOV/MUL/NEG win over their
+ * canonical ORR/MADD/SUB forms). */
const AA64InsnDesc* aa64_disasm_find(u32 word);
+/* ====================================================================
+ * Operand print / parse — one entry per AA64Format.
+ *
+ * aa64_print_operands renders the operand text (everything after the
+ * mnemonic) for `word` into `sb`, using `desc->fmt` to dispatch.
+ * Mnemonic itself is in `desc->mnemonic`; the caller writes it before
+ * calling this helper. `vaddr` is the instruction's virtual address
+ * for PC-relative formats; pass 0 if not known.
+ *
+ * aa64_parse_operands is the dual: read the operand grammar for the
+ * format from `tok` (an opaque token-stream type that phase 3 defines)
+ * and fill `fields_out` (a pointer to the format's field struct).
+ * Phase 2 ships only a stub that returns 0 for every format; phase 3
+ * wires `tok` up and fills in the per-format bodies. */
+
+struct AA64AsmTok; /* opaque, defined by the phase-3 asm parser */
+
+void aa64_print_operands(StrBuf* sb, const AA64InsnDesc* desc, u32 word,
+ u64 vaddr);
+
+/* Returns 1 on success, 0 on parse error. Phase 2 stub returns 0 for
+ * every format; phase 3 fills in the bodies. */
+int aa64_parse_operands(struct AA64AsmTok* tok, const AA64InsnDesc* desc,
+ void* fields_out);
+
#endif
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -2852,10 +2852,8 @@ static inline u32 aa64_subs_extreg_x_sxtw(u32 Rd, u32 Rn, u32 Rm) {
(Rd & 0x1f);
}
-/* BRK #imm16 — used for TRAP/UNREACHABLE landing pads. */
-static inline u32 aa64_brk(u32 imm16) {
- return 0xD4200000u | ((imm16 & 0xffffu) << 5);
-}
+/* BRK #imm16 (TRAP/UNREACHABLE landing pads) lives in arch/aa64_isa.h
+ * alongside the rest of the exception-generation family. */
static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
const Operand* args, u32 na) {
diff --git a/src/core/strbuf.c b/src/core/strbuf.c
@@ -0,0 +1,50 @@
+/* Tiny bounded text builder — numeric formatters.
+ *
+ * Char-by-char appends live inline in core/strbuf.h; the dec/hex
+ * formatters live here so we don't pull <stdio.h> into every TU that
+ * includes the header. */
+
+#include "core/strbuf.h"
+
+void strbuf_put_u64(StrBuf* sb, u64 v) {
+  /* Append `v` in decimal. Digits come out least-significant first, so
+   * they are staged in a scratch array and emitted in reverse. The
+   * do/while body runs at least once, which yields "0" for v == 0.
+   * Assuming u64 is 64 bits wide, 20 digits is the most ever staged. */
+  char scratch[24];
+  u32 count = 0;
+  do {
+    scratch[count++] = (char)('0' + (u32)(v % 10u));
+    v /= 10u;
+  } while (v != 0);
+  while (count > 0) strbuf_putc(sb, scratch[--count]);
+}
+
+void strbuf_put_i64(StrBuf* sb, i64 v) {
+  /* Append `v` in decimal, with a leading '-' for negative values. */
+  if (v >= 0) {
+    strbuf_put_u64(sb, (u64)v);
+    return;
+  }
+  strbuf_putc(sb, '-');
+  /* v + 1 cannot overflow for negative v, so -(v + 1) is representable
+   * even for the most negative value; adding 1 in u64 gives |v|. */
+  strbuf_put_u64(sb, (u64)(-(v + 1)) + 1u);
+}
+
+void strbuf_put_hex_u64(StrBuf* sb, u64 v) {
+  /* Append "0x" followed by the lowercase hex digits of `v`, without
+   * leading zeros ("0x0" for zero). Nibbles are peeled off the low end
+   * into a scratch array and then emitted most-significant first; the
+   * do/while body runs at least once, which covers v == 0. Assuming u64
+   * is 64 bits wide, at most 16 nibbles are staged. */
+  static const char digits[] = "0123456789abcdef";
+  char scratch[16];
+  u32 count = 0;
+  strbuf_puts(sb, "0x");
+  do {
+    scratch[count++] = digits[v & 0xfu];
+    v >>= 4;
+  } while (v != 0);
+  while (count > 0) strbuf_putc(sb, scratch[--count]);
+}
diff --git a/src/core/strbuf.h b/src/core/strbuf.h
@@ -0,0 +1,70 @@
+#ifndef CFREE_STRBUF_H
+#define CFREE_STRBUF_H
+
+/* Tiny bounded text builder, freestanding-friendly.
+ *
+ * Caller owns the backing buffer. strbuf_init reserves one slot for the
+ * NUL terminator; appends past `cap-1` characters silently truncate and
+ * `truncated` flips to 1 so the caller can detect overflow without
+ * scanning the result. Every helper keeps the buffer NUL-terminated
+ * after every append, so the buffer is always a valid C string. */
+
+#include <stddef.h>
+
+#include "core/core.h"
+
+typedef struct StrBuf {
+ char* base; /* start of buffer (caller owned) */
+ char* p; /* next write slot */
+ char* end; /* one past last writable byte (reserves NUL slot) */
+ u8 truncated;
+ u8 pad[7];
+} StrBuf;
+
+static inline void strbuf_init(StrBuf* sb, char* buf, size_t cap) {
+  sb->base = sb->p = sb->end = buf; /* cap == 0: empty, buf never written */
+  sb->truncated = 0;
+  if (cap == 0) return;
+  sb->end = buf + (cap - 1); /* last byte is held back for the NUL */
+  buf[0] = '\0';
+}
+
+static inline void strbuf_reset(StrBuf* sb) {
+  /* Re-terminate only after a successful append: then base[0] is in
+   * bounds. An untouched or zero-capacity buffer is never written. */
+  if (sb->p != sb->base) *sb->base = '\0';
+  sb->p = sb->base;
+  sb->truncated = 0;
+}
+
+static inline size_t strbuf_len(const StrBuf* sb) {
+ return (size_t)(sb->p - sb->base); /* bytes appended so far, NUL excluded */
+}
+
+static inline const char* strbuf_cstr(const StrBuf* sb) { return sb->base; }
+
+static inline void strbuf_putc(StrBuf* sb, char c) {
+  if (sb->p >= sb->end) { /* full: drop the byte, remember the overflow */
+    sb->truncated = 1;
+    return;
+  }
+  *sb->p++ = c;
+  *sb->p = '\0'; /* keep the contents a valid C string after every append */
+}
+
+static inline void strbuf_puts(StrBuf* sb, const char* s) {
+  for (; *s != '\0'; ++s) strbuf_putc(sb, *s); /* stops at the NUL of s */
+}
+
+/* Append `n` bytes verbatim (may contain embedded NULs; the builder still
+ * appends a trailing NUL byte after them so strbuf_cstr() works for the
+ * common all-printable case). */
+static inline void strbuf_putn(StrBuf* sb, const char* s, size_t n) {
+  while (n-- > 0) strbuf_putc(sb, *s++); /* exactly n bytes, NULs included */
+}
+
+void strbuf_put_u64(StrBuf*, u64 v);
+void strbuf_put_i64(StrBuf*, i64 v);
+void strbuf_put_hex_u64(StrBuf*, u64 v); /* "0x" + hex, no padding */
+
+#endif
diff --git a/test/arch/aa64_isa_test.c b/test/arch/aa64_isa_test.c
@@ -0,0 +1,195 @@
+/* Phase-2 unit test for arch/aa64_isa.{h,c}.
+ *
+ * Asserts that aa64_disasm_find recognizes a representative word for
+ * every AA64Format the descriptor table covers, that the resolved row's
+ * mnemonic matches, and that aa64_print_operands output is untruncated
+ * and contains the expected operand substring (where one is given).
+ * Also checks alias precedence: an alias-bearing word (e.g. ORR Rd, ZR,
+ * Rm) resolves to the alias spelling (MOV) rather than the canonical row.
+ *
+ * Builds against the internal arch/aa64_isa.h surface (test.mk passes
+ * -Isrc). No public-API dependency — this is a unit test of the
+ * descriptor table itself. */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "arch/aa64_isa.h"
+#include "core/strbuf.h"
+
+static int fails = 0;
+static int cases = 0;
+
+static void check(u32 word, const char* want_mnem, const char* want_ops_substr) {
+  /* One test case; a NULL want_ops_substr skips the operand-text match. */
+  char text[128];
+  StrBuf out;
+  const AA64InsnDesc* row = aa64_disasm_find(word);
+  ++cases;
+  if (row == NULL) {
+    fprintf(stderr, "FAIL 0x%08x: aa64_disasm_find returned NULL\n", word);
+    goto failed;
+  }
+  if (strcmp(row->mnemonic, want_mnem) != 0) {
+    fprintf(stderr, "FAIL 0x%08x: mnemonic = %s, want %s\n", word,
+            row->mnemonic, want_mnem);
+    goto failed;
+  }
+  strbuf_init(&out, text, sizeof text);
+  aa64_print_operands(&out, row, word, /*vaddr=*/0);
+  if (out.truncated) {
+    fprintf(stderr, "FAIL 0x%08x %s: operand print truncated\n", word,
+            want_mnem);
+    goto failed;
+  }
+  if (want_ops_substr != NULL && strstr(text, want_ops_substr) == NULL) {
+    fprintf(stderr,
+            "FAIL 0x%08x %s: operands=%s\n expected substring %s\n",
+            word, want_mnem, text, want_ops_substr);
+    goto failed;
+  }
+  return;
+failed:
+  ++fails;
+}
+
+int main(void) {
+ /* ---- per-format coverage ---- */
+
+ /* MOVEWIDE: movz x0, #0x1234 → sf=1, opc=10, hw=0, imm16=0x1234, Rd=0 */
+ check(aa64_movz(1, /*Rd=*/0, /*imm16=*/0x1234, /*hw=*/0), "movz", "x0");
+
+ /* MOVEWIDE with shifted immediate (hw=1): movk w3, #0xABCD, lsl #16 */
+ check(aa64_movk(0, /*Rd=*/3, /*imm16=*/0xABCD, /*hw=*/1), "movk", "lsl");
+
+ /* LOG_SR alias: MOV X1, X2 ≡ ORR X1, XZR, X2 */
+ check(aa64_mov_reg(1, /*Rd=*/1, /*Rm=*/2), "mov", "x1");
+
+ /* LOG_SR canonical: AND X1, X2, X3 */
+ check(aa64_and(1, 1, 2, 3), "and", "x1, x2, x3");
+
+ /* LOG_SR canonical: EOR W4, W5, W6 */
+ check(aa64_eor(0, 4, 5, 6), "eor", "w4, w5, w6");
+
+ /* LOG_SR alias: MVN X1, X2 ≡ ORN X1, XZR, X2 */
+ check(aa64_mvn(1, 1, 2), "mvn", "x1");
+
+ /* ADDSUB_SR alias: NEG X1, X2 ≡ SUB X1, XZR, X2 */
+ check(aa64_neg(1, 1, 2), "neg", "x1");
+
+ /* ADDSUB_SR canonical: ADD X3, X4, X5 */
+ check(aa64_add(1, 3, 4, 5), "add", "x3, x4, x5");
+ check(aa64_sub(0, 3, 4, 5), "sub", "w3, w4, w5");
+
+ /* DP3 alias: MUL X1, X2, X3 ≡ MADD X1, X2, X3, XZR */
+ check(aa64_mul(1, 1, 2, 3), "mul", "x1, x2, x3");
+ /* DP3 canonical: MADD X1, X2, X3, X4 */
+ check(aa64_madd(1, 1, 2, 3, 4), "madd", "x1, x2, x3, x4");
+
+ /* DP2: UDIV X1, X2, X3 */
+ check(aa64_udiv(1, 1, 2, 3), "udiv", "x1, x2, x3");
+ check(aa64_lslv(0, 1, 2, 3), "lslv", "w1, w2, w3");
+
+ /* BR_REG alias: RET (with implicit X30). */
+ check(aa64_ret(/*Rn=*/30), "ret", NULL);
+ /* BR_REG: BR X16 */
+ check(aa64_br(16), "br", "x16");
+ check(aa64_blr(17), "blr", "x17");
+
+ /* PCREL_ADR: ADR / ADRP — encode imm halves as zero. */
+ check(aa64_adr(/*Rd=*/9, 0, 0), "adr", "x9");
+ check(aa64_adrp(/*Rd=*/9, 0, 0), "adrp", "x9");
+
+ /* ADDSUB_IMM: ADD X1, X2, #0x10 */
+ check(aa64_add_imm(1, 1, 2, 0x10, 0), "add", "x1, x2, #16");
+ check(aa64_sub_imm(1, 1, 2, 0x10, 0), "sub", "x1, x2, #16");
+
+ /* LDST_UIMM (size=11, V=0): LDR X1, [X2, #8] (encoded imm12=1, scale=8) */
+ check(aa64_ldr64_uimm12(/*Rt=*/1, /*Rn=*/2, /*imm12_scaled=*/1), "ldr",
+ "x1, [x2, #8]");
+ check(aa64_str64_uimm12(/*Rt=*/1, /*Rn=*/2, /*imm12_scaled=*/0), "str",
+ "x1, [x2]");
+
+ /* LDSTP_PRE: STP X29, X30, [SP, #-16]! (imm7=-2) */
+ check(aa64_stp64_pre(/*Rt=*/29, /*Rt2=*/30, /*Rn=*/31, /*imm7=*/-2), "stp",
+ "x29, x30, [sp, #-16]!");
+ check(aa64_ldp64_pre(/*Rt=*/29, /*Rt2=*/30, /*Rn=*/31, /*imm7=*/2), "ldp",
+ "x29, x30, [sp, #16]!");
+
+ /* LDSTP_SOFF: STP X29, X30, [SP, #16] (signed-offset variant of pair) */
+ check(aa64_ldstp_soff_pack((AA64LdStPSOff){.opc = 2,
+ .V = 0,
+ .L = 0,
+ .imm7 = 2,
+ .Rt2 = 30,
+ .Rn = 31,
+ .Rt = 29}),
+ "stp", "x29, x30, [sp, #16]");
+
+ /* LDST_SIMM9 (V=0, size=3): STUR X1, [X2, #-8] */
+ check(aa64_ldst_simm9_pack((AA64LdStSimm9){.size = 3,
+ .V = 0,
+ .opc = 0,
+ .imm9 = (u32)(-8) & 0x1ffu,
+ .Rn = 2,
+ .Rt = 1}),
+ "stur", "x1, [x2, #-8]");
+ check(aa64_ldst_simm9_pack((AA64LdStSimm9){
+ .size = 3, .V = 0, .opc = 1, .imm9 = 0, .Rn = 2, .Rt = 1}),
+ "ldur", "x1, [x2]");
+
+ /* BR_IMM: B / BL. imm26=0 (relocated) — print should still resolve. */
+ check(aa64_b(0), "b", NULL);
+ check(aa64_bl(0), "bl", NULL);
+
+ /* BR_COND: B.eq #0 — cond=EQ=0, imm19=0. */
+ check(0x54000000u, "b.cond", "eq");
+
+ /* CB: CBZ W1, #0 / CBNZ X2, #0 */
+ check(aa64_cbz(0, 1, 0), "cbz", "w1");
+ check(aa64_cbnz_imm(1, 2, 0), "cbnz", "x2");
+
+ /* EXCEPT: BRK #0, SVC #0 */
+ check(aa64_brk(0), "brk", "0x0");
+ check(aa64_svc(0), "svc", "0x0");
+
+ /* HINT: NOP */
+ check(aa64_nop(), "nop", NULL);
+
+ /* ---- alias precedence: ensure first-match wins ---- */
+ {
+ /* ORR X1, XZR, X2 with shift=0,imm6=0 should resolve to "mov", not "orr". */
+ u32 w = aa64_mov_reg(1, 1, 2);
+ const AA64InsnDesc* d = aa64_disasm_find(w);
+ if (d == NULL || strcmp(d->mnemonic, "mov") != 0) {
+ fprintf(stderr,
+ "FAIL: alias precedence — ORR-as-MOV resolved to %s (want mov)\n",
+ d ? d->mnemonic : "(null)");
+ ++fails;
+ } else if (!(d->flags & AA64_ASMFL_ALIAS)) {
+ fprintf(stderr, "FAIL: alias precedence — resolved row missing ALIAS\n");
+ ++fails;
+ }
+ ++cases;
+ }
+
+ /* ORR X1, X3, X2 (Rn != ZR) must not resolve to "mov". */
+ {
+ u32 w = aa64_orr(1, 1, 3, 2);
+ const AA64InsnDesc* d = aa64_disasm_find(w);
+ if (d == NULL || strcmp(d->mnemonic, "orr") != 0) {
+ fprintf(stderr,
+ "FAIL: non-alias ORR resolved to %s (want orr)\n",
+ d ? d->mnemonic : "(null)");
+ ++fails;
+ }
+ ++cases;
+ }
+
+ if (fails) {
+ fprintf(stderr, "%d / %d failed\n", fails, cases);
+ return 1;
+ }
+ printf("aa64_isa_test: %d cases ok\n", cases);
+ return 0;
+}
diff --git a/test/test.mk b/test/test.mk
@@ -29,9 +29,9 @@
# parse_asm / cfree_disasm_iter_* are still stubs; the harness builds
# and runs end-to-end so the wiring stays exercised. See doc/ASM.md.
-.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-asm test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
+.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
-test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-asm test-lib-deps
+test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-lib-deps
test-lex: bin
@CFREE=$(abspath $(BIN)) test/lex/run.sh
@@ -90,6 +90,19 @@ $(DEBUG_TEST_BIN): test/debug/roundtrip_unit.c $(LIB_AR)
@mkdir -p $(dir $@)
$(CC) $(DRIVER_CFLAGS) -Isrc test/debug/roundtrip_unit.c $(LIB_AR) -o $@
+# aa64 ISA descriptor-table unit test (doc/ASM.md phase 2). Covers
+# every AA64Format the table maps and the alias-precedence invariant
+# (first-match disasm picks the alias spelling over the canonical
+# form). Internal arch/ surface — needs -Isrc.
+AA64_ISA_TEST_BIN = build/test/aa64_isa_test
+
+test-isa: $(AA64_ISA_TEST_BIN)
+	$<
+
+$(AA64_ISA_TEST_BIN): test/arch/aa64_isa_test.c $(LIB_AR)
+	@mkdir -p $(@D)
+	$(CC) $(DRIVER_CFLAGS) -Isrc $< $(LIB_AR) -o $@
+
# Test harness binaries shared by test-elf, test-link, and test-cg.
# Declared as Make targets (not built by the run.sh scripts) so they pick
# up libcfree.a changes deterministically.