aa64_sweep_gen.c (2817B)
1 /* aa64_sweep_gen — emit one representative encoding per row of aa64_insn_table 2 * (as little-endian hex words), for the asm<->disasm self-symmetry round-trip 3 * (test/asm/symmetry.sh). 4 * 5 * The harness decodes each word, re-assembles the disassembly, and decodes 6 * again, asserting the text is a fixed point. That catches the two decode-side 7 * asymmetries the codegen round-trip can't reach systematically: a form the 8 * disassembler decodes but the assembler can't re-encode (assemble error), and 9 * a form where decode and encode disagree on the bytes (text changes across the 10 * round-trip). All decode/encode goes through asm-runner, so the comparison is 11 * immune to any formatting difference between rendering paths. 12 * 13 * Variable (~mask) bits are filled with a fixed representative pattern that 14 * gives distinct, non-ZR/SP registers in the standard field positions; the 15 * text-fixed-point criterion tolerates the resulting don't-care bits (e.g. the 16 * RES Rt2 field of an exclusive load). Formats whose only operand is a 17 * PC-relative target or a relocation — branches and adr/adrp — are skipped: 18 * they aren't assemblable standalone without a label/symbol, and L1/L2 of the 19 * codegen round-trip already cover them. 20 * 21 * Builds against the internal arch/aa64/isa.h surface (mk/test.mk passes 22 * -Isrc). 23 */ 24 25 #include <stdint.h> 26 #include <stdio.h> 27 28 #include "arch/aa64/isa.h" 29 30 static int needs_context(uint8_t fmt) { 31 switch (fmt) { 32 case AA64_FMT_BR_IMM: /* b/bl: numeric target needs a label */ 33 case AA64_FMT_BR_COND: /* b.cc */ 34 case AA64_FMT_CB: /* cbz/cbnz */ 35 case AA64_FMT_PCREL_ADR: /* adr/adrp: needs a symbol operand */ 36 return 1; 37 default: 38 return 0; 39 } 40 } 41 42 int main(void) { 43 /* Rt/Rd[4:0]=1, Rn[9:5]=2, Rt2/Ra[14:10]=3, Rm/Rs[20:16]=4 — distinct, 44 * non-ZR/SP so ZR-keyed aliases don't fire; small immediates elsewhere. */ 45 const uint32_t PATTERN = (1u << 0) | (2u << 5) | (3u << 10) | (4u << 16); 46 for (uint32_t i = 0; i < aa64_insn_table_n; ++i) { 47 const AA64InsnDesc* d = &aa64_insn_table[i]; 48 uint32_t word; 49 if (needs_context(d->fmt)) continue; 50 word = (d->match & d->mask) | (PATTERN & ~d->mask); 51 /* The register-offset form's option field [15:13] must name a valid 52 * extend; the bare fill pattern yields the invalid UXTB(000). Force 53 * LSL/UXTX(011) so the synthesized word is a real instruction. */ 54 if (d->fmt == AA64_FMT_LDST_REGOFF) 55 word = (word & ~(7u << 13)) | (3u << 13); 56 /* Only sweep words that decode cleanly to this kind of instruction. */ 57 if (!aa64_disasm_find(word)) continue; 58 printf("%02x%02x%02x%02x\n", (unsigned)(word & 0xff), 59 (unsigned)((word >> 8) & 0xff), (unsigned)((word >> 16) & 0xff), 60 (unsigned)((word >> 24) & 0xff)); 61 } 62 return 0; 63 }