kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

aa64_sweep_gen.c (2817B)


      1 /* aa64_sweep_gen — emit one representative encoding per row of aa64_insn_table
      2  * (as little-endian hex words), for the asm<->disasm self-symmetry round-trip
      3  * (test/asm/symmetry.sh).
      4  *
      5  * The harness decodes each word, re-assembles the disassembly, and decodes
      6  * again, asserting the text is a fixed point. That catches the two decode-side
      7  * asymmetries the codegen round-trip can't reach systematically: a form the
      8  * disassembler decodes but the assembler can't re-encode (assemble error), and
      9  * a form where decode and encode disagree on the bytes (text changes across the
     10  * round-trip). All decode/encode goes through asm-runner, so the comparison is
     11  * immune to any formatting difference between rendering paths.
     12  *
     13  * Variable (~mask) bits are filled with a fixed representative pattern that
     14  * gives distinct, non-ZR/SP registers in the standard field positions; the
     15  * text-fixed-point criterion tolerates the resulting don't-care bits (e.g. the
     16  * RES Rt2 field of an exclusive load). Formats whose only operand is a
     17  * PC-relative target or a relocation — branches and adr/adrp — are skipped:
     18  * they aren't assemblable standalone without a label/symbol, and L1/L2 of the
     19  * codegen round-trip already cover them.
     20  *
     21  * Builds against the internal arch/aa64/isa.h surface (mk/test.mk passes
     22  * -Isrc).
     23  */
     24 
     25 #include <stdint.h>
     26 #include <stdio.h>
     27 
     28 #include "arch/aa64/isa.h"
     29 
     30 static int needs_context(uint8_t fmt) {
     31   switch (fmt) {
     32     case AA64_FMT_BR_IMM:    /* b/bl: numeric target needs a label */
     33     case AA64_FMT_BR_COND:   /* b.cc */
     34     case AA64_FMT_CB:        /* cbz/cbnz */
     35     case AA64_FMT_PCREL_ADR: /* adr/adrp: needs a symbol operand */
     36       return 1;
     37     default:
     38       return 0;
     39   }
     40 }
     41 
     42 int main(void) {
     43   /* Rt/Rd[4:0]=1, Rn[9:5]=2, Rt2/Ra[14:10]=3, Rm/Rs[20:16]=4 — distinct,
     44    * non-ZR/SP so ZR-keyed aliases don't fire; small immediates elsewhere. */
     45   const uint32_t PATTERN = (1u << 0) | (2u << 5) | (3u << 10) | (4u << 16);
     46   for (uint32_t i = 0; i < aa64_insn_table_n; ++i) {
     47     const AA64InsnDesc* d = &aa64_insn_table[i];
     48     uint32_t word;
     49     if (needs_context(d->fmt)) continue;
     50     word = (d->match & d->mask) | (PATTERN & ~d->mask);
     51     /* The register-offset form's option field [15:13] must name a valid
     52      * extend; the bare fill pattern yields the invalid UXTB(000). Force
     53      * LSL/UXTX(011) so the synthesized word is a real instruction. */
     54     if (d->fmt == AA64_FMT_LDST_REGOFF)
     55       word = (word & ~(7u << 13)) | (3u << 13);
     56     /* Only sweep words that decode cleanly to this kind of instruction. */
     57     if (!aa64_disasm_find(word)) continue;
     58     printf("%02x%02x%02x%02x\n", (unsigned)(word & 0xff),
     59            (unsigned)((word >> 8) & 0xff), (unsigned)((word >> 16) & 0xff),
     60            (unsigned)((word >> 24) & 0xff));
     61   }
     62   return 0;
     63 }