kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

symmetry.sh (5347B)


      1 #!/usr/bin/env bash
      2 # test/asm/symmetry.sh — Type D (differential, baseline mode):
      3 #   asm<->disasm self-symmetry sweep (aa64).
      4 #
      5 # Systematically checks that the assembler and disassembler agree on the
      6 # instruction set, in both directions:
      7 #
      8 #   decode-side (table sweep): aa64_sweep_gen emits one representative encoding
      9 #     per row of the disassembler's instruction table. Each is decoded, the
     10 #     disassembly re-assembled, and decoded again; the text must be a FIXED
     11 #     POINT. A form the assembler can't re-encode is "decode-only"; one that
     12 #     re-encodes to a different instruction is a "disagree".
     13 #
     14 #   encode-side (corpus sweep): every test/asm/encode/*.s that applies to aa64
     15 #     is assembled and disassembled; any `.inst` means the assembler emitted a
     16 #     byte the disassembler can't decode ("encode-only").
     17 #
     18 # The two tools cover slightly different ISA subsets (forms the assembler
     19 # accepts for completeness but codegen never emits, so the disassembler never
     20 # had to decode them, and vice-versa). The known asymmetries live in a checked-
     21 # in snapshot, test/asm/symmetry.baseline; the sweep PASSES iff the current set
     22 # equals the baseline, so it gates against *new* asymmetry (a regression) while
     23 # the baseline documents the disasm-completeness backlog. Closing a gap shrinks
     24 # the baseline (regenerate with --update). See doc/ASM_ROUNDTRIP_TESTING.md.
     25 #
     26 # Type D oracle: AGREEMENT against a checked-in baseline. The producer here is
     27 # the freshly-generated, sorted asymmetry report; kit_diff_baseline compares it
     28 # to test/asm/symmetry.baseline (or regenerates it under --update /
     29 # KIT_DIFF_UPDATE=1).
     30 #
     31 # Opt-in; host-independent (no execution). Decode-side assembles line-by-line,
     32 # so it is a few seconds.
     33 
     34 set -u
     35 
     36 ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
     37 . "$ROOT/test/lib/kit_sh_report.sh"
     38 . "$ROOT/test/lib/kit_differential.sh"
     39 
     40 AR="$ROOT/build/test/asm-runner"
     41 GEN="$ROOT/build/test/aa64_sweep_gen"
     42 ENCODE_DIR="$ROOT/test/asm/encode"
     43 WORK="$ROOT/build/test/asm/symmetry"
     44 BASELINE="$ROOT/test/asm/symmetry.baseline"
     45 
     46 # --update is the long-standing regen flag; KIT_DIFF_UPDATE=1 is the canonical
     47 # Type D knob. Either one regenerates the baseline (kit_diff_baseline honors
     48 # KIT_DIFF_UPDATE).
     49 [ "${1:-}" = "--update" ] && KIT_DIFF_UPDATE=1
     50 : "${KIT_DIFF_UPDATE:=0}"
     51 export KIT_DIFF_UPDATE
     52 
     53 if [ ! -x "$AR" ] || [ ! -x "$GEN" ]; then
     54     echo "symmetry: asm-runner / aa64_sweep_gen missing — run the test target" >&2
     55     exit 2
     56 fi
     57 
     58 export KIT_TEST_ARCH=aa64
     59 mkdir -p "$WORK"
     60 report="$WORK/report"
     61 : > "$report"
     62 
     63 kit_report_init
     64 
     65 strip_off() { sed -E 's/^[0-9a-f]+:\t//'; }
     66 
     67 # ---- decode-side: table sweep ---------------------------------------------
     68 # Fast path: re-assemble the whole disassembly at once; if it assembles, the
     69 # round-trip is decode->encode->decode and we compare the text line-for-line.
     70 # Slow path (only when whole-file assembly fails, i.e. some form is decode-
     71 # only): re-assemble line-by-line to name each offending mnemonic.
     72 "$GEN" > "$WORK/words.hex"
     73 "$AR" --decode "$WORK/words.hex" "$WORK/t1.txt" 2>/dev/null
     74 strip_off < "$WORK/t1.txt" > "$WORK/t1.norm"
     75 { printf '\t.text\n'; sed -E 's/^[0-9a-f]+:\t/\t/' "$WORK/t1.txt"; } > "$WORK/t1.s"
     76 if "$AR" --encode "$WORK/t1.s" "$WORK/hex2.hex" 2>/dev/null; then
     77     "$AR" --decode "$WORK/hex2.hex" "$WORK/t2.txt" 2>/dev/null
     78     strip_off < "$WORK/t2.txt" > "$WORK/t2.norm"
     79     awk 'NR==FNR { a[FNR] = $0; next }
     80          a[FNR] != $0 { printf "disagree: %s => %s\n", a[FNR], $0 }' \
     81         "$WORK/t1.norm" "$WORK/t2.norm" >> "$report"
     82 else
     83     while IFS= read -r line; do
     84         [ -z "$line" ] && continue
     85         mnem=$(printf '%s' "$line" | sed -E 's/^\t?([a-z0-9.]+).*/\1/')
     86         printf '\t.text\n%s\n' "$line" > "$WORK/one.s"
     87         if ! "$AR" --encode "$WORK/one.s" "$WORK/one.hex" 2>/dev/null; then
     88             printf 'decode-only: %s\n' "$mnem" >> "$report"
     89             continue
     90         fi
     91         "$AR" --decode "$WORK/one.hex" "$WORK/one.t2" 2>/dev/null
     92         t2=$(strip_off < "$WORK/one.t2")
     93         norm_line=$(printf '%s' "$line" | sed -E 's/^\t//')
     94         [ "$norm_line" != "$t2" ] && \
     95             printf 'disagree: %s => %s\n' "$norm_line" "$t2" >> "$report"
     96     done < "$WORK/t1.norm"
     97 fi
     98 
     99 # ---- encode-side: corpus sweep --------------------------------------------
    100 shopt -s nullglob
    101 for s in "$ENCODE_DIR"/*.s; do
    102     name="$(basename "$s" .s)"
    103     tg="$ENCODE_DIR/$name.targets"
    104     [ -f "$tg" ] && ! grep -qE 'aa64|aarch64|arm64' "$tg" && continue
    105     # Skip cases that deliberately place data in .text (not instructions).
    106     grep -qE '^\s*\.(uleb128|sleb128|byte|hword|short|word|long|quad|ascii|asciz|string|zero|fill|space|inst)\b' "$s" && continue
    107     w="$WORK/$name"
    108     "$AR" --encode "$s" "$w.hex" 2>/dev/null || continue
    109     "$AR" --decode "$w.hex" "$w.dec" 2>/dev/null
    110     awk -v n="$name" '/[[:space:]]\.inst[[:space:]]/{print "encode-only: " n " " $NF}' "$w.dec" >> "$report"
    111 done
    112 shopt -u nullglob
    113 
    114 sort -u "$report" -o "$report"
    115 
    116 # ---- compare against the baseline snapshot --------------------------------
    117 # Single Type D verdict: the freshly produced, normalized report must equal the
    118 # checked-in baseline (kit_diff_baseline regenerates it under KIT_DIFF_UPDATE=1
    119 # and shows the unified delta on drift).
    120 kit_diff_baseline symmetry "$BASELINE" "$report"
    121 
    122 kit_summary symmetry
    123 kit_exit