symmetry.sh (5347B)
#!/usr/bin/env bash
# test/asm/symmetry.sh — Type D (differential, baseline mode):
# asm<->disasm self-symmetry sweep (aa64).
#
# Systematically checks that the assembler and disassembler agree on the
# instruction set, in both directions:
#
#   decode-side (table sweep): aa64_sweep_gen emits one representative encoding
#     per row of the disassembler's instruction table. Each is decoded, the
#     disassembly re-assembled, and decoded again; the text must be a FIXED
#     POINT. A form the assembler can't re-encode is "decode-only"; one that
#     re-encodes to a different instruction is a "disagree".
#
#   encode-side (corpus sweep): every test/asm/encode/*.s that applies to aa64
#     is assembled and disassembled; any `.inst` means the assembler emitted a
#     byte the disassembler can't decode ("encode-only").
#
# The two tools cover slightly different ISA subsets (forms the assembler
# accepts for completeness but codegen never emits, so the disassembler never
# had to decode them, and vice-versa). The known asymmetries live in a checked-
# in snapshot, test/asm/symmetry.baseline; the sweep PASSES iff the current set
# equals the baseline, so it gates against *new* asymmetry (a regression) while
# the baseline documents the disasm-completeness backlog. Closing a gap shrinks
# the baseline (regenerate with --update). See doc/ASM_ROUNDTRIP_TESTING.md.
#
# Type D oracle: AGREEMENT against a checked-in baseline. The producer here is
# the freshly-generated, sorted asymmetry report; kit_diff_baseline compares it
# to test/asm/symmetry.baseline (or regenerates it under --update /
# KIT_DIFF_UPDATE=1).
#
# Usage: symmetry.sh [--update]
# Exit:  2 when the tools are missing or the table sweep input could not be
#        produced; otherwise whatever kit_exit decides.
#
# Opt-in; host-independent (no execution). The decode-side falls back to
# assembling line-by-line when whole-file assembly fails, so it can take a few
# seconds.

set -u

ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
. "$ROOT/test/lib/kit_sh_report.sh"
. "$ROOT/test/lib/kit_differential.sh"

AR="$ROOT/build/test/asm-runner"
GEN="$ROOT/build/test/aa64_sweep_gen"
ENCODE_DIR="$ROOT/test/asm/encode"
WORK="$ROOT/build/test/asm/symmetry"
BASELINE="$ROOT/test/asm/symmetry.baseline"

# --update is the long-standing regen flag; KIT_DIFF_UPDATE=1 is the canonical
# Type D knob. Either one regenerates the baseline (kit_diff_baseline honors
# KIT_DIFF_UPDATE).
[ "${1:-}" = "--update" ] && KIT_DIFF_UPDATE=1
: "${KIT_DIFF_UPDATE:=0}"
export KIT_DIFF_UPDATE

if [ ! -x "$AR" ] || [ ! -x "$GEN" ]; then
  echo "symmetry: asm-runner / aa64_sweep_gen missing — run the test target" >&2
  exit 2
fi

export KIT_TEST_ARCH=aa64
mkdir -p "$WORK"
report="$WORK/report"
: > "$report"

kit_report_init

# strip_off: filter stdin -> stdout, dropping the leading "<hex offset>:<TAB>"
# from each disassembly line so only the instruction text remains.
strip_off() { sed -E 's/^[0-9a-f]+:\t//'; }

# decode_fresh HEX OUT: disassemble HEX into OUT such that OUT always reflects
# THIS invocation. $WORK persists between runs (and one.t2 is reused across
# loop iterations), so a decode that writes nothing would otherwise leave a
# stale OUT behind to be misread as current output. The decoder's exit status
# is deliberately not checked (as before): whether it is non-zero for
# undecodable words is not visible from this script.
decode_fresh() {
  rm -f -- "$2"
  "$AR" --decode "$1" "$2" 2>/dev/null
  [ -f "$2" ] || : > "$2"
}

# ---- decode-side: table sweep ---------------------------------------------
# Fast path: re-assemble the whole disassembly at once; if it assembles, the
# round-trip is decode->encode->decode and we compare the text line-for-line.
# Slow path (only when whole-file assembly fails, i.e. some form is decode-
# only): re-assemble line-by-line to name each offending mnemonic.
#
# A failed generator or an empty first decode would make the whole table sweep
# vacuous (and, under --update, bake that into the baseline), so bail out.
if ! "$GEN" > "$WORK/words.hex"; then
  echo "symmetry: aa64_sweep_gen failed — no table sweep input" >&2
  exit 2
fi
decode_fresh "$WORK/words.hex" "$WORK/t1.txt"
if [ ! -s "$WORK/t1.txt" ]; then
  echo "symmetry: asm-runner --decode produced no output for the table sweep" >&2
  exit 2
fi
strip_off < "$WORK/t1.txt" > "$WORK/t1.norm"
{
  printf '\t.text\n'
  sed -E 's/^[0-9a-f]+:\t/\t/' "$WORK/t1.txt"
} > "$WORK/t1.s"

if "$AR" --encode "$WORK/t1.s" "$WORK/hex2.hex" 2>/dev/null; then
  decode_fresh "$WORK/hex2.hex" "$WORK/t2.txt"
  strip_off < "$WORK/t2.txt" > "$WORK/t2.norm"
  # Pair line i of the first decode with line i of the second. The first file
  # is recognised by name (not NR==FNR, which misfires when it is empty), and
  # first-decode lines with no counterpart are reported rather than dropped.
  awk 'FILENAME == ARGV[1] { a[FNR] = $0; n1 = FNR; next }
       { n2 = FNR }
       a[FNR] != $0 { printf "disagree: %s => %s\n", a[FNR], $0 }
       END { for (i = n2 + 1; i <= n1; i++)
               printf "disagree: %s => %s\n", a[i], "" }' \
    "$WORK/t1.norm" "$WORK/t2.norm" >> "$report"
else
  while IFS= read -r line; do
    [ -z "$line" ] && continue
    # First token of the instruction text (optionally after a tab).
    mnem=$(printf '%s' "$line" | sed -E 's/^\t?([a-z0-9.]+).*/\1/')
    printf '\t.text\n%s\n' "$line" > "$WORK/one.s"
    if ! "$AR" --encode "$WORK/one.s" "$WORK/one.hex" 2>/dev/null; then
      printf 'decode-only: %s\n' "$mnem" >> "$report"
      continue
    fi
    decode_fresh "$WORK/one.hex" "$WORK/one.t2"
    t2=$(strip_off < "$WORK/one.t2")
    norm_line=$(printf '%s' "$line" | sed -E 's/^\t//')
    [ "$norm_line" != "$t2" ] && \
      printf 'disagree: %s => %s\n' "$norm_line" "$t2" >> "$report"
  done < "$WORK/t1.norm"
fi

# ---- encode-side: corpus sweep --------------------------------------------
shopt -s nullglob
for s in "$ENCODE_DIR"/*.s; do
  name="$(basename "$s" .s)"
  tg="$ENCODE_DIR/$name.targets"
  # A .targets file that names no aa64 spelling excludes the case.
  [ -f "$tg" ] && ! grep -qE 'aa64|aarch64|arm64' "$tg" && continue
  # Skip cases that deliberately place data in .text (not instructions).
  grep -qE '^\s*\.(uleb128|sleb128|byte|hword|short|word|long|quad|ascii|asciz|string|zero|fill|space|inst)\b' "$s" && continue
  w="$WORK/$name"
  # Cases the assembler rejects are skipped silently.
  "$AR" --encode "$s" "$w.hex" 2>/dev/null || continue
  decode_fresh "$w.hex" "$w.dec"
  awk -v n="$name" '/[[:space:]]\.inst[[:space:]]/{print "encode-only: " n " " $NF}' "$w.dec" >> "$report"
done
shopt -u nullglob

# NOTE(review): sort order follows the caller's locale; presumably the baseline
# was generated under the same collation — confirm, or pin LC_ALL=C and
# regenerate the baseline in the same change.
sort -u "$report" -o "$report"

# ---- compare against the baseline snapshot --------------------------------
# Single Type D verdict: the freshly produced, normalized report must equal the
# checked-in baseline (kit_diff_baseline regenerates it under KIT_DIFF_UPDATE=1
# and shows the unified delta on drift).
kit_diff_baseline symmetry "$BASELINE" "$report"

kit_summary symmetry
kit_exit