diff_llvm.sh (5980B)
#!/usr/bin/env bash
# test/asm/diff_llvm.sh — Type D (differential, reference mode):
# cross-check kit against llvm (aa64) as a second, independent oracle.
#
# Two byte-level lanes (robust: no disassembly-text normalization, which would
# founder on alias/format differences like movz-vs-mov or #16-vs-#0x10):
#
#   encode lane: assemble every aa64 test/asm/encode/*.s with BOTH `kit as`
#     and `llvm-mc`; the .text bytes must match. Validates kit's assembler
#     against llvm-mc as a second oracle.
#
#   disasm lane: for every test/asm/roundtrip/*.c, `kit cc -c` gives codegen's
#     bytes and `kit cc -S` gives kit's disassembly as re-assemblable text;
#     assemble that text with llvm-mc and require the bytes to match codegen's.
#     If llvm agrees the -S text means the original bytes, kit's disassembler
#     decoded them correctly — a decode differential that catches a *wrong*
#     decode (one a self-round-trip can't, since kit's own re-encode would
#     repeat the mistake).
#
# Type D oracle: AGREEMENT between two independent producers (kit vs llvm-mc),
# recorded per case via kit_diff_agree. Two recognized equivalence-skips precede
# the comparison and are kit_skip'd, never flagged:
#   - llvm-mc REJECTS an input kit accepts (forms outside llvm's dialect).
#   - the benign same-section-call reloc-vs-resolve difference: kit codegen
#     keeps a CALL26/JUMP26/CONDBR reloc for a same-section call/branch to a
#     defined local symbol, where llvm-mc (like GNU as) resolves it in place and
#     drops the reloc. Both link to the same bytes; only the relocatable form
#     differs.
#
# Opt-in; requires llvm-mc (+ a kit-readable object). Skips cleanly if llvm-mc
# is absent; a missing kit binary or an uncreatable work directory is a hard
# error (exit 2). See doc/ASM_ROUNDTRIP_TESTING.md.
#
# Environment overrides (all optional):
#   LLVM_MC          path to llvm-mc (default: first on PATH, else the
#                    Homebrew location)
#   KIT_LLVM_TRIPLE  target triple handed to both tools
#                    (default aarch64-linux-gnu)
#   KIT_LLVM_MATTR   llvm-mc -mattr value (default +lse,+v8.1a)
#   KIT_TEST_OPTS    space-separated optimisation levels for the disasm lane,
#                    written without the leading dash (default O1)

set -u

# Repository root: two directories above this script.
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
# Shared harness helpers (kit_report_init, kit_skip, kit_summary, kit_exit,
# kit_diff_agree are expected to come from these two files).
. "$ROOT/test/lib/kit_sh_report.sh"
. "$ROOT/test/lib/kit_differential.sh"

KIT="$ROOT/build/kit"
ENCODE_DIR="$ROOT/test/asm/encode"
RT_DIR="$ROOT/test/asm/roundtrip"
WORK="$ROOT/build/test/asm/diff_llvm"
TRIPLE="${KIT_LLVM_TRIPLE:-aarch64-linux-gnu}"
MATTR="${KIT_LLVM_MATTR:-+lse,+v8.1a}"
OPTS="${KIT_TEST_OPTS:-O1}"

# An explicit $LLVM_MC wins; otherwise take llvm-mc from PATH, falling back to
# the Homebrew path so the -x test below has something concrete to probe.
LLVM_MC="${LLVM_MC:-$(command -v llvm-mc || echo /opt/homebrew/bin/llvm-mc)}"

kit_report_init

if [ ! -x "$LLVM_MC" ]; then
  # No second oracle available: skip cleanly. kit_skip (not a hard exit) so the
  # summary/exit path is uniform with the rest of the harness.
  kit_skip diff-llvm "llvm-mc not found (set \$LLVM_MC)"
  kit_summary diff-llvm
  kit_exit
fi
if [ ! -x "$KIT" ]; then
  echo "diff-llvm: kit missing — run \"make bin\"" >&2
  exit 2
fi
# Every intermediate file lives under $WORK; without it each redirect below
# would fail and the lanes would run against missing files, so stop here.
if ! mkdir -p "$WORK"; then
  echo "diff-llvm: cannot create work directory $WORK" >&2
  exit 2
fi

# raw OBJ — print the .text contents of OBJ, one line per dump row.
# Raw .text bytes via kit objdump (same tool for both objects, so the
# representation is identical regardless of which assembler produced the .o).
# For every row that starts with a hex address, fields 2-5 are concatenated
# (presumably the four hex-word columns of an `objdump -s` row — confirm
# against kit objdump's output format). objdump's stderr is discarded, so an
# unreadable object yields empty output rather than an error.
raw() {
  "$KIT" objdump -s -j .text "$1" 2>/dev/null |
    awk '/^ *[0-9a-f]+ /{print $2$3$4$5}'
}
# .text relocation kinds+targets (offset omitted — it shifts when a sibling
# reloc is relaxed). Used to recognize the one benign disagreement: kit
# codegen keeps a CALL26/JUMP26/CONDBR reloc for a same-section call/branch to
# a defined local symbol, where llvm-mc (like GNU as) resolves it in place and
# drops the reloc. Both link to the same bytes; only the relocatable form
# differs. When that's the whole story the reloc tables differ, so we skip
# rather than flag.
# text_relocs OBJ — print "TYPE VALUE" for each relocation listed under the
# "RELOCATION RECORDS FOR [.text]" heading of `kit objdump -r OBJ`.
# The first column (offset) is deliberately dropped. Entries belonging to any
# other section's heading are ignored. objdump's stderr is discarded, so an
# unreadable object yields empty output.
text_relocs() {
  "$KIT" objdump -r "$1" 2>/dev/null | awk '
    /RELOCATION RECORDS FOR \[\.text\]/{f=1;next} /RELOCATION RECORDS FOR/{f=0}
    f && /^[0-9a-f]/{print $2, $3}'
}

# mc SRC OBJ — assemble SRC into the object file OBJ with llvm-mc for
# $TRIPLE/$MATTR. Returns llvm-mc's exit status; its diagnostics are captured
# in $WORK/mc.err (overwritten on every call).
mc() {
  "$LLVM_MC" -triple="$TRIPLE" -mattr="$MATTR" -filetype=obj "$1" -o "$2" \
    2>"$WORK/mc.err"
}

# mc_reject_reason — first diagnostic line of the most recent mc call, with
# everything up to and including "error:" stripped, for use in skip messages.
mc_reject_reason() {
  head -n 1 "$WORK/mc.err" | sed 's|.*error: *||'
}

# --- encode lane: kit as vs llvm-mc over the encode corpus ----------------
printf 'diff-llvm: encode lane (kit as vs llvm-mc over the encode corpus)\n'
# nullglob: an empty corpus directory runs zero iterations instead of one
# iteration on the literal pattern.
shopt -s nullglob
for s in "$ENCODE_DIR"/aa64_*.s; do
  name="$(basename "$s" .s)"
  tg="$ENCODE_DIR/$name.targets"
  # An optional NAME.targets file restricts the case to the targets it lists;
  # leave the case out when it does not mention an AArch64 spelling.
  [ -f "$tg" ] && ! grep -qE 'aa64|aarch64|arm64' "$tg" && continue
  # Inputs kit itself cannot assemble are outside this differential and are
  # left out without being recorded.
  "$KIT" as -target "$TRIPLE" "$s" -o "$WORK/c.o" 2>/dev/null || continue
  if ! mc "$s" "$WORK/l.o"; then
    # llvm-mc rejected a form kit accepts: recognized dialect divergence.
    kit_skip "encode:$name" "llvm-mc rejected ($(mc_reject_reason))"
    continue
  fi
  # Compare the normalized .text byte extraction (same shape both sides).
  # NOTE(review): raw prints nothing when objdump cannot read an object, so
  # two unreadable objects would presumably compare as equal — confirm how
  # kit_diff_agree treats a pair of empty files.
  raw "$WORK/c.o" > "$WORK/c.bytes"
  raw "$WORK/l.o" > "$WORK/l.bytes"
  kit_diff_agree "encode:$name" "$WORK/c.bytes" "$WORK/l.bytes"
done

# --- disasm lane: cc -c bytes vs llvm-mc of cc -S ---------------------------
printf 'diff-llvm: disasm lane (cc -c bytes vs llvm-mc of cc -S, opts="%s")\n' "$OPTS"
for src in "$RT_DIR"/*.c; do
  name="$(basename "$src" .c)"
  # A sibling NAME.skip marker opts the source out of this lane.
  [ -e "$RT_DIR/$name.skip" ] && continue
  # $OPTS is split on whitespace on purpose: one pass per optimisation level.
  # shellcheck disable=SC2086
  for opt in $OPTS; do
    # Both kit outputs are needed; if either compile fails the case is left
    # out without being recorded.
    "$KIT" cc -c "-$opt" -target "$TRIPLE" "$src" -o "$WORK/cc.o" 2>/dev/null || continue
    "$KIT" cc -S "-$opt" -target "$TRIPLE" "$src" -o "$WORK/s.s" 2>/dev/null || continue
    if ! mc "$WORK/s.s" "$WORK/l.o"; then
      # llvm-mc rejected kit's cc -S text: recognized dialect divergence.
      kit_skip "disasm:$name[-$opt]" "llvm-mc rejected cc -S ($(mc_reject_reason))"
      continue
    fi
    raw "$WORK/cc.o" > "$WORK/cc.bytes"
    raw "$WORK/l.o" > "$WORK/l.bytes"
    # Byte comparison first: when the bytes already match (the usual case) the
    # two relocation dumps are never needed, and the outcome is unchanged.
    if ! cmp -s "$WORK/cc.bytes" "$WORK/l.bytes" &&
      [ "$(text_relocs "$WORK/cc.o")" != "$(text_relocs "$WORK/l.o")" ]; then
      # Reloc tables differ AND bytes differ: kit kept a same-section
      # call/branch reloc that llvm-mc resolved in place. Link-equivalent
      # — recognized benign divergence, skip rather than flag.
      # NOTE(review): the check does not verify that the reloc difference is
      # the cause of the byte difference, so an unrelated byte mismatch that
      # coincides with a reloc-table difference is skipped as well.
      kit_skip "disasm:$name[-$opt]" "same-section-call reloc-vs-resolve (link-equivalent)"
      continue
    fi
    kit_diff_agree "disasm:$name[-$opt]" "$WORK/cc.bytes" "$WORK/l.bytes"
  done
done
shopt -u nullglob

kit_summary diff-llvm
kit_exit