kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

diff_llvm.sh (5980B)


      1 #!/usr/bin/env bash
      2 # test/asm/diff_llvm.sh — Type D (differential, reference mode):
      3 #   cross-check kit against llvm (aa64) as a second, independent oracle.
      4 #
      5 # Two byte-level lanes (robust: no disassembly-text normalization, which would
      6 # founder on alias/format differences like movz-vs-mov or #16-vs-#0x10):
      7 #
      8 #   encode lane: assemble every aa64 test/asm/encode/*.s with BOTH `kit as`
      9 #     and `llvm-mc`; the .text bytes must match. Validates kit's assembler
     10 #     against llvm-mc as a second oracle.
     11 #
     12 #   disasm lane: for every test/asm/roundtrip/*.c, `kit cc -c` gives codegen's
     13 #     bytes and `kit cc -S` gives kit's disassembly as re-assemblable text;
     14 #     assemble that text with llvm-mc and require the bytes to match codegen's.
     15 #     If llvm agrees the -S text means the original bytes, kit's disassembler
     16 #     decoded them correctly — a decode differential that catches a *wrong*
     17 #     decode (one a self-round-trip can't, since kit's own re-encode would
     18 #     repeat the mistake).
     19 #
     20 # Type D oracle: AGREEMENT between two independent producers (kit vs llvm-mc),
     21 # recorded per case via kit_diff_agree. Two recognized equivalence-skips precede
     22 # the comparison and are kit_skip'd, never flagged:
     23 #   - llvm-mc REJECTS an input kit accepts (forms outside llvm's dialect).
     24 #   - the benign same-section-call reloc-vs-resolve difference: kit codegen
     25 #     keeps a CALL26/JUMP26/CONDBR reloc for a same-section call/branch to a
     26 #     defined local symbol, where llvm-mc (like GNU as) resolves it in place and
     27 #     drops the reloc. Both link to the same bytes; only the relocatable form
     28 #     differs.
     29 #
     30 # Opt-in; requires llvm-mc (+ a kit-readable object). Skips cleanly if the
     31 # tools are absent. See doc/ASM_ROUNDTRIP_TESTING.md.
     32 
     33 set -u
     34 
     35 ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
     36 . "$ROOT/test/lib/kit_sh_report.sh"
     37 . "$ROOT/test/lib/kit_differential.sh"
     38 
     39 KIT="$ROOT/build/kit"
     40 ENCODE_DIR="$ROOT/test/asm/encode"
     41 RT_DIR="$ROOT/test/asm/roundtrip"
     42 WORK="$ROOT/build/test/asm/diff_llvm"
     43 TRIPLE="${KIT_LLVM_TRIPLE:-aarch64-linux-gnu}"
     44 MATTR="${KIT_LLVM_MATTR:-+lse,+v8.1a}"
     45 OPTS="${KIT_TEST_OPTS:-O1}"
     46 
     47 LLVM_MC="${LLVM_MC:-$(command -v llvm-mc || echo /opt/homebrew/bin/llvm-mc)}"
     48 
     49 kit_report_init
     50 
     51 if [ ! -x "$LLVM_MC" ]; then
     52     # No second oracle available: skip cleanly. kit_skip (not a hard exit) so the
     53     # summary/exit path is uniform with the rest of the harness.
     54     kit_skip diff-llvm "llvm-mc not found (set \$LLVM_MC)"
     55     kit_summary diff-llvm
     56     kit_exit
     57 fi
     58 if [ ! -x "$KIT" ]; then
     59     echo "diff-llvm: kit missing — run \"make bin\"" >&2
     60     exit 2
     61 fi
     62 mkdir -p "$WORK"
     63 
     64 # Raw .text bytes via kit objdump (same tool for both objects, so the
     65 # representation is identical regardless of which assembler produced the .o).
     66 raw() { "$KIT" objdump -s -j .text "$1" 2>/dev/null | awk '/^ *[0-9a-f]+ /{print $2$3$4$5}'; }
     67 # .text relocation kinds+targets (offset omitted — it shifts when a sibling
     68 # reloc is relaxed). Used to recognize the one benign disagreement: kit
     69 # codegen keeps a CALL26/JUMP26/CONDBR reloc for a same-section call/branch to
     70 # a defined local symbol, where llvm-mc (like GNU as) resolves it in place and
     71 # drops the reloc. Both link to the same bytes; only the relocatable form
     72 # differs. When that's the whole story the reloc tables differ, so we skip
     73 # rather than flag.
     74 text_relocs() { "$KIT" objdump -r "$1" 2>/dev/null | awk '
     75     /RELOCATION RECORDS FOR \[\.text\]/{f=1;next} /RELOCATION RECORDS FOR/{f=0}
     76     f && /^[0-9a-f]/{print $2, $3}'; }
     77 mc() { "$LLVM_MC" -triple="$TRIPLE" -mattr="$MATTR" -filetype=obj "$1" -o "$2" 2>"$WORK/mc.err"; }
     78 
     79 # --- encode lane: kit as vs llvm-mc over the encode corpus ----------------
     80 printf 'diff-llvm: encode lane (kit as vs llvm-mc over the encode corpus)\n'
     81 shopt -s nullglob
     82 for s in "$ENCODE_DIR"/aa64_*.s; do
     83     name="$(basename "$s" .s)"
     84     tg="$ENCODE_DIR/$name.targets"
     85     [ -f "$tg" ] && ! grep -qE 'aa64|aarch64|arm64' "$tg" && continue
     86     "$KIT" as -target "$TRIPLE" "$s" -o "$WORK/c.o" 2>/dev/null || continue
     87     if ! mc "$s" "$WORK/l.o"; then
     88         # llvm-mc rejected a form kit accepts: recognized dialect divergence.
     89         kit_skip "encode:$name" "llvm-mc rejected ($(head -1 "$WORK/mc.err" | sed 's|.*error: *||'))"
     90         continue
     91     fi
     92     # Compare the normalized .text byte extraction (same shape both sides).
     93     raw "$WORK/c.o" > "$WORK/c.bytes"
     94     raw "$WORK/l.o" > "$WORK/l.bytes"
     95     kit_diff_agree "encode:$name" "$WORK/c.bytes" "$WORK/l.bytes"
     96 done
     97 
     98 # --- disasm lane: cc -c bytes vs llvm-mc of cc -S ---------------------------
     99 printf 'diff-llvm: disasm lane (cc -c bytes vs llvm-mc of cc -S, opts="%s")\n' "$OPTS"
    100 for src in "$RT_DIR"/*.c; do
    101     name="$(basename "$src" .c)"
    102     [ -e "$RT_DIR/$name.skip" ] && continue
    103     for opt in $OPTS; do
    104         "$KIT" cc -c "-$opt" -target "$TRIPLE" "$src" -o "$WORK/cc.o" 2>/dev/null || continue
    105         "$KIT" cc -S "-$opt" -target "$TRIPLE" "$src" -o "$WORK/s.s" 2>/dev/null || continue
    106         if ! mc "$WORK/s.s" "$WORK/l.o"; then
    107             # llvm-mc rejected kit's cc -S text: recognized dialect divergence.
    108             kit_skip "disasm:$name[-$opt]" "llvm-mc rejected cc -S ($(head -1 "$WORK/mc.err" | sed 's|.*error: *||'))"
    109             continue
    110         fi
    111         raw "$WORK/cc.o" > "$WORK/cc.bytes"
    112         raw "$WORK/l.o"  > "$WORK/l.bytes"
    113         if [ "$(text_relocs "$WORK/cc.o")" != "$(text_relocs "$WORK/l.o")" ] &&
    114            ! cmp -s "$WORK/cc.bytes" "$WORK/l.bytes"; then
    115             # Reloc tables differ AND bytes differ: kit kept a same-section
    116             # call/branch reloc that llvm-mc resolved in place. Link-equivalent
    117             # — recognized benign divergence, skip rather than flag.
    118             kit_skip "disasm:$name[-$opt]" "same-section-call reloc-vs-resolve (link-equivalent)"
    119             continue
    120         fi
    121         kit_diff_agree "disasm:$name[-$opt]" "$WORK/cc.bytes" "$WORK/l.bytes"
    122     done
    123 done
    124 shopt -u nullglob
    125 
    126 kit_summary diff-llvm
    127 kit_exit