kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

roundtrip.sh (12737B)


      1 #!/usr/bin/env bash
      2 # test/asm/roundtrip.sh — codegen round-trip completeness harness, on the
      3 # shared corpus harness (test/lib/kit_corpus.sh).
      4 #
      5 # Measures completeness of the per-arch assembler, disassembler, and the link
      6 # relocation path by round-tripping the *compiler's own output* rather than a
      7 # hand-written corpus. See doc/ASM_ROUNDTRIP_TESTING.md.
      8 #
      9 # Corpus: test/asm/roundtrip/*.c — each defines `int test_main(...)` so the L2
     10 # lane can execute it through the shared exec harness (jit-runner).
     11 #
     12 # Three lanes (KIT_TEST_PATHS, default "012"; digits 0/1/2 -> lanes L0/L1/L2):
     13 #
     14 #   0  L0 decode-complete — `kit cc -S` and assert no in-function decode
     15 #        failure marker (aarch64 `.inst`) inside .text. Catches an instruction
     16 #        codegen emits that the disassembler cannot decode. Host-independent,
     17 #        no exec, pinpoints the undecoded word.
#   1  L1 byte round-trip — `kit cc -c` (direct.o) vs `kit cc -S | kit as`
#        (rt.o); diff the .text bytes AND the relocation tables of the
#        sections `cc -S` reproduces (.text, .rodata, .data). Catches
#        assembler/disassembler disagreements (round-trip violations). Exact,
#        host-independent. Gated on the `cc -S` step succeeding first.
#   2  L2 exec equivalence — run direct.o and rt.o, compare their exit codes
#        and stdout, then check the exit code against the <name>.expected
#        oracle. Tolerant of benign encoding differences; the end-to-end "it
#        runs the same" signal. Executes via jit-runner; native target only
#        (skips when host arch != cross-target).
     26 #
     27 # Opt levels: KIT_TEST_OPTS (default "O1"). Each case is run at every level.
     28 #
# Per-case applicability: <name>.targets (whitespace-separated arch tokens such
# as aa64, x86_64 or riscv64 — not "<arch>-<obj>" tuples) -> SKIP-NA;
# <name>.skip quarantines the whole case; <name>.expected is the L2 exit-code
# oracle.
     31 #
     32 # Filtering:
     33 #   ./roundtrip.sh [name_filter] [lanes]
     34 #     name_filter   substring match against case basename
     35 #     lanes         subset of "012" (default "012")
     36 #   Equivalent env vars: KIT_TEST_FILTER, KIT_TEST_PATHS, KIT_TEST_OPTS.
     37 #
# Every lane hook writes only under $KIT_WORK and records via kit_*, so the runner
# is parallel-safe by construction (KIT_ASM_PARALLEL flips dispatch). No lane
# relies on another lane having run first: a lane that needs the `cc -S`
# listing (or direct.o/rt.o) builds it under $KIT_WORK itself when it is not
# already there, so the L1-after-L0 dependency holds whatever the dispatch
# order is.
     43 set -u
     44 
     45 ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
     46 export KIT_LIB_DIR="$ROOT/test/lib"
     47 . "$ROOT/test/lib/kit_corpus.sh"
     48 
     49 TEST_DIR="$ROOT/test/asm"
     50 CORPUS_DIR="$TEST_DIR/roundtrip"
     51 BUILD_DIR="$ROOT/build/test/asm/roundtrip"
     52 KIT_BIN="${KIT:-$ROOT/build/kit}"
     53 JIT_RUNNER="$ROOT/build/test/jit-runner"
     54 
     55 # KIT_TEST_ARCH selects the cross-target. Mirrors test/asm/run.sh.
     56 KIT_TEST_ARCH="${KIT_TEST_ARCH:-aa64}"
     57 case "$KIT_TEST_ARCH" in
     58     aa64|aarch64|arm64)   TEST_ARCH=aa64;   TRIPLE=aarch64-linux-gnu ;;
     59     x64|x86_64|amd64)     TEST_ARCH=x64;    TRIPLE=x86_64-linux-gnu ;;
     60     rv64|riscv64)         TEST_ARCH=rv64;   TRIPLE=riscv64-linux-gnu ;;
     61     *) printf 'unknown KIT_TEST_ARCH=%s\n' "$KIT_TEST_ARCH" >&2; exit 2 ;;
     62 esac
     63 export KIT_TEST_ARCH="$TEST_ARCH"
     64 
     65 # Opt axis. KIT_TEST_OPTS carries the "O"-prefixed levels (e.g. "O0 O1"); the
     66 # engine matrix wants the bare digits, and the lanes build "-O<level>".
     67 OPTS="${KIT_TEST_OPTS:-O1}"
     68 OPT_LEVELS=""
     69 for _o in $OPTS; do OPT_LEVELS="$OPT_LEVELS ${_o#O}"; done
     70 
     71 FILTER="${1:-${KIT_TEST_FILTER:-}}"
     72 PATHS="${2:-${KIT_TEST_PATHS:-012}}"
     73 export KIT_TEST_FILTER="$FILTER"
     74 
     75 # Map the PATHS digit alphabet (0/1/2) onto the engine lane ids L0/L1/L2.
     76 LANES=""
     77 case "$PATHS" in *0*) LANES="$LANES L0";; esac
     78 case "$PATHS" in *1*) LANES="$LANES L1";; esac
     79 case "$PATHS" in *2*) LANES="$LANES L2";; esac
     80 
     81 PAR="${KIT_ASM_PARALLEL:-1}"
     82 
     83 # is_native_target=1 when the cross-target arch matches the host arch (needed
     84 # for in-process JIT exec in the L2 lane).
     85 arch_raw="$(uname -m 2>/dev/null || true)"
     86 is_native_target=0
     87 case "$TEST_ARCH" in
     88     aa64) { [ "$arch_raw" = "aarch64" ] || [ "$arch_raw" = "arm64" ]; } && is_native_target=1 ;;
     89     x64)  { [ "$arch_raw" = "x86_64" ] || [ "$arch_raw" = "amd64" ]; } && is_native_target=1 ;;
     90     rv64) [ "$arch_raw" = "riscv64" ] && is_native_target=1 ;;
     91 esac
     92 
     93 have_jit_runner=0
     94 [ -x "$JIT_RUNNER" ] && have_jit_runner=1
     95 
     96 # ---- per-case applicability (mirrors test/asm/run.sh) ----------------------
     97 # A <name>.targets sidecar lists the arches a case applies to, using arch-only
     98 # tokens with synonyms (aa64/aarch64/arm64, x64/x86_64/amd64, rv64/riscv64) —
     99 # NOT the engine's "<arch>-<obj>" tuple form. Reimplemented as a KIT_READ_CASE
    100 # hook (rather than the engine's KIT_TARGETS_EXT tuple matcher) so the exact
    101 # original token grammar and SKIP-NA verdict are preserved.
    102 rt_read_case() {
    103     local targets="$KIT_SIDECAR_DIR/$KIT_BASE.targets" tuple
    104     [ -f "$targets" ] || return 0   # no .targets => applies to all arches
    105     for tuple in $(cat "$targets"); do
    106         case "$tuple:$TEST_ARCH" in
    107             aa64:aa64|aarch64:aa64|arm64:aa64)  return 0 ;;
    108             x64:x64|x86_64:x64|amd64:x64)       return 0 ;;
    109             rv64:rv64|riscv64:rv64)             return 0 ;;
    110         esac
    111     done
    112     KIT_SKIP_NA_CASE=1
    113 }
    114 
    115 # ---- extraction helpers (lane-local) ---------------------------------------
    116 
    117 # .text bytes as objdump hex-dump lines (filename header stripped).
    118 text_bytes() { "$KIT_BIN" objdump -s -j .text "$1" 2>/dev/null | awk '/^ *[0-9a-f]+ /'; }
    119 
    120 # Relocation records (kind/offset/target) for the sections `cc -S` reproduces
    121 # — .text, .rodata, .data — so the comparison covers code relocs AND data
    122 # relocs (switch jump tables live in .rodata). Sections cc -S does not emit
    123 # (e.g. .eh_frame) are excluded so their absence in the round-tripped object
    124 # is not flagged. The section header is printed so a reloc at the same offset
    125 # in two sections stays distinct.
    126 reproduced_relocs() {
    127     "$KIT_BIN" objdump -r "$1" 2>/dev/null | awk '
    128         /^RELOCATION RECORDS FOR \[\.(text|rodata|data)\]/ { f=1; print; next }
    129         /^RELOCATION RECORDS FOR/                          { f=0; next }
    130         f && /^[0-9a-f]/                                   { print }'
    131 }
    132 
    133 # Emit the in-function decode-failure markers found in a `cc -S` listing.
    134 # Tracks the current section so only .text `.inst` lines count (data/rodata
    135 # `.byte` and inter-function padding are not decode failures). Prints each
    136 # offending line; exit 0 if any were found.
    137 decode_failures() {
    138     awk '
    139         /^[[:space:]]*\.text[[:space:]]*$/   { intext=1; next }
    140         /^[[:space:]]*\.section/             { intext=0; next }
    141         intext && /[[:space:]]\.inst[[:space:]]/ { print; found=1 }
    142         END { exit(found ? 0 : 1) }
    143     ' "$1"
    144 }
    145 
    146 # Build the shared `cc -S` assembly listing for this case/opt under $KIT_WORK.
    147 # Echoes the listing path on success; nonzero (with the log left in place) on
    148 # failure. Both L0 and L1 funnel through this so the L1-after-L0 ordering holds
    149 # regardless of dispatch order: each lane materializes the listing it needs.
    150 rt_compile_asm() {
    151     local asm="$KIT_WORK/out.s"
    152     if [ -f "$asm" ]; then printf '%s' "$asm"; return 0; fi
    153     if "$KIT_BIN" cc -S "-O$KIT_OPT" -target "$TRIPLE" "$KIT_SRC" -o "$asm" \
    154             >"$KIT_WORK/cc_s.log" 2>&1; then
    155         printf '%s' "$asm"; return 0
    156     fi
    157     return 1
    158 }
    159 
    160 # ---- lanes -----------------------------------------------------------------
    161 
    162 # L0: decode completeness. cc -S then assert no in-function `.inst` markers.
    163 kit_lane_L0() {
    164     local asm
    165     if ! asm=$(rt_compile_asm); then
    166         kit_fail "$KIT_NAME/L0" "cc -S failed; see $KIT_WORK/cc_s.log"; return
    167     fi
    168     if decode_failures "$asm" >"$KIT_WORK/decode_fail"; then
    169         kit_fail "$KIT_NAME/L0" "undecoded insn in .text; see $KIT_WORK/decode_fail"
    170     else
    171         kit_pass "$KIT_NAME/L0"
    172     fi
    173 }
    174 
    175 # L1: byte + reloc round-trip. cc -c (direct.o) vs cc -S | as (rt.o); diff the
    176 # .text bytes AND the reproduced reloc tables. Gated on the cc -S listing (L0's
    177 # producer) succeeding first.
    178 #
    179 # Intra-function branches now round-trip: `as` relaxes same-section local-label
    180 # branches at finalize (matching codegen), so the .text reloc tables agree. A
    181 # case that still can't round-trip is gated with a per-case `<name>.skip` file
    182 # rather than auto-skipped here.
    183 kit_lane_L1() {
    184     local asm direct="$KIT_WORK/direct.o" rt="$KIT_WORK/rt.o"
    185     if ! asm=$(rt_compile_asm); then
    186         kit_fail "$KIT_NAME/L1" "cc -S failed; see $KIT_WORK/cc_s.log"; return
    187     fi
    188     if ! "$KIT_BIN" cc -c "-O$KIT_OPT" -target "$TRIPLE" "$KIT_SRC" -o "$direct" \
    189             >"$KIT_WORK/cc_c.log" 2>&1; then
    190         kit_fail "$KIT_NAME/L1" "cc -c failed; see $KIT_WORK/cc_c.log"; return
    191     fi
    192     if ! "$KIT_BIN" as -target "$TRIPLE" "$asm" -o "$rt" \
    193             >"$KIT_WORK/as.log" 2>&1; then
    194         kit_fail "$KIT_NAME/L1" "as failed; see $KIT_WORK/as.log"; return
    195     fi
    196     text_bytes        "$direct" >"$KIT_WORK/direct.text"
    197     text_bytes        "$rt"     >"$KIT_WORK/rt.text"
    198     reproduced_relocs "$direct" >"$KIT_WORK/direct.rel"
    199     reproduced_relocs "$rt"     >"$KIT_WORK/rt.rel"
    200     if ! diff -u "$KIT_WORK/direct.text" "$KIT_WORK/rt.text" >"$KIT_WORK/text.diff"; then
    201         kit_fail "$KIT_NAME/L1" ".text bytes differ; see $KIT_WORK/text.diff"
    202     elif ! diff -u "$KIT_WORK/direct.rel" "$KIT_WORK/rt.rel" >"$KIT_WORK/rel.diff"; then
    203         kit_fail "$KIT_NAME/L1" "relocs differ (.text/.rodata/.data); see $KIT_WORK/rel.diff"
    204     else
    205         kit_pass "$KIT_NAME/L1"
    206     fi
    207 }
    208 
    209 # L2: exec equivalence. Build direct.o and rt.o, run both through jit-runner,
    210 # require equal exit codes + identical stdout, and (when a <name>.expected
    211 # oracle exists) require the exit code to match it. Native target only.
    212 kit_lane_L2() {
    213     if [ "$is_native_target" -eq 0 ]; then
    214         kit_skip "$KIT_NAME/L2" "non-native target ($arch_raw); cross-exec lane TODO"; return
    215     fi
    216     if [ "$have_jit_runner" -eq 0 ]; then
    217         kit_skip "$KIT_NAME/L2" "jit-runner unavailable"; return
    218     fi
    219     local asm direct="$KIT_WORK/direct.o" rt="$KIT_WORK/rt.o"
    220     # Reuse direct.o/rt.o from L1 if present; otherwise build them here.
    221     if [ ! -f "$direct" ]; then
    222         if ! "$KIT_BIN" cc -c "-O$KIT_OPT" -target "$TRIPLE" "$KIT_SRC" -o "$direct" \
    223                 >"$KIT_WORK/cc_c.log" 2>&1; then
    224             kit_fail "$KIT_NAME/L2" "cc -c failed; see $KIT_WORK/cc_c.log"; return
    225         fi
    226     fi
    227     if [ ! -f "$rt" ]; then
    228         if ! asm=$(rt_compile_asm); then
    229             kit_fail "$KIT_NAME/L2" "cc -S failed; see $KIT_WORK/cc_s.log"; return
    230         fi
    231         if ! "$KIT_BIN" as -target "$TRIPLE" "$asm" -o "$rt" \
    232                 >"$KIT_WORK/as.log" 2>&1; then
    233             kit_fail "$KIT_NAME/L2" "as failed; see $KIT_WORK/as.log"; return
    234         fi
    235     fi
    236     "$JIT_RUNNER" "$direct" >"$KIT_WORK/direct.out" 2>"$KIT_WORK/direct.err"; local rc_direct=$?
    237     "$JIT_RUNNER" "$rt"     >"$KIT_WORK/rt.out"     2>"$KIT_WORK/rt.err";     local rc_rt=$?
    238     if [ "$rc_direct" != "$rc_rt" ]; then
    239         kit_fail "$KIT_NAME/L2" "exit codes differ: direct=$rc_direct rt=$rc_rt"; return
    240     fi
    241     if ! diff -q "$KIT_WORK/direct.out" "$KIT_WORK/rt.out" >/dev/null; then
    242         kit_fail "$KIT_NAME/L2" "stdout differs"; return
    243     fi
    244     # KIT_EXPECTED is the <name>.expected oracle (engine default 0 when absent).
    245     # The original lane only enforced the oracle when the sidecar was present;
    246     # every roundtrip case carries one, so honoring KIT_EXPECTED unconditionally
    247     # is the same verdict.
    248     if [ "$rc_direct" != "$KIT_EXPECTED" ]; then
    249         kit_fail "$KIT_NAME/L2" "exit $rc_direct != expected $KIT_EXPECTED"; return
    250     fi
    251     kit_pass "$KIT_NAME/L2"
    252 }
    253 
    254 # ---- drive the corpus ------------------------------------------------------
    255 
    256 printf 'roundtrip: arch=%s triple=%s opts="%s" lanes=%s native=%d\n' \
    257     "$TEST_ARCH" "$TRIPLE" "$OPTS" "$PATHS" "$is_native_target"
    258 
    259 if [ ! -x "$KIT_BIN" ]; then
    260     printf '  FATAL kit binary missing — run "make bin"\n' >&2
    261     exit 1
    262 fi
    263 case " $LANES " in
    264     *" L2 "*)
    265         if [ "$is_native_target" -eq 1 ] && [ "$have_jit_runner" -eq 0 ]; then
    266             printf '  warn jit-runner missing; L2 lane will skip\n'
    267         fi
    268         ;;
    269 esac
    270 
    271 mkdir -p "$BUILD_DIR"
    272 
    273 # Skips here are informational (L2 skips on a non-native host / absent
    274 # jit-runner); the original exited 0 with skips + no fails, so do not gate the
    275 # exit on skips.
    276 KIT_SKIP_IS_FAILURE=0
    277 KIT_LABEL=test-asm-roundtrip KIT_BUILD_DIR="$BUILD_DIR" \
    278   KIT_CORPUS_GLOBS="$CORPUS_DIR/*.c" KIT_CORPUS_EXT=c KIT_SIDECAR_DIR="$CORPUS_DIR" \
    279   KIT_LANES="$LANES" KIT_OPT_LEVELS="$OPT_LEVELS" KIT_TUPLES="$TEST_ARCH-elf" \
    280   KIT_TARGETS_EXT="" KIT_READ_CASE=rt_read_case KIT_PARALLELIZABLE="$PAR" \
    281   kit_corpus_run
    282 
    283 kit_summary test-asm-roundtrip
    284 kit_exit