kit_corpus.sh - kit

kit_corpus.sh (13269B)
      1 # test/lib/kit_corpus.sh — the unified corpus test harness engine.
      2 #
      3 # ONE harness for the lane-matrix corpus runners (parse/toy/asm/link/elf/wasm).
      4 # A runner declares its corpus + lanes and calls kit_corpus_run; the engine owns
      5 # discovery, the {case}x{opt}x{tuple} matrix, per-case workdirs, serial-or-
      6 # parallel dispatch, event replay, deferred cross-arch exec, and per-lane
      7 # timing. Reporting goes through kit_sh_report.sh's mode-transparent verbs.
      8 #
      9 # Requires bash (indexed arrays). Sources, relative to $KIT_LIB_DIR:
     10 #   kit_sh_report.sh  (kit_pass/kit_fail/kit_skip/kit_skip_na/kit_time/summary)
     11 #   kit_skip.sh          (kit_skip_sidecar/kit_skip_diag/kit_tuple_applicable)
     12 #   parallel.sh         (kit_parallel_jobs/run/wait_all)
     13 #   exec_target.sh      (exec_target_queue/flush/supported)  [sourced by the runner if it uses path E]
     14 #
     15 # ============================================================================
     16 # RUNNER CONTRACT — set these before calling kit_corpus_run:
     17 #   KIT              binary under test
     18 #   KIT_LABEL           summary label (e.g. "toy")
     19 #   KIT_BUILD_DIR       scratch root; per-item workdirs live under it
     20 #   KIT_CORPUS_GLOBS    space-separated globs of case source files
     21 #   KIT_CORPUS_EXT      extension stripped for the basename (e.g. "toy","c","s")
     22 #   KIT_SIDECAR_DIR     dir holding sidecars (default: dir of the first glob)
     23 #   KIT_LANES           active lane ids, space-separated (runner derives from KIT_TEST_PATHS)
     24 #   KIT_OPT_LEVELS      opt levels to expand (default "$KIT_OPT_LEVELS" or "0 1");
     25 #                      set to "" for corpora with no opt axis (e.g. elf layers)
     26 #   KIT_TUPLES          target tuples "<arch>-<obj>" (default "$KIT_DEFAULT_TUPLE")
     27 #   KIT_OPT0ONLY        lanes that run only at opt 0 (space list; e.g. "C W")
     28 #   KIT_PARALLELIZABLE  1 to allow parallel dispatch (default 1); 0 forces serial
     29 #   KIT_EXPECTED_EXT    sidecar suffix for the expected exit code (default ".expected")
     30 #   KIT_TARGETS_EXT     sidecar suffix for whole-case tuple applicability (default ".targets"; empty disables)
     31 #   kit_lane_<ID>()     per-lane hook (see below)
     32 #   KIT_READ_CASE       optional fn, called per item after basics are set, to read
     33 #                      bespoke markers (may override KIT_EXPECTED, set KIT_SKIP_CASE/KIT_SKIP_NA_CASE)
     34 #   KIT_FLUSH_VERIFY    optional fn, called per queued-E item after flush: args
     35 #                      (label, payload, rc); return 0 to keep pass, 1 to fail
     36 #
     37 # Debugging:
     38 #   KIT_CORPUS_TRACE=1  force serial dispatch and print each item/lane before it
     39 #                      runs; deferred exec queues are flushed after each item.
     40 #
     41 # Per-item vars the engine sets before each kit_lane_<ID> call:
     42 #   KIT_BASE KIT_SRC KIT_WORK KIT_OPT KIT_LANE KIT_ARCH KIT_OBJ KIT_TUPLE KIT_EXPECTED
     43 #   KIT_NAME (display label "base" or "base/Oopt") KIT_SIDECAR_DIR
     44 # A hook ends by calling exactly one of: kit_pass/kit_fail/kit_skip/kit_skip_na, OR
     45 # kit_queue_e (deferred exec, resolved at flush). Hooks must write only under
     46 # KIT_WORK and record only via these verbs — that is what makes a runner
     47 # parallel-safe by construction (flip KIT_PARALLELIZABLE with no other change).
     48 # ============================================================================
     49 
     50 KIT_LIB_DIR="${KIT_LIB_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}"
     51 . "$KIT_LIB_DIR/kit_sh_report.sh"
     52 . "$KIT_LIB_DIR/kit_skip.sh"
     53 . "$KIT_LIB_DIR/parallel.sh"
     54 
     55 # Corpus runners treat an unexplained SKIP as a soft failure (gated by
     56 # KIT_TEST_ALLOW_SKIP); the driver-scenario harnesses leave this 0.
     57 KIT_SKIP_IS_FAILURE=1
     58 
     59 # Millisecond clock for per-lane timing (python3 if present; else second res).
     60 kit_now_ms() {
     61   if command -v python3 >/dev/null 2>&1; then
     62     python3 -c 'import time;print(int(time.time()*1000))'
     63   else
     64     echo $(( $(date +%s) * 1000 ))
     65   fi
     66 }
     67 
     68 kit_corpus_trace_enabled() {
     69   case "${KIT_CORPUS_TRACE:-0}" in
     70     ""|0|false|FALSE|no|NO) return 1 ;;
     71     *) return 0 ;;
     72   esac
     73 }
     74 
     75 kit_corpus_trace() {
     76   kit_corpus_trace_enabled || return 0
     77   printf 'TRACE %s\n' "$*" >&2
     78 }
     79 
     80 # Deferred-exec bookkeeping (populated ONLY during serial execution / replay,
     81 # never inside a worker — workers merely emit QUEUE_E events).
     82 CFQ_LABELS=(); CFQ_RCS=(); CFQ_EXPS=(); CFQ_PAYLOADS=()
     83 
     84 # kit_queue_e LABEL EXE OUT ERR RC_FILE EXPECTED EXEC_TAG [PAYLOAD]
     85 #   Record a case whose execution is deferred to a batched exec_target flush.
     86 #   In a worker: emit a QUEUE_E event (the parent does the actual queueing).
     87 #   In serial/replay: enqueue with exec_target and remember it for the flush.
     88 kit_queue_e() {
     89   if [ -n "${KIT_EV:-}" ]; then
     90     printf 'QUEUE_E\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
     91       "$1" "$2" "$3" "$4" "$5" "$6" "$7" "${8:-}" >> "$KIT_EV"
     92     return
     93   fi
     94   exec_target_queue "$7" "$1" "$2" "$3" "$4" "$5"
     95   CFQ_LABELS+=("$1"); CFQ_RCS+=("$5"); CFQ_EXPS+=("$6"); CFQ_PAYLOADS+=("${8:-}")
     96 }
     97 
     98 # ---- discovery -------------------------------------------------------------
     99 kit_corpus_discover() {
    100   KIT_CASES=()
    101   local g f base
    102   shopt -s nullglob
    103   for g in $KIT_CORPUS_GLOBS; do
    104     for f in $g; do
    105       base=$(basename "$f")
    106       [ -n "$KIT_CORPUS_EXT" ] && base="${base%.$KIT_CORPUS_EXT}"
    107       if [ -n "${KIT_TEST_FILTER:-}" ]; then
    108         case "$base" in *"$KIT_TEST_FILTER"*) ;; *) continue ;; esac
    109       fi
    110       KIT_CASES+=("$f")
    111     done
    112   done
    113 }
    114 
    115 # ---- per-item driver (runs every active lane for one case,opt,tuple) -------
    116 # Item encoding: "src|base|opt|tuple"  (src/base never contain '|').
    117 kit_corpus_item() {
    118   local item="$1"
    119   KIT_SRC="${item%%|*}"; item="${item#*|}"
    120   KIT_BASE="${item%%|*}"; item="${item#*|}"
    121   KIT_OPT="${item%%|*}"; KIT_TUPLE="${item#*|}"
    122   KIT_ARCH="${KIT_TUPLE%-*}"; KIT_OBJ="${KIT_TUPLE#*-}"
    123 
    124   # KIT_WORK must be unique per work item. The matrix is case x opt x tuple
    125   # (see kit_corpus_run), so the tuple is folded into the path — otherwise two
    126   # tuples of the same (case, opt) share a dir and, under parallel dispatch,
    127   # stomp each other's artifacts (one worker's `rm -rf $KIT_WORK` / rebuilt exe
    128   # races another's exec, surfacing as spurious SIGILLs). KIT_NAME stays the
    129   # human label; lanes that sweep multiple tuples prefix it with the arch.
    130   if [ "$KIT_OPT" = "-" ]; then
    131     KIT_NAME="$KIT_BASE"; KIT_WORK="$KIT_BUILD_DIR/$KIT_BASE/$KIT_TUPLE"
    132   else
    133     KIT_NAME="$KIT_BASE/O$KIT_OPT"; KIT_WORK="$KIT_BUILD_DIR/$KIT_BASE/O$KIT_OPT/$KIT_TUPLE"
    134   fi
    135   kit_corpus_trace "item $KIT_NAME tuple=$KIT_TUPLE src=$KIT_SRC"
    136   rm -rf "$KIT_WORK"; mkdir -p "$KIT_WORK"
    137 
    138   KIT_EXPECTED=0
    139   if [ -f "$KIT_SIDECAR_DIR/$KIT_BASE$KIT_EXPECTED_EXT" ]; then
    140     KIT_EXPECTED=$(tr -d '[:space:]' < "$KIT_SIDECAR_DIR/$KIT_BASE$KIT_EXPECTED_EXT")
    141   fi
    142 
    143   # Hook for bespoke per-case marker reading (link's expected/jit_only/targets,
    144   # etc.). May set KIT_EXPECTED, KIT_SKIP_CASE (reason), KIT_SKIP_NA_CASE (1).
    145   KIT_SKIP_CASE=; KIT_SKIP_NA_CASE=
    146   if [ -n "${KIT_READ_CASE:-}" ] && command -v "$KIT_READ_CASE" >/dev/null 2>&1; then
    147     "$KIT_READ_CASE"
    148   fi
    149   if [ -n "$KIT_SKIP_CASE" ]; then kit_skip "$KIT_NAME" "$KIT_SKIP_CASE"; return; fi
    150   if [ -n "$KIT_SKIP_NA_CASE" ]; then kit_skip_na "$KIT_NAME"; return; fi
    151 
    152   # Whole-case sidecar skip (one event for the item, not per lane).
    153   local reason
    154   if reason=$(kit_skip_sidecar "$KIT_SIDECAR_DIR" "$KIT_BASE" "$KIT_ARCH"); then
    155     kit_skip "$KIT_NAME" "$reason"; return
    156   fi
    157   # Whole-case tuple applicability (SKIP-NA, uncounted).
    158   if [ -n "$KIT_TARGETS_EXT" ] &&
    159      ! kit_tuple_applicable "$KIT_TUPLE" "$KIT_SIDECAR_DIR/$KIT_BASE$KIT_TARGETS_EXT"; then
    160     kit_skip_na "$KIT_NAME"; return
    161   fi
    162 
    163   local lane
    164   for lane in $KIT_LANES; do
    165     KIT_LANE="$lane"
    166     # opt-0-only lanes (C/W: backend ignores -O) run once, at opt 0.
    167     case " $KIT_OPT0ONLY " in
    168       *" $lane "*) [ "$KIT_OPT" = "0" ] || [ "$KIT_OPT" = "-" ] || continue ;;
    169     esac
    170     # per-lane sidecar skip (.<lane>.skip)
    171     if reason=$(kit_skip_sidecar "$KIT_SIDECAR_DIR" "$KIT_BASE" "" "$lane"); then
    172       kit_corpus_trace "lane $KIT_NAME/$lane skip-sidecar"
    173       kit_skip "$KIT_NAME/$lane" "$reason"; continue
    174     fi
    175     kit_corpus_trace "lane $KIT_NAME/$lane"
    176     "kit_lane_$lane"
    177   done
    178 }
    179 
    180 # ---- worker (parallel mode): isolate output to the event file --------------
    181 # Runs in a background subshell; the ONLY parent-visible channel is the event
    182 # file (+ captured stdout/stderr logs). Ends with a DONE sentinel so the parent
    183 # can tell "produced no records" (fine) from "crashed mid-case" (fail).
    184 kit_corpus_worker() {
    185   KIT_EV="$3"
    186   kit_corpus_item "$2" > "$4" 2> "$5"
    187   printf 'DONE\n' >> "$KIT_EV"
    188 }
    189 
    190 # ---- replay one worker's event file through the counting verbs -------------
    191 kit_corpus_replay() {
    192   local ev="$1" out="$2" err="$3" idx="$4"
    193   if [ ! -s "$ev" ] || [ "$(tail -n1 "$ev" 2>/dev/null)" != "DONE" ]; then
    194     kit_fail "internal/worker-$idx" "no DONE sentinel (worker crashed?)"
    195   else
    196     local kind a b c d e f g h
    197     while IFS="$(printf '\t')" read -r kind a b c d e f g h; do
    198       case "$kind" in
    199         PASS)    kit_pass "$a" ;;
    200         FAIL)    kit_fail "$a" "$b" ;;
    201         SKIP)    kit_skip "$a" "$b" ;;
    202         SKIP_NA) kit_skip_na "$a" "$b" ;;
    203         XFAIL)   kit_xfail "$a" "$b" ;;
    204         XPASS)   kit_xpass "$a" ;;
    205         TIME)    kit_time "$a" "$b" ;;
    206         QUEUE_E) kit_queue_e "$a" "$b" "$c" "$d" "$e" "$f" "$g" "$h" ;;
    207         DONE)    ;;
    208       esac
    209     done < "$ev"
    210   fi
    211   # Surface hook diagnostics (diffs/stderr) deterministically, in index order.
    212   [ -s "$out" ] && cat "$out"
    213   [ -s "$err" ] && cat "$err" >&2
    214   return 0
    215 }
    216 
    217 # ---- deferred-exec flush + per-case verification ---------------------------
    218 kit_corpus_flush_e() {
    219   [ "${#CFQ_LABELS[@]}" -eq 0 ] && return 0
    220   kit_corpus_trace "flush E queued=${#CFQ_LABELS[@]}"
    221   exec_target_flush
    222   local i rc ok
    223   for i in "${!CFQ_LABELS[@]}"; do
    224     rc=127; [ -f "${CFQ_RCS[$i]}" ] && rc=$(cat "${CFQ_RCS[$i]}")
    225     rc=$((rc & 255))
    226     ok=0; [ "$rc" -eq "$(( ${CFQ_EXPS[$i]} & 255 ))" ] && ok=1
    227     if [ "$ok" -eq 1 ] && [ -n "${KIT_FLUSH_VERIFY:-}" ] &&
    228        command -v "$KIT_FLUSH_VERIFY" >/dev/null 2>&1; then
    229       "$KIT_FLUSH_VERIFY" "${CFQ_LABELS[$i]}" "${CFQ_PAYLOADS[$i]}" "$rc" || ok=0
    230     fi
    231     if [ "$ok" -eq 1 ]; then kit_pass "${CFQ_LABELS[$i]}"
    232     else kit_fail "${CFQ_LABELS[$i]}" "expected ${CFQ_EXPS[$i]} got $rc"; fi
    233   done
    234   CFQ_LABELS=(); CFQ_RCS=(); CFQ_EXPS=(); CFQ_PAYLOADS=()
    235 }
    236 
    237 # ---- the entrypoint --------------------------------------------------------
    238 # Discovers + expands the matrix + dispatches (serial or parallel) + flushes
    239 # deferred exec. Accumulates into the shared counters (so a runner may call it
    240 # multiple times — e.g. elf's A/B/C layers — then summarize once). Does NOT
    241 # print the summary or exit; the runner calls kit_summary "$KIT_LABEL"; kit_exit.
    242 kit_corpus_run() {
    243   : "${KIT_OPT_LEVELS=${KIT_OPT_LEVELS:-0 1}}"
    244   : "${KIT_TUPLES:=${KIT_DEFAULT_TUPLE:-aarch64-elf}}"
    245   : "${KIT_OPT0ONLY:=}"
    246   : "${KIT_PARALLELIZABLE:=1}"
    247   : "${KIT_EXPECTED_EXT:=.expected}"
    248   # No-colon `=`: only default when UNSET. A caller-supplied empty value means
    249   # "disable the whole-case tuple matcher" (see CONTRACT above) — the colon form
    250   # `:=` would wrongly clobber that empty back to .targets and re-enable it.
    251   : "${KIT_TARGETS_EXT=.targets}"
    252   if [ -z "${KIT_SIDECAR_DIR:-}" ]; then
    253     set -- $KIT_CORPUS_GLOBS; KIT_SIDECAR_DIR=$(dirname "$1")
    254   fi
    255 
    256   kit_corpus_discover
    257   if [ "${#KIT_CASES[@]}" -eq 0 ]; then
    258     echo "$KIT_LABEL: no cases under $KIT_CORPUS_GLOBS" >&2
    259     exit 2
    260   fi
    261 
    262   # Build the flat work-item list = case x opt x tuple.
    263   local opts="$KIT_OPT_LEVELS"; [ -z "$opts" ] && opts="-"
    264   local f base o t
    265   KIT_ITEMS=()
    266   for f in "${KIT_CASES[@]}"; do
    267     base=$(basename "$f"); [ -n "$KIT_CORPUS_EXT" ] && base="${base%.$KIT_CORPUS_EXT}"
    268     for o in $opts; do
    269       for t in $KIT_TUPLES; do
    270         KIT_ITEMS+=("$f|$base|$o|$t")
    271       done
    272     done
    273   done
    274 
    275   local jobs; jobs="$(kit_parallel_jobs)" || jobs=1
    276   if kit_corpus_trace_enabled; then
    277     KIT_PARALLELIZABLE=0
    278     jobs=1
    279     kit_corpus_trace "dispatch serial items=${#KIT_ITEMS[@]} lanes=${KIT_LANES# }"
    280   fi
    281   if [ "$KIT_PARALLELIZABLE" = "1" ] && [ "$jobs" -gt 1 ] && [ "${#KIT_ITEMS[@]}" -gt 4 ]; then
    282     kit_corpus_dispatch_parallel "$jobs"
    283   else
    284     local idx=0
    285     for item in "${KIT_ITEMS[@]}"; do
    286       KIT_EV=; kit_corpus_item "$item"
    287       kit_corpus_trace_enabled && kit_corpus_flush_e
    288       idx=$((idx + 1))
    289     done
    290   fi
    291 
    292   kit_corpus_flush_e
    293 }
    294 
    295 kit_corpus_dispatch_parallel() {
    296   local jobs="$1"
    297   local pdir="$KIT_BUILD_DIR/.parallel.$$"
    298   rm -rf "$pdir"; mkdir -p "$pdir"
    299   local evs=() outs=() errs=() idx=0 ev out err
    300   for item in "${KIT_ITEMS[@]}"; do
    301     ev="$pdir/$idx.events"; out="$pdir/$idx.out"; err="$pdir/$idx.err"
    302     : > "$ev"
    303     evs+=("$ev"); outs+=("$out"); errs+=("$err")
    304     kit_parallel_run "$jobs" kit_corpus_worker "$idx" "$item" "$ev" "$out" "$err"
    305     idx=$((idx + 1))
    306   done
    307   kit_parallel_wait_all || true
    308   # Serial replay in strict index order -> deterministic counts + output +
    309   # exec_target queueing (which happens here, never in a worker).
    310   KIT_EV=
    311   local i=0
    312   while [ "$i" -lt "${#evs[@]}" ]; do
    313     kit_corpus_replay "${evs[$i]}" "${outs[$i]}" "${errs[$i]}" "$i"
    314     i=$((i + 1))
    315   done
    316   rm -rf "$pdir"
    317 }
	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README