kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

whole_program_inline.sh (5643B)


      1 #!/usr/bin/env bash
      2 # Whole-program cross-function inlining (LTO Phase 0).
      3 #
      4 # At -O1 the optimizer defers emission to a module-wide finalize sweep that GCs
      5 # dead symbols and runs the whole-program inliner (opt_inline) over the live
      6 # FuncSet. This is one path for every arch — no arch special-casing — so the
      7 # structural checks run identically for aarch64, x86_64, and riscv64.
      8 #
      9 # Green: a small static callee fuses into its caller (no call instruction left
     10 # in the caller, and the `opt.inline.inlined` metric fires). Behavioral: the
     11 # fused program still returns the right value via the host JIT.
     12 set -euo pipefail
     13 
     14 ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
     15 KIT="${KIT:-$ROOT/build/kit}"
     16 WORK="$ROOT/build/test/opt/whole_program_inline"
     17 mkdir -p "$WORK"
     18 
     19 # A caller (`compute`) that reaches two small static helpers. Both should fuse
     20 # in, leaving `compute` call-free.
     21 read -r -d '' SRC <<'EOF' || true
     22 static int add1(int x) { return x + 1; }
     23 static int twice(int x) { return add1(add1(x)); }
     24 int compute(int x) { return twice(x) + add1(x); }
     25 EOF
     26 
     27 # Per-arch call mnemonics (aarch64 bl/blr, x86_64 call/callq, riscv jal/jalr).
     28 # After fusion `compute` must contain none of them.
     29 call_mnemonics='\b(bl|blr|callq?|jalr?)\b'
     30 
     31 check_arch() {
     32   local triple=$1
     33   local tag=$2
     34   local src="$WORK/$tag.c"
     35   local obj="$WORK/$tag.o"
     36   printf '%s\n' "$SRC" > "$src"
     37   "$KIT" cc -target "$triple" -O1 -ffreestanding -std=c11 -c "$src" \
     38     -o "$obj" > "$WORK/$tag.cc.out" 2>&1
     39   "$KIT" objdump -d "$obj" > "$WORK/$tag.dis" 2>&1
     40   # Isolate the `compute` function body and count residual calls.
     41   local ncalls
     42   ncalls=$(sed -n '/<compute>:/,/^$/p' "$WORK/$tag.dis" \
     43     | grep -cE "$call_mnemonics" || true)
     44   if [ "$ncalls" -ne 0 ]; then
     45     printf 'whole-program-inline FAILED: %s left %s call(s) in compute (callee not fused)\n' \
     46       "$tag" "$ncalls" >&2
     47     sed -n '/<compute>:/,/^$/p' "$WORK/$tag.dis" | sed 's/^/  | /' >&2
     48     exit 1
     49   fi
     50   printf 'whole-program-inline %-8s fused (compute call-free)\n' "$tag"
     51 }
     52 
     53 check_arch aarch64-linux-gnu aa64
     54 check_arch x86_64-linux-gnu  x64
     55 check_arch riscv64-linux-gnu rv64
     56 
     57 # Interposition guard: a weak callee is link-time replaceable, so inlining its
     58 # body would defeat a strong override. The caller must keep the call. Check on
     59 # every arch (one unified inliner path).
     60 read -r -d '' WEAK_SRC <<'EOF' || true
     61 __attribute__((weak)) int wcallee(int x) { return x + 1; }
     62 int wcaller(int x) { return wcallee(x); }
     63 EOF
     64 check_weak_not_inlined() {
     65   local triple=$1
     66   local tag=$2
     67   local src="$WORK/weak_$tag.c"
     68   local obj="$WORK/weak_$tag.o"
     69   printf '%s\n' "$WEAK_SRC" > "$src"
     70   "$KIT" cc -target "$triple" -O1 -ffreestanding -std=c11 -c "$src" \
     71     -o "$obj" > "$WORK/weak_$tag.cc.out" 2>&1
     72   "$KIT" objdump -d "$obj" > "$WORK/weak_$tag.dis" 2>&1
     73   local ncalls
     74   ncalls=$(sed -n '/<wcaller>:/,/^$/p' "$WORK/weak_$tag.dis" \
     75     | grep -cE "$call_mnemonics" || true)
     76   if [ "$ncalls" -eq 0 ]; then
     77     printf 'whole-program-inline FAILED: %s inlined a WEAK callee (interposition unsound)\n' \
     78       "$tag" >&2
     79     sed -n '/<wcaller>:/,/^$/p' "$WORK/weak_$tag.dis" | sed 's/^/  | /' >&2
     80     exit 1
     81   fi
     82   printf 'whole-program-inline %-8s weak callee kept out-of-line\n' "$tag"
     83 }
     84 check_weak_not_inlined aarch64-linux-gnu aa64
     85 check_weak_not_inlined x86_64-linux-gnu  x64
     86 check_weak_not_inlined riscv64-linux-gnu rv64
     87 
     88 # Metric: the whole-program inliner must actually fire at -O1 (not just the
     89 # streaming tiny-inliner, which emits opt.tiny_inline.inlined instead).
     90 read -r -d '' RUN_SRC <<'EOF' || true
     91 static int add1(int x) { return x + 1; }
     92 int main(void) { return add1(41) == 42 ? 0 : 1; }
     93 EOF
     94 printf '%s\n' "$RUN_SRC" > "$WORK/run.c"
     95 if ! "$KIT" run --time -O1 "$WORK/run.c" >"$WORK/run.out" 2>"$WORK/run.err"; then
     96   printf 'whole-program-inline FAILED: `kit run -O1` did not exit 0\n' >&2
     97   sed 's/^/  | /' "$WORK/run.err" >&2
     98   exit 1
     99 fi
    100 if ! grep -q 'opt.inline.inlined' "$WORK/run.err"; then
    101   printf 'whole-program-inline FAILED: opt.inline.inlined metric absent at -O1\n' >&2
    102   sed -n '1,80p' "$WORK/run.err" >&2
    103   exit 1
    104 fi
    105 printf 'whole-program-inline run     fired opt.inline.inlined, exit 0\n'
    106 
    107 # The kit-native build verbs (build-exe/build-lib/build-obj) compile through the
    108 # same kit_cg path as cc, so whole-program optimization participates without any
    109 # build-verb-specific wiring. Guard that: build-obj at -O1 must fuse, and
    110 # build-exe must produce a correct, fused executable.
    111 printf '%s\n' "$SRC" > "$WORK/verb.c"
    112 "$KIT" build-obj -O1 -ffreestanding "$WORK/verb.c" -o "$WORK/verb.o" \
    113   > "$WORK/verb.cc.out" 2>&1
    114 "$KIT" objdump -d "$WORK/verb.o" > "$WORK/verb.dis" 2>&1
    115 vcalls=$(sed -n '/<compute>:/,/^$/p' "$WORK/verb.dis" \
    116   | grep -cE "$call_mnemonics" || true)
    117 if [ "$vcalls" -ne 0 ]; then
    118   printf 'whole-program-inline FAILED: build-obj -O1 did not fuse (LTO bypassed)\n' >&2
    119   sed -n '/<compute>:/,/^$/p' "$WORK/verb.dis" | sed 's/^/  | /' >&2
    120   exit 1
    121 fi
    122 printf 'whole-program-inline build-obj fused (verb participates in LTO)\n'
    123 
    124 read -r -d '' VERB_EXE_SRC <<'EOF' || true
    125 static int add1(int x) { return x + 1; }
    126 static int twice(int x) { return add1(add1(x)); }
    127 int main(void) { return (twice(20) + add1(1)) == 24 ? 0 : 1; }
    128 EOF
    129 printf '%s\n' "$VERB_EXE_SRC" > "$WORK/verb_exe.c"
    130 if ! "$KIT" build-exe -O1 "$WORK/verb_exe.c" -o "$WORK/verb_exe" \
    131      > "$WORK/verb_exe.cc.out" 2>&1 || ! "$WORK/verb_exe"; then
    132   printf 'whole-program-inline FAILED: build-exe -O1 produced wrong result\n' >&2
    133   sed 's/^/  | /' "$WORK/verb_exe.cc.out" >&2
    134   exit 1
    135 fi
    136 printf 'whole-program-inline build-exe correct + fused\n'
    137 
    138 printf 'whole-program-inline: ok\n'