whole_program_inline.sh (5643B)
#!/usr/bin/env bash
# Whole-program cross-function inlining (LTO Phase 0).
#
# At -O1 the optimizer defers emission to a module-wide finalize sweep that GCs
# dead symbols and runs the whole-program inliner (opt_inline) over the live
# FuncSet. This is one path for every arch — no arch special-casing — so the
# structural checks run identically for aarch64, x86_64, and riscv64.
#
# Green: a small static callee fuses into its caller (no call instruction left
# in the caller, and the `opt.inline.inlined` metric fires). Behavioral: the
# fused program still returns the right value via the host JIT.
#
# Environment:
#   KIT  path (or command name) of the kit driver; defaults to $ROOT/build/kit.
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
KIT="${KIT:-$ROOT/build/kit}"
WORK="$ROOT/build/test/opt/whole_program_inline"
mkdir -p "$WORK"

# Print a failure line in this test's standard format and abort.
die() {
  printf 'whole-program-inline FAILED: %s\n' "$*" >&2
  exit 1
}

# Copy a log file to stderr, prefixing every line so it stands out.
#   $1 - file to dump
dump_log() {
  sed 's/^/ | /' "$1" >&2
}

# Run a command with stdout+stderr captured in a log file. Under `set -e` a
# bare failing command would end the script with nothing on the terminal
# (the output is in the log), so on failure the log is replayed before exit.
#   $1    - short description used in the failure message
#   $2    - log file receiving stdout and stderr
#   $3... - command and arguments
run_logged() {
  local what=$1
  local log=$2
  shift 2
  if ! "$@" > "$log" 2>&1; then
    printf 'whole-program-inline FAILED: %s\n' "$what" >&2
    dump_log "$log"
    exit 1
  fi
}

# Print the disassembly of one function: from its `<name>:` label up to the
# next blank line.
#   $1 - disassembly file
#   $2 - function name
extract_func() {
  sed -n "/<$2>:/,/^\$/p" "$1"
}

# Print the number of lines in a function's disassembly that contain a call
# mnemonic. Aborts if the function label is absent: an empty extraction would
# otherwise count as "0 calls" and make the fusion checks pass without
# inspecting any code.
#   $1 - disassembly file
#   $2 - function name
count_calls() {
  local dis=$1
  local func=$2
  local body
  body=$(extract_func "$dis" "$func")
  if [[ -z "$body" ]]; then
    die "no <$func>: label found in $dis (cannot verify call count)"
  fi
  local n
  local rc=0
  # grep -c exits 1 when the count is 0; only a status above 1 is an error.
  n=$(grep -cE -- "$call_mnemonics" <<<"$body") || rc=$?
  if (( rc > 1 )); then
    die "grep failed (status $rc) while scanning <$func> in $dis"
  fi
  printf '%s\n' "$n"
}

# Fail early with a clear message when the driver is missing.
command -v -- "$KIT" > /dev/null || die "kit driver not found or not executable: $KIT"

# A caller (`compute`) that reaches two small static helpers. Both should fuse
# in, leaving `compute` call-free.
# (`read -d ''` returns non-zero at end of input, hence the `|| true`.)
read -r -d '' SRC <<'EOF' || true
static int add1(int x) { return x + 1; }
static int twice(int x) { return add1(add1(x)); }
int compute(int x) { return twice(x) + add1(x); }
EOF

# Per-arch call mnemonics (aarch64 bl/blr, x86_64 call/callq, riscv jal/jalr).
# After fusion `compute` must contain none of them.
# NOTE(review): tail calls emitted as plain jumps (b/jmp/j/tail) are not
# matched by this pattern — confirm the backend does not emit them at -O1.
call_mnemonics='\b(bl|blr|callq?|jalr?)\b'

# Compile SRC for one target at -O1 and require `compute` to be call-free.
#   $1 - target triple passed to `kit cc -target`
#   $2 - short tag used for artifact names and messages
check_arch() {
  local triple=$1
  local tag=$2
  local src="$WORK/$tag.c"
  local obj="$WORK/$tag.o"
  local dis="$WORK/$tag.dis"
  printf '%s\n' "$SRC" > "$src"
  run_logged "$tag: cc -O1 failed" "$WORK/$tag.cc.out" \
    "$KIT" cc -target "$triple" -O1 -ffreestanding -std=c11 -c "$src" -o "$obj"
  run_logged "$tag: objdump failed" "$dis" "$KIT" objdump -d "$obj"
  # Isolate the `compute` function body and count residual calls.
  local ncalls
  ncalls=$(count_calls "$dis" compute) || exit 1
  if [[ "$ncalls" -ne 0 ]]; then
    printf 'whole-program-inline FAILED: %s left %s call(s) in compute (callee not fused)\n' \
      "$tag" "$ncalls" >&2
    extract_func "$dis" compute | sed 's/^/ | /' >&2
    exit 1
  fi
  printf 'whole-program-inline %-8s fused (compute call-free)\n' "$tag"
}

check_arch aarch64-linux-gnu aa64
check_arch x86_64-linux-gnu x64
check_arch riscv64-linux-gnu rv64

# Interposition guard: a weak callee is link-time replaceable, so inlining its
# body would defeat a strong override. The caller must keep the call. Check on
# every arch (one unified inliner path).
read -r -d '' WEAK_SRC <<'EOF' || true
__attribute__((weak)) int wcallee(int x) { return x + 1; }
int wcaller(int x) { return wcallee(x); }
EOF

# Compile WEAK_SRC for one target at -O1 and require `wcaller` to still
# contain a call instruction.
#   $1 - target triple passed to `kit cc -target`
#   $2 - short tag used for artifact names and messages
check_weak_not_inlined() {
  local triple=$1
  local tag=$2
  local src="$WORK/weak_$tag.c"
  local obj="$WORK/weak_$tag.o"
  local dis="$WORK/weak_$tag.dis"
  printf '%s\n' "$WEAK_SRC" > "$src"
  run_logged "weak $tag: cc -O1 failed" "$WORK/weak_$tag.cc.out" \
    "$KIT" cc -target "$triple" -O1 -ffreestanding -std=c11 -c "$src" -o "$obj"
  run_logged "weak $tag: objdump failed" "$dis" "$KIT" objdump -d "$obj"
  local ncalls
  ncalls=$(count_calls "$dis" wcaller) || exit 1
  if [[ "$ncalls" -eq 0 ]]; then
    printf 'whole-program-inline FAILED: %s inlined a WEAK callee (interposition unsound)\n' \
      "$tag" >&2
    extract_func "$dis" wcaller | sed 's/^/ | /' >&2
    exit 1
  fi
  printf 'whole-program-inline %-8s weak callee kept out-of-line\n' "$tag"
}

check_weak_not_inlined aarch64-linux-gnu aa64
check_weak_not_inlined x86_64-linux-gnu x64
check_weak_not_inlined riscv64-linux-gnu rv64

# Metric: the whole-program inliner must actually fire at -O1 (not just the
# streaming tiny-inliner, which emits opt.tiny_inline.inlined instead).
read -r -d '' RUN_SRC <<'EOF' || true
static int add1(int x) { return x + 1; }
int main(void) { return add1(41) == 42 ? 0 : 1; }
EOF
printf '%s\n' "$RUN_SRC" > "$WORK/run.c"
# The metric is searched for in the captured stderr of `kit run --time`.
if ! "$KIT" run --time -O1 "$WORK/run.c" > "$WORK/run.out" 2> "$WORK/run.err"; then
  printf 'whole-program-inline FAILED: `kit run -O1` did not exit 0\n' >&2
  dump_log "$WORK/run.err"
  exit 1
fi
if ! grep -q 'opt.inline.inlined' "$WORK/run.err"; then
  printf 'whole-program-inline FAILED: opt.inline.inlined metric absent at -O1\n' >&2
  sed -n '1,80p' "$WORK/run.err" >&2
  exit 1
fi
printf 'whole-program-inline run fired opt.inline.inlined, exit 0\n'

# The kit-native build verbs (build-exe/build-lib/build-obj) compile through the
# same kit_cg path as cc, so whole-program optimization participates without any
# build-verb-specific wiring. Guard that: build-obj at -O1 must fuse, and
# build-exe must produce a correct executable.
printf '%s\n' "$SRC" > "$WORK/verb.c"
run_logged "build-obj -O1 failed" "$WORK/verb.cc.out" \
  "$KIT" build-obj -O1 -ffreestanding "$WORK/verb.c" -o "$WORK/verb.o"
run_logged "objdump of build-obj output failed" "$WORK/verb.dis" \
  "$KIT" objdump -d "$WORK/verb.o"
vcalls=$(count_calls "$WORK/verb.dis" compute) || exit 1
if [[ "$vcalls" -ne 0 ]]; then
  printf 'whole-program-inline FAILED: build-obj -O1 did not fuse (LTO bypassed)\n' >&2
  extract_func "$WORK/verb.dis" compute | sed 's/^/ | /' >&2
  exit 1
fi
printf 'whole-program-inline build-obj fused (verb participates in LTO)\n'

read -r -d '' VERB_EXE_SRC <<'EOF' || true
static int add1(int x) { return x + 1; }
static int twice(int x) { return add1(add1(x)); }
int main(void) { return (twice(20) + add1(1)) == 24 ? 0 : 1; }
EOF
printf '%s\n' "$VERB_EXE_SRC" > "$WORK/verb_exe.c"
# Report a build failure separately from a wrong exit code so the message
# points at the step that actually broke.
run_logged "build-exe -O1 failed to build" "$WORK/verb_exe.cc.out" \
  "$KIT" build-exe -O1 "$WORK/verb_exe.c" -o "$WORK/verb_exe"
if ! "$WORK/verb_exe"; then
  printf 'whole-program-inline FAILED: build-exe -O1 produced wrong result\n' >&2
  dump_log "$WORK/verb_exe.cc.out"
  exit 1
fi
printf 'whole-program-inline build-exe correct + fused\n'

printf 'whole-program-inline: ok\n'