kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

code_size.sh (5758B)


      1 #!/usr/bin/env bash
      2 # Regenerate doc/CODE_SIZE.md: line counts per functional component, with
      3 # per-format/per-target code separated from shared core. Counts are code lines
      4 # only (C + headers, excluding comments and blanks) via cloc.
      5 #
      6 # A second "Binary code size" section is appended when a linker map is passed
      7 # (or found at build/release/kit.map): per-component compiled byte sizes
      8 # attributed from the release binary's ld64 map. Generate the map with
      9 # `make code-size`, which relinks the release objects with -Wl,-map.
     10 #
     11 # Usage: scripts/code_size.sh [--stdout] [path/to/kit.map]
     12 set -uo pipefail
     13 
     14 ROOT="$(cd "$(dirname "$0")/.." && pwd)"
     15 cd "$ROOT"
     16 
     17 command -v cloc >/dev/null 2>&1 || { echo "code_size.sh: cloc not found" >&2; exit 1; }
     18 
     19 # code-line count (cloc SUM 'code' column) for the given paths/globs
     20 loc() {
     21   cloc --quiet --csv --include-lang=C,"C/C++ Header" "$@" 2>/dev/null \
     22     | awk -F, '/SUM,/{print $5}'
     23 }
     24 
     25 # format an integer with thousands separators
     26 fmt() { printf "%s" "$1" | sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'; }
     27 
     28 # table row: "| <label> | <comma-formatted loc of paths> |"
     29 trow() { local label="$1"; shift; printf "| %s | %s |\n" "$label" "$(fmt "$(loc "$@")")"; }
     30 
     31 OUT="doc/CODE_SIZE.md"
     32 MAP=""
     33 for arg in "$@"; do
     34   case "$arg" in
     35     --stdout) OUT=/dev/stdout ;;
     36     *) MAP="$arg" ;;
     37   esac
     38 done
     39 # Fall back to the conventional map location if none was passed.
     40 [ -z "$MAP" ] && [ -f build/release/kit.map ] && MAP=build/release/kit.map
     41 
     42 DATE="$(date +%Y-%m-%d)"
     43 
     44 {
     45 cat <<EOF
     46 # kit Code Size
     47 
     48 Line counts per functional component, broken down so per-format and per-target
     49 code is separated from shared core. Counts are **code lines only** (C +
     50 headers, excluding comments and blanks), via \`cloc\`.
     51 
     52 > Snapshot: $DATE. Regenerate with \`make code-size\` (or
     53 > \`scripts/code_size.sh\`); these numbers drift as the tree changes.
     54 
     55 ## Arch backends — \`src/arch/\` ($(fmt "$(loc src/arch)"))
     56 
     57 | Component | Lines |
     58 |---|---:|
     59 EOF
     60 trow "aarch64 (\`aa64\`, reference backend)" src/arch/aa64
     61 trow "x86-64 (\`x64\`)" src/arch/x64
     62 trow "riscv64 (\`rv64\`)" src/arch/rv64
     63 trow "WebAssembly (\`wasm\`)" src/arch/wasm
     64 trow "C-source backend (\`c_target\`)" src/arch/c_target
     65 trow "shared core (ArchImpl, MCEmitter, disasm, dwarf, registry)" src/arch/*.c src/arch/*.h
     66 
     67 cat <<EOF
     68 
     69 ## Object model — \`src/obj/\` ($(fmt "$(loc src/obj)"))
     70 
     71 | Component | Lines |
     72 |---|---:|
     73 EOF
     74 trow "ELF" src/obj/elf
     75 trow "Mach-O" src/obj/macho
     76 trow "COFF/PE" src/obj/coff
     77 trow "Wasm object" src/obj/wasm
     78 trow "core (format-neutral model, registry, reloc apply, TLS)" src/obj/*.c src/obj/*.h
     79 
     80 cat <<EOF
     81 
     82 ## Linker — \`src/link/\` ($(fmt "$(loc src/link)"))
     83 
     84 Entirely **format-neutral** — no per-format files; emits through the \`obj/\`
     85 format writers above. Covers resolve, layout, relocation, linker scripts,
     86 incremental linking, and the JIT image mapper.
     87 
     88 ## Frontends — \`lang/\`
     89 
     90 | Frontend | Lines |
     91 |---|---:|
     92 EOF
     93 trow "C compiler (\`lang/c\`: parse/type/decl/ABI-lower)" lang/c
     94 trow "toy (CG-API exercise frontend)" lang/toy
     95 trow "cpp (C lexer + preprocessor; shared by C frontend & \`cpp\`)" lang/cpp
     96 trow "wasm/WAT" lang/wasm
     97 
     98 cat <<EOF
     99 
    100 The full C compiler ≈ \`lang/c\` + \`lang/cpp\`, driving the shared
    101 \`cg\`/\`abi\`/\`opt\` infrastructure below.
    102 
    103 ## Other subsystems — \`src/\`
    104 
    105 | Subsystem | Lines |
    106 |---|---:|
    107 EOF
    108 trow "optimizer (\`opt\`, -O1 SSA/regalloc)" src/opt
    109 trow "codegen (\`cg\`, public CG API + IR)" src/cg
    110 trow "api (composition layer)" src/api
    111 trow "debug/DWARF (\`debug\`)" src/debug
    112 trow "dist (CAS + \`.kpkg\`)" src/dist
    113 trow "interp (bytecode interpreter)" src/interp
    114 trow "emu (guest-ELF emulator)" src/emu
    115 trow "asm (standalone + inline assembler)" src/asm
    116 trow "core (arenas/maps/bufs/diag/hash)" src/core
    117 trow "abi (calling conventions)" src/abi
    118 trow "dbg (debugger)" src/dbg
    119 trow "os (emu syscall personality)" src/os
    120 trow "jit (stub; real mapper lives in \`link\`)" src/jit
    121 
    122 cat <<EOF
    123 
    124 ### dist split — \`src/dist/\` ($(fmt "$(loc src/dist)"))
    125 
    126 | Component | Lines |
    127 |---|---:|
    128 EOF
    129 trow "packaging (manifest/kpkg/tar/trust)" src/dist/manifest.* src/dist/kpkg.* src/dist/tar.* src/dist/trust.* src/dist/dist.*
    130 trow "vendored compression (deflate/lz4)" src/dist/deflate.* src/dist/lz4.* src/dist/lz4frame.*
    131 trow "CAS store (blob/tree/cas)" src/dist/blob.* src/dist/tree.* src/dist/cas.*
    132 trow "vendored crypto (blake2b/ed25519/b64/minisig)" src/dist/blake2b.* src/dist/ed25519.* src/dist/b64.* src/dist/minisig.*
    133 
    134 cat <<EOF
    135 
    136 ## Driver — \`driver/\` ($(fmt "$(loc driver)"))
    137 
    138 | Area | Lines |
    139 |---|---:|
    140 EOF
    141 trow "cmd (per-subcommand)" driver/cmd
    142 trow "env (host adapters)" driver/env
    143 trow "lib (shared support)" driver/lib
    144 trow "\`*.c\` (main + dispatch)" driver/*.c driver/*.h
    145 
    146 cat <<EOF
    147 
    148 ### driver/cmd per-subcommand
    149 
    150 | Subcommand | Lines |
    151 |---|---:|
    152 EOF
    153 cloc --quiet --by-file --csv --include-lang=C,"C/C++ Header" driver/cmd 2>/dev/null \
    154   | awk -F, 'NF>=5 && $5 ~ /^[0-9]+$/ && $2 ~ /cmd/ {n=$2; sub(/.*\//,"",n); printf "%d\t%s\n", $5, n}' \
    155   | sort -rn \
    156   | while IFS=$'\t' read -r n name; do printf "| %s | %s |\n" "$name" "$(fmt "$n")"; done
    157 
    158 cat <<EOF
    159 
    160 ## Outside \`src/\`
    161 
    162 | Area | Lines |
    163 |---|---:|
    164 EOF
    165 trow "rt/ (freestanding runtime)" rt
    166 trow "vendor/ (monocypher, lz4)" vendor
    167 trow "include/kit (public headers)" include
    168 
    169 # --- Binary code size, from the release linker map (optional) --------------
    170 if [ -n "$MAP" ] && [ -f "$MAP" ]; then
    171   printf '\n'
    172   awk -f "$(dirname "$0")/relmap_md.awk" "$MAP"
    173 else
    174   cat <<EOF
    175 
    176 ## Binary code size (release \`kit\`)
    177 
    178 _Not generated: no linker map found. Run \`make code-size\` to relink the
    179 release binary with \`-Wl,-map\` and populate per-component compiled byte sizes._
    180 EOF
    181 fi
    182 } > "$OUT"
    183 
    184 [ "$OUT" != /dev/stdout ] && echo "wrote $OUT"