kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 6b2e04f0f93f4b32a33f92ffb37e565f412154e6
parent 578721239fdeb810d6b419713fad35b6f90a283f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed,  3 Jun 2026 08:36:44 -0700

doc: add binary code-size section from release linker map

code-size now relinks the release objects with -Wl,-map (explicit objects so
full paths survive dead_strip) and attributes each live symbol byte to a
component via scripts/relmap_md.awk, mirroring the source-line grouping.

Diffstat:
Mdoc/CODE_SIZE.md | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mmk/maint.mk | 20++++++++++++++++++--
Mscripts/code_size.sh | 32+++++++++++++++++++++++++++++---
Ascripts/relmap_md.awk | 161+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 325 insertions(+), 5 deletions(-)

diff --git a/doc/CODE_SIZE.md b/doc/CODE_SIZE.md
@@ -120,3 +120,120 @@ The full C compiler ≈ `lang/c` + `lang/cpp`, driving the shared
 | rt/ (freestanding runtime) | 5,167 |
 | vendor/ (monocypher, lz4) | 9,077 |
 | include/kit (public headers) | 2,935 |
+
+## Binary code size (release `kit`)
+
+Live `__TEXT`+`__DATA` symbol bytes attributed to each component from the
+release binary's linker map (`-Wl,-map`, after `-dead_strip`). Total
+attributed: **2,298,147 bytes** across 15,018 symbols' worth of objects. This is
+compiled machine code + data, so the shape differs from source lines —
+optimizer/codegen-heavy components weigh more here than their line counts suggest.
+
+### Arch backends
+
+| Component | Bytes |
+|---|---:|
+| aarch64 (`aa64`) | 143,665 |
+| x86-64 (`x64`) | 112,572 |
+| riscv64 (`rv64`) | 125,823 |
+| WebAssembly (`wasm`) | 90,812 |
+| C-source backend (`c_target`) | 62,105 |
+| shared core | 9,588 |
+
+### Object formats
+
+| Component | Bytes |
+|---|---:|
+| ELF | 43,012 |
+| Mach-O | 57,123 |
+| COFF/PE | 30,166 |
+| Wasm object | 1,883 |
+| core (format-neutral) | 19,941 |
+
+### Linker
+
+| Component | Bytes |
+|---|---:|
+| linker (`src/link`) | 91,806 |
+
+### Frontends
+
+| Component | Bytes |
+|---|---:|
+| C compiler (`lang/c`) | 153,809 |
+| toy | 107,884 |
+| cpp (lexer + preprocessor) | 59,881 |
+| wasm/WAT | 61,054 |
+
+### Other subsystems
+
+| Component | Bytes |
+|---|---:|
+| optimizer (`opt`) | 171,344 |
+| codegen (`cg`) | 147,994 |
+| api (composition) | 68,790 |
+| debug/DWARF (`debug`) | 54,798 |
+| interp (bytecode) | 30,907 |
+| emu (guest-ELF) | 22,232 |
+| asm (assembler) | 33,509 |
+| core (arenas/maps/…) | 7,441 |
+| abi (calling conv) | 8,671 |
+| dbg (debugger) | 8,160 |
+| os (emu syscalls) | 5,487 |
+| jit | 224 |
+
+### dist split
+
+| Component | Bytes |
+|---|---:|
+| packaging (manifest/kpkg/tar/trust) | 21,297 |
+| vendored compression (deflate/lz4) | 111,648 |
+| CAS store (blob/tree/cas) | 9,677 |
+| vendored crypto (blake2b/ed25519/b64/minisig) | 4,619 |
+
+### Driver
+
+| Component | Bytes |
+|---|---:|
+| cmd (all subcommands) | 221,077 |
+| env (host adapters) | 22,258 |
+| lib (shared support) | 28,197 |
+| `main.c` (dispatch) | 5,101 |
+
+### driver/cmd per-subcommand
+
+| Component | Bytes |
+|---|---:|
+| dbg | 45,001 |
+| cc | 30,583 |
+| objdump | 26,017 |
+| ld | 15,728 |
+| run | 11,898 |
+| pkg | 9,181 |
+| xxd | 8,255 |
+| objcopy | 7,887 |
+| ar | 7,814 |
+| compile | 6,303 |
+| strip | 5,962 |
+| size | 3,917 |
+| nm | 3,741 |
+| emu | 3,725 |
+| cas | 3,696 |
+| install | 3,365 |
+| mc | 3,353 |
+| as | 3,231 |
+| compress | 3,108 |
+| disas | 3,063 |
+| cmp | 2,912 |
+| strings | 2,828 |
+| ranlib | 2,503 |
+| cpp | 2,372 |
+| addr2line | 2,361 |
+| hash | 2,273 |
+
+### Vendor & unattributed
+
+| Component | Bytes |
+|---|---:|
+| vendor/ (monocypher, lz4) | 34,348 |
+| (unattributed / linker synthesized) | 109,244 |
diff --git a/mk/maint.mk b/mk/maint.mk
@@ -32,9 +32,25 @@ bench-opt:
 	$(MAKE) RELEASE=1 bin
 	@KIT='$(abspath build/release/kit)' bash scripts/opt_bench.sh
 
-# Regenerate doc/CODE_SIZE.md (per-component cloc line counts). Requires cloc.
+# Regenerate doc/CODE_SIZE.md: per-component cloc line counts plus a binary
+# code-size section attributed from the release linker map. Requires cloc.
+# The map step relinks the release objects explicitly (not via libkit.a) so the
+# full object paths survive in the map and dead_strip still prunes unused code.
+RELMAP := build/release/kit.map
+ifeq ($(HOST_OS),darwin)
+RELMAP_LDFLAG := -Wl,-map,$(RELMAP)
+else
+RELMAP_LDFLAG := -Wl,-Map=$(RELMAP)
+endif
+
 code-size:
-	bash scripts/code_size.sh
+	$(MAKE) RELEASE=1 bin
+	$(MAKE) RELEASE=1 $(RELMAP)
+	bash scripts/code_size.sh $(RELMAP)
+
+# Release binary relinked with a linker map (consumed by code-size).
+$(RELMAP): $(DRIVER_OBJS) $(LIB_OBJS)
+	$(CC) $(HOST_LDFLAGS) $(RELMAP_LDFLAG) -o $(BIN) $^ $(HOST_LDLIBS)
 
 clean:
 	rm -rf $(BUILD_DIR)
diff --git a/scripts/code_size.sh b/scripts/code_size.sh
@@ -3,8 +3,12 @@
 # per-format/per-target code separated from shared core. Counts are code lines
 # only (C + headers, excluding comments and blanks) via cloc.
 #
-# Usage: scripts/code_size.sh # rewrite doc/CODE_SIZE.md
-# scripts/code_size.sh --stdout # print to stdout instead
+# A second "Binary code size" section is appended when a linker map is passed
+# (or found at build/release/kit.map): per-component compiled byte sizes
+# attributed from the release binary's ld64 map. Generate the map with
+# `make code-size`, which relinks the release objects with -Wl,-map.
+#
+# Usage: scripts/code_size.sh [--stdout] [path/to/kit.map]
 
 set -uo pipefail
 ROOT="$(cd "$(dirname "$0")/.." && pwd)"
@@ -25,7 +29,15 @@ fmt() { printf "%s" "$1" | sed ':a;s/\B[0-9]\{3\}\>/,&/;ta'; }
 trow() { local label="$1"; shift; printf "| %s | %s |\n" "$label" "$(fmt "$(loc "$@")")"; }
 
 OUT="doc/CODE_SIZE.md"
-[ "${1:-}" = "--stdout" ] && OUT=/dev/stdout
+MAP=""
+for arg in "$@"; do
+  case "$arg" in
+    --stdout) OUT=/dev/stdout ;;
+    *) MAP="$arg" ;;
+  esac
+done
+# Fall back to the conventional map location if none was passed.
+[ -z "$MAP" ] && [ -f build/release/kit.map ] && MAP=build/release/kit.map
 
 DATE="$(date +%Y-%m-%d)"
 
@@ -153,6 +165,20 @@ EOF
   trow "rt/ (freestanding runtime)" rt
   trow "vendor/ (monocypher, lz4)" vendor
   trow "include/kit (public headers)" include
+
+  # --- Binary code size, from the release linker map (optional) --------------
+  if [ -n "$MAP" ] && [ -f "$MAP" ]; then
+    printf '\n'
+    awk -f "$ROOT/scripts/relmap_md.awk" "$MAP"
+  else
+    cat <<EOF
+
+## Binary code size (release \`kit\`)
+
+_Not generated: no linker map found. Run \`make code-size\` to relink the
+release binary with \`-Wl,-map\` and populate per-component compiled byte sizes._
+EOF
+  fi
 } > "$OUT"
 
 [ "$OUT" != /dev/stdout ] && echo "wrote $OUT"
diff --git a/scripts/relmap_md.awk b/scripts/relmap_md.awk
@@ -0,0 +1,161 @@
+# relmap_md.awk — turn an ld64 (-Wl,-map) linker map into the "Binary code
+# size" markdown section of doc/CODE_SIZE.md. Attributes each live symbol's
+# size to a component via its object-file path, mirroring the source-line
+# grouping. BSD-awk compatible (no strtonum/gensub).
+#
+# Usage: awk -f scripts/relmap_md.awk build/release/kit.map
+BEGIN { FS = "\t"; mode = 0 }
+
+function hex2dec(s, i, c, n, d) {
+  sub(/^0[xX]/, "", s); n = 0
+  for (i = 1; i <= length(s); i++) {
+    c = substr(s, i, 1)
+    d = index("0123456789abcdef", tolower(c)) - 1
+    if (d < 0) return 0
+    n = n * 16 + d
+  }
+  return n
+}
+
+# basename of path p without directory or trailing .o (a, k, s are locals)
+function stem(p, a, k, s) { k = split(p, a, "/"); s = a[k]; sub(/\.o$/, "", s); return s }
+
+# map an object-file path to a component key (also fills label[key])
+function classify(p, b) {
+  if (p ~ /\/lib\/arch\/aa64\//) { label["arch_aa64"]="aarch64 (`aa64`)"; return "arch_aa64" }
+  if (p ~ /\/lib\/arch\/x64\//) { label["arch_x64"]="x86-64 (`x64`)"; return "arch_x64" }
+  if (p ~ /\/lib\/arch\/rv64\//) { label["arch_rv64"]="riscv64 (`rv64`)"; return "arch_rv64" }
+  if (p ~ /\/lib\/arch\/wasm\//) { label["arch_wasm"]="WebAssembly (`wasm`)"; return "arch_wasm" }
+  if (p ~ /\/lib\/arch\/c_target\//) { label["arch_ct"]="C-source backend (`c_target`)"; return "arch_ct" }
+  if (p ~ /\/lib\/arch\//) { label["arch_core"]="shared core"; return "arch_core" }
+
+  if (p ~ /\/lib\/obj\/elf\//) { label["obj_elf"]="ELF"; return "obj_elf" }
+  if (p ~ /\/lib\/obj\/macho\//) { label["obj_macho"]="Mach-O"; return "obj_macho" }
+  if (p ~ /\/lib\/obj\/coff\//) { label["obj_coff"]="COFF/PE"; return "obj_coff" }
+  if (p ~ /\/lib\/obj\/wasm\//) { label["obj_wasm"]="Wasm object"; return "obj_wasm" }
+  if (p ~ /\/lib\/obj\//) { label["obj_core"]="core (format-neutral)"; return "obj_core" }
+
+  if (p ~ /\/lib\/link\//) { label["link"]="linker (`src/link`)"; return "link" }
+
+  if (p ~ /\/lib\/dist\//) {
+    b = stem(p)
+    if (b ~ /^(manifest|kpkg|tar|trust|dist)$/) { label["dist_pkg"]="packaging (manifest/kpkg/tar/trust)"; return "dist_pkg" }
+    if (b ~ /^(deflate|lz4|lz4frame)$/) { label["dist_cmp"]="vendored compression (deflate/lz4)"; return "dist_cmp" }
+    if (b ~ /^(blob|tree|cas)$/) { label["dist_cas"]="CAS store (blob/tree/cas)"; return "dist_cas" }
+    if (b ~ /^(blake2b|ed25519|b64|minisig)$/) { label["dist_crypto"]="vendored crypto (blake2b/ed25519/b64/minisig)"; return "dist_crypto" }
+    label["dist_other"]="dist (other)"; return "dist_other"
+  }
+
+  if (p ~ /\/lib\/opt\//) { label["sub_opt"]="optimizer (`opt`)"; return "sub_opt" }
+  if (p ~ /\/lib\/cg\//) { label["sub_cg"]="codegen (`cg`)"; return "sub_cg" }
+  if (p ~ /\/lib\/api\//) { label["sub_api"]="api (composition)"; return "sub_api" }
+  if (p ~ /\/lib\/debug\//) { label["sub_debug"]="debug/DWARF (`debug`)"; return "sub_debug" }
+  if (p ~ /\/lib\/interp\//) { label["sub_interp"]="interp (bytecode)"; return "sub_interp" }
+  if (p ~ /\/lib\/emu\//) { label["sub_emu"]="emu (guest-ELF)"; return "sub_emu" }
+  if (p ~ /\/lib\/asm\//) { label["sub_asm"]="asm (assembler)"; return "sub_asm" }
+  if (p ~ /\/lib\/core\//) { label["sub_core"]="core (arenas/maps/…)"; return "sub_core" }
+  if (p ~ /\/lib\/abi\//) { label["sub_abi"]="abi (calling conv)"; return "sub_abi" }
+  if (p ~ /\/lib\/dbg\//) { label["sub_dbg"]="dbg (debugger)"; return "sub_dbg" }
+  if (p ~ /\/lib\/os\//) { label["sub_os"]="os (emu syscalls)"; return "sub_os" }
+  if (p ~ /\/lib\/jit\//) { label["sub_jit"]="jit"; return "sub_jit" }
+
+  if (p ~ /\/lang\/c\//) { label["lang_c"]="C compiler (`lang/c`)"; return "lang_c" }
+  if (p ~ /\/lang\/cpp\//) { label["lang_cpp"]="cpp (lexer + preprocessor)"; return "lang_cpp" }
+  if (p ~ /\/lang\/toy\//) { label["lang_toy"]="toy"; return "lang_toy" }
+  if (p ~ /\/lang\/wasm\//) { label["lang_wasm"]="wasm/WAT"; return "lang_wasm" }
+
+  if (p ~ /\/driver\/cmd\//) { b = "cmd_" stem(p); label[b]=stem(p); cmdseen[b]=1; return b }
+  if (p ~ /\/driver\/env\//) { label["drv_env"]="env (host adapters)"; return "drv_env" }
+  if (p ~ /\/driver\/lib\//) { label["drv_lib"]="lib (shared support)"; return "drv_lib" }
+  if (p ~ /\/driver\/main\.o/) { label["drv_main"]="`main.c` (dispatch)"; return "drv_main" }
+
+  if (p ~ /\/vendor\//) { label["vendor"]="vendor/ (monocypher, lz4)"; return "vendor" }
+
+  label["other"]="(unattributed / linker synthesized)"; return "other"
+}
+
+/^# Object files:/ { mode = 1; next }
+/^# Sections:/ { mode = 2; next }
+/^# Symbols:/ { mode = 3; next }
+/^# Dead Stripped Symbols:/ { mode = 4; next }
+
+mode == 1 {
+  if (match($0, /^\[[ ]*[0-9]+\]/)) {
+    idx = substr($0, RSTART, RLENGTH); gsub(/[^0-9]/, "", idx)
+    pathpart = substr($0, RSTART + RLENGTH + 1)
+    objpath[idx] = pathpart
+  }
+  next
+}
+
+mode == 3 {
+  if ($1 ~ /^0[xX]/ && $2 ~ /^0[xX]/) {
+    if (match($3, /[0-9]+/)) {
+      idx = substr($3, RSTART, RLENGTH)
+      key = classify(objpath[idx])
+      sz = hex2dec($2)
+      total[key] += sz
+      grand += sz; nsym++
+    }
+  }
+  next
+}
+
+# ---- formatting helpers -----------------------------------------------------
+function commafy(n, s, out, c, i) {
+  s = sprintf("%d", n); out = ""; c = 0
+  for (i = length(s); i >= 1; i--) {
+    out = substr(s, i, 1) out; c++
+    if (c % 3 == 0 && i > 1) out = "," out
+  }
+  return out
+}
+function row(key, l) { if (!(key in total)) return; l = (key in label) ? label[key] : key
+  printf "| %s | %s |\n", l, commafy(total[key]) }
+function header(title) { printf "\n%s\n\n| Component | Bytes |\n|---|---:|\n", title }
+
+END {
+  print "## Binary code size (release `kit`)"
+  print ""
+  printf "Live `__TEXT`+`__DATA` symbol bytes attributed to each component from the\n"
+  printf "release binary's linker map (`-Wl,-map`, after `-dead_strip`). Total\n"
+  printf "attributed: **%s bytes** across %s symbols' worth of objects. This is\n", commafy(grand), commafy(0+nsym)
+  printf "compiled machine code + data, so the shape differs from source lines —\n"
+  printf "optimizer/codegen-heavy components weigh more here than their line counts suggest.\n"
+
+  header("### Arch backends")
+  row("arch_aa64"); row("arch_x64"); row("arch_rv64"); row("arch_wasm"); row("arch_ct"); row("arch_core")
+
+  header("### Object formats")
+  row("obj_elf"); row("obj_macho"); row("obj_coff"); row("obj_wasm"); row("obj_core")
+
+  header("### Linker")
+  row("link")
+
+  header("### Frontends")
+  row("lang_c"); row("lang_toy"); row("lang_cpp"); row("lang_wasm")
+
+  header("### Other subsystems")
+  row("sub_opt"); row("sub_cg"); row("sub_api"); row("sub_debug"); row("sub_interp")
+  row("sub_emu"); row("sub_asm"); row("sub_core"); row("sub_abi"); row("sub_dbg")
+  row("sub_os"); row("sub_jit")
+
+  header("### dist split")
+  row("dist_pkg"); row("dist_cmp"); row("dist_cas"); row("dist_crypto"); row("dist_other")
+
+  header("### Driver")
+  # aggregate + per-subcommand (descending bytes; ties broken by key so output is stable)
+  ncmd = 0
+  for (k in cmdseen) { ck[ncmd] = k; cv[ncmd] = total[k]; ncmd++ }
+  for (i = 0; i < ncmd; i++) for (j = i + 1; j < ncmd; j++)
+    if (cv[j] > cv[i] || (cv[j] == cv[i] && ck[j] < ck[i])) { t = cv[i]; cv[i] = cv[j]; cv[j] = t; t = ck[i]; ck[i] = ck[j]; ck[j] = t }
+  cmdsum = 0; for (i = 0; i < ncmd; i++) cmdsum += cv[i]
+  printf "| cmd (all subcommands) | %s |\n", commafy(cmdsum)
+  row("drv_env"); row("drv_lib"); row("drv_main")
+
+  header("### driver/cmd per-subcommand")
+  for (i = 0; i < ncmd; i++) printf "| %s | %s |\n", label[ck[i]], commafy(cv[i])
+
+  header("### Vendor & unattributed")
+  row("vendor"); row("other")
+}