relmap_md.awk (7774B)
# relmap_md.awk — turn an ld64 (-Wl,-map) linker map into the "Binary code
# size" markdown section of doc/CODE_SIZE.md. Attributes each live symbol's
# size to a component via its object-file path, mirroring the source-line
# grouping. BSD-awk compatible (no strtonum/gensub).
#
# Usage: awk -f scripts/relmap_md.awk build/release/kit.map
#
# `mode` tracks which map section is being read:
#   0 = preamble, 1 = "# Object files:", 2 = "# Sections:",
#   3 = "# Symbols:", 4 = "# Dead Stripped Symbols:" (ignored).
# Globals filled while reading:
#   objpath[idx] object-file path for each "[idx]" entry of the object list
#   total[key]   bytes attributed to component `key`
#   label[key]   human-readable row label for `key` (set by classify)
#   cmdseen[key] set for every driver/cmd component encountered
#   grand        sum of all attributed bytes
#   nsym         number of symbol records that were attributed
BEGIN { FS = "\t"; mode = 0; grand = 0; nsym = 0 }

# Convert a hexadecimal string (optional 0x/0X prefix) to a number.
# Returns 0 as soon as a non-hex character is met.
# i, c, n, d are locals.
function hex2dec(s,    i, c, n, d) {
    sub(/^0[xX]/, "", s)
    n = 0
    for (i = 1; i <= length(s); i++) {
        c = substr(s, i, 1)
        d = index("0123456789abcdef", tolower(c)) - 1
        if (d < 0) return 0
        n = n * 16 + d
    }
    return n
}

# basename without directory or trailing .o
# a, k, s are locals (s is declared so it does not leak into global scope).
function stem(p,    a, k, s) {
    k = split(p, a, "/")
    s = a[k]
    sub(/\.o$/, "", s)
    return s
}

# map an object-file path to a component key (also fills label[key])
# More specific path patterns are tested before their catch-all sibling
# (e.g. /lib/arch/x64/ before /lib/arch/). Anything unmatched is "other".
# b is a local.
function classify(p,    b) {
    if (p ~ /\/lib\/arch\/aa64\//)     { label["arch_aa64"]="aarch64 (`aa64`)"; return "arch_aa64" }
    if (p ~ /\/lib\/arch\/x64\//)      { label["arch_x64"]="x86-64 (`x64`)"; return "arch_x64" }
    if (p ~ /\/lib\/arch\/rv64\//)     { label["arch_rv64"]="riscv64 (`rv64`)"; return "arch_rv64" }
    if (p ~ /\/lib\/arch\/wasm\//)     { label["arch_wasm"]="WebAssembly (`wasm`)"; return "arch_wasm" }
    if (p ~ /\/lib\/arch\/c_target\//) { label["arch_ct"]="C-source backend (`c_target`)"; return "arch_ct" }
    if (p ~ /\/lib\/arch\//)           { label["arch_core"]="shared core"; return "arch_core" }

    if (p ~ /\/lib\/obj\/elf\//)   { label["obj_elf"]="ELF"; return "obj_elf" }
    if (p ~ /\/lib\/obj\/macho\//) { label["obj_macho"]="Mach-O"; return "obj_macho" }
    if (p ~ /\/lib\/obj\/coff\//)  { label["obj_coff"]="COFF/PE"; return "obj_coff" }
    if (p ~ /\/lib\/obj\/wasm\//)  { label["obj_wasm"]="Wasm object"; return "obj_wasm" }
    if (p ~ /\/lib\/obj\//)        { label["obj_core"]="core (format-neutral)"; return "obj_core" }

    if (p ~ /\/lib\/link\//) { label["link"]="linker (`src/link`)"; return "link" }

    if (p ~ /\/lib\/dist\//) {
        # object basenames are path-flattened (src/dist/manifest.c -> dist_manifest.o,
        # see flatobjs in mk/lib_srcs.mk); strip the dir prefix to recover the file.
        b = stem(p); sub(/^dist_/, "", b)
        if (b ~ /^(manifest|kpkg|tar|trust|dist)$/) { label["dist_pkg"]="packaging (manifest/kpkg/tar/trust)"; return "dist_pkg" }
        if (b ~ /^(deflate|lz4|lz4frame)$/) { label["dist_cmp"]="vendored compression (deflate/lz4)"; return "dist_cmp" }
        if (b ~ /^(blob|tree|cas)$/) { label["dist_cas"]="CAS store (blob/tree/cas)"; return "dist_cas" }
        if (b ~ /^(blake2b|ed25519|b64|minisig)$/) { label["dist_crypto"]="vendored crypto (blake2b/ed25519/b64/minisig)"; return "dist_crypto" }
        label["dist_other"]="dist (other)"; return "dist_other"
    }

    if (p ~ /\/lib\/opt\//)    { label["sub_opt"]="optimizer (`opt`)"; return "sub_opt" }
    if (p ~ /\/lib\/cg\//)     { label["sub_cg"]="codegen (`cg`)"; return "sub_cg" }
    if (p ~ /\/lib\/api\//)    { label["sub_api"]="api (composition)"; return "sub_api" }
    if (p ~ /\/lib\/debug\//)  { label["sub_debug"]="debug/DWARF (`debug`)"; return "sub_debug" }
    if (p ~ /\/lib\/interp\//) { label["sub_interp"]="interp (bytecode)"; return "sub_interp" }
    if (p ~ /\/lib\/emu\//)    { label["sub_emu"]="emu (guest-ELF)"; return "sub_emu" }
    if (p ~ /\/lib\/asm\//)    { label["sub_asm"]="asm (assembler)"; return "sub_asm" }
    if (p ~ /\/lib\/core\//)   { label["sub_core"]="core (arenas/maps/…)"; return "sub_core" }
    if (p ~ /\/lib\/abi\//)    { label["sub_abi"]="abi (calling conv)"; return "sub_abi" }
    if (p ~ /\/lib\/dbg\//)    { label["sub_dbg"]="dbg (debugger)"; return "sub_dbg" }
    if (p ~ /\/lib\/os\//)     { label["sub_os"]="os (emu syscalls)"; return "sub_os" }
    if (p ~ /\/lib\/jit\//)    { label["sub_jit"]="jit"; return "sub_jit" }

    if (p ~ /\/lang\/c\//)    { label["lang_c"]="C compiler (`lang/c`)"; return "lang_c" }
    if (p ~ /\/lang\/cpp\//)  { label["lang_cpp"]="cpp (lexer + preprocessor)"; return "lang_cpp" }
    if (p ~ /\/lang\/toy\//)  { label["lang_toy"]="toy"; return "lang_toy" }
    if (p ~ /\/lang\/wasm\//) { label["lang_wasm"]="wasm/WAT"; return "lang_wasm" }

    # each driver/cmd object becomes its own component, keyed "cmd_<stem>",
    # and is remembered in cmdseen for the per-subcommand table.
    if (p ~ /\/driver\/cmd\//)   { b = "cmd_" stem(p); label[b]=stem(p); cmdseen[b]=1; return b }
    if (p ~ /\/driver\/env\//)   { label["drv_env"]="env (host adapters)"; return "drv_env" }
    if (p ~ /\/driver\/lib\//)   { label["drv_lib"]="lib (shared support)"; return "drv_lib" }
    if (p ~ /\/driver\/main\.o/) { label["drv_main"]="`main.c` (dispatch)"; return "drv_main" }

    if (p ~ /\/vendor\//) { label["vendor"]="vendor/ (monocypher, lz4)"; return "vendor" }

    label["other"]="(unattributed / linker synthesized)"; return "other"
}

# ---- section headers of the map file switch the parsing mode ----------------
/^# Object files:/          { mode = 1; next }
/^# Sections:/              { mode = 2; next }
/^# Symbols:/               { mode = 3; next }
/^# Dead Stripped Symbols:/ { mode = 4; next }

# Object list: "[ idx] path" — remember the path for each numeric index.
mode == 1 {
    if (match($0, /^\[[ ]*[0-9]+\]/)) {
        idx = substr($0, RSTART, RLENGTH); gsub(/[^0-9]/, "", idx)
        # skip the bracketed index and the single separator character after it
        pathpart = substr($0, RSTART + RLENGTH + 1)
        objpath[idx] = pathpart
    }
    next
}

# Symbol table: tab-separated "address, size, [idx] name". The first run of
# digits in field 3 is taken as the object-file index.
mode == 3 {
    if ($1 ~ /^0[xX]/ && $2 ~ /^0[xX]/) {
        if (match($3, /[0-9]+/)) {
            idx = substr($3, RSTART, RLENGTH)
            key = classify(objpath[idx])
            sz = hex2dec($2)
            total[key] += sz
            grand += sz
            nsym++
        }
    }
    next
}

# ---- formatting helpers -----------------------------------------------------
# Format n as a decimal integer with "," between groups of three digits.
# s, out, c, i are locals.
function commafy(n,    s, out, c, i) {
    s = sprintf("%d", n); out = ""; c = 0
    for (i = length(s); i >= 1; i--) {
        out = substr(s, i, 1) out; c++
        if (c % 3 == 0 && i > 1) out = "," out
    }
    return out
}

# Print one table row for component `key`; silently skipped when no bytes
# were attributed to it. Falls back to the raw key when no label was set.
function row(key,    l) {
    if (!(key in total)) return
    l = (key in label) ? label[key] : key
    printf "| %s | %s |\n", l, commafy(total[key])
}

# Print a section title followed by the two-column markdown table header.
function header(title) { printf "\n%s\n\n| Component | Bytes |\n|---|---:|\n", title }

END {
    print "## Binary code size (release `kit`)"
    print ""
    printf "Live `__TEXT`+`__DATA` symbol bytes attributed to each component from the\n"
    printf "release binary's linker map (`-Wl,-map`, after `-dead_strip`). Total\n"
    # nsym counts attributed symbol records; NR would count every line of the
    # map (object list, sections, dead-stripped entries) and overstate it.
    printf "attributed: **%s bytes** across %s symbols' worth of objects. This is\n", commafy(grand), commafy(nsym)
    printf "compiled machine code + data, so the shape differs from source lines —\n"
    printf "optimizer/codegen-heavy components weigh more here than their line counts suggest.\n"

    header("### Arch backends")
    row("arch_aa64"); row("arch_x64"); row("arch_rv64"); row("arch_wasm"); row("arch_ct"); row("arch_core")

    header("### Object formats")
    row("obj_elf"); row("obj_macho"); row("obj_coff"); row("obj_wasm"); row("obj_core")

    header("### Linker")
    row("link")

    header("### Frontends")
    row("lang_c"); row("lang_toy"); row("lang_cpp"); row("lang_wasm")

    header("### Other subsystems")
    row("sub_opt"); row("sub_cg"); row("sub_api"); row("sub_debug"); row("sub_interp")
    row("sub_emu"); row("sub_asm"); row("sub_core"); row("sub_abi"); row("sub_dbg")
    row("sub_os"); row("sub_jit")

    header("### dist split")
    row("dist_pkg"); row("dist_cmp"); row("dist_cas"); row("dist_crypto"); row("dist_other")

    header("### Driver")
    # aggregate + per-subcommand (descending)
    ncmd = 0
    for (k in cmdseen) { ck[ncmd] = k; cv[ncmd] = total[k]; ncmd++ }
    # for-in order is unspecified, so equal sizes are tie-broken by key name
    # to keep the generated markdown identical across awk implementations.
    for (i = 0; i < ncmd; i++) for (j = i + 1; j < ncmd; j++)
        if (cv[j] > cv[i] || (cv[j] == cv[i] && ck[j] < ck[i])) {
            t = cv[i]; cv[i] = cv[j]; cv[j] = t
            t = ck[i]; ck[i] = ck[j]; ck[j] = t
        }
    cmdsum = 0; for (i = 0; i < ncmd; i++) cmdsum += cv[i]
    printf "| cmd (all subcommands) | %s |\n", commafy(cmdsum)
    row("drv_env"); row("drv_lib"); row("drv_main")

    header("### driver/cmd per-subcommand")
    for (i = 0; i < ncmd; i++) printf "| %s | %s |\n", label[ck[i]], commafy(cv[i])

    header("### Vendor & unattributed")
    row("vendor"); row("other")
}