kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

relmap_md.awk (7774B)


      1 # relmap_md.awk — turn an ld64 (-Wl,-map) linker map into the "Binary code
      2 # size" markdown section of doc/CODE_SIZE.md. Attributes each live symbol's
      3 # size to a component via its object-file path, mirroring the source-line
      4 # grouping. BSD-awk compatible (no strtonum/gensub).
      5 #
      6 # Usage: awk -f scripts/relmap_md.awk build/release/kit.map
      7 BEGIN { FS = "\t"; mode = 0 }
      8 
      9 function hex2dec(s,   i, c, n, d) {
     10   sub(/^0[xX]/, "", s); n = 0
     11   for (i = 1; i <= length(s); i++) {
     12     c = substr(s, i, 1)
     13     d = index("0123456789abcdef", tolower(c)) - 1
     14     if (d < 0) return 0
     15     n = n * 16 + d
     16   }
     17   return n
     18 }
     19 
     20 # basename without directory or trailing .o
     21 function stem(p,   a, k) { k = split(p, a, "/"); s = a[k]; sub(/\.o$/, "", s); return s }
     22 
     23 # map an object-file path to a component key (also fills label[key])
     24 function classify(p,   b) {
     25   if (p ~ /\/lib\/arch\/aa64\//)     { label["arch_aa64"]="aarch64 (`aa64`)"; return "arch_aa64" }
     26   if (p ~ /\/lib\/arch\/x64\//)      { label["arch_x64"]="x86-64 (`x64`)"; return "arch_x64" }
     27   if (p ~ /\/lib\/arch\/rv64\//)     { label["arch_rv64"]="riscv64 (`rv64`)"; return "arch_rv64" }
     28   if (p ~ /\/lib\/arch\/wasm\//)     { label["arch_wasm"]="WebAssembly (`wasm`)"; return "arch_wasm" }
     29   if (p ~ /\/lib\/arch\/c_target\//) { label["arch_ct"]="C-source backend (`c_target`)"; return "arch_ct" }
     30   if (p ~ /\/lib\/arch\//)           { label["arch_core"]="shared core"; return "arch_core" }
     31 
     32   if (p ~ /\/lib\/obj\/elf\//)   { label["obj_elf"]="ELF"; return "obj_elf" }
     33   if (p ~ /\/lib\/obj\/macho\//) { label["obj_macho"]="Mach-O"; return "obj_macho" }
     34   if (p ~ /\/lib\/obj\/coff\//)  { label["obj_coff"]="COFF/PE"; return "obj_coff" }
     35   if (p ~ /\/lib\/obj\/wasm\//)  { label["obj_wasm"]="Wasm object"; return "obj_wasm" }
     36   if (p ~ /\/lib\/obj\//)        { label["obj_core"]="core (format-neutral)"; return "obj_core" }
     37 
     38   if (p ~ /\/lib\/link\//) { label["link"]="linker (`src/link`)"; return "link" }
     39 
     40   if (p ~ /\/lib\/dist\//) {
     41     # object basenames are path-flattened (src/dist/manifest.c -> dist_manifest.o,
     42     # see flatobjs in mk/lib_srcs.mk); strip the dir prefix to recover the file.
     43     b = stem(p); sub(/^dist_/, "", b)
     44     if (b ~ /^(manifest|kpkg|tar|trust|dist)$/)    { label["dist_pkg"]="packaging (manifest/kpkg/tar/trust)"; return "dist_pkg" }
     45     if (b ~ /^(deflate|lz4|lz4frame)$/)            { label["dist_cmp"]="vendored compression (deflate/lz4)"; return "dist_cmp" }
     46     if (b ~ /^(blob|tree|cas)$/)                   { label["dist_cas"]="CAS store (blob/tree/cas)"; return "dist_cas" }
     47     if (b ~ /^(blake2b|ed25519|b64|minisig)$/)     { label["dist_crypto"]="vendored crypto (blake2b/ed25519/b64/minisig)"; return "dist_crypto" }
     48     label["dist_other"]="dist (other)"; return "dist_other"
     49   }
     50 
     51   if (p ~ /\/lib\/opt\//)    { label["sub_opt"]="optimizer (`opt`)"; return "sub_opt" }
     52   if (p ~ /\/lib\/cg\//)     { label["sub_cg"]="codegen (`cg`)"; return "sub_cg" }
     53   if (p ~ /\/lib\/api\//)    { label["sub_api"]="api (composition)"; return "sub_api" }
     54   if (p ~ /\/lib\/debug\//)  { label["sub_debug"]="debug/DWARF (`debug`)"; return "sub_debug" }
     55   if (p ~ /\/lib\/interp\//) { label["sub_interp"]="interp (bytecode)"; return "sub_interp" }
     56   if (p ~ /\/lib\/emu\//)    { label["sub_emu"]="emu (guest-ELF)"; return "sub_emu" }
     57   if (p ~ /\/lib\/asm\//)    { label["sub_asm"]="asm (assembler)"; return "sub_asm" }
     58   if (p ~ /\/lib\/core\//)   { label["sub_core"]="core (arenas/maps/…)"; return "sub_core" }
     59   if (p ~ /\/lib\/abi\//)    { label["sub_abi"]="abi (calling conv)"; return "sub_abi" }
     60   if (p ~ /\/lib\/dbg\//)    { label["sub_dbg"]="dbg (debugger)"; return "sub_dbg" }
     61   if (p ~ /\/lib\/os\//)     { label["sub_os"]="os (emu syscalls)"; return "sub_os" }
     62   if (p ~ /\/lib\/jit\//)    { label["sub_jit"]="jit"; return "sub_jit" }
     63 
     64   if (p ~ /\/lang\/c\//)    { label["lang_c"]="C compiler (`lang/c`)"; return "lang_c" }
     65   if (p ~ /\/lang\/cpp\//)  { label["lang_cpp"]="cpp (lexer + preprocessor)"; return "lang_cpp" }
     66   if (p ~ /\/lang\/toy\//)  { label["lang_toy"]="toy"; return "lang_toy" }
     67   if (p ~ /\/lang\/wasm\//) { label["lang_wasm"]="wasm/WAT"; return "lang_wasm" }
     68 
     69   if (p ~ /\/driver\/cmd\//) { b = "cmd_" stem(p); label[b]=stem(p); cmdseen[b]=1; return b }
     70   if (p ~ /\/driver\/env\//) { label["drv_env"]="env (host adapters)"; return "drv_env" }
     71   if (p ~ /\/driver\/lib\//) { label["drv_lib"]="lib (shared support)"; return "drv_lib" }
     72   if (p ~ /\/driver\/main\.o/) { label["drv_main"]="`main.c` (dispatch)"; return "drv_main" }
     73 
     74   if (p ~ /\/vendor\//) { label["vendor"]="vendor/ (monocypher, lz4)"; return "vendor" }
     75 
     76   label["other"]="(unattributed / linker synthesized)"; return "other"
     77 }
     78 
     79 /^# Object files:/ { mode = 1; next }
     80 /^# Sections:/     { mode = 2; next }
     81 /^# Symbols:/      { mode = 3; next }
     82 /^# Dead Stripped Symbols:/ { mode = 4 }
     83 
     84 mode == 1 {
     85   if (match($0, /^\[[ ]*[0-9]+\]/)) {
     86     idx = substr($0, RSTART, RLENGTH); gsub(/[^0-9]/, "", idx)
     87     pathpart = substr($0, RSTART + RLENGTH + 1)
     88     objpath[idx] = pathpart
     89   }
     90   next
     91 }
     92 
     93 mode == 3 {
     94   if ($1 ~ /^0[xX]/ && $2 ~ /^0[xX]/) {
     95     if (match($3, /[0-9]+/)) {
     96       idx = substr($3, RSTART, RLENGTH)
     97       key = classify(objpath[idx])
     98       sz = hex2dec($2)
     99       total[key] += sz
    100       grand += sz
    101     }
    102   }
    103   next
    104 }
    105 
    106 # ---- formatting helpers -----------------------------------------------------
    107 function commafy(n,   s, out, c, i) {
    108   s = sprintf("%d", n); out = ""; c = 0
    109   for (i = length(s); i >= 1; i--) {
    110     out = substr(s, i, 1) out; c++
    111     if (c % 3 == 0 && i > 1) out = "," out
    112   }
    113   return out
    114 }
    115 function row(key,   l) { if (!(key in total)) return; l = (key in label) ? label[key] : key
    116   printf "| %s | %s |\n", l, commafy(total[key]) }
    117 function header(title) { printf "\n%s\n\n| Component | Bytes |\n|---|---:|\n", title }
    118 
    119 END {
    120   print  "## Binary code size (release `kit`)"
    121   print  ""
    122   printf "Live `__TEXT`+`__DATA` symbol bytes attributed to each component from the\n"
    123   printf "release binary's linker map (`-Wl,-map`, after `-dead_strip`). Total\n"
    124   printf "attributed: **%s bytes** across %s symbols' worth of objects. This is\n", commafy(grand), commafy(0+NR)
    125   printf "compiled machine code + data, so the shape differs from source lines —\n"
    126   printf "optimizer/codegen-heavy components weigh more here than their line counts suggest.\n"
    127 
    128   header("### Arch backends")
    129   row("arch_aa64"); row("arch_x64"); row("arch_rv64"); row("arch_wasm"); row("arch_ct"); row("arch_core")
    130 
    131   header("### Object formats")
    132   row("obj_elf"); row("obj_macho"); row("obj_coff"); row("obj_wasm"); row("obj_core")
    133 
    134   header("### Linker")
    135   row("link")
    136 
    137   header("### Frontends")
    138   row("lang_c"); row("lang_toy"); row("lang_cpp"); row("lang_wasm")
    139 
    140   header("### Other subsystems")
    141   row("sub_opt"); row("sub_cg"); row("sub_api"); row("sub_debug"); row("sub_interp")
    142   row("sub_emu"); row("sub_asm"); row("sub_core"); row("sub_abi"); row("sub_dbg")
    143   row("sub_os"); row("sub_jit")
    144 
    145   header("### dist split")
    146   row("dist_pkg"); row("dist_cmp"); row("dist_cas"); row("dist_crypto"); row("dist_other")
    147 
    148   header("### Driver")
    149   # aggregate + per-subcommand (descending)
    150   ncmd = 0
    151   for (k in cmdseen) { ck[ncmd] = k; cv[ncmd] = total[k]; ncmd++ }
    152   for (i = 0; i < ncmd; i++) for (j = i + 1; j < ncmd; j++)
    153     if (cv[j] > cv[i]) { t = cv[i]; cv[i] = cv[j]; cv[j] = t; t = ck[i]; ck[i] = ck[j]; ck[j] = t }
    154   cmdsum = 0; for (i = 0; i < ncmd; i++) cmdsum += cv[i]
    155   printf "| cmd (all subcommands) | %s |\n", commafy(cmdsum)
    156   row("drv_env"); row("drv_lib"); row("drv_main")
    157 
    158   header("### driver/cmd per-subcommand")
    159   for (i = 0; i < ncmd; i++) printf "| %s | %s |\n", label[ck[i]], commafy(cv[i])
    160 
    161   header("### Vendor & unattributed")
    162   row("vendor"); row("other")
    163 }