commit b67efec9e6f40e94adf984d9e31165e297b6f1e7
parent 518c5c343c649db0492d762e55d24c36b6df8a32
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 29 Apr 2026 16:42:32 -0700
scripts/disasm-elf.sh
Diffstat:
1 file changed, 75 insertions(+), 0 deletions(-)
diff --git a/scripts/disasm-elf.sh b/scripts/disasm-elf.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+## disasm-elf.sh — disassemble a hex2-emitted ELF with llvm-objdump.
+##
+## Our seed ELF.hex2 sets ph_memsz to 512 MB (so the BSS region past
+## ELF_end is mappable), but ph_filesz is just the on-disk size.
+## llvm-objdump trusts memsz when laying out the segment for
+## disassembly and runs off the end of the file with
+## "The end of the file was unexpectedly encountered". The seed ELF
+## also lacks section headers, so --start-address/--stop-address
+## don't help on their own.
+##
+## Workaround: copy the ELF, patch ph_memsz down to ph_filesz, then
+## disassemble. Output goes to stdout.
+##
+## We also auto-default --start-address to e_entry so the ELF header +
+## program header bytes at the top of PT_LOAD aren't decoded as bogus
+## instructions. Pass an explicit --start-address (e.g. 0x600000) to
+## override and see the header bytes.
+##
+## Usage: disasm-elf.sh <elf> [llvm-objdump args...]
+## defaults to `-d` (text only). For data + text, pass `-D`.
+
+set -eu
+
+[ "$#" -ge 1 ] || { echo "usage: $0 <elf> [llvm-objdump args...]" >&2; exit 2; }
+
+ELF=$1; shift
+[ -e "$ELF" ] || { echo "missing $ELF" >&2; exit 1; }
+
+OBJDUMP=${LLVM_OBJDUMP:-llvm-objdump}
+TRIPLE=${TRIPLE:-aarch64-linux-gnu}
+
+# Extract ph_filesz from the first program header (only one in our
+# layout; e_phoff = 0x40, ph_filesz at offset 0x20 inside it = 0x60,
+# ph_memsz at 0x28 = 0x68). Both little-endian 8-byte.
+read_le8() {
+  # $1 file, $2 byte offset.
+  # Prints the 8 bytes found at that offset as one unsigned decimal
+  # number: od emits the value without an address column, and tr strips
+  # od's padding spaces and the trailing newline.
+  od -A n -t u8 -j "$2" -N 8 "$1" | tr -d ' \n'
+}
+write_le8() {
+ # $1 file, $2 offset, $3 value
+ printf '%016x' "$3" \
+ | sed 's/\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)/\8\7\6\5\4\3\2\1/' \
+ | xxd -r -p \
+ | dd of="$1" bs=1 seek="$2" count=8 conv=notrunc status=none
+}
+
+ENTRY=$(read_le8 "$ELF" 24)
+FILESZ=$(read_le8 "$ELF" 96)
+MEMSZ=$(read_le8 "$ELF" 104)
+
+TMP=$(mktemp -t disasm-elf.XXXXXX)
+trap 'rm -f "$TMP"' EXIT
+cp "$ELF" "$TMP"
+chmod u+w "$TMP"
+
+if [ "$MEMSZ" != "$FILESZ" ]; then
+ write_le8 "$TMP" 104 "$FILESZ"
+fi
+
+# Default to -d if no objdump flags given.
+[ "$#" -eq 0 ] && set -- -d
+
+# Auto-skip the ELF header + program header by defaulting
+# --start-address to e_entry, unless the user supplied their own.
+have_start=0
+for arg in "$@"; do
+ case "$arg" in
+ --start-address=*|--start-address) have_start=1; break;;
+ esac
+done
+if [ "$have_start" -eq 0 ]; then
+ set -- "--start-address=0x$(printf '%x' "$ENTRY")" "$@"
+fi
+
+exec "$OBJDUMP" --triple="$TRIPLE" "$@" "$TMP"