commit 5c810d428f71b2b544e9fa4eb524eae88c838895
parent 30273099f630eb2e8f787c413fc863ad2e44edfa
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 24 Apr 2026 09:51:49 -0700
Add pokem, a P1 raw-byte file patcher
pokem overlays ASCII-hex-decoded bytes onto a file at a given decimal
offset. The primary use case is patching ELF header fields after
assembly — e.g. bumping p_memsz to reserve a BSS region without
emitting literal zero bytes in the source.
Implementation notes:
- Single fd with O_RDWR; advances position via read-into-scratch, then
writes in place. No in-memory file buffer, so pokem itself is ~1.8 KB
and handles files of any size.
- Bootstrap-tool posture: no input validation, no syscall error
checking. Branchless hex_nibble decode via (c & 15) + 9 * (c >> 6)
inlined into parse_hex_bytes, which is a leaf with no BSS spill.
- Only uses the P1 common syscall surface (openat, read, write, close,
exit).
Diffstat:
| A | pokem/pokem.M1 | | | 262 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 262 insertions(+), 0 deletions(-)
diff --git a/pokem/pokem.M1 b/pokem/pokem.M1
@@ -0,0 +1,262 @@
+## pokem.M1 — raw-byte file patcher, P1.
+##
+## Runtime shape: pokem file offset hex_bytes
+##
+## Opens `file` with O_RDWR, advances the file position by `offset`
+## bytes via discard reads, then writes the ASCII-hex-decoded payload
+## bytes in place. Primary use case is patching ELF header fields
+## after assembly — e.g. bumping p_memsz to reserve a BSS region
+## without emitting literal zero bytes in the source.
+##
+## Arguments:
+## file path to the file to patch in place.
+## offset byte offset, decimal ASCII, unsigned.
+## hex_bytes 2*N ASCII hex nibbles (upper or lower case). Each pair
+## becomes one raw byte. Empty string is a no-op. N must not exceed
+## 256 (the size of patch_buf); this is not checked.
+##
+## Bootstrap-tool posture: no input validation and no syscall error
+## checking. Callers are expected to pass well-formed inputs, and the
+## tool is invoked by build scripts, not end users. Bad input produces
+## either a bogus patch, a crash, or a silently extended file; we trade
+## defensive code for auditability.
+##
+## Pipeline:
+## _start stash argv; decode offset (parse_u64) and
+## payload (parse_hex_bytes into patch_buf);
+## openat O_RDWR; advance via read into scratch_buf
+## until offset bytes consumed; write patch_buf;
+## close; exit(0).
+## parse_u64 leaf. Decimal ASCII -> u64 in a0.
+## parse_hex_bytes leaf. ASCII hex -> patch_buf; returns byte count
+## in a0. Uses a branchless nibble decode that is
+## correct for [0-9A-Fa-f] and garbage otherwise.
+##
+## P1 ABI: a0..a3 arg/return, t0..t2 caller-saved temps, s0..s3
+## callee-saved (unused here). _start has no frame; the kernel-supplied
+## SP carries argv/argc directly via portable-offset 0/8/16
+## (argv[1..3]).
+
+## --- Constants & sizing ------------------------------------------------------
+
+## patch_buf cap: 256 bytes (0x100 spelled as an 8-byte little-endian immediate). ELF-header patch
+## fields are 4 or 8 bytes, so this is generous. No code in this file references it; not enforced.
+DEFINE POKEM_PATCH_CAP 0001000000000000
+
+## scratch_buf cap: 256 bytes (same little-endian spelling). Upper bound on one discard read:
+## advance_loop reads min(remaining, cap) bytes per iteration, so large offsets take
+## offset/256 read syscalls rather than one per byte.
+DEFINE POKEM_SCRATCH_CAP 0001000000000000
+
+## openat operands. O_RDWR = 2 so a single fd handles both the position-advance reads and the
+## in-place write. AT_FDCWD is -100 as a 64-bit two's-complement little-endian immediate.
+DEFINE O_RDWR 0200000000000000
+DEFINE AT_FDCWD 9CFFFFFFFFFFFFFF
+
+DEFINE ZERO32 '0000000000000000000000000000000000000000000000000000000000000000'
+DEFINE ZERO8 '0000000000000000'
+
+## --- Runtime shell: argv, open, advance, write, exit -------------------------
+##
+## _start: process entry point. Never returns; leaves via the exit syscall.
+## exit(0) once patch_len bytes have been written at `offset`.
+## exit(1) if a read or write returns 0 or an error (see pokem_fail).
+##
+## The progress checks after read and write are a deliberate, minimal
+## exception to the file-level "no syscall error checking" posture:
+## without them every failure mode (missing file, offset past EOF,
+## failed write) spins forever instead of failing, which stalls the
+## build script that invoked us. Raw Linux syscalls report errors as a
+## negative errno in a0; seen as unsigned that is a huge value, which is
+## what the bltu checks below rely on.
+##
+## t0/t1 are live across the read/write syscalls, as in the code's
+## original form; this assumes the P1 syscall macro preserves them.
+
+:_start
+ # Stash argv[1..3] pointers so downstream code doesn't need sp,off.
+ # file_path = argv[1] (native sp+16 == portable sp+0)
+ ld_t0,sp,0
+ la_a1 &file_path
+ st_t0,a1,0
+ # offset_str = argv[2]
+ ld_t0,sp,8
+ la_a1 &offset_str
+ st_t0,a1,0
+ # hex_str = argv[3]
+ ld_t0,sp,16
+ la_a1 &hex_str
+ st_t0,a1,0
+
+ # offset = parse_u64(offset_str)
+ la_a0 &offset_str
+ ld_a0,a0,0
+ la_br &parse_u64
+ call
+ la_a1 &offset_val
+ st_a0,a1,0
+
+ # patch_len = parse_hex_bytes(hex_str) — bytes land in patch_buf
+ la_a0 &hex_str
+ ld_a0,a0,0
+ la_br &parse_hex_bytes
+ call
+ la_a1 &patch_len
+ st_a0,a1,0
+
+ # fd = openat(AT_FDCWD, file_path, O_RDWR, 0)
+ # Operands as used here: a0 = syscall number, a1..a3 = args 1..3,
+ # t0 = arg 4 (mode, unused without O_CREAT).
+ la_a0 &file_path
+ ld_a2,a0,0
+ li_a0 sys_openat
+ li_a1 AT_FDCWD
+ li_a3 O_RDWR
+ li_t0 %0 %0
+ syscall
+ la_a1 &file_fd
+ st_a0,a1,0
+ # The fd itself is not checked. A failed open stores a negative value;
+ # the first read or write on it is expected to fail (EBADF) and is
+ # caught by the checks below. With offset == 0 and an empty payload no
+ # read or write is issued, so that one case still exits 0.
+
+ # Advance the file position by offset bytes via discard reads into
+ # scratch_buf. Each iteration reads min(remaining, SCRATCH_CAP).
+ la_a0 &offset_val
+ ld_t0,a0,0 # t0 = remaining
+
+:advance_loop
+ la_br &advance_done
+ beqz_t0
+
+ # chunk = min(remaining, SCRATCH_CAP) -> a3
+ mov_a3,t0 # default: chunk = remaining
+ li_t1 POKEM_SCRATCH_CAP
+ la_br &advance_do_read
+ bltu_t0,t1 # if remaining < cap, keep a3 = remaining
+ mov_a3,t1 # else chunk = cap
+
+:advance_do_read
+ # n = read(fd, scratch_buf, chunk)
+ la_a0 &file_fd
+ ld_a1,a0,0
+ la_a2 &scratch_buf
+ li_a0 sys_read
+ syscall
+
+ # n == 0: EOF before `offset` was reached (offset > file size).
+ la_br &pokem_fail
+ beqz_a0
+
+ # n > remaining (unsigned): only a negative errno can do this, since
+ # a successful read returns at most chunk <= remaining. t1 is free
+ # here; the cap is reloaded at the top of the next iteration.
+ li_t1 %0 %0
+ add_t1,t1,a0 # t1 = n
+ la_br &pokem_fail
+ bltu_t0,t1
+
+ # remaining -= n
+ sub_t0,t0,a0
+ la_br &advance_loop
+ b
+
+:advance_done
+ # Write patch_buf (patch_len bytes) at the current position.
+ la_a0 &patch_len
+ ld_t0,a0,0 # t0 = patch_len (total)
+ li_t1 %0 %0 # t1 = written
+
+:write_loop
+ la_br &write_done
+ beq_t1,t0
+
+ # n = write(fd, patch_buf + written, patch_len - written)
+ la_a0 &file_fd
+ ld_a1,a0,0
+ la_a2 &patch_buf
+ add_a2,a2,t1
+ sub_a3,t0,t1
+ li_a0 sys_write
+ syscall
+
+ # n == 0: no progress; retrying would spin.
+ la_br &pokem_fail
+ beqz_a0
+
+ # written += n. A successful write keeps written <= total. A negative
+ # errno pushes written downward; when written is still 0 (the usual
+ # failed-open case) it wraps to a huge unsigned value at once and the
+ # check fires. After a partial write it fires once repeated failures
+ # have driven written below zero.
+ add_t1,t1,a0
+ la_br &pokem_fail
+ bltu_t0,t1 # total < written => error
+ la_br &write_loop
+ b
+
+:write_done
+ # close(fd); exit(0)
+ la_a0 &file_fd
+ ld_a1,a0,0
+ li_a0 sys_close
+ syscall
+
+ li_a0 sys_exit
+ li_a1 %0 %0
+ syscall
+
+:pokem_fail
+ # exit(1). The fd is not closed explicitly; the process is exiting.
+ li_a0 sys_exit
+ li_a1 %1 %0
+ syscall
+
+## --- parse_u64(a0=str) -> a0=value --------------------------------------------
+## Leaf routine. Folds a NUL-terminated run of ASCII decimal digits into
+## an unsigned 64-bit accumulator and returns it in a0. An empty string
+## yields 0. There is no digit check and no overflow check: any byte
+## other than NUL is treated as (byte - 48), so the caller must pass a
+## digits-only string.
+##
+## Registers: t0 = read pointer, t1 = accumulator, a1 = current byte,
+## a3 = radix (10). Nothing is saved or restored.
+
+:parse_u64
+ li_a3 %10 %0 # radix, constant for the whole walk
+ li_t1 %0 %0 # acc = 0
+ mov_t0,a0 # p = str
+
+:pu64_next
+ lb_a1,t0,0 # ch = *p
+ la_br &pu64_ret
+ beqz_a1 # NUL terminator ends the number
+
+ addi_t0,t0,1 # p += 1 (byte is already in a1)
+ addi_a1,a1,neg48 # ch -> digit value
+ mul_t1,t1,a3 # acc *= 10
+ add_t1,t1,a1 # acc += digit
+
+ la_br &pu64_next
+ b
+
+:pu64_ret
+ mov_a0,t1
+ ret
+
+## --- parse_hex_bytes(a0=str) -> a0=byte_count ---------------------------------
+## Leaf. Walks the NUL-terminated ASCII hex string two chars at a time,
+## storing one decoded byte per pair into patch_buf, and returns the
+## number of bytes stored in a0. Empty input returns 0.
+##
+## Odd length: the unpaired trailing nibble is dropped. The walk stops
+## as soon as either character of a pair is NUL; stepping the cursor by
+## two over the terminator would otherwise keep decoding whatever memory
+## follows the argument string.
+##
+## Still unvalidated: non-hex characters decode to garbage bytes, and
+## count is not bounded — more than 256 decoded bytes run past the end
+## of patch_buf (POKEM_PATCH_CAP is not enforced here).
+##
+## Branchless nibble decode: for any c in [0-9A-Fa-f],
+## nibble = (c & 15) + 9 * (c >> 6)
+## Digits (48..57) have bit 6 clear, so `9 * (c>>6)` is 0 and
+## `c & 15` is the value. Letters (65..70 / 97..102) have bit 6 set,
+## so we add 9 to the low nibble (1..6) to land on 10..15.
+##
+## Registers: t0 = cursor, t1 = patch_buf base, t2 = count,
+## a3 = constant 9, a0/a1/a2 = per-pair scratch.
+
+:parse_hex_bytes
+ mov_t0,a0 # cursor
+ la_t1 &patch_buf # dst base
+ li_t2 %0 %0 # count
+ li_a3 %9 %0 # letter adjustment; loop-invariant
+
+:phb_loop
+ lb_a0,t0,0 # c0 = *cursor
+ la_br &phb_done
+ beqz_a0
+
+ lb_a1,t0,1 # c1 = *(cursor+1)
+ la_br &phb_done
+ beqz_a1 # odd length: stop at the terminator
+
+ # hi = (c0 & 15) + 9 * (c0 >> 6)
+ andi_a2,a0,15
+ shri_a0,a0,6
+ mul_a0,a0,a3
+ add_a0,a2,a0
+
+ # lo = (c1 & 15) + 9 * (c1 >> 6)
+ andi_a2,a1,15
+ shri_a1,a1,6
+ mul_a1,a1,a3
+ add_a1,a2,a1
+
+ # byte = (hi << 4) | lo; patch_buf[count] = byte
+ shli_a0,a0,4
+ or_a0,a0,a1
+ add_a2,t1,t2
+ sb_a0,a2,0
+
+ addi_t2,t2,1 # count += 1
+ addi_t0,t0,2 # cursor += 2
+ la_br &phb_loop
+ b
+
+:phb_done
+ mov_a0,t2
+ ret
+
+## --- BSS ---------------------------------------------------------------------
+
+:file_path ZERO8 # argv[1] pointer; stored by _start, read back for openat
+:offset_str ZERO8 # argv[2] pointer; stored by _start, passed to parse_u64
+:hex_str ZERO8 # argv[3] pointer; stored by _start, passed to parse_hex_bytes
+:offset_val ZERO8 # parse_u64 result; initial `remaining` for advance_loop
+:patch_len ZERO8 # parse_hex_bytes result; byte total for write_loop
+:file_fd ZERO8 # openat return value; reloaded before each read/write/close
+
+## scratch_buf: 8 x ZERO32 = 256 bytes, matching POKEM_SCRATCH_CAP — discard-read target.
+:scratch_buf
+ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32
+
+## patch_buf: 8 x ZERO32 = 256 bytes, matching POKEM_PATCH_CAP — decoded payload. Last item before ELF_end.
+:patch_buf
+ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32
+
+:ELF_end