boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 27dba8a270502627d69babede9b2112837600836
parent d0d9ec33dc17c2c446a8727ab09149cad0997fcc
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 16:27:25 -0700

hex2pp: single-input CLI + .ptrsize directive; P1 table prune

hex2pp.c / hex2pp.P1: drop multi-file -f loop in favour of one IN/OUT
positional pair (callers catm upstream). Add .ptrsize directive so &/%
sigil width is configurable per run (default 4, 8 for 64-bit images).
Short-flag CLI (-B/-E/-e/-b/-N) replaces the long-flag form.

P1-{aarch64,amd64,riscv64}.M1: regenerate after pruning unused entries
and adding the small set referenced by recent expansion paths.

docs/HEX2pp.md, scripts/boot-run-tests.sh: re-document the CLI shape
and drop the m1pp suite's redundant hex2pp smoke step (the p1 / cc-*
suites already exercise the assemble path against complete programs).

Diffstat:
MP1/P1-aarch64.M1 | 25+++++++------------------
MP1/P1-amd64.M1 | 25+++++++------------------
MP1/P1-riscv64.M1 | 25+++++++------------------
Mdocs/HEX2pp.md | 91++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Mhex2pp/hex2pp.P1 | 323++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Mhex2pp/hex2pp.c | 118++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mscripts/boot-run-tests.sh | 30++++--------------------------
7 files changed, 335 insertions(+), 302 deletions(-)

diff --git a/P1/P1-aarch64.M1 b/P1/P1-aarch64.M1 @@ -23,6 +23,7 @@ DEFINE la_br 5100001802000014 ## ---- Moves DEFINE mov_a0,a1 E00301AA +DEFINE mov_a0,a2 E00302AA DEFINE mov_a0,a3 E00303AA DEFINE mov_a0,t0 E00309AA DEFINE mov_a0,t1 E0030AAA @@ -39,11 +40,10 @@ DEFINE mov_t2,a0 EB0300AA DEFINE mov_t2,t1 EB030AAA ## ---- Register Arithmetic -DEFINE add_a0,a0,a1 0000018B DEFINE add_a0,a0,a2 0000028B +DEFINE add_a0,a0,a3 0000038B DEFINE add_a0,a0,t0 0000098B DEFINE add_a0,a0,t1 00000A8B -DEFINE add_a0,a0,t2 00000B8B DEFINE add_a0,a2,a0 4000008B DEFINE add_a0,a2,t2 40000B8B DEFINE add_a0,t0,a3 2001038B @@ -57,7 +57,6 @@ DEFINE add_a1,a1,a2 2100028B DEFINE add_a1,a1,a3 2100038B DEFINE add_a1,a1,t0 2100098B DEFINE add_a1,a1,t1 21000A8B -DEFINE add_a1,a2,a3 4100038B DEFINE add_a1,a2,t0 4100098B DEFINE add_a1,a3,a1 6100018B DEFINE add_a1,t0,a0 2101008B @@ -67,9 +66,11 @@ DEFINE add_a1,t2,t1 61010A8B DEFINE add_a2,a1,a3 2200038B DEFINE add_a2,a1,t0 2200098B DEFINE add_a2,a2,a0 4200008B +DEFINE add_a2,a2,a1 4200018B DEFINE add_a2,a2,a3 4200038B DEFINE add_a2,a2,t0 4200098B DEFINE add_a2,a2,t1 42000A8B +DEFINE add_a2,a2,t2 42000B8B DEFINE add_a2,a3,a2 6200028B DEFINE add_a2,t0,t1 22010A8B DEFINE add_a2,t2,a0 6201008B @@ -100,7 +101,6 @@ DEFINE add_t1,t1,a2 4A01028B DEFINE add_t1,t1,t2 4A010B8B DEFINE add_t2,a0,t0 0B00098B DEFINE add_t2,a0,t1 0B000A8B -DEFINE add_t2,a1,a3 2B00038B DEFINE add_t2,a1,t2 2B000B8B DEFINE add_t2,a2,t1 4B000A8B DEFINE add_t2,t0,t1 2B010A8B @@ -113,11 +113,11 @@ DEFINE sub_a0,a1,t2 20000BCB DEFINE sub_a0,a3,a0 600000CB DEFINE sub_a1,t0,a0 210100CB DEFINE sub_a2,a1,a0 220000CB +DEFINE sub_a2,a1,a3 220003CB DEFINE sub_a2,a2,a2 420002CB DEFINE sub_a2,a2,t0 420009CB DEFINE sub_a2,t0,t1 22010ACB DEFINE sub_a2,t2,a3 620103CB -DEFINE sub_a3,a3,a1 630001CB DEFINE sub_a3,a3,a2 630002CB DEFINE sub_a3,t0,a2 230102CB DEFINE sub_a3,t0,a3 230103CB @@ -138,10 +138,8 @@ DEFINE shl_a2,a2,a3 4220C39A DEFINE sar_a2,a2,a3 4228C39A DEFINE mul_a0,a0,a3 007C039B DEFINE 
mul_a0,t1,t2 407D0B9B -DEFINE mul_a2,a2,t0 427C099B DEFINE mul_a3,a3,a2 637C029B DEFINE mul_t0,t0,a1 297D019B -DEFINE mul_t0,t0,a2 297D029B DEFINE mul_t2,t0,a2 2B7D029B DEFINE div_a0,a0,a1 000CC19A DEFINE div_a2,a2,a3 420CC39A @@ -239,11 +237,9 @@ DEFINE ld_a1,t1,24 410D40F9 DEFINE ld_a1,t2,16 610940F9 DEFINE ld_a1,sp,8 E10F40F9 DEFINE ld_a2,a0,0 020040F9 -DEFINE ld_a2,a0,8 020440F9 DEFINE ld_a2,a0,16 020840F9 DEFINE ld_a2,a0,24 020C40F9 DEFINE ld_a2,a1,0 220040F9 -DEFINE ld_a2,a1,8 220440F9 DEFINE ld_a2,a2,0 420040F9 DEFINE ld_a2,t0,0 220140F9 DEFINE ld_a2,t0,8 220540F9 @@ -254,7 +250,6 @@ DEFINE ld_a2,t1,0 420140F9 DEFINE ld_a2,t2,0 620140F9 DEFINE ld_a2,sp,16 E21340F9 DEFINE ld_a3,a0,0 030040F9 -DEFINE ld_a3,a0,8 030440F9 DEFINE ld_a3,a0,16 030840F9 DEFINE ld_a3,a1,0 230040F9 DEFINE ld_a3,a1,8 230440F9 @@ -274,8 +269,6 @@ DEFINE ld_t0,a1,16 290840F9 DEFINE ld_t0,a1,32 291040F9 DEFINE ld_t0,a2,0 490040F9 DEFINE ld_t0,t0,0 290140F9 -DEFINE ld_t0,t1,8 490540F9 -DEFINE ld_t0,t1,16 490940F9 DEFINE ld_t0,t2,0 690140F9 DEFINE ld_t0,t2,16 690940F9 DEFINE ld_t0,sp,0 E90B40F9 @@ -357,7 +350,6 @@ DEFINE st_a2,t2,0 620100F9 DEFINE st_a3,a0,0 030000F9 DEFINE st_a3,a1,0 230000F9 DEFINE st_a3,a2,0 430000F9 -DEFINE st_a3,t0,0 230100F9 DEFINE st_a3,t0,24 230D00F9 DEFINE st_a3,t1,8 430500F9 DEFINE st_a3,t2,0 630100F9 @@ -365,6 +357,7 @@ DEFINE st_a3,t2,8 630500F9 DEFINE st_a3,t2,16 630900F9 DEFINE st_a3,t2,24 630D00F9 DEFINE st_t0,a0,0 090000F9 +DEFINE st_t0,a0,8 090400F9 DEFINE st_t0,a0,16 090800F9 DEFINE st_t0,a0,24 090C00F9 DEFINE st_t0,a1,0 290000F9 @@ -402,12 +395,12 @@ DEFINE lb_a1,a3,1 61044039 DEFINE lb_a2,a1,0 22004039 DEFINE lb_a2,a2,0 42004039 DEFINE lb_a2,t0,0 22014039 +DEFINE lb_a3,a0,0 03004039 DEFINE lb_a3,a1,0 23004039 DEFINE lb_a3,a2,0 43004039 DEFINE lb_a3,a3,0 63004039 DEFINE lb_t0,a3,0 69004039 DEFINE lb_t0,a3,1 69044039 -DEFINE lb_t0,a3,2 69084039 DEFINE lb_t0,t0,0 29014039 DEFINE lb_t1,t1,0 4A014039 DEFINE lb_t2,t0,0 2B014039 @@ -417,7 +410,6 @@ DEFINE 
sb_a0,a2,0 40000039 DEFINE sb_a0,t2,0 60010039 DEFINE sb_a1,a2,0 41000039 DEFINE sb_a1,t0,0 21010039 -DEFINE sb_a2,a0,0 02000039 DEFINE sb_a2,a1,0 22000039 DEFINE sb_a2,a3,0 62000039 DEFINE sb_a2,t2,0 62010039 @@ -427,7 +419,6 @@ DEFINE sb_a3,a2,0 43000039 DEFINE sb_a3,t2,0 63010039 DEFINE sb_t1,a0,0 0A000039 DEFINE sb_t1,a2,0 4A000039 -DEFINE sb_t2,a0,0 0B000039 DEFINE sb_t2,a1,0 2B000039 DEFINE sb_t2,a2,0 4B000039 @@ -455,7 +446,6 @@ DEFINE beq_t1,a1 5F0101EB4100005420021FD6 DEFINE beq_t1,a2 5F0102EB4100005420021FD6 DEFINE beq_t1,t0 5F0109EB4100005420021FD6 DEFINE beq_t1,t2 5F010BEB4100005420021FD6 -DEFINE beq_t2,a0 7F0100EB4100005420021FD6 DEFINE beq_t2,a2 7F0102EB4100005420021FD6 DEFINE beq_t2,a3 7F0103EB4100005420021FD6 DEFINE beq_t2,t1 7F010AEB4100005420021FD6 @@ -484,7 +474,6 @@ DEFINE blt_a2,a1 5F0001EB4A00005420021FD6 DEFINE blt_a2,a3 5F0003EB4A00005420021FD6 DEFINE blt_a2,t0 5F0009EB4A00005420021FD6 DEFINE blt_a2,t1 5F000AEB4A00005420021FD6 -DEFINE blt_a2,t2 5F000BEB4A00005420021FD6 DEFINE blt_a3,a2 7F0002EB4A00005420021FD6 DEFINE blt_a3,t2 7F000BEB4A00005420021FD6 DEFINE blt_t0,t1 3F010AEB4A00005420021FD6 diff --git a/P1/P1-amd64.M1 b/P1/P1-amd64.M1 @@ -23,6 +23,7 @@ DEFINE la_br 41BF ## ---- Moves DEFINE mov_a0,a1 4889F7 +DEFINE mov_a0,a2 4889D7 DEFINE mov_a0,a3 4889CF DEFINE mov_a0,t0 4C89D7 DEFINE mov_a0,t1 4C89DF @@ -39,11 +40,10 @@ DEFINE mov_t2,a0 4989F8 DEFINE mov_t2,t1 4D89D8 ## ---- Register Arithmetic -DEFINE add_a0,a0,a1 4889FF4801F7 DEFINE add_a0,a0,a2 4889FF4801D7 +DEFINE add_a0,a0,a3 4889FF4801CF DEFINE add_a0,a0,t0 4889FF4C01D7 DEFINE add_a0,a0,t1 4889FF4C01DF -DEFINE add_a0,a0,t2 4889FF4C01C7 DEFINE add_a0,a2,a0 4989F94889D74C01CF DEFINE add_a0,a2,t2 4889D74C01C7 DEFINE add_a0,t0,a3 4C89D74801CF @@ -57,7 +57,6 @@ DEFINE add_a1,a1,a2 4889F64801D6 DEFINE add_a1,a1,a3 4889F64801CE DEFINE add_a1,a1,t0 4889F64C01D6 DEFINE add_a1,a1,t1 4889F64C01DE -DEFINE add_a1,a2,a3 4889D64801CE DEFINE add_a1,a2,t0 4889D64C01D6 DEFINE add_a1,a3,a1 
4989F14889CE4C01CE DEFINE add_a1,t0,a0 4C89D64801FE @@ -67,9 +66,11 @@ DEFINE add_a1,t2,t1 4C89C64C01DE DEFINE add_a2,a1,a3 4889F24801CA DEFINE add_a2,a1,t0 4889F24C01D2 DEFINE add_a2,a2,a0 4889D24801FA +DEFINE add_a2,a2,a1 4889D24801F2 DEFINE add_a2,a2,a3 4889D24801CA DEFINE add_a2,a2,t0 4889D24C01D2 DEFINE add_a2,a2,t1 4889D24C01DA +DEFINE add_a2,a2,t2 4889D24C01C2 DEFINE add_a2,a3,a2 4989D14889CA4C01CA DEFINE add_a2,t0,t1 4C89D24C01DA DEFINE add_a2,t2,a0 4C89C24801FA @@ -100,7 +101,6 @@ DEFINE add_t1,t1,a2 4D89DB4901D3 DEFINE add_t1,t1,t2 4D89DB4D01C3 DEFINE add_t2,a0,t0 4989F84D01D0 DEFINE add_t2,a0,t1 4989F84D01D8 -DEFINE add_t2,a1,a3 4989F04901C8 DEFINE add_t2,a1,t2 4D89C14989F04D01C8 DEFINE add_t2,a2,t1 4989D04D01D8 DEFINE add_t2,t0,t1 4D89D04D01D8 @@ -113,11 +113,11 @@ DEFINE sub_a0,a1,t2 4889F74C29C7 DEFINE sub_a0,a3,a0 4989F94889CF4C29CF DEFINE sub_a1,t0,a0 4C89D64829FE DEFINE sub_a2,a1,a0 4889F24829FA +DEFINE sub_a2,a1,a3 4889F24829CA DEFINE sub_a2,a2,a2 4989D14889D24C29CA DEFINE sub_a2,a2,t0 4889D24C29D2 DEFINE sub_a2,t0,t1 4C89D24C29DA DEFINE sub_a2,t2,a3 4C89C24829CA -DEFINE sub_a3,a3,a1 4889C94829F1 DEFINE sub_a3,a3,a2 4889C94829D1 DEFINE sub_a3,t0,a2 4C89D14829D1 DEFINE sub_a3,t0,a3 4989C94C89D14C29C9 @@ -138,10 +138,8 @@ DEFINE shl_a2,a2,a3 4889CD4989D14889C949D3E14889E94C89CA DEFINE sar_a2,a2,a3 4889CD4989D14889C949D3F94889E94C89CA DEFINE mul_a0,a0,a3 4889FF480FAFF9 DEFINE mul_a0,t1,t2 4C89DF490FAFF8 -DEFINE mul_a2,a2,t0 4889D2490FAFD2 DEFINE mul_a3,a3,a2 4889C9480FAFCA DEFINE mul_t0,t0,a1 4D89D24C0FAFD6 -DEFINE mul_t0,t0,a2 4D89D24C0FAFD2 DEFINE mul_t2,t0,a2 4D89D04C0FAFC2 DEFINE div_a0,a0,a1 4889D54989F14889F8489949F7F94889EA4889C7 DEFINE div_a2,a2,a3 4889D54989C94889D0489949F7F94889EA4889C2 @@ -239,11 +237,9 @@ DEFINE ld_a1,t1,24 498B7318 DEFINE ld_a1,t2,16 498B7010 DEFINE ld_a1,sp,8 488B742418 DEFINE ld_a2,a0,0 488B5700 -DEFINE ld_a2,a0,8 488B5708 DEFINE ld_a2,a0,16 488B5710 DEFINE ld_a2,a0,24 488B5718 DEFINE ld_a2,a1,0 488B5600 -DEFINE 
ld_a2,a1,8 488B5608 DEFINE ld_a2,a2,0 488B5200 DEFINE ld_a2,t0,0 498B5200 DEFINE ld_a2,t0,8 498B5208 @@ -254,7 +250,6 @@ DEFINE ld_a2,t1,0 498B5300 DEFINE ld_a2,t2,0 498B5000 DEFINE ld_a2,sp,16 488B542420 DEFINE ld_a3,a0,0 488B4F00 -DEFINE ld_a3,a0,8 488B4F08 DEFINE ld_a3,a0,16 488B4F10 DEFINE ld_a3,a1,0 488B4E00 DEFINE ld_a3,a1,8 488B4E08 @@ -274,8 +269,6 @@ DEFINE ld_t0,a1,16 4C8B5610 DEFINE ld_t0,a1,32 4C8B5620 DEFINE ld_t0,a2,0 4C8B5200 DEFINE ld_t0,t0,0 4D8B5200 -DEFINE ld_t0,t1,8 4D8B5308 -DEFINE ld_t0,t1,16 4D8B5310 DEFINE ld_t0,t2,0 4D8B5000 DEFINE ld_t0,t2,16 4D8B5010 DEFINE ld_t0,sp,0 4C8B542410 @@ -357,7 +350,6 @@ DEFINE st_a2,t2,0 49895000 DEFINE st_a3,a0,0 48894F00 DEFINE st_a3,a1,0 48894E00 DEFINE st_a3,a2,0 48894A00 -DEFINE st_a3,t0,0 49894A00 DEFINE st_a3,t0,24 49894A18 DEFINE st_a3,t1,8 49894B08 DEFINE st_a3,t2,0 49894800 @@ -365,6 +357,7 @@ DEFINE st_a3,t2,8 49894808 DEFINE st_a3,t2,16 49894810 DEFINE st_a3,t2,24 49894818 DEFINE st_t0,a0,0 4C895700 +DEFINE st_t0,a0,8 4C895708 DEFINE st_t0,a0,16 4C895710 DEFINE st_t0,a0,24 4C895718 DEFINE st_t0,a1,0 4C895600 @@ -402,12 +395,12 @@ DEFINE lb_a1,a3,1 480FB67101 DEFINE lb_a2,a1,0 480FB65600 DEFINE lb_a2,a2,0 480FB65200 DEFINE lb_a2,t0,0 490FB65200 +DEFINE lb_a3,a0,0 480FB64F00 DEFINE lb_a3,a1,0 480FB64E00 DEFINE lb_a3,a2,0 480FB64A00 DEFINE lb_a3,a3,0 480FB64900 DEFINE lb_t0,a3,0 4C0FB65100 DEFINE lb_t0,a3,1 4C0FB65101 -DEFINE lb_t0,a3,2 4C0FB65102 DEFINE lb_t0,t0,0 4D0FB65200 DEFINE lb_t1,t1,0 4D0FB65B00 DEFINE lb_t2,t0,0 4D0FB64200 @@ -417,7 +410,6 @@ DEFINE sb_a0,a2,0 48887A00 DEFINE sb_a0,t2,0 49887800 DEFINE sb_a1,a2,0 48887200 DEFINE sb_a1,t0,0 49887200 -DEFINE sb_a2,a0,0 48885700 DEFINE sb_a2,a1,0 48885600 DEFINE sb_a2,a3,0 48885100 DEFINE sb_a2,t2,0 49885000 @@ -427,7 +419,6 @@ DEFINE sb_a3,a2,0 48884A00 DEFINE sb_a3,t2,0 49884800 DEFINE sb_t1,a0,0 4C885F00 DEFINE sb_t1,a2,0 4C885A00 -DEFINE sb_t2,a0,0 4C884700 DEFINE sb_t2,a1,0 4C884600 DEFINE sb_t2,a2,0 4C884200 @@ -455,7 +446,6 @@ DEFINE 
beq_t1,a1 4939F3750341FFE7 DEFINE beq_t1,a2 4939D3750341FFE7 DEFINE beq_t1,t0 4D39D3750341FFE7 DEFINE beq_t1,t2 4D39C3750341FFE7 -DEFINE beq_t2,a0 4939F8750341FFE7 DEFINE beq_t2,a2 4939D0750341FFE7 DEFINE beq_t2,a3 4939C8750341FFE7 DEFINE beq_t2,t1 4D39D8750341FFE7 @@ -484,7 +474,6 @@ DEFINE blt_a2,a1 4839F27D0341FFE7 DEFINE blt_a2,a3 4839CA7D0341FFE7 DEFINE blt_a2,t0 4C39D27D0341FFE7 DEFINE blt_a2,t1 4C39DA7D0341FFE7 -DEFINE blt_a2,t2 4C39C27D0341FFE7 DEFINE blt_a3,a2 4839D17D0341FFE7 DEFINE blt_a3,t2 4C39C17D0341FFE7 DEFINE blt_t0,t1 4D39DA7D0341FFE7 diff --git a/P1/P1-riscv64.M1 b/P1/P1-riscv64.M1 @@ -23,6 +23,7 @@ DEFINE la_br 970F000083EFCF006F008000 ## ---- Moves DEFINE mov_a0,a1 13850500 +DEFINE mov_a0,a2 13050600 DEFINE mov_a0,a3 13850600 DEFINE mov_a0,t0 13850200 DEFINE mov_a0,t1 13050300 @@ -39,11 +40,10 @@ DEFINE mov_t2,a0 93030500 DEFINE mov_t2,t1 93030300 ## ---- Register Arithmetic -DEFINE add_a0,a0,a1 3305B500 DEFINE add_a0,a0,a2 3305C500 +DEFINE add_a0,a0,a3 3305D500 DEFINE add_a0,a0,t0 33055500 DEFINE add_a0,a0,t1 33056500 -DEFINE add_a0,a0,t2 33057500 DEFINE add_a0,a2,a0 3305A600 DEFINE add_a0,a2,t2 33057600 DEFINE add_a0,t0,a3 3385D200 @@ -57,7 +57,6 @@ DEFINE add_a1,a1,a2 B385C500 DEFINE add_a1,a1,a3 B385D500 DEFINE add_a1,a1,t0 B3855500 DEFINE add_a1,a1,t1 B3856500 -DEFINE add_a1,a2,a3 B305D600 DEFINE add_a1,a2,t0 B3055600 DEFINE add_a1,a3,a1 B385B600 DEFINE add_a1,t0,a0 B385A200 @@ -67,9 +66,11 @@ DEFINE add_a1,t2,t1 B3856300 DEFINE add_a2,a1,a3 3386D500 DEFINE add_a2,a1,t0 33865500 DEFINE add_a2,a2,a0 3306A600 +DEFINE add_a2,a2,a1 3306B600 DEFINE add_a2,a2,a3 3306D600 DEFINE add_a2,a2,t0 33065600 DEFINE add_a2,a2,t1 33066600 +DEFINE add_a2,a2,t2 33067600 DEFINE add_a2,a3,a2 3386C600 DEFINE add_a2,t0,t1 33866200 DEFINE add_a2,t2,a0 3386A300 @@ -100,7 +101,6 @@ DEFINE add_t1,t1,a2 3303C300 DEFINE add_t1,t1,t2 33037300 DEFINE add_t2,a0,t0 B3035500 DEFINE add_t2,a0,t1 B3036500 -DEFINE add_t2,a1,a3 B383D500 DEFINE add_t2,a1,t2 B3837500 DEFINE 
add_t2,a2,t1 B3036600 DEFINE add_t2,t0,t1 B3836200 @@ -113,11 +113,11 @@ DEFINE sub_a0,a1,t2 33857540 DEFINE sub_a0,a3,a0 3385A640 DEFINE sub_a1,t0,a0 B385A240 DEFINE sub_a2,a1,a0 3386A540 +DEFINE sub_a2,a1,a3 3386D540 DEFINE sub_a2,a2,a2 3306C640 DEFINE sub_a2,a2,t0 33065640 DEFINE sub_a2,t0,t1 33866240 DEFINE sub_a2,t2,a3 3386D340 -DEFINE sub_a3,a3,a1 B386B640 DEFINE sub_a3,a3,a2 B386C640 DEFINE sub_a3,t0,a2 B386C240 DEFINE sub_a3,t0,a3 B386D240 @@ -138,10 +138,8 @@ DEFINE shl_a2,a2,a3 3316D600 DEFINE sar_a2,a2,a3 3356D640 DEFINE mul_a0,a0,a3 3305D502 DEFINE mul_a0,t1,t2 33057302 -DEFINE mul_a2,a2,t0 33065602 DEFINE mul_a3,a3,a2 B386C602 DEFINE mul_t0,t0,a1 B382B202 -DEFINE mul_t0,t0,a2 B382C202 DEFINE mul_t2,t0,a2 B383C202 DEFINE div_a0,a0,a1 3345B502 DEFINE div_a2,a2,a3 3346D602 @@ -239,11 +237,9 @@ DEFINE ld_a1,t1,24 83358301 DEFINE ld_a1,t2,16 83B50301 DEFINE ld_a1,sp,8 83358101 DEFINE ld_a2,a0,0 03360500 -DEFINE ld_a2,a0,8 03368500 DEFINE ld_a2,a0,16 03360501 DEFINE ld_a2,a0,24 03368501 DEFINE ld_a2,a1,0 03B60500 -DEFINE ld_a2,a1,8 03B68500 DEFINE ld_a2,a2,0 03360600 DEFINE ld_a2,t0,0 03B60200 DEFINE ld_a2,t0,8 03B68200 @@ -254,7 +250,6 @@ DEFINE ld_a2,t1,0 03360300 DEFINE ld_a2,t2,0 03B60300 DEFINE ld_a2,sp,16 03360102 DEFINE ld_a3,a0,0 83360500 -DEFINE ld_a3,a0,8 83368500 DEFINE ld_a3,a0,16 83360501 DEFINE ld_a3,a1,0 83B60500 DEFINE ld_a3,a1,8 83B68500 @@ -274,8 +269,6 @@ DEFINE ld_t0,a1,16 83B20501 DEFINE ld_t0,a1,32 83B20502 DEFINE ld_t0,a2,0 83320600 DEFINE ld_t0,t0,0 83B20200 -DEFINE ld_t0,t1,8 83328300 -DEFINE ld_t0,t1,16 83320301 DEFINE ld_t0,t2,0 83B20300 DEFINE ld_t0,t2,16 83B20301 DEFINE ld_t0,sp,0 83320101 @@ -357,7 +350,6 @@ DEFINE st_a2,t2,0 23B0C300 DEFINE st_a3,a0,0 2330D500 DEFINE st_a3,a1,0 23B0D500 DEFINE st_a3,a2,0 2330D600 -DEFINE st_a3,t0,0 23B0D200 DEFINE st_a3,t0,24 23BCD200 DEFINE st_a3,t1,8 2334D300 DEFINE st_a3,t2,0 23B0D300 @@ -365,6 +357,7 @@ DEFINE st_a3,t2,8 23B4D300 DEFINE st_a3,t2,16 23B8D300 DEFINE st_a3,t2,24 23BCD300 
DEFINE st_t0,a0,0 23305500 +DEFINE st_t0,a0,8 23345500 DEFINE st_t0,a0,16 23385500 DEFINE st_t0,a0,24 233C5500 DEFINE st_t0,a1,0 23B05500 @@ -402,12 +395,12 @@ DEFINE lb_a1,a3,1 83C51600 DEFINE lb_a2,a1,0 03C60500 DEFINE lb_a2,a2,0 03460600 DEFINE lb_a2,t0,0 03C60200 +DEFINE lb_a3,a0,0 83460500 DEFINE lb_a3,a1,0 83C60500 DEFINE lb_a3,a2,0 83460600 DEFINE lb_a3,a3,0 83C60600 DEFINE lb_t0,a3,0 83C20600 DEFINE lb_t0,a3,1 83C21600 -DEFINE lb_t0,a3,2 83C22600 DEFINE lb_t0,t0,0 83C20200 DEFINE lb_t1,t1,0 03430300 DEFINE lb_t2,t0,0 83C30200 @@ -417,7 +410,6 @@ DEFINE sb_a0,a2,0 2300A600 DEFINE sb_a0,t2,0 2380A300 DEFINE sb_a1,a2,0 2300B600 DEFINE sb_a1,t0,0 2380B200 -DEFINE sb_a2,a0,0 2300C500 DEFINE sb_a2,a1,0 2380C500 DEFINE sb_a2,a3,0 2380C600 DEFINE sb_a2,t2,0 2380C300 @@ -427,7 +419,6 @@ DEFINE sb_a3,a2,0 2300D600 DEFINE sb_a3,t2,0 2380D300 DEFINE sb_t1,a0,0 23006500 DEFINE sb_t1,a2,0 23006600 -DEFINE sb_t2,a0,0 23007500 DEFINE sb_t2,a1,0 23807500 DEFINE sb_t2,a2,0 23007600 @@ -455,7 +446,6 @@ DEFINE beq_t1,a1 6314B30067800F00 DEFINE beq_t1,a2 6314C30067800F00 DEFINE beq_t1,t0 6314530067800F00 DEFINE beq_t1,t2 6314730067800F00 -DEFINE beq_t2,a0 6394A30067800F00 DEFINE beq_t2,a2 6394C30067800F00 DEFINE beq_t2,a3 6394D30067800F00 DEFINE beq_t2,t1 6394630067800F00 @@ -484,7 +474,6 @@ DEFINE blt_a2,a1 6354B60067800F00 DEFINE blt_a2,a3 6354D60067800F00 DEFINE blt_a2,t0 6354560067800F00 DEFINE blt_a2,t1 6354660067800F00 -DEFINE blt_a2,t2 6354760067800F00 DEFINE blt_a3,a2 63D4C60067800F00 DEFINE blt_a3,t2 63D4760067800F00 DEFINE blt_t0,t1 63D4620067800F00 diff --git a/docs/HEX2pp.md b/docs/HEX2pp.md @@ -8,17 +8,18 @@ output feeds hex2++ directly — there is no intermediate macro/hex stage. ## Invocation ``` -hex2++ (-f|--file) FILE [(-f|--file) FILE ...] 
- [-o|--output OUT] - [-B|--base-address ADDR] - [--big-endian | --little-endian] - [-b|--binary] # default is hex - [--non-executable] +hex2++ [-B ADDR] # base address + [-E | -e] # big-endian | little-endian (default: little) + [-b] # binary digit mode (default: hex) + [-N] # non-executable output + IN OUT ``` -Output is one flat binary written from `Base_Address` upward. Multiple `-f` -files are concatenated in argv order. Unless `--non-executable` is set and -the output is a regular file, the output is `chmod 0750`'d. +`IN` and `OUT` are positional: a single input file and a single output file. +To assemble several sources together, concatenate them upstream (e.g. with +`catm`) and pass the combined file as `IN`. Output is one flat binary +written from `Base_Address` upward. Unless `-N` is set and the output is a +regular file, the output is `chmod 0750`'d. There is no per-target configuration. Any target-specific encoding (RISC-V bitfield-scattered immediates, native branch displacements, etc.) is the @@ -42,9 +43,9 @@ Active characters: 0-9 a-f A-F hex digits (HEX mode) 0-1 binary digits (BINARY mode) : label definition -. (+kw) directive (.align, .fill, .scope, .endscope) +. (+kw) directive (.align, .fill, .scope, .endscope, .ptrsize) ! @ $ ~ % & label reference -- label arithmetic in references +- > label arithmetic in references (synonyms) # ; line comment ws token separator ``` @@ -58,11 +59,13 @@ ws token separator Label names are tokens terminated by whitespace or `-`. Labels may be referenced before they are defined; forward references resolve in pass 2. -The label namespace is global except that names beginning with `.` are -*local* to the enclosing `.scope`. Local labels are distinguished from -directives by the leading character of the token: `:.NAME` is a local -definition, `&.NAME` / `%.NAME` / etc. are local references, and a bare -`.NAME` (no leading `:` or sigil) is a directive. 
+The label namespace is global except that names beginning with `.` *inside +a `.scope`* are local to that scope. The leading character of the token +disambiguates labels from directives: `:.NAME` is a label definition, +`&.NAME` / `%.NAME` / etc. are label references, and a bare `.NAME` (no +leading `:` or sigil, at statement position) is a directive. Directive +names are therefore reserved only at statement position, and remain +available as label tokens when prefixed with `:` or a sigil. ``` .scope @@ -72,24 +75,28 @@ definition, `&.NAME` / `%.NAME` / etc. are local references, and a bare .endscope ``` -- `.scope` directives nest. A dotted reference resolves to the nearest - enclosing definition, so an inner scope shadows an outer one with the - same local name. +- `.scope` directives nest. A dotted reference inside a scope resolves to + the nearest enclosing definition, so an inner scope shadows an outer one + with the same local name. - Non-dotted labels defined inside a `.scope` remain global. -- Dot-prefixed labels outside any `.scope` are an error. +- Dot-prefixed labels outside any `.scope` are ordinary global labels; + the leading `.` is just part of the name. 
## Label references A reference is a single sigil character followed by a label expression: -| Sigil | Width | Form | Range | -|-------|-------|------|------------------------| -| `!` | 1 B | rel | `-128..127` | -| `@` | 2 B | rel | `-32768..32767` | -| `$` | 2 B | abs | `0..65535` | -| `~` | 3 B | rel | `-2^23..2^23-1` | -| `%` | 4 B | rel | unchecked | -| `&` | 4 B | abs | unchecked | +| Sigil | Width | Form | Range | +|-------|----------|------|------------------------| +| `!` | 1 B | rel | `-128..127` | +| `@` | 2 B | rel | `-32768..32767` | +| `$` | 2 B | abs | `0..65535` | +| `~` | 3 B | rel | `-2^23..2^23-1` | +| `%` | ptrsize | rel | unchecked | +| `&` | ptrsize | abs | unchecked | + +The width of `%` and `&` is set by [`.ptrsize`](#ptrsize-n) — 4 bytes by +default, 8 for 64-bit pointer targets. - "rel" emits `target - base`, where `base` is `ip` immediately after the reference's bytes are accounted for. @@ -99,13 +106,16 @@ A reference is a single sigil character followed by a label expression: The label expression takes one of two forms: ``` -SIGIL LABEL # plain reference -SIGIL LABEL - OTHER # emit target(LABEL) - target(OTHER) +SIGIL LABEL # plain reference +SIGIL LABEL - OTHER # emit target(LABEL) - target(OTHER) +SIGIL LABEL > OTHER # synonym for `LABEL - OTHER` ``` The `LABEL - OTHER` form overrides the default base with another label, and -applies uniformly to all sigils. Both labels must be defined somewhere in -the input. Range checks apply identically to plain and arithmetic forms. +applies uniformly to all sigils. `>` is accepted as an alias for `-` so +hex2 inputs that use the relative-base override syntax assemble unchanged; +both produce identical bytes. Both labels must be defined somewhere in the +input. Range checks apply identically to plain and arithmetic forms. Only one subtraction per reference; no addition, nesting, or parenthesization. 
@@ -118,7 +128,8 @@ Examples: &case0-jt &case1-jt &case2-jt # string length prefix (string bytes themselves come from the -# upstream M1pp layer, e.g. `%bytes("hello")`) +# upstream M1pp layer, which decodes a bare `"hello"` into the +# five hex bytes shown here) :s_begin 68 65 6c 6c 6f :s_end @@ -156,6 +167,20 @@ The pad pattern is supplied by whichever upstream layer knows the target See [Labels](#labels). +### `.ptrsize N` + +``` +.ptrsize 4 # default +.ptrsize 8 # 64-bit pointer targets +``` + +Sets the byte width of the `&` and `%` sigils. `N` must be `4` or `8`. + +`.ptrsize` is whole-invocation: the first occurrence seen across all +inputs binds the width for the entire run, and any subsequent +`.ptrsize` must specify the same value or it is an error. If no +`.ptrsize` directive appears, the width defaults to `4`. + ## Implementation outline Two passes: diff --git a/hex2pp/hex2pp.P1 b/hex2pp/hex2pp.P1 @@ -134,20 +134,25 @@ DEFINE OFF_labels 0048000009800000 b :bss_init_done - # ---- Default output_path = "a.out" ------------------------------------- - la_a0 &const_a_out - la_a1 &output_path - st_a0,a1,0 + # ---- Default ptrsize = 4 ---------------------------------------------- + li_t0 %4 %0 + la_a0 &ptrsize + st_t0,a0,0 :arg_loop_init - li_t0 %1 %0 + li_t0 %0 %0 la_a0 &arg_idx st_t0,a0,0 :arg_loop - # if (i >= argc) goto arg_done + # i++; if (i >= argc) goto arg_done. arg_idx is bumped here at the + # top so each handler just `b` back to arg_loop without bookkeeping. + # arg_advance (used by value-taking flags) also increments, so a + # `-B ADDR` pair correctly advances by two argv slots per dispatch. la_a0 &arg_idx ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 la_a1 &saved_argc ld_t1,a1,0 la_br &arg_done @@ -169,45 +174,11 @@ DEFINE OFF_labels 0048000009800000 # Dispatch on the argument string. Each compare uses str_eq, which # checks the trailing NUL of the argv string against the option - # constant's known length. 
- - # -f / --file - la_a0 &arg_ptr - ld_a0,a0,0 - la_a1 &opt_dash_f - li_a2 %2 %0 - la_br &str_eq - call - la_br &arg_is_file - bnez_a0 - la_a0 &arg_ptr - ld_a0,a0,0 - la_a1 &opt_long_file - li_a2 %6 %0 - la_br &str_eq - call - la_br &arg_is_file - bnez_a0 + # constant's known length. Anything not matching a known flag (and + # not starting with '-') is treated as a positional argument: first + # is the input file, second is the output file. - # -o / --output - la_a0 &arg_ptr - ld_a0,a0,0 - la_a1 &opt_dash_o - li_a2 %2 %0 - la_br &str_eq - call - la_br &arg_is_output - bnez_a0 - la_a0 &arg_ptr - ld_a0,a0,0 - la_a1 &opt_long_output - li_a2 %8 %0 - la_br &str_eq - call - la_br &arg_is_output - bnez_a0 - - # -B / --base-address + # -B la_a0 &arg_ptr ld_a0,a0,0 la_a1 &opt_dash_B @@ -216,36 +187,28 @@ DEFINE OFF_labels 0048000009800000 call la_br &arg_is_base bnez_a0 - la_a0 &arg_ptr - ld_a0,a0,0 - la_a1 &opt_long_base - li_a2 %14 %0 - la_br &str_eq - call - la_br &arg_is_base - bnez_a0 - # --big-endian + # -E la_a0 &arg_ptr ld_a0,a0,0 - la_a1 &opt_long_big - li_a2 %12 %0 + la_a1 &opt_dash_E + li_a2 %2 %0 la_br &str_eq call la_br &arg_is_big bnez_a0 - # --little-endian + # -e la_a0 &arg_ptr ld_a0,a0,0 - la_a1 &opt_long_little - li_a2 %15 %0 + la_a1 &opt_dash_e + li_a2 %2 %0 la_br &str_eq call la_br &arg_is_little bnez_a0 - # -b / --binary + # -b la_a0 &arg_ptr ld_a0,a0,0 la_a1 &opt_dash_b @@ -254,64 +217,59 @@ DEFINE OFF_labels 0048000009800000 call la_br &arg_is_binary bnez_a0 - la_a0 &arg_ptr - ld_a0,a0,0 - la_a1 &opt_long_binary - li_a2 %8 %0 - la_br &str_eq - call - la_br &arg_is_binary - bnez_a0 - # --non-executable + # -N la_a0 &arg_ptr ld_a0,a0,0 - la_a1 &opt_long_nonexec - li_a2 %16 %0 + la_a1 &opt_dash_N + li_a2 %2 %0 la_br &str_eq call la_br &arg_is_nonexec bnez_a0 - # -h / --help + # Not a known flag. If it begins with '-' (and isn't just "-"), + # it's an unknown option. Otherwise it's a positional. 
la_a0 &arg_ptr ld_a0,a0,0 - la_a1 &opt_dash_h - li_a2 %2 %0 - la_br &str_eq - call - la_br &arg_is_help - bnez_a0 + lb_a0,a0,0 # a0 = first byte + li_t0 %45 %0 # t0 = '-' + la_br &arg_is_positional + bne_a0,t0 la_a0 &arg_ptr ld_a0,a0,0 - la_a1 &opt_long_help - li_a2 %6 %0 - la_br &str_eq - call - la_br &arg_is_help - bnez_a0 - + addi_a0,a0,1 + lb_a0,a0,0 # a0 = second byte + la_br &arg_is_positional + beqz_a0 la_br &err_unknown_arg b -:arg_is_file - la_br &arg_advance - call +:arg_is_positional + # If input not yet loaded, this is IN. Else if output not yet set, + # this is OUT. Else extra positional → error. + la_a0 &input_count + ld_t0,a0,0 + la_br &arg_pos_is_out + bnez_t0 la_a0 &arg_ptr ld_a0,a0,0 la_br &load_input call la_br &arg_loop b -:arg_is_output - la_br &arg_advance - call +:arg_pos_is_out + la_a0 &output_path + ld_t0,a0,0 + la_br &err_unknown_arg + bnez_t0 la_a0 &arg_ptr ld_a0,a0,0 la_a1 &output_path st_a0,a1,0 la_br &arg_loop b + :arg_is_base la_br &arg_advance call @@ -347,12 +305,6 @@ DEFINE OFF_labels 0048000009800000 st_t0,a0,0 la_br &arg_loop b -:arg_is_help - la_br &print_usage - call - li_a0 sys_exit - li_a1 %0 %0 - syscall ## arg_advance(): i++; if (i >= argc) usage error; arg_ptr = argv[i]. :arg_advance @@ -381,7 +333,11 @@ DEFINE OFF_labels 0048000009800000 :arg_done la_a0 &input_count ld_t0,a0,0 - la_br &err_no_inputs + la_br &err_missing_positional + beqz_t0 + la_a0 &output_path + ld_t0,a0,0 + la_br &err_missing_positional beqz_t0 # ---- Pass 1: collect labels -------------------------------------------- @@ -425,7 +381,8 @@ DEFINE OFF_labels 0048000009800000 li_a0 %0 %0 eret -## reset_pass_state(): ip=0, output_used=0, scope_depth=0, scope_seq=0. +## reset_pass_state(): ip=0, output_used=0, scope_depth=0, scope_seq=0, +## ptrsize=4, ptrsize_used=0. 
:reset_pass_state enter_0 li_t0 %0 %0 @@ -437,6 +394,11 @@ DEFINE OFF_labels 0048000009800000 st_t0,a0,0 la_a0 &scope_seq st_t0,a0,0 + la_a0 &ptrsize_used + st_t0,a0,0 + li_t0 %4 %0 + la_a0 &ptrsize + st_t0,a0,0 eret ## run_one_pass(): for i in [0, input_count) call process_file(i). @@ -493,7 +455,12 @@ DEFINE OFF_labels 0048000009800000 ld_a3,a0,0 st_a3,a2,0 - # Stash path for the syscall. + # Stash path for the syscall. a0 was clobbered by &aux_tmp above, + # so re-read the path from input_paths[input_count] (still index 0 + # for our slot since input_count is incremented at li_eof). + la_a0 &input_paths_ptr + ld_a0,a0,0 + ld_a0,a0,0 la_a1 &li_path st_a0,a1,0 @@ -696,10 +663,11 @@ DEFINE OFF_labels 0048000009800000 li_t1 %46 %0 la_br &scan_label_undotted bne_t0,t1 - # dotted: scope_depth must be > 0 + # dotted: scope-local only when inside a .scope; otherwise treat as + # an ordinary global label. la_a0 &scope_depth ld_t0,a0,0 - la_br &err_dotted_outside_scope + la_br &scan_label_undotted beqz_t0 addi_t0,t0,neg1 shli_t2,t0,3 @@ -795,7 +763,7 @@ DEFINE OFF_labels 0048000009800000 la_a0 &name_len ld_t0,a0,0 li_t1 %8 %0 - la_br &err_unknown_directive + la_br &scan_dir_check_7 bne_t0,t1 la_a0 &name_buf_ptr ld_a0,a0,0 @@ -807,6 +775,22 @@ DEFINE OFF_labels 0048000009800000 bnez_a0 la_br &err_unknown_directive b +:scan_dir_check_7 + la_a0 &name_len + ld_t0,a0,0 + li_t1 %7 %0 + la_br &err_unknown_directive + bne_t0,t1 + la_a0 &name_buf_ptr + ld_a0,a0,0 + la_a1 &dir_ptrsize + li_a2 %7 %0 + la_br &mem_eq + call + la_br &scan_dir_ptrsize + bnez_a0 + la_br &err_unknown_directive + b :scan_dir_align la_br &do_align @@ -828,6 +812,11 @@ DEFINE OFF_labels 0048000009800000 call la_br &scan_loop b +:scan_dir_ptrsize + la_br &do_ptrsize + call + la_br &scan_loop + b :scan_ref # a0 holds sigil; advance past it then process_reference. 
@@ -1027,13 +1016,16 @@ DEFINE OFF_labels 0048000009800000 bnez_a0 la_a1 &nt_c ld_a0,a1,0 - li_t0 %45 %0 + li_t0 %45 %0 # '-' + la_br &nt_yes + beq_a0,t0 + li_t0 %62 %0 # '>' (synonym for '-') la_br &nt_yes beq_a0,t0 - li_t0 %35 %0 + li_t0 %35 %0 # '#' la_br &nt_yes beq_a0,t0 - li_t0 %59 %0 + li_t0 %59 %0 # ';' la_br &nt_yes beq_a0,t0 li_a0 %0 %0 @@ -1867,9 +1859,12 @@ DEFINE OFF_labels 0048000009800000 li_t1 %46 %0 la_br &ll_undotted bne_t0,t1 - # Dotted: walk scope_stack innermost-out. + # Dotted but only meaningful inside a .scope; otherwise fall through + # to the global-name lookup. la_a0 &scope_depth ld_t0,a0,0 + la_br &ll_undotted + beqz_t0 addi_t0,t0,neg1 la_a1 &ll_d st_t0,a1,0 @@ -2026,7 +2021,8 @@ DEFINE OFF_labels 0048000009800000 li_t0 %0 %0 la_a0 &pr_has_other st_t0,a0,0 - # Optional '-' OTHER. + # Optional separator (- or >) followed by OTHER. '>' is a synonym + # for '-', accepted for hex2 compatibility. la_a0 &scan_pos ld_t0,a0,0 la_a1 &scan_end @@ -2036,16 +2032,18 @@ DEFINE OFF_labels 0048000009800000 la_br &pr_after_other blt_t1,t0 lb_a0,t0,0 - li_t1 %45 %0 + li_t1 %45 %0 # '-' + la_br &pr_consume_sep + beq_a0,t1 + li_t1 %62 %0 # '>' + la_br &pr_consume_sep + beq_a0,t1 la_br &pr_after_other - la_a3 &aux_tmp - st_t1,a3,0 - la_a0 &aux_tmp - st_a3,a0,0 - ld_t0,a0,0 - bne_a0,t0 - addi_t0,t0,1 + b +:pr_consume_sep la_a1 &scan_pos + ld_t0,a1,0 + addi_t0,t0,1 st_t0,a1,0 la_a1 &scan_end ld_t1,a1,0 @@ -2317,12 +2315,16 @@ DEFINE OFF_labels 0048000009800000 st_t1,a1,0 eret :ssi_pct - li_t0 %4 %0 + la_a0 &ptrsize + ld_t0,a0,0 la_a1 &pr_width st_t0,a1,0 li_t0 %1 %0 la_a1 &pr_is_rel st_t0,a1,0 + li_t0 %1 %0 + la_a1 &ptrsize_used + st_t0,a1,0 li_t0 %0 %0 la_a1 &pr_range_check st_t0,a1,0 @@ -2332,9 +2334,13 @@ DEFINE OFF_labels 0048000009800000 st_t0,a1,0 eret :ssi_amp - li_t0 %4 %0 + la_a0 &ptrsize + ld_t0,a0,0 la_a1 &pr_width st_t0,a1,0 + li_t0 %1 %0 + la_a1 &ptrsize_used + st_t0,a1,0 li_t0 %0 %0 la_a1 &pr_is_rel st_t0,a1,0 @@ -2555,6 +2561,46 @@ DEFINE 
OFF_labels 0048000009800000 st_t0,a0,0 eret +## do_ptrsize(): .ptrsize N -- N must be 4 or 8. Whole-invocation: the +## first directive (or first '&'/'%' use) binds the width; later ones +## must match the bound value. +:do_ptrsize + enter_0 + la_br &skip_inline_ws + call + la_br &read_decimal + call + la_a1 &dp_n + st_a0,a1,0 + # Validate N in {4, 8} + li_t0 %4 %0 + la_br &dp_ok_value + beq_a0,t0 + li_t0 %8 %0 + la_br &dp_ok_value + beq_a0,t0 + la_br &err_ptrsize_bad + b +:dp_ok_value + # If a '&'/'%' has already used ptrsize, N must equal current ptrsize. + la_a0 &ptrsize_used + ld_t0,a0,0 + la_br &dp_set + beqz_t0 + la_a0 &ptrsize + ld_t0,a0,0 + la_a1 &dp_n + ld_t1,a1,0 + la_br &err_ptrsize_conflict + bne_t0,t1 + eret +:dp_set + la_a0 &dp_n + ld_t0,a0,0 + la_a1 &ptrsize + st_t0,a1,0 + eret + ## do_scope_close(): scope_depth--; fatal if not in scope. :do_scope_close enter_0 @@ -3193,6 +3239,10 @@ DEFINE OFF_labels 0048000009800000 la_a0 &msg_missing_arg_value la_br &fatal_msg b +:err_missing_positional + la_a0 &msg_missing_positional + la_br &fatal_msg + b :err_no_inputs la_a0 &msg_no_inputs la_br &fatal_msg @@ -3325,6 +3375,14 @@ DEFINE OFF_labels 0048000009800000 la_a0 &msg_bad_long la_br &fatal_msg b +:err_ptrsize_bad + la_a0 &msg_ptrsize_bad + la_br &fatal_msg + b +:err_ptrsize_conflict + la_a0 &msg_ptrsize_conflict + la_br &fatal_msg + b ## Sentinel: end of executable text. 
:_text_end @@ -3333,24 +3391,17 @@ DEFINE OFF_labels 0048000009800000 :const_a_out "a.out" '00' -:opt_dash_f "-f" '00' -:opt_long_file "--file" '00' -:opt_dash_o "-o" '00' -:opt_long_output "--output" '00' :opt_dash_B "-B" '00' -:opt_long_base "--base-address" '00' -:opt_long_big "--big-endian" '00' -:opt_long_little "--little-endian" '00' +:opt_dash_E "-E" '00' +:opt_dash_e "-e" '00' :opt_dash_b "-b" '00' -:opt_long_binary "--binary" '00' -:opt_long_nonexec "--non-executable" '00' -:opt_dash_h "-h" '00' -:opt_long_help "--help" '00' +:opt_dash_N "-N" '00' :dir_align "align" :dir_fill "fill" :dir_scope "scope" :dir_endscope "endscope" +:dir_ptrsize "ptrsize" :str_colon ":" :str_colon_hex2pp ": hex2pp: " @@ -3359,16 +3410,12 @@ DEFINE OFF_labels 0048000009800000 " :str_zero "0" -:msg_usage "usage: hex2pp (-f|--file) FILE [(-f|--file) FILE ...] - [-o|--output OUT] - [-B|--base-address ADDR] - [--big-endian | --little-endian] - [-b|--binary] - [--non-executable] +:msg_usage "usage: hex2pp [-B ADDR] [-E|-e] [-b] [-N] IN OUT " '00' :msg_unknown_arg "unknown argument" '00' :msg_missing_arg_value "missing value for option" '00' :msg_no_inputs "no input files" '00' +:msg_missing_positional "missing IN or OUT positional argument" '00' :msg_too_many_files "too many input files" '00' :msg_open_input "failed to open input file" '00' :msg_read "failed to read input" '00' @@ -3401,6 +3448,8 @@ DEFINE OFF_labels 0048000009800000 :msg_expected_decimal "expected decimal integer" '00' :msg_output_overflow "output overflow" '00' :msg_bad_long "invalid integer argument" '00' +:msg_ptrsize_bad ".ptrsize: N must be 4 or 8" '00' +:msg_ptrsize_conflict ".ptrsize conflicts with already-used width" '00' ## --- BSS pointer-init table ------------------------------------------------ :bss_init_tbl @@ -3457,6 +3506,10 @@ ZERO8 ZERO8 :non_executable ZERO8 +:ptrsize +ZERO8 +:ptrsize_used +ZERO8 :pass ZERO8 @@ -3644,6 +3697,8 @@ ZERO8 ZERO8 :df_i ZERO8 +:dp_n +ZERO8 ## str/mem helpers :se_p diff 
--git a/hex2pp/hex2pp.c b/hex2pp/hex2pp.c @@ -27,7 +27,6 @@ #include <string.h> #include <sys/stat.h> -#define MAX_FILES 64 #define MAX_INPUT_BYTES (16 * 1024 * 1024) #define MAX_OUTPUT_BYTES (128 * 1024 * 1024) #define MAX_LABELS (1 << 20) @@ -50,8 +49,7 @@ struct Label { int scope_id; /* 0 = global */ }; -static struct InFile inputs[MAX_FILES]; -static int input_count; +static struct InFile input_file; static char text_buf[MAX_TEXT]; static int text_used; @@ -67,7 +65,9 @@ static long long base_address; static int byte_mode = HEX_MODE; static int big_endian; static int non_executable; -static const char *output_path = "a.out"; +static const char *output_path; +static int ptrsize = 4; /* width of '&' and '%'; settable via .ptrsize */ +static int ptrsize_used; /* a '&'/'%' reference has fixed the width */ static int scope_stack[MAX_SCOPE_DEPTH]; static int scope_depth; @@ -137,11 +137,11 @@ static long long lookup_label(const char *s, int len) { int i; int d; - int dotted = (len > 0 && s[0] == '.'); + int dotted = (len > 0 && s[0] == '.' && scope_depth > 0); if (dotted) { - /* Walk the scope stack innermost-out. A dotted name resolves to - * the nearest enclosing definition, so an inner scope can shadow - * an outer one with the same local name. */ + /* Inside a scope, walk the scope stack innermost-out. A dotted + * name resolves to the nearest enclosing definition, so an inner + * scope can shadow an outer one with the same local name. */ for (d = scope_depth - 1; d >= 0; d--) { int sid = scope_stack[d]; for (i = 0; i < label_count; i++) { @@ -337,11 +337,12 @@ static void parse_byte_stream(struct Scanner *s) static int is_name_terminator(int c) { - /* Per spec: names terminated by whitespace or '-'. We also stop at - * end-of-line comments and EOF for safety. */ + /* Per spec: names terminated by whitespace, '-', or '>' (the two + * label-arithmetic separators). We also stop at end-of-line comments + * and EOF for safety. 
*/ if (c < 0) return 1; if (is_space_any(c)) return 1; - if (c == '-') return 1; + if (c == '-' || c == '>') return 1; if (c == '#' || c == ';') return 1; return 0; } @@ -395,8 +396,8 @@ static struct SigilInfo sigil_info(int c) case '@': si.width = 2; si.is_rel = 1; si.lo = -32768; si.hi = 32767; si.range_check = 1; break; case '$': si.width = 2; si.is_rel = 0; si.lo = 0; si.hi = 65535; si.range_check = 1; break; case '~': si.width = 3; si.is_rel = 1; si.lo = -(1LL << 23); si.hi = (1LL << 23) - 1; si.range_check = 1; break; - case '%': si.width = 4; si.is_rel = 1; si.lo = 0; si.hi = 0; si.range_check = 0; break; - case '&': si.width = 4; si.is_rel = 0; si.lo = 0; si.hi = 0; si.range_check = 0; break; + case '%': si.width = ptrsize; si.is_rel = 1; si.lo = 0; si.hi = 0; si.range_check = 0; break; + case '&': si.width = ptrsize; si.is_rel = 0; si.lo = 0; si.hi = 0; si.range_check = 0; break; default: die("internal: bad sigil 0x%02x", c); } return si; @@ -411,14 +412,17 @@ static void process_reference(struct Scanner *s, int sigil) struct SigilInfo si = sigil_info(sigil); long long value = 0; + if (sigil == '&' || sigil == '%') ptrsize_used = 1; + /* Sigil already consumed. Read tight LABEL. */ if (s->pos >= s->len || is_name_terminator((unsigned char)s->buf[s->pos])) { die("sigil '%c' not followed by label name", sigil); } llen = read_name(s, label, sizeof(label)); - /* Optional '-' OTHER (tight, no whitespace). */ - if (s->pos < s->len && s->buf[s->pos] == '-') { + /* Optional '-' OTHER or '>' OTHER (tight, no whitespace). + * '>' is a synonym for '-', accepted for hex2 compatibility. 
*/ + if (s->pos < s->len && (s->buf[s->pos] == '-' || s->buf[s->pos] == '>')) { s->pos++; if (s->pos >= s->len || is_name_terminator((unsigned char)s->buf[s->pos])) { die("'-' must be followed by label name"); @@ -538,6 +542,20 @@ static void do_fill(struct Scanner *s) for (i = 0; i < N; i++) emit_byte(b); } +static void do_ptrsize(struct Scanner *s) +{ + long long N; + skip_inline_ws(s); + N = read_decimal(s); + if (N != 4 && N != 8) { + die(".ptrsize: N must be 4 or 8 (got %lld)", N); + } + if (ptrsize_used && (int)N != ptrsize) { + die(".ptrsize %lld conflicts with already-used width %d", N, ptrsize); + } + ptrsize = (int)N; +} + static void do_scope_open(void) { if (scope_depth >= MAX_SCOPE_DEPTH) die(".scope: depth overflow"); @@ -572,10 +590,9 @@ static void process_file(struct InFile *f) int scope; s.pos++; n = read_name(&s, name, sizeof(name)); - dotted = (n > 0 && name[0] == '.'); - if (dotted && scope_depth == 0) { - die("dot-prefixed label '%.*s' outside a .scope", n, name); - } + /* A dot-prefixed name is scope-local only inside a .scope; + * outside, it is an ordinary global name. */ + dotted = (n > 0 && name[0] == '.' && scope_depth > 0); scope = dotted ? 
scope_stack[scope_depth - 1] : 0; if (pass == 1) define_label(name, n, scope); continue; @@ -590,6 +607,7 @@ static void process_file(struct InFile *f) else if (n == 4 && memcmp(dn, "fill", 4) == 0) do_fill(&s); else if (n == 5 && memcmp(dn, "scope", 5) == 0) do_scope_open(); else if (n == 8 && memcmp(dn, "endscope", 8) == 0) do_scope_close(); + else if (n == 7 && memcmp(dn, "ptrsize", 7) == 0) do_ptrsize(&s); else die("unknown directive '.%.*s'", n, dn); continue; } @@ -632,10 +650,6 @@ static void load_input(const char *path) long sz; char *buf; - if (input_count >= MAX_FILES) { - fprintf(stderr, "hex2pp: too many input files\n"); - exit(1); - } fp = fopen(path, "rb"); if (fp == NULL) { perror(path); exit(1); } if (fseek(fp, 0, SEEK_END) != 0) { perror(path); exit(1); } @@ -652,71 +666,65 @@ static void load_input(const char *path) buf[sz] = '\0'; fclose(fp); - inputs[input_count].path = path; - inputs[input_count].buf = buf; - inputs[input_count].len = (int)sz; - input_count++; + input_file.path = path; + input_file.buf = buf; + input_file.len = (int)sz; } static void usage(const char *prog) { fprintf(stderr, - "usage: %s (-f|--file) FILE [(-f|--file) FILE ...]\n" - " [-o|--output OUT]\n" - " [-B|--base-address ADDR]\n" - " [--big-endian | --little-endian]\n" - " [-b|--binary]\n" - " [--non-executable]\n", + "usage: %s [-B ADDR] [-E|-e] [-b] [-N] IN OUT\n", prog); } int main(int argc, char **argv) { int i; + const char *in_path = NULL; for (i = 1; i < argc; i++) { const char *a = argv[i]; - if (strcmp(a, "-f") == 0 || strcmp(a, "--file") == 0) { - if (++i >= argc) { usage(argv[0]); return 1; } - load_input(argv[i]); - } else if (strcmp(a, "-o") == 0 || strcmp(a, "--output") == 0) { - if (++i >= argc) { usage(argv[0]); return 1; } - output_path = argv[i]; - } else if (strcmp(a, "-B") == 0 || strcmp(a, "--base-address") == 0) { + if (strcmp(a, "-B") == 0) { if (++i >= argc) { usage(argv[0]); return 1; } base_address = parse_long(argv[i], "base address"); - } else 
if (strcmp(a, "--big-endian") == 0) { + } else if (strcmp(a, "-E") == 0) { big_endian = 1; - } else if (strcmp(a, "--little-endian") == 0) { + } else if (strcmp(a, "-e") == 0) { big_endian = 0; - } else if (strcmp(a, "-b") == 0 || strcmp(a, "--binary") == 0) { + } else if (strcmp(a, "-b") == 0) { byte_mode = BINARY_MODE; - } else if (strcmp(a, "--non-executable") == 0) { + } else if (strcmp(a, "-N") == 0) { non_executable = 1; - } else if (strcmp(a, "-h") == 0 || strcmp(a, "--help") == 0) { + } else if (a[0] == '-' && a[1] != '\0') { + fprintf(stderr, "hex2pp: unknown argument: %s\n", a); usage(argv[0]); - return 0; + return 1; + } else if (in_path == NULL) { + in_path = a; + } else if (output_path == NULL) { + output_path = a; } else { - fprintf(stderr, "hex2pp: unknown argument: %s\n", a); + fprintf(stderr, "hex2pp: extra positional argument: %s\n", a); usage(argv[0]); return 1; } } - if (input_count == 0) { - fprintf(stderr, "hex2pp: no input files\n"); + if (in_path == NULL || output_path == NULL) { usage(argv[0]); return 1; } + load_input(in_path); /* Pass 1: collect labels. */ pass = 1; ip = 0; scope_depth = 0; scope_seq = 0; - for (i = 0; i < input_count; i++) { - process_file(&inputs[i]); - } + ptrsize = 4; + ptrsize_used = 0; + process_file(&input_file); if (scope_depth != 0) die(".scope not closed at end of input"); /* Pass 2: emit. */ @@ -725,9 +733,9 @@ int main(int argc, char **argv) output_used = 0; scope_depth = 0; scope_seq = 0; - for (i = 0; i < input_count; i++) { - process_file(&inputs[i]); - } + ptrsize = 4; + ptrsize_used = 0; + process_file(&input_file); if (scope_depth != 0) die(".scope not closed at end of input"); /* Write output. */ diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh @@ -79,18 +79,10 @@ fail() { ## --- m1pp suite --------------------------------------------------------- ## -## Two-step check: -## 1. 
Run M1pp against tests/M1pp/<name>.M1pp; diff its text output -## against tests/M1pp/<name>.expected (parity with the C oracle). -## 2. Pipe that output through hex2pp as an assemble smoke test. The -## new M1pp emits bare hex consumable directly by hex2pp; this -## catches cases where M1pp produces parity-correct text that -## hex2pp can't actually parse (e.g. stray whitespace bugs, -## malformed sigil expressions). -## -## Both steps must pass for the fixture to PASS. The smoke-test step -## uses hex2pp's --non-executable mode and writes to a throwaway path -## — we only care about hex2pp's exit status, not the bytes. +## Single check: run M1pp against tests/M1pp/<name>.M1pp, diff its text +## output against tests/M1pp/<name>.expected. The suite tests macro +## expansion only — assembling the result through hex2pp is the job of +## the p1 / cc-* suites, where the input is a complete program. run_m1pp_suite() { if [ -z "$NAMES" ]; then NAMES=$(discover tests/M1pp M1pp) @@ -126,20 +118,6 @@ run_m1pp_suite() { continue fi - # Smoke test: feed M1pp's output through hex2pp. We don't run - # the resulting bytes (the fixture isn't a complete program), - # only verify hex2pp accepts the syntax. --non-executable - # skips the chmod on the throwaway output. - binfile=build/$ARCH/tests/M1pp/$name.bin - hex2pp_log=build/$ARCH/tests/M1pp/$name.hex2pp.log - rm -f "$binfile" "$hex2pp_log" - if ! "./build/$ARCH/hex2pp/hex2pp" --non-executable \ - -f "$outfile" -o "$binfile" \ - >"$hex2pp_log" 2>&1; then - fail "$label" "hex2pp smoke-test failed:" "$hex2pp_log" - continue - fi - report "$label" PASS done }