commit 27dba8a270502627d69babede9b2112837600836
parent d0d9ec33dc17c2c446a8727ab09149cad0997fcc
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 16:27:25 -0700
hex2pp: single-input CLI + .ptrsize directive; P1 table prune
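hex2pp.c / hex2pp.P1: drop multi-file -f loop in favour of one IN/OUT
positional pair (callers catm upstream). Add .ptrsize directive so &/%
sigil width is configurable per run (default 4, 8 for 64-bit images).
Short-flag CLI (-B/-E/-e/-b/-N) replaces the long-flag form; -o is
superseded by the OUT positional (no more "a.out" default) and
-h/--help is removed.
Also accept '>' as a synonym for '-' in label arithmetic, and treat
dot-prefixed labels outside any .scope as ordinary global labels
instead of rejecting them.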
P1-{aarch64,amd64,riscv64}.M1: regenerate after pruning unused entries
and adding the small set referenced by recent expansion paths.
docs/HEX2pp.md, scripts/boot-run-tests.sh: re-document the CLI shape
and drop the m1pp suite's redundant hex2pp smoke step (the p1 / cc-*
suites already exercise the assemble path against complete programs).
Diffstat:
7 files changed, 335 insertions(+), 302 deletions(-)
diff --git a/P1/P1-aarch64.M1 b/P1/P1-aarch64.M1
@@ -23,6 +23,7 @@ DEFINE la_br 5100001802000014
## ---- Moves
DEFINE mov_a0,a1 E00301AA
+DEFINE mov_a0,a2 E00302AA
DEFINE mov_a0,a3 E00303AA
DEFINE mov_a0,t0 E00309AA
DEFINE mov_a0,t1 E0030AAA
@@ -39,11 +40,10 @@ DEFINE mov_t2,a0 EB0300AA
DEFINE mov_t2,t1 EB030AAA
## ---- Register Arithmetic
-DEFINE add_a0,a0,a1 0000018B
DEFINE add_a0,a0,a2 0000028B
+DEFINE add_a0,a0,a3 0000038B
DEFINE add_a0,a0,t0 0000098B
DEFINE add_a0,a0,t1 00000A8B
-DEFINE add_a0,a0,t2 00000B8B
DEFINE add_a0,a2,a0 4000008B
DEFINE add_a0,a2,t2 40000B8B
DEFINE add_a0,t0,a3 2001038B
@@ -57,7 +57,6 @@ DEFINE add_a1,a1,a2 2100028B
DEFINE add_a1,a1,a3 2100038B
DEFINE add_a1,a1,t0 2100098B
DEFINE add_a1,a1,t1 21000A8B
-DEFINE add_a1,a2,a3 4100038B
DEFINE add_a1,a2,t0 4100098B
DEFINE add_a1,a3,a1 6100018B
DEFINE add_a1,t0,a0 2101008B
@@ -67,9 +66,11 @@ DEFINE add_a1,t2,t1 61010A8B
DEFINE add_a2,a1,a3 2200038B
DEFINE add_a2,a1,t0 2200098B
DEFINE add_a2,a2,a0 4200008B
+DEFINE add_a2,a2,a1 4200018B
DEFINE add_a2,a2,a3 4200038B
DEFINE add_a2,a2,t0 4200098B
DEFINE add_a2,a2,t1 42000A8B
+DEFINE add_a2,a2,t2 42000B8B
DEFINE add_a2,a3,a2 6200028B
DEFINE add_a2,t0,t1 22010A8B
DEFINE add_a2,t2,a0 6201008B
@@ -100,7 +101,6 @@ DEFINE add_t1,t1,a2 4A01028B
DEFINE add_t1,t1,t2 4A010B8B
DEFINE add_t2,a0,t0 0B00098B
DEFINE add_t2,a0,t1 0B000A8B
-DEFINE add_t2,a1,a3 2B00038B
DEFINE add_t2,a1,t2 2B000B8B
DEFINE add_t2,a2,t1 4B000A8B
DEFINE add_t2,t0,t1 2B010A8B
@@ -113,11 +113,11 @@ DEFINE sub_a0,a1,t2 20000BCB
DEFINE sub_a0,a3,a0 600000CB
DEFINE sub_a1,t0,a0 210100CB
DEFINE sub_a2,a1,a0 220000CB
+DEFINE sub_a2,a1,a3 220003CB
DEFINE sub_a2,a2,a2 420002CB
DEFINE sub_a2,a2,t0 420009CB
DEFINE sub_a2,t0,t1 22010ACB
DEFINE sub_a2,t2,a3 620103CB
-DEFINE sub_a3,a3,a1 630001CB
DEFINE sub_a3,a3,a2 630002CB
DEFINE sub_a3,t0,a2 230102CB
DEFINE sub_a3,t0,a3 230103CB
@@ -138,10 +138,8 @@ DEFINE shl_a2,a2,a3 4220C39A
DEFINE sar_a2,a2,a3 4228C39A
DEFINE mul_a0,a0,a3 007C039B
DEFINE mul_a0,t1,t2 407D0B9B
-DEFINE mul_a2,a2,t0 427C099B
DEFINE mul_a3,a3,a2 637C029B
DEFINE mul_t0,t0,a1 297D019B
-DEFINE mul_t0,t0,a2 297D029B
DEFINE mul_t2,t0,a2 2B7D029B
DEFINE div_a0,a0,a1 000CC19A
DEFINE div_a2,a2,a3 420CC39A
@@ -239,11 +237,9 @@ DEFINE ld_a1,t1,24 410D40F9
DEFINE ld_a1,t2,16 610940F9
DEFINE ld_a1,sp,8 E10F40F9
DEFINE ld_a2,a0,0 020040F9
-DEFINE ld_a2,a0,8 020440F9
DEFINE ld_a2,a0,16 020840F9
DEFINE ld_a2,a0,24 020C40F9
DEFINE ld_a2,a1,0 220040F9
-DEFINE ld_a2,a1,8 220440F9
DEFINE ld_a2,a2,0 420040F9
DEFINE ld_a2,t0,0 220140F9
DEFINE ld_a2,t0,8 220540F9
@@ -254,7 +250,6 @@ DEFINE ld_a2,t1,0 420140F9
DEFINE ld_a2,t2,0 620140F9
DEFINE ld_a2,sp,16 E21340F9
DEFINE ld_a3,a0,0 030040F9
-DEFINE ld_a3,a0,8 030440F9
DEFINE ld_a3,a0,16 030840F9
DEFINE ld_a3,a1,0 230040F9
DEFINE ld_a3,a1,8 230440F9
@@ -274,8 +269,6 @@ DEFINE ld_t0,a1,16 290840F9
DEFINE ld_t0,a1,32 291040F9
DEFINE ld_t0,a2,0 490040F9
DEFINE ld_t0,t0,0 290140F9
-DEFINE ld_t0,t1,8 490540F9
-DEFINE ld_t0,t1,16 490940F9
DEFINE ld_t0,t2,0 690140F9
DEFINE ld_t0,t2,16 690940F9
DEFINE ld_t0,sp,0 E90B40F9
@@ -357,7 +350,6 @@ DEFINE st_a2,t2,0 620100F9
DEFINE st_a3,a0,0 030000F9
DEFINE st_a3,a1,0 230000F9
DEFINE st_a3,a2,0 430000F9
-DEFINE st_a3,t0,0 230100F9
DEFINE st_a3,t0,24 230D00F9
DEFINE st_a3,t1,8 430500F9
DEFINE st_a3,t2,0 630100F9
@@ -365,6 +357,7 @@ DEFINE st_a3,t2,8 630500F9
DEFINE st_a3,t2,16 630900F9
DEFINE st_a3,t2,24 630D00F9
DEFINE st_t0,a0,0 090000F9
+DEFINE st_t0,a0,8 090400F9
DEFINE st_t0,a0,16 090800F9
DEFINE st_t0,a0,24 090C00F9
DEFINE st_t0,a1,0 290000F9
@@ -402,12 +395,12 @@ DEFINE lb_a1,a3,1 61044039
DEFINE lb_a2,a1,0 22004039
DEFINE lb_a2,a2,0 42004039
DEFINE lb_a2,t0,0 22014039
+DEFINE lb_a3,a0,0 03004039
DEFINE lb_a3,a1,0 23004039
DEFINE lb_a3,a2,0 43004039
DEFINE lb_a3,a3,0 63004039
DEFINE lb_t0,a3,0 69004039
DEFINE lb_t0,a3,1 69044039
-DEFINE lb_t0,a3,2 69084039
DEFINE lb_t0,t0,0 29014039
DEFINE lb_t1,t1,0 4A014039
DEFINE lb_t2,t0,0 2B014039
@@ -417,7 +410,6 @@ DEFINE sb_a0,a2,0 40000039
DEFINE sb_a0,t2,0 60010039
DEFINE sb_a1,a2,0 41000039
DEFINE sb_a1,t0,0 21010039
-DEFINE sb_a2,a0,0 02000039
DEFINE sb_a2,a1,0 22000039
DEFINE sb_a2,a3,0 62000039
DEFINE sb_a2,t2,0 62010039
@@ -427,7 +419,6 @@ DEFINE sb_a3,a2,0 43000039
DEFINE sb_a3,t2,0 63010039
DEFINE sb_t1,a0,0 0A000039
DEFINE sb_t1,a2,0 4A000039
-DEFINE sb_t2,a0,0 0B000039
DEFINE sb_t2,a1,0 2B000039
DEFINE sb_t2,a2,0 4B000039
@@ -455,7 +446,6 @@ DEFINE beq_t1,a1 5F0101EB4100005420021FD6
DEFINE beq_t1,a2 5F0102EB4100005420021FD6
DEFINE beq_t1,t0 5F0109EB4100005420021FD6
DEFINE beq_t1,t2 5F010BEB4100005420021FD6
-DEFINE beq_t2,a0 7F0100EB4100005420021FD6
DEFINE beq_t2,a2 7F0102EB4100005420021FD6
DEFINE beq_t2,a3 7F0103EB4100005420021FD6
DEFINE beq_t2,t1 7F010AEB4100005420021FD6
@@ -484,7 +474,6 @@ DEFINE blt_a2,a1 5F0001EB4A00005420021FD6
DEFINE blt_a2,a3 5F0003EB4A00005420021FD6
DEFINE blt_a2,t0 5F0009EB4A00005420021FD6
DEFINE blt_a2,t1 5F000AEB4A00005420021FD6
-DEFINE blt_a2,t2 5F000BEB4A00005420021FD6
DEFINE blt_a3,a2 7F0002EB4A00005420021FD6
DEFINE blt_a3,t2 7F000BEB4A00005420021FD6
DEFINE blt_t0,t1 3F010AEB4A00005420021FD6
diff --git a/P1/P1-amd64.M1 b/P1/P1-amd64.M1
@@ -23,6 +23,7 @@ DEFINE la_br 41BF
## ---- Moves
DEFINE mov_a0,a1 4889F7
+DEFINE mov_a0,a2 4889D7
DEFINE mov_a0,a3 4889CF
DEFINE mov_a0,t0 4C89D7
DEFINE mov_a0,t1 4C89DF
@@ -39,11 +40,10 @@ DEFINE mov_t2,a0 4989F8
DEFINE mov_t2,t1 4D89D8
## ---- Register Arithmetic
-DEFINE add_a0,a0,a1 4889FF4801F7
DEFINE add_a0,a0,a2 4889FF4801D7
+DEFINE add_a0,a0,a3 4889FF4801CF
DEFINE add_a0,a0,t0 4889FF4C01D7
DEFINE add_a0,a0,t1 4889FF4C01DF
-DEFINE add_a0,a0,t2 4889FF4C01C7
DEFINE add_a0,a2,a0 4989F94889D74C01CF
DEFINE add_a0,a2,t2 4889D74C01C7
DEFINE add_a0,t0,a3 4C89D74801CF
@@ -57,7 +57,6 @@ DEFINE add_a1,a1,a2 4889F64801D6
DEFINE add_a1,a1,a3 4889F64801CE
DEFINE add_a1,a1,t0 4889F64C01D6
DEFINE add_a1,a1,t1 4889F64C01DE
-DEFINE add_a1,a2,a3 4889D64801CE
DEFINE add_a1,a2,t0 4889D64C01D6
DEFINE add_a1,a3,a1 4989F14889CE4C01CE
DEFINE add_a1,t0,a0 4C89D64801FE
@@ -67,9 +66,11 @@ DEFINE add_a1,t2,t1 4C89C64C01DE
DEFINE add_a2,a1,a3 4889F24801CA
DEFINE add_a2,a1,t0 4889F24C01D2
DEFINE add_a2,a2,a0 4889D24801FA
+DEFINE add_a2,a2,a1 4889D24801F2
DEFINE add_a2,a2,a3 4889D24801CA
DEFINE add_a2,a2,t0 4889D24C01D2
DEFINE add_a2,a2,t1 4889D24C01DA
+DEFINE add_a2,a2,t2 4889D24C01C2
DEFINE add_a2,a3,a2 4989D14889CA4C01CA
DEFINE add_a2,t0,t1 4C89D24C01DA
DEFINE add_a2,t2,a0 4C89C24801FA
@@ -100,7 +101,6 @@ DEFINE add_t1,t1,a2 4D89DB4901D3
DEFINE add_t1,t1,t2 4D89DB4D01C3
DEFINE add_t2,a0,t0 4989F84D01D0
DEFINE add_t2,a0,t1 4989F84D01D8
-DEFINE add_t2,a1,a3 4989F04901C8
DEFINE add_t2,a1,t2 4D89C14989F04D01C8
DEFINE add_t2,a2,t1 4989D04D01D8
DEFINE add_t2,t0,t1 4D89D04D01D8
@@ -113,11 +113,11 @@ DEFINE sub_a0,a1,t2 4889F74C29C7
DEFINE sub_a0,a3,a0 4989F94889CF4C29CF
DEFINE sub_a1,t0,a0 4C89D64829FE
DEFINE sub_a2,a1,a0 4889F24829FA
+DEFINE sub_a2,a1,a3 4889F24829CA
DEFINE sub_a2,a2,a2 4989D14889D24C29CA
DEFINE sub_a2,a2,t0 4889D24C29D2
DEFINE sub_a2,t0,t1 4C89D24C29DA
DEFINE sub_a2,t2,a3 4C89C24829CA
-DEFINE sub_a3,a3,a1 4889C94829F1
DEFINE sub_a3,a3,a2 4889C94829D1
DEFINE sub_a3,t0,a2 4C89D14829D1
DEFINE sub_a3,t0,a3 4989C94C89D14C29C9
@@ -138,10 +138,8 @@ DEFINE shl_a2,a2,a3 4889CD4989D14889C949D3E14889E94C89CA
DEFINE sar_a2,a2,a3 4889CD4989D14889C949D3F94889E94C89CA
DEFINE mul_a0,a0,a3 4889FF480FAFF9
DEFINE mul_a0,t1,t2 4C89DF490FAFF8
-DEFINE mul_a2,a2,t0 4889D2490FAFD2
DEFINE mul_a3,a3,a2 4889C9480FAFCA
DEFINE mul_t0,t0,a1 4D89D24C0FAFD6
-DEFINE mul_t0,t0,a2 4D89D24C0FAFD2
DEFINE mul_t2,t0,a2 4D89D04C0FAFC2
DEFINE div_a0,a0,a1 4889D54989F14889F8489949F7F94889EA4889C7
DEFINE div_a2,a2,a3 4889D54989C94889D0489949F7F94889EA4889C2
@@ -239,11 +237,9 @@ DEFINE ld_a1,t1,24 498B7318
DEFINE ld_a1,t2,16 498B7010
DEFINE ld_a1,sp,8 488B742418
DEFINE ld_a2,a0,0 488B5700
-DEFINE ld_a2,a0,8 488B5708
DEFINE ld_a2,a0,16 488B5710
DEFINE ld_a2,a0,24 488B5718
DEFINE ld_a2,a1,0 488B5600
-DEFINE ld_a2,a1,8 488B5608
DEFINE ld_a2,a2,0 488B5200
DEFINE ld_a2,t0,0 498B5200
DEFINE ld_a2,t0,8 498B5208
@@ -254,7 +250,6 @@ DEFINE ld_a2,t1,0 498B5300
DEFINE ld_a2,t2,0 498B5000
DEFINE ld_a2,sp,16 488B542420
DEFINE ld_a3,a0,0 488B4F00
-DEFINE ld_a3,a0,8 488B4F08
DEFINE ld_a3,a0,16 488B4F10
DEFINE ld_a3,a1,0 488B4E00
DEFINE ld_a3,a1,8 488B4E08
@@ -274,8 +269,6 @@ DEFINE ld_t0,a1,16 4C8B5610
DEFINE ld_t0,a1,32 4C8B5620
DEFINE ld_t0,a2,0 4C8B5200
DEFINE ld_t0,t0,0 4D8B5200
-DEFINE ld_t0,t1,8 4D8B5308
-DEFINE ld_t0,t1,16 4D8B5310
DEFINE ld_t0,t2,0 4D8B5000
DEFINE ld_t0,t2,16 4D8B5010
DEFINE ld_t0,sp,0 4C8B542410
@@ -357,7 +350,6 @@ DEFINE st_a2,t2,0 49895000
DEFINE st_a3,a0,0 48894F00
DEFINE st_a3,a1,0 48894E00
DEFINE st_a3,a2,0 48894A00
-DEFINE st_a3,t0,0 49894A00
DEFINE st_a3,t0,24 49894A18
DEFINE st_a3,t1,8 49894B08
DEFINE st_a3,t2,0 49894800
@@ -365,6 +357,7 @@ DEFINE st_a3,t2,8 49894808
DEFINE st_a3,t2,16 49894810
DEFINE st_a3,t2,24 49894818
DEFINE st_t0,a0,0 4C895700
+DEFINE st_t0,a0,8 4C895708
DEFINE st_t0,a0,16 4C895710
DEFINE st_t0,a0,24 4C895718
DEFINE st_t0,a1,0 4C895600
@@ -402,12 +395,12 @@ DEFINE lb_a1,a3,1 480FB67101
DEFINE lb_a2,a1,0 480FB65600
DEFINE lb_a2,a2,0 480FB65200
DEFINE lb_a2,t0,0 490FB65200
+DEFINE lb_a3,a0,0 480FB64F00
DEFINE lb_a3,a1,0 480FB64E00
DEFINE lb_a3,a2,0 480FB64A00
DEFINE lb_a3,a3,0 480FB64900
DEFINE lb_t0,a3,0 4C0FB65100
DEFINE lb_t0,a3,1 4C0FB65101
-DEFINE lb_t0,a3,2 4C0FB65102
DEFINE lb_t0,t0,0 4D0FB65200
DEFINE lb_t1,t1,0 4D0FB65B00
DEFINE lb_t2,t0,0 4D0FB64200
@@ -417,7 +410,6 @@ DEFINE sb_a0,a2,0 48887A00
DEFINE sb_a0,t2,0 49887800
DEFINE sb_a1,a2,0 48887200
DEFINE sb_a1,t0,0 49887200
-DEFINE sb_a2,a0,0 48885700
DEFINE sb_a2,a1,0 48885600
DEFINE sb_a2,a3,0 48885100
DEFINE sb_a2,t2,0 49885000
@@ -427,7 +419,6 @@ DEFINE sb_a3,a2,0 48884A00
DEFINE sb_a3,t2,0 49884800
DEFINE sb_t1,a0,0 4C885F00
DEFINE sb_t1,a2,0 4C885A00
-DEFINE sb_t2,a0,0 4C884700
DEFINE sb_t2,a1,0 4C884600
DEFINE sb_t2,a2,0 4C884200
@@ -455,7 +446,6 @@ DEFINE beq_t1,a1 4939F3750341FFE7
DEFINE beq_t1,a2 4939D3750341FFE7
DEFINE beq_t1,t0 4D39D3750341FFE7
DEFINE beq_t1,t2 4D39C3750341FFE7
-DEFINE beq_t2,a0 4939F8750341FFE7
DEFINE beq_t2,a2 4939D0750341FFE7
DEFINE beq_t2,a3 4939C8750341FFE7
DEFINE beq_t2,t1 4D39D8750341FFE7
@@ -484,7 +474,6 @@ DEFINE blt_a2,a1 4839F27D0341FFE7
DEFINE blt_a2,a3 4839CA7D0341FFE7
DEFINE blt_a2,t0 4C39D27D0341FFE7
DEFINE blt_a2,t1 4C39DA7D0341FFE7
-DEFINE blt_a2,t2 4C39C27D0341FFE7
DEFINE blt_a3,a2 4839D17D0341FFE7
DEFINE blt_a3,t2 4C39C17D0341FFE7
DEFINE blt_t0,t1 4D39DA7D0341FFE7
diff --git a/P1/P1-riscv64.M1 b/P1/P1-riscv64.M1
@@ -23,6 +23,7 @@ DEFINE la_br 970F000083EFCF006F008000
## ---- Moves
DEFINE mov_a0,a1 13850500
+DEFINE mov_a0,a2 13050600
DEFINE mov_a0,a3 13850600
DEFINE mov_a0,t0 13850200
DEFINE mov_a0,t1 13050300
@@ -39,11 +40,10 @@ DEFINE mov_t2,a0 93030500
DEFINE mov_t2,t1 93030300
## ---- Register Arithmetic
-DEFINE add_a0,a0,a1 3305B500
DEFINE add_a0,a0,a2 3305C500
+DEFINE add_a0,a0,a3 3305D500
DEFINE add_a0,a0,t0 33055500
DEFINE add_a0,a0,t1 33056500
-DEFINE add_a0,a0,t2 33057500
DEFINE add_a0,a2,a0 3305A600
DEFINE add_a0,a2,t2 33057600
DEFINE add_a0,t0,a3 3385D200
@@ -57,7 +57,6 @@ DEFINE add_a1,a1,a2 B385C500
DEFINE add_a1,a1,a3 B385D500
DEFINE add_a1,a1,t0 B3855500
DEFINE add_a1,a1,t1 B3856500
-DEFINE add_a1,a2,a3 B305D600
DEFINE add_a1,a2,t0 B3055600
DEFINE add_a1,a3,a1 B385B600
DEFINE add_a1,t0,a0 B385A200
@@ -67,9 +66,11 @@ DEFINE add_a1,t2,t1 B3856300
DEFINE add_a2,a1,a3 3386D500
DEFINE add_a2,a1,t0 33865500
DEFINE add_a2,a2,a0 3306A600
+DEFINE add_a2,a2,a1 3306B600
DEFINE add_a2,a2,a3 3306D600
DEFINE add_a2,a2,t0 33065600
DEFINE add_a2,a2,t1 33066600
+DEFINE add_a2,a2,t2 33067600
DEFINE add_a2,a3,a2 3386C600
DEFINE add_a2,t0,t1 33866200
DEFINE add_a2,t2,a0 3386A300
@@ -100,7 +101,6 @@ DEFINE add_t1,t1,a2 3303C300
DEFINE add_t1,t1,t2 33037300
DEFINE add_t2,a0,t0 B3035500
DEFINE add_t2,a0,t1 B3036500
-DEFINE add_t2,a1,a3 B383D500
DEFINE add_t2,a1,t2 B3837500
DEFINE add_t2,a2,t1 B3036600
DEFINE add_t2,t0,t1 B3836200
@@ -113,11 +113,11 @@ DEFINE sub_a0,a1,t2 33857540
DEFINE sub_a0,a3,a0 3385A640
DEFINE sub_a1,t0,a0 B385A240
DEFINE sub_a2,a1,a0 3386A540
+DEFINE sub_a2,a1,a3 3386D540
DEFINE sub_a2,a2,a2 3306C640
DEFINE sub_a2,a2,t0 33065640
DEFINE sub_a2,t0,t1 33866240
DEFINE sub_a2,t2,a3 3386D340
-DEFINE sub_a3,a3,a1 B386B640
DEFINE sub_a3,a3,a2 B386C640
DEFINE sub_a3,t0,a2 B386C240
DEFINE sub_a3,t0,a3 B386D240
@@ -138,10 +138,8 @@ DEFINE shl_a2,a2,a3 3316D600
DEFINE sar_a2,a2,a3 3356D640
DEFINE mul_a0,a0,a3 3305D502
DEFINE mul_a0,t1,t2 33057302
-DEFINE mul_a2,a2,t0 33065602
DEFINE mul_a3,a3,a2 B386C602
DEFINE mul_t0,t0,a1 B382B202
-DEFINE mul_t0,t0,a2 B382C202
DEFINE mul_t2,t0,a2 B383C202
DEFINE div_a0,a0,a1 3345B502
DEFINE div_a2,a2,a3 3346D602
@@ -239,11 +237,9 @@ DEFINE ld_a1,t1,24 83358301
DEFINE ld_a1,t2,16 83B50301
DEFINE ld_a1,sp,8 83358101
DEFINE ld_a2,a0,0 03360500
-DEFINE ld_a2,a0,8 03368500
DEFINE ld_a2,a0,16 03360501
DEFINE ld_a2,a0,24 03368501
DEFINE ld_a2,a1,0 03B60500
-DEFINE ld_a2,a1,8 03B68500
DEFINE ld_a2,a2,0 03360600
DEFINE ld_a2,t0,0 03B60200
DEFINE ld_a2,t0,8 03B68200
@@ -254,7 +250,6 @@ DEFINE ld_a2,t1,0 03360300
DEFINE ld_a2,t2,0 03B60300
DEFINE ld_a2,sp,16 03360102
DEFINE ld_a3,a0,0 83360500
-DEFINE ld_a3,a0,8 83368500
DEFINE ld_a3,a0,16 83360501
DEFINE ld_a3,a1,0 83B60500
DEFINE ld_a3,a1,8 83B68500
@@ -274,8 +269,6 @@ DEFINE ld_t0,a1,16 83B20501
DEFINE ld_t0,a1,32 83B20502
DEFINE ld_t0,a2,0 83320600
DEFINE ld_t0,t0,0 83B20200
-DEFINE ld_t0,t1,8 83328300
-DEFINE ld_t0,t1,16 83320301
DEFINE ld_t0,t2,0 83B20300
DEFINE ld_t0,t2,16 83B20301
DEFINE ld_t0,sp,0 83320101
@@ -357,7 +350,6 @@ DEFINE st_a2,t2,0 23B0C300
DEFINE st_a3,a0,0 2330D500
DEFINE st_a3,a1,0 23B0D500
DEFINE st_a3,a2,0 2330D600
-DEFINE st_a3,t0,0 23B0D200
DEFINE st_a3,t0,24 23BCD200
DEFINE st_a3,t1,8 2334D300
DEFINE st_a3,t2,0 23B0D300
@@ -365,6 +357,7 @@ DEFINE st_a3,t2,8 23B4D300
DEFINE st_a3,t2,16 23B8D300
DEFINE st_a3,t2,24 23BCD300
DEFINE st_t0,a0,0 23305500
+DEFINE st_t0,a0,8 23345500
DEFINE st_t0,a0,16 23385500
DEFINE st_t0,a0,24 233C5500
DEFINE st_t0,a1,0 23B05500
@@ -402,12 +395,12 @@ DEFINE lb_a1,a3,1 83C51600
DEFINE lb_a2,a1,0 03C60500
DEFINE lb_a2,a2,0 03460600
DEFINE lb_a2,t0,0 03C60200
+DEFINE lb_a3,a0,0 83460500
DEFINE lb_a3,a1,0 83C60500
DEFINE lb_a3,a2,0 83460600
DEFINE lb_a3,a3,0 83C60600
DEFINE lb_t0,a3,0 83C20600
DEFINE lb_t0,a3,1 83C21600
-DEFINE lb_t0,a3,2 83C22600
DEFINE lb_t0,t0,0 83C20200
DEFINE lb_t1,t1,0 03430300
DEFINE lb_t2,t0,0 83C30200
@@ -417,7 +410,6 @@ DEFINE sb_a0,a2,0 2300A600
DEFINE sb_a0,t2,0 2380A300
DEFINE sb_a1,a2,0 2300B600
DEFINE sb_a1,t0,0 2380B200
-DEFINE sb_a2,a0,0 2300C500
DEFINE sb_a2,a1,0 2380C500
DEFINE sb_a2,a3,0 2380C600
DEFINE sb_a2,t2,0 2380C300
@@ -427,7 +419,6 @@ DEFINE sb_a3,a2,0 2300D600
DEFINE sb_a3,t2,0 2380D300
DEFINE sb_t1,a0,0 23006500
DEFINE sb_t1,a2,0 23006600
-DEFINE sb_t2,a0,0 23007500
DEFINE sb_t2,a1,0 23807500
DEFINE sb_t2,a2,0 23007600
@@ -455,7 +446,6 @@ DEFINE beq_t1,a1 6314B30067800F00
DEFINE beq_t1,a2 6314C30067800F00
DEFINE beq_t1,t0 6314530067800F00
DEFINE beq_t1,t2 6314730067800F00
-DEFINE beq_t2,a0 6394A30067800F00
DEFINE beq_t2,a2 6394C30067800F00
DEFINE beq_t2,a3 6394D30067800F00
DEFINE beq_t2,t1 6394630067800F00
@@ -484,7 +474,6 @@ DEFINE blt_a2,a1 6354B60067800F00
DEFINE blt_a2,a3 6354D60067800F00
DEFINE blt_a2,t0 6354560067800F00
DEFINE blt_a2,t1 6354660067800F00
-DEFINE blt_a2,t2 6354760067800F00
DEFINE blt_a3,a2 63D4C60067800F00
DEFINE blt_a3,t2 63D4760067800F00
DEFINE blt_t0,t1 63D4620067800F00
diff --git a/docs/HEX2pp.md b/docs/HEX2pp.md
@@ -8,17 +8,18 @@ output feeds hex2++ directly — there is no intermediate macro/hex stage.
## Invocation
```
-hex2++ (-f|--file) FILE [(-f|--file) FILE ...]
- [-o|--output OUT]
- [-B|--base-address ADDR]
- [--big-endian | --little-endian]
- [-b|--binary] # default is hex
- [--non-executable]
+hex2++ [-B ADDR] # base address
+ [-E | -e] # big-endian | little-endian (default: little)
+ [-b] # binary digit mode (default: hex)
+ [-N] # non-executable output
+ IN OUT
```
-Output is one flat binary written from `Base_Address` upward. Multiple `-f`
-files are concatenated in argv order. Unless `--non-executable` is set and
-the output is a regular file, the output is `chmod 0750`'d.
+`IN` and `OUT` are positional: a single input file and a single output file.
+To assemble several sources together, concatenate them upstream (e.g. with
+`catm`) and pass the combined file as `IN`. Output is one flat binary
+written from `Base_Address` upward. Unless `-N` is set and the output is a
+regular file, the output is `chmod 0750`'d.
There is no per-target configuration. Any target-specific encoding (RISC-V
bitfield-scattered immediates, native branch displacements, etc.) is the
@@ -42,9 +43,9 @@ Active characters:
0-9 a-f A-F hex digits (HEX mode)
0-1 binary digits (BINARY mode)
: label definition
-. (+kw) directive (.align, .fill, .scope, .endscope)
+. (+kw) directive (.align, .fill, .scope, .endscope, .ptrsize)
! @ $ ~ % & label reference
-- label arithmetic in references
+- > label arithmetic in references (synonyms)
# ; line comment
ws token separator
```
@@ -58,11 +59,13 @@ ws token separator
Label names are tokens terminated by whitespace or `-`. Labels may be
referenced before they are defined; forward references resolve in pass 2.
-The label namespace is global except that names beginning with `.` are
-*local* to the enclosing `.scope`. Local labels are distinguished from
-directives by the leading character of the token: `:.NAME` is a local
-definition, `&.NAME` / `%.NAME` / etc. are local references, and a bare
-`.NAME` (no leading `:` or sigil) is a directive.
+The label namespace is global except that names beginning with `.` *inside
+a `.scope`* are local to that scope. The leading character of the token
+disambiguates labels from directives: `:.NAME` is a label definition,
+`&.NAME` / `%.NAME` / etc. are label references, and a bare `.NAME` (no
+leading `:` or sigil, at statement position) is a directive. Directive
+names are therefore reserved only at statement position, and remain
+available as label tokens when prefixed with `:` or a sigil.
```
.scope
@@ -72,24 +75,28 @@ definition, `&.NAME` / `%.NAME` / etc. are local references, and a bare
.endscope
```
-- `.scope` directives nest. A dotted reference resolves to the nearest
- enclosing definition, so an inner scope shadows an outer one with the
- same local name.
+- `.scope` directives nest. A dotted reference inside a scope resolves to
+ the nearest enclosing definition, so an inner scope shadows an outer one
+ with the same local name.
- Non-dotted labels defined inside a `.scope` remain global.
-- Dot-prefixed labels outside any `.scope` are an error.
+- Dot-prefixed labels outside any `.scope` are ordinary global labels;
+ the leading `.` is just part of the name.
## Label references
A reference is a single sigil character followed by a label expression:
-| Sigil | Width | Form | Range |
-|-------|-------|------|------------------------|
-| `!` | 1 B | rel | `-128..127` |
-| `@` | 2 B | rel | `-32768..32767` |
-| `$` | 2 B | abs | `0..65535` |
-| `~` | 3 B | rel | `-2^23..2^23-1` |
-| `%` | 4 B | rel | unchecked |
-| `&` | 4 B | abs | unchecked |
+| Sigil | Width | Form | Range |
+|-------|----------|------|------------------------|
+| `!` | 1 B | rel | `-128..127` |
+| `@` | 2 B | rel | `-32768..32767` |
+| `$` | 2 B | abs | `0..65535` |
+| `~` | 3 B | rel | `-2^23..2^23-1` |
+| `%` | ptrsize | rel | unchecked |
+| `&` | ptrsize | abs | unchecked |
+
+The width of `%` and `&` is set by [`.ptrsize`](#ptrsize-n) — 4 bytes by
+default, 8 for 64-bit pointer targets.
- "rel" emits `target - base`, where `base` is `ip` immediately after the
reference's bytes are accounted for.
@@ -99,13 +106,16 @@ A reference is a single sigil character followed by a label expression:
The label expression takes one of two forms:
```
-SIGIL LABEL # plain reference
-SIGIL LABEL - OTHER # emit target(LABEL) - target(OTHER)
+SIGIL LABEL # plain reference
+SIGIL LABEL - OTHER # emit target(LABEL) - target(OTHER)
+SIGIL LABEL > OTHER # synonym for `LABEL - OTHER`
```
The `LABEL - OTHER` form overrides the default base with another label, and
-applies uniformly to all sigils. Both labels must be defined somewhere in
-the input. Range checks apply identically to plain and arithmetic forms.
+applies uniformly to all sigils. `>` is accepted as an alias for `-` so
+hex2 inputs that use the relative-base override syntax assemble unchanged;
+both produce identical bytes. Both labels must be defined somewhere in the
+input. Range checks apply identically to plain and arithmetic forms.
Only one subtraction per reference; no addition, nesting, or
parenthesization.
@@ -118,7 +128,8 @@ Examples:
&case0-jt &case1-jt &case2-jt
# string length prefix (string bytes themselves come from the
-# upstream M1pp layer, e.g. `%bytes("hello")`)
+# upstream M1pp layer, which decodes a bare `"hello"` into the
+# five hex bytes shown here)
:s_begin
68 65 6c 6c 6f
:s_end
@@ -156,6 +167,20 @@ The pad pattern is supplied by whichever upstream layer knows the target
See [Labels](#labels).
+### `.ptrsize N`
+
+```
+.ptrsize 4 # default
+.ptrsize 8 # 64-bit pointer targets
+```
+
+Sets the byte width of the `&` and `%` sigils. `N` must be `4` or `8`.
+
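+`.ptrsize` is whole-invocation: the first occurrence in the input binds
+the width for the entire run, and any subsequent `.ptrsize` must specify
+the same value or it is an error. A `&` or `%` reference also binds the
+width, so place `.ptrsize` before the first such reference: a
+`.ptrsize 8` that follows one is rejected because the default of `4` is
+already in effect. If no `.ptrsize` directive appears, the width
+defaults to `4`.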
+
## Implementation outline
Two passes:
diff --git a/hex2pp/hex2pp.P1 b/hex2pp/hex2pp.P1
@@ -134,20 +134,25 @@ DEFINE OFF_labels 0048000009800000
b
:bss_init_done
- # ---- Default output_path = "a.out" -------------------------------------
- la_a0 &const_a_out
- la_a1 &output_path
- st_a0,a1,0
+ # ---- Default ptrsize = 4 ----------------------------------------------
+ li_t0 %4 %0
+ la_a0 &ptrsize
+ st_t0,a0,0
:arg_loop_init
- li_t0 %1 %0
+ li_t0 %0 %0
la_a0 &arg_idx
st_t0,a0,0
:arg_loop
- # if (i >= argc) goto arg_done
+ # i++; if (i >= argc) goto arg_done. arg_idx is bumped here at the
+ # top so each handler just `b` back to arg_loop without bookkeeping.
+ # arg_advance (used by value-taking flags) also increments, so a
+ # `-B ADDR` pair correctly advances by two argv slots per dispatch.
la_a0 &arg_idx
ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
la_a1 &saved_argc
ld_t1,a1,0
la_br &arg_done
@@ -169,45 +174,11 @@ DEFINE OFF_labels 0048000009800000
# Dispatch on the argument string. Each compare uses str_eq, which
# checks the trailing NUL of the argv string against the option
- # constant's known length.
-
- # -f / --file
- la_a0 &arg_ptr
- ld_a0,a0,0
- la_a1 &opt_dash_f
- li_a2 %2 %0
- la_br &str_eq
- call
- la_br &arg_is_file
- bnez_a0
- la_a0 &arg_ptr
- ld_a0,a0,0
- la_a1 &opt_long_file
- li_a2 %6 %0
- la_br &str_eq
- call
- la_br &arg_is_file
- bnez_a0
+ # constant's known length. Anything not matching a known flag (and
+ # not starting with '-') is treated as a positional argument: first
+ # is the input file, second is the output file.
- # -o / --output
- la_a0 &arg_ptr
- ld_a0,a0,0
- la_a1 &opt_dash_o
- li_a2 %2 %0
- la_br &str_eq
- call
- la_br &arg_is_output
- bnez_a0
- la_a0 &arg_ptr
- ld_a0,a0,0
- la_a1 &opt_long_output
- li_a2 %8 %0
- la_br &str_eq
- call
- la_br &arg_is_output
- bnez_a0
-
- # -B / --base-address
+ # -B
la_a0 &arg_ptr
ld_a0,a0,0
la_a1 &opt_dash_B
@@ -216,36 +187,28 @@ DEFINE OFF_labels 0048000009800000
call
la_br &arg_is_base
bnez_a0
- la_a0 &arg_ptr
- ld_a0,a0,0
- la_a1 &opt_long_base
- li_a2 %14 %0
- la_br &str_eq
- call
- la_br &arg_is_base
- bnez_a0
- # --big-endian
+ # -E
la_a0 &arg_ptr
ld_a0,a0,0
- la_a1 &opt_long_big
- li_a2 %12 %0
+ la_a1 &opt_dash_E
+ li_a2 %2 %0
la_br &str_eq
call
la_br &arg_is_big
bnez_a0
- # --little-endian
+ # -e
la_a0 &arg_ptr
ld_a0,a0,0
- la_a1 &opt_long_little
- li_a2 %15 %0
+ la_a1 &opt_dash_e
+ li_a2 %2 %0
la_br &str_eq
call
la_br &arg_is_little
bnez_a0
- # -b / --binary
+ # -b
la_a0 &arg_ptr
ld_a0,a0,0
la_a1 &opt_dash_b
@@ -254,64 +217,59 @@ DEFINE OFF_labels 0048000009800000
call
la_br &arg_is_binary
bnez_a0
- la_a0 &arg_ptr
- ld_a0,a0,0
- la_a1 &opt_long_binary
- li_a2 %8 %0
- la_br &str_eq
- call
- la_br &arg_is_binary
- bnez_a0
- # --non-executable
+ # -N
la_a0 &arg_ptr
ld_a0,a0,0
- la_a1 &opt_long_nonexec
- li_a2 %16 %0
+ la_a1 &opt_dash_N
+ li_a2 %2 %0
la_br &str_eq
call
la_br &arg_is_nonexec
bnez_a0
- # -h / --help
+ # Not a known flag. If it begins with '-' (and isn't just "-"),
+ # it's an unknown option. Otherwise it's a positional.
la_a0 &arg_ptr
ld_a0,a0,0
- la_a1 &opt_dash_h
- li_a2 %2 %0
- la_br &str_eq
- call
- la_br &arg_is_help
- bnez_a0
+ lb_a0,a0,0 # a0 = first byte
+ li_t0 %45 %0 # t0 = '-'
+ la_br &arg_is_positional
+ bne_a0,t0
la_a0 &arg_ptr
ld_a0,a0,0
- la_a1 &opt_long_help
- li_a2 %6 %0
- la_br &str_eq
- call
- la_br &arg_is_help
- bnez_a0
-
+ addi_a0,a0,1
+ lb_a0,a0,0 # a0 = second byte
+ la_br &arg_is_positional
+ beqz_a0
la_br &err_unknown_arg
b
-:arg_is_file
- la_br &arg_advance
- call
+:arg_is_positional
+ # If input not yet loaded, this is IN. Else if output not yet set,
+ # this is OUT. Else extra positional → error.
+ la_a0 &input_count
+ ld_t0,a0,0
+ la_br &arg_pos_is_out
+ bnez_t0
la_a0 &arg_ptr
ld_a0,a0,0
la_br &load_input
call
la_br &arg_loop
b
-:arg_is_output
- la_br &arg_advance
- call
+:arg_pos_is_out
+ la_a0 &output_path
+ ld_t0,a0,0
+ la_br &err_unknown_arg
+ bnez_t0
la_a0 &arg_ptr
ld_a0,a0,0
la_a1 &output_path
st_a0,a1,0
la_br &arg_loop
b
+
:arg_is_base
la_br &arg_advance
call
@@ -347,12 +305,6 @@ DEFINE OFF_labels 0048000009800000
st_t0,a0,0
la_br &arg_loop
b
-:arg_is_help
- la_br &print_usage
- call
- li_a0 sys_exit
- li_a1 %0 %0
- syscall
## arg_advance(): i++; if (i >= argc) usage error; arg_ptr = argv[i].
:arg_advance
@@ -381,7 +333,11 @@ DEFINE OFF_labels 0048000009800000
:arg_done
la_a0 &input_count
ld_t0,a0,0
- la_br &err_no_inputs
+ la_br &err_missing_positional
+ beqz_t0
+ la_a0 &output_path
+ ld_t0,a0,0
+ la_br &err_missing_positional
beqz_t0
# ---- Pass 1: collect labels --------------------------------------------
@@ -425,7 +381,8 @@ DEFINE OFF_labels 0048000009800000
li_a0 %0 %0
eret
-## reset_pass_state(): ip=0, output_used=0, scope_depth=0, scope_seq=0.
+## reset_pass_state(): ip=0, output_used=0, scope_depth=0, scope_seq=0,
+## ptrsize=4, ptrsize_used=0.
:reset_pass_state
enter_0
li_t0 %0 %0
@@ -437,6 +394,11 @@ DEFINE OFF_labels 0048000009800000
st_t0,a0,0
la_a0 &scope_seq
st_t0,a0,0
+ la_a0 &ptrsize_used
+ st_t0,a0,0
+ li_t0 %4 %0
+ la_a0 &ptrsize
+ st_t0,a0,0
eret
## run_one_pass(): for i in [0, input_count) call process_file(i).
@@ -493,7 +455,12 @@ DEFINE OFF_labels 0048000009800000
ld_a3,a0,0
st_a3,a2,0
- # Stash path for the syscall.
+ # Stash path for the syscall. a0 was clobbered by &aux_tmp above,
+ # so re-read the path from input_paths[0]. Slot 0 is always ours:
+ # only one input is accepted, and input_count stays 0 until li_eof
+ # increments it.
+ la_a0 &input_paths_ptr
+ ld_a0,a0,0
+ ld_a0,a0,0
la_a1 &li_path
st_a0,a1,0
@@ -696,10 +663,11 @@ DEFINE OFF_labels 0048000009800000
li_t1 %46 %0
la_br &scan_label_undotted
bne_t0,t1
- # dotted: scope_depth must be > 0
+ # dotted: scope-local only when inside a .scope; otherwise treat as
+ # an ordinary global label.
la_a0 &scope_depth
ld_t0,a0,0
- la_br &err_dotted_outside_scope
+ la_br &scan_label_undotted
beqz_t0
addi_t0,t0,neg1
shli_t2,t0,3
@@ -795,7 +763,7 @@ DEFINE OFF_labels 0048000009800000
la_a0 &name_len
ld_t0,a0,0
li_t1 %8 %0
- la_br &err_unknown_directive
+ la_br &scan_dir_check_7
bne_t0,t1
la_a0 &name_buf_ptr
ld_a0,a0,0
@@ -807,6 +775,22 @@ DEFINE OFF_labels 0048000009800000
bnez_a0
la_br &err_unknown_directive
b
+:scan_dir_check_7 # entered from the length-8 check when name_len != 8; tries the 7-byte directive name
+ la_a0 &name_len
+ ld_t0,a0,0 # t0 = name_len
+ li_t1 %7 %0 # 7 = length of "ptrsize"
+ la_br &err_unknown_directive
+ bne_t0,t1 # wrong length: no remaining directive can match
+ la_a0 &name_buf_ptr
+ ld_a0,a0,0 # a0 = value stored at name_buf_ptr (presumably the start of the directive name)
+ la_a1 &dir_ptrsize # a1 = "ptrsize" constant; it has no trailing NUL, hence a length-bounded compare
+ li_a2 %7 %0 # a2 = number of bytes to compare
+ la_br &mem_eq
+ call
+ la_br &scan_dir_ptrsize
+ bnez_a0 # nonzero mem_eq result is treated as a match
+ la_br &err_unknown_directive
+ b
:scan_dir_align
la_br &do_align
@@ -828,6 +812,11 @@ DEFINE OFF_labels 0048000009800000
call
la_br &scan_loop
b
+:scan_dir_ptrsize
+ la_br &do_ptrsize
+ call
+ la_br &scan_loop
+ b
:scan_ref
# a0 holds sigil; advance past it then process_reference.
@@ -1027,13 +1016,16 @@ DEFINE OFF_labels 0048000009800000
bnez_a0
la_a1 &nt_c
ld_a0,a1,0
- li_t0 %45 %0
+ li_t0 %45 %0 # '-'
+ la_br &nt_yes
+ beq_a0,t0
+ li_t0 %62 %0 # '>' (synonym for '-')
la_br &nt_yes
beq_a0,t0
- li_t0 %35 %0
+ li_t0 %35 %0 # '#'
la_br &nt_yes
beq_a0,t0
- li_t0 %59 %0
+ li_t0 %59 %0 # ';'
la_br &nt_yes
beq_a0,t0
li_a0 %0 %0
@@ -1867,9 +1859,12 @@ DEFINE OFF_labels 0048000009800000
li_t1 %46 %0
la_br &ll_undotted
bne_t0,t1
- # Dotted: walk scope_stack innermost-out.
+ # Dotted but only meaningful inside a .scope; otherwise fall through
+ # to the global-name lookup.
la_a0 &scope_depth
ld_t0,a0,0
+ la_br &ll_undotted
+ beqz_t0
addi_t0,t0,neg1
la_a1 &ll_d
st_t0,a1,0
@@ -2026,7 +2021,8 @@ DEFINE OFF_labels 0048000009800000
li_t0 %0 %0
la_a0 &pr_has_other
st_t0,a0,0
- # Optional '-' OTHER.
+ # Optional separator (- or >) followed by OTHER. '>' is a synonym
+ # for '-', accepted for hex2 compatibility.
la_a0 &scan_pos
ld_t0,a0,0
la_a1 &scan_end
@@ -2036,16 +2032,18 @@ DEFINE OFF_labels 0048000009800000
la_br &pr_after_other
blt_t1,t0
lb_a0,t0,0
- li_t1 %45 %0
+ li_t1 %45 %0 # '-'
+ la_br &pr_consume_sep
+ beq_a0,t1
+ li_t1 %62 %0 # '>'
+ la_br &pr_consume_sep
+ beq_a0,t1
la_br &pr_after_other
- la_a3 &aux_tmp
- st_t1,a3,0
- la_a0 &aux_tmp
- st_a3,a0,0
- ld_t0,a0,0
- bne_a0,t0
- addi_t0,t0,1
+ b
+:pr_consume_sep
la_a1 &scan_pos
+ ld_t0,a1,0
+ addi_t0,t0,1
st_t0,a1,0
la_a1 &scan_end
ld_t1,a1,0
@@ -2317,12 +2315,16 @@ DEFINE OFF_labels 0048000009800000
st_t1,a1,0
eret
:ssi_pct
- li_t0 %4 %0
+ la_a0 &ptrsize
+ ld_t0,a0,0
la_a1 &pr_width
st_t0,a1,0
li_t0 %1 %0
la_a1 &pr_is_rel
st_t0,a1,0
+ li_t0 %1 %0
+ la_a1 &ptrsize_used
+ st_t0,a1,0
li_t0 %0 %0
la_a1 &pr_range_check
st_t0,a1,0
@@ -2332,9 +2334,13 @@ DEFINE OFF_labels 0048000009800000
st_t0,a1,0
eret
:ssi_amp
- li_t0 %4 %0
+ la_a0 &ptrsize
+ ld_t0,a0,0
la_a1 &pr_width
st_t0,a1,0
+ li_t0 %1 %0
+ la_a1 &ptrsize_used
+ st_t0,a1,0
li_t0 %0 %0
la_a1 &pr_is_rel
st_t0,a1,0
@@ -2555,6 +2561,46 @@ DEFINE OFF_labels 0048000009800000
st_t0,a0,0
eret
+## do_ptrsize(): handle `.ptrsize N`.
+##
+## Skips inline whitespace, reads N with read_decimal (result in a0) and
+## requires N to be 4 or 8; anything else goes to err_ptrsize_bad.
+##
+## The width is bound once: by the first `.ptrsize` directive or by the
+## first '&'/'%' reference (ssi_pct / ssi_amp set ptrsize_used),
+## whichever comes first. Once bound, a later `.ptrsize` must repeat the
+## bound value, otherwise err_ptrsize_conflict. ptrsize_used therefore
+## means "width is bound", not only "a reference has been sized".
+## reset_pass_state clears ptrsize_used and restores ptrsize to 4.
+:do_ptrsize
+ enter_0
+ la_br &skip_inline_ws
+ call
+ la_br &read_decimal
+ call
+ la_a1 &dp_n # stash N; t0/t1 are reused by the checks below
+ st_a0,a1,0
+ # Validate N in {4, 8}
+ li_t0 %4 %0
+ la_br &dp_ok_value
+ beq_a0,t0
+ li_t0 %8 %0
+ la_br &dp_ok_value
+ beq_a0,t0
+ la_br &err_ptrsize_bad
+ b
+:dp_ok_value
+ # If the width is already bound (by an earlier `.ptrsize` or by a
+ # '&'/'%' reference), N must equal the current ptrsize.
+ la_a0 &ptrsize_used
+ ld_t0,a0,0
+ la_br &dp_set
+ beqz_t0
+ la_a0 &ptrsize
+ ld_t0,a0,0
+ la_a1 &dp_n
+ ld_t1,a1,0
+ la_br &err_ptrsize_conflict
+ bne_t0,t1
+ eret
+:dp_set
+ # First binding: record the requested width ...
+ la_a0 &dp_n
+ ld_t0,a0,0
+ la_a1 &ptrsize
+ st_t0,a1,0
+ # ... and mark it bound. Without this, a second `.ptrsize` with a
+ # different value (and no '&'/'%' in between) would silently replace
+ # the first instead of reaching err_ptrsize_conflict.
+ li_t0 %1 %0
+ la_a1 &ptrsize_used
+ st_t0,a1,0
+ eret
+
## do_scope_close(): scope_depth--; fatal if not in scope.
:do_scope_close
enter_0
@@ -3193,6 +3239,10 @@ DEFINE OFF_labels 0048000009800000
la_a0 &msg_missing_arg_value
la_br &fatal_msg
b
+:err_missing_positional
+ la_a0 &msg_missing_positional # a0 = address of the IN/OUT-missing message
+ la_br &fatal_msg
+ b
:err_no_inputs
la_a0 &msg_no_inputs
la_br &fatal_msg
@@ -3325,6 +3375,14 @@ DEFINE OFF_labels 0048000009800000
la_a0 &msg_bad_long
la_br &fatal_msg
b
+:err_ptrsize_bad
+ la_a0 &msg_ptrsize_bad # reached from do_ptrsize when N is neither 4 nor 8
+ la_br &fatal_msg
+ b
+:err_ptrsize_conflict
+ la_a0 &msg_ptrsize_conflict # reached from do_ptrsize when N differs from a ptrsize already used
+ la_br &fatal_msg
+ b
## Sentinel: end of executable text.
:_text_end
@@ -3333,24 +3391,17 @@ DEFINE OFF_labels 0048000009800000
:const_a_out "a.out" '00'
-:opt_dash_f "-f" '00'
-:opt_long_file "--file" '00'
-:opt_dash_o "-o" '00'
-:opt_long_output "--output" '00'
:opt_dash_B "-B" '00'
-:opt_long_base "--base-address" '00'
-:opt_long_big "--big-endian" '00'
-:opt_long_little "--little-endian" '00'
+:opt_dash_E "-E" '00'
+:opt_dash_e "-e" '00'
:opt_dash_b "-b" '00'
-:opt_long_binary "--binary" '00'
-:opt_long_nonexec "--non-executable" '00'
-:opt_dash_h "-h" '00'
-:opt_long_help "--help" '00'
+:opt_dash_N "-N" '00'
:dir_align "align"
:dir_fill "fill"
:dir_scope "scope"
:dir_endscope "endscope"
+:dir_ptrsize "ptrsize"
:str_colon ":"
:str_colon_hex2pp ": hex2pp: "
@@ -3359,16 +3410,12 @@ DEFINE OFF_labels 0048000009800000
"
:str_zero "0"
-:msg_usage "usage: hex2pp (-f|--file) FILE [(-f|--file) FILE ...]
- [-o|--output OUT]
- [-B|--base-address ADDR]
- [--big-endian | --little-endian]
- [-b|--binary]
- [--non-executable]
+:msg_usage "usage: hex2pp [-B ADDR] [-E|-e] [-b] [-N] IN OUT
" '00'
:msg_unknown_arg "unknown argument" '00'
:msg_missing_arg_value "missing value for option" '00'
:msg_no_inputs "no input files" '00'
+:msg_missing_positional "missing IN or OUT positional argument" '00'
:msg_too_many_files "too many input files" '00'
:msg_open_input "failed to open input file" '00'
:msg_read "failed to read input" '00'
@@ -3401,6 +3448,8 @@ DEFINE OFF_labels 0048000009800000
:msg_expected_decimal "expected decimal integer" '00'
:msg_output_overflow "output overflow" '00'
:msg_bad_long "invalid integer argument" '00'
+:msg_ptrsize_bad ".ptrsize: N must be 4 or 8" '00'
+:msg_ptrsize_conflict ".ptrsize conflicts with already-used width" '00'
## --- BSS pointer-init table ------------------------------------------------
:bss_init_tbl
@@ -3457,6 +3506,10 @@ ZERO8
ZERO8
:non_executable
ZERO8
+:ptrsize
+ZERO8
+:ptrsize_used
+ZERO8
:pass
ZERO8
@@ -3644,6 +3697,8 @@ ZERO8
ZERO8
:df_i
ZERO8
+:dp_n
+ZERO8
## str/mem helpers
:se_p
diff --git a/hex2pp/hex2pp.c b/hex2pp/hex2pp.c
@@ -27,7 +27,6 @@
#include <string.h>
#include <sys/stat.h>
-#define MAX_FILES 64
#define MAX_INPUT_BYTES (16 * 1024 * 1024)
#define MAX_OUTPUT_BYTES (128 * 1024 * 1024)
#define MAX_LABELS (1 << 20)
@@ -50,8 +49,7 @@ struct Label {
int scope_id; /* 0 = global */
};
-static struct InFile inputs[MAX_FILES];
-static int input_count;
+static struct InFile input_file;
static char text_buf[MAX_TEXT];
static int text_used;
@@ -67,7 +65,9 @@ static long long base_address;
static int byte_mode = HEX_MODE;
static int big_endian;
static int non_executable;
-static const char *output_path = "a.out";
+static const char *output_path;
+static int ptrsize = 4; /* width of '&' and '%'; settable via .ptrsize */
+static int ptrsize_used; /* a '&'/'%' reference has fixed the width */
static int scope_stack[MAX_SCOPE_DEPTH];
static int scope_depth;
@@ -137,11 +137,11 @@ static long long lookup_label(const char *s, int len)
{
int i;
int d;
- int dotted = (len > 0 && s[0] == '.');
+ int dotted = (len > 0 && s[0] == '.' && scope_depth > 0);
if (dotted) {
- /* Walk the scope stack innermost-out. A dotted name resolves to
- * the nearest enclosing definition, so an inner scope can shadow
- * an outer one with the same local name. */
+ /* Inside a scope, walk the scope stack innermost-out. A dotted
+ * name resolves to the nearest enclosing definition, so an inner
+ * scope can shadow an outer one with the same local name. */
for (d = scope_depth - 1; d >= 0; d--) {
int sid = scope_stack[d];
for (i = 0; i < label_count; i++) {
@@ -337,11 +337,12 @@ static void parse_byte_stream(struct Scanner *s)
static int is_name_terminator(int c)
{
- /* Per spec: names terminated by whitespace or '-'. We also stop at
- * end-of-line comments and EOF for safety. */
+ /* Per spec: names terminated by whitespace, '-', or '>' (the two
+ * label-arithmetic separators; '>' is the hex2-compatible synonym).
+ * We also stop at '#' / ';' comment starts and at EOF (c < 0). */
if (c < 0) return 1;
if (is_space_any(c)) return 1;
- if (c == '-') return 1;
+ if (c == '-' || c == '>') return 1;
if (c == '#' || c == ';') return 1;
return 0;
}
@@ -395,8 +396,8 @@ static struct SigilInfo sigil_info(int c)
case '@': si.width = 2; si.is_rel = 1; si.lo = -32768; si.hi = 32767; si.range_check = 1; break;
case '$': si.width = 2; si.is_rel = 0; si.lo = 0; si.hi = 65535; si.range_check = 1; break;
case '~': si.width = 3; si.is_rel = 1; si.lo = -(1LL << 23); si.hi = (1LL << 23) - 1; si.range_check = 1; break;
- case '%': si.width = 4; si.is_rel = 1; si.lo = 0; si.hi = 0; si.range_check = 0; break;
- case '&': si.width = 4; si.is_rel = 0; si.lo = 0; si.hi = 0; si.range_check = 0; break;
+ case '%': si.width = ptrsize; si.is_rel = 1; si.lo = 0; si.hi = 0; si.range_check = 0; break;
+ case '&': si.width = ptrsize; si.is_rel = 0; si.lo = 0; si.hi = 0; si.range_check = 0; break;
default: die("internal: bad sigil 0x%02x", c);
}
return si;
@@ -411,14 +412,17 @@ static void process_reference(struct Scanner *s, int sigil)
struct SigilInfo si = sigil_info(sigil);
long long value = 0;
+ if (sigil == '&' || sigil == '%') ptrsize_used = 1;
+
/* Sigil already consumed. Read tight LABEL. */
if (s->pos >= s->len || is_name_terminator((unsigned char)s->buf[s->pos])) {
die("sigil '%c' not followed by label name", sigil);
}
llen = read_name(s, label, sizeof(label));
- /* Optional '-' OTHER (tight, no whitespace). */
- if (s->pos < s->len && s->buf[s->pos] == '-') {
+ /* Optional '-' OTHER or '>' OTHER (tight, no whitespace).
+ * '>' is a synonym for '-', accepted for hex2 compatibility. */
+ if (s->pos < s->len && (s->buf[s->pos] == '-' || s->buf[s->pos] == '>')) {
s->pos++;
if (s->pos >= s->len || is_name_terminator((unsigned char)s->buf[s->pos])) {
die("'-' must be followed by label name");
@@ -538,6 +542,20 @@ static void do_fill(struct Scanner *s)
for (i = 0; i < N; i++) emit_byte(b);
}
+static void do_ptrsize(struct Scanner *s)
+{
+    long long width; /* .ptrsize operand: byte width for '&' and '%' */
+    skip_inline_ws(s);
+    width = read_decimal(s);
+    if (!(width == 4 || width == 8)) { /* only 4 and 8 are accepted */
+        die(".ptrsize: N must be 4 or 8 (got %lld)", width);
+    }
+    if (ptrsize_used && ptrsize != (int)width) { /* a '&'/'%' already used another width */
+        die(".ptrsize %lld conflicts with already-used width %d", width, ptrsize);
+    }
+    ptrsize = (int)width;
+}
+
static void do_scope_open(void)
{
if (scope_depth >= MAX_SCOPE_DEPTH) die(".scope: depth overflow");
@@ -572,10 +590,9 @@ static void process_file(struct InFile *f)
int scope;
s.pos++;
n = read_name(&s, name, sizeof(name));
- dotted = (n > 0 && name[0] == '.');
- if (dotted && scope_depth == 0) {
- die("dot-prefixed label '%.*s' outside a .scope", n, name);
- }
+ /* A dot-prefixed name is scope-local only inside a .scope;
+ * outside, it is an ordinary global name. */
+ dotted = (n > 0 && name[0] == '.' && scope_depth > 0);
scope = dotted ? scope_stack[scope_depth - 1] : 0;
if (pass == 1) define_label(name, n, scope);
continue;
@@ -590,6 +607,7 @@ static void process_file(struct InFile *f)
else if (n == 4 && memcmp(dn, "fill", 4) == 0) do_fill(&s);
else if (n == 5 && memcmp(dn, "scope", 5) == 0) do_scope_open();
else if (n == 8 && memcmp(dn, "endscope", 8) == 0) do_scope_close();
+ else if (n == 7 && memcmp(dn, "ptrsize", 7) == 0) do_ptrsize(&s);
else die("unknown directive '.%.*s'", n, dn);
continue;
}
@@ -632,10 +650,6 @@ static void load_input(const char *path)
long sz;
char *buf;
- if (input_count >= MAX_FILES) {
- fprintf(stderr, "hex2pp: too many input files\n");
- exit(1);
- }
fp = fopen(path, "rb");
if (fp == NULL) { perror(path); exit(1); }
if (fseek(fp, 0, SEEK_END) != 0) { perror(path); exit(1); }
@@ -652,71 +666,65 @@ static void load_input(const char *path)
buf[sz] = '\0';
fclose(fp);
- inputs[input_count].path = path;
- inputs[input_count].buf = buf;
- inputs[input_count].len = (int)sz;
- input_count++;
+ input_file.path = path;
+ input_file.buf = buf;
+ input_file.len = (int)sz;
}
static void usage(const char *prog)
{
fprintf(stderr,
- "usage: %s (-f|--file) FILE [(-f|--file) FILE ...]\n"
- " [-o|--output OUT]\n"
- " [-B|--base-address ADDR]\n"
- " [--big-endian | --little-endian]\n"
- " [-b|--binary]\n"
- " [--non-executable]\n",
+ "usage: %s [-B ADDR] [-E|-e] [-b] [-N] IN OUT\n", /* -B base address, -E/-e big/little endian, -b binary mode, -N non-executable */
prog);
}
int main(int argc, char **argv)
{
int i;
+ const char *in_path = NULL;
for (i = 1; i < argc; i++) {
const char *a = argv[i];
- if (strcmp(a, "-f") == 0 || strcmp(a, "--file") == 0) {
- if (++i >= argc) { usage(argv[0]); return 1; }
- load_input(argv[i]);
- } else if (strcmp(a, "-o") == 0 || strcmp(a, "--output") == 0) {
- if (++i >= argc) { usage(argv[0]); return 1; }
- output_path = argv[i];
- } else if (strcmp(a, "-B") == 0 || strcmp(a, "--base-address") == 0) {
+ if (strcmp(a, "-B") == 0) {
if (++i >= argc) { usage(argv[0]); return 1; }
base_address = parse_long(argv[i], "base address");
- } else if (strcmp(a, "--big-endian") == 0) {
+ } else if (strcmp(a, "-E") == 0) {
big_endian = 1;
- } else if (strcmp(a, "--little-endian") == 0) {
+ } else if (strcmp(a, "-e") == 0) {
big_endian = 0;
- } else if (strcmp(a, "-b") == 0 || strcmp(a, "--binary") == 0) {
+ } else if (strcmp(a, "-b") == 0) {
byte_mode = BINARY_MODE;
- } else if (strcmp(a, "--non-executable") == 0) {
+ } else if (strcmp(a, "-N") == 0) {
non_executable = 1;
- } else if (strcmp(a, "-h") == 0 || strcmp(a, "--help") == 0) {
+ } else if (a[0] == '-' && a[1] != '\0') {
+ fprintf(stderr, "hex2pp: unknown argument: %s\n", a);
usage(argv[0]);
- return 0;
+ return 1;
+ } else if (in_path == NULL) {
+ in_path = a;
+ } else if (output_path == NULL) {
+ output_path = a;
} else {
- fprintf(stderr, "hex2pp: unknown argument: %s\n", a);
+ fprintf(stderr, "hex2pp: extra positional argument: %s\n", a);
usage(argv[0]);
return 1;
}
}
- if (input_count == 0) {
- fprintf(stderr, "hex2pp: no input files\n");
+ if (in_path == NULL || output_path == NULL) {
usage(argv[0]);
return 1;
}
+ load_input(in_path);
/* Pass 1: collect labels. */
pass = 1;
ip = 0;
scope_depth = 0;
scope_seq = 0;
- for (i = 0; i < input_count; i++) {
- process_file(&inputs[i]);
- }
+ ptrsize = 4;
+ ptrsize_used = 0;
+ process_file(&input_file);
if (scope_depth != 0) die(".scope not closed at end of input");
/* Pass 2: emit. */
@@ -725,9 +733,9 @@ int main(int argc, char **argv)
output_used = 0;
scope_depth = 0;
scope_seq = 0;
- for (i = 0; i < input_count; i++) {
- process_file(&inputs[i]);
- }
+ ptrsize = 4;
+ ptrsize_used = 0;
+ process_file(&input_file);
if (scope_depth != 0) die(".scope not closed at end of input");
/* Write output. */
diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh
@@ -79,18 +79,10 @@ fail() {
## --- m1pp suite ---------------------------------------------------------
##
-## Two-step check:
-## 1. Run M1pp against tests/M1pp/<name>.M1pp; diff its text output
-## against tests/M1pp/<name>.expected (parity with the C oracle).
-## 2. Pipe that output through hex2pp as an assemble smoke test. The
-## new M1pp emits bare hex consumable directly by hex2pp; this
-## catches cases where M1pp produces parity-correct text that
-## hex2pp can't actually parse (e.g. stray whitespace bugs,
-## malformed sigil expressions).
-##
-## Both steps must pass for the fixture to PASS. The smoke-test step
-## uses hex2pp's --non-executable mode and writes to a throwaway path
-## — we only care about hex2pp's exit status, not the bytes.
+## Single check: run M1pp against tests/M1pp/<name>.M1pp, diff its text
+## output against tests/M1pp/<name>.expected. The suite tests macro
+## expansion only — assembling the result through hex2pp is the job of
+## the p1 / cc-* suites, where the input is a complete program.
run_m1pp_suite() {
if [ -z "$NAMES" ]; then
NAMES=$(discover tests/M1pp M1pp)
@@ -126,20 +118,6 @@ run_m1pp_suite() {
continue
fi
- # Smoke test: feed M1pp's output through hex2pp. We don't run
- # the resulting bytes (the fixture isn't a complete program),
- # only verify hex2pp accepts the syntax. --non-executable
- # skips the chmod on the throwaway output.
- binfile=build/$ARCH/tests/M1pp/$name.bin
- hex2pp_log=build/$ARCH/tests/M1pp/$name.hex2pp.log
- rm -f "$binfile" "$hex2pp_log"
- if ! "./build/$ARCH/hex2pp/hex2pp" --non-executable \
- -f "$outfile" -o "$binfile" \
- >"$hex2pp_log" 2>&1; then
- fail "$label" "hex2pp smoke-test failed:" "$hex2pp_log"
- continue
- fi
-
report "$label" PASS
done
}