commit 1857b6a6348084d906fc93dfec79329ffd4cd439
parent 38c3a3448502859a26a4e6f2d6a481bd6ece8504
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 14:09:10 -0700
hex2pp.P1, update M1pp.P1
Diffstat:
8 files changed, 4242 insertions(+), 539 deletions(-)
diff --git a/M1pp/M1pp.P1 b/M1pp/M1pp.P1
@@ -13,9 +13,15 @@
## stream and walks it token-by-token, dispatching to
## define_macro at line-start %macro, emit_newline /
## emit_token for pass-through, expand_builtin_call for
-## !@%$ and %select, and expand_call for user macros.
-## Macro expansions and %select push fresh streams onto
-## streams[]; popping rewinds the expansion pool.
+## !@%$, %select, %str, %bytes, %local, and expand_call
+## for user macros. Macro expansions and %select push
+## fresh streams onto streams[]; popping rewinds the
+## expansion pool.
+##
+## Output is consumed directly by hex2pp -- there is no intermediate M0/hex2
+## stage. Lexical scoping for control-flow labels is delegated to hex2pp's
+## nestable .scope / .endscope; M1pp itself only handles per-expansion
+## macro hygiene labels (:@name / &@name).
## define_macro Parse %macro header+body; record in macros[] +
## macro_body_tokens[]; consume through the %endm line
## without emitting output.
@@ -122,11 +128,6 @@ DEFINE M1PP_EXPR_FRAMES_CAP 0009000000000000
## Common cap used by macro params, call args, and expression args.
DEFINE M1PP_MAX_PARAMS 1000000000000000
-## Scope-stack cap. 32 nested scopes max; each slot is a 16-byte TextSpan
-## (ptr + len) pointing into stable text (input_buf or text_buf), so
-## scope_stack is 32 × 16 = 512 bytes.
-DEFINE M1PP_MAX_SCOPE_DEPTH 2000000000000000
-
## ExprOp codes (indexed by apply_expr_op).
DEFINE EXPR_ADD 0000000000000000
DEFINE EXPR_SUB 0100000000000000
@@ -164,7 +165,6 @@ DEFINE EXPR_INVALID 1200000000000000
## expr_frames 2304 B
DEFINE OFF_paste_scratch 0000000000000000
DEFINE OFF_local_label_scratch 0001000000000000
-DEFINE OFF_scope_stack 8001000000000000
DEFINE OFF_df_name_scratch 8003000000000000
DEFINE OFF_ebc_str_scratch 8004000000000000
DEFINE OFF_arg_starts 8005000000000000
@@ -866,51 +866,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &emit_token_skip
beq_t0,t1
- # Scope rewrite: TOK_WORD whose text begins with "::" (len>=3) becomes
- # a scoped definition, "&::" (len>=4) a scoped reference. Dispatch to
- # emit_scope_rewrite with a1=skip, a2=sigil.
- ld_a1,a0,0
- li_a2 TOK_WORD
- la_br &emit_token_after_scope
- bne_a1,a2
- ld_a2,a0,16
- li_a3 %3 %0
- la_br &emit_token_after_scope
- blt_a2,a3
- ld_a3,a0,8
- lb_t0,a3,0
- li_t1 %58 %0
- la_br &emit_token_check_amp
- bne_t0,t1
- lb_t0,a3,1
- li_t1 %58 %0
- la_br &emit_token_after_scope
- bne_t0,t1
- li_a1 %2 %0
- li_a2 %58 %0
- la_br &emit_scope_rewrite
- b
-:emit_token_check_amp
- li_t1 %38 %0
- la_br &emit_token_after_scope
- bne_t0,t1
- ld_a2,a0,16
- li_t2 %4 %0
- la_br &emit_token_after_scope
- blt_a2,t2
- lb_t0,a3,1
- li_t1 %58 %0
- la_br &emit_token_after_scope
- bne_t0,t1
- lb_t0,a3,2
- la_br &emit_token_after_scope
- bne_t0,t1
- li_a1 %3 %0
- li_a2 %38 %0
- la_br &emit_scope_rewrite
- b
-
-:emit_token_after_scope
# if (output_need_space) emit ' ' (skip the space for the first token on a line)
la_a1 &output_need_space
ld_t0,a1,0
@@ -970,170 +925,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
:emit_token_skip
ret
-## emit_scope_rewrite: branch target from emit_token for tokens whose text
-## starts with "::" (scoped definition) or "&::" (scoped reference).
-## Writes sigil + scope1 + "__" + ... + scopeN + "__" + name directly to
-## output_buf; with an empty scope stack the middle collapses so output is
-## just sigil + name (pass-through). Not a callable function: reached by `b`,
-## shares emit_token's leaf return address, exits via `ret`.
-##
-## Register inputs:
-## a0 = tok_ptr
-## a1 = skip (2 for "::", 3 for "&::")
-## a2 = sigil (':' = 58 for definitions, '&' = 38 for references)
-:emit_scope_rewrite
- # name_len = tok->text_len - skip; fail if zero.
- ld_a3,a0,16
- sub_a3,a3,a1
- la_br &err_bad_scope_label
- beqz_a3
-
- # Spill inputs — the byte-copy loops below reuse a0..a3/t0..t2 freely.
- la_t0 &sr_tok_ptr
- st_a0,t0,0
- la_t0 &sr_skip
- st_a1,t0,0
- la_t0 &sr_sigil
- st_a2,t0,0
- la_t0 &sr_name_len
- st_a3,t0,0
-
- # Emit leading ' ' if output_need_space.
- la_a0 &output_need_space
- ld_t0,a0,0
- la_br &sr_post_space
- beqz_t0
- la_a1 &output_used
- ld_t0,a1,0
- li_t1 M1PP_OUTPUT_CAP
- la_br &err_output_overflow
- beq_t0,t1
- la_a2 &output_buf_ptr
- ld_a2,a2,0
- add_a2,a2,t0
- li_t1 %32 %0
- sb_t1,a2,0
- addi_t0,t0,1
- st_t0,a1,0
-:sr_post_space
-
- # Emit the sigil byte.
- la_a0 &output_used
- ld_t0,a0,0
- li_t1 M1PP_OUTPUT_CAP
- la_br &err_output_overflow
- beq_t0,t1
- la_a1 &output_buf_ptr
- ld_a1,a1,0
- add_a1,a1,t0
- la_a2 &sr_sigil
- ld_a3,a2,0
- sb_a3,a1,0
- addi_t0,t0,1
- st_t0,a0,0
-
- # Emit each scope frame's bytes followed by "__".
- li_t0 %0 %0
-:sr_scope_outer
- la_a0 &scope_depth
- ld_a1,a0,0
- la_br &sr_tail_start
- beq_t0,a1
-
- la_a0 &scope_stack_ptr
- ld_a0,a0,0
- li_a2 %16 %0
- mul_a2,a2,t0
- add_a0,a0,a2
- ld_a1,a0,0
- ld_a2,a0,8
- li_a3 %0 %0
-:sr_scope_inner
- la_br &sr_scope_sep
- beq_a3,a2
- la_t1 &output_used
- ld_t2,t1,0
- li_a0 M1PP_OUTPUT_CAP
- la_br &err_output_overflow
- beq_t2,a0
- la_a0 &output_buf_ptr
- ld_a0,a0,0
- add_a0,a0,t2
- add_t2,a1,a3
- lb_t2,t2,0
- sb_t2,a0,0
- la_t1 &output_used
- ld_t2,t1,0
- addi_t2,t2,1
- st_t2,t1,0
- addi_a3,a3,1
- la_br &sr_scope_inner
- b
-:sr_scope_sep
- la_a0 &output_used
- ld_t1,a0,0
- li_t2 M1PP_OUTPUT_CAP
- la_br &err_output_overflow
- beq_t1,t2
- la_a1 &output_buf_ptr
- ld_a1,a1,0
- add_a1,a1,t1
- li_a2 %95 %0
- sb_a2,a1,0
- addi_t1,t1,1
- st_t1,a0,0
- la_a0 &output_used
- ld_t1,a0,0
- li_t2 M1PP_OUTPUT_CAP
- la_br &err_output_overflow
- beq_t1,t2
- la_a1 &output_buf_ptr
- ld_a1,a1,0
- add_a1,a1,t1
- li_a2 %95 %0
- sb_a2,a1,0
- addi_t1,t1,1
- st_t1,a0,0
- addi_t0,t0,1
- la_br &sr_scope_outer
- b
-
-:sr_tail_start
- la_a0 &sr_tok_ptr
- ld_a1,a0,0
- ld_a2,a1,8
- la_a0 &sr_skip
- ld_a3,a0,0
- add_a1,a2,a3
- la_a0 &sr_name_len
- ld_a2,a0,0
- li_a3 %0 %0
-:sr_tail_loop
- la_br &sr_tail_done
- beq_a3,a2
- la_t1 &output_used
- ld_t2,t1,0
- li_a0 M1PP_OUTPUT_CAP
- la_br &err_output_overflow
- beq_t2,a0
- la_a0 &output_buf_ptr
- ld_a0,a0,0
- add_a0,a0,t2
- add_t2,a1,a3
- lb_t2,t2,0
- sb_t2,a0,0
- la_t1 &output_used
- ld_t2,t1,0
- addi_t2,t2,1
- st_t2,t1,0
- addi_a3,a3,1
- la_br &sr_tail_loop
- b
-:sr_tail_done
- la_a0 &output_need_space
- li_a1 %1 %0
- st_a1,a0,0
- ret
## --- Main processor ----------------------------------------------------------
## Stream-driven loop. Pushes source_tokens as the initial stream, then drives
@@ -1244,7 +1035,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
li_a2 %5 %0
la_br &tok_eq_const
call
- la_br &proc_check_scope
+ la_br &proc_check_frame
beqz_a0
ld_a0,sp,0
ld_a1,sp,8
@@ -1258,46 +1049,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &proc_restore_and_loop
b
-## ---- tok eq "%scope" ----
-:proc_check_scope
- ld_a0,sp,8
- la_a1 &const_scope
- li_a2 %6 %0
- la_br &tok_eq_const
- call
- la_br &proc_check_endscope
- beqz_a0
- ld_a0,sp,0
- ld_a1,sp,8
- la_br &proc_save_pos_and_ls
- call
- ld_a0,sp,0
- ld_a0,a0,8
- la_br &push_scope
- call
- la_br &proc_restore_and_loop
- b
-
-## ---- tok eq "%endscope" ----
-:proc_check_endscope
- ld_a0,sp,8
- la_a1 &const_endscope
- li_a2 %9 %0
- la_br &tok_eq_const
- call
- la_br &proc_check_frame
- beqz_a0
- ld_a0,sp,0
- ld_a1,sp,8
- la_br &proc_save_pos_and_ls
- call
- ld_a0,sp,0
- ld_a0,a0,8
- la_br &pop_scope
- call
- la_br &proc_restore_and_loop
- b
-
## ---- tok eq "%frame" ----
:proc_check_frame
ld_a0,sp,8
@@ -1383,7 +1134,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &proc_check_macro
beqz_a1
- # try the six builtin names: ! @ % $ %select %str
+ # try the eight builtin names: ! @ % $ %select %str %bytes %local
mov_a0,t0
la_a1 &const_bang
li_a2 %1 %0
@@ -1433,6 +1184,13 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
call
la_br &proc_do_builtin
bnez_a0
+ ld_a0,sp,8
+ la_a1 &const_bytes
+ li_a2 %6 %0
+ la_br &tok_eq_const
+ call
+ la_br &proc_do_builtin
+ bnez_a0
la_br &proc_check_macro
b
@@ -1512,11 +1270,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
b
:proc_done
- # Every %scope must be matched by an %endscope before EOF.
- la_a0 &scope_depth
- ld_t0,a0,0
- la_br &err_scope_not_closed
- bnez_t0
# Every %frame must be matched by an %endframe before EOF.
la_a0 &frame_active
ld_t0,a0,0
@@ -1548,117 +1301,11 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &proc_loop
b
-## --- %scope / %endscope handlers --------------------------------------------
-## Called at proc_pos == the `%scope` / `%endscope` word on a line-start.
-## Input: a0 = stream end (pointer one past last token in the current stream).
-## Output: proc_pos advanced past the trailing newline (or stream end).
-
-## push_scope(a0 = stream_end): consume `%scope NAME` (header self-terminates
-## at NAME). Name must be a single WORD token. Newlines between %scope and
-## NAME, and between NAME and the body, are insignificant.
-:push_scope
- enter_0
-
- # proc_pos += 24 (skip past the `%scope` token).
- la_t0 &proc_pos
- ld_t1,t0,0
- addi_t1,t1,32
- st_t1,t0,0
-
- # Skip newlines between `%scope` and NAME.
- la_a1 &psc_stream_end
- st_a0,a1,0 # save stream_end across the call
- la_br &proc_skip_newlines
- call
- la_a1 &psc_stream_end
- ld_a0,a1,0
- la_t0 &proc_pos
- ld_t1,t0,0
-
- # Require a WORD name token within the stream.
- la_br &err_bad_scope_header
- beq_t1,a0
- ld_t2,t1,0
- la_br &err_bad_scope_header
- bnez_t2
-
- # scope_depth < MAX_SCOPE_DEPTH?
- la_a1 &scope_depth
- ld_a2,a1,0
- li_a3 M1PP_MAX_SCOPE_DEPTH
- la_br &err_scope_depth_overflow
- beq_a2,a3
-
- # scope_stack[scope_depth] = (name.text_ptr, name.text_len)
- la_a3 &scope_stack_ptr
- ld_a3,a3,0
- li_t0 %16 %0
- mul_t0,t0,a2
- add_a3,a3,t0
- ld_t0,t1,8
- st_t0,a3,0
- ld_t0,t1,16
- st_t0,a3,8
-
- # scope_depth++
- addi_a2,a2,1
- st_a2,a1,0
-
- # proc_pos += 24 (past the name).
- la_t0 &proc_pos
- ld_t1,t0,0
- addi_t1,t1,32
- st_t1,t0,0
-
- # Newlines between `%scope NAME` and the body content are insignificant.
- la_br &proc_skip_newlines
- call
- eret
-
-## pop_scope(a0 = stream_end): consume `%endscope` followed by a strict
-## TOK_NEWLINE — extra tokens on the line are now an error.
-:pop_scope
- enter_0
-
- # scope_depth > 0?
- la_a1 &scope_depth
- ld_a2,a1,0
- la_br &err_scope_underflow
- beqz_a2
- addi_a2,a2,neg1
- st_a2,a1,0
-
- # proc_pos += 24 (past the `%endscope` token).
- la_t0 &proc_pos
- ld_t1,t0,0
- addi_t1,t1,32
- st_t1,t0,0
-
- # Strict: the token immediately after `%endscope` must be TOK_NEWLINE.
- la_br &err_bad_scope_header
- beq_t1,a0
- ld_t2,t1,0
- li_t0 TOK_NEWLINE
- la_br &err_bad_scope_header
- bne_t2,t0
- # Consume the trailing newline only when %endscope sat at line-start;
- # mid-line %endscope leaves the newline so it can be emitted.
- la_t0 &proc_line_start
- ld_a1,t0,0
- la_br &pop_done
- beqz_a1
- addi_t1,t1,32
- la_t0 &proc_pos
- st_t1,t0,0
-:pop_done
- eret
-
## --- %frame / %endframe handlers --------------------------------------------
-## Single-slot frame state, separate from the %scope stack. push_frame(a0=
-## stream_end) parses `%frame NAME`, stashes name's TextSpan in
-## current_frame_ptr/_len, and sets frame_active = 1. pop_frame(a0=
-## stream_end) clears frame_active. Frames do not nest — a second push
-## without an intervening pop is fatal.
+## Single-slot frame state used by %local. push_frame(a0=stream_end) parses
+## `%frame NAME`, stashes name's TextSpan in current_frame_ptr/_len, and
+## sets frame_active = 1. pop_frame(a0=stream_end) clears frame_active.
+## Frames do not nest — a second push without an intervening pop is fatal.
:push_frame
enter_0
@@ -2265,7 +1912,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
# Strict: the closing '}' must be immediately followed by TOK_NEWLINE.
# Consume that newline only when the directive started at line-start,
- # mirroring %endm / %endscope.
+ # mirroring %endm / %endframe.
la_a0 &proc_pos
ld_t0,a0,0
la_a1 &source_end
@@ -4900,7 +4547,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
## skip_expr_newlines(a0=pos, a1=end) -> a0 = new pos. Leaf.
## Advance pos past consecutive TOK_NEWLINE tokens so expressions may span
## lines. Also used by directive header parsers to make whitespace
-## (newlines specifically) insignificant inside %macro/%struct/%scope
+## (newlines specifically) insignificant inside %macro/%struct/%frame
## headers and around `##` paste operands.
:skip_expr_newlines
:sen_loop
@@ -5474,10 +5121,10 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
## emit_hex_value(a0=value_u64, a1=byte_count) -> void (fatal on overflow)
## byte_count must be 1, 2, 4, or 8. Serialize value into (2 * byte_count)
## uppercase hex chars, little-endian byte order (byte i at char indices
-## 2i, 2i+1) WRAPPED IN SINGLE QUOTES so the downstream M0 assembler
-## treats it as a hex-byte string literal rather than parsing it as a
-## decimal numeric token. Total emitted text length = 2 + 2 * byte_count;
-## emitted as a TOK_STRING via append_text + emit_token.
+## 2i, 2i+1) as bare hex digits. hex2pp's byte-stream parser groups every
+## two hex digits into one byte; no quoting or separators are needed.
+## Total emitted text length = 2 * byte_count; emitted as a TOK_WORD via
+## append_text + emit_token.
:emit_hex_value
enter_0
@@ -5487,11 +5134,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_a2 &ehv_bytes
st_a1,a2,0
- # scratch[0] = '\''
- la_a1 &ehv_scratch
- li_a2 %39 %0
- sb_a2,a1,0
-
# i = 0
li_t0 %0 %0
:emit_hex_value_loop
@@ -5513,17 +5155,16 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
# low = byte & 0x0F
andi_a3,a3,15
- # scratch[1 + 2*i] = hex_chars[high]
+ # scratch[2*i] = hex_chars[high]
la_a1 &hex_chars
add_a1,a1,a2
lb_a2,a1,0
la_a1 &ehv_scratch
shli_a3,t0,1
add_a1,a1,a3
- addi_a1,a1,1
sb_a2,a1,0
- # scratch[1 + 2*i+1] = hex_chars[low] (reload low from byte & 0x0F)
+ # scratch[2*i+1] = hex_chars[low] (reload low from byte & 0x0F)
la_a1 &ehv_value
ld_t2,a1,0
andi_a3,t2,255
@@ -5534,7 +5175,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_a1 &ehv_scratch
shli_a3,t0,1
add_a1,a1,a3
- addi_a1,a1,2
+ addi_a1,a1,1
sb_a2,a1,0
# ehv_value >>= 8
@@ -5549,35 +5190,23 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
b
:emit_hex_value_emit
- # scratch[1 + 2*bytes] = '\'' (closing quote)
- la_a0 &ehv_scratch
- la_a1 &ehv_bytes
- ld_a1,a1,0
- shli_a1,a1,1
- add_a0,a0,a1
- addi_a0,a0,1
- li_a2 %39 %0
- sb_a2,a0,0
-
- # text_ptr = append_text(&ehv_scratch, 2 + 2 * ehv_bytes)
+ # text_ptr = append_text(&ehv_scratch, 2 * ehv_bytes)
la_a0 &ehv_scratch
la_a1 &ehv_bytes
ld_a1,a1,0
shli_a1,a1,1
- addi_a1,a1,2
la_br &append_text
call
- # ehv_token.kind = TOK_STRING; ehv_token.text_ptr = text_ptr;
- # ehv_token.text_len = 2 + 2 * ehv_bytes; ehv_token.tight = 0.
+ # ehv_token.kind = TOK_WORD; ehv_token.text_ptr = text_ptr;
+ # ehv_token.text_len = 2 * ehv_bytes; ehv_token.tight = 0.
la_a2 &ehv_token
- li_a3 TOK_STRING
+ li_a3 TOK_WORD
st_a3,a2,0
st_a0,a2,8
la_a1 &ehv_bytes
ld_a1,a1,0
shli_a1,a1,1
- addi_a1,a1,2
st_a1,a2,16
li_a1 %0 %0
st_a1,a2,24
@@ -5590,7 +5219,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
eret
## ============================================================================
-## --- Builtin dispatcher ( ! @ % $ %select ) ---------------------------------
+## --- Builtin dispatcher ( ! @ % $ %select %str %bytes %local ) -------------
## ============================================================================
## expand_builtin_call(a0=stream_ptr, a1=builtin_tok) -> void (fatal on bad)
@@ -5955,6 +5584,17 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_br &ebc_local
bnez_a0
+ # if tok_eq_const(tok, "%bytes", 6) -> bytes path
+ la_a0 &ebc_stream
+ ld_a0,a0,0
+ ld_a0,a0,16
+ la_a1 &const_bytes
+ li_a2 %6 %0
+ la_br &tok_eq_const
+ call
+ la_br &ebc_bytes_handler
+ bnez_a0
+
# else: fatal
la_br &err_bad_macro_header
b
@@ -6277,6 +5917,268 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
eret
+## %bytes("STR"): emit each byte of a "..."-quoted string as its own
+## two-digit hex token. Recognised escapes inside the string:
+## \n -> 0x0A \t -> 0x09 \r -> 0x0D \0 -> 0x00
+## \\ -> 0x5C \" -> 0x22 \xNN -> byte NN (two hex digits)
+## Any other escape, or a backslash with nothing after it, is fatal
+## (err_bad_escape). No NUL terminator is appended; the caller writes
+## one explicitly (e.g. "00") if needed. An empty string emits nothing.
+##
+## Strategy: every byte goes out via emit_hex_value(byte, 1). hex2pp's
+## parse_byte_stream accumulates adjacent hex digits across whitespace,
+## so "68 69 0A" reads the same as "68690A"; no coalescing is needed.
+##
+## Validation (err_bad_macro_header on failure): arg_count == 1, arg span
+## is exactly one token, kind is TOK_STRING, len >= 2, ptr[0] == '"'.
+## For \xNN both following bytes must be hex digits, else err_bad_escape.
+## NOTE(review): entered by branch from the dispatch chain, not `call`,
+## yet opens with enter_0 -- confirm the dispatcher holds no frame here.
+:ebc_bytes_handler
+ enter_0
+
+ # require arg_count == 1
+ la_a0 &arg_count
+ ld_t0,a0,0
+ li_t1 %1 %0
+ la_br &err_bad_macro_header
+ bne_t0,t1
+
+ # require arg span is exactly one token (32 bytes)
+ la_a0 &arg_starts_ptr
+ ld_a0,a0,0
+ ld_t0,a0,0 # t0 = arg_starts[0]
+ la_a1 &arg_ends_ptr
+ ld_a1,a1,0
+ ld_t1,a1,0 # t1 = arg_ends[0]
+ sub_t2,t1,t0
+ li_a2 %32 %0
+ la_br &err_bad_macro_header
+ bne_t2,a2
+
+ # require arg_tok->kind == TOK_STRING
+ ld_a3,t0,0
+ li_a2 TOK_STRING
+ la_br &err_bad_macro_header
+ bne_a3,a2
+
+ # require arg_tok->text.len >= 2
+ ld_a1,t0,16
+ li_a2 %2 %0
+ la_br &err_bad_macro_header
+ blt_a1,a2
+
+ # require arg_tok->text.ptr[0] == '"'.
+ # Save text_ptr to ebc_b_src_ptr (will += 1 below) and text_len to
+ # ebc_b_src_len (-= 2 below). Reading the first byte uses lb_a3,a3,0
+ # which clobbers a3, so do the save first.
+ ld_a3,t0,8
+ la_a0 &ebc_b_src_ptr
+ st_a3,a0,0
+ la_a0 &ebc_b_src_len
+ st_a1,a0,0
+ lb_a3,a3,0
+ li_a2 %34 %0 # '"'
+ la_br &err_bad_macro_header
+ bne_a3,a2
+
+ # src_ptr += 1; src_len -= 2 (strip surrounding quotes)
+ la_a0 &ebc_b_src_ptr
+ ld_a3,a0,0
+ addi_a3,a3,1
+ st_a3,a0,0
+ la_a0 &ebc_b_src_len
+ ld_a1,a0,0
+ addi_a1,a1,neg2
+ st_a1,a0,0
+
+ # ebc_b_src_i = 0
+ li_a0 %0 %0
+ la_a1 &ebc_b_src_i
+ st_a0,a1,0
+
+:ebc_b_loop
+ # if (src_i == src_len) done
+ la_a0 &ebc_b_src_i
+ ld_t0,a0,0
+ la_a1 &ebc_b_src_len
+ ld_t1,a1,0
+ la_br &ebc_b_done
+ beq_t0,t1
+
+ # c = src_ptr[src_i]; src_i++
+ # P1 lacks lb_a3,a0,0 — bounce through a1 (mov_a1,a0; lb_a3,a1,0).
+ la_a0 &ebc_b_src_ptr
+ ld_a0,a0,0
+ add_a0,a0,t0
+ mov_a1,a0
+ lb_a3,a1,0 # a3 = c
+ addi_t0,t0,1
+ la_a1 &ebc_b_src_i
+ st_t0,a1,0
+
+ # if (c == '\\') -> escape path
+ li_a2 %92 %0
+ la_br &ebc_b_escape
+ beq_a3,a2
+
+ # literal byte: emit_hex_value(c, 1) and reloop
+ mov_a0,a3
+ li_a1 %1 %0
+ la_br &emit_hex_value
+ call
+ la_br &ebc_b_loop
+ b
+
+:ebc_b_escape
+ # Read the escape character; require at least one byte left.
+ la_a0 &ebc_b_src_i
+ ld_t0,a0,0
+ la_a1 &ebc_b_src_len
+ ld_t1,a1,0
+ la_br &err_bad_escape
+ beq_t0,t1
+ la_a0 &ebc_b_src_ptr
+ ld_a0,a0,0
+ add_a0,a0,t0
+ mov_a1,a0
+ lb_a3,a1,0 # a3 = e
+ addi_t0,t0,1
+ la_a1 &ebc_b_src_i
+ st_t0,a1,0
+
+ # Single-char escapes: dispatch via beq chain (matches the existing
+ # proc_check_<directive> pattern). Each target loads the resulting
+ # byte into a3 and branches to ebc_b_emit_one; \x goes to ebc_b_esc_hex.
+ li_a2 %110 %0 # 'n'
+ la_br &ebc_b_esc_n
+ beq_a3,a2
+ li_a2 %116 %0 # 't'
+ la_br &ebc_b_esc_t
+ beq_a3,a2
+ li_a2 %114 %0 # 'r'
+ la_br &ebc_b_esc_r
+ beq_a3,a2
+ li_a2 %48 %0 # '0'
+ la_br &ebc_b_esc_zero
+ beq_a3,a2
+ li_a2 %92 %0 # '\\'
+ la_br &ebc_b_esc_bs
+ beq_a3,a2
+ li_a2 %34 %0 # '"'
+ la_br &ebc_b_esc_dq
+ beq_a3,a2
+ li_a2 %120 %0 # 'x'
+ la_br &ebc_b_esc_hex
+ beq_a3,a2
+ la_br &err_bad_escape
+ b
+
+:ebc_b_esc_n
+ li_a3 %10 %0 # 0x0A
+ la_br &ebc_b_emit_one
+ b
+:ebc_b_esc_t
+ li_a3 %9 %0 # 0x09
+ la_br &ebc_b_emit_one
+ b
+:ebc_b_esc_r
+ li_a3 %13 %0 # 0x0D
+ la_br &ebc_b_emit_one
+ b
+:ebc_b_esc_zero
+ li_a3 %0 %0 # 0x00
+ la_br &ebc_b_emit_one
+ b
+:ebc_b_esc_bs
+ li_a3 %92 %0 # 0x5C
+ la_br &ebc_b_emit_one
+ b
+:ebc_b_esc_dq
+ li_a3 %34 %0 # 0x22
+ la_br &ebc_b_emit_one
+ b
+
+:ebc_b_emit_one
+ # Common tail for all escapes (incl. \xNN): emit_hex_value(a3, 1), reloop.
+ mov_a0,a3
+ li_a1 %1 %0
+ la_br &emit_hex_value
+ call
+ la_br &ebc_b_loop
+ b
+
+:ebc_b_esc_hex
+ # \xNN: require two hex chars at src[src_i], src[src_i+1].
+ la_a0 &ebc_b_src_i
+ ld_t0,a0,0
+ la_a1 &ebc_b_src_len
+ ld_t1,a1,0
+ sub_t2,t1,t0 # remaining = src_len - src_i
+ li_a3 %2 %0
+ la_br &err_bad_escape
+ blt_t2,a3
+
+ # hi char: src[src_i]; decode via hex_digit_table[c]; fail if 0xFF.
+ la_a0 &ebc_b_src_ptr
+ ld_a0,a0,0
+ add_a0,a0,t0
+ lb_a0,a0,0 # a0 = hi char
+ la_a1 &hex_digit_table
+ add_a1,a1,a0
+ lb_a2,a1,0 # a2 = hi digit (or 0xFF)
+ li_a3 %255 %0
+ la_br &err_bad_escape
+ beq_a2,a3
+ # Stash hi digit into ebc_b_hex_hi for the (hi << 4) | lo combine
+ # below — the lo-digit lookup clobbers a2.
+ la_a0 &ebc_b_hex_hi
+ st_a2,a0,0
+
+ # advance past hi char
+ la_a0 &ebc_b_src_i
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+
+ # lo char: src[src_i]; decode via hex_digit_table[c]; fail if 0xFF.
+ la_a0 &ebc_b_src_ptr
+ ld_a0,a0,0
+ add_a0,a0,t0
+ lb_a0,a0,0 # a0 = lo char
+ la_a1 &hex_digit_table
+ add_a1,a1,a0
+ lb_a2,a1,0 # a2 = lo digit (or 0xFF)
+ li_a3 %255 %0
+ la_br &err_bad_escape
+ beq_a2,a3
+
+ # advance past lo char
+ la_a0 &ebc_b_src_i
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+
+ # byte = (hi << 4) | lo. a2 still holds lo; shli puts hi<<4 in a3.
+ la_a0 &ebc_b_hex_hi
+ ld_t0,a0,0
+ shli_a3,t0,4
+ or_a3,a3,a2
+
+ la_br &ebc_b_emit_one
+ b
+
+:ebc_b_done
+ # stream->pos = ebc_call_end_pos; stream->line_start = 0
+ la_a0 &ebc_stream
+ ld_a0,a0,0
+ la_a1 &ebc_call_end_pos
+ ld_t0,a1,0
+ st_t0,a0,16
+ li_t1 %0 %0
+ st_t1,a0,24
+ eret
+
## %local(NAME): emit-time variant. expand_builtin_call has already
## parse_args'd the call (so arg_starts/arg_ends/arg_count/call_end_pos
## are set), but expand_local_into_pool re-parses internally so it can
@@ -6384,24 +6286,8 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
la_a0 &msg_unterminated_directive
la_br &fatal
b
-:err_bad_scope_header
- la_a0 &msg_bad_scope_header
- la_br &fatal
- b
-:err_scope_depth_overflow
- la_a0 &msg_scope_depth_overflow
- la_br &fatal
- b
-:err_scope_underflow
- la_a0 &msg_scope_underflow
- la_br &fatal
- b
-:err_scope_not_closed
- la_a0 &msg_scope_not_closed
- la_br &fatal
- b
-:err_bad_scope_label
- la_a0 &msg_bad_scope_label
+:err_bad_escape
+ la_a0 &msg_bad_escape
la_br &fatal
b
:err_bad_frame_header
@@ -6506,11 +6392,10 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
:const_enum "%enum"
:const_size "SIZE"
:const_count "COUNT"
-:const_scope "%scope"
-:const_endscope "%endscope"
:const_frame "%frame"
:const_endframe "%endframe"
:const_local "%local"
+:const_bytes "%bytes"
## Suffix appended to the frame name when looking up <frame>_FRAME.<field>.
:const_frame_suffix "_FRAME."
@@ -6539,6 +6424,35 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
## Nibble-to-hex lookup table for emit_hex_value.
:hex_chars "0123456789ABCDEF"
+## 256-byte hex-digit lookup table for %bytes(\xNN). Indexed by source
+## byte; value is the digit (0..15) for '0'..'9'/'a'..'f'/'A'..'F', or
+## 0xFF for any other input. The escape decoder reads two source bytes
+## and combines (hi << 4) | lo into the emitted byte; either lookup
+## returning 0xFF triggers err_bad_escape.
+:hex_digit_table
+## 0x00-0x1F: all invalid
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+## 0x20-0x2F: invalid
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+## 0x30-0x39 = '0'..'9' -> 0..9; 0x3A-0x3F invalid
+'00010203040506070809FFFFFFFFFFFF'
+## 0x40 invalid; 0x41-0x46 = 'A'..'F' -> 10..15; 0x47-0x5F invalid
+'FF0A0B0C0D0E0FFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+## 0x60 invalid; 0x61-0x66 = 'a'..'f' -> 10..15; 0x67-0x7F invalid
+'FF0A0B0C0D0E0FFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+## 0x80-0xFF: all invalid
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'
+
## 256-byte char-class table for lex_loop / lex_word_scan. Indexed by the
## source byte `c`; value is the class code dispatched by lex_loop:
## 0 WORD (default; word_scan continues through this byte)
@@ -6581,7 +6495,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
:bss_init_tbl
&paste_scratch_ptr ZERO4 OFF_paste_scratch
&local_label_scratch_ptr ZERO4 OFF_local_label_scratch
-&scope_stack_ptr ZERO4 OFF_scope_stack
&df_name_scratch_ptr ZERO4 OFF_df_name_scratch
&ebc_str_scratch_ptr ZERO4 OFF_ebc_str_scratch
&arg_starts_ptr ZERO4 OFF_arg_starts
@@ -6620,11 +6533,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000
:msg_unbalanced_braces "unbalanced braces" '00'
:msg_bad_directive "bad %struct/%enum directive" '00'
:msg_unterminated_directive "unterminated %struct/%enum directive" '00'
-:msg_bad_scope_header "bad scope header" '00'
-:msg_scope_depth_overflow "scope depth overflow" '00'
-:msg_scope_underflow "scope underflow" '00'
-:msg_scope_not_closed "scope not closed" '00'
-:msg_bad_scope_label "bad scope label" '00'
+:msg_bad_escape "bad escape in %bytes" '00'
:msg_bad_frame_header "bad frame header" '00'
:msg_frame_already_active "frame already active" '00'
:msg_frame_underflow "frame underflow" '00'
@@ -6687,8 +6596,6 @@ ZERO8
ZERO8
:def_body_line_start
ZERO8
-:psc_stream_end
-ZERO8
:pf_stream_end
ZERO8
@@ -6848,23 +6755,6 @@ ZERO8 ZERO8 ZERO8
## append_text. Caps the combined tail + digit length at ~125 bytes,
## which is ample for any realistic local-label name.
-## --- Scope-stack rewrite -----------------------------------------------------
-## scope_depth: current depth (0..32).
-## scope_stack: 32 × TextSpan (16 bytes each) = 512 bytes. Each slot is
-## (text_ptr, text_len) pointing into stable text memory (input_buf or
-## text_buf — both append-only), so names are borrowed without copying.
-## sr_* slots hold emit_scope_rewrite's inputs across the byte-copy loops.
-:scope_depth
-ZERO8
-:sr_tok_ptr
-ZERO8
-:sr_skip
-ZERO8
-:sr_sigil
-ZERO8
-:sr_name_len
-ZERO8
-
## %struct / %enum scratch. define_fielded calls append_text twice
## per synthesized macro, so every piece of state that must survive a call
## lives here rather than in a register.
@@ -6939,9 +6829,9 @@ ZERO8
## Builtin scratch.
## emit_hex_value: ehv_value/bytes hold the args; ehv_scratch is a 24-byte
-## buffer (max 18 chars used: 2 quotes + 16 hex chars; rounded up to keep
-## the next slot 8-byte aligned); ehv_token is a synthesized 32-byte
-## Token { kind, text_ptr, text_len, tight }.
+## buffer (max 16 chars used: 16 hex chars for an 8-byte $-emit; the
+## remaining 8 bytes are unused slack kept from the quoted-literal layout);
+## ehv_token is a synthesized 32-byte Token { kind, text_ptr, text_len, tight }.
:ehv_value
ZERO8
:ehv_bytes
@@ -6990,6 +6880,21 @@ ZERO8
:ebc_str_token
ZERO8 ZERO8 ZERO8 ZERO8
+## %bytes builtin scratch. ebc_b_src_ptr/_len/_i walk the input string
+## across emit_hex_value calls (which clobber every caller-saved reg).
+## ebc_b_hex_hi spills the high nibble across the second hex_digit_table
+## lookup for the low nibble. Each source byte emits independently via
+## emit_hex_value(byte, 1); hex2pp's parse_byte_stream coalesces the
+## resulting space-separated runs back into a contiguous byte stream.
+:ebc_b_src_ptr
+ZERO8
+:ebc_b_src_len
+ZERO8
+:ebc_b_src_i
+ZERO8
+:ebc_b_hex_hi
+ZERO8
+
## arg_starts[16] / arg_ends[16]: 16 × 8 = 128 bytes each, i.e. 4 ZERO32.
## Written by parse_args; read by expand_macro_tokens and expand_builtin_call.
@@ -7022,8 +6927,6 @@ ZERO8 ZERO8 ZERO8 ZERO8
ZERO8
:local_label_scratch_ptr
ZERO8
-:scope_stack_ptr
-ZERO8
:df_name_scratch_ptr
ZERO8
:ebc_str_scratch_ptr
diff --git a/Makefile b/Makefile
@@ -11,8 +11,9 @@
# scripts/Containerfile.busybox.
#
# Common entrypoints:
-# make all (m1pp for ARCH)
+# make all (m1pp + hex2pp for ARCH)
# make m1pp build the m1pp expander for ARCH
+# make hex2pp build the hex2pp assembler/linker for ARCH
# make scheme1 build the scheme1 interpreter for ARCH
# make cc catm the cc compiler source for ARCH
# make tcc-flat flatten upstream tcc.c into one TU
@@ -28,15 +29,26 @@
# opt-in: not part of `make test`)
# make image build the per-arch container image
# make tools bootstrap M0/hex2-0/catm for ARCH
+# (seed-only: used to build m1pp + hex2pp)
# make tables regen pre-pruned P1/P1-<arch>.M1 tables
-# make tools-native build host-native M1/hex2/m1pp (opt-in)
+# make tools-native build host-native M1/hex2/m1pp/hex2pp (opt-in)
# make cloc line counts for the core sources
# make clean rm -rf build/
#
# Output layout: every binary lives at build/<arch>/<src-path-without-ext>,
# mirroring the source path under the repo root (e.g. M1pp/M1pp.P1 ->
-# build/<arch>/M1pp/M1pp; tests/cc/foo.c -> build/<arch>/tests/cc/foo).
+# build/<arch>/M1pp/M1pp; hex2pp/hex2pp.P1 -> build/<arch>/hex2pp/hex2pp;
+# tests/cc/foo.c -> build/<arch>/tests/cc/foo).
# Per-source intermediates land under build/<arch>/.work/<src-path>/.
+#
+# Bootstrap chain:
+# 1. seed (vendored hex0-seed) -> M0 + hex2-0 + catm (boot1.sh)
+# 2a. seed M0 + hex2-0 -> M1pp ELF (boot-build-p1.sh)
+# 2b. seed M0 + hex2-0 -> hex2pp ELF (boot-build-p1.sh)
+# 3. M1pp + hex2pp -> every other ELF (.P1pp pipeline) (boot-build-p1pp.sh)
+# The seed M0/hex2-0/catm participate ONLY in step 2 (building the two
+# new tools from their .P1 sources). Once both binaries exist, no
+# downstream user/test/scheme/cc target ever invokes them again.
ARCH ?= aarch64
@@ -71,10 +83,10 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \
# --- Targets --------------------------------------------------------------
-.PHONY: all m1pp scheme1 cc test image tools tables \
+.PHONY: all m1pp hex2pp scheme1 cc test image tools tables \
tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc
-all: m1pp
+all: m1pp hex2pp
help:
@sed -n '/^# Common entrypoints:/,/^$$/p' Makefile | sed 's/^# *//'
@@ -94,6 +106,8 @@ CLOC_FILES := \
$(foreach f,$(CLOC_SEED_BASES),vendor/seed/$(a)/$(f))) \
$(foreach a,$(CLOC_ARCHES),P1/P1-$(a).M1) \
M1pp/M1pp.P1 \
+ hex2pp/hex2pp.P1 \
+ $(foreach a,$(CLOC_ARCHES),P1/elf-$(a).hex2pp) \
$(foreach a,$(CLOC_ARCHES),P1/P1-$(a).M1pp) \
P1/P1.M1pp \
P1/P1pp.P1pp \
@@ -147,7 +161,7 @@ $(TOOLS_M0): build/%/tools/M0: scripts/boot1.sh build/%/.image \
# tables` after editing P1/gen/*.py or any of the prune-source files
# below, then commit the updated P1/*.M1.
-P1_PRUNE_SRCS := M1pp/M1pp.P1 $(wildcard tests/P1/*.P1)
+P1_PRUNE_SRCS := M1pp/M1pp.P1 hex2pp/hex2pp.P1 $(wildcard tests/P1/*.P1)
tables: $(foreach a,$(ALL_ARCHES),P1/P1-$(a).M1)
@@ -164,6 +178,7 @@ P1/P1-%.M1: build/%/P1/P1.M1 scripts/prune-p1-table.sh $(P1_PRUNE_SRCS)
# --- Programs (per arch) --------------------------------------------------
M1PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/M1pp/M1pp)
+HEX2PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hex2pp/hex2pp)
SCHEME1_SRC := scheme1/scheme1.P1pp
SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1/scheme1)
@@ -174,24 +189,35 @@ CC_SRCS := scheme1/prelude.scm cc/cc.scm cc/main.scm
CC_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/cc/cc.scm)
m1pp: $(OUT_DIR)/M1pp/M1pp
+hex2pp: $(OUT_DIR)/hex2pp/hex2pp
scheme1: $(OUT_DIR)/scheme1/scheme1
cc: $(OUT_DIR)/cc/cc.scm
-# Per-arch deps for .P1/.M1 builds (raw M1, no macro expansion).
+# Per-arch deps for the seed-built .P1 -> ELF chain. Used ONLY to build
+# the two new self-hosted tools (M1pp.P1, hex2pp.P1) from their pure-P1
+# sources via vendored M0 + hex2-0. After that, no other target reaches
+# back to the seed M0/hex2 path.
P1_BUILD_DEPS = scripts/lint.sh scripts/boot-build-p1.sh \
build/%/.image build/%/tools/M0 \
vendor/seed/%/ELF.hex2 P1/P1-%.M1
-# Per-arch deps for .P1pp builds (m1pp expansion + libp1pp).
+# Per-arch deps for .P1pp builds (M1pp expansion + libp1pp + hex2pp link).
+# The seed M0/hex2 tools are deliberately absent: this chain is
+# end-to-end M1pp + hex2pp.
P1PP_BUILD_DEPS = scripts/boot-build-p1pp.sh \
- build/%/.image build/%/tools/M0 build/%/M1pp/M1pp \
- vendor/seed/%/ELF.hex2 \
+ build/%/.image \
+ build/%/M1pp/M1pp build/%/hex2pp/hex2pp \
+ P1/elf-%.hex2pp \
P1/P1-%.M1pp P1/P1.M1pp P1/P1pp.P1pp
$(M1PP_BINS): build/%/M1pp/M1pp: M1pp/M1pp.P1 $(P1_BUILD_DEPS)
ARCH=$* sh scripts/lint.sh M1pp/M1pp.P1
$(call PODMAN,$*) sh scripts/boot-build-p1.sh M1pp/M1pp.P1 $@
+$(HEX2PP_BINS): build/%/hex2pp/hex2pp: hex2pp/hex2pp.P1 $(P1_BUILD_DEPS)
+ ARCH=$* sh scripts/lint.sh hex2pp/hex2pp.P1
+ $(call PODMAN,$*) sh scripts/boot-build-p1.sh hex2pp/hex2pp.P1 $@
+
$(SCHEME1_BINS): build/%/scheme1/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS)
$(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $@ $(SCHEME1_SRC)
@@ -273,7 +299,7 @@ $(TCC_BOOT2_P1PPS): build/%/tcc-boot2/tcc.flat.P1pp: \
# tcc-boot2 link: pure catm chain — entry stub, libc, client TU,
# elf terminator. boot-build-p1pp.sh concatenates them in order
-# ahead of the M1pp expander/M0/hex2 pipeline.
+# ahead of the M1pp expander + hex2pp pipeline.
$(TCC_BOOT2_BINS): build/%/tcc-boot2/tcc-boot2: \
build/%/tcc-boot2/tcc.flat.P1pp build/%/vendor/mes-libc/libc.P1pp \
P1/entry-libc.P1pp P1/elf-end.P1pp \
@@ -350,7 +376,8 @@ $(TCC_CC_MEM): tcc-cc/mem.c \
# --- Native tools (opt-in dev-loop helpers) -------------------------------
-NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 build/native-tools/m1pp
+NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 \
+ build/native-tools/m1pp build/native-tools/hex2pp
tools-native: $(NATIVE_TOOLS)
@@ -363,6 +390,9 @@ build/native-tools/hex2: scripts/build-native-tools.sh
build/native-tools/m1pp: scripts/build-native-tools.sh M1pp/M1pp.c
sh scripts/build-native-tools.sh m1pp
+build/native-tools/hex2pp: scripts/build-native-tools.sh hex2pp/hex2pp.c
+ sh scripts/build-native-tools.sh hex2pp
+
# --- Tests ----------------------------------------------------------------
#
# `make test` runs every suite. SUITE selects one; ARCH restricts to one
@@ -382,29 +412,39 @@ else
TEST_ARCHES := $(ARCH)
endif
-# m1pp suite per-arch deps: image, tools, table, expander.
+# m1pp suite per-arch deps: image, expander, hex2pp (the suite pipes
+# M1pp output through hex2pp as a smoke test in addition to the
+# text-diff against .expected).
TEST_M1PP_DEPS := $(foreach a,$(TEST_ARCHES), \
- build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 build/$(a)/M1pp/M1pp)
+ build/$(a)/.image build/$(a)/M1pp/M1pp build/$(a)/hex2pp/hex2pp \
+ P1/elf-$(a).hex2pp)
-# p1 suite per-arch deps: image, tools, table, expander.
+# p1 suite per-arch deps: image, table, expander, hex2pp, ELF header.
+# Raw .P1 fixtures still go through the seed M0+hex2 chain (boot-build-p1.sh)
+# because they share the legacy M1 backend; .P1pp fixtures go through
+# the new M1pp + hex2pp chain via boot-build-p1pp.sh.
TEST_P1_DEPS := $(foreach a,$(TEST_ARCHES), \
- build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 build/$(a)/M1pp/M1pp)
+ build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 \
+ build/$(a)/M1pp/M1pp build/$(a)/hex2pp/hex2pp P1/elf-$(a).hex2pp)
-# scheme1 suite per-arch deps: image, tools, expander, scheme1 binary.
+# scheme1 suite per-arch deps: image, expander, hex2pp, scheme1 binary.
# (run-tests.sh runs the pre-built binary against each .scm fixture; it
# does not rebuild the interpreter per fixture.)
TEST_SCHEME1_DEPS := $(foreach a,$(TEST_ARCHES), \
- build/$(a)/.image build/$(a)/tools/M0 build/$(a)/M1pp/M1pp \
+ build/$(a)/.image build/$(a)/M1pp/M1pp build/$(a)/hex2pp/hex2pp \
build/$(a)/scheme1/scheme1)
-# cc-* suites: scheme1 + m1pp cover everything. cc-util / cc-lex /
-# cc-pp byte-diff their pure transformations; cc-cg / cc compile the
-# emitted P1pp through the P1pp toolchain (which m1pp drives) and run
-# the resulting ELF. cc.scm is only needed by the cc suite (it invokes
-# the catm'd compiler against a .c fixture); the rest catm their own
-# per-suite layer list.
+# cc-* suites: scheme1 + M1pp + hex2pp cover everything. cc-util /
+# cc-lex / cc-pp byte-diff their pure transformations; cc-cg / cc
+# compile the emitted P1pp through the P1pp toolchain (M1pp + hex2pp)
+# and run the resulting ELF. cc.scm is only needed by the cc suite
+# (it invokes the catm'd compiler against a .c fixture); the rest
+# catm their own per-suite layer list. catm comes from build/$(a)/tools/
+# (built once during the seed bootstrap; only the cc-unit catm chain
+# uses it now — the P1pp pipeline no longer touches it).
TEST_CC_UNIT_DEPS := $(foreach a,$(TEST_ARCHES), \
- build/$(a)/.image build/$(a)/tools/M0 build/$(a)/M1pp/M1pp \
+ build/$(a)/.image build/$(a)/tools/M0 \
+ build/$(a)/M1pp/M1pp build/$(a)/hex2pp/hex2pp \
build/$(a)/scheme1/scheme1)
TEST_CC_DEPS := $(TEST_CC_UNIT_DEPS) \
diff --git a/hex2pp/hex2pp.P1 b/hex2pp/hex2pp.P1
@@ -0,0 +1,3727 @@
+## hex2pp.P1 -- P1 implementation of the hex2++ assembler/linker.
+##
+## Mirrors hex2pp/hex2pp.c exactly in observable behaviour. See the C
+## file and docs/HEX2pp.md for the full spec; brief summary:
+##
+## Inputs are concatenated, scanned in two passes. Pass 1 records label
+## definitions while advancing a position counter (ip). Pass 2 emits
+## bytes, resolving label references against the table built in pass 1.
+##
+## Active syntax:
+## digits in current byte mode -> raw bytes (HEX or BINARY)
+## :NAME -> label definition
+## SIGIL NAME [- OTHER] -> label reference (! @ $ ~ % &)
+## .align N [PATTERN] -> pad to N-byte boundary
+## .fill N B -> N copies of byte B
+## .scope / .endscope -> nestable local-label scope
+## # ... / ; ... -> line comment
+##
+## Multi-byte reference values are emitted little-endian by default.
+##
+## Invocation:
+## hex2pp (-f|--file) FILE [(-f|--file) FILE ...]
+## [-o|--output OUT]
+## [-B|--base-address ADDR]
+## [--big-endian | --little-endian]
+## [-b|--binary]
+## [--non-executable]
+## [-h|--help]
+##
+## P1 ABI: a0..a3 arg/return, t0..t2 caller-saved temps. Non-leaf
+## functions use enter_0 / eret. Entry is the portable p1_main
+## (a0=argc, a1=argv); the backend-owned :_start stub captures argc/argv
+## from the native entry state and sys_exits p1_main's return value.
+##
+## chmod note: the seed P1 (P1/P1-<arch>.M1) exposes only sys_openat /
+## sys_read / sys_write / sys_exit. Since there is no chmod() syscall in
+## the seed, we encode the desired final mode (0750 or 0640) directly in
+## openat's mode argument at file-creation time. This achieves the same
+## resulting file permissions as the C reference.
+##
+## Register usage discipline: the seed P1 mnemonic table only defines a
+## restricted subset of (dst,src1,src2) and (dst,base,offset) combos.
+## To stay within that table this file spills almost everything through
+## fixed BSS slots between operations. The naming convention
+## ``<func>_<name>`` keeps per-function spill slots from colliding.
+
+## --- Caps -------------------------------------------------------------------
+## Mirrors hex2pp.c constants (MAX_FILES=64, MAX_INPUT_BYTES=16 MiB,
+## MAX_OUTPUT_BYTES=128 MiB, MAX_LABELS=2^20, MAX_TEXT=8 MiB,
+## MAX_TOKEN=4096, MAX_SCOPE_DEPTH=32). Stored as 8-byte little-endian:
+## the FIRST hex pair is the least-significant byte, so 16 MiB
+## (0x01000000) is spelled 00 00 00 01 00 00 00 00 -- not the usual
+## left-to-right hex rendering of the number.
+DEFINE H2_INPUT_CAP 0000000100000000
+DEFINE H2_OUTPUT_CAP 0000000800000000
+DEFINE H2_TEXT_CAP 0000800000000000
+DEFINE H2_LABEL_CAP 0000100000000000
+DEFINE H2_TOKEN_CAP 0010000000000000
+DEFINE H2_FILES_CAP 4000000000000000
+DEFINE H2_SCOPE_CAP 2000000000000000
+
+## openat / mode constants (Linux generic)
+## O_WRONLY_CREAT_TRUNC = 0x241, MODE_0750 = 0x1E8, MODE_0640 = 0x1A0,
+## AT_FDCWD = -100 (all little-endian, as above).
+DEFINE O_RDONLY 0000000000000000
+DEFINE O_WRONLY_CREAT_TRUNC 4102000000000000
+DEFINE MODE_0750 E801000000000000
+DEFINE MODE_0640 A001000000000000
+DEFINE AT_FDCWD 9CFFFFFFFFFFFFFF
+
+DEFINE ZERO8 '0000000000000000'
+DEFINE ZERO4 '00000000'
+
+## --- BSS layout (offsets from ELF_end) -------------------------------------
+##
+## Each "_ptr" is a one-word slot in the executable's static data; p1_main's
+## bss_init_loop initializes each to ELF_end + OFF_*. The arenas live past
+## ELF_end (covered by the segment's memsz; the loader zero-initializes).
+##
+## Sizes (mirroring hex2pp.c caps):
+##   input_paths    64 * 8       =   512 B   (char * per file)
+##   input_starts   64 * 8       =   512 B   (offset into input_buf)
+##   input_lens     64 * 8       =   512 B
+##   scope_stack    32 * 8       =   256 B
+##   line_scratch                    64 B   (decimal render of cur_line)
+##   name_buf                      4096 B
+##   label_buf                     4096 B
+##   other_buf                     4096 B
+##   pat_buf                       4096 B
+##   ev_bytes                         8 B
+##   df_byte                          8 B
+##   input_buf                    16 MiB
+##   output_buf                  128 MiB
+##   text_buf                      8 MiB
+##   labels                       32 MiB    (2^20 * 32 B)
+##
+## Cumulative offsets, padded for clarity (numeric value in the right
+## column; the DEFINE body is that value in little-endian byte order):
+##   input_paths   0x0000      name_buf    0x0740      input_buf   0x00004800
+##   input_starts  0x0200      label_buf   0x1740      output_buf  0x01004800
+##   input_lens    0x0400      other_buf   0x2740      text_buf    0x09004800
+##   scope_stack   0x0600      pat_buf     0x3740      labels      0x09804800
+##   line_scratch  0x0700      ev_bytes    0x4740
+##                             df_byte     0x4750
+DEFINE OFF_input_paths 0000000000000000
+DEFINE OFF_input_starts 0002000000000000
+DEFINE OFF_input_lens 0004000000000000
+DEFINE OFF_scope_stack 0006000000000000
+DEFINE OFF_line_scratch 0007000000000000
+DEFINE OFF_name_buf 4007000000000000
+DEFINE OFF_label_buf 4017000000000000
+DEFINE OFF_other_buf 4027000000000000
+DEFINE OFF_pat_buf 4037000000000000
+DEFINE OFF_ev_bytes 4047000000000000
+DEFINE OFF_df_byte 5047000000000000
+DEFINE OFF_input_buf 0048000000000000
+DEFINE OFF_output_buf 0048000100000000
+DEFINE OFF_text_buf 0048000900000000
+DEFINE OFF_labels 0048800900000000
+
+## --- Runtime shell: argv parse -> load files -> two passes -> write -> exit
+
+:p1_main
+    enter_0
+
+    # ---- Save argc / argv FIRST (subsequent setup clobbers a0/a1) ---------
+    # On entry a0 = argc, a1 = argv (per the backend :_start stub).
+    la_a2 &saved_argc
+    st_a0,a2,0
+    la_a2 &saved_argv
+    st_a1,a2,0
+
+    # ---- Init BSS pointer slots from ELF_end ------------------------------
+    # Walks [bss_init_tbl, bss_init_tbl_end) in 16-byte records: the word
+    # at +0 is the address of a pointer slot, the word at +8 is an offset;
+    # the slot receives ELF_end + offset.
+    la_t0 &ELF_end
+    la_t1 &bss_init_tbl
+    la_t2 &bss_init_tbl_end
+:bss_init_loop
+    la_br &bss_init_done
+    beq_t1,t2
+    ld_a2,t1,0
+    ld_a3,t1,8
+    add_a3,a3,t0
+    st_a3,a2,0
+    addi_t1,t1,16
+    la_br &bss_init_loop
+    b
+:bss_init_done
+
+    # ---- Default output_path = "a.out" -------------------------------------
+    la_a0 &const_a_out
+    la_a1 &output_path
+    st_a0,a1,0
+
+    # arg_idx starts at 1 (argv[0] is skipped).
+:arg_loop_init
+    li_t0 %1 %0
+    la_a0 &arg_idx
+    st_t0,a0,0
+
+    # Loop head. arg_idx is only read here, never advanced: whatever
+    # branches back to this label must already have stepped it.
+:arg_loop
+    # if (i >= argc) goto arg_done
+    la_a0 &arg_idx
+    ld_t0,a0,0
+    la_a1 &saved_argc
+    ld_t1,a1,0
+    la_br &arg_done
+    beq_t0,t1
+    la_br &arg_done
+    blt_t1,t0
+
+    # arg_ptr = argv[i] = *(argv + 8*i)
+    la_a0 &saved_argv
+    ld_a0,a0,0
+    mov_a1,a0                   # a1 = argv
+    la_a0 &arg_idx
+    ld_t0,a0,0
+    shli_t2,t0,3                # t2 = 8*i
+    add_a0,t2,a1                # a0 = argv + 8*i
+    ld_a0,a0,0                  # a0 = argv[i]
+    la_a1 &arg_ptr
+    st_a0,a1,0
+
+    # Dispatch on the argument string. Each compare uses str_eq, which
+    # checks the trailing NUL of the argv string against the option
+    # constant's known length.
+    # Call shape for every probe below: a0 = argv string (reloaded from
+    # arg_ptr each time because the call clobbers it), a1 = option
+    # constant, a2 = that constant's length in bytes; a nonzero a0 on
+    # return selects the handler.
+
+    # -f / --file
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_dash_f
+    li_a2 %2 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_file
+    bnez_a0
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_long_file
+    li_a2 %6 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_file
+    bnez_a0
+
+    # -o / --output
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_dash_o
+    li_a2 %2 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_output
+    bnez_a0
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_long_output
+    li_a2 %8 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_output
+    bnez_a0
+
+    # -B / --base-address
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_dash_B
+    li_a2 %2 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_base
+    bnez_a0
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_long_base
+    li_a2 %14 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_base
+    bnez_a0
+
+    # --big-endian
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_long_big
+    li_a2 %12 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_big
+    bnez_a0
+
+    # --little-endian
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_long_little
+    li_a2 %15 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_little
+    bnez_a0
+
+    # -b / --binary
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_dash_b
+    li_a2 %2 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_binary
+    bnez_a0
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_long_binary
+    li_a2 %8 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_binary
+    bnez_a0
+
+    # --non-executable
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_long_nonexec
+    li_a2 %16 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_nonexec
+    bnez_a0
+
+    # -h / --help
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_dash_h
+    li_a2 %2 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_help
+    bnez_a0
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &opt_long_help
+    li_a2 %6 %0
+    la_br &str_eq
+    call
+    la_br &arg_is_help
+    bnez_a0
+
+    # No option matched.
+    la_br &err_unknown_arg
+    b
+## Option handlers (still inside p1_main's frame). Each handler consumes
+## its option -- and, for -f / -o / -B, the following value word via
+## arg_advance -- then branches to arg_loop_next, which steps arg_idx
+## past the last word consumed before re-entering arg_loop. Branching
+## straight back to arg_loop would re-parse the same argv word.
+:arg_is_file
+    la_br &arg_advance
+    call
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_br &load_input
+    call
+    la_br &arg_loop_next
+    b
+:arg_is_output
+    la_br &arg_advance
+    call
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_a1 &output_path
+    st_a0,a1,0
+    la_br &arg_loop_next
+    b
+:arg_is_base
+    la_br &arg_advance
+    call
+    la_a0 &arg_ptr
+    ld_a0,a0,0
+    la_br &parse_long_arg
+    call
+    la_a1 &base_address
+    st_a0,a1,0
+    la_br &arg_loop_next
+    b
+:arg_is_big
+    li_t0 %1 %0
+    la_a0 &big_endian
+    st_t0,a0,0
+    la_br &arg_loop_next
+    b
+:arg_is_little
+    li_t0 %0 %0
+    la_a0 &big_endian
+    st_t0,a0,0
+    la_br &arg_loop_next
+    b
+:arg_is_binary
+    li_t0 %1 %0
+    la_a0 &byte_mode
+    st_t0,a0,0
+    la_br &arg_loop_next
+    b
+:arg_is_nonexec
+    li_t0 %1 %0
+    la_a0 &non_executable
+    st_t0,a0,0
+    la_br &arg_loop_next
+    b
+:arg_is_help
+    # Print usage and exit(0) directly; never returns to the loop.
+    la_br &print_usage
+    call
+    li_a0 sys_exit
+    li_a1 %0 %0
+    syscall
+
+## arg_loop_next: arg_idx++ (the loop's "i++" step), then back to the
+## bounds check at arg_loop.
+:arg_loop_next
+    la_a0 &arg_idx
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &arg_loop
+    b
+
+## arg_advance(): i++; if (i >= argc) usage error; arg_ptr = argv[i].
+## Takes no register arguments and returns nothing; it communicates only
+## through the arg_idx / arg_ptr slots. Used by options that take a value
+## word. On return arg_idx indexes that value word.
+:arg_advance
+    enter_0
+    la_a0 &arg_idx
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_a1 &saved_argc
+    ld_t1,a1,0
+    # i >= argc is tested as (i == argc) || (argc < i).
+    la_br &err_missing_arg_value
+    beq_t0,t1
+    la_br &err_missing_arg_value
+    blt_t1,t0
+    # arg_ptr = argv[i]
+    la_a0 &saved_argv
+    ld_a0,a0,0
+    mov_a1,a0
+    shli_t2,t0,3                # t2 = 8*i (t0 still holds the new i)
+    add_a0,t2,a1
+    ld_a0,a0,0
+    la_a1 &arg_ptr
+    st_a0,a1,0
+    eret
+
+# Tail of p1_main, reached once every argv word has been consumed.
+# Requires at least one input, runs the two passes, writes the output
+# and returns 0 from p1_main's frame.
+:arg_done
+    la_a0 &input_count
+    ld_t0,a0,0
+    la_br &err_no_inputs
+    beqz_t0
+
+    # ---- Pass 1: collect labels --------------------------------------------
+    li_t0 %1 %0
+    la_a0 &pass
+    st_t0,a0,0
+    la_br &reset_pass_state
+    call
+    la_br &run_one_pass
+    call
+    # A nonzero scope_depth after the pass is reported via
+    # err_scope_unclosed.
+    la_a0 &scope_depth
+    ld_t0,a0,0
+    la_br &err_scope_unclosed
+    bnez_t0
+
+    # Clear cur_path so any post-pass error reports without file:line.
+    li_t0 %0 %0
+    la_a0 &cur_path
+    st_t0,a0,0
+
+    # ---- Pass 2: emit ------------------------------------------------------
+    li_t0 %2 %0
+    la_a0 &pass
+    st_t0,a0,0
+    la_br &reset_pass_state
+    call
+    la_br &run_one_pass
+    call
+    la_a0 &scope_depth
+    ld_t0,a0,0
+    la_br &err_scope_unclosed
+    bnez_t0
+
+    li_t0 %0 %0
+    la_a0 &cur_path
+    st_t0,a0,0
+
+    la_br &write_output
+    call
+
+    # p1_main returns 0.
+    li_a0 %0 %0
+    eret
+
+## reset_pass_state(): ip=0, output_used=0, scope_depth=0, scope_seq=0.
+## No arguments, no result; clobbers a0 and t0. Called before each pass
+## so both passes start from the same counters.
+:reset_pass_state
+    enter_0
+    li_t0 %0 %0                 # t0 = 0, stored into every slot below
+    la_a0 &ip
+    st_t0,a0,0
+    la_a0 &output_used
+    st_t0,a0,0
+    la_a0 &scope_depth
+    st_t0,a0,0
+    la_a0 &scope_seq
+    st_t0,a0,0
+    eret
+
+## run_one_pass(): for i in [0, input_count) call process_file(i).
+## The loop index lives in the pass_idx slot rather than a register
+## and is reloaded after every call to process_file.
+:run_one_pass
+    enter_0
+    li_t0 %0 %0
+    la_a0 &pass_idx
+    st_t0,a0,0
+:run_one_pass_loop
+    la_a0 &pass_idx
+    ld_t0,a0,0
+    la_a1 &input_count
+    ld_t1,a1,0
+    la_br &run_one_pass_done
+    beq_t0,t1                   # i == input_count -> done
+    mov_a0,t0                   # a0 = file index argument
+    la_br &process_file
+    call
+    la_a0 &pass_idx
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &run_one_pass_loop
+    b
+:run_one_pass_done
+    eret
+
+## --- File loader ------------------------------------------------------------
+## load_input(a0=path): record path, read file appended into input_buf at
+## offset input_total, then advance input_total. Fatal on any I/O failure.
+##
+## In:   a0 = NUL-terminated path (an argv pointer).
+## Out:  nothing; input_paths/input_starts/input_lens[input_count] are
+##       filled in, then input_count and input_total are advanced.
+## The descriptor is not closed here: the header notes the seed exposes
+## only openat / read / write / exit.
+:load_input
+    enter_0
+    # Stash the path before anything else: a0 doubles as a scratch
+    # address register further down, and the openat call re-reads the
+    # path from li_path.
+    la_a1 &li_path
+    st_a0,a1,0
+
+    la_a1 &input_count
+    ld_t0,a1,0
+    li_t1 H2_FILES_CAP
+    la_br &err_too_many_files
+    beq_t0,t1
+
+    # Save path into input_paths[input_count].
+    la_a1 &input_paths_ptr
+    ld_a1,a1,0
+    shli_t2,t0,3                # t2 = 8*input_count
+    add_a1,t2,a1
+    st_a0,a1,0
+
+    # input_starts[input_count] = input_total
+    # (t1 -> a3 goes through aux_tmp; this clobbers a0, which is why the
+    # path was stashed at entry.)
+    la_a1 &input_total
+    ld_t1,a1,0
+    la_a2 &input_starts_ptr
+    ld_a2,a2,0
+    add_a2,t2,a2
+    la_a0 &aux_tmp
+    st_t1,a0,0
+    ld_a3,a0,0
+    st_a3,a2,0
+
+    # fd = openat(AT_FDCWD, path, O_RDONLY, 0)
+    li_a0 sys_openat
+    li_a1 AT_FDCWD
+    la_a2 &li_path
+    ld_a2,a2,0
+    li_a3 O_RDONLY
+    li_t0 %0 %0
+    syscall
+    la_br &err_open_input
+    bltz_a0
+    la_a1 &li_fd
+    st_a0,a1,0
+
+:li_read_loop
+    # Fatal once input_total has reached INPUT_CAP (tested as
+    # total == cap || cap < total).
+    la_a0 &input_total
+    ld_t0,a0,0
+    li_t1 H2_INPUT_CAP
+    la_br &err_input_too_big
+    beq_t0,t1
+    la_br &err_input_too_big
+    blt_t1,t0
+
+    # n = read(fd, input_buf + input_total, INPUT_CAP - input_total)
+    la_a0 &li_fd
+    ld_a1,a0,0
+    la_a2 &input_buf_ptr
+    ld_a2,a2,0
+    add_a2,a2,t0
+    sub_a3,t1,t0                # a3 = bytes of room left
+    li_a0 sys_read
+    syscall
+    la_br &li_eof
+    beqz_a0                     # n == 0 -> end of file
+    la_br &err_read
+    bltz_a0                     # n < 0  -> read error
+
+    # input_total += n
+    la_a1 &input_total
+    ld_t0,a1,0
+    add_t0,t0,a0
+    st_t0,a1,0
+    la_br &li_read_loop
+    b
+
+:li_eof
+    # input_lens[input_count] = input_total - input_starts[input_count].
+    # The seed has add_a2,a2,t1 but neither add_a2,a2,t2 nor mov_t1,t2,
+    # so the stride-by-8 result lives in t2 and is then re-loaded as t1
+    # via the li_tmp scratch slot.
+    la_a0 &input_count
+    ld_t0,a0,0
+    shli_t2,t0,3
+    la_a3 &li_tmp
+    st_t2,a3,0
+    ld_t1,a3,0
+    la_a2 &input_starts_ptr
+    ld_a2,a2,0
+    add_a2,a2,t1                # a2 = &input_starts[i]
+    ld_a3,a2,0                  # a3 = input_starts[i]
+    la_a0 &input_total
+    ld_a1,a0,0                  # a1 = input_total
+    # We need (total - start) into a2, but no sub_a2,a1,a3 form exists.
+    # Spill start through li_tmp -> a0 so we can use sub_a2,a1,a0.
+    la_a0 &li_tmp
+    st_a3,a0,0
+    ld_a0,a0,0                  # a0 = start (re-reads via li_tmp)
+    sub_a2,a1,a0                # a2 = total - start
+    # Store into input_lens[i] (reuse the same t1 scaling).
+    la_a0 &input_lens_ptr
+    ld_a0,a0,0
+    add_a0,a0,t1
+    st_a2,a0,0
+
+    # input_count++
+    la_a0 &input_count
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    eret
+
+## --- Per-file scanner -------------------------------------------------------
+
+## process_file(a0=file_idx): set cur_path / cur_line / scan_pos / scan_end
+## from the input record, then dispatch character by character.
+##
+## In:   a0 = index into input_paths / input_starts / input_lens.
+## Out:  nothing; returns (via scan_done) once scan_pos reaches scan_end.
+## Setup falls through into scan_loop.
+:process_file
+    enter_0
+    mov_t0,a0
+    shli_t2,t0,3                # t2 = 8*idx
+
+    # cur_path = input_paths[idx]
+    la_a3 &li_tmp
+    st_t2,a3,0
+    ld_t1,a3,0                  # t1 = 8*idx (kept for all three tables)
+    la_a1 &input_paths_ptr
+    ld_a1,a1,0
+    add_a1,a1,t1
+    ld_a1,a1,0
+    la_a2 &cur_path
+    st_a1,a2,0
+
+    # cur_line = 1
+    li_t0 %1 %0
+    la_a2 &cur_line
+    st_t0,a2,0
+
+    # scan_pos = input_buf + input_starts[idx]
+    # The start offset must be added exactly once; it is moved a1 -> a0
+    # through li_tmp because the add form used here takes a0.
+    la_a1 &input_starts_ptr
+    ld_a1,a1,0
+    add_a1,a1,t1
+    ld_a1,a1,0                  # a1 = start offset
+    la_a2 &input_buf_ptr
+    ld_a2,a2,0
+    la_a3 &li_tmp
+    st_a1,a3,0
+    ld_a0,a3,0                  # a0 = start
+    add_a2,a2,a0                # a2 = input_buf + start
+    la_a0 &scan_pos
+    st_a2,a0,0
+
+    # scan_end = scan_pos + input_lens[idx]
+    la_a1 &input_lens_ptr
+    ld_a1,a1,0
+    add_a1,a1,t1
+    ld_a1,a1,0                  # a1 = len
+    la_a3 &li_tmp
+    st_a1,a3,0
+    ld_a0,a3,0
+    add_a2,a2,a0                # a2 = scan_pos + len
+    la_a0 &scan_end
+    st_a2,a0,0
+
+# Main per-file token loop (inside process_file's frame). Skips
+# whitespace/comments, then dispatches on the first byte of the next
+# token. Every handler branches back here when done.
+:scan_loop
+    la_br &skip_ws_and_comments
+    call
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    # scan_pos >= scan_end -> this file is finished.
+    la_br &scan_done
+    beq_t0,t1
+    la_br &scan_done
+    blt_t1,t0
+    lb_a0,t0,0                  # a0 = c, the byte at scan_pos
+
+    # Dispatch on c.
+    li_t1 %58 %0        # ':'
+    la_br &scan_label_def
+    beq_a0,t1
+    li_t1 %46 %0        # '.'
+    la_br &scan_directive
+    beq_a0,t1
+    # Reference sigils: scan_ref is entered with the sigil still in a0.
+    li_t1 %33 %0        # '!'
+    la_br &scan_ref
+    beq_a0,t1
+    li_t1 %64 %0        # '@'
+    la_br &scan_ref
+    beq_a0,t1
+    li_t1 %36 %0        # '$'
+    la_br &scan_ref
+    beq_a0,t1
+    li_t1 %126 %0       # '~'
+    la_br &scan_ref
+    beq_a0,t1
+    li_t1 %37 %0        # '%'
+    la_br &scan_ref
+    beq_a0,t1
+    li_t1 %38 %0        # '&'
+    la_br &scan_ref
+    beq_a0,t1
+    # Anything else must start a raw byte literal in the current mode.
+    la_br &is_byte_digit
+    call
+    la_br &scan_byte_stream
+    bnez_a0
+    la_br &err_unexpected_char
+    b
+
+# ':' handler: parse a label definition. Reads the name into name_buf,
+# picks its scope id (0 for a plain name, the innermost scope_stack
+# entry for a name starting with '.'), and calls define_label only
+# when pass == 1.
+:scan_label_def
+    # Step over the ':'.
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_a0 &name_buf_ptr
+    ld_a0,a0,0
+    li_a1 H2_TOKEN_CAP
+    la_br &read_name
+    call
+    la_a1 &name_len
+    st_a0,a1,0
+    # dotted = (name[0] == '.')
+    # (a1 -> a3 is moved through aux_tmp.)
+    la_a1 &name_buf_ptr
+    ld_a1,a1,0
+    la_a3 &aux_tmp
+    st_a1,a3,0
+    ld_a3,a3,0
+    lb_t0,a3,0
+    li_t1 %46 %0
+    la_br &scan_label_undotted
+    bne_t0,t1
+    # dotted: scope_depth must be > 0
+    la_a0 &scope_depth
+    ld_t0,a0,0
+    la_br &err_dotted_outside_scope
+    beqz_t0
+    addi_t0,t0,neg1             # index of the innermost scope
+    shli_t2,t0,3
+    la_a3 &sl_tmp
+    st_t2,a3,0
+    ld_t1,a3,0                  # t1 = 8*(scope_depth-1), moved via sl_tmp
+    la_a0 &scope_stack_ptr
+    ld_a0,a0,0
+    add_a0,a0,t1                # available
+    ld_a0,a0,0                  # a0 = scope id of innermost scope
+    la_a1 &name_scope
+    st_a0,a1,0
+    la_br &scan_label_define
+    b
+:scan_label_undotted
+    # Plain label: name_scope = 0.
+    li_t0 %0 %0
+    la_a1 &name_scope
+    st_t0,a1,0
+:scan_label_define
+    # Only pass 1 records definitions; any other pass just resumes.
+    la_a0 &pass
+    ld_t0,a0,0
+    li_t1 %1 %0
+    la_br &scan_loop
+    bne_t0,t1
+    # define_label(a0 = name, a1 = length, a2 = scope id)
+    la_a0 &name_buf_ptr
+    ld_a0,a0,0
+    la_a1 &name_len
+    ld_a1,a1,0
+    la_a2 &name_scope
+    ld_a2,a2,0
+    la_br &define_label
+    call
+    la_br &scan_loop
+    b
+
+# '.' handler: read the directive name and dispatch to its handler.
+# Names are bucketed by length first (5: align / scope, 4: fill,
+# 8: endscope) and then confirmed with mem_eq; anything else is
+# err_unknown_directive.
+:scan_directive
+    # Step over the '.'.
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_a0 &name_buf_ptr
+    ld_a0,a0,0
+    li_a1 H2_TOKEN_CAP
+    la_br &read_directive_name
+    call
+    la_a1 &name_len
+    st_a0,a1,0
+
+    # Compare against the four known directive names.
+    # Length 5? (name_len is re-read from its slot so the test does not
+    # depend on which registers survived the call.)
+    la_a0 &name_len
+    ld_t0,a0,0
+    li_t1 %5 %0
+    la_br &scan_dir_check_4
+    bne_t0,t1
+    la_a0 &name_buf_ptr
+    ld_a0,a0,0
+    la_a1 &dir_align
+    li_a2 %5 %0
+    la_br &mem_eq
+    call
+    la_br &scan_dir_align
+    bnez_a0
+    la_a0 &name_buf_ptr
+    ld_a0,a0,0
+    la_a1 &dir_scope
+    li_a2 %5 %0
+    la_br &mem_eq
+    call
+    la_br &scan_dir_scope_open
+    bnez_a0
+    la_br &err_unknown_directive
+    b
+:scan_dir_check_4
+    # Length 4?
+    la_a0 &name_len
+    ld_t0,a0,0
+    li_t1 %4 %0
+    la_br &scan_dir_check_8
+    bne_t0,t1
+    la_a0 &name_buf_ptr
+    ld_a0,a0,0
+    la_a1 &dir_fill
+    li_a2 %4 %0
+    la_br &mem_eq
+    call
+    la_br &scan_dir_fill
+    bnez_a0
+    la_br &err_unknown_directive
+    b
+:scan_dir_check_8
+    # Length 8?
+    la_a0 &name_len
+    ld_t0,a0,0
+    li_t1 %8 %0
+    la_br &err_unknown_directive
+    bne_t0,t1
+    la_a0 &name_buf_ptr
+    ld_a0,a0,0
+    la_a1 &dir_endscope
+    li_a2 %8 %0
+    la_br &mem_eq
+    call
+    la_br &scan_dir_scope_close
+    bnez_a0
+    la_br &err_unknown_directive
+    b
+
+# Dispatch tails for scan_loop / scan_directive. Each one calls its
+# worker and resumes scan_loop; all run inside process_file's frame.
+:scan_dir_align
+    la_br &do_align
+    call
+    la_br &scan_loop
+    b
+:scan_dir_fill
+    la_br &do_fill
+    call
+    la_br &scan_loop
+    b
+:scan_dir_scope_open
+    la_br &do_scope_open
+    call
+    la_br &scan_loop
+    b
+:scan_dir_scope_close
+    la_br &do_scope_close
+    call
+    la_br &scan_loop
+    b
+
+:scan_ref
+    # a0 holds sigil; advance past it then process_reference.
+    # The sigil is handed over through the cur_sigil slot.
+    la_a1 &cur_sigil
+    st_a0,a1,0
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    addi_t0,t0,1
+    st_t0,a1,0
+    la_br &process_reference
+    call
+    la_br &scan_loop
+    b
+
+:scan_byte_stream
+    # scan_pos still points at the first digit; parse_byte_stream
+    # consumes it.
+    la_br &parse_byte_stream
+    call
+    la_br &scan_loop
+    b
+
+:scan_done
+    # Return from process_file.
+    eret
+
+## --- Lex helpers ------------------------------------------------------------
+
+## skip_ws_and_comments(): advance scan_pos past whitespace and #/; comments.
+## Updates cur_line on '\n'.
+## No arguments, no result. A comment runs up to (not including) the
+## next '\n'; that newline is then consumed as whitespace so cur_line is
+## bumped in exactly one place.
+:skip_ws_and_comments
+    enter_0
+:swc_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &swc_done
+    beq_t0,t1
+    la_br &swc_done
+    blt_t1,t0
+    lb_a0,t0,0
+    la_br &is_space_any
+    call
+    la_br &swc_after_space_check
+    beqz_a0
+    # whitespace: advance; if '\n' bump cur_line.
+    # The call clobbered a0/t0, so c is re-read from scan_pos.
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    lb_a0,t0,0
+    li_t0 %10 %0
+    la_br &swc_advance
+    bne_a0,t0
+    la_a0 &cur_line
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+:swc_advance
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    addi_t0,t0,1
+    st_t0,a1,0
+    la_br &swc_loop
+    b
+:swc_after_space_check
+    # a0 holds is_space_any's 0/1 result here, not the character:
+    # re-read c before testing for a comment introducer.
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    lb_a0,t0,0
+    li_t1 %35 %0        # '#'
+    la_br &swc_consume_comment
+    beq_a0,t1
+    li_t1 %59 %0        # ';'
+    la_br &swc_consume_comment
+    beq_a0,t1
+    la_br &swc_done
+    b
+:swc_consume_comment
+:swc_cc_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &swc_loop
+    beq_t0,t1
+    la_br &swc_loop
+    blt_t1,t0
+    lb_a0,t0,0
+    li_t1 %10 %0
+    la_br &swc_loop
+    beq_a0,t1
+    addi_t0,t0,1
+    # a0 now holds the character, so take the slot address afresh
+    # before storing the advanced position.
+    la_a1 &scan_pos
+    st_t0,a1,0
+    la_br &swc_cc_loop
+    b
+:swc_done
+    eret
+
+## skip_inline_ws(): like skip_ws_and_comments but does NOT cross '\n'.
+## Skips ' ' \t \r \f \v and #/; comments; stops at the first '\n', at
+## any other byte, or at scan_end. No arguments, no result.
+:skip_inline_ws
+    enter_0
+:siw_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &siw_done
+    beq_t0,t1
+    la_br &siw_done
+    blt_t1,t0
+    lb_a0,t0,0
+    li_t1 %32 %0        # ' '
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %9 %0         # '\t'
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %13 %0        # '\r'
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %12 %0        # '\f'
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %11 %0        # '\v'
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %35 %0        # '#'
+    la_br &siw_consume_comment
+    beq_a0,t1
+    li_t1 %59 %0        # ';'
+    la_br &siw_consume_comment
+    beq_a0,t1
+    la_br &siw_done
+    b
+:siw_advance
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    addi_t0,t0,1
+    st_t0,a1,0
+    la_br &siw_loop
+    b
+:siw_consume_comment
+:siw_cc_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &siw_done
+    beq_t0,t1
+    la_br &siw_done
+    blt_t1,t0
+    lb_a0,t0,0
+    li_t1 %10 %0
+    la_br &siw_done
+    beq_a0,t1                   # leave the '\n' unconsumed
+    addi_t0,t0,1
+    # a0 now holds the character, so take the slot address afresh
+    # before storing the advanced position.
+    la_a1 &scan_pos
+    st_t0,a1,0
+    la_br &siw_cc_loop
+    b
+:siw_done
+    eret
+
+## is_space_any(a0=c) -> a0=0/1. Whitespace = ' ' \t \n \r \f \v.
+## Leaf: no frame, returns with ret. Overwrites a0 (with the result)
+## and t0, so callers that still need c must re-read it.
+:is_space_any
+    li_t0 %32 %0        # ' '
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %9 %0         # '\t'
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %10 %0        # '\n'
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %13 %0        # '\r'
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %12 %0        # '\f'
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %11 %0        # '\v'
+    la_br &isa_yes
+    beq_a0,t0
+    li_a0 %0 %0
+    ret
+:isa_yes
+    li_a0 %1 %0
+    ret
+
+## is_name_terminator_c(a0=c) -> a0=0/1. Terminators: whitespace, '-', '#', ';'.
+## Spills c into a BSS slot since is_space_any clobbers a0.
+## Non-leaf (it calls is_space_any), so it sets up a frame with enter_0
+## and returns with eret, per this file's ABI rule; a bare ret after a
+## nested call would not have a preserved return address to use.
+:is_name_terminator_c
+    enter_0
+    la_a1 &nt_c
+    st_a0,a1,0
+    la_br &is_space_any
+    call
+    la_br &nt_yes
+    bnez_a0
+    la_a1 &nt_c
+    ld_a0,a1,0                  # a0 = c again
+    li_t0 %45 %0        # '-'
+    la_br &nt_yes
+    beq_a0,t0
+    li_t0 %35 %0        # '#'
+    la_br &nt_yes
+    beq_a0,t0
+    li_t0 %59 %0        # ';'
+    la_br &nt_yes
+    beq_a0,t0
+    li_a0 %0 %0
+    eret
+:nt_yes
+    li_a0 %1 %0
+    eret
+
+## is_byte_digit(a0=c) -> a0=0/1. Mode-aware (HEX vs BINARY).
+## Leaf; clobbers a1, a2, t0. byte_mode == 0 selects HEX, nonzero BINARY.
+## Each HEX range [lo, hi] is tested as: c < lo -> no; c < hi -> yes;
+## c == hi -> yes; otherwise fall through to the next range. The bound
+## is copied t0 -> a2 because the compare used is blt_a0,a2.
+:is_byte_digit
+    la_a1 &byte_mode
+    ld_t0,a1,0
+    la_br &ibd_bin
+    bnez_t0
+    # HEX: 0-9, a-f, A-F
+    li_t0 %48 %0        # '0'
+    la_br &ibd_no
+    mov_a2,t0
+    blt_a0,a2
+    li_t0 %57 %0        # '9'
+    la_br &ibd_yes
+    mov_a2,t0
+    blt_a0,a2
+    la_br &ibd_yes
+    beq_a0,t0
+    li_t0 %65 %0        # 'A'
+    la_br &ibd_no
+    mov_a2,t0
+    blt_a0,a2
+    li_t0 %70 %0        # 'F'
+    la_br &ibd_yes
+    mov_a2,t0
+    blt_a0,a2
+    la_br &ibd_yes
+    beq_a0,t0
+    li_t0 %97 %0        # 'a'
+    la_br &ibd_no
+    mov_a2,t0
+    blt_a0,a2
+    li_t0 %102 %0       # 'f'
+    la_br &ibd_yes
+    mov_a2,t0
+    blt_a0,a2
+    la_br &ibd_yes
+    beq_a0,t0
+    la_br &ibd_no
+    b
+:ibd_bin
+    # BINARY: only '0' and '1'.
+    li_t0 %48 %0
+    la_br &ibd_yes
+    beq_a0,t0
+    li_t0 %49 %0
+    la_br &ibd_yes
+    beq_a0,t0
+    la_br &ibd_no
+    b
+:ibd_yes
+    li_a0 %1 %0
+    ret
+:ibd_no
+    li_a0 %0 %0
+    ret
+
+## byte_digit_value(a0=c) -> a0=value (0..15). Caller guarantees c is a
+## valid digit for the current byte mode.
+## Leaf; clobbers t0 and t1. Does not consult byte_mode: it classifies
+## c purely by range ('0'..'9', uppercase, lowercase).
+:byte_digit_value
+    li_t0 %57 %0
+    la_br &bdv_alpha
+    mov_t1,a0
+    blt_t0,t1                   # if c > '9', go alpha
+    li_t1 %48 %0
+    sub_a0,a0,t1                # c - '0'
+    ret
+:bdv_alpha
+    li_t0 %96 %0
+    la_br &bdv_lower
+    mov_t1,a0
+    blt_t0,t1                   # if c > 'a' - 1 (= 96), it's lowercase
+    li_t1 %55 %0
+    sub_a0,a0,t1                # 'A'(65) - 55 = 10
+    ret
+:bdv_lower
+    li_t1 %87 %0
+    sub_a0,a0,t1                # 'a'(97) - 87 = 10
+    ret
+
+## byte_digit_count() -> a0. 2 for HEX, 8 for BINARY.
+## Leaf; clobbers t0. byte_mode == 0 is HEX, nonzero is BINARY.
+:byte_digit_count
+    la_a0 &byte_mode
+    ld_t0,a0,0
+    la_br &bdc_bin
+    bnez_t0
+    li_a0 %2 %0
+    ret
+:bdc_bin
+    li_a0 %8 %0
+    ret
+
+## read_name(a0=out_buf, a1=max) -> a0=length. Reads scan_pos into out_buf
+## until is_name_terminator_c or scan_end.
+##
+## In:   a0 = destination buffer, a1 = its capacity in bytes.
+## Out:  a0 = number of bytes copied (always > 0).
+## Fatal: err_name_too_long when a byte would be stored at index max;
+##        err_empty_name when no byte was copied.
+## State lives in the rn_out / rn_max / rn_n slots because every
+## register is clobbered by the is_name_terminator_c call in the loop.
+:read_name
+    enter_0
+    la_a2 &rn_out
+    st_a0,a2,0
+    la_a2 &rn_max
+    st_a1,a2,0
+    li_t0 %0 %0
+    la_a2 &rn_n
+    st_t0,a2,0
+:rn_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &rn_done
+    beq_t0,t1
+    la_br &rn_done
+    blt_t1,t0
+    lb_a0,t0,0
+    la_br &is_name_terminator_c
+    call
+    la_br &rn_done
+    bnez_a0
+    # overflow check: compare n against the VALUE stored in rn_max
+    la_a1 &rn_n
+    ld_t0,a1,0                  # t0 = n
+    la_a0 &rn_max
+    ld_t1,a0,0                  # t1 = max
+    la_br &err_name_too_long
+    beq_t0,t1
+    # store char (re-read from scan_pos): t1 = *scan_pos
+    la_a1 &scan_pos
+    ld_t1,a1,0
+    lb_t1,t1,0
+    la_a2 &rn_out
+    ld_a2,a2,0
+    add_a2,a2,t0                # a2 = &out_buf[n]
+    sb_t1,a2,0
+    # n++
+    addi_t0,t0,1
+    la_a1 &rn_n
+    st_t0,a1,0
+    # scan_pos++ (load the pointer value, bump it, store it back)
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &rn_loop
+    b
+:rn_done
+    la_a1 &rn_n
+    ld_a0,a1,0
+    la_br &err_empty_name
+    beqz_a0
+    eret
+
+## read_directive_name(a0=out_buf, a1=max) -> a0=length. Like read_name but
+## terminates on the first non-alpha byte.
+##
+## In:   a0 = destination buffer, a1 = its capacity in bytes.
+## Out:  a0 = number of bytes copied (always > 0).
+## Fatal: err_name_too_long when a byte would be stored at index max;
+##        err_empty_directive when no byte was copied.
+## Shares the rn_out / rn_max / rn_n slots with read_name.
+:read_directive_name
+    enter_0
+    la_a2 &rn_out
+    st_a0,a2,0
+    la_a2 &rn_max
+    st_a1,a2,0
+    li_t0 %0 %0
+    la_a2 &rn_n
+    st_t0,a2,0
+:rdn_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &rdn_done
+    beq_t0,t1
+    la_br &rdn_done
+    blt_t1,t0
+    lb_a0,t0,0
+    # Reject if not [A-Za-z]
+    li_t1 %65 %0        # 'A'
+    la_br &rdn_check_lower
+    mov_a2,t1
+    blt_a0,a2
+    li_t1 %90 %0        # 'Z'
+    la_br &rdn_consume
+    mov_a2,t1
+    blt_a0,a2
+    la_br &rdn_consume
+    beq_a0,t1
+:rdn_check_lower
+    li_t1 %97 %0        # 'a'
+    la_br &rdn_done
+    mov_a2,t1
+    blt_a0,a2
+    li_t1 %122 %0       # 'z'
+    la_br &rdn_consume
+    mov_a2,t1
+    blt_a0,a2
+    la_br &rdn_consume
+    beq_a0,t1
+    la_br &rdn_done
+    b
+:rdn_consume
+    # a0 still holds the character; only a1/a2/t0/t1 are used as
+    # scratch until it has been stored.
+    la_a1 &rn_n
+    ld_t0,a1,0                  # t0 = n
+    la_a1 &rn_max
+    ld_t1,a1,0                  # t1 = max (the value, not the slot address)
+    la_br &err_name_too_long
+    beq_t0,t1
+    la_a2 &rn_out
+    ld_a2,a2,0
+    add_a2,a2,t0                # a2 = &out_buf[n]
+    sb_a0,a2,0
+    # n++
+    addi_t0,t0,1
+    la_a1 &rn_n
+    st_t0,a1,0
+    # scan_pos++ (load the pointer value, bump it, store it back)
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &rdn_loop
+    b
+:rdn_done
+    la_a1 &rn_n
+    ld_a0,a1,0
+    la_br &err_empty_directive
+    beqz_a0
+    eret
+
+## read_decimal() -> a0=value (i64). Fatal on no digits.
+## Consumes the run of '0'..'9' at scan_pos and stops at the first other
+## byte or at scan_end. The accumulator and the "saw a digit" flag live
+## in rd_val / rd_saw. No overflow check is made on the accumulator.
+:read_decimal
+    enter_0
+    li_t0 %0 %0
+    la_a0 &rd_val
+    st_t0,a0,0
+    la_a0 &rd_saw
+    st_t0,a0,0
+:rd_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &rd_done
+    beq_t0,t1
+    la_br &rd_done
+    blt_t1,t0
+    lb_a0,t0,0
+    li_t1 %48 %0
+    la_br &rd_done
+    mov_a2,t1
+    blt_a0,a2                   # c < '0' -> done
+    li_t1 %57 %0
+    la_br &rd_done
+    mov_a1,a0
+    blt_t1,a1                   # '9' < c -> done
+    # acc = acc * 10 + (c - '0')
+    la_a1 &rd_val
+    ld_t0,a1,0
+    li_a1 %10 %0                # a1 = 10
+    mul_t0,t0,a1                # t0 = acc * 10
+    li_t1 %48 %0
+    sub_a0,a0,t1                # a0 = c - 48
+    add_t0,t0,a0
+    la_a1 &rd_val
+    st_t0,a1,0
+    li_t0 %1 %0
+    la_a1 &rd_saw
+    st_t0,a1,0
+    # scan_pos++ (load the pointer value, bump it, store it back)
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &rd_loop
+    b
+:rd_done
+    la_a1 &rd_saw
+    ld_t0,a1,0
+    la_br &err_expected_decimal
+    beqz_t0
+    la_a1 &rd_val
+    ld_a0,a1,0
+    eret
+
+## --- Byte stream / single byte literal -------------------------------------
+
+## parse_byte_stream(): consume free-flowing digits (intermixed with
+## whitespace and #/; comments) and emit_byte them. Stops at first non-
+## digit non-whitespace non-comment byte.
+##
+## No arguments, no result. pbs_acc accumulates the byte being built,
+## pbs_have counts digits seen for it; a byte is emitted each time
+## pbs_have reaches byte_digit_count(). Fatal (err_pbs_incomplete) if
+## the stream stops part-way through a byte. Bumps cur_line on '\n'.
+:parse_byte_stream
+    enter_0
+    li_t0 %0 %0
+    la_a0 &pbs_acc
+    st_t0,a0,0
+    la_a0 &pbs_have
+    st_t0,a0,0
+:pbs_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &pbs_done
+    beq_t0,t1
+    la_br &pbs_done
+    blt_t1,t0
+    lb_a0,t0,0
+    la_br &is_space_any
+    call
+    la_br &pbs_consume_ws
+    bnez_a0
+    # Not whitespace. The call clobbered a0/t0: re-read c = *scan_pos.
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    lb_a0,t0,0
+    li_t1 %35 %0        # '#'
+    la_br &pbs_consume_comment
+    beq_a0,t1
+    li_t1 %59 %0        # ';'
+    la_br &pbs_consume_comment
+    beq_a0,t1
+    la_br &is_byte_digit
+    call
+    la_br &pbs_done
+    beqz_a0
+    # Save and consume the digit char: pbs_c = *scan_pos++.
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    lb_a0,t0,0
+    addi_t0,t0,1
+    st_t0,a1,0
+    la_a1 &pbs_c
+    st_a0,a1,0
+    # acc = (acc << shift_per_digit) | nibble
+    la_a1 &byte_mode
+    ld_t1,a1,0
+    la_br &pbs_bin_step
+    bnez_t1
+    # HEX: convert the digit first, then load and shift acc, so no
+    # live value has to survive the call.
+    la_a1 &pbs_c
+    ld_a0,a1,0
+    la_br &byte_digit_value
+    call
+    la_a1 &pbs_acc
+    ld_t0,a1,0
+    shli_a3,t0,4                # a3 = acc << 4
+    mov_t0,a0
+    add_a3,a3,t0                # a3 = (acc << 4) + nibble
+    st_a3,a1,0
+    la_br &pbs_bump
+    b
+:pbs_bin_step
+    # BINARY
+    la_a1 &pbs_acc
+    ld_t0,a1,0
+    shli_a3,t0,1                # a3 = acc << 1
+    la_a1 &pbs_c
+    ld_a0,a1,0
+    li_t1 %48 %0
+    sub_a0,a0,t1                # a0 = bit value (c - '0')
+    mov_t0,a0
+    add_a3,a3,t0
+    la_a1 &pbs_acc
+    st_a3,a1,0
+:pbs_bump
+    la_a0 &pbs_have
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &byte_digit_count
+    call
+    la_a1 &pbs_have
+    ld_t0,a1,0
+    la_br &pbs_loop
+    la_a3 &aux_tmp
+    st_a0,a3,0
+    ld_t1,a3,0                  # t1 = digits per byte (a0 moved via aux_tmp)
+    bne_t0,t1
+    # have == digits_per_byte: emit and reset
+    la_a1 &pbs_acc
+    ld_t2,a1,0                  # t2 = acc
+    andi_a3,t2,255
+    mov_a0,a3
+    la_br &emit_byte
+    call
+    li_t0 %0 %0
+    la_a0 &pbs_acc
+    st_t0,a0,0
+    la_a0 &pbs_have
+    st_t0,a0,0
+    la_br &pbs_loop
+    b
+:pbs_consume_ws
+    # Whitespace: if it is '\n' bump cur_line, then advance.
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    lb_a0,t0,0
+    li_t0 %10 %0
+    la_br &pbs_ws_advance
+    bne_a0,t0
+    la_a0 &cur_line
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+:pbs_ws_advance
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    addi_t0,t0,1
+    st_t0,a1,0
+    la_br &pbs_loop
+    b
+:pbs_consume_comment
+:pbs_cc_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &pbs_loop
+    beq_t0,t1
+    la_br &pbs_loop
+    blt_t1,t0
+    lb_a0,t0,0
+    li_t1 %10 %0
+    la_br &pbs_loop
+    beq_a0,t1                   # leave the '\n' for pbs_consume_ws
+    addi_t0,t0,1
+    # a0 now holds the character, so take the slot address afresh
+    # before storing the advanced position.
+    la_a1 &scan_pos
+    st_t0,a1,0
+    la_br &pbs_cc_loop
+    b
+:pbs_done
+    la_a0 &pbs_have
+    ld_t0,a0,0
+    la_br &err_pbs_incomplete
+    bnez_t0
+    eret
+
+## parse_one_byte(a0=out_byte_addr): read a single byte literal (exactly
+## byte_digit_count contiguous digits, no internal whitespace). Fatal on
+## malformed input.
+##
+## Digits are taken from scan_pos (bounded by scan_end) and folded into
+## p1b_acc: 4 bits per digit when byte_mode is zero, 1 bit per digit
+## otherwise. When p1b_have reaches byte_digit_count the low 8 bits are
+## stored to *out_byte_addr and parsing stops; any following bytes are left
+## for the caller. Branches to err_byte_lit_bad if the digits run out
+## before a full byte has been read.
+:parse_one_byte
+ enter_0
+ la_a1 &p1b_out
+ st_a0,a1,0
+ li_t0 %0 %0
+ la_a1 &p1b_acc
+ st_t0,a1,0
+ la_a1 &p1b_have
+ st_t0,a1,0
+ la_a1 &p1b_done
+ st_t0,a1,0
+:p1b_loop
+ la_a0 &p1b_done
+ ld_t0,a0,0
+ la_br &p1b_finish
+ bnez_t0
+ # Stop once scan_pos >= scan_end or the next byte is not a digit.
+ la_a0 &scan_pos
+ ld_t0,a0,0
+ la_a1 &scan_end
+ ld_t1,a1,0
+ la_br &p1b_finish
+ beq_t0,t1
+ la_br &p1b_finish
+ blt_t1,t0
+ lb_a0,t0,0
+ la_br &is_byte_digit
+ call
+ la_br &p1b_finish
+ beqz_a0
+ # p1b_c = *scan_pos++. The cursor VALUE is loaded here; the previous
+ # sequence used the address of scan_pos as the cursor.
+ la_a1 &scan_pos
+ ld_t0,a1,0
+ lb_a0,t0,0
+ addi_t0,t0,1
+ st_t0,a1,0
+ la_a1 &p1b_c
+ st_a0,a1,0
+ la_a1 &byte_mode
+ ld_t1,a1,0
+ la_br &p1b_bin
+ bnez_t1
+ # HEX: convert the digit first, then load acc, so no register has to
+ # survive the byte_digit_value call.
+ la_a1 &p1b_c
+ ld_a0,a1,0
+ la_br &byte_digit_value
+ call
+ la_a1 &p1b_acc
+ ld_t0,a1,0
+ shli_a3,t0,4
+ mov_t0,a0
+ add_a3,a3,t0
+ st_a3,a1,0
+ la_br &p1b_bump
+ b
+:p1b_bin
+ # BINARY: acc = (acc << 1) + (c - '0')
+ la_a1 &p1b_c
+ ld_a0,a1,0
+ li_t1 %48 %0
+ sub_a0,a0,t1
+ la_a1 &p1b_acc
+ ld_t0,a1,0
+ shli_a3,t0,1
+ mov_t0,a0
+ add_a3,a3,t0
+ la_a1 &p1b_acc
+ st_a3,a1,0
+:p1b_bump
+ la_a0 &p1b_have
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &byte_digit_count
+ call
+ la_a1 &p1b_have
+ ld_t0,a1,0
+ la_br &p1b_loop
+ # t1 = a0 (digits per byte), moved through aux_tmp.
+ la_a3 &aux_tmp
+ st_a0,a3,0
+ ld_t1,a3,0
+ bne_t0,t1
+ # Got a full byte; record into *p1b_out and mark done.
+ la_a1 &p1b_acc
+ ld_t2,a1,0
+ andi_a3,t2,255
+ la_a0 &p1b_out
+ ld_a0,a0,0
+ sb_a3,a0,0
+ # Clear the pending-digit count. Without this p1b_have still equals
+ # byte_digit_count at p1b_finish, and the pending-digit check there
+ # rejected every successfully parsed byte.
+ li_t0 %0 %0
+ la_a1 &p1b_have
+ st_t0,a1,0
+ li_t0 %1 %0
+ la_a1 &p1b_done
+ st_t0,a1,0
+ la_br &p1b_loop
+ b
+:p1b_finish
+ # No complete byte was read.
+ la_a1 &p1b_done
+ ld_t0,a1,0
+ la_br &err_byte_lit_bad
+ beqz_t0
+ # Digits must not be left pending after a completed byte.
+ la_a1 &p1b_have
+ ld_t0,a1,0
+ la_br &err_byte_lit_bad
+ bnez_t0
+ eret
+
+## --- Label table -----------------------------------------------------------
+
+## intern(a0=src, a1=len) -> a0=offset into text_buf. Copies bytes plus a
+## NUL terminator. Fatal on overflow.
+##
+## The returned offset is the value of text_used on entry; text_used is
+## advanced by len + 1. Branches to err_text_overflow when
+## text_used + len + 1 exceeds H2_TEXT_CAP.
+:intern
+ enter_0
+ la_a2 &intern_src
+ st_a0,a2,0
+ la_a2 &intern_len
+ st_a1,a2,0
+ la_a2 &text_used
+ ld_a3,a2,0
+ la_a2 &intern_orig
+ st_a3,a2,0
+ # if (text_used + len + 1 > TEXT_CAP) fatal
+ add_a2,a1,a3 # a2 = a1 + a3 = len + text_used
+ addi_a2,a2,1
+ li_t0 H2_TEXT_CAP
+ la_br &err_text_overflow
+ # t1 = a2 (the required total), moved through aux_tmp. The previous
+ # code copied a0 -- the src pointer -- into t1, so the capacity was
+ # compared against an address rather than against the required size.
+ la_a3 &aux_tmp
+ st_a2,a3,0
+ ld_t1,a3,0
+ blt_t0,t1
+ # dst = text_buf + text_used. There's no add_a0,a0,a3 in the seed,
+ # so route the offset through t0.
+ la_a0 &text_buf_ptr
+ ld_a0,a0,0
+ la_a2 &intern_orig
+ ld_t0,a2,0
+ add_a0,a0,t0
+ la_a2 &intern_dst
+ st_a0,a2,0
+ # copy len bytes
+ li_t0 %0 %0
+ la_a1 &intern_i
+ st_t0,a1,0
+:intern_copy_loop
+ la_a0 &intern_i
+ ld_t0,a0,0
+ la_a1 &intern_len
+ ld_t1,a1,0
+ la_br &intern_copy_done
+ beq_t0,t1
+ # dst[i] = src[i]
+ la_a0 &intern_src
+ ld_a0,a0,0
+ add_a0,a0,t0
+ lb_a0,a0,0
+ la_a2 &intern_dst
+ ld_a2,a2,0
+ add_a2,a2,t0
+ sb_a0,a2,0
+ addi_t0,t0,1
+ la_a1 &intern_i
+ st_t0,a1,0
+ la_br &intern_copy_loop
+ b
+:intern_copy_done
+ # NUL terminator at dst[len]
+ la_a2 &intern_dst
+ ld_a2,a2,0
+ la_a1 &intern_len
+ ld_t0,a1,0
+ add_a2,a2,t0 # available add_a2,a2,t0
+ li_t1 %0 %0
+ sb_t1,a2,0
+ # text_used += len + 1
+ la_a2 &text_used
+ ld_a3,a2,0
+ la_a1 &intern_len
+ ld_a1,a1,0
+ add_a3,a3,a1
+ addi_a3,a3,1
+ st_a3,a2,0
+ # return original text_used
+ la_a0 &intern_orig
+ ld_a0,a0,0
+ eret
+
+## label_addr(a0=index) -> a0 = &labels[index]. Leaf. labels are 32 B.
+## Result is shipped through la_const_32 since neither mov_a0,a2 nor an
+## add_a0,a*,t* combo with the right operands exists in the seed table.
+## Overwrites t0, t1, a1, a2, a3 and the la_const_32 scratch slot.
+:label_addr
+ mov_t0,a0
+ # a1 = 32, staged through la_const_32. The previous sequence copied the
+ # ADDRESS of la_const_32 into a1, so the index was scaled by a pointer
+ # value instead of by the 32-byte record size.
+ li_t1 %32 %0
+ la_a1 &la_const_32
+ st_t1,a1,0
+ ld_a1,a1,0 # a1 = 32
+ mul_t0,t0,a1 # t0 = 32 * index
+ la_a2 &labels_ptr
+ ld_a2,a2,0
+ add_a2,a2,t0 # a2 = labels + 32*index
+ la_a3 &la_const_32
+ st_a2,a3,0 # spill result
+ ld_a0,a3,0 # reload into a0
+ ret
+
+## name_eq(a0=label_addr, a1=src, a2=len) -> a0=0/1. Compares the label's
+## interned name against (src, len). Leaf-ish (calls mem_eq).
+##
+## Reads the record fields at offset 0 (name offset into text_buf) and
+## offset 8 (name length). Returns 0 immediately on a length mismatch;
+## otherwise returns whatever mem_eq returns for the two byte ranges.
+:name_eq
+ enter_0
+ la_a3 &ne_label
+ st_a0,a3,0
+ la_a3 &ne_src
+ st_a1,a3,0
+ la_a3 &ne_len
+ st_a2,a3,0
+ # if (label->name_len != len) return 0
+ ld_t0,a0,8
+ la_br &ne_no
+ # t1 = a2 (len), moved through aux_tmp.
+ la_a3 &aux_tmp
+ st_a2,a3,0
+ ld_t1,a3,0
+ bne_t0,t1
+ # bytes
+ ld_a3,a0,0 # name_off
+ la_a0 &text_buf_ptr
+ ld_a0,a0,0
+ # t0 = a3 (name_off), moved through ne_tmp; a0 = text_buf + name_off.
+ la_a2 &ne_tmp
+ st_a3,a2,0
+ ld_t0,a2,0
+ add_a0,a0,t0
+ la_a1 &ne_src
+ ld_a1,a1,0
+ la_a2 &ne_len
+ ld_a2,a2,0
+ la_br &mem_eq
+ call
+ # mem_eq's result in a0 is returned unchanged.
+ eret
+:ne_no
+ li_a0 %0 %0
+ eret
+
+## define_label(a0=src, a1=len, a2=scope_id): record at labels[label_count].
+## Fatal on duplicate (within same scope) or overflow.
+##
+## Record layout as written here: +0 name offset (from intern), +8 name
+## length, +16 target ip (current value of ip), +24 scope id. The duplicate
+## scan is linear over all existing records.
+:define_label
+ enter_0
+ la_a3 &dl_src
+ st_a0,a3,0
+ la_a3 &dl_len
+ st_a1,a3,0
+ la_a3 &dl_scope
+ st_a2,a3,0
+
+ # for (i = 0; i != label_count; i++): reject same scope + same name.
+ li_t0 %0 %0
+ la_a0 &dl_i
+ st_t0,a0,0
+:dl_dup_loop
+ la_a0 &dl_i
+ ld_t0,a0,0
+ la_a1 &label_count
+ ld_t1,a1,0
+ la_br &dl_dup_done
+ beq_t0,t1
+ mov_a0,t0
+ la_br &label_addr
+ call
+ # a0 = &labels[i]
+ la_a3 &dl_label
+ st_a0,a3,0
+ # scope match?
+ # t0 = labels[i].scope_id (field at +24), reached via a1 -> a2 -> a0.
+ mov_a1,a0
+ mov_a2,a1
+ addi_a2,a2,24
+ ld_a0,a2,0
+ mov_t0,a0
+ la_a1 &dl_scope
+ ld_t1,a1,0
+ la_br &dl_dup_next
+ bne_t0,t1
+ # name match?
+ la_a0 &dl_label
+ ld_a0,a0,0
+ la_a1 &dl_src
+ ld_a1,a1,0
+ la_a2 &dl_len
+ ld_a2,a2,0
+ la_br &name_eq
+ call
+ la_br &dl_dup_next
+ beqz_a0
+ la_br &err_duplicate_label
+ b
+:dl_dup_next
+ la_a0 &dl_i
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &dl_dup_loop
+ b
+:dl_dup_done
+
+ # Fatal if label_count >= H2_LABEL_CAP.
+ la_a0 &label_count
+ ld_t0,a0,0
+ li_t1 H2_LABEL_CAP
+ la_br &err_too_many_labels
+ beq_t0,t1
+ la_br &err_too_many_labels
+ blt_t1,t0
+
+ # name_off = intern(src, len)
+ la_a0 &dl_src
+ ld_a0,a0,0
+ la_a1 &dl_len
+ ld_a1,a1,0
+ la_br &intern
+ call
+ la_a3 &dl_name_off
+ st_a0,a3,0
+
+ # &labels[label_count]
+ la_a0 &label_count
+ ld_a0,a0,0
+ la_br &label_addr
+ call
+ la_a3 &dl_label
+ st_a0,a3,0
+ # name_off
+ la_a1 &dl_name_off
+ ld_t0,a1,0
+ st_t0,a0,0
+ # name_len: st_t0,a0,8 is missing from the seed; the equivalent
+ # st_t0,a3,8 IS available, so move the base to a3 first via the
+ # dl_label scratch slot.
+ la_a3 &dl_label
+ ld_a3,a3,0
+ la_a1 &dl_len
+ ld_t0,a1,0
+ st_t0,a3,8
+ # target_ip
+ # a0 still holds the record address returned by label_addr above.
+ la_a1 &ip
+ ld_t0,a1,0
+ st_t0,a0,16
+ # scope_id
+ la_a1 &dl_scope
+ ld_t0,a1,0
+ st_t0,a0,24
+ # label_count++
+ la_a0 &label_count
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ eret
+
+## lookup_label(a0=src, a1=len) -> a0=target_ip. Fatal on undefined.
+##
+## A name whose first byte is '.' (46) is resolved against the scope ids
+## on scope_stack, innermost first; the first record whose scope id and
+## name both match wins, otherwise err_undefined_local. Any other name is
+## resolved only against records with scope id 0, otherwise
+## err_undefined_label. Both searches are linear over labels[].
+:lookup_label
+ enter_0
+ la_a2 &ll_src
+ st_a0,a2,0
+ la_a2 &ll_len
+ st_a1,a2,0
+ # t0 = src[0]. a0 still holds src here. The previous sequence spilled
+ # src to aux_tmp, overwrote that slot with its own address, and then
+ # read a byte of the slot, so the '.' test never looked at the name.
+ mov_t0,a0
+ lb_t0,t0,0
+ li_t1 %46 %0
+ la_br &ll_undotted
+ bne_t0,t1
+ # Dotted: walk scope_stack innermost-out.
+ la_a0 &scope_depth
+ ld_t0,a0,0
+ addi_t0,t0,neg1
+ la_a1 &ll_d
+ st_t0,a1,0
+:ll_dot_outer
+ la_a0 &ll_d
+ ld_t0,a0,0
+ la_br &ll_undefined_local
+ bltz_t0
+ # sid = scope_stack[d] (8-byte slots); the byte offset d*8 is moved
+ # from t2 into t1 through ll_tmp.
+ la_a1 &scope_stack_ptr
+ ld_a1,a1,0
+ shli_t2,t0,3
+ la_a3 &ll_tmp
+ st_t2,a3,0
+ ld_t1,a3,0
+ add_a1,a1,t1
+ ld_t1,a1,0 # t1 = sid
+ la_a0 &ll_sid
+ st_t1,a0,0
+ li_t0 %0 %0
+ la_a0 &ll_i
+ st_t0,a0,0
+:ll_dot_inner
+ la_a0 &ll_i
+ ld_t0,a0,0
+ la_a1 &label_count
+ ld_t1,a1,0
+ la_br &ll_dot_next_d
+ beq_t0,t1
+ mov_a0,t0
+ la_br &label_addr
+ call
+ la_a3 &ll_label
+ st_a0,a3,0
+ # t0 = labels[i].scope_id (field at +24); skip unless it equals sid.
+ mov_a1,a0
+ mov_a2,a1
+ addi_a2,a2,24
+ ld_a0,a2,0
+ mov_t0,a0
+ la_a1 &ll_sid
+ ld_t1,a1,0
+ la_br &ll_dot_inner_next
+ bne_t0,t1
+ la_a0 &ll_label
+ ld_a0,a0,0
+ la_a1 &ll_src
+ ld_a1,a1,0
+ la_a2 &ll_len
+ ld_a2,a2,0
+ la_br &name_eq
+ call
+ la_br &ll_dot_inner_next
+ beqz_a0
+ la_a0 &ll_label
+ ld_a0,a0,0
+ ld_a0,a0,16 # target_ip
+ eret
+:ll_dot_inner_next
+ la_a0 &ll_i
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &ll_dot_inner
+ b
+:ll_dot_next_d
+ # Not found at this depth: try the enclosing scope.
+ la_a0 &ll_d
+ ld_t0,a0,0
+ addi_t0,t0,neg1
+ st_t0,a0,0
+ la_br &ll_dot_outer
+ b
+
+:ll_undotted
+ li_t0 %0 %0
+ la_a0 &ll_i
+ st_t0,a0,0
+:ll_undotted_loop
+ la_a0 &ll_i
+ ld_t0,a0,0
+ la_a1 &label_count
+ ld_t1,a1,0
+ la_br &ll_undefined_global
+ beq_t0,t1
+ mov_a0,t0
+ la_br &label_addr
+ call
+ la_a3 &ll_label
+ st_a0,a3,0
+ # Only records with scope_id == 0 are candidates.
+ mov_a1,a0
+ mov_a2,a1
+ addi_a2,a2,24
+ ld_a0,a2,0
+ mov_t0,a0
+ la_br &ll_undotted_next
+ bnez_t0
+ la_a0 &ll_label
+ ld_a0,a0,0
+ la_a1 &ll_src
+ ld_a1,a1,0
+ la_a2 &ll_len
+ ld_a2,a2,0
+ la_br &name_eq
+ call
+ la_br &ll_undotted_next
+ beqz_a0
+ la_a0 &ll_label
+ ld_a0,a0,0
+ ld_a0,a0,16 # target_ip
+ eret
+:ll_undotted_next
+ la_a0 &ll_i
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &ll_undotted_loop
+ b
+:ll_undefined_local
+ la_br &err_undefined_local
+ b
+:ll_undefined_global
+ la_br &err_undefined_label
+ b
+
+## --- Reference processor ----------------------------------------------------
+
+## process_reference(): cur_sigil already set by the dispatcher; scan_pos
+## already past the sigil byte. Reads label and (optional) -other,
+## advances ip on pass 1, and emits the resolved value on pass 2.
+##
+## Value emitted on pass 2:
+##   LABEL-OTHER          target(LABEL) - target(OTHER)
+##   LABEL, relative      target(LABEL) - (ip + width)
+##   LABEL, absolute      target(LABEL) + base_address
+## Width, relative/absolute and the range limits come from
+## set_sigil_info. Fatal if no name follows the sigil or the '-'.
+:process_reference
+ enter_0
+ la_br &set_sigil_info
+ call
+ # Require non-terminator.
+ la_a0 &scan_pos
+ ld_t0,a0,0
+ la_a1 &scan_end
+ ld_t1,a1,0
+ la_br &err_sigil_no_label
+ beq_t0,t1
+ la_br &err_sigil_no_label
+ blt_t1,t0
+ lb_a0,t0,0
+ la_br &is_name_terminator_c
+ call
+ la_br &err_sigil_no_label
+ bnez_a0
+ # llen = read_name(label_buf, MAX_TOKEN)
+ la_a0 &label_buf_ptr
+ ld_a0,a0,0
+ li_a1 H2_TOKEN_CAP
+ la_br &read_name
+ call
+ la_a1 &pr_llen
+ st_a0,a1,0
+ li_t0 %0 %0
+ la_a0 &pr_has_other
+ st_t0,a0,0
+ # Optional '-' OTHER.
+ la_a0 &scan_pos
+ ld_t0,a0,0
+ la_a1 &scan_end
+ ld_t1,a1,0
+ la_br &pr_after_other
+ beq_t0,t1
+ la_br &pr_after_other
+ blt_t1,t0
+ # if (*scan_pos != '-') skip. The previous sequence overwrote both the
+ # character in a0 and the cursor in t0 with the address of aux_tmp, so
+ # the branch was never taken and a garbage cursor was stored.
+ lb_a0,t0,0
+ li_t0 %45 %0
+ la_br &pr_after_other
+ bne_a0,t0
+ # scan_pos++ (reloaded, since t0 was used for the compare).
+ la_a1 &scan_pos
+ ld_t0,a1,0
+ addi_t0,t0,1
+ st_t0,a1,0
+ la_a1 &scan_end
+ ld_t1,a1,0
+ la_br &err_minus_no_label
+ beq_t0,t1
+ la_br &err_minus_no_label
+ blt_t1,t0
+ lb_a0,t0,0
+ la_br &is_name_terminator_c
+ call
+ la_br &err_minus_no_label
+ bnez_a0
+ # olen = read_name(other_buf, MAX_TOKEN)
+ la_a0 &other_buf_ptr
+ ld_a0,a0,0
+ li_a1 H2_TOKEN_CAP
+ la_br &read_name
+ call
+ la_a1 &pr_olen
+ st_a0,a1,0
+ li_t0 %1 %0
+ la_a0 &pr_has_other
+ st_t0,a0,0
+:pr_after_other
+ la_a0 &pass
+ ld_t0,a0,0
+ li_t1 %1 %0
+ la_br &pr_pass2
+ bne_t0,t1
+ # Pass 1: ip += pr_width. Width is loaded straight into t2 because
+ # add_t1,t1,t2 is the fitting add form. The previous sequence added the
+ # address of pr_tmp and stored the sum into aux_tmp, leaving ip
+ # unchanged.
+ la_a0 &ip
+ ld_t1,a0,0
+ la_a1 &pr_width
+ ld_t2,a1,0
+ add_t1,t1,t2
+ st_t1,a0,0
+ eret
+:pr_pass2
+ la_a0 &label_buf_ptr
+ ld_a0,a0,0
+ la_a1 &pr_llen
+ ld_a1,a1,0
+ la_br &lookup_label
+ call
+ la_a1 &pr_t_label
+ st_a0,a1,0
+ la_a0 &pr_has_other
+ ld_t0,a0,0
+ la_br &pr_no_other
+ beqz_t0
+ la_a0 &other_buf_ptr
+ ld_a0,a0,0
+ la_a1 &pr_olen
+ ld_a1,a1,0
+ la_br &lookup_label
+ call
+ la_a1 &pr_t_other
+ st_a0,a1,0
+ # value = t_label - t_other
+ la_a0 &pr_t_label
+ ld_a1,a0,0
+ la_a0 &pr_t_other
+ ld_a0,a0,0
+ sub_a2,a1,a0 # available: a2 = a1 - a0
+ la_a1 &pr_value
+ st_a2,a1,0
+ la_br &pr_emit
+ b
+:pr_no_other
+ la_a0 &pr_is_rel
+ ld_t0,a0,0
+ la_br &pr_abs
+ beqz_t0
+ # rel: value = t_label - (ip + width)
+ la_a0 &ip
+ ld_a1,a0,0
+ la_a3 &pr_tmp
+ la_a0 &pr_width
+ ld_t0,a0,0
+ st_t0,a3,0
+ ld_a0,a3,0 # a0 = width
+ add_a1,a1,a0 # add_a1,a1,a0 — available
+ la_a3 &pr_tmp
+ st_a1,a3,0 # save (ip + width)
+ la_a0 &pr_t_label
+ ld_a1,a0,0 # a1 = t_label
+ ld_a0,a3,0 # a0 = ip+width
+ sub_a2,a1,a0
+ la_a1 &pr_value
+ st_a2,a1,0
+ la_br &pr_emit
+ b
+:pr_abs
+ # value = t_label + base_address
+ la_a0 &pr_t_label
+ ld_a1,a0,0
+ la_a0 &base_address
+ ld_a0,a0,0
+ add_a1,a1,a0
+ la_a3 &pr_value
+ st_a1,a3,0
+:pr_emit
+ # emit_value(a0=value, a1=width, a2=lo, a3=hi, t0=range_check)
+ la_a1 &pr_width
+ ld_a1,a1,0
+ la_a2 &pr_lo
+ ld_a2,a2,0
+ la_a3 &pr_hi
+ ld_a3,a3,0
+ la_a0 &pr_range_check
+ ld_t0,a0,0
+ # a0 was used as the base for the load above; load value last.
+ la_a0 &pr_value
+ ld_a0,a0,0
+ la_br &emit_value
+ call
+ eret
+
+## set_sigil_info(): reads cur_sigil; populates pr_width / pr_is_rel /
+## pr_lo / pr_hi / pr_range_check.
+##
+## Sigil table:
+## '!' (0x21): width=1, rel, lo=-128, hi=127, check
+## '@' (0x40): width=2, rel, lo=-32768, hi=32767, check
+## '$' (0x24): width=2, abs, lo=0, hi=65535, check
+## '~' (0x7E): width=3, rel, lo=-(1<<23), hi=(1<<23)-1, check
+## '%' (0x25): width=4, rel, no range check
+## '&' (0x26): width=4, abs, no range check
+##
+## Any other sigil branches to err_bad_sigil. For the two unchecked sigils
+## pr_lo and pr_hi are both written as 0.
+:set_sigil_info
+ enter_0
+ # Dispatch on the sigil byte (decimal ASCII codes below).
+ la_a0 &cur_sigil
+ ld_a0,a0,0
+ li_t0 %33 %0
+ la_br &ssi_bang
+ beq_a0,t0
+ li_t0 %64 %0
+ la_br &ssi_at
+ beq_a0,t0
+ li_t0 %36 %0
+ la_br &ssi_dollar
+ beq_a0,t0
+ li_t0 %126 %0
+ la_br &ssi_tilde
+ beq_a0,t0
+ li_t0 %37 %0
+ la_br &ssi_pct
+ beq_a0,t0
+ li_t0 %38 %0
+ la_br &ssi_amp
+ beq_a0,t0
+ la_br &err_bad_sigil
+ b
+:ssi_bang
+ # width = is_rel = range_check = 1 (t0 reused for all three stores).
+ li_t0 %1 %0
+ la_a1 &pr_width
+ st_t0,a1,0
+ la_a1 &pr_is_rel
+ st_t0,a1,0
+ la_a1 &pr_range_check
+ st_t0,a1,0
+ # lo = -128 = 0 - 128
+ li_t0 %128 %0
+ la_a3 &ssi_tmp
+ st_t0,a3,0
+ ld_a3,a3,0 # a3 = 128
+ li_t0 %0 %0
+ sub_a3,t0,a3 # available: a3 = t0 - a3 = -128
+ la_a1 &pr_lo
+ st_a3,a1,0
+ li_t0 %127 %0
+ la_a1 &pr_hi
+ st_t0,a1,0
+ eret
+:ssi_at
+ li_t0 %2 %0
+ la_a1 &pr_width
+ st_t0,a1,0
+ li_t0 %1 %0
+ la_a1 &pr_is_rel
+ st_t0,a1,0
+ la_a1 &pr_range_check
+ st_t0,a1,0
+ # 32768 = 256 * 128, built by multiplying two small constants that are
+ # staged through ssi_tmp into a3 and a2. The product is kept in
+ # ssi_tmp2 so that hi = 32768 - 1 can be derived after lo is stored.
+ li_t0 %256 %0
+ la_a1 &ssi_tmp
+ st_t0,a1,0
+ ld_a3,a1,0 # a3 = 256
+ li_t0 %128 %0
+ st_t0,a1,0
+ ld_a2,a1,0 # a2 = 128
+ mul_a3,a3,a2 # a3 = 32768
+ la_a1 &ssi_tmp2
+ st_a3,a1,0
+ li_t0 %0 %0
+ sub_a3,t0,a3 # a3 = -32768
+ la_a1 &pr_lo
+ st_a3,a1,0
+ la_a1 &ssi_tmp2
+ ld_t1,a1,0
+ addi_t1,t1,neg1 # 32767
+ la_a1 &pr_hi
+ st_t1,a1,0
+ eret
+:ssi_dollar
+ li_t0 %2 %0
+ la_a1 &pr_width
+ st_t0,a1,0
+ li_t0 %0 %0
+ la_a1 &pr_is_rel
+ st_t0,a1,0
+ li_t0 %1 %0
+ la_a1 &pr_range_check
+ st_t0,a1,0
+ li_t0 %0 %0
+ la_a1 &pr_lo
+ st_t0,a1,0
+ # 65536 = 256 * 256
+ li_t0 %256 %0
+ la_a1 &ssi_tmp
+ st_t0,a1,0
+ ld_a3,a1,0
+ ld_a2,a1,0
+ mul_a3,a3,a2 # a3 = 65536
+ la_a1 &ssi_tmp2
+ st_a3,a1,0
+ ld_t1,a1,0
+ addi_t1,t1,neg1 # 65535
+ la_a1 &pr_hi
+ st_t1,a1,0
+ eret
+:ssi_tilde
+ li_t0 %3 %0
+ la_a1 &pr_width
+ st_t0,a1,0
+ li_t0 %1 %0
+ la_a1 &pr_is_rel
+ st_t0,a1,0
+ la_a1 &pr_range_check
+ st_t0,a1,0
+ # 8388608 = 256 * 256 * 128
+ li_t0 %256 %0
+ la_a1 &ssi_tmp
+ st_t0,a1,0
+ ld_a3,a1,0
+ ld_a2,a1,0
+ mul_a3,a3,a2 # 65536
+ li_t0 %128 %0
+ st_t0,a1,0
+ ld_a2,a1,0
+ mul_a3,a3,a2 # 8388608
+ la_a1 &ssi_tmp2
+ st_a3,a1,0
+ li_t0 %0 %0
+ sub_a3,t0,a3 # -8388608
+ la_a1 &pr_lo
+ st_a3,a1,0
+ la_a1 &ssi_tmp2
+ ld_t1,a1,0
+ addi_t1,t1,neg1 # 8388607
+ la_a1 &pr_hi
+ st_t1,a1,0
+ eret
+:ssi_pct
+ # width 4, relative, unchecked: range_check = lo = hi = 0.
+ li_t0 %4 %0
+ la_a1 &pr_width
+ st_t0,a1,0
+ li_t0 %1 %0
+ la_a1 &pr_is_rel
+ st_t0,a1,0
+ li_t0 %0 %0
+ la_a1 &pr_range_check
+ st_t0,a1,0
+ la_a1 &pr_lo
+ st_t0,a1,0
+ la_a1 &pr_hi
+ st_t0,a1,0
+ eret
+:ssi_amp
+ # width 4, absolute, unchecked: is_rel = range_check = lo = hi = 0.
+ li_t0 %4 %0
+ la_a1 &pr_width
+ st_t0,a1,0
+ li_t0 %0 %0
+ la_a1 &pr_is_rel
+ st_t0,a1,0
+ la_a1 &pr_range_check
+ st_t0,a1,0
+ la_a1 &pr_lo
+ st_t0,a1,0
+ la_a1 &pr_hi
+ st_t0,a1,0
+ eret
+
+## --- Directives -------------------------------------------------------------
+
+## do_align(): .align N [PATTERN]. N is a positive power of two; optional
+## byte-mode pattern. Pads with zeros if no pattern.
+##
+## Emits pad = (N - ip mod N) mod N bytes through emit_byte. With a
+## pattern, byte i of the padding is pat[i mod patlen]. Pattern bytes are
+## read back-to-back with parse_one_byte (no whitespace is skipped between
+## them) into pat_buf, at most H2_TOKEN_CAP of them. Branches to
+## err_align_n when N is zero, negative, or not a power of two.
+:do_align
+ enter_0
+ la_br &skip_inline_ws
+ call
+ la_br &read_decimal
+ call
+ la_a1 &da_n
+ st_a0,a1,0
+ la_br &err_align_n
+ beqz_a0
+ la_br &err_align_n
+ bltz_a0
+ # Power-of-two check: N & (N-1) == 0
+ la_a0 &da_n
+ ld_a3,a0,0
+ la_a0 &da_n
+ ld_a2,a0,0
+ addi_a2,a2,neg1
+ and_a3,a3,a2
+ la_br &err_align_n
+ mov_a0,a3
+ bnez_a0
+
+ li_t0 %0 %0
+ la_a0 &da_has_pat
+ st_t0,a0,0
+ la_a0 &da_patlen
+ st_t0,a0,0
+
+ la_br &skip_inline_ws
+ call
+
+ # A pattern is present only if the next byte exists and is a digit.
+ la_a0 &scan_pos
+ ld_t0,a0,0
+ la_a1 &scan_end
+ ld_t1,a1,0
+ la_br &da_compute
+ beq_t0,t1
+ la_br &da_compute
+ blt_t1,t0
+ lb_a0,t0,0
+ la_br &is_byte_digit
+ call
+ la_br &da_compute
+ beqz_a0
+ li_t0 %1 %0
+ la_a1 &da_has_pat
+ st_t0,a1,0
+:da_pat_loop
+ # While the next byte is a digit: pat_buf[patlen++] = parse_one_byte().
+ la_a0 &scan_pos
+ ld_t0,a0,0
+ la_a1 &scan_end
+ ld_t1,a1,0
+ la_br &da_compute
+ beq_t0,t1
+ la_br &da_compute
+ blt_t1,t0
+ lb_a0,t0,0
+ la_br &is_byte_digit
+ call
+ la_br &da_compute
+ beqz_a0
+ la_a0 &da_patlen
+ ld_t0,a0,0
+ li_t1 H2_TOKEN_CAP
+ la_br &err_pattern_too_large
+ beq_t0,t1
+ la_a0 &pat_buf_ptr
+ ld_a0,a0,0
+ add_a0,a0,t0
+ la_br &parse_one_byte
+ call
+ la_a0 &da_patlen
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &da_pat_loop
+ b
+:da_compute
+ # pad = (N - (ip mod N)) mod N
+ la_a0 &ip
+ ld_a0,a0,0
+ la_a1 &da_n
+ ld_a1,a1,0
+ rem_a2,a0,a1 # a2 = ip mod N
+ li_t0 %0 %0
+ la_a3 &da_pad
+ st_t0,a3,0
+ # Already aligned: pad stays 0.
+ la_br &da_emit
+ beqz_a2
+ # pad = N - r
+ la_a3 &da_pad
+ st_a1,a3,0 # store N
+ ld_a3,a3,0 # a3 = N
+ sub_a3,a3,a2 # a3 = N - r
+ la_a1 &da_pad
+ st_a3,a1,0
+:da_emit
+ li_t0 %0 %0
+ la_a0 &da_i
+ st_t0,a0,0
+:da_emit_loop
+ # The loop counter lives in da_i and is reloaded after each emit_byte.
+ la_a0 &da_i
+ ld_t0,a0,0
+ la_a1 &da_pad
+ ld_t1,a1,0
+ la_br &da_emit_done
+ beq_t0,t1
+ la_a0 &da_has_pat
+ ld_t1,a0,0
+ la_br &da_emit_zero
+ beqz_t1
+ # b = pat[i % patlen]
+ la_a0 &da_i
+ ld_a0,a0,0
+ la_a1 &da_patlen
+ ld_a1,a1,0
+ rem_a2,a0,a1
+ la_a0 &pat_buf_ptr
+ ld_a0,a0,0
+ add_a0,a0,a2
+ lb_a0,a0,0
+ la_br &emit_byte
+ call
+ la_br &da_emit_inc
+ b
+:da_emit_zero
+ li_a0 %0 %0
+ la_br &emit_byte
+ call
+:da_emit_inc
+ la_a0 &da_i
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &da_emit_loop
+ b
+:da_emit_done
+ eret
+
+## do_fill(): .fill N B. N >= 0 decimal; B is one byte literal.
+##
+## Reads N with read_decimal, parses B into the byte slot that df_byte_ptr
+## points at, then calls emit_byte N times with that byte. Branches to
+## err_fill_n if the parsed N is negative.
+:do_fill
+ enter_0
+ la_br &skip_inline_ws
+ call
+ la_br &read_decimal
+ call
+ la_a1 &df_n
+ st_a0,a1,0
+ la_br &err_fill_n
+ bltz_a0
+ la_br &skip_inline_ws
+ call
+ # parse_one_byte(out = *df_byte_ptr)
+ la_a0 &df_byte_ptr
+ ld_a0,a0,0
+ la_br &parse_one_byte
+ call
+ li_t0 %0 %0
+ la_a0 &df_i
+ st_t0,a0,0
+:df_loop
+ # for (i = 0; i != N; i++) emit_byte(B); i is kept in df_i.
+ la_a0 &df_i
+ ld_t0,a0,0
+ la_a1 &df_n
+ ld_t1,a1,0
+ la_br &df_done
+ beq_t0,t1
+ la_a0 &df_byte_ptr
+ ld_a0,a0,0
+ lb_a0,a0,0
+ la_br &emit_byte
+ call
+ la_a0 &df_i
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &df_loop
+ b
+:df_done
+ eret
+
+## do_scope_open(): scope_seq++; scope_stack[scope_depth++] = scope_seq.
+##
+## scope_stack slots are 8 bytes (index shifted left by 3). Branches to
+## err_scope_overflow when scope_depth >= H2_SCOPE_CAP on entry.
+:do_scope_open
+ enter_0
+ la_a0 &scope_depth
+ ld_t0,a0,0
+ li_t1 H2_SCOPE_CAP
+ la_br &err_scope_overflow
+ beq_t0,t1
+ la_br &err_scope_overflow
+ blt_t1,t0
+ # t1 = ++scope_seq (the id assigned to the new scope)
+ la_a0 &scope_seq
+ ld_t1,a0,0
+ addi_t1,t1,1
+ st_t1,a0,0
+ # scope_stack[depth] = t1; t0 still holds the old depth.
+ la_a1 &scope_stack_ptr
+ ld_a1,a1,0
+ shli_t2,t0,3
+ add_a1,t2,a1 # available
+ st_t1,a1,0
+ addi_t0,t0,1
+ la_a0 &scope_depth
+ st_t0,a0,0
+ eret
+
+## do_scope_close(): scope_depth--; fatal if not in scope.
+##
+## Branches to err_scope_underflow when scope_depth is already 0. The
+## popped scope_stack slot is not cleared.
+:do_scope_close
+ enter_0
+ la_a0 &scope_depth
+ ld_t0,a0,0
+ la_br &err_scope_underflow
+ beqz_t0
+ addi_t0,t0,neg1
+ st_t0,a0,0
+ eret
+
+## --- Emit -------------------------------------------------------------------
+
+## emit_byte(a0=byte): pass 1 only bumps ip; pass 2 also writes to output_buf.
+## Leaf.
+##
+## When pass == 2 the byte is stored at output_buf[output_used] and
+## output_used is incremented; branches to err_output_overflow when
+## output_used >= H2_OUTPUT_CAP. ip is incremented on every call.
+## Overwrites a0, a1, a2, t0, t1.
+:emit_byte
+ la_a1 &pass
+ ld_t0,a1,0
+ li_t1 %2 %0
+ la_br &eb_pass1
+ bne_t0,t1
+ la_a1 &output_used
+ ld_t0,a1,0
+ li_t1 H2_OUTPUT_CAP
+ la_br &err_output_overflow
+ beq_t0,t1
+ la_br &err_output_overflow
+ blt_t1,t0
+ la_a2 &output_buf_ptr
+ ld_a2,a2,0
+ add_a2,a2,t0
+ sb_a0,a2,0
+ # output_used++ (a1 still holds &output_used)
+ addi_t0,t0,1
+ st_t0,a1,0
+:eb_pass1
+ # ip++
+ la_a0 &ip
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ ret
+
+## emit_value(a0=value, a1=width, a2=lo, a3=hi, t0=range_check). Range-checks
+## (if requested), packs little-endian into ev_bytes[0..width-1], then emits
+## (in reverse order if big_endian).
+##
+## Branches to err_ref_out_of_range when range_check is non-zero and
+## value < lo or hi < value. Each byte goes out through emit_byte.
+:emit_value
+ enter_0
+ # Spill all five arguments to their ev_* slots. Only t1/t2/a0 are used
+ # as store bases, so t0 (range_check) survives until it is stored.
+ la_t1 &ev_value
+ mov_t2,t1
+ st_a0,t2,0
+ la_t1 &ev_width
+ st_a1,t1,0
+ la_t1 &ev_lo
+ st_a2,t1,0
+ # ev_hi = a3: the slot address is moved from t1 into a0 through
+ # aux_tmp, then used as the store base.
+ la_t1 &ev_hi
+ la_a0 &aux_tmp
+ st_t1,a0,0
+ ld_a0,a0,0
+ st_a3,a0,0
+ la_t1 &ev_range_check
+ st_t0,t1,0
+
+ la_a0 &ev_range_check
+ ld_t0,a0,0
+ la_br &ev_no_range
+ beqz_t0
+ # if (value < lo) fatal
+ la_a0 &ev_value
+ ld_a0,a0,0
+ la_a1 &ev_lo
+ ld_a1,a1,0
+ la_br &err_ref_out_of_range
+ mov_a2,a1
+ blt_a0,a2
+ # if (hi < value) fatal
+ la_a0 &ev_value
+ ld_a0,a0,0
+ la_a1 &ev_hi
+ ld_a1,a1,0
+ la_br &err_ref_out_of_range
+ blt_a1,a0
+:ev_no_range
+ # ev_pack_v is a working copy of value that is shifted right 8 bits per
+ # byte packed.
+ la_a0 &ev_value
+ ld_a0,a0,0
+ la_a1 &ev_pack_v
+ st_a0,a1,0
+ li_t0 %0 %0
+ la_a0 &ev_i
+ st_t0,a0,0
+:ev_pack_loop
+ la_a0 &ev_i
+ ld_t0,a0,0
+ la_a1 &ev_width
+ ld_t1,a1,0
+ la_br &ev_emit_dispatch
+ beq_t0,t1
+ # ev_bytes[i] = pack_v & 255
+ la_a1 &ev_pack_v
+ ld_t2,a1,0
+ andi_a3,t2,255
+ la_a2 &ev_bytes_ptr
+ ld_a2,a2,0
+ add_a2,a2,t0
+ sb_a3,a2,0
+ # pack_v >>= 8
+ la_a1 &ev_pack_v
+ ld_t2,a1,0
+ shri_t2,t2,8
+ st_t2,a1,0
+ addi_t0,t0,1
+ la_a0 &ev_i
+ st_t0,a0,0
+ la_br &ev_pack_loop
+ b
+:ev_emit_dispatch
+ la_a0 &big_endian
+ ld_t0,a0,0
+ la_br &ev_emit_be
+ bnez_t0
+ # Little-endian: emit ev_bytes[0] .. ev_bytes[width-1].
+ li_t0 %0 %0
+ la_a0 &ev_i
+ st_t0,a0,0
+:ev_emit_le_loop
+ la_a0 &ev_i
+ ld_t0,a0,0
+ la_a1 &ev_width
+ ld_t1,a1,0
+ la_br &ev_done
+ beq_t0,t1
+ la_a2 &ev_bytes_ptr
+ ld_a2,a2,0
+ add_a2,a2,t0
+ # a0 = a2 (byte address), moved through aux_tmp.
+ la_a3 &aux_tmp
+ st_a2,a3,0
+ ld_a0,a3,0
+ lb_a0,a0,0
+ la_br &emit_byte
+ call
+ la_a0 &ev_i
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &ev_emit_le_loop
+ b
+:ev_emit_be
+ # Big-endian: emit ev_bytes[width-1] down to ev_bytes[0].
+ la_a0 &ev_width
+ ld_t0,a0,0
+ addi_t0,t0,neg1
+ la_a1 &ev_i
+ st_t0,a1,0
+:ev_emit_be_loop
+ la_a0 &ev_i
+ ld_t0,a0,0
+ la_br &ev_done
+ bltz_t0
+ la_a2 &ev_bytes_ptr
+ ld_a2,a2,0
+ add_a2,a2,t0
+ # a0 = a2 (byte address), moved through aux_tmp.
+ la_a3 &aux_tmp
+ st_a2,a3,0
+ ld_a0,a3,0
+ lb_a0,a0,0
+ la_br &emit_byte
+ call
+ la_a0 &ev_i
+ ld_t0,a0,0
+ addi_t0,t0,neg1
+ st_t0,a0,0
+ la_br &ev_emit_be_loop
+ b
+:ev_done
+ eret
+
+## --- Misc helpers -----------------------------------------------------------
+
+## str_eq(a0=p, a1=q, a2=len) -> a0=0/1. Returns 1 iff p[0..len-1] == q[..]
+## AND p[len] == '\0'. Used for argv string compares.
+##
+## The loop index is kept in t1 for the whole function; the arguments are
+## spilled to se_p / se_q / se_len and reloaded each iteration.
+:str_eq
+ enter_0
+ la_t0 &se_p
+ st_a0,t0,0
+ la_t0 &se_q
+ st_a1,t0,0
+ la_t0 &se_len
+ st_a2,t0,0
+ li_t1 %0 %0
+:se_loop
+ # i == len: all bytes matched, go check the terminator.
+ la_a0 &se_len
+ ld_a1,a0,0
+ la_br &se_check_terminal
+ beq_t1,a1
+ # a0 = p[i]
+ la_a0 &se_p
+ ld_a0,a0,0
+ add_a0,a0,t1
+ lb_a0,a0,0
+ # a2 = q[i]
+ la_a2 &se_q
+ ld_a2,a2,0
+ add_a2,a2,t1
+ lb_a2,a2,0
+ # if (p[i] != q[i]) return 0. q[i] is moved from a2 into t0 through
+ # aux_tmp using a1 as the base, so a0 keeps p[i]. The previous sequence
+ # overwrote both a0 and t0 with the address of aux_tmp, which made the
+ # compare always equal and every mismatch go undetected.
+ la_a1 &aux_tmp
+ st_a2,a1,0
+ ld_t0,a1,0
+ la_br &se_no
+ bne_a0,t0
+ addi_t1,t1,1
+ la_br &se_loop
+ b
+:se_check_terminal
+ # Require p[len] == 0.
+ la_a0 &se_p
+ ld_a0,a0,0
+ add_a0,a0,t1
+ lb_a0,a0,0
+ la_br &se_no
+ bnez_a0
+ li_a0 %1 %0
+ eret
+:se_no
+ li_a0 %0 %0
+ eret
+
+## mem_eq(a0=p, a1=q, a2=len) -> a0=0/1. Plain byte compare, no NUL check.
+##
+## Returns 1 when len is 0. The loop index is kept in t1 for the whole
+## function; the arguments are spilled to me_p / me_q / me_len and
+## reloaded each iteration.
+:mem_eq
+ enter_0
+ la_t0 &me_p
+ st_a0,t0,0
+ la_t0 &me_q
+ st_a1,t0,0
+ la_t0 &me_len
+ st_a2,t0,0
+ li_t1 %0 %0
+:me_loop
+ # i == len: every byte matched.
+ la_a0 &me_len
+ ld_a1,a0,0
+ la_br &me_yes
+ beq_t1,a1
+ # a0 = p[i]
+ la_a0 &me_p
+ ld_a0,a0,0
+ add_a0,a0,t1
+ lb_a0,a0,0
+ # a2 = q[i]
+ la_a2 &me_q
+ ld_a2,a2,0
+ add_a2,a2,t1
+ lb_a2,a2,0
+ # if (p[i] != q[i]) return 0. q[i] is moved from a2 into t0 through
+ # aux_tmp using a1 as the base, so a0 keeps p[i]. The previous sequence
+ # overwrote both a0 and t0 with the address of aux_tmp, which made the
+ # compare always equal and mem_eq return 1 for any input.
+ la_a1 &aux_tmp
+ st_a2,a1,0
+ ld_t0,a1,0
+ la_br &me_no
+ bne_a0,t0
+ addi_t1,t1,1
+ la_br &me_loop
+ b
+:me_yes
+ li_a0 %1 %0
+ eret
+:me_no
+ li_a0 %0 %0
+ eret
+
+## parse_long_arg(a0=str): parse decimal or 0x-prefixed hex i64. Fatal on
+## malformed.
+##
+## Returns the value in a0. A leading "0x" / "0X" selects hex, where each
+## remaining byte up to the NUL is converted with byte_digit_value.
+## Otherwise an optional leading '-' is accepted and every remaining byte
+## must be '0'..'9', else err_bad_long. The cursor is kept in pla_p and
+## the running value in pla_val. No overflow check is performed.
+:parse_long_arg
+ enter_0
+ la_t0 &pla_p
+ st_a0,t0,0
+ li_t0 %0 %0
+ la_a1 &pla_val
+ st_t0,a1,0
+ la_a1 &pla_neg
+ st_t0,a1,0
+ # detect 0x / 0X
+ # t0 = p[0]. Every "read *p" in this routine now loads the cursor from
+ # pla_p and dereferences it; the previous sequences dereferenced the
+ # address of aux_tmp instead of the string.
+ la_a0 &pla_p
+ ld_t0,a0,0
+ lb_t0,t0,0
+ li_t1 %48 %0
+ la_br &pla_dec_init
+ bne_t0,t1
+ # t0 = p[1]
+ la_a3 &pla_p
+ ld_a3,a3,0
+ lb_t0,a3,1
+ li_t1 %120 %0
+ la_br &pla_hex_init
+ beq_t0,t1
+ li_t1 %88 %0
+ la_br &pla_hex_init
+ beq_t0,t1
+ la_br &pla_dec_init
+ b
+:pla_hex_init
+ la_a0 &pla_p
+ ld_t0,a0,0
+ addi_t0,t0,2 # skip "0x" / "0X"
+ st_t0,a0,0
+:pla_hex_loop
+ # t0 = *p; NUL ends the number.
+ la_a0 &pla_p
+ ld_t0,a0,0
+ lb_t0,t0,0
+ la_br &pla_finish
+ beqz_t0
+ la_a3 &pla_tmp
+ st_t0,a3,0
+ ld_a0,a3,0 # a0 = c
+ la_br &byte_digit_value
+ call
+ # val = (val << 4) + digit
+ la_a1 &pla_val
+ ld_t0,a1,0
+ shli_a3,t0,4 # a3 = val << 4
+ mov_t0,a0
+ add_a3,a3,t0
+ st_a3,a1,0
+ la_a0 &pla_p
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &pla_hex_loop
+ b
+:pla_dec_init
+ # Optional minus
+ la_a0 &pla_p
+ ld_t0,a0,0
+ lb_t0,t0,0
+ li_t1 %45 %0
+ la_br &pla_dec_loop
+ bne_t0,t1
+ li_t0 %1 %0
+ la_a1 &pla_neg
+ st_t0,a1,0
+ la_a0 &pla_p
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+:pla_dec_loop
+ # t0 = *p; NUL ends the number, anything outside '0'..'9' is fatal.
+ la_a0 &pla_p
+ ld_t0,a0,0
+ lb_t0,t0,0
+ la_br &pla_finish
+ beqz_t0
+ li_t1 %48 %0
+ la_br &err_bad_long
+ blt_t0,t1
+ li_t1 %57 %0
+ la_br &err_bad_long
+ blt_t1,t0
+ # a1 = 10, staged through pla_tmp. The previous sequence loaded the
+ # address of pla_tmp into a1 and multiplied by that.
+ li_t1 %10 %0
+ la_a1 &pla_tmp
+ st_t1,a1,0
+ ld_a1,a1,0
+ la_a2 &pla_val
+ ld_t0,a2,0
+ mul_t0,t0,a1 # t0 = val * 10
+ # val = val * 10 + (*p - '0')
+ la_a0 &pla_p
+ ld_a0,a0,0
+ lb_a0,a0,0
+ li_t1 %48 %0
+ sub_a0,a0,t1
+ add_t0,t0,a0
+ la_a2 &pla_val
+ st_t0,a2,0
+ la_a0 &pla_p
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_br &pla_dec_loop
+ b
+:pla_finish
+ # Negate if a leading '-' was seen.
+ la_a1 &pla_neg
+ ld_t0,a1,0
+ la_br &pla_done
+ beqz_t0
+ la_a2 &pla_val
+ ld_a3,a2,0
+ li_t0 %0 %0
+ sub_a3,t0,a3 # a3 = -val
+ st_a3,a2,0
+:pla_done
+ la_a1 &pla_val
+ ld_a0,a1,0
+ eret
+
+## --- Output writer ----------------------------------------------------------
+
+## write_output(): openat(output_path, O_WRONLY|O_CREAT|O_TRUNC, MODE).
+## MODE = 0750 unless --non-executable, then 0640. Then write loop.
+##
+## Syscall registers as used here: a0 = syscall number, a1/a2/a3/t0 =
+## arguments, result in a0. The loop writes output_buf[output_written ..
+## output_used) and repeats after a short write. Branches to
+## err_open_output on a negative openat result and to err_write on a
+## negative or zero write result. The descriptor is not closed here.
+:write_output
+ enter_0
+ # openat(AT_FDCWD, output_path, flags, mode)
+ la_a0 &output_path
+ ld_a2,a0,0
+ li_a0 sys_openat
+ li_a1 AT_FDCWD
+ li_a3 O_WRONLY_CREAT_TRUNC
+ la_t1 &non_executable
+ ld_t1,t1,0
+ la_br &wo_mode_nonexec
+ bnez_t1
+ li_t0 MODE_0750
+ la_br &wo_after_mode
+ b
+:wo_mode_nonexec
+ li_t0 MODE_0640
+:wo_after_mode
+ syscall
+ la_br &err_open_output
+ bltz_a0
+ la_a1 &output_fd
+ st_a0,a1,0
+ li_t0 %0 %0
+ la_a1 &output_written
+ st_t0,a1,0
+:wo_loop
+ # Done once output_written == output_used.
+ la_a0 &output_written
+ ld_t0,a0,0
+ la_a1 &output_used
+ ld_t1,a1,0
+ la_br &wo_done
+ beq_t0,t1
+ # write(fd, output_buf + written, used - written)
+ la_a0 &output_fd
+ ld_a1,a0,0
+ la_a2 &output_buf_ptr
+ ld_a2,a2,0
+ add_a2,a2,t0
+ sub_a3,t1,t0 # available
+ li_a0 sys_write
+ syscall
+ la_br &err_write
+ bltz_a0
+ la_br &err_write
+ beqz_a0
+ # output_written += bytes written
+ la_a1 &output_written
+ ld_t0,a1,0
+ add_t0,t0,a0
+ st_t0,a1,0
+ la_br &wo_loop
+ b
+:wo_done
+ eret
+
+## --- Errors -----------------------------------------------------------------
+
+## fatal_msg(a0=msg_ptr): write either "hex2pp: <msg>\n" or
+## "<path>:<line>: hex2pp: <msg>\n" to stderr, then exit(1).
+##
+## The long form is used when cur_path is non-zero; <line> is cur_line
+## rendered by write_decimal_stderr. The routine sets up no frame
+## (no enter_0) and ends with the sys_exit syscall rather than a return.
+## Results of the write syscalls are not checked.
+:fatal_msg
+ la_a1 &err_saved_msg
+ st_a0,a1,0
+ la_a0 &cur_path
+ ld_t0,a0,0
+ la_br &fm_no_path
+ beqz_t0
+ # write path
+ mov_a0,t0
+ la_br &strlen_cstr
+ call
+ la_a2 &err_saved_len
+ st_a0,a2,0
+ la_a2 &cur_path
+ ld_a2,a2,0
+ la_a3 &err_saved_len
+ ld_a3,a3,0
+ li_a0 sys_write
+ li_a1 %2 %0
+ syscall
+ # write ":"
+ li_a0 sys_write
+ li_a1 %2 %0
+ la_a2 &str_colon
+ li_a3 %1 %0
+ syscall
+ # write decimal(cur_line)
+ la_a0 &cur_line
+ ld_a0,a0,0
+ la_br &write_decimal_stderr
+ call
+ # write ": hex2pp: "
+ li_a0 sys_write
+ li_a1 %2 %0
+ la_a2 &str_colon_hex2pp
+ li_a3 %10 %0
+ syscall
+ la_br &fm_emit_msg
+ b
+:fm_no_path
+ # write "hex2pp: " (8 bytes)
+ li_a0 sys_write
+ li_a1 %2 %0
+ la_a2 &str_hex2pp
+ li_a3 %8 %0
+ syscall
+:fm_emit_msg
+ # write the message, using its strlen_cstr length
+ la_a0 &err_saved_msg
+ ld_a0,a0,0
+ la_br &strlen_cstr
+ call
+ la_a2 &err_saved_len
+ st_a0,a2,0
+ la_a2 &err_saved_msg
+ ld_a2,a2,0
+ la_a3 &err_saved_len
+ ld_a3,a3,0
+ li_a0 sys_write
+ li_a1 %2 %0
+ syscall
+ # write "\n"
+ li_a0 sys_write
+ li_a1 %2 %0
+ la_a2 &str_newline
+ li_a3 %1 %0
+ syscall
+ # exit(1)
+ li_a0 sys_exit
+ li_a1 %1 %0
+ syscall
+
+## strlen_cstr(a0=p) -> a0=length. Walks until NUL.
+## Leaf (plain ret, no frame). Overwrites t0 (running length) and t1.
+:strlen_cstr
+ li_t0 %0 %0
+:sl_loop
+ # t1 = p[t0]; stop at the NUL.
+ add_t1,a0,t0 # available
+ lb_t1,t1,0
+ la_br &sl_done
+ beqz_t1
+ addi_t0,t0,1
+ la_br &sl_loop
+ b
+:sl_done
+ mov_a0,t0
+ ret
+
+## write_decimal_stderr(a0=value): write decimal of unsigned i64 to stderr.
+## Special-cases zero. Uses line_scratch (64 B) as a reverse-fill buffer.
+##
+## Digits are produced least-significant first and stored from
+## line_scratch[63] downwards; wd_pos ends one below the first digit, so
+## the text occupies line_scratch[wd_pos+1 .. 63].
+:write_decimal_stderr
+ enter_0
+ la_a1 &wd_v
+ st_a0,a1,0
+ la_br &wd_nonzero
+ bnez_a0
+ # value == 0: write "0"
+ li_a0 sys_write
+ li_a1 %2 %0
+ la_a2 &str_zero
+ li_a3 %1 %0
+ syscall
+ eret
+:wd_nonzero
+ # Render reversed into line_scratch[...], starting near the end.
+ li_t0 %63 %0
+ la_a1 &wd_pos
+ st_t0,a1,0
+:wd_loop
+ la_a0 &wd_v
+ ld_a0,a0,0
+ la_br &wd_emit
+ beqz_a0
+ # a1 = 10, staged through wd_tmp. The previous sequence loaded the
+ # address of wd_tmp into a1, so rem/div used a pointer as the divisor.
+ li_t1 %10 %0
+ la_a1 &wd_tmp
+ st_t1,a1,0
+ ld_a1,a1,0
+ rem_a2,a0,a1 # a2 = v mod 10
+ div_a0,a0,a1 # a0 = v / 10
+ la_a3 &wd_v
+ st_a0,a3,0
+ li_t1 %48 %0
+ add_a3,t1,a2 # ascii = '0' + digit
+ # line_scratch[wd_pos--] = ascii
+ la_a0 &wd_pos
+ ld_t0,a0,0
+ la_a1 &line_scratch_ptr
+ ld_a1,a1,0
+ add_a1,a1,t0
+ sb_a3,a1,0
+ addi_t0,t0,neg1
+ la_a0 &wd_pos
+ st_t0,a0,0
+ la_br &wd_loop
+ b
+:wd_emit
+ # write(2, &line_scratch[wd_pos+1], 64 - (wd_pos+1)) — but wd_pos+1
+ # is also our buffer offset, so length = 63 - wd_pos = 64 - (wd_pos+1).
+ la_a0 &wd_pos
+ ld_t0,a0,0
+ addi_t0,t0,1
+ la_a1 &line_scratch_ptr
+ ld_a1,a1,0
+ add_a1,a1,t0
+ # a2 = a1 (buffer pointer), moved through wd_tmp with a2 as the base.
+ # The previous sequence left the address of wd_tmp in a2, so the write
+ # syscall was handed the scratch slot instead of the digits.
+ la_a2 &wd_tmp
+ st_a1,a2,0
+ ld_a2,a2,0
+ li_t1 %64 %0
+ sub_a3,t1,t0
+ li_a0 sys_write
+ li_a1 %2 %0
+ syscall
+ eret
+
+## print_usage(): write usage banner to stdout (fd=1).
+##
+## The length is computed with strlen_cstr on msg_usage and parked in
+## pu_tmp while the write arguments are set up. The write result is not
+## checked.
+:print_usage
+ enter_0
+ la_a0 &msg_usage
+ la_br &strlen_cstr
+ call
+ la_a3 &pu_tmp
+ st_a0,a3,0
+ # write(1, msg_usage, len)
+ la_a2 &msg_usage
+ ld_a3,a3,0
+ li_a0 sys_write
+ li_a1 %1 %0
+ syscall
+ eret
+
+## --- Error stubs ------------------------------------------------------------
+## Every stub has the same shape: load the address of its message into a0
+## and branch (not call) to fatal_msg, which prints the message and exits.
+## Code reaches a stub with a branch, so no stub returns to its origin.
+:err_unknown_arg
+ la_a0 &msg_unknown_arg
+ la_br &fatal_msg
+ b
+:err_missing_arg_value
+ la_a0 &msg_missing_arg_value
+ la_br &fatal_msg
+ b
+:err_no_inputs
+ la_a0 &msg_no_inputs
+ la_br &fatal_msg
+ b
+:err_too_many_files
+ la_a0 &msg_too_many_files
+ la_br &fatal_msg
+ b
+:err_open_input
+ la_a0 &msg_open_input
+ la_br &fatal_msg
+ b
+:err_read
+ la_a0 &msg_read
+ la_br &fatal_msg
+ b
+:err_input_too_big
+ la_a0 &msg_input_too_big
+ la_br &fatal_msg
+ b
+:err_open_output
+ la_a0 &msg_open_output
+ la_br &fatal_msg
+ b
+:err_write
+ la_a0 &msg_write
+ la_br &fatal_msg
+ b
+:err_text_overflow
+ la_a0 &msg_text_overflow
+ la_br &fatal_msg
+ b
+:err_too_many_labels
+ la_a0 &msg_too_many_labels
+ la_br &fatal_msg
+ b
+:err_duplicate_label
+ la_a0 &msg_duplicate_label
+ la_br &fatal_msg
+ b
+:err_undefined_label
+ la_a0 &msg_undefined_label
+ la_br &fatal_msg
+ b
+:err_undefined_local
+ la_a0 &msg_undefined_local
+ la_br &fatal_msg
+ b
+:err_unexpected_char
+ la_a0 &msg_unexpected_char
+ la_br &fatal_msg
+ b
+:err_unknown_directive
+ la_a0 &msg_unknown_directive
+ la_br &fatal_msg
+ b
+:err_dotted_outside_scope
+ la_a0 &msg_dotted_outside_scope
+ la_br &fatal_msg
+ b
+:err_scope_overflow
+ la_a0 &msg_scope_overflow
+ la_br &fatal_msg
+ b
+:err_scope_underflow
+ la_a0 &msg_scope_underflow
+ la_br &fatal_msg
+ b
+:err_scope_unclosed
+ la_a0 &msg_scope_unclosed
+ la_br &fatal_msg
+ b
+:err_align_n
+ la_a0 &msg_align_n
+ la_br &fatal_msg
+ b
+:err_fill_n
+ la_a0 &msg_fill_n
+ la_br &fatal_msg
+ b
+:err_pattern_too_large
+ la_a0 &msg_pattern_too_large
+ la_br &fatal_msg
+ b
+:err_byte_lit_bad
+ la_a0 &msg_byte_lit_bad
+ la_br &fatal_msg
+ b
+:err_pbs_incomplete
+ la_a0 &msg_pbs_incomplete
+ la_br &fatal_msg
+ b
+:err_sigil_no_label
+ la_a0 &msg_sigil_no_label
+ la_br &fatal_msg
+ b
+:err_minus_no_label
+ la_a0 &msg_minus_no_label
+ la_br &fatal_msg
+ b
+:err_bad_sigil
+ la_a0 &msg_bad_sigil
+ la_br &fatal_msg
+ b
+:err_ref_out_of_range
+ la_a0 &msg_ref_out_of_range
+ la_br &fatal_msg
+ b
+:err_name_too_long
+ la_a0 &msg_name_too_long
+ la_br &fatal_msg
+ b
+:err_empty_name
+ la_a0 &msg_empty_name
+ la_br &fatal_msg
+ b
+:err_empty_directive
+ la_a0 &msg_empty_directive
+ la_br &fatal_msg
+ b
+:err_expected_decimal
+ la_a0 &msg_expected_decimal
+ la_br &fatal_msg
+ b
+:err_output_overflow
+ la_a0 &msg_output_overflow
+ la_br &fatal_msg
+ b
+:err_bad_long
+ la_a0 &msg_bad_long
+ la_br &fatal_msg
+ b
+
+## Sentinel: end of executable text.
+## Label only; no bytes are emitted for it. Everything that follows in this
+## file is data (rodata strings, the pointer-init table, zeroed cells).
+:_text_end
+
+## --- Rodata -----------------------------------------------------------------
+
+## The string a.out followed by an explicit 00 byte.
+## NOTE(review): presumably the default output path -- confirm at the use site.
+:const_a_out "a.out" '00'
+
+## Command-line option spellings, each followed by an explicit 00 byte.
+## They cover every flag named in msg_usage, plus -h / --help, which the
+## usage text itself does not list.
+:opt_dash_f "-f" '00'
+:opt_long_file "--file" '00'
+:opt_dash_o "-o" '00'
+:opt_long_output "--output" '00'
+:opt_dash_B "-B" '00'
+:opt_long_base "--base-address" '00'
+:opt_long_big "--big-endian" '00'
+:opt_long_little "--little-endian" '00'
+:opt_dash_b "-b" '00'
+:opt_long_binary "--binary" '00'
+:opt_long_nonexec "--non-executable" '00'
+:opt_dash_h "-h" '00'
+:opt_long_help "--help" '00'
+
+## Directive names, spelled without the leading dot and, unlike the option
+## strings, without an explicit 00 byte after them.
+:dir_align "align"
+:dir_fill "fill"
+:dir_scope "scope"
+:dir_endscope "endscope"
+
+## Short text fragments, again with no explicit 00 byte. str_newline is a
+## single line break written literally inside the quotes.
+## NOTE(review): presumably the pieces of the diagnostic prefix -- confirm
+## against the code that prints them.
+:str_colon ":"
+:str_colon_hex2pp ": hex2pp: "
+:str_hex2pp "hex2pp: "
+:str_newline "
+"
+:str_zero "0"
+
+## Usage banner (printed by print_usage) followed by one diagnostic text per
+## err_* stub, in the same order as the stubs. All end with an explicit 00
+## byte.
+:msg_usage "usage: hex2pp (-f|--file) FILE [(-f|--file) FILE ...]
+ [-o|--output OUT]
+ [-B|--base-address ADDR]
+ [--big-endian | --little-endian]
+ [-b|--binary]
+ [--non-executable]
+" '00'
+:msg_unknown_arg "unknown argument" '00'
+:msg_missing_arg_value "missing value for option" '00'
+:msg_no_inputs "no input files" '00'
+:msg_too_many_files "too many input files" '00'
+:msg_open_input "failed to open input file" '00'
+:msg_read "failed to read input" '00'
+:msg_input_too_big "input too large" '00'
+:msg_open_output "failed to open output file" '00'
+:msg_write "failed to write output" '00'
+:msg_text_overflow "text pool overflow" '00'
+:msg_too_many_labels "too many labels" '00'
+:msg_duplicate_label "duplicate label" '00'
+:msg_undefined_label "undefined label" '00'
+:msg_undefined_local "undefined local label" '00'
+:msg_unexpected_char "unexpected character" '00'
+:msg_unknown_directive "unknown directive" '00'
+:msg_dotted_outside_scope "dot-prefixed label outside a .scope" '00'
+:msg_scope_overflow ".scope: depth overflow" '00'
+:msg_scope_underflow ".endscope: not in a scope" '00'
+:msg_scope_unclosed ".scope not closed at end of input" '00'
+:msg_align_n ".align: N must be a positive power of two" '00'
+:msg_fill_n ".fill: N must be non-negative" '00'
+:msg_pattern_too_large "pattern too large" '00'
+:msg_byte_lit_bad "byte literal: bad digit count" '00'
+:msg_pbs_incomplete "byte stream: incomplete digits at end of run" '00'
+:msg_sigil_no_label "sigil not followed by label name" '00'
+:msg_minus_no_label "'-' must be followed by label name" '00'
+:msg_bad_sigil "internal: bad sigil" '00'
+:msg_ref_out_of_range "reference out of range" '00'
+:msg_name_too_long "name too long" '00'
+:msg_empty_name "expected label name" '00'
+:msg_empty_directive "expected directive name after '.'" '00'
+:msg_expected_decimal "expected decimal integer" '00'
+:msg_output_overflow "output overflow" '00'
+:msg_bad_long "invalid integer argument" '00'
+
+## --- BSS pointer-init table ------------------------------------------------
+## One row per *_ptr cell of the BSS pointer slots section, in the same order
+## as those cells: the address of the slot (&name, followed by ZERO4) and the
+## matching OFF_* constant. bss_init_tbl_end marks the end of the rows.
+## NOTE(review): the code that walks this table is outside this section;
+## presumably it stores (arena base + OFF_x) into each slot -- confirm there.
+:bss_init_tbl
+&input_paths_ptr ZERO4 OFF_input_paths
+&input_starts_ptr ZERO4 OFF_input_starts
+&input_lens_ptr ZERO4 OFF_input_lens
+&scope_stack_ptr ZERO4 OFF_scope_stack
+&line_scratch_ptr ZERO4 OFF_line_scratch
+&name_buf_ptr ZERO4 OFF_name_buf
+&label_buf_ptr ZERO4 OFF_label_buf
+&other_buf_ptr ZERO4 OFF_other_buf
+&pat_buf_ptr ZERO4 OFF_pat_buf
+&ev_bytes_ptr ZERO4 OFF_ev_bytes
+&df_byte_ptr ZERO4 OFF_df_byte
+&input_buf_ptr ZERO4 OFF_input_buf
+&output_buf_ptr ZERO4 OFF_output_buf
+&text_buf_ptr ZERO4 OFF_text_buf
+&labels_ptr ZERO4 OFF_labels
+:bss_init_tbl_end
+
+## --- BSS scalars ------------------------------------------------------------
+## Every label in this section names exactly one ZERO8 cell. Cells are
+## grouped by the routine that owns them; most of those routines are defined
+## earlier in the file.
+
+## NOTE(review): command-line, input-file and output state, judging by the
+## label names only -- confirm against the code that uses these cells.
+:saved_argc
+ZERO8
+:saved_argv
+ZERO8
+:arg_idx
+ZERO8
+:arg_ptr
+ZERO8
+:input_count
+ZERO8
+:input_total
+ZERO8
+:li_path
+ZERO8
+:li_fd
+ZERO8
+:li_tmp
+ZERO8
+:output_path
+ZERO8
+:output_fd
+ZERO8
+:output_used
+ZERO8
+:output_written
+ZERO8
+:base_address
+ZERO8
+:byte_mode
+ZERO8
+:big_endian
+ZERO8
+:non_executable
+ZERO8
+
+## NOTE(review): pass, scan-position, text-pool, label-table and scope
+## state, again judging by the label names only -- confirm at the use sites.
+:pass
+ZERO8
+:pass_idx
+ZERO8
+:ip
+ZERO8
+:cur_path
+ZERO8
+:cur_line
+ZERO8
+:scan_pos
+ZERO8
+:scan_end
+ZERO8
+:text_used
+ZERO8
+:label_count
+ZERO8
+:scope_depth
+ZERO8
+:scope_seq
+ZERO8
+
+## name read scratch
+:name_len
+ZERO8
+:name_scope
+ZERO8
+:nt_c
+ZERO8
+:rn_out
+ZERO8
+:rn_max
+ZERO8
+:rn_n
+ZERO8
+:sl_tmp
+ZERO8
+
+## decimal read
+:rd_val
+ZERO8
+:rd_saw
+ZERO8
+:rd_tmp
+ZERO8
+
+## byte stream
+:pbs_acc
+ZERO8
+:pbs_have
+ZERO8
+:pbs_c
+ZERO8
+
+## one byte literal
+:p1b_out
+ZERO8
+:p1b_acc
+ZERO8
+:p1b_have
+ZERO8
+:p1b_done
+ZERO8
+:p1b_c
+ZERO8
+
+## intern
+:intern_src
+ZERO8
+:intern_len
+ZERO8
+:intern_dst
+ZERO8
+:intern_orig
+ZERO8
+:intern_i
+ZERO8
+
+## label_addr scratch
+:la_const_32
+ZERO8
+
+## name_eq scratch
+:ne_label
+ZERO8
+:ne_src
+ZERO8
+:ne_len
+ZERO8
+:ne_tmp
+ZERO8
+
+## define_label scratch
+:dl_src
+ZERO8
+:dl_len
+ZERO8
+:dl_scope
+ZERO8
+:dl_i
+ZERO8
+:dl_label
+ZERO8
+:dl_name_off
+ZERO8
+
+## lookup_label scratch
+:ll_src
+ZERO8
+:ll_len
+ZERO8
+:ll_d
+ZERO8
+:ll_sid
+ZERO8
+:ll_i
+ZERO8
+:ll_label
+ZERO8
+:ll_tmp
+ZERO8
+
+## process_reference / set_sigil_info scratch
+:cur_sigil
+ZERO8
+:pr_width
+ZERO8
+:pr_is_rel
+ZERO8
+:pr_lo
+ZERO8
+:pr_hi
+ZERO8
+:pr_range_check
+ZERO8
+:pr_llen
+ZERO8
+:pr_olen
+ZERO8
+:pr_has_other
+ZERO8
+:pr_t_label
+ZERO8
+:pr_t_other
+ZERO8
+:pr_value
+ZERO8
+:pr_tmp
+ZERO8
+:ssi_tmp
+ZERO8
+:ssi_tmp2
+ZERO8
+
+## emit_value scratch
+:ev_value
+ZERO8
+:ev_width
+ZERO8
+:ev_lo
+ZERO8
+:ev_hi
+ZERO8
+:ev_range_check
+ZERO8
+:ev_pack_v
+ZERO8
+:ev_i
+ZERO8
+
+## directive scratch
+:da_n
+ZERO8
+:da_has_pat
+ZERO8
+:da_patlen
+ZERO8
+:da_pad
+ZERO8
+:da_i
+ZERO8
+:df_n
+ZERO8
+:df_i
+ZERO8
+
+## str/mem helpers
+:se_p
+ZERO8
+:se_q
+ZERO8
+:se_len
+ZERO8
+:me_p
+ZERO8
+:me_q
+ZERO8
+:me_len
+ZERO8
+
+## parse_long_arg
+:pla_p
+ZERO8
+:pla_val
+ZERO8
+:pla_neg
+ZERO8
+:pla_tmp
+ZERO8
+
+## error/fatal
+:err_saved_msg
+ZERO8
+:err_saved_len
+ZERO8
+
+## write_decimal
+:wd_v
+ZERO8
+:wd_pos
+ZERO8
+:wd_tmp
+ZERO8
+
+## print_usage
+## pu_tmp holds the strlen_cstr result while it is moved from a0 to a3.
+:pu_tmp
+ZERO8
+
+## Generic auxiliary scratch used by sequences that route a value through
+## BSS to satisfy the seed P1 mnemonic table.
+:aux_tmp
+ZERO8
+
+## --- BSS pointer slots ------------------------------------------------------
+## One ZERO8 cell per buffer pointer, in the same order as the rows of
+## bss_init_tbl, which lists the address of each slot next to its OFF_*
+## constant. Code reads a buffer base by loading the address of the slot and
+## then loading through it (for example la_a1 &line_scratch_ptr followed by
+## ld_a1,a1,0 in write_decimal).
+:input_paths_ptr
+ZERO8
+:input_starts_ptr
+ZERO8
+:input_lens_ptr
+ZERO8
+:scope_stack_ptr
+ZERO8
+:line_scratch_ptr
+ZERO8
+:name_buf_ptr
+ZERO8
+:label_buf_ptr
+ZERO8
+:other_buf_ptr
+ZERO8
+:pat_buf_ptr
+ZERO8
+:ev_bytes_ptr
+ZERO8
+:df_byte_ptr
+ZERO8
+:input_buf_ptr
+ZERO8
+:output_buf_ptr
+ZERO8
+:text_buf_ptr
+ZERO8
+:labels_ptr
+ZERO8
+
+## Last label in the file; nothing is emitted after it.
+## NOTE(review): presumably referenced by the ELF header as the end-of-image
+## address -- confirm against the header used by the seed build.
+:ELF_end
diff --git a/scripts/boot-build-p1pp.sh b/scripts/boot-build-p1pp.sh
@@ -1,23 +1,29 @@
#!/bin/sh
-## boot-build-p1pp.sh — in-container .P1pp -> ELF.
+## boot-build-p1pp.sh — in-container .P1pp -> ELF via the new chain.
##
## Pure transformation. Caller (the Makefile) ensures every fixed-path
-## input below already exists, including the per-arch self-hosted m1pp
-## ELF binary (build/$ARCH/M1pp/M1pp, built by boot2.sh / boot-build-p1.sh).
+## input below already exists, including the per-arch self-hosted M1pp
+## ELF binary (build/$ARCH/M1pp/M1pp) and hex2pp ELF binary
+## (build/$ARCH/hex2pp/hex2pp). Both of those are built once via the
+## seed M0+hex2 chain (boot-build-p1.sh); after that point the seed
+## tools no longer participate in any user/test pipeline.
##
-## Pipeline:
+## Pipeline (new chain — no M0/hex2/catm anywhere):
## cat <P1-$ARCH.M1pp> <P1.M1pp> <P1pp.P1pp> <srcs...> -> /tmp/combined.M1pp
-## m1pp /tmp/combined.M1pp -> /tmp/expanded.M1
-## M0 /tmp/expanded.M1 -> /tmp/prog.hex2
-## catm /tmp/elf.hex2 /tmp/prog.hex2 -> /tmp/linked.hex2
-## hex2-0 /tmp/linked.hex2 -> $OUT
+## M1pp /tmp/combined.M1pp -> /tmp/expanded.hex2pp
+## hex2pp -f $ELF_HDR -f /tmp/expanded.hex2pp -o $OUT
+##
+## $ELF_HDR is P1/elf-$ARCH.hex2pp — a hex2pp-syntax ELF header that
+## supplies :ELF_base / :_start / :ELF_end framing, replacing the old
+## vendor/seed/$ARCH/ELF.hex2 (which uses hex2 `>` arithmetic and
+## trailing-zero placeholders incompatible with hex2pp).
##
## libp1pp (P1/P1pp.P1pp) is concatenated unconditionally so portable
## sources can use %fn, the control-flow macros, and libp1pp routines
## (sys_*, print*, parse_*, fmt_*, memcpy/memcmp, bump allocator, panic,
-## %assert_*) without per-program plumbing. M0 has no link-time DCE, so
-## programs that don't reference any libp1pp routine still pay a fixed
-## code-size tax (~a few KB).
+## %assert_*) without per-program plumbing. hex2pp has no link-time DCE,
+## so programs that don't reference any libp1pp routine still pay a
+## fixed code-size tax (~a few KB).
##
## Multiple <srcs> are concatenated in the order given. This is how
## libc-using executables compose: a typical chain is
@@ -45,10 +51,10 @@
set -eu
-# Per-stage tracing is always on. The stage0 tools (M0, hex2-0) print
-# nothing on success and almost nothing on failure, so we narrate which
-# step is running, snapshot intermediates to $WORK before exiting, and
-# print a clear FAIL banner on error so the user knows where it died.
+# Per-stage tracing is always on. M1pp / hex2pp print little on success
+# and bail fast on error, so we narrate which step is running, snapshot
+# intermediates to $WORK before exiting, and print a clear FAIL banner
+# on error so the user knows where it died.
ARCH_LBL=${ARCH:-?}
CURRENT_STEP=
trap 'rc=$?
@@ -79,9 +85,9 @@ shift
BACKEND=P1/P1-$ARCH.M1pp
FRONTEND=P1/P1.M1pp
LIBP1PP=P1/P1pp.P1pp
-ELF_HDR=vendor/seed/$ARCH/ELF.hex2
-TOOLS=build/$ARCH/tools
+ELF_HDR=P1/elf-$ARCH.hex2pp
M1PP_BIN=build/$ARCH/M1pp/M1pp
+HEX2PP_BIN=build/$ARCH/hex2pp/hex2pp
if [ -n "${WORK_SUBPATH:-}" ]; then
NAME=$WORK_SUBPATH
else
@@ -99,24 +105,13 @@ cat "$BACKEND" "$FRONTEND" "$LIBP1PP" "$@" > /tmp/combined.M1pp
cp /tmp/combined.M1pp "$WORK/combined.M1pp"
trace "combined.M1pp" /tmp/combined.M1pp
-step "m1pp: combined.M1pp -> expanded.M1"
-"$M1PP_BIN" /tmp/combined.M1pp /tmp/expanded.M1
-cp /tmp/expanded.M1 "$WORK/expanded.M1"
-trace "expanded.M1" /tmp/expanded.M1
-
-step "M0: expanded.M1 -> prog.hex2"
-"$TOOLS/M0" /tmp/expanded.M1 /tmp/prog.hex2
-cp /tmp/prog.hex2 "$WORK/prog.hex2"
-trace "prog.hex2" /tmp/prog.hex2
-
-step "catm: ELF header + prog.hex2 -> linked.hex2"
-cp "$ELF_HDR" /tmp/elf.hex2
-"$TOOLS/catm" /tmp/linked.hex2 /tmp/elf.hex2 /tmp/prog.hex2
-cp /tmp/linked.hex2 "$WORK/linked.hex2"
-trace "linked.hex2" /tmp/linked.hex2
+step "M1pp: combined.M1pp -> expanded.hex2pp"
+"$M1PP_BIN" /tmp/combined.M1pp /tmp/expanded.hex2pp
+cp /tmp/expanded.hex2pp "$WORK/expanded.hex2pp"
+trace "expanded.hex2pp" /tmp/expanded.hex2pp
-step "hex2-0: linked.hex2 -> $OUT"
-"$TOOLS/hex2-0" /tmp/linked.hex2 /tmp/prog.bin
+step "hex2pp: ELF header + expanded.hex2pp -> $OUT"
+"$HEX2PP_BIN" -f "$ELF_HDR" -f /tmp/expanded.hex2pp -o /tmp/prog.bin
cp /tmp/prog.bin "$OUT"
chmod 0700 "$OUT"
trace "$OUT" "$OUT"
diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh
@@ -78,7 +78,19 @@ fail() {
}
## --- m1pp suite ---------------------------------------------------------
-
+##
+## Two-step check:
+## 1. Run M1pp against tests/M1pp/<name>.M1pp; diff its text output
+## against tests/M1pp/<name>.expected (parity with the C oracle).
+## 2. Feed that output file to hex2pp as an assembly smoke test. The
+## new M1pp emits bare hex consumable directly by hex2pp; this
+## catches cases where M1pp produces parity-correct text that
+## hex2pp can't actually parse (e.g. stray whitespace bugs,
+## malformed sigil expressions).
+##
+## Both steps must pass for the fixture to PASS. The smoke-test step
+## uses hex2pp's --non-executable mode and writes to a throwaway path
+## — we only care about hex2pp's exit status, not the bytes.
run_m1pp_suite() {
if [ -z "$NAMES" ]; then
NAMES=$(discover tests/M1pp M1pp)
@@ -98,7 +110,7 @@ run_m1pp_suite() {
expected_content=$(cat "$expected")
label="[$ARCH] $name"
- outfile=build/$ARCH/tests/M1pp/$name.M1
+ outfile=build/$ARCH/tests/M1pp/$name.hex2pp
mkdir -p "$(dirname "$outfile")"
rm -f "$outfile"
"./build/$ARCH/M1pp/M1pp" "$m1pp_src" "$outfile" >/dev/null 2>&1 || true
@@ -108,12 +120,27 @@ run_m1pp_suite() {
actual=
fi
- if [ "$actual" = "$expected_content" ]; then
- report "$label" PASS
- else
+ if [ "$actual" != "$expected_content" ]; then
report "$label" FAIL
show_diff "$expected_content" "$actual"
+ continue
fi
+
+ # Smoke test: feed M1pp's output through hex2pp. We don't run
+ # the resulting bytes (the fixture isn't a complete program),
+ # only verify hex2pp accepts the syntax. --non-executable
+ # skips the chmod on the throwaway output.
+ binfile=build/$ARCH/tests/M1pp/$name.bin
+ hex2pp_log=build/$ARCH/tests/M1pp/$name.hex2pp.log
+ rm -f "$binfile" "$hex2pp_log"
+ if ! "./build/$ARCH/hex2pp/hex2pp" --non-executable \
+ -f "$outfile" -o "$binfile" \
+ >"$hex2pp_log" 2>&1; then
+ fail "$label" "hex2pp smoke-test failed:" "$hex2pp_log"
+ continue
+ fi
+
+ report "$label" PASS
done
}
diff --git a/scripts/boot-undef.sh b/scripts/boot-undef.sh
@@ -1,23 +1,23 @@
#!/bin/sh
-## scripts/boot-undef.sh — list M1/hex2 references with no matching definition.
+## scripts/boot-undef.sh — list hex2pp references with no matching definition.
##
-## Cheap-and-cheerful linker-diagnostic for the live boot pipeline. M0 emits
-## linked.hex2 in asm-style: `:label` defines, `&label` references. A symbol
-## with refs but no def is unresolved — the same thing hex2-0 would flag,
-## except hex2-0 only prints the first miss before bailing, so this dumps
-## the full list.
+## Cheap-and-cheerful linker-diagnostic for the live boot pipeline. M1pp
+## emits expanded.hex2pp in asm-style: `:label` defines, `&label` (and
+## other-sigil) references. A symbol with refs but no def is unresolved
+## — the same thing hex2pp would flag, except hex2pp only prints the
+## first miss before bailing, so this dumps the full list.
##
-## Defaults to the linked.hex2 produced by the most recent
+## Defaults to the expanded.hex2pp produced by the most recent
## `make tcc-boot2 ARCH=<arch>` build. Run that first if missing.
##
## Caveats:
-## - Reads post-m1pp / post-M0 output, so %la(...) macro args are already
+## - Reads post-m1pp output, so %la(...) macro args are already
## expanded. Running this on the raw .P1pp would miss them.
## - m1pp rewrites local labels (@body, @end, ...) to per-expansion suffixed
## names, so they appear under both refs and defs naturally.
##
## Usage:
-## scripts/boot-undef.sh [--arch <aarch64|amd64|riscv64>] [<linked.hex2>]
+## scripts/boot-undef.sh [--arch <aarch64|amd64|riscv64>] [<expanded.hex2pp>]
set -eu
@@ -34,7 +34,7 @@ while [ $# -gt 0 ]; do
done
ROOT=$(cd "$(dirname "$0")/.." && pwd)
-: "${LINKED:=$ROOT/build/$ARCH/.work/tcc-boot2/tcc-boot2/linked.hex2}"
+: "${LINKED:=$ROOT/build/$ARCH/.work/tcc-boot2/tcc-boot2/expanded.hex2pp}"
[ -r "$LINKED" ] || {
echo "missing $LINKED" >&2
diff --git a/scripts/boot2.sh b/scripts/boot2.sh
@@ -1,11 +1,17 @@
#!/bin/sh
## boot2.sh — stage 2 of the bootstrap chain.
##
-## In-container script. Builds the M1pp expander ELF from the
-## checked-in pre-pruned P1 backend table (P1/P1-$ARCH.M1) plus their
-## sources, by calling scripts/boot-build-p1.sh.
+## In-container script. Builds the two self-hosted tools (M1pp expander
+## ELF and hex2pp assembler/linker ELF) from their pure-P1 sources via
+## the seed M0+hex2 pipeline (boot-build-p1.sh), using the checked-in
+## pre-pruned P1 backend table (P1/P1-$ARCH.M1).
##
## Outputs: build/$ARCH/M1pp/M1pp
+## build/$ARCH/hex2pp/hex2pp
+##
+## After this stage completes, the seed M0/hex2-0 tools are no longer
+## used by any downstream target — every .P1pp source flows through
+## M1pp + hex2pp instead (see scripts/boot-build-p1pp.sh).
##
## Env: ARCH=aarch64|amd64|riscv64
@@ -18,4 +24,5 @@ case "$ARCH" in
*) echo "boot2.sh: unsupported arch '$ARCH'" >&2; exit 1 ;;
esac
-sh scripts/boot-build-p1.sh M1pp/M1pp.P1 build/$ARCH/M1pp/M1pp
+sh scripts/boot-build-p1.sh M1pp/M1pp.P1 build/$ARCH/M1pp/M1pp
+sh scripts/boot-build-p1.sh hex2pp/hex2pp.P1 build/$ARCH/hex2pp/hex2pp
diff --git a/scripts/build-native-tools.sh b/scripts/build-native-tools.sh
@@ -5,8 +5,9 @@
## the requested tool; Make handles staleness.
##
## Tools (NOT in the bootstrap chain — fast host substitutes):
-## M1, hex2 — built from upstream mescc-tools sources
-## m1pp — built from M1pp/M1pp.c (the C oracle)
+## M1, hex2 — built from upstream mescc-tools sources (legacy oracle)
+## m1pp — built from M1pp/M1pp.c (the C oracle for the new M1pp)
+## hex2pp — built from hex2pp/hex2pp.c (the C oracle for hex2++)
##
## Source lookup for M1/hex2 (first match wins):
## 1. $MESCC_TOOLS_SRC (direct override)
@@ -17,11 +18,11 @@
## external dep explicit. Set LIVE_BOOTSTRAP=<path> the same way
## scripts/diag-livebootstrap-qemu.sh does.
##
-## Usage: scripts/build-native-tools.sh <M1|hex2|m1pp>
+## Usage: scripts/build-native-tools.sh <M1|hex2|m1pp|hex2pp>
set -eu
-[ "$#" -eq 1 ] || { echo "usage: $0 <M1|hex2|m1pp>" >&2; exit 2; }
+[ "$#" -eq 1 ] || { echo "usage: $0 <M1|hex2|m1pp|hex2pp>" >&2; exit 2; }
TOOL=$1
REPO=$(cd "$(dirname "$0")/.." && pwd)
@@ -74,6 +75,9 @@ case "$TOOL" in
m1pp)
$CC -O2 -std=c99 M1pp/M1pp.c -o "$OUT/m1pp"
;;
+ hex2pp)
+ $CC -O2 -std=c99 hex2pp/hex2pp.c -o "$OUT/hex2pp"
+ ;;
*)
echo "build-native-tools.sh: unknown tool '$TOOL'" >&2
exit 2