boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 1857b6a6348084d906fc93dfec79329ffd4cd439
parent 38c3a3448502859a26a4e6f2d6a481bd6ece8504
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 14:09:10 -0700

hex2pp.P1, update M1pp.P1

Diffstat:
MM1pp/M1pp.P1 | 817+++++++++++++++++++++++++++++++++++--------------------------------------------
MMakefile | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Ahex2pp/hex2pp.P1 | 3727+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mscripts/boot-build-p1pp.sh | 63+++++++++++++++++++++++++++++----------------------------------
Mscripts/boot-run-tests.sh | 37++++++++++++++++++++++++++++++++-----
Mscripts/boot-undef.sh | 20++++++++++----------
Mscripts/boot2.sh | 15+++++++++++----
Mscripts/build-native-tools.sh | 12++++++++----
8 files changed, 4242 insertions(+), 539 deletions(-)

diff --git a/M1pp/M1pp.P1 b/M1pp/M1pp.P1 @@ -13,9 +13,15 @@ ## stream and walks it token-by-token, dispatching to ## define_macro at line-start %macro, emit_newline / ## emit_token for pass-through, expand_builtin_call for -## !@%$ and %select, and expand_call for user macros. -## Macro expansions and %select push fresh streams onto -## streams[]; popping rewinds the expansion pool. +## !@%$, %select, %str, %bytes, %local, and expand_call +## for user macros. Macro expansions and %select push +## fresh streams onto streams[]; popping rewinds the +## expansion pool. +## +## Output is consumed directly by hex2pp -- there is no intermediate M0/hex2 +## stage. Lexical scoping for control-flow labels is delegated to hex2pp's +## nestable .scope / .endscope; M1pp itself only handles per-expansion +## macro hygiene labels (:@name / &@name). ## define_macro Parse %macro header+body; record in macros[] + ## macro_body_tokens[]; consume through the %endm line ## without emitting output. @@ -122,11 +128,6 @@ DEFINE M1PP_EXPR_FRAMES_CAP 0009000000000000 ## Common cap used by macro params, call args, and expression args. DEFINE M1PP_MAX_PARAMS 1000000000000000 -## Scope-stack cap. 32 nested scopes max; each slot is a 16-byte TextSpan -## (ptr + len) pointing into stable text (input_buf or text_buf), so -## scope_stack is 32 × 16 = 512 bytes. -DEFINE M1PP_MAX_SCOPE_DEPTH 2000000000000000 - ## ExprOp codes (indexed by apply_expr_op). 
DEFINE EXPR_ADD 0000000000000000 DEFINE EXPR_SUB 0100000000000000 @@ -164,7 +165,6 @@ DEFINE EXPR_INVALID 1200000000000000 ## expr_frames 2304 B DEFINE OFF_paste_scratch 0000000000000000 DEFINE OFF_local_label_scratch 0001000000000000 -DEFINE OFF_scope_stack 8001000000000000 DEFINE OFF_df_name_scratch 8003000000000000 DEFINE OFF_ebc_str_scratch 8004000000000000 DEFINE OFF_arg_starts 8005000000000000 @@ -866,51 +866,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 la_br &emit_token_skip beq_t0,t1 - # Scope rewrite: TOK_WORD whose text begins with "::" (len>=3) becomes - # a scoped definition, "&::" (len>=4) a scoped reference. Dispatch to - # emit_scope_rewrite with a1=skip, a2=sigil. - ld_a1,a0,0 - li_a2 TOK_WORD - la_br &emit_token_after_scope - bne_a1,a2 - ld_a2,a0,16 - li_a3 %3 %0 - la_br &emit_token_after_scope - blt_a2,a3 - ld_a3,a0,8 - lb_t0,a3,0 - li_t1 %58 %0 - la_br &emit_token_check_amp - bne_t0,t1 - lb_t0,a3,1 - li_t1 %58 %0 - la_br &emit_token_after_scope - bne_t0,t1 - li_a1 %2 %0 - li_a2 %58 %0 - la_br &emit_scope_rewrite - b -:emit_token_check_amp - li_t1 %38 %0 - la_br &emit_token_after_scope - bne_t0,t1 - ld_a2,a0,16 - li_t2 %4 %0 - la_br &emit_token_after_scope - blt_a2,t2 - lb_t0,a3,1 - li_t1 %58 %0 - la_br &emit_token_after_scope - bne_t0,t1 - lb_t0,a3,2 - la_br &emit_token_after_scope - bne_t0,t1 - li_a1 %3 %0 - li_a2 %38 %0 - la_br &emit_scope_rewrite - b - -:emit_token_after_scope # if (output_need_space) emit ' ' (skip the space for the first token on a line) la_a1 &output_need_space ld_t0,a1,0 @@ -970,170 +925,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 :emit_token_skip ret -## emit_scope_rewrite: branch target from emit_token for tokens whose text -## starts with "::" (scoped definition) or "&::" (scoped reference). -## Writes sigil + scope1 + "__" + ... + scopeN + "__" + name directly to -## output_buf; with an empty scope stack the middle collapses so output is -## just sigil + name (pass-through). 
Not a callable function: reached by `b`, -## shares emit_token's leaf return address, exits via `ret`. -## -## Register inputs: -## a0 = tok_ptr -## a1 = skip (2 for "::", 3 for "&::") -## a2 = sigil (':' = 58 for definitions, '&' = 38 for references) -:emit_scope_rewrite - # name_len = tok->text_len - skip; fail if zero. - ld_a3,a0,16 - sub_a3,a3,a1 - la_br &err_bad_scope_label - beqz_a3 - - # Spill inputs — the byte-copy loops below reuse a0..a3/t0..t2 freely. - la_t0 &sr_tok_ptr - st_a0,t0,0 - la_t0 &sr_skip - st_a1,t0,0 - la_t0 &sr_sigil - st_a2,t0,0 - la_t0 &sr_name_len - st_a3,t0,0 - - # Emit leading ' ' if output_need_space. - la_a0 &output_need_space - ld_t0,a0,0 - la_br &sr_post_space - beqz_t0 - la_a1 &output_used - ld_t0,a1,0 - li_t1 M1PP_OUTPUT_CAP - la_br &err_output_overflow - beq_t0,t1 - la_a2 &output_buf_ptr - ld_a2,a2,0 - add_a2,a2,t0 - li_t1 %32 %0 - sb_t1,a2,0 - addi_t0,t0,1 - st_t0,a1,0 -:sr_post_space - - # Emit the sigil byte. - la_a0 &output_used - ld_t0,a0,0 - li_t1 M1PP_OUTPUT_CAP - la_br &err_output_overflow - beq_t0,t1 - la_a1 &output_buf_ptr - ld_a1,a1,0 - add_a1,a1,t0 - la_a2 &sr_sigil - ld_a3,a2,0 - sb_a3,a1,0 - addi_t0,t0,1 - st_t0,a0,0 - - # Emit each scope frame's bytes followed by "__". 
- li_t0 %0 %0 -:sr_scope_outer - la_a0 &scope_depth - ld_a1,a0,0 - la_br &sr_tail_start - beq_t0,a1 - - la_a0 &scope_stack_ptr - ld_a0,a0,0 - li_a2 %16 %0 - mul_a2,a2,t0 - add_a0,a0,a2 - ld_a1,a0,0 - ld_a2,a0,8 - li_a3 %0 %0 -:sr_scope_inner - la_br &sr_scope_sep - beq_a3,a2 - la_t1 &output_used - ld_t2,t1,0 - li_a0 M1PP_OUTPUT_CAP - la_br &err_output_overflow - beq_t2,a0 - la_a0 &output_buf_ptr - ld_a0,a0,0 - add_a0,a0,t2 - add_t2,a1,a3 - lb_t2,t2,0 - sb_t2,a0,0 - la_t1 &output_used - ld_t2,t1,0 - addi_t2,t2,1 - st_t2,t1,0 - addi_a3,a3,1 - la_br &sr_scope_inner - b -:sr_scope_sep - la_a0 &output_used - ld_t1,a0,0 - li_t2 M1PP_OUTPUT_CAP - la_br &err_output_overflow - beq_t1,t2 - la_a1 &output_buf_ptr - ld_a1,a1,0 - add_a1,a1,t1 - li_a2 %95 %0 - sb_a2,a1,0 - addi_t1,t1,1 - st_t1,a0,0 - la_a0 &output_used - ld_t1,a0,0 - li_t2 M1PP_OUTPUT_CAP - la_br &err_output_overflow - beq_t1,t2 - la_a1 &output_buf_ptr - ld_a1,a1,0 - add_a1,a1,t1 - li_a2 %95 %0 - sb_a2,a1,0 - addi_t1,t1,1 - st_t1,a0,0 - addi_t0,t0,1 - la_br &sr_scope_outer - b - -:sr_tail_start - la_a0 &sr_tok_ptr - ld_a1,a0,0 - ld_a2,a1,8 - la_a0 &sr_skip - ld_a3,a0,0 - add_a1,a2,a3 - la_a0 &sr_name_len - ld_a2,a0,0 - li_a3 %0 %0 -:sr_tail_loop - la_br &sr_tail_done - beq_a3,a2 - la_t1 &output_used - ld_t2,t1,0 - li_a0 M1PP_OUTPUT_CAP - la_br &err_output_overflow - beq_t2,a0 - la_a0 &output_buf_ptr - ld_a0,a0,0 - add_a0,a0,t2 - add_t2,a1,a3 - lb_t2,t2,0 - sb_t2,a0,0 - la_t1 &output_used - ld_t2,t1,0 - addi_t2,t2,1 - st_t2,t1,0 - addi_a3,a3,1 - la_br &sr_tail_loop - b -:sr_tail_done - la_a0 &output_need_space - li_a1 %1 %0 - st_a1,a0,0 - ret ## --- Main processor ---------------------------------------------------------- ## Stream-driven loop. 
Pushes source_tokens as the initial stream, then drives @@ -1244,7 +1035,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 li_a2 %5 %0 la_br &tok_eq_const call - la_br &proc_check_scope + la_br &proc_check_frame beqz_a0 ld_a0,sp,0 ld_a1,sp,8 @@ -1258,46 +1049,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 la_br &proc_restore_and_loop b -## ---- tok eq "%scope" ---- -:proc_check_scope - ld_a0,sp,8 - la_a1 &const_scope - li_a2 %6 %0 - la_br &tok_eq_const - call - la_br &proc_check_endscope - beqz_a0 - ld_a0,sp,0 - ld_a1,sp,8 - la_br &proc_save_pos_and_ls - call - ld_a0,sp,0 - ld_a0,a0,8 - la_br &push_scope - call - la_br &proc_restore_and_loop - b - -## ---- tok eq "%endscope" ---- -:proc_check_endscope - ld_a0,sp,8 - la_a1 &const_endscope - li_a2 %9 %0 - la_br &tok_eq_const - call - la_br &proc_check_frame - beqz_a0 - ld_a0,sp,0 - ld_a1,sp,8 - la_br &proc_save_pos_and_ls - call - ld_a0,sp,0 - ld_a0,a0,8 - la_br &pop_scope - call - la_br &proc_restore_and_loop - b - ## ---- tok eq "%frame" ---- :proc_check_frame ld_a0,sp,8 @@ -1383,7 +1134,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 la_br &proc_check_macro beqz_a1 - # try the six builtin names: ! @ % $ %select %str + # try the eight builtin names: ! @ % $ %select %str %bytes %local mov_a0,t0 la_a1 &const_bang li_a2 %1 %0 @@ -1433,6 +1184,13 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 call la_br &proc_do_builtin bnez_a0 + ld_a0,sp,8 + la_a1 &const_bytes + li_a2 %6 %0 + la_br &tok_eq_const + call + la_br &proc_do_builtin + bnez_a0 la_br &proc_check_macro b @@ -1512,11 +1270,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 b :proc_done - # Every %scope must be matched by an %endscope before EOF. - la_a0 &scope_depth - ld_t0,a0,0 - la_br &err_scope_not_closed - bnez_t0 # Every %frame must be matched by an %endframe before EOF. 
la_a0 &frame_active ld_t0,a0,0 @@ -1548,117 +1301,11 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 la_br &proc_loop b -## --- %scope / %endscope handlers -------------------------------------------- -## Called at proc_pos == the `%scope` / `%endscope` word on a line-start. -## Input: a0 = stream end (pointer one past last token in the current stream). -## Output: proc_pos advanced past the trailing newline (or stream end). - -## push_scope(a0 = stream_end): consume `%scope NAME` (header self-terminates -## at NAME). Name must be a single WORD token. Newlines between %scope and -## NAME, and between NAME and the body, are insignificant. -:push_scope - enter_0 - - # proc_pos += 24 (skip past the `%scope` token). - la_t0 &proc_pos - ld_t1,t0,0 - addi_t1,t1,32 - st_t1,t0,0 - - # Skip newlines between `%scope` and NAME. - la_a1 &psc_stream_end - st_a0,a1,0 # save stream_end across the call - la_br &proc_skip_newlines - call - la_a1 &psc_stream_end - ld_a0,a1,0 - la_t0 &proc_pos - ld_t1,t0,0 - - # Require a WORD name token within the stream. - la_br &err_bad_scope_header - beq_t1,a0 - ld_t2,t1,0 - la_br &err_bad_scope_header - bnez_t2 - - # scope_depth < MAX_SCOPE_DEPTH? - la_a1 &scope_depth - ld_a2,a1,0 - li_a3 M1PP_MAX_SCOPE_DEPTH - la_br &err_scope_depth_overflow - beq_a2,a3 - - # scope_stack[scope_depth] = (name.text_ptr, name.text_len) - la_a3 &scope_stack_ptr - ld_a3,a3,0 - li_t0 %16 %0 - mul_t0,t0,a2 - add_a3,a3,t0 - ld_t0,t1,8 - st_t0,a3,0 - ld_t0,t1,16 - st_t0,a3,8 - - # scope_depth++ - addi_a2,a2,1 - st_a2,a1,0 - - # proc_pos += 24 (past the name). - la_t0 &proc_pos - ld_t1,t0,0 - addi_t1,t1,32 - st_t1,t0,0 - - # Newlines between `%scope NAME` and the body content are insignificant. - la_br &proc_skip_newlines - call - eret - -## pop_scope(a0 = stream_end): consume `%endscope` followed by a strict -## TOK_NEWLINE — extra tokens on the line are now an error. -:pop_scope - enter_0 - - # scope_depth > 0? 
- la_a1 &scope_depth - ld_a2,a1,0 - la_br &err_scope_underflow - beqz_a2 - addi_a2,a2,neg1 - st_a2,a1,0 - - # proc_pos += 24 (past the `%endscope` token). - la_t0 &proc_pos - ld_t1,t0,0 - addi_t1,t1,32 - st_t1,t0,0 - - # Strict: the token immediately after `%endscope` must be TOK_NEWLINE. - la_br &err_bad_scope_header - beq_t1,a0 - ld_t2,t1,0 - li_t0 TOK_NEWLINE - la_br &err_bad_scope_header - bne_t2,t0 - # Consume the trailing newline only when %endscope sat at line-start; - # mid-line %endscope leaves the newline so it can be emitted. - la_t0 &proc_line_start - ld_a1,t0,0 - la_br &pop_done - beqz_a1 - addi_t1,t1,32 - la_t0 &proc_pos - st_t1,t0,0 -:pop_done - eret - ## --- %frame / %endframe handlers -------------------------------------------- -## Single-slot frame state, separate from the %scope stack. push_frame(a0= -## stream_end) parses `%frame NAME`, stashes name's TextSpan in -## current_frame_ptr/_len, and sets frame_active = 1. pop_frame(a0= -## stream_end) clears frame_active. Frames do not nest — a second push -## without an intervening pop is fatal. +## Single-slot frame state used by %local. push_frame(a0=stream_end) parses +## `%frame NAME`, stashes name's TextSpan in current_frame_ptr/_len, and +## sets frame_active = 1. pop_frame(a0=stream_end) clears frame_active. +## Frames do not nest — a second push without an intervening pop is fatal. :push_frame enter_0 @@ -2265,7 +1912,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 # Strict: the closing '}' must be immediately followed by TOK_NEWLINE. # Consume that newline only when the directive started at line-start, - # mirroring %endm / %endscope. + # mirroring %endm / %endframe. la_a0 &proc_pos ld_t0,a0,0 la_a1 &source_end @@ -4900,7 +4547,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 ## skip_expr_newlines(a0=pos, a1=end) -> a0 = new pos. Leaf. ## Advance pos past consecutive TOK_NEWLINE tokens so expressions may span ## lines. 
Also used by directive header parsers to make whitespace -## (newlines specifically) insignificant inside %macro/%struct/%scope +## (newlines specifically) insignificant inside %macro/%struct/%frame ## headers and around `##` paste operands. :skip_expr_newlines :sen_loop @@ -5474,10 +5121,10 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 ## emit_hex_value(a0=value_u64, a1=byte_count) -> void (fatal on overflow) ## byte_count must be 1, 2, 4, or 8. Serialize value into (2 * byte_count) ## uppercase hex chars, little-endian byte order (byte i at char indices -## 2i, 2i+1) WRAPPED IN SINGLE QUOTES so the downstream M0 assembler -## treats it as a hex-byte string literal rather than parsing it as a -## decimal numeric token. Total emitted text length = 2 + 2 * byte_count; -## emitted as a TOK_STRING via append_text + emit_token. +## 2i, 2i+1) as bare hex digits. hex2pp's byte-stream parser groups every +## two hex digits into one byte; no quoting or separators are needed. +## Total emitted text length = 2 * byte_count; emitted as a TOK_WORD via +## append_text + emit_token. 
:emit_hex_value enter_0 @@ -5487,11 +5134,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 la_a2 &ehv_bytes st_a1,a2,0 - # scratch[0] = '\'' - la_a1 &ehv_scratch - li_a2 %39 %0 - sb_a2,a1,0 - # i = 0 li_t0 %0 %0 :emit_hex_value_loop @@ -5513,17 +5155,16 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 # low = byte & 0x0F andi_a3,a3,15 - # scratch[1 + 2*i] = hex_chars[high] + # scratch[2*i] = hex_chars[high] la_a1 &hex_chars add_a1,a1,a2 lb_a2,a1,0 la_a1 &ehv_scratch shli_a3,t0,1 add_a1,a1,a3 - addi_a1,a1,1 sb_a2,a1,0 - # scratch[1 + 2*i+1] = hex_chars[low] (reload low from byte & 0x0F) + # scratch[2*i+1] = hex_chars[low] (reload low from byte & 0x0F) la_a1 &ehv_value ld_t2,a1,0 andi_a3,t2,255 @@ -5534,7 +5175,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 la_a1 &ehv_scratch shli_a3,t0,1 add_a1,a1,a3 - addi_a1,a1,2 + addi_a1,a1,1 sb_a2,a1,0 # ehv_value >>= 8 @@ -5549,35 +5190,23 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 b :emit_hex_value_emit - # scratch[1 + 2*bytes] = '\'' (closing quote) - la_a0 &ehv_scratch - la_a1 &ehv_bytes - ld_a1,a1,0 - shli_a1,a1,1 - add_a0,a0,a1 - addi_a0,a0,1 - li_a2 %39 %0 - sb_a2,a0,0 - - # text_ptr = append_text(&ehv_scratch, 2 + 2 * ehv_bytes) + # text_ptr = append_text(&ehv_scratch, 2 * ehv_bytes) la_a0 &ehv_scratch la_a1 &ehv_bytes ld_a1,a1,0 shli_a1,a1,1 - addi_a1,a1,2 la_br &append_text call - # ehv_token.kind = TOK_STRING; ehv_token.text_ptr = text_ptr; - # ehv_token.text_len = 2 + 2 * ehv_bytes; ehv_token.tight = 0. + # ehv_token.kind = TOK_WORD; ehv_token.text_ptr = text_ptr; + # ehv_token.text_len = 2 * ehv_bytes; ehv_token.tight = 0. la_a2 &ehv_token - li_a3 TOK_STRING + li_a3 TOK_WORD st_a3,a2,0 st_a0,a2,8 la_a1 &ehv_bytes ld_a1,a1,0 shli_a1,a1,1 - addi_a1,a1,2 st_a1,a2,16 li_a1 %0 %0 st_a1,a2,24 @@ -5590,7 +5219,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 eret ## ============================================================================ -## --- Builtin dispatcher ( ! 
@ % $ %select ) --------------------------------- +## --- Builtin dispatcher ( ! @ % $ %select %str %bytes %local ) ------------- ## ============================================================================ ## expand_builtin_call(a0=stream_ptr, a1=builtin_tok) -> void (fatal on bad) @@ -5955,6 +5584,17 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 la_br &ebc_local bnez_a0 + # if tok_eq_const(tok, "%bytes", 6) -> bytes path + la_a0 &ebc_stream + ld_a0,a0,0 + ld_a0,a0,16 + la_a1 &const_bytes + li_a2 %6 %0 + la_br &tok_eq_const + call + la_br &ebc_bytes_handler + bnez_a0 + # else: fatal la_br &err_bad_macro_header b @@ -6277,6 +5917,268 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 eret +## %bytes("STR"): emit the raw bytes of a "..."-quoted string as one +## contiguous run of hex bytes. Recognised escapes inside the string: +## \n -> 0x0A \t -> 0x09 \r -> 0x0D \0 -> 0x00 +## \\ -> 0x5C \" -> 0x22 \xNN -> byte NN (two hex digits) +## Any other backslash escape is fatal. No NUL terminator is appended; +## the caller writes one explicitly (e.g. "00") if needed. An empty +## string produces no output. +## +## Implementation strategy: each byte is emitted independently via +## emit_hex_value(byte, 1). hex2pp's parse_byte_stream accumulates +## adjacent hex digits across whitespace, so "68 69 0A" reads as the +## same three bytes as "68690A" — there is no need to coalesce them +## into a single output WORD here. +## +## Validation: arg_count == 1, arg span is exactly one token, kind is +## TOK_STRING, len >= 2, ptr[0] == '"'. For \xNN, the next two source +## bytes must both be valid hex digits (0-9, a-f, A-F). 
+:ebc_bytes_handler + enter_0 + + # require arg_count == 1 + la_a0 &arg_count + ld_t0,a0,0 + li_t1 %1 %0 + la_br &err_bad_macro_header + bne_t0,t1 + + # require arg span is exactly one token (32 bytes) + la_a0 &arg_starts_ptr + ld_a0,a0,0 + ld_t0,a0,0 + la_a1 &arg_ends_ptr + ld_a1,a1,0 + ld_t1,a1,0 + sub_t2,t1,t0 + li_a2 %32 %0 + la_br &err_bad_macro_header + bne_t2,a2 + + # require arg_tok->kind == TOK_STRING + ld_a3,t0,0 + li_a2 TOK_STRING + la_br &err_bad_macro_header + bne_a3,a2 + + # require arg_tok->text.len >= 2 + ld_a1,t0,16 + li_a2 %2 %0 + la_br &err_bad_macro_header + blt_a1,a2 + + # require arg_tok->text.ptr[0] == '"'. + # Save text_ptr to ebc_b_src_ptr (will += 1 below) and text_len to + # ebc_b_src_len (-= 2 below). Reading the first byte uses lb_a3,a3,0 + # which clobbers a3, so do the save first. + ld_a3,t0,8 + la_a0 &ebc_b_src_ptr + st_a3,a0,0 + la_a0 &ebc_b_src_len + st_a1,a0,0 + lb_a3,a3,0 + li_a2 %34 %0 + la_br &err_bad_macro_header + bne_a3,a2 + + # src_ptr += 1; src_len -= 2 (strip surrounding quotes) + la_a0 &ebc_b_src_ptr + ld_a3,a0,0 + addi_a3,a3,1 + st_a3,a0,0 + la_a0 &ebc_b_src_len + ld_a1,a0,0 + addi_a1,a1,neg2 + st_a1,a0,0 + + # ebc_b_src_i = 0 + li_a0 %0 %0 + la_a1 &ebc_b_src_i + st_a0,a1,0 + +:ebc_b_loop + # if (src_i == src_len) done + la_a0 &ebc_b_src_i + ld_t0,a0,0 + la_a1 &ebc_b_src_len + ld_t1,a1,0 + la_br &ebc_b_done + beq_t0,t1 + + # c = src_ptr[src_i]; src_i++ + # P1 lacks lb_a3,a0,0 — bounce through a1 (mov_a1,a0; lb_a3,a1,0). + la_a0 &ebc_b_src_ptr + ld_a0,a0,0 + add_a0,a0,t0 + mov_a1,a0 + lb_a3,a1,0 + addi_t0,t0,1 + la_a1 &ebc_b_src_i + st_t0,a1,0 + + # if (c == '\\') -> escape path + li_a2 %92 %0 + la_br &ebc_b_escape + beq_a3,a2 + + # literal byte: emit_hex_value(c, 1) and reloop + mov_a0,a3 + li_a1 %1 %0 + la_br &emit_hex_value + call + la_br &ebc_b_loop + b + +:ebc_b_escape + # Read the escape character; require at least one byte left. 
+ la_a0 &ebc_b_src_i + ld_t0,a0,0 + la_a1 &ebc_b_src_len + ld_t1,a1,0 + la_br &err_bad_escape + beq_t0,t1 + la_a0 &ebc_b_src_ptr + ld_a0,a0,0 + add_a0,a0,t0 + mov_a1,a0 + lb_a3,a1,0 # a3 = e + addi_t0,t0,1 + la_a1 &ebc_b_src_i + st_t0,a1,0 + + # Single-char escapes: dispatch via beq chain (matches the existing + # proc_check_<directive> pattern). Each branch loads the resulting + # byte into a3 and falls through to ebc_b_emit_one. + li_a2 %110 %0 # 'n' + la_br &ebc_b_esc_n + beq_a3,a2 + li_a2 %116 %0 # 't' + la_br &ebc_b_esc_t + beq_a3,a2 + li_a2 %114 %0 # 'r' + la_br &ebc_b_esc_r + beq_a3,a2 + li_a2 %48 %0 # '0' + la_br &ebc_b_esc_zero + beq_a3,a2 + li_a2 %92 %0 # '\\' + la_br &ebc_b_esc_bs + beq_a3,a2 + li_a2 %34 %0 # '"' + la_br &ebc_b_esc_dq + beq_a3,a2 + li_a2 %120 %0 # 'x' + la_br &ebc_b_esc_hex + beq_a3,a2 + la_br &err_bad_escape + b + +:ebc_b_esc_n + li_a3 %10 %0 # 0x0A + la_br &ebc_b_emit_one + b +:ebc_b_esc_t + li_a3 %9 %0 # 0x09 + la_br &ebc_b_emit_one + b +:ebc_b_esc_r + li_a3 %13 %0 # 0x0D + la_br &ebc_b_emit_one + b +:ebc_b_esc_zero + li_a3 %0 %0 # 0x00 + la_br &ebc_b_emit_one + b +:ebc_b_esc_bs + li_a3 %92 %0 # 0x5C + la_br &ebc_b_emit_one + b +:ebc_b_esc_dq + li_a3 %34 %0 # 0x22 + la_br &ebc_b_emit_one + b + +:ebc_b_emit_one + # Common tail for single-char escapes: emit_hex_value(a3, 1), reloop. + mov_a0,a3 + li_a1 %1 %0 + la_br &emit_hex_value + call + la_br &ebc_b_loop + b + +:ebc_b_esc_hex + # \xNN: require two hex chars at src[src_i], src[src_i+1]. + la_a0 &ebc_b_src_i + ld_t0,a0,0 + la_a1 &ebc_b_src_len + ld_t1,a1,0 + sub_t2,t1,t0 # remaining = src_len - src_i + li_a3 %2 %0 + la_br &err_bad_escape + blt_t2,a3 + + # hi char: src[src_i]; decode via hex_digit_table[c]; fail if 0xFF. 
+ la_a0 &ebc_b_src_ptr + ld_a0,a0,0 + add_a0,a0,t0 + lb_a0,a0,0 # a0 = hi char + la_a1 &hex_digit_table + add_a1,a1,a0 + lb_a2,a1,0 # a2 = hi digit (or 0xFF) + li_a3 %255 %0 + la_br &err_bad_escape + beq_a2,a3 + # Stash hi digit into ebc_b_hex_hi for the (hi << 4) | lo combine + # below — the lo-digit lookup clobbers a2. + la_a0 &ebc_b_hex_hi + st_a2,a0,0 + + # advance past hi char + la_a0 &ebc_b_src_i + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + + # lo char: src[src_i]; decode via hex_digit_table[c]; fail if 0xFF. + la_a0 &ebc_b_src_ptr + ld_a0,a0,0 + add_a0,a0,t0 + lb_a0,a0,0 # a0 = lo char + la_a1 &hex_digit_table + add_a1,a1,a0 + lb_a2,a1,0 # a2 = lo digit (or 0xFF) + li_a3 %255 %0 + la_br &err_bad_escape + beq_a2,a3 + + # advance past lo char + la_a0 &ebc_b_src_i + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + + # byte = (hi << 4) | lo. shli_a3,t0,4 puts hi<<4 in a3, then or. + la_a0 &ebc_b_hex_hi + ld_t0,a0,0 + shli_a3,t0,4 + or_a3,a3,a2 + + la_br &ebc_b_emit_one + b + +:ebc_b_done + # stream->pos = ebc_call_end_pos; stream->line_start = 0 + la_a0 &ebc_stream + ld_a0,a0,0 + la_a1 &ebc_call_end_pos + ld_t0,a1,0 + st_t0,a0,16 + li_t1 %0 %0 + st_t1,a0,24 + eret + ## %local(NAME): emit-time variant. 
expand_builtin_call has already ## parse_args'd the call (so arg_starts/arg_ends/arg_count/call_end_pos ## are set), but expand_local_into_pool re-parses internally so it can @@ -6384,24 +6286,8 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 la_a0 &msg_unterminated_directive la_br &fatal b -:err_bad_scope_header - la_a0 &msg_bad_scope_header - la_br &fatal - b -:err_scope_depth_overflow - la_a0 &msg_scope_depth_overflow - la_br &fatal - b -:err_scope_underflow - la_a0 &msg_scope_underflow - la_br &fatal - b -:err_scope_not_closed - la_a0 &msg_scope_not_closed - la_br &fatal - b -:err_bad_scope_label - la_a0 &msg_bad_scope_label +:err_bad_escape + la_a0 &msg_bad_escape la_br &fatal b :err_bad_frame_header @@ -6506,11 +6392,10 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 :const_enum "%enum" :const_size "SIZE" :const_count "COUNT" -:const_scope "%scope" -:const_endscope "%endscope" :const_frame "%frame" :const_endframe "%endframe" :const_local "%local" +:const_bytes "%bytes" ## Suffix appended to the frame name when looking up <frame>_FRAME.<field>. :const_frame_suffix "_FRAME." @@ -6539,6 +6424,35 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 ## Nibble-to-hex lookup table for emit_hex_value. :hex_chars "0123456789ABCDEF" +## 256-byte hex-digit lookup table for %bytes(\xNN). Indexed by source +## byte; value is the digit (0..15) for '0'..'9'/'a'..'f'/'A'..'F', or +## 0xFF for any other input. The escape decoder reads two source bytes +## and combines (hi << 4) | lo into the emitted byte; either lookup +## returning 0xFF triggers err_bad_escape. 
+:hex_digit_table +## 0x00-0x1F: all invalid +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +## 0x20-0x2F: invalid +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +## 0x30-0x39 = '0'..'9' -> 0..9; 0x3A-0x3F invalid +'00010203040506070809FFFFFFFFFFFF' +## 0x40 invalid; 0x41-0x46 = 'A'..'F' -> 10..15; 0x47-0x5F invalid +'FF0A0B0C0D0E0FFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +## 0x60 invalid; 0x61-0x66 = 'a'..'f' -> 10..15; 0x67-0x7F invalid +'FF0A0B0C0D0E0FFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +## 0x80-0xFF: all invalid +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' +'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' + ## 256-byte char-class table for lex_loop / lex_word_scan. Indexed by the ## source byte `c`; value is the class code dispatched by lex_loop: ## 0 WORD (default; word_scan continues through this byte) @@ -6581,7 +6495,6 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 :bss_init_tbl &paste_scratch_ptr ZERO4 OFF_paste_scratch &local_label_scratch_ptr ZERO4 OFF_local_label_scratch -&scope_stack_ptr ZERO4 OFF_scope_stack &df_name_scratch_ptr ZERO4 OFF_df_name_scratch &ebc_str_scratch_ptr ZERO4 OFF_ebc_str_scratch &arg_starts_ptr ZERO4 OFF_arg_starts @@ -6620,11 +6533,7 @@ DEFINE OFF_local_lookup_scratch 0052850000000000 :msg_unbalanced_braces "unbalanced braces" '00' :msg_bad_directive "bad %struct/%enum directive" '00' :msg_unterminated_directive "unterminated %struct/%enum directive" '00' -:msg_bad_scope_header "bad scope header" '00' -:msg_scope_depth_overflow "scope depth overflow" '00' -:msg_scope_underflow "scope underflow" '00' -:msg_scope_not_closed "scope not closed" '00' -:msg_bad_scope_label "bad scope label" '00' +:msg_bad_escape "bad escape in %bytes" '00' :msg_bad_frame_header "bad frame header" '00' 
:msg_frame_already_active "frame already active" '00' :msg_frame_underflow "frame underflow" '00' @@ -6687,8 +6596,6 @@ ZERO8 ZERO8 :def_body_line_start ZERO8 -:psc_stream_end -ZERO8 :pf_stream_end ZERO8 @@ -6848,23 +6755,6 @@ ZERO8 ZERO8 ZERO8 ## append_text. Caps the combined tail + digit length at ~125 bytes, ## which is ample for any realistic local-label name. -## --- Scope-stack rewrite ----------------------------------------------------- -## scope_depth: current depth (0..32). -## scope_stack: 32 × TextSpan (16 bytes each) = 512 bytes. Each slot is -## (text_ptr, text_len) pointing into stable text memory (input_buf or -## text_buf — both append-only), so names are borrowed without copying. -## sr_* slots hold emit_scope_rewrite's inputs across the byte-copy loops. -:scope_depth -ZERO8 -:sr_tok_ptr -ZERO8 -:sr_skip -ZERO8 -:sr_sigil -ZERO8 -:sr_name_len -ZERO8 - ## %struct / %enum scratch. define_fielded calls append_text twice ## per synthesized macro, so every piece of state that must survive a call ## lives here rather than in a register. @@ -6939,9 +6829,9 @@ ZERO8 ## Builtin scratch. ## emit_hex_value: ehv_value/bytes hold the args; ehv_scratch is a 24-byte -## buffer (max 18 chars used: 2 quotes + 16 hex chars; rounded up to keep -## the next slot 8-byte aligned); ehv_token is a synthesized 32-byte -## Token { kind, text_ptr, text_len, tight }. +## buffer (max 16 chars used: 16 hex chars for an 8-byte $-emit; rounded +## up to keep the next slot 8-byte aligned); ehv_token is a synthesized +## 32-byte Token { kind, text_ptr, text_len, tight }. :ehv_value ZERO8 :ehv_bytes @@ -6990,6 +6880,21 @@ ZERO8 :ebc_str_token ZERO8 ZERO8 ZERO8 ZERO8 +## %bytes builtin scratch. ebc_b_src_ptr/_len/_i walk the input string +## across emit_hex_value calls (which clobber every caller-saved reg). +## ebc_b_hex_hi spills the high nibble across the second hex_digit_table +## lookup for the low nibble. 
Each source byte emits independently via +## emit_hex_value(byte, 1); hex2pp's parse_byte_stream coalesces the +## resulting space-separated runs back into a contiguous byte stream. +:ebc_b_src_ptr +ZERO8 +:ebc_b_src_len +ZERO8 +:ebc_b_src_i +ZERO8 +:ebc_b_hex_hi +ZERO8 + ## arg_starts[16] / arg_ends[16]: 16 × 8 = 128 bytes each, i.e. 4 ZERO32. ## Written by parse_args; read by expand_macro_tokens and expand_builtin_call. @@ -7022,8 +6927,6 @@ ZERO8 ZERO8 ZERO8 ZERO8 ZERO8 :local_label_scratch_ptr ZERO8 -:scope_stack_ptr -ZERO8 :df_name_scratch_ptr ZERO8 :ebc_str_scratch_ptr diff --git a/Makefile b/Makefile @@ -11,8 +11,9 @@ # scripts/Containerfile.busybox. # # Common entrypoints: -# make all (m1pp for ARCH) +# make all (m1pp + hex2pp for ARCH) # make m1pp build the m1pp expander for ARCH +# make hex2pp build the hex2pp assembler/linker for ARCH # make scheme1 build the scheme1 interpreter for ARCH # make cc catm the cc compiler source for ARCH # make tcc-flat flatten upstream tcc.c into one TU @@ -28,15 +29,26 @@ # opt-in: not part of `make test`) # make image build the per-arch container image # make tools bootstrap M0/hex2-0/catm for ARCH +# (seed-only: used to build m1pp + hex2pp) # make tables regen pre-pruned P1/P1-<arch>.M1 tables -# make tools-native build host-native M1/hex2/m1pp (opt-in) +# make tools-native build host-native M1/hex2/m1pp/hex2pp (opt-in) # make cloc line counts for the core sources # make clean rm -rf build/ # # Output layout: every binary lives at build/<arch>/<src-path-without-ext>, # mirroring the source path under the repo root (e.g. M1pp/M1pp.P1 -> -# build/<arch>/M1pp/M1pp; tests/cc/foo.c -> build/<arch>/tests/cc/foo). +# build/<arch>/M1pp/M1pp; hex2pp/hex2pp.P1 -> build/<arch>/hex2pp/hex2pp; +# tests/cc/foo.c -> build/<arch>/tests/cc/foo). # Per-source intermediates land under build/<arch>/.work/<src-path>/. +# +# Bootstrap chain: +# 1. seed (vendored hex0-seed) -> M0 + hex2-0 + catm (boot1.sh) +# 2. 
seed M0 + hex2-0 -> M1pp ELF (boot-build-p1.sh) +# 2. seed M0 + hex2-0 -> hex2pp ELF (boot-build-p1.sh) +# 3. M1pp + hex2pp -> every other ELF (.P1pp pipeline) (boot-build-p1pp.sh) +# The seed M0/hex2-0/catm participate ONLY in step 2 (building the two +# new tools from their .P1 sources). Once both binaries exist, no +# downstream user/test/scheme/cc target ever invokes them again. ARCH ?= aarch64 @@ -71,10 +83,10 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \ # --- Targets -------------------------------------------------------------- -.PHONY: all m1pp scheme1 cc test image tools tables \ +.PHONY: all m1pp hex2pp scheme1 cc test image tools tables \ tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc -all: m1pp +all: m1pp hex2pp help: @sed -n '/^# Common entrypoints:/,/^$$/p' Makefile | sed 's/^# *//' @@ -94,6 +106,8 @@ CLOC_FILES := \ $(foreach f,$(CLOC_SEED_BASES),vendor/seed/$(a)/$(f))) \ $(foreach a,$(CLOC_ARCHES),P1/P1-$(a).M1) \ M1pp/M1pp.P1 \ + hex2pp/hex2pp.P1 \ + $(foreach a,$(CLOC_ARCHES),P1/elf-$(a).hex2pp) \ $(foreach a,$(CLOC_ARCHES),P1/P1-$(a).M1pp) \ P1/P1.M1pp \ P1/P1pp.P1pp \ @@ -147,7 +161,7 @@ $(TOOLS_M0): build/%/tools/M0: scripts/boot1.sh build/%/.image \ # tables` after editing P1/gen/*.py or any of the prune-source files # below, then commit the updated P1/*.M1. 
-P1_PRUNE_SRCS := M1pp/M1pp.P1 $(wildcard tests/P1/*.P1) +P1_PRUNE_SRCS := M1pp/M1pp.P1 hex2pp/hex2pp.P1 $(wildcard tests/P1/*.P1) tables: $(foreach a,$(ALL_ARCHES),P1/P1-$(a).M1) @@ -164,6 +178,7 @@ P1/P1-%.M1: build/%/P1/P1.M1 scripts/prune-p1-table.sh $(P1_PRUNE_SRCS) # --- Programs (per arch) -------------------------------------------------- M1PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/M1pp/M1pp) +HEX2PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hex2pp/hex2pp) SCHEME1_SRC := scheme1/scheme1.P1pp SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1/scheme1) @@ -174,24 +189,35 @@ CC_SRCS := scheme1/prelude.scm cc/cc.scm cc/main.scm CC_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/cc/cc.scm) m1pp: $(OUT_DIR)/M1pp/M1pp +hex2pp: $(OUT_DIR)/hex2pp/hex2pp scheme1: $(OUT_DIR)/scheme1/scheme1 cc: $(OUT_DIR)/cc/cc.scm -# Per-arch deps for .P1/.M1 builds (raw M1, no macro expansion). +# Per-arch deps for the seed-built .P1 -> ELF chain. Used ONLY to build +# the two new self-hosted tools (M1pp.P1, hex2pp.P1) from their pure-P1 +# sources via vendored M0 + hex2-0. After that, no other target reaches +# back to the seed M0/hex2 path. P1_BUILD_DEPS = scripts/lint.sh scripts/boot-build-p1.sh \ build/%/.image build/%/tools/M0 \ vendor/seed/%/ELF.hex2 P1/P1-%.M1 -# Per-arch deps for .P1pp builds (m1pp expansion + libp1pp). +# Per-arch deps for .P1pp builds (M1pp expansion + libp1pp + hex2pp link). +# The seed M0/hex2 tools are deliberately absent: this chain is +# end-to-end M1pp + hex2pp. 
P1PP_BUILD_DEPS = scripts/boot-build-p1pp.sh \ - build/%/.image build/%/tools/M0 build/%/M1pp/M1pp \ - vendor/seed/%/ELF.hex2 \ + build/%/.image \ + build/%/M1pp/M1pp build/%/hex2pp/hex2pp \ + P1/elf-%.hex2pp \ P1/P1-%.M1pp P1/P1.M1pp P1/P1pp.P1pp $(M1PP_BINS): build/%/M1pp/M1pp: M1pp/M1pp.P1 $(P1_BUILD_DEPS) ARCH=$* sh scripts/lint.sh M1pp/M1pp.P1 $(call PODMAN,$*) sh scripts/boot-build-p1.sh M1pp/M1pp.P1 $@ +$(HEX2PP_BINS): build/%/hex2pp/hex2pp: hex2pp/hex2pp.P1 $(P1_BUILD_DEPS) + ARCH=$* sh scripts/lint.sh hex2pp/hex2pp.P1 + $(call PODMAN,$*) sh scripts/boot-build-p1.sh hex2pp/hex2pp.P1 $@ + $(SCHEME1_BINS): build/%/scheme1/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS) $(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $@ $(SCHEME1_SRC) @@ -273,7 +299,7 @@ $(TCC_BOOT2_P1PPS): build/%/tcc-boot2/tcc.flat.P1pp: \ # tcc-boot2 link: pure catm chain — entry stub, libc, client TU, # elf terminator. boot-build-p1pp.sh concatenates them in order -# ahead of the M1pp expander/M0/hex2 pipeline. +# ahead of the M1pp expander + hex2pp pipeline. $(TCC_BOOT2_BINS): build/%/tcc-boot2/tcc-boot2: \ build/%/tcc-boot2/tcc.flat.P1pp build/%/vendor/mes-libc/libc.P1pp \ P1/entry-libc.P1pp P1/elf-end.P1pp \ @@ -350,7 +376,8 @@ $(TCC_CC_MEM): tcc-cc/mem.c \ # --- Native tools (opt-in dev-loop helpers) ------------------------------- -NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 build/native-tools/m1pp +NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 \ + build/native-tools/m1pp build/native-tools/hex2pp tools-native: $(NATIVE_TOOLS) @@ -363,6 +390,9 @@ build/native-tools/hex2: scripts/build-native-tools.sh build/native-tools/m1pp: scripts/build-native-tools.sh M1pp/M1pp.c sh scripts/build-native-tools.sh m1pp +build/native-tools/hex2pp: scripts/build-native-tools.sh hex2pp/hex2pp.c + sh scripts/build-native-tools.sh hex2pp + # --- Tests ---------------------------------------------------------------- # # `make test` runs every suite. 
SUITE selects one; ARCH restricts to one @@ -382,29 +412,39 @@ else TEST_ARCHES := $(ARCH) endif -# m1pp suite per-arch deps: image, tools, table, expander. +# m1pp suite per-arch deps: image, expander, hex2pp (the suite pipes +# M1pp output through hex2pp as a smoke test in addition to the +# text-diff against .expected). TEST_M1PP_DEPS := $(foreach a,$(TEST_ARCHES), \ - build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 build/$(a)/M1pp/M1pp) + build/$(a)/.image build/$(a)/M1pp/M1pp build/$(a)/hex2pp/hex2pp \ + P1/elf-$(a).hex2pp) -# p1 suite per-arch deps: image, tools, table, expander. +# p1 suite per-arch deps: image, table, expander, hex2pp, ELF header. +# Raw .P1 fixtures still go through the seed M0+hex2 chain (boot-build-p1.sh) +# because they share the legacy M1 backend; .P1pp fixtures go through +# the new M1pp + hex2pp chain via boot-build-p1pp.sh. TEST_P1_DEPS := $(foreach a,$(TEST_ARCHES), \ - build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 build/$(a)/M1pp/M1pp) + build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 \ + build/$(a)/M1pp/M1pp build/$(a)/hex2pp/hex2pp P1/elf-$(a).hex2pp) -# scheme1 suite per-arch deps: image, tools, expander, scheme1 binary. +# scheme1 suite per-arch deps: image, expander, hex2pp, scheme1 binary. # (run-tests.sh runs the pre-built binary against each .scm fixture; it # does not rebuild the interpreter per fixture.) TEST_SCHEME1_DEPS := $(foreach a,$(TEST_ARCHES), \ - build/$(a)/.image build/$(a)/tools/M0 build/$(a)/M1pp/M1pp \ + build/$(a)/.image build/$(a)/M1pp/M1pp build/$(a)/hex2pp/hex2pp \ build/$(a)/scheme1/scheme1) -# cc-* suites: scheme1 + m1pp cover everything. cc-util / cc-lex / -# cc-pp byte-diff their pure transformations; cc-cg / cc compile the -# emitted P1pp through the P1pp toolchain (which m1pp drives) and run -# the resulting ELF. cc.scm is only needed by the cc suite (it invokes -# the catm'd compiler against a .c fixture); the rest catm their own -# per-suite layer list. 
+# cc-* suites: scheme1 + M1pp + hex2pp cover everything. cc-util / +# cc-lex / cc-pp byte-diff their pure transformations; cc-cg / cc +# compile the emitted P1pp through the P1pp toolchain (M1pp + hex2pp) +# and run the resulting ELF. cc.scm is only needed by the cc suite +# (it invokes the catm'd compiler against a .c fixture); the rest +# catm their own per-suite layer list. catm comes from build/$(a)/tools/ +# (built once during the seed bootstrap; only the cc-unit catm chain +# uses it now — the P1pp pipeline no longer touches it). TEST_CC_UNIT_DEPS := $(foreach a,$(TEST_ARCHES), \ - build/$(a)/.image build/$(a)/tools/M0 build/$(a)/M1pp/M1pp \ + build/$(a)/.image build/$(a)/tools/M0 \ + build/$(a)/M1pp/M1pp build/$(a)/hex2pp/hex2pp \ build/$(a)/scheme1/scheme1) TEST_CC_DEPS := $(TEST_CC_UNIT_DEPS) \ diff --git a/hex2pp/hex2pp.P1 b/hex2pp/hex2pp.P1 @@ -0,0 +1,3727 @@ +## hex2pp.P1 -- P1 implementation of the hex2++ assembler/linker. +## +## Mirrors hex2pp/hex2pp.c exactly in observable behaviour. See the C +## file and docs/HEX2pp.md for the full spec; brief summary: +## +## Inputs are concatenated, scanned in two passes. Pass 1 records label +## definitions while advancing a position counter (ip). Pass 2 emits +## bytes, resolving label references against the table built in pass 1. +## +## Active syntax: +## digits in current byte mode -> raw bytes (HEX or BINARY) +## :NAME -> label definition +## SIGIL NAME [- OTHER] -> label reference (! @ $ ~ % &) +## .align N [PATTERN] -> pad to N-byte boundary +## .fill N B -> N copies of byte B +## .scope / .endscope -> nestable local-label scope +## # ... / ; ... -> line comment +## +## Multi-byte reference values are emitted little-endian by default. +## +## Invocation: +## hex2pp (-f|--file) FILE [(-f|--file) FILE ...] 
+## [-o|--output OUT] +## [-B|--base-address ADDR] +## [--big-endian | --little-endian] +## [-b|--binary] +## [--non-executable] +## [-h|--help] +## +## P1 ABI: a0..a3 arg/return, t0..t2 caller-saved temps. Non-leaf +## functions use enter_0 / eret. Entry is the portable p1_main +## (a0=argc, a1=argv); the backend-owned :_start stub captures argc/argv +## from the native entry state and sys_exits p1_main's return value. +## +## chmod note: the seed P1 (P1/P1-<arch>.M1) exposes only sys_openat / +## sys_read / sys_write / sys_exit. Since there is no chmod() syscall in +## the seed, we encode the desired final mode (0750 or 0640) directly in +## openat's mode argument at file-creation time. This achieves the same +## resulting file permissions as the C reference. +## +## Register usage discipline: the seed P1 mnemonic table only defines a +## restricted subset of (dst,src1,src2) and (dst,base,offset) combos. +## To stay within that table this file spills almost everything through +## fixed BSS slots between operations. The naming convention +## ``<func>_<name>`` keeps per-function spill slots from colliding. + +## --- Caps ------------------------------------------------------------------- +## Mirrors hex2pp.c constants (MAX_FILES=64, MAX_INPUT_BYTES=16 MiB, +## MAX_OUTPUT_BYTES=128 MiB, MAX_LABELS=2^20, MAX_TEXT=8 MiB, +## MAX_TOKEN=4096, MAX_SCOPE_DEPTH=32). Stored as 8-byte little-endian. 
+## Encoding note: every DEFINE below is 16 hex digits spelling the 8 bytes
+## in memory order, i.e. little-endian (least-significant byte first).
+## Example: 4096 = 0x1000 -> 00 10 00 00 00 00 00 00.
+##   H2_INPUT_CAP   16 MiB  = 0x01000000
+##   H2_OUTPUT_CAP  128 MiB = 0x08000000
+##   H2_TEXT_CAP    8 MiB   = 0x00800000
+##   H2_LABEL_CAP   2^20    = 0x00100000
+##   H2_TOKEN_CAP   4096    = 0x1000
+##   H2_FILES_CAP   64      = 0x40
+##   H2_SCOPE_CAP   32      = 0x20
+DEFINE H2_INPUT_CAP 0000000100000000
+DEFINE H2_OUTPUT_CAP 0000000800000000
+DEFINE H2_TEXT_CAP 0000800000000000
+DEFINE H2_LABEL_CAP 0000100000000000
+DEFINE H2_TOKEN_CAP 0010000000000000
+DEFINE H2_FILES_CAP 4000000000000000
+DEFINE H2_SCOPE_CAP 2000000000000000
+
+## openat / mode constants (Linux generic)
+##   O_WRONLY_CREAT_TRUNC = 0x241 (O_WRONLY | O_CREAT | O_TRUNC)
+##   MODE_0750 = 0x1E8, MODE_0640 = 0x1A0, AT_FDCWD = -100
+DEFINE O_RDONLY 0000000000000000
+DEFINE O_WRONLY_CREAT_TRUNC 4102000000000000
+DEFINE MODE_0750 E801000000000000
+DEFINE MODE_0640 A001000000000000
+DEFINE AT_FDCWD 9CFFFFFFFFFFFFFF
+
+DEFINE ZERO8 '0000000000000000'
+DEFINE ZERO4 '00000000'
+
+## --- BSS layout (offsets from ELF_end) -------------------------------------
+##
+## Each "_ptr" is a one-word slot in the executable's static data; p1_main's
+## bss_init_loop initializes each to ELF_end + OFF_*. The arenas live past
+## ELF_end (covered by the segment's memsz; the loader zero-initializes).
+##
+## Sizes (mirroring hex2pp.c caps) and resulting offsets:
+##   input_paths    64 * 8 = 512 B   (char * per file)        0x00000000
+##   input_starts   64 * 8 = 512 B   (offset into input_buf)  0x00000200
+##   input_lens     64 * 8 = 512 B                            0x00000400
+##   scope_stack    32 * 8 = 256 B                            0x00000600
+##   line_scratch   64 B   (decimal render of cur_line)       0x00000700
+##   name_buf       4096 B                                    0x00000740
+##   label_buf      4096 B                                    0x00001740
+##   other_buf      4096 B                                    0x00002740
+##   pat_buf        4096 B                                    0x00003740
+##   ev_bytes       8 B                                       0x00004740
+##   df_byte        8 B                                       0x00004750
+##   input_buf      16 MiB                                    0x00004800
+##   output_buf     128 MiB                                   0x01004800
+##   text_buf       8 MiB                                     0x09004800
+##   labels         32 MiB (2^20 * 32 B)                      0x09804800
+##
+## Cumulative offsets, padded for clarity (same little-endian byte order
+## as the caps above):
+DEFINE OFF_input_paths 0000000000000000
+DEFINE OFF_input_starts 0002000000000000
+DEFINE OFF_input_lens 0004000000000000
+DEFINE OFF_scope_stack 0006000000000000
+DEFINE OFF_line_scratch 0007000000000000
+DEFINE OFF_name_buf 4007000000000000
+DEFINE OFF_label_buf 4017000000000000
+DEFINE OFF_other_buf 4027000000000000
+DEFINE OFF_pat_buf 4037000000000000
+DEFINE OFF_ev_bytes 4047000000000000
+DEFINE OFF_df_byte 5047000000000000
+DEFINE OFF_input_buf 0048000000000000
+DEFINE OFF_output_buf 0048000100000000
+DEFINE OFF_text_buf 0048000900000000
+DEFINE OFF_labels 0048800900000000
+
+## --- Runtime shell: 
argv parse -> load files -> two passes -> write -> exit + +:p1_main + enter_0 + + # ---- Save argc / argv FIRST (subsequent setup clobbers a0/a1) --------- + # On entry a0 = argc, a1 = argv (per the backend :_start stub). + la_a2 &saved_argc + st_a0,a2,0 + la_a2 &saved_argv + st_a1,a2,0 + + # ---- Init BSS pointer slots from ELF_end ------------------------------ + la_t0 &ELF_end + la_t1 &bss_init_tbl + la_t2 &bss_init_tbl_end +:bss_init_loop + la_br &bss_init_done + beq_t1,t2 + ld_a2,t1,0 + ld_a3,t1,8 + add_a3,a3,t0 + st_a3,a2,0 + addi_t1,t1,16 + la_br &bss_init_loop + b +:bss_init_done + + # ---- Default output_path = "a.out" ------------------------------------- + la_a0 &const_a_out + la_a1 &output_path + st_a0,a1,0 + +:arg_loop_init + li_t0 %1 %0 + la_a0 &arg_idx + st_t0,a0,0 + +:arg_loop + # if (i >= argc) goto arg_done + la_a0 &arg_idx + ld_t0,a0,0 + la_a1 &saved_argc + ld_t1,a1,0 + la_br &arg_done + beq_t0,t1 + la_br &arg_done + blt_t1,t0 + + # arg_ptr = argv[i] = *(argv + 8*i) + la_a0 &saved_argv + ld_a0,a0,0 + mov_a1,a0 # a1 = argv + la_a0 &arg_idx + ld_t0,a0,0 + shli_t2,t0,3 # t2 = 8*i + add_a0,t2,a1 # a0 = argv + 8*i + ld_a0,a0,0 # a0 = argv[i] + la_a1 &arg_ptr + st_a0,a1,0 + + # Dispatch on the argument string. Each compare uses str_eq, which + # checks the trailing NUL of the argv string against the option + # constant's known length. 
+ + # -f / --file + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_dash_f + li_a2 %2 %0 + la_br &str_eq + call + la_br &arg_is_file + bnez_a0 + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_long_file + li_a2 %6 %0 + la_br &str_eq + call + la_br &arg_is_file + bnez_a0 + + # -o / --output + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_dash_o + li_a2 %2 %0 + la_br &str_eq + call + la_br &arg_is_output + bnez_a0 + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_long_output + li_a2 %8 %0 + la_br &str_eq + call + la_br &arg_is_output + bnez_a0 + + # -B / --base-address + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_dash_B + li_a2 %2 %0 + la_br &str_eq + call + la_br &arg_is_base + bnez_a0 + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_long_base + li_a2 %14 %0 + la_br &str_eq + call + la_br &arg_is_base + bnez_a0 + + # --big-endian + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_long_big + li_a2 %12 %0 + la_br &str_eq + call + la_br &arg_is_big + bnez_a0 + + # --little-endian + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_long_little + li_a2 %15 %0 + la_br &str_eq + call + la_br &arg_is_little + bnez_a0 + + # -b / --binary + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_dash_b + li_a2 %2 %0 + la_br &str_eq + call + la_br &arg_is_binary + bnez_a0 + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_long_binary + li_a2 %8 %0 + la_br &str_eq + call + la_br &arg_is_binary + bnez_a0 + + # --non-executable + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_long_nonexec + li_a2 %16 %0 + la_br &str_eq + call + la_br &arg_is_nonexec + bnez_a0 + + # -h / --help + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_dash_h + li_a2 %2 %0 + la_br &str_eq + call + la_br &arg_is_help + bnez_a0 + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &opt_long_help + li_a2 %6 %0 + la_br &str_eq + call + la_br &arg_is_help + bnez_a0 + + la_br &err_unknown_arg + b + +:arg_is_file + la_br &arg_advance + call + la_a0 &arg_ptr + ld_a0,a0,0 + la_br &load_input + call + la_br &arg_loop + b +:arg_is_output + la_br &arg_advance + call + la_a0 &arg_ptr + ld_a0,a0,0 + la_a1 &output_path + 
st_a0,a1,0 + la_br &arg_loop + b +:arg_is_base + la_br &arg_advance + call + la_a0 &arg_ptr + ld_a0,a0,0 + la_br &parse_long_arg + call + la_a1 &base_address + st_a0,a1,0 + la_br &arg_loop + b +:arg_is_big + li_t0 %1 %0 + la_a0 &big_endian + st_t0,a0,0 + la_br &arg_loop + b +:arg_is_little + li_t0 %0 %0 + la_a0 &big_endian + st_t0,a0,0 + la_br &arg_loop + b +:arg_is_binary + li_t0 %1 %0 + la_a0 &byte_mode + st_t0,a0,0 + la_br &arg_loop + b +:arg_is_nonexec + li_t0 %1 %0 + la_a0 &non_executable + st_t0,a0,0 + la_br &arg_loop + b +:arg_is_help + la_br &print_usage + call + li_a0 sys_exit + li_a1 %0 %0 + syscall + +## arg_advance(): i++; if (i >= argc) usage error; arg_ptr = argv[i]. +:arg_advance + enter_0 + la_a0 &arg_idx + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_a1 &saved_argc + ld_t1,a1,0 + la_br &err_missing_arg_value + beq_t0,t1 + la_br &err_missing_arg_value + blt_t1,t0 + # arg_ptr = argv[i] + la_a0 &saved_argv + ld_a0,a0,0 + mov_a1,a0 + shli_t2,t0,3 + add_a0,t2,a1 + ld_a0,a0,0 + la_a1 &arg_ptr + st_a0,a1,0 + eret + +:arg_done + la_a0 &input_count + ld_t0,a0,0 + la_br &err_no_inputs + beqz_t0 + + # ---- Pass 1: collect labels -------------------------------------------- + li_t0 %1 %0 + la_a0 &pass + st_t0,a0,0 + la_br &reset_pass_state + call + la_br &run_one_pass + call + la_a0 &scope_depth + ld_t0,a0,0 + la_br &err_scope_unclosed + bnez_t0 + + # Clear cur_path so any post-pass error reports without file:line. + li_t0 %0 %0 + la_a0 &cur_path + st_t0,a0,0 + + # ---- Pass 2: emit ------------------------------------------------------ + li_t0 %2 %0 + la_a0 &pass + st_t0,a0,0 + la_br &reset_pass_state + call + la_br &run_one_pass + call + la_a0 &scope_depth + ld_t0,a0,0 + la_br &err_scope_unclosed + bnez_t0 + + li_t0 %0 %0 + la_a0 &cur_path + st_t0,a0,0 + + la_br &write_output + call + + li_a0 %0 %0 + eret + +## reset_pass_state(): ip=0, output_used=0, scope_depth=0, scope_seq=0. 
+:reset_pass_state + enter_0 + li_t0 %0 %0 + la_a0 &ip + st_t0,a0,0 + la_a0 &output_used + st_t0,a0,0 + la_a0 &scope_depth + st_t0,a0,0 + la_a0 &scope_seq + st_t0,a0,0 + eret + +## run_one_pass(): for i in [0, input_count) call process_file(i). +:run_one_pass + enter_0 + li_t0 %0 %0 + la_a0 &pass_idx + st_t0,a0,0 +:run_one_pass_loop + la_a0 &pass_idx + ld_t0,a0,0 + la_a1 &input_count + ld_t1,a1,0 + la_br &run_one_pass_done + beq_t0,t1 + mov_a0,t0 + la_br &process_file + call + la_a0 &pass_idx + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &run_one_pass_loop + b +:run_one_pass_done + eret + +## --- File loader ------------------------------------------------------------ +## load_input(a0=path): record path, read file appended into input_buf at +## offset input_total, then advance input_total. Fatal on any I/O failure. +:load_input + enter_0 + la_a1 &input_count + ld_t0,a1,0 + li_t1 H2_FILES_CAP + la_br &err_too_many_files + beq_t0,t1 + + # Save path into input_paths[input_count]. + la_a1 &input_paths_ptr + ld_a1,a1,0 + shli_t2,t0,3 + add_a1,t2,a1 + st_a0,a1,0 + + # input_starts[input_count] = input_total + la_a1 &input_total + ld_t1,a1,0 + la_a2 &input_starts_ptr + ld_a2,a2,0 + add_a2,t2,a2 + la_a0 &aux_tmp + st_t1,a0,0 + ld_a3,a0,0 + st_a3,a2,0 + + # Stash path for the syscall. + la_a1 &li_path + st_a0,a1,0 + + # fd = openat(AT_FDCWD, path, O_RDONLY, 0) + li_a0 sys_openat + li_a1 AT_FDCWD + la_a2 &li_path + ld_a2,a2,0 + li_a3 O_RDONLY + li_t0 %0 %0 + syscall + la_br &err_open_input + bltz_a0 + la_a1 &li_fd + st_a0,a1,0 + +:li_read_loop + la_a0 &input_total + ld_t0,a0,0 + li_t1 H2_INPUT_CAP + la_br &err_input_too_big + beq_t0,t1 + la_br &err_input_too_big + blt_t1,t0 + + # n = read(fd, input_buf + input_total, INPUT_CAP - input_total) + la_a0 &li_fd + ld_a1,a0,0 + la_a2 &input_buf_ptr + ld_a2,a2,0 + add_a2,a2,t0 + sub_a3,t1,t0 # available? 
sub_a3,t1,t0 — yes + li_a0 sys_read + syscall + la_br &li_eof + beqz_a0 + la_br &err_read + bltz_a0 + + # input_total += n + la_a1 &input_total + ld_t0,a1,0 + add_t0,t0,a0 + st_t0,a1,0 + la_br &li_read_loop + b + +:li_eof + # input_lens[input_count] = input_total - input_starts[input_count]. + # The seed has add_a2,a2,t1 but neither add_a2,a2,t2 nor mov_t1,t2, + # so the stride-by-8 result lives in t2 and is then re-loaded as t1 + # via the li_tmp scratch slot. + la_a0 &input_count + ld_t0,a0,0 + shli_t2,t0,3 + la_a3 &li_tmp + st_t2,a3,0 + ld_t1,a3,0 + la_a2 &input_starts_ptr + ld_a2,a2,0 + add_a2,a2,t1 # a2 = &input_starts[i] + ld_a3,a2,0 # a3 = input_starts[i] + la_a0 &input_total + ld_a1,a0,0 # a1 = input_total + # We need (total - start) into a2, but no sub_a2,a1,a3 form exists. + # Spill start through li_tmp -> a0 so we can use sub_a2,a1,a0. + la_a0 &li_tmp + st_a3,a0,0 + ld_a0,a0,0 # a0 = start (re-reads via li_tmp) + sub_a2,a1,a0 # a2 = total - start + # Store into input_lens[i] (reuse the same t1 scaling). + la_a0 &input_lens_ptr + ld_a0,a0,0 + add_a0,a0,t1 + st_a2,a0,0 + + la_a0 &input_count + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + eret + +## --- Per-file scanner ------------------------------------------------------- + +## process_file(a0=file_idx): set cur_path / cur_line / scan_pos / scan_end +## from the input record, then dispatch character by character. +:process_file + enter_0 + mov_t0,a0 + shli_t2,t0,3 # t2 = 8*idx + + # cur_path = input_paths[idx] + la_a3 &li_tmp + st_t2,a3,0 + ld_t1,a3,0 # t1 = 8*idx + la_a1 &input_paths_ptr + ld_a1,a1,0 + add_a1,a1,t1 # available add_a1,a1,t1 + ld_a1,a1,0 + la_a2 &cur_path + st_a1,a2,0 + + # cur_line = 1 + li_t0 %1 %0 + la_a2 &cur_line + st_t0,a2,0 + + # scan_pos = input_buf + input_starts[idx] + la_a1 &input_starts_ptr + ld_a1,a1,0 + add_a1,a1,t1 + ld_a1,a1,0 # a1 = start offset + la_a2 &input_buf_ptr + ld_a2,a2,0 + mov_a0,a1 + add_a2,a2,a0 # add_a2,a2,a1 NOT listed. Use a2,a2,a0 with a0=a1. 
+ la_a3 &li_tmp + st_a1,a3,0 + ld_a0,a3,0 # a0 = start + add_a2,a2,a0 # available + la_a0 &scan_pos + st_a2,a0,0 + + # scan_end = scan_pos + input_lens[idx] + la_a1 &input_lens_ptr + ld_a1,a1,0 + add_a1,a1,t1 + ld_a1,a1,0 # a1 = len + la_a3 &li_tmp + st_a1,a3,0 + ld_a0,a3,0 + add_a2,a2,a0 + la_a0 &scan_end + st_a2,a0,0 + +:scan_loop + la_br &skip_ws_and_comments + call + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &scan_done + beq_t0,t1 + la_br &scan_done + blt_t1,t0 + lb_a0,t0,0 + + # Dispatch on c. + li_t1 %58 %0 # ':' + la_br &scan_label_def + beq_a0,t1 + li_t1 %46 %0 # '.' + la_br &scan_directive + beq_a0,t1 + li_t1 %33 %0 # '!' + la_br &scan_ref + beq_a0,t1 + li_t1 %64 %0 # '@' + la_br &scan_ref + beq_a0,t1 + li_t1 %36 %0 # '$' + la_br &scan_ref + beq_a0,t1 + li_t1 %126 %0 # '~' + la_br &scan_ref + beq_a0,t1 + li_t1 %37 %0 # '%' + la_br &scan_ref + beq_a0,t1 + li_t1 %38 %0 # '&' + la_br &scan_ref + beq_a0,t1 + la_br &is_byte_digit + call + la_br &scan_byte_stream + bnez_a0 + la_br &err_unexpected_char + b + +:scan_label_def + la_a0 &scan_pos + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_a0 &name_buf_ptr + ld_a0,a0,0 + li_a1 H2_TOKEN_CAP + la_br &read_name + call + la_a1 &name_len + st_a0,a1,0 + # dotted = (name[0] == '.') + la_a1 &name_buf_ptr + ld_a1,a1,0 + la_a3 &aux_tmp + st_a1,a3,0 + ld_a3,a3,0 + lb_t0,a3,0 + li_t1 %46 %0 + la_br &scan_label_undotted + bne_t0,t1 + # dotted: scope_depth must be > 0 + la_a0 &scope_depth + ld_t0,a0,0 + la_br &err_dotted_outside_scope + beqz_t0 + addi_t0,t0,neg1 + shli_t2,t0,3 + la_a3 &sl_tmp + st_t2,a3,0 + ld_t1,a3,0 + la_a0 &scope_stack_ptr + ld_a0,a0,0 + add_a0,a0,t1 # available + ld_a0,a0,0 # a0 = scope id of innermost scope + la_a1 &name_scope + st_a0,a1,0 + la_br &scan_label_define + b +:scan_label_undotted + li_t0 %0 %0 + la_a1 &name_scope + st_t0,a1,0 +:scan_label_define + la_a0 &pass + ld_t0,a0,0 + li_t1 %1 %0 + la_br &scan_loop + bne_t0,t1 + la_a0 &name_buf_ptr + ld_a0,a0,0 + la_a1 &name_len + 
ld_a1,a1,0 + la_a2 &name_scope + ld_a2,a2,0 + la_br &define_label + call + la_br &scan_loop + b + +:scan_directive + la_a0 &scan_pos + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_a0 &name_buf_ptr + ld_a0,a0,0 + li_a1 H2_TOKEN_CAP + la_br &read_directive_name + call + la_a1 &name_len + st_a0,a1,0 + + # Compare against the four known directive names. + li_t1 %5 %0 + la_br &scan_dir_check_4 + la_a3 &aux_tmp + st_t1,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + bne_a0,t0 + la_a0 &name_buf_ptr + ld_a0,a0,0 + la_a1 &dir_align + li_a2 %5 %0 + la_br &mem_eq + call + la_br &scan_dir_align + bnez_a0 + la_a0 &name_buf_ptr + ld_a0,a0,0 + la_a1 &dir_scope + li_a2 %5 %0 + la_br &mem_eq + call + la_br &scan_dir_scope_open + bnez_a0 + la_br &err_unknown_directive + b +:scan_dir_check_4 + la_a0 &name_len + ld_t0,a0,0 + li_t1 %4 %0 + la_br &scan_dir_check_8 + bne_t0,t1 + la_a0 &name_buf_ptr + ld_a0,a0,0 + la_a1 &dir_fill + li_a2 %4 %0 + la_br &mem_eq + call + la_br &scan_dir_fill + bnez_a0 + la_br &err_unknown_directive + b +:scan_dir_check_8 + la_a0 &name_len + ld_t0,a0,0 + li_t1 %8 %0 + la_br &err_unknown_directive + bne_t0,t1 + la_a0 &name_buf_ptr + ld_a0,a0,0 + la_a1 &dir_endscope + li_a2 %8 %0 + la_br &mem_eq + call + la_br &scan_dir_scope_close + bnez_a0 + la_br &err_unknown_directive + b + +:scan_dir_align + la_br &do_align + call + la_br &scan_loop + b +:scan_dir_fill + la_br &do_fill + call + la_br &scan_loop + b +:scan_dir_scope_open + la_br &do_scope_open + call + la_br &scan_loop + b +:scan_dir_scope_close + la_br &do_scope_close + call + la_br &scan_loop + b + +:scan_ref + # a0 holds sigil; advance past it then process_reference. 
+ la_a1 &cur_sigil + st_a0,a1,0 + la_a1 &scan_pos + ld_t0,a1,0 + addi_t0,t0,1 + st_t0,a1,0 + la_br &process_reference + call + la_br &scan_loop + b + +:scan_byte_stream + la_br &parse_byte_stream + call + la_br &scan_loop + b + +:scan_done + eret + +## --- Lex helpers ------------------------------------------------------------ + +## skip_ws_and_comments(): advance scan_pos past whitespace and #/; comments. +## Updates cur_line on '\n'. +:skip_ws_and_comments + enter_0 +:swc_loop + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &swc_done + beq_t0,t1 + la_br &swc_done + blt_t1,t0 + lb_a0,t0,0 + la_br &is_space_any + call + la_br &swc_after_space_check + beqz_a0 + # whitespace: advance; if '\n' bump cur_line. + la_a1 &scan_pos + ld_t0,a1,0 + lb_a0,t0,0 + li_t1 %10 %0 + la_br &swc_advance + la_a3 &aux_tmp + st_t1,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + bne_a0,t0 + la_a2 &cur_line + la_a0 &aux_tmp + st_a2,a0,0 + ld_t2,a0,0 + addi_t2,t2,1 + st_t2,a2,0 +:swc_advance + la_a1 &scan_pos + ld_t0,a1,0 + addi_t0,t0,1 + st_t0,a1,0 + la_br &swc_loop + b +:swc_after_space_check + li_t1 %35 %0 # '#' + la_br &swc_consume_comment + beq_a0,t1 + li_t1 %59 %0 # ';' + la_br &swc_consume_comment + beq_a0,t1 + la_br &swc_done + b +:swc_consume_comment +:swc_cc_loop + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &swc_loop + beq_t0,t1 + la_br &swc_loop + blt_t1,t0 + lb_a0,t0,0 + li_t1 %10 %0 + la_br &swc_loop + beq_a0,t1 + addi_t0,t0,1 + st_t0,a0,0 + la_br &swc_cc_loop + b +:swc_done + eret + +## skip_inline_ws(): like skip_ws_and_comments but does NOT cross '\n'. 
+:skip_inline_ws
+    enter_0
+:siw_loop
+    # Stop at end of input.
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &siw_done
+    beq_t0,t1
+    la_br &siw_done
+    blt_t1,t0
+    lb_a0,t0,0
+    # Inline whitespace: ' ' \t \r \f \v. '\n' (10) is deliberately absent.
+    li_t1 %32 %0
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %9 %0
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %13 %0
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %12 %0
+    la_br &siw_advance
+    beq_a0,t1
+    li_t1 %11 %0
+    la_br &siw_advance
+    beq_a0,t1
+    # '#' or ';' starts a comment that runs to end of line.
+    li_t1 %35 %0
+    la_br &siw_consume_comment
+    beq_a0,t1
+    li_t1 %59 %0
+    la_br &siw_consume_comment
+    beq_a0,t1
+    la_br &siw_done
+    b
+:siw_advance
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    addi_t0,t0,1
+    st_t0,a1,0
+    la_br &siw_loop
+    b
+:siw_consume_comment
+:siw_cc_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &siw_done
+    beq_t0,t1
+    la_br &siw_done
+    blt_t1,t0
+    lb_a0,t0,0
+    li_t1 %10 %0
+    la_br &siw_done
+    beq_a0,t1
+    # a0 now holds the character, not &scan_pos, so re-materialize the
+    # slot address before writing the advanced position back.
+    addi_t0,t0,1
+    la_a1 &scan_pos
+    st_t0,a1,0
+    la_br &siw_cc_loop
+    b
+:siw_done
+    eret
+
+## is_space_any(a0=c) -> a0=0/1. Whitespace = ' ' \t \n \r \f \v.
+## Leaf: touches only a0 and t0.
+:is_space_any
+    li_t0 %32 %0
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %9 %0
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %10 %0
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %13 %0
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %12 %0
+    la_br &isa_yes
+    beq_a0,t0
+    li_t0 %11 %0
+    la_br &isa_yes
+    beq_a0,t0
+    li_a0 %0 %0
+    ret
+:isa_yes
+    li_a0 %1 %0
+    ret
+
+## is_name_terminator_c(a0=c) -> a0=0/1. Terminators: whitespace, '-', '#', ';'.
+## Spills c into a BSS slot since is_space_any clobbers a0.
+## Non-leaf (it calls is_space_any), so it uses enter_0 / eret like every
+## other calling function in this file.
+:is_name_terminator_c
+    enter_0
+    la_a1 &nt_c
+    st_a0,a1,0
+    la_br &is_space_any
+    call
+    la_br &nt_yes
+    bnez_a0
+    # Not whitespace: reload c and test the three punctuation terminators.
+    la_a1 &nt_c
+    ld_a0,a1,0
+    li_t0 %45 %0
+    la_br &nt_yes
+    beq_a0,t0
+    li_t0 %35 %0
+    la_br &nt_yes
+    beq_a0,t0
+    li_t0 %59 %0
+    la_br &nt_yes
+    beq_a0,t0
+    li_a0 %0 %0
+    eret
+:nt_yes
+    li_a0 %1 %0
+    eret
+
+## is_byte_digit(a0=c) -> a0=0/1. Mode-aware (HEX vs BINARY). 
+:is_byte_digit + la_a1 &byte_mode + ld_t0,a1,0 + la_br &ibd_bin + bnez_t0 + # HEX: 0-9, a-f, A-F + li_t0 %48 %0 + la_br &ibd_no + mov_a2,t0 + blt_a0,a2 + li_t0 %57 %0 + la_br &ibd_yes + mov_a2,t0 + blt_a0,a2 + la_br &ibd_yes + beq_a0,t0 + li_t0 %65 %0 + la_br &ibd_no + mov_a2,t0 + blt_a0,a2 + li_t0 %70 %0 + la_br &ibd_yes + mov_a2,t0 + blt_a0,a2 + la_br &ibd_yes + beq_a0,t0 + li_t0 %97 %0 + la_br &ibd_no + mov_a2,t0 + blt_a0,a2 + li_t0 %102 %0 + la_br &ibd_yes + mov_a2,t0 + blt_a0,a2 + la_br &ibd_yes + beq_a0,t0 + la_br &ibd_no + b +:ibd_bin + li_t0 %48 %0 + la_br &ibd_yes + beq_a0,t0 + li_t0 %49 %0 + la_br &ibd_yes + beq_a0,t0 + la_br &ibd_no + b +:ibd_yes + li_a0 %1 %0 + ret +:ibd_no + li_a0 %0 %0 + ret + +## byte_digit_value(a0=c) -> a0=value (0..15). Caller guarantees c is a +## valid digit for the current byte mode. +:byte_digit_value + li_t0 %57 %0 + la_br &bdv_alpha + mov_t1,a0 + blt_t0,t1 # if c > '9', go alpha + li_t1 %48 %0 + sub_a0,a0,t1 # available + ret +:bdv_alpha + li_t0 %96 %0 + la_br &bdv_lower + mov_t1,a0 + blt_t0,t1 # if c > 'a' - 1 (= 96), it's lowercase + li_t1 %55 %0 + sub_a0,a0,t1 # 'A'(65) - 55 = 10 + ret +:bdv_lower + li_t1 %87 %0 + sub_a0,a0,t1 # 'a'(97) - 87 = 10 + ret + +## byte_digit_count() -> a0. 2 for HEX, 8 for BINARY. +:byte_digit_count + la_a0 &byte_mode + ld_t0,a0,0 + la_br &bdc_bin + bnez_t0 + li_a0 %2 %0 + ret +:bdc_bin + li_a0 %8 %0 + ret + +## read_name(a0=out_buf, a1=max) -> a0=length. Reads scan_pos into out_buf +## until is_name_terminator_c or scan_end. 
+:read_name
+    enter_0
+    # Park the arguments and zero the running length; the helper call
+    # below clobbers a0/a1/t0.
+    la_a2 &rn_out
+    st_a0,a2,0
+    la_a2 &rn_max
+    st_a1,a2,0
+    li_t0 %0 %0
+    la_a2 &rn_n
+    st_t0,a2,0
+:rn_loop
+    # Stop at end of input or at the first terminator character.
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &rn_done
+    beq_t0,t1
+    la_br &rn_done
+    blt_t1,t0
+    lb_a0,t0,0
+    la_br &is_name_terminator_c
+    call
+    la_br &rn_done
+    bnez_a0
+    # overflow check: n == max is fatal. Load the VALUE of rn_max, not
+    # the address of its slot.
+    la_a1 &rn_n
+    ld_t0,a1,0
+    la_a1 &rn_max
+    ld_t1,a1,0
+    la_br &err_name_too_long
+    beq_t0,t1
+    # store char: out[n] = *scan_pos (re-read; the terminator check
+    # consumed a0).
+    la_a0 &scan_pos
+    ld_a0,a0,0
+    lb_a0,a0,0
+    la_a2 &rn_out
+    ld_a2,a2,0
+    add_a2,a2,t0
+    sb_a0,a2,0
+    # n++
+    addi_t0,t0,1
+    la_a1 &rn_n
+    st_t0,a1,0
+    # scan_pos++ (load the pointer value, bump it, store it back)
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &rn_loop
+    b
+:rn_done
+    # Return the length; an empty name is a fatal error.
+    la_a1 &rn_n
+    ld_a0,a1,0
+    la_br &err_empty_name
+    beqz_a0
+    eret
+
+## read_directive_name(a0=out_buf, a1=max) -> a0=length. Like read_name but
+## terminates on the first non-alpha byte. 
+:read_directive_name
+    enter_0
+    # Shares the rn_out / rn_max / rn_n spill slots with read_name.
+    la_a2 &rn_out
+    st_a0,a2,0
+    la_a2 &rn_max
+    st_a1,a2,0
+    li_t0 %0 %0
+    la_a2 &rn_n
+    st_t0,a2,0
+:rdn_loop
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &rdn_done
+    beq_t0,t1
+    la_br &rdn_done
+    blt_t1,t0
+    lb_a0,t0,0
+    # Reject if not [A-Za-z]
+    li_t1 %65 %0
+    la_br &rdn_check_lower
+    mov_a2,t1
+    blt_a0,a2
+    li_t1 %90 %0
+    la_br &rdn_consume
+    mov_a2,t1
+    blt_a0,a2
+    la_br &rdn_consume
+    beq_a0,t1
+:rdn_check_lower
+    li_t1 %97 %0
+    la_br &rdn_done
+    mov_a2,t1
+    blt_a0,a2
+    li_t1 %122 %0
+    la_br &rdn_consume
+    mov_a2,t1
+    blt_a0,a2
+    la_br &rdn_consume
+    beq_a0,t1
+    la_br &rdn_done
+    b
+:rdn_consume
+    # a0 still holds the accepted character; nothing below may touch a0
+    # until the sb has stored it.
+    la_a1 &rn_n
+    ld_t0,a1,0
+    # overflow check against the VALUE of rn_max (n == max is fatal)
+    la_a1 &rn_max
+    ld_t1,a1,0
+    la_br &err_name_too_long
+    beq_t0,t1
+    # out[n] = c
+    la_a2 &rn_out
+    ld_a2,a2,0
+    add_a2,a2,t0
+    sb_a0,a2,0
+    # n++
+    addi_t0,t0,1
+    la_a1 &rn_n
+    st_t0,a1,0
+    # scan_pos++ (load the pointer value, bump it, store it back)
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &rdn_loop
+    b
+:rdn_done
+    # Return the length; an empty directive name is a fatal error.
+    la_a1 &rn_n
+    ld_a0,a1,0
+    la_br &err_empty_directive
+    beqz_a0
+    eret
+
+## read_decimal() -> a0=value (i64). Fatal on no digits. 
+:read_decimal + enter_0 + li_t0 %0 %0 + la_a0 &rd_val + st_t0,a0,0 + la_a0 &rd_saw + st_t0,a0,0 +:rd_loop + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &rd_done + beq_t0,t1 + la_br &rd_done + blt_t1,t0 + lb_a0,t0,0 + li_t1 %48 %0 + la_br &rd_done + mov_a2,t1 + blt_a0,a2 + li_t1 %57 %0 + la_br &rd_done + mov_a1,a0 + blt_t1,a1 + # acc = acc * 10 + (c - '0') + la_a1 &rd_val + ld_t0,a1,0 + li_t1 %10 %0 + la_a3 &rd_tmp + st_t1,a3,0 + la_a1 &aux_tmp + st_a3,a1,0 + ld_a1,a1,0 # a1 = 10 + mul_t0,t0,a1 # t0 = acc * 10 + li_t1 %48 %0 + sub_a0,a0,t1 # a0 = c - 48 + add_t0,t0,a0 + la_a1 &rd_val + st_t0,a1,0 + li_t0 %1 %0 + la_a1 &rd_saw + st_t0,a1,0 + la_a3 &scan_pos + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a3,0 + la_br &rd_loop + b +:rd_done + la_a1 &rd_saw + ld_t0,a1,0 + la_br &err_expected_decimal + beqz_t0 + la_a1 &rd_val + ld_a0,a1,0 + eret + +## --- Byte stream / single byte literal ------------------------------------- + +## parse_byte_stream(): consume free-flowing digits (intermixed with +## whitespace and #/; comments) and emit_byte them. Stops at first non- +## digit non-whitespace non-comment byte. +:parse_byte_stream + enter_0 + li_t0 %0 %0 + la_a0 &pbs_acc + st_t0,a0,0 + la_a0 &pbs_have + st_t0,a0,0 +:pbs_loop + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &pbs_done + beq_t0,t1 + la_br &pbs_done + blt_t1,t0 + lb_a0,t0,0 + la_br &is_space_any + call + la_br &pbs_consume_ws + bnez_a0 + la_a3 &scan_pos + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + lb_a0,t0,0 + li_t1 %35 %0 + la_br &pbs_consume_comment + beq_a0,t1 + li_t1 %59 %0 + la_br &pbs_consume_comment + beq_a0,t1 + la_br &is_byte_digit + call + la_br &pbs_done + beqz_a0 + # Save and consume the digit char. 
+ la_a3 &scan_pos + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + lb_a0,t0,0 + addi_t0,t0,1 + st_t0,a3,0 + la_a1 &pbs_c + st_a0,a1,0 + # acc = (acc << shift_per_digit) | nibble + la_a1 &byte_mode + ld_t1,a1,0 + la_br &pbs_bin_step + bnez_t1 + # HEX + la_a1 &pbs_acc + ld_t0,a1,0 + shli_a3,t0,4 # a3 = acc << 4 + la_a1 &pbs_c + ld_a0,a1,0 + la_br &byte_digit_value + call + mov_t0,a0 + add_a3,a3,t0 # available + la_a1 &pbs_acc + st_a3,a1,0 + la_br &pbs_bump + b +:pbs_bin_step + # BINARY + la_a1 &pbs_acc + ld_t0,a1,0 + shli_a3,t0,1 # a3 = acc << 1 + la_a1 &pbs_c + ld_a0,a1,0 + li_t1 %48 %0 + sub_a0,a0,t1 + mov_t0,a0 + add_a3,a3,t0 + la_a1 &pbs_acc + st_a3,a1,0 +:pbs_bump + la_a0 &pbs_have + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &byte_digit_count + call + la_a1 &pbs_have + ld_t0,a1,0 + la_br &pbs_loop + la_a3 &aux_tmp + st_a0,a3,0 + ld_t1,a3,0 + bne_t0,t1 + # have == digits_per_byte: emit and reset + la_a1 &pbs_acc + ld_t2,a1,0 # t2 = acc + andi_a3,t2,255 + mov_a0,a3 # available mov_a0,a3 + la_br &emit_byte + call + li_t0 %0 %0 + la_a0 &pbs_acc + st_t0,a0,0 + la_a0 &pbs_have + st_t0,a0,0 + la_br &pbs_loop + b +:pbs_consume_ws + la_a3 &scan_pos + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + lb_a0,t0,0 + li_t1 %10 %0 + la_br &pbs_ws_advance + la_a3 &aux_tmp + st_t1,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + bne_a0,t0 + la_a2 &cur_line + la_a0 &aux_tmp + st_a2,a0,0 + ld_t2,a0,0 + addi_t2,t2,1 + st_t2,a2,0 +:pbs_ws_advance + la_a3 &scan_pos + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a3,0 + la_br &pbs_loop + b +:pbs_consume_comment +:pbs_cc_loop + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &pbs_loop + beq_t0,t1 + la_br &pbs_loop + blt_t1,t0 + lb_a0,t0,0 + li_t1 %10 %0 + la_br &pbs_loop + beq_a0,t1 + addi_t0,t0,1 + st_t0,a0,0 + la_br &pbs_cc_loop + b +:pbs_done + la_a0 &pbs_have + ld_t0,a0,0 + la_br &err_pbs_incomplete + bnez_t0 + eret + +## parse_one_byte(a0=out_byte_addr): read a single byte literal (exactly +## 
byte_digit_count contiguous digits, no internal whitespace). Fatal on
+## malformed input.
+:parse_one_byte
+    enter_0
+    la_a1 &p1b_out
+    st_a0,a1,0
+    li_t0 %0 %0
+    la_a1 &p1b_acc
+    st_t0,a1,0
+    la_a1 &p1b_have
+    st_t0,a1,0
+    la_a1 &p1b_done
+    st_t0,a1,0
+:p1b_loop
+    # Stop as soon as one full byte has been recorded.
+    la_a0 &p1b_done
+    ld_t0,a0,0
+    la_br &p1b_finish
+    bnez_t0
+    la_a0 &scan_pos
+    ld_t0,a0,0
+    la_a1 &scan_end
+    ld_t1,a1,0
+    la_br &p1b_finish
+    beq_t0,t1
+    la_br &p1b_finish
+    blt_t1,t0
+    lb_a0,t0,0
+    la_br &is_byte_digit
+    call
+    la_br &p1b_finish
+    beqz_a0
+    # Consume the digit: read through the cursor VALUE and advance it.
+    la_a1 &scan_pos
+    ld_t0,a1,0
+    lb_a0,t0,0
+    addi_t0,t0,1
+    st_t0,a1,0
+    la_a1 &p1b_c
+    st_a0,a1,0
+    la_a1 &byte_mode
+    ld_t1,a1,0
+    la_br &p1b_bin
+    bnez_t1
+    # HEX: convert first, then shift, so no register must survive the call.
+    la_a1 &p1b_c
+    ld_a0,a1,0
+    la_br &byte_digit_value
+    call
+    la_a1 &p1b_acc
+    ld_t0,a1,0
+    shli_a3,t0,4
+    mov_t0,a0
+    add_a3,a3,t0
+    st_a3,a1,0
+    la_br &p1b_bump
+    b
+:p1b_bin
+    la_a1 &p1b_c
+    ld_a0,a1,0
+    li_t1 %48 %0
+    sub_a0,a0,t1
+    la_a1 &p1b_acc
+    ld_t0,a1,0
+    shli_a3,t0,1
+    mov_t0,a0
+    add_a3,a3,t0
+    la_a1 &p1b_acc
+    st_a3,a1,0
+:p1b_bump
+    la_a0 &p1b_have
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &byte_digit_count
+    call
+    la_a1 &p1b_have
+    ld_t0,a1,0
+    la_br &p1b_loop
+    # t1 = a0 (digits per byte), routed through aux_tmp.
+    la_a3 &aux_tmp
+    st_a0,a3,0
+    ld_t1,a3,0
+    bne_t0,t1
+    # Got a full byte; record into *p1b_out and mark done.
+    la_a1 &p1b_acc
+    ld_t2,a1,0
+    andi_a3,t2,255
+    la_a0 &p1b_out
+    ld_a0,a0,0
+    sb_a3,a0,0
+    # Reset the pending-digit counter: p1b_finish treats a non-zero
+    # p1b_have as a malformed literal, so leaving it at digits-per-byte
+    # made every successful parse fatal.
+    li_t0 %0 %0
+    la_a1 &p1b_have
+    st_t0,a1,0
+    li_t0 %1 %0
+    la_a1 &p1b_done
+    st_t0,a1,0
+    la_br &p1b_loop
+    b
+:p1b_finish
+    # Fatal unless exactly one complete byte was read with no digits pending.
+    la_a1 &p1b_done
+    ld_t0,a1,0
+    la_br &err_byte_lit_bad
+    beqz_t0
+    la_a1 &p1b_have
+    ld_t0,a1,0
+    la_br &err_byte_lit_bad
+    bnez_t0
+    eret
+
+## --- Label table -----------------------------------------------------------
+
+## intern(a0=src, a1=len) -> a0=offset into text_buf. Copies bytes plus a
+## NUL terminator. Fatal on overflow.
+:intern
+    enter_0
+    la_a2 &intern_src
+    st_a0,a2,0
+    la_a2 &intern_len
+    st_a1,a2,0
+    la_a2 &text_used
+    ld_a3,a2,0
+    la_a2 &intern_orig
+    st_a3,a2,0
+    # if (text_used + len + 1 > TEXT_CAP) fatal
+    add_a2,a1,a3        # a2 = len + text_used
+    addi_a2,a2,1
+    li_t0 H2_TEXT_CAP
+    # Compare the cap against the computed size (a2), not the source
+    # pointer in a0. The size is routed into t1 through aux_tmp.
+    la_a3 &aux_tmp
+    st_a2,a3,0
+    ld_t1,a3,0          # t1 = text_used + len + 1
+    la_br &err_text_overflow
+    blt_t0,t1
+    # dst = text_buf + text_used. There's no add_a0,a0,a3 in the seed,
+    # so route the offset through t0.
+    la_a0 &text_buf_ptr
+    ld_a0,a0,0
+    la_a2 &intern_orig
+    ld_t0,a2,0
+    add_a0,a0,t0
+    la_a2 &intern_dst
+    st_a0,a2,0
+    # copy len bytes
+    li_t0 %0 %0
+    la_a1 &intern_i
+    st_t0,a1,0
+:intern_copy_loop
+    la_a0 &intern_i
+    ld_t0,a0,0
+    la_a1 &intern_len
+    ld_t1,a1,0
+    la_br &intern_copy_done
+    beq_t0,t1
+    la_a0 &intern_src
+    ld_a0,a0,0
+    add_a0,a0,t0
+    lb_a0,a0,0          # a0 = src[i]
+    la_a2 &intern_dst
+    ld_a2,a2,0
+    add_a2,a2,t0
+    sb_a0,a2,0          # dst[i] = src[i]
+    addi_t0,t0,1
+    la_a1 &intern_i
+    st_t0,a1,0
+    la_br &intern_copy_loop
+    b
+:intern_copy_done
+    # NUL terminator at dst[len]
+    la_a2 &intern_dst
+    ld_a2,a2,0
+    la_a1 &intern_len
+    ld_t0,a1,0
+    add_a2,a2,t0
+    li_t1 %0 %0
+    sb_t1,a2,0
+    # text_used += len + 1
+    la_a2 &text_used
+    ld_a3,a2,0
+    la_a1 &intern_len
+    ld_a1,a1,0
+    add_a3,a3,a1
+    addi_a3,a3,1
+    st_a3,a2,0
+    # return original text_used
+    la_a0 &intern_orig
+    ld_a0,a0,0
+    eret
+
+## label_addr(a0=index) -> a0 = &labels[index]. Leaf. labels are 32 B.
+## Result is shipped through la_const_32 since neither mov_a0,a2 nor an
+## add_a0,a*,t* combo with the right operands exists in the seed table.
+:label_addr
+    mov_t0,a0
+    # Stage the record size in la_const_32 and load the VALUE back; the
+    # previous sequence left the slot's address in a1, so the index was
+    # scaled by an address instead of by 32.
+    la_a1 &la_const_32
+    li_t1 %32 %0
+    st_t1,a1,0
+    ld_a1,a1,0          # a1 = 32
+    mul_t0,t0,a1        # t0 = 32 * index
+    la_a2 &labels_ptr
+    ld_a2,a2,0
+    add_a2,a2,t0        # a2 = labels + 32*index
+    la_a3 &la_const_32
+    st_a2,a3,0          # spill result
+    ld_a0,a3,0          # reload into a0
+    ret
+
+## name_eq(a0=label_addr, a1=src, a2=len) -> a0=0/1. Compares the label's
+## interned name against (src, len). Leaf-ish (calls mem_eq).
+:name_eq + enter_0 + la_a3 &ne_label + st_a0,a3,0 + la_a3 &ne_src + st_a1,a3,0 + la_a3 &ne_len + st_a2,a3,0 + # if (label->name_len != len) return 0 + ld_t0,a0,8 + la_br &ne_no + la_a3 &aux_tmp + st_a2,a3,0 + ld_t1,a3,0 + bne_t0,t1 + # bytes + ld_a3,a0,0 # name_off + la_a0 &text_buf_ptr + ld_a0,a0,0 + la_a2 &ne_tmp + st_a3,a2,0 + ld_t0,a2,0 + add_a0,a0,t0 + la_a1 &ne_src + ld_a1,a1,0 + la_a2 &ne_len + ld_a2,a2,0 + la_br &mem_eq + call + eret +:ne_no + li_a0 %0 %0 + eret + +## define_label(a0=src, a1=len, a2=scope_id): record at labels[label_count]. +## Fatal on duplicate (within same scope) or overflow. +:define_label + enter_0 + la_a3 &dl_src + st_a0,a3,0 + la_a3 &dl_len + st_a1,a3,0 + la_a3 &dl_scope + st_a2,a3,0 + + li_t0 %0 %0 + la_a0 &dl_i + st_t0,a0,0 +:dl_dup_loop + la_a0 &dl_i + ld_t0,a0,0 + la_a1 &label_count + ld_t1,a1,0 + la_br &dl_dup_done + beq_t0,t1 + mov_a0,t0 + la_br &label_addr + call + # a0 = &labels[i] + la_a3 &dl_label + st_a0,a3,0 + # scope match? + mov_a1,a0 + mov_a2,a1 + addi_a2,a2,24 + ld_a0,a2,0 + mov_t0,a0 + la_a1 &dl_scope + ld_t1,a1,0 + la_br &dl_dup_next + bne_t0,t1 + # name match? 
+ la_a0 &dl_label + ld_a0,a0,0 + la_a1 &dl_src + ld_a1,a1,0 + la_a2 &dl_len + ld_a2,a2,0 + la_br &name_eq + call + la_br &dl_dup_next + beqz_a0 + la_br &err_duplicate_label + b +:dl_dup_next + la_a0 &dl_i + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &dl_dup_loop + b +:dl_dup_done + + la_a0 &label_count + ld_t0,a0,0 + li_t1 H2_LABEL_CAP + la_br &err_too_many_labels + beq_t0,t1 + la_br &err_too_many_labels + blt_t1,t0 + + # name_off = intern(src, len) + la_a0 &dl_src + ld_a0,a0,0 + la_a1 &dl_len + ld_a1,a1,0 + la_br &intern + call + la_a3 &dl_name_off + st_a0,a3,0 + + # &labels[label_count] + la_a0 &label_count + ld_a0,a0,0 + la_br &label_addr + call + la_a3 &dl_label + st_a0,a3,0 + # name_off + la_a1 &dl_name_off + ld_t0,a1,0 + st_t0,a0,0 + # name_len: st_t0,a0,8 is missing from the seed; the equivalent + # st_t0,a3,8 IS available, so move the base to a3 first via the + # dl_label scratch slot. + la_a3 &dl_label + ld_a3,a3,0 + la_a1 &dl_len + ld_t0,a1,0 + st_t0,a3,8 + # target_ip + la_a1 &ip + ld_t0,a1,0 + st_t0,a0,16 + # scope_id + la_a1 &dl_scope + ld_t0,a1,0 + st_t0,a0,24 + # label_count++ + la_a0 &label_count + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + eret + +## lookup_label(a0=src, a1=len) -> a0=target_ip. Fatal on undefined. +:lookup_label + enter_0 + la_a2 &ll_src + st_a0,a2,0 + la_a2 &ll_len + st_a1,a2,0 + la_a3 &aux_tmp + st_a0,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + lb_t0,t0,0 + li_t1 %46 %0 + la_br &ll_undotted + bne_t0,t1 + # Dotted: walk scope_stack innermost-out. 
+ la_a0 &scope_depth + ld_t0,a0,0 + addi_t0,t0,neg1 + la_a1 &ll_d + st_t0,a1,0 +:ll_dot_outer + la_a0 &ll_d + ld_t0,a0,0 + la_br &ll_undefined_local + bltz_t0 + la_a1 &scope_stack_ptr + ld_a1,a1,0 + shli_t2,t0,3 + la_a3 &ll_tmp + st_t2,a3,0 + ld_t1,a3,0 + add_a1,a1,t1 + ld_t1,a1,0 # t1 = sid + la_a0 &ll_sid + st_t1,a0,0 + li_t0 %0 %0 + la_a0 &ll_i + st_t0,a0,0 +:ll_dot_inner + la_a0 &ll_i + ld_t0,a0,0 + la_a1 &label_count + ld_t1,a1,0 + la_br &ll_dot_next_d + beq_t0,t1 + mov_a0,t0 + la_br &label_addr + call + la_a3 &ll_label + st_a0,a3,0 + mov_a1,a0 + mov_a2,a1 + addi_a2,a2,24 + ld_a0,a2,0 + mov_t0,a0 + la_a1 &ll_sid + ld_t1,a1,0 + la_br &ll_dot_inner_next + bne_t0,t1 + la_a0 &ll_label + ld_a0,a0,0 + la_a1 &ll_src + ld_a1,a1,0 + la_a2 &ll_len + ld_a2,a2,0 + la_br &name_eq + call + la_br &ll_dot_inner_next + beqz_a0 + la_a0 &ll_label + ld_a0,a0,0 + ld_a0,a0,16 # target_ip + eret +:ll_dot_inner_next + la_a0 &ll_i + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &ll_dot_inner + b +:ll_dot_next_d + la_a0 &ll_d + ld_t0,a0,0 + addi_t0,t0,neg1 + st_t0,a0,0 + la_br &ll_dot_outer + b + +:ll_undotted + li_t0 %0 %0 + la_a0 &ll_i + st_t0,a0,0 +:ll_undotted_loop + la_a0 &ll_i + ld_t0,a0,0 + la_a1 &label_count + ld_t1,a1,0 + la_br &ll_undefined_global + beq_t0,t1 + mov_a0,t0 + la_br &label_addr + call + la_a3 &ll_label + st_a0,a3,0 + mov_a1,a0 + mov_a2,a1 + addi_a2,a2,24 + ld_a0,a2,0 + mov_t0,a0 + la_br &ll_undotted_next + bnez_t0 + la_a0 &ll_label + ld_a0,a0,0 + la_a1 &ll_src + ld_a1,a1,0 + la_a2 &ll_len + ld_a2,a2,0 + la_br &name_eq + call + la_br &ll_undotted_next + beqz_a0 + la_a0 &ll_label + ld_a0,a0,0 + ld_a0,a0,16 + eret +:ll_undotted_next + la_a0 &ll_i + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &ll_undotted_loop + b +:ll_undefined_local + la_br &err_undefined_local + b +:ll_undefined_global + la_br &err_undefined_label + b + +## --- Reference processor ---------------------------------------------------- + +## process_reference(): cur_sigil already set by the 
dispatcher; scan_pos +## already past the sigil byte. Reads label and (optional) -other, +## advances ip on pass 1, and emits the resolved value on pass 2. +:process_reference + enter_0 + la_br &set_sigil_info + call + # Require non-terminator. + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &err_sigil_no_label + beq_t0,t1 + la_br &err_sigil_no_label + blt_t1,t0 + lb_a0,t0,0 + la_br &is_name_terminator_c + call + la_br &err_sigil_no_label + bnez_a0 + # llen = read_name(label_buf, MAX_TOKEN) + la_a0 &label_buf_ptr + ld_a0,a0,0 + li_a1 H2_TOKEN_CAP + la_br &read_name + call + la_a1 &pr_llen + st_a0,a1,0 + li_t0 %0 %0 + la_a0 &pr_has_other + st_t0,a0,0 + # Optional '-' OTHER. + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &pr_after_other + beq_t0,t1 + la_br &pr_after_other + blt_t1,t0 + lb_a0,t0,0 + li_t1 %45 %0 + la_br &pr_after_other + la_a3 &aux_tmp + st_t1,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + bne_a0,t0 + addi_t0,t0,1 + la_a1 &scan_pos + st_t0,a1,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &err_minus_no_label + beq_t0,t1 + la_br &err_minus_no_label + blt_t1,t0 + lb_a0,t0,0 + la_br &is_name_terminator_c + call + la_br &err_minus_no_label + bnez_a0 + la_a0 &other_buf_ptr + ld_a0,a0,0 + li_a1 H2_TOKEN_CAP + la_br &read_name + call + la_a1 &pr_olen + st_a0,a1,0 + li_t0 %1 %0 + la_a0 &pr_has_other + st_t0,a0,0 +:pr_after_other + la_a0 &pass + ld_t0,a0,0 + li_t1 %1 %0 + la_br &pr_pass2 + bne_t0,t1 + # ip += pr_width. Route width through t2 since add_t1,t1,t* (t* in + # {t0,t2}) is the only fitting form in the seed. 
+ la_a0 &ip + ld_t1,a0,0 + la_a1 &pr_width + ld_t0,a1,0 + la_a3 &pr_tmp + st_t0,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t2,a0,0 + add_t1,t1,t2 + st_t1,a0,0 + eret +:pr_pass2 + la_a0 &label_buf_ptr + ld_a0,a0,0 + la_a1 &pr_llen + ld_a1,a1,0 + la_br &lookup_label + call + la_a1 &pr_t_label + st_a0,a1,0 + la_a0 &pr_has_other + ld_t0,a0,0 + la_br &pr_no_other + beqz_t0 + la_a0 &other_buf_ptr + ld_a0,a0,0 + la_a1 &pr_olen + ld_a1,a1,0 + la_br &lookup_label + call + la_a1 &pr_t_other + st_a0,a1,0 + # value = t_label - t_other + la_a0 &pr_t_label + ld_a1,a0,0 + la_a0 &pr_t_other + ld_a0,a0,0 + sub_a2,a1,a0 # available: a2 = a1 - a0 + la_a1 &pr_value + st_a2,a1,0 + la_br &pr_emit + b +:pr_no_other + la_a0 &pr_is_rel + ld_t0,a0,0 + la_br &pr_abs + beqz_t0 + # rel: value = t_label - (ip + width) + la_a0 &ip + ld_a1,a0,0 + la_a3 &pr_tmp + la_a0 &pr_width + ld_t0,a0,0 + st_t0,a3,0 + ld_a0,a3,0 # a0 = width + add_a1,a1,a0 # add_a1,a1,a0 — available + la_a3 &pr_tmp + st_a1,a3,0 # save (ip + width) + la_a0 &pr_t_label + ld_a1,a0,0 # a1 = t_label + ld_a0,a3,0 # a0 = ip+width + sub_a2,a1,a0 + la_a1 &pr_value + st_a2,a1,0 + la_br &pr_emit + b +:pr_abs + # value = t_label + base_address + la_a0 &pr_t_label + ld_a1,a0,0 + la_a0 &base_address + ld_a0,a0,0 + add_a1,a1,a0 + la_a3 &pr_value + st_a1,a3,0 +:pr_emit + la_a0 &pr_value + ld_a0,a0,0 + la_a1 &pr_width + ld_a1,a1,0 + la_a2 &pr_lo + ld_a2,a2,0 + la_a3 &pr_hi + ld_a3,a3,0 + la_a0 &pr_range_check + ld_t0,a0,0 + # Reload value into a0 since we just clobbered it. + la_a0 &pr_value + ld_a0,a0,0 + la_br &emit_value + call + eret + +## set_sigil_info(): reads cur_sigil; populates pr_width / pr_is_rel / +## pr_lo / pr_hi / pr_range_check. +## +## Sigil table: +## '!' 
(0x21): width=1, rel, lo=-128, hi=127, check +## '@' (0x40): width=2, rel, lo=-32768, hi=32767, check +## '$' (0x24): width=2, abs, lo=0, hi=65535, check +## '~' (0x7E): width=3, rel, lo=-(1<<23), hi=(1<<23)-1, check +## '%' (0x25): width=4, rel, no range check +## '&' (0x26): width=4, abs, no range check +:set_sigil_info + enter_0 + la_a0 &cur_sigil + ld_a0,a0,0 + li_t0 %33 %0 + la_br &ssi_bang + beq_a0,t0 + li_t0 %64 %0 + la_br &ssi_at + beq_a0,t0 + li_t0 %36 %0 + la_br &ssi_dollar + beq_a0,t0 + li_t0 %126 %0 + la_br &ssi_tilde + beq_a0,t0 + li_t0 %37 %0 + la_br &ssi_pct + beq_a0,t0 + li_t0 %38 %0 + la_br &ssi_amp + beq_a0,t0 + la_br &err_bad_sigil + b +:ssi_bang + li_t0 %1 %0 + la_a1 &pr_width + st_t0,a1,0 + la_a1 &pr_is_rel + st_t0,a1,0 + la_a1 &pr_range_check + st_t0,a1,0 + # lo = -128 = 0 - 128 + li_t0 %128 %0 + la_a3 &ssi_tmp + st_t0,a3,0 + ld_a3,a3,0 # a3 = 128 + li_t0 %0 %0 + sub_a3,t0,a3 # available: a3 = t0 - a3 = -128 + la_a1 &pr_lo + st_a3,a1,0 + li_t0 %127 %0 + la_a1 &pr_hi + st_t0,a1,0 + eret +:ssi_at + li_t0 %2 %0 + la_a1 &pr_width + st_t0,a1,0 + li_t0 %1 %0 + la_a1 &pr_is_rel + st_t0,a1,0 + la_a1 &pr_range_check + st_t0,a1,0 + # 32768 = 256 * 128. The seed has no immediate >= 256 in li_t* (it + # does, since li_t0 takes a 64-bit value, but we use the available + # %256 word literal). Stage values through ssi_tmp before each ld. 
+ li_t0 %256 %0 + la_a1 &ssi_tmp + st_t0,a1,0 + ld_a3,a1,0 # a3 = 256 + li_t0 %128 %0 + st_t0,a1,0 + ld_a2,a1,0 # a2 = 128 + mul_a3,a3,a2 # a3 = 32768 + la_a1 &ssi_tmp2 + st_a3,a1,0 + li_t0 %0 %0 + sub_a3,t0,a3 # a3 = -32768 + la_a1 &pr_lo + st_a3,a1,0 + la_a1 &ssi_tmp2 + ld_t1,a1,0 + addi_t1,t1,neg1 # 32767 + la_a1 &pr_hi + st_t1,a1,0 + eret +:ssi_dollar + li_t0 %2 %0 + la_a1 &pr_width + st_t0,a1,0 + li_t0 %0 %0 + la_a1 &pr_is_rel + st_t0,a1,0 + li_t0 %1 %0 + la_a1 &pr_range_check + st_t0,a1,0 + li_t0 %0 %0 + la_a1 &pr_lo + st_t0,a1,0 + # 65536 = 256 * 256 + li_t0 %256 %0 + la_a1 &ssi_tmp + st_t0,a1,0 + ld_a3,a1,0 + ld_a2,a1,0 + mul_a3,a3,a2 # a3 = 65536 + la_a1 &ssi_tmp2 + st_a3,a1,0 + ld_t1,a1,0 + addi_t1,t1,neg1 # 65535 + la_a1 &pr_hi + st_t1,a1,0 + eret +:ssi_tilde + li_t0 %3 %0 + la_a1 &pr_width + st_t0,a1,0 + li_t0 %1 %0 + la_a1 &pr_is_rel + st_t0,a1,0 + la_a1 &pr_range_check + st_t0,a1,0 + # 8388608 = 256 * 256 * 128 + li_t0 %256 %0 + la_a1 &ssi_tmp + st_t0,a1,0 + ld_a3,a1,0 + ld_a2,a1,0 + mul_a3,a3,a2 # 65536 + li_t0 %128 %0 + st_t0,a1,0 + ld_a2,a1,0 + mul_a3,a3,a2 # 8388608 + la_a1 &ssi_tmp2 + st_a3,a1,0 + li_t0 %0 %0 + sub_a3,t0,a3 # -8388608 + la_a1 &pr_lo + st_a3,a1,0 + la_a1 &ssi_tmp2 + ld_t1,a1,0 + addi_t1,t1,neg1 # 8388607 + la_a1 &pr_hi + st_t1,a1,0 + eret +:ssi_pct + li_t0 %4 %0 + la_a1 &pr_width + st_t0,a1,0 + li_t0 %1 %0 + la_a1 &pr_is_rel + st_t0,a1,0 + li_t0 %0 %0 + la_a1 &pr_range_check + st_t0,a1,0 + la_a1 &pr_lo + st_t0,a1,0 + la_a1 &pr_hi + st_t0,a1,0 + eret +:ssi_amp + li_t0 %4 %0 + la_a1 &pr_width + st_t0,a1,0 + li_t0 %0 %0 + la_a1 &pr_is_rel + st_t0,a1,0 + la_a1 &pr_range_check + st_t0,a1,0 + la_a1 &pr_lo + st_t0,a1,0 + la_a1 &pr_hi + st_t0,a1,0 + eret + +## --- Directives ------------------------------------------------------------- + +## do_align(): .align N [PATTERN]. N is a positive power of two; optional +## byte-mode pattern. Pads with zeros if no pattern. 
+:do_align + enter_0 + la_br &skip_inline_ws + call + la_br &read_decimal + call + la_a1 &da_n + st_a0,a1,0 + la_br &err_align_n + beqz_a0 + la_br &err_align_n + bltz_a0 + # Power-of-two check: N & (N-1) == 0 + la_a0 &da_n + ld_a3,a0,0 + la_a0 &da_n + ld_a2,a0,0 + addi_a2,a2,neg1 + and_a3,a3,a2 + la_br &err_align_n + mov_a0,a3 + bnez_a0 + + li_t0 %0 %0 + la_a0 &da_has_pat + st_t0,a0,0 + la_a0 &da_patlen + st_t0,a0,0 + + la_br &skip_inline_ws + call + + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &da_compute + beq_t0,t1 + la_br &da_compute + blt_t1,t0 + lb_a0,t0,0 + la_br &is_byte_digit + call + la_br &da_compute + beqz_a0 + li_t0 %1 %0 + la_a1 &da_has_pat + st_t0,a1,0 +:da_pat_loop + la_a0 &scan_pos + ld_t0,a0,0 + la_a1 &scan_end + ld_t1,a1,0 + la_br &da_compute + beq_t0,t1 + la_br &da_compute + blt_t1,t0 + lb_a0,t0,0 + la_br &is_byte_digit + call + la_br &da_compute + beqz_a0 + la_a0 &da_patlen + ld_t0,a0,0 + li_t1 H2_TOKEN_CAP + la_br &err_pattern_too_large + beq_t0,t1 + la_a0 &pat_buf_ptr + ld_a0,a0,0 + add_a0,a0,t0 + la_br &parse_one_byte + call + la_a0 &da_patlen + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &da_pat_loop + b +:da_compute + # pad = (N - (ip mod N)) mod N + la_a0 &ip + ld_a0,a0,0 + la_a1 &da_n + ld_a1,a1,0 + rem_a2,a0,a1 # a2 = ip mod N + li_t0 %0 %0 + la_a3 &da_pad + st_t0,a3,0 + la_br &da_emit + beqz_a2 + # pad = N - r + la_a3 &da_pad + st_a1,a3,0 # store N + ld_a3,a3,0 # a3 = N + sub_a3,a3,a2 # a3 = N - r + la_a1 &da_pad + st_a3,a1,0 +:da_emit + li_t0 %0 %0 + la_a0 &da_i + st_t0,a0,0 +:da_emit_loop + la_a0 &da_i + ld_t0,a0,0 + la_a1 &da_pad + ld_t1,a1,0 + la_br &da_emit_done + beq_t0,t1 + la_a0 &da_has_pat + ld_t1,a0,0 + la_br &da_emit_zero + beqz_t1 + # b = pat[i % patlen] + la_a0 &da_i + ld_a0,a0,0 + la_a1 &da_patlen + ld_a1,a1,0 + rem_a2,a0,a1 + la_a0 &pat_buf_ptr + ld_a0,a0,0 + add_a0,a0,a2 + lb_a0,a0,0 + la_br &emit_byte + call + la_br &da_emit_inc + b +:da_emit_zero + li_a0 %0 %0 + la_br &emit_byte + call 
+:da_emit_inc + la_a0 &da_i + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &da_emit_loop + b +:da_emit_done + eret + +## do_fill(): .fill N B. N >= 0 decimal; B is one byte literal. +:do_fill + enter_0 + la_br &skip_inline_ws + call + la_br &read_decimal + call + la_a1 &df_n + st_a0,a1,0 + la_br &err_fill_n + bltz_a0 + la_br &skip_inline_ws + call + la_a0 &df_byte_ptr + ld_a0,a0,0 + la_br &parse_one_byte + call + li_t0 %0 %0 + la_a0 &df_i + st_t0,a0,0 +:df_loop + la_a0 &df_i + ld_t0,a0,0 + la_a1 &df_n + ld_t1,a1,0 + la_br &df_done + beq_t0,t1 + la_a0 &df_byte_ptr + ld_a0,a0,0 + lb_a0,a0,0 + la_br &emit_byte + call + la_a0 &df_i + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &df_loop + b +:df_done + eret + +## do_scope_open(): scope_seq++; scope_stack[scope_depth++] = scope_seq. +:do_scope_open + enter_0 + la_a0 &scope_depth + ld_t0,a0,0 + li_t1 H2_SCOPE_CAP + la_br &err_scope_overflow + beq_t0,t1 + la_br &err_scope_overflow + blt_t1,t0 + la_a0 &scope_seq + ld_t1,a0,0 + addi_t1,t1,1 + st_t1,a0,0 + la_a1 &scope_stack_ptr + ld_a1,a1,0 + shli_t2,t0,3 + add_a1,t2,a1 # available + st_t1,a1,0 + addi_t0,t0,1 + la_a0 &scope_depth + st_t0,a0,0 + eret + +## do_scope_close(): scope_depth--; fatal if not in scope. +:do_scope_close + enter_0 + la_a0 &scope_depth + ld_t0,a0,0 + la_br &err_scope_underflow + beqz_t0 + addi_t0,t0,neg1 + st_t0,a0,0 + eret + +## --- Emit ------------------------------------------------------------------- + +## emit_byte(a0=byte): pass 1 only bumps ip; pass 2 also writes to output_buf. +## Leaf. +:emit_byte + la_a1 &pass + ld_t0,a1,0 + li_t1 %2 %0 + la_br &eb_pass1 + bne_t0,t1 + la_a1 &output_used + ld_t0,a1,0 + li_t1 H2_OUTPUT_CAP + la_br &err_output_overflow + beq_t0,t1 + la_br &err_output_overflow + blt_t1,t0 + la_a2 &output_buf_ptr + ld_a2,a2,0 + add_a2,a2,t0 + sb_a0,a2,0 + addi_t0,t0,1 + st_t0,a1,0 +:eb_pass1 + la_a0 &ip + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + ret + +## emit_value(a0=value, a1=width, a2=lo, a3=hi, t0=range_check). 
Range-checks
+## (if requested), packs little-endian into ev_bytes[0..width-1], then emits
+## (in reverse order if big_endian).
+:emit_value
+    enter_0
+    # Spill all five arguments to the ev_* scratch slots.
+    la_t1 &ev_value
+    mov_t2,t1
+    st_a0,t2,0
+    la_t1 &ev_width
+    st_a1,t1,0
+    la_t1 &ev_lo
+    st_a2,t1,0
+    la_t1 &ev_hi
+    # a0 = t1 (= &ev_hi), copied through aux_tmp, then ev_hi = a3.
+    la_a0 &aux_tmp
+    st_t1,a0,0
+    ld_a0,a0,0
+    st_a3,a0,0
+    la_t1 &ev_range_check
+    st_t0,t1,0
+
+    # Skip the range check when ev_range_check is zero.
+    la_a0 &ev_range_check
+    ld_t0,a0,0
+    la_br &ev_no_range
+    beqz_t0
+    # value < lo -> err_ref_out_of_range
+    la_a0 &ev_value
+    ld_a0,a0,0
+    la_a1 &ev_lo
+    ld_a1,a1,0
+    la_br &err_ref_out_of_range
+    mov_a2,a1
+    blt_a0,a2
+    # hi < value -> err_ref_out_of_range
+    la_a0 &ev_value
+    ld_a0,a0,0
+    la_a1 &ev_hi
+    ld_a1,a1,0
+    la_br &err_ref_out_of_range
+    blt_a1,a0
+:ev_no_range
+    # ev_pack_v = value; ev_i = 0
+    la_a0 &ev_value
+    ld_a0,a0,0
+    la_a1 &ev_pack_v
+    st_a0,a1,0
+    li_t0 %0 %0
+    la_a0 &ev_i
+    st_t0,a0,0
+:ev_pack_loop
+    # while (ev_i != width): ev_bytes[ev_i] = ev_pack_v & 255; ev_pack_v >>= 8
+    la_a0 &ev_i
+    ld_t0,a0,0
+    la_a1 &ev_width
+    ld_t1,a1,0
+    la_br &ev_emit_dispatch
+    beq_t0,t1
+    la_a1 &ev_pack_v
+    ld_t2,a1,0
+    andi_a3,t2,255
+    la_a2 &ev_bytes_ptr
+    ld_a2,a2,0
+    add_a2,a2,t0
+    sb_a3,a2,0
+    la_a1 &ev_pack_v
+    ld_t2,a1,0
+    shri_t2,t2,8
+    st_t2,a1,0
+    addi_t0,t0,1
+    la_a0 &ev_i
+    st_t0,a0,0
+    la_br &ev_pack_loop
+    b
+:ev_emit_dispatch
+    # Non-zero big_endian selects the reversed emit loop.
+    la_a0 &big_endian
+    ld_t0,a0,0
+    la_br &ev_emit_be
+    bnez_t0
+    li_t0 %0 %0
+    la_a0 &ev_i
+    st_t0,a0,0
+:ev_emit_le_loop
+    # Emit ev_bytes[0], ev_bytes[1], ... up to width-1.
+    la_a0 &ev_i
+    ld_t0,a0,0
+    la_a1 &ev_width
+    ld_t1,a1,0
+    la_br &ev_done
+    beq_t0,t1
+    la_a2 &ev_bytes_ptr
+    ld_a2,a2,0
+    add_a2,a2,t0
+    # a0 = a2 (byte address), copied through aux_tmp, then a0 = *a0.
+    la_a3 &aux_tmp
+    st_a2,a3,0
+    ld_a0,a3,0
+    lb_a0,a0,0
+    la_br &emit_byte
+    call
+    # ev_i is reloaded from memory after the call rather than kept in t0.
+    la_a0 &ev_i
+    ld_t0,a0,0
+    addi_t0,t0,1
+    st_t0,a0,0
+    la_br &ev_emit_le_loop
+    b
+:ev_emit_be
+    # ev_i = width - 1, then count down until it goes negative.
+    la_a0 &ev_width
+    ld_t0,a0,0
+    addi_t0,t0,neg1
+    la_a1 &ev_i
+    st_t0,a1,0
+:ev_emit_be_loop
+    la_a0 &ev_i
+    ld_t0,a0,0
+    la_br &ev_done
+    bltz_t0
+    la_a2 &ev_bytes_ptr
+    ld_a2,a2,0
+    add_a2,a2,t0
+    la_a3 &aux_tmp
+    st_a2,a3,0
+    ld_a0,a3,0
+    lb_a0,a0,0
+    la_br &emit_byte
+    call
+    la_a0 &ev_i
+    ld_t0,a0,0
+    addi_t0,t0,neg1
+    st_t0,a0,0
+    la_br &ev_emit_be_loop
+    b
+:ev_done
+    eret
+
+## --- Misc helpers 
-----------------------------------------------------------
+
+## str_eq(a0=p, a1=q, a2=len) -> a0=0/1. Returns 1 iff p[0..len-1] == q[..]
+## AND p[len] == '\0'. Used for argv string compares.
+:str_eq
+    enter_0
+    la_t0 &se_p
+    st_a0,t0,0
+    la_t0 &se_q
+    st_a1,t0,0
+    la_t0 &se_len
+    st_a2,t0,0
+    li_t1 %0 %0         # t1 = i
+:se_loop
+    la_a0 &se_len
+    ld_a1,a0,0
+    la_br &se_check_terminal
+    beq_t1,a1
+    la_a0 &se_p
+    ld_a0,a0,0
+    add_a0,a0,t1
+    lb_a0,a0,0          # a0 = p[i]
+    la_a2 &se_q
+    ld_a2,a2,0
+    add_a2,a2,t1
+    lb_a2,a2,0          # a2 = q[i]
+    la_br &se_no
+    # t0 = a2, routed through aux_tmp using a1 as the base so that a0
+    # (p[i]) stays intact. The previous sequence overwrote a0 with
+    # &aux_tmp and then compared it with itself, so a mismatch was never
+    # detected. a1 is free here: len is reloaded at the top of the loop.
+    la_a1 &aux_tmp
+    st_a2,a1,0
+    ld_t0,a1,0
+    bne_a0,t0
+    addi_t1,t1,1
+    la_br &se_loop
+    b
+:se_check_terminal
+    # All len bytes matched; additionally require p[len] == NUL.
+    la_a0 &se_p
+    ld_a0,a0,0
+    add_a0,a0,t1
+    lb_a0,a0,0
+    la_br &se_no
+    bnez_a0
+    li_a0 %1 %0
+    eret
+:se_no
+    li_a0 %0 %0
+    eret
+
+## mem_eq(a0=p, a1=q, a2=len) -> a0=0/1. Plain byte compare, no NUL check.
+:mem_eq
+    enter_0
+    la_t0 &me_p
+    st_a0,t0,0
+    la_t0 &me_q
+    st_a1,t0,0
+    la_t0 &me_len
+    st_a2,t0,0
+    li_t1 %0 %0         # t1 = i
+:me_loop
+    la_a0 &me_len
+    ld_a1,a0,0
+    la_br &me_yes
+    beq_t1,a1
+    la_a0 &me_p
+    ld_a0,a0,0
+    add_a0,a0,t1
+    lb_a0,a0,0          # a0 = p[i]
+    la_a2 &me_q
+    ld_a2,a2,0
+    add_a2,a2,t1
+    lb_a2,a2,0          # a2 = q[i]
+    la_br &me_no
+    # t0 = a2 via aux_tmp, keeping a0 intact for the compare. The previous
+    # sequence clobbered a0 and compared it with itself, so this routine
+    # returned 1 for any two buffers.
+    la_a1 &aux_tmp
+    st_a2,a1,0
+    ld_t0,a1,0
+    bne_a0,t0
+    addi_t1,t1,1
+    la_br &me_loop
+    b
+:me_yes
+    li_a0 %1 %0
+    eret
+:me_no
+    li_a0 %0 %0
+    eret
+
+## parse_long_arg(a0=str): parse decimal or 0x-prefixed hex i64. Fatal on
+## malformed.
+:parse_long_arg + enter_0 + la_t0 &pla_p + st_a0,t0,0 + li_t0 %0 %0 + la_a1 &pla_val + st_t0,a1,0 + la_a1 &pla_neg + st_t0,a1,0 + # detect 0x / 0X + la_a0 &pla_p + ld_a0,a0,0 + la_a3 &aux_tmp + st_a0,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + lb_t0,t0,0 + li_t1 %48 %0 + la_br &pla_dec_init + bne_t0,t1 + la_a3 &aux_tmp + st_a0,a3,0 + ld_a3,a3,0 + lb_t0,a3,1 + li_t1 %120 %0 + la_br &pla_hex_init + beq_t0,t1 + li_t1 %88 %0 + la_br &pla_hex_init + beq_t0,t1 + la_br &pla_dec_init + b +:pla_hex_init + la_a0 &pla_p + ld_t0,a0,0 + addi_t0,t0,2 # skip "0x" / "0X" + st_t0,a0,0 +:pla_hex_loop + la_a0 &pla_p + ld_a0,a0,0 + la_a3 &aux_tmp + st_a0,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + lb_t0,t0,0 + la_br &pla_finish + beqz_t0 + la_a3 &pla_tmp + st_t0,a3,0 + ld_a0,a3,0 # a0 = c + la_br &byte_digit_value + call + la_a1 &pla_val + ld_t0,a1,0 + shli_a3,t0,4 # a3 = val << 4 + mov_t0,a0 + add_a3,a3,t0 + st_a3,a1,0 + la_a0 &pla_p + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &pla_hex_loop + b +:pla_dec_init + # Optional minus + la_a0 &pla_p + ld_a0,a0,0 + la_a3 &aux_tmp + st_a0,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + lb_t0,t0,0 + li_t1 %45 %0 + la_br &pla_dec_loop + bne_t0,t1 + li_t0 %1 %0 + la_a1 &pla_neg + st_t0,a1,0 + la_a0 &pla_p + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 +:pla_dec_loop + la_a0 &pla_p + ld_a0,a0,0 + la_a3 &aux_tmp + st_a0,a3,0 + la_a0 &aux_tmp + st_a3,a0,0 + ld_t0,a0,0 + lb_t0,t0,0 + la_br &pla_finish + beqz_t0 + li_t1 %48 %0 + la_br &err_bad_long + blt_t0,t1 + li_t1 %57 %0 + la_br &err_bad_long + blt_t1,t0 + la_a3 &pla_tmp + li_t1 %10 %0 + st_t1,a3,0 + la_a1 &aux_tmp + st_a3,a1,0 + ld_a1,a1,0 + la_a2 &pla_val + ld_t0,a2,0 + mul_t0,t0,a1 # t0 = val * 10 + la_a0 &pla_p + ld_a0,a0,0 + lb_a0,a0,0 + li_t1 %48 %0 + sub_a0,a0,t1 + add_t0,t0,a0 + la_a2 &pla_val + st_t0,a2,0 + la_a0 &pla_p + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_br &pla_dec_loop + b +:pla_finish + la_a1 &pla_neg + ld_t0,a1,0 + la_br &pla_done + beqz_t0 + la_a2 &pla_val + 
ld_a3,a2,0 + li_t0 %0 %0 + sub_a3,t0,a3 # a3 = -val + st_a3,a2,0 +:pla_done + la_a1 &pla_val + ld_a0,a1,0 + eret + +## --- Output writer ---------------------------------------------------------- + +## write_output(): openat(output_path, O_WRONLY|O_CREAT|O_TRUNC, MODE). +## MODE = 0750 unless --non-executable, then 0640. Then write loop. +:write_output + enter_0 + la_a0 &output_path + ld_a2,a0,0 + li_a0 sys_openat + li_a1 AT_FDCWD + li_a3 O_WRONLY_CREAT_TRUNC + la_t1 &non_executable + ld_t1,t1,0 + la_br &wo_mode_nonexec + bnez_t1 + li_t0 MODE_0750 + la_br &wo_after_mode + b +:wo_mode_nonexec + li_t0 MODE_0640 +:wo_after_mode + syscall + la_br &err_open_output + bltz_a0 + la_a1 &output_fd + st_a0,a1,0 + li_t0 %0 %0 + la_a1 &output_written + st_t0,a1,0 +:wo_loop + la_a0 &output_written + ld_t0,a0,0 + la_a1 &output_used + ld_t1,a1,0 + la_br &wo_done + beq_t0,t1 + la_a0 &output_fd + ld_a1,a0,0 + la_a2 &output_buf_ptr + ld_a2,a2,0 + add_a2,a2,t0 + sub_a3,t1,t0 # available + li_a0 sys_write + syscall + la_br &err_write + bltz_a0 + la_br &err_write + beqz_a0 + la_a1 &output_written + ld_t0,a1,0 + add_t0,t0,a0 + st_t0,a1,0 + la_br &wo_loop + b +:wo_done + eret + +## --- Errors ----------------------------------------------------------------- + +## fatal_msg(a0=msg_ptr): write either "hex2pp: <msg>\n" or +## "<path>:<line>: hex2pp: <msg>\n" to stderr, then exit(1). 
+:fatal_msg
+    # No frame is set up here: every path ends in the sys_exit syscall.
+    la_a1 &err_saved_msg
+    st_a0,a1,0
+    la_a0 &cur_path
+    ld_t0,a0,0
+    la_br &fm_no_path
+    beqz_t0
+    # write path
+    mov_a0,t0
+    la_br &strlen_cstr
+    call
+    la_a2 &err_saved_len
+    st_a0,a2,0
+    la_a2 &cur_path
+    ld_a2,a2,0
+    la_a3 &err_saved_len
+    ld_a3,a3,0
+    li_a0 sys_write
+    li_a1 %2 %0
+    syscall
+    # write ":"
+    li_a0 sys_write
+    li_a1 %2 %0
+    la_a2 &str_colon
+    li_a3 %1 %0
+    syscall
+    # write decimal(cur_line)
+    la_a0 &cur_line
+    ld_a0,a0,0
+    la_br &write_decimal_stderr
+    call
+    # write ": hex2pp: "
+    li_a0 sys_write
+    li_a1 %2 %0
+    la_a2 &str_colon_hex2pp
+    li_a3 %10 %0
+    syscall
+    la_br &fm_emit_msg
+    b
+:fm_no_path
+    li_a0 sys_write
+    li_a1 %2 %0
+    la_a2 &str_hex2pp
+    li_a3 %8 %0
+    syscall
+:fm_emit_msg
+    la_a0 &err_saved_msg
+    ld_a0,a0,0
+    la_br &strlen_cstr
+    call
+    la_a2 &err_saved_len
+    st_a0,a2,0
+    la_a2 &err_saved_msg
+    ld_a2,a2,0
+    la_a3 &err_saved_len
+    ld_a3,a3,0
+    li_a0 sys_write
+    li_a1 %2 %0
+    syscall
+    li_a0 sys_write
+    li_a1 %2 %0
+    la_a2 &str_newline
+    li_a3 %1 %0
+    syscall
+    li_a0 sys_exit
+    li_a1 %1 %0
+    syscall
+
+## strlen_cstr(a0=p) -> a0=length. Walks until NUL.
+:strlen_cstr
+    li_t0 %0 %0
+:sl_loop
+    add_t1,a0,t0        # t1 = p + n
+    lb_t1,t1,0
+    la_br &sl_done
+    beqz_t1
+    addi_t0,t0,1
+    la_br &sl_loop
+    b
+:sl_done
+    mov_a0,t0
+    ret
+
+## write_decimal_stderr(a0=value): write decimal of unsigned i64 to stderr.
+## Special-cases zero. Uses line_scratch (64 B) as a reverse-fill buffer.
+:write_decimal_stderr
+    enter_0
+    la_a1 &wd_v
+    st_a0,a1,0
+    la_br &wd_nonzero
+    bnez_a0
+    li_a0 sys_write
+    li_a1 %2 %0
+    la_a2 &str_zero
+    li_a3 %1 %0
+    syscall
+    eret
+:wd_nonzero
+    # Render reversed into line_scratch[...], starting near the end.
+    li_t0 %63 %0
+    la_a1 &wd_pos
+    st_t0,a1,0
+:wd_loop
+    la_a0 &wd_v
+    ld_a0,a0,0
+    la_br &wd_emit
+    beqz_a0
+    # Stage the divisor in wd_tmp and load the VALUE back; the previous
+    # sequence left the address of wd_tmp in a1 and divided by that.
+    li_t1 %10 %0
+    la_a1 &wd_tmp
+    st_t1,a1,0
+    ld_a1,a1,0          # a1 = 10
+    rem_a2,a0,a1        # a2 = v mod 10
+    div_a0,a0,a1        # a0 = v / 10
+    la_a3 &wd_v
+    st_a0,a3,0
+    li_t1 %48 %0
+    add_a3,t1,a2        # ascii = '0' + digit
+    la_a0 &wd_pos
+    ld_t0,a0,0
+    la_a1 &line_scratch_ptr
+    ld_a1,a1,0
+    add_a1,a1,t0
+    sb_a3,a1,0
+    addi_t0,t0,neg1
+    la_a0 &wd_pos
+    st_t0,a0,0
+    la_br &wd_loop
+    b
+:wd_emit
+    # write(2, &line_scratch[wd_pos+1], 64 - (wd_pos+1)).
+    # The buffer pointer is spilled to wd_tmp and reloaded into a2; the
+    # previous sequence passed the address of wd_tmp itself as the buffer.
+    la_a0 &wd_pos
+    ld_t0,a0,0
+    addi_t0,t0,1
+    la_a1 &line_scratch_ptr
+    ld_a1,a1,0
+    add_a1,a1,t0
+    la_a3 &wd_tmp
+    st_a1,a3,0
+    la_a2 &wd_tmp
+    ld_a2,a2,0          # a2 = &line_scratch[wd_pos+1]
+    li_t1 %64 %0
+    sub_a3,t1,t0        # a3 = 64 - (wd_pos+1)
+    li_a0 sys_write
+    li_a1 %2 %0
+    syscall
+    eret
+
+## print_usage(): write usage banner to stdout (fd=1).
+:print_usage + enter_0 + la_a0 &msg_usage + la_br &strlen_cstr + call + la_a3 &pu_tmp + st_a0,a3,0 + la_a2 &msg_usage + ld_a3,a3,0 + li_a0 sys_write + li_a1 %1 %0 + syscall + eret + +## --- Error stubs ------------------------------------------------------------ +:err_unknown_arg + la_a0 &msg_unknown_arg + la_br &fatal_msg + b +:err_missing_arg_value + la_a0 &msg_missing_arg_value + la_br &fatal_msg + b +:err_no_inputs + la_a0 &msg_no_inputs + la_br &fatal_msg + b +:err_too_many_files + la_a0 &msg_too_many_files + la_br &fatal_msg + b +:err_open_input + la_a0 &msg_open_input + la_br &fatal_msg + b +:err_read + la_a0 &msg_read + la_br &fatal_msg + b +:err_input_too_big + la_a0 &msg_input_too_big + la_br &fatal_msg + b +:err_open_output + la_a0 &msg_open_output + la_br &fatal_msg + b +:err_write + la_a0 &msg_write + la_br &fatal_msg + b +:err_text_overflow + la_a0 &msg_text_overflow + la_br &fatal_msg + b +:err_too_many_labels + la_a0 &msg_too_many_labels + la_br &fatal_msg + b +:err_duplicate_label + la_a0 &msg_duplicate_label + la_br &fatal_msg + b +:err_undefined_label + la_a0 &msg_undefined_label + la_br &fatal_msg + b +:err_undefined_local + la_a0 &msg_undefined_local + la_br &fatal_msg + b +:err_unexpected_char + la_a0 &msg_unexpected_char + la_br &fatal_msg + b +:err_unknown_directive + la_a0 &msg_unknown_directive + la_br &fatal_msg + b +:err_dotted_outside_scope + la_a0 &msg_dotted_outside_scope + la_br &fatal_msg + b +:err_scope_overflow + la_a0 &msg_scope_overflow + la_br &fatal_msg + b +:err_scope_underflow + la_a0 &msg_scope_underflow + la_br &fatal_msg + b +:err_scope_unclosed + la_a0 &msg_scope_unclosed + la_br &fatal_msg + b +:err_align_n + la_a0 &msg_align_n + la_br &fatal_msg + b +:err_fill_n + la_a0 &msg_fill_n + la_br &fatal_msg + b +:err_pattern_too_large + la_a0 &msg_pattern_too_large + la_br &fatal_msg + b +:err_byte_lit_bad + la_a0 &msg_byte_lit_bad + la_br &fatal_msg + b +:err_pbs_incomplete + la_a0 &msg_pbs_incomplete + la_br &fatal_msg + b 
+## Error stubs: each one loads the address of its msg_* string into a0, loads
+## &fatal_msg into br, and branches with `b` rather than `call`.
+## NOTE(review): fatal_msg lives outside this section; presumably it reports
+## the message passed in a0 and terminates -- confirm.
+:err_sigil_no_label
+    la_a0 &msg_sigil_no_label
+    la_br &fatal_msg
+    b
+:err_minus_no_label
+    la_a0 &msg_minus_no_label
+    la_br &fatal_msg
+    b
+:err_bad_sigil
+    la_a0 &msg_bad_sigil
+    la_br &fatal_msg
+    b
+:err_ref_out_of_range
+    la_a0 &msg_ref_out_of_range
+    la_br &fatal_msg
+    b
+:err_name_too_long
+    la_a0 &msg_name_too_long
+    la_br &fatal_msg
+    b
+:err_empty_name
+    la_a0 &msg_empty_name
+    la_br &fatal_msg
+    b
+:err_empty_directive
+    la_a0 &msg_empty_directive
+    la_br &fatal_msg
+    b
+:err_expected_decimal
+    la_a0 &msg_expected_decimal
+    la_br &fatal_msg
+    b
+:err_output_overflow
+    la_a0 &msg_output_overflow
+    la_br &fatal_msg
+    b
+:err_bad_long
+    la_a0 &msg_bad_long
+    la_br &fatal_msg
+    b
+
+## Sentinel: end of executable text.
+:_text_end
+
+## --- Rodata -----------------------------------------------------------------
+
+## NOTE(review): by its name this looks like the default output path used
+## when no -o/--output is given -- confirm against the argument parser.
+:const_a_out "a.out" '00'
+
+## Command-line option spellings, each followed by an explicit '00' byte.
+## NOTE(review): -h/--help have strings here but are not listed in the
+## msg_usage text below.
+:opt_dash_f "-f" '00'
+:opt_long_file "--file" '00'
+:opt_dash_o "-o" '00'
+:opt_long_output "--output" '00'
+:opt_dash_B "-B" '00'
+:opt_long_base "--base-address" '00'
+:opt_long_big "--big-endian" '00'
+:opt_long_little "--little-endian" '00'
+:opt_dash_b "-b" '00'
+:opt_long_binary "--binary" '00'
+:opt_long_nonexec "--non-executable" '00'
+:opt_dash_h "-h" '00'
+:opt_long_help "--help" '00'
+
+## Directive names, spelled without the leading '.'. Unlike the option and
+## message strings, these carry no trailing '00' byte.
+## NOTE(review): presumably matched with an explicit length -- confirm
+## against the directive dispatcher.
+:dir_align "align"
+:dir_fill "fill"
+:dir_scope "scope"
+:dir_endscope "endscope"
+
+## Short separator/prefix fragments, also without a trailing '00'.
+## str_newline is a one-character string whose literal spans two lines.
+:str_colon ":"
+:str_colon_hex2pp ": hex2pp: "
+:str_hex2pp "hex2pp: "
+:str_newline "
+"
+:str_zero "0"
+
+## Usage text printed by print_usage; a multi-line string literal that ends
+## with a newline and is followed by an explicit '00' byte.
+:msg_usage "usage: hex2pp (-f|--file) FILE [(-f|--file) FILE ...]
+ [-o|--output OUT]
+ [-B|--base-address ADDR]
+ [--big-endian | --little-endian]
+ [-b|--binary]
+ [--non-executable]
+" '00'
+## One message per err_* stub; each stub passes the matching msg_* address
+## in a0. Every message is followed by an explicit '00' byte.
+:msg_unknown_arg "unknown argument" '00'
+:msg_missing_arg_value "missing value for option" '00'
+:msg_no_inputs "no input files" '00'
+:msg_too_many_files "too many input files" '00'
+:msg_open_input "failed to open input file" '00'
+:msg_read "failed to read input" '00'
+:msg_input_too_big "input too large" '00'
+:msg_open_output "failed to open output file" '00'
+:msg_write "failed to write output" '00'
+:msg_text_overflow "text pool overflow" '00'
+:msg_too_many_labels "too many labels" '00'
+:msg_duplicate_label "duplicate label" '00'
+:msg_undefined_label "undefined label" '00'
+:msg_undefined_local "undefined local label" '00'
+:msg_unexpected_char "unexpected character" '00'
+:msg_unknown_directive "unknown directive" '00'
+:msg_dotted_outside_scope "dot-prefixed label outside a .scope" '00'
+:msg_scope_overflow ".scope: depth overflow" '00'
+:msg_scope_underflow ".endscope: not in a scope" '00'
+:msg_scope_unclosed ".scope not closed at end of input" '00'
+:msg_align_n ".align: N must be a positive power of two" '00'
+:msg_fill_n ".fill: N must be non-negative" '00'
+:msg_pattern_too_large "pattern too large" '00'
+:msg_byte_lit_bad "byte literal: bad digit count" '00'
+:msg_pbs_incomplete "byte stream: incomplete digits at end of run" '00'
+:msg_sigil_no_label "sigil not followed by label name" '00'
+:msg_minus_no_label "'-' must be followed by label name" '00'
+:msg_bad_sigil "internal: bad sigil" '00'
+:msg_ref_out_of_range "reference out of range" '00'
+:msg_name_too_long "name too long" '00'
+:msg_empty_name "expected label name" '00'
+:msg_empty_directive "expected directive name after '.'" '00'
+:msg_expected_decimal "expected decimal integer" '00'
+:msg_output_overflow "output overflow" '00'
+:msg_bad_long "invalid integer argument" '00'
+
+## --- BSS pointer-init table ------------------------------------------------
+:bss_init_tbl
+## One entry per large buffer, bracketed by bss_init_tbl / bss_init_tbl_end.
+## Each entry is: a reference to a *_ptr slot, a ZERO4 filler, and the
+## matching OFF_* constant.
+## NOTE(review): presumably startup code walks this table and stores
+## (some base + OFF_*) into each *_ptr slot, with ZERO4 padding the 4-byte
+## reference out to 8 bytes -- confirm against the init loop and the OFF_*
+## definitions, neither of which is in this section.
+&input_paths_ptr ZERO4 OFF_input_paths
+&input_starts_ptr ZERO4 OFF_input_starts
+&input_lens_ptr ZERO4 OFF_input_lens
+&scope_stack_ptr ZERO4 OFF_scope_stack
+&line_scratch_ptr ZERO4 OFF_line_scratch
+&name_buf_ptr ZERO4 OFF_name_buf
+&label_buf_ptr ZERO4 OFF_label_buf
+&other_buf_ptr ZERO4 OFF_other_buf
+&pat_buf_ptr ZERO4 OFF_pat_buf
+&ev_bytes_ptr ZERO4 OFF_ev_bytes
+&df_byte_ptr ZERO4 OFF_df_byte
+&input_buf_ptr ZERO4 OFF_input_buf
+&output_buf_ptr ZERO4 OFF_output_buf
+&text_buf_ptr ZERO4 OFF_text_buf
+&labels_ptr ZERO4 OFF_labels
+:bss_init_tbl_end
+
+## --- BSS scalars ------------------------------------------------------------
+## Every scalar below is a label followed by one ZERO8 placeholder. The
+## slots are grouped by the routine that uses them, per the `##` headings.
+## NOTE(review): the groups without a heading (saved_argc..non_executable,
+## pass..scope_seq) look like argument/output state and per-pass scanner
+## state respectively -- inferred from names only, confirm against users.
+
+:saved_argc
+ZERO8
+:saved_argv
+ZERO8
+:arg_idx
+ZERO8
+:arg_ptr
+ZERO8
+:input_count
+ZERO8
+:input_total
+ZERO8
+:li_path
+ZERO8
+:li_fd
+ZERO8
+:li_tmp
+ZERO8
+:output_path
+ZERO8
+:output_fd
+ZERO8
+:output_used
+ZERO8
+:output_written
+ZERO8
+:base_address
+ZERO8
+:byte_mode
+ZERO8
+:big_endian
+ZERO8
+:non_executable
+ZERO8
+
+:pass
+ZERO8
+:pass_idx
+ZERO8
+:ip
+ZERO8
+:cur_path
+ZERO8
+:cur_line
+ZERO8
+:scan_pos
+ZERO8
+:scan_end
+ZERO8
+:text_used
+ZERO8
+:label_count
+ZERO8
+:scope_depth
+ZERO8
+:scope_seq
+ZERO8
+
+## name read scratch
+:name_len
+ZERO8
+:name_scope
+ZERO8
+:nt_c
+ZERO8
+:rn_out
+ZERO8
+:rn_max
+ZERO8
+:rn_n
+ZERO8
+:sl_tmp
+ZERO8
+
+## decimal read
+:rd_val
+ZERO8
+:rd_saw
+ZERO8
+:rd_tmp
+ZERO8
+
+## byte stream
+:pbs_acc
+ZERO8
+:pbs_have
+ZERO8
+:pbs_c
+ZERO8
+
+## one byte literal
+:p1b_out
+ZERO8
+:p1b_acc
+ZERO8
+:p1b_have
+ZERO8
+:p1b_done
+ZERO8
+:p1b_c
+ZERO8
+
+## intern
+:intern_src
+ZERO8
+:intern_len
+ZERO8
+:intern_dst
+ZERO8
+:intern_orig
+ZERO8
+:intern_i
+ZERO8
+
+## label_addr scratch
+:la_const_32
+ZERO8
+
+## name_eq scratch
+:ne_label
+ZERO8
+:ne_src
+ZERO8
+:ne_len
+ZERO8
+:ne_tmp
+ZERO8
+
+## define_label scratch
+:dl_src
+ZERO8
+:dl_len
+ZERO8
+:dl_scope
+ZERO8
+:dl_i
+ZERO8
+:dl_label
+ZERO8
+:dl_name_off
+ZERO8
+
+## lookup_label scratch
+:ll_src
+ZERO8
+:ll_len +ZERO8 +:ll_d +ZERO8 +:ll_sid +ZERO8 +:ll_i +ZERO8 +:ll_label +ZERO8 +:ll_tmp +ZERO8 + +## process_reference / set_sigil_info scratch +:cur_sigil +ZERO8 +:pr_width +ZERO8 +:pr_is_rel +ZERO8 +:pr_lo +ZERO8 +:pr_hi +ZERO8 +:pr_range_check +ZERO8 +:pr_llen +ZERO8 +:pr_olen +ZERO8 +:pr_has_other +ZERO8 +:pr_t_label +ZERO8 +:pr_t_other +ZERO8 +:pr_value +ZERO8 +:pr_tmp +ZERO8 +:ssi_tmp +ZERO8 +:ssi_tmp2 +ZERO8 + +## emit_value scratch +:ev_value +ZERO8 +:ev_width +ZERO8 +:ev_lo +ZERO8 +:ev_hi +ZERO8 +:ev_range_check +ZERO8 +:ev_pack_v +ZERO8 +:ev_i +ZERO8 + +## directive scratch +:da_n +ZERO8 +:da_has_pat +ZERO8 +:da_patlen +ZERO8 +:da_pad +ZERO8 +:da_i +ZERO8 +:df_n +ZERO8 +:df_i +ZERO8 + +## str/mem helpers +:se_p +ZERO8 +:se_q +ZERO8 +:se_len +ZERO8 +:me_p +ZERO8 +:me_q +ZERO8 +:me_len +ZERO8 + +## parse_long_arg +:pla_p +ZERO8 +:pla_val +ZERO8 +:pla_neg +ZERO8 +:pla_tmp +ZERO8 + +## error/fatal +:err_saved_msg +ZERO8 +:err_saved_len +ZERO8 + +## write_decimal +:wd_v +ZERO8 +:wd_pos +ZERO8 +:wd_tmp +ZERO8 + +## print_usage +:pu_tmp +ZERO8 + +## Generic auxiliary scratch used by sequences that route a value through +## BSS to satisfy the seed P1 mnemonic table. +:aux_tmp +ZERO8 + +## --- BSS pointer slots ------------------------------------------------------ +:input_paths_ptr +ZERO8 +:input_starts_ptr +ZERO8 +:input_lens_ptr +ZERO8 +:scope_stack_ptr +ZERO8 +:line_scratch_ptr +ZERO8 +:name_buf_ptr +ZERO8 +:label_buf_ptr +ZERO8 +:other_buf_ptr +ZERO8 +:pat_buf_ptr +ZERO8 +:ev_bytes_ptr +ZERO8 +:df_byte_ptr +ZERO8 +:input_buf_ptr +ZERO8 +:output_buf_ptr +ZERO8 +:text_buf_ptr +ZERO8 +:labels_ptr +ZERO8 + +:ELF_end diff --git a/scripts/boot-build-p1pp.sh b/scripts/boot-build-p1pp.sh @@ -1,23 +1,29 @@ #!/bin/sh -## boot-build-p1pp.sh — in-container .P1pp -> ELF. +## boot-build-p1pp.sh — in-container .P1pp -> ELF via the new chain. ## ## Pure transformation. 
Caller (the Makefile) ensures every fixed-path -## input below already exists, including the per-arch self-hosted m1pp -## ELF binary (build/$ARCH/M1pp/M1pp, built by boot2.sh / boot-build-p1.sh). +## input below already exists, including the per-arch self-hosted M1pp +## ELF binary (build/$ARCH/M1pp/M1pp) and hex2pp ELF binary +## (build/$ARCH/hex2pp/hex2pp). Both of those are built once via the +## seed M0+hex2 chain (boot-build-p1.sh); after that point the seed +## tools no longer participate in any user/test pipeline. ## -## Pipeline: +## Pipeline (new chain — no M0/hex2/catm anywhere): ## cat <P1-$ARCH.M1pp> <P1.M1pp> <P1pp.P1pp> <srcs...> -> /tmp/combined.M1pp -## m1pp /tmp/combined.M1pp -> /tmp/expanded.M1 -## M0 /tmp/expanded.M1 -> /tmp/prog.hex2 -## catm /tmp/elf.hex2 /tmp/prog.hex2 -> /tmp/linked.hex2 -## hex2-0 /tmp/linked.hex2 -> $OUT +## M1pp /tmp/combined.M1pp -> /tmp/expanded.hex2pp +## hex2pp -f $ELF_HDR -f /tmp/expanded.hex2pp -o $OUT +## +## $ELF_HDR is P1/elf-$ARCH.hex2pp — a hex2pp-syntax ELF header that +## supplies :ELF_base / :_start / :ELF_end framing, replacing the old +## vendor/seed/$ARCH/ELF.hex2 (which uses hex2 `>` arithmetic and +## trailing-zero placeholders incompatible with hex2pp). ## ## libp1pp (P1/P1pp.P1pp) is concatenated unconditionally so portable ## sources can use %fn, the control-flow macros, and libp1pp routines ## (sys_*, print*, parse_*, fmt_*, memcpy/memcmp, bump allocator, panic, -## %assert_*) without per-program plumbing. M0 has no link-time DCE, so -## programs that don't reference any libp1pp routine still pay a fixed -## code-size tax (~a few KB). +## %assert_*) without per-program plumbing. hex2pp has no link-time DCE, +## so programs that don't reference any libp1pp routine still pay a +## fixed code-size tax (~a few KB). ## ## Multiple <srcs> are concatenated in the order given. This is how ## libc-using executables compose: a typical chain is @@ -45,10 +51,10 @@ set -eu -# Per-stage tracing is always on. 
The stage0 tools (M0, hex2-0) print -# nothing on success and almost nothing on failure, so we narrate which -# step is running, snapshot intermediates to $WORK before exiting, and -# print a clear FAIL banner on error so the user knows where it died. +# Per-stage tracing is always on. M1pp / hex2pp print little on success +# and bail fast on error, so we narrate which step is running, snapshot +# intermediates to $WORK before exiting, and print a clear FAIL banner +# on error so the user knows where it died. ARCH_LBL=${ARCH:-?} CURRENT_STEP= trap 'rc=$? @@ -79,9 +85,9 @@ shift BACKEND=P1/P1-$ARCH.M1pp FRONTEND=P1/P1.M1pp LIBP1PP=P1/P1pp.P1pp -ELF_HDR=vendor/seed/$ARCH/ELF.hex2 -TOOLS=build/$ARCH/tools +ELF_HDR=P1/elf-$ARCH.hex2pp M1PP_BIN=build/$ARCH/M1pp/M1pp +HEX2PP_BIN=build/$ARCH/hex2pp/hex2pp if [ -n "${WORK_SUBPATH:-}" ]; then NAME=$WORK_SUBPATH else @@ -99,24 +105,13 @@ cat "$BACKEND" "$FRONTEND" "$LIBP1PP" "$@" > /tmp/combined.M1pp cp /tmp/combined.M1pp "$WORK/combined.M1pp" trace "combined.M1pp" /tmp/combined.M1pp -step "m1pp: combined.M1pp -> expanded.M1" -"$M1PP_BIN" /tmp/combined.M1pp /tmp/expanded.M1 -cp /tmp/expanded.M1 "$WORK/expanded.M1" -trace "expanded.M1" /tmp/expanded.M1 - -step "M0: expanded.M1 -> prog.hex2" -"$TOOLS/M0" /tmp/expanded.M1 /tmp/prog.hex2 -cp /tmp/prog.hex2 "$WORK/prog.hex2" -trace "prog.hex2" /tmp/prog.hex2 - -step "catm: ELF header + prog.hex2 -> linked.hex2" -cp "$ELF_HDR" /tmp/elf.hex2 -"$TOOLS/catm" /tmp/linked.hex2 /tmp/elf.hex2 /tmp/prog.hex2 -cp /tmp/linked.hex2 "$WORK/linked.hex2" -trace "linked.hex2" /tmp/linked.hex2 +step "M1pp: combined.M1pp -> expanded.hex2pp" +"$M1PP_BIN" /tmp/combined.M1pp /tmp/expanded.hex2pp +cp /tmp/expanded.hex2pp "$WORK/expanded.hex2pp" +trace "expanded.hex2pp" /tmp/expanded.hex2pp -step "hex2-0: linked.hex2 -> $OUT" -"$TOOLS/hex2-0" /tmp/linked.hex2 /tmp/prog.bin +step "hex2pp: ELF header + expanded.hex2pp -> $OUT" +"$HEX2PP_BIN" -f "$ELF_HDR" -f /tmp/expanded.hex2pp -o /tmp/prog.bin cp 
/tmp/prog.bin "$OUT" chmod 0700 "$OUT" trace "$OUT" "$OUT" diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh @@ -78,7 +78,19 @@ fail() { } ## --- m1pp suite --------------------------------------------------------- - +## +## Two-step check: +## 1. Run M1pp against tests/M1pp/<name>.M1pp; diff its text output +## against tests/M1pp/<name>.expected (parity with the C oracle). +## 2. Pipe that output through hex2pp as an assemble smoke test. The +## new M1pp emits bare hex consumable directly by hex2pp; this +## catches cases where M1pp produces parity-correct text that +## hex2pp can't actually parse (e.g. stray whitespace bugs, +## malformed sigil expressions). +## +## Both steps must pass for the fixture to PASS. The smoke-test step +## uses hex2pp's --non-executable mode and writes to a throwaway path +## — we only care about hex2pp's exit status, not the bytes. run_m1pp_suite() { if [ -z "$NAMES" ]; then NAMES=$(discover tests/M1pp M1pp) @@ -98,7 +110,7 @@ run_m1pp_suite() { expected_content=$(cat "$expected") label="[$ARCH] $name" - outfile=build/$ARCH/tests/M1pp/$name.M1 + outfile=build/$ARCH/tests/M1pp/$name.hex2pp mkdir -p "$(dirname "$outfile")" rm -f "$outfile" "./build/$ARCH/M1pp/M1pp" "$m1pp_src" "$outfile" >/dev/null 2>&1 || true @@ -108,12 +120,27 @@ run_m1pp_suite() { actual= fi - if [ "$actual" = "$expected_content" ]; then - report "$label" PASS - else + if [ "$actual" != "$expected_content" ]; then report "$label" FAIL show_diff "$expected_content" "$actual" + continue fi + + # Smoke test: feed M1pp's output through hex2pp. We don't run + # the resulting bytes (the fixture isn't a complete program), + # only verify hex2pp accepts the syntax. --non-executable + # skips the chmod on the throwaway output. + binfile=build/$ARCH/tests/M1pp/$name.bin + hex2pp_log=build/$ARCH/tests/M1pp/$name.hex2pp.log + rm -f "$binfile" "$hex2pp_log" + if ! 
"./build/$ARCH/hex2pp/hex2pp" --non-executable \ + -f "$outfile" -o "$binfile" \ + >"$hex2pp_log" 2>&1; then + fail "$label" "hex2pp smoke-test failed:" "$hex2pp_log" + continue + fi + + report "$label" PASS done } diff --git a/scripts/boot-undef.sh b/scripts/boot-undef.sh @@ -1,23 +1,23 @@ #!/bin/sh -## scripts/boot-undef.sh — list M1/hex2 references with no matching definition. +## scripts/boot-undef.sh — list hex2pp references with no matching definition. ## -## Cheap-and-cheerful linker-diagnostic for the live boot pipeline. M0 emits -## linked.hex2 in asm-style: `:label` defines, `&label` references. A symbol -## with refs but no def is unresolved — the same thing hex2-0 would flag, -## except hex2-0 only prints the first miss before bailing, so this dumps -## the full list. +## Cheap-and-cheerful linker-diagnostic for the live boot pipeline. M1pp +## emits expanded.hex2pp in asm-style: `:label` defines, `&label` (and +## other-sigil) references. A symbol with refs but no def is unresolved +## — the same thing hex2pp would flag, except hex2pp only prints the +## first miss before bailing, so this dumps the full list. ## -## Defaults to the linked.hex2 produced by the most recent +## Defaults to the expanded.hex2pp produced by the most recent ## `make tcc-boot2 ARCH=<arch>` build. Run that first if missing. ## ## Caveats: -## - Reads post-m1pp / post-M0 output, so %la(...) macro args are already +## - Reads post-m1pp output, so %la(...) macro args are already ## expanded. Running this on the raw .P1pp would miss them. ## - m1pp rewrites local labels (@body, @end, ...) to per-expansion suffixed ## names, so they appear under both refs and defs naturally. ## ## Usage: -## scripts/boot-undef.sh [--arch <aarch64|amd64|riscv64>] [<linked.hex2>] +## scripts/boot-undef.sh [--arch <aarch64|amd64|riscv64>] [<expanded.hex2pp>] set -eu @@ -34,7 +34,7 @@ while [ $# -gt 0 ]; do done ROOT=$(cd "$(dirname "$0")/.." 
&& pwd) -: "${LINKED:=$ROOT/build/$ARCH/.work/tcc-boot2/tcc-boot2/linked.hex2}" +: "${LINKED:=$ROOT/build/$ARCH/.work/tcc-boot2/tcc-boot2/expanded.hex2pp}" [ -r "$LINKED" ] || { echo "missing $LINKED" >&2 diff --git a/scripts/boot2.sh b/scripts/boot2.sh @@ -1,11 +1,17 @@ #!/bin/sh ## boot2.sh — stage 2 of the bootstrap chain. ## -## In-container script. Builds the M1pp expander ELF from the -## checked-in pre-pruned P1 backend table (P1/P1-$ARCH.M1) plus their -## sources, by calling scripts/boot-build-p1.sh. +## In-container script. Builds the two self-hosted tools (M1pp expander +## ELF and hex2pp assembler/linker ELF) from their pure-P1 sources via +## the seed M0+hex2 pipeline (boot-build-p1.sh), using the checked-in +## pre-pruned P1 backend table (P1/P1-$ARCH.M1). ## ## Outputs: build/$ARCH/M1pp/M1pp +## build/$ARCH/hex2pp/hex2pp +## +## After this stage completes, the seed M0/hex2-0 tools are no longer +## used by any downstream target — every .P1pp source flows through +## M1pp + hex2pp instead (see scripts/boot-build-p1pp.sh). ## ## Env: ARCH=aarch64|amd64|riscv64 @@ -18,4 +24,5 @@ case "$ARCH" in *) echo "boot2.sh: unsupported arch '$ARCH'" >&2; exit 1 ;; esac -sh scripts/boot-build-p1.sh M1pp/M1pp.P1 build/$ARCH/M1pp/M1pp +sh scripts/boot-build-p1.sh M1pp/M1pp.P1 build/$ARCH/M1pp/M1pp +sh scripts/boot-build-p1.sh hex2pp/hex2pp.P1 build/$ARCH/hex2pp/hex2pp diff --git a/scripts/build-native-tools.sh b/scripts/build-native-tools.sh @@ -5,8 +5,9 @@ ## the requested tool; Make handles staleness. ## ## Tools (NOT in the bootstrap chain — fast host substitutes): -## M1, hex2 — built from upstream mescc-tools sources -## m1pp — built from M1pp/M1pp.c (the C oracle) +## M1, hex2 — built from upstream mescc-tools sources (legacy oracle) +## m1pp — built from M1pp/M1pp.c (the C oracle for the new M1pp) +## hex2pp — built from hex2pp/hex2pp.c (the C oracle for hex2++) ## ## Source lookup for M1/hex2 (first match wins): ## 1. 
$MESCC_TOOLS_SRC (direct override) @@ -17,11 +18,11 @@ ## external dep explicit. Set LIVE_BOOTSTRAP=<path> the same way ## scripts/diag-livebootstrap-qemu.sh does. ## -## Usage: scripts/build-native-tools.sh <M1|hex2|m1pp> +## Usage: scripts/build-native-tools.sh <M1|hex2|m1pp|hex2pp> set -eu -[ "$#" -eq 1 ] || { echo "usage: $0 <M1|hex2|m1pp>" >&2; exit 2; } +[ "$#" -eq 1 ] || { echo "usage: $0 <M1|hex2|m1pp|hex2pp>" >&2; exit 2; } TOOL=$1 REPO=$(cd "$(dirname "$0")/.." && pwd) @@ -74,6 +75,9 @@ case "$TOOL" in m1pp) $CC -O2 -std=c99 M1pp/M1pp.c -o "$OUT/m1pp" ;; + hex2pp) + $CC -O2 -std=c99 hex2pp/hex2pp.c -o "$OUT/hex2pp" + ;; *) echo "build-native-tools.sh: unknown tool '$TOOL'" >&2 exit 2