boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit 1c9de593e0c3ec9dcc53712e78aa6353f0aaf41a
parent 9507a3668cd1218bae1aa0f171e7991cc8768afd
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 15:35:09 -0700

Track A: stream stack + pool lifetime + process_tokens rewrite

Diffstat:
M m1pp/m1pp.M1 | 380 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 306 insertions(+), 74 deletions(-)

diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1 @@ -769,102 +769,226 @@ DEFINE EXPR_INVALID 1100000000000000 ret ## --- Main processor ---------------------------------------------------------- -## Walks source_tokens[] in order. For each token: -## - line-start %macro -> call skip_macro_def (consumes through %endm) -## - TOK_NEWLINE -> emit_newline, set line_start -## - anything else -> emit_token, clear line_start -## Phases 4+ will insert macro-call detection, builtin calls, and %select -## between the %macro check and the newline check. - -## process_tokens(): pass-through with structural %macro skipping. +## Phase-3+ stream-driven loop. Pushes source_tokens as the initial stream, then +## drives the streams[] stack until it empties. Per iteration: +## pop the stream if exhausted, otherwise dispatch on the current token: +## - line-start %macro -> shim into define_macro via proc_pos +## - TOK_NEWLINE -> emit_newline, advance, set line_start = 1 +## - WORD + LPAREN follow + name in {! @ % $ %select} +## -> expand_builtin_call(s, tok) +## - find_macro(tok) hit + LPAREN follow +## -> expand_call(s, macro) +## - otherwise -> emit_token, advance, clear line_start +## +## Stack frame: enter_16 reserves two 8-byte slots so we can preserve the +## current Stream* (sp+16) and the current Token* (sp+24) across calls +## (a0..a3, t0..t2 are caller-saved). + +## process_tokens(): stream-driven main loop. 
:process_tokens - enter_0 + enter_16 - # proc_pos = &source_tokens; proc_line_start = 1 + # push_stream_span(source_tokens, source_end, -1) la_a0 &source_tokens - la_a1 &proc_pos - st_a0,a1,0 - la_a0 &proc_line_start - li_a1 %1 %0 - st_a1,a0,0 - -:process_loop - # tok = proc_pos; if (tok == source_end) done - la_a0 &proc_pos - ld_t0,a0,0 la_a1 &source_end - ld_t1,a1,0 - la_br &process_done + ld_a1,a1,0 + sub_a2,a2,a2 + addi_a2,a2,neg1 + la_br &push_stream_span + call + +:proc_loop + # s = current_stream(); if (s == 0) done + la_br &current_stream + call + la_br &proc_done + beqz_a0 + st_a0,sp,16 + + # if (s->pos == s->end) pop and continue + ld_t0,a0,16 + ld_t1,a0,8 + la_br &proc_pop_continue beq_t0,t1 - # if (!line_start) fall through to non-macro branch - la_a0 &proc_line_start - ld_t2,a0,0 - la_br &process_not_macro - beqz_t2 + # tok = s->pos + st_t0,sp,24 - # if (tok->kind != TOK_WORD) not a %macro line + # ---- line_start && tok->kind == TOK_WORD && tok eq "%macro" ---- + ld_a1,a0,24 + la_br &proc_check_newline + beqz_a1 ld_a1,t0,0 li_a2 TOK_WORD - la_br &process_not_macro + la_br &proc_check_newline bne_a1,a2 - - # if (!tok_eq_const(tok, "%macro", 6)) not a %macro line mov_a0,t0 la_a1 &const_macro li_a2 %6 %0 la_br &tok_eq_const call - la_br &process_not_macro + la_br &proc_check_newline beqz_a0 - # line-start %macro -> record the definition + # %macro: shim into define_macro through the proc_pos globals. + # define_macro reads/writes proc_pos and walks against source_end, + # so it only behaves correctly when s is the source stream — which + # holds in practice (line_start in expansion streams is cleared + # before any %macro could matter). After it returns we copy + # proc_pos back into s->pos and set s->line_start = 1. 
+ ld_t0,sp,24 + la_a0 &proc_pos + st_t0,a0,0 + la_a0 &proc_line_start + li_a1 %1 %0 + st_a1,a0,0 la_br &define_macro call - la_br &process_loop + ld_a0,sp,16 + la_a1 &proc_pos + ld_t0,a1,0 + st_t0,a0,16 + li_t1 %1 %0 + st_t1,a0,24 + la_br &proc_loop b -:process_not_macro - # reload tok (registers clobbered by the tok_eq_const call above) - la_a0 &proc_pos - ld_t0,a0,0 +:proc_check_newline + # reload s, tok + ld_a0,sp,16 + ld_t0,sp,24 ld_a1,t0,0 - - # if (tok->kind != TOK_NEWLINE) emit it li_a2 TOK_NEWLINE - la_br &process_regular_token + la_br &proc_check_builtin bne_a1,a2 - # newline: emit_newline, proc_pos++, line_start = 1 + # newline: s->pos += 24; s->line_start = 1; emit_newline() + addi_t0,t0,24 + st_t0,a0,16 + li_t1 %1 %0 + st_t1,a0,24 la_br &emit_newline call - la_a0 &proc_pos - ld_t0,a0,0 - addi_t0,t0,24 - st_t0,a0,0 - la_a0 &proc_line_start - li_a1 %1 %0 - st_a1,a0,0 - la_br &process_loop + la_br &proc_loop b -:process_regular_token - # emit_token(tok); proc_pos++; line_start = 0 - la_a0 &proc_pos - ld_a0,a0,0 +:proc_check_builtin + # tok->kind == TOK_WORD && tok+1 < s->end && (tok+1)->kind == TOK_LPAREN ? + ld_a0,sp,16 + ld_t0,sp,24 + ld_a1,t0,0 + li_a2 TOK_WORD + la_br &proc_check_macro + bne_a1,a2 + addi_t1,t0,24 + ld_a1,a0,8 + la_br &proc_check_builtin_has_next + blt_t1,a1 + la_br &proc_check_macro + b +:proc_check_builtin_has_next + ld_a1,t1,0 + li_a2 TOK_LPAREN + la_br &proc_check_macro + bne_a1,a2 + + # try the five builtin names: ! 
@ % $ %select + mov_a0,t0 + la_a1 &const_bang + li_a2 %1 %0 + la_br &tok_eq_const + call + la_br &proc_do_builtin + bnez_a0 + ld_a0,sp,24 + la_a1 &const_at + li_a2 %1 %0 + la_br &tok_eq_const + call + la_br &proc_do_builtin + bnez_a0 + ld_a0,sp,24 + la_a1 &const_pct + li_a2 %1 %0 + la_br &tok_eq_const + call + la_br &proc_do_builtin + bnez_a0 + ld_a0,sp,24 + la_a1 &const_dlr + li_a2 %1 %0 + la_br &tok_eq_const + call + la_br &proc_do_builtin + bnez_a0 + ld_a0,sp,24 + la_a1 &const_select + li_a2 %7 %0 + la_br &tok_eq_const + call + la_br &proc_do_builtin + bnez_a0 + la_br &proc_check_macro + b + +:proc_do_builtin + # expand_builtin_call(s, tok) + ld_a0,sp,16 + ld_a1,sp,24 + la_br &expand_builtin_call + call + la_br &proc_loop + b + +:proc_check_macro + # macro = find_macro(tok); if non-zero AND tok+1 < s->end AND (tok+1)->kind == TOK_LPAREN: expand_call + ld_a0,sp,24 + la_br &find_macro + call + la_br &proc_emit + beqz_a0 + mov_t2,a0 + ld_a0,sp,16 + ld_t0,sp,24 + addi_t1,t0,24 + ld_a1,a0,8 + la_br &proc_macro_has_next + blt_t1,a1 + la_br &proc_emit + b +:proc_macro_has_next + ld_a1,t1,0 + li_a2 TOK_LPAREN + la_br &proc_emit + bne_a1,a2 + ld_a0,sp,16 + mov_a1,t2 + la_br &expand_call + call + la_br &proc_loop + b + +:proc_emit + # emit_token(tok); s->pos += 24; s->line_start = 0 + ld_a0,sp,24 la_br &emit_token call - la_a0 &proc_pos - ld_t0,a0,0 + ld_a0,sp,16 + ld_t0,a0,16 addi_t0,t0,24 - st_t0,a0,0 - la_a0 &proc_line_start - li_a1 %0 %0 - st_a1,a0,0 - la_br &process_loop + st_t0,a0,16 + li_t1 %0 %0 + st_t1,a0,24 + la_br &proc_loop b -:process_done +:proc_pop_continue + la_br &pop_stream + call + la_br &proc_loop + b + +:proc_done leave ret @@ -1199,41 +1323,144 @@ DEFINE EXPR_INVALID 1100000000000000 ## Push Stream { start = pos = a0, end = a1, line_start = 1, pool_mark = a2 } ## onto streams[]. Bumps stream_top. pool_mark is a byte offset into ## expand_pool, or -1 for a source-owned stream (pop_stream won't rewind). -## Reads/writes: streams, stream_top. 
Oracle: m1pp.c:push_stream_span. +## +## stream_top is maintained as a byte offset into streams[] (count * 40), +## matching the running-tail-pointer pattern used by source_end / macros_end. +## Reads/writes: streams, stream_top. Leaf. Oracle: m1pp.c:push_stream_span. :push_stream_span - la_br &err_not_implemented - b + # new_top = stream_top + STREAM_SIZE; if (cap < new_top) fatal + la_t0 &stream_top + ld_t1,t0,0 + li_t2 M1PP_STREAM_SIZE + add_t2,t1,t2 + li_a3 M1PP_STREAM_STACK_CAP + la_br &err_token_overflow + blt_a3,t2 + + # s = &streams[stream_top] + la_a3 &streams + add_a3,a3,t1 + + # s->start = a0; s->end = a1; s->pos = a0; s->line_start = 1; s->pool_mark = a2 + st_a0,a3,0 + st_a1,a3,8 + st_a0,a3,16 + li_t1 %1 %0 + st_t1,a3,24 + st_a2,a3,32 + + # stream_top = new_top + st_t2,t0,0 + ret ## current_stream() -> a0 = &streams[stream_top-1], or 0 if empty. Leaf. +## stream_top is a byte offset, so &streams[top-1] = streams + stream_top - 40. ## Reads: streams, stream_top. Oracle: m1pp.c:current_stream. :current_stream - la_br &err_not_implemented - b + la_a0 &stream_top + ld_t0,a0,0 + la_br &current_stream_empty + beqz_t0 + la_a0 &streams + add_a0,a0,t0 + li_t1 M1PP_STREAM_SIZE + sub_a0,a0,t1 + ret +:current_stream_empty + li_a0 %0 %0 + ret ## pop_stream() -> void. Leaf. ## Decrement stream_top. If the popped stream's pool_mark >= 0, restore ## pool_used = pool_mark (reclaim the expansion-pool space it used). ## Reads/writes: streams, stream_top, pool_used. Oracle: m1pp.c:pop_stream. 
:pop_stream - la_br &err_not_implemented - b + la_a0 &stream_top + ld_t0,a0,0 + la_br &pop_stream_done + beqz_t0 + li_t1 M1PP_STREAM_SIZE + sub_t0,t0,t1 + st_t0,a0,0 + + # mark = popped->pool_mark + la_a1 &streams + add_a1,a1,t0 + ld_t0,a1,32 + + # if (mark < 0) skip; else pool_used = mark + la_br &pop_stream_done + bltz_t0 + la_a1 &pool_used + st_t0,a1,0 +:pop_stream_done + ret ## copy_span_to_pool(a0=start_tok, a1=end_tok) -> void (fatal on pool overflow) ## Append each 24-byte Token in [start, end) to expand_pool at pool_used, ## advancing pool_used accordingly. -## Reads/writes: expand_pool, pool_used. Oracle: m1pp.c:copy_span_to_pool. +## Reads/writes: expand_pool, pool_used. Leaf. +## Oracle: m1pp.c:copy_span_to_pool. :copy_span_to_pool - la_br &err_not_implemented +:cstp_loop + # if (start == end) done + la_br &cstp_done + beq_a0,a1 + + # bounds: pool_used + 24 must fit in EXPAND_CAP + la_a2 &pool_used + ld_t0,a2,0 + addi_t1,t0,24 + li_t2 M1PP_EXPAND_CAP + la_br &err_token_overflow + blt_t2,t1 + + # dst = &expand_pool[pool_used] + la_a3 &expand_pool + add_a3,a3,t0 + + # copy 24 bytes (3 × u64) + ld_t1,a0,0 + st_t1,a3,0 + ld_t1,a0,8 + st_t1,a3,8 + ld_t1,a0,16 + st_t1,a3,16 + + # pool_used += 24; start += 24 + addi_t0,t0,24 + st_t0,a2,0 + addi_a0,a0,24 + la_br &cstp_loop b +:cstp_done + ret ## push_pool_stream_from_mark(a0=mark) -> void (fatal on overflow) ## If pool_used == mark (empty expansion), do nothing and return. ## Otherwise push_stream_span(expand_pool+mark, expand_pool+pool_used, mark). -## Reads/writes: expand_pool, pool_used, streams, stream_top. +## Reads/writes: expand_pool, pool_used, streams, stream_top. Non-leaf: +## needs a frame so the call to push_stream_span doesn't clobber LR. ## Oracle: m1pp.c:push_pool_stream_from_mark. 
:push_pool_stream_from_mark - la_br &err_not_implemented - b + enter_0 + # if (pool_used == mark) return + la_a1 &pool_used + ld_t0,a1,0 + la_br &ppsfm_done + beq_t0,a0 + + # push_stream_span(expand_pool+mark, expand_pool+pool_used, mark) + la_a2 &expand_pool + mov_t1,a0 + add_a0,a2,a0 + add_a1,a2,t0 + mov_a2,t1 + la_br &push_stream_span + call +:ppsfm_done + leave + ret ## ============================================================================ ## --- Phase 4 STUB: argument parsing ------------------------------------------ @@ -1578,6 +1805,11 @@ DEFINE EXPR_INVALID 1100000000000000 :const_lparen "(" :const_rparen ")" :const_comma "," +:const_bang "!" +:const_at "@" +:const_pct "%" +:const_dlr "$" +:const_select "%select" :msg_prefix "m1pp: " :msg_newline "