Track A: stream stack + pool lifetime + process_tokens rewrite - boot2

commit 1c9de593e0c3ec9dcc53712e78aa6353f0aaf41a
parent 9507a3668cd1218bae1aa0f171e7991cc8768afd
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 15:35:09 -0700

Track A: stream stack + pool lifetime + process_tokens rewrite

Diffstat:
M m1pp/m1pp.M1  | 380 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------

1 file changed, 306 insertions(+), 74 deletions(-)
diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1
@@ -769,102 +769,226 @@ DEFINE EXPR_INVALID 1100000000000000
     ret
 
 ## --- Main processor ----------------------------------------------------------
-## Walks source_tokens[] in order. For each token:
-##   - line-start %macro    -> call skip_macro_def (consumes through %endm)
-##   - TOK_NEWLINE          -> emit_newline, set line_start
-##   - anything else        -> emit_token, clear line_start
-## Phases 4+ will insert macro-call detection, builtin calls, and %select
-## between the %macro check and the newline check.
-
-## process_tokens(): pass-through with structural %macro skipping.
+## Phase-3+ stream-driven loop. Pushes source_tokens as the initial stream, then
+## drives the streams[] stack until it empties. Per iteration:
+##   pop the stream if exhausted, otherwise dispatch on the current token:
+##   - line-start %macro      -> shim into define_macro via proc_pos
+##   - TOK_NEWLINE            -> emit_newline, advance, set line_start = 1
+##   - WORD + LPAREN follow + name in {! @ % $ %select}
+##                            -> expand_builtin_call(s, tok)
+##   - find_macro(tok) hit + LPAREN follow
+##                            -> expand_call(s, macro)
+##   - otherwise              -> emit_token, advance, clear line_start
+##
+## Stack frame: enter_16 reserves two 8-byte slots so we can preserve the
+## current Stream* (sp+16) and the current Token* (sp+24) across calls
+## (a0..a3, t0..t2 are caller-saved).
+
+## process_tokens(): stream-driven main loop.
 :process_tokens
-    enter_0
+    enter_16
 
-    # proc_pos = &source_tokens; proc_line_start = 1
+    # push_stream_span(source_tokens, source_end, -1)
     la_a0 &source_tokens
-    la_a1 &proc_pos
-    st_a0,a1,0
-    la_a0 &proc_line_start
-    li_a1 %1 %0
-    st_a1,a0,0
-
-:process_loop
-    # tok = proc_pos;  if (tok == source_end) done
-    la_a0 &proc_pos
-    ld_t0,a0,0
     la_a1 &source_end
-    ld_t1,a1,0
-    la_br &process_done
+    ld_a1,a1,0
+    sub_a2,a2,a2
+    addi_a2,a2,neg1
+    la_br &push_stream_span
+    call
+
+:proc_loop
+    # s = current_stream();  if (s == 0) done
+    la_br &current_stream
+    call
+    la_br &proc_done
+    beqz_a0
+    st_a0,sp,16
+
+    # if (s->pos == s->end) pop and continue
+    ld_t0,a0,16
+    ld_t1,a0,8
+    la_br &proc_pop_continue
     beq_t0,t1
 
-    # if (!line_start) fall through to non-macro branch
-    la_a0 &proc_line_start
-    ld_t2,a0,0
-    la_br &process_not_macro
-    beqz_t2
+    # tok = s->pos
+    st_t0,sp,24
 
-    # if (tok->kind != TOK_WORD) not a %macro line
+    # ---- line_start && tok->kind == TOK_WORD && tok eq "%macro" ----
+    ld_a1,a0,24
+    la_br &proc_check_newline
+    beqz_a1
     ld_a1,t0,0
     li_a2 TOK_WORD
-    la_br &process_not_macro
+    la_br &proc_check_newline
     bne_a1,a2
-
-    # if (!tok_eq_const(tok, "%macro", 6)) not a %macro line
     mov_a0,t0
     la_a1 &const_macro
     li_a2 %6 %0
     la_br &tok_eq_const
     call
-    la_br &process_not_macro
+    la_br &proc_check_newline
     beqz_a0
 
-    # line-start %macro -> record the definition
+    # %macro: shim into define_macro through the proc_pos globals.
+    # define_macro reads/writes proc_pos and walks against source_end,
+    # so it is only correct when s is the source stream. That is assumed,
+    # not checked: push_stream_span starts every stream with line_start = 1,
+    # so a pool stream led by %macro would land here too (TODO: confirm/guard).
+    # After it returns we copy proc_pos back into s->pos, s->line_start = 1.
+    ld_t0,sp,24
+    la_a0 &proc_pos
+    st_t0,a0,0
+    la_a0 &proc_line_start
+    li_a1 %1 %0
+    st_a1,a0,0
     la_br &define_macro
     call
-    la_br &process_loop
+    ld_a0,sp,16
+    la_a1 &proc_pos
+    ld_t0,a1,0
+    st_t0,a0,16
+    li_t1 %1 %0
+    st_t1,a0,24
+    la_br &proc_loop
     b
 
-:process_not_macro
-    # reload tok (registers clobbered by the tok_eq_const call above)
-    la_a0 &proc_pos
-    ld_t0,a0,0
+:proc_check_newline
+    # reload s, tok
+    ld_a0,sp,16
+    ld_t0,sp,24
     ld_a1,t0,0
-
-    # if (tok->kind != TOK_NEWLINE) emit it
     li_a2 TOK_NEWLINE
-    la_br &process_regular_token
+    la_br &proc_check_builtin
     bne_a1,a2
 
-    # newline: emit_newline, proc_pos++, line_start = 1
+    # newline: s->pos += 24; s->line_start = 1; emit_newline()
+    addi_t0,t0,24
+    st_t0,a0,16
+    li_t1 %1 %0
+    st_t1,a0,24
     la_br &emit_newline
     call
-    la_a0 &proc_pos
-    ld_t0,a0,0
-    addi_t0,t0,24
-    st_t0,a0,0
-    la_a0 &proc_line_start
-    li_a1 %1 %0
-    st_a1,a0,0
-    la_br &process_loop
+    la_br &proc_loop
     b
 
-:process_regular_token
-    # emit_token(tok); proc_pos++; line_start = 0
-    la_a0 &proc_pos
-    ld_a0,a0,0
+:proc_check_builtin
+    # tok->kind == TOK_WORD && tok+1 < s->end && (tok+1)->kind == TOK_LPAREN ?
+    ld_a0,sp,16
+    ld_t0,sp,24
+    ld_a1,t0,0
+    li_a2 TOK_WORD
+    la_br &proc_check_macro
+    bne_a1,a2
+    addi_t1,t0,24
+    ld_a1,a0,8
+    la_br &proc_check_builtin_has_next
+    blt_t1,a1
+    la_br &proc_check_macro
+    b
+:proc_check_builtin_has_next
+    ld_a1,t1,0
+    li_a2 TOK_LPAREN
+    la_br &proc_check_macro
+    bne_a1,a2
+
+    # try the five builtin names: ! @ % $ %select
+    mov_a0,t0
+    la_a1 &const_bang
+    li_a2 %1 %0
+    la_br &tok_eq_const
+    call
+    la_br &proc_do_builtin
+    bnez_a0
+    ld_a0,sp,24
+    la_a1 &const_at
+    li_a2 %1 %0
+    la_br &tok_eq_const
+    call
+    la_br &proc_do_builtin
+    bnez_a0
+    ld_a0,sp,24
+    la_a1 &const_pct
+    li_a2 %1 %0
+    la_br &tok_eq_const
+    call
+    la_br &proc_do_builtin
+    bnez_a0
+    ld_a0,sp,24
+    la_a1 &const_dlr
+    li_a2 %1 %0
+    la_br &tok_eq_const
+    call
+    la_br &proc_do_builtin
+    bnez_a0
+    ld_a0,sp,24
+    la_a1 &const_select
+    li_a2 %7 %0
+    la_br &tok_eq_const
+    call
+    la_br &proc_do_builtin
+    bnez_a0
+    la_br &proc_check_macro
+    b
+
+:proc_do_builtin
+    # expand_builtin_call(s, tok)
+    ld_a0,sp,16
+    ld_a1,sp,24
+    la_br &expand_builtin_call
+    call
+    la_br &proc_loop
+    b
+
+:proc_check_macro
+    # macro = find_macro(tok); if non-zero AND tok+1 < s->end AND (tok+1)->kind == TOK_LPAREN: expand_call
+    ld_a0,sp,24
+    la_br &find_macro
+    call
+    la_br &proc_emit
+    beqz_a0
+    mov_t2,a0
+    ld_a0,sp,16
+    ld_t0,sp,24
+    addi_t1,t0,24
+    ld_a1,a0,8
+    la_br &proc_macro_has_next
+    blt_t1,a1
+    la_br &proc_emit
+    b
+:proc_macro_has_next
+    ld_a1,t1,0
+    li_a2 TOK_LPAREN
+    la_br &proc_emit
+    bne_a1,a2
+    ld_a0,sp,16
+    mov_a1,t2
+    la_br &expand_call
+    call
+    la_br &proc_loop
+    b
+
+:proc_emit
+    # emit_token(tok); s->pos += 24; s->line_start = 0
+    ld_a0,sp,24
     la_br &emit_token
     call
-    la_a0 &proc_pos
-    ld_t0,a0,0
+    ld_a0,sp,16
+    ld_t0,a0,16
     addi_t0,t0,24
-    st_t0,a0,0
-    la_a0 &proc_line_start
-    li_a1 %0 %0
-    st_a1,a0,0
-    la_br &process_loop
+    st_t0,a0,16
+    li_t1 %0 %0
+    st_t1,a0,24
+    la_br &proc_loop
     b
 
-:process_done
+:proc_pop_continue
+    la_br &pop_stream
+    call
+    la_br &proc_loop
+    b
+
+:proc_done
     leave
     ret
 
@@ -1199,41 +1323,144 @@ DEFINE EXPR_INVALID 1100000000000000
 ## Push Stream { start = pos = a0, end = a1, line_start = 1, pool_mark = a2 }
 ## onto streams[]. Bumps stream_top. pool_mark is a byte offset into
 ## expand_pool, or -1 for a source-owned stream (pop_stream won't rewind).
-## Reads/writes: streams, stream_top. Oracle: m1pp.c:push_stream_span.
+##
+## stream_top is maintained as a byte offset into streams[] (count * 40),
+## matching the running-tail-pointer pattern used by source_end / macros_end.
+## Reads/writes: streams, stream_top. Leaf. Oracle: m1pp.c:push_stream_span.
 :push_stream_span
-    la_br &err_not_implemented
-    b
+    # new_top = stream_top + STREAM_SIZE; if (cap < new_top) fatal (reuses err_token_overflow)
+    la_t0 &stream_top
+    ld_t1,t0,0
+    li_t2 M1PP_STREAM_SIZE
+    add_t2,t1,t2
+    li_a3 M1PP_STREAM_STACK_CAP
+    la_br &err_token_overflow
+    blt_a3,t2
+
+    # s = &streams[stream_top]
+    la_a3 &streams
+    add_a3,a3,t1
+
+    # s->start = a0; s->end = a1; s->pos = a0; s->line_start = 1; s->pool_mark = a2
+    st_a0,a3,0
+    st_a1,a3,8
+    st_a0,a3,16
+    li_t1 %1 %0
+    st_t1,a3,24
+    st_a2,a3,32
+
+    # stream_top = new_top
+    st_t2,t0,0
+    ret
 
 ## current_stream() -> a0 = &streams[stream_top-1], or 0 if empty. Leaf.
+## stream_top is a byte offset, so &streams[top-1] = streams + stream_top - 40.
 ## Reads: streams, stream_top. Oracle: m1pp.c:current_stream.
 :current_stream
-    la_br &err_not_implemented
-    b
+    la_a0 &stream_top
+    ld_t0,a0,0
+    la_br &current_stream_empty
+    beqz_t0
+    la_a0 &streams
+    add_a0,a0,t0
+    li_t1 M1PP_STREAM_SIZE
+    sub_a0,a0,t1
+    ret
+:current_stream_empty
+    li_a0 %0 %0
+    ret
 
 ## pop_stream() -> void. Leaf.
 ## Decrement stream_top. If the popped stream's pool_mark >= 0, restore
 ## pool_used = pool_mark (reclaim the expansion-pool space it used).
 ## Reads/writes: streams, stream_top, pool_used. Oracle: m1pp.c:pop_stream.
 :pop_stream
-    la_br &err_not_implemented
-    b
+    la_a0 &stream_top
+    ld_t0,a0,0
+    la_br &pop_stream_done
+    beqz_t0
+    li_t1 M1PP_STREAM_SIZE
+    sub_t0,t0,t1
+    st_t0,a0,0
+
+    # mark = popped->pool_mark
+    la_a1 &streams
+    add_a1,a1,t0
+    ld_t0,a1,32
+
+    # if (mark < 0) skip; else pool_used = mark
+    la_br &pop_stream_done
+    bltz_t0
+    la_a1 &pool_used
+    st_t0,a1,0
+:pop_stream_done
+    ret
 
 ## copy_span_to_pool(a0=start_tok, a1=end_tok) -> void (fatal on pool overflow)
 ## Append each 24-byte Token in [start, end) to expand_pool at pool_used,
 ## advancing pool_used accordingly.
-## Reads/writes: expand_pool, pool_used. Oracle: m1pp.c:copy_span_to_pool.
+## Reads/writes: expand_pool, pool_used. Leaf. a0 is advanced to end_tok;
+## a2, a3, t0-t2 are used as scratch. Oracle: m1pp.c:copy_span_to_pool.
 :copy_span_to_pool
-    la_br &err_not_implemented
+:cstp_loop
+    # if (start == end) done
+    la_br &cstp_done
+    beq_a0,a1
+
+    # bounds: pool_used + 24 must fit in EXPAND_CAP
+    la_a2 &pool_used
+    ld_t0,a2,0
+    addi_t1,t0,24
+    li_t2 M1PP_EXPAND_CAP
+    la_br &err_token_overflow
+    blt_t2,t1
+
+    # dst = &expand_pool[pool_used]
+    la_a3 &expand_pool
+    add_a3,a3,t0
+
+    # copy 24 bytes (3 × u64)
+    ld_t1,a0,0
+    st_t1,a3,0
+    ld_t1,a0,8
+    st_t1,a3,8
+    ld_t1,a0,16
+    st_t1,a3,16
+
+    # pool_used += 24; start += 24
+    addi_t0,t0,24
+    st_t0,a2,0
+    addi_a0,a0,24
+    la_br &cstp_loop
     b
+:cstp_done
+    ret
 
 ## push_pool_stream_from_mark(a0=mark) -> void (fatal on overflow)
 ## If pool_used == mark (empty expansion), do nothing and return.
 ## Otherwise push_stream_span(expand_pool+mark, expand_pool+pool_used, mark).
-## Reads/writes: expand_pool, pool_used, streams, stream_top.
+## Reads/writes: expand_pool, pool_used, streams, stream_top. Non-leaf:
+## needs a frame so the call to push_stream_span doesn't clobber LR.
 ## Oracle: m1pp.c:push_pool_stream_from_mark.
 :push_pool_stream_from_mark
-    la_br &err_not_implemented
-    b
+    enter_0
+    # if (pool_used == mark) return
+    la_a1 &pool_used
+    ld_t0,a1,0
+    la_br &ppsfm_done
+    beq_t0,a0
+
+    # push_stream_span(expand_pool+mark, expand_pool+pool_used, mark)
+    la_a2 &expand_pool
+    mov_t1,a0
+    add_a0,a2,a0
+    add_a1,a2,t0
+    mov_a2,t1
+    la_br &push_stream_span
+    call
+:ppsfm_done
+    leave
+    ret
 
 ## ============================================================================
 ## --- Phase 4 STUB: argument parsing ------------------------------------------
@@ -1578,6 +1805,11 @@ DEFINE EXPR_INVALID 1100000000000000
 :const_lparen "("
 :const_rparen ")"
 :const_comma ","
+:const_bang "!"
+:const_at "@"
+:const_pct "%"
+:const_dlr "$"
+:const_select "%select"
 
 :msg_prefix "m1pp: "
 :msg_newline "

	boot2 Playing with the bootstrap
	git clone https://git.ryansepassi.com/git/boot2.git
	Log \| Files \| Refs