commit e0a71b2419b2d73e5e8c8c5b453e290c69df7d54
parent 1c9de593e0c3ec9dcc53712e78aa6353f0aaf41a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 15:35:28 -0700
Merge branch 'worktree-agent-a55e267523e077156' into integrate-m1pp
Diffstat:
| M | m1pp/m1pp.M1 | | | 277 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- |
1 file changed, 271 insertions(+), 6 deletions(-)
diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1
@@ -1548,24 +1548,248 @@ DEFINE EXPR_INVALID 1100000000000000
## ============================================================================
## append_pasted_token(a0=dst_tok, a1=left_tok, a2=right_tok) -> void (fatal)
-## Concatenate left->text and right->text into text_buf via append_text and
-## write *dst = { TOK_WORD, new_span }. Practical length limit: fit in the
-## implementation's working buffer (oracle uses 512 bytes).
+## Concatenate left->text and right->text into paste_scratch, then call
+## append_text(&paste_scratch, total_len) for stable storage in text_buf,
+## and write *dst = { TOK_WORD, text_ptr, total_len }. The C oracle uses a
+## 512-byte tmp buffer; we use 256 to fit M0's quoted-literal cap. Fatal
+## (err_text_overflow) if combined length exceeds 256 bytes; append_text
+## handles its own text_buf overflow check.
## Oracle: m1pp.c:append_pasted_token.
:append_pasted_token
- la_br &err_not_implemented
+ enter_0
+
+ # ---- Spill dst and both operands' {text_ptr, len} to BSS. Left is read here, before *dst is written, so dst == left is safe. ----
+ la_t0 &paste_dst_save
+ st_a0,t0,0 # paste_dst_save = dst
+ la_t0 &paste_left_ptr
+ ld_t1,a1,8 # t1 = left text pointer (token offset 8)
+ st_t1,t0,0
+ la_t0 &paste_left_len
+ ld_t1,a1,16 # t1 = left length (token offset 16)
+ st_t1,t0,0
+ la_t0 &paste_right_ptr
+ ld_t1,a2,8 # t1 = right text pointer
+ st_t1,t0,0
+ la_t0 &paste_right_len
+ ld_t1,a2,16 # t1 = right length
+ st_t1,t0,0
+
+ # ---- total_len = left.len + right.len; fatal if > 256 (checked before any byte is written to paste_scratch) ----
+ la_t0 &paste_left_len
+ ld_t1,t0,0
+ la_t0 &paste_right_len
+ ld_t2,t0,0
+ add_a0,t1,t2 # a0 = total_len
+ li_a1 %256 %0 # a1 = 256, the size of paste_scratch
+ la_br &err_text_overflow
+ blt_a1,a0 # taken when 256 < total_len
+ # save total_len: it is needed as the append_text length and again for dst afterwards
+ la_t0 &paste_total_len
+ st_a0,t0,0
+
+ # ---- Copy left bytes: paste_scratch[0..left.len) <- left.text_ptr ----
+ la_t0 &paste_left_ptr
+ ld_t0,t0,0 # t0 = source base
+ la_t1 &paste_left_len
+ ld_t1,t1,0 # t1 = byte count
+ la_t2 &paste_scratch # t2 = destination base
+ li_a0 %0 %0 # a0 = byte index i
+:append_pasted_left_loop
+ la_br &append_pasted_left_done
+ beq_a0,t1 # exit once i == count; a zero-length operand copies nothing
+ add_a1,t0,a0
+ lb_a1,a1,0 # a1 = source byte i
+ add_a2,t2,a0
+ sb_a1,a2,0 # paste_scratch[i] = a1
+ addi_a0,a0,1
+ la_br &append_pasted_left_loop
+ b
+:append_pasted_left_done
+
+ # ---- Copy right bytes: paste_scratch[left.len..total_len) <- right.text_ptr ----
+ la_t0 &paste_right_ptr
+ ld_t0,t0,0 # t0 = source base
+ la_t1 &paste_right_len
+ ld_t1,t1,0 # t1 = byte count
+ la_t2 &paste_scratch
+ la_a3 &paste_left_len
+ ld_a3,a3,0
+ add_t2,t2,a3 # t2 = &paste_scratch[left.len]
+ li_a0 %0 %0 # a0 = byte index i, restarted at 0 for the right operand
+:append_pasted_right_loop
+ la_br &append_pasted_right_done
+ beq_a0,t1 # exit once i == count
+ add_a1,t0,a0
+ lb_a1,a1,0 # a1 = source byte i
+ add_a2,t2,a0
+ sb_a1,a2,0 # paste_scratch[left.len + i] = a1
+ addi_a0,a0,1
+ la_br &append_pasted_right_loop
 b
+:append_pasted_right_done
+
+ # ---- text_ptr = append_text(&paste_scratch, total_len) ----
+ la_a0 &paste_scratch
+ la_a1 &paste_total_len
+ ld_a1,a1,0
+ la_br &append_text
+ call
+ # a0 = text_ptr (returned); everything else needed below is reloaded from BSS
+
+ # ---- *dst = { TOK_WORD, text_ptr, total_len } ----
+ la_t0 &paste_dst_save
+ ld_t0,t0,0 # t0 = dst, reloaded from its spill slot
+ li_a2 TOK_WORD
+ st_a2,t0,0 # dst kind (offset 0)
+ st_a0,t0,8 # dst text pointer (offset 8) = value returned by append_text
+ la_a1 &paste_total_len
+ ld_a1,a1,0
+ st_a1,t0,16 # dst length (offset 16) = total_len
+
+ leave
+ ret
## paste_pool_range(a0=mark) -> void (fatal on bad paste)
## In-place compactor over expand_pool[mark..pool_used). For each TOK_PASTE,
## paste (prev, next) into prev via append_pasted_token and skip both the
## PASTE and the next token. Copy other tokens forward. Update pool_used to
-## the new end. Fatal if ## is first, last, or adjacent to NEWLINE/PASTE.
+## the new end. Fatal (err_bad_macro_header — closest "bad input" label) if
+## ## is first, last, or adjacent to NEWLINE/PASTE.
## Oracle: m1pp.c:paste_pool_range.
:paste_pool_range
- la_br &err_not_implemented
+ enter_0
+
+ # ---- start = expand_pool + mark (mark is added directly to the pool address) ----
+ la_t0 &expand_pool
+ add_t0,t0,a0
+ la_t1 &paste_start
+ st_t0,t1,0
+ # paste_in = start (read cursor)
+ la_t1 &paste_in
+ st_t0,t1,0
+ # paste_out = start (write cursor)
+ la_t1 &paste_out
+ st_t0,t1,0
+
+ # ---- end = expand_pool + pool_used ----
+ la_t1 &pool_used
+ ld_t2,t1,0
+ la_t1 &expand_pool
+ add_t2,t1,t2
+ la_t1 &paste_end
+ st_t2,t1,0
+
+:paste_pool_loop
+ # in = paste_in; end = paste_end; if (in == end) done
+ la_a0 &paste_in
+ ld_t0,a0,0 # t0 = in
+ la_a1 &paste_end
+ ld_t1,a1,0 # t1 = end
+ la_br &paste_pool_done
+ beq_t0,t1
+
+ # kind = in->kind
+ ld_a2,t0,0
+ li_a3 TOK_PASTE
+ la_br &paste_pool_handle_paste
+ beq_a2,a3
+
+ # ---- non-PASTE: copy *in to *out, advance both by 24 (one token) ----
+ la_a0 &paste_out
+ ld_t2,a0,0 # t2 = out
+ # if (in == out) skip the copy: the token is already in place
+ la_br &paste_pool_skip_copy
+ beq_t0,t2
+ ld_a3,t0,0
+ st_a3,t2,0 # word at offset 0
+ ld_a3,t0,8
+ st_a3,t2,8 # word at offset 8
+ ld_a3,t0,16
+ st_a3,t2,16 # word at offset 16
+:paste_pool_skip_copy
+ addi_t0,t0,24
+ addi_t2,t2,24
+ la_a0 &paste_in
+ st_t0,a0,0 # paste_in = in + 24
+ la_a0 &paste_out
+ st_t2,a0,0 # paste_out = out + 24
+ la_br &paste_pool_loop
+ b
+
+:paste_pool_handle_paste
+ # ---- TOK_PASTE handling (on entry t0 = in, pointing at the ## token) ----
+ # Validate:
+ # out == start -> ## is first (fatal)
+ la_a0 &paste_out
+ ld_t1,a0,0 # t1 = out
+ la_a1 &paste_start
+ ld_t2,a1,0 # t2 = start
+ la_br &err_bad_macro_header
+ beq_t1,t2
+
+ # in+1 >= end -> ## is last (fatal)
+ # Equivalent: in+24 >= end, i.e. !(in+24 < end).
+ addi_t0,t0,24 # t0 = in + 24 (right operand ptr)
+ la_a1 &paste_end
+ ld_t2,a1,0 # t2 = end
+ # fatal if (in+1) >= end, i.e. if (in+24) >= end. blt branches when
+ # left < right, so branch over fatal when (in+24) < end.
+ la_br &paste_pool_paste_right_in_range
+ blt_t0,t2
+ la_br &err_bad_macro_header
+ b
+:paste_pool_paste_right_in_range
+ # t0 currently = in+24 (right operand)
+ # Validate (out-1)->kind not in {NEWLINE, PASTE}.
+ # out is in t1; out-1 = t1 - 24. (out-1)->kind = *(t1-24+0).
+ # Use mem offset: ld with offset neg24. out != start was checked above, so out-1 is not before start.
+ ld_a2,t1,neg24
+ li_a3 TOK_NEWLINE
+ la_br &err_bad_macro_header
+ beq_a2,a3
+ li_a3 TOK_PASTE
+ la_br &err_bad_macro_header
+ beq_a2,a3
+
+ # Validate (in+1)->kind not in {NEWLINE, PASTE}.
+ # t0 = in+24 (right operand), so kind = *(t0+0).
+ ld_a2,t0,0
+ li_a3 TOK_NEWLINE
+ la_br &err_bad_macro_header
+ beq_a2,a3
+ li_a3 TOK_PASTE
+ la_br &err_bad_macro_header
+ beq_a2,a3
+
+ # ---- append_pasted_token(out-1, out-1, in+1): result overwrites the left operand in place ----
+ # t1 = out, t0 = in+1 (right operand).
+ addi_t1,t1,neg24 # t1 = out - 1 (left = dst)
+ mov_a0,t1
+ mov_a1,t1
+ mov_a2,t0
+ la_br &append_pasted_token
+ call
+
+ # in += 48 (skip ## and the right operand). Out is unchanged. paste_in still holds the ## token's address, so it is reloaded here.
+ la_a0 &paste_in
+ ld_t0,a0,0
+ addi_t0,t0,48
+ st_t0,a0,0
+
+ la_br &paste_pool_loop
 b
+:paste_pool_done
+ # pool_used = (out - expand_pool)
+ la_a0 &paste_out
+ ld_t0,a0,0 # t0 = out
+ la_a1 &expand_pool
+ sub_t0,t0,a1 # t0 = offset of the new end from the pool base
+ la_a1 &pool_used
+ st_t0,a1,0
+ leave
+ ret
+
## ============================================================================
## --- Phase 7 STUBS: integer atoms + S-expression evaluator -------------------
## ============================================================================
@@ -1912,6 +2136,47 @@ ZERO8
:eval_value
ZERO8
+## Phase 6 paste-pass spill slots. append_pasted_token and paste_pool_range
+## both make calls, so locals that must outlive a call round-trip through BSS.
+## NOTE(review): static slots, so presumably neither routine may be re-entered mid-paste — confirm.
+## paste_dst_save — dst Token* spilled across append_text
+## paste_left_ptr/_len, paste_right_ptr/_len — operand spans for the
+## byte-copy loops in append_pasted_token
+## paste_total_len — left.len + right.len, reused after append_text
+## paste_start — expand_pool + mark; needed to detect "## is first"
+## after registers are clobbered by append_pasted_token
+## paste_in — current read cursor (Token*)
+## paste_out — current write cursor (Token*)
+## paste_end — exclusive end (Token*), = expand_pool + pool_used
+:paste_dst_save
+ZERO8
+:paste_left_ptr
+ZERO8
+:paste_left_len
+ZERO8
+:paste_right_ptr
+ZERO8
+:paste_right_len
+ZERO8
+:paste_total_len
+ZERO8
+:paste_start
+ZERO8
+:paste_in
+ZERO8
+:paste_out
+ZERO8
+:paste_end
+ZERO8
+
+## paste_scratch — 256-byte working buffer for append_pasted_token.
+## We assemble left.text ++ right.text here, then call
+## append_text(&paste_scratch, total_len) to copy into the durable
+## text_buf arena. append_pasted_token rejects total_len > 256 before writing here.
+## The cap is the M0 quoted-literal limit; the C oracle uses 512.
+:paste_scratch
+ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 # 8 x ZERO32 = 256 bytes
+
+
## arg_starts[16] / arg_ends[16]: 16 × 8 = 128 bytes each, i.e. 4 ZERO32.
## Written by parse_args; read by expand_macro_tokens and expand_builtin_call.
:arg_starts