boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit 02cef1ca5ab2161f25f7761d5e54e8bffd0ea57d
parent 9507a3668cd1218bae1aa0f171e7991cc8768afd
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 15:35:14 -0700

Track B: ## token paste compaction

Diffstat:
M m1pp/m1pp.M1 | 277 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 271 insertions(+), 6 deletions(-)

diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1 @@ -1321,24 +1321,248 @@ DEFINE EXPR_INVALID 1100000000000000 ## ============================================================================ ## append_pasted_token(a0=dst_tok, a1=left_tok, a2=right_tok) -> void (fatal) -## Concatenate left->text and right->text into text_buf via append_text and -## write *dst = { TOK_WORD, new_span }. Practical length limit: fit in the -## implementation's working buffer (oracle uses 512 bytes). +## Concatenate left->text and right->text into paste_scratch, then call +## append_text(&paste_scratch, total_len) for stable storage in text_buf, +## and write *dst = { TOK_WORD, text_ptr, total_len }. The C oracle uses a +## 512-byte tmp buffer; we use 256 to fit M0's quoted-literal cap. Fatal +## (err_text_overflow) if combined length exceeds 256 bytes; append_text +## handles its own text_buf overflow check. ## Oracle: m1pp.c:append_pasted_token. :append_pasted_token - la_br &err_not_implemented + enter_0 + + # ---- Spill all three operands to BSS so we can survive append_text. 
---- + la_t0 &paste_dst_save + st_a0,t0,0 + la_t0 &paste_left_ptr + ld_t1,a1,8 + st_t1,t0,0 + la_t0 &paste_left_len + ld_t1,a1,16 + st_t1,t0,0 + la_t0 &paste_right_ptr + ld_t1,a2,8 + st_t1,t0,0 + la_t0 &paste_right_len + ld_t1,a2,16 + st_t1,t0,0 + + # ---- total_len = left.len + right.len; fatal if > 256 ---- + la_t0 &paste_left_len + ld_t1,t0,0 + la_t0 &paste_right_len + ld_t2,t0,0 + add_a0,t1,t2 + li_a1 %256 %0 + la_br &err_text_overflow + blt_a1,a0 + # save total_len for the append_text call below + la_t0 &paste_total_len + st_a0,t0,0 + + # ---- Copy left bytes: paste_scratch[0..left.len) <- left.text_ptr ---- + la_t0 &paste_left_ptr + ld_t0,t0,0 + la_t1 &paste_left_len + ld_t1,t1,0 + la_t2 &paste_scratch + li_a0 %0 %0 +:append_pasted_left_loop + la_br &append_pasted_left_done + beq_a0,t1 + add_a1,t0,a0 + lb_a1,a1,0 + add_a2,t2,a0 + sb_a1,a2,0 + addi_a0,a0,1 + la_br &append_pasted_left_loop + b +:append_pasted_left_done + + # ---- Copy right bytes: paste_scratch[left.len..total_len) <- right.text_ptr ---- + la_t0 &paste_right_ptr + ld_t0,t0,0 + la_t1 &paste_right_len + ld_t1,t1,0 + la_t2 &paste_scratch + la_a3 &paste_left_len + ld_a3,a3,0 + add_t2,t2,a3 # t2 = &paste_scratch[left.len] + li_a0 %0 %0 +:append_pasted_right_loop + la_br &append_pasted_right_done + beq_a0,t1 + add_a1,t0,a0 + lb_a1,a1,0 + add_a2,t2,a0 + sb_a1,a2,0 + addi_a0,a0,1 + la_br &append_pasted_right_loop b +:append_pasted_right_done + + # ---- text_ptr = append_text(&paste_scratch, total_len) ---- + la_a0 &paste_scratch + la_a1 &paste_total_len + ld_a1,a1,0 + la_br &append_text + call + # a0 = text_ptr (returned) + + # ---- *dst = { TOK_WORD, text_ptr, total_len } ---- + la_t0 &paste_dst_save + ld_t0,t0,0 + li_a2 TOK_WORD + st_a2,t0,0 + st_a0,t0,8 + la_a1 &paste_total_len + ld_a1,a1,0 + st_a1,t0,16 + + leave + ret ## paste_pool_range(a0=mark) -> void (fatal on bad paste) ## In-place compactor over expand_pool[mark..pool_used). 
For each TOK_PASTE, ## paste (prev, next) into prev via append_pasted_token and skip both the ## PASTE and the next token. Copy other tokens forward. Update pool_used to -## the new end. Fatal if ## is first, last, or adjacent to NEWLINE/PASTE. +## the new end. Fatal (err_bad_macro_header — closest "bad input" label) if +## ## is first, last, or adjacent to NEWLINE/PASTE. ## Oracle: m1pp.c:paste_pool_range. :paste_pool_range - la_br &err_not_implemented + enter_0 + + # ---- start = expand_pool + mark ---- + la_t0 &expand_pool + add_t0,t0,a0 + la_t1 &paste_start + st_t0,t1,0 + # paste_in = start + la_t1 &paste_in + st_t0,t1,0 + # paste_out = start + la_t1 &paste_out + st_t0,t1,0 + + # ---- end = expand_pool + pool_used ---- + la_t1 &pool_used + ld_t2,t1,0 + la_t1 &expand_pool + add_t2,t1,t2 + la_t1 &paste_end + st_t2,t1,0 + +:paste_pool_loop + # in = paste_in; end = paste_end; if (in == end) done + la_a0 &paste_in + ld_t0,a0,0 + la_a1 &paste_end + ld_t1,a1,0 + la_br &paste_pool_done + beq_t0,t1 + + # kind = in->kind + ld_a2,t0,0 + li_a3 TOK_PASTE + la_br &paste_pool_handle_paste + beq_a2,a3 + + # ---- non-PASTE: copy *in to *out, advance both by 24 ---- + la_a0 &paste_out + ld_t2,a0,0 + # if (in == out) skip the copy + la_br &paste_pool_skip_copy + beq_t0,t2 + ld_a3,t0,0 + st_a3,t2,0 + ld_a3,t0,8 + st_a3,t2,8 + ld_a3,t0,16 + st_a3,t2,16 +:paste_pool_skip_copy + addi_t0,t0,24 + addi_t2,t2,24 + la_a0 &paste_in + st_t0,a0,0 + la_a0 &paste_out + st_t2,a0,0 + la_br &paste_pool_loop + b + +:paste_pool_handle_paste + # ---- TOK_PASTE handling ---- + # Validate: + # out == start -> ## is first (fatal) + la_a0 &paste_out + ld_t1,a0,0 + la_a1 &paste_start + ld_t2,a1,0 + la_br &err_bad_macro_header + beq_t1,t2 + + # in+1 >= end -> ## is last (fatal) + # Equivalent: in+24 >= end, i.e. !(in+24 < end). + addi_t0,t0,24 # t0 = in + 24 (right operand ptr) + la_a1 &paste_end + ld_t2,a1,0 + # fatal if (in+1) >= end, i.e. if (in+24) >= end. 
blt branches when + # left < right, so branch over fatal when (in+24) < end. + la_br &paste_pool_paste_right_in_range + blt_t0,t2 + la_br &err_bad_macro_header + b +:paste_pool_paste_right_in_range + # t0 currently = in+24 (right operand) + # Validate (out-1)->kind not in {NEWLINE, PASTE}. + # out is in t1; out-1 = t1 - 24. (out-1)->kind = *(t1-24+0). + # Use mem offset: ld with offset NEG24. + ld_a2,t1,neg24 + li_a3 TOK_NEWLINE + la_br &err_bad_macro_header + beq_a2,a3 + li_a3 TOK_PASTE + la_br &err_bad_macro_header + beq_a2,a3 + + # Validate (in+1)->kind not in {NEWLINE, PASTE}. + # t0 = in+24 (right operand), so kind = *(t0+0). + ld_a2,t0,0 + li_a3 TOK_NEWLINE + la_br &err_bad_macro_header + beq_a2,a3 + li_a3 TOK_PASTE + la_br &err_bad_macro_header + beq_a2,a3 + + # ---- append_pasted_token(out-1, out-1, in+1) ---- + # t1 = out, t0 = in+1 (right operand). + addi_t1,t1,neg24 # t1 = out - 1 (left = dst) + mov_a0,t1 + mov_a1,t1 + mov_a2,t0 + la_br &append_pasted_token + call + + # in += 48 (skip ## and the right operand). Out is unchanged. + la_a0 &paste_in + ld_t0,a0,0 + addi_t0,t0,48 + st_t0,a0,0 + + la_br &paste_pool_loop b +:paste_pool_done + # pool_used = (out - expand_pool) + la_a0 &paste_out + ld_t0,a0,0 + la_a1 &expand_pool + sub_t0,t0,a1 + la_a1 &pool_used + st_t0,a1,0 + leave + ret + ## ============================================================================ ## --- Phase 7 STUBS: integer atoms + S-expression evaluator ------------------- ## ============================================================================ @@ -1680,6 +1904,47 @@ ZERO8 :eval_value ZERO8 +## Phase 6 paste-pass spill slots. Both append_pasted_token and +## paste_pool_range call other functions, so all locals must round-trip +## through BSS across the call. 
+## paste_dst_save — dst Token* spilled across append_text +## paste_left_ptr/_len, paste_right_ptr/_len — operand spans for the +## byte-copy loops in append_pasted_token +## paste_total_len — left.len + right.len, reused after append_text +## paste_start — expand_pool + mark; needed to detect "## is first" +## after registers are clobbered by append_pasted_token +## paste_in — current read cursor (Token*) +## paste_out — current write cursor (Token*) +## paste_end — exclusive end (Token*), = expand_pool + pool_used +:paste_dst_save +ZERO8 +:paste_left_ptr +ZERO8 +:paste_left_len +ZERO8 +:paste_right_ptr +ZERO8 +:paste_right_len +ZERO8 +:paste_total_len +ZERO8 +:paste_start +ZERO8 +:paste_in +ZERO8 +:paste_out +ZERO8 +:paste_end +ZERO8 + +## paste_scratch — 256-byte working buffer for append_pasted_token. +## We assemble left.text ++ right.text here, then call +## append_text(&paste_scratch, total_len) to copy into the durable +## text_buf arena. The cap is the M0 quoted-literal limit; the C oracle +## uses 512. +:paste_scratch +ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 + ## arg_starts[16] / arg_ends[16]: 16 × 8 = 128 bytes each, i.e. 4 ZERO32. ## Written by parse_args; read by expand_macro_tokens and expand_builtin_call. :arg_starts