boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit 04089609068b2758302653f0bf1488525c8eb499
parent 51e51fb86495f1a6cbfb8fde625b5db1eab2dac1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 17:22:42 -0700

Merge feature: local labels :@name / &@name (§1)

Diffstat:
Mm1pp/m1pp.M1 | 261++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mm1pp/m1pp.c | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/m1pp/11-local-labels.M1pp | 37+++++++++++++++++++++++++++++++++++++
Atests/m1pp/11-local-labels.expected | 31+++++++++++++++++++++++++++++++
4 files changed, 395 insertions(+), 2 deletions(-)

diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1 @@ -1927,6 +1927,15 @@ DEFINE EXPR_INVALID 1200000000000000 la_br &err_bad_macro_header bne_t0,t1 + # expansion_id = ++next_expansion_id (monotonic; used by local-label + # rewriting in the body-copy path to rename :@name / &@name tokens). + la_a0 &next_expansion_id + ld_t0,a0,0 + addi_t0,t0,1 + st_t0,a0,0 + la_a1 &emt_expansion_id + st_t0,a1,0 + # Snapshot call_end_pos -> emt_after_pos before the body walk, so # nothing in the substitution loop can clobber the resume position. la_a0 &call_end_pos @@ -1996,8 +2005,9 @@ DEFINE EXPR_INVALID 1200000000000000 la_br &find_param call - # if (param_idx == 0) fall through to copy literal body token - la_br &emt_copy_literal + # if (param_idx == 0) body-native token: check for local-label rewrite, + # else fall through to substitute logic. + la_br &emt_check_local_label beqz_a0 # param_idx != 0: substitute. emt_do_substitute_* will re-derive @@ -2051,6 +2061,218 @@ DEFINE EXPR_INVALID 1200000000000000 la_br &emt_do_substitute_plain b +## emt_check_local_label: body-native token at body_pos. If it's a +## TOK_WORD whose text starts with ":@" or "&@" and has at least one +## char after the '@', rewrite it to ":name__NN" / "&name__NN" (NN = +## emt_expansion_id) and push as TOK_WORD. Otherwise fall through to +## emt_copy_literal, which copies the body token verbatim. +:emt_check_local_label + # t0 = body_tok ptr + la_a0 &emt_body_pos + ld_t0,a0,0 + # kind must be TOK_WORD (== 0) + ld_a1,t0,0 + la_br &emt_copy_literal + bnez_a1 + # len must be >= 3 (sigil + '@' + >=1 tail char) + ld_a2,t0,16 + li_a3 %3 %0 + la_br &emt_copy_literal + blt_a2,a3 + # first byte must be ':' (58) or '&' (38) + ld_a3,t0,8 + lb_a1,a3,0 + li_a2 %58 %0 + la_br &emt_check_local_label_at + beq_a1,a2 + li_a2 %38 %0 + la_br &emt_copy_literal + bne_a1,a2 +:emt_check_local_label_at + # second byte must be '@' (64) + lb_a1,a3,1 + li_a2 %64 %0 + la_br &emt_copy_literal + bne_a1,a2 + # Local label! 
Fall through to rewrite. + +## emt_rewrite_local_label: build "sigil + tail + __ + decimal(NN)" in +## local_label_scratch, stash it into text_buf via append_text, and push +## a TOK_WORD to expand_pool. +:emt_rewrite_local_label + # Stash body_tok text_ptr / text_len into BSS so they survive + # function calls (append_text is non-leaf via its arena bump). + la_a0 &emt_body_pos + ld_t0,a0,0 + ld_a1,t0,8 + la_a2 &ll_src_ptr + st_a1,a2,0 + ld_a1,t0,16 + la_a2 &ll_src_len + st_a1,a2,0 + + # --- Convert emt_expansion_id to decimal, reverse-fill into + # --- local_label_digits[0..24). Write right-to-left starting at + # --- offset 23 so digits are adjacent at [cursor, &scratch+24). + la_a0 &emt_expansion_id + ld_t0,a0,0 # t0 = id (mutated) + la_t1 &local_label_digits + li_a2 %24 %0 + add_t1,t1,a2 # t1 = end (one past last slot) + mov_t2,t1 # t2 = cursor (moves left) + + # Special-case id == 0 -> single '0' digit. + la_br &emt_rldg_loop + bnez_t0 + addi_t2,t2,neg1 + li_a0 %48 %0 + sb_a0,t2,0 + la_br &emt_rldg_done + b +:emt_rldg_loop + la_br &emt_rldg_done + beqz_t0 + # digit = id % 10 + mov_a0,t0 + li_a1 %10 %0 + rem_a2,a0,a1 # a2 = id % 10 + addi_a2,a2,48 # a2 = '0' + digit + addi_t2,t2,neg1 + sb_a2,t2,0 # *--cursor = digit + # id = id / 10 + mov_a0,t0 + li_a1 %10 %0 + div_a0,a0,a1 + mov_t0,a0 + la_br &emt_rldg_loop + b +:emt_rldg_done + # digit_count = end - cursor + la_a1 &local_label_digits + li_a2 %24 %0 + add_a1,a1,a2 # a1 = end + sub_a0,a1,t2 # a0 = digit_count + la_a1 &ll_digit_count + st_a0,a1,0 + # Save cursor (start of digits) for the copy step. + la_a1 &ll_digit_cursor + st_t2,a1,0 + + # --- Build final text in local_label_scratch --- + # Layout: [0]=sigil, [1..1+tail_len)=tail, then "__", then digits. + # tail_len = len - 2 + + # Write sigil (src_ptr[0]) to scratch[0]. + la_a0 &ll_src_ptr + ld_a1,a0,0 + lb_a2,a1,0 + la_a3 &local_label_scratch + sb_a2,a3,0 + + # Copy tail: scratch[1..1+tail_len) <- src_ptr[2..2+tail_len). 
+ la_a0 &ll_src_len + ld_a1,a0,0 + li_a2 %2 %0 + sub_t0,a1,a2 # t0 = tail_len = src_len - 2 + la_a0 &ll_src_ptr + ld_a1,a0,0 # a1 = src_ptr + addi_a1,a1,2 # a1 = src_ptr + 2 (tail start) + la_a2 &local_label_scratch + addi_a2,a2,1 # a2 = scratch + 1 (dst tail start) + li_t1 %0 %0 # t1 = i +:emt_rlbuild_tail_loop + la_br &emt_rlbuild_tail_done + beq_t1,t0 + add_a3,a1,t1 + lb_a3,a3,0 + add_t2,a2,t1 + sb_a3,t2,0 + addi_t1,t1,1 + la_br &emt_rlbuild_tail_loop + b +:emt_rlbuild_tail_done + # Save tail_len for later offset math. + la_a0 &ll_tail_len + st_t0,a0,0 + + # Write "__" at scratch[1+tail_len], scratch[2+tail_len]. + la_a2 &local_label_scratch + addi_a2,a2,1 + add_a2,a2,t0 # a2 = &scratch[1+tail_len] + li_a3 %95 %0 # '_' + sb_a3,a2,0 + addi_a2,a2,1 + sb_a3,a2,0 + + # Copy digits: scratch[3+tail_len..3+tail_len+digit_count) <- digit_cursor[0..digit_count). + la_a0 &ll_digit_count + ld_t1,a0,0 # t1 = digit_count + la_a0 &ll_digit_cursor + ld_a1,a0,0 # a1 = digit_cursor (src) + la_a0 &ll_tail_len + ld_t0,a0,0 # t0 = tail_len + la_a2 &local_label_scratch + addi_a2,a2,3 + add_a2,a2,t0 # a2 = &scratch[3+tail_len] (dst) + li_t2 %0 %0 # t2 = i +:emt_rlbuild_digits_loop + la_br &emt_rlbuild_digits_done + beq_t2,t1 + add_a3,a1,t2 + lb_a3,a3,0 + add_a0,a2,t2 + sb_a3,a0,0 + addi_t2,t2,1 + la_br &emt_rlbuild_digits_loop + b +:emt_rlbuild_digits_done + + # total_len = 1 + tail_len + 2 + digit_count = 3 + tail_len + digit_count + la_a0 &ll_tail_len + ld_a1,a0,0 + la_a0 &ll_digit_count + ld_a2,a0,0 + add_a1,a1,a2 + addi_a1,a1,3 + la_a0 &ll_total_len + st_a1,a0,0 + + # durable_ptr = append_text(&local_label_scratch, total_len) + la_a0 &local_label_scratch + la_br &append_text + call + # a0 = durable_ptr (into text_buf) + + # Push TOK_WORD { kind=0, text_ptr=durable_ptr, text_len=total_len } to expand_pool. 
+ la_a1 &pool_used + ld_t0,a1,0 + li_a2 M1PP_EXPAND_CAP + la_br &err_token_overflow + beq_t0,a2 + la_a3 &expand_pool + add_a3,a3,t0 # a3 = dst slot + # kind = TOK_WORD + li_a2 TOK_WORD + st_a2,a3,0 + # text_ptr + st_a0,a3,8 + # text_len + la_a0 &ll_total_len + ld_a2,a0,0 + st_a2,a3,16 + # pool_used += 24 + addi_t0,t0,24 + la_a1 &pool_used + st_t0,a1,0 + + # body_pos += 24 + la_a0 &emt_body_pos + ld_t0,a0,0 + addi_t0,t0,24 + st_t0,a0,0 + la_br &emt_loop + b + :emt_copy_literal # Append *body_pos to expand_pool. Check overflow. la_a0 &pool_used @@ -4702,6 +4924,41 @@ ZERO8 ZERO8 :emt_body_start ZERO8 + +## Local-label rewrite (§1). next_expansion_id is the monotonic counter +## (never reset); emt_expansion_id snapshots it at the start of each +## expand_macro_tokens call so nested-call BSS reuse is safe. +## ll_* slots hold body-token span + derived sizes while building the +## renamed text in local_label_scratch. +:next_expansion_id +ZERO8 +:emt_expansion_id +ZERO8 +:ll_src_ptr +ZERO8 +:ll_src_len +ZERO8 +:ll_tail_len +ZERO8 +:ll_digit_count +ZERO8 +:ll_digit_cursor +ZERO8 +:ll_total_len +ZERO8 + +## local_label_digits: 24-byte reverse-fill scratch for the decimal +## rendering of emt_expansion_id (fits any u64 value). +:local_label_digits +ZERO8 ZERO8 ZERO8 + +## local_label_scratch: 128-byte working buffer for the renamed text +## (sigil + tail + "__" + digits) before it's copied into text_buf via +## append_text. Caps the combined tail + digit length at ~125 bytes, +## which is ample for any realistic local-label name. 
+:local_label_scratch +ZERO32 ZERO32 ZERO32 ZERO32 + :fp_macro ZERO8 :fp_tok diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c @@ -162,6 +162,7 @@ static int pool_used; static int output_used; static int output_need_space; static int stream_top; +static int next_expansion_id; static struct Token *arg_starts[MAX_PARAMS]; static struct Token *arg_ends[MAX_PARAMS]; @@ -669,6 +670,64 @@ static int paste_pool_range(int mark) return 1; } +static int is_local_label_token(const struct Token *tok) +{ + if (tok->kind != TOK_WORD || tok->text.len < 3) { + return 0; + } + if (tok->text.ptr[0] != ':' && tok->text.ptr[0] != '&') { + return 0; + } + if (tok->text.ptr[1] != '@') { + return 0; + } + return 1; +} + +static int push_local_label_token(const struct Token *tok, int expansion_id) +{ + /* Rewrite ":@name" -> ":name__NN", "&@name" -> "&name__NN". + * Build the text directly in text_buf so the resulting span is stable. */ + char digits[16]; + int digit_count = 0; + int unsigned_id; + int start; + int total; + int i; + struct Token out; + + unsigned_id = expansion_id; + if (unsigned_id == 0) { + digits[digit_count++] = '0'; + } else { + while (unsigned_id > 0) { + digits[digit_count++] = (char)('0' + (unsigned_id % 10)); + unsigned_id /= 10; + } + } + + /* Reserve: sigil(1) + tail(len-2) + "__"(2) + digits + NUL. 
*/ + total = 1 + (tok->text.len - 2) + 2 + digit_count; + if (text_used + total + 1 > MAX_TEXT) { + return fail("text overflow"); + } + start = text_used; + text_buf[text_used++] = tok->text.ptr[0]; + memcpy(text_buf + text_used, tok->text.ptr + 2, (size_t)(tok->text.len - 2)); + text_used += tok->text.len - 2; + text_buf[text_used++] = '_'; + text_buf[text_used++] = '_'; + for (i = digit_count - 1; i >= 0; i--) { + text_buf[text_used++] = digits[i]; + } + text_buf[text_used++] = '\0'; + + out.kind = TOK_WORD; + out.text.ptr = text_buf + start; + out.text.len = total; + return push_pool_token(out); +} + static int expand_macro_tokens(struct Token *call_tok, struct Token *limit, const struct Macro *m, struct Token **after_out, int *mark_out) @@ -676,6 +735,7 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit, struct Token *body_tok; struct Token *end_pos; int mark; + int expansion_id; if (call_tok + 1 < limit && (call_tok + 1)->kind == TOK_LPAREN) { if (!parse_args(call_tok + 1, limit)) { @@ -692,6 +752,7 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit, return fail("bad macro call"); } + expansion_id = ++next_expansion_id; mark = pool_used; for (body_tok = m->body_start; body_tok < m->body_end; body_tok++) { int param_idx = find_param(m, body_tok); @@ -709,6 +770,13 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit, } continue; } + if (is_local_label_token(body_tok)) { + if (!push_local_label_token(body_tok, expansion_id)) { + pool_used = mark; + return 0; + } + continue; + } if (!push_pool_token(*body_tok)) { pool_used = mark; return 0; diff --git a/tests/m1pp/11-local-labels.M1pp b/tests/m1pp/11-local-labels.M1pp @@ -0,0 +1,37 @@ +# Local labels (§1): `:@name` / `&@name` inside macro bodies rewrite to +# `:name__NN` / `&name__NN` where NN is a fresh monotonic id per expansion. +# Scoping: body-native only; param-substituted tokens pass through untouched. 
+# +# Scenarios: +# 1) a single macro using `:@end` called twice -> end__1, end__2 distinct +# 2) nested macros each using `:@done` -> outer/inner get separate ids +# 3) `&@label` address form rewrites the same way +# 4) a `:@name` literal passed as an argument is NOT rewritten + +%macro ONCE() + jne &@end + :@end +%endm + +%macro OUTER() + :@done + %INNER() + jmp &@done +%endm + +%macro INNER() + :@done + body +%endm + +%macro ARGPASS(lbl) + lbl +%endm + +%ONCE() +%ONCE() + +%OUTER() + +%ARGPASS(:@kept) +END diff --git a/tests/m1pp/11-local-labels.expected b/tests/m1pp/11-local-labels.expected @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + +jne &end__1 +:end__1 + +jne &end__2 +:end__2 + + +:done__3 +:done__4 +body + +jmp &done__3 + + +:@kept + +END