commit 04089609068b2758302653f0bf1488525c8eb499
parent 51e51fb86495f1a6cbfb8fde625b5db1eab2dac1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 17:22:42 -0700
Merge feature: local labels :@name / &@name (§1)
Diffstat:
4 files changed, 395 insertions(+), 2 deletions(-)
diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1
@@ -1927,6 +1927,15 @@ DEFINE EXPR_INVALID 1200000000000000
la_br &err_bad_macro_header
bne_t0,t1
+ # expansion_id = ++next_expansion_id (monotonic; used by local-label
+ # rewriting in the body-copy path to rename :@name / &@name tokens).
+ la_a0 &next_expansion_id
+ ld_t0,a0,0
+ addi_t0,t0,1
+ st_t0,a0,0
+ la_a1 &emt_expansion_id
+ st_t0,a1,0
+
# Snapshot call_end_pos -> emt_after_pos before the body walk, so
# nothing in the substitution loop can clobber the resume position.
la_a0 &call_end_pos
@@ -1996,8 +2005,9 @@ DEFINE EXPR_INVALID 1200000000000000
la_br &find_param
call
- # if (param_idx == 0) fall through to copy literal body token
- la_br &emt_copy_literal
+ # if (param_idx == 0) body-native token: check for local-label rewrite,
+ # else fall through to substitute logic.
+ la_br &emt_check_local_label
beqz_a0
# param_idx != 0: substitute. emt_do_substitute_* will re-derive
@@ -2051,6 +2061,218 @@ DEFINE EXPR_INVALID 1200000000000000
la_br &emt_do_substitute_plain
b
+## emt_check_local_label: body-native token at body_pos. If it's a
+## TOK_WORD whose text starts with ":@" or "&@" and has at least one
+## char after the '@', rewrite it to ":name__NN" / "&name__NN" (NN =
+## emt_expansion_id) and push as TOK_WORD. Otherwise branch to
+## emt_copy_literal, which copies the body token verbatim.
+##
+## Token fields read here: kind at +0, text_ptr at +8, text_len at +16.
+## Reached from the body walk when find_param returned 0 (not a param).
+## On a match, control falls straight through into
+## emt_rewrite_local_label; nothing is passed in registers (the rewrite
+## re-reads emt_body_pos itself).
+:emt_check_local_label
+ # t0 = body_tok ptr
+ la_a0 &emt_body_pos
+ ld_t0,a0,0
+ # kind must be TOK_WORD (== 0); any non-zero kind is copied verbatim
+ ld_a1,t0,0
+ la_br &emt_copy_literal
+ bnez_a1
+ # len must be >= 3 (sigil + '@' + >=1 tail char); a bare ":@" / "&@"
+ # is therefore left untouched
+ ld_a2,t0,16
+ li_a3 %3 %0
+ la_br &emt_copy_literal
+ blt_a2,a3
+ # first byte must be ':' (58) or '&' (38)
+ # a3 = text_ptr; it stays live for the second-byte check below
+ ld_a3,t0,8
+ lb_a1,a3,0
+ li_a2 %58 %0
+ la_br &emt_check_local_label_at
+ beq_a1,a2
+ li_a2 %38 %0
+ la_br &emt_copy_literal
+ bne_a1,a2
+:emt_check_local_label_at
+ # second byte must be '@' (64)
+ lb_a1,a3,1
+ li_a2 %64 %0
+ la_br &emt_copy_literal
+ bne_a1,a2
+ # Local label! Fall through to rewrite.
+
+## emt_rewrite_local_label: build "sigil + tail + __ + decimal(NN)" in
+## local_label_scratch, stash it into text_buf via append_text, and push
+## a TOK_WORD to expand_pool.
+##
+## Entered by fall-through from emt_check_local_label, so the token at
+## emt_body_pos is already known to be a TOK_WORD of length >= 3 that
+## starts with ":@" or "&@". Steps:
+##   1. save the token's text_ptr / text_len in ll_src_ptr / ll_src_len
+##   2. render emt_expansion_id as decimal into local_label_digits
+##   3. bounds-check the final length against the 128-byte scratch
+##   4. assemble the text in local_label_scratch
+##   5. append_text it, push the token, advance body_pos, resume emt_loop
+## Exits: emt_loop on success; err_token_overflow if the rewritten text
+## would not fit the scratch buffer or expand_pool is full.
+:emt_rewrite_local_label
+ # Stash body_tok text_ptr / text_len into BSS so they survive
+ # function calls (append_text is non-leaf via its arena bump).
+ la_a0 &emt_body_pos
+ ld_t0,a0,0
+ ld_a1,t0,8
+ la_a2 &ll_src_ptr
+ st_a1,a2,0
+ ld_a1,t0,16
+ la_a2 &ll_src_len
+ st_a1,a2,0
+
+ # --- Convert emt_expansion_id to decimal, reverse-fill into
+ # --- local_label_digits[0..24). Write right-to-left starting at
+ # --- offset 23 so digits are adjacent at [cursor, &local_label_digits+24).
+ la_a0 &emt_expansion_id
+ ld_t0,a0,0 # t0 = id (mutated)
+ la_t1 &local_label_digits
+ li_a2 %24 %0
+ add_t1,t1,a2 # t1 = end (one past last slot)
+ mov_t2,t1 # t2 = cursor (moves left)
+
+ # Special-case id == 0 -> single '0' digit (the loop below would
+ # otherwise emit no digits at all).
+ la_br &emt_rldg_loop
+ bnez_t0
+ addi_t2,t2,neg1
+ li_a0 %48 %0
+ sb_a0,t2,0
+ la_br &emt_rldg_done
+ b
+:emt_rldg_loop
+ la_br &emt_rldg_done
+ beqz_t0
+ # digit = id % 10
+ mov_a0,t0
+ li_a1 %10 %0
+ rem_a2,a0,a1 # a2 = id % 10
+ addi_a2,a2,48 # a2 = '0' + digit
+ addi_t2,t2,neg1
+ sb_a2,t2,0 # *--cursor = digit
+ # id = id / 10
+ mov_a0,t0
+ li_a1 %10 %0
+ div_a0,a0,a1
+ mov_t0,a0
+ la_br &emt_rldg_loop
+ b
+:emt_rldg_done
+ # digit_count = end - cursor
+ la_a1 &local_label_digits
+ li_a2 %24 %0
+ add_a1,a1,a2 # a1 = end
+ sub_a0,a1,t2 # a0 = digit_count
+ la_a1 &ll_digit_count
+ st_a0,a1,0
+ # Save cursor (start of digits) for the copy step.
+ la_a1 &ll_digit_cursor
+ st_t2,a1,0
+
+ # --- Bounds check before any byte is written to local_label_scratch.
+ # --- total_len = 1 + (src_len - 2) + 2 + digit_count
+ # ---           = src_len + digit_count + 1
+ # --- The scratch buffer is 128 bytes, so the text fits iff
+ # --- src_len + digit_count < 128. Without this check an over-long
+ # --- label name overruns the scratch buffer into the BSS slots laid
+ # --- out after it.
+ la_a0 &ll_src_len
+ ld_t0,a0,0 # t0 = src_len
+ la_a0 &ll_digit_count
+ ld_a2,a0,0 # a2 = digit_count
+ add_a2,a2,t0 # a2 = src_len + digit_count
+ li_a3 %128 %0
+ la_br &emt_rlbuild_fits
+ blt_a2,a3
+ # NOTE(review): err_token_overflow is reused because it is the only
+ # overflow handler visible from this code; switch to a dedicated
+ # text-overflow handler if the file has one.
+ la_br &err_token_overflow
+ b
+:emt_rlbuild_fits
+
+ # --- Build final text in local_label_scratch ---
+ # Layout: [0]=sigil, [1..1+tail_len)=tail, then "__", then digits.
+ # tail_len = len - 2
+
+ # Write sigil (src_ptr[0]) to scratch[0].
+ la_a0 &ll_src_ptr
+ ld_a1,a0,0
+ lb_a2,a1,0
+ la_a3 &local_label_scratch
+ sb_a2,a3,0
+
+ # Copy tail: scratch[1..1+tail_len) <- src_ptr[2..2+tail_len).
+ # (src_ptr[1] is the '@', which is dropped.)
+ la_a0 &ll_src_len
+ ld_a1,a0,0
+ li_a2 %2 %0
+ sub_t0,a1,a2 # t0 = tail_len = src_len - 2
+ la_a0 &ll_src_ptr
+ ld_a1,a0,0 # a1 = src_ptr
+ addi_a1,a1,2 # a1 = src_ptr + 2 (tail start)
+ la_a2 &local_label_scratch
+ addi_a2,a2,1 # a2 = scratch + 1 (dst tail start)
+ li_t1 %0 %0 # t1 = i
+:emt_rlbuild_tail_loop
+ la_br &emt_rlbuild_tail_done
+ beq_t1,t0
+ add_a3,a1,t1
+ lb_a3,a3,0
+ add_t2,a2,t1
+ sb_a3,t2,0
+ addi_t1,t1,1
+ la_br &emt_rlbuild_tail_loop
+ b
+:emt_rlbuild_tail_done
+ # Save tail_len for later offset math.
+ la_a0 &ll_tail_len
+ st_t0,a0,0
+
+ # Write "__" at scratch[1+tail_len], scratch[2+tail_len].
+ la_a2 &local_label_scratch
+ addi_a2,a2,1
+ add_a2,a2,t0 # a2 = &scratch[1+tail_len]
+ li_a3 %95 %0 # '_'
+ sb_a3,a2,0
+ addi_a2,a2,1
+ sb_a3,a2,0
+
+ # Copy digits: scratch[3+tail_len..3+tail_len+digit_count) <- digit_cursor[0..digit_count).
+ la_a0 &ll_digit_count
+ ld_t1,a0,0 # t1 = digit_count
+ la_a0 &ll_digit_cursor
+ ld_a1,a0,0 # a1 = digit_cursor (src)
+ la_a0 &ll_tail_len
+ ld_t0,a0,0 # t0 = tail_len
+ la_a2 &local_label_scratch
+ addi_a2,a2,3
+ add_a2,a2,t0 # a2 = &scratch[3+tail_len] (dst)
+ li_t2 %0 %0 # t2 = i
+:emt_rlbuild_digits_loop
+ la_br &emt_rlbuild_digits_done
+ beq_t2,t1
+ add_a3,a1,t2
+ lb_a3,a3,0
+ add_a0,a2,t2
+ sb_a3,a0,0
+ addi_t2,t2,1
+ la_br &emt_rlbuild_digits_loop
+ b
+:emt_rlbuild_digits_done
+
+ # total_len = 1 + tail_len + 2 + digit_count = 3 + tail_len + digit_count
+ la_a0 &ll_tail_len
+ ld_a1,a0,0
+ la_a0 &ll_digit_count
+ ld_a2,a0,0
+ add_a1,a1,a2
+ addi_a1,a1,3
+ la_a0 &ll_total_len
+ st_a1,a0,0
+
+ # durable_ptr = append_text(&local_label_scratch, total_len)
+ # a1 still holds total_len from the computation just above.
+ la_a0 &local_label_scratch
+ la_br &append_text
+ call
+ # a0 = durable_ptr (into text_buf)
+
+ # Push TOK_WORD { kind=0, text_ptr=durable_ptr, text_len=total_len } to expand_pool.
+ # a0 must stay untouched until it is stored as text_ptr below.
+ la_a1 &pool_used
+ ld_t0,a1,0
+ li_a2 M1PP_EXPAND_CAP
+ la_br &err_token_overflow
+ beq_t0,a2
+ la_a3 &expand_pool
+ add_a3,a3,t0 # a3 = dst slot
+ # kind = TOK_WORD
+ li_a2 TOK_WORD
+ st_a2,a3,0
+ # text_ptr
+ st_a0,a3,8
+ # text_len
+ la_a0 &ll_total_len
+ ld_a2,a0,0
+ st_a2,a3,16
+ # pool_used += 24
+ addi_t0,t0,24
+ la_a1 &pool_used
+ st_t0,a1,0
+
+ # body_pos += 24
+ la_a0 &emt_body_pos
+ ld_t0,a0,0
+ addi_t0,t0,24
+ st_t0,a0,0
+ la_br &emt_loop
+ b
+
:emt_copy_literal
# Append *body_pos to expand_pool. Check overflow.
la_a0 &pool_used
@@ -4702,6 +4924,41 @@ ZERO8
ZERO8
:emt_body_start
ZERO8
+
+## Local-label rewrite (§1). next_expansion_id is the monotonic counter
+## (never reset); emt_expansion_id snapshots it at the start of each
+## expand_macro_tokens call so nested-call BSS reuse is safe.
+## ll_* slots hold body-token span + derived sizes while building the
+## renamed text in local_label_scratch:
+##   ll_src_ptr / ll_src_len  - text_ptr / text_len of the ":@name" token
+##   ll_tail_len              - ll_src_len - 2 (name without sigil and '@')
+##   ll_digit_count           - number of decimal digits of the id
+##   ll_digit_cursor          - address of the first digit in local_label_digits
+##   ll_total_len             - 3 + ll_tail_len + ll_digit_count
+:next_expansion_id
+ZERO8
+:emt_expansion_id
+ZERO8
+:ll_src_ptr
+ZERO8
+:ll_src_len
+ZERO8
+:ll_tail_len
+ZERO8
+:ll_digit_count
+ZERO8
+:ll_digit_cursor
+ZERO8
+:ll_total_len
+ZERO8
+
+## local_label_digits: 24-byte reverse-fill scratch for the decimal
+## rendering of emt_expansion_id (fits any u64 value, which needs at
+## most 20 digits). Digits are written right-to-left from the end.
+:local_label_digits
+ZERO8 ZERO8 ZERO8
+
+## local_label_scratch: 128-byte working buffer for the renamed text
+## (sigil + tail + "__" + digits) before it's copied into text_buf via
+## append_text. Capacity requirement: tail + digits must not exceed
+## 125 bytes (128 minus the sigil and "__"). The buffer itself enforces
+## nothing; code writing into it is responsible for bounding the length.
+:local_label_scratch
+ZERO32 ZERO32 ZERO32 ZERO32
+
:fp_macro
ZERO8
:fp_tok
diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c
@@ -162,6 +162,7 @@ static int pool_used;
static int output_used;
static int output_need_space;
static int stream_top;
+static int next_expansion_id;
static struct Token *arg_starts[MAX_PARAMS];
static struct Token *arg_ends[MAX_PARAMS];
@@ -669,6 +670,64 @@ static int paste_pool_range(int mark)
return 1;
}
+/* Return 1 when tok is a word of the form ":@x..." or "&@x..." (a label
+ * sigil, then '@', then at least one more character); 0 otherwise. */
+static int is_local_label_token(const struct Token *tok)
+{
+    int has_sigil;
+
+    if (tok->kind != TOK_WORD) {
+        return 0;
+    }
+    /* Shortest valid spelling is sigil + '@' + one name character. */
+    if (tok->text.len < 3) {
+        return 0;
+    }
+    has_sigil = tok->text.ptr[0] == ':' || tok->text.ptr[0] == '&';
+    return has_sigil && tok->text.ptr[1] == '@';
+}
+
+/* Rewrite a local-label token for one macro expansion and push it to the
+ * expansion pool: ":@name" -> ":name__NN", "&@name" -> "&name__NN", where
+ * NN is expansion_id in decimal.
+ *
+ * tok must already satisfy is_local_label_token() (text.len >= 3), since
+ * the two leading bytes are consumed unconditionally.
+ *
+ * The new spelling is built directly in text_buf so the resulting span is
+ * stable. Returns the push_pool_token() result on success paths, or the
+ * fail() result when text_buf has no room. If the pool push fails, the
+ * bytes appended to text_buf are released again. */
+static int push_local_label_token(const struct Token *tok, int expansion_id)
+{
+    char digits[16]; /* least-significant digit first; 10 digits max for 32-bit */
+    int digit_count = 0;
+    /* Convert through unsigned so a negative id still renders as digits
+     * instead of producing an empty suffix ("name__"). */
+    unsigned int id = (unsigned int)expansion_id;
+    int tail_len = tok->text.len - 2; /* name without sigil and '@' */
+    int start;
+    int total;
+    int ok;
+    int i;
+    struct Token out;
+
+    /* do/while emits a single '0' for id == 0. */
+    do {
+        digits[digit_count++] = (char)('0' + (id % 10u));
+        id /= 10u;
+    } while (id > 0);
+
+    /* Reserve: sigil(1) + tail + "__"(2) + digits + NUL. */
+    total = 1 + tail_len + 2 + digit_count;
+    if (text_used + total + 1 > MAX_TEXT) {
+        return fail("text overflow");
+    }
+
+    start = text_used;
+    text_buf[text_used++] = tok->text.ptr[0];
+    memcpy(text_buf + text_used, tok->text.ptr + 2, (size_t)tail_len);
+    text_used += tail_len;
+    text_buf[text_used++] = '_';
+    text_buf[text_used++] = '_';
+    for (i = digit_count - 1; i >= 0; i--) {
+        text_buf[text_used++] = digits[i];
+    }
+    text_buf[text_used++] = '\0';
+
+    out.kind = TOK_WORD;
+    out.text.ptr = text_buf + start;
+    out.text.len = total;
+
+    ok = push_pool_token(out);
+    if (!ok) {
+        /* Nothing references the new text; give the bytes back. */
+        text_used = start;
+    }
+    return ok;
+}
+
static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
const struct Macro *m, struct Token **after_out,
int *mark_out)
@@ -676,6 +735,7 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
struct Token *body_tok;
struct Token *end_pos;
int mark;
+ int expansion_id;
if (call_tok + 1 < limit && (call_tok + 1)->kind == TOK_LPAREN) {
if (!parse_args(call_tok + 1, limit)) {
@@ -692,6 +752,7 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
return fail("bad macro call");
}
+ expansion_id = ++next_expansion_id;
mark = pool_used;
for (body_tok = m->body_start; body_tok < m->body_end; body_tok++) {
int param_idx = find_param(m, body_tok);
@@ -709,6 +770,13 @@ static int expand_macro_tokens(struct Token *call_tok, struct Token *limit,
}
continue;
}
+ if (is_local_label_token(body_tok)) {
+ if (!push_local_label_token(body_tok, expansion_id)) {
+ pool_used = mark;
+ return 0;
+ }
+ continue;
+ }
if (!push_pool_token(*body_tok)) {
pool_used = mark;
return 0;
diff --git a/tests/m1pp/11-local-labels.M1pp b/tests/m1pp/11-local-labels.M1pp
@@ -0,0 +1,37 @@
+# Local labels (§1): `:@name` / `&@name` inside macro bodies rewrite to
+# `:name__NN` / `&name__NN` where NN is a fresh monotonic id per expansion.
+# Scoping: body-native only; param-substituted tokens pass through untouched.
+#
+# Scenarios:
+# 1) a single macro using `:@end` called twice -> end__1, end__2 distinct
+# 2) nested macros each using `:@done` -> OUTER gets id 3, INNER gets id 4
+# 3) `&@label` address form rewrites with the same id as `:@label`
+# 4) a `:@name` literal passed as an argument is NOT rewritten
+
+%macro ONCE()
+ jne &@end
+ :@end
+%endm
+
+%macro OUTER()
+ :@done
+ %INNER()
+ jmp &@done
+%endm
+
+%macro INNER()
+ :@done
+ body
+%endm
+
+%macro ARGPASS(lbl)
+ lbl
+%endm
+
+%ONCE()
+%ONCE()
+
+%OUTER()
+
+%ARGPASS(:@kept)
+END
diff --git a/tests/m1pp/11-local-labels.expected b/tests/m1pp/11-local-labels.expected
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+jne &end__1
+:end__1
+
+jne &end__2
+:end__2
+
+
+:done__3
+:done__4
+body
+
+jmp &done__3
+
+
+:@kept
+
+END