commit 6bb13576cb3eaa4c6bf33ad1104009f019b79512
parent 04089609068b2758302653f0bf1488525c8eb499
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 17:22:45 -0700
Merge feature: braced block arguments (§2)
Diffstat:
5 files changed, 405 insertions(+), 7 deletions(-)
diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1
@@ -50,6 +50,8 @@ DEFINE TOK_LPAREN 0300000000000000
DEFINE TOK_RPAREN 0400000000000000
DEFINE TOK_COMMA 0500000000000000
DEFINE TOK_PASTE 0600000000000000
+DEFINE TOK_LBRACE 0700000000000000
+DEFINE TOK_RBRACE 0800000000000000
## Token record stride (kind + text_ptr + text_len). Advance a Token* by this.
DEFINE M1PP_TOK_SIZE 1800000000000000
@@ -432,6 +434,12 @@ DEFINE EXPR_INVALID 1200000000000000
li_a1 %44 %0
la_br &lex_comma
beq_a0,a1
+ li_a1 %123 %0
+ la_br &lex_lbrace
+ beq_a0,a1
+ li_a1 %125 %0
+ la_br &lex_rbrace
+ beq_a0,a1
# otherwise: word
la_br &lex_word
@@ -600,6 +608,30 @@ DEFINE EXPR_INVALID 1200000000000000
li_a2 %1 %0
la_br &push_source_token
call
+ la_br &lex_advance_one_then_loop
+ b
+:lex_lbrace
+ la_a0 &const_lbrace
+ li_a1 %1 %0
+ la_br &append_text
+ call
+ mov_a1,a0
+ li_a0 TOK_LBRACE
+ li_a2 %1 %0
+ la_br &push_source_token
+ call
+ la_br &lex_advance_one_then_loop
+ b
+:lex_rbrace
+ la_a0 &const_rbrace
+ li_a1 %1 %0
+ la_br &append_text
+ call
+ mov_a1,a0
+ li_a0 TOK_RBRACE
+ li_a2 %1 %0
+ la_br &push_source_token
+ call
:lex_advance_one_then_loop
# lex_ptr++
la_a0 &lex_ptr
@@ -651,6 +683,12 @@ DEFINE EXPR_INVALID 1200000000000000
li_a1 %44 %0
la_br &lex_word_finish
beq_a0,a1
+ li_a1 %123 %0
+ la_br &lex_word_finish
+ beq_a0,a1
+ li_a1 %125 %0
+ la_br &lex_word_finish
+ beq_a0,a1
# else lex_ptr++
addi_t0,t0,1
la_br &lex_word_scan
@@ -716,6 +754,15 @@ DEFINE EXPR_INVALID 1200000000000000
## emit_token(a0=token_ptr). Leaf.
:emit_token
+ # brace tokens are no-ops at emit time (belt-and-braces with arg-strip)
+ ld_t0,a0,0
+ li_t1 TOK_LBRACE
+ la_br &emit_token_skip
+ beq_t0,t1
+ li_t1 TOK_RBRACE
+ la_br &emit_token_skip
+ beq_t0,t1
+
# if (output_need_space) emit ' ' (skip the space for the first token on a line)
la_a1 &output_need_space
ld_t0,a1,0
@@ -770,6 +817,8 @@ DEFINE EXPR_INVALID 1200000000000000
li_a1 %1 %0
st_a1,a0,0
ret
+:emit_token_skip
+ ret
## --- Main processor ----------------------------------------------------------
## Stream-driven loop. Pushes source_tokens as the initial stream, then drives
@@ -1501,7 +1550,7 @@ DEFINE EXPR_INVALID 1200000000000000
##
## Fatal on: > 16 args, reaching limit without matching RPAREN.
:parse_args
- # tok = lparen + 1; arg_start = tok; depth = 1; arg_index = 0
+ # tok = lparen + 1; arg_start = tok; depth = 1; arg_index = 0; brace_depth = 0
addi_a0,a0,24
la_a2 &pa_pos
st_a0,a2,0
@@ -1515,6 +1564,9 @@ DEFINE EXPR_INVALID 1200000000000000
li_a2 %0 %0
la_a3 &pa_arg_index
st_a2,a3,0
+ li_a2 %0 %0
+ la_a3 &pa_brace_depth
+ st_a2,a3,0
:pa_loop
# if (tok >= limit) fatal unterminated
@@ -1538,6 +1590,12 @@ DEFINE EXPR_INVALID 1200000000000000
li_a3 TOK_COMMA
la_br &pa_maybe_comma
beq_a2,a3
+ li_a3 TOK_LBRACE
+ la_br &pa_lbrace
+ beq_a2,a3
+ li_a3 TOK_RBRACE
+ la_br &pa_rbrace
+ beq_a2,a3
# default: tok++
addi_t0,t0,24
@@ -1573,7 +1631,12 @@ DEFINE EXPR_INVALID 1200000000000000
b
:pa_rparen_close
- # depth == 0: close out the call.
+ # depth == 0: if brace_depth != 0 -> unbalanced braces
+ la_a0 &pa_brace_depth
+ ld_t1,a0,0
+ la_br &err_unbalanced_braces
+ bnez_t1
+ # close out the call.
# arg_start (BSS), arg_index (BSS), tok = current pos.
la_a0 &pa_arg_start
ld_a1,a0,0
@@ -1628,8 +1691,13 @@ DEFINE EXPR_INVALID 1200000000000000
li_a3 %1 %0
la_br &pa_default_advance
bne_t1,a3
+ # and only when brace_depth == 0
+ la_a0 &pa_brace_depth
+ ld_t1,a0,0
+ la_br &pa_default_advance
+ bnez_t1
- # depth == 1 split: append (arg_start, tok) at arg_index
+ # depth == 1 && brace_depth == 0 split: append (arg_start, tok) at arg_index
la_a0 &pa_arg_index
ld_a2,a0,0
li_a3 M1PP_MAX_PARAMS
@@ -1668,6 +1736,33 @@ DEFINE EXPR_INVALID 1200000000000000
la_br &pa_loop
b
+:pa_lbrace
+ # brace_depth++; tok++
+ la_a0 &pa_brace_depth
+ ld_t1,a0,0
+ addi_t1,t1,1
+ st_t1,a0,0
+ addi_t0,t0,24
+ la_a0 &pa_pos
+ st_t0,a0,0
+ la_br &pa_loop
+ b
+
+:pa_rbrace
+ # if (brace_depth <= 0) fatal unbalanced braces
+ la_a0 &pa_brace_depth
+ ld_t1,a0,0
+ la_br &err_unbalanced_braces
+ beqz_t1
+ # brace_depth--; tok++
+ addi_t1,t1,neg1
+ st_t1,a0,0
+ addi_t0,t0,24
+ la_a0 &pa_pos
+ st_t0,a0,0
+ la_br &pa_loop
+ b
+
## ============================================================================
## --- Macro lookup + call expansion ------------------------------------------
## ============================================================================
@@ -1844,22 +1939,125 @@ DEFINE EXPR_INVALID 1200000000000000
li_a0 %0 %0
ret
+## arg_is_braced(a0=start, a1=end) -> a0 = 1 if the span wraps in a matching
+## outer { ... } pair (outer RBRACE is the same-level mate of the leading
+## LBRACE), else 0. Leaf.
+:arg_is_braced
+ # if (end - start < 2 tokens = 48 bytes) return 0
+ sub_a2,a1,a0
+ li_a3 %48 %0
+ la_br &aib_zero
+ blt_a2,a3
+
+ # if (start->kind != TOK_LBRACE) return 0
+ ld_a2,a0,0
+ li_a3 TOK_LBRACE
+ la_br &aib_zero
+ bne_a2,a3
+
+ # if ((end - 24)->kind != TOK_RBRACE) return 0
+ addi_t0,a1,neg24
+ ld_a2,t0,0
+ li_a3 TOK_RBRACE
+ la_br &aib_zero
+ bne_a2,a3
+
+ # walk tokens tracking depth; if depth hits 0 before reaching end-24,
+ # the leading LBRACE doesn't match the trailing RBRACE -> return 0.
+ # t0 = tok, t1 = depth, t2 = last_tok = end - 24
+ mov_t0,a0
+ li_t1 %0 %0
+ addi_t2,a1,neg24
+:aib_loop
+ la_br &aib_done
+ beq_t0,a1
+ ld_a2,t0,0
+ li_a3 TOK_LBRACE
+ la_br &aib_incr
+ beq_a2,a3
+ li_a3 TOK_RBRACE
+ la_br &aib_decr
+ beq_a2,a3
+ # non-brace: advance
+ addi_t0,t0,24
+ la_br &aib_loop
+ b
+:aib_incr
+ addi_t1,t1,1
+ addi_t0,t0,24
+ la_br &aib_loop
+ b
+:aib_decr
+ addi_t1,t1,neg1
+ # if (depth == 0 && tok != end - 24) -> not wrapping
+ la_br &aib_decr_skip
+ bnez_t1
+ la_br &aib_zero
+ bne_t0,t2
+:aib_decr_skip
+ addi_t0,t0,24
+ la_br &aib_loop
+ b
+:aib_done
+ # return (depth == 0) ? 1 : 0
+ la_br &aib_zero
+ bnez_t1
+ li_a0 %1 %0
+ ret
+:aib_zero
+ li_a0 %0 %0
+ ret
+
## copy_arg_tokens_to_pool(a0=arg_start, a1=arg_end) -> void (fatal if empty)
## Non-leaf (calls copy_span_to_pool). Empty arg is an error.
+## If the span is wrapped in a matching outer { ... } pair, strip the outer
+## braces before copying; an empty inner span is a no-op.
:copy_arg_tokens_to_pool
- enter_0
+ enter_16
# if (arg_start == arg_end) fatal
la_br &err_bad_macro_header
beq_a0,a1
+ # spill a0/a1 so arg_is_braced can clobber regs
+ st_a0,sp,16
+ st_a1,sp,24
+ la_br &arg_is_braced
+ call
+ la_br &catp_plain
+ beqz_a0
+ # braced: strip outer braces (start+24, end-24)
+ ld_a0,sp,16
+ ld_a1,sp,24
+ addi_a0,a0,24
+ addi_a1,a1,neg24
+ la_br &catp_done
+ beq_a0,a1
+ la_br ©_span_to_pool
+ call
+ la_br &catp_done
+ b
+:catp_plain
+ ld_a0,sp,16
+ ld_a1,sp,24
la_br ©_span_to_pool
call
+:catp_done
leave
ret
## copy_paste_arg_to_pool(a0=arg_start, a1=arg_end) -> void (fatal unless len 1)
## Enforces the single-token-argument rule for params adjacent to ##.
+## Braced args are rejected — pasting onto a block is nonsense.
:copy_paste_arg_to_pool
- enter_0
+ enter_16
+ # spill a0/a1 for the arg_is_braced call
+ st_a0,sp,16
+ st_a1,sp,24
+ la_br &arg_is_braced
+ call
+ la_br &err_bad_macro_header
+ bnez_a0
+ ld_a0,sp,16
+ ld_a1,sp,24
# if ((arg_end - arg_start) != 24) fatal
sub_a2,a1,a0
li_a3 M1PP_TOK_SIZE
@@ -4679,6 +4877,11 @@ DEFINE EXPR_INVALID 1200000000000000
li_a1 %15 %0
la_br &fatal
b
+:err_unbalanced_braces
+ la_a0 &msg_unbalanced_braces
+ li_a1 %17 %0
+ la_br &fatal
+ b
## fatal(a0=msg_ptr, a1=msg_len): writes "m1pp: <msg>\n" to stderr, exits 1.
## Saves args across the three syscalls since a0..a3 are caller-saved.
@@ -4725,6 +4928,8 @@ DEFINE EXPR_INVALID 1200000000000000
:const_lparen "("
:const_rparen ")"
:const_comma ","
+:const_lbrace "{"
+:const_rbrace "}"
:const_bang "!"
:const_at "@"
:const_pct "%"
@@ -4774,6 +4979,7 @@ DEFINE EXPR_INVALID 1200000000000000
:msg_too_many_macros "too many macros"
:msg_macro_body_overflow "macro body overflow"
:msg_not_implemented "not implemented"
+:msg_unbalanced_braces "unbalanced braces"
## --- BSS ---------------------------------------------------------------------
## Placed before :ELF_end so filesz/memsz (which this ELF header sets equal)
@@ -4912,6 +5118,8 @@ ZERO8
ZERO8
:pa_limit
ZERO8
+:pa_brace_depth
+ZERO8
:emt_call_tok
ZERO8
:emt_limit
diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c
@@ -82,7 +82,9 @@ enum {
TOK_LPAREN,
TOK_RPAREN,
TOK_COMMA,
- TOK_PASTE
+ TOK_PASTE,
+ TOK_LBRACE,
+ TOK_RBRACE
};
enum ExprOp {
@@ -308,6 +310,22 @@ static int lex_source(const char *src)
i++;
continue;
}
+ if (src[i] == '{') {
+ if (!push_token(source_tokens, &source_count, MAX_TOKENS,
+ TOK_LBRACE, (struct TextSpan){src + i, 1})) {
+ return 0;
+ }
+ i++;
+ continue;
+ }
+ if (src[i] == '}') {
+ if (!push_token(source_tokens, &source_count, MAX_TOKENS,
+ TOK_RBRACE, (struct TextSpan){src + i, 1})) {
+ return 0;
+ }
+ i++;
+ continue;
+ }
start = i;
while (src[i] != '\0' &&
@@ -318,6 +336,8 @@ static int lex_source(const char *src)
src[i] != '(' &&
src[i] != ')' &&
src[i] != ',' &&
+ src[i] != '{' &&
+ src[i] != '}' &&
!(src[i] == '#' && src[i + 1] == '#')) {
i++;
}
@@ -379,6 +399,9 @@ static int emit_newline(void)
static int emit_token(const struct Token *tok)
{
+ if (tok->kind == TOK_LBRACE || tok->kind == TOK_RBRACE) {
+ return 1;
+ }
if (output_need_space) {
if (output_used + 1 >= MAX_OUTPUT) {
return fail("output overflow");
@@ -548,6 +571,7 @@ static int parse_args(struct Token *lparen, struct Token *limit)
struct Token *tok = lparen + 1;
struct Token *arg_start = tok;
int depth = 1;
+ int brace_depth = 0;
int arg_index = 0;
while (tok < limit) {
@@ -559,6 +583,9 @@ static int parse_args(struct Token *lparen, struct Token *limit)
if (tok->kind == TOK_RPAREN) {
depth--;
if (depth == 0) {
+ if (brace_depth != 0) {
+ return fail("unbalanced braces");
+ }
if (arg_start == tok && arg_index == 0) {
arg_count = 0;
} else {
@@ -575,7 +602,20 @@ static int parse_args(struct Token *lparen, struct Token *limit)
tok++;
continue;
}
- if (tok->kind == TOK_COMMA && depth == 1) {
+ if (tok->kind == TOK_LBRACE) {
+ brace_depth++;
+ tok++;
+ continue;
+ }
+ if (tok->kind == TOK_RBRACE) {
+ if (brace_depth <= 0) {
+ return fail("unbalanced braces");
+ }
+ brace_depth--;
+ tok++;
+ continue;
+ }
+ if (tok->kind == TOK_COMMA && depth == 1 && brace_depth == 0) {
if (arg_index >= MAX_PARAMS) {
return fail("too many args");
}
@@ -592,16 +632,54 @@ static int parse_args(struct Token *lparen, struct Token *limit)
return fail("unterminated macro call");
}
+static int arg_is_braced(struct TokenSpan span)
+{
+ struct Token *tok;
+ int depth;
+
+ if (span.end - span.start < 2) {
+ return 0;
+ }
+ if (span.start->kind != TOK_LBRACE ||
+ (span.end - 1)->kind != TOK_RBRACE) {
+ return 0;
+ }
+ depth = 0;
+ for (tok = span.start; tok < span.end; tok++) {
+ if (tok->kind == TOK_LBRACE) {
+ depth++;
+ } else if (tok->kind == TOK_RBRACE) {
+ depth--;
+ if (depth == 0 && tok != span.end - 1) {
+ return 0;
+ }
+ }
+ }
+ return depth == 0;
+}
+
static int copy_arg_tokens_to_pool(struct TokenSpan span)
{
if (span.start == span.end) {
return fail("bad macro argument");
}
+ if (arg_is_braced(span)) {
+ struct TokenSpan inner;
+ inner.start = span.start + 1;
+ inner.end = span.end - 1;
+ if (inner.start == inner.end) {
+ return 1;
+ }
+ return copy_span_to_pool(inner);
+ }
return copy_span_to_pool(span);
}
static int copy_paste_arg_to_pool(struct TokenSpan span)
{
+ if (arg_is_braced(span)) {
+ return fail("bad macro argument");
+ }
if (span.end - span.start != 1) {
return fail("bad macro argument");
}
diff --git a/tests/m1pp/12-braced-args.M1pp b/tests/m1pp/12-braced-args.M1pp
@@ -0,0 +1,50 @@
+# Braced block arguments (§2 of M1PP-EXT):
+# - { ... } groups tokens into one arg, protecting commas inside
+# - outer { ... } is stripped when the arg span begins with LBRACE and
+# ends with its matching RBRACE
+# - nesting: { { ... } } — outer is stripped, inner braces pass through
+# (emit_token is a no-op on brace kinds, so inner braces never reach
+# output either)
+# - braces are independent of parens: st(r0, r3, 0) inside a braced arg
+# is a single group, its commas are NOT arg separators
+# - plain (non-braced) args still work unchanged
+
+%macro IF_EQ_ELSE(a, b, t, e)
+(= a b) t e
+%endm
+
+%macro WHILE_NEZ(r, body)
+:loop__
+body
+bnez r :loop__
+%endm
+
+%macro ID(x)
+x
+%endm
+
+# body with commas inside a brace — st(r0, r3, 0) carries two commas that
+# MUST NOT split the outer call into more than 4 args
+%IF_EQ_ELSE(r1, r2, {
+li(r0, 5)
+st(r0, r3, 0)
+}, {
+li(r0, 0)
+})
+
+# nested braces — inner { inner_block } survives outer strip but its
+# braces are no-op'd at emit time, so only the tokens appear
+%WHILE_NEZ(rx, {
+addi(rx, rx, -1)
+{ inner_block }
+})
+
+# plain arg with no braces still works (sanity)
+%ID(plain_token)
+
+# arg that opens with { but does not close at the outer level is NOT
+# stripped: { x } tail — the { and } are emitted as nothing (emit_token
+# no-op) but the surrounding tokens pass through verbatim
+%ID({ x } tail)
+
+END
diff --git a/tests/m1pp/12-braced-args.expected b/tests/m1pp/12-braced-args.expected
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+( = r1 r2 )
+li ( r0 , 5 )
+st ( r0 , r3 , 0 )
+
+li ( r0 , 0 )
+
+
+
+
+
+:loop__
+
+addi ( rx , rx , -1 )
+inner_block
+
+bnez rx :loop__
+
+
+
+plain_token
+
+
+
+
+
+x tail
+
+
+END
diff --git a/tests/m1pp/_12-braced-malformed.M1pp b/tests/m1pp/_12-braced-malformed.M1pp
@@ -0,0 +1,17 @@
+# Malformed: unmatched `{` inside a macro call.
+#
+# Expected behavior: the m1pp expander MUST exit non-zero. parse_args detects
+# that the outer RPAREN closes the call while brace_depth is still > 0 and
+# reports "unbalanced braces".
+#
+# No `.expected` file is needed — the leading underscore in the filename
+# causes m1pp/test.sh to skip this fixture. It is verified manually via the
+# verification block in the §2 implementation notes.
+
+%macro F(a, b)
+a b
+%endm
+
+%F(first, { never_closed )
+
+END