boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit 6bb13576cb3eaa4c6bf33ad1104009f019b79512
parent 04089609068b2758302653f0bf1488525c8eb499
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 17:22:45 -0700

Merge feature: braced block arguments (§2)

Diffstat:
Mm1pp/m1pp.M1 | 218+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mm1pp/m1pp.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Atests/m1pp/12-braced-args.M1pp | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/m1pp/12-braced-args.expected | 45+++++++++++++++++++++++++++++++++++++++++++++
Atests/m1pp/_12-braced-malformed.M1pp | 17+++++++++++++++++
5 files changed, 405 insertions(+), 7 deletions(-)

diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1 @@ -50,6 +50,8 @@ DEFINE TOK_LPAREN 0300000000000000 DEFINE TOK_RPAREN 0400000000000000 DEFINE TOK_COMMA 0500000000000000 DEFINE TOK_PASTE 0600000000000000 +DEFINE TOK_LBRACE 0700000000000000 +DEFINE TOK_RBRACE 0800000000000000 ## Token record stride (kind + text_ptr + text_len). Advance a Token* by this. DEFINE M1PP_TOK_SIZE 1800000000000000 @@ -432,6 +434,12 @@ DEFINE EXPR_INVALID 1200000000000000 li_a1 %44 %0 la_br &lex_comma beq_a0,a1 + li_a1 %123 %0 + la_br &lex_lbrace + beq_a0,a1 + li_a1 %125 %0 + la_br &lex_rbrace + beq_a0,a1 # otherwise: word la_br &lex_word @@ -600,6 +608,30 @@ DEFINE EXPR_INVALID 1200000000000000 li_a2 %1 %0 la_br &push_source_token call + la_br &lex_advance_one_then_loop + b +:lex_lbrace + la_a0 &const_lbrace + li_a1 %1 %0 + la_br &append_text + call + mov_a1,a0 + li_a0 TOK_LBRACE + li_a2 %1 %0 + la_br &push_source_token + call + la_br &lex_advance_one_then_loop + b +:lex_rbrace + la_a0 &const_rbrace + li_a1 %1 %0 + la_br &append_text + call + mov_a1,a0 + li_a0 TOK_RBRACE + li_a2 %1 %0 + la_br &push_source_token + call :lex_advance_one_then_loop # lex_ptr++ la_a0 &lex_ptr @@ -651,6 +683,12 @@ DEFINE EXPR_INVALID 1200000000000000 li_a1 %44 %0 la_br &lex_word_finish beq_a0,a1 + li_a1 %123 %0 + la_br &lex_word_finish + beq_a0,a1 + li_a1 %125 %0 + la_br &lex_word_finish + beq_a0,a1 # else lex_ptr++ addi_t0,t0,1 la_br &lex_word_scan @@ -716,6 +754,15 @@ DEFINE EXPR_INVALID 1200000000000000 ## emit_token(a0=token_ptr). Leaf. 
:emit_token + # brace tokens are no-ops at emit time (belt-and-braces with arg-strip) + ld_t0,a0,0 + li_t1 TOK_LBRACE + la_br &emit_token_skip + beq_t0,t1 + li_t1 TOK_RBRACE + la_br &emit_token_skip + beq_t0,t1 + # if (output_need_space) emit ' ' (skip the space for the first token on a line) la_a1 &output_need_space ld_t0,a1,0 @@ -770,6 +817,8 @@ DEFINE EXPR_INVALID 1200000000000000 li_a1 %1 %0 st_a1,a0,0 ret +:emit_token_skip + ret ## --- Main processor ---------------------------------------------------------- ## Stream-driven loop. Pushes source_tokens as the initial stream, then drives @@ -1501,7 +1550,7 @@ DEFINE EXPR_INVALID 1200000000000000 ## ## Fatal on: > 16 args, reaching limit without matching RPAREN. :parse_args - # tok = lparen + 1; arg_start = tok; depth = 1; arg_index = 0 + # tok = lparen + 1; arg_start = tok; depth = 1; arg_index = 0; brace_depth = 0 addi_a0,a0,24 la_a2 &pa_pos st_a0,a2,0 @@ -1515,6 +1564,9 @@ DEFINE EXPR_INVALID 1200000000000000 li_a2 %0 %0 la_a3 &pa_arg_index st_a2,a3,0 + li_a2 %0 %0 + la_a3 &pa_brace_depth + st_a2,a3,0 :pa_loop # if (tok >= limit) fatal unterminated @@ -1538,6 +1590,12 @@ DEFINE EXPR_INVALID 1200000000000000 li_a3 TOK_COMMA la_br &pa_maybe_comma beq_a2,a3 + li_a3 TOK_LBRACE + la_br &pa_lbrace + beq_a2,a3 + li_a3 TOK_RBRACE + la_br &pa_rbrace + beq_a2,a3 # default: tok++ addi_t0,t0,24 @@ -1573,7 +1631,12 @@ DEFINE EXPR_INVALID 1200000000000000 b :pa_rparen_close - # depth == 0: close out the call. + # depth == 0: if brace_depth != 0 -> unbalanced braces + la_a0 &pa_brace_depth + ld_t1,a0,0 + la_br &err_unbalanced_braces + bnez_t1 + # close out the call. # arg_start (BSS), arg_index (BSS), tok = current pos. 
la_a0 &pa_arg_start ld_a1,a0,0 @@ -1628,8 +1691,13 @@ DEFINE EXPR_INVALID 1200000000000000 li_a3 %1 %0 la_br &pa_default_advance bne_t1,a3 + # and only when brace_depth == 0 + la_a0 &pa_brace_depth + ld_t1,a0,0 + la_br &pa_default_advance + bnez_t1 - # depth == 1 split: append (arg_start, tok) at arg_index + # depth == 1 && brace_depth == 0 split: append (arg_start, tok) at arg_index la_a0 &pa_arg_index ld_a2,a0,0 li_a3 M1PP_MAX_PARAMS @@ -1668,6 +1736,33 @@ DEFINE EXPR_INVALID 1200000000000000 la_br &pa_loop b +:pa_lbrace + # brace_depth++; tok++ + la_a0 &pa_brace_depth + ld_t1,a0,0 + addi_t1,t1,1 + st_t1,a0,0 + addi_t0,t0,24 + la_a0 &pa_pos + st_t0,a0,0 + la_br &pa_loop + b + +:pa_rbrace + # if (brace_depth <= 0) fatal unbalanced braces + la_a0 &pa_brace_depth + ld_t1,a0,0 + la_br &err_unbalanced_braces + beqz_t1 + # brace_depth--; tok++ + addi_t1,t1,neg1 + st_t1,a0,0 + addi_t0,t0,24 + la_a0 &pa_pos + st_t0,a0,0 + la_br &pa_loop + b + ## ============================================================================ ## --- Macro lookup + call expansion ------------------------------------------ ## ============================================================================ @@ -1844,22 +1939,125 @@ DEFINE EXPR_INVALID 1200000000000000 li_a0 %0 %0 ret +## arg_is_braced(a0=start, a1=end) -> a0 = 1 if the span wraps in a matching +## outer { ... } pair (outer RBRACE is the same-level mate of the leading +## LBRACE), else 0. Leaf. +:arg_is_braced + # if (end - start < 2 tokens = 48 bytes) return 0 + sub_a2,a1,a0 + li_a3 %48 %0 + la_br &aib_zero + blt_a2,a3 + + # if (start->kind != TOK_LBRACE) return 0 + ld_a2,a0,0 + li_a3 TOK_LBRACE + la_br &aib_zero + bne_a2,a3 + + # if ((end - 24)->kind != TOK_RBRACE) return 0 + addi_t0,a1,neg24 + ld_a2,t0,0 + li_a3 TOK_RBRACE + la_br &aib_zero + bne_a2,a3 + + # walk tokens tracking depth; if depth hits 0 before reaching end-24, + # the leading LBRACE doesn't match the trailing RBRACE -> return 0. 
+ # t0 = tok, t1 = depth, t2 = last_tok = end - 24 + mov_t0,a0 + li_t1 %0 %0 + addi_t2,a1,neg24 +:aib_loop + la_br &aib_done + beq_t0,a1 + ld_a2,t0,0 + li_a3 TOK_LBRACE + la_br &aib_incr + beq_a2,a3 + li_a3 TOK_RBRACE + la_br &aib_decr + beq_a2,a3 + # non-brace: advance + addi_t0,t0,24 + la_br &aib_loop + b +:aib_incr + addi_t1,t1,1 + addi_t0,t0,24 + la_br &aib_loop + b +:aib_decr + addi_t1,t1,neg1 + # if (depth == 0 && tok != end - 24) -> not wrapping + la_br &aib_decr_skip + bnez_t1 + la_br &aib_zero + bne_t0,t2 +:aib_decr_skip + addi_t0,t0,24 + la_br &aib_loop + b +:aib_done + # return (depth == 0) ? 1 : 0 + la_br &aib_zero + bnez_t1 + li_a0 %1 %0 + ret +:aib_zero + li_a0 %0 %0 + ret + ## copy_arg_tokens_to_pool(a0=arg_start, a1=arg_end) -> void (fatal if empty) ## Non-leaf (calls copy_span_to_pool). Empty arg is an error. +## If the span is wrapped in a matching outer { ... } pair, strip the outer +## braces before copying; an empty inner span is a no-op. :copy_arg_tokens_to_pool - enter_0 + enter_16 # if (arg_start == arg_end) fatal la_br &err_bad_macro_header beq_a0,a1 + # spill a0/a1 so arg_is_braced can clobber regs + st_a0,sp,16 + st_a1,sp,24 + la_br &arg_is_braced + call + la_br &catp_plain + beqz_a0 + # braced: strip outer braces (start+24, end-24) + ld_a0,sp,16 + ld_a1,sp,24 + addi_a0,a0,24 + addi_a1,a1,neg24 + la_br &catp_done + beq_a0,a1 + la_br &copy_span_to_pool + call + la_br &catp_done + b +:catp_plain + ld_a0,sp,16 + ld_a1,sp,24 la_br &copy_span_to_pool call +:catp_done leave ret ## copy_paste_arg_to_pool(a0=arg_start, a1=arg_end) -> void (fatal unless len 1) ## Enforces the single-token-argument rule for params adjacent to ##. +## Braced args are rejected — pasting onto a block is nonsense. 
:copy_paste_arg_to_pool - enter_0 + enter_16 + # spill a0/a1 for the arg_is_braced call + st_a0,sp,16 + st_a1,sp,24 + la_br &arg_is_braced + call + la_br &err_bad_macro_header + bnez_a0 + ld_a0,sp,16 + ld_a1,sp,24 # if ((arg_end - arg_start) != 24) fatal sub_a2,a1,a0 li_a3 M1PP_TOK_SIZE @@ -4679,6 +4877,11 @@ DEFINE EXPR_INVALID 1200000000000000 li_a1 %15 %0 la_br &fatal b +:err_unbalanced_braces + la_a0 &msg_unbalanced_braces + li_a1 %17 %0 + la_br &fatal + b ## fatal(a0=msg_ptr, a1=msg_len): writes "m1pp: <msg>\n" to stderr, exits 1. ## Saves args across the three syscalls since a0..a3 are caller-saved. @@ -4725,6 +4928,8 @@ DEFINE EXPR_INVALID 1200000000000000 :const_lparen "(" :const_rparen ")" :const_comma "," +:const_lbrace "{" +:const_rbrace "}" :const_bang "!" :const_at "@" :const_pct "%" @@ -4774,6 +4979,7 @@ DEFINE EXPR_INVALID 1200000000000000 :msg_too_many_macros "too many macros" :msg_macro_body_overflow "macro body overflow" :msg_not_implemented "not implemented" +:msg_unbalanced_braces "unbalanced braces" ## --- BSS --------------------------------------------------------------------- ## Placed before :ELF_end so filesz/memsz (which this ELF header sets equal) @@ -4912,6 +5118,8 @@ ZERO8 ZERO8 :pa_limit ZERO8 +:pa_brace_depth +ZERO8 :emt_call_tok ZERO8 :emt_limit diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c @@ -82,7 +82,9 @@ enum { TOK_LPAREN, TOK_RPAREN, TOK_COMMA, - TOK_PASTE + TOK_PASTE, + TOK_LBRACE, + TOK_RBRACE }; enum ExprOp { @@ -308,6 +310,22 @@ static int lex_source(const char *src) i++; continue; } + if (src[i] == '{') { + if (!push_token(source_tokens, &source_count, MAX_TOKENS, + TOK_LBRACE, (struct TextSpan){src + i, 1})) { + return 0; + } + i++; + continue; + } + if (src[i] == '}') { + if (!push_token(source_tokens, &source_count, MAX_TOKENS, + TOK_RBRACE, (struct TextSpan){src + i, 1})) { + return 0; + } + i++; + continue; + } start = i; while (src[i] != '\0' && @@ -318,6 +336,8 @@ static int lex_source(const char *src) src[i] != '(' && 
src[i] != ')' && src[i] != ',' && + src[i] != '{' && + src[i] != '}' && !(src[i] == '#' && src[i + 1] == '#')) { i++; } @@ -379,6 +399,9 @@ static int emit_newline(void) static int emit_token(const struct Token *tok) { + if (tok->kind == TOK_LBRACE || tok->kind == TOK_RBRACE) { + return 1; + } if (output_need_space) { if (output_used + 1 >= MAX_OUTPUT) { return fail("output overflow"); @@ -548,6 +571,7 @@ static int parse_args(struct Token *lparen, struct Token *limit) struct Token *tok = lparen + 1; struct Token *arg_start = tok; int depth = 1; + int brace_depth = 0; int arg_index = 0; while (tok < limit) { @@ -559,6 +583,9 @@ static int parse_args(struct Token *lparen, struct Token *limit) if (tok->kind == TOK_RPAREN) { depth--; if (depth == 0) { + if (brace_depth != 0) { + return fail("unbalanced braces"); + } if (arg_start == tok && arg_index == 0) { arg_count = 0; } else { @@ -575,7 +602,20 @@ static int parse_args(struct Token *lparen, struct Token *limit) tok++; continue; } - if (tok->kind == TOK_COMMA && depth == 1) { + if (tok->kind == TOK_LBRACE) { + brace_depth++; + tok++; + continue; + } + if (tok->kind == TOK_RBRACE) { + if (brace_depth <= 0) { + return fail("unbalanced braces"); + } + brace_depth--; + tok++; + continue; + } + if (tok->kind == TOK_COMMA && depth == 1 && brace_depth == 0) { if (arg_index >= MAX_PARAMS) { return fail("too many args"); } @@ -592,16 +632,54 @@ static int parse_args(struct Token *lparen, struct Token *limit) return fail("unterminated macro call"); } +static int arg_is_braced(struct TokenSpan span) +{ + struct Token *tok; + int depth; + + if (span.end - span.start < 2) { + return 0; + } + if (span.start->kind != TOK_LBRACE || + (span.end - 1)->kind != TOK_RBRACE) { + return 0; + } + depth = 0; + for (tok = span.start; tok < span.end; tok++) { + if (tok->kind == TOK_LBRACE) { + depth++; + } else if (tok->kind == TOK_RBRACE) { + depth--; + if (depth == 0 && tok != span.end - 1) { + return 0; + } + } + } + return depth == 0; +} 
+ static int copy_arg_tokens_to_pool(struct TokenSpan span) { if (span.start == span.end) { return fail("bad macro argument"); } + if (arg_is_braced(span)) { + struct TokenSpan inner; + inner.start = span.start + 1; + inner.end = span.end - 1; + if (inner.start == inner.end) { + return 1; + } + return copy_span_to_pool(inner); + } return copy_span_to_pool(span); } static int copy_paste_arg_to_pool(struct TokenSpan span) { + if (arg_is_braced(span)) { + return fail("bad macro argument"); + } if (span.end - span.start != 1) { return fail("bad macro argument"); } diff --git a/tests/m1pp/12-braced-args.M1pp b/tests/m1pp/12-braced-args.M1pp @@ -0,0 +1,50 @@ +# Braced block arguments (§2 of M1PP-EXT): +# - { ... } groups tokens into one arg, protecting commas inside +# - outer { ... } is stripped when the arg span begins with LBRACE and +# ends with its matching RBRACE +# - nesting: { { ... } } — outer is stripped, inner braces pass through +# (emit_token is a no-op on brace kinds, so inner braces never reach +# output either) +# - braces are independent of parens: st(r0, r3, 0) inside a braced arg +# is a single group, its commas are NOT arg separators +# - plain (non-braced) args still work unchanged + +%macro IF_EQ_ELSE(a, b, t, e) +(= a b) t e +%endm + +%macro WHILE_NEZ(r, body) +:loop__ +body +bnez r :loop__ +%endm + +%macro ID(x) +x +%endm + +# body with commas inside a brace — st(r0, r3, 0) carries two commas that +# MUST NOT split the outer call into more than 4 args +%IF_EQ_ELSE(r1, r2, { +li(r0, 5) +st(r0, r3, 0) +}, { +li(r0, 0) +}) + +# nested braces — inner { inner_block } survives outer strip but its +# braces are no-op'd at emit time, so only the tokens appear +%WHILE_NEZ(rx, { +addi(rx, rx, -1) +{ inner_block } +}) + +# plain arg with no braces still works (sanity) +%ID(plain_token) + +# arg that opens with { but does not close at the outer level is NOT +# stripped: { x } tail — the { and } are emitted as nothing (emit_token +# no-op) but the surrounding 
tokens pass through verbatim +%ID({ x } tail) + +END diff --git a/tests/m1pp/12-braced-args.expected b/tests/m1pp/12-braced-args.expected @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + +( = r1 r2 ) +li ( r0 , 5 ) +st ( r0 , r3 , 0 ) + +li ( r0 , 0 ) + + + + + +:loop__ + +addi ( rx , rx , -1 ) +inner_block + +bnez rx :loop__ + + + +plain_token + + + + + +x tail + + +END diff --git a/tests/m1pp/_12-braced-malformed.M1pp b/tests/m1pp/_12-braced-malformed.M1pp @@ -0,0 +1,17 @@ +# Malformed: unmatched `{` inside a macro call. +# +# Expected behavior: the m1pp expander MUST exit non-zero. parse_args detects +# that the outer RPAREN closes the call while brace_depth is still > 0 and +# reports "unbalanced braces". +# +# No `.expected` file is needed — the leading underscore in the filename +# causes m1pp/test.sh to skip this fixture. It is verified manually via the +# verification block in the §2 implementation notes. + +%macro F(a, b) +a b +%endm + +%F(first, { never_closed ) + +END