boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit 51e51fb86495f1a6cbfb8fde625b5db1eab2dac1
parent 6def9f0029040c6df23519bf58d44b45de20a5d6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 17:22:38 -0700

Merge feature: %str stringification builtin (§7)

Diffstat:
Mm1pp/m1pp.M1 | 154++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mm1pp/m1pp.c | 44+++++++++++++++++++++++++++++++++++++++++++-
Atests/m1pp/14-str-builtin.M1pp | 17+++++++++++++++++
Atests/m1pp/14-str-builtin.expected | 17+++++++++++++++++
Atests/m1pp/14-str-paste.M1pp | 13+++++++++++++
Atests/m1pp/14-str-paste.expected | 13+++++++++++++
Atests/m1pp/_14-str-malformed.M1pp | 8++++++++
7 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1 @@ -895,7 +895,7 @@ DEFINE EXPR_INVALID 1200000000000000 la_br &proc_check_macro bne_a1,a2 - # try the five builtin names: ! @ % $ %select + # try the six builtin names: ! @ % $ %select %str mov_a0,t0 la_a1 &const_bang li_a2 %1 %0 @@ -931,6 +931,13 @@ DEFINE EXPR_INVALID 1200000000000000 call la_br &proc_do_builtin bnez_a0 + ld_a0,sp,24 + la_a1 &const_str + li_a2 %4 %0 + la_br &tok_eq_const + call + la_br &proc_do_builtin + bnez_a0 la_br &proc_check_macro b @@ -4054,6 +4061,17 @@ DEFINE EXPR_INVALID 1200000000000000 la_br &ebc_select bnez_a0 + # if tok_eq_const(tok, "%str", 4) -> str path + la_a0 &ebc_stream + ld_a0,a0,0 + ld_a0,a0,16 + la_a1 &const_str + li_a2 %4 %0 + la_br &tok_eq_const + call + la_br &ebc_str + bnez_a0 + # else: fatal la_br &err_bad_macro_header b @@ -4245,6 +4263,125 @@ DEFINE EXPR_INVALID 1200000000000000 leave ret +## %str(IDENT): stringify a single WORD argument into a TOK_STRING literal. +## Validation: arg_count == 1, arg span length == 1 token, and that token's +## kind is TOK_WORD. Output: a freshly-allocated text span built as +## `"` + arg.text + `"` (len = arg.text.len + 2) and a synthesized TOK_STRING +## pointing at it. Stream pos advances to call_end_pos; line_start = 0. 
+:ebc_str + # require arg_count == 1 + la_a0 &arg_count + ld_t0,a0,0 + li_t1 %1 %0 + la_br &err_bad_macro_header + bne_t0,t1 + + # snapshot arg_starts[0] / arg_ends[0] + la_a0 &arg_starts + ld_t0,a0,0 + la_a1 &ebc_arg0_start + st_t0,a1,0 + la_a0 &arg_ends + ld_t0,a0,0 + la_a1 &ebc_arg0_end + st_t0,a1,0 + + # require arg0_end - arg0_start == 24 (exactly one token) + la_a0 &ebc_arg0_start + ld_t0,a0,0 + la_a1 &ebc_arg0_end + ld_t1,a1,0 + sub_t2,t1,t0 + li_a2 %24 %0 + la_br &err_bad_macro_header + bne_t2,a2 + + # require arg_tok->kind == TOK_WORD + ld_a3,t0,0 + li_a2 TOK_WORD + la_br &err_bad_macro_header + bne_a3,a2 + + # orig_len = arg_tok->text.len; out_len = orig_len + 2 + # fatal if out_len > 256 (scratch cap; text_buf cap checked by append_text) + ld_t1,t0,16 + la_a0 &ebc_str_orig_len + st_t1,a0,0 + addi_t2,t1,2 + la_a0 &ebc_str_out_len + st_t2,a0,0 + li_a1 %256 %0 + la_br &err_text_overflow + blt_a1,t2 + + # scratch[0] = '"' + la_t2 &ebc_str_scratch + li_a3 %34 %0 + sb_a3,t2,0 + + # copy arg_tok->text bytes into scratch[1..1+orig_len) + # src = arg_tok->text.ptr; i = 0 + la_a0 &ebc_arg0_start + ld_a0,a0,0 + ld_t0,a0,8 + la_a1 &ebc_str_orig_len + ld_t1,a1,0 + li_a0 %0 %0 +:ebc_str_copy_loop + la_br &ebc_str_copy_done + beq_a0,t1 + add_a1,t0,a0 + lb_a1,a1,0 + addi_a2,a0,1 + add_a2,t2,a2 + sb_a1,a2,0 + addi_a0,a0,1 + la_br &ebc_str_copy_loop + b +:ebc_str_copy_done + + # scratch[1 + orig_len] = '"' + la_t2 &ebc_str_scratch + la_a1 &ebc_str_orig_len + ld_a1,a1,0 + addi_a1,a1,1 + add_a0,t2,a1 + li_a3 %34 %0 + sb_a3,a0,0 + + # text_ptr = append_text(&scratch, out_len) + la_a0 &ebc_str_scratch + la_a1 &ebc_str_out_len + ld_a1,a1,0 + la_br &append_text + call + + # ebc_str_token = { TOK_STRING, text_ptr, out_len } + la_a2 &ebc_str_token + li_a3 TOK_STRING + st_a3,a2,0 + st_a0,a2,8 + la_a1 &ebc_str_out_len + ld_a1,a1,0 + st_a1,a2,16 + + # stream->pos = ebc_call_end_pos; stream->line_start = 0 + la_a0 &ebc_stream + ld_a0,a0,0 + la_a1 &ebc_call_end_pos + ld_t0,a1,0 + 
st_t0,a0,16 + li_t1 %0 %0 + st_t1,a0,24 + + # emit_token(&ebc_str_token) + la_a0 &ebc_str_token + la_br &emit_token + call + + leave + ret + ## --- Error paths ------------------------------------------------------------- ## Each err_* loads a (msg, len) pair for fatal; fatal writes "m1pp: <msg>\n" ## to stderr and exits 1. Error labels are branched to from range/overflow @@ -4371,6 +4508,7 @@ DEFINE EXPR_INVALID 1200000000000000 :const_pct "%" :const_dlr "$" :const_select "%select" +:const_str "%str" ## Operator strings for expr_op_code. Each is a raw byte literal; lengths ## are passed separately to tok_eq_const. "<=" must be tested before "<" @@ -4631,6 +4769,20 @@ ZERO8 :ebc_mark ZERO8 +## %str builtin scratch. ebc_str_orig_len / ebc_str_out_len spill the +## argument text length and its +2 output length across append_text; +## ebc_str_token is the synthesized TOK_STRING { kind, text_ptr, text_len } +## handed to emit_token; ebc_str_scratch is a 256-byte assembly buffer +## (matches paste_scratch / M0's quoted-literal cap). +:ebc_str_orig_len +ZERO8 +:ebc_str_out_len +ZERO8 +:ebc_str_token +ZERO8 ZERO8 ZERO8 +:ebc_str_scratch +ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 + ## arg_starts[16] / arg_ends[16]: 16 × 8 = 128 bytes each, i.e. 4 ZERO32. ## Written by parse_args; read by expand_macro_tokens and expand_builtin_call. 
:arg_starts diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c @@ -13,6 +13,7 @@ * %(expr) evaluate an integer S-expression, emit LE 32-bit hex * $(expr) evaluate an integer S-expression, emit LE 64-bit hex * %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e + * %str(IDENT) stringify a single WORD token into a "..."-quoted literal * * Expression syntax is intentionally Lisp-shaped: * atoms: decimal or 0x-prefixed integer literals @@ -1189,6 +1190,46 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok) return push_pool_stream_from_mark(mark); } + if (token_text_eq(tok, "%str")) { + struct Token *arg_tok; + struct Token *end_pos; + struct Token out_tok; + char *text_ptr; + int orig_len; + int out_len; + + if (arg_count != 1) { + return fail("bad builtin"); + } + if (arg_ends[0] - arg_starts[0] != 1) { + return fail("bad builtin"); + } + arg_tok = arg_starts[0]; + if (arg_tok->kind != TOK_WORD) { + return fail("bad builtin"); + } + end_pos = call_end_pos; + + orig_len = arg_tok->text.len; + out_len = orig_len + 2; + if (text_used + out_len + 1 > MAX_TEXT) { + return fail("text overflow"); + } + text_ptr = text_buf + text_used; + text_buf[text_used++] = '"'; + memcpy(text_buf + text_used, arg_tok->text.ptr, (size_t)orig_len); + text_used += orig_len; + text_buf[text_used++] = '"'; + text_buf[text_used++] = '\0'; + + out_tok.kind = TOK_STRING; + out_tok.text.ptr = text_ptr; + out_tok.text.len = out_len; + s->pos = end_pos; + s->line_start = 0; + return emit_token(&out_tok); + } + return fail("bad builtin"); } @@ -1252,7 +1293,8 @@ static int process_tokens(void) token_text_eq(tok, "@") || token_text_eq(tok, "%") || token_text_eq(tok, "$") || - token_text_eq(tok, "%select"))) { + token_text_eq(tok, "%select") || + token_text_eq(tok, "%str"))) { if (!expand_builtin_call(s, tok)) { return 0; } diff --git a/tests/m1pp/14-str-builtin.M1pp b/tests/m1pp/14-str-builtin.M1pp @@ -0,0 +1,17 @@ +# Phase 14 %str stringification builtin. 
+# - %str(IDENT) wraps the identifier text in double quotes +# - result is a TOK_STRING, byte-identical to a hand-written literal + +%macro quoteit(name) +%str(name) +%endm + +%quoteit(hello) +%quoteit(foo_bar) +%quoteit(a) + +# Control: hand-written literals must match the macro-generated form. +"hello" +"foo_bar" +"a" +END diff --git a/tests/m1pp/14-str-builtin.expected b/tests/m1pp/14-str-builtin.expected @@ -0,0 +1,17 @@ + + + + + +"hello" + +"foo_bar" + +"a" + + + +"hello" +"foo_bar" +"a" +END diff --git a/tests/m1pp/14-str-paste.M1pp b/tests/m1pp/14-str-paste.M1pp @@ -0,0 +1,13 @@ +# Phase 14 paste + stringify. +# - `##` joins word fragments: str_##n -> str_quote (TOK_WORD). +# - `%str(n)` wraps the same identifier in quotes (TOK_STRING). +# - Complementary operators: paste builds the label, %str builds the literal. + +%macro defsym(n) +:str_ ## n %str(n) +%endm + +%defsym(quote) +%defsym(if) +%defsym(begin) +END diff --git a/tests/m1pp/14-str-paste.expected b/tests/m1pp/14-str-paste.expected @@ -0,0 +1,13 @@ + + + + + + +:str_quote "quote" + +:str_if "if" + +:str_begin "begin" + +END diff --git a/tests/m1pp/_14-str-malformed.M1pp b/tests/m1pp/_14-str-malformed.M1pp @@ -0,0 +1,8 @@ +# Phase 14 %str malformed input. +# - Underscore-prefix => skipped by test.sh. +# - Expected outcome: m1pp exits non-zero. +# - %str takes exactly one single-token WORD argument. A multi-token +# argument (`a b`) must be rejected; so must an already-string arg +# (`"already_string"`). This fixture exercises the multi-token path. + +%str(a b)