Merge feature: %str stringification builtin (§7) - boot2

commit 51e51fb86495f1a6cbfb8fde625b5db1eab2dac1
parent 6def9f0029040c6df23519bf58d44b45de20a5d6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 17:22:38 -0700

Merge feature: %str stringification builtin (§7)

Diffstat:
M m1pp/m1pp.M1  | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M m1pp/m1pp.c  | 44 +++++++++++++++++++++++++++++++++++++++++++-
A tests/m1pp/14-str-builtin.M1pp  | 17 +++++++++++++++++
A tests/m1pp/14-str-builtin.expected  | 17 +++++++++++++++++
A tests/m1pp/14-str-paste.M1pp  | 13 +++++++++++++
A tests/m1pp/14-str-paste.expected  | 13 +++++++++++++
A tests/m1pp/_14-str-malformed.M1pp  | 8 ++++++++

7 files changed, 264 insertions(+), 2 deletions(-)
diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1
@@ -895,7 +895,7 @@ DEFINE EXPR_INVALID 1200000000000000
     la_br &proc_check_macro
     bne_a1,a2
 
-    # try the five builtin names: ! @ % $ %select
+    # try the six builtin names: ! @ % $ %select %str
     mov_a0,t0
     la_a1 &const_bang
     li_a2 %1 %0
@@ -931,6 +931,13 @@ DEFINE EXPR_INVALID 1200000000000000
     call
     la_br &proc_do_builtin
     bnez_a0
+    ld_a0,sp,24
+    la_a1 &const_str
+    li_a2 %4 %0
+    la_br &tok_eq_const
+    call
+    la_br &proc_do_builtin
+    bnez_a0
     la_br &proc_check_macro
     b
 
@@ -4054,6 +4061,17 @@ DEFINE EXPR_INVALID 1200000000000000
     la_br &ebc_select
     bnez_a0
 
+    # if tok_eq_const(tok, "%str", 4) -> str path
+    la_a0 &ebc_stream
+    ld_a0,a0,0
+    ld_a0,a0,16
+    la_a1 &const_str
+    li_a2 %4 %0
+    la_br &tok_eq_const
+    call
+    la_br &ebc_str
+    bnez_a0
+
     # else: fatal
     la_br &err_bad_macro_header
     b
@@ -4245,6 +4263,125 @@ DEFINE EXPR_INVALID 1200000000000000
     leave
     ret
 
+## %str(IDENT): stringify a single WORD argument into a TOK_STRING literal.
+## Validation: arg_count == 1, arg span length == 1 token, and that token's
+## kind is TOK_WORD. Output: a freshly-allocated text span built as
+## `"` + arg.text + `"` (len = arg.text.len + 2) and a synthesized TOK_STRING
+## pointing at it. Stream pos advances to call_end_pos; line_start = 0.
+:ebc_str
+    # require arg_count == 1, else branch to err_bad_macro_header
+    la_a0 &arg_count
+    ld_t0,a0,0
+    li_t1 %1 %0
+    la_br &err_bad_macro_header
+    bne_t0,t1
+
+    # snapshot arg_starts[0] / arg_ends[0] into ebc_arg0_start / ebc_arg0_end
+    la_a0 &arg_starts
+    ld_t0,a0,0
+    la_a1 &ebc_arg0_start
+    st_t0,a1,0
+    la_a0 &arg_ends
+    ld_t0,a0,0
+    la_a1 &ebc_arg0_end
+    st_t0,a1,0
+
+    # require arg0_end - arg0_start == 24 (exactly one token); t0 is left = arg_tok
+    la_a0 &ebc_arg0_start
+    ld_t0,a0,0
+    la_a1 &ebc_arg0_end
+    ld_t1,a1,0
+    sub_t2,t1,t0
+    li_a2 %24 %0
+    la_br &err_bad_macro_header
+    bne_t2,a2
+
+    # require arg_tok->kind (offset 0, read through t0) == TOK_WORD
+    ld_a3,t0,0
+    li_a2 TOK_WORD
+    la_br &err_bad_macro_header
+    bne_a3,a2
+
+    # t1 = orig_len = arg_tok->text.len (offset 16); t2 = out_len = orig_len + 2 (both spilled)
+    # err_text_overflow if out_len > 256 (scratch cap; text_buf cap checked by append_text)
+    ld_t1,t0,16
+    la_a0 &ebc_str_orig_len
+    st_t1,a0,0
+    addi_t2,t1,2
+    la_a0 &ebc_str_out_len
+    st_t2,a0,0
+    li_a1 %256 %0
+    la_br &err_text_overflow
+    blt_a1,t2
+
+    # scratch[0] = '"' (byte 34); t2 = scratch base, left untouched by the copy loop below
+    la_t2 &ebc_str_scratch
+    li_a3 %34 %0
+    sb_a3,t2,0
+
+    # copy arg_tok->text bytes into scratch[1..1+orig_len), one byte per iteration
+    #   t0 = src = arg_tok->text.ptr (offset 8); t1 = orig_len; a0 = i = 0
+    la_a0 &ebc_arg0_start
+    ld_a0,a0,0
+    ld_t0,a0,8
+    la_a1 &ebc_str_orig_len
+    ld_t1,a1,0
+    li_a0 %0 %0
+:ebc_str_copy_loop
+    la_br &ebc_str_copy_done
+    beq_a0,t1
+    add_a1,t0,a0
+    lb_a1,a1,0
+    addi_a2,a0,1
+    add_a2,t2,a2
+    sb_a1,a2,0
+    addi_a0,a0,1
+    la_br &ebc_str_copy_loop
+    b
+:ebc_str_copy_done
+
+    # scratch[1 + orig_len] = '"' (closing quote; orig_len reloaded from its spill slot)
+    la_t2 &ebc_str_scratch
+    la_a1 &ebc_str_orig_len
+    ld_a1,a1,0
+    addi_a1,a1,1
+    add_a0,t2,a1
+    li_a3 %34 %0
+    sb_a3,a0,0
+
+    # a0 = text_ptr = append_text(&scratch, out_len)
+    la_a0 &ebc_str_scratch
+    la_a1 &ebc_str_out_len
+    ld_a1,a1,0
+    la_br &append_text
+    call
+
+    # ebc_str_token = { TOK_STRING, text_ptr, out_len } stored at offsets 0 / 8 / 16
+    la_a2 &ebc_str_token
+    li_a3 TOK_STRING
+    st_a3,a2,0
+    st_a0,a2,8
+    la_a1 &ebc_str_out_len
+    ld_a1,a1,0
+    st_a1,a2,16
+
+    # stream->pos (offset 16) = ebc_call_end_pos; stream->line_start (offset 24) = 0
+    la_a0 &ebc_stream
+    ld_a0,a0,0
+    la_a1 &ebc_call_end_pos
+    ld_t0,a1,0
+    st_t0,a0,16
+    li_t1 %0 %0
+    st_t1,a0,24
+
+    # emit_token(&ebc_str_token), then leave / ret
+    la_a0 &ebc_str_token
+    la_br &emit_token
+    call
+
+    leave
+    ret
+
 ## --- Error paths -------------------------------------------------------------
 ## Each err_* loads a (msg, len) pair for fatal; fatal writes "m1pp: <msg>\n"
 ## to stderr and exits 1. Error labels are branched to from range/overflow
@@ -4371,6 +4508,7 @@ DEFINE EXPR_INVALID 1200000000000000
 :const_pct "%"
 :const_dlr "$"
 :const_select "%select"
+:const_str "%str"
 
 ## Operator strings for expr_op_code. Each is a raw byte literal; lengths
 ## are passed separately to tok_eq_const. "<=" must be tested before "<"
@@ -4631,6 +4769,20 @@ ZERO8
 :ebc_mark
 ZERO8
 
+## %str builtin scratch. ebc_str_orig_len / ebc_str_out_len spill the
+## argument text length and its +2 output length across append_text;
+## ebc_str_token is the synthesized TOK_STRING { kind, text_ptr, text_len }
+## handed to emit_token; ebc_str_scratch is a 256-byte assembly buffer
+## (matches paste_scratch / M0's quoted-literal cap).
+:ebc_str_orig_len
+ZERO8
+:ebc_str_out_len
+ZERO8
+:ebc_str_token
+ZERO8 ZERO8 ZERO8
+:ebc_str_scratch
+ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32
+
 ## arg_starts[16] / arg_ends[16]: 16 × 8 = 128 bytes each, i.e. 4 ZERO32.
 ## Written by parse_args; read by expand_macro_tokens and expand_builtin_call.
 :arg_starts
diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c
@@ -13,6 +13,7 @@
  *   %(expr)          evaluate an integer S-expression, emit LE 32-bit hex
  *   $(expr)          evaluate an integer S-expression, emit LE 64-bit hex
  *   %select(c,t,e)   evaluate condition S-expression; expand t if nonzero else e
+ *   %str(IDENT)      stringify a single WORD token into a "..."-quoted literal
  *
  * Expression syntax is intentionally Lisp-shaped:
  *   atoms: decimal or 0x-prefixed integer literals
@@ -1189,6 +1190,46 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok)
         return push_pool_stream_from_mark(mark);
     }
 
+    if (token_text_eq(tok, "%str")) {
+        struct Token *arg_tok;
+        struct Token *end_pos;
+        struct Token out_tok;
+        char *text_ptr;
+        int orig_len;
+        int out_len;
+
+        if (arg_count != 1) {
+            return fail("bad builtin");
+        }
+        if (arg_ends[0] - arg_starts[0] != 1) {
+            return fail("bad builtin");
+        }
+        arg_tok = arg_starts[0];
+        if (arg_tok->kind != TOK_WORD) {
+            return fail("bad builtin");
+        }
+        end_pos = call_end_pos;
+
+        orig_len = arg_tok->text.len;
+        out_len = orig_len + 2;
+        if (text_used + out_len + 1 > MAX_TEXT) {
+            return fail("text overflow");
+        }
+        text_ptr = text_buf + text_used;
+        text_buf[text_used++] = '"';
+        memcpy(text_buf + text_used, arg_tok->text.ptr, (size_t)orig_len);
+        text_used += orig_len;
+        text_buf[text_used++] = '"';
+        text_buf[text_used++] = '\0';
+
+        out_tok.kind = TOK_STRING;
+        out_tok.text.ptr = text_ptr;
+        out_tok.text.len = out_len;
+        s->pos = end_pos;
+        s->line_start = 0;
+        return emit_token(&out_tok);
+    }
+
     return fail("bad builtin");
 }
 
@@ -1252,7 +1293,8 @@ static int process_tokens(void)
              token_text_eq(tok, "@") ||
              token_text_eq(tok, "%") ||
              token_text_eq(tok, "$") ||
-             token_text_eq(tok, "%select"))) {
+             token_text_eq(tok, "%select") ||
+             token_text_eq(tok, "%str"))) {
             if (!expand_builtin_call(s, tok)) {
                 return 0;
             }
diff --git a/tests/m1pp/14-str-builtin.M1pp b/tests/m1pp/14-str-builtin.M1pp
@@ -0,0 +1,17 @@
+# Phase 14 %str stringification builtin.
+#  - %str(IDENT) wraps the identifier text in double quotes
+#  - result is a TOK_STRING, byte-identical to a hand-written literal
+
+%macro quoteit(name)
+%str(name)
+%endm
+
+%quoteit(hello)
+%quoteit(foo_bar)
+%quoteit(a)
+
+# Control: hand-written literals must match the macro-generated form.
+"hello"
+"foo_bar"
+"a"
+END
diff --git a/tests/m1pp/14-str-builtin.expected b/tests/m1pp/14-str-builtin.expected
@@ -0,0 +1,17 @@
+
+
+
+
+
+"hello"
+
+"foo_bar"
+
+"a"
+
+
+
+"hello"
+"foo_bar"
+"a"
+END
diff --git a/tests/m1pp/14-str-paste.M1pp b/tests/m1pp/14-str-paste.M1pp
@@ -0,0 +1,13 @@
+# Phase 14 paste + stringify.
+#  - `##` joins word fragments: str_##n -> str_quote (TOK_WORD).
+#  - `%str(n)` wraps the same identifier in quotes (TOK_STRING).
+#  - Complementary operators: paste builds the label, %str builds the literal.
+
+%macro defsym(n)
+:str_ ## n %str(n)
+%endm
+
+%defsym(quote)
+%defsym(if)
+%defsym(begin)
+END
diff --git a/tests/m1pp/14-str-paste.expected b/tests/m1pp/14-str-paste.expected
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+:str_quote "quote"
+
+:str_if "if"
+
+:str_begin "begin"
+
+END
diff --git a/tests/m1pp/_14-str-malformed.M1pp b/tests/m1pp/_14-str-malformed.M1pp
@@ -0,0 +1,8 @@
+# Phase 14 %str malformed input.
+#  - Underscore-prefix => skipped by test.sh.
+#  - Expected outcome: m1pp exits non-zero.
+#  - %str takes exactly one single-token WORD argument. A multi-token
+#    argument (`a b`) must be rejected; so must an already-string arg
+#    (`"already_string"`). This fixture exercises the multi-token path.
+
+%str(a b)

	boot2 Playing with the bootstrap
	git clone https://git.ryansepassi.com/git/boot2.git
	Log \| Files \| Refs

M	m1pp/m1pp.M1	\|	154	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M	m1pp/m1pp.c	\|	44	+++++++++++++++++++++++++++++++++++++++++++-
A	tests/m1pp/14-str-builtin.M1pp	\|	17	+++++++++++++++++
A	tests/m1pp/14-str-builtin.expected	\|	17	+++++++++++++++++
A	tests/m1pp/14-str-paste.M1pp	\|	13	+++++++++++++
A	tests/m1pp/14-str-paste.expected	\|	13	+++++++++++++
A	tests/m1pp/_14-str-malformed.M1pp	\|	8	++++++++