commit 5b1416eeda2c4dc29373ef07c50dd1106b4708db
parent ba016341a809dc6bc748baa48219eabb8bf8fdea
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 17:21:42 -0700
m1pp.M1: port %str stringification builtin
Diffstat:
| M | m1pp/m1pp.M1 | | | 154 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
1 file changed, 153 insertions(+), 1 deletion(-)
diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1
@@ -894,7 +894,7 @@ DEFINE EXPR_INVALID 1100000000000000
la_br &proc_check_macro
bne_a1,a2
- # try the five builtin names: ! @ % $ %select
+ # try the six builtin names: ! @ % $ %select %str
mov_a0,t0
la_a1 &const_bang
li_a2 %1 %0
@@ -930,6 +930,13 @@ DEFINE EXPR_INVALID 1100000000000000
call
la_br &proc_do_builtin
bnez_a0
+ ld_a0,sp,24
+ la_a1 &const_str
+ li_a2 %4 %0
+ la_br &tok_eq_const
+ call
+ la_br &proc_do_builtin
+ bnez_a0
la_br &proc_check_macro
b
@@ -3915,6 +3922,17 @@ DEFINE EXPR_INVALID 1100000000000000
la_br &ebc_select
bnez_a0
+ # if tok_eq_const(tok, "%str", 4) -> str path
+ la_a0 &ebc_stream
+ ld_a0,a0,0
+ ld_a0,a0,16
+ la_a1 &const_str
+ li_a2 %4 %0
+ la_br &tok_eq_const
+ call
+ la_br &ebc_str
+ bnez_a0
+
# else: fatal
la_br &err_bad_macro_header
b
@@ -4106,6 +4124,125 @@ DEFINE EXPR_INVALID 1100000000000000
leave
ret
+## %str(IDENT): stringify a single WORD argument into a TOK_STRING literal.
+## Requires arg_count == 1, a one-token arg span, and kind TOK_WORD (else
+## err_bad_macro_header); err_text_overflow if arg.text.len + 2 > 256. Builds
+## `"` + arg.text + `"` in ebc_str_scratch, then emits a TOK_STRING over the
+## pointer append_text returns. Stream pos = ebc_call_end_pos; line_start = 0.
+:ebc_str
+ # require arg_count == 1 (t0 = arg_count, t1 = 1), else err_bad_macro_header
+ la_a0 &arg_count
+ ld_t0,a0,0
+ li_t1 %1 %0
+ la_br &err_bad_macro_header
+ bne_t0,t1
+
+ # snapshot arg_starts[0] / arg_ends[0] into ebc_arg0_start / ebc_arg0_end
+ la_a0 &arg_starts
+ ld_t0,a0,0
+ la_a1 &ebc_arg0_start
+ st_t0,a1,0
+ la_a0 &arg_ends
+ ld_t0,a0,0
+ la_a1 &ebc_arg0_end
+ st_t0,a1,0
+
+ # require arg0_end - arg0_start == 24 (exactly one token); t0 = arg0_start
+ la_a0 &ebc_arg0_start
+ ld_t0,a0,0
+ la_a1 &ebc_arg0_end
+ ld_t1,a1,0
+ sub_t2,t1,t0
+ li_a2 %24 %0
+ la_br &err_bad_macro_header
+ bne_t2,a2
+
+ # require arg_tok->kind == TOK_WORD (kind is read from token offset 0)
+ ld_a3,t0,0
+ li_a2 TOK_WORD
+ la_br &err_bad_macro_header
+ bne_a3,a2
+
+ # orig_len = arg_tok->text.len (token offset 16); out_len = orig_len + 2; both spilled
+ # fatal if 256 < out_len (scratch cap; text_buf cap checked by append_text)
+ ld_t1,t0,16
+ la_a0 &ebc_str_orig_len
+ st_t1,a0,0
+ addi_t2,t1,2
+ la_a0 &ebc_str_out_len
+ st_t2,a0,0
+ li_a1 %256 %0
+ la_br &err_text_overflow
+ blt_a1,t2
+
+ # scratch[0] = '"' (34 = ASCII double quote); t2 = scratch base for the copy loop
+ la_t2 &ebc_str_scratch
+ li_a3 %34 %0
+ sb_a3,t2,0
+
+ # copy arg_tok->text bytes into scratch[1..1+orig_len): scratch[1 + i] = src[i]
+ # t0 = src = arg_tok->text.ptr (token offset 8); t1 = orig_len; a0 = i = 0
+ la_a0 &ebc_arg0_start
+ ld_a0,a0,0
+ ld_t0,a0,8
+ la_a1 &ebc_str_orig_len
+ ld_t1,a1,0
+ li_a0 %0 %0
+:ebc_str_copy_loop
+ la_br &ebc_str_copy_done
+ beq_a0,t1
+ add_a1,t0,a0
+ lb_a1,a1,0
+ addi_a2,a0,1
+ add_a2,t2,a2
+ sb_a1,a2,0
+ addi_a0,a0,1
+ la_br &ebc_str_copy_loop
+ b
+:ebc_str_copy_done
+
+ # scratch[1 + orig_len] = '"' (closing quote, the last of the out_len bytes)
+ la_t2 &ebc_str_scratch
+ la_a1 &ebc_str_orig_len
+ ld_a1,a1,0
+ addi_a1,a1,1
+ add_a0,t2,a1
+ li_a3 %34 %0
+ sb_a3,a0,0
+
+ # text_ptr = append_text(&scratch, out_len); the result is taken from a0 below
+ la_a0 &ebc_str_scratch
+ la_a1 &ebc_str_out_len
+ ld_a1,a1,0
+ la_br &append_text
+ call
+
+ # ebc_str_token = { TOK_STRING, text_ptr, out_len } at offsets 0 / 8 / 16 (out_len reloaded)
+ la_a2 &ebc_str_token
+ li_a3 TOK_STRING
+ st_a3,a2,0
+ st_a0,a2,8
+ la_a1 &ebc_str_out_len
+ ld_a1,a1,0
+ st_a1,a2,16
+
+ # stream->pos = ebc_call_end_pos; stream->line_start = 0 (stream offsets 16 and 24)
+ la_a0 &ebc_stream
+ ld_a0,a0,0
+ la_a1 &ebc_call_end_pos
+ ld_t0,a1,0
+ st_t0,a0,16
+ li_t1 %0 %0
+ st_t1,a0,24
+
+ # emit_token(&ebc_str_token); no enter in this block -- leave/ret presumably unwind the branching caller's frame (confirm)
+ la_a0 &ebc_str_token
+ la_br &emit_token
+ call
+
+ leave
+ ret
+
## --- Error paths -------------------------------------------------------------
## Each err_* loads a (msg, len) pair for fatal; fatal writes "m1pp: <msg>\n"
## to stderr and exits 1. Error labels are branched to from range/overflow
@@ -4232,6 +4369,7 @@ DEFINE EXPR_INVALID 1100000000000000
:const_pct "%"
:const_dlr "$"
:const_select "%select"
+:const_str "%str"
## Operator strings for expr_op_code. Each is a raw byte literal; lengths
## are passed separately to tok_eq_const. "<=" must be tested before "<"
@@ -4491,6 +4629,20 @@ ZERO8
:ebc_mark
ZERO8
+## %str builtin scratch. ebc_str_orig_len / ebc_str_out_len (one ZERO8 each)
+## spill the argument text length and its +2 output length across append_text;
+## ebc_str_token (3 x ZERO8) is the synthesized TOK_STRING { kind, text_ptr,
+## text_len } handed to emit_token; ebc_str_scratch (8 x ZERO32 = 256 bytes) is
+## the assembly buffer (matches paste_scratch / M0's quoted-literal cap).
+:ebc_str_orig_len
+ZERO8
+:ebc_str_out_len
+ZERO8
+:ebc_str_token
+ZERO8 ZERO8 ZERO8
+:ebc_str_scratch
+ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32
+
## arg_starts[16] / arg_ends[16]: 16 × 8 = 128 bytes each, i.e. 4 ZERO32.
## Written by parse_args; read by expand_macro_tokens and expand_builtin_call.
:arg_starts