commit 6def9f0029040c6df23519bf58d44b45de20a5d6
parent 5b828f7a0aa7a329a933fedb0576854a30bddd03
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 17:22:35 -0700
Merge feature: strlen expression op (§3)
Diffstat:
5 files changed, 137 insertions(+), 4 deletions(-)
diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1
@@ -105,7 +105,8 @@ DEFINE EXPR_LT 0D00000000000000
DEFINE EXPR_LE 0E00000000000000
DEFINE EXPR_GT 0F00000000000000
DEFINE EXPR_GE 1000000000000000
-DEFINE EXPR_INVALID 1100000000000000
+DEFINE EXPR_STRLEN 1100000000000000
+DEFINE EXPR_INVALID 1200000000000000
## --- Runtime shell: argv, read input, call pipeline, write output, exit ------
@@ -2589,9 +2590,9 @@ DEFINE EXPR_INVALID 1100000000000000
:pit_done
ret
-## expr_op_code(a0=tok) -> a0 = EXPR_ADD..EXPR_GE, or EXPR_INVALID.
+## expr_op_code(a0=tok) -> a0 = EXPR_ADD..EXPR_STRLEN, or EXPR_INVALID.
## Accepts operator tokens: + - * / % << >> & | ^ ~ = !=
-## < <= > >=. Non-WORD tok or unknown operator -> EXPR_INVALID.
+## < <= > >= strlen. Non-WORD tok or unknown operator -> EXPR_INVALID.
##
## tok_eq_const is a leaf but clobbers a0..a3,t0..t2; spill tok to eoc_tok
## once, reload before each compare. Needs an enter_0 frame because it
@@ -2776,6 +2777,16 @@ DEFINE EXPR_INVALID 1100000000000000
la_br &eoc_gt
bnez_a0
+ # "strlen" -> EXPR_STRLEN
+ la_a0 &eoc_tok
+ ld_a0,a0,0
+ la_a1 &op_strlen
+ li_a2 %6 %0
+ la_br &tok_eq_const
+ call
+ la_br &eoc_strlen
+ bnez_a0
+
:eoc_invalid
li_a0 EXPR_INVALID
leave
@@ -2848,6 +2859,10 @@ DEFINE EXPR_INVALID 1100000000000000
li_a0 EXPR_GE
leave
ret
+:eoc_strlen
+ li_a0 EXPR_STRLEN
+ leave
+ ret
## apply_expr_op(a0=op_code, a1=args_ptr, a2=argc) -> a0 = i64 result
## Reduce args[0..argc) per op:
@@ -3639,6 +3654,11 @@ DEFINE EXPR_INVALID 1100000000000000
li_t0 EXPR_INVALID
la_br &err_bad_macro_header
beq_a0,t0
+ # if (op == EXPR_STRLEN) handle inline — strlen's argument is a
+ # TOK_STRING atom, not a recursive expression. Yield text.len - 2.
+ li_t0 EXPR_STRLEN
+ la_br &eer_strlen
+ beq_a0,t0
# frame stack overflow check: if (expr_frame_top >= 16) fatal
# (the global expr_frames[] array has 16 slots, shared across recursive
# eval_expr_range calls)
@@ -3707,6 +3727,71 @@ DEFINE EXPR_INVALID 1100000000000000
la_br &eer_loop
b
+:eer_strlen
+ # (strlen "literal") — degenerate unary op whose argument is a
+ # TOK_STRING atom, not a recursive expression.
+ # pos++ past the "strlen" operator word.
+ ld_t0,sp,16
+ addi_t0,t0,24
+ st_t0,sp,16
+ # skip_expr_newlines(pos, end)
+ ld_a0,sp,16
+ ld_a1,sp,24
+ la_br &skip_expr_newlines
+ call
+ st_a0,sp,16
+ # if (pos >= end) fatal
+ ld_t0,sp,16
+ ld_t1,sp,24
+ la_br &err_bad_macro_header
+ beq_t0,t1
+ # if (pos->kind != TOK_STRING) fatal
+ ld_t2,t0,0
+ li_a3 TOK_STRING
+ la_br &err_bad_macro_header
+ bne_t2,a3
+ # if (pos->text.len < 2) fatal
+ ld_a1,t0,16
+ li_a2 %2 %0
+ la_br &err_bad_macro_header
+ blt_a1,a2
+ # if (pos->text.ptr[0] != '"') fatal — rejects single-quoted '..' hex
+ ld_a2,t0,8
+ lb_a3,a2,0
+ li_a0 %34 %0
+ la_br &err_bad_macro_header
+ bne_a3,a0
+ # value = pos->text.len - 2
+ addi_a1,a1,neg2
+ st_a1,sp,32
+ # pos++
+ addi_t0,t0,24
+ st_t0,sp,16
+ # skip_expr_newlines(pos, end)
+ ld_a0,sp,16
+ ld_a1,sp,24
+ la_br &skip_expr_newlines
+ call
+ st_a0,sp,16
+ # if (pos >= end) fatal
+ ld_t0,sp,16
+ ld_t1,sp,24
+ la_br &err_bad_macro_header
+ beq_t0,t1
+ # if (pos->kind != TOK_RPAREN) fatal
+ ld_t2,t0,0
+ li_a3 TOK_RPAREN
+ la_br &err_bad_macro_header
+ bne_t2,a3
+ # pos++
+ addi_t0,t0,24
+ st_t0,sp,16
+ # have_value = 1
+ li_t0 %1 %0
+ st_t0,sp,48
+ la_br &eer_loop
+ b
+
:eer_loop_done
# frame_top must equal entry_frame_top
la_a0 &expr_frame_top
@@ -4307,6 +4392,7 @@ DEFINE EXPR_INVALID 1100000000000000
:op_le "<="
:op_gt ">"
:op_ge ">="
+:op_strlen "strlen"
## Nibble-to-hex lookup table for emit_hex_value.
:hex_chars "0123456789ABCDEF"
diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c
@@ -102,6 +102,7 @@ enum ExprOp {
EXPR_LE,
EXPR_GT,
EXPR_GE,
+ EXPR_STRLEN,
EXPR_INVALID
};
@@ -808,6 +809,9 @@ static enum ExprOp expr_op_code(const struct Token *tok)
if (token_text_eq(tok, ">=")) {
return EXPR_GE;
}
+ if (token_text_eq(tok, "strlen")) {
+ return EXPR_STRLEN;
+ }
return EXPR_INVALID;
}
@@ -936,6 +940,7 @@ static int apply_expr_op(enum ExprOp op, const long long *args, int argc, long l
}
*out = (args[0] >= args[1]);
return 1;
+ case EXPR_STRLEN:
case EXPR_INVALID:
break;
}
@@ -1027,13 +1032,34 @@ static int eval_expr_range(struct TokenSpan span, long long *out)
if (op == EXPR_INVALID) {
return fail("bad expression");
}
+ pos++;
+ if (op == EXPR_STRLEN) {
+ /* strlen is degenerate: argument is a TOK_STRING atom,
+ * not a recursive expression. Handle inline and yield
+ * the string's raw byte count (text.len - 2). */
+ skip_expr_newlines(&pos, span.end);
+ if (pos >= span.end || pos->kind != TOK_STRING) {
+ return fail("bad expression");
+ }
+ if (pos->text.len < 2 || pos->text.ptr[0] != '"') {
+ return fail("bad expression");
+ }
+ value = (long long)(pos->text.len - 2);
+ pos++;
+ skip_expr_newlines(&pos, span.end);
+ if (pos >= span.end || pos->kind != TOK_RPAREN) {
+ return fail("bad expression");
+ }
+ pos++;
+ have_value = 1;
+ continue;
+ }
if (frame_top >= MAX_EXPR_FRAMES) {
return fail("expression overflow");
}
frames[frame_top].op = op;
frames[frame_top].argc = 0;
frame_top++;
- pos++;
continue;
}
diff --git a/tests/m1pp/04-expr-ops.M1pp b/tests/m1pp/04-expr-ops.M1pp
@@ -33,4 +33,11 @@ $((>= 5 6))
# nested expressions
$((+ (* 2 3) (- 7 4) (/ 12 3)))
+
+# strlen: raw byte count between the quotes (matches what M1's "..." emits
+# before appending NUL). Composes with arithmetic like any other op.
+%((strlen "hello"))
+%((+ (strlen "hello") 1))
+!((strlen "x"))
+%((strlen ""))
END
diff --git a/tests/m1pp/04-expr-ops.expected b/tests/m1pp/04-expr-ops.expected
@@ -33,4 +33,11 @@
'0D00000000000000'
+
+
+
+'05000000'
+'06000000'
+'01'
+'00000000'
END
diff --git a/tests/m1pp/_04-strlen-badarg.M1pp b/tests/m1pp/_04-strlen-badarg.M1pp
@@ -0,0 +1,7 @@
+# Malformed: strlen requires a double-quoted TOK_STRING argument.
+# Single-quoted '...' hex literals are meaningless for strlen and must
+# be rejected. Expected behavior: non-zero exit from the expander.
+# (Underscore-prefixed filename so test.sh skips this fixture.)
+
+%((strlen 'deadbeef'))
+END