boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit 6def9f0029040c6df23519bf58d44b45de20a5d6
parent 5b828f7a0aa7a329a933fedb0576854a30bddd03
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 17:22:35 -0700

Merge feature: strlen expression op (§3)

Diffstat:
Mm1pp/m1pp.M1 | 92++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Mm1pp/m1pp.c | 28+++++++++++++++++++++++++++-
Mtests/m1pp/04-expr-ops.M1pp | 7+++++++
Mtests/m1pp/04-expr-ops.expected | 7+++++++
Atests/m1pp/_04-strlen-badarg.M1pp | 7+++++++
5 files changed, 137 insertions(+), 4 deletions(-)

diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1 @@ -105,7 +105,8 @@ DEFINE EXPR_LT 0D00000000000000 DEFINE EXPR_LE 0E00000000000000 DEFINE EXPR_GT 0F00000000000000 DEFINE EXPR_GE 1000000000000000 -DEFINE EXPR_INVALID 1100000000000000 +DEFINE EXPR_STRLEN 1100000000000000 +DEFINE EXPR_INVALID 1200000000000000 ## --- Runtime shell: argv, read input, call pipeline, write output, exit ------ @@ -2589,9 +2590,9 @@ DEFINE EXPR_INVALID 1100000000000000 :pit_done ret -## expr_op_code(a0=tok) -> a0 = EXPR_ADD..EXPR_GE, or EXPR_INVALID. +## expr_op_code(a0=tok) -> a0 = EXPR_ADD..EXPR_STRLEN, or EXPR_INVALID. ## Accepts operator tokens: + - * / % << >> & | ^ ~ = != -## < <= > >=. Non-WORD tok or unknown operator -> EXPR_INVALID. +## < <= > >= strlen. Non-WORD tok or unknown operator -> EXPR_INVALID. ## ## tok_eq_const is a leaf but clobbers a0..a3,t0..t2; spill tok to eoc_tok ## once, reload before each compare. Needs an enter_0 frame because it @@ -2776,6 +2777,16 @@ DEFINE EXPR_INVALID 1100000000000000 la_br &eoc_gt bnez_a0 + # "strlen" -> EXPR_STRLEN + la_a0 &eoc_tok + ld_a0,a0,0 + la_a1 &op_strlen + li_a2 %6 %0 + la_br &tok_eq_const + call + la_br &eoc_strlen + bnez_a0 + :eoc_invalid li_a0 EXPR_INVALID leave @@ -2848,6 +2859,10 @@ DEFINE EXPR_INVALID 1100000000000000 li_a0 EXPR_GE leave ret +:eoc_strlen + li_a0 EXPR_STRLEN + leave + ret ## apply_expr_op(a0=op_code, a1=args_ptr, a2=argc) -> a0 = i64 result ## Reduce args[0..argc) per op: @@ -3639,6 +3654,11 @@ DEFINE EXPR_INVALID 1100000000000000 li_t0 EXPR_INVALID la_br &err_bad_macro_header beq_a0,t0 + # if (op == EXPR_STRLEN) handle inline — strlen's argument is a + # TOK_STRING atom, not a recursive expression. Yield text.len - 2. 
+ li_t0 EXPR_STRLEN + la_br &eer_strlen + beq_a0,t0 # frame stack overflow check: if (expr_frame_top >= 16) fatal # (the global expr_frames[] array has 16 slots, shared across recursive # eval_expr_range calls) @@ -3707,6 +3727,71 @@ DEFINE EXPR_INVALID 1100000000000000 la_br &eer_loop b +:eer_strlen + # (strlen "literal") — degenerate unary op whose argument is a + # TOK_STRING atom, not a recursive expression. + # pos++ past the "strlen" operator word. + ld_t0,sp,16 + addi_t0,t0,24 + st_t0,sp,16 + # skip_expr_newlines(pos, end) + ld_a0,sp,16 + ld_a1,sp,24 + la_br &skip_expr_newlines + call + st_a0,sp,16 + # if (pos >= end) fatal + ld_t0,sp,16 + ld_t1,sp,24 + la_br &err_bad_macro_header + beq_t0,t1 + # if (pos->kind != TOK_STRING) fatal + ld_t2,t0,0 + li_a3 TOK_STRING + la_br &err_bad_macro_header + bne_t2,a3 + # if (pos->text.len < 2) fatal + ld_a1,t0,16 + li_a2 %2 %0 + la_br &err_bad_macro_header + blt_a1,a2 + # if (pos->text.ptr[0] != '"') fatal — rejects single-quoted '..' hex + ld_a2,t0,8 + lb_a3,a2,0 + li_a0 %34 %0 + la_br &err_bad_macro_header + bne_a3,a0 + # value = pos->text.len - 2 + addi_a1,a1,neg2 + st_a1,sp,32 + # pos++ + addi_t0,t0,24 + st_t0,sp,16 + # skip_expr_newlines(pos, end) + ld_a0,sp,16 + ld_a1,sp,24 + la_br &skip_expr_newlines + call + st_a0,sp,16 + # if (pos >= end) fatal + ld_t0,sp,16 + ld_t1,sp,24 + la_br &err_bad_macro_header + beq_t0,t1 + # if (pos->kind != TOK_RPAREN) fatal + ld_t2,t0,0 + li_a3 TOK_RPAREN + la_br &err_bad_macro_header + bne_t2,a3 + # pos++ + addi_t0,t0,24 + st_t0,sp,16 + # have_value = 1 + li_t0 %1 %0 + st_t0,sp,48 + la_br &eer_loop + b + :eer_loop_done # frame_top must equal entry_frame_top la_a0 &expr_frame_top @@ -4307,6 +4392,7 @@ DEFINE EXPR_INVALID 1100000000000000 :op_le "<=" :op_gt ">" :op_ge ">=" +:op_strlen "strlen" ## Nibble-to-hex lookup table for emit_hex_value. 
:hex_chars "0123456789ABCDEF" diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c @@ -102,6 +102,7 @@ enum ExprOp { EXPR_LE, EXPR_GT, EXPR_GE, + EXPR_STRLEN, EXPR_INVALID }; @@ -808,6 +809,9 @@ static enum ExprOp expr_op_code(const struct Token *tok) if (token_text_eq(tok, ">=")) { return EXPR_GE; } + if (token_text_eq(tok, "strlen")) { + return EXPR_STRLEN; + } return EXPR_INVALID; } @@ -936,6 +940,7 @@ static int apply_expr_op(enum ExprOp op, const long long *args, int argc, long l } *out = (args[0] >= args[1]); return 1; + case EXPR_STRLEN: case EXPR_INVALID: break; } @@ -1027,13 +1032,34 @@ static int eval_expr_range(struct TokenSpan span, long long *out) if (op == EXPR_INVALID) { return fail("bad expression"); } + pos++; + if (op == EXPR_STRLEN) { + /* strlen is degenerate: argument is a TOK_STRING atom, + * not a recursive expression. Handle inline and yield + * the string's raw byte count (span.len - 2). */ + skip_expr_newlines(&pos, span.end); + if (pos >= span.end || pos->kind != TOK_STRING) { + return fail("bad expression"); + } + if (pos->text.len < 2 || pos->text.ptr[0] != '"') { + return fail("bad expression"); + } + value = (long long)(pos->text.len - 2); + pos++; + skip_expr_newlines(&pos, span.end); + if (pos >= span.end || pos->kind != TOK_RPAREN) { + return fail("bad expression"); + } + pos++; + have_value = 1; + continue; + } if (frame_top >= MAX_EXPR_FRAMES) { return fail("expression overflow"); } frames[frame_top].op = op; frames[frame_top].argc = 0; frame_top++; - pos++; continue; } diff --git a/tests/m1pp/04-expr-ops.M1pp b/tests/m1pp/04-expr-ops.M1pp @@ -33,4 +33,11 @@ $((>= 5 6)) # nested expressions $((+ (* 2 3) (- 7 4) (/ 12 3))) + +# strlen: raw byte count between the quotes (matches what M1's "..." emits +# before appending NUL). Composes with arithmetic like any other op. 
+%((strlen "hello")) +%((+ (strlen "hello") 1)) +!((strlen "x")) +%((strlen "")) END diff --git a/tests/m1pp/04-expr-ops.expected b/tests/m1pp/04-expr-ops.expected @@ -33,4 +33,11 @@ '0D00000000000000' + + + +'05000000' +'06000000' +'01' +'00000000' END diff --git a/tests/m1pp/_04-strlen-badarg.M1pp b/tests/m1pp/_04-strlen-badarg.M1pp @@ -0,0 +1,7 @@ +# Malformed: strlen requires a double-quoted TOK_STRING argument. +# Single-quoted '...' hex literals are meaningless for strlen and must +# be rejected. Expected behavior: non-zero exit from the expander. +# (Underscore-prefixed filename so test.sh skips this fixture.) + +%((strlen 'deadbeef')) +END