boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 1c28efab79a61a5ce7f7191f55dd4fca7357a597
parent 65fc09c8eabc994c0c5b77384b896cdbb9d5aaf2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 16:07:09 -0700

M1pp: %frame %local support

Diffstat:
M M1pp/M1pp.P1 | 601++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M M1pp/M1pp.c | 175++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M P1/P1-aarch64.M1 | 16++++++++++++++++
M P1/P1-amd64.M1 | 16++++++++++++++++
M P1/P1-riscv64.M1 | 16++++++++++++++++
M docs/M1PP.md | 115+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M tests/M1pp/15-struct.M1pp | 12++++++------
A tests/M1pp/25-frame-locals.M1pp | 27+++++++++++++++++++++++++++
A tests/M1pp/25-frame-locals.expected | 22++++++++++++++++++++++
A tests/M1pp/26-fn2-pattern.M1pp | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/M1pp/26-fn2-pattern.expected | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
11 files changed, 1102 insertions(+), 34 deletions(-)

diff --git a/M1pp/M1pp.P1 b/M1pp/M1pp.P1 @@ -141,6 +141,11 @@ DEFINE OFF_macro_body_tokens 80463f0000000000 DEFINE OFF_streams 80465f0000000000 DEFINE OFF_expand_pool 00495f0000000000 DEFINE OFF_expr_frames 00497f0000000000 +## local_lookup_scratch — 256-byte working buffer used by +## expand_local_into_pool to assemble "<frame>_FRAME.<field>" before +## the macro-table linear search. Placed past expr_frames (BSS end) so +## adding it does not shift any existing OFF_*. +DEFINE OFF_local_lookup_scratch 00527f0000000000 ## --- Runtime shell: argv, read input, call pipeline, write output, exit ------ @@ -1288,7 +1293,7 @@ DEFINE OFF_expr_frames 00497f0000000000 li_a2 %9 %0 la_br &tok_eq_const call - la_br &proc_check_newline + la_br &proc_check_frame beqz_a0 # %endscope matched: shim into pop_scope(stream_end). @@ -1312,6 +1317,70 @@ DEFINE OFF_expr_frames 00497f0000000000 la_br &proc_loop b +## ---- tok eq "%frame" ---- +:proc_check_frame + ld_t0,sp,8 + mov_a0,t0 + la_a1 &const_frame + li_a2 %6 %0 + la_br &tok_eq_const + call + la_br &proc_check_endframe + beqz_a0 + + # %frame matched: shim into push_frame(stream_end). + ld_a0,sp,0 + ld_t0,sp,8 + la_a1 &proc_pos + st_t0,a1,0 + ld_a2,a0,24 + la_a1 &proc_line_start + st_a2,a1,0 + ld_a0,sp,0 + ld_a0,a0,8 + la_br &push_frame + call + ld_a0,sp,0 + la_a1 &proc_pos + ld_t0,a1,0 + st_t0,a0,16 + li_t1 %1 %0 + st_t1,a0,24 + la_br &proc_loop + b + +## ---- tok eq "%endframe" ---- +:proc_check_endframe + ld_t0,sp,8 + mov_a0,t0 + la_a1 &const_endframe + li_a2 %9 %0 + la_br &tok_eq_const + call + la_br &proc_check_newline + beqz_a0 + + # %endframe matched: shim into pop_frame(stream_end). 
+ ld_a0,sp,0 + ld_t0,sp,8 + la_a1 &proc_pos + st_t0,a1,0 + ld_a2,a0,24 + la_a1 &proc_line_start + st_a2,a1,0 + ld_a0,sp,0 + ld_a0,a0,8 + la_br &pop_frame + call + ld_a0,sp,0 + la_a1 &proc_pos + ld_t0,a1,0 + st_t0,a0,16 + li_t1 %1 %0 + st_t1,a0,24 + la_br &proc_loop + b + :proc_check_newline # reload s, tok ld_a0,sp,0 @@ -1400,6 +1469,13 @@ DEFINE OFF_expr_frames 00497f0000000000 call la_br &proc_do_builtin bnez_a0 + ld_a0,sp,8 + la_a1 &const_local + li_a2 %6 %0 + la_br &tok_eq_const + call + la_br &proc_do_builtin + bnez_a0 la_br &proc_check_macro b @@ -1484,6 +1560,11 @@ DEFINE OFF_expr_frames 00497f0000000000 ld_t0,a0,0 la_br &err_scope_not_closed bnez_t0 + # Every %frame must be matched by an %endframe before EOF. + la_a0 &frame_active + ld_t0,a0,0 + la_br &err_frame_not_closed + bnez_t0 eret ## --- %scope / %endscope handlers -------------------------------------------- @@ -1591,6 +1672,106 @@ DEFINE OFF_expr_frames 00497f0000000000 :pop_done eret +## --- %frame / %endframe handlers -------------------------------------------- +## Single-slot frame state, separate from the %scope stack. push_frame(a0= +## stream_end) parses `%frame NAME`, stashes name's TextSpan in +## current_frame_ptr/_len, and sets frame_active = 1. pop_frame(a0= +## stream_end) clears frame_active. Frames do not nest — a second push +## without an intervening pop is fatal. + +:push_frame + enter_0 + + # proc_pos += 32 (skip past the `%frame` token). + la_t0 &proc_pos + ld_t1,t0,0 + addi_t1,t1,32 + st_t1,t0,0 + + # Skip newlines between `%frame` and NAME. + la_a1 &pf_stream_end + st_a0,a1,0 + la_br &proc_skip_newlines + call + la_a1 &pf_stream_end + ld_a0,a1,0 + la_t0 &proc_pos + ld_t1,t0,0 + + # Require a WORD name token within the stream. 
+ la_br &err_bad_frame_header + beq_t1,a0 + ld_t2,t1,0 + la_br &err_bad_frame_header + bnez_t2 + + # !frame_active (cannot nest) + la_a1 &frame_active + ld_a2,a1,0 + la_br &err_frame_already_active + bnez_a2 + + # current_frame_ptr = name.text_ptr; current_frame_len = name.text_len + la_a3 &current_frame_ptr + ld_t2,t1,8 + st_t2,a3,0 + la_a3 &current_frame_len + ld_t2,t1,16 + st_t2,a3,0 + + # frame_active = 1 + li_a2 %1 %0 + st_a2,a1,0 + + # proc_pos += 32 (past the name). + la_t0 &proc_pos + ld_t1,t0,0 + addi_t1,t1,32 + st_t1,t0,0 + + # Newlines between `%frame NAME` and the body content are insignificant. + la_br &proc_skip_newlines + call + eret + +## pop_frame(a0 = stream_end): consume `%endframe` followed by a strict +## TOK_NEWLINE. Fatal if no frame is active. +:pop_frame + enter_0 + + # frame_active? + la_a1 &frame_active + ld_a2,a1,0 + la_br &err_frame_underflow + beqz_a2 + li_a2 %0 %0 + st_a2,a1,0 + + # proc_pos += 32 (past the `%endframe` token). + la_t0 &proc_pos + ld_t1,t0,0 + addi_t1,t1,32 + st_t1,t0,0 + + # Strict: the token immediately after `%endframe` must be TOK_NEWLINE. + la_br &err_bad_frame_header + beq_t1,a0 + ld_t2,t1,0 + li_t0 TOK_NEWLINE + la_br &err_bad_frame_header + bne_t2,t0 + # Consume the trailing newline only when %endframe sat at line-start; + # mid-line %endframe leaves the newline so it can be emitted. + la_t0 &proc_line_start + ld_a1,t0,0 + la_br &pop_frame_done + beqz_a1 + addi_t1,t1,32 + la_t0 &proc_pos + st_t1,t0,0 +:pop_frame_done + eret + ## --- %macro storage: parse header + body into macros[] / macro_body_tokens -- ## Called at proc_pos == line-start `%macro`. Leaves proc_pos past the %endm ## line with proc_line_start = 1. 
Uses BSS scratch (def_m_ptr, def_param_ptr, @@ -4815,7 +4996,88 @@ DEFINE OFF_expr_frames 00497f0000000000 st_a0,sp,0 st_a1,sp,8 + # ---- tok eq "%local" with tight ( -> expand and recurse over body ---- + # %local is a built-in (not a macro) but expands to an integer-yielding + # token sequence, so eval_expr_atom must handle it before the find_macro + # path. Validation (tight LPAREN, arg shape, frame_active, name lookup) + # is centralized in expand_local_into_pool. + ld_a0,sp,0 + la_a1 &const_local + li_a2 %6 %0 + la_br &tok_eq_const + call + la_br &eea_skip_local + beqz_a0 + + # Confirm tight LPAREN follows; otherwise treat %local as opaque text and + # let the integer-atom path fail with a useful error. + ld_t0,sp,0 + addi_t0,t0,32 + ld_t1,sp,8 + la_br &eea_skip_local + blt_t1,t0 + la_br &eea_skip_local + beq_t0,t1 + ld_a3,t0,0 + li_a2 TOK_LPAREN + la_br &eea_skip_local + bne_a3,a2 + ld_a3,t0,24 + la_br &eea_skip_local + beqz_a3 + + # Dispatch to the local-expansion path. + ld_a0,sp,0 + ld_a1,sp,8 + la_br &expand_local_into_pool + call + + # Snapshot elp_mark / elp_after before recursing. + la_a0 &elp_after + ld_t0,a0,0 + st_t0,sp,24 + la_a0 &elp_mark + ld_t0,a0,0 + st_t0,sp,32 + + # If pool was not extended (pool_used == mark) -> bad expression. 
+ la_a0 &pool_used + ld_t0,a0,0 + ld_t1,sp,32 + la_br &err_bad_macro_header + beq_t0,t1 + + # eval_expr_range(expand_pool + mark, expand_pool + pool_used) + la_a0 &expand_pool_ptr + ld_a0,a0,0 + ld_t1,sp,32 + add_a0,a0,t1 + la_a1 &expand_pool_ptr + ld_a1,a1,0 + la_a2 &pool_used + ld_a2,a2,0 + add_a1,a1,a2 + la_br &eval_expr_range + call + + la_a1 &eval_value + st_a0,a1,0 + + # restore pool_used = mark + la_a0 &pool_used + ld_t0,sp,32 + st_t0,a0,0 + + # eval_after_pos = saved elp_after + la_a0 &eval_after_pos + ld_t0,sp,24 + st_t0,a0,0 + + eret + +:eea_skip_local # macro_ptr = find_macro(tok) + ld_a0,sp,0 la_br &find_macro call st_a0,sp,16 @@ -5380,6 +5642,230 @@ DEFINE OFF_expr_frames 00497f0000000000 ## The unchosen branch is NOT evaluated, validated, or expanded. ## ## Any other text under a builtin slot -> fatal "bad builtin". +## expand_local_into_pool(a0=call_tok, a1=limit) -> writes elp_after, elp_mark +## Resolve %local(NAME) against the current frame: assemble the lookup key +## "<frame>_FRAME.<NAME>" in local_lookup_scratch, linear-search macros[] +## for that name, and copy the matching body into the pool. Errors: +## - call_tok+1 missing / not tight LPAREN: bad_macro_header +## - parse_args fails: propagated +## - arg_count != 1, arg span != 1 token, arg kind != WORD: bad_macro_header +## - frame not active: local_outside_frame +## - assembled name >= 256 bytes: local_name_too_long +## - no matching macro: unknown_local +## +## On success, elp_mark = pool_used at entry, elp_after = call_end_pos +## (Token* one past the call's `)`). Both expand_builtin_call's %local +## branch and eval_expr_atom's %local branch consume those. +:expand_local_into_pool + enter_0 + + # --- Validate (call_tok+1) is a tight LPAREN within the stream. 
--- + addi_t0,a0,32 # lparen = call_tok + 32 + la_br &err_bad_macro_header + blt_a1,t0 + la_br &err_bad_macro_header + beq_t0,a1 + ld_a2,t0,0 + li_a3 TOK_LPAREN + la_br &err_bad_macro_header + bne_a2,a3 + ld_a2,t0,24 + la_br &err_bad_macro_header + beqz_a2 + + # --- parse_args(lparen, limit) --- + mov_a0,t0 + la_br &parse_args + call + + # --- Validate arg shape: arg_count == 1, single 32-byte token, WORD kind. --- + la_a0 &arg_count + ld_t0,a0,0 + li_t1 %1 %0 + la_br &err_bad_macro_header + bne_t0,t1 + + la_a0 &arg_starts_ptr + ld_a0,a0,0 + ld_t0,a0,0 # arg_tok = arg_starts[0] + la_a1 &arg_ends_ptr + ld_a1,a1,0 + ld_t1,a1,0 # arg_end = arg_ends[0] + sub_t2,t1,t0 + li_a2 %32 %0 + la_br &err_bad_macro_header + bne_t2,a2 + + ld_a3,t0,0 # arg_tok->kind + li_a2 TOK_WORD + la_br &err_bad_macro_header + bne_a3,a2 + + # Stash arg.text.ptr / arg.text.len for the byte-copy loop below. + ld_a0,t0,8 + la_a1 &elp_arg_ptr + st_a0,a1,0 + ld_a0,t0,16 + la_a1 &elp_arg_len + st_a0,a1,0 + + # --- frame_active? --- + la_a1 &frame_active + ld_a2,a1,0 + la_br &err_local_outside_frame + beqz_a2 + + # --- name_len = current_frame_len + 7 + arg_len; must be < 256. --- + la_a1 &current_frame_len + ld_a2,a1,0 + la_a1 &elp_arg_len + ld_a3,a1,0 + add_t0,a2,a3 + addi_t0,t0,7 + la_a1 &elp_name_len + st_t0,a1,0 + li_t1 %256 %0 + la_br &err_local_name_too_long + blt_t1,t0 + la_br &err_local_name_too_long + beq_t0,t1 + + # --- Build lookup name in local_lookup_scratch. --- + # First: copy current_frame_ptr[0..frame_len] -> scratch[0..] + la_a0 &current_frame_ptr + ld_t0,a0,0 # frame_ptr + la_a0 &current_frame_len + ld_t1,a0,0 # frame_len + la_t2 &local_lookup_scratch_ptr + ld_t2,t2,0 # scratch_base + li_a3 %0 %0 +:elp_copy_frame + la_br &elp_copy_frame_done + beq_a3,t1 + add_a0,t0,a3 + lb_a0,a0,0 + add_a1,t2,a3 + sb_a0,a1,0 + addi_a3,a3,1 + la_br &elp_copy_frame + b +:elp_copy_frame_done + + # Advance scratch cursor to scratch + frame_len for the suffix copy. 
+ add_t2,t2,t1 + + # Copy const_frame_suffix (7 bytes "_FRAME.") -> scratch[frame_len..] + la_a0 &const_frame_suffix + li_t1 %7 %0 + li_a3 %0 %0 +:elp_copy_suffix + la_br &elp_copy_suffix_done + beq_a3,t1 + add_a1,a0,a3 + lb_a1,a1,0 + add_t0,t2,a3 + sb_a1,t0,0 + addi_a3,a3,1 + la_br &elp_copy_suffix + b +:elp_copy_suffix_done + + # Advance past suffix (7 bytes). + addi_t2,t2,7 + + # Copy arg bytes -> scratch[frame_len + 7 ..] + la_a0 &elp_arg_ptr + ld_t0,a0,0 + la_a0 &elp_arg_len + ld_t1,a0,0 + li_a3 %0 %0 +:elp_copy_arg + la_br &elp_copy_arg_done + beq_a3,t1 + add_a0,t0,a3 + lb_a0,a0,0 + add_a1,t2,a3 + sb_a0,a1,0 + addi_a3,a3,1 + la_br &elp_copy_arg + b +:elp_copy_arg_done + + # --- Linear search macros[] for an exact name match. --- + # m (a3) walks from macros_ptr to macros_end (each MACRO_RECORD_SIZE). + # Match criterion: m->name.len == name_len AND first name_len bytes of + # m->name.ptr equal local_lookup_scratch. Modeled on find_macro: keep + # m in a3, reload macros_end into t0 after each iteration, and use + # a0/a1/a2/t1/t2 as scratch within the inner byte-compare. + la_a3 &macros_ptr + ld_a3,a3,0 + la_t0 &macros_end + ld_t0,t0,0 +:elp_search_loop + la_br &elp_unknown + beq_a3,t0 + + # m->name.len == name_len? + ld_t1,a3,8 + la_a0 &elp_name_len + ld_a2,a0,0 + la_br &elp_search_next + bne_t1,a2 + + # byte-compare m->name.ptr vs scratch for name_len bytes. + ld_t1,a3,0 # name_ptr + la_a0 &local_lookup_scratch_ptr + ld_a1,a0,0 # lookup_ptr + li_t2 %0 %0 +:elp_search_cmp + la_br &elp_search_match + beq_t2,a2 + add_a0,t1,t2 + lb_a0,a0,0 + add_t0,a1,t2 + lb_t0,t0,0 + la_br &elp_search_next + bne_a0,t0 + addi_t2,t2,1 + la_br &elp_search_cmp + b + +:elp_search_next + li_t1 M1PP_MACRO_RECORD_SIZE + add_a3,a3,t1 + la_t0 &macros_end + ld_t0,t0,0 + la_br &elp_search_loop + b + +:elp_unknown + la_br &err_unknown_local + b + +:elp_search_match + # a3 = matched macro pointer. mark = pool_used; copy body span. 
+ la_a0 &pool_used + ld_t0,a0,0 + la_a1 &elp_mark + st_t0,a1,0 + + li_t0 M1PP_MACRO_BODY_START_OFF + add_t0,a3,t0 + ld_a0,t0,0 # body_start + li_t1 M1PP_MACRO_BODY_END_OFF + add_t1,a3,t1 + ld_a1,t1,0 # body_end + la_br &copy_span_to_pool + call + + # elp_after = call_end_pos + la_a0 &call_end_pos + ld_t0,a0,0 + la_a1 &elp_after + st_t0,a1,0 + + eret + :expand_builtin_call enter_0 @@ -5487,6 +5973,17 @@ DEFINE OFF_expr_frames 00497f0000000000 la_br &ebc_str bnez_a0 + # if tok_eq_const(tok, "%local", 6) -> local path + la_a0 &ebc_stream + ld_a0,a0,0 + ld_a0,a0,16 + la_a1 &const_local + li_a2 %6 %0 + la_br &tok_eq_const + call + la_br &ebc_local + bnez_a0 + # else: fatal la_br &err_bad_macro_header b @@ -5809,6 +6306,40 @@ DEFINE OFF_expr_frames 00497f0000000000 eret +## %local(NAME): emit-time variant. expand_builtin_call has already +## parse_args'd the call (so arg_starts/arg_ends/arg_count/call_end_pos +## are set), but expand_local_into_pool re-parses internally so it can +## also be invoked from eval_expr_atom where parse_args wasn't called. +## After the helper returns, advance the stream past the call and push +## the body slice as a fresh stream for rescan. +:ebc_local + # call_tok = stream->pos; limit = stream->end + la_a0 &ebc_stream + ld_a0,a0,0 + ld_t0,a0,16 # call_tok + ld_t1,a0,8 # limit + mov_a0,t0 + mov_a1,t1 + la_br &expand_local_into_pool + call + + # stream->pos = elp_after; stream->line_start = 0 + la_a0 &ebc_stream + ld_a0,a0,0 + la_a1 &elp_after + ld_t0,a1,0 + st_t0,a0,16 + li_t1 %0 %0 + st_t1,a0,24 + + # push_pool_stream_from_mark(elp_mark) + la_a0 &elp_mark + ld_a0,a0,0 + la_br &push_pool_stream_from_mark + call + + eret + ## --- Error paths ------------------------------------------------------------- ## Each err_* loads a (msg, len) pair for fatal; fatal writes "m1pp: <msg>\n" ## to stderr and exits 1. 
Error labels are branched to from range/overflow @@ -5902,6 +6433,34 @@ DEFINE OFF_expr_frames 00497f0000000000 la_a0 &msg_bad_scope_label la_br &fatal b +:err_bad_frame_header + la_a0 &msg_bad_frame_header + la_br &fatal + b +:err_frame_already_active + la_a0 &msg_frame_already_active + la_br &fatal + b +:err_frame_underflow + la_a0 &msg_frame_underflow + la_br &fatal + b +:err_frame_not_closed + la_a0 &msg_frame_not_closed + la_br &fatal + b +:err_local_outside_frame + la_a0 &msg_local_outside_frame + la_br &fatal + b +:err_unknown_local + la_a0 &msg_unknown_local + la_br &fatal + b +:err_local_name_too_long + la_a0 &msg_local_name_too_long + la_br &fatal + b ## fatal(a0=msg_ptr): writes "m1pp: <msg>\n" to stderr and exits 1. ## Length is computed inline via a strlen loop (messages are NUL-terminated). @@ -5973,6 +6532,11 @@ DEFINE OFF_expr_frames 00497f0000000000 :const_count "COUNT" :const_scope "%scope" :const_endscope "%endscope" +:const_frame "%frame" +:const_endframe "%endframe" +:const_local "%local" +## Suffix appended to the frame name when looking up <frame>_FRAME.<field>. +:const_frame_suffix "_FRAME." ## Operator strings for expr_op_code. Each is a raw byte literal; lengths ## are passed separately to tok_eq_const. 
"<=" must be tested before "<" @@ -6055,6 +6619,7 @@ DEFINE OFF_expr_frames 00497f0000000000 &streams_ptr ZERO4 OFF_streams &expand_pool_ptr ZERO4 OFF_expand_pool &expr_frames_ptr ZERO4 OFF_expr_frames +&local_lookup_scratch_ptr ZERO4 OFF_local_lookup_scratch :bss_init_tbl_end :msg_prefix "m1pp: " @@ -6084,6 +6649,13 @@ DEFINE OFF_expr_frames 00497f0000000000 :msg_scope_underflow "scope underflow" '00' :msg_scope_not_closed "scope not closed" '00' :msg_bad_scope_label "bad scope label" '00' +:msg_bad_frame_header "bad frame header" '00' +:msg_frame_already_active "frame already active" '00' +:msg_frame_underflow "frame underflow" '00' +:msg_frame_not_closed "frame not closed" '00' +:msg_local_outside_frame "local outside frame" '00' +:msg_unknown_local "unknown local" '00' +:msg_local_name_too_long "local name too long" '00' ## --- BSS --------------------------------------------------------------------- ## Placed before :ELF_end so filesz/memsz (which this ELF header sets equal) @@ -6141,6 +6713,31 @@ ZERO8 ZERO8 :psc_stream_end ZERO8 +:pf_stream_end +ZERO8 + +## --- Frame state ------------------------------------------------------------- +## Single-slot "current frame" used by %local. current_frame_ptr/_len point +## into stable text memory (input_buf or text_buf), borrowed from the WORD +## token that named the frame. frame_active is 0 / 1. +## elp_after / elp_mark are expand_local_into_pool's outputs (mirroring +## emt_after_pos / emt_mark for expand_macro_tokens). 
+:current_frame_ptr +ZERO8 +:current_frame_len +ZERO8 +:frame_active +ZERO8 +:elp_after +ZERO8 +:elp_mark +ZERO8 +:elp_arg_ptr +ZERO8 +:elp_arg_len +ZERO8 +:elp_name_len +ZERO8 :err_saved_msg ZERO8 :err_saved_len @@ -6477,5 +7074,7 @@ ZERO8 ZERO8 :expr_frames_ptr ZERO8 +:local_lookup_scratch_ptr +ZERO8 :ELF_end diff --git a/M1pp/M1pp.c b/M1pp/M1pp.c @@ -18,6 +18,14 @@ * %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e * %str(IDENT) stringify a single WORD token into a "..."-quoted literal * + * %frame NAME / %endframe set/clear a single-slot "current frame" + * %local(NAME) expand to the body of <frame>_FRAME.<NAME> + * + * Frames are a separate state from the %scope stack. %frame does not push + * onto scope_stack; %scope does not change the current frame. This lets a + * function body open inner control-flow scopes (whose ::labels resolve + * against the scope stack) without disturbing %local lookup. + * * Expression syntax is intentionally Lisp-shaped: * atoms: decimal or 0x-prefixed integer literals * calls: (+ a b), (- a b), (* a b), (/ a b), (% a b), (<< a b), (>> a b) @@ -168,6 +176,8 @@ static struct Token expand_pool[MAX_EXPAND]; static struct Macro macros[MAX_MACROS]; static struct Stream streams[MAX_STACK]; static struct TextSpan scope_stack[MAX_SCOPE_DEPTH]; +static struct TextSpan current_frame; +static int frame_active; static int text_used; static int source_count; @@ -1383,6 +1393,75 @@ static int apply_expr_op(enum ExprOp op, const long long *args, int argc, long l static int eval_expr_range(struct TokenSpan span, long long *out); +static int expand_local_into_pool(struct Token *call_tok, struct Token *limit, + struct Token **after_out, int *mark_out) +{ + /* Resolve %local(NAME) against the current frame: build the lookup + * key "<frame>_FRAME.<NAME>" and copy the matching macro's body + * into the pool. NAME must be exactly one WORD token. 
The pool + * mark and the position past the call's `)` are returned so the + * caller can either push the body as a stream (process_tokens) or + * recursively eval it as an expression (eval_expr_atom). */ + char name[256]; + int frame_len; + int arg_len; + int name_len; + int i; + const struct Macro *m = NULL; + struct Token *arg_tok; + int mark = pool_used; + + if (call_tok + 1 >= limit || (call_tok + 1)->kind != TOK_LPAREN || + !(call_tok + 1)->tight) { + return fail("bad builtin"); + } + if (!parse_args(call_tok + 1, limit)) { + return 0; + } + if (arg_count != 1) { + return fail("bad builtin"); + } + if (arg_ends[0] - arg_starts[0] != 1) { + return fail("bad builtin"); + } + arg_tok = arg_starts[0]; + if (arg_tok->kind != TOK_WORD) { + return fail("bad builtin"); + } + if (!frame_active) { + return fail("local outside frame"); + } + + frame_len = current_frame.len; + arg_len = arg_tok->text.len; + name_len = frame_len + 7 /* _FRAME. */ + arg_len; + if (name_len >= (int)sizeof(name)) { + return fail("local name too long"); + } + memcpy(name, current_frame.ptr, (size_t)frame_len); + memcpy(name + frame_len, "_FRAME.", 7); + memcpy(name + frame_len + 7, arg_tok->text.ptr, (size_t)arg_len); + + for (i = 0; i < macro_count; i++) { + if (macros[i].name.len == name_len && + memcmp(macros[i].name.ptr, name, (size_t)name_len) == 0) { + m = &macros[i]; + break; + } + } + if (m == NULL) { + return fail("unknown local"); + } + + if (!copy_span_to_pool((struct TokenSpan){m->body_start, m->body_end})) { + pool_used = mark; + return 0; + } + *after_out = call_end_pos; + *mark_out = mark; + return 1; +} + static int eval_expr_atom(struct Token *tok, struct Token *limit, struct Token **after_out, long long *out) { @@ -1390,6 +1469,23 @@ static int eval_expr_atom(struct Token *tok, struct Token *limit, struct Token *after; int mark; + if (tok->kind == TOK_WORD && token_text_eq(tok, "%local")) { + if (!expand_local_into_pool(tok, limit, &after, &mark)) { + return 0; + } + if 
(pool_used == mark) { + pool_used = mark; + return fail("bad expression"); + } + if (!eval_expr_range((struct TokenSpan){expand_pool + mark, expand_pool + pool_used}, out)) { + pool_used = mark; + return 0; + } + pool_used = mark; + *after_out = after; + return 1; + } + macro = find_macro(tok); if (macro != NULL && ((tok + 1 < limit && (tok + 1)->kind == TOK_LPAREN && @@ -1628,6 +1724,18 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok) return push_pool_stream_from_mark(mark); } + if (token_text_eq(tok, "%local")) { + struct Token *after; + int mark; + + if (!expand_local_into_pool((struct Token *)tok, s->end, &after, &mark)) { + return 0; + } + s->pos = after; + s->line_start = 0; + return push_pool_stream_from_mark(mark); + } + if (token_text_eq(tok, "%str")) { struct Token *arg_tok; struct Token *end_pos; @@ -1732,6 +1840,53 @@ static int pop_scope(struct Stream *s) return 1; } +static int push_frame(struct Stream *s) +{ + /* %frame NAME sets the single-slot current frame, used by %local + * lookup. Frames do not nest: a second %frame before %endframe is + * an error. The header behaves like %scope (newlines after the + * name are absorbed when the directive appeared at line_start). */ + int started_at_line_start = s->line_start; + + s->pos++; + skip_newlines(&s->pos, s->end); + if (s->pos >= s->end || s->pos->kind != TOK_WORD) { + return fail("bad frame header"); + } + if (frame_active) { + return fail("frame already active"); + } + current_frame = s->pos->text; + frame_active = 1; + s->pos++; + if (started_at_line_start) { + skip_newlines(&s->pos, s->end); + s->line_start = 1; + } + return 1; +} + +static int pop_frame(struct Stream *s) +{ + /* %endframe must be immediately followed by TOK_NEWLINE; the newline + * is consumed iff %endframe itself appeared at line_start. 
*/ + int started_at_line_start = s->line_start; + + s->pos++; + if (!frame_active) { + return fail("frame underflow"); + } + frame_active = 0; + if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) { + return fail("expected newline after %endframe"); + } + if (started_at_line_start) { + s->pos++; + s->line_start = 1; + } + return 1; +} + static int process_tokens(void) { if (!push_stream_span((struct TokenSpan){source_tokens, source_tokens + source_count}, -1)) { @@ -1792,6 +1947,20 @@ static int process_tokens(void) continue; } + if (tok->kind == TOK_WORD && token_text_eq(tok, "%frame")) { + if (!push_frame(s)) { + return 0; + } + continue; + } + + if (tok->kind == TOK_WORD && token_text_eq(tok, "%endframe")) { + if (!pop_frame(s)) { + return 0; + } + continue; + } + if (tok->kind == TOK_NEWLINE) { s->pos++; s->line_start = 1; @@ -1810,7 +1979,8 @@ static int process_tokens(void) token_text_eq(tok, "%") || token_text_eq(tok, "$") || token_text_eq(tok, "%select") || - token_text_eq(tok, "%str"))) { + token_text_eq(tok, "%str") || + token_text_eq(tok, "%local"))) { if (!expand_builtin_call(s, tok)) { return 0; } @@ -1838,6 +2008,9 @@ static int process_tokens(void) if (scope_depth != 0) { return fail("scope not closed"); } + if (frame_active) { + return fail("frame not closed"); + } if (output_used >= MAX_OUTPUT) { return fail("output overflow"); diff --git a/P1/P1-aarch64.M1 b/P1/P1-aarch64.M1 @@ -54,6 +54,7 @@ DEFINE add_a0,t1,a0 4001008B DEFINE add_a0,t1,t2 40010B8B DEFINE add_a0,t2,a1 6001018B DEFINE add_a1,a0,a1 0100018B +DEFINE add_a1,a0,a3 0100038B DEFINE add_a1,a1,a0 2100008B DEFINE add_a1,a1,a2 2100028B DEFINE add_a1,a1,a3 2100038B @@ -89,12 +90,16 @@ DEFINE add_a3,a3,t2 63000B8B DEFINE add_a3,t0,t2 23010B8B DEFINE add_a3,t1,a2 4301028B DEFINE add_t0,a1,t2 29000B8B +DEFINE add_t0,a2,a3 4900038B DEFINE add_t0,a3,a1 6900018B +DEFINE add_t0,a3,t0 6900098B DEFINE add_t0,t0,a0 2901008B DEFINE add_t0,t0,a1 2901018B DEFINE add_t0,t0,a3 2901038B DEFINE add_t0,t0,t1 
29010A8B +DEFINE add_t0,t2,a3 6901038B DEFINE add_t1,a0,t0 0A00098B +DEFINE add_t1,a3,t1 6A000A8B DEFINE add_t1,t0,t1 2A010A8B DEFINE add_t1,t1,a0 4A01008B DEFINE add_t1,t1,a1 4A01018B @@ -109,6 +114,7 @@ DEFINE add_t2,t0,t1 2B010A8B DEFINE add_t2,t1,t2 4B010B8B DEFINE add_t2,t2,a0 6B01008B DEFINE add_t2,t2,a3 6B01038B +DEFINE add_t2,t2,t1 6B010A8B DEFINE sub_a0,a0,t1 00000ACB DEFINE sub_a0,a1,t2 20000BCB DEFINE sub_a0,a3,a0 600000CB @@ -179,11 +185,13 @@ DEFINE addi_a3,a3,1 63040091 DEFINE addi_a3,a3,32 63800091 DEFINE addi_a3,t0,32 23810091 DEFINE addi_a3,t1,32 43810091 +DEFINE addi_t0,a0,32 09800091 DEFINE addi_t0,a1,neg32 298000D1 DEFINE addi_t0,a1,32 29800091 DEFINE addi_t0,t0,neg1 290500D1 DEFINE addi_t0,t0,1 29050091 DEFINE addi_t0,t0,2 29090091 +DEFINE addi_t0,t0,7 291D0091 DEFINE addi_t0,t0,32 29810091 DEFINE addi_t1,t0,32 2A810091 DEFINE addi_t1,t1,neg32 4A8100D1 @@ -197,6 +205,7 @@ DEFINE addi_t2,t0,32 2B810091 DEFINE addi_t2,t1,2 4B090091 DEFINE addi_t2,t2,neg1 6B0500D1 DEFINE addi_t2,t2,1 6B050091 +DEFINE addi_t2,t2,7 6B1D0091 DEFINE addi_t2,t2,24 6B610091 DEFINE addi_t2,t2,32 6B810091 DEFINE andi_a2,a0,15 F00180D20200108A @@ -228,6 +237,8 @@ DEFINE ld_a0,a3,8 600440F9 DEFINE ld_a0,a3,16 600840F9 DEFINE ld_a0,a3,24 600C40F9 DEFINE ld_a0,t0,0 200140F9 +DEFINE ld_a0,t0,8 200540F9 +DEFINE ld_a0,t0,16 200940F9 DEFINE ld_a0,sp,0 E00B40F9 DEFINE ld_a0,sp,8 E00F40F9 DEFINE ld_a0,sp,24 E01740F9 @@ -254,6 +265,7 @@ DEFINE ld_a2,a2,0 420040F9 DEFINE ld_a2,t0,0 220140F9 DEFINE ld_a2,t0,8 220540F9 DEFINE ld_a2,t0,16 220940F9 +DEFINE ld_a2,t0,24 220D40F9 DEFINE ld_a2,t1,neg32 42015EF8 DEFINE ld_a2,t1,0 420140F9 DEFINE ld_a2,t2,0 620140F9 @@ -314,6 +326,8 @@ DEFINE ld_t2,a3,16 6B0840F9 DEFINE ld_t2,t0,0 2B0140F9 DEFINE ld_t2,t0,24 2B0D40F9 DEFINE ld_t2,t1,0 4B0140F9 +DEFINE ld_t2,t1,8 4B0540F9 +DEFINE ld_t2,t1,16 4B0940F9 DEFINE ld_t2,t2,0 6B0140F9 DEFINE ld_t2,sp,16 EB1340F9 DEFINE st_a0,a1,0 200000F9 @@ -422,6 +436,7 @@ DEFINE sb_a0,a1,0 20000039 DEFINE sb_a0,a2,0 
40000039 DEFINE sb_a0,t2,0 60010039 DEFINE sb_a1,a2,0 41000039 +DEFINE sb_a1,t0,0 21010039 DEFINE sb_a2,a0,0 02000039 DEFINE sb_a2,a1,0 22000039 DEFINE sb_a2,a3,0 62000039 @@ -483,6 +498,7 @@ DEFINE bne_t2,t0 7F0109EB4000005420021FD6 DEFINE blt_a0,a2 1F0002EB4A00005420021FD6 DEFINE blt_a1,a0 3F0000EB4A00005420021FD6 DEFINE blt_a1,a2 3F0002EB4A00005420021FD6 +DEFINE blt_a1,t0 3F0009EB4A00005420021FD6 DEFINE blt_a1,t2 3F000BEB4A00005420021FD6 DEFINE blt_a2,a1 5F0001EB4A00005420021FD6 DEFINE blt_a2,a3 5F0003EB4A00005420021FD6 diff --git a/P1/P1-amd64.M1 b/P1/P1-amd64.M1 @@ -54,6 +54,7 @@ DEFINE add_a0,t1,a0 4989F94C89DF4C01CF DEFINE add_a0,t1,t2 4C89DF4C01C7 DEFINE add_a0,t2,a1 4C89C74801F7 DEFINE add_a1,a0,a1 4989F14889FE4C01CE +DEFINE add_a1,a0,a3 4889FE4801CE DEFINE add_a1,a1,a0 4889F64801FE DEFINE add_a1,a1,a2 4889F64801D6 DEFINE add_a1,a1,a3 4889F64801CE @@ -89,12 +90,16 @@ DEFINE add_a3,a3,t2 4889C94C01C1 DEFINE add_a3,t0,t2 4C89D14C01C1 DEFINE add_a3,t1,a2 4C89D94801D1 DEFINE add_t0,a1,t2 4989F24D01C2 +DEFINE add_t0,a2,a3 4989D24901CA DEFINE add_t0,a3,a1 4989CA4901F2 +DEFINE add_t0,a3,t0 4D89D14989CA4D01CA DEFINE add_t0,t0,a0 4D89D24901FA DEFINE add_t0,t0,a1 4D89D24901F2 DEFINE add_t0,t0,a3 4D89D24901CA DEFINE add_t0,t0,t1 4D89D24D01DA +DEFINE add_t0,t2,a3 4D89C24901CA DEFINE add_t1,a0,t0 4989FB4D01D3 +DEFINE add_t1,a3,t1 4D89D94989CB4D01CB DEFINE add_t1,t0,t1 4D89D94D89D34D01CB DEFINE add_t1,t1,a0 4D89DB4901FB DEFINE add_t1,t1,a1 4D89DB4901F3 @@ -109,6 +114,7 @@ DEFINE add_t2,t0,t1 4D89D04D01D8 DEFINE add_t2,t1,t2 4D89C14D89D84D01C8 DEFINE add_t2,t2,a0 4D89C04901F8 DEFINE add_t2,t2,a3 4D89C04901C8 +DEFINE add_t2,t2,t1 4D89C04D01D8 DEFINE sub_a0,a0,t1 4889FF4C29DF DEFINE sub_a0,a1,t2 4889F74C29C7 DEFINE sub_a0,a3,a0 4989F94889CF4C29CF @@ -179,11 +185,13 @@ DEFINE addi_a3,a3,1 4889C94883C101 DEFINE addi_a3,a3,32 4889C94883C120 DEFINE addi_a3,t0,32 4C89D14883C120 DEFINE addi_a3,t1,32 4C89D94883C120 +DEFINE addi_t0,a0,32 4989FA4983C220 DEFINE addi_t0,a1,neg32 
4989F24983C2E0 DEFINE addi_t0,a1,32 4989F24983C220 DEFINE addi_t0,t0,neg1 4D89D24983C2FF DEFINE addi_t0,t0,1 4D89D24983C201 DEFINE addi_t0,t0,2 4D89D24983C202 +DEFINE addi_t0,t0,7 4D89D24983C207 DEFINE addi_t0,t0,32 4D89D24983C220 DEFINE addi_t1,t0,32 4D89D34983C320 DEFINE addi_t1,t1,neg32 4D89DB4983C3E0 @@ -197,6 +205,7 @@ DEFINE addi_t2,t0,32 4D89D04983C020 DEFINE addi_t2,t1,2 4D89D84983C002 DEFINE addi_t2,t2,neg1 4D89C04983C0FF DEFINE addi_t2,t2,1 4D89C04983C001 +DEFINE addi_t2,t2,7 4D89C04983C007 DEFINE addi_t2,t2,24 4D89C04983C018 DEFINE addi_t2,t2,32 4D89C04983C020 DEFINE andi_a2,a0,15 4889FA4883E20F @@ -228,6 +237,8 @@ DEFINE ld_a0,a3,8 488B7908 DEFINE ld_a0,a3,16 488B7910 DEFINE ld_a0,a3,24 488B7918 DEFINE ld_a0,t0,0 498B7A00 +DEFINE ld_a0,t0,8 498B7A08 +DEFINE ld_a0,t0,16 498B7A10 DEFINE ld_a0,sp,0 488B7C2410 DEFINE ld_a0,sp,8 488B7C2418 DEFINE ld_a0,sp,24 488B7C2428 @@ -254,6 +265,7 @@ DEFINE ld_a2,a2,0 488B5200 DEFINE ld_a2,t0,0 498B5200 DEFINE ld_a2,t0,8 498B5208 DEFINE ld_a2,t0,16 498B5210 +DEFINE ld_a2,t0,24 498B5218 DEFINE ld_a2,t1,neg32 498B53E0 DEFINE ld_a2,t1,0 498B5300 DEFINE ld_a2,t2,0 498B5000 @@ -314,6 +326,8 @@ DEFINE ld_t2,a3,16 4C8B4110 DEFINE ld_t2,t0,0 4D8B4200 DEFINE ld_t2,t0,24 4D8B4218 DEFINE ld_t2,t1,0 4D8B4300 +DEFINE ld_t2,t1,8 4D8B4308 +DEFINE ld_t2,t1,16 4D8B4310 DEFINE ld_t2,t2,0 4D8B4000 DEFINE ld_t2,sp,16 4C8B442420 DEFINE st_a0,a1,0 48897E00 @@ -422,6 +436,7 @@ DEFINE sb_a0,a1,0 48887E00 DEFINE sb_a0,a2,0 48887A00 DEFINE sb_a0,t2,0 49887800 DEFINE sb_a1,a2,0 48887200 +DEFINE sb_a1,t0,0 49887200 DEFINE sb_a2,a0,0 48885700 DEFINE sb_a2,a1,0 48885600 DEFINE sb_a2,a3,0 48885100 @@ -483,6 +498,7 @@ DEFINE bne_t2,t0 4D39D0740341FFE7 DEFINE blt_a0,a2 4839D77D0341FFE7 DEFINE blt_a1,a0 4839FE7D0341FFE7 DEFINE blt_a1,a2 4839D67D0341FFE7 +DEFINE blt_a1,t0 4C39D67D0341FFE7 DEFINE blt_a1,t2 4C39C67D0341FFE7 DEFINE blt_a2,a1 4839F27D0341FFE7 DEFINE blt_a2,a3 4839CA7D0341FFE7 diff --git a/P1/P1-riscv64.M1 b/P1/P1-riscv64.M1 @@ -54,6 +54,7 @@ 
DEFINE add_a0,t1,a0 3305A300 DEFINE add_a0,t1,t2 33057300 DEFINE add_a0,t2,a1 3385B300 DEFINE add_a1,a0,a1 B305B500 +DEFINE add_a1,a0,a3 B305D500 DEFINE add_a1,a1,a0 B385A500 DEFINE add_a1,a1,a2 B385C500 DEFINE add_a1,a1,a3 B385D500 @@ -89,12 +90,16 @@ DEFINE add_a3,a3,t2 B3867600 DEFINE add_a3,t0,t2 B3867200 DEFINE add_a3,t1,a2 B306C300 DEFINE add_t0,a1,t2 B3827500 +DEFINE add_t0,a2,a3 B302D600 DEFINE add_t0,a3,a1 B382B600 +DEFINE add_t0,a3,t0 B3825600 DEFINE add_t0,t0,a0 B382A200 DEFINE add_t0,t0,a1 B382B200 DEFINE add_t0,t0,a3 B382D200 DEFINE add_t0,t0,t1 B3826200 +DEFINE add_t0,t2,a3 B382D300 DEFINE add_t1,a0,t0 33035500 +DEFINE add_t1,a3,t1 33836600 DEFINE add_t1,t0,t1 33836200 DEFINE add_t1,t1,a0 3303A300 DEFINE add_t1,t1,a1 3303B300 @@ -109,6 +114,7 @@ DEFINE add_t2,t0,t1 B3836200 DEFINE add_t2,t1,t2 B3037300 DEFINE add_t2,t2,a0 B383A300 DEFINE add_t2,t2,a3 B383D300 +DEFINE add_t2,t2,t1 B3836300 DEFINE sub_a0,a0,t1 33056540 DEFINE sub_a0,a1,t2 33857540 DEFINE sub_a0,a3,a0 3385A640 @@ -179,11 +185,13 @@ DEFINE addi_a3,a3,1 93861600 DEFINE addi_a3,a3,32 93860602 DEFINE addi_a3,t0,32 93860202 DEFINE addi_a3,t1,32 93060302 +DEFINE addi_t0,a0,32 93020502 DEFINE addi_t0,a1,neg32 938205FE DEFINE addi_t0,a1,32 93820502 DEFINE addi_t0,t0,neg1 9382F2FF DEFINE addi_t0,t0,1 93821200 DEFINE addi_t0,t0,2 93822200 +DEFINE addi_t0,t0,7 93827200 DEFINE addi_t0,t0,32 93820202 DEFINE addi_t1,t0,32 13830202 DEFINE addi_t1,t1,neg32 130303FE @@ -197,6 +205,7 @@ DEFINE addi_t2,t0,32 93830202 DEFINE addi_t2,t1,2 93032300 DEFINE addi_t2,t2,neg1 9383F3FF DEFINE addi_t2,t2,1 93831300 +DEFINE addi_t2,t2,7 93837300 DEFINE addi_t2,t2,24 93838301 DEFINE addi_t2,t2,32 93830302 DEFINE andi_a2,a0,15 1376F500 @@ -228,6 +237,8 @@ DEFINE ld_a0,a3,8 03B58600 DEFINE ld_a0,a3,16 03B50601 DEFINE ld_a0,a3,24 03B58601 DEFINE ld_a0,t0,0 03B50200 +DEFINE ld_a0,t0,8 03B58200 +DEFINE ld_a0,t0,16 03B50201 DEFINE ld_a0,sp,0 03350101 DEFINE ld_a0,sp,8 03358101 DEFINE ld_a0,sp,24 03358102 @@ -254,6 +265,7 @@ 
DEFINE ld_a2,a2,0 03360600 DEFINE ld_a2,t0,0 03B60200 DEFINE ld_a2,t0,8 03B68200 DEFINE ld_a2,t0,16 03B60201 +DEFINE ld_a2,t0,24 03B68201 DEFINE ld_a2,t1,neg32 033603FE DEFINE ld_a2,t1,0 03360300 DEFINE ld_a2,t2,0 03B60300 @@ -314,6 +326,8 @@ DEFINE ld_t2,a3,16 83B30601 DEFINE ld_t2,t0,0 83B30200 DEFINE ld_t2,t0,24 83B38201 DEFINE ld_t2,t1,0 83330300 +DEFINE ld_t2,t1,8 83338300 +DEFINE ld_t2,t1,16 83330301 DEFINE ld_t2,t2,0 83B30300 DEFINE ld_t2,sp,16 83330102 DEFINE st_a0,a1,0 23B0A500 @@ -422,6 +436,7 @@ DEFINE sb_a0,a1,0 2380A500 DEFINE sb_a0,a2,0 2300A600 DEFINE sb_a0,t2,0 2380A300 DEFINE sb_a1,a2,0 2300B600 +DEFINE sb_a1,t0,0 2380B200 DEFINE sb_a2,a0,0 2300C500 DEFINE sb_a2,a1,0 2380C500 DEFINE sb_a2,a3,0 2380C600 @@ -483,6 +498,7 @@ DEFINE bne_t2,t0 6384530067800F00 DEFINE blt_a0,a2 6354C50067800F00 DEFINE blt_a1,a0 63D4A50067800F00 DEFINE blt_a1,a2 63D4C50067800F00 +DEFINE blt_a1,t0 63D4550067800F00 DEFINE blt_a1,t2 63D4750067800F00 DEFINE blt_a2,a1 6354B60067800F00 DEFINE blt_a2,a3 6354D60067800F00 diff --git a/docs/M1PP.md b/docs/M1PP.md @@ -8,12 +8,40 @@ M1 source with macro directives and emits plain M1 suitable for `M0`. The implementation lives in `m1pp/m1pp.c`. It is one pass, allocation-free (fixed static buffers), and stops at the first error. +## Features + +- Function-like macros with parameters (`%macro` / `%endm`); zero-arg call + as `%NAME` or `%NAME()` +- Brace-grouped arguments (`{ ... 
}`) to pass token spans containing commas + or parens as a single argument +- Token paste with `##` after argument substitution +- Recursive expansion: expanded bodies are rescanned, so macros can call + macros +- Local labels (`:@name` / `&@name`) rewritten per-expansion for hygienic + intra-macro labels +- Scoped labels (`::name` / `&::name`) resolved at **emit time** against + the `%scope` stack — enables generic control-flow macros like + `loop`/`break` +- Struct and enum synthesis (`%struct`, `%enum`) generating per-field + zero-arg macros plus `SIZE`/`COUNT` +- Named stack-frame access via `%frame` / `%endframe` + `%local(field)`, + composing with `%struct`-generated `<frame>_FRAME.<field>` macros +- Compile-time integer expression language (Lisp S-expressions: + arithmetic, bitwise, shift, comparison, `strlen`) +- M0-safe little-endian hex emission: `!` (1B), `@` (2B), `%` (4B), + `$` (8B) — emits `'AABBCCDD'` quoted literals +- Conditional token selection: `%select(cond, then, else)` +- Stringification: `%str(IDENT)` → `"IDENT"` +- Line comments (`#`, `;`); whitespace-insensitive output normalization +- Single-pass, allocation-free implementation with fixed static buffers; + fail-fast on first error + ## Invocation m1pp input.M1 output.M1 -Input is read whole into a fixed buffer (`MAX_INPUT = 262144` bytes); output -is written whole from another (`MAX_OUTPUT = 524288` bytes). +Input is read whole into a fixed buffer (`MAX_INPUT` bytes); output +is written whole from another (`MAX_OUTPUT` bytes). ## Lexical structure @@ -31,8 +59,7 @@ separated by spaces and newlines; original spacing is not preserved. ## Directives -Directives are recognized only at the start of a line (after a newline or at -the top of file). +Directives are recognized via `%X`. ### `%macro` / `%endm` @@ -79,6 +106,25 @@ path (see [Scoped labels](#scoped-labels)). Every `%scope` must be closed before end-of-input. 
`NAME` is a single `WORD` token and may come from macro-argument substitution. +### `%frame` / `%endframe` + + %frame NAME + ... body ... + %endframe + +Sets a single-slot "current frame" to `NAME`, consulted by `%local` to +look up named offsets in `<NAME>_FRAME.<field>` macros (typically +synthesized by `%struct`). Frames do not nest: a second `%frame` before +`%endframe` is an error. Every `%frame` must be closed before +end-of-input. `NAME` is a single `WORD` token and may come from +macro-argument substitution. + +Frame state is independent of the `%scope` stack. A nested `%scope` +inside a frame's body affects `::label` resolution but does not change +`%local` resolution — locals stay bound to the enclosing frame, not +to the lexical scope. This lets a function body open inner +control-flow scopes without the local namespace shifting underneath. + ## Macro calls %NAME(arg, arg, ...) @@ -192,6 +238,29 @@ branches are raw token spans, not expressions. Stringifies a single `WORD` token into a double-quoted string literal: `%str(foo)` → `"foo"`. The argument must be exactly one word token. +### `%local(NAME)` + +Looks up the zero-parameter macro `<frame>_FRAME.<NAME>`, where +`<frame>` is the currently active `%frame`, and emits its body. Errors +if no frame is active or the field is undefined. `NAME` must be exactly +one `WORD` token. + +`%local` is also recognized as an expression atom, so it composes with +`%(...)` arithmetic: `%(+ %local(off) 4)` evaluates as expected. + +The intended pattern combines `%struct`, `%frame`, and `%local` for +named stack-frame access: + + %struct foo_FRAME { saved_buf saved_len } + : foo + %scope foo + %frame foo + %enter(%foo_FRAME.SIZE) + ;; %local(saved_buf) -> 0, %local(saved_len) -> 8 + %eret + %endframe + %endscope + ## Expression language Expressions are Lisp-shaped S-expressions. Atoms are integer literals @@ -219,31 +288,25 @@ is how `%struct` and `%enum`-generated names compose into arithmetic. 
## Limits -Fixed at compile time: - -| Resource | Limit | -| --------------------- | ------- | -| input bytes | 262144 | -| output bytes | 524288 | -| total token text | 524288 | -| source tokens | 65536 | -| macro body tokens | 65536 | -| expansion pool tokens | 65536 | -| macros | 512 | -| parameters per macro | 16 | -| stream stack depth | 64 | -| expression frames | 256 | -| scope stack depth | 32 | +Various limits are fixed at compile time. See the code for values. + +| Resource | +| --------------------- | +| input bytes | +| output bytes | +| total token text | +| source tokens | +| macro body tokens | +| expansion pool tokens | +| macros | +| parameters per macro | +| stream stack depth | +| expression frames | +| scope stack depth | Exceeding any limit aborts with an error message on `stderr`. ## Errors On failure, `m1pp` prints `m1macro: <reason>` to `stderr` and exits 1. -Reasons are terse: `bad macro header`, `unterminated macro`, -`wrong arg count`, `bad paste`, `bad expression`, `bad builtin`, -`text overflow`, `token overflow`, `expansion overflow`, `output overflow`, -`stream overflow`, `unbalanced braces`, `too many args`, `too many macros`, -`bad integer`, `bad directive`, `unterminated directive`, -`unterminated macro call`, `bad scope header`, `scope underflow`, -`scope not closed`, `scope depth overflow`, `bad scope label`. +See code for reasons. diff --git a/tests/M1pp/15-struct.M1pp b/tests/M1pp/15-struct.M1pp @@ -21,19 +21,19 @@ # Inside an expression atom: loads 16+100 = 116 -> 0x74. %((+ %closure.body 100)) -# Compose-and-add path: %frame adds a 16-byte header prefix to every +# Compose-and-add path: %with_hdr adds a 16-byte header prefix to every # %frame_apply.* offset. Exercises the paren-less atom inside %(...). 
%struct frame_hdr { retaddr caller_sp } -%macro frame(field) +%macro with_hdr(field) %((+ field %frame_hdr.SIZE)) %endm %struct frame_apply { callee args body env } -%frame(%frame_apply.callee) -%frame(%frame_apply.args) -%frame(%frame_apply.body) -%frame(%frame_apply.env) +%with_hdr(%frame_apply.callee) +%with_hdr(%frame_apply.args) +%with_hdr(%frame_apply.body) +%with_hdr(%frame_apply.env) # Total frame size for an enter/leave pair. %frame_apply.SIZE diff --git a/tests/M1pp/25-frame-locals.M1pp b/tests/M1pp/25-frame-locals.M1pp @@ -0,0 +1,27 @@ +# %frame / %endframe + %local(name): frame state separate from the +# %scope stack. %frame sets a single-slot "current frame"; %local(name) +# looks up <frame>_FRAME.<name> (a macro typically synthesized by +# %struct) and emits its body. +# +# A nested %scope does NOT affect %local resolution. Locals belong to +# the enclosing frame, not the lexical scope, so a function body can +# open inner control-flow scopes without the local namespace shifting +# underneath it. + +%struct foo_FRAME { a b c } + +%frame foo +%local(a) +%local(b) +%local(c) + +%scope inner +::label +%local(b) +%endscope + +%((+ %local(b) 100)) + +%endframe + +END diff --git a/tests/M1pp/25-frame-locals.expected b/tests/M1pp/25-frame-locals.expected @@ -0,0 +1,22 @@ + + + + + + + + + + + +0 +8 +16 + +:inner__label +8 + +'6C000000' + + +END diff --git a/tests/M1pp/26-fn2-pattern.M1pp b/tests/M1pp/26-fn2-pattern.M1pp @@ -0,0 +1,79 @@ +# %fn2 / %stl / %ldl — the intended P1pp-side ergonomic pattern built +# on %struct + %frame + %endframe + %local. +# +# %fn2(name, locals, body) synthesizes <name>_FRAME from `locals`, +# emits the function label, opens %scope+%frame around the body, and +# brackets it with %enter sized from <name>_FRAME.SIZE plus %eret. +# +# %stl(reg, local) / %ldl(reg, local) store/load a named local via +# %local(name), which resolves against the currently active frame. 
+# +# A nested %scope inside the body affects ::label resolution but does +# NOT affect %local — locals stay bound to the function's frame. +# +# %enter, %eret, %st, %ld, %b, %beqz are stubbed as plain text-emitting +# macros so the test isolates the macro-composition behavior rather +# than depending on real P1 encodings. + +%macro enter(size) +ENTER size +%endm + +%macro eret() +ERET +%endm + +%macro st(rs, base, off) +ST rs base off +%endm + +%macro ld(rd, base, off) +LD rd base off +%endm + +%macro b(target) +B target +%endm + +%macro beqz(rs, target) +BEQZ rs target +%endm + +%macro fn2(name, locals, body) +%struct name ## _FRAME { locals } +LABEL name +%scope name +%frame name +%enter(% ## name ## _FRAME.SIZE) +body +%eret +%endframe +%endscope +%endm + +%macro stl(reg, local) +%st(reg, sp, %local(local)) +%endm + +%macro ldl(reg, local) +%ld(reg, sp, %local(local)) +%endm + +# A function with three named locals. Demonstrates: +# - %enter sized from the synthesized print_FRAME.SIZE = 24 +# - %stl/%ldl resolving names a/b/c to offsets 0/8/16 +# - a nested %scope (loop) for ::label resolution coexisting with the +# outer %frame, so %ldl(s0, a) inside the loop still hits offset 0 +# (print_FRAME.a) rather than anything tied to the loop scope. +%fn2(print, {a b c}, { +%stl(s0, a) +%stl(s1, b) +%stl(s2, c) +%scope loop +::top +%ldl(s0, a) +%beqz(s0, &::top) +%endscope +}) + +END diff --git a/tests/M1pp/26-fn2-pattern.expected b/tests/M1pp/26-fn2-pattern.expected @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +LABEL print +ENTER 24 + + +ST s0 sp 0 + + +ST s1 sp 8 + + +ST s2 sp 16 + + +:print__loop__top +LD s0 sp 0 + + +BEQZ s0 &print__loop__top + + +ERET + + + +END