boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

M1pp.P1 (143953B)


      1 ## m1pp.M1 — bootstrap M1 macro-expander, P1.
      2 ##
      3 ## Runtime shape: m1pp input.M1 output.M1
      4 ##
      5 ## Pipeline:
      6 ##   p1_main         argc/argv from the backend :_start stub; stash argv[1..2]
      7 ##                   into input_path / output_path; openat+read into
      8 ##                   input_buf; call lex_source, then process_tokens;
      9 ##                   openat+write output_buf to output_path; return 0.
     10 ##   lex_source      input_buf -> source_tokens[] (via append_text +
     11 ##                   push_source_token).
     12 ##   process_tokens  Stream-driven loop. Pushes source_tokens as the initial
     13 ##                   stream and walks it token-by-token, dispatching to
     14 ##                   define_macro at line-start %macro, emit_newline /
     15 ##                   emit_token for pass-through, expand_builtin_call for
     16 ##                   !@%$ and %select, and expand_call for user macros.
     17 ##                   Macro expansions and %select push fresh streams onto
     18 ##                   streams[]; popping rewinds the expansion pool.
     19 ##   define_macro    Parse %macro header+body; record in macros[] +
     20 ##                   macro_body_tokens[]; consume through the %endm line
     21 ##                   without emitting output.
     22 ##
     23 ## P1 ABI: a0..a3 arg/return, t0..t2 caller-saved temps, s0..s3 callee-saved
     24 ## (unused here). Non-leaf functions use enter_0 / eret. Entry is the portable
     25 ## p1_main (a0=argc, a1=argv); the backend-owned :_start stub captures
     26 ## argc/argv from the native entry state and sys_exits p1_main's return value.
     27 
     28 ## --- Constants & sizing ------------------------------------------------------
     29 ## TOKENS_END is a byte span: 0xC0000 = 32768 tokens x 24 B = OFF_macros - OFF_source_tokens.
     30 DEFINE M1PP_INPUT_CAP 0000040000000000
     31 DEFINE M1PP_OUTPUT_CAP 0000040000000000
     32 DEFINE M1PP_TEXT_CAP 0000040000000000
     33 DEFINE M1PP_TOKENS_END 00000c0000000000
     34 ## Macro record is 296 bytes: name (16) + param_count (8) + params[16]*16 (256)
     35 ## + body_start (8) + body_end (8). MACROS_CAP fits 512 records (151552 B).
     36 ## MACRO_BODY_CAP is 0x30000 bytes = 8192 body tokens x 24 B (196608 B).
     37 DEFINE M1PP_MACRO_RECORD_SIZE 2801000000000000
     38 DEFINE M1PP_MACRO_BODY_START_OFF 1801000000000000
     39 DEFINE M1PP_MACRO_BODY_END_OFF 2001000000000000
     40 DEFINE M1PP_MACROS_CAP 0050020000000000
     41 DEFINE M1PP_MACRO_BODY_CAP 0000030000000000
     42 DEFINE O_WRONLY_CREAT_TRUNC 4102000000000000
     43 DEFINE MODE_0644 A401000000000000
     44 DEFINE AT_FDCWD 9CFFFFFFFFFFFFFF
     45 DEFINE ZERO32 '0000000000000000000000000000000000000000000000000000000000000000'
     46 DEFINE ZERO8 '0000000000000000'
     47 DEFINE ZERO4 '00000000'
     48 ## Token kinds: the value stored in a Token's kind field (+0).
     49 DEFINE TOK_WORD 0000000000000000
     50 DEFINE TOK_STRING 0100000000000000
     51 DEFINE TOK_NEWLINE 0200000000000000
     52 DEFINE TOK_LPAREN 0300000000000000
     53 DEFINE TOK_RPAREN 0400000000000000
     54 DEFINE TOK_COMMA 0500000000000000
     55 DEFINE TOK_PASTE 0600000000000000
     56 DEFINE TOK_LBRACE 0700000000000000
     57 DEFINE TOK_RBRACE 0800000000000000
     58 
     59 ## Token record stride: 24 bytes (kind + text_ptr + text_len, 8 each). Advance a Token* by this.
     60 DEFINE M1PP_TOK_SIZE 1800000000000000
     61 
     62 ## --- Stream / expansion-pool / expression-frame sizes ------------------------
     63 ## Stream record: 40 bytes. Fields (each 8 bytes; `start` at +0 has no offset constant):
     64 ##   +0   start       Token*
     65 ##   +8   end         Token*   (exclusive)
     66 ##   +16  pos         Token*
     67 ##   +24  line_start  u64      (1 at entry, 0 after first non-newline emit)
     68 ##   +32  pool_mark   i64      (byte offset into expand_pool; -1 for source)
     69 DEFINE M1PP_STREAM_SIZE 2800000000000000
     70 DEFINE M1PP_STREAM_END_OFF 0800000000000000
     71 DEFINE M1PP_STREAM_POS_OFF 1000000000000000
     72 DEFINE M1PP_STREAM_LS_OFF 1800000000000000
     73 DEFINE M1PP_STREAM_MARK_OFF 2000000000000000
     74 
     75 ## Stream stack cap: 16 streams × 40 = 640 bytes.
     76 DEFINE M1PP_STREAM_STACK_CAP 8002000000000000
     77 
     78 ## Expansion pool fits 4096 Token slots × 24 bytes = 98304 bytes (0x18000).
     79 DEFINE M1PP_EXPAND_CAP 0080010000000000
     80 
     81 ## ExprFrame record: 144 bytes. Fields (op_code at +0 has no offset constant):
     82 ##   +0   op_code  u64
     83 ##   +8   argc     u64
     84 ##   +16  args     i64[16]  (16 × 8 = 128 bytes)
     85 DEFINE M1PP_EXPR_FRAME_SIZE 9000000000000000
     86 DEFINE M1PP_EXPR_ARGC_OFF 0800000000000000
     87 DEFINE M1PP_EXPR_ARGS_OFF 1000000000000000
     88 
     89 ## Expr frame stack cap: 16 frames × 144 = 2304 bytes.
     90 DEFINE M1PP_EXPR_FRAMES_CAP 0009000000000000
     91 
     92 ## Common cap (16 entries) used by macro params, call args, and expression args.
     93 DEFINE M1PP_MAX_PARAMS 1000000000000000
     94 
     95 ## Scope-stack cap. 32 nested scopes max; each slot is a 16-byte TextSpan
     96 ## (ptr + len) pointing into stable text (input_buf or text_buf), so
     97 ## scope_stack is 32 × 16 = 512 bytes.
     98 DEFINE M1PP_MAX_SCOPE_DEPTH 2000000000000000
     99 ## Codes run contiguously from 0 to 18; EXPR_INVALID (18) follows the last operator.
    100 ## ExprOp codes (indexed by apply_expr_op).
    101 DEFINE EXPR_ADD 0000000000000000
    102 DEFINE EXPR_SUB 0100000000000000
    103 DEFINE EXPR_MUL 0200000000000000
    104 DEFINE EXPR_DIV 0300000000000000
    105 DEFINE EXPR_MOD 0400000000000000
    106 DEFINE EXPR_SHL 0500000000000000
    107 DEFINE EXPR_SHR 0600000000000000
    108 DEFINE EXPR_AND 0700000000000000
    109 DEFINE EXPR_OR 0800000000000000
    110 DEFINE EXPR_XOR 0900000000000000
    111 DEFINE EXPR_NOT 0A00000000000000
    112 DEFINE EXPR_EQ 0B00000000000000
    113 DEFINE EXPR_NE 0C00000000000000
    114 DEFINE EXPR_LT 0D00000000000000
    115 DEFINE EXPR_LE 0E00000000000000
    116 DEFINE EXPR_GT 0F00000000000000
    117 DEFINE EXPR_GE 1000000000000000
    118 DEFINE EXPR_STRLEN 1100000000000000
    119 DEFINE EXPR_INVALID 1200000000000000
    120 ## --- BSS layout (byte offsets from ELF_end; p1_main adds ELF_end via bss_init_tbl) ---
    121 DEFINE OFF_paste_scratch 0000000000000000
    122 DEFINE OFF_local_label_scratch 0001000000000000
    123 DEFINE OFF_scope_stack 8001000000000000
    124 DEFINE OFF_df_name_scratch 8003000000000000
    125 DEFINE OFF_ebc_str_scratch 8004000000000000
    126 DEFINE OFF_arg_starts 8005000000000000
    127 DEFINE OFF_arg_ends 0006000000000000
    128 DEFINE OFF_input_buf 8006000000000000
    129 DEFINE OFF_output_buf 8006080000000000
    130 DEFINE OFF_text_buf 8006100000000000
    131 DEFINE OFF_source_tokens 8006140000000000
    132 DEFINE OFF_macros 8006200000000000
    133 DEFINE OFF_macro_body_tokens 8046290000000000
    134 DEFINE OFF_streams 8046350000000000
    135 DEFINE OFF_expand_pool 0049350000000000
    136 DEFINE OFF_expr_frames 0049410000000000
    137 ## A region's size is the gap to the next offset, e.g. source_tokens spans 0xC0000 bytes.
    138 
    139 ## --- Runtime shell: argv, read input, call pipeline, write output, exit ------
    140 ## p1_main(a0=argc, a1=argv) -> a0 = 0 on success; error paths branch to err_* labels.
    141 :p1_main
    142     enter_0
    143     # --- init BSS pointer slots from ELF_end via table walk ------------------
    144     # Each bss_init_tbl entry is 16 bytes:
    145     #   +0  slot ptr   (&label + 4 zero pad = 8-byte absolute address)
    146     #   +8  offset     (8-byte OFF_* constant)
    147     # For each entry: *slot_ptr = ELF_end + offset.
    148     la_t0 &ELF_end
    149     la_t1 &bss_init_tbl
    150     la_t2 &bss_init_tbl_end
    151 :bss_init_loop
    152     la_br &bss_init_done
    153     beq_t1,t2
    154     ld_a2,t1,0
    155     ld_a3,t1,8
    156     add_a3,a3,t0
    157     st_a3,a2,0
    158     addi_t1,t1,16
    159     la_br &bss_init_loop
    160     b
    161 :bss_init_done
    162     # --- end BSS init -------------------------------------------------------
    163     # (the table walk touched only a2, a3, t0..t2, so argc/argv in a0/a1 are intact)
    164     # a0 = argc, a1 = argv (pointer to argv[0]).
    165     # if (argc < 3) usage
    166     li_a2 %3 %0
    167     la_br &err_usage
    168     blt_a0,a2
    169 
    170     # Stash argv[1] and argv[2] into memory before anything clobbers a1.
    171     ld_t0,a1,8
    172     la_a2 &input_path
    173     st_t0,a2,0
    174     ld_t0,a1,16
    175     la_a2 &output_path
    176     st_t0,a2,0
    177 
    178     # source_end = &source_tokens   (running tail pointer)
    179     la_a0 &source_tokens_ptr
    180     ld_a0,a0,0
    181     la_a2 &source_end
    182     st_a0,a2,0
    183 
    184     # macros_end = &macros; macro_body_end = &macro_body_tokens
    185     la_a0 &macros_ptr
    186     ld_a0,a0,0
    187     la_a2 &macros_end
    188     st_a0,a2,0
    189     la_a0 &macro_body_tokens_ptr
    190     ld_a0,a0,0
    191     la_a2 &macro_body_end
    192     st_a0,a2,0
    193     # Syscall convention as used here: number in a0, arguments in a1, a2, a3, t0.
    194     # input_fd = openat(AT_FDCWD, input_path, O_RDONLY, 0)
    195     li_a0 sys_openat
    196     li_a1 AT_FDCWD
    197     la_a2 &input_path
    198     ld_a2,a2,0
    199     li_a3 %0 %0
    200     li_t0 %0 %0
    201     syscall
    202     la_br &err_open_input
    203     bltz_a0
    204     la_a1 &input_fd
    205     st_a0,a1,0
    206     # NOTE(review): the loop assumes input_len starts at 0; it is not initialised in p1_main.
    207 :read_loop
    208     # while (input_len < INPUT_CAP)
    209     la_a0 &input_len
    210     ld_t1,a0,0
    211     li_t2 M1PP_INPUT_CAP
    212     la_br &read_done
    213     beq_t1,t2
    214 
    215     # n = read(input_fd, &input_buf[input_len], INPUT_CAP - input_len)
    216     la_a0 &input_fd
    217     ld_a1,a0,0
    218     la_a2 &input_buf_ptr
    219     ld_a2,a2,0
    220     add_a2,a2,t1
    221     sub_a3,t2,t1
    222     li_a0 sys_read
    223     syscall
    224 
    225     # if (n == 0) break;  if (n < 0) fatal
    226     la_br &read_done
    227     beqz_a0
    228     la_br &err_read
    229     bltz_a0
    230 
    231     # input_len += n
    232     la_a1 &input_len
    233     ld_a2,a1,0
    234     add_a2,a2,a0
    235     st_a2,a1,0
    236     la_br &read_loop
    237     b
    238 
    239 :read_done
    240     # if (input_len == INPUT_CAP) fatal  (no room for null terminator)
    241     la_a0 &input_len
    242     ld_t0,a0,0
    243     li_t1 M1PP_INPUT_CAP
    244     la_br &err_input_too_big
    245     beq_t0,t1
    246 
    247     # input_buf[input_len] = '\0'
    248     la_a0 &input_buf_ptr
    249     ld_a0,a0,0
    250     add_a0,a0,t0
    251     li_t1 %0 %0
    252     sb_t1,a0,0
    253 
    254     # lex_source(); process_tokens()
    255     la_br &lex_source
    256     call
    257     la_br &process_tokens
    258     call
    259     # This jump is redundant: write_output is the very next label.
    260     la_br &write_output
    261     b
    262 
    263 :write_output
    264     # output_fd = openat(AT_FDCWD, output_path, O_WRONLY|O_CREAT|O_TRUNC, 0644)
    265     la_a0 &output_path
    266     ld_a2,a0,0
    267     li_a0 sys_openat
    268     li_a1 AT_FDCWD
    269     li_a3 O_WRONLY_CREAT_TRUNC
    270     li_t0 MODE_0644
    271     syscall
    272     la_br &err_open_output
    273     bltz_a0
    274     la_a1 &output_fd
    275     st_a0,a1,0
    276 
    277 :write_loop
    278     # while (output_written < output_used)
    279     la_a0 &output_written
    280     ld_t0,a0,0
    281     la_a1 &output_used
    282     ld_t1,a1,0
    283     la_br &write_done
    284     beq_t0,t1
    285 
    286     # n = write(output_fd, &output_buf[output_written], output_used - output_written)
    287     la_a0 &output_fd
    288     ld_a1,a0,0
    289     la_a2 &output_buf_ptr
    290     ld_a2,a2,0
    291     add_a2,a2,t0
    292     sub_a3,t1,t0
    293     li_a0 sys_write
    294     syscall
    295 
    296     # n < 0 (error) or n == 0 (no progress) is fatal; a partial write just loops again
    297     la_br &err_write
    298     bltz_a0
    299     la_br &err_write
    300     beqz_a0
    301 
    302     # output_written += n
    303     la_a1 &output_written
    304     ld_a2,a1,0
    305     add_a2,a2,a0
    306     st_a2,a1,0
    307     la_br &write_loop
    308     b
    309 
    310 :write_done
    311     # return 0 (backend :_start stub sys_exits with a0); neither fd is closed here
    312     li_a0 %0 %0
    313     eret
    314 
    315 ## --- Helpers: text arena + token array + equality ----------------------------
    316 ## append_text copies bytes into text_buf. lex_source uses it for word and
    317 ## string token text as well as the single-char punctuation and the paste
    318 ## `##`; only its TOK_NEWLINE tokens point directly into input_buf.
    319 ## Copies len bytes plus a trailing NUL into text_buf at text_used; fatal on overflow.
    320 ## append_text(a0=src, a1=len) -> a0=text ptr. Leaf. Clobbers a2, a3, t0..t2.
    321 :append_text
    322     # a3 = text_used
    323     la_a2 &text_used
    324     ld_a3,a2,0
    325 
    326     # if (text_used + len + 1 > TEXT_CAP) fatal   (+1 reserves the NUL terminator)
    327     add_t0,a3,a1
    328     addi_t0,t0,1
    329     li_t1 M1PP_TEXT_CAP
    330     la_br &err_text_overflow
    331     blt_t1,t0
    332 
    333     # dst = &text_buf[text_used]
    334     la_t0 &text_buf_ptr
    335     ld_t0,t0,0
    336     add_t0,t0,a3
    337 
    338     # for (i = 0; i < len; i++) dst[i] = src[i]
    339     li_t1 %0 %0
    340 :append_text_loop
    341     la_br &append_text_done
    342     beq_t1,a1
    343     add_t2,a0,t1
    344     lb_t2,t2,0
    345     add_a2,t0,t1
    346     sb_t2,a2,0
    347     addi_t1,t1,1
    348     la_br &append_text_loop
    349     b
    350 :append_text_done
    351     # dst[len] = '\0'   (t1 == len here)
    352     add_a2,t0,t1
    353     li_t2 %0 %0
    354     sb_t2,a2,0
    355 
    356     # text_used += len + 1   (reloaded from memory; a2 was reused by the copy loop)
    357     la_a2 &text_used
    358     ld_a3,a2,0
    359     add_a3,a3,a1
    360     addi_a3,a3,1
    361     st_a3,a2,0
    362 
    363     # return dst
    364     mov_a0,t0
    365     ret
    366 ## Appends one Token at source_end; branches to err_token_overflow when the array is full.
    367 ## push_source_token(a0=kind, a1=text_ptr, a2=text_len). Leaf. Clobbers a3, t0..t2.
    368 ## Token layout: +0 kind, +8 text_ptr, +16 text_len (24 bytes total).
    369 :push_source_token
    370     # tok = source_end
    371     la_a3 &source_end
    372     ld_t0,a3,0
    373 
    374     # if (tok == &source_tokens[0] + TOKENS_END) fatal   (TOKENS_END is a byte offset)
    375     la_t1 &source_tokens_ptr
    376     ld_t1,t1,0
    377     li_t2 M1PP_TOKENS_END
    378     add_t1,t1,t2
    379     la_br &err_token_overflow
    380     beq_t0,t1
    381 
    382     # tok->kind = kind; tok->text_ptr = text_ptr; tok->text_len = text_len
    383     st_a0,t0,0
    384     st_a1,t0,8
    385     st_a2,t0,16
    386 
    387     # source_end = tok + 1 (advance 24 bytes; a3 still holds &source_end)
    388     addi_t0,t0,24
    389     st_t0,a3,0
    390     ret
    391 
    392 ## tok_eq_const(a0=token_ptr, a1=const_ptr, a2=const_len) -> a0=0/1. Leaf.
    393 ## Returns 1 iff tok->text_len == const_len and every byte matches. Clobbers a3, t0..t2.
    394 :tok_eq_const
    395     # if (tok->text_len != const_len) return 0
    396     ld_a3,a0,16
    397     la_br &tok_eq_false
    398     bne_a3,a2
    399 
    400     # src = tok->text_ptr; i = 0
    401     ld_t0,a0,8
    402     li_t1 %0 %0
    403 :tok_eq_loop
    404     # if (i == const_len) return 1   (also the immediate result when const_len == 0)
    405     la_br &tok_eq_true
    406     beq_t1,a2
    407 
    408     # if (src[i] != const_ptr[i]) return 0
    409     add_t2,t0,t1
    410     lb_t2,t2,0
    411     add_a3,a1,t1
    412     lb_a3,a3,0
    413     la_br &tok_eq_false
    414     bne_t2,a3
    415 
    416     # i++
    417     addi_t1,t1,1
    418     la_br &tok_eq_loop
    419     b
    420 :tok_eq_true
    421     li_a0 %1 %0
    422     ret
    423 :tok_eq_false
    424     li_a0 %0 %0
    425     ret
    426 
    427 ## --- Lexer -------------------------------------------------------------------
    428 ## Dispatches on the first byte at lex_ptr:
    429 ##   whitespace (sp/tab/cr/ff/vt) -> lex_skip_one
    430 ##   newline (\n)                 -> lex_newline   -> TOK_NEWLINE
    431 ##   quote (" or ')               -> lex_string    -> TOK_STRING
    432 ##   `#`                          -> lex_hash      -> TOK_PASTE on ##, else comment
    433 ##   `;`                          -> lex_comment   (drop to end of line)
    434 ##   `(` `)` `,` `{` `}`          -> lex_lparen / rparen / comma / lbrace / rbrace
    435 ##   otherwise                    -> lex_word      -> TOK_WORD
    436 ##
    437 ## All branches loop back to lex_loop. lex_done exits once lex_ptr hits
    438 ## the terminating NUL that p1_main stores at input_buf[input_len].
    439 
    440 ## lex_source(): fills source_tokens[] from input_buf. All lex_* labels below share this frame; lex_done erets.
    441 :lex_source
    442     enter_0
    443     la_a0 &input_buf_ptr
    444     ld_a0,a0,0
    445     la_a1 &lex_ptr
    446     st_a0,a1,0
    447 :lex_loop
    448     # c = *lex_ptr; dispatch on lex_char_class[c]. t0 = lex_ptr and a0 = c stay live for the targets.
    449     #   0 word, 1 skip ws, 2 newline, 3 string, 4 hash, 5 comment,
    450     #   6 '(', 7 ')', 8 ',', 9 '{', 10 '}', 11 NUL (fall through to done).
    451     la_a0 &lex_ptr
    452     ld_t0,a0,0
    453     lb_a0,t0,0
    454     la_a1 &lex_char_class
    455     add_a1,a1,a0
    456     lb_a2,a1,0
    457     # NOTE(review): c indexes the table directly; assumes lb zero-extends bytes >= 0x80 — confirm.
    458     la_br &lex_word
    459     beqz_a2
    460     li_a1 %1 %0
    461     la_br &lex_skip_one
    462     beq_a2,a1
    463     li_a1 %2 %0
    464     la_br &lex_newline
    465     beq_a2,a1
    466     li_a1 %3 %0
    467     la_br &lex_string
    468     beq_a2,a1
    469     li_a1 %4 %0
    470     la_br &lex_hash
    471     beq_a2,a1
    472     li_a1 %5 %0
    473     la_br &lex_comment
    474     beq_a2,a1
    475     li_a1 %6 %0
    476     la_br &lex_lparen
    477     beq_a2,a1
    478     li_a1 %7 %0
    479     la_br &lex_rparen
    480     beq_a2,a1
    481     li_a1 %8 %0
    482     la_br &lex_comma
    483     beq_a2,a1
    484     li_a1 %9 %0
    485     la_br &lex_lbrace
    486     beq_a2,a1
    487     li_a1 %10 %0
    488     la_br &lex_rbrace
    489     beq_a2,a1
    490     ## class 11 (NUL), or any class value not matched above — exit the lexer
    491     la_br &lex_done
    492     b
    493 
    494 :lex_skip_one
    495     # lex_ptr++   (t0 still holds lex_ptr from lex_loop)
    496     addi_t0,t0,1
    497     la_a0 &lex_ptr
    498     st_t0,a0,0
    499     la_br &lex_loop
    500     b
    501 
    502 :lex_newline
    503     # push_source_token(TOK_NEWLINE, lex_ptr, 1)   (text points straight into input_buf)
    504     mov_a1,t0
    505     li_a0 TOK_NEWLINE
    506     li_a2 %1 %0
    507     la_br &push_source_token
    508     call
    509 
    510     # lex_ptr++
    511     la_a0 &lex_ptr
    512     ld_t0,a0,0
    513     addi_t0,t0,1
    514     st_t0,a0,0
    515     la_br &lex_loop
    516     b
    517 
    518 :lex_string
    519     # lex_start = lex_ptr; lex_quote = c; lex_ptr++   (lex_start is the opening quote)
    520     la_a1 &lex_start
    521     st_t0,a1,0
    522     la_a1 &lex_quote
    523     st_a0,a1,0
    524     addi_t0,t0,1
    525 :lex_string_scan
    526     # c = *lex_ptr
    527     lb_a0,t0,0
    528     # if (c == '\0') finish (unterminated; keep what we have)
    529     la_br &lex_string_finish
    530     beqz_a0
    531     # if (c == quote) consume closing quote and finish
    532     la_a1 &lex_quote
    533     ld_a1,a1,0
    534     la_br &lex_string_after_quote
    535     beq_a0,a1
    536     # else lex_ptr++   (any other byte, including '\n', stays inside the string)
    537     addi_t0,t0,1
    538     la_br &lex_string_scan
    539     b
    540 :lex_string_after_quote
    541     addi_t0,t0,1
    542 :lex_string_finish
    543     # lex_ptr = t0
    544     la_a1 &lex_ptr
    545     st_t0,a1,0
    546 
    547     # text_ptr = append_text(lex_start, lex_ptr - lex_start)   (token text keeps its quotes)
    548     la_a1 &lex_start
    549     ld_a0,a1,0
    550     sub_a1,t0,a0
    551     la_br &append_text
    552     call
    553 
    554     # push_source_token(TOK_STRING, text_ptr, lex_ptr - lex_start)
    555     la_a1 &lex_ptr
    556     ld_t0,a1,0
    557     la_a1 &lex_start
    558     ld_t1,a1,0
    559     sub_a2,t0,t1
    560     mov_a1,a0
    561     li_a0 TOK_STRING
    562     la_br &push_source_token
    563     call
    564     la_br &lex_loop
    565     b
    566 
    567 :lex_hash
    568     # if (lex_ptr[1] == '#') goto lex_paste, else lex_comment   (35 = '#')
    569     addi_a1,t0,1
    570     lb_a1,a1,0
    571     li_a2 %35 %0
    572     la_br &lex_paste
    573     beq_a1,a2
    574     la_br &lex_comment
    575     b
    576 
    577 :lex_paste
    578     # text_ptr = append_text("##", 2)
    579     la_a0 &const_paste
    580     li_a1 %2 %0
    581     la_br &append_text
    582     call
    583 
    584     # push_source_token(TOK_PASTE, text_ptr, 2)
    585     mov_a1,a0
    586     li_a0 TOK_PASTE
    587     li_a2 %2 %0
    588     la_br &push_source_token
    589     call
    590 
    591     # lex_ptr += 2
    592     la_a0 &lex_ptr
    593     ld_t0,a0,0
    594     addi_t0,t0,2
    595     st_t0,a0,0
    596     la_br &lex_loop
    597     b
    598 
    599 :lex_comment
    600     # skip to end of line: while (c != '\0' && c != '\n') lex_ptr++   ('\n' is left for lex_newline)
    601     la_a0 &lex_ptr
    602     ld_t0,a0,0
    603 :lex_comment_loop
    604     lb_a0,t0,0
    605     la_br &lex_comment_done
    606     beqz_a0
    607     li_a1 %10 %0
    608     la_br &lex_comment_done
    609     beq_a0,a1
    610     addi_t0,t0,1
    611     la_br &lex_comment_loop
    612     b
    613 :lex_comment_done
    614     la_a0 &lex_ptr
    615     st_t0,a0,0
    616     la_br &lex_loop
    617     b
    618 
    619 ## lex_lparen / lex_rparen / lex_comma / lex_lbrace / lex_rbrace share one shape:
    620 ## load the token kind into a0 and the single-char constant into a1, then
    621 ## branch (or, for lex_rbrace, fall through) to lex_punct1.
    622 
    623 :lex_lparen
    624     li_a0 TOK_LPAREN
    625     la_a1 &const_lparen
    626     la_br &lex_punct1
    627     b
    628 :lex_rparen
    629     li_a0 TOK_RPAREN
    630     la_a1 &const_rparen
    631     la_br &lex_punct1
    632     b
    633 :lex_comma
    634     li_a0 TOK_COMMA
    635     la_a1 &const_comma
    636     la_br &lex_punct1
    637     b
    638 :lex_lbrace
    639     li_a0 TOK_LBRACE
    640     la_a1 &const_lbrace
    641     la_br &lex_punct1
    642     b
    643 :lex_rbrace
    644     li_a0 TOK_RBRACE
    645     la_a1 &const_rbrace
    646     ## fall through into lex_punct1
    647 
    648 ## lex_punct1(a0=kind, a1=const_ptr): append 1 byte to text arena, push a
    649 ## 1-byte token of the given kind, advance lex_ptr by 1, branch back to
    650 ## lex_loop. Called by tail-branch from the single-char lex_X blocks, which
    651 ## all share lex_source's frame. Spills `kind` since append_text clobbers
    652 ## a0..a3.
    653 :lex_punct1
    654     la_t0 &lex_punct_kind
    655     st_a0,t0,0
    656     mov_a0,a1
    657     li_a1 %1 %0
    658     la_br &append_text
    659     call
    660     mov_a1,a0
    661     la_t0 &lex_punct_kind
    662     ld_a0,t0,0
    663     li_a2 %1 %0
    664     la_br &push_source_token
    665     call
    666     ## fall through to lex_advance_one_then_loop
    667 
    668 :lex_advance_one_then_loop
    669     # lex_ptr++
    670     la_a0 &lex_ptr
    671     ld_t0,a0,0
    672     addi_t0,t0,1
    673     st_t0,a0,0
    674     la_br &lex_loop
    675     b
    676 
    677 :lex_word
    678     # lex_start = lex_ptr
    679     la_a1 &lex_start
    680     st_t0,a1,0
    681 :lex_word_scan
    682     # c = *lex_ptr; terminate the word if lex_char_class[c] != WORD (0).
    683     lb_a2,t0,0
    684     la_a1 &lex_char_class
    685     add_a1,a1,a2
    686     lb_a2,a1,0
    687     la_br &lex_word_finish
    688     bnez_a2
    689     addi_t0,t0,1
    690     la_br &lex_word_scan
    691     b
    692 :lex_word_finish
    693     # lex_ptr = t0
    694     la_a1 &lex_ptr
    695     st_t0,a1,0
    696 
    697     # text_ptr = append_text(lex_start, lex_ptr - lex_start)
    698     la_a1 &lex_start
    699     ld_a0,a1,0
    700     sub_a1,t0,a0
    701     la_br &append_text
    702     call
    703 
    704     # push_source_token(TOK_WORD, text_ptr, lex_ptr - lex_start)
    705     la_a1 &lex_ptr
    706     ld_t0,a1,0
    707     la_a1 &lex_start
    708     ld_t1,a1,0
    709     sub_a2,t0,t1
    710     mov_a1,a0
    711     li_a0 TOK_WORD
    712     la_br &push_source_token
    713     call
    714     la_br &lex_loop
    715     b
    716 
    717 :lex_done
    718     eret
    719 
    720 ## --- Output: normalized token stream to output_buf ---------------------------
    721 ## emit_newline writes '\n' and clears output_need_space.
    722 ## emit_token prefixes a space when output_need_space is set, copies the
    723 ## token text, then sets output_need_space. This is how source whitespace
    724 ## gets normalized: one '\n' per TOK_NEWLINE, one ' ' between consecutive
    725 ## non-newline tokens.
    726 
    727 ## emit_newline(). Leaf. Clobbers a0, a1, t0..t2.
    728 :emit_newline
    729     # if (output_used == OUTPUT_CAP) fatal
    730     la_a0 &output_used
    731     ld_t0,a0,0
    732     li_t1 M1PP_OUTPUT_CAP
    733     la_br &err_output_overflow
    734     beq_t0,t1
    735 
    736     # output_buf[output_used] = '\n'; output_used++   (10 = '\n')
    737     la_a1 &output_buf_ptr
    738     ld_a1,a1,0
    739     add_a1,a1,t0
    740     li_t2 %10 %0
    741     sb_t2,a1,0
    742     addi_t0,t0,1
    743     st_t0,a0,0
    744 
    745     # output_need_space = 0   (the next token starts a line, so no leading space)
    746     la_a0 &output_need_space
    747     li_a1 %0 %0
    748     st_a1,a0,0
    749     ret
    750 
    751 ## emit_token(a0=token_ptr). Leaf. Clobbers a0..a3, t0..t2.
    752 :emit_token
    753     # brace tokens are no-ops at emit time (belt-and-braces with arg-strip)
    754     ld_t0,a0,0
    755     li_t1 TOK_LBRACE
    756     la_br &emit_token_skip
    757     beq_t0,t1
    758     li_t1 TOK_RBRACE
    759     la_br &emit_token_skip
    760     beq_t0,t1
    761 
    762     # Scope rewrite: TOK_WORD whose text begins with "::" (len>=3) becomes
    763     # a scoped definition, "&::" (len>=4) a scoped reference. Dispatch to
    764     # emit_scope_rewrite with a1=skip, a2=sigil.  (58 = ':', 38 = '&')
    765     ld_a1,a0,0
    766     li_a2 TOK_WORD
    767     la_br &emit_token_after_scope
    768     bne_a1,a2
    769     ld_a2,a0,16
    770     li_a3 %3 %0
    771     la_br &emit_token_after_scope
    772     blt_a2,a3
    773     ld_a3,a0,8
    774     lb_t0,a3,0
    775     li_t1 %58 %0
    776     la_br &emit_token_check_amp
    777     bne_t0,t1
    778     lb_t0,a3,1
    779     li_t1 %58 %0
    780     la_br &emit_token_after_scope
    781     bne_t0,t1
    782     li_a1 %2 %0
    783     li_a2 %58 %0
    784     la_br &emit_scope_rewrite
    785     b
    786 :emit_token_check_amp
    787     li_t1 %38 %0
    788     la_br &emit_token_after_scope
    789     bne_t0,t1
    790     ld_a2,a0,16
    791     li_t2 %4 %0
    792     la_br &emit_token_after_scope
    793     blt_a2,t2
    794     lb_t0,a3,1
    795     li_t1 %58 %0
    796     la_br &emit_token_after_scope
    797     bne_t0,t1
    798     lb_t0,a3,2
    799     la_br &emit_token_after_scope
    800     bne_t0,t1
    801     li_a1 %3 %0
    802     li_a2 %38 %0
    803     la_br &emit_scope_rewrite
    804     b
    805 
    806 :emit_token_after_scope
    807     # if (output_need_space) emit ' ' (32)  (skip the space for the first token on a line)
    808     la_a1 &output_need_space
    809     ld_t0,a1,0
    810     la_br &emit_token_copy
    811     beqz_t0
    812 
    813     la_a1 &output_used
    814     ld_t0,a1,0
    815     li_t1 M1PP_OUTPUT_CAP
    816     la_br &err_output_overflow
    817     beq_t0,t1
    818     la_a2 &output_buf_ptr
    819     ld_a2,a2,0
    820     add_a2,a2,t0
    821     li_t1 %32 %0
    822     sb_t1,a2,0
    823     addi_t0,t0,1
    824     st_t0,a1,0
    825 
    826 :emit_token_copy
    827     # src = tok->text_ptr; len = tok->text_len; i = 0   (a0 is free for reuse after this)
    828     ld_t0,a0,8
    829     ld_t1,a0,16
    830     li_t2 %0 %0
    831 :emit_token_loop
    832     # if (i == len) done
    833     la_br &emit_token_done
    834     beq_t2,t1
    835 
    836     # if (output_used == OUTPUT_CAP) fatal
    837     la_a1 &output_used
    838     ld_a2,a1,0
    839     li_a3 M1PP_OUTPUT_CAP
    840     la_br &err_output_overflow
    841     beq_a2,a3
    842 
    843     # output_buf[output_used++] = src[i]
    844     add_a3,t0,t2
    845     lb_a3,a3,0
    846     la_a0 &output_buf_ptr
    847     ld_a0,a0,0
    848     add_a0,a0,a2
    849     sb_a3,a0,0
    850     addi_a2,a2,1
    851     st_a2,a1,0
    852 
    853     # i++
    854     addi_t2,t2,1
    855     la_br &emit_token_loop
    856     b
    857 :emit_token_done
    858     # output_need_space = 1
    859     la_a0 &output_need_space
    860     li_a1 %1 %0
    861     st_a1,a0,0
    862     ret
    863 :emit_token_skip
    864     ret
    865 
    866 ## emit_scope_rewrite: branch target from emit_token for tokens whose text
    867 ## starts with "::" (scoped definition) or "&::" (scoped reference).
    868 ## Writes sigil + scope1 + "__" + ... + scopeN + "__" + name directly to
    869 ## output_buf; with an empty scope stack the middle collapses so output is
    870 ## just sigil + name (pass-through). Not a callable function: reached by `b`,
    871 ## shares emit_token's leaf return address, exits via `ret`.
    872 ##
    873 ## Register inputs:
    874 ##   a0 = tok_ptr
    875 ##   a1 = skip   (2 for "::", 3 for "&::")
    876 ##   a2 = sigil  (':' = 58 for definitions, '&' = 38 for references)
    877 :emit_scope_rewrite
    878     # name_len = tok->text_len - skip; fail if zero (defensive: emit_token only branches here with text_len > skip).
    879     ld_a3,a0,16
    880     sub_a3,a3,a1
    881     la_br &err_bad_scope_label
    882     beqz_a3
    883 
    884     # Spill inputs — the byte-copy loops below reuse a0..a3/t0..t2 freely.
    885     la_t0 &sr_tok_ptr
    886     st_a0,t0,0
    887     la_t0 &sr_skip
    888     st_a1,t0,0
    889     la_t0 &sr_sigil
    890     st_a2,t0,0
    891     la_t0 &sr_name_len
    892     st_a3,t0,0
    893 
    894     # Emit leading ' ' (32) if output_need_space.
    895     la_a0 &output_need_space
    896     ld_t0,a0,0
    897     la_br &sr_post_space
    898     beqz_t0
    899     la_a1 &output_used
    900     ld_t0,a1,0
    901     li_t1 M1PP_OUTPUT_CAP
    902     la_br &err_output_overflow
    903     beq_t0,t1
    904     la_a2 &output_buf_ptr
    905     ld_a2,a2,0
    906     add_a2,a2,t0
    907     li_t1 %32 %0
    908     sb_t1,a2,0
    909     addi_t0,t0,1
    910     st_t0,a1,0
    911 :sr_post_space
    912 
    913     # Emit the sigil byte.
    914     la_a0 &output_used
    915     ld_t0,a0,0
    916     li_t1 M1PP_OUTPUT_CAP
    917     la_br &err_output_overflow
    918     beq_t0,t1
    919     la_a1 &output_buf_ptr
    920     ld_a1,a1,0
    921     add_a1,a1,t0
    922     la_a2 &sr_sigil
    923     ld_a3,a2,0
    924     sb_a3,a1,0
    925     addi_t0,t0,1
    926     st_t0,a0,0
    927 
    928     # Emit each scope frame's bytes followed by "__". t0 = frame index; slots are 16 bytes (ptr +0, len +8).
    929     li_t0 %0 %0
    930 :sr_scope_outer
    931     la_a0 &scope_depth
    932     ld_a1,a0,0
    933     la_br &sr_tail_start
    934     beq_t0,a1
    935 
    936     la_a0 &scope_stack_ptr
    937     ld_a0,a0,0
    938     li_a2 %16 %0
    939     mul_a2,a2,t0
    940     add_a0,a0,a2
    941     ld_a1,a0,0
    942     ld_a2,a0,8
    943     li_a3 %0 %0
    944 :sr_scope_inner
    945     la_br &sr_scope_sep
    946     beq_a3,a2
    947     la_t1 &output_used
    948     ld_t2,t1,0
    949     li_a0 M1PP_OUTPUT_CAP
    950     la_br &err_output_overflow
    951     beq_t2,a0
    952     la_a0 &output_buf_ptr
    953     ld_a0,a0,0
    954     add_a0,a0,t2
    955     add_t2,a1,a3
    956     lb_t2,t2,0
    957     sb_t2,a0,0
    958     la_t1 &output_used
    959     ld_t2,t1,0
    960     addi_t2,t2,1
    961     st_t2,t1,0
    962     addi_a3,a3,1
    963     la_br &sr_scope_inner
    964     b
    965 :sr_scope_sep
    966     la_a0 &output_used
    967     ld_t1,a0,0
    968     li_t2 M1PP_OUTPUT_CAP
    969     la_br &err_output_overflow
    970     beq_t1,t2
    971     la_a1 &output_buf_ptr
    972     ld_a1,a1,0
    973     add_a1,a1,t1
    974     li_a2 %95 %0
    975     sb_a2,a1,0
    976     addi_t1,t1,1
    977     st_t1,a0,0
    978     la_a0 &output_used
    979     ld_t1,a0,0
    980     li_t2 M1PP_OUTPUT_CAP
    981     la_br &err_output_overflow
    982     beq_t1,t2
    983     la_a1 &output_buf_ptr
    984     ld_a1,a1,0
    985     add_a1,a1,t1
    986     li_a2 %95 %0
    987     sb_a2,a1,0
    988     addi_t1,t1,1
    989     st_t1,a0,0
    990     addi_t0,t0,1
    991     la_br &sr_scope_outer
    992     b
    993     # Emit the name: the token text after its first `skip` bytes, sr_name_len bytes long.
    994 :sr_tail_start
    995     la_a0 &sr_tok_ptr
    996     ld_a1,a0,0
    997     ld_a2,a1,8
    998     la_a0 &sr_skip
    999     ld_a3,a0,0
   1000     add_a1,a2,a3
   1001     la_a0 &sr_name_len
   1002     ld_a2,a0,0
   1003     li_a3 %0 %0
   1004 :sr_tail_loop
   1005     la_br &sr_tail_done
   1006     beq_a3,a2
   1007     la_t1 &output_used
   1008     ld_t2,t1,0
   1009     li_a0 M1PP_OUTPUT_CAP
   1010     la_br &err_output_overflow
   1011     beq_t2,a0
   1012     la_a0 &output_buf_ptr
   1013     ld_a0,a0,0
   1014     add_a0,a0,t2
   1015     add_t2,a1,a3
   1016     lb_t2,t2,0
   1017     sb_t2,a0,0
   1018     la_t1 &output_used
   1019     ld_t2,t1,0
   1020     addi_t2,t2,1
   1021     st_t2,t1,0
   1022     addi_a3,a3,1
   1023     la_br &sr_tail_loop
   1024     b
   1025 :sr_tail_done
   1026     la_a0 &output_need_space
   1027     li_a1 %1 %0
   1028     st_a1,a0,0
   1029     ret
   1030 
   1031 ## --- Main processor ----------------------------------------------------------
   1032 ## Stream-driven loop. Pushes source_tokens as the initial stream, then drives
   1033 ## the streams[] stack until it empties. Per iteration: pop the stream if
   1034 ## exhausted, otherwise dispatch on the current token:
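    1035 ##   - line-start %macro      -> shim into define_macro via proc_pos
         ##   - line-start %struct / %enum
         ##                            -> shim into define_fielded via proc_pos
         ##   - line-start %scope / %endscope
         ##                            -> shim into push_scope / pop_scope via proc_pos
    1036 ##   - TOK_NEWLINE            -> emit_newline, advance, set line_start = 1
    1037 ##   - WORD + LPAREN follow + name in {! @ % $ %select %str}
    1038 ##                            -> expand_builtin_call(s, tok)
    1039 ##   - find_macro(tok) hit + (LPAREN follow, or macro->param_count == 0)
    1040 ##                            -> expand_call(s, macro)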
   1041 ##   - otherwise              -> emit_token, advance, clear line_start
   1042 ##
    1043 ## Stack frame: enter_16 reserves two 8-byte slots so we can preserve the
    1044 ## current Stream* (addressed below as sp,0) and the current Token*
    1045 ## (addressed below as sp,8) across calls (a0..a3, t0..t2 are caller-saved).
   1046 
    1047 ## process_tokens(): stream-driven main loop.
         ## Takes no arguments. The current Stream* is kept in frame slot sp,0 and the
         ## current Token* in frame slot sp,8; both are reloaded after every call.
         ## Stream fields as used here: +8 end, +16 pos, +24 line_start. Tokens are
         ## 24 bytes apart with kind at +0. The loop runs until current_stream()
         ## returns 0, then branches to err_scope_not_closed if scope_depth != 0.
    1048 :process_tokens
    1049     enter_16
    1050
    1051     # push_stream_span(source_tokens, source_end, -1)
    1052     la_a0 &source_tokens_ptr
    1053     ld_a0,a0,0
    1054     la_a1 &source_end
    1055     ld_a1,a1,0
             # a2 = a2 - a2 = 0, then add the neg1 immediate to form the -1 argument.
    1056     sub_a2,a2,a2
    1057     addi_a2,a2,neg1
    1058     la_br &push_stream_span
    1059     call
    1060
    1061 :proc_loop
    1062     # s = current_stream();  if (s == 0) done
    1063     la_br &current_stream
    1064     call
    1065     la_br &proc_done
    1066     beqz_a0
    1067     st_a0,sp,0
    1068
    1069     # if (s->pos == s->end) pop and continue
             # t0 = s->pos (+16), t1 = s->end (+8)
    1070     ld_t0,a0,16
    1071     ld_t1,a0,8
    1072     la_br &proc_pop_continue
    1073     beq_t0,t1
    1074
    1075     # tok = s->pos
    1076     st_t0,sp,8
    1077
    1078     # ---- line_start && tok->kind == TOK_WORD && tok eq "%macro" ----
             # a1 = s->line_start (+24); zero skips every line-start directive check.
    1079     ld_a1,a0,24
    1080     la_br &proc_check_newline
    1081     beqz_a1
    1082     ld_a1,t0,0
    1083     li_a2 TOK_WORD
    1084     la_br &proc_check_newline
    1085     bne_a1,a2
    1086     mov_a0,t0
    1087     la_a1 &const_macro
    1088     li_a2 %6 %0
    1089     la_br &tok_eq_const
    1090     call
             # Not %macro: fall through the chain of other line-start directives.
    1091     la_br &proc_check_struct
    1092     beqz_a0
    1093
    1094     # %macro: shim into define_macro through the proc_pos globals.
    1095     # define_macro reads/writes proc_pos and walks against source_end,
    1096     # so it only behaves correctly when s is the source stream — which
    1097     # holds in practice (line_start in expansion streams is cleared
    1098     # before any %macro could matter). After it returns we copy
    1099     # proc_pos back into s->pos and set s->line_start = 1.
    1100     ld_t0,sp,8
    1101     la_a0 &proc_pos
    1102     st_t0,a0,0
    1103     la_a0 &proc_line_start
    1104     li_a1 %1 %0
    1105     st_a1,a0,0
    1106     la_br &define_macro
    1107     call
    1108     ld_a0,sp,0
    1109     la_a1 &proc_pos
    1110     ld_t0,a1,0
    1111     st_t0,a0,16
    1112     li_t1 %1 %0
    1113     st_t1,a0,24
    1114     la_br &proc_loop
    1115     b
    1116
    1117 ## ---- line_start && tok eq "%struct" ----
    1118 ## The %macro guard above already proved line_start && kind == TOK_WORD; if
    1119 ## we reach here via a %macro non-match, those gates still hold.
    1120 :proc_check_struct
    1121     ld_t0,sp,8
    1122     mov_a0,t0
    1123     la_a1 &const_struct
    1124     li_a2 %7 %0
    1125     la_br &tok_eq_const
    1126     call
    1127     la_br &proc_check_enum
    1128     beqz_a0
    1129
    1130     # %struct matched: shim into define_fielded(stride=8, total="SIZE", len=4)
    1131     ld_t0,sp,8
    1132     la_a0 &proc_pos
    1133     st_t0,a0,0
    1134     la_a0 &proc_line_start
    1135     li_a1 %1 %0
    1136     st_a1,a0,0
    1137     li_a0 %8 %0
    1138     la_a1 &const_size
    1139     li_a2 %4 %0
    1140     la_br &define_fielded
    1141     call
             # Copy proc_pos back into s->pos and set s->line_start = 1.
    1142     ld_a0,sp,0
    1143     la_a1 &proc_pos
    1144     ld_t0,a1,0
    1145     st_t0,a0,16
    1146     li_t1 %1 %0
    1147     st_t1,a0,24
    1148     la_br &proc_loop
    1149     b
    1150
    1151 ## ---- line_start && tok eq "%enum" ----
    1152 :proc_check_enum
    1153     ld_t0,sp,8
    1154     mov_a0,t0
    1155     la_a1 &const_enum
    1156     li_a2 %5 %0
    1157     la_br &tok_eq_const
    1158     call
    1159     la_br &proc_check_scope
    1160     beqz_a0
    1161
    1162     # %enum matched: shim into define_fielded(stride=1, total="COUNT", len=5)
    1163     ld_t0,sp,8
    1164     la_a0 &proc_pos
    1165     st_t0,a0,0
    1166     la_a0 &proc_line_start
    1167     li_a1 %1 %0
    1168     st_a1,a0,0
    1169     li_a0 %1 %0
    1170     la_a1 &const_count
    1171     li_a2 %5 %0
    1172     la_br &define_fielded
    1173     call
             # Copy proc_pos back into s->pos and set s->line_start = 1.
    1174     ld_a0,sp,0
    1175     la_a1 &proc_pos
    1176     ld_t0,a1,0
    1177     st_t0,a0,16
    1178     li_t1 %1 %0
    1179     st_t1,a0,24
    1180     la_br &proc_loop
    1181     b
    1182
    1183 ## ---- line_start && tok eq "%scope" ----
    1184 :proc_check_scope
    1185     ld_t0,sp,8
    1186     mov_a0,t0
    1187     la_a1 &const_scope
    1188     li_a2 %6 %0
    1189     la_br &tok_eq_const
    1190     call
    1191     la_br &proc_check_endscope
    1192     beqz_a0
    1193
    1194     # %scope matched: shim into push_scope(stream_end).
    1195     ld_t0,sp,8
    1196     la_a0 &proc_pos
    1197     st_t0,a0,0
    1198     la_a0 &proc_line_start
    1199     li_a1 %1 %0
    1200     st_a1,a0,0
             # a0 = s->end (+8), passed as the stream_end argument.
    1201     ld_a0,sp,0
    1202     ld_a0,a0,8
    1203     la_br &push_scope
    1204     call
             # Copy proc_pos back into s->pos and set s->line_start = 1.
    1205     ld_a0,sp,0
    1206     la_a1 &proc_pos
    1207     ld_t0,a1,0
    1208     st_t0,a0,16
    1209     li_t1 %1 %0
    1210     st_t1,a0,24
    1211     la_br &proc_loop
    1212     b
    1213
    1214 ## ---- line_start && tok eq "%endscope" ----
    1215 :proc_check_endscope
    1216     ld_t0,sp,8
    1217     mov_a0,t0
    1218     la_a1 &const_endscope
    1219     li_a2 %9 %0
    1220     la_br &tok_eq_const
    1221     call
    1222     la_br &proc_check_newline
    1223     beqz_a0
    1224
    1225     # %endscope matched: shim into pop_scope(stream_end).
    1226     ld_t0,sp,8
    1227     la_a0 &proc_pos
    1228     st_t0,a0,0
    1229     la_a0 &proc_line_start
    1230     li_a1 %1 %0
    1231     st_a1,a0,0
             # a0 = s->end (+8), passed as the stream_end argument.
    1232     ld_a0,sp,0
    1233     ld_a0,a0,8
    1234     la_br &pop_scope
    1235     call
             # Copy proc_pos back into s->pos and set s->line_start = 1.
    1236     ld_a0,sp,0
    1237     la_a1 &proc_pos
    1238     ld_t0,a1,0
    1239     st_t0,a0,16
    1240     li_t1 %1 %0
    1241     st_t1,a0,24
    1242     la_br &proc_loop
    1243     b
    1244
    1245 :proc_check_newline
    1246     # reload s, tok
    1247     ld_a0,sp,0
    1248     ld_t0,sp,8
    1249     ld_a1,t0,0
    1250     li_a2 TOK_NEWLINE
    1251     la_br &proc_check_builtin
    1252     bne_a1,a2
    1253
    1254     # newline: s->pos += 24; s->line_start = 1; emit_newline()
    1255     addi_t0,t0,24
    1256     st_t0,a0,16
    1257     li_t1 %1 %0
    1258     st_t1,a0,24
    1259     la_br &emit_newline
    1260     call
    1261     la_br &proc_loop
    1262     b
    1263
    1264 :proc_check_builtin
    1265     # tok->kind == TOK_WORD && tok+1 < s->end && (tok+1)->kind == TOK_LPAREN ?
    1266     ld_a0,sp,0
    1267     ld_t0,sp,8
    1268     ld_a1,t0,0
    1269     li_a2 TOK_WORD
    1270     la_br &proc_check_macro
    1271     bne_a1,a2
             # t1 = tok + 1 (tokens are 24 bytes); a1 = s->end.
    1272     addi_t1,t0,24
    1273     ld_a1,a0,8
    1274     la_br &proc_check_builtin_has_next
    1275     blt_t1,a1
    1276     la_br &proc_check_macro
    1277     b
    1278 :proc_check_builtin_has_next
    1279     ld_a1,t1,0
    1280     li_a2 TOK_LPAREN
    1281     la_br &proc_check_macro
    1282     bne_a1,a2
    1283
    1284     # try the six builtin names: ! @ % $ %select %str
             # t0 still holds tok for the first probe; later probes reload tok from
             # sp,8 because each tok_eq_const call may clobber caller-saved registers.
    1285     mov_a0,t0
    1286     la_a1 &const_bang
    1287     li_a2 %1 %0
    1288     la_br &tok_eq_const
    1289     call
    1290     la_br &proc_do_builtin
    1291     bnez_a0
    1292     ld_a0,sp,8
    1293     la_a1 &const_at
    1294     li_a2 %1 %0
    1295     la_br &tok_eq_const
    1296     call
    1297     la_br &proc_do_builtin
    1298     bnez_a0
    1299     ld_a0,sp,8
    1300     la_a1 &const_pct
    1301     li_a2 %1 %0
    1302     la_br &tok_eq_const
    1303     call
    1304     la_br &proc_do_builtin
    1305     bnez_a0
    1306     ld_a0,sp,8
    1307     la_a1 &const_dlr
    1308     li_a2 %1 %0
    1309     la_br &tok_eq_const
    1310     call
    1311     la_br &proc_do_builtin
    1312     bnez_a0
    1313     ld_a0,sp,8
    1314     la_a1 &const_select
    1315     li_a2 %7 %0
    1316     la_br &tok_eq_const
    1317     call
    1318     la_br &proc_do_builtin
    1319     bnez_a0
    1320     ld_a0,sp,8
    1321     la_a1 &const_str
    1322     li_a2 %4 %0
    1323     la_br &tok_eq_const
    1324     call
    1325     la_br &proc_do_builtin
    1326     bnez_a0
    1327     la_br &proc_check_macro
    1328     b
    1329
    1330 :proc_do_builtin
    1331     # expand_builtin_call(s, tok)
    1332     ld_a0,sp,0
    1333     ld_a1,sp,8
    1334     la_br &expand_builtin_call
    1335     call
    1336     la_br &proc_loop
    1337     b
    1338
    1339 :proc_check_macro
    1340     # macro = find_macro(tok); if non-zero AND
    1341     #   ((tok+1 < s->end AND (tok+1)->kind == TOK_LPAREN) OR macro->param_count == 0)
    1342     # then expand_call. Paren-less form is reserved for 0-arg macros.
    1343     ld_a0,sp,8
    1344     la_br &find_macro
    1345     call
    1346     la_br &proc_emit
    1347     beqz_a0
             # Keep the Macro* in t2; no call intervenes before its uses below.
    1348     mov_t2,a0
    1349     ld_a0,sp,0
    1350     ld_t0,sp,8
    1351     addi_t1,t0,24
    1352     ld_a1,a0,8
    1353     la_br &proc_macro_has_next
    1354     blt_t1,a1
    1355     la_br &proc_macro_zero_arg
    1356     b
    1357 :proc_macro_has_next
    1358     ld_a1,t1,0
    1359     li_a2 TOK_LPAREN
    1360     la_br &proc_macro_zero_arg
    1361     bne_a1,a2
             # expand_call(s, macro)
    1362     ld_a0,sp,0
    1363     mov_a1,t2
    1364     la_br &expand_call
    1365     call
    1366     la_br &proc_loop
    1367     b
    1368 :proc_macro_zero_arg
    1369     # No trailing LPAREN. Expand only if macro->param_count == 0.
             # t0 = macro->param_count (+16); non-zero falls back to plain emission.
    1370     ld_t0,t2,16
    1371     la_br &proc_emit
    1372     bnez_t0
    1373     ld_a0,sp,0
    1374     mov_a1,t2
    1375     la_br &expand_call
    1376     call
    1377     la_br &proc_loop
    1378     b
    1379
    1380 :proc_emit
    1381     # emit_token(tok); s->pos += 24; s->line_start = 0
    1382     ld_a0,sp,8
    1383     la_br &emit_token
    1384     call
    1385     ld_a0,sp,0
    1386     ld_t0,a0,16
    1387     addi_t0,t0,24
    1388     st_t0,a0,16
    1389     li_t1 %0 %0
    1390     st_t1,a0,24
    1391     la_br &proc_loop
    1392     b
    1393
    1394 :proc_pop_continue
             # Current stream exhausted: pop it and resume with whatever is on top.
    1395     la_br &pop_stream
    1396     call
    1397     la_br &proc_loop
    1398     b
    1399
    1400 :proc_done
    1401     # Every %scope must be matched by an %endscope before EOF.
    1402     la_a0 &scope_depth
    1403     ld_t0,a0,0
    1404     la_br &err_scope_not_closed
    1405     bnez_t0
    1406     eret
   1407 
   1408 ## --- %scope / %endscope handlers --------------------------------------------
   1409 ## Called at proc_pos == the `%scope` / `%endscope` word on a line-start.
   1410 ## Input: a0 = stream end (pointer one past last token in the current stream).
   1411 ## Output: proc_pos advanced past the trailing newline (or stream end).
   1412 
   1413 ## push_scope(a0 = stream_end): consume `%scope NAME\n`.
   1414 ## Name must be a single WORD token; anything else on the line is an error.
   1415 :push_scope
   1416     enter_0
   1417 
   1418     # proc_pos += 24 (skip past the `%scope` token).
   1419     la_t0 &proc_pos
   1420     ld_t1,t0,0
   1421     addi_t1,t1,24
   1422     st_t1,t0,0
   1423 
   1424     # Require a WORD name token within the stream.
   1425     la_br &err_bad_scope_header
   1426     beq_t1,a0
   1427     ld_t2,t1,0
   1428     la_br &err_bad_scope_header
   1429     bnez_t2
   1430 
   1431     # scope_depth < MAX_SCOPE_DEPTH?
   1432     la_a1 &scope_depth
   1433     ld_a2,a1,0
   1434     li_a3 M1PP_MAX_SCOPE_DEPTH
   1435     la_br &err_scope_depth_overflow
   1436     beq_a2,a3
   1437 
   1438     # scope_stack[scope_depth] = (name.text_ptr, name.text_len)
   1439     la_a3 &scope_stack_ptr
   1440     ld_a3,a3,0
   1441     li_t0 %16 %0
   1442     mul_t0,t0,a2
   1443     add_a3,a3,t0
   1444     ld_t0,t1,8
   1445     st_t0,a3,0
   1446     ld_t0,t1,16
   1447     st_t0,a3,8
   1448 
   1449     # scope_depth++
   1450     addi_a2,a2,1
   1451     st_a2,a1,0
   1452 
   1453     # proc_pos += 24 (past the name).
   1454     la_t0 &proc_pos
   1455     ld_t1,t0,0
   1456     addi_t1,t1,24
   1457     st_t1,t0,0
   1458 
   1459     # EOF here is tolerated (caller handles stream end). Otherwise the next
   1460     # token must be TOK_NEWLINE — anything else is a header error.
   1461     la_br &psc_done
   1462     beq_t1,a0
   1463     ld_t2,t1,0
   1464     li_t0 TOK_NEWLINE
   1465     la_br &err_bad_scope_header
   1466     bne_t2,t0
   1467     addi_t1,t1,24
   1468     la_t0 &proc_pos
   1469     st_t1,t0,0
   1470 :psc_done
   1471     eret
   1472 
   1473 ## pop_scope(a0 = stream_end): consume `%endscope\n`. Extra tokens on the line
   1474 ## are tolerated (matches %endm's behavior) — skip to the next newline.
   1475 :pop_scope
   1476     enter_0
   1477 
   1478     # scope_depth > 0?
   1479     la_a1 &scope_depth
   1480     ld_a2,a1,0
   1481     la_br &err_scope_underflow
   1482     beqz_a2
   1483     addi_a2,a2,neg1
   1484     st_a2,a1,0
   1485 
   1486     # proc_pos += 24 (past the `%endscope` token).
   1487     la_t0 &proc_pos
   1488     ld_t1,t0,0
   1489     addi_t1,t1,24
   1490     st_t1,t0,0
   1491 
   1492 :pop_skip_loop
   1493     la_br &pop_done
   1494     beq_t1,a0
   1495     ld_t2,t1,0
   1496     li_t0 TOK_NEWLINE
   1497     la_br &pop_consume_newline
   1498     beq_t2,t0
   1499     addi_t1,t1,24
   1500     la_t0 &proc_pos
   1501     st_t1,t0,0
   1502     la_br &pop_skip_loop
   1503     b
   1504 :pop_consume_newline
   1505     addi_t1,t1,24
   1506     la_t0 &proc_pos
   1507     st_t1,t0,0
   1508 :pop_done
   1509     eret
   1510 
   1511 ## --- %macro storage: parse header + body into macros[] / macro_body_tokens --
   1512 ## Called at proc_pos == line-start `%macro`. Leaves proc_pos past the %endm
   1513 ## line with proc_line_start = 1. Uses BSS scratch (def_m_ptr, def_param_ptr,
   1514 ## def_body_line_start) since P1 enter/eret does not save s* registers.
   1515 ##
   1516 ## Macro record layout (296 bytes, see M1PP_MACRO_RECORD_SIZE):
   1517 ##   +0   name.ptr        (8)
   1518 ##   +8   name.len        (8)
   1519 ##   +16  param_count     (8)
   1520 ##   +24  params[16].ptr/.len  (16 * 16 = 256)
   1521 ##   +280 body_start      (8)  -> *Token into macro_body_tokens[]
   1522 ##   +288 body_end        (8)  -> exclusive end
   1523 
    1524 ## define_macro(): consume `%macro NAME(params...)\n ... %endm\n`.
         ## Takes no register arguments; the cursor is the proc_pos global and every
         ## end-of-input test compares it against source_end. Fills the record at
         ## macros_end, copies body tokens to macro_body_end, then advances both.
         ## Failures branch to err_too_many_macros, err_bad_macro_header,
         ## err_unterminated_macro or err_macro_body_overflow.
    1525 :define_macro
    1526     enter_0
    1527
    1528     # macros_end bounds check: if (macros_end == &macros + MACROS_CAP) fatal
    1529     la_a0 &macros_end
    1530     ld_t0,a0,0
    1531     la_a1 &macros_ptr
    1532     ld_a1,a1,0
    1533     li_a2 M1PP_MACROS_CAP
    1534     add_a1,a1,a2
    1535     la_br &err_too_many_macros
    1536     beq_t0,a1
    1537
    1538     # def_m_ptr = macros_end   (Macro *m = &macros[macro_count])
    1539     la_a1 &def_m_ptr
    1540     st_t0,a1,0
    1541
    1542     # advance past the %macro token itself
    1543     la_a0 &proc_pos
    1544     ld_t0,a0,0
    1545     addi_t0,t0,24
    1546     st_t0,a0,0
    1547
    1548     # ---- header: name (WORD) ----
    1549     la_a1 &source_end
    1550     ld_t1,a1,0
    1551     la_br &err_bad_macro_header
    1552     beq_t0,t1
    1553     ld_a1,t0,0
    1554     li_a2 TOK_WORD
    1555     la_br &err_bad_macro_header
    1556     bne_a1,a2
    1557
    1558     # m->name.ptr = tok->text_ptr; m->name.len = tok->text_len
    1559     ld_a2,t0,8
    1560     ld_a3,t0,16
    1561     la_a0 &def_m_ptr
    1562     ld_t2,a0,0
    1563     st_a2,t2,0
    1564     st_a3,t2,8
    1565
    1566     # m->param_count = 0; def_param_ptr = m + 24 (first TextSpan slot)
    1567     li_a0 %0 %0
    1568     st_a0,t2,16
    1569     addi_t2,t2,24
    1570     la_a0 &def_param_ptr
    1571     st_t2,a0,0
    1572
    1573     # advance past name
    1574     addi_t0,t0,24
    1575     la_a0 &proc_pos
    1576     st_t0,a0,0
    1577
    1578     # ---- header: LPAREN ----
    1579     la_a1 &source_end
    1580     ld_t1,a1,0
    1581     la_br &err_bad_macro_header
    1582     beq_t0,t1
    1583     ld_a1,t0,0
    1584     li_a2 TOK_LPAREN
    1585     la_br &err_bad_macro_header
    1586     bne_a1,a2
    1587
    1588     # advance past '('
    1589     addi_t0,t0,24
    1590     la_a0 &proc_pos
    1591     st_t0,a0,0
    1592
    1593     # ---- header: optional param list ----
    1594     # if at end -> fall through to RPAREN check (which will fail)
    1595     # if next is RPAREN -> skip the param loop
    1596     # else enter param loop
    1597     la_a1 &source_end
    1598     ld_t1,a1,0
    1599     la_br &def_header_close
    1600     beq_t0,t1
    1601     ld_a1,t0,0
    1602     li_a2 TOK_RPAREN
    1603     la_br &def_header_close
    1604     beq_a1,a2
    1605
    1606 :def_param_loop
    1607     # reject > 16 params: if (15 < param_count) fail   (param_count capped at 16)
    1608     la_a0 &def_m_ptr
    1609     ld_t2,a0,0
    1610     ld_a1,t2,16
    1611     li_a2 %15 %0
    1612     la_br &err_bad_macro_header
    1613     blt_a2,a1
    1614
    1615     # tok must be in range and WORD
    1616     la_a0 &proc_pos
    1617     ld_t0,a0,0
    1618     la_a1 &source_end
    1619     ld_t1,a1,0
    1620     la_br &err_bad_macro_header
    1621     beq_t0,t1
    1622     ld_a1,t0,0
    1623     li_a2 TOK_WORD
    1624     la_br &err_bad_macro_header
    1625     bne_a1,a2
    1626
    1627     # *def_param_ptr = (tok.text_ptr, tok.text_len); def_param_ptr += 16
    1628     ld_a2,t0,8
    1629     ld_a3,t0,16
    1630     la_a0 &def_param_ptr
    1631     ld_t1,a0,0
    1632     st_a2,t1,0
    1633     st_a3,t1,8
    1634     addi_t1,t1,16
    1635     st_t1,a0,0
    1636
    1637     # m->param_count++
    1638     la_a0 &def_m_ptr
    1639     ld_t2,a0,0
    1640     ld_a1,t2,16
    1641     addi_a1,a1,1
    1642     st_a1,t2,16
    1643
    1644     # advance past the param word
    1645     addi_t0,t0,24
    1646     la_a0 &proc_pos
    1647     st_t0,a0,0
    1648
    1649     # if next is COMMA, consume and loop; else break
    1650     la_a1 &source_end
    1651     ld_t1,a1,0
    1652     la_br &def_header_close
    1653     beq_t0,t1
    1654     ld_a1,t0,0
    1655     li_a2 TOK_COMMA
    1656     la_br &def_header_close
    1657     bne_a1,a2
    1658     addi_t0,t0,24
    1659     la_a0 &proc_pos
    1660     st_t0,a0,0
    1661     la_br &def_param_loop
    1662     b
    1663
    1664 :def_header_close
    1665     # ---- header: RPAREN ----
             # Reached from the empty-list shortcut and from both exits of the param
             # loop, so proc_pos is reloaded rather than trusted from a register.
    1666     la_a0 &proc_pos
    1667     ld_t0,a0,0
    1668     la_a1 &source_end
    1669     ld_t1,a1,0
    1670     la_br &err_bad_macro_header
    1671     beq_t0,t1
    1672     ld_a1,t0,0
    1673     li_a2 TOK_RPAREN
    1674     la_br &err_bad_macro_header
    1675     bne_a1,a2
    1676
    1677     addi_t0,t0,24
    1678     la_a0 &proc_pos
    1679     st_t0,a0,0
    1680
    1681     # ---- header: terminating NEWLINE ----
    1682     la_a1 &source_end
    1683     ld_t1,a1,0
    1684     la_br &err_bad_macro_header
    1685     beq_t0,t1
    1686     ld_a1,t0,0
    1687     li_a2 TOK_NEWLINE
    1688     la_br &err_bad_macro_header
    1689     bne_a1,a2
    1690
    1691     addi_t0,t0,24
    1692     la_a0 &proc_pos
    1693     st_t0,a0,0
    1694
    1695     # ---- body: m->body_start = macro_body_end; body_line_start = 1 ----
    1696     la_a1 &macro_body_end
    1697     ld_t2,a1,0
    1698     la_a0 &def_m_ptr
    1699     ld_t1,a0,0
    1700     li_a0 M1PP_MACRO_BODY_START_OFF
    1701     add_a0,t1,a0
    1702     st_t2,a0,0
    1703     la_a0 &def_body_line_start
    1704     li_a1 %1 %0
    1705     st_a1,a0,0
    1706
    1707 :def_body_loop
    1708     # if proc_pos == source_end: unterminated %macro
    1709     la_a0 &proc_pos
    1710     ld_t0,a0,0
    1711     la_a1 &source_end
    1712     ld_t1,a1,0
    1713     la_br &err_unterminated_macro
    1714     beq_t0,t1
    1715
    1716     # if (!body_line_start) copy token
    1717     la_a0 &def_body_line_start
    1718     ld_t2,a0,0
    1719     la_br &def_body_copy
    1720     beqz_t2
    1721
    1722     # if (tok.kind != TOK_WORD) copy token
    1723     ld_a1,t0,0
    1724     li_a2 TOK_WORD
    1725     la_br &def_body_copy
    1726     bne_a1,a2
    1727
    1728     # if (!tok_eq_const(tok, "%endm", 5)) copy token
    1729     mov_a0,t0
    1730     la_a1 &const_endm
    1731     li_a2 %5 %0
    1732     la_br &tok_eq_const
    1733     call
    1734     la_br &def_body_copy
    1735     beqz_a0
    1736
    1737     # matched %endm at line start -> skip to end of the line, then finish
    1738     la_br &def_endm_skip_to_newline
    1739     b
    1740
    1741 :def_body_copy
    1742     # bounds: if (macro_body_end - macro_body_tokens + 24 > MACRO_BODY_CAP) fail
             # t1 = macro_body_end, which doubles as the copy destination below.
    1743     la_a0 &macro_body_end
    1744     ld_t1,a0,0
    1745     la_a2 &macro_body_tokens_ptr
    1746     ld_a2,a2,0
    1747     sub_a3,t1,a2
    1748     addi_a3,a3,24
    1749     li_t2 M1PP_MACRO_BODY_CAP
    1750     la_br &err_macro_body_overflow
    1751     blt_t2,a3
    1752
    1753     # copy 24 bytes from *proc_pos to *macro_body_end
    1754     la_a0 &proc_pos
    1755     ld_t0,a0,0
    1756     ld_a1,t0,0
    1757     st_a1,t1,0
    1758     ld_a1,t0,8
    1759     st_a1,t1,8
    1760     ld_a1,t0,16
    1761     st_a1,t1,16
    1762
    1763     # macro_body_end += 24
    1764     addi_t1,t1,24
    1765     la_a0 &macro_body_end
    1766     st_t1,a0,0
    1767
    1768     # body_line_start = (tok.kind == TOK_NEWLINE)
    1769     ld_a1,t0,0
    1770     li_a2 TOK_NEWLINE
    1771     la_br &def_body_clear_ls
    1772     bne_a1,a2
    1773     la_a0 &def_body_line_start
    1774     li_a1 %1 %0
    1775     st_a1,a0,0
    1776     la_br &def_body_advance
    1777     b
    1778 :def_body_clear_ls
    1779     la_a0 &def_body_line_start
    1780     li_a1 %0 %0
    1781     st_a1,a0,0
    1782 :def_body_advance
    1783     # proc_pos += 24
    1784     la_a0 &proc_pos
    1785     ld_t0,a0,0
    1786     addi_t0,t0,24
    1787     st_t0,a0,0
    1788     la_br &def_body_loop
    1789     b
    1790
    1791 :def_endm_skip_to_newline
    1792     # consume tokens through the first NEWLINE (inclusive); tolerate EOF
             # The kind is read before the cursor advances, and proc_pos is stored
             # before the NEWLINE test, so the NEWLINE itself is consumed on exit.
    1793     la_a0 &proc_pos
    1794     ld_t0,a0,0
    1795     la_a1 &source_end
    1796     ld_t1,a1,0
    1797     la_br &def_finish
    1798     beq_t0,t1
    1799     ld_a1,t0,0
    1800     li_a2 TOK_NEWLINE
    1801     addi_t0,t0,24
    1802     la_a0 &proc_pos
    1803     st_t0,a0,0
    1804     la_br &def_finish
    1805     beq_a1,a2
    1806     la_br &def_endm_skip_to_newline
    1807     b
    1808
    1809 :def_finish
    1810     # m->body_end = macro_body_end
    1811     la_a1 &macro_body_end
    1812     ld_t2,a1,0
    1813     la_a0 &def_m_ptr
    1814     ld_t1,a0,0
    1815     li_a0 M1PP_MACRO_BODY_END_OFF
    1816     add_a0,t1,a0
    1817     st_t2,a0,0
    1818
    1819     # macros_end += MACRO_RECORD_SIZE
    1820     la_a0 &macros_end
    1821     ld_t0,a0,0
    1822     li_a1 M1PP_MACRO_RECORD_SIZE
    1823     add_t0,t0,a1
    1824     st_t0,a0,0
    1825
    1826     # caller resumes at line start
    1827     la_a0 &proc_line_start
    1828     li_a1 %1 %0
    1829     st_a1,a0,0
    1830     eret
   1831 
   1832 ## --- %struct / %enum directive ----------------------------------------------
    1833 ## define_fielded(a0=stride, a1=total_name_ptr, a2=total_name_len).
    1834 ## Parses `%struct NAME { f1 f2 ... }` or `%enum NAME { ... }` (caller has
    1835 ## already detected %struct / %enum at line start and primed proc_pos to
    1836 ## that token). Synthesizes N+1 zero-parameter macros — NAME.field_k -> k*stride
    1837 ## and NAME.<total_name> -> N*stride — by appending each {name, body-token}
    1838 ## pair into macros[] / macro_body_tokens[].
    1839 ##
    1840 ## All working state lives in BSS (df_* slots + df_name_scratch / df_digit_scratch)
    1841 ## because df_emit_field calls append_text, which clobbers caller-saved regs.
         ##
         ## Every end-of-input test compares proc_pos against source_end. A malformed
         ## header or field branches to err_bad_directive; running out of tokens
         ## inside the braces branches to err_unterminated_directive. On return
         ## proc_pos is past the first NEWLINE after '}' (or at source_end) and
         ## proc_line_start is 1.
    1842 :define_fielded
    1843     enter_0
    1844
    1845     # Save directive args to BSS.
    1846     la_a3 &df_stride
    1847     st_a0,a3,0
    1848     la_a3 &df_total_name_ptr
    1849     st_a1,a3,0
    1850     la_a3 &df_total_name_len
    1851     st_a2,a3,0
    1852
    1853     # advance past the %struct / %enum directive token
    1854     la_a0 &proc_pos
    1855     ld_t0,a0,0
    1856     addi_t0,t0,24
    1857     st_t0,a0,0
    1858
    1859     # ---- header: name (WORD) ----
    1860     la_a1 &source_end
    1861     ld_t1,a1,0
    1862     la_br &err_bad_directive
    1863     beq_t0,t1
    1864     ld_a1,t0,0
    1865     li_a2 TOK_WORD
    1866     la_br &err_bad_directive
    1867     bne_a1,a2
    1868
    1869     # df_base_ptr = tok.text_ptr; df_base_len = tok.text_len
    1870     ld_a2,t0,8
    1871     la_a3 &df_base_ptr
    1872     st_a2,a3,0
    1873     ld_a2,t0,16
    1874     la_a3 &df_base_len
    1875     st_a2,a3,0
    1876
    1877     # advance past the base name
    1878     addi_t0,t0,24
    1879     la_a0 &proc_pos
    1880     st_t0,a0,0
    1881
    1882 ## skip NEWLINE tokens before '{' (tolerates `%struct NAME\n{ ... }`)
    1883 :df_skip_nl_before_lbrace
    1884     la_a0 &proc_pos
    1885     ld_t0,a0,0
    1886     la_a1 &source_end
    1887     ld_t1,a1,0
    1888     la_br &err_bad_directive
    1889     beq_t0,t1
    1890     ld_a1,t0,0
    1891     li_a2 TOK_NEWLINE
    1892     la_br &df_require_lbrace
    1893     bne_a1,a2
    1894     addi_t0,t0,24
    1895     la_a0 &proc_pos
    1896     st_t0,a0,0
    1897     la_br &df_skip_nl_before_lbrace
    1898     b
    1899
    1900 :df_require_lbrace
    1901     # expect LBRACE
             # a1 (current token kind) and t0 (proc_pos) are carried over from
             # df_skip_nl_before_lbrace, the only branch into this label in this
             # function.
    1902     li_a2 TOK_LBRACE
    1903     la_br &err_bad_directive
    1904     bne_a1,a2
    1905
    1906     # advance past '{'
    1907     addi_t0,t0,24
    1908     la_a0 &proc_pos
    1909     st_t0,a0,0
    1910
    1911     # df_index = 0
    1912     li_a0 %0 %0
    1913     la_a1 &df_index
    1914     st_a0,a1,0
    1915
    1916 ## field loop: skip comma/newline separators, stop at '}', else consume a WORD.
    1917 :df_field_loop
    1918     la_a0 &proc_pos
    1919     ld_t0,a0,0
    1920     la_a1 &source_end
    1921     ld_t1,a1,0
    1922     la_br &err_unterminated_directive
    1923     beq_t0,t1
    1924     ld_a1,t0,0
    1925
    1926     # separator: COMMA or NEWLINE -> advance and reloop
    1927     li_a2 TOK_COMMA
    1928     la_br &df_field_skip_sep
    1929     beq_a1,a2
    1930     li_a2 TOK_NEWLINE
    1931     la_br &df_field_skip_sep
    1932     beq_a1,a2
    1933
    1934     # end-of-list marker '}' -> break
    1935     li_a2 TOK_RBRACE
    1936     la_br &df_fields_done
    1937     beq_a1,a2
    1938
    1939     # else must be a WORD
    1940     li_a2 TOK_WORD
    1941     la_br &err_bad_directive
    1942     bne_a1,a2
    1943
    1944     # df_suffix_ptr = tok.text_ptr; df_suffix_len = tok.text_len
    1945     ld_a2,t0,8
    1946     la_a3 &df_suffix_ptr
    1947     st_a2,a3,0
    1948     ld_a2,t0,16
    1949     la_a3 &df_suffix_len
    1950     st_a2,a3,0
    1951
    1952     # df_value = df_index * df_stride
    1953     la_a0 &df_index
    1954     ld_t1,a0,0
    1955     la_a0 &df_stride
    1956     ld_t2,a0,0
    1957     mul_a0,t1,t2
    1958     la_a1 &df_value
    1959     st_a0,a1,0
    1960
    1961     # synthesize the field macro
    1962     la_br &df_emit_field
    1963     call
    1964
    1965     # df_index++
             # Everything is reloaded from BSS from here on: the call above may have
             # clobbered the caller-saved registers.
    1966     la_a0 &df_index
    1967     ld_t1,a0,0
    1968     addi_t1,t1,1
    1969     st_t1,a0,0
    1970
    1971     # advance past the field word
    1972     la_a0 &proc_pos
    1973     ld_t0,a0,0
    1974     addi_t0,t0,24
    1975     st_t0,a0,0
    1976     la_br &df_field_loop
    1977     b
    1978
    1979 :df_field_skip_sep
             # t0 = proc_pos as loaded at the top of df_field_loop.
    1980     addi_t0,t0,24
    1981     la_a0 &proc_pos
    1982     st_t0,a0,0
    1983     la_br &df_field_loop
    1984     b
    1985
    1986 :df_fields_done
    1987     # advance past '}'
             # t0 = proc_pos as loaded at the top of df_field_loop.
    1988     addi_t0,t0,24
    1989     la_a0 &proc_pos
    1990     st_t0,a0,0
    1991
    1992     # ---- emit totalizer: df_suffix <- df_total_name; df_value = N * stride ----
    1993     la_a0 &df_total_name_ptr
    1994     ld_t0,a0,0
    1995     la_a1 &df_suffix_ptr
    1996     st_t0,a1,0
    1997     la_a0 &df_total_name_len
    1998     ld_t0,a0,0
    1999     la_a1 &df_suffix_len
    2000     st_t0,a1,0
    2001
             # df_index now equals the number of fields seen (N).
    2002     la_a0 &df_index
    2003     ld_t1,a0,0
    2004     la_a0 &df_stride
    2005     ld_t2,a0,0
    2006     mul_a0,t1,t2
    2007     la_a1 &df_value
    2008     st_a0,a1,0
    2009
    2010     la_br &df_emit_field
    2011     call
    2012
    2013     # consume tokens through the first trailing NEWLINE (tolerate EOF)
    2014 :df_skip_trailing_loop
             # The kind is read before the cursor advances, and proc_pos is stored
             # before the NEWLINE test, so the NEWLINE itself is consumed on exit.
    2015     la_a0 &proc_pos
    2016     ld_t0,a0,0
    2017     la_a1 &source_end
    2018     ld_t1,a1,0
    2019     la_br &df_finish
    2020     beq_t0,t1
    2021     ld_a1,t0,0
    2022     li_a2 TOK_NEWLINE
    2023     addi_t0,t0,24
    2024     la_a0 &proc_pos
    2025     st_t0,a0,0
    2026     la_br &df_finish
    2027     beq_a1,a2
    2028     la_br &df_skip_trailing_loop
    2029     b
    2030
    2031 :df_finish
             # caller resumes at line start
    2032     la_a0 &proc_line_start
    2033     li_a1 %1 %0
    2034     st_a1,a0,0
    2035     eret
   2036 
   2037 ## df_emit_field() -> void. Non-leaf (enter_0 / eret). Reads df_base_ptr/len,
   2038 ## df_suffix_ptr/len and df_value from BSS; appends one zero-parameter macro record at macros_end whose body is a single TOK_WORD token.
   2039 ## Builds the "BASE.SUFFIX" name in df_name_scratch and the decimal text of df_value via df_render_decimal, then copies both into text_buf via append_text so they outlive the scratch.
   2040 ## Fatal: err_too_many_macros when macros_end has reached macros + M1PP_MACROS_CAP, err_macro_body_overflow when the body-token table cannot take one more token.
   2041 :df_emit_field
   2042     enter_0
   2043 
   2044     # macros_end capacity check: fatal when macros_end == macros + M1PP_MACROS_CAP
   2045     la_a0 &macros_end
   2046     ld_t0,a0,0
   2047     la_a1 &macros_ptr
   2048     ld_a1,a1,0
   2049     li_a2 M1PP_MACROS_CAP
   2050     add_a1,a1,a2
   2051     la_br &err_too_many_macros
   2052     beq_t0,a1
   2053 
   2054     # ---- assemble "BASE.SUFFIX" into df_name_scratch ----
   2055     # copy base bytes: t0 = src, t1 = base_len, t2 = scratch, a3 = byte index
   2056     la_a0 &df_base_ptr
   2057     ld_t0,a0,0
   2058     la_a0 &df_base_len
   2059     ld_t1,a0,0
   2060     la_t2 &df_name_scratch_ptr
   2061     ld_t2,t2,0
   2062     li_a3 %0 %0
   2063 :df_ef_base_loop
   2064     la_br &df_ef_base_done
   2065     beq_a3,t1
   2066     add_a0,t0,a3
   2067     lb_a0,a0,0
   2068     add_a1,t2,a3
   2069     sb_a0,a1,0
   2070     addi_a3,a3,1
   2071     la_br &df_ef_base_loop
   2072     b
   2073 :df_ef_base_done
   2074     # scratch[base_len] = '.' (46); a1 is left pointing at that byte
   2075     add_a1,t2,t1
   2076     li_a0 %46 %0
   2077     sb_a0,a1,0
   2078 
   2079     # copy suffix bytes into scratch[base_len + 1 ..]: t0 = src, t1 = suffix_len, a1 = dst base, a3 = byte index
   2080     la_a0 &df_suffix_ptr
   2081     ld_t0,a0,0
   2082     la_a0 &df_suffix_len
   2083     ld_t1,a0,0
   2084     addi_a1,a1,1
   2085     li_a3 %0 %0
   2086 :df_ef_suffix_loop
   2087     la_br &df_ef_suffix_done
   2088     beq_a3,t1
   2089     add_a0,t0,a3
   2090     lb_a0,a0,0
   2091     add_a2,a1,a3
   2092     sb_a0,a2,0
   2093     addi_a3,a3,1
   2094     la_br &df_ef_suffix_loop
   2095     b
   2096 :df_ef_suffix_done
   2097 
   2098     # name_len = base_len + 1 + suffix_len; stored in df_name_len and reloaded after the append_text call
   2099     la_a0 &df_base_len
   2100     ld_t0,a0,0
   2101     la_a0 &df_suffix_len
   2102     ld_t1,a0,0
   2103     add_t0,t0,t1
   2104     addi_t0,t0,1
   2105     la_a1 &df_name_len
   2106     st_t0,a1,0
   2107 
   2108     # durable_name = append_text(df_name_scratch, name_len)  (a0 = ptr, a1 = len)
   2109     la_a0 &df_name_scratch_ptr
   2110     ld_a0,a0,0
   2111     mov_a1,t0
   2112     la_br &append_text
   2113     call
   2114     # a0 = durable_name ptr
   2115 
   2116     # m = macros_end; m->name.ptr = durable_name (+0); m->name.len = name_len (+8)
   2117     la_a1 &macros_end
   2118     ld_t2,a1,0
   2119     st_a0,t2,0
   2120     la_a0 &df_name_len
   2121     ld_a0,a0,0
   2122     st_a0,t2,8
   2123 
   2124     # m->param_count = 0 at +16  (params[] left zeroed; not read when count == 0)
   2125     li_a0 %0 %0
   2126     st_a0,t2,16
   2127 
   2128     # render df_value into df_digit_scratch (reverse fill); sets df_digit_cursor and df_digit_count
   2129     la_br &df_render_decimal
   2130     call
   2131 
   2132     # durable_digits = append_text(df_digit_cursor, df_digit_count)
   2133     la_a0 &df_digit_cursor
   2134     ld_a0,a0,0
   2135     la_a1 &df_digit_count
   2136     ld_a1,a1,0
   2137     la_br &append_text
   2138     call
   2139     # a0 = durable_digits
   2140 
   2141     # macro_body_end capacity check: fatal when (macro_body_end - macro_body_tokens) + 24 > M1PP_MACRO_BODY_CAP
   2142     la_a1 &macro_body_end
   2143     ld_t0,a1,0
   2144     la_a2 &macro_body_tokens_ptr
   2145     ld_a2,a2,0
   2146     sub_a3,t0,a2
   2147     addi_a3,a3,24
   2148     li_t2 M1PP_MACRO_BODY_CAP
   2149     la_br &err_macro_body_overflow
   2150     blt_t2,a3
   2151 
   2152     # body_tok = TOK_WORD { durable_digits, df_digit_count }  (kind +0, text ptr +8, text len +16)
   2153     li_a1 TOK_WORD
   2154     st_a1,t0,0
   2155     st_a0,t0,8
   2156     la_a2 &df_digit_count
   2157     ld_a2,a2,0
   2158     st_a2,t0,16
   2159 
   2160     # m->body_start = macro_body_end (the slot we just wrote); m is reloaded because t2 held the cap above
   2161     la_a0 &macros_end
   2162     ld_t2,a0,0
   2163     li_a1 M1PP_MACRO_BODY_START_OFF
   2164     add_a1,t2,a1
   2165     st_t0,a1,0
   2166 
   2167     # macro_body_end += 24 (one token)
   2168     addi_t0,t0,24
   2169     la_a1 &macro_body_end
   2170     st_t0,a1,0
   2171 
   2172     # m->body_end = macro_body_end (one past the single body token)
   2173     li_a1 M1PP_MACRO_BODY_END_OFF
   2174     add_a1,t2,a1
   2175     st_t0,a1,0
   2176 
   2177     # macros_end += M1PP_MACRO_RECORD_SIZE (publishes the new record)
   2178     li_a0 M1PP_MACRO_RECORD_SIZE
   2179     add_t2,t2,a0
   2180     la_a1 &macros_end
   2181     st_t2,a1,0
   2182 
   2183     eret
   2184 
   2185 ## df_render_decimal() -> void. Leaf. Reads df_value and writes its decimal
   2186 ## digits back-to-front so the last digit sits at df_digit_scratch + 23; stores the address of the first digit in df_digit_cursor and the digit count in
   2187 ## df_digit_count for a subsequent append_text call. Clobbers a0-a2, t0-t2. NOTE(review): no sign handling and no check that the cursor stays at or above df_digit_scratch — presumably df_value is small and non-negative; confirm.
   2188 :df_render_decimal
   2189     la_a0 &df_value
   2190     ld_t0,a0,0
   2191     la_t1 &df_digit_scratch
   2192     li_a2 %24 %0
   2193     add_t1,t1,a2
   2194     mov_t2,t1
   2195 
   2196     # special-case v == 0 -> single '0' (48); any nonzero v goes straight to the digit loop
   2197     la_br &df_rd_loop
   2198     bnez_t0
   2199     addi_t2,t2,neg1
   2200     li_a0 %48 %0
   2201     sb_a0,t2,0
   2202     la_br &df_rd_done
   2203     b
   2204 :df_rd_loop
   2205     la_br &df_rd_done
   2206     beqz_t0
   2207     mov_a0,t0
   2208     li_a1 %10 %0
   2209     rem_a2,a0,a1
   2210     addi_a2,a2,48
   2211     addi_t2,t2,neg1
   2212     sb_a2,t2,0
   2213     mov_a0,t0
   2214     li_a1 %10 %0
   2215     div_a0,a0,a1
   2216     mov_t0,a0
   2217     la_br &df_rd_loop
   2218     b
   2219 :df_rd_done
   2220     la_a1 &df_digit_scratch
   2221     li_a2 %24 %0
   2222     add_a1,a1,a2
   2223     sub_a0,a1,t2
   2224     la_a1 &df_digit_count
   2225     st_a0,a1,0
   2226     la_a1 &df_digit_cursor
   2227     st_t2,a1,0
   2228     ret
   2229 
   2230 ## ============================================================================
   2231 ## --- Stream stack + expansion-pool lifetime ---------------------------------
   2232 ## ============================================================================
   2233 ## process_tokens drives a stack of token streams. The source token array is
   2234 ## pushed first; each macro expansion or chosen %select branch pushes a fresh
   2235 ## stream backed by a slice of expand_pool. Popping such a stream rewinds
   2236 ## pool_used to that stream's pool_mark.
   2237 
   2238 ## push_stream_span(a0=start_tok, a1=end_tok, a2=pool_mark) -> void (fatal on overflow)
   2239 ## Push Stream { start = pos = a0, end = a1, line_start = 1, pool_mark = a2 }
   2240 ## onto streams[]. Bumps stream_top. pool_mark is a byte offset into
   2241 ## expand_pool, or -1 for a source-owned stream (pop_stream won't rewind).
   2242 ## Record layout as stored here: start +0, end +8, pos +16, line_start +24, pool_mark +32.
   2243 ## stream_top is maintained as a byte offset into streams[] (count * 40),
   2244 ## matching the running-tail-pointer pattern used by source_end / macros_end. Overflow (stream_top + M1PP_STREAM_SIZE > M1PP_STREAM_STACK_CAP) branches to err_token_overflow.
   2245 ## Reads/writes: streams, stream_top. Leaf; a0-a2 are left unchanged, a3 and t0-t2 are clobbered.
   2246 :push_stream_span
   2247     # new_top = stream_top + STREAM_SIZE; if (cap < new_top) fatal. t0 keeps &stream_top for the final store.
   2248     la_t0 &stream_top
   2249     ld_t1,t0,0
   2250     li_t2 M1PP_STREAM_SIZE
   2251     add_t2,t1,t2
   2252     li_a3 M1PP_STREAM_STACK_CAP
   2253     la_br &err_token_overflow
   2254     blt_a3,t2
   2255 
   2256     # s = streams + stream_top (old top, a byte offset)
   2257     la_a3 &streams_ptr
   2258     ld_a3,a3,0
   2259     add_a3,a3,t1
   2260 
   2261     # s->start = a0; s->end = a1; s->pos = a0; s->line_start = 1; s->pool_mark = a2
   2262     st_a0,a3,0
   2263     st_a1,a3,8
   2264     st_a0,a3,16
   2265     li_t1 %1 %0
   2266     st_t1,a3,24
   2267     st_a2,a3,32
   2268 
   2269     # stream_top = new_top
   2270     st_t2,t0,0
   2271     ret
   2272 
   2273 ## current_stream() -> a0 = &streams[stream_top-1], or 0 if empty (stream_top == 0). Leaf.
   2274 ## stream_top is a byte offset, so &streams[top-1] = streams + stream_top - M1PP_STREAM_SIZE (40).
   2275 ## Reads: streams, stream_top. Clobbers t0, plus t1 on the non-empty path.
   2276 :current_stream
   2277     la_a0 &stream_top
   2278     ld_t0,a0,0
   2279     la_br &current_stream_empty
   2280     beqz_t0
   2281     la_a0 &streams_ptr
   2282     ld_a0,a0,0
   2283     add_a0,a0,t0
   2284     li_t1 M1PP_STREAM_SIZE
   2285     sub_a0,a0,t1
   2286     ret
   2287 :current_stream_empty
   2288     li_a0 %0 %0
   2289     ret
   2290 
   2291 ## pop_stream() -> void. Leaf. No-op when the stack is empty (stream_top == 0).
   2292 ## Otherwise subtract M1PP_STREAM_SIZE from stream_top. If the popped stream's pool_mark (+32) >= 0, restore
   2293 ## pool_used = pool_mark (reclaim the expansion-pool space it used); a negative mark leaves pool_used alone.
   2294 ## Reads/writes: streams, stream_top, pool_used. Clobbers a0, a1, t0, t1.
   2295 :pop_stream
   2296     la_a0 &stream_top
   2297     ld_t0,a0,0
   2298     la_br &pop_stream_done
   2299     beqz_t0
   2300     li_t1 M1PP_STREAM_SIZE
   2301     sub_t0,t0,t1
   2302     st_t0,a0,0
   2303 
   2304     # mark = popped->pool_mark (the popped record now sits at streams + new stream_top)
   2305     la_a1 &streams_ptr
   2306     ld_a1,a1,0
   2307     add_a1,a1,t0
   2308     ld_t0,a1,32
   2309 
   2310     # if (mark < 0) skip; else pool_used = mark
   2311     la_br &pop_stream_done
   2312     bltz_t0
   2313     la_a1 &pool_used
   2314     st_t0,a1,0
   2315 :pop_stream_done
   2316     ret
   2317 
   2318 ## copy_span_to_pool(a0=start_tok, a1=end_tok) -> void (fatal on pool overflow)
   2319 ## Append each 24-byte Token in [start, end) to expand_pool at pool_used,
   2320 ## advancing pool_used after every token; an empty span copies nothing. Overflow (pool_used + 24 > M1PP_EXPAND_CAP) branches to err_token_overflow.
   2321 ## Reads/writes: expand_pool, pool_used. Leaf. a0 is advanced in place (a0 == a1 on return); a2, a3, t0-t2 are clobbered when at least one token is copied.
   2322 :copy_span_to_pool
   2323 :cstp_loop
   2324     # if (start == end) done
   2325     la_br &cstp_done
   2326     beq_a0,a1
   2327 
   2328     # bounds: pool_used + 24 must fit in M1PP_EXPAND_CAP (fatal when cap < pool_used + 24)
   2329     la_a2 &pool_used
   2330     ld_t0,a2,0
   2331     addi_t1,t0,24
   2332     li_t2 M1PP_EXPAND_CAP
   2333     la_br &err_token_overflow
   2334     blt_t2,t1
   2335 
   2336     # dst = expand_pool + pool_used (pool_used is a byte offset)
   2337     la_a3 &expand_pool_ptr
   2338     ld_a3,a3,0
   2339     add_a3,a3,t0
   2340 
   2341     # copy 24 bytes (3 × u64)
   2342     ld_t1,a0,0
   2343     st_t1,a3,0
   2344     ld_t1,a0,8
   2345     st_t1,a3,8
   2346     ld_t1,a0,16
   2347     st_t1,a3,16
   2348 
   2349     # pool_used += 24 (written back through a2); start += 24
   2350     addi_t0,t0,24
   2351     st_t0,a2,0
   2352     addi_a0,a0,24
   2353     la_br &cstp_loop
   2354     b
   2355 :cstp_done
   2356     ret
   2357 
   2358 ## push_pool_stream_from_mark(a0=mark) -> void (fatal on overflow, via push_stream_span)
   2359 ## mark is a byte offset into expand_pool. If pool_used == mark (empty expansion), do nothing and return.
   2360 ## Otherwise push_stream_span(expand_pool+mark, expand_pool+pool_used, mark).
   2361 ## Reads: expand_pool base pointer, pool_used. Writes (through push_stream_span): streams, stream_top. Non-leaf:
   2362 ## needs a frame so the call to push_stream_span doesn't clobber LR.
   2363 :push_pool_stream_from_mark
   2364     enter_0
   2365     # if (pool_used == mark) return
   2366     la_a1 &pool_used
   2367     ld_t0,a1,0
   2368     la_br &ppsfm_done
   2369     beq_t0,a0
   2370 
   2371     # push_stream_span(expand_pool+mark, expand_pool+pool_used, mark); t1 carries mark while a0 is rewritten
   2372     la_a2 &expand_pool_ptr
   2373     ld_a2,a2,0
   2374     mov_t1,a0
   2375     add_a0,a2,a0
   2376     add_a1,a2,t0
   2377     mov_a2,t1
   2378     la_br &push_stream_span
   2379     call
   2380 :ppsfm_done
   2381     eret
   2382 
   2383 ## ============================================================================
   2384 ## --- Argument parsing -------------------------------------------------------
   2385 ## ============================================================================
   2386 
   2387 ## parse_args(a0=lparen_tok, a1=limit_tok) -> void (fatal on unterminated/overflow/unbalanced braces)
   2388 ## Scan tokens from lparen+1 up to limit, tracking paren depth and brace depth. A
   2389 ## TOK_COMMA ends one arg and starts the next only at paren depth 1 with brace depth 0; the TOK_RPAREN that
   2390 ## brings depth to 0 ends the last arg. An empty `()` is arg_count = 0.
   2391 ## Leaf (no calls). Scan state lives in pa_pos, pa_arg_start, pa_limit, pa_depth, pa_arg_index, pa_brace_depth.
   2392 ## Writes globals:
   2393 ##   arg_starts[i]  = first token of arg i
   2394 ##   arg_ends[i]    = one past last token of arg i
   2395 ##   arg_count      = number of args (0..16)
   2396 ##   call_end_pos   = one past the closing RPAREN
   2397 ## Fatal on: arg index reaching M1PP_MAX_PARAMS (err_bad_macro_header), reaching limit without matching RPAREN (err_unterminated_macro),
   2398 ## an RBRACE with no open brace, or the closing RPAREN while a brace is still open (err_unbalanced_braces).
   2399 :parse_args
   2400     # tok = lparen + 1; arg_start = tok; depth = 1; arg_index = 0; brace_depth = 0
   2401     addi_a0,a0,24
   2402     la_a2 &pa_pos
   2403     st_a0,a2,0
   2404     la_a2 &pa_arg_start
   2405     st_a0,a2,0
   2406     la_a2 &pa_limit
   2407     st_a1,a2,0
   2408     li_a2 %1 %0
   2409     la_a3 &pa_depth
   2410     st_a2,a3,0
   2411     li_a2 %0 %0
   2412     la_a3 &pa_arg_index
   2413     st_a2,a3,0
   2414     li_a2 %0 %0
   2415     la_a3 &pa_brace_depth
   2416     st_a2,a3,0
   2417 
   2418 :pa_loop
   2419     # if (tok == limit) fatal unterminated (equality test; tok only ever advances by one 24-byte token). t0 = tok for every handler below.
   2420     la_a0 &pa_pos
   2421     ld_t0,a0,0
   2422     la_a1 &pa_limit
   2423     ld_t1,a1,0
   2424     la_br &err_unterminated_macro
   2425     beq_t0,t1
   2426 
   2427     # kind = tok->kind
   2428     ld_a2,t0,0
   2429 
   2430     # dispatch on kind: LPAREN, RPAREN, COMMA, LBRACE, RBRACE each have a handler; anything else just advances
   2431     li_a3 TOK_LPAREN
   2432     la_br &pa_lparen
   2433     beq_a2,a3
   2434     li_a3 TOK_RPAREN
   2435     la_br &pa_rparen
   2436     beq_a2,a3
   2437     li_a3 TOK_COMMA
   2438     la_br &pa_maybe_comma
   2439     beq_a2,a3
   2440     li_a3 TOK_LBRACE
   2441     la_br &pa_lbrace
   2442     beq_a2,a3
   2443     li_a3 TOK_RBRACE
   2444     la_br &pa_rbrace
   2445     beq_a2,a3
   2446 
   2447     # default: tok++
   2448     addi_t0,t0,24
   2449     la_a0 &pa_pos
   2450     st_t0,a0,0
   2451     la_br &pa_loop
   2452     b
   2453 
   2454 :pa_lparen
   2455     la_a0 &pa_depth
   2456     ld_t1,a0,0
   2457     addi_t1,t1,1
   2458     st_t1,a0,0
   2459     addi_t0,t0,24
   2460     la_a0 &pa_pos
   2461     st_t0,a0,0
   2462     la_br &pa_loop
   2463     b
   2464 
   2465 :pa_rparen
   2466     # depth--
   2467     la_a0 &pa_depth
   2468     ld_t1,a0,0
   2469     addi_t1,t1,neg1
   2470     st_t1,a0,0
   2471     # if (depth == 0) this RPAREN closes the call; otherwise tok++ and loop
   2472     la_br &pa_rparen_close
   2473     beqz_t1
   2474     addi_t0,t0,24
   2475     la_a0 &pa_pos
   2476     st_t0,a0,0
   2477     la_br &pa_loop
   2478     b
   2479 
   2480 :pa_rparen_close
   2481     # depth == 0: if brace_depth != 0 -> unbalanced braces
   2482     la_a0 &pa_brace_depth
   2483     ld_t1,a0,0
   2484     la_br &err_unbalanced_braces
   2485     bnez_t1
   2486     # close out the call.
   2487     # a1 = arg_start (BSS), a2 = arg_index (BSS), t0 = tok = the closing RPAREN.
   2488     la_a0 &pa_arg_start
   2489     ld_a1,a0,0
   2490     la_a0 &pa_arg_index
   2491     ld_a2,a0,0
   2492 
   2493     # if (arg_start == tok && arg_index == 0) -> arg_count = 0; any other case records a final arg
   2494     la_br &pa_close_with_arg
   2495     bne_a1,t0
   2496     la_br &pa_close_with_arg
   2497     bnez_a2
   2498 
   2499     # empty (): arg_count = 0
   2500     li_a3 %0 %0
   2501     la_a0 &arg_count
   2502     st_a3,a0,0
   2503     la_br &pa_finish
   2504     b
   2505 
   2506 :pa_close_with_arg
   2507     # if (arg_index >= M1PP_MAX_PARAMS) fatal: branch to ok only if arg_index < M1PP_MAX_PARAMS
   2508     li_a3 M1PP_MAX_PARAMS
   2509     la_br &pa_close_with_arg_ok
   2510     blt_a2,a3
   2511     la_br &err_bad_macro_header
   2512     b
   2513 :pa_close_with_arg_ok
   2514     # arg_starts[arg_index] = arg_start; arg_ends[arg_index] = tok  (8-byte slots: t1 = arg_index << 3)
   2515     la_a3 &arg_starts_ptr
   2516     ld_a3,a3,0
   2517     shli_t1,a2,3
   2518     add_a3,a3,t1
   2519     st_a1,a3,0
   2520     la_a3 &arg_ends_ptr
   2521     ld_a3,a3,0
   2522     add_a3,a3,t1
   2523     st_t0,a3,0
   2524     # arg_count = arg_index + 1
   2525     addi_a2,a2,1
   2526     la_a0 &arg_count
   2527     st_a2,a0,0
   2528 
   2529 :pa_finish
   2530     # call_end_pos = tok + 24 (one past the closing RPAREN)
   2531     addi_t0,t0,24
   2532     la_a0 &call_end_pos
   2533     st_t0,a0,0
   2534     ret
   2535 
   2536 :pa_maybe_comma
   2537     # only split at depth == 1
   2538     la_a0 &pa_depth
   2539     ld_t1,a0,0
   2540     li_a3 %1 %0
   2541     la_br &pa_default_advance
   2542     bne_t1,a3
   2543     # and only when brace_depth == 0
   2544     la_a0 &pa_brace_depth
   2545     ld_t1,a0,0
   2546     la_br &pa_default_advance
   2547     bnez_t1
   2548 
   2549     # depth == 1 && brace_depth == 0 split: append (arg_start, tok) at arg_index; fatal if arg_index >= M1PP_MAX_PARAMS
   2550     la_a0 &pa_arg_index
   2551     ld_a2,a0,0
   2552     li_a3 M1PP_MAX_PARAMS
   2553     la_br &pa_comma_ok
   2554     blt_a2,a3
   2555     la_br &err_bad_macro_header
   2556     b
   2557 :pa_comma_ok
   2558     la_a0 &pa_arg_start
   2559     ld_a1,a0,0
   2560     la_a3 &arg_starts_ptr
   2561     ld_a3,a3,0
   2562     shli_t1,a2,3
   2563     add_a3,a3,t1
   2564     st_a1,a3,0
   2565     la_a3 &arg_ends_ptr
   2566     ld_a3,a3,0
   2567     add_a3,a3,t1
   2568     st_t0,a3,0
   2569     # arg_index++
   2570     addi_a2,a2,1
   2571     la_a0 &pa_arg_index
   2572     st_a2,a0,0
   2573     # arg_start = tok + 24; pos = tok + 24 (skip the comma)
   2574     addi_t0,t0,24
   2575     la_a0 &pa_arg_start
   2576     st_t0,a0,0
   2577     la_a0 &pa_pos
   2578     st_t0,a0,0
   2579     la_br &pa_loop
   2580     b
   2581 
   2582 :pa_default_advance
   2583     # comma at depth != 1 or inside braces: just advance
   2584     addi_t0,t0,24
   2585     la_a0 &pa_pos
   2586     st_t0,a0,0
   2587     la_br &pa_loop
   2588     b
   2589 
   2590 :pa_lbrace
   2591     # brace_depth++; tok++
   2592     la_a0 &pa_brace_depth
   2593     ld_t1,a0,0
   2594     addi_t1,t1,1
   2595     st_t1,a0,0
   2596     addi_t0,t0,24
   2597     la_a0 &pa_pos
   2598     st_t0,a0,0
   2599     la_br &pa_loop
   2600     b
   2601 
   2602 :pa_rbrace
   2603     # if (brace_depth == 0) fatal unbalanced braces
   2604     la_a0 &pa_brace_depth
   2605     ld_t1,a0,0
   2606     la_br &err_unbalanced_braces
   2607     beqz_t1
   2608     # brace_depth--; tok++
   2609     addi_t1,t1,neg1
   2610     st_t1,a0,0
   2611     addi_t0,t0,24
   2612     la_a0 &pa_pos
   2613     st_t0,a0,0
   2614     la_br &pa_loop
   2615     b
   2616 
   2617 ## ============================================================================
   2618 ## --- Macro lookup + call expansion ------------------------------------------
   2619 ## ============================================================================
   2620 
   2621 ## find_macro(a0=tok) -> a0 = Macro* or 0. Leaf.
   2622 ## Non-zero only if tok is TOK_WORD, text.len >= 2, text[0] == '%', and
   2623 ## (text+1, len-1) equals macros[i].name for some i. First match wins. Token fields read: kind +0, text ptr +8, text len +16; macro name ptr +0, len +8.
   2624 ## Reads: macros, macros_end. May clobber a1-a3 and t0-t2.
   2625 :find_macro
   2626     # if (tok.kind != TOK_WORD) return 0
   2627     ld_a1,a0,0
   2628     li_a2 TOK_WORD
   2629     la_br &find_macro_zero
   2630     bne_a1,a2
   2631 
   2632     # if (tok.text.len < 2) return 0
   2633     ld_a2,a0,16
   2634     li_a3 %2 %0
   2635     la_br &find_macro_zero
   2636     blt_a2,a3
   2637 
   2638     # if (tok.text[0] != '%') return 0   (37 = '%')
   2639     ld_a1,a0,8
   2640     lb_a3,a1,0
   2641     li_t0 %37 %0
   2642     la_br &find_macro_zero
   2643     bne_a3,t0
   2644 
   2645     # name_ptr (a1) = tok.text + 1; name_len (a2) = tok.text.len - 1
   2646     addi_a1,a1,1
   2647     addi_a2,a2,neg1
   2648 
   2649     # m (a3) = &macros[0]; m_end (t0) = macros_end
   2650     la_a3 &macros_ptr
   2651     ld_a3,a3,0
   2652     la_t0 &macros_end
   2653     ld_t0,t0,0
   2654 
   2655 :find_macro_loop
   2656     # if (m == macros_end) return 0
   2657     la_br &find_macro_zero
   2658     beq_a3,t0
   2659 
   2660     # if (m->name.len != name_len) advance
   2661     ld_t1,a3,8
   2662     la_br &find_macro_next
   2663     bne_t1,a2
   2664 
   2665     # byte-compare m->name.ptr (t1) vs name_ptr (a1) for name_len bytes; t2 = index, a0/t0 hold the two bytes
   2666     ld_t1,a3,0
   2667     li_t2 %0 %0
   2668 :find_macro_cmp
   2669     la_br &find_macro_match
   2670     beq_t2,a2
   2671     add_a0,t1,t2
   2672     lb_a0,a0,0
   2673     add_t0,a1,t2
   2674     lb_t0,t0,0
   2675     la_br &find_macro_next
   2676     bne_a0,t0
   2677     addi_t2,t2,1
   2678     la_br &find_macro_cmp
   2679     b
   2680 
   2681 :find_macro_next
   2682     # m += M1PP_MACRO_RECORD_SIZE
   2683     li_t1 M1PP_MACRO_RECORD_SIZE
   2684     add_a3,a3,t1
   2685     # reload macros_end into t0 (t0 is reused as a byte temp by the compare loop)
   2686     la_t0 &macros_end
   2687     ld_t0,t0,0
   2688     la_br &find_macro_loop
   2689     b
   2690 
   2691 :find_macro_match
   2692     mov_a0,a3
   2693     ret
   2694 
   2695 :find_macro_zero
   2696     li_a0 %0 %0
   2697     ret
   2698 
   2699 ## find_param(a0=macro_ptr, a1=tok) -> a0 = (index+1) of the first matching param, or 0. Leaf.
   2700 ## Linear search over macro->params[0..param_count): 16-byte {ptr +0, len +8} entries starting at macro + 24, count at macro + 16. Non-WORD tok -> 0, so
   2701 ## callers can test the return against zero without pre-filtering. Uses the fp_macro / fp_tok / fp_pcount / fp_idx globals as scratch; may clobber a1-a3 and t0-t2.
   2702 :find_param
   2703     # if (tok.kind != TOK_WORD) return 0
   2704     ld_a2,a1,0
   2705     li_a3 TOK_WORD
   2706     la_br &find_param_zero
   2707     bne_a2,a3
   2708 
   2709     # param_count = macro->param_count; zero params -> return 0
   2710     ld_a2,a0,16
   2711     la_br &find_param_zero
   2712     beqz_a2
   2713 
   2714     # Spill bases into BSS so the cmp loop has free temp regs.
   2715     #   fp_macro     = macro_ptr
   2716     #   fp_tok       = tok ptr
   2717     #   fp_pcount    = param_count
   2718     #   fp_idx       = current param index (starts at 0)
   2719     la_a3 &fp_macro
   2720     st_a0,a3,0
   2721     la_a3 &fp_tok
   2722     st_a1,a3,0
   2723     la_a3 &fp_pcount
   2724     st_a2,a3,0
   2725     li_a3 %0 %0
   2726     la_a0 &fp_idx
   2727     st_a3,a0,0
   2728 
   2729 :find_param_outer
   2730     # t0 = idx, a1 = pcount; idx == pcount means every param was tried -> return 0
   2731     la_a0 &fp_idx
   2732     ld_t0,a0,0
   2733     la_a0 &fp_pcount
   2734     ld_a1,a0,0
   2735     la_br &find_param_zero
   2736     beq_t0,a1
   2737 
   2738     # param_ptr (a2) = fp_macro + 24 + idx * 16
   2739     la_a0 &fp_macro
   2740     ld_a2,a0,0
   2741     addi_a2,a2,24
   2742     shli_a3,t0,4
   2743     add_a2,a2,a3
   2744 
   2745     # tok ptr (a3)
   2746     la_a0 &fp_tok
   2747     ld_a3,a0,0
   2748 
   2749     # Compare lengths: t1 = param.len, t2 = tok.text.len.
   2750     ld_t1,a2,8
   2751     ld_t2,a3,16
   2752     la_br &find_param_next
   2753     bne_t1,t2
   2754 
   2755     # Lengths match. Byte-compare param.ptr (a0) vs tok.text.ptr (a1) for t1 bytes; t0 = byte index.
   2756     # After this point we either return or restart the outer loop (which reloads
   2757     # everything from the fp_* globals), so all caller-saved regs are free.
   2758     ld_a0,a2,0
   2759     ld_a1,a3,8
   2760     li_t0 %0 %0
   2761 :find_param_cmp
   2762     la_br &find_param_match
   2763     beq_t0,t1
   2764     add_t2,a0,t0
   2765     lb_t2,t2,0
   2766     add_a2,a1,t0
   2767     lb_a2,a2,0
   2768     la_br &find_param_next
   2769     bne_t2,a2
   2770     addi_t0,t0,1
   2771     la_br &find_param_cmp
   2772     b
   2773 
   2774 :find_param_next
   2775     # idx++ (through fp_idx, since t0 was reused as the byte index)
   2776     la_a0 &fp_idx
   2777     ld_t0,a0,0
   2778     addi_t0,t0,1
   2779     st_t0,a0,0
   2780     la_br &find_param_outer
   2781     b
   2782 
   2783 :find_param_match
   2784     # return idx + 1 (so 0 stays free to mean "not a param")
   2785     la_a0 &fp_idx
   2786     ld_a0,a0,0
   2787     addi_a0,a0,1
   2788     ret
   2789 
   2790 :find_param_zero
   2791     li_a0 %0 %0
   2792     ret
   2793 
   2794 ## arg_is_braced(a0=start, a1=end) -> a0 = 1 if the span [start, end) wraps in a matching
   2795 ## outer { ... } pair (outer RBRACE is the same-level mate of the leading
   2796 ## LBRACE), else 0. Spans shorter than two tokens return 0. Leaf. a1 is left unchanged; may clobber a2, a3, t0-t2.
   2797 :arg_is_braced
   2798     # if (end - start < 2 tokens = 48 bytes) return 0
   2799     sub_a2,a1,a0
   2800     li_a3 %48 %0
   2801     la_br &aib_zero
   2802     blt_a2,a3
   2803 
   2804     # if (start->kind != TOK_LBRACE) return 0
   2805     ld_a2,a0,0
   2806     li_a3 TOK_LBRACE
   2807     la_br &aib_zero
   2808     bne_a2,a3
   2809 
   2810     # if ((end - 24)->kind != TOK_RBRACE) return 0
   2811     addi_t0,a1,neg24
   2812     ld_a2,t0,0
   2813     li_a3 TOK_RBRACE
   2814     la_br &aib_zero
   2815     bne_a2,a3
   2816 
   2817     # walk tokens from start tracking brace depth; if depth hits 0 before reaching end-24,
   2818     # the leading LBRACE doesn't match the trailing RBRACE -> return 0.
   2819     # t0 = tok, t1 = depth (starts at 0; the leading LBRACE takes it to 1), t2 = last_tok = end - 24
   2820     mov_t0,a0
   2821     li_t1 %0 %0
   2822     addi_t2,a1,neg24
   2823 :aib_loop
   2824     la_br &aib_done
   2825     beq_t0,a1
   2826     ld_a2,t0,0
   2827     li_a3 TOK_LBRACE
   2828     la_br &aib_incr
   2829     beq_a2,a3
   2830     li_a3 TOK_RBRACE
   2831     la_br &aib_decr
   2832     beq_a2,a3
   2833     # non-brace: advance
   2834     addi_t0,t0,24
   2835     la_br &aib_loop
   2836     b
   2837 :aib_incr
   2838     addi_t1,t1,1
   2839     addi_t0,t0,24
   2840     la_br &aib_loop
   2841     b
   2842 :aib_decr
   2843     addi_t1,t1,neg1
   2844     # if (depth == 0 && tok != end - 24) -> not wrapping; otherwise keep walking
   2845     la_br &aib_decr_skip
   2846     bnez_t1
   2847     la_br &aib_zero
   2848     bne_t0,t2
   2849 :aib_decr_skip
   2850     addi_t0,t0,24
   2851     la_br &aib_loop
   2852     b
   2853 :aib_done
   2854     # return (depth == 0) ? 1 : 0   (nonzero depth here means an inner brace was left open)
   2855     la_br &aib_zero
   2856     bnez_t1
   2857     li_a0 %1 %0
   2858     ret
   2859 :aib_zero
   2860     li_a0 %0 %0
   2861     ret
   2862 
   2863 ## copy_arg_tokens_to_pool(a0=arg_start, a1=arg_end) -> void (fatal if empty)
   2864 ## Non-leaf (calls arg_is_braced and copy_span_to_pool); uses a 16-byte frame to keep start/end across the calls. Empty arg branches to err_bad_macro_header.
   2865 ## If the span is wrapped in a matching outer { ... } pair, strip the outer
   2866 ## braces before copying; an empty inner span (`{}`) is a no-op. Any other span is copied whole.
   2867 :copy_arg_tokens_to_pool
   2868     enter_16
   2869     # if (arg_start == arg_end) fatal
   2870     la_br &err_bad_macro_header
   2871     beq_a0,a1
   2872     # spill a0/a1 to sp+0 / sp+8 so arg_is_braced can clobber regs
   2873     st_a0,sp,0
   2874     st_a1,sp,8
   2875     la_br &arg_is_braced
   2876     call
   2877     la_br &catp_plain
   2878     beqz_a0
   2879     # braced: strip outer braces (start+24, end-24); skip the copy if nothing is left inside
   2880     ld_a0,sp,0
   2881     ld_a1,sp,8
   2882     addi_a0,a0,24
   2883     addi_a1,a1,neg24
   2884     la_br &catp_done
   2885     beq_a0,a1
   2886     la_br &copy_span_to_pool
   2887     call
   2888     la_br &catp_done
   2889     b
   2890 :catp_plain
   2891     ld_a0,sp,0
   2892     ld_a1,sp,8
   2893     la_br &copy_span_to_pool
   2894     call
   2895 :catp_done
   2896     eret
   2897 
   2898 ## copy_paste_arg_to_pool(a0=arg_start, a1=arg_end) -> void (fatal unless the arg is exactly one token)
   2899 ## Enforces the single-token-argument rule for params adjacent to ##, then copies that token via copy_span_to_pool. Non-leaf; 16-byte frame holds start/end.
   2900 ## Braced args are rejected — pasting onto a block is nonsense. Both failures branch to err_bad_macro_header.
   2901 :copy_paste_arg_to_pool
   2902     enter_16
   2903     # spill a0/a1 to sp+0 / sp+8 for the arg_is_braced call
   2904     st_a0,sp,0
   2905     st_a1,sp,8
   2906     la_br &arg_is_braced
   2907     call
   2908     la_br &err_bad_macro_header
   2909     bnez_a0
   2910     ld_a0,sp,0
   2911     ld_a1,sp,8
   2912     # if ((arg_end - arg_start) != M1PP_TOK_SIZE) fatal: the arg must be exactly one token
   2913     sub_a2,a1,a0
   2914     li_a3 M1PP_TOK_SIZE
   2915     la_br &err_bad_macro_header
   2916     bne_a2,a3
   2917     la_br &copy_span_to_pool
   2918     call
   2919     eret
   2920 
   2921 ## expand_macro_tokens(a0=call_tok, a1=limit, a2=macro_ptr) -> void (fatal on bad)
   2922 ## Requires call_tok+1 is TOK_LPAREN. Runs parse_args(call_tok+1, limit),
   2923 ## verifies arg_count == macro->param_count, walks macro body, substituting
   2924 ## each param token via copy_arg_tokens_to_pool (or copy_paste_arg_to_pool
   2925 ## when adjacent to ##), copying other body tokens as-is, then runs
   2926 ## paste_pool_range over the newly-written slice.
   2927 ##
   2928 ## Outputs via globals (callers must snapshot before any nested call that
   2929 ## could overwrite them):
   2930 ##   emt_after_pos = token one past the matching ')' (= call_end_pos)
   2931 ##   emt_mark      = pool_used as of entry (start of expansion slice)
   2932 ##
   2933 :expand_macro_tokens
   2934     enter_0
   2935 
   2936     # Snapshot inputs into BSS (find_param/copy_*/paste_pool_range clobber regs).
   2937     la_a3 &emt_call_tok
   2938     st_a0,a3,0
   2939     la_a3 &emt_limit
   2940     st_a1,a3,0
   2941     la_a3 &emt_macro
   2942     st_a2,a3,0
   2943 
   2944     # lparen = call_tok + 24
   2945     addi_a0,a0,24
   2946 
   2947     # Branch split for paren-less 0-arg calls:
   2948     #   if lparen < limit AND lparen->kind == TOK_LPAREN: parse_args as usual.
   2949     #   else if macro->param_count == 0: synthesize empty arg list, no parse_args.
   2950     #   else: fatal "bad macro call".
   2951 
   2952     # if (lparen >= limit) goto emt_try_zero_arg
   2953     la_br &emt_try_zero_arg
   2954     beq_a0,a1
   2955     la_br &emt_try_zero_arg
   2956     blt_a1,a0
   2957 
   2958     # if (lparen->kind != TOK_LPAREN) goto emt_try_zero_arg
   2959     ld_a2,a0,0
   2960     li_a3 TOK_LPAREN
   2961     la_br &emt_try_zero_arg
   2962     bne_a2,a3
   2963 
   2964     # parse_args(lparen, limit)
   2965     # a0 already lparen; a1 already limit
   2966     la_br &parse_args
   2967     call
   2968 
   2969     # Check arg_count == macro->param_count
   2970     la_a0 &arg_count
   2971     ld_t0,a0,0
   2972     la_a0 &emt_macro
   2973     ld_t1,a0,0
   2974     ld_t1,t1,16
   2975     la_br &err_bad_macro_header
   2976     bne_t0,t1
   2977 
   2978     # expansion_id = ++next_expansion_id (monotonic; used by local-label
   2979     # rewriting in the body-copy path to rename :@name / &@name tokens).
   2980     la_a0 &next_expansion_id
   2981     ld_t0,a0,0
   2982     addi_t0,t0,1
   2983     st_t0,a0,0
   2984     la_a1 &emt_expansion_id
   2985     st_t0,a1,0
   2986 
   2987     # Snapshot call_end_pos -> emt_after_pos before the body walk, so
   2988     # nothing in the substitution loop can clobber the resume position.
   2989     la_a0 &call_end_pos
   2990     ld_t0,a0,0
   2991     la_a1 &emt_after_pos
   2992     st_t0,a1,0
   2993     la_br &emt_after_arg_setup
   2994     b
   2995 
   2996 :emt_try_zero_arg
   2997     # No trailing LPAREN. Allowed only if macro->param_count == 0.
   2998     la_a0 &emt_macro
   2999     ld_t1,a0,0
   3000     ld_t1,t1,16
   3001     la_br &err_bad_macro_header
   3002     bnez_t1
   3003 
   3004     # arg_count = 0
   3005     la_a0 &arg_count
   3006     li_t0 %0 %0
   3007     st_t0,a0,0
   3008 
   3009     # emt_after_pos = call_tok + 24
   3010     la_a0 &emt_call_tok
   3011     ld_t0,a0,0
   3012     addi_t0,t0,24
   3013     la_a1 &emt_after_pos
   3014     st_t0,a1,0
   3015 
   3016 :emt_after_arg_setup
   3017 
   3018     # mark = pool_used; emt_mark = mark
   3019     la_a0 &pool_used
   3020     ld_t0,a0,0
   3021     la_a1 &emt_mark
   3022     st_t0,a1,0
   3023 
   3024     # body_pos = macro->body_start; body_end = macro->body_end
   3025     la_a0 &emt_macro
   3026     ld_t1,a0,0
   3027     li_a2 M1PP_MACRO_BODY_START_OFF
   3028     add_a3,t1,a2
   3029     ld_a3,a3,0
   3030     la_a0 &emt_body_pos
   3031     st_a3,a0,0
   3032     la_a0 &emt_body_start
   3033     st_a3,a0,0
   3034     li_a2 M1PP_MACRO_BODY_END_OFF
   3035     add_a3,t1,a2
   3036     ld_a3,a3,0
   3037     la_a0 &emt_body_end
   3038     st_a3,a0,0
   3039 
   3040 :emt_loop
   3041     # if (body_pos == body_end) break
   3042     la_a0 &emt_body_pos
   3043     ld_t0,a0,0
   3044     la_a1 &emt_body_end
   3045     ld_t1,a1,0
   3046     la_br &emt_done
   3047     beq_t0,t1
   3048 
   3049     # param_idx = find_param(macro, body_tok)
   3050     la_a0 &emt_macro
   3051     ld_a0,a0,0
   3052     mov_a1,t0
   3053     la_br &find_param
   3054     call
   3055 
   3056     # if (param_idx == 0) body-native token: check for local-label rewrite,
   3057     # else fall through to substitute logic.
   3058     la_br &emt_check_local_label
   3059     beqz_a0
   3060 
   3061     # param_idx != 0: substitute. emt_do_substitute_* will re-derive
   3062     # the arg span (calls find_param again to recover the index) after
   3063     # the "pasted" classification below. This saves us from spilling idx
   3064     # across the body_pos +/- TOK_PASTE peek.
   3065 
   3066     # Reload body_pos for the pasted-classification loads.
   3067     la_a0 &emt_body_pos
   3068     ld_t0,a0,0
   3069 
   3070     # Compute pasted = (body_pos > body_start AND (body_pos - 24)->kind == TOK_PASTE)
   3071     #                  OR (body_pos + 24 < body_end AND (body_pos + 24)->kind == TOK_PASTE)
   3072     la_a1 &emt_body_start
   3073     ld_t1,a1,0
   3074 
   3075     # Branch to emt_check_after if body_pos == body_start
   3076     la_br &emt_check_after
   3077     beq_t0,t1
   3078 
   3079     # prev_kind = (body_pos - 24)->kind
   3080     addi_t2,t0,neg24
   3081     ld_a2,t2,0
   3082     li_a3 TOK_PASTE
   3083     la_br &emt_pasted
   3084     beq_a2,a3
   3085 
   3086 :emt_check_after
   3087     # next_pos = body_pos + 24; if (next_pos >= body_end) skip
   3088     addi_t2,t0,24
   3089     la_a1 &emt_body_end
   3090     ld_a3,a1,0
   3091     # if (next_pos == body_end) -> not pasted (need next_pos < body_end)
   3092     la_br &emt_not_pasted
   3093     beq_t2,a3
   3094     # next_kind = next_pos->kind
   3095     ld_a2,t2,0
   3096     li_a3 TOK_PASTE
   3097     la_br &emt_pasted
   3098     beq_a2,a3
   3099     la_br &emt_not_pasted
   3100     b
   3101 
   3102 :emt_pasted
   3103     # body_pos is a param adjacent to ##: substitute one-token arg.
   3104     la_br &emt_do_substitute_paste
   3105     b
   3106 
   3107 :emt_not_pasted
   3108     # body_pos is a param NOT adjacent to ##: substitute arg span.
   3109     la_br &emt_do_substitute_plain
   3110     b
   3111 
   3112 ## emt_check_local_label: body-native token at body_pos. If it's a
   3113 ## TOK_WORD whose text starts with ":@" or "&@" and has at least one
   3114 ## char after the '@', rewrite it to ":name__NN" / "&name__NN" (NN =
   3115 ## emt_expansion_id) and push as TOK_WORD. Otherwise fall through to
   3116 ## emt_copy_literal, which copies the body token verbatim.
   3117 :emt_check_local_label
   3118     # t0 = body_tok ptr
   3119     la_a0 &emt_body_pos
   3120     ld_t0,a0,0
   3121     # kind must be TOK_WORD (== 0)
   3122     ld_a1,t0,0
   3123     la_br &emt_copy_literal
   3124     bnez_a1
   3125     # len must be >= 3 (sigil + '@' + >=1 tail char)
   3126     ld_a2,t0,16
   3127     li_a3 %3 %0
   3128     la_br &emt_copy_literal
   3129     blt_a2,a3
   3130     # first byte must be ':' (58) or '&' (38)
   3131     ld_a3,t0,8
   3132     lb_a1,a3,0
   3133     li_a2 %58 %0
   3134     la_br &emt_check_local_label_at
   3135     beq_a1,a2
   3136     li_a2 %38 %0
   3137     la_br &emt_copy_literal
   3138     bne_a1,a2
   3139 :emt_check_local_label_at
   3140     # second byte must be '@' (64)
   3141     lb_a1,a3,1
   3142     li_a2 %64 %0
   3143     la_br &emt_copy_literal
   3144     bne_a1,a2
   3145     # Local label! Fall through to rewrite.
   3146 
   3147 ## emt_rewrite_local_label: build "sigil + tail + __ + decimal(NN)" in
   3148 ## local_label_scratch, stash it into text_buf via append_text, and push
   3149 ## a TOK_WORD to expand_pool.
   3150 :emt_rewrite_local_label
   3151     # Stash body_tok text_ptr / text_len into BSS so they survive
   3152     # function calls (append_text is non-leaf via its arena bump).
   3153     la_a0 &emt_body_pos
   3154     ld_t0,a0,0
   3155     ld_a1,t0,8
   3156     la_a2 &ll_src_ptr
   3157     st_a1,a2,0
   3158     ld_a1,t0,16
   3159     la_a2 &ll_src_len
   3160     st_a1,a2,0
   3161 
   3162     # --- Convert emt_expansion_id to decimal, reverse-fill into
   3163     # --- local_label_digits[0..24). Write right-to-left starting at
   3164     # --- offset 23 so digits are adjacent at [cursor, &local_label_digits+24).
   3165     la_a0 &emt_expansion_id
   3166     ld_t0,a0,0                 # t0 = id (mutated)
   3167     la_t1 &local_label_digits
   3168     li_a2 %24 %0
   3169     add_t1,t1,a2               # t1 = end (one past last slot)
   3170     mov_t2,t1                  # t2 = cursor (moves left)
   3171 
   3172     # Special-case id == 0 -> single '0' digit.
   3173     la_br &emt_rldg_loop
   3174     bnez_t0
   3175     addi_t2,t2,neg1
   3176     li_a0 %48 %0
   3177     sb_a0,t2,0
   3178     la_br &emt_rldg_done
   3179     b
   3180 :emt_rldg_loop
   3181     la_br &emt_rldg_done
   3182     beqz_t0
   3183     # digit = id % 10
   3184     mov_a0,t0
   3185     li_a1 %10 %0
   3186     rem_a2,a0,a1               # a2 = id % 10
   3187     addi_a2,a2,48              # a2 = '0' + digit
   3188     addi_t2,t2,neg1
   3189     sb_a2,t2,0                 # *--cursor = digit
   3190     # id = id / 10
   3191     mov_a0,t0
   3192     li_a1 %10 %0
   3193     div_a0,a0,a1
   3194     mov_t0,a0
   3195     la_br &emt_rldg_loop
   3196     b
   3197 :emt_rldg_done
   3198     # digit_count = end - cursor
   3199     la_a1 &local_label_digits
   3200     li_a2 %24 %0
   3201     add_a1,a1,a2               # a1 = end
   3202     sub_a0,a1,t2               # a0 = digit_count
   3203     la_a1 &ll_digit_count
   3204     st_a0,a1,0
   3205     # Save cursor (start of digits) for the copy step.
   3206     la_a1 &ll_digit_cursor
   3207     st_t2,a1,0
   3208 
   3209     # --- Build final text in local_label_scratch ---
   3210     # Layout: [0]=sigil, [1..1+tail_len)=tail, then "__", then digits.
   3211     # tail_len = len - 2
   3212 
   3213     # Write sigil (src_ptr[0]) to scratch[0].
   3214     la_a0 &ll_src_ptr
   3215     ld_a1,a0,0
   3216     lb_a2,a1,0
   3217     la_a3 &local_label_scratch_ptr
   3218     ld_a3,a3,0
   3219     sb_a2,a3,0
   3220 
   3221     # Copy tail: scratch[1..1+tail_len) <- src_ptr[2..2+tail_len).
   3222     la_a0 &ll_src_len
   3223     ld_a1,a0,0
   3224     li_a2 %2 %0
   3225     sub_t0,a1,a2               # t0 = tail_len = src_len - 2
   3226     la_a0 &ll_src_ptr
   3227     ld_a1,a0,0                 # a1 = src_ptr
   3228     addi_a1,a1,2               # a1 = src_ptr + 2 (tail start)
   3229     la_a2 &local_label_scratch_ptr
   3230     ld_a2,a2,0
   3231     addi_a2,a2,1               # a2 = scratch + 1 (dst tail start)
   3232     li_t1 %0 %0                # t1 = i
   3233 :emt_rlbuild_tail_loop
   3234     la_br &emt_rlbuild_tail_done
   3235     beq_t1,t0
   3236     add_a3,a1,t1
   3237     lb_a3,a3,0
   3238     add_t2,a2,t1
   3239     sb_a3,t2,0
   3240     addi_t1,t1,1
   3241     la_br &emt_rlbuild_tail_loop
   3242     b
   3243 :emt_rlbuild_tail_done
   3244     # Save tail_len for later offset math.
   3245     la_a0 &ll_tail_len
   3246     st_t0,a0,0
   3247 
   3248     # Write "__" at scratch[1+tail_len], scratch[2+tail_len].
   3249     la_a2 &local_label_scratch_ptr
   3250     ld_a2,a2,0
   3251     addi_a2,a2,1
   3252     add_a2,a2,t0               # a2 = &scratch[1+tail_len]
   3253     li_a3 %95 %0               # '_'
   3254     sb_a3,a2,0
   3255     addi_a2,a2,1
   3256     sb_a3,a2,0
   3257 
   3258     # Copy digits: scratch[3+tail_len..3+tail_len+digit_count) <- digit_cursor[0..digit_count).
   3259     la_a0 &ll_digit_count
   3260     ld_t1,a0,0                 # t1 = digit_count
   3261     la_a0 &ll_digit_cursor
   3262     ld_a1,a0,0                 # a1 = digit_cursor (src)
   3263     la_a0 &ll_tail_len
   3264     ld_t0,a0,0                 # t0 = tail_len
   3265     la_a2 &local_label_scratch_ptr
   3266     ld_a2,a2,0
   3267     addi_a2,a2,3
   3268     add_a2,a2,t0               # a2 = &scratch[3+tail_len] (dst)
   3269     li_t2 %0 %0                # t2 = i
   3270 :emt_rlbuild_digits_loop
   3271     la_br &emt_rlbuild_digits_done
   3272     beq_t2,t1
   3273     add_a3,a1,t2
   3274     lb_a3,a3,0
   3275     add_a0,a2,t2
   3276     sb_a3,a0,0
   3277     addi_t2,t2,1
   3278     la_br &emt_rlbuild_digits_loop
   3279     b
   3280 :emt_rlbuild_digits_done
   3281 
   3282     # total_len = 1 + tail_len + 2 + digit_count = 3 + tail_len + digit_count
   3283     la_a0 &ll_tail_len
   3284     ld_a1,a0,0
   3285     la_a0 &ll_digit_count
   3286     ld_a2,a0,0
   3287     add_a1,a1,a2
   3288     addi_a1,a1,3
   3289     la_a0 &ll_total_len
   3290     st_a1,a0,0
   3291 
   3292     # durable_ptr = append_text(&local_label_scratch, total_len)
   3293     la_a0 &local_label_scratch_ptr
   3294     ld_a0,a0,0
   3295     la_br &append_text
   3296     call
   3297     # a0 = durable_ptr (into text_buf)
   3298 
   3299     # Push TOK_WORD { kind=0, text_ptr=durable_ptr, text_len=total_len } to expand_pool.
   3300     la_a1 &pool_used
   3301     ld_t0,a1,0
   3302     li_a2 M1PP_EXPAND_CAP
   3303     la_br &err_token_overflow
   3304     beq_t0,a2
   3305     la_a3 &expand_pool_ptr
   3306     ld_a3,a3,0
   3307     add_a3,a3,t0               # a3 = dst slot
   3308     # kind = TOK_WORD
   3309     li_a2 TOK_WORD
   3310     st_a2,a3,0
   3311     # text_ptr
   3312     st_a0,a3,8
   3313     # text_len
   3314     la_a0 &ll_total_len
   3315     ld_a2,a0,0
   3316     st_a2,a3,16
   3317     # pool_used += 24
   3318     addi_t0,t0,24
   3319     la_a1 &pool_used
   3320     st_t0,a1,0
   3321 
   3322     # body_pos += 24
   3323     la_a0 &emt_body_pos
   3324     ld_t0,a0,0
   3325     addi_t0,t0,24
   3326     st_t0,a0,0
   3327     la_br &emt_loop
   3328     b
   3329 
   3330 :emt_copy_literal
   3331     # Append *body_pos to expand_pool. Check overflow.
   3332     la_a0 &pool_used
   3333     ld_t0,a0,0
   3334     li_a1 M1PP_EXPAND_CAP
   3335     la_br &err_token_overflow
   3336     beq_t0,a1
   3337     # dst = &expand_pool + pool_used
   3338     la_a2 &expand_pool_ptr
   3339     ld_a2,a2,0
   3340     add_a2,a2,t0
   3341     # src = body_pos
   3342     la_a0 &emt_body_pos
   3343     ld_a3,a0,0
   3344     # copy 24 bytes (3 x 8)
   3345     ld_a0,a3,0
   3346     st_a0,a2,0
   3347     ld_a0,a3,8
   3348     st_a0,a2,8
   3349     ld_a0,a3,16
   3350     st_a0,a2,16
   3351     # pool_used += 24
   3352     addi_t0,t0,24
   3353     la_a0 &pool_used
   3354     st_t0,a0,0
   3355     # body_pos += 24
   3356     addi_a3,a3,24
   3357     la_a0 &emt_body_pos
   3358     st_a3,a0,0
   3359     la_br &emt_loop
   3360     b
   3361 
   3362 :emt_do_substitute_paste
   3363     # Re-derive arg span for current body_pos.
   3364     la_a0 &emt_macro
   3365     ld_a0,a0,0
   3366     la_a1 &emt_body_pos
   3367     ld_a1,a1,0
   3368     la_br &find_param
   3369     call
   3370     addi_a0,a0,neg1
   3371     shli_a0,a0,3
   3372     la_a1 &arg_starts_ptr
   3373     ld_a1,a1,0
   3374     add_a1,a1,a0
   3375     ld_t0,a1,0
   3376     la_a1 &arg_ends_ptr
   3377     ld_a1,a1,0
   3378     add_a1,a1,a0
   3379     ld_t1,a1,0
   3380     mov_a0,t0
   3381     mov_a1,t1
   3382     la_br &copy_paste_arg_to_pool
   3383     call
   3384     # body_pos += 24
   3385     la_a0 &emt_body_pos
   3386     ld_t0,a0,0
   3387     addi_t0,t0,24
   3388     st_t0,a0,0
   3389     la_br &emt_loop
   3390     b
   3391 
   3392 :emt_do_substitute_plain
   3393     # Re-derive arg span for current body_pos.
   3394     la_a0 &emt_macro
   3395     ld_a0,a0,0
   3396     la_a1 &emt_body_pos
   3397     ld_a1,a1,0
   3398     la_br &find_param
   3399     call
   3400     addi_a0,a0,neg1
   3401     shli_a0,a0,3
   3402     la_a1 &arg_starts_ptr
   3403     ld_a1,a1,0
   3404     add_a1,a1,a0
   3405     ld_t0,a1,0
   3406     la_a1 &arg_ends_ptr
   3407     ld_a1,a1,0
   3408     add_a1,a1,a0
   3409     ld_t1,a1,0
   3410     mov_a0,t0
   3411     mov_a1,t1
   3412     la_br &copy_arg_tokens_to_pool
   3413     call
   3414     # body_pos += 24
   3415     la_a0 &emt_body_pos
   3416     ld_t0,a0,0
   3417     addi_t0,t0,24
   3418     st_t0,a0,0
   3419     la_br &emt_loop
   3420     b
   3421 
   3422 :emt_done
   3423     # paste_pool_range(mark). emt_after_pos and emt_mark are already published.
   3424     la_a0 &emt_mark
   3425     ld_a0,a0,0
   3426     la_br &paste_pool_range
   3427     call
   3428 
   3429     eret
   3430 
   3431 ## expand_call(a0=stream_ptr, a1=macro_ptr) -> void (fatal on bad call)
   3432 ## Calls expand_macro_tokens for the call at stream->pos, sets
   3433 ## stream->pos = emt_after_pos, stream->line_start = 0, and
   3434 ## push_pool_stream_from_mark(emt_mark) to rescan the expansion.
        ##
        ## Stream fields touched here (byte offsets from stream_ptr):
        ##   +8  end         read, passed as expand_macro_tokens' second argument
        ##   +16 pos         read before the call, overwritten with emt_after_pos
        ##   +24 line_start  cleared to 0
        ## emt_after_pos and emt_mark are read back from their globals after
        ## expand_macro_tokens returns.
   3435 :expand_call
   3436     enter_8
   3437 
   3438     # Spill stream_ptr into the 8-byte frame local reserved by enter_8. It is
   3439     # stored at sp+0 here and reloaded from sp+0 after the call below.
        # NOTE(review): assumes sp-relative offset 0 addresses the first local
        # (not the saved return address) under this P1 frame layout -- confirm.
   3440     st_a0,sp,0
   3441 
   3442     # expand_macro_tokens(stream->pos, stream->end, macro)
   3443     # stream->pos at +16, stream->end at +8
   3444     ld_t0,a0,16
   3445     ld_t1,a0,8
        # Move macro_ptr into a2 first: a1 is about to be overwritten with end.
   3446     mov_a2,a1
   3447     mov_a0,t0
   3448     mov_a1,t1
   3449     la_br &expand_macro_tokens
   3450     call
   3451 
   3452     # stream->pos = emt_after_pos
        # (a0 is reloaded from the frame slot; the call did not preserve it.)
   3453     ld_a0,sp,0
   3454     la_a1 &emt_after_pos
   3455     ld_t0,a1,0
   3456     st_t0,a0,16
   3457 
   3458     # stream->line_start = 0
   3459     li_t0 %0 %0
   3460     st_t0,a0,24
   3461 
   3462     # push_pool_stream_from_mark(emt_mark)
   3463     la_a0 &emt_mark
   3464     ld_a0,a0,0
   3465     la_br &push_pool_stream_from_mark
   3466     call
   3467 
   3468     eret
   3469 
   3470 ## ============================================================================
   3471 ## --- ## token paste compaction ----------------------------------------------
   3472 ## ============================================================================
   3473 
   3474 ## append_pasted_token(a0=dst_tok, a1=left_tok, a2=right_tok) -> void (fatal)
   3475 ## Concatenate left->text and right->text into paste_scratch, then call
   3476 ## append_text(&paste_scratch, total_len) for stable storage in text_buf,
   3477 ## and write *dst = { TOK_WORD, text_ptr, total_len }. paste_scratch is
   3478 ## 256 bytes (M0's quoted-literal cap). Fatal err_text_overflow if combined
   3479 ## length exceeds 256 bytes; append_text handles its own text_buf overflow.
        ##
        ## Token layout used here: kind at +0, text_ptr at +8, text_len at +16.
        ## Both operands' text_ptr/text_len are read and saved before *dst is
        ## written, so dst may be the same token as left (paste_pool_range
        ## passes the same pointer for both).
   3480 :append_pasted_token
   3481     enter_0
   3482 
   3483     # ---- Save dst plus each operand's text_ptr / text_len to BSS so the ----
        # ---- values are still available after the append_text call.         ----
   3484     la_t0 &paste_dst_save
   3485     st_a0,t0,0
   3486     la_t0 &paste_left_ptr
   3487     ld_t1,a1,8
   3488     st_t1,t0,0
   3489     la_t0 &paste_left_len
   3490     ld_t1,a1,16
   3491     st_t1,t0,0
   3492     la_t0 &paste_right_ptr
   3493     ld_t1,a2,8
   3494     st_t1,t0,0
   3495     la_t0 &paste_right_len
   3496     ld_t1,a2,16
   3497     st_t1,t0,0
   3498 
   3499     # ---- total_len = left.len + right.len; fatal if > 256 ----
   3500     la_t0 &paste_left_len
   3501     ld_t1,t0,0
   3502     la_t0 &paste_right_len
   3503     ld_t2,t0,0
   3504     add_a0,t1,t2
   3505     li_a1 %256 %0
   3506     la_br &err_text_overflow
        # Branch taken when 256 < total_len, so a total of exactly 256 is accepted.
   3507     blt_a1,a0
   3508     # save total_len for the append_text call below
   3509     la_t0 &paste_total_len
   3510     st_a0,t0,0
   3511 
   3512     # ---- Copy left bytes: paste_scratch[0..left.len) <- left.text_ptr ----
        # t0 = src, t1 = byte count, t2 = dst base, a0 = index.
   3513     la_t0 &paste_left_ptr
   3514     ld_t0,t0,0
   3515     la_t1 &paste_left_len
   3516     ld_t1,t1,0
   3517     la_t2 &paste_scratch_ptr
   3518     ld_t2,t2,0
   3519     li_a0 %0 %0
   3520 :append_pasted_left_loop
   3521     la_br &append_pasted_left_done
   3522     beq_a0,t1
   3523     add_a1,t0,a0
   3524     lb_a1,a1,0
   3525     add_a2,t2,a0
   3526     sb_a1,a2,0
   3527     addi_a0,a0,1
   3528     la_br &append_pasted_left_loop
   3529     b
   3530 :append_pasted_left_done
   3531 
   3532     # ---- Copy right bytes: paste_scratch[left.len..total_len) <- right.text_ptr ----
        # Same register roles as the left copy; the dst base is advanced by left.len.
   3533     la_t0 &paste_right_ptr
   3534     ld_t0,t0,0
   3535     la_t1 &paste_right_len
   3536     ld_t1,t1,0
   3537     la_t2 &paste_scratch_ptr
   3538     ld_t2,t2,0
   3539     la_a3 &paste_left_len
   3540     ld_a3,a3,0
   3541     add_t2,t2,a3              # t2 = &paste_scratch[left.len]
   3542     li_a0 %0 %0
   3543 :append_pasted_right_loop
   3544     la_br &append_pasted_right_done
   3545     beq_a0,t1
   3546     add_a1,t0,a0
   3547     lb_a1,a1,0
   3548     add_a2,t2,a0
   3549     sb_a1,a2,0
   3550     addi_a0,a0,1
   3551     la_br &append_pasted_right_loop
   3552     b
   3553 :append_pasted_right_done
   3554 
   3555     # ---- text_ptr = append_text(&paste_scratch, total_len) ----
   3556     la_a0 &paste_scratch_ptr
   3557     ld_a0,a0,0
   3558     la_a1 &paste_total_len
   3559     ld_a1,a1,0
   3560     la_br &append_text
   3561     call
   3562     # a0 = text_ptr (returned)
   3563 
   3564     # ---- *dst = { TOK_WORD, text_ptr, total_len } ----
        # dst and total_len are reloaded from BSS rather than kept in registers
        # across the call.
   3565     la_t0 &paste_dst_save
   3566     ld_t0,t0,0
   3567     li_a2 TOK_WORD
   3568     st_a2,t0,0
   3569     st_a0,t0,8
   3570     la_a1 &paste_total_len
   3571     ld_a1,a1,0
   3572     st_a1,t0,16
   3573 
   3574     eret
   3575 
   3576 ## paste_pool_range(a0=mark) -> void (fatal on bad paste)
   3577 ## In-place compactor over expand_pool[mark..pool_used). For each TOK_PASTE,
   3578 ## paste (prev, next) into prev via append_pasted_token and skip both the
   3579 ## PASTE and the next token. Copy other tokens forward. Update pool_used to
   3580 ## the new end. Fatal (err_bad_macro_header — closest "bad input" label) if
   3581 ## ## is first, last, or adjacent to NEWLINE/PASTE.
        ##
        ## mark and pool_used are byte offsets into expand_pool; tokens are 24
        ## bytes each. Working state lives in BSS:
        ##   paste_start  first token of the range (fixed)
        ##   paste_end    one past the last token of the range (fixed)
        ##   paste_in     read cursor
        ##   paste_out    write cursor (never ahead of paste_in)
        ## Because `out` does not advance after a paste, the pasted result at
        ## out-1 becomes the left operand of an immediately following ##, so a
        ## chain such as a ## b ## c folds left to right into one token.
   3582 :paste_pool_range
   3583     enter_0
   3584 
   3585     # ---- start = expand_pool + mark ----
   3586     la_t0 &expand_pool_ptr
   3587     ld_t0,t0,0
   3588     add_t0,t0,a0
   3589     la_t1 &paste_start
   3590     st_t0,t1,0
   3591     # paste_in = start
   3592     la_t1 &paste_in
   3593     st_t0,t1,0
   3594     # paste_out = start
   3595     la_t1 &paste_out
   3596     st_t0,t1,0
   3597 
   3598     # ---- end = expand_pool + pool_used ----
   3599     la_t1 &pool_used
   3600     ld_t2,t1,0
   3601     la_t1 &expand_pool_ptr
   3602     ld_t1,t1,0
   3603     add_t2,t1,t2
   3604     la_t1 &paste_end
   3605     st_t2,t1,0
   3606 
   3607 :paste_pool_loop
   3608     # in = paste_in; end = paste_end; if (in == end) done
   3609     la_a0 &paste_in
   3610     ld_t0,a0,0
   3611     la_a1 &paste_end
   3612     ld_t1,a1,0
   3613     la_br &paste_pool_done
   3614     beq_t0,t1
   3615 
   3616     # kind = in->kind
   3617     ld_a2,t0,0
   3618     li_a3 TOK_PASTE
   3619     la_br &paste_pool_handle_paste
   3620     beq_a2,a3
   3621 
   3622     # ---- non-PASTE: copy *in to *out, advance both by 24 ----
   3623     la_a0 &paste_out
   3624     ld_t2,a0,0
   3625     # if (in == out) skip the copy
        # (true until the first paste has been compacted away)
   3626     la_br &paste_pool_skip_copy
   3627     beq_t0,t2
   3628     ld_a3,t0,0
   3629     st_a3,t2,0
   3630     ld_a3,t0,8
   3631     st_a3,t2,8
   3632     ld_a3,t0,16
   3633     st_a3,t2,16
   3634 :paste_pool_skip_copy
   3635     addi_t0,t0,24
   3636     addi_t2,t2,24
   3637     la_a0 &paste_in
   3638     st_t0,a0,0
   3639     la_a0 &paste_out
   3640     st_t2,a0,0
   3641     la_br &paste_pool_loop
   3642     b
   3643 
   3644 :paste_pool_handle_paste
   3645     # ---- TOK_PASTE handling ----
        # On entry t0 = in (points at the ## token), loaded in paste_pool_loop.
   3646     # Validate:
   3647     #   out == start                     -> ## is first (fatal)
   3648     la_a0 &paste_out
   3649     ld_t1,a0,0
   3650     la_a1 &paste_start
   3651     ld_t2,a1,0
   3652     la_br &err_bad_macro_header
   3653     beq_t1,t2
   3654 
   3655     #   in+1 >= end                       -> ## is last (fatal)
   3656     # Equivalent: in+24 >= end, i.e. !(in+24 < end).
   3657     addi_t0,t0,24                # t0 = in + 24 (right operand ptr)
   3658     la_a1 &paste_end
   3659     ld_t2,a1,0
   3660     # fatal if (in+1) >= end, i.e. if (in+24) >= end. blt branches when
   3661     # left < right, so branch over fatal when (in+24) < end.
   3662     la_br &paste_pool_paste_right_in_range
   3663     blt_t0,t2
   3664     la_br &err_bad_macro_header
   3665     b
   3666 :paste_pool_paste_right_in_range
   3667     # t0 currently = in+24 (right operand)
   3668     # Validate (out-1)->kind not in {NEWLINE, PASTE}.
   3669     # out is in t1; out-1 = t1 - 24. (out-1)->kind = *(t1-24+0).
   3670     # Use mem offset: ld with offset NEG24.
   3671     ld_a2,t1,neg24
   3672     li_a3 TOK_NEWLINE
   3673     la_br &err_bad_macro_header
   3674     beq_a2,a3
   3675     li_a3 TOK_PASTE
   3676     la_br &err_bad_macro_header
   3677     beq_a2,a3
   3678 
   3679     # Validate (in+1)->kind not in {NEWLINE, PASTE}.
   3680     # t0 = in+24 (right operand), so kind = *(t0+0).
   3681     ld_a2,t0,0
   3682     li_a3 TOK_NEWLINE
   3683     la_br &err_bad_macro_header
   3684     beq_a2,a3
   3685     li_a3 TOK_PASTE
   3686     la_br &err_bad_macro_header
   3687     beq_a2,a3
   3688 
   3689     # ---- append_pasted_token(out-1, out-1, in+1) ----
   3690     # t1 = out, t0 = in+1 (right operand).
        # dst and left are the same token: the result overwrites the left operand.
   3691     addi_t1,t1,neg24             # t1 = out - 1 (left = dst)
   3692     mov_a0,t1
   3693     mov_a1,t1
   3694     mov_a2,t0
   3695     la_br &append_pasted_token
   3696     call
   3697 
   3698     # in += 48 (skip ## and the right operand). Out is unchanged.
        # paste_in is reloaded from BSS; it still holds the address of the ##
        # token because only registers were advanced above.
   3699     la_a0 &paste_in
   3700     ld_t0,a0,0
   3701     addi_t0,t0,48
   3702     st_t0,a0,0
   3703 
   3704     la_br &paste_pool_loop
   3705     b
   3706 
   3707 :paste_pool_done
   3708     # pool_used = (out - expand_pool)
        # i.e. the byte offset of the write cursor, truncating the pool to the
        # compacted length.
   3709     la_a0 &paste_out
   3710     ld_t0,a0,0
   3711     la_a1 &expand_pool_ptr
   3712     ld_a1,a1,0
   3713     sub_t0,t0,a1
   3714     la_a1 &pool_used
   3715     st_t0,a1,0
   3716     eret
   3717 
   3718 ## ============================================================================
   3719 ## --- Integer atoms + S-expression evaluator ---------------------------------
   3720 ## ============================================================================
   3721 
   3722 ## parse_int_token(a0=tok) -> a0 = i64 (fatal on bad). Leaf.
   3723 ## Accepts decimal (optional leading '-') and 0x-prefixed hex. Positive
   3724 ## values are accumulated as u64 and reinterpreted as i64, so values with
   3725 ## the high bit set wrap to negative i64.
        ## The sign is consumed before the 0x/0X prefix check, so a leading '-'
        ## also applies to hex input. No overflow check is performed on the
        ## accumulator. Every rejection (non-WORD token, empty text, bare "-",
        ## bare "0x", non-digit byte) branches to err_bad_macro_header.
   3726 ##
   3727 ## Register usage (leaf, no calls):
   3728 ##   t0 = src ptr (cursor into text)
   3729 ##   t1 = end ptr (text + len)
   3730 ##   t2 = current byte
   3731 ##   a0 = accumulator (return)
   3732 ##   a1 = negative flag (0/1)
   3733 ##   a2 = scratch (digit value, remaining length, lookahead byte)
   3734 ##   a3 = scratch (compare value, decimal multiplier 10)
   3735 :parse_int_token
   3736     # if (tok->kind != TOK_WORD) fatal
   3737     ld_t0,a0,0
   3738     li_t1 TOK_WORD
   3739     la_br &err_bad_macro_header
   3740     bne_t0,t1
   3741 
   3742     # src = tok->text_ptr; len = tok->text_len; end = src + len
   3743     ld_t0,a0,8
   3744     ld_t1,a0,16
   3745 
   3746     # if (len == 0) fatal
   3747     la_br &err_bad_macro_header
   3748     beqz_t1
   3749     add_t1,t0,t1
   3750 
   3751     # negative = 0
   3752     li_a1 %0 %0
   3753 
   3754     # if (*src == '-') { negative = 1; src++; if (src == end) fatal }
   3755     lb_t2,t0,0
   3756     li_a3 %45 %0
   3757     la_br &pit_after_sign
   3758     bne_t2,a3
   3759     li_a1 %1 %0
   3760     addi_t0,t0,1
   3761     la_br &err_bad_macro_header
   3762     beq_t0,t1
   3763 
   3764 :pit_after_sign
   3765     # accumulator = 0
   3766     li_a0 %0 %0
   3767 
   3768     # check for 0x / 0X prefix: need at least 2 chars left
   3769     mov_a2,t1
   3770     sub_a2,a2,t0
   3771     li_a3 %2 %0
   3772     la_br &pit_decimal
   3773     blt_a2,a3
   3774 
   3775     # if (src[0] == '0' && (src[1] == 'x' || src[1] == 'X')) -> hex
   3776     lb_t2,t0,0
   3777     li_a3 %48 %0
   3778     la_br &pit_decimal
   3779     bne_t2,a3
   3780     addi_a2,t0,1
   3781     lb_a2,a2,0
   3782     li_a3 %120 %0
   3783     la_br &pit_hex_start
   3784     beq_a2,a3
   3785     li_a3 %88 %0
   3786     la_br &pit_hex_start
   3787     beq_a2,a3
   3788     la_br &pit_decimal
   3789     b
   3790 
   3791 :pit_hex_start
   3792     # consume "0x"; require at least one hex digit after
   3793     addi_t0,t0,2
   3794     la_br &err_bad_macro_header
   3795     beq_t0,t1
   3796 :pit_hex_loop
   3797     la_br &pit_finish
   3798     beq_t0,t1
   3799     lb_t2,t0,0
   3800 
   3801     # 0..9
        # Out-of-range bytes fall through the lower-case and upper-case checks
        # in turn; the last one is fatal.
   3802     li_a3 %48 %0
   3803     la_br &pit_hex_check_lower
   3804     blt_t2,a3
   3805     li_a3 %57 %0
   3806     la_br &pit_hex_check_lower
   3807     blt_a3,t2
   3808     # digit = c - '0'
   3809     addi_a2,t2,neg48
   3810     la_br &pit_hex_accum
   3811     b
   3812 
   3813 :pit_hex_check_lower
   3814     # 'a'..'f'
   3815     li_a3 %97 %0
   3816     la_br &pit_hex_check_upper
   3817     blt_t2,a3
   3818     li_a3 %102 %0
   3819     la_br &pit_hex_check_upper
   3820     blt_a3,t2
   3821     # digit = (c - 'a') + 10
        # The +10 is applied as +8 then +2.
        # NOTE(review): presumably no addi-by-10 token is defined -- confirm.
   3822     li_a3 %97 %0
   3823     sub_a2,t2,a3
   3824     addi_a2,a2,8
   3825     addi_a2,a2,2
   3826     la_br &pit_hex_accum
   3827     b
   3828 
   3829 :pit_hex_check_upper
   3830     # 'A'..'F'
   3831     li_a3 %65 %0
   3832     la_br &err_bad_macro_header
   3833     blt_t2,a3
   3834     li_a3 %70 %0
   3835     la_br &err_bad_macro_header
   3836     blt_a3,t2
   3837     # digit = (c - 'A') + 10
   3838     li_a3 %65 %0
   3839     sub_a2,t2,a3
   3840     addi_a2,a2,8
   3841     addi_a2,a2,2
   3842 
   3843 :pit_hex_accum
   3844     # accum = (accum << 4) | digit
   3845     shli_a0,a0,4
   3846     or_a0,a0,a2
   3847     addi_t0,t0,1
   3848     la_br &pit_hex_loop
   3849     b
   3850 
   3851 :pit_decimal
   3852     # decimal loop: accum = accum * 10 + digit
   3853     # (the checks above guarantee at least one byte remains at src, so an
        # empty digit string cannot reach pit_finish with accum == 0)
   3854 :pit_decimal_loop
   3855     la_br &pit_finish
   3856     beq_t0,t1
   3857     lb_t2,t0,0
   3858     li_a3 %48 %0
   3859     la_br &err_bad_macro_header
   3860     blt_t2,a3
   3861     li_a3 %57 %0
   3862     la_br &err_bad_macro_header
   3863     blt_a3,t2
   3864     # accum = accum * 10
   3865     li_a3 %10 %0
   3866     mul_a0,a0,a3
   3867     # digit = c - '0'; accum += digit
   3868     addi_a2,t2,neg48
   3869     add_a0,a0,a2
   3870     addi_t0,t0,1
   3871     la_br &pit_decimal_loop
   3872     b
   3873 
   3874 :pit_finish
   3875     # if (negative) accum = 0 - accum
   3876     la_br &pit_done
   3877     beqz_a1
   3878     li_a3 %0 %0
   3879     sub_a0,a3,a0
   3880 :pit_done
   3881     ret
   3882 
   3883 ## expr_op_code(a0=tok) -> a0 = EXPR_ADD..EXPR_STRLEN, or EXPR_INVALID.
   3884 ## Accepts operator tokens: +  -  *  /  %  <<  >>  &  |  ^  ~  =  !=
   3885 ## <  <=  >  >=  strlen. Non-WORD tok or unknown operator -> EXPR_INVALID.
   3886 ##
   3887 ## tok_eq_const is a leaf but clobbers a0..a3,t0..t2; spill tok to eoc_tok
   3888 ## once, reload before each compare. Needs an enter_0 frame because it
   3889 ## issues `call` instructions (aarch64 CALL writes LR).
        ##
        ## Shape: a linear chain of tok_eq_const(tok, &op_<name>, len) calls,
        ## each followed by a branch on a nonzero result to a stub that loads
        ## the matching EXPR_* constant and returns. If no compare matches,
        ## control falls through into eoc_invalid.
   3890 :expr_op_code
   3891     enter_0
   3892     # spill tok; reject non-WORD up front
   3893     la_a1 &eoc_tok
   3894     st_a0,a1,0
   3895     ld_t0,a0,0
   3896     li_t1 TOK_WORD
   3897     la_br &eoc_invalid
   3898     bne_t0,t1
   3899 
   3900     # "+" -> EXPR_ADD
   3901     la_a0 &eoc_tok
   3902     ld_a0,a0,0
   3903     la_a1 &op_plus
   3904     li_a2 %1 %0
   3905     la_br &tok_eq_const
   3906     call
   3907     la_br &eoc_add
   3908     bnez_a0
   3909 
   3910     # "-" -> EXPR_SUB
   3911     la_a0 &eoc_tok
   3912     ld_a0,a0,0
   3913     la_a1 &op_minus
   3914     li_a2 %1 %0
   3915     la_br &tok_eq_const
   3916     call
   3917     la_br &eoc_sub
   3918     bnez_a0
   3919 
   3920     # "*" -> EXPR_MUL
   3921     la_a0 &eoc_tok
   3922     ld_a0,a0,0
   3923     la_a1 &op_star
   3924     li_a2 %1 %0
   3925     la_br &tok_eq_const
   3926     call
   3927     la_br &eoc_mul
   3928     bnez_a0
   3929 
   3930     # "/" -> EXPR_DIV
   3931     la_a0 &eoc_tok
   3932     ld_a0,a0,0
   3933     la_a1 &op_slash
   3934     li_a2 %1 %0
   3935     la_br &tok_eq_const
   3936     call
   3937     la_br &eoc_div
   3938     bnez_a0
   3939 
   3940     # "%" -> EXPR_MOD
   3941     la_a0 &eoc_tok
   3942     ld_a0,a0,0
   3943     la_a1 &op_percent
   3944     li_a2 %1 %0
   3945     la_br &tok_eq_const
   3946     call
   3947     la_br &eoc_mod
   3948     bnez_a0
   3949 
   3950     # "<<" -> EXPR_SHL
   3951     la_a0 &eoc_tok
   3952     ld_a0,a0,0
   3953     la_a1 &op_shl
   3954     li_a2 %2 %0
   3955     la_br &tok_eq_const
   3956     call
   3957     la_br &eoc_shl
   3958     bnez_a0
   3959 
   3960     # ">>" -> EXPR_SHR
   3961     la_a0 &eoc_tok
   3962     ld_a0,a0,0
   3963     la_a1 &op_shr
   3964     li_a2 %2 %0
   3965     la_br &tok_eq_const
   3966     call
   3967     la_br &eoc_shr
   3968     bnez_a0
   3969 
   3970     # "&" -> EXPR_AND
   3971     la_a0 &eoc_tok
   3972     ld_a0,a0,0
   3973     la_a1 &op_amp
   3974     li_a2 %1 %0
   3975     la_br &tok_eq_const
   3976     call
   3977     la_br &eoc_and
   3978     bnez_a0
   3979 
   3980     # "|" -> EXPR_OR
   3981     la_a0 &eoc_tok
   3982     ld_a0,a0,0
   3983     la_a1 &op_bar
   3984     li_a2 %1 %0
   3985     la_br &tok_eq_const
   3986     call
   3987     la_br &eoc_or
   3988     bnez_a0
   3989 
   3990     # "^" -> EXPR_XOR
   3991     la_a0 &eoc_tok
   3992     ld_a0,a0,0
   3993     la_a1 &op_caret
   3994     li_a2 %1 %0
   3995     la_br &tok_eq_const
   3996     call
   3997     la_br &eoc_xor
   3998     bnez_a0
   3999 
   4000     # "~" -> EXPR_NOT
   4001     la_a0 &eoc_tok
   4002     ld_a0,a0,0
   4003     la_a1 &op_tilde
   4004     li_a2 %1 %0
   4005     la_br &tok_eq_const
   4006     call
   4007     la_br &eoc_not
   4008     bnez_a0
   4009 
   4010     # "=" -> EXPR_EQ
   4011     la_a0 &eoc_tok
   4012     ld_a0,a0,0
   4013     la_a1 &op_eq
   4014     li_a2 %1 %0
   4015     la_br &tok_eq_const
   4016     call
   4017     la_br &eoc_eq
   4018     bnez_a0
   4019 
   4020     # "!=" -> EXPR_NE
   4021     la_a0 &eoc_tok
   4022     ld_a0,a0,0
   4023     la_a1 &op_ne
   4024     li_a2 %2 %0
   4025     la_br &tok_eq_const
   4026     call
   4027     la_br &eoc_ne
   4028     bnez_a0
   4029 
   4030     # "<=" -> EXPR_LE (check before single "<")
   4031     la_a0 &eoc_tok
   4032     ld_a0,a0,0
   4033     la_a1 &op_le
   4034     li_a2 %2 %0
   4035     la_br &tok_eq_const
   4036     call
   4037     la_br &eoc_le
   4038     bnez_a0
   4039 
   4040     # "<" -> EXPR_LT
   4041     la_a0 &eoc_tok
   4042     ld_a0,a0,0
   4043     la_a1 &op_lt
   4044     li_a2 %1 %0
   4045     la_br &tok_eq_const
   4046     call
   4047     la_br &eoc_lt
   4048     bnez_a0
   4049 
   4050     # ">=" -> EXPR_GE (check before single ">")
   4051     la_a0 &eoc_tok
   4052     ld_a0,a0,0
   4053     la_a1 &op_ge
   4054     li_a2 %2 %0
   4055     la_br &tok_eq_const
   4056     call
   4057     la_br &eoc_ge
   4058     bnez_a0
   4059 
   4060     # ">" -> EXPR_GT
   4061     la_a0 &eoc_tok
   4062     ld_a0,a0,0
   4063     la_a1 &op_gt
   4064     li_a2 %1 %0
   4065     la_br &tok_eq_const
   4066     call
   4067     la_br &eoc_gt
   4068     bnez_a0
   4069 
   4070     # "strlen" -> EXPR_STRLEN
   4071     la_a0 &eoc_tok
   4072     ld_a0,a0,0
   4073     la_a1 &op_strlen
   4074     li_a2 %6 %0
   4075     la_br &tok_eq_const
   4076     call
   4077     la_br &eoc_strlen
   4078     bnez_a0
   4079 
        # No operator matched: fall through into eoc_invalid (also the target
        # of the non-WORD rejection at the top).
   4080 :eoc_invalid
   4081     li_a0 EXPR_INVALID
   4082     eret
        # Return stubs: one per operator, each loads its code into a0 and returns.
   4083 :eoc_add
   4084     li_a0 EXPR_ADD
   4085     eret
   4086 :eoc_sub
   4087     li_a0 EXPR_SUB
   4088     eret
   4089 :eoc_mul
   4090     li_a0 EXPR_MUL
   4091     eret
   4092 :eoc_div
   4093     li_a0 EXPR_DIV
   4094     eret
   4095 :eoc_mod
   4096     li_a0 EXPR_MOD
   4097     eret
   4098 :eoc_shl
   4099     li_a0 EXPR_SHL
   4100     eret
   4101 :eoc_shr
   4102     li_a0 EXPR_SHR
   4103     eret
   4104 :eoc_and
   4105     li_a0 EXPR_AND
   4106     eret
   4107 :eoc_or
   4108     li_a0 EXPR_OR
   4109     eret
   4110 :eoc_xor
   4111     li_a0 EXPR_XOR
   4112     eret
   4113 :eoc_not
   4114     li_a0 EXPR_NOT
   4115     eret
   4116 :eoc_eq
   4117     li_a0 EXPR_EQ
   4118     eret
   4119 :eoc_ne
   4120     li_a0 EXPR_NE
   4121     eret
   4122 :eoc_lt
   4123     li_a0 EXPR_LT
   4124     eret
   4125 :eoc_le
   4126     li_a0 EXPR_LE
   4127     eret
   4128 :eoc_gt
   4129     li_a0 EXPR_GT
   4130     eret
   4131 :eoc_ge
   4132     li_a0 EXPR_GE
   4133     eret
   4134 :eoc_strlen
   4135     li_a0 EXPR_STRLEN
   4136     eret
   4137 
   4138 ## apply_expr_op(a0=op_code, a1=args_ptr, a2=argc) -> a0 = i64 result
   4139 ## Reduce args[0..argc) (8-byte slots, args[i] at args_ptr + 8*i) per op:
   4140 ##   + * & | $      variadic, argc >= 1
   4141 ##   -              argc >= 1 (argc == 1 is negate, else left-assoc subtract)
   4142 ##   / %            binary, div-by-zero fatal
   4143 ##   << >>          binary (>> is arithmetic)
   4144 ##   ~              unary
   4145 ##   = == != < <= > >=  binary
   4146 ## Fatal (err_bad_macro_header) on wrong argc or on an op code that has no arm
   4147 ## in the dispatch chain below (EXPR_INVALID; EXPR_STRLEN is not dispatched here).
   4148 ## Calls aeo_require_* helpers via `call`, so it needs a frame.
   4149 ## State held in BSS scratch (aeo_op/args/argc/acc/i) since loops trash registers.
   4150 :apply_expr_op
   4151     enter_0
   4152     # spill op, args, argc to BSS (a3 is the address temp, so a0..a2 stay intact)
   4153     la_a3 &aeo_op
   4154     st_a0,a3,0
   4155     la_a3 &aeo_args
   4156     st_a1,a3,0
   4157     la_a3 &aeo_argc
   4158     st_a2,a3,0
   4159 
   4160     # dispatch: a0 still holds op; compare against each EXPR_* and branch to its handler
   4161     li_t0 EXPR_ADD
   4162     la_br &aeo_do_add
   4163     beq_a0,t0
   4164     li_t0 EXPR_SUB
   4165     la_br &aeo_do_sub
   4166     beq_a0,t0
   4167     li_t0 EXPR_MUL
   4168     la_br &aeo_do_mul
   4169     beq_a0,t0
   4170     li_t0 EXPR_DIV
   4171     la_br &aeo_do_div
   4172     beq_a0,t0
   4173     li_t0 EXPR_MOD
   4174     la_br &aeo_do_mod
   4175     beq_a0,t0
   4176     li_t0 EXPR_SHL
   4177     la_br &aeo_do_shl
   4178     beq_a0,t0
   4179     li_t0 EXPR_SHR
   4180     la_br &aeo_do_shr
   4181     beq_a0,t0
   4182     li_t0 EXPR_AND
   4183     la_br &aeo_do_and
   4184     beq_a0,t0
   4185     li_t0 EXPR_OR
   4186     la_br &aeo_do_or
   4187     beq_a0,t0
   4188     li_t0 EXPR_XOR
   4189     la_br &aeo_do_xor
   4190     beq_a0,t0
   4191     li_t0 EXPR_NOT
   4192     la_br &aeo_do_not
   4193     beq_a0,t0
   4194     li_t0 EXPR_EQ
   4195     la_br &aeo_do_eq
   4196     beq_a0,t0
   4197     li_t0 EXPR_NE
   4198     la_br &aeo_do_ne
   4199     beq_a0,t0
   4200     li_t0 EXPR_LT
   4201     la_br &aeo_do_lt
   4202     beq_a0,t0
   4203     li_t0 EXPR_LE
   4204     la_br &aeo_do_le
   4205     beq_a0,t0
   4206     li_t0 EXPR_GT
   4207     la_br &aeo_do_gt
   4208     beq_a0,t0
   4209     li_t0 EXPR_GE
   4210     la_br &aeo_do_ge
   4211     beq_a0,t0
   4212     # no arm matched (EXPR_INVALID, EXPR_STRLEN, or unknown code): fatal
   4213     la_br &err_bad_macro_header
   4214     b
   4215 
   4216 ## --- shared helpers for variadic folds ----------------------------------
   4217 ## aeo_require_argc_ge1: branch to err if argc < 1
   4218 ## aeo_require_argc_eq2: branch to err if argc != 2
   4219 ## aeo_load_arg0_to_acc: acc = args[0]; i = 1
   4220 
   4221 :aeo_do_add
   4222     la_br &aeo_require_argc_ge1    # EXPR_ADD handler: fatal unless argc >= 1
   4223     call
   4224     la_br &aeo_load_arg0_to_acc    # acc = args[0]; i = 1
   4225     call
   4226 :aeo_add_loop
   4227     la_a0 &aeo_i
   4228     ld_t0,a0,0                     # t0 = i
   4229     la_a1 &aeo_argc
   4230     ld_t1,a1,0                     # t1 = argc
   4231     la_br &aeo_finish
   4232     beq_t0,t1                      # i == argc: every arg folded, return acc
   4233     # acc += args[i]
   4234     la_a0 &aeo_args
   4235     ld_a1,a0,0                     # a1 = args base pointer
   4236     shli_t2,t0,3                   # t2 = i * 8 (byte offset of args[i])
   4237     add_t2,a1,t2
   4238     ld_a2,t2,0                     # a2 = args[i]
   4239     la_a0 &aeo_acc
   4240     ld_a3,a0,0
   4241     add_a3,a3,a2
   4242     st_a3,a0,0
   4243     addi_t0,t0,1
   4244     la_a1 &aeo_i
   4245     st_t0,a1,0                     # i++ (kept in aeo_i, reloaded at loop top)
   4246     la_br &aeo_add_loop
   4247     b
   4248 
   4249 :aeo_do_sub
   4250     la_br &aeo_require_argc_ge1    # EXPR_SUB handler: fatal unless argc >= 1
   4251     call
   4252     # if (argc == 1) acc = -args[0] (unary negate); else acc = args[0] and fold
   4253     la_a0 &aeo_argc
   4254     ld_t0,a0,0
   4255     li_t1 %1 %0
   4256     la_br &aeo_sub_unary
   4257     beq_t0,t1
   4258     la_br &aeo_load_arg0_to_acc    # acc = args[0]; i = 1
   4259     call
   4260 :aeo_sub_loop
   4261     la_a0 &aeo_i
   4262     ld_t0,a0,0                     # t0 = i
   4263     la_a1 &aeo_argc
   4264     ld_t1,a1,0                     # t1 = argc
   4265     la_br &aeo_finish
   4266     beq_t0,t1                      # i == argc: done, return acc
   4267     la_a0 &aeo_args
   4268     ld_a1,a0,0
   4269     shli_t2,t0,3                   # t2 = i * 8 (byte offset of args[i])
   4270     add_t2,a1,t2
   4271     ld_a2,t2,0                     # a2 = args[i]
   4272     la_a0 &aeo_acc
   4273     ld_a3,a0,0
   4274     sub_a3,a3,a2                   # acc -= args[i] (left-associative)
   4275     st_a3,a0,0
   4276     addi_t0,t0,1
   4277     la_a1 &aeo_i
   4278     st_t0,a1,0                     # i++
   4279     la_br &aeo_sub_loop
   4280     b
   4281 :aeo_sub_unary
   4282     # acc = 0 - args[0]  (aeo_i is not touched on this path)
   4283     la_a0 &aeo_args
   4284     ld_a1,a0,0
   4285     ld_a2,a1,0
   4286     li_a3 %0 %0
   4287     sub_a3,a3,a2
   4288     la_a0 &aeo_acc
   4289     st_a3,a0,0
   4290     la_br &aeo_finish
   4291     b
   4292 
   4293 :aeo_do_mul
   4294     la_br &aeo_require_argc_ge1    # EXPR_MUL handler: fatal unless argc >= 1
   4295     call
   4296     la_br &aeo_load_arg0_to_acc    # acc = args[0]; i = 1
   4297     call
   4298 :aeo_mul_loop
   4299     la_a0 &aeo_i
   4300     ld_t0,a0,0                     # t0 = i
   4301     la_a1 &aeo_argc
   4302     ld_t1,a1,0                     # t1 = argc
   4303     la_br &aeo_finish
   4304     beq_t0,t1                      # i == argc: done, return acc
   4305     la_a0 &aeo_args
   4306     ld_a1,a0,0
   4307     shli_t2,t0,3                   # t2 = i * 8 (byte offset of args[i])
   4308     add_t2,a1,t2
   4309     ld_a2,t2,0                     # a2 = args[i]
   4310     la_a0 &aeo_acc
   4311     ld_a3,a0,0
   4312     mul_a3,a3,a2                   # acc *= args[i]
   4313     st_a3,a0,0
   4314     addi_t0,t0,1
   4315     la_a1 &aeo_i
   4316     st_t0,a1,0                     # i++
   4317     la_br &aeo_mul_loop
   4318     b
   4319 
   4320 :aeo_do_div
   4321     la_br &aeo_require_argc_eq2    # EXPR_DIV handler: fatal unless argc == 2
   4322     call
   4323     la_a0 &aeo_args
   4324     ld_a1,a0,0
   4325     ld_a2,a1,0                     # a2 = args[0] (dividend)
   4326     ld_a3,a1,8                     # a3 = args[1] (divisor)
   4327     # if (args[1] == 0) fatal -- reported through the shared err_bad_macro_header path
   4328     la_br &err_bad_macro_header
   4329     beqz_a3
   4330     div_a2,a2,a3
   4331     la_a0 &aeo_acc
   4332     st_a2,a0,0                     # acc = args[0] / args[1]
   4333     la_br &aeo_finish
   4334     b
   4335 
   4336 :aeo_do_mod
   4337     la_br &aeo_require_argc_eq2    # EXPR_MOD handler: fatal unless argc == 2
   4338     call
   4339     la_a0 &aeo_args
   4340     ld_a1,a0,0
   4341     ld_a2,a1,0                     # a2 = args[0]
   4342     ld_a3,a1,8                     # a3 = args[1]
   4343     la_br &err_bad_macro_header    # zero divisor is fatal, same as aeo_do_div
   4344     beqz_a3
   4345     rem_a2,a2,a3
   4346     la_a0 &aeo_acc
   4347     st_a2,a0,0                     # acc = remainder of args[0] / args[1]
   4348     la_br &aeo_finish
   4349     b
   4350 
   4351 :aeo_do_shl
   4352     la_br &aeo_require_argc_eq2    # EXPR_SHL handler: fatal unless argc == 2
   4353     call
   4354     la_a0 &aeo_args
   4355     ld_a1,a0,0
   4356     ld_a2,a1,0                     # a2 = args[0] (value)
   4357     ld_a3,a1,8                     # a3 = args[1] (shift count; not range-checked here)
   4358     shl_a2,a2,a3
   4359     la_a0 &aeo_acc
   4360     st_a2,a0,0                     # acc = args[0] << args[1]
   4361     la_br &aeo_finish
   4362     b
   4363 
   4364 :aeo_do_shr
   4365     la_br &aeo_require_argc_eq2    # EXPR_SHR handler: fatal unless argc == 2
   4366     call
   4367     la_a0 &aeo_args
   4368     ld_a1,a0,0
   4369     ld_a2,a1,0                     # a2 = args[0] (value)
   4370     ld_a3,a1,8                     # a3 = args[1] (shift count; not range-checked here)
   4371     sar_a2,a2,a3                   # sar: the arithmetic >> described in the apply_expr_op header
   4372     la_a0 &aeo_acc
   4373     st_a2,a0,0
   4374     la_br &aeo_finish
   4375     b
   4376 
   4377 :aeo_do_and
   4378     la_br &aeo_require_argc_ge1    # EXPR_AND handler: fatal unless argc >= 1
   4379     call
   4380     la_br &aeo_load_arg0_to_acc    # acc = args[0]; i = 1
   4381     call
   4382 :aeo_and_loop
   4383     la_a0 &aeo_i
   4384     ld_t0,a0,0                     # t0 = i
   4385     la_a1 &aeo_argc
   4386     ld_t1,a1,0                     # t1 = argc
   4387     la_br &aeo_finish
   4388     beq_t0,t1                      # i == argc: done, return acc
   4389     la_a0 &aeo_args
   4390     ld_a1,a0,0
   4391     shli_t2,t0,3                   # t2 = i * 8 (byte offset of args[i])
   4392     add_t2,a1,t2
   4393     ld_a2,t2,0                     # a2 = args[i]
   4394     la_a0 &aeo_acc
   4395     ld_a3,a0,0
   4396     and_a3,a3,a2                   # acc &= args[i]
   4397     st_a3,a0,0
   4398     addi_t0,t0,1
   4399     la_a1 &aeo_i
   4400     st_t0,a1,0                     # i++
   4401     la_br &aeo_and_loop
   4402     b
   4403 
   4404 :aeo_do_or
   4405     la_br &aeo_require_argc_ge1    # EXPR_OR handler: fatal unless argc >= 1
   4406     call
   4407     la_br &aeo_load_arg0_to_acc    # acc = args[0]; i = 1
   4408     call
   4409 :aeo_or_loop
   4410     la_a0 &aeo_i
   4411     ld_t0,a0,0                     # t0 = i
   4412     la_a1 &aeo_argc
   4413     ld_t1,a1,0                     # t1 = argc
   4414     la_br &aeo_finish
   4415     beq_t0,t1                      # i == argc: done, return acc
   4416     la_a0 &aeo_args
   4417     ld_a1,a0,0
   4418     shli_t2,t0,3                   # t2 = i * 8 (byte offset of args[i])
   4419     add_t2,a1,t2
   4420     ld_a2,t2,0                     # a2 = args[i]
   4421     la_a0 &aeo_acc
   4422     ld_a3,a0,0
   4423     or_a3,a3,a2                    # acc |= args[i]
   4424     st_a3,a0,0
   4425     addi_t0,t0,1
   4426     la_a1 &aeo_i
   4427     st_t0,a1,0                     # i++
   4428     la_br &aeo_or_loop
   4429     b
   4430 
   4431 :aeo_do_xor
   4432     la_br &aeo_require_argc_ge1    # EXPR_XOR handler: fatal unless argc >= 1
   4433     call
   4434     la_br &aeo_load_arg0_to_acc    # acc = args[0]; i = 1
   4435     call
   4436 :aeo_xor_loop
   4437     la_a0 &aeo_i
   4438     ld_t0,a0,0                     # t0 = i
   4439     la_a1 &aeo_argc
   4440     ld_t1,a1,0                     # t1 = argc
   4441     la_br &aeo_finish
   4442     beq_t0,t1                      # i == argc: done, return acc
   4443     la_a0 &aeo_args
   4444     ld_a1,a0,0
   4445     shli_t2,t0,3                   # t2 = i * 8 (byte offset of args[i])
   4446     add_t2,a1,t2
   4447     ld_a2,t2,0                     # a2 = args[i]
   4448     la_a0 &aeo_acc
   4449     ld_a3,a0,0
   4450     xor_a3,a3,a2                   # acc ^= args[i]
   4451     st_a3,a0,0
   4452     addi_t0,t0,1
   4453     la_a1 &aeo_i
   4454     st_t0,a1,0                     # i++
   4455     la_br &aeo_xor_loop
   4456     b
   4457 
   4458 :aeo_do_not
   4459     # EXPR_NOT handler. require argc == 1 (checked inline; no aeo_require_* helper for this)
   4460     la_a0 &aeo_argc
   4461     ld_t0,a0,0
   4462     li_t1 %1 %0
   4463     la_br &err_bad_macro_header
   4464     bne_t0,t1
   4465     la_a0 &aeo_args
   4466     ld_a1,a0,0
   4467     ld_a2,a1,0                     # a2 = args[0]
   4468     # ~x = x XOR -1; the -1 mask is built as 0 - 1 in a3
   4469     li_a3 %1 %0
   4470     li_t0 %0 %0
   4471     sub_a3,t0,a3
   4472     xor_a2,a2,a3
   4473     la_a0 &aeo_acc
   4474     st_a2,a0,0
   4475     la_br &aeo_finish
   4476     b
   4477 
   4478 ## --- comparison ops: return 0 or 1 ----------------------------------------
   4479 ## EQ:  args[0] == args[1]
   4480 ## NE:  args[0] != args[1]
   4481 ## LT:  args[0] <  args[1]   (signed)
   4482 ## LE:  args[0] <= args[1]   (signed)
   4483 ## GT:  args[0] >  args[1]   (signed)
   4484 ## GE:  args[0] >= args[1]   (signed)
   4485 
   4486 :aeo_do_eq
   4487     la_br &aeo_require_argc_eq2    # EXPR_EQ handler: fatal unless argc == 2
   4488     call
   4489     la_a0 &aeo_args
   4490     ld_a1,a0,0
   4491     ld_a2,a1,0                     # a2 = args[0]
   4492     ld_a3,a1,8                     # a3 = args[1]
   4493     li_t0 %0 %0                    # provisional result 0
   4494     la_br &aeo_cmp_store_zero
   4495     bne_a2,a3                      # operands differ: keep 0
   4496     li_t0 %1 %0                    # operands equal: result 1
   4497 :aeo_cmp_store_zero
   4498     la_a0 &aeo_acc                 # join point: stores whichever 0/1 is in t0 (not always zero)
   4499     st_t0,a0,0
   4500     la_br &aeo_finish
   4501     b
   4502 
   4503 :aeo_do_ne
   4504     la_br &aeo_require_argc_eq2    # EXPR_NE handler: fatal unless argc == 2
   4505     call
   4506     la_a0 &aeo_args
   4507     ld_a1,a0,0
   4508     ld_a2,a1,0                     # a2 = args[0]
   4509     ld_a3,a1,8                     # a3 = args[1]
   4510     li_t0 %0 %0                    # provisional result 0
   4511     la_br &aeo_cmp_store_zero1
   4512     beq_a2,a3                      # operands equal: keep 0
   4513     li_t0 %1 %0                    # operands differ: result 1
   4514 :aeo_cmp_store_zero1
   4515     la_a0 &aeo_acc                 # join point: stores whichever 0/1 is in t0
   4516     st_t0,a0,0
   4517     la_br &aeo_finish
   4518     b
   4519 
   4520 :aeo_do_lt
   4521     la_br &aeo_require_argc_eq2    # EXPR_LT handler: fatal unless argc == 2
   4522     call
   4523     la_a0 &aeo_args
   4524     ld_a1,a0,0
   4525     ld_a2,a1,0                     # a2 = args[0]
   4526     ld_a3,a1,8                     # a3 = args[1]
   4527     li_t0 %1 %0                    # provisional result 1
   4528     la_br &aeo_cmp_store_one
   4529     blt_a2,a3                      # args[0] < args[1]: keep 1
   4530     li_t0 %0 %0                    # otherwise result 0
   4531 :aeo_cmp_store_one
   4532     la_a0 &aeo_acc                 # join point: stores whichever 0/1 is in t0
   4533     st_t0,a0,0
   4534     la_br &aeo_finish
   4535     b
   4536 
   4537 :aeo_do_le
   4538     # EXPR_LE handler.  a[0] <= a[1]   <=>   !(a[1] < a[0])  (only blt is used)
   4539     la_br &aeo_require_argc_eq2
   4540     call
   4541     la_a0 &aeo_args
   4542     ld_a1,a0,0
   4543     ld_a2,a1,0                     # a2 = args[0]
   4544     ld_a3,a1,8                     # a3 = args[1]
   4545     li_t0 %0 %0                    # provisional result 0
   4546     la_br &aeo_cmp_store_two
   4547     blt_a3,a2                      # args[1] < args[0]: keep 0
   4548     li_t0 %1 %0                    # otherwise result 1
   4549 :aeo_cmp_store_two
   4550     la_a0 &aeo_acc                 # join point: stores whichever 0/1 is in t0
   4551     st_t0,a0,0
   4552     la_br &aeo_finish
   4553     b
   4554 
   4555 :aeo_do_gt
   4556     # EXPR_GT handler.  a[0] > a[1]   <=>   a[1] < a[0]  (operands swapped into blt)
   4557     la_br &aeo_require_argc_eq2
   4558     call
   4559     la_a0 &aeo_args
   4560     ld_a1,a0,0
   4561     ld_a2,a1,0                     # a2 = args[0]
   4562     ld_a3,a1,8                     # a3 = args[1]
   4563     li_t0 %1 %0                    # provisional result 1
   4564     la_br &aeo_cmp_store_three
   4565     blt_a3,a2                      # args[1] < args[0]: keep 1
   4566     li_t0 %0 %0                    # otherwise result 0
   4567 :aeo_cmp_store_three
   4568     la_a0 &aeo_acc                 # join point: stores whichever 0/1 is in t0
   4569     st_t0,a0,0
   4570     la_br &aeo_finish
   4571     b
   4572 
   4573 :aeo_do_ge
   4574     # EXPR_GE handler.  a[0] >= a[1]   <=>   !(a[0] < a[1])
   4575     la_br &aeo_require_argc_eq2
   4576     call
   4577     la_a0 &aeo_args
   4578     ld_a1,a0,0
   4579     ld_a2,a1,0                     # a2 = args[0]
   4580     ld_a3,a1,8                     # a3 = args[1]
   4581     li_t0 %0 %0                    # provisional result 0
   4582     la_br &aeo_cmp_store_four
   4583     blt_a2,a3                      # args[0] < args[1]: keep 0
   4584     li_t0 %1 %0                    # otherwise result 1
   4585 :aeo_cmp_store_four
   4586     la_a0 &aeo_acc                 # join point: stores whichever 0/1 is in t0
   4587     st_t0,a0,0
   4588     la_br &aeo_finish
   4589     b
   4590 
   4591 :aeo_finish
   4592     la_a0 &aeo_acc                 # common exit of every apply_expr_op handler
   4593     ld_a0,a0,0                     # a0 = acc (the function result)
   4594     eret                           # pairs with the enter_0 at :apply_expr_op
   4595 
   4596 ## helper: validate aeo_argc >= 1; fatal otherwise. (Returns to caller.) Writes a0, t0, t1.
   4597 :aeo_require_argc_ge1
   4598     la_a0 &aeo_argc
   4599     ld_t0,a0,0                     # t0 = argc
   4600     li_t1 %1 %0
   4601     la_br &err_bad_macro_header
   4602     blt_t0,t1                      # argc < 1: fatal, never returns here
   4603     ret                            # plain ret: no enter_*, so no frame to tear down
   4604 
   4605 ## helper: validate aeo_argc == 2; fatal otherwise. (Returns to caller.) Writes a0, t0, t1.
   4606 :aeo_require_argc_eq2
   4607     la_a0 &aeo_argc
   4608     ld_t0,a0,0                     # t0 = argc
   4609     li_t1 %2 %0
   4610     la_br &err_bad_macro_header
   4611     bne_t0,t1                      # argc != 2: fatal, never returns here
   4612     ret                            # plain ret: no enter_*, so no frame to tear down
   4613 
   4614 ## helper: aeo_acc = args[0]; aeo_i = 1. Does not check argc itself. Writes a0, a1, a2, t0.
   4615 :aeo_load_arg0_to_acc
   4616     la_a0 &aeo_args
   4617     ld_a1,a0,0                     # a1 = args base pointer
   4618     ld_a2,a1,0                     # a2 = args[0]
   4619     la_a0 &aeo_acc
   4620     st_a2,a0,0
   4621     li_t0 %1 %0
   4622     la_a0 &aeo_i
   4623     st_t0,a0,0                     # i = 1: the fold loops start at the second argument
   4624     ret
   4625 
   4626 ## skip_expr_newlines(a0=pos, a1=end) -> a0 = new pos. Leaf.
   4627 ## Advance pos past consecutive TOK_NEWLINE tokens so expressions may span
   4628 ## lines. Writes only a0, t0, t1; pos == end is tested before any token is read.
   4629 :skip_expr_newlines
   4630 :sen_loop
   4631     # if (pos == end) done
   4632     la_br &sen_done
   4633     beq_a0,a1
   4634     # if (pos->kind != TOK_NEWLINE) done   (kind is the 8-byte field at offset 0)
   4635     ld_t0,a0,0
   4636     li_t1 TOK_NEWLINE
   4637     la_br &sen_done
   4638     bne_t0,t1
   4639     # pos += 24   (advance by one token record)
   4640     addi_a0,a0,24
   4641     la_br &sen_loop
   4642     b
   4643 :sen_done
   4644     ret
   4645 
   4646 ## eval_expr_atom(a0=tok, a1=limit) -> void
   4647 ## Outputs via globals:
   4648 ##   eval_after_pos = token one past the consumed atom (or one past ')' for
   4649 ##                    a macro atom)
   4650 ##   eval_value     = the atom's i64 value
   4651 ##
   4652 ## If tok names a macro and is followed by TOK_LPAREN, or the macro's
   4653 ## param_count is 0 (paren-less use): expand_macro_tokens into the pool,
   4654 ## recursively eval_expr_range over [emt_mark, pool_used) (one value, no trailing
   4655 ## tokens), restore pool_used = mark, and set eval_after_pos = emt_after_pos.
   4656 ## Otherwise parse_int_token(tok) and set eval_after_pos = tok + 24 bytes.
   4657 ##
   4658 ## CAVEAT: this path can recurse through eval_expr_range. Callers MUST
   4659 ## snapshot eval_after_pos / eval_value into local stack slots (via
   4660 ## enter_N) before any further call that might overwrite them.
   4661 ##
   4662 ## Stack-local layout (enter_40). NOTE(review): code operands are 16 lower; confirm:
   4663 ##   sp+16  saved tok                       (code operand: sp,0)
   4664 ##   sp+24  saved limit                     (code operand: sp,8)
   4665 ##   sp+32  macro_ptr (find_macro result)   (code operand: sp,16)
   4666 ##   sp+40  saved emt_after_pos             (code operand: sp,24)
   4667 ##   sp+48  saved emt_mark                  (code operand: sp,32)
   4668 :eval_expr_atom
   4669     enter_40
   4670     st_a0,sp,0
   4671     st_a1,sp,8
   4672 
   4673     # macro_ptr = find_macro(tok)   (a0 still holds tok from entry)
   4674     la_br &find_macro
   4675     call
   4676     st_a0,sp,16
   4677 
   4678     # if (macro_ptr == 0) -> integer atom branch
   4679     la_br &eea_int_atom
   4680     beqz_a0
   4681 
   4682     # Paren-less 0-arg atom:
   4683     #   Take the macro-call branch if (tok+1 < limit AND (tok+1)->kind == TOK_LPAREN)
   4684     #   OR macro->param_count == 0. Otherwise fall through to int atom (unchanged).
   4685     ld_t0,sp,0
   4686     addi_t0,t0,24                  # t0 = tok + 1 token
   4687     ld_t1,sp,8                     # t1 = limit
   4688     la_br &eea_check_zero_arg
   4689     blt_t1,t0                      # limit < tok+1: no next token to inspect
   4690     la_br &eea_check_zero_arg
   4691     beq_t0,t1                      # tok+1 == limit: likewise
   4692     ld_t2,t0,0                     # t2 = (tok+1)->kind
   4693     li_a3 TOK_LPAREN
   4694     la_br &eea_check_zero_arg
   4695     bne_t2,a3
   4696     la_br &eea_do_macro
   4697     b
   4698 
   4699 :eea_check_zero_arg
   4700     # No trailing LPAREN. Take the macro branch only if param_count == 0.
   4701     ld_t0,sp,16                    # t0 = macro_ptr
   4702     ld_t1,t0,16                    # t1 = field at macro_ptr+16 (param_count)
   4703     la_br &eea_int_atom
   4704     bnez_t1
   4705 
   4706 :eea_do_macro
   4707     # Macro call branch:
   4708     #   expand_macro_tokens(tok, limit, macro_ptr)
   4709     ld_a0,sp,0
   4710     ld_a1,sp,8
   4711     ld_a2,sp,16
   4712     la_br &expand_macro_tokens
   4713     call
   4714 
   4715     # Snapshot emt outputs immediately (the nested eval below may overwrite the globals).
   4716     la_a0 &emt_after_pos
   4717     ld_t0,a0,0
   4718     st_t0,sp,24
   4719     la_a0 &emt_mark
   4720     ld_t0,a0,0
   4721     st_t0,sp,32
   4722 
   4723     # If pool was not extended (pool_used == mark) -> bad expression.
   4724     la_a0 &pool_used
   4725     ld_t0,a0,0
   4726     ld_t1,sp,32
   4727     la_br &err_bad_macro_header
   4728     beq_t0,t1
   4729 
   4730     # eval_expr_range(expand_pool + mark, expand_pool + pool_used)
   4731     la_a0 &expand_pool_ptr
   4732     ld_a0,a0,0
   4733     ld_t1,sp,32
   4734     add_a0,a0,t1                   # mark and pool_used are added to the base unscaled
   4735     la_a1 &expand_pool_ptr
   4736     ld_a1,a1,0
   4737     la_a2 &pool_used
   4738     ld_a2,a2,0
   4739     add_a1,a1,a2
   4740     la_br &eval_expr_range
   4741     call
   4742 
   4743     # eval_value = result
   4744     la_a1 &eval_value
   4745     st_a0,a1,0
   4746 
   4747     # restore pool_used = mark   (drops the expansion appended above)
   4748     la_a0 &pool_used
   4749     ld_t0,sp,32
   4750     st_t0,a0,0
   4751 
   4752     # eval_after_pos = saved emt_after_pos
   4753     la_a0 &eval_after_pos
   4754     ld_t0,sp,24
   4755     st_t0,a0,0
   4756 
   4757     eret
   4758 
   4759 :eea_int_atom
   4760     # parse_int_token(tok) -> i64
   4761     ld_a0,sp,0
   4762     la_br &parse_int_token
   4763     call
   4764     la_a1 &eval_value
   4765     st_a0,a1,0
   4766 
   4767     # eval_after_pos = tok + 24
   4768     ld_t0,sp,0
   4769     addi_t0,t0,24
   4770     la_a0 &eval_after_pos
   4771     st_t0,a0,0
   4772 
   4773     eret
   4774 
   4775 ## eval_expr_range(a0=start_tok, a1=end_tok) -> a0 = i64 result (fatal on bad)
   4776 ## Main S-expression evaluator loop, driven by the explicit ExprFrame stack
   4777 ## in expr_frames[] / expr_frame_top — NOT by P1 recursion (eval_expr_atom
   4778 ## can re-enter eval_expr_range through expand_macro_tokens, and a P1
   4779 ## recursion would defeat the bounded frame budget). Enforces exactly one
   4780 ## top-level value and no trailing tokens.
   4781 ## Fatal on: unmatched parens, > 16 frames deep, > 16 args per frame,
   4782 ## bad atom, bad operator. (Both limits are tested against M1PP_MAX_PARAMS.)
   4783 ## Reads/writes: expr_frames, expr_frame_top.
   4784 ##
   4785 ## Stack-local layout (enter_56). NOTE(review): code operands are 16 lower; confirm:
   4786 ##   sp+16  pos              Token*                          (code operand: sp,0)
   4787 ##   sp+24  end              Token*                          (code operand: sp,8)
   4788 ##   sp+32  value            i64 (most recent atom or rparen result)   (sp,16)
   4789 ##   sp+40  result           i64 (set when have_result transitions to 1) (sp,24)
   4790 ##   sp+48  have_value       0/1                             (code operand: sp,32)
   4791 ##   sp+56  have_result      0/1                             (code operand: sp,40)
   4792 ##   sp+64  entry_frame_top  i64 (snapshot at entry; must match again on exit;
   4793 ##                                used as the local base for stack checks) (sp,48)
   4794 :eval_expr_range
   4795     enter_56
   4796     st_a0,sp,0
   4797     st_a1,sp,8
   4798     li_t0 %0 %0
   4799     st_t0,sp,16                    # value = 0
   4800     st_t0,sp,24                    # result = 0
   4801     st_t0,sp,32                    # have_value = 0
   4802     st_t0,sp,40                    # have_result = 0
   4803     # entry_frame_top = expr_frame_top
   4804     la_a0 &expr_frame_top
   4805     ld_t0,a0,0
   4806     st_t0,sp,48
   4807 
   4808 :eer_loop
   4809     # If have_value, deliver it.
   4810     ld_t0,sp,32
   4811     la_br &eer_no_have_value
   4812     beqz_t0
   4813 
   4814     # have_value: feed into top frame, or set result.
   4815     la_a0 &expr_frame_top
   4816     ld_t0,a0,0
   4817     ld_t1,sp,48
   4818     la_br &eer_set_result
   4819     beq_t0,t1                      # no frame opened by this invocation is pending
   4820     # frame = &expr_frames[frame_top - 1]
   4821     addi_t0,t0,neg1
   4822     li_a1 M1PP_EXPR_FRAME_SIZE
   4823     mul_t0,t0,a1
   4824     la_a0 &expr_frames_ptr
   4825     ld_a0,a0,0
   4826     add_a0,a0,t0                   # a0 = frame
   4827     # if (frame->argc >= MAX_PARAMS) fatal   (tested as MAX < argc, then argc == MAX)
   4828     li_a1 M1PP_EXPR_ARGC_OFF
   4829     add_a1,a0,a1                   # a1 = &frame->argc (reused for the write-back)
   4830     ld_t1,a1,0
   4831     li_a2 M1PP_MAX_PARAMS
   4832     la_br &err_bad_macro_header
   4833     blt_a2,t1
   4834     la_br &err_bad_macro_header
   4835     beq_t1,a2
   4836     # frame->args[argc] = value   (8-byte slots)
   4837     li_a2 M1PP_EXPR_ARGS_OFF
   4838     add_a3,a0,a2
   4839     shli_a2,t1,3
   4840     add_a3,a3,a2
   4841     ld_t2,sp,16
   4842     st_t2,a3,0
   4843     # frame->argc++
   4844     addi_t1,t1,1
   4845     st_t1,a1,0
   4846     # have_value = 0
   4847     li_t0 %0 %0
   4848     st_t0,sp,32
   4849     la_br &eer_loop
   4850     b
   4851 
   4852 :eer_set_result
   4853     # No frame open; this value is the top-level result. A second one is fatal.
   4854     ld_t0,sp,40
   4855     la_br &err_bad_macro_header
   4856     bnez_t0
   4857     ld_t0,sp,16
   4858     st_t0,sp,24                    # result = value
   4859     li_t0 %1 %0
   4860     st_t0,sp,40                    # have_result = 1
   4861     li_t0 %0 %0
   4862     st_t0,sp,32                    # have_value = 0
   4863     la_br &eer_loop
   4864     b
   4865 
   4866 :eer_no_have_value
   4867     # skip_expr_newlines(pos, end)
   4868     ld_a0,sp,0
   4869     ld_a1,sp,8
   4870     la_br &skip_expr_newlines
   4871     call
   4872     st_a0,sp,0
   4873 
   4874     # if (pos == end) break   (equality only; skip_expr_newlines stops at end)
   4875     ld_t0,sp,0
   4876     ld_t1,sp,8
   4877     la_br &eer_loop_done
   4878     beq_t0,t1
   4879 
   4880     # Dispatch on token kind (t0 = pos stays live into eer_lparen).
   4881     ld_t2,t0,0
   4882     li_a3 TOK_LPAREN
   4883     la_br &eer_lparen
   4884     beq_t2,a3
   4885     li_a3 TOK_RPAREN
   4886     la_br &eer_rparen
   4887     beq_t2,a3
   4888 
   4889     # atom: eval_expr_atom(pos, end); value = eval_value; pos = eval_after_pos
   4890     ld_a0,sp,0
   4891     ld_a1,sp,8
   4892     la_br &eval_expr_atom
   4893     call
   4894     la_a0 &eval_value
   4895     ld_t0,a0,0
   4896     st_t0,sp,16
   4897     la_a0 &eval_after_pos
   4898     ld_t0,a0,0
   4899     st_t0,sp,0
   4900     li_t0 %1 %0
   4901     st_t0,sp,32                    # have_value = 1
   4902     la_br &eer_loop
   4903     b
   4904 
   4905 :eer_lparen
   4906     # pos++   (t0 still holds pos from the dispatch above)
   4907     addi_t0,t0,24
   4908     st_t0,sp,0
   4909     # skip_expr_newlines
   4910     ld_a0,sp,0
   4911     ld_a1,sp,8
   4912     la_br &skip_expr_newlines
   4913     call
   4914     st_a0,sp,0
   4915     # if (pos == end) fatal   ('(' with nothing after it)
   4916     ld_t0,sp,0
   4917     ld_t1,sp,8
   4918     la_br &err_bad_macro_header
   4919     beq_t0,t1
   4920     # op = expr_op_code(pos)
   4921     ld_a0,sp,0
   4922     la_br &expr_op_code
   4923     call
   4924     # if (op == EXPR_INVALID) fatal
   4925     li_t0 EXPR_INVALID
   4926     la_br &err_bad_macro_header
   4927     beq_a0,t0
   4928     # if (op == EXPR_STRLEN) handle inline — strlen's argument is a
   4929     # TOK_STRING atom, not a recursive expression. Yield text.len - 2.
   4930     li_t0 EXPR_STRLEN
   4931     la_br &eer_strlen
   4932     beq_a0,t0
   4933     # frame stack overflow check: if (expr_frame_top >= M1PP_MAX_PARAMS) fatal
   4934     # (the global expr_frames[] array has 16 slots, shared across recursive
   4935     # eval_expr_range calls)
   4936     la_a1 &expr_frame_top
   4937     ld_t0,a1,0
   4938     li_a2 M1PP_MAX_PARAMS
   4939     la_br &err_bad_macro_header
   4940     blt_a2,t0
   4941     la_br &err_bad_macro_header
   4942     beq_t0,a2
   4943     # frames[frame_top].op = op; frames[frame_top].argc = 0   (op is at frame offset 0)
   4944     li_a2 M1PP_EXPR_FRAME_SIZE
   4945     mul_t2,t0,a2
   4946     la_a3 &expr_frames_ptr
   4947     ld_a3,a3,0
   4948     add_a3,a3,t2
   4949     st_a0,a3,0
   4950     li_a2 M1PP_EXPR_ARGC_OFF
   4951     add_a2,a3,a2
   4952     li_t2 %0 %0
   4953     st_t2,a2,0
   4954     # frame_top++
   4955     addi_t0,t0,1
   4956     st_t0,a1,0
   4957     # pos++ (skip operator token)
   4958     ld_t0,sp,0
   4959     addi_t0,t0,24
   4960     st_t0,sp,0
   4961     la_br &eer_loop
   4962     b
   4963 
   4964 :eer_rparen
   4965     # if (frame_top <= entry_frame_top) fatal   (')' with no frame opened by this call)
   4966     la_a0 &expr_frame_top
   4967     ld_t0,a0,0
   4968     ld_t1,sp,48
   4969     la_br &err_bad_macro_header
   4970     beq_t0,t1
   4971     la_br &err_bad_macro_header
   4972     blt_t0,t1
   4973     # frame = &expr_frames[frame_top - 1]
   4974     addi_t0,t0,neg1
   4975     li_a1 M1PP_EXPR_FRAME_SIZE
   4976     mul_t0,t0,a1
   4977     la_a3 &expr_frames_ptr
   4978     ld_a3,a3,0
   4979     add_a3,a3,t0
   4980     # apply_expr_op(op, args, argc) -> a0   (args is passed as a pointer into the frame)
   4981     ld_a0,a3,0
   4982     li_a1 M1PP_EXPR_ARGS_OFF
   4983     add_a1,a3,a1
   4984     li_a2 M1PP_EXPR_ARGC_OFF
   4985     add_a2,a3,a2
   4986     ld_a2,a2,0
   4987     la_br &apply_expr_op
   4988     call
   4989     # value = result; frame_top--; pos++; have_value = 1
   4990     st_a0,sp,16
   4991     la_a1 &expr_frame_top
   4992     ld_t0,a1,0
   4993     addi_t0,t0,neg1
   4994     st_t0,a1,0
   4995     ld_t0,sp,0
   4996     addi_t0,t0,24
   4997     st_t0,sp,0
   4998     li_t0 %1 %0
   4999     st_t0,sp,32
   5000     la_br &eer_loop
   5001     b
   5002 
   5003 :eer_strlen
   5004     # (strlen "literal") — degenerate unary op whose argument is a
   5005     # TOK_STRING atom, not a recursive expression. No ExprFrame is pushed.
   5006     # pos++ past the "strlen" operator word.
   5007     ld_t0,sp,0
   5008     addi_t0,t0,24
   5009     st_t0,sp,0
   5010     # skip_expr_newlines(pos, end)
   5011     ld_a0,sp,0
   5012     ld_a1,sp,8
   5013     la_br &skip_expr_newlines
   5014     call
   5015     st_a0,sp,0
   5016     # if (pos == end) fatal
   5017     ld_t0,sp,0
   5018     ld_t1,sp,8
   5019     la_br &err_bad_macro_header
   5020     beq_t0,t1
   5021     # if (pos->kind != TOK_STRING) fatal
   5022     ld_t2,t0,0
   5023     li_a3 TOK_STRING
   5024     la_br &err_bad_macro_header
   5025     bne_t2,a3
   5026     # if (pos->text.len < 2) fatal   (len is the token field at offset 16)
   5027     ld_a1,t0,16
   5028     li_a2 %2 %0
   5029     la_br &err_bad_macro_header
   5030     blt_a1,a2
   5031     # if (pos->text.ptr[0] != '"') fatal — rejects single-quoted '..' hex
   5032     ld_a2,t0,8
   5033     lb_a3,a2,0
   5034     li_a0 %34 %0
   5035     la_br &err_bad_macro_header
   5036     bne_a3,a0
   5037     # value = pos->text.len - 2   (only the first byte was checked to be a quote)
   5038     addi_a1,a1,neg2
   5039     st_a1,sp,16
   5040     # pos++
   5041     addi_t0,t0,24
   5042     st_t0,sp,0
   5043     # skip_expr_newlines(pos, end)
   5044     ld_a0,sp,0
   5045     ld_a1,sp,8
   5046     la_br &skip_expr_newlines
   5047     call
   5048     st_a0,sp,0
   5049     # if (pos == end) fatal
   5050     ld_t0,sp,0
   5051     ld_t1,sp,8
   5052     la_br &err_bad_macro_header
   5053     beq_t0,t1
   5054     # if (pos->kind != TOK_RPAREN) fatal   (strlen takes exactly one token)
   5055     ld_t2,t0,0
   5056     li_a3 TOK_RPAREN
   5057     la_br &err_bad_macro_header
   5058     bne_t2,a3
   5059     # pos++
   5060     addi_t0,t0,24
   5061     st_t0,sp,0
   5062     # have_value = 1
   5063     li_t0 %1 %0
   5064     st_t0,sp,32
   5065     la_br &eer_loop
   5066     b
   5067 
   5068 :eer_loop_done
   5069     # frame_top must equal entry_frame_top (otherwise a '(' was never closed)
   5070     la_a0 &expr_frame_top
   5071     ld_t0,a0,0
   5072     ld_t1,sp,48
   5073     la_br &err_bad_macro_header
   5074     bne_t0,t1
   5075     # have_result must be 1 (an empty range is fatal)
   5076     ld_t0,sp,40
   5077     la_br &err_bad_macro_header
   5078     beqz_t0
   5079     # pos must equal end
   5080     ld_t0,sp,0
   5081     ld_t1,sp,8
   5082     la_br &err_bad_macro_header
   5083     bne_t0,t1
   5084     # return result
   5085     ld_a0,sp,24
   5086     eret
   5087 
   5088 ## ============================================================================
   5089 ## --- Hex emit for !@%$ ------------------------------------------------------
   5090 ## ============================================================================
   5091 
   5092 ## emit_hex_value(a0=value_u64, a1=byte_count) -> void (fatal on overflow)
   5093 ## byte_count must be 1, 2, 4, or 8 (not validated here). Serialize value into
   5094 ## (2 * byte_count) uppercase hex chars, little-endian byte order (byte i at
   5095 ## char indices 2i, 2i+1; high nibble first) WRAPPED IN SINGLE QUOTES so the
   5096 ## downstream M0 assembler treats it as a hex-byte string literal rather than
   5097 ## parsing it as a decimal numeric token. Total text length = 2 + 2 * byte_count;
   5098 ## emitted as a TOK_STRING via append_text + emit_token. State lives in ehv_*.
   5099 :emit_hex_value
   5100     enter_0
   5101     # NOTE(review): no overflow check is visible here; presumably append_text/emit_token are the fatal paths
   5102     # ehv_value = value; ehv_bytes = byte_count
   5103     la_a2 &ehv_value
   5104     st_a0,a2,0
   5105     la_a2 &ehv_bytes
   5106     st_a1,a2,0
   5107 
   5108     # scratch[0] = '\''   (39 = ASCII single quote)
   5109     la_a1 &ehv_scratch
   5110     li_a2 %39 %0
   5111     sb_a2,a1,0
   5112 
   5113     # i = 0   (kept in t0 for the whole loop; the loop makes no calls)
   5114     li_t0 %0 %0
   5115 :emit_hex_value_loop
   5116     # if (i == bytes) done
   5117     la_a1 &ehv_bytes
   5118     ld_t1,a1,0
   5119     la_br &emit_hex_value_emit
   5120     beq_t0,t1
   5121 
   5122     # byte = ehv_value & 0xFF
   5123     la_a1 &ehv_value
   5124     ld_t2,a1,0
   5125     andi_a3,t2,255
   5126 
   5127     # high = (byte >> 4) & 0x0F  (byte is already in a3)
   5128     shri_a2,a3,4
   5129     andi_a2,a2,15
   5130 
   5131     # low = byte & 0x0F   (this a3 is overwritten by shli_a3 below; low is recomputed later)
   5132     andi_a3,a3,15
   5133 
   5134     # scratch[1 + 2*i] = hex_chars[high]
   5135     la_a1 &hex_chars
   5136     add_a1,a1,a2
   5137     lb_a2,a1,0
   5138     la_a1 &ehv_scratch
   5139     shli_a3,t0,1                   # a3 = 2*i (clobbers the low nibble computed above)
   5140     add_a1,a1,a3
   5141     addi_a1,a1,1
   5142     sb_a2,a1,0
   5143 
   5144     # scratch[1 + 2*i+1] = hex_chars[low]   (reload low from byte & 0x0F)
   5145     la_a1 &ehv_value
   5146     ld_t2,a1,0
   5147     andi_a3,t2,255
   5148     andi_a3,a3,15
   5149     la_a1 &hex_chars
   5150     add_a1,a1,a3
   5151     lb_a2,a1,0
   5152     la_a1 &ehv_scratch
   5153     shli_a3,t0,1
   5154     add_a1,a1,a3
   5155     addi_a1,a1,2
   5156     sb_a2,a1,0
   5157 
   5158     # ehv_value >>= 8   (next byte moves into the low 8 bits)
   5159     la_a1 &ehv_value
   5160     ld_t2,a1,0
   5161     shri_t2,t2,8
   5162     st_t2,a1,0
   5163 
   5164     # i++
   5165     addi_t0,t0,1
   5166     la_br &emit_hex_value_loop
   5167     b
   5168 
   5169 :emit_hex_value_emit
   5170     # scratch[1 + 2*bytes] = '\''  (closing quote)
   5171     la_a0 &ehv_scratch
   5172     la_a1 &ehv_bytes
   5173     ld_a1,a1,0
   5174     shli_a1,a1,1
   5175     add_a0,a0,a1
   5176     addi_a0,a0,1
   5177     li_a2 %39 %0
   5178     sb_a2,a0,0
   5179 
   5180     # text_ptr = append_text(&ehv_scratch, 2 + 2 * ehv_bytes)
   5181     la_a0 &ehv_scratch
   5182     la_a1 &ehv_bytes
   5183     ld_a1,a1,0
   5184     shli_a1,a1,1
   5185     addi_a1,a1,2
   5186     la_br &append_text
   5187     call
   5188 
   5189     # ehv_token.kind = TOK_STRING; ehv_token.text_ptr = text_ptr;
   5190     # ehv_token.text_len = 2 + 2 * ehv_bytes   (fields at offsets 0 / 8 / 16)
   5191     la_a2 &ehv_token
   5192     li_a3 TOK_STRING
   5193     st_a3,a2,0
   5194     st_a0,a2,8
   5195     la_a1 &ehv_bytes
   5196     ld_a1,a1,0
   5197     shli_a1,a1,1
   5198     addi_a1,a1,2
   5199     st_a1,a2,16
   5200 
   5201     # emit_token(&ehv_token)
   5202     la_a0 &ehv_token
   5203     la_br &emit_token
   5204     call
   5205 
   5206     eret
   5207 
   5208 ## ============================================================================
   5209 ## --- Builtin dispatcher ( ! @ % $ %select ) ---------------------------------
   5210 ## ============================================================================
   5211 
   5212 ## expand_builtin_call(a0=stream_ptr, a1=builtin_tok) -> void (fatal on bad)
   5213 ## Requires builtin_tok+1 is TOK_LPAREN. Runs parse_args(lparen, stream->end),
   5214 ## then dispatches on builtin_tok->text:
   5215 ##
   5216 ##   "!" "@" "%" "$"
   5217 ##     require arg_count == 1
   5218 ##     eval_expr_range(arg_starts[0], arg_ends[0]) -> value
   5219 ##     stream->pos = call_end_pos; stream->line_start = 0
   5220 ##     emit_hex_value(value, 1 / 2 / 4 / 8 respectively)
   5221 ##
   5222 ##   "%select"
   5223 ##     require arg_count == 3
   5224 ##     eval_expr_range(cond_arg) -> value
   5225 ##     chosen = (value != 0) ? arg1 : arg2
   5226 ##     stream->pos = call_end_pos; stream->line_start = 0
   5227 ##     if chosen is empty, return (no stream push)
   5228 ##     else copy_span_to_pool(chosen) and push_pool_stream_from_mark(mark)
   5229 ##     The unchosen branch is NOT evaluated, validated, or expanded.
   5230 ##   "%str"  -> ebc_str: one TOK_WORD argument re-emitted as a quoted TOK_STRING.
   5231 ## Any other text, or a failed lparen check, branches to err_bad_macro_header.
   5232 :expand_builtin_call
   5233     enter_0
   5234 
   5235     # ebc_stream = stream_ptr. builtin_tok (a1) is not spilled; it is reloaded from stream->pos below.
   5236     la_a2 &ebc_stream
   5237     st_a0,a2,0
   5238 
   5239     # lparen = builtin_tok + 24; if (lparen >= stream->end) fatal
   5240     addi_t0,a1,24        # tokens are 24 bytes, so this is the next token
   5241     ld_t1,a0,8           # stream->end
   5242     la_br &err_bad_macro_header
   5243     beq_t0,t1            # lparen == end
   5244     la_br &err_bad_macro_header
   5245     blt_t1,t0            # end < lparen
   5246 
   5247     # if (lparen->kind != TOK_LPAREN) fatal
   5248     ld_a3,t0,0
   5249     li_a2 TOK_LPAREN
   5250     la_br &err_bad_macro_header
   5251     bne_a3,a2
   5252 
   5253     # parse_args(lparen, stream->end)
   5254     mov_a0,t0
   5255     la_a2 &ebc_stream
   5256     ld_a2,a2,0
   5257     ld_a1,a2,8           # stream->end
   5258     la_br &parse_args
   5259     call
   5260 
   5261     # snapshot call_end_pos -> ebc_call_end_pos
   5262     la_a0 &call_end_pos
   5263     ld_t0,a0,0
   5264     la_a1 &ebc_call_end_pos
   5265     st_t0,a1,0
   5266 
   5267     # dispatch on builtin_tok->text. a1 (builtin_tok) is gone after parse_args,
   5268     # but stream->pos still points at the builtin token (we don't advance it
   5269     # until the dispatched branch sets stream->pos = call_end_pos), so reload
   5270     # builtin_tok from stream->pos.
   5271     la_a0 &ebc_stream
   5272     ld_a0,a0,0
   5273     ld_t0,a0,16          # stream->pos -> builtin_tok
   5274 
   5275     # if tok_eq_const(tok, "!", 1) -> bytes=1
   5276     mov_a0,t0
   5277     la_a1 &const_bang
   5278     li_a2 %1 %0
   5279     la_br &tok_eq_const
   5280     call
   5281     la_br &ebc_arg_set_1
   5282     bnez_a0              # nonzero result == match
   5283 
   5284     # if tok_eq_const(tok, "@", 1) -> bytes=2
   5285     la_a0 &ebc_stream    # tok is reloaded from stream->pos before every compare
   5286     ld_a0,a0,0
   5287     ld_a0,a0,16
   5288     la_a1 &const_at
   5289     li_a2 %1 %0
   5290     la_br &tok_eq_const
   5291     call
   5292     la_br &ebc_arg_set_2
   5293     bnez_a0
   5294 
   5295     # if tok_eq_const(tok, "%", 1) -> bytes=4
   5296     la_a0 &ebc_stream
   5297     ld_a0,a0,0
   5298     ld_a0,a0,16
   5299     la_a1 &const_pct
   5300     li_a2 %1 %0
   5301     la_br &tok_eq_const
   5302     call
   5303     la_br &ebc_arg_set_4
   5304     bnez_a0
   5305 
   5306     # if tok_eq_const(tok, "$", 1) -> bytes=8
   5307     la_a0 &ebc_stream
   5308     ld_a0,a0,0
   5309     ld_a0,a0,16
   5310     la_a1 &const_dlr
   5311     li_a2 %1 %0
   5312     la_br &tok_eq_const
   5313     call
   5314     la_br &ebc_arg_set_8
   5315     bnez_a0
   5316 
   5317     # if tok_eq_const(tok, "%select", 7) -> select path
   5318     la_a0 &ebc_stream
   5319     ld_a0,a0,0
   5320     ld_a0,a0,16
   5321     la_a1 &const_select
   5322     li_a2 %7 %0
   5323     la_br &tok_eq_const
   5324     call
   5325     la_br &ebc_select
   5326     bnez_a0
   5327 
   5328     # if tok_eq_const(tok, "%str", 4) -> str path
   5329     la_a0 &ebc_stream
   5330     ld_a0,a0,0
   5331     ld_a0,a0,16
   5332     la_a1 &const_str
   5333     li_a2 %4 %0
   5334     la_br &tok_eq_const
   5335     call
   5336     la_br &ebc_str
   5337     bnez_a0
   5338 
   5339     # else: fatal (reported through err_bad_macro_header, i.e. "bad macro header")
   5340     la_br &err_bad_macro_header
   5341     b
   5342 
   5343 :ebc_arg_set_1           # "!" -> ebc_bytes = 1, then the shared hex path
   5344     li_a0 %1 %0
   5345     la_a1 &ebc_bytes
   5346     st_a0,a1,0
   5347     la_br &ebc_arg_path
   5348     b
   5349 :ebc_arg_set_2           # "@" -> ebc_bytes = 2
   5350     li_a0 %2 %0
   5351     la_a1 &ebc_bytes
   5352     st_a0,a1,0
   5353     la_br &ebc_arg_path
   5354     b
   5355 :ebc_arg_set_4           # "%" -> ebc_bytes = 4
   5356     li_a0 %4 %0
   5357     la_a1 &ebc_bytes
   5358     st_a0,a1,0
   5359     la_br &ebc_arg_path
   5360     b
   5361 :ebc_arg_set_8           # "$" -> ebc_bytes = 8
   5362     li_a0 %8 %0
   5363     la_a1 &ebc_bytes
   5364     st_a0,a1,0
   5365     la_br &ebc_arg_path
   5366     b
   5367     # Shared tail for ! @ % $ : evaluate the single argument, emit it as hex.
   5368 :ebc_arg_path
   5369     # require arg_count == 1
   5370     la_a0 &arg_count
   5371     ld_t0,a0,0
   5372     li_t1 %1 %0
   5373     la_br &err_bad_macro_header
   5374     bne_t0,t1
   5375 
   5376     # snapshot arg_starts[0], arg_ends[0]
   5377     la_a0 &arg_starts_ptr
   5378     ld_a0,a0,0           # arg_starts_ptr is a pointer slot; deref to reach the array
   5379     ld_t0,a0,0
   5380     la_a1 &ebc_arg0_start
   5381     st_t0,a1,0
   5382     la_a0 &arg_ends_ptr
   5383     ld_a0,a0,0
   5384     ld_t0,a0,0
   5385     la_a1 &ebc_arg0_end
   5386     st_t0,a1,0
   5387 
   5388     # value = eval_expr_range(arg0_start, arg0_end)
   5389     la_a0 &ebc_arg0_start
   5390     ld_a0,a0,0
   5391     la_a1 &ebc_arg0_end
   5392     ld_a1,a1,0
   5393     la_br &eval_expr_range
   5394     call
   5395 
   5396     # ebc_value = a0
   5397     la_a1 &ebc_value
   5398     st_a0,a1,0
   5399 
   5400     # stream->pos = ebc_call_end_pos; stream->line_start = 0
   5401     la_a0 &ebc_stream
   5402     ld_a0,a0,0
   5403     la_a1 &ebc_call_end_pos
   5404     ld_t0,a1,0
   5405     st_t0,a0,16          # stream->pos
   5406     li_t1 %0 %0
   5407     st_t1,a0,24          # stream->line_start
   5408 
   5409     # emit_hex_value(ebc_value, ebc_bytes)
   5410     la_a0 &ebc_value
   5411     ld_a0,a0,0
   5412     la_a1 &ebc_bytes
   5413     ld_a1,a1,0
   5414     la_br &emit_hex_value
   5415     call
   5416 
   5417     eret                 # returns from expand_builtin_call's frame
   5418 
   5419 :ebc_select              # %select(cond, then, else) branch of expand_builtin_call
   5420     # require arg_count == 3
   5421     la_a0 &arg_count
   5422     ld_t0,a0,0
   5423     li_t1 %3 %0
   5424     la_br &err_bad_macro_header
   5425     bne_t0,t1
   5426 
   5427     # snapshot arg_starts[0..2] / arg_ends[0..2]
   5428     la_a0 &arg_starts_ptr
   5429     ld_a0,a0,0
   5430     ld_t0,a0,0           # arg_starts[0] (condition)
   5431     la_a1 &ebc_arg0_start
   5432     st_t0,a1,0
   5433     la_a0 &arg_starts_ptr
   5434     ld_a0,a0,0
   5435     ld_t0,a0,8           # arg_starts[1] (then)
   5436     la_a1 &ebc_then_start
   5437     st_t0,a1,0
   5438     la_a0 &arg_starts_ptr
   5439     ld_a0,a0,0
   5440     ld_t0,a0,16          # arg_starts[2] (else)
   5441     la_a1 &ebc_else_start
   5442     st_t0,a1,0
   5443 
   5444     la_a0 &arg_ends_ptr
   5445     ld_a0,a0,0
   5446     ld_t0,a0,0           # arg_ends[0]
   5447     la_a1 &ebc_arg0_end
   5448     st_t0,a1,0
   5449     la_a0 &arg_ends_ptr
   5450     ld_a0,a0,0
   5451     ld_t0,a0,8           # arg_ends[1]
   5452     la_a1 &ebc_then_end
   5453     st_t0,a1,0
   5454     la_a0 &arg_ends_ptr
   5455     ld_a0,a0,0
   5456     ld_t0,a0,16          # arg_ends[2]
   5457     la_a1 &ebc_else_end
   5458     st_t0,a1,0
   5459 
   5460     # value = eval_expr_range(arg0_start, arg0_end)
   5461     la_a0 &ebc_arg0_start
   5462     ld_a0,a0,0
   5463     la_a1 &ebc_arg0_end
   5464     ld_a1,a1,0
   5465     la_br &eval_expr_range
   5466     call
   5467 
   5468     # if (value != 0) chosen = then; else chosen = else
   5469     la_br &ebc_select_then
   5470     bnez_a0
   5471 
   5472     # chosen = else (ebc_arg0_start/_end are reused to hold the chosen span)
   5473     la_a0 &ebc_else_start
   5474     ld_t0,a0,0
   5475     la_a1 &ebc_arg0_start
   5476     st_t0,a1,0
   5477     la_a0 &ebc_else_end
   5478     ld_t0,a0,0
   5479     la_a1 &ebc_arg0_end
   5480     st_t0,a1,0
   5481     la_br &ebc_select_after_pick
   5482     b
   5483 
   5484 :ebc_select_then
   5485     # chosen = then (falls through into ebc_select_after_pick)
   5486     la_a0 &ebc_then_start
   5487     ld_t0,a0,0
   5488     la_a1 &ebc_arg0_start
   5489     st_t0,a1,0
   5490     la_a0 &ebc_then_end
   5491     ld_t0,a0,0
   5492     la_a1 &ebc_arg0_end
   5493     st_t0,a1,0
   5494 
   5495 :ebc_select_after_pick
   5496     # stream->pos = ebc_call_end_pos; stream->line_start = 0
   5497     la_a0 &ebc_stream
   5498     ld_a0,a0,0
   5499     la_a1 &ebc_call_end_pos
   5500     ld_t0,a1,0
   5501     st_t0,a0,16
   5502     li_t1 %0 %0
   5503     st_t1,a0,24
   5504 
   5505     # if (chosen_start == chosen_end) return
   5506     la_a0 &ebc_arg0_start
   5507     ld_t0,a0,0
   5508     la_a1 &ebc_arg0_end
   5509     ld_t1,a1,0
   5510     la_br &ebc_select_done
   5511     beq_t0,t1            # empty branch: nothing to push
   5512 
   5513     # mark = pool_used (taken before the copy, so it marks where the copied span starts)
   5514     la_a0 &pool_used
   5515     ld_t0,a0,0
   5516     la_a1 &ebc_mark
   5517     st_t0,a1,0
   5518 
   5519     # copy_span_to_pool(chosen_start, chosen_end)
   5520     la_a0 &ebc_arg0_start
   5521     ld_a0,a0,0
   5522     la_a1 &ebc_arg0_end
   5523     ld_a1,a1,0
   5524     la_br &copy_span_to_pool
   5525     call
   5526 
   5527     # push_pool_stream_from_mark(mark)
   5528     la_a0 &ebc_mark
   5529     ld_a0,a0,0
   5530     la_br &push_pool_stream_from_mark
   5531     call
   5532 
   5533 :ebc_select_done
   5534     eret                 # returns from expand_builtin_call's frame
   5535 
   5536 ## %str(IDENT): stringify a single WORD argument into a TOK_STRING literal.
   5537 ## Validation: arg_count == 1, arg span length == 1 token, and that token's
   5538 ## kind is TOK_WORD. Output: a freshly-allocated text span built as
   5539 ## `"` + arg.text + `"` (len = arg.text.len + 2) and a synthesized TOK_STRING
   5540 ## pointing at it. Stream pos advances to call_end_pos; line_start = 0.
   5541 :ebc_str
   5542     # require arg_count == 1
   5543     la_a0 &arg_count
   5544     ld_t0,a0,0
   5545     li_t1 %1 %0
   5546     la_br &err_bad_macro_header
   5547     bne_t0,t1
   5548 
   5549     # snapshot arg_starts[0] / arg_ends[0]
   5550     la_a0 &arg_starts_ptr
   5551     ld_a0,a0,0
   5552     ld_t0,a0,0
   5553     la_a1 &ebc_arg0_start
   5554     st_t0,a1,0
   5555     la_a0 &arg_ends_ptr
   5556     ld_a0,a0,0
   5557     ld_t0,a0,0
   5558     la_a1 &ebc_arg0_end
   5559     st_t0,a1,0
   5560 
   5561     # require arg0_end - arg0_start == 24 (exactly one token)
   5562     la_a0 &ebc_arg0_start
   5563     ld_t0,a0,0           # t0 = arg_tok; stays live through the kind and length loads below
   5564     la_a1 &ebc_arg0_end
   5565     ld_t1,a1,0
   5566     sub_t2,t1,t0
   5567     li_a2 %24 %0
   5568     la_br &err_bad_macro_header
   5569     bne_t2,a2
   5570 
   5571     # require arg_tok->kind == TOK_WORD
   5572     ld_a3,t0,0
   5573     li_a2 TOK_WORD
   5574     la_br &err_bad_macro_header
   5575     bne_a3,a2
   5576 
   5577     # orig_len = arg_tok->text.len; out_len = orig_len + 2
   5578     # fatal if out_len > 256 (scratch cap; text_buf cap checked by append_text)
   5579     ld_t1,t0,16
   5580     la_a0 &ebc_str_orig_len
   5581     st_t1,a0,0
   5582     addi_t2,t1,2
   5583     la_a0 &ebc_str_out_len
   5584     st_t2,a0,0
   5585     li_a1 %256 %0
   5586     la_br &err_text_overflow
   5587     blt_a1,t2            # 256 < out_len -> "text buffer overflow"
   5588 
   5589     # scratch[0] = '"'
   5590     la_t2 &ebc_str_scratch_ptr
   5591     ld_t2,t2,0           # t2 = scratch base (deref of the pointer slot)
   5592     li_a3 %34 %0         # 34 = '"'
   5593     sb_a3,t2,0
   5594 
   5595     # copy arg_tok->text bytes into scratch[1..1+orig_len)
   5596     #   src = arg_tok->text.ptr; i = 0
   5597     la_a0 &ebc_arg0_start
   5598     ld_a0,a0,0
   5599     ld_t0,a0,8           # t0 = src
   5600     la_a1 &ebc_str_orig_len
   5601     ld_t1,a1,0           # t1 = orig_len
   5602     li_a0 %0 %0          # a0 = i
   5603 :ebc_str_copy_loop
   5604     la_br &ebc_str_copy_done
   5605     beq_a0,t1            # i == orig_len -> done (also covers orig_len == 0)
   5606     add_a1,t0,a0
   5607     lb_a1,a1,0           # a1 = src[i]
   5608     addi_a2,a0,1
   5609     add_a2,t2,a2         # a2 = &scratch[i + 1]
   5610     sb_a1,a2,0
   5611     addi_a0,a0,1
   5612     la_br &ebc_str_copy_loop
   5613     b
   5614 :ebc_str_copy_done
   5615 
   5616     # scratch[1 + orig_len] = '"'
   5617     la_t2 &ebc_str_scratch_ptr
   5618     ld_t2,t2,0
   5619     la_a1 &ebc_str_orig_len
   5620     ld_a1,a1,0
   5621     addi_a1,a1,1
   5622     add_a0,t2,a1
   5623     li_a3 %34 %0
   5624     sb_a3,a0,0
   5625 
   5626     # text_ptr = append_text(&scratch, out_len)
   5627     la_a0 &ebc_str_scratch_ptr
   5628     ld_a0,a0,0
   5629     la_a1 &ebc_str_out_len
   5630     ld_a1,a1,0
   5631     la_br &append_text
   5632     call
   5633 
   5634     # ebc_str_token = { TOK_STRING, text_ptr, out_len }
   5635     la_a2 &ebc_str_token
   5636     li_a3 TOK_STRING
   5637     st_a3,a2,0           # kind
   5638     st_a0,a2,8           # text_ptr (append_text result)
   5639     la_a1 &ebc_str_out_len
   5640     ld_a1,a1,0
   5641     st_a1,a2,16          # text_len
   5642 
   5643     # stream->pos = ebc_call_end_pos; stream->line_start = 0
   5644     la_a0 &ebc_stream
   5645     ld_a0,a0,0
   5646     la_a1 &ebc_call_end_pos
   5647     ld_t0,a1,0
   5648     st_t0,a0,16
   5649     li_t1 %0 %0
   5650     st_t1,a0,24
   5651 
   5652     # emit_token(&ebc_str_token)
   5653     la_a0 &ebc_str_token
   5654     la_br &emit_token
   5655     call
   5656 
   5657     eret                 # returns from expand_builtin_call's frame
   5658 
   5659 ## --- Error paths -------------------------------------------------------------
   5660 ## Each err_* stub loads its NUL-terminated message pointer into a0 and
   5661 ## branches to fatal, which writes "m1pp: <msg>\n" to stderr and exits 1.
   5662 ## Error labels are branched to from range/overflow checks throughout the code.
   5663 
   5664 :err_usage
   5665     la_a0 &msg_usage
   5666     la_br &fatal
   5667     b
   5668 :err_open_input
   5669     la_a0 &msg_open_input
   5670     la_br &fatal
   5671     b
   5672 :err_read
   5673     la_a0 &msg_read
   5674     la_br &fatal
   5675     b
   5676 :err_input_too_big
   5677     la_a0 &msg_input_too_big
   5678     la_br &fatal
   5679     b
   5680 :err_open_output
   5681     la_a0 &msg_open_output
   5682     la_br &fatal
   5683     b
   5684 :err_write
   5685     la_a0 &msg_write
   5686     la_br &fatal
   5687     b
   5688 :err_text_overflow
   5689     la_a0 &msg_text_overflow
   5690     la_br &fatal
   5691     b
   5692 :err_token_overflow
   5693     la_a0 &msg_token_overflow
   5694     la_br &fatal
   5695     b
   5696 :err_output_overflow
   5697     la_a0 &msg_output_overflow
   5698     la_br &fatal
   5699     b
   5700 :err_unterminated_macro
   5701     la_a0 &msg_unterminated_macro
   5702     la_br &fatal
   5703     b
   5704 :err_bad_macro_header    # also the target for every expand_builtin_call validation failure
   5705     la_a0 &msg_bad_macro_header
   5706     la_br &fatal
   5707     b
   5708 :err_too_many_macros
   5709     la_a0 &msg_too_many_macros
   5710     la_br &fatal
   5711     b
   5712 :err_macro_body_overflow
   5713     la_a0 &msg_macro_body_overflow
   5714     la_br &fatal
   5715     b
   5716 :err_not_implemented
   5717     la_a0 &msg_not_implemented
   5718     la_br &fatal
   5719     b
   5720 :err_unbalanced_braces
   5721     la_a0 &msg_unbalanced_braces
   5722     la_br &fatal
   5723     b
   5724 :err_bad_directive
   5725     la_a0 &msg_bad_directive
   5726     la_br &fatal
   5727     b
   5728 :err_unterminated_directive
   5729     la_a0 &msg_unterminated_directive
   5730     la_br &fatal
   5731     b
   5732 :err_bad_scope_header
   5733     la_a0 &msg_bad_scope_header
   5734     la_br &fatal
   5735     b
   5736 :err_scope_depth_overflow
   5737     la_a0 &msg_scope_depth_overflow
   5738     la_br &fatal
   5739     b
   5740 :err_scope_underflow
   5741     la_a0 &msg_scope_underflow
   5742     la_br &fatal
   5743     b
   5744 :err_scope_not_closed
   5745     la_a0 &msg_scope_not_closed
   5746     la_br &fatal
   5747     b
   5748 :err_bad_scope_label
   5749     la_a0 &msg_bad_scope_label
   5750     la_br &fatal
   5751     b
   5752 
   5753 ## fatal(a0=msg_ptr): writes "m1pp: <msg>\n" to stderr and exits 1.
   5754 ## Length is computed inline via a strlen loop (messages are NUL-terminated).
   5755 ## Reached by unconditional branch from any err_* stub, so no frame is required.
   5756 :fatal
   5757     # Stash msg_ptr; compute len inline into err_saved_len.
   5758     la_a1 &err_saved_msg
   5759     st_a0,a1,0
   5760     li_t0 %0 %0          # t0 = running length
   5761 :fatal_strlen
   5762     add_t1,a0,t0
   5763     lb_t1,t1,0           # t1 = msg[t0]
   5764     la_br &fatal_strlen_done
   5765     beqz_t1              # stop at the NUL terminator
   5766     addi_t0,t0,1
   5767     la_br &fatal_strlen
   5768     b
   5769 :fatal_strlen_done
   5770     la_a1 &err_saved_len
   5771     st_t0,a1,0
   5772 
   5773     # write(2, "m1pp: ", 6) -- all six bytes of msg_prefix, including the space
   5774     li_a0 sys_write
   5775     li_a1 %2 %0
   5776     la_a2 &msg_prefix
   5777     li_a3 %6 %0
   5778     syscall
   5779 
   5780     # write(2, msg, len) -- msg/len are reloaded from BSS after the first syscall
   5781     la_a0 &err_saved_msg
   5782     ld_a2,a0,0
   5783     la_a0 &err_saved_len
   5784     ld_a3,a0,0
   5785     li_a0 sys_write
   5786     li_a1 %2 %0
   5787     syscall
   5788 
   5789     # write(2, "\n", 1)
   5790     li_a0 sys_write
   5791     li_a1 %2 %0
   5792     la_a2 &msg_newline
   5793     li_a3 %1 %0
   5794     syscall
   5795 
   5796     # exit(1)
   5797     li_a0 sys_exit
   5798     li_a1 %1 %0
   5799     syscall
   5800 
   5801 ## --- Rodata: const tokens (for tok_eq_const) and fatal messages --------------
   5802 
   5803 :const_macro "%macro"
   5804 :const_endm "%endm"
   5805 :const_paste "##"
   5806 :const_lparen "("
   5807 :const_rparen ")"
   5808 :const_comma ","
   5809 :const_lbrace "{"
   5810 :const_rbrace "}"
   5811 :const_bang "!"
   5812 :const_at "@"
   5813 :const_pct "%"
   5814 :const_dlr "$"
   5815 :const_select "%select"
   5816 :const_str "%str"
   5817 :const_struct "%struct"
   5818 :const_enum "%enum"
   5819 :const_size "SIZE"
   5820 :const_count "COUNT"
   5821 :const_scope "%scope"
   5822 :const_endscope "%endscope"
   5823 
   5824 ## Operator strings for expr_op_code. Each is a raw byte literal; lengths
   5825 ## are passed separately to tok_eq_const. "<=" must be tested before "<"
   5826 ## so the longer match wins; same for ">=" before ">".
   5827 :op_plus "+"
   5828 :op_minus "-"
   5829 :op_star "*"
   5830 :op_slash "/"
   5831 :op_percent "%"
   5832 :op_shl "<<"
   5833 :op_shr ">>"
   5834 :op_amp "&"
   5835 :op_bar "|"
   5836 :op_caret "^"
   5837 :op_tilde "~"
   5838 :op_eq "="
   5839 :op_ne "!="
   5840 :op_lt "<"
   5841 :op_le "<="
   5842 :op_gt ">"
   5843 :op_ge ">="
   5844 :op_strlen "strlen"
   5845 
   5846 ## Nibble-to-hex lookup table for emit_hex_value.
   5847 :hex_chars "0123456789ABCDEF"
   5848 
   5849 ## 256-byte char-class table for lex_loop / lex_word_scan. Indexed by the
   5850 ## source byte `c`; value is the class code dispatched by lex_loop:
   5851 ##   0  WORD (default; word_scan continues through this byte)
   5852 ##   1  SKIP (non-newline whitespace: 0x09 tab, 0x0B-0x0D vt/ff/cr, 0x20 sp)
   5853 ##   2  NEWLINE (0x0A)
   5854 ##   3  STRING (0x22 ", 0x27 ')
   5855 ##   4  HASH (0x23 #)
   5856 ##   5  COMMENT (0x3B ;)
   5857 ##   6  LPAREN (0x28 ()
   5858 ##   7  RPAREN (0x29 ))
   5859 ##   8  COMMA  (0x2C ,)
   5860 ##   9  LBRACE (0x7B {)
   5861 ##  10  RBRACE (0x7D })
   5862 ##  11  NUL (0x00 — lex_loop fall-through to lex_done)
   5863 :lex_char_class
   5864 ## bytes 0x00-0x1F: NUL=11, \t=1, \n=2, \v/\f/\r=1, rest=0
   5865 '0B000000000000000001020101010000'
   5866 '00000000000000000000000000000000'
   5867 ## bytes 0x20-0x3F: sp=1, "=3, #=4, '=3, (=6, )=7, ,=8, ;=5
   5868 '01000304000000030607000008000000'
   5869 '00000000000000000000000500000000'
   5870 ## bytes 0x40-0x7F: {=9 (0x7B), }=10 (0x7D)
   5871 '00000000000000000000000000000000'
   5872 '00000000000000000000000000000000'
   5873 '00000000000000000000000000000000'
   5874 '000000000000000000000009000A0000'
   5875 ## bytes 0x80-0xFF: all 0 (word)
   5876 '00000000000000000000000000000000'
   5877 '00000000000000000000000000000000'
   5878 '00000000000000000000000000000000'
   5879 '00000000000000000000000000000000'
   5880 '00000000000000000000000000000000'
   5881 '00000000000000000000000000000000'
   5882 '00000000000000000000000000000000'
   5883 '00000000000000000000000000000000'
   5884 
   5885 ## BSS pointer-slot init table (for p1_main's bss_init_loop).
   5886 ## Each entry: 8-byte slot ptr (&label + 4 pad) + 8-byte OFF_* constant.
   5887 ## Walked linearly; order is irrelevant. 16 entries, one per *_ptr slot.
   5888 :bss_init_tbl
   5889 &paste_scratch_ptr ZERO4 OFF_paste_scratch
   5890 &local_label_scratch_ptr ZERO4 OFF_local_label_scratch
   5891 &scope_stack_ptr ZERO4 OFF_scope_stack
   5892 &df_name_scratch_ptr ZERO4 OFF_df_name_scratch
   5893 &ebc_str_scratch_ptr ZERO4 OFF_ebc_str_scratch
   5894 &arg_starts_ptr ZERO4 OFF_arg_starts
   5895 &arg_ends_ptr ZERO4 OFF_arg_ends
   5896 &input_buf_ptr ZERO4 OFF_input_buf
   5897 &output_buf_ptr ZERO4 OFF_output_buf
   5898 &text_buf_ptr ZERO4 OFF_text_buf
   5899 &source_tokens_ptr ZERO4 OFF_source_tokens
   5900 &macros_ptr ZERO4 OFF_macros
   5901 &macro_body_tokens_ptr ZERO4 OFF_macro_body_tokens
   5902 &streams_ptr ZERO4 OFF_streams
   5903 &expand_pool_ptr ZERO4 OFF_expand_pool
   5904 &expr_frames_ptr ZERO4 OFF_expr_frames
   5905 :bss_init_tbl_end        # one past the last entry; presumably the loop bound in bss_init_loop (not shown here)
   5906 
   5907 :msg_prefix "m1pp: "     # literal text is 6 characters: "m1pp:" plus a trailing space
   5908 :msg_newline "
   5909 "
   5910 ## All err_* messages below are NUL-terminated (trailing '00'); fatal uses an
   5911 ## inline strlen loop rather than a caller-supplied length.
   5912 :msg_usage "usage: m1pp input.M1 output.M1" '00'
   5913 :msg_open_input "failed to open input file" '00'
   5914 :msg_read "failed to read input" '00'
   5915 :msg_input_too_big "input file too large" '00'
   5916 :msg_open_output "failed to open output file" '00'
   5917 :msg_write "failed to write output" '00'
   5918 :msg_text_overflow "text buffer overflow" '00'
   5919 :msg_token_overflow "token buffer overflow" '00'
   5920 :msg_output_overflow "output buffer overflow" '00'
   5921 :msg_unterminated_macro "unterminated %macro definition" '00'
   5922 :msg_bad_macro_header "bad macro header" '00'
   5923 :msg_too_many_macros "too many macros" '00'
   5924 :msg_macro_body_overflow "macro body overflow" '00'
   5925 :msg_not_implemented "not implemented" '00'
   5926 :msg_unbalanced_braces "unbalanced braces" '00'
   5927 :msg_bad_directive "bad %struct/%enum directive" '00'
   5928 :msg_unterminated_directive "unterminated %struct/%enum directive" '00'
   5929 :msg_bad_scope_header "bad scope header" '00'
   5930 :msg_scope_depth_overflow "scope depth overflow" '00'
   5931 :msg_scope_underflow "scope underflow" '00'
   5932 :msg_scope_not_closed "scope not closed" '00'
   5933 :msg_bad_scope_label "bad scope label" '00'
   5934 
   5935 ## --- BSS ---------------------------------------------------------------------
   5936 ## Placed before :ELF_end so filesz/memsz (which this ELF header sets equal)
   5937 ## covers the whole zero-initialized region. Bloats the file by the BSS size,
   5938 ## but avoids a custom ELF header.
   5939 ##
   5940 ## Layout: scalars (pointers, counters, lexer/processor state), then the
   5941 ## four arenas — input_buf, output_buf, text_buf, source_tokens — whose
   5942 ## sizes match the CAP constants above.
   5943 
   5944 ## Scalars (each 8 bytes).
   5945 :input_fd
   5946 ZERO8
   5947 :input_len
   5948 ZERO8
   5949 :output_fd
   5950 ZERO8
   5951 :output_used
   5952 ZERO8
   5953 :output_written
   5954 ZERO8
   5955 :output_need_space
   5956 ZERO8
   5957 :input_path
   5958 ZERO8
   5959 :output_path
   5960 ZERO8
   5961 :text_used
   5962 ZERO8
   5963 :source_end
   5964 ZERO8
   5965 :lex_ptr
   5966 ZERO8
   5967 :lex_start
   5968 ZERO8
   5969 :lex_quote
   5970 ZERO8
   5971 :lex_punct_kind
   5972 ZERO8
   5973 :proc_pos
   5974 ZERO8
   5975 :proc_line_start
   5976 ZERO8
   5977 :macros_end
   5978 ZERO8
   5979 :macro_body_end
   5980 ZERO8
   5981 :def_m_ptr
   5982 ZERO8
   5983 :def_param_ptr
   5984 ZERO8
   5985 :def_body_line_start
   5986 ZERO8
   5987 :err_saved_msg
   5988 ZERO8
   5989 :err_saved_len
   5990 ZERO8
   5991 
   5992 ## Stream / pool / arg / expression scalars. Each is one u64 (ZERO8).
   5993 ## pool_used      — byte offset into expand_pool (i.e. next write slot).
   5994 ## stream_top     — stream stack depth in bytes (count × 40; 0 == empty).
   5995 ## arg_count      — number of args produced by the most recent parse_args.
   5996 ## call_end_pos   — Token* one past the ')' of that call.
   5997 ## expr_frame_top — ExprFrame stack depth inside eval_expr_range.
   5998 ## emt_after_pos, emt_mark — expand_macro_tokens output slots (Token* and
   5999 ##                           byte offset into expand_pool).
   6000 ## eval_after_pos, eval_value — eval_expr_atom output slots (Token* and i64).
   6001 ##                              Callers MUST snapshot these before any nested
   6002 ##                              eval_* call that could overwrite them.
   6003 :pool_used
   6004 ZERO8
   6005 :stream_top
   6006 ZERO8
   6007 :arg_count
   6008 ZERO8
   6009 :call_end_pos
   6010 ZERO8
   6011 :expr_frame_top
   6012 ZERO8
   6013 :emt_after_pos
   6014 ZERO8
   6015 :emt_mark
   6016 ZERO8
   6017 :eval_after_pos
   6018 ZERO8
   6019 :eval_value
   6020 ZERO8
   6021 
   6022 ## Paste-pass spill slots. Both append_pasted_token and paste_pool_range
   6023 ## call other functions, so all locals must round-trip through BSS
   6024 ## across the call.
   6025 ##   paste_dst_save  — dst Token* spilled across append_text
   6026 ##   paste_left_ptr/_len, paste_right_ptr/_len — operand spans for the
   6027 ##                       byte-copy loops in append_pasted_token
   6028 ##   paste_total_len — left.len + right.len, reused after append_text
   6029 ##   paste_start     — expand_pool + mark; needed to detect "## is first"
   6030 ##                     after registers are clobbered by append_pasted_token
   6031 ##   paste_in        — current read cursor (Token*)
   6032 ##   paste_out       — current write cursor (Token*)
   6033 ##   paste_end       — exclusive end (Token*), = expand_pool + pool_used
   6034 :paste_dst_save
   6035 ZERO8
   6036 :paste_left_ptr
   6037 ZERO8
   6038 :paste_left_len
   6039 ZERO8
   6040 :paste_right_ptr
   6041 ZERO8
   6042 :paste_right_len
   6043 ZERO8
   6044 :paste_total_len
   6045 ZERO8
   6046 :paste_start
   6047 ZERO8
   6048 :paste_in
   6049 ZERO8
   6050 :paste_out
   6051 ZERO8
   6052 :paste_end
   6053 ZERO8
   6054 
   6055 ## paste_scratch — 256-byte working buffer for append_pasted_token.
   6056 ## We assemble left.text ++ right.text here, then call
   6057 ## append_text(&paste_scratch, total_len) to copy into the durable
   6058 ## text_buf arena. 256 bytes is M0's quoted-literal cap.
   6059 
   6060 ## parse_args + expand_macro_tokens + find_param spill slots (P1 has
   6061 ## no callee-save spill on enter, and find_param's inner byte compare
   6062 ## needs every caller-saved register; parse_args + expand_macro_tokens
   6063 ## carry state across iterations and nested calls). One u64 each (ZERO8).
   6064 :pa_pos
   6065 ZERO8
   6066 :pa_arg_start
   6067 ZERO8
   6068 :pa_depth
   6069 ZERO8
   6070 :pa_arg_index
   6071 ZERO8
   6072 :pa_limit
   6073 ZERO8
   6074 :pa_brace_depth
   6075 ZERO8
   6076 :emt_call_tok
   6077 ZERO8
   6078 :emt_limit
   6079 ZERO8
   6080 :emt_macro
   6081 ZERO8
   6082 :emt_body_pos
   6083 ZERO8
   6084 :emt_body_end
   6085 ZERO8
   6086 :emt_body_start
   6087 ZERO8
   6088 
   6089 ## Local-label rewrite. next_expansion_id is the monotonic counter
   6090 ## (never reset); emt_expansion_id snapshots it at the start of each
   6091 ## expand_macro_tokens call so nested-call BSS reuse is safe.
   6092 ## ll_* slots hold body-token span + derived sizes while building the
   6093 ## renamed text in local_label_scratch.
   6094 :next_expansion_id
   6095 ZERO8
   6096 :emt_expansion_id
   6097 ZERO8
   6098 :ll_src_ptr
   6099 ZERO8
   6100 :ll_src_len
   6101 ZERO8
   6102 :ll_tail_len
   6103 ZERO8
   6104 :ll_digit_count
   6105 ZERO8
   6106 :ll_digit_cursor
   6107 ZERO8
   6108 :ll_total_len
   6109 ZERO8
   6110 
   6111 ## local_label_digits: 24-byte reverse-fill scratch for the decimal
   6112 ## rendering of emt_expansion_id (fits any u64 value).
   6113 :local_label_digits
   6114 ZERO8 ZERO8 ZERO8
   6115 
   6116 ## local_label_scratch: 128-byte working buffer for the renamed text
   6117 ## (sigil + tail + "__" + digits) before it's copied into text_buf via
   6118 ## append_text. Caps the combined tail + digit length at ~125 bytes,
   6119 ## which is ample for any realistic local-label name.
   6120 
   6121 ## --- Scope-stack rewrite -----------------------------------------------------
   6122 ## scope_depth: current depth (0..32).
   6123 ## scope_stack: 32 × TextSpan (16 bytes each) = 512 bytes. Each slot is
   6124 ##   (text_ptr, text_len) pointing into stable text memory (input_buf or
   6125 ##   text_buf — both append-only), so names are borrowed without copying.
   6126 ## sr_* slots hold emit_scope_rewrite's inputs across the byte-copy loops.
   6127 :scope_depth
   6128 ZERO8
   6129 :sr_tok_ptr
   6130 ZERO8
   6131 :sr_skip
   6132 ZERO8
   6133 :sr_sigil
   6134 ZERO8
   6135 :sr_name_len
   6136 ZERO8
   6137 
   6138 ## %struct / %enum scratch. define_fielded calls append_text twice
   6139 ## per synthesized macro, so every piece of state that must survive a call
   6140 ## lives here rather than in a register.
   6141 ##   df_stride               — 8 for %struct, 1 for %enum
   6142 ##   df_total_name_ptr/_len  — "SIZE" (4) for struct, "COUNT" (5) for enum
   6143 ##   df_base_ptr/_len        — directive's NAME token span
   6144 ##   df_index                — running field index, 0..N
   6145 ##   df_suffix_ptr/_len      — current synthesized field suffix
   6146 ##   df_value                — index * stride for this macro's body
   6147 ##   df_name_len             — base_len + 1 + suffix_len
   6148 ##   df_digit_count/_cursor  — df_render_decimal output
   6149 :df_stride
   6150 ZERO8
   6151 :df_total_name_ptr
   6152 ZERO8
   6153 :df_total_name_len
   6154 ZERO8
   6155 :df_base_ptr
   6156 ZERO8
   6157 :df_base_len
   6158 ZERO8
   6159 :df_index
   6160 ZERO8
   6161 :df_suffix_ptr
   6162 ZERO8
   6163 :df_suffix_len
   6164 ZERO8
   6165 :df_value
   6166 ZERO8
   6167 :df_name_len
   6168 ZERO8
   6169 :df_digit_count
   6170 ZERO8
   6171 :df_digit_cursor
   6172 ZERO8
   6173 
   6174 ## df_name_scratch: 256-byte working buffer for "BASE.SUFFIX" before
   6175 ## append_text copies it to text_buf. 256 B matches paste_scratch /
   6176 ## ebc_str_scratch. df_emit_field performs no explicit length check on the
   6177 ## assembled name; realistic struct/enum names stay well under 128 chars.
   6178 
   6179 ## df_digit_scratch: 24-byte reverse-fill buffer for the decimal rendering
   6180 ## of df_value (any u64 fits).
   6181 :df_digit_scratch
   6182 ZERO8 ZERO8 ZERO8
   6183 
   6184 :fp_macro
   6185 ZERO8
   6186 :fp_tok
   6187 ZERO8
   6188 :fp_pcount
   6189 ZERO8
   6190 :fp_idx
   6191 ZERO8
   6192 
   6193 ## Expression-evaluator scratch slots. expr_op_code spills its tok
   6194 ## argument to eoc_tok across tok_eq_const calls. apply_expr_op spills
   6195 ## op/args/argc and uses acc/i as the accumulator and loop induction var
   6196 ## inside the variadic folds.
   6197 :eoc_tok
   6198 ZERO8
   6199 :aeo_op
   6200 ZERO8
   6201 :aeo_args
   6202 ZERO8
   6203 :aeo_argc
   6204 ZERO8
   6205 :aeo_acc
   6206 ZERO8
   6207 :aeo_i
   6208 ZERO8
   6209 
   6210 ## Builtin scratch.
   6211 ## emit_hex_value: ehv_value/bytes hold the args; ehv_scratch is a 24-byte
   6212 ## buffer (max 18 chars used: 2 quotes + 16 hex chars; rounded up to keep
   6213 ## the next slot 8-byte aligned); ehv_token is a synthesized 24-byte
   6214 ## Token { kind, text_ptr, text_len }.
   6215 :ehv_value
   6216 ZERO8
   6217 :ehv_bytes
   6218 ZERO8
   6219 :ehv_scratch
   6220 ZERO8 ZERO8 ZERO8
   6221 :ehv_token
   6222 ZERO8 ZERO8 ZERO8
   6223 
   6224 ## expand_builtin_call: snapshots the stream pointer, the post-call resume
   6225 ## position, the byte count for !@%$, the eval_expr_range result, the chosen
   6226 ## arg span (start/end), the unchosen-side spans for %select, and the
   6227 ## pool mark used to push the chosen-stream slice.
   6228 :ebc_stream              # stream_ptr (a0) saved on entry
   6229 ZERO8
   6230 :ebc_call_end_pos        # copy of call_end_pos taken right after parse_args
   6231 ZERO8
   6232 :ebc_bytes               # 1 / 2 / 4 / 8 for ! @ % $; passed to emit_hex_value
   6233 ZERO8
   6234 :ebc_value               # eval_expr_range result on the ! @ % $ path
   6235 ZERO8
   6236 :ebc_arg0_start          # arg 0 span start; %select overwrites it with the chosen span
   6237 ZERO8
   6238 :ebc_arg0_end            # arg 0 span end; %select overwrites it with the chosen span
   6239 ZERO8
   6240 :ebc_then_start          # %select arg 1 span
   6241 ZERO8
   6242 :ebc_then_end
   6243 ZERO8
   6244 :ebc_else_start          # %select arg 2 span
   6245 ZERO8
   6246 :ebc_else_end
   6247 ZERO8
   6248 :ebc_mark                # pool_used sampled before copy_span_to_pool
   6249 ZERO8
   6250 
   6251 ## %str builtin scratch. ebc_str_orig_len / ebc_str_out_len spill the
   6252 ## argument text length and its +2 output length across append_text;
   6253 ## ebc_str_token is the synthesized TOK_STRING { kind, text_ptr, text_len }
   6254 ## handed to emit_token; ebc_str_scratch is a 256-byte assembly buffer
   6255 ## (matches paste_scratch / M0's quoted-literal cap).
        ## Only the two length cells (1 x ZERO8 each) and the token (3 x ZERO8 =
        ## 24 bytes) are reserved in this group. ebc_str_scratch itself is not
        ## reserved here; its address slot, ebc_str_scratch_ptr, is declared
        ## with the other BSS pointer slots later in this section.
   6256 :ebc_str_orig_len
   6257 ZERO8
   6258 :ebc_str_out_len
   6259 ZERO8
   6260 :ebc_str_token
   6261 ZERO8 ZERO8 ZERO8
   6262 
   6263 ## arg_starts[16] / arg_ends[16]: 16 × 8 = 128 bytes each (the size of 4 ZERO32); addressed via arg_starts_ptr / arg_ends_ptr.
   6264 ## Written by parse_args; read by expand_macro_tokens and expand_builtin_call.
   6265 
   6266 ## input_buf: M1PP_INPUT_CAP bytes (0x40000 = 256 KB per the DEFINE at the top of the file)
   6267 
   6268 ## output_buf: 8 KB (M1PP_OUTPUT_CAP)
   6269 
   6270 ## text_buf: 4 KB (M1PP_TEXT_CAP)
   6271 
   6272 ## source_tokens (M1PP_TOKENS_END)
   6273 
   6274 ## macros: 32 records × 296 bytes = 9472 bytes (M1PP_MACROS_CAP).
   6275 ## 37 lines × 256 bytes = 9472. Each line is 8 × ZERO32 = 256 bytes.
   6276 
   6277 ## macro_body_tokens: 256 slots × 24 bytes = 6 KB (M1PP_MACRO_BODY_CAP).
   6278 ## 24 lines × 256 bytes = 6144. Source tokens are copied in 24 bytes at a
   6279 ## time as macro bodies are recorded.
   6280 
   6281 ## streams: 16 Stream records × 40 bytes = 640 bytes (M1PP_STREAM_STACK_CAP).
   6282 ## 20 ZERO32 = 2 lines of 8 + 1 line of 4.
   6283 
   6284 ## expand_pool: 256 Token slots × 24 bytes = 6144 bytes (M1PP_EXPAND_CAP).
   6285 ## 24 lines × 8 ZERO32 = 192 ZERO32.
   6286 
   6287 ## expr_frames: 16 × 144 bytes = 2304 bytes (M1PP_EXPR_FRAMES_CAP).
   6288 ## 9 lines × 8 ZERO32 = 72 ZERO32.
   6289 
   6290 ## --- BSS pointer slots (set by p1_main; one per BSS buffer) -----------------
        ## Every slot is a single ZERO8 (8-byte) cell named <buffer>_ptr. The
        ## buffers themselves are not reserved in this section; only these
        ## address cells are. The :ELF_end label is declared right after the
        ## last slot.
        ## NOTE(review): presumably ELF_end marks where the file image stops and
        ## the BSS buffers begin -- confirm against the ELF header and p1_main.
   6291 :paste_scratch_ptr
   6292 ZERO8
   6293 :local_label_scratch_ptr
   6294 ZERO8
   6295 :scope_stack_ptr
   6296 ZERO8
   6297 :df_name_scratch_ptr
   6298 ZERO8
   6299 :ebc_str_scratch_ptr
   6300 ZERO8
   6301 :arg_starts_ptr
   6302 ZERO8
   6303 :arg_ends_ptr
   6304 ZERO8
   6305 :input_buf_ptr
   6306 ZERO8
   6307 :output_buf_ptr
   6308 ZERO8
   6309 :text_buf_ptr
   6310 ZERO8
   6311 :source_tokens_ptr
   6312 ZERO8
   6313 :macros_ptr
   6314 ZERO8
   6315 :macro_body_tokens_ptr
   6316 ZERO8
   6317 :streams_ptr
   6318 ZERO8
   6319 :expand_pool_ptr
   6320 ZERO8
   6321 :expr_frames_ptr
   6322 ZERO8
   6323 
   6324 :ELF_end