commit 955cc50921de523481bdfc77a37f49725a4cb554
parent 87d0cf252b91a60d2736c898ed9e5a08fa24c09a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 18:22:32 -0700
m1pp.M1: port %struct and %enum directives
Adds :define_fielded + :df_emit_field + :df_render_decimal to the P1
expander, plus line-start dispatch branches (:proc_check_struct,
:proc_check_enum) that shim into define_fielded with the stride /
totalizer-name parameter pair. State lives in df_* BSS slots +
df_name_scratch (256 B) + df_digit_scratch (24 B) so append_text calls
don't clobber it.
Test suite: all 19 m1pp fixtures pass byte-identical against the
C oracle.
Diffstat:
| M | m1pp/m1pp.M1 | | | 528 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
1 file changed, 527 insertions(+), 1 deletion(-)
diff --git a/m1pp/m1pp.M1 b/m1pp/m1pp.M1
@@ -879,7 +879,7 @@ DEFINE EXPR_INVALID 1200000000000000
li_a2 %6 %0
la_br &tok_eq_const
call
- la_br &proc_check_newline
+ la_br &proc_check_struct
beqz_a0
# %macro: shim into define_macro through the proc_pos globals.
@@ -905,6 +905,72 @@ DEFINE EXPR_INVALID 1200000000000000
la_br &proc_loop
b
+## ---- line_start && tok eq "%struct" ----
+## The %macro guard above already proved line_start && kind == TOK_WORD; if
+## we reach here via a %macro non-match, those gates still hold.
+## On a text mismatch this branches on to :proc_check_enum; on a match it
+## runs define_fielded and jumps back to :proc_loop.
+:proc_check_struct
+ # a0 = tok, reloaded from the frame slot at sp+24
+ ld_t0,sp,24
+ mov_a0,t0
+ la_a1 &const_struct
+ li_a2 %7 %0
+ la_br &tok_eq_const
+ call
+ # a0 == 0 -> token text is not "%struct": try the next directive
+ la_br &proc_check_enum
+ beqz_a0
+
+ # %struct matched: shim into define_fielded(stride=8, total="SIZE", len=4)
+ # Prime the globals define_fielded works through:
+ # proc_pos = tok, proc_line_start = 1.
+ ld_t0,sp,24
+ la_a0 &proc_pos
+ st_t0,a0,0
+ la_a0 &proc_line_start
+ li_a1 %1 %0
+ st_a1,a0,0
+ li_a0 %8 %0
+ la_a1 &const_size
+ li_a2 %4 %0
+ la_br &define_fielded
+ call
+ # Write the advanced position back through the pointer held at sp+16:
+ # [+16] = proc_pos, [+24] = 1. Presumably these are the stream's pos and
+ # line_start fields -- confirm against the stream record layout.
+ ld_a0,sp,16
+ la_a1 &proc_pos
+ ld_t0,a1,0
+ st_t0,a0,16
+ li_t1 %1 %0
+ st_t1,a0,24
+ la_br &proc_loop
+ b
+
+## ---- line_start && tok eq "%enum" ----
+## Reached only from :proc_check_struct's non-match branch. On a text
+## mismatch this branches on to :proc_check_newline; on a match it runs
+## define_fielded and jumps back to :proc_loop.
+:proc_check_enum
+ # a0 = tok, reloaded from the frame slot at sp+24
+ ld_t0,sp,24
+ mov_a0,t0
+ la_a1 &const_enum
+ li_a2 %5 %0
+ la_br &tok_eq_const
+ call
+ # a0 == 0 -> token text is not "%enum": continue with the newline check
+ la_br &proc_check_newline
+ beqz_a0
+
+ # %enum matched: shim into define_fielded(stride=1, total="COUNT", len=5)
+ # Prime the globals define_fielded works through:
+ # proc_pos = tok, proc_line_start = 1.
+ ld_t0,sp,24
+ la_a0 &proc_pos
+ st_t0,a0,0
+ la_a0 &proc_line_start
+ li_a1 %1 %0
+ st_a1,a0,0
+ li_a0 %1 %0
+ la_a1 &const_count
+ li_a2 %5 %0
+ la_br &define_fielded
+ call
+ # Write the advanced position back through the pointer held at sp+16:
+ # [+16] = proc_pos, [+24] = 1. Presumably these are the stream's pos and
+ # line_start fields -- confirm against the stream record layout.
+ ld_a0,sp,16
+ la_a1 &proc_pos
+ ld_t0,a1,0
+ st_t0,a0,16
+ li_t1 %1 %0
+ st_t1,a0,24
+ la_br &proc_loop
+ b
+
:proc_check_newline
# reload s, tok
ld_a0,sp,16
@@ -1384,6 +1450,402 @@ DEFINE EXPR_INVALID 1200000000000000
leave
ret
+## --- %struct / %enum directive ----------------------------------------------
+## define_fielded(a0=stride, a1=total_name_ptr, a2=total_name_len).
+## Parses `%struct NAME { f1 f2 ... }` or `%enum NAME { ... }` (caller has
+## already detected %struct / %enum at line start and primed proc_pos to
+## that token). Synthesizes N+1 zero-parameter macros — NAME.field_k -> k*stride
+## and NAME.<total_name> -> N*stride — by appending each {name, body-token}
+## pair into macros[] / macro_body_tokens[].
+##
+## All working state lives in BSS (df_* slots + df_name_scratch / df_digit_scratch)
+## because df_emit_field calls append_text, which clobbers caller-saved regs.
+##
+## Tokens are walked as 24-byte records: kind at +0, text_ptr at +8,
+## text_len at +16. The end of the token stream is the pointer stored in
+## source_end.
+##
+## On return: proc_pos points just past the first NEWLINE following '}' (or
+## at source_end if none), and proc_line_start = 1.
+## Errors (both fatal, no return):
+## err_bad_directive — missing/non-WORD name, missing '{', EOF
+## before '{', or a non-WORD field token
+## err_unterminated_directive — EOF inside the field list
+:define_fielded
+ enter_0
+
+ # Save directive args to BSS.
+ la_a3 &df_stride
+ st_a0,a3,0
+ la_a3 &df_total_name_ptr
+ st_a1,a3,0
+ la_a3 &df_total_name_len
+ st_a2,a3,0
+
+ # advance past the %struct / %enum directive token
+ la_a0 &proc_pos
+ ld_t0,a0,0
+ addi_t0,t0,24
+ st_t0,a0,0
+
+ # ---- header: name (WORD) ----
+ # t0 = current token; fail on EOF or on any kind other than TOK_WORD
+ la_a1 &source_end
+ ld_t1,a1,0
+ la_br &err_bad_directive
+ beq_t0,t1
+ ld_a1,t0,0
+ li_a2 TOK_WORD
+ la_br &err_bad_directive
+ bne_a1,a2
+
+ # df_base_ptr = tok.text_ptr; df_base_len = tok.text_len
+ ld_a2,t0,8
+ la_a3 &df_base_ptr
+ st_a2,a3,0
+ ld_a2,t0,16
+ la_a3 &df_base_len
+ st_a2,a3,0
+
+ # advance past the base name
+ addi_t0,t0,24
+ la_a0 &proc_pos
+ st_t0,a0,0
+
+## skip NEWLINE tokens before '{' (tolerates `%struct NAME\n{ ... }`)
+## Leaves t0 = first non-NEWLINE token and a1 = its kind for
+## :df_require_lbrace. EOF here is reported as err_bad_directive.
+:df_skip_nl_before_lbrace
+ la_a0 &proc_pos
+ ld_t0,a0,0
+ la_a1 &source_end
+ ld_t1,a1,0
+ la_br &err_bad_directive
+ beq_t0,t1
+ ld_a1,t0,0
+ li_a2 TOK_NEWLINE
+ la_br &df_require_lbrace
+ bne_a1,a2
+ addi_t0,t0,24
+ la_a0 &proc_pos
+ st_t0,a0,0
+ la_br &df_skip_nl_before_lbrace
+ b
+
+## Entered with t0 = token pointer and a1 = token kind, both still live from
+## :df_skip_nl_before_lbrace.
+:df_require_lbrace
+ # expect LBRACE
+ li_a2 TOK_LBRACE
+ la_br &err_bad_directive
+ bne_a1,a2
+
+ # advance past '{'
+ addi_t0,t0,24
+ la_a0 &proc_pos
+ st_t0,a0,0
+
+ # df_index = 0
+ li_a0 %0 %0
+ la_a1 &df_index
+ st_a0,a1,0
+
+## field loop: skip comma/newline separators, stop at '}', else consume a WORD.
+## Each iteration reloads t0 from proc_pos, so nothing needs to survive the
+## df_emit_field call in registers. a1 = kind of the current token.
+:df_field_loop
+ la_a0 &proc_pos
+ ld_t0,a0,0
+ la_a1 &source_end
+ ld_t1,a1,0
+ la_br &err_unterminated_directive
+ beq_t0,t1
+ ld_a1,t0,0
+
+ # separator: COMMA or NEWLINE -> advance and reloop
+ li_a2 TOK_COMMA
+ la_br &df_field_skip_sep
+ beq_a1,a2
+ li_a2 TOK_NEWLINE
+ la_br &df_field_skip_sep
+ beq_a1,a2
+
+ # end-of-list marker '}' -> break
+ li_a2 TOK_RBRACE
+ la_br &df_fields_done
+ beq_a1,a2
+
+ # else must be a WORD
+ li_a2 TOK_WORD
+ la_br &err_bad_directive
+ bne_a1,a2
+
+ # df_suffix_ptr = tok.text_ptr; df_suffix_len = tok.text_len
+ ld_a2,t0,8
+ la_a3 &df_suffix_ptr
+ st_a2,a3,0
+ ld_a2,t0,16
+ la_a3 &df_suffix_len
+ st_a2,a3,0
+
+ # df_value = df_index * df_stride
+ la_a0 &df_index
+ ld_t1,a0,0
+ la_a0 &df_stride
+ ld_t2,a0,0
+ mul_a0,t1,t2
+ la_a1 &df_value
+ st_a0,a1,0
+
+ # synthesize the field macro
+ la_br &df_emit_field
+ call
+
+ # df_index++
+ la_a0 &df_index
+ ld_t1,a0,0
+ addi_t1,t1,1
+ st_t1,a0,0
+
+ # advance past the field word
+ # (proc_pos is reloaded rather than reusing t0 from before the call)
+ la_a0 &proc_pos
+ ld_t0,a0,0
+ addi_t0,t0,24
+ st_t0,a0,0
+ la_br &df_field_loop
+ b
+
+## Entered with t0 = the COMMA/NEWLINE token; step over it and reloop.
+:df_field_skip_sep
+ addi_t0,t0,24
+ la_a0 &proc_pos
+ st_t0,a0,0
+ la_br &df_field_loop
+ b
+
+## Entered with t0 = the '}' token (still live from :df_field_loop).
+:df_fields_done
+ # advance past '}'
+ addi_t0,t0,24
+ la_a0 &proc_pos
+ st_t0,a0,0
+
+ # ---- emit totalizer: df_suffix <- df_total_name; df_value = N * stride ----
+ la_a0 &df_total_name_ptr
+ ld_t0,a0,0
+ la_a1 &df_suffix_ptr
+ st_t0,a1,0
+ la_a0 &df_total_name_len
+ ld_t0,a0,0
+ la_a1 &df_suffix_len
+ st_t0,a1,0
+
+ # df_index now equals the number of fields emitted (N)
+ la_a0 &df_index
+ ld_t1,a0,0
+ la_a0 &df_stride
+ ld_t2,a0,0
+ mul_a0,t1,t2
+ la_a1 &df_value
+ st_a0,a1,0
+
+ la_br &df_emit_field
+ call
+
+ # consume tokens through the first trailing NEWLINE (tolerate EOF)
+ # The token kind is read before the advance, and the advance is stored
+ # before the NEWLINE test, so the NEWLINE itself is consumed too.
+:df_skip_trailing_loop
+ la_a0 &proc_pos
+ ld_t0,a0,0
+ la_a1 &source_end
+ ld_t1,a1,0
+ la_br &df_finish
+ beq_t0,t1
+ ld_a1,t0,0
+ li_a2 TOK_NEWLINE
+ addi_t0,t0,24
+ la_a0 &proc_pos
+ st_t0,a0,0
+ la_br &df_finish
+ beq_a1,a2
+ la_br &df_skip_trailing_loop
+ b
+
+## Common exit: mark the next token as being at line start and return.
+:df_finish
+ la_a0 &proc_line_start
+ li_a1 %1 %0
+ st_a1,a0,0
+ leave
+ ret
+
+## df_emit_field(): read df_base_*, df_suffix_*, df_value from BSS; synthesize
+## one macro record + one body token. Builds the "NAME.field" identifier in
+## df_name_scratch and the decimal body text via df_render_decimal, then
+## copies both into text_buf via append_text so they outlive the scratch.
+##
+## Inputs (BSS): df_base_ptr/_len, df_suffix_ptr/_len, df_value.
+## Outputs: one record appended at macros_end (name ptr/len, param_count = 0,
+## body_start/body_end) and one TOK_WORD token appended at
+## macro_body_end; both end pointers are advanced.
+## Scratch: writes df_name_len, df_name_scratch, and (via df_render_decimal)
+## df_digit_scratch / df_digit_count / df_digit_cursor.
+## Uses a0-a3 and t0-t2 freely; nothing is preserved for the caller.
+## Errors (fatal, no return):
+## err_too_many_macros — macros[] is full
+## err_bad_directive — "BASE.SUFFIX" would not fit df_name_scratch
+## err_macro_body_overflow — no room for one more body token
+:df_emit_field
+ enter_0
+
+ # macros_end capacity check
+ la_a0 &macros_end
+ ld_t0,a0,0
+ la_a1 &macros
+ li_a2 M1PP_MACROS_CAP
+ add_a1,a1,a2
+ la_br &err_too_many_macros
+ beq_t0,a1
+
+ # ---- name_len = base_len + 1 + suffix_len ----
+ # Computed up front so the length can be validated before any byte is
+ # copied into the fixed-size scratch buffer.
+ la_a0 &df_base_len
+ ld_t0,a0,0
+ la_a0 &df_suffix_len
+ ld_t1,a0,0
+ add_t0,t0,t1
+ addi_t0,t0,1
+ la_a1 &df_name_len
+ st_t0,a1,0
+
+ # df_name_scratch is 256 bytes: reject the directive when 256 < name_len
+ # instead of overrunning the BSS slots that follow the buffer.
+ # (a3 = name_len - 0 is just a register move into the operand blt uses.)
+ li_a2 %0 %0
+ sub_a3,t0,a2
+ li_t2 %256 %0
+ la_br &err_bad_directive
+ blt_t2,a3
+
+ # ---- assemble "BASE.SUFFIX" into df_name_scratch ----
+ # copy base bytes: t0 = src, t1 = base_len, t2 = dst, a3 = byte index
+ la_a0 &df_base_ptr
+ ld_t0,a0,0
+ la_a0 &df_base_len
+ ld_t1,a0,0
+ la_t2 &df_name_scratch
+ li_a3 %0 %0
+:df_ef_base_loop
+ la_br &df_ef_base_done
+ beq_a3,t1
+ add_a0,t0,a3
+ lb_a0,a0,0
+ add_a1,t2,a3
+ sb_a0,a1,0
+ addi_a3,a3,1
+ la_br &df_ef_base_loop
+ b
+:df_ef_base_done
+ # scratch[base_len] = '.' (ASCII 46)
+ add_a1,t2,t1
+ li_a0 %46 %0
+ sb_a0,a1,0
+
+ # copy suffix bytes into scratch[base_len + 1 ..]
+ # t0 = src, t1 = suffix_len, a1 = dst (just past the '.'), a3 = byte index
+ la_a0 &df_suffix_ptr
+ ld_t0,a0,0
+ la_a0 &df_suffix_len
+ ld_t1,a0,0
+ addi_a1,a1,1
+ li_a3 %0 %0
+:df_ef_suffix_loop
+ la_br &df_ef_suffix_done
+ beq_a3,t1
+ add_a0,t0,a3
+ lb_a0,a0,0
+ add_a2,a1,a3
+ sb_a0,a2,0
+ addi_a3,a3,1
+ la_br &df_ef_suffix_loop
+ b
+:df_ef_suffix_done
+
+ # durable_name = append_text(&df_name_scratch, name_len)
+ la_a1 &df_name_len
+ ld_a1,a1,0
+ la_a0 &df_name_scratch
+ la_br &append_text
+ call
+ # a0 = durable_name ptr
+
+ # m = macros_end; m->name.ptr = durable_name; m->name.len = name_len
+ la_a1 &macros_end
+ ld_t2,a1,0
+ st_a0,t2,0
+ la_a0 &df_name_len
+ ld_a0,a0,0
+ st_a0,t2,8
+
+ # m->param_count = 0 (params[] left zeroed; not read when count == 0)
+ li_a0 %0 %0
+ st_a0,t2,16
+
+ # render df_value into df_digit_scratch (reverse fill)
+ # NOTE: df_render_decimal overwrites t0-t2, so m is reloaded further down.
+ la_br &df_render_decimal
+ call
+
+ # durable_digits = append_text(df_digit_cursor, df_digit_count)
+ # (a0 is the pointer value stored in df_digit_cursor, not the slot address)
+ la_a0 &df_digit_cursor
+ ld_a0,a0,0
+ la_a1 &df_digit_count
+ ld_a1,a1,0
+ la_br &append_text
+ call
+ # a0 = durable_digits
+
+ # macro_body_end capacity check:
+ # fail when M1PP_MACRO_BODY_CAP < (macro_body_end - macro_body_tokens) + 24
+ la_a1 &macro_body_end
+ ld_t0,a1,0
+ la_a2 &macro_body_tokens
+ sub_a3,t0,a2
+ addi_a3,a3,24
+ li_t2 M1PP_MACRO_BODY_CAP
+ la_br &err_macro_body_overflow
+ blt_t2,a3
+
+ # body_tok = TOK_WORD { durable_digits, df_digit_count }
+ li_a1 TOK_WORD
+ st_a1,t0,0
+ st_a0,t0,8
+ la_a2 &df_digit_count
+ ld_a2,a2,0
+ st_a2,t0,16
+
+ # m->body_start = macro_body_end (the slot we just wrote)
+ la_a0 &macros_end
+ ld_t2,a0,0
+ li_a1 M1PP_MACRO_BODY_START_OFF
+ add_a1,t2,a1
+ st_t0,a1,0
+
+ # macro_body_end += 24
+ addi_t0,t0,24
+ la_a1 &macro_body_end
+ st_t0,a1,0
+
+ # m->body_end = macro_body_end
+ li_a1 M1PP_MACRO_BODY_END_OFF
+ add_a1,t2,a1
+ st_t0,a1,0
+
+ # macros_end += MACRO_RECORD_SIZE
+ li_a0 M1PP_MACRO_RECORD_SIZE
+ add_t2,t2,a0
+ la_a1 &macros_end
+ st_t2,a1,0
+
+ leave
+ ret
+
+## df_render_decimal(): reads df_value; writes a reverse-filled decimal
+## rendering into df_digit_scratch[cursor..end) and stores df_digit_count +
+## df_digit_cursor for a subsequent append_text call. Leaf.
+##
+## No frame is set up (no enter/leave); the routine makes no calls.
+## Register use: t0 = value still to render, t2 = write cursor (starts at
+## the end of the 24-byte scratch and moves down one byte per digit),
+## t1 = scratch end (only used to seed t2), a0-a2 = temporaries.
+## Outputs (BSS): df_digit_cursor = address of the most significant digit,
+## df_digit_count = scratch_end - cursor.
+:df_render_decimal
+ la_a0 &df_value
+ ld_t0,a0,0
+ # t1 = t2 = &df_digit_scratch + 24 (one past the last scratch byte)
+ la_t1 &df_digit_scratch
+ li_a2 %24 %0
+ add_t1,t1,a2
+ mov_t2,t1
+
+ # special-case v == 0 -> single '0'
+ # (the digit loop below would otherwise emit nothing for zero)
+ la_br &df_rd_loop
+ bnez_t0
+ addi_t2,t2,neg1
+ li_a0 %48 %0
+ sb_a0,t2,0
+ la_br &df_rd_done
+ b
+:df_rd_loop
+ # peel the least significant digit: store '0' + (t0 % 10) at --t2,
+ # then t0 = t0 / 10; stop once t0 reaches 0
+ la_br &df_rd_done
+ beqz_t0
+ mov_a0,t0
+ li_a1 %10 %0
+ rem_a2,a0,a1
+ addi_a2,a2,48
+ addi_t2,t2,neg1
+ sb_a2,t2,0
+ mov_a0,t0
+ li_a1 %10 %0
+ div_a0,a0,a1
+ mov_t0,a0
+ la_br &df_rd_loop
+ b
+:df_rd_done
+ # df_digit_count = (&df_digit_scratch + 24) - t2; df_digit_cursor = t2
+ la_a1 &df_digit_scratch
+ li_a2 %24 %0
+ add_a1,a1,a2
+ sub_a0,a1,t2
+ la_a1 &df_digit_count
+ st_a0,a1,0
+ la_a1 &df_digit_cursor
+ st_t2,a1,0
+ ret
+
## ============================================================================
## --- Stream stack + expansion-pool lifetime ---------------------------------
## ============================================================================
@@ -4882,6 +5344,16 @@ DEFINE EXPR_INVALID 1200000000000000
li_a1 %17 %0
la_br &fatal
b
+## Malformed %struct / %enum directive: tail-jump into fatal with
+## a0 = message pointer, a1 = message length (27 bytes). Does not return.
+:err_bad_directive
+ la_a0 &msg_bad_directive
+ li_a1 %27 %0
+ la_br &fatal
+ b
+## Token stream ended inside a %struct / %enum field list: tail-jump into
+## fatal with a0 = message pointer, a1 = message length (36 bytes).
+:err_unterminated_directive
+ la_a0 &msg_unterminated_directive
+ li_a1 %36 %0
+ la_br &fatal
+ b
## fatal(a0=msg_ptr, a1=msg_len): writes "m1pp: <msg>\n" to stderr, exits 1.
## Saves args across the three syscalls since a0..a3 are caller-saved.
@@ -4936,6 +5408,10 @@ DEFINE EXPR_INVALID 1200000000000000
:const_dlr "$"
:const_select "%select"
:const_str "%str"
+## %struct / %enum directive keywords (matched via tok_eq_const with lengths
+## 7 and 5) and the totalizer suffixes handed to define_fielded (lengths 4
+## and 5). Lengths are passed separately at each use site.
+:const_struct "%struct"
+:const_enum "%enum"
+:const_size "SIZE"
+:const_count "COUNT"
## Operator strings for expr_op_code. Each is a raw byte literal; lengths
## are passed separately to tok_eq_const. "<=" must be tested before "<"
@@ -4980,6 +5456,8 @@ DEFINE EXPR_INVALID 1200000000000000
:msg_macro_body_overflow "macro body overflow"
:msg_not_implemented "not implemented"
:msg_unbalanced_braces "unbalanced braces"
+## %struct / %enum diagnostics; err_bad_directive passes length 27 and
+## err_unterminated_directive passes length 36, so keep those in sync with
+## the text below.
+:msg_bad_directive "bad %struct/%enum directive"
+:msg_unterminated_directive "unterminated %struct/%enum directive"
## --- BSS ---------------------------------------------------------------------
## Placed before :ELF_end so filesz/memsz (which this ELF header sets equal)
@@ -5167,6 +5645,54 @@ ZERO8 ZERO8 ZERO8
:local_label_scratch
ZERO32 ZERO32 ZERO32 ZERO32
+## %struct / %enum scratch (§5, §6). df_emit_field (called by define_fielded
+## once per synthesized macro) calls append_text twice, so every piece of
+## state that must survive a call lives here rather than in a register.
+## Each slot is 8 bytes.
+## df_stride — 8 for %struct, 1 for %enum
+## df_total_name_ptr/_len — "SIZE" (4) for struct, "COUNT" (5) for enum
+## df_base_ptr/_len — directive's NAME token span
+## df_index — running field index, 0..N
+## df_suffix_ptr/_len — current synthesized field suffix
+## df_value — index * stride for this macro's body
+## df_name_len — base_len + 1 + suffix_len
+## df_digit_count/_cursor — df_render_decimal output
+:df_stride
+ZERO8
+:df_total_name_ptr
+ZERO8
+:df_total_name_len
+ZERO8
+:df_base_ptr
+ZERO8
+:df_base_len
+ZERO8
+:df_index
+ZERO8
+:df_suffix_ptr
+ZERO8
+:df_suffix_len
+ZERO8
+:df_value
+ZERO8
+:df_name_len
+ZERO8
+:df_digit_count
+ZERO8
+:df_digit_cursor
+ZERO8
+
+## df_name_scratch: 256-byte working buffer for "BASE.SUFFIX" before
+## append_text copies it to text_buf. 256 B matches paste_scratch /
+## ebc_str_scratch. The assembled name (base_len + 1 + suffix_len bytes)
+## must fit in these 256 bytes; realistic struct/enum names stay well
+## under 128 chars.
+:df_name_scratch
+ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32 ZERO32
+
+## df_digit_scratch: 24-byte reverse-fill buffer for the decimal rendering
+## of df_value (any u64 fits). df_render_decimal fills it from the last
+## byte downward.
+:df_digit_scratch
+ZERO8 ZERO8 ZERO8
+
:fp_macro
ZERO8
:fp_tok