boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit 87d0cf252b91a60d2736c898ed9e5a08fa24c09a
parent dd82213eb8aa8db05b2bfaddfde3317d69f80d86
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 18:22:26 -0700

m1pp: add %struct and %enum directives

Implements §5 and §6 of docs/M1PP-EXT.md. Both share define_fielded(),
parameterized by stride and totalizer name:

  %struct NAME { f1 f2 ... }   ->  NAME.field_k = k*8,  NAME.SIZE  = N*8
  %enum   NAME { l1 l2 ... }   ->  NAME.label_k = k,    NAME.COUNT = N

Each synthesized field expands as a zero-parameter macro, consumed via
paren-less access (§4) at use sites. Two fixtures cover the 4-field
struct + %frame wrapper composition and a 7-label enum with a
%(=) compile-time COUNT sanity check.

Diffstat:
Mm1pp/m1pp.c | 168+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/m1pp/15-struct.M1pp | 41+++++++++++++++++++++++++++++++++++++++++
Atests/m1pp/15-struct.expected | 39+++++++++++++++++++++++++++++++++++++++
Atests/m1pp/16-enum.M1pp | 34++++++++++++++++++++++++++++++++++
Atests/m1pp/16-enum.expected | 32++++++++++++++++++++++++++++++++
5 files changed, 314 insertions(+), 0 deletions(-)

diff --git a/m1pp/m1pp.c b/m1pp/m1pp.c @@ -6,6 +6,9 @@ * ... body ... * %endm * + * %struct NAME { f1 f2 ... } fixed-layout 8-byte-field aggregate + * %enum NAME { l1 l2 ... } incrementing integer constants + * * %NAME(x, y) function-like macro call * ## token pasting inside macro bodies * !(expr) evaluate an integer S-expression, emit LE 8-bit hex @@ -482,6 +485,153 @@ static void skip_expr_newlines(struct Token **pos, struct Token *end) } } +static int emit_decimal_text(long long value, struct TextSpan *out) +{ + /* Render a non-negative integer as decimal into text_buf and + * return the span. No snprintf; plain reverse-fill. */ + char digits[24]; + int digit_count = 0; + long long v = value; + int start; + int i; + + if (v < 0) { + return fail("bad directive"); + } + if (v == 0) { + digits[digit_count++] = '0'; + } else { + while (v > 0) { + digits[digit_count++] = (char)('0' + (v % 10)); + v /= 10; + } + } + + if (text_used + digit_count + 1 > MAX_TEXT) { + return fail("text overflow"); + } + start = text_used; + for (i = digit_count - 1; i >= 0; i--) { + text_buf[text_used++] = digits[i]; + } + text_buf[text_used++] = '\0'; + out->ptr = text_buf + start; + out->len = digit_count; + return 1; +} + +static int emit_dotted_name(struct TextSpan base, const char *suffix, + int suffix_len, struct TextSpan *out) +{ + int total = base.len + 1 + suffix_len; + int start; + + if (text_used + total + 1 > MAX_TEXT) { + return fail("text overflow"); + } + start = text_used; + memcpy(text_buf + text_used, base.ptr, (size_t)base.len); + text_used += base.len; + text_buf[text_used++] = '.'; + memcpy(text_buf + text_used, suffix, (size_t)suffix_len); + text_used += suffix_len; + text_buf[text_used++] = '\0'; + out->ptr = text_buf + start; + out->len = total; + return 1; +} + +static int define_fielded_macro(struct TextSpan base, const char *suffix, + int suffix_len, long long value) +{ + struct Macro *m; + struct Token body_tok; + + if (macro_count >= MAX_MACROS) { + return 
fail("too many macros"); + } + if (macro_body_used >= MAX_MACRO_BODY_TOKENS) { + return fail("macro body overflow"); + } + m = &macros[macro_count]; + memset(m, 0, sizeof(*m)); + if (!emit_dotted_name(base, suffix, suffix_len, &m->name)) { + return 0; + } + m->param_count = 0; + body_tok.kind = TOK_WORD; + if (!emit_decimal_text(value, &body_tok.text)) { + return 0; + } + m->body_start = macro_body_tokens + macro_body_used; + macro_body_tokens[macro_body_used++] = body_tok; + m->body_end = macro_body_tokens + macro_body_used; + macro_count++; + return 1; +} + +static int define_fielded(struct Stream *s, long long stride, + const char *total_name, int total_name_len) +{ + /* Parses `%struct NAME { f1 f2 ... }` or `%enum NAME { ... }` and + * synthesizes N+1 zero-parameter macros: + * NAME.field_k -> k * stride + * NAME.<total> -> N * stride (SIZE for struct, COUNT for enum) */ + struct TextSpan base; + long long index = 0; + + s->pos++; + if (s->pos >= s->end || s->pos->kind != TOK_WORD) { + return fail("bad directive"); + } + base = s->pos->text; + s->pos++; + + while (s->pos < s->end && s->pos->kind == TOK_NEWLINE) { + s->pos++; + } + if (s->pos >= s->end || s->pos->kind != TOK_LBRACE) { + return fail("bad directive"); + } + s->pos++; + + for (;;) { + while (s->pos < s->end && + (s->pos->kind == TOK_COMMA || s->pos->kind == TOK_NEWLINE)) { + s->pos++; + } + if (s->pos >= s->end) { + return fail("unterminated directive"); + } + if (s->pos->kind == TOK_RBRACE) { + s->pos++; + break; + } + if (s->pos->kind != TOK_WORD) { + return fail("bad directive"); + } + if (!define_fielded_macro(base, s->pos->text.ptr, s->pos->text.len, + index * stride)) { + return 0; + } + s->pos++; + index++; + } + + if (!define_fielded_macro(base, total_name, total_name_len, index * stride)) { + return 0; + } + + while (s->pos < s->end && s->pos->kind != TOK_NEWLINE) { + s->pos++; + } + if (s->pos < s->end && s->pos->kind == TOK_NEWLINE) { + s->pos++; + } + s->line_start = 1; + return 1; +} 
+ static int define_macro(struct Stream *s) { struct Macro *m; @@ -1423,6 +1573,24 @@ static int process_tokens(void) continue; } + if (s->line_start && + tok->kind == TOK_WORD && + token_text_eq(tok, "%struct")) { + if (!define_fielded(s, 8, "SIZE", 4)) { + return 0; + } + continue; + } + + if (s->line_start && + tok->kind == TOK_WORD && + token_text_eq(tok, "%enum")) { + if (!define_fielded(s, 1, "COUNT", 5)) { + return 0; + } + continue; + } + if (tok->kind == TOK_NEWLINE) { s->pos++; s->line_start = 1; diff --git a/tests/m1pp/15-struct.M1pp b/tests/m1pp/15-struct.M1pp @@ -0,0 +1,41 @@ +# %struct directive (M1PP-EXT §5): +# - %struct NAME { f1 f2 ... } synthesizes N+1 zero-parameter macros: +# NAME.field_k -> k*8 (decimal word) +# NAME.SIZE -> N*8 +# - paren-less access (§4) is the natural read form: %closure.body +# - composes via a plain wrapper macro using %frame_hdr.SIZE for stack- +# frame layouts + +%struct closure { hdr params body env } + +# Paren-less access to each field and to SIZE. +%closure.hdr +%closure.params +%closure.body +%closure.env +%closure.SIZE + +# With parens still works (§4 parity). +%closure.body() + +# Inside an expression atom: loads 16+100 = 116 -> 0x74. +%((+ %closure.body 100)) + +# Compose-and-add path: %frame adds a 16-byte header prefix to every +# %frame_apply.* offset. Exercises the paren-less atom inside %(...). +%struct frame_hdr { retaddr caller_sp } + +%macro frame(field) +%((+ field %frame_hdr.SIZE)) +%endm + +%struct frame_apply { callee args body env } +%frame(%frame_apply.callee) +%frame(%frame_apply.args) +%frame(%frame_apply.body) +%frame(%frame_apply.env) + +# Total frame size for an enter/leave pair. 
+%frame_apply.SIZE +%frame_hdr.SIZE +END diff --git a/tests/m1pp/15-struct.expected b/tests/m1pp/15-struct.expected @@ -0,0 +1,39 @@ + + + + + + + + + + +0 +8 +16 +24 +32 + + +16 + + +'74000000' + + + + + +'10000000' + +'18000000' + +'20000000' + +'28000000' + + + +32 +16 +END diff --git a/tests/m1pp/16-enum.M1pp b/tests/m1pp/16-enum.M1pp @@ -0,0 +1,34 @@ +# %enum directive (M1PP-EXT §6): +# - %enum NAME { l1 l2 ... } synthesizes N+1 zero-parameter macros: +# NAME.label_k -> k +# NAME.COUNT -> N +# - same directive-handler as %struct, just stride=1 and totalizer=COUNT +# - compile-time COUNT check with %(=) proves the totalizer ties to the +# label count + +%enum tag { fixnum pair vector string symbol proc singleton } + +# Paren-less access to each label. +%tag.fixnum +%tag.pair +%tag.vector +%tag.string +%tag.symbol +%tag.proc +%tag.singleton + +# Totalizer. +%tag.COUNT + +# Compile-time sanity: COUNT must equal 7. +%((= %tag.COUNT 7)) + +# An enum value as an LI-immediate slot via the 4-byte emitter. +%((+ %tag.pair 0)) + +# Second enum, proves independent naming. +%enum prim_id { add sub mul div mod } +%prim_id.add +%prim_id.mul +%prim_id.COUNT +END diff --git a/tests/m1pp/16-enum.expected b/tests/m1pp/16-enum.expected @@ -0,0 +1,32 @@ + + + + + + + + + + +0 +1 +2 +3 +4 +5 +6 + + +7 + + +'01000000' + + +'01000000' + + +0 +2 +5 +END