boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit f0abcca8fab5acb18dbfbfcea898ffc3bdc6d176
parent c40935b691eabb80b07e5bb9d65d5d86c402dbe1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 11:03:01 -0700

Use !@%$ vs %le32/le64 in m1pp

Diffstat:
Mdocs/M1M-IMPL.md | 12++++++------
Mdocs/M1M-P1-PORT.md | 10+++++-----
Mp1/aarch64.M1M | 68++++++++++++++++++++++++++++++++++----------------------------------
Msrc/m1macro.c | 26+++++++++++++++++---------
Mtests/m1m/full-parity.M1M | 4++--
5 files changed, 64 insertions(+), 56 deletions(-)

diff --git a/docs/M1M-IMPL.md b/docs/M1M-IMPL.md @@ -11,7 +11,7 @@ The target expander supports the features required by `p1/*.M1M`: - `%macro NAME(a, b)` / `%endm` - `%NAME(x, y)` function-like expansion with recursive rescanning - `##` token paste -- `%le32(expr)` / `%le64(expr)` +- `!(expr)` / `@(expr)` / `%(expr)` / `$(expr)` - `%select(cond, then, else)` - Lisp-shaped integer expressions used by the builtins @@ -49,7 +49,7 @@ The program should be structured as a small compiler pipeline: result as a new stream for recursive rescanning. 6. Builtins - `%le32(expr)` and `%le64(expr)` evaluate integer expressions and emit + `%(expr)` and `$(expr)` evaluate integer expressions and emit one generated token directly. `%select(cond, then, else)` evaluates `cond` first, then chooses exactly one of `then` or `else`, copies only that chosen token range @@ -126,7 +126,7 @@ indices. That keeps stream and argument walking simple in P1: advance by one token record, compare pointers, no repeated `base + index << 5`. Source token spans point into `input_buf`. `text_buf` is reserved for -synthesized token text such as `##` pastes and `%le32`/`%le64` output. +synthesized token text such as `##` pastes and `!@%$` output. ### Bottom-Up Helper Layers @@ -310,13 +310,13 @@ main output stream. expand_builtin_call(stream_ptr, builtin_tok) ``` -`%le32` / `%le64`: +`!@%$` ```text parse args require one arg value = eval_expr_range(arg_start, arg_end) -emit_hex_value(value, 4 or 8) +emit_hex_value(value, 1 2 4 or 8) advance stream pos to call_end_pos line_start = 0 ``` @@ -388,7 +388,7 @@ process_tokens: operands, and pasted-parameter single-token validation. 6. Add integer atom parsing and explicit-stack expression evaluation. Test arithmetic and comparison expressions without macros. -7. Add `%le32` and `%le64`. +7. Add `!@%$(` 8. Add macro expansion inside expressions. This is required for `p1/aarch64.M1M`. 9. Add `%select`. 
diff --git a/docs/M1M-P1-PORT.md b/docs/M1M-P1-PORT.md @@ -31,7 +31,7 @@ instruction encodings: - `%macro NAME(a, b)` / `%endm` - `%NAME(x, y)` function-like expansion with recursive rescanning - `##` token paste -- `%le32(expr)` / `%le64(expr)` +- `!(expr)` / `@(expr)` / `%(expr)` / `$(expr)` - `%select(cond, then, else)` - Lisp-shaped integer expressions used by the builtins @@ -98,7 +98,7 @@ applicable. pass-through fixture output. - [x] Add `tests/m1m/full-parity.M1M` and its C-oracle expected output as the real expansion parity target. This fixture intentionally uses macro calls, - recursive rescanning, paste, `%le32`, `%le64`, and `%select`; it is expected + recursive rescanning, paste, `!@%$(` and `%select`; it is expected to fail under the partial P1 implementation until the remaining unchecked expansion tasks land. - [ ] Replace structural `%macro` skipping with real macro table storage: @@ -114,7 +114,7 @@ applicable. failures. - [ ] Port integer atom parsing and S-expression evaluation for arithmetic, comparisons, shifts, and bitwise operators. -- [ ] Implement `%le32(expr)` and `%le64(expr)` on top of expression +- [ ] Implement `!@%$(expr)` on top of expression evaluation and token emission. - [ ] Implement `%select(cond, then, else)` on top of expression evaluation and stream pushback. @@ -170,7 +170,7 @@ applicable. 8. **Builtins** - Implement `%le32`, `%le64`, and `%select` on top of the expression evaluator + Implement `!@%$(` and `%select` on top of the expression evaluator and stream pushback. 9. **Cleanup and limits** @@ -231,7 +231,7 @@ Minimum checks: ``` src/m1macro.c oracle: p1/aarch64.M1M src/m1macro.c oracle: p1/P1.M1M - custom fixture: paste, recursive rescanning, %le32/%le64, %select + custom fixture: paste, recursive rescanning, !@%%(, %select malformed fixtures: bad paste, wrong arg count, bad expression ``` diff --git a/p1/aarch64.M1M b/p1/aarch64.M1M @@ -1,7 +1,7 @@ # aarch64.M1M -- P1v2 aarch64 backend expressed in m1macro. 
# # This mirrors p1/aarch64.py using the m1macro integer builtins: -# %le32(sexpr), %le64(sexpr), and %select(cond, then, else). +# %(sexpr), $(sexpr), and %select(cond, then, else). # ---- Native register numbers -------------------------------------------- @@ -126,15 +126,15 @@ # ---- Low-level instruction encoders -------------------------------------- %macro aa64_rrr(base, rd, ra, rb) -%le32((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_add_imm(rd, ra, imm12) -%le32((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_sub_imm(rd, ra, imm12) -%le32((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_mov_rr(dst, src) @@ -142,23 +142,23 @@ %aa64_add_imm(sp, src, 0), %select((= %aa64_is_sp(src) 1), %aa64_add_imm(dst, sp, 0), - %le32((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst))))) + %((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst))))) %endm %macro aa64_ubfm(rd, ra, immr, imms) -%le32((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_sbfm(rd, ra, immr, imms) -%le32((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_movz(rd, imm16) -%le32((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) +%((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) %endm %macro aa64_movn(rd, imm16) -%le32((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) +%((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) %endm %macro aa64_materialize_small_imm(rd, imm) @@ 
-168,11 +168,11 @@ %endm %macro aa64_ldst_uimm12(base, rt, rn, off_bytes, size_log2) -%le32((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) +%((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) %endm %macro aa64_ldst_unscaled(base, rt, rn, off) -%le32((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) +%((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) %endm %macro aa64_mem_uimm_base_LD() @@ -257,25 +257,25 @@ %endm %macro aa64_cmp_skip(cond, ra, rb) -%le32((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31)) -%le32((| 0x54000040 cond)) +%((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31)) +%((| 0x54000040 cond)) %endm %macro aa64_br(reg) -%le32((| 0xD61F0000 (<< %aa64_reg(reg) 5))) +%((| 0xD61F0000 (<< %aa64_reg(reg) 5))) %endm %macro aa64_blr(reg) -%le32((| 0xD63F0000 (<< %aa64_reg(reg) 5))) +%((| 0xD63F0000 (<< %aa64_reg(reg) 5))) %endm %macro aa64_ret() -%le32(0xD65F03C0) +%(0xD65F03C0) %endm %macro aa64_lit64_prefix(rd) -%le32((| 0x58000040 %aa64_reg(rd))) -%le32(0x14000002) +%((| 0x58000040 %aa64_reg(rd))) +%(0x14000002) %endm # ---- P1 register-register op lowering ----------------------------------- @@ -308,11 +308,11 @@ %aa64_rrr(0x9AC00C00, rd, ra, rb) %endm %macro aa64_rrr_MUL(rd, ra, rb) -%le32((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_rrr_REM(rd, ra, rb) -%le32((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch))) -%le32((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd))) +%((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch))) +%((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd))) %endm %macro aa64_rrr_op(op, rd, ra, rb) @@ -434,16 +434,16 @@ %endm %macro 
p1_condbz_BEQZ(ra) -%le32((| 0xB5000000 (<< 2 5) %aa64_reg(ra))) +%((| 0xB5000000 (<< 2 5) %aa64_reg(ra))) %aa64_br(br) %endm %macro p1_condbz_BNEZ(ra) -%le32((| 0xB4000000 (<< 2 5) %aa64_reg(ra))) +%((| 0xB4000000 (<< 2 5) %aa64_reg(ra))) %aa64_br(br) %endm %macro p1_condbz_BLTZ(ra) -%le32((| 0xEB1F001F (<< %aa64_reg(ra) 5))) -%le32((| 0x54000040 10)) +%((| 0xEB1F001F (<< %aa64_reg(ra) 5))) +%((| 0x54000040 10)) %aa64_br(br) %endm %macro p1_condbz(op, ra) @@ -468,7 +468,7 @@ %aa64_mov_rr(a3, t0) %aa64_mov_rr(x4, s0) %aa64_mov_rr(x5, s1) -%le32(0xD4000001) +%(0xD4000001) %aa64_mov_rr(a1, save0) %aa64_mov_rr(a2, save1) %aa64_mov_rr(a3, save2) @@ -477,26 +477,26 @@ # ---- Linux syscall number data words ------------------------------------- %macro p1_sys_read() -%le64(63) +$(63) %endm %macro p1_sys_write() -%le64(64) +$(64) %endm %macro p1_sys_close() -%le64(57) +$(57) %endm %macro p1_sys_openat() -%le64(56) +$(56) %endm %macro p1_sys_exit() -%le64(93) +$(93) %endm %macro p1_sys_clone() -%le64(220) +$(220) %endm %macro p1_sys_execve() -%le64(221) +$(221) %endm %macro p1_sys_waitid() -%le64(95) +$(95) %endm diff --git a/src/m1macro.c b/src/m1macro.c @@ -8,8 +8,10 @@ * * %NAME(x, y) function-like macro call * ## token pasting inside macro bodies - * %le32(expr) evaluate an integer S-expression, emit LE 32-bit hex - * %le64(expr) evaluate an integer S-expression, emit LE 64-bit hex + * !(expr) evaluate an integer S-expression, emit LE 8-bit hex + * @(expr) evaluate an integer S-expression, emit LE 16-bit hex + * %(expr) evaluate an integer S-expression, emit LE 32-bit hex + * $(expr) evaluate an integer S-expression, emit LE 64-bit hex * %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e * * Expression syntax is intentionally Lisp-shaped: @@ -31,7 +33,7 @@ * -> define_macro(): consume header + body tokens into macros[] and * macro_body_tokens[]; register name and param list. 
* - * %le32(e) / %le64(e) / %select(c,t,e) + * !(e) / @(e) / %(e) / $(e) / %select(c,t,e) * -> expand_builtin_call(): parse arg spans, eval S-expression(s) via * eval_expr_range(), emit LE hex or push the chosen token span. * @@ -778,7 +780,7 @@ static enum ExprOp expr_op_code(const struct Token *tok) if (token_text_eq(tok, "|")) { return EXPR_OR; } - if (token_text_eq(tok, "^")) { + if (token_text_eq(tok, "$")) { return EXPR_XOR; } if (token_text_eq(tok, "~")) { @@ -1098,9 +1100,11 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok) return 0; } - if (token_text_eq(tok, "%le32") || token_text_eq(tok, "%le64")) { + if (token_text_eq(tok, "!") || token_text_eq(tok, "@") || + token_text_eq(tok, "%") || token_text_eq(tok, "$")) { struct TokenSpan arg; struct Token *end_pos; + int bytes; if (arg_count != 1) { return fail("bad builtin"); @@ -1113,8 +1117,10 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok) } s->pos = end_pos; s->line_start = 0; - return emit_hex_value((unsigned long long)value, - token_text_eq(tok, "%le32") ? 4 : 8); + bytes = token_text_eq(tok, "!") ? 1 : + token_text_eq(tok, "@") ? 2 : + token_text_eq(tok, "%") ? 
4 : 8; + return emit_hex_value((unsigned long long)value, bytes); } if (token_text_eq(tok, "%select")) { @@ -1205,8 +1211,10 @@ static int process_tokens(void) if (tok->kind == TOK_WORD && tok + 1 < s->end && (tok + 1)->kind == TOK_LPAREN && - (token_text_eq(tok, "%le32") || - token_text_eq(tok, "%le64") || + (token_text_eq(tok, "!") || + token_text_eq(tok, "@") || + token_text_eq(tok, "%") || + token_text_eq(tok, "$") || token_text_eq(tok, "%select"))) { if (!expand_builtin_call(s, tok)) { return 0; diff --git a/tests/m1m/full-parity.M1M b/tests/m1m/full-parity.M1M @@ -18,8 +18,8 @@ x %PASTE(HELLO, _WORLD) %CHAIN(recursed) -%le32((+ 1 (<< 2 8))) -%le64((| 0x1122 (<< 0x33 16))) +%((+ 1 (<< 2 8))) +$((| 0x1122 (<< 0x33 16))) %CHOOSE((= 4 4), selected_true, selected_false) %CHOOSE((!= 4 4), selected_bad, selected_else) END