boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs

commit f0abcca8fab5acb18dbfbfcea898ffc3bdc6d176
parent c40935b691eabb80b07e5bb9d65d5d86c402dbe1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 23 Apr 2026 11:03:01 -0700

Use !@%$ vs %le32/le64 in m1pp

Diffstat:
Mdocs/M1M-IMPL.md | 12++++++------
Mdocs/M1M-P1-PORT.md | 10+++++-----
Mp1/aarch64.M1M | 68++++++++++++++++++++++++++++++++++----------------------------------
Msrc/m1macro.c | 26+++++++++++++++++---------
Mtests/m1m/full-parity.M1M | 4++--
5 files changed, 64 insertions(+), 56 deletions(-)

diff --git a/docs/M1M-IMPL.md b/docs/M1M-IMPL.md @@ -11,7 +11,7 @@ The target expander supports the features required by `p1/*.M1M`: - `%macro NAME(a, b)` / `%endm` - `%NAME(x, y)` function-like expansion with recursive rescanning - `##` token paste -- `%le32(expr)` / `%le64(expr)` +- `!(expr)` / `@(expr)` / `%(expr)` / `$(expr)` - `%select(cond, then, else)` - Lisp-shaped integer expressions used by the builtins @@ -49,7 +49,7 @@ The program should be structured as a small compiler pipeline: result as a new stream for recursive rescanning. 6. Builtins - `%le32(expr)` and `%le64(expr)` evaluate integer expressions and emit + `%(expr)` and `$(expr)` evaluate integer expressions and emit one generated token directly. `%select(cond, then, else)` evaluates `cond` first, then chooses exactly one of `then` or `else`, copies only that chosen token range @@ -126,7 +126,7 @@ indices. That keeps stream and argument walking simple in P1: advance by one token record, compare pointers, no repeated `base + index << 5`. Source token spans point into `input_buf`. `text_buf` is reserved for -synthesized token text such as `##` pastes and `%le32`/`%le64` output. +synthesized token text such as `##` pastes and `!@%$` output. ### Bottom-Up Helper Layers @@ -310,13 +310,13 @@ main output stream. expand_builtin_call(stream_ptr, builtin_tok) ``` -`%le32` / `%le64`: +`!@%$` ```text parse args require one arg value = eval_expr_range(arg_start, arg_end) -emit_hex_value(value, 4 or 8) +emit_hex_value(value, 1 2 4 or 8) advance stream pos to call_end_pos line_start = 0 ``` @@ -388,7 +388,7 @@ process_tokens: operands, and pasted-parameter single-token validation. 6. Add integer atom parsing and explicit-stack expression evaluation. Test arithmetic and comparison expressions without macros. -7. Add `%le32` and `%le64`. +7. Add `!@%$(` 8. Add macro expansion inside expressions. This is required for `p1/aarch64.M1M`. 9. Add `%select`. 
diff --git a/docs/M1M-P1-PORT.md b/docs/M1M-P1-PORT.md @@ -31,7 +31,7 @@ instruction encodings: - `%macro NAME(a, b)` / `%endm` - `%NAME(x, y)` function-like expansion with recursive rescanning - `##` token paste -- `%le32(expr)` / `%le64(expr)` +- `!(expr)` / `@(expr)` / `%(expr)` / `$(expr)` - `%select(cond, then, else)` - Lisp-shaped integer expressions used by the builtins @@ -98,7 +98,7 @@ applicable. pass-through fixture output. - [x] Add `tests/m1m/full-parity.M1M` and its C-oracle expected output as the real expansion parity target. This fixture intentionally uses macro calls, - recursive rescanning, paste, `%le32`, `%le64`, and `%select`; it is expected + recursive rescanning, paste, `!@%$(` and `%select`; it is expected to fail under the partial P1 implementation until the remaining unchecked expansion tasks land. - [ ] Replace structural `%macro` skipping with real macro table storage: @@ -114,7 +114,7 @@ applicable. failures. - [ ] Port integer atom parsing and S-expression evaluation for arithmetic, comparisons, shifts, and bitwise operators. -- [ ] Implement `%le32(expr)` and `%le64(expr)` on top of expression +- [ ] Implement `!@%$(expr)` on top of expression evaluation and token emission. - [ ] Implement `%select(cond, then, else)` on top of expression evaluation and stream pushback. @@ -170,7 +170,7 @@ applicable. 8. **Builtins** - Implement `%le32`, `%le64`, and `%select` on top of the expression evaluator + Implement `!@%$(` and `%select` on top of the expression evaluator and stream pushback. 9. **Cleanup and limits** @@ -231,7 +231,7 @@ Minimum checks: ``` src/m1macro.c oracle: p1/aarch64.M1M src/m1macro.c oracle: p1/P1.M1M - custom fixture: paste, recursive rescanning, %le32/%le64, %select + custom fixture: paste, recursive rescanning, !@%%(, %select malformed fixtures: bad paste, wrong arg count, bad expression ``` diff --git a/p1/aarch64.M1M b/p1/aarch64.M1M @@ -1,7 +1,7 @@ # aarch64.M1M -- P1v2 aarch64 backend expressed in m1macro. 
# # This mirrors p1/aarch64.py using the m1macro integer builtins: -# %le32(sexpr), %le64(sexpr), and %select(cond, then, else). +# %(sexpr), $(sexpr), and %select(cond, then, else). # ---- Native register numbers -------------------------------------------- @@ -126,15 +126,15 @@ # ---- Low-level instruction encoders -------------------------------------- %macro aa64_rrr(base, rd, ra, rb) -%le32((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_add_imm(rd, ra, imm12) -%le32((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_sub_imm(rd, ra, imm12) -%le32((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_mov_rr(dst, src) @@ -142,23 +142,23 @@ %aa64_add_imm(sp, src, 0), %select((= %aa64_is_sp(src) 1), %aa64_add_imm(dst, sp, 0), - %le32((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst))))) + %((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst))))) %endm %macro aa64_ubfm(rd, ra, immr, imms) -%le32((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_sbfm(rd, ra, immr, imms) -%le32((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_movz(rd, imm16) -%le32((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) +%((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) %endm %macro aa64_movn(rd, imm16) -%le32((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) +%((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd))) %endm %macro aa64_materialize_small_imm(rd, imm) @@ 
-168,11 +168,11 @@ %endm %macro aa64_ldst_uimm12(base, rt, rn, off_bytes, size_log2) -%le32((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) +%((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) %endm %macro aa64_ldst_unscaled(base, rt, rn, off) -%le32((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) +%((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt))) %endm %macro aa64_mem_uimm_base_LD() @@ -257,25 +257,25 @@ %endm %macro aa64_cmp_skip(cond, ra, rb) -%le32((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31)) -%le32((| 0x54000040 cond)) +%((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31)) +%((| 0x54000040 cond)) %endm %macro aa64_br(reg) -%le32((| 0xD61F0000 (<< %aa64_reg(reg) 5))) +%((| 0xD61F0000 (<< %aa64_reg(reg) 5))) %endm %macro aa64_blr(reg) -%le32((| 0xD63F0000 (<< %aa64_reg(reg) 5))) +%((| 0xD63F0000 (<< %aa64_reg(reg) 5))) %endm %macro aa64_ret() -%le32(0xD65F03C0) +%(0xD65F03C0) %endm %macro aa64_lit64_prefix(rd) -%le32((| 0x58000040 %aa64_reg(rd))) -%le32(0x14000002) +%((| 0x58000040 %aa64_reg(rd))) +%(0x14000002) %endm # ---- P1 register-register op lowering ----------------------------------- @@ -308,11 +308,11 @@ %aa64_rrr(0x9AC00C00, rd, ra, rb) %endm %macro aa64_rrr_MUL(rd, ra, rb) -%le32((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) +%((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd))) %endm %macro aa64_rrr_REM(rd, ra, rb) -%le32((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch))) -%le32((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd))) +%((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch))) +%((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd))) %endm %macro aa64_rrr_op(op, rd, ra, rb) @@ -434,16 +434,16 @@ %endm %macro 
p1_condbz_BEQZ(ra) -%le32((| 0xB5000000 (<< 2 5) %aa64_reg(ra))) +%((| 0xB5000000 (<< 2 5) %aa64_reg(ra))) %aa64_br(br) %endm %macro p1_condbz_BNEZ(ra) -%le32((| 0xB4000000 (<< 2 5) %aa64_reg(ra))) +%((| 0xB4000000 (<< 2 5) %aa64_reg(ra))) %aa64_br(br) %endm %macro p1_condbz_BLTZ(ra) -%le32((| 0xEB1F001F (<< %aa64_reg(ra) 5))) -%le32((| 0x54000040 10)) +%((| 0xEB1F001F (<< %aa64_reg(ra) 5))) +%((| 0x54000040 10)) %aa64_br(br) %endm %macro p1_condbz(op, ra) @@ -468,7 +468,7 @@ %aa64_mov_rr(a3, t0) %aa64_mov_rr(x4, s0) %aa64_mov_rr(x5, s1) -%le32(0xD4000001) +%(0xD4000001) %aa64_mov_rr(a1, save0) %aa64_mov_rr(a2, save1) %aa64_mov_rr(a3, save2) @@ -477,26 +477,26 @@ # ---- Linux syscall number data words ------------------------------------- %macro p1_sys_read() -%le64(63) +$(63) %endm %macro p1_sys_write() -%le64(64) +$(64) %endm %macro p1_sys_close() -%le64(57) +$(57) %endm %macro p1_sys_openat() -%le64(56) +$(56) %endm %macro p1_sys_exit() -%le64(93) +$(93) %endm %macro p1_sys_clone() -%le64(220) +$(220) %endm %macro p1_sys_execve() -%le64(221) +$(221) %endm %macro p1_sys_waitid() -%le64(95) +$(95) %endm diff --git a/src/m1macro.c b/src/m1macro.c @@ -8,8 +8,10 @@ * * %NAME(x, y) function-like macro call * ## token pasting inside macro bodies - * %le32(expr) evaluate an integer S-expression, emit LE 32-bit hex - * %le64(expr) evaluate an integer S-expression, emit LE 64-bit hex + * !(expr) evaluate an integer S-expression, emit LE 8-bit hex + * @(expr) evaluate an integer S-expression, emit LE 16-bit hex + * %(expr) evaluate an integer S-expression, emit LE 32-bit hex + * $(expr) evaluate an integer S-expression, emit LE 64-bit hex * %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e * * Expression syntax is intentionally Lisp-shaped: @@ -31,7 +33,7 @@ * -> define_macro(): consume header + body tokens into macros[] and * macro_body_tokens[]; register name and param list. 
* - * %le32(e) / %le64(e) / %select(c,t,e) + * !(e) / @(e) / %(e) / $(e) / %select(c,t,e) * -> expand_builtin_call(): parse arg spans, eval S-expression(s) via * eval_expr_range(), emit LE hex or push the chosen token span. * @@ -778,7 +780,7 @@ static enum ExprOp expr_op_code(const struct Token *tok) if (token_text_eq(tok, "|")) { return EXPR_OR; } - if (token_text_eq(tok, "^")) { + if (token_text_eq(tok, "$")) { return EXPR_XOR; } if (token_text_eq(tok, "~")) { @@ -1098,9 +1100,11 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok) return 0; } - if (token_text_eq(tok, "%le32") || token_text_eq(tok, "%le64")) { + if (token_text_eq(tok, "!") || token_text_eq(tok, "@") || + token_text_eq(tok, "%") || token_text_eq(tok, "$")) { struct TokenSpan arg; struct Token *end_pos; + int bytes; if (arg_count != 1) { return fail("bad builtin"); @@ -1113,8 +1117,10 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok) } s->pos = end_pos; s->line_start = 0; - return emit_hex_value((unsigned long long)value, - token_text_eq(tok, "%le32") ? 4 : 8); + bytes = token_text_eq(tok, "!") ? 1 : + token_text_eq(tok, "@") ? 2 : + token_text_eq(tok, "%") ? 
4 : 8; + return emit_hex_value((unsigned long long)value, bytes); } if (token_text_eq(tok, "%select")) { @@ -1205,8 +1211,10 @@ static int process_tokens(void) if (tok->kind == TOK_WORD && tok + 1 < s->end && (tok + 1)->kind == TOK_LPAREN && - (token_text_eq(tok, "%le32") || - token_text_eq(tok, "%le64") || + (token_text_eq(tok, "!") || + token_text_eq(tok, "@") || + token_text_eq(tok, "%") || + token_text_eq(tok, "$") || token_text_eq(tok, "%select"))) { if (!expand_builtin_call(s, tok)) { return 0; diff --git a/tests/m1m/full-parity.M1M b/tests/m1m/full-parity.M1M @@ -18,8 +18,8 @@ x %PASTE(HELLO, _WORLD) %CHAIN(recursed) -%le32((+ 1 (<< 2 8))) -%le64((| 0x1122 (<< 0x33 16))) +%((+ 1 (<< 2 8))) +$((| 0x1122 (<< 0x33 16))) %CHOOSE((= 4 4), selected_true, selected_false) %CHOOSE((!= 4 4), selected_bad, selected_else) END