commit f0abcca8fab5acb18dbfbfcea898ffc3bdc6d176
parent c40935b691eabb80b07e5bb9d65d5d86c402dbe1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 11:03:01 -0700
Use !@%$ vs %le32/le64 in m1pp
Diffstat:
5 files changed, 64 insertions(+), 56 deletions(-)
diff --git a/docs/M1M-IMPL.md b/docs/M1M-IMPL.md
@@ -11,7 +11,7 @@ The target expander supports the features required by `p1/*.M1M`:
- `%macro NAME(a, b)` / `%endm`
- `%NAME(x, y)` function-like expansion with recursive rescanning
- `##` token paste
-- `%le32(expr)` / `%le64(expr)`
+- `!(expr)` / `@(expr)` / `%(expr)` / `$(expr)`
- `%select(cond, then, else)`
- Lisp-shaped integer expressions used by the builtins
@@ -49,7 +49,7 @@ The program should be structured as a small compiler pipeline:
result as a new stream for recursive rescanning.
6. Builtins
- `%le32(expr)` and `%le64(expr)` evaluate integer expressions and emit
+ `!(expr)`, `@(expr)`, `%(expr)`, and `$(expr)` evaluate integer expressions and emit
one generated token directly.
`%select(cond, then, else)` evaluates `cond` first, then chooses
exactly one of `then` or `else`, copies only that chosen token range
@@ -126,7 +126,7 @@ indices. That keeps stream and argument walking simple in P1: advance by
one token record, compare pointers, no repeated `base + index << 5`.
Source token spans point into `input_buf`. `text_buf` is reserved for
-synthesized token text such as `##` pastes and `%le32`/`%le64` output.
+synthesized token text such as `##` pastes and `!@%$` output.
### Bottom-Up Helper Layers
@@ -310,13 +310,13 @@ main output stream.
expand_builtin_call(stream_ptr, builtin_tok)
```
-`%le32` / `%le64`:
+`!@%$`:
```text
parse args
require one arg
value = eval_expr_range(arg_start, arg_end)
-emit_hex_value(value, 4 or 8)
+emit_hex_value(value, 1, 2, 4, or 8)
advance stream pos to call_end_pos
line_start = 0
```
@@ -388,7 +388,7 @@ process_tokens:
operands, and pasted-parameter single-token validation.
6. Add integer atom parsing and explicit-stack expression evaluation.
Test arithmetic and comparison expressions without macros.
-7. Add `%le32` and `%le64`.
+7. Add `!@%$(`.
8. Add macro expansion inside expressions. This is required for
`p1/aarch64.M1M`.
9. Add `%select`.
diff --git a/docs/M1M-P1-PORT.md b/docs/M1M-P1-PORT.md
@@ -31,7 +31,7 @@ instruction encodings:
- `%macro NAME(a, b)` / `%endm`
- `%NAME(x, y)` function-like expansion with recursive rescanning
- `##` token paste
-- `%le32(expr)` / `%le64(expr)`
+- `!(expr)` / `@(expr)` / `%(expr)` / `$(expr)`
- `%select(cond, then, else)`
- Lisp-shaped integer expressions used by the builtins
@@ -98,7 +98,7 @@ applicable.
pass-through fixture output.
- [x] Add `tests/m1m/full-parity.M1M` and its C-oracle expected output as the
real expansion parity target. This fixture intentionally uses macro calls,
- recursive rescanning, paste, `%le32`, `%le64`, and `%select`; it is expected
+ recursive rescanning, paste, `!@%$(` and `%select`; it is expected
to fail under the partial P1 implementation until the remaining unchecked
expansion tasks land.
- [ ] Replace structural `%macro` skipping with real macro table storage:
@@ -114,7 +114,7 @@ applicable.
failures.
- [ ] Port integer atom parsing and S-expression evaluation for arithmetic,
comparisons, shifts, and bitwise operators.
-- [ ] Implement `%le32(expr)` and `%le64(expr)` on top of expression
+- [ ] Implement `!@%$(expr)` on top of expression
evaluation and token emission.
- [ ] Implement `%select(cond, then, else)` on top of expression evaluation
and stream pushback.
@@ -170,7 +170,7 @@ applicable.
8. **Builtins**
- Implement `%le32`, `%le64`, and `%select` on top of the expression evaluator
+ Implement `!@%$(` and `%select` on top of the expression evaluator
and stream pushback.
9. **Cleanup and limits**
@@ -231,7 +231,7 @@ Minimum checks:
```
src/m1macro.c oracle: p1/aarch64.M1M
src/m1macro.c oracle: p1/P1.M1M
- custom fixture: paste, recursive rescanning, %le32/%le64, %select
+   custom fixture: paste, recursive rescanning, !@%$(, %select
malformed fixtures: bad paste, wrong arg count, bad expression
```
diff --git a/p1/aarch64.M1M b/p1/aarch64.M1M
@@ -1,7 +1,7 @@
# aarch64.M1M -- P1v2 aarch64 backend expressed in m1macro.
#
# This mirrors p1/aarch64.py using the m1macro integer builtins:
-# %le32(sexpr), %le64(sexpr), and %select(cond, then, else).
+# %(sexpr), $(sexpr), and %select(cond, then, else).
# ---- Native register numbers --------------------------------------------
@@ -126,15 +126,15 @@
# ---- Low-level instruction encoders --------------------------------------
%macro aa64_rrr(base, rd, ra, rb)
-%le32((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
+%((| base (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
%macro aa64_add_imm(rd, ra, imm12)
-%le32((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
+%((| 0x91000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
%macro aa64_sub_imm(rd, ra, imm12)
-%le32((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
+%((| 0xD1000000 (<< (& imm12 0xFFF) 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
%macro aa64_mov_rr(dst, src)
@@ -142,23 +142,23 @@
%aa64_add_imm(sp, src, 0),
%select((= %aa64_is_sp(src) 1),
%aa64_add_imm(dst, sp, 0),
- %le32((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst)))))
+ %((| 0xAA000000 (<< %aa64_reg(src) 16) (<< 31 5) %aa64_reg(dst)))))
%endm
%macro aa64_ubfm(rd, ra, immr, imms)
-%le32((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
+%((| 0xD3400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
%macro aa64_sbfm(rd, ra, immr, imms)
-%le32((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
+%((| 0x93400000 (<< immr 16) (<< imms 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
%macro aa64_movz(rd, imm16)
-%le32((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd)))
+%((| 0xD2800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd)))
%endm
%macro aa64_movn(rd, imm16)
-%le32((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd)))
+%((| 0x92800000 (<< (& imm16 0xFFFF) 5) %aa64_reg(rd)))
%endm
%macro aa64_materialize_small_imm(rd, imm)
@@ -168,11 +168,11 @@
%endm
%macro aa64_ldst_uimm12(base, rt, rn, off_bytes, size_log2)
-%le32((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt)))
+%((| base (<< (>> off_bytes size_log2) 10) (<< %aa64_reg(rn) 5) %aa64_reg(rt)))
%endm
%macro aa64_ldst_unscaled(base, rt, rn, off)
-%le32((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt)))
+%((| base (<< (& off 0x1FF) 12) (<< %aa64_reg(rn) 5) %aa64_reg(rt)))
%endm
%macro aa64_mem_uimm_base_LD()
@@ -257,25 +257,25 @@
%endm
%macro aa64_cmp_skip(cond, ra, rb)
-%le32((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31))
-%le32((| 0x54000040 cond))
+%((| 0xEB000000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) 31))
+%((| 0x54000040 cond))
%endm
%macro aa64_br(reg)
-%le32((| 0xD61F0000 (<< %aa64_reg(reg) 5)))
+%((| 0xD61F0000 (<< %aa64_reg(reg) 5)))
%endm
%macro aa64_blr(reg)
-%le32((| 0xD63F0000 (<< %aa64_reg(reg) 5)))
+%((| 0xD63F0000 (<< %aa64_reg(reg) 5)))
%endm
%macro aa64_ret()
-%le32(0xD65F03C0)
+%(0xD65F03C0)
%endm
%macro aa64_lit64_prefix(rd)
-%le32((| 0x58000040 %aa64_reg(rd)))
-%le32(0x14000002)
+%((| 0x58000040 %aa64_reg(rd)))
+%(0x14000002)
%endm
# ---- P1 register-register op lowering -----------------------------------
@@ -308,11 +308,11 @@
%aa64_rrr(0x9AC00C00, rd, ra, rb)
%endm
%macro aa64_rrr_MUL(rd, ra, rb)
-%le32((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
+%((| 0x9B000000 (<< %aa64_reg(rb) 16) (<< 31 10) (<< %aa64_reg(ra) 5) %aa64_reg(rd)))
%endm
%macro aa64_rrr_REM(rd, ra, rb)
-%le32((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch)))
-%le32((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd)))
+%((| 0x9AC00C00 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 5) %aa64_reg(scratch)))
+%((| 0x9B008000 (<< %aa64_reg(rb) 16) (<< %aa64_reg(ra) 10) (<< %aa64_reg(scratch) 5) %aa64_reg(rd)))
%endm
%macro aa64_rrr_op(op, rd, ra, rb)
@@ -434,16 +434,16 @@
%endm
%macro p1_condbz_BEQZ(ra)
-%le32((| 0xB5000000 (<< 2 5) %aa64_reg(ra)))
+%((| 0xB5000000 (<< 2 5) %aa64_reg(ra)))
%aa64_br(br)
%endm
%macro p1_condbz_BNEZ(ra)
-%le32((| 0xB4000000 (<< 2 5) %aa64_reg(ra)))
+%((| 0xB4000000 (<< 2 5) %aa64_reg(ra)))
%aa64_br(br)
%endm
%macro p1_condbz_BLTZ(ra)
-%le32((| 0xEB1F001F (<< %aa64_reg(ra) 5)))
-%le32((| 0x54000040 10))
+%((| 0xEB1F001F (<< %aa64_reg(ra) 5)))
+%((| 0x54000040 10))
%aa64_br(br)
%endm
%macro p1_condbz(op, ra)
@@ -468,7 +468,7 @@
%aa64_mov_rr(a3, t0)
%aa64_mov_rr(x4, s0)
%aa64_mov_rr(x5, s1)
-%le32(0xD4000001)
+%(0xD4000001)
%aa64_mov_rr(a1, save0)
%aa64_mov_rr(a2, save1)
%aa64_mov_rr(a3, save2)
@@ -477,26 +477,26 @@
# ---- Linux syscall number data words -------------------------------------
%macro p1_sys_read()
-%le64(63)
+$(63)
%endm
%macro p1_sys_write()
-%le64(64)
+$(64)
%endm
%macro p1_sys_close()
-%le64(57)
+$(57)
%endm
%macro p1_sys_openat()
-%le64(56)
+$(56)
%endm
%macro p1_sys_exit()
-%le64(93)
+$(93)
%endm
%macro p1_sys_clone()
-%le64(220)
+$(220)
%endm
%macro p1_sys_execve()
-%le64(221)
+$(221)
%endm
%macro p1_sys_waitid()
-%le64(95)
+$(95)
%endm
diff --git a/src/m1macro.c b/src/m1macro.c
@@ -8,8 +8,10 @@
*
* %NAME(x, y) function-like macro call
* ## token pasting inside macro bodies
- * %le32(expr) evaluate an integer S-expression, emit LE 32-bit hex
- * %le64(expr) evaluate an integer S-expression, emit LE 64-bit hex
+ * !(expr) evaluate an integer S-expression, emit LE 8-bit hex
+ * @(expr) evaluate an integer S-expression, emit LE 16-bit hex
+ * %(expr) evaluate an integer S-expression, emit LE 32-bit hex
+ * $(expr) evaluate an integer S-expression, emit LE 64-bit hex
* %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e
*
* Expression syntax is intentionally Lisp-shaped:
@@ -31,7 +33,7 @@
* -> define_macro(): consume header + body tokens into macros[] and
* macro_body_tokens[]; register name and param list.
*
- * %le32(e) / %le64(e) / %select(c,t,e)
+ * !(e) / @(e) / %(e) / $(e) / %select(c,t,e)
* -> expand_builtin_call(): parse arg spans, eval S-expression(s) via
* eval_expr_range(), emit LE hex or push the chosen token span.
*
@@ -778,7 +780,7 @@ static enum ExprOp expr_op_code(const struct Token *tok)
if (token_text_eq(tok, "|")) {
return EXPR_OR;
}
- if (token_text_eq(tok, "^")) {
+ if (token_text_eq(tok, "$")) {
return EXPR_XOR;
}
if (token_text_eq(tok, "~")) {
@@ -1098,9 +1100,11 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok)
return 0;
}
- if (token_text_eq(tok, "%le32") || token_text_eq(tok, "%le64")) {
+ if (token_text_eq(tok, "!") || token_text_eq(tok, "@") ||
+ token_text_eq(tok, "%") || token_text_eq(tok, "$")) {
struct TokenSpan arg;
struct Token *end_pos;
+ int bytes;
if (arg_count != 1) {
return fail("bad builtin");
@@ -1113,8 +1117,10 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok)
}
s->pos = end_pos;
s->line_start = 0;
- return emit_hex_value((unsigned long long)value,
- token_text_eq(tok, "%le32") ? 4 : 8);
+ bytes = token_text_eq(tok, "!") ? 1 :
+ token_text_eq(tok, "@") ? 2 :
+ token_text_eq(tok, "%") ? 4 : 8;
+ return emit_hex_value((unsigned long long)value, bytes);
}
if (token_text_eq(tok, "%select")) {
@@ -1205,8 +1211,10 @@ static int process_tokens(void)
if (tok->kind == TOK_WORD &&
tok + 1 < s->end &&
(tok + 1)->kind == TOK_LPAREN &&
- (token_text_eq(tok, "%le32") ||
- token_text_eq(tok, "%le64") ||
+ (token_text_eq(tok, "!") ||
+ token_text_eq(tok, "@") ||
+ token_text_eq(tok, "%") ||
+ token_text_eq(tok, "$") ||
token_text_eq(tok, "%select"))) {
if (!expand_builtin_call(s, tok)) {
return 0;
diff --git a/tests/m1m/full-parity.M1M b/tests/m1m/full-parity.M1M
@@ -18,8 +18,8 @@ x
%PASTE(HELLO, _WORLD)
%CHAIN(recursed)
-%le32((+ 1 (<< 2 8)))
-%le64((| 0x1122 (<< 0x33 16)))
+%((+ 1 (<< 2 8)))
+$((| 0x1122 (<< 0x33 16)))
%CHOOSE((= 4 4), selected_true, selected_false)
%CHOOSE((!= 4 4), selected_bad, selected_else)
END