commit 611ad4d2c08121778de01ec03e8e59898617c458
parent 64c477b2cbf7684e884fa53252a8d89b2a09300c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 23 Apr 2026 15:52:15 -0700
tests/m1pp: add 7 oracle-parity fixtures + unpark full-parity
Each .expected was generated by running m1pp/m1pp.c (the oracle) on the
fixture, so a passing test means the M1 implementation is byte-for-byte
identical to the C reference on that input.
03-builtins - !@%$ at 1/2/4/8-byte sizes, including high-bit values
04-expr-ops - every operator: variadic folds, unary -, /, %, shifts, ~,
all 6 comparisons, nested expressions
05-int-atoms - decimal/hex literals, negatives, multi-digit, high-bit wrap
06-paste - multi-paste chain, prefix/suffix patterns, mix on one line
07-rescan - macro-in-macro, macro arg holding a call, macro-in-expression
08-select - empty branches, multi-token branches, branch with macro call,
nested %select
09-args - nested parens, depth-1-only commas, 0-param, 16-param, string
token containing a comma
Unparks the existing _full-parity fixture (now 10-full-parity) - it has
been passing oracle parity since the integration completed.
Diffstat:
16 files changed, 352 insertions(+), 0 deletions(-)
diff --git a/tests/m1pp/03-builtins.M1pp b/tests/m1pp/03-builtins.M1pp
@@ -0,0 +1,15 @@
+# Phase 8 parity: each of !(1B), @(2B), %(4B), $(8B) emits little-endian
+# uppercase hex of (2 * size) chars. Exercises each size at:
+# - high-bit-clear literal with distinct bytes, so byte order is observable (@, %, $)
+# - hex literal that fills the slot exactly
+# - value with the high bit set (signed wrap survives the round trip)
+
+!(0x7F)
+!(0xFF)
+@(0x1234)
+@(0xFFFF)
+%(0x12345678)
+%(0xFFFFFFFF)
+$(0x1122334455667788)
+$((<< 1 63))
+END
diff --git a/tests/m1pp/03-builtins.expected b/tests/m1pp/03-builtins.expected
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+7F
+FF
+3412
+FFFF
+78563412
+FFFFFFFF
+8877665544332211
+0000000000000080
+END
diff --git a/tests/m1pp/04-expr-ops.M1pp b/tests/m1pp/04-expr-ops.M1pp
@@ -0,0 +1,38 @@
+# Phase 7 parity: every operator in the oracle's apply_expr_op table.
+# `$` is the oracle's spelling of XOR (not `^`). `==` and `=` are aliases.
+
+# variadic: +, *, &, |, $ with argc >= 1; each is called with 3+ args to exercise the fold
+$((+ 1 2 3 4 5))
+$((* 2 3 4))
+$((& 0xFF 0x0F 0x07))
+$((| 0x10 0x20 0x40))
+$(($ 0xFF 0x0F 0xF0))
+
+# subtract: unary negate AND left-assoc with > 2 args
+$((- 7))
+$((- 100 10 20 30))
+
+# binary div / mod
+$((/ 1000 7))
+$((% 1000 7))
+
+# shifts: << logical, >> arithmetic (sign-extends negatives; 0x80000000 below is positive as i64, so that path is not hit here)
+$((<< 1 16))
+$((>> 0x80000000 4))
+
+# unary ~
+$((~ 0))
+
+# comparisons (each returns 0 or 1)
+$((= 5 5))
+$((== 5 5))
+$((!= 5 5))
+$((< 3 5))
+$((<= 5 5))
+$((> 5 3))
+$((>= 5 5))
+$((>= 5 6))
+
+# nested expressions
+$((+ (* 2 3) (- 7 4) (/ 12 3)))
+END
diff --git a/tests/m1pp/04-expr-ops.expected b/tests/m1pp/04-expr-ops.expected
@@ -0,0 +1,38 @@
+
+
+
+
+0F00000000000000
+1800000000000000
+0700000000000000
+7000000000000000
+0000000000000000
+
+
+F9FFFFFFFFFFFFFF
+2800000000000000
+
+
+8E00000000000000
+0600000000000000
+
+
+0000010000000000
+0000000800000000
+
+
+FFFFFFFFFFFFFFFF
+
+
+0100000000000000
+0100000000000000
+0000000000000000
+0100000000000000
+0100000000000000
+0100000000000000
+0100000000000000
+0000000000000000
+
+
+0D00000000000000
+END
diff --git a/tests/m1pp/05-int-atoms.M1pp b/tests/m1pp/05-int-atoms.M1pp
@@ -0,0 +1,13 @@
+# parse_int_token coverage: decimal (non-negative and negative), 0x hex (lower/upper),
+# multi-digit values, and the high-bit wrap that the oracle inherits
+# from strtoull casting to i64.
+
+$((+ 0))
+$((+ 42))
+$((+ -7))
+$((+ 1000000))
+$((+ 0xff))
+$((+ 0xFF))
+$((+ 0xDEADBEEF))
+$((+ 0x8000000000000000))
+END
diff --git a/tests/m1pp/05-int-atoms.expected b/tests/m1pp/05-int-atoms.expected
@@ -0,0 +1,13 @@
+
+
+
+
+0000000000000000
+2A00000000000000
+F9FFFFFFFFFFFFFF
+40420F0000000000
+FF00000000000000
+FF00000000000000
+EFBEADDE00000000
+0000000000000080
+END
diff --git a/tests/m1pp/06-paste.M1pp b/tests/m1pp/06-paste.M1pp
@@ -0,0 +1,32 @@
+# Phase 6 paste compaction.
+# - param ## param (basic)
+# - literal ## param / param ## literal
+# - chain: a ## b ## c (paste compactor processes left-to-right)
+# - multiple params on one body line, only some adjacent to ##
+
+%macro PP(a, b)
+a ## b
+%endm
+
+%macro PRE(x)
+prefix_ ## x
+%endm
+
+%macro SUF(x)
+x ## _suffix
+%endm
+
+%macro CHAIN(a, b, c)
+a ## b ## c
+%endm
+
+%macro MIX(a, b)
+a normal_ ## b a_ ## a
+%endm
+
+%PP(HELLO, _WORLD)
+%PRE(name)
+%SUF(name)
+%CHAIN(one, _two, _three)
+%MIX(LEFT, RIGHT)
+END
diff --git a/tests/m1pp/06-paste.expected b/tests/m1pp/06-paste.expected
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+
+
+HELLO_WORLD
+
+prefix_name
+
+name_suffix
+
+one_two_three
+
+LEFT normal_RIGHT a_LEFT
+
+END
diff --git a/tests/m1pp/07-rescan.M1pp b/tests/m1pp/07-rescan.M1pp
@@ -0,0 +1,33 @@
+# Rescan / nesting parity:
+# - macro body contains a macro call (rescanned via stream push)
+# - macro arg contains a macro call (parse_args walks tokens raw,
+# expansion happens after substitution + rescan on the new stream)
+# - macro-in-expression: builtin arg invokes a macro that produces
+# an integer expression atom
+
+%macro ID(x)
+x
+%endm
+
+%macro WRAP(x)
+[ %ID(x) ]
+%endm
+
+%macro ADD2(a, b)
+(+ a b)
+%endm
+
+%macro SHL(x, n)
+(<< x n)
+%endm
+
+# expansion that resolves through ID then back to top
+%WRAP(payload)
+
+# macro arg holding another call
+%WRAP(%ID(inner))
+
+# macro-in-expression composition: %SHL evaluates to an atom inside $()
+$(%SHL(1, 8))
+$((+ %SHL(1, 4) %ADD2(2, 3)))
+END
diff --git a/tests/m1pp/07-rescan.expected b/tests/m1pp/07-rescan.expected
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+[ payload
+]
+
+
+
+[ inner
+
+]
+
+
+
+0001000000000000
+1500000000000000
+END
diff --git a/tests/m1pp/08-select.M1pp b/tests/m1pp/08-select.M1pp
@@ -0,0 +1,33 @@
+# %select(cond, then, else): only the chosen branch is rescanned.
+# - cond truthy / falsy (any nonzero is truthy)
+# - empty chosen branch (no stream push)
+# - multi-token branch
+# - branch containing a macro call (gets rescanned)
+# - nested %select
+
+%macro PICK(c, y, n)
+%select(c, y, n)
+%endm
+
+# truthy / falsy
+%select(1, yes, no)
+%select(0, yes, no)
+%select((= 4 4), eq, ne)
+%select((!= 4 4), eq, ne)
+
+# empty branches
+empty_then_before %select(1, , kept) empty_then_after
+empty_else_before %select(0, kept, ) empty_else_after
+
+# multi-token branch
+%select(1, alpha beta gamma, x y z)
+
+# branch with a macro call (rescan)
+%macro WORD(s)
+s
+%endm
+%select(1, %WORD(picked), %WORD(skipped))
+
+# nested
+%select(1, %select(0, A, B), C)
+END
diff --git a/tests/m1pp/08-select.expected b/tests/m1pp/08-select.expected
@@ -0,0 +1,28 @@
+
+
+
+
+
+
+
+
+
+yes
+no
+eq
+ne
+
+
+empty_then_before empty_then_after
+empty_else_before empty_else_after
+
+
+alpha beta gamma
+
+
+picked
+
+
+
+B
+END
diff --git a/tests/m1pp/09-args.M1pp b/tests/m1pp/09-args.M1pp
@@ -0,0 +1,25 @@
+# Argument parsing edge cases for parse_args:
+# - nested parens in an arg (depth tracking)
+# - commas only split at depth 1 (commas inside nested parens stay in the arg)
+# - 0-param macro called with ()
+# - macro at the 16-param edge
+# - string token containing a comma is one TOK_STRING, not a split point
+
+%macro NULLARY()
+ok
+%endm
+
+%macro TWO(a, b)
+[ a | b ]
+%endm
+
+%macro SIXTEEN(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p)
+< a b c d e f g h i j k l m n o p >
+%endm
+
+%NULLARY()
+%TWO((nested, parens, here), trailing)
+%TWO((1 2 (3 4) 5), other)
+%TWO("string with, comma", x)
+%SIXTEEN(t1,t2,t3,t4,t5,t6,t7,t8,t9,tA,tB,tC,tD,tE,tF,tG)
+END
diff --git a/tests/m1pp/09-args.expected b/tests/m1pp/09-args.expected
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+ok
+
+[ ( nested , parens , here ) | trailing ]
+
+[ ( 1 2 ( 3 4 ) 5 ) | other ]
+
+[ "string with, comma" | x ]
+
+< t1 t2 t3 t4 t5 t6 t7 t8 t9 tA tB tC tD tE tF tG >
+
+END
diff --git a/tests/m1pp/_full-parity.M1pp b/tests/m1pp/10-full-parity.M1pp
diff --git a/tests/m1pp/_full-parity.expected b/tests/m1pp/10-full-parity.expected