M1pp: drop %bytes builtin; bare strings emit decoded bytes - boot2

commit ea3e075a6aab78970bf2b1726b72fb6688bf70b2
parent 27dba8a270502627d69babede9b2112837600836
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 16:27:46 -0700

M1pp: drop %bytes builtin; bare strings emit decoded bytes

A TOK_STRING reaching the output stream now decodes its quoted body
into raw little-endian hex bytes via the same escape table that used
to live behind %bytes (\n \t \r \0 \\ \" \xNN). %bytes itself is
removed: bare "..." (or '...') at statement position emits bytes
directly, %str(IDENT) produces a STRING that flows through the same
emission path, and (strlen "...") still reads the string atom inside
expressions without decoding.

Test suite: regenerate the five expected files whose strings used to
pass through verbatim (01-passthrough, 09-args, 14-str-builtin,
14-str-paste, 29-string-escapes); rename 27-bytes.M1pp ->
27-string-emit.M1pp and 31-bytes-via-macro.M1pp ->
31-string-via-macro.M1pp with the %bytes(...) wrapper stripped; delete
the underscore-prefixed negative fixtures: the _27-bytes-* ones because
their %bytes-validator errors no longer apply, plus _04-strlen-badarg,
_12-braced-malformed, and _14-str-malformed.

docs/M1PP.md: rewrite the STRING token, %str, and string-emission
sections; drop %bytes from the feature bullet.

Diffstat:
M M1pp/M1pp.c  | 144 +++++++++++++++++++++++++++++++++++++------------------------------------------
M docs/M1PP.md  | 41 +++++++++++++++++++++++++----------------
M tests/M1pp/01-passthrough.M1pp  | 7 +++++--
M tests/M1pp/01-passthrough.expected  | 11 +++++++----
M tests/M1pp/09-args.expected  | 2 +-
M tests/M1pp/14-str-builtin.M1pp  | 7 +++++--
M tests/M1pp/14-str-builtin.expected  | 15 +++++++++------
M tests/M1pp/14-str-paste.M1pp  | 7 +++++--
M tests/M1pp/14-str-paste.expected  | 9 ++++++---
D tests/M1pp/27-bytes.M1pp  | 17 -----------------
D tests/M1pp/27-bytes.expected  | 17 -----------------
A tests/M1pp/27-string-emit.M1pp  | 18 ++++++++++++++++++
A tests/M1pp/27-string-emit.expected  | 18 ++++++++++++++++++
M tests/M1pp/29-string-escapes.M1pp  | 14 +++++++-------
M tests/M1pp/29-string-escapes.expected  | 16 ++++++++--------
D tests/M1pp/31-bytes-via-macro.M1pp  | 27 ---------------------------
A tests/M1pp/31-string-via-macro.M1pp  | 27 +++++++++++++++++++++++++++
R tests/M1pp/31-bytes-via-macro.expected -> tests/M1pp/31-string-via-macro.expected  | 0 
D tests/M1pp/_04-strlen-badarg.M1pp  | 7 -------
D tests/M1pp/_12-braced-malformed.M1pp  | 17 -----------------
D tests/M1pp/_14-str-malformed.M1pp  | 8 --------
D tests/M1pp/_27-bytes-bad-escape.M1pp  | 10 ----------
D tests/M1pp/_27-bytes-bad-hex.M1pp  | 9 ---------
D tests/M1pp/_27-bytes-not-string.M1pp  | 9 ---------
D tests/M1pp/_27-bytes-truncated-hex.M1pp  | 9 ---------

25 files changed, 208 insertions(+), 258 deletions(-)
diff --git a/M1pp/M1pp.c b/M1pp/M1pp.c
@@ -494,11 +494,17 @@ static int emit_newline(void)
     return 1;
 }
 
+static int emit_string_as_bytes(const struct Token *tok);
+static int emit_hex_value(unsigned long long value, int bytes);
+
 static int emit_token(const struct Token *tok)
 {
     if (tok->kind == TOK_LBRACE || tok->kind == TOK_RBRACE) {
         return 1;
     }
+    if (tok->kind == TOK_STRING) {
+        return emit_string_as_bytes(tok);
+    }
     if (output_need_space) {
         if (output_used + 1 >= MAX_OUTPUT) {
             return fail("output overflow");
@@ -515,6 +521,67 @@ static int emit_token(const struct Token *tok)
     return 1;
 }
 
+/* Decode a "..." or '...' literal (dropping its first and last char) and
+ * emit each byte through emit_hex_value(b, 1). Recognised escapes inside
+ * the literal: \n \t \r \0 \\ \" \xNN; any other escape, \' included, is
+ * "bad escape". Returns 1 on success. No NUL is appended automatically. */
+static int emit_string_as_bytes(const struct Token *tok)
+{
+    const char *src;
+    int src_len;
+    int src_i;
+
+    if (tok->text.len < 2) { /* too short to hold both quote characters */
+        return fail("bad string");
+    }
+    src = tok->text.ptr + 1; /* skip the opening quote */
+    src_len = tok->text.len - 2; /* body length without the two quotes */
+    src_i = 0;
+    while (src_i < src_len) {
+        unsigned int b;
+        char c = src[src_i++];
+        if (c == '\\') {
+            char e;
+            if (src_i >= src_len) { /* backslash was the last body char */
+                return fail("bad escape");
+            }
+            e = src[src_i++];
+            if (e == 'n')       b = 0x0A;
+            else if (e == 't')  b = 0x09;
+            else if (e == 'r')  b = 0x0D;
+            else if (e == '0')  b = 0x00;
+            else if (e == '\\') b = 0x5C;
+            else if (e == '"')  b = 0x22;
+            else if (e == 'x') {
+                int hi, lo, hv, lv;
+                if (src_i + 2 > src_len) { /* \x needs two more body chars */
+                    return fail("bad escape");
+                }
+                hi = (unsigned char)src[src_i++];
+                lo = (unsigned char)src[src_i++];
+                hv = (hi >= '0' && hi <= '9') ? hi - '0' :
+                     (hi >= 'a' && hi <= 'f') ? hi - 'a' + 10 :
+                     (hi >= 'A' && hi <= 'F') ? hi - 'A' + 10 : -1; /* -1: not a hex digit */
+                lv = (lo >= '0' && lo <= '9') ? lo - '0' :
+                     (lo >= 'a' && lo <= 'f') ? lo - 'a' + 10 :
+                     (lo >= 'A' && lo <= 'F') ? lo - 'A' + 10 : -1; /* -1: not a hex digit */
+                if (hv < 0 || lv < 0) {
+                    return fail("bad escape");
+                }
+                b = (unsigned int)((hv << 4) | lv); /* high nibble first */
+            } else {
+                return fail("bad escape");
+            }
+        } else {
+            b = (unsigned char)c; /* unescaped char: emitted as its own byte value */
+        }
+        if (!emit_hex_value((unsigned long long)b, 1)) { /* width argument: 1 byte */
+            return 0;
+        }
+    }
+    return 1;
+}
+
 static int push_stream_span(struct TokenSpan span, int pool_mark)
 {
     struct Stream *s;
@@ -1713,83 +1780,6 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok)
         return push_pool_stream_from_mark(mark);
     }
 
-    if (token_text_eq(tok, "%bytes")) {
-        /* Emit the raw bytes of a "..." string as a sequence of one
-         * `TOK_WORD` per byte (each token's text is the two hex digits
-         * for that byte). Recognised escapes: \n \t \r \0 \\ \" and
-         * \xNN. No NUL is appended; the caller writes one explicitly
-         * if needed. hex2pp's parse_byte_stream coalesces hex digits
-         * across whitespace, so the emitted tokens reassemble into a
-         * contiguous byte sequence at link time. */
-        struct Token *arg_tok;
-        struct Token *end_pos;
-        const char *src;
-        int src_len;
-        int src_i;
-
-        if (arg_count != 1) {
-            return fail("bad builtin");
-        }
-        if (arg_ends[0] - arg_starts[0] != 1) {
-            return fail("bad builtin");
-        }
-        arg_tok = arg_starts[0];
-        if (arg_tok->kind != TOK_STRING ||
-            arg_tok->text.len < 2 || arg_tok->text.ptr[0] != '"') {
-            return fail("bad builtin");
-        }
-        end_pos = call_end_pos;
-        s->pos = end_pos;
-        s->line_start = 0;
-
-        src = arg_tok->text.ptr + 1;
-        src_len = arg_tok->text.len - 2;
-        src_i = 0;
-        while (src_i < src_len) {
-            unsigned int b;
-            char c = src[src_i++];
-            if (c == '\\') {
-                char e;
-                if (src_i >= src_len) {
-                    return fail("bad escape");
-                }
-                e = src[src_i++];
-                if (e == 'n')       b = 0x0A;
-                else if (e == 't')  b = 0x09;
-                else if (e == 'r')  b = 0x0D;
-                else if (e == '0')  b = 0x00;
-                else if (e == '\\') b = 0x5C;
-                else if (e == '"')  b = 0x22;
-                else if (e == 'x') {
-                    int hi, lo, hv, lv;
-                    if (src_i + 2 > src_len) {
-                        return fail("bad escape");
-                    }
-                    hi = (unsigned char)src[src_i++];
-                    lo = (unsigned char)src[src_i++];
-                    hv = (hi >= '0' && hi <= '9') ? hi - '0' :
-                         (hi >= 'a' && hi <= 'f') ? hi - 'a' + 10 :
-                         (hi >= 'A' && hi <= 'F') ? hi - 'A' + 10 : -1;
-                    lv = (lo >= '0' && lo <= '9') ? lo - '0' :
-                         (lo >= 'a' && lo <= 'f') ? lo - 'a' + 10 :
-                         (lo >= 'A' && lo <= 'F') ? lo - 'A' + 10 : -1;
-                    if (hv < 0 || lv < 0) {
-                        return fail("bad escape");
-                    }
-                    b = (unsigned int)((hv << 4) | lv);
-                } else {
-                    return fail("bad escape");
-                }
-            } else {
-                b = (unsigned char)c;
-            }
-            if (!emit_hex_value((unsigned long long)b, 1)) {
-                return 0;
-            }
-        }
-        return 1;
-    }
-
     if (token_text_eq(tok, "%str")) {
         struct Token *arg_tok;
         struct Token *end_pos;
diff --git a/docs/M1PP.md b/docs/M1PP.md
@@ -28,9 +28,11 @@ The implementation lives in `M1pp/M1pp.c`. It is one pass, allocation-free
   arithmetic, bitwise, shift, comparison, `strlen`)
 - Little-endian hex emission: `!` (1B), `@` (2B), `%` (4B), `$` (8B) —
   emits bare hex digits (e.g. `AABBCCDD`) consumable by `hex2++`
-- Raw byte emission from string literals: `%bytes("...")`
+- Raw byte emission from string literals: a bare `"..."` token at
+  statement position emits its decoded bytes
 - Conditional token selection: `%select(cond, then, else)`
-- Stringification: `%str(IDENT)` → `"IDENT"`
+- Stringification: `%str(IDENT)` produces a `STRING` token holding the
+  identifier text, which then emits as bytes
 - Line comments (`#`, `;`); whitespace-insensitive output normalization
 - Single-pass, allocation-free implementation with fixed static buffers;
   fail-fast on first error
@@ -54,9 +56,8 @@ The lexer produces a flat token array. Token kinds:
 - `STRING` — `"..."` or `'...'` (quotes included in the token text).
   Inside a string, a backslash plus the next character is consumed as
   one unit, so `\"` and `\\` do not end the literal. The escape's
-  *meaning* is decoded later by whoever interprets the bytes (e.g.
-  `%bytes` decodes `\n`, `\xNN`, etc.); the lexer only uses the
-  backslash to find the right closing quote.
+  *meaning* is decoded at emission (see [String emission](#string-emission));
+  the lexer only uses the backslash to find the right closing quote.
 - `NEWLINE` — a single `\n`
 - `LPAREN`, `RPAREN`, `COMMA`, `LBRACE`, `RBRACE`
 - `PASTE` — the `##` marker
@@ -181,23 +182,31 @@ branches are raw token spans, not expressions.
 
 ### `%str(IDENT)`
 
-Stringifies a single `WORD` token into a double-quoted string literal:
-`%str(foo)` → `"foo"`. The argument must be exactly one word token.
+Stringifies a single `WORD` token into a `STRING` token wrapping the
+identifier text in double quotes. The argument must be exactly one word
+token. The resulting `STRING` flows through emission like any bare
+string literal: `%str(foo)` produces the same output bytes as `"foo"`
+(`66 6F 6F`). Use it when the identifier is built up from macro
+arguments or `##` paste and you want its text emitted as bytes.
 
-### `%bytes(STRING)`
+### String emission
 
-Emits the raw bytes of a `"..."`-quoted string as a sequence of
-two-hex-digit `WORD` tokens — one per byte. `hex2++` coalesces hex
-digits across whitespace, so the result reassembles into a contiguous
-byte sequence at link time. No NUL terminator is appended; write `00`
-explicitly if you need one. Recognised escapes inside the string are:
+A `"..."` or `'...'` token reaching the output stream is decoded into raw bytes,
+one two-hex-digit `WORD` token per byte. `hex2++` coalesces hex digits
+across whitespace, so the result reassembles into a contiguous byte
+sequence at link time. No NUL terminator is appended; write `00`
+explicitly (or use `\0`) if you need one. Recognised escapes inside the
+string are:
 
     \n  0x0A    \t  0x09    \r  0x0D    \0  0x00
     \\  0x5C    \"  0x22    \xNN  byte NN (two hex digits)
 
-Any other backslash escape is an error. The argument must be exactly one
-`STRING` token quoted with `"`. Example: `%bytes("hi\n")` emits
-`68 69 0A`.
+Any other backslash escape is an error. Example: `:msg "hi\n"` emits
+`68 69 0A` immediately after defining `:msg`.
+
+Strings inside expression arguments (e.g. `(strlen "literal")`) and
+inside `%str(IDENT)` are not decoded — the string atom is read by the
+expression evaluator instead.
 
 ### `%local(NAME)`
 
diff --git a/tests/M1pp/01-passthrough.M1pp b/tests/M1pp/01-passthrough.M1pp
@@ -1,6 +1,9 @@
 ## Pass-through fixture: tokenizer + structural macro-keyword skip.
-## No macro calls, no ## paste, no !@%$ or %select. The m1pp expander must
-## match the C oracle byte-for-byte on this input.
+## No macro calls, no ## paste, no !@%$ or %select.
+##
+## STRING tokens (`"..."` and `'...'`) decode to raw bytes on emission
+## — the only "passthrough" exception, kept here so the byte form is
+## visible in the expected output alongside the rest.
 
 hello world
   leading whitespace  and   runs   of   spaces
diff --git a/tests/M1pp/01-passthrough.expected b/tests/M1pp/01-passthrough.expected
@@ -1,6 +1,9 @@
 ## Pass-through fixture: tokenizer + structural macro-keyword skip.
-## No macro calls , no ## paste , no !@%$ or %select. The m1pp expander must
-## match the C oracle byte-for-byte on this input.
+## No macro calls , no ## paste , no !@%$ or %select.
+##
+## STRING tokens ( `"..."` and `'...'` ) decode to raw bytes on emission
+## — the only 70 61 73 73 74 68 72 6F 75 67 68 exception , kept here so the byte form is
+## visible in the expected output alongside the rest.
 
 hello world
 leading whitespace and runs of spaces
@@ -8,8 +11,8 @@ mixed , punctuation ( goes ) through
 
 ## this comment goes away
 line with
-"double-quoted string stays"
-'single quoted too'
+64 6F 75 62 6C 65 2D 71 75 6F 74 65 64 20 73 74 72 69 6E 67 20 73 74 61 79 73
+73 69 6E 67 6C 65 20 71 75 6F 74 65 64 20 74 6F 6F
 
 
 first top-level line after the macro
diff --git a/tests/M1pp/09-args.expected b/tests/M1pp/09-args.expected
@@ -14,7 +14,7 @@ ok
 
 [ ( 1 2 ( 3 4 ) 5 ) | other ]
 
-[ "string with, comma" | x ]
+[ 73 74 72 69 6E 67 20 77 69 74 68 2C 20 63 6F 6D 6D 61 | x ]
 
 < t1 t2 t3 t4 t5 t6 t7 t8 t9 tA tB tC tD tE tF tG >
 
diff --git a/tests/M1pp/14-str-builtin.M1pp b/tests/M1pp/14-str-builtin.M1pp
@@ -1,6 +1,9 @@
 # %str stringification builtin.
-#  - %str(IDENT) wraps the identifier text in double quotes
-#  - result is a TOK_STRING, byte-identical to a hand-written literal
+#  - %str(IDENT) wraps the identifier text in double quotes, producing
+#    a TOK_STRING.
+#  - At emission, every TOK_STRING (whether hand-written or built by
+#    %str) decodes into raw bytes, so `%str(hello)` and `"hello"`
+#    yield identical output (`68 65 6C 6C 6F`).
 
 %macro quoteit(name)
 %str(name)
diff --git a/tests/M1pp/14-str-builtin.expected b/tests/M1pp/14-str-builtin.expected
@@ -3,15 +3,18 @@
 
 
 
-"hello"
 
-"foo_bar"
 
-"a"
 
+68 65 6C 6C 6F
 
+66 6F 6F 5F 62 61 72
 
-"hello"
-"foo_bar"
-"a"
+61
+
+
+
+68 65 6C 6C 6F
+66 6F 6F 5F 62 61 72
+61
 END
diff --git a/tests/M1pp/14-str-paste.M1pp b/tests/M1pp/14-str-paste.M1pp
@@ -1,7 +1,10 @@
 # `##` paste + `%str` stringify composed on the same identifier.
 #  - `##` joins word fragments: str_##n -> str_quote (TOK_WORD).
-#  - `%str(n)` wraps the same identifier in quotes (TOK_STRING).
-#  - Complementary operators: paste builds the label, %str builds the literal.
+#  - `%str(n)` wraps the same identifier in quotes (TOK_STRING), which
+#    is then emitted as raw bytes — so `:str_quote %str(quote)`
+#    produces `:str_quote 71 75 6F 74 65`.
+#  - Complementary operators: paste builds the label, %str builds the
+#    decoded byte sequence.
 
 %macro defsym(n)
 :str_ ## n %str(n)
diff --git a/tests/M1pp/14-str-paste.expected b/tests/M1pp/14-str-paste.expected
@@ -4,10 +4,13 @@
 
 
 
-:str_quote "quote"
 
-:str_if "if"
 
-:str_begin "begin"
+
+:str_quote 71 75 6F 74 65
+
+:str_if 69 66
+
+:str_begin 62 65 67 69 6E
 
 END
diff --git a/tests/M1pp/27-bytes.M1pp b/tests/M1pp/27-bytes.M1pp
@@ -1,17 +0,0 @@
-# %bytes(STRING): emit raw bytes of a "..." literal as contiguous hex.
-# Recognised escapes: \n \t \r \0 \\ \" and \xNN.
-
-# Plain ASCII.
-%bytes("hi")
-
-# Empty string emits nothing.
-%bytes("")
-
-# Each supported single-char escape, plus \xNN.
-%bytes("a\nb\tc\rd\0e\\f\"g\x7Fh")
-
-# Followed by trailing literal hex, to confirm hex2pp's byte-stream
-# coalescing handles the boundary.
-%bytes("X") 90
-
-END
diff --git a/tests/M1pp/27-bytes.expected b/tests/M1pp/27-bytes.expected
@@ -1,17 +0,0 @@
-
-
-
-
-68 69
-
-
-
-
-
-61 0A 62 09 63 0D 64 00 65 5C 66 22 67 7F 68
-
-
-
-58 90
-
-END
diff --git a/tests/M1pp/27-string-emit.M1pp b/tests/M1pp/27-string-emit.M1pp
@@ -0,0 +1,18 @@
+# Bare string-token emission: every TOK_STRING reaching the output
+# decodes its quoted body to raw bytes, one two-hex-digit token per
+# byte. Recognised escapes inside the literal: \n \t \r \0 \\ \" \xNN.
+
+# Plain ASCII.
+"hi"
+
+# Empty string emits nothing.
+""
+
+# Each supported single-char escape, plus \xNN.
+"a\nb\tc\rd\0e\\f\"g\x7Fh"
+
+# Followed by trailing literal hex, to confirm hex2pp's byte-stream
+# coalescing handles the boundary.
+"X" 90
+
+END
diff --git a/tests/M1pp/27-string-emit.expected b/tests/M1pp/27-string-emit.expected
@@ -0,0 +1,18 @@
+
+
+
+
+
+68 69
+
+
+
+
+
+61 0A 62 09 63 0D 64 00 65 5C 66 22 67 7F 68
+
+
+
+58 90
+
+END
diff --git a/tests/M1pp/29-string-escapes.M1pp b/tests/M1pp/29-string-escapes.M1pp
@@ -1,11 +1,11 @@
-# Lexer-level string escape preservation outside %bytes.
-#  - In a plain STRING token the lexer treats `\X` as one unit when
-#    finding the closing quote, so `\"` and `\\` do NOT terminate the
-#    literal. The escape's *meaning* is left to the consumer; the
-#    emitter writes the bytes back verbatim.
+# String escape decoding at emission.
+#  - In the lexer, `\X` inside a STRING is one unit (so `\"` and `\\`
+#    do NOT terminate the literal), but the byte value is left for the
+#    emitter to decode.
+#  - At output, `\\` becomes `5C`, `\"` becomes `22`, etc. — the same
+#    table used to live behind `%bytes(...)` and now applies to every
+#    bare string token.
 #  - Both quote styles ("..." and '...') participate.
-#  - %str composition: the ## paste compactor sees the STRING as one
-#    token and passes it through alongside a paste-built WORD.
 
 "plain"
 "with \"quoted\" inside"
diff --git a/tests/M1pp/29-string-escapes.expected b/tests/M1pp/29-string-escapes.expected
@@ -7,16 +7,16 @@
 
 
 
-"plain"
-"with \"quoted\" inside"
-"trailing backslash pair: \\"
-"mixed \\ and \" together"
-'single \"quoted\" inside'
-'single \\ pair'
+70 6C 61 69 6E
+77 69 74 68 20 22 71 75 6F 74 65 64 22 20 69 6E 73 69 64 65
+74 72 61 69 6C 69 6E 67 20 62 61 63 6B 73 6C 61 73 68 20 70 61 69 72 3A 20 5C
+6D 69 78 65 64 20 5C 20 61 6E 64 20 22 20 74 6F 67 65 74 68 65 72
+73 69 6E 67 6C 65 20 22 71 75 6F 74 65 64 22 20 69 6E 73 69 64 65
+73 69 6E 67 6C 65 20 5C 20 70 61 69 72
 
 
-"arg with \"escaped\" quotes"
+61 72 67 20 77 69 74 68 20 22 65 73 63 61 70 65 64 22 20 71 75 6F 74 65 73
 
-"arg with \\ backslash pair"
+61 72 67 20 77 69 74 68 20 5C 20 62 61 63 6B 73 6C 61 73 68 20 70 61 69 72
 
 END
diff --git a/tests/M1pp/31-bytes-via-macro.M1pp b/tests/M1pp/31-bytes-via-macro.M1pp
@@ -1,27 +0,0 @@
-# %bytes appearing inside a macro body, so the builtin runs against
-# rescanned/expanded input rather than top-level source. The string
-# argument may be a literal or a parameter-substituted STRING token.
-
-%macro EMIT_BYTES(s)
-%bytes(s)
-%endm
-
-%macro PREFIXED(s)
-AA
-%bytes(s)
-BB
-%endm
-
-# Literal string passed through a macro arg.
-%EMIT_BYTES("hi")
-
-# Surrounded by literal hex inside the body, to confirm hex2pp's
-# byte-stream coalescing works across the rescan boundary.
-%PREFIXED("ok")
-
-# Each escape exercised again, but via macro substitution.
-%EMIT_BYTES("a\nb")
-
-# Empty string substituted in.
-%EMIT_BYTES("")
-END
diff --git a/tests/M1pp/31-string-via-macro.M1pp b/tests/M1pp/31-string-via-macro.M1pp
@@ -0,0 +1,27 @@
+# String emission through macro substitution: STRING tokens passed as
+# a macro argument decode to bytes when the body's `s` is rescanned at
+# the call site.
+
+%macro EMIT(s)
+s
+%endm
+
+%macro PREFIXED(s)
+AA
+s
+BB
+%endm
+
+# Literal string passed through a macro arg.
+%EMIT("hi")
+
+# Surrounded by literal hex inside the body, to confirm hex2pp's
+# byte-stream coalescing works across the rescan boundary.
+%PREFIXED("ok")
+
+# Each escape exercised again, but via macro substitution.
+%EMIT("a\nb")
+
+# Empty string substituted in.
+%EMIT("")
+END
diff --git a/tests/M1pp/31-bytes-via-macro.expected b/tests/M1pp/31-string-via-macro.expected
diff --git a/tests/M1pp/_04-strlen-badarg.M1pp b/tests/M1pp/_04-strlen-badarg.M1pp
@@ -1,7 +0,0 @@
-# Malformed: strlen requires a double-quoted TOK_STRING argument.
-# Single-quoted '...' hex literals are meaningless for strlen and must
-# be rejected. Expected behavior: non-zero exit from the expander.
-# (Underscore-prefixed filename so test.sh skips this fixture.)
-
-%((strlen 'deadbeef'))
-END
diff --git a/tests/M1pp/_12-braced-malformed.M1pp b/tests/M1pp/_12-braced-malformed.M1pp
@@ -1,17 +0,0 @@
-# Malformed: unmatched `{` inside a macro call.
-#
-# Expected behavior: the m1pp expander MUST exit non-zero. parse_args detects
-# that the outer RPAREN closes the call while brace_depth is still > 0 and
-# reports "unbalanced braces".
-#
-# No `.expected` file is needed — the leading underscore in the filename
-# causes m1pp/test.sh to skip this fixture. Run by hand to observe the
-# non-zero exit with "unbalanced braces".
-
-%macro F(a, b)
-a b
-%endm
-
-%F(first, { never_closed )
-
-END
diff --git a/tests/M1pp/_14-str-malformed.M1pp b/tests/M1pp/_14-str-malformed.M1pp
@@ -1,8 +0,0 @@
-# %str malformed input.
-#  - Underscore-prefix => skipped by test.sh.
-#  - Expected outcome: m1pp exits non-zero.
-#  - %str takes exactly one single-token WORD argument. A multi-token
-#    argument (`a b`) must be rejected; so must an already-string arg
-#    (`"already_string"`). This fixture exercises the multi-token path.
-
-%str(a b)
diff --git a/tests/M1pp/_27-bytes-bad-escape.M1pp b/tests/M1pp/_27-bytes-bad-escape.M1pp
@@ -1,10 +0,0 @@
-# %bytes: unknown backslash escape must be rejected.
-#
-# Recognised escapes: \n \t \r \0 \\ \" and \xNN. Anything else
-# (here `\q`) is an error. Expected outcome: m1pp exits non-zero
-# with "bad escape".
-#
-# (Underscore-prefixed filename so test.sh skips this fixture.)
-
-%bytes("oops\q")
-END
diff --git a/tests/M1pp/_27-bytes-bad-hex.M1pp b/tests/M1pp/_27-bytes-bad-hex.M1pp
@@ -1,9 +0,0 @@
-# %bytes: \xNN requires exactly two hex digits. A non-hex char in
-# either position is an error.
-#
-# Expected outcome: m1pp exits non-zero with "bad escape".
-#
-# (Underscore-prefixed filename so test.sh skips this fixture.)
-
-%bytes("oops\xZZ")
-END
diff --git a/tests/M1pp/_27-bytes-not-string.M1pp b/tests/M1pp/_27-bytes-not-string.M1pp
@@ -1,9 +0,0 @@
-# %bytes argument must be a single TOK_STRING token quoted with `"`.
-# A WORD argument must be rejected.
-#
-# Expected outcome: m1pp exits non-zero with "bad builtin".
-#
-# (Underscore-prefixed filename so test.sh skips this fixture.)
-
-%bytes(notastring)
-END
diff --git a/tests/M1pp/_27-bytes-truncated-hex.M1pp b/tests/M1pp/_27-bytes-truncated-hex.M1pp
@@ -1,9 +0,0 @@
-# %bytes: \xNN requires exactly two hex digits. A single trailing hex
-# digit at the close-quote position must be rejected.
-#
-# Expected outcome: m1pp exits non-zero with "bad escape".
-#
-# (Underscore-prefixed filename so test.sh skips this fixture.)
-
-%bytes("oops\x7")
-END

	boot2 Playing with the bootstrap
	git clone https://git.ryansepassi.com/git/boot2.git
	Log \| Files \| Refs \| README

M	M1pp/M1pp.c	\|	144	+++++++++++++++++++++++++++++++++++++------------------------------------------
M	docs/M1PP.md	\|	41	+++++++++++++++++++++++++----------------
M	tests/M1pp/01-passthrough.M1pp	\|	7	+++++--
M	tests/M1pp/01-passthrough.expected	\|	11	+++++++----
M	tests/M1pp/09-args.expected	\|	2	+-
M	tests/M1pp/14-str-builtin.M1pp	\|	7	+++++--
M	tests/M1pp/14-str-builtin.expected	\|	15	+++++++++------
M	tests/M1pp/14-str-paste.M1pp	\|	7	+++++--
M	tests/M1pp/14-str-paste.expected	\|	9	++++++---
D	tests/M1pp/27-bytes.M1pp	\|	17	-----------------
D	tests/M1pp/27-bytes.expected	\|	17	-----------------
A	tests/M1pp/27-string-emit.M1pp	\|	18	++++++++++++++++++
A	tests/M1pp/27-string-emit.expected	\|	18	++++++++++++++++++
M	tests/M1pp/29-string-escapes.M1pp	\|	14	+++++++-------
M	tests/M1pp/29-string-escapes.expected	\|	16	++++++++--------
D	tests/M1pp/31-bytes-via-macro.M1pp	\|	27	---------------------------
A	tests/M1pp/31-string-via-macro.M1pp	\|	27	+++++++++++++++++++++++++++
R	tests/M1pp/31-bytes-via-macro.expected -> tests/M1pp/31-string-via-macro.expected	\|	0
D	tests/M1pp/_04-strlen-badarg.M1pp	\|	7	-------
D	tests/M1pp/_12-braced-malformed.M1pp	\|	17	-----------------
D	tests/M1pp/_14-str-malformed.M1pp	\|	8	--------
D	tests/M1pp/_27-bytes-bad-escape.M1pp	\|	10	----------
D	tests/M1pp/_27-bytes-bad-hex.M1pp	\|	9	---------
D	tests/M1pp/_27-bytes-not-string.M1pp	\|	9	---------
D	tests/M1pp/_27-bytes-truncated-hex.M1pp	\|	9	---------