commit 38c3a3448502859a26a4e6f2d6a481bd6ece8504
parent e8d3d15a25a1195b1b4cfa3d4f5bc513f3483a2b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 11:50:53 -0700
M1pp/hex2pp: direct pipeline, drop M0-style quoting
Cut the M0/hex2 layer out of the contract: M1pp output now feeds hex2pp
directly. Move lexical scoping for control-flow labels to hex2pp so
there's only one mechanism, and add %bytes for raw string emission since
hex2pp itself stays purely byte/label oriented.
- M1pp: remove %scope/%endscope and ::/&:: resolution; emit !/@/%/$
integer literals as bare hex (no surrounding quotes); add %bytes("...")
with \n \t \r \0 \\ \" \xNN escapes.
- hex2pp: .scope nests; dotted-label lookup walks innermost-out so an
inner scope shadows an outer one. Scope ids are assigned in the same
order in both passes, so pass-1 definitions and pass-2 references agree.
- Docs updated to match.
Diffstat:
| M | M1pp/M1pp.c | | | 245 | +++++++++++++++++++++++++++++++++++-------------------------------------------- |
| M | docs/HEX2pp.md | | | 23 | +++++++++++++++-------- |
| M | docs/M1PP.md | | | 121 | ++++++++++++++++++++++++------------------------------------------------------- |
| M | hex2pp/hex2pp.c | | | 44 | ++++++++++++++++++++++++-------------------- |
4 files changed, 184 insertions(+), 249 deletions(-)
diff --git a/M1pp/M1pp.c b/M1pp/M1pp.c
@@ -1,5 +1,7 @@
/*
- * Tiny single-pass M1 macro expander.
+ * Tiny single-pass M1pp macro expander. Output is consumed directly by
+ * hex2pp -- there is no intermediate M0/hex2 stage. All emission is in
+ * the byte/label/directive vocabulary hex2pp accepts.
*
* Syntax:
* %macro NAME(a, b)
@@ -17,14 +19,14 @@
* $(expr) evaluate an integer S-expression, emit LE 64-bit hex
* %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e
* %str(IDENT) stringify a single WORD token into a "..."-quoted literal
+ * %bytes(STR) emit the raw bytes of STR as contiguous hex digits
*
* %frame NAME / %endframe set/clear a single-slot "current frame"
* %local(NAME) expand to the body of <frame>_FRAME.<NAME>
*
- * Frames are a separate state from the %scope stack. %frame does not push
- * onto scope_stack; %scope does not change the current frame. This lets a
- * function body open inner control-flow scopes (whose ::labels resolve
- * against the scope stack) without disturbing %local lookup.
+ * Lexical scoping for control-flow labels is delegated to hex2pp's
+ * `.scope` / `.endscope` (which nest). M1pp itself only handles
+ * per-expansion macro hygiene labels (`:@name` / `&@name`).
*
* Expression syntax is intentionally Lisp-shaped:
* atoms: decimal or 0x-prefixed integer literals
@@ -114,7 +116,6 @@
* under 16 MiB. */
#define MAX_STACK 64
#define MAX_EXPR_FRAMES 256
-#define MAX_SCOPE_DEPTH 32
enum {
TOK_WORD,
@@ -198,7 +199,6 @@ static struct Token macro_body_tokens[MAX_MACRO_BODY_TOKENS];
static struct Token expand_pool[MAX_EXPAND];
static struct Macro macros[MAX_MACROS];
static struct Stream streams[MAX_STACK];
-static struct TextSpan scope_stack[MAX_SCOPE_DEPTH];
static struct TextSpan current_frame;
static int frame_active;
@@ -211,7 +211,6 @@ static int output_used;
static int output_need_space;
static int stream_top;
static int next_expansion_id;
-static int scope_depth;
static int current_line;
static int error_line;
static const char *input_path;
@@ -482,68 +481,11 @@ static int emit_newline(void)
return 1;
}
-static int emit_scoped_label(const struct Token *tok, int skip, char sigil)
-{
- /* Rewrite `::name` or `&::name` against the current scope stack.
- * skip is the number of leading chars to drop (`::` -> 2, `&::` -> 3);
- * sigil is the single-char prefix to emit (`:` for definitions, `&`
- * for references). With a non-empty scope stack the output is
- * sigil + scope1 + "__" + ... + scopeN + "__" + name; with an empty
- * stack it degrades to sigil + name (pass-through). */
- int name_len = tok->text.len - skip;
- int i;
-
- if (name_len <= 0) {
- return fail("bad scope label");
- }
-
- if (output_need_space) {
- if (output_used + 1 >= MAX_OUTPUT) {
- return fail("output overflow");
- }
- output_buf[output_used++] = ' ';
- }
-
- if (output_used + 1 >= MAX_OUTPUT) {
- return fail("output overflow");
- }
- output_buf[output_used++] = sigil;
-
- for (i = 0; i < scope_depth; i++) {
- int span_len = scope_stack[i].len;
- if (output_used + span_len + 2 >= MAX_OUTPUT) {
- return fail("output overflow");
- }
- memcpy(output_buf + output_used, scope_stack[i].ptr,
- (size_t)span_len);
- output_used += span_len;
- output_buf[output_used++] = '_';
- output_buf[output_used++] = '_';
- }
-
- if (output_used + name_len >= MAX_OUTPUT) {
- return fail("output overflow");
- }
- memcpy(output_buf + output_used, tok->text.ptr + skip, (size_t)name_len);
- output_used += name_len;
- output_need_space = 1;
- return 1;
-}
-
static int emit_token(const struct Token *tok)
{
if (tok->kind == TOK_LBRACE || tok->kind == TOK_RBRACE) {
return 1;
}
- if (tok->kind == TOK_WORD && tok->text.len >= 2 &&
- tok->text.ptr[0] == ':' && tok->text.ptr[1] == ':') {
- return emit_scoped_label(tok, 2, ':');
- }
- if (tok->kind == TOK_WORD && tok->text.len >= 3 &&
- tok->text.ptr[0] == '&' &&
- tok->text.ptr[1] == ':' && tok->text.ptr[2] == ':') {
- return emit_scoped_label(tok, 3, '&');
- }
if (output_need_space) {
if (output_used + 1 >= MAX_OUTPUT) {
return fail("output overflow");
@@ -1654,29 +1596,28 @@ static int eval_expr_range(struct TokenSpan span, long long *out)
static int emit_hex_value(unsigned long long value, int bytes)
{
- /* Wrap the hex digits in single quotes so M0 sees a STRING-literal
- * hex token, not a numeric token (which it would parse as decimal). */
- char tmp[19];
+ /* Emit the bytes as bare little-endian hex digits. hex2pp's byte-
+ * stream parser groups every two hex digits into one byte; no
+ * quoting or separators are needed. */
+ char tmp[17];
static const char hex[] = "0123456789ABCDEF";
struct Token tok;
int i;
char *text_ptr;
- int total_len = 2 + 2 * bytes;
+ int total_len = 2 * bytes;
- tmp[0] = '\'';
for (i = 0; i < bytes; i++) {
unsigned int b = (unsigned int)((value >> (8 * i)) & 0xFF);
- tmp[1 + 2 * i] = hex[b >> 4];
- tmp[1 + 2 * i + 1] = hex[b & 0x0F];
+ tmp[2 * i] = hex[b >> 4];
+ tmp[2 * i + 1] = hex[b & 0x0F];
}
- tmp[1 + 2 * bytes] = '\'';
tmp[total_len] = '\0';
text_ptr = append_text_len(tmp, total_len);
if (text_ptr == NULL) {
return 0;
}
- tok.kind = TOK_STRING;
+ tok.kind = TOK_WORD;
tok.tight = 0;
tok.line = current_line;
tok.text.ptr = text_ptr;
@@ -1759,6 +1700,98 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok)
return push_pool_stream_from_mark(mark);
}
+ if (token_text_eq(tok, "%bytes")) {
+ /* Emit the raw bytes of a "..." string as one WORD of contiguous
+ * hex digits. Recognised escapes: \n \t \r \0 \\ \" and \xNN.
+ * No NUL is appended; the caller writes one explicitly if needed. */
+ struct Token *arg_tok;
+ struct Token *end_pos;
+ struct Token out_tok;
+ static const char hex[] = "0123456789ABCDEF";
+ char *text_ptr;
+ const char *src;
+ int src_len;
+ int src_i;
+ int hex_len;
+
+ if (arg_count != 1) {
+ return fail("bad builtin");
+ }
+ if (arg_ends[0] - arg_starts[0] != 1) {
+ return fail("bad builtin");
+ }
+ arg_tok = arg_starts[0];
+ if (arg_tok->kind != TOK_STRING ||
+ arg_tok->text.len < 2 || arg_tok->text.ptr[0] != '"') {
+ return fail("bad builtin");
+ }
+ end_pos = call_end_pos;
+
+ src = arg_tok->text.ptr + 1;
+ src_len = arg_tok->text.len - 2;
+ if (text_used + 2 * src_len + 1 > MAX_TEXT) {
+ return fail("text overflow");
+ }
+ text_ptr = text_buf + text_used;
+ hex_len = 0;
+ src_i = 0;
+ while (src_i < src_len) {
+ unsigned int b;
+ char c = src[src_i++];
+ if (c == '\\') {
+ char e;
+ if (src_i >= src_len) {
+ return fail("bad escape");
+ }
+ e = src[src_i++];
+ if (e == 'n') b = 0x0A;
+ else if (e == 't') b = 0x09;
+ else if (e == 'r') b = 0x0D;
+ else if (e == '0') b = 0x00;
+ else if (e == '\\') b = 0x5C;
+ else if (e == '"') b = 0x22;
+ else if (e == 'x') {
+ int hi, lo, hv, lv;
+ if (src_i + 2 > src_len) {
+ return fail("bad escape");
+ }
+ hi = (unsigned char)src[src_i++];
+ lo = (unsigned char)src[src_i++];
+ hv = (hi >= '0' && hi <= '9') ? hi - '0' :
+ (hi >= 'a' && hi <= 'f') ? hi - 'a' + 10 :
+ (hi >= 'A' && hi <= 'F') ? hi - 'A' + 10 : -1;
+ lv = (lo >= '0' && lo <= '9') ? lo - '0' :
+ (lo >= 'a' && lo <= 'f') ? lo - 'a' + 10 :
+ (lo >= 'A' && lo <= 'F') ? lo - 'A' + 10 : -1;
+ if (hv < 0 || lv < 0) {
+ return fail("bad escape");
+ }
+ b = (unsigned int)((hv << 4) | lv);
+ } else {
+ return fail("bad escape");
+ }
+ } else {
+ b = (unsigned char)c;
+ }
+ text_buf[text_used++] = hex[(b >> 4) & 0xF];
+ text_buf[text_used++] = hex[b & 0xF];
+ hex_len += 2;
+ }
+ text_buf[text_used++] = '\0';
+
+ out_tok.kind = TOK_WORD;
+ out_tok.tight = 0;
+ out_tok.line = current_line;
+ out_tok.text.ptr = text_ptr;
+ out_tok.text.len = hex_len;
+ s->pos = end_pos;
+ s->line_start = 0;
+ if (hex_len == 0) {
+ return 1;
+ }
+ return emit_token(&out_tok);
+ }
+
if (token_text_eq(tok, "%str")) {
struct Token *arg_tok;
struct Token *end_pos;
@@ -1817,52 +1850,6 @@ static int expand_call(struct Stream *s, const struct Macro *macro)
return push_pool_stream_from_mark(mark);
}
-static int push_scope(struct Stream *s)
-{
- /* Header self-terminates at the scope name. Newlines after the name
- * are insignificant — they're skipped here so a multi-line scope
- * (`%scope NAME\nbody\n%endscope`) doesn't introduce an extra blank
- * line in output. */
- int started_at_line_start = s->line_start;
-
- s->pos++;
- skip_newlines(&s->pos, s->end);
- if (s->pos >= s->end || s->pos->kind != TOK_WORD) {
- return fail("bad scope header");
- }
- if (scope_depth >= MAX_SCOPE_DEPTH) {
- return fail("scope depth overflow");
- }
- scope_stack[scope_depth++] = s->pos->text;
- s->pos++;
- if (started_at_line_start) {
- skip_newlines(&s->pos, s->end);
- s->line_start = 1;
- }
- return 1;
-}
-
-static int pop_scope(struct Stream *s)
-{
- /* %endscope must be immediately followed by TOK_NEWLINE. The newline
- * is consumed iff %endscope itself appeared at line_start. */
- int started_at_line_start = s->line_start;
-
- s->pos++;
- if (scope_depth <= 0) {
- return fail("scope underflow");
- }
- scope_depth--;
- if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) {
- return fail("expected newline after %endscope");
- }
- if (started_at_line_start) {
- s->pos++;
- s->line_start = 1;
- }
- return 1;
-}
-
static int push_frame(struct Stream *s)
{
/* %frame NAME sets the single-slot current frame, used by %local
@@ -1956,20 +1943,6 @@ static int process_tokens(void)
continue;
}
- if (tok->kind == TOK_WORD && token_text_eq(tok, "%scope")) {
- if (!push_scope(s)) {
- return 0;
- }
- continue;
- }
-
- if (tok->kind == TOK_WORD && token_text_eq(tok, "%endscope")) {
- if (!pop_scope(s)) {
- return 0;
- }
- continue;
- }
-
if (tok->kind == TOK_WORD && token_text_eq(tok, "%frame")) {
if (!push_frame(s)) {
return 0;
@@ -2003,6 +1976,7 @@ static int process_tokens(void)
token_text_eq(tok, "$") ||
token_text_eq(tok, "%select") ||
token_text_eq(tok, "%str") ||
+ token_text_eq(tok, "%bytes") ||
token_text_eq(tok, "%local"))) {
if (!expand_builtin_call(s, tok)) {
return 0;
@@ -2028,9 +2002,6 @@ static int process_tokens(void)
}
}
- if (scope_depth != 0) {
- return fail("scope not closed");
- }
if (frame_active) {
return fail("frame not closed");
}
diff --git a/docs/HEX2pp.md b/docs/HEX2pp.md
@@ -2,7 +2,8 @@
A small, byte-oriented assembler/linker that takes hex source with labels and
references and emits a flat binary. Implemented in P1; used by `cc.scm` and
-the P1 backends as the final stage of the `M1pp → M1 → hex2++` toolchain.
+the P1 backends as the final stage of the `M1pp → hex2++` toolchain. M1pp
+output feeds hex2++ directly — there is no intermediate macro/hex stage.
## Invocation
@@ -71,7 +72,9 @@ definition, `&.NAME` / `%.NAME` / etc. are local references, and a bare
.endscope
```
-- `.scope` directives may not nest.
+- `.scope` directives nest. A dotted reference resolves to the nearest
+ enclosing definition, so an inner scope shadows an outer one with the
+ same local name.
- Non-dotted labels defined inside a `.scope` remain global.
- Dot-prefixed labels outside any `.scope` are an error.
@@ -114,9 +117,10 @@ Examples:
:jt
&case0-jt &case1-jt &case2-jt
-# string length prefix
+# string length prefix (string bytes themselves come from the
+# upstream M1pp layer, e.g. `%bytes("hello")`)
:s_begin
- "hello"
+ 68 65 6c 6c 6f
:s_end
&s_end-s_begin
```
@@ -158,12 +162,15 @@ Two passes:
- **Pass 1** — read every input file, advancing `ip` and recording label
definitions. `.align` and `.fill` advance `ip` deterministically;
- `.scope` / `.endscope` push and pop the current scope id.
-- **Pass 2** — re-read, emit bytes, resolve references.
+ `.scope` / `.endscope` push and pop the scope stack, assigning each
+ open scope a fresh id.
+- **Pass 2** — re-read, emit bytes, resolve references. Scope ids are
+ assigned in the same order, so pass-1 definitions and pass-2
+ references see identical ids.
The label table carries `(name, target_ip, scope_id)` entries. Lookup for a
-dotted name compares both name and current scope id; lookup for a non-dotted
-name ignores scope.
+dotted name walks the scope stack innermost-out and returns the first match;
+lookup for a non-dotted name ignores scope.
Both labels in `LABEL-OTHER` have known addresses by the start of pass 2, so
the subtraction is a single operation at emit time. No third pass is
diff --git a/docs/M1PP.md b/docs/M1PP.md
@@ -2,10 +2,11 @@
## Scope
-M1PP is a tiny single-pass macro expander that runs ahead of `M0`. It takes
-M1 source with macro directives and emits plain M1 suitable for `M0`.
+M1PP is a tiny single-pass macro expander. Its output is consumed
+directly by `hex2++` — there is no intermediate macro/hex stage. All
+emission is in the byte/label/directive vocabulary `hex2++` accepts.
-The implementation lives in `m1pp/m1pp.c`. It is one pass, allocation-free
+The implementation lives in `M1pp/M1pp.c`. It is one pass, allocation-free
(fixed static buffers), and stops at the first error.
## Features
@@ -19,23 +20,25 @@ The implementation lives in `m1pp/m1pp.c`. It is one pass, allocation-free
macros
- Local labels (`:@name` / `&@name`) rewritten per-expansion for hygienic
intra-macro labels
-- Scoped labels (`::name` / `&::name`) resolved at **emit time** against
- the `%scope` stack — enables generic control-flow macros like
- `loop`/`break`
- Struct and enum synthesis (`%struct`, `%enum`) generating per-field
zero-arg macros plus `SIZE`/`COUNT`
- Named stack-frame access via `%frame` / `%endframe` + `%local(field)`,
composing with `%struct`-generated `<frame>_FRAME.<field>` macros
- Compile-time integer expression language (Lisp S-expressions:
arithmetic, bitwise, shift, comparison, `strlen`)
-- M0-safe little-endian hex emission: `!` (1B), `@` (2B), `%` (4B),
- `$` (8B) — emits `'AABBCCDD'` quoted literals
+- Little-endian hex emission: `!` (1B), `@` (2B), `%` (4B), `$` (8B) —
+ emits bare hex digits (e.g. `AABBCCDD`) consumable by `hex2++`
+- Raw byte emission from string literals: `%bytes("...")`
- Conditional token selection: `%select(cond, then, else)`
- Stringification: `%str(IDENT)` → `"IDENT"`
- Line comments (`#`, `;`); whitespace-insensitive output normalization
- Single-pass, allocation-free implementation with fixed static buffers;
fail-fast on first error
+Lexical scoping for control-flow labels is delegated to `hex2++`'s
+nestable `.scope` / `.endscope`; M1PP itself only handles per-expansion
+macro hygiene labels.
+
## Invocation
m1pp input.M1 output.M1
@@ -93,19 +96,6 @@ Like `%struct` with stride 1 and a trailing `COUNT`:
- `%NAME.l1` → `0`, `%NAME.l2` → `1`, ...
- `%NAME.COUNT` → `N`
-### `%scope` / `%endscope`
-
- %scope NAME
- ... body ...
- %endscope
-
-Pushes `NAME` onto a lexical scope stack active until the matching
-`%endscope`. Scopes nest. While the stack is non-empty, any `::name` or
-`&::name` token emitted from within is rewritten with the current scope
-path (see [Scoped labels](#scoped-labels)). Every `%scope` must be closed
-before end-of-input. `NAME` is a single `WORD` token and may come from
-macro-argument substitution.
-
### `%frame` / `%endframe`
%frame NAME
@@ -119,12 +109,6 @@ synthesized by `%struct`). Frames do not nest: a second `%frame` before
end-of-input. `NAME` is a single `WORD` token and may come from
macro-argument substitution.
-Frame state is independent of the `%scope` stack. A nested `%scope`
-inside a frame's body affects `::label` resolution but does not change
-`%local` resolution — locals stay bound to the enclosing frame, not
-to the lexical scope. This lets a function body open inner
-control-flow scopes without the local namespace shifting underneath.
-
## Macro calls
%NAME(arg, arg, ...)
@@ -162,55 +146,12 @@ sites (or two different macros) never collide. Argument-substituted tokens
keep their original text and are not rewritten, so a `:@name` literal
passed as a macro argument passes through verbatim.
-## Scoped labels
-
-A `WORD` token whose text starts with `::` is a scoped label definition; a
-token starting with `&::` is a scoped reference. The `::` prefix is rewritten
-at **emit time** against the current `%scope` stack:
-
-- stack = `[parse_number]`: `::start` → `:parse_number__start`
-- stack = `[outer, inner]`: `&::end` → `&outer__inner__end`
-- stack empty: `::foo` → `:foo`; `&::bar` → `&bar` (pass-through)
-
-Because resolution is at emit time rather than macro-expansion time, a
-`::foo` token written inside a macro body resolves against whatever scope
-is active at the point the token flows to the output — i.e. the caller's
-surroundings, not the macro's own expansion id. This makes generic
-control-flow macros possible:
-
- %macro loop_scoped(name, body)
- %scope name
- ::top
- body
- LA_BR &::top
- B
- ::end
- %endscope
- %endm
-
- %macro break()
- LA_BR &::end
- B
- %endm
-
- %loop_scoped(scan, {
- ...
- %if_eqz(a0, { %break() })
- ...
- })
-
-Inside the expansion, `%loop_scoped` has pushed the scope `[scan]`, so
-when `%break()`'s `&::end` token is finally emitted the stack is `[scan]`
-and the output is `&scan__end` — exactly the label `%loop_scoped`
-defined at the bottom of its body. A nested `%loop_scoped(inner, { ... })`
-makes `[outer, inner]` the active stack, so a `%break()` inside the inner
-block targets the innermost scope. To jump past an intervening scope,
-write the concatenated name explicitly (`&outer__end`).
-
-Scoped labels and local (`:@` / `&@`) labels are independent and compose.
-A common pattern: use `:@` for the macro's private internal labels (the
-caller can never name them) and `::` for labels that are the macro's
-public contract with its caller (`::end`, `::top`, etc.).
+These labels exist only to keep macro-internal symbols from colliding
+with each other or with caller code. Lexical scoping for control-flow
+labels (e.g. `loop`/`break` patterns where an inner macro must reference
+a label defined by an outer macro) belongs to `hex2++`'s `.scope` —
+emit `.scope` / `.endscope` and dotted local labels from your macro
+bodies, and rely on `hex2++`'s innermost-out lookup to bind references.
## Built-in calls
@@ -218,14 +159,14 @@ These are recognized wherever a token matches, not only at line start.
### Integer emission: `!` `@` `%` `$`
- !(expr) → 1-byte little-endian hex, e.g. 'AB'
+ !(expr) → 1-byte little-endian hex, e.g. AB
@(expr) → 2-byte little-endian hex
%(expr) → 4-byte little-endian hex
$(expr) → 8-byte little-endian hex
-The expression is evaluated to a signed 64-bit integer and emitted as an
-M0-safe single-quoted hex literal (`'AABBCCDD'`) rather than a bare number,
-so `M0` does not reinterpret it as decimal.
+The expression is evaluated to a signed 64-bit integer and emitted as
+bare hex digits (e.g. `AABBCCDD`). `hex2++` groups every two hex digits
+into one byte, so no quoting, wrapping, or separators are required.
### `%select(cond, then, else)`
@@ -238,6 +179,19 @@ branches are raw token spans, not expressions.
Stringifies a single `WORD` token into a double-quoted string literal:
`%str(foo)` → `"foo"`. The argument must be exactly one word token.
+### `%bytes(STRING)`
+
+Emits the raw bytes of a `"..."`-quoted string as a single contiguous
+hex word (consumed as bytes by `hex2++`). No NUL terminator is appended;
+write `00` explicitly if you need one. Recognised escapes inside the
+string are:
+
+ \n 0x0A \t 0x09 \r 0x0D \0 0x00
+ \\ 0x5C \" 0x22 \xNN byte NN (two hex digits)
+
+Any other backslash escape is an error. The argument must be exactly one
+`STRING` token quoted with `"`. Example: `%bytes("hi\n")` emits `68690A`.
+
### `%local(NAME)`
Looks up the zero-parameter macro `<frame>_FRAME.<NAME>`, where
@@ -252,14 +206,14 @@ The intended pattern combines `%struct`, `%frame`, and `%local` for
named stack-frame access:
%struct foo_FRAME { saved_buf saved_len }
- : foo
- %scope foo
+ :foo
+ .scope
%frame foo
%enter(%foo_FRAME.SIZE)
;; %local(saved_buf) -> 0, %local(saved_len) -> 8
%eret
%endframe
- %endscope
+ .endscope
## Expression language
@@ -302,7 +256,6 @@ Various limits are fixed at compile time. See the code for values.
| parameters per macro |
| stream stack depth |
| expression frames |
-| scope stack depth |
Exceeding any limit aborts with an error message on `stderr`.
diff --git a/hex2pp/hex2pp.c b/hex2pp/hex2pp.c
@@ -13,7 +13,7 @@
* SIGIL NAME [- OTHER] -> label reference (! @ $ ~ % &)
* .align N [PATTERN] -> pad to N-byte boundary
* .fill N B -> N copies of byte B
- * .scope / .endscope -> local-label scope (no nesting)
+ * .scope / .endscope -> local-label scope (nestable)
* # ... / ; ... -> line comment
*
* Multi-byte reference values are emitted little-endian by default.
@@ -33,6 +33,7 @@
#define MAX_LABELS (1 << 20)
#define MAX_TEXT (8 * 1024 * 1024)
#define MAX_TOKEN 4096
+#define MAX_SCOPE_DEPTH 32
enum { HEX_MODE, BINARY_MODE };
@@ -68,9 +69,9 @@ static int big_endian;
static int non_executable;
static const char *output_path = "a.out";
-static int current_scope;
+static int scope_stack[MAX_SCOPE_DEPTH];
+static int scope_depth;
static int scope_seq;
-static int in_scope;
static int pass; /* 1 or 2 */
static const char *cur_path;
@@ -135,11 +136,18 @@ static void define_label(const char *s, int len, int scope_id)
static long long lookup_label(const char *s, int len)
{
int i;
+ int d;
int dotted = (len > 0 && s[0] == '.');
if (dotted) {
- for (i = 0; i < label_count; i++) {
- if (labels[i].scope_id == current_scope && name_eq(&labels[i], s, len)) {
- return labels[i].target_ip;
+ /* Walk the scope stack innermost-out. A dotted name resolves to
+ * the nearest enclosing definition, so an inner scope can shadow
+ * an outer one with the same local name. */
+ for (d = scope_depth - 1; d >= 0; d--) {
+ int sid = scope_stack[d];
+ for (i = 0; i < label_count; i++) {
+ if (labels[i].scope_id == sid && name_eq(&labels[i], s, len)) {
+ return labels[i].target_ip;
+ }
}
}
die("undefined local label '%.*s'", len, s);
@@ -532,17 +540,15 @@ static void do_fill(struct Scanner *s)
static void do_scope_open(void)
{
- if (in_scope) die(".scope: nested scopes are not permitted");
- in_scope = 1;
+ if (scope_depth >= MAX_SCOPE_DEPTH) die(".scope: depth overflow");
scope_seq++;
- current_scope = scope_seq;
+ scope_stack[scope_depth++] = scope_seq;
}
static void do_scope_close(void)
{
- if (!in_scope) die(".endscope: not in a scope");
- in_scope = 0;
- current_scope = 0;
+ if (scope_depth <= 0) die(".endscope: not in a scope");
+ scope_depth--;
}
/* --- main scanner loop ------------------------------------------------ */
@@ -567,10 +573,10 @@ static void process_file(struct InFile *f)
s.pos++;
n = read_name(&s, name, sizeof(name));
dotted = (n > 0 && name[0] == '.');
- if (dotted && !in_scope) {
+ if (dotted && scope_depth == 0) {
die("dot-prefixed label '%.*s' outside a .scope", n, name);
}
- scope = dotted ? current_scope : 0;
+ scope = dotted ? scope_stack[scope_depth - 1] : 0;
if (pass == 1) define_label(name, n, scope);
continue;
}
@@ -706,25 +712,23 @@ int main(int argc, char **argv)
/* Pass 1: collect labels. */
pass = 1;
ip = 0;
- current_scope = 0;
- in_scope = 0;
+ scope_depth = 0;
scope_seq = 0;
for (i = 0; i < input_count; i++) {
process_file(&inputs[i]);
}
- if (in_scope) die(".scope not closed at end of input");
+ if (scope_depth != 0) die(".scope not closed at end of input");
/* Pass 2: emit. */
pass = 2;
ip = 0;
output_used = 0;
- current_scope = 0;
- in_scope = 0;
+ scope_depth = 0;
scope_seq = 0;
for (i = 0; i < input_count; i++) {
process_file(&inputs[i]);
}
- if (in_scope) die(".scope not closed at end of input");
+ if (scope_depth != 0) die(".scope not closed at end of input");
/* Write output. */
{