boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 38c3a3448502859a26a4e6f2d6a481bd6ece8504
parent e8d3d15a25a1195b1b4cfa3d4f5bc513f3483a2b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 11:50:53 -0700

M1pp/hex2pp: direct pipeline, drop M0-style quoting

Cut the M0/hex2 layer out of the contract: M1pp output now feeds hex2pp
directly. Move lexical scoping for control-flow labels to hex2pp so
there's only one mechanism, and add %bytes for raw string emission since
hex2pp itself stays purely byte/label oriented.

- M1pp: remove %scope/%endscope and ::/&:: resolution; emit !/@/%/$
  integer literals as bare hex (no surrounding quotes); add %bytes("...")
  with \n \t \r \0 \\ \" \xNN escapes.
- hex2pp: .scope nests; dotted-label lookup walks innermost-out so an
  inner scope shadows an outer one. Same scope ids in both passes.
- Docs updated to match.

Diffstat:
M M1pp/M1pp.c | 245 +++++++++++++++++++++++++++++++++++--------------------------------------------
M docs/HEX2pp.md | 23 +++++++++++++++--------
M docs/M1PP.md | 121 ++++++++++++++++++++++++-------------------------------------------------------
M hex2pp/hex2pp.c | 44 ++++++++++++++++++++++++--------------------
4 files changed, 184 insertions(+), 249 deletions(-)

diff --git a/M1pp/M1pp.c b/M1pp/M1pp.c @@ -1,5 +1,7 @@ /* - * Tiny single-pass M1 macro expander. + * Tiny single-pass M1pp macro expander. Output is consumed directly by + * hex2pp -- there is no intermediate M0/hex2 stage. All emission is in + * the byte/label/directive vocabulary hex2pp accepts. * * Syntax: * %macro NAME(a, b) @@ -17,14 +19,14 @@ * $(expr) evaluate an integer S-expression, emit LE 64-bit hex * %select(c,t,e) evaluate condition S-expression; expand t if nonzero else e * %str(IDENT) stringify a single WORD token into a "..."-quoted literal + * %bytes(STR) emit the raw bytes of STR as contiguous hex digits * * %frame NAME / %endframe set/clear a single-slot "current frame" * %local(NAME) expand to the body of <frame>_FRAME.<NAME> * - * Frames are a separate state from the %scope stack. %frame does not push - * onto scope_stack; %scope does not change the current frame. This lets a - * function body open inner control-flow scopes (whose ::labels resolve - * against the scope stack) without disturbing %local lookup. + * Lexical scoping for control-flow labels is delegated to hex2pp's + * `.scope` / `.endscope` (which nest). M1pp itself only handles + * per-expansion macro hygiene labels (`:@name` / `&@name`). * * Expression syntax is intentionally Lisp-shaped: * atoms: decimal or 0x-prefixed integer literals @@ -114,7 +116,6 @@ * under 16 MiB. 
*/ #define MAX_STACK 64 #define MAX_EXPR_FRAMES 256 -#define MAX_SCOPE_DEPTH 32 enum { TOK_WORD, @@ -198,7 +199,6 @@ static struct Token macro_body_tokens[MAX_MACRO_BODY_TOKENS]; static struct Token expand_pool[MAX_EXPAND]; static struct Macro macros[MAX_MACROS]; static struct Stream streams[MAX_STACK]; -static struct TextSpan scope_stack[MAX_SCOPE_DEPTH]; static struct TextSpan current_frame; static int frame_active; @@ -211,7 +211,6 @@ static int output_used; static int output_need_space; static int stream_top; static int next_expansion_id; -static int scope_depth; static int current_line; static int error_line; static const char *input_path; @@ -482,68 +481,11 @@ static int emit_newline(void) return 1; } -static int emit_scoped_label(const struct Token *tok, int skip, char sigil) -{ - /* Rewrite `::name` or `&::name` against the current scope stack. - * skip is the number of leading chars to drop (`::` -> 2, `&::` -> 3); - * sigil is the single-char prefix to emit (`:` for definitions, `&` - * for references). With a non-empty scope stack the output is - * sigil + scope1 + "__" + ... + scopeN + "__" + name; with an empty - * stack it degrades to sigil + name (pass-through). 
*/ - int name_len = tok->text.len - skip; - int i; - - if (name_len <= 0) { - return fail("bad scope label"); - } - - if (output_need_space) { - if (output_used + 1 >= MAX_OUTPUT) { - return fail("output overflow"); - } - output_buf[output_used++] = ' '; - } - - if (output_used + 1 >= MAX_OUTPUT) { - return fail("output overflow"); - } - output_buf[output_used++] = sigil; - - for (i = 0; i < scope_depth; i++) { - int span_len = scope_stack[i].len; - if (output_used + span_len + 2 >= MAX_OUTPUT) { - return fail("output overflow"); - } - memcpy(output_buf + output_used, scope_stack[i].ptr, - (size_t)span_len); - output_used += span_len; - output_buf[output_used++] = '_'; - output_buf[output_used++] = '_'; - } - - if (output_used + name_len >= MAX_OUTPUT) { - return fail("output overflow"); - } - memcpy(output_buf + output_used, tok->text.ptr + skip, (size_t)name_len); - output_used += name_len; - output_need_space = 1; - return 1; -} - static int emit_token(const struct Token *tok) { if (tok->kind == TOK_LBRACE || tok->kind == TOK_RBRACE) { return 1; } - if (tok->kind == TOK_WORD && tok->text.len >= 2 && - tok->text.ptr[0] == ':' && tok->text.ptr[1] == ':') { - return emit_scoped_label(tok, 2, ':'); - } - if (tok->kind == TOK_WORD && tok->text.len >= 3 && - tok->text.ptr[0] == '&' && - tok->text.ptr[1] == ':' && tok->text.ptr[2] == ':') { - return emit_scoped_label(tok, 3, '&'); - } if (output_need_space) { if (output_used + 1 >= MAX_OUTPUT) { return fail("output overflow"); @@ -1654,29 +1596,28 @@ static int eval_expr_range(struct TokenSpan span, long long *out) static int emit_hex_value(unsigned long long value, int bytes) { - /* Wrap the hex digits in single quotes so M0 sees a STRING-literal - * hex token, not a numeric token (which it would parse as decimal). */ - char tmp[19]; + /* Emit the bytes as bare little-endian hex digits. hex2pp's byte- + * stream parser groups every two hex digits into one byte; no + * quoting or separators are needed. 
*/ + char tmp[17]; static const char hex[] = "0123456789ABCDEF"; struct Token tok; int i; char *text_ptr; - int total_len = 2 + 2 * bytes; + int total_len = 2 * bytes; - tmp[0] = '\''; for (i = 0; i < bytes; i++) { unsigned int b = (unsigned int)((value >> (8 * i)) & 0xFF); - tmp[1 + 2 * i] = hex[b >> 4]; - tmp[1 + 2 * i + 1] = hex[b & 0x0F]; + tmp[2 * i] = hex[b >> 4]; + tmp[2 * i + 1] = hex[b & 0x0F]; } - tmp[1 + 2 * bytes] = '\''; tmp[total_len] = '\0'; text_ptr = append_text_len(tmp, total_len); if (text_ptr == NULL) { return 0; } - tok.kind = TOK_STRING; + tok.kind = TOK_WORD; tok.tight = 0; tok.line = current_line; tok.text.ptr = text_ptr; @@ -1759,6 +1700,98 @@ static int expand_builtin_call(struct Stream *s, const struct Token *tok) return push_pool_stream_from_mark(mark); } + if (token_text_eq(tok, "%bytes")) { + /* Emit the raw bytes of a "..." string as one WORD of contiguous + * hex digits. Recognised escapes: \n \t \r \0 \\ \" and \xNN. + * No NUL is appended; the caller writes one explicitly if needed. 
*/ + struct Token *arg_tok; + struct Token *end_pos; + struct Token out_tok; + static const char hex[] = "0123456789ABCDEF"; + char *text_ptr; + const char *src; + int src_len; + int src_i; + int hex_len; + + if (arg_count != 1) { + return fail("bad builtin"); + } + if (arg_ends[0] - arg_starts[0] != 1) { + return fail("bad builtin"); + } + arg_tok = arg_starts[0]; + if (arg_tok->kind != TOK_STRING || + arg_tok->text.len < 2 || arg_tok->text.ptr[0] != '"') { + return fail("bad builtin"); + } + end_pos = call_end_pos; + + src = arg_tok->text.ptr + 1; + src_len = arg_tok->text.len - 2; + if (text_used + 2 * src_len + 1 > MAX_TEXT) { + return fail("text overflow"); + } + text_ptr = text_buf + text_used; + hex_len = 0; + src_i = 0; + while (src_i < src_len) { + unsigned int b; + char c = src[src_i++]; + if (c == '\\') { + char e; + if (src_i >= src_len) { + return fail("bad escape"); + } + e = src[src_i++]; + if (e == 'n') b = 0x0A; + else if (e == 't') b = 0x09; + else if (e == 'r') b = 0x0D; + else if (e == '0') b = 0x00; + else if (e == '\\') b = 0x5C; + else if (e == '"') b = 0x22; + else if (e == 'x') { + int hi, lo, hv, lv; + if (src_i + 2 > src_len) { + return fail("bad escape"); + } + hi = (unsigned char)src[src_i++]; + lo = (unsigned char)src[src_i++]; + hv = (hi >= '0' && hi <= '9') ? hi - '0' : + (hi >= 'a' && hi <= 'f') ? hi - 'a' + 10 : + (hi >= 'A' && hi <= 'F') ? hi - 'A' + 10 : -1; + lv = (lo >= '0' && lo <= '9') ? lo - '0' : + (lo >= 'a' && lo <= 'f') ? lo - 'a' + 10 : + (lo >= 'A' && lo <= 'F') ? 
lo - 'A' + 10 : -1; + if (hv < 0 || lv < 0) { + return fail("bad escape"); + } + b = (unsigned int)((hv << 4) | lv); + } else { + return fail("bad escape"); + } + } else { + b = (unsigned char)c; + } + text_buf[text_used++] = hex[(b >> 4) & 0xF]; + text_buf[text_used++] = hex[b & 0xF]; + hex_len += 2; + } + text_buf[text_used++] = '\0'; + + out_tok.kind = TOK_WORD; + out_tok.tight = 0; + out_tok.line = current_line; + out_tok.text.ptr = text_ptr; + out_tok.text.len = hex_len; + s->pos = end_pos; + s->line_start = 0; + if (hex_len == 0) { + return 1; + } + return emit_token(&out_tok); + } + if (token_text_eq(tok, "%str")) { struct Token *arg_tok; struct Token *end_pos; @@ -1817,52 +1850,6 @@ static int expand_call(struct Stream *s, const struct Macro *macro) return push_pool_stream_from_mark(mark); } -static int push_scope(struct Stream *s) -{ - /* Header self-terminates at the scope name. Newlines after the name - * are insignificant — they're skipped here so a multi-line scope - * (`%scope NAME\nbody\n%endscope`) doesn't introduce an extra blank - * line in output. */ - int started_at_line_start = s->line_start; - - s->pos++; - skip_newlines(&s->pos, s->end); - if (s->pos >= s->end || s->pos->kind != TOK_WORD) { - return fail("bad scope header"); - } - if (scope_depth >= MAX_SCOPE_DEPTH) { - return fail("scope depth overflow"); - } - scope_stack[scope_depth++] = s->pos->text; - s->pos++; - if (started_at_line_start) { - skip_newlines(&s->pos, s->end); - s->line_start = 1; - } - return 1; -} - -static int pop_scope(struct Stream *s) -{ - /* %endscope must be immediately followed by TOK_NEWLINE. The newline - * is consumed iff %endscope itself appeared at line_start. 
*/ - int started_at_line_start = s->line_start; - - s->pos++; - if (scope_depth <= 0) { - return fail("scope underflow"); - } - scope_depth--; - if (s->pos >= s->end || s->pos->kind != TOK_NEWLINE) { - return fail("expected newline after %endscope"); - } - if (started_at_line_start) { - s->pos++; - s->line_start = 1; - } - return 1; -} - static int push_frame(struct Stream *s) { /* %frame NAME sets the single-slot current frame, used by %local @@ -1956,20 +1943,6 @@ static int process_tokens(void) continue; } - if (tok->kind == TOK_WORD && token_text_eq(tok, "%scope")) { - if (!push_scope(s)) { - return 0; - } - continue; - } - - if (tok->kind == TOK_WORD && token_text_eq(tok, "%endscope")) { - if (!pop_scope(s)) { - return 0; - } - continue; - } - if (tok->kind == TOK_WORD && token_text_eq(tok, "%frame")) { if (!push_frame(s)) { return 0; @@ -2003,6 +1976,7 @@ static int process_tokens(void) token_text_eq(tok, "$") || token_text_eq(tok, "%select") || token_text_eq(tok, "%str") || + token_text_eq(tok, "%bytes") || token_text_eq(tok, "%local"))) { if (!expand_builtin_call(s, tok)) { return 0; @@ -2028,9 +2002,6 @@ static int process_tokens(void) } } - if (scope_depth != 0) { - return fail("scope not closed"); - } if (frame_active) { return fail("frame not closed"); } diff --git a/docs/HEX2pp.md b/docs/HEX2pp.md @@ -2,7 +2,8 @@ A small, byte-oriented assembler/linker that takes hex source with labels and references and emits a flat binary. Implemented in P1; used by `cc.scm` and -the P1 backends as the final stage of the `M1pp → M1 → hex2++` toolchain. +the P1 backends as the final stage of the `M1pp → hex2++` toolchain. M1pp +output feeds hex2++ directly — there is no intermediate macro/hex stage. ## Invocation @@ -71,7 +72,9 @@ definition, `&.NAME` / `%.NAME` / etc. are local references, and a bare .endscope ``` -- `.scope` directives may not nest. +- `.scope` directives nest. 
A dotted reference resolves to the nearest + enclosing definition, so an inner scope shadows an outer one with the + same local name. - Non-dotted labels defined inside a `.scope` remain global. - Dot-prefixed labels outside any `.scope` are an error. @@ -114,9 +117,10 @@ Examples: :jt &case0-jt &case1-jt &case2-jt -# string length prefix +# string length prefix (string bytes themselves come from the +# upstream M1pp layer, e.g. `%bytes("hello")`) :s_begin - "hello" + 68 65 6c 6c 6f :s_end &s_end-s_begin ``` @@ -158,12 +162,15 @@ Two passes: - **Pass 1** — read every input file, advancing `ip` and recording label definitions. `.align` and `.fill` advance `ip` deterministically; - `.scope` / `.endscope` push and pop the current scope id. -- **Pass 2** — re-read, emit bytes, resolve references. + `.scope` / `.endscope` push and pop the scope stack, assigning each + open scope a fresh id. +- **Pass 2** — re-read, emit bytes, resolve references. Scope ids are + assigned in the same order, so pass-1 definitions and pass-2 + references see identical ids. The label table carries `(name, target_ip, scope_id)` entries. Lookup for a -dotted name compares both name and current scope id; lookup for a non-dotted -name ignores scope. +dotted name walks the scope stack innermost-out and returns the first match; +lookup for a non-dotted name ignores scope. Both labels in `LABEL-OTHER` have known addresses by the start of pass 2, so the subtraction is a single operation at emit time. No third pass is diff --git a/docs/M1PP.md b/docs/M1PP.md @@ -2,10 +2,11 @@ ## Scope -M1PP is a tiny single-pass macro expander that runs ahead of `M0`. It takes -M1 source with macro directives and emits plain M1 suitable for `M0`. +M1PP is a tiny single-pass macro expander. Its output is consumed +directly by `hex2++` — there is no intermediate macro/hex stage. All +emission is in the byte/label/directive vocabulary `hex2++` accepts. -The implementation lives in `m1pp/m1pp.c`. 
It is one pass, allocation-free +The implementation lives in `M1pp/M1pp.c`. It is one pass, allocation-free (fixed static buffers), and stops at the first error. ## Features @@ -19,23 +20,25 @@ The implementation lives in `m1pp/m1pp.c`. It is one pass, allocation-free macros - Local labels (`:@name` / `&@name`) rewritten per-expansion for hygienic intra-macro labels -- Scoped labels (`::name` / `&::name`) resolved at **emit time** against - the `%scope` stack — enables generic control-flow macros like - `loop`/`break` - Struct and enum synthesis (`%struct`, `%enum`) generating per-field zero-arg macros plus `SIZE`/`COUNT` - Named stack-frame access via `%frame` / `%endframe` + `%local(field)`, composing with `%struct`-generated `<frame>_FRAME.<field>` macros - Compile-time integer expression language (Lisp S-expressions: arithmetic, bitwise, shift, comparison, `strlen`) -- M0-safe little-endian hex emission: `!` (1B), `@` (2B), `%` (4B), - `$` (8B) — emits `'AABBCCDD'` quoted literals +- Little-endian hex emission: `!` (1B), `@` (2B), `%` (4B), `$` (8B) — + emits bare hex digits (e.g. `AABBCCDD`) consumable by `hex2++` +- Raw byte emission from string literals: `%bytes("...")` - Conditional token selection: `%select(cond, then, else)` - Stringification: `%str(IDENT)` → `"IDENT"` - Line comments (`#`, `;`); whitespace-insensitive output normalization - Single-pass, allocation-free implementation with fixed static buffers; fail-fast on first error +Lexical scoping for control-flow labels is delegated to `hex2++`'s +nestable `.scope` / `.endscope`; M1PP itself only handles per-expansion +macro hygiene labels. + ## Invocation m1pp input.M1 output.M1 @@ -93,19 +96,6 @@ Like `%struct` with stride 1 and a trailing `COUNT`: - `%NAME.l1` → `0`, `%NAME.l2` → `1`, ... - `%NAME.COUNT` → `N` -### `%scope` / `%endscope` - - %scope NAME - ... body ... - %endscope - -Pushes `NAME` onto a lexical scope stack active until the matching -`%endscope`. Scopes nest. 
While the stack is non-empty, any `::name` or -`&::name` token emitted from within is rewritten with the current scope -path (see [Scoped labels](#scoped-labels)). Every `%scope` must be closed -before end-of-input. `NAME` is a single `WORD` token and may come from -macro-argument substitution. - ### `%frame` / `%endframe` %frame NAME @@ -119,12 +109,6 @@ synthesized by `%struct`). Frames do not nest: a second `%frame` before end-of-input. `NAME` is a single `WORD` token and may come from macro-argument substitution. -Frame state is independent of the `%scope` stack. A nested `%scope` -inside a frame's body affects `::label` resolution but does not change -`%local` resolution — locals stay bound to the enclosing frame, not -to the lexical scope. This lets a function body open inner -control-flow scopes without the local namespace shifting underneath. - ## Macro calls %NAME(arg, arg, ...) @@ -162,55 +146,12 @@ sites (or two different macros) never collide. Argument-substituted tokens keep their original text and are not rewritten, so a `:@name` literal passed as a macro argument passes through verbatim. -## Scoped labels - -A `WORD` token whose text starts with `::` is a scoped label definition; a -token starting with `&::` is a scoped reference. The `::` prefix is rewritten -at **emit time** against the current `%scope` stack: - -- stack = `[parse_number]`: `::start` → `:parse_number__start` -- stack = `[outer, inner]`: `&::end` → `&outer__inner__end` -- stack empty: `::foo` → `:foo`; `&::bar` → `&bar` (pass-through) - -Because resolution is at emit time rather than macro-expansion time, a -`::foo` token written inside a macro body resolves against whatever scope -is active at the point the token flows to the output — i.e. the caller's -surroundings, not the macro's own expansion id. 
This makes generic -control-flow macros possible: - - %macro loop_scoped(name, body) - %scope name - ::top - body - LA_BR &::top - B - ::end - %endscope - %endm - - %macro break() - LA_BR &::end - B - %endm - - %loop_scoped(scan, { - ... - %if_eqz(a0, { %break() }) - ... - }) - -Inside the expansion, `%loop_scoped` has pushed the scope `[scan]`, so -when `%break()`'s `&::end` token is finally emitted the stack is `[scan]` -and the output is `&scan__end` — exactly the label `%loop_scoped` -defined at the bottom of its body. A nested `%loop_scoped(inner, { ... })` -makes `[outer, inner]` the active stack, so a `%break()` inside the inner -block targets the innermost scope. To jump past an intervening scope, -write the concatenated name explicitly (`&outer__end`). - -Scoped labels and local (`:@` / `&@`) labels are independent and compose. -A common pattern: use `:@` for the macro's private internal labels (the -caller can never name them) and `::` for labels that are the macro's -public contract with its caller (`::end`, `::top`, etc.). +These labels exist only to keep macro-internal symbols from colliding +with each other or with caller code. Lexical scoping for control-flow +labels (e.g. `loop`/`break` patterns where an inner macro must reference +a label defined by an outer macro) belongs to `hex2++`'s `.scope` — +emit `.scope` / `.endscope` and dotted local labels from your macro +bodies, and rely on `hex2++`'s innermost-out lookup to bind references. ## Built-in calls @@ -218,14 +159,14 @@ These are recognized wherever a token matches, not only at line start. ### Integer emission: `!` `@` `%` `$` - !(expr) → 1-byte little-endian hex, e.g. 'AB' + !(expr) → 1-byte little-endian hex, e.g. 
AB @(expr) → 2-byte little-endian hex %(expr) → 4-byte little-endian hex $(expr) → 8-byte little-endian hex -The expression is evaluated to a signed 64-bit integer and emitted as an -M0-safe single-quoted hex literal (`'AABBCCDD'`) rather than a bare number, -so `M0` does not reinterpret it as decimal. +The expression is evaluated to a signed 64-bit integer and emitted as +bare hex digits (e.g. `AABBCCDD`). `hex2++` consumes whitespace-separated +hex bytes directly, so no quoting or wrapping is required. ### `%select(cond, then, else)` @@ -238,6 +179,19 @@ branches are raw token spans, not expressions. Stringifies a single `WORD` token into a double-quoted string literal: `%str(foo)` → `"foo"`. The argument must be exactly one word token. +### `%bytes(STRING)` + +Emits the raw bytes of a `"..."`-quoted string as a single contiguous +hex word (consumed as bytes by `hex2++`). No NUL terminator is appended; +write `00` explicitly if you need one. Recognised escapes inside the +string are: + + \n 0x0A \t 0x09 \r 0x0D \0 0x00 + \\ 0x5C \" 0x22 \xNN byte NN (two hex digits) + +Any other backslash escape is an error. The argument must be exactly one +`STRING` token quoted with `"`. Example: `%bytes("hi\n")` emits `68690A`. + ### `%local(NAME)` Looks up the zero-parameter macro `<frame>_FRAME.<NAME>`, where @@ -252,14 +206,14 @@ The intended pattern combines `%struct`, `%frame`, and `%local` for named stack-frame access: %struct foo_FRAME { saved_buf saved_len } - : foo - %scope foo + :foo + .scope %frame foo %enter(%foo_FRAME.SIZE) ;; %local(saved_buf) -> 0, %local(saved_len) -> 8 %eret %endframe - %endscope + .endscope ## Expression language @@ -302,7 +256,6 @@ Various limits are fixed at compile time. See the code for values. | parameters per macro | | stream stack depth | | expression frames | -| scope stack depth | Exceeding any limit aborts with an error message on `stderr`. 
diff --git a/hex2pp/hex2pp.c b/hex2pp/hex2pp.c @@ -13,7 +13,7 @@ * SIGIL NAME [- OTHER] -> label reference (! @ $ ~ % &) * .align N [PATTERN] -> pad to N-byte boundary * .fill N B -> N copies of byte B - * .scope / .endscope -> local-label scope (no nesting) + * .scope / .endscope -> local-label scope (nestable) * # ... / ; ... -> line comment * * Multi-byte reference values are emitted little-endian by default. @@ -33,6 +33,7 @@ #define MAX_LABELS (1 << 20) #define MAX_TEXT (8 * 1024 * 1024) #define MAX_TOKEN 4096 +#define MAX_SCOPE_DEPTH 32 enum { HEX_MODE, BINARY_MODE }; @@ -68,9 +69,9 @@ static int big_endian; static int non_executable; static const char *output_path = "a.out"; -static int current_scope; +static int scope_stack[MAX_SCOPE_DEPTH]; +static int scope_depth; static int scope_seq; -static int in_scope; static int pass; /* 1 or 2 */ static const char *cur_path; @@ -135,11 +136,18 @@ static void define_label(const char *s, int len, int scope_id) static long long lookup_label(const char *s, int len) { int i; + int d; int dotted = (len > 0 && s[0] == '.'); if (dotted) { - for (i = 0; i < label_count; i++) { - if (labels[i].scope_id == current_scope && name_eq(&labels[i], s, len)) { - return labels[i].target_ip; + /* Walk the scope stack innermost-out. A dotted name resolves to + * the nearest enclosing definition, so an inner scope can shadow + * an outer one with the same local name. 
*/ + for (d = scope_depth - 1; d >= 0; d--) { + int sid = scope_stack[d]; + for (i = 0; i < label_count; i++) { + if (labels[i].scope_id == sid && name_eq(&labels[i], s, len)) { + return labels[i].target_ip; + } } } die("undefined local label '%.*s'", len, s); @@ -532,17 +540,15 @@ static void do_fill(struct Scanner *s) static void do_scope_open(void) { - if (in_scope) die(".scope: nested scopes are not permitted"); - in_scope = 1; + if (scope_depth >= MAX_SCOPE_DEPTH) die(".scope: depth overflow"); scope_seq++; - current_scope = scope_seq; + scope_stack[scope_depth++] = scope_seq; } static void do_scope_close(void) { - if (!in_scope) die(".endscope: not in a scope"); - in_scope = 0; - current_scope = 0; + if (scope_depth <= 0) die(".endscope: not in a scope"); + scope_depth--; } /* --- main scanner loop ------------------------------------------------ */ @@ -567,10 +573,10 @@ static void process_file(struct InFile *f) s.pos++; n = read_name(&s, name, sizeof(name)); dotted = (n > 0 && name[0] == '.'); - if (dotted && !in_scope) { + if (dotted && scope_depth == 0) { die("dot-prefixed label '%.*s' outside a .scope", n, name); } - scope = dotted ? current_scope : 0; + scope = dotted ? scope_stack[scope_depth - 1] : 0; if (pass == 1) define_label(name, n, scope); continue; } @@ -706,25 +712,23 @@ int main(int argc, char **argv) /* Pass 1: collect labels. */ pass = 1; ip = 0; - current_scope = 0; - in_scope = 0; + scope_depth = 0; scope_seq = 0; for (i = 0; i < input_count; i++) { process_file(&inputs[i]); } - if (in_scope) die(".scope not closed at end of input"); + if (scope_depth != 0) die(".scope not closed at end of input"); /* Pass 2: emit. */ pass = 2; ip = 0; output_used = 0; - current_scope = 0; - in_scope = 0; + scope_depth = 0; scope_seq = 0; for (i = 0; i < input_count; i++) { process_file(&inputs[i]); } - if (in_scope) die(".scope not closed at end of input"); + if (scope_depth != 0) die(".scope not closed at end of input"); /* Write output. */ {