commit 4fb9c5b2564b3755aea2b0f468b93988c849dd32
parent 8f565bea093f2924ca3ca86ac810a02501b9e026
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 10:35:06 -0700
parse: Phase 5 — switch, goto, do-while, labels
Adds the remaining §6.8 statement forms behind a §6.2.3 label namespace:
do-while, switch/case/default with a single-pass compare-chain dispatch,
goto with forward/backward resolution, and labeled statements detected by
peek1(':') lookahead. Per-function GotoLabel chain and SwitchCtx stack
saved/restored across function bodies; cg_func_end checks every goto
target was placed.
Flips corpus rows 6_2_3_02, 6_8_04, 6_8_07–11 to ★.
Diffstat:
3 files changed, 305 insertions(+), 15 deletions(-)
diff --git a/doc/parser-status.md b/doc/parser-status.md
@@ -188,17 +188,43 @@ Unlocks (status as landed): `6_2_4_01` ★, `6_7_02–04` ★, `6_9_03` ★,
---
-## Phase 5 — Statement completeness ⬜
+## Phase 5 — Statement completeness ✅
Switch family, goto/labels, do-while, void return, label namespace.
-- [ ] `switch` / `case` / `default` (incl. fall-through, default-only)
-- [ ] `goto` forward and backward
-- [ ] User labels (separate namespace from ordinary identifiers)
-- [ ] `do { } while ()`
-- [ ] `return;` from void function
-
-Unlocks: `6_2_3_02`, `6_8_04`, `6_8_07–11`, `6_8_14`.
+- [x] `switch` / `case` / `default` (incl. fall-through, default-only)
+- [x] `goto` forward and backward
+- [x] User labels (separate namespace from ordinary identifiers)
+- [x] `do { } while ()`
+- [x] `return;` from void function
+
+Phase 5 also added:
+ - `SwitchCtx` parser-side stack tracking the innermost switch's case list,
+ optional default label, and a frame slot holding the controlling
+ expression's value. Lowering is single-pass: evaluate-and-stash, jump
+ over the body to a dispatch trampoline, parse the body (cases push
+ `(value, label)` onto the ctx as they're encountered), then emit a
+ compare-chain that selects the matching label or falls through to the
+ end of the switch. No jump-table primitive in cg today; a dense-case
+ optimization can plug in later through opt without touching the parser.
+ - `GotoLabel` per-function label namespace (separate chain from the
+ ordinary identifier scope, §6.2.3 ¶1). CGLabel is allocated lazily on
+ first reference so forward and backward gotos share one entry; label
+ placement (`name:`) marks the entry as resolved. At cg_func_end the
+ parser checks every entry was placed and panics with the first-use
+ location otherwise.
+ - Labeled-statement detection in `parse_stmt` via `peek1(':')` lookahead
+ on a non-keyword IDENT. The label form short-circuits before the
+ expression-statement fallback, so `s: return 42;` doesn't get parsed
+ as `s` (the int) followed by stray `:`.
+ - `parse_function_body` saves/restores `goto_labels` and `cur_switch`
+ around the function so the per-function state never leaks even if a
+ pathological caller nests definitions.
+
+Unlocks (status as landed): `6_2_3_02` ★, `6_8_04` ★, `6_8_07–11` ★,
+`6_8_14` ★ (was already passing — bare `return;` in void was wired up by
+parse_return_stmt before Phase 5; the corpus row stays under this phase
+because it's the §6.8.6 spec slot).
---
diff --git a/src/parse/parse.c b/src/parse/parse.c
@@ -179,6 +179,38 @@ struct Scope {
* Parser context
* ============================================================ */
+/* Switch dispatch: each `case K:` records (value, label) into the innermost
+ * switch context. After the body, the dispatch chain at L_dispatch loads the
+ * saved switch value, compares against each entry, and branches to its label. */
+typedef struct CaseEntry CaseEntry;
+struct CaseEntry {
+ i64 value;
+ CGLabel label;
+ CaseEntry* next; /* LIFO; reverse-walked at dispatch emit time */
+};
+
+typedef struct SwitchCtx SwitchCtx;
+struct SwitchCtx {
+ CaseEntry* cases; /* LIFO, nodes arena-allocated */
+ CGLabel default_label; /* 0 if no `default:` seen */
+ FrameSlot value_slot; /* holds the switch expression value */
+ const Type* value_type; /* type of the switch expression */
+ SwitchCtx* parent;
+};
+
+/* Labels live in a per-function namespace separate from ordinary identifiers
+ * (§6.2.3 ¶1). One entry per unique label name; CGLabel is allocated lazily
+ * on first reference (whether goto-forward or label-place comes first). */
+typedef struct GotoLabel GotoLabel;
+struct GotoLabel {
+ Sym name;
+ CGLabel label;
+ u8 placed; /* the matching `name:` was seen */
+ u8 pad[3];
+ SrcLoc first_use;
+ GotoLabel* next;
+};
+
typedef struct Parser {
Compiler* c;
Pp* pp;
@@ -202,6 +234,13 @@ typedef struct Parser {
CGLabel cur_break;
CGLabel cur_continue;
+ /* Innermost switch (`case`/`default` bind here). NULL outside any switch.
+ * `break` still goes through `cur_break`, which the switch sets. */
+ SwitchCtx* cur_switch;
+
+ /* Per-function label chain. Reset across each function definition. */
+ GotoLabel* goto_labels;
+
/* VLA bookkeeping. parse_decl_suffix emits the size-expression code at
* suffix-parse time (because the tokens are about to vanish) and stashes
* the i64 count in `vla_pending_count_slot`; parse_init_declarator picks
@@ -3134,6 +3173,182 @@ static void parse_continue_stmt(Parser* p) {
expect_punct(p, ';', "';' after continue");
}
+static void parse_do_stmt(Parser* p) {
+ CGLabel L_top = cg_label_new(p->cg);
+ CGLabel L_cond = cg_label_new(p->cg);
+ CGLabel L_end = cg_label_new(p->cg);
+ CGLabel saved_break = p->cur_break;
+ CGLabel saved_continue = p->cur_continue;
+ cg_label_place(p->cg, L_top);
+ p->cur_break = L_end;
+ p->cur_continue = L_cond;
+ parse_stmt(p);
+ p->cur_break = saved_break;
+ p->cur_continue = saved_continue;
+ cg_label_place(p->cg, L_cond);
+ if (!is_kw(p, &p->cur, KW_WHILE)) perr(p, "expected 'while' after do-body");
+ advance(p); /* while */
+ expect_punct(p, '(', "'('");
+ parse_expr(p);
+ to_rvalue(p);
+ expect_punct(p, ')', "')' after do-while condition");
+ expect_punct(p, ';', "';' after do-while");
+ cg_branch_true(p->cg, L_top);
+ cg_label_place(p->cg, L_end);
+}
+
+static GotoLabel* label_get_or_create(Parser* p, Sym name, SrcLoc loc) {
+ GotoLabel* gl;
+ for (gl = p->goto_labels; gl; gl = gl->next) {
+ if (gl->name == name) return gl;
+ }
+ gl = arena_new(p->c->tu, GotoLabel);
+ if (!gl) perr(p, "out of memory in label_get_or_create");
+ memset(gl, 0, sizeof *gl);
+ gl->name = name;
+ gl->label = cg_label_new(p->cg);
+ gl->placed = 0;
+ gl->first_use = loc;
+ gl->next = p->goto_labels;
+ p->goto_labels = gl;
+ return gl;
+}
+
+static void parse_goto_stmt(Parser* p) {
+ Sym name;
+ SrcLoc loc;
+ GotoLabel* gl;
+ if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+ perr(p, "expected label name after 'goto'");
+ }
+ name = p->cur.v.ident;
+ loc = tok_loc(&p->cur);
+ advance(p);
+ expect_punct(p, ';', "';' after goto");
+ gl = label_get_or_create(p, name, loc);
+ cg_jump(p->cg, gl->label);
+}
+
+/* `IDENT ':' STMT` — labeled statement. The IDENT lookup happens in the label
+ * namespace, not the ordinary identifier scope. Caller has already verified
+ * that cur is a non-keyword IDENT and the next token is ':'. */
+static void parse_label_stmt(Parser* p) {
+ Sym name = p->cur.v.ident;
+ SrcLoc loc = tok_loc(&p->cur);
+ GotoLabel* gl;
+ advance(p); /* IDENT */
+ advance(p); /* ':' */
+ gl = label_get_or_create(p, name, loc);
+ if (gl->placed) perr(p, "duplicate label");
+ gl->placed = 1;
+ cg_label_place(p->cg, gl->label);
+ parse_stmt(p);
+}
+
+static void parse_case_stmt(Parser* p) {
+ i64 v;
+ CGLabel L;
+ CaseEntry* ce;
+ SrcLoc loc = tok_loc(&p->cur);
+ if (!p->cur_switch) perr(p, "'case' label not in switch statement");
+ v = eval_const_int(p, loc);
+ expect_punct(p, ':', "':' after case constant");
+ L = cg_label_new(p->cg);
+ cg_label_place(p->cg, L);
+ ce = arena_new(p->c->tu, CaseEntry);
+ if (!ce) perr(p, "out of memory in parse_case_stmt");
+ ce->value = v;
+ ce->label = L;
+ ce->next = p->cur_switch->cases;
+ p->cur_switch->cases = ce;
+ parse_stmt(p);
+}
+
+static void parse_default_stmt(Parser* p) {
+ CGLabel L;
+ if (!p->cur_switch) perr(p, "'default' label not in switch statement");
+ expect_punct(p, ':', "':' after default");
+ if (p->cur_switch->default_label != 0) perr(p, "duplicate 'default' label");
+ L = cg_label_new(p->cg);
+ cg_label_place(p->cg, L);
+ p->cur_switch->default_label = L;
+ parse_stmt(p);
+}
+
+static void parse_switch_stmt(Parser* p) {
+ /* Single-pass lowering: evaluate the controlling expression once into a
+ * temp, jump over the body to the dispatch chain, parse the body (which
+ * places case/default labels and records (value, label) pairs in
+ * cur_switch), then emit a compare-and-branch chain that selects the
+ * matching label. Falls through to L_end if no case/default matches. */
+ CGLabel L_dispatch = cg_label_new(p->cg);
+ CGLabel L_end = cg_label_new(p->cg);
+ CGLabel saved_break = p->cur_break;
+ SwitchCtx ctx;
+ SwitchCtx* saved_switch = p->cur_switch;
+ FrameSlotDesc fsd;
+ const Type* vty;
+ CaseEntry* it;
+ CaseEntry* prev;
+ CaseEntry* head;
+
+ expect_punct(p, '(', "'('");
+ parse_expr(p);
+ to_rvalue(p);
+ vty = cg_top_type(p->cg);
+ if (!vty) vty = ty_int(p);
+ expect_punct(p, ')', "')' after switch expression");
+
+ memset(&ctx, 0, sizeof ctx);
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = vty;
+ fsd.size = abi_sizeof(p->abi, vty);
+ fsd.align = abi_alignof(p->abi, vty);
+ fsd.kind = FS_LOCAL;
+ ctx.value_slot = cg_local(p->cg, &fsd);
+ ctx.value_type = vty;
+ ctx.parent = saved_switch;
+
+ /* Stash the value: stack has [rv]; want [lv, rv] then store. */
+ cg_push_local_typed(p->cg, ctx.value_slot, vty);
+ cg_swap(p->cg);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+
+ cg_jump(p->cg, L_dispatch);
+
+ p->cur_switch = &ctx;
+ p->cur_break = L_end;
+ parse_stmt(p);
+ p->cur_break = saved_break;
+ p->cur_switch = saved_switch;
+
+ /* Body fall-through exits the switch. */
+ cg_jump(p->cg, L_end);
+
+ /* Emit dispatch in source order — reverse the LIFO chain. */
+ cg_label_place(p->cg, L_dispatch);
+ prev = NULL;
+ head = ctx.cases;
+ while (head) {
+ CaseEntry* nxt = head->next;
+ head->next = prev;
+ prev = head;
+ head = nxt;
+ }
+ for (it = prev; it; it = it->next) {
+ cg_push_local_typed(p->cg, ctx.value_slot, vty);
+ cg_load(p->cg);
+ cg_push_int(p->cg, it->value, vty);
+ cg_cmp(p->cg, CMP_EQ);
+ cg_branch_true(p->cg, it->label);
+ }
+ if (ctx.default_label) {
+ cg_jump(p->cg, ctx.default_label);
+ }
+ cg_label_place(p->cg, L_end);
+}
+
static void parse_compound_stmt(Parser* p) {
expect_punct(p, '{', "'{'");
scope_push(p);
@@ -3164,6 +3379,16 @@ static void parse_stmt(Parser* p) {
* registers so a function body with many sequential reg-allocating
* operations isn't bounded by the backend's fixed scratch window. */
cg_set_loc(p->cg, tok_loc(&p->cur));
+ /* Labeled statement: `IDENT ':' STMT`. The IDENT must not be a keyword;
+ * peek1 disambiguates the label form from an expression statement that
+ * happens to start with an identifier. */
+ if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
+ Tok n = peek1(p);
+ if (is_punct(&n, ':')) {
+ parse_label_stmt(p);
+ return;
+ }
+ }
if (is_punct(&p->cur, '{')) {
parse_compound_stmt(p);
return;
@@ -3187,6 +3412,11 @@ static void parse_stmt(Parser* p) {
parse_for_stmt(p);
return;
}
+ if (is_kw(p, &p->cur, KW_DO)) {
+ advance(p);
+ parse_do_stmt(p);
+ return;
+ }
if (is_kw(p, &p->cur, KW_RETURN)) {
advance(p);
parse_return_stmt(p);
@@ -3202,6 +3432,26 @@ static void parse_stmt(Parser* p) {
parse_continue_stmt(p);
return;
}
+ if (is_kw(p, &p->cur, KW_GOTO)) {
+ advance(p);
+ parse_goto_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_SWITCH)) {
+ advance(p);
+ parse_switch_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_CASE)) {
+ advance(p);
+ parse_case_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_DEFAULT)) {
+ advance(p);
+ parse_default_stmt(p);
+ return;
+ }
/* Expression statement. */
parse_expr(p);
cg_drop(p->cg);
@@ -3353,6 +3603,12 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
}
scope_push(p); /* parameter scope */
+ /* Per-function label namespace and switch context — both are saved here
+ * for hygiene even though C forbids nested function definitions. */
+ GotoLabel* saved_goto_labels = p->goto_labels;
+ SwitchCtx* saved_switch = p->cur_switch;
+ p->goto_labels = NULL;
+ p->cur_switch = NULL;
cg_set_loc(p->cg, fname_loc);
cg_func_begin(p->cg, &fd);
@@ -3389,6 +3645,14 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
} else {
cg_ret(p->cg, 0);
}
+ /* All goto targets must have been placed by some `name:` in the body. */
+ for (GotoLabel* gl = p->goto_labels; gl; gl = gl->next) {
+ if (!gl->placed) {
+ compiler_panic(p->c, gl->first_use, "goto to undefined label");
+ }
+ }
+ p->goto_labels = saved_goto_labels;
+ p->cur_switch = saved_switch;
cg_func_end(p->cg);
scope_pop(p);
}
diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md
@@ -80,7 +80,7 @@ natural home elsewhere.
| Case | Status | Body | Expected |
|---|---|---|---|
| `6_2_3_01_tag_ord_namespace` | · | `struct s { int v; }; int s = 42; struct s t = {0}; return s + t.v;` | 42 |
-| `6_2_3_02_label_namespace` | · | `int s = 0; goto s; s = 99; s: return 42;` | 42 |
+| `6_2_3_02_label_namespace` | ★ | `int s = 0; goto s; s = 99; s: return 42;` | 42 |
| `6_2_4_01_static_keeps_value` | ★ | helper `int next(){static int n=40; return ++n;}`; `next(); return next();` | 42 |
| `6_2_5_01_void_func_no_value` | ★ | helper `void f(int *p){*p=42;} int x; f(&x); return x;` | 42 |
@@ -287,14 +287,14 @@ cover compound typedef targets.
| `6_8_01_if_else` | ★ | `int x; if (1) x=7; else x=99; return x;` | 7 |
| `6_8_02_while_sum` | ★ | sum 0..9 with `while` | 45 |
| `6_8_03_for_sum` | ★ | sum 1..10 with `for` | 55 |
-| `6_8_04_do_while` | · | `int i=0; do { i=42; } while (0); return i;` | 42 |
+| `6_8_04_do_while` | ★ | `int i=0; do { i=42; } while (0); return i;` | 42 |
| `6_8_05_break` | ★ | `for (i=0;;i++) if (i==42) break; return i;` | 42 |
| `6_8_06_continue` | ★ | sum of evens in `[0,20)` via `continue` | 90 |
-| `6_8_07_switch_case` | · | three-arm switch returns 42 on case 2 | 42 |
-| `6_8_08_switch_fallthrough` | · | `case 1: r+=10; case 2: r+=20;` on input 1 | 30 |
-| `6_8_09_switch_default` | · | unmatched switch hits `default` | 7 |
-| `6_8_10_goto_forward` | · | `goto L; r=99; L: return 42;` | 42 |
-| `6_8_11_goto_backward` | · | counter loop built with `goto` | 10 |
+| `6_8_07_switch_case` | ★ | three-arm switch returns 42 on case 2 | 42 |
+| `6_8_08_switch_fallthrough` | ★ | `case 1: r+=10; case 2: r+=20;` on input 1 | 30 |
+| `6_8_09_switch_default` | ★ | unmatched switch hits `default` | 7 |
+| `6_8_10_goto_forward` | ★ | `goto L; r=99; L: return 42;` | 42 |
+| `6_8_11_goto_backward` | ★ | counter loop built with `goto` | 10 |
| `6_8_12_block_scope` | ★ | inner `{ int x=42; }` shadows outer | 42 |
| `6_8_13_compound_decl_mix` | ★ | declarations interleaved with statements (C99) | 42 |
| `6_8_14_return_void` | ★ | `void f(void){return;}; f(); return 42;` | 42 |