kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 4fb9c5b2564b3755aea2b0f468b93988c849dd32
parent 8f565bea093f2924ca3ca86ac810a02501b9e026
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 10 May 2026 10:35:06 -0700

parse: Phase 5 — switch, goto, do-while, labels

Adds the remaining §6.8 statement forms behind a §6.2.3 label namespace:
do-while, switch/case/default with a single-pass compare-chain dispatch,
goto with forward/backward resolution, and labeled statements detected by
peek1(':') lookahead. Per-function GotoLabel chain and SwitchCtx stack
saved/restored across function bodies; cg_func_end checks every goto
target was placed.

Flips corpus rows 6_2_3_02, 6_8_04, 6_8_07–11 to ★.

Diffstat:
Mdoc/parser-status.md | 42++++++++++++++++++++++++++++++++++--------
Msrc/parse/parse.c | 264+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/parse/CORPUS.md | 14+++++++-------
3 files changed, 305 insertions(+), 15 deletions(-)

diff --git a/doc/parser-status.md b/doc/parser-status.md @@ -188,17 +188,43 @@ Unlocks (status as landed): `6_2_4_01` ★, `6_7_02–04` ★, `6_9_03` ★, --- -## Phase 5 — Statement completeness ⬜ +## Phase 5 — Statement completeness ✅ Switch family, goto/labels, do-while, void return, label namespace. -- [ ] `switch` / `case` / `default` (incl. fall-through, default-only) -- [ ] `goto` forward and backward -- [ ] User labels (separate namespace from ordinary identifiers) -- [ ] `do { } while ()` -- [ ] `return;` from void function - -Unlocks: `6_2_3_02`, `6_8_04`, `6_8_07–11`, `6_8_14`. +- [x] `switch` / `case` / `default` (incl. fall-through, default-only) +- [x] `goto` forward and backward +- [x] User labels (separate namespace from ordinary identifiers) +- [x] `do { } while ()` +- [x] `return;` from void function + +Phase 5 also added: + - `SwitchCtx` parser-side stack tracking the innermost switch's case list, + optional default label, and a frame slot holding the controlling + expression's value. Lowering is single-pass: evaluate-and-stash, jump + over the body to a dispatch trampoline, parse the body (cases push + `(value, label)` onto the ctx as they're encountered), then emit a + compare-chain that selects the matching label or falls through to the + end of the switch. No jump-table primitive in cg today; a dense-case + optimization can plug in later through opt without touching the parser. + - `GotoLabel` per-function label namespace (separate chain from the + ordinary identifier scope, §6.2.3 ¶1). CGLabel is allocated lazily on + first reference so forward and backward gotos share one entry; label + placement (`name:`) marks the entry as resolved. At cg_func_end the + parser checks every entry was placed and panics with the first-use + location otherwise. + - Labeled-statement detection in `parse_stmt` via `peek1(':')` lookahead + on a non-keyword IDENT. The label form short-circuits before the + expression-statement fallback, so `s: return 42;` doesn't get parsed + as `s` (the int) followed by stray `:`. + - `parse_function_body` saves/restores `goto_labels` and `cur_switch` + around the function so the per-function state never leaks even if a + pathological caller nests definitions. + +Unlocks (status as landed): `6_2_3_02` ★, `6_8_04` ★, `6_8_07–11` ★, +`6_8_14` ★ (was already passing — bare `return;` in void was wired up by +parse_return_stmt before Phase 5; the corpus row stays under this phase +because it's the §6.8.6 spec slot). --- diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -179,6 +179,38 @@ struct Scope { * Parser context * ============================================================ */ +/* Switch dispatch: each `case K:` records (value, label) into the innermost + * switch context. After the body, the dispatch chain at L_dispatch loads the + * saved switch value, compares against each entry, and branches to its label. */ +typedef struct CaseEntry CaseEntry; +struct CaseEntry { + i64 value; + CGLabel label; + CaseEntry* next; /* LIFO; reverse-walked at dispatch emit time */ +}; + +typedef struct SwitchCtx SwitchCtx; +struct SwitchCtx { + CaseEntry* cases; /* LIFO, nodes arena-allocated */ + CGLabel default_label; /* 0 if no `default:` seen */ + FrameSlot value_slot; /* holds the switch expression value */ + const Type* value_type; /* type of the switch expression */ + SwitchCtx* parent; +}; + +/* Labels live in a per-function namespace separate from ordinary identifiers + * (§6.2.3 ¶1). One entry per unique label name; CGLabel is allocated lazily + * on first reference (whether goto-forward or label-place comes first). */ +typedef struct GotoLabel GotoLabel; +struct GotoLabel { + Sym name; + CGLabel label; + u8 placed; /* the matching `name:` was seen */ + u8 pad[3]; + SrcLoc first_use; + GotoLabel* next; +}; + typedef struct Parser { Compiler* c; Pp* pp; @@ -202,6 +234,13 @@ typedef struct Parser { CGLabel cur_break; CGLabel cur_continue; + /* Innermost switch (`case`/`default` bind here). NULL outside any switch. + * `break` still goes through `cur_break`, which the switch sets. */ + SwitchCtx* cur_switch; + + /* Per-function label chain. Reset across each function definition. */ + GotoLabel* goto_labels; + /* VLA bookkeeping. parse_decl_suffix emits the size-expression code at * suffix-parse time (because the tokens are about to vanish) and stashes * the i64 count in `vla_pending_count_slot`; parse_init_declarator picks @@ -3134,6 +3173,182 @@ static void parse_continue_stmt(Parser* p) { expect_punct(p, ';', "';' after continue"); } +static void parse_do_stmt(Parser* p) { + CGLabel L_top = cg_label_new(p->cg); + CGLabel L_cond = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + CGLabel saved_break = p->cur_break; + CGLabel saved_continue = p->cur_continue; + cg_label_place(p->cg, L_top); + p->cur_break = L_end; + p->cur_continue = L_cond; + parse_stmt(p); + p->cur_break = saved_break; + p->cur_continue = saved_continue; + cg_label_place(p->cg, L_cond); + if (!is_kw(p, &p->cur, KW_WHILE)) perr(p, "expected 'while' after do-body"); + advance(p); /* while */ + expect_punct(p, '(', "'('"); + parse_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')' after do-while condition"); + expect_punct(p, ';', "';' after do-while"); + cg_branch_true(p->cg, L_top); + cg_label_place(p->cg, L_end); +} + +static GotoLabel* label_get_or_create(Parser* p, Sym name, SrcLoc loc) { + GotoLabel* gl; + for (gl = p->goto_labels; gl; gl = gl->next) { + if (gl->name == name) return gl; + } + gl = arena_new(p->c->tu, GotoLabel); + if (!gl) perr(p, "out of memory in label_get_or_create"); + memset(gl, 0, sizeof *gl); + gl->name = name; + gl->label = cg_label_new(p->cg); + gl->placed = 0; + gl->first_use = loc; + gl->next = p->goto_labels; + p->goto_labels = gl; + return gl; +} + +static void parse_goto_stmt(Parser* p) { + Sym name; + SrcLoc loc; + GotoLabel* gl; + if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) { + perr(p, "expected label name after 'goto'"); + } + name = p->cur.v.ident; + loc = tok_loc(&p->cur); + advance(p); + expect_punct(p, ';', "';' after goto"); + gl = label_get_or_create(p, name, loc); + cg_jump(p->cg, gl->label); +} + +/* `IDENT ':' STMT` — labeled statement. The IDENT lookup happens in the label + * namespace, not the ordinary identifier scope. Caller has already verified + * that cur is a non-keyword IDENT and the next token is ':'. */ +static void parse_label_stmt(Parser* p) { + Sym name = p->cur.v.ident; + SrcLoc loc = tok_loc(&p->cur); + GotoLabel* gl; + advance(p); /* IDENT */ + advance(p); /* ':' */ + gl = label_get_or_create(p, name, loc); + if (gl->placed) perr(p, "duplicate label"); + gl->placed = 1; + cg_label_place(p->cg, gl->label); + parse_stmt(p); +} + +static void parse_case_stmt(Parser* p) { + i64 v; + CGLabel L; + CaseEntry* ce; + SrcLoc loc = tok_loc(&p->cur); + if (!p->cur_switch) perr(p, "'case' label not in switch statement"); + v = eval_const_int(p, loc); + expect_punct(p, ':', "':' after case constant"); + L = cg_label_new(p->cg); + cg_label_place(p->cg, L); + ce = arena_new(p->c->tu, CaseEntry); + if (!ce) perr(p, "out of memory in parse_case_stmt"); + ce->value = v; + ce->label = L; + ce->next = p->cur_switch->cases; + p->cur_switch->cases = ce; + parse_stmt(p); +} + +static void parse_default_stmt(Parser* p) { + CGLabel L; + if (!p->cur_switch) perr(p, "'default' label not in switch statement"); + expect_punct(p, ':', "':' after default"); + if (p->cur_switch->default_label != 0) perr(p, "duplicate 'default' label"); + L = cg_label_new(p->cg); + cg_label_place(p->cg, L); + p->cur_switch->default_label = L; + parse_stmt(p); +} + +static void parse_switch_stmt(Parser* p) { + /* Single-pass lowering: evaluate the controlling expression once into a + * temp, jump over the body to the dispatch chain, parse the body (which + * places case/default labels and records (value, label) pairs in + * cur_switch), then emit a compare-and-branch chain that selects the + * matching label. Falls through to L_end if no case/default matches. */ + CGLabel L_dispatch = cg_label_new(p->cg); + CGLabel L_end = cg_label_new(p->cg); + CGLabel saved_break = p->cur_break; + SwitchCtx ctx; + SwitchCtx* saved_switch = p->cur_switch; + FrameSlotDesc fsd; + const Type* vty; + CaseEntry* it; + CaseEntry* prev; + CaseEntry* head; + + expect_punct(p, '(', "'('"); + parse_expr(p); + to_rvalue(p); + vty = cg_top_type(p->cg); + if (!vty) vty = ty_int(p); + expect_punct(p, ')', "')' after switch expression"); + + memset(&ctx, 0, sizeof ctx); + memset(&fsd, 0, sizeof fsd); + fsd.type = vty; + fsd.size = abi_sizeof(p->abi, vty); + fsd.align = abi_alignof(p->abi, vty); + fsd.kind = FS_LOCAL; + ctx.value_slot = cg_local(p->cg, &fsd); + ctx.value_type = vty; + ctx.parent = saved_switch; + + /* Stash the value: stack has [rv]; want [lv, rv] then store. */ + cg_push_local_typed(p->cg, ctx.value_slot, vty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + + cg_jump(p->cg, L_dispatch); + + p->cur_switch = &ctx; + p->cur_break = L_end; + parse_stmt(p); + p->cur_break = saved_break; + p->cur_switch = saved_switch; + + /* Body fall-through exits the switch. */ + cg_jump(p->cg, L_end); + + /* Emit dispatch in source order — reverse the LIFO chain. */ + cg_label_place(p->cg, L_dispatch); + prev = NULL; + head = ctx.cases; + while (head) { + CaseEntry* nxt = head->next; + head->next = prev; + prev = head; + head = nxt; + } + for (it = prev; it; it = it->next) { + cg_push_local_typed(p->cg, ctx.value_slot, vty); + cg_load(p->cg); + cg_push_int(p->cg, it->value, vty); + cg_cmp(p->cg, CMP_EQ); + cg_branch_true(p->cg, it->label); + } + if (ctx.default_label) { + cg_jump(p->cg, ctx.default_label); + } + cg_label_place(p->cg, L_end); +} + static void parse_compound_stmt(Parser* p) { expect_punct(p, '{', "'{'"); scope_push(p); @@ -3164,6 +3379,16 @@ static void parse_stmt(Parser* p) { * registers so a function body with many sequential reg-allocating * operations isn't bounded by the backend's fixed scratch window. */ cg_set_loc(p->cg, tok_loc(&p->cur)); + /* Labeled statement: `IDENT ':' STMT`. The IDENT must not be a keyword; + * peek1 disambiguates the label form from an expression statement that + * happens to start with an identifier. */ + if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) { + Tok n = peek1(p); + if (is_punct(&n, ':')) { + parse_label_stmt(p); + return; + } + } if (is_punct(&p->cur, '{')) { parse_compound_stmt(p); return; @@ -3187,6 +3412,11 @@ static void parse_stmt(Parser* p) { parse_for_stmt(p); return; } + if (is_kw(p, &p->cur, KW_DO)) { + advance(p); + parse_do_stmt(p); + return; + } if (is_kw(p, &p->cur, KW_RETURN)) { advance(p); parse_return_stmt(p); @@ -3202,6 +3432,26 @@ static void parse_stmt(Parser* p) { parse_continue_stmt(p); return; } + if (is_kw(p, &p->cur, KW_GOTO)) { + advance(p); + parse_goto_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_SWITCH)) { + advance(p); + parse_switch_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_CASE)) { + advance(p); + parse_case_stmt(p); + return; + } + if (is_kw(p, &p->cur, KW_DEFAULT)) { + advance(p); + parse_default_stmt(p); + return; + } /* Expression statement. */ parse_expr(p); cg_drop(p->cg); @@ -3353,6 +3603,12 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, } scope_push(p); /* parameter scope */ + /* Per-function label namespace and switch context — both are saved here + * for hygiene even though C forbids nested function definitions. */ + GotoLabel* saved_goto_labels = p->goto_labels; + SwitchCtx* saved_switch = p->cur_switch; + p->goto_labels = NULL; + p->cur_switch = NULL; cg_set_loc(p->cg, fname_loc); cg_func_begin(p->cg, &fd); @@ -3389,6 +3645,14 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty, } else { cg_ret(p->cg, 0); } + /* All goto targets must have been placed by some `name:` in the body. */ + for (GotoLabel* gl = p->goto_labels; gl; gl = gl->next) { + if (!gl->placed) { + compiler_panic(p->c, gl->first_use, "goto to undefined label"); + } + } + p->goto_labels = saved_goto_labels; + p->cur_switch = saved_switch; cg_func_end(p->cg); scope_pop(p); } diff --git a/test/parse/CORPUS.md b/test/parse/CORPUS.md @@ -80,7 +80,7 @@ natural home elsewhere. | Case | Status | Body | Expected | |---|---|---|---| | `6_2_3_01_tag_ord_namespace` | · | `struct s { int v; }; int s = 42; struct s t = {0}; return s + t.v;` | 42 | -| `6_2_3_02_label_namespace` | · | `int s = 0; goto s; s = 99; s: return 42;` | 42 | +| `6_2_3_02_label_namespace` | ★ | `int s = 0; goto s; s = 99; s: return 42;` | 42 | | `6_2_4_01_static_keeps_value` | ★ | helper `int next(){static int n=40; return ++n;}`; `next(); return next();` | 42 | | `6_2_5_01_void_func_no_value` | ★ | helper `void f(int *p){*p=42;} int x; f(&x); return x;` | 42 | @@ -287,14 +287,14 @@ cover compound typedef targets. | `6_8_01_if_else` | ★ | `int x; if (1) x=7; else x=99; return x;` | 7 | | `6_8_02_while_sum` | ★ | sum 0..9 with `while` | 45 | | `6_8_03_for_sum` | ★ | sum 1..10 with `for` | 55 | -| `6_8_04_do_while` | · | `int i=0; do { i=42; } while (0); return i;` | 42 | +| `6_8_04_do_while` | ★ | `int i=0; do { i=42; } while (0); return i;` | 42 | | `6_8_05_break` | ★ | `for (i=0;;i++) if (i==42) break; return i;` | 42 | | `6_8_06_continue` | ★ | sum of evens in `[0,20)` via `continue` | 90 | -| `6_8_07_switch_case` | · | three-arm switch returns 42 on case 2 | 42 | -| `6_8_08_switch_fallthrough` | · | `case 1: r+=10; case 2: r+=20;` on input 1 | 30 | -| `6_8_09_switch_default` | · | unmatched switch hits `default` | 7 | -| `6_8_10_goto_forward` | · | `goto L; r=99; L: return 42;` | 42 | -| `6_8_11_goto_backward` | · | counter loop built with `goto` | 10 | +| `6_8_07_switch_case` | ★ | three-arm switch returns 42 on case 2 | 42 | +| `6_8_08_switch_fallthrough` | ★ | `case 1: r+=10; case 2: r+=20;` on input 1 | 30 | +| `6_8_09_switch_default` | ★ | unmatched switch hits `default` | 7 | +| `6_8_10_goto_forward` | ★ | `goto L; r=99; L: return 42;` | 42 | +| `6_8_11_goto_backward` | ★ | counter loop built with `goto` | 10 | | `6_8_12_block_scope` | ★ | inner `{ int x=42; }` shadows outer | 42 | | `6_8_13_compound_decl_mix` | ★ | declarations interleaved with statements (C99) | 42 | | `6_8_14_return_void` | ★ | `void f(void){return;}; f(); return 42;` | 42 |