kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 9d10a93d9c5f01d50072556ba9034a96dae14a27
parent 610ddf0efcd7fce88d1e3a84771212e900392ae2
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 20 May 2026 15:48:31 -0700

c_target,c-frontend: emit structured for/break/continue from SCOPE_LOOP

CG already exposed structured CF (cfree_cg_scope_begin/end/break/continue
with SCOPE_LOOP/BLOCK/IF in the CGTarget vtable), and the toy frontend
already drove it. The C target was throwing the structure away — both
scope_begin and scope_end were inert and every back-edge / exit rendered
as `goto Lk;`. The C frontend's parse_stmt was a separate bypass: it
went straight to raw cg_label_new / cg_jump for while/switch instead of
the scope API.

Two changes that together turn the SCOPE_LOOP signal into readable C:

c_target: c_scope_begin opens `for (;;) {`; c_scope_end emits a
defensive `break;` and `}`. The defensive break is unconditional because
CG places the break label inside the for-body (just before scope_end),
so any inbound goto — including a nested loop's labeled break — needs an
explicit `break;` after the label or the for would re-iterate on
fall-through. c_jump / c_cmp_branch translate any jump whose target
matches the innermost scope's break or continue label — provided that
innermost scope is a structured one — into C `break;` / `continue;`
(and `if (cond) break;` / `if (cond) continue;` for the branch shape);
every other target stays as a raw `goto Lk;` since C `break`/`continue`
only escape the innermost loop/switch.

C frontend: parse_while_stmt and parse_switch_stmt now drive
cfree_cg_scope_begin/end. Two new accessors,
cfree_cg_scope_break_label and cfree_cg_scope_continue_label, let the
frontend reuse the scope's own labels as cur_break/cur_continue — so
parse_break_stmt/parse_continue_stmt keep using plain cg_jump and the
C-target translation does the structuring. for-loop and do-while stay
on raw labels: their continue semantics (step-then-cond, cond-after-
body) don't fit SCOPE_LOOP's continue-label-at-body-top placement and
would need a different scope kind for clean output.

While loops and switches on path C now read as `for (;;) { ... break;
... continue; ... }`. test-parse 3584/0/0, test-toy 610/0/0, parse path C
419/0/29 (same pre-existing skips), toy path C 124/0/3, and the
standalone gcc-14 + clang sweep is 434/434 under -Wall -Wextra -Werror.

Diffstat:
Minclude/cfree/cg.h | 8++++++++
Mlang/c/parse/parse_stmt.c | 23+++++++++++++++++------
Msrc/arch/c_target/emit.c | 86+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Msrc/arch/c_target/internal.h | 6+++++-
Msrc/cg/control.c | 12++++++++++++
5 files changed, 113 insertions(+), 22 deletions(-)

diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -434,6 +434,14 @@ void cfree_cg_alloca(CfreeCg*, uint32_t align, CfreeCgTypeId result_ptr_type); * the loop header, not the scope exit. */ CfreeCgScope cfree_cg_scope_begin(CfreeCg*, CfreeCgTypeId result_type); void cfree_cg_scope_end(CfreeCg*, CfreeCgScope); +/* Expose the labels CG minted for this scope. Frontends that emit + * unstructured `jump`/`branch` ops (rather than going through the + * scope_break/scope_continue helpers) can use these to land control at + * the scope's break/continue points — handy when the same backend needs + * to translate label-targeted jumps back into structured `break;`/ + * `continue;` later (the C source target does this). */ +CfreeCgLabel cfree_cg_scope_break_label(CfreeCg*, CfreeCgScope); +CfreeCgLabel cfree_cg_scope_continue_label(CfreeCg*, CfreeCgScope); void cfree_cg_break(CfreeCg*, CfreeCgScope); void cfree_cg_break_true(CfreeCg*, CfreeCgScope); void cfree_cg_break_false(CfreeCg*, CfreeCgScope); diff --git a/lang/c/parse/parse_stmt.c b/lang/c/parse/parse_stmt.c @@ -70,12 +70,18 @@ static void parse_if_stmt(Parser* p) { } static void parse_while_stmt(Parser* p) { - CGLabel L_top = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); + /* Drive the structured-CF API so the C-source target can lower this to + * `for (;;) { … break; … continue; }` instead of goto soup. The labels + * the scope mints are reused as cur_break/cur_continue, so parse_break + * /parse_continue/etc. keep using their existing raw `cg_jump` calls — + * the C target recognizes the labels as the innermost scope's + * boundaries and emits the structured keywords on its own. 
*/ + CfreeCgScope scope = cfree_cg_scope_begin(p->cg, CFREE_CG_TYPE_NONE); + CGLabel L_top = cfree_cg_scope_continue_label(p->cg, scope); + CGLabel L_end = cfree_cg_scope_break_label(p->cg, scope); CGLabel saved_break = p->cur_break; CGLabel saved_continue = p->cur_continue; expect_punct(p, '(', "'('"); - cg_label_place(p->cg, L_top); parse_expr(p); to_rvalue(p); if (!c_type_is_scalar(cg_top_type(p->cg))) { @@ -89,7 +95,7 @@ static void parse_while_stmt(Parser* p) { p->cur_break = saved_break; p->cur_continue = saved_continue; cg_jump(p->cg, L_top); - cg_label_place(p->cg, L_end); + cfree_cg_scope_end(p->cg, scope); } static void parse_for_stmt(Parser* p) { @@ -303,8 +309,13 @@ static void parse_default_stmt(Parser* p) { } static void parse_switch_stmt(Parser* p) { + /* Wrap the whole switch in a structured scope so the C-source target + * renders `break;` as the keyword. Continue isn't applicable to + * switch (C `continue` skips switches and targets the enclosing loop), + * so cur_continue is left alone. */ + CfreeCgScope scope = cfree_cg_scope_begin(p->cg, CFREE_CG_TYPE_NONE); CGLabel L_dispatch = cg_label_new(p->cg); - CGLabel L_end = cg_label_new(p->cg); + CGLabel L_end = cfree_cg_scope_break_label(p->cg, scope); CGLabel saved_break = p->cur_break; SwitchCtx ctx; SwitchCtx* saved_switch = p->cur_switch; @@ -366,7 +377,7 @@ static void parse_switch_stmt(Parser* p) { if (ctx.default_label) { cg_jump(p->cg, ctx.default_label); } - cg_label_place(p->cg, L_end); + cfree_cg_scope_end(p->cg, scope); } void parse_static_assert(Parser* p) { diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c @@ -1390,14 +1390,35 @@ void c_label_place(CGTarget* T, Label l) { t->last_was_terminator = 0; } +/* If `l` is the innermost structured scope's break/continue label, return + * the C keyword that exits/iterates that scope (a literal `break` or + * `continue`). NULL means "fall back to goto." 
Matches only the innermost + * scope because C `break`/`continue` only escape the nearest enclosing + * loop/switch — outer-scope targets must stay as goto. */ +static const char* c_scope_kw_for_label(CTarget* t, Label l) { + if (t->nscopes == 0) return NULL; + const CScopeInfo* s = &t->scopes[t->nscopes - 1u]; + if (!s->structured) return NULL; + if (l == s->break_label) return "break"; + if (l == s->continue_label) return "continue"; + return NULL; +} + void c_jump(CGTarget* T, Label l) { CTarget* t = (CTarget*)T; if (t->last_was_terminator) return; - char buf[24]; - c_label_name(l, buf, sizeof buf); - cbuf_puts(&t->body, " goto "); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, ";\n"); + const char* kw = c_scope_kw_for_label(t, l); + if (kw) { + cbuf_puts(&t->body, " "); + cbuf_puts(&t->body, kw); + cbuf_puts(&t->body, ";\n"); + } else { + char buf[24]; + c_label_name(l, buf, sizeof buf); + cbuf_puts(&t->body, " goto "); + cbuf_puts(&t->body, buf); + cbuf_puts(&t->body, ";\n"); + } t->last_was_terminator = 1; } @@ -1408,20 +1429,35 @@ void c_cmp_branch(CGTarget* T, CmpOp op, Operand a, Operand b, Label l) { if (!cmp_to_c(op)) { compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op); } - char buf[24]; - c_label_name(l, buf, sizeof buf); + const char* kw = c_scope_kw_for_label(t, l); cbuf_puts(&t->body, " if ("); c_emit_cmp_operands(t, op, a, b); - cbuf_puts(&t->body, ") goto "); - cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, ";\n"); + if (kw) { + cbuf_puts(&t->body, ") "); + cbuf_puts(&t->body, kw); + cbuf_puts(&t->body, ";\n"); + } else { + char buf[24]; + c_label_name(l, buf, sizeof buf); + cbuf_puts(&t->body, ") goto "); + cbuf_puts(&t->body, buf); + cbuf_puts(&t->body, ";\n"); + } } /* ===== scopes ===== - * CG mints break/continue labels itself and places them around the body, so - * scope_begin/end have nothing to emit. 
We retain the kind+labels in case - * break_to/continue_to are invoked (legacy code path not used by the public - * api but reachable via opt's lowering — which the C target bypasses). */ + * + * SCOPE_LOOP maps to C's `for (;;) { ... }`. CG places the continue label + * just before `scope_begin` and the break label just before `scope_end` + * (see src/cg/control.c:208,253). The C target leaves those label + * placements in the body — they sit just before `for (;;) {` and just + * before the closing `break; }` respectively, so any outer-scope `goto continue_lbl` or + * `goto break_lbl` (e.g. a nested loop's `continue` targeting this + * outer loop) still resolves. Inside the `for` body, `c_jump` and + * `c_cmp_branch` translate jumps whose target is the *innermost* scope's + * break/continue label into `break;` / `continue;`; outer-scope targets + * fall back to `goto`. The redundant `Lk: ;` adjacent to the `for` is + * cosmetic; gcc/clang fold it. */ static void c_grow_scopes(CTarget* t, u32 needed) { Heap* h = t->c->ctx->heap; @@ -1444,7 +1480,14 @@ CGScope c_scope_begin(CGTarget* T, const CGScopeDesc* d) { t->scopes[idx].kind = d->kind; t->scopes[idx].break_label = d->break_label; t->scopes[idx].continue_label = d->continue_label; + t->scopes[idx].structured = 0; t->nscopes += 1u; + if (d->kind == SCOPE_LOOP) { + cbuf_puts(&t->body, " for (;;) {\n"); + t->scopes[idx].structured = 1; + t->last_was_terminator = 0; + return (CGScope)(idx + 1u); + } /* SCOPE_IF carries a cond consumed here. The public CfreeCg API always * emits SCOPE_LOOP, so this branch only fires for internal callers. */ if (d->kind == SCOPE_IF) { @@ -1472,7 +1515,20 @@ void c_scope_end(CGTarget* T, CGScope s) { compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, "C target: scope_end on invalid handle"); } - /* LIFO pop. */ + u32 idx = (u32)s - 1u; + if (t->scopes[idx].structured) { + /* CG places break_label just before scope_end, so the label sits + * inside the for-body.
Anything that lands on it (including a + * `goto break_lbl` from a nested scope's labeled break) needs to + * exit the for — without an explicit `break;`, fall-through would + * iterate again. Always emit; if the body already terminated the + * defensive break is dead but harmless. */ + cbuf_puts(&t->body, " break;\n"); + cbuf_puts(&t->body, " }\n"); + /* The closing brace is not a terminator; control can fall through it + * (e.g., off the end of a void function). */ + t->last_was_terminator = 0; + } t->nscopes -= 1u; } diff --git a/src/arch/c_target/internal.h b/src/arch/c_target/internal.h @@ -120,7 +120,11 @@ typedef struct CTarget { typedef struct CScopeInfo { u8 kind; /* ScopeKind */ - u8 pad[3]; + /* Set when the C target emitted a `for (;;) { ... }` wrapper around + * this scope. Drives jump/cmp_branch's translation of break/continue + * labels into C `break;`/`continue;` and scope_end's `}`. */ + u8 structured; + u8 pad[2]; Label break_label; Label continue_label; } CScopeInfo; diff --git a/src/cg/control.c b/src/cg/control.c @@ -242,6 +242,18 @@ CfreeCgScope cfree_cg_scope_begin(CfreeCg* g, CfreeCgTypeId result_type) { return api_scope_handle(idx, s->generation); } +CfreeCgLabel cfree_cg_scope_break_label(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s = api_scope_from_handle(g, scope, 0, + "CfreeCg: scope_break_label"); + return s ? (CfreeCgLabel)s->break_lbl : (CfreeCgLabel)0; +} + +CfreeCgLabel cfree_cg_scope_continue_label(CfreeCg* g, CfreeCgScope scope) { + ApiCgScope* s = api_scope_from_handle(g, scope, 0, + "CfreeCg: scope_continue_label"); + return s ? (CfreeCgLabel)s->continue_lbl : (CfreeCgLabel)0; +} + void cfree_cg_scope_end(CfreeCg* g, CfreeCgScope scope) { ApiCgScope* s = api_scope_from_handle(g, scope, 1, "CfreeCg: scope_end"); if (!s) return;