kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 75a1d2a8469928e951ed126582f3b499065ff695
parent 34beffa90bc8871bb7749008fbe1a89d3ee88cf9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 11 May 2026 09:47:19 -0700

asm/inline: track A — frontend parser for asm() statements

Adds KW_ASM/KW_BUILTIN_ASM, parse_asm_stmt with full GNU inline-asm
grammar (volatile, goto, four colon-separated lists, [name] symbolic
constraints), and dispatch from parse_stmt. Stops at cg_inline_asm
which remains a panic until track B lands.

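- AsmConstraint gains a `Sym name` field for `[name]` operands; it is 0
  when absent, so existing track-B/track-C consumers that zero-
  initialise their constraints stay source-compatible (the field is
  inserted between `str` and `dir`, so the struct layout does change).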
- volatile / __volatile__ accepted and dropped (informational); goto
  parsed at the keyword level; label list consumed but discarded.
- Outputs are captured as lvalue addresses stashed in fresh frame
  slots so the post-cg_inline_asm assignment back to user lvalues
  has somewhere to land once track B replaces the panic.
- Inputs pushed on the CG stack in declaration order per the cg.h
  docstring.

test/parse/cases/asm_01_grammar.c documents every grammar form. It is
.skip-marked because cg_inline_asm is still a panic stub on main; the
parser-only path is verifiable by hand via `parse-runner --jit FILE.c`
which prints "cg_inline_asm: not in v1 slice" once the parser
accepts the form.

Diffstat:
M src/arch/arch.h | 1 +
M src/parse/parse.c | 277 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/parse/cases/asm_01_grammar.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/parse/cases/asm_01_grammar.skip | 1 +
4 files changed, 346 insertions(+), 0 deletions(-)

diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -378,6 +378,7 @@ typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir; typedef struct AsmConstraint { const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... */ + Sym name; /* GCC `[name]` symbolic operand; 0 if absent */ u8 dir; /* AsmDir */ u8 pad[3]; } AsmConstraint; diff --git a/src/parse/parse.c b/src/parse/parse.c @@ -110,6 +110,8 @@ typedef enum CKw { KW_NORETURN, /* _Noreturn */ KW_STATIC_ASSERT, /* _Static_assert */ KW_THREAD_LOCAL, /* _Thread_local */ + KW_ASM, /* GNU `asm` */ + KW_BUILTIN_ASM, /* GNU `__asm__` */ KW_COUNT } CKw; @@ -123,6 +125,7 @@ static const char* const kw_names[KW_COUNT] = { "union", "unsigned", "void", "volatile", "while", "_Bool", "_Complex", "_Imaginary","_Alignas", "_Alignof", "_Atomic", "_Generic", "_Noreturn", "_Static_assert", "_Thread_local", + "asm", "__asm__", }; /* ============================================================ @@ -265,6 +268,10 @@ typedef struct Parser { * so it lives outside kw_names[] — matched by IDENT comparison just like * the __builtin_* family. */ Sym sym_attribute; + /* GNU `__volatile__` alias for `volatile` inside asm() qualifiers. + * `volatile`/`KW_VOLATILE` already lives in kw_names[]; the doubled- + * underscore spelling is sym-compared in parse_asm_stmt. */ + Sym sym_volatile_alias; Sym sym_a_load_n; Sym sym_a_store_n; Sym sym_a_exchange_n; @@ -5693,6 +5700,270 @@ static void parse_static_assert(Parser* p) { } } +/* GNU inline-asm statement (Track A — frontend only). 
+ * + * asm-stmt := ('asm'|'__asm__') ['volatile'|'__volatile__'] ['goto'] + * '(' template-string + * [':' outputs [':' inputs [':' clobbers [':' labels]]]] + * ')' ';' + * outputs := output (',' output)* + * output := ['[' name ']'] string-literal '(' lvalue-expr ')' + * inputs := input (',' input)* + * input := ['[' name ']'] string-literal '(' expr ')' + * clobbers := string-literal (',' string-literal)* + * labels := identifier (',' identifier)* + * + * The leading 'asm'/'__asm__' keyword has already been consumed by + * parse_stmt. This function: + * 1. Captures each output's lvalue address into a fresh frame slot, so + * after cg_inline_asm pushes the result SValues we can store them + * back into the user's lvalues. + * 2. Pushes each input's rvalue onto the CG value stack in declaration + * order — cg_inline_asm consumes them per its docstring contract. + * 3. Calls cg_inline_asm. The cg layer (Track B) is still a panic + * stub, so any test exercising this path will hit the panic until + * Track B lands. + * 4. After return, walks outputs in REVERSE order and stores the + * top-of-stack SValue into the captured lvalue address. + * + * `volatile` / `__volatile__` are accepted and ignored (informational — + * see ASM.md §9 / INLINEASM.md §9). `goto` is accepted at the keyword + * level; the labels list, if present, is parsed and discarded — Track B + * will reject `asm goto` inside cg_inline_asm per INLINEASM.md §1. */ +typedef struct AsmOutLValue { + FrameSlot addr_slot; /* holds &lvalue (pointer) */ + const Type* ptr_ty; /* pointer-to type (for the slot/load) */ + const Type* val_ty; /* the lvalue's value type (for cg_deref) */ +} AsmOutLValue; + +static Sym parse_asm_operand_name(Parser* p) { + /* `[ ident ]` already known to start with `[` per the caller. Returns + * the interned ident Sym, or 0 if absent. 
*/ + Sym name = 0; + if (!is_punct(&p->cur, '[')) return 0; + advance(p); + if (p->cur.kind != TOK_IDENT) { + perr(p, "expected identifier inside '[name]' on asm operand"); + } + name = p->cur.v.ident; + advance(p); + expect_punct(p, ']', "']' after asm operand name"); + return name; +} + +static const char* parse_asm_str(Parser* p, const char* what) { + /* Adjacent string literals are already fused at the pp-pull boundary + * (see fuse_string_lits). Decode the fused token to raw bytes and + * intern the resulting C-string into the global pool. */ + u8* bytes; + size_t nlen = 0; + Sym s; + Tok t; + if (p->cur.kind != TOK_STR) { + perr(p, "expected string literal in %s", what); + } + t = p->cur; + advance(p); + bytes = decode_string_literal(p, &t, &nlen); + /* decode_string_literal includes a trailing NUL in nlen; strip it + * before interning so the C-string view round-trips. */ + if (nlen > 0) nlen -= 1; + s = pool_intern(p->pool, (const char*)bytes, nlen); + p->c->env->heap->free(p->c->env->heap, bytes, 0); + return pool_str(p->pool, s, NULL); +} + +static void parse_asm_stmt(Parser* p) { + /* The 'asm'/'__asm__' keyword was just consumed by parse_stmt. */ + const char* tmpl; + AsmConstraint* outs = NULL; + AsmConstraint* ins = NULL; + Sym* clobbers = NULL; + AsmOutLValue* out_lvs = NULL; + u32 nout = 0, nin = 0, nclob = 0; + u32 cap_out = 0, cap_in = 0, cap_clob = 0; + int saw_goto = 0; + SrcLoc loc = tok_loc(&p->cur); + + /* Optional `volatile` / `__volatile__` — informational, dropped. */ + for (;;) { + if (accept_kw(p, KW_VOLATILE)) continue; + if (p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_volatile_alias) { + advance(p); + continue; + } + break; + } + /* Optional `goto`. */ + if (accept_kw(p, KW_GOTO)) saw_goto = 1; + + expect_punct(p, '(', "'(' after asm"); + + /* Template string. 
*/ + tmpl = parse_asm_str(p, "asm template"); + + /* ---- outputs ---- */ + if (accept_punct(p, ':')) { + if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { + cap_out = 4; + outs = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_out); + out_lvs = (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, cap_out); + for (;;) { + AsmConstraint c; + AsmOutLValue lv; + const Type* val_ty; + const Type* ptr_ty; + FrameSlotDesc fsd; + FrameSlot slot; + memset(&c, 0, sizeof c); + memset(&lv, 0, sizeof lv); + c.name = parse_asm_operand_name(p); /* 0 if absent */ + c.str = parse_asm_str(p, "asm output constraint"); + /* Per GCC: outputs require '=' or '+'. Track B will validate; + * the parser stays lenient here. */ + if (c.str && c.str[0] == '+') c.dir = ASM_INOUT; + else c.dir = ASM_OUT; + expect_punct(p, '(', "'(' before asm output lvalue"); + /* parse_assign_expr leaves an lvalue (or rvalue) on the stack. + * We need the address; cg_addr converts an lvalue → ptr rvalue. + * Then we stash that pointer in a scratch frame slot so we can + * store back into it after cg_inline_asm returns. 
*/ + parse_assign_expr(p); + val_ty = cg_top_type(p->cg); + if (!val_ty) perr(p, "asm output: cannot determine lvalue type"); + cg_addr(p->cg); + ptr_ty = cg_top_type(p->cg); + if (!ptr_ty) perr(p, "asm output: cannot take address"); + memset(&fsd, 0, sizeof fsd); + fsd.type = ptr_ty; + fsd.size = 8; + fsd.align = 8; + fsd.kind = FS_LOCAL; + slot = cg_local(p->cg, &fsd); + cg_push_local_typed(p->cg, slot, ptr_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + lv.addr_slot = slot; + lv.ptr_ty = ptr_ty; + lv.val_ty = val_ty; + expect_punct(p, ')', "')' after asm output lvalue"); + if (nout == cap_out) { + u32 nc = cap_out * 2; + AsmConstraint* nb = + (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc); + AsmOutLValue* nlv = + (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, nc); + memcpy(nb, outs, sizeof(AsmConstraint) * nout); + memcpy(nlv, out_lvs, sizeof(AsmOutLValue) * nout); + outs = nb; + out_lvs = nlv; + cap_out = nc; + } + outs[nout] = c; + out_lvs[nout] = lv; + nout++; + if (!accept_punct(p, ',')) break; + } + } + + /* ---- inputs ---- */ + if (accept_punct(p, ':')) { + if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { + cap_in = 4; + ins = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_in); + for (;;) { + AsmConstraint c; + memset(&c, 0, sizeof c); + c.name = parse_asm_operand_name(p); + c.str = parse_asm_str(p, "asm input constraint"); + c.dir = ASM_IN; + expect_punct(p, '(', "'(' before asm input expression"); + /* Push input value onto the CG stack in declaration order. + * cg_inline_asm consumes them per its docstring. 
*/ + parse_assign_expr(p); + to_rvalue(p); + expect_punct(p, ')', "')' after asm input expression"); + if (nin == cap_in) { + u32 nc = cap_in * 2; + AsmConstraint* nb = + (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc); + memcpy(nb, ins, sizeof(AsmConstraint) * nin); + ins = nb; + cap_in = nc; + } + ins[nin++] = c; + if (!accept_punct(p, ',')) break; + } + } + + /* ---- clobbers ---- */ + if (accept_punct(p, ':')) { + if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) { + cap_clob = 4; + clobbers = (Sym*)arena_array(p->c->tu, Sym, cap_clob); + for (;;) { + const char* cstr; + Sym cs; + cstr = parse_asm_str(p, "asm clobber"); + cs = pool_intern_cstr(p->pool, cstr); + if (nclob == cap_clob) { + u32 nc = cap_clob * 2; + Sym* nb = (Sym*)arena_array(p->c->tu, Sym, nc); + memcpy(nb, clobbers, sizeof(Sym) * nclob); + clobbers = nb; + cap_clob = nc; + } + clobbers[nclob++] = cs; + if (!accept_punct(p, ',')) break; + } + } + + /* ---- labels (asm goto) ---- */ + if (accept_punct(p, ':')) { + /* Parse-only: consume identifier list. cg_inline_asm rejects + * asm-goto in v1 per INLINEASM.md §1. */ + if (!is_punct(&p->cur, ')')) { + for (;;) { + if (p->cur.kind != TOK_IDENT) { + perr(p, "expected label identifier in asm-goto label list"); + } + advance(p); + if (!accept_punct(p, ',')) break; + } + } + } + } + } + } + + expect_punct(p, ')', "')' to close asm"); + expect_punct(p, ';', "';' after asm statement"); + + (void)saw_goto; /* parsed; cg layer rejects asm-goto in v1 */ + cg_set_loc(p->cg, loc); + cg_inline_asm(p->cg, tmpl, outs, nout, ins, nin, clobbers, nclob); + + /* For each output (in reverse so the last output is on top first), + * store the SValue cg_inline_asm pushed back into the captured lvalue + * via the address we stashed. cg_inline_asm is currently a panic stub + * (Track B), so this loop is unreachable until Track B lands. */ + if (nout > 0) { + u32 i; + for (i = nout; i-- > 0;) { + AsmOutLValue* lv = &out_lvs[i]; + /* Stack: [..., out_val]. 
Push the address (load from slot, then + * deref to make it an lvalue), swap, store, drop. */ + cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty); + cg_load(p->cg); + cg_deref(p->cg, lv->val_ty); + cg_swap(p->cg); + cg_store(p->cg); + cg_drop(p->cg); + } + } +} + static void parse_compound_stmt(Parser* p) { expect_punct(p, '{', "'{'"); scope_push(p); @@ -5800,6 +6071,11 @@ static void parse_stmt(Parser* p) { parse_default_stmt(p); return; } + if (is_kw(p, &p->cur, KW_ASM) || is_kw(p, &p->cur, KW_BUILTIN_ASM)) { + advance(p); + parse_asm_stmt(p); + return; + } /* Expression statement. */ parse_expr(p); cg_drop(p->cg); @@ -6331,6 +6607,7 @@ void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) { p.sym_b_va_end = pool_intern_cstr(p.pool, "__builtin_va_end"); p.sym_b_va_copy = pool_intern_cstr(p.pool, "__builtin_va_copy"); p.sym_attribute = pool_intern_cstr(p.pool, "__attribute__"); + p.sym_volatile_alias = pool_intern_cstr(p.pool, "__volatile__"); p.sym_a_load_n = pool_intern_cstr(p.pool, "__atomic_load_n"); p.sym_a_store_n = pool_intern_cstr(p.pool, "__atomic_store_n"); p.sym_a_exchange_n = pool_intern_cstr(p.pool, "__atomic_exchange_n"); diff --git a/test/parse/cases/asm_01_grammar.c b/test/parse/cases/asm_01_grammar.c @@ -0,0 +1,67 @@ +/* Track A — frontend parser for GNU inline-asm statements. + * + * Exercises every limb of the asm-stmt grammar: + * - both keyword spellings: `asm` and `__asm__` + * - `volatile` and `__volatile__` qualifiers (informational, dropped) + * - `goto` keyword (parsed; cg layer rejects asm-goto in v1) + * - the four colon-separated lists: outputs, inputs, clobbers, labels + * - `[name]` symbolic operands + * - adjacent string-literal fusion in the template + * - empty intermediate sections (`asm("..." 
: : "r"(x))`) + * + * This case is .skip-marked because cg_inline_asm is a panic stub on + * main (Track B not yet landed) — every asm-stmt the parser accepts + * panics inside cg with "cg_inline_asm: not in v1 slice", which is + * the documented expected behavior for Track A landing first. To + * sanity-check the parser by hand: run `parse-runner --jit FILE.c`; + * a "cg_inline_asm: not in v1 slice" diagnostic means the parser + * accepted the form. Lifting the .skip happens when Track B replaces + * the panic in src/cg/cg.c. + * + * The cg panic aborts compilation at the first asm-stmt, so to verify + * a specific form by hand, hoist it to the top of test_main. */ + +int test_main(void) { + int rc = 42; + int a = 1, b = 2, c = 0; + + /* Smallest form. */ + asm("nop"); + + /* `__asm__` spelling, `volatile` qualifier. */ + __asm__ volatile("nop"); + + /* `__volatile__` alias. */ + asm __volatile__("nop"); + + /* Adjacent string-literal fusion in the template. */ + asm("nop\n\t" + "nop"); + + /* Inputs only. */ + __asm__ volatile("mov w0, %w0" : : "r"(rc) : "x0"); + + /* Outputs + inputs + clobbers. */ + asm("add %w0, %w1, %w2" : "=r"(c) : "r"(a), "r"(b) : "cc"); + + /* Symbolic operand names ([sum], [x], [y]). */ + asm("add %w[sum], %w[x], %w[y]" + : [sum] "=r"(c) + : [x] "r"(a), [y] "r"(b)); + + /* In-out (`+r`). */ + asm("add %w0, %w0, #1" : "+r"(rc)); + + /* Memory clobber. */ + asm volatile("dmb sy" : : : "memory"); + + /* Empty middle sections. */ + asm("nop" : : : ); + + /* asm goto — labels list. cg layer will reject in v1; the parser + * accepts the syntax. */ + asm goto("b %l[done]" : : : : done); +done: + + return rc; +} diff --git a/test/parse/cases/asm_01_grammar.skip b/test/parse/cases/asm_01_grammar.skip @@ -0,0 +1 @@ +cg_inline_asm is a panic stub until Track B (cg/opt) lands; parser-only check