commit 75a1d2a8469928e951ed126582f3b499065ff695
parent 34beffa90bc8871bb7749008fbe1a89d3ee88cf9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 09:47:19 -0700
asm/inline: track A — frontend parser for asm() statements
Adds KW_ASM/KW_BUILTIN_ASM, parse_asm_stmt with full GNU inline-asm
grammar (volatile, goto, four colon-separated lists, [name] symbolic
constraints), and dispatch from parse_stmt. Stops at cg_inline_asm
which remains a panic until track B lands.
- AsmConstraint gains a `Sym name` field for `[name]` operands; field
defaults to 0, so existing track-B/track-C consumers stay binary-
compatible.
- volatile / __volatile__ accepted and dropped (informational); goto
parsed at the keyword level; label list consumed but discarded.
- Outputs are captured as lvalue addresses stashed in fresh frame
slots so the post-cg_inline_asm assignment back to user lvalues
has somewhere to land once track B replaces the panic.
- Inputs pushed on the CG stack in declaration order per the cg.h
docstring.
test/parse/cases/asm_01_grammar.c documents every grammar form. It is
.skip-marked because cg_inline_asm is still a panic stub on main; the
parser-only path is verifiable by hand via `parse-runner --jit FILE.c`
which prints "cg_inline_asm: not in v1 slice" once the parser
accepts the form.
Diffstat:
4 files changed, 346 insertions(+), 0 deletions(-)
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -378,6 +378,7 @@ typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir;
typedef struct AsmConstraint {
const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... */
+ Sym name; /* GCC `[name]` symbolic operand; 0 if absent */
u8 dir; /* AsmDir */
u8 pad[3];
} AsmConstraint;
diff --git a/src/parse/parse.c b/src/parse/parse.c
@@ -110,6 +110,8 @@ typedef enum CKw {
KW_NORETURN, /* _Noreturn */
KW_STATIC_ASSERT, /* _Static_assert */
KW_THREAD_LOCAL, /* _Thread_local */
+ KW_ASM, /* GNU `asm` */
+ KW_BUILTIN_ASM, /* GNU `__asm__` */
KW_COUNT
} CKw;
@@ -123,6 +125,7 @@ static const char* const kw_names[KW_COUNT] = {
"union", "unsigned", "void", "volatile", "while",
"_Bool", "_Complex", "_Imaginary","_Alignas", "_Alignof",
"_Atomic", "_Generic", "_Noreturn", "_Static_assert", "_Thread_local",
+ "asm", "__asm__",
};
/* ============================================================
@@ -265,6 +268,10 @@ typedef struct Parser {
* so it lives outside kw_names[] — matched by IDENT comparison just like
* the __builtin_* family. */
Sym sym_attribute;
+ /* GNU `__volatile__` alias for `volatile` inside asm() qualifiers.
+ * `volatile`/`KW_VOLATILE` already lives in kw_names[]; the doubled-
+ * underscore spelling is sym-compared in parse_asm_stmt. */
+ Sym sym_volatile_alias;
Sym sym_a_load_n;
Sym sym_a_store_n;
Sym sym_a_exchange_n;
@@ -5693,6 +5700,270 @@ static void parse_static_assert(Parser* p) {
}
}
+/* GNU inline-asm statement (Track A — frontend only).
+ *
+ * asm-stmt := ('asm'|'__asm__') ['volatile'|'__volatile__'] ['goto']
+ * '(' template-string
+ * [':' outputs [':' inputs [':' clobbers [':' labels]]]]
+ * ')' ';'
+ * outputs := output (',' output)*
+ * output := ['[' name ']'] string-literal '(' lvalue-expr ')'
+ * inputs := input (',' input)*
+ * input := ['[' name ']'] string-literal '(' expr ')'
+ * clobbers := string-literal (',' string-literal)*
+ * labels := identifier (',' identifier)*
+ *
+ * Any of the four lists may be left empty after its ':' (for example
+ * `asm("nop" : : : )`); parse_asm_stmt checks for a following ':' or ')'
+ * before trying to parse the first element of a list.
+ *
+ * The leading 'asm'/'__asm__' keyword has already been consumed by
+ * parse_stmt. parse_asm_stmt (defined further down) then:
+ * 1. Captures each output's lvalue address into a fresh frame slot, so
+ * after cg_inline_asm pushes the result SValues we can store them
+ * back into the user's lvalues.
+ * 2. Pushes each input's rvalue onto the CG value stack in declaration
+ * order — cg_inline_asm consumes them per its docstring contract.
+ * 3. Calls cg_inline_asm. The cg layer (Track B) is still a panic
+ * stub, so any test exercising this path will hit the panic until
+ * Track B lands.
+ * 4. After return, walks outputs in REVERSE order and stores the
+ * top-of-stack SValue into the captured lvalue address.
+ *
+ * `volatile` / `__volatile__` are accepted and ignored (informational —
+ * see ASM.md §9 / INLINEASM.md §9). `goto` is accepted at the keyword
+ * level; the labels list, if present, is parsed and discarded — Track B
+ * will reject `asm goto` inside cg_inline_asm per INLINEASM.md §1. The
+ * labels list is consumed whether or not `goto` was actually written.
+ *
+ * AsmOutLValue, declared directly below, is the per-output record that
+ * carries the frame slot and types from step 1 forward to step 4. */
+typedef struct AsmOutLValue {
+ FrameSlot addr_slot; /* holds &lvalue (pointer) */
+ const Type* ptr_ty; /* pointer-to type (for the slot/load) */
+ const Type* val_ty; /* the lvalue's value type (for cg_deref) */
+} AsmOutLValue;
+
+/* Parses an optional `[ident]` symbolic operand name.
+ *
+ * When the current token is not `[`, nothing is consumed and 0 is
+ * returned, so callers may invoke this unconditionally in front of every
+ * operand. Otherwise the `[`, the identifier and the expected `]` are
+ * consumed and the identifier's Sym is returned. A non-identifier after
+ * `[` is reported through perr(). */
+static Sym parse_asm_operand_name(Parser* p) {
+  Sym operand_sym;
+  if (is_punct(&p->cur, '[')) {
+    advance(p);
+    if (p->cur.kind != TOK_IDENT) {
+      perr(p, "expected identifier inside '[name]' on asm operand");
+    }
+    operand_sym = p->cur.v.ident;
+    advance(p);
+    expect_punct(p, ']', "']' after asm operand name");
+    return operand_sym;
+  }
+  return 0;
+}
+
+/* Consumes one string-literal token and returns its decoded contents as
+ * a C string obtained from the parser's pool.
+ *
+ * `what` names the construct being parsed; it is only used in the error
+ * reported through perr() when the current token is not a string
+ * literal. */
+static const char* parse_asm_str(Parser* p, const char* what) {
+  Tok lit;
+  u8* raw;
+  size_t raw_len = 0;
+  Sym interned;
+  if (p->cur.kind != TOK_STR) {
+    perr(p, "expected string literal in %s", what);
+  }
+  /* Adjacent literals were already fused at the pp-pull boundary (see
+   * fuse_string_lits), so this single token is the whole string. */
+  lit = p->cur;
+  advance(p);
+  raw = decode_string_literal(p, &lit, &raw_len);
+  /* The decoded length counts the trailing NUL; intern without it so the
+   * C-string view round-trips. */
+  if (raw_len != 0) raw_len--;
+  interned = pool_intern(p->pool, (const char*)raw, raw_len);
+  /* Release the decode buffer; the pointer handed back comes from the
+   * pool, not from `raw`. */
+  p->c->env->heap->free(p->c->env->heap, raw, 0);
+  return pool_str(p->pool, interned, NULL);
+}
+
+/* Parses the remainder of a GNU inline-asm statement and hands it to
+ * cg_inline_asm. The 'asm'/'__asm__' keyword itself has already been
+ * consumed by the caller.
+ *
+ * Qualifiers (`volatile`, `__volatile__`, `goto`) are accepted in any
+ * order before the opening parenthesis, matching GCC's asm-qualifier
+ * list (`asm goto volatile (...)` is as valid as `asm volatile goto
+ * (...)`). `volatile` is dropped; `goto` is only recorded.
+ *
+ * After the template string come up to four colon-separated lists:
+ * outputs, inputs, clobbers, labels. A later list can only appear once
+ * the colon of the previous one has been seen, which is why the four
+ * sections below are nested inside each other. Each list may be empty.
+ *
+ * Constraint and clobber arrays are allocated with arena_array from
+ * p->c->tu, start at four entries and grow by doubling. */
+static void parse_asm_stmt(Parser* p) {
+  /* The 'asm'/'__asm__' keyword was just consumed by parse_stmt. */
+  const char* tmpl;
+  AsmConstraint* outs = NULL;
+  AsmConstraint* ins = NULL;
+  Sym* clobbers = NULL;
+  AsmOutLValue* out_lvs = NULL;
+  u32 nout = 0, nin = 0, nclob = 0;
+  u32 cap_out = 0, cap_in = 0, cap_clob = 0;
+  int saw_goto = 0;
+  SrcLoc loc = tok_loc(&p->cur);
+
+  /* Optional qualifiers, in any order. `volatile` / `__volatile__` are
+   * informational and dropped; `goto` is remembered in saw_goto. */
+  for (;;) {
+    if (accept_kw(p, KW_VOLATILE)) continue;
+    if (accept_kw(p, KW_GOTO)) {
+      saw_goto = 1;
+      continue;
+    }
+    if (p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_volatile_alias) {
+      advance(p);
+      continue;
+    }
+    break;
+  }
+
+  expect_punct(p, '(', "'(' after asm");
+
+  /* Template string. */
+  tmpl = parse_asm_str(p, "asm template");
+
+  /* ---- outputs ---- */
+  if (accept_punct(p, ':')) {
+    if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
+      cap_out = 4;
+      outs = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_out);
+      out_lvs = (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, cap_out);
+      for (;;) {
+        AsmConstraint c;
+        AsmOutLValue lv;
+        const Type* val_ty;
+        const Type* ptr_ty;
+        FrameSlotDesc fsd;
+        FrameSlot slot;
+        memset(&c, 0, sizeof c);
+        memset(&lv, 0, sizeof lv);
+        c.name = parse_asm_operand_name(p); /* 0 if absent */
+        c.str = parse_asm_str(p, "asm output constraint");
+        /* Per GCC: outputs require '=' or '+'. Track B will validate;
+         * the parser stays lenient here. */
+        if (c.str && c.str[0] == '+') c.dir = ASM_INOUT;
+        else c.dir = ASM_OUT;
+        expect_punct(p, '(', "'(' before asm output lvalue");
+        /* parse_assign_expr leaves an lvalue (or rvalue) on the stack.
+         * We need the address; cg_addr converts an lvalue → ptr rvalue.
+         * Then we stash that pointer in a scratch frame slot so we can
+         * store back into it after cg_inline_asm returns. */
+        parse_assign_expr(p);
+        val_ty = cg_top_type(p->cg);
+        if (!val_ty) perr(p, "asm output: cannot determine lvalue type");
+        cg_addr(p->cg);
+        ptr_ty = cg_top_type(p->cg);
+        if (!ptr_ty) perr(p, "asm output: cannot take address");
+        memset(&fsd, 0, sizeof fsd);
+        fsd.type = ptr_ty;
+        /* NOTE(review): 8/8 hard-codes a 64-bit pointer for the scratch
+         * slot — confirm this holds for every supported target. */
+        fsd.size = 8;
+        fsd.align = 8;
+        fsd.kind = FS_LOCAL;
+        slot = cg_local(p->cg, &fsd);
+        /* Stack: [ptr] -> [ptr, slot] -> [slot, ptr] -> store -> drop. */
+        cg_push_local_typed(p->cg, slot, ptr_ty);
+        cg_swap(p->cg);
+        cg_store(p->cg);
+        cg_drop(p->cg);
+        lv.addr_slot = slot;
+        lv.ptr_ty = ptr_ty;
+        lv.val_ty = val_ty;
+        expect_punct(p, ')', "')' after asm output lvalue");
+        if (nout == cap_out) {
+          u32 nc = cap_out * 2;
+          AsmConstraint* nb =
+              (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc);
+          AsmOutLValue* nlv =
+              (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, nc);
+          memcpy(nb, outs, sizeof(AsmConstraint) * nout);
+          memcpy(nlv, out_lvs, sizeof(AsmOutLValue) * nout);
+          outs = nb;
+          out_lvs = nlv;
+          cap_out = nc;
+        }
+        outs[nout] = c;
+        out_lvs[nout] = lv;
+        nout++;
+        if (!accept_punct(p, ',')) break;
+      }
+    }
+
+    /* ---- inputs ---- */
+    if (accept_punct(p, ':')) {
+      if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
+        cap_in = 4;
+        ins = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_in);
+        for (;;) {
+          AsmConstraint c;
+          memset(&c, 0, sizeof c);
+          c.name = parse_asm_operand_name(p);
+          c.str = parse_asm_str(p, "asm input constraint");
+          c.dir = ASM_IN;
+          expect_punct(p, '(', "'(' before asm input expression");
+          /* Push input value onto the CG stack in declaration order.
+           * cg_inline_asm consumes them per its docstring. */
+          parse_assign_expr(p);
+          to_rvalue(p);
+          expect_punct(p, ')', "')' after asm input expression");
+          if (nin == cap_in) {
+            u32 nc = cap_in * 2;
+            AsmConstraint* nb =
+                (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc);
+            memcpy(nb, ins, sizeof(AsmConstraint) * nin);
+            ins = nb;
+            cap_in = nc;
+          }
+          ins[nin++] = c;
+          if (!accept_punct(p, ',')) break;
+        }
+      }
+
+      /* ---- clobbers ---- */
+      if (accept_punct(p, ':')) {
+        if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
+          cap_clob = 4;
+          clobbers = (Sym*)arena_array(p->c->tu, Sym, cap_clob);
+          for (;;) {
+            const char* cstr;
+            Sym cs;
+            cstr = parse_asm_str(p, "asm clobber");
+            cs = pool_intern_cstr(p->pool, cstr);
+            if (nclob == cap_clob) {
+              u32 nc = cap_clob * 2;
+              Sym* nb = (Sym*)arena_array(p->c->tu, Sym, nc);
+              memcpy(nb, clobbers, sizeof(Sym) * nclob);
+              clobbers = nb;
+              cap_clob = nc;
+            }
+            clobbers[nclob++] = cs;
+            if (!accept_punct(p, ',')) break;
+          }
+        }
+
+        /* ---- labels (asm goto) ---- */
+        if (accept_punct(p, ':')) {
+          /* Parse-only: consume identifier list. cg_inline_asm rejects
+           * asm-goto in v1 per INLINEASM.md §1. */
+          if (!is_punct(&p->cur, ')')) {
+            for (;;) {
+              if (p->cur.kind != TOK_IDENT) {
+                perr(p, "expected label identifier in asm-goto label list");
+              }
+              advance(p);
+              if (!accept_punct(p, ',')) break;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  expect_punct(p, ')', "')' to close asm");
+  expect_punct(p, ';', "';' after asm statement");
+
+  (void)saw_goto; /* parsed; cg layer rejects asm-goto in v1 */
+  cg_set_loc(p->cg, loc);
+  cg_inline_asm(p->cg, tmpl, outs, nout, ins, nin, clobbers, nclob);
+
+  /* For each output (in reverse so the last output is on top first),
+   * store the SValue cg_inline_asm pushed back into the captured lvalue
+   * via the address we stashed. cg_inline_asm is currently a panic stub
+   * (Track B), so this loop is unreachable until Track B lands. */
+  if (nout > 0) {
+    u32 i;
+    for (i = nout; i-- > 0;) {
+      AsmOutLValue* lv = &out_lvs[i];
+      /* Stack: [..., out_val]. Push the address (load from slot, then
+       * deref to make it an lvalue), swap, store, drop. */
+      cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty);
+      cg_load(p->cg);
+      cg_deref(p->cg, lv->val_ty);
+      cg_swap(p->cg);
+      cg_store(p->cg);
+      cg_drop(p->cg);
+    }
+  }
+}
+
static void parse_compound_stmt(Parser* p) {
expect_punct(p, '{', "'{'");
scope_push(p);
@@ -5800,6 +6071,11 @@ static void parse_stmt(Parser* p) {
parse_default_stmt(p);
return;
}
+ if (is_kw(p, &p->cur, KW_ASM) || is_kw(p, &p->cur, KW_BUILTIN_ASM)) {
+ advance(p);
+ parse_asm_stmt(p);
+ return;
+ }
/* Expression statement. */
parse_expr(p);
cg_drop(p->cg);
@@ -6331,6 +6607,7 @@ void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) {
p.sym_b_va_end = pool_intern_cstr(p.pool, "__builtin_va_end");
p.sym_b_va_copy = pool_intern_cstr(p.pool, "__builtin_va_copy");
p.sym_attribute = pool_intern_cstr(p.pool, "__attribute__");
+ p.sym_volatile_alias = pool_intern_cstr(p.pool, "__volatile__");
p.sym_a_load_n = pool_intern_cstr(p.pool, "__atomic_load_n");
p.sym_a_store_n = pool_intern_cstr(p.pool, "__atomic_store_n");
p.sym_a_exchange_n = pool_intern_cstr(p.pool, "__atomic_exchange_n");
diff --git a/test/parse/cases/asm_01_grammar.c b/test/parse/cases/asm_01_grammar.c
@@ -0,0 +1,67 @@
+/* Track A — frontend parser for GNU inline-asm statements.
+ *
+ * Exercises every limb of the asm-stmt grammar:
+ * - both keyword spellings: `asm` and `__asm__`
+ * - `volatile` and `__volatile__` qualifiers (informational, dropped)
+ * - `goto` keyword (parsed; cg layer rejects asm-goto in v1)
+ * - the four colon-separated lists: outputs, inputs, clobbers, labels
+ * - `[name]` symbolic operands
+ * - adjacent string-literal fusion in the template
+ * - empty intermediate sections (`asm("..." : : "r"(x))`)
+ *
+ * This case is .skip-marked because cg_inline_asm is a panic stub on
+ * main (Track B not yet landed) — every asm-stmt the parser accepts
+ * panics inside cg with "cg_inline_asm: not in v1 slice", which is
+ * the documented expected behavior for Track A landing first. To
+ * sanity-check the parser by hand: run `parse-runner --jit FILE.c`;
+ * a "cg_inline_asm: not in v1 slice" diagnostic means the parser
+ * accepted the form. Lifting the .skip happens when Track B replaces
+ * the panic in src/cg/cg.c.
+ *
+ * The cg panic aborts compilation at the first asm-stmt, so to verify
+ * a specific form by hand, hoist it to the top of test_main. */
+
+int test_main(void) {
+ int rc = 42;
+ int a = 1, b = 2, c = 0;
+
+ /* Smallest form: template only, no colon-separated lists. */
+ asm("nop");
+
+ /* `__asm__` spelling, `volatile` qualifier. */
+ __asm__ volatile("nop");
+
+ /* `__volatile__` alias. */
+ asm __volatile__("nop");
+
+ /* Adjacent string-literal fusion in the template. */
+ asm("nop\n\t"
+ "nop");
+
+ /* Inputs only: empty output list, one input, one clobber. */
+ __asm__ volatile("mov w0, %w0" : : "r"(rc) : "x0");
+
+ /* Outputs + inputs + clobbers. */
+ asm("add %w0, %w1, %w2" : "=r"(c) : "r"(a), "r"(b) : "cc");
+
+ /* Symbolic operand names ([sum], [x], [y]). */
+ asm("add %w[sum], %w[x], %w[y]"
+ : [sum] "=r"(c)
+ : [x] "r"(a), [y] "r"(b));
+
+ /* In-out (`+r`).
+ * NOTE(review): rc is both read and written here and the template adds
+ * 1, so once this statement really executes the `return rc` below
+ * would presumably yield 43 rather than the initial 42 — confirm which
+ * value the harness expects before lifting the .skip. */
+ asm("add %w0, %w0, #1" : "+r"(rc));
+
+ /* Memory clobber: empty outputs and inputs, clobber list only. */
+ asm volatile("dmb sy" : : : "memory");
+
+ /* All three of outputs, inputs and clobbers present but empty. */
+ asm("nop" : : : );
+
+ /* asm goto — labels list. cg layer will reject in v1; the parser
+ * accepts the syntax.
+ * NOTE(review): because the cg layer is documented to reject asm goto,
+ * this statement presumably has to move to a separate negative test
+ * before the .skip on this file can be lifted — confirm with Track B. */
+ asm goto("b %l[done]" : : : : done);
+done:
+
+ return rc;
+}
diff --git a/test/parse/cases/asm_01_grammar.skip b/test/parse/cases/asm_01_grammar.skip
@@ -0,0 +1 @@
+cg_inline_asm is a panic stub until Track B (cg/opt) lands; parser-only check