commit 40c33a8064ee77a7d0d61bad473978394e834f2e
parent 5219fba169c77ac4b66bd51bd2764a1ba1e61f38
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 09:42:47 -0700
asm/inline: track C — aa64 backend (template walker + aa_asm_block)
Adds aa64_inline_bind + aa64_asm_run_template that pre-substitute
%N/%wN/%xN/%[name]/%aN placeholders into asm source text and re-lex
through the existing aa64_asm_insn dispatch. Implements aa_asm_block
on top. Inline-mode AsmDriver constructor exposed from parse_asm.c.
Hand-rolled Operand arrays drive the new aa64_inline unit test.
Diffstat:
7 files changed, 617 insertions(+), 12 deletions(-)
diff --git a/src/arch/aa64_asm.c b/src/arch/aa64_asm.c
@@ -18,9 +18,11 @@
#include <string.h>
#include "arch/aa64_isa.h"
+#include "arch/aa64_regs.h"
#include "arch/arch.h"
#include "core/arena.h"
#include "core/pool.h"
+#include "core/strbuf.h"
#include "lex/lex.h"
#include "obj/obj.h"
#include "parse/parse_asm_helpers.h"
@@ -29,16 +31,46 @@
struct AA64Asm {
Compiler* c;
+
+ /* Inline-asm bound state. All of it is zeroed by aa64_asm_open and
+ * only becomes meaningful once aa64_inline_bind has been called.
+ * Operand indexing per GCC convention: 0..nout-1 are outputs, then
+ * nout..nout+nin-1 are inputs. Templates address into this combined
+ * list via %N / %wN / %xN / %aN. out_ops is declared mutable (intended
+ * for the binder to fill in result locations); in_ops + constraints +
+ * clobbers are read-only borrows. None of these arrays is owned by
+ * this struct. */
+ const AsmConstraint* outs; /* output constraints, nout entries */
+ Operand* out_ops; /* output operands, parallel to outs */
+ const AsmConstraint* ins; /* input constraints, nin entries */
+ const Operand* in_ops; /* input operands, parallel to ins */
+ const Sym* clobbers; /* clobber names, nclob entries */
+ u32 nout;
+ u32 nin;
+ u32 nclob;
};
+/* Allocate an AA64Asm from the compiler's `tu` arena. The whole struct
+ * is zeroed first so the inline-asm bindings start out as NULL arrays
+ * with zero counts; only the Compiler back-pointer is then set. */
AA64Asm* aa64_asm_open(Compiler* c) {
AA64Asm* a = arena_new(c->tu, AA64Asm);
+ memset(a, 0, sizeof *a);
a->c = c;
return a;
}
+/* Currently a no-op: the handle comes from an arena in aa64_asm_open and
+ * nothing is released here. Kept so open/close calls stay paired. */
void aa64_asm_close(AA64Asm* a) { (void)a; }
+/* Attach one asm block's operands to `a`.
+ *
+ * Only the pointers and counts are stored; nothing is copied, so every
+ * array must stay valid for as long as `a` is used to render a template.
+ * Outputs take operand indices 0..nout-1 and inputs follow at
+ * nout..nout+nin-1. Calling this again simply replaces the previous
+ * binding. */
+void aa64_inline_bind(AA64Asm* a,
+                      const AsmConstraint* outs, u32 nout, Operand* out_ops,
+                      const AsmConstraint* ins, u32 nin, const Operand* in_ops,
+                      const Sym* clobbers, u32 nclob) {
+  /* Outputs. */
+  a->nout = nout;
+  a->outs = outs;
+  a->out_ops = out_ops;
+
+  /* Inputs. */
+  a->nin = nin;
+  a->ins = ins;
+  a->in_ops = in_ops;
+
+  /* Clobber list. */
+  a->nclob = nclob;
+  a->clobbers = clobbers;
+}
+
/* ---- helpers ---- */
+/* Local shorthand that forwards to asm_driver_tok_is_punct. */
static int tok_punct(Tok t, u32 p) { return asm_driver_tok_is_punct(t, p); }
@@ -859,3 +891,219 @@ void aa64_asm_insn(AA64Asm* a, AsmDriver* d, Sym mnemonic) {
}
asm_driver_panic(d, "asm: unknown mnemonic");
}
+
+/* ---- inline-asm template walker (Phase 4b Track C) ---- */
+
+/* Per-call rendered-line buffer. GCC's inline asm rarely emits more
+ * than a handful of instructions per block; one line of substituted
+ * text fits comfortably inside this. Truncation panics — the operator
+ * grammar should never grow a single line beyond this without a
+ * deliberate reason. */
+#define AA64_INLINE_LINE_CAP 1024
+
+/* Append the textual name of general-purpose register `reg` to `sb`.
+ *
+ * `is64` selects the spelling: nonzero gives the x-form, zero the w-form.
+ * Register #31 is always written as the zero register (xzr / wzr); this
+ * helper never produces sp / wsp. Any other number is written as the
+ * width letter followed by the number in decimal, using one or two
+ * digits. */
+static void render_reg(StrBuf* sb, u32 reg, int is64) {
+  if (reg != 31u) {
+    u32 tens = reg / 10u;
+    u32 ones = reg % 10u;
+    strbuf_putc(sb, is64 ? 'x' : 'w');
+    if (tens != 0u) strbuf_putc(sb, (char)('0' + tens));
+    strbuf_putc(sb, (char)('0' + ones));
+    return;
+  }
+  strbuf_puts(sb, is64 ? "xzr" : "wzr");
+}
+
+/* Append an immediate operand to `sb`: a '#' followed by whatever
+ * strbuf_put_i64 writes for the signed 64-bit value `v`. */
+static void render_imm(StrBuf* sb, i64 v) {
+ strbuf_putc(sb, '#');
+ strbuf_put_i64(sb, v);
+}
+
+/* Append the addressing form for an OPK_INDIRECT operand to `sb`:
+ * `[base]` when `ofs` is zero, `[base, #ofs]` otherwise.
+ *
+ * In the base-register position of an A64 load/store, register number 31
+ * names the stack pointer, not the zero register. render_reg spells #31
+ * as `xzr`, which is not a valid base, so base #31 is written as `sp`
+ * here. Every other base is rendered in its 64-bit x-form. */
+static void render_indirect(StrBuf* sb, Reg base, i32 ofs) {
+  strbuf_putc(sb, '[');
+  if ((u32)base == 31u) {
+    strbuf_puts(sb, "sp");
+  } else {
+    render_reg(sb, (u32)base, /*is64=*/1);
+  }
+  if (ofs != 0) {
+    strbuf_puts(sb, ", ");
+    render_imm(sb, (i64)ofs);
+  }
+  strbuf_putc(sb, ']');
+}
+
+/* Report a fatal inline-asm error through compiler_panic, prefixing the
+ * message with "inline asm: ". A zeroed SrcLoc is passed because no
+ * source position is tracked at this level. Declared _Noreturn. */
+_Noreturn static void inline_panic(AA64Asm* a, const char* msg) {
+ SrcLoc loc = {0, 0, 0};
+ compiler_panic(a->c, loc, "inline asm: %s", msg);
+}
+
+/* Append the text for bound operand `idx` to `sb`.
+ *
+ * `idx` indexes the combined operand list (outputs first, then inputs);
+ * an index past the end panics. `form` is the placeholder modifier:
+ *   1  (%wN)  operand must be OPK_REG; rendered as a w-register
+ *   2  (%xN)  operand must be OPK_REG; rendered as an x-register
+ *   3  (%aN)  operand must be OPK_INDIRECT; rendered as `[base, #ofs]`
+ *   anything else (%N)  rendered according to the operand's own kind:
+ *             register -> x-form, immediate -> `#imm`, indirect -> address.
+ * A modifier applied to the wrong operand kind, or an operand kind with
+ * no default rendering, panics. */
+static void render_operand(AA64Asm* a, StrBuf* sb, u32 idx, int form) {
+  const Operand* op;
+
+  if (idx >= a->nout + a->nin) inline_panic(a, "operand index out of range");
+  if (idx < a->nout) {
+    op = &a->out_ops[idx];
+  } else {
+    op = &a->in_ops[idx - a->nout];
+  }
+
+  /* Explicit modifiers first; each one insists on a matching kind. */
+  if (form == 1) {
+    if (op->kind != OPK_REG) inline_panic(a, "%w on non-register operand");
+    render_reg(sb, (u32)op->v.reg, /*is64=*/0);
+    return;
+  }
+  if (form == 2) {
+    if (op->kind != OPK_REG) inline_panic(a, "%x on non-register operand");
+    render_reg(sb, (u32)op->v.reg, /*is64=*/1);
+    return;
+  }
+  if (form == 3) {
+    if (op->kind != OPK_INDIRECT) inline_panic(a, "%a on non-memory operand");
+    render_indirect(sb, op->v.ind.base, op->v.ind.ofs);
+    return;
+  }
+
+  /* No modifier: pick the rendering from the operand kind. */
+  if (op->kind == OPK_REG) {
+    render_reg(sb, (u32)op->v.reg, /*is64=*/1);
+  } else if (op->kind == OPK_IMM) {
+    render_imm(sb, op->v.imm);
+  } else if (op->kind == OPK_INDIRECT) {
+    render_indirect(sb, op->v.ind.base, op->v.ind.ofs);
+  } else {
+    inline_panic(a, "unsupported operand kind for %N");
+  }
+}
+
+/* Assemble one already-substituted line of inline asm.
+ *
+ * `text[0..len)` is lexed through a fresh in-memory Lexer wrapped in an
+ * inline AsmDriver; both are closed again before returning normally.
+ * Lines consisting only of spaces/tabs are ignored without opening a
+ * lexer. Leading newline tokens and '#'-introduced lines are skipped.
+ * The first remaining token must be an identifier, taken as the
+ * mnemonic; `ident . ident` is fused into one dotted mnemonic (e.g.
+ * `b.eq`) before dispatching to aa64_asm_insn. Directives are not
+ * handled here. */
+static void run_one_line(AA64Asm* a, MCEmitter* mc, const char* text,
+                         size_t len) {
+  /* Bail out early on an empty or all-blank line. */
+  size_t first = 0;
+  while (first < len && (text[first] == ' ' || text[first] == '\t')) ++first;
+  if (first == len) return;
+
+  Lexer* lx = lex_open_mem(a->c, "<inline-asm>", text, len);
+  AsmDriver* d = asm_driver_open_inline(a->c, mc, lx);
+
+  /* Step over newlines and '#' lines that precede the mnemonic. */
+  Tok head = asm_driver_peek(d);
+  for (;;) {
+    if (head.kind == TOK_PP_HASH) {
+      (void)asm_driver_next(d);
+      /* Drop everything up to the end of the '#' line. */
+      while (!asm_driver_at_eol(d)) (void)asm_driver_next(d);
+    } else if (head.kind == TOK_NEWLINE) {
+      (void)asm_driver_next(d);
+    } else {
+      break;
+    }
+    head = asm_driver_peek(d);
+  }
+
+  if (head.kind != TOK_EOF) {
+    if (head.kind != TOK_IDENT)
+      inline_panic(a, "expected mnemonic at start of inline asm line");
+    (void)asm_driver_next(d);
+    Sym mnemonic = head.v.ident;
+
+    /* Fuse `head.suffix` into a single interned mnemonic. */
+    if (asm_driver_tok_is_punct(asm_driver_peek(d), '.')) {
+      (void)asm_driver_next(d);
+      Tok suffix = asm_driver_next(d);
+      if (suffix.kind != TOK_IDENT)
+        inline_panic(a, "composite mnemonic: expected ident after '.'");
+
+      size_t head_len = 0;
+      size_t suffix_len = 0;
+      const char* head_str = pool_str(asm_driver_pool(d), mnemonic, &head_len);
+      const char* suffix_str =
+          pool_str(asm_driver_pool(d), suffix.v.ident, &suffix_len);
+
+      char joined[64];
+      size_t joined_len = head_len + 1 + suffix_len;
+      if (joined_len >= sizeof joined)
+        inline_panic(a, "composite mnemonic too long");
+      for (size_t i = 0; i < head_len; ++i) joined[i] = head_str[i];
+      joined[head_len] = '.';
+      for (size_t j = 0; j < suffix_len; ++j)
+        joined[head_len + 1 + j] = suffix_str[j];
+      mnemonic = pool_intern(asm_driver_pool(d), joined, joined_len);
+    }
+
+    aa64_asm_insn(a, d, mnemonic);
+  }
+
+  asm_driver_close_inline(d);
+  lex_close(lx);
+}
+
+/* Substitute placeholders into one template line, then assemble it.
+ *
+ * The line is the half-open range [start, end). `sb` is reset on entry,
+ * filled with the substituted text and handed to run_one_line, so the
+ * caller can reuse one StrBuf for every line. Recognised escapes:
+ *   %%         literal '%'
+ *   %N         operand N, default rendering for its kind
+ *   %wN / %xN  operand N forced to a 32-bit / 64-bit register name
+ *   %aN        operand N as a memory address
+ * N is a decimal operand number of one or more digits. All consecutive
+ * digits are consumed, as GCC does, so `%10` names operand 10 rather
+ * than operand 1 followed by a literal '0'. %[name] is rejected until
+ * constraints carry a name. A malformed placeholder, an out-of-range
+ * operand number or an overflowing line buffer panics. */
+static void render_and_run_line(AA64Asm* a, MCEmitter* mc, StrBuf* sb,
+                                const char* start, const char* end) {
+  strbuf_reset(sb);
+  for (const char* p = start; p < end; ++p) {
+    char c = *p;
+    if (c != '%') {
+      strbuf_putc(sb, c);
+      continue;
+    }
+    /* Placeholder. */
+    if (p + 1 >= end) inline_panic(a, "trailing '%' in template");
+    char n = *(p + 1);
+    if (n == '%') {
+      strbuf_putc(sb, '%');
+      ++p;
+      continue;
+    }
+    if (n == '[') {
+      /* %[name] — Track A ships the AsmConstraint name field; v1 has
+       * no name carrier yet, so this rejects with a clear message. */
+      inline_panic(a, "%[name] requires AsmConstraint.name (Track A pending)");
+    }
+    int form = 0; /* 0=default, 1=w, 2=x, 3=a */
+    if (n == 'w' || n == 'x' || n == 'a') {
+      form = (n == 'w') ? 1 : (n == 'x') ? 2 : 3;
+      ++p; /* p now sits on the modifier letter */
+      if (p + 1 >= end) inline_panic(a, "trailing '%' modifier in template");
+      n = *(p + 1);
+    }
+    if (n < '0' || n > '9')
+      inline_panic(a, "expected digit after '%'");
+    /* Accumulate the whole decimal operand number. The range check runs
+     * after every digit so the value is rejected as soon as it leaves
+     * the bound operand list. On exit p sits on the last digit and the
+     * loop increment steps past it. */
+    u32 idx = 0;
+    while (p + 1 < end && p[1] >= '0' && p[1] <= '9') {
+      ++p;
+      idx = idx * 10u + (u32)(*p - '0');
+      if (idx >= a->nout + a->nin)
+        inline_panic(a, "operand index out of range");
+    }
+    render_operand(a, sb, idx, form);
+  }
+  if (sb->truncated) inline_panic(a, "inline asm line buffer overflow");
+  run_one_line(a, mc, strbuf_cstr(sb), strbuf_len(sb));
+}
+
+/* Assemble a whole inline-asm template into `mc`.
+ *
+ * A NULL or empty template does nothing. Otherwise the template is cut
+ * into lines at every '\n' and ';' and each piece — including empty ones
+ * and the final piece before the terminating NUL — is substituted and
+ * assembled via render_and_run_line. One stack buffer of
+ * AA64_INLINE_LINE_CAP bytes is reused for every line.
+ *
+ * The split is purely textual: a ';' inside brackets or a quoted string
+ * would also end a line. That is left as a TODO until a real template
+ * needs it. */
+void aa64_asm_run_template(AA64Asm* a, MCEmitter* mc, const char* tmpl) {
+  if (tmpl == NULL || tmpl[0] == '\0') return;
+
+  char storage[AA64_INLINE_LINE_CAP];
+  StrBuf line;
+  strbuf_init(&line, storage, sizeof storage);
+
+  const char* seg = tmpl; /* start of the line being collected */
+  const char* cur = tmpl;
+  while (*cur != '\0') {
+    if (*cur == '\n' || *cur == ';') {
+      render_and_run_line(a, mc, &line, seg, cur);
+      seg = cur + 1;
+    }
+    ++cur;
+  }
+  /* Trailing piece, ending at the NUL. */
+  render_and_run_line(a, mc, &line, seg, cur);
+}
diff --git a/src/arch/aa64_asm.h b/src/arch/aa64_asm.h
@@ -32,4 +32,26 @@ void aa64_asm_close(AA64Asm*);
* MCEmitter. Diagnostics on parse failure go through compiler_panic. */
void aa64_asm_insn(AA64Asm*, AsmDriver*, Sym mnemonic);
+/* ---- inline-asm entry points (Phase 4b Track C) ---- */
+
+#include "arch/arch.h"
+
+/* Bind the operand arrays + clobbers from the cg-side asm_block call onto
+ * the AA64Asm handle. The arrays are borrowed for the lifetime of the
+ * subsequent aa64_asm_run_template call; the caller owns the storage.
+ *
+ * Operand indexing follows the GCC convention: outputs are indexed
+ * 0..nout-1, then inputs nout..nout+nin-1. Template placeholders %N
+ * resolve into this combined list. */
+void aa64_inline_bind(AA64Asm*,
+ const AsmConstraint* outs, u32 nout, Operand* out_ops,
+ const AsmConstraint* ins, u32 nin, const Operand* in_ops,
+ const Sym* clobbers, u32 nclob);
+
+/* Walk the inline-asm template, substituting placeholders into per-line
+ * source text and re-lexing each line through aa64_asm_insn. Must be
+ * called after aa64_inline_bind. Emits into `mc` (must equal the
+ * MCEmitter the caller's CGTarget is using). */
+void aa64_asm_run_template(AA64Asm*, MCEmitter* mc, const char* tmpl);
+
#endif
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -31,6 +31,7 @@
#include <string.h>
+#include "arch/aa64_asm.h"
#include "arch/aa64_isa.h"
#include "arch/arch.h"
#include "core/arena.h"
@@ -3100,16 +3101,20 @@ static void aa_asm_block(CGTarget* t, const char* tmpl,
const AsmConstraint* outs, u32 no, Operand* oo,
const AsmConstraint* ins, u32 ni, const Operand* io,
const Sym* clobs, u32 nc) {
- (void)tmpl;
- (void)outs;
- (void)no;
- (void)oo;
- (void)ins;
- (void)ni;
- (void)io;
- (void)clobs;
- (void)nc;
- aa_panic(t, "asm_block");
+ /* Per doc/INLINEASM.md §6: open a per-block AA64Asm, bind operands +
+ * clobbers, walk the template substituting placeholders into asm
+ * source text and dispatching each line through the standalone
+ * aa64_asm_insn parser.
+ *
+ * Register-name clobbers (Sym entries like "x0") are not yet routed
+ * to the aarch64 RA — Track B's cg_inline_asm is responsible for
+ * marking the RA's clobber set before/after this call. v1 trusts
+ * that contract; the binder runs the template, the RA plumbing lands
+ * with Track B. */
+ AA64Asm* a = aa64_asm_open(t->c);
+ aa64_inline_bind(a, outs, no, oo, ins, ni, io, clobs, nc);
+ aa64_asm_run_template(a, t->mc, tmpl);
+ aa64_asm_close(a);
}
static void aa_set_loc(CGTarget* t, SrcLoc loc) {
diff --git a/src/parse/parse_asm.c b/src/parse/parse_asm.c
@@ -874,6 +874,51 @@ static Sym maybe_compose_mnemonic(AsmDriver* d, Sym head) {
return pool_intern(d->pool, buf, n);
}
+/* ---- inline-asm driver constructor ----
+ *
+ * Per-arch inline-asm template walkers feed pre-substituted source text
+ * back through the per-mnemonic parsers of the standalone .s driver.
+ * This builds the AsmDriver they need around a Lexer and MCEmitter the
+ * caller already has.
+ *
+ * The driver is allocated from c->env->heap and zero-filled. It borrows
+ * `lex` and `mc` rather than owning them, takes its object builder from
+ * mc->obj and its string pool from c->global, and creates no section of
+ * its own. The per-arch handle (`d->aa64`) stays NULL: the caller opens
+ * its own AA64Asm to carry the per-block operand bindings. Release the
+ * result with asm_driver_close_inline. */
+AsmDriver* asm_driver_open_inline(Compiler* c, MCEmitter* mc, Lexer* lex) {
+  Heap* heap = (Heap*)c->env->heap;
+  AsmDriver* drv =
+      (AsmDriver*)heap->alloc(heap, sizeof(AsmDriver), _Alignof(AsmDriver));
+  memset(drv, 0, sizeof *drv);
+
+  /* Borrowed collaborators. */
+  drv->heap = heap;
+  drv->c = c;
+  drv->pool = c->global;
+  drv->lex = lex;
+  drv->mc = mc;
+  drv->ob = mc->obj;
+
+  /* Start in whatever section the MCEmitter already has active instead
+   * of leaving cur_sec unset, so code that consults the driver's current
+   * section (e.g. when emitting relocations) sees the section cg picked. */
+  drv->cur_sec = mc->section_id;
+
+  drv->aa64 = NULL; /* caller owns its own AA64Asm */
+
+  SymSecMap_init(&drv->sec_map, heap);
+  SymSymMap_init(&drv->sym_map, heap);
+  SymEquMap_init(&drv->equ_map, heap);
+  return drv;
+}
+
+/* Tear down a driver created by asm_driver_open_inline: finalize its
+ * three symbol maps, then return the driver's storage to the heap it was
+ * allocated from. The borrowed Lexer and MCEmitter are left untouched.
+ * NULL is accepted and ignored. */
+void asm_driver_close_inline(AsmDriver* d) {
+  if (d != NULL) {
+    /* Read the heap before the struct that stores it is freed. */
+    Heap* owner = d->heap;
+    SymSecMap_fini(&d->sec_map);
+    SymSymMap_fini(&d->sym_map);
+    SymEquMap_fini(&d->equ_map);
+    owner->free(owner, d, sizeof(AsmDriver));
+  }
+}
+
void parse_asm(Compiler* c, Lexer* l, MCEmitter* mc) {
AsmDriver d;
memset(&d, 0, sizeof d);
diff --git a/src/parse/parse_asm_helpers.h b/src/parse/parse_asm_helpers.h
@@ -45,4 +45,19 @@ i64 asm_driver_parse_const(AsmDriver*);
* leave *sym_out == OBJ_SYM_NONE. */
void asm_driver_parse_sym_expr(AsmDriver*, ObjSymId* sym_out, i64* off_out);
+/* ---- inline-asm constructor ----
+ *
+ * Build an AsmDriver around a memory-backed Lexer + caller-supplied
+ * MCEmitter. Used by inline-asm template walkers (one driver per asm
+ * line) to reuse the existing per-arch instruction parsers verbatim
+ * over a substituted source buffer.
+ *
+ * The driver is heap-allocated through c->env->heap and must be released
+ * with asm_driver_close_inline. It does not own the Lexer or the
+ * MCEmitter — the caller retains ownership of both. The driver does
+ * not initialize a default section; inline asm always emits into the
+ * MCEmitter's currently-active section. */
+AsmDriver* asm_driver_open_inline(Compiler*, MCEmitter*, Lexer*);
+void asm_driver_close_inline(AsmDriver*);
+
#endif
diff --git a/test/arch/aa64_inline_test.c b/test/arch/aa64_inline_test.c
@@ -0,0 +1,256 @@
+/* Phase-4b Track-C unit test for the aa64 inline-asm backend.
+ *
+ * Drives aa_asm_block (via the CGTarget vtable) directly: builds an
+ * Operand array by hand, calls the entry point against an in-process
+ * MCEmitter, and asserts the emitted .text bytes match the expected
+ * machine encoding. No parser or cg involvement — this isolates the
+ * template walker + per-mnemonic dispatch wired up in this track.
+ *
+ * Smoke case mirrors the canonical INLINEASM.md example:
+ *
+ * asm("mov w0, %w0; svc #0" : : "r"(rc) : "x0")
+ *
+ * with the input bound to register x9. The expected encoding is
+ * MOV W0, W9 (= ORR W0, WZR, W9) → 0x2a0903e0
+ * SVC #0 → 0xd4000001
+ *
+ * Builds against the internal arch/ + obj/ surface (test.mk passes
+ * -Isrc). No public-API dependency for the inline machinery itself —
+ * that lands once Track A (parser) + Track B (cg) are wired. */
+
+#include <cfree.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "arch/aa64_asm.h"
+#include "arch/arch.h"
+#include "core/buf.h"
+#include "core/core.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+
+/* ---- env ---- */
+
+/* CfreeHeap alloc hook backed by malloc. The heap handle and requested
+ * alignment are ignored; a zero-byte request yields NULL without calling
+ * malloc. */
+static void* h_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)a;
+  (void)h;
+  if (n == 0) return NULL;
+  return malloc(n);
+}
+/* CfreeHeap realloc hook: forwards to realloc(p, n). The heap handle,
+ * old size and alignment arguments are ignored. */
+static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+ (void)h;
+ (void)o;
+ (void)a;
+ return realloc(p, n);
+}
+/* CfreeHeap free hook: forwards to free(p). The heap handle and the size
+ * argument are ignored. */
+static void h_free(CfreeHeap* h, void* p, size_t n) {
+ (void)h;
+ (void)n;
+ free(p);
+}
+static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL};
+
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+ const char* fmt, va_list ap) {
+ (void)s;
+ (void)loc;
+ fprintf(stderr, "[%s] ",
+ k == CFREE_DIAG_ERROR ? "error"
+ : k == CFREE_DIAG_WARN ? "warning"
+ : "note");
+ vfprintf(stderr, fmt, ap);
+ fputc('\n', stderr);
+}
+static CfreeDiagSink g_sink = {diag_emit, 0, 0, 0};
+static CfreeEnv g_env = {&g_heap, NULL, &g_sink, NULL, 0};
+
+/* Count of EXPECT checks that have failed so far; main() reports it and
+ * exits nonzero when it is not zero. */
+static int g_fail = 0;
+/* Soft assertion: when `cond` is false, bump g_fail and print
+ * "FAIL file:line: " plus the printf-style message to stderr. Execution
+ * continues, so one run can report several failures. */
+#define EXPECT(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ g_fail++; \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ } \
+ } while (0)
+
+/* Architecture-defined opcode constants used by the expected-encoding
+ * table. Promoted to named constants per the project convention
+ * (no bare hex literals as load-bearing values). */
+#define EXPECTED_MOV_W0_W9 0x2a0903e0u /* mov w0, w9 ≡ orr w0, wzr, w9 */
+#define EXPECTED_SVC_0 0xd4000001u /* svc #0 */
+
+/* Read four bytes at offset `ofs` of section `s` via buf_read and combine
+ * them into a u32, treating the byte at the lowest offset as the least
+ * significant (little-endian). */
+static u32 read_word_le(const Section* s, u32 ofs) {
+  u8 raw[4];
+  u32 word = 0;
+  buf_read(&s->bytes, ofs, raw, 4);
+  /* Fold from the most significant byte down to the least. */
+  for (int i = 3; i >= 0; --i) {
+    word = (word << 8) | (u32)raw[i];
+  }
+  return word;
+}
+
+/* External constructors we need from the internal arch surface — these
+ * are the same entry points cg_runner uses to spin up a backend without
+ * dragging in opt or the JIT. */
+MCEmitter* mc_new(Compiler*, ObjBuilder*);
+CGTarget* cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*);
+
+/* Test driver. Creates an AArch64/Linux/ELF compiler instance, an object
+ * builder with a .text section and an MCEmitter positioned in it, then
+ * calls the CGTarget's asm_block hook with hand-built operands and
+ * compares the bytes emitted between two mc->pos() readings against the
+ * expected instruction words. Exits 0 when every EXPECT held, 1 when an
+ * EXPECT failed or the compiler panicked, 2 when the compiler could not
+ * be created. */
+int main(void) {
+ CfreeTarget t;
+ memset(&t, 0, sizeof t);
+ t.arch = CFREE_ARCH_ARM_64;
+ t.os = CFREE_OS_LINUX;
+ t.obj = CFREE_OBJ_ELF;
+ t.ptr_size = 8;
+ t.ptr_align = 8;
+
+ CfreeCompiler* cc = cfree_compiler_new(t, &g_env);
+ if (!cc) {
+ fprintf(stderr, "compiler_new failed\n");
+ return 2;
+ }
+ Compiler* c = (Compiler*)cc;
+
+ /* Any compiler panic below lands here and fails the test.
+ * NOTE(review): setjmp needs <setjmp.h>, which this file does not
+ * include directly — presumably it arrives via core/core.h; confirm. */
+ if (setjmp(c->panic)) {
+ fprintf(stderr, "FAIL: compiler panic\n");
+ cfree_compiler_free(cc);
+ return 1;
+ }
+
+ ObjBuilder* ob = obj_new(c);
+ Pool* pool = c->global;
+ ObjSecId text_sec = obj_section(ob, pool_intern_cstr(pool, ".text"),
+ SEC_TEXT, SF_EXEC | SF_ALLOC, 4);
+ MCEmitter* mc = mc_new(c, ob);
+ mc->set_section(mc, text_sec);
+ CGTarget* target = cgtarget_new(c, ob, mc);
+
+ /* ---- smoke case 1: r-input bound to x9, x0 clobber ---- */
+ {
+ AsmConstraint ins[1];
+ memset(ins, 0, sizeof ins);
+ ins[0].str = "r";
+ ins[0].dir = ASM_IN;
+
+ Operand in_ops[1];
+ memset(in_ops, 0, sizeof in_ops);
+ in_ops[0].kind = OPK_REG;
+ in_ops[0].cls = RC_INT;
+ in_ops[0].v.reg = 9; /* x9 */
+
+ Sym clobs[1];
+ clobs[0] = pool_intern_cstr(pool, "x0");
+
+ u32 start = mc->pos(mc);
+ target->asm_block(target, "mov w0, %w0; svc #0",
+ /*outs=*/NULL, /*nout=*/0, /*out_ops=*/NULL,
+ ins, /*nin=*/1, in_ops, clobs, /*nclob=*/1);
+ u32 end = mc->pos(mc);
+
+ EXPECT(end - start == 8u,
+ "smoke1: expected 8 bytes emitted, got %u", (end - start));
+ if (end - start == 8u) {
+ const Section* sec = obj_section_get(ob, text_sec);
+ u32 w0 = read_word_le(sec, start);
+ u32 w1 = read_word_le(sec, start + 4);
+ EXPECT(w0 == EXPECTED_MOV_W0_W9,
+ "smoke1: mov w0, w9 = 0x%08x, want 0x%08x", w0,
+ EXPECTED_MOV_W0_W9);
+ EXPECT(w1 == EXPECTED_SVC_0,
+ "smoke1: svc #0 = 0x%08x, want 0x%08x", w1, EXPECTED_SVC_0);
+ }
+ }
+
+ /* ---- smoke case 2: %xN forces 64-bit reg form ---- */
+ {
+ AsmConstraint ins[1] = {{0}};
+ ins[0].str = "r";
+ ins[0].dir = ASM_IN;
+ Operand in_ops[1];
+ memset(in_ops, 0, sizeof in_ops);
+ in_ops[0].kind = OPK_REG;
+ in_ops[0].cls = RC_INT;
+ in_ops[0].v.reg = 5; /* x5 */
+
+ u32 start = mc->pos(mc);
+ target->asm_block(target, "mov x1, %x0",
+ NULL, 0, NULL, ins, 1, in_ops, NULL, 0);
+ u32 end = mc->pos(mc);
+
+ EXPECT(end - start == 4u,
+ "smoke2: expected 4 bytes, got %u", (end - start));
+ if (end - start == 4u) {
+ const Section* sec = obj_section_get(ob, text_sec);
+ u32 w = read_word_le(sec, start);
+ /* MOV X1, X5 ≡ ORR X1, XZR, X5 → sf=1, opc=01, Rm=5, Rn=31, Rd=1
+ * Encoding: 0xaa0503e1 */
+ EXPECT(w == 0xaa0503e1u,
+ "smoke2: mov x1, x5 = 0x%08x, want 0xaa0503e1", w);
+ }
+ }
+
+ /* ---- smoke case 3: operand-free template split on ';' ----
+ * No operands are bound and the template contains no placeholders, so
+ * this only exercises the line splitter and mnemonic dispatch. The
+ * `%%` escape is NOT covered by this case. */
+ {
+ u32 start = mc->pos(mc);
+ /* Two NOPs separated by ';' — also exercises the line-splitter on ';'. */
+ target->asm_block(target, "nop ; nop",
+ NULL, 0, NULL, NULL, 0, NULL, NULL, 0);
+ u32 end = mc->pos(mc);
+ EXPECT(end - start == 8u, "smoke3: expected 8 bytes, got %u",
+ (end - start));
+ if (end - start == 8u) {
+ const Section* sec = obj_section_get(ob, text_sec);
+ EXPECT(read_word_le(sec, start) == 0xd503201fu, "smoke3: nop[0] != 0xd503201f");
+ EXPECT(read_word_le(sec, start + 4) == 0xd503201fu,
+ "smoke3: nop[1] != 0xd503201f");
+ }
+ }
+
+ /* ---- smoke case 4: outputs precede inputs in operand index space ---- */
+ {
+ AsmConstraint outs[1] = {{0}};
+ outs[0].str = "=r";
+ outs[0].dir = ASM_OUT;
+ Operand out_ops[1];
+ memset(out_ops, 0, sizeof out_ops);
+ out_ops[0].kind = OPK_REG;
+ out_ops[0].cls = RC_INT;
+ out_ops[0].v.reg = 7; /* x7 — caller-bound output */
+
+ AsmConstraint ins[1] = {{0}};
+ ins[0].str = "r";
+ ins[0].dir = ASM_IN;
+ Operand in_ops[1];
+ memset(in_ops, 0, sizeof in_ops);
+ in_ops[0].kind = OPK_REG;
+ in_ops[0].cls = RC_INT;
+ in_ops[0].v.reg = 9; /* x9 */
+
+ u32 start = mc->pos(mc);
+ /* %0 = output (x7), %1 = input (x9). */
+ target->asm_block(target, "mov %x0, %x1",
+ outs, 1, out_ops, ins, 1, in_ops, NULL, 0);
+ u32 end = mc->pos(mc);
+ EXPECT(end - start == 4u, "smoke4: expected 4 bytes, got %u",
+ (end - start));
+ if (end - start == 4u) {
+ const Section* sec = obj_section_get(ob, text_sec);
+ u32 w = read_word_le(sec, start);
+ /* MOV X7, X9 ≡ ORR X7, XZR, X9
+ * sf=1, opc=01 (ORR), Rm=9, Rn=31, Rd=7
+ * word = 0xaa0903e7 */
+ EXPECT(w == 0xaa0903e7u,
+ "smoke4: mov x7, x9 = 0x%08x, want 0xaa0903e7", w);
+ }
+ }
+
+ cfree_compiler_free(cc);
+
+ if (g_fail) {
+ fprintf(stderr, "%d failure(s)\n", g_fail);
+ return 1;
+ }
+ printf("aa64_inline_test: ok\n");
+ return 0;
+}
diff --git a/test/test.mk b/test/test.mk
@@ -29,9 +29,9 @@
# parse_asm / cfree_disasm_iter_* are still stubs; the harness builds
# and runs end-to-end so the wiring stays exercised. See doc/ASM.md.
-.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
+.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
-test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-lib-deps
+test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-lib-deps
test-lex: bin
@CFREE=$(abspath $(BIN)) test/lex/run.sh
@@ -117,6 +117,20 @@ $(CG_BINDER_TEST_BIN): test/cg/binder_test.c $(LIB_AR)
@mkdir -p $(dir $@)
$(CC) $(DRIVER_CFLAGS) -Isrc test/cg/binder_test.c $(LIB_AR) -o $@
+# Phase-4b Track-C: aarch64 inline-asm backend unit test (doc/INLINEASM.md
+# §7.1 "Track C"). Drives aa_asm_block (CGTarget vtable) directly with
+# hand-rolled Operand arrays and asserts the emitted .text bytes match
+# the expected machine encoding. Independent of cg / parser — it gates
+# the per-arch template walker + per-mnemonic dispatch in isolation.
+AA64_INLINE_TEST_BIN = build/test/aa64_inline_test
+
+test-aa64-inline: $(AA64_INLINE_TEST_BIN)
+ $(AA64_INLINE_TEST_BIN)
+
+$(AA64_INLINE_TEST_BIN): test/arch/aa64_inline_test.c $(LIB_AR)
+ @mkdir -p $(dir $@)
+ $(CC) $(DRIVER_CFLAGS) -Isrc test/arch/aa64_inline_test.c $(LIB_AR) -o $@
+
# Test harness binaries shared by test-elf, test-link, and test-cg.
# Declared as Make targets (not built by the run.sh scripts) so they pick
# up libcfree.a changes deterministically.