commit 5219fba169c77ac4b66bd51bd2764a1ba1e61f38
parent 75a1d2a8469928e951ed126582f3b499065ff695
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 09:38:38 -0700
asm/inline: track B — cg_inline_asm binder + opt recorder/replay
Implements cg_inline_asm constraint binding (r/=r/+r/=&r/i/m/0,
memory clobber, register clobbers, cc), w_asm_block recorder, and
IR_ASM_BLOCK replay path. Wrapped target receives bound Operand
arrays. Mock-target unit test covers each constraint kind.
Diffstat:
| M | src/cg/cg.c | | | 284 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
| M | src/opt/ir.h | | | 1 | + |
| M | src/opt/opt.c | | | 64 | +++++++++++++++++++++++++++++++++++++++++++++++++++++----------- |
| A | test/cg/binder_test.c | | | 496 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | test/test.mk | | | 18 | ++++++++++++++++-- |
5 files changed, 840 insertions(+), 23 deletions(-)
diff --git a/src/cg/cg.c b/src/cg/cg.c
@@ -1516,18 +1516,282 @@ void cg_set_loc(CG* g, SrcLoc loc) {
}
/* ============================================================
- * Inline asm — placeholder
- * ============================================================ */
+ * Inline asm — constraint binder (doc/INLINEASM.md §5).
+ *
+ * The parser pushed `nin` input SValues onto the value stack in declaration
+ * order (the Nth input is at the top). Outputs come back as fresh SValues
+ * that the parser assigns to its declared lvalues. The signature does not
+ * carry per-output Type info today; outputs that need a fresh register get
+ * an arch-default 64-bit int type. TODO(track-A): when the parser starts
+ * carrying output types alongside AsmConstraint, route them through here so
+ * RegClass and width are correct for FP/short outputs.
+ *
+ * Constraints handled:
+ * inputs : "r" (force into REG), "i" (must be IMM),
+ * "m" (materialize an INDIRECT lvalue),
+ * "0".."9" (matching: bind to out_ops[N].v.reg)
+ * outputs : "=r" (alloc fresh), "+r" (alloc fresh; expects a parallel
+ * matching input slot), "=&r" (early-clobber: alloc disjoint
+ * from any input reg)
+ * Clobbers:
+ * "memory" — spill all live RES_REG SValues so subsequent reads reload.
+ * register names — passed through to target->asm_block (the arch backend
+ * routes them through its call-clobber set).
+ * "cc" — silently ignored on aarch64 (NZCV is reserved across blocks). */
+
+/* Parse the leading run of decimal digits in a constraint string as a
+ * non-negative index (a matching constraint such as "0" or "12").
+ *
+ * Returns -1 when `s` is NULL or does not start with a digit. Parsing stops
+ * at the first non-digit; any trailing characters are ignored. A digit run
+ * too long for an int saturates at the largest int value instead of
+ * overflowing (signed overflow is undefined behaviour), so a nonsensical
+ * index still fails the caller's range check against the output count. */
+static int asm_parse_match_index(const char* s) {
+  /* Largest int, computed locally so this helper needs no extra header. */
+  const int int_max = (int)(~0u >> 1);
+  if (!s || s[0] < '0' || s[0] > '9') return -1;
+  int n = 0;
+  for (const char* p = s; *p >= '0' && *p <= '9'; ++p) {
+    const int digit = *p - '0';
+    if (n > (int_max - digit) / 10) return int_max; /* next step would wrap */
+    n = n * 10 + digit;
+  }
+  return n;
+}
+
+/* Return a pointer just past the modifier prefix of a constraint string, so
+ * the result starts at the body letter ("r", "m", ...).
+ *
+ * Recognised prefixes: "=&" (two characters), and a single "=", "+" or "&".
+ * A NULL input yields the empty string; a string with no prefix is returned
+ * unchanged. */
+static const char* asm_constraint_body(const char* s) {
+  if (!s) return "";
+  switch (s[0]) {
+    case '=':
+      /* "=&r" drops two characters, plain "=r" drops one. */
+      return s + (s[1] == '&' ? 2 : 1);
+    case '+':
+    case '&':
+      return s + 1;
+    default:
+      return s;
+  }
+}
+
+/* Return 1 when the constraint string carries the early-clobber modifier in
+ * one of the two spellings recognised here: a leading "=&" or a leading "&".
+ * Returns 0 for NULL and for every other string. */
+static int asm_is_early_clobber(const char* s) {
+  if (!s) return 0;
+  return s[0] == '&' || (s[0] == '=' && s[1] == '&');
+}
void cg_inline_asm(CG* g, const char* tmpl, const AsmConstraint* outs, u32 nout,
const AsmConstraint* ins, u32 nin, const Sym* clobbers,
u32 nclob) {
-  (void)tmpl;
-  (void)outs;
-  (void)nout;
-  (void)ins;
-  (void)nin;
-  (void)clobbers;
-  (void)nclob;
-  compiler_panic(g->c, g->cur_loc, "cg_inline_asm: not in v1 slice");
+  CGTarget* T = g->target;
+  Heap* h = g->c->env->heap;
+  /* Default output type for the v1 binder. RC_INT, 64-bit. */
+  const Type* default_out_ty = type_prim(g->pool, TY_LLONG);
+
+  /* ---- pop inputs in reverse, store in declaration order ----
+   * The last-declared input is on top of the value stack, so the first pop
+   * fills slot nin-1 and the last pop fills slot 0. */
+  SValue* in_svs = NULL;
+  if (nin) {
+    in_svs = (SValue*)h->alloc(h, sizeof(SValue) * nin, _Alignof(SValue));
+    for (u32 i = 0; i < nin; ++i) {
+      u32 idx = nin - 1u - i;
+      in_svs[idx] = pop(g);
+      ensure_reg(g, &in_svs[idx]);
+    }
+  }
+
+  /* Bound operand arrays handed to target->asm_block; zeroed so unbound
+   * slots have a well-defined value. */
+  Operand* in_ops = NULL;
+  if (nin) {
+    in_ops = (Operand*)h->alloc(h, sizeof(Operand) * nin, _Alignof(Operand));
+    memset(in_ops, 0, sizeof(Operand) * nin);
+  }
+  Operand* out_ops = NULL;
+  if (nout) {
+    out_ops = (Operand*)h->alloc(h, sizeof(Operand) * nout, _Alignof(Operand));
+    memset(out_ops, 0, sizeof(Operand) * nout);
+  }
+  /* Tracks whether each out_ops[i] reg was freshly allocated (and should be
+   * pushed back as RES_REG owning that reg) vs. shared with an input that
+   * still owns the reg. */
+  u8* out_reg_owned = NULL;
+  if (nout) {
+    out_reg_owned = (u8*)h->alloc(h, nout, 1);
+    memset(out_reg_owned, 0, nout);
+  }
+
+  /* ---- Pass 1: allocate output regs that are NOT early-clobber. ----
+   * Early-clobber (=&r) outputs are allocated in pass 3 once input regs
+   * are known so the disjoint-set property is checkable. */
+  for (u32 i = 0; i < nout; ++i) {
+    const char* body = asm_constraint_body(outs[i].str);
+    if (asm_is_early_clobber(outs[i].str)) continue;
+    if (body[0] == 'r') {
+      Reg r = alloc_reg_or_spill(g, RC_INT, default_out_ty);
+      out_ops[i] = op_reg(r, default_out_ty);
+      out_reg_owned[i] = 1;
+    } else {
+      compiler_panic(g->c, g->cur_loc,
+                     "cg_inline_asm: unsupported output constraint '%s'",
+                     outs[i].str ? outs[i].str : "");
+    }
+  }
+
+  /* ---- Pass 2: materialize inputs per constraint. ----
+   * Matching constraints ("0".."9") need their referenced output's reg to
+   * already exist; non-early outputs satisfy that after pass 1. (An output
+   * referenced by a matching input must not itself be early-clobber — that
+   * combination is meaningless; we panic below if the parser produced it.) */
+  for (u32 i = 0; i < nin; ++i) {
+    const char* s = ins[i].str ? ins[i].str : "";
+    int matched = asm_parse_match_index(s);
+    if (matched >= 0) {
+      if ((u32)matched >= nout) {
+        compiler_panic(g->c, g->cur_loc,
+                       "cg_inline_asm: matching constraint '%s' references "
+                       "out-of-range output %d",
+                       s, matched);
+      }
+      if (asm_is_early_clobber(outs[matched].str)) {
+        compiler_panic(g->c, g->cur_loc,
+                       "cg_inline_asm: matching input '%s' references "
+                       "early-clobber output =&r",
+                       s);
+      }
+      /* Force input into the output's register. If the input is already an
+       * IMM or in a different reg, materialize via target->copy/load_imm
+       * into the bound output reg. The input SValue keeps its own reg
+       * (which we'll release at the end); the binding only needs the
+       * value to be present in out_ops[matched].v.reg before the asm runs. */
+      Operand bound = out_ops[matched];
+      ensure_reg(g, &in_svs[i]);
+      if (in_svs[i].op.kind == OPK_REG &&
+          in_svs[i].op.v.reg == bound.v.reg) {
+        /* Already in place. */
+      } else if (in_svs[i].op.kind == OPK_IMM) {
+        T->load_imm(T, bound, in_svs[i].op.v.imm);
+      } else {
+        Operand src = force_reg(g, &in_svs[i], sv_type(&in_svs[i]));
+        T->copy(T, bound, src);
+      }
+      in_ops[i] = bound;
+      continue;
+    }
+    if (s[0] == 'r') {
+      in_ops[i] = force_reg(g, &in_svs[i], sv_type(&in_svs[i]));
+    } else if (s[0] == 'i') {
+      if (in_svs[i].op.kind != OPK_IMM) {
+        compiler_panic(g->c, g->cur_loc,
+                       "cg_inline_asm: 'i' constraint requires constant input");
+      }
+      in_ops[i] = in_svs[i].op;
+    } else if (s[0] == 'm') {
+      if (in_svs[i].op.kind == OPK_INDIRECT) {
+        in_ops[i] = in_svs[i].op;
+      } else if (is_lvalue(&in_svs[i].op)) {
+        const Type* lt = sv_type(&in_svs[i]);
+        const Type* pty = type_ptr(g->pool, lt ? lt : type_void(g->pool));
+        Reg r = alloc_reg_or_spill(g, RC_INT, pty);
+        Operand dst = op_reg(r, pty);
+        T->addr_of(T, dst, in_svs[i].op);
+        /* Replace the SValue's lvalue with an INDIRECT pointing at the
+         * freshly-loaded address; the new INDIRECT owns the base reg, so
+         * release() at the end of the block will free it. The operand
+         * cannot be OPK_INDIRECT here (the branch above took that case),
+         * so there is no previous base reg to free. */
+        in_svs[i].op = op_indirect(r, 0, lt);
+        in_svs[i].res = RES_REG;
+        in_ops[i] = in_svs[i].op;
+      } else {
+        compiler_panic(
+            g->c, g->cur_loc,
+            "cg_inline_asm: 'm' constraint requires an addressable operand");
+      }
+    } else {
+      compiler_panic(g->c, g->cur_loc,
+                     "cg_inline_asm: unsupported input constraint '%s'", s);
+    }
+  }
+
+  /* ---- Pass 3: allocate early-clobber outputs (=&r) disjoint from inputs.
+   * The reg pool only hands out free regs, and none of the input
+   * materializers above call free_reg on an input reg while inputs are
+   * still live, so a single alloc per output is expected to return a reg no
+   * input uses. There is no retry: the collision check below is a defensive
+   * assertion of that invariant and panics if it is ever violated. */
+  for (u32 i = 0; i < nout; ++i) {
+    if (!asm_is_early_clobber(outs[i].str)) continue;
+    const char* body = asm_constraint_body(outs[i].str);
+    if (body[0] != 'r') {
+      compiler_panic(g->c, g->cur_loc,
+                     "cg_inline_asm: unsupported early-clobber constraint '%s'",
+                     outs[i].str);
+    }
+    Reg r = alloc_reg_or_spill(g, RC_INT, default_out_ty);
+    /* Validate disjoint: walk inputs, collide-check. The pool guarantees
+     * uniqueness against currently-allocated regs, so this is belt-and-
+     * suspenders, but the panic gives a meaningful diagnostic if any
+     * future binder change breaks the invariant. */
+    for (u32 k = 0; k < nin; ++k) {
+      if (in_ops[k].kind == OPK_REG && in_ops[k].v.reg == r) {
+        compiler_panic(g->c, g->cur_loc,
+                       "cg_inline_asm: early-clobber output collided with "
+                       "input reg (binder bug)");
+      }
+      if (in_ops[k].kind == OPK_INDIRECT && in_ops[k].v.ind.base == r) {
+        compiler_panic(g->c, g->cur_loc,
+                       "cg_inline_asm: early-clobber output collided with "
+                       "input INDIRECT base (binder bug)");
+      }
+    }
+    out_ops[i] = op_reg(r, default_out_ty);
+    out_reg_owned[i] = 1;
+  }
+
+  /* ---- "memory" clobber: spill all live RES_REG SValues. ----
+   * Intern "memory" once per call; Sym equality is pointer-equal after
+   * interning. The remaining stack values become RES_SPILLED, so a later
+   * read goes through ensure_reg → reload_reg. */
+  Sym sym_memory = pool_intern_cstr(g->pool, "memory");
+  int has_memory_clobber = 0;
+  for (u32 i = 0; i < nclob; ++i) {
+    if (clobbers[i] == sym_memory) {
+      has_memory_clobber = 1;
+      break;
+    }
+  }
+  if (has_memory_clobber) {
+    for (u32 i = 0; i < g->sp; ++i) {
+      SValue* sv = &g->stack[i];
+      if (sv->res != RES_REG) continue;
+      u8 cls = class_of_sv(sv);
+      FrameSlot slot = take_spill_slot(g, cls);
+      Operand victim_reg = op_reg((Reg)reg_of_sv(sv), sv->type);
+      T->spill_reg(T, victim_reg, slot, mem_for_spill(g, sv));
+      T->free_reg(T, (Reg)reg_of_sv(sv), cls);
+      sv->spill_slot = slot;
+      sv->res = RES_SPILLED;
+      set_owned_reg(sv, (Reg)REG_NONE);
+    }
+  }
+
+  /* ---- Call the per-arch asm_block. ---- */
+  T->asm_block(T, tmpl, outs, nout, out_ops, ins, nin, in_ops, clobbers, nclob);
+
+  /* ---- Release input SValue resources. ----
+   * Inputs are consumed by the asm block. Their owned regs/slots return to
+   * the pool. Note: matching inputs that were copied into an output reg
+   * still own their original input reg — release frees that one; the
+   * output reg lives on through the pushed output SValue. */
+  for (u32 i = 0; i < nin; ++i) {
+    release(g, &in_svs[i]);
+  }
+
+  /* ---- Push outputs back as fresh SValues for the parser to assign. ----
+   * Each pushed SValue owns the freshly-allocated reg (RES_REG), so the
+   * parser's eventual cg_store on it will release the reg after consuming. */
+  for (u32 i = 0; i < nout; ++i) {
+    SValue sv = make_sv(out_ops[i], default_out_ty);
+    /* If the target overwrote out_ops[i] with a different kind (e.g. a
+     * memory location), make_sv already classified residency correctly. */
+    if (!out_reg_owned[i] && sv.res == RES_REG) {
+      /* Not owned by us — the value is borrowed from elsewhere. Treat as
+       * inherent to avoid double-free. (No production path produces this
+       * today, but the bookkeeping is explicit.) */
+      sv.res = RES_INHERENT;
+    }
+    push(g, sv);
+  }
+
+  /* Scratch arrays are per-call; hand them back to the heap. */
+  if (in_svs) h->free(h, in_svs, sizeof(SValue) * nin);
+  if (in_ops) h->free(h, in_ops, sizeof(Operand) * nin);
+  if (out_ops) h->free(h, out_ops, sizeof(Operand) * nout);
+  if (out_reg_owned) h->free(h, out_reg_owned, nout);
}
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -173,6 +173,7 @@ typedef struct IRAsmAux {
AsmConstraint* ins;
Sym* clobbers;
Operand* out_ops; /* nout slots; the wrapped target may fill in REG location */
+ Operand* in_ops; /* nin slots; recorded by w_asm_block, xlat'd at replay */
u32 nout, nin, nclob;
} IRAsmAux;
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -694,16 +694,39 @@ static void w_asm_block(CGTarget* t, const char* tmpl,
const AsmConstraint* outs, u32 nout, Operand* out_ops,
const AsmConstraint* ins, u32 nin,
const Operand* in_ops, const Sym* clobbers, u32 nclob) {
-  (void)tmpl;
-  (void)outs;
-  (void)nout;
-  (void)out_ops;
-  (void)ins;
-  (void)nin;
-  (void)in_ops;
-  (void)clobbers;
-  (void)nclob;
-  panic_unsupported(impl_of(t), "asm_block");
+  /* Recorder: append an IR_ASM_BLOCK instruction whose aux payload holds
+   * arena-owned copies of everything the call received, so the block can
+   * be re-issued against the wrapped target at replay time. */
+  OptImpl* o = impl_of(t);
+  Inst* inst = rec(o, IR_ASM_BLOCK);
+  IRAsmAux* aux = arena_znew(o->f->arena, IRAsmAux);
+
+  /* The template is duplicated into the IR arena so the IR does not depend
+   * on the lifetime of the caller's string. */
+  aux->tmpl = NULL;
+  if (tmpl) {
+    size_t len = 0;
+    for (const char* p = tmpl; *p; ++p) ++len;
+    aux->tmpl = arena_strdup(o->f->arena, tmpl, len);
+  }
+
+  aux->nout = nout;
+  aux->nin = nin;
+  aux->nclob = nclob;
+
+  /* Each array is copied only when non-empty; otherwise its aux pointer
+   * keeps whatever arena_znew left there. */
+  if (nout) {
+    aux->outs = arena_array(o->f->arena, AsmConstraint, nout);
+    memcpy(aux->outs, outs, nout * sizeof *outs);
+    aux->out_ops = arena_array(o->f->arena, Operand, nout);
+    memcpy(aux->out_ops, out_ops, nout * sizeof *out_ops);
+  }
+  if (nin) {
+    aux->ins = arena_array(o->f->arena, AsmConstraint, nin);
+    memcpy(aux->ins, ins, nin * sizeof *ins);
+    aux->in_ops = arena_array(o->f->arena, Operand, nin);
+    memcpy(aux->in_ops, in_ops, nin * sizeof *in_ops);
+  }
+  if (nclob) {
+    aux->clobbers = arena_array(o->f->arena, Sym, nclob);
+    memcpy(aux->clobbers, clobbers, nclob * sizeof *clobbers);
+  }
+
+  inst->extra.aux = aux;
}
static void w_set_loc(CGTarget* t, SrcLoc loc) {
@@ -812,8 +835,27 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
case IR_PARAM_DECL:
case IR_PHI:
case IR_CONDBR:
- case IR_ASM_BLOCK:
break;
+ case IR_ASM_BLOCK: {
+ IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
+ Operand* in_ops_ = NULL;
+ Operand* out_ops_ = NULL;
+ if (aux->nin) {
+ in_ops_ = arena_array(r->o->f->arena, Operand, aux->nin);
+ for (u32 k = 0; k < aux->nin; ++k) {
+ in_ops_[k] = xlat_op(r, aux->in_ops[k]);
+ }
+ }
+ if (aux->nout) {
+ out_ops_ = arena_array(r->o->f->arena, Operand, aux->nout);
+ for (u32 k = 0; k < aux->nout; ++k) {
+ out_ops_[k] = xlat_op(r, aux->out_ops[k]);
+ }
+ }
+ w->asm_block(w, aux->tmpl, aux->outs, aux->nout, out_ops_, aux->ins,
+ aux->nin, in_ops_, aux->clobbers, aux->nclob);
+ break;
+ }
case IR_LOAD_IMM: {
Operand dst = xlat_op(r, in->opnds[0]);
w->load_imm(w, dst, in->extra.imm);
diff --git a/test/cg/binder_test.c b/test/cg/binder_test.c
@@ -0,0 +1,496 @@
+/* Unit test for cg_inline_asm — the constraint binder (Track B of
+ * doc/INLINEASM.md). Builds a Compiler with a stand-in CGTarget that
+ * records every operand the binder hands to asm_block, then asserts the
+ * binding shape for each constraint kind:
+ *
+ * "r" — input forced to OPK_REG.
+ * "=r" — output gets a fresh REG, pushed back as an SValue.
+ * "+r" — output reg is the same as the matching input slot's reg.
+ * "=&r" — output reg is disjoint from any input reg.
+ * "i" — input must be OPK_IMM; passes through.
+ * "m" — addressable lvalue → OPK_INDIRECT in the bound input.
+ * "0"..."N" — matching input bound to out_ops[N].v.reg.
+ * "memory" — every live RES_REG SValue on the CG stack is spilled
+ * via target->spill_reg before asm_block fires.
+ * register-name — passed straight through in the clobbers array.
+ * "cc" — accepted-and-dropped on the binder side (still appears
+ * in the clobbers array we forward — the arch backend
+ * handles the no-op).
+ *
+ * The mock target is the smallest thing that compiles: it hands out reg
+ * ids 1, 2, 3, ... from a tiny pool, refuses to do real codegen, and
+ * appends every call into a log buffer the test asserts against.
+ *
+ * Built standalone (no cg-runner dependency) so the test runs without
+ * the JIT / link harness. Wired into test/test.mk as a separate target
+ * (test-cg-binder). */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cfree.h>
+
+#include "abi/abi.h"
+#include "arch/arch.h"
+#include "cg/cg.h"
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "type/type.h"
+
+/* ---- host glue ------------------------------------------------------- */
+
+/* CfreeHeap callbacks backed by the C allocator. The heap handle, the
+ * alignment and the old/freed sizes are ignored. */
+static void* h_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)h;
+  (void)a;
+  /* A zero-byte request yields NULL without calling malloc. */
+  if (n == 0) return NULL;
+  return malloc(n);
+}
+static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  (void)a;
+  (void)o;
+  (void)h;
+  void* grown = realloc(p, n);
+  return grown;
+}
+static void h_free(CfreeHeap* h, void* p, size_t n) {
+  (void)n;
+  (void)h;
+  free(p);
+}
+static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL};
+
+/* Diagnostic sink callback: prints "<kind>: <formatted message>\n" to
+ * stderr. The sink handle and source location are ignored. A kind value
+ * outside the label table is printed as "diag" rather than indexing past
+ * the end of the table. */
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* const names[] = {"note", "warning", "error", "fatal"};
+  const size_t nnames = sizeof names / sizeof names[0];
+  (void)s;
+  (void)loc;
+  /* The size_t cast turns a negative kind into a huge index, so a single
+   * comparison rejects both directions. */
+  const char* label = ((size_t)k < nnames) ? names[(size_t)k] : "diag";
+  fprintf(stderr, "%s: ", label);
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+/* ---- mock CGTarget --------------------------------------------------- */
+
+/* Byte capacity of MockTarget.log, and the highest reg id the mock pool
+ * hands out. */
+#define MOCK_LOG_CAP 4096u
+#define MOCK_REG_CAP 16u
+
+/* Stand-in CGTarget. `base` is the first member so the hooks can cast the
+ * CGTarget* they receive back to MockTarget*. */
+typedef struct MockTarget {
+ CGTarget base;
+
+ /* Tiny reg pool: hand out ids in [1, MOCK_REG_CAP]. in_use[id] is 0 while
+ * the reg is free and 1 while it is allocated; index 0 is never handed
+ * out. Class is ignored — the binder asks for RC_INT throughout. */
+ u8 in_use[MOCK_REG_CAP + 1u];
+
+ /* Spill-slot id counter; binder doesn't care about layout, only id
+ * uniqueness. m_frame_slot pre-increments it, so the first id is 1. */
+ FrameSlot next_slot;
+
+ /* Recorded asm_block call (last one wins; the binder only fires once
+ * per cg_inline_asm). nout/nin/nclob hold the counts as passed in, while
+ * m_asm_block copies at most 8 entries into each array below. */
+ int asm_called;
+ const char* tmpl;
+ u32 nout, nin, nclob;
+ Operand out_ops[8];
+ Operand in_ops[8];
+ Sym clobbers[8];
+
+ /* Log of side effects: spills, copies, load_imms, addr_ofs, free_regs.
+ * Each entry is a one-line summary; the test scans for substrings. */
+ char log[MOCK_LOG_CAP];
+ u32 log_len;
+} MockTarget;
+
+/* Append a printf-style entry to the mock's log buffer.
+ *
+ * Entries that do not fit are truncated, and once the buffer is full
+ * further entries are dropped. log_len always equals the number of
+ * characters actually stored, so it never exceeds MOCK_LOG_CAP - 1. */
+static void mock_logf(MockTarget* m, const char* fmt, ...) {
+  if (m->log_len >= MOCK_LOG_CAP - 1u) return;
+  const u32 room = MOCK_LOG_CAP - m->log_len; /* includes the NUL */
+  va_list ap;
+  va_start(ap, fmt);
+  int n = vsnprintf(m->log + m->log_len, room, fmt, ap);
+  va_end(ap);
+  if (n <= 0) return;
+  /* vsnprintf reports the length it wanted to write; on truncation only
+   * room - 1 characters were stored. */
+  m->log_len += ((u32)n < room) ? (u32)n : room - 1u;
+}
+
+/* alloc_reg hook: return the lowest free id in [1, MOCK_REG_CAP] and mark
+ * it in use, or REG_NONE when every id is taken. Class and type are
+ * ignored. */
+static Reg m_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
+  MockTarget* mock = (MockTarget*)t;
+  (void)cls;
+  (void)ty;
+  u32 id = 1;
+  while (id <= MOCK_REG_CAP && mock->in_use[id]) ++id;
+  if (id > MOCK_REG_CAP) return (Reg)REG_NONE;
+  mock->in_use[id] = 1;
+  return (Reg)id;
+}
+
+/* free_reg hook: mark `r` free when it is a valid pool id, and log the
+ * call either way. Class is ignored. */
+static void m_free_reg(CGTarget* t, Reg r, RegClass cls) {
+  MockTarget* mock = (MockTarget*)t;
+  (void)cls;
+  const int in_pool = r != (Reg)REG_NONE && r <= MOCK_REG_CAP;
+  if (in_pool) mock->in_use[r] = 0;
+  mock_logf(mock, "free_reg r%u\n", (unsigned)r);
+}
+
+/* frame_slot hook: hand out the next slot id from a running counter. The
+ * slot descriptor is ignored. */
+static FrameSlot m_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+  MockTarget* mock = (MockTarget*)t;
+  (void)d;
+  mock->next_slot += 1;
+  return mock->next_slot;
+}
+
+/* spill_reg hook: emits nothing, only logs the reg and destination slot. */
+static void m_spill_reg(CGTarget* t, Operand src, FrameSlot s, MemAccess ma) {
+  MockTarget* mock = (MockTarget*)t;
+  const unsigned reg_id = (unsigned)src.v.reg;
+  const unsigned slot_id = (unsigned)s;
+  (void)ma;
+  mock_logf(mock, "spill_reg r%u -> slot %u\n", reg_id, slot_id);
+}
+
+/* reload_reg hook: emits nothing, only logs the source slot and the reg. */
+static void m_reload_reg(CGTarget* t, Operand dst, FrameSlot s, MemAccess ma) {
+  MockTarget* mock = (MockTarget*)t;
+  const unsigned slot_id = (unsigned)s;
+  const unsigned reg_id = (unsigned)dst.v.reg;
+  (void)ma;
+  mock_logf(mock, "reload_reg slot %u -> r%u\n", slot_id, reg_id);
+}
+
+/* load_imm hook: logs the destination reg and the immediate value. */
+static void m_load_imm(CGTarget* t, Operand dst, i64 imm) {
+  MockTarget* mock = (MockTarget*)t;
+  const unsigned reg_id = (unsigned)dst.v.reg;
+  mock_logf(mock, "load_imm r%u = %lld\n", reg_id, (long long)imm);
+}
+
+/* copy hook: logs the destination and source reg ids. */
+static void m_copy(CGTarget* t, Operand dst, Operand src) {
+  MockTarget* mock = (MockTarget*)t;
+  const unsigned to = (unsigned)dst.v.reg;
+  const unsigned from = (unsigned)src.v.reg;
+  mock_logf(mock, "copy r%u <- r%u\n", to, from);
+}
+
+/* addr_of hook: logs the destination reg, the lvalue's operand kind, and an
+ * id taken from the lvalue (frame slot, global symbol or indirect base reg;
+ * 0 for any other kind). */
+static void m_addr_of(CGTarget* t, Operand dst, Operand lv) {
+  MockTarget* mock = (MockTarget*)t;
+  unsigned src_id = 0;
+  if (lv.kind == OPK_LOCAL) {
+    src_id = (unsigned)lv.v.frame_slot;
+  } else if (lv.kind == OPK_GLOBAL) {
+    src_id = (unsigned)lv.v.global.sym;
+  } else if (lv.kind == OPK_INDIRECT) {
+    src_id = (unsigned)lv.v.ind.base;
+  }
+  mock_logf(mock, "addr_of r%u <- kind=%u id=%u\n", (unsigned)dst.v.reg,
+            (unsigned)lv.kind, src_id);
+}
+
+/* Hooks the mock installs but deliberately leaves empty: they accept their
+ * arguments and do nothing. */
+static void m_set_loc(CGTarget* t, SrcLoc loc) { (void)t; (void)loc; }
+
+static void m_func_begin(CGTarget* t, const CGFuncDesc* fd) { (void)t; (void)fd; }
+static void m_func_end(CGTarget* t) { (void)t; }
+static void m_param(CGTarget* t, const CGParamDesc* d) { (void)t; (void)d; }
+
+/* asm_block hook: record the call for the test to inspect.
+ *
+ * The template pointer and the three counts are stored exactly as passed.
+ * At most 8 entries of each operand/clobber array are copied into the
+ * mock. The constraint arrays themselves are not recorded. One log line
+ * summarises the call. */
+static void m_asm_block(CGTarget* t, const char* tmpl,
+                        const AsmConstraint* outs, u32 nout, Operand* out_ops,
+                        const AsmConstraint* ins, u32 nin,
+                        const Operand* in_ops, const Sym* clobbers, u32 nclob) {
+  MockTarget* mock = (MockTarget*)t;
+  (void)outs;
+  (void)ins;
+
+  mock->asm_called = 1;
+  mock->tmpl = tmpl;
+  mock->nout = nout;
+  mock->nin = nin;
+  mock->nclob = nclob;
+
+  /* Clamp only the copy lengths; the recorded counts stay unclamped. */
+  const u32 keep_out = nout > 8 ? 8 : nout;
+  const u32 keep_in = nin > 8 ? 8 : nin;
+  const u32 keep_clob = nclob > 8 ? 8 : nclob;
+  for (u32 k = 0; k < keep_out; ++k) mock->out_ops[k] = out_ops[k];
+  for (u32 k = 0; k < keep_in; ++k) mock->in_ops[k] = in_ops[k];
+  for (u32 k = 0; k < keep_clob; ++k) mock->clobbers[k] = clobbers[k];
+
+  mock_logf(mock, "asm_block tmpl=%s nout=%u nin=%u nclob=%u\n",
+            tmpl ? tmpl : "(null)", (unsigned)mock->nout, (unsigned)mock->nin,
+            (unsigned)mock->nclob);
+}
+
+/* Zero the whole mock (reg pool, counters, recorded call, log) and install
+ * the hooks this test provides. Any CGTarget hook not assigned here stays
+ * NULL. */
+static void mock_target_init(MockTarget* m, Compiler* c) {
+  memset(m, 0, sizeof *m);
+  CGTarget* base = &m->base;
+  base->c = c;
+
+  /* Register and frame management. */
+  base->alloc_reg = m_alloc_reg;
+  base->free_reg = m_free_reg;
+  base->frame_slot = m_frame_slot;
+  base->spill_reg = m_spill_reg;
+  base->reload_reg = m_reload_reg;
+
+  /* Data movement. */
+  base->load_imm = m_load_imm;
+  base->copy = m_copy;
+  base->addr_of = m_addr_of;
+
+  /* Function framing and source locations (no-ops). */
+  base->func_begin = m_func_begin;
+  base->func_end = m_func_end;
+  base->param = m_param;
+  base->set_loc = m_set_loc;
+
+  /* The hook under observation. */
+  base->asm_block = m_asm_block;
+}
+
+/* ---- per-test compiler scaffold ------------------------------------- */
+
+/* Everything one test case needs; filled by tc_init, torn down by tc_fini. */
+typedef struct TestCtx {
+ Compiler cc; /* storage initialized by compiler_init */
+ Compiler* c; /* points at cc */
+ MockTarget mt; /* recording target handed to cg_new */
+ CG* g; /* code generator under test */
+ const Type* i64_ty; /* type_prim(..., TY_LLONG), used for pushed values */
+} TestCtx;
+
+/* Build a fresh Compiler, mock target and CG for one test case.
+ *
+ * The target is described as AArch64 / Linux / ELF with 8-byte pointers.
+ * compiler_init receives a pointer to the env, so the env is kept in static
+ * storage rather than on this frame and is rebuilt from zero on each call. */
+static void tc_init(TestCtx* tc) {
+  static CfreeEnv s_env_stash;
+  memset(&s_env_stash, 0, sizeof s_env_stash);
+  s_env_stash.heap = &g_heap;
+  s_env_stash.diag = &g_diag;
+  s_env_stash.now = -1;
+
+  CfreeTarget tgt;
+  memset(&tgt, 0, sizeof tgt);
+  tgt.arch = CFREE_ARCH_ARM_64;
+  tgt.os = CFREE_OS_LINUX;
+  tgt.obj = CFREE_OBJ_ELF;
+  tgt.ptr_size = 8;
+  tgt.ptr_align = 8;
+
+  compiler_init(&tc->cc, tgt, &s_env_stash);
+  tc->c = &tc->cc;
+
+  mock_target_init(&tc->mt, tc->c);
+  tc->g = cg_new(tc->c, &tc->mt.base, NULL);
+  tc->i64_ty = type_prim(tc->c->global, TY_LLONG);
+}
+
+/* Tear down what tc_init built: the CG first, then the Compiler it was
+ * created from. */
+static void tc_fini(TestCtx* tc) {
+ cg_free(tc->g);
+ compiler_fini(&tc->cc);
+}
+
+/* ---- assertion helpers ----------------------------------------------- */
+
+/* Running totals across all test cases: EXPECTs that failed, and EXPECTs
+ * evaluated. main() prints both and derives the exit status from g_fails. */
+static int g_fails = 0;
+static int g_cases = 0;
+
+/* Soft assertion: counts every evaluation; on failure counts the failure
+ * and prints the file, line and stringified condition followed by the
+ * printf-style message given in the variadic arguments. Execution continues
+ * after a failure. At least one argument after `cond` is required because
+ * it is used as the fprintf format. */
+#define EXPECT(cond, ...) do { \
+ ++g_cases; \
+ if (!(cond)) { \
+ ++g_fails; \
+ fprintf(stderr, "FAIL %s:%d: %s\n", __FILE__, __LINE__, #cond); \
+ fprintf(stderr, " "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ } \
+} while (0)
+
+/* Return 1 when `needle` occurs anywhere in the mock's log text, else 0. */
+static int log_contains(const MockTarget* m, const char* needle) {
+  const char* hit = strstr(m->log, needle);
+  return hit != NULL;
+}
+
+/* ---- test cases ------------------------------------------------------ */
+
+/* "r" input: an integer constant pushed on the CG stack must reach
+ * asm_block as an OPK_REG operand, with a load_imm recorded in the log. */
+static void test_r_in(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ /* asm("nop" :: "r"(42)) */
+ cg_push_int(tc.g, 42, tc.i64_ty);
+ AsmConstraint ins[1] = {{"r", ASM_IN, {0,0,0}}};
+ cg_inline_asm(tc.g, "nop", NULL, 0, ins, 1, NULL, 0);
+ EXPECT(tc.mt.asm_called, "asm_block was not invoked");
+ EXPECT(tc.mt.nin == 1, "nin=%u", tc.mt.nin);
+ EXPECT(tc.mt.in_ops[0].kind == OPK_REG, "in_ops[0].kind=%u",
+ tc.mt.in_ops[0].kind);
+ /* The IMM was materialized into a freshly-allocated reg; load_imm shows it. */
+ EXPECT(log_contains(&tc.mt, "load_imm"), "missing load_imm in log:\n%s",
+ tc.mt.log);
+ tc_fini(&tc);
+}
+
+/* "=r" output with no inputs: asm_block must see exactly one output, bound
+ * to an OPK_REG whose reg is not REG_NONE. */
+static void test_eq_r_out(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ /* asm("mov %0, #1" : "=r"(x)) — pushes an output SValue back. */
+ AsmConstraint outs[1] = {{"=r", ASM_OUT, {0,0,0}}};
+ cg_inline_asm(tc.g, "mov %0, #1", outs, 1, NULL, 0, NULL, 0);
+ EXPECT(tc.mt.asm_called, "asm_block was not invoked");
+ EXPECT(tc.mt.nout == 1, "nout=%u", tc.mt.nout);
+ EXPECT(tc.mt.out_ops[0].kind == OPK_REG, "out_ops[0].kind=%u",
+ tc.mt.out_ops[0].kind);
+ EXPECT(tc.mt.out_ops[0].v.reg != (Reg)REG_NONE, "out reg should be allocated");
+ tc_fini(&tc);
+}
+
+/* "+r" read-write operand, expressed as a "+r" output plus a matching "0"
+ * input: both bound operands must be OPK_REG and name the same reg. */
+static void test_plus_r_inout(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ /* +r is GCC's "use this reg as both input and output". The parser
+ * convention this binder honors: emit one output with =r-style behavior
+ * and one matching input "0" with the input value to seed the reg. */
+ cg_push_int(tc.g, 7, tc.i64_ty);
+ AsmConstraint outs[1] = {{"+r", ASM_INOUT, {0,0,0}}};
+ AsmConstraint ins[1] = {{"0", ASM_IN, {0,0,0}}};
+ cg_inline_asm(tc.g, "add %0, %0, #1", outs, 1, ins, 1, NULL, 0);
+ EXPECT(tc.mt.nout == 1 && tc.mt.nin == 1, "nout/nin");
+ EXPECT(tc.mt.out_ops[0].kind == OPK_REG, "out reg");
+ EXPECT(tc.mt.in_ops[0].kind == OPK_REG, "in reg");
+ EXPECT(tc.mt.out_ops[0].v.reg == tc.mt.in_ops[0].v.reg,
+ "matching constraint should bind to same reg (out=%u in=%u)",
+ (unsigned)tc.mt.out_ops[0].v.reg, (unsigned)tc.mt.in_ops[0].v.reg);
+ tc_fini(&tc);
+}
+
+/* "=&r" early-clobber output alongside an "r" input: both are bound to
+ * regs, and the two regs must differ. */
+static void test_eq_amp_r_early_clobber(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ /* asm("..." : "=&r"(x) : "r"(y)) — output reg must differ from input reg. */
+ cg_push_int(tc.g, 5, tc.i64_ty);
+ AsmConstraint outs[1] = {{"=&r", ASM_OUT, {0,0,0}}};
+ AsmConstraint ins[1] = {{"r", ASM_IN, {0,0,0}}};
+ cg_inline_asm(tc.g, "tmpl", outs, 1, ins, 1, NULL, 0);
+ EXPECT(tc.mt.out_ops[0].kind == OPK_REG && tc.mt.in_ops[0].kind == OPK_REG,
+ "REGs expected");
+ EXPECT(tc.mt.out_ops[0].v.reg != tc.mt.in_ops[0].v.reg,
+ "early-clobber should be disjoint (out=%u in=%u)",
+ (unsigned)tc.mt.out_ops[0].v.reg, (unsigned)tc.mt.in_ops[0].v.reg);
+ tc_fini(&tc);
+}
+
+/* "i" input: a pushed integer constant must be forwarded as OPK_IMM with
+ * its value intact and without any load_imm being logged. */
+static void test_i_constant(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ cg_push_int(tc.g, 99, tc.i64_ty);
+ AsmConstraint ins[1] = {{"i", ASM_IN, {0,0,0}}};
+ cg_inline_asm(tc.g, "tmpl", NULL, 0, ins, 1, NULL, 0);
+ EXPECT(tc.mt.in_ops[0].kind == OPK_IMM, "in kind=%u", tc.mt.in_ops[0].kind);
+ EXPECT(tc.mt.in_ops[0].v.imm == 99, "in imm=%lld",
+ (long long)tc.mt.in_ops[0].v.imm);
+ /* No load_imm (the binder forwards the IMM unchanged). */
+ EXPECT(!log_contains(&tc.mt, "load_imm"),
+ "'i' should not load_imm, log:\n%s", tc.mt.log);
+ tc_fini(&tc);
+}
+
+/* "m" input: a local lvalue must be bound as OPK_INDIRECT, with an addr_of
+ * call recorded in the log. */
+static void test_m_memory_lvalue(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ /* Push a local lvalue; "m" should materialize it into OPK_INDIRECT
+ * via target->addr_of. */
+ FrameSlotDesc fsd;
+ memset(&fsd, 0, sizeof fsd);
+ fsd.size = 8;
+ fsd.align = 8;
+ fsd.kind = FS_LOCAL;
+ FrameSlot s = cg_local(tc.g, &fsd);
+ /* Use the type-aware push path. It is declared here with a block-scope
+ * prototype (NOTE(review): presumably because the included headers do
+ * not declare it — confirm). */
+ void cg_push_local_typed(CG*, FrameSlot, const Type*);
+ cg_push_local_typed(tc.g, s, tc.i64_ty);
+ AsmConstraint ins[1] = {{"m", ASM_IN, {0,0,0}}};
+ cg_inline_asm(tc.g, "ldr w0, %0", NULL, 0, ins, 1, NULL, 0);
+ EXPECT(tc.mt.in_ops[0].kind == OPK_INDIRECT, "in kind=%u",
+ tc.mt.in_ops[0].kind);
+ EXPECT(log_contains(&tc.mt, "addr_of"),
+ "expected addr_of in log:\n%s", tc.mt.log);
+ tc_fini(&tc);
+}
+
+/* Matching constraint: an input declared as "0" must be bound to the same
+ * reg as output 0, which is declared "=r". */
+static void test_matching_input(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ /* Output =r at index 0; input "0" should bind to its reg. */
+ cg_push_int(tc.g, 11, tc.i64_ty);
+ AsmConstraint outs[1] = {{"=r", ASM_OUT, {0,0,0}}};
+ AsmConstraint ins[1] = {{"0", ASM_IN, {0,0,0}}};
+ cg_inline_asm(tc.g, "tmpl", outs, 1, ins, 1, NULL, 0);
+ EXPECT(tc.mt.out_ops[0].v.reg == tc.mt.in_ops[0].v.reg,
+ "matching '0' input should reuse out reg (out=%u in=%u)",
+ (unsigned)tc.mt.out_ops[0].v.reg, (unsigned)tc.mt.in_ops[0].v.reg);
+ tc_fini(&tc);
+}
+
+/* "memory" clobber: a reg-resident value left on the CG stack must be
+ * spilled (spill_reg logged) when a later asm block names "memory". */
+static void test_memory_clobber_spills_live_regs(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ /* Stack filler: a LOCAL lvalue goes on the stack first. It is never
+ * loaded into a reg here, so it is not the value this test relies on
+ * being spilled (NOTE(review): presumably it is not RES_REG and the
+ * clobber loop skips it — confirm). */
+ FrameSlotDesc fsd;
+ memset(&fsd, 0, sizeof fsd);
+ fsd.size = 8;
+ fsd.align = 8;
+ fsd.kind = FS_LOCAL;
+ FrameSlot s = cg_local(tc.g, &fsd);
+ void cg_push_local_typed(CG*, FrameSlot, const Type*);
+ cg_push_local_typed(tc.g, s, tc.i64_ty);
+ /* The mock installs no load hook, so the LOCAL cannot be promoted to a
+ * reg through cg_load. A second filler entry, an immediate, is pushed
+ * on top of it. */
+ cg_push_int(tc.g, 0, tc.i64_ty); /* IMM, not REG, won't be a spill victim */
+
+ /* The reg-resident value under test comes from cg_inline_asm itself: an
+ * "=r" output is pushed back onto the stack owning a freshly allocated
+ * reg. */
+ AsmConstraint outs1[1] = {{"=r", ASM_OUT, {0,0,0}}};
+ cg_inline_asm(tc.g, "produce", outs1, 1, NULL, 0, NULL, 0);
+ /* Now the stack has a REG-resident SValue from the produced output.
+ * Reset the log before the second call so we can scan for spill_reg
+ * specifically caused by "memory". */
+ tc.mt.log_len = 0;
+ tc.mt.log[0] = '\0';
+
+ Sym mem_sym = pool_intern_cstr(tc.c->global, "memory");
+ Sym clobs[1] = {mem_sym};
+ cg_inline_asm(tc.g, "barrier", NULL, 0, NULL, 0, clobs, 1);
+ EXPECT(log_contains(&tc.mt, "spill_reg"),
+ "expected spill_reg from memory clobber, log:\n%s", tc.mt.log);
+ tc_fini(&tc);
+}
+
+/* Register-name clobber: the interned "x0" symbol must arrive in the
+ * clobbers array handed to asm_block, with the count intact. */
+static void test_register_clobber_passthrough(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Sym x0_sym = pool_intern_cstr(tc.c->global, "x0");
+ Sym clobs[1] = {x0_sym};
+ cg_inline_asm(tc.g, "tmpl", NULL, 0, NULL, 0, clobs, 1);
+ EXPECT(tc.mt.nclob == 1, "nclob=%u", tc.mt.nclob);
+ EXPECT(tc.mt.clobbers[0] == x0_sym, "register clobber not forwarded");
+ tc_fini(&tc);
+}
+
+/* "cc" clobber: with a reg-resident value live on the stack, an asm block
+ * that clobbers only "cc" must log no spill_reg and must still forward the
+ * one clobber to asm_block. */
+static void test_cc_clobber_silent(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ /* "cc" should not cause spills; the binder forwards it but does no
+ * special work. (Arch backends drop it on aarch64.) */
+ Sym cc_sym = pool_intern_cstr(tc.c->global, "cc");
+ Sym clobs[1] = {cc_sym};
+ /* Arrange a live REG-resident SValue first; verify it is NOT spilled. */
+ AsmConstraint outs1[1] = {{"=r", ASM_OUT, {0,0,0}}};
+ cg_inline_asm(tc.g, "produce", outs1, 1, NULL, 0, NULL, 0);
+ /* Drop log entries from the setup call so only the "cc" call is scanned. */
+ tc.mt.log_len = 0;
+ tc.mt.log[0] = '\0';
+ cg_inline_asm(tc.g, "tmpl", NULL, 0, NULL, 0, clobs, 1);
+ EXPECT(!log_contains(&tc.mt, "spill_reg"),
+ "'cc' must not trigger spills, log:\n%s", tc.mt.log);
+ EXPECT(tc.mt.nclob == 1, "nclob=%u", tc.mt.nclob);
+ tc_fini(&tc);
+}
+
+/* Run every test case in a fixed order, print a one-line summary to stderr,
+ * and exit with 1 if any EXPECT failed, 0 otherwise. */
+int main(void) {
+  static void (*const cases[])(void) = {
+      test_r_in,
+      test_eq_r_out,
+      test_plus_r_inout,
+      test_eq_amp_r_early_clobber,
+      test_i_constant,
+      test_m_memory_lvalue,
+      test_matching_input,
+      test_memory_clobber_spills_live_regs,
+      test_register_clobber_passthrough,
+      test_cc_clobber_silent,
+  };
+  const size_t ncases = sizeof cases / sizeof cases[0];
+  for (size_t i = 0; i < ncases; ++i) {
+    cases[i]();
+  }
+
+  fprintf(stderr, "binder_test: %d cases, %d failures\n", g_cases, g_fails);
+  if (g_fails) return 1;
+  return 0;
+}
diff --git a/test/test.mk b/test/test.mk
@@ -29,9 +29,9 @@
# parse_asm / cfree_disasm_iter_* are still stubs; the harness builds
# and runs end-to-end so the wiring stays exercised. See doc/ASM.md.
-.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
+.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
-test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-lib-deps
+test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-lib-deps
test-lex: bin
@CFREE=$(abspath $(BIN)) test/lex/run.sh
@@ -103,6 +103,20 @@ $(AA64_ISA_TEST_BIN): test/arch/aa64_isa_test.c $(LIB_AR)
@mkdir -p $(dir $@)
$(CC) $(DRIVER_CFLAGS) -Isrc test/arch/aa64_isa_test.c $(LIB_AR) -o $@
+# cg_inline_asm constraint binder unit test (doc/INLINEASM.md Track B).
+# Drives cg_inline_asm against a stand-in CGTarget that records every
+# operand handed to asm_block; covers r/=r/+r/=&r/i/m/0, the "memory"
+# clobber spill behaviour, register-name passthrough, and "cc" no-op.
+# Internal cg/ + arch/ surface — needs -Isrc.
+CG_BINDER_TEST_BIN = build/test/cg_binder_test
+
+test-cg-binder: $(CG_BINDER_TEST_BIN)
+ $(CG_BINDER_TEST_BIN)
+
+$(CG_BINDER_TEST_BIN): test/cg/binder_test.c $(LIB_AR)
+ @mkdir -p $(dir $@)
+ $(CC) $(DRIVER_CFLAGS) -Isrc test/cg/binder_test.c $(LIB_AR) -o $@
+
# Test harness binaries shared by test-elf, test-link, and test-cg.
# Declared as Make targets (not built by the run.sh scripts) so they pick
# up libcfree.a changes deterministically.