kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 5219fba169c77ac4b66bd51bd2764a1ba1e61f38
parent 75a1d2a8469928e951ed126582f3b499065ff695
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 11 May 2026 09:38:38 -0700

asm/inline: track B — cg_inline_asm binder + opt recorder/replay

Implements cg_inline_asm constraint binding (r/=r/+r/=&r/i/m/0,
memory clobber, register clobbers, cc), w_asm_block recorder, and
IR_ASM_BLOCK replay path. Wrapped target receives bound Operand
arrays. Mock-target unit test covers each constraint kind.

Diffstat:
M src/cg/cg.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/opt/ir.h | 1 +
M src/opt/opt.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
A test/cg/binder_test.c | 496 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/test.mk | 18 ++++++++++++++++--
5 files changed, 840 insertions(+), 23 deletions(-)

diff --git a/src/cg/cg.c b/src/cg/cg.c @@ -1516,18 +1516,282 @@ void cg_set_loc(CG* g, SrcLoc loc) { } /* ============================================================ - * Inline asm — placeholder - * ============================================================ */ + * Inline asm — constraint binder (doc/INLINEASM.md §5). + * + * The parser pushed `nin` input SValues onto the value stack in declaration + * order (the Nth input is at the top). Outputs come back as fresh SValues + * that the parser assigns to its declared lvalues. The signature does not + * carry per-output Type info today; outputs that need a fresh register get + * an arch-default 64-bit int type. TODO(track-A): when the parser starts + * carrying output types alongside AsmConstraint, route them through here so + * RegClass and width are correct for FP/short outputs. + * + * Constraints handled: + * inputs : "r" (force into REG), "i" (must be IMM), + * "m" (materialize an INDIRECT lvalue), + * "0".."9" (matching: bind to out_ops[N].v.reg) + * outputs : "=r" (alloc fresh), "+r" (alloc fresh; expects a parallel + * matching input slot), "=&r" (early-clobber: alloc disjoint + * from any input reg) + * Clobbers: + * "memory" — spill all live RES_REG SValues so subsequent reads reload. + * register names — passed through to target->asm_block (the arch backend + * routes them through its call-clobber set). + * "cc" — silently ignored on aarch64 (NZCV is reserved across blocks). */ + +/* Parse a leading non-negative decimal index from a constraint string. + * Returns -1 if s is NULL or its first character isn't a digit; otherwise + * returns the value of the leading digit run (any characters after the + * run are ignored). + * NOTE(review): n is a plain int accumulated with no upper bound, so a very + * long digit run overflows it (undefined behavior for signed int) — consider + * capping the run length before the caller range-checks against nout. */ +static int asm_parse_match_index(const char* s) { + if (!s || s[0] < '0' || s[0] > '9') return -1; + int n = 0; + for (const char* p = s; *p >= '0' && *p <= '9'; ++p) { + n = n * 10 + (*p - '0'); + } + return n; +} + +/* Skip leading "=&" / "=" / "+" modifier prefix and return a pointer past + * it. The remainder is the body letter ("r", "m", ...). 
*/ +static const char* asm_constraint_body(const char* s) { + if (!s) return ""; + if (s[0] == '=' && s[1] == '&') return s + 2; + if (s[0] == '=' || s[0] == '+' || s[0] == '&') return s + 1; + return s; +} + +static int asm_is_early_clobber(const char* s) { + if (!s) return 0; + if (s[0] == '=' && s[1] == '&') return 1; + if (s[0] == '&') return 1; + return 0; +} void cg_inline_asm(CG* g, const char* tmpl, const AsmConstraint* outs, u32 nout, const AsmConstraint* ins, u32 nin, const Sym* clobbers, u32 nclob) { - (void)tmpl; - (void)outs; - (void)nout; - (void)ins; - (void)nin; - (void)clobbers; - (void)nclob; - compiler_panic(g->c, g->cur_loc, "cg_inline_asm: not in v1 slice"); + CGTarget* T = g->target; + Heap* h = g->c->env->heap; + /* Default output type for the v1 binder. RC_INT, 64-bit. */ + const Type* default_out_ty = type_prim(g->pool, TY_LLONG); + + /* ---- pop inputs in reverse, store in declaration order ---- */ + SValue* in_svs = NULL; + if (nin) { + in_svs = (SValue*)h->alloc(h, sizeof(SValue) * nin, _Alignof(SValue)); + for (u32 i = 0; i < nin; ++i) { + u32 idx = nin - 1u - i; + in_svs[idx] = pop(g); + ensure_reg(g, &in_svs[idx]); + } + } + + Operand* in_ops = NULL; + if (nin) { + in_ops = (Operand*)h->alloc(h, sizeof(Operand) * nin, _Alignof(Operand)); + memset(in_ops, 0, sizeof(Operand) * nin); + } + Operand* out_ops = NULL; + if (nout) { + out_ops = (Operand*)h->alloc(h, sizeof(Operand) * nout, _Alignof(Operand)); + memset(out_ops, 0, sizeof(Operand) * nout); + } + /* Tracks whether each out_ops[i] reg was freshly allocated (and should be + * pushed back as RES_REG owning that reg) vs. shared with an input that + * still owns the reg. */ + u8* out_reg_owned = NULL; + if (nout) { + out_reg_owned = (u8*)h->alloc(h, nout, 1); + memset(out_reg_owned, 0, nout); + } + + /* ---- Pass 1: allocate output regs that are NOT early-clobber. 
---- + * Early-clobber (=&r) outputs are allocated in pass 3 once input regs + * are known so the disjoint-set property is checkable. */ + for (u32 i = 0; i < nout; ++i) { + const char* body = asm_constraint_body(outs[i].str); + if (asm_is_early_clobber(outs[i].str)) continue; + if (body[0] == 'r') { + Reg r = alloc_reg_or_spill(g, RC_INT, default_out_ty); + out_ops[i] = op_reg(r, default_out_ty); + out_reg_owned[i] = 1; + } else { + compiler_panic(g->c, g->cur_loc, + "cg_inline_asm: unsupported output constraint '%s'", + outs[i].str ? outs[i].str : ""); + } + } + + /* ---- Pass 2: materialize inputs per constraint. ---- + * Matching constraints ("0".."9") need their referenced output's reg to + * already exist; non-early outputs satisfy that after pass 1. (An output + * referenced by a matching input must not itself be early-clobber — that + * combination is meaningless; we panic below if the parser produced it.) */ + for (u32 i = 0; i < nin; ++i) { + const char* s = ins[i].str ? ins[i].str : ""; + int matched = asm_parse_match_index(s); + if (matched >= 0) { + if ((u32)matched >= nout) { + compiler_panic(g->c, g->cur_loc, + "cg_inline_asm: matching constraint '%s' references " + "out-of-range output %d", + s, matched); + } + if (asm_is_early_clobber(outs[matched].str)) { + compiler_panic(g->c, g->cur_loc, + "cg_inline_asm: matching input '%s' references " + "early-clobber output =&r", + s); + } + /* Force input into the output's register. If the input is already an + * IMM or in a different reg, materialize via target->copy/load_imm + * into the bound output reg. The input SValue keeps its own reg + * (which we'll release at the end); the binding only needs the + * value to be present in out_ops[matched].v.reg before the asm runs. */ + Operand bound = out_ops[matched]; + ensure_reg(g, &in_svs[i]); + if (in_svs[i].op.kind == OPK_REG && + in_svs[i].op.v.reg == bound.v.reg) { + /* Already in place. 
*/ + } else if (in_svs[i].op.kind == OPK_IMM) { + T->load_imm(T, bound, in_svs[i].op.v.imm); + } else { + Operand src = force_reg(g, &in_svs[i], sv_type(&in_svs[i])); + T->copy(T, bound, src); + } + in_ops[i] = bound; + continue; + } + if (s[0] == 'r') { + in_ops[i] = force_reg(g, &in_svs[i], sv_type(&in_svs[i])); + } else if (s[0] == 'i') { + if (in_svs[i].op.kind != OPK_IMM) { + compiler_panic(g->c, g->cur_loc, + "cg_inline_asm: 'i' constraint requires constant input"); + } + in_ops[i] = in_svs[i].op; + } else if (s[0] == 'm') { + if (in_svs[i].op.kind == OPK_INDIRECT) { + in_ops[i] = in_svs[i].op; + } else if (is_lvalue(&in_svs[i].op)) { + const Type* lt = sv_type(&in_svs[i]); + const Type* pty = type_ptr(g->pool, lt ? lt : type_void(g->pool)); + Reg r = alloc_reg_or_spill(g, RC_INT, pty); + Operand dst = op_reg(r, pty); + T->addr_of(T, dst, in_svs[i].op); + /* Replace the SValue's lvalue with an INDIRECT pointing at the + * freshly-loaded address; the new INDIRECT owns the base reg, so + * release() at the end of the block will free it. + * NOTE(review): the OPK_INDIRECT test that follows looks unreachable — + * this else-branch is only entered when the kind is not OPK_INDIRECT, + * and no statement in between visibly reassigns in_svs[i].op (assuming + * is_lvalue/sv_type do not modify the operand — confirm). */ + if (in_svs[i].op.kind == OPK_INDIRECT) { + T->free_reg(T, in_svs[i].op.v.ind.base, RC_INT); + } + in_svs[i].op = op_indirect(r, 0, lt); + in_svs[i].res = RES_REG; + in_ops[i] = in_svs[i].op; + } else { + compiler_panic( + g->c, g->cur_loc, + "cg_inline_asm: 'm' constraint requires an addressable operand"); + } + } else { + compiler_panic(g->c, g->cur_loc, + "cg_inline_asm: unsupported input constraint '%s'", s); + } + } + + /* ---- Pass 3: allocate early-clobber outputs (=&r) disjoint from inputs. + * The reg pool only hands out free regs, so any reg returned by alloc_reg + * is by construction not in use by any input materialized above. 
No retry is + * attempted: each early-clobber output gets a single alloc_reg_or_spill + * call, and the input walk below panics on a collision rather than + * re-allocating. (Original note: none of the input materializers above + * call free_reg on input regs while inputs are still live, so a single + * alloc suffices in practice — TODO confirm against force_reg.) */ + for (u32 i = 0; i < nout; ++i) { + if (!asm_is_early_clobber(outs[i].str)) continue; + const char* body = asm_constraint_body(outs[i].str); + if (body[0] != 'r') { + compiler_panic(g->c, g->cur_loc, + "cg_inline_asm: unsupported early-clobber constraint '%s'", + outs[i].str); + } + Reg r = alloc_reg_or_spill(g, RC_INT, default_out_ty); + /* Validate disjoint: walk inputs, collide-check. The pool guarantees + * uniqueness against currently-allocated regs, so this is belt-and- + * suspenders, but the panic gives a meaningful diagnostic if any + * future binder change breaks the invariant. */ + for (u32 k = 0; k < nin; ++k) { + if (in_ops[k].kind == OPK_REG && in_ops[k].v.reg == r) { + compiler_panic(g->c, g->cur_loc, + "cg_inline_asm: early-clobber output collided with " + "input reg (binder bug)"); + } + if (in_ops[k].kind == OPK_INDIRECT && in_ops[k].v.ind.base == r) { + compiler_panic(g->c, g->cur_loc, + "cg_inline_asm: early-clobber output collided with " + "input INDIRECT base (binder bug)"); + } + } + out_ops[i] = op_reg(r, default_out_ty); + out_reg_owned[i] = 1; + } + + /* ---- "memory" clobber: spill all live RES_REG SValues. ---- + * Intern "memory" once per call; Sym equality is pointer-equal after + * interning. The remaining stack values become RES_SPILLED, so a later + * read goes through ensure_reg → reload_reg. 
*/ + Sym sym_memory = pool_intern_cstr(g->pool, "memory"); + int has_memory_clobber = 0; + for (u32 i = 0; i < nclob; ++i) { + if (clobbers[i] == sym_memory) { + has_memory_clobber = 1; + break; + } + } + if (has_memory_clobber) { + for (u32 i = 0; i < g->sp; ++i) { + SValue* sv = &g->stack[i]; + if (sv->res != RES_REG) continue; + u8 cls = class_of_sv(sv); + FrameSlot slot = take_spill_slot(g, cls); + Operand victim_reg = op_reg((Reg)reg_of_sv(sv), sv->type); + T->spill_reg(T, victim_reg, slot, mem_for_spill(g, sv)); + T->free_reg(T, (Reg)reg_of_sv(sv), cls); + sv->spill_slot = slot; + sv->res = RES_SPILLED; + set_owned_reg(sv, (Reg)REG_NONE); + } + } + + /* ---- Call the per-arch asm_block. ---- */ + T->asm_block(T, tmpl, outs, nout, out_ops, ins, nin, in_ops, clobbers, nclob); + + /* ---- Release input SValue resources. ---- + * Inputs are consumed by the asm block. Their owned regs/slots return to + * the pool. Note: matching inputs that were copied into an output reg + * still own their original input reg — release frees that one; the + * output reg lives on through the pushed output SValue. */ + for (u32 i = 0; i < nin; ++i) { + release(g, &in_svs[i]); + } + + /* ---- Push outputs back as fresh SValues for the parser to assign. ---- + * Each pushed SValue owns the freshly-allocated reg (RES_REG), so the + * parser's eventual cg_store on it will release the reg after consuming. */ + for (u32 i = 0; i < nout; ++i) { + SValue sv = make_sv(out_ops[i], default_out_ty); + /* If the target overwrote out_ops[i] with a different kind (e.g. a + * memory location), make_sv already classified residency correctly. */ + if (!out_reg_owned[i] && sv.res == RES_REG) { + /* Not owned by us — the value is borrowed from elsewhere. Treat as + * inherent to avoid double-free. (No production path produces this + * today, but the bookkeeping is explicit.) 
*/ + sv.res = RES_INHERENT; + } + push(g, sv); + } + + if (in_svs) h->free(h, in_svs, sizeof(SValue) * nin); + if (in_ops) h->free(h, in_ops, sizeof(Operand) * nin); + if (out_ops) h->free(h, out_ops, sizeof(Operand) * nout); + if (out_reg_owned) h->free(h, out_reg_owned, nout); } diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -173,6 +173,7 @@ typedef struct IRAsmAux { AsmConstraint* ins; Sym* clobbers; Operand* out_ops; /* nout slots; the wrapped target may fill in REG location */ + Operand* in_ops; /* nin slots; recorded by w_asm_block, xlat'd at replay */ u32 nout, nin, nclob; } IRAsmAux; diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -694,16 +694,39 @@ static void w_asm_block(CGTarget* t, const char* tmpl, const AsmConstraint* outs, u32 nout, Operand* out_ops, const AsmConstraint* ins, u32 nin, const Operand* in_ops, const Sym* clobbers, u32 nclob) { - (void)tmpl; - (void)outs; - (void)nout; - (void)out_ops; - (void)ins; - (void)nin; - (void)in_ops; - (void)clobbers; - (void)nclob; - panic_unsupported(impl_of(t), "asm_block"); + OptImpl* o = impl_of(t); + Inst* in = rec(o, IR_ASM_BLOCK); + IRAsmAux* aux = arena_znew(o->f->arena, IRAsmAux); + /* Template strings reach us via the parser's interned string pool, which + * outlives the CG/Opt arenas. Storing the pointer is safe; copy + * defensively into the IR arena anyway so the IR is self-contained. 
*/ + if (tmpl) { + size_t tl = 0; + while (tmpl[tl]) ++tl; + aux->tmpl = arena_strdup(o->f->arena, tmpl, tl); + } else { + aux->tmpl = NULL; + } + aux->nout = nout; + aux->nin = nin; + aux->nclob = nclob; + if (nout) { + aux->outs = arena_array(o->f->arena, AsmConstraint, nout); + memcpy(aux->outs, outs, nout * sizeof *outs); + aux->out_ops = arena_array(o->f->arena, Operand, nout); + memcpy(aux->out_ops, out_ops, nout * sizeof *out_ops); + } + if (nin) { + aux->ins = arena_array(o->f->arena, AsmConstraint, nin); + memcpy(aux->ins, ins, nin * sizeof *ins); + aux->in_ops = arena_array(o->f->arena, Operand, nin); + memcpy(aux->in_ops, in_ops, nin * sizeof *in_ops); + } + if (nclob) { + aux->clobbers = arena_array(o->f->arena, Sym, nclob); + memcpy(aux->clobbers, clobbers, nclob * sizeof *clobbers); + } + in->extra.aux = aux; } static void w_set_loc(CGTarget* t, SrcLoc loc) { @@ -812,8 +835,27 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) { case IR_PARAM_DECL: case IR_PHI: case IR_CONDBR: - case IR_ASM_BLOCK: break; + case IR_ASM_BLOCK: { + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + Operand* in_ops_ = NULL; + Operand* out_ops_ = NULL; + if (aux->nin) { + in_ops_ = arena_array(r->o->f->arena, Operand, aux->nin); + for (u32 k = 0; k < aux->nin; ++k) { + in_ops_[k] = xlat_op(r, aux->in_ops[k]); + } + } + if (aux->nout) { + out_ops_ = arena_array(r->o->f->arena, Operand, aux->nout); + for (u32 k = 0; k < aux->nout; ++k) { + out_ops_[k] = xlat_op(r, aux->out_ops[k]); + } + } + w->asm_block(w, aux->tmpl, aux->outs, aux->nout, out_ops_, aux->ins, + aux->nin, in_ops_, aux->clobbers, aux->nclob); + break; + } case IR_LOAD_IMM: { Operand dst = xlat_op(r, in->opnds[0]); w->load_imm(w, dst, in->extra.imm); diff --git a/test/cg/binder_test.c b/test/cg/binder_test.c @@ -0,0 +1,496 @@ +/* Unit test for cg_inline_asm — the constraint binder (Track B of + * doc/INLINEASM.md). 
Builds a Compiler with a stand-in CGTarget that + * records every operand the binder hands to asm_block, then asserts the + * binding shape for each constraint kind: + * + * "r" — input forced to OPK_REG. + * "=r" — output gets a fresh REG, pushed back as an SValue. + * "+r" — output reg is the same as the matching input slot's reg. + * "=&r" — output reg is disjoint from any input reg. + * "i" — input must be OPK_IMM; passes through. + * "m" — addressable lvalue → OPK_INDIRECT in the bound input. + * "0"..."N" — matching input bound to out_ops[N].v.reg. + * "memory" — every live RES_REG SValue on the CG stack is spilled + * via target->spill_reg before asm_block fires. + * register-name — passed straight through in the clobbers array. + * "cc" — accepted-and-dropped on the binder side (still appears + * in the clobbers array we forward — the arch backend + * handles the no-op). + * + * The mock target is the smallest thing that compiles: it hands out reg + * ids 1, 2, 3, ... from a tiny pool, refuses to do real codegen, and + * appends every call into a log buffer the test asserts against. + * + * Built standalone (no cg-runner dependency) so the test runs without + * the JIT / link harness. Wired into test/test.mk as a separate target + * (test-cg-binder). */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <cfree.h> + +#include "abi/abi.h" +#include "arch/arch.h" +#include "cg/cg.h" +#include "core/core.h" +#include "core/heap.h" +#include "core/pool.h" +#include "type/type.h" + +/* ---- host glue ------------------------------------------------------- */ + +static void* h_alloc(CfreeHeap* h, size_t n, size_t a) { + (void)h; + (void)a; + return n ? 
malloc(n) : NULL; +} +static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { + (void)h; + (void)o; + (void)a; + return realloc(p, n); +} +static void h_free(CfreeHeap* h, void* p, size_t n) { + (void)h; + (void)n; + free(p); +} +static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL}; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) { + static const char* names[] = {"note", "warning", "error", "fatal"}; + (void)s; + (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; + +/* ---- mock CGTarget --------------------------------------------------- */ + +#define MOCK_LOG_CAP 4096u +#define MOCK_REG_CAP 16u + +typedef struct MockTarget { + CGTarget base; + + /* Tiny reg pool: hand out ids in [1, MOCK_REG_CAP]. 0 is "free", 1 is + * "in use". Class is ignored — the binder asks for RC_INT throughout. */ + u8 in_use[MOCK_REG_CAP + 1u]; + + /* Spill-slot id counter; binder doesn't care about layout, only id + * uniqueness. */ + FrameSlot next_slot; + + /* Recorded asm_block call (last one wins; the binder only fires once + * per cg_inline_asm). */ + int asm_called; + const char* tmpl; + u32 nout, nin, nclob; + Operand out_ops[8]; + Operand in_ops[8]; + Sym clobbers[8]; + + /* Log of side effects: spills, copies, load_imms, addr_ofs, free_regs. + * Each entry is a one-line summary; the test scans for substrings. */ + char log[MOCK_LOG_CAP]; + u32 log_len; +} MockTarget; + +static void mock_logf(MockTarget* m, const char* fmt, ...) 
{ + if (m->log_len >= MOCK_LOG_CAP - 1u) return; + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(m->log + m->log_len, MOCK_LOG_CAP - m->log_len, fmt, ap); + va_end(ap); + if (n > 0) m->log_len += (u32)n; +} + +static Reg m_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) { + (void)cls; + (void)ty; + MockTarget* m = (MockTarget*)t; + for (u32 i = 1; i <= MOCK_REG_CAP; ++i) { + if (!m->in_use[i]) { + m->in_use[i] = 1; + return (Reg)i; + } + } + return (Reg)REG_NONE; +} + +static void m_free_reg(CGTarget* t, Reg r, RegClass cls) { + (void)cls; + MockTarget* m = (MockTarget*)t; + if (r != (Reg)REG_NONE && r <= MOCK_REG_CAP) m->in_use[r] = 0; + mock_logf(m, "free_reg r%u\n", (unsigned)r); +} + +static FrameSlot m_frame_slot(CGTarget* t, const FrameSlotDesc* d) { + (void)d; + MockTarget* m = (MockTarget*)t; + return ++m->next_slot; +} + +static void m_spill_reg(CGTarget* t, Operand src, FrameSlot s, MemAccess ma) { + (void)ma; + MockTarget* m = (MockTarget*)t; + mock_logf(m, "spill_reg r%u -> slot %u\n", (unsigned)src.v.reg, (unsigned)s); +} + +static void m_reload_reg(CGTarget* t, Operand dst, FrameSlot s, MemAccess ma) { + (void)ma; + MockTarget* m = (MockTarget*)t; + mock_logf(m, "reload_reg slot %u -> r%u\n", (unsigned)s, (unsigned)dst.v.reg); +} + +static void m_load_imm(CGTarget* t, Operand dst, i64 imm) { + MockTarget* m = (MockTarget*)t; + mock_logf(m, "load_imm r%u = %lld\n", (unsigned)dst.v.reg, (long long)imm); +} + +static void m_copy(CGTarget* t, Operand dst, Operand src) { + MockTarget* m = (MockTarget*)t; + mock_logf(m, "copy r%u <- r%u\n", (unsigned)dst.v.reg, (unsigned)src.v.reg); +} + +static void m_addr_of(CGTarget* t, Operand dst, Operand lv) { + MockTarget* m = (MockTarget*)t; + unsigned src_id = 0; + switch (lv.kind) { + case OPK_LOCAL: src_id = (unsigned)lv.v.frame_slot; break; + case OPK_GLOBAL: src_id = (unsigned)lv.v.global.sym; break; + case OPK_INDIRECT: src_id = (unsigned)lv.v.ind.base; break; + default: src_id = 0; + } + 
mock_logf(m, "addr_of r%u <- kind=%u id=%u\n", (unsigned)dst.v.reg, + (unsigned)lv.kind, src_id); +} + +static void m_set_loc(CGTarget* t, SrcLoc loc) { + (void)t; + (void)loc; +} + +static void m_func_begin(CGTarget* t, const CGFuncDesc* fd) { + (void)t; + (void)fd; +} +static void m_func_end(CGTarget* t) { (void)t; } +static void m_param(CGTarget* t, const CGParamDesc* d) { + (void)t; + (void)d; +} + +static void m_asm_block(CGTarget* t, const char* tmpl, + const AsmConstraint* outs, u32 nout, Operand* out_ops, + const AsmConstraint* ins, u32 nin, + const Operand* in_ops, const Sym* clobbers, u32 nclob) { + (void)outs; + (void)ins; + MockTarget* m = (MockTarget*)t; + m->asm_called = 1; + m->tmpl = tmpl; + m->nout = nout; + m->nin = nin; + m->nclob = nclob; + if (nout > 8) nout = 8; + if (nin > 8) nin = 8; + if (nclob > 8) nclob = 8; + for (u32 i = 0; i < nout; ++i) m->out_ops[i] = out_ops[i]; + for (u32 i = 0; i < nin; ++i) m->in_ops[i] = in_ops[i]; + for (u32 i = 0; i < nclob; ++i) m->clobbers[i] = clobbers[i]; + mock_logf(m, "asm_block tmpl=%s nout=%u nin=%u nclob=%u\n", + tmpl ? 
tmpl : "(null)", (unsigned)m->nout, (unsigned)m->nin, + (unsigned)m->nclob); +} + +static void mock_target_init(MockTarget* m, Compiler* c) { + memset(m, 0, sizeof *m); + m->base.c = c; + m->base.alloc_reg = m_alloc_reg; + m->base.free_reg = m_free_reg; + m->base.frame_slot = m_frame_slot; + m->base.param = m_param; + m->base.spill_reg = m_spill_reg; + m->base.reload_reg = m_reload_reg; + m->base.load_imm = m_load_imm; + m->base.copy = m_copy; + m->base.addr_of = m_addr_of; + m->base.func_begin = m_func_begin; + m->base.func_end = m_func_end; + m->base.set_loc = m_set_loc; + m->base.asm_block = m_asm_block; +} + +/* ---- per-test compiler scaffold ------------------------------------- */ + +typedef struct TestCtx { + Compiler cc; + Compiler* c; + MockTarget mt; + CG* g; + const Type* i64_ty; +} TestCtx; + +static void tc_init(TestCtx* tc) { + CfreeEnv env; + memset(&env, 0, sizeof env); + env.heap = &g_heap; + env.diag = &g_diag; + env.now = -1; + + CfreeTarget tgt; + memset(&tgt, 0, sizeof tgt); + tgt.arch = CFREE_ARCH_ARM_64; + tgt.os = CFREE_OS_LINUX; + tgt.obj = CFREE_OBJ_ELF; + tgt.ptr_size = 8; + tgt.ptr_align = 8; + + /* compiler_init wants the env on the heap-pointer side; stash it. */ + static CfreeEnv s_env_stash; + s_env_stash = env; + compiler_init(&tc->cc, tgt, &s_env_stash); + tc->c = &tc->cc; + + mock_target_init(&tc->mt, tc->c); + tc->g = cg_new(tc->c, &tc->mt.base, NULL); + tc->i64_ty = type_prim(tc->c->global, TY_LLONG); +} + +static void tc_fini(TestCtx* tc) { + cg_free(tc->g); + compiler_fini(&tc->cc); +} + +/* ---- assertion helpers ----------------------------------------------- */ + +static int g_fails = 0; +static int g_cases = 0; + +#define EXPECT(cond, ...) 
do { \ + ++g_cases; \ + if (!(cond)) { \ + ++g_fails; \ + fprintf(stderr, "FAIL %s:%d: %s\n", __FILE__, __LINE__, #cond); \ + fprintf(stderr, " "); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + } \ +} while (0) + +static int log_contains(const MockTarget* m, const char* needle) { + return strstr(m->log, needle) != NULL; +} + +/* ---- test cases ------------------------------------------------------ */ + +static void test_r_in(void) { + TestCtx tc; + tc_init(&tc); + /* asm("nop" :: "r"(42)) */ + cg_push_int(tc.g, 42, tc.i64_ty); + AsmConstraint ins[1] = {{"r", ASM_IN, {0,0,0}}}; + cg_inline_asm(tc.g, "nop", NULL, 0, ins, 1, NULL, 0); + EXPECT(tc.mt.asm_called, "asm_block was not invoked"); + EXPECT(tc.mt.nin == 1, "nin=%u", tc.mt.nin); + EXPECT(tc.mt.in_ops[0].kind == OPK_REG, "in_ops[0].kind=%u", + tc.mt.in_ops[0].kind); + /* The IMM was materialized into a freshly-allocated reg; load_imm shows it. */ + EXPECT(log_contains(&tc.mt, "load_imm"), "missing load_imm in log:\n%s", + tc.mt.log); + tc_fini(&tc); +} + +static void test_eq_r_out(void) { + TestCtx tc; + tc_init(&tc); + /* asm("mov %0, #1" : "=r"(x)) — pushes an output SValue back. */ + AsmConstraint outs[1] = {{"=r", ASM_OUT, {0,0,0}}}; + cg_inline_asm(tc.g, "mov %0, #1", outs, 1, NULL, 0, NULL, 0); + EXPECT(tc.mt.asm_called, "asm_block was not invoked"); + EXPECT(tc.mt.nout == 1, "nout=%u", tc.mt.nout); + EXPECT(tc.mt.out_ops[0].kind == OPK_REG, "out_ops[0].kind=%u", + tc.mt.out_ops[0].kind); + EXPECT(tc.mt.out_ops[0].v.reg != (Reg)REG_NONE, "out reg should be allocated"); + tc_fini(&tc); +} + +static void test_plus_r_inout(void) { + TestCtx tc; + tc_init(&tc); + /* +r is GCC's "use this reg as both input and output". The parser + * convention this binder honors: emit one output with =r-style behavior + * and one matching input "0" with the input value to seed the reg. 
*/ + cg_push_int(tc.g, 7, tc.i64_ty); + AsmConstraint outs[1] = {{"+r", ASM_INOUT, {0,0,0}}}; + AsmConstraint ins[1] = {{"0", ASM_IN, {0,0,0}}}; + cg_inline_asm(tc.g, "add %0, %0, #1", outs, 1, ins, 1, NULL, 0); + EXPECT(tc.mt.nout == 1 && tc.mt.nin == 1, "nout/nin"); + EXPECT(tc.mt.out_ops[0].kind == OPK_REG, "out reg"); + EXPECT(tc.mt.in_ops[0].kind == OPK_REG, "in reg"); + EXPECT(tc.mt.out_ops[0].v.reg == tc.mt.in_ops[0].v.reg, + "matching constraint should bind to same reg (out=%u in=%u)", + (unsigned)tc.mt.out_ops[0].v.reg, (unsigned)tc.mt.in_ops[0].v.reg); + tc_fini(&tc); +} + +static void test_eq_amp_r_early_clobber(void) { + TestCtx tc; + tc_init(&tc); + /* asm("..." : "=&r"(x) : "r"(y)) — output reg must differ from input reg. */ + cg_push_int(tc.g, 5, tc.i64_ty); + AsmConstraint outs[1] = {{"=&r", ASM_OUT, {0,0,0}}}; + AsmConstraint ins[1] = {{"r", ASM_IN, {0,0,0}}}; + cg_inline_asm(tc.g, "tmpl", outs, 1, ins, 1, NULL, 0); + EXPECT(tc.mt.out_ops[0].kind == OPK_REG && tc.mt.in_ops[0].kind == OPK_REG, + "REGs expected"); + EXPECT(tc.mt.out_ops[0].v.reg != tc.mt.in_ops[0].v.reg, + "early-clobber should be disjoint (out=%u in=%u)", + (unsigned)tc.mt.out_ops[0].v.reg, (unsigned)tc.mt.in_ops[0].v.reg); + tc_fini(&tc); +} + +static void test_i_constant(void) { + TestCtx tc; + tc_init(&tc); + cg_push_int(tc.g, 99, tc.i64_ty); + AsmConstraint ins[1] = {{"i", ASM_IN, {0,0,0}}}; + cg_inline_asm(tc.g, "tmpl", NULL, 0, ins, 1, NULL, 0); + EXPECT(tc.mt.in_ops[0].kind == OPK_IMM, "in kind=%u", tc.mt.in_ops[0].kind); + EXPECT(tc.mt.in_ops[0].v.imm == 99, "in imm=%lld", + (long long)tc.mt.in_ops[0].v.imm); + /* No load_imm (the binder forwards the IMM unchanged). */ + EXPECT(!log_contains(&tc.mt, "load_imm"), + "'i' should not load_imm, log:\n%s", tc.mt.log); + tc_fini(&tc); +} + +static void test_m_memory_lvalue(void) { + TestCtx tc; + tc_init(&tc); + /* Push a local lvalue; "m" should materialize it into OPK_INDIRECT + * via target->addr_of. 
*/ + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.size = 8; + fsd.align = 8; + fsd.kind = FS_LOCAL; + FrameSlot s = cg_local(tc.g, &fsd); + /* Use the type-aware push path. We declare it via prototype: */ + void cg_push_local_typed(CG*, FrameSlot, const Type*); + cg_push_local_typed(tc.g, s, tc.i64_ty); + AsmConstraint ins[1] = {{"m", ASM_IN, {0,0,0}}}; + cg_inline_asm(tc.g, "ldr w0, %0", NULL, 0, ins, 1, NULL, 0); + EXPECT(tc.mt.in_ops[0].kind == OPK_INDIRECT, "in kind=%u", + tc.mt.in_ops[0].kind); + EXPECT(log_contains(&tc.mt, "addr_of"), + "expected addr_of in log:\n%s", tc.mt.log); + tc_fini(&tc); +} + +static void test_matching_input(void) { + TestCtx tc; + tc_init(&tc); + /* Output =r at index 0; input "0" should bind to its reg. */ + cg_push_int(tc.g, 11, tc.i64_ty); + AsmConstraint outs[1] = {{"=r", ASM_OUT, {0,0,0}}}; + AsmConstraint ins[1] = {{"0", ASM_IN, {0,0,0}}}; + cg_inline_asm(tc.g, "tmpl", outs, 1, ins, 1, NULL, 0); + EXPECT(tc.mt.out_ops[0].v.reg == tc.mt.in_ops[0].v.reg, + "matching '0' input should reuse out reg (out=%u in=%u)", + (unsigned)tc.mt.out_ops[0].v.reg, (unsigned)tc.mt.in_ops[0].v.reg); + tc_fini(&tc); +} + +static void test_memory_clobber_spills_live_regs(void) { + TestCtx tc; + tc_init(&tc); + /* Push a LOCAL lvalue, load it into a reg via cg_load — that leaves a + * live RES_REG SValue at the bottom of the stack. Then call asm with a + * "memory" clobber and verify the live reg got spilled before the + * asm_block fired. */ + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.size = 8; + fsd.align = 8; + fsd.kind = FS_LOCAL; + FrameSlot s = cg_local(tc.g, &fsd); + void cg_push_local_typed(CG*, FrameSlot, const Type*); + cg_push_local_typed(tc.g, s, tc.i64_ty); + /* Need a load implementation on the mock to promote LOCAL → REG. + * The mock doesn't implement target->load — instead push an immediate + * to get a REG-resident SValue without calling cg_load. 
*/ + cg_push_int(tc.g, 0, tc.i64_ty); /* IMM, not REG, won't be a spill victim */ + + /* Force a REG-resident SValue at the bottom by pushing an int and + * promoting via cg_inline_asm itself — easier: skip this complexity and + * directly observe spill via a real reg-resident value built by =r + * output being pushed back. */ + AsmConstraint outs1[1] = {{"=r", ASM_OUT, {0,0,0}}}; + cg_inline_asm(tc.g, "produce", outs1, 1, NULL, 0, NULL, 0); + /* Now the stack has a REG-resident SValue from the produced output. + * Reset the log before the second call so we can scan for spill_reg + * specifically caused by "memory". */ + tc.mt.log_len = 0; + tc.mt.log[0] = '\0'; + + Sym mem_sym = pool_intern_cstr(tc.c->global, "memory"); + Sym clobs[1] = {mem_sym}; + cg_inline_asm(tc.g, "barrier", NULL, 0, NULL, 0, clobs, 1); + EXPECT(log_contains(&tc.mt, "spill_reg"), + "expected spill_reg from memory clobber, log:\n%s", tc.mt.log); + tc_fini(&tc); +} + +static void test_register_clobber_passthrough(void) { + TestCtx tc; + tc_init(&tc); + Sym x0_sym = pool_intern_cstr(tc.c->global, "x0"); + Sym clobs[1] = {x0_sym}; + cg_inline_asm(tc.g, "tmpl", NULL, 0, NULL, 0, clobs, 1); + EXPECT(tc.mt.nclob == 1, "nclob=%u", tc.mt.nclob); + EXPECT(tc.mt.clobbers[0] == x0_sym, "register clobber not forwarded"); + tc_fini(&tc); +} + +static void test_cc_clobber_silent(void) { + TestCtx tc; + tc_init(&tc); + /* "cc" should not cause spills; the binder forwards it but does no + * special work. (Arch backends drop it on aarch64.) */ + Sym cc_sym = pool_intern_cstr(tc.c->global, "cc"); + Sym clobs[1] = {cc_sym}; + /* Arrange a live REG-resident SValue first; verify it is NOT spilled. 
*/ + AsmConstraint outs1[1] = {{"=r", ASM_OUT, {0,0,0}}}; + cg_inline_asm(tc.g, "produce", outs1, 1, NULL, 0, NULL, 0); + tc.mt.log_len = 0; + tc.mt.log[0] = '\0'; + cg_inline_asm(tc.g, "tmpl", NULL, 0, NULL, 0, clobs, 1); + EXPECT(!log_contains(&tc.mt, "spill_reg"), + "'cc' must not trigger spills, log:\n%s", tc.mt.log); + EXPECT(tc.mt.nclob == 1, "nclob=%u", tc.mt.nclob); + tc_fini(&tc); +} + +int main(void) { + test_r_in(); + test_eq_r_out(); + test_plus_r_inout(); + test_eq_amp_r_early_clobber(); + test_i_constant(); + test_m_memory_lvalue(); + test_matching_input(); + test_memory_clobber_spills_live_regs(); + test_register_clobber_passthrough(); + test_cc_clobber_silent(); + + fprintf(stderr, "binder_test: %d cases, %d failures\n", g_cases, g_fails); + return g_fails ? 1 : 0; +} diff --git a/test/test.mk b/test/test.mk @@ -29,9 +29,9 @@ # parse_asm / cfree_disasm_iter_* are still stubs; the harness builds # and runs end-to-end so the wiring stays exercised. See doc/ASM.md. -.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64 +.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64 -test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-lib-deps +test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-cg-binder test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-lib-deps test-lex: bin @CFREE=$(abspath $(BIN)) test/lex/run.sh @@ -103,6 +103,20 @@ $(AA64_ISA_TEST_BIN): test/arch/aa64_isa_test.c $(LIB_AR) @mkdir -p $(dir $@) $(CC) 
$(DRIVER_CFLAGS) -Isrc test/arch/aa64_isa_test.c $(LIB_AR) -o $@ +# cg_inline_asm constraint binder unit test (doc/INLINEASM.md Track B). +# Drives cg_inline_asm against a stand-in CGTarget that records every +# operand handed to asm_block; covers r/=r/+r/=&r/i/m/0, the "memory" +# clobber spill behaviour, register-name passthrough, and "cc" no-op. +# Internal cg/ + arch/ surface — needs -Isrc. +CG_BINDER_TEST_BIN = build/test/cg_binder_test + +test-cg-binder: $(CG_BINDER_TEST_BIN) + $(CG_BINDER_TEST_BIN) + +$(CG_BINDER_TEST_BIN): test/cg/binder_test.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(DRIVER_CFLAGS) -Isrc test/cg/binder_test.c $(LIB_AR) -o $@ + # Test harness binaries shared by test-elf, test-link, and test-cg. # Declared as Make targets (not built by the run.sh scripts) so they pick # up libcfree.a changes deterministically.