commit c8b9d53d52c4d396b6caed50da9e576413595199
parent 96de8d734d0490c3e537ec6950c325e51015df59
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 6 Jun 2026 04:15:28 -0700
Fix hard-pinned native inline asm staging
Diffstat:
7 files changed, 248 insertions(+), 65 deletions(-)
diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md
@@ -5,44 +5,6 @@ fixed, remove it instead of checking it off or keeping a closed entry.
Add new deferred fixes below as they are discovered.
-## x86-64 inline asm: `-g -O1` + a 4-operand register idiom → `too many memory asm operands` (compiler abort)
-
-A register-pinned inline-asm syscall (4 operands: `rax`/`rdi`/`rsi`/`rdx` via GNU
-local register variables + an `"r"`/`"+r"` constraint, the only syscall idiom kit
-accepts — see the entry above) aborts the compiler **only on x86-64 at `-O1` with
-`-g`**. The bracket is exact: `-g -O0` OK, `-O1` (no `-g`) OK, `-O0` OK, and
-aarch64/riscv64 compile it fine at `-g -O1`; only x64 + O1 + -g fails. The
-message is `fatal: x64 inline asm: too many memory asm operands`.
-
-Minimal repro (`kit cc -target x86_64-linux-gnu -g -O1 -c`):
-
-```c
-static long w(int fd, const char* b, unsigned long n) {
- register long rax __asm__("rax") = 1, rdi __asm__("rdi") = fd;
- register long rsi __asm__("rsi") = (long)b, rdx __asm__("rdx") = (long)n;
- __asm__ volatile("syscall" : "+r"(rax) : "r"(rdi), "r"(rsi), "r"(rdx)
- : "rcx", "r11", "memory");
- return rax;
-}
-```
-
-Root cause: the x64 asm lowering stages a *memory-resident* `"r"` operand into a
-scratch register before the asm, but the scratch pool is only **two** registers
-(`X64_TMP_INT` / `X64_TMP_INT2`), and `src/arch/x64/native.c:4014` panics on the
-third. At `-O1 -g` the four pinned `register long` operands are left stack-
-resident at the asm point (the GNU `register asm` hint binds the operand, it does
-not pin residency across statements; the `-g` location tracking perturbs the
-allocator into spilling), so 3+ need staging and it trips. `-O0` keeps them in
-registers, so `ntmp` stays ≤ 2. Fix: when an `"r"` operand carries a hard-
-register pin, load it straight into that pinned register instead of a shared
-scratch temp (no temp needed at all); failing that, stage through more than two
-scratch regs. **Secondary:** the fatal itself does not exit cleanly — under the
-ASan host build `compiler_panic`'s `longjmp` (`src/core/core.c:179`) SEGVs, so the
-diagnostic becomes a SIGABRT/SEGV instead of a clean `fatal:` exit. Found writing
-the WS4 backtrace round-trip (`test/rt/addr2line_prog.c`), whose x86-64 `write`
-sink is exactly this idiom; surfaced by sweeping that test at `-O1` per
-doc/plan/BACKTRACE.md — left red (`test-rt-backtrace`, `x64/O1` lane).
-
## setjmp/longjmp miscompiled at `-O1`: the longjmp'd `setjmp` return value is wrong
A textbook setjmp/longjmp round-trip returns the right answer at `-O0` but the
@@ -84,3 +46,12 @@ program loads at an ASLR base, so captured code addresses don't match link-time
addresses without computing the load slide. `-no-pie` should clear `o->pie` (and
ideally `-static` without `-pie` should default to non-PIE). Found while making a
backtrace demo's addresses line up with `kit addr2line`.
+
+## ASan host build: some fatal diagnostics SEGV during `compiler_panic` recovery
+
+While reproducing the former x64 inline-asm fatal on the ASan host build, the
+diagnostic printed, then `compiler_panicv`'s `longjmp` (`src/core/core.c:187`)
+faulted in `_longjmp`, turning a user-facing `fatal:` into SIGABRT/SEGV. The
+inline-asm trigger is fixed, but the panic recovery path still needs a focused
+expected-fatal regression test and an audit of the panic save/restore lifetime
+under the sanitized (ASan) hosted driver.
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -4926,6 +4926,36 @@ static Reg aa_asm_native_mem_base(AANativeTarget* a, SrcLoc loc, NativeLoc src,
return dst;
}
+static void aa_asm_load_loc_to_reg(AANativeTarget* a, SrcLoc loc, NativeLoc src,
+ NativeLoc dst) { /* Materialize the asm operand value at `src` in `dst` (a register location). */
+ NativeTarget* t = &a->base;
+ NativeAllocClass cls = (NativeAllocClass)dst.cls; /* register class of the destination */
+ if (src.kind == NATIVE_LOC_REG) { /* register source: move only if register or class differs */
+ if (src.v.reg != dst.v.reg || src.cls != dst.cls) t->move(t, dst, src);
+ return;
+ }
+ if (src.kind == NATIVE_LOC_IMM) { /* immediate source: accepted for integer-class destinations only */
+ if (cls != NATIVE_REG_INT)
+ aa_asm_panic_at(t->c, loc,
+ "floating-point immediate asm input is unsupported");
+ t->load_imm(t, dst, src.v.imm);
+ return;
+ }
+ aa_emit_mem(a, 1, dst, aa_asm_loc_to_addr(a, loc, src), /* any other kind: access through the address of `src`; flag 1 presumably means load (the store helper passes 0) -- confirm against aa_emit_mem */
+ aa_mem_for_type(t, dst.type, type_size32(t, dst.type))); /* access description derived from dst.type */
+}
+
+static void aa_asm_store_reg_to_loc(AANativeTarget* a, SrcLoc loc,
+ NativeLoc dst, NativeLoc src) { /* Write the value held in register `src` back to location `dst`. */
+ NativeTarget* t = &a->base;
+ if (dst.kind == NATIVE_LOC_REG) { /* register destination: move only if register or class differs */
+ if (dst.v.reg != src.v.reg || dst.cls != src.cls) t->move(t, dst, src);
+ return;
+ }
+ aa_emit_mem(a, 0, src, aa_asm_loc_to_addr(a, loc, dst), /* any other kind: access through the address of `dst`; flag 0 presumably means store (the load helper passes 1) -- confirm against aa_emit_mem */
+ aa_mem_for_type(t, src.type, type_size32(t, src.type))); /* access description derived from src.type */
+}
+
static void aa_asm_bind_native(AANativeTarget* a, SrcLoc loc, Operand* out,
const char* constraint, KitCgTypeId type,
NativeLoc src, u32* ntmp) {
@@ -4970,13 +5000,29 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl,
SrcLoc loc = a->func ? a->func->loc : (SrcLoc){0, 0, 0};
Operand* bound_outs = nout ? arena_zarray(c->tu, Operand, nout) : NULL;
Operand* bound_ins = nin ? arena_zarray(c->tu, Operand, nin) : NULL;
+ u8* staged_outs = nout ? arena_zarray(c->tu, u8, nout) : NULL;
u32 ntmp = 0;
AA64Asm* asmh;
for (u32 i = 0; i < nout; ++i) {
KitCgTypeId type = outs[i].type ? outs[i].type : out_locs[i].type;
- aa_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, out_locs[i],
- &ntmp);
+ NativeLoc outloc = out_locs[i];
+ NativeAsmPinnedLoc pinned =
+ native_asm_prepare_pinned_loc(t, outs[i].reg, outs[i].str, type, outloc);
+ if (pinned.has_pin) {
+ if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK)
+ aa_asm_panic_at(c, loc,
+ native_asm_pin_status_message(pinned.pin_status));
+ if (pinned.wrong_reg)
+ aa_asm_panic_at(c, loc, "hard-register asm operand in wrong register");
+ outloc = pinned.loc;
+ if (pinned.needs_stage) {
+ staged_outs[i] = 1u;
+ if (outs[i].dir == KIT_CG_ASM_INOUT)
+ aa_asm_load_loc_to_reg(a, loc, out_locs[i], outloc);
+ }
+ }
+ aa_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, outloc, &ntmp);
}
for (u32 i = 0; i < nin; ++i) {
const char* body = native_asm_constraint_body(ins[i].str);
@@ -4993,16 +5039,29 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl,
const char* in_body = native_asm_constraint_body(ins[i].str);
NativeAsmConstraintInfo info;
NativeLoc inloc = in_locs[i];
+ NativeAsmPinnedLoc pinned =
+ native_asm_prepare_pinned_loc(t, ins[i].reg, ins[i].str, type, inloc);
/* A register-constrained input whose value is an address-taken local
* arrives in a frame slot: the optimizer cannot keep an address-taken
* local live in a register across the block, so the "inputs are already
* in registers" contract does not hold for it. Load it into a reserved
- * scratch register (as the direct path does) before binding. Only
- * unrestricted integer constraints can use this scratch; restricted
- * register sets must already arrive in an allowed hard register. */
- if (native_asm_constraint_reg_info(t, ins[i].str, &info) &&
- info.cls == NATIVE_REG_INT && info.allowed_mask == 0 &&
- inloc.kind != NATIVE_LOC_REG) {
+ * scratch register (as the direct path does) before binding. With no
+ * hard pin, only unrestricted integer constraints can use this scratch;
+ * restricted register sets must already arrive in an allowed hard
+ * register. */
+ if (pinned.has_pin) {
+ if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK)
+ aa_asm_panic_at(c, loc,
+ native_asm_pin_status_message(pinned.pin_status));
+ if (pinned.wrong_reg)
+ aa_asm_panic_at(c, loc,
+ "hard-register asm operand in wrong register");
+ inloc = pinned.loc;
+ if (pinned.needs_stage)
+ aa_asm_load_loc_to_reg(a, loc, in_locs[i], inloc);
+ } else if (native_asm_constraint_reg_info(t, ins[i].str, &info) &&
+ info.cls == NATIVE_REG_INT && info.allowed_mask == 0 &&
+ inloc.kind != NATIVE_LOC_REG) {
Reg r;
if (ntmp >= 2u) aa_asm_panic_at(c, loc, "too many memory asm operands");
r = (ntmp == 0u) ? AA_TMP0 : AA_TMP1;
@@ -5024,6 +5083,17 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl,
nclob);
aa64_asm_run_template(asmh, t->mc, tmpl);
aa64_asm_close(asmh);
+
+ for (u32 i = 0; i < nout; ++i) {
+ NativeAllocClass cls;
+ NativeLoc src;
+ if (!staged_outs || !staged_outs[i]) continue;
+ if (bound_outs[i].kind != AA64_INLINE_OPK_REG) continue;
+ cls = bound_outs[i].pad[0] == AA64_INLINE_OPCLS_FP ? NATIVE_REG_FP
+ : NATIVE_REG_INT;
+ src = native_loc_reg(bound_outs[i].type, cls, (Reg)bound_outs[i].v.local);
+ aa_asm_store_reg_to_loc(a, loc, out_locs[i], src);
+ }
}
static const NativeOps aa_direct_ops = {
diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c
@@ -3827,8 +3827,22 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl,
KitCgTypeId type = outs[i].type ? outs[i].type : out_locs[i].type;
NativeLoc outloc = out_locs[i];
NativeAsmConstraintInfo info;
- if (native_asm_constraint_reg_info(t, outs[i].str, &info) &&
- info.allowed_mask == 0 && outloc.kind != NATIVE_LOC_REG) {
+ NativeAsmPinnedLoc pinned =
+ native_asm_prepare_pinned_loc(t, outs[i].reg, outs[i].str, type, outloc);
+ if (pinned.has_pin) {
+ if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK)
+ rv_asm_panic_at(c, loc,
+ native_asm_pin_status_message(pinned.pin_status));
+ if (pinned.wrong_reg)
+ rv_asm_panic_at(c, loc, "hard-register asm operand in wrong register");
+ outloc = pinned.loc;
+ if (pinned.needs_stage) {
+ staged_outs[i] = 1u;
+ if (outs[i].dir == KIT_CG_ASM_INOUT)
+ rv_asm_load_loc_to_reg(a, loc, out_locs[i], outloc);
+ }
+ } else if (native_asm_constraint_reg_info(t, outs[i].str, &info) &&
+ info.allowed_mask == 0 && outloc.kind != NATIVE_LOC_REG) {
Reg r = rv_asm_stage_reg(a, loc, info.cls, &nstage_int, &nstage_fp);
outloc = native_loc_reg(type, info.cls, r);
staged_outs[i] = 1u;
@@ -3852,8 +3866,20 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl,
inloc = in_locs[i];
{
NativeAsmConstraintInfo info;
- if (native_asm_constraint_reg_info(t, ins[i].str, &info) &&
- info.allowed_mask == 0 && inloc.kind != NATIVE_LOC_REG) {
+ NativeAsmPinnedLoc pinned =
+ native_asm_prepare_pinned_loc(t, ins[i].reg, ins[i].str, type, inloc);
+ if (pinned.has_pin) {
+ if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK)
+ rv_asm_panic_at(c, loc,
+ native_asm_pin_status_message(pinned.pin_status));
+ if (pinned.wrong_reg)
+ rv_asm_panic_at(c, loc,
+ "hard-register asm operand in wrong register");
+ inloc = pinned.loc;
+ if (pinned.needs_stage)
+ rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc);
+ } else if (native_asm_constraint_reg_info(t, ins[i].str, &info) &&
+ info.allowed_mask == 0 && inloc.kind != NATIVE_LOC_REG) {
Reg r = rv_asm_stage_reg(a, loc, info.cls, &nstage_int, &nstage_fp);
inloc = native_loc_reg(type, info.cls, r);
rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc);
diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c
@@ -4014,6 +4014,36 @@ static Reg x64_asm_native_mem_base(X64NativeTarget* a, SrcLoc loc,
return dst;
}
+static void x64_asm_load_loc_to_reg(X64NativeTarget* a, SrcLoc loc,
+ NativeLoc src, NativeLoc dst) { /* Materialize the asm operand value at `src` in `dst` (a register location). */
+ NativeTarget* t = &a->base;
+ NativeAllocClass cls = (NativeAllocClass)dst.cls; /* register class of the destination */
+ if (src.kind == NATIVE_LOC_REG) { /* register source: move only if register or class differs */
+ if (src.v.reg != dst.v.reg || src.cls != dst.cls) t->move(t, dst, src);
+ return;
+ }
+ if (src.kind == NATIVE_LOC_IMM) { /* immediate source: accepted for integer-class destinations only */
+ if (cls != NATIVE_REG_INT)
+ x64_asm_panic_at(t->c, loc,
+ "floating-point immediate asm input is unsupported");
+ t->load_imm(t, dst, src.v.imm);
+ return;
+ }
+ x64_emit_mem(a, 1, dst, x64_asm_loc_to_addr(a, loc, src), /* any other kind: access through the address of `src`; flag 1 presumably means load (the store helper passes 0) -- confirm against x64_emit_mem */
+ native_mem_for_type(t, dst.type, native_type_size(t, dst.type))); /* access description derived from dst.type */
+}
+
+static void x64_asm_store_reg_to_loc(X64NativeTarget* a, SrcLoc loc,
+ NativeLoc dst, NativeLoc src) { /* Write the value held in register `src` back to location `dst`. */
+ NativeTarget* t = &a->base;
+ if (dst.kind == NATIVE_LOC_REG) { /* register destination: move only if register or class differs */
+ if (dst.v.reg != src.v.reg || dst.cls != src.cls) t->move(t, dst, src);
+ return;
+ }
+ x64_emit_mem(a, 0, src, x64_asm_loc_to_addr(a, loc, dst), /* any other kind: access through the address of `dst`; flag 0 presumably means store (the load helper passes 1) -- confirm against x64_emit_mem */
+ native_mem_for_type(t, src.type, native_type_size(t, src.type))); /* access description derived from src.type */
+}
+
static void x64_asm_bind_native(X64NativeTarget* a, SrcLoc loc, Operand* out,
const char* constraint, KitCgTypeId type,
NativeLoc src, u32* ntmp) {
@@ -4057,12 +4087,29 @@ static void x64_asm_block_native(NativeTarget* t, const char* tmpl,
SrcLoc loc = a->func ? a->func->loc : (SrcLoc){0, 0, 0};
Operand* bound_outs = nout ? arena_zarray(c->tu, Operand, nout) : NULL;
Operand* bound_ins = nin ? arena_zarray(c->tu, Operand, nin) : NULL;
+ u8* staged_outs = nout ? arena_zarray(c->tu, u8, nout) : NULL;
u32 ntmp = 0, i;
X64Asm* asmh;
for (i = 0; i < nout; ++i) {
KitCgTypeId type = outs[i].type ? outs[i].type : out_locs[i].type;
- x64_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, out_locs[i],
+ NativeLoc outloc = out_locs[i];
+ NativeAsmPinnedLoc pinned =
+ native_asm_prepare_pinned_loc(t, outs[i].reg, outs[i].str, type, outloc);
+ if (pinned.has_pin) {
+ if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK)
+ x64_asm_panic_at(c, loc,
+ native_asm_pin_status_message(pinned.pin_status));
+ if (pinned.wrong_reg)
+ x64_asm_panic_at(c, loc, "hard-register asm operand in wrong register");
+ outloc = pinned.loc;
+ if (pinned.needs_stage) {
+ staged_outs[i] = 1u;
+ if (outs[i].dir == KIT_CG_ASM_INOUT)
+ x64_asm_load_loc_to_reg(a, loc, out_locs[i], outloc);
+ }
+ }
+ x64_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, outloc,
&ntmp);
}
for (i = 0; i < nin; ++i) {
@@ -4078,14 +4125,28 @@ static void x64_asm_block_native(NativeTarget* t, const char* tmpl,
}
type = ins[i].type ? ins[i].type : in_locs[i].type;
inloc = in_locs[i];
- if ((body[0] == 'r') && inloc.kind != NATIVE_LOC_REG) {
- Reg r;
- if (ntmp >= 2u) x64_asm_panic_at(c, loc, "too many memory asm operands");
- r = (ntmp == 0u) ? (Reg)X64_TMP_INT : (Reg)X64_TMP_INT2;
- ntmp++;
- inloc = native_loc_reg(type, NATIVE_REG_INT, r);
- x64_emit_mem(a, 1, inloc, x64_asm_loc_to_addr(a, loc, in_locs[i]),
- native_mem_for_type(t, type, native_type_size(t, type)));
+ {
+ NativeAsmPinnedLoc pinned =
+ native_asm_prepare_pinned_loc(t, ins[i].reg, ins[i].str, type, inloc);
+ if (pinned.has_pin) {
+ if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK)
+ x64_asm_panic_at(c, loc,
+ native_asm_pin_status_message(pinned.pin_status));
+ if (pinned.wrong_reg)
+ x64_asm_panic_at(c, loc,
+ "hard-register asm operand in wrong register");
+ inloc = pinned.loc;
+ if (pinned.needs_stage)
+ x64_asm_load_loc_to_reg(a, loc, in_locs[i], inloc);
+ } else if ((body[0] == 'r') && inloc.kind != NATIVE_LOC_REG) {
+ Reg r;
+ if (ntmp >= 2u) x64_asm_panic_at(c, loc, "too many memory asm operands");
+ r = (ntmp == 0u) ? (Reg)X64_TMP_INT : (Reg)X64_TMP_INT2;
+ ntmp++;
+ inloc = native_loc_reg(type, NATIVE_REG_INT, r);
+ x64_emit_mem(a, 1, inloc, x64_asm_loc_to_addr(a, loc, in_locs[i]),
+ native_mem_for_type(t, type, native_type_size(t, type)));
+ }
}
x64_asm_bind_native(a, loc, &bound_ins[i], ins[i].str, type, inloc, &ntmp);
}
@@ -4098,6 +4159,17 @@ static void x64_asm_block_native(NativeTarget* t, const char* tmpl,
nclob);
x64_asm_run_template(asmh, t->mc, tmpl);
x64_asm_close(asmh);
+
+ for (i = 0; i < nout; ++i) {
+ NativeAllocClass cls;
+ NativeLoc src;
+ if (!staged_outs || !staged_outs[i]) continue;
+ if (bound_outs[i].kind != X64_INLINE_OPK_REG) continue;
+ cls = bound_outs[i].pad[0] == X64_INLINE_OPCLS_FP ? NATIVE_REG_FP
+ : NATIVE_REG_INT;
+ src = native_loc_reg(bound_outs[i].type, cls, (Reg)bound_outs[i].v.local);
+ x64_asm_store_reg_to_loc(a, loc, out_locs[i], src);
+ }
}
/* file_scope_asm + finalize are shared (cg/native_asm.h). */
diff --git a/src/cg/native_asm.c b/src/cg/native_asm.c
@@ -161,6 +161,28 @@ NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg,
return NATIVE_ASM_REG_PIN_OK;
}
+NativeAsmPinnedLoc native_asm_prepare_pinned_loc(NativeTarget* t, Sym reg,
+ const char* constraint,
+ KitCgTypeId type,
+ NativeLoc loc) { /* Classify operand location `loc` against its hard-register pin `reg`; emits no code itself. */
+ NativeAsmPinnedLoc out;
+ NativeAsmRegPin pin;
+ memset(&out, 0, sizeof out); /* has_pin, needs_stage and wrong_reg all start at 0 */
+ out.loc = loc; /* default: bind the operand where it already is */
+ out.pin_status = native_asm_resolve_pin(t, reg, constraint, &pin);
+ if (out.pin_status == NATIVE_ASM_REG_PIN_ABSENT) return out; /* no pin: has_pin stays 0 */
+ out.has_pin = 1u;
+ if (out.pin_status != NATIVE_ASM_REG_PIN_OK) return out; /* any other non-OK status is returned as-is for the caller to handle */
+ if (loc.kind != NATIVE_LOC_REG) { /* value is not in a register: redirect to the pinned register and request staging */
+ out.loc = native_loc_reg(type, pin.cls, pin.reg);
+ out.needs_stage = 1u;
+ return out;
+ }
+ if ((Reg)loc.v.reg != pin.reg || (NativeAllocClass)loc.cls != pin.cls) /* in a register, but not the pinned register/class */
+ out.wrong_reg = 1u;
+ return out;
+}
+
const char* native_asm_pin_status_message(NativeAsmRegPinStatus st) {
switch (st) {
case NATIVE_ASM_REG_PIN_ABSENT:
diff --git a/src/cg/native_asm.h b/src/cg/native_asm.h
@@ -65,6 +65,14 @@ typedef struct NativeAsmConstraintInfo {
u32 allowed_mask; /* 0 means any valid register in cls. */
} NativeAsmConstraintInfo;
+typedef struct NativeAsmPinnedLoc { /* Result of native_asm_prepare_pinned_loc for one asm operand. */
+ NativeLoc loc; /* location to bind: the pinned register when needs_stage is set, otherwise the input location unchanged */
+ NativeAsmRegPinStatus pin_status; /* status returned by native_asm_resolve_pin */
+ u8 has_pin; /* 1 unless pin_status is NATIVE_ASM_REG_PIN_ABSENT */
+ u8 needs_stage; /* 1 when the pin resolved OK but the value was not in a register */
+ u8 wrong_reg; /* 1 when the value is in a register whose number or class differs from the pin */
+} NativeAsmPinnedLoc;
+
int native_asm_constraint_reg_info(NativeTarget* t, const char* constraint,
NativeAsmConstraintInfo* out);
int native_asm_constraint_is_reg(NativeTarget* t, const char* constraint);
@@ -76,6 +84,10 @@ int native_asm_constraint_is_reg(NativeTarget* t, const char* constraint);
NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg,
const char* constraint,
NativeAsmRegPin* out);
+NativeAsmPinnedLoc native_asm_prepare_pinned_loc(NativeTarget* t, Sym reg,
+ const char* constraint,
+ KitCgTypeId type,
+ NativeLoc loc);
const char* native_asm_pin_status_message(NativeAsmRegPinStatus st);
int native_asm_constraint_reg_class(const char* constraint,
NativeAllocClass* cls_out);
diff --git a/src/opt/pass_native_emit.c b/src/opt/pass_native_emit.c
@@ -1414,13 +1414,18 @@ static void plan_frame(NativeEmitCtx* e, const CGFuncDesc* fd) {
} else if ((IROp)in->op == IR_ASM_BLOCK) {
/* Inline asm may clobber the return-address register or the red zone
* opaquely; disqualifies the frame-eliding tiers (see has_asm). Its
- * callee-saved register clobbers are equally opaque to the operand scan
- * below; count them now so the backend can fold them into the saved
- * set (collected into a single Sym list in a second pass below). */
+ * callee-saved register clobbers and hard-register operand pins are
+ * equally opaque to the operand scan below; count them now so the
+ * backend can fold them into the saved set (collected into a single Sym
+ * list in a second pass below). */
IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
has_asm = 1;
if (aux) {
nasm_clob += aux->nclob;
+ for (u32 k = 0; k < aux->nout; ++k)
+ if (aux->outs[k].reg) ++nasm_clob;
+ for (u32 k = 0; k < aux->nin; ++k)
+ if (aux->ins[k].reg) ++nasm_clob;
asm_clobber_abi_sets |= aux->clobber_abi_sets;
}
} else if ((IROp)in->op == IR_INTRINSIC) {
@@ -1434,9 +1439,10 @@ static void plan_frame(NativeEmitCtx* e, const CGFuncDesc* fd) {
}
}
}
- /* Gather the union of every asm block's clobber names. The backend resolves
- * them with its own clobber parser (machinize's resolve_name is unset on
- * every backend, so aux->clobber_mask is unreliable here). */
+ /* Gather the union of every asm block's clobber names and hard-register
+ * operand pins. The backend resolves them with its own clobber parser
+ * (machinize's resolve_name is unset on every backend, so aux->clobber_mask is
+ * unreliable here). */
if (nasm_clob) {
u32 n = 0;
asm_clobbers = arena_array(e->f->arena, Sym, nasm_clob);
@@ -1449,6 +1455,10 @@ static void plan_frame(NativeEmitCtx* e, const CGFuncDesc* fd) {
aux = (IRAsmAux*)in->extra.aux;
for (u32 k = 0; aux && k < aux->nclob; ++k)
asm_clobbers[n++] = aux->clobbers[k];
+ for (u32 k = 0; aux && k < aux->nout; ++k)
+ if (aux->outs[k].reg) asm_clobbers[n++] = aux->outs[k].reg;
+ for (u32 k = 0; aux && k < aux->nin; ++k)
+ if (aux->ins[k].reg) asm_clobbers[n++] = aux->ins[k].reg;
}
}
nasm_clob = n;