kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit c8b9d53d52c4d396b6caed50da9e576413595199
parent 96de8d734d0490c3e537ec6950c325e51015df59
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  6 Jun 2026 04:15:28 -0700

Fix hard-pinned native inline asm staging

Diffstat:
M doc/plan/TODO.md | 47 +++++++++--------------------------------------
M src/arch/aa64/native.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M src/arch/riscv/native.c | 34 ++++++++++++++++++++++++++++++----
M src/arch/x64/native.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M src/cg/native_asm.c | 22 ++++++++++++++++++++++
M src/cg/native_asm.h | 12 ++++++++++++
M src/opt/pass_native_emit.c | 22 ++++++++++++++++------
7 files changed, 248 insertions(+), 65 deletions(-)

diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md @@ -5,44 +5,6 @@ fixed, remove it instead of checking it off or keeping a closed entry. Add new deferred fixes below as they are discovered. -## x86-64 inline asm: `-g -O1` + a 4-operand register idiom → `too many memory asm operands` (compiler abort) - -A register-pinned inline-asm syscall (4 operands: `rax`/`rdi`/`rsi`/`rdx` via GNU -local register variables + an `"r"`/`"+r"` constraint, the only syscall idiom kit -accepts — see the entry above) aborts the compiler **only on x86-64 at `-O1` with -`-g`**. The bracket is exact: `-g -O0` OK, `-O1` (no `-g`) OK, `-O0` OK, and -aarch64/riscv64 compile it fine at `-g -O1`; only x64 + O1 + -g fails. The -message is `fatal: x64 inline asm: too many memory asm operands`. - -Minimal repro (`kit cc -target x86_64-linux-gnu -g -O1 -c`): - -```c -static long w(int fd, const char* b, unsigned long n) { - register long rax __asm__("rax") = 1, rdi __asm__("rdi") = fd; - register long rsi __asm__("rsi") = (long)b, rdx __asm__("rdx") = (long)n; - __asm__ volatile("syscall" : "+r"(rax) : "r"(rdi), "r"(rsi), "r"(rdx) - : "rcx", "r11", "memory"); - return rax; -} -``` - -Root cause: the x64 asm lowering stages a *memory-resident* `"r"` operand into a -scratch register before the asm, but the scratch pool is only **two** registers -(`X64_TMP_INT` / `X64_TMP_INT2`), and `src/arch/x64/native.c:4014` panics on the -third. At `-O1 -g` the four pinned `register long` operands are left stack- -resident at the asm point (the GNU `register asm` hint binds the operand, it does -not pin residency across statements; the `-g` location tracking perturbs the -allocator into spilling), so 3+ need staging and it trips. `-O0` keeps them in -registers, so `ntmp` stays ≤ 2. Fix: when an `"r"` operand carries a hard- -register pin, load it straight into that pinned register instead of a shared -scratch temp (no temp needed at all); failing that, stage through more than two -scratch regs. 
**Secondary:** the fatal itself does not exit cleanly — under the -ASan host build `compiler_panic`'s `longjmp` (`src/core/core.c:179`) SEGVs, so the -diagnostic becomes a SIGABRT/SEGV instead of a clean `fatal:` exit. Found writing -the WS4 backtrace round-trip (`test/rt/addr2line_prog.c`), whose x86-64 `write` -sink is exactly this idiom; surfaced by sweeping that test at `-O1` per -doc/plan/BACKTRACE.md — left red (`test-rt-backtrace`, `x64/O1` lane). - ## setjmp/longjmp miscompiled at `-O1`: the longjmp'd `setjmp` return value is wrong A textbook setjmp/longjmp round-trip returns the right answer at `-O0` but the @@ -84,3 +46,12 @@ program loads at an ASLR base, so captured code addresses don't match link-time addresses without computing the load slide. `-no-pie` should clear `o->pie` (and ideally `-static` without `-pie` should default to non-PIE). Found while making a backtrace demo's addresses line up with `kit addr2line`. + +## ASan host build: some fatal diagnostics SEGV during `compiler_panic` recovery + +While reproducing the former x64 inline-asm fatal on the ASan host build, the +diagnostic printed, then `compiler_panicv`'s `longjmp` (`src/core/core.c:187`) +faulted in `_longjmp`, turning a user-facing `fatal:` into SIGABRT/SEGV. The +inline-asm trigger is fixed, but the panic recovery path still needs a focused +expected-fatal regression and an audit of panic save/restore lifetime under the +sanitized hosted driver. 
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c @@ -4926,6 +4926,36 @@ static Reg aa_asm_native_mem_base(AANativeTarget* a, SrcLoc loc, NativeLoc src, return dst; } +static void aa_asm_load_loc_to_reg(AANativeTarget* a, SrcLoc loc, NativeLoc src, + NativeLoc dst) { + NativeTarget* t = &a->base; + NativeAllocClass cls = (NativeAllocClass)dst.cls; + if (src.kind == NATIVE_LOC_REG) { + if (src.v.reg != dst.v.reg || src.cls != dst.cls) t->move(t, dst, src); + return; + } + if (src.kind == NATIVE_LOC_IMM) { + if (cls != NATIVE_REG_INT) + aa_asm_panic_at(t->c, loc, + "floating-point immediate asm input is unsupported"); + t->load_imm(t, dst, src.v.imm); + return; + } + aa_emit_mem(a, 1, dst, aa_asm_loc_to_addr(a, loc, src), + aa_mem_for_type(t, dst.type, type_size32(t, dst.type))); +} + +static void aa_asm_store_reg_to_loc(AANativeTarget* a, SrcLoc loc, + NativeLoc dst, NativeLoc src) { + NativeTarget* t = &a->base; + if (dst.kind == NATIVE_LOC_REG) { + if (dst.v.reg != src.v.reg || dst.cls != src.cls) t->move(t, dst, src); + return; + } + aa_emit_mem(a, 0, src, aa_asm_loc_to_addr(a, loc, dst), + aa_mem_for_type(t, src.type, type_size32(t, src.type))); +} + static void aa_asm_bind_native(AANativeTarget* a, SrcLoc loc, Operand* out, const char* constraint, KitCgTypeId type, NativeLoc src, u32* ntmp) { @@ -4970,13 +5000,29 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl, SrcLoc loc = a->func ? a->func->loc : (SrcLoc){0, 0, 0}; Operand* bound_outs = nout ? arena_zarray(c->tu, Operand, nout) : NULL; Operand* bound_ins = nin ? arena_zarray(c->tu, Operand, nin) : NULL; + u8* staged_outs = nout ? arena_zarray(c->tu, u8, nout) : NULL; u32 ntmp = 0; AA64Asm* asmh; for (u32 i = 0; i < nout; ++i) { KitCgTypeId type = outs[i].type ? 
outs[i].type : out_locs[i].type; - aa_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, out_locs[i], - &ntmp); + NativeLoc outloc = out_locs[i]; + NativeAsmPinnedLoc pinned = + native_asm_prepare_pinned_loc(t, outs[i].reg, outs[i].str, type, outloc); + if (pinned.has_pin) { + if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK) + aa_asm_panic_at(c, loc, + native_asm_pin_status_message(pinned.pin_status)); + if (pinned.wrong_reg) + aa_asm_panic_at(c, loc, "hard-register asm operand in wrong register"); + outloc = pinned.loc; + if (pinned.needs_stage) { + staged_outs[i] = 1u; + if (outs[i].dir == KIT_CG_ASM_INOUT) + aa_asm_load_loc_to_reg(a, loc, out_locs[i], outloc); + } + } + aa_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, outloc, &ntmp); } for (u32 i = 0; i < nin; ++i) { const char* body = native_asm_constraint_body(ins[i].str); @@ -4993,16 +5039,29 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl, const char* in_body = native_asm_constraint_body(ins[i].str); NativeAsmConstraintInfo info; NativeLoc inloc = in_locs[i]; + NativeAsmPinnedLoc pinned = + native_asm_prepare_pinned_loc(t, ins[i].reg, ins[i].str, type, inloc); /* A register-constrained input whose value is an address-taken local * arrives in a frame slot: the optimizer cannot keep an address-taken * local live in a register across the block, so the "inputs are already * in registers" contract does not hold for it. Load it into a reserved - * scratch register (as the direct path does) before binding. Only - * unrestricted integer constraints can use this scratch; restricted - * register sets must already arrive in an allowed hard register. */ - if (native_asm_constraint_reg_info(t, ins[i].str, &info) && - info.cls == NATIVE_REG_INT && info.allowed_mask == 0 && - inloc.kind != NATIVE_LOC_REG) { + * scratch register (as the direct path does) before binding. 
With no + * hard pin, only unrestricted integer constraints can use this scratch; + * restricted register sets must already arrive in an allowed hard + * register. */ + if (pinned.has_pin) { + if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK) + aa_asm_panic_at(c, loc, + native_asm_pin_status_message(pinned.pin_status)); + if (pinned.wrong_reg) + aa_asm_panic_at(c, loc, + "hard-register asm operand in wrong register"); + inloc = pinned.loc; + if (pinned.needs_stage) + aa_asm_load_loc_to_reg(a, loc, in_locs[i], inloc); + } else if (native_asm_constraint_reg_info(t, ins[i].str, &info) && + info.cls == NATIVE_REG_INT && info.allowed_mask == 0 && + inloc.kind != NATIVE_LOC_REG) { Reg r; if (ntmp >= 2u) aa_asm_panic_at(c, loc, "too many memory asm operands"); r = (ntmp == 0u) ? AA_TMP0 : AA_TMP1; @@ -5024,6 +5083,17 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl, nclob); aa64_asm_run_template(asmh, t->mc, tmpl); aa64_asm_close(asmh); + + for (u32 i = 0; i < nout; ++i) { + NativeAllocClass cls; + NativeLoc src; + if (!staged_outs || !staged_outs[i]) continue; + if (bound_outs[i].kind != AA64_INLINE_OPK_REG) continue; + cls = bound_outs[i].pad[0] == AA64_INLINE_OPCLS_FP ? NATIVE_REG_FP + : NATIVE_REG_INT; + src = native_loc_reg(bound_outs[i].type, cls, (Reg)bound_outs[i].v.local); + aa_asm_store_reg_to_loc(a, loc, out_locs[i], src); + } } static const NativeOps aa_direct_ops = { diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c @@ -3827,8 +3827,22 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl, KitCgTypeId type = outs[i].type ? 
outs[i].type : out_locs[i].type; NativeLoc outloc = out_locs[i]; NativeAsmConstraintInfo info; - if (native_asm_constraint_reg_info(t, outs[i].str, &info) && - info.allowed_mask == 0 && outloc.kind != NATIVE_LOC_REG) { + NativeAsmPinnedLoc pinned = + native_asm_prepare_pinned_loc(t, outs[i].reg, outs[i].str, type, outloc); + if (pinned.has_pin) { + if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK) + rv_asm_panic_at(c, loc, + native_asm_pin_status_message(pinned.pin_status)); + if (pinned.wrong_reg) + rv_asm_panic_at(c, loc, "hard-register asm operand in wrong register"); + outloc = pinned.loc; + if (pinned.needs_stage) { + staged_outs[i] = 1u; + if (outs[i].dir == KIT_CG_ASM_INOUT) + rv_asm_load_loc_to_reg(a, loc, out_locs[i], outloc); + } + } else if (native_asm_constraint_reg_info(t, outs[i].str, &info) && + info.allowed_mask == 0 && outloc.kind != NATIVE_LOC_REG) { Reg r = rv_asm_stage_reg(a, loc, info.cls, &nstage_int, &nstage_fp); outloc = native_loc_reg(type, info.cls, r); staged_outs[i] = 1u; @@ -3852,8 +3866,20 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl, inloc = in_locs[i]; { NativeAsmConstraintInfo info; - if (native_asm_constraint_reg_info(t, ins[i].str, &info) && - info.allowed_mask == 0 && inloc.kind != NATIVE_LOC_REG) { + NativeAsmPinnedLoc pinned = + native_asm_prepare_pinned_loc(t, ins[i].reg, ins[i].str, type, inloc); + if (pinned.has_pin) { + if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK) + rv_asm_panic_at(c, loc, + native_asm_pin_status_message(pinned.pin_status)); + if (pinned.wrong_reg) + rv_asm_panic_at(c, loc, + "hard-register asm operand in wrong register"); + inloc = pinned.loc; + if (pinned.needs_stage) + rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc); + } else if (native_asm_constraint_reg_info(t, ins[i].str, &info) && + info.allowed_mask == 0 && inloc.kind != NATIVE_LOC_REG) { Reg r = rv_asm_stage_reg(a, loc, info.cls, &nstage_int, &nstage_fp); inloc = native_loc_reg(type, info.cls, r); 
rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc); diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c @@ -4014,6 +4014,36 @@ static Reg x64_asm_native_mem_base(X64NativeTarget* a, SrcLoc loc, return dst; } +static void x64_asm_load_loc_to_reg(X64NativeTarget* a, SrcLoc loc, + NativeLoc src, NativeLoc dst) { + NativeTarget* t = &a->base; + NativeAllocClass cls = (NativeAllocClass)dst.cls; + if (src.kind == NATIVE_LOC_REG) { + if (src.v.reg != dst.v.reg || src.cls != dst.cls) t->move(t, dst, src); + return; + } + if (src.kind == NATIVE_LOC_IMM) { + if (cls != NATIVE_REG_INT) + x64_asm_panic_at(t->c, loc, + "floating-point immediate asm input is unsupported"); + t->load_imm(t, dst, src.v.imm); + return; + } + x64_emit_mem(a, 1, dst, x64_asm_loc_to_addr(a, loc, src), + native_mem_for_type(t, dst.type, native_type_size(t, dst.type))); +} + +static void x64_asm_store_reg_to_loc(X64NativeTarget* a, SrcLoc loc, + NativeLoc dst, NativeLoc src) { + NativeTarget* t = &a->base; + if (dst.kind == NATIVE_LOC_REG) { + if (dst.v.reg != src.v.reg || dst.cls != src.cls) t->move(t, dst, src); + return; + } + x64_emit_mem(a, 0, src, x64_asm_loc_to_addr(a, loc, dst), + native_mem_for_type(t, src.type, native_type_size(t, src.type))); +} + static void x64_asm_bind_native(X64NativeTarget* a, SrcLoc loc, Operand* out, const char* constraint, KitCgTypeId type, NativeLoc src, u32* ntmp) { @@ -4057,12 +4087,29 @@ static void x64_asm_block_native(NativeTarget* t, const char* tmpl, SrcLoc loc = a->func ? a->func->loc : (SrcLoc){0, 0, 0}; Operand* bound_outs = nout ? arena_zarray(c->tu, Operand, nout) : NULL; Operand* bound_ins = nin ? arena_zarray(c->tu, Operand, nin) : NULL; + u8* staged_outs = nout ? arena_zarray(c->tu, u8, nout) : NULL; u32 ntmp = 0, i; X64Asm* asmh; for (i = 0; i < nout; ++i) { KitCgTypeId type = outs[i].type ? 
outs[i].type : out_locs[i].type; - x64_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, out_locs[i], + NativeLoc outloc = out_locs[i]; + NativeAsmPinnedLoc pinned = + native_asm_prepare_pinned_loc(t, outs[i].reg, outs[i].str, type, outloc); + if (pinned.has_pin) { + if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK) + x64_asm_panic_at(c, loc, + native_asm_pin_status_message(pinned.pin_status)); + if (pinned.wrong_reg) + x64_asm_panic_at(c, loc, "hard-register asm operand in wrong register"); + outloc = pinned.loc; + if (pinned.needs_stage) { + staged_outs[i] = 1u; + if (outs[i].dir == KIT_CG_ASM_INOUT) + x64_asm_load_loc_to_reg(a, loc, out_locs[i], outloc); + } + } + x64_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, outloc, &ntmp); } for (i = 0; i < nin; ++i) { @@ -4078,14 +4125,28 @@ static void x64_asm_block_native(NativeTarget* t, const char* tmpl, } type = ins[i].type ? ins[i].type : in_locs[i].type; inloc = in_locs[i]; - if ((body[0] == 'r') && inloc.kind != NATIVE_LOC_REG) { - Reg r; - if (ntmp >= 2u) x64_asm_panic_at(c, loc, "too many memory asm operands"); - r = (ntmp == 0u) ? (Reg)X64_TMP_INT : (Reg)X64_TMP_INT2; - ntmp++; - inloc = native_loc_reg(type, NATIVE_REG_INT, r); - x64_emit_mem(a, 1, inloc, x64_asm_loc_to_addr(a, loc, in_locs[i]), - native_mem_for_type(t, type, native_type_size(t, type))); + { + NativeAsmPinnedLoc pinned = + native_asm_prepare_pinned_loc(t, ins[i].reg, ins[i].str, type, inloc); + if (pinned.has_pin) { + if (pinned.pin_status != NATIVE_ASM_REG_PIN_OK) + x64_asm_panic_at(c, loc, + native_asm_pin_status_message(pinned.pin_status)); + if (pinned.wrong_reg) + x64_asm_panic_at(c, loc, + "hard-register asm operand in wrong register"); + inloc = pinned.loc; + if (pinned.needs_stage) + x64_asm_load_loc_to_reg(a, loc, in_locs[i], inloc); + } else if ((body[0] == 'r') && inloc.kind != NATIVE_LOC_REG) { + Reg r; + if (ntmp >= 2u) x64_asm_panic_at(c, loc, "too many memory asm operands"); + r = (ntmp == 0u) ? 
(Reg)X64_TMP_INT : (Reg)X64_TMP_INT2; + ntmp++; + inloc = native_loc_reg(type, NATIVE_REG_INT, r); + x64_emit_mem(a, 1, inloc, x64_asm_loc_to_addr(a, loc, in_locs[i]), + native_mem_for_type(t, type, native_type_size(t, type))); + } } x64_asm_bind_native(a, loc, &bound_ins[i], ins[i].str, type, inloc, &ntmp); } @@ -4098,6 +4159,17 @@ static void x64_asm_block_native(NativeTarget* t, const char* tmpl, nclob); x64_asm_run_template(asmh, t->mc, tmpl); x64_asm_close(asmh); + + for (i = 0; i < nout; ++i) { + NativeAllocClass cls; + NativeLoc src; + if (!staged_outs || !staged_outs[i]) continue; + if (bound_outs[i].kind != X64_INLINE_OPK_REG) continue; + cls = bound_outs[i].pad[0] == X64_INLINE_OPCLS_FP ? NATIVE_REG_FP + : NATIVE_REG_INT; + src = native_loc_reg(bound_outs[i].type, cls, (Reg)bound_outs[i].v.local); + x64_asm_store_reg_to_loc(a, loc, out_locs[i], src); + } } /* file_scope_asm + finalize are shared (cg/native_asm.h). */ diff --git a/src/cg/native_asm.c b/src/cg/native_asm.c @@ -161,6 +161,28 @@ NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg, return NATIVE_ASM_REG_PIN_OK; } +NativeAsmPinnedLoc native_asm_prepare_pinned_loc(NativeTarget* t, Sym reg, + const char* constraint, + KitCgTypeId type, + NativeLoc loc) { + NativeAsmPinnedLoc out; + NativeAsmRegPin pin; + memset(&out, 0, sizeof out); + out.loc = loc; + out.pin_status = native_asm_resolve_pin(t, reg, constraint, &pin); + if (out.pin_status == NATIVE_ASM_REG_PIN_ABSENT) return out; + out.has_pin = 1u; + if (out.pin_status != NATIVE_ASM_REG_PIN_OK) return out; + if (loc.kind != NATIVE_LOC_REG) { + out.loc = native_loc_reg(type, pin.cls, pin.reg); + out.needs_stage = 1u; + return out; + } + if ((Reg)loc.v.reg != pin.reg || (NativeAllocClass)loc.cls != pin.cls) + out.wrong_reg = 1u; + return out; +} + const char* native_asm_pin_status_message(NativeAsmRegPinStatus st) { switch (st) { case NATIVE_ASM_REG_PIN_ABSENT: diff --git a/src/cg/native_asm.h b/src/cg/native_asm.h @@ -65,6 +65,14 
@@ typedef struct NativeAsmConstraintInfo { u32 allowed_mask; /* 0 means any valid register in cls. */ } NativeAsmConstraintInfo; +typedef struct NativeAsmPinnedLoc { + NativeLoc loc; + NativeAsmRegPinStatus pin_status; + u8 has_pin; + u8 needs_stage; + u8 wrong_reg; +} NativeAsmPinnedLoc; + int native_asm_constraint_reg_info(NativeTarget* t, const char* constraint, NativeAsmConstraintInfo* out); int native_asm_constraint_is_reg(NativeTarget* t, const char* constraint); @@ -76,6 +84,10 @@ int native_asm_constraint_is_reg(NativeTarget* t, const char* constraint); NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg, const char* constraint, NativeAsmRegPin* out); +NativeAsmPinnedLoc native_asm_prepare_pinned_loc(NativeTarget* t, Sym reg, + const char* constraint, + KitCgTypeId type, + NativeLoc loc); const char* native_asm_pin_status_message(NativeAsmRegPinStatus st); int native_asm_constraint_reg_class(const char* constraint, NativeAllocClass* cls_out); diff --git a/src/opt/pass_native_emit.c b/src/opt/pass_native_emit.c @@ -1414,13 +1414,18 @@ static void plan_frame(NativeEmitCtx* e, const CGFuncDesc* fd) { } else if ((IROp)in->op == IR_ASM_BLOCK) { /* Inline asm may clobber the return-address register or the red zone * opaquely; disqualifies the frame-eliding tiers (see has_asm). Its - * callee-saved register clobbers are equally opaque to the operand scan - * below; count them now so the backend can fold them into the saved - * set (collected into a single Sym list in a second pass below). */ + * callee-saved register clobbers and hard-register operand pins are + * equally opaque to the operand scan below; count them now so the + * backend can fold them into the saved set (collected into a single Sym + * list in a second pass below). 
*/ IRAsmAux* aux = (IRAsmAux*)in->extra.aux; has_asm = 1; if (aux) { nasm_clob += aux->nclob; + for (u32 k = 0; k < aux->nout; ++k) + if (aux->outs[k].reg) ++nasm_clob; + for (u32 k = 0; k < aux->nin; ++k) + if (aux->ins[k].reg) ++nasm_clob; asm_clobber_abi_sets |= aux->clobber_abi_sets; } } else if ((IROp)in->op == IR_INTRINSIC) { @@ -1434,9 +1439,10 @@ static void plan_frame(NativeEmitCtx* e, const CGFuncDesc* fd) { } } } - /* Gather the union of every asm block's clobber names. The backend resolves - * them with its own clobber parser (machinize's resolve_name is unset on - * every backend, so aux->clobber_mask is unreliable here). */ + /* Gather the union of every asm block's clobber names and hard-register + * operand pins. The backend resolves them with its own clobber parser + * (machinize's resolve_name is unset on every backend, so aux->clobber_mask is + * unreliable here). */ if (nasm_clob) { u32 n = 0; asm_clobbers = arena_array(e->f->arena, Sym, nasm_clob); @@ -1449,6 +1455,10 @@ static void plan_frame(NativeEmitCtx* e, const CGFuncDesc* fd) { aux = (IRAsmAux*)in->extra.aux; for (u32 k = 0; aux && k < aux->nclob; ++k) asm_clobbers[n++] = aux->clobbers[k]; + for (u32 k = 0; aux && k < aux->nout; ++k) + if (aux->outs[k].reg) asm_clobbers[n++] = aux->outs[k].reg; + for (u32 k = 0; aux && k < aux->nin; ++k) + if (aux->ins[k].reg) asm_clobbers[n++] = aux->ins[k].reg; } } nasm_clob = n;