kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 44b5521e6ab0bdb348582ec7fe080e7ff6e36704
parent 69d47850d76ff863e4493092346bd41f5c1f6574
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  6 Jun 2026 03:34:19 -0700

Support arch inline asm machine constraints

Diffstat:
M doc/plan/TODO.md | 51 +++++++++++++++++----------------------------------
M src/arch/aa64/native.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
M src/arch/native_target.h | 10 ++++++++++
M src/arch/riscv/native.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M src/arch/x64/native.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
M src/cg/asm.c | 23 +++++++++++++----------
M src/cg/cgtarget.h | 1 +
M src/cg/ir_recorder.c | 9 +++++++++
M src/cg/ir_recorder.h | 1 +
M src/cg/native_asm.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/cg/native_asm.h | 11 +++++++++++
M src/cg/native_direct_target.c | 7 +++++++
M src/opt/ir.h | 5 +++++
M src/opt/opt.c | 7 +++++++
M src/opt/pass_coalesce.c | 38 +++++++++++++++++++++++++++++++++++---
M src/opt/pass_lower.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M src/opt/pass_machinize.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
A test/parse/cases/cg_native_inline_asm_machine_constraints.c | 19 +++++++++++++++++++
A test/parse/cases/cg_native_inline_asm_machine_constraints.expected | 1 +
A test/parse/cases/cg_x64_inline_asm_machine_constraints.c | 17 +++++++++++++++++
A test/parse/cases/cg_x64_inline_asm_machine_constraints.expected | 1 +
21 files changed, 692 insertions(+), 210 deletions(-)

diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md @@ -5,44 +5,27 @@ fixed, remove it instead of checking it off or keeping a closed entry. Add new deferred fixes below as they are discovered. -## Inline asm: kit rejects machine-specific register constraints (e.g. x86 `"=a"`) - -kit's inline-asm lowering only recognizes the **architecture-neutral** register -classes `r` (general), `f`/`x`/`w` (FP/SIMD), plus `i` (immediate input), `m` -(memory), and matching digits (`0`–`9`). Any GCC *machine* constraint letter — -most commonly x86's `"=a"`/`"a"` (the canonical `syscall`/`cpuid`/`rdtsc` idiom), -but also `b`/`c`/`d`/`S`/`D`/`q`/`Q`, etc. — is rejected with -`KitCg: unsupported asm output constraint` (or `… input constraint`). GCC accepts -`"=a"` on x86; kit does not. - -Root cause is target-independent and lives in the **CG layer**, not a backend: -`api_asm_is_reg_constraint` (`src/cg/asm.c:41`) returns true only for -`r`/`f`/`x`/`w`, and the output/input constraint handlers -(`src/cg/asm.c:195` / `:234`) `compiler_panic` on anything else. Because this is -the front-of-pipe lowering (before any arch backend, before any opt pass), the -rejection is identical on **every arch and every opt level**. Verified matrix -(x86_64/aarch64/riscv64 × O0/O1): `"=a"` output → all 6 fail with the same -message; the register-pinned workaround → all 6 compile. - -Workaround (what the rest of the tree uses, e.g. `test/link/harness/start.c` and -`test/rt/addr2line_prog.c`): pin to a hard register with a GNU local register -variable bound to a plain `r`/`+r` constraint, never the machine letter: +## x86-64 inline asm: `long` `"=r"` output stores as 4 bytes + +`KIT_TEST_ARCH=x64 KIT_OPT_LEVELS="0 1" KIT_PARSE_PARALLEL=0 +test/parse/run.sh cg_x64_inline_asm_mov R` fails in both O0 and O1 emit with +`[1]:4:11: fatal: KitCg: store value type/size mismatch: access size 4, value +size 8`. 
+ +Minimal existing repro: ```c -/* NOT supported: __asm__("syscall" : "=a"(ret) : "a"(n) : ...); */ -register long rax __asm__("rax") = n; /* hard-register pin */ -__asm__ volatile("syscall" : "+r"(rax) : : "rcx", "r11", "memory"); -return rax; /* read result back from rax */ +int test_main(void) { + long out; + __asm__ volatile("movq %1, %0" : "=r"(out) : "r"(42)); + return out == 42 ? 42 : 1; +} ``` -The hard-register pin (`AsmConstraint.reg`) rides alongside the `r` operand and -selects the exact register, so this is fully general. A proper fix would map the -common machine constraint letters to their register class + a hard-register pin -(at least x86 `a`/`b`/`c`/`d`/`S`/`D` → `r`+pin) so stock GCC/Clang syscall and -cpuid snippets compile unchanged. Found writing the freestanding backtrace -round-trip program (`test/rt/addr2line_prog.c`), whose x86-64 `write` syscall -first used `"=a"` and had to be rewritten to the register-pinned form -(doc/plan/BACKTRACE.md, WS4). +The same failure masks new inline-asm constraint tests if they use `long` or +`long long` outputs. The constraint classification is now arch-specific; this +looks like the C frontend / CG store path is choosing a 4-byte output-lvalue +access for an 8-byte asm result. 
## x86-64 inline asm: `-g -O1` + a 4-operand register idiom → `too many memory asm operands` (compiler abort) diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c @@ -43,6 +43,7 @@ #include "cg/type.h" #include "core/arena.h" #include "core/bytes.h" +#include "core/core.h" #include "core/pool.h" #include "core/slice.h" #include "obj/obj.h" @@ -3873,11 +3874,39 @@ static int aa_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls, return 0; } +static int aa_asm_constraint_reg(const NativeRegInfo* ri, const char* body, + NativeAllocClass* cls_out, Reg* fixed_out, + u32* allowed_mask_out) { + (void)ri; + if (!body || !body[0] || body[1]) return 0; + if (fixed_out) *fixed_out = REG_NONE; + if (allowed_mask_out) *allowed_mask_out = 0; + switch (body[0]) { + case 'r': + if (cls_out) *cls_out = NATIVE_REG_INT; + return 1; + case 'w': + if (cls_out) *cls_out = NATIVE_REG_FP; + return 1; + case 'x': + if (cls_out) *cls_out = NATIVE_REG_FP; + if (allowed_mask_out) *allowed_mask_out = 0x0000ffffu; /* v0..v15 */ + return 1; + case 'y': + if (cls_out) *cls_out = NATIVE_REG_FP; + if (allowed_mask_out) *allowed_mask_out = 0x000000ffu; /* v0..v7 */ + return 1; + default: + return 0; + } +} + static const NativeRegInfo aa_reg_info = { .classes = aa_classes, .nclasses = sizeof aa_classes / sizeof aa_classes[0], .resolve_name = aa_resolve_name, .asm_operand_reg_ok = aa_asm_operand_reg_ok, + .asm_constraint_reg = aa_asm_constraint_reg, }; static void aa_va_start_native(NativeTarget* t, NativeLoc ap_ptr); @@ -4514,7 +4543,8 @@ static void aa_asm_clobber_masks(Compiler* c, SrcLoc loc, const Sym* clobbers, } AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d, - NativeAllocClass cls, u32* used_int, + NativeAllocClass cls, + u32 allowed_mask, u32* used_int, u32* used_fp) { static const Reg int_pool[] = {0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 11u, 12u, 13u, 14u, 15u}; @@ -4527,6 +4557,7 @@ AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d, u32* used 
= cls == NATIVE_REG_FP ? used_fp : used_int; for (u32 i = 0; i < n; ++i) { Reg r = pool[i]; + if (allowed_mask && (allowed_mask & (1u << r)) == 0) continue; if ((*used & (1u << r)) != 0) continue; *used |= 1u << r; return r; @@ -4535,14 +4566,6 @@ AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d, return REG_NONE; } -AA_UNUSED_FN static NativeAllocClass aa_asm_constraint_class( - NativeDirectTarget* d, const char* body) { - if (body[0] == 'r') return NATIVE_REG_INT; - if (body[0] == 'w') return NATIVE_REG_FP; - aa_asm_panic(d, "constraint is not a register constraint"); - return NATIVE_REG_INT; -} - static int aa_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg, const char* constraint, NativeAsmRegPin* pin) { @@ -4708,17 +4731,29 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl, clob_int |= 1u << pin.reg; } aa_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg); - } else if (body[0] == 'r' || body[0] == 'w') { - NativeAllocClass cls = aa_asm_constraint_class(d, body); - Reg reg = aa_asm_alloc_reg(d, cls, &used_int, &used_fp); - KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type; - aa_asm_bound_reg(&bound_outs[i], type, cls, reg); - } else if (body[0] == 'm') { - Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp); - KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type; - aa_asm_bound_mem(&bound_outs[i], type, reg); } else { - aa_asm_panic(d, "unsupported output constraint"); + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(d->native, outs[i].str, &info)) { + Reg reg = info.fixed_reg != REG_NONE + ? info.fixed_reg + : aa_asm_alloc_reg(d, info.cls, info.allowed_mask, + &used_int, &used_fp); + KitCgTypeId type = outs[i].type ? 
outs[i].type : out_ops[i].type; + if (info.cls == NATIVE_REG_FP) { + used_fp |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg; + } else { + used_int |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg; + } + aa_asm_bound_reg(&bound_outs[i], type, info.cls, reg); + } else if (body[0] == 'm') { + Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp); + KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type; + aa_asm_bound_mem(&bound_outs[i], type, reg); + } else { + aa_asm_panic(d, "unsupported output constraint"); + } } } @@ -4747,21 +4782,33 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl, clob_int |= 1u << pin.reg; } aa_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg); - } else if (body[0] == 'r' || body[0] == 'w') { - NativeAllocClass cls = aa_asm_constraint_class(d, body); - Reg reg = aa_asm_alloc_reg(d, cls, &used_int, &used_fp); - KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type; - aa_asm_bound_reg(&bound_ins[i], type, cls, reg); - } else if (body[0] == 'i') { - if (in_ops[i].kind != OPK_IMM) - aa_asm_panic(d, "immediate constraint requires immediate operand"); - bound_ins[i] = in_ops[i]; - } else if (body[0] == 'm') { - Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp); - KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type; - aa_asm_bound_mem(&bound_ins[i], type, reg); } else { - aa_asm_panic(d, "unsupported input constraint"); + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(d->native, ins[i].str, &info)) { + Reg reg = info.fixed_reg != REG_NONE + ? info.fixed_reg + : aa_asm_alloc_reg(d, info.cls, info.allowed_mask, + &used_int, &used_fp); + KitCgTypeId type = ins[i].type ? 
ins[i].type : in_ops[i].type; + if (info.cls == NATIVE_REG_FP) { + used_fp |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg; + } else { + used_int |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg; + } + aa_asm_bound_reg(&bound_ins[i], type, info.cls, reg); + } else if (body[0] == 'i') { + if (in_ops[i].kind != OPK_IMM) + aa_asm_panic(d, "immediate constraint requires immediate operand"); + bound_ins[i] = in_ops[i]; + } else if (body[0] == 'm') { + Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp); + KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type; + aa_asm_bound_mem(&bound_ins[i], type, reg); + } else { + aa_asm_panic(d, "unsupported input constraint"); + } } } @@ -4883,11 +4930,22 @@ static void aa_asm_bind_native(AANativeTarget* a, SrcLoc loc, Operand* out, const char* constraint, KitCgTypeId type, NativeLoc src, u32* ntmp) { const char* body = native_asm_constraint_body(constraint); - if (body[0] == 'r' || body[0] == 'w') { - NativeAllocClass cls = (body[0] == 'w') ? 
NATIVE_REG_FP : NATIVE_REG_INT; + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(&a->base, constraint, &info)) { if (src.kind != NATIVE_LOC_REG) aa_asm_panic_at(a->base.c, loc, "register asm operand not in a register"); - aa_asm_bound_reg(out, type, cls, (Reg)src.v.reg); + if (info.fixed_reg != REG_NONE && info.fixed_reg != (Reg)src.v.reg) + aa_asm_panic_at(a->base.c, loc, + "fixed-register asm operand in wrong register"); + if (info.allowed_mask && + ((Reg)src.v.reg >= 32 || + (info.allowed_mask & (1u << (Reg)src.v.reg)) == 0)) + compiler_panic( + a->base.c, loc, + "aarch64 inline asm: constraint %s got cls%u reg%u outside %08x", + constraint, (unsigned)info.cls, (unsigned)src.v.reg, + (unsigned)info.allowed_mask); + aa_asm_bound_reg(out, type, info.cls, (Reg)src.v.reg); } else if (body[0] == 'i') { if (src.kind != NATIVE_LOC_IMM) aa_asm_panic_at(a->base.c, loc, "immediate asm operand is not immediate"); @@ -4933,15 +4991,18 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl, type = ins[i].type ? ins[i].type : in_locs[i].type; { const char* in_body = native_asm_constraint_body(ins[i].str); + NativeAsmConstraintInfo info; NativeLoc inloc = in_locs[i]; /* A register-constrained input whose value is an address-taken local * arrives in a frame slot: the optimizer cannot keep an address-taken * local live in a register across the block, so the "inputs are already * in registers" contract does not hold for it. Load it into a reserved - * scratch register (as the direct path does) before binding. Only the - * integer 'r' form is handled here — 'w' would need an FP scratch, which - * isn't reserved; an address-taken FP input still falls to the panic. */ - if (in_body[0] == 'r' && inloc.kind != NATIVE_LOC_REG) { + * scratch register (as the direct path does) before binding. Only + * unrestricted integer constraints can use this scratch; restricted + * register sets must already arrive in an allowed hard register. 
*/ + if (native_asm_constraint_reg_info(t, ins[i].str, &info) && + info.cls == NATIVE_REG_INT && info.allowed_mask == 0 && + inloc.kind != NATIVE_LOC_REG) { Reg r; if (ntmp >= 2u) aa_asm_panic_at(c, loc, "too many memory asm operands"); r = (ntmp == 0u) ? AA_TMP0 : AA_TMP1; @@ -4950,6 +5011,7 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl, aa_emit_mem(a, 1, inloc, aa_asm_loc_to_addr(a, loc, in_locs[i]), aa_mem_for_type(t, type, type_size32(t, type))); } + (void)in_body; aa_asm_bind_native(a, loc, &bound_ins[i], ins[i].str, type, inloc, &ntmp); } } diff --git a/src/arch/native_target.h b/src/arch/native_target.h @@ -169,6 +169,16 @@ struct NativeRegInfo { * the assembler can name them. */ int (*asm_operand_reg_ok)(const NativeRegInfo*, NativeAllocClass cls, Reg reg); + /* Optional target-specific register-constraint parser for inline asm. The + * input is the constraint body after generic modifiers ('=', '+', '&') have + * been stripped. Return non-zero only for constraints that name a register + * class; set fixed_out to REG_NONE for a free class or to a physical register + * when the constraint hard-wires the operand (x86 "a" -> rax). Set + * allowed_mask_out to 0 for the whole class, or a physical-register bitmask + * when the constraint names a restricted class subset. 
*/ + int (*asm_constraint_reg)(const NativeRegInfo*, const char* body, + NativeAllocClass* cls_out, Reg* fixed_out, + u32* allowed_mask_out); const char* (*debug_name)(const NativeRegInfo*, NativeAllocClass, Reg); u32 (*dwarf_reg)(const NativeRegInfo*, NativeAllocClass, Reg); }; diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c @@ -545,11 +545,40 @@ static int rv_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls, return 0; } +static int rv_asm_constraint_reg(const NativeRegInfo* ri, const char* body, + NativeAllocClass* cls_out, Reg* fixed_out, + u32* allowed_mask_out) { + (void)ri; + if (!body || !body[0]) return 0; + if (fixed_out) *fixed_out = REG_NONE; + if (allowed_mask_out) *allowed_mask_out = 0; + if (body[0] == 'r' && body[1] == '\0') { + if (cls_out) *cls_out = NATIVE_REG_INT; + return 1; + } + if (body[0] == 'f' && body[1] == '\0') { + if (cls_out) *cls_out = NATIVE_REG_FP; + return 1; + } + if (body[0] == 'c' && body[1] == 'r' && body[2] == '\0') { + if (cls_out) *cls_out = NATIVE_REG_INT; + if (allowed_mask_out) *allowed_mask_out = 0x0000ff00u; /* x8..x15 */ + return 1; + } + if (body[0] == 'c' && body[1] == 'f' && body[2] == '\0') { + if (cls_out) *cls_out = NATIVE_REG_FP; + if (allowed_mask_out) *allowed_mask_out = 0x0000ff00u; /* f8..f15 */ + return 1; + } + return 0; +} + static const NativeRegInfo rv_reg_info = { .classes = rv_classes, .nclasses = sizeof rv_classes / sizeof rv_classes[0], .resolve_name = rv_resolve_name, .asm_operand_reg_ok = rv_asm_operand_reg_ok, + .asm_constraint_reg = rv_asm_constraint_reg, }; /* ============================ legality ============================ */ @@ -3431,14 +3460,6 @@ static void rv_asm_clobber_masks(Compiler* c, SrcLoc loc, const Sym* clobbers, } } -static NativeAllocClass rv_asm_constraint_class(NativeDirectTarget* d, - const char* body) { - if (body[0] == 'r') return NATIVE_REG_INT; - if (body[0] == 'f') return NATIVE_REG_FP; - rv_asm_panic(d, "constraint is not a register 
constraint"); - return NATIVE_REG_INT; -} - static int rv_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg, const char* constraint, NativeAsmRegPin* pin) { @@ -3453,7 +3474,7 @@ static int rv_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg, /* Pick a free register from the arch's caller-saved allocable pools for an * asm operand the direct path must self-allocate. */ static Reg rv_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls, - u32* used_int, u32* used_fp) { + u32 allowed_mask, u32* used_int, u32* used_fp) { /* int: a0..a7 (10..17) then t-temps that aren't emit scratch. */ static const Reg int_pool[] = {10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u, 29u, 30u, 31u}; @@ -3467,6 +3488,7 @@ static Reg rv_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls, u32 i; for (i = 0; i < n; ++i) { Reg r = pool[i]; + if (allowed_mask && (allowed_mask & (1u << r)) == 0) continue; if ((*used & (1u << r)) != 0) continue; *used |= 1u << r; return r; @@ -3760,11 +3782,19 @@ static void rv_asm_bind_native(RvNativeTarget* a, SrcLoc loc, Operand* out, const char* constraint, KitCgTypeId type, NativeLoc src, u32* ntmp) { const char* body = native_asm_constraint_body(constraint); - if (body[0] == 'r' || body[0] == 'f') { - NativeAllocClass cls = (body[0] == 'f') ? 
NATIVE_REG_FP : NATIVE_REG_INT; + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(&a->base, constraint, &info)) { if (src.kind != NATIVE_LOC_REG) rv_asm_panic_at(a->base.c, loc, "register asm operand not in a register"); - rv_asm_bound_reg(out, type, cls, (Reg)src.v.reg); + if (info.fixed_reg != REG_NONE && info.fixed_reg != (Reg)src.v.reg) + rv_asm_panic_at(a->base.c, loc, + "fixed-register asm operand in wrong register"); + if (info.allowed_mask && + ((Reg)src.v.reg >= 32 || + (info.allowed_mask & (1u << (Reg)src.v.reg)) == 0)) + rv_asm_panic_at(a->base.c, loc, + "register asm operand violates constraint register set"); + rv_asm_bound_reg(out, type, info.cls, (Reg)src.v.reg); } else if (body[0] == 'i') { if (src.kind != NATIVE_LOC_IMM) rv_asm_panic_at(a->base.c, loc, "immediate asm operand is not immediate"); @@ -3794,13 +3824,13 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl, Rv64Asm* asmh; for (i = 0; i < nout; ++i) { - const char* body = native_asm_constraint_body(outs[i].str); KitCgTypeId type = outs[i].type ? outs[i].type : out_locs[i].type; NativeLoc outloc = out_locs[i]; - if ((body[0] == 'r' || body[0] == 'f') && outloc.kind != NATIVE_LOC_REG) { - NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT; - Reg r = rv_asm_stage_reg(a, loc, cls, &nstage_int, &nstage_fp); - outloc = native_loc_reg(type, cls, r); + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(t, outs[i].str, &info) && + info.allowed_mask == 0 && outloc.kind != NATIVE_LOC_REG) { + Reg r = rv_asm_stage_reg(a, loc, info.cls, &nstage_int, &nstage_fp); + outloc = native_loc_reg(type, info.cls, r); staged_outs[i] = 1u; if (outs[i].dir == KIT_CG_ASM_INOUT) rv_asm_load_loc_to_reg(a, loc, out_locs[i], outloc); @@ -3820,11 +3850,14 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl, } type = ins[i].type ? 
ins[i].type : in_locs[i].type; inloc = in_locs[i]; - if ((body[0] == 'r' || body[0] == 'f') && inloc.kind != NATIVE_LOC_REG) { - NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT; - Reg r = rv_asm_stage_reg(a, loc, cls, &nstage_int, &nstage_fp); - inloc = native_loc_reg(type, cls, r); - rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc); + { + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(t, ins[i].str, &info) && + info.allowed_mask == 0 && inloc.kind != NATIVE_LOC_REG) { + Reg r = rv_asm_stage_reg(a, loc, info.cls, &nstage_int, &nstage_fp); + inloc = native_loc_reg(type, info.cls, r); + rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc); + } } rv_asm_bind_native(a, loc, &bound_ins[i], ins[i].str, type, inloc, &ntmp); } @@ -4122,15 +4155,27 @@ static void rv_direct_asm_block(NativeDirectTarget* d, const char* tmpl, clob_int |= 1u << pin.reg; } rv_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg); - } else if (body[0] == 'r' || body[0] == 'f') { - NativeAllocClass cls = rv_asm_constraint_class(d, body); - Reg reg = rv_asm_alloc_reg(d, cls, &used_int, &used_fp); - rv_asm_bound_reg(&bound_outs[i], type, cls, reg); - } else if (body[0] == 'm') { - Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp); - rv_asm_bound_mem(&bound_outs[i], type, reg); } else { - rv_asm_panic(d, "unsupported output constraint"); + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(d->native, outs[i].str, &info)) { + Reg reg = info.fixed_reg != REG_NONE + ? 
info.fixed_reg + : rv_asm_alloc_reg(d, info.cls, info.allowed_mask, + &used_int, &used_fp); + if (info.cls == NATIVE_REG_FP) { + used_fp |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg; + } else { + used_int |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg; + } + rv_asm_bound_reg(&bound_outs[i], type, info.cls, reg); + } else if (body[0] == 'm') { + Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp); + rv_asm_bound_mem(&bound_outs[i], type, reg); + } else { + rv_asm_panic(d, "unsupported output constraint"); + } } } @@ -4159,19 +4204,31 @@ static void rv_direct_asm_block(NativeDirectTarget* d, const char* tmpl, clob_int |= 1u << pin.reg; } rv_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg); - } else if (body[0] == 'r' || body[0] == 'f') { - NativeAllocClass cls = rv_asm_constraint_class(d, body); - Reg reg = rv_asm_alloc_reg(d, cls, &used_int, &used_fp); - rv_asm_bound_reg(&bound_ins[i], type, cls, reg); - } else if (body[0] == 'i') { - if (in_ops[i].kind != OPK_IMM) - rv_asm_panic(d, "immediate constraint requires immediate operand"); - bound_ins[i] = in_ops[i]; - } else if (body[0] == 'm') { - Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp); - rv_asm_bound_mem(&bound_ins[i], type, reg); } else { - rv_asm_panic(d, "unsupported input constraint"); + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(d->native, ins[i].str, &info)) { + Reg reg = info.fixed_reg != REG_NONE + ? 
info.fixed_reg + : rv_asm_alloc_reg(d, info.cls, info.allowed_mask, + &used_int, &used_fp); + if (info.cls == NATIVE_REG_FP) { + used_fp |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg; + } else { + used_int |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg; + } + rv_asm_bound_reg(&bound_ins[i], type, info.cls, reg); + } else if (body[0] == 'i') { + if (in_ops[i].kind != OPK_IMM) + rv_asm_panic(d, "immediate constraint requires immediate operand"); + bound_ins[i] = in_ops[i]; + } else if (body[0] == 'm') { + Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp); + rv_asm_bound_mem(&bound_ins[i], type, reg); + } else { + rv_asm_panic(d, "unsupported input constraint"); + } } } diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c @@ -391,11 +391,57 @@ static int x64_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls, return 0; } +static int x64_asm_constraint_reg(const NativeRegInfo* ri, const char* body, + NativeAllocClass* cls_out, Reg* fixed_out, + u32* allowed_mask_out) { + (void)ri; + if (!body || !body[0] || body[1]) return 0; + if (fixed_out) *fixed_out = REG_NONE; + if (allowed_mask_out) *allowed_mask_out = 0; + switch (body[0]) { + case 'r': + case 'q': + if (cls_out) *cls_out = NATIVE_REG_INT; + return 1; + case 'a': + if (cls_out) *cls_out = NATIVE_REG_INT; + if (fixed_out) *fixed_out = X64_RAX; + return 1; + case 'b': + if (cls_out) *cls_out = NATIVE_REG_INT; + if (fixed_out) *fixed_out = X64_RBX; + return 1; + case 'c': + if (cls_out) *cls_out = NATIVE_REG_INT; + if (fixed_out) *fixed_out = X64_RCX; + return 1; + case 'd': + if (cls_out) *cls_out = NATIVE_REG_INT; + if (fixed_out) *fixed_out = X64_RDX; + return 1; + case 'S': + if (cls_out) *cls_out = NATIVE_REG_INT; + if (fixed_out) *fixed_out = X64_RSI; + return 1; + case 'D': + if (cls_out) *cls_out = NATIVE_REG_INT; + if (fixed_out) *fixed_out = X64_RDI; + return 1; + case 'x': + case 'v': + if (cls_out) *cls_out = 
NATIVE_REG_FP; + return 1; + default: + return 0; + } +} + static const NativeRegInfo x64_reg_info = { .classes = x64_classes, .nclasses = sizeof x64_classes / sizeof x64_classes[0], .resolve_name = x64_resolve_name, .asm_operand_reg_ok = x64_asm_operand_reg_ok, + .asm_constraint_reg = x64_asm_constraint_reg, }; /* ============================ legality ============================ */ @@ -3717,16 +3763,6 @@ static void x64_asm_clobber_masks(Compiler* c, SrcLoc loc, const Sym* clobbers, } } -static NativeAllocClass x64_asm_constraint_class(NativeDirectTarget* d, - const char* body) { - if (body[0] == 'r' || body[0] == 'q' || body[0] == 'a' || body[0] == 'b' || - body[0] == 'c' || body[0] == 'd' || body[0] == 'S' || body[0] == 'D') - return NATIVE_REG_INT; - if (body[0] == 'x' || body[0] == 'v') return NATIVE_REG_FP; - x64_asm_panic(d, "constraint is not a register constraint"); - return NATIVE_REG_INT; -} - static int x64_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg, const char* constraint, NativeAsmRegPin* pin) { @@ -3741,7 +3777,7 @@ static int x64_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg, /* Pick a free register from caller-saved allocable pools for an asm operand the * direct path self-allocates. 
*/ static Reg x64_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls, - u32* used_int, u32* used_fp) { + u32 allowed_mask, u32* used_int, u32* used_fp) { static const Reg int_pool[] = {X64_RDI, X64_RSI, X64_RDX, X64_RCX, X64_R8, X64_R9}; static const Reg fp_pool[] = { @@ -3754,6 +3790,7 @@ static Reg x64_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls, u32 i; for (i = 0; i < n; ++i) { Reg r = pool[i]; + if (allowed_mask && (allowed_mask & (1u << r)) == 0) continue; if ((*used & (1u << r)) != 0) continue; *used |= 1u << r; return r; @@ -3981,12 +4018,20 @@ static void x64_asm_bind_native(X64NativeTarget* a, SrcLoc loc, Operand* out, const char* constraint, KitCgTypeId type, NativeLoc src, u32* ntmp) { const char* body = native_asm_constraint_body(constraint); - if (body[0] == 'r' || body[0] == 'x') { - NativeAllocClass cls = (body[0] == 'x') ? NATIVE_REG_FP : NATIVE_REG_INT; + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(&a->base, constraint, &info)) { if (src.kind != NATIVE_LOC_REG) x64_asm_panic_at(a->base.c, loc, "register asm operand not in a register"); - x64_asm_bound_reg(out, type, cls, (Reg)src.v.reg); + if (info.fixed_reg != REG_NONE && info.fixed_reg != (Reg)src.v.reg) + x64_asm_panic_at(a->base.c, loc, + "fixed-register asm operand in wrong register"); + if (info.allowed_mask && + ((Reg)src.v.reg >= 32 || + (info.allowed_mask & (1u << (Reg)src.v.reg)) == 0)) + x64_asm_panic_at(a->base.c, loc, + "register asm operand violates constraint register set"); + x64_asm_bound_reg(out, type, info.cls, (Reg)src.v.reg); } else if (body[0] == 'i') { if (src.kind != NATIVE_LOC_IMM) x64_asm_panic_at(a->base.c, loc, @@ -4375,15 +4420,27 @@ static void x64_direct_asm_block(NativeDirectTarget* d, const char* tmpl, clob_int |= 1u << pin.reg; } x64_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg); - } else if (body[0] == 'r' || body[0] == 'x') { - NativeAllocClass cls = x64_asm_constraint_class(d, body); - Reg reg = 
x64_asm_alloc_reg(d, cls, &used_int, &used_fp); - x64_asm_bound_reg(&bound_outs[i], type, cls, reg); - } else if (body[0] == 'm') { - Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp); - x64_asm_bound_mem(&bound_outs[i], type, reg); } else { - x64_asm_panic(d, "unsupported output constraint"); + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(d->native, outs[i].str, &info)) { + Reg reg = info.fixed_reg != REG_NONE + ? info.fixed_reg + : x64_asm_alloc_reg(d, info.cls, info.allowed_mask, + &used_int, &used_fp); + if (info.cls == NATIVE_REG_FP) { + used_fp |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg; + } else { + used_int |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg; + } + x64_asm_bound_reg(&bound_outs[i], type, info.cls, reg); + } else if (body[0] == 'm') { + Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp); + x64_asm_bound_mem(&bound_outs[i], type, reg); + } else { + x64_asm_panic(d, "unsupported output constraint"); + } } } @@ -4412,19 +4469,31 @@ static void x64_direct_asm_block(NativeDirectTarget* d, const char* tmpl, clob_int |= 1u << pin.reg; } x64_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg); - } else if (body[0] == 'r' || body[0] == 'x') { - NativeAllocClass cls = x64_asm_constraint_class(d, body); - Reg reg = x64_asm_alloc_reg(d, cls, &used_int, &used_fp); - x64_asm_bound_reg(&bound_ins[i], type, cls, reg); - } else if (body[0] == 'i') { - if (in_ops[i].kind != OPK_IMM) - x64_asm_panic(d, "immediate constraint requires immediate operand"); - bound_ins[i] = in_ops[i]; - } else if (body[0] == 'm') { - Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp); - x64_asm_bound_mem(&bound_ins[i], type, reg); } else { - x64_asm_panic(d, "unsupported input constraint"); + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(d->native, ins[i].str, &info)) { + Reg reg = info.fixed_reg != REG_NONE + ? 
info.fixed_reg + : x64_asm_alloc_reg(d, info.cls, info.allowed_mask, + &used_int, &used_fp); + if (info.cls == NATIVE_REG_FP) { + used_fp |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg; + } else { + used_int |= 1u << reg; + if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg; + } + x64_asm_bound_reg(&bound_ins[i], type, info.cls, reg); + } else if (body[0] == 'i') { + if (in_ops[i].kind != OPK_IMM) + x64_asm_panic(d, "immediate constraint requires immediate operand"); + bound_ins[i] = in_ops[i]; + } else if (body[0] == 'm') { + Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp); + x64_asm_bound_mem(&bound_ins[i], type, reg); + } else { + x64_asm_panic(d, "unsupported input constraint"); + } } } diff --git a/src/cg/asm.c b/src/cg/asm.c @@ -42,6 +42,14 @@ int api_asm_is_reg_constraint(char c) { return c == 'r' || c == 'f' || c == 'x' || c == 'w'; } +static int api_asm_constraint_is_reg(KitCg* g, const char* constraint) { + const char* body = api_asm_constraint_body(constraint); + if (api_asm_is_reg_constraint(body[0])) return 1; + if (g && g->target && g->target->asm_is_reg_constraint) + return g->target->asm_is_reg_constraint(g->target, constraint); + return 0; +} + /* A register ('r'/'f'/'x'/'w') asm operand must live in a single hardware * register. 
A 64-bit scalar on a 32-bit target does not fit one: it would need * a register pair, which this inline-asm lowering does not model, so binding it @@ -119,8 +127,7 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) { outs[i].reg = (Sym)outputs[i].reg; outs[i].dir = (u8)outputs[i].dir; if (!outs[i].type) outs[i].type = fallback_ty; - if (outs[i].reg && - !api_asm_is_reg_constraint(api_asm_constraint_body(outs[i].str)[0])) { + if (outs[i].reg && !api_asm_constraint_is_reg(g, outs[i].str)) { compiler_panic(g->c, g->cur_loc, "KitCg: asm hard-register output requires a register " "constraint"); @@ -159,8 +166,7 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) { ins[i].reg = (Sym)inputs[i].reg; ins[i].dir = (u8)inputs[i].dir; if (!ins[i].type) ins[i].type = fallback_ty; - if (ins[i].reg && - !api_asm_is_reg_constraint(api_asm_constraint_body(ins[i].str)[0])) { + if (ins[i].reg && !api_asm_constraint_is_reg(g, ins[i].str)) { compiler_panic(g->c, g->cur_loc, "KitCg: asm hard-register input requires a register " "constraint"); @@ -187,12 +193,11 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) { } for (u32 i = 0; i < noutputs; ++i) { - const char* body = api_asm_constraint_body(outs[i].str); if (api_asm_is_early_clobber(outs[i].str)) continue; /* A register constraint binds to a temp local; the local's type selects the * register class (integer vs FP), so the backend hook places an FP-class * output (riscv 'f', x86 'x', aarch64 'w') in an FP register. */ - if (api_asm_is_reg_constraint(body[0])) { + if (api_asm_constraint_is_reg(g, outs[i].str)) { KitCgTypeId oty = outs[i].type ? 
outs[i].type : fallback_ty; CGLocal r; api_asm_reject_wide_reg(g, oty); @@ -231,7 +236,7 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) { T->copy(T, bound, src); } in_ops[i] = bound; - } else if (api_asm_is_reg_constraint(s[0])) { + } else if (api_asm_constraint_is_reg(g, s)) { api_asm_reject_wide_reg(g, ity); in_ops[i] = api_force_local(g, &in_svs[i], ity); } else if (s[0] == 'i') { @@ -261,12 +266,10 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) { } for (u32 i = 0; i < noutputs; ++i) { - const char* body; KitCgTypeId oty; CGLocal r; if (!api_asm_is_early_clobber(outs[i].str)) continue; - body = api_asm_constraint_body(outs[i].str); - if (!api_asm_is_reg_constraint(body[0])) { + if (!api_asm_constraint_is_reg(g, outs[i].str)) { compiler_panic(g->c, g->cur_loc, "KitCg: unsupported early-clobber asm output"); continue; diff --git a/src/cg/cgtarget.h b/src/cg/cgtarget.h @@ -765,6 +765,7 @@ struct CgTarget { * "=&r" early-clobber outputs must be allocated disjoint from any input. * opt_cgtarget records this as a single IR_ASM_BLOCK; the wrapped target * receives the same call at lowering time with materialized operands. 
*/ + int (*asm_is_reg_constraint)(CgTarget*, const char* constraint); void (*asm_block)(CgTarget*, const char* tmpl, const AsmConstraint* outs, u32 nout, Operand* out_ops, const AsmConstraint* ins, u32 nin, const Operand* in_ops, const Sym* clobbers, diff --git a/src/cg/ir_recorder.c b/src/cg/ir_recorder.c @@ -15,6 +15,7 @@ struct CgIrRecorder { const char* (*data_label_addr_unsupported_msg)(void*); const char* (*tail_call_unrealizable_reason)(void*, const CGFuncDesc*, const CGCallDesc*); + int (*asm_is_reg_constraint)(void*, const char*); void* user; }; @@ -552,6 +553,12 @@ static void rec_intrinsic(CgTarget* t, IntrinKind kind, Operand* dsts, u32 ndst, in->extra.aux = aux; } +static int rec_asm_is_reg_constraint(CgTarget* t, const char* constraint) { + CgIrRecorder* r = rec_of(t); + if (!r->asm_is_reg_constraint) return 0; + return r->asm_is_reg_constraint(r->user, constraint); +} + static void rec_asm_block(CgTarget* t, const char* tmpl, const AsmConstraint* outs, u32 nout, Operand* out_ops, const AsmConstraint* ins, u32 nin, @@ -616,6 +623,7 @@ CgTarget* cg_ir_recorder_new(Compiler* c, ObjBuilder* obj, r->local_static_data_begin = cfg->local_static_data_begin; r->data_label_addr_unsupported_msg = cfg->data_label_addr_unsupported_msg; r->tail_call_unrealizable_reason = cfg->tail_call_unrealizable_reason; + r->asm_is_reg_constraint = cfg->asm_is_reg_constraint; r->user = cfg->user; } @@ -670,6 +678,7 @@ CgTarget* cg_ir_recorder_new(Compiler* c, ObjBuilder* obj, r->base.atomic_cas = rec_atomic_cas; r->base.fence = rec_fence; r->base.intrinsic = rec_intrinsic; + r->base.asm_is_reg_constraint = rec_asm_is_reg_constraint; r->base.asm_block = rec_asm_block; r->base.file_scope_asm = rec_file_scope_asm; r->base.set_loc = rec_set_loc; diff --git a/src/cg/ir_recorder.h b/src/cg/ir_recorder.h @@ -14,6 +14,7 @@ typedef struct CgIrRecorderConfig { const char* (*tail_call_unrealizable_reason)(void* user, const CGFuncDesc* caller, const CGCallDesc* call); + int 
(*asm_is_reg_constraint)(void* user, const char* constraint); void* user; } CgIrRecorderConfig; diff --git a/src/cg/native_asm.c b/src/cg/native_asm.c @@ -76,12 +76,65 @@ static int native_asm_default_operand_reg_ok(const NativeRegInfo* ri, return 0; } +int native_asm_constraint_reg_info(NativeTarget* t, const char* constraint, + NativeAsmConstraintInfo* out) { + NativeAsmConstraintInfo info; + const char* body = native_asm_constraint_body(constraint); + memset(&info, 0, sizeof info); + info.fixed_reg = REG_NONE; + if (!body || !body[0]) return 0; + if (t && t->regs && t->regs->asm_constraint_reg) { + Reg fixed = REG_NONE; + NativeAllocClass cls = NATIVE_REG_INT; + u32 allowed_mask = 0; + if (t->regs->asm_constraint_reg(t->regs, body, &cls, &fixed, + &allowed_mask)) { + if (allowed_mask) { + u32 filtered = 0; + for (Reg r = 0; r < 32; ++r) { + if ((allowed_mask & (1u << r)) == 0) continue; + if (t->regs->asm_operand_reg_ok) { + if (!t->regs->asm_operand_reg_ok(t->regs, cls, r)) continue; + } else if (!native_asm_default_operand_reg_ok(t->regs, cls, r)) { + continue; + } + filtered |= 1u << r; + } + allowed_mask = filtered; + if (!allowed_mask) return 0; + } + if (fixed != REG_NONE) { + if (t->regs->asm_operand_reg_ok) { + if (!t->regs->asm_operand_reg_ok(t->regs, cls, fixed)) return 0; + } else if (!native_asm_default_operand_reg_ok(t->regs, cls, fixed)) { + return 0; + } + if (fixed >= 32) return 0; + if (allowed_mask && (allowed_mask & (1u << fixed)) == 0) return 0; + } + info.cls = cls; + info.fixed_reg = fixed; + info.allowed_mask = allowed_mask; + if (out) *out = info; + return 1; + } + return 0; + } + if (!native_asm_constraint_reg_class(constraint, &info.cls)) return 0; + if (out) *out = info; + return 1; +} + +int native_asm_constraint_is_reg(NativeTarget* t, const char* constraint) { + return native_asm_constraint_reg_info(t, constraint, NULL); +} + NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg, const char* constraint, 
NativeAsmRegPin* out) { Reg r; NativeAllocClass cls; - NativeAllocClass want; + NativeAsmConstraintInfo info; if (!reg) return NATIVE_ASM_REG_PIN_ABSENT; if (!t || !t->regs || !t->regs->resolve_name) return NATIVE_ASM_REG_PIN_UNKNOWN; @@ -94,9 +147,13 @@ NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg, } else if (!native_asm_default_operand_reg_ok(t->regs, cls, r)) { return NATIVE_ASM_REG_PIN_FORBIDDEN; } - if (!native_asm_constraint_reg_class(constraint, &want)) + if (!native_asm_constraint_reg_info(t, constraint, &info)) return NATIVE_ASM_REG_PIN_BAD_CONSTRAINT; - if (want != cls) return NATIVE_ASM_REG_PIN_CLASS_MISMATCH; + if (info.cls != cls) return NATIVE_ASM_REG_PIN_CLASS_MISMATCH; + if (info.fixed_reg != REG_NONE && info.fixed_reg != r) + return NATIVE_ASM_REG_PIN_FIXED_MISMATCH; + if (info.allowed_mask && (r >= 32 || (info.allowed_mask & (1u << r)) == 0)) + return NATIVE_ASM_REG_PIN_FIXED_MISMATCH; if (out) { out->reg = r; out->cls = cls; @@ -118,6 +175,8 @@ const char* native_asm_pin_status_message(NativeAsmRegPinStatus st) { return "asm register variable requires a register constraint"; case NATIVE_ASM_REG_PIN_CLASS_MISMATCH: return "asm register variable class does not match its constraint"; + case NATIVE_ASM_REG_PIN_FIXED_MISMATCH: + return "asm register variable conflicts with register constraint"; } return "invalid asm register variable"; } diff --git a/src/cg/native_asm.h b/src/cg/native_asm.h @@ -51,6 +51,7 @@ typedef enum NativeAsmRegPinStatus { NATIVE_ASM_REG_PIN_FORBIDDEN = -2, NATIVE_ASM_REG_PIN_BAD_CONSTRAINT = -3, NATIVE_ASM_REG_PIN_CLASS_MISMATCH = -4, + NATIVE_ASM_REG_PIN_FIXED_MISMATCH = -5, } NativeAsmRegPinStatus; typedef struct NativeAsmRegPin { @@ -58,6 +59,16 @@ typedef struct NativeAsmRegPin { NativeAllocClass cls; } NativeAsmRegPin; +typedef struct NativeAsmConstraintInfo { + NativeAllocClass cls; + Reg fixed_reg; /* REG_NONE when the constraint only names a register class. 
*/ + u32 allowed_mask; /* 0 means any valid register in cls. */ +} NativeAsmConstraintInfo; + +int native_asm_constraint_reg_info(NativeTarget* t, const char* constraint, + NativeAsmConstraintInfo* out); +int native_asm_constraint_is_reg(NativeTarget* t, const char* constraint); + /* Resolve and validate an inline-asm operand's explicit hard-register pin * (AsmConstraint.reg, from a GNU local register variable). Distinguishes no pin * from invalid pins, and verifies that the operand uses a register constraint diff --git a/src/cg/native_direct_target.c b/src/cg/native_direct_target.c @@ -16,6 +16,7 @@ #include <string.h> #include "abi/abi.h" +#include "cg/native_asm.h" #include "cg/type.h" #include "core/arena.h" #include "core/pool.h" @@ -1873,6 +1874,11 @@ static void nd_asm_block(CgTarget* t, const char* tmpl, nd_panic(d, "target does not emit inline asm"); } +static int nd_asm_is_reg_constraint(CgTarget* t, const char* constraint) { + NativeDirectTarget* d = nd_of(t); + return native_asm_constraint_is_reg(d->native, constraint); +} + static void nd_file_scope_asm(CgTarget* t, const char* src, size_t len) { NativeDirectTarget* d = nd_of(t); ND_REQUIRE_NATIVE(d, file_scope_asm, "target does not emit file-scope asm"); @@ -1972,6 +1978,7 @@ CgTarget* native_direct_target_new(Compiler* c, ObjBuilder* obj, d->base.atomic_cas = nd_atomic_cas; d->base.fence = nd_fence; d->base.intrinsic = nd_intrinsic; + d->base.asm_is_reg_constraint = nd_asm_is_reg_constraint; d->base.asm_block = nd_asm_block; d->base.file_scope_asm = nd_file_scope_asm; d->base.set_loc = nd_set_loc; diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -448,6 +448,10 @@ typedef struct IRAsmAux { i32* in_fixed_regs; /* nin, -1 when unconstrained */ u8* out_fixed_cls; /* RegClass, parallel to out_fixed_regs */ u8* in_fixed_cls; /* RegClass, parallel to in_fixed_regs */ + u32* out_allowed_masks; /* nout, 0 when the whole class is allowed */ + u32* in_allowed_masks; /* nin, 0 when the whole class is allowed */ + u8* 
out_allowed_cls; /* RegClass, parallel to out_allowed_masks */ + u8* in_allowed_cls; /* RegClass, parallel to in_allowed_masks */ } IRAsmAux; typedef struct IRIntrinAux { @@ -610,6 +614,7 @@ typedef struct OptPRegInfo { i8 preferred_hard_reg; /* soft hint for allocator; -1 = no hint */ u8 pad[1]; u32 forbidden_hard_regs; /* bit r means PReg may not allocate hard reg r. */ + u32 allowed_hard_regs; /* 0 means unrestricted; otherwise positive mask. */ /* Subset of forbidden_hard_regs that comes from a fixed-register machine * clobber (an instruction live across this value destroys reg r — see * Func.inst_clobbers). Unlike soft forbids, the return-register hint must not diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -4,6 +4,7 @@ #include "abi/abi.h" #include "cg/ir.h" #include "cg/ir_recorder.h" +#include "cg/native_asm.h" #include "cg/native_direct_target.h" #include "cg/type.h" #include "core/arena.h" @@ -893,6 +894,11 @@ static const char* opt_on_tail_call_unrealizable_reason( return NULL; } +static int opt_on_asm_is_reg_constraint(void* user, const char* constraint) { + OptImpl* o = (OptImpl*)user; + return native_asm_constraint_is_reg(o ? 
o->native : NULL, constraint); +} + CgTarget* opt_cgtarget_new(Compiler* c, CgTarget* target, int level) { if (!target) compiler_panic(c, (SrcLoc){0, 0, 0}, "opt_cgtarget_new: target is NULL"); @@ -918,6 +924,7 @@ CgTarget* opt_cgtarget_new(Compiler* c, CgTarget* target, int level) { cfg.destroy = opt_on_destroy; cfg.local_static_data_begin = opt_on_local_static_data_begin; cfg.tail_call_unrealizable_reason = opt_on_tail_call_unrealizable_reason; + cfg.asm_is_reg_constraint = opt_on_asm_is_reg_constraint; cfg.user = o; return cg_ir_recorder_new(c, target->obj, &cfg); } diff --git a/src/opt/pass_coalesce.c b/src/opt/pass_coalesce.c @@ -148,10 +148,31 @@ static int group_conflicts(const CoalesceCtx* c, PReg ra, PReg rb, PReg allow_a, return 0; } -static int hard_reg_possible(Func* f, u8 cls, u32 forbidden) { +static int hard_reg_possible(Func* f, u8 cls, u32 forbidden, u32 allowed) { for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) { Reg r = f->opt_hard_regs[cls][i]; - if (r < 32 && (forbidden & (1u << r)) == 0) return 1; + if (r >= 32) continue; + if (allowed && (allowed & (1u << r)) == 0) continue; + if ((forbidden & (1u << r)) == 0) return 1; + } + if (allowed) { + for (Reg r = 0; r < 32; ++r) { + if ((allowed & (1u << r)) == 0) continue; + if (forbidden & (1u << r)) continue; + int in_hard = 0; + for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) { + if (f->opt_hard_regs[cls][i] == r) { + in_hard = 1; + break; + } + } + if (in_hard) continue; + for (u32 i = 0; i < f->opt_phys_reg_count[cls]; ++i) { + const CGPhysRegInfo* pi = &f->opt_phys_regs[cls][i]; + if (pi->reg == r && (pi->flags & CG_REG_RESERVED) == 0) return 1; + } + } + return 0; } return f->opt_hard_reg_count[cls] == 0; } @@ -163,19 +184,30 @@ static int group_constraints_compatible(const CoalesceCtx* c, PReg ra, KitCgTypeId type = opt_reg_type(f, ra); i32 tied = -1; u32 forbidden = 0; + u32 allowed = 0; for (PReg v = 1; v < opt_reg_count(f); ++v) { PReg r = coalesce_find(f, v); if (r != ra && r 
!= rb) continue; if (opt_reg_cls(f, v) != cls || opt_reg_type(f, v) != type) return 0; const OptPRegInfo* vi = &f->preg_info[v]; forbidden |= vi->forbidden_hard_regs; + if (vi->allowed_hard_regs) { + if (allowed) { + allowed &= vi->allowed_hard_regs; + if (!allowed) return 0; + } else { + allowed = vi->allowed_hard_regs; + } + } if (vi->tied_hard_reg >= 0) { if (tied >= 0 && tied != vi->tied_hard_reg) return 0; tied = vi->tied_hard_reg; } } if (tied >= 0 && tied < 32 && (forbidden & (1u << (Reg)tied))) return 0; - return hard_reg_possible(f, cls, forbidden); + if (tied >= 0 && tied < 32 && allowed && (allowed & (1u << (Reg)tied)) == 0) + return 0; + return hard_reg_possible(f, cls, forbidden, allowed); } static void coalesce_union(Func* f, PReg a, PReg b) { diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -175,6 +175,33 @@ static void apply_fixed_asm_operand(Func* f, Operand* op, i32 fixed, f->preg_info[v].tied_hard_reg = fixed; } +static void apply_allowed_asm_operand(Func* f, Operand* op, u32 allowed, + u8 allowed_cls) { + u32 hard_mask = 0; + if (!op || op->kind != OPK_REG || !allowed) return; + PReg v = (PReg)op->v.reg; + if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return; + if (allowed_cls >= OPT_REG_CLASSES || opt_reg_cls(f, v) != allowed_cls) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, "opt asm: allowed register class mismatch"); + } + for (u32 i = 0; i < f->opt_hard_reg_count[allowed_cls]; ++i) { + Reg r = f->opt_hard_regs[allowed_cls][i]; + if (r < 32) hard_mask |= 1u << r; + } + if (f->preg_info[v].allowed_hard_regs) { + u32 both = f->preg_info[v].allowed_hard_regs & allowed; + if (!both) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, "opt asm: conflicting allowed register sets"); + } + f->preg_info[v].allowed_hard_regs = both; + } else { + f->preg_info[v].allowed_hard_regs = allowed; + } + f->preg_info[v].forbidden_hard_regs |= hard_mask & ~allowed; +} + static void apply_asm_register_constraints(Func* f, Inst* 
in, u64* use, u64* def, u64* live_after) { IRAsmAux* aux = (IRAsmAux*)in->extra.aux; @@ -183,11 +210,17 @@ static void apply_asm_register_constraints(Func* f, Inst* in, u64* use, for (u32 i = 0; i < aux->nout; ++i) { i32 fixed = aux->out_fixed_regs ? aux->out_fixed_regs[i] : -1; u8 cls = aux->out_fixed_cls ? aux->out_fixed_cls[i] : 0; + u32 allowed = aux->out_allowed_masks ? aux->out_allowed_masks[i] : 0; + u8 allowed_cls = aux->out_allowed_cls ? aux->out_allowed_cls[i] : 0; + apply_allowed_asm_operand(f, &aux->out_ops[i], allowed, allowed_cls); apply_fixed_asm_operand(f, &aux->out_ops[i], fixed, cls); } for (u32 i = 0; i < aux->nin; ++i) { i32 fixed = aux->in_fixed_regs ? aux->in_fixed_regs[i] : -1; u8 cls = aux->in_fixed_cls ? aux->in_fixed_cls[i] : 0; + u32 allowed = aux->in_allowed_masks ? aux->in_allowed_masks[i] : 0; + u8 allowed_cls = aux->in_allowed_cls ? aux->in_allowed_cls[i] : 0; + apply_allowed_asm_operand(f, &aux->in_ops[i], allowed, allowed_cls); apply_fixed_asm_operand(f, &aux->in_ops[i], fixed, cls); } @@ -569,6 +602,7 @@ typedef struct OptAllocGroupInfo { u32 last; i32 tied_hard_reg; u32 forbidden_hard_regs; + u32 allowed_hard_regs; u8 cls; u8 pad[3]; } OptAllocGroupInfo; @@ -732,6 +766,19 @@ static void alloc_group_info(Func* f, const OptLiveRangeSet* ranges, PReg root, if (first < out->first) out->first = first; if (vi->last_pos > out->last) out->last = vi->last_pos; out->forbidden_hard_regs |= vi->forbidden_hard_regs; + if (vi->allowed_hard_regs) { + if (out->allowed_hard_regs) { + u32 both = out->allowed_hard_regs & vi->allowed_hard_regs; + if (!both) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, + "opt asm: conflicting allowed register sets"); + } + out->allowed_hard_regs = both; + } else { + out->allowed_hard_regs = vi->allowed_hard_regs; + } + } if (vi->tied_hard_reg >= 0) out->tied_hard_reg = vi->tied_hard_reg; } if (out->first == (u32)~0u) out->first = 0; @@ -745,6 +792,7 @@ static void opt_init_preg_info_from_ranges(Func* f, for 
(PReg v = 0; v < opt_reg_count(f); ++v) { i32 tied = old ? old[v].tied_hard_reg : -1; u32 forbidden = old ? old[v].forbidden_hard_regs : 0; + u32 allowed = old ? old[v].allowed_hard_regs : 0; u32 clobbered = old ? old[v].clobbered_hard_regs : 0; u32 old_frequency = old ? old[v].frequency : 0; i8 pref = old ? old[v].preferred_hard_reg : (i8)-1; @@ -756,6 +804,7 @@ static void opt_init_preg_info_from_ranges(Func* f, vi->alloc_kind = OPT_ALLOC_NONE; vi->cls = opt_reg_cls(f, v); vi->forbidden_hard_regs = forbidden; + vi->allowed_hard_regs = allowed; vi->clobbered_hard_regs = clobbered; if (!ranges || v == PREG_NONE || v == 0 || ranges->first_range_by_preg[v] == OPT_RANGE_NONE) { @@ -1059,6 +1108,15 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges, "opt regalloc: fixed hard reg %u is clobbered", (unsigned)fixed); } + if (fixed >= 32 || + (gi.allowed_hard_regs && + (gi.allowed_hard_regs & (1u << fixed)) == 0)) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, + "opt regalloc: fixed hard reg %u violates asm " + "constraint", + (unsigned)fixed); + } u32 bit = hard_loc_bit(cls, fixed); if (fixed >= 32 || alloc_group_conflicts_bit(a, bit)) { SrcLoc loc = {0, 0, 0}; @@ -1075,6 +1133,8 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges, for (u32 r = 0; r < f->opt_hard_reg_count[cls]; ++r) { Reg hr = f->opt_hard_regs[cls][r]; if (hr >= 32) continue; + if (gi.allowed_hard_regs && (gi.allowed_hard_regs & (1u << hr)) == 0) + continue; if (gi.forbidden_hard_regs & (1u << hr)) continue; u32 bit = hard_loc_bit(cls, hr); if (alloc_group_conflicts_bit(a, bit)) continue; @@ -1085,6 +1145,24 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges, best_score = score; } } + if (gi.allowed_hard_regs) { + for (Reg hr = 0; hr < 32; ++hr) { + const CGPhysRegInfo* pi; + if ((gi.allowed_hard_regs & (1u << hr)) == 0) continue; + if (hard_available(f, cls, hr)) continue; + if (gi.forbidden_hard_regs & (1u << hr)) continue; + pi = 
phys_info_for(f, cls, hr); + if (!pi || (pi->flags & CG_REG_RESERVED)) continue; + u32 bit = hard_loc_bit(cls, hr); + if (alloc_group_conflicts_bit(a, bit)) continue; + u32 score = hard_reg_alloc_score(f, a, vi, hr); + if (!found || score < best_score) { + found = 1; + best = hr; + best_score = score; + } + } + } /* Also consider the preferred hard reg if it's outside the standard * allocable set (e.g. x0 on aa64: reserved as the ABI ret reg, not in * aa_int_allocable). Used by apply_abi_aliasing_hints to let an IR_CALL @@ -1102,49 +1180,55 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges, is_caller_saved(f, cls, (Reg)vi->preferred_hard_reg))) { Reg hint = (Reg)vi->preferred_hard_reg; int already_tried = 0; - for (u32 r = 0; r < f->opt_hard_reg_count[cls]; ++r) { - if (f->opt_hard_regs[cls][r] == hint) { - already_tried = 1; - break; + if (hint < 32 && + (!gi.allowed_hard_regs || (gi.allowed_hard_regs & (1u << hint)))) { + for (u32 r = 0; r < f->opt_hard_reg_count[cls]; ++r) { + if (f->opt_hard_regs[cls][r] == hint) { + already_tried = 1; + break; + } } - } - if (!already_tried && hint < 32 && - !(gi.forbidden_hard_regs & (1u << hint))) { - u32 bit = hard_loc_bit(cls, hint); - int hint_safe = !alloc_group_conflicts_bit(a, bit); - /* The bitmap conflict can be falsely positive when an - * already-assigned PReg ends exactly where v begins — the - * swap-friendly pattern like `sub x0, x21, x0`, where the previous - * call's result occupies x0 and the sub both reads it and writes - * the new value. Fall back to a precise per-PReg interference check - * that allows the unit-length overlap (same rule used by - * opt_coalesce_ranges for moves). 
*/ - if (!hint_safe) { - int real_conflict = 0; - for (PReg u = 1; u < opt_reg_count(f); ++u) { - if (u == v) continue; - const OptPRegInfo* ui = &f->preg_info[u]; - if (ui->alloc_kind != OPT_ALLOC_HARD) continue; - if (ui->hard_reg != hint) continue; - if (opt_ranges_overlap_kind(ranges, u, v) >= 2) { - real_conflict = 1; - break; + if (!already_tried && !(gi.forbidden_hard_regs & (1u << hint))) { + u32 bit = hard_loc_bit(cls, hint); + int hint_safe = !alloc_group_conflicts_bit(a, bit); + /* The bitmap conflict can be falsely positive when an + * already-assigned PReg ends exactly where v begins — the + * swap-friendly pattern like `sub x0, x21, x0`, where the previous + * call's result occupies x0 and the sub both reads it and writes + * the new value. Fall back to a precise per-PReg interference check + * that allows the unit-length overlap (same rule used by + * opt_coalesce_ranges for moves). */ + if (!hint_safe) { + int real_conflict = 0; + for (PReg u = 1; u < opt_reg_count(f); ++u) { + if (u == v) continue; + const OptPRegInfo* ui = &f->preg_info[u]; + if (ui->alloc_kind != OPT_ALLOC_HARD) continue; + if (ui->hard_reg != hint) continue; + if (opt_ranges_overlap_kind(ranges, u, v) >= 2) { + real_conflict = 1; + break; + } } + if (!real_conflict) hint_safe = 1; } - if (!real_conflict) hint_safe = 1; - } - if (hint_safe) { - u32 score = hard_reg_alloc_score(f, a, vi, hint); - if (!found || score < best_score) { - found = 1; - best = hint; - best_score = score; + if (hint_safe) { + u32 score = hard_reg_alloc_score(f, a, vi, hint); + if (!found || score < best_score) { + found = 1; + best = hint; + best_score = score; + } } } } } if (found) { alloc_assign_group_hard(f, a, ranges, v, best); + } else if (gi.allowed_hard_regs) { + SrcLoc loc = {0, 0, 0}; + compiler_panic(f->c, loc, + "opt regalloc: no hard register satisfies asm constraint"); } else { alloc_assign_group_stack(f, a, ranges, v); } diff --git a/src/opt/pass_machinize.c b/src/opt/pass_machinize.c @@ 
-22,12 +22,30 @@ static void asm_prepare_constraints(Func* f, NativeTarget* target, if (aux->nout && !aux->out_fixed_regs) { aux->out_fixed_regs = arena_array(f->arena, i32, aux->nout); aux->out_fixed_cls = arena_zarray(f->arena, u8, aux->nout); - for (u32 i = 0; i < aux->nout; ++i) aux->out_fixed_regs[i] = -1; } if (aux->nin && !aux->in_fixed_regs) { aux->in_fixed_regs = arena_array(f->arena, i32, aux->nin); aux->in_fixed_cls = arena_zarray(f->arena, u8, aux->nin); - for (u32 i = 0; i < aux->nin; ++i) aux->in_fixed_regs[i] = -1; + } + if (aux->nout && !aux->out_allowed_masks) { + aux->out_allowed_masks = arena_zarray(f->arena, u32, aux->nout); + aux->out_allowed_cls = arena_zarray(f->arena, u8, aux->nout); + } + if (aux->nin && !aux->in_allowed_masks) { + aux->in_allowed_masks = arena_zarray(f->arena, u32, aux->nin); + aux->in_allowed_cls = arena_zarray(f->arena, u8, aux->nin); + } + for (u32 i = 0; i < aux->nout; ++i) { + aux->out_fixed_regs[i] = -1; + aux->out_fixed_cls[i] = 0; + aux->out_allowed_masks[i] = 0; + aux->out_allowed_cls[i] = 0; + } + for (u32 i = 0; i < aux->nin; ++i) { + aux->in_fixed_regs[i] = -1; + aux->in_fixed_cls[i] = 0; + aux->in_allowed_masks[i] = 0; + aux->in_allowed_cls[i] = 0; } for (u32 i = 0; i < aux->nclob; ++i) { Reg r; @@ -40,25 +58,51 @@ static void asm_prepare_constraints(Func* f, NativeTarget* target, NativeAsmRegPin pin; NativeAsmRegPinStatus st = native_asm_resolve_pin(target, aux->outs[i].reg, aux->outs[i].str, &pin); - if (st == NATIVE_ASM_REG_PIN_ABSENT) continue; - if (st != NATIVE_ASM_REG_PIN_OK) { + if (st == NATIVE_ASM_REG_PIN_OK) { + aux->out_fixed_regs[i] = (i32)pin.reg; + aux->out_fixed_cls[i] = (u8)pin.cls; + continue; + } + if (st != NATIVE_ASM_REG_PIN_ABSENT) { compiler_panic(f->c, (SrcLoc){0, 0, 0}, "opt asm: %s", native_asm_pin_status_message(st)); } - aux->out_fixed_regs[i] = (i32)pin.reg; - aux->out_fixed_cls[i] = (u8)pin.cls; + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(target, 
aux->outs[i].str, &info)) { + if (info.allowed_mask) { + aux->out_allowed_masks[i] = info.allowed_mask; + aux->out_allowed_cls[i] = (u8)info.cls; + } + if (info.fixed_reg != REG_NONE) { + aux->out_fixed_regs[i] = (i32)info.fixed_reg; + aux->out_fixed_cls[i] = (u8)info.cls; + } + } } for (u32 i = 0; i < aux->nin; ++i) { NativeAsmRegPin pin; NativeAsmRegPinStatus st = native_asm_resolve_pin(target, aux->ins[i].reg, aux->ins[i].str, &pin); - if (st == NATIVE_ASM_REG_PIN_ABSENT) continue; - if (st != NATIVE_ASM_REG_PIN_OK) { + if (st == NATIVE_ASM_REG_PIN_OK) { + aux->in_fixed_regs[i] = (i32)pin.reg; + aux->in_fixed_cls[i] = (u8)pin.cls; + continue; + } + if (st != NATIVE_ASM_REG_PIN_ABSENT) { compiler_panic(f->c, (SrcLoc){0, 0, 0}, "opt asm: %s", native_asm_pin_status_message(st)); } - aux->in_fixed_regs[i] = (i32)pin.reg; - aux->in_fixed_cls[i] = (u8)pin.cls; + NativeAsmConstraintInfo info; + if (native_asm_constraint_reg_info(target, aux->ins[i].str, &info)) { + if (info.allowed_mask) { + aux->in_allowed_masks[i] = info.allowed_mask; + aux->in_allowed_cls[i] = (u8)info.cls; + } + if (info.fixed_reg != REG_NONE) { + aux->in_fixed_regs[i] = (i32)info.fixed_reg; + aux->in_fixed_cls[i] = (u8)info.cls; + } + } } } diff --git a/test/parse/cases/cg_native_inline_asm_machine_constraints.c b/test/parse/cases/cg_native_inline_asm_machine_constraints.c @@ -0,0 +1,19 @@ +int test_main(void) { +#if defined(__aarch64__) + double x = 1.0; + double y = 2.0; + __asm__ volatile("" : : "x"(x), "y"(y)); + return 42; +#elif defined(__riscv) && __riscv_xlen == 64 + int a = 20; + int b = 22; + int out = 0; + float fa = 1.0f; + float fb = 2.0f; + __asm__ volatile("add %0, %1, %2" : "=cr"(out) : "cr"(a), "cr"(b)); + __asm__ volatile("" : : "cf"(fa), "cf"(fb)); + return out == 42 ? 
42 : 1; +#else + return 42; +#endif +} diff --git a/test/parse/cases/cg_native_inline_asm_machine_constraints.expected b/test/parse/cases/cg_native_inline_asm_machine_constraints.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/cg_x64_inline_asm_machine_constraints.c b/test/parse/cases/cg_x64_inline_asm_machine_constraints.c @@ -0,0 +1,17 @@ +int test_main(void) { +#if defined(__x86_64__) + int aout = 0; + int bout = 0; + + __asm__ volatile("movl %1, %0\n\taddl %2, %0" + : "=a"(aout) + : "c"(20), "d"(22)); + __asm__ volatile("movl %1, %0\n\taddl %2, %0" + : "=b"(bout) + : "S"(17), "D"(25)); + + return (aout == 42 && bout == 42) ? 42 : 1; +#else + return 42; +#endif +} diff --git a/test/parse/cases/cg_x64_inline_asm_machine_constraints.expected b/test/parse/cases/cg_x64_inline_asm_machine_constraints.expected @@ -0,0 +1 @@ +42