commit 44b5521e6ab0bdb348582ec7fe080e7ff6e36704
parent 69d47850d76ff863e4493092346bd41f5c1f6574
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 6 Jun 2026 03:34:19 -0700
Support arch inline asm machine constraints
Diffstat:
21 files changed, 692 insertions(+), 210 deletions(-)
diff --git a/doc/plan/TODO.md b/doc/plan/TODO.md
@@ -5,44 +5,27 @@ fixed, remove it instead of checking it off or keeping a closed entry.
Add new deferred fixes below as they are discovered.
-## Inline asm: kit rejects machine-specific register constraints (e.g. x86 `"=a"`)
-
-kit's inline-asm lowering only recognizes the **architecture-neutral** register
-classes `r` (general), `f`/`x`/`w` (FP/SIMD), plus `i` (immediate input), `m`
-(memory), and matching digits (`0`–`9`). Any GCC *machine* constraint letter —
-most commonly x86's `"=a"`/`"a"` (the canonical `syscall`/`cpuid`/`rdtsc` idiom),
-but also `b`/`c`/`d`/`S`/`D`/`q`/`Q`, etc. — is rejected with
-`KitCg: unsupported asm output constraint` (or `… input constraint`). GCC accepts
-`"=a"` on x86; kit does not.
-
-Root cause is target-independent and lives in the **CG layer**, not a backend:
-`api_asm_is_reg_constraint` (`src/cg/asm.c:41`) returns true only for
-`r`/`f`/`x`/`w`, and the output/input constraint handlers
-(`src/cg/asm.c:195` / `:234`) `compiler_panic` on anything else. Because this is
-the front-of-pipe lowering (before any arch backend, before any opt pass), the
-rejection is identical on **every arch and every opt level**. Verified matrix
-(x86_64/aarch64/riscv64 × O0/O1): `"=a"` output → all 6 fail with the same
-message; the register-pinned workaround → all 6 compile.
-
-Workaround (what the rest of the tree uses, e.g. `test/link/harness/start.c` and
-`test/rt/addr2line_prog.c`): pin to a hard register with a GNU local register
-variable bound to a plain `r`/`+r` constraint, never the machine letter:
+## x86-64 inline asm: `long` `"=r"` output stores as 4 bytes
+
+`KIT_TEST_ARCH=x64 KIT_OPT_LEVELS="0 1" KIT_PARSE_PARALLEL=0
+test/parse/run.sh cg_x64_inline_asm_mov R` fails in both O0 and O1 emit with
+`[1]:4:11: fatal: KitCg: store value type/size mismatch: access size 4, value
+size 8`.
+
+Minimal existing repro:
```c
-/* NOT supported: __asm__("syscall" : "=a"(ret) : "a"(n) : ...); */
-register long rax __asm__("rax") = n; /* hard-register pin */
-__asm__ volatile("syscall" : "+r"(rax) : : "rcx", "r11", "memory");
-return rax; /* read result back from rax */
+int test_main(void) {
+ long out;
+ __asm__ volatile("movq %1, %0" : "=r"(out) : "r"(42));
+ return out == 42 ? 42 : 1;
+}
```
-The hard-register pin (`AsmConstraint.reg`) rides alongside the `r` operand and
-selects the exact register, so this is fully general. A proper fix would map the
-common machine constraint letters to their register class + a hard-register pin
-(at least x86 `a`/`b`/`c`/`d`/`S`/`D` → `r`+pin) so stock GCC/Clang syscall and
-cpuid snippets compile unchanged. Found writing the freestanding backtrace
-round-trip program (`test/rt/addr2line_prog.c`), whose x86-64 `write` syscall
-first used `"=a"` and had to be rewritten to the register-pinned form
-(doc/plan/BACKTRACE.md, WS4).
+The same failure masks new inline-asm constraint tests if they use `long` or
+`long long` outputs. The constraint classification is now arch-specific; this
+looks like the C frontend / CG store path is choosing a 4-byte output-lvalue
+access for an 8-byte asm result.
## x86-64 inline asm: `-g -O1` + a 4-operand register idiom → `too many memory asm operands` (compiler abort)
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -43,6 +43,7 @@
#include "cg/type.h"
#include "core/arena.h"
#include "core/bytes.h"
+#include "core/core.h"
#include "core/pool.h"
#include "core/slice.h"
#include "obj/obj.h"
@@ -3873,11 +3874,39 @@ static int aa_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls,
return 0;
}
+/* AArch64 inline-asm register-constraint parser (NativeRegInfo hook).
+ *
+ * `body` is a constraint body with the generic modifiers already stripped.
+ * Only single-letter bodies are recognised:
+ *   'r'  integer class, whole class
+ *   'w'  FP class, whole class
+ *   'x'  FP class, restricted to v0..v15
+ *   'y'  FP class, restricted to v0..v7
+ *
+ * Returns 1 on a match and fills every non-NULL out-parameter; *fixed_out is
+ * always REG_NONE because none of these letters hard-wires a register.
+ * Returns 0 otherwise: a NULL, empty, or multi-character body leaves the
+ * out-parameters untouched, while an unknown single letter has already reset
+ * *fixed_out and *allowed_mask_out but leaves *cls_out unwritten. */
+static int aa_asm_constraint_reg(const NativeRegInfo* ri, const char* body,
+                                 NativeAllocClass* cls_out, Reg* fixed_out,
+                                 u32* allowed_mask_out) {
+  NativeAllocClass cls;
+  u32 subset = 0; /* 0 means "any register of the class" */
+  (void)ri;
+  if (!body) return 0;
+  if (body[0] == '\0' || body[1] != '\0') return 0;
+  if (fixed_out) *fixed_out = REG_NONE;
+  if (allowed_mask_out) *allowed_mask_out = 0;
+  if (body[0] == 'r') {
+    cls = NATIVE_REG_INT;
+  } else if (body[0] == 'w') {
+    cls = NATIVE_REG_FP;
+  } else if (body[0] == 'x') {
+    cls = NATIVE_REG_FP;
+    subset = 0x0000ffffu; /* v0..v15 */
+  } else if (body[0] == 'y') {
+    cls = NATIVE_REG_FP;
+    subset = 0x000000ffu; /* v0..v7 */
+  } else {
+    return 0;
+  }
+  if (cls_out) *cls_out = cls;
+  if (allowed_mask_out) *allowed_mask_out = subset;
+  return 1;
+}
+
static const NativeRegInfo aa_reg_info = {
.classes = aa_classes,
.nclasses = sizeof aa_classes / sizeof aa_classes[0],
.resolve_name = aa_resolve_name,
.asm_operand_reg_ok = aa_asm_operand_reg_ok,
+ .asm_constraint_reg = aa_asm_constraint_reg, /* inline-asm register-constraint parser: 'r', 'w', 'x', 'y' */
};
static void aa_va_start_native(NativeTarget* t, NativeLoc ap_ptr);
@@ -4514,7 +4543,8 @@ static void aa_asm_clobber_masks(Compiler* c, SrcLoc loc, const Sym* clobbers,
}
AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d,
- NativeAllocClass cls, u32* used_int,
+ NativeAllocClass cls,
+ u32 allowed_mask, u32* used_int,
u32* used_fp) {
static const Reg int_pool[] = {0u, 1u, 2u, 3u, 4u, 5u, 6u,
7u, 8u, 11u, 12u, 13u, 14u, 15u};
@@ -4527,6 +4557,7 @@ AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d,
u32* used = cls == NATIVE_REG_FP ? used_fp : used_int;
for (u32 i = 0; i < n; ++i) {
Reg r = pool[i];
+ if (allowed_mask && (allowed_mask & (1u << r)) == 0) continue;
if ((*used & (1u << r)) != 0) continue;
*used |= 1u << r;
return r;
@@ -4535,14 +4566,6 @@ AA_UNUSED_FN static Reg aa_asm_alloc_reg(NativeDirectTarget* d,
return REG_NONE;
}
-AA_UNUSED_FN static NativeAllocClass aa_asm_constraint_class(
- NativeDirectTarget* d, const char* body) {
- if (body[0] == 'r') return NATIVE_REG_INT;
- if (body[0] == 'w') return NATIVE_REG_FP;
- aa_asm_panic(d, "constraint is not a register constraint");
- return NATIVE_REG_INT;
-}
-
static int aa_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg,
const char* constraint,
NativeAsmRegPin* pin) {
@@ -4708,17 +4731,29 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
clob_int |= 1u << pin.reg;
}
aa_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg);
- } else if (body[0] == 'r' || body[0] == 'w') {
- NativeAllocClass cls = aa_asm_constraint_class(d, body);
- Reg reg = aa_asm_alloc_reg(d, cls, &used_int, &used_fp);
- KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
- aa_asm_bound_reg(&bound_outs[i], type, cls, reg);
- } else if (body[0] == 'm') {
- Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
- KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
- aa_asm_bound_mem(&bound_outs[i], type, reg);
} else {
- aa_asm_panic(d, "unsupported output constraint");
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(d->native, outs[i].str, &info)) {
+ Reg reg = info.fixed_reg != REG_NONE
+ ? info.fixed_reg
+ : aa_asm_alloc_reg(d, info.cls, info.allowed_mask,
+ &used_int, &used_fp);
+ KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
+ if (info.cls == NATIVE_REG_FP) {
+ used_fp |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg;
+ } else {
+ used_int |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg;
+ }
+ aa_asm_bound_reg(&bound_outs[i], type, info.cls, reg);
+ } else if (body[0] == 'm') {
+ Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp);
+ KitCgTypeId type = outs[i].type ? outs[i].type : out_ops[i].type;
+ aa_asm_bound_mem(&bound_outs[i], type, reg);
+ } else {
+ aa_asm_panic(d, "unsupported output constraint");
+ }
}
}
@@ -4747,21 +4782,33 @@ static void aa_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
clob_int |= 1u << pin.reg;
}
aa_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg);
- } else if (body[0] == 'r' || body[0] == 'w') {
- NativeAllocClass cls = aa_asm_constraint_class(d, body);
- Reg reg = aa_asm_alloc_reg(d, cls, &used_int, &used_fp);
- KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
- aa_asm_bound_reg(&bound_ins[i], type, cls, reg);
- } else if (body[0] == 'i') {
- if (in_ops[i].kind != OPK_IMM)
- aa_asm_panic(d, "immediate constraint requires immediate operand");
- bound_ins[i] = in_ops[i];
- } else if (body[0] == 'm') {
- Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
- KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
- aa_asm_bound_mem(&bound_ins[i], type, reg);
} else {
- aa_asm_panic(d, "unsupported input constraint");
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(d->native, ins[i].str, &info)) {
+ Reg reg = info.fixed_reg != REG_NONE
+ ? info.fixed_reg
+ : aa_asm_alloc_reg(d, info.cls, info.allowed_mask,
+ &used_int, &used_fp);
+ KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
+ if (info.cls == NATIVE_REG_FP) {
+ used_fp |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg;
+ } else {
+ used_int |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg;
+ }
+ aa_asm_bound_reg(&bound_ins[i], type, info.cls, reg);
+ } else if (body[0] == 'i') {
+ if (in_ops[i].kind != OPK_IMM)
+ aa_asm_panic(d, "immediate constraint requires immediate operand");
+ bound_ins[i] = in_ops[i];
+ } else if (body[0] == 'm') {
+ Reg reg = aa_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp);
+ KitCgTypeId type = ins[i].type ? ins[i].type : in_ops[i].type;
+ aa_asm_bound_mem(&bound_ins[i], type, reg);
+ } else {
+ aa_asm_panic(d, "unsupported input constraint");
+ }
}
}
@@ -4883,11 +4930,22 @@ static void aa_asm_bind_native(AANativeTarget* a, SrcLoc loc, Operand* out,
const char* constraint, KitCgTypeId type,
NativeLoc src, u32* ntmp) {
const char* body = native_asm_constraint_body(constraint);
- if (body[0] == 'r' || body[0] == 'w') {
- NativeAllocClass cls = (body[0] == 'w') ? NATIVE_REG_FP : NATIVE_REG_INT;
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(&a->base, constraint, &info)) {
if (src.kind != NATIVE_LOC_REG)
aa_asm_panic_at(a->base.c, loc, "register asm operand not in a register");
- aa_asm_bound_reg(out, type, cls, (Reg)src.v.reg);
+ if (info.fixed_reg != REG_NONE && info.fixed_reg != (Reg)src.v.reg)
+ aa_asm_panic_at(a->base.c, loc,
+ "fixed-register asm operand in wrong register");
+ if (info.allowed_mask &&
+ ((Reg)src.v.reg >= 32 ||
+ (info.allowed_mask & (1u << (Reg)src.v.reg)) == 0))
+ compiler_panic(
+ a->base.c, loc,
+ "aarch64 inline asm: constraint %s got cls%u reg%u outside %08x",
+ constraint, (unsigned)info.cls, (unsigned)src.v.reg,
+ (unsigned)info.allowed_mask);
+ aa_asm_bound_reg(out, type, info.cls, (Reg)src.v.reg);
} else if (body[0] == 'i') {
if (src.kind != NATIVE_LOC_IMM)
aa_asm_panic_at(a->base.c, loc, "immediate asm operand is not immediate");
@@ -4933,15 +4991,18 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl,
type = ins[i].type ? ins[i].type : in_locs[i].type;
{
const char* in_body = native_asm_constraint_body(ins[i].str);
+ NativeAsmConstraintInfo info;
NativeLoc inloc = in_locs[i];
/* A register-constrained input whose value is an address-taken local
* arrives in a frame slot: the optimizer cannot keep an address-taken
* local live in a register across the block, so the "inputs are already
* in registers" contract does not hold for it. Load it into a reserved
- * scratch register (as the direct path does) before binding. Only the
- * integer 'r' form is handled here — 'w' would need an FP scratch, which
- * isn't reserved; an address-taken FP input still falls to the panic. */
- if (in_body[0] == 'r' && inloc.kind != NATIVE_LOC_REG) {
+ * scratch register (as the direct path does) before binding. Only
+ * unrestricted integer constraints can use this scratch; restricted
+ * register sets must already arrive in an allowed hard register. */
+ if (native_asm_constraint_reg_info(t, ins[i].str, &info) &&
+ info.cls == NATIVE_REG_INT && info.allowed_mask == 0 &&
+ inloc.kind != NATIVE_LOC_REG) {
Reg r;
if (ntmp >= 2u) aa_asm_panic_at(c, loc, "too many memory asm operands");
r = (ntmp == 0u) ? AA_TMP0 : AA_TMP1;
@@ -4950,6 +5011,7 @@ static void aa_asm_block_native(NativeTarget* t, const char* tmpl,
aa_emit_mem(a, 1, inloc, aa_asm_loc_to_addr(a, loc, in_locs[i]),
aa_mem_for_type(t, type, type_size32(t, type)));
}
+ (void)in_body;
aa_asm_bind_native(a, loc, &bound_ins[i], ins[i].str, type, inloc, &ntmp);
}
}
diff --git a/src/arch/native_target.h b/src/arch/native_target.h
@@ -169,6 +169,16 @@ struct NativeRegInfo {
* the assembler can name them. */
int (*asm_operand_reg_ok)(const NativeRegInfo*, NativeAllocClass cls,
Reg reg);
+ /* Optional target-specific register-constraint parser for inline asm. The
+ * input is the constraint body after generic modifiers ('=', '+', '&') have
+ * been stripped. Return non-zero only for constraints that name a register
+ * class; set fixed_out to REG_NONE for a free class or to a physical register
+ * when the constraint hard-wires the operand (x86 "a" -> rax). Set
+ * allowed_mask_out to 0 for the whole class, or a physical-register bitmask
+ * when the constraint names a restricted class subset. */
+ int (*asm_constraint_reg)(const NativeRegInfo*, const char* body,
+ NativeAllocClass* cls_out, Reg* fixed_out,
+ u32* allowed_mask_out);
const char* (*debug_name)(const NativeRegInfo*, NativeAllocClass, Reg);
u32 (*dwarf_reg)(const NativeRegInfo*, NativeAllocClass, Reg);
};
diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c
@@ -545,11 +545,40 @@ static int rv_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls,
return 0;
}
+/* RISC-V inline-asm register-constraint parser (NativeRegInfo hook).
+ *
+ * `body` is a constraint body with the generic modifiers already stripped.
+ * Recognised bodies:
+ *   "r"   integer class, whole class
+ *   "f"   FP class, whole class
+ *   "cr"  integer class, restricted to x8..x15
+ *   "cf"  FP class, restricted to f8..f15
+ *
+ * Returns 1 on a match and fills every non-NULL out-parameter; *fixed_out is
+ * always REG_NONE. Returns 0 otherwise: a NULL or empty body leaves the
+ * out-parameters untouched, any other unmatched body has already reset
+ * *fixed_out and *allowed_mask_out but leaves *cls_out unwritten. */
+static int rv_asm_constraint_reg(const NativeRegInfo* ri, const char* body,
+                                 NativeAllocClass* cls_out, Reg* fixed_out,
+                                 u32* allowed_mask_out) {
+  const char* letter = body;
+  NativeAllocClass cls;
+  int restricted = 0;
+  (void)ri;
+  if (!body || body[0] == '\0') return 0;
+  if (fixed_out) *fixed_out = REG_NONE;
+  if (allowed_mask_out) *allowed_mask_out = 0;
+  /* A single leading 'c' selects the restricted x8..x15 / f8..f15 subset of
+   * the class named by the letter that follows it. */
+  if (letter[0] == 'c') {
+    restricted = 1;
+    ++letter;
+  }
+  /* Exactly one class letter must remain. */
+  if (letter[0] == '\0' || letter[1] != '\0') return 0;
+  switch (letter[0]) {
+    case 'r':
+      cls = NATIVE_REG_INT;
+      break;
+    case 'f':
+      cls = NATIVE_REG_FP;
+      break;
+    default:
+      return 0;
+  }
+  if (cls_out) *cls_out = cls;
+  if (restricted && allowed_mask_out) *allowed_mask_out = 0x0000ff00u;
+  return 1;
+}
+
static const NativeRegInfo rv_reg_info = {
.classes = rv_classes,
.nclasses = sizeof rv_classes / sizeof rv_classes[0],
.resolve_name = rv_resolve_name,
.asm_operand_reg_ok = rv_asm_operand_reg_ok,
+ .asm_constraint_reg = rv_asm_constraint_reg, /* inline-asm register-constraint parser: "r", "f", "cr", "cf" */
};
/* ============================ legality ============================ */
@@ -3431,14 +3460,6 @@ static void rv_asm_clobber_masks(Compiler* c, SrcLoc loc, const Sym* clobbers,
}
}
-static NativeAllocClass rv_asm_constraint_class(NativeDirectTarget* d,
- const char* body) {
- if (body[0] == 'r') return NATIVE_REG_INT;
- if (body[0] == 'f') return NATIVE_REG_FP;
- rv_asm_panic(d, "constraint is not a register constraint");
- return NATIVE_REG_INT;
-}
-
static int rv_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg,
const char* constraint,
NativeAsmRegPin* pin) {
@@ -3453,7 +3474,7 @@ static int rv_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg,
/* Pick a free register from the arch's caller-saved allocable pools for an
* asm operand the direct path must self-allocate. */
static Reg rv_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls,
- u32* used_int, u32* used_fp) {
+ u32 allowed_mask, u32* used_int, u32* used_fp) {
/* int: a0..a7 (10..17) then t-temps that aren't emit scratch. */
static const Reg int_pool[] = {10u, 11u, 12u, 13u, 14u, 15u,
16u, 17u, 29u, 30u, 31u};
@@ -3467,6 +3488,7 @@ static Reg rv_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls,
u32 i;
for (i = 0; i < n; ++i) {
Reg r = pool[i];
+ if (allowed_mask && (allowed_mask & (1u << r)) == 0) continue;
if ((*used & (1u << r)) != 0) continue;
*used |= 1u << r;
return r;
@@ -3760,11 +3782,19 @@ static void rv_asm_bind_native(RvNativeTarget* a, SrcLoc loc, Operand* out,
const char* constraint, KitCgTypeId type,
NativeLoc src, u32* ntmp) {
const char* body = native_asm_constraint_body(constraint);
- if (body[0] == 'r' || body[0] == 'f') {
- NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT;
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(&a->base, constraint, &info)) {
if (src.kind != NATIVE_LOC_REG)
rv_asm_panic_at(a->base.c, loc, "register asm operand not in a register");
- rv_asm_bound_reg(out, type, cls, (Reg)src.v.reg);
+ if (info.fixed_reg != REG_NONE && info.fixed_reg != (Reg)src.v.reg)
+ rv_asm_panic_at(a->base.c, loc,
+ "fixed-register asm operand in wrong register");
+ if (info.allowed_mask &&
+ ((Reg)src.v.reg >= 32 ||
+ (info.allowed_mask & (1u << (Reg)src.v.reg)) == 0))
+ rv_asm_panic_at(a->base.c, loc,
+ "register asm operand violates constraint register set");
+ rv_asm_bound_reg(out, type, info.cls, (Reg)src.v.reg);
} else if (body[0] == 'i') {
if (src.kind != NATIVE_LOC_IMM)
rv_asm_panic_at(a->base.c, loc, "immediate asm operand is not immediate");
@@ -3794,13 +3824,13 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl,
Rv64Asm* asmh;
for (i = 0; i < nout; ++i) {
- const char* body = native_asm_constraint_body(outs[i].str);
KitCgTypeId type = outs[i].type ? outs[i].type : out_locs[i].type;
NativeLoc outloc = out_locs[i];
- if ((body[0] == 'r' || body[0] == 'f') && outloc.kind != NATIVE_LOC_REG) {
- NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT;
- Reg r = rv_asm_stage_reg(a, loc, cls, &nstage_int, &nstage_fp);
- outloc = native_loc_reg(type, cls, r);
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(t, outs[i].str, &info) &&
+ info.allowed_mask == 0 && outloc.kind != NATIVE_LOC_REG) {
+ Reg r = rv_asm_stage_reg(a, loc, info.cls, &nstage_int, &nstage_fp);
+ outloc = native_loc_reg(type, info.cls, r);
staged_outs[i] = 1u;
if (outs[i].dir == KIT_CG_ASM_INOUT)
rv_asm_load_loc_to_reg(a, loc, out_locs[i], outloc);
@@ -3820,11 +3850,14 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl,
}
type = ins[i].type ? ins[i].type : in_locs[i].type;
inloc = in_locs[i];
- if ((body[0] == 'r' || body[0] == 'f') && inloc.kind != NATIVE_LOC_REG) {
- NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT;
- Reg r = rv_asm_stage_reg(a, loc, cls, &nstage_int, &nstage_fp);
- inloc = native_loc_reg(type, cls, r);
- rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc);
+ {
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(t, ins[i].str, &info) &&
+ info.allowed_mask == 0 && inloc.kind != NATIVE_LOC_REG) {
+ Reg r = rv_asm_stage_reg(a, loc, info.cls, &nstage_int, &nstage_fp);
+ inloc = native_loc_reg(type, info.cls, r);
+ rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc);
+ }
}
rv_asm_bind_native(a, loc, &bound_ins[i], ins[i].str, type, inloc, &ntmp);
}
@@ -4122,15 +4155,27 @@ static void rv_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
clob_int |= 1u << pin.reg;
}
rv_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg);
- } else if (body[0] == 'r' || body[0] == 'f') {
- NativeAllocClass cls = rv_asm_constraint_class(d, body);
- Reg reg = rv_asm_alloc_reg(d, cls, &used_int, &used_fp);
- rv_asm_bound_reg(&bound_outs[i], type, cls, reg);
- } else if (body[0] == 'm') {
- Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
- rv_asm_bound_mem(&bound_outs[i], type, reg);
} else {
- rv_asm_panic(d, "unsupported output constraint");
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(d->native, outs[i].str, &info)) {
+ Reg reg = info.fixed_reg != REG_NONE
+ ? info.fixed_reg
+ : rv_asm_alloc_reg(d, info.cls, info.allowed_mask,
+ &used_int, &used_fp);
+ if (info.cls == NATIVE_REG_FP) {
+ used_fp |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg;
+ } else {
+ used_int |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg;
+ }
+ rv_asm_bound_reg(&bound_outs[i], type, info.cls, reg);
+ } else if (body[0] == 'm') {
+ Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp);
+ rv_asm_bound_mem(&bound_outs[i], type, reg);
+ } else {
+ rv_asm_panic(d, "unsupported output constraint");
+ }
}
}
@@ -4159,19 +4204,31 @@ static void rv_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
clob_int |= 1u << pin.reg;
}
rv_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg);
- } else if (body[0] == 'r' || body[0] == 'f') {
- NativeAllocClass cls = rv_asm_constraint_class(d, body);
- Reg reg = rv_asm_alloc_reg(d, cls, &used_int, &used_fp);
- rv_asm_bound_reg(&bound_ins[i], type, cls, reg);
- } else if (body[0] == 'i') {
- if (in_ops[i].kind != OPK_IMM)
- rv_asm_panic(d, "immediate constraint requires immediate operand");
- bound_ins[i] = in_ops[i];
- } else if (body[0] == 'm') {
- Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
- rv_asm_bound_mem(&bound_ins[i], type, reg);
} else {
- rv_asm_panic(d, "unsupported input constraint");
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(d->native, ins[i].str, &info)) {
+ Reg reg = info.fixed_reg != REG_NONE
+ ? info.fixed_reg
+ : rv_asm_alloc_reg(d, info.cls, info.allowed_mask,
+ &used_int, &used_fp);
+ if (info.cls == NATIVE_REG_FP) {
+ used_fp |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg;
+ } else {
+ used_int |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg;
+ }
+ rv_asm_bound_reg(&bound_ins[i], type, info.cls, reg);
+ } else if (body[0] == 'i') {
+ if (in_ops[i].kind != OPK_IMM)
+ rv_asm_panic(d, "immediate constraint requires immediate operand");
+ bound_ins[i] = in_ops[i];
+ } else if (body[0] == 'm') {
+ Reg reg = rv_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp);
+ rv_asm_bound_mem(&bound_ins[i], type, reg);
+ } else {
+ rv_asm_panic(d, "unsupported input constraint");
+ }
}
}
diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c
@@ -391,11 +391,57 @@ static int x64_asm_operand_reg_ok(const NativeRegInfo* ri, NativeAllocClass cls,
return 0;
}
+/* x86-64 inline-asm register-constraint parser (NativeRegInfo hook).
+ *
+ * `body` is a constraint body with the generic modifiers already stripped.
+ * Only single-letter bodies are recognised:
+ *   'r', 'q'   integer class, no pinned register
+ *   'a' 'b' 'c' 'd' 'S' 'D'
+ *              integer class pinned to X64_RAX / X64_RBX / X64_RCX /
+ *              X64_RDX / X64_RSI / X64_RDI respectively
+ *   'x', 'v'   FP class, no pinned register
+ *
+ * Returns 1 on a match and fills every non-NULL out-parameter;
+ * *allowed_mask_out is always 0. Returns 0 otherwise: a NULL, empty, or
+ * multi-character body leaves the out-parameters untouched, while an unknown
+ * single letter has already reset *fixed_out and *allowed_mask_out but leaves
+ * *cls_out unwritten. */
+static int x64_asm_constraint_reg(const NativeRegInfo* ri, const char* body,
+                                  NativeAllocClass* cls_out, Reg* fixed_out,
+                                  u32* allowed_mask_out) {
+  NativeAllocClass cls = NATIVE_REG_INT;
+  Reg pinned = REG_NONE;
+  (void)ri;
+  if (!body) return 0;
+  if (body[0] == '\0' || body[1] != '\0') return 0;
+  if (fixed_out) *fixed_out = REG_NONE;
+  if (allowed_mask_out) *allowed_mask_out = 0;
+  switch (body[0]) {
+    case 'r':
+    case 'q':
+      break;
+    case 'a':
+      pinned = X64_RAX;
+      break;
+    case 'b':
+      pinned = X64_RBX;
+      break;
+    case 'c':
+      pinned = X64_RCX;
+      break;
+    case 'd':
+      pinned = X64_RDX;
+      break;
+    case 'S':
+      pinned = X64_RSI;
+      break;
+    case 'D':
+      pinned = X64_RDI;
+      break;
+    case 'x':
+    case 'v':
+      cls = NATIVE_REG_FP;
+      break;
+    default:
+      return 0;
+  }
+  if (cls_out) *cls_out = cls;
+  if (fixed_out) *fixed_out = pinned;
+  return 1;
+}
+
static const NativeRegInfo x64_reg_info = {
.classes = x64_classes,
.nclasses = sizeof x64_classes / sizeof x64_classes[0],
.resolve_name = x64_resolve_name,
.asm_operand_reg_ok = x64_asm_operand_reg_ok,
+ .asm_constraint_reg = x64_asm_constraint_reg, /* inline-asm register-constraint parser: r q a b c d S D x v */
};
/* ============================ legality ============================ */
@@ -3717,16 +3763,6 @@ static void x64_asm_clobber_masks(Compiler* c, SrcLoc loc, const Sym* clobbers,
}
}
-static NativeAllocClass x64_asm_constraint_class(NativeDirectTarget* d,
- const char* body) {
- if (body[0] == 'r' || body[0] == 'q' || body[0] == 'a' || body[0] == 'b' ||
- body[0] == 'c' || body[0] == 'd' || body[0] == 'S' || body[0] == 'D')
- return NATIVE_REG_INT;
- if (body[0] == 'x' || body[0] == 'v') return NATIVE_REG_FP;
- x64_asm_panic(d, "constraint is not a register constraint");
- return NATIVE_REG_INT;
-}
-
static int x64_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg,
const char* constraint,
NativeAsmRegPin* pin) {
@@ -3741,7 +3777,7 @@ static int x64_asm_resolve_pin_or_panic(NativeDirectTarget* d, Sym reg,
/* Pick a free register from caller-saved allocable pools for an asm operand the
* direct path self-allocates. */
static Reg x64_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls,
- u32* used_int, u32* used_fp) {
+ u32 allowed_mask, u32* used_int, u32* used_fp) {
static const Reg int_pool[] = {X64_RDI, X64_RSI, X64_RDX,
X64_RCX, X64_R8, X64_R9};
static const Reg fp_pool[] = {
@@ -3754,6 +3790,7 @@ static Reg x64_asm_alloc_reg(NativeDirectTarget* d, NativeAllocClass cls,
u32 i;
for (i = 0; i < n; ++i) {
Reg r = pool[i];
+ if (allowed_mask && (allowed_mask & (1u << r)) == 0) continue;
if ((*used & (1u << r)) != 0) continue;
*used |= 1u << r;
return r;
@@ -3981,12 +4018,20 @@ static void x64_asm_bind_native(X64NativeTarget* a, SrcLoc loc, Operand* out,
const char* constraint, KitCgTypeId type,
NativeLoc src, u32* ntmp) {
const char* body = native_asm_constraint_body(constraint);
- if (body[0] == 'r' || body[0] == 'x') {
- NativeAllocClass cls = (body[0] == 'x') ? NATIVE_REG_FP : NATIVE_REG_INT;
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(&a->base, constraint, &info)) {
if (src.kind != NATIVE_LOC_REG)
x64_asm_panic_at(a->base.c, loc,
"register asm operand not in a register");
- x64_asm_bound_reg(out, type, cls, (Reg)src.v.reg);
+ if (info.fixed_reg != REG_NONE && info.fixed_reg != (Reg)src.v.reg)
+ x64_asm_panic_at(a->base.c, loc,
+ "fixed-register asm operand in wrong register");
+ if (info.allowed_mask &&
+ ((Reg)src.v.reg >= 32 ||
+ (info.allowed_mask & (1u << (Reg)src.v.reg)) == 0))
+ x64_asm_panic_at(a->base.c, loc,
+ "register asm operand violates constraint register set");
+ x64_asm_bound_reg(out, type, info.cls, (Reg)src.v.reg);
} else if (body[0] == 'i') {
if (src.kind != NATIVE_LOC_IMM)
x64_asm_panic_at(a->base.c, loc,
@@ -4375,15 +4420,27 @@ static void x64_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
clob_int |= 1u << pin.reg;
}
x64_asm_bound_reg(&bound_outs[i], type, pin.cls, pin.reg);
- } else if (body[0] == 'r' || body[0] == 'x') {
- NativeAllocClass cls = x64_asm_constraint_class(d, body);
- Reg reg = x64_asm_alloc_reg(d, cls, &used_int, &used_fp);
- x64_asm_bound_reg(&bound_outs[i], type, cls, reg);
- } else if (body[0] == 'm') {
- Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
- x64_asm_bound_mem(&bound_outs[i], type, reg);
} else {
- x64_asm_panic(d, "unsupported output constraint");
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(d->native, outs[i].str, &info)) {
+ Reg reg = info.fixed_reg != REG_NONE
+ ? info.fixed_reg
+ : x64_asm_alloc_reg(d, info.cls, info.allowed_mask,
+ &used_int, &used_fp);
+ if (info.cls == NATIVE_REG_FP) {
+ used_fp |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg;
+ } else {
+ used_int |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg;
+ }
+ x64_asm_bound_reg(&bound_outs[i], type, info.cls, reg);
+ } else if (body[0] == 'm') {
+ Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp);
+ x64_asm_bound_mem(&bound_outs[i], type, reg);
+ } else {
+ x64_asm_panic(d, "unsupported output constraint");
+ }
}
}
@@ -4412,19 +4469,31 @@ static void x64_direct_asm_block(NativeDirectTarget* d, const char* tmpl,
clob_int |= 1u << pin.reg;
}
x64_asm_bound_reg(&bound_ins[i], type, pin.cls, pin.reg);
- } else if (body[0] == 'r' || body[0] == 'x') {
- NativeAllocClass cls = x64_asm_constraint_class(d, body);
- Reg reg = x64_asm_alloc_reg(d, cls, &used_int, &used_fp);
- x64_asm_bound_reg(&bound_ins[i], type, cls, reg);
- } else if (body[0] == 'i') {
- if (in_ops[i].kind != OPK_IMM)
- x64_asm_panic(d, "immediate constraint requires immediate operand");
- bound_ins[i] = in_ops[i];
- } else if (body[0] == 'm') {
- Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, &used_int, &used_fp);
- x64_asm_bound_mem(&bound_ins[i], type, reg);
} else {
- x64_asm_panic(d, "unsupported input constraint");
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(d->native, ins[i].str, &info)) {
+ Reg reg = info.fixed_reg != REG_NONE
+ ? info.fixed_reg
+ : x64_asm_alloc_reg(d, info.cls, info.allowed_mask,
+ &used_int, &used_fp);
+ if (info.cls == NATIVE_REG_FP) {
+ used_fp |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_fp |= 1u << reg;
+ } else {
+ used_int |= 1u << reg;
+ if (info.fixed_reg != REG_NONE) clob_int |= 1u << reg;
+ }
+ x64_asm_bound_reg(&bound_ins[i], type, info.cls, reg);
+ } else if (body[0] == 'i') {
+ if (in_ops[i].kind != OPK_IMM)
+ x64_asm_panic(d, "immediate constraint requires immediate operand");
+ bound_ins[i] = in_ops[i];
+ } else if (body[0] == 'm') {
+ Reg reg = x64_asm_alloc_reg(d, NATIVE_REG_INT, 0, &used_int, &used_fp);
+ x64_asm_bound_mem(&bound_ins[i], type, reg);
+ } else {
+ x64_asm_panic(d, "unsupported input constraint");
+ }
}
}
diff --git a/src/cg/asm.c b/src/cg/asm.c
@@ -42,6 +42,14 @@ int api_asm_is_reg_constraint(char c) {
return c == 'r' || c == 'f' || c == 'x' || c == 'w';
}
+/* Decide whether `constraint` names a register operand.
+ *
+ * The first character of the constraint body is checked against the generic
+ * letters accepted by api_asm_is_reg_constraint. If that fails, the question
+ * is forwarded to the target's optional asm_is_reg_constraint hook, which
+ * receives the full constraint string rather than the stripped body.
+ * Returns 0 when the generic check fails and `g`, its target, or the hook is
+ * missing. */
+static int api_asm_constraint_is_reg(KitCg* g, const char* constraint) {
+  char first = api_asm_constraint_body(constraint)[0];
+  if (api_asm_is_reg_constraint(first)) return 1;
+  if (!g || !g->target) return 0;
+  if (!g->target->asm_is_reg_constraint) return 0;
+  return g->target->asm_is_reg_constraint(g->target, constraint);
+}
+
/* A register ('r'/'f'/'x'/'w') asm operand must live in a single hardware
* register. A 64-bit scalar on a 32-bit target does not fit one: it would need
* a register pair, which this inline-asm lowering does not model, so binding it
@@ -119,8 +127,7 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) {
outs[i].reg = (Sym)outputs[i].reg;
outs[i].dir = (u8)outputs[i].dir;
if (!outs[i].type) outs[i].type = fallback_ty;
- if (outs[i].reg &&
- !api_asm_is_reg_constraint(api_asm_constraint_body(outs[i].str)[0])) {
+ if (outs[i].reg && !api_asm_constraint_is_reg(g, outs[i].str)) {
compiler_panic(g->c, g->cur_loc,
"KitCg: asm hard-register output requires a register "
"constraint");
@@ -159,8 +166,7 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) {
ins[i].reg = (Sym)inputs[i].reg;
ins[i].dir = (u8)inputs[i].dir;
if (!ins[i].type) ins[i].type = fallback_ty;
- if (ins[i].reg &&
- !api_asm_is_reg_constraint(api_asm_constraint_body(ins[i].str)[0])) {
+ if (ins[i].reg && !api_asm_constraint_is_reg(g, ins[i].str)) {
compiler_panic(g->c, g->cur_loc,
"KitCg: asm hard-register input requires a register "
"constraint");
@@ -187,12 +193,11 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) {
}
for (u32 i = 0; i < noutputs; ++i) {
- const char* body = api_asm_constraint_body(outs[i].str);
if (api_asm_is_early_clobber(outs[i].str)) continue;
/* A register constraint binds to a temp local; the local's type selects the
* register class (integer vs FP), so the backend hook places an FP-class
* output (riscv 'f', x86 'x', aarch64 'w') in an FP register. */
- if (api_asm_is_reg_constraint(body[0])) {
+ if (api_asm_constraint_is_reg(g, outs[i].str)) {
KitCgTypeId oty = outs[i].type ? outs[i].type : fallback_ty;
CGLocal r;
api_asm_reject_wide_reg(g, oty);
@@ -231,7 +236,7 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) {
T->copy(T, bound, src);
}
in_ops[i] = bound;
- } else if (api_asm_is_reg_constraint(s[0])) {
+ } else if (api_asm_constraint_is_reg(g, s)) {
api_asm_reject_wide_reg(g, ity);
in_ops[i] = api_force_local(g, &in_svs[i], ity);
} else if (s[0] == 'i') {
@@ -261,12 +266,10 @@ void kit_cg_inline_asm(KitCg* g, KitCgInlineAsm asm_block) {
}
for (u32 i = 0; i < noutputs; ++i) {
- const char* body;
KitCgTypeId oty;
CGLocal r;
if (!api_asm_is_early_clobber(outs[i].str)) continue;
- body = api_asm_constraint_body(outs[i].str);
- if (!api_asm_is_reg_constraint(body[0])) {
+ if (!api_asm_constraint_is_reg(g, outs[i].str)) {
compiler_panic(g->c, g->cur_loc,
"KitCg: unsupported early-clobber asm output");
continue;
diff --git a/src/cg/cgtarget.h b/src/cg/cgtarget.h
@@ -765,6 +765,7 @@ struct CgTarget {
* "=&r" early-clobber outputs must be allocated disjoint from any input.
* opt_cgtarget records this as a single IR_ASM_BLOCK; the wrapped target
* receives the same call at lowering time with materialized operands. */
+ int (*asm_is_reg_constraint)(CgTarget*, const char* constraint);
void (*asm_block)(CgTarget*, const char* tmpl, const AsmConstraint* outs,
u32 nout, Operand* out_ops, const AsmConstraint* ins,
u32 nin, const Operand* in_ops, const Sym* clobbers,
diff --git a/src/cg/ir_recorder.c b/src/cg/ir_recorder.c
@@ -15,6 +15,7 @@ struct CgIrRecorder {
const char* (*data_label_addr_unsupported_msg)(void*);
const char* (*tail_call_unrealizable_reason)(void*, const CGFuncDesc*,
const CGCallDesc*);
+ int (*asm_is_reg_constraint)(void*, const char*);
void* user;
};
@@ -552,6 +553,12 @@ static void rec_intrinsic(CgTarget* t, IntrinKind kind, Operand* dsts, u32 ndst,
in->extra.aux = aux;
}
+static int rec_asm_is_reg_constraint(CgTarget* t, const char* constraint) { /* CgTarget hook: ask the recorder's configured callback whether `constraint` is a register constraint. */
+ CgIrRecorder* r = rec_of(t);
+ if (!r->asm_is_reg_constraint) return 0; /* no callback configured: answer "not a register constraint" */
+ return r->asm_is_reg_constraint(r->user, constraint); /* the callback receives the recorder's opaque user pointer */
+}
+
static void rec_asm_block(CgTarget* t, const char* tmpl,
const AsmConstraint* outs, u32 nout, Operand* out_ops,
const AsmConstraint* ins, u32 nin,
@@ -616,6 +623,7 @@ CgTarget* cg_ir_recorder_new(Compiler* c, ObjBuilder* obj,
r->local_static_data_begin = cfg->local_static_data_begin;
r->data_label_addr_unsupported_msg = cfg->data_label_addr_unsupported_msg;
r->tail_call_unrealizable_reason = cfg->tail_call_unrealizable_reason;
+ r->asm_is_reg_constraint = cfg->asm_is_reg_constraint;
r->user = cfg->user;
}
@@ -670,6 +678,7 @@ CgTarget* cg_ir_recorder_new(Compiler* c, ObjBuilder* obj,
r->base.atomic_cas = rec_atomic_cas;
r->base.fence = rec_fence;
r->base.intrinsic = rec_intrinsic;
+ r->base.asm_is_reg_constraint = rec_asm_is_reg_constraint;
r->base.asm_block = rec_asm_block;
r->base.file_scope_asm = rec_file_scope_asm;
r->base.set_loc = rec_set_loc;
diff --git a/src/cg/ir_recorder.h b/src/cg/ir_recorder.h
@@ -14,6 +14,7 @@ typedef struct CgIrRecorderConfig {
const char* (*tail_call_unrealizable_reason)(void* user,
const CGFuncDesc* caller,
const CGCallDesc* call);
+ int (*asm_is_reg_constraint)(void* user, const char* constraint);
void* user;
} CgIrRecorderConfig;
diff --git a/src/cg/native_asm.c b/src/cg/native_asm.c
@@ -76,12 +76,65 @@ static int native_asm_default_operand_reg_ok(const NativeRegInfo* ri,
return 0;
}
+int native_asm_constraint_reg_info(NativeTarget* t, const char* constraint,
+ NativeAsmConstraintInfo* out) { /* Classify `constraint`: returns 1 and fills *out (when non-NULL) if it is a register constraint, 0 otherwise; *out is left untouched on failure. */
+ NativeAsmConstraintInfo info;
+ const char* body = native_asm_constraint_body(constraint);
+ memset(&info, 0, sizeof info);
+ info.fixed_reg = REG_NONE; /* default: the constraint names a class, not one register */
+ if (!body || !body[0]) return 0; /* an empty constraint body is never a register constraint */
+ if (t && t->regs && t->regs->asm_constraint_reg) { /* the target supplies its own constraint decoder */
+ Reg fixed = REG_NONE;
+ NativeAllocClass cls = NATIVE_REG_INT;
+ u32 allowed_mask = 0;
+ if (t->regs->asm_constraint_reg(t->regs, body, &cls, &fixed,
+ &allowed_mask)) {
+ if (allowed_mask) { /* keep only the mask bits accepted as asm operand registers */
+ u32 filtered = 0;
+ for (Reg r = 0; r < 32; ++r) {
+ if ((allowed_mask & (1u << r)) == 0) continue;
+ if (t->regs->asm_operand_reg_ok) { /* target-specific check takes precedence over the default one */
+ if (!t->regs->asm_operand_reg_ok(t->regs, cls, r)) continue;
+ } else if (!native_asm_default_operand_reg_ok(t->regs, cls, r)) {
+ continue;
+ }
+ filtered |= 1u << r;
+ }
+ allowed_mask = filtered;
+ if (!allowed_mask) return 0; /* every candidate register was filtered out */
+ }
+ if (fixed != REG_NONE) { /* a single fixed register must pass the same operand check */
+ if (t->regs->asm_operand_reg_ok) {
+ if (!t->regs->asm_operand_reg_ok(t->regs, cls, fixed)) return 0;
+ } else if (!native_asm_default_operand_reg_ok(t->regs, cls, fixed)) {
+ return 0;
+ }
+ if (fixed >= 32) return 0; /* masks are 32 bits wide; also guards the shift on the next line */
+ if (allowed_mask && (allowed_mask & (1u << fixed)) == 0) return 0; /* fixed register must lie inside a nonzero allowed set */
+ }
+ info.cls = cls;
+ info.fixed_reg = fixed;
+ info.allowed_mask = allowed_mask;
+ if (out) *out = info;
+ return 1;
+ }
+ return 0; /* the target hook exists but rejected the constraint: no generic fallback is attempted */
+ }
+ if (!native_asm_constraint_reg_class(constraint, &info.cls)) return 0; /* no target hook: fall back to native_asm_constraint_reg_class */
+ if (out) *out = info; /* fallback result carries only a class: fixed_reg == REG_NONE, allowed_mask == 0 */
+ return 1;
+}
+
+int native_asm_constraint_is_reg(NativeTarget* t, const char* constraint) { /* Yes/no form of native_asm_constraint_reg_info: same return value, details discarded. */
+ return native_asm_constraint_reg_info(t, constraint, NULL);
+}
+
NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg,
const char* constraint,
NativeAsmRegPin* out) {
Reg r;
NativeAllocClass cls;
- NativeAllocClass want;
+ NativeAsmConstraintInfo info;
if (!reg) return NATIVE_ASM_REG_PIN_ABSENT;
if (!t || !t->regs || !t->regs->resolve_name)
return NATIVE_ASM_REG_PIN_UNKNOWN;
@@ -94,9 +147,13 @@ NativeAsmRegPinStatus native_asm_resolve_pin(NativeTarget* t, Sym reg,
} else if (!native_asm_default_operand_reg_ok(t->regs, cls, r)) {
return NATIVE_ASM_REG_PIN_FORBIDDEN;
}
- if (!native_asm_constraint_reg_class(constraint, &want))
+ if (!native_asm_constraint_reg_info(t, constraint, &info))
return NATIVE_ASM_REG_PIN_BAD_CONSTRAINT;
- if (want != cls) return NATIVE_ASM_REG_PIN_CLASS_MISMATCH;
+ if (info.cls != cls) return NATIVE_ASM_REG_PIN_CLASS_MISMATCH;
+ if (info.fixed_reg != REG_NONE && info.fixed_reg != r)
+ return NATIVE_ASM_REG_PIN_FIXED_MISMATCH;
+ if (info.allowed_mask && (r >= 32 || (info.allowed_mask & (1u << r)) == 0))
+ return NATIVE_ASM_REG_PIN_FIXED_MISMATCH;
if (out) {
out->reg = r;
out->cls = cls;
@@ -118,6 +175,8 @@ const char* native_asm_pin_status_message(NativeAsmRegPinStatus st) {
return "asm register variable requires a register constraint";
case NATIVE_ASM_REG_PIN_CLASS_MISMATCH:
return "asm register variable class does not match its constraint";
+ case NATIVE_ASM_REG_PIN_FIXED_MISMATCH:
+ return "asm register variable conflicts with register constraint";
}
return "invalid asm register variable";
}
diff --git a/src/cg/native_asm.h b/src/cg/native_asm.h
@@ -51,6 +51,7 @@ typedef enum NativeAsmRegPinStatus {
NATIVE_ASM_REG_PIN_FORBIDDEN = -2,
NATIVE_ASM_REG_PIN_BAD_CONSTRAINT = -3,
NATIVE_ASM_REG_PIN_CLASS_MISMATCH = -4,
+ NATIVE_ASM_REG_PIN_FIXED_MISMATCH = -5,
} NativeAsmRegPinStatus;
typedef struct NativeAsmRegPin {
@@ -58,6 +59,16 @@ typedef struct NativeAsmRegPin {
NativeAllocClass cls;
} NativeAsmRegPin;
+typedef struct NativeAsmConstraintInfo { /* What one inline-asm register constraint resolves to. */
+ NativeAllocClass cls; /* Register class the constraint selects. */
+ Reg fixed_reg; /* REG_NONE when the constraint only names a register class. */
+ u32 allowed_mask; /* Bit r set = register r permitted; 0 means any valid register in cls. */
+} NativeAsmConstraintInfo;
+
+int native_asm_constraint_reg_info(NativeTarget* t, const char* constraint,
+ NativeAsmConstraintInfo* out); /* Returns 1 and fills *out (when non-NULL) if `constraint` is a register constraint, else 0. */
+int native_asm_constraint_is_reg(NativeTarget* t, const char* constraint); /* Same test as above with the details discarded. */
+
/* Resolve and validate an inline-asm operand's explicit hard-register pin
* (AsmConstraint.reg, from a GNU local register variable). Distinguishes no pin
* from invalid pins, and verifies that the operand uses a register constraint
diff --git a/src/cg/native_direct_target.c b/src/cg/native_direct_target.c
@@ -16,6 +16,7 @@
#include <string.h>
#include "abi/abi.h"
+#include "cg/native_asm.h"
#include "cg/type.h"
#include "core/arena.h"
#include "core/pool.h"
@@ -1873,6 +1874,11 @@ static void nd_asm_block(CgTarget* t, const char* tmpl,
nd_panic(d, "target does not emit inline asm");
}
+static int nd_asm_is_reg_constraint(CgTarget* t, const char* constraint) { /* CgTarget hook: classify `constraint` against this direct target's native backend. */
+ NativeDirectTarget* d = nd_of(t);
+ return native_asm_constraint_is_reg(d->native, constraint);
+}
+
static void nd_file_scope_asm(CgTarget* t, const char* src, size_t len) {
NativeDirectTarget* d = nd_of(t);
ND_REQUIRE_NATIVE(d, file_scope_asm, "target does not emit file-scope asm");
@@ -1972,6 +1978,7 @@ CgTarget* native_direct_target_new(Compiler* c, ObjBuilder* obj,
d->base.atomic_cas = nd_atomic_cas;
d->base.fence = nd_fence;
d->base.intrinsic = nd_intrinsic;
+ d->base.asm_is_reg_constraint = nd_asm_is_reg_constraint;
d->base.asm_block = nd_asm_block;
d->base.file_scope_asm = nd_file_scope_asm;
d->base.set_loc = nd_set_loc;
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -448,6 +448,10 @@ typedef struct IRAsmAux {
i32* in_fixed_regs; /* nin, -1 when unconstrained */
u8* out_fixed_cls; /* RegClass, parallel to out_fixed_regs */
u8* in_fixed_cls; /* RegClass, parallel to in_fixed_regs */
+ u32* out_allowed_masks; /* nout, 0 when the whole class is allowed */
+ u32* in_allowed_masks; /* nin, 0 when the whole class is allowed */
+ u8* out_allowed_cls; /* RegClass, parallel to out_allowed_masks */
+ u8* in_allowed_cls; /* RegClass, parallel to in_allowed_masks */
} IRAsmAux;
typedef struct IRIntrinAux {
@@ -610,6 +614,7 @@ typedef struct OptPRegInfo {
i8 preferred_hard_reg; /* soft hint for allocator; -1 = no hint */
u8 pad[1];
u32 forbidden_hard_regs; /* bit r means PReg may not allocate hard reg r. */
+ u32 allowed_hard_regs; /* 0 means unrestricted; otherwise positive mask. */
/* Subset of forbidden_hard_regs that comes from a fixed-register machine
* clobber (an instruction live across this value destroys reg r — see
* Func.inst_clobbers). Unlike soft forbids, the return-register hint must not
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -4,6 +4,7 @@
#include "abi/abi.h"
#include "cg/ir.h"
#include "cg/ir_recorder.h"
+#include "cg/native_asm.h"
#include "cg/native_direct_target.h"
#include "cg/type.h"
#include "core/arena.h"
@@ -893,6 +894,11 @@ static const char* opt_on_tail_call_unrealizable_reason(
return NULL;
}
+static int opt_on_asm_is_reg_constraint(void* user, const char* constraint) { /* Recorder callback: `user` is the OptImpl installed as cfg.user. */
+ OptImpl* o = (OptImpl*)user;
+ return native_asm_constraint_is_reg(o ? o->native : NULL, constraint); /* a NULL user is passed on as a NULL native target */
+}
+
CgTarget* opt_cgtarget_new(Compiler* c, CgTarget* target, int level) {
if (!target)
compiler_panic(c, (SrcLoc){0, 0, 0}, "opt_cgtarget_new: target is NULL");
@@ -918,6 +924,7 @@ CgTarget* opt_cgtarget_new(Compiler* c, CgTarget* target, int level) {
cfg.destroy = opt_on_destroy;
cfg.local_static_data_begin = opt_on_local_static_data_begin;
cfg.tail_call_unrealizable_reason = opt_on_tail_call_unrealizable_reason;
+ cfg.asm_is_reg_constraint = opt_on_asm_is_reg_constraint;
cfg.user = o;
return cg_ir_recorder_new(c, target->obj, &cfg);
}
diff --git a/src/opt/pass_coalesce.c b/src/opt/pass_coalesce.c
@@ -148,10 +148,31 @@ static int group_conflicts(const CoalesceCtx* c, PReg ra, PReg rb, PReg allow_a,
return 0;
}
-static int hard_reg_possible(Func* f, u8 cls, u32 forbidden) {
+static int hard_reg_possible(Func* f, u8 cls, u32 forbidden, u32 allowed) { /* Returns 1 if some register of class cls satisfies both masks; allowed == 0 means unrestricted. */
for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) {
Reg r = f->opt_hard_regs[cls][i];
- if (r < 32 && (forbidden & (1u << r)) == 0) return 1;
+ if (r >= 32) continue; /* masks only cover registers 0..31 */
+ if (allowed && (allowed & (1u << r)) == 0) continue; /* outside a nonzero allowed set */
+ if ((forbidden & (1u << r)) == 0) return 1;
+ }
+ if (allowed) { /* no match in the opt_hard_regs list: also try allowed registers outside it */
+ for (Reg r = 0; r < 32; ++r) {
+ if ((allowed & (1u << r)) == 0) continue;
+ if (forbidden & (1u << r)) continue;
+ int in_hard = 0;
+ for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) {
+ if (f->opt_hard_regs[cls][i] == r) {
+ in_hard = 1;
+ break;
+ }
+ }
+ if (in_hard) continue; /* already examined by the first loop */
+ for (u32 i = 0; i < f->opt_phys_reg_count[cls]; ++i) {
+ const CGPhysRegInfo* pi = &f->opt_phys_regs[cls][i];
+ if (pi->reg == r && (pi->flags & CG_REG_RESERVED) == 0) return 1; /* a non-reserved physical register of this class qualifies */
+ }
+ }
+ return 0; /* a restricted operand with no usable register */
}
return f->opt_hard_reg_count[cls] == 0;
}
@@ -163,19 +184,30 @@ static int group_constraints_compatible(const CoalesceCtx* c, PReg ra,
KitCgTypeId type = opt_reg_type(f, ra);
i32 tied = -1;
u32 forbidden = 0;
+ u32 allowed = 0;
for (PReg v = 1; v < opt_reg_count(f); ++v) {
PReg r = coalesce_find(f, v);
if (r != ra && r != rb) continue;
if (opt_reg_cls(f, v) != cls || opt_reg_type(f, v) != type) return 0;
const OptPRegInfo* vi = &f->preg_info[v];
forbidden |= vi->forbidden_hard_regs;
+ if (vi->allowed_hard_regs) {
+ if (allowed) {
+ allowed &= vi->allowed_hard_regs;
+ if (!allowed) return 0;
+ } else {
+ allowed = vi->allowed_hard_regs;
+ }
+ }
if (vi->tied_hard_reg >= 0) {
if (tied >= 0 && tied != vi->tied_hard_reg) return 0;
tied = vi->tied_hard_reg;
}
}
if (tied >= 0 && tied < 32 && (forbidden & (1u << (Reg)tied))) return 0;
- return hard_reg_possible(f, cls, forbidden);
+ if (tied >= 0 && tied < 32 && allowed && (allowed & (1u << (Reg)tied)) == 0)
+ return 0;
+ return hard_reg_possible(f, cls, forbidden, allowed);
}
static void coalesce_union(Func* f, PReg a, PReg b) {
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -175,6 +175,33 @@ static void apply_fixed_asm_operand(Func* f, Operand* op, i32 fixed,
f->preg_info[v].tied_hard_reg = fixed;
}
+static void apply_allowed_asm_operand(Func* f, Operand* op, u32 allowed,
+ u8 allowed_cls) { /* Restrict the PReg behind register operand `op` to the hard registers whose bits are set in `allowed`. */
+ u32 hard_mask = 0;
+ if (!op || op->kind != OPK_REG || !allowed) return; /* nothing to do for non-register operands or an empty (unrestricted) mask */
+ PReg v = (PReg)op->v.reg;
+ if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return;
+ if (allowed_cls >= OPT_REG_CLASSES || opt_reg_cls(f, v) != allowed_cls) { /* the mask's class must match the PReg's class */
+ SrcLoc loc = {0, 0, 0};
+ compiler_panic(f->c, loc, "opt asm: allowed register class mismatch");
+ }
+ for (u32 i = 0; i < f->opt_hard_reg_count[allowed_cls]; ++i) { /* gather this class's opt_hard_regs entries below 32 as a bit mask */
+ Reg r = f->opt_hard_regs[allowed_cls][i];
+ if (r < 32) hard_mask |= 1u << r;
+ }
+ if (f->preg_info[v].allowed_hard_regs) { /* already restricted: intersect with the new set */
+ u32 both = f->preg_info[v].allowed_hard_regs & allowed;
+ if (!both) { /* an empty intersection cannot be satisfied */
+ SrcLoc loc = {0, 0, 0};
+ compiler_panic(f->c, loc, "opt asm: conflicting allowed register sets");
+ }
+ f->preg_info[v].allowed_hard_regs = both;
+ } else {
+ f->preg_info[v].allowed_hard_regs = allowed;
+ }
+ f->preg_info[v].forbidden_hard_regs |= hard_mask & ~allowed; /* also forbid every listed hard register outside this call's allowed set */
+}
+
static void apply_asm_register_constraints(Func* f, Inst* in, u64* use,
u64* def, u64* live_after) {
IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
@@ -183,11 +210,17 @@ static void apply_asm_register_constraints(Func* f, Inst* in, u64* use,
for (u32 i = 0; i < aux->nout; ++i) {
i32 fixed = aux->out_fixed_regs ? aux->out_fixed_regs[i] : -1;
u8 cls = aux->out_fixed_cls ? aux->out_fixed_cls[i] : 0;
+ u32 allowed = aux->out_allowed_masks ? aux->out_allowed_masks[i] : 0;
+ u8 allowed_cls = aux->out_allowed_cls ? aux->out_allowed_cls[i] : 0;
+ apply_allowed_asm_operand(f, &aux->out_ops[i], allowed, allowed_cls);
apply_fixed_asm_operand(f, &aux->out_ops[i], fixed, cls);
}
for (u32 i = 0; i < aux->nin; ++i) {
i32 fixed = aux->in_fixed_regs ? aux->in_fixed_regs[i] : -1;
u8 cls = aux->in_fixed_cls ? aux->in_fixed_cls[i] : 0;
+ u32 allowed = aux->in_allowed_masks ? aux->in_allowed_masks[i] : 0;
+ u8 allowed_cls = aux->in_allowed_cls ? aux->in_allowed_cls[i] : 0;
+ apply_allowed_asm_operand(f, &aux->in_ops[i], allowed, allowed_cls);
apply_fixed_asm_operand(f, &aux->in_ops[i], fixed, cls);
}
@@ -569,6 +602,7 @@ typedef struct OptAllocGroupInfo {
u32 last;
i32 tied_hard_reg;
u32 forbidden_hard_regs;
+ u32 allowed_hard_regs;
u8 cls;
u8 pad[3];
} OptAllocGroupInfo;
@@ -732,6 +766,19 @@ static void alloc_group_info(Func* f, const OptLiveRangeSet* ranges, PReg root,
if (first < out->first) out->first = first;
if (vi->last_pos > out->last) out->last = vi->last_pos;
out->forbidden_hard_regs |= vi->forbidden_hard_regs;
+ if (vi->allowed_hard_regs) {
+ if (out->allowed_hard_regs) {
+ u32 both = out->allowed_hard_regs & vi->allowed_hard_regs;
+ if (!both) {
+ SrcLoc loc = {0, 0, 0};
+ compiler_panic(f->c, loc,
+ "opt asm: conflicting allowed register sets");
+ }
+ out->allowed_hard_regs = both;
+ } else {
+ out->allowed_hard_regs = vi->allowed_hard_regs;
+ }
+ }
if (vi->tied_hard_reg >= 0) out->tied_hard_reg = vi->tied_hard_reg;
}
if (out->first == (u32)~0u) out->first = 0;
@@ -745,6 +792,7 @@ static void opt_init_preg_info_from_ranges(Func* f,
for (PReg v = 0; v < opt_reg_count(f); ++v) {
i32 tied = old ? old[v].tied_hard_reg : -1;
u32 forbidden = old ? old[v].forbidden_hard_regs : 0;
+ u32 allowed = old ? old[v].allowed_hard_regs : 0;
u32 clobbered = old ? old[v].clobbered_hard_regs : 0;
u32 old_frequency = old ? old[v].frequency : 0;
i8 pref = old ? old[v].preferred_hard_reg : (i8)-1;
@@ -756,6 +804,7 @@ static void opt_init_preg_info_from_ranges(Func* f,
vi->alloc_kind = OPT_ALLOC_NONE;
vi->cls = opt_reg_cls(f, v);
vi->forbidden_hard_regs = forbidden;
+ vi->allowed_hard_regs = allowed;
vi->clobbered_hard_regs = clobbered;
if (!ranges || v == PREG_NONE || v == 0 ||
ranges->first_range_by_preg[v] == OPT_RANGE_NONE) {
@@ -1059,6 +1108,15 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges,
"opt regalloc: fixed hard reg %u is clobbered",
(unsigned)fixed);
}
+ if (fixed >= 32 ||
+ (gi.allowed_hard_regs &&
+ (gi.allowed_hard_regs & (1u << fixed)) == 0)) {
+ SrcLoc loc = {0, 0, 0};
+ compiler_panic(f->c, loc,
+ "opt regalloc: fixed hard reg %u violates asm "
+ "constraint",
+ (unsigned)fixed);
+ }
u32 bit = hard_loc_bit(cls, fixed);
if (fixed >= 32 || alloc_group_conflicts_bit(a, bit)) {
SrcLoc loc = {0, 0, 0};
@@ -1075,6 +1133,8 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges,
for (u32 r = 0; r < f->opt_hard_reg_count[cls]; ++r) {
Reg hr = f->opt_hard_regs[cls][r];
if (hr >= 32) continue;
+ if (gi.allowed_hard_regs && (gi.allowed_hard_regs & (1u << hr)) == 0)
+ continue;
if (gi.forbidden_hard_regs & (1u << hr)) continue;
u32 bit = hard_loc_bit(cls, hr);
if (alloc_group_conflicts_bit(a, bit)) continue;
@@ -1085,6 +1145,24 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges,
best_score = score;
}
}
+ if (gi.allowed_hard_regs) {
+ for (Reg hr = 0; hr < 32; ++hr) {
+ const CGPhysRegInfo* pi;
+ if ((gi.allowed_hard_regs & (1u << hr)) == 0) continue;
+ if (hard_available(f, cls, hr)) continue;
+ if (gi.forbidden_hard_regs & (1u << hr)) continue;
+ pi = phys_info_for(f, cls, hr);
+ if (!pi || (pi->flags & CG_REG_RESERVED)) continue;
+ u32 bit = hard_loc_bit(cls, hr);
+ if (alloc_group_conflicts_bit(a, bit)) continue;
+ u32 score = hard_reg_alloc_score(f, a, vi, hr);
+ if (!found || score < best_score) {
+ found = 1;
+ best = hr;
+ best_score = score;
+ }
+ }
+ }
/* Also consider the preferred hard reg if it's outside the standard
* allocable set (e.g. x0 on aa64: reserved as the ABI ret reg, not in
* aa_int_allocable). Used by apply_abi_aliasing_hints to let an IR_CALL
@@ -1102,49 +1180,55 @@ static void opt_assign_ranges(Func* f, const OptLiveRangeSet* ranges,
is_caller_saved(f, cls, (Reg)vi->preferred_hard_reg))) {
Reg hint = (Reg)vi->preferred_hard_reg;
int already_tried = 0;
- for (u32 r = 0; r < f->opt_hard_reg_count[cls]; ++r) {
- if (f->opt_hard_regs[cls][r] == hint) {
- already_tried = 1;
- break;
+ if (hint < 32 &&
+ (!gi.allowed_hard_regs || (gi.allowed_hard_regs & (1u << hint)))) {
+ for (u32 r = 0; r < f->opt_hard_reg_count[cls]; ++r) {
+ if (f->opt_hard_regs[cls][r] == hint) {
+ already_tried = 1;
+ break;
+ }
}
- }
- if (!already_tried && hint < 32 &&
- !(gi.forbidden_hard_regs & (1u << hint))) {
- u32 bit = hard_loc_bit(cls, hint);
- int hint_safe = !alloc_group_conflicts_bit(a, bit);
- /* The bitmap conflict can be falsely positive when an
- * already-assigned PReg ends exactly where v begins — the
- * swap-friendly pattern like `sub x0, x21, x0`, where the previous
- * call's result occupies x0 and the sub both reads it and writes
- * the new value. Fall back to a precise per-PReg interference check
- * that allows the unit-length overlap (same rule used by
- * opt_coalesce_ranges for moves). */
- if (!hint_safe) {
- int real_conflict = 0;
- for (PReg u = 1; u < opt_reg_count(f); ++u) {
- if (u == v) continue;
- const OptPRegInfo* ui = &f->preg_info[u];
- if (ui->alloc_kind != OPT_ALLOC_HARD) continue;
- if (ui->hard_reg != hint) continue;
- if (opt_ranges_overlap_kind(ranges, u, v) >= 2) {
- real_conflict = 1;
- break;
+ if (!already_tried && !(gi.forbidden_hard_regs & (1u << hint))) {
+ u32 bit = hard_loc_bit(cls, hint);
+ int hint_safe = !alloc_group_conflicts_bit(a, bit);
+ /* The bitmap conflict can be falsely positive when an
+ * already-assigned PReg ends exactly where v begins — the
+ * swap-friendly pattern like `sub x0, x21, x0`, where the previous
+ * call's result occupies x0 and the sub both reads it and writes
+ * the new value. Fall back to a precise per-PReg interference check
+ * that allows the unit-length overlap (same rule used by
+ * opt_coalesce_ranges for moves). */
+ if (!hint_safe) {
+ int real_conflict = 0;
+ for (PReg u = 1; u < opt_reg_count(f); ++u) {
+ if (u == v) continue;
+ const OptPRegInfo* ui = &f->preg_info[u];
+ if (ui->alloc_kind != OPT_ALLOC_HARD) continue;
+ if (ui->hard_reg != hint) continue;
+ if (opt_ranges_overlap_kind(ranges, u, v) >= 2) {
+ real_conflict = 1;
+ break;
+ }
}
+ if (!real_conflict) hint_safe = 1;
}
- if (!real_conflict) hint_safe = 1;
- }
- if (hint_safe) {
- u32 score = hard_reg_alloc_score(f, a, vi, hint);
- if (!found || score < best_score) {
- found = 1;
- best = hint;
- best_score = score;
+ if (hint_safe) {
+ u32 score = hard_reg_alloc_score(f, a, vi, hint);
+ if (!found || score < best_score) {
+ found = 1;
+ best = hint;
+ best_score = score;
+ }
}
}
}
}
if (found) {
alloc_assign_group_hard(f, a, ranges, v, best);
+ } else if (gi.allowed_hard_regs) {
+ SrcLoc loc = {0, 0, 0};
+ compiler_panic(f->c, loc,
+ "opt regalloc: no hard register satisfies asm constraint");
} else {
alloc_assign_group_stack(f, a, ranges, v);
}
diff --git a/src/opt/pass_machinize.c b/src/opt/pass_machinize.c
@@ -22,12 +22,30 @@ static void asm_prepare_constraints(Func* f, NativeTarget* target,
if (aux->nout && !aux->out_fixed_regs) {
aux->out_fixed_regs = arena_array(f->arena, i32, aux->nout);
aux->out_fixed_cls = arena_zarray(f->arena, u8, aux->nout);
- for (u32 i = 0; i < aux->nout; ++i) aux->out_fixed_regs[i] = -1;
}
if (aux->nin && !aux->in_fixed_regs) {
aux->in_fixed_regs = arena_array(f->arena, i32, aux->nin);
aux->in_fixed_cls = arena_zarray(f->arena, u8, aux->nin);
- for (u32 i = 0; i < aux->nin; ++i) aux->in_fixed_regs[i] = -1;
+ }
+ if (aux->nout && !aux->out_allowed_masks) {
+ aux->out_allowed_masks = arena_zarray(f->arena, u32, aux->nout);
+ aux->out_allowed_cls = arena_zarray(f->arena, u8, aux->nout);
+ }
+ if (aux->nin && !aux->in_allowed_masks) {
+ aux->in_allowed_masks = arena_zarray(f->arena, u32, aux->nin);
+ aux->in_allowed_cls = arena_zarray(f->arena, u8, aux->nin);
+ }
+ for (u32 i = 0; i < aux->nout; ++i) {
+ aux->out_fixed_regs[i] = -1;
+ aux->out_fixed_cls[i] = 0;
+ aux->out_allowed_masks[i] = 0;
+ aux->out_allowed_cls[i] = 0;
+ }
+ for (u32 i = 0; i < aux->nin; ++i) {
+ aux->in_fixed_regs[i] = -1;
+ aux->in_fixed_cls[i] = 0;
+ aux->in_allowed_masks[i] = 0;
+ aux->in_allowed_cls[i] = 0;
}
for (u32 i = 0; i < aux->nclob; ++i) {
Reg r;
@@ -40,25 +58,51 @@ static void asm_prepare_constraints(Func* f, NativeTarget* target,
NativeAsmRegPin pin;
NativeAsmRegPinStatus st = native_asm_resolve_pin(target, aux->outs[i].reg,
aux->outs[i].str, &pin);
- if (st == NATIVE_ASM_REG_PIN_ABSENT) continue;
- if (st != NATIVE_ASM_REG_PIN_OK) {
+ if (st == NATIVE_ASM_REG_PIN_OK) {
+ aux->out_fixed_regs[i] = (i32)pin.reg;
+ aux->out_fixed_cls[i] = (u8)pin.cls;
+ continue;
+ }
+ if (st != NATIVE_ASM_REG_PIN_ABSENT) {
compiler_panic(f->c, (SrcLoc){0, 0, 0}, "opt asm: %s",
native_asm_pin_status_message(st));
}
- aux->out_fixed_regs[i] = (i32)pin.reg;
- aux->out_fixed_cls[i] = (u8)pin.cls;
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(target, aux->outs[i].str, &info)) {
+ if (info.allowed_mask) {
+ aux->out_allowed_masks[i] = info.allowed_mask;
+ aux->out_allowed_cls[i] = (u8)info.cls;
+ }
+ if (info.fixed_reg != REG_NONE) {
+ aux->out_fixed_regs[i] = (i32)info.fixed_reg;
+ aux->out_fixed_cls[i] = (u8)info.cls;
+ }
+ }
}
for (u32 i = 0; i < aux->nin; ++i) {
NativeAsmRegPin pin;
NativeAsmRegPinStatus st =
native_asm_resolve_pin(target, aux->ins[i].reg, aux->ins[i].str, &pin);
- if (st == NATIVE_ASM_REG_PIN_ABSENT) continue;
- if (st != NATIVE_ASM_REG_PIN_OK) {
+ if (st == NATIVE_ASM_REG_PIN_OK) {
+ aux->in_fixed_regs[i] = (i32)pin.reg;
+ aux->in_fixed_cls[i] = (u8)pin.cls;
+ continue;
+ }
+ if (st != NATIVE_ASM_REG_PIN_ABSENT) {
compiler_panic(f->c, (SrcLoc){0, 0, 0}, "opt asm: %s",
native_asm_pin_status_message(st));
}
- aux->in_fixed_regs[i] = (i32)pin.reg;
- aux->in_fixed_cls[i] = (u8)pin.cls;
+ NativeAsmConstraintInfo info;
+ if (native_asm_constraint_reg_info(target, aux->ins[i].str, &info)) {
+ if (info.allowed_mask) {
+ aux->in_allowed_masks[i] = info.allowed_mask;
+ aux->in_allowed_cls[i] = (u8)info.cls;
+ }
+ if (info.fixed_reg != REG_NONE) {
+ aux->in_fixed_regs[i] = (i32)info.fixed_reg;
+ aux->in_fixed_cls[i] = (u8)info.cls;
+ }
+ }
}
}
diff --git a/test/parse/cases/cg_native_inline_asm_machine_constraints.c b/test/parse/cases/cg_native_inline_asm_machine_constraints.c
@@ -0,0 +1,19 @@
+int test_main(void) { /* Every branch returns 42 on success, matching the .expected file. */
+#if defined(__aarch64__)
+ double x = 1.0;
+ double y = 2.0;
+ __asm__ volatile("" : : "x"(x), "y"(y)); /* empty template: only the "x"/"y" input constraints are exercised */
+ return 42;
+#elif defined(__riscv) && __riscv_xlen == 64
+ int a = 20;
+ int b = 22;
+ int out = 0;
+ float fa = 1.0f;
+ float fb = 2.0f;
+ __asm__ volatile("add %0, %1, %2" : "=cr"(out) : "cr"(a), "cr"(b)); /* out = a + b, expected 42 */
+ __asm__ volatile("" : : "cf"(fa), "cf"(fb)); /* empty template: only the "cf" input constraints are exercised */
+ return out == 42 ? 42 : 1; /* 1 signals a wrong sum */
+#else
+ return 42; /* other targets: nothing to exercise */
+#endif
+}
diff --git a/test/parse/cases/cg_native_inline_asm_machine_constraints.expected b/test/parse/cases/cg_native_inline_asm_machine_constraints.expected
@@ -0,0 +1 @@
+42
diff --git a/test/parse/cases/cg_x64_inline_asm_machine_constraints.c b/test/parse/cases/cg_x64_inline_asm_machine_constraints.c
@@ -0,0 +1,17 @@
+int test_main(void) { /* Returns 42 on success, matching the .expected file. */
+#if defined(__x86_64__)
+ int aout = 0;
+ int bout = 0;
+
+ __asm__ volatile("movl %1, %0\n\taddl %2, %0"
+ : "=a"(aout)
+ : "c"(20), "d"(22)); /* aout = 20 + 22 via the "a", "c" and "d" constraints */
+ __asm__ volatile("movl %1, %0\n\taddl %2, %0"
+ : "=b"(bout)
+ : "S"(17), "D"(25)); /* bout = 17 + 25 via the "b", "S" and "D" constraints */
+
+ return (aout == 42 && bout == 42) ? 42 : 1; /* 1 signals that either sum was wrong */
+#else
+ return 42; /* non-x86-64 targets: nothing to exercise */
+#endif
+}
diff --git a/test/parse/cases/cg_x64_inline_asm_machine_constraints.expected b/test/parse/cases/cg_x64_inline_asm_machine_constraints.expected
@@ -0,0 +1 @@
+42