commit 4931d48851d32a219a48d75854daf88be384e4e1
parent 056fc9ea14811f46112837b87a5a7de5630bce80
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 3 Jun 2026 22:01:49 -0700
rv32 lowering fixes
Diffstat:
3 files changed, 107 insertions(+), 22 deletions(-)
diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c
@@ -3661,6 +3661,61 @@ static Reg rv_asm_native_mem_base(RvNativeTarget* a, SrcLoc loc, NativeLoc src,
return dst;
}
+static u32 rv_asm_reg_mem_size(RvNativeTarget* a, NativeAllocClass cls,  /* Returns the byte size of `type`, capped for integer-class operands. */
+ KitCgTypeId type) {  /* a: supplies base target and variant; cls: register class of the operand; type: operand type */
+ u32 sz = native_type_size(&a->base, type);  /* start from the size reported for the type */
+ if (cls == NATIVE_REG_INT && sz > a->variant->ptr_bytes)  /* only NATIVE_REG_INT is capped; other classes keep the full size */
+ sz = a->variant->ptr_bytes;  /* presumably the width of one integer register on this variant - TODO confirm */
+ return sz;
+}
+
+static Reg rv_asm_stage_reg(RvNativeTarget* a, SrcLoc loc, NativeAllocClass cls,  /* Returns the next unused staging register for class `cls`. */
+ u32* nint, u32* nfp) {  /* nint / nfp: running counts of integer / fp staging registers handed out; the one used is incremented */
+ static const Reg int_regs[] = {RV_TMP2, RV_TMP3};  /* integer pool, handed out in this order */
+ static const Reg fp_regs[] = {RV_FTMP0, RV_FTMP1};  /* floating-point pool, handed out in this order */
+ if (cls == NATIVE_REG_FP) {
+ if (*nfp >= (u32)(sizeof fp_regs / sizeof fp_regs[0]))  /* fp pool exhausted */
+ rv_asm_panic_at(a->base.c, loc, "too many staged fp asm operands");  /* NOTE(review): the index below is in bounds only if this call does not return - confirm */
+ return fp_regs[(*nfp)++];
+ }
+ if (*nint >= (u32)(sizeof int_regs / sizeof int_regs[0]))  /* every class other than NATIVE_REG_FP draws from the integer pool */
+ rv_asm_panic_at(a->base.c, loc, "too many staged integer asm operands");  /* NOTE(review): same non-returning assumption as the fp path - confirm */
+ return int_regs[(*nint)++];
+}
+
+static void rv_asm_load_loc_to_reg(RvNativeTarget* a, SrcLoc loc, NativeLoc src,  /* Brings the value at `src` into the register location `dst`. */
+ NativeLoc dst) {  /* dst is used as a register location: its v.reg, cls and type fields are read */
+ NativeTarget* t = &a->base;
+ NativeAllocClass cls = (NativeAllocClass)dst.cls;  /* class of the destination register drives the checks below */
+ if (src.kind == NATIVE_LOC_REG) {  /* case 1: source is already a register */
+ if (src.v.reg != dst.v.reg || src.cls != dst.cls) t->move(t, dst, src);  /* no move when src and dst are the same register in the same class */
+ return;
+ }
+ if (src.kind == NATIVE_LOC_IMM) {  /* case 2: source is an immediate */
+ if (cls != NATIVE_REG_INT)  /* immediates are accepted only for integer-class destinations */
+ rv_asm_panic_at(t->c, loc,
+ "floating-point immediate asm input is unsupported");
+ t->load_imm(t, dst, src.v.imm);
+ return;
+ }
+ rv_emit_mem(a, 1, dst, rv_asm_loc_to_addr(a, loc, src),  /* case 3: any other kind is addressed via rv_asm_loc_to_addr; the literal 1 presumably selects a load - confirm against rv_emit_mem */
+ native_mem_for_type(t, dst.type,
+ rv_asm_reg_mem_size(a, cls, dst.type)));  /* access size comes from rv_asm_reg_mem_size for dst's class and type */
+}
+
+static void rv_asm_store_reg_to_loc(RvNativeTarget* a, SrcLoc loc, NativeLoc dst,  /* Writes the register location `src` out to `dst`. */
+ NativeLoc src) {  /* src is used as a register location: its v.reg, cls and type fields are read */
+ NativeTarget* t = &a->base;
+ NativeAllocClass cls = (NativeAllocClass)src.cls;  /* class of the source register, passed to the size helper below */
+ if (dst.kind == NATIVE_LOC_REG) {  /* register destination: a move is enough */
+ if (dst.v.reg != src.v.reg || dst.cls != src.cls) t->move(t, dst, src);  /* no move when dst and src are the same register in the same class */
+ return;
+ }
+ rv_emit_mem(a, 0, src, rv_asm_loc_to_addr(a, loc, dst),  /* every non-register dst kind takes this path; the literal 0 presumably selects a store - confirm against rv_emit_mem */
+ native_mem_for_type(t, src.type,
+ rv_asm_reg_mem_size(a, cls, src.type)));  /* access size comes from rv_asm_reg_mem_size for src's class and type */
+}
+
static void rv_asm_bind_native(RvNativeTarget* a, SrcLoc loc, Operand* out,
const char* constraint, KitCgTypeId type,
NativeLoc src, u32* ntmp) {
@@ -3694,13 +3749,23 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl,
SrcLoc loc = a->func ? a->func->loc : (SrcLoc){0, 0, 0};
Operand* bound_outs = nout ? arena_zarray(c->tu, Operand, nout) : NULL;
Operand* bound_ins = nin ? arena_zarray(c->tu, Operand, nin) : NULL;
- u32 ntmp = 0, i;
+ u8* staged_outs = nout ? arena_zarray(c->tu, u8, nout) : NULL;
+ u32 ntmp = 0, nstage_int = 0, nstage_fp = 0, i;
Rv64Asm* asmh;
for (i = 0; i < nout; ++i) {
+ const char* body = native_asm_constraint_body(outs[i].str);
KitCgTypeId type = outs[i].type ? outs[i].type : out_locs[i].type;
- rv_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, out_locs[i],
- &ntmp);
+ NativeLoc outloc = out_locs[i];
+ if ((body[0] == 'r' || body[0] == 'f') && outloc.kind != NATIVE_LOC_REG) {
+ NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT;
+ Reg r = rv_asm_stage_reg(a, loc, cls, &nstage_int, &nstage_fp);
+ outloc = native_loc_reg(type, cls, r);
+ staged_outs[i] = 1u;
+ if (outs[i].dir == KIT_CG_ASM_INOUT)
+ rv_asm_load_loc_to_reg(a, loc, out_locs[i], outloc);
+ }
+ rv_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, outloc, &ntmp);
}
for (i = 0; i < nin; ++i) {
const char* body = native_asm_constraint_body(ins[i].str);
@@ -3715,16 +3780,11 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl,
}
type = ins[i].type ? ins[i].type : in_locs[i].type;
inloc = in_locs[i];
- /* A register-constrained input that lives in a frame slot (address-taken
- * local) must be loaded into a reserved scratch first. */
- if (body[0] == 'r' && inloc.kind != NATIVE_LOC_REG) {
- Reg r;
- if (ntmp >= 2u) rv_asm_panic_at(c, loc, "too many memory asm operands");
- r = (ntmp == 0u) ? RV_TMP0 : RV_TMP1;
- ntmp++;
- inloc = native_loc_reg(type, NATIVE_REG_INT, r);
- rv_emit_mem(a, 1, inloc, rv_asm_loc_to_addr(a, loc, in_locs[i]),
- native_mem_for_type(t, type, native_type_size(t, type)));
+ if ((body[0] == 'r' || body[0] == 'f') && inloc.kind != NATIVE_LOC_REG) {
+ NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT;
+ Reg r = rv_asm_stage_reg(a, loc, cls, &nstage_int, &nstage_fp);
+ inloc = native_loc_reg(type, cls, r);
+ rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc);
}
rv_asm_bind_native(a, loc, &bound_ins[i], ins[i].str, type, inloc, &ntmp);
}
@@ -3737,6 +3797,17 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl,
nclob);
rv64_asm_run_template(asmh, t->mc, tmpl);
rv64_asm_close(asmh);
+
+ for (i = 0; i < nout; ++i) {
+ NativeAllocClass cls;
+ NativeLoc src;
+ if (!staged_outs || !staged_outs[i]) continue;
+ if (bound_outs[i].kind != RV64_INLINE_OPK_REG) continue;
+ cls = bound_outs[i].pad[0] == RV64_INLINE_OPCLS_FP ? NATIVE_REG_FP
+ : NATIVE_REG_INT;
+ src = native_loc_reg(bound_outs[i].type, cls, (Reg)bound_outs[i].v.local);
+ rv_asm_store_reg_to_loc(a, loc, out_locs[i], src);
+ }
}
/* file_scope_asm + finalize are shared (cg/native_asm.h). */
diff --git a/src/cg/arith.c b/src/cg/arith.c
@@ -1100,6 +1100,7 @@ void kit_cg_int_unop(KitCg* g, KitCgIntUnOp op, uint32_t flags) {
api_push(g, api_make_sv(orl, i32));
kit_cg_push_int(g, 0, i32);
api_cg_cmp(g, CMP_EQ);
+ api_cg_convert_kind(g, ty, CV_ZEXT);
return;
}
}
diff --git a/src/opt/cg_ir_lower.c b/src/opt/cg_ir_lower.c
@@ -815,11 +815,18 @@ static void lower_intrinsic(CgIrLower* l, Inst* out, const CgIrInst* in) {
for (u32 i = 0; i < src->narg; ++i)
aux->args[i] = lower_operand_value(l, &src->args[i], in->loc);
if (src->ndst) {
- out->ndefs = src->ndst;
- out->defs = arena_array(l->f->arena, Val, src->ndst);
+ u32 ndefs = 0;
for (u32 i = 0; i < src->ndst; ++i)
- out->defs[i] = aux->dsts[i].kind == OPK_REG ? aux->dsts[i].v.reg : 0;
- out->def = out->defs[0];
+ if (aux->dsts[i].kind == OPK_REG) ++ndefs;
+ if (ndefs) {
+ u32 d = 0;
+ out->ndefs = ndefs;
+ out->defs = arena_array(l->f->arena, Val, ndefs);
+ for (u32 i = 0; i < src->ndst; ++i)
+ if (aux->dsts[i].kind == OPK_REG)
+ out->defs[d++] = aux->dsts[i].v.reg;
+ out->def = out->defs[0];
+ }
out->type = aux->dsts[0].type;
}
}
@@ -847,12 +854,18 @@ static void lower_asm(CgIrLower* l, Inst* out, const CgIrInst* in) {
for (u32 i = 0; i < src->nin; ++i)
aux->in_ops[i] = lower_operand_value(l, &src->in_ops[i], in->loc);
if (src->nout) {
- out->ndefs = src->nout;
- out->defs = arena_array(l->f->arena, Val, src->nout);
+ u32 ndefs = 0;
for (u32 i = 0; i < src->nout; ++i)
- out->defs[i] =
- aux->out_ops[i].kind == OPK_REG ? aux->out_ops[i].v.reg : 0;
- out->def = out->defs[0];
+ if (aux->out_ops[i].kind == OPK_REG) ++ndefs;
+ if (ndefs) {
+ u32 d = 0;
+ out->ndefs = ndefs;
+ out->defs = arena_array(l->f->arena, Val, ndefs);
+ for (u32 i = 0; i < src->nout; ++i)
+ if (aux->out_ops[i].kind == OPK_REG)
+ out->defs[d++] = aux->out_ops[i].v.reg;
+ out->def = out->defs[0];
+ }
out->type = aux->out_ops[0].type;
}
}