kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 4931d48851d32a219a48d75854daf88be384e4e1
parent 056fc9ea14811f46112837b87a5a7de5630bce80
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed,  3 Jun 2026 22:01:49 -0700

rv32 lowering fixes

Diffstat:
M src/arch/riscv/native.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
M src/cg/arith.c | 1 +
M src/opt/cg_ir_lower.c | 31 ++++++++++++++++++++++---------
3 files changed, 107 insertions(+), 22 deletions(-)

diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c @@ -3661,6 +3661,61 @@ static Reg rv_asm_native_mem_base(RvNativeTarget* a, SrcLoc loc, NativeLoc src, return dst; } +static u32 rv_asm_reg_mem_size(RvNativeTarget* a, NativeAllocClass cls, + KitCgTypeId type) { + u32 sz = native_type_size(&a->base, type); + if (cls == NATIVE_REG_INT && sz > a->variant->ptr_bytes) + sz = a->variant->ptr_bytes; + return sz; +} + +static Reg rv_asm_stage_reg(RvNativeTarget* a, SrcLoc loc, NativeAllocClass cls, + u32* nint, u32* nfp) { + static const Reg int_regs[] = {RV_TMP2, RV_TMP3}; + static const Reg fp_regs[] = {RV_FTMP0, RV_FTMP1}; + if (cls == NATIVE_REG_FP) { + if (*nfp >= (u32)(sizeof fp_regs / sizeof fp_regs[0])) + rv_asm_panic_at(a->base.c, loc, "too many staged fp asm operands"); + return fp_regs[(*nfp)++]; + } + if (*nint >= (u32)(sizeof int_regs / sizeof int_regs[0])) + rv_asm_panic_at(a->base.c, loc, "too many staged integer asm operands"); + return int_regs[(*nint)++]; +} + +static void rv_asm_load_loc_to_reg(RvNativeTarget* a, SrcLoc loc, NativeLoc src, + NativeLoc dst) { + NativeTarget* t = &a->base; + NativeAllocClass cls = (NativeAllocClass)dst.cls; + if (src.kind == NATIVE_LOC_REG) { + if (src.v.reg != dst.v.reg || src.cls != dst.cls) t->move(t, dst, src); + return; + } + if (src.kind == NATIVE_LOC_IMM) { + if (cls != NATIVE_REG_INT) + rv_asm_panic_at(t->c, loc, + "floating-point immediate asm input is unsupported"); + t->load_imm(t, dst, src.v.imm); + return; + } + rv_emit_mem(a, 1, dst, rv_asm_loc_to_addr(a, loc, src), + native_mem_for_type(t, dst.type, + rv_asm_reg_mem_size(a, cls, dst.type))); +} + +static void rv_asm_store_reg_to_loc(RvNativeTarget* a, SrcLoc loc, NativeLoc dst, + NativeLoc src) { + NativeTarget* t = &a->base; + NativeAllocClass cls = (NativeAllocClass)src.cls; + if (dst.kind == NATIVE_LOC_REG) { + if (dst.v.reg != src.v.reg || dst.cls != src.cls) t->move(t, dst, src); + return; + } + rv_emit_mem(a, 0, src, rv_asm_loc_to_addr(a, 
loc, dst), + native_mem_for_type(t, src.type, + rv_asm_reg_mem_size(a, cls, src.type))); +} + static void rv_asm_bind_native(RvNativeTarget* a, SrcLoc loc, Operand* out, const char* constraint, KitCgTypeId type, NativeLoc src, u32* ntmp) { @@ -3694,13 +3749,23 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl, SrcLoc loc = a->func ? a->func->loc : (SrcLoc){0, 0, 0}; Operand* bound_outs = nout ? arena_zarray(c->tu, Operand, nout) : NULL; Operand* bound_ins = nin ? arena_zarray(c->tu, Operand, nin) : NULL; - u32 ntmp = 0, i; + u8* staged_outs = nout ? arena_zarray(c->tu, u8, nout) : NULL; + u32 ntmp = 0, nstage_int = 0, nstage_fp = 0, i; Rv64Asm* asmh; for (i = 0; i < nout; ++i) { + const char* body = native_asm_constraint_body(outs[i].str); KitCgTypeId type = outs[i].type ? outs[i].type : out_locs[i].type; - rv_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, out_locs[i], - &ntmp); + NativeLoc outloc = out_locs[i]; + if ((body[0] == 'r' || body[0] == 'f') && outloc.kind != NATIVE_LOC_REG) { + NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT; + Reg r = rv_asm_stage_reg(a, loc, cls, &nstage_int, &nstage_fp); + outloc = native_loc_reg(type, cls, r); + staged_outs[i] = 1u; + if (outs[i].dir == KIT_CG_ASM_INOUT) + rv_asm_load_loc_to_reg(a, loc, out_locs[i], outloc); + } + rv_asm_bind_native(a, loc, &bound_outs[i], outs[i].str, type, outloc, &ntmp); } for (i = 0; i < nin; ++i) { const char* body = native_asm_constraint_body(ins[i].str); @@ -3715,16 +3780,11 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl, } type = ins[i].type ? ins[i].type : in_locs[i].type; inloc = in_locs[i]; - /* A register-constrained input that lives in a frame slot (address-taken - * local) must be loaded into a reserved scratch first. */ - if (body[0] == 'r' && inloc.kind != NATIVE_LOC_REG) { - Reg r; - if (ntmp >= 2u) rv_asm_panic_at(c, loc, "too many memory asm operands"); - r = (ntmp == 0u) ? 
RV_TMP0 : RV_TMP1; - ntmp++; - inloc = native_loc_reg(type, NATIVE_REG_INT, r); - rv_emit_mem(a, 1, inloc, rv_asm_loc_to_addr(a, loc, in_locs[i]), - native_mem_for_type(t, type, native_type_size(t, type))); + if ((body[0] == 'r' || body[0] == 'f') && inloc.kind != NATIVE_LOC_REG) { + NativeAllocClass cls = (body[0] == 'f') ? NATIVE_REG_FP : NATIVE_REG_INT; + Reg r = rv_asm_stage_reg(a, loc, cls, &nstage_int, &nstage_fp); + inloc = native_loc_reg(type, cls, r); + rv_asm_load_loc_to_reg(a, loc, in_locs[i], inloc); } rv_asm_bind_native(a, loc, &bound_ins[i], ins[i].str, type, inloc, &ntmp); } @@ -3737,6 +3797,17 @@ static void rv_asm_block_native(NativeTarget* t, const char* tmpl, nclob); rv64_asm_run_template(asmh, t->mc, tmpl); rv64_asm_close(asmh); + + for (i = 0; i < nout; ++i) { + NativeAllocClass cls; + NativeLoc src; + if (!staged_outs || !staged_outs[i]) continue; + if (bound_outs[i].kind != RV64_INLINE_OPK_REG) continue; + cls = bound_outs[i].pad[0] == RV64_INLINE_OPCLS_FP ? NATIVE_REG_FP + : NATIVE_REG_INT; + src = native_loc_reg(bound_outs[i].type, cls, (Reg)bound_outs[i].v.local); + rv_asm_store_reg_to_loc(a, loc, out_locs[i], src); + } } /* file_scope_asm + finalize are shared (cg/native_asm.h). */ diff --git a/src/cg/arith.c b/src/cg/arith.c @@ -1100,6 +1100,7 @@ void kit_cg_int_unop(KitCg* g, KitCgIntUnOp op, uint32_t flags) { api_push(g, api_make_sv(orl, i32)); kit_cg_push_int(g, 0, i32); api_cg_cmp(g, CMP_EQ); + api_cg_convert_kind(g, ty, CV_ZEXT); return; } } diff --git a/src/opt/cg_ir_lower.c b/src/opt/cg_ir_lower.c @@ -815,11 +815,18 @@ static void lower_intrinsic(CgIrLower* l, Inst* out, const CgIrInst* in) { for (u32 i = 0; i < src->narg; ++i) aux->args[i] = lower_operand_value(l, &src->args[i], in->loc); if (src->ndst) { - out->ndefs = src->ndst; - out->defs = arena_array(l->f->arena, Val, src->ndst); + u32 ndefs = 0; for (u32 i = 0; i < src->ndst; ++i) - out->defs[i] = aux->dsts[i].kind == OPK_REG ? 
aux->dsts[i].v.reg : 0; - out->def = out->defs[0]; + if (aux->dsts[i].kind == OPK_REG) ++ndefs; + if (ndefs) { + u32 d = 0; + out->ndefs = ndefs; + out->defs = arena_array(l->f->arena, Val, ndefs); + for (u32 i = 0; i < src->ndst; ++i) + if (aux->dsts[i].kind == OPK_REG) + out->defs[d++] = aux->dsts[i].v.reg; + out->def = out->defs[0]; + } out->type = aux->dsts[0].type; } } @@ -847,12 +854,18 @@ static void lower_asm(CgIrLower* l, Inst* out, const CgIrInst* in) { for (u32 i = 0; i < src->nin; ++i) aux->in_ops[i] = lower_operand_value(l, &src->in_ops[i], in->loc); if (src->nout) { - out->ndefs = src->nout; - out->defs = arena_array(l->f->arena, Val, src->nout); + u32 ndefs = 0; for (u32 i = 0; i < src->nout; ++i) - out->defs[i] = - aux->out_ops[i].kind == OPK_REG ? aux->out_ops[i].v.reg : 0; - out->def = out->defs[0]; + if (aux->out_ops[i].kind == OPK_REG) ++ndefs; + if (ndefs) { + u32 d = 0; + out->ndefs = ndefs; + out->defs = arena_array(l->f->arena, Val, ndefs); + for (u32 i = 0; i < src->nout; ++i) + if (aux->out_ops[i].kind == OPK_REG) + out->defs[d++] = aux->out_ops[i].v.reg; + out->def = out->defs[0]; + } out->type = aux->out_ops[0].type; } }