kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 6f48bfde8f810cd7e705dc9c31f7391da7607acc
parent e554263a21a3c16282604bbdaad3bee5283f039c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  2 Jun 2026 03:54:03 -0700

cg: flow i128/f128 as VALUEs, collapse wide16 special paths (Track 7.3)

Diffstat:
M src/cg/call.c | 32 ++++++++++++--------------------
M src/cg/memory.c | 87 +++++++------------------------------------------------------------------------------
2 files changed, 19 insertions(+), 100 deletions(-)

diff --git a/src/cg/call.c b/src/cg/call.c @@ -41,7 +41,14 @@ static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg, return r; } CfreeCgTypeId src_ty = api_sv_type(arg); - Operand op = api_force_local_unless_imm(g, arg, src_ty); + Operand op; + /* A 16-byte scalar immediate (an i128 small constant) only carries 64 bits in + * op.v.imm; materialize it into both sign-extended lanes so it flows as an + * ordinary 16-byte value rather than load_imm'ing only the low lane. */ + if (api_sv_op_is(arg, OPK_IMM) && api_is_wide16_scalar_type(g->c, ty)) { + *arg = api_make_wide16_int_const(g, arg->op.v.imm, ty); + } + op = api_force_local_unless_imm(g, arg, src_ty); if (op.kind == OPK_LOCAL && api_unalias_type(g->c, op.type) == api_unalias_type(g->c, ty)) { return op.v.local; @@ -50,23 +57,7 @@ static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg, CGLocal r = api_alloc_temp_local(g, ty); Operand dst = api_op_local(r, ty); if (op.kind == OPK_IMM) { - if (api_is_wide16_scalar_type(g->c, ty)) { - /* A 16-byte scalar immediate (an i128 small constant) only carries - * 64 bits in op.v.imm; load_imm would leave the high lane as stack - * garbage. Write both lanes, sign-extending into the high half. */ - u8 bytes[16]; - u64 lo = (u64)op.v.imm; - u64 hi = (op.v.imm < 0) ? ~(u64)0 : 0; - for (u32 i = 0; i < 8; ++i) { - u32 lo_idx = g->c->target.big_endian ? 15u - i : i; - u32 hi_idx = g->c->target.big_endian ? 
7u - i : 8u + i; - bytes[lo_idx] = (u8)(lo >> (i * 8u)); - bytes[hi_idx] = (u8)(hi >> (i * 8u)); - } - api_store_f128_bytes(g, r, ty, bytes); - } else { - g->target->load_imm(g->target, dst, op.v.imm); - } + g->target->load_imm(g->target, dst, op.v.imm); } else if (op.kind == OPK_LOCAL) { g->target->copy(g->target, dst, op); } else { @@ -98,8 +89,9 @@ void api_release_call_args(CfreeCg* g, CGLocal* args, u32 nargs) { void api_push_call_result(CfreeCg* g, CGLocal result, CfreeCgTypeId ret_ty) { Operand op = api_op_local(result, ret_ty); - if (cg_type_is_aggregate(g->c, ret_ty) || - api_is_wide16_scalar_type(g->c, ret_ty)) { + /* An aggregate result is a PLACE (it is addressed/copied, never a scalar + * VALUE); i128/f128 are scalar VALUEs and flow like any other result. */ + if (cg_type_is_aggregate(g->c, ret_ty)) { api_push(g, api_make_lv(op, ret_ty)); } else { api_push(g, api_make_sv(op, ret_ty)); diff --git a/src/cg/memory.c b/src/cg/memory.c @@ -258,12 +258,6 @@ void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access) { return; } - /* Wide-16 scalar place: keep the addressable storage as the value. */ - if (!is_bitfield && api_is_wide16_scalar_type(g->c, ty)) { - api_push(g, base); - return; - } - /* Resolve the place into a single backend memop operand. */ if (!api_operand_can_address(&base.op)) { CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base)); @@ -441,80 +435,13 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) { if (!is_bitfield) api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); - /* Wide-16 scalar store. 
*/ - if (!is_bitfield && api_is_wide16_scalar_type(g->c, ty)) { - if (base.source_local != CFREE_CG_LOCAL_NONE) { - api_local_const_clear(api_local_from_handle(g, base.source_local)); - } else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL || - (access.flags & CFREE_CG_MEM_VOLATILE)) { - api_local_const_memory_boundary(g); - } - if (api_is_lvalue_sv(&rv)) { - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Operand dst_addr; - Operand src_addr; - int dst_addr_owned = 0; - int src_addr_owned = 0; - AggregateAccess agg; - if (base.op.kind == OPK_LOCAL) { - dst_addr = base.op; - } else { - dst_addr = api_lvalue_addr(g, &base, ptr_ty); - dst_addr_owned = 1; - } - if (rv.op.kind == OPK_LOCAL) { - src_addr = rv.op; - } else { - src_addr = api_lvalue_addr(g, &rv, ptr_ty); - src_addr_owned = 1; - } - memset(&agg, 0, sizeof agg); - agg.size = 16; - agg.align = access.align ? access.align : 16; - T->copy_bytes(T, dst_addr, src_addr, agg); - if (dst_addr_owned) { - api_release_temp_local(g, dst_addr.kind == OPK_INDIRECT - ? dst_addr.v.ind.base - : dst_addr.v.local); - } - if (src_addr_owned) api_release_temp_local(g, src_addr.v.local); - } else if (rv.op.kind == OPK_IMM) { - u8 bytes[16]; - u64 lo = (u64)rv.op.v.imm; - u64 hi = rv.op.v.imm < 0 ? ~(u64)0 : 0; - memset(bytes, 0, sizeof bytes); - for (u32 i = 0; i < 8; ++i) { - u32 lo_idx = g->c->target.big_endian ? 15u - i : i; - u32 hi_idx = g->c->target.big_endian ? 
7u - i : 8u + i; - bytes[lo_idx] = (u8)(lo >> (i * 8u)); - bytes[hi_idx] = (u8)(hi >> (i * 8u)); - } - if (base.op.kind == OPK_LOCAL) { - api_store_f128_bytes(g, base.op.v.local, ty, bytes); - } else { - CGLocal local = api_f128_temp_local(g, ty); - ApiSValue tmp = api_make_lv(api_op_local(local, ty), ty); - CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); - Operand dst_addr; - Operand src_addr; - AggregateAccess agg; - api_store_f128_bytes(g, local, ty, bytes); - dst_addr = api_lvalue_addr(g, &base, ptr_ty); - src_addr = api_lvalue_addr(g, &tmp, ptr_ty); - memset(&agg, 0, sizeof agg); - agg.size = 16; - agg.align = access.align ? access.align : 16; - T->copy_bytes(T, dst_addr, src_addr, agg); - api_release_temp_local(g, dst_addr.v.local); - api_release_temp_local(g, src_addr.v.local); - } - } else { - src = api_force_local(g, &rv, ty); - T->store(T, base.op, src, api_mem_from_access(g, &base.op, access)); - } - api_release(g, &base); - api_release(g, &rv); - return; + /* A 16-byte scalar immediate (an i128 small constant) only carries 64 bits in + * op.v.imm; materialize it into both sign-extended lanes so the general store + * path moves a correct 16-byte value rather than load_imm'ing the low lane and + * leaving the high half as garbage. */ + if (!is_bitfield && api_sv_op_is(&rv, OPK_IMM) && + api_is_wide16_scalar_type(g->c, ty)) { + rv = api_make_wide16_int_const(g, rv.op.v.imm, ty); } /* General scalar / bit-field store. Compute the source operand first so its