commit 6f48bfde8f810cd7e705dc9c31f7391da7607acc
parent e554263a21a3c16282604bbdaad3bee5283f039c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 2 Jun 2026 03:54:03 -0700
cg: flow i128/f128 as VALUEs, collapse wide16 special paths (Track 7.3)
Diffstat:
| M | src/cg/call.c | | | 32 | ++++++++++++-------------------- |
| M | src/cg/memory.c | | | 87 | +++++++------------------------------------------------------------------------ |
2 files changed, 19 insertions(+), 100 deletions(-)
diff --git a/src/cg/call.c b/src/cg/call.c
@@ -41,7 +41,14 @@ static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg,
return r;
}
CfreeCgTypeId src_ty = api_sv_type(arg);
- Operand op = api_force_local_unless_imm(g, arg, src_ty);
+ Operand op;
+ /* A 16-byte scalar immediate (a small i128 constant) carries only 64 bits in
+ * op.v.imm; materialize both lanes (the high lane sign-extended from the low)
+ * so it flows as an ordinary 16-byte value, not a load_imm of the low lane. */
+ if (api_sv_op_is(arg, OPK_IMM) && api_is_wide16_scalar_type(g->c, ty)) {
+ *arg = api_make_wide16_int_const(g, arg->op.v.imm, ty);
+ }
+ op = api_force_local_unless_imm(g, arg, src_ty);
if (op.kind == OPK_LOCAL &&
api_unalias_type(g->c, op.type) == api_unalias_type(g->c, ty)) {
return op.v.local;
@@ -50,23 +57,7 @@ static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg,
CGLocal r = api_alloc_temp_local(g, ty);
Operand dst = api_op_local(r, ty);
if (op.kind == OPK_IMM) {
- if (api_is_wide16_scalar_type(g->c, ty)) {
- /* A 16-byte scalar immediate (an i128 small constant) only carries
- * 64 bits in op.v.imm; load_imm would leave the high lane as stack
- * garbage. Write both lanes, sign-extending into the high half. */
- u8 bytes[16];
- u64 lo = (u64)op.v.imm;
- u64 hi = (op.v.imm < 0) ? ~(u64)0 : 0;
- for (u32 i = 0; i < 8; ++i) {
- u32 lo_idx = g->c->target.big_endian ? 15u - i : i;
- u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i;
- bytes[lo_idx] = (u8)(lo >> (i * 8u));
- bytes[hi_idx] = (u8)(hi >> (i * 8u));
- }
- api_store_f128_bytes(g, r, ty, bytes);
- } else {
- g->target->load_imm(g->target, dst, op.v.imm);
- }
+ g->target->load_imm(g->target, dst, op.v.imm);
} else if (op.kind == OPK_LOCAL) {
g->target->copy(g->target, dst, op);
} else {
@@ -98,8 +89,9 @@ void api_release_call_args(CfreeCg* g, CGLocal* args, u32 nargs) {
void api_push_call_result(CfreeCg* g, CGLocal result, CfreeCgTypeId ret_ty) {
Operand op = api_op_local(result, ret_ty);
- if (cg_type_is_aggregate(g->c, ret_ty) ||
- api_is_wide16_scalar_type(g->c, ret_ty)) {
+ /* An aggregate result is a PLACE (it is addressed/copied, never a scalar
+ * VALUE); i128/f128 are scalar VALUEs and flow like any other result. */
+ if (cg_type_is_aggregate(g->c, ret_ty)) {
api_push(g, api_make_lv(op, ret_ty));
} else {
api_push(g, api_make_sv(op, ret_ty));
diff --git a/src/cg/memory.c b/src/cg/memory.c
@@ -258,12 +258,6 @@ void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access) {
return;
}
- /* Wide-16 scalar place: keep the addressable storage as the value. */
- if (!is_bitfield && api_is_wide16_scalar_type(g->c, ty)) {
- api_push(g, base);
- return;
- }
-
/* Resolve the place into a single backend memop operand. */
if (!api_operand_can_address(&base.op)) {
CfreeCgTypeId pty = cg_type_ptr_to(g->c, api_sv_type(&base));
@@ -441,80 +435,13 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access) {
if (!is_bitfield) api_validate_memory_value(g, "store", ty, api_sv_type(&rv));
- /* Wide-16 scalar store. */
- if (!is_bitfield && api_is_wide16_scalar_type(g->c, ty)) {
- if (base.source_local != CFREE_CG_LOCAL_NONE) {
- api_local_const_clear(api_local_from_handle(g, base.source_local));
- } else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL ||
- (access.flags & CFREE_CG_MEM_VOLATILE)) {
- api_local_const_memory_boundary(g);
- }
- if (api_is_lvalue_sv(&rv)) {
- CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
- Operand dst_addr;
- Operand src_addr;
- int dst_addr_owned = 0;
- int src_addr_owned = 0;
- AggregateAccess agg;
- if (base.op.kind == OPK_LOCAL) {
- dst_addr = base.op;
- } else {
- dst_addr = api_lvalue_addr(g, &base, ptr_ty);
- dst_addr_owned = 1;
- }
- if (rv.op.kind == OPK_LOCAL) {
- src_addr = rv.op;
- } else {
- src_addr = api_lvalue_addr(g, &rv, ptr_ty);
- src_addr_owned = 1;
- }
- memset(&agg, 0, sizeof agg);
- agg.size = 16;
- agg.align = access.align ? access.align : 16;
- T->copy_bytes(T, dst_addr, src_addr, agg);
- if (dst_addr_owned) {
- api_release_temp_local(g, dst_addr.kind == OPK_INDIRECT
- ? dst_addr.v.ind.base
- : dst_addr.v.local);
- }
- if (src_addr_owned) api_release_temp_local(g, src_addr.v.local);
- } else if (rv.op.kind == OPK_IMM) {
- u8 bytes[16];
- u64 lo = (u64)rv.op.v.imm;
- u64 hi = rv.op.v.imm < 0 ? ~(u64)0 : 0;
- memset(bytes, 0, sizeof bytes);
- for (u32 i = 0; i < 8; ++i) {
- u32 lo_idx = g->c->target.big_endian ? 15u - i : i;
- u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i;
- bytes[lo_idx] = (u8)(lo >> (i * 8u));
- bytes[hi_idx] = (u8)(hi >> (i * 8u));
- }
- if (base.op.kind == OPK_LOCAL) {
- api_store_f128_bytes(g, base.op.v.local, ty, bytes);
- } else {
- CGLocal local = api_f128_temp_local(g, ty);
- ApiSValue tmp = api_make_lv(api_op_local(local, ty), ty);
- CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
- Operand dst_addr;
- Operand src_addr;
- AggregateAccess agg;
- api_store_f128_bytes(g, local, ty, bytes);
- dst_addr = api_lvalue_addr(g, &base, ptr_ty);
- src_addr = api_lvalue_addr(g, &tmp, ptr_ty);
- memset(&agg, 0, sizeof agg);
- agg.size = 16;
- agg.align = access.align ? access.align : 16;
- T->copy_bytes(T, dst_addr, src_addr, agg);
- api_release_temp_local(g, dst_addr.v.local);
- api_release_temp_local(g, src_addr.v.local);
- }
- } else {
- src = api_force_local(g, &rv, ty);
- T->store(T, base.op, src, api_mem_from_access(g, &base.op, access));
- }
- api_release(g, &base);
- api_release(g, &rv);
- return;
+ /* A 16-byte scalar immediate (a small i128 constant) carries only 64 bits in
+ * op.v.imm; materialize both lanes (the high lane sign-extended from the low)
+ * so the general store path moves a correct 16-byte value instead of
+ * load_imm'ing the low lane and leaving the high half as garbage. */
+ if (!is_bitfield && api_sv_op_is(&rv, OPK_IMM) &&
+ api_is_wide16_scalar_type(g->c, ty)) {
+ rv = api_make_wide16_int_const(g, rv.op.v.imm, ty);
}
/* General scalar / bit-field store. Compute the source operand first so its