commit a691bcbf26887ba8ddcb59d1ebbf17408d2a3fca
parent ab11c06f26a5bd56cb96a76ff54c04d6ecbd44ea
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 26 May 2026 17:53:31 -0700
cg: add i128 arithmetic via libcalls, wire supports_label_table through ir_recorder
Extend the semantic CG layer with i128 integer arithmetic, comparisons, and
conversions via runtime library calls (__addti2, __multi2, etc.), mirroring the
existing f128 support. ir_recorder.c now implements supports_label_table(),
returning false whenever the recorder has data_label_addr_unsupported_msg set,
so Wasm correctly returns false and native targets return true, enabling
correct switch lowering decisions at the recording layer.
Diffstat:
7 files changed, 353 insertions(+), 61 deletions(-)
diff --git a/src/cg/arith.c b/src/cg/arith.c
@@ -179,8 +179,9 @@ void api_cg_cmp(CfreeCg* g, CmpOp cop) {
ra = api_force_local_unless_imm(g, &a, opty);
rb = api_force_local_unless_imm(g, &b, opty);
if (!api_type_is_float(g->c, opty)) {
- api_push(g, api_make_cmp(cop, ra, rb, i32, api_sv_owns_operand_local(&a, &ra),
- api_sv_owns_operand_local(&b, &rb)));
+ api_push(g,
+ api_make_cmp(cop, ra, rb, i32, api_sv_owns_operand_local(&a, &ra),
+ api_sv_owns_operand_local(&b, &rb)));
return;
}
rr = api_alloc_temp_local(g, i32);
@@ -191,6 +192,9 @@ void api_cg_cmp(CfreeCg* g, CmpOp cop) {
api_push(g, api_make_sv(dst, i32));
}
+int api_try_i128_convert(CfreeCg* g, ConvKind ck, CfreeCgTypeId sty,
+ CfreeCgTypeId dty, ApiSValue* v);
+
void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) {
ApiSValue v;
CgTarget* T;
@@ -224,6 +228,7 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) {
return;
}
}
+ if (api_try_i128_convert(g, ck, sty, dty, &v)) return;
if (ck == CV_BITCAST && abi_cg_sizeof(g->c->abi, sty) == 16 &&
abi_cg_sizeof(g->c->abi, dty) == 16 &&
(api_is_f128_type(g->c, sty) || api_is_f128_type(g->c, dty))) {
@@ -278,16 +283,218 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) {
api_push(g, api_make_sv(dst, dty));
}
+/* ============================================================
+ * 128-bit integer lowering
+ *
+ * i128/u128 are 16-byte memory-resident scalars (see api_is_wide16
+ * and src/cg/wide.c). The native backends only model <=64-bit
+ * register ops, so every i128 arithmetic/compare/convert is lowered
+ * here to a compiler-rt-style runtime call (rt/lib/int64). This
+ * mirrors the f128 dispatch in cfree_cg_fp_*.
+ * ============================================================ */
+
+/* Report whether the value-stack entry `depth` slots below the top has a type
+ * accepted by api_is_i128_type. Returns 0 when `g` is NULL or the stack holds
+ * no more than `depth` entries, so callers may probe without a depth check. */
+int api_i128_stack_top(CfreeCg* g, u32 depth) {
+ if (!g || g->sp <= depth) return 0;
+ return api_is_i128_type(g->c, api_sv_type(&g->stack[g->sp - 1u - depth]));
+}
+
+/* Nonzero when `iop` is a shift operator: BO_SHL, BO_SHR_U or BO_SHR_S. */
+static int api_binop_is_shift(BinOp iop) {
+ return iop == BO_SHL || iop == BO_SHR_U || iop == BO_SHR_S;
+}
+
+/* Nonzero when `ty`, after alias resolution through api_unalias_type, names a
+ * type whose kind is CFREE_CG_TYPE_BOOL. Returns 0 if cg_type_get yields no
+ * type record. */
+static int api_is_bool_type(Compiler* c, CfreeCgTypeId ty) {
+ const CgType* cg = cg_type_get(c, api_unalias_type(c, ty));
+ return cg && cg->kind == CFREE_CG_TYPE_BOOL;
+}
+
+/* Materialize an i128 value as an lvalue and return a pointer local to it.
+ * `v` is passed through api_wide16_materialize_lvalue typed as the builtin
+ * I128, and the address of the resulting lvalue is taken with a
+ * pointer-to-I128 type. */
+static Operand api_i128_addr(CfreeCg* g, ApiSValue* v) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ ApiSValue lv = api_wide16_materialize_lvalue(g, v, i128);
+ return api_lvalue_addr(g, &lv, cg_type_ptr_to(g->c, i128));
+}
+
+/* Load a 64-bit lane of an i128 (addressed by `addr`) into a fresh i64.
+ * addr.v.local is used as the base of the indirect access, so `addr` is
+ * assumed to be a local operand holding the address; `off` is the byte offset
+ * of the lane from that base. The returned operand is a newly allocated temp
+ * local that this function does not release. */
+static Operand api_i128_load_lane(CfreeCg* g, Operand addr, i32 off) {
+ CfreeCgTypeId i64 = builtin_id(CFREE_CG_BUILTIN_I64);
+ CGLocal rr = api_alloc_temp_local(g, i64);
+ Operand dst = api_op_local(rr, i64);
+ MemAccess ma;
+ /* Zero every other MemAccess field; only type/size/align are set below. */
+ memset(&ma, 0, sizeof ma);
+ ma.type = i64;
+ ma.size = 8;
+ ma.align = 8;
+ g->target->load(g->target, dst, api_op_indirect(addr.v.local, off, i64), ma);
+ return dst;
+}
+
+/* Lower an i128 binary operation to a runtime call. The helper name comes
+ * from api_i128_binop_helper(iop); when it returns NULL the op is reported
+ * through compiler_panic and nothing is popped. Otherwise two operands are
+ * popped and passed to api_runtime_call_values with an I128 result type. */
+static void api_i128_binop(CfreeCg* g, BinOp iop) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32);
+ const char* name = api_i128_binop_helper(iop);
+ CfreeCgTypeId ps[2];
+ ApiSValue args[2];
+ if (!name) {
+ compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported i128 binop");
+ return;
+ }
+ /* The top of the stack becomes the second argument. */
+ args[1] = api_pop(g);
+ args[0] = api_pop(g);
+ ps[0] = i128;
+ /* Shift helpers take their count as I32; all others take a second I128. */
+ ps[1] = api_binop_is_shift(iop) ? i32 : i128;
+ api_runtime_call_values(g, name, i128, ps, 2, args);
+}
+
+/* Lower an i128 unary operation to a runtime call: UO_NEG uses "__negti2" and
+ * UO_BNOT uses "__cfree_notti3". Any other op is reported through
+ * compiler_panic and nothing is popped. The single operand is popped and
+ * passed as an I128 parameter; the call's result type is I128. */
+static void api_i128_unop(CfreeCg* g, UnOp iop) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ const char* name = NULL;
+ ApiSValue args[1];
+ CfreeCgTypeId ps[1];
+ if (iop == UO_NEG)
+ name = "__negti2";
+ else if (iop == UO_BNOT)
+ name = "__cfree_notti3";
+ else {
+ compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported i128 unop");
+ return;
+ }
+ args[0] = api_pop(g);
+ ps[0] = i128;
+ api_runtime_call_values(g, name, i128, ps, 1, args);
+}
+
+/* Map a relational op to the form used to compare a __cfree_*cmpti2
+ * result (-1/0/1, a signed i32) against zero. Signed and unsigned variants of
+ * the same relation both map to the signed form, since the helper result is
+ * itself signed; CMP_EQ and CMP_NE map to themselves. Any op not listed
+ * falls back to CMP_NE. */
+static CmpOp api_i128_cmp_vs_zero(CmpOp cop) {
+ switch (cop) {
+ case CMP_EQ:
+ return CMP_EQ;
+ case CMP_NE:
+ return CMP_NE;
+ case CMP_LT_S:
+ case CMP_LT_U:
+ return CMP_LT_S;
+ case CMP_LE_S:
+ case CMP_LE_U:
+ return CMP_LE_S;
+ case CMP_GT_S:
+ case CMP_GT_U:
+ return CMP_GT_S;
+ case CMP_GE_S:
+ case CMP_GE_U:
+ return CMP_GE_S;
+ default:
+ return CMP_NE;
+ }
+}
+
+/* Lower an i128 comparison in two steps: call a three-way compare helper on
+ * the two popped operands, then compare that I32 result against zero with the
+ * relation chosen by api_i128_cmp_vs_zero(cop). The helper is
+ * "__cfree_ucmpti2" when api_i128_cmp_is_unsigned(cop) is true and
+ * "__cfree_cmpti2" otherwise. */
+static void api_i128_cmp(CfreeCg* g, CmpOp cop) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32);
+ const char* name =
+ api_i128_cmp_is_unsigned(cop) ? "__cfree_ucmpti2" : "__cfree_cmpti2";
+ CfreeCgTypeId ps[2] = {i128, i128};
+ ApiSValue args[2];
+ /* The top of the stack becomes the second argument. */
+ args[1] = api_pop(g);
+ args[0] = api_pop(g);
+ api_runtime_call_values(g, name, i32, ps, 2, args);
+ /* Push the zero that the helper result is compared against. */
+ cfree_cg_push_int(g, 0, i32);
+ api_cg_cmp(g, api_i128_cmp_vs_zero(cop));
+}
+
+/* int<->i128 conversions. Returns 1 if it handled the conversion and
+ * consumed *v, 0 to fall through to the generic path; every `return 0` below
+ * happens before *v is pushed or released. Cases handled:
+ * - i128 -> i128: retag *v with `dty` and push it back;
+ * - integer -> i128: bring the source to I64, then call a runtime helper;
+ * - i128 -> _Bool: full-width "!= 0" through the runtime compare;
+ * - i128 -> narrower integer: load the low 64-bit lane, then truncate. */
+int api_try_i128_convert(CfreeCg* g, ConvKind ck, CfreeCgTypeId sty,
+ CfreeCgTypeId dty, ApiSValue* v) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ CfreeCgTypeId i64 = builtin_id(CFREE_CG_BUILTIN_I64);
+ int s_is_128 = api_is_i128_type(g->c, sty);
+ int d_is_128 = api_is_i128_type(g->c, dty);
+ if (!s_is_128 && !d_is_128) return 0;
+ if (s_is_128 && d_is_128) {
+ /* signed<->unsigned i128 reinterpret: identical layout. */
+ v->type = dty;
+ v->op.type = dty;
+ api_push(g, *v);
+ return 1;
+ }
+ if (d_is_128) {
+ u32 sw = cfree_cg_type_int_width((CfreeCompiler*)g->c, sty);
+ /* CV_SEXT selects the sign-extending helper; every other conversion kind
+ * selects the zero-extending one. */
+ const char* name =
+ (ck == CV_SEXT) ? "__cfree_sext64ti" : "__cfree_zext64ti";
+ ApiSValue arg;
+ CfreeCgTypeId ps[1];
+ if (sw == 0) return 0; /* float->i128 unsupported here */
+ if (sw >= 64) {
+ /* Already 64 bits wide: just retag the value as I64 for the call. */
+ arg = *v;
+ arg.type = i64;
+ arg.op.type = i64;
+ } else {
+ /* Narrower source: widen to I64 first, using the same conversion kind. */
+ api_push(g, *v);
+ api_cg_convert_kind(g, i64, ck);
+ arg = api_pop(g);
+ }
+ ps[0] = i64;
+ api_runtime_call_values(g, name, i128, ps, 1, &arg);
+ return 1;
+ }
+ /* s_is_128, dty is _Bool: "value != 0" over the full 128 bits, not a
+ * low-lane truncation (a value whose only set bits are above bit 63 must
+ * still become 1). Reuse the runtime i128 compare. */
+ if (api_is_bool_type(g->c, dty)) {
+ api_push(g, *v);
+ cfree_cg_push_int(g, 0, i128);
+ api_i128_cmp(g, CMP_NE); /* leaves i32 0/1 */
+ api_cg_convert_kind(g, dty, CV_TRUNC);
+ return 1;
+ }
+ /* s_is_128, dty is a narrower integer: take the low 64 bits, then
+ * truncate further if needed. */
+ {
+ u32 dw = cfree_cg_type_int_width((CfreeCompiler*)g->c, dty);
+ /* The low lane sits at byte offset 8 on big-endian targets, 0 otherwise. */
+ i32 lo_off = g->c->target.big_endian ? 8 : 0;
+ Operand addr;
+ Operand lo;
+ if (dw == 0) return 0; /* i128->float unsupported here */
+ addr = api_i128_addr(g, v);
+ lo = api_i128_load_lane(g, addr, lo_off);
+ /* The address temp and the source value are no longer needed once the
+ * lane has been loaded. */
+ api_release_temp_local(g, addr.v.local);
+ api_release(g, v);
+ if (dw >= 64) {
+ api_push(g, api_make_sv(lo, dty));
+ } else {
+ api_push(g, api_make_sv(lo, i64));
+ api_cg_convert_kind(g, dty, CV_TRUNC);
+ }
+ return 1;
+ }
+}
+
void cfree_cg_int_binop(CfreeCg* g, CfreeCgIntBinOp op, uint32_t flags) {
- api_cg_binop(g, api_map_int_binop(op), flags);
+ BinOp iop = api_map_int_binop(op);
+ /* If either of the top two stack entries is i128, use the libcall lowering;
+ * `flags` is not forwarded on that path. */
+ if (g && (api_i128_stack_top(g, 0) || api_i128_stack_top(g, 1))) {
+ api_i128_binop(g, iop);
+ return;
+ }
+ api_cg_binop(g, iop, flags);
}
void cfree_cg_int_unop(CfreeCg* g, CfreeCgIntUnOp op, uint32_t flags) {
- api_cg_unop(g, api_map_int_unop(op), flags);
+ UnOp iop = api_map_int_unop(op);
+ /* Only UO_NEG and UO_BNOT take the i128 libcall lowering; any other unary op
+ * on an i128 operand still goes to api_cg_unop. `flags` is not forwarded on
+ * the libcall path. */
+ if (g && api_i128_stack_top(g, 0) && (iop == UO_NEG || iop == UO_BNOT)) {
+ api_i128_unop(g, iop);
+ return;
+ }
+ api_cg_unop(g, iop, flags);
}
void cfree_cg_int_cmp(CfreeCg* g, CfreeCgIntCmpOp op) {
- api_cg_cmp(g, api_map_int_cmp(op));
+ CmpOp cop = api_map_int_cmp(op);
+ /* If either of the top two stack entries is i128, compare through the
+ * runtime helper instead of the generic compare. */
+ if (g && (api_i128_stack_top(g, 0) || api_i128_stack_top(g, 1))) {
+ api_i128_cmp(g, cop);
+ return;
+ }
+ api_cg_cmp(g, cop);
}
const char* api_i128_binop_helper(BinOp op) {
diff --git a/src/cg/call.c b/src/cg/call.c
@@ -50,7 +50,23 @@ static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg,
CGLocal r = api_alloc_temp_local(g, ty);
Operand dst = api_op_local(r, ty);
if (op.kind == OPK_IMM) {
- g->target->load_imm(g->target, dst, op.v.imm);
+ if (api_is_wide16_scalar_type(g->c, ty)) {
+ /* A 16-byte scalar immediate (an i128 small constant) only carries
+ * 64 bits in op.v.imm; load_imm would leave the high lane as stack
+ * garbage. Write both lanes, sign-extending into the high half. */
+ u8 bytes[16];
+ u64 lo = (u64)op.v.imm;
+ u64 hi = (op.v.imm < 0) ? ~(u64)0 : 0;
+ for (u32 i = 0; i < 8; ++i) {
+ u32 lo_idx = g->c->target.big_endian ? 15u - i : i;
+ u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i;
+ bytes[lo_idx] = (u8)(lo >> (i * 8u));
+ bytes[hi_idx] = (u8)(hi >> (i * 8u));
+ }
+ api_store_f128_bytes(g, r, ty, bytes);
+ } else {
+ g->target->load_imm(g->target, dst, op.v.imm);
+ }
} else if (op.kind == OPK_LOCAL) {
g->target->copy(g->target, dst, op);
} else {
@@ -64,8 +80,8 @@ static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg,
void api_pack_call_arg(CfreeCg* g, CGLocal* out, CfreeCgTypeId fty, u32 idx) {
ApiSValue arg = api_pop(g);
u32 nfixed = api_func_nparams(g, fty);
- CfreeCgTypeId aty = idx >= nfixed ? api_sv_type(&arg)
- : cg_type_func_param_id(g->c, fty, idx);
+ CfreeCgTypeId aty =
+ idx >= nfixed ? api_sv_type(&arg) : cg_type_func_param_id(g->c, fty, idx);
if (!aty) aty = api_sv_type(&arg);
*out = api_materialize_call_local(g, &arg, aty);
}
@@ -82,7 +98,8 @@ void api_release_call_args(CfreeCg* g, CGLocal* args, u32 nargs) {
void api_push_call_result(CfreeCg* g, CGLocal result, CfreeCgTypeId ret_ty) {
Operand op = api_op_local(result, ret_ty);
- if (cg_type_is_aggregate(g->c, ret_ty) || api_is_wide16_scalar_type(g->c, ret_ty)) {
+ if (cg_type_is_aggregate(g->c, ret_ty) ||
+ api_is_wide16_scalar_type(g->c, ret_ty)) {
api_push(g, api_make_lv(op, ret_ty));
} else {
api_push(g, api_make_sv(op, ret_ty));
@@ -131,8 +148,8 @@ static void api_tail_fallback_ret(CfreeCg* g, CfreeCgTypeId ret_ty) {
static void api_finish_call(CfreeCg* g, CGCallDesc* desc, CGLocal* args,
u32 nargs, Operand callee_op, ApiSValue* callee,
- CfreeCgTypeId ret_ty, int has_result,
- int want_tail, int emit_tail) {
+ CfreeCgTypeId ret_ty, int has_result, int want_tail,
+ int emit_tail) {
if (emit_tail) api_temp_locals_finish(g);
if (!emit_tail) api_call_clobber_boundary(g, desc);
g->target->call(g->target, desc);
diff --git a/src/cg/control.c b/src/cg/control.c
@@ -323,12 +323,21 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) {
/* Direct O0 targets may override switch_ for a single-pass branch-chain
* lowering. Still honor an explicit jump-table hint so tests and frontends
* can exercise the semantic label-table path without enabling O1. */
- native_switch_override =
- (g->target->switch_ && g->opt_level == 0 &&
- desc.hint != CFREE_CG_SWITCH_JUMP_TABLE);
+ native_switch_override = (g->target->switch_ && g->opt_level == 0 &&
+ desc.hint != CFREE_CG_SWITCH_JUMP_TABLE);
plan = native_switch_override ? (CGSwitchPlan){CG_SWITCH_PLAN_CHAIN, 0, 0}
: cg_plan_switch(g, &desc);
+ /* The label-table lowering materializes a rodata table of code-label
+ * addresses and an indirect branch. Targets that can't express that (Wasm)
+ * realize dense dispatch through their switch_ hook (br_table) instead, so
+ * demote the plan to CHAIN: that path hands desc, hint and all, to switch_. */
+ if (plan.kind == CG_SWITCH_PLAN_TABLE && g->target->switch_ &&
+ g->target->supports_label_table &&
+ !g->target->supports_label_table(g->target)) {
+ plan.kind = CG_SWITCH_PLAN_CHAIN;
+ }
+
if (plan.kind == CG_SWITCH_PLAN_TABLE) {
/* Selector stays on the value stack; cg_emit_switch_table consumes
* it via cg-API ops so the path also records cleanly under opt. */
@@ -337,7 +346,8 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) {
} else {
metrics_count(g->c, "cg.switch.chain", 1);
selector = api_pop(g);
- desc.selector = api_force_local_unless_imm(g, &selector, desc.selector_type);
+ desc.selector =
+ api_force_local_unless_imm(g, &selector, desc.selector_type);
if (g->target->switch_) {
g->target->switch_(g->target, &desc);
} else {
@@ -667,8 +677,9 @@ void cfree_cg_alloca(CfreeCg* g, uint32_t align,
sz = api_pop(g);
pty = resolve_type(g->c, result_ptr_type);
if (!pty) pty = cg_type_ptr_to(g->c, builtin_id(CFREE_CG_BUILTIN_VOID));
- sz_op = api_sv_op_is(&sz, OPK_IMM) ? sz.op
- : api_force_local(g, &sz, api_sv_type(&sz));
+ sz_op = api_sv_op_is(&sz, OPK_IMM)
+ ? sz.op
+ : api_force_local(g, &sz, api_sv_type(&sz));
rr = api_alloc_temp_local(g, pty);
dst = api_op_local(rr, pty);
T->alloca_(T, dst, sz_op, align ? align : 16);
@@ -903,7 +914,8 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) {
if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY)
api_release(g, &base);
api_release(g, &idx);
- api_push(g, api_make_lv(api_op_indirect(result.v.local, 0, elem_ty), elem_ty));
+ api_push(g,
+ api_make_lv(api_op_indirect(result.v.local, 0, elem_ty), elem_ty));
}
void cfree_cg_field(CfreeCg* g, uint32_t field_index) {
@@ -997,8 +1009,8 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) {
api_op_imm((i64)field_offset, rec_ptr_ty));
api_release(g, &base);
}
- api_push(g,
- api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty));
+ api_push(
+ g, api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty));
} else if (base.op.kind == OPK_GLOBAL) {
result =
api_op_global(base.op.v.global.sym,
@@ -1024,8 +1036,8 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) {
api_op_imm((i64)field_offset, rec_ptr_ty));
api_release_temp_local(g, base_addr.v.local);
}
- api_push(g,
- api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty));
+ api_push(
+ g, api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty));
}
}
diff --git a/src/cg/ir_recorder.c b/src/cg/ir_recorder.c
@@ -223,6 +223,14 @@ static const char* rec_data_label_addr_unsupported_msg(CgTarget* t) {
return "IR recorder supports function-local label address data";
}
+/* A target that cannot resolve code-label addresses in static data (it set
+ * data_label_addr_unsupported_msg) likewise cannot build a label-address jump
+ * table; report that so cfree_cg_switch routes table plans through switch_.
+ * Returns 0 when the recorder's data_label_addr_unsupported_msg is non-NULL,
+ * 1 otherwise. */
+static int rec_supports_label_table(CgTarget* t) {
+ CgIrRecorder* r = rec_of(t);
+ return r->data_label_addr_unsupported_msg ? 0 : 1;
+}
+
static CGScope rec_scope_begin(CgTarget* t, const CGScopeDesc* desc) {
CgIrRecorder* r = rec_of(t);
CgIrInst* in = emit(r, CG_IR_SCOPE_BEGIN);
@@ -582,6 +590,7 @@ CgTarget* cg_ir_recorder_new(Compiler* c, ObjBuilder* obj,
r->base.jump = rec_jump;
r->base.cmp_branch = rec_cmp_branch;
r->base.switch_ = rec_switch;
+ r->base.supports_label_table = rec_supports_label_table;
r->base.indirect_branch = rec_indirect_branch;
r->base.load_label_addr = rec_load_label_addr;
r->base.local_static_data_begin = rec_local_static_data_begin;
diff --git a/src/cg/memory.c b/src/cg/memory.c
@@ -5,6 +5,13 @@ void cfree_cg_push_int(CfreeCg* g, uint64_t value, CfreeCgTypeId type) {
if (!g) return;
ty = resolve_type(g->c, type);
if (!ty) return;
+ /* A 16-byte scalar immediate cannot be represented by the 64-bit op.v.imm
+ * alone; materialize it into addressable storage, sign-extending the value
+ * into the high lane so no downstream consumer sees an undefined high half. */
+ if (api_is_wide16_scalar_type(g->c, ty)) {
+ api_push(g, api_make_wide16_int_const(g, (i64)value, ty));
+ return;
+ }
api_push(g, api_make_sv(api_op_imm((i64)value, ty), ty));
}
@@ -182,10 +189,10 @@ static int scale_to_log2(uint32_t scale) {
* this helper does not free it.
*/
static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset,
- CGLocal index, u8 log2_scale,
- CfreeCgTypeId access_ty,
- int addr_is_pointer_value,
- CGLocal* out_owned_base) {
+ CGLocal index, u8 log2_scale,
+ CfreeCgTypeId access_ty,
+ int addr_is_pointer_value,
+ CGLocal* out_owned_base) {
CgTarget* T = g->target;
CfreeCgTypeId base_ty = cg_type_is_ptr(g->c, addr.type)
? addr.type
@@ -303,7 +310,8 @@ static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset,
* *out_log2 = log2_scale (0..3) if scale was normalized to one of {1,2,4,8}
* or to 0 if we materialized the scaled value (log2=0).
*/
-static CGLocal pop_and_normalize_index(CfreeCg* g, uint32_t scale, u8* out_log2) {
+static CGLocal pop_and_normalize_index(CfreeCg* g, uint32_t scale,
+ u8* out_log2) {
ApiSValue idx;
CfreeCgTypeId idx_ty;
int lg2;
@@ -485,7 +493,8 @@ void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) {
!api_sv_local_storage_is_aggregate(g, &base) &&
!cg_type_is_aggregate(g->c, api_sv_type(&base)) &&
!cg_type_is_aggregate(g->c, ty) &&
- api_unalias_type(g->c, api_sv_type(&base)) == api_unalias_type(g->c, ty)) {
+ api_unalias_type(g->c, api_sv_type(&base)) ==
+ api_unalias_type(g->c, ty)) {
base.lvalue = 0;
base.res = RES_FIXED_LOCAL;
api_push(g, base);
@@ -733,6 +742,25 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) {
/* Wide-16 scalar store: keep the pre-existing wide16 lowering for the plain
* (no-EA) case. */
if (!has_index && !is_bitfield && api_is_wide16_scalar_type(g->c, ty)) {
+ /* Normalize the destination up front into a single offset-0 lvalue
+ * operand so every sub-branch below addresses the right location. Two
+ * cases otherwise misbehave: a pointer-rvalue base (`*p`) is the address
+ * itself and must be dereferenced (not treated as storage), and a field
+ * offset (a struct member) must be folded in. Both collapse to an
+ * OPK_INDIRECT lvalue here. */
+ if (!is_lvalue) {
+ /* Pointer-rvalue base: the operand value is the destination address. */
+ Operand ptr_op = api_force_local(g, &base, api_sv_type(&base));
+ base =
+ api_make_lv(api_op_indirect(ptr_op.v.local, (i32)ea.offset, ty), ty);
+ ea.offset = 0;
+ is_lvalue = 1;
+ } else if (ea.offset != 0 && base.op.kind == OPK_LOCAL) {
+ CfreeCgTypeId base_ptr_ty = cg_type_ptr_to(g->c, ty);
+ Operand addr = api_lvalue_addr(g, &base, base_ptr_ty);
+ base = api_make_lv(api_op_indirect(addr.v.local, (i32)ea.offset, ty), ty);
+ ea.offset = 0;
+ }
if (base.source_local != CFREE_CG_LOCAL_NONE) {
api_local_const_clear(api_local_from_handle(g, base.source_local));
} else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL ||
@@ -750,8 +778,8 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) {
if (ea.offset == 0) {
dst_addr = base.op;
} else {
- dst_addr = fold_ea_into_operand(g, base.op, ea.offset, CG_LOCAL_NONE, 0,
- ty, 0, &owned_base);
+ dst_addr = fold_ea_into_operand(g, base.op, ea.offset, CG_LOCAL_NONE,
+ 0, ty, 0, &owned_base);
dst_addr_owned = owned_base != CG_LOCAL_NONE;
}
} else if (is_lvalue) {
@@ -771,9 +799,9 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) {
agg.align = access.align ? access.align : 16;
T->copy_bytes(T, dst_addr, src_addr, agg);
if (dst_addr_owned) {
- api_release_temp_local(g,
- dst_addr.kind == OPK_INDIRECT ? dst_addr.v.ind.base
- : dst_addr.v.local);
+ api_release_temp_local(g, dst_addr.kind == OPK_INDIRECT
+ ? dst_addr.v.ind.base
+ : dst_addr.v.local);
}
if (src_addr_owned) api_release_temp_local(g, src_addr.v.local);
} else if (rv.op.kind == OPK_IMM) {
@@ -841,7 +869,8 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) {
!api_sv_local_storage_is_aggregate(g, &base) &&
!cg_type_is_aggregate(g->c, api_sv_type(&base)) &&
!cg_type_is_aggregate(g->c, ty) &&
- api_unalias_type(g->c, api_sv_type(&base)) == api_unalias_type(g->c, ty)) {
+ api_unalias_type(g->c, api_sv_type(&base)) ==
+ api_unalias_type(g->c, ty)) {
Operand dst = base.op;
if (src.kind == OPK_IMM) {
T->load_imm(T, dst, src.v.imm);
@@ -934,7 +963,8 @@ void cfree_cg_dup(CfreeCg* g) {
ty = api_owned_local_type(g, &v);
r = api_alloc_temp_local(g, ty);
dst = api_op_local(r, ty);
- g->target->copy(g->target, dst, api_op_local((CGLocal)api_local_of_sv(&v), ty));
+ g->target->copy(g->target, dst,
+ api_op_local((CGLocal)api_local_of_sv(&v), ty));
dup = v;
api_set_owned_local(&dup, r);
dup.res = RES_LOCAL;
@@ -951,7 +981,8 @@ void cfree_cg_dup(CfreeCg* g) {
ty = api_owned_local_type(g, &v);
r = api_alloc_temp_local(g, ty);
dst = api_op_local(r, ty);
- g->target->copy(g->target, dst, api_op_local((CGLocal)api_local_of_sv(&v), ty));
+ g->target->copy(g->target, dst,
+ api_op_local((CGLocal)api_local_of_sv(&v), ty));
g->stack[g->sp - 1].pinned = 0;
dup = v;
api_set_owned_local(&dup, r);
diff --git a/src/cg/native_direct_target.c b/src/cg/native_direct_target.c
@@ -494,8 +494,7 @@ static void nd_copy_to_reg(NativeDirectTarget* d, NativeLoc dst,
break;
case NATIVE_LOC_STACK: {
NativeAddr addr;
- MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size,
- d->base.c->target.ptr_align);
+ MemAccess mem = nd_type_mem(d, dst.type);
memset(&addr, 0, sizeof addr);
addr.base_kind = NATIVE_ADDR_BASE_FRAME;
addr.base.frame = src.v.stack.slot;
@@ -746,10 +745,11 @@ static int nd_local_static_data_begin(CgTarget* t,
if (d->local_static_active) nd_panic(d, "nested local static data");
if (desc->attrs.section) {
name = (Sym)desc->attrs.section;
- kind = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? SEC_RODATA
- : SEC_DATA;
- flags = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? SF_ALLOC
- : (SF_ALLOC | SF_WRITE);
+ kind =
+ (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? SEC_RODATA : SEC_DATA;
+ flags = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY)
+ ? SF_ALLOC
+ : (SF_ALLOC | SF_WRITE);
} else if (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) {
name = pool_intern_slice(t->c->global, SLICE_LIT(".rodata"));
kind = SEC_RODATA;
@@ -795,7 +795,8 @@ static void nd_local_static_data_label_addr(CgTarget* t, Label target,
u8 zero[8];
(void)width;
(void)address_space;
- if (!d->local_static_active) nd_panic(d, "label address outside local static data");
+ if (!d->local_static_active)
+ nd_panic(d, "label address outside local static data");
if (width != 8u) nd_panic(d, "unsupported local static label address width");
memset(zero, 0, sizeof zero);
off = d->local_static_base + d->local_static_size;
@@ -912,15 +913,15 @@ static void nd_copy(CgTarget* t, Operand dst, Operand src) {
memset(&access, 0, sizeof access);
access.type = dst.type;
access.size = (u32)size;
- access.align = dst.type ? cg_type_align(t->c, dst.type)
- : (u32)t->c->target.ptr_align;
+ access.align =
+ dst.type ? cg_type_align(t->c, dst.type) : (u32)t->c->target.ptr_align;
access.mem.type = dst.type;
access.mem.size = access.size;
access.mem.align = access.align;
- NativeAddr da = nd_addr_materialize(d, nd_addr_storage(d, dst), &dt,
- access.mem);
- NativeAddr sa = nd_addr_materialize(d, nd_addr_storage(d, src), &st,
- access.mem);
+ NativeAddr da =
+ nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, access.mem);
+ NativeAddr sa =
+ nd_addr_materialize(d, nd_addr_storage(d, src), &st, access.mem);
ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes");
d->native->copy_bytes(d->native, da, sa, access);
nd_addr_temps_release(d, &st);
@@ -935,7 +936,8 @@ static void nd_copy(CgTarget* t, Operand dst, Operand src) {
static void nd_load(CgTarget* t, Operand dst, Operand addr, MemAccess mem) {
NativeDirectTarget* d = nd_of(t);
NdAddrTemps temps;
- u64 size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0);
+ u64 size =
+ mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0);
if (mem.flags & MF_VOLATILE)
nd_barrier(d,
NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE);
@@ -967,7 +969,8 @@ static void nd_load(CgTarget* t, Operand dst, Operand addr, MemAccess mem) {
static void nd_store(CgTarget* t, Operand addr, Operand src, MemAccess mem) {
NativeDirectTarget* d = nd_of(t);
NdAddrTemps temps;
- u64 size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0);
+ u64 size =
+ mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0);
if (mem.flags & MF_VOLATILE)
nd_barrier(d,
NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE);
diff --git a/src/cg/wide.c b/src/cg/wide.c
@@ -19,6 +19,28 @@ u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes) {
return v;
}
+/* Encode `imm`, sign-extended to 128 bits, into the 16-byte buffer `bytes` in
+ * target byte order. On little-endian targets the low lane occupies
+ * bytes[0..7] and the high lane bytes[8..15]; on big-endian targets the high
+ * lane occupies bytes[0..7] and the low lane bytes[8..15]. All 16 bytes are
+ * written. */
+void api_wide16_sext_imm_bytes(CfreeCg* g, i64 imm, u8 bytes[16]) {
+ /* A 16-byte scalar immediate only carries 64 bits in op.v.imm; the full
+ * value is its sign-extension. Fill both lanes accordingly, honoring the
+ * target byte order. */
+ u64 lo = (u64)imm;
+ /* High lane is all ones for a negative immediate, all zeros otherwise. */
+ u64 hi = imm < 0 ? ~(u64)0 : 0;
+ for (u32 i = 0; i < 8; ++i) {
+ u32 lo_idx = g->c->target.big_endian ? 15u - i : i;
+ u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i;
+ bytes[lo_idx] = (u8)(lo >> (i * 8u));
+ bytes[hi_idx] = (u8)(hi >> (i * 8u));
+ }
+}
+
+/* Build a 16-byte integer constant of type `ty`: obtain a temp local from
+ * api_f128_temp_local, store the sign-extended bytes of `value` into it, and
+ * return that local as an lvalue ApiSValue. */
+ApiSValue api_make_wide16_int_const(CfreeCg* g, i64 value, CfreeCgTypeId ty) {
+ u8 bytes[16];
+ CGLocal local = api_f128_temp_local(g, ty);
+ api_wide16_sext_imm_bytes(g, value, bytes);
+ api_store_f128_bytes(g, local, ty, bytes);
+ return api_make_lv(api_op_local(local, ty), ty);
+}
+
void api_store_f128_bytes(CfreeCg* g, CGLocal local, CfreeCgTypeId ty,
const u8 bytes[16]) {
CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64);
@@ -145,16 +167,7 @@ ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v,
return api_make_lv(dst, ty);
}
if (v->op.kind == OPK_IMM) {
- u8 bytes[16];
- u64 lo = (u64)v->op.v.imm;
- memset(bytes, 0, sizeof bytes);
- for (u32 i = 0; i < 8; ++i) {
- u32 idx = g->c->target.big_endian ? 15u - i : i;
- bytes[idx] = (u8)(lo >> (i * 8u));
- }
- CGLocal local = api_f128_temp_local(g, ty);
- api_store_f128_bytes(g, local, ty, bytes);
- return api_make_lv(api_op_local(local, ty), ty);
+ return api_make_wide16_int_const(g, v->op.v.imm, ty);
}
compiler_panic(
g->c, g->cur_loc,