kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit a691bcbf26887ba8ddcb59d1ebbf17408d2a3fca
parent ab11c06f26a5bd56cb96a76ff54c04d6ecbd44ea
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 26 May 2026 17:53:31 -0700

cg: add i128 arithmetic via libcalls, wire supports_label_table through ir_recorder

Extend the semantic CG layer with i128 integer arithmetic and conversions via
runtime library calls (__addti2, __multi2, etc.), mirroring existing f128
support. ir_recorder.c now delegates supports_label_table() so Wasm correctly
returns false and native targets return true, enabling correct switch lowering
decisions at the recording layer.

Diffstat:
M src/cg/arith.c | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M src/cg/call.c | 29 +++++++++++++++++++++++------
M src/cg/control.c | 34 +++++++++++++++++++++++-----------
M src/cg/ir_recorder.c | 9 +++++++++
M src/cg/memory.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--------------
M src/cg/native_direct_target.c | 33 ++++++++++++++++++---------------
M src/cg/wide.c | 33 +++++++++++++++++++++++----------
7 files changed, 353 insertions(+), 61 deletions(-)

diff --git a/src/cg/arith.c b/src/cg/arith.c @@ -179,8 +179,9 @@ void api_cg_cmp(CfreeCg* g, CmpOp cop) { ra = api_force_local_unless_imm(g, &a, opty); rb = api_force_local_unless_imm(g, &b, opty); if (!api_type_is_float(g->c, opty)) { - api_push(g, api_make_cmp(cop, ra, rb, i32, api_sv_owns_operand_local(&a, &ra), - api_sv_owns_operand_local(&b, &rb))); + api_push(g, + api_make_cmp(cop, ra, rb, i32, api_sv_owns_operand_local(&a, &ra), + api_sv_owns_operand_local(&b, &rb))); return; } rr = api_alloc_temp_local(g, i32); @@ -191,6 +192,9 @@ void api_cg_cmp(CfreeCg* g, CmpOp cop) { api_push(g, api_make_sv(dst, i32)); } +int api_try_i128_convert(CfreeCg* g, ConvKind ck, CfreeCgTypeId sty, + CfreeCgTypeId dty, ApiSValue* v); + void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { ApiSValue v; CgTarget* T; @@ -224,6 +228,7 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { return; } } + if (api_try_i128_convert(g, ck, sty, dty, &v)) return; if (ck == CV_BITCAST && abi_cg_sizeof(g->c->abi, sty) == 16 && abi_cg_sizeof(g->c->abi, dty) == 16 && (api_is_f128_type(g->c, sty) || api_is_f128_type(g->c, dty))) { @@ -278,16 +283,218 @@ void api_cg_convert_kind(CfreeCg* g, CfreeCgTypeId dst_type, ConvKind ck) { api_push(g, api_make_sv(dst, dty)); } +/* ============================================================ + * 128-bit integer lowering + * + * i128/u128 are 16-byte memory-resident scalars (see api_is_wide16 + * and src/cg/wide.c). The native backends only model <=64-bit + * register ops, so every i128 arithmetic/compare/convert is lowered + * here to a compiler-rt-style runtime call (rt/lib/int64). This + * mirrors the f128 dispatch in cfree_cg_fp_*. 
+ * ============================================================ */ + +int api_i128_stack_top(CfreeCg* g, u32 depth) { + if (!g || g->sp <= depth) return 0; + return api_is_i128_type(g->c, api_sv_type(&g->stack[g->sp - 1u - depth])); +} + +static int api_binop_is_shift(BinOp iop) { + return iop == BO_SHL || iop == BO_SHR_U || iop == BO_SHR_S; +} + +static int api_is_bool_type(Compiler* c, CfreeCgTypeId ty) { + const CgType* cg = cg_type_get(c, api_unalias_type(c, ty)); + return cg && cg->kind == CFREE_CG_TYPE_BOOL; +} + +/* Materialize an i128 value as an lvalue and return a pointer local to it. */ +static Operand api_i128_addr(CfreeCg* g, ApiSValue* v) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + ApiSValue lv = api_wide16_materialize_lvalue(g, v, i128); + return api_lvalue_addr(g, &lv, cg_type_ptr_to(g->c, i128)); +} + +/* Load a 64-bit lane of an i128 (addressed by `addr`) into a fresh i64. */ +static Operand api_i128_load_lane(CfreeCg* g, Operand addr, i32 off) { + CfreeCgTypeId i64 = builtin_id(CFREE_CG_BUILTIN_I64); + CGLocal rr = api_alloc_temp_local(g, i64); + Operand dst = api_op_local(rr, i64); + MemAccess ma; + memset(&ma, 0, sizeof ma); + ma.type = i64; + ma.size = 8; + ma.align = 8; + g->target->load(g->target, dst, api_op_indirect(addr.v.local, off, i64), ma); + return dst; +} + +static void api_i128_binop(CfreeCg* g, BinOp iop) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); + const char* name = api_i128_binop_helper(iop); + CfreeCgTypeId ps[2]; + ApiSValue args[2]; + if (!name) { + compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported i128 binop"); + return; + } + args[1] = api_pop(g); + args[0] = api_pop(g); + ps[0] = i128; + ps[1] = api_binop_is_shift(iop) ? 
i32 : i128; + api_runtime_call_values(g, name, i128, ps, 2, args); +} + +static void api_i128_unop(CfreeCg* g, UnOp iop) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + const char* name = NULL; + ApiSValue args[1]; + CfreeCgTypeId ps[1]; + if (iop == UO_NEG) + name = "__negti2"; + else if (iop == UO_BNOT) + name = "__cfree_notti3"; + else { + compiler_panic(g->c, g->cur_loc, "CfreeCg: unsupported i128 unop"); + return; + } + args[0] = api_pop(g); + ps[0] = i128; + api_runtime_call_values(g, name, i128, ps, 1, args); +} + +/* Map a relational op to the form used to compare a __cfree_*cmpti2 + * result (-1/0/1, a signed i32) against zero. */ +static CmpOp api_i128_cmp_vs_zero(CmpOp cop) { + switch (cop) { + case CMP_EQ: + return CMP_EQ; + case CMP_NE: + return CMP_NE; + case CMP_LT_S: + case CMP_LT_U: + return CMP_LT_S; + case CMP_LE_S: + case CMP_LE_U: + return CMP_LE_S; + case CMP_GT_S: + case CMP_GT_U: + return CMP_GT_S; + case CMP_GE_S: + case CMP_GE_U: + return CMP_GE_S; + default: + return CMP_NE; + } +} + +static void api_i128_cmp(CfreeCg* g, CmpOp cop) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); + const char* name = + api_i128_cmp_is_unsigned(cop) ? "__cfree_ucmpti2" : "__cfree_cmpti2"; + CfreeCgTypeId ps[2] = {i128, i128}; + ApiSValue args[2]; + args[1] = api_pop(g); + args[0] = api_pop(g); + api_runtime_call_values(g, name, i32, ps, 2, args); + cfree_cg_push_int(g, 0, i32); + api_cg_cmp(g, api_i128_cmp_vs_zero(cop)); +} + +/* int<->i128 conversions. Returns 1 if it handled the conversion and + * consumed *v, 0 to fall through to the generic path. 
*/ +int api_try_i128_convert(CfreeCg* g, ConvKind ck, CfreeCgTypeId sty, + CfreeCgTypeId dty, ApiSValue* v) { + CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128); + CfreeCgTypeId i64 = builtin_id(CFREE_CG_BUILTIN_I64); + int s_is_128 = api_is_i128_type(g->c, sty); + int d_is_128 = api_is_i128_type(g->c, dty); + if (!s_is_128 && !d_is_128) return 0; + if (s_is_128 && d_is_128) { + /* signed<->unsigned i128 reinterpret: identical layout. */ + v->type = dty; + v->op.type = dty; + api_push(g, *v); + return 1; + } + if (d_is_128) { + u32 sw = cfree_cg_type_int_width((CfreeCompiler*)g->c, sty); + const char* name = + (ck == CV_SEXT) ? "__cfree_sext64ti" : "__cfree_zext64ti"; + ApiSValue arg; + CfreeCgTypeId ps[1]; + if (sw == 0) return 0; /* float->i128 unsupported here */ + if (sw >= 64) { + arg = *v; + arg.type = i64; + arg.op.type = i64; + } else { + api_push(g, *v); + api_cg_convert_kind(g, i64, ck); + arg = api_pop(g); + } + ps[0] = i64; + api_runtime_call_values(g, name, i128, ps, 1, &arg); + return 1; + } + /* s_is_128, dty is _Bool: "value != 0" over the full 128 bits, not a + * low-lane truncation (a value whose only set bits are above bit 63 must + * still become 1). Reuse the runtime i128 compare. */ + if (api_is_bool_type(g->c, dty)) { + api_push(g, *v); + cfree_cg_push_int(g, 0, i128); + api_i128_cmp(g, CMP_NE); /* leaves i32 0/1 */ + api_cg_convert_kind(g, dty, CV_TRUNC); + return 1; + } + /* s_is_128, dty is a narrower integer: take the low 64 bits, then + * truncate further if needed. */ + { + u32 dw = cfree_cg_type_int_width((CfreeCompiler*)g->c, dty); + i32 lo_off = g->c->target.big_endian ? 
8 : 0; + Operand addr; + Operand lo; + if (dw == 0) return 0; /* i128->float unsupported here */ + addr = api_i128_addr(g, v); + lo = api_i128_load_lane(g, addr, lo_off); + api_release_temp_local(g, addr.v.local); + api_release(g, v); + if (dw >= 64) { + api_push(g, api_make_sv(lo, dty)); + } else { + api_push(g, api_make_sv(lo, i64)); + api_cg_convert_kind(g, dty, CV_TRUNC); + } + return 1; + } +} + void cfree_cg_int_binop(CfreeCg* g, CfreeCgIntBinOp op, uint32_t flags) { - api_cg_binop(g, api_map_int_binop(op), flags); + BinOp iop = api_map_int_binop(op); + if (g && (api_i128_stack_top(g, 0) || api_i128_stack_top(g, 1))) { + api_i128_binop(g, iop); + return; + } + api_cg_binop(g, iop, flags); } void cfree_cg_int_unop(CfreeCg* g, CfreeCgIntUnOp op, uint32_t flags) { - api_cg_unop(g, api_map_int_unop(op), flags); + UnOp iop = api_map_int_unop(op); + if (g && api_i128_stack_top(g, 0) && (iop == UO_NEG || iop == UO_BNOT)) { + api_i128_unop(g, iop); + return; + } + api_cg_unop(g, iop, flags); } void cfree_cg_int_cmp(CfreeCg* g, CfreeCgIntCmpOp op) { - api_cg_cmp(g, api_map_int_cmp(op)); + CmpOp cop = api_map_int_cmp(op); + if (g && (api_i128_stack_top(g, 0) || api_i128_stack_top(g, 1))) { + api_i128_cmp(g, cop); + return; + } + api_cg_cmp(g, cop); } const char* api_i128_binop_helper(BinOp op) { diff --git a/src/cg/call.c b/src/cg/call.c @@ -50,7 +50,23 @@ static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg, CGLocal r = api_alloc_temp_local(g, ty); Operand dst = api_op_local(r, ty); if (op.kind == OPK_IMM) { - g->target->load_imm(g->target, dst, op.v.imm); + if (api_is_wide16_scalar_type(g->c, ty)) { + /* A 16-byte scalar immediate (an i128 small constant) only carries + * 64 bits in op.v.imm; load_imm would leave the high lane as stack + * garbage. Write both lanes, sign-extending into the high half. */ + u8 bytes[16]; + u64 lo = (u64)op.v.imm; + u64 hi = (op.v.imm < 0) ? 
~(u64)0 : 0; + for (u32 i = 0; i < 8; ++i) { + u32 lo_idx = g->c->target.big_endian ? 15u - i : i; + u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i; + bytes[lo_idx] = (u8)(lo >> (i * 8u)); + bytes[hi_idx] = (u8)(hi >> (i * 8u)); + } + api_store_f128_bytes(g, r, ty, bytes); + } else { + g->target->load_imm(g->target, dst, op.v.imm); + } } else if (op.kind == OPK_LOCAL) { g->target->copy(g->target, dst, op); } else { @@ -64,8 +80,8 @@ static CGLocal api_materialize_call_local(CfreeCg* g, ApiSValue* arg, void api_pack_call_arg(CfreeCg* g, CGLocal* out, CfreeCgTypeId fty, u32 idx) { ApiSValue arg = api_pop(g); u32 nfixed = api_func_nparams(g, fty); - CfreeCgTypeId aty = idx >= nfixed ? api_sv_type(&arg) - : cg_type_func_param_id(g->c, fty, idx); + CfreeCgTypeId aty = + idx >= nfixed ? api_sv_type(&arg) : cg_type_func_param_id(g->c, fty, idx); if (!aty) aty = api_sv_type(&arg); *out = api_materialize_call_local(g, &arg, aty); } @@ -82,7 +98,8 @@ void api_release_call_args(CfreeCg* g, CGLocal* args, u32 nargs) { void api_push_call_result(CfreeCg* g, CGLocal result, CfreeCgTypeId ret_ty) { Operand op = api_op_local(result, ret_ty); - if (cg_type_is_aggregate(g->c, ret_ty) || api_is_wide16_scalar_type(g->c, ret_ty)) { + if (cg_type_is_aggregate(g->c, ret_ty) || + api_is_wide16_scalar_type(g->c, ret_ty)) { api_push(g, api_make_lv(op, ret_ty)); } else { api_push(g, api_make_sv(op, ret_ty)); @@ -131,8 +148,8 @@ static void api_tail_fallback_ret(CfreeCg* g, CfreeCgTypeId ret_ty) { static void api_finish_call(CfreeCg* g, CGCallDesc* desc, CGLocal* args, u32 nargs, Operand callee_op, ApiSValue* callee, - CfreeCgTypeId ret_ty, int has_result, - int want_tail, int emit_tail) { + CfreeCgTypeId ret_ty, int has_result, int want_tail, + int emit_tail) { if (emit_tail) api_temp_locals_finish(g); if (!emit_tail) api_call_clobber_boundary(g, desc); g->target->call(g->target, desc); diff --git a/src/cg/control.c b/src/cg/control.c @@ -323,12 +323,21 @@ void 
cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) { /* Direct O0 targets may override switch_ for a single-pass branch-chain * lowering. Still honor an explicit jump-table hint so tests and frontends * can exercise the semantic label-table path without enabling O1. */ - native_switch_override = - (g->target->switch_ && g->opt_level == 0 && - desc.hint != CFREE_CG_SWITCH_JUMP_TABLE); + native_switch_override = (g->target->switch_ && g->opt_level == 0 && + desc.hint != CFREE_CG_SWITCH_JUMP_TABLE); plan = native_switch_override ? (CGSwitchPlan){CG_SWITCH_PLAN_CHAIN, 0, 0} : cg_plan_switch(g, &desc); + /* The label-table lowering materializes a rodata table of code-label + * addresses and an indirect branch. Targets that can't express that (Wasm) + * realize dense dispatch through their switch_ hook (br_table) instead, so + * hand the plan—hint and all—to switch_ rather than the table path. */ + if (plan.kind == CG_SWITCH_PLAN_TABLE && g->target->switch_ && + g->target->supports_label_table && + !g->target->supports_label_table(g->target)) { + plan.kind = CG_SWITCH_PLAN_CHAIN; + } + if (plan.kind == CG_SWITCH_PLAN_TABLE) { /* Selector stays on the value stack; cg_emit_switch_table consumes * it via cg-API ops so the path also records cleanly under opt. */ @@ -337,7 +346,8 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) { } else { metrics_count(g->c, "cg.switch.chain", 1); selector = api_pop(g); - desc.selector = api_force_local_unless_imm(g, &selector, desc.selector_type); + desc.selector = + api_force_local_unless_imm(g, &selector, desc.selector_type); if (g->target->switch_) { g->target->switch_(g->target, &desc); } else { @@ -667,8 +677,9 @@ void cfree_cg_alloca(CfreeCg* g, uint32_t align, sz = api_pop(g); pty = resolve_type(g->c, result_ptr_type); if (!pty) pty = cg_type_ptr_to(g->c, builtin_id(CFREE_CG_BUILTIN_VOID)); - sz_op = api_sv_op_is(&sz, OPK_IMM) ? sz.op - : api_force_local(g, &sz, api_sv_type(&sz)); + sz_op = api_sv_op_is(&sz, OPK_IMM) + ? 
sz.op + : api_force_local(g, &sz, api_sv_type(&sz)); rr = api_alloc_temp_local(g, pty); dst = api_op_local(rr, pty); T->alloca_(T, dst, sz_op, align ? align : 16); @@ -903,7 +914,8 @@ void cfree_cg_index(CfreeCg* g, uint64_t offset) { if (!base_info || base_info->kind != CFREE_CG_TYPE_ARRAY) api_release(g, &base); api_release(g, &idx); - api_push(g, api_make_lv(api_op_indirect(result.v.local, 0, elem_ty), elem_ty)); + api_push(g, + api_make_lv(api_op_indirect(result.v.local, 0, elem_ty), elem_ty)); } void cfree_cg_field(CfreeCg* g, uint32_t field_index) { @@ -997,8 +1009,8 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) { api_op_imm((i64)field_offset, rec_ptr_ty)); api_release(g, &base); } - api_push(g, - api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty)); + api_push( + g, api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty)); } else if (base.op.kind == OPK_GLOBAL) { result = api_op_global(base.op.v.global.sym, @@ -1024,8 +1036,8 @@ void cfree_cg_field(CfreeCg* g, uint32_t field_index) { api_op_imm((i64)field_offset, rec_ptr_ty)); api_release_temp_local(g, base_addr.v.local); } - api_push(g, - api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty)); + api_push( + g, api_make_lv(api_op_indirect(result.v.local, 0, field_ty), field_ty)); } } diff --git a/src/cg/ir_recorder.c b/src/cg/ir_recorder.c @@ -223,6 +223,14 @@ static const char* rec_data_label_addr_unsupported_msg(CgTarget* t) { return "IR recorder supports function-local label address data"; } +/* A target that cannot resolve code-label addresses in static data (it set + * data_label_addr_unsupported_msg) likewise cannot build a label-address jump + * table; report that so cfree_cg_switch routes table plans through switch_. */ +static int rec_supports_label_table(CgTarget* t) { + CgIrRecorder* r = rec_of(t); + return r->data_label_addr_unsupported_msg ? 
0 : 1; +} + static CGScope rec_scope_begin(CgTarget* t, const CGScopeDesc* desc) { CgIrRecorder* r = rec_of(t); CgIrInst* in = emit(r, CG_IR_SCOPE_BEGIN); @@ -582,6 +590,7 @@ CgTarget* cg_ir_recorder_new(Compiler* c, ObjBuilder* obj, r->base.jump = rec_jump; r->base.cmp_branch = rec_cmp_branch; r->base.switch_ = rec_switch; + r->base.supports_label_table = rec_supports_label_table; r->base.indirect_branch = rec_indirect_branch; r->base.load_label_addr = rec_load_label_addr; r->base.local_static_data_begin = rec_local_static_data_begin; diff --git a/src/cg/memory.c b/src/cg/memory.c @@ -5,6 +5,13 @@ void cfree_cg_push_int(CfreeCg* g, uint64_t value, CfreeCgTypeId type) { if (!g) return; ty = resolve_type(g->c, type); if (!ty) return; + /* A 16-byte scalar immediate cannot be represented by the 64-bit op.v.imm + * alone; materialize it into addressable storage with both lanes + * sign-extended so no downstream consumer sees an undefined high half. */ + if (api_is_wide16_scalar_type(g->c, ty)) { + api_push(g, api_make_wide16_int_const(g, (i64)value, ty)); + return; + } api_push(g, api_make_sv(api_op_imm((i64)value, ty), ty)); } @@ -182,10 +189,10 @@ static int scale_to_log2(uint32_t scale) { * this helper does not free it. */ static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset, - CGLocal index, u8 log2_scale, - CfreeCgTypeId access_ty, - int addr_is_pointer_value, - CGLocal* out_owned_base) { + CGLocal index, u8 log2_scale, + CfreeCgTypeId access_ty, + int addr_is_pointer_value, + CGLocal* out_owned_base) { CgTarget* T = g->target; CfreeCgTypeId base_ty = cg_type_is_ptr(g->c, addr.type) ? addr.type @@ -303,7 +310,8 @@ static Operand fold_ea_into_operand(CfreeCg* g, Operand addr, i64 offset, * *out_log2 = log2_scale (0..3) if scale was normalized to one of {1,2,4,8} * or to 0 if we materialized the scaled value (log2=0). 
*/ -static CGLocal pop_and_normalize_index(CfreeCg* g, uint32_t scale, u8* out_log2) { +static CGLocal pop_and_normalize_index(CfreeCg* g, uint32_t scale, + u8* out_log2) { ApiSValue idx; CfreeCgTypeId idx_ty; int lg2; @@ -485,7 +493,8 @@ void cfree_cg_load(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { !api_sv_local_storage_is_aggregate(g, &base) && !cg_type_is_aggregate(g->c, api_sv_type(&base)) && !cg_type_is_aggregate(g->c, ty) && - api_unalias_type(g->c, api_sv_type(&base)) == api_unalias_type(g->c, ty)) { + api_unalias_type(g->c, api_sv_type(&base)) == + api_unalias_type(g->c, ty)) { base.lvalue = 0; base.res = RES_FIXED_LOCAL; api_push(g, base); @@ -733,6 +742,25 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { /* Wide-16 scalar store: keep the pre-existing wide16 lowering for the plain * (no-EA) case. */ if (!has_index && !is_bitfield && api_is_wide16_scalar_type(g->c, ty)) { + /* Normalize the destination up front into a single offset-0 lvalue + * operand so every sub-branch below addresses the right location. Two + * cases otherwise misbehave: a pointer-rvalue base (`*p`) is the address + * itself and must be dereferenced (not treated as storage), and a field + * offset (a struct member) must be folded in. Both collapse to an + * OPK_INDIRECT lvalue here. */ + if (!is_lvalue) { + /* Pointer-rvalue base: the operand value is the destination address. 
*/ + Operand ptr_op = api_force_local(g, &base, api_sv_type(&base)); + base = + api_make_lv(api_op_indirect(ptr_op.v.local, (i32)ea.offset, ty), ty); + ea.offset = 0; + is_lvalue = 1; + } else if (ea.offset != 0 && base.op.kind == OPK_LOCAL) { + CfreeCgTypeId base_ptr_ty = cg_type_ptr_to(g->c, ty); + Operand addr = api_lvalue_addr(g, &base, base_ptr_ty); + base = api_make_lv(api_op_indirect(addr.v.local, (i32)ea.offset, ty), ty); + ea.offset = 0; + } if (base.source_local != CFREE_CG_LOCAL_NONE) { api_local_const_clear(api_local_from_handle(g, base.source_local)); } else if (base.op.kind == OPK_INDIRECT || base.op.kind == OPK_GLOBAL || @@ -750,8 +778,8 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { if (ea.offset == 0) { dst_addr = base.op; } else { - dst_addr = fold_ea_into_operand(g, base.op, ea.offset, CG_LOCAL_NONE, 0, - ty, 0, &owned_base); + dst_addr = fold_ea_into_operand(g, base.op, ea.offset, CG_LOCAL_NONE, + 0, ty, 0, &owned_base); dst_addr_owned = owned_base != CG_LOCAL_NONE; } } else if (is_lvalue) { @@ -771,9 +799,9 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { agg.align = access.align ? access.align : 16; T->copy_bytes(T, dst_addr, src_addr, agg); if (dst_addr_owned) { - api_release_temp_local(g, - dst_addr.kind == OPK_INDIRECT ? dst_addr.v.ind.base - : dst_addr.v.local); + api_release_temp_local(g, dst_addr.kind == OPK_INDIRECT + ? 
dst_addr.v.ind.base + : dst_addr.v.local); } if (src_addr_owned) api_release_temp_local(g, src_addr.v.local); } else if (rv.op.kind == OPK_IMM) { @@ -841,7 +869,8 @@ void cfree_cg_store(CfreeCg* g, CfreeCgMemAccess access, CfreeCgEffAddr ea) { !api_sv_local_storage_is_aggregate(g, &base) && !cg_type_is_aggregate(g->c, api_sv_type(&base)) && !cg_type_is_aggregate(g->c, ty) && - api_unalias_type(g->c, api_sv_type(&base)) == api_unalias_type(g->c, ty)) { + api_unalias_type(g->c, api_sv_type(&base)) == + api_unalias_type(g->c, ty)) { Operand dst = base.op; if (src.kind == OPK_IMM) { T->load_imm(T, dst, src.v.imm); @@ -934,7 +963,8 @@ void cfree_cg_dup(CfreeCg* g) { ty = api_owned_local_type(g, &v); r = api_alloc_temp_local(g, ty); dst = api_op_local(r, ty); - g->target->copy(g->target, dst, api_op_local((CGLocal)api_local_of_sv(&v), ty)); + g->target->copy(g->target, dst, + api_op_local((CGLocal)api_local_of_sv(&v), ty)); dup = v; api_set_owned_local(&dup, r); dup.res = RES_LOCAL; @@ -951,7 +981,8 @@ void cfree_cg_dup(CfreeCg* g) { ty = api_owned_local_type(g, &v); r = api_alloc_temp_local(g, ty); dst = api_op_local(r, ty); - g->target->copy(g->target, dst, api_op_local((CGLocal)api_local_of_sv(&v), ty)); + g->target->copy(g->target, dst, + api_op_local((CGLocal)api_local_of_sv(&v), ty)); g->stack[g->sp - 1].pinned = 0; dup = v; api_set_owned_local(&dup, r); diff --git a/src/cg/native_direct_target.c b/src/cg/native_direct_target.c @@ -494,8 +494,7 @@ static void nd_copy_to_reg(NativeDirectTarget* d, NativeLoc dst, break; case NATIVE_LOC_STACK: { NativeAddr addr; - MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size, - d->base.c->target.ptr_align); + MemAccess mem = nd_type_mem(d, dst.type); memset(&addr, 0, sizeof addr); addr.base_kind = NATIVE_ADDR_BASE_FRAME; addr.base.frame = src.v.stack.slot; @@ -746,10 +745,11 @@ static int nd_local_static_data_begin(CgTarget* t, if (d->local_static_active) nd_panic(d, "nested local static data"); if 
(desc->attrs.section) { name = (Sym)desc->attrs.section; - kind = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? SEC_RODATA - : SEC_DATA; - flags = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? SF_ALLOC - : (SF_ALLOC | SF_WRITE); + kind = + (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) ? SEC_RODATA : SEC_DATA; + flags = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) + ? SF_ALLOC + : (SF_ALLOC | SF_WRITE); } else if (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) { name = pool_intern_slice(t->c->global, SLICE_LIT(".rodata")); kind = SEC_RODATA; @@ -795,7 +795,8 @@ static void nd_local_static_data_label_addr(CgTarget* t, Label target, u8 zero[8]; (void)width; (void)address_space; - if (!d->local_static_active) nd_panic(d, "label address outside local static data"); + if (!d->local_static_active) + nd_panic(d, "label address outside local static data"); if (width != 8u) nd_panic(d, "unsupported local static label address width"); memset(zero, 0, sizeof zero); off = d->local_static_base + d->local_static_size; @@ -912,15 +913,15 @@ static void nd_copy(CgTarget* t, Operand dst, Operand src) { memset(&access, 0, sizeof access); access.type = dst.type; access.size = (u32)size; - access.align = dst.type ? cg_type_align(t->c, dst.type) - : (u32)t->c->target.ptr_align; + access.align = + dst.type ? 
cg_type_align(t->c, dst.type) : (u32)t->c->target.ptr_align; access.mem.type = dst.type; access.mem.size = access.size; access.mem.align = access.align; - NativeAddr da = nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, - access.mem); - NativeAddr sa = nd_addr_materialize(d, nd_addr_storage(d, src), &st, - access.mem); + NativeAddr da = + nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, access.mem); + NativeAddr sa = + nd_addr_materialize(d, nd_addr_storage(d, src), &st, access.mem); ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); d->native->copy_bytes(d->native, da, sa, access); nd_addr_temps_release(d, &st); @@ -935,7 +936,8 @@ static void nd_copy(CgTarget* t, Operand dst, Operand src) { static void nd_load(CgTarget* t, Operand dst, Operand addr, MemAccess mem) { NativeDirectTarget* d = nd_of(t); NdAddrTemps temps; - u64 size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0); + u64 size = + mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0); if (mem.flags & MF_VOLATILE) nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE); @@ -967,7 +969,8 @@ static void nd_load(CgTarget* t, Operand dst, Operand addr, MemAccess mem) { static void nd_store(CgTarget* t, Operand addr, Operand src, MemAccess mem) { NativeDirectTarget* d = nd_of(t); NdAddrTemps temps; - u64 size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0); + u64 size = + mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0); if (mem.flags & MF_VOLATILE) nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE); diff --git a/src/cg/wide.c b/src/cg/wide.c @@ -19,6 +19,28 @@ u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes) { return v; } +void api_wide16_sext_imm_bytes(CfreeCg* g, i64 imm, u8 bytes[16]) { + /* A 16-byte scalar immediate only carries 64 bits in op.v.imm; the full + * value is its sign-extension. 
Fill both lanes accordingly, honoring the + * target byte order. */ + u64 lo = (u64)imm; + u64 hi = imm < 0 ? ~(u64)0 : 0; + for (u32 i = 0; i < 8; ++i) { + u32 lo_idx = g->c->target.big_endian ? 15u - i : i; + u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i; + bytes[lo_idx] = (u8)(lo >> (i * 8u)); + bytes[hi_idx] = (u8)(hi >> (i * 8u)); + } +} + +ApiSValue api_make_wide16_int_const(CfreeCg* g, i64 value, CfreeCgTypeId ty) { + u8 bytes[16]; + CGLocal local = api_f128_temp_local(g, ty); + api_wide16_sext_imm_bytes(g, value, bytes); + api_store_f128_bytes(g, local, ty, bytes); + return api_make_lv(api_op_local(local, ty), ty); +} + void api_store_f128_bytes(CfreeCg* g, CGLocal local, CfreeCgTypeId ty, const u8 bytes[16]) { CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); @@ -145,16 +167,7 @@ ApiSValue api_wide16_materialize_lvalue(CfreeCg* g, ApiSValue* v, return api_make_lv(dst, ty); } if (v->op.kind == OPK_IMM) { - u8 bytes[16]; - u64 lo = (u64)v->op.v.imm; - memset(bytes, 0, sizeof bytes); - for (u32 i = 0; i < 8; ++i) { - u32 idx = g->c->target.big_endian ? 15u - i : i; - bytes[idx] = (u8)(lo >> (i * 8u)); - } - CGLocal local = api_f128_temp_local(g, ty); - api_store_f128_bytes(g, local, ty, bytes); - return api_make_lv(api_op_local(local, ty), ty); + return api_make_wide16_int_const(g, v->op.v.imm, ty); } compiler_panic( g->c, g->cur_loc,