wide.c (12224B)
1 #include "cg/internal.h" 2 3 CGLocal api_f128_temp_local(KitCg* g, KitCgTypeId ty) { 4 CGLocalDesc d; 5 memset(&d, 0, sizeof d); 6 d.type = ty; 7 d.size = 16; 8 d.align = 16; 9 d.flags = CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED; 10 return g->target->local(g->target, &d); 11 } 12 13 u64 api_u64_from_target_bytes(KitCg* g, const u8* bytes) { 14 u64 v = 0; 15 for (u32 i = 0; i < 8; ++i) { 16 u32 shift = g->c->target.big_endian ? (7u - i) * 8u : i * 8u; 17 v |= (u64)bytes[i] << shift; 18 } 19 return v; 20 } 21 22 void api_wide16_sext_imm_bytes(KitCg* g, i64 imm, u8 bytes[16]) { 23 /* A 16-byte scalar immediate only carries 64 bits in op.v.imm; the full 24 * value is its sign-extension. Fill both lanes accordingly, honoring the 25 * target byte order. */ 26 u64 lo = (u64)imm; 27 u64 hi = imm < 0 ? ~(u64)0 : 0; 28 for (u32 i = 0; i < 8; ++i) { 29 u32 lo_idx = g->c->target.big_endian ? 15u - i : i; 30 u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i; 31 bytes[lo_idx] = (u8)(lo >> (i * 8u)); 32 bytes[hi_idx] = (u8)(hi >> (i * 8u)); 33 } 34 } 35 36 ApiSValue api_make_wide16_int_const(KitCg* g, i64 value, KitCgTypeId ty) { 37 u8 bytes[16]; 38 CGLocal local = api_f128_temp_local(g, ty); 39 api_wide16_sext_imm_bytes(g, value, bytes); 40 api_store_f128_bytes(g, local, ty, bytes); 41 /* i128/f128 are scalar VALUEs (Track 7.3), not places: the constant lives in 42 * `local` and flows as a value. Returning an lvalue here made the O1 ABI path 43 * pass the constant by-reference and deref a value slot (a null-deref crash 44 * on i128->bool compares); a value backed by the local is the correct form 45 * and matches how api_push_call_result represents an i128 result. */ 46 return api_make_sv(api_op_local(local, ty), ty); 47 } 48 49 void api_store_f128_bytes(KitCg* g, CGLocal local, KitCgTypeId ty, 50 const u8 bytes[16]) { 51 KitCgTypeId i64_ty = builtin_id(KIT_CG_BUILTIN_I64); 52 KitCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); 53 CGLocal ar = api_alloc_temp_local(g, ptr_ty); 54 Operand base = api_op_local(ar, ptr_ty); 55 MemAccess ma; 56 memset(&ma, 0, sizeof ma); 57 ma.type = i64_ty; 58 ma.size = 8; 59 ma.align = 8; 60 g->target->addr_of(g->target, base, api_op_local(local, ty)); 61 g->target->store(g->target, api_op_indirect(ar, 0, i64_ty), 62 api_op_imm((i64)api_u64_from_target_bytes(g, bytes), i64_ty), 63 ma); 64 g->target->store( 65 g->target, api_op_indirect(ar, 8, i64_ty), 66 api_op_imm((i64)api_u64_from_target_bytes(g, bytes + 8), i64_ty), ma); 67 api_release_temp_local(g, ar); 68 } 69 70 void api_encode_binary128_from_double(KitCg* g, double value, u8 out[16]) { 71 union { 72 double d; 73 u64 u; 74 } in; 75 u64 lo = 0; 76 u64 hi = 0; 77 u64 frac; 78 u32 sign; 79 u32 exp; 80 in.d = value; 81 sign = (u32)(in.u >> 63); 82 exp = (u32)((in.u >> 52) & 0x7ffu); 83 frac = in.u & 0x000fffffffffffffull; 84 if (sign) hi |= 1ull << 63; 85 if (exp == 0x7ffu) { 86 hi |= (u64)0x7fffu << 48; 87 if (frac) { 88 lo |= (frac & 0xfu) << 60; 89 hi |= frac >> 4; 90 hi |= 1ull << 47; 91 } 92 } else if (exp != 0 || frac != 0) { 93 i32 e; 94 u64 sig; 95 if (exp == 0) { 96 e = -1022; 97 sig = frac; 98 while ((sig & (1ull << 52)) == 0) { 99 sig <<= 1; 100 --e; 101 } 102 frac = sig & 0x000fffffffffffffull; 103 } else { 104 e = (i32)exp - 1023; 105 } 106 hi |= (u64)(u32)(e + 16383) << 48; 107 lo |= (frac & 0xfu) << 60; 108 hi |= frac >> 4; 109 } 110 for (u32 i = 0; i < 16; ++i) { 111 if (g->c->target.big_endian) { 112 u64 lane = i < 8u ? hi : lo; 113 u32 shift = (7u - (i & 7u)) * 8u; 114 out[i] = (u8)(lane >> shift); 115 } else { 116 u64 lane = i < 8u ? lo : hi; 117 u32 shift = (i & 7u) * 8u; 118 out[i] = (u8)(lane >> shift); 119 } 120 } 121 } 122 123 ApiSValue api_make_f128_const(KitCg* g, double value, KitCgTypeId ty) { 124 u8 bytes[16]; 125 CGLocal local; 126 api_encode_binary128_from_double(g, value, bytes); 127 local = api_f128_temp_local(g, ty); 128 api_store_f128_bytes(g, local, ty, bytes); 129 return api_make_lv(api_op_local(local, ty), ty); 130 } 131 132 /* ============================================================ 133 * wide8 — 8-byte scalar split into two 4-byte lanes 134 * 135 * Some 32-bit ABIs represent long long / int64_t, and sometimes soft double, 136 * as two machine words. Like the wide16 (i128/f128) scalars above it is 137 * memory-resident (api_is_wide8_scalar_type forces CG_LOCAL_MEMORY_REQUIRED), 138 * but its arithmetic is done INLINE as 2-word lane sequences (src/cg/arith.c) 139 * rather than via a runtime call, because compiler-rt has no 64-bit 140 * add/sub/and/or/xor helper. The lane size is 4 bytes; the low word is at 141 * offset 0 on a little-endian target. These primitives are the inline analogue 142 * of api_store_f128_bytes / api_i128_addr / api_i128_load_lane. 143 * ============================================================ */ 144 145 /* Allocate an 8-byte memory-resident, address-taken scalar temp. */ 146 CGLocal api_wide8_temp_local(KitCg* g, KitCgTypeId ty) { 147 CGLocalDesc d; 148 memset(&d, 0, sizeof d); 149 d.type = ty; 150 d.size = 8; 151 d.align = (u32)abi_cg_alignof(g->c->abi, ty); 152 if (!d.align) d.align = 8; 153 d.flags = CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED; 154 return g->target->local(g->target, &d); 155 } 156 157 /* Byte offset of the low / high 32-bit lane within an 8-byte scalar. */ 158 static i32 api_wide8_lo_off(KitCg* g) { return g->c->target.big_endian ? 4 : 0; } 159 static i32 api_wide8_hi_off(KitCg* g) { return g->c->target.big_endian ? 0 : 4; } 160 161 /* Materialize a 64-bit constant bit pattern into a fresh memory-resident scalar, 162 * storing its two 32-bit lanes, and return the value backed by that local. Used 163 * for both i64 immediates (bits = (u64)imm) and soft-double constants (bits = 164 * the IEEE-754 binary64 encoding). */ 165 ApiSValue api_make_wide8_const_bits(KitCg* g, u64 bits, KitCgTypeId ty) { 166 KitCgTypeId i32_ty = builtin_id(KIT_CG_BUILTIN_I32); 167 KitCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); 168 CGLocal local = api_wide8_temp_local(g, ty); 169 CGLocal ar = api_alloc_temp_local(g, ptr_ty); 170 Operand base = api_op_local(ar, ptr_ty); 171 MemAccess ma; 172 memset(&ma, 0, sizeof ma); 173 ma.type = i32_ty; 174 ma.size = 4; 175 ma.align = 4; 176 g->target->addr_of(g->target, base, api_op_local(local, ty)); 177 g->target->store(g->target, api_op_indirect(ar, api_wide8_lo_off(g), i32_ty), 178 api_op_imm((i64)(i32)(u32)(bits & 0xffffffffu), i32_ty), ma); 179 g->target->store(g->target, api_op_indirect(ar, api_wide8_hi_off(g), i32_ty), 180 api_op_imm((i64)(i32)(u32)(bits >> 32), i32_ty), ma); 181 api_release_temp_local(g, ar); 182 return api_make_sv(api_op_local(local, ty), ty); 183 } 184 185 ApiSValue api_make_wide8_int_const(KitCg* g, i64 value, KitCgTypeId ty) { 186 return api_make_wide8_const_bits(g, (u64)value, ty); 187 } 188 189 /* Materialize an 8-byte value as an lvalue and return a pointer local to it. 190 * An immediate is first lowered to a 2-lane memory constant. */ 191 Operand api_wide8_addr(KitCg* g, ApiSValue* v, KitCgTypeId ty) { 192 ApiSValue lv; 193 if (api_sv_op_is(v, OPK_IMM)) { 194 lv = api_make_wide8_int_const(g, v->op.v.imm, ty); 195 } else { 196 lv = *v; 197 } 198 /* A delayed value (SV_CMP/SV_ARITH) routed here through the wide64 helpers is 199 * not yet a place. Materialize it first: api_ensure_local lowers it into a 200 * memory-resident wide8 temp (api_alloc_temp_local forces 201 * CG_LOCAL_MEMORY_REQUIRED for an 8-byte scalar), which is a real addressable 202 * home. Materialization, however, clears 203 * sv.lvalue (fold.c), so we must set the flag AFTER it runs — otherwise the 204 * lvalue check in api_lvalue_addr fails ("addr operand is not an lvalue"). 205 * Doing this before api_lvalue_addr also makes its own api_ensure_local a 206 * no-op (kind is now SV_OPERAND), so the flag survives. An operand that is 207 * already a place is left untouched by api_ensure_local and flows through as 208 * before. */ 209 if (lv.kind != SV_OPERAND) api_ensure_local(g, &lv); 210 lv.type = ty; 211 lv.op.type = ty; 212 lv.lvalue = 1; 213 return api_lvalue_addr(g, &lv, cg_type_ptr_to(g->c, ty)); 214 } 215 216 /* Load a 32-bit lane (at byte offset `off`) of the scalar addressed by `addr` 217 * into a fresh i32 temp; returns the temp operand. */ 218 Operand api_wide8_load_lane(KitCg* g, Operand addr, i32 off) { 219 KitCgTypeId i32_ty = builtin_id(KIT_CG_BUILTIN_I32); 220 CGLocal rr = api_alloc_temp_local(g, i32_ty); 221 Operand dst = api_op_local(rr, i32_ty); 222 MemAccess ma; 223 memset(&ma, 0, sizeof ma); 224 ma.type = i32_ty; 225 ma.size = 4; 226 ma.align = 4; 227 g->target->load(g->target, dst, api_op_indirect(addr.v.local, off, i32_ty), 228 ma); 229 return dst; 230 } 231 232 /* Store an i32 `val` into the 32-bit lane (byte offset `off`) addressed by 233 * `addr`. */ 234 void api_wide8_store_lane(KitCg* g, Operand addr, i32 off, Operand val) { 235 KitCgTypeId i32_ty = builtin_id(KIT_CG_BUILTIN_I32); 236 MemAccess ma; 237 memset(&ma, 0, sizeof ma); 238 ma.type = i32_ty; 239 ma.size = 4; 240 ma.align = 4; 241 g->target->store(g->target, api_op_indirect(addr.v.local, off, i32_ty), val, 242 ma); 243 } 244 245 ApiSValue api_wide16_materialize_lvalue(KitCg* g, ApiSValue* v, 246 KitCgTypeId ty) { 247 if (v->op.kind == OPK_LOCAL && 248 api_unalias_type(g->c, v->op.type) == api_unalias_type(g->c, ty)) { 249 v->lvalue = 1; 250 return *v; 251 } 252 if (v->op.kind == OPK_INDIRECT) { 253 ApiSValue out = *v; 254 out.type = ty; 255 out.op.type = ty; 256 out.lvalue = 1; 257 return out; 258 } 259 if (v->op.kind == OPK_LOCAL) { 260 v->lvalue = 1; 261 return *v; 262 } 263 if (v->op.kind == OPK_GLOBAL) { 264 CGLocal local = api_f128_temp_local(g, ty); 265 Operand dst_lv = api_op_local(local, ty); 266 Operand dst_addr; 267 Operand src_addr; 268 AggregateAccess agg; 269 ApiSValue tmp = api_make_lv(dst_lv, ty); 270 ApiSValue src = api_make_lv(v->op, ty); 271 dst_addr = api_lvalue_addr(g, &tmp, cg_type_ptr_to(g->c, ty)); 272 src_addr = api_lvalue_addr(g, &src, cg_type_ptr_to(g->c, ty)); 273 memset(&agg, 0, sizeof agg); 274 agg.size = 16; 275 agg.align = 16; 276 g->target->copy_bytes(g->target, dst_addr, src_addr, agg); 277 api_release_temp_local(g, dst_addr.v.local); 278 api_release_temp_local(g, src_addr.v.local); 279 return api_make_lv(dst_lv, ty); 280 } 281 if (v->op.kind == OPK_LOCAL) { 282 CGLocal local = api_f128_temp_local(g, ty); 283 Operand dst = api_op_local(local, ty); 284 g->target->store(g->target, dst, v->op, api_mem_for_lvalue(g, &dst, ty)); 285 return api_make_lv(dst, ty); 286 } 287 if (v->op.kind == OPK_IMM) { 288 return api_make_wide16_int_const(g, v->op.v.imm, ty); 289 } 290 compiler_panic( 291 g->c, g->cur_loc, 292 "KitCg: 16-byte scalar value is not addressable (kind %u, op %u)", 293 (unsigned)v->kind, (unsigned)v->op.kind); 294 return *v; 295 } 296 297 KitCgSym api_runtime_helper(KitCg* g, const char* name, KitCgTypeId ret, 298 const KitCgTypeId* params, u32 nparams) { 299 KitCgFuncParam ps[3]; 300 KitCgFuncResult result; 301 KitCgFuncSig sig; 302 KitCgDecl decl; 303 if (nparams > 3) return KIT_CG_SYM_NONE; 304 memset(ps, 0, sizeof ps); 305 for (u32 i = 0; i < nparams; ++i) ps[i].type = params[i]; 306 memset(&sig, 0, sizeof sig); 307 /* Runtime helpers always return a single value. */ 308 memset(&result, 0, sizeof result); 309 result.type = ret; 310 sig.result = result; 311 sig.params = ps; 312 sig.nparams = nparams; 313 sig.call_conv = KIT_CG_CC_TARGET_C; 314 memset(&decl, 0, sizeof decl); 315 decl.kind = KIT_CG_DECL_FUNC; 316 decl.linkage_name = kit_cg_c_linkage_name( 317 (KitCompiler*)g->c, 318 pool_intern_slice(g->c->global, slice_from_cstr(name))); 319 decl.display_name = decl.linkage_name; 320 decl.type = kit_cg_type_func((KitCompiler*)g->c, sig); 321 decl.sym.bind = KIT_SB_GLOBAL; 322 decl.sym.visibility = KIT_CG_VIS_DEFAULT; 323 return kit_cg_decl(g, decl); 324 } 325 326 void api_runtime_call_values(KitCg* g, const char* name, KitCgTypeId ret, 327 const KitCgTypeId* params, u32 nparams, 328 ApiSValue* args) { 329 KitCgCallAttrs attrs; 330 KitCgSym sym = api_runtime_helper(g, name, ret, params, nparams); 331 memset(&attrs, 0, sizeof attrs); 332 for (u32 i = 0; i < nparams; ++i) api_push(g, args[i]); 333 api_call_symbol_common(g, sym, nparams, attrs); 334 } 335 336 /* ============================================================ 337 * Locals and params 338 * ============================================================ */