kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

wide.c (12224B)


      1 #include "cg/internal.h"
      2 
      3 CGLocal api_f128_temp_local(KitCg* g, KitCgTypeId ty) {
      4   CGLocalDesc d;
      5   memset(&d, 0, sizeof d);
      6   d.type = ty;
      7   d.size = 16;
      8   d.align = 16;
      9   d.flags = CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED;
     10   return g->target->local(g->target, &d);
     11 }
     12 
     13 u64 api_u64_from_target_bytes(KitCg* g, const u8* bytes) {
     14   u64 v = 0;
     15   for (u32 i = 0; i < 8; ++i) {
     16     u32 shift = g->c->target.big_endian ? (7u - i) * 8u : i * 8u;
     17     v |= (u64)bytes[i] << shift;
     18   }
     19   return v;
     20 }
     21 
     22 void api_wide16_sext_imm_bytes(KitCg* g, i64 imm, u8 bytes[16]) {
     23   /* A 16-byte scalar immediate only carries 64 bits in op.v.imm; the full
     24    * value is its sign-extension. Fill both lanes accordingly, honoring the
     25    * target byte order. */
     26   u64 lo = (u64)imm;
     27   u64 hi = imm < 0 ? ~(u64)0 : 0;
     28   for (u32 i = 0; i < 8; ++i) {
     29     u32 lo_idx = g->c->target.big_endian ? 15u - i : i;
     30     u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i;
     31     bytes[lo_idx] = (u8)(lo >> (i * 8u));
     32     bytes[hi_idx] = (u8)(hi >> (i * 8u));
     33   }
     34 }
     35 
     36 ApiSValue api_make_wide16_int_const(KitCg* g, i64 value, KitCgTypeId ty) {
     37   u8 bytes[16];
     38   CGLocal local = api_f128_temp_local(g, ty);
     39   api_wide16_sext_imm_bytes(g, value, bytes);
     40   api_store_f128_bytes(g, local, ty, bytes);
     41   /* i128/f128 are scalar VALUEs (Track 7.3), not places: the constant lives in
     42    * `local` and flows as a value. Returning an lvalue here made the O1 ABI path
     43    * pass the constant by-reference and deref a value slot (a null-deref crash
     44    * on i128->bool compares); a value backed by the local is the correct form
     45    * and matches how api_push_call_result represents an i128 result. */
     46   return api_make_sv(api_op_local(local, ty), ty);
     47 }
     48 
     49 void api_store_f128_bytes(KitCg* g, CGLocal local, KitCgTypeId ty,
     50                           const u8 bytes[16]) {
     51   KitCgTypeId i64_ty = builtin_id(KIT_CG_BUILTIN_I64);
     52   KitCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
     53   CGLocal ar = api_alloc_temp_local(g, ptr_ty);
     54   Operand base = api_op_local(ar, ptr_ty);
     55   MemAccess ma;
     56   memset(&ma, 0, sizeof ma);
     57   ma.type = i64_ty;
     58   ma.size = 8;
     59   ma.align = 8;
     60   g->target->addr_of(g->target, base, api_op_local(local, ty));
     61   g->target->store(g->target, api_op_indirect(ar, 0, i64_ty),
     62                    api_op_imm((i64)api_u64_from_target_bytes(g, bytes), i64_ty),
     63                    ma);
     64   g->target->store(
     65       g->target, api_op_indirect(ar, 8, i64_ty),
     66       api_op_imm((i64)api_u64_from_target_bytes(g, bytes + 8), i64_ty), ma);
     67   api_release_temp_local(g, ar);
     68 }
     69 
     70 void api_encode_binary128_from_double(KitCg* g, double value, u8 out[16]) {
     71   union {
     72     double d;
     73     u64 u;
     74   } in;
     75   u64 lo = 0;
     76   u64 hi = 0;
     77   u64 frac;
     78   u32 sign;
     79   u32 exp;
     80   in.d = value;
     81   sign = (u32)(in.u >> 63);
     82   exp = (u32)((in.u >> 52) & 0x7ffu);
     83   frac = in.u & 0x000fffffffffffffull;
     84   if (sign) hi |= 1ull << 63;
     85   if (exp == 0x7ffu) {
     86     hi |= (u64)0x7fffu << 48;
     87     if (frac) {
     88       lo |= (frac & 0xfu) << 60;
     89       hi |= frac >> 4;
     90       hi |= 1ull << 47;
     91     }
     92   } else if (exp != 0 || frac != 0) {
     93     i32 e;
     94     u64 sig;
     95     if (exp == 0) {
     96       e = -1022;
     97       sig = frac;
     98       while ((sig & (1ull << 52)) == 0) {
     99         sig <<= 1;
    100         --e;
    101       }
    102       frac = sig & 0x000fffffffffffffull;
    103     } else {
    104       e = (i32)exp - 1023;
    105     }
    106     hi |= (u64)(u32)(e + 16383) << 48;
    107     lo |= (frac & 0xfu) << 60;
    108     hi |= frac >> 4;
    109   }
    110   for (u32 i = 0; i < 16; ++i) {
    111     if (g->c->target.big_endian) {
    112       u64 lane = i < 8u ? hi : lo;
    113       u32 shift = (7u - (i & 7u)) * 8u;
    114       out[i] = (u8)(lane >> shift);
    115     } else {
    116       u64 lane = i < 8u ? lo : hi;
    117       u32 shift = (i & 7u) * 8u;
    118       out[i] = (u8)(lane >> shift);
    119     }
    120   }
    121 }
    122 
    123 ApiSValue api_make_f128_const(KitCg* g, double value, KitCgTypeId ty) {
    124   u8 bytes[16];
    125   CGLocal local;
    126   api_encode_binary128_from_double(g, value, bytes);
    127   local = api_f128_temp_local(g, ty);
    128   api_store_f128_bytes(g, local, ty, bytes);
    129   return api_make_lv(api_op_local(local, ty), ty);
    130 }
    131 
    132 /* ============================================================
    133  * wide8 — 8-byte scalar split into two 4-byte lanes
    134  *
    135  * Some 32-bit ABIs represent long long / int64_t, and sometimes soft double,
    136  * as two machine words. Like the wide16 (i128/f128) scalars above it is
    137  * memory-resident (api_is_wide8_scalar_type forces CG_LOCAL_MEMORY_REQUIRED),
    138  * but its arithmetic is done INLINE as 2-word lane sequences (src/cg/arith.c)
    139  * rather than via a runtime call, because compiler-rt has no 64-bit
    140  * add/sub/and/or/xor helper. The lane size is 4 bytes; the low word is at
    141  * offset 0 on a little-endian target. These primitives are the inline analogue
    142  * of api_store_f128_bytes / api_i128_addr / api_i128_load_lane.
    143  * ============================================================ */
    144 
    145 /* Allocate an 8-byte memory-resident, address-taken scalar temp. */
    146 CGLocal api_wide8_temp_local(KitCg* g, KitCgTypeId ty) {
    147   CGLocalDesc d;
    148   memset(&d, 0, sizeof d);
    149   d.type = ty;
    150   d.size = 8;
    151   d.align = (u32)abi_cg_alignof(g->c->abi, ty);
    152   if (!d.align) d.align = 8;
    153   d.flags = CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED;
    154   return g->target->local(g->target, &d);
    155 }
    156 
    157 /* Byte offset of the low / high 32-bit lane within an 8-byte scalar. */
    158 static i32 api_wide8_lo_off(KitCg* g) { return g->c->target.big_endian ? 4 : 0; }
    159 static i32 api_wide8_hi_off(KitCg* g) { return g->c->target.big_endian ? 0 : 4; }
    160 
    161 /* Materialize a 64-bit constant bit pattern into a fresh memory-resident scalar,
    162  * storing its two 32-bit lanes, and return the value backed by that local. Used
    163  * for both i64 immediates (bits = (u64)imm) and soft-double constants (bits =
    164  * the IEEE-754 binary64 encoding). */
    165 ApiSValue api_make_wide8_const_bits(KitCg* g, u64 bits, KitCgTypeId ty) {
    166   KitCgTypeId i32_ty = builtin_id(KIT_CG_BUILTIN_I32);
    167   KitCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
    168   CGLocal local = api_wide8_temp_local(g, ty);
    169   CGLocal ar = api_alloc_temp_local(g, ptr_ty);
    170   Operand base = api_op_local(ar, ptr_ty);
    171   MemAccess ma;
    172   memset(&ma, 0, sizeof ma);
    173   ma.type = i32_ty;
    174   ma.size = 4;
    175   ma.align = 4;
    176   g->target->addr_of(g->target, base, api_op_local(local, ty));
    177   g->target->store(g->target, api_op_indirect(ar, api_wide8_lo_off(g), i32_ty),
    178                    api_op_imm((i64)(i32)(u32)(bits & 0xffffffffu), i32_ty), ma);
    179   g->target->store(g->target, api_op_indirect(ar, api_wide8_hi_off(g), i32_ty),
    180                    api_op_imm((i64)(i32)(u32)(bits >> 32), i32_ty), ma);
    181   api_release_temp_local(g, ar);
    182   return api_make_sv(api_op_local(local, ty), ty);
    183 }
    184 
    185 ApiSValue api_make_wide8_int_const(KitCg* g, i64 value, KitCgTypeId ty) {
    186   return api_make_wide8_const_bits(g, (u64)value, ty);
    187 }
    188 
    189 /* Materialize an 8-byte value as an lvalue and return a pointer local to it.
    190  * An immediate is first lowered to a 2-lane memory constant. */
    191 Operand api_wide8_addr(KitCg* g, ApiSValue* v, KitCgTypeId ty) {
    192   ApiSValue lv;
    193   if (api_sv_op_is(v, OPK_IMM)) {
    194     lv = api_make_wide8_int_const(g, v->op.v.imm, ty);
    195   } else {
    196     lv = *v;
    197   }
    198   /* A delayed value (SV_CMP/SV_ARITH) routed here through the wide64 helpers is
    199    * not yet a place. Materialize it first: api_ensure_local lowers it into a
    200    * memory-resident wide8 temp (api_alloc_temp_local forces
    201    * CG_LOCAL_MEMORY_REQUIRED for an 8-byte scalar), which is a real addressable
    202    * home. Materialization, however, clears
    203    * sv.lvalue (fold.c), so we must set the flag AFTER it runs — otherwise the
    204    * lvalue check in api_lvalue_addr fails ("addr operand is not an lvalue").
    205    * Doing this before api_lvalue_addr also makes its own api_ensure_local a
    206    * no-op (kind is now SV_OPERAND), so the flag survives. An operand that is
    207    * already a place is left untouched by api_ensure_local and flows through as
    208    * before. */
    209   if (lv.kind != SV_OPERAND) api_ensure_local(g, &lv);
    210   lv.type = ty;
    211   lv.op.type = ty;
    212   lv.lvalue = 1;
    213   return api_lvalue_addr(g, &lv, cg_type_ptr_to(g->c, ty));
    214 }
    215 
    216 /* Load a 32-bit lane (at byte offset `off`) of the scalar addressed by `addr`
    217  * into a fresh i32 temp; returns the temp operand. */
    218 Operand api_wide8_load_lane(KitCg* g, Operand addr, i32 off) {
    219   KitCgTypeId i32_ty = builtin_id(KIT_CG_BUILTIN_I32);
    220   CGLocal rr = api_alloc_temp_local(g, i32_ty);
    221   Operand dst = api_op_local(rr, i32_ty);
    222   MemAccess ma;
    223   memset(&ma, 0, sizeof ma);
    224   ma.type = i32_ty;
    225   ma.size = 4;
    226   ma.align = 4;
    227   g->target->load(g->target, dst, api_op_indirect(addr.v.local, off, i32_ty),
    228                   ma);
    229   return dst;
    230 }
    231 
    232 /* Store an i32 `val` into the 32-bit lane (byte offset `off`) addressed by
    233  * `addr`. */
    234 void api_wide8_store_lane(KitCg* g, Operand addr, i32 off, Operand val) {
    235   KitCgTypeId i32_ty = builtin_id(KIT_CG_BUILTIN_I32);
    236   MemAccess ma;
    237   memset(&ma, 0, sizeof ma);
    238   ma.type = i32_ty;
    239   ma.size = 4;
    240   ma.align = 4;
    241   g->target->store(g->target, api_op_indirect(addr.v.local, off, i32_ty), val,
    242                    ma);
    243 }
    244 
    245 ApiSValue api_wide16_materialize_lvalue(KitCg* g, ApiSValue* v,
    246                                         KitCgTypeId ty) {
    247   if (v->op.kind == OPK_LOCAL &&
    248       api_unalias_type(g->c, v->op.type) == api_unalias_type(g->c, ty)) {
    249     v->lvalue = 1;
    250     return *v;
    251   }
    252   if (v->op.kind == OPK_INDIRECT) {
    253     ApiSValue out = *v;
    254     out.type = ty;
    255     out.op.type = ty;
    256     out.lvalue = 1;
    257     return out;
    258   }
    259   if (v->op.kind == OPK_LOCAL) {
    260     v->lvalue = 1;
    261     return *v;
    262   }
    263   if (v->op.kind == OPK_GLOBAL) {
    264     CGLocal local = api_f128_temp_local(g, ty);
    265     Operand dst_lv = api_op_local(local, ty);
    266     Operand dst_addr;
    267     Operand src_addr;
    268     AggregateAccess agg;
    269     ApiSValue tmp = api_make_lv(dst_lv, ty);
    270     ApiSValue src = api_make_lv(v->op, ty);
    271     dst_addr = api_lvalue_addr(g, &tmp, cg_type_ptr_to(g->c, ty));
    272     src_addr = api_lvalue_addr(g, &src, cg_type_ptr_to(g->c, ty));
    273     memset(&agg, 0, sizeof agg);
    274     agg.size = 16;
    275     agg.align = 16;
    276     g->target->copy_bytes(g->target, dst_addr, src_addr, agg);
    277     api_release_temp_local(g, dst_addr.v.local);
    278     api_release_temp_local(g, src_addr.v.local);
    279     return api_make_lv(dst_lv, ty);
    280   }
    281   if (v->op.kind == OPK_LOCAL) {
    282     CGLocal local = api_f128_temp_local(g, ty);
    283     Operand dst = api_op_local(local, ty);
    284     g->target->store(g->target, dst, v->op, api_mem_for_lvalue(g, &dst, ty));
    285     return api_make_lv(dst, ty);
    286   }
    287   if (v->op.kind == OPK_IMM) {
    288     return api_make_wide16_int_const(g, v->op.v.imm, ty);
    289   }
    290   compiler_panic(
    291       g->c, g->cur_loc,
    292       "KitCg: 16-byte scalar value is not addressable (kind %u, op %u)",
    293       (unsigned)v->kind, (unsigned)v->op.kind);
    294   return *v;
    295 }
    296 
    297 KitCgSym api_runtime_helper(KitCg* g, const char* name, KitCgTypeId ret,
    298                             const KitCgTypeId* params, u32 nparams) {
    299   KitCgFuncParam ps[3];
    300   KitCgFuncResult result;
    301   KitCgFuncSig sig;
    302   KitCgDecl decl;
    303   if (nparams > 3) return KIT_CG_SYM_NONE;
    304   memset(ps, 0, sizeof ps);
    305   for (u32 i = 0; i < nparams; ++i) ps[i].type = params[i];
    306   memset(&sig, 0, sizeof sig);
    307   /* Runtime helpers always return a single value. */
    308   memset(&result, 0, sizeof result);
    309   result.type = ret;
    310   sig.result = result;
    311   sig.params = ps;
    312   sig.nparams = nparams;
    313   sig.call_conv = KIT_CG_CC_TARGET_C;
    314   memset(&decl, 0, sizeof decl);
    315   decl.kind = KIT_CG_DECL_FUNC;
    316   decl.linkage_name = kit_cg_c_linkage_name(
    317       (KitCompiler*)g->c,
    318       pool_intern_slice(g->c->global, slice_from_cstr(name)));
    319   decl.display_name = decl.linkage_name;
    320   decl.type = kit_cg_type_func((KitCompiler*)g->c, sig);
    321   decl.sym.bind = KIT_SB_GLOBAL;
    322   decl.sym.visibility = KIT_CG_VIS_DEFAULT;
    323   return kit_cg_decl(g, decl);
    324 }
    325 
    326 void api_runtime_call_values(KitCg* g, const char* name, KitCgTypeId ret,
    327                              const KitCgTypeId* params, u32 nparams,
    328                              ApiSValue* args) {
    329   KitCgCallAttrs attrs;
    330   KitCgSym sym = api_runtime_helper(g, name, ret, params, nparams);
    331   memset(&attrs, 0, sizeof attrs);
    332   for (u32 i = 0; i < nparams; ++i) api_push(g, args[i]);
    333   api_call_symbol_common(g, sym, nparams, attrs);
    334 }
    335 
    336 /* ============================================================
    337  * Locals and params
    338  * ============================================================ */