kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

atomic.c (14262B)


      1 #include "arch/arch.h"
      2 #include "cg/internal.h"
      3 
      4 MemAccess api_mem_for_atomic(KitCg* g, KitCgTypeId val_ty) {
      5   MemAccess ma;
      6   api_require_scalar_mem_type(g, "atomic memory access", val_ty);
      7   if (api_mem_type_size(g, val_ty, "atomic memory access") >
      8       CG_MAX_ATOMIC_SIZE) {
      9     compiler_panic(g->c, g->cur_loc,
     10                    "KitCg: atomic memory access size exceeds 8 bytes");
     11   }
     12   memset(&ma, 0, sizeof ma);
     13   ma.type = val_ty;
     14   ma.size = val_ty ? abi_cg_sizeof(g->c->abi, val_ty) : 0;
     15   ma.align = val_ty ? abi_cg_alignof(g->c->abi, val_ty) : 0;
     16   ma.flags = MF_ATOMIC;
     17   ma.alias.kind = (u8)ALIAS_UNKNOWN;
     18   return ma;
     19 }
     20 
     21 /* Native (lock-free) atomic ceiling for the target, read from the arch backend
     22  * descriptor (ArchImpl.atomic_lock_free_max). Most targets — aa64, x64, rv64,
     23  * wasm32 — lower 8-byte (i64-width) atomics lock-free. rv32 reports 4: it has
     24  * no native 64-bit atomic instructions (lr.d/sc.d/amo*.d are RV64-only), so
     25  * 8-byte atomics there must go through the libatomic spinlock shim. (wasm32 has
     26  * 4-byte pointers but still reports 8 — this is a per-arch capability, not a
     27  * pointer-width test.)
     28  *
     29  * NOTE: this predicate is the single source of truth shared with the C
     30  * front-end's __atomic_always_lock_free / __atomic_is_lock_free builtins (they
     31  * route through kit_cg_atomic_is_lock_free below). Keeping it here guarantees
     32  * that when kit cc compiles rt/lib/atomic/atomic_freestanding.c FOR rv32, the
     33  * shim's IS_LOCK_FREE_8 test (__atomic_always_lock_free(8, p)) evaluates false,
     34  * so the shim takes the spinlock path instead of recursing into an illegal
     35  * native 8-byte atomic. */
     36 static u32 cg_atomic_lock_free_max(KitCompiler* c) {
     37   const ArchImpl* a = arch_for_compiler(c);
     38   return a ? a->atomic_lock_free_max : CG_MAX_ATOMIC_SIZE;
     39 }
     40 
     41 int kit_cg_atomic_is_legal(KitCompiler* c, KitCgMemAccess access,
     42                            KitCgMemOrder order) {
     43   KitCgTypeId ty = resolve_type(c, access.type);
     44   (void)order;
     45   if (!ty) return 0;
     46   if (cg_type_is_aggregate(c, ty) || cg_type_is_void(c, ty)) return 0;
     47   /* Still legal up to 8 bytes everywhere: the libcall path makes 8-byte atomics
     48    * available even when they are not lock-free. */
     49   return abi_cg_sizeof(c->abi, access.type) <= CG_MAX_ATOMIC_SIZE;
     50 }
     51 
     52 int kit_cg_atomic_is_lock_free(KitCompiler* c, KitCgMemAccess access) {
     53   KitCgTypeId ty = resolve_type(c, access.type);
     54   if (!ty) return 0;
     55   if (cg_type_is_aggregate(c, ty) || cg_type_is_void(c, ty)) return 0;
     56   /* Lock-free up to the native atomic width, NOT the pointer width: wasm32 has
     57    * 4-byte pointers but lowers 8-byte (i64) atomics lock-free, while rv32 does
     58    * not have native 64-bit atomics. */
     59   return abi_cg_sizeof(c->abi, access.type) <= cg_atomic_lock_free_max(c);
     60 }
     61 
     62 /* True when an atomic access of `val_ty` must be lowered to a libatomic
     63  * (__atomic_*_8) libcall instead of a native instruction sequence. Today this
     64  * is exactly the 8-byte-on-a-4-byte-target case (rv32). */
     65 static int cg_atomic_needs_libcall(KitCg* g, KitCgTypeId val_ty) {
     66   return abi_cg_sizeof(g->c->abi, val_ty) == 8 &&
     67          cg_atomic_lock_free_max(g->c) < 8u;
     68 }
     69 
     70 /* Map a KitCgAtomicOp to the libatomic __atomic_fetch_<op>_8 / __atomic_*_8
     71  * entry point. XCHG maps to __atomic_exchange_8. */
     72 static const char* cg_atomic_rmw_libcall_8(KitCgAtomicOp op) {
     73   switch (op) {
     74     case KIT_CG_ATOMIC_XCHG:
     75       return "__atomic_exchange_8";
     76     case KIT_CG_ATOMIC_ADD:
     77       return "__atomic_fetch_add_8";
     78     case KIT_CG_ATOMIC_SUB:
     79       return "__atomic_fetch_sub_8";
     80     case KIT_CG_ATOMIC_AND:
     81       return "__atomic_fetch_and_8";
     82     case KIT_CG_ATOMIC_OR:
     83       return "__atomic_fetch_or_8";
     84     case KIT_CG_ATOMIC_XOR:
     85       return "__atomic_fetch_xor_8";
     86     case KIT_CG_ATOMIC_NAND:
     87       return "__atomic_fetch_nand_8";
     88   }
     89   return NULL;
     90 }
     91 
     92 /* Declare a runtime function symbol with an arbitrary (<=5) param list. Mirrors
     93  * api_runtime_helper (wide.c) but without its 3-param ceiling, which the
     94  * 5-argument __atomic_compare_exchange_8 needs. */
     95 static KitCgSym cg_atomic_runtime_sym(KitCg* g, const char* name,
     96                                       KitCgTypeId ret,
     97                                       const KitCgTypeId* params, u32 nparams) {
     98   KitCgFuncParam ps[5];
     99   KitCgFuncResult result;
    100   KitCgFuncSig sig;
    101   KitCgDecl decl;
    102   if (nparams > 5) return KIT_CG_SYM_NONE;
    103   memset(ps, 0, sizeof ps);
    104   for (u32 i = 0; i < nparams; ++i) ps[i].type = params[i];
    105   memset(&sig, 0, sizeof sig);
    106   memset(&result, 0, sizeof result);
    107   result.type = ret; /* ret == KIT_CG_TYPE_NONE -> void result */
    108   sig.result = result;
    109   sig.params = ps;
    110   sig.nparams = nparams;
    111   sig.call_conv = KIT_CG_CC_TARGET_C;
    112   memset(&decl, 0, sizeof decl);
    113   decl.kind = KIT_CG_DECL_FUNC;
    114   decl.linkage_name = kit_cg_c_linkage_name(
    115       (KitCompiler*)g->c,
    116       pool_intern_slice(g->c->global, slice_from_cstr(name)));
    117   decl.display_name = decl.linkage_name;
    118   decl.type = kit_cg_type_func((KitCompiler*)g->c, sig);
    119   decl.sym.bind = KIT_SB_GLOBAL;
    120   decl.sym.visibility = KIT_CG_VIS_DEFAULT;
    121   return kit_cg_decl(g, decl);
    122 }
    123 
    124 /* Emit a runtime call: push args[0..nparams) then call. The single (optional)
    125  * result is left on the value stack, matching api_runtime_call_values. */
    126 static void cg_atomic_runtime_call(KitCg* g, const char* name, KitCgTypeId ret,
    127                                    const KitCgTypeId* params, u32 nparams,
    128                                    ApiSValue* args) {
    129   KitCgCallAttrs attrs;
    130   KitCgSym sym = cg_atomic_runtime_sym(g, name, ret, params, nparams);
    131   memset(&attrs, 0, sizeof attrs);
    132   for (u32 i = 0; i < nparams; ++i) api_push(g, args[i]);
    133   api_call_symbol_common(g, sym, nparams, attrs);
    134 }
    135 
    136 void kit_cg_atomic_load(KitCg* g, KitCgMemAccess access, KitCgMemOrder order) {
    137   ApiSValue ptr;
    138   KitCgTypeId pty, val_ty;
    139   Operand addr, dst;
    140   CGLocal rr;
    141   if (!g) return;
    142   api_local_const_memory_boundary(g);
    143   ptr = api_pop(g);
    144   pty = api_sv_type(&ptr);
    145   val_ty = resolve_type(g->c, access.type);
    146   if (!val_ty) val_ty = api_atomic_pointee(g, pty, "KitCg: atomic_load");
    147   api_require_pointer_value(g, "atomic_load pointer", pty);
    148   if (cg_atomic_needs_libcall(g, val_ty)) {
    149     /* u64 __atomic_load_8(const void* ptr, int memorder) */
    150     KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32);
    151     KitCgTypeId ps[2];
    152     ApiSValue args[2];
    153     ps[0] = pty;
    154     ps[1] = i32;
    155     args[0] = ptr;
    156     args[1] = api_make_sv(api_op_imm((i64)order, i32), i32);
    157     cg_atomic_runtime_call(g, "__atomic_load_8", val_ty, ps, 2, args);
    158     return;
    159   }
    160   addr = api_force_local(g, &ptr, pty);
    161   rr = api_alloc_temp_local(g, val_ty);
    162   dst = api_op_local(rr, val_ty);
    163   g->target->atomic_load(g->target, dst, addr, api_mem_for_atomic(g, val_ty),
    164                          order);
    165   api_release(g, &ptr);
    166   api_push(g, api_make_sv(dst, val_ty));
    167 }
    168 
    169 void kit_cg_atomic_store(KitCg* g, KitCgMemAccess access, KitCgMemOrder order) {
    170   ApiSValue val, ptr;
    171   KitCgTypeId pty, val_ty;
    172   Operand addr, src;
    173   if (!g) return;
    174   api_local_const_memory_boundary(g);
    175   val = api_pop(g);
    176   ptr = api_pop(g);
    177   pty = api_sv_type(&ptr);
    178   val_ty = resolve_type(g->c, access.type);
    179   if (!val_ty) val_ty = api_atomic_pointee(g, pty, "KitCg: atomic_store");
    180   api_require_pointer_value(g, "atomic_store pointer", pty);
    181   api_validate_memory_value(g, "atomic_store", val_ty, api_sv_type(&val));
    182   if (cg_atomic_needs_libcall(g, val_ty)) {
    183     /* void __atomic_store_8(void* ptr, u64 val, int memorder) */
    184     KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32);
    185     KitCgTypeId ps[3];
    186     ApiSValue args[3];
    187     ps[0] = pty;
    188     ps[1] = val_ty;
    189     ps[2] = i32;
    190     args[0] = ptr;
    191     args[1] = val;
    192     args[2] = api_make_sv(api_op_imm((i64)order, i32), i32);
    193     cg_atomic_runtime_call(g, "__atomic_store_8", (KitCgTypeId)0, ps, 3, args);
    194     return;
    195   }
    196   addr = api_force_local(g, &ptr, pty);
    197   src = api_sv_op_is_local_or_imm(&val) ? val.op
    198                                         : api_force_local(g, &val, val_ty);
    199   g->target->atomic_store(g->target, addr, src, api_mem_for_atomic(g, val_ty),
    200                           order);
    201   api_release(g, &val);
    202   api_release(g, &ptr);
    203 }
    204 
    205 void kit_cg_atomic_rmw(KitCg* g, KitCgMemAccess access, KitCgAtomicOp op,
    206                        KitCgMemOrder order) {
    207   ApiSValue val, ptr;
    208   KitCgTypeId pty, val_ty;
    209   Operand addr, vop, dst;
    210   CGLocal rr;
    211   if (!g) return;
    212   api_local_const_memory_boundary(g);
    213   val = api_pop(g);
    214   ptr = api_pop(g);
    215   pty = api_sv_type(&ptr);
    216   val_ty = resolve_type(g->c, access.type);
    217   if (!val_ty) val_ty = api_atomic_pointee(g, pty, "KitCg: atomic_rmw");
    218   api_require_pointer_value(g, "atomic_rmw pointer", pty);
    219   api_validate_memory_value(g, "atomic_rmw", val_ty, api_sv_type(&val));
    220   if (cg_atomic_needs_libcall(g, val_ty)) {
    221     /* u64 __atomic_{exchange,fetch_*}_8(void* ptr, u64 val, int memorder).
    222      * All return the prior value, matching native atomic_rmw semantics. */
    223     const char* name = cg_atomic_rmw_libcall_8(op);
    224     KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32);
    225     KitCgTypeId ps[3];
    226     ApiSValue args[3];
    227     if (!name) {
    228       compiler_panic(g->c, g->cur_loc,
    229                      "KitCg: unsupported 8-byte atomic rmw op");
    230       return;
    231     }
    232     ps[0] = pty;
    233     ps[1] = val_ty;
    234     ps[2] = i32;
    235     args[0] = ptr;
    236     args[1] = val;
    237     args[2] = api_make_sv(api_op_imm((i64)order, i32), i32);
    238     cg_atomic_runtime_call(g, name, val_ty, ps, 3, args);
    239     return;
    240   }
    241   addr = api_force_local(g, &ptr, pty);
    242   vop = api_sv_op_is_local_or_imm(&val) ? val.op
    243                                         : api_force_local(g, &val, val_ty);
    244   rr = api_alloc_temp_local(g, val_ty);
    245   dst = api_op_local(rr, val_ty);
    246   g->target->atomic_rmw(g->target, op, dst, addr, vop,
    247                         api_mem_for_atomic(g, val_ty), order);
    248   api_release(g, &val);
    249   api_release(g, &ptr);
    250   api_push(g, api_make_sv(dst, val_ty));
    251 }
    252 
    253 void kit_cg_atomic_cmpxchg(KitCg* g, KitCgMemAccess access,
    254                            KitCgMemOrder success, KitCgMemOrder failure,
    255                            int weak) {
    256   ApiSValue desired, expected, ptr;
    257   KitCgTypeId pty, val_ty, bool_ty;
    258   Operand addr, exp_op, des_op, prior, ok;
    259   CGLocal pr, kr;
    260   if (!g) return;
    261   api_local_const_memory_boundary(g);
    262   (void)weak;
    263   desired = api_pop(g);
    264   expected = api_pop(g);
    265   ptr = api_pop(g);
    266   pty = api_sv_type(&ptr);
    267   val_ty = resolve_type(g->c, access.type);
    268   if (!val_ty) val_ty = api_atomic_pointee(g, pty, "KitCg: atomic_cmpxchg");
    269   api_require_pointer_value(g, "atomic_cmpxchg pointer", pty);
    270   api_validate_memory_value(g, "atomic_cmpxchg expected", val_ty,
    271                             api_sv_type(&expected));
    272   api_validate_memory_value(g, "atomic_cmpxchg desired", val_ty,
    273                             api_sv_type(&desired));
    274   if (cg_atomic_needs_libcall(g, val_ty)) {
    275     /* bool __atomic_compare_exchange_8(void* ptr, void* expected, u64 desired,
    276      *                                  int succ, int fail).
    277      * libatomic takes `expected` by pointer and updates *expected with the
    278      * observed value on failure. Our ABI is value-in / value-out, so spill the
    279      * expected value to a stack slot, pass its address, then reload the slot to
    280      * obtain `prior`. */
    281     KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32);
    282     KitCgTypeId ptr_to_val = cg_type_ptr_to(g->c, val_ty);
    283     KitCgTypeId ps[5];
    284     ApiSValue args[5];
    285     Operand exp_slot, exp_addr, exp_src;
    286     CGLocal er, ar, pr2;
    287     bool_ty = builtin_id(KIT_CG_BUILTIN_BOOL);
    288     /* Materialize the expected value into an addressable stack slot. */
    289     er = api_alloc_temp_local(g, val_ty);
    290     exp_slot = api_op_local(er, val_ty);
    291     exp_src = api_sv_op_is_local_or_imm(&expected)
    292                   ? expected.op
    293                   : api_force_local(g, &expected, val_ty);
    294     g->target->store(g->target, exp_slot, exp_src,
    295                      api_mem_for_lvalue(g, &exp_slot, val_ty));
    296     ar = api_alloc_temp_local(g, ptr_to_val);
    297     exp_addr = api_op_local(ar, ptr_to_val);
    298     g->target->addr_of(g->target, exp_addr, exp_slot);
    299     ps[0] = pty;
    300     ps[1] = ptr_to_val;
    301     ps[2] = val_ty;
    302     ps[3] = i32;
    303     ps[4] = i32;
    304     args[0] = ptr;
    305     args[1] = api_make_sv(exp_addr, ptr_to_val);
    306     args[2] = desired;
    307     args[3] = api_make_sv(api_op_imm((i64)success, i32), i32);
    308     args[4] = api_make_sv(api_op_imm((i64)failure, i32), i32);
    309     cg_atomic_runtime_call(g, "__atomic_compare_exchange_8", bool_ty, ps, 5,
    310                            args);
    311     {
    312       ApiSValue ok_sv = api_pop(g); /* the returned bool */
    313       ok = ok_sv.op;
    314     }
    315     /* Reload the (possibly updated) expected slot as `prior`. */
    316     pr2 = api_alloc_temp_local(g, val_ty);
    317     prior = api_op_local(pr2, val_ty);
    318     g->target->load(g->target, prior, exp_slot,
    319                     api_mem_for_lvalue(g, &exp_slot, val_ty));
    320     /* `ptr` and `desired` were pushed as call args and are consumed by the
    321      * call; only `expected` (spilled to a slot, not pushed) is still owned. */
    322     api_release(g, &expected);
    323     api_push(g, api_make_sv(prior, val_ty));
    324     api_push(g, api_make_sv(ok, bool_ty));
    325     return;
    326   }
    327   addr = api_force_local(g, &ptr, pty);
    328   exp_op = api_sv_op_is_local_or_imm(&expected)
    329                ? expected.op
    330                : api_force_local(g, &expected, val_ty);
    331   des_op = api_sv_op_is_local_or_imm(&desired)
    332                ? desired.op
    333                : api_force_local(g, &desired, val_ty);
    334   bool_ty = builtin_id(KIT_CG_BUILTIN_BOOL);
    335   pr = api_alloc_temp_local(g, val_ty);
    336   kr = api_alloc_temp_local(g, bool_ty);
    337   prior = api_op_local(pr, val_ty);
    338   ok = api_op_local(kr, bool_ty);
    339   g->target->atomic_cas(g->target, prior, ok, addr, exp_op, des_op,
    340                         api_mem_for_atomic(g, val_ty), success, failure);
    341   api_release(g, &desired);
    342   api_release(g, &expected);
    343   api_release(g, &ptr);
    344   api_push(g, api_make_sv(prior, val_ty));
    345   api_push(g, api_make_sv(ok, bool_ty));
    346 }
    347 
    348 void kit_cg_atomic_fence(KitCg* g, KitCgMemOrder order) {
    349   if (!g) return;
    350   api_local_const_memory_boundary(g);
    351   g->target->fence(g->target, order);
    352 }
    353 
    354 /* ============================================================
    355  * Inline asm (stub)
    356  * ============================================================ */