atomic.c (14262B)
1 #include "arch/arch.h" 2 #include "cg/internal.h" 3 4 MemAccess api_mem_for_atomic(KitCg* g, KitCgTypeId val_ty) { 5 MemAccess ma; 6 api_require_scalar_mem_type(g, "atomic memory access", val_ty); 7 if (api_mem_type_size(g, val_ty, "atomic memory access") > 8 CG_MAX_ATOMIC_SIZE) { 9 compiler_panic(g->c, g->cur_loc, 10 "KitCg: atomic memory access size exceeds 8 bytes"); 11 } 12 memset(&ma, 0, sizeof ma); 13 ma.type = val_ty; 14 ma.size = val_ty ? abi_cg_sizeof(g->c->abi, val_ty) : 0; 15 ma.align = val_ty ? abi_cg_alignof(g->c->abi, val_ty) : 0; 16 ma.flags = MF_ATOMIC; 17 ma.alias.kind = (u8)ALIAS_UNKNOWN; 18 return ma; 19 } 20 21 /* Native (lock-free) atomic ceiling for the target, read from the arch backend 22 * descriptor (ArchImpl.atomic_lock_free_max). Most targets — aa64, x64, rv64, 23 * wasm32 — lower 8-byte (i64-width) atomics lock-free. rv32 reports 4: it has 24 * no native 64-bit atomic instructions (lr.d/sc.d/amo*.d are RV64-only), so 25 * 8-byte atomics there must go through the libatomic spinlock shim. (wasm32 has 26 * 4-byte pointers but still reports 8 — this is a per-arch capability, not a 27 * pointer-width test.) 28 * 29 * NOTE: this predicate is the single source of truth shared with the C 30 * front-end's __atomic_always_lock_free / __atomic_is_lock_free builtins (they 31 * route through kit_cg_atomic_is_lock_free below). Keeping it here guarantees 32 * that when kit cc compiles rt/lib/atomic/atomic_freestanding.c FOR rv32, the 33 * shim's IS_LOCK_FREE_8 test (__atomic_always_lock_free(8, p)) evaluates false, 34 * so the shim takes the spinlock path instead of recursing into an illegal 35 * native 8-byte atomic. */ 36 static u32 cg_atomic_lock_free_max(KitCompiler* c) { 37 const ArchImpl* a = arch_for_compiler(c); 38 return a ? a->atomic_lock_free_max : CG_MAX_ATOMIC_SIZE; 39 } 40 41 int kit_cg_atomic_is_legal(KitCompiler* c, KitCgMemAccess access, 42 KitCgMemOrder order) { 43 KitCgTypeId ty = resolve_type(c, access.type); 44 (void)order; 45 if (!ty) return 0; 46 if (cg_type_is_aggregate(c, ty) || cg_type_is_void(c, ty)) return 0; 47 /* Still legal up to 8 bytes everywhere: the libcall path makes 8-byte atomics 48 * available even when they are not lock-free. */ 49 return abi_cg_sizeof(c->abi, access.type) <= CG_MAX_ATOMIC_SIZE; 50 } 51 52 int kit_cg_atomic_is_lock_free(KitCompiler* c, KitCgMemAccess access) { 53 KitCgTypeId ty = resolve_type(c, access.type); 54 if (!ty) return 0; 55 if (cg_type_is_aggregate(c, ty) || cg_type_is_void(c, ty)) return 0; 56 /* Lock-free up to the native atomic width, NOT the pointer width: wasm32 has 57 * 4-byte pointers but lowers 8-byte (i64) atomics lock-free, while rv32 does 58 * not have native 64-bit atomics. */ 59 return abi_cg_sizeof(c->abi, access.type) <= cg_atomic_lock_free_max(c); 60 } 61 62 /* True when an atomic access of `val_ty` must be lowered to a libatomic 63 * (__atomic_*_8) libcall instead of a native instruction sequence. Today this 64 * is exactly the 8-byte-on-a-4-byte-target case (rv32). */ 65 static int cg_atomic_needs_libcall(KitCg* g, KitCgTypeId val_ty) { 66 return abi_cg_sizeof(g->c->abi, val_ty) == 8 && 67 cg_atomic_lock_free_max(g->c) < 8u; 68 } 69 70 /* Map a KitCgAtomicOp to the libatomic __atomic_fetch_<op>_8 / __atomic_*_8 71 * entry point. XCHG maps to __atomic_exchange_8. */ 72 static const char* cg_atomic_rmw_libcall_8(KitCgAtomicOp op) { 73 switch (op) { 74 case KIT_CG_ATOMIC_XCHG: 75 return "__atomic_exchange_8"; 76 case KIT_CG_ATOMIC_ADD: 77 return "__atomic_fetch_add_8"; 78 case KIT_CG_ATOMIC_SUB: 79 return "__atomic_fetch_sub_8"; 80 case KIT_CG_ATOMIC_AND: 81 return "__atomic_fetch_and_8"; 82 case KIT_CG_ATOMIC_OR: 83 return "__atomic_fetch_or_8"; 84 case KIT_CG_ATOMIC_XOR: 85 return "__atomic_fetch_xor_8"; 86 case KIT_CG_ATOMIC_NAND: 87 return "__atomic_fetch_nand_8"; 88 } 89 return NULL; 90 } 91 92 /* Declare a runtime function symbol with an arbitrary (<=5) param list. Mirrors 93 * api_runtime_helper (wide.c) but without its 3-param ceiling, which the 94 * 5-argument __atomic_compare_exchange_8 needs. */ 95 static KitCgSym cg_atomic_runtime_sym(KitCg* g, const char* name, 96 KitCgTypeId ret, 97 const KitCgTypeId* params, u32 nparams) { 98 KitCgFuncParam ps[5]; 99 KitCgFuncResult result; 100 KitCgFuncSig sig; 101 KitCgDecl decl; 102 if (nparams > 5) return KIT_CG_SYM_NONE; 103 memset(ps, 0, sizeof ps); 104 for (u32 i = 0; i < nparams; ++i) ps[i].type = params[i]; 105 memset(&sig, 0, sizeof sig); 106 memset(&result, 0, sizeof result); 107 result.type = ret; /* ret == KIT_CG_TYPE_NONE -> void result */ 108 sig.result = result; 109 sig.params = ps; 110 sig.nparams = nparams; 111 sig.call_conv = KIT_CG_CC_TARGET_C; 112 memset(&decl, 0, sizeof decl); 113 decl.kind = KIT_CG_DECL_FUNC; 114 decl.linkage_name = kit_cg_c_linkage_name( 115 (KitCompiler*)g->c, 116 pool_intern_slice(g->c->global, slice_from_cstr(name))); 117 decl.display_name = decl.linkage_name; 118 decl.type = kit_cg_type_func((KitCompiler*)g->c, sig); 119 decl.sym.bind = KIT_SB_GLOBAL; 120 decl.sym.visibility = KIT_CG_VIS_DEFAULT; 121 return kit_cg_decl(g, decl); 122 } 123 124 /* Emit a runtime call: push args[0..nparams) then call. The single (optional) 125 * result is left on the value stack, matching api_runtime_call_values. */ 126 static void cg_atomic_runtime_call(KitCg* g, const char* name, KitCgTypeId ret, 127 const KitCgTypeId* params, u32 nparams, 128 ApiSValue* args) { 129 KitCgCallAttrs attrs; 130 KitCgSym sym = cg_atomic_runtime_sym(g, name, ret, params, nparams); 131 memset(&attrs, 0, sizeof attrs); 132 for (u32 i = 0; i < nparams; ++i) api_push(g, args[i]); 133 api_call_symbol_common(g, sym, nparams, attrs); 134 } 135 136 void kit_cg_atomic_load(KitCg* g, KitCgMemAccess access, KitCgMemOrder order) { 137 ApiSValue ptr; 138 KitCgTypeId pty, val_ty; 139 Operand addr, dst; 140 CGLocal rr; 141 if (!g) return; 142 api_local_const_memory_boundary(g); 143 ptr = api_pop(g); 144 pty = api_sv_type(&ptr); 145 val_ty = resolve_type(g->c, access.type); 146 if (!val_ty) val_ty = api_atomic_pointee(g, pty, "KitCg: atomic_load"); 147 api_require_pointer_value(g, "atomic_load pointer", pty); 148 if (cg_atomic_needs_libcall(g, val_ty)) { 149 /* u64 __atomic_load_8(const void* ptr, int memorder) */ 150 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 151 KitCgTypeId ps[2]; 152 ApiSValue args[2]; 153 ps[0] = pty; 154 ps[1] = i32; 155 args[0] = ptr; 156 args[1] = api_make_sv(api_op_imm((i64)order, i32), i32); 157 cg_atomic_runtime_call(g, "__atomic_load_8", val_ty, ps, 2, args); 158 return; 159 } 160 addr = api_force_local(g, &ptr, pty); 161 rr = api_alloc_temp_local(g, val_ty); 162 dst = api_op_local(rr, val_ty); 163 g->target->atomic_load(g->target, dst, addr, api_mem_for_atomic(g, val_ty), 164 order); 165 api_release(g, &ptr); 166 api_push(g, api_make_sv(dst, val_ty)); 167 } 168 169 void kit_cg_atomic_store(KitCg* g, KitCgMemAccess access, KitCgMemOrder order) { 170 ApiSValue val, ptr; 171 KitCgTypeId pty, val_ty; 172 Operand addr, src; 173 if (!g) return; 174 api_local_const_memory_boundary(g); 175 val = api_pop(g); 176 ptr = api_pop(g); 177 pty = api_sv_type(&ptr); 178 val_ty = resolve_type(g->c, access.type); 179 if (!val_ty) val_ty = api_atomic_pointee(g, pty, "KitCg: atomic_store"); 180 api_require_pointer_value(g, "atomic_store pointer", pty); 181 api_validate_memory_value(g, "atomic_store", val_ty, api_sv_type(&val)); 182 if (cg_atomic_needs_libcall(g, val_ty)) { 183 /* void __atomic_store_8(void* ptr, u64 val, int memorder) */ 184 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 185 KitCgTypeId ps[3]; 186 ApiSValue args[3]; 187 ps[0] = pty; 188 ps[1] = val_ty; 189 ps[2] = i32; 190 args[0] = ptr; 191 args[1] = val; 192 args[2] = api_make_sv(api_op_imm((i64)order, i32), i32); 193 cg_atomic_runtime_call(g, "__atomic_store_8", (KitCgTypeId)0, ps, 3, args); 194 return; 195 } 196 addr = api_force_local(g, &ptr, pty); 197 src = api_sv_op_is_local_or_imm(&val) ? val.op 198 : api_force_local(g, &val, val_ty); 199 g->target->atomic_store(g->target, addr, src, api_mem_for_atomic(g, val_ty), 200 order); 201 api_release(g, &val); 202 api_release(g, &ptr); 203 } 204 205 void kit_cg_atomic_rmw(KitCg* g, KitCgMemAccess access, KitCgAtomicOp op, 206 KitCgMemOrder order) { 207 ApiSValue val, ptr; 208 KitCgTypeId pty, val_ty; 209 Operand addr, vop, dst; 210 CGLocal rr; 211 if (!g) return; 212 api_local_const_memory_boundary(g); 213 val = api_pop(g); 214 ptr = api_pop(g); 215 pty = api_sv_type(&ptr); 216 val_ty = resolve_type(g->c, access.type); 217 if (!val_ty) val_ty = api_atomic_pointee(g, pty, "KitCg: atomic_rmw"); 218 api_require_pointer_value(g, "atomic_rmw pointer", pty); 219 api_validate_memory_value(g, "atomic_rmw", val_ty, api_sv_type(&val)); 220 if (cg_atomic_needs_libcall(g, val_ty)) { 221 /* u64 __atomic_{exchange,fetch_*}_8(void* ptr, u64 val, int memorder). 222 * All return the prior value, matching native atomic_rmw semantics. */ 223 const char* name = cg_atomic_rmw_libcall_8(op); 224 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 225 KitCgTypeId ps[3]; 226 ApiSValue args[3]; 227 if (!name) { 228 compiler_panic(g->c, g->cur_loc, 229 "KitCg: unsupported 8-byte atomic rmw op"); 230 return; 231 } 232 ps[0] = pty; 233 ps[1] = val_ty; 234 ps[2] = i32; 235 args[0] = ptr; 236 args[1] = val; 237 args[2] = api_make_sv(api_op_imm((i64)order, i32), i32); 238 cg_atomic_runtime_call(g, name, val_ty, ps, 3, args); 239 return; 240 } 241 addr = api_force_local(g, &ptr, pty); 242 vop = api_sv_op_is_local_or_imm(&val) ? val.op 243 : api_force_local(g, &val, val_ty); 244 rr = api_alloc_temp_local(g, val_ty); 245 dst = api_op_local(rr, val_ty); 246 g->target->atomic_rmw(g->target, op, dst, addr, vop, 247 api_mem_for_atomic(g, val_ty), order); 248 api_release(g, &val); 249 api_release(g, &ptr); 250 api_push(g, api_make_sv(dst, val_ty)); 251 } 252 253 void kit_cg_atomic_cmpxchg(KitCg* g, KitCgMemAccess access, 254 KitCgMemOrder success, KitCgMemOrder failure, 255 int weak) { 256 ApiSValue desired, expected, ptr; 257 KitCgTypeId pty, val_ty, bool_ty; 258 Operand addr, exp_op, des_op, prior, ok; 259 CGLocal pr, kr; 260 if (!g) return; 261 api_local_const_memory_boundary(g); 262 (void)weak; 263 desired = api_pop(g); 264 expected = api_pop(g); 265 ptr = api_pop(g); 266 pty = api_sv_type(&ptr); 267 val_ty = resolve_type(g->c, access.type); 268 if (!val_ty) val_ty = api_atomic_pointee(g, pty, "KitCg: atomic_cmpxchg"); 269 api_require_pointer_value(g, "atomic_cmpxchg pointer", pty); 270 api_validate_memory_value(g, "atomic_cmpxchg expected", val_ty, 271 api_sv_type(&expected)); 272 api_validate_memory_value(g, "atomic_cmpxchg desired", val_ty, 273 api_sv_type(&desired)); 274 if (cg_atomic_needs_libcall(g, val_ty)) { 275 /* bool __atomic_compare_exchange_8(void* ptr, void* expected, u64 desired, 276 * int succ, int fail). 277 * libatomic takes `expected` by pointer and updates *expected with the 278 * observed value on failure. Our ABI is value-in / value-out, so spill the 279 * expected value to a stack slot, pass its address, then reload the slot to 280 * obtain `prior`. */ 281 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 282 KitCgTypeId ptr_to_val = cg_type_ptr_to(g->c, val_ty); 283 KitCgTypeId ps[5]; 284 ApiSValue args[5]; 285 Operand exp_slot, exp_addr, exp_src; 286 CGLocal er, ar, pr2; 287 bool_ty = builtin_id(KIT_CG_BUILTIN_BOOL); 288 /* Materialize the expected value into an addressable stack slot. */ 289 er = api_alloc_temp_local(g, val_ty); 290 exp_slot = api_op_local(er, val_ty); 291 exp_src = api_sv_op_is_local_or_imm(&expected) 292 ? expected.op 293 : api_force_local(g, &expected, val_ty); 294 g->target->store(g->target, exp_slot, exp_src, 295 api_mem_for_lvalue(g, &exp_slot, val_ty)); 296 ar = api_alloc_temp_local(g, ptr_to_val); 297 exp_addr = api_op_local(ar, ptr_to_val); 298 g->target->addr_of(g->target, exp_addr, exp_slot); 299 ps[0] = pty; 300 ps[1] = ptr_to_val; 301 ps[2] = val_ty; 302 ps[3] = i32; 303 ps[4] = i32; 304 args[0] = ptr; 305 args[1] = api_make_sv(exp_addr, ptr_to_val); 306 args[2] = desired; 307 args[3] = api_make_sv(api_op_imm((i64)success, i32), i32); 308 args[4] = api_make_sv(api_op_imm((i64)failure, i32), i32); 309 cg_atomic_runtime_call(g, "__atomic_compare_exchange_8", bool_ty, ps, 5, 310 args); 311 { 312 ApiSValue ok_sv = api_pop(g); /* the returned bool */ 313 ok = ok_sv.op; 314 } 315 /* Reload the (possibly updated) expected slot as `prior`. */ 316 pr2 = api_alloc_temp_local(g, val_ty); 317 prior = api_op_local(pr2, val_ty); 318 g->target->load(g->target, prior, exp_slot, 319 api_mem_for_lvalue(g, &exp_slot, val_ty)); 320 /* `ptr` and `desired` were pushed as call args and are consumed by the 321 * call; only `expected` (spilled to a slot, not pushed) is still owned. */ 322 api_release(g, &expected); 323 api_push(g, api_make_sv(prior, val_ty)); 324 api_push(g, api_make_sv(ok, bool_ty)); 325 return; 326 } 327 addr = api_force_local(g, &ptr, pty); 328 exp_op = api_sv_op_is_local_or_imm(&expected) 329 ? expected.op 330 : api_force_local(g, &expected, val_ty); 331 des_op = api_sv_op_is_local_or_imm(&desired) 332 ? desired.op 333 : api_force_local(g, &desired, val_ty); 334 bool_ty = builtin_id(KIT_CG_BUILTIN_BOOL); 335 pr = api_alloc_temp_local(g, val_ty); 336 kr = api_alloc_temp_local(g, bool_ty); 337 prior = api_op_local(pr, val_ty); 338 ok = api_op_local(kr, bool_ty); 339 g->target->atomic_cas(g->target, prior, ok, addr, exp_op, des_op, 340 api_mem_for_atomic(g, val_ty), success, failure); 341 api_release(g, &desired); 342 api_release(g, &expected); 343 api_release(g, &ptr); 344 api_push(g, api_make_sv(prior, val_ty)); 345 api_push(g, api_make_sv(ok, bool_ty)); 346 } 347 348 void kit_cg_atomic_fence(KitCg* g, KitCgMemOrder order) { 349 if (!g) return; 350 api_local_const_memory_boundary(g); 351 g->target->fence(g->target, order); 352 } 353 354 /* ============================================================ 355 * Inline asm (stub) 356 * ============================================================ */