native_direct_target.c (74862B)
1 #include "cg/native_direct_target.h" 2 3 /* NativeDirectTarget is intentionally single-pass: semantic CG calls are 4 * lowered immediately to NativeTarget operations, MCEmitter owns label fixups, 5 * and function-end calls note_frame_state()/patch_apply() let the native 6 * backend patch deferred frame/prologue details after max outgoing space is 7 * known. Direct lowering currently forwards final frame state but does not 8 * author generic NativePatch records itself. 9 * 10 * Remaining direct/backend cutover work: stack arguments, tail/musttail, 11 * varargs, typed inline-asm register/memory bindings and outputs, 12 * label-address data and computed gotos, records/sret/large aggregates, FP and 13 * rounding conversions, fuller scalar intrinsics, and production-grade atomic 14 * RMW/CAS lowering. */ 15 16 #include <string.h> 17 18 #include "abi/abi.h" 19 #include "cg/native_asm.h" 20 #include "cg/type.h" 21 #include "core/arena.h" 22 #include "core/pool.h" 23 #include "core/slice.h" 24 25 #define NATIVE_DIRECT_MAGIC 0x4e445447u 26 27 static NativeDirectTarget* nd_of(CgTarget* t) { return (NativeDirectTarget*)t; } 28 29 static _Noreturn void nd_panic(NativeDirectTarget* d, const char* what) { 30 compiler_panic(d->base.c, d->loc, "native direct target: %s", what); 31 } 32 33 static void* nd_arena(NativeDirectTarget* d, size_t size, size_t align) { 34 void* p = arena_zalloc(d->base.c->tu, size, align); 35 if (!p) nd_panic(d, "out of memory"); 36 return p; 37 } 38 39 /* Pick a transient NativeLoc array for one op: the on-struct buffer when N 40 * fits, else a one-shot arena allocation. N == 0 yields NULL. 
*/ 41 static NativeLoc* nd_loc_buf(NativeDirectTarget* d, NativeLoc* buf, u32 cap, 42 u32 n) { 43 if (!n) return NULL; 44 if (n <= cap) return buf; 45 return nd_arena(d, sizeof(NativeLoc) * n, _Alignof(NativeLoc)); 46 } 47 48 static void nd_grow_locals(NativeDirectTarget* d, u32 want) { 49 NativeDirectLocal* next; 50 u32 cap; 51 if (d->locals_cap >= want) return; 52 cap = d->locals_cap ? d->locals_cap : 32u; 53 while (cap < want) cap *= 2u; 54 next = nd_arena(d, sizeof(*next) * cap, _Alignof(NativeDirectLocal)); 55 if (d->locals) memcpy(next, d->locals, sizeof(*next) * d->nlocals); 56 d->locals = next; 57 d->locals_cap = cap; 58 } 59 60 static void nd_grow_labels(NativeDirectTarget* d, u32 want) { 61 MCLabel* next; 62 u32 cap; 63 if (d->labels_cap >= want) return; 64 cap = d->labels_cap ? d->labels_cap : 32u; 65 while (cap < want) cap *= 2u; 66 next = nd_arena(d, sizeof(*next) * cap, _Alignof(MCLabel)); 67 if (d->labels) memcpy(next, d->labels, sizeof(*next) * d->labels_cap); 68 d->labels = next; 69 d->labels_cap = cap; 70 } 71 72 static void nd_grow_scopes(NativeDirectTarget* d, u32 want) { 73 NativeDirectScope* next; 74 u32 cap; 75 if (d->scopes_cap >= want) return; 76 cap = d->scopes_cap ? 
d->scopes_cap : 16u; 77 while (cap < want) cap *= 2u; 78 next = nd_arena(d, sizeof(*next) * cap, _Alignof(NativeDirectScope)); 79 if (d->scopes) memcpy(next, d->scopes, sizeof(*next) * d->nscopes); 80 d->scopes = next; 81 d->scopes_cap = cap; 82 } 83 84 static NativeDirectLocal* nd_local(NativeDirectTarget* d, CGLocal local) { 85 if (local == CG_LOCAL_NONE || local > d->nlocals) 86 nd_panic(d, "bad semantic local"); 87 return &d->locals[local - 1u]; 88 } 89 90 static NativeAllocClass nd_class_for_type(NativeDirectTarget* d, 91 KitCgTypeId type) { 92 if (d->native && d->native->class_for_type) 93 return d->native->class_for_type(d->native, type); 94 return NATIVE_REG_INT; 95 } 96 97 static const NativeAllocClassInfo* nd_class_info(NativeDirectTarget* d, 98 NativeAllocClass cls) { 99 const NativeAllocClassInfo* ci = (u32)cls < 3u ? d->class_info[cls] : NULL; 100 if (!ci) nd_panic(d, "target has no requested register class"); 101 return ci; 102 } 103 104 /* Register-location constructor is shared as native_loc_reg in 105 * native_target.h (arg order: type, cls, reg). 
 */

/* Forward declarations: the scratch allocator below needs the cache helpers
 * that are defined later in the file. */
static void nd_flush_local(NativeDirectTarget* d, CGLocal local);
static Reg nd_cache_reg_for(NativeDirectTarget* d, CGLocal local,
                            KitCgTypeId access_type);
static Reg nd_pick_cache_victim(NativeDirectTarget* d, NativeAllocClass cls);

/* Callee-saved register mask for CLS as reported by the native target. */
static u32 nd_callee_saved_mask(NativeDirectTarget* d, NativeAllocClass cls) {
  return native_target_callee_saved_mask(d->native, cls);
}

/* Caller-saved register mask for CLS as reported by the native target. */
static u32 nd_caller_saved_mask(NativeDirectTarget* d, NativeAllocClass cls) {
  return native_target_caller_saved_mask(d->native, cls);
}

/* Record REG in callee_saved_used[cls] when the target's callee-saved mask for
 * CLS contains it. Classes >= 3 and registers >= 32 are ignored (the masks are
 * 32-bit, so larger register numbers cannot be represented). */
static void nd_note_reg_used(NativeDirectTarget* d, NativeAllocClass cls,
                             Reg reg) {
  if ((u32)cls >= 3u || reg >= 32u) return;
  if (nd_callee_saved_mask(d, cls) & (1u << reg))
    d->callee_saved_used[cls] |= 1u << reg;
}

/* Acquire a temporary register of class CLS and pin it in scratch_used.
 * Search order: the class's scratch list, then its allocable list, then the
 * register of an evicted cached local. Panics when nothing is available. The
 * caller releases the register with nd_scratch_release. */
static Reg nd_scratch_acquire(NativeDirectTarget* d, NativeAllocClass cls) {
  const NativeAllocClassInfo* ci = nd_class_info(d, cls);
  const Reg* regs = ci->scratch;
  u32 nregs = ci->nscratch;
  /* Prefer a register that is neither pinned (scratch_used) nor caching a live
   * local (reg_owner). */
  for (u32 pass = 0; pass < 2u; ++pass) {
    for (u32 i = 0; i < nregs; ++i) {
      Reg r = regs[i];
      if (r >= 32u) continue; /* not representable in the 32-bit masks */
      if ((d->scratch_used[cls] & (1u << r)) == 0 &&
          d->reg_owner[cls][r] == CG_LOCAL_NONE) {
        d->scratch_used[cls] |= 1u << r;
        nd_note_reg_used(d, cls, r);
        return r;
      }
    }
    /* Second pass: retry with the allocable list. */
    regs = ci->allocable;
    nregs = ci->nallocable;
  }
  /* Under pressure, evict the LRU non-pinned cached local (spilling it to its
   * home) and reuse its register as a scratch temporary. */
  {
    Reg r = nd_pick_cache_victim(d, cls);
    if (r != REG_NONE) {
      nd_flush_local(d, d->reg_owner[cls][r]);
      d->scratch_used[cls] |= 1u << r;
      nd_note_reg_used(d, cls, r);
      return r;
    }
  }
  nd_panic(d, "out of scratch registers");
}

/* Unpin REG in class CLS. Only clears the scratch_used bit; any cache
 * ownership of the register is left untouched. Registers >= 32 are ignored. */
static void nd_scratch_release(NativeDirectTarget* d, NativeAllocClass cls,
                               Reg reg) {
  if (reg < 32u) d->scratch_used[cls] &= ~(1u << reg);
}

/* Ask the backend for a frame slot described by DESC. Panics when the backend
 * has no frame_slot hook or returns NATIVE_FRAME_SLOT_NONE, so the result is
 * always a real slot. */
static NativeFrameSlot nd_alloc_frame_slot(NativeDirectTarget* d,
                                           const NativeFrameSlotDesc* desc) {
  NativeFrameSlot slot = NATIVE_FRAME_SLOT_NONE;
  if (d->native && d->native->frame_slot)
    slot = d->native->frame_slot(d->native, desc);
  else
    nd_panic(d, "target does not allocate frame slots");
  if (slot == NATIVE_FRAME_SLOT_NONE)
    nd_panic(d, "frame slot allocation failed");
  return slot;
}

/* Translate a semantic local description into a frame-slot request of kind
 * NATIVE_FRAME_SLOT_LOCAL, carrying over the address-taken and
 * memory-required flags. */
static NativeFrameSlotDesc nd_slot_desc_local(const CGLocalDesc* in) {
  NativeFrameSlotDesc out;
  memset(&out, 0, sizeof out);
  out.type = in->type;
  out.name = in->name;
  out.loc = in->loc;
  out.size = in->size;
  out.align = in->align;
  out.kind = NATIVE_FRAME_SLOT_LOCAL;
  if (in->flags & CG_LOCAL_ADDR_TAKEN)
    out.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN;
  if (in->flags & CG_LOCAL_MEMORY_REQUIRED)
    out.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED;
  return out;
}

/* Same translation for a parameter (kind NATIVE_FRAME_SLOT_PARAM). Parameter
 * flags are tested with the CG_LOCAL_* bits. */
static NativeFrameSlotDesc nd_slot_desc_param(const CGParamDesc* in) {
  NativeFrameSlotDesc out;
  memset(&out, 0, sizeof out);
  out.type = in->type;
  out.name = in->name;
  out.loc = in->loc;
  out.size = in->size;
  out.align = in->align;
  out.kind = NATIVE_FRAME_SLOT_PARAM;
  if (in->flags & CG_LOCAL_ADDR_TAKEN)
    out.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN;
  if (in->flags & CG_LOCAL_MEMORY_REQUIRED)
    out.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED;
  return out;
}

/* Create a new semantic local: append a record, give it a frame home, and
 * return its 1-based id. The local starts uncached (reg == REG_NONE). */
static CGLocal nd_alloc_local(NativeDirectTarget* d, const CGLocalDesc* desc) {
  NativeDirectLocal* l;
  NativeFrameSlotDesc fsd;
  CGLocal id;
  nd_grow_locals(d, d->nlocals + 1u);
  id = d->nlocals + 1u; /* ids are 1-based; the record lives at id - 1 */
  l = &d->locals[d->nlocals++];
  memset(l, 0, sizeof *l);
  l->type = desc->type;
  l->size = desc->size;
  l->align = desc->align;
  l->flags = desc->flags;
  l->reg = REG_NONE;
  l->address_taken = (desc->flags & CG_LOCAL_ADDR_TAKEN) != 0;
  l->memory_required = (desc->flags & CG_LOCAL_MEMORY_REQUIRED) != 0;
  l->cls = (u8)nd_class_for_type(d, desc->type);
  fsd = nd_slot_desc_local(desc);
  l->home = nd_alloc_frame_slot(d, &fsd);
  return id;
}

/* Map a semantic label id to the backend MCLabel. Panics on LABEL_NONE, an id
 * past nlabels, or a slot holding a zero label. */
static MCLabel nd_mc_label(NativeDirectTarget* d, Label label) {
  if (label == LABEL_NONE || label > d->nlabels || !d->labels[label])
    nd_panic(d, "bad label");
  return d->labels[label];
}

/* Allocate a fresh backend label and return its semantic id. Labels are
 * indexed directly by id (slot 0 is never assigned), hence the table is grown
 * to id + 1 entries. */
static Label nd_label_new_raw(NativeDirectTarget* d) {
  Label id;
  if (!d->native || !d->native->label_new)
    nd_panic(d, "target does not allocate labels");
  id = d->nlabels + 1u;
  nd_grow_labels(d, id + 1u);
  d->labels[id] = d->native->label_new(d->native);
  d->nlabels = id;
  return id;
}

/* NativeLoc naming LOCAL's frame home. TYPE overrides the access type; 0
 * falls back to the local's declared type. */
static NativeLoc nd_loc_frame(NativeDirectTarget* d, CGLocal local,
                              KitCgTypeId type) {
  NativeDirectLocal* l = nd_local(d, local);
  NativeLoc out;
  memset(&out, 0, sizeof out);
  out.kind = NATIVE_LOC_FRAME;
  out.cls = l->cls;
  out.type = type ? type : l->type;
  out.v.frame = l->home;
  return out;
}

/* NativeLoc for the immediate IMM of type TYPE. */
static NativeLoc nd_loc_imm(i64 imm, KitCgTypeId type) {
  NativeLoc out;
  memset(&out, 0, sizeof out);
  out.kind = NATIVE_LOC_IMM;
  out.type = type;
  out.v.imm = imm;
  return out;
}

/* NativeLoc for the global SYM + ADDEND of type TYPE. */
static NativeLoc nd_loc_global(ObjSymId sym, i64 addend, KitCgTypeId type) {
  NativeLoc out;
  memset(&out, 0, sizeof out);
  out.kind = NATIVE_LOC_GLOBAL;
  out.type = type;
  out.v.global.sym = sym;
  out.v.global.addend = addend;
  return out;
}

/* Translate a semantic operand into a NativeLoc without consulting the
 * register cache: locals map to their frame homes, and INDIRECT operands name
 * the frame homes of their base/index locals (FRAME_VALUE kinds). Panics on
 * any other operand kind. */
static NativeLoc nd_loc_operand(NativeDirectTarget* d, Operand op) {
  switch ((OpKind)op.kind) {
    case OPK_IMM:
      return nd_loc_imm(op.v.imm, op.type);
    case OPK_LOCAL:
      return nd_loc_frame(d, op.v.local, op.type);
    case OPK_GLOBAL:
      return nd_loc_global(op.v.global.sym, op.v.global.addend, op.type);
    case OPK_INDIRECT: {
      NativeDirectLocal* bl = nd_local(d, op.v.ind.base);
      NativeLoc out;
      memset(&out, 0, sizeof out);
      out.kind = NATIVE_LOC_ADDR;
      out.type = op.type;
      out.v.addr.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE;
      out.v.addr.base.frame = bl->home;
      out.v.addr.cls = bl->cls;
      out.v.addr.base_type = bl->type;
      if (op.v.ind.index != CG_LOCAL_NONE) {
        NativeDirectLocal* il = nd_local(d, op.v.ind.index);
        out.v.addr.index_kind = NATIVE_ADDR_INDEX_FRAME_VALUE;
        out.v.addr.index.frame = il->home;
        out.v.addr.index_cls = il->cls;
        out.v.addr.index_type = il->type;
      }
      out.v.addr.log2_scale = op.v.ind.log2_scale;
      out.v.addr.offset = op.v.ind.ofs;
      return out;
    }
    default:
      nd_panic(d, "bad operand kind");
  }
}

/* Build the address of the storage OP names (LOCAL home, GLOBAL, or INDIRECT).
 * For INDIRECT, a base/index local that is live in the cache is used straight
 * from its register, which this function pins in scratch_used; otherwise the
 * address refers to the local's frame home (FRAME_VALUE). Panics for operand
 * kinds that have no storage. */
static NativeAddr nd_addr_storage(NativeDirectTarget* d, Operand op) {
  NativeAddr out;
  memset(&out, 0, sizeof out);
  switch ((OpKind)op.kind) {
    case OPK_LOCAL: {
      /* The local's home is addressed directly (a memory access reads/writes
       * the frame slot itself, e.g. by-value aggregate field extraction). This
       * is not pointer aliasing, but it does read the home, so a cached value
       * must be made current: spill if dirty and drop the entry. */
      NativeDirectLocal* l;
      nd_flush_local(d, op.v.local);
      l = nd_local(d, op.v.local);
      out.base_kind = NATIVE_ADDR_BASE_FRAME;
      out.base.frame = l->home;
      out.cls = l->cls;
      out.base_type = l->type;
      return out;
    }
    case OPK_GLOBAL:
      out.base_kind = NATIVE_ADDR_BASE_GLOBAL;
      out.base.global.sym = op.v.global.sym;
      out.base.global.addend = op.v.global.addend;
      out.base_type = op.type;
      return out;
    case OPK_INDIRECT: {
      NativeDirectLocal* bl = nd_local(d, op.v.ind.base);
      Reg br = nd_cache_reg_for(d, op.v.ind.base, bl->type);
      out.cls = bl->cls;
      out.base_type = bl->type;
      if (br != REG_NONE) {
        out.base_kind = NATIVE_ADDR_BASE_REG;
        out.base.reg = br;
        d->scratch_used[bl->cls] |= 1u
                                    << br; /* pin; unpinned at temps release */
      } else {
        out.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE;
        out.base.frame = bl->home;
      }
      if (op.v.ind.index != CG_LOCAL_NONE) {
        NativeDirectLocal* il = nd_local(d, op.v.ind.index);
        Reg ir = nd_cache_reg_for(d, op.v.ind.index, il->type);
        out.index_cls = il->cls;
        out.index_type = il->type;
        if (ir != REG_NONE) {
          out.index_kind = NATIVE_ADDR_INDEX_REG;
          out.index.reg = ir;
          d->scratch_used[il->cls] |= 1u << ir; /* pin, as for the base */
        } else {
          out.index_kind = NATIVE_ADDR_INDEX_FRAME_VALUE;
          out.index.frame = il->home;
        }
      }
      out.log2_scale = op.v.ind.log2_scale;
      out.offset = op.v.ind.ofs;
      return out;
    }
    default:
      nd_panic(d, "operand is not addressable storage");
  }
}

/* Build the address OP designates when used as a pointer. A LOCAL of pointer
 * type contributes its value (cache register if live, else loaded from the
 * home); a non-pointer LOCAL contributes the address of its home. GLOBAL and
 * INDIRECT behave as in nd_addr_storage. */
static NativeAddr nd_addr_pointer(NativeDirectTarget* d, Operand op) {
  NativeAddr out;
  memset(&out, 0, sizeof out);
  switch ((OpKind)op.kind) {
    case OPK_LOCAL: {
      NativeDirectLocal* l = nd_local(d, op.v.local);
      out.cls = l->cls;
      out.base_type = l->type;
      if (cg_type_is_ptr(d->base.c, op.type)) {
        /* Pointer value lives in the local: use its live register if cached
         * (a dirty cached pointer is a valid base), else load from the home. */
        Reg r = nd_cache_reg_for(d, op.v.local, l->type);
        if (r != REG_NONE) {
          out.base_kind = NATIVE_ADDR_BASE_REG;
          out.base.reg = r;
          d->scratch_used[l->cls] |= 1u << r; /* pin until temps release */
        } else {
          out.base_kind = NATIVE_ADDR_BASE_FRAME_VALUE;
          out.base.frame = l->home;
        }
      } else {
        /* The local's home is addressed directly; make it current first (see
         * nd_addr_storage OPK_LOCAL). */
        nd_flush_local(d, op.v.local);
        out.base_kind = NATIVE_ADDR_BASE_FRAME;
        out.base.frame = l->home;
      }
      return out;
    }
    case OPK_GLOBAL:
      out.base_kind = NATIVE_ADDR_BASE_GLOBAL;
      out.base.global.sym = op.v.global.sym;
      out.base.global.addend = op.v.global.addend;
      out.base_type = op.type;
      return out;
    case OPK_INDIRECT:
      return nd_addr_storage(d, op);
    default:
      nd_panic(d, "operand is not a pointer address");
  }
}

/* Panic with NAME unless the backend exists and provides hook MEMBER. */
#define ND_REQUIRE_NATIVE(d, member, name)                           \
  do {                                                               \
    if (!(d)->native || !(d)->native->member) nd_panic((d), (name)); \
  } while (0)

/* Registers pinned on behalf of one materialized address (REG_NONE when the
 * slot is unused), with the class each belongs to. Released via
 * nd_addr_temps_release. */
typedef struct NdAddrTemps {
  Reg base;
  Reg index;
  NativeAllocClass base_cls;
  NativeAllocClass index_cls;
} NdAddrTemps;

static void nd_addr_temps_release(NativeDirectTarget* d,
                                  const NdAddrTemps* temps);

/* MemAccess with the given type/size/align; all other fields are zero. */
static MemAccess nd_scalar_mem(KitCgTypeId type, u32 size, u32 align) {
  MemAccess mem;
  memset(&mem, 0, sizeof mem);
  mem.type = type;
  mem.size = size;
  mem.align = align;
  return mem;
}

/* MemAccess sized and aligned for TYPE. A zero TYPE is treated as the builtin
 * I64; sizes that do not fit in 32 bits panic. */
static MemAccess nd_type_mem(NativeDirectTarget* d, KitCgTypeId type) {
  u64 size;
  if (!type) type = builtin_id(KIT_CG_BUILTIN_I64);
  size = cg_type_size(d->base.c, type);
  if (size > 0xffffffffu) nd_panic(d, "scalar type is too large");
  return nd_scalar_mem(type, (u32)size, cg_type_align(d->base.c, type));
}

/* Forward a barrier request to the ops table when it provides one; otherwise
 * a no-op. */
static void nd_barrier(NativeDirectTarget* d, u32 flags) {
  if (d->ops && d->ops->barrier) d->ops->barrier(d, flags);
}

/* Emit a load of TYPE from frame slot FRAME (offset 0) into register DST.
 * Panics when the backend has no load hook. */
static void nd_load_frame_to_reg(NativeDirectTarget* d, NativeLoc dst,
                                 NativeFrameSlot frame, KitCgTypeId type) {
  NativeAddr addr;
  MemAccess mem;
  memset(&addr, 0, sizeof addr);
  addr.base_kind = NATIVE_ADDR_BASE_FRAME;
  addr.base.frame = frame;
  addr.base_type = type;
  mem = nd_type_mem(d, type);
  ND_REQUIRE_NATIVE(d, load, "target does not emit loads");
  d->native->load(d->native, dst, addr, mem);
}

/* Emit a store of SRC as TYPE into frame slot FRAME (offset 0). Panics when
 * the backend has no store hook. */
static void nd_store_reg_to_frame(NativeDirectTarget* d, NativeFrameSlot frame,
                                  KitCgTypeId type, NativeLoc src) {
  NativeAddr addr;
  MemAccess mem;
  memset(&addr, 0, sizeof addr);
  addr.base_kind = NATIVE_ADDR_BASE_FRAME;
  addr.base.frame = frame;
  addr.base_type = type;
  mem = nd_type_mem(d, type);
  ND_REQUIRE_NATIVE(d, store, "target does not emit stores");
  d->native->store(d->native, addr, src, mem);
}

static void nd_copy_to_reg(NativeDirectTarget* d, NativeLoc dst, NativeLoc src);
static void nd_release_materialized(NativeDirectTarget* d, NativeLoc loc);
static void nd_store_operand_from_reg(NativeDirectTarget* d, Operand dst,
                                      NativeLoc src);

/* --- Local register cache (write-back, basic-block-scoped) ---------------- *
 * Only scalar, non-address-taken locals are cached, and only in caller-saved
 * allocable registers. Entries are created solely by pure-compute destinations
 * (nd_dst_reg/nd_dst_writeback) and are always dirty; reads hit a live entry or
 * fall back to a frame load without creating one. nd_flush_all spills and
 * empties the cache at the top of every non-pure-compute op, so the cache only
 * survives across straight-line runs of compute ops.
Caching prefers the 490 * register-file caller-saved mask; if the live OS ABI treats one of those 491 * registers as callee-saved, nd_note_reg_used reports it to the backend before 492 * the deferred prologue is patched. */ 493 494 static int nd_local_cacheable(NativeDirectTarget* d, 495 const NativeDirectLocal* l) { 496 return !l->address_taken && !l->memory_required && l->size != 0 && 497 l->size <= (u32)d->base.c->target.ptr_size; 498 } 499 500 /* If LOCAL is currently cached and the access reads it at its cached (storage) 501 * width, return its live register; else REG_NONE. Used by the address builders 502 * to point an address at a base/index local's live register instead of reading 503 * a possibly-stale frame home. Base/index reads are always of the local's own 504 * type, so the width check is trivially met for that use; the value-read width 505 * hazard is handled separately in nd_materialize_operand. */ 506 /* Stamp a cache touch (def/read/addressing use) for LRU victim selection. */ 507 static void nd_touch_local(NativeDirectTarget* d, NativeDirectLocal* l) { 508 l->last_use = ++d->use_tick; 509 } 510 511 static Reg nd_cache_reg_for(NativeDirectTarget* d, CGLocal local, 512 KitCgTypeId access_type) { 513 NativeDirectLocal* l = nd_local(d, local); 514 if (l->reg == REG_NONE) return REG_NONE; 515 if (!nd_local_cacheable(d, l)) return REG_NONE; 516 if (access_type && access_type != l->type) return REG_NONE; 517 nd_touch_local(d, l); 518 return l->reg; 519 } 520 521 /* Pick the least-recently-used non-pinned cached local in CLS as a spill victim 522 * (its register can then be reused). REG_NONE if every owned reg is pinned. 523 * Pressure is real in Design B (the cache survives across memory ops), so an 524 * arbitrary victim would thrash a hot local; LRU keeps the live set resident. 
525 */ 526 static Reg nd_pick_cache_victim(NativeDirectTarget* d, NativeAllocClass cls) { 527 const NativeAllocClassInfo* ci = nd_class_info(d, cls); 528 Reg best = REG_NONE; 529 u32 best_use = 0; 530 for (u32 i = 0; i < ci->nallocable; ++i) { 531 Reg r = ci->allocable[i]; 532 CGLocal owner; 533 if (r >= 32u) continue; 534 owner = d->reg_owner[cls][r]; 535 if (owner == CG_LOCAL_NONE) continue; 536 if (d->scratch_used[cls] & (1u << r)) continue; /* pinned: never a victim */ 537 if (best == REG_NONE || nd_local(d, owner)->last_use < best_use) { 538 best = r; 539 best_use = nd_local(d, owner)->last_use; 540 } 541 } 542 return best; 543 } 544 545 /* Pick a caller-saved allocable register to cache a local in: a free one, else 546 * evict the LRU non-pinned cached local. REG_NONE means use the frame-only 547 * path. */ 548 static Reg nd_cache_alloc(NativeDirectTarget* d, NativeAllocClass cls) { 549 const NativeAllocClassInfo* ci = nd_class_info(d, cls); 550 u32 caller = nd_caller_saved_mask(d, cls); 551 Reg victim; 552 for (u32 i = 0; i < ci->nallocable; ++i) { 553 Reg r = ci->allocable[i]; 554 if (r >= 32u) continue; 555 if ((caller & (1u << r)) && d->reg_owner[cls][r] == CG_LOCAL_NONE && 556 (d->scratch_used[cls] & (1u << r)) == 0) { 557 nd_note_reg_used(d, cls, r); 558 return r; 559 } 560 } 561 victim = nd_pick_cache_victim(d, cls); 562 if (victim != REG_NONE && (caller & (1u << victim))) { 563 nd_flush_local(d, d->reg_owner[cls][victim]); 564 nd_note_reg_used(d, cls, victim); 565 return victim; 566 } 567 return REG_NONE; 568 } 569 570 /* Append LOCAL to the tail of the cached-locals list (O(1)). Called only on the 571 * REG_NONE -> reg transition in nd_dst_reg. 
*/ 572 static void nd_cache_link(NativeDirectTarget* d, CGLocal local) { 573 i32 idx = (i32)(local - 1u); 574 i32 prev = d->cache_tail; 575 d->locals[idx].cache_next = -1; 576 d->locals[idx].cache_prev = prev; 577 if (prev >= 0) 578 d->locals[prev].cache_next = idx; 579 else 580 d->cache_head = idx; 581 d->cache_tail = idx; 582 d->ncached++; 583 } 584 585 /* Remove LOCAL (which must currently be cached) from the cached-locals list. */ 586 static void nd_cache_unlink(NativeDirectTarget* d, CGLocal local) { 587 i32 idx = (i32)(local - 1u); 588 i32 next = d->locals[idx].cache_next; 589 i32 prev = d->locals[idx].cache_prev; 590 if (next >= 0) 591 d->locals[next].cache_prev = prev; 592 else 593 d->cache_tail = prev; 594 if (prev >= 0) 595 d->locals[prev].cache_next = next; 596 else 597 d->cache_head = next; 598 d->ncached--; 599 } 600 601 /* Write a cached local back to its home (if dirty) and drop the entry. Safe to 602 * call on an uncached local. */ 603 static void nd_flush_local(NativeDirectTarget* d, CGLocal local) { 604 NativeDirectLocal* l = nd_local(d, local); 605 if (l->reg == REG_NONE) return; 606 if (l->dirty) 607 nd_store_reg_to_frame( 608 d, l->home, l->type, 609 native_loc_reg(l->type, (NativeAllocClass)l->cls, l->reg)); 610 nd_cache_unlink(d, local); 611 d->reg_owner[l->cls][l->reg] = CG_LOCAL_NONE; 612 l->reg = REG_NONE; 613 l->dirty = 0; 614 } 615 616 /* Drop a cache entry without writing it back, for when a store supersedes the 617 * cached value. */ 618 static void nd_invalidate_local(NativeDirectTarget* d, CGLocal local) { 619 NativeDirectLocal* l = nd_local(d, local); 620 if (l->reg == REG_NONE) return; 621 nd_cache_unlink(d, local); 622 d->reg_owner[l->cls][l->reg] = CG_LOCAL_NONE; 623 l->reg = REG_NONE; 624 l->dirty = 0; 625 } 626 627 /* Spill the whole cache to memory and empty it. The list is sorted ascending, 628 * so this spills in the same order as the former O(nlocals) index scan. 
 */
static void nd_flush_all(NativeDirectTarget* d) {
  /* Each nd_flush_local call unlinks the head it was given, so cache_head
   * advances until the list is empty (-1). */
  while (d->cache_head >= 0) nd_flush_local(d, (CGLocal)(d->cache_head + 1));
}

/* Turn address IN into a form the backend accepts for access MEM and return
 * it. FRAME_VALUE base/index components are loaded into scratch registers; if
 * the backend still rejects the address, the whole address is computed into a
 * single integer scratch register with load_addr. TEMPS receives every
 * register the caller must unpin afterwards with nd_addr_temps_release. */
static NativeAddr nd_addr_materialize(NativeDirectTarget* d, NativeAddr in,
                                      NdAddrTemps* temps, MemAccess mem) {
  NativeAddr out = in;
  memset(temps, 0, sizeof *temps);
  temps->base = REG_NONE;
  temps->index = REG_NONE;
  /* A base/index that arrives already in a register is a pinned live cache reg
   * (the addr builders are the only producers of REG-kind storage addresses).
   * Record it so the temps release unpins it afterward — without storing or
   * invalidating, leaving the cache entry intact. */
  if (out.base_kind == NATIVE_ADDR_BASE_REG) {
    temps->base = out.base.reg;
    temps->base_cls = (NativeAllocClass)out.cls;
  }
  if (out.index_kind == NATIVE_ADDR_INDEX_REG) {
    temps->index = out.index.reg;
    temps->index_cls = (NativeAllocClass)out.index_cls;
  }
  /* Base value still lives in a frame home: load it into a scratch reg. */
  if (out.base_kind == NATIVE_ADDR_BASE_FRAME_VALUE) {
    NativeAllocClass cls = (NativeAllocClass)out.cls;
    Reg r = nd_scratch_acquire(d, cls);
    NativeLoc dst = native_loc_reg(out.base_type, cls, r);
    nd_load_frame_to_reg(d, dst, out.base.frame, out.base_type);
    out.base_kind = NATIVE_ADDR_BASE_REG;
    out.base.reg = r;
    temps->base = r;
    temps->base_cls = cls;
  }
  /* Same for the index value. */
  if (out.index_kind == NATIVE_ADDR_INDEX_FRAME_VALUE) {
    NativeAllocClass cls = (NativeAllocClass)out.index_cls;
    Reg r = nd_scratch_acquire(d, cls);
    NativeLoc dst = native_loc_reg(out.index_type, cls, r);
    nd_load_frame_to_reg(d, dst, out.index.frame, out.index_type);
    out.index_kind = NATIVE_ADDR_INDEX_REG;
    out.index.reg = r;
    temps->index = r;
    temps->index_cls = cls;
  }
  /* Backend rejects this addressing form: fold it into one register. With no
   * addr_legal hook every address is accepted as-is. */
  if (d->native && d->native->addr_legal &&
      !d->native->addr_legal(d->native, &out, mem)) {
    NativeAllocClass cls = NATIVE_REG_INT;
    Reg r = nd_scratch_acquire(d, cls);
    NativeLoc dst = native_loc_reg(
        out.base_type ? out.base_type : builtin_id(KIT_CG_BUILTIN_I64), cls, r);
    ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses");
    d->native->load_addr(d->native, dst, out);
    /* The component registers were consumed by load_addr; unpin them and make
     * the folded register the only temp left to release. */
    nd_addr_temps_release(d, temps);
    memset(temps, 0, sizeof *temps);
    temps->base = r;
    temps->index = REG_NONE;
    temps->base_cls = cls;
    memset(&out, 0, sizeof out);
    out.base_kind = NATIVE_ADDR_BASE_REG;
    out.base.reg = r;
    out.cls = (u8)cls;
    out.base_type = dst.type;
    /* A bare register base must be acceptable; otherwise give up. */
    if (d->native && d->native->addr_legal &&
        !d->native->addr_legal(d->native, &out, mem))
      nd_panic(d, "native address is not legal");
  }
  return out;
}

/* Unpin the registers recorded in TEMPS. Only clears pin bits; cache entries
 * that own those registers are not touched. */
static void nd_addr_temps_release(NativeDirectTarget* d,
                                  const NdAddrTemps* temps) {
  if (temps->base != REG_NONE)
    nd_scratch_release(d, temps->base_cls, temps->base);
  if (temps->index != REG_NONE)
    nd_scratch_release(d, temps->index_cls, temps->index);
}

/* Return SRC as a register location. A SRC that is already a register is
 * returned unchanged (no new pin is taken); anything else is copied into a
 * freshly acquired scratch register of class CLS, typed TYPE (or SRC's type
 * when TYPE is 0). */
static NativeLoc nd_materialize_loc(NativeDirectTarget* d, NativeLoc src,
                                    NativeAllocClass cls, KitCgTypeId type) {
  Reg r;
  NativeLoc dst;
  if (src.kind == NATIVE_LOC_REG) return src;
  r = nd_scratch_acquire(d, cls);
  dst = native_loc_reg(type ? type : src.type, cls, r);
  nd_copy_to_reg(d, dst, src);
  return dst;
}

/* Emit whatever is needed to get SRC's value into register DST. Memory
 * sources are loaded using DST's type; GLOBAL and ADDR sources yield the
 * address itself (load_addr), not the pointed-to contents. Panics when DST is
 * not a register or SRC's kind is unsupported. */
static void nd_copy_to_reg(NativeDirectTarget* d, NativeLoc dst,
                           NativeLoc src) {
  if (dst.kind != NATIVE_LOC_REG) nd_panic(d, "copy destination is not a reg");
  switch ((NativeLocKind)src.kind) {
    case NATIVE_LOC_REG:
      /* Same register and class: nothing to emit. */
      if (src.v.reg != dst.v.reg || src.cls != dst.cls) {
        ND_REQUIRE_NATIVE(d, move, "target does not emit register moves");
        d->native->move(d->native, dst, src);
      }
      break;
    case NATIVE_LOC_FRAME:
      nd_load_frame_to_reg(d, dst, src.v.frame, dst.type);
      break;
    case NATIVE_LOC_STACK: {
      NativeAddr addr;
      MemAccess mem = nd_type_mem(d, dst.type);
      memset(&addr, 0, sizeof addr);
      addr.base_kind = NATIVE_ADDR_BASE_FRAME;
      addr.base.frame = src.v.stack.slot;
      addr.base_type = dst.type;
      addr.offset = src.v.stack.offset;
      ND_REQUIRE_NATIVE(d, load, "target does not emit loads");
      d->native->load(d->native, dst, addr, mem);
      break;
    }
    case NATIVE_LOC_IMM:
      ND_REQUIRE_NATIVE(d, load_imm, "target does not emit immediates");
      d->native->load_imm(d->native, dst, src.v.imm);
      break;
    case NATIVE_LOC_GLOBAL: {
      NativeAddr addr;
      memset(&addr, 0, sizeof addr);
      addr.base_kind = NATIVE_ADDR_BASE_GLOBAL;
      addr.base.global.sym = src.v.global.sym;
      addr.base.global.addend = src.v.global.addend;
      addr.base_type = dst.type;
      ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses");
      d->native->load_addr(d->native, dst, addr);
      break;
    }
    case NATIVE_LOC_ADDR: {
      NdAddrTemps temps;
      /* Legality is checked as a pointer-sized access. */
      MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size,
                                    d->base.c->target.ptr_align);
      NativeAddr addr = nd_addr_materialize(d, src.v.addr, &temps, mem);
      ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses");
      d->native->load_addr(d->native, dst, addr);
      nd_addr_temps_release(d, &temps);
      break;
    }
    default:
      nd_panic(d, "cannot materialize native location");
  }
}

/* Write SRC into DST, where DST is a register, frame slot, stack slot, or
 * computed address. Memory destinations first bring SRC into a register and
 * release it after the store. NOTE(review): when SRC is already a register,
 * nd_materialize_loc returns it as-is and nd_release_materialized then clears
 * its pin bit, so the caller's pin on SRC does not survive this call — confirm
 * callers expect that. */
static void nd_write_loc(NativeDirectTarget* d, NativeLoc dst, NativeLoc src,
                         MemAccess mem) {
  switch ((NativeLocKind)dst.kind) {
    case NATIVE_LOC_REG:
      nd_copy_to_reg(d, dst, src);
      break;
    case NATIVE_LOC_FRAME: {
      NativeLoc val =
          nd_materialize_loc(d, src, (NativeAllocClass)dst.cls, dst.type);
      nd_store_reg_to_frame(d, dst.v.frame, dst.type, val);
      nd_release_materialized(d, val);
      break;
    }
    case NATIVE_LOC_STACK: {
      NativeAddr addr;
      NativeLoc val =
          nd_materialize_loc(d, src, (NativeAllocClass)dst.cls, dst.type);
      memset(&addr, 0, sizeof addr);
      addr.base_kind = NATIVE_ADDR_BASE_FRAME;
      addr.base.frame = dst.v.stack.slot;
      addr.base_type = dst.type;
      addr.offset = dst.v.stack.offset;
      ND_REQUIRE_NATIVE(d, store, "target does not emit stores");
      d->native->store(d->native, addr, val, mem);
      nd_release_materialized(d, val);
      break;
    }
    case NATIVE_LOC_ADDR: {
      NdAddrTemps temps;
      /* The address is materialized before the value, so its temps stay
       * pinned while the value register is acquired. */
      NativeAddr addr = nd_addr_materialize(d, dst.v.addr, &temps, mem);
      NativeAllocClass cls = nd_class_for_type(d, src.type);
      NativeLoc val = nd_materialize_loc(d, src, cls, src.type);
      ND_REQUIRE_NATIVE(d, store, "target does not emit stores");
      d->native->store(d->native, addr, val, mem);
      nd_release_materialized(d, val);
      nd_addr_temps_release(d, &temps);
      break;
    }
    default:
      nd_panic(d, "unsupported write destination");
  }
}

/* Unpin the register behind LOC, if LOC is a register; otherwise a no-op. */
static void nd_release_materialized(NativeDirectTarget* d, NativeLoc loc) {
  if (loc.kind == NATIVE_LOC_REG)
    nd_scratch_release(d, (NativeAllocClass)loc.cls, loc.v.reg);
}

/* Spill cached locals that back an INDIRECT operand's address before it is read
 * from their frame homes. Compute ops normally receive only LOCAL/IMM/GLOBAL
 * operands; this keeps the rare INDIRECT case correct without flushing all.
*/ 821 static void nd_flush_operand_addr_locals(NativeDirectTarget* d, Operand op) { 822 if (op.kind != OPK_INDIRECT) return; 823 nd_flush_local(d, op.v.ind.base); 824 if (op.v.ind.index != CG_LOCAL_NONE) nd_flush_local(d, op.v.ind.index); 825 } 826 827 static NativeLoc nd_materialize_operand(NativeDirectTarget* d, Operand op) { 828 NativeAllocClass cls = nd_class_for_type(d, op.type); 829 if (op.kind == OPK_LOCAL) { 830 NativeDirectLocal* l = nd_local(d, op.v.local); 831 if (l->reg != REG_NONE && op.type == l->type && nd_local_cacheable(d, l)) { 832 /* Cache hit: pin and reuse the live register, no reload. */ 833 d->scratch_used[l->cls] |= 1u << l->reg; 834 nd_touch_local(d, l); 835 return native_loc_reg(op.type, (NativeAllocClass)l->cls, l->reg); 836 } 837 /* A live entry under a different access width must reach memory before we 838 * bypass the cache for this access. */ 839 if (l->reg != REG_NONE) nd_flush_local(d, op.v.local); 840 } 841 nd_flush_operand_addr_locals(d, op); 842 return nd_materialize_loc(d, nd_loc_operand(d, op), cls, op.type); 843 } 844 845 static NativeLoc nd_dst_scratch(NativeDirectTarget* d, Operand dst) { 846 NativeAllocClass cls = nd_class_for_type(d, dst.type); 847 Reg r = nd_scratch_acquire(d, cls); 848 return native_loc_reg(dst.type, cls, r); 849 } 850 851 /* Arithmetic/compare RHS: keep a constant operand as an immediate when the 852 * target can encode it for `use` (so no scratch register is spent 853 * materializing it), mirroring the optimizer's operand_imm_or_reg. Falls back 854 * to a register when there is no imm_legal hook (e.g. a recording mock target) 855 * or the constant is not target-legal for this op. 
*/ 856 static NativeLoc nd_rhs_imm_or_reg(NativeDirectTarget* d, NativeImmUse use, 857 u32 sub, Operand b) { 858 if (b.kind == OPK_IMM && d->native->imm_legal && 859 d->native->imm_legal(d->native, use, sub, b.type, b.v.imm)) 860 return nd_loc_imm(b.v.imm, b.type); 861 return nd_materialize_operand(d, b); 862 } 863 864 /* Register a pure-compute op writes its result into. For a cacheable local that 865 * is the local's cache register (reused or freshly allocated), pinned for the 866 * instruction; nd_dst_writeback then marks it dirty without storing. Otherwise 867 * a scratch temporary that nd_dst_writeback spills to the frame home. */ 868 static NativeLoc nd_dst_reg(NativeDirectTarget* d, Operand dst) { 869 if (dst.kind == OPK_LOCAL) { 870 NativeDirectLocal* l = nd_local(d, dst.v.local); 871 if (dst.type == l->type && nd_local_cacheable(d, l)) { 872 Reg r = l->reg; 873 if (r == REG_NONE) { 874 r = nd_cache_alloc(d, (NativeAllocClass)l->cls); 875 if (r != REG_NONE) { 876 d->reg_owner[l->cls][r] = dst.v.local; 877 l->reg = r; 878 nd_cache_link(d, dst.v.local); 879 } 880 } 881 if (r != REG_NONE) { 882 d->scratch_used[l->cls] |= 1u << r; /* pin for the instruction */ 883 nd_touch_local(d, l); 884 return native_loc_reg(dst.type, (NativeAllocClass)l->cls, r); 885 } 886 } 887 } 888 return nd_dst_scratch(d, dst); 889 } 890 891 static void nd_dst_writeback(NativeDirectTarget* d, Operand dst, NativeLoc dr) { 892 if (dst.kind == OPK_LOCAL) { 893 NativeDirectLocal* l = nd_local(d, dst.v.local); 894 if (dr.kind == NATIVE_LOC_REG && l->reg == dr.v.reg && 895 dst.type == l->type && nd_local_cacheable(d, l)) { 896 l->dirty = 1; 897 d->scratch_used[l->cls] &= ~(1u << dr.v.reg); /* unpin, keep cached */ 898 return; 899 } 900 /* Bypassing the cache: drop any stale entry, then spill to the home. 
*/ 901 if (l->reg != REG_NONE) nd_invalidate_local(d, dst.v.local); 902 } 903 nd_store_operand_from_reg(d, dst, dr); 904 nd_release_materialized(d, dr); 905 } 906 907 static void nd_store_operand_from_reg(NativeDirectTarget* d, Operand dst, 908 NativeLoc src) { 909 if (dst.kind != OPK_LOCAL) nd_panic(d, "destination is not a semantic local"); 910 /* This writes SRC to the local's frame home, bypassing the value cache (the 911 * result was produced in a scratch reg, e.g. a load / address-of). Any live 912 * cache entry for the local is now stale and must be dropped — the home write 913 * supersedes it. Drop without storing; storing back would clobber the new 914 * home value. Runs after SRC is produced, so a dst that was its own address 915 * base has already been consumed. */ 916 { 917 NativeDirectLocal* l = nd_local(d, dst.v.local); 918 if (l->reg != REG_NONE) nd_invalidate_local(d, dst.v.local); 919 nd_store_reg_to_frame(d, l->home, dst.type, src); 920 } 921 } 922 923 static void nd_func_begin(CgTarget* t, const CGFuncDesc* fd) { 924 NativeDirectTarget* d = nd_of(t); 925 d->func = fd; 926 d->nlocals = 0; 927 d->nlabels = 0; 928 d->nscopes = 0; 929 d->max_outgoing = 0; 930 d->use_tick = 0; 931 d->cache_head = -1; 932 d->cache_tail = -1; 933 d->ncached = 0; 934 memset(d->scratch_used, 0, sizeof d->scratch_used); 935 memset(d->callee_saved_used, 0, sizeof d->callee_saved_used); 936 memset(d->reg_owner, 0, sizeof d->reg_owner); 937 if (d->native && d->native->func_begin) d->native->func_begin(d->native, fd); 938 } 939 940 static void nd_func_end(CgTarget* t) { 941 NativeDirectTarget* d = nd_of(t); 942 NativeFramePatchState frame; 943 u32 ncallee_classes = 0; 944 memset(&frame, 0, sizeof frame); 945 frame.max_outgoing = d->max_outgoing; 946 for (u32 cls = 0; cls < 3u; ++cls) { 947 if (d->callee_saved_used[cls]) ncallee_classes = cls + 1u; 948 } 949 if (ncallee_classes) { 950 if (!d->native || !d->native->reserve_callee_saves) 951 nd_panic(d, "target cannot preserve 
callee-saved scratch registers"); 952 d->native->reserve_callee_saves(d->native, d->callee_saved_used, 953 ncallee_classes); 954 } 955 if (d->native && d->native->note_frame_state) 956 d->native->note_frame_state(d->native, &frame); 957 if (d->native && d->native->patch_apply) d->native->patch_apply(d->native); 958 if (d->native && d->native->func_end) d->native->func_end(d->native); 959 d->func = NULL; 960 } 961 962 static void nd_alias(CgTarget* t, ObjSymId alias_sym, ObjSymId target_sym, 963 KitCgTypeId type) { 964 (void)t; 965 (void)alias_sym; 966 (void)target_sym; 967 (void)type; 968 } 969 970 static CGLocal nd_local_new(CgTarget* t, const CGLocalDesc* desc) { 971 return nd_alloc_local(nd_of(t), desc); 972 } 973 974 static void nd_local_addr(CgTarget* t, Operand dst, const CGLocalDesc* desc, 975 CGLocal local) { 976 NativeDirectTarget* d = nd_of(t); 977 NativeDirectLocal* l = nd_local(d, local); 978 Operand lv; 979 (void)desc; 980 /* Targeted flush: only this local escapes. Spill+drop its entry so the home 981 * is authoritative for the address computation, then mark it uncacheable. The 982 * rest of the cache is unaffected (other cached locals stay non-escaped). 
*/ 983 nd_flush_local(d, local); 984 l->address_taken = 1; 985 l->flags |= CG_LOCAL_ADDR_TAKEN; 986 memset(&lv, 0, sizeof lv); 987 lv.kind = OPK_LOCAL; 988 lv.type = l->type; 989 lv.v.local = local; 990 { 991 NativeLoc reg = nd_dst_scratch(d, dst); 992 ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses"); 993 d->native->load_addr(d->native, reg, nd_addr_storage(d, lv)); 994 nd_store_operand_from_reg(d, dst, reg); 995 nd_release_materialized(d, reg); 996 } 997 } 998 999 static CGLocal nd_param(CgTarget* t, const CGParamDesc* desc) { 1000 NativeDirectTarget* d = nd_of(t); 1001 NativeDirectLocal* l; 1002 NativeFrameSlotDesc fsd; 1003 CGLocal id; 1004 nd_grow_locals(d, d->nlocals + 1u); 1005 id = d->nlocals + 1u; 1006 l = &d->locals[d->nlocals++]; 1007 memset(l, 0, sizeof *l); 1008 l->type = desc->type; 1009 l->size = desc->size; 1010 l->align = desc->align; 1011 l->flags = desc->flags; 1012 l->reg = REG_NONE; 1013 l->address_taken = (desc->flags & CG_LOCAL_ADDR_TAKEN) != 0; 1014 l->memory_required = (desc->flags & CG_LOCAL_MEMORY_REQUIRED) != 0; 1015 l->cls = (u8)nd_class_for_type(d, desc->type); 1016 fsd = nd_slot_desc_param(desc); 1017 l->home = nd_alloc_frame_slot(d, &fsd); 1018 if (d->ops && d->ops->bind_param) d->ops->bind_param(d, desc, id, l); 1019 return id; 1020 } 1021 1022 static int nd_local_debug_loc(CgTarget* t, CGLocal local, CGDebugLoc* out) { 1023 NativeDirectTarget* d = nd_of(t); 1024 NativeDirectLocal* l; 1025 if (!out) return 0; 1026 memset(out, 0, sizeof *out); 1027 if (!d->native || !d->native->frame_slot_debug_loc) return 0; 1028 l = nd_local(d, local); 1029 if (l->home == NATIVE_FRAME_SLOT_NONE) return 0; 1030 return d->native->frame_slot_debug_loc(d->native, l->home, out); 1031 } 1032 1033 static Label nd_label_new(CgTarget* t) { return nd_label_new_raw(nd_of(t)); } 1034 1035 static void nd_label_place(CgTarget* t, Label label) { 1036 NativeDirectTarget* d = nd_of(t); 1037 nd_flush_all(d); 1038 ND_REQUIRE_NATIVE(d, 
label_place, "target does not place labels"); 1039 d->native->label_place(d->native, nd_mc_label(d, label)); 1040 } 1041 1042 static void nd_jump(CgTarget* t, Label label) { 1043 NativeDirectTarget* d = nd_of(t); 1044 nd_flush_all(d); 1045 ND_REQUIRE_NATIVE(d, jump, "target does not emit jumps"); 1046 d->native->jump(d->native, nd_mc_label(d, label)); 1047 } 1048 1049 static void nd_cmp_branch(CgTarget* t, CmpOp op, Operand a, Operand b, 1050 Label label) { 1051 NativeDirectTarget* d = nd_of(t); 1052 NativeLoc ar, br; 1053 nd_flush_all(d); 1054 ar = nd_materialize_operand(d, a); 1055 br = nd_rhs_imm_or_reg(d, NATIVE_IMM_CMP, (u32)op, b); 1056 ND_REQUIRE_NATIVE(d, cmp_branch, "target does not emit compare branches"); 1057 d->native->cmp_branch(d->native, op, ar, br, nd_mc_label(d, label)); 1058 nd_release_materialized(d, br); 1059 nd_release_materialized(d, ar); 1060 } 1061 1062 static void nd_switch(CgTarget* t, const CGSwitchDesc* desc) { 1063 nd_flush_all(nd_of(t)); 1064 cg_lower_switch_default(t, desc); 1065 } 1066 1067 static void nd_indirect_branch(CgTarget* t, Operand addr, 1068 const Label* valid_targets, u32 ntargets) { 1069 NativeDirectTarget* d = nd_of(t); 1070 MCLabel* native_targets; 1071 NativeLoc addr_reg; 1072 nd_flush_all(d); 1073 addr_reg = nd_materialize_operand(d, addr); 1074 ND_REQUIRE_NATIVE(d, indirect_branch, 1075 "target does not emit indirect branches"); 1076 native_targets = 1077 ntargets == 0 ? NULL 1078 : ntargets <= ND_LBL_BUF 1079 ? 
d->lblbuf 1080 : nd_arena(d, sizeof(*native_targets) * ntargets, _Alignof(MCLabel)); 1081 for (u32 i = 0; i < ntargets; ++i) 1082 native_targets[i] = nd_mc_label(d, valid_targets[i]); 1083 d->native->indirect_branch(d->native, addr_reg, native_targets, ntargets); 1084 nd_release_materialized(d, addr_reg); 1085 } 1086 1087 static void nd_load_label_addr(CgTarget* t, Operand dst, Label label) { 1088 NativeDirectTarget* d = nd_of(t); 1089 NativeLoc reg; 1090 nd_flush_all(d); 1091 reg = nd_dst_scratch(d, dst); 1092 ND_REQUIRE_NATIVE(d, load_label_addr, 1093 "target does not materialize label addresses"); 1094 d->native->load_label_addr(d->native, reg, nd_mc_label(d, label)); 1095 nd_store_operand_from_reg(d, dst, reg); 1096 nd_release_materialized(d, reg); 1097 } 1098 1099 static int nd_local_static_data_begin(CgTarget* t, 1100 const CGLocalStaticDataDesc* desc) { 1101 NativeDirectTarget* d = nd_of(t); 1102 Sym name; 1103 SecKind kind; 1104 u16 flags; 1105 if (!d->native || !d->native->mc || !desc) return 0; 1106 if (d->local_static_active) nd_panic(d, "nested local static data"); 1107 if (desc->attrs.section) { 1108 name = (Sym)desc->attrs.section; 1109 kind = 1110 (desc->attrs.flags & KIT_CG_DATADEF_READONLY) ? SEC_RODATA : SEC_DATA; 1111 flags = (desc->attrs.flags & KIT_CG_DATADEF_READONLY) 1112 ? SF_ALLOC 1113 : (SF_ALLOC | SF_WRITE); 1114 } else if (desc->attrs.flags & KIT_CG_DATADEF_READONLY) { 1115 name = pool_intern_slice(t->c->global, SLICE_LIT(".rodata")); 1116 kind = SEC_RODATA; 1117 flags = SF_ALLOC; 1118 } else { 1119 name = pool_intern_slice(t->c->global, SLICE_LIT(".data")); 1120 kind = SEC_DATA; 1121 flags = SF_ALLOC | SF_WRITE; 1122 } 1123 d->local_static_sec = 1124 obj_section(t->obj, name, kind, flags, desc->align ? desc->align : 1u); 1125 d->local_static_base = 1126 obj_align_to(t->obj, d->local_static_sec, desc->align ? 
desc->align : 1u); 1127 d->local_static_size = 0; 1128 d->local_static_sym = desc->sym; 1129 d->local_static_active = 1; 1130 return 1; 1131 } 1132 1133 static void nd_local_static_data_write(CgTarget* t, const u8* data, u64 len) { 1134 NativeDirectTarget* d = nd_of(t); 1135 u8 zero[64]; 1136 u64 orig_len = len; 1137 if (!d->local_static_active || !len) return; 1138 if (data) { 1139 obj_write(t->obj, d->local_static_sec, data, (size_t)len); 1140 } else { 1141 memset(zero, 0, sizeof zero); 1142 while (len >= sizeof zero) { 1143 obj_write(t->obj, d->local_static_sec, zero, sizeof zero); 1144 len -= sizeof zero; 1145 } 1146 if (len) obj_write(t->obj, d->local_static_sec, zero, (size_t)len); 1147 } 1148 d->local_static_size += (u32)orig_len; 1149 } 1150 1151 static void nd_local_static_data_label_addr(CgTarget* t, Label target, 1152 i64 addend, u32 width, 1153 u32 address_space) { 1154 NativeDirectTarget* d = nd_of(t); 1155 u32 off; 1156 u8 zero[8]; 1157 RelocKind kind; 1158 (void)address_space; 1159 if (!d->local_static_active) 1160 nd_panic(d, "label address outside local static data"); 1161 /* A jump-table / label-address slot is one target pointer wide: 8 bytes 1162 * (R_ABS64) on a 64-bit target, 4 bytes (R_ABS32) on rv32/ELFCLASS32. 
*/ 1163 if (width == 8u) 1164 kind = R_ABS64; 1165 else if (width == 4u) 1166 kind = R_ABS32; 1167 else { 1168 nd_panic(d, "unsupported local static label address width"); 1169 return; 1170 } 1171 memset(zero, 0, sizeof zero); 1172 off = d->local_static_base + d->local_static_size; 1173 obj_write(t->obj, d->local_static_sec, zero, width); 1174 d->native->mc->emit_label_data_reloc(d->native->mc, d->local_static_sec, off, 1175 nd_mc_label(d, target), kind, width, 1176 addend); 1177 d->local_static_size += width; 1178 } 1179 1180 static void nd_local_static_data_end(CgTarget* t) { 1181 NativeDirectTarget* d = nd_of(t); 1182 if (!d->local_static_active) return; 1183 obj_symbol_define_live(t->obj, d->local_static_sym, d->local_static_sec, 1184 d->local_static_base, d->local_static_size); 1185 d->local_static_active = 0; 1186 d->local_static_sec = OBJ_SEC_NONE; 1187 d->local_static_sym = OBJ_SYM_NONE; 1188 d->local_static_base = 0; 1189 d->local_static_size = 0; 1190 } 1191 1192 static const char* nd_data_label_addr_unsupported_msg(CgTarget* t) { 1193 (void)t; 1194 return NULL; 1195 } 1196 1197 static CGScope nd_scope_begin(CgTarget* t, const CGScopeDesc* desc) { 1198 NativeDirectTarget* d = nd_of(t); 1199 NativeDirectScope* s; 1200 CGScope id; 1201 nd_grow_scopes(d, d->nscopes + 1u); 1202 id = d->nscopes + 1u; 1203 s = &d->scopes[d->nscopes++]; 1204 memset(s, 0, sizeof *s); 1205 s->kind = desc->kind; 1206 s->owns_break = desc->break_label == LABEL_NONE; 1207 s->break_label = desc->break_label ? 
desc->break_label : nd_label_new_raw(d); 1208 s->continue_label = desc->continue_label; 1209 if (desc->kind == SCOPE_LOOP && s->continue_label == LABEL_NONE) 1210 s->continue_label = nd_label_new_raw(d); 1211 return id; 1212 } 1213 1214 static NativeDirectScope* nd_scope(NativeDirectTarget* d, CGScope scope) { 1215 if (scope == CG_SCOPE_NONE || scope > d->nscopes) nd_panic(d, "bad scope"); 1216 return &d->scopes[scope - 1u]; 1217 } 1218 1219 static void nd_scope_end(CgTarget* t, CGScope scope) { 1220 NativeDirectTarget* d = nd_of(t); 1221 NativeDirectScope* s = nd_scope(d, scope); 1222 if (s->owns_break) nd_label_place(t, s->break_label); 1223 } 1224 1225 static void nd_break_to(CgTarget* t, CGScope scope) { 1226 nd_jump(t, nd_scope(nd_of(t), scope)->break_label); 1227 } 1228 1229 static void nd_continue_to(CgTarget* t, CGScope scope) { 1230 NativeDirectScope* s = nd_scope(nd_of(t), scope); 1231 if (s->continue_label == LABEL_NONE) 1232 nd_panic(nd_of(t), "continue_to on scope without continue label"); 1233 nd_jump(t, s->continue_label); 1234 } 1235 1236 static int nd_is_wide64_int(NativeDirectTarget* d, KitCgTypeId ty); 1237 static int nd_is_soft_double(NativeDirectTarget* d, KitCgTypeId ty); 1238 1239 static void nd_load_imm(CgTarget* t, Operand dst, i64 imm) { 1240 NativeDirectTarget* d = nd_of(t); 1241 NativeLoc reg; 1242 if (nd_is_wide64_int(d, dst.type)) 1243 nd_panic(d, 1244 "64-bit integer immediate reached the backend un-lowered " 1245 "(cg should materialize it as two 32-bit lanes)"); 1246 reg = nd_dst_reg(d, dst); 1247 ND_REQUIRE_NATIVE(d, load_imm, "target does not emit immediates"); 1248 d->native->load_imm(d->native, reg, imm); 1249 nd_dst_writeback(d, dst, reg); 1250 } 1251 1252 static void nd_load_const(CgTarget* t, Operand dst, ConstBytes cbytes) { 1253 NativeDirectTarget* d = nd_of(t); 1254 NativeLoc reg; 1255 if (nd_is_wide64_int(d, dst.type) || nd_is_soft_double(d, dst.type)) 1256 nd_panic(d, 1257 "8-byte constant reached the backend un-lowered 
(cg should " 1258 "materialize it as two 32-bit lanes)"); 1259 reg = nd_dst_reg(d, dst); 1260 ND_REQUIRE_NATIVE(d, load_const, "target does not emit byte constants"); 1261 d->native->load_const(d->native, reg, cbytes); 1262 nd_dst_writeback(d, dst, reg); 1263 } 1264 1265 static void nd_copy(CgTarget* t, Operand dst, Operand src) { 1266 NativeDirectTarget* d = nd_of(t); 1267 u64 size = dst.type ? cg_type_size(t->c, dst.type) : 0; 1268 if (size > (u64)t->c->target.ptr_size) { 1269 NdAddrTemps dt, st; 1270 AggregateAccess access; 1271 /* Aggregate copy: addresses are built cache-aware (a directly-addressed 1272 * cached local is flushed in nd_addr_storage), so no whole-cache flush. */ 1273 memset(&access, 0, sizeof access); 1274 access.type = dst.type; 1275 access.size = (u32)size; 1276 access.align = 1277 dst.type ? cg_type_align(t->c, dst.type) : (u32)t->c->target.ptr_align; 1278 access.mem.type = dst.type; 1279 access.mem.size = access.size; 1280 access.mem.align = access.align; 1281 NativeAddr da = 1282 nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, access.mem); 1283 NativeAddr sa = 1284 nd_addr_materialize(d, nd_addr_storage(d, src), &st, access.mem); 1285 ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); 1286 d->native->copy_bytes(d->native, da, sa, access); 1287 nd_addr_temps_release(d, &st); 1288 nd_addr_temps_release(d, &dt); 1289 return; 1290 } 1291 NativeLoc val = nd_materialize_operand(d, src); 1292 NativeLoc dr = nd_dst_reg(d, dst); 1293 nd_copy_to_reg(d, dr, val); 1294 nd_dst_writeback(d, dst, dr); 1295 nd_release_materialized(d, val); 1296 } 1297 1298 /* Bit-fields ride the generic load/store (mem.bf_width != 0); this impl 1299 * translates them to the physical NativeTarget bitfield_load/store below. 
*/ 1300 static void nd_bitfield_load(CgTarget* t, Operand dst, Operand record_addr, 1301 BitFieldAccess access); 1302 static void nd_bitfield_store(CgTarget* t, Operand record_addr, Operand src, 1303 BitFieldAccess access); 1304 1305 static void nd_load(CgTarget* t, Operand dst, Operand addr, MemAccess mem) { 1306 NativeDirectTarget* d = nd_of(t); 1307 NdAddrTemps temps; 1308 u64 size; 1309 if (mem.bf_width != 0) { 1310 nd_bitfield_load(t, dst, addr, bf_from_mem(mem)); 1311 return; 1312 } 1313 size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0); 1314 /* No value-cache flush: only escaped (address-taken / memory-required) locals 1315 * can be aliased through a pointer, and those are never cached. A volatile 1316 * access may observe memory and needs the cache made authoritative first. */ 1317 if (mem.flags & MF_VOLATILE) { 1318 nd_flush_all(d); 1319 nd_barrier(d, 1320 NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE); 1321 } 1322 NativeAddr naddr = 1323 nd_addr_materialize(d, nd_addr_storage(d, addr), &temps, mem); 1324 if (size > (u64)t->c->target.ptr_size) { 1325 NdAddrTemps dt; 1326 AggregateAccess access; 1327 memset(&access, 0, sizeof access); 1328 access.type = mem.type ? 
mem.type : dst.type; 1329 access.size = (u32)size; 1330 access.align = mem.align; 1331 access.mem = mem; 1332 NativeAddr da = nd_addr_materialize(d, nd_addr_storage(d, dst), &dt, mem); 1333 ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); 1334 d->native->copy_bytes(d->native, da, naddr, access); 1335 nd_addr_temps_release(d, &dt); 1336 nd_addr_temps_release(d, &temps); 1337 return; 1338 } 1339 NativeLoc reg = nd_dst_scratch(d, dst); 1340 ND_REQUIRE_NATIVE(d, load, "target does not emit loads"); 1341 d->native->load(d->native, reg, naddr, mem); 1342 nd_store_operand_from_reg(d, dst, reg); 1343 nd_release_materialized(d, reg); 1344 nd_addr_temps_release(d, &temps); 1345 } 1346 1347 static void nd_store(CgTarget* t, Operand addr, Operand src, MemAccess mem) { 1348 NativeDirectTarget* d = nd_of(t); 1349 NdAddrTemps temps; 1350 u64 size; 1351 if (mem.bf_width != 0) { 1352 nd_bitfield_store(t, addr, src, bf_from_mem(mem)); 1353 return; 1354 } 1355 size = mem.size ? mem.size : (mem.type ? cg_type_size(t->c, mem.type) : 0); 1356 /* No value-cache flush (see nd_load): a store through a pointer cannot alias 1357 * a cached non-escaped local. The store target is foreign memory, so there is 1358 * no dst local entry to invalidate; SRC is read via nd_materialize_operand. 1359 */ 1360 if (mem.flags & MF_VOLATILE) { 1361 nd_flush_all(d); 1362 nd_barrier(d, 1363 NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_VOLATILE); 1364 } 1365 NativeAddr naddr = 1366 nd_addr_materialize(d, nd_addr_storage(d, addr), &temps, mem); 1367 if (size > (u64)t->c->target.ptr_size) { 1368 NdAddrTemps st; 1369 AggregateAccess access; 1370 memset(&access, 0, sizeof access); 1371 access.type = mem.type ? 
mem.type : src.type; 1372 access.size = (u32)size; 1373 access.align = mem.align; 1374 access.mem = mem; 1375 NativeAddr sa = nd_addr_materialize(d, nd_addr_storage(d, src), &st, mem); 1376 ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); 1377 d->native->copy_bytes(d->native, naddr, sa, access); 1378 nd_addr_temps_release(d, &st); 1379 nd_addr_temps_release(d, &temps); 1380 return; 1381 } 1382 NativeLoc val = nd_materialize_operand(d, src); 1383 ND_REQUIRE_NATIVE(d, store, "target does not emit stores"); 1384 d->native->store(d->native, naddr, val, mem); 1385 nd_release_materialized(d, val); 1386 nd_addr_temps_release(d, &temps); 1387 } 1388 1389 static void nd_addr_of(CgTarget* t, Operand dst, Operand lv) { 1390 NativeDirectTarget* d = nd_of(t); 1391 NdAddrTemps temps; 1392 MemAccess mem = nd_scalar_mem(dst.type, d->base.c->target.ptr_size, 1393 d->base.c->target.ptr_align); 1394 NativeAddr naddr; 1395 /* Targeted: only an OPK_LOCAL lvalue escapes here — flush+mark just that 1396 * local (its home becomes the authoritative address source). An INDIRECT 1397 * lvalue's address is computed from base/index, which nd_addr_storage now 1398 * reads from the cache directly; a GLOBAL needs nothing. The dst home write 1399 * is handled by nd_store_operand_from_reg's invalidation. 
*/ 1400 if (lv.kind == OPK_LOCAL) { 1401 NativeDirectLocal* l = nd_local(d, lv.v.local); 1402 nd_flush_local(d, lv.v.local); 1403 l->address_taken = 1; 1404 l->flags |= CG_LOCAL_ADDR_TAKEN; 1405 } 1406 naddr = nd_addr_materialize(d, nd_addr_storage(d, lv), &temps, mem); 1407 NativeLoc reg = nd_dst_scratch(d, dst); 1408 ND_REQUIRE_NATIVE(d, load_addr, "target does not materialize addresses"); 1409 d->native->load_addr(d->native, reg, naddr); 1410 nd_store_operand_from_reg(d, dst, reg); 1411 nd_release_materialized(d, reg); 1412 nd_addr_temps_release(d, &temps); 1413 } 1414 1415 static void nd_tls_addr_of(CgTarget* t, Operand dst, ObjSymId sym, i64 addend) { 1416 NativeDirectTarget* d = nd_of(t); 1417 NativeLoc reg; 1418 nd_flush_all(d); 1419 reg = nd_dst_scratch(d, dst); 1420 ND_REQUIRE_NATIVE(d, tls_addr_of, 1421 "target does not materialize TLS addresses"); 1422 d->native->tls_addr_of(d->native, reg, sym, addend); 1423 nd_store_operand_from_reg(d, dst, reg); 1424 nd_release_materialized(d, reg); 1425 } 1426 1427 static void nd_copy_bytes(CgTarget* t, Operand dst_addr, Operand src_addr, 1428 AggregateAccess access) { 1429 NativeDirectTarget* d = nd_of(t); 1430 NdAddrTemps dt, st; 1431 NativeAddr dst; 1432 /* Pointer-target memory; addresses are cache-aware. No whole-cache flush. */ 1433 dst = nd_addr_materialize(d, nd_addr_pointer(d, dst_addr), &dt, access.mem); 1434 NativeAddr src = 1435 nd_addr_materialize(d, nd_addr_pointer(d, src_addr), &st, access.mem); 1436 ND_REQUIRE_NATIVE(d, copy_bytes, "target does not copy bytes"); 1437 d->native->copy_bytes(d->native, dst, src, access); 1438 nd_addr_temps_release(d, &st); 1439 nd_addr_temps_release(d, &dt); 1440 } 1441 1442 static void nd_set_bytes(CgTarget* t, Operand dst_addr, Operand byte_value, 1443 AggregateAccess access) { 1444 NativeDirectTarget* d = nd_of(t); 1445 NdAddrTemps temps; 1446 NativeAddr dst; 1447 NativeLoc byte; 1448 /* Pointer-target memory; addresses are cache-aware. No whole-cache flush. 
*/ 1449 dst = 1450 nd_addr_materialize(d, nd_addr_pointer(d, dst_addr), &temps, access.mem); 1451 byte = nd_materialize_operand(d, byte_value); 1452 ND_REQUIRE_NATIVE(d, set_bytes, "target does not set bytes"); 1453 d->native->set_bytes(d->native, dst, byte, access); 1454 nd_release_materialized(d, byte); 1455 nd_addr_temps_release(d, &temps); 1456 } 1457 1458 static void nd_bitfield_load(CgTarget* t, Operand dst, Operand record_addr, 1459 BitFieldAccess access) { 1460 NativeDirectTarget* d = nd_of(t); 1461 NdAddrTemps temps; 1462 NativeAddr addr; 1463 NativeLoc reg; 1464 /* Record (pointer-target) memory; addresses are cache-aware. The dst home 1465 * write is handled by nd_store_operand_from_reg's invalidation. */ 1466 addr = nd_addr_materialize(d, nd_addr_storage(d, record_addr), &temps, 1467 access.storage); 1468 reg = nd_dst_scratch(d, dst); 1469 ND_REQUIRE_NATIVE(d, bitfield_load, "target does not load bitfields"); 1470 d->native->bitfield_load(d->native, reg, addr, access); 1471 nd_store_operand_from_reg(d, dst, reg); 1472 nd_release_materialized(d, reg); 1473 nd_addr_temps_release(d, &temps); 1474 } 1475 1476 static void nd_bitfield_store(CgTarget* t, Operand record_addr, Operand src, 1477 BitFieldAccess access) { 1478 NativeDirectTarget* d = nd_of(t); 1479 NdAddrTemps temps; 1480 NativeAddr addr; 1481 NativeLoc val; 1482 /* Record (pointer-target) memory; addresses are cache-aware, SRC reads the 1483 * cache. No whole-cache flush. */ 1484 addr = nd_addr_materialize(d, nd_addr_storage(d, record_addr), &temps, 1485 access.storage); 1486 val = nd_materialize_operand(d, src); 1487 ND_REQUIRE_NATIVE(d, bitfield_store, "target does not store bitfields"); 1488 d->native->bitfield_store(d->native, addr, val, access); 1489 nd_release_materialized(d, val); 1490 nd_addr_temps_release(d, &temps); 1491 } 1492 1493 /* Last line of defense against an unlowered split-scalar/soft-float op reaching 1494 * the machine backend. 
The cg-layer gates in src/cg/arith.c route split i64 1495 * mul/div/shift and all soft-double arith/convert/compare to runtime calls; if 1496 * one escapes, the native backend would silently emit wrong code. */ 1497 static int nd_is_split_wide8_scalar(NativeDirectTarget* d, KitCgTypeId ty) { 1498 return abi_cg_scalar_split_lane_size(d->base.c->abi, ty) == 4u && 1499 native_type_size(d->native, ty) == 8u; 1500 } 1501 1502 static int nd_is_wide64_int(NativeDirectTarget* d, KitCgTypeId ty) { 1503 if (!nd_is_split_wide8_scalar(d, ty)) return 0; 1504 if (kit_cg_type_int_width((KitCompiler*)d->base.c, ty) == 0) return 0; 1505 return 1; 1506 } 1507 1508 static int nd_is_soft_double(NativeDirectTarget* d, KitCgTypeId ty) { 1509 if (!nd_is_split_wide8_scalar(d, ty)) return 0; 1510 return kit_cg_type_float_width((KitCompiler*)d->base.c, ty) == 64; 1511 } 1512 1513 static void nd_binop(CgTarget* t, BinOp op, Operand dst, Operand a, Operand b) { 1514 NativeDirectTarget* d = nd_of(t); 1515 NativeLoc ar; 1516 NativeLoc br; 1517 NativeLoc dr; 1518 /* No split-lane 8-byte value reaches a single-register op: the cg layer 1519 * lowers i64 add/sub/and/or/xor to inline 2-word lane sequences and 1520 * mul/div/rem/shift to __*di3 runtime calls (src/cg/arith.c). Anything that 1521 * slips through here would silently compute only the low word, so fail 1522 * loudly instead. 
*/ 1523 if (nd_is_wide64_int(d, a.type) || nd_is_wide64_int(d, dst.type)) { 1524 nd_panic(d, 1525 "64-bit integer arithmetic reached the backend un-lowered " 1526 "(cg should emit a 2-word lane sequence or a __*di3 runtime call)"); 1527 } 1528 if (nd_is_soft_double(d, a.type) || nd_is_soft_double(d, dst.type)) { 1529 nd_panic(d, 1530 "soft-float double arithmetic reached the backend un-lowered " 1531 "(should be a __*df3 runtime call)"); 1532 } 1533 ar = nd_materialize_operand(d, a); 1534 br = nd_rhs_imm_or_reg(d, NATIVE_IMM_BINOP, (u32)op, b); 1535 dr = nd_dst_reg(d, dst); 1536 ND_REQUIRE_NATIVE(d, binop, "target does not emit binary ops"); 1537 d->native->binop(d->native, op, dr, ar, br); 1538 nd_dst_writeback(d, dst, dr); 1539 nd_release_materialized(d, br); 1540 nd_release_materialized(d, ar); 1541 } 1542 1543 static void nd_unop(CgTarget* t, UnOp op, Operand dst, Operand a) { 1544 NativeDirectTarget* d = nd_of(t); 1545 NativeLoc ar; 1546 NativeLoc dr; 1547 /* i64 neg/bnot stay inline as register pairs, and soft-double FNEG stays 1548 * inline as a high-word sign-bit flip (v1), so both are allowlisted. Any 1549 * OTHER soft-double unop reaching the backend is an unlowered escape. 
*/ 1550 if (nd_is_wide64_int(d, a.type) || nd_is_wide64_int(d, dst.type)) { 1551 nd_panic(d, 1552 "64-bit integer unary op reached the backend un-lowered " 1553 "(cg should emit a 2-word lane sequence)"); 1554 } 1555 if (op != UO_FNEG && 1556 (nd_is_soft_double(d, a.type) || nd_is_soft_double(d, dst.type))) { 1557 nd_panic(d, "soft-float double unary op reached the backend un-lowered"); 1558 } 1559 ar = nd_materialize_operand(d, a); 1560 dr = nd_dst_reg(d, dst); 1561 ND_REQUIRE_NATIVE(d, unop, "target does not emit unary ops"); 1562 d->native->unop(d->native, op, dr, ar); 1563 nd_dst_writeback(d, dst, dr); 1564 nd_release_materialized(d, ar); 1565 } 1566 1567 static void nd_cmp(CgTarget* t, CmpOp op, Operand dst, Operand a, Operand b) { 1568 NativeDirectTarget* d = nd_of(t); 1569 NativeLoc ar; 1570 NativeLoc br; 1571 NativeLoc dr; 1572 /* i64 compares are lowered to inline 2-word lane sequences and soft-double 1573 * compares to __*df2 runtime calls (src/cg/arith.c); neither reaches a single 1574 * GPR compare here. 
*/ 1575 if (nd_is_wide64_int(d, a.type) || nd_is_wide64_int(d, b.type)) { 1576 nd_panic(d, 1577 "64-bit integer compare reached the backend un-lowered " 1578 "(cg should emit a 2-word lane sequence)"); 1579 } 1580 if (nd_is_soft_double(d, a.type) || nd_is_soft_double(d, b.type)) { 1581 nd_panic(d, 1582 "soft-float double compare reached the backend un-lowered " 1583 "(should be a __*df2 runtime call)"); 1584 } 1585 ar = nd_materialize_operand(d, a); 1586 br = nd_rhs_imm_or_reg(d, NATIVE_IMM_CMP, (u32)op, b); 1587 dr = nd_dst_reg(d, dst); 1588 ND_REQUIRE_NATIVE(d, cmp, "target does not emit compares"); 1589 d->native->cmp(d->native, op, dr, ar, br); 1590 nd_dst_writeback(d, dst, dr); 1591 nd_release_materialized(d, br); 1592 nd_release_materialized(d, ar); 1593 } 1594 1595 static void nd_convert(CgTarget* t, ConvKind op, Operand dst, Operand src) { 1596 NativeDirectTarget* d = nd_of(t); 1597 NativeLoc sr; 1598 NativeLoc dr; 1599 /* i64<->i32 sext/zext/trunc are lowered to inline lane ops (src/cg/arith.c 1600 * api_try_wide8_convert) and i64<->float / soft-double conversions to runtime 1601 * calls; none reaches a single-register convert here. 
*/ 1602 if (nd_is_wide64_int(d, src.type) || nd_is_wide64_int(d, dst.type)) { 1603 nd_panic(d, 1604 "64-bit integer conversion reached the backend un-lowered " 1605 "(cg should emit a 2-word lane sequence or a runtime call)"); 1606 } 1607 if (nd_is_soft_double(d, src.type) || nd_is_soft_double(d, dst.type)) { 1608 nd_panic(d, 1609 "soft-float double conversion reached the backend un-lowered " 1610 "(should be a runtime call)"); 1611 } 1612 sr = nd_materialize_operand(d, src); 1613 dr = nd_dst_reg(d, dst); 1614 ND_REQUIRE_NATIVE(d, convert, "target does not emit converts"); 1615 d->native->convert(d->native, op, dr, sr); 1616 nd_dst_writeback(d, dst, dr); 1617 nd_release_materialized(d, sr); 1618 } 1619 1620 static void nd_call(CgTarget* t, const CGCallDesc* desc) { 1621 NativeDirectTarget* d = nd_of(t); 1622 NativeCallPlan plan; 1623 NativeCallDesc nd; 1624 NativeLoc* args; 1625 NativeLoc* results; 1626 NativeLoc callee_tmp; 1627 int release_callee_tmp = 0; 1628 nd_flush_all(d); 1629 nd_barrier(d, NATIVE_DIRECT_BARRIER_CALL | NATIVE_DIRECT_BARRIER_MEMORY); 1630 memset(&plan, 0, sizeof plan); 1631 memset(&nd, 0, sizeof nd); 1632 memset(&callee_tmp, 0, sizeof callee_tmp); 1633 u32 nresults = desc->result != CG_LOCAL_NONE ? 
1u : 0u; 1634 args = nd_loc_buf(d, d->argbuf, ND_ARG_BUF, desc->nargs); 1635 results = nd_loc_buf(d, d->retbuf, ND_RET_BUF, nresults); 1636 for (u32 i = 0; i < desc->nargs; ++i) 1637 args[i] = nd_loc_frame(d, desc->args[i], 0); 1638 if (nresults) results[0] = nd_loc_frame(d, desc->result, 0); 1639 nd.fn_type = desc->fn_type; 1640 nd.callee = nd_loc_operand(d, desc->callee); 1641 if (nd.callee.kind == NATIVE_LOC_FRAME) { 1642 callee_tmp = nd_materialize_loc(d, nd.callee, 1643 (NativeAllocClass)nd.callee.cls, 1644 nd.callee.type); 1645 nd.callee = callee_tmp; 1646 release_callee_tmp = 1; 1647 } 1648 nd.args = args; 1649 nd.results = results; 1650 nd.nargs = desc->nargs; 1651 nd.nresults = nresults; 1652 nd.flags = desc->flags; 1653 nd.tail_policy = desc->tail_policy; 1654 nd.inline_policy = desc->inline_policy; 1655 1656 if (d->ops && d->ops->plan_call) 1657 d->ops->plan_call(d, &nd, &plan); 1658 else { 1659 ND_REQUIRE_NATIVE(d, plan_call, "target does not plan calls"); 1660 d->native->plan_call(d->native, &nd, &plan); 1661 } 1662 if (plan.stack_arg_size > d->max_outgoing) 1663 d->max_outgoing = plan.stack_arg_size; 1664 for (u32 i = 0; i < plan.nargs; ++i) 1665 nd_write_loc(d, plan.args[i].dst, plan.args[i].src, plan.args[i].mem); 1666 if (d->ops && d->ops->emit_call) 1667 d->ops->emit_call(d, &plan); 1668 else { 1669 ND_REQUIRE_NATIVE(d, emit_call, "target does not emit calls"); 1670 d->native->emit_call(d->native, &plan); 1671 } 1672 for (u32 i = 0; i < plan.nrets; ++i) 1673 nd_write_loc(d, plan.rets[i].dst, plan.rets[i].src, plan.rets[i].mem); 1674 if (release_callee_tmp) 1675 nd_scratch_release(d, (NativeAllocClass)callee_tmp.cls, 1676 callee_tmp.v.reg); 1677 } 1678 1679 static const char* nd_tail_call_unrealizable_reason(CgTarget* t, 1680 const CGCallDesc* desc) { 1681 NativeDirectTarget* d = nd_of(t); 1682 if (d->ops && d->ops->tail_call_unrealizable_reason) 1683 return d->ops->tail_call_unrealizable_reason(d, desc); 1684 return "target does not expose direct 
tail-call lowering"; 1685 } 1686 1687 static void nd_ret(CgTarget* t, CGLocal value) { 1688 NativeDirectTarget* d = nd_of(t); 1689 NativeLoc loc; 1690 const NativeLoc* locp = NULL; 1691 NativeCallPlanRet* rets = NULL; 1692 u32 nrets = 0; 1693 nd_flush_all(d); 1694 if (d->ops && d->ops->emit_ret) { 1695 d->ops->emit_ret(d, value); 1696 return; 1697 } 1698 if (value != CG_LOCAL_NONE) { 1699 loc = nd_loc_frame(d, value, 0); 1700 locp = &loc; 1701 } 1702 ND_REQUIRE_NATIVE(d, plan_ret, "target does not plan returns"); 1703 d->native->plan_ret(d->native, d->func, locp, &rets, &nrets); 1704 for (u32 i = 0; i < nrets; ++i) 1705 nd_write_loc(d, rets[i].dst, rets[i].src, rets[i].mem); 1706 ND_REQUIRE_NATIVE(d, ret, "target does not emit returns"); 1707 d->native->ret(d->native); 1708 } 1709 1710 static void nd_unreachable(CgTarget* t) { 1711 NativeDirectTarget* d = nd_of(t); 1712 nd_flush_all(d); 1713 ND_REQUIRE_NATIVE(d, trap, "target does not emit traps"); 1714 d->native->trap(d->native); 1715 } 1716 1717 static void nd_alloca(CgTarget* t, Operand dst, Operand size, u32 align) { 1718 NativeDirectTarget* d = nd_of(t); 1719 NativeLoc sr, dr; 1720 nd_flush_all(d); 1721 sr = nd_materialize_operand(d, size); 1722 dr = nd_dst_scratch(d, dst); 1723 ND_REQUIRE_NATIVE(d, alloca_, "target does not emit alloca"); 1724 d->native->alloca_(d->native, dr, sr, align); 1725 nd_store_operand_from_reg(d, dst, dr); 1726 nd_release_materialized(d, dr); 1727 nd_release_materialized(d, sr); 1728 } 1729 1730 static void nd_va_start(CgTarget* t, Operand ap_addr) { 1731 NativeDirectTarget* d = nd_of(t); 1732 nd_flush_all(d); 1733 if (!d->ops || !d->ops->va_start_) 1734 nd_panic(d, "target does not emit va_start"); 1735 d->ops->va_start_(d, ap_addr); 1736 } 1737 1738 static void nd_va_arg(CgTarget* t, Operand dst, Operand ap_addr, 1739 KitCgTypeId type) { 1740 NativeDirectTarget* d = nd_of(t); 1741 nd_flush_all(d); 1742 if (!d->ops || !d->ops->va_arg_) nd_panic(d, "target does not emit va_arg"); 
1743 d->ops->va_arg_(d, dst, ap_addr, type); 1744 } 1745 1746 static void nd_va_end(CgTarget* t, Operand ap_addr) { 1747 NativeDirectTarget* d = nd_of(t); 1748 nd_flush_all(d); 1749 if (!d->ops || !d->ops->va_end_) nd_panic(d, "target does not emit va_end"); 1750 d->ops->va_end_(d, ap_addr); 1751 } 1752 1753 static void nd_va_copy(CgTarget* t, Operand dst_ap_addr, Operand src_ap_addr) { 1754 NativeDirectTarget* d = nd_of(t); 1755 nd_flush_all(d); 1756 if (!d->ops || !d->ops->va_copy_) nd_panic(d, "target does not emit va_copy"); 1757 d->ops->va_copy_(d, dst_ap_addr, src_ap_addr); 1758 } 1759 1760 static void nd_atomic_load(CgTarget* t, Operand dst, Operand addr, 1761 MemAccess mem, KitCgMemOrder order) { 1762 NativeDirectTarget* d = nd_of(t); 1763 NdAddrTemps temps; 1764 nd_flush_all(d); 1765 nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC); 1766 NativeAddr naddr = 1767 nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem); 1768 NativeLoc dr = nd_dst_scratch(d, dst); 1769 ND_REQUIRE_NATIVE(d, atomic_load, "target does not emit atomic loads"); 1770 d->native->atomic_load(d->native, dr, naddr, mem, order); 1771 nd_store_operand_from_reg(d, dst, dr); 1772 nd_release_materialized(d, dr); 1773 nd_addr_temps_release(d, &temps); 1774 } 1775 1776 static void nd_atomic_store(CgTarget* t, Operand addr, Operand src, 1777 MemAccess mem, KitCgMemOrder order) { 1778 NativeDirectTarget* d = nd_of(t); 1779 NdAddrTemps temps; 1780 nd_flush_all(d); 1781 nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC); 1782 NativeAddr naddr = 1783 nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem); 1784 NativeLoc sr = nd_materialize_operand(d, src); 1785 ND_REQUIRE_NATIVE(d, atomic_store, "target does not emit atomic stores"); 1786 d->native->atomic_store(d->native, naddr, sr, mem, order); 1787 nd_release_materialized(d, sr); 1788 nd_addr_temps_release(d, &temps); 1789 } 1790 1791 static void nd_atomic_rmw(CgTarget* t, 
KitCgAtomicOp op, Operand dst, 1792 Operand addr, Operand val, MemAccess mem, 1793 KitCgMemOrder order) { 1794 NativeDirectTarget* d = nd_of(t); 1795 NdAddrTemps temps; 1796 nd_flush_all(d); 1797 nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC); 1798 NativeAddr naddr = 1799 nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem); 1800 NativeLoc vr = nd_materialize_operand(d, val); 1801 NativeLoc dr = nd_dst_scratch(d, dst); 1802 ND_REQUIRE_NATIVE(d, atomic_rmw, "target does not emit atomic rmw"); 1803 d->native->atomic_rmw(d->native, op, dr, naddr, vr, mem, order); 1804 nd_store_operand_from_reg(d, dst, dr); 1805 nd_release_materialized(d, dr); 1806 nd_release_materialized(d, vr); 1807 nd_addr_temps_release(d, &temps); 1808 } 1809 1810 static void nd_atomic_cas(CgTarget* t, Operand prior, Operand ok, Operand addr, 1811 Operand expected, Operand desired, MemAccess mem, 1812 KitCgMemOrder success, KitCgMemOrder failure) { 1813 NativeDirectTarget* d = nd_of(t); 1814 NdAddrTemps temps; 1815 nd_flush_all(d); 1816 nd_barrier(d, NATIVE_DIRECT_BARRIER_MEMORY | NATIVE_DIRECT_BARRIER_ATOMIC); 1817 NativeAddr naddr = 1818 nd_addr_materialize(d, nd_addr_pointer(d, addr), &temps, mem); 1819 NativeLoc er = nd_materialize_operand(d, expected); 1820 NativeLoc dr = nd_materialize_operand(d, desired); 1821 NativeLoc pr = nd_dst_scratch(d, prior); 1822 NativeLoc kr = nd_dst_scratch(d, ok); 1823 ND_REQUIRE_NATIVE(d, atomic_cas, 1824 "target does not emit atomic compare-exchange"); 1825 d->native->atomic_cas(d->native, pr, kr, naddr, er, dr, mem, success, 1826 failure); 1827 nd_store_operand_from_reg(d, prior, pr); 1828 nd_store_operand_from_reg(d, ok, kr); 1829 nd_release_materialized(d, kr); 1830 nd_release_materialized(d, pr); 1831 nd_release_materialized(d, dr); 1832 nd_release_materialized(d, er); 1833 nd_addr_temps_release(d, &temps); 1834 } 1835 1836 static void nd_fence(CgTarget* t, KitCgMemOrder order) { 1837 NativeDirectTarget* d = nd_of(t); 
1838 nd_flush_all(d); 1839 ND_REQUIRE_NATIVE(d, fence, "target does not emit fences"); 1840 d->native->fence(d->native, order); 1841 } 1842 1843 static void nd_intrinsic(CgTarget* t, IntrinKind kind, Operand* dsts, u32 ndst, 1844 const Operand* args, u32 narg) { 1845 NativeDirectTarget* d = nd_of(t); 1846 NativeLoc* ndsts = nd_loc_buf(d, d->retbuf, ND_RET_BUF, ndst); 1847 NativeLoc* nargs = nd_loc_buf(d, d->argbuf, ND_ARG_BUF, narg); 1848 nd_flush_all(d); 1849 ND_REQUIRE_NATIVE(d, intrinsic, "target does not emit compiler intrinsics"); 1850 for (u32 i = 0; i < ndst; ++i) ndsts[i] = nd_dst_scratch(d, dsts[i]); 1851 for (u32 i = 0; i < narg; ++i) { 1852 nargs[i] = args[i].kind == OPK_IMM ? nd_loc_operand(d, args[i]) 1853 : nd_materialize_operand(d, args[i]); 1854 } 1855 d->native->intrinsic(d->native, kind, ndsts, ndst, nargs, narg); 1856 for (u32 i = 0; i < ndst; ++i) { 1857 nd_store_operand_from_reg(d, dsts[i], ndsts[i]); 1858 nd_release_materialized(d, ndsts[i]); 1859 } 1860 for (u32 i = 0; i < narg; ++i) nd_release_materialized(d, nargs[i]); 1861 } 1862 1863 static void nd_asm_block(CgTarget* t, const char* tmpl, 1864 const AsmConstraint* outs, u32 nout, Operand* out_ops, 1865 const AsmConstraint* ins, u32 nin, 1866 const Operand* in_ops, const Sym* clobbers, u32 nclob, 1867 u32 clobber_abi_sets) { 1868 NativeDirectTarget* d = nd_of(t); 1869 nd_flush_all(d); 1870 nd_barrier(d, 1871 NATIVE_DIRECT_BARRIER_INLINE_ASM | NATIVE_DIRECT_BARRIER_MEMORY); 1872 if (d->ops && d->ops->asm_block) { 1873 d->ops->asm_block(d, tmpl, outs, nout, out_ops, ins, nin, in_ops, clobbers, 1874 nclob, clobber_abi_sets); 1875 return; 1876 } 1877 nd_panic(d, "target does not emit inline asm"); 1878 } 1879 1880 static int nd_asm_is_reg_constraint(CgTarget* t, const char* constraint) { 1881 NativeDirectTarget* d = nd_of(t); 1882 return native_asm_constraint_is_reg(d->native, constraint); 1883 } 1884 1885 static void nd_file_scope_asm(CgTarget* t, const char* src, size_t len) { 1886 
NativeDirectTarget* d = nd_of(t); 1887 ND_REQUIRE_NATIVE(d, file_scope_asm, "target does not emit file-scope asm"); 1888 d->native->file_scope_asm(d->native, src, len); 1889 } 1890 1891 static void nd_set_loc(CgTarget* t, SrcLoc loc) { 1892 NativeDirectTarget* d = nd_of(t); 1893 d->loc = loc; 1894 if (d->native && d->native->set_loc) d->native->set_loc(d->native, loc); 1895 } 1896 1897 static void nd_finalize(CgTarget* t) { 1898 NativeDirectTarget* d = nd_of(t); 1899 if (d->native && d->native->finalize) d->native->finalize(d->native); 1900 } 1901 1902 static void nd_destroy(CgTarget* t) { 1903 NativeDirectTarget* d = nd_of(t); 1904 if (d->native && d->native->destroy) d->native->destroy(d->native); 1905 } 1906 1907 CgTarget* native_direct_target_new(Compiler* c, ObjBuilder* obj, 1908 const NativeDirectTargetConfig* cfg) { 1909 NativeDirectTarget* d; 1910 if (!c || !cfg || !cfg->native) 1911 compiler_panic(c, (SrcLoc){0, 0, 0}, 1912 "native_direct_target_new: missing native target"); 1913 d = arena_znew(c->tu, NativeDirectTarget); 1914 if (!d) return NULL; 1915 d->base.c = c; 1916 d->base.obj = obj; 1917 d->magic = NATIVE_DIRECT_MAGIC; 1918 d->native = cfg->native; 1919 d->ops = cfg->ops; 1920 d->user = cfg->user; 1921 1922 /* Resolve register/class info once; it is constant for the program. */ 1923 d->reg_info = cfg->native ? 
cfg->native->regs : NULL; 1924 for (u32 i = 0; i < 3u; ++i) d->class_info[i] = NULL; 1925 if (d->reg_info) { 1926 const NativeRegInfo* ri = d->reg_info; 1927 for (u32 i = 0; i < ri->nclasses; ++i) { 1928 u32 cls = ri->classes[i].cls; 1929 if (cls < 3u) d->class_info[cls] = &ri->classes[i]; 1930 } 1931 } 1932 1933 d->base.func_begin = nd_func_begin; 1934 d->base.func_end = nd_func_end; 1935 d->base.alias = nd_alias; 1936 d->base.local = nd_local_new; 1937 d->base.local_addr = nd_local_addr; 1938 d->base.param = nd_param; 1939 d->base.local_debug_loc = nd_local_debug_loc; 1940 d->base.label_new = nd_label_new; 1941 d->base.label_place = nd_label_place; 1942 d->base.jump = nd_jump; 1943 d->base.cmp_branch = nd_cmp_branch; 1944 d->base.switch_ = nd_switch; 1945 d->base.indirect_branch = nd_indirect_branch; 1946 d->base.load_label_addr = nd_load_label_addr; 1947 d->base.local_static_data_begin = nd_local_static_data_begin; 1948 d->base.local_static_data_write = nd_local_static_data_write; 1949 d->base.local_static_data_label_addr = nd_local_static_data_label_addr; 1950 d->base.local_static_data_end = nd_local_static_data_end; 1951 d->base.data_label_addr_unsupported_msg = nd_data_label_addr_unsupported_msg; 1952 d->base.scope_begin = nd_scope_begin; 1953 d->base.scope_end = nd_scope_end; 1954 d->base.break_to = nd_break_to; 1955 d->base.continue_to = nd_continue_to; 1956 d->base.load_imm = nd_load_imm; 1957 d->base.load_const = nd_load_const; 1958 d->base.copy = nd_copy; 1959 d->base.load = nd_load; 1960 d->base.store = nd_store; 1961 d->base.addr_of = nd_addr_of; 1962 d->base.tls_addr_of = nd_tls_addr_of; 1963 d->base.copy_bytes = nd_copy_bytes; 1964 d->base.set_bytes = nd_set_bytes; 1965 d->base.binop = nd_binop; 1966 d->base.unop = nd_unop; 1967 d->base.cmp = nd_cmp; 1968 d->base.convert = nd_convert; 1969 d->base.call = nd_call; 1970 d->base.tail_call_unrealizable_reason = nd_tail_call_unrealizable_reason; 1971 d->base.ret = nd_ret; 1972 d->base.unreachable = 
nd_unreachable; 1973 d->base.alloca_ = nd_alloca; 1974 d->base.va_start_ = nd_va_start; 1975 d->base.va_arg_ = nd_va_arg; 1976 d->base.va_end_ = nd_va_end; 1977 d->base.va_copy_ = nd_va_copy; 1978 d->base.atomic_load = nd_atomic_load; 1979 d->base.atomic_store = nd_atomic_store; 1980 d->base.atomic_rmw = nd_atomic_rmw; 1981 d->base.atomic_cas = nd_atomic_cas; 1982 d->base.fence = nd_fence; 1983 d->base.intrinsic = nd_intrinsic; 1984 d->base.asm_is_reg_constraint = nd_asm_is_reg_constraint; 1985 d->base.asm_block = nd_asm_block; 1986 d->base.file_scope_asm = nd_file_scope_asm; 1987 d->base.set_loc = nd_set_loc; 1988 d->base.finalize = nd_finalize; 1989 d->base.destroy = nd_destroy; 1990 return &d->base; 1991 } 1992 1993 NativeTarget* native_direct_target_native(CgTarget* t) { 1994 NativeDirectTarget* d = t ? nd_of(t) : NULL; 1995 return d && d->magic == NATIVE_DIRECT_MAGIC ? d->native : NULL; 1996 }