pass_native_emit.c (64744B)
/* Per-function state shared by every native-emission helper in this file. */
typedef struct NativeEmitCtx {
  /* Compiler instance: used for panics, cg type queries and string
   * interning. */
  Compiler* c;
  /* Function currently being emitted (scratch registers, frame-slot and block
   * counts, arena, params). */
  Func* f;
  /* Backend the code is emitted through: its callbacks plus its obj / mc
   * handles. */
  NativeTarget* target;
  /* Indexed by IR frame slot; yields the target frame slot. map_slot reports
   * a zero entry as "unmapped frame slot". */
  NativeFrameSlot* slot_map;
  /* Indexed by block; an entry stays MC_LABEL_NONE until ensure_label creates
   * the label on first request. */
  MCLabel* labels;
  /* NOTE(review): presumably a per-block "label already placed" flag; it is
   * not referenced in this part of the file — confirm against emit_block. */
  u8* label_placed;
  /* State of the local static data object currently being written (valid
   * while local_static_active is nonzero). */
  ObjSecId local_static_sec;  /* section receiving the bytes */
  ObjSymId local_static_sym;  /* symbol defined over the bytes at end */
  u32 local_static_base;      /* aligned section offset where the object starts */
  u32 local_static_size;      /* bytes appended so far */
  u8 local_static_active;     /* set by begin, cleared by end */
  /* Set by emit_block for the IR_RET that is the last inst of the last block
   * in emit_order. emit_ret consults it to skip the trailing
   * branch-to-epilogue: func_end places the epilogue label at the very next
   * position, so the branch would just jump to the next 4 bytes. */
  u8 emitting_terminal_ret;
} NativeEmitCtx;
SF_ALLOC 59 : (SF_ALLOC | SF_WRITE); 60 } else if (desc->attrs.flags & KIT_CG_DATADEF_READONLY) { 61 name = pool_intern_slice(e->c->global, SLICE_LIT(".rodata")); 62 kind = SEC_RODATA; 63 flags = SF_ALLOC; 64 } else { 65 name = pool_intern_slice(e->c->global, SLICE_LIT(".data")); 66 kind = SEC_DATA; 67 flags = SF_ALLOC | SF_WRITE; 68 } 69 align = desc->align ? desc->align : 1u; 70 e->local_static_sec = obj_section(e->target->obj, name, kind, flags, align); 71 e->local_static_base = 72 obj_align_to(e->target->obj, e->local_static_sec, align); 73 e->local_static_size = 0; 74 e->local_static_sym = desc->sym; 75 e->local_static_active = 1; 76 } 77 78 static void emit_local_static_write(NativeEmitCtx* e, const u8* data, u64 len, 79 SrcLoc loc) { 80 u8 zero[64]; 81 u64 orig_len = len; 82 if (!e->local_static_active) emit_panic(e, loc, "local static data inactive"); 83 if (!len) return; 84 if (data) { 85 obj_write(e->target->obj, e->local_static_sec, data, (size_t)len); 86 } else { 87 memset(zero, 0, sizeof zero); 88 while (len >= sizeof zero) { 89 obj_write(e->target->obj, e->local_static_sec, zero, sizeof zero); 90 len -= sizeof zero; 91 } 92 if (len) obj_write(e->target->obj, e->local_static_sec, zero, (size_t)len); 93 } 94 e->local_static_size += (u32)orig_len; 95 } 96 97 static void emit_local_static_label_addr(NativeEmitCtx* e, MCLabel target, 98 i64 addend, u32 width, SrcLoc loc) { 99 u8 zero[8]; 100 u32 off; 101 RelocKind kind; 102 if (!e->local_static_active) emit_panic(e, loc, "local static data inactive"); 103 /* A jump-table / label-address slot is one target pointer wide: 8 bytes 104 * (R_ABS64) on a 64-bit target, 4 bytes (R_ABS32) on rv32/ELFCLASS32. 
*/ 105 if (width == 8u) 106 kind = R_ABS64; 107 else if (width == 4u) 108 kind = R_ABS32; 109 else { 110 emit_panic(e, loc, "unsupported local static label width"); 111 return; 112 } 113 memset(zero, 0, sizeof zero); 114 off = e->local_static_base + e->local_static_size; 115 obj_write(e->target->obj, e->local_static_sec, zero, width); 116 e->target->mc->emit_label_data_reloc(e->target->mc, e->local_static_sec, off, 117 target, kind, width, addend); 118 e->local_static_size += width; 119 } 120 121 static void emit_local_static_end(NativeEmitCtx* e, SrcLoc loc) { 122 if (!e->local_static_active) emit_panic(e, loc, "local static data inactive"); 123 obj_symbol_define_live(e->target->obj, e->local_static_sym, 124 e->local_static_sec, e->local_static_base, 125 e->local_static_size); 126 e->local_static_active = 0; 127 e->local_static_sec = OBJ_SEC_NONE; 128 e->local_static_sym = OBJ_SYM_NONE; 129 e->local_static_base = 0; 130 e->local_static_size = 0; 131 } 132 133 static u32 type_size_or(Compiler* c, KitCgTypeId type, u32 fallback) { 134 u64 n = type ? cg_type_size(c, type) : 0u; 135 if (!n || n > 0xffffffffull) return fallback; 136 return (u32)n; 137 } 138 139 static u32 type_align_or(Compiler* c, KitCgTypeId type, u32 fallback) { 140 u64 n = type ? cg_type_align(c, type) : 0u; 141 if (!n || n > 0xffffffffull) return fallback; 142 return (u32)n; 143 } 144 145 static MemAccess mem_for_type(Compiler* c, KitCgTypeId type) { 146 MemAccess mem; 147 memset(&mem, 0, sizeof mem); 148 mem.type = type; 149 mem.size = type_size_or(c, type, 8u); 150 mem.align = type_align_or(c, type, mem.size >= 8u ? 8u : mem.size); 151 return mem; 152 } 153 154 static NativeAllocClass class_for_type(NativeEmitCtx* e, KitCgTypeId type) { 155 if (e->target->class_for_type) 156 return e->target->class_for_type(e->target, type); 157 return cg_type_is_float(e->c, type) ? 
NATIVE_REG_FP : NATIVE_REG_INT; 158 } 159 160 static NativeLoc loc_none(void) { 161 NativeLoc loc; 162 memset(&loc, 0, sizeof loc); 163 return loc; 164 } 165 166 static NativeLoc loc_reg(KitCgTypeId type, NativeAllocClass cls, Reg reg) { 167 NativeLoc loc; 168 memset(&loc, 0, sizeof loc); 169 loc.kind = NATIVE_LOC_REG; 170 loc.cls = (u8)cls; 171 loc.type = type; 172 loc.v.reg = reg; 173 return loc; 174 } 175 176 static NativeLoc loc_frame(KitCgTypeId type, NativeAllocClass cls, 177 NativeFrameSlot slot) { 178 NativeLoc loc; 179 memset(&loc, 0, sizeof loc); 180 loc.kind = NATIVE_LOC_FRAME; 181 loc.cls = (u8)cls; 182 loc.type = type; 183 loc.v.frame = slot; 184 return loc; 185 } 186 187 static NativeLoc loc_imm(KitCgTypeId type, i64 imm) { 188 NativeLoc loc; 189 memset(&loc, 0, sizeof loc); 190 loc.kind = NATIVE_LOC_IMM; 191 loc.cls = NATIVE_REG_INT; 192 loc.type = type; 193 loc.v.imm = imm; 194 return loc; 195 } 196 197 static NativeLoc loc_global(KitCgTypeId type, ObjSymId sym, i64 addend) { 198 NativeLoc loc; 199 memset(&loc, 0, sizeof loc); 200 loc.kind = NATIVE_LOC_GLOBAL; 201 loc.cls = NATIVE_REG_INT; 202 loc.type = type; 203 loc.v.global.sym = sym; 204 loc.v.global.addend = addend; 205 return loc; 206 } 207 208 static int loc_same_frame(NativeLoc a, NativeLoc b) { 209 return a.kind == NATIVE_LOC_FRAME && b.kind == NATIVE_LOC_FRAME && 210 a.v.frame == b.v.frame; 211 } 212 213 static Reg scratch_reg(NativeEmitCtx* e, NativeAllocClass cls, Reg a, Reg b, 214 SrcLoc loc) { 215 u32 c = (u32)cls; 216 if (c < OPT_REG_CLASSES) { 217 for (u32 i = 0; i < e->f->opt_scratch_reg_count[c]; ++i) { 218 Reg r = e->f->opt_scratch_regs[c][i]; 219 if (r != a && r != b) return r; 220 } 221 } 222 emit_panic(e, loc, "no scratch register for native emission"); 223 } 224 225 static int scratch_available(NativeEmitCtx* e, NativeAllocClass cls, Reg a, 226 Reg b) { 227 u32 c = (u32)cls; 228 if (c < OPT_REG_CLASSES) { 229 for (u32 i = 0; i < e->f->opt_scratch_reg_count[c]; ++i) { 230 Reg r 
= e->f->opt_scratch_regs[c][i]; 231 if (r != a && r != b) return 1; 232 } 233 } 234 return 0; 235 } 236 237 static NativeLoc scratch_loc(NativeEmitCtx* e, KitCgTypeId type, 238 NativeAllocClass cls, Reg a, Reg b, SrcLoc loc) { 239 return loc_reg(type, cls, scratch_reg(e, cls, a, b, loc)); 240 } 241 242 static NativeFrameSlot map_slot(NativeEmitCtx* e, NativeFrameSlot slot, 243 SrcLoc loc) { 244 if (slot == NATIVE_FRAME_SLOT_NONE) return NATIVE_FRAME_SLOT_NONE; 245 if (slot > e->f->nframe_slots) emit_panic(e, loc, "bad frame slot"); 246 if (!e->slot_map[slot]) emit_panic(e, loc, "unmapped frame slot"); 247 return e->slot_map[slot]; 248 } 249 250 static MCLabel ensure_label(NativeEmitCtx* e, u32 block, SrcLoc loc) { 251 if (block >= e->f->nblocks) emit_panic(e, loc, "bad block label"); 252 if (e->labels[block] == MC_LABEL_NONE) 253 e->labels[block] = e->target->label_new(e->target); 254 return e->labels[block]; 255 } 256 257 static NativeAddr addr_from_loc(NativeEmitCtx* e, NativeLoc loc, 258 SrcLoc src_loc) { 259 NativeAddr addr; 260 memset(&addr, 0, sizeof addr); 261 addr.base_type = loc.type; 262 switch ((NativeLocKind)loc.kind) { 263 case NATIVE_LOC_FRAME: 264 addr.base_kind = NATIVE_ADDR_BASE_FRAME; 265 addr.base.frame = loc.v.frame; 266 return addr; 267 case NATIVE_LOC_STACK: 268 addr.base_kind = NATIVE_ADDR_BASE_FRAME; 269 addr.base.frame = loc.v.stack.slot; 270 addr.offset = loc.v.stack.offset; 271 return addr; 272 case NATIVE_LOC_GLOBAL: 273 addr.base_kind = NATIVE_ADDR_BASE_GLOBAL; 274 addr.base.global.sym = loc.v.global.sym; 275 addr.base.global.addend = loc.v.global.addend; 276 return addr; 277 case NATIVE_LOC_REG: 278 addr.base_kind = NATIVE_ADDR_BASE_REG; 279 addr.cls = loc.cls; 280 addr.base.reg = loc.v.reg; 281 return addr; 282 case NATIVE_LOC_ADDR: 283 return loc.v.addr; 284 default: 285 emit_panic(e, src_loc, "location is not addressable"); 286 } 287 } 288 289 static NativeAddr addr_from_operand(NativeEmitCtx* e, const OptOperand* op, 290 SrcLoc 
loc) { 291 NativeAddr addr; 292 memset(&addr, 0, sizeof addr); 293 if (!op) emit_panic(e, loc, "missing address operand"); 294 addr.base_type = op->type; 295 switch ((OptOperandKind)op->kind) { 296 case OPT_OPK_LOCAL: 297 addr.base_kind = NATIVE_ADDR_BASE_FRAME; 298 addr.base.frame = map_slot(e, op->v.frame_slot, loc); 299 return addr; 300 case OPT_OPK_GLOBAL: 301 addr.base_kind = NATIVE_ADDR_BASE_GLOBAL; 302 addr.base.global.sym = op->v.global.sym; 303 addr.base.global.addend = op->v.global.addend; 304 return addr; 305 case OPT_OPK_INDIRECT: 306 addr.base_kind = NATIVE_ADDR_BASE_REG; 307 addr.cls = NATIVE_REG_INT; 308 addr.base.reg = op->v.ind.base; 309 addr.index_kind = op->v.ind.index == (Reg)REG_NONE 310 ? NATIVE_ADDR_INDEX_NONE 311 : NATIVE_ADDR_INDEX_REG; 312 addr.index_cls = NATIVE_REG_INT; 313 addr.index.reg = op->v.ind.index; 314 addr.log2_scale = op->v.ind.log2_scale; 315 addr.offset = op->v.ind.ofs; 316 return addr; 317 case OPT_OPK_REG: 318 addr.base_kind = NATIVE_ADDR_BASE_REG; 319 addr.cls = op->cls; 320 addr.base.reg = op->v.reg; 321 return addr; 322 default: 323 emit_panic(e, loc, "operand is not addressable"); 324 } 325 } 326 327 static NativeAddr pointer_addr_from_operand(NativeEmitCtx* e, 328 const OptOperand* op, SrcLoc loc, 329 Reg avoid_a, Reg avoid_b) { 330 NativeAddr addr; 331 memset(&addr, 0, sizeof addr); 332 if (!op) emit_panic(e, loc, "missing pointer operand"); 333 addr.base_type = op->type; 334 switch ((OptOperandKind)op->kind) { 335 case OPT_OPK_LOCAL: { 336 NativeAddr frame; 337 NativeLoc dst; 338 NativeAllocClass cls; 339 Reg r; 340 /* An OPK_LOCAL in a pointer-address position is ambiguous. When the 341 * operand's type is a pointer, the local *holds* the pointer value and 342 * must be loaded to get the address. Otherwise the local *is* the 343 * aggregate storage and its frame home is the address directly — loading 344 * it would dereference the aggregate's first 8 bytes as a pointer (e.g. 
345 * an `__int128` call result copied by `agg_copy`). Mirrors the 346 * single-pass path's nd_addr_pointer. */ 347 if (!cg_type_is_ptr(e->c, op->type)) { 348 addr.base_kind = NATIVE_ADDR_BASE_FRAME; 349 addr.base.frame = map_slot(e, op->v.frame_slot, loc); 350 return addr; 351 } 352 cls = class_for_type(e, op->type); 353 r = scratch_reg(e, cls, avoid_a, avoid_b, loc); 354 memset(&frame, 0, sizeof frame); 355 frame.base_kind = NATIVE_ADDR_BASE_FRAME; 356 frame.base.frame = map_slot(e, op->v.frame_slot, loc); 357 frame.base_type = op->type; 358 dst = loc_reg(op->type, cls, r); 359 e->target->load(e->target, dst, frame, mem_for_type(e->c, op->type)); 360 addr.base_kind = NATIVE_ADDR_BASE_REG; 361 addr.cls = (u8)cls; 362 addr.base.reg = r; 363 return addr; 364 } 365 case OPT_OPK_GLOBAL: 366 addr.base_kind = NATIVE_ADDR_BASE_GLOBAL; 367 addr.base.global.sym = op->v.global.sym; 368 addr.base.global.addend = op->v.global.addend; 369 return addr; 370 case OPT_OPK_INDIRECT: 371 return addr_from_operand(e, op, loc); 372 case OPT_OPK_REG: 373 addr.base_kind = NATIVE_ADDR_BASE_REG; 374 addr.cls = op->cls; 375 addr.base.reg = op->v.reg; 376 return addr; 377 default: 378 emit_panic(e, loc, "operand is not a pointer address"); 379 } 380 } 381 382 static Reg addr_base_reg(const NativeAddr* addr) { 383 return addr && addr->base_kind == NATIVE_ADDR_BASE_REG ? addr->base.reg 384 : REG_NONE; 385 } 386 387 static Reg addr_index_reg(const NativeAddr* addr) { 388 return addr && addr->index_kind == NATIVE_ADDR_INDEX_REG ? addr->index.reg 389 : REG_NONE; 390 } 391 392 static void collapse_addr_to_reg(NativeEmitCtx* e, NativeAddr* addr, 393 SrcLoc loc) { 394 /* Materialize the full address into a reserved scratch register. We must not 395 * reuse the base register as the destination: the register allocator may keep 396 * that value live past this memory op (e.g. 
a pointer stored into several of 397 * its own fields and then returned), so an in-place `add base, base, #off` 398 * would corrupt it. Avoid both base and index so load_addr can still read 399 * them. */ 400 Reg r = scratch_reg(e, NATIVE_REG_INT, addr_base_reg(addr), 401 addr_index_reg(addr), loc); 402 NativeLoc dst = loc_reg(addr->base_type, NATIVE_REG_INT, r); 403 e->target->load_addr(e->target, dst, *addr); 404 memset(addr, 0, sizeof *addr); 405 addr->base_kind = NATIVE_ADDR_BASE_REG; 406 addr->cls = NATIVE_REG_INT; 407 addr->base.reg = r; 408 addr->base_type = dst.type; 409 } 410 411 /* Collapse an address the target cannot encode for this access (e.g. an 412 * index scale aarch64 cannot fold into a load/store) into a single base 413 * register via load_addr. Mirrors NativeDirectTarget's nd_addr_materialize so 414 * the O1 emit path legalizes the same address shapes as direct -O0 emission. */ 415 static void legalize_addr(NativeEmitCtx* e, NativeAddr* addr, MemAccess mem, 416 SrcLoc loc) { 417 if (e->target->addr_legal && !e->target->addr_legal(e->target, addr, mem)) 418 collapse_addr_to_reg(e, addr, loc); 419 } 420 421 static NativeLoc loc_from_operand(NativeEmitCtx* e, const OptOperand* op, 422 SrcLoc loc) { 423 if (!op) return loc_none(); 424 switch ((OptOperandKind)op->kind) { 425 case OPT_OPK_REG: 426 return loc_reg(op->type, (NativeAllocClass)op->cls, op->v.reg); 427 case OPT_OPK_IMM: 428 return loc_imm(op->type, op->v.imm); 429 case OPT_OPK_GLOBAL: 430 return loc_global(op->type, op->v.global.sym, op->v.global.addend); 431 case OPT_OPK_LOCAL: 432 return loc_frame(op->type, class_for_type(e, op->type), 433 map_slot(e, op->v.frame_slot, loc)); 434 case OPT_OPK_INDIRECT: { 435 NativeLoc out = loc_none(); 436 out.kind = NATIVE_LOC_ADDR; 437 out.cls = op->cls; 438 out.type = op->type; 439 out.v.addr = addr_from_operand(e, op, loc); 440 return out; 441 } 442 } 443 emit_panic(e, loc, "bad operand kind"); 444 } 445 446 static NativeLoc 
materialize(NativeEmitCtx* e, NativeLoc src, 447 NativeAllocClass cls, KitCgTypeId type, 448 Reg avoid_a, Reg avoid_b, SrcLoc loc) { 449 NativeLoc dst; 450 NativeAddr addr; 451 MemAccess mem; 452 if (src.kind == NATIVE_LOC_REG) return src; 453 dst = scratch_loc(e, type ? type : src.type, cls, avoid_a, avoid_b, loc); 454 switch ((NativeLocKind)src.kind) { 455 case NATIVE_LOC_IMM: 456 e->target->load_imm(e->target, dst, src.v.imm); 457 return dst; 458 case NATIVE_LOC_GLOBAL: 459 addr = addr_from_loc(e, src, loc); 460 e->target->load_addr(e->target, dst, addr); 461 return dst; 462 case NATIVE_LOC_FRAME: 463 case NATIVE_LOC_STACK: 464 case NATIVE_LOC_ADDR: 465 addr = addr_from_loc(e, src, loc); 466 mem = mem_for_type(e->c, dst.type); 467 e->target->load(e->target, dst, addr, mem); 468 return dst; 469 default: 470 emit_panic(e, loc, "cannot materialize location"); 471 } 472 } 473 474 static void write_loc(NativeEmitCtx* e, NativeLoc dst, NativeLoc src, 475 MemAccess mem, SrcLoc loc) { 476 NativeAddr addr; 477 NativeLoc tmp; 478 if (dst.kind == NATIVE_LOC_NONE) return; 479 if (loc_same_frame(dst, src)) return; 480 if (dst.kind == NATIVE_LOC_REG) { 481 if (src.kind == NATIVE_LOC_REG) { 482 if (dst.v.reg != src.v.reg || dst.cls != src.cls) 483 e->target->move(e->target, dst, src); 484 return; 485 } 486 /* An immediate goes straight into the destination register; routing it 487 * through a scratch and then moving would cost an extra instruction. 
*/ 488 if (src.kind == NATIVE_LOC_IMM) { 489 e->target->load_imm(e->target, dst, src.v.imm); 490 return; 491 } 492 tmp = materialize(e, src, (NativeAllocClass)dst.cls, dst.type, dst.v.reg, 493 REG_NONE, loc); 494 if (tmp.v.reg != dst.v.reg || tmp.cls != dst.cls) 495 e->target->move(e->target, dst, tmp); 496 return; 497 } 498 addr = addr_from_loc(e, dst, loc); 499 if (src.kind != NATIVE_LOC_REG) 500 src = materialize(e, src, (NativeAllocClass)dst.cls, dst.type, REG_NONE, 501 REG_NONE, loc); 502 e->target->store(e->target, addr, src, mem); 503 } 504 505 /* For an arithmetic / compare source operand: keep it as an immediate when it 506 * is a constant the target can encode for `use` (so no register is wasted 507 * materializing it); otherwise materialize into a register. */ 508 static NativeLoc operand_imm_or_reg(NativeEmitCtx* e, const OptOperand* op, 509 NativeImmUse use, u32 sub, Reg avoid_a, 510 Reg avoid_b, SrcLoc loc) { 511 if (op->kind == OPK_IMM && e->target->imm_legal && 512 e->target->imm_legal(e->target, use, sub, op->type, op->v.imm)) 513 return loc_imm(op->type, op->v.imm); 514 return materialize(e, loc_from_operand(e, op, loc), 515 class_for_type(e, op->type), op->type, avoid_a, avoid_b, 516 loc); 517 } 518 519 static Reg loc_avoid_reg(NativeLoc l) { 520 return l.kind == NATIVE_LOC_REG ? l.v.reg : REG_NONE; 521 } 522 523 static int type_is_aggregate_or_large(NativeEmitCtx* e, KitCgTypeId type) { 524 /* "Large" = wider than one machine word (ptr_size): such a value cannot move 525 * through a single register, so IR_COPY/IR_LOAD/IR_STORE of it must go through 526 * copy_bytes. 8 on rv64/x64/aa64, 4 on rv32 (so an 8-byte i64/double is large 527 * there and is copied as two words rather than truncated into one register). */ 528 return type && (cg_type_is_aggregate(e->c, type) || 529 type_size_or(e->c, type, 8u) > e->c->target.ptr_size); 530 } 531 532 /* Copy an aggregate / oversized value between two memory locations. 
dst and 533 * src must be addressable (frame/global/indirect/reg-as-pointer); used for 534 * IR_COPY/IR_LOAD/IR_STORE whose value type cannot move through one register. 535 */ 536 static void emit_agg_move(NativeEmitCtx* e, NativeAddr da, NativeAddr sa, 537 KitCgTypeId type) { 538 AggregateAccess acc; 539 memset(&acc, 0, sizeof acc); 540 acc.type = type; 541 acc.size = type_size_or(e->c, type, 8u); 542 acc.align = type_align_or(e->c, type, 8u); 543 acc.mem = mem_for_type(e->c, type); 544 e->target->copy_bytes(e->target, da, sa, acc); 545 } 546 547 static CGFuncDesc semantic_func_desc(NativeEmitCtx* e) { 548 OptCGFuncDesc* in = &e->f->desc; 549 CGFuncDesc out; 550 memset(&out, 0, sizeof out); 551 out.sym = in->sym; 552 out.text_section_id = in->text_section_id; 553 out.group_id = in->group_id; 554 out.fn_type = in->fn_type; 555 out.result_type = in->result_type; 556 out.nparams = in->nparams; 557 out.loc = in->loc; 558 out.flags = in->flags; 559 out.inline_policy = in->inline_policy; 560 out.atomize = in->atomize; 561 if (in->nparams && in->params) { 562 CGParamDesc* params = arena_zarray(e->f->arena, CGParamDesc, in->nparams); 563 for (u32 i = 0; i < in->nparams; ++i) { 564 params[i].index = in->params[i].index; 565 params[i].name = in->params[i].name; 566 params[i].type = in->params[i].type; 567 params[i].size = in->params[i].size; 568 params[i].align = in->params[i].align; 569 params[i].flags = in->params[i].flags; 570 params[i].loc = in->params[i].loc; 571 } 572 out.params = params; 573 } 574 return out; 575 } 576 577 static CGParamDesc semantic_param_desc(const IRParam* p) { 578 CGParamDesc out; 579 memset(&out, 0, sizeof out); 580 out.index = p->index; 581 out.name = p->name; 582 out.type = p->type; 583 out.size = p->size; 584 out.align = p->align; 585 out.flags = p->flags; 586 out.loc = p->loc; 587 return out; 588 } 589 590 static NativeLoc loc_for_preg(NativeEmitCtx* e, PReg preg, KitCgTypeId type, 591 SrcLoc loc) { 592 u8 kind = opt_preg_alloc_kind(e->f, 
preg); 593 if (kind == OPT_ALLOC_HARD) 594 return loc_reg(type, (NativeAllocClass)opt_preg_loc_cls(e->f, preg), 595 opt_preg_hard_reg(e->f, preg)); 596 if (kind == OPT_ALLOC_SPILL) 597 return loc_frame(type, class_for_type(e, type), 598 map_slot(e, opt_preg_spill_slot(e->f, preg), loc)); 599 return loc_none(); 600 } 601 602 static void bind_params(NativeEmitCtx* e) { 603 for (u32 i = 0; i < e->f->nparams; ++i) { 604 IRParam* p = &e->f->params[i]; 605 CGParamDesc sd = semantic_param_desc(p); 606 NativeLoc dst; 607 if (p->storage.kind == CG_LOCAL_STORAGE_REG) 608 dst = loc_for_preg(e, (PReg)p->storage.v.reg, p->type, p->loc); 609 else 610 dst = loc_frame(p->type, class_for_type(e, p->type), 611 map_slot(e, p->storage.v.frame_slot, p->loc)); 612 if (e->target->bind_param) e->target->bind_param(e->target, &sd, dst); 613 } 614 /* Let a backend that defers register-destination binds resolve them now (as a 615 * parallel copy), once every param's incoming location has been read. */ 616 if (e->target->bind_params_end) e->target->bind_params_end(e->target); 617 } 618 619 /* The parameter value is placed into its allocated location by bind_param at 620 * function entry; the IR_PARAM_DECL marker emits nothing. */ 621 static void emit_param_decl(NativeEmitCtx* e, Inst* in) { 622 (void)e; 623 (void)in; 624 } 625 626 static NativeFrameSlot temp_slot(NativeEmitCtx* e, KitCgTypeId type, SrcLoc loc, 627 NativeFrameSlotKind kind) { 628 NativeFrameSlotDesc d; 629 memset(&d, 0, sizeof d); 630 d.type = type; 631 d.loc = loc; 632 d.size = type_size_or(e->c, type, 8u); 633 d.align = type_align_or(e->c, type, d.size >= 8u ? 
8u : d.size); 634 d.kind = kind; 635 return e->target->frame_slot(e->target, &d); 636 } 637 638 static NativeLoc abi_storage_loc(NativeEmitCtx* e, const OptCGABIValue* v, 639 SrcLoc loc) { 640 if (!v) return loc_none(); 641 return loc_from_operand(e, &v->storage, loc); 642 } 643 644 static void emit_call(NativeEmitCtx* e, Inst* in) { 645 IRCallAux* aux = (IRCallAux*)in->extra.aux; 646 NativeCallDesc d; 647 NativeCallPlan plan; 648 NativeLoc* args = NULL; 649 NativeLoc* results = NULL; 650 NativeLoc final_result = loc_none(); 651 NativeFrameSlot result_slot = NATIVE_FRAME_SLOT_NONE; 652 MemAccess result_mem; 653 if (!aux) return; 654 memset(&d, 0, sizeof d); 655 memset(&plan, 0, sizeof plan); 656 if (aux->desc.nargs) 657 args = arena_zarray(e->f->arena, NativeLoc, aux->desc.nargs); 658 for (u32 i = 0; i < aux->desc.nargs; ++i) 659 args[i] = abi_storage_loc(e, &aux->desc.args[i], in->loc); 660 if (aux->desc.ret.storage.kind) { 661 KitCgTypeId rty = aux->desc.ret.type; 662 results = arena_zarray(e->f->arena, NativeLoc, 1); 663 final_result = abi_storage_loc(e, &aux->desc.ret, in->loc); 664 /* Hand plan_call the value's real destination directly whenever it is a 665 * register or a frame slot: a scalar result is a single move out of the ABI 666 * result register, and an aggregate / oversized result — which plan_call or 667 * the callee writes in parts and so must land in memory — lands straight in 668 * its frame home. Routing either through a fresh temp slot (store then 669 * reload / copy_bytes) was a pure round trip on every call. The temp slot 670 * is a fallback for the rare result whose storage is neither a register nor 671 * a frame slot (e.g. written into a global); lowering hoists aggregates to 672 * a frame home (opt_lower_to_mir), so this branch is scalar-only in 673 * practice. 
*/ 674 if (final_result.kind == NATIVE_LOC_REG || 675 final_result.kind == NATIVE_LOC_FRAME) { 676 results[0] = final_result; 677 } else { 678 result_slot = temp_slot(e, rty, in->loc, NATIVE_FRAME_SLOT_SPILL); 679 results[0] = loc_frame(rty, class_for_type(e, rty), result_slot); 680 } 681 } 682 d.fn_type = aux->desc.fn_type; 683 d.callee = loc_from_operand(e, &aux->desc.callee, in->loc); 684 d.args = args; 685 d.results = results; 686 d.nargs = aux->desc.nargs; 687 d.nresults = results ? 1u : 0u; 688 d.flags = aux->desc.flags; 689 d.tail_policy = aux->desc.tail_policy; 690 d.inline_policy = aux->desc.inline_policy; 691 e->target->plan_call(e->target, &d, &plan); 692 for (u32 i = 0; i < plan.nargs; ++i) 693 write_loc(e, plan.args[i].dst, plan.args[i].src, plan.args[i].mem, in->loc); 694 if (plan.callee.kind != NATIVE_LOC_REG && 695 plan.callee.kind != NATIVE_LOC_GLOBAL) 696 plan.callee = materialize(e, plan.callee, NATIVE_REG_INT, plan.callee.type, 697 REG_NONE, REG_NONE, in->loc); 698 e->target->emit_call(e->target, &plan); 699 for (u32 i = 0; i < plan.nrets; ++i) 700 write_loc(e, plan.rets[i].dst, plan.rets[i].src, plan.rets[i].mem, in->loc); 701 if (result_slot && final_result.kind != NATIVE_LOC_NONE) { 702 KitCgTypeId rty = aux->desc.ret.type; 703 NativeLoc tmp = loc_frame(rty, class_for_type(e, rty), result_slot); 704 result_mem = mem_for_type(e->c, rty); 705 if (final_result.kind != NATIVE_LOC_REG && 706 (cg_type_is_aggregate(e->c, rty) || 707 type_size_or(e->c, rty, 8u) > e->c->target.ptr_size)) { 708 /* Aggregate / oversized result: move bytes rather than a scalar copy 709 * (which would exceed the single-register width). The result was either 710 * written in parts by plan_call's rets, or by the callee via the sret 711 * pointer; either way it now lives in the temp slot. 
*/ 712 AggregateAccess acc; 713 NativeAddr da = addr_from_loc(e, final_result, in->loc); 714 NativeAddr sa = addr_from_loc(e, tmp, in->loc); 715 memset(&acc, 0, sizeof acc); 716 acc.type = rty; 717 acc.size = type_size_or(e->c, rty, 8u); 718 acc.align = type_align_or(e->c, rty, 8u); 719 acc.mem = result_mem; 720 e->target->copy_bytes(e->target, da, sa, acc); 721 } else { 722 write_loc(e, final_result, tmp, result_mem, in->loc); 723 } 724 } 725 } 726 727 static void emit_ret(NativeEmitCtx* e, Inst* in, const CGFuncDesc* fd) { 728 IRRetAux* aux = (IRRetAux*)in->extra.aux; 729 NativeLoc value = loc_none(); 730 const NativeLoc* values = NULL; 731 NativeCallPlanRet* rets = NULL; 732 u32 nrets = 0; 733 if (aux && aux->present) { 734 /* Hand plan_ret the value's location directly. For an aggregate / oversized 735 * result it is a memory location (plan_ret copies to the sret pointer or 736 * reads parts into the return registers); for a scalar it is the value's 737 * register or slot, which plan_ret moves into the return register. The old 738 * code spilled scalars to a fresh slot and reloaded them, a pure round 739 * trip on every return. */ 740 value = abi_storage_loc(e, &aux->val, in->loc); 741 values = &value; 742 } 743 e->target->plan_ret(e->target, fd, values, &rets, &nrets); 744 for (u32 i = 0; i < nrets; ++i) 745 write_loc(e, rets[i].dst, rets[i].src, rets[i].mem, in->loc); 746 /* Skip the trailing branch-to-epilogue when this IR_RET is the very last 747 * inst emitted: func_end will place the epilogue label at mc->pos right 748 * after this, so the branch would jump to the next 4 bytes. The actual 749 * `ret` instruction lives in func_end's restore-frame sequence and is 750 * unaffected. 
*/ 751 if (!e->emitting_terminal_ret) e->target->ret(e->target); 752 } 753 754 static void emit_inst(NativeEmitCtx* e, u32 block, u32 order_index, Inst* in, 755 const CGFuncDesc* fd) { 756 NativeLoc dst, a, b, src, tmp; 757 NativeAddr addr, addr2; 758 Reg dst_reg; 759 (void)block; 760 if (e->target->set_loc) e->target->set_loc(e->target, in->loc); 761 switch ((IROp)in->op) { 762 case IR_NOP: 763 case IR_CONST_I: 764 case IR_CONST_BYTES: 765 case IR_PHI: 766 case IR_SCOPE_BEGIN: 767 case IR_SCOPE_END: 768 return; 769 case IR_PARAM_DECL: 770 emit_param_decl(e, in); 771 return; 772 case IR_LOAD_IMM: 773 dst = loc_from_operand(e, &in->opnds[0], in->loc); 774 write_loc(e, dst, loc_imm(in->opnds[0].type, in->extra.imm), 775 mem_for_type(e->c, in->opnds[0].type), in->loc); 776 return; 777 case IR_LOAD_CONST: 778 dst = loc_from_operand(e, &in->opnds[0], in->loc); 779 if (dst.kind != NATIVE_LOC_REG) 780 dst = materialize(e, dst, class_for_type(e, in->opnds[0].type), 781 in->opnds[0].type, REG_NONE, REG_NONE, in->loc); 782 e->target->load_const(e->target, dst, in->extra.cbytes); 783 return; 784 case IR_COPY: 785 if (type_is_aggregate_or_large(e, in->opnds[0].type)) { 786 emit_agg_move(e, addr_from_operand(e, &in->opnds[0], in->loc), 787 addr_from_operand(e, &in->opnds[1], in->loc), 788 in->opnds[0].type); 789 return; 790 } 791 dst = loc_from_operand(e, &in->opnds[0], in->loc); 792 src = loc_from_operand(e, &in->opnds[1], in->loc); 793 write_loc(e, dst, src, mem_for_type(e->c, in->opnds[0].type), in->loc); 794 return; 795 case IR_LOAD: 796 if (type_is_aggregate_or_large(e, in->opnds[0].type)) { 797 addr = addr_from_operand(e, &in->opnds[1], in->loc); 798 emit_agg_move(e, addr_from_operand(e, &in->opnds[0], in->loc), addr, 799 in->opnds[0].type); 800 return; 801 } 802 dst = loc_from_operand(e, &in->opnds[0], in->loc); 803 addr = addr_from_operand(e, &in->opnds[1], in->loc); 804 legalize_addr(e, &addr, in->extra.mem, in->loc); 805 if (dst.kind == NATIVE_LOC_REG) { 806 
e->target->load(e->target, dst, addr, in->extra.mem); 807 } else { 808 if (!scratch_available(e, class_for_type(e, in->opnds[0].type), 809 addr_base_reg(&addr), addr_index_reg(&addr))) 810 collapse_addr_to_reg(e, &addr, in->loc); 811 tmp = scratch_loc(e, in->opnds[0].type, 812 class_for_type(e, in->opnds[0].type), 813 addr_base_reg(&addr), addr_index_reg(&addr), in->loc); 814 e->target->load(e->target, tmp, addr, in->extra.mem); 815 write_loc(e, dst, tmp, in->extra.mem, in->loc); 816 } 817 return; 818 case IR_STORE: 819 if (type_is_aggregate_or_large(e, in->opnds[1].type)) { 820 emit_agg_move(e, addr_from_operand(e, &in->opnds[0], in->loc), 821 addr_from_operand(e, &in->opnds[1], in->loc), 822 in->opnds[1].type); 823 return; 824 } 825 addr = addr_from_operand(e, &in->opnds[0], in->loc); 826 legalize_addr(e, &addr, in->extra.mem, in->loc); 827 src = loc_from_operand(e, &in->opnds[1], in->loc); 828 /* Storing a constant 0 from the hardware zero register avoids 829 * materializing 0 into a scratch first (e.g. `strb wzr, [..]` rather than 830 * `movz w9,0; strb w9, [..]`). */ 831 if (src.kind == NATIVE_LOC_IMM && src.v.imm == 0 && 832 e->target->has_store_zero_reg && 833 class_for_type(e, in->opnds[1].type) == NATIVE_REG_INT) 834 src = loc_reg(in->opnds[1].type, NATIVE_REG_INT, 835 e->target->store_zero_reg); 836 /* Source register aliases the address base/index (e.g. `*p = (T)p`). 837 * Collapse the address into a scratch register: collapse_addr_to_reg 838 * selects a scratch distinct from both base and index — hence distinct 839 * from `src` — so the store reads `src` and writes through the fresh 840 * scratch with no alias. This stays entirely in registers; the frame is 841 * fully planned before emission, so emit never allocates a slot here. 
*/ 842 if (src.kind == NATIVE_LOC_REG && (src.v.reg == addr_base_reg(&addr) || 843 src.v.reg == addr_index_reg(&addr))) 844 collapse_addr_to_reg(e, &addr, in->loc); 845 if (src.kind != NATIVE_LOC_REG) { 846 if (!scratch_available(e, class_for_type(e, in->opnds[1].type), 847 addr_base_reg(&addr), addr_index_reg(&addr))) 848 collapse_addr_to_reg(e, &addr, in->loc); 849 src = materialize(e, src, class_for_type(e, in->opnds[1].type), 850 in->opnds[1].type, addr_base_reg(&addr), 851 addr_index_reg(&addr), in->loc); 852 } 853 e->target->store(e->target, addr, src, in->extra.mem); 854 return; 855 case IR_ADDR_OF: { 856 NativeLoc real = loc_from_operand(e, &in->opnds[0], in->loc); 857 addr = addr_from_operand(e, &in->opnds[1], in->loc); 858 dst = real; 859 if (dst.kind != NATIVE_LOC_REG) 860 dst = scratch_loc(e, in->opnds[0].type, 861 class_for_type(e, in->opnds[0].type), REG_NONE, 862 REG_NONE, in->loc); 863 e->target->load_addr(e->target, dst, addr); 864 if (real.kind != NATIVE_LOC_REG) 865 write_loc(e, real, dst, mem_for_type(e->c, in->opnds[0].type), in->loc); 866 return; 867 } 868 case IR_TLS_ADDR_OF: { 869 IRTlsAux* aux = (IRTlsAux*)in->extra.aux; 870 dst = loc_from_operand(e, &in->opnds[0], in->loc); 871 if (dst.kind != NATIVE_LOC_REG) 872 dst = materialize(e, dst, NATIVE_REG_INT, in->opnds[0].type, REG_NONE, 873 REG_NONE, in->loc); 874 e->target->tls_addr_of(e->target, dst, aux->sym, aux->addend); 875 return; 876 } 877 case IR_AGG_COPY: { 878 IRAggAux* aux = (IRAggAux*)in->extra.aux; 879 addr = pointer_addr_from_operand(e, &in->opnds[0], in->loc, REG_NONE, 880 REG_NONE); 881 addr2 = pointer_addr_from_operand( 882 e, &in->opnds[1], in->loc, 883 addr.base_kind == NATIVE_ADDR_BASE_REG ? 
addr.base.reg : REG_NONE, 884 REG_NONE); 885 e->target->copy_bytes(e->target, addr, addr2, aux->access); 886 return; 887 } 888 case IR_AGG_SET: { 889 IRAggAux* aux = (IRAggAux*)in->extra.aux; 890 addr = pointer_addr_from_operand(e, &in->opnds[0], in->loc, REG_NONE, 891 REG_NONE); 892 src = loc_from_operand(e, &in->opnds[1], in->loc); 893 if (src.kind != NATIVE_LOC_REG) { 894 if (!scratch_available(e, NATIVE_REG_INT, addr_base_reg(&addr), 895 addr_index_reg(&addr))) 896 collapse_addr_to_reg(e, &addr, in->loc); 897 src = materialize(e, src, NATIVE_REG_INT, in->opnds[1].type, 898 addr_base_reg(&addr), addr_index_reg(&addr), in->loc); 899 } 900 e->target->set_bytes(e->target, addr, src, aux->access); 901 return; 902 } 903 case IR_BITFIELD_LOAD: { 904 IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; 905 dst = loc_from_operand(e, &in->opnds[0], in->loc); 906 addr = addr_from_operand(e, &in->opnds[1], in->loc); 907 if (dst.kind != NATIVE_LOC_REG) 908 dst = materialize(e, dst, class_for_type(e, in->opnds[0].type), 909 in->opnds[0].type, REG_NONE, REG_NONE, in->loc); 910 e->target->bitfield_load(e->target, dst, addr, aux->access); 911 return; 912 } 913 case IR_BITFIELD_STORE: { 914 IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux; 915 addr = addr_from_operand(e, &in->opnds[0], in->loc); 916 src = loc_from_operand(e, &in->opnds[1], in->loc); 917 if (src.kind != NATIVE_LOC_REG) 918 src = materialize(e, src, class_for_type(e, in->opnds[1].type), 919 in->opnds[1].type, REG_NONE, REG_NONE, in->loc); 920 e->target->bitfield_store(e->target, addr, src, aux->access); 921 return; 922 } 923 case IR_BINOP: 924 dst = loc_from_operand(e, &in->opnds[0], in->loc); 925 dst_reg = dst.kind == NATIVE_LOC_REG ? 
dst.v.reg : REG_NONE; 926 b = loc_from_operand(e, &in->opnds[2], in->loc); 927 a = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc), 928 class_for_type(e, in->opnds[1].type), in->opnds[1].type, 929 dst_reg, loc_avoid_reg(b), in->loc); 930 b = operand_imm_or_reg(e, &in->opnds[2], NATIVE_IMM_BINOP, 931 (u32)in->extra.imm, a.v.reg, dst_reg, in->loc); 932 if (dst.kind != NATIVE_LOC_REG) 933 dst = scratch_loc(e, in->opnds[0].type, 934 class_for_type(e, in->opnds[0].type), a.v.reg, 935 loc_avoid_reg(b), in->loc); 936 e->target->binop(e->target, (BinOp)in->extra.imm, dst, a, b); 937 if (in->opnds[0].kind != OPK_REG) 938 write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, 939 mem_for_type(e->c, in->opnds[0].type), in->loc); 940 return; 941 case IR_UNOP: 942 dst = loc_from_operand(e, &in->opnds[0], in->loc); 943 dst_reg = dst.kind == NATIVE_LOC_REG ? dst.v.reg : REG_NONE; 944 a = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc), 945 class_for_type(e, in->opnds[1].type), in->opnds[1].type, 946 dst_reg, REG_NONE, in->loc); 947 if (dst.kind != NATIVE_LOC_REG) 948 dst = scratch_loc(e, in->opnds[0].type, 949 class_for_type(e, in->opnds[0].type), a.v.reg, 950 REG_NONE, in->loc); 951 e->target->unop(e->target, (UnOp)in->extra.imm, dst, a); 952 if (in->opnds[0].kind != OPK_REG) 953 write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, 954 mem_for_type(e->c, in->opnds[0].type), in->loc); 955 return; 956 case IR_CMP: 957 dst = loc_from_operand(e, &in->opnds[0], in->loc); 958 dst_reg = dst.kind == NATIVE_LOC_REG ? 
dst.v.reg : REG_NONE; 959 b = loc_from_operand(e, &in->opnds[2], in->loc); 960 a = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc), 961 class_for_type(e, in->opnds[1].type), in->opnds[1].type, 962 dst_reg, loc_avoid_reg(b), in->loc); 963 b = operand_imm_or_reg(e, &in->opnds[2], NATIVE_IMM_CMP, 964 (u32)in->extra.imm, a.v.reg, dst_reg, in->loc); 965 if (dst.kind != NATIVE_LOC_REG) 966 dst = scratch_loc(e, in->opnds[0].type, 967 class_for_type(e, in->opnds[0].type), a.v.reg, 968 loc_avoid_reg(b), in->loc); 969 e->target->cmp(e->target, (CmpOp)in->extra.imm, dst, a, b); 970 if (in->opnds[0].kind != OPK_REG) 971 write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, 972 mem_for_type(e->c, in->opnds[0].type), in->loc); 973 return; 974 case IR_CONVERT: 975 dst = loc_from_operand(e, &in->opnds[0], in->loc); 976 dst_reg = dst.kind == NATIVE_LOC_REG ? dst.v.reg : REG_NONE; 977 src = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc), 978 class_for_type(e, in->opnds[1].type), in->opnds[1].type, 979 dst_reg, REG_NONE, in->loc); 980 if (dst.kind != NATIVE_LOC_REG) 981 dst = scratch_loc(e, in->opnds[0].type, 982 class_for_type(e, in->opnds[0].type), src.v.reg, 983 REG_NONE, in->loc); 984 e->target->convert(e->target, (ConvKind)in->extra.imm, dst, src); 985 if (in->opnds[0].kind != OPK_REG) 986 write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, 987 mem_for_type(e->c, in->opnds[0].type), in->loc); 988 return; 989 case IR_CALL: 990 emit_call(e, in); 991 return; 992 case IR_BR: 993 e->target->jump(e->target, 994 ensure_label(e, e->f->blocks[block].succ[0], in->loc)); 995 return; 996 case IR_CMP_BRANCH: { 997 u32 next = order_index + 1u < e->f->emit_order_n 998 ? 
e->f->emit_order[order_index + 1u] 999 : UINT32_MAX; 1000 b = loc_from_operand(e, &in->opnds[1], in->loc); 1001 a = materialize(e, loc_from_operand(e, &in->opnds[0], in->loc), 1002 class_for_type(e, in->opnds[0].type), in->opnds[0].type, 1003 REG_NONE, loc_avoid_reg(b), in->loc); 1004 b = operand_imm_or_reg(e, &in->opnds[1], NATIVE_IMM_CMP, 1005 (u32)in->extra.imm, a.v.reg, REG_NONE, in->loc); 1006 e->target->cmp_branch( 1007 e->target, (CmpOp)in->extra.imm, a, b, 1008 ensure_label(e, e->f->blocks[block].succ[0], in->loc)); 1009 if (e->f->blocks[block].nsucc > 1u && e->f->blocks[block].succ[1] != next) 1010 e->target->jump(e->target, 1011 ensure_label(e, e->f->blocks[block].succ[1], in->loc)); 1012 return; 1013 } 1014 case IR_SWITCH: { 1015 IRSwitchAux* aux = (IRSwitchAux*)in->extra.aux; 1016 NativeLoc sel = 1017 materialize(e, loc_from_operand(e, &in->opnds[0], in->loc), 1018 class_for_type(e, in->opnds[0].type), in->opnds[0].type, 1019 REG_NONE, REG_NONE, in->loc); 1020 NativeLoc imm = 1021 scratch_loc(e, in->opnds[0].type, (NativeAllocClass)sel.cls, 1022 sel.v.reg, REG_NONE, in->loc); 1023 for (u32 i = 0; aux && i < aux->ncases; ++i) { 1024 e->target->load_imm(e->target, imm, (i64)aux->cases[i].value); 1025 e->target->cmp_branch(e->target, CMP_EQ, sel, imm, 1026 ensure_label(e, aux->cases[i].block, in->loc)); 1027 } 1028 if (aux) 1029 e->target->jump(e->target, 1030 ensure_label(e, aux->default_block, in->loc)); 1031 return; 1032 } 1033 case IR_INDIRECT_BRANCH: { 1034 IRIndirectAux* aux = (IRIndirectAux*)in->extra.aux; 1035 MCLabel* labels = aux && aux->ntargets 1036 ? arena_array(e->f->arena, MCLabel, aux->ntargets) 1037 : NULL; 1038 for (u32 i = 0; aux && i < aux->ntargets; ++i) 1039 labels[i] = ensure_label(e, aux->targets[i], in->loc); 1040 src = materialize(e, loc_from_operand(e, &in->opnds[0], in->loc), 1041 NATIVE_REG_INT, in->opnds[0].type, REG_NONE, REG_NONE, 1042 in->loc); 1043 e->target->indirect_branch(e->target, src, labels, 1044 aux ? 
aux->ntargets : 0u); 1045 return; 1046 } 1047 case IR_LOAD_LABEL_ADDR: 1048 dst = loc_from_operand(e, &in->opnds[0], in->loc); 1049 if (dst.kind != NATIVE_LOC_REG) 1050 dst = materialize(e, dst, NATIVE_REG_INT, in->opnds[0].type, REG_NONE, 1051 REG_NONE, in->loc); 1052 e->target->load_label_addr(e->target, dst, 1053 ensure_label(e, (u32)in->extra.imm, in->loc)); 1054 return; 1055 case IR_LOCAL_STATIC_DATA_BEGIN: { 1056 CgIrLocalStaticBeginAux* aux = (CgIrLocalStaticBeginAux*)in->extra.aux; 1057 emit_local_static_begin(e, aux ? &aux->desc : NULL, in->loc); 1058 return; 1059 } 1060 case IR_LOCAL_STATIC_DATA_WRITE: { 1061 CgIrLocalStaticWriteAux* aux = (CgIrLocalStaticWriteAux*)in->extra.aux; 1062 if (!aux) emit_panic(e, in->loc, "missing local static data write"); 1063 emit_local_static_write(e, aux->has_data ? aux->data : NULL, aux->len, 1064 in->loc); 1065 return; 1066 } 1067 case IR_LOCAL_STATIC_DATA_LABEL_ADDR: { 1068 CgIrLocalStaticLabelAux* aux = (CgIrLocalStaticLabelAux*)in->extra.aux; 1069 if (!aux) emit_panic(e, in->loc, "missing local static label data"); 1070 (void)aux->address_space; 1071 emit_local_static_label_addr(e, 1072 ensure_label(e, (u32)aux->target, in->loc), 1073 aux->addend, aux->width, in->loc); 1074 return; 1075 } 1076 case IR_LOCAL_STATIC_DATA_END: 1077 emit_local_static_end(e, in->loc); 1078 return; 1079 case IR_RET: 1080 emit_ret(e, in, fd); 1081 return; 1082 case IR_ALLOCA: 1083 dst = loc_from_operand(e, &in->opnds[0], in->loc); 1084 src = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc), 1085 NATIVE_REG_INT, in->opnds[1].type, REG_NONE, REG_NONE, 1086 in->loc); 1087 if (dst.kind != NATIVE_LOC_REG) 1088 dst = scratch_loc(e, in->opnds[0].type, NATIVE_REG_INT, src.v.reg, 1089 REG_NONE, in->loc); 1090 e->target->alloca_(e->target, dst, src, (u32)in->extra.imm); 1091 return; 1092 case IR_ATOMIC_LOAD: { 1093 IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; 1094 dst = loc_from_operand(e, &in->opnds[0], in->loc); 1095 addr = 
pointer_addr_from_operand(e, &in->opnds[1], in->loc, REG_NONE, 1096 REG_NONE); 1097 if (dst.kind != NATIVE_LOC_REG) 1098 dst = scratch_loc(e, in->opnds[0].type, 1099 class_for_type(e, in->opnds[0].type), REG_NONE, 1100 REG_NONE, in->loc); 1101 e->target->atomic_load(e->target, dst, addr, aux->mem, aux->mo); 1102 if (in->opnds[0].kind != OPK_REG) 1103 write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, aux->mem, 1104 in->loc); 1105 return; 1106 } 1107 case IR_ATOMIC_STORE: { 1108 IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; 1109 addr = pointer_addr_from_operand(e, &in->opnds[0], in->loc, REG_NONE, 1110 REG_NONE); 1111 src = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc), 1112 class_for_type(e, in->opnds[1].type), in->opnds[1].type, 1113 REG_NONE, REG_NONE, in->loc); 1114 e->target->atomic_store(e->target, addr, src, aux->mem, aux->mo); 1115 return; 1116 } 1117 case IR_ATOMIC_RMW: { 1118 IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; 1119 dst = loc_from_operand(e, &in->opnds[0], in->loc); 1120 addr = pointer_addr_from_operand(e, &in->opnds[1], in->loc, REG_NONE, 1121 REG_NONE); 1122 src = materialize(e, loc_from_operand(e, &in->opnds[2], in->loc), 1123 class_for_type(e, in->opnds[2].type), in->opnds[2].type, 1124 REG_NONE, REG_NONE, in->loc); 1125 if (dst.kind != NATIVE_LOC_REG) 1126 dst = scratch_loc(e, in->opnds[0].type, 1127 class_for_type(e, in->opnds[0].type), src.v.reg, 1128 REG_NONE, in->loc); 1129 e->target->atomic_rmw(e->target, (KitCgAtomicOp)aux->op, dst, addr, src, 1130 aux->mem, aux->mo); 1131 if (in->opnds[0].kind != OPK_REG) 1132 write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, aux->mem, 1133 in->loc); 1134 return; 1135 } 1136 case IR_ATOMIC_CAS: { 1137 IRCasAux* aux = (IRCasAux*)in->extra.aux; 1138 NativeLoc ok; 1139 NativeLoc expected; 1140 NativeLoc desired; 1141 dst = loc_from_operand(e, &in->opnds[0], in->loc); 1142 ok = loc_from_operand(e, &in->opnds[1], in->loc); 1143 addr = pointer_addr_from_operand(e, 
&in->opnds[2], in->loc, REG_NONE, 1144 REG_NONE); 1145 expected = materialize(e, loc_from_operand(e, &in->opnds[3], in->loc), 1146 class_for_type(e, in->opnds[3].type), 1147 in->opnds[3].type, REG_NONE, REG_NONE, in->loc); 1148 desired = 1149 materialize(e, loc_from_operand(e, &in->opnds[4], in->loc), 1150 class_for_type(e, in->opnds[4].type), in->opnds[4].type, 1151 expected.v.reg, REG_NONE, in->loc); 1152 if (dst.kind != NATIVE_LOC_REG) 1153 dst = scratch_loc(e, in->opnds[0].type, 1154 class_for_type(e, in->opnds[0].type), expected.v.reg, 1155 desired.v.reg, in->loc); 1156 if (ok.kind != NATIVE_LOC_REG) 1157 ok = scratch_loc(e, in->opnds[1].type, 1158 class_for_type(e, in->opnds[1].type), dst.v.reg, 1159 expected.v.reg, in->loc); 1160 e->target->atomic_cas(e->target, dst, ok, addr, expected, desired, 1161 aux->mem, aux->success, aux->failure); 1162 if (in->opnds[0].kind != OPK_REG) 1163 write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, aux->mem, 1164 in->loc); 1165 if (in->opnds[1].kind != OPK_REG) 1166 write_loc(e, loc_from_operand(e, &in->opnds[1], in->loc), ok, 1167 mem_for_type(e->c, in->opnds[1].type), in->loc); 1168 return; 1169 } 1170 case IR_VA_START: { 1171 NativeLoc ap = materialize(e, loc_from_operand(e, &in->opnds[0], in->loc), 1172 NATIVE_REG_INT, in->opnds[0].type, REG_NONE, 1173 REG_NONE, in->loc); 1174 e->target->va_start_(e->target, ap); 1175 return; 1176 } 1177 case IR_VA_END: { 1178 NativeLoc ap = materialize(e, loc_from_operand(e, &in->opnds[0], in->loc), 1179 NATIVE_REG_INT, in->opnds[0].type, REG_NONE, 1180 REG_NONE, in->loc); 1181 e->target->va_end_(e->target, ap); 1182 return; 1183 } 1184 case IR_VA_COPY: { 1185 NativeLoc d = materialize(e, loc_from_operand(e, &in->opnds[0], in->loc), 1186 NATIVE_REG_INT, in->opnds[0].type, REG_NONE, 1187 REG_NONE, in->loc); 1188 NativeLoc s = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc), 1189 NATIVE_REG_INT, in->opnds[1].type, d.v.reg, 1190 REG_NONE, in->loc); 1191 
e->target->va_copy_(e->target, d, s); 1192 return; 1193 } 1194 case IR_VA_ARG: { 1195 KitCgTypeId ty = in->opnds[0].type; 1196 NativeLoc ap = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc), 1197 NATIVE_REG_INT, in->opnds[1].type, REG_NONE, 1198 REG_NONE, in->loc); 1199 NativeLoc res; 1200 if (type_is_aggregate_or_large(e, ty)) { 1201 /* A value too wide for one register (an 8-byte i64/double on a 32-bit 1202 * target, or an aggregate) can't pass through a scratch register; hand 1203 * the target its memory destination so it can copy the value directly. 1204 */ 1205 e->target->va_arg_(e->target, loc_from_operand(e, &in->opnds[0], 1206 in->loc), 1207 ap, ty); 1208 return; 1209 } 1210 /* The result must land in a register distinct from the va_list pointer; 1211 * fetch into a scratch register, then write to the real destination. */ 1212 res = scratch_loc(e, ty, class_for_type(e, ty), ap.v.reg, REG_NONE, 1213 in->loc); 1214 e->target->va_arg_(e->target, res, ap, ty); 1215 write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), res, 1216 mem_for_type(e->c, ty), in->loc); 1217 return; 1218 } 1219 case IR_ASM_BLOCK: { 1220 IRAsmAux* aux = (IRAsmAux*)in->extra.aux; 1221 NativeLoc* out_locs = aux && aux->nout 1222 ? arena_array(e->f->arena, NativeLoc, aux->nout) 1223 : NULL; 1224 NativeLoc* in_locs = aux && aux->nin 1225 ? arena_array(e->f->arena, NativeLoc, aux->nin) 1226 : NULL; 1227 /* The optimizer has already allocated registers for the asm operands and 1228 * placed the input values / consumes the output values through the normal 1229 * use/def data flow. We only convert each operand to its NativeLoc; the 1230 * NativeTarget hook binds the pre-allocated registers to the template and 1231 * saves/restores any callee-saved registers the asm clobbers. 
*/ 1232 for (u32 i = 0; aux && i < aux->nout; ++i) 1233 out_locs[i] = loc_from_operand(e, &aux->out_ops[i], in->loc); 1234 for (u32 i = 0; aux && i < aux->nin; ++i) 1235 in_locs[i] = loc_from_operand(e, &aux->in_ops[i], in->loc); 1236 e->target->asm_block(e->target, aux ? aux->tmpl : "", 1237 aux ? aux->outs : NULL, aux ? aux->nout : 0, 1238 out_locs, aux ? aux->ins : NULL, aux ? aux->nin : 0, 1239 in_locs, aux ? aux->clobbers : NULL, 1240 aux ? aux->nclob : 0); 1241 return; 1242 } 1243 case IR_BREAK_TO: 1244 case IR_CONTINUE_TO: 1245 emit_panic(e, in->loc, "operation is not wired to NativeTarget yet"); 1246 case IR_FENCE: 1247 e->target->fence(e->target, (KitCgMemOrder)in->extra.imm); 1248 return; 1249 case IR_UNREACHABLE: 1250 e->target->trap(e->target); 1251 return; 1252 case IR_INTRINSIC: { 1253 IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; 1254 NativeLoc* dsts = aux && aux->ndst 1255 ? arena_array(e->f->arena, NativeLoc, aux->ndst) 1256 : NULL; 1257 NativeLoc* args = aux && aux->narg 1258 ? 
arena_array(e->f->arena, NativeLoc, aux->narg) 1259 : NULL; 1260 for (u32 i = 0; aux && i < aux->ndst; ++i) 1261 dsts[i] = loc_from_operand(e, &aux->dsts[i], in->loc); 1262 for (u32 i = 0; aux && i < aux->narg; ++i) { 1263 if (aux->args[i].kind == OPK_IMM) { 1264 args[i] = loc_from_operand(e, &aux->args[i], in->loc); 1265 } else { 1266 args[i] = materialize(e, loc_from_operand(e, &aux->args[i], in->loc), 1267 class_for_type(e, aux->args[i].type), 1268 aux->args[i].type, REG_NONE, REG_NONE, in->loc); 1269 } 1270 } 1271 e->target->intrinsic(e->target, aux->kind, dsts, aux->ndst, args, 1272 aux->narg); 1273 return; 1274 } 1275 default: 1276 emit_panic(e, in->loc, "unknown IR op"); 1277 } 1278 } 1279 1280 static int native_emit_terminates(const Inst* in) { 1281 if (!in) return 0; 1282 switch ((IROp)in->op) { 1283 case IR_BR: 1284 case IR_CONDBR: 1285 case IR_CMP_BRANCH: 1286 case IR_SWITCH: 1287 case IR_INDIRECT_BRANCH: 1288 case IR_RET: 1289 case IR_UNREACHABLE: 1290 case IR_BREAK_TO: 1291 case IR_CONTINUE_TO: 1292 return 1; 1293 case IR_INTRINSIC: { 1294 IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; 1295 return aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP); 1296 } 1297 default: 1298 return 0; 1299 } 1300 } 1301 1302 static void emit_block(NativeEmitCtx* e, u32 block, u32 order_index, 1303 const CGFuncDesc* fd) { 1304 if (block >= e->f->nblocks) return; 1305 if (!e->label_placed[block]) { 1306 e->label_placed[block] = 1u; 1307 e->target->label_place(e->target, 1308 ensure_label(e, block, (SrcLoc){0, 0, 0})); 1309 } 1310 Block* bl = &e->f->blocks[block]; 1311 int is_last_block = order_index + 1u == e->f->emit_order_n; 1312 for (u32 i = 0; i < bl->ninsts; ++i) { 1313 e->emitting_terminal_ret = is_last_block && i + 1u == bl->ninsts && 1314 (IROp)bl->insts[i].op == IR_RET; 1315 emit_inst(e, block, order_index, &bl->insts[i], fd); 1316 } 1317 e->emitting_terminal_ret = 0; 1318 if (bl->nsucc == 1u && 1319 (bl->ninsts == 0 || 1320 
!native_emit_terminates(&bl->insts[bl->ninsts - 1u]))) { 1321 u32 next = order_index + 1u < e->f->emit_order_n 1322 ? e->f->emit_order[order_index + 1u] 1323 : UINT32_MAX; 1324 if (bl->succ[0] != next) 1325 e->target->jump(e->target, 1326 ensure_label(e, bl->succ[0], (SrcLoc){0, 0, 0})); 1327 } 1328 } 1329 1330 #define EMIT_MAX_REG_CLASSES 4u 1331 1332 static void collect_used_reg(Func* f, Inst* in, OptOperand* op, int is_def, 1333 void* ctx) { 1334 u32* used = (u32*)ctx; 1335 (void)f; 1336 (void)in; 1337 (void)is_def; 1338 if (op && op->kind == OPT_OPK_REG && op->cls < EMIT_MAX_REG_CLASSES && 1339 op->v.reg < 32u) 1340 used[op->cls] |= 1u << op->v.reg; 1341 } 1342 1343 /* After register allocation the MIR names hard registers directly, so we scan 1344 * it for the callee-saved registers the allocator assigned. Fills `used[cls]` 1345 * (one bitmask per alloc class, masked to each class's callee-saved set) and 1346 * returns the class count. The masks feed NativeKnownFrameDesc so the backend 1347 * reserves the save slots as part of the up-front frame. */ 1348 static u32 compute_callee_saved_used(NativeEmitCtx* e, u32* used, u32 cap) { 1349 NativeTarget* t = e->target; 1350 const NativeRegInfo* ri = t->regs; 1351 u32 nclasses; 1352 for (u32 i = 0; i < cap; ++i) used[i] = 0; 1353 if (!ri) return 0; 1354 for (u32 b = 0; b < e->f->nblocks; ++b) { 1355 Block* bl = &e->f->blocks[b]; 1356 for (u32 i = 0; i < bl->ninsts; ++i) 1357 opt_walk_inst_operands(e->f, &bl->insts[i], collect_used_reg, used); 1358 } 1359 nclasses = ri->nclasses < cap ? ri->nclasses : cap; 1360 for (u32 i = 0; i < ri->nclasses; ++i) { 1361 const NativeAllocClassInfo* ci = &ri->classes[i]; 1362 if (ci->cls < cap) 1363 used[ci->cls] &= 1364 native_target_callee_saved_mask(t, (NativeAllocClass)ci->cls); 1365 } 1366 return nclasses; 1367 } 1368 1369 /* Plan the complete call frame before any code is emitted, then hand it to the 1370 * backend via func_begin_known_frame so the prologue is emitted final. 
The 1371 * optimizer knows everything the frame needs after register allocation and MIR 1372 * lowering: the callee-saved set (scanned from the MIR), every static frame 1373 * slot (f->frame_slots), and the outgoing-arg area (the max over all calls of 1374 * the pure call_stack_bytes query). The body therefore allocates no slots, so 1375 * the frame is final up front and nothing is back-patched. Populates 1376 * e->slot_map from the backend-assigned slot handles for the body to use. */ 1377 static void plan_frame(NativeEmitCtx* e, const CGFuncDesc* fd) { 1378 NativeTarget* t = e->target; 1379 NativeKnownFrameDesc frame; 1380 NativeFrameSlotDesc* slots = NULL; 1381 NativeFrameSlot* out_slots = NULL; 1382 u32 used[EMIT_MAX_REG_CLASSES]; 1383 u32 nclasses; 1384 u32 max_args = 0, max_outgoing = 0; 1385 u8 has_alloca = 0; 1386 u8 needs_scratch_spill = 0; 1387 u8 has_call = 0; 1388 u8 has_asm = 0; 1389 u8 reads_frame = 0; 1390 u32 nasm_clob = 0; 1391 u32 asm_clobber_abi_sets = 0; 1392 Sym* asm_clobbers = NULL; 1393 memset(&frame, 0, sizeof frame); 1394 nclasses = t->reserve_callee_saves 1395 ? compute_callee_saved_used(e, used, EMIT_MAX_REG_CLASSES) 1396 : 0u; 1397 /* Outgoing-arg area = max stack-arg bytes over all calls; also note alloca. 1398 */ 1399 for (u32 b = 0; b < e->f->nblocks; ++b) { 1400 Block* bl = &e->f->blocks[b]; 1401 for (u32 i = 0; i < bl->ninsts; ++i) { 1402 Inst* in = &bl->insts[i]; 1403 if ((IROp)in->op == IR_ALLOCA) { 1404 has_alloca = 1; 1405 } else if ((IROp)in->op == IR_ATOMIC_RMW) { 1406 needs_scratch_spill = 1; 1407 } else if ((IROp)in->op == IR_CALL) { 1408 IRCallAux* aux = (IRCallAux*)in->extra.aux; 1409 /* Any call (regular or sibling/tail) means the function is not a leaf: 1410 * it clobbers the return-address register and the stack below sp. 
*/ 1411 has_call = 1; 1412 if (aux && aux->desc.nargs > max_args) max_args = aux->desc.nargs; 1413 } else if ((IROp)in->op == IR_ASM_BLOCK) { 1414 /* Inline asm may clobber the return-address register or the red zone 1415 * opaquely; disqualifies the frame-eliding tiers (see has_asm). Its 1416 * callee-saved register clobbers and hard-register operand pins are 1417 * equally opaque to the operand scan below; count them now so the 1418 * backend can fold them into the saved set (collected into a single Sym 1419 * list in a second pass below). */ 1420 IRAsmAux* aux = (IRAsmAux*)in->extra.aux; 1421 has_asm = 1; 1422 if (aux) { 1423 nasm_clob += aux->nclob; 1424 for (u32 k = 0; k < aux->nout; ++k) 1425 if (aux->outs[k].reg) ++nasm_clob; 1426 for (u32 k = 0; k < aux->nin; ++k) 1427 if (aux->ins[k].reg) ++nasm_clob; 1428 asm_clobber_abi_sets |= aux->clobber_abi_sets; 1429 } 1430 } else if ((IROp)in->op == IR_INTRINSIC) { 1431 /* __builtin_frame_address / __builtin_return_address read the frame 1432 * record, so the function must keep one (disables the rv64 frameless 1433 * leaf tier; see NativeKnownFrameDesc.reads_frame). */ 1434 IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; 1435 if (aux && (aux->kind == INTRIN_FRAME_ADDRESS || 1436 aux->kind == INTRIN_RETURN_ADDRESS)) 1437 reads_frame = 1; 1438 } 1439 } 1440 } 1441 /* Gather the union of every asm block's clobber names and hard-register 1442 * operand pins. The backend resolves them with its own clobber parser 1443 * (machinize's resolve_name is unset on every backend, so aux->clobber_mask is 1444 * unreliable here). 
*/ 1445 if (nasm_clob) { 1446 u32 n = 0; 1447 asm_clobbers = arena_array(e->f->arena, Sym, nasm_clob); 1448 for (u32 b = 0; b < e->f->nblocks; ++b) { 1449 Block* bl = &e->f->blocks[b]; 1450 for (u32 i = 0; i < bl->ninsts; ++i) { 1451 Inst* in = &bl->insts[i]; 1452 IRAsmAux* aux; 1453 if ((IROp)in->op != IR_ASM_BLOCK) continue; 1454 aux = (IRAsmAux*)in->extra.aux; 1455 for (u32 k = 0; aux && k < aux->nclob; ++k) 1456 asm_clobbers[n++] = aux->clobbers[k]; 1457 for (u32 k = 0; aux && k < aux->nout; ++k) 1458 if (aux->outs[k].reg) asm_clobbers[n++] = aux->outs[k].reg; 1459 for (u32 k = 0; aux && k < aux->nin; ++k) 1460 if (aux->ins[k].reg) asm_clobbers[n++] = aux->ins[k].reg; 1461 } 1462 } 1463 nasm_clob = n; 1464 } 1465 if (t->call_stack_bytes) { 1466 NativeLoc* args = 1467 max_args ? arena_zarray(e->f->arena, NativeLoc, max_args) : NULL; 1468 for (u32 b = 0; b < e->f->nblocks; ++b) { 1469 Block* bl = &e->f->blocks[b]; 1470 for (u32 i = 0; i < bl->ninsts; ++i) { 1471 Inst* in = &bl->insts[i]; 1472 IRCallAux* aux; 1473 NativeCallDesc d; 1474 u32 sb; 1475 if ((IROp)in->op != IR_CALL) continue; 1476 aux = (IRCallAux*)in->extra.aux; 1477 if (!aux) continue; 1478 memset(&d, 0, sizeof d); 1479 d.fn_type = aux->desc.fn_type; 1480 d.flags = aux->desc.flags; 1481 d.nargs = aux->desc.nargs; 1482 for (u32 k = 0; k < aux->desc.nargs; ++k) { 1483 memset(&args[k], 0, sizeof args[k]); 1484 args[k].type = aux->desc.args[k].type; 1485 } 1486 d.args = args; 1487 sb = t->call_stack_bytes(t, &d); 1488 if (sb > max_outgoing) max_outgoing = sb; 1489 } 1490 } 1491 } 1492 e->slot_map = 1493 arena_zarray(e->f->arena, NativeFrameSlot, e->f->nframe_slots + 1u); 1494 if (e->f->nframe_slots) { 1495 slots = arena_zarray(e->f->arena, NativeFrameSlotDesc, e->f->nframe_slots); 1496 out_slots = arena_zarray(e->f->arena, NativeFrameSlot, e->f->nframe_slots); 1497 for (u32 i = 0; i < e->f->nframe_slots; ++i) { 1498 IRFrameSlot* s = &e->f->frame_slots[i]; 1499 NativeFrameSlotDesc* d = &slots[i]; 1500 
memset(d, 0, sizeof *d); 1501 d->type = s->type; 1502 d->name = s->name; 1503 d->loc = s->loc; 1504 d->size = s->size; 1505 d->align = s->align; 1506 d->kind = s->kind; 1507 d->flags = s->flags; 1508 } 1509 } 1510 frame.slots = slots; 1511 frame.nslots = e->f->nframe_slots; 1512 frame.max_outgoing = max_outgoing; 1513 frame.callee_saved_used = nclasses ? used : NULL; 1514 frame.ncallee_classes = nclasses; 1515 frame.has_alloca = has_alloca; 1516 frame.needs_scratch_spill = needs_scratch_spill; 1517 frame.is_leaf = !has_call; 1518 frame.has_asm = has_asm; 1519 frame.reads_frame = reads_frame; 1520 frame.asm_clobbers = asm_clobbers; 1521 frame.nasm_clobbers = nasm_clob; 1522 frame.asm_clobber_abi_sets = asm_clobber_abi_sets; 1523 t->func_begin_known_frame(t, fd, &frame, out_slots); 1524 for (u32 i = 0; i < e->f->nframe_slots; ++i) 1525 e->slot_map[e->f->frame_slots[i].id] = out_slots[i]; 1526 } 1527 1528 void opt_emit_native(Compiler* c, Func* f, NativeTarget* target) { 1529 NativeEmitCtx e; 1530 Func view; 1531 CGFuncDesc fd; 1532 if (!f || !target) return; 1533 memset(&e, 0, sizeof e); 1534 if (f->mir) { 1535 view = *f; 1536 view.blocks = f->mir->blocks; 1537 view.nblocks = f->mir->nblocks; 1538 view.entry = f->mir->entry; 1539 view.emit_order = f->mir->emit_order; 1540 view.emit_order_n = f->mir->emit_order_n; 1541 view.emit_order_cap = f->mir->emit_order_cap; 1542 view.opt_rewritten = 1; 1543 view.mir = NULL; 1544 e.f = &view; 1545 } else { 1546 e.f = f; 1547 } 1548 e.c = c; 1549 e.target = target; 1550 metrics_scope_begin(c, "opt.native_emit.setup"); 1551 e.labels = arena_array(e.f->arena, MCLabel, e.f->nblocks ? e.f->nblocks : 1u); 1552 e.label_placed = 1553 arena_zarray(e.f->arena, u8, e.f->nblocks ? 
e.f->nblocks : 1u); 1554 for (u32 i = 0; i < e.f->nblocks; ++i) e.labels[i] = MC_LABEL_NONE; 1555 fd = semantic_func_desc(&e); 1556 metrics_scope_end(c, "opt.native_emit.setup"); 1557 1558 metrics_scope_begin(c, "opt.native_emit.func_begin"); 1559 /* The optimizer has the whole frame after regalloc + MIR lowering, so it 1560 * plans it up front (plan_frame) and drives func_begin_known_frame: the 1561 * backend emits a final prologue with no reserved NOP region and no 1562 * back-patching. The body allocates no frame slots, so the frame stays final; 1563 * allocas and tail epilogues are emitted final too. (Contrast the 1564 * single-pass NativeDirectTarget path, which reserves and patches.) */ 1565 plan_frame(&e, &fd); 1566 bind_params(&e); 1567 metrics_scope_end(c, "opt.native_emit.func_begin"); 1568 1569 metrics_scope_begin(c, "opt.native_emit.body"); 1570 for (u32 i = 0; i < e.f->emit_order_n; ++i) 1571 emit_block(&e, e.f->emit_order[i], i, &fd); 1572 metrics_scope_end(c, "opt.native_emit.body"); 1573 1574 metrics_scope_begin(c, "opt.native_emit.func_end"); 1575 target->func_end(target); 1576 metrics_scope_end(c, "opt.native_emit.func_end"); 1577 }