cg_ir_lower.c (44612B)
1 #include <string.h> 2 3 #include "cg/ir.h" 4 #include "cg/type.h" 5 #include "opt/opt_internal.h" 6 7 #undef Operand 8 #undef CGParamDesc 9 #undef CGCallDesc 10 #undef CGFuncDesc 11 #undef CGLocalStorage 12 #undef FrameSlotDesc 13 14 typedef struct OptLocalMap { 15 OptCGLocalStorage storage; 16 NativeFrameSlot home_slot; 17 KitCgTypeId type; 18 u32 size; 19 u32 align; 20 u8 cls; 21 u8 address_taken; 22 u8 pad[2]; 23 } OptLocalMap; 24 25 /* Per-instruction record of pointer locals whose value was loaded from their 26 * frame home into a fresh PReg so they can serve as an indirect-addressing base 27 * (see frame_indirect_base_reg). Reset for each lowered instruction. */ 28 #define CG_IR_LOWER_MAX_MAT 8u 29 typedef struct CgIrLower { 30 Compiler* c; 31 const CgIrFunc* src; 32 Func* f; 33 OptLocalMap* locals; 34 u32 nlocals; 35 u32* label_block; 36 u32 nlabels; 37 u32* inst_block; 38 u8* leader; 39 CGLocal mat_local[CG_IR_LOWER_MAX_MAT]; 40 u8 mat_role[CG_IR_LOWER_MAX_MAT]; 41 Reg mat_reg[CG_IR_LOWER_MAX_MAT]; 42 u32 nmat; 43 } CgIrLower; 44 45 typedef enum CgIrMatRole { 46 CG_IR_MAT_BASE = 0, 47 CG_IR_MAT_INDEX = 1, 48 } CgIrMatRole; 49 50 static _Noreturn void lower_panic(CgIrLower* l, SrcLoc loc, const char* msg) { 51 compiler_panic(l->c, loc, "opt cg-ir lower: %s", msg); 52 } 53 54 static u8 local_reg_class(Compiler* c, KitCgTypeId ty) { 55 return opt_value_reg_class(c, ty); 56 } 57 58 static OptCGFuncDesc lower_func_desc(Arena* a, const struct CGFuncDesc* in) { 59 OptCGFuncDesc out; 60 memset(&out, 0, sizeof out); 61 if (!in) return out; 62 out.sym = in->sym; 63 out.text_section_id = in->text_section_id; 64 out.group_id = in->group_id; 65 out.fn_type = in->fn_type; 66 out.result_type = in->result_type; 67 out.nparams = in->nparams; 68 out.loc = in->loc; 69 out.flags = in->flags; 70 out.inline_policy = in->inline_policy; 71 out.atomize = in->atomize; 72 if (in->nparams && in->params) { 73 OptCGParamDesc* params = arena_zarray(a, OptCGParamDesc, in->nparams); 74 for (u32 i = 0; i < in->nparams; ++i) { 75 params[i].index = in->params[i].index; 76 params[i].name = in->params[i].name; 77 params[i].type = in->params[i].type; 78 params[i].size = in->params[i].size; 79 params[i].align = in->params[i].align; 80 params[i].flags = in->params[i].flags; 81 params[i].loc = in->params[i].loc; 82 } 83 out.params = params; 84 } 85 return out; 86 } 87 88 static NativeFrameSlotDesc local_slot_desc(const CgIrLocal* in, u8 kind) { 89 NativeFrameSlotDesc out; 90 memset(&out, 0, sizeof out); 91 out.type = in->desc.type; 92 out.name = in->desc.name; 93 out.loc = in->desc.loc; 94 out.size = in->desc.size; 95 out.align = in->desc.align; 96 out.kind = kind; 97 if (in->address_taken || (in->desc.flags & CG_LOCAL_ADDR_TAKEN)) 98 out.flags |= FSF_ADDR_TAKEN; 99 if (in->desc.flags & CG_LOCAL_MEMORY_REQUIRED) 100 out.flags |= FSF_MEMORY_REQUIRED; 101 return out; 102 } 103 104 static OptLocalMap* local_map(CgIrLower* l, CGLocal id, SrcLoc loc) { 105 if (id == CG_LOCAL_NONE || id > l->nlocals) 106 lower_panic(l, loc, "bad semantic local"); 107 return &l->locals[id - 1u]; 108 } 109 110 static int local_needs_home(const CgIrLocal* in) { 111 return in->address_taken || 112 (in->desc.flags & (CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED)); 113 } 114 115 static int operand_uses_local_addr(const Operand* op, CGLocal local) { 116 if (!op) return 0; 117 if (op->kind == OPK_LOCAL) return op->v.local == local; 118 return 0; 119 } 120 121 /* AGG_COPY/AGG_SET take their dest/src as *pointer values* to the aggregate — 122 * the emitter derefs an OPK_LOCAL pointer operand via pointer_addr_from_operand 123 * (it loads the pointer; it does not address the local's own slot). So a 124 * pointer-typed local operand of an aggregate op uses the local's VALUE, not 125 * its address, and must not force the local to a frame home. Only a non-pointer 126 * operand (the aggregate-typed local itself) genuinely addresses its storage. 127 * (STORE/LOAD/ADDR_OF use addr_from_operand, where an OPK_LOCAL always 128 * addresses the slot, so they keep operand_uses_local_addr.) */ 129 static int operand_uses_local_agg_addr(Compiler* c, const Operand* op, 130 CGLocal local) { 131 if (!op || op->kind != OPK_LOCAL || op->v.local != local) return 0; 132 return !cg_type_is_ptr(c, op->type); 133 } 134 135 static int local_address_used_in_cg_ir(Compiler* c, const CgIrFunc* f, 136 CGLocal local) { 137 for (u32 i = 0; i < f->ninsts; ++i) { 138 const CgIrInst* in = &f->insts[i]; 139 switch ((CgIrOp)in->op) { 140 case CG_IR_LOAD: 141 case CG_IR_BITFIELD_LOAD: 142 if (in->nopnds > 1u && operand_uses_local_addr(&in->opnds[1], local)) 143 return 1; 144 break; 145 case CG_IR_STORE: 146 case CG_IR_BITFIELD_STORE: 147 if (in->nopnds > 0u && operand_uses_local_addr(&in->opnds[0], local)) 148 return 1; 149 break; 150 case CG_IR_AGG_SET: 151 if (in->nopnds > 0u && 152 operand_uses_local_agg_addr(c, &in->opnds[0], local)) 153 return 1; 154 break; 155 case CG_IR_ADDR_OF: 156 if (in->nopnds > 1u && operand_uses_local_addr(&in->opnds[1], local)) 157 return 1; 158 break; 159 case CG_IR_AGG_COPY: 160 if ((in->nopnds > 0u && 161 operand_uses_local_agg_addr(c, &in->opnds[0], local)) || 162 (in->nopnds > 1u && 163 operand_uses_local_agg_addr(c, &in->opnds[1], local))) 164 return 1; 165 break; 166 /* VA_START/VA_ARG/VA_END/VA_COPY consume a pointer *value* (the address 167 * of the va_list, produced by an earlier ADDR_OF); they do not take the 168 * address of their pointer operand, so they must not force it to a frame 169 * slot. */ 170 default: 171 break; 172 } 173 } 174 return 0; 175 } 176 177 static void lower_locals(CgIrLower* l) { 178 l->nlocals = l->src->nlocals; 179 l->locals = 180 arena_zarray(l->f->arena, OptLocalMap, l->nlocals ? l->nlocals : 1u); 181 for (u32 i = 0; i < l->src->nlocals; ++i) { 182 const CgIrLocal* in = &l->src->locals[i]; 183 OptLocalMap* m; 184 if (in->id == CG_LOCAL_NONE || in->id > l->src->nlocals) 185 lower_panic(l, in->desc.loc, "non-dense semantic local table"); 186 m = &l->locals[in->id - 1u]; 187 m->type = in->desc.type; 188 m->size = in->desc.size; 189 m->align = in->desc.align; 190 m->cls = local_reg_class(l->c, in->desc.type); 191 /* Aggregates and oversized scalars cannot live in a single PReg; they need 192 * a memory home regardless of whether their address is taken. "Oversized" 193 * is wider than the machine word (ptr_size): 8 on rv64/x64/aa64, 4 on rv32 194 * — so an 8-byte i64/double on rv32 is homed in memory like an i128 is on a 195 * 64-bit target (the cg layer also flags these CG_LOCAL_MEMORY_REQUIRED). */ 196 m->address_taken = local_needs_home(in) || 197 local_address_used_in_cg_ir(l->c, l->src, in->id) || 198 cg_type_is_aggregate(l->c, in->desc.type) || 199 cg_type_size(l->c, in->desc.type) > 200 (u64)l->c->target.ptr_size; 201 202 PReg r = ir_alloc_preg(l->f, in->desc.type, m->cls); 203 if (m->address_taken) { 204 m->storage.kind = CG_LOCAL_STORAGE_FRAME; 205 } else { 206 m->storage.kind = CG_LOCAL_STORAGE_REG; 207 m->storage.v.reg = (Reg)r; 208 } 209 210 if (m->address_taken) { 211 NativeFrameSlotDesc fsd = 212 local_slot_desc(in, in->is_param ? FS_PARAM : FS_LOCAL); 213 m->home_slot = ir_frame_slot_new(l->f, &fsd); 214 m->storage.v.frame_slot = m->home_slot; 215 } else { 216 m->home_slot = FRAME_SLOT_NONE; 217 } 218 (void)ir_local_add(l->f, &in->desc, m->storage); 219 l->f->locals[l->f->nlocals - 1u].address_taken = m->address_taken; 220 l->f->locals[l->f->nlocals - 1u].home_slot = m->home_slot; 221 } 222 } 223 224 static const CgIrParam* find_param(const CgIrFunc* f, CGLocal local) { 225 for (u32 i = 0; i < f->nparams; ++i) 226 if (f->params[i].local == local) return &f->params[i]; 227 return NULL; 228 } 229 230 static void lower_params(CgIrLower* l) { 231 /* Resolve the function-level ABI info once so we can attach per-param 232 * ABIArgInfo to each IRParam. Consumers (set_preg_pref_for_params, the 233 * native bind_param emit path) read p->abi without going through 234 * f->desc.abi, so this stays scoped to the param plumbing and does not 235 * activate the dormant f->desc.abi-gated passes (e.g. 236 * apply_param_incoming_register_hazards, opt_verify_alloc's incoming 237 * check), which have known issues with tail-call shuffles. */ 238 const ABIFuncInfo* fi = NULL; 239 if (l->c && l->c->abi && l->f->desc.fn_type) 240 fi = abi_cg_func_info(l->c->abi, l->f->desc.fn_type); 241 for (u32 i = 0; i < l->src->nlocals; ++i) { 242 const CgIrLocal* loc = &l->src->locals[i]; 243 if (!loc->is_param) continue; 244 const CgIrParam* p = find_param(l->src, loc->id); 245 OptLocalMap* m = local_map(l, loc->id, loc->desc.loc); 246 OptCGParamDesc d; 247 memset(&d, 0, sizeof d); 248 if (p) { 249 d.index = p->desc.index; 250 d.name = p->desc.name; 251 d.type = p->desc.type; 252 d.size = p->desc.size; 253 d.align = p->desc.align; 254 d.flags = p->desc.flags; 255 d.loc = p->desc.loc; 256 } else { 257 d.index = loc->param_index; 258 d.name = loc->desc.name; 259 d.type = loc->desc.type; 260 d.size = loc->desc.size; 261 d.align = loc->desc.align; 262 d.flags = loc->desc.flags; 263 d.loc = loc->desc.loc; 264 } 265 d.storage = m->storage; 266 if (fi && d.index < fi->nparams) d.abi = &fi->params[d.index]; 267 ir_param_add(l->f, &d); 268 } 269 } 270 271 static int cg_inst_terminates(const CgIrInst* in) { 272 if (!in) return 0; 273 switch ((CgIrOp)in->op) { 274 case CG_IR_BR: 275 case CG_IR_RET: 276 case CG_IR_UNREACHABLE: 277 case CG_IR_CMP_BRANCH: 278 case CG_IR_SWITCH: 279 case CG_IR_INDIRECT_BRANCH: 280 case CG_IR_BREAK_TO: 281 case CG_IR_CONTINUE_TO: 282 return 1; 283 case CG_IR_INTRINSIC: { 284 const CgIrIntrinsicAux* aux = (const CgIrIntrinsicAux*)in->extra.aux; 285 return aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP); 286 } 287 default: 288 return 0; 289 } 290 } 291 292 static u32 label_id_max(const CgIrFunc* f) { 293 u32 max = 0; 294 for (u32 i = 0; i < f->nlabels; ++i) 295 if (f->labels[i].id > max) max = f->labels[i].id; 296 return max; 297 } 298 299 static void mark_label_leader(CgIrLower* l, Label label, const u32* place) { 300 if (label == LABEL_NONE || label > l->nlabels || place[label] == UINT32_MAX) 301 return; 302 l->leader[place[label]] = 1; 303 } 304 305 static void mark_leaders(CgIrLower* l, u32* label_place) { 306 const CgIrFunc* f = l->src; 307 for (u32 i = 0; i <= f->ninsts; ++i) l->leader[i] = 0; 308 if (f->ninsts) l->leader[0] = 1; 309 for (u32 i = 0; i < f->ninsts; ++i) { 310 const CgIrInst* in = &f->insts[i]; 311 if ((CgIrOp)in->op == CG_IR_LABEL) { 312 Label label = (Label)in->extra.imm; 313 l->leader[i] = 1; 314 if (label && label <= l->nlabels && label_place[label] == UINT32_MAX) 315 label_place[label] = i; 316 } 317 } 318 for (u32 i = 0; i < f->ninsts; ++i) { 319 const CgIrInst* in = &f->insts[i]; 320 if (cg_inst_terminates(in) && i + 1u < f->ninsts) l->leader[i + 1u] = 1; 321 switch ((CgIrOp)in->op) { 322 case CG_IR_BR: 323 case CG_IR_LOAD_LABEL_ADDR: 324 mark_label_leader(l, (Label)in->extra.imm, label_place); 325 break; 326 case CG_IR_CMP_BRANCH: { 327 CgIrCmpBranchAux* aux = (CgIrCmpBranchAux*)in->extra.aux; 328 if (i + 1u < f->ninsts) l->leader[i + 1u] = 1; 329 if (aux) mark_label_leader(l, aux->target, label_place); 330 break; 331 } 332 case CG_IR_SWITCH: { 333 CgIrSwitchAux* aux = (CgIrSwitchAux*)in->extra.aux; 334 if (i + 1u < f->ninsts) l->leader[i + 1u] = 1; 335 if (aux) { 336 mark_label_leader(l, aux->default_label, label_place); 337 for (u32 c = 0; c < aux->ncases; ++c) 338 mark_label_leader(l, aux->cases[c].label, label_place); 339 } 340 break; 341 } 342 case CG_IR_INDIRECT_BRANCH: { 343 CgIrIndirectAux* aux = (CgIrIndirectAux*)in->extra.aux; 344 if (aux) { 345 for (u32 t = 0; t < aux->ntargets; ++t) 346 mark_label_leader(l, aux->targets[t], label_place); 347 } 348 break; 349 } 350 case CG_IR_SCOPE_BEGIN: 351 if (i + 1u < f->ninsts) l->leader[i + 1u] = 1; 352 break; 353 case CG_IR_SCOPE_END: 354 l->leader[i] = 1; 355 if (i + 1u < f->ninsts) l->leader[i + 1u] = 1; 356 break; 357 default: 358 break; 359 } 360 } 361 } 362 363 static void make_blocks(CgIrLower* l, const u32* label_place) { 364 const CgIrFunc* f = l->src; 365 u32 cur = UINT32_MAX; 366 l->inst_block = arena_zarray(l->f->arena, u32, f->ninsts ? f->ninsts : 1u); 367 for (u32 i = 0; i < f->ninsts; ++i) { 368 if (l->leader[i] || cur == UINT32_MAX) { 369 cur = ir_block_new(l->f); 370 ir_note_emit(l->f, cur); 371 if (l->f->nblocks == 1u) l->f->entry = cur; 372 } 373 l->inst_block[i] = cur; 374 } 375 l->label_block = 376 arena_zarray(l->f->arena, u32, l->nlabels ? l->nlabels + 1u : 1u); 377 for (u32 i = 0; i <= l->nlabels; ++i) l->label_block[i] = UINT32_MAX; 378 for (u32 label = 1; label <= l->nlabels; ++label) { 379 if (label_place[label] != UINT32_MAX) { 380 u32 place = label_place[label]; 381 l->label_block[label] = (place + 1u < f->ninsts) 382 ? l->inst_block[place + 1u] 383 : l->inst_block[place]; 384 } else { 385 l->label_block[label] = ir_block_new(l->f); 386 } 387 } 388 if (!l->f->nblocks) { 389 l->f->entry = ir_block_new(l->f); 390 ir_note_emit(l->f, l->f->entry); 391 } 392 l->f->emit_order_n = 0; 393 for (u32 i = 0; i < f->ninsts; ++i) ir_note_emit(l->f, l->inst_block[i]); 394 if (!f->ninsts) ir_note_emit(l->f, l->f->entry); 395 } 396 397 static void emit_param_decls(CgIrLower* l) { 398 if (!l->f->nparams || l->f->entry >= l->f->nblocks) return; 399 /* Emit the IR_PARAM_DECL phantom defs into a dedicated prologue block that 400 * falls through to the body, and make it the function entry. This keeps the 401 * parameter defs out of the body's first block, which matters when the body 402 * begins with a loop: that first block is then the loop header and the 403 * back-edge targets it. With the param_decls in the header, liveness reads 404 * each parameter as redefined every iteration (killing the liveness of an 405 * induction variable carried in a parameter register), and because the entry 406 * block's label is not placed by the emitter the back-edge resolves to a 407 * branch-to-self. Both miscompile loop-first functions at -O1. The prologue 408 * block emits no code (param_decls are markers, the fall-through is free, and 409 * the entry label is elided), so this is free in the common case. */ 410 u32 prologue = ir_block_new(l->f); 411 l->f->entry = prologue; 412 ir_note_emit(l->f, prologue); 413 for (u32 i = l->f->emit_order_n - 1u; i > 0; --i) 414 l->f->emit_order[i] = l->f->emit_order[i - 1u]; 415 l->f->emit_order[0] = prologue; 416 for (u32 i = 0; i < l->f->nparams; ++i) { 417 IRParam* p = &l->f->params[i]; 418 Inst* in = ir_emit(l->f, prologue, IR_PARAM_DECL); 419 IRParamDeclAux* aux = arena_znew(l->f->arena, IRParamDeclAux); 420 in->loc = p->loc; 421 in->type = p->type; 422 if (p->storage.kind == CG_LOCAL_STORAGE_REG) in->def = p->storage.v.reg; 423 memset(aux, 0, sizeof *aux); 424 aux->desc.index = p->index; 425 aux->desc.name = p->name; 426 aux->desc.type = p->type; 427 aux->desc.size = p->size; 428 aux->desc.align = p->align; 429 aux->desc.flags = p->flags; 430 aux->desc.loc = p->loc; 431 aux->desc.storage = p->storage; 432 aux->desc.abi = p->abi; 433 in->extra.aux = aux; 434 } 435 } 436 437 static u32 block_for_label(CgIrLower* l, Label label, SrcLoc loc) { 438 if (label == LABEL_NONE || label > l->nlabels || 439 l->label_block[label] == UINT32_MAX) 440 lower_panic(l, loc, "bad label"); 441 return l->label_block[label]; 442 } 443 444 static u32 fallthrough_block(CgIrLower* l, u32 inst_index) { 445 if (inst_index + 1u >= l->src->ninsts) return UINT32_MAX; 446 return l->inst_block[inst_index + 1u]; 447 } 448 449 static void set_succ1(CgIrLower* l, u32 block, u32 succ) { 450 if (succ == UINT32_MAX) { 451 l->f->blocks[block].nsucc = 0; 452 return; 453 } 454 l->f->blocks[block].succ[0] = succ; 455 l->f->blocks[block].nsucc = 1; 456 } 457 458 static OptOperand* dup_opt_ops(CgIrLower* l, const OptOperand* ops, u32 n) { 459 if (!n) return NULL; 460 OptOperand* out = arena_array(l->f->arena, OptOperand, n); 461 memcpy(out, ops, sizeof(*out) * n); 462 return out; 463 } 464 465 static OptOperand opt_reg_operand(OptLocalMap* m) { 466 OptOperand out; 467 memset(&out, 0, sizeof out); 468 out.kind = OPK_REG; 469 out.cls = m->cls; 470 out.type = m->type; 471 out.v.reg = m->storage.v.reg; 472 return out; 473 } 474 475 static OptOperand opt_frame_operand(OptLocalMap* m) { 476 OptOperand out; 477 memset(&out, 0, sizeof out); 478 out.kind = OPK_LOCAL; 479 out.cls = RC_INT; 480 out.type = m->type; 481 out.v.frame_slot = m->home_slot; 482 return out; 483 } 484 485 /* Base/index register for an OPK_INDIRECT whose base is a local. A REG-storage 486 * local supplies its value register directly. A FRAME-storage local (its 487 * address was taken, e.g. `int **q = &p; p->f = ...`) holds the pointer value 488 * in its frame home, so storage.v.reg is meaningless; load the home into a 489 * fresh PReg. prematerialize_indirect_bases emits that load before the using 490 * instruction; here we just look the result up (l->mat_*). */ 491 static Reg resolve_materialized_reg(CgIrLower* l, CGLocal local, 492 CgIrMatRole role, SrcLoc loc) { 493 OptLocalMap* m = local_map(l, local, loc); 494 if (m->storage.kind == CG_LOCAL_STORAGE_REG) return m->storage.v.reg; 495 for (u32 i = 0; i < l->nmat; ++i) 496 if (l->mat_local[i] == local && l->mat_role[i] == (u8)role) 497 return l->mat_reg[i]; 498 lower_panic(l, loc, role == CG_IR_MAT_INDEX 499 ? "indirect index local not materialized" 500 : "indirect base local not materialized"); 501 } 502 503 static KitCgTypeId pointer_sized_int_type(CgIrLower* l) { 504 return builtin_id(l->c->target.ptr_size <= 4u ? KIT_CG_BUILTIN_I32 505 : KIT_CG_BUILTIN_I64); 506 } 507 508 static void remember_materialized_reg(CgIrLower* l, CGLocal local, 509 CgIrMatRole role, Reg r, SrcLoc loc) { 510 if (l->nmat >= CG_IR_LOWER_MAX_MAT) 511 lower_panic(l, loc, "too many frame indirect operands in one instruction"); 512 l->mat_local[l->nmat] = local; 513 l->mat_role[l->nmat] = (u8)role; 514 l->mat_reg[l->nmat] = r; 515 l->nmat++; 516 } 517 518 static int materialized_reg_exists(CgIrLower* l, CGLocal local, 519 CgIrMatRole role) { 520 for (u32 i = 0; i < l->nmat; ++i) 521 if (l->mat_local[i] == local && l->mat_role[i] == (u8)role) return 1; 522 return 0; 523 } 524 525 static OptOperand opt_frame_operand_as(OptLocalMap* m, KitCgTypeId type) { 526 OptOperand out = opt_frame_operand(m); 527 out.type = type ? type : m->type; 528 return out; 529 } 530 531 /* Emit the pre-materialization needed for a FRAME-storage local used as an 532 * OPK_INDIRECT base. A pointer-typed local holds the base pointer value and is 533 * loaded. A non-pointer local names storage, so its frame address is the base. */ 534 static void materialize_frame_base(CgIrLower* l, u32 block, CGLocal local, 535 SrcLoc loc) { 536 OptLocalMap* m = local_map(l, local, loc); 537 if (m->storage.kind == CG_LOCAL_STORAGE_REG) return; 538 if (materialized_reg_exists(l, local, CG_IR_MAT_BASE)) return; 539 PReg r = ir_alloc_preg(l->f, m->type, RC_INT); 540 OptOperand ops[2]; 541 ops[1] = opt_frame_operand(m); 542 if (cg_type_is_ptr(l->c, m->type)) { 543 /* The local *holds* a pointer; load that value to use as the base. */ 544 Inst* ld = ir_emit(l->f, block, IR_LOAD); 545 ld->loc = loc; 546 memset(&ops[0], 0, sizeof ops[0]); 547 ops[0].kind = OPK_REG; 548 ops[0].cls = RC_INT; 549 ops[0].type = m->type; 550 ops[0].v.reg = (Reg)r; 551 ld->opnds = dup_opt_ops(l, ops, 2); 552 ld->nopnds = 2; 553 ld->def = (Val)r; 554 ld->type = m->type; 555 memset(&ld->extra.mem, 0, sizeof ld->extra.mem); 556 ld->extra.mem.type = m->type; 557 ld->extra.mem.size = m->size ? m->size : 8u; 558 ld->extra.mem.align = m->align ? m->align : 8u; 559 } else { 560 /* The local *is* the storage; its frame address is the base. */ 561 Inst* ao = ir_emit(l->f, block, IR_ADDR_OF); 562 ao->loc = loc; 563 memset(&ops[0], 0, sizeof ops[0]); 564 ops[0].kind = OPK_REG; 565 ops[0].cls = RC_INT; 566 ops[0].type = m->type; 567 ops[0].v.reg = (Reg)r; 568 ao->opnds = dup_opt_ops(l, ops, 2); 569 ao->nopnds = 2; 570 ao->def = (Val)r; 571 ao->type = m->type; 572 } 573 remember_materialized_reg(l, local, CG_IR_MAT_BASE, (Reg)r, loc); 574 } 575 576 /* Emit `r = load <local home>` for a FRAME-storage local used as an 577 * OPK_INDIRECT index. Unlike a non-pointer base, an index always needs the 578 * local's value. On rv32, Toy indexes are i64 and therefore memory-backed; the 579 * address calculation only consumes the pointer-width low word. */ 580 static void materialize_frame_index(CgIrLower* l, u32 block, CGLocal local, 581 SrcLoc loc) { 582 OptLocalMap* m = local_map(l, local, loc); 583 if (m->storage.kind == CG_LOCAL_STORAGE_REG) return; 584 if (materialized_reg_exists(l, local, CG_IR_MAT_INDEX)) return; 585 KitCgTypeId idx_ty = pointer_sized_int_type(l); 586 PReg r = ir_alloc_preg(l->f, idx_ty, RC_INT); 587 OptOperand ops[2]; 588 Inst* ld = ir_emit(l->f, block, IR_LOAD); 589 ld->loc = loc; 590 memset(&ops[0], 0, sizeof ops[0]); 591 ops[0].kind = OPK_REG; 592 ops[0].cls = RC_INT; 593 ops[0].type = idx_ty; 594 ops[0].v.reg = (Reg)r; 595 ops[1] = opt_frame_operand_as(m, idx_ty); 596 ld->opnds = dup_opt_ops(l, ops, 2); 597 ld->nopnds = 2; 598 ld->def = (Val)r; 599 ld->type = idx_ty; 600 memset(&ld->extra.mem, 0, sizeof ld->extra.mem); 601 ld->extra.mem.type = idx_ty; 602 ld->extra.mem.size = l->c->target.ptr_size; 603 ld->extra.mem.align = m->align && m->align < l->c->target.ptr_size 604 ? m->align 605 : l->c->target.ptr_size; 606 remember_materialized_reg(l, local, CG_IR_MAT_INDEX, (Reg)r, loc); 607 } 608 609 /* Scan the CG instruction's operands for OPK_INDIRECT bases/indices that are 610 * FRAME-storage locals and pre-load them (see materialize_frame_base). */ 611 static void prematerialize_indirect_bases(CgIrLower* l, const CgIrInst* in, 612 u32 block) { 613 l->nmat = 0; 614 for (u32 i = 0; i < in->nopnds; ++i) { 615 const Operand* op = &in->opnds[i]; 616 if (op->kind != OPK_INDIRECT) continue; 617 materialize_frame_base(l, block, op->v.ind.base, in->loc); 618 if (op->v.ind.index != CG_LOCAL_NONE) 619 materialize_frame_index(l, block, op->v.ind.index, in->loc); 620 } 621 } 622 623 static OptOperand lower_operand_value(CgIrLower* l, const Operand* in, 624 SrcLoc loc); 625 626 static OptOperand lower_operand_addr(CgIrLower* l, const Operand* in, 627 SrcLoc loc) { 628 OptOperand out; 629 memset(&out, 0, sizeof out); 630 if (!in) return out; 631 out.type = in->type; 632 switch ((OpKind)in->kind) { 633 case OPK_LOCAL: { 634 OptLocalMap* m = local_map(l, in->v.local, loc); 635 if (m->home_slot == FRAME_SLOT_NONE) { 636 const CgIrLocal* src = &l->src->locals[in->v.local - 1u]; 637 NativeFrameSlotDesc fsd = 638 local_slot_desc(src, src->is_param ? FS_PARAM : FS_LOCAL); 639 m->home_slot = ir_frame_slot_new(l->f, &fsd); 640 m->address_taken = 1; 641 if (in->v.local - 1u < l->f->nlocals) { 642 l->f->locals[in->v.local - 1u].address_taken = 1; 643 l->f->locals[in->v.local - 1u].home_slot = m->home_slot; 644 } 645 } 646 return opt_frame_operand(m); 647 } 648 case OPK_GLOBAL: 649 out.kind = OPK_GLOBAL; 650 out.cls = RC_INT; 651 out.v.global.sym = in->v.global.sym; 652 out.v.global.addend = in->v.global.addend; 653 return out; 654 case OPK_INDIRECT: { 655 out.kind = OPK_INDIRECT; 656 out.cls = RC_INT; 657 out.v.ind.base = 658 resolve_materialized_reg(l, in->v.ind.base, CG_IR_MAT_BASE, loc); 659 out.v.ind.index = REG_NONE; 660 if (in->v.ind.index != CG_LOCAL_NONE) 661 out.v.ind.index = 662 resolve_materialized_reg(l, in->v.ind.index, CG_IR_MAT_INDEX, loc); 663 out.v.ind.log2_scale = in->v.ind.log2_scale; 664 out.v.ind.ofs = in->v.ind.ofs; 665 return out; 666 } 667 case OPK_IMM: 668 default: 669 lower_panic(l, loc, "operand is not addressable"); 670 } 671 } 672 673 static OptOperand lower_operand_value(CgIrLower* l, const Operand* in, 674 SrcLoc loc) { 675 OptOperand out; 676 memset(&out, 0, sizeof out); 677 if (!in) return out; 678 out.type = in->type; 679 switch ((OpKind)in->kind) { 680 case OPK_IMM: 681 out.kind = OPK_IMM; 682 out.cls = RC_INT; 683 out.v.imm = in->v.imm; 684 return out; 685 case OPK_LOCAL: { 686 OptLocalMap* m = local_map(l, in->v.local, loc); 687 return m->address_taken ? opt_frame_operand(m) : opt_reg_operand(m); 688 } 689 case OPK_GLOBAL: 690 out.kind = OPK_GLOBAL; 691 out.cls = RC_INT; 692 out.v.global.sym = in->v.global.sym; 693 out.v.global.addend = in->v.global.addend; 694 return out; 695 case OPK_INDIRECT: 696 return lower_operand_addr(l, in, loc); 697 default: 698 lower_panic(l, loc, "bad operand kind"); 699 } 700 } 701 702 static void set_inst_def(Inst* out, const OptOperand* op) { 703 if (op && op->kind == OPK_REG) { 704 out->def = (Val)op->v.reg; 705 out->type = op->type; 706 } 707 } 708 709 /* Lower `n` value operands. When `defs_first` is set, opnds[0] is the 710 * instruction's destination (def); otherwise all operands are uses. Branch 711 * terminators (CMP_BRANCH, SWITCH, INDIRECT_BRANCH) read their first operand 712 * and define nothing, so they must pass defs_first=0 -- otherwise dead-def 713 * elimination treats the branch as a redefinition of the tested value and 714 * removes the real producer. */ 715 static void lower_value_ops_ex(CgIrLower* l, Inst* out, const CgIrInst* in, 716 u32 n, int defs_first) { 717 OptOperand tmp[5]; 718 if (n > 5u) lower_panic(l, in->loc, "too many operands"); 719 for (u32 i = 0; i < n; ++i) 720 tmp[i] = lower_operand_value(l, &in->opnds[i], in->loc); 721 out->opnds = dup_opt_ops(l, tmp, n); 722 out->nopnds = n; 723 if (n && defs_first) set_inst_def(out, &out->opnds[0]); 724 } 725 726 static void lower_value_ops(CgIrLower* l, Inst* out, const CgIrInst* in, 727 u32 n) { 728 lower_value_ops_ex(l, out, in, n, 1); 729 } 730 731 static void lower_use_ops(CgIrLower* l, Inst* out, const CgIrInst* in, u32 n) { 732 lower_value_ops_ex(l, out, in, n, 0); 733 } 734 735 static void lower_addr_value_ops(CgIrLower* l, Inst* out, const CgIrInst* in, 736 u32 naddr, u32 nvalue) { 737 OptOperand tmp[5]; 738 u32 n = naddr + nvalue; 739 if (n > 5u) lower_panic(l, in->loc, "too many operands"); 740 for (u32 i = 0; i < naddr; ++i) 741 tmp[i] = lower_operand_addr(l, &in->opnds[i], in->loc); 742 for (u32 i = 0; i < nvalue; ++i) 743 tmp[naddr + i] = lower_operand_value(l, &in->opnds[naddr + i], in->loc); 744 out->opnds = dup_opt_ops(l, tmp, n); 745 out->nopnds = n; 746 } 747 748 static OptCGABIValue abi_value_for_local(CgIrLower* l, CGLocal local, 749 SrcLoc loc) { 750 OptCGABIValue out; 751 memset(&out, 0, sizeof out); 752 OptLocalMap* m = local_map(l, local, loc); 753 out.type = m->type; 754 out.storage = m->address_taken ? opt_frame_operand(m) : opt_reg_operand(m); 755 return out; 756 } 757 758 static void lower_call(CgIrLower* l, Inst* out, const CgIrInst* in) { 759 const CgIrCallAux* src = (const CgIrCallAux*)in->extra.aux; 760 IRCallAux* aux = arena_znew(l->f->arena, IRCallAux); 761 memset(aux, 0, sizeof *aux); 762 if (!src) { 763 out->extra.aux = aux; 764 return; 765 } 766 aux->desc.fn_type = src->desc.fn_type; 767 aux->desc.callee = lower_operand_value(l, &src->desc.callee, in->loc); 768 aux->desc.nargs = src->desc.nargs; 769 aux->desc.flags = src->desc.flags; 770 aux->desc.tail_policy = src->desc.tail_policy; 771 aux->desc.inline_policy = src->desc.inline_policy; 772 /* Cache the function ABI on the desc so downstream passes (e.g. the 773 * regalloc hint pass that steers call-arg sources toward their ABI dest 774 * register) don't have to re-derive it per call. abi_cg_func_info is the 775 * canonical lookup. */ 776 if (l->f->c && l->f->c->abi) 777 aux->desc.abi = abi_cg_func_info(l->f->c->abi, src->desc.fn_type); 778 if (src->desc.nargs) { 779 aux->desc.args = arena_zarray(l->f->arena, OptCGABIValue, src->desc.nargs); 780 for (u32 i = 0; i < src->desc.nargs; ++i) 781 aux->desc.args[i] = abi_value_for_local(l, src->desc.args[i], in->loc); 782 } 783 if (src->desc.result != CG_LOCAL_NONE) { 784 aux->desc.ret = abi_value_for_local(l, src->desc.result, in->loc); 785 set_inst_def(out, &aux->desc.ret.storage); 786 } 787 out->type = src->desc.fn_type; 788 out->extra.aux = aux; 789 } 790 791 static void lower_ret(CgIrLower* l, Inst* out, const CgIrInst* in) { 792 const CgIrRetAux* src = (const CgIrRetAux*)in->extra.aux; 793 IRRetAux* aux = arena_znew(l->f->arena, IRRetAux); 794 if (src && src->present) { 795 aux->present = 1; 796 aux->val = abi_value_for_local(l, src->value, in->loc); 797 } 798 out->extra.aux = aux; 799 } 800 801 static void lower_intrinsic(CgIrLower* l, Inst* out, const CgIrInst* in) { 802 const CgIrIntrinsicAux* src = (const CgIrIntrinsicAux*)in->extra.aux; 803 IRIntrinAux* aux = arena_znew(l->f->arena, IRIntrinAux); 804 if (src) { 805 aux->kind = src->kind; 806 aux->ndst = src->ndst; 807 aux->narg = src->narg; 808 aux->dsts = 809 src->ndst ? arena_array(l->f->arena, OptOperand, src->ndst) : NULL; 810 aux->args = 811 src->narg ? arena_array(l->f->arena, OptOperand, src->narg) : NULL; 812 for (u32 i = 0; i < src->ndst; ++i) 813 aux->dsts[i] = lower_operand_value(l, &src->dsts[i], in->loc); 814 for (u32 i = 0; i < src->narg; ++i) 815 aux->args[i] = lower_operand_value(l, &src->args[i], in->loc); 816 if (src->ndst) { 817 u32 ndefs = 0; 818 for (u32 i = 0; i < src->ndst; ++i) 819 if (aux->dsts[i].kind == OPK_REG) ++ndefs; 820 if (ndefs) { 821 u32 d = 0; 822 out->ndefs = ndefs; 823 out->defs = arena_array(l->f->arena, Val, ndefs); 824 for (u32 i = 0; i < src->ndst; ++i) 825 if (aux->dsts[i].kind == OPK_REG) 826 out->defs[d++] = aux->dsts[i].v.reg; 827 out->def = out->defs[0]; 828 } 829 out->type = aux->dsts[0].type; 830 } 831 } 832 out->extra.aux = aux; 833 } 834 835 static void lower_asm(CgIrLower* l, Inst* out, const CgIrInst* in) { 836 const CgIrAsmAux* src = (const CgIrAsmAux*)in->extra.aux; 837 IRAsmAux* aux = arena_znew(l->f->arena, IRAsmAux); 838 if (src) { 839 aux->tmpl = src->tmpl; 840 aux->outs = src->outs; 841 aux->ins = src->ins; 842 aux->clobbers = src->clobbers; 843 aux->nout = src->nout; 844 aux->nin = src->nin; 845 aux->nclob = src->nclob; 846 aux->clobber_abi_sets = src->clobber_abi_sets; 847 aux->out_ops = 848 src->nout ? arena_array(l->f->arena, OptOperand, src->nout) : NULL; 849 aux->in_ops = 850 src->nin ? arena_array(l->f->arena, OptOperand, src->nin) : NULL; 851 for (u32 i = 0; i < src->nout; ++i) 852 aux->out_ops[i] = lower_operand_value(l, &src->out_ops[i], in->loc); 853 for (u32 i = 0; i < src->nin; ++i) 854 aux->in_ops[i] = lower_operand_value(l, &src->in_ops[i], in->loc); 855 if (src->nout) { 856 u32 ndefs = 0; 857 for (u32 i = 0; i < src->nout; ++i) 858 if (aux->out_ops[i].kind == OPK_REG) ++ndefs; 859 if (ndefs) { 860 u32 d = 0; 861 out->ndefs = ndefs; 862 out->defs = arena_array(l->f->arena, Val, ndefs); 863 for (u32 i = 0; i < src->nout; ++i) 864 if (aux->out_ops[i].kind == OPK_REG) 865 out->defs[d++] = aux->out_ops[i].v.reg; 866 out->def = out->defs[0]; 867 } 868 out->type = aux->out_ops[0].type; 869 } 870 } 871 out->extra.aux = aux; 872 } 873 874 static void lower_one_inst(CgIrLower* l, u32 idx) { 875 const CgIrInst* in = &l->src->insts[idx]; 876 u32 block = l->inst_block[idx]; 877 Inst* out = NULL; 878 IROp op = IR_NOP; 879 switch ((CgIrOp)in->op) { 880 case CG_IR_LABEL: 881 return; 882 case CG_IR_LOAD_IMM: 883 op = IR_LOAD_IMM; 884 break; 885 case CG_IR_LOAD_CONST: 886 op = IR_LOAD_CONST; 887 break; 888 case CG_IR_COPY: 889 op = IR_COPY; 890 break; 891 case CG_IR_LOAD: 892 op = IR_LOAD; 893 break; 894 case CG_IR_STORE: 895 op = IR_STORE; 896 break; 897 case CG_IR_ADDR_OF: 898 op = IR_ADDR_OF; 899 break; 900 case CG_IR_TLS_ADDR_OF: 901 op = IR_TLS_ADDR_OF; 902 break; 903 case CG_IR_AGG_COPY: 904 op = IR_AGG_COPY; 905 break; 906 case CG_IR_AGG_SET: 907 op = IR_AGG_SET; 908 break; 909 case CG_IR_BITFIELD_LOAD: 910 op = IR_BITFIELD_LOAD; 911 break; 912 case CG_IR_BITFIELD_STORE: 913 op = IR_BITFIELD_STORE; 914 break; 915 case CG_IR_BINOP: 916 op = IR_BINOP; 917 break; 918 case CG_IR_UNOP: 919 op = IR_UNOP; 920 break; 921 case CG_IR_CMP: 922 op = IR_CMP; 923 break; 924 case CG_IR_CONVERT: 925 op = IR_CONVERT; 926 break; 927 case CG_IR_CALL: 928 op = IR_CALL; 929 break; 930 case CG_IR_RET: 931 op = IR_RET; 932 break; 933 case CG_IR_UNREACHABLE: 934 op = IR_UNREACHABLE; 935 break; 936 case CG_IR_BR: 937 op = IR_BR; 938 break; 939 case CG_IR_CMP_BRANCH: 940 op = IR_CMP_BRANCH; 941 break; 942 case CG_IR_SWITCH: 943 op = IR_SWITCH; 944 break; 945 case CG_IR_INDIRECT_BRANCH: 946 op = IR_INDIRECT_BRANCH; 947 break; 948 case CG_IR_LOAD_LABEL_ADDR: 949 op = IR_LOAD_LABEL_ADDR; 950 break; 951 case CG_IR_LOCAL_STATIC_DATA_BEGIN: 952 op = IR_LOCAL_STATIC_DATA_BEGIN; 953 break; 954 case CG_IR_LOCAL_STATIC_DATA_WRITE: 955 op = IR_LOCAL_STATIC_DATA_WRITE; 956 break; 957 case CG_IR_LOCAL_STATIC_DATA_LABEL_ADDR: 958 op = IR_LOCAL_STATIC_DATA_LABEL_ADDR; 959 break; 960 case CG_IR_LOCAL_STATIC_DATA_END: 961 op = IR_LOCAL_STATIC_DATA_END; 962 break; 963 case CG_IR_SCOPE_BEGIN: 964 op = IR_SCOPE_BEGIN; 965 break; 966 case CG_IR_SCOPE_END: 967 op = IR_SCOPE_END; 968 break; 969 case CG_IR_BREAK_TO: 970 op = IR_BREAK_TO; 971 break; 972 case CG_IR_CONTINUE_TO: 973 op = IR_CONTINUE_TO; 974 break; 975 case CG_IR_ALLOCA: 976 op = IR_ALLOCA; 977 break; 978 case CG_IR_VA_START: 979 op = IR_VA_START; 980 break; 981 case CG_IR_VA_ARG: 982 op = IR_VA_ARG; 983 break; 984 case CG_IR_VA_END: 985 op = IR_VA_END; 986 break; 987 case CG_IR_VA_COPY: 988 op = IR_VA_COPY; 989 break; 990 case CG_IR_ATOMIC_LOAD: 991 op = IR_ATOMIC_LOAD; 992 break; 993 case CG_IR_ATOMIC_STORE: 994 op = IR_ATOMIC_STORE; 995 break; 996 case CG_IR_ATOMIC_RMW: 997 op = IR_ATOMIC_RMW; 998 break; 999 case CG_IR_ATOMIC_CAS: 1000 op = IR_ATOMIC_CAS; 1001 break; 1002 case CG_IR_FENCE: 1003 op = IR_FENCE; 1004 break; 1005 case CG_IR_INTRINSIC: 1006 op = IR_INTRINSIC; 1007 break; 1008 case CG_IR_ASM_BLOCK: 1009 op = IR_ASM_BLOCK; 1010 break; 1011 default: 1012 op = IR_NOP; 1013 break; 1014 } 1015 /* Pre-load any FRAME-resident pointer locals used as indirect bases so the 1016 * load dominates this instruction (which is emitted next). */ 1017 prematerialize_indirect_bases(l, in, block); 1018 out = ir_emit(l->f, block, op); 1019 out->loc = in->loc; 1020 switch ((CgIrOp)in->op) { 1021 case CG_IR_LOAD_IMM: 1022 lower_value_ops(l, out, in, 1); 1023 out->extra.imm = in->extra.imm; 1024 break; 1025 case CG_IR_LOAD_CONST: 1026 lower_value_ops(l, out, in, 1); 1027 out->extra.cbytes = in->extra.cbytes; 1028 break; 1029 case CG_IR_COPY: 1030 case CG_IR_BINOP: 1031 case CG_IR_UNOP: 1032 case CG_IR_CMP: 1033 case CG_IR_CONVERT: 1034 case CG_IR_ALLOCA: 1035 case CG_IR_VA_ARG: 1036 lower_value_ops(l, out, in, in->nopnds); 1037 out->extra.imm = in->extra.imm; 1038 break; 1039 case CG_IR_LOAD: 1040 case CG_IR_BITFIELD_LOAD: { 1041 OptOperand ops[2]; 1042 ops[0] = lower_operand_value(l, &in->opnds[0], in->loc); 1043 ops[1] = lower_operand_addr(l, &in->opnds[1], in->loc); 1044 out->opnds = dup_opt_ops(l, ops, 2); 1045 out->nopnds = 2; 1046 set_inst_def(out, &out->opnds[0]); 1047 if ((CgIrOp)in->op == CG_IR_LOAD) 1048 out->extra.mem = in->extra.mem; 1049 else 1050 out->extra.aux = in->extra.aux; 1051 break; 1052 } 1053 case CG_IR_ATOMIC_LOAD: { 1054 OptOperand ops[2]; 1055 ops[0] = lower_operand_value(l, &in->opnds[0], in->loc); 1056 ops[1] = lower_operand_value(l, &in->opnds[1], in->loc); 1057 out->opnds = dup_opt_ops(l, ops, 2); 1058 out->nopnds = 2; 1059 set_inst_def(out, &out->opnds[0]); 1060 out->extra.aux = in->extra.aux; 1061 break; 1062 } 1063 case CG_IR_STORE: 1064 case CG_IR_BITFIELD_STORE: 1065 lower_addr_value_ops(l, out, in, 1, in->nopnds - 1u); 1066 if ((CgIrOp)in->op == CG_IR_STORE) 1067 out->extra.mem = in->extra.mem; 1068 else 1069 out->extra.aux = in->extra.aux; 1070 break; 1071 case CG_IR_AGG_COPY: 1072 case CG_IR_AGG_SET: 1073 /* Aggregate ops take their operands as pointer *values* to the aggregates 1074 * (the emitter derefs them via pointer_addr_from_operand). Lowering them 1075 * as values keeps a pointer local in its register instead of forcing a 1076 * frame home — the home would otherwise break the local's other uses as 1077 * an indirect base, whose lowering reads storage.v.reg. */ 1078 lower_use_ops(l, out, in, in->nopnds); 1079 out->extra.aux = in->extra.aux; 1080 break; 1081 case CG_IR_ATOMIC_STORE: { 1082 OptOperand ops[2]; 1083 ops[0] = lower_operand_value(l, &in->opnds[0], in->loc); 1084 ops[1] = lower_operand_value(l, &in->opnds[1], in->loc); 1085 out->opnds = dup_opt_ops(l, ops, 2); 1086 out->nopnds = 2; 1087 out->extra.aux = in->extra.aux; 1088 break; 1089 } 1090 case CG_IR_ADDR_OF: { 1091 OptOperand ops[2]; 1092 ops[0] = lower_operand_value(l, &in->opnds[0], in->loc); 1093 ops[1] = lower_operand_addr(l, &in->opnds[1], in->loc); 1094 out->opnds = dup_opt_ops(l, ops, 2); 1095 out->nopnds = 2; 1096 set_inst_def(out, &out->opnds[0]); 1097 break; 1098 } 1099 case CG_IR_TLS_ADDR_OF: 1100 lower_value_ops(l, out, in, 1); 1101 out->extra.aux = in->extra.aux; 1102 break; 1103 case CG_IR_CALL: 1104 lower_call(l, out, in); 1105 break; 1106 case CG_IR_RET: 1107 lower_ret(l, out, in); 1108 l->f->blocks[block].nsucc = 0; 1109 break; 1110 case CG_IR_UNREACHABLE: 1111 /* Terminator with no successors: control does not leave this block. */ 1112 l->f->blocks[block].nsucc = 0; 1113 break; 1114 case CG_IR_BR: 1115 out->extra.imm = block_for_label(l, (Label)in->extra.imm, in->loc); 1116 set_succ1(l, block, (u32)out->extra.imm); 1117 break; 1118 case CG_IR_CMP_BRANCH: { 1119 CgIrCmpBranchAux* aux = (CgIrCmpBranchAux*)in->extra.aux; 1120 lower_use_ops(l, out, in, 2); 1121 out->extra.imm = aux ? aux->op : CMP_NE; 1122 ir_block_set_nsucc(l->f, block, 2); 1123 l->f->blocks[block].succ[0] = 1124 aux ? block_for_label(l, aux->target, in->loc) : UINT32_MAX; 1125 l->f->blocks[block].succ[1] = fallthrough_block(l, idx); 1126 break; 1127 } 1128 case CG_IR_SWITCH: { 1129 CgIrSwitchAux* src = (CgIrSwitchAux*)in->extra.aux; 1130 IRSwitchAux* aux = arena_znew(l->f->arena, IRSwitchAux); 1131 lower_use_ops(l, out, in, 1); 1132 if (src) { 1133 aux->selector_type = src->selector_type; 1134 aux->ncases = src->ncases; 1135 aux->hint = src->hint; 1136 aux->has_default = src->default_label != LABEL_NONE; 1137 aux->default_block = 1138 aux->has_default ? block_for_label(l, src->default_label, in->loc) 1139 : fallthrough_block(l, idx); 1140 if (src->ncases) { 1141 aux->cases = arena_array(l->f->arena, IRSwitchAuxCase, src->ncases); 1142 for (u32 i = 0; i < src->ncases; ++i) { 1143 aux->cases[i].value = src->cases[i].value; 1144 aux->cases[i].block = 1145 block_for_label(l, src->cases[i].label, in->loc); 1146 } 1147 } 1148 ir_block_set_nsucc(l->f, block, src->ncases + 1u); 1149 for (u32 i = 0; i < src->ncases; ++i) 1150 l->f->blocks[block].succ[i] = aux->cases[i].block; 1151 l->f->blocks[block].succ[src->ncases] = aux->default_block; 1152 } 1153 out->extra.aux = aux; 1154 break; 1155 } 1156 case CG_IR_INDIRECT_BRANCH: { 1157 CgIrIndirectAux* src = (CgIrIndirectAux*)in->extra.aux; 1158 IRIndirectAux* aux = arena_znew(l->f->arena, IRIndirectAux); 1159 lower_use_ops(l, out, in, 1); 1160 if (src && src->ntargets) { 1161 aux->ntargets = src->ntargets; 1162 aux->targets = arena_array(l->f->arena, u32, src->ntargets); 1163 ir_block_set_nsucc(l->f, block, src->ntargets); 1164 for (u32 i = 0; i < src->ntargets; ++i) { 1165 aux->targets[i] = block_for_label(l, src->targets[i], in->loc); 1166 l->f->blocks[block].succ[i] = aux->targets[i]; 1167 } 1168 } 1169 out->extra.aux = aux; 1170 break; 1171 } 1172 case CG_IR_LOAD_LABEL_ADDR: 1173 lower_value_ops(l, out, in, 1); 1174 out->extra.imm = block_for_label(l, (Label)in->extra.imm, in->loc); 1175 break; 1176 case CG_IR_LOCAL_STATIC_DATA_BEGIN: 1177 out->extra.aux = in->extra.aux; 1178 break; 1179 case CG_IR_LOCAL_STATIC_DATA_WRITE: 1180 out->extra.aux = in->extra.aux; 1181 break; 1182 case CG_IR_LOCAL_STATIC_DATA_LABEL_ADDR: { 1183 CgIrLocalStaticLabelAux* src = (CgIrLocalStaticLabelAux*)in->extra.aux; 1184 CgIrLocalStaticLabelAux* aux = 1185 arena_znew(l->f->arena, CgIrLocalStaticLabelAux); 1186 if (src) { 1187 *aux = *src; 1188 aux->target = (Label)block_for_label(l, src->target, in->loc); 1189 } 1190 out->extra.aux = aux; 1191 break; 1192 } 1193 case CG_IR_LOCAL_STATIC_DATA_END: 1194 break; 1195 case CG_IR_SCOPE_BEGIN: { 1196 CgIrScopeAux* src = (CgIrScopeAux*)in->extra.aux; 1197 IRScopeAux* aux = arena_znew(l->f->arena, IRScopeAux); 1198 if (src) { 1199 aux->scope_id = src->scope; 1200 aux->desc.kind = src->desc.kind; 1201 aux->desc.break_label = src->desc.break_label; 1202 aux->desc.continue_label = src->desc.continue_label; 1203 aux->desc.result_type = src->desc.result_type; 1204 } 1205 out->extra.aux = aux; 1206 break; 1207 } 1208 case CG_IR_SCOPE_END: 1209 case CG_IR_BREAK_TO: 1210 case CG_IR_CONTINUE_TO: 1211 out->extra.imm = in->extra.imm; 1212 break; 1213 case CG_IR_VA_START: 1214 case CG_IR_VA_END: 1215 /* The operand is a pointer value (the address of the va_list object), 1216 * produced by an earlier ADDR_OF. Lower as a value so it can live in a 1217 * register; the backend va hook consumes the pointer. */ 1218 lower_use_ops(l, out, in, 1); 1219 break; 1220 case CG_IR_VA_COPY: 1221 lower_use_ops(l, out, in, 2); 1222 break; 1223 case CG_IR_ATOMIC_RMW: 1224 lower_value_ops(l, out, in, 3); 1225 out->extra.aux = in->extra.aux; 1226 break; 1227 case CG_IR_ATOMIC_CAS: 1228 lower_value_ops(l, out, in, 5); 1229 out->ndefs = 2; 1230 out->defs = arena_array(l->f->arena, Val, 2); 1231 out->defs[0] = out->opnds[0].v.reg; 1232 out->defs[1] = out->opnds[1].v.reg; 1233 out->def = out->defs[0]; 1234 out->type = out->opnds[0].type; 1235 { 1236 const CgIrAtomicAux* src = (const CgIrAtomicAux*)in->extra.aux; 1237 IRCasAux* aux = arena_znew(l->f->arena, IRCasAux); 1238 if (src) { 1239 aux->mem = src->mem; 1240 aux->success = src->order; 1241 aux->failure = src->failure; 1242 } 1243 out->extra.aux = aux; 1244 } 1245 break; 1246 case CG_IR_FENCE: 1247 out->extra.imm = in->extra.imm; 1248 break; 1249 case CG_IR_INTRINSIC: 1250 lower_intrinsic(l, out, in); 1251 break; 1252 case CG_IR_ASM_BLOCK: 1253 lower_asm(l, out, in); 1254 break; 1255 default: 1256 out->extra.aux = in->extra.aux; 1257 break; 1258 } 1259 } 1260 1261 static void add_fallthrough_succs(CgIrLower* l) { 1262 for (u32 b = 0; b < l->f->nblocks; ++b) { 1263 Block* bl = &l->f->blocks[b]; 1264 if (bl->nsucc) continue; 1265 if (bl->ninsts) { 1266 Inst* last = &bl->insts[bl->ninsts - 1u]; 1267 switch ((IROp)last->op) { 1268 case IR_BR: 1269 case IR_CONDBR: 1270 case IR_CMP_BRANCH: 1271 case IR_SWITCH: 1272 case IR_INDIRECT_BRANCH: 1273 case IR_RET: 1274 case IR_UNREACHABLE: 1275 case IR_BREAK_TO: 1276 case IR_CONTINUE_TO: 1277 continue; 1278 case IR_INTRINSIC: { 1279 IRIntrinAux* aux = (IRIntrinAux*)last->extra.aux; 1280 if (aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP)) 1281 continue; 1282 break; 1283 } 1284 default: 1285 break; 1286 } 1287 } 1288 for (u32 i = 0; i + 1u < l->f->emit_order_n; ++i) { 1289 if (l->f->emit_order[i] == b) { 1290 set_succ1(l, b, l->f->emit_order[i + 1u]); 1291 break; 1292 } 1293 } 1294 } 1295 } 1296 1297 Func* opt_func_from_cg_ir(Compiler* c, const CgIrFunc* src) { 1298 if (!c || !src) return NULL; 1299 OptCGFuncDesc desc = lower_func_desc(c->tu, &src->desc); 1300 Func* f = ir_func_new(c, &desc); 1301 CgIrLower l; 1302 memset(&l, 0, sizeof l); 1303 l.c = c; 1304 l.src = src; 1305 l.f = f; 1306 l.nlabels = label_id_max(src); 1307 u32* label_place = 1308 arena_array(f->arena, u32, l.nlabels ? l.nlabels + 1u : 1u); 1309 for (u32 i = 0; i <= l.nlabels; ++i) label_place[i] = UINT32_MAX; 1310 l.leader = arena_zarray(f->arena, u8, src->ninsts + 1u); 1311 lower_locals(&l); 1312 lower_params(&l); 1313 mark_leaders(&l, label_place); 1314 make_blocks(&l, label_place); 1315 emit_param_decls(&l); 1316 for (u32 i = 0; i < src->ninsts; ++i) lower_one_inst(&l, i); 1317 add_fallthrough_succs(&l); 1318 opt_build_cfg(f); 1319 return f; 1320 }