lower.c (18046B)
1 /* Func (post opt_run_o1_interp) -> InterpFunc bytecode loader. 2 * 3 * Walks blocks in f->emit_order, emits one fixed-width InterpInsn per non-no-op 4 * Inst, bump-allocates frame-slot offsets, resolves branch targets from block 5 * ids to code pcs, and collects the global symbols the function references (so 6 * their names survive past the obj's lifetime). Unsupported ops are lowered to 7 * IOP_TRAP and the function is flagged rejected with a reason — the engine then 8 * reports a clean "interp: <op> not supported" rather than miscompiling. */ 9 10 #include <string.h> 11 12 #include "abi/abi.h" 13 /* cg/ir.h must precede any header that pulls opt/ir.h: opt/ir.h aliases 14 * `Operand`/`CGCallDesc`/... to their Opt* forms via macros, so the semantic 15 * cg structs (CgIrLocalStatic*Aux, reused verbatim as the opt aux pointers) 16 * have to be parsed first. This mirrors opt/opt.h's include order. */ 17 #include "cg/cgtarget.h" 18 #include "cg/ir.h" 19 #include "cg/type.h" 20 #include "core/arena.h" 21 #include "core/core.h" 22 #include "core/pool.h" 23 #include "core/slice.h" 24 #include "interp/interp.h" 25 #include "obj/obj.h" 26 #include "opt/ir.h" 27 28 typedef struct Lower { 29 InterpProgram* p; 30 Compiler* c; 31 Func* f; 32 const ObjBuilder* obj; 33 InterpFunc* fn; 34 const char* reject; 35 } Lower; 36 37 static void type_wf(Compiler* c, KitCgTypeId t, u16* w, u8* fp) { 38 ABITypeInfo ti = abi_cg_type_info(c->abi, t); 39 *w = (u16)ti.size; 40 *fp = (ti.scalar_kind == ABI_SC_FLOAT) ? 1u : 0u; 41 } 42 43 static int is_agg_or_large(Compiler* c, KitCgTypeId t) { 44 return cg_type_is_aggregate(c, t) || abi_cg_sizeof(c->abi, t) > 8u; 45 } 46 47 /* Does this IROp produce a bytecode record? */ 48 static int inst_emits(u16 op) { 49 switch ((IROp)op) { 50 case IR_NOP: 51 case IR_CONST_I: 52 case IR_CONST_BYTES: 53 case IR_PHI: 54 case IR_PARAM_DECL: 55 case IR_SCOPE_BEGIN: 56 case IR_SCOPE_END: 57 return 0; 58 default: 59 return 1; 60 } 61 } 62 63 /* Map an IROp to its InterpOp, choosing aggregate-specialized handlers. On an 64 * unsupported op, records a reject reason and returns IOP_TRAP. */ 65 static InterpOp map_op(Lower* lw, const Inst* in) { 66 Compiler* c = lw->c; 67 switch ((IROp)in->op) { 68 case IR_LOAD_IMM: 69 return IOP_LOAD_IMM; 70 case IR_LOAD_CONST: 71 return IOP_LOAD_CONST; 72 case IR_COPY: 73 return is_agg_or_large(c, in->opnds[0].type) ? IOP_COPY_AGG : IOP_COPY; 74 case IR_LOAD: 75 return is_agg_or_large(c, in->opnds[0].type) ? IOP_LOAD_AGG : IOP_LOAD; 76 case IR_STORE: 77 return is_agg_or_large(c, in->opnds[1].type) ? IOP_STORE_AGG : IOP_STORE; 78 case IR_ADDR_OF: 79 return IOP_ADDR_OF; 80 case IR_TLS_ADDR_OF: 81 return IOP_TLS_ADDR; 82 case IR_AGG_COPY: 83 return IOP_AGG_COPY; 84 case IR_AGG_SET: 85 return IOP_AGG_SET; 86 case IR_BITFIELD_LOAD: 87 return IOP_BITFIELD_LOAD; 88 case IR_BITFIELD_STORE: 89 return IOP_BITFIELD_STORE; 90 case IR_BINOP: 91 return IOP_BINOP; 92 case IR_UNOP: 93 return IOP_UNOP; 94 case IR_CMP: 95 return IOP_CMP; 96 case IR_CONVERT: 97 return IOP_CONVERT; 98 case IR_CALL: 99 return IOP_CALL; 100 case IR_BR: 101 case IR_BREAK_TO: 102 case IR_CONTINUE_TO: 103 return IOP_BR; 104 case IR_CONDBR: 105 return IOP_CONDBR; 106 case IR_CMP_BRANCH: 107 return IOP_CMP_BRANCH; 108 case IR_SWITCH: 109 return IOP_SWITCH; 110 case IR_INDIRECT_BRANCH: 111 return IOP_INDIRECT_BR; 112 case IR_LOAD_LABEL_ADDR: 113 return IOP_LOAD_LABEL_ADDR; 114 case IR_RET: { 115 IRRetAux* aux = (IRRetAux*)in->extra.aux; 116 return (aux && aux->present) ? IOP_RET : IOP_RET_VOID; 117 } 118 case IR_UNREACHABLE: 119 return IOP_UNREACHABLE; 120 case IR_ALLOCA: 121 return IOP_ALLOCA; 122 case IR_VA_START: 123 return IOP_VA_START; 124 case IR_VA_ARG: 125 return IOP_VA_ARG; 126 case IR_VA_END: 127 return IOP_VA_END; 128 case IR_VA_COPY: 129 return IOP_VA_COPY; 130 case IR_ATOMIC_LOAD: 131 return IOP_ATOMIC_LOAD; 132 case IR_ATOMIC_STORE: 133 return IOP_ATOMIC_STORE; 134 case IR_ATOMIC_RMW: 135 return IOP_ATOMIC_RMW; 136 case IR_ATOMIC_CAS: 137 return IOP_ATOMIC_CAS; 138 case IR_FENCE: 139 return IOP_FENCE; 140 case IR_INTRINSIC: 141 return IOP_INTRINSIC; 142 case IR_ASM_BLOCK: 143 lw->reject = "inline asm"; 144 return IOP_TRAP; 145 case IR_LOCAL_STATIC_DATA_BEGIN: 146 case IR_LOCAL_STATIC_DATA_WRITE: 147 case IR_LOCAL_STATIC_DATA_LABEL_ADDR: 148 case IR_LOCAL_STATIC_DATA_END: 149 /* Function-scope static data (incl. dense-switch jump tables and 150 * computed-goto label arrays) is materialized into an interp-private 151 * blob at lower time (lower_static_blobs) and the blob's symbol is 152 * resolved to that buffer; the stream ops themselves are pure markers. */ 153 return IOP_NOP; 154 default: 155 lw->reject = "unhandled IR op"; 156 return IOP_TRAP; 157 } 158 } 159 160 /* Record (deduped) a global symbol the function references, resolving its name 161 * from the obj while it is alive. */ 162 static void note_global(Lower* lw, ObjSymId sym) { 163 InterpFunc* fn = lw->fn; 164 const ObjSym* s; 165 u32 i; 166 if (sym == OBJ_SYM_NONE || !lw->obj) return; 167 for (i = 0; i < fn->nglobals; ++i) 168 if (fn->globals[i].sym == sym) return; 169 s = obj_symbol_get(lw->obj, sym); 170 if (!s) return; 171 fn->globals[fn->nglobals].sym = sym; 172 fn->globals[fn->nglobals].name = pool_slice(lw->c->global, s->name); 173 fn->globals[fn->nglobals].cached = NULL; 174 fn->globals[fn->nglobals].resolved = 0; 175 fn->nglobals++; 176 } 177 178 static void note_operand_globals(Lower* lw, const Operand* op) { 179 if (op->kind == OPK_GLOBAL) note_global(lw, op->v.global.sym); 180 } 181 182 static void note_inst_globals(Lower* lw, const Inst* in) { 183 u32 i; 184 for (i = 0; i < in->nopnds; ++i) note_operand_globals(lw, &in->opnds[i]); 185 if ((IROp)in->op == IR_CALL && in->extra.aux) { 186 IRCallAux* aux = (IRCallAux*)in->extra.aux; 187 note_operand_globals(lw, &aux->desc.callee); 188 } else if ((IROp)in->op == IR_TLS_ADDR_OF && in->extra.aux) { 189 IRTlsAux* aux = (IRTlsAux*)in->extra.aux; 190 note_global(lw, aux->sym); 191 } 192 } 193 194 /* Block placement: code pc per reachable block, in emit_order. */ 195 static u32 block_pc_of(InterpFunc* fn, u32 block) { 196 if (block >= fn->nblocks) return INTERP_PC_NONE; 197 return fn->block_pc[block]; 198 } 199 200 /* Point a (referenced) global symbol at an already-resolved host address, 201 * overriding lazy name resolution. Used for interp-private static blobs. */ 202 static void register_blob_global(InterpFunc* fn, ObjSymId sym, void* ptr) { 203 u32 i; 204 if (sym == OBJ_SYM_NONE) return; 205 for (i = 0; i < fn->nglobals; ++i) { 206 if (fn->globals[i].sym == sym) { 207 fn->globals[i].cached = ptr; 208 fn->globals[i].resolved = 1; 209 return; 210 } 211 } 212 fn->globals[fn->nglobals].sym = sym; 213 fn->globals[fn->nglobals].name.s = NULL; 214 fn->globals[fn->nglobals].name.len = 0; 215 fn->globals[fn->nglobals].cached = ptr; 216 fn->globals[fn->nglobals].resolved = 1; 217 fn->nglobals++; 218 } 219 220 /* Materialize every function-scope static data blob (regular static locals, 221 * dense-switch jump tables, computed-goto label arrays) into an interp-private, 222 * program-lifetime buffer and bind the blob symbol to it. 223 * 224 * WRITE records contribute their literal bytes (zero-fill when has_data==0). 225 * LABEL_ADDR records contribute a *bytecode pc* for the target block: the 226 * interpreter addresses code by InterpInsn index, so a table that the program 227 * later walks with IR_LOAD + IR_INDIRECT_BRANCH must hold interp pcs, not the 228 * native code-label addresses the parallel object/JIT path bakes in. (kit 229 * forbids data-symbol relocations inside function-local statics, so WRITE bytes 230 * + code-label pcs are the only contents possible.) The BEGIN/WRITE/LABEL_ADDR/ 231 * END stream ops lower to IOP_NOP — they are fully consumed here. 232 * 233 * Runs after block placement (block pcs known) and after the globals table is 234 * allocated; the per-blob sym is also referenced by an OPK_GLOBAL operand, so 235 * the table has room and Pass B's note_global dedups against the entry added 236 * here. */ 237 static void lower_static_blobs(Lower* lw) { 238 InterpFunc* fn = lw->fn; 239 Func* f = lw->f; 240 Arena* a = f->arena ? f->arena : lw->c->tu; 241 u32 b; 242 for (b = 0; b < f->nblocks; ++b) { 243 Block* bl = &f->blocks[b]; 244 u32 k; 245 for (k = 0; k < bl->ninsts; ++k) { 246 const Inst* in = &bl->insts[k]; 247 CgIrLocalStaticBeginAux* beg; 248 ObjSymId sym; 249 u32 total = 0u, off = 0u, j; 250 u8* buf; 251 if ((IROp)in->op != IR_LOCAL_STATIC_DATA_BEGIN) continue; 252 beg = (CgIrLocalStaticBeginAux*)in->extra.aux; 253 sym = beg ? beg->desc.sym : OBJ_SYM_NONE; 254 for (j = k + 1u; j < bl->ninsts; ++j) { 255 IROp op = (IROp)bl->insts[j].op; 256 if (op == IR_LOCAL_STATIC_DATA_END) break; 257 if (op == IR_LOCAL_STATIC_DATA_WRITE) { 258 CgIrLocalStaticWriteAux* w = 259 (CgIrLocalStaticWriteAux*)bl->insts[j].extra.aux; 260 total += w ? (u32)w->len : 0u; 261 } else if (op == IR_LOCAL_STATIC_DATA_LABEL_ADDR) { 262 CgIrLocalStaticLabelAux* la = 263 (CgIrLocalStaticLabelAux*)bl->insts[j].extra.aux; 264 total += la ? la->width : 0u; 265 } 266 } 267 buf = total ? arena_array(a, u8, total) : NULL; 268 for (j = k + 1u; j < bl->ninsts; ++j) { 269 IROp op = (IROp)bl->insts[j].op; 270 if (op == IR_LOCAL_STATIC_DATA_END) break; 271 if (op == IR_LOCAL_STATIC_DATA_WRITE) { 272 CgIrLocalStaticWriteAux* w = 273 (CgIrLocalStaticWriteAux*)bl->insts[j].extra.aux; 274 u32 len = w ? (u32)w->len : 0u; 275 if (len) { 276 if (w->has_data && w->data) 277 memcpy(buf + off, w->data, len); 278 else 279 memset(buf + off, 0, len); 280 } 281 off += len; 282 } else if (op == IR_LOCAL_STATIC_DATA_LABEL_ADDR) { 283 CgIrLocalStaticLabelAux* la = 284 (CgIrLocalStaticLabelAux*)bl->insts[j].extra.aux; 285 u32 width = la ? la->width : 0u; 286 u64 val = la ? (u64)block_pc_of(fn, (u32)la->target) : 0u; 287 u32 bi; 288 for (bi = 0u; bi < width && bi < 8u; ++bi) 289 buf[off + bi] = (u8)(val >> (bi * 8u)); 290 for (bi = 8u; bi < width; ++bi) buf[off + bi] = 0u; 291 off += width; 292 } 293 } 294 register_blob_global(fn, sym, buf); 295 } 296 } 297 } 298 299 InterpFunc* interp_lower(InterpProgram* p, Func* f, ObjSymId sym, Slice name, 300 const ObjBuilder* obj) { 301 Compiler* c = p->c; 302 Arena* a = f->arena ? f->arena : c->tu; 303 Lower lw; 304 InterpFunc* fn; 305 u32 i, b, pc; 306 u32 ncode = 0, nopnd_total = 0; 307 u32 max_slot_id = 0; 308 u32 off; 309 u32 nswitch = 0, swi = 0; 310 311 memset(&lw, 0, sizeof lw); 312 lw.p = p; 313 lw.c = c; 314 lw.f = f; 315 lw.obj = obj; 316 317 fn = arena_znew(a, InterpFunc); 318 fn->prog = p; 319 fn->f = f; 320 fn->sym = sym; 321 fn->name = name; 322 fn->npregs = f->npregs ? f->npregs : 1u; 323 fn->nblocks = f->nblocks; 324 fn->ok = 1; 325 lw.fn = fn; 326 327 /* block_pc, default unreachable. */ 328 fn->block_pc = arena_array(a, u32, f->nblocks ? f->nblocks : 1u); 329 for (b = 0; b < f->nblocks; ++b) fn->block_pc[b] = INTERP_PC_NONE; 330 331 /* Pass A: place blocks in emit_order, count records + operands + switches. */ 332 for (i = 0; i < f->emit_order_n; ++i) { 333 b = f->emit_order[i]; 334 if (b >= f->nblocks) continue; 335 if (fn->block_pc[b] != INTERP_PC_NONE) continue; /* placed already */ 336 fn->block_pc[b] = ncode; 337 { 338 Block* bl = &f->blocks[b]; 339 u32 k; 340 for (k = 0; k < bl->ninsts; ++k) { 341 const Inst* in = &bl->insts[k]; 342 if (!inst_emits(in->op)) continue; 343 ncode++; 344 nopnd_total += in->nopnds; 345 if ((IROp)in->op == IR_SWITCH) nswitch++; 346 } 347 } 348 } 349 (void)nopnd_total; 350 351 fn->code = arena_zarray(a, InterpInsn, ncode ? ncode : 1u); 352 fn->ncode = ncode; 353 fn->switches = nswitch ? arena_zarray(a, InterpSwitch, nswitch) : NULL; 354 fn->nswitches = nswitch; 355 /* Over-allocate the globals table: at most one per operand + one per call. */ 356 { 357 u32 cap = 0; 358 for (i = 0; i < f->nblocks; ++i) { 359 Block* bl = &f->blocks[i]; 360 u32 k; 361 for (k = 0; k < bl->ninsts; ++k) cap += bl->insts[k].nopnds + 1u; 362 } 363 fn->globals = arena_zarray(a, InterpGlobal, cap ? cap : 1u); 364 fn->nglobals = 0; 365 } 366 367 /* Frame slots: bump-allocate non-alloca slots honoring align. */ 368 for (i = 0; i < f->nframe_slots; ++i) 369 if (f->frame_slots[i].id > max_slot_id) max_slot_id = f->frame_slots[i].id; 370 fn->nslots = max_slot_id + 1u; 371 fn->slot_off = arena_zarray(a, u32, fn->nslots); 372 fn->frame_align = 16u; 373 off = 0; 374 for (i = 0; i < f->nframe_slots; ++i) { 375 IRFrameSlot* fs = &f->frame_slots[i]; 376 u32 align = fs->align ? fs->align : 1u; 377 u32 size = fs->size; 378 if (fs->kind == FS_ALLOCA) continue; /* dynamic, allocated at OP_ALLOCA */ 379 if (align > fn->frame_align) fn->frame_align = align; 380 off = (off + align - 1u) & ~(align - 1u); 381 fn->slot_off[fs->id] = off; 382 off += size ? size : 1u; 383 } 384 fn->frame_bytes = (off + 15u) & ~15u; 385 386 /* Materialize function-scope static data into interp-private buffers and bind 387 * their symbols (block pcs are now known for any label-address tables). */ 388 lower_static_blobs(&lw); 389 390 /* Pass B: emit records, resolving branch targets to pcs. */ 391 pc = 0; 392 for (i = 0; i < f->emit_order_n; ++i) { 393 Block* bl; 394 u32 k; 395 b = f->emit_order[i]; 396 if (b >= f->nblocks) continue; 397 if (fn->block_pc[b] != pc) { 398 /* A block placed at a different pc (duplicate in emit_order) — skip its 399 * second appearance to keep pc aligned with Pass A. */ 400 if (fn->block_pc[b] != INTERP_PC_NONE && fn->block_pc[b] < pc) continue; 401 } 402 bl = &f->blocks[b]; 403 for (k = 0; k < bl->ninsts; ++k) { 404 const Inst* in = &bl->insts[k]; 405 InterpInsn* rec; 406 if (!inst_emits(in->op)) continue; 407 note_inst_globals(&lw, in); 408 rec = &fn->code[pc++]; 409 rec->inst = in; 410 rec->op = (u32)map_op(&lw, in); 411 rec->dst = (in->nopnds > 0 && in->opnds[0].kind == OPK_REG) 412 ? in->opnds[0].v.reg 413 : 0u; 414 switch ((InterpOp)rec->op) { 415 case IOP_LOAD_IMM: 416 rec->imm = in->extra.imm; 417 type_wf(c, in->opnds[0].type, &rec->w0, &rec->fp0); 418 break; 419 case IOP_COPY: 420 case IOP_LOAD: 421 case IOP_ADDR_OF: 422 type_wf(c, in->opnds[0].type, &rec->w0, &rec->fp0); 423 if ((IROp)in->op == IR_LOAD) rec->w0 = (u16)in->extra.mem.size; 424 break; 425 case IOP_STORE: 426 rec->w0 = (u16)in->extra.mem.size; 427 type_wf(c, in->opnds[1].type, &rec->w1, &rec->fp1); 428 rec->fp0 = rec->fp1; 429 break; 430 case IOP_BINOP: 431 case IOP_UNOP: 432 rec->sub = (u32)in->extra.imm; 433 type_wf(c, in->opnds[0].type, &rec->w0, &rec->fp0); 434 break; 435 case IOP_CMP: 436 rec->sub = (u32)in->extra.imm; 437 type_wf(c, in->opnds[1].type, &rec->w0, &rec->fp0); 438 break; 439 case IOP_CONVERT: 440 rec->sub = (u32)in->extra.imm; 441 type_wf(c, in->opnds[0].type, &rec->w0, &rec->fp0); 442 type_wf(c, in->opnds[1].type, &rec->w1, &rec->fp1); 443 break; 444 case IOP_ALLOCA: 445 rec->imm = in->extra.imm; /* alignment */ 446 break; 447 case IOP_CALL: { 448 /* A realized tail call is the block terminator: the CGCallDesc has 449 * CG_CALL_TAIL, or equivalently the call is the last emitting inst 450 * of a successor-less block (it returns the callee's result 451 * directly). Detect both. */ 452 IRCallAux* aux = (IRCallAux*)in->extra.aux; 453 u8 is_tail = aux && (aux->desc.flags & CG_CALL_TAIL); 454 if (!is_tail && bl->nsucc == 0) { 455 u32 j; 456 u8 last = 1; 457 for (j = k + 1u; j < bl->ninsts; ++j) 458 if (inst_emits(bl->insts[j].op)) { 459 last = 0; 460 break; 461 } 462 is_tail = last; 463 } 464 rec->tail = is_tail; 465 break; 466 } 467 case IOP_BR: 468 rec->t0 = block_pc_of(fn, bl->nsucc > 0 ? bl->succ[0] : 0xffffffffu); 469 break; 470 case IOP_CONDBR: 471 case IOP_CMP_BRANCH: 472 if ((IROp)in->op == IR_CMP_BRANCH) { 473 rec->sub = (u32)in->extra.imm; 474 type_wf(c, in->opnds[0].type, &rec->w0, &rec->fp0); 475 } 476 rec->t0 = block_pc_of(fn, bl->nsucc > 0 ? bl->succ[0] : 0xffffffffu); 477 rec->t1 = block_pc_of(fn, bl->nsucc > 1 ? bl->succ[1] : 0xffffffffu); 478 break; 479 case IOP_SWITCH: { 480 IRSwitchAux* aux = (IRSwitchAux*)in->extra.aux; 481 InterpSwitch* sw = &fn->switches[swi]; 482 u32 ci; 483 rec->t0 = swi; 484 swi++; 485 sw->aux = aux; 486 sw->sel_type = aux ? aux->selector_type : 0; 487 sw->ncases = aux ? aux->ncases : 0; 488 sw->case_pc = sw->ncases ? arena_array(a, u32, sw->ncases) : NULL; 489 for (ci = 0; ci < sw->ncases; ++ci) 490 sw->case_pc[ci] = block_pc_of(fn, aux->cases[ci].block); 491 sw->default_pc = 492 aux ? block_pc_of(fn, aux->default_block) : INTERP_PC_NONE; 493 break; 494 } 495 case IOP_LOAD_LABEL_ADDR: 496 rec->t0 = block_pc_of(fn, (u32)in->extra.imm); 497 break; 498 case IOP_RET: 499 case IOP_RET_VOID: 500 break; 501 case IOP_ATOMIC_RMW: { 502 IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; 503 rec->sub = aux ? aux->op : 0u; 504 rec->w0 = aux ? (u16)aux->mem.size : 8u; 505 break; 506 } 507 case IOP_ATOMIC_LOAD: 508 case IOP_ATOMIC_STORE: { 509 IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux; 510 rec->w0 = aux ? (u16)aux->mem.size : 8u; 511 break; 512 } 513 case IOP_ATOMIC_CAS: { 514 IRCasAux* aux = (IRCasAux*)in->extra.aux; 515 rec->w0 = aux ? (u16)aux->mem.size : 8u; 516 break; 517 } 518 case IOP_TRAP: 519 if (fn->ok) { 520 fn->ok = 0; 521 fn->reject_reason = lw.reject ? lw.reject : "unsupported op"; 522 } 523 break; 524 default: 525 break; 526 } 527 } 528 } 529 530 return fn; 531 }