engine.c (62812B)
1 /* The interpreter engine: an explicit-stack dispatch loop over the lowered 2 * bytecode. IR-level calls push/pop InterpFrames on the InterpStack instead of 3 * recursing on the host C stack, so execution can be suspended and resumed. 4 * 5 * Dispatch is a switch on the record opcode. (Direct threading via a computed 6 * goto is reserved for a later pass; the InterpInsn keeps a `handler` slot for 7 * it. A switch keeps the engine portable under -Wpedantic and self-host.) */ 8 9 #include <kit/config.h> /* KIT_INTERP_THREADED: dispatch default */ 10 #include <string.h> 11 12 #include "abi/abi.h" 13 #include "cg/cgtarget.h" 14 #include "cg/type.h" 15 #include "core/arena.h" 16 #include "core/core.h" 17 #include "core/diag.h" 18 #include "interp/interp.h" 19 20 #define PERM_R KIT_INTERP_PERM_READ 21 #define PERM_W KIT_INTERP_PERM_WRITE 22 23 static SrcLoc iloc(void) { 24 SrcLoc l; 25 l.file_id = 0; 26 l.line = 0; 27 l.col = 0; 28 return l; 29 } 30 31 /* ---- width / fp helpers ---- */ 32 33 static u64 mask_w(u64 v, u32 w) { 34 if (w >= 8) return v; 35 if (w == 0) return v; 36 return v & ((1ull << (w * 8u)) - 1ull); 37 } 38 39 static i64 sext_w(u64 v, u32 w) { 40 u32 bits; 41 u64 m; 42 if (w >= 8 || w == 0) return (i64)v; 43 bits = w * 8u; 44 v &= ((1ull << bits) - 1ull); 45 m = 1ull << (bits - 1u); 46 return (i64)((v ^ m) - m); 47 } 48 49 /* Low `width`-bit mask (width in *bits*, 0..64). */ 50 static u64 bits_mask(u32 width) { 51 return width >= 64u ? ~0ull : ((1ull << width) - 1ull); 52 } 53 54 /* Interpreter-private va_list layout: a single cursor walks a contiguous buffer 55 * of the anonymous arguments, each at an 8-byte (16 for >8B types) aligned 56 * slot. The interpreter owns both the call-site buffer build and 57 * va_start/va_arg, so the layout is self-consistent regardless of the target 58 * ABI's real va_list. */ 59 static u32 va_align_of(u32 size) { return size > 8u ? 16u : 8u; } 60 static u32 va_stride_of(u32 size) { 61 return size > 8u ? ((size + 15u) & ~15u) : 8u; 62 } 63 64 static double rd_f(u64 bits, u32 w) { 65 if (w == 4) { 66 float f; 67 u32 b = (u32)bits; 68 memcpy(&f, &b, 4); 69 return (double)f; 70 } 71 { 72 double d; 73 memcpy(&d, &bits, 8); 74 return d; 75 } 76 } 77 78 static u64 wr_f(double d, u32 w) { 79 if (w == 4) { 80 float f = (float)d; 81 u32 b; 82 memcpy(&b, &f, 4); 83 return b; 84 } 85 { 86 u64 b; 87 memcpy(&b, &d, 8); 88 return b; 89 } 90 } 91 92 /* ---- memory access (always vtable-translated) ---- */ 93 /* A translation miss latches st->mem_fault; the run loop converts the latch to 94 * a delivered fault at the next straight-line/branch re-check point. */ 95 96 static u64 mem_read(InterpStack* st, u64 addr, u32 size) { 97 u8* host = interp_translate(st->prog, addr, size, PERM_R); 98 u64 v = 0; 99 if (!host) { 100 st->mem_fault = 1; 101 return 0; 102 } 103 memcpy(&v, host, size ? size : 8u); 104 return v; 105 } 106 107 static void mem_write(InterpStack* st, u64 addr, u32 size, u64 v) { 108 u8* host = interp_translate(st->prog, addr, size, PERM_W); 109 if (!host) { 110 st->mem_fault = 1; 111 return; 112 } 113 memcpy(host, &v, size ? size : 8u); 114 } 115 116 static void mem_copy(InterpStack* st, u64 dst, u64 src, u32 n) { 117 u8* d = interp_translate(st->prog, dst, n, PERM_W); 118 u8* s = interp_translate(st->prog, src, n, PERM_R); 119 if (!d || !s) { 120 st->mem_fault = 1; 121 return; 122 } 123 memmove(d, s, n); 124 } 125 126 /* ---- operand access ---- */ 127 128 static u64 frame_base(InterpStack* st, u32 mem_off) { 129 return (u64)(uintptr_t)(st->mem_arena + mem_off); 130 } 131 132 /* addr_from_operand semantics: the abstract address an lvalue operand denotes. 133 */ 134 static u64 op_addr(InterpStack* st, InterpFunc* fn, u64* regs, u32 mem_off, 135 const Operand* op) { 136 switch ((OptOperandKind)op->kind) { 137 case OPT_OPK_LOCAL: 138 return frame_base(st, mem_off) + fn->slot_off[op->v.frame_slot]; 139 case OPT_OPK_GLOBAL: 140 return (u64)(uintptr_t)interp_global_base(fn, op->v.global.sym) + 141 (u64)op->v.global.addend; 142 case OPT_OPK_INDIRECT: { 143 u64 a = regs[op->v.ind.base]; 144 if (op->v.ind.index != (Reg)REG_NONE) 145 a += regs[op->v.ind.index] << op->v.ind.log2_scale; 146 a += (u64)(i64)op->v.ind.ofs; 147 return a; 148 } 149 case OPT_OPK_REG: 150 return regs[op->v.reg]; 151 default: 152 return 0; 153 } 154 } 155 156 /* loc_from_operand-as-value semantics: the scalar value of a value operand. */ 157 static u64 op_value(InterpStack* st, InterpFunc* fn, u64* regs, u32 mem_off, 158 const Operand* op) { 159 switch ((OptOperandKind)op->kind) { 160 case OPT_OPK_REG: 161 return regs[op->v.reg]; 162 case OPT_OPK_IMM: 163 return (u64)op->v.imm; 164 case OPT_OPK_LOCAL: 165 case OPT_OPK_GLOBAL: 166 case OPT_OPK_INDIRECT: { 167 u64 a = op_addr(st, fn, regs, mem_off, op); 168 u32 sz = abi_cg_sizeof(fn->prog->c->abi, op->type); 169 return mem_read(st, a, sz ? sz : 8u); 170 } 171 default: 172 return 0; 173 } 174 } 175 176 /* write_loc semantics: store a scalar result into a destination operand, which 177 * may be a register OR a memory location (OPK_LOCAL/GLOBAL/INDIRECT). The 178 * optimizer leaves un-promoted (e.g. address-taken) destinations as memory. */ 179 static void write_dst(InterpStack* st, InterpFunc* fn, u64* regs, u32 mem_off, 180 const Operand* op, u64 value) { 181 if (op->kind == OPK_REG) { 182 regs[op->v.reg] = value; 183 return; 184 } 185 { 186 u64 a = op_addr(st, fn, regs, mem_off, op); 187 u32 sz = abi_cg_sizeof(fn->prog->c->abi, op->type); 188 mem_write(st, a, sz ? sz : 8u, value); 189 } 190 } 191 192 /* pointer_addr_from_operand semantics: the address an aggregate pointer 193 * operand denotes. An OPK_LOCAL of pointer type *holds* the pointer (load it); 194 * otherwise the local *is* the aggregate storage (its frame home is the 195 * address). Used only by AGG_COPY/AGG_SET. */ 196 static u64 interp_ptr_addr(InterpStack* st, InterpFunc* fn, u64* regs, 197 u32 mem_off, const Operand* op) { 198 if (op->kind == OPK_LOCAL && !cg_type_is_ptr(fn->prog->c, op->type)) 199 return frame_base(st, mem_off) + fn->slot_off[op->v.frame_slot]; 200 if (op->kind == OPK_LOCAL) { 201 /* pointer-typed local: the slot holds the pointer value */ 202 u64 slot = frame_base(st, mem_off) + fn->slot_off[op->v.frame_slot]; 203 return mem_read(st, slot, 8u); 204 } 205 return op_addr(st, fn, regs, mem_off, op); 206 } 207 208 /* Common compiler intrinsics. Returns 0 (and sets status) if unsupported. */ 209 static int interp_intrinsic(InterpStack* st, InterpFunc* fn, u64* regs, 210 u32 mem_off, InterpInsn* in); 211 212 /* The register and addressable-memory arenas are FIXED reservations that never 213 * move: an OP_ADDR_OF materializes a local's address as an absolute host 214 * pointer into mem_arena, and that pointer can escape into a register or out to 215 * another local, so reallocating (moving) the arena would dangle it. Frames 216 * follow strict stack discipline (CALL bumps the top, RET rewinds it), so a 217 * generous fixed reservation suffices; overflow traps cleanly as a stack 218 * overflow rather than corrupting memory. */ 219 #define INTERP_REGS_RESERVE (8u * 1024u * 1024u) 220 #define INTERP_MEM_RESERVE (8u * 1024u * 1024u) 221 222 static u32 bump(u8* arena, u32* top, u32 cap, u32 size, u32 align) { 223 u32 off = (*top + align - 1u) & ~(align - 1u); 224 (void)arena; 225 if (off + size > cap || off + size < off) return 0xffffffffu; /* overflow */ 226 *top = off + size; 227 return off; 228 } 229 230 /* Push a fresh frame for fn; returns its index, or 0xffffffff on overflow. 231 * The arenas never move, so existing frame pointers stay valid. */ 232 static u32 frame_push(InterpStack* st, InterpFunc* fn) { 233 InterpFrame* fr; 234 u32 regs_off, mem_off; 235 if (st->nframes == st->frames_cap) { 236 Heap* h = st->prog->c->ctx->heap; 237 u32 ncap = st->frames_cap ? st->frames_cap * 2u : 32u; 238 InterpFrame* nf = (InterpFrame*)h->realloc( 239 h, st->frames, sizeof(InterpFrame) * st->frames_cap, 240 sizeof(InterpFrame) * ncap, _Alignof(InterpFrame)); 241 if (!nf) return 0xffffffffu; 242 st->frames = nf; 243 st->frames_cap = ncap; 244 } 245 regs_off = bump(st->regs_arena, &st->regs_top, st->regs_cap, 246 (fn->npregs ? fn->npregs : 1u) * 8u, 8u); 247 mem_off = bump(st->mem_arena, &st->mem_top, st->mem_cap, 248 fn->frame_bytes ? fn->frame_bytes : 16u, fn->frame_align); 249 if (regs_off == 0xffffffffu || mem_off == 0xffffffffu) return 0xffffffffu; 250 fr = &st->frames[st->nframes]; 251 memset(fr, 0, sizeof *fr); 252 fr->fn = fn; 253 fr->regs_off = regs_off; 254 fr->mem_off = mem_off; 255 fr->frame_bytes = fn->frame_bytes; 256 fr->alloca_top = fn->frame_bytes; 257 fr->ip = &fn->code[fn->block_pc[fn->f->entry] == INTERP_PC_NONE 258 ? 0u 259 : fn->block_pc[fn->f->entry]]; 260 /* zero the register file */ 261 memset(st->regs_arena + regs_off, 0, (fn->npregs ? fn->npregs : 1u) * 8u); 262 st->nframes++; 263 return st->nframes - 1u; 264 } 265 266 static void unsupported(InterpStack* st, const char* what) { 267 st->status = KIT_INTERP_ERROR; 268 st->trap_reason = what; 269 diag_emit(st->prog->c->ctx->diag, KIT_DIAG_ERROR, iloc(), 270 "interp: %s not supported", what ? what : "operation"); 271 } 272 273 static void fault(InterpStack* st, const char* what) { 274 st->status = KIT_INTERP_TRAP; 275 st->trap_reason = what; 276 diag_emit(st->prog->c->ctx->diag, KIT_DIAG_ERROR, iloc(), "interp: trap: %s", 277 what ? what : "fault"); 278 } 279 280 /* ---- integer/fp arithmetic ---- */ 281 282 /* Shift-count mask for the spec's portable "reduce modulo width" rule 283 * (doc/IR.md). The engine stores every scalar in a u64, so the meaningful 284 * range is the storage width (<=64 bits); 16-byte scalars are lowered to 285 * memory / 64-bit-half sequences before reaching here, never as a w==16 BINOP. 286 * Clamping to the storage width keeps the host C shift in range regardless and 287 * is identical to (w*8-1) for every width the engine actually carries (<=8). */ 288 static u32 shift_mask(u32 w) { return (w >= 8u ? 64u : w * 8u) - 1u; } 289 290 static u64 do_binop(InterpStack* st, u32 binop, u64 a, u64 b, u32 w, u8 fp) { 291 if (fp) { 292 double x = rd_f(a, w), y = rd_f(b, w), r = 0; 293 switch ((BinOp)binop) { 294 case BO_FADD: 295 r = x + y; 296 break; 297 case BO_FSUB: 298 r = x - y; 299 break; 300 case BO_FMUL: 301 r = x * y; 302 break; 303 case BO_FDIV: 304 r = x / y; 305 break; 306 default: 307 unsupported(st, "fp binop"); 308 return 0; 309 } 310 return wr_f(r, w); 311 } 312 switch ((BinOp)binop) { 313 case BO_IADD: 314 return mask_w(a + b, w); 315 case BO_ISUB: 316 return mask_w(a - b, w); 317 case BO_IMUL: 318 return mask_w(a * b, w); 319 case BO_SDIV: { 320 i64 x = sext_w(a, w), y = sext_w(b, w); 321 if (y == 0) { 322 fault(st, "integer divide by zero"); 323 return 0; 324 } 325 /* INT_MIN / -1 overflows (UB / SIGFPE on x86) — wraps to INT_MIN. */ 326 if (y == -1) return mask_w(0u - (u64)x, w); 327 return mask_w((u64)(x / y), w); 328 } 329 case BO_UDIV: { 330 u64 x = mask_w(a, w), y = mask_w(b, w); 331 if (y == 0) { 332 fault(st, "integer divide by zero"); 333 return 0; 334 } 335 return mask_w(x / y, w); 336 } 337 case BO_SREM: { 338 i64 x = sext_w(a, w), y = sext_w(b, w); 339 if (y == 0) { 340 fault(st, "integer divide by zero"); 341 return 0; 342 } 343 if (y == -1) return 0; /* INT_MIN % -1 == 0 (avoids the overflow UB) */ 344 return mask_w((u64)(x % y), w); 345 } 346 case BO_UREM: { 347 u64 x = mask_w(a, w), y = mask_w(b, w); 348 if (y == 0) { 349 fault(st, "integer divide by zero"); 350 return 0; 351 } 352 return mask_w(x % y, w); 353 } 354 case BO_AND: 355 return mask_w(a & b, w); 356 case BO_OR: 357 return mask_w(a | b, w); 358 case BO_XOR: 359 return mask_w(a ^ b, w); 360 case BO_SHL: 361 return mask_w(a << (b & shift_mask(w)), w); 362 case BO_SHR_S: { 363 i64 x = sext_w(a, w); 364 return mask_w((u64)(x >> (b & shift_mask(w))), w); 365 } 366 case BO_SHR_U: 367 return mask_w(mask_w(a, w) >> (b & shift_mask(w)), w); 368 default: 369 unsupported(st, "int binop"); 370 return 0; 371 } 372 } 373 374 static int do_cmp(InterpStack* st, u32 cmp, u64 a, u64 b, u32 w) { 375 /* FP-ness is self-describing from the opcode (the FP block starts at 376 * CMP_OEQ_F); no operand-class sniffing needed. */ 377 if (cmp >= CMP_OEQ_F) { 378 double x = rd_f(a, w), y = rd_f(b, w); 379 int uno = (x != x) || (y != y); /* unordered: either operand is NaN */ 380 switch ((CmpOp)cmp) { 381 case CMP_OEQ_F: 382 return x == y; /* ordered: false on NaN */ 383 case CMP_ONE_F: 384 return !uno && (x != y); 385 case CMP_OLT_F: 386 return x < y; 387 case CMP_OLE_F: 388 return x <= y; 389 case CMP_OGT_F: 390 return x > y; 391 case CMP_OGE_F: 392 return x >= y; 393 case CMP_UEQ_F: 394 return uno || (x == y); 395 case CMP_UNE_F: 396 return x != y; /* unordered: true on NaN */ 397 case CMP_ULT_F: 398 return uno || (x < y); 399 case CMP_ULE_F: 400 return uno || (x <= y); 401 case CMP_UGT_F: 402 return uno || (x > y); 403 case CMP_UGE_F: 404 return uno || (x >= y); 405 default: 406 break; 407 } 408 } 409 switch ((CmpOp)cmp) { 410 case CMP_EQ: 411 return mask_w(a, w) == mask_w(b, w); 412 case CMP_NE: 413 return mask_w(a, w) != mask_w(b, w); 414 case CMP_LT_S: 415 return sext_w(a, w) < sext_w(b, w); 416 case CMP_LE_S: 417 return sext_w(a, w) <= sext_w(b, w); 418 case CMP_GT_S: 419 return sext_w(a, w) > sext_w(b, w); 420 case CMP_GE_S: 421 return sext_w(a, w) >= sext_w(b, w); 422 case CMP_LT_U: 423 return mask_w(a, w) < mask_w(b, w); 424 case CMP_LE_U: 425 return mask_w(a, w) <= mask_w(b, w); 426 case CMP_GT_U: 427 return mask_w(a, w) > mask_w(b, w); 428 case CMP_GE_U: 429 return mask_w(a, w) >= mask_w(b, w); 430 default: 431 unsupported(st, "cmp"); 432 return 0; 433 } 434 } 435 436 /* Saturating float-to-integer (NaN -> 0, out-of-range -> clamped to the 437 * destination width). Matches Wasm trunc_sat semantics and, crucially, avoids 438 * the UB of casting a NaN/overflowing double to an integer (which traps under 439 * UBSan). For in-range values this is identical to a plain truncating cast, so 440 * well-defined C float->int conversions are unaffected. Avoids <math.h> 441 * (libkit is freestanding) by building the 2^k bound with a loop. */ 442 static u64 ftoi_sat(double d, u32 wbytes, int is_signed) { 443 u32 bits, i; 444 double bound; 445 if (d != d) return 0; /* NaN */ 446 if (wbytes == 0 || wbytes > 8) wbytes = 8; 447 bits = wbytes * 8u; 448 if (is_signed) { 449 bound = 1.0; 450 for (i = 0; i + 1u < bits; ++i) bound *= 2.0; /* 2^(bits-1) */ 451 if (d >= bound) 452 return mask_w( 453 bits >= 64 ? 0x7fffffffffffffffull : (((u64)1 << (bits - 1u)) - 1u), 454 wbytes); 455 if (d < -bound) 456 return mask_w( 457 bits >= 64 ? 0x8000000000000000ull : ((u64)1 << (bits - 1u)), wbytes); 458 return mask_w((u64)(i64)d, wbytes); 459 } 460 bound = 1.0; 461 for (i = 0; i < bits; ++i) bound *= 2.0; /* 2^bits */ 462 if (d < 0.0) return 0; 463 if (d >= bound) 464 return mask_w(bits >= 64 ? ~0ull : (((u64)1 << bits) - 1u), wbytes); 465 return mask_w((u64)d, wbytes); 466 } 467 468 static u64 do_convert(InterpStack* st, InterpInsn* in, u64 v) { 469 u32 wd = in->w0, ws = in->w1; 470 switch ((ConvKind)in->sub) { 471 case CV_SEXT: 472 return mask_w((u64)sext_w(v, ws), wd); 473 case CV_ZEXT: 474 return mask_w(mask_w(v, ws), wd); 475 case CV_TRUNC: 476 return mask_w(v, wd); 477 case CV_ITOF_S: 478 return wr_f((double)sext_w(v, ws), wd); 479 case CV_ITOF_U: 480 return wr_f((double)mask_w(v, ws), wd); 481 case CV_FTOI_S: 482 return ftoi_sat(rd_f(v, ws), wd, 1); 483 case CV_FTOI_U: 484 return ftoi_sat(rd_f(v, ws), wd, 0); 485 case CV_FEXT: 486 return wr_f(rd_f(v, ws), wd); 487 case CV_FTRUNC: 488 return wr_f(rd_f(v, ws), wd); 489 case CV_BITCAST: 490 return mask_w(v, wd); 491 default: 492 unsupported(st, "convert"); 493 return 0; 494 } 495 } 496 497 static u64 do_rmw(u32 op, u64 old, u64 val, u32 w) { 498 switch ((KitCgAtomicOp)op) { 499 case KIT_CG_ATOMIC_XCHG: 500 return mask_w(val, w); 501 case KIT_CG_ATOMIC_ADD: 502 return mask_w(old + val, w); 503 case KIT_CG_ATOMIC_SUB: 504 return mask_w(old - val, w); 505 case KIT_CG_ATOMIC_AND: 506 return mask_w(old & val, w); 507 case KIT_CG_ATOMIC_OR: 508 return mask_w(old | val, w); 509 case KIT_CG_ATOMIC_XOR: 510 return mask_w(old ^ val, w); 511 case KIT_CG_ATOMIC_NAND: 512 return mask_w(~(old & val), w); 513 default: 514 return old; 515 } 516 } 517 518 static u64 do_unop(InterpStack* st, u32 unop, u64 a, u32 w, u8 fp) { 519 (void)fp; 520 switch ((UnOp)unop) { 521 case UO_NEG: 522 return mask_w(0u - a, w); /* well-defined two's-complement */ 523 case UO_FNEG: 524 return wr_f(-rd_f(a, w), w); 525 case UO_NOT: 526 return mask_w(a, w) == 0 ? 1u : 0u; 527 case UO_BNOT: 528 return mask_w(~a, w); 529 default: 530 unsupported(st, "unop"); 531 return 0; 532 } 533 } 534 535 /* Bind call arguments into a freshly-pushed callee frame (value semantics). */ 536 static void bind_args(InterpStack* st, u32 caller_idx, u32 callee_idx, 537 const OptCGCallDesc* desc) { 538 InterpProgram* p = st->prog; 539 InterpFrame* caller = &st->frames[caller_idx]; 540 InterpFrame* callee = &st->frames[callee_idx]; 541 InterpFunc* cfn = caller->fn; 542 InterpFunc* efn = callee->fn; 543 u64* cregs = (u64*)(st->regs_arena + caller->regs_off); 544 u64* eregs = (u64*)(st->regs_arena + callee->regs_off); 545 u32 nbind = desc->nargs < efn->f->nparams ? desc->nargs : efn->f->nparams; 546 u32 i; 547 for (i = 0; i < nbind; ++i) { 548 OptCGABIValue* arg = &desc->args[i]; 549 IRParam* pr = &efn->f->params[i]; 550 u32 size = abi_cg_sizeof(p->c->abi, arg->type); 551 if (pr->storage.kind == CG_LOCAL_STORAGE_REG) { 552 eregs[pr->storage.v.reg] = 553 op_value(st, cfn, cregs, caller->mem_off, &arg->storage); 554 } else { 555 u64 dst = frame_base(st, callee->mem_off) + 556 efn->slot_off[pr->storage.v.frame_slot]; 557 if (cg_type_is_aggregate(p->c, arg->type) || size > 8u) { 558 u64 src = op_addr(st, cfn, cregs, caller->mem_off, &arg->storage); 559 mem_copy(st, dst, src, size); 560 } else { 561 mem_write(st, dst, size ? size : 8u, 562 op_value(st, cfn, cregs, caller->mem_off, &arg->storage)); 563 } 564 } 565 } 566 } 567 568 /* Lay out the anonymous (variadic) arguments of an internal call into a 569 * contiguous buffer in the callee frame's addressable region, above its static 570 * frame and any future alloca. Records the buffer offset on the callee frame so 571 * IOP_VA_START can hand va_arg a cursor over it. Returns 0 on stack overflow. 572 */ 573 static int build_varargs(InterpStack* st, u32 caller_idx, u32 callee_idx, 574 const OptCGCallDesc* desc) { 575 InterpProgram* p = st->prog; 576 InterpFrame* caller = &st->frames[caller_idx]; 577 InterpFrame* callee = &st->frames[callee_idx]; 578 InterpFunc* cfn = caller->fn; 579 InterpFunc* efn = callee->fn; 580 u64* cregs = (u64*)(st->regs_arena + caller->regs_off); 581 u32 nfixed = efn->f->nparams; 582 u32 cur = (callee->alloca_top + 15u) & ~15u; /* 16-align buffer start */ 583 u32 buf_start = cur; 584 u32 i; 585 if (desc->nargs <= nfixed) return 1; /* no anonymous args */ 586 for (i = nfixed; i < desc->nargs; ++i) { 587 OptCGABIValue* arg = &desc->args[i]; 588 u32 size = abi_cg_sizeof(p->c->abi, arg->type); 589 u32 al = va_align_of(size); 590 u64 dst; 591 cur = (cur + al - 1u) & ~(al - 1u); 592 if ((u64)callee->mem_off + cur + va_stride_of(size) > st->mem_cap) return 0; 593 dst = frame_base(st, callee->mem_off) + cur; 594 if (cg_type_is_aggregate(p->c, arg->type) || size > 8u) { 595 u64 src = op_addr(st, cfn, cregs, caller->mem_off, &arg->storage); 596 mem_copy(st, dst, src, size); 597 } else { 598 mem_write(st, dst, 8u, 599 op_value(st, cfn, cregs, caller->mem_off, &arg->storage)); 600 } 601 cur += va_stride_of(size); 602 } 603 callee->has_varargs = 1; 604 callee->vararg_off = callee->mem_off + buf_start; 605 callee->alloca_top = cur; 606 if (callee->mem_off + cur > st->mem_top) st->mem_top = callee->mem_off + cur; 607 return 1; 608 } 609 610 /* ---- external (host ABI) call marshalling ---- */ 611 612 /* Record an integer-register argument. Returns non-zero (with *why) on 613 * overflow of the supported register-thunk family. */ 614 static int ffi_push_int(InterpFfiArgs* fa, u64 v, const char** why) { 615 if (fa->nint >= 8u) { 616 *why = "external call: too many int args"; 617 return 1; 618 } 619 fa->iargs[fa->nint++] = v; 620 return 0; 621 } 622 623 /* Record an fp-register argument, tracking single vs double precision (the two 624 * occupy the fp register differently). Returns non-zero (with *why) on overflow 625 * or a float/double mix within one signature. */ 626 static int ffi_push_fp(InterpFfiArgs* fa, u64 bits, u32 size, 627 const char** why) { 628 if (fa->nfp >= 8u) { 629 *why = "external call: too many fp args"; 630 return 1; 631 } 632 if (size == 4u) { 633 if (fa->nfp > 0u && !fa->args_fp_is_float) { 634 *why = "external call: mixed float/double args"; 635 return 1; 636 } 637 fa->args_fp_is_float = 1u; 638 fa->fargs_f[fa->nfp++] = (float)rd_f(bits, 4u); 639 } else { 640 if (fa->nfp > 0u && fa->args_fp_is_float) { 641 *why = "external call: mixed float/double args"; 642 return 1; 643 } 644 fa->fargs[fa->nfp++] = rd_f(bits, size ? size : 8u); 645 } 646 return 0; 647 } 648 649 static u64 ext_call(InterpStack* st, InterpFrame* fr, u64* regs, void* host_fp, 650 const OptCGCallDesc* desc) { 651 InterpProgram* p = st->prog; 652 const ABIFuncInfo* fi = desc->abi; 653 InterpFfiArgs fa; 654 const char* reason = NULL; 655 u32 i; 656 657 if (!fi) { 658 unsupported(st, "external call without ABI info"); 659 return 0; 660 } 661 if (fi->vararg_on_stack && fi->variadic) { 662 unsupported(st, "variadic external call (stack-routed)"); 663 return 0; 664 } 665 memset(&fa, 0, sizeof fa); 666 fa.fi = fi; 667 668 /* hidden struct return: pass the caller's aggregate-return slot directly. 669 * When the call is a tail call its result has no local home (ret.storage is 670 * void) — forward this frame's own sret destination instead. */ 671 if (fi->has_sret) { 672 u32 rsz = abi_cg_sizeof(p->c->abi, desc->ret.type); 673 if (desc->ret.storage.kind == OPK_LOCAL || 674 desc->ret.storage.kind == OPK_GLOBAL || 675 desc->ret.storage.kind == OPK_INDIRECT) { 676 u64 dst = op_addr(st, fr->fn, regs, fr->mem_off, &desc->ret.storage); 677 fa.sret = interp_translate(p, dst, rsz, PERM_W); 678 } else { 679 fa.sret = fr->sret_ptr; /* tail call: deliver to our caller's sret slot */ 680 } 681 if (!fa.sret) { 682 unsupported(st, "sret destination"); 683 return 0; 684 } 685 fa.iargs[fa.nint++] = (u64)(uintptr_t)fa.sret; 686 fa.ret_is_void = 1; 687 } 688 689 for (i = 0; i < desc->nargs; ++i) { 690 OptCGABIValue* arg = &desc->args[i]; 691 const ABIArgInfo* ai = (i < fi->nparams) ? &fi->params[i] : NULL; 692 if (ai && ai->kind == ABI_ARG_IGNORE) continue; 693 if (ai && ai->kind == ABI_ARG_INDIRECT) { 694 /* byval: pass a pointer to the aggregate (caller's copy). */ 695 u64 a = op_addr(st, fr->fn, regs, fr->mem_off, &arg->storage); 696 u8* h = interp_translate(p, a, 1, PERM_R); 697 if (fa.nint >= 8) { 698 unsupported(st, "external call: too many int args"); 699 return 0; 700 } 701 fa.iargs[fa.nint++] = (u64)(uintptr_t)h; 702 continue; 703 } 704 if (ai && ai->kind == ABI_ARG_DIRECT && ai->nparts > 1) { 705 /* aggregate split across registers: read each part from memory. */ 706 u64 base = op_addr(st, fr->fn, regs, fr->mem_off, &arg->storage); 707 u32 k; 708 for (k = 0; k < ai->nparts; ++k) { 709 const ABIArgPart* pt = &ai->parts[k]; 710 u64 chunk = mem_read(st, base + pt->src_offset, pt->size); 711 int bad = (pt->cls == ABI_CLASS_FP) 712 ? ffi_push_fp(&fa, chunk, pt->size, &reason) 713 : ffi_push_int(&fa, chunk, &reason); 714 if (bad) { 715 unsupported(st, reason); 716 return 0; 717 } 718 } 719 continue; 720 } 721 /* scalar (or variadic extra arg): route by type. The named-parameter 722 * aggregate/large cases are handled by the INDIRECT / multi-part branches 723 * above; a variadic-tail arg has no ABI classification (ai==NULL), so an 724 * aggregate or >8-byte scalar here can't be marshalled (and op_value's 725 * 8-byte read would overflow) — diagnose rather than corrupt. */ 726 if (cg_type_is_aggregate(p->c, arg->type) || 727 abi_cg_sizeof(p->c->abi, arg->type) > 8u) { 728 unsupported(st, "external call: aggregate/oversized variadic argument"); 729 return 0; 730 } 731 { 732 ABITypeInfo ti = abi_cg_type_info(p->c->abi, arg->type); 733 u64 v = op_value(st, fr->fn, regs, fr->mem_off, &arg->storage); 734 int bad = (ti.scalar_kind == ABI_SC_FLOAT) 735 ? ffi_push_fp(&fa, v, ti.size ? ti.size : 8u, &reason) 736 : ffi_push_int(&fa, v, &reason); 737 if (bad) { 738 unsupported(st, reason); 739 return 0; 740 } 741 } 742 } 743 744 /* Return classification from the ABI's own return descriptor (robust even 745 * when desc->ret.type is void, e.g. a tail call whose result is not stored 746 * into any caller local). A small struct can come back in up to two 747 * registers; each part's class steers which return register the thunk reads. 748 */ 749 if (!fi->has_sret) { 750 if (fi->ret.kind == ABI_ARG_IGNORE || fi->ret.nparts == 0) { 751 fa.ret_is_void = 1; 752 fa.ret_nparts = 0; 753 } else if (fi->ret.nparts > 2) { 754 unsupported(st, "external call: 3+ register struct return"); 755 return 0; 756 } else { 757 u32 k; 758 fa.ret_nparts = (u8)fi->ret.nparts; 759 for (k = 0; k < fi->ret.nparts; ++k) { 760 fa.ret_fp[k] = (fi->ret.parts[k].cls == ABI_CLASS_FP) ? 1u : 0u; 761 fa.ret_size[k] = fi->ret.parts[k].size ? fi->ret.parts[k].size : 8u; 762 /* A 4-byte fp return part is a single in the low half of an fp reg; the 763 * two-register thunks read fp parts as doubles, so diagnose it. */ 764 if (fi->ret.nparts > 1u && fa.ret_fp[k] && fa.ret_size[k] == 4u) { 765 unsupported(st, "external call: 32-bit fp struct-return field"); 766 return 0; 767 } 768 } 769 } 770 } 771 772 { 773 u64 out[2] = {0, 0}; 774 if (interp_ffi_invoke(host_fp, &fa, out, &reason) != 0) { 775 unsupported(st, reason ? reason : "external call signature"); 776 return 0; 777 } 778 if (fa.ret_is_void || fa.ret_nparts == 0) return 0; 779 /* Deliver the result. A register destination (OPK_REG) takes the low 780 * register; a memory destination (an address-taken result local, or a small 781 * aggregate returned in registers) receives each part's bytes scattered to 782 * its src_offset. A value-less tail call has no home — the low register is 783 * shuttled out as the scalar result. */ 784 if (desc->ret.storage.kind == OPK_REG) { 785 if (fa.ret_nparts == 1) regs[desc->ret.storage.v.reg] = out[0]; 786 } else if (desc->ret.storage.kind == OPK_LOCAL || 787 desc->ret.storage.kind == OPK_GLOBAL || 788 desc->ret.storage.kind == OPK_INDIRECT) { 789 u64 dst = op_addr(st, fr->fn, regs, fr->mem_off, &desc->ret.storage); 790 u32 k; 791 for (k = 0; k < fi->ret.nparts && k < 2u; ++k) 792 mem_write(st, dst + fi->ret.parts[k].src_offset, fa.ret_size[k], out[k]); 793 } 794 return out[0]; 795 } 796 } 797 798 /* ---- engine ---- */ 799 800 /* Dispatch mechanism. With labels-as-values (GNU computed goto) the engine is 801 * direct-threaded: each InterpInsn caches the &&handler of its opcode and every 802 * handler tail-dispatches straight to the next via `goto *`, giving the branch 803 * predictor a distinct indirect branch per opcode site. This is the default 804 * (KIT_INTERP_THREADED in <kit/config.h>). GCC, clang, and kit itself 805 * (__kit__) all implement labels-as-values; any other compiler transparently 806 * falls back to a portable `switch`, sharing one set of handler bodies through 807 * OP()/NEXT()/GO(). Force the choice with -DKIT_INTERP_THREADED=0|1. */ 808 #if !defined(KIT_INTERP_THREADED) 809 /* Belt-and-braces: config.h normally defines this. Default on so a missed 810 * include degrades to threaded-where-supported, never a silent switch. */ 811 #define KIT_INTERP_THREADED 1 812 #endif 813 /* Effective dispatch: requested AND the compiler can compile labels-as-values. 814 */ 815 #if KIT_INTERP_THREADED && \ 816 (defined(__GNUC__) || defined(__clang__) || defined(__kit__)) 817 #define INTERP_DISPATCH_THREADED 1 818 #else 819 #define INTERP_DISPATCH_THREADED 0 820 #endif 821 822 /* The opcode roster: one entry per InterpOp with a handler, used to publish the 823 * threaded dispatch table from the in-function &&labels. Must stay in sync with 824 * the OP(...) handlers below (a missing/extra entry is a compile error: an 825 * undefined or unused label). */ 826 // clang-format off 827 #define INTERP_OPS(X) \ 828 X(IOP_NOP) \ 829 X(IOP_LOAD_IMM) \ 830 X(IOP_LOAD_CONST) \ 831 X(IOP_COPY) \ 832 X(IOP_COPY_AGG) \ 833 X(IOP_LOAD) \ 834 X(IOP_LOAD_AGG) \ 835 X(IOP_STORE) \ 836 X(IOP_STORE_AGG) \ 837 X(IOP_ADDR_OF) \ 838 X(IOP_TLS_ADDR) \ 839 X(IOP_BINOP) \ 840 X(IOP_UNOP) \ 841 X(IOP_CMP) \ 842 X(IOP_CONVERT) \ 843 X(IOP_CALL) \ 844 X(IOP_BR) \ 845 X(IOP_CONDBR) \ 846 X(IOP_CMP_BRANCH) \ 847 X(IOP_SWITCH) \ 848 X(IOP_INDIRECT_BR) \ 849 X(IOP_LOAD_LABEL_ADDR) \ 850 X(IOP_RET) \ 851 X(IOP_RET_VOID) \ 852 X(IOP_ALLOCA) \ 853 X(IOP_AGG_COPY) \ 854 X(IOP_AGG_SET) \ 855 X(IOP_BITFIELD_LOAD) \ 856 X(IOP_BITFIELD_STORE) \ 857 X(IOP_VA_START) \ 858 X(IOP_VA_ARG) \ 859 X(IOP_VA_END) \ 860 X(IOP_VA_COPY) \ 861 X(IOP_ATOMIC_LOAD) \ 862 X(IOP_ATOMIC_STORE) \ 863 X(IOP_ATOMIC_RMW) \ 864 X(IOP_ATOMIC_CAS) \ 865 X(IOP_FENCE) \ 866 X(IOP_INTRINSIC) \ 867 X(IOP_UNREACHABLE) \ 868 X(IOP_TRAP) 869 // clang-format on 870 871 #if INTERP_DISPATCH_THREADED 872 #define OP(name) L_##name 873 /* linear op: re-check the memory-fault latch, advance, dispatch the next insn 874 */ 875 #define NEXT() \ 876 do { \ 877 if (st->mem_fault) goto fault_mem; \ 878 ++ip; \ 879 in = ip; \ 880 I = in->inst; \ 881 goto * in->handler; \ 882 } while (0) 883 /* branch op: ip already retargeted, dispatch without advancing */ 884 #define GO() \ 885 do { \ 886 in = ip; \ 887 I = in->inst; \ 888 goto * in->handler; \ 889 } while (0) 890 #if defined(__clang__) 891 #pragma clang diagnostic push 892 #pragma clang diagnostic ignored "-Wgnu-label-as-value" 893 #pragma clang diagnostic ignored "-Wpedantic" 894 #elif defined(__GNUC__) 895 #pragma GCC diagnostic push 896 #pragma GCC diagnostic ignored "-Wpedantic" 897 #endif 898 #else 899 #define OP(name) case name 900 #define NEXT() break 901 #define GO() continue 902 #endif 903 904 KitInterpStatus interp_run_stack(InterpStack* st, int64_t* out_ret) { 905 InterpProgram* p = st->prog; 906 InterpFrame* fr; 907 InterpFunc* fn; 908 u64* regs; 909 u32 mem_off; 910 InterpInsn* ip; 911 InterpInsn* in = NULL; 912 const Inst* I = NULL; 913 914 if (st->nframes == 0) { 915 st->status = KIT_INTERP_DONE; 916 if (out_ret) *out_ret = (int64_t)st->scalar_ret; 917 return KIT_INTERP_DONE; 918 } 919 920 #if INTERP_DISPATCH_THREADED 921 /* Per-function lazy threading: copy each opcode's handler into its record on 922 * first entry to the function (RELOAD runs whenever the top frame changes). 923 */ 924 #define RELOAD() \ 925 do { \ 926 fr = &st->frames[st->nframes - 1u]; \ 927 fn = fr->fn; \ 928 regs = (u64*)(st->regs_arena + fr->regs_off); \ 929 mem_off = fr->mem_off; \ 930 ip = fr->ip; \ 931 if (!fn->threaded) { \ 932 u32 ti_; \ 933 for (ti_ = 0; ti_ < fn->ncode; ++ti_) { \ 934 u32 o_ = fn->code[ti_].op; \ 935 fn->code[ti_].handler = \ 936 g_dt[o_ < (u32)IOP__COUNT ? o_ : (u32)IOP_TRAP]; \ 937 } \ 938 fn->threaded = 1; \ 939 } \ 940 } while (0) 941 #else 942 #define RELOAD() \ 943 do { \ 944 fr = &st->frames[st->nframes - 1u]; \ 945 fn = fr->fn; \ 946 regs = (u64*)(st->regs_arena + fr->regs_off); \ 947 mem_off = fr->mem_off; \ 948 ip = fr->ip; \ 949 } while (0) 950 #endif 951 952 #if INTERP_DISPATCH_THREADED 953 static void* g_dt[IOP__COUNT]; 954 static int g_dt_ready = 0; 955 if (!g_dt_ready) { 956 #define DT_ENTRY(name) g_dt[name] = &&L_##name; 957 INTERP_OPS(DT_ENTRY) 958 #undef DT_ENTRY 959 g_dt_ready = 1; 960 } 961 #endif 962 963 RELOAD(); 964 if (!fn->ok) { 965 unsupported(st, fn->reject_reason ? fn->reject_reason : "function"); 966 return (KitInterpStatus)st->status; 967 } 968 st->mem_fault = 0; 969 970 #if INTERP_DISPATCH_THREADED 971 in = ip; 972 I = in->inst; 973 goto * in->handler; 974 #else 975 for (;;) { 976 in = ip; 977 I = in->inst; 978 switch ((InterpOp)in->op) { 979 #endif 980 OP(IOP_NOP) : NEXT(); 981 OP(IOP_LOAD_IMM) 982 : write_dst(st, fn, regs, mem_off, &I->opnds[0], (u64)in->imm); 983 NEXT(); 984 OP(IOP_LOAD_CONST) : { 985 ConstBytes cb = I->extra.cbytes; 986 u64 v = 0; 987 u32 n = cb.size > 8u ? 8u : cb.size; 988 if (cb.bytes && n) memcpy(&v, cb.bytes, n); 989 write_dst(st, fn, regs, mem_off, &I->opnds[0], v); 990 NEXT(); 991 } 992 OP(IOP_COPY) 993 : write_dst(st, fn, regs, mem_off, &I->opnds[0], 994 op_value(st, fn, regs, mem_off, &I->opnds[1])); 995 NEXT(); 996 OP(IOP_COPY_AGG) : { 997 u64 d = op_addr(st, fn, regs, mem_off, &I->opnds[0]); 998 u64 s = op_addr(st, fn, regs, mem_off, &I->opnds[1]); 999 mem_copy(st, d, s, abi_cg_sizeof(p->c->abi, I->opnds[0].type)); 1000 NEXT(); 1001 } 1002 OP(IOP_LOAD) : { 1003 u64 a = op_addr(st, fn, regs, mem_off, &I->opnds[1]); 1004 write_dst(st, fn, regs, mem_off, &I->opnds[0], 1005 mem_read(st, a, in->w0 ? in->w0 : 8u)); 1006 NEXT(); 1007 } 1008 OP(IOP_LOAD_AGG) : { 1009 u64 d = op_addr(st, fn, regs, mem_off, &I->opnds[0]); 1010 u64 s = op_addr(st, fn, regs, mem_off, &I->opnds[1]); 1011 mem_copy(st, d, s, abi_cg_sizeof(p->c->abi, I->opnds[0].type)); 1012 NEXT(); 1013 } 1014 OP(IOP_STORE) : { 1015 u64 a = op_addr(st, fn, regs, mem_off, &I->opnds[0]); 1016 u64 v = op_value(st, fn, regs, mem_off, &I->opnds[1]); 1017 mem_write(st, a, in->w0 ? in->w0 : 8u, v); 1018 NEXT(); 1019 } 1020 OP(IOP_STORE_AGG) : { 1021 u64 d = op_addr(st, fn, regs, mem_off, &I->opnds[0]); 1022 u64 s = op_addr(st, fn, regs, mem_off, &I->opnds[1]); 1023 mem_copy(st, d, s, abi_cg_sizeof(p->c->abi, I->opnds[1].type)); 1024 NEXT(); 1025 } 1026 OP(IOP_ADDR_OF) 1027 : write_dst(st, fn, regs, mem_off, &I->opnds[0], 1028 op_addr(st, fn, regs, mem_off, &I->opnds[1])); 1029 NEXT(); 1030 OP(IOP_BINOP) : { 1031 u64 r = do_binop(st, in->sub, op_value(st, fn, regs, mem_off, &I->opnds[1]), 1032 op_value(st, fn, regs, mem_off, &I->opnds[2]), in->w0, 1033 in->fp0); 1034 if (st->status) goto stop; 1035 write_dst(st, fn, regs, mem_off, &I->opnds[0], r); 1036 NEXT(); 1037 } 1038 OP(IOP_UNOP) : { 1039 u64 r = do_unop(st, in->sub, op_value(st, fn, regs, mem_off, &I->opnds[1]), 1040 in->w0, in->fp0); 1041 if (st->status) goto stop; 1042 write_dst(st, fn, regs, mem_off, &I->opnds[0], r); 1043 NEXT(); 1044 } 1045 OP(IOP_CMP) : { 1046 u64 r = 1047 (u64)do_cmp(st, in->sub, op_value(st, fn, regs, mem_off, &I->opnds[1]), 1048 op_value(st, fn, regs, mem_off, &I->opnds[2]), in->w0); 1049 if (st->status) goto stop; 1050 write_dst(st, fn, regs, mem_off, &I->opnds[0], r); 1051 NEXT(); 1052 } 1053 OP(IOP_CONVERT) : { 1054 u64 r = do_convert(st, in, op_value(st, fn, regs, mem_off, &I->opnds[1])); 1055 if (st->status) goto stop; 1056 write_dst(st, fn, regs, mem_off, &I->opnds[0], r); 1057 NEXT(); 1058 } 1059 OP(IOP_ALLOCA) : { 1060 u64 size = op_value(st, fn, regs, mem_off, &I->opnds[1]); 1061 u32 align = in->imm ? (u32)in->imm : 16u; 1062 u32 off = (fr->alloca_top + align - 1u) & ~(align - 1u); 1063 if ((u64)fr->mem_off + off + size > st->mem_cap) { 1064 fault(st, "alloca: stack overflow"); 1065 goto stop; 1066 } 1067 write_dst(st, fn, regs, mem_off, &I->opnds[0], 1068 frame_base(st, fr->mem_off) + off); 1069 fr->alloca_top = off + (u32)size; 1070 /* Advance the global high-water so a nested call's frame is allocated 1071 * ABOVE this live alloca region (otherwise it would alias it). */ 1072 if (fr->mem_off + fr->alloca_top > st->mem_top) 1073 st->mem_top = fr->mem_off + fr->alloca_top; 1074 NEXT(); 1075 } 1076 OP(IOP_BR) : ip = &fn->code[in->t0]; 1077 GO(); 1078 OP(IOP_CONDBR) : { 1079 u64 c = op_value(st, fn, regs, mem_off, &I->opnds[0]); 1080 /* A faulting selector would otherwise branch on garbage: branch ops 1081 * skip the straight-line fault re-check, so test the latch here. */ 1082 if (st->mem_fault) { 1083 fault(st, "invalid memory access"); 1084 goto stop; 1085 } 1086 ip = &fn->code[c ? in->t0 : in->t1]; 1087 GO(); 1088 } 1089 OP(IOP_CMP_BRANCH) : { 1090 int taken = do_cmp( 1091 st, in->sub, op_value(st, fn, regs, mem_off, &I->opnds[0]), 1092 op_value(st, fn, regs, mem_off, &I->opnds[1]), in->w0 ? in->w0 : 8u); 1093 if (st->status) goto stop; 1094 if (st->mem_fault) { 1095 fault(st, "invalid memory access"); 1096 goto stop; 1097 } 1098 ip = &fn->code[taken ? in->t0 : in->t1]; 1099 GO(); 1100 } 1101 OP(IOP_SWITCH) : { 1102 InterpSwitch* sw = &fn->switches[in->t0]; 1103 u64 sel = op_value(st, fn, regs, mem_off, &I->opnds[0]); 1104 u32 ci; 1105 u32 target = sw->default_pc; 1106 u32 selw = (u32)abi_cg_sizeof(p->c->abi, sw->sel_type); 1107 if (st->mem_fault) { 1108 fault(st, "invalid memory access"); 1109 goto stop; 1110 } 1111 for (ci = 0; ci < sw->ncases; ++ci) { 1112 if (mask_w(sel, selw) == mask_w(sw->aux->cases[ci].value, selw)) { 1113 target = sw->case_pc[ci]; 1114 break; /* leaves the case-search loop, not the dispatch */ 1115 } 1116 } 1117 if (target == INTERP_PC_NONE) { 1118 fault(st, "switch: no target"); 1119 goto stop; 1120 } 1121 ip = &fn->code[target]; 1122 GO(); 1123 } 1124 OP(IOP_LOAD_LABEL_ADDR) 1125 : /* encode target pc as the label address */ 1126 write_dst(st, fn, regs, mem_off, &I->opnds[0], (u64)in->t0); 1127 NEXT(); 1128 OP(IOP_INDIRECT_BR) : { 1129 u64 target = op_value(st, fn, regs, mem_off, &I->opnds[0]); 1130 if (st->mem_fault) { 1131 fault(st, "invalid memory access"); 1132 goto stop; 1133 } 1134 if (target >= fn->ncode) { 1135 fault(st, "indirect branch out of range"); 1136 goto stop; 1137 } 1138 ip = &fn->code[target]; 1139 GO(); 1140 } 1141 OP(IOP_CALL) : { 1142 IRCallAux* aux = (IRCallAux*)I->extra.aux; 1143 OptCGCallDesc* desc = &aux->desc; 1144 InterpFunc* callee = NULL; 1145 void* host_fp = NULL; 1146 if (desc->callee.kind == OPK_GLOBAL) { 1147 callee = interp_func_for_sym(p, desc->callee.v.global.sym); 1148 if (callee && !callee->ok) { 1149 /* A known internal callee we cannot interpret: propagate its 1150 * reason rather than silently calling the native version (the 1151 * --no-jit contract is that execution never falls back to JIT). */ 1152 unsupported(st, 1153 callee->reject_reason ? callee->reject_reason : "callee"); 1154 goto stop; 1155 } 1156 if (!callee) host_fp = interp_global_base(fn, desc->callee.v.global.sym); 1157 } else if (desc->callee.kind == OPK_REG) { 1158 host_fp = (void*)(uintptr_t)regs[desc->callee.v.reg]; 1159 /* If the function pointer targets a TU-internal function, interpret 1160 * it (don't run its native code) so --no-jit truly never executes 1161 * JITed code. External pointers fall through to the FFI path. */ 1162 callee = interp_func_for_addr(p, host_fp); 1163 if (callee && !callee->ok) { 1164 unsupported(st, 1165 callee->reject_reason ? callee->reject_reason : "callee"); 1166 goto stop; 1167 } 1168 } 1169 if (callee) { 1170 /* internal call: push a frame and bind args. */ 1171 u32 caller_idx = st->nframes - 1u; 1172 u32 callee_idx; 1173 if (!in->tail) fr->ip = ip + 1; /* resume after a non-tail call */ 1174 callee_idx = frame_push(st, callee); 1175 if (callee_idx == 0xffffffffu) { 1176 fault(st, "call: stack overflow"); 1177 goto stop; 1178 } 1179 bind_args(st, caller_idx, callee_idx, desc); 1180 if (!build_varargs(st, caller_idx, callee_idx, desc)) { 1181 fault(st, "call: stack overflow"); 1182 goto stop; 1183 } 1184 if (st->mem_fault) { 1185 fault(st, "invalid memory access"); 1186 goto stop; 1187 } 1188 { 1189 InterpFrame* cf = &st->frames[callee_idx]; 1190 InterpFrame* caller = &st->frames[caller_idx]; 1191 if (in->tail) { 1192 /* True O(1) tail call: the callee's result IS this function's 1193 * result, so inherit the tail-caller's return target and relocate 1194 * the freshly-built callee frame down onto the (now dead) caller's 1195 * register/memory region, rewinding the arenas. A tail loop then 1196 * runs in constant interp+host stack space instead of growing the 1197 * fixed reservation each iteration. 1198 * 1199 * Safe because the callee has not executed yet: no absolute 1200 * pointers into its own frame exist (va_start runs later; an arg 1201 * holding &caller_local would be UB, the caller being about to 1202 * return). bind_args/build_varargs already copied every argument 1203 * value out of the caller, so overwriting the caller is fine. */ 1204 u32 dst_regs = caller->regs_off; 1205 u32 dst_mem = caller->mem_off; 1206 u32 nregs_bytes = (callee->npregs ? callee->npregs : 1u) * 8u; 1207 u32 mem_used = cf->alloca_top; /* static frame + vararg buffer */ 1208 cf->ret_wanted = caller->ret_wanted; 1209 cf->ret_dst = caller->ret_dst; 1210 cf->sret_ptr = caller->sret_ptr; 1211 if (cf->regs_off != dst_regs) 1212 memmove(st->regs_arena + dst_regs, st->regs_arena + cf->regs_off, 1213 nregs_bytes); 1214 if (cf->mem_off != dst_mem) { 1215 memmove(st->mem_arena + dst_mem, st->mem_arena + cf->mem_off, 1216 mem_used); 1217 if (cf->has_varargs) cf->vararg_off -= (cf->mem_off - dst_mem); 1218 } 1219 cf->regs_off = dst_regs; 1220 cf->mem_off = dst_mem; 1221 *caller = *cf; 1222 st->nframes = caller_idx + 1u; 1223 st->regs_top = dst_regs + nregs_bytes; 1224 st->mem_top = dst_mem + mem_used; 1225 } else if (desc->ret.storage.kind == OPK_REG) { 1226 cf->ret_wanted = 1; 1227 cf->ret_dst = desc->ret.storage.v.reg; 1228 } else if (desc->ret.storage.kind == OPK_LOCAL) { 1229 /* aggregate return: callee writes into the caller's slot */ 1230 u64 a = frame_base(st, caller->mem_off) + 1231 caller->fn->slot_off[desc->ret.storage.v.frame_slot]; 1232 cf->sret_ptr = interp_translate(p, a, 1, PERM_W); 1233 } 1234 } 1235 RELOAD(); 1236 GO(); 1237 } 1238 if (!host_fp) { 1239 unsupported(st, "unresolved call target"); 1240 goto stop; 1241 } 1242 { 1243 u64 callret = ext_call(st, fr, regs, host_fp, desc); 1244 if (st->status) goto stop; 1245 if (in->tail) { 1246 /* External tail call: the call's result is this function's 1247 * result (desc.ret.storage may be empty for a tail call). */ 1248 u64 rv = callret; 1249 u8 want = fr->ret_wanted; 1250 u32 rdst = fr->ret_dst; 1251 st->regs_top = fr->regs_off; 1252 st->mem_top = fr->mem_off; 1253 st->nframes--; 1254 st->scalar_ret = rv; 1255 if (st->nframes == 0) { 1256 st->status = KIT_INTERP_DONE; 1257 if (out_ret) *out_ret = (int64_t)rv; 1258 return KIT_INTERP_DONE; 1259 } 1260 if (want) { 1261 InterpFrame* caller = &st->frames[st->nframes - 1u]; 1262 u64* cregs = (u64*)(st->regs_arena + caller->regs_off); 1263 cregs[rdst] = rv; 1264 } 1265 RELOAD(); 1266 GO(); 1267 } 1268 } 1269 NEXT(); 1270 } 1271 OP(IOP_RET) : OP(IOP_RET_VOID) : { 1272 u8 is_fp = 0; 1273 u64 rv = 0; 1274 u8* sret = fr->sret_ptr; 1275 if (in->op == IOP_RET) { 1276 IRRetAux* aux = (IRRetAux*)I->extra.aux; 1277 OptCGABIValue* val = &aux->val; 1278 if (cg_type_is_aggregate(p->c, val->type) || 1279 abi_cg_sizeof(p->c->abi, val->type) > 8u) { 1280 if (sret) { 1281 u64 src = op_addr(st, fn, regs, mem_off, &val->storage); 1282 u8* s = interp_translate(p, src, abi_cg_sizeof(p->c->abi, val->type), 1283 PERM_R); 1284 if (s) memcpy(sret, s, abi_cg_sizeof(p->c->abi, val->type)); 1285 } 1286 } else { 1287 ABITypeInfo ti = abi_cg_type_info(p->c->abi, val->type); 1288 u32 sz = abi_cg_sizeof(p->c->abi, val->type); 1289 rv = op_value(st, fn, regs, mem_off, &val->storage); 1290 is_fp = (ti.scalar_kind == ABI_SC_FLOAT) ? 1u : 0u; 1291 /* A scalar result whose caller destination is a memory slot (an 1292 * address-taken result local) is delivered via sret_ptr, not a 1293 * register — write it there. */ 1294 if (sret) memcpy(sret, &rv, sz ? (sz > 8u ? 8u : sz) : 8u); 1295 } 1296 } 1297 /* The popped (callee) frame records where its scalar result lands in 1298 * the caller — capture before popping, then rewind the arenas to the 1299 * frame's bases (strict stack discipline). */ 1300 { 1301 u8 want = fr->ret_wanted; 1302 u32 dst = fr->ret_dst; 1303 st->regs_top = fr->regs_off; 1304 st->mem_top = fr->mem_off; 1305 st->nframes--; 1306 st->scalar_ret = rv; 1307 st->ret_is_fp = is_fp; 1308 if (st->nframes == 0) { 1309 st->status = KIT_INTERP_DONE; 1310 if (out_ret) *out_ret = (int64_t)rv; 1311 return KIT_INTERP_DONE; 1312 } 1313 if (want) { 1314 InterpFrame* caller = &st->frames[st->nframes - 1u]; 1315 u64* cregs = (u64*)(st->regs_arena + caller->regs_off); 1316 cregs[dst] = rv; 1317 } 1318 } 1319 RELOAD(); 1320 GO(); 1321 } 1322 OP(IOP_INTRINSIC) : { 1323 if (!interp_intrinsic(st, fn, regs, mem_off, in)) goto stop; 1324 NEXT(); 1325 } 1326 OP(IOP_FENCE) : NEXT(); /* single-thread: no-op */ 1327 OP(IOP_AGG_SET) : OP(IOP_AGG_COPY) : { 1328 /* AGG_COPY/SET use pointer-deref addressing (pointer_addr_from_operand): 1329 * a LOCAL holding a pointer is dereferenced; otherwise it is the slot. */ 1330 u64 d = interp_ptr_addr(st, fn, regs, mem_off, &I->opnds[0]); 1331 if (in->op == IOP_AGG_COPY) { 1332 IRAggAux* aux = (IRAggAux*)I->extra.aux; 1333 u64 s = interp_ptr_addr(st, fn, regs, mem_off, &I->opnds[1]); 1334 mem_copy(st, d, s, aux ? aux->access.size : 0u); 1335 } else { 1336 IRAggAux* aux = (IRAggAux*)I->extra.aux; 1337 u64 byte = op_value(st, fn, regs, mem_off, &I->opnds[1]); 1338 u32 n = aux ? aux->access.size : 0u; 1339 u8* h = interp_translate(p, d, n, PERM_W); 1340 if (h) 1341 memset(h, (int)(byte & 0xffu), n); 1342 else 1343 st->mem_fault = 1; 1344 } 1345 NEXT(); 1346 } 1347 OP(IOP_TLS_ADDR) : { 1348 /* A thread-local's symbol does not resolve to its storage on every 1349 * target (a Mach-O symbol resolves to a TLV descriptor), so route 1350 * through interp_tls_addr / the host resolve_tls hook, which returns the 1351 * running thread's address of the variable (already +addend). */ 1352 IRTlsAux* aux = (IRTlsAux*)I->extra.aux; 1353 void* addr = aux ? interp_tls_addr(fn, aux->sym, aux->addend) : NULL; 1354 if (!addr) { 1355 unsupported(st, "unresolved thread-local symbol"); 1356 goto stop; 1357 } 1358 write_dst(st, fn, regs, mem_off, &I->opnds[0], (u64)(uintptr_t)addr); 1359 NEXT(); 1360 } 1361 OP(IOP_BITFIELD_LOAD) : { 1362 /* opnds[1] is the record address; the field bits live in the storage 1363 * unit at record + storage_offset. Extract by shift+mask (target uses 1364 * little-endian bit numbering), sign-extending signed fields. */ 1365 IRBitFieldAux* aux = (IRBitFieldAux*)I->extra.aux; 1366 u64 rec, raw, v = 0; 1367 u32 ssz, width; 1368 if (!aux) { 1369 unsupported(st, "bitfield access"); 1370 goto stop; 1371 } 1372 rec = op_addr(st, fn, regs, mem_off, &I->opnds[1]); 1373 ssz = aux->access.storage.size ? aux->access.storage.size : 4u; 1374 width = aux->access.bit_width; 1375 if (width) { 1376 raw = mem_read(st, rec + aux->access.storage_offset, ssz); 1377 v = (raw >> aux->access.bit_offset) & bits_mask(width); 1378 if (aux->access.signed_ && width < 64u && (v & (1ull << (width - 1u)))) 1379 v |= ~bits_mask(width); 1380 } 1381 write_dst(st, fn, regs, mem_off, &I->opnds[0], v); 1382 NEXT(); 1383 } 1384 OP(IOP_BITFIELD_STORE) : { 1385 /* opnds[0] = record address, opnds[1] = source value. Read-modify-write 1386 * the storage unit: clear the field bits, then OR in the masked, shifted 1387 * source. A zero-width field is a layout barrier — no store. */ 1388 IRBitFieldAux* aux = (IRBitFieldAux*)I->extra.aux; 1389 u64 rec, addr, ones, fmask, src, raw; 1390 u32 ssz, width; 1391 if (!aux) { 1392 unsupported(st, "bitfield access"); 1393 goto stop; 1394 } 1395 width = aux->access.bit_width; 1396 if (width == 0) NEXT(); 1397 rec = op_addr(st, fn, regs, mem_off, &I->opnds[0]); 1398 ssz = aux->access.storage.size ? aux->access.storage.size : 4u; 1399 addr = rec + aux->access.storage_offset; 1400 ones = bits_mask(width); 1401 fmask = ones << aux->access.bit_offset; 1402 src = op_value(st, fn, regs, mem_off, &I->opnds[1]); 1403 raw = mem_read(st, addr, ssz); 1404 raw = (raw & ~fmask) | ((src & ones) << aux->access.bit_offset); 1405 mem_write(st, addr, ssz, raw); 1406 NEXT(); 1407 } 1408 OP(IOP_VA_START) : { 1409 /* opnds[0] is the va_list object's address (a pointer value). Seed it 1410 * with a cursor over this frame's anonymous-argument buffer. */ 1411 u64 ap = op_value(st, fn, regs, mem_off, &I->opnds[0]); 1412 u64 cursor = fr->has_varargs ? frame_base(st, fr->vararg_off) : 0u; 1413 mem_write(st, ap, 8u, cursor); 1414 NEXT(); 1415 } 1416 OP(IOP_VA_END) : NEXT(); /* nothing to release in the cursor model */ 1417 OP(IOP_VA_COPY) : { 1418 /* opnds = [dst va_list addr, src va_list addr]: duplicate the cursor. */ 1419 u64 d = op_value(st, fn, regs, mem_off, &I->opnds[0]); 1420 u64 s = op_value(st, fn, regs, mem_off, &I->opnds[1]); 1421 mem_write(st, d, 8u, mem_read(st, s, 8u)); 1422 NEXT(); 1423 } 1424 OP(IOP_VA_ARG) : { 1425 /* opnds[0] = dst (type drives the read width), opnds[1] = va_list addr. 1426 * Align the cursor, read the slot, advance, store the cursor back. */ 1427 KitCgTypeId ty = I->opnds[0].type; 1428 u64 ap = op_value(st, fn, regs, mem_off, &I->opnds[1]); 1429 u64 cursor = mem_read(st, ap, 8u); 1430 u32 size = abi_cg_sizeof(p->c->abi, ty); 1431 u32 al = va_align_of(size); 1432 cursor = (cursor + al - 1u) & ~((u64)al - 1u); 1433 if (cg_type_is_aggregate(p->c, ty) || size > 8u) { 1434 u64 dstaddr = op_addr(st, fn, regs, mem_off, &I->opnds[0]); 1435 mem_copy(st, dstaddr, cursor, size); 1436 } else { 1437 write_dst(st, fn, regs, mem_off, &I->opnds[0], 1438 mem_read(st, cursor, size ? size : 8u)); 1439 } 1440 mem_write(st, ap, 8u, cursor + va_stride_of(size)); 1441 NEXT(); 1442 } 1443 /* Atomics: single-threaded interpreter, so the operation is serialized 1444 * and the memory order is irrelevant (treated as seq-cst). */ 1445 OP(IOP_ATOMIC_LOAD) : { 1446 u64 a = op_value(st, fn, regs, mem_off, &I->opnds[1]); 1447 write_dst(st, fn, regs, mem_off, &I->opnds[0], 1448 mem_read(st, a, in->w0 ? in->w0 : 8u)); 1449 NEXT(); 1450 } 1451 OP(IOP_ATOMIC_STORE) : { 1452 u64 a = op_value(st, fn, regs, mem_off, &I->opnds[0]); 1453 mem_write(st, a, in->w0 ? in->w0 : 8u, 1454 op_value(st, fn, regs, mem_off, &I->opnds[1])); 1455 NEXT(); 1456 } 1457 OP(IOP_ATOMIC_RMW) : { 1458 u32 w = in->w0 ? in->w0 : 8u; 1459 u64 a = op_value(st, fn, regs, mem_off, &I->opnds[1]); 1460 u64 old = mem_read(st, a, w); 1461 u64 v = op_value(st, fn, regs, mem_off, &I->opnds[2]); 1462 mem_write(st, a, w, do_rmw(in->sub, old, v, w)); 1463 write_dst(st, fn, regs, mem_off, &I->opnds[0], old); 1464 NEXT(); 1465 } 1466 OP(IOP_ATOMIC_CAS) : { 1467 u32 w = in->w0 ? in->w0 : 8u; 1468 u64 a = op_value(st, fn, regs, mem_off, &I->opnds[2]); 1469 u64 expected = op_value(st, fn, regs, mem_off, &I->opnds[3]); 1470 u64 desired = op_value(st, fn, regs, mem_off, &I->opnds[4]); 1471 u64 old = mem_read(st, a, w); 1472 u64 ok = (mask_w(old, w) == mask_w(expected, w)); 1473 if (ok) mem_write(st, a, w, desired); 1474 write_dst(st, fn, regs, mem_off, &I->opnds[0], old); /* prior */ 1475 write_dst(st, fn, regs, mem_off, &I->opnds[1], ok); /* ok flag */ 1476 NEXT(); 1477 } 1478 OP(IOP_UNREACHABLE) : fault(st, "unreachable"); 1479 goto stop; 1480 OP(IOP_TRAP) 1481 : unsupported(st, fn->reject_reason ? fn->reject_reason : "operation"); 1482 goto stop; 1483 #if !INTERP_DISPATCH_THREADED 1484 default: 1485 unsupported(st, "opcode"); 1486 goto stop; 1487 } 1488 if (st->mem_fault) { 1489 fault(st, "invalid memory access"); 1490 goto stop; 1491 } 1492 ip++; 1493 } 1494 #else 1495 fault_mem: 1496 fault(st, "invalid memory access"); 1497 /* fall through to stop */ 1498 #endif 1499 1500 stop: fr->ip = ip; 1501 return (KitInterpStatus)st->status; 1502 #undef RELOAD 1503 } 1504 #if INTERP_DISPATCH_THREADED 1505 #if defined(__clang__) 1506 #pragma clang diagnostic pop 1507 #elif defined(__GNUC__) 1508 #pragma GCC diagnostic pop 1509 #endif 1510 #endif 1511 1512 /* ---- intrinsics ---- */ 1513 1514 static u64 ipopcount(u64 v, u32 w) { 1515 u64 m = (w >= 8) ? ~0ull : ((1ull << (w * 8u)) - 1ull); 1516 u64 x = v & m; 1517 u64 n = 0; 1518 while (x) { 1519 n += (x & 1u); 1520 x >>= 1; 1521 } 1522 return n; 1523 } 1524 static u64 ictz(u64 v, u32 w) { 1525 u32 bits = w * 8u; 1526 u64 n = 0; 1527 if ((v & ((bits >= 64) ? ~0ull : ((1ull << bits) - 1ull))) == 0) return bits; 1528 while (!(v & 1u)) { 1529 n++; 1530 v >>= 1; 1531 } 1532 return n; 1533 } 1534 static u64 iclz(u64 v, u32 w) { 1535 u32 bits = w * 8u; 1536 u64 n = 0; 1537 u64 top = 1ull << (bits - 1u); 1538 v &= (bits >= 64) ? ~0ull : ((1ull << bits) - 1ull); 1539 if (v == 0) return bits; 1540 while (!(v & top)) { 1541 n++; 1542 v <<= 1; 1543 } 1544 return n; 1545 } 1546 static u64 ibswap(u64 v, u32 nbytes) { 1547 u64 r = 0; 1548 u32 i; 1549 for (i = 0; i < nbytes; ++i) { 1550 r = (r << 8) | (v & 0xffu); 1551 v >>= 8; 1552 } 1553 return r; 1554 } 1555 1556 static int interp_intrinsic(InterpStack* st, InterpFunc* fn, u64* regs, 1557 u32 mem_off, InterpInsn* in) { 1558 InterpProgram* p = st->prog; 1559 IRIntrinAux* aux = (IRIntrinAux*)in->inst->extra.aux; 1560 Compiler* c = p->c; 1561 if (!aux) { 1562 unsupported(st, "intrinsic"); 1563 return 0; 1564 } 1565 #define ARGV(i) op_value(st, fn, regs, mem_off, &aux->args[i]) 1566 #define AWID(i) ((u32)abi_cg_sizeof(c->abi, aux->args[i].type)) 1567 #define DWID(i) ((u32)abi_cg_sizeof(c->abi, aux->dsts[i].type)) 1568 #define DST0 \ 1569 (aux->ndst > 0 && aux->dsts[0].kind == OPK_REG ? aux->dsts[0].v.reg : 0u) 1570 switch (aux->kind) { 1571 case INTRIN_MEMMOVE: { 1572 u64 d = ARGV(0), s = ARGV(1), n = ARGV(2); 1573 mem_copy(st, d, s, (u32)n); 1574 if (aux->ndst > 0 && aux->dsts[0].kind == OPK_REG) regs[DST0] = d; 1575 return 1; 1576 } 1577 case INTRIN_POPCOUNT: 1578 regs[DST0] = ipopcount(ARGV(0), AWID(0)); 1579 return 1; 1580 case INTRIN_CTZ: 1581 regs[DST0] = ictz(ARGV(0), AWID(0)); 1582 return 1; 1583 case INTRIN_CLZ: 1584 regs[DST0] = iclz(ARGV(0), AWID(0)); 1585 return 1; 1586 case INTRIN_BSWAP: 1587 regs[DST0] = ibswap(ARGV(0), DWID(0)); 1588 return 1; 1589 case INTRIN_EXPECT: 1590 regs[DST0] = ARGV(0); 1591 return 1; 1592 case INTRIN_ASSUME_ALIGNED: 1593 if (aux->ndst > 0 && aux->dsts[0].kind == OPK_REG) regs[DST0] = ARGV(0); 1594 return 1; 1595 case INTRIN_PREFETCH: 1596 return 1; 1597 /* CPU hints and memory barriers have no observable effect in the 1598 * single-threaded interpreter model: treat them as no-ops. */ 1599 case INTRIN_CPU_NOP: 1600 case INTRIN_CPU_YIELD: 1601 case INTRIN_ISB: 1602 case INTRIN_DMB: 1603 case INTRIN_DSB: 1604 return 1; 1605 case INTRIN_TRAP: 1606 fault(st, "__builtin_trap"); 1607 return 0; 1608 case INTRIN_SADD_OVERFLOW: 1609 case INTRIN_UADD_OVERFLOW: 1610 case INTRIN_SSUB_OVERFLOW: 1611 case INTRIN_USUB_OVERFLOW: 1612 case INTRIN_SMUL_OVERFLOW: 1613 case INTRIN_UMUL_OVERFLOW: { 1614 u32 w = AWID(0); 1615 u64 a = ARGV(0), b = ARGV(1); 1616 u64 res = 0; 1617 int ovf = 0; 1618 switch (aux->kind) { 1619 /* For w<8 the operands fit in i64/u64 so the exact result is available 1620 * and a re-narrow comparison detects overflow; for w==8 there is no 1621 * wider type, so detect via sign/carry logic (the re-narrow trick would 1622 * always read "no overflow"). */ 1623 case INTRIN_SADD_OVERFLOW: { 1624 i64 x = sext_w(a, w), y = sext_w(b, w); 1625 u64 r = (u64)x + (u64)y; 1626 res = mask_w(r, w); 1627 ovf = (w < 8) ? (sext_w(res, w) != x + y) 1628 : (int)((((u64)x ^ r) & ((u64)y ^ r)) >> 63); 1629 break; 1630 } 1631 case INTRIN_UADD_OVERFLOW: { 1632 u64 x = mask_w(a, w), y = mask_w(b, w), r = x + y; 1633 res = mask_w(r, w); 1634 ovf = (res != r) || (mask_w(r, w) < x); 1635 break; 1636 } 1637 case INTRIN_SSUB_OVERFLOW: { 1638 i64 x = sext_w(a, w), y = sext_w(b, w); 1639 u64 r = (u64)x - (u64)y; 1640 res = mask_w(r, w); 1641 ovf = (w < 8) ? (sext_w(res, w) != x - y) 1642 : (int)((((u64)x ^ (u64)y) & ((u64)x ^ r)) >> 63); 1643 break; 1644 } 1645 case INTRIN_USUB_OVERFLOW: { 1646 ovf = mask_w(a, w) < mask_w(b, w); 1647 res = mask_w(mask_w(a, w) - mask_w(b, w), w); 1648 break; 1649 } 1650 case INTRIN_SMUL_OVERFLOW: { 1651 i64 x = sext_w(a, w), y = sext_w(b, w); 1652 u64 r = (u64)x * (u64)y; 1653 res = mask_w(r, w); 1654 if (w < 8) { 1655 ovf = (sext_w(res, w) != x * y); 1656 } else if (x == 0 || y == 0) { 1657 ovf = 0; 1658 } else if ((x == -1 && (u64)y == 0x8000000000000000ull) || 1659 (y == -1 && (u64)x == 0x8000000000000000ull)) { 1660 ovf = 1; /* INT64_MIN * -1 */ 1661 } else { 1662 ovf = ((i64)r / x != y); 1663 } 1664 break; 1665 } 1666 case INTRIN_UMUL_OVERFLOW: { 1667 u64 x = mask_w(a, w), y = mask_w(b, w), r = x * y; 1668 res = mask_w(r, w); 1669 ovf = (w < 8) ? (r != res) : (x != 0 && r / x != y); 1670 break; 1671 } 1672 default: 1673 break; 1674 } 1675 if (aux->ndst > 0 && aux->dsts[0].kind == OPK_REG) 1676 regs[aux->dsts[0].v.reg] = res; 1677 if (aux->ndst > 1 && aux->dsts[1].kind == OPK_REG) 1678 regs[aux->dsts[1].v.reg] = (u64)ovf; 1679 return 1; 1680 } 1681 default: 1682 unsupported(st, "intrinsic"); 1683 return 0; 1684 } 1685 #undef ARGV 1686 #undef AWID 1687 #undef DWID 1688 #undef DST0 1689 } 1690 1691 /* ---- public stack API ---- */ 1692 1693 KitInterpStack* kit_interp_stack_new(KitInterpProgram* pp) { 1694 InterpProgram* p = (InterpProgram*)pp; 1695 Heap* h; 1696 InterpStack* st; 1697 if (!p) return NULL; 1698 h = p->c->ctx->heap; 1699 st = (InterpStack*)h->alloc(h, sizeof(*st), _Alignof(InterpStack)); 1700 if (!st) return NULL; 1701 memset(st, 0, sizeof *st); 1702 st->prog = p; 1703 /* Fixed, non-relocating arenas (see bump()/INTERP_*_RESERVE). */ 1704 st->regs_arena = (u8*)h->alloc(h, INTERP_REGS_RESERVE, 16u); 1705 st->mem_arena = (u8*)h->alloc(h, INTERP_MEM_RESERVE, 16u); 1706 if (!st->regs_arena || !st->mem_arena) { 1707 if (st->regs_arena) h->free(h, st->regs_arena, INTERP_REGS_RESERVE); 1708 if (st->mem_arena) h->free(h, st->mem_arena, INTERP_MEM_RESERVE); 1709 h->free(h, st, sizeof *st); 1710 return NULL; 1711 } 1712 st->regs_cap = INTERP_REGS_RESERVE; 1713 st->mem_cap = INTERP_MEM_RESERVE; 1714 return (KitInterpStack*)st; 1715 } 1716 1717 void kit_interp_stack_free(KitInterpStack* s) { 1718 InterpStack* st = (InterpStack*)s; 1719 Heap* h; 1720 if (!st) return; 1721 h = st->prog->c->ctx->heap; 1722 if (st->frames) h->free(h, st->frames, sizeof(InterpFrame) * st->frames_cap); 1723 if (st->regs_arena) h->free(h, st->regs_arena, st->regs_cap); 1724 if (st->mem_arena) h->free(h, st->mem_arena, st->mem_cap); 1725 h->free(h, st, sizeof *st); 1726 } 1727 1728 static void bind_entry_param(InterpStack* st, InterpFunc* fn, u32 idx, u32 i, 1729 u64 value) { 1730 InterpFrame* fr = &st->frames[idx]; 1731 IRParam* pr; 1732 if (i >= fn->f->nparams) return; 1733 pr = &fn->f->params[i]; 1734 if (pr->storage.kind == CG_LOCAL_STORAGE_REG) { 1735 u64* regs = (u64*)(st->regs_arena + fr->regs_off); 1736 regs[pr->storage.v.reg] = value; 1737 } else { 1738 u64 dst = 1739 frame_base(st, fr->mem_off) + fn->slot_off[pr->storage.v.frame_slot]; 1740 mem_write(st, dst, 8u, value); 1741 } 1742 } 1743 1744 KitStatus kit_interp_call_on(KitInterpStack* s, KitInterpFunc* ff, int argc, 1745 char** argv) { 1746 InterpStack* st = (InterpStack*)s; 1747 InterpFunc* fn = (InterpFunc*)ff; 1748 u32 idx; 1749 if (!st || !fn) return KIT_INVALID; 1750 idx = frame_push(st, fn); 1751 if (idx == 0xffffffffu) return KIT_NOMEM; 1752 bind_entry_param(st, fn, idx, 0u, (u64)(unsigned)argc); 1753 bind_entry_param(st, fn, idx, 1u, (u64)(uintptr_t)argv); 1754 return KIT_OK; 1755 } 1756 1757 KitInterpStatus kit_interp_resume(KitInterpStack* s, int64_t* out_ret) { 1758 InterpStack* st = (InterpStack*)s; 1759 if (!st) return KIT_INTERP_ERROR; 1760 return interp_run_stack(st, out_ret); 1761 } 1762 1763 KitInterpStatus kit_interp_call(KitInterpProgram* pp, KitInterpFunc* ff, 1764 int argc, char** argv, int64_t* out_ret) { 1765 KitInterpStack* s = kit_interp_stack_new(pp); 1766 KitInterpStatus rc; 1767 if (!s) return KIT_INTERP_ERROR; 1768 if (kit_interp_call_on(s, ff, argc, argv) != KIT_OK) { 1769 kit_interp_stack_free(s); 1770 return KIT_INTERP_ERROR; 1771 } 1772 rc = kit_interp_resume(s, out_ret); 1773 kit_interp_stack_free(s); 1774 return rc; 1775 } 1776 1777 KitInterpStatus kit_interp_call_args(KitInterpProgram* pp, KitInterpFunc* ff, 1778 const uint64_t* args, uint32_t nargs, 1779 int64_t* out_ret) { 1780 InterpStack* st = (InterpStack*)kit_interp_stack_new(pp); 1781 InterpFunc* fn = (InterpFunc*)ff; 1782 KitInterpStatus rc; 1783 u32 idx, i; 1784 if (!st) return KIT_INTERP_ERROR; 1785 if (!fn) { 1786 kit_interp_stack_free((KitInterpStack*)st); 1787 return KIT_INTERP_ERROR; 1788 } 1789 idx = frame_push(st, fn); 1790 if (idx == 0xffffffffu) { 1791 kit_interp_stack_free((KitInterpStack*)st); 1792 return KIT_INTERP_ERROR; 1793 } 1794 for (i = 0; i < nargs; ++i) bind_entry_param(st, fn, idx, i, args[i]); 1795 rc = interp_run_stack(st, out_ret); 1796 kit_interp_stack_free((KitInterpStack*)st); 1797 return rc; 1798 } 1799 1800 KitStatus kit_interp_stack_reset(KitInterpStack* s) { 1801 InterpStack* st = (InterpStack*)s; 1802 if (!st) return KIT_INVALID; 1803 /* Keep the (fixed, non-relocating) arenas; rewind their bump tops and drop 1804 * all frames + the return shuttle + any prior status/trap. */ 1805 st->nframes = 0; 1806 st->regs_top = 0; 1807 st->mem_top = 0; 1808 st->scalar_ret = 0; 1809 st->ret_is_fp = 0; 1810 st->status = KIT_INTERP_DONE; 1811 st->trap_reason = NULL; 1812 st->mem_fault = 0; 1813 return KIT_OK; 1814 } 1815 1816 KitStatus kit_interp_call_args_on(KitInterpStack* s, KitInterpFunc* ff, 1817 const uint64_t* args, uint32_t nargs) { 1818 InterpStack* st = (InterpStack*)s; 1819 InterpFunc* fn = (InterpFunc*)ff; 1820 u32 idx, i; 1821 if (!st || !fn) return KIT_INVALID; 1822 idx = frame_push(st, fn); 1823 if (idx == 0xffffffffu) return KIT_NOMEM; 1824 for (i = 0; i < nargs; ++i) bind_entry_param(st, fn, idx, i, args[i]); 1825 return KIT_OK; 1826 } 1827 1828 const char* kit_interp_stack_trap_reason(KitInterpStack* s) { 1829 InterpStack* st = (InterpStack*)s; 1830 return st ? st->trap_reason : NULL; 1831 }