emit.c (184324B)
1 /* Wasm CGTarget emission. 2 * 3 * Records CGTarget operations into a per-function WIR list, then linearizes 4 * to a WasmFunc body at func_end. Each SSA Reg becomes a Wasm local; control 5 * flow that fits the kit_cg_if_begin/else/end pattern lowers to 6 * if/else/end, while CG scopes (SCOPE_LOOP) lower to (block (loop ...)). 7 * 8 * TODO: complete IR-to-WASM coverage for: 9 * - bitfield load/store 10 * - multiple call/return results 11 * - address-taken parameters 12 * - ABI multipart params 13 * - dynamic memcpy/memset via memory.* calls 14 * - file-scope asm 15 * - atomics 16 * - intrinsics 17 */ 18 19 #include <stdarg.h> 20 #include <string.h> 21 22 #include "abi/abi.h" 23 #include "arch/wasm/internal.h" 24 #include "cg/type.h" 25 #include "core/arena.h" 26 #include "core/buf.h" 27 #include "core/heap.h" 28 #include "core/pool.h" 29 #include "obj/obj.h" 30 #include "obj/wasm_imports.h" 31 32 /* Shared Wasm core: in-memory WasmModule, helpers (wasm_add_func, 33 * wasm_intern_func_type, wasm_func_add_insn, ...), and wasm_encode for 34 * the final flush from emit_wasm. */ 35 #include "wasm/wasm.h" 36 37 /* ----------------------------------------------------------------- 38 * Helpers 39 * ----------------------------------------------------------------- */ 40 41 static SrcLoc cur_loc(WTarget* t) { 42 /* Prefer the most recent statement loc the frontend reported via 43 * wasm_set_loc — gives diagnostics the actual failing line, not the 44 * function definition's line. Fall back to the function loc when no 45 * statement loc has been set (line == 0). */ 46 if (t->cur_stmt_loc.line) return t->cur_stmt_loc; 47 if (t->cur_fn_desc) return t->cur_fn_desc->loc; 48 SrcLoc l = {0, 0, 0}; 49 return l; 50 } 51 52 static _Noreturn void wfail(WTarget* t, const char* fmt, ...) { 53 va_list ap; 54 va_start(ap, fmt); 55 compiler_panicv(t->c, cur_loc(t), fmt, ap); 56 } 57 58 static _Noreturn void wfail_at(WTarget* t, SrcLoc loc, const char* fmt, ...) 
{ 59 va_list ap; 60 va_start(ap, fmt); 61 compiler_panicv(t->c, loc, fmt, ap); 62 } 63 64 static struct WasmModule* ensure_module(WTarget* t); 65 66 static const char* pool_sym_cstr(Pool* p, Sym sym, size_t* len_out) { 67 Slice sl = pool_slice(p, sym); 68 if (len_out) *len_out = sl.len; 69 return sl.s; 70 } 71 72 static WasmValType valtype_for_size_kind(WTarget* t, u32 size, u8 scalar_kind) { 73 if (scalar_kind == ABI_SC_FLOAT) { 74 if (size == 4) return WASM_VAL_F32; 75 if (size == 8) return WASM_VAL_F64; 76 /* The only C float wider than f64 is binary128 long double, which has 77 * no wasm value type. Report it specifically rather than as a generic 78 * size error. */ 79 if (size == 16) wfail(t, "wasm: long double not supported"); 80 wfail(t, "wasm: unsupported float size %u", size); 81 } 82 if (size <= 4) return WASM_VAL_I32; 83 if (size == 8) return WASM_VAL_I64; 84 wfail(t, "wasm: unsupported integer size %u", size); 85 } 86 87 static WasmValType valtype_for_type(WTarget* t, KitCgTypeId ty) { 88 ABITypeInfo ti = abi_cg_type_info(t->c->abi, ty); 89 if (ti.scalar_kind == ABI_SC_VOID) { 90 wfail(t, "wasm: void value type requested"); 91 } 92 if (ti.scalar_kind == ABI_SC_PTR) return WASM_VAL_I32; /* wasm32 ILP32 */ 93 return valtype_for_size_kind(t, ti.size, ti.scalar_kind); 94 } 95 96 static u32 align_to_u32(u32 v, u32 a) { 97 if (!a) return v; 98 return (v + a - 1u) & ~(a - 1u); 99 } 100 101 /* Low-memory guard: leaves the first NULL_GUARD bytes of linear memory 102 * unassigned so addr==0 never resolves to a real symbol. */ 103 #define WASM_DATA_NULL_GUARD 16u 104 105 static void type_size_align(WTarget* t, KitCgTypeId ty, u32 fallback_size, 106 u32 fallback_align, u32* size_out, u32* align_out) { 107 ABITypeInfo ti; 108 if (ty) { 109 ti = abi_cg_type_info(t->c->abi, ty); 110 *size_out = ti.size ? ti.size : fallback_size; 111 *align_out = ti.align ? 
ti.align : fallback_align; 112 } else { 113 *size_out = fallback_size; 114 *align_out = fallback_align; 115 } 116 if (!*size_out) *size_out = 1; 117 if (!*align_out) *align_out = 1; 118 } 119 120 static u32 add_wasm_local(WTarget* t, WasmValType vt) { 121 if (!t->cur_func) wfail(t, "wasm: local allocation outside a function"); 122 return wasm_func_push_local(t->c, t->module, t->cur_func, vt); 123 } 124 125 static void ensure_linear_memory(WTarget* t) { 126 ensure_module(t); 127 if (!t->has_memory) { 128 WasmMemory* mem = wasm_add_memory(t->c, t->module); 129 mem->min_pages = 1; 130 t->has_memory = 1; 131 } 132 } 133 134 /* Atomic memory ops require the linear memory to be declared shared and to 135 * carry a maximum size. We promote the (single) module memory to shared on 136 * first atomic emission. max_pages is provisionally set to the wasm32 limit 137 * (65536, i.e. 4 GiB); wasm_materialize_data tightens it to match min_pages 138 * after the final layout is known so embedders can pre-reserve a snug arena. */ 139 static void ensure_shared_memory(WTarget* t) { 140 ensure_linear_memory(t); 141 WasmMemory* mem = &t->module->memories[0]; 142 if (!mem->shared) { 143 mem->shared = 1; 144 } 145 if (!mem->has_max) { 146 mem->has_max = 1; 147 mem->max_pages = 65536u; 148 } 149 } 150 151 static void ensure_stack_pointer(WTarget* t) { 152 ensure_linear_memory(t); 153 if (!t->has_stack_pointer) { 154 WasmGlobal* g = wasm_add_global(t->c, t->module); 155 g->name = wasm_strdup(t->module->heap, "__stack_pointer", 156 sizeof("__stack_pointer") - 1u); 157 g->type = WASM_VAL_I32; 158 g->mutable_ = 1; 159 g->init.kind = WASM_INSN_I32_CONST; 160 g->init.imm = 65536; 161 t->stack_pointer_global = t->module->nglobals - 1u; 162 t->stack_size = 65536; 163 t->has_stack_pointer = 1; 164 } 165 } 166 167 /* Map an SSA Reg to its WasmFunc local index, allocating on first use. 
*/ 168 static u32 reg_local(WTarget* t, Reg r, KitCgTypeId ty, RegClass cls) { 169 Heap* h = t->c->ctx->heap; 170 if (r == REG_NONE) wfail(t, "wasm: REG_NONE used as operand"); 171 if (r >= t->reg_cap) { 172 u32 nc = t->reg_cap ? t->reg_cap : 16u; 173 while (nc <= r) nc *= 2u; 174 u32* nl = (u32*)h->realloc(h, t->reg_to_local, sizeof(u32) * t->reg_cap, 175 sizeof(u32) * nc, _Alignof(u32)); 176 KitCgTypeId* nt = (KitCgTypeId*)h->realloc( 177 h, t->reg_type, sizeof(KitCgTypeId) * t->reg_cap, 178 sizeof(KitCgTypeId) * nc, _Alignof(KitCgTypeId)); 179 u8* nc_arr = (u8*)h->realloc(h, t->reg_cls, t->reg_cap, nc, 1); 180 if (!nl || !nt || !nc_arr) wfail(t, "wasm: out of memory"); 181 for (u32 i = t->reg_cap; i < nc; ++i) { 182 nl[i] = 0xffffffffu; 183 nt[i] = KIT_CG_TYPE_NONE; 184 nc_arr[i] = 0; 185 } 186 t->reg_to_local = nl; 187 t->reg_type = nt; 188 t->reg_cls = nc_arr; 189 t->reg_cap = nc; 190 } 191 /* CG may reuse the same Reg id with different value types: api_ensure_reg 192 * for an SV_CMP reuses one of the cmp operands' Regs (originally e.g. i64) 193 * to hold the i32 cmp result. Detect a type change and rebind to a fresh 194 * wasm local; the previous binding is dead from CG's point of view. */ 195 if (t->reg_to_local[r] != 0xffffffffu && t->reg_type[r]) { 196 WasmValType cached_vt = valtype_for_type(t, t->reg_type[r]); 197 WasmValType want_vt = valtype_for_type(t, ty); 198 if (cached_vt == want_vt) return t->reg_to_local[r]; 199 /* fall through to allocate fresh */ 200 } 201 { 202 WasmValType vt = valtype_for_type(t, ty); 203 t->reg_to_local[r] = add_wasm_local(t, vt); 204 t->reg_type[r] = ty; 205 t->reg_cls[r] = (u8)cls; 206 } 207 return t->reg_to_local[r]; 208 } 209 210 /* ----------------------------------------------------------------- 211 * WIR appending 212 * ----------------------------------------------------------------- */ 213 214 static WIR* wir_push(WTarget* t) { 215 Heap* h = t->c->ctx->heap; 216 if (t->nwir == t->wir_cap) { 217 u32 nc = t->wir_cap ? 
t->wir_cap * 2u : 64u; 218 void* p = h->realloc(h, t->wir, sizeof(WIR) * t->wir_cap, sizeof(WIR) * nc, 219 _Alignof(WIR)); 220 if (!p) wfail(t, "wasm: out of memory"); 221 t->wir = (WIR*)p; 222 t->wir_cap = nc; 223 } 224 WIR* w = &t->wir[t->nwir++]; 225 memset(w, 0, sizeof *w); 226 return w; 227 } 228 229 /* Operand-kind encoding stored in WIR's imm_kind / imm_kind_b. */ 230 enum { 231 WOP_REG = 0, 232 WOP_IMM = 1, 233 WOP_LOCAL = 2, 234 WOP_WASM_LOCAL = 3, 235 WOP_ADDR = 4 236 }; 237 238 static void wir_capture_operand(WIR* w, int which, Operand op) { 239 u32 kind; 240 i64 ival; 241 Reg r = REG_NONE; 242 switch (op.kind) { 243 case OPK_REG: 244 kind = WOP_REG; 245 r = op.v.reg; 246 ival = 0; 247 break; 248 case OPK_IMM: 249 kind = WOP_IMM; 250 ival = op.v.imm; 251 break; 252 case OPK_LOCAL: 253 kind = WOP_LOCAL; 254 ival = (i64)op.v.frame_slot; 255 break; 256 default: 257 kind = 99u; 258 ival = 0; 259 break; 260 } 261 if (which == 0) { 262 w->imm_kind = kind; 263 w->imm_a = ival; 264 w->a = r; 265 } else { 266 w->imm_kind_b = kind; 267 w->imm_b = ival; 268 w->b = r; 269 } 270 } 271 272 /* ----------------------------------------------------------------- 273 * Labels 274 * ----------------------------------------------------------------- */ 275 276 Label wasm_label_new(CGTarget* tg) { 277 WTarget* t = (WTarget*)tg; 278 Heap* h = t->c->ctx->heap; 279 if (t->nlabels == t->labels_cap) { 280 u32 nc = t->labels_cap ? 
t->labels_cap * 2u : 16u; 281 void* p = h->realloc(h, t->labels, sizeof(WLabel) * t->labels_cap, 282 sizeof(WLabel) * nc, _Alignof(WLabel)); 283 if (!p) wfail(t, "wasm: out of memory"); 284 t->labels = (WLabel*)p; 285 t->labels_cap = nc; 286 } 287 u32 id = t->nlabels++; 288 memset(&t->labels[id], 0, sizeof t->labels[id]); 289 return (Label)(id + 1u); 290 } 291 292 static WLabel* lookup_label(WTarget* t, Label l) { 293 if (l == LABEL_NONE || l - 1u >= t->nlabels) return NULL; 294 return &t->labels[l - 1u]; 295 } 296 297 void wasm_label_place(CGTarget* tg, Label l) { 298 WTarget* t = (WTarget*)tg; 299 WLabel* lbl = lookup_label(t, l); 300 if (!lbl) wfail(t, "wasm: label_place on unknown label"); 301 /* If this label is registered to a scope, the scope ops drive the wasm 302 * structure — placement here is a no-op. */ 303 if (lbl->kind == WLBL_SCOPE_BREAK || lbl->kind == WLBL_SCOPE_CONT) { 304 lbl->placed = 1; 305 t->dead = 0; 306 return; 307 } 308 /* Free-standing label placement: record but emit nothing. The CG layer 309 * sometimes places scope continue/break labels just before/after a 310 * scope_begin/end pair; for wasm the structured scope ops drive the 311 * `br N` targets, so these placements are no-ops. A subsequent jump or 312 * cmp_branch that lands on an unbound label will diagnose. 
*/ 313 lbl->placed = 1; 314 lbl->wir_index = t->nwir; 315 if (lbl->kind == WLBL_UNBOUND) lbl->kind = WLBL_FORWARD; 316 WIR* w = wir_push(t); 317 w->op = WIR_LABEL; 318 w->labels[0] = l; 319 t->dead = 0; 320 } 321 322 void wasm_jump(CGTarget* tg, Label l) { 323 WTarget* t = (WTarget*)tg; 324 if (t->dead) return; 325 WLabel* lbl = lookup_label(t, l); 326 if (!lbl) wfail(t, "wasm: jump to unknown label"); 327 WIR* w = wir_push(t); 328 w->op = WIR_JUMP; 329 w->labels[0] = l; 330 t->dead = 1; 331 } 332 333 void wasm_cmp_branch(CGTarget* tg, CmpOp op, Operand a, Operand b, Label l) { 334 WTarget* t = (WTarget*)tg; 335 if (t->dead) return; 336 if (!lookup_label(t, l)) wfail(t, "wasm: cmp_branch to unknown label"); 337 WIR* w = wir_push(t); 338 w->op = WIR_CMP_BRANCH; 339 w->cgop = (u8)op; 340 wir_capture_operand(w, 0, a); 341 wir_capture_operand(w, 1, b); 342 w->type = a.type ? a.type : b.type; 343 w->labels[0] = l; 344 } 345 346 void wasm_switch(CGTarget* tg, const CGSwitchDesc* d) { 347 WTarget* t = (WTarget*)tg; 348 WIR* w; 349 if (t->dead) return; 350 if (!d) wfail(t, "wasm: switch without descriptor"); 351 if (d->default_label == LABEL_NONE) 352 wfail(t, "wasm: switch without default label"); 353 if (d->ncases && !d->cases) wfail(t, "wasm: switch case count without cases"); 354 if (d->selector.kind != OPK_REG && d->selector.kind != OPK_IMM && 355 d->selector.kind != OPK_LOCAL) 356 wfail(t, "wasm: switch selector has unsupported operand kind"); 357 if (!lookup_label(t, d->default_label)) 358 wfail(t, "wasm: switch default label is unknown"); 359 for (u32 i = 0; i < d->ncases; ++i) { 360 if (!lookup_label(t, d->cases[i].label)) 361 wfail(t, "wasm: switch case label is unknown"); 362 } 363 364 w = wir_push(t); 365 w->op = WIR_SWITCH; 366 wir_capture_operand(w, 0, d->selector); 367 w->type = d->selector_type; 368 w->labels[0] = d->default_label; 369 w->switch_ncases = d->ncases; 370 if (d->ncases) { 371 Heap* h = t->c->ctx->heap; 372 w->switch_cases = 
(CGSwitchCase*)h->alloc( 373 h, sizeof(CGSwitchCase) * d->ncases, _Alignof(CGSwitchCase)); 374 if (!w->switch_cases) wfail(t, "wasm: out of memory"); 375 memcpy(w->switch_cases, d->cases, sizeof(CGSwitchCase) * d->ncases); 376 } 377 t->dead = 1; 378 } 379 380 /* ----------------------------------------------------------------- 381 * Scopes 382 * ----------------------------------------------------------------- */ 383 384 CGScope wasm_scope_begin(CGTarget* tg, const CGScopeDesc* d) { 385 WTarget* t = (WTarget*)tg; 386 if (t->nscopes >= 32u) wfail(t, "wasm: too many nested scopes (max 32)"); 387 WScope* s = &t->scopes[t->nscopes]; 388 memset(s, 0, sizeof *s); 389 s->id = ++t->next_scope_id; 390 s->cg_kind = d->kind; 391 s->break_lbl = d->break_label; 392 s->cont_lbl = d->continue_label; 393 s->result_type = d->result_type; 394 395 /* Wire scope's break/continue labels to this scope so jump()/cmp_branch() 396 * to them can lower to wasm `br`. */ 397 if (d->break_label != LABEL_NONE) { 398 WLabel* lbl = lookup_label(t, d->break_label); 399 if (lbl) { 400 lbl->kind = WLBL_SCOPE_BREAK; 401 lbl->scope_id = s->id; 402 } 403 } 404 if (d->continue_label != LABEL_NONE) { 405 WLabel* lbl = lookup_label(t, d->continue_label); 406 if (lbl) { 407 lbl->kind = WLBL_SCOPE_CONT; 408 lbl->scope_id = s->id; 409 } 410 } 411 412 WIR* open = wir_push(t); 413 open->op = WIR_SCOPE_OPEN; 414 open->scope_id = s->id; 415 open->cgop = d->kind; 416 open->dst = REG_NONE; 417 418 s->placed_in_wir = 1; 419 t->nscopes++; 420 return (CGScope)s->id; 421 } 422 423 static WScope* scope_by_id(WTarget* t, u32 id) { 424 for (u32 i = 0; i < t->nscopes; ++i) { 425 if (t->scopes[i].id == id) return &t->scopes[i]; 426 } 427 return NULL; 428 } 429 430 void wasm_scope_end(CGTarget* tg, CGScope sc) { 431 WTarget* t = (WTarget*)tg; 432 WScope* s = scope_by_id(t, (u32)sc); 433 if (!s) wfail(t, "wasm: scope_end on unknown scope"); 434 WIR* w = wir_push(t); 435 w->op = WIR_SCOPE_CLOSE; 436 w->scope_id = s->id; 437 /* 
Pop the scope from the stack. CG always closes in LIFO order. */ 438 if (t->nscopes == 0 || t->scopes[t->nscopes - 1u].id != s->id) 439 wfail(t, "wasm: scope_end out of LIFO order"); 440 t->nscopes--; 441 t->dead = 0; 442 } 443 444 void wasm_break_to(CGTarget* tg, CGScope sc) { 445 WTarget* t = (WTarget*)tg; 446 if (t->dead) return; 447 WScope* s = scope_by_id(t, (u32)sc); 448 if (!s) wfail(t, "wasm: break_to unknown scope"); 449 WIR* w = wir_push(t); 450 w->op = WIR_JUMP; 451 w->labels[0] = s->break_lbl; 452 t->dead = 1; 453 } 454 455 void wasm_continue_to(CGTarget* tg, CGScope sc) { 456 WTarget* t = (WTarget*)tg; 457 if (t->dead) return; 458 WScope* s = scope_by_id(t, (u32)sc); 459 if (!s) wfail(t, "wasm: continue_to unknown scope"); 460 WIR* w = wir_push(t); 461 w->op = WIR_JUMP; 462 w->labels[0] = s->cont_lbl; 463 t->dead = 1; 464 } 465 466 /* ----------------------------------------------------------------- 467 * Function lifecycle 468 * ----------------------------------------------------------------- */ 469 470 /* Forward decl — promotes an undefined function symbol's WasmFunc to an 471 * import using the supplied ABI to build the wasm signature. */ 472 static void promote_import_func(WTarget* t, ObjSymId sym, WasmFunc* f, 473 const ABIFuncInfo* abi); 474 475 /* Lookup or allocate a Wasm function index for an ObjSymId. Returns 476 * (wasm_func_idx, *out_func) on success. */ 477 static u32 sym_to_wasm_func(WTarget* t, ObjSymId sym, WasmFunc** out_func) { 478 Heap* h = t->c->ctx->heap; 479 if (sym >= t->sym_to_func_cap) { 480 u32 nc = t->sym_to_func_cap ? 
t->sym_to_func_cap : 16u; 481 while (nc <= sym) nc *= 2u; 482 u32* p = 483 (u32*)h->realloc(h, t->sym_to_func, sizeof(u32) * t->sym_to_func_cap, 484 sizeof(u32) * nc, _Alignof(u32)); 485 if (!p) wfail(t, "wasm: out of memory"); 486 for (u32 i = t->sym_to_func_cap; i < nc; ++i) p[i] = 0; 487 t->sym_to_func = p; 488 t->sym_to_func_cap = nc; 489 } 490 if (t->sym_to_func[sym]) { 491 u32 idx = t->sym_to_func[sym] - 1u; 492 if (out_func) *out_func = &t->module->funcs[idx]; 493 return idx; 494 } 495 /* Create a fresh WasmFunc and link. */ 496 WasmFunc* f = wasm_add_func(t->c, t->module); 497 u32 idx = t->module->nfuncs - 1u; 498 t->sym_to_func[sym] = idx + 1u; 499 if (out_func) *out_func = f; 500 return idx; 501 } 502 503 static WSlot* slot_push(WTarget* t) { 504 Heap* h = t->c->ctx->heap; 505 if (t->nslots == t->slots_cap) { 506 u32 nc = t->slots_cap ? t->slots_cap * 2u : 16u; 507 WSlot* ns = (WSlot*)h->realloc(h, t->slots, sizeof(WSlot) * t->slots_cap, 508 sizeof(WSlot) * nc, _Alignof(WSlot)); 509 if (!ns) wfail(t, "wasm: out of memory"); 510 t->slots = ns; 511 t->slots_cap = nc; 512 } 513 WSlot* s = &t->slots[t->nslots++]; 514 memset(s, 0, sizeof *s); 515 return s; 516 } 517 518 /* True iff `ty` maps to a single wasm value type (i32/i64/f32/f64) and so can 519 * live directly in a wasm local. Aggregates (records/arrays) and anything 520 * wider than 8 bytes must be homed in linear memory. */ 521 static int type_is_wasm_scalar(WTarget* t, KitCgTypeId ty) { 522 ABITypeInfo ti; 523 if (!ty) return 0; 524 ti = abi_cg_type_info(t->c->abi, ty); 525 if (ti.scalar_kind == ABI_SC_VOID) return 0; 526 if (ti.scalar_kind == ABI_SC_PTR) return 1; 527 return ti.size <= 8u; 528 } 529 530 static FrameSlot alloc_frame_slot_kind(WTarget* t, KitCgTypeId ty, u32 size, 531 u32 align, int stack_backed) { 532 WSlot* s; 533 u32 slot_id; 534 type_size_align(t, ty, size, align, &size, &align); 535 /* A non-scalar type has no wasm value type; force it into linear memory. 
*/ 536 if (!stack_backed && !type_is_wasm_scalar(t, ty)) stack_backed = 1; 537 s = slot_push(t); 538 s->type = ty; 539 s->size = size; 540 s->align = align; 541 if (stack_backed) { 542 ensure_stack_pointer(t); 543 t->frame_size = align_to_u32(t->frame_size, align); 544 s->kind = W_SLOT_STACK; 545 s->frame_offset = t->frame_size; 546 t->frame_size += size; 547 if (align > t->frame_align) t->frame_align = align; 548 t->has_stack_frame = 1; 549 } else { 550 s->kind = W_SLOT_LOCAL; 551 s->wasm_local = add_wasm_local(t, valtype_for_type(t, ty)); 552 } 553 slot_id = t->nslots; 554 return (FrameSlot)slot_id; 555 } 556 557 static WSlot* slot_for(WTarget* t, FrameSlot fs) { 558 if (fs == FRAME_SLOT_NONE) wfail(t, "wasm: FRAME_SLOT_NONE used"); 559 u32 idx = fs - 1u; 560 if (idx >= t->nslots) wfail(t, "wasm: bad frame slot id"); 561 return &t->slots[idx]; 562 } 563 564 static void promote_slot_to_stack(WTarget* t, WSlot* s) { 565 if (s->kind == W_SLOT_STACK) return; 566 ensure_stack_pointer(t); 567 t->frame_size = align_to_u32(t->frame_size, s->align ? s->align : 1u); 568 s->frame_offset = t->frame_size; 569 t->frame_size += s->size ? 
s->size : 1u; 570 if (s->align > t->frame_align) t->frame_align = s->align; 571 s->kind = W_SLOT_STACK; 572 t->has_stack_frame = 1; 573 } 574 575 void wasm_func_begin(CGTarget* tg, const CGFuncDesc* d) { 576 WTarget* t = (WTarget*)tg; 577 WasmFunc* f; 578 u32 idx; 579 const CgType* fnty; 580 const ABIFuncInfo* abi; 581 Heap* h = t->c->ctx->heap; 582 583 ensure_module(t); 584 t->cur_fn_desc = d; 585 memset(&t->cur_stmt_loc, 0, sizeof t->cur_stmt_loc); 586 t->nwir = 0; 587 t->nlabels = 0; 588 t->nslots = 0; 589 t->nscopes = 0; 590 t->next_scope_id = 0; 591 t->frame_size = 0; 592 t->frame_align = 1; 593 t->frame_base_local = 0xffffffffu; 594 t->frame_saved_sp_local = 0xffffffffu; 595 t->has_stack_frame = 0; 596 t->dead = 0; 597 t->sret_param_local = 0xffffffffu; 598 t->va_ptr_param_local = 0xffffffffu; 599 t->cur_has_sret = 0; 600 t->cur_is_variadic = 0; 601 t->varcall_saved_sp_local = 0xffffffffu; 602 t->varcall_buf_local = 0xffffffffu; 603 t->va_arg_tmp_addr_local = 0xffffffffu; 604 t->nparams_cg = 0; 605 t->nbyval_copies = 0; 606 /* Wipe reg map. */ 607 for (u32 i = 0; i < t->reg_cap; ++i) t->reg_to_local[i] = 0xffffffffu; 608 609 idx = sym_to_wasm_func(t, d->sym, &f); 610 t->cur_func_idx = idx; 611 t->cur_func = f; 612 613 fnty = cg_type_get(t->c, d->fn_type); 614 if (!fnty || fnty->kind != KIT_CG_TYPE_FUNC) 615 wfail(t, "wasm: func_begin without function type"); 616 abi = d->abi; 617 if (!abi) wfail(t, "wasm: func_begin with no ABI info"); 618 619 /* Build the wasm function's param layout: 620 * [sret_ptr]? [param_0] [param_1] ... 621 * with IGNORE'd CG params dropped. Record per-CG-param the wasm-local 622 * index so wasm_param can place each frame slot on the right local. 623 * params/locals/local_names grow on demand — no fixed cap. 
*/ 624 f->nparams = 0; 625 if (abi->has_sret) { 626 t->sret_param_local = 627 wasm_func_push_param(t->c, t->module, f, WASM_VAL_I32); 628 t->cur_has_sret = 1; 629 ensure_linear_memory(t); 630 } 631 if (fnty->func.nparams > t->param_local_idx_cap) { 632 u32 nc = t->param_local_idx_cap ? t->param_local_idx_cap : 4u; 633 while (nc < fnty->func.nparams) nc *= 2u; 634 u32* p = (u32*)h->realloc(h, t->param_local_idx, 635 sizeof(u32) * t->param_local_idx_cap, 636 sizeof(u32) * nc, _Alignof(u32)); 637 if (!p) wfail(t, "wasm: out of memory"); 638 t->param_local_idx = p; 639 t->param_local_idx_cap = nc; 640 } 641 t->nparams_cg = fnty->func.nparams; 642 for (u32 i = 0; i < fnty->func.nparams; ++i) { 643 const ABIArgInfo* ai = &abi->params[i]; 644 if (ai->kind == ABI_ARG_IGNORE) { 645 t->param_local_idx[i] = 0xffffffffu; 646 continue; 647 } 648 if (ai->kind == ABI_ARG_INDIRECT) { 649 t->param_local_idx[i] = 650 wasm_func_push_param(t->c, t->module, f, WASM_VAL_I32); 651 ensure_linear_memory(t); 652 } else { 653 if (ai->nparts != 1) 654 wfail(t, "wasm: multi-part DIRECT param %u not yet implemented", i); 655 const ABIArgPart* p = &ai->parts[0]; 656 WasmValType vt = valtype_for_size_kind( 657 t, p->size, p->cls == ABI_CLASS_FP ? ABI_SC_FLOAT : ABI_SC_INT); 658 t->param_local_idx[i] = wasm_func_push_param(t->c, t->module, f, vt); 659 } 660 } 661 /* Variadic: append hidden i32 va_ptr trailing param. Must match 662 * abi_to_wasm_func_type so indirect calls' signature interning agrees. */ 663 if (abi->variadic) { 664 t->va_ptr_param_local = 665 wasm_func_push_param(t->c, t->module, f, WASM_VAL_I32); 666 t->cur_is_variadic = 1; 667 ensure_linear_memory(t); 668 } 669 f->nresults = 0; 670 if (!abi->has_sret && abi->ret.kind == ABI_ARG_DIRECT && 671 abi->ret.nparts == 1) { 672 const ABIArgPart* p = &abi->ret.parts[0]; 673 f->results[0] = valtype_for_size_kind( 674 t, p->size, p->cls == ABI_CLASS_FP ? 
ABI_SC_FLOAT : ABI_SC_INT); 675 f->nresults = 1; 676 } 677 f->typeidx = wasm_intern_func_type(t->c, t->module, f); 678 679 /* Export under the symbol's name when the symbol is globally bound. */ 680 const ObjSym* sym = obj_symbol_get(t->obj, d->sym); 681 if (sym && sym->bind != SB_LOCAL) { 682 const char* name = pool_sym_cstr(t->c->global, sym->name, NULL); 683 if (name && *name) { 684 Heap* h = t->c->ctx->heap; 685 size_t nlen = strlen(name); 686 char* dup = (char*)h->alloc(h, nlen + 1u, 1); 687 memcpy(dup, name, nlen + 1u); 688 f->export_name = dup; 689 WasmExport* e = wasm_add_export(t->c, t->module); 690 char* exp_name = (char*)h->alloc(h, nlen + 1u, 1); 691 memcpy(exp_name, name, nlen + 1u); 692 e->name = exp_name; 693 e->kind = 0; /* function export */ 694 e->index = idx; 695 } 696 } 697 } 698 699 CGLocalStorage wasm_param(CGTarget* tg, const CGParamDesc* d) { 700 WTarget* t = (WTarget*)tg; 701 CGLocalStorage ls; 702 Heap* h = t->c->ctx->heap; 703 u32 wli; 704 WSlot* s; 705 memset(&ls, 0, sizeof ls); 706 if (d->index >= t->nparams_cg) 707 wfail(t, "wasm: param index %u out of range (nparams=%u)", d->index, 708 t->nparams_cg); 709 wli = t->param_local_idx[d->index]; 710 if (wli == 0xffffffffu) { 711 /* ABI_ARG_IGNORE — no wasm storage. Push a placeholder slot so the 712 * returned FrameSlot is meaningful to CG; it never gets emitted. */ 713 s = slot_push(t); 714 s->type = d->type; 715 s->size = d->size; 716 s->align = d->align ? d->align : 1u; 717 s->kind = W_SLOT_LOCAL; 718 s->wasm_local = 0; 719 ls.kind = CG_LOCAL_STORAGE_FRAME; 720 ls.v.frame_slot = (FrameSlot)t->nslots; 721 return ls; 722 } 723 if (d->abi && d->abi->kind == ABI_ARG_INDIRECT) { 724 /* byval: callee receives an i32 pointer; copy the aggregate into a 725 * caller-isolated stack-backed slot at function entry. */ 726 u32 size = d->size; 727 u32 align = d->align ? 
d->align : 1u; 728 type_size_align(t, d->type, size, align, &size, &align); 729 ensure_stack_pointer(t); 730 s = slot_push(t); 731 s->type = d->type; 732 s->size = size; 733 s->align = align; 734 s->kind = W_SLOT_STACK; 735 t->frame_size = align_to_u32(t->frame_size, align); 736 s->frame_offset = t->frame_size; 737 t->frame_size += size; 738 if (align > t->frame_align) t->frame_align = align; 739 t->has_stack_frame = 1; 740 /* Queue prologue copy-in from the pointer's wasm-local into &slot. */ 741 if (t->nbyval_copies == t->byval_copies_cap) { 742 u32 nc = t->byval_copies_cap ? t->byval_copies_cap * 2u : 4u; 743 WByvalCopy* nb = (WByvalCopy*)h->realloc( 744 h, t->byval_copies, sizeof(WByvalCopy) * t->byval_copies_cap, 745 sizeof(WByvalCopy) * nc, _Alignof(WByvalCopy)); 746 if (!nb) wfail(t, "wasm: out of memory"); 747 t->byval_copies = nb; 748 t->byval_copies_cap = nc; 749 } 750 WByvalCopy* bc = &t->byval_copies[t->nbyval_copies++]; 751 bc->ptr_wasm_local = wli; 752 bc->dst_slot_id = t->nslots - 1u; 753 ls.kind = CG_LOCAL_STORAGE_FRAME; 754 ls.v.frame_slot = (FrameSlot)t->nslots; 755 return ls; 756 } 757 if (d->flags & CG_LOCAL_ADDR_TAKEN) { 758 wfail(t, "wasm: address-taken parameter not yet implemented"); 759 } 760 s = slot_push(t); 761 s->type = d->type; 762 s->size = d->size; 763 s->align = d->align ? d->align : 1u; 764 s->kind = W_SLOT_LOCAL; 765 s->wasm_local = wli; 766 ls.kind = CG_LOCAL_STORAGE_FRAME; 767 ls.v.frame_slot = (FrameSlot)t->nslots; 768 return ls; 769 } 770 771 /* Allocate a frame slot backed by a fresh wasm local. 
*/ 772 static FrameSlot alloc_frame_slot(WTarget* t, KitCgTypeId ty) { 773 return alloc_frame_slot_kind(t, ty, 0, 0, 0); 774 } 775 776 FrameSlot wasm_frame_slot(CGTarget* tg, const FrameSlotDesc* d) { 777 WTarget* t = (WTarget*)tg; 778 if (!d->type && !d->size) wfail(t, "wasm: frame slot without type/size"); 779 return alloc_frame_slot_kind( 780 t, d->type, d->size, d->align, 781 (d->flags & FSF_ADDR_TAKEN) != 0 || d->kind == FS_ALLOCA); 782 } 783 784 CGLocalStorage wasm_local(CGTarget* tg, const CGLocalDesc* d) { 785 WTarget* t = (WTarget*)tg; 786 CGLocalStorage ls; 787 memset(&ls, 0, sizeof ls); 788 if (d->flags & (CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED)) { 789 ls.kind = CG_LOCAL_STORAGE_FRAME; 790 ls.v.frame_slot = alloc_frame_slot_kind(t, d->type, d->size, d->align, 1); 791 return ls; 792 } 793 ls.kind = CG_LOCAL_STORAGE_FRAME; 794 ls.v.frame_slot = alloc_frame_slot(t, d->type); 795 return ls; 796 } 797 798 /* ----------------------------------------------------------------- 799 * Data-movement records 800 * ----------------------------------------------------------------- */ 801 802 void wasm_load_imm(CGTarget* tg, Operand dst, i64 imm) { 803 WTarget* t = (WTarget*)tg; 804 if (t->dead) return; 805 if (dst.kind != OPK_REG) wfail(t, "wasm: load_imm dst must be REG"); 806 WIR* w = wir_push(t); 807 w->op = WIR_LOAD_IMM; 808 w->dst = dst.v.reg; 809 w->imm = imm; 810 w->type = dst.type; 811 w->cls = dst.cls; 812 } 813 814 void wasm_load_const(CGTarget* tg, Operand dst, ConstBytes cb) { 815 WTarget* t = (WTarget*)tg; 816 if (t->dead) return; 817 if (dst.kind != OPK_REG) wfail(t, "wasm: load_const dst must be REG"); 818 WasmValType vt = valtype_for_type(t, cb.type); 819 WIR* w = wir_push(t); 820 w->dst = dst.v.reg; 821 w->type = cb.type; 822 w->cls = dst.cls; 823 if (vt == WASM_VAL_F32) { 824 if (cb.size != 4) wfail(t, "wasm: f32 const must be 4 bytes"); 825 float f; 826 memcpy(&f, cb.bytes, 4); 827 w->op = WIR_LOAD_CONST_F; 828 w->fp_imm = (double)f; 829 } 
else if (vt == WASM_VAL_F64) { 830 if (cb.size != 8) wfail(t, "wasm: f64 const must be 8 bytes"); 831 double v; 832 memcpy(&v, cb.bytes, 8); 833 w->op = WIR_LOAD_CONST_F; 834 w->fp_imm = v; 835 } else { 836 i64 v = 0; 837 memcpy(&v, cb.bytes, cb.size < 8 ? cb.size : 8u); 838 /* Sign-extend for small signed types so the immediate has the expected 839 * bit pattern. */ 840 if (cb.size == 1) 841 v = (i64)(i8)v; 842 else if (cb.size == 2) 843 v = (i64)(i16)v; 844 else if (cb.size == 4) 845 v = (i64)(i32)v; 846 w->op = WIR_LOAD_IMM; 847 w->imm = v; 848 } 849 } 850 851 void wasm_copy(CGTarget* tg, Operand dst, Operand src) { 852 WTarget* t = (WTarget*)tg; 853 if (t->dead) return; 854 if (dst.kind != OPK_REG || src.kind != OPK_REG) 855 wfail(t, "wasm: copy operands must both be REG"); 856 WIR* w = wir_push(t); 857 w->op = WIR_COPY; 858 w->dst = dst.v.reg; 859 w->a = src.v.reg; 860 w->type = dst.type; 861 } 862 863 void wasm_binop(CGTarget* tg, BinOp op, Operand dst, Operand a, Operand b) { 864 WTarget* t = (WTarget*)tg; 865 if (t->dead) return; 866 if (dst.kind != OPK_REG) wfail(t, "wasm: binop dst must be REG"); 867 WIR* w = wir_push(t); 868 w->op = WIR_BINOP; 869 w->cgop = (u8)op; 870 w->dst = dst.v.reg; 871 wir_capture_operand(w, 0, a); 872 wir_capture_operand(w, 1, b); 873 w->type = dst.type; 874 w->cls = dst.cls; 875 } 876 877 void wasm_unop(CGTarget* tg, UnOp op, Operand dst, Operand a) { 878 WTarget* t = (WTarget*)tg; 879 if (t->dead) return; 880 if (dst.kind != OPK_REG) wfail(t, "wasm: unop dst must be REG"); 881 WIR* w = wir_push(t); 882 w->op = WIR_UNOP; 883 w->cgop = (u8)op; 884 w->dst = dst.v.reg; 885 wir_capture_operand(w, 0, a); 886 w->type = dst.type; 887 w->cls = dst.cls; 888 } 889 890 void wasm_cmp(CGTarget* tg, CmpOp op, Operand dst, Operand a, Operand b) { 891 WTarget* t = (WTarget*)tg; 892 if (t->dead) return; 893 if (dst.kind != OPK_REG) wfail(t, "wasm: cmp dst must be REG"); 894 WIR* w = wir_push(t); 895 w->op = WIR_CMP; 896 w->cgop = (u8)op; 897 
w->dst = dst.v.reg; 898 wir_capture_operand(w, 0, a); 899 wir_capture_operand(w, 1, b); 900 w->type = dst.type; 901 w->type2 = a.type ? a.type : b.type; 902 w->cls = dst.cls; 903 } 904 905 void wasm_convert(CGTarget* tg, ConvKind ck, Operand dst, Operand src) { 906 WTarget* t = (WTarget*)tg; 907 if (t->dead) return; 908 if (dst.kind != OPK_REG) wfail(t, "wasm: convert dst must be REG"); 909 WIR* w = wir_push(t); 910 w->op = WIR_CONVERT; 911 w->cgop = (u8)ck; 912 w->dst = dst.v.reg; 913 wir_capture_operand(w, 0, src); 914 w->type = dst.type; 915 w->type2 = src.type; 916 w->cls = dst.cls; 917 } 918 919 /* Build (or reuse) the wasm typeidx for a function-typed indirect call. The 920 * signature shape must exactly match a direct call to the same C type: 921 * - hidden i32 sret pointer prepended when ABI has_sret 922 * - per-param: i32 pointer for ABI_ARG_INDIRECT, else the DIRECT scalar 923 * produced by the wasm32 BasicCABI classifier (IGNORE params dropped) 924 * - result: empty when has_sret, else the DIRECT scalar 925 * 926 * call_indirect's runtime type check compares this typeidx against the 927 * funcref's recorded type, so any mismatch with the direct-call path would 928 * trap. The temporary WasmFunc is stack-allocated; wasm_intern_func_type 929 * copies the param array on insertion. */ 930 /* Translate an ABI function signature into the wasm-level param/result list. 931 * Used both for indirect-call signature interning and for import-function 932 * type synthesis. `what` names the call site in diagnostics. Returns the 933 * interned type index. The caller-provided buffer `params` (length `cap`) is 934 * filled from index 0; *nparams_out is the count written. 
*/ 935 static u32 abi_to_wasm_func_type(WTarget* t, const ABIFuncInfo* abi, 936 WasmValType* params, u32 cap, u32* nparams_out, 937 WasmValType* result_out, u32* nresults_out, 938 const char* what) { 939 WasmFunc tmp; 940 memset(&tmp, 0, sizeof tmp); 941 tmp.params = params; 942 tmp.cap_params = cap; 943 if (abi->has_sret) { 944 if (tmp.nparams >= cap) wfail(t, "wasm: %s has too many params", what); 945 params[tmp.nparams++] = WASM_VAL_I32; 946 } 947 for (u32 i = 0; i < abi->nparams; ++i) { 948 const ABIArgInfo* ai = &abi->params[i]; 949 if (ai->kind == ABI_ARG_IGNORE) continue; 950 if (tmp.nparams >= cap) wfail(t, "wasm: %s has too many params", what); 951 if (ai->kind == ABI_ARG_INDIRECT) { 952 params[tmp.nparams++] = WASM_VAL_I32; 953 } else { 954 if (ai->nparts != 1) 955 wfail(t, "wasm: %s has multi-part DIRECT param (unsupported)", what); 956 const ABIArgPart* p = &ai->parts[0]; 957 params[tmp.nparams++] = valtype_for_size_kind( 958 t, p->size, p->cls == ABI_CLASS_FP ? ABI_SC_FLOAT : ABI_SC_INT); 959 } 960 } 961 /* Variadic functions take one hidden trailing i32 va_ptr — the address of 962 * the caller-packed varargs buffer in linear memory. See wasm_call's 963 * variadic packing and wasm_va_start in this file. */ 964 if (abi->variadic) { 965 if (tmp.nparams >= cap) wfail(t, "wasm: %s has too many params", what); 966 params[tmp.nparams++] = WASM_VAL_I32; 967 } 968 tmp.nresults = 0; 969 if (!abi->has_sret && abi->ret.kind == ABI_ARG_DIRECT && 970 abi->ret.nparts == 1) { 971 const ABIArgPart* p = &abi->ret.parts[0]; 972 tmp.results[0] = valtype_for_size_kind( 973 t, p->size, p->cls == ABI_CLASS_FP ? 
ABI_SC_FLOAT : ABI_SC_INT); 974 tmp.nresults = 1; 975 } 976 if (nparams_out) *nparams_out = tmp.nparams; 977 if (result_out && tmp.nresults) *result_out = tmp.results[0]; 978 if (nresults_out) *nresults_out = tmp.nresults; 979 return wasm_intern_func_type(t->c, t->module, &tmp); 980 } 981 982 static u32 intern_indirect_signature(WTarget* t, const ABIFuncInfo* abi) { 983 WasmValType params[64]; 984 return abi_to_wasm_func_type(t, abi, params, 64u, NULL, NULL, NULL, 985 "indirect call"); 986 } 987 988 /* Promote `f` (already allocated for `sym` via sym_to_wasm_func) into a wasm 989 * `(import "<module>" "<field>" (func ...))` entry. The signature is 990 * synthesized from the supplied ABIFuncInfo, mirroring the layout the 991 * caller-side WIR_CALL pushes onto the stack: hidden i32 sret-pointer when 992 * has_sret, followed by lowered params, with a single i32/i64/f32/f64 993 * result for direct, non-sret returns. Module/field default to "env" / the 994 * symbol's name; either may be overridden by 995 * `__attribute__((import_module/import_name))`. */ 996 static void promote_import_func(WTarget* t, ObjSymId sym, WasmFunc* f, 997 const ABIFuncInfo* abi) { 998 Heap* h = t->c->ctx->heap; 999 const ObjSym* os; 1000 const char* sym_name; 1001 size_t sym_name_len = 0; 1002 Sym attr_module = 0; 1003 Sym attr_name = 0; 1004 const char* mod_str = "env"; 1005 size_t mod_len = sizeof("env") - 1u; 1006 if (!t->module) return; 1007 if (f->is_import) return; 1008 os = obj_symbol_get(t->obj, sym); 1009 if (!os) return; 1010 if (os->section_id != OBJ_SEC_NONE) return; /* already defined locally */ 1011 if (os->kind != SK_UNDEF && os->kind != SK_FUNC) return; 1012 if (!abi) 1013 wfail(t, 1014 "wasm: cannot synthesize import signature for '%s' " 1015 "(missing ABI info)", 1016 pool_sym_cstr(t->c->global, os->name, NULL)); 1017 if (f->ninsns != 0) 1018 wfail(t, "wasm: cannot promote function with emitted body to import"); 1019 /* Synthesize the wasm type from the ABI. 
Diagnoses unsupported shapes 1020 * (varargs => multi-part DIRECT or extra parts; by-value aggregates that 1021 * the ABI didn't already lower to ABI_ARG_INDIRECT) by naming the import 1022 * symbol so the error points at the C declaration. */ 1023 WasmValType params[64]; 1024 u32 nparams = 0; 1025 WasmValType result_vt = 0; 1026 u32 nresults = 0; 1027 char what[160]; 1028 sym_name = pool_sym_cstr(t->c->global, os->name, &sym_name_len); 1029 if (!sym_name) sym_name = "(anonymous)"; 1030 /* Snprintf-free: build a short context string by hand to avoid pulling in 1031 * stdio. The buffer is large enough for any plausible C identifier. */ 1032 { 1033 const char* prefix = "import '"; 1034 const char* suffix = "'"; 1035 size_t plen = 8u; /* strlen(prefix) */ 1036 size_t slen = 1u; /* strlen(suffix) */ 1037 size_t nlen = sym_name_len ? sym_name_len : strlen(sym_name); 1038 if (nlen > sizeof(what) - plen - slen - 1u) 1039 nlen = sizeof(what) - plen - slen - 1u; 1040 memcpy(what, prefix, plen); 1041 memcpy(what + plen, sym_name, nlen); 1042 memcpy(what + plen + nlen, suffix, slen); 1043 what[plen + nlen + slen] = 0; 1044 } 1045 f->typeidx = abi_to_wasm_func_type(t, abi, params, 64u, &nparams, &result_vt, 1046 &nresults, what); 1047 f->has_typeidx = 1; 1048 /* Mirror the synthesized params/results onto the WasmFunc so the import 1049 * encoder writes the matching signature. */ 1050 wasm_func_set_params(t->c, t->module, f, params, nparams); 1051 f->nresults = nresults; 1052 if (nresults) f->results[0] = result_vt; 1053 /* Resolve module/name overrides set via __attribute__((import_module/ 1054 * import_name)) on the C declaration. */ 1055 (void)wasm_imports_get(t->obj, os->name, &attr_module, &attr_name); 1056 if (attr_module) mod_str = pool_sym_cstr(t->c->global, attr_module, &mod_len); 1057 const char* name_str = 1058 attr_name ? pool_sym_cstr(t->c->global, attr_name, &sym_name_len) 1059 : sym_name; 1060 size_t name_len = attr_name 1061 ? sym_name_len 1062 : (sym_name_len ? 
sym_name_len : strlen(name_str)); 1063 f->is_import = 1; 1064 { 1065 char* m = (char*)h->alloc(h, mod_len + 1u, 1); 1066 if (!m) wfail(t, "wasm: out of memory"); 1067 memcpy(m, mod_str, mod_len); 1068 m[mod_len] = 0; 1069 f->import_module = m; 1070 } 1071 { 1072 char* n = (char*)h->alloc(h, name_len + 1u, 1); 1073 if (!n) wfail(t, "wasm: out of memory"); 1074 memcpy(n, name_str, name_len); 1075 n[name_len] = 0; 1076 f->import_name = n; 1077 } 1078 } 1079 1080 const char* wasm_tail_call_unrealizable_reason(CGTarget* tg, 1081 const CGCallDesc* d) { 1082 (void)tg; 1083 /* Variadic tail calls are not realizable on wasm: varargs are packed into a 1084 * buffer carved from this function's linear-memory frame, which return_call 1085 * tears down before the callee reads it. sret is realizable — the tail 1086 * forwards the function's own incoming sret pointer (see wasm_call). wasm 1087 * function parameters are wasm locals, so there is no caller stack-arg area 1088 * to overflow. */ 1089 if (d->abi && d->abi->variadic) 1090 return "wasm cannot tail-call a variadic function (its vararg buffer " 1091 "lives in the frame a sibling call tears down)"; 1092 return NULL; 1093 } 1094 1095 void wasm_call(CGTarget* tg, const CGCallDesc* d) { 1096 WTarget* t = (WTarget*)tg; 1097 if (t->dead) return; 1098 int is_indirect = (d->callee.kind != OPK_GLOBAL); 1099 if (is_indirect && d->callee.kind != OPK_REG) 1100 wfail(t, "wasm: indirect call callee must be a register (got opkind %u)", 1101 (unsigned)d->callee.kind); 1102 Heap* h = t->c->ctx->heap; 1103 int callee_has_sret = (d->abi && d->abi->has_sret) ? 1 : 0; 1104 int callee_variadic = (d->abi && d->abi->variadic) ? 1 : 0; 1105 int is_tail = (d->flags & CG_CALL_TAIL) ? 1 : 0; 1106 if (is_tail) { 1107 /* Realizability is decided by CG via wasm_tail_call_unrealizable_reason 1108 * before CG_CALL_TAIL is set: variadic tails are rejected there, and sret 1109 * tails forward the incoming sret pointer (handled in the WIR emit). 
*/ 1110 ensure_module(t); 1111 t->module->features |= WASM_FEATURE_TAIL_CALLS; 1112 } 1113 u32 nfixed = (u32)d->nargs; 1114 u32 nvar = 0u; 1115 if (callee_variadic) { 1116 if (d->nargs < d->abi->nparams) 1117 wfail(t, "wasm: variadic call has fewer args (%u) than fixed params (%u)", 1118 (unsigned)d->nargs, (unsigned)d->abi->nparams); 1119 nfixed = d->abi->nparams; 1120 nvar = (u32)d->nargs - nfixed; 1121 } 1122 WIR* w = wir_push(t); 1123 if (is_indirect) { 1124 if (!d->abi) wfail(t, "wasm: indirect call without ABIFuncInfo"); 1125 ensure_module(t); 1126 w->op = WIR_CALL_INDIRECT; 1127 w->a = d->callee.v.reg; 1128 w->imm = (i64)intern_indirect_signature(t, d->abi); 1129 } else { 1130 w->op = WIR_CALL; 1131 w->call_sym = d->callee.v.global.sym; 1132 /* Direct calls into externally-defined functions become wasm imports. 1133 * Synthesize the import signature now while the ABI is available — the 1134 * WIR emit loop only has the symbol index. The C frontend mints SK_FUNC 1135 * for `extern foo(...)` declarations; the "undefined" signal is 1136 * `section_id == OBJ_SEC_NONE`. SK_UNDEF can appear when a symbol's 1137 * kind hasn't been pinned down yet. */ 1138 { 1139 const ObjSym* os = obj_symbol_get(t->obj, d->callee.v.global.sym); 1140 if (os && os->section_id == OBJ_SEC_NONE && 1141 (os->kind == SK_UNDEF || os->kind == SK_FUNC)) { 1142 WasmFunc* f; 1143 ensure_module(t); 1144 (void)sym_to_wasm_func(t, d->callee.v.global.sym, &f); 1145 if (!f->is_import) 1146 promote_import_func(t, d->callee.v.global.sym, f, d->abi); 1147 } 1148 } 1149 } 1150 w->call_narg = nfixed; 1151 w->type = d->ret.type; 1152 w->call_has_sret = (u8)callee_has_sret; 1153 w->call_variadic = (u8)callee_variadic; 1154 w->call_tail = (u8)is_tail; 1155 w->call_nvar = nvar; 1156 if (callee_variadic) ensure_linear_memory(t); 1157 if (callee_has_sret) { 1158 /* Caller allocated a frame slot for the aggregate result via 1159 * api_alloc_call_ret_storage; pass its address as the prepended i32. 
*/ 1160 w->call_sret_addr = d->ret.storage; 1161 ensure_linear_memory(t); 1162 } 1163 if (nfixed) { 1164 w->call_args = (Reg*)h->alloc(h, sizeof(Reg) * nfixed, _Alignof(Reg)); 1165 w->call_arg_imms = (i64*)h->alloc(h, sizeof(i64) * nfixed, _Alignof(i64)); 1166 w->call_arg_kinds = (u8*)h->alloc(h, nfixed, 1); 1167 w->call_arg_types = (KitCgTypeId*)h->alloc(h, sizeof(KitCgTypeId) * nfixed, 1168 _Alignof(KitCgTypeId)); 1169 w->call_arg_addrs = 1170 (Operand*)h->alloc(h, sizeof(Operand) * nfixed, _Alignof(Operand)); 1171 memset(w->call_arg_addrs, 0, sizeof(Operand) * nfixed); 1172 for (u32 i = 0; i < nfixed; ++i) { 1173 const CGABIValue* av = &d->args[i]; 1174 w->call_arg_types[i] = av->type; 1175 int is_indirect = (av->abi && av->abi->kind == ABI_ARG_INDIRECT); 1176 if (is_indirect) { 1177 if (av->storage.kind != OPK_LOCAL && av->storage.kind != OPK_INDIRECT && 1178 av->storage.kind != OPK_GLOBAL) { 1179 wfail(t, "wasm: byval call arg %u storage kind %u must be an address", 1180 i, (unsigned)av->storage.kind); 1181 } 1182 w->call_arg_kinds[i] = WOP_ADDR; 1183 w->call_args[i] = REG_NONE; 1184 w->call_arg_imms[i] = 0; 1185 w->call_arg_addrs[i] = av->storage; 1186 ensure_linear_memory(t); 1187 } else if (av->storage.kind == OPK_REG) { 1188 w->call_arg_kinds[i] = 0; 1189 w->call_args[i] = av->storage.v.reg; 1190 w->call_arg_imms[i] = 0; 1191 } else if (av->storage.kind == OPK_IMM) { 1192 w->call_arg_kinds[i] = 1; 1193 w->call_args[i] = REG_NONE; 1194 w->call_arg_imms[i] = av->storage.v.imm; 1195 } else { 1196 wfail(t, 1197 "wasm: call arg %u has unsupported operand kind %u (only " 1198 "REG/IMM scalar args are supported in v1)", 1199 i, (unsigned)av->storage.kind); 1200 } 1201 } 1202 } 1203 if (nvar) { 1204 w->call_var_regs = (Reg*)h->alloc(h, sizeof(Reg) * nvar, _Alignof(Reg)); 1205 w->call_var_imms = (i64*)h->alloc(h, sizeof(i64) * nvar, _Alignof(i64)); 1206 w->call_var_kinds = (u8*)h->alloc(h, nvar, 1); 1207 w->call_var_types = (KitCgTypeId*)h->alloc(h, 
sizeof(KitCgTypeId) * nvar, 1208 _Alignof(KitCgTypeId)); 1209 for (u32 i = 0; i < nvar; ++i) { 1210 const CGABIValue* av = &d->args[nfixed + i]; 1211 const CgType* aty = av->type ? cg_type_get(t->c, av->type) : NULL; 1212 w->call_var_types[i] = av->type; 1213 if (aty && 1214 (aty->kind == KIT_CG_TYPE_RECORD || aty->kind == KIT_CG_TYPE_ARRAY)) { 1215 wfail(t, "wasm target: aggregate variadic arg %u not yet supported", i); 1216 } 1217 if (av->storage.kind == OPK_REG) { 1218 w->call_var_kinds[i] = WOP_REG; 1219 w->call_var_regs[i] = av->storage.v.reg; 1220 w->call_var_imms[i] = 0; 1221 } else if (av->storage.kind == OPK_IMM) { 1222 w->call_var_kinds[i] = WOP_IMM; 1223 w->call_var_regs[i] = REG_NONE; 1224 w->call_var_imms[i] = av->storage.v.imm; 1225 } else { 1226 wfail(t, 1227 "wasm target: variadic arg %u has unsupported operand kind %u " 1228 "(only REG/IMM scalar args supported in v1)", 1229 i, (unsigned)av->storage.kind); 1230 } 1231 } 1232 } 1233 if (callee_has_sret) { 1234 /* The call has no wasm result; the buffer pointed to by the sret arg 1235 * holds the aggregate. */ 1236 w->dst = REG_NONE; 1237 } else if (d->ret.storage.kind == OPK_REG && 1238 d->ret.storage.v.reg != REG_NONE) { 1239 w->dst = d->ret.storage.v.reg; 1240 } else { 1241 w->dst = REG_NONE; 1242 } 1243 } 1244 1245 void wasm_ret(CGTarget* tg, const CGABIValue* v) { 1246 WTarget* t = (WTarget*)tg; 1247 if (t->dead) return; 1248 WIR* w = wir_push(t); 1249 w->op = WIR_RET; 1250 if (t->cur_has_sret && v && v->abi && v->abi->kind == ABI_ARG_INDIRECT) { 1251 /* Aggregate sret return: emit a memcpy from av.storage to the buffer 1252 * pointed to by the hidden sret parameter, then a void return. 
*/ 1253 w->addr = v->storage; 1254 w->type = v->type; 1255 w->agg.size = (u32)abi_cg_sizeof(t->c->abi, v->type); 1256 w->agg.align = 1u; 1257 w->cgop = 1; /* tag: sret copy */ 1258 w->dst = REG_NONE; 1259 } else if (v && v->storage.kind == OPK_REG && v->storage.v.reg != REG_NONE) { 1260 w->dst = v->storage.v.reg; 1261 w->type = v->type; 1262 } else if (v && v->storage.kind == OPK_IMM) { 1263 w->imm_kind = 1; 1264 w->imm_a = v->storage.v.imm; 1265 w->type = v->type; 1266 w->dst = REG_NONE; 1267 } else { 1268 w->dst = REG_NONE; 1269 } 1270 t->dead = 1; 1271 } 1272 1273 void wasm_load(CGTarget* tg, Operand dst, Operand addr, MemAccess mem) { 1274 WTarget* t = (WTarget*)tg; 1275 if (t->dead) return; 1276 if (dst.kind != OPK_REG) wfail(t, "wasm: load dst must be REG"); 1277 if (addr.kind != OPK_LOCAL || 1278 slot_for(t, addr.v.frame_slot)->kind == W_SLOT_STACK) 1279 ensure_linear_memory(t); 1280 WIR* w = wir_push(t); 1281 w->op = (addr.kind == OPK_LOCAL && 1282 slot_for(t, addr.v.frame_slot)->kind == W_SLOT_LOCAL) 1283 ? WIR_LOAD_LOCAL 1284 : WIR_LOAD_MEM; 1285 w->dst = dst.v.reg; 1286 w->addr = addr; 1287 if (addr.kind == OPK_LOCAL) { 1288 WSlot* s = slot_for(t, addr.v.frame_slot); 1289 w->imm = 1290 (w->op == WIR_LOAD_LOCAL) ? (i64)s->wasm_local : (i64)addr.v.frame_slot; 1291 } 1292 w->mem = mem; 1293 w->type = dst.type; 1294 w->cls = dst.cls; 1295 } 1296 1297 void wasm_store(CGTarget* tg, Operand addr, Operand src, MemAccess mem) { 1298 WTarget* t = (WTarget*)tg; 1299 if (t->dead) return; 1300 if (addr.kind != OPK_LOCAL || 1301 slot_for(t, addr.v.frame_slot)->kind == W_SLOT_STACK) 1302 ensure_linear_memory(t); 1303 WIR* w = wir_push(t); 1304 w->op = (addr.kind == OPK_LOCAL && 1305 slot_for(t, addr.v.frame_slot)->kind == W_SLOT_LOCAL) 1306 ? WIR_STORE_LOCAL 1307 : WIR_STORE_MEM; 1308 w->addr = addr; 1309 if (addr.kind == OPK_LOCAL) { 1310 WSlot* s = slot_for(t, addr.v.frame_slot); 1311 w->imm = (w->op == WIR_STORE_LOCAL) ? 
(i64)s->wasm_local 1312 : (i64)addr.v.frame_slot; 1313 } 1314 wir_capture_operand(w, 0, src); 1315 w->mem = mem; 1316 /* The store's value type is the accessed type. When storing through a 1317 * pointer register, addr.type is the (possibly void) pointer rvalue type, 1318 * not the pointee — so prefer the MemAccess type, which always describes 1319 * the element being written, before falling back to the operands. */ 1320 w->type = mem.type ? mem.type : (addr.type ? addr.type : src.type); 1321 } 1322 1323 /* Variadic CG hooks. va_list on wasm32 is a single i32 pointer into a 1324 * caller-packed buffer of 8-byte slots (see wasm_call's variadic packing). 1325 * va_start writes the hidden va_ptr param into *ap; va_arg loads T from 1326 * *ap and advances *ap by 8; va_end is a no-op; va_copy copies the i32. */ 1327 void wasm_va_start(CGTarget* tg, Operand ap_addr) { 1328 WTarget* t = (WTarget*)tg; 1329 if (t->dead) return; 1330 if (!t->cur_is_variadic || t->va_ptr_param_local == 0xffffffffu) 1331 wfail(t, "wasm: va_start in non-variadic function"); 1332 ensure_linear_memory(t); 1333 WIR* w = wir_push(t); 1334 w->op = WIR_VA_START; 1335 w->addr = ap_addr; 1336 } 1337 1338 void wasm_va_arg(CGTarget* tg, Operand dst, Operand ap_addr, KitCgTypeId type) { 1339 WTarget* t = (WTarget*)tg; 1340 if (t->dead) return; 1341 if (dst.kind != OPK_REG) wfail(t, "wasm: va_arg dst must be REG"); 1342 const CgType* aty = type ? cg_type_get(t->c, type) : NULL; 1343 if (aty && 1344 (aty->kind == KIT_CG_TYPE_RECORD || aty->kind == KIT_CG_TYPE_ARRAY)) { 1345 wfail(t, "wasm target: va_arg of aggregate type not yet supported"); 1346 } 1347 ensure_linear_memory(t); 1348 WIR* w = wir_push(t); 1349 w->op = WIR_VA_ARG; 1350 w->dst = dst.v.reg; 1351 w->addr = ap_addr; 1352 w->type = type; 1353 w->cls = dst.cls; 1354 } 1355 1356 void wasm_va_end(CGTarget* tg, Operand ap_addr) { 1357 WTarget* t = (WTarget*)tg; 1358 (void)ap_addr; 1359 if (t->dead) return; 1360 /* No-op: nothing to release. 
*/ 1361 } 1362 1363 void wasm_va_copy(CGTarget* tg, Operand dst_ap_addr, Operand src_ap_addr) { 1364 WTarget* t = (WTarget*)tg; 1365 if (t->dead) return; 1366 ensure_linear_memory(t); 1367 WIR* w = wir_push(t); 1368 w->op = WIR_VA_COPY; 1369 w->addr = dst_ap_addr; 1370 w->call_sret_addr = src_ap_addr; /* reused slot — see WIR comment */ 1371 } 1372 1373 void wasm_addr_of(CGTarget* tg, Operand dst, Operand lv) { 1374 WTarget* t = (WTarget*)tg; 1375 if (t->dead) return; 1376 if (dst.kind != OPK_REG) wfail(t, "wasm: addr_of dst must be REG"); 1377 if (lv.kind == OPK_LOCAL) { 1378 WSlot* s = slot_for(t, lv.v.frame_slot); 1379 if (s->kind == W_SLOT_LOCAL) { 1380 u32 old_local = s->wasm_local; 1381 promote_slot_to_stack(t, s); 1382 WIR* st = wir_push(t); 1383 st->op = WIR_STORE_MEM; 1384 st->addr = lv; 1385 st->type = s->type; 1386 st->mem.type = s->type; 1387 st->mem.size = s->size; 1388 st->mem.align = s->align; 1389 st->imm_kind = WOP_WASM_LOCAL; 1390 st->imm_a = old_local; 1391 } 1392 } else { 1393 ensure_linear_memory(t); 1394 } 1395 WIR* w = wir_push(t); 1396 w->op = WIR_ADDR_OF; 1397 w->dst = dst.v.reg; 1398 w->addr = lv; 1399 w->type = dst.type; 1400 w->cls = dst.cls; 1401 } 1402 1403 void wasm_alloca(CGTarget* tg, Operand dst, Operand size, u32 align) { 1404 WTarget* t = (WTarget*)tg; 1405 if (t->dead) return; 1406 ensure_linear_memory(t); 1407 if (dst.kind != OPK_REG) wfail(t, "wasm: alloca dst must be REG"); 1408 ensure_stack_pointer(t); 1409 t->has_stack_frame = 1; 1410 WIR* w = wir_push(t); 1411 w->op = WIR_ALLOCA; 1412 w->dst = dst.v.reg; 1413 wir_capture_operand(w, 0, size); 1414 w->type = dst.type; 1415 w->type2 = size.type; 1416 w->cls = dst.cls; 1417 w->imm = align ? 
align : 16u; 1418 } 1419 1420 void wasm_copy_bytes(CGTarget* tg, Operand dst, Operand src, 1421 AggregateAccess a) { 1422 WTarget* t = (WTarget*)tg; 1423 if (t->dead) return; 1424 ensure_linear_memory(t); 1425 WIR* w = wir_push(t); 1426 w->op = WIR_COPY_BYTES; 1427 w->addr = dst; 1428 wir_capture_operand(w, 0, src); 1429 w->agg = a; 1430 } 1431 1432 void wasm_set_bytes(CGTarget* tg, Operand dst, Operand byte, 1433 AggregateAccess a) { 1434 WTarget* t = (WTarget*)tg; 1435 if (t->dead) return; 1436 WIR* w = wir_push(t); 1437 w->op = WIR_SET_BYTES; 1438 w->addr = dst; 1439 wir_capture_operand(w, 0, byte); 1440 w->agg = a; 1441 } 1442 1443 /* Atomic ops. CG forces `addr` to a REG and accepts reg-or-imm for value 1444 * operands. Wasm only models seq_cst; the KitCgMemOrder argument is captured 1445 * but not encoded — every emitted atomic op is sequentially consistent. The 1446 * caller-provided MemAccess carries the type and natural alignment we need 1447 * for the memarg width. */ 1448 static void atomic_require_addr_reg(WTarget* t, Operand addr, 1449 const char* what) { 1450 if (addr.kind != OPK_REG) 1451 wfail(t, "wasm: %s address must be in a register (got opkind %u)", what, 1452 (unsigned)addr.kind); 1453 } 1454 1455 void wasm_atomic_load(CGTarget* tg, Operand dst, Operand addr, MemAccess mem, 1456 KitCgMemOrder mo) { 1457 WTarget* t = (WTarget*)tg; 1458 (void)mo; 1459 if (t->dead) return; 1460 if (dst.kind != OPK_REG) wfail(t, "wasm: atomic_load dst must be REG"); 1461 atomic_require_addr_reg(t, addr, "atomic_load"); 1462 ensure_shared_memory(t); 1463 WIR* w = wir_push(t); 1464 w->op = WIR_ATOMIC_LOAD; 1465 w->dst = dst.v.reg; 1466 w->a = addr.v.reg; 1467 w->mem = mem; 1468 w->type = dst.type ? 
dst.type : mem.type; 1469 w->cls = dst.cls; 1470 } 1471 1472 void wasm_atomic_store(CGTarget* tg, Operand addr, Operand src, MemAccess mem, 1473 KitCgMemOrder mo) { 1474 WTarget* t = (WTarget*)tg; 1475 (void)mo; 1476 if (t->dead) return; 1477 atomic_require_addr_reg(t, addr, "atomic_store"); 1478 if (src.kind != OPK_REG && src.kind != OPK_IMM) 1479 wfail(t, "wasm: atomic_store value must be REG or IMM"); 1480 ensure_shared_memory(t); 1481 WIR* w = wir_push(t); 1482 w->op = WIR_ATOMIC_STORE; 1483 w->a = addr.v.reg; 1484 wir_capture_operand(w, 1, src); 1485 w->mem = mem; 1486 w->type = mem.type ? mem.type : src.type; 1487 } 1488 1489 void wasm_atomic_rmw(CGTarget* tg, KitCgAtomicOp op, Operand dst, Operand addr, 1490 Operand val, MemAccess mem, KitCgMemOrder mo) { 1491 WTarget* t = (WTarget*)tg; 1492 (void)mo; 1493 if (t->dead) return; 1494 if (dst.kind != OPK_REG) wfail(t, "wasm: atomic_rmw dst must be REG"); 1495 atomic_require_addr_reg(t, addr, "atomic_rmw"); 1496 if (val.kind != OPK_REG && val.kind != OPK_IMM) 1497 wfail(t, "wasm: atomic_rmw value must be REG or IMM"); 1498 /* KIT_CG_ATOMIC_NAND has no native wasm-threads opcode; the linearizer 1499 * expands it into an atomic cmpxchg retry loop (see WIR_ATOMIC_RMW). */ 1500 ensure_shared_memory(t); 1501 WIR* w = wir_push(t); 1502 w->op = WIR_ATOMIC_RMW; 1503 w->cgop = (u8)op; 1504 w->dst = dst.v.reg; 1505 w->a = addr.v.reg; 1506 wir_capture_operand(w, 1, val); 1507 w->mem = mem; 1508 w->type = dst.type ? 
dst.type : mem.type; 1509 w->cls = dst.cls; 1510 } 1511 1512 void wasm_atomic_cas(CGTarget* tg, Operand prior, Operand ok, Operand addr, 1513 Operand expected, Operand desired, MemAccess mem, 1514 KitCgMemOrder success, KitCgMemOrder failure) { 1515 WTarget* t = (WTarget*)tg; 1516 (void)success; 1517 (void)failure; 1518 if (t->dead) return; 1519 if (prior.kind != OPK_REG) wfail(t, "wasm: atomic_cas prior must be REG"); 1520 if (ok.kind != OPK_REG) wfail(t, "wasm: atomic_cas ok must be REG"); 1521 atomic_require_addr_reg(t, addr, "atomic_cas"); 1522 if (expected.kind != OPK_REG && expected.kind != OPK_IMM) 1523 wfail(t, "wasm: atomic_cas expected must be REG or IMM"); 1524 if (desired.kind != OPK_REG && desired.kind != OPK_IMM) 1525 wfail(t, "wasm: atomic_cas desired must be REG or IMM"); 1526 ensure_shared_memory(t); 1527 WIR* w = wir_push(t); 1528 w->op = WIR_ATOMIC_CAS; 1529 w->dst = prior.v.reg; 1530 w->dst2 = ok.v.reg; 1531 w->a = addr.v.reg; 1532 wir_capture_operand(w, 1, expected); 1533 /* Capture desired into op_c/imm_kind_c/imm_c (third operand slot). */ 1534 if (desired.kind == OPK_REG) { 1535 w->imm_kind_c = WOP_REG; 1536 w->op_c = desired.v.reg; 1537 } else { 1538 w->imm_kind_c = WOP_IMM; 1539 w->imm_c = desired.v.imm; 1540 } 1541 w->mem = mem; 1542 w->type = prior.type ? prior.type : mem.type; 1543 w->cls = prior.cls; 1544 w->type2 = ok.type; 1545 } 1546 1547 void wasm_fence(CGTarget* tg, KitCgMemOrder mo) { 1548 WTarget* t = (WTarget*)tg; 1549 (void)mo; 1550 if (t->dead) return; 1551 /* Wasm atomic.fence does not require a memory to exist, but in practice it 1552 * is meaningful only inside a module that has shared memory. We don't 1553 * force-create memory here to avoid producing a bogus memory for fence-only 1554 * modules. */ 1555 WIR* w = wir_push(t); 1556 w->op = WIR_FENCE; 1557 } 1558 1559 /* Forward decls: defined further down. 
 */
static WasmValType type_valtype(WTarget* t, KitCgTypeId ty);
void wasm_emit_unreachable(WTarget* t);

/* Per-intrinsic-name diagnostic text. Used both by the recorder for
 * SETJMP/LONGJMP (which we still reject) and for the fallback panic so
 * users see "wasm target: __builtin_clz ..." instead of a numeric kind.
 *
 * Returns a static string for every IntrinKind listed below and "<unknown>"
 * for any value that matches no case. The switch deliberately has no
 * `default` label; the out-of-range fallback is the return after it. */
static const char* intrin_name(IntrinKind k) {
  switch (k) {
    case INTRIN_NONE:
      return "<none>";
    case INTRIN_POPCOUNT:
      return "__builtin_popcount";
    case INTRIN_CTZ:
      return "__builtin_ctz";
    case INTRIN_CLZ:
      return "__builtin_clz";
    case INTRIN_BSWAP:
      return "__builtin_bswap";
    case INTRIN_MEMMOVE:
      return "memmove";
    case INTRIN_PREFETCH:
      return "__builtin_prefetch";
    case INTRIN_ASSUME_ALIGNED:
      return "__builtin_assume_aligned";
    case INTRIN_EXPECT:
      return "__builtin_expect";
    case INTRIN_TRAP:
      return "__builtin_trap";
    case INTRIN_SYSCALL:
      return "__kit_syscall";
    case INTRIN_SETJMP:
      return "setjmp";
    case INTRIN_LONGJMP:
      return "longjmp";
    case INTRIN_SADD_OVERFLOW:
      return "__builtin_sadd_overflow";
    case INTRIN_UADD_OVERFLOW:
      return "__builtin_uadd_overflow";
    case INTRIN_SSUB_OVERFLOW:
      return "__builtin_ssub_overflow";
    case INTRIN_USUB_OVERFLOW:
      return "__builtin_usub_overflow";
    case INTRIN_SMUL_OVERFLOW:
      return "__builtin_smul_overflow";
    case INTRIN_UMUL_OVERFLOW:
      return "__builtin_umul_overflow";
    case INTRIN_CPU_NOP:
      return "cpu_nop";
    case INTRIN_CPU_YIELD:
      return "cpu_yield";
    case INTRIN_WFI:
      return "wfi";
    case INTRIN_WFE:
      return "wfe";
    case INTRIN_SEV:
      return "sev";
    case INTRIN_ISB:
      return "isb";
    case INTRIN_DMB:
      return "dmb";
    case INTRIN_DSB:
      return "dsb";
    case INTRIN_IRQ_SAVE:
      return "irq_save";
    case INTRIN_IRQ_RESTORE:
      return "irq_restore";
    case INTRIN_IRQ_ENABLE:
      return "irq_enable";
    case INTRIN_IRQ_DISABLE:
      return "irq_disable";
    case INTRIN_FRAME_ADDRESS:
      return "frame_address";
    case INTRIN_RETURN_ADDRESS:
      return "return_address";
  }
  return "<unknown>";
}

/* Record (or reject) a compiler intrinsic.
 *
 *   tg          target handle (a WTarget)
 *   k           which intrinsic
 *   dst, ndst   result operands and their count
 *   args, nargs argument operands and their count
 *
 * Supported kinds either record a WIR entry, forward to another recorder, or
 * are dropped as hints. Every other kind reaches the generic "not yet
 * implemented" panic at the bottom. Returns without doing anything when
 * t->dead is set.
 *
 * NOTE(review): each compiler_panic call in the cases below is followed by a
 * `return`, which suggests it may return to the caller. Confirm against its
 * declaration. */
void wasm_intrinsic(CGTarget* tg, IntrinKind k, Operand* dst, u32 ndst,
                    const Operand* args, u32 nargs) {
  WTarget* t = (WTarget*)tg;
  if (t->dead) return;

  switch (k) {
    case INTRIN_TRAP:
      wasm_emit_unreachable(t);
      return;

    case INTRIN_PREFETCH:
      /* No-op hint. */
      return;

    case INTRIN_EXPECT:
    case INTRIN_ASSUME_ALIGNED:
      /* Pass-through hint: result = first argument. CG always allocates a
       * dst reg for these; copy arg[0] there so downstream uses see the
       * expected value. CG keeps the first arg as OPK_IMM when it was a
       * literal so the constant flows through unchanged. Any other
       * ndst/nargs shape is silently ignored. */
      if (ndst == 1 && nargs >= 1) {
        if (args[0].kind == OPK_IMM) {
          wasm_load_imm(tg, dst[0], args[0].v.imm);
        } else {
          wasm_copy(tg, dst[0], args[0]);
        }
      }
      return;

    case INTRIN_MEMMOVE: {
      /* memmove lowers to memory.copy, which is spec-defined to handle overlap
       * correctly. CG forces (dst, src) to REG and passes size as OPK_IMM
       * (kit_cg_memmove). */
      if (nargs != 3 || args[0].kind != OPK_REG || args[1].kind != OPK_REG) {
        compiler_panic(t->c, cur_loc(t),
                       "wasm target: %s requires register pointers",
                       intrin_name(k));
        return;
      }
      if (args[2].kind != OPK_IMM) {
        compiler_panic(t->c, cur_loc(t),
                       "wasm target: %s with non-constant size is not yet "
                       "supported",
                       intrin_name(k));
        return;
      }
      ensure_linear_memory(t);
      AggregateAccess a;
      memset(&a, 0, sizeof a);
      /* The immediate size is narrowed to u32 here. */
      a.size = (u32)args[2].v.imm;
      a.align = 1;
      /* Recorded as an ordinary byte copy: same WIR op as wasm_copy_bytes. */
      WIR* w = wir_push(t);
      w->op = WIR_COPY_BYTES;
      w->addr = args[0];
      wir_capture_operand(w, 0, args[1]);
      w->agg = a;
      return;
    }

    case INTRIN_CLZ:
    case INTRIN_CTZ:
    case INTRIN_POPCOUNT:
    case INTRIN_BSWAP: {
      if (ndst != 1 || nargs != 1 || dst[0].kind != OPK_REG ||
          args[0].kind != OPK_REG) {
        compiler_panic(t->c, cur_loc(t),
                       "wasm target: %s requires single REG operand",
                       intrin_name(k));
        return;
      }
      WIR* w = wir_push(t);
      w->op = WIR_INTRINSIC;
      w->cgop = (u8)k;
      w->dst = dst[0].v.reg;
      w->a = args[0].v.reg;
      w->type = dst[0].type;
      /* clz/ctz/popcount return int (i32) but operate at the operand's width
       * (e.g. __builtin_ctzl over an i64). The wasm op width must follow the
       * operand, with a wrap to the i32 dst afterward. type2 carries it. */
      w->type2 = args[0].type;
      w->cls = dst[0].cls;
      return;
    }

    case INTRIN_SADD_OVERFLOW:
    case INTRIN_UADD_OVERFLOW:
    case INTRIN_SSUB_OVERFLOW:
    case INTRIN_USUB_OVERFLOW:
    case INTRIN_SMUL_OVERFLOW:
    case INTRIN_UMUL_OVERFLOW: {
      /* Two result registers are required: dst[0] becomes the WIR's dst and
       * dst[1] its dst2. Argument operand kinds are not checked here; they
       * are handed to wir_capture_operand as-is. */
      if (ndst != 2 || nargs != 2 || dst[0].kind != OPK_REG ||
          dst[1].kind != OPK_REG) {
        compiler_panic(t->c, cur_loc(t),
                       "wasm target: %s requires 2 args + 2 result regs",
                       intrin_name(k));
        return;
      }
      /* Reject i64 mul-overflow for now: wasm core has no widening 64x64
       * multiply, so the standard expansion would need partial-product
       * synthesis. 32-bit (the common shape on wasm32) is supported. */
      WasmValType vt = type_valtype(t, dst[0].type);
      if (vt == WASM_VAL_I64 &&
          (k == INTRIN_SMUL_OVERFLOW || k == INTRIN_UMUL_OVERFLOW)) {
        compiler_panic(t->c, cur_loc(t),
                       "wasm target: 64-bit checked-overflow multiply is "
                       "not yet supported");
        return;
      }
      WIR* w = wir_push(t);
      w->op = WIR_INTRINSIC;
      w->cgop = (u8)k;
      w->dst = dst[0].v.reg;
      w->dst2 = dst[1].v.reg;
      w->type = dst[0].type;
      w->cls = dst[0].cls;
      wir_capture_operand(w, 0, args[0]);
      wir_capture_operand(w, 1, args[1]);
      return;
    }

    case INTRIN_SETJMP:
    case INTRIN_LONGJMP:
      compiler_panic(t->c, cur_loc(t),
                     "wasm target: %s is not yet supported (no exception/"
                     "stack-unwind runtime)",
                     intrin_name(k));
      return;

    /* Baremetal/CPU-control intrinsics have no wasm lowering;
     * kit_cg_target_supports_intrinsic reports them false so frontends
     * diagnose before reaching here. Fall through to the generic panic. */
    case INTRIN_CPU_NOP:
    case INTRIN_CPU_YIELD:
    case INTRIN_WFI:
    case INTRIN_WFE:
    case INTRIN_SEV:
    case INTRIN_ISB:
    case INTRIN_DMB:
    case INTRIN_DSB:
    case INTRIN_IRQ_SAVE:
    case INTRIN_IRQ_RESTORE:
    case INTRIN_IRQ_ENABLE:
    case INTRIN_IRQ_DISABLE:
    case INTRIN_SYSCALL:
    /* No frame-pointer chain in wasm; reported unsupported up front. */
    case INTRIN_FRAME_ADDRESS:
    case INTRIN_RETURN_ADDRESS:
    case INTRIN_NONE:
      break;
  }
  /* Generic rejection for every kind that did not return above. */
  compiler_panic(t->c, cur_loc(t),
                 "wasm target: intrinsic %s not yet implemented",
                 intrin_name(k));
}

/* Inline asm v1 — see doc/WASM.md "Inline asm" for the surface contract.
 *
 * Template syntax: WAT instruction sequence. Inputs pre-pushed to the value
 * stack (via local.get on synthetic input locals); outputs popped from the
 * stack into synthetic output locals after the body. The snippet's
 * local.get/set/tee with index < nin refers to the i-th input.
 *
 * NOTE(review): a comment inside wasm_asm_block states that outputs are read
 * from locals at the end of the lowering rather than popped from the value
 * stack; confirm which description is current.
 *
 * Constraints: "r" → wasm local; "i" → const-folded; "m" → i32 address.
 * Numeric tie-back constraints ("0","1",...) reuse the referenced output's
 * slot (cg/asm.c expands them into duplicate input entries at the end).
 *
 * Disallowed in v1: escaping br/br_if/br_table, return/return_call*,
 * call_indirect, snippet-internal locals, output count > 1, register
 * clobbers (only `memory` is accepted).
 */
/* Map the CG type of an inline-asm operand to its wasm value type.
 *
 *   ty    CG type id of the operand; 0 is a fatal error
 *   what  operand description spliced into diagnostics
 *   loc   source location used for diagnostics
 *
 * Fails via wfail_at unless the resulting value type is one of i32, i64, f32
 * or f64. */
static WasmValType wasm_asm_operand_vt(WTarget* t, KitCgTypeId ty,
                                       const char* what, SrcLoc loc) {
  WasmValType vt;
  if (!ty) wfail_at(t, loc, "wasm target: asm %s with no CG type", what);
  vt = valtype_for_type(t, ty);
  if (vt != WASM_VAL_I32 && vt != WASM_VAL_I64 && vt != WASM_VAL_F32 &&
      vt != WASM_VAL_F64)
    wfail_at(t, loc, "wasm target: asm %s of non-scalar type not supported",
             what);
  return vt;
}

/* Record an inline-asm block. The steps visible in this part of the function
 * are:
 *   1. reject clobbers other than `memory` and any hard-register operand;
 *   2. build a scratch WasmFunc whose params are the input types and whose
 *      locals are the output types;
 *   3. parse the WAT template into that scratch function;
 *   4. walk the parsed instructions rejecting escaping branches, returns,
 *      tail calls, and references to locals beyond nin + nout;
 *   5. validate the scratch body.
 *
 * NOTE(review): t->module is passed to the wasm_* helpers below without an
 * ensure_module(t) call in this function's visible part; confirm a module
 * always exists by the time an asm block is recorded. */
void wasm_asm_block(CGTarget* tg, const char* tmpl, const AsmConstraint* outs,
                    u32 nout, Operand* out_ops, const AsmConstraint* ins,
                    u32 nin, const Operand* in_ops, const Sym* clob,
                    u32 nclob) {
  WTarget* t = (WTarget*)tg;
  Heap* h = t->c->ctx->heap;
  Sym sym_memory;
  WasmFunc scratch;
  SrcLoc loc = cur_loc(t);
  u32 depth;
  u32 i;

  if (t->dead) return;

  /* Clobber policy: only `memory` is meaningful on wasm (effective no-op
   * because cg/asm.c already spilled live SSA values). Reject named-register
   * clobbers explicitly. */
  sym_memory = pool_intern_slice(t->c->global, SLICE_LIT("memory"));
  for (i = 0; i < nclob; ++i) {
    if (clob[i] != sym_memory)
      wfail_at(t, loc, "wasm target: asm register clobbers not yet supported");
  }
  for (i = 0; i < nout; ++i) {
    if (outs[i].reg)
      wfail_at(t, loc, "wasm target: asm hard-register operands not supported");
  }
  for (i = 0; i < nin; ++i) {
    if (ins[i].reg)
      wfail_at(t, loc, "wasm target: asm hard-register operands not supported");
  }

  /* Build a scratch WasmFunc with the synthetic signature. Layout is:
   *   params  = input types  (indices 0 .. nin-1)
   *   locals  = output types (indices nin .. nin+nout-1)
   *   results = empty
   * Snippets use local.get/set/tee N to read/write either side. The author
   * is responsible for writing each output via local.set N (>= nin); empty
   * snippets paired with `+r` / numeric tieback constraints get identity
   * behavior because the input and output share a wasm local at lowering. */
  memset(&scratch, 0, sizeof scratch);
  for (i = 0; i < nin; ++i) {
    WasmValType vt = wasm_asm_operand_vt(t, ins[i].type, "input operand", loc);
    /* Constraint-specific checks: "i" requires an immediate operand; "m"
     * requires an indirect (i32 address). Only the first character of the
     * constraint string is inspected. */
    if (ins[i].str && ins[i].str[0] == 'i' && in_ops[i].kind != OPK_IMM)
      wfail_at(t, loc, "wasm target: asm 'i' input must be an immediate");
    if (ins[i].str && ins[i].str[0] == 'm') {
      if (in_ops[i].kind != OPK_INDIRECT)
        wfail_at(t, loc, "wasm target: asm 'm' input must be indirect");
      /* A memory operand is passed as its address, so the param is i32
       * regardless of the operand's own type. */
      vt = WASM_VAL_I32;
    }
    wasm_func_push_param(t->c, t->module, &scratch, vt);
  }
  for (i = 0; i < nout; ++i) {
    WasmValType vt =
        wasm_asm_operand_vt(t, outs[i].type, "output operand", loc);
    wasm_func_push_local(t->c, t->module, &scratch, vt);
  }
  /* No declared result — outputs are read from locals at the end of the
   * lowering, not popped from the value stack. */

  /* Parse the template into scratch.insns. */
  wasm_parse_wat_body(t->c, t->module, &scratch, tmpl, strlen(tmpl), loc);

  /* Walk the parsed body to reject constructs that escape or aren't
   * supported in v1. Track control depth so br/br_if/br_table with imm >=
   * depth (i.e. would branch out of the snippet) are rejected. */
  depth = 0;
  for (i = 0; i < scratch.ninsns; ++i) {
    WasmInsn* in = &scratch.insns[i];
    switch (in->kind) {
      case WASM_INSN_BLOCK:
      case WASM_INSN_LOOP:
      case WASM_INSN_IF:
        depth++;
        break;
      case WASM_INSN_END:
        /* Guarded so an unmatched `end` cannot wrap the unsigned depth. */
        if (depth) depth--;
        break;
      case WASM_INSN_BR:
      case WASM_INSN_BR_IF:
        /* A label index >= the current nesting depth targets something
         * outside the snippet (at depth 0, any branch does). */
        if (in->imm < 0 || (u64)in->imm >= depth)
          wfail_at(t, in->loc,
                   "wasm target: asm template branch escapes snippet");
        break;
      case WASM_INSN_BR_TABLE: {
        u32 k;
        for (k = 0; k < in->ntargets; ++k)
          if (in->targets[k] >= depth)
            wfail_at(t, in->loc,
                     "wasm target: asm template br_table escapes snippet");
        break;
      }
      case WASM_INSN_RETURN:
      case WASM_INSN_RETURN_CALL:
      case WASM_INSN_RETURN_CALL_INDIRECT:
      case WASM_INSN_RETURN_CALL_REF:
        wfail_at(t, in->loc,
                 "wasm target: return/tail-call in asm template not "
                 "supported");
        break;
      case WASM_INSN_LOCAL_GET:
      case WASM_INSN_LOCAL_SET:
      case WASM_INSN_LOCAL_TEE:
        /* Valid local indices are exactly the nin inputs followed by the
         * nout outputs. */
        if (in->imm < 0 || (u64)in->imm >= (u64)(nin + nout))
          wfail_at(t, in->loc,
                   "wasm target: asm template references local beyond "
                   "declared operands (snippet-internal locals not "
                   "supported)");
        break;
      default:
        break;
    }
  }

  /* Validate the body against the synthetic signature. */
  wasm_validate_func(t->c, t->module, &scratch);

  {
    /* Build the WIR_ASM_BLOCK payload. raw_insns is owned by the WIR; the
     * other arrays are too. Sizes are zero-when-empty per the WIR teardown
     * conventions.
*/ 1941 WIR* w = wir_push(t); 1942 w->op = WIR_ASM_BLOCK; 1943 w->raw_ninsns = scratch.ninsns; 1944 if (scratch.ninsns) { 1945 w->raw_insns = (WasmInsn*)h->alloc(h, sizeof(WasmInsn) * scratch.ninsns, 1946 _Alignof(WasmInsn)); 1947 if (!w->raw_insns) wfail(t, "wasm: out of memory"); 1948 memcpy(w->raw_insns, scratch.insns, sizeof(WasmInsn) * scratch.ninsns); 1949 } 1950 w->asm_nin = nin; 1951 w->asm_nout = nout; 1952 if (nin) { 1953 w->asm_in_kinds = (u8*)h->alloc(h, nin, 1); 1954 w->asm_in_imms = (i64*)h->alloc(h, sizeof(i64) * nin, _Alignof(i64)); 1955 w->asm_in_regs = (Reg*)h->alloc(h, sizeof(Reg) * nin, _Alignof(Reg)); 1956 w->asm_in_types = (KitCgTypeId*)h->alloc(h, sizeof(KitCgTypeId) * nin, 1957 _Alignof(KitCgTypeId)); 1958 w->asm_in_share_out = (i32*)h->alloc(h, sizeof(i32) * nin, _Alignof(i32)); 1959 if (!w->asm_in_kinds || !w->asm_in_imms || !w->asm_in_regs || 1960 !w->asm_in_types || !w->asm_in_share_out) 1961 wfail(t, "wasm: out of memory"); 1962 for (i = 0; i < nin; ++i) w->asm_in_share_out[i] = -1; 1963 for (i = 0; i < nin; ++i) { 1964 Operand op = in_ops[i]; 1965 /* Numeric tieback constraints ("0".."9") share the matching 1966 * output's wasm local. cg/asm.c also rewrites +r inout outputs 1967 * into duplicate inputs at the tail of ins[], using the same 1968 * numeric encoding. */ 1969 const char* s = ins[i].str; 1970 if (s && s[0] >= '0' && s[0] <= '9' && s[1] == '\0') { 1971 int idx = s[0] - '0'; 1972 if ((u32)idx < nout) w->asm_in_share_out[i] = idx; 1973 } 1974 switch (op.kind) { 1975 case OPK_REG: 1976 w->asm_in_kinds[i] = WOP_REG; 1977 w->asm_in_regs[i] = op.v.reg; 1978 w->asm_in_imms[i] = 0; 1979 w->asm_in_types[i] = op.type ? op.type : ins[i].type; 1980 break; 1981 case OPK_IMM: 1982 w->asm_in_kinds[i] = WOP_IMM; 1983 w->asm_in_regs[i] = REG_NONE; 1984 w->asm_in_imms[i] = op.v.imm; 1985 w->asm_in_types[i] = op.type ? 
op.type : ins[i].type; 1986 break; 1987 case OPK_INDIRECT: 1988 /* For "m" constraint: the input local holds the i32 address 1989 * `base + ofs` of the lvalue. We re-use asm_in_imms (unused 1990 * for WOP_REG operands) to carry the displacement so the 1991 * linearizer can splice in `i32.const ofs; i32.add` after 1992 * pushing the base local. */ 1993 w->asm_in_kinds[i] = WOP_REG; 1994 w->asm_in_regs[i] = op.v.ind.base; 1995 w->asm_in_imms[i] = (i64)op.v.ind.ofs; 1996 w->asm_in_types[i] = builtin_id(KIT_CG_BUILTIN_I32); 1997 break; 1998 default: 1999 wfail_at(t, loc, "wasm target: unsupported asm input operand kind"); 2000 } 2001 } 2002 } 2003 if (nout) { 2004 w->asm_out_regs = (Reg*)h->alloc(h, sizeof(Reg) * nout, _Alignof(Reg)); 2005 w->asm_out_types = (KitCgTypeId*)h->alloc(h, sizeof(KitCgTypeId) * nout, 2006 _Alignof(KitCgTypeId)); 2007 if (!w->asm_out_regs || !w->asm_out_types) 2008 wfail(t, "wasm: out of memory"); 2009 for (i = 0; i < nout; ++i) { 2010 if (out_ops[i].kind != OPK_REG) 2011 wfail_at(t, loc, "wasm target: asm output must be a register"); 2012 w->asm_out_regs[i] = out_ops[i].v.reg; 2013 w->asm_out_types[i] = out_ops[i].type ? out_ops[i].type : outs[i].type; 2014 } 2015 } 2016 } 2017 2018 /* Free scratch func storage. The parsed insns have been copied into the 2019 * WIR payload. 
*/ 2020 if (scratch.params) 2021 h->free(h, scratch.params, sizeof(WasmValType) * scratch.cap_params); 2022 if (scratch.locals) 2023 h->free(h, scratch.locals, sizeof(WasmValType) * scratch.cap_locals); 2024 if (scratch.insns) 2025 h->free(h, scratch.insns, sizeof(WasmInsn) * scratch.cap_insns); 2026 } 2027 2028 void wasm_file_scope_asm(CGTarget* tg, const char* src, size_t len) { 2029 WTarget* t = (WTarget*)tg; 2030 (void)src; 2031 (void)len; 2032 compiler_panic(t->c, cur_loc(t), 2033 "wasm target: file-scope asm not yet supported"); 2034 } 2035 2036 void wasm_set_loc(CGTarget* tg, SrcLoc loc) { 2037 WTarget* t = (WTarget*)tg; 2038 /* No debug info in v1, but we stash the most recent loc so cur_loc / 2039 * diagnostics attribute to the actual statement rather than the 2040 * function-definition location. */ 2041 t->cur_stmt_loc = loc; 2042 } 2043 2044 void wasm_emit_unreachable(WTarget* t) { 2045 if (t->dead) return; 2046 WIR* w = wir_push(t); 2047 w->op = WIR_UNREACHABLE; 2048 t->dead = 1; 2049 } 2050 2051 /* Control terminator (the C __builtin_unreachable point): emit the Wasm 2052 * `unreachable` opcode, which traps if reached. Ends the current block. */ 2053 void wasm_unreachable(CGTarget* tg) { wasm_emit_unreachable((WTarget*)tg); } 2054 2055 /* ----------------------------------------------------------------- 2056 * WIR -> WasmFunc lowering 2057 * ----------------------------------------------------------------- */ 2058 2059 static void emit_insn(WTarget* t, WasmInsnKind k, i64 imm) { 2060 wasm_func_add_insn(t->c, t->module, t->cur_func, k, imm); 2061 } 2062 static void emit_fp(WTarget* t, WasmInsnKind k, double v) { 2063 wasm_func_add_fp_insn(t->c, t->module, t->cur_func, k, v); 2064 } 2065 2066 /* Push an operand onto the wasm value stack. */ 2067 static void emit_push_operand_reg(WTarget* t, Reg r) { 2068 if (r == REG_NONE) wfail(t, "wasm: push of REG_NONE"); 2069 /* The reg must already have a local. 
*/ 2070 if (r >= t->reg_cap || t->reg_to_local[r] == 0xffffffffu) { 2071 wfail(t, "wasm: reg %u used before being defined", (unsigned)r); 2072 } 2073 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->reg_to_local[r]); 2074 } 2075 2076 static WasmValType type_valtype(WTarget* t, KitCgTypeId ty) { 2077 return valtype_for_type(t, ty); 2078 } 2079 2080 static void emit_push_imm(WTarget* t, WasmValType vt, i64 imm) { 2081 WasmInsnKind k = 2082 (vt == WASM_VAL_I64) ? WASM_INSN_I64_CONST : WASM_INSN_I32_CONST; 2083 emit_insn(t, k, imm); 2084 } 2085 2086 static u32 memarg_align_log2(u32 align, u32 width); 2087 static WasmInsnKind load_kind_for(WTarget* t, KitCgTypeId ty, MemAccess ma); 2088 2089 static void emit_push_operand(WTarget* t, u32 kind, i64 imm, Reg r, 2090 KitCgTypeId ty) { 2091 if (kind == WOP_IMM) { 2092 WasmValType vt = type_valtype(t, ty); 2093 if (vt == WASM_VAL_F32 || vt == WASM_VAL_F64) { 2094 wfail(t, "wasm: float immediate operand not supported"); 2095 } 2096 emit_push_imm(t, vt, imm); 2097 } else if (kind == WOP_LOCAL) { 2098 FrameSlot fs = (FrameSlot)imm; 2099 WSlot* s = slot_for(t, fs); 2100 if (s->kind == W_SLOT_LOCAL) { 2101 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)s->wasm_local); 2102 } else { 2103 MemAccess ma; 2104 memset(&ma, 0, sizeof ma); 2105 ma.type = ty; 2106 ma.size = (u32)abi_cg_sizeof(t->c->abi, ty); 2107 ma.align = s->align; 2108 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->frame_base_local); 2109 WasmInsnKind k = load_kind_for(t, ty, ma); 2110 wasm_func_add_mem_insn( 2111 t->c, t->module, t->cur_func, k, 2112 memarg_align_log2(ma.align, wasm_mem_width((uint8_t)k)), 2113 s->frame_offset, 0); 2114 } 2115 } else { 2116 emit_push_operand_reg(t, r); 2117 } 2118 } 2119 2120 static void emit_local_set(WTarget* t, Reg dst, KitCgTypeId ty, RegClass cls) { 2121 u32 idx = reg_local(t, dst, ty, cls); 2122 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)idx); 2123 } 2124 2125 static u32 memarg_align_log2(u32 align, u32 width) { 2126 u32 a = align ? 
align : width; 2127 u32 lg = 0; 2128 if (a > width) a = width; 2129 while (a > 1u) { 2130 a >>= 1u; 2131 lg++; 2132 } 2133 return lg; 2134 } 2135 2136 static WasmInsnKind load_kind_for(WTarget* t, KitCgTypeId ty, MemAccess ma) { 2137 WasmValType vt = type_valtype(t, ty); 2138 u32 size = ma.size ? ma.size : (u32)abi_cg_sizeof(t->c->abi, ty); 2139 if (vt == WASM_VAL_F32) return WASM_INSN_F32_LOAD; 2140 if (vt == WASM_VAL_F64) return WASM_INSN_F64_LOAD; 2141 if (vt == WASM_VAL_I64) { 2142 if (size == 1) return WASM_INSN_I64_LOAD8_U; 2143 if (size == 2) return WASM_INSN_I64_LOAD16_U; 2144 if (size == 4) return WASM_INSN_I64_LOAD32_U; 2145 return WASM_INSN_I64_LOAD; 2146 } 2147 if (size == 1) return WASM_INSN_I32_LOAD8_U; 2148 if (size == 2) return WASM_INSN_I32_LOAD16_U; 2149 return WASM_INSN_I32_LOAD; 2150 } 2151 2152 static WasmInsnKind store_kind_for(WTarget* t, KitCgTypeId ty, MemAccess ma) { 2153 WasmValType vt = type_valtype(t, ty); 2154 u32 size = ma.size ? ma.size : (u32)abi_cg_sizeof(t->c->abi, ty); 2155 if (vt == WASM_VAL_F32) return WASM_INSN_F32_STORE; 2156 if (vt == WASM_VAL_F64) return WASM_INSN_F64_STORE; 2157 if (vt == WASM_VAL_I64) { 2158 if (size == 1) return WASM_INSN_I64_STORE8; 2159 if (size == 2) return WASM_INSN_I64_STORE16; 2160 if (size == 4) return WASM_INSN_I64_STORE32; 2161 return WASM_INSN_I64_STORE; 2162 } 2163 if (size == 1) return WASM_INSN_I32_STORE8; 2164 if (size == 2) return WASM_INSN_I32_STORE16; 2165 return WASM_INSN_I32_STORE; 2166 } 2167 2168 /* Atomic op selection. Wasm threads gives natural-width atomic load/store for 2169 * i32/i64 (with 8/16/32 subword variants) but only full-width (i32/i64) RMW 2170 * and cmpxchg in kit's wasm core. Sub-word RMW/cmpxchg therefore diagnose 2171 * rather than silently widening. */ 2172 static WasmInsnKind atomic_load_kind_for(WTarget* t, KitCgTypeId ty, 2173 MemAccess ma) { 2174 WasmValType vt = type_valtype(t, ty); 2175 u32 size = ma.size ? 
ma.size : (u32)abi_cg_sizeof(t->c->abi, ty); 2176 if (vt == WASM_VAL_F32 || vt == WASM_VAL_F64) 2177 wfail(t, 2178 "wasm target: atomic load of floating-point value is not " 2179 "representable in wasm threads"); 2180 if (vt == WASM_VAL_I64) { 2181 if (size == 1) return WASM_INSN_I64_ATOMIC_LOAD8_U; 2182 if (size == 2) return WASM_INSN_I64_ATOMIC_LOAD16_U; 2183 if (size == 4) return WASM_INSN_I64_ATOMIC_LOAD32_U; 2184 if (size == 8) return WASM_INSN_I64_ATOMIC_LOAD; 2185 wfail(t, "wasm: atomic load i64 size %u not supported", size); 2186 } 2187 if (size == 1) return WASM_INSN_I32_ATOMIC_LOAD8_U; 2188 if (size == 2) return WASM_INSN_I32_ATOMIC_LOAD16_U; 2189 if (size == 4) return WASM_INSN_I32_ATOMIC_LOAD; 2190 wfail(t, "wasm: atomic load i32 size %u not supported", size); 2191 } 2192 2193 static WasmInsnKind atomic_store_kind_for(WTarget* t, KitCgTypeId ty, 2194 MemAccess ma) { 2195 WasmValType vt = type_valtype(t, ty); 2196 u32 size = ma.size ? ma.size : (u32)abi_cg_sizeof(t->c->abi, ty); 2197 if (vt == WASM_VAL_F32 || vt == WASM_VAL_F64) 2198 wfail(t, 2199 "wasm target: atomic store of floating-point value is not " 2200 "representable in wasm threads"); 2201 if (vt == WASM_VAL_I64) { 2202 if (size == 1) return WASM_INSN_I64_ATOMIC_STORE8; 2203 if (size == 2) return WASM_INSN_I64_ATOMIC_STORE16; 2204 if (size == 4) return WASM_INSN_I64_ATOMIC_STORE32; 2205 if (size == 8) return WASM_INSN_I64_ATOMIC_STORE; 2206 wfail(t, "wasm: atomic store i64 size %u not supported", size); 2207 } 2208 if (size == 1) return WASM_INSN_I32_ATOMIC_STORE8; 2209 if (size == 2) return WASM_INSN_I32_ATOMIC_STORE16; 2210 if (size == 4) return WASM_INSN_I32_ATOMIC_STORE; 2211 wfail(t, "wasm: atomic store i32 size %u not supported", size); 2212 } 2213 2214 static WasmInsnKind atomic_rmw_kind_for(WTarget* t, KitCgAtomicOp op, 2215 KitCgTypeId ty, MemAccess ma) { 2216 WasmValType vt = type_valtype(t, ty); 2217 u32 size = ma.size ? 
ma.size : (u32)abi_cg_sizeof(t->c->abi, ty); 2218 int is64 = (vt == WASM_VAL_I64); 2219 if (vt == WASM_VAL_F32 || vt == WASM_VAL_F64) 2220 wfail(t, 2221 "wasm target: atomic RMW on floating-point value is not " 2222 "representable in wasm threads"); 2223 if (!(size == 4 || size == 8) || (is64 && size != 8) || 2224 (!is64 && size != 4)) { 2225 wfail(t, 2226 "wasm target: atomic RMW size %u not yet supported (only " 2227 "full-width i32 and i64 atomic RMW are wired)", 2228 size); 2229 } 2230 switch (op) { 2231 case KIT_CG_ATOMIC_ADD: 2232 return is64 ? WASM_INSN_I64_ATOMIC_RMW_ADD : WASM_INSN_I32_ATOMIC_RMW_ADD; 2233 case KIT_CG_ATOMIC_SUB: 2234 return is64 ? WASM_INSN_I64_ATOMIC_RMW_SUB : WASM_INSN_I32_ATOMIC_RMW_SUB; 2235 case KIT_CG_ATOMIC_AND: 2236 return is64 ? WASM_INSN_I64_ATOMIC_RMW_AND : WASM_INSN_I32_ATOMIC_RMW_AND; 2237 case KIT_CG_ATOMIC_OR: 2238 return is64 ? WASM_INSN_I64_ATOMIC_RMW_OR : WASM_INSN_I32_ATOMIC_RMW_OR; 2239 case KIT_CG_ATOMIC_XOR: 2240 return is64 ? WASM_INSN_I64_ATOMIC_RMW_XOR : WASM_INSN_I32_ATOMIC_RMW_XOR; 2241 case KIT_CG_ATOMIC_XCHG: 2242 return is64 ? WASM_INSN_I64_ATOMIC_RMW_XCHG 2243 : WASM_INSN_I32_ATOMIC_RMW_XCHG; 2244 case KIT_CG_ATOMIC_NAND: 2245 wfail(t, "wasm target: atomic NAND has no native wasm-threads opcode"); 2246 } 2247 wfail(t, "wasm: unsupported atomic RMW op %d", (int)op); 2248 } 2249 2250 static WasmInsnKind atomic_cmpxchg_kind_for(WTarget* t, KitCgTypeId ty, 2251 MemAccess ma) { 2252 WasmValType vt = type_valtype(t, ty); 2253 u32 size = ma.size ? 
ma.size : (u32)abi_cg_sizeof(t->c->abi, ty); 2254 if (vt == WASM_VAL_F32 || vt == WASM_VAL_F64) 2255 wfail(t, 2256 "wasm target: atomic cmpxchg on floating-point value is not " 2257 "representable in wasm threads"); 2258 if (vt == WASM_VAL_I64) { 2259 if (size != 8) 2260 wfail(t, "wasm target: atomic cmpxchg i64 size %u not yet supported", 2261 size); 2262 return WASM_INSN_I64_ATOMIC_RMW_CMPXCHG; 2263 } 2264 if (size != 4) 2265 wfail(t, "wasm target: atomic cmpxchg i32 size %u not yet supported", size); 2266 return WASM_INSN_I32_ATOMIC_RMW_CMPXCHG; 2267 } 2268 2269 /* Look up (or assign) `sym`'s slot in the funcref table. Returned index is 2270 * the wasm table index (>= 1, with 0 reserved as the null/trap slot). */ 2271 static u32 func_table_index_for(WTarget* t, ObjSymId sym) { 2272 Heap* h = t->c->ctx->heap; 2273 for (u32 i = 0; i < t->func_table_count; ++i) { 2274 if (t->func_table[i] == sym) return i + 1u; 2275 } 2276 if (t->func_table_count == t->func_table_cap) { 2277 u32 nc = t->func_table_cap ? t->func_table_cap * 2u : 8u; 2278 void* p = h->realloc(h, t->func_table, sizeof(ObjSymId) * t->func_table_cap, 2279 sizeof(ObjSymId) * nc, _Alignof(ObjSymId)); 2280 if (!p) wfail(t, "wasm: out of memory"); 2281 t->func_table = (ObjSymId*)p; 2282 t->func_table_cap = nc; 2283 } 2284 t->func_table[t->func_table_count] = sym; 2285 t->has_func_table = 1; 2286 return ++t->func_table_count; /* slot 0 reserved; first sym -> index 1 */ 2287 } 2288 2289 /* Defer function-pointer materialization to wasm_materialize_functable. 2290 * Emits `i32.const 0` and queues a WFuncTableFixup keyed by the placeholder's 2291 * (cur_func_idx, ninsns-1). */ 2292 static void queue_func_table_fixup(WTarget* t, ObjSymId sym) { 2293 Heap* h = t->c->ctx->heap; 2294 if (!t->cur_func) wfail(t, "wasm: function address outside a function"); 2295 /* Ensure the function gets a slot and force the WasmFunc shell to exist so 2296 * the table's element segment can resolve its wasm-func index later. 
*/ 2297 (void)func_table_index_for(t, sym); 2298 (void)sym_to_wasm_func(t, sym, NULL); 2299 emit_insn(t, WASM_INSN_I32_CONST, 0); 2300 if (t->func_table_fixups_count == t->func_table_fixups_cap) { 2301 u32 nc = t->func_table_fixups_cap ? t->func_table_fixups_cap * 2u : 16u; 2302 void* p = 2303 h->realloc(h, t->func_table_fixups, 2304 sizeof(WFuncTableFixup) * t->func_table_fixups_cap, 2305 sizeof(WFuncTableFixup) * nc, _Alignof(WFuncTableFixup)); 2306 if (!p) wfail(t, "wasm: out of memory"); 2307 t->func_table_fixups = (WFuncTableFixup*)p; 2308 t->func_table_fixups_cap = nc; 2309 } 2310 WFuncTableFixup* fx = &t->func_table_fixups[t->func_table_fixups_count++]; 2311 fx->wasm_func_idx = t->cur_func_idx; 2312 fx->insn_idx = t->cur_func->ninsns - 1u; 2313 fx->sym = sym; 2314 } 2315 2316 /* Defer symbol-address resolution to wasm_materialize_data. Emits an 2317 * i32.const placeholder into the current function and queues a WSymFixup 2318 * keyed by (cur_func_idx, ninsns-1). The compact section layout is only 2319 * known once every section's final size is settled, which doesn't happen 2320 * until finalize. */ 2321 static void queue_symbol_addr_fixup(WTarget* t, ObjSymId sym, i64 addend) { 2322 Heap* h = t->c->ctx->heap; 2323 const ObjSym* os = obj_symbol_get(t->obj, sym); 2324 /* Function-symbol addresses route through the funcref table, not linear 2325 * memory. CG occasionally takes the address of an extern function before 2326 * the function body is seen (forward declarations, indirect-call 2327 * setup); the SK_FUNC kind is set by the frontend at sym creation. 
*/ 2328 if (os && os->kind == SK_FUNC) { 2329 if (addend != 0) 2330 wfail(t, "wasm: nonzero addend on function-pointer reference"); 2331 queue_func_table_fixup(t, sym); 2332 return; 2333 } 2334 if (!os) 2335 wfail(t, "wasm target: address of unresolved symbol not yet implemented"); 2336 if (os->section_id == OBJ_SEC_NONE && os->kind != SK_COMMON) 2337 wfail(t, "wasm target: address of undefined symbol not yet implemented"); 2338 /* SK_COMMON falls through: apply_sym_fixups allocates a BSS-style base for 2339 * it in wasm_materialize_data and patches the i32.const at finalize. */ 2340 if (addend < INT32_MIN || addend > INT32_MAX) 2341 wfail(t, "wasm: symbol addend out of range"); 2342 if (!t->cur_func) wfail(t, "wasm: symbol address outside a function"); 2343 emit_insn(t, WASM_INSN_I32_CONST, 0); 2344 if (t->sym_fixups_count == t->sym_fixups_cap) { 2345 u32 nc = t->sym_fixups_cap ? t->sym_fixups_cap * 2u : 16u; 2346 void* p = 2347 h->realloc(h, t->sym_fixups, sizeof(WSymFixup) * t->sym_fixups_cap, 2348 sizeof(WSymFixup) * nc, _Alignof(WSymFixup)); 2349 if (!p) wfail(t, "wasm: out of memory"); 2350 t->sym_fixups = (WSymFixup*)p; 2351 t->sym_fixups_cap = nc; 2352 } 2353 WSymFixup* fx = &t->sym_fixups[t->sym_fixups_count++]; 2354 fx->wasm_func_idx = t->cur_func_idx; 2355 fx->insn_idx = t->cur_func->ninsns - 1u; 2356 fx->sym = sym; 2357 fx->addend = addend; 2358 } 2359 2360 /* Push the value of an OPK_INDIRECT base/index component. The CG defers loading 2361 * the pointer value of an address-taken (frame-resident) pointer local to the 2362 * backend: the deref of such a local arrives as an OPK_INDIRECT whose base 2363 * names the local itself, not a materialized register (see 2364 * fold_ea_into_operand, and native_direct_target's nd_cache_reg_for, which 2365 * loads it from the home). 
In the wasm backend each id is either a register 2366 * (reg_to_local set) or a frame slot, never both — so dispatch on that: a 2367 * register is fetched directly; a frame-resident local is read from its home 2368 * like any other WOP_LOCAL operand. */ 2369 static void emit_push_addr_component(WTarget* t, Reg id) { 2370 if (id < t->reg_cap && t->reg_to_local[id] != 0xffffffffu) { 2371 emit_push_operand_reg(t, id); 2372 } else { 2373 WSlot* s = slot_for(t, id); 2374 emit_push_operand(t, WOP_LOCAL, (i64)id, REG_NONE, s->type); 2375 } 2376 } 2377 2378 /* Value type of an indirect component, whether it lives in a register or a 2379 * frame slot (used to decide i64->i32 address narrowing). */ 2380 static WasmValType addr_component_valtype(WTarget* t, Reg id) { 2381 if (id < t->reg_cap && t->reg_to_local[id] != 0xffffffffu && t->reg_type[id]) 2382 return type_valtype(t, t->reg_type[id]); 2383 return type_valtype(t, slot_for(t, id)->type); 2384 } 2385 2386 static void emit_addr_operand(WTarget* t, Operand addr, uint64_t* offset_out) { 2387 *offset_out = 0; 2388 if (addr.kind == OPK_LOCAL) { 2389 WSlot* s = slot_for(t, addr.v.frame_slot); 2390 if (s->kind != W_SLOT_STACK) 2391 wfail(t, "wasm: address of non-addressable local"); 2392 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->frame_base_local); 2393 *offset_out = s->frame_offset; 2394 return; 2395 } 2396 if (addr.kind == OPK_INDIRECT) { 2397 emit_push_addr_component(t, addr.v.ind.base); 2398 if (addr.v.ind.index != REG_NONE) { 2399 emit_push_addr_component(t, addr.v.ind.index); 2400 if (addr_component_valtype(t, addr.v.ind.index) == WASM_VAL_I64) { 2401 emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 2402 } 2403 if (addr.v.ind.log2_scale != 0) { 2404 emit_insn(t, WASM_INSN_I32_CONST, (i64)addr.v.ind.log2_scale); 2405 emit_insn(t, WASM_INSN_I32_SHL, 0); 2406 } 2407 emit_insn(t, WASM_INSN_I32_ADD, 0); 2408 } 2409 if (addr.v.ind.ofs >= 0) { 2410 *offset_out = (uint32_t)addr.v.ind.ofs; 2411 } else { 2412 emit_insn(t, 
WASM_INSN_I32_CONST, (i64)addr.v.ind.ofs); 2413 emit_insn(t, WASM_INSN_I32_ADD, 0); 2414 } 2415 return; 2416 } 2417 if (addr.kind == OPK_GLOBAL) { 2418 queue_symbol_addr_fixup(t, addr.v.global.sym, addr.v.global.addend); 2419 return; 2420 } 2421 if (addr.kind == OPK_REG) { 2422 /* An i32 address already materialized in a register: just push it. */ 2423 emit_push_operand_reg(t, addr.v.reg); 2424 return; 2425 } 2426 wfail(t, "wasm: unsupported address operand kind %u", (unsigned)addr.kind); 2427 } 2428 2429 /* Push a complete i32 address value (folding any positive offset into the base 2430 * via i32.add). Used by bulk-memory ops (memory.copy / memory.fill) which take 2431 * the address as a stack operand and carry no memarg offset. */ 2432 static void emit_push_addr_value(WTarget* t, Operand addr) { 2433 uint64_t off; 2434 emit_addr_operand(t, addr, &off); 2435 if (off != 0) { 2436 emit_insn(t, WASM_INSN_I32_CONST, (i64)(uint32_t)off); 2437 emit_insn(t, WASM_INSN_I32_ADD, 0); 2438 } 2439 } 2440 2441 static void emit_load_addr(WTarget* t, Operand addr, KitCgTypeId ty, 2442 MemAccess ma) { 2443 uint64_t offset; 2444 WasmInsnKind k = load_kind_for(t, ty, ma); 2445 u32 width = wasm_mem_width((uint8_t)k); 2446 emit_addr_operand(t, addr, &offset); 2447 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, k, 2448 memarg_align_log2(ma.align, width), offset, 0); 2449 } 2450 2451 static void emit_store_addr(WTarget* t, Operand addr, KitCgTypeId ty, 2452 Operand src, MemAccess ma, u32 src_kind, 2453 i64 src_imm, Reg src_reg) { 2454 uint64_t offset; 2455 WasmInsnKind k = store_kind_for(t, ty, ma); 2456 u32 width = wasm_mem_width((uint8_t)k); 2457 emit_addr_operand(t, addr, &offset); 2458 if (src_kind == WOP_IMM) { 2459 emit_push_imm(t, type_valtype(t, ty), src_imm); 2460 } else if (src_kind == WOP_WASM_LOCAL) { 2461 emit_insn(t, WASM_INSN_LOCAL_GET, src_imm); 2462 } else if (src.kind == OPK_IMM) { 2463 emit_push_imm(t, type_valtype(t, ty), src.v.imm); 2464 } else if (src_kind 
== WOP_LOCAL) { 2465 emit_push_operand(t, WOP_LOCAL, src_imm, REG_NONE, ty); 2466 } else { 2467 emit_push_operand_reg(t, src_reg); 2468 } 2469 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, k, 2470 memarg_align_log2(ma.align, width), offset, 0); 2471 } 2472 2473 /* Map (BinOp, valtype) to wasm opcode. */ 2474 static WasmInsnKind binop_kind(WTarget* t, BinOp op, WasmValType vt) { 2475 switch (op) { 2476 case BO_IADD: 2477 return vt == WASM_VAL_I64 ? WASM_INSN_I64_ADD : WASM_INSN_I32_ADD; 2478 case BO_ISUB: 2479 return vt == WASM_VAL_I64 ? WASM_INSN_I64_SUB : WASM_INSN_I32_SUB; 2480 case BO_IMUL: 2481 return vt == WASM_VAL_I64 ? WASM_INSN_I64_MUL : WASM_INSN_I32_MUL; 2482 case BO_SDIV: 2483 return vt == WASM_VAL_I64 ? WASM_INSN_I64_DIV_S : WASM_INSN_I32_DIV_S; 2484 case BO_UDIV: 2485 return vt == WASM_VAL_I64 ? WASM_INSN_I64_DIV_U : WASM_INSN_I32_DIV_U; 2486 case BO_SREM: 2487 return vt == WASM_VAL_I64 ? WASM_INSN_I64_REM_S : WASM_INSN_I32_REM_S; 2488 case BO_UREM: 2489 return vt == WASM_VAL_I64 ? WASM_INSN_I64_REM_U : WASM_INSN_I32_REM_U; 2490 case BO_AND: 2491 return vt == WASM_VAL_I64 ? WASM_INSN_I64_AND : WASM_INSN_I32_AND; 2492 case BO_OR: 2493 return vt == WASM_VAL_I64 ? WASM_INSN_I64_OR : WASM_INSN_I32_OR; 2494 case BO_XOR: 2495 return vt == WASM_VAL_I64 ? WASM_INSN_I64_XOR : WASM_INSN_I32_XOR; 2496 case BO_SHL: 2497 return vt == WASM_VAL_I64 ? WASM_INSN_I64_SHL : WASM_INSN_I32_SHL; 2498 case BO_SHR_S: 2499 return vt == WASM_VAL_I64 ? WASM_INSN_I64_SHR_S : WASM_INSN_I32_SHR_S; 2500 case BO_SHR_U: 2501 return vt == WASM_VAL_I64 ? WASM_INSN_I64_SHR_U : WASM_INSN_I32_SHR_U; 2502 case BO_FADD: 2503 return vt == WASM_VAL_F64 ? WASM_INSN_F64_ADD : WASM_INSN_F32_ADD; 2504 case BO_FSUB: 2505 return vt == WASM_VAL_F64 ? WASM_INSN_F64_SUB : WASM_INSN_F32_SUB; 2506 case BO_FMUL: 2507 return vt == WASM_VAL_F64 ? WASM_INSN_F64_MUL : WASM_INSN_F32_MUL; 2508 case BO_FDIV: 2509 return vt == WASM_VAL_F64 ? 
WASM_INSN_F64_DIV : WASM_INSN_F32_DIV; 2510 } 2511 wfail(t, "wasm: unsupported binop %d", (int)op); 2512 } 2513 2514 static WasmInsnKind cmp_kind(WTarget* t, CmpOp op, WasmValType vt) { 2515 int is64 = (vt == WASM_VAL_I64); 2516 switch (op) { 2517 case CMP_EQ: 2518 return is64 ? WASM_INSN_I64_EQ : WASM_INSN_I32_EQ; 2519 case CMP_NE: 2520 return is64 ? WASM_INSN_I64_NE : WASM_INSN_I32_NE; 2521 case CMP_LT_S: 2522 return is64 ? WASM_INSN_I64_LT_S : WASM_INSN_I32_LT_S; 2523 case CMP_LE_S: 2524 return is64 ? WASM_INSN_I64_LE_S : WASM_INSN_I32_LE_S; 2525 case CMP_GT_S: 2526 return is64 ? WASM_INSN_I64_GT_S : WASM_INSN_I32_GT_S; 2527 case CMP_GE_S: 2528 return is64 ? WASM_INSN_I64_GE_S : WASM_INSN_I32_GE_S; 2529 case CMP_LT_U: 2530 return is64 ? WASM_INSN_I64_LT_U : WASM_INSN_I32_LT_U; 2531 case CMP_LE_U: 2532 return is64 ? WASM_INSN_I64_LE_U : WASM_INSN_I32_LE_U; 2533 case CMP_GT_U: 2534 return is64 ? WASM_INSN_I64_GT_U : WASM_INSN_I32_GT_U; 2535 case CMP_GE_U: 2536 return is64 ? WASM_INSN_I64_GE_U : WASM_INSN_I32_GE_U; 2537 /* FP compares are lowered by emit_fp_cmp (they may need multiple wasm 2538 * instructions) and never reach cmp_kind. Listed so -Wswitch stays useful. 2539 */ 2540 case CMP_OEQ_F: 2541 case CMP_ONE_F: 2542 case CMP_OLT_F: 2543 case CMP_OLE_F: 2544 case CMP_OGT_F: 2545 case CMP_OGE_F: 2546 case CMP_UEQ_F: 2547 case CMP_UNE_F: 2548 case CMP_ULT_F: 2549 case CMP_ULE_F: 2550 case CMP_UGT_F: 2551 case CMP_UGE_F: 2552 break; 2553 } 2554 wfail(t, "wasm: unsupported cmp %d", (int)op); 2555 } 2556 2557 /* Push both compare operands (a then b) onto the wasm stack. */ 2558 static void push_cmp_operands(WTarget* t, WIR* w, KitCgTypeId opty) { 2559 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, opty); 2560 emit_push_operand(t, w->imm_kind_b, w->imm_b, w->b, opty); 2561 } 2562 2563 /* Lower an FP compare to the wasm stack, leaving an i32 0/1 result. 
wasm's 2564 * f.eq/f.lt/f.le/f.gt/f.ge are ordered (false on NaN) and f.ne is unordered 2565 * (true on NaN), so the 12 IEEE predicates compose from those plus i32.eqz / 2566 * i32.or, using unordered-R == !(ordered-not-R). ONE/UEQ need both operands 2567 * twice, so push_cmp_operands runs again for the second relation. */ 2568 static void emit_fp_cmp(WTarget* t, CmpOp op, WIR* w, KitCgTypeId opty) { 2569 int d = (type_valtype(t, opty) == WASM_VAL_F64); 2570 WasmInsnKind EQ = d ? WASM_INSN_F64_EQ : WASM_INSN_F32_EQ; 2571 WasmInsnKind NE = d ? WASM_INSN_F64_NE : WASM_INSN_F32_NE; 2572 WasmInsnKind LT = d ? WASM_INSN_F64_LT : WASM_INSN_F32_LT; 2573 WasmInsnKind LE = d ? WASM_INSN_F64_LE : WASM_INSN_F32_LE; 2574 WasmInsnKind GT = d ? WASM_INSN_F64_GT : WASM_INSN_F32_GT; 2575 WasmInsnKind GE = d ? WASM_INSN_F64_GE : WASM_INSN_F32_GE; 2576 switch (op) { 2577 case CMP_OEQ_F: 2578 push_cmp_operands(t, w, opty); 2579 emit_insn(t, EQ, 0); 2580 return; 2581 case CMP_UNE_F: 2582 push_cmp_operands(t, w, opty); 2583 emit_insn(t, NE, 0); 2584 return; 2585 case CMP_OLT_F: 2586 push_cmp_operands(t, w, opty); 2587 emit_insn(t, LT, 0); 2588 return; 2589 case CMP_OLE_F: 2590 push_cmp_operands(t, w, opty); 2591 emit_insn(t, LE, 0); 2592 return; 2593 case CMP_OGT_F: 2594 push_cmp_operands(t, w, opty); 2595 emit_insn(t, GT, 0); 2596 return; 2597 case CMP_OGE_F: 2598 push_cmp_operands(t, w, opty); 2599 emit_insn(t, GE, 0); 2600 return; 2601 case CMP_UGE_F: /* !(OLT) */ 2602 push_cmp_operands(t, w, opty); 2603 emit_insn(t, LT, 0); 2604 emit_insn(t, WASM_INSN_I32_EQZ, 0); 2605 return; 2606 case CMP_UGT_F: /* !(OLE) */ 2607 push_cmp_operands(t, w, opty); 2608 emit_insn(t, LE, 0); 2609 emit_insn(t, WASM_INSN_I32_EQZ, 0); 2610 return; 2611 case CMP_ULE_F: /* !(OGT) */ 2612 push_cmp_operands(t, w, opty); 2613 emit_insn(t, GT, 0); 2614 emit_insn(t, WASM_INSN_I32_EQZ, 0); 2615 return; 2616 case CMP_ULT_F: /* !(OGE) */ 2617 push_cmp_operands(t, w, opty); 2618 emit_insn(t, GE, 0); 2619 
emit_insn(t, WASM_INSN_I32_EQZ, 0); 2620 return; 2621 case CMP_ONE_F: /* ordered & !=: (a<b) | (a>b) */ 2622 push_cmp_operands(t, w, opty); 2623 emit_insn(t, LT, 0); 2624 push_cmp_operands(t, w, opty); 2625 emit_insn(t, GT, 0); 2626 emit_insn(t, WASM_INSN_I32_OR, 0); 2627 return; 2628 case CMP_UEQ_F: /* unordered | ==: !((a<b) | (a>b)) */ 2629 push_cmp_operands(t, w, opty); 2630 emit_insn(t, LT, 0); 2631 push_cmp_operands(t, w, opty); 2632 emit_insn(t, GT, 0); 2633 emit_insn(t, WASM_INSN_I32_OR, 0); 2634 emit_insn(t, WASM_INSN_I32_EQZ, 0); 2635 return; 2636 default: 2637 wfail(t, "wasm: unsupported fp cmp %d", (int)op); 2638 } 2639 } 2640 2641 static void emit_convert(WTarget* t, ConvKind ck, WasmValType src, 2642 WasmValType dst, u32 sw, u32 dw) { 2643 (void)dw; 2644 /* Integer sign/zero extension. Sub-i32 logical widths (i8/i16) share the i32 2645 * valtype, so a "same valtype" SEXT/ZEXT is NOT a no-op — the high bits must 2646 * be filled per the source's logical width (sw). The CG IR keeps narrow 2647 * immediates as truncated bit patterns, so without this an i8 value like 2648 * (signed char)-128 reads back as 128. 
*/ 2649 if (ck == CV_SEXT && src != WASM_VAL_F32 && src != WASM_VAL_F64) { 2650 if (src == WASM_VAL_I32) { 2651 if (sw == 8u) 2652 emit_insn(t, WASM_INSN_I32_EXTEND8_S, 0); 2653 else if (sw == 16u) 2654 emit_insn(t, WASM_INSN_I32_EXTEND16_S, 0); 2655 } else { 2656 if (sw == 8u) 2657 emit_insn(t, WASM_INSN_I64_EXTEND8_S, 0); 2658 else if (sw == 16u) 2659 emit_insn(t, WASM_INSN_I64_EXTEND16_S, 0); 2660 else if (sw == 32u) 2661 emit_insn(t, WASM_INSN_I64_EXTEND32_S, 0); 2662 } 2663 if (src == WASM_VAL_I32 && dst == WASM_VAL_I64) 2664 emit_insn(t, WASM_INSN_I64_EXTEND_I32_S, 0); 2665 else if (src == WASM_VAL_I64 && dst == WASM_VAL_I32) 2666 emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 2667 return; 2668 } 2669 if (ck == CV_ZEXT && src != WASM_VAL_F32 && src != WASM_VAL_F64) { 2670 if (src == WASM_VAL_I32) { 2671 if (sw > 0u && sw < 32u) { 2672 emit_insn(t, WASM_INSN_I32_CONST, (i64)(((u32)1 << sw) - 1u)); 2673 emit_insn(t, WASM_INSN_I32_AND, 0); 2674 } 2675 if (dst == WASM_VAL_I64) emit_insn(t, WASM_INSN_I64_EXTEND_I32_U, 0); 2676 } else { 2677 if (sw > 0u && sw < 64u) { 2678 emit_push_imm(t, WASM_VAL_I64, (i64)(((u64)1 << sw) - 1u)); 2679 emit_insn(t, WASM_INSN_I64_AND, 0); 2680 } 2681 if (dst == WASM_VAL_I32) emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 2682 } 2683 return; 2684 } 2685 if (src == dst && (ck == CV_BITCAST || ck == CV_TRUNC)) { 2686 /* No-op conversion. 
*/ 2687 return; 2688 } 2689 if (ck == CV_BITCAST) { 2690 if (src == WASM_VAL_I32 && dst == WASM_VAL_F32) { 2691 emit_insn(t, WASM_INSN_F32_REINTERPRET_I32, 0); 2692 return; 2693 } 2694 if (src == WASM_VAL_F32 && dst == WASM_VAL_I32) { 2695 emit_insn(t, WASM_INSN_I32_REINTERPRET_F32, 0); 2696 return; 2697 } 2698 if (src == WASM_VAL_I64 && dst == WASM_VAL_F64) { 2699 emit_insn(t, WASM_INSN_F64_REINTERPRET_I64, 0); 2700 return; 2701 } 2702 if (src == WASM_VAL_F64 && dst == WASM_VAL_I64) { 2703 emit_insn(t, WASM_INSN_I64_REINTERPRET_F64, 0); 2704 return; 2705 } 2706 /* Width-changing ptr/int bitcasts: kit_cg_ptr_to_int and 2707 * kit_cg_int_to_ptr route through CV_BITCAST, and on wasm32 a pointer 2708 * is i32 while the frontend integer side may be i64. Lower as 2709 * wrap/extend (zero-extend; pointers are non-negative addresses). */ 2710 if (src == WASM_VAL_I64 && dst == WASM_VAL_I32) { 2711 emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 2712 return; 2713 } 2714 if (src == WASM_VAL_I32 && dst == WASM_VAL_I64) { 2715 emit_insn(t, WASM_INSN_I64_EXTEND_I32_U, 0); 2716 return; 2717 } 2718 wfail(t, "wasm: unsupported bitcast"); 2719 } 2720 if (ck == CV_TRUNC) { 2721 if (src == WASM_VAL_I64 && dst == WASM_VAL_I32) { 2722 emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 2723 return; 2724 } 2725 } 2726 if (ck == CV_FEXT && src == WASM_VAL_F32 && dst == WASM_VAL_F64) { 2727 emit_insn(t, WASM_INSN_F64_PROMOTE_F32, 0); 2728 return; 2729 } 2730 if (ck == CV_FTRUNC && src == WASM_VAL_F64 && dst == WASM_VAL_F32) { 2731 emit_insn(t, WASM_INSN_F32_DEMOTE_F64, 0); 2732 return; 2733 } 2734 if (ck == CV_ITOF_S) { 2735 if (src == WASM_VAL_I32 && dst == WASM_VAL_F32) { 2736 emit_insn(t, WASM_INSN_F32_CONVERT_I32_S, 0); 2737 return; 2738 } 2739 if (src == WASM_VAL_I32 && dst == WASM_VAL_F64) { 2740 emit_insn(t, WASM_INSN_F64_CONVERT_I32_S, 0); 2741 return; 2742 } 2743 if (src == WASM_VAL_I64 && dst == WASM_VAL_F32) { 2744 emit_insn(t, WASM_INSN_F32_CONVERT_I64_S, 0); 2745 return; 2746 } 2747 if (src == 
WASM_VAL_I64 && dst == WASM_VAL_F64) { 2748 emit_insn(t, WASM_INSN_F64_CONVERT_I64_S, 0); 2749 return; 2750 } 2751 } 2752 if (ck == CV_ITOF_U) { 2753 if (src == WASM_VAL_I32 && dst == WASM_VAL_F32) { 2754 emit_insn(t, WASM_INSN_F32_CONVERT_I32_U, 0); 2755 return; 2756 } 2757 if (src == WASM_VAL_I32 && dst == WASM_VAL_F64) { 2758 emit_insn(t, WASM_INSN_F64_CONVERT_I32_U, 0); 2759 return; 2760 } 2761 if (src == WASM_VAL_I64 && dst == WASM_VAL_F32) { 2762 emit_insn(t, WASM_INSN_F32_CONVERT_I64_U, 0); 2763 return; 2764 } 2765 if (src == WASM_VAL_I64 && dst == WASM_VAL_F64) { 2766 emit_insn(t, WASM_INSN_F64_CONVERT_I64_U, 0); 2767 return; 2768 } 2769 } 2770 if (ck == CV_FTOI_S) { 2771 if (src == WASM_VAL_F32 && dst == WASM_VAL_I32) { 2772 emit_insn(t, WASM_INSN_I32_TRUNC_F32_S, 0); 2773 return; 2774 } 2775 if (src == WASM_VAL_F64 && dst == WASM_VAL_I32) { 2776 emit_insn(t, WASM_INSN_I32_TRUNC_F64_S, 0); 2777 return; 2778 } 2779 if (src == WASM_VAL_F32 && dst == WASM_VAL_I64) { 2780 emit_insn(t, WASM_INSN_I64_TRUNC_F32_S, 0); 2781 return; 2782 } 2783 if (src == WASM_VAL_F64 && dst == WASM_VAL_I64) { 2784 emit_insn(t, WASM_INSN_I64_TRUNC_F64_S, 0); 2785 return; 2786 } 2787 } 2788 if (ck == CV_FTOI_U) { 2789 if (src == WASM_VAL_F32 && dst == WASM_VAL_I32) { 2790 emit_insn(t, WASM_INSN_I32_TRUNC_F32_U, 0); 2791 return; 2792 } 2793 if (src == WASM_VAL_F64 && dst == WASM_VAL_I32) { 2794 emit_insn(t, WASM_INSN_I32_TRUNC_F64_U, 0); 2795 return; 2796 } 2797 if (src == WASM_VAL_F32 && dst == WASM_VAL_I64) { 2798 emit_insn(t, WASM_INSN_I64_TRUNC_F32_U, 0); 2799 return; 2800 } 2801 if (src == WASM_VAL_F64 && dst == WASM_VAL_I64) { 2802 emit_insn(t, WASM_INSN_I64_TRUNC_F64_U, 0); 2803 return; 2804 } 2805 } 2806 wfail(t, "wasm: unsupported convert kind %d (%d -> %d)", (int)ck, (int)src, 2807 (int)dst); 2808 } 2809 2810 /* During lowering we keep a running active-scope stack so we can compute 2811 * br depths. 
*/ 2812 typedef struct LoweringScope { 2813 u32 id; 2814 u8 kind; 2815 /* Depth at which break/continue targets are reached. */ 2816 u32 break_depth; 2817 u32 cont_depth; 2818 } LoweringScope; 2819 2820 typedef struct LoweringState { 2821 WTarget* t; 2822 /* Bounded by the deepest synthetic + CG scope nesting we'll emit. 2823 * Switch islands wrap one block per case, so the limit is roughly 2824 * (max cases + max user nesting). 1024 leaves room for very wide 2825 * switches without forcing future per-case-count caps. */ 2826 LoweringScope stack[1024]; 2827 u32 nstack; 2828 u32 cur_depth; 2829 } LoweringState; 2830 2831 static u32 br_to_label(LoweringState* L, Label l) { 2832 WLabel* lbl = lookup_label(L->t, l); 2833 if (!lbl) wfail(L->t, "wasm: br to unknown label"); 2834 if (lbl->kind == WLBL_SCOPE_BREAK) { 2835 for (u32 i = L->nstack; i > 0; --i) { 2836 if (L->stack[i - 1u].id == lbl->scope_id) { 2837 return L->cur_depth - L->stack[i - 1u].break_depth; 2838 } 2839 } 2840 wfail(L->t, "wasm: br to break label of inactive scope"); 2841 } 2842 if (lbl->kind == WLBL_SCOPE_CONT) { 2843 for (u32 i = L->nstack; i > 0; --i) { 2844 if (L->stack[i - 1u].id == lbl->scope_id) { 2845 return L->cur_depth - L->stack[i - 1u].cont_depth; 2846 } 2847 } 2848 wfail(L->t, "wasm: br to continue label of inactive scope"); 2849 } 2850 /* wasm_structurize wraps every reachable forward label in a synthetic 2851 * SCOPE_BLOCK (forward goto) or SCOPE_LOOP (backward goto), and 2852 * unroll_switch_islands reorders the WIR so switch case labels are 2853 * forward refs from WIR_SWITCH. Arriving here means the structurer 2854 * missed a shape — a bug, not a feature gap. 
*/ 2855 wfail(L->t, 2856 "wasm: br to free label whose synthetic scope was not " 2857 "emitted; structurer bug"); 2858 } 2859 2860 static i64 wasm_switch_sign_extend(u64 v, u32 width) { 2861 if (width == 0u || width >= 64u) return (i64)v; 2862 { 2863 u64 bit = 1ull << (width - 1u); 2864 u64 mask = (1ull << width) - 1u; 2865 v &= mask; 2866 return (i64)((v ^ bit) - bit); 2867 } 2868 } 2869 2870 static int wasm_switch_extents(WTarget* t, const WIR* w, i64* out_vmin, 2871 u64* out_span) { 2872 u32 width; 2873 i64 vmin = INT64_MAX; 2874 i64 vmax = INT64_MIN; 2875 if (w->switch_ncases == 0) return 0; 2876 width = kit_cg_type_int_width((KitCompiler*)t->c, w->type); 2877 if (!width || width > 64u) return 0; 2878 for (u32 i = 0; i < w->switch_ncases; ++i) { 2879 i64 vi = wasm_switch_sign_extend(w->switch_cases[i].value, width); 2880 if (vi < vmin) vmin = vi; 2881 if (vi > vmax) vmax = vi; 2882 } 2883 if (vmax < vmin) return 0; 2884 { 2885 u64 delta = (u64)vmax - (u64)vmin; 2886 if (delta == UINT64_MAX) return 0; 2887 *out_span = delta + 1u; 2888 } 2889 *out_vmin = vmin; 2890 return 1; 2891 } 2892 2893 static void emit_br_table(WTarget* t, const u32* targets, u32 ntargets) { 2894 WasmInsn* in; 2895 if (ntargets == 0) 2896 wfail(t, "wasm: br_table needs at least the default target"); 2897 wasm_func_add_insn(t->c, t->module, t->cur_func, WASM_INSN_BR_TABLE, 0); 2898 in = &t->cur_func->insns[t->cur_func->ninsns - 1u]; 2899 wasm_insn_set_targets(t->c, t->module, in, targets, ntargets); 2900 } 2901 2902 /* A switch lowers to a dense br_table when its case values span a range that 2903 * isn't pathologically sparse relative to the number of cases; otherwise an 2904 * `eq`/`br_if` comparison chain. Small ranges always take the table (cheap 2905 * either way); larger ranges only when at least ~half the table slots carry a 2906 * real case, so a sparse switch (e.g. `case 0`, `case 1000000`) doesn't 2907 * materialize a giant mostly-default table. 
There is no range-splitting yet, 2908 * so a switch that fails this test is a linear scan. */ 2909 static int switch_use_br_table(const WIR* w, u64 span) { 2910 if (span <= 64u) return 1; 2911 return span <= (u64)w->switch_ncases * 2u; 2912 } 2913 2914 static void emit_switch_br_table(WTarget* t, LoweringState* L, const WIR* w) { 2915 i64 vmin; 2916 u64 span; 2917 u32* targets; 2918 Label* labels; 2919 u32 ntargets; 2920 WasmValType vt; 2921 Heap* h = t->c->ctx->heap; 2922 2923 if (w->switch_ncases == 0) { 2924 emit_insn(t, WASM_INSN_BR, (i64)br_to_label(L, w->labels[0])); 2925 return; 2926 } 2927 if (!wasm_switch_extents(t, w, &vmin, &span)) 2928 wfail(t, "wasm: unsupported switch selector type"); 2929 vt = type_valtype(t, w->type); 2930 if (vt != WASM_VAL_I32 && vt != WASM_VAL_I64) 2931 wfail(t, "wasm: switch selector must be integer"); 2932 if (!switch_use_br_table(w, span)) { 2933 for (u32 i = 0; i < w->switch_ncases; ++i) { 2934 u32 width = kit_cg_type_int_width((KitCompiler*)t->c, w->type); 2935 i64 vi = wasm_switch_sign_extend(w->switch_cases[i].value, width); 2936 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type); 2937 emit_push_imm(t, vt, vi); 2938 emit_insn(t, vt == WASM_VAL_I64 ? WASM_INSN_I64_EQ : WASM_INSN_I32_EQ, 0); 2939 emit_insn(t, WASM_INSN_BR_IF, 2940 (i64)br_to_label(L, w->switch_cases[i].label)); 2941 } 2942 emit_insn(t, WASM_INSN_BR, (i64)br_to_label(L, w->labels[0])); 2943 return; 2944 } 2945 2946 /* Dense table: one slot per value in [vmin, vmin+span), default-filled, with 2947 * the default appended as the trailing out-of-range target. 
*/ 2948 ntargets = (u32)span + 1u; 2949 labels = (Label*)h->alloc(h, sizeof(Label) * span, _Alignof(Label)); 2950 targets = (u32*)h->alloc(h, sizeof(u32) * ntargets, _Alignof(u32)); 2951 if (!labels || !targets) wfail(t, "wasm: out of memory for switch table"); 2952 for (u64 i = 0; i < span; ++i) labels[i] = w->labels[0]; 2953 for (u32 i = 0; i < w->switch_ncases; ++i) { 2954 u32 width = kit_cg_type_int_width((KitCompiler*)t->c, w->type); 2955 i64 vi = wasm_switch_sign_extend(w->switch_cases[i].value, width); 2956 u64 slot = (u64)(vi - vmin); 2957 if (slot >= span) wfail(t, "wasm: switch case outside span"); 2958 labels[slot] = w->switch_cases[i].label; 2959 } 2960 2961 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type); 2962 if (vmin != 0) { 2963 emit_push_imm(t, vt, vmin); 2964 emit_insn(t, vt == WASM_VAL_I64 ? WASM_INSN_I64_SUB : WASM_INSN_I32_SUB, 0); 2965 } 2966 if (vt == WASM_VAL_I64) emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 2967 2968 for (u32 i = 0; i < (u32)span; ++i) targets[i] = br_to_label(L, labels[i]); 2969 targets[ntargets - 1u] = br_to_label(L, w->labels[0]); 2970 emit_br_table(t, targets, ntargets); 2971 h->free(h, targets, sizeof(u32) * ntargets); 2972 h->free(h, labels, sizeof(Label) * span); 2973 } 2974 2975 /* ----------------------------------------------------------------- 2976 * Intrinsics (bit ops / bswap / overflow arith) 2977 * 2978 * MEMCPY/MEMMOVE/MEMSET don't appear here: the recorder funnels them 2979 * into WIR_COPY_BYTES / WIR_SET_BYTES which already lower to 2980 * memory.copy / memory.fill. Hints (PREFETCH/EXPECT/ASSUME_ALIGNED) 2981 * also don't reach the linearizer — the recorder either drops them or 2982 * emits a plain copy. ----------------------------------------------- */ 2983 2984 static void emit_intrinsic_bit_op(WTarget* t, const WIR* w) { 2985 /* clz/ctz/popcount instruction width follows the operand (type2), not the 2986 * i32 result. i64 forms produce an i64 count that we wrap to the i32 dst. 
*/ 2987 WasmValType vt = type_valtype(t, w->type2 ? w->type2 : w->type); 2988 WasmValType dvt = type_valtype(t, w->type); 2989 WasmInsnKind op; 2990 switch ((IntrinKind)w->cgop) { 2991 case INTRIN_CLZ: 2992 op = (vt == WASM_VAL_I64) ? WASM_INSN_I64_CLZ : WASM_INSN_I32_CLZ; 2993 break; 2994 case INTRIN_CTZ: 2995 op = (vt == WASM_VAL_I64) ? WASM_INSN_I64_CTZ : WASM_INSN_I32_CTZ; 2996 break; 2997 case INTRIN_POPCOUNT: 2998 op = (vt == WASM_VAL_I64) ? WASM_INSN_I64_POPCNT : WASM_INSN_I32_POPCNT; 2999 break; 3000 default: 3001 wfail(t, "wasm: unexpected bit-op intrinsic %d", (int)w->cgop); 3002 return; 3003 } 3004 emit_push_operand_reg(t, w->a); 3005 emit_insn(t, op, 0); 3006 if (vt == WASM_VAL_I64 && dvt == WASM_VAL_I32) 3007 emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 3008 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3009 } 3010 3011 static void emit_intrinsic_bswap(WTarget* t, const WIR* w) { 3012 /* Width-by-type: the recorded result type fixes the byte width. */ 3013 u32 width = (u32)abi_cg_sizeof(t->c->abi, w->type); 3014 if (width <= 4) { 3015 /* Both 16- and 32-bit forms operate over i32. The 16-bit form only 3016 * touches the low 16 bits; any extra high bits in the input are 3017 * discarded by the AND mask. */ 3018 u32 tmp = add_wasm_local(t, WASM_VAL_I32); 3019 emit_push_operand_reg(t, w->a); 3020 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)tmp); 3021 if (width <= 2) { 3022 /* (x & 0xff) << 8 */ 3023 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)tmp); 3024 emit_insn(t, WASM_INSN_I32_CONST, 0xff); 3025 emit_insn(t, WASM_INSN_I32_AND, 0); 3026 emit_insn(t, WASM_INSN_I32_CONST, 8); 3027 emit_insn(t, WASM_INSN_I32_SHL, 0); 3028 /* (x >> 8) & 0xff */ 3029 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)tmp); 3030 emit_insn(t, WASM_INSN_I32_CONST, 8); 3031 emit_insn(t, WASM_INSN_I32_SHR_U, 0); 3032 emit_insn(t, WASM_INSN_I32_CONST, 0xff); 3033 emit_insn(t, WASM_INSN_I32_AND, 0); 3034 emit_insn(t, WASM_INSN_I32_OR, 0); 3035 } else { 3036 /* Four-byte shuffle. 
*/ 3037 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)tmp); 3038 emit_insn(t, WASM_INSN_I32_CONST, 24); 3039 emit_insn(t, WASM_INSN_I32_SHR_U, 0); 3040 emit_insn(t, WASM_INSN_I32_CONST, 0xff); 3041 emit_insn(t, WASM_INSN_I32_AND, 0); 3042 3043 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)tmp); 3044 emit_insn(t, WASM_INSN_I32_CONST, 8); 3045 emit_insn(t, WASM_INSN_I32_SHR_U, 0); 3046 emit_insn(t, WASM_INSN_I32_CONST, 0xff00); 3047 emit_insn(t, WASM_INSN_I32_AND, 0); 3048 emit_insn(t, WASM_INSN_I32_OR, 0); 3049 3050 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)tmp); 3051 emit_insn(t, WASM_INSN_I32_CONST, 8); 3052 emit_insn(t, WASM_INSN_I32_SHL, 0); 3053 emit_insn(t, WASM_INSN_I32_CONST, 0xff0000); 3054 emit_insn(t, WASM_INSN_I32_AND, 0); 3055 emit_insn(t, WASM_INSN_I32_OR, 0); 3056 3057 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)tmp); 3058 emit_insn(t, WASM_INSN_I32_CONST, 24); 3059 emit_insn(t, WASM_INSN_I32_SHL, 0); 3060 emit_insn(t, WASM_INSN_I32_OR, 0); 3061 } 3062 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3063 return; 3064 } 3065 /* 8-byte form: byte reverse over i64. 
*/ 3066 u32 tmp = add_wasm_local(t, WASM_VAL_I64); 3067 emit_push_operand_reg(t, w->a); 3068 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)tmp); 3069 for (int i = 0; i < 8; ++i) { 3070 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)tmp); 3071 if (i > 0) { 3072 emit_insn(t, WASM_INSN_I64_CONST, (i64)(i * 8)); 3073 emit_insn(t, WASM_INSN_I64_SHR_U, 0); 3074 } 3075 emit_insn(t, WASM_INSN_I64_CONST, 0xff); 3076 emit_insn(t, WASM_INSN_I64_AND, 0); 3077 int shift = (7 - i) * 8; 3078 if (shift > 0) { 3079 emit_insn(t, WASM_INSN_I64_CONST, (i64)shift); 3080 emit_insn(t, WASM_INSN_I64_SHL, 0); 3081 } 3082 if (i > 0) emit_insn(t, WASM_INSN_I64_OR, 0); 3083 } 3084 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3085 } 3086 3087 static void emit_intrinsic_overflow(WTarget* t, const WIR* w) { 3088 IntrinKind k = (IntrinKind)w->cgop; 3089 WasmValType vt = type_valtype(t, w->type); 3090 WasmInsnKind k_add = 3091 (vt == WASM_VAL_I64) ? WASM_INSN_I64_ADD : WASM_INSN_I32_ADD; 3092 WasmInsnKind k_sub = 3093 (vt == WASM_VAL_I64) ? WASM_INSN_I64_SUB : WASM_INSN_I32_SUB; 3094 WasmInsnKind k_and = 3095 (vt == WASM_VAL_I64) ? WASM_INSN_I64_AND : WASM_INSN_I32_AND; 3096 WasmInsnKind k_xor = 3097 (vt == WASM_VAL_I64) ? WASM_INSN_I64_XOR : WASM_INSN_I32_XOR; 3098 WasmInsnKind k_shr_u = 3099 (vt == WASM_VAL_I64) ? WASM_INSN_I64_SHR_U : WASM_INSN_I32_SHR_U; 3100 WasmInsnKind k_lt_u = 3101 (vt == WASM_VAL_I64) ? WASM_INSN_I64_LT_U : WASM_INSN_I32_LT_U; 3102 WasmInsnKind k_const = 3103 (vt == WASM_VAL_I64) ? WASM_INSN_I64_CONST : WASM_INSN_I32_CONST; 3104 KitCgTypeId bool_ty = builtin_id(KIT_CG_BUILTIN_BOOL); 3105 3106 /* Stash both operands in scratch locals so each side of the expansion can 3107 * re-load them without re-evaluating immediates or relying on the wasm 3108 * value stack shape. 
*/ 3109 u32 a_loc = add_wasm_local(t, vt); 3110 u32 b_loc = add_wasm_local(t, vt); 3111 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type); 3112 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)a_loc); 3113 emit_push_operand(t, w->imm_kind_b, w->imm_b, w->b, w->type); 3114 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)b_loc); 3115 3116 switch (k) { 3117 case INTRIN_UADD_OVERFLOW: 3118 /* r = a + b; ovf = (r <u a) */ 3119 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3120 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3121 emit_insn(t, k_add, 0); 3122 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3123 emit_push_operand_reg(t, w->dst); 3124 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3125 emit_insn(t, k_lt_u, 0); 3126 emit_local_set(t, w->dst2, bool_ty, RC_INT); 3127 break; 3128 case INTRIN_USUB_OVERFLOW: 3129 /* r = a - b; ovf = (a <u b) */ 3130 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3131 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3132 emit_insn(t, k_sub, 0); 3133 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3134 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3135 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3136 emit_insn(t, k_lt_u, 0); 3137 emit_local_set(t, w->dst2, bool_ty, RC_INT); 3138 break; 3139 case INTRIN_SADD_OVERFLOW: 3140 /* r = a + b; ovf = ((r ^ a) & (r ^ b)) >>u (W-1) */ 3141 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3142 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3143 emit_insn(t, k_add, 0); 3144 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3145 emit_push_operand_reg(t, w->dst); 3146 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3147 emit_insn(t, k_xor, 0); 3148 emit_push_operand_reg(t, w->dst); 3149 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3150 emit_insn(t, k_xor, 0); 3151 emit_insn(t, k_and, 0); 3152 emit_insn(t, k_const, (i64)(vt == WASM_VAL_I64 ? 
63 : 31)); 3153 emit_insn(t, k_shr_u, 0); 3154 if (vt == WASM_VAL_I64) emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 3155 emit_local_set(t, w->dst2, bool_ty, RC_INT); 3156 break; 3157 case INTRIN_SSUB_OVERFLOW: 3158 /* r = a - b; ovf = ((a ^ b) & (a ^ r)) >>u (W-1) */ 3159 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3160 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3161 emit_insn(t, k_sub, 0); 3162 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3163 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3164 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3165 emit_insn(t, k_xor, 0); 3166 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3167 emit_push_operand_reg(t, w->dst); 3168 emit_insn(t, k_xor, 0); 3169 emit_insn(t, k_and, 0); 3170 emit_insn(t, k_const, (i64)(vt == WASM_VAL_I64 ? 63 : 31)); 3171 emit_insn(t, k_shr_u, 0); 3172 if (vt == WASM_VAL_I64) emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 3173 emit_local_set(t, w->dst2, bool_ty, RC_INT); 3174 break; 3175 case INTRIN_UMUL_OVERFLOW: { 3176 /* i32 only (i64 rejected in recorder). Widen to i64, multiply, 3177 * low 32 = result, ovf = (wide >> 32) != 0. */ 3178 u32 wide = add_wasm_local(t, WASM_VAL_I64); 3179 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3180 emit_insn(t, WASM_INSN_I64_EXTEND_I32_U, 0); 3181 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3182 emit_insn(t, WASM_INSN_I64_EXTEND_I32_U, 0); 3183 emit_insn(t, WASM_INSN_I64_MUL, 0); 3184 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)wide); 3185 3186 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)wide); 3187 emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 3188 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3189 3190 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)wide); 3191 emit_insn(t, WASM_INSN_I64_CONST, 32); 3192 emit_insn(t, WASM_INSN_I64_SHR_U, 0); 3193 emit_insn(t, WASM_INSN_I64_CONST, 0); 3194 emit_insn(t, WASM_INSN_I64_NE, 0); 3195 emit_local_set(t, w->dst2, bool_ty, RC_INT); 3196 break; 3197 } 3198 case INTRIN_SMUL_OVERFLOW: { 3199 /* i32 only. 
Sign-extend, multiply, low 32 = result, ovf if 3200 * sext(result) != wide product. */ 3201 u32 wide = add_wasm_local(t, WASM_VAL_I64); 3202 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)a_loc); 3203 emit_insn(t, WASM_INSN_I64_EXTEND_I32_S, 0); 3204 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)b_loc); 3205 emit_insn(t, WASM_INSN_I64_EXTEND_I32_S, 0); 3206 emit_insn(t, WASM_INSN_I64_MUL, 0); 3207 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)wide); 3208 3209 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)wide); 3210 emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 3211 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3212 3213 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)wide); 3214 emit_push_operand_reg(t, w->dst); 3215 emit_insn(t, WASM_INSN_I64_EXTEND_I32_S, 0); 3216 emit_insn(t, WASM_INSN_I64_NE, 0); 3217 emit_local_set(t, w->dst2, bool_ty, RC_INT); 3218 break; 3219 } 3220 default: 3221 wfail(t, "wasm: overflow intrinsic dispatch reached default (%d)", 3222 (int)k); 3223 } 3224 } 3225 3226 static void emit_intrinsic(WTarget* t, const WIR* w) { 3227 IntrinKind k = (IntrinKind)w->cgop; 3228 switch (k) { 3229 case INTRIN_CLZ: 3230 case INTRIN_CTZ: 3231 case INTRIN_POPCOUNT: 3232 emit_intrinsic_bit_op(t, w); 3233 return; 3234 case INTRIN_BSWAP: 3235 emit_intrinsic_bswap(t, w); 3236 return; 3237 case INTRIN_SADD_OVERFLOW: 3238 case INTRIN_UADD_OVERFLOW: 3239 case INTRIN_SSUB_OVERFLOW: 3240 case INTRIN_USUB_OVERFLOW: 3241 case INTRIN_SMUL_OVERFLOW: 3242 case INTRIN_UMUL_OVERFLOW: 3243 emit_intrinsic_overflow(t, w); 3244 return; 3245 default: 3246 wfail(t, "wasm: unexpected intrinsic kind %d in linearizer", (int)k); 3247 } 3248 } 3249 3250 static void linearize_range(WTarget* t, LoweringState* L, u32 start, u32 end); 3251 3252 #if 0 /* Switch-island matcher: replaced by wasm_structurize's \ 3253 * unroll_switch_islands, which reorders the WIR in-place so case \ 3254 * labels become forward refs handled by the general structurer. 
*/ 3255 static int label_in_list(Label l, const Label* labels, u32 nlabels) { 3256 for (u32 i = 0; i < nlabels; ++i) { 3257 if (labels[i] == l) return 1; 3258 } 3259 return 0; 3260 } 3261 3262 static int try_linearize_switch_island(WTarget* t, LoweringState* L, u32* ip) { 3263 WIR* jump = &t->wir[*ip]; 3264 WLabel* dispatch = lookup_label(t, jump->labels[0]); 3265 u32 dispatch_i; 3266 u32 switch_i = UINT32_MAX; 3267 WIR* sw; 3268 Label target_labels[64]; 3269 Label body_labels[64]; 3270 Label end_labels[64]; 3271 u32 ntarget_labels = 0; 3272 u32 nbody_labels = 0; 3273 u32 nend_labels = 0; 3274 u32 end_remap_mark; 3275 u32 case_remap_mark; 3276 Label synthetic_end = LABEL_NONE; 3277 3278 if (!dispatch || dispatch->kind != WLBL_FORWARD || !dispatch->placed || 3279 dispatch->wir_index <= *ip) 3280 return 0; 3281 dispatch_i = dispatch->wir_index; 3282 if (dispatch_i >= t->nwir || t->wir[dispatch_i].op != WIR_LABEL) 3283 return 0; 3284 for (u32 i = dispatch_i + 1u; i < t->nwir; ++i) { 3285 if (t->wir[i].op == WIR_SWITCH) { 3286 switch_i = i; 3287 break; 3288 } 3289 } 3290 if (switch_i == UINT32_MAX) return 0; 3291 3292 sw = &t->wir[switch_i]; 3293 for (u32 i = 0; i < sw->switch_ncases; ++i) { 3294 Label l = sw->switch_cases[i].label; 3295 if (!label_in_list(l, target_labels, ntarget_labels)) { 3296 if (ntarget_labels >= 64u) wfail(t, "wasm: too many switch targets"); 3297 target_labels[ntarget_labels++] = l; 3298 } 3299 } 3300 if (sw->labels[0] != LABEL_NONE && 3301 !label_in_list(sw->labels[0], target_labels, ntarget_labels)) { 3302 if (ntarget_labels >= 64u) wfail(t, "wasm: too many switch targets"); 3303 target_labels[ntarget_labels++] = sw->labels[0]; 3304 } 3305 3306 for (u32 i = *ip + 1u; i < dispatch_i; ++i) { 3307 if (t->wir[i].op != WIR_LABEL) continue; 3308 Label l = t->wir[i].labels[0]; 3309 if (label_in_list(l, target_labels, ntarget_labels) && 3310 !label_in_list(l, body_labels, nbody_labels)) { 3311 if (nbody_labels >= 64u) wfail(t, "wasm: too many switch 
body labels"); 3312 body_labels[nbody_labels++] = l; 3313 } 3314 } 3315 for (u32 i = *ip + 1u; i < dispatch_i; ++i) { 3316 Label l = LABEL_NONE; 3317 WLabel* lbl; 3318 if (t->wir[i].op == WIR_JUMP || t->wir[i].op == WIR_CMP_BRANCH) { 3319 l = t->wir[i].labels[0]; 3320 } 3321 if (l == LABEL_NONE || label_in_list(l, body_labels, nbody_labels)) 3322 continue; 3323 lbl = lookup_label(t, l); 3324 if (!lbl || lbl->kind != WLBL_FORWARD || !lbl->placed || 3325 lbl->wir_index <= switch_i) 3326 continue; 3327 if (!label_in_list(l, end_labels, nend_labels)) { 3328 if (nend_labels >= 64u) wfail(t, "wasm: too many switch exit labels"); 3329 end_labels[nend_labels++] = l; 3330 } 3331 } 3332 if (nbody_labels == 0) return 0; 3333 3334 if (!label_in_list(sw->labels[0], body_labels, nbody_labels)) 3335 synthetic_end = sw->labels[0]; 3336 3337 emit_insn(t, WASM_INSN_BLOCK, 0); 3338 L->cur_depth++; 3339 end_remap_mark = L->nremaps; 3340 if (synthetic_end != LABEL_NONE) 3341 lowering_push_remap(L, synthetic_end, L->cur_depth); 3342 for (u32 i = 0; i < nend_labels; ++i) { 3343 lowering_push_remap(L, end_labels[i], L->cur_depth); 3344 } 3345 3346 for (u32 ri = nbody_labels; ri > 0; --ri) { 3347 Label l = body_labels[ri - 1u]; 3348 emit_insn(t, WASM_INSN_BLOCK, 0); 3349 L->cur_depth++; 3350 lowering_push_remap(L, l, L->cur_depth); 3351 } 3352 3353 linearize_range(t, L, dispatch_i + 1u, switch_i); 3354 case_remap_mark = end_remap_mark + nend_labels + 3355 (synthetic_end != LABEL_NONE ? 
1u : 0u); 3356 emit_switch_br_table(t, L, sw); 3357 lowering_pop_remaps(L, case_remap_mark); 3358 3359 for (u32 bi = 0; bi < nbody_labels; ++bi) { 3360 u32 seg_start; 3361 u32 seg_end = dispatch_i; 3362 WLabel* lbl = lookup_label(t, body_labels[bi]); 3363 if (!lbl) wfail(t, "wasm: switch body label disappeared"); 3364 emit_insn(t, WASM_INSN_END, 0); 3365 L->cur_depth--; 3366 seg_start = lbl->wir_index + 1u; 3367 if (bi + 1u < nbody_labels) { 3368 WLabel* next = lookup_label(t, body_labels[bi + 1u]); 3369 if (!next) wfail(t, "wasm: switch body label disappeared"); 3370 seg_end = next->wir_index; 3371 } 3372 linearize_range(t, L, seg_start, seg_end); 3373 } 3374 3375 emit_insn(t, WASM_INSN_END, 0); 3376 L->cur_depth--; 3377 lowering_pop_remaps(L, end_remap_mark); 3378 *ip = switch_i; 3379 return 1; 3380 } 3381 #endif 3382 3383 static void linearize_range(WTarget* t, LoweringState* L, u32 start, u32 end) { 3384 for (u32 i = start; i < end; ++i) { 3385 WIR* w = &t->wir[i]; 3386 switch (w->op) { 3387 case WIR_LOAD_IMM: { 3388 WasmValType vt = type_valtype(t, w->type); 3389 emit_push_imm(t, vt, w->imm); 3390 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3391 break; 3392 } 3393 case WIR_LOAD_CONST_F: { 3394 WasmValType vt = type_valtype(t, w->type); 3395 emit_fp(t, 3396 vt == WASM_VAL_F64 ? 
WASM_INSN_F64_CONST : WASM_INSN_F32_CONST, 3397 w->fp_imm); 3398 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3399 break; 3400 } 3401 case WIR_COPY: { 3402 emit_push_operand_reg(t, w->a); 3403 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3404 break; 3405 } 3406 case WIR_BINOP: { 3407 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type); 3408 emit_push_operand(t, w->imm_kind_b, w->imm_b, w->b, w->type); 3409 WasmValType vt = type_valtype(t, w->type); 3410 emit_insn(t, binop_kind(t, (BinOp)w->cgop, vt), 0); 3411 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3412 break; 3413 } 3414 case WIR_UNOP: { 3415 WasmValType vt = type_valtype(t, w->type); 3416 switch ((UnOp)w->cgop) { 3417 case UO_NEG: { 3418 /* 0 - a */ 3419 emit_push_imm(t, vt, 0); 3420 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type); 3421 emit_insn( 3422 t, vt == WASM_VAL_I64 ? WASM_INSN_I64_SUB : WASM_INSN_I32_SUB, 3423 0); 3424 break; 3425 } 3426 case UO_FNEG: { 3427 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type); 3428 emit_insn( 3429 t, vt == WASM_VAL_F64 ? WASM_INSN_F64_NEG : WASM_INSN_F32_NEG, 3430 0); 3431 break; 3432 } 3433 case UO_BNOT: { 3434 /* a XOR -1 */ 3435 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type); 3436 emit_push_imm(t, vt, -1); 3437 emit_insn( 3438 t, vt == WASM_VAL_I64 ? WASM_INSN_I64_XOR : WASM_INSN_I32_XOR, 3439 0); 3440 break; 3441 } 3442 case UO_NOT: { 3443 /* a == 0 — i{32,64}.eqz always produces an i32 0/1. When the CG 3444 * destination is i64 (e.g. !x where x was zext'd to i64 before the 3445 * negation), widen the i32 boolean back to i64 so the following 3446 * local.set is well-typed. */ 3447 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type); 3448 emit_insn( 3449 t, vt == WASM_VAL_I64 ? 
WASM_INSN_I64_EQZ : WASM_INSN_I32_EQZ, 3450 0); 3451 if (vt == WASM_VAL_I64) 3452 emit_insn(t, WASM_INSN_I64_EXTEND_I32_U, 0); 3453 break; 3454 } 3455 } 3456 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3457 break; 3458 } 3459 case WIR_CMP: { 3460 CmpOp cop = (CmpOp)w->cgop; 3461 if (cop >= CMP_OEQ_F) { 3462 emit_fp_cmp(t, cop, w, w->type2); 3463 } else { 3464 push_cmp_operands(t, w, w->type2); 3465 emit_insn(t, cmp_kind(t, cop, type_valtype(t, w->type2)), 0); 3466 } 3467 /* cmp result is i32 (0/1). dst type may be wider — but cg generally 3468 * stores cmp results into i32. */ 3469 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3470 break; 3471 } 3472 case WIR_CONVERT: { 3473 WasmValType src = type_valtype(t, w->type2); 3474 WasmValType dst = type_valtype(t, w->type); 3475 u32 sw = kit_cg_type_int_width((KitCompiler*)t->c, w->type2); 3476 u32 dw = kit_cg_type_int_width((KitCompiler*)t->c, w->type); 3477 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, w->type2); 3478 emit_convert(t, (ConvKind)w->cgop, src, dst, sw, dw); 3479 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3480 break; 3481 } 3482 case WIR_CALL: 3483 case WIR_CALL_INDIRECT: { 3484 /* Tail calls tear down the caller's wasm frame (return_call / 3485 * return_call_indirect have polymorphic-unreachable type after the 3486 * call). Mirror the WIR_RET linear-stack epilogue before pushing 3487 * args so the linear-memory stack frame is released. Operands 3488 * below come from wasm locals (incoming params or reg-locals), 3489 * not from the linear stack we just freed. Variadic tail calls are 3490 * rejected upstream; sret tail calls forward the incoming pointer. 
*/ 3491 if (w->call_tail && t->has_stack_frame) { 3492 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->frame_saved_sp_local); 3493 emit_insn(t, WASM_INSN_GLOBAL_SET, (i64)t->stack_pointer_global); 3494 } 3495 if (w->call_has_sret) { 3496 if (w->call_tail) { 3497 /* Forward this function's own incoming sret pointer: the callee 3498 * writes the same buffer (in our caller's frame, which outlives 3499 * the sibling call) and return_calls back. The pointer is a wasm 3500 * local, unaffected by the linear-frame teardown above. */ 3501 if (t->sret_param_local == 0xffffffffu) 3502 wfail(t, "wasm: sret tail call without an incoming sret pointer"); 3503 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->sret_param_local); 3504 } else { 3505 /* Push sret pointer (address of caller-allocated buffer). */ 3506 uint64_t off; 3507 emit_addr_operand(t, w->call_sret_addr, &off); 3508 if (off) { 3509 emit_insn(t, WASM_INSN_I32_CONST, (i64)off); 3510 emit_insn(t, WASM_INSN_I32_ADD, 0); 3511 } 3512 } 3513 } 3514 for (u32 a = 0; a < w->call_narg; ++a) { 3515 if (w->call_arg_kinds[a] == WOP_REG) { 3516 emit_push_operand_reg(t, w->call_args[a]); 3517 } else if (w->call_arg_kinds[a] == WOP_IMM) { 3518 WasmValType vt = type_valtype(t, w->call_arg_types[a]); 3519 emit_push_imm(t, vt, w->call_arg_imms[a]); 3520 } else if (w->call_arg_kinds[a] == WOP_ADDR) { 3521 uint64_t off; 3522 emit_addr_operand(t, w->call_arg_addrs[a], &off); 3523 if (off) { 3524 emit_insn(t, WASM_INSN_I32_CONST, (i64)off); 3525 emit_insn(t, WASM_INSN_I32_ADD, 0); 3526 } 3527 } else { 3528 wfail(t, "wasm: bad call-arg kind %u", w->call_arg_kinds[a]); 3529 } 3530 } 3531 /* Variadic packing. Each variadic arg occupies an 8-byte slot in a 3532 * caller-allocated linear-memory buffer; the buffer's address is 3533 * passed as the hidden trailing i32. We save __stack_pointer to a 3534 * scratch local before allocating the buffer and restore it after 3535 * the call returns, so a variadic call in a loop doesn't grow the 3536 * linear stack. 
See wasm_va_start / wasm_va_arg for the callee side. 3537 */ 3538 if (w->call_variadic) { 3539 if (w->call_nvar == 0u) { 3540 /* No varargs: still pass a hidden i32. NULL is fine — the callee 3541 * must not deref va_list without a matching @va_arg, which a 3542 * well-typed program won't do. */ 3543 emit_insn(t, WASM_INSN_I32_CONST, 0); 3544 } else { 3545 ensure_stack_pointer(t); 3546 if (t->varcall_saved_sp_local == 0xffffffffu) 3547 t->varcall_saved_sp_local = add_wasm_local(t, WASM_VAL_I32); 3548 if (t->varcall_buf_local == 0xffffffffu) 3549 t->varcall_buf_local = add_wasm_local(t, WASM_VAL_I32); 3550 u32 buf_size = w->call_nvar * 8u; 3551 /* Save SP, allocate aligned buffer, set SP = buf. */ 3552 emit_insn(t, WASM_INSN_GLOBAL_GET, (i64)t->stack_pointer_global); 3553 emit_insn(t, WASM_INSN_LOCAL_TEE, (i64)t->varcall_saved_sp_local); 3554 emit_insn(t, WASM_INSN_I32_CONST, (i64)buf_size); 3555 emit_insn(t, WASM_INSN_I32_SUB, 0); 3556 emit_insn(t, WASM_INSN_I32_CONST, -(i64)8); 3557 emit_insn(t, WASM_INSN_I32_AND, 0); 3558 emit_insn(t, WASM_INSN_LOCAL_TEE, (i64)t->varcall_buf_local); 3559 emit_insn(t, WASM_INSN_GLOBAL_SET, (i64)t->stack_pointer_global); 3560 /* Pack each variadic arg at offset i*8. Store width is the 3561 * value's natural width (i32/i64/f32/f64); the unused high 3562 * bytes of i32/f32 slots are left as whatever __stack_pointer 3563 * pointed at, which @va_arg won't read for those slots. 
*/ 3564 for (u32 v = 0; v < w->call_nvar; ++v) { 3565 KitCgTypeId vty = w->call_var_types[v]; 3566 WasmValType vvt = type_valtype(t, vty); 3567 WasmInsnKind store_op; 3568 u32 width; 3569 if (vvt == WASM_VAL_I64) { 3570 store_op = WASM_INSN_I64_STORE; 3571 width = 8u; 3572 } else if (vvt == WASM_VAL_F32) { 3573 store_op = WASM_INSN_F32_STORE; 3574 width = 4u; 3575 } else if (vvt == WASM_VAL_F64) { 3576 store_op = WASM_INSN_F64_STORE; 3577 width = 8u; 3578 } else { 3579 store_op = WASM_INSN_I32_STORE; 3580 width = 4u; 3581 } 3582 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->varcall_buf_local); 3583 if (w->call_var_kinds[v] == WOP_REG) { 3584 emit_push_operand_reg(t, w->call_var_regs[v]); 3585 } else if (w->call_var_kinds[v] == WOP_IMM) { 3586 if (vvt == WASM_VAL_F32 || vvt == WASM_VAL_F64) 3587 wfail(t, "wasm: float immediate variadic arg unsupported"); 3588 emit_push_imm(t, vvt, w->call_var_imms[v]); 3589 } else { 3590 wfail(t, "wasm: bad variadic-arg kind %u", 3591 w->call_var_kinds[v]); 3592 } 3593 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, store_op, 3594 memarg_align_log2(width, width), 3595 (u64)(v * 8u), 0u); 3596 } 3597 /* Push buf addr as hidden last arg. */ 3598 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->varcall_buf_local); 3599 } 3600 } 3601 if (w->op == WIR_CALL_INDIRECT) { 3602 /* Callee: push the i32 table index. */ 3603 emit_push_operand_reg(t, w->a); 3604 /* call_indirect / return_call_indirect both encode 3605 * (typeidx, tableidx). The encoder reads `imm` as typeidx and 3606 * `align` as tableidx. */ 3607 wasm_func_add_insn(t->c, t->module, t->cur_func, 3608 w->call_tail ? WASM_INSN_RETURN_CALL_INDIRECT 3609 : WASM_INSN_CALL_INDIRECT, 3610 w->imm); 3611 t->cur_func->insns[t->cur_func->ninsns - 1u].align = 0u; 3612 } else { 3613 u32 idx = sym_to_wasm_func(t, w->call_sym, NULL); 3614 emit_insn(t, w->call_tail ? 
WASM_INSN_RETURN_CALL : WASM_INSN_CALL, 3615 (i64)idx); 3616 } 3617 /* Tail calls never return to this function: the operand stack is 3618 * polymorphic-unreachable after return_call*, so writing dst or 3619 * restoring the variadic stack pointer would be dead and would 3620 * also corrupt stack typing. (Variadic tail calls are rejected 3621 * upstream, so the variadic SP-restore guard is defensive.) */ 3622 if (!w->call_tail) { 3623 if (w->dst != REG_NONE) { 3624 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3625 } 3626 /* Restore SP after variadic call so loop-resident variadic calls 3627 * don't accumulate stack usage. Done after stashing the return 3628 * value into its local. */ 3629 if (w->call_variadic && w->call_nvar) { 3630 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->varcall_saved_sp_local); 3631 emit_insn(t, WASM_INSN_GLOBAL_SET, (i64)t->stack_pointer_global); 3632 } 3633 } 3634 break; 3635 } 3636 case WIR_RET: { 3637 if (w->cgop == 1) { 3638 /* Aggregate sret return: memcpy w->addr -> *sret_param, then 3639 * void return. The sret pointer was the hidden first wasm param. 3640 * NOTE: this still uses a byte loop rather than memory.copy so 3641 * the produced module remains loadable by the kit runtime 3642 * before the wasm-core default-feature change lands. The path 3643 * will collapse to memory.copy once the core's default feature 3644 * set includes WASM_FEATURE_BULK_MEMORY (subagent A). */ 3645 if (t->sret_param_local == 0xffffffffu) 3646 wfail(t, "wasm: sret return without hidden sret param"); 3647 for (u32 n = 0; n < w->agg.size; ++n) { 3648 /* Push destination address (sret_ptr) onto stack. The memarg 3649 * offset on the i32.store8 carries the per-byte offset. */ 3650 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->sret_param_local); 3651 /* Load src byte at (w->addr + n). 
*/ 3652 Operand src = w->addr; 3653 if (src.kind == OPK_INDIRECT) 3654 src.v.ind.ofs += (i32)n; 3655 else if (src.kind == OPK_GLOBAL) 3656 src.v.global.addend += n; 3657 uint64_t src_off; 3658 emit_addr_operand(t, src, &src_off); 3659 if (src.kind == OPK_LOCAL) src_off += n; 3660 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, 3661 WASM_INSN_I32_LOAD8_U, 0, src_off, 0); 3662 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, 3663 WASM_INSN_I32_STORE8, 0, n, 0); 3664 } 3665 } else if (w->dst != REG_NONE) 3666 emit_push_operand_reg(t, w->dst); 3667 else if (w->imm_kind == 1) { 3668 WasmValType vt = type_valtype(t, w->type); 3669 emit_push_imm(t, vt, w->imm_a); 3670 } 3671 if (t->has_stack_frame) { 3672 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->frame_saved_sp_local); 3673 emit_insn(t, WASM_INSN_GLOBAL_SET, (i64)t->stack_pointer_global); 3674 } 3675 emit_insn(t, WASM_INSN_RETURN, 0); 3676 break; 3677 } 3678 case WIR_UNREACHABLE: { 3679 emit_insn(t, WASM_INSN_UNREACHABLE, 0); 3680 break; 3681 } 3682 case WIR_LOAD_LOCAL: { 3683 u32 wli = (u32)w->imm; 3684 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)wli); 3685 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3686 break; 3687 } 3688 case WIR_STORE_LOCAL: { 3689 u32 wli = (u32)w->imm; 3690 if (w->imm_kind == 1) { 3691 WasmValType vt = type_valtype(t, w->type); 3692 emit_push_imm(t, vt, w->imm_a); 3693 } else { 3694 emit_push_operand_reg(t, w->a); 3695 } 3696 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)wli); 3697 break; 3698 } 3699 case WIR_LOAD_MEM: { 3700 emit_load_addr(t, w->addr, w->type, w->mem); 3701 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3702 break; 3703 } 3704 case WIR_STORE_MEM: { 3705 Operand src; 3706 memset(&src, 0, sizeof src); 3707 src.kind = w->imm_kind == WOP_IMM ? 
OPK_IMM : OPK_REG; 3708 src.type = w->type; 3709 if (src.kind == OPK_IMM) 3710 src.v.imm = w->imm_a; 3711 else 3712 src.v.reg = w->a; 3713 emit_store_addr(t, w->addr, w->type, src, w->mem, w->imm_kind, w->imm_a, 3714 w->a); 3715 break; 3716 } 3717 case WIR_ADDR_OF: { 3718 uint64_t offset; 3719 emit_addr_operand(t, w->addr, &offset); 3720 if (offset) { 3721 emit_insn(t, WASM_INSN_I32_CONST, (i64)offset); 3722 emit_insn(t, WASM_INSN_I32_ADD, 0); 3723 } 3724 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3725 break; 3726 } 3727 case WIR_ALLOCA: { 3728 u32 align = (u32)w->imm; 3729 emit_insn(t, WASM_INSN_GLOBAL_GET, (i64)t->stack_pointer_global); 3730 emit_push_operand(t, w->imm_kind, w->imm_a, w->a, 3731 w->type2 ? w->type2 : builtin_id(KIT_CG_BUILTIN_I32)); 3732 if (w->type2 && type_valtype(t, w->type2) == WASM_VAL_I64) 3733 emit_insn(t, WASM_INSN_I32_WRAP_I64, 0); 3734 emit_insn(t, WASM_INSN_I32_SUB, 0); 3735 if (align > 1u) { 3736 emit_insn(t, WASM_INSN_I32_CONST, -(i64)align); 3737 emit_insn(t, WASM_INSN_I32_AND, 0); 3738 } 3739 emit_insn(t, WASM_INSN_LOCAL_TEE, 3740 (i64)reg_local(t, w->dst, w->type, (RegClass)w->cls)); 3741 emit_insn(t, WASM_INSN_GLOBAL_SET, (i64)t->stack_pointer_global); 3742 break; 3743 } 3744 case WIR_COPY_BYTES: { 3745 /* memory.copy: stack = dst_addr, src_addr, n; both memidx fields = 0. 
3746 */ 3747 Operand src_addr; 3748 if (w->imm_kind != WOP_REG) 3749 wfail(t, "wasm: copy_bytes source must be a register pointer"); 3750 memset(&src_addr, 0, sizeof src_addr); 3751 src_addr.kind = OPK_INDIRECT; 3752 src_addr.type = w->addr.type; 3753 src_addr.v.ind.base = w->a; 3754 src_addr.v.ind.index = REG_NONE; 3755 src_addr.v.ind.log2_scale = 0; 3756 src_addr.v.ind.ofs = 0; 3757 if (w->agg.size == 0) break; 3758 emit_push_addr_value(t, w->addr); 3759 emit_push_addr_value(t, src_addr); 3760 emit_insn(t, WASM_INSN_I32_CONST, (i64)(uint32_t)w->agg.size); 3761 wasm_func_add_insn(t->c, t->module, t->cur_func, WASM_INSN_MEMORY_COPY, 3762 0); 3763 /* dst memidx = 0, src memidx = 0 (kit-cc single-memory module). */ 3764 t->cur_func->insns[t->cur_func->ninsns - 1u].memidx = 0; 3765 t->cur_func->insns[t->cur_func->ninsns - 1u].aux_idx = 0; 3766 break; 3767 } 3768 case WIR_SET_BYTES: { 3769 /* memory.fill: stack = dst_addr, value_i32, n; memidx = 0. */ 3770 if (w->imm_kind != WOP_IMM) 3771 wfail(t, "wasm: set_bytes value must be immediate in v1"); 3772 if (w->agg.size == 0) break; 3773 emit_push_addr_value(t, w->addr); 3774 emit_insn(t, WASM_INSN_I32_CONST, (i64)(w->imm_a & 0xff)); 3775 emit_insn(t, WASM_INSN_I32_CONST, (i64)(uint32_t)w->agg.size); 3776 wasm_func_add_insn(t->c, t->module, t->cur_func, WASM_INSN_MEMORY_FILL, 3777 0); 3778 t->cur_func->insns[t->cur_func->ninsns - 1u].memidx = 0; 3779 break; 3780 } 3781 case WIR_ATOMIC_LOAD: { 3782 WasmInsnKind k = atomic_load_kind_for(t, w->type, w->mem); 3783 u32 width = wasm_mem_width((uint8_t)k); 3784 emit_push_operand_reg(t, w->a); 3785 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, k, 3786 memarg_align_log2(w->mem.align, width), 0, 0); 3787 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3788 break; 3789 } 3790 case WIR_ATOMIC_STORE: { 3791 WasmInsnKind k = atomic_store_kind_for(t, w->type, w->mem); 3792 u32 width = wasm_mem_width((uint8_t)k); 3793 emit_push_operand_reg(t, w->a); 3794 emit_push_operand(t, 
w->imm_kind_b, w->imm_b, w->b, w->type); 3795 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, k, 3796 memarg_align_log2(w->mem.align, width), 0, 0); 3797 break; 3798 } 3799 case WIR_ATOMIC_RMW: { 3800 if ((KitCgAtomicOp)w->cgop == KIT_CG_ATOMIC_NAND) { 3801 /* wasm-threads has no atomic.rmw.nand. Expand to a cmpxchg retry 3802 * loop computing desired = ~(old & val): 3803 * loop 3804 * old = atomic.load(addr) ; tee into old_local 3805 * desired = (old & val) ^ -1 3806 * got = atomic.rmw.cmpxchg(addr, old, desired) 3807 * br_if loop (got != old) ; lost the race, retry 3808 * end 3809 * dst = old_local ; fetch returns prior value 3810 */ 3811 WasmValType vt = type_valtype(t, w->type); 3812 WasmInsnKind load_k = atomic_load_kind_for(t, w->type, w->mem); 3813 WasmInsnKind cas_k = atomic_cmpxchg_kind_for(t, w->type, w->mem); 3814 u32 load_w = wasm_mem_width((uint8_t)load_k); 3815 u32 cas_w = wasm_mem_width((uint8_t)cas_k); 3816 int is64 = (vt == WASM_VAL_I64); 3817 u32 old_local = add_wasm_local(t, vt); 3818 emit_insn(t, WASM_INSN_LOOP, 0); 3819 /* addr (cmpxchg arg0) */ 3820 emit_push_operand_reg(t, w->a); 3821 /* expected = atomic.load(addr), tee into old_local */ 3822 emit_push_operand_reg(t, w->a); 3823 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, load_k, 3824 memarg_align_log2(w->mem.align, load_w), 0, 0); 3825 emit_insn(t, WASM_INSN_LOCAL_TEE, (i64)old_local); 3826 /* desired = (old & val) ^ -1 */ 3827 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)old_local); 3828 emit_push_operand(t, w->imm_kind_b, w->imm_b, w->b, w->type); 3829 emit_insn(t, is64 ? WASM_INSN_I64_AND : WASM_INSN_I32_AND, 0); 3830 emit_push_imm(t, vt, -1); 3831 emit_insn(t, is64 ? 
WASM_INSN_I64_XOR : WASM_INSN_I32_XOR, 0); 3832 /* cmpxchg -> value previously in memory */ 3833 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, cas_k, 3834 memarg_align_log2(w->mem.align, cas_w), 0, 0); 3835 /* retry if memory had changed (got != expected) */ 3836 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)old_local); 3837 emit_insn(t, is64 ? WASM_INSN_I64_NE : WASM_INSN_I32_NE, 0); 3838 emit_insn(t, WASM_INSN_BR_IF, 0); /* 0 = innermost loop */ 3839 emit_insn(t, WASM_INSN_END, 0); 3840 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)old_local); 3841 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3842 break; 3843 } 3844 WasmInsnKind k = 3845 atomic_rmw_kind_for(t, (KitCgAtomicOp)w->cgop, w->type, w->mem); 3846 u32 width = wasm_mem_width((uint8_t)k); 3847 emit_push_operand_reg(t, w->a); 3848 emit_push_operand(t, w->imm_kind_b, w->imm_b, w->b, w->type); 3849 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, k, 3850 memarg_align_log2(w->mem.align, width), 0, 0); 3851 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3852 break; 3853 } 3854 case WIR_ATOMIC_CAS: { 3855 WasmInsnKind k = atomic_cmpxchg_kind_for(t, w->type, w->mem); 3856 WasmValType vt = type_valtype(t, w->type); 3857 u32 width = wasm_mem_width((uint8_t)k); 3858 /* Save expected into a fresh wasm local before consuming inputs. CG 3859 * may reuse one of (addr, expected, desired) regs for prior or ok; 3860 * reg_local() for w->dst/w->dst2 would then rebind that reg's local 3861 * mid-stream, and re-pushing expected via the (now-stale) mapping 3862 * would read an uninitialized local. The temp sidesteps that. */ 3863 u32 saved_expected = add_wasm_local(t, vt); 3864 /* push addr; expected (tee into saved-expected, leaves on stack); 3865 * desired; cmpxchg -> prior on stack. 
*/ 3866 emit_push_operand_reg(t, w->a); 3867 emit_push_operand(t, w->imm_kind_b, w->imm_b, w->b, w->type); 3868 emit_insn(t, WASM_INSN_LOCAL_TEE, (i64)saved_expected); 3869 emit_push_operand(t, w->imm_kind_c, w->imm_c, w->op_c, w->type); 3870 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, k, 3871 memarg_align_log2(w->mem.align, width), 0, 0); 3872 /* All input regs have been consumed; safe to rebind. */ 3873 u32 prior_local = reg_local(t, w->dst, w->type, (RegClass)w->cls); 3874 emit_insn(t, WASM_INSN_LOCAL_TEE, (i64)prior_local); 3875 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)saved_expected); 3876 emit_insn(t, vt == WASM_VAL_I64 ? WASM_INSN_I64_EQ : WASM_INSN_I32_EQ, 3877 0); 3878 emit_local_set(t, w->dst2, 3879 w->type2 ? w->type2 : builtin_id(KIT_CG_BUILTIN_BOOL), 3880 RC_INT); 3881 break; 3882 } 3883 case WIR_FENCE: { 3884 emit_insn(t, WASM_INSN_ATOMIC_FENCE, 0); 3885 break; 3886 } 3887 case WIR_JUMP: { 3888 u32 d = br_to_label(L, w->labels[0]); 3889 emit_insn(t, WASM_INSN_BR, (i64)d); 3890 break; 3891 } 3892 case WIR_CMP_BRANCH: { 3893 CmpOp cop = (CmpOp)w->cgop; 3894 if (cop >= CMP_OEQ_F) { 3895 emit_fp_cmp(t, cop, w, w->type); 3896 } else { 3897 push_cmp_operands(t, w, w->type); 3898 emit_insn(t, cmp_kind(t, cop, type_valtype(t, w->type)), 0); 3899 } 3900 u32 d = br_to_label(L, w->labels[0]); 3901 emit_insn(t, WASM_INSN_BR_IF, (i64)d); 3902 break; 3903 } 3904 case WIR_SWITCH: { 3905 emit_switch_br_table(t, L, w); 3906 break; 3907 } 3908 case WIR_SCOPE_OPEN: { 3909 if (L->nstack >= 1024u) 3910 wfail(t, "wasm: scope nesting too deep (max 1024)"); 3911 LoweringScope* s = &L->stack[L->nstack++]; 3912 s->id = w->scope_id; 3913 s->kind = w->cgop; 3914 if (w->cgop == SCOPE_LOOP) { 3915 /* (block (loop ...)); inside the body: 3916 * br to loop top (cur_depth+1) = continue 3917 * br to past block (cur_depth) = break (one more level out) */ 3918 emit_insn(t, WASM_INSN_BLOCK, 0); 3919 L->cur_depth++; 3920 s->break_depth = L->cur_depth; /* `br N` lands AFTER block */ 
3921 emit_insn(t, WASM_INSN_LOOP, 0); 3922 L->cur_depth++; 3923 s->cont_depth = L->cur_depth; /* `br N` lands at LOOP top */ 3924 } else if (w->cgop == SCOPE_BLOCK) { 3925 emit_insn(t, WASM_INSN_BLOCK, 0); 3926 L->cur_depth++; 3927 s->break_depth = L->cur_depth; 3928 s->cont_depth = L->cur_depth; /* unused */ 3929 } else { 3930 wfail(t, "wasm: unknown scope kind %d", (int)w->cgop); 3931 } 3932 break; 3933 } 3934 case WIR_SCOPE_CLOSE: { 3935 if (L->nstack == 0) wfail(t, "wasm: scope_close without open scope"); 3936 LoweringScope* s = &L->stack[L->nstack - 1u]; 3937 if (s->kind == SCOPE_LOOP) { 3938 emit_insn(t, WASM_INSN_END, 0); /* close loop */ 3939 L->cur_depth--; 3940 emit_insn(t, WASM_INSN_END, 0); /* close outer block */ 3941 L->cur_depth--; 3942 } else { 3943 emit_insn(t, WASM_INSN_END, 0); 3944 L->cur_depth--; 3945 } 3946 L->nstack--; 3947 break; 3948 } 3949 case WIR_VA_START: { 3950 /* *ap_addr = va_ptr_param_local */ 3951 emit_push_addr_value(t, w->addr); 3952 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->va_ptr_param_local); 3953 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, 3954 WASM_INSN_I32_STORE, 2u, 0u, 0u); 3955 break; 3956 } 3957 case WIR_VA_ARG: { 3958 if (t->va_arg_tmp_addr_local == 0xffffffffu) 3959 t->va_arg_tmp_addr_local = add_wasm_local(t, WASM_VAL_I32); 3960 WasmValType vt = type_valtype(t, w->type); 3961 WasmInsnKind load_op; 3962 u32 width; 3963 if (vt == WASM_VAL_I64) { 3964 load_op = WASM_INSN_I64_LOAD; 3965 width = 8u; 3966 } else if (vt == WASM_VAL_F32) { 3967 load_op = WASM_INSN_F32_LOAD; 3968 width = 4u; 3969 } else if (vt == WASM_VAL_F64) { 3970 load_op = WASM_INSN_F64_LOAD; 3971 width = 8u; 3972 } else { 3973 load_op = WASM_INSN_I32_LOAD; 3974 width = 4u; 3975 } 3976 /* Load T from current *ap and stash into dst. 
*/ 3977 emit_push_addr_value(t, w->addr); 3978 emit_insn(t, WASM_INSN_LOCAL_TEE, (i64)t->va_arg_tmp_addr_local); 3979 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, WASM_INSN_I32_LOAD, 3980 2u, 0u, 0u); 3981 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, load_op, 3982 memarg_align_log2(width, width), 0u, 0u); 3983 emit_local_set(t, w->dst, w->type, (RegClass)w->cls); 3984 /* Advance: *ap = *ap + 8. */ 3985 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->va_arg_tmp_addr_local); 3986 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->va_arg_tmp_addr_local); 3987 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, WASM_INSN_I32_LOAD, 3988 2u, 0u, 0u); 3989 emit_insn(t, WASM_INSN_I32_CONST, (i64)8); 3990 emit_insn(t, WASM_INSN_I32_ADD, 0); 3991 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, 3992 WASM_INSN_I32_STORE, 2u, 0u, 0u); 3993 break; 3994 } 3995 case WIR_VA_COPY: { 3996 /* *dst_ap = *src_ap (single i32). */ 3997 emit_push_addr_value(t, w->addr); 3998 emit_push_addr_value(t, w->call_sret_addr); 3999 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, WASM_INSN_I32_LOAD, 4000 2u, 0u, 0u); 4001 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, 4002 WASM_INSN_I32_STORE, 2u, 0u, 0u); 4003 break; 4004 } 4005 case WIR_INTRINSIC: { 4006 emit_intrinsic(t, w); 4007 break; 4008 } 4009 case WIR_ASM_BLOCK: { 4010 Heap* h_blk = t->c->ctx->heap; 4011 u32 nin = w->asm_nin; 4012 u32 nout = w->asm_nout; 4013 u32* in_locals = NULL; 4014 u32* out_locals = NULL; 4015 if (nin) { 4016 in_locals = 4017 (u32*)h_blk->alloc(h_blk, sizeof(u32) * nin, _Alignof(u32)); 4018 if (!in_locals) wfail(t, "wasm: out of memory"); 4019 /* defer per-input allocation until after output locals are known 4020 * so numeric tieback ("+r", "0".."9") can share. 
*/ 4021 } 4022 if (nout) { 4023 out_locals = 4024 (u32*)h_blk->alloc(h_blk, sizeof(u32) * nout, _Alignof(u32)); 4025 if (!out_locals) wfail(t, "wasm: out of memory"); 4026 for (u32 i = 0; i < nout; ++i) 4027 out_locals[i] = 4028 add_wasm_local(t, valtype_for_type(t, w->asm_out_types[i])); 4029 } 4030 if (nin) { 4031 for (u32 i = 0; i < nin; ++i) { 4032 i32 share = w->asm_in_share_out[i]; 4033 if (share >= 0 && (u32)share < nout) { 4034 in_locals[i] = out_locals[share]; 4035 } else { 4036 in_locals[i] = 4037 add_wasm_local(t, valtype_for_type(t, w->asm_in_types[i])); 4038 } 4039 } 4040 } 4041 /* Input materialization: push source operand, then for OPK_INDIRECT 4042 * inputs ("m" constraint with displacement) splice in 4043 * `i32.const ofs; i32.add` so the input local holds base+ofs. 4044 * Finally local.set into the input's local (which may be a shared 4045 * output local). */ 4046 for (u32 i = 0; i < nin; ++i) { 4047 emit_push_operand(t, w->asm_in_kinds[i], w->asm_in_imms[i], 4048 w->asm_in_regs[i], w->asm_in_types[i]); 4049 if (w->asm_in_kinds[i] == WOP_REG && w->asm_in_imms[i] != 0) { 4050 emit_push_imm(t, WASM_VAL_I32, w->asm_in_imms[i]); 4051 emit_insn(t, WASM_INSN_I32_ADD, 0); 4052 } 4053 emit_insn(t, WASM_INSN_LOCAL_SET, (i64)in_locals[i]); 4054 } 4055 /* Splice body, remapping local indices < nin+nout to the actual 4056 * wasm local table. 
*/ 4057 for (u32 i = 0; i < w->raw_ninsns; ++i) { 4058 WasmInsn in = w->raw_insns[i]; 4059 if (in.kind == WASM_INSN_LOCAL_GET || 4060 in.kind == WASM_INSN_LOCAL_SET || 4061 in.kind == WASM_INSN_LOCAL_TEE) { 4062 if (in.imm >= 0 && (u64)in.imm < (u64)nin) 4063 in.imm = (i64)in_locals[in.imm]; 4064 else if (in.imm >= (i64)nin && (u64)in.imm < (u64)(nin + nout)) 4065 in.imm = (i64)out_locals[in.imm - (i64)nin]; 4066 } 4067 t->module->current_loc = in.loc; 4068 wasm_func_add_insn(t->c, t->module, t->cur_func, 4069 (WasmInsnKind)in.kind, in.imm); 4070 t->cur_func->insns[t->cur_func->ninsns - 1u] = in; 4071 } 4072 /* Output extraction: copy each output local into the destination 4073 * Reg's wasm local. */ 4074 for (u32 i = 0; i < nout; ++i) { 4075 WasmValType ovt = valtype_for_type(t, w->asm_out_types[i]); 4076 RegClass cls = 4077 (ovt == WASM_VAL_F32 || ovt == WASM_VAL_F64) ? RC_FP : RC_INT; 4078 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)out_locals[i]); 4079 emit_local_set(t, w->asm_out_regs[i], w->asm_out_types[i], cls); 4080 } 4081 if (in_locals) h_blk->free(h_blk, in_locals, sizeof(u32) * nin); 4082 if (out_locals) h_blk->free(h_blk, out_locals, sizeof(u32) * nout); 4083 break; 4084 } 4085 case WIR_LABEL: { 4086 break; 4087 } 4088 } 4089 } 4090 } 4091 4092 static void linearize(WTarget* t) { 4093 LoweringState L; 4094 /* Rewrite WIR so every free label is bound to a synthetic SCOPE_BLOCK 4095 * (forward goto) or SCOPE_LOOP (backward goto). After this, the only 4096 * remaining free labels are switch-island participants, which the 4097 * try_linearize_switch_island fast path inside linearize_range handles. 
*/ 4098 wasm_structurize(t); 4099 memset(&L, 0, sizeof L); 4100 L.t = t; 4101 4102 if (t->has_stack_frame) { 4103 t->frame_saved_sp_local = add_wasm_local(t, WASM_VAL_I32); 4104 t->frame_base_local = add_wasm_local(t, WASM_VAL_I32); 4105 emit_insn(t, WASM_INSN_GLOBAL_GET, (i64)t->stack_pointer_global); 4106 emit_insn(t, WASM_INSN_LOCAL_TEE, (i64)t->frame_saved_sp_local); 4107 if (t->frame_size) { 4108 emit_insn(t, WASM_INSN_I32_CONST, 4109 (i64)align_to_u32(t->frame_size, t->frame_align)); 4110 emit_insn(t, WASM_INSN_I32_SUB, 0); 4111 } 4112 emit_insn(t, WASM_INSN_LOCAL_TEE, (i64)t->frame_base_local); 4113 emit_insn(t, WASM_INSN_GLOBAL_SET, (i64)t->stack_pointer_global); 4114 } 4115 4116 /* Byval copy-in: for each ABI_ARG_INDIRECT param, copy the aggregate from 4117 * the caller's pointer into the callee's stack-frame buffer so callee 4118 * mutations are isolated (wasm32 BasicCABI). Byte-by-byte for v1; can be 4119 * promoted to wider chunks later. */ 4120 for (u32 i = 0; i < t->nbyval_copies; ++i) { 4121 const WByvalCopy* bc = &t->byval_copies[i]; 4122 const WSlot* s = &t->slots[bc->dst_slot_id]; 4123 for (u32 n = 0; n < s->size; ++n) { 4124 /* dst: frame_base */ 4125 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->frame_base_local); 4126 /* src byte: i32.load8_u (ptr_local) offset=n */ 4127 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)bc->ptr_wasm_local); 4128 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, 4129 WASM_INSN_I32_LOAD8_U, 0, n, 0); 4130 wasm_func_add_mem_insn(t->c, t->module, t->cur_func, WASM_INSN_I32_STORE8, 4131 0, s->frame_offset + n, 0); 4132 } 4133 } 4134 4135 linearize_range(t, &L, 0, t->nwir); 4136 if (L.nstack != 0) 4137 wfail(t, "wasm: function ended with %u open scopes", L.nstack); 4138 /* If the body's last real WIR is a terminator (return / br / switch / 4139 * unreachable / tail call) buried inside nested blocks, kit's wasm 4140 * validator does not propagate the unreachable flag across enclosing 4141 * ENDs and would complain about a missing 
result at the implicit 4142 * function exit. Emit an explicit trailing `unreachable` so control[0] 4143 * is marked unreachable independent of the validator's propagation 4144 * rules. Also: when the terminator is a tail call we've already 4145 * emitted the linear-stack SP restore inline (see WIR_CALL handler), 4146 * and the function never reaches the post-body epilogue at runtime — 4147 * skip it to avoid emitting dead GLOBAL_GET/GLOBAL_SET pairs after the 4148 * return_call. */ 4149 int last_is_tail_call = 0; 4150 { 4151 int needs_unreachable = 0; 4152 for (u32 i = t->nwir; i > 0; --i) { 4153 WIR* w = &t->wir[i - 1u]; 4154 if (w->op == WIR_LABEL || w->op == WIR_SCOPE_OPEN || 4155 w->op == WIR_SCOPE_CLOSE) 4156 continue; 4157 if (w->op == WIR_RET || w->op == WIR_JUMP || w->op == WIR_SWITCH || 4158 w->op == WIR_UNREACHABLE) { 4159 needs_unreachable = 1; 4160 } else if ((w->op == WIR_CALL || w->op == WIR_CALL_INDIRECT) && 4161 w->call_tail) { 4162 needs_unreachable = 1; 4163 last_is_tail_call = 1; 4164 } 4165 break; 4166 } 4167 if (needs_unreachable) emit_insn(t, WASM_INSN_UNREACHABLE, 0); 4168 } 4169 if (t->has_stack_frame && !t->dead && !last_is_tail_call) { 4170 emit_insn(t, WASM_INSN_LOCAL_GET, (i64)t->frame_saved_sp_local); 4171 emit_insn(t, WASM_INSN_GLOBAL_SET, (i64)t->stack_pointer_global); 4172 } 4173 } 4174 4175 void wasm_func_end(CGTarget* tg) { 4176 WTarget* t = (WTarget*)tg; 4177 if (!t->cur_func) return; 4178 /* Linearize WIR into the WasmFunc body. */ 4179 linearize(t); 4180 t->cur_fn_desc = NULL; 4181 t->cur_func = NULL; 4182 /* Free per-function WIR arg arrays. 
*/ 4183 Heap* h = t->c->ctx->heap; 4184 for (u32 i = 0; i < t->nwir; ++i) { 4185 WIR* w = &t->wir[i]; 4186 if (w->call_args) { 4187 h->free(h, w->call_args, sizeof(Reg) * w->call_narg); 4188 h->free(h, w->call_arg_imms, sizeof(i64) * w->call_narg); 4189 h->free(h, w->call_arg_kinds, w->call_narg); 4190 h->free(h, w->call_arg_types, sizeof(KitCgTypeId) * w->call_narg); 4191 if (w->call_arg_addrs) 4192 h->free(h, w->call_arg_addrs, sizeof(Operand) * w->call_narg); 4193 w->call_args = NULL; 4194 w->call_arg_imms = NULL; 4195 w->call_arg_kinds = NULL; 4196 w->call_arg_types = NULL; 4197 w->call_arg_addrs = NULL; 4198 } 4199 if (w->switch_cases) { 4200 h->free(h, w->switch_cases, sizeof(CGSwitchCase) * w->switch_ncases); 4201 w->switch_cases = NULL; 4202 w->switch_ncases = 0; 4203 } 4204 if (w->raw_insns) { 4205 h->free(h, w->raw_insns, sizeof(WasmInsn) * w->raw_ninsns); 4206 w->raw_insns = NULL; 4207 w->raw_ninsns = 0; 4208 } 4209 if (w->asm_in_kinds) { 4210 h->free(h, w->asm_in_kinds, w->asm_nin); 4211 h->free(h, w->asm_in_imms, sizeof(i64) * w->asm_nin); 4212 h->free(h, w->asm_in_regs, sizeof(Reg) * w->asm_nin); 4213 h->free(h, w->asm_in_types, sizeof(KitCgTypeId) * w->asm_nin); 4214 h->free(h, w->asm_in_share_out, sizeof(i32) * w->asm_nin); 4215 w->asm_in_kinds = NULL; 4216 w->asm_in_imms = NULL; 4217 w->asm_in_regs = NULL; 4218 w->asm_in_types = NULL; 4219 w->asm_in_share_out = NULL; 4220 w->asm_nin = 0; 4221 } 4222 if (w->asm_out_regs) { 4223 h->free(h, w->asm_out_regs, sizeof(Reg) * w->asm_nout); 4224 h->free(h, w->asm_out_types, sizeof(KitCgTypeId) * w->asm_nout); 4225 w->asm_out_regs = NULL; 4226 w->asm_out_types = NULL; 4227 w->asm_nout = 0; 4228 } 4229 } 4230 t->nwir = 0; 4231 } 4232 4233 /* CGTarget alias hook. cg/session.c has already shared (section_id, value) 4234 * between alias_sym and target_sym at the ObjBuilder layer, which covers 4235 * data aliases (apply_sym_fixups reads section_id/value directly off the 4236 * ObjSym). 
Function aliases need extra wiring: the wasm function payload 4237 * lives in a target-side side-table (sym_to_func), not in obj sections, 4238 * and the alias's external linker name needs its own WasmExport entry. */ 4239 void wasm_alias(CGTarget* tg, ObjSymId alias_sym, ObjSymId target_sym, 4240 KitCgTypeId type) { 4241 WTarget* t = (WTarget*)tg; 4242 const ObjSym* tsym; 4243 const ObjSym* asym; 4244 (void)type; 4245 if (t->dead) return; 4246 /* Aliases are processed before any function body is emitted, so the module 4247 * may not exist yet; sym_to_wasm_func / wasm_add_export both need it. */ 4248 ensure_module(t); 4249 tsym = obj_symbol_get(t->obj, target_sym); 4250 if (!tsym) wfail(t, "wasm: alias against unknown target symbol"); 4251 if (tsym->kind == SK_FUNC) { 4252 /* Mirror sym_to_func so any later WIR_CALL against the alias resolves 4253 * to the target's wasm function index. */ 4254 u32 idx = sym_to_wasm_func(t, target_sym, NULL); 4255 if (alias_sym >= t->sym_to_func_cap) { 4256 Heap* h = t->c->ctx->heap; 4257 u32 nc = t->sym_to_func_cap ? t->sym_to_func_cap : 16u; 4258 while (nc <= alias_sym) nc *= 2u; 4259 u32* p = 4260 (u32*)h->realloc(h, t->sym_to_func, sizeof(u32) * t->sym_to_func_cap, 4261 sizeof(u32) * nc, _Alignof(u32)); 4262 if (!p) wfail(t, "wasm: out of memory"); 4263 for (u32 i = t->sym_to_func_cap; i < nc; ++i) p[i] = 0; 4264 t->sym_to_func = p; 4265 t->sym_to_func_cap = nc; 4266 } 4267 t->sym_to_func[alias_sym] = idx + 1u; 4268 /* Export under the alias's linker name when non-local. Mirrors the 4269 * export logic at the end of wasm_func_begin. 
*/ 4270 asym = obj_symbol_get(t->obj, alias_sym); 4271 if (asym && asym->bind != SB_LOCAL) { 4272 const char* name = pool_sym_cstr(t->c->global, asym->name, NULL); 4273 if (name && *name) { 4274 Heap* h = t->c->ctx->heap; 4275 size_t nlen = strlen(name); 4276 char* exp_name = (char*)h->alloc(h, nlen + 1u, 1); 4277 WasmExport* e; 4278 memcpy(exp_name, name, nlen + 1u); 4279 e = wasm_add_export(t->c, t->module); 4280 e->name = exp_name; 4281 e->kind = 0; /* function export */ 4282 e->index = idx; 4283 } 4284 } 4285 return; 4286 } 4287 if (tsym->kind == SK_OBJ) { 4288 /* Data aliases: obj_symbol_define has already shared (section_id, 4289 * value), and apply_sym_fixups reads those directly. Nothing more 4290 * to do here — but diagnose if the target hasn't been defined yet 4291 * (it would produce a bogus address at finalize). */ 4292 if (tsym->section_id == OBJ_SEC_NONE) { 4293 wfail(t, "wasm: data alias against undefined target symbol"); 4294 } 4295 return; 4296 } 4297 wfail(t, "wasm target: alias of symbol kind %u not yet supported", 4298 (unsigned)tsym->kind); 4299 } 4300 4301 /* Assign each SF_ALLOC (non-EXEC) ObjBuilder section a compact base in 4302 * linear memory. Walks sections in id order so the layout is deterministic. 4303 * Each base is aligned to the section's required alignment and lives in 4304 * t->section_base[sid]. Returns the next unused offset (end of data image). */ 4305 static u32 assign_section_bases(WTarget* t) { 4306 Heap* h = t->c->ctx->heap; 4307 u32 nsec = obj_section_count(t->obj); 4308 if (nsec > t->section_base_cap) { 4309 u32 nc = t->section_base_cap ? 
t->section_base_cap : 4u; 4310 while (nc < nsec) nc *= 2u; 4311 void* p = h->realloc(h, t->section_base, sizeof(u32) * t->section_base_cap, 4312 sizeof(u32) * nc, _Alignof(u32)); 4313 if (!p) wfail(t, "wasm: out of memory"); 4314 t->section_base = (u32*)p; 4315 for (u32 i = t->section_base_cap; i < nc; ++i) 4316 t->section_base[i] = 0xFFFFFFFFu; 4317 t->section_base_cap = nc; 4318 } 4319 u32 next = WASM_DATA_NULL_GUARD; 4320 for (ObjSecId sid = 0; sid < nsec; ++sid) t->section_base[sid] = 0xFFFFFFFFu; 4321 for (ObjSecId sid = 1; sid < nsec; ++sid) { 4322 const Section* s = obj_section_get(t->obj, sid); 4323 if (!s || s->removed || !(s->flags & SF_ALLOC) || s->flags & SF_EXEC) 4324 continue; 4325 u32 align = s->align ? s->align : 1u; 4326 if (align < 1u) align = 1u; 4327 next = align_to_u32(next, align); 4328 t->section_base[sid] = next; 4329 u32 sz = (s->kind == SEC_BSS || s->sem == SSEM_NOBITS) 4330 ? s->bss_size 4331 : (u32)s->bytes.total; 4332 if (sz > UINT32_MAX - next) wfail(t, "wasm: linear memory image too large"); 4333 next += sz; 4334 } 4335 return next; 4336 } 4337 4338 /* Patch a single i32/i64 value into the linear-memory image buffer at 4339 * `offset`. Wasm is little-endian. */ 4340 static void mem_write_le(u8* buf, u32 offset, u64 value, u32 width) { 4341 for (u32 i = 0; i < width; ++i) buf[offset + i] = (u8)(value >> (i * 8u)); 4342 } 4343 4344 /* Allocate aligned BSS-style space in linear memory for every SK_COMMON 4345 * symbol the ObjBuilder knows about. Called after assign_section_bases so 4346 * common storage sits past the last SF_ALLOC section. Records the assigned 4347 * base in t->common_base[id]; returns the next free cursor. 
*/
static u32 assign_common_bases(WTarget* t, u32 next) {
  Heap* h = t->c->ctx->heap;
  ObjSymIter* it = obj_symiter_new(t->obj);
  ObjSymEntry e;

  while (obj_symiter_next(it, &e)) {
    const ObjSym* os = e.sym;
    if (!os || os->removed || os->kind != SK_COMMON) continue;

    /* Grow the per-symbol base table until it can be indexed by e.id; new
     * entries start out as 0xFFFFFFFF ("no base assigned"). */
    if (e.id >= t->common_base_cap) {
      u32 new_cap = t->common_base_cap ? t->common_base_cap : 8u;
      while (new_cap <= e.id) new_cap *= 2u;
      void* grown =
          h->realloc(h, t->common_base, sizeof(u32) * t->common_base_cap,
                     sizeof(u32) * new_cap, _Alignof(u32));
      if (!grown) wfail(t, "wasm: out of memory");
      t->common_base = (u32*)grown;
      for (u32 i = t->common_base_cap; i < new_cap; ++i)
        t->common_base[i] = 0xFFFFFFFFu;
      t->common_base_cap = new_cap;
    }

    u32 align = os->common_align ? (u32)os->common_align : 1u;
    /* align_to_u32 computes next + align - 1; diagnose instead of letting
     * that sum wrap around the 32-bit address space. */
    if (align - 1u > UINT32_MAX - next)
      wfail(t, "wasm: linear memory image too large (common symbols)");
    next = align_to_u32(next, align);
    t->common_base[e.id] = next;

    /* Compare in 64 bits so an oversized symbol is reported rather than
     * truncated before the overflow check. */
    u64 size = (u64)os->size;
    if (size > (u64)(UINT32_MAX - next))
      wfail(t, "wasm: linear memory image too large (common symbols)");
    next += (u32)size;
  }

  obj_symiter_free(it);
  return next;
}

/* Resolve `sym + addend` to a linear-memory address. Handles both
 * section-defined symbols (via t->section_base[sym->section_id]) and
 * common symbols (via t->common_base[sym]). Returns 0 and sets *ok=0 if
 * the symbol can't be resolved here; callers diagnose.
*/ 4384 static u32 wasm_sym_linear_addr(WTarget* t, ObjSymId sym, i64 addend, int* ok) { 4385 const ObjSym* os = obj_symbol_get(t->obj, sym); 4386 *ok = 0; 4387 if (!os) return 0; 4388 if (os->kind == SK_COMMON) { 4389 if (sym >= t->common_base_cap || t->common_base[sym] == 0xFFFFFFFFu) 4390 return 0; 4391 *ok = 1; 4392 return t->common_base[sym] + (u32)addend; 4393 } 4394 if (os->section_id == OBJ_SEC_NONE) return 0; 4395 if (os->section_id >= t->section_base_cap || 4396 t->section_base[os->section_id] == 0xFFFFFFFFu) 4397 return 0; 4398 *ok = 1; 4399 return t->section_base[os->section_id] + (u32)os->value + (u32)addend; 4400 } 4401 4402 /* Apply each ObjBuilder relocation to the linear-memory image. Only 4403 * absolute (R_ABS32/R_ABS64) relocations are supported for now; PC-relative 4404 * and other kinds diagnose. */ 4405 static void apply_data_relocs(WTarget* t, u8* mem) { 4406 u32 ntotal = obj_reloc_total(t->obj); 4407 for (u32 i = 0; i < ntotal; ++i) { 4408 const Reloc* r = obj_reloc_at(t->obj, i); 4409 if (!r || r->removed) continue; 4410 if (r->section_id == OBJ_SEC_NONE) continue; 4411 if (r->section_id >= t->section_base_cap || 4412 t->section_base[r->section_id] == 0xFFFFFFFFu) 4413 continue; 4414 const Section* rs = obj_section_get(t->obj, r->section_id); 4415 if (!rs || rs->flags & SF_EXEC) continue; 4416 const ObjSym* tos = obj_symbol_get(t->obj, r->sym); 4417 if (!tos) 4418 wfail(t, "wasm: data relocation against unresolved symbol not supported"); 4419 /* Function-symbol references in data sections (e.g. a static vtable 4420 * `static fn_t v = &foo;`) resolve to wasm function-table indices, not 4421 * linear-memory addresses. The funcref table is built before 4422 * apply_data_relocs runs, so the index is already known. 
*/ 4423 u32 width; 4424 u64 value; 4425 if (tos->kind == SK_FUNC) { 4426 if (r->kind != R_ABS32) 4427 wfail(t, 4428 "wasm: function-pointer data relocation kind %u not supported " 4429 "(only R_ABS32 on wasm32 target)", 4430 (unsigned)r->kind); 4431 if (r->addend != 0) 4432 wfail(t, "wasm: nonzero addend on function-pointer data relocation"); 4433 u32 tbl_idx = func_table_index_for(t, r->sym); 4434 width = 4; 4435 value = (u64)tbl_idx; 4436 u32 dst_off = t->section_base[r->section_id] + r->offset; 4437 mem_write_le(mem, dst_off, value, width); 4438 continue; 4439 } 4440 if (tos->section_id == OBJ_SEC_NONE && tos->kind != SK_COMMON) 4441 wfail(t, "wasm: data relocation against unresolved symbol not supported"); 4442 { 4443 int ok = 0; 4444 /* The addend is already added by `value = sym_addr + r->addend` below; 4445 * pass 0 here so we don't double-count. */ 4446 u32 sym_addr = wasm_sym_linear_addr(t, r->sym, 0, &ok); 4447 if (!ok) 4448 wfail(t, 4449 "wasm: data relocation target symbol has no linear-memory " 4450 "address"); 4451 switch (r->kind) { 4452 case R_ABS32: 4453 width = 4; 4454 value = (u64)(u32)((i64)sym_addr + r->addend); 4455 break; 4456 case R_ABS64: 4457 wfail(t, 4458 "wasm: R_ABS64 data relocation not supported on wasm32 target"); 4459 default: 4460 wfail(t, "wasm: unsupported data relocation kind %u", 4461 (unsigned)r->kind); 4462 } 4463 } 4464 u32 dst_off = t->section_base[r->section_id] + r->offset; 4465 mem_write_le(mem, dst_off, value, width); 4466 } 4467 } 4468 4469 /* Walk the deferred WSymFixup queue and patch the placeholder i32.const 4470 * imm in each WasmFunc.insns[] with the resolved absolute address. 
*/ 4471 static void apply_sym_fixups(WTarget* t) { 4472 for (u32 i = 0; i < t->sym_fixups_count; ++i) { 4473 WSymFixup fx = t->sym_fixups[i]; 4474 int ok = 0; 4475 u32 addr = wasm_sym_linear_addr(t, fx.sym, fx.addend, &ok); 4476 if (!ok) wfail(t, "wasm: deferred symbol fixup against unresolved symbol"); 4477 WasmFunc* f = &t->module->funcs[fx.wasm_func_idx]; 4478 if (fx.insn_idx >= f->ninsns) 4479 wfail(t, "wasm: deferred symbol fixup insn_idx out of range"); 4480 f->insns[fx.insn_idx].imm = (i64)addr; 4481 } 4482 } 4483 4484 static void wasm_materialize_data(WTarget* t) { 4485 if (!t->has_memory) { 4486 /* No linear memory was needed by any function body or addr_of, so 4487 * symbol fixups should be empty by construction. */ 4488 return; 4489 } 4490 u32 image_end = assign_section_bases(t); 4491 image_end = assign_common_bases(t, image_end); 4492 u32 stack_size = t->has_stack_pointer ? t->stack_size : 0u; 4493 u32 image = image_end ? align_to_u32(image_end, 16u) : WASM_DATA_NULL_GUARD; 4494 if (image > UINT32_MAX - stack_size) 4495 wfail(t, "wasm: linear memory image too large"); 4496 /* Build a single active data segment covering 0..image. Passive segments 4497 * + memory.init would be needed for multi-TU linking; single-TU output 4498 * stays with the simpler shape. 
*/ 4499 WasmDataSegment* seg = NULL; 4500 if (image) { 4501 seg = wasm_add_data(t->c, t->module); 4502 seg->mode = WASM_SEG_ACTIVE; 4503 seg->memidx = 0; 4504 seg->offset = 0; 4505 wasm_data_set_bytes(t->c, t->module, seg, NULL, (u64)image); 4506 } 4507 u32 nsec = obj_section_count(t->obj); 4508 for (ObjSecId sid = 1; sid < nsec; ++sid) { 4509 const Section* s = obj_section_get(t->obj, sid); 4510 if (!s || s->removed || !(s->flags & SF_ALLOC) || s->flags & SF_EXEC) 4511 continue; 4512 if (s->kind == SEC_BSS || s->sem == SSEM_NOBITS || !s->bytes.total) 4513 continue; 4514 buf_flatten(&s->bytes, seg->bytes + t->section_base[sid]); 4515 } 4516 if (seg) apply_data_relocs(t, seg->bytes); 4517 apply_sym_fixups(t); 4518 t->data_end = image; 4519 u32 stack_top = (u32)align_to_u32(image + stack_size, 16u); 4520 t->module->memories[0].min_pages = (stack_top + 65535u) / 65536u; 4521 /* Shared memory requires has_max and max >= min. ensure_shared_memory set a 4522 * provisional wasm32-ceiling cap (65536 pages = 4 GiB); now that the final 4523 * layout is known, tighten max down to min so the module declares a snug, 4524 * fixed shared memory. The backend never emits memory.grow, so the memory is 4525 * non-growable regardless, and a 4 GiB declared max would otherwise force an 4526 * embedder (e.g. `kit run`) to reserve the full ceiling up front. */ 4527 if (t->module->memories[0].shared) { 4528 t->module->memories[0].has_max = 1; 4529 t->module->memories[0].max_pages = t->module->memories[0].min_pages; 4530 } 4531 if (t->has_stack_pointer && t->stack_pointer_global < t->module->nglobals) { 4532 t->module->globals[t->stack_pointer_global].init.imm = stack_top; 4533 } 4534 } 4535 4536 /* Static-data initializers (e.g. `static fn_t v[] = {&foo, &bar};`) go 4537 * through ObjBuilder relocations rather than wasm_addr_of, so they never 4538 * touch queue_func_table_fixup. 
Scan the reloc table once before building 4539 * the funcref table so every function whose address is referenced from data 4540 * also gets a table slot. apply_data_relocs then patches the linear-memory 4541 * image with the assigned index. */ 4542 static void wasm_collect_func_data_refs(WTarget* t) { 4543 u32 ntotal = obj_reloc_total(t->obj); 4544 for (u32 i = 0; i < ntotal; ++i) { 4545 const Reloc* r = obj_reloc_at(t->obj, i); 4546 const ObjSym* tos; 4547 if (!r || r->removed) continue; 4548 if (r->section_id == OBJ_SEC_NONE) continue; 4549 { 4550 const Section* rs = obj_section_get(t->obj, r->section_id); 4551 if (!rs || rs->flags & SF_EXEC) continue; /* code-section relocs */ 4552 } 4553 tos = obj_symbol_get(t->obj, r->sym); 4554 if (!tos || tos->kind != SK_FUNC) continue; 4555 (void)func_table_index_for(t, r->sym); 4556 (void)sym_to_wasm_func(t, r->sym, NULL); 4557 } 4558 } 4559 4560 /* Build the single funcref table and its active element segment, then patch 4561 * every queued WFuncTableFixup's placeholder `i32.const 0` with the assigned 4562 * table index. Slot 0 stays reserved (call_indirect through index 0 traps on 4563 * the type check), so the first recorded function lands at index 1. Each 4564 * WasmElemSegment caps its funcs array at 64 entries; we chunk across 4565 * multiple segments when the address-taken set is larger. */ 4566 static void wasm_materialize_functable(WTarget* t) { 4567 wasm_collect_func_data_refs(t); 4568 if (!t->has_func_table || t->func_table_count == 0) return; 4569 ensure_module(t); 4570 /* Table: non-growable, sized to hold the reserved null slot plus every 4571 * assigned entry. */ 4572 WasmTable* tbl = wasm_add_table(t->c, t->module); 4573 tbl->elem_type = WASM_VAL_FUNCREF; 4574 tbl->min = 1u + t->func_table_count; 4575 tbl->max = tbl->min; 4576 tbl->has_max = 1; 4577 /* Active element segment populates table 0 starting at offset 1 (slot 0 4578 * stays null). Element segments are now heap-grown — no chunking needed. 
*/ 4579 { 4580 WasmElemSegment* seg = wasm_add_elem(t->c, t->module); 4581 seg->mode = WASM_SEG_ACTIVE; 4582 seg->elem_type = WASM_VAL_FUNCREF; 4583 seg->tableidx = 0; 4584 seg->offset = 1; 4585 for (u32 i = 0; i < t->func_table_count; ++i) { 4586 ObjSymId sym = t->func_table[i]; 4587 wasm_elem_push_func(t->c, t->module, seg, sym_to_wasm_func(t, sym, NULL)); 4588 } 4589 } 4590 /* Patch placeholders. */ 4591 for (u32 i = 0; i < t->func_table_fixups_count; ++i) { 4592 WFuncTableFixup fx = t->func_table_fixups[i]; 4593 u32 tbl_idx = func_table_index_for(t, fx.sym); 4594 WasmFunc* f = &t->module->funcs[fx.wasm_func_idx]; 4595 if (fx.insn_idx >= f->ninsns) 4596 wfail(t, "wasm: function-pointer fixup insn_idx out of range"); 4597 f->insns[fx.insn_idx].imm = (i64)tbl_idx; 4598 } 4599 } 4600 4601 /* Wasm requires every import to occupy a lower function index than any 4602 * defined function. The backend, however, allocates a WasmFunc for any 4603 * direct-call target in walk order — so a defined function may end up at a 4604 * lower array index than an import created later by promote_import_func. 4605 * Reorder m->funcs so all imports precede all definitions, then walk every 4606 * function-index reference in the module and apply the old->new mapping. */ 4607 static void wasm_reorder_funcs_imports_first(WTarget* t) { 4608 WasmModule* m = t->module; 4609 if (!m || m->nfuncs == 0) return; 4610 Heap* h = m->heap; 4611 u32 n = m->nfuncs; 4612 /* Quick check: bail out if imports are already before all definitions. 
*/ 4613 int seen_def = 0; 4614 int needs_reorder = 0; 4615 for (u32 i = 0; i < n; ++i) { 4616 if (m->funcs[i].is_import) { 4617 if (seen_def) { 4618 needs_reorder = 1; 4619 break; 4620 } 4621 } else { 4622 seen_def = 1; 4623 } 4624 } 4625 if (!needs_reorder) return; 4626 u32* old_to_new = (u32*)h->alloc(h, sizeof(u32) * n, _Alignof(u32)); 4627 WasmFunc* new_funcs = 4628 (WasmFunc*)h->alloc(h, sizeof(WasmFunc) * n, _Alignof(WasmFunc)); 4629 if (!old_to_new || !new_funcs) wfail(t, "wasm: out of memory"); 4630 u32 w_idx = 0; 4631 for (u32 i = 0; i < n; ++i) { 4632 if (m->funcs[i].is_import) { 4633 new_funcs[w_idx] = m->funcs[i]; 4634 old_to_new[i] = w_idx++; 4635 } 4636 } 4637 for (u32 i = 0; i < n; ++i) { 4638 if (!m->funcs[i].is_import) { 4639 new_funcs[w_idx] = m->funcs[i]; 4640 old_to_new[i] = w_idx++; 4641 } 4642 } 4643 /* Swap arrays. Old buffer is freed via the module's heap-tracked 4644 * realloc bookkeeping when wasm_module_free runs; we just overwrite the 4645 * pointer + length here. */ 4646 h->free(h, m->funcs, sizeof(WasmFunc) * m->cap_funcs); 4647 m->funcs = new_funcs; 4648 m->cap_funcs = n; 4649 /* Remap every funcidx-bearing slot in the module. 
*/ 4650 for (u32 fi = 0; fi < n; ++fi) { 4651 WasmFunc* f = &m->funcs[fi]; 4652 for (u32 j = 0; j < f->ninsns; ++j) { 4653 WasmInsn* in = &f->insns[j]; 4654 if (in->kind == WASM_INSN_CALL || in->kind == WASM_INSN_RETURN_CALL || 4655 in->kind == WASM_INSN_REF_FUNC) { 4656 u32 old = (u32)in->imm; 4657 if (old < n) in->imm = (int64_t)old_to_new[old]; 4658 } 4659 } 4660 } 4661 for (u32 i = 0; i < m->nexports; ++i) { 4662 if (m->exports[i].kind == 0u && m->exports[i].index < n) 4663 m->exports[i].index = old_to_new[m->exports[i].index]; 4664 } 4665 for (u32 i = 0; i < m->nelems; ++i) { 4666 WasmElemSegment* seg = &m->elems[i]; 4667 for (u32 j = 0; j < seg->nfuncs; ++j) { 4668 if (seg->funcs[j] < n) seg->funcs[j] = old_to_new[seg->funcs[j]]; 4669 } 4670 } 4671 if (m->has_start && m->start_func < n) 4672 m->start_func = old_to_new[m->start_func]; 4673 /* Update the backend's sym_to_func reverse map so any post-finalize lookups 4674 * (e.g. data-reloc fixups) resolve to the new indices. The map stores 4675 * idx+1 so 0 = "unassigned"; preserve that convention. */ 4676 for (ObjSymId sym = 0; sym < t->sym_to_func_cap; ++sym) { 4677 if (t->sym_to_func[sym]) { 4678 u32 old = t->sym_to_func[sym] - 1u; 4679 if (old < n) t->sym_to_func[sym] = old_to_new[old] + 1u; 4680 } 4681 } 4682 h->free(h, old_to_new, sizeof(u32) * n); 4683 } 4684 4685 /* Export the module's linear memory under the conventional name "memory" so 4686 * standard runtimes (browser/wasmtime/wasmer/Node) can find it. Only emits 4687 * when the module has at least one defined (non-import) memory and no 4688 * memory export already exists. */ 4689 static void wasm_export_memory(WTarget* t) { 4690 WasmModule* m = t->module; 4691 if (!m) return; 4692 ensure_linear_memory(t); 4693 m = t->module; 4694 /* Find the first defined (non-import) memory. 
*/ 4695 u32 mem_idx = 0; 4696 int found = 0; 4697 for (u32 i = 0; i < m->nmemories; ++i) { 4698 if (!m->memories[i].is_import) { 4699 mem_idx = i; 4700 found = 1; 4701 break; 4702 } 4703 } 4704 if (!found) return; 4705 /* Skip if the user already added a memory export (e.g. via the WAT path 4706 * or future explicit-export hook). */ 4707 for (u32 i = 0; i < m->nexports; ++i) { 4708 if (m->exports[i].kind == 2u && m->exports[i].index == mem_idx) return; 4709 } 4710 Heap* h = t->c->ctx->heap; 4711 WasmExport* e = wasm_add_export(t->c, m); 4712 static const char kName[] = "memory"; 4713 char* dup = (char*)h->alloc(h, sizeof(kName), 1); 4714 if (!dup) wfail(t, "wasm: out of memory"); 4715 memcpy(dup, kName, sizeof(kName)); 4716 e->name = dup; 4717 e->kind = 2u; /* memory export */ 4718 e->index = mem_idx; 4719 } 4720 4721 /* Diagnose any WasmFunc that has neither a body nor import status — that's a 4722 * declaration with no definition, e.g. a function-pointer reference to an 4723 * extern whose call site never appeared (so we never saw an ABI to synthesize 4724 * an import signature from). Emitting such a function would produce a 4725 * malformed module. Diagnose by sym name so users can fix the source. 
*/ 4726 static void wasm_diagnose_unresolved_funcs(WTarget* t) { 4727 if (!t->module) return; 4728 for (ObjSymId sym = 1; sym < t->sym_to_func_cap; ++sym) { 4729 if (!t->sym_to_func[sym]) continue; 4730 u32 idx = t->sym_to_func[sym] - 1u; 4731 if (idx >= t->module->nfuncs) continue; 4732 WasmFunc* f = &t->module->funcs[idx]; 4733 if (f->is_import) continue; 4734 if (f->ninsns != 0) continue; 4735 const ObjSym* os = obj_symbol_get(t->obj, sym); 4736 if (!os || os->section_id != OBJ_SEC_NONE) continue; 4737 const char* name = pool_sym_cstr(t->c->global, os->name, NULL); 4738 wfail(t, 4739 "wasm: undefined function '%s' has its address taken but no direct " 4740 "call was seen — cannot synthesize import signature; add a direct " 4741 "call or annotate the declaration", 4742 name ? name : "(anonymous)"); 4743 } 4744 } 4745 4746 void wasm_finalize(CGTarget* tg) { 4747 WTarget* t = (WTarget*)tg; 4748 wasm_materialize_functable(t); 4749 wasm_materialize_data(t); 4750 if (t->module) { 4751 wasm_diagnose_unresolved_funcs(t); 4752 wasm_export_memory(t); 4753 wasm_reorder_funcs_imports_first(t); 4754 } 4755 /* WasmModule remains attached to ObjBuilder via OBJ_EXT_WASM; emit_wasm 4756 * flushes it. 
*/ 4757 } 4758 4759 static void wasm_module_freefn(Compiler* c, void* p) { 4760 (void)c; 4761 WasmModule* m = (WasmModule*)p; 4762 Heap* h = m->heap; 4763 wasm_module_free(m); 4764 h->free(h, m, sizeof *m); 4765 } 4766 4767 WTarget* wasm_emit_target_new(Compiler* c, ObjBuilder* o, MCEmitter* mc) { 4768 Heap* h; 4769 WTarget* t; 4770 if (!c) return NULL; 4771 h = (Heap*)c->ctx->heap; 4772 t = (WTarget*)h->alloc(h, sizeof *t, _Alignof(WTarget)); 4773 if (!t) return NULL; 4774 memset(t, 0, sizeof *t); 4775 t->base.c = c; 4776 t->base.obj = o; 4777 t->c = c; 4778 t->obj = o; 4779 (void)mc; 4780 return t; 4781 } 4782 4783 void wasm_destroy(CGTarget* tg) { 4784 WTarget* t = (WTarget*)tg; 4785 Heap* h = t->c->ctx->heap; 4786 if (t->reg_to_local) h->free(h, t->reg_to_local, sizeof(u32) * t->reg_cap); 4787 if (t->reg_type) h->free(h, t->reg_type, sizeof(KitCgTypeId) * t->reg_cap); 4788 if (t->reg_cls) h->free(h, t->reg_cls, t->reg_cap); 4789 if (t->wir) h->free(h, t->wir, sizeof(WIR) * t->wir_cap); 4790 if (t->labels) h->free(h, t->labels, sizeof(WLabel) * t->labels_cap); 4791 if (t->slots) h->free(h, t->slots, sizeof(WSlot) * t->slots_cap); 4792 if (t->param_local_idx) 4793 h->free(h, t->param_local_idx, sizeof(u32) * t->param_local_idx_cap); 4794 if (t->byval_copies) 4795 h->free(h, t->byval_copies, sizeof(WByvalCopy) * t->byval_copies_cap); 4796 if (t->sym_to_func) 4797 h->free(h, t->sym_to_func, sizeof(u32) * t->sym_to_func_cap); 4798 if (t->funcs) h->free(h, t->funcs, sizeof(WFunc) * t->funcs_cap); 4799 if (t->section_base) 4800 h->free(h, t->section_base, sizeof(u32) * t->section_base_cap); 4801 if (t->common_base) 4802 h->free(h, t->common_base, sizeof(u32) * t->common_base_cap); 4803 if (t->sym_fixups) 4804 h->free(h, t->sym_fixups, sizeof(WSymFixup) * t->sym_fixups_cap); 4805 if (t->func_table) 4806 h->free(h, t->func_table, sizeof(ObjSymId) * t->func_table_cap); 4807 if (t->func_table_fixups) 4808 h->free(h, t->func_table_fixups, 4809 sizeof(WFuncTableFixup) * 
t->func_table_fixups_cap); 4810 h->free(h, t, sizeof *t); 4811 } 4812 4813 /* ----------------------------------------------------------------- 4814 * Module bootstrap: attach a WasmModule to the ObjBuilder so emit_wasm 4815 * can find it. Lazily on first func_begin. 4816 * ----------------------------------------------------------------- */ 4817 4818 static struct WasmModule* ensure_module(WTarget* t) { 4819 if (t->module) return t->module; 4820 Heap* h = t->c->ctx->heap; 4821 WasmModule* m = (WasmModule*)h->alloc(h, sizeof *m, _Alignof(WasmModule)); 4822 if (!m) wfail(t, "wasm: out of memory"); 4823 wasm_module_init(m, h); 4824 /* kit-produced modules always declare bulk-memory support: WIR_COPY_BYTES 4825 * / WIR_SET_BYTES lower to memory.copy / memory.fill unconditionally, and 4826 * the sret-return path emits memory.copy too. */ 4827 m->features |= WASM_FEATURE_BULK_MEMORY; 4828 t->module = m; 4829 obj_ext_set(t->obj, OBJ_EXT_WASM, m, wasm_module_freefn); 4830 return m; 4831 }