internal.h (16429B)
1 #ifndef KIT_ARCH_WASM_INTERNAL_H 2 #define KIT_ARCH_WASM_INTERNAL_H 3 4 /* Wasm CGTarget. 5 * 6 * Produces a tool-conventions-shaped Wasm module from a single Toy/C 7 * translation unit. Operates with virtual_regs=1: CG mints fresh Reg ids and 8 * the target assigns each Reg a Wasm local on first use, materializing values 9 * by `local.get`/`local.set`. The accumulating WasmModule is attached to the 10 * ObjBuilder under OBJ_EXT_WASM so emit_wasm can flush it at finalize time. 11 * 12 * Scope (initial): scalar i32/i64/f32/f64 with locals, params, return, direct 13 * calls, structured `scope_begin(LOOP)` + break/continue, label-based 14 * forward `if`/`if-else` via the kit_cg_if_begin/else/end pattern, basic 15 * binop/unop/cmp/convert, and OPK_IMM constants. Address-taken locals, 16 * aggregates, indirect calls, alloca, va_*, intrinsics other than 17 * trivial ones, inline asm, TLS, bitfields, switch_, indirect_branch, and 18 * load_label_addr all panic with a precise diagnostic; they will land as 19 * follow-ups against doc/WASM.md. Atomics (load/store/rmw/cmpxchg/fence) 20 * are lowered through the wasm-threads opcodes; the linear memory is 21 * promoted to shared on the first atomic emission. */ 22 23 #include <kit/core.h> 24 25 #include "arch/mc.h" 26 #include "core/core.h" 27 #include "obj/obj.h" 28 #include "opt/ir.h" 29 30 typedef CgTarget CGTarget; 31 typedef struct CgIrModule CgIrModule; 32 33 typedef struct WasmOptCGSwitchDesc { 34 Operand selector; 35 KitCgTypeId selector_type; 36 Label default_label; 37 const CGSwitchCase* cases; 38 u32 ncases; 39 u8 hint; 40 u8 opt_level; 41 u8 pad[2]; 42 } WasmOptCGSwitchDesc; 43 #define CGSwitchDesc WasmOptCGSwitchDesc 44 45 /* Forward references into the shared src/wasm module representation. The 46 * target reuses that model so emit_wasm can flush through wasm_encode. */ 47 struct WasmModule; 48 struct WasmFunc; 49 50 /* Per-instruction kinds we record, then linearize at func_end. Keeping the 51 * record list separate from the WasmFunc body lets us run the deferred 52 * label/jump → wasm-block resolution without baking forward-only structure 53 * into the recording side. */ 54 typedef enum WIROp { 55 WIR_LOAD_IMM, /* dst, imm, type */ 56 WIR_LOAD_CONST_F, /* dst, fp_imm, type */ 57 WIR_COPY, /* dst, src */ 58 WIR_BINOP, /* dst, a, b, BinOp */ 59 WIR_UNOP, /* dst, a, UnOp */ 60 WIR_CMP, /* dst, a, b, CmpOp */ 61 WIR_CONVERT, /* dst, src, ConvKind */ 62 WIR_LABEL, /* place label[0] */ 63 WIR_JUMP, /* jump to label[0] */ 64 WIR_CMP_BRANCH, /* cmp_branch op a b -> label[0] */ 65 WIR_CALL, /* callee_sym, nargs, args[], ret_reg (or REG_NONE) */ 66 WIR_CALL_INDIRECT, /* callee in `a` (i32 table index), typeidx in `imm`, 67 nargs/args[]/ret_reg same shape as WIR_CALL */ 68 WIR_RET, /* val (or REG_NONE) */ 69 WIR_SCOPE_OPEN, /* scope_id; kind == SCOPE_LOOP/BLOCK */ 70 WIR_SCOPE_CLOSE, /* scope_id */ 71 WIR_UNREACHABLE, 72 WIR_SWITCH, /* selector operand + dense label table at lowering */ 73 WIR_LOAD_LOCAL, /* dst, frame_slot in `imm`, type */ 74 WIR_STORE_LOCAL, /* frame_slot in `imm`, src (a/imm_a/imm_kind), type */ 75 WIR_LOAD_MEM, /* dst = load addr */ 76 WIR_STORE_MEM, /* store src -> addr */ 77 WIR_ADDR_OF, /* dst = address of addr */ 78 WIR_ALLOCA, /* dst = stack allocation of size operand */ 79 WIR_COPY_BYTES, /* memcpy-like byte copy */ 80 WIR_SET_BYTES, /* memset-like byte set */ 81 WIR_ATOMIC_LOAD, /* dst = atomic.load(addr_reg), mem holds access info */ 82 WIR_ATOMIC_STORE, /* atomic.store(addr_reg, src) */ 83 WIR_ATOMIC_RMW, /* dst = atomic.rmw_<op>(addr_reg, val); cgop = KitCgAtomicOp 84 */ 85 WIR_ATOMIC_CAS, /* dst = prior; dst2 = ok (i32 0/1); a=addr_reg, 86 b/imm_kind_b=expected, op_c/imm_kind_c=desired */ 87 WIR_FENCE, /* atomic.fence (memory order ignored, wasm has seq_cst) */ 88 WIR_VA_START, /* addr = ap_addr; stores va_ptr_param_local i32 at *ap */ 89 WIR_VA_ARG, /* dst = load of `type` from *(*addr); advance *addr by 8 */ 90 WIR_VA_COPY, /* addr = dst_ap_addr; call_sret_addr = src_ap_addr */ 91 WIR_INTRINSIC, /* cgop = IntrinKind; operand layout per kind: 92 - bit ops (CLZ/CTZ/POPCOUNT/BSWAP): one 93 register operand in `a`, single dst in `dst`, 94 `type` carries the operand/result type. 95 - overflow arith (S/U{ADD,SUB,MUL}_OVERFLOW): two 96 value operands captured into a/imm_a/imm_kind and 97 b/imm_b/imm_kind_b, dst = value reg, dst2 = i32 98 overflow flag reg, `type` = value type. */ 99 WIR_ASM_BLOCK, /* inline asm block. raw_insns[0..raw_ninsns) is the 100 pre-parsed body; local.get/set/tee indices in 101 [0, asm_nin) are remapped to freshly allocated wasm 102 locals at linearize time. Input bindings are 103 captured into asm_in_kinds/asm_in_imms/asm_in_regs; 104 output bindings into asm_out_regs. */ 105 } WIROp; 106 107 typedef struct WIR { 108 u8 op; 109 u8 cls; /* RegClass for the produced value or branch operand */ 110 u8 cgop; /* BinOp/UnOp/CmpOp/ConvKind discriminator */ 111 u8 pad; 112 KitCgTypeId type; /* type of dst (or operand type for cmp/store) */ 113 KitCgTypeId type2; /* operand type for convert/cmp */ 114 Reg dst; 115 Reg a; 116 Reg b; 117 i64 imm; 118 double fp_imm; 119 Label labels[1]; /* used by LABEL/JUMP/CMP_BRANCH */ 120 u32 scope_id; 121 u32 imm_kind; /* 0=reg, 1=imm — operand variant for a/b */ 122 u32 imm_kind_b; 123 i64 imm_a; 124 i64 imm_b; 125 ObjSymId call_sym; 126 u32 call_narg; 127 Operand addr; 128 MemAccess mem; 129 AggregateAccess agg; 130 /* Per-arg captured operand. kind: 0=REG (value=arg_regs[i]), 1=IMM 131 * (value=arg_imms[i]), 4=ADDR (the i32 address of an aggregate source — 132 * call_arg_addrs[i] holds the original Operand). Heap-allocated when 133 * nargs>0; freed at func_end. */ 134 Reg* call_args; 135 i64* call_arg_imms; 136 u8* call_arg_kinds; 137 KitCgTypeId* call_arg_types; 138 Operand* call_arg_addrs; 139 /* Sret return slot: when the called function returns indirectly, the caller 140 * prepends an i32 pointer to a caller-allocated buffer. call_sret_addr is 141 * that buffer's address operand; emit-time pushes its i32 first. */ 142 u8 call_has_sret; 143 u8 call_variadic; /* callee is variadic; pack call_var_* into linear-memory 144 * buffer and push its addr as hidden trailing i32 arg. */ 145 u8 call_tail; /* emit as return_call / return_call_indirect */ 146 u8 pad_call[1]; 147 Operand call_sret_addr; 148 /* Variadic args (those past d->abi->nparams when callee is variadic). Stored 149 * separately from call_args because they don't appear in the wasm signature 150 * at all — they go into a caller-packed linear-memory buffer, each in an 151 * 8-byte slot. kinds are WOP_REG / WOP_IMM only; aggregate variadic args 152 * diagnose in wasm_call. */ 153 u32 call_nvar; 154 Reg* call_var_regs; 155 i64* call_var_imms; 156 u8* call_var_kinds; 157 KitCgTypeId* call_var_types; 158 CGSwitchCase* switch_cases; 159 u32 switch_ncases; 160 /* Atomic CAS extras: third value-operand (desired) plus a second result reg 161 * (ok bool). For the other atomic ops these stay 0. */ 162 Reg dst2; 163 Reg op_c; 164 u32 imm_kind_c; 165 i64 imm_c; 166 /* WIR_ASM_BLOCK payload. Heap-allocated arrays owned by the WIR (freed at 167 * per-func teardown). raw_insns is the parsed body; the *_in_* arrays hold 168 * one entry per input operand (declaration order, including inout duplicates 169 * appended at the end); the *_out_* arrays hold one entry per output. */ 170 struct WasmInsn* raw_insns; 171 u32 raw_ninsns; 172 u32 asm_nin; 173 u32 asm_nout; 174 u8* asm_in_kinds; /* WOP_REG / WOP_IMM / WOP_LOCAL */ 175 i64* asm_in_imms; 176 Reg* asm_in_regs; 177 KitCgTypeId* asm_in_types; 178 /* For each input i: index of the matching output (numeric tieback "N" or 179 * +r inout duplicate); -1 if the input is independent. Inputs that share 180 * an output's wasm local materialize into the OUTPUT's local. */ 181 i32* asm_in_share_out; 182 Reg* asm_out_regs; 183 KitCgTypeId* asm_out_types; 184 } WIR; 185 186 typedef struct WScope { 187 u8 cg_kind; /* ScopeKind */ 188 u8 placed_in_wir; /* WIR_SCOPE_OPEN emitted */ 189 u8 break_seen; /* break_label_place arrived */ 190 u8 cont_seen; /* continue_label_place arrived */ 191 u32 id; 192 Label break_lbl; 193 Label cont_lbl; 194 KitCgTypeId result_type; 195 } WScope; 196 197 typedef enum WLabelKind { 198 WLBL_UNBOUND = 0, 199 WLBL_SCOPE_BREAK, 200 WLBL_SCOPE_CONT, 201 WLBL_FORWARD, /* placed via label_place but not tied to a scope */ 202 } WLabelKind; 203 204 typedef struct WLabel { 205 u8 kind; 206 u8 placed; 207 u8 pad[2]; 208 u32 scope_id; /* for SCOPE_BREAK/SCOPE_CONT */ 209 u32 wir_index; /* WIR_LABEL position once placed */ 210 } WLabel; 211 212 typedef struct WFunc { 213 ObjSymId sym; 214 u32 wasm_func_idx; /* index into WasmModule.funcs */ 215 KitCgTypeId fn_type; 216 u8 has_export_name; 217 u8 pad[3]; 218 } WFunc; 219 220 typedef enum WSlotKind { 221 W_SLOT_LOCAL = 0, 222 W_SLOT_STACK = 1, 223 } WSlotKind; 224 225 /* For each ABI_ARG_INDIRECT (byval) param the callee receives an i32 pointer 226 * in a wasm function param and must copy the pointed-to aggregate into a 227 * caller-isolated buffer in the linear-memory stack frame. We queue these at 228 * wasm_param time and emit the byte copies in the prologue, after frame setup 229 * and before the user body. */ 230 typedef struct WByvalCopy { 231 u32 ptr_wasm_local; /* wasm-local holding the source pointer (param slot) */ 232 u32 dst_slot_id; /* index into WTarget.slots; slot is W_SLOT_STACK */ 233 } WByvalCopy; 234 235 typedef struct WSlot { 236 u8 kind; 237 u8 pad[3]; 238 KitCgTypeId type; 239 u32 wasm_local; 240 u32 size; 241 u32 align; 242 u32 frame_offset; 243 } WSlot; 244 245 /* Deferred symbol-address fixup. emit_addr_operand for OPK_GLOBAL pushes 246 * `i32.const 0` placeholder and queues a WSymFixup. wasm_materialize_data 247 * computes the compact section base layout and rewrites in.imm with the 248 * absolute linear-memory address (section_base[sym->section_id] + sym->value 249 * + addend). Static-data relocations are applied directly to the linear 250 * memory image via the same section_base table. */ 251 typedef struct WSymFixup { 252 u32 wasm_func_idx; 253 u32 insn_idx; 254 ObjSymId sym; 255 i64 addend; 256 } WSymFixup; 257 258 /* Function-pointer fixup. emit_addr_operand for an OPK_GLOBAL pointing at a 259 * function symbol pushes `i32.const 0` and queues a WFuncTableFixup. At 260 * finalize, wasm_materialize_functable assigns each address-taken function a 261 * sequential index (starting at 1; slot 0 is reserved as null) into the 262 * single funcref table and rewrites in.imm with the assigned table index. */ 263 typedef struct WFuncTableFixup { 264 u32 wasm_func_idx; 265 u32 insn_idx; 266 ObjSymId sym; 267 } WFuncTableFixup; 268 269 typedef struct WTarget { 270 CgTarget base; 271 272 Compiler* c; 273 ObjBuilder* obj; 274 struct WasmModule* module; 275 276 /* TU-wide: ObjSymId -> (wasm_func_idx + 1); 0 means "not yet a wasm func". 277 * Lazily grown as ObjSymIds appear via call() or func_begin(). */ 278 u32* sym_to_func; 279 u32 sym_to_func_cap; 280 WFunc* funcs; 281 u32 nfuncs; 282 u32 funcs_cap; 283 284 /* Per-function state. Reset on func_begin. */ 285 const CGFuncDesc* cur_fn_desc; 286 /* Most recent SrcLoc the frontend told us about via wasm_set_loc. Used by 287 * cur_loc so diagnostics attribute to the actual failing statement 288 * rather than the function-definition location. Zeroed at func_begin; 289 * a zero line falls back to cur_fn_desc->loc. */ 290 SrcLoc cur_stmt_loc; 291 u32 cur_func_idx; 292 struct WasmFunc* cur_func; 293 294 /* SSA Reg -> Wasm local index (0..nparams=params, then locals). 0xffffffffu 295 * means "not assigned yet". */ 296 u32* reg_to_local; 297 KitCgTypeId* reg_type; 298 u8* reg_cls; 299 u32 reg_cap; 300 301 /* WIR record list for the current function. */ 302 WIR* wir; 303 u32 nwir; 304 u32 wir_cap; 305 306 /* Labels minted by label_new. */ 307 WLabel* labels; 308 u32 nlabels; 309 u32 labels_cap; 310 311 /* CG scope stack. */ 312 WScope scopes[32]; 313 u32 nscopes; 314 u32 next_scope_id; 315 316 /* Per-function frame slots. Scalar slots stay as Wasm locals; addressable 317 * slots are assigned offsets in a downward-growing linear-memory frame. */ 318 WSlot* slots; 319 u32 nslots; 320 u32 slots_cap; 321 u32 frame_size; 322 u32 frame_align; 323 u32 frame_base_local; 324 u32 frame_saved_sp_local; 325 u8 has_stack_frame; 326 u8 has_memory; 327 u8 has_stack_pointer; 328 u8 cur_has_sret; 329 u32 stack_pointer_global; 330 u32 stack_size; 331 u32 data_end; 332 333 /* Compact section -> linear memory base. Populated lazily in 334 * wasm_materialize_data; 0xFFFFFFFFu means "not assigned yet". The 335 * compact layout reserves a small null guard at low memory and walks 336 * SF_ALLOC sections in id order, giving each section an aligned base. */ 337 u32* section_base; 338 u32 section_base_cap; 339 /* Common-symbol (SK_COMMON) -> linear memory base, indexed by ObjSymId. 340 * Common symbols have section_id == OBJ_SEC_NONE in ObjBuilder; for a 341 * single-TU final-module emit we lay them out BSS-style after sections. 342 * 0xFFFFFFFFu = no common base assigned. */ 343 u32* common_base; 344 u32 common_base_cap; 345 346 /* Deferred symbol-address fixups; see WSymFixup. */ 347 WSymFixup* sym_fixups; 348 u32 sym_fixups_count; 349 u32 sym_fixups_cap; 350 351 /* Function-pointer table state. ObjSymIds of every function whose address 352 * has been taken, in insertion order. The implied table index for entry 353 * func_table[i] is (i + 1); slot 0 stays unpopulated as a null/trap guard. 354 * Filled lazily by emit_addr_operand and patched into placeholder 355 * `i32.const 0` insns at finalize. */ 356 ObjSymId* func_table; 357 u32 func_table_count; 358 u32 func_table_cap; 359 WFuncTableFixup* func_table_fixups; 360 u32 func_table_fixups_count; 361 u32 func_table_fixups_cap; 362 /* Patched into the linear-memory image at apply_data_relocs time for 363 * R_ABS32 relocations whose target symbol is a function (so a static 364 * `static fn_t v = &foo;` initializer ends up holding the table index). */ 365 u8 has_func_table; 366 367 /* Per-function aggregate-lowering state. Populated by func_begin and used by 368 * wasm_param/ret/call to translate sret + byval through the wasm32 BasicCABI 369 * shape (sret pointer as hidden i32 leading param, byval args as i32 370 * pointers, callee copy-in into a stack-backed local buffer). */ 371 u32 sret_param_local; /* wasm-local idx of the sret pointer; 0xffffffffu when 372 none */ 373 u32 va_ptr_param_local; /* wasm-local idx of the hidden i32 va_ptr trailing 374 * param on variadic functions; 0xffffffffu when none. 375 * Read by wasm_va_start to seed the va_list. */ 376 u32* param_local_idx; /* per CG-param idx -> wasm-local idx (0xffffffffu for 377 IGNORE) */ 378 u32 param_local_idx_cap; 379 u32 nparams_cg; /* count of entries in param_local_idx */ 380 u8 cur_is_variadic; /* current function declared variadic (abi->variadic) */ 381 /* Per-function scratch locals for variadic-call packing. Lazily allocated 382 * on the first variadic call site; reused across all variadic calls in the 383 * same function (each call's save/restore window is self-contained, so a 384 * single pair of locals is safe). 0xffffffffu when not yet allocated. */ 385 u32 varcall_saved_sp_local; 386 u32 varcall_buf_local; 387 /* Scratch i32 local used by va_arg to remember the va_list address across 388 * the load-current-slot / advance-pointer sequence. Lazily allocated on 389 * first va_arg in the function. 0xffffffffu when not allocated. */ 390 u32 va_arg_tmp_addr_local; 391 WByvalCopy* byval_copies; 392 u32 nbyval_copies; 393 u32 byval_copies_cap; 394 395 /* Body has been terminated unconditionally — skip further insns until the 396 * next label_place. Lets us emit dead code that the parser may produce 397 * without breaking wasm validation. */ 398 u8 dead; 399 } WTarget; 400 401 CgTarget* wasm_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* mc); 402 WTarget* wasm_emit_target_new(Compiler* c, ObjBuilder* o, MCEmitter* mc); 403 void wasm_emit_ir_module(WTarget* t, const CgIrModule* module); 404 void wasm_finalize(CGTarget*); 405 void wasm_destroy(CGTarget*); 406 407 /* CFG structurer (src/arch/wasm/structure.c). Rewrites the recorded WIR 408 * list so every free WIR_LABEL becomes the break/continue of a synthetic 409 * SCOPE_BLOCK / SCOPE_LOOP. Called from emit.c's linearize() before the 410 * WIR walk; after this returns, br_to_label resolves every jump through 411 * the existing scope-bound machinery. Labels referenced by WIR_SWITCH 412 * are left untouched and handled by try_linearize_switch_island. */ 413 void wasm_structurize(WTarget* t); 414 415 #endif