kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

internal.h (16429B)


      1 #ifndef KIT_ARCH_WASM_INTERNAL_H
      2 #define KIT_ARCH_WASM_INTERNAL_H
      3 
      4 /* Wasm CGTarget.
      5  *
      6  * Produces a tool-conventions-shaped Wasm module from a single Toy/C
      7  * translation unit. Operates with virtual_regs=1: CG mints fresh Reg ids and
      8  * the target assigns each Reg a Wasm local on first use, materializing values
      9  * by `local.get`/`local.set`. The accumulating WasmModule is attached to the
     10  * ObjBuilder under OBJ_EXT_WASM so emit_wasm can flush it at finalize time.
     11  *
     12  * Scope (initial): scalar i32/i64/f32/f64 with locals, params, return, direct
     13  * calls, structured `scope_begin(LOOP)` + break/continue, label-based
     14  * forward `if`/`if-else` via the kit_cg_if_begin/else/end pattern, basic
     15  * binop/unop/cmp/convert, and OPK_IMM constants. Address-taken locals,
     16  * aggregates, indirect calls, alloca, va_*, intrinsics other than
     17  * trivial ones, inline asm, TLS, bitfields, switch_, indirect_branch, and
     18  * load_label_addr all panic with a precise diagnostic; they will land as
     19  * follow-ups against doc/WASM.md. Atomics (load/store/rmw/cmpxchg/fence)
     20  * are lowered through the wasm-threads opcodes; the linear memory is
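     21  * promoted to shared on the first atomic emission. NOTE(review): WIROp below has records for ALLOCA, VA_*, CALL_INDIRECT, SWITCH and ASM_BLOCK, which the scope list above says panic — confirm the list is current. */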
     22 
     23 #include <kit/core.h>
     24 
     25 #include "arch/mc.h"
     26 #include "core/core.h"
     27 #include "obj/obj.h"
     28 #include "opt/ir.h"
     29 
     30 typedef CgTarget CGTarget; /* alias: lets this header spell the type as CGTarget (see wasm_finalize) */
     31 typedef struct CgIrModule CgIrModule; /* used only through a pointer in this header (wasm_emit_ir_module) */
     32 
     33 typedef struct WasmOptCGSwitchDesc { /* switch descriptor; the macro after this struct maps CGSwitchDesc onto it */
     34   Operand selector; /* presumably the value being switched on */
     35   KitCgTypeId selector_type; /* presumably the type of `selector` */
     36   Label default_label; /* presumably the target when no case matches */
     37   const CGSwitchCase* cases; /* presumably `ncases` entries; never written through this pointer */
     38   u32 ncases;
     39   u8 hint; /* NOTE(review): meaning not visible in this header */
     40   u8 opt_level; /* NOTE(review): value range not visible in this header */
     41   u8 pad[2]; /* explicit padding after the two u8 fields */
     42 } WasmOptCGSwitchDesc;
     43 #define CGSwitchDesc WasmOptCGSwitchDesc /* from here on the identifier CGSwitchDesc expands to WasmOptCGSwitchDesc */
     44 
     45 /* Forward references into the shared src/wasm module representation. The
     46  * target reuses that model so emit_wasm can flush through wasm_encode. */
     47 struct WasmModule; /* pointee type of WTarget.module */
     48 struct WasmFunc; /* pointee type of WTarget.cur_func */
     49 
     50 /* Per-instruction kinds we record, then linearize at func_end. Keeping the
     51  * record list separate from the WasmFunc body lets us run the deferred
     52  * label/jump → wasm-block resolution without baking forward-only structure
     53  * into the recording side. */
     54 typedef enum WIROp { /* the trailing comments name the WIR fields each kind uses */
     55   WIR_LOAD_IMM,      /* dst, imm, type */
     56   WIR_LOAD_CONST_F,  /* dst, fp_imm, type */
     57   WIR_COPY,          /* dst, src */
     58   WIR_BINOP,         /* dst, a, b, BinOp */
     59   WIR_UNOP,          /* dst, a, UnOp */
     60   WIR_CMP,           /* dst, a, b, CmpOp */
     61   WIR_CONVERT,       /* dst, src, ConvKind */
     62   WIR_LABEL,         /* place labels[0] */
     63   WIR_JUMP,          /* jump to labels[0] */
     64   WIR_CMP_BRANCH,    /* cmp_branch op a b -> labels[0] */
     65   WIR_CALL,          /* callee_sym, nargs, args[], ret_reg (or REG_NONE) */
     66   WIR_CALL_INDIRECT, /* callee in `a` (i32 table index), typeidx in `imm`,
     67                         nargs/args[]/ret_reg same shape as WIR_CALL */
     68   WIR_RET,           /* val (or REG_NONE) */
     69   WIR_SCOPE_OPEN,    /* scope_id; kind == SCOPE_LOOP/BLOCK */
     70   WIR_SCOPE_CLOSE,   /* scope_id */
     71   WIR_UNREACHABLE,   /* no operands documented; presumably lowers to wasm `unreachable` — confirm */
     72   WIR_SWITCH,       /* selector operand + dense label table at lowering */
     73   WIR_LOAD_LOCAL,   /* dst, frame_slot in `imm`, type */
     74   WIR_STORE_LOCAL,  /* frame_slot in `imm`, src (a/imm_a/imm_kind), type */
     75   WIR_LOAD_MEM,     /* dst = load addr */
     76   WIR_STORE_MEM,    /* store src -> addr */
     77   WIR_ADDR_OF,      /* dst = address of addr */
     78   WIR_ALLOCA,       /* dst = stack allocation of size operand */
     79   WIR_COPY_BYTES,   /* memcpy-like byte copy */
     80   WIR_SET_BYTES,    /* memset-like byte set */
     81   WIR_ATOMIC_LOAD,  /* dst = atomic.load(addr_reg), mem holds access info */
     82   WIR_ATOMIC_STORE, /* atomic.store(addr_reg, src) */
     83   WIR_ATOMIC_RMW, /* dst = atomic.rmw_<op>(addr_reg, val); cgop = KitCgAtomicOp
     84                    */
     85   WIR_ATOMIC_CAS, /* dst = prior; dst2 = ok (i32 0/1); a=addr_reg,
     86                      b/imm_kind_b=expected, op_c/imm_kind_c=desired */
     87   WIR_FENCE,      /* atomic.fence (memory order ignored, wasm has seq_cst) */
     88   WIR_VA_START,   /* addr = ap_addr; stores va_ptr_param_local i32 at *ap */
     89   WIR_VA_ARG,     /* dst = load of `type` from *(*addr); advance *addr by 8 */
     90   WIR_VA_COPY,    /* addr = dst_ap_addr; call_sret_addr = src_ap_addr */
     91   WIR_INTRINSIC,  /* cgop = IntrinKind; operand layout per kind:
     92                      - bit ops (CLZ/CTZ/POPCOUNT/BSWAP): one
     93                        register operand in `a`, single dst in `dst`,
     94                        `type` carries the operand/result type.
     95                      - overflow arith (S/U{ADD,SUB,MUL}_OVERFLOW): two
     96                        value operands captured into a/imm_a/imm_kind and
     97                        b/imm_b/imm_kind_b, dst = value reg, dst2 = i32
     98                        overflow flag reg, `type` = value type. */
     99   WIR_ASM_BLOCK,  /* inline asm block. raw_insns[0..raw_ninsns) is the
    100                      pre-parsed body; local.get/set/tee indices in
    101                      [0, asm_nin) are remapped to freshly allocated wasm
    102                      locals at linearize time. Input bindings are
    103                      captured into asm_in_kinds/asm_in_imms/asm_in_regs;
    104                      output bindings into asm_out_regs. */
    105 } WIROp;
    106 
    107 typedef struct WIR { /* one recorded instruction; which fields are meaningful depends on `op` (see WIROp) */
    108   u8 op; /* presumably a WIROp value stored as u8 */
    109   u8 cls;  /* RegClass for the produced value or branch operand */
    110   u8 cgop; /* BinOp/UnOp/CmpOp/ConvKind discriminator; KitCgAtomicOp for ATOMIC_RMW, IntrinKind for INTRINSIC */
    111   u8 pad;
    112   KitCgTypeId type;  /* type of dst (or operand type for cmp/store) */
    113   KitCgTypeId type2; /* operand type for convert/cmp */
    114   Reg dst;
    115   Reg a;
    116   Reg b;
    117   i64 imm; /* per-op payload: immediate, frame_slot, or typeidx (see WIROp comments) */
    118   double fp_imm; /* WIR_LOAD_CONST_F payload */
    119   Label labels[1]; /* used by LABEL/JUMP/CMP_BRANCH */
    120   u32 scope_id; /* WIR_SCOPE_OPEN/WIR_SCOPE_CLOSE payload */
    121   u32 imm_kind; /* 0=reg, 1=imm — operand variant for `a` (immediate value in imm_a) */
    122   u32 imm_kind_b; /* same encoding, for `b` (immediate value in imm_b) */
    123   i64 imm_a;
    124   i64 imm_b;
    125   ObjSymId call_sym; /* WIR_CALL callee symbol */
    126   u32 call_narg; /* presumably the length of the call_arg* arrays below */
    127   Operand addr; /* address operand (LOAD_MEM/STORE_MEM/ADDR_OF; ap address for VA_*) */
    128   MemAccess mem; /* access info (see WIR_ATOMIC_LOAD); presumably also used by LOAD_MEM/STORE_MEM */
    129   AggregateAccess agg; /* NOTE(review): usage not visible in this header */
    130   /* Per-arg captured operand. kind: 0=REG (value=call_args[i]), 1=IMM
    131    * (value=call_arg_imms[i]), 4=ADDR (the i32 address of an aggregate source —
    132    * call_arg_addrs[i] holds the original Operand). Heap-allocated when
    133    * nargs (call_narg) > 0; freed at func_end. */
    134   Reg* call_args;
    135   i64* call_arg_imms;
    136   u8* call_arg_kinds;
    137   KitCgTypeId* call_arg_types;
    138   Operand* call_arg_addrs;
    139   /* Sret return slot: when the called function returns indirectly, the caller
    140    * prepends an i32 pointer to a caller-allocated buffer. call_sret_addr is
    141    * that buffer's address operand; emit-time pushes its i32 first. */
    142   u8 call_has_sret;
    143   u8 call_variadic; /* callee is variadic; pack call_var_* into linear-memory
    144                      * buffer and push its addr as hidden trailing i32 arg. */
    145   u8 call_tail;     /* emit as return_call / return_call_indirect */
    146   u8 pad_call[1];
    147   Operand call_sret_addr; /* also carries src_ap_addr for WIR_VA_COPY */
    148   /* Variadic args (those past d->abi->nparams when callee is variadic). Stored
    149    * separately from call_args because they don't appear in the wasm signature
    150    * at all — they go into a caller-packed linear-memory buffer, each in an
    151    * 8-byte slot. kinds are WOP_REG / WOP_IMM only; aggregate variadic args
    152    * diagnose in wasm_call. */
    153   u32 call_nvar;
    154   Reg* call_var_regs;
    155   i64* call_var_imms;
    156   u8* call_var_kinds;
    157   KitCgTypeId* call_var_types;
    158   CGSwitchCase* switch_cases; /* presumably the WIR_SWITCH case list, switch_ncases entries */
    159   u32 switch_ncases;
    160   /* Atomic CAS extras: third value-operand (desired) plus a second result reg
    161    * (ok bool). For the other atomic ops these stay 0. dst2 is also the overflow-flag reg of the overflow intrinsics. */
    162   Reg dst2;
    163   Reg op_c;
    164   u32 imm_kind_c;
    165   i64 imm_c;
    166   /* WIR_ASM_BLOCK payload. Heap-allocated arrays owned by the WIR (freed at
    167    * per-func teardown). raw_insns is the parsed body; the *_in_* arrays hold
    168    * one entry per input operand (declaration order, including inout duplicates
    169    * appended at the end); the *_out_* arrays hold one entry per output. */
    170   struct WasmInsn* raw_insns;
    171   u32 raw_ninsns;
    172   u32 asm_nin;
    173   u32 asm_nout;
    174   u8* asm_in_kinds; /* WOP_REG / WOP_IMM / WOP_LOCAL */
    175   i64* asm_in_imms;
    176   Reg* asm_in_regs;
    177   KitCgTypeId* asm_in_types;
    178   /* For each input i: index of the matching output (numeric tieback "N" or
    179    * +r inout duplicate); -1 if the input is independent. Inputs that share
    180    * an output's wasm local materialize into the OUTPUT's local. */
    181   i32* asm_in_share_out;
    182   Reg* asm_out_regs;
    183   KitCgTypeId* asm_out_types;
    184 } WIR;
    185 
    186 typedef struct WScope { /* one entry of the CG scope stack (WTarget.scopes) */
    187   u8 cg_kind;       /* ScopeKind */
    188   u8 placed_in_wir; /* WIR_SCOPE_OPEN emitted */
    189   u8 break_seen;    /* break_label_place arrived */
    190   u8 cont_seen;     /* continue_label_place arrived */
    191   u32 id; /* presumably the value recorded as WIR.scope_id / WLabel.scope_id */
    192   Label break_lbl; /* presumably the label a break from this scope targets */
    193   Label cont_lbl; /* presumably the label a continue in this scope targets */
    194   KitCgTypeId result_type; /* NOTE(review): usage not visible in this header */
    195 } WScope;
    196 
    197 typedef enum WLabelKind { /* presumably the value stored in WLabel.kind */
    198   WLBL_UNBOUND = 0, /* the zero value, so a zero-filled WLabel reads as unbound */
    199   WLBL_SCOPE_BREAK, /* tied to a scope; WLabel.scope_id is meaningful */
    200   WLBL_SCOPE_CONT, /* tied to a scope; WLabel.scope_id is meaningful */
    201   WLBL_FORWARD, /* placed via label_place but not tied to a scope */
    202 } WLabelKind;
    203 
    204 typedef struct WLabel { /* per-label bookkeeping; entries live in WTarget.labels */
    205   u8 kind; /* presumably a WLabelKind value */
    206   u8 placed; /* presumably nonzero once the label has been placed (wir_index valid) */
    207   u8 pad[2];
    208   u32 scope_id;  /* for SCOPE_BREAK/SCOPE_CONT */
    209   u32 wir_index; /* WIR_LABEL position once placed */
    210 } WLabel;
    211 
    212 typedef struct WFunc { /* per-function record; entries live in WTarget.funcs */
    213   ObjSymId sym; /* the function's object symbol */
    214   u32 wasm_func_idx; /* index into WasmModule.funcs */
    215   KitCgTypeId fn_type; /* presumably the function's CG type id */
    216   u8 has_export_name; /* NOTE(review): who sets this is not visible in this header */
    217   u8 pad[3];
    218 } WFunc;
    219 
    220 typedef enum WSlotKind { /* presumably the value stored in WSlot.kind */
    221   W_SLOT_LOCAL = 0, /* presumably a scalar slot kept as a Wasm local (see WTarget.slots comment) */
    222   W_SLOT_STACK = 1, /* presumably an addressable slot in the linear-memory frame */
    223 } WSlotKind;
    224 
    225 /* For each ABI_ARG_INDIRECT (byval) param the callee receives an i32 pointer
    226  * in a wasm function param and must copy the pointed-to aggregate into a
    227  * caller-isolated buffer in the linear-memory stack frame. We queue these at
    228  * wasm_param time and emit the byte copies in the prologue, after frame setup
    229  * and before the user body. The queue is WTarget.byval_copies/nbyval_copies. */
    230 typedef struct WByvalCopy {
    231   u32 ptr_wasm_local; /* wasm-local holding the source pointer (param slot) */
    232   u32 dst_slot_id;    /* index into WTarget.slots; slot is W_SLOT_STACK */
    233 } WByvalCopy;
    234 
    235 typedef struct WSlot { /* one per-function frame slot; entries live in WTarget.slots */
    236   u8 kind; /* presumably a WSlotKind value */
    237   u8 pad[3];
    238   KitCgTypeId type;
    239   u32 wasm_local; /* presumably meaningful for W_SLOT_LOCAL slots */
    240   u32 size; /* NOTE(review): units presumably bytes — confirm */
    241   u32 align;
    242   u32 frame_offset; /* presumably meaningful for W_SLOT_STACK slots */
    243 } WSlot;
    244 
    245 /* Deferred symbol-address fixup. emit_addr_operand for OPK_GLOBAL pushes
    246  * `i32.const 0` placeholder and queues a WSymFixup. wasm_materialize_data
    247  * computes the compact section base layout and rewrites in.imm with the
    248  * absolute linear-memory address (section_base[sym->section_id] + sym->value
    249  * + addend). Static-data relocations are applied directly to the linear
    250  * memory image via the same section_base table. */
    251 typedef struct WSymFixup {
    252   u32 wasm_func_idx; /* presumably the function containing the placeholder insn */
    253   u32 insn_idx; /* presumably the placeholder's index within that function */
    254   ObjSymId sym; /* symbol whose address is patched in */
    255   i64 addend; /* added to the symbol address (see formula above) */
    256 } WSymFixup;
    257 
    258 /* Function-pointer fixup. emit_addr_operand for an OPK_GLOBAL pointing at a
    259  * function symbol pushes `i32.const 0` and queues a WFuncTableFixup. At
    260  * finalize, wasm_materialize_functable assigns each address-taken function a
    261  * sequential index (starting at 1; slot 0 is reserved as null) into the
    262  * single funcref table and rewrites in.imm with the assigned table index. */
    263 typedef struct WFuncTableFixup {
    264   u32 wasm_func_idx; /* presumably the function containing the placeholder insn */
    265   u32 insn_idx; /* presumably the placeholder's index within that function */
    266   ObjSymId sym; /* function symbol whose table index is patched in */
    267 } WFuncTableFixup;
    268 
    269 typedef struct WTarget { /* Wasm target state: TU-wide tables plus per-function scratch */
    270   CgTarget base; /* first member; presumably lets a CgTarget* be treated as a WTarget* — confirm */
    271 
    272   Compiler* c;
    273   ObjBuilder* obj;
    274   struct WasmModule* module; /* the module being accumulated for this TU */
    275 
    276   /* TU-wide: ObjSymId -> (wasm_func_idx + 1); 0 means "not yet a wasm func".
    277    * Lazily grown as ObjSymIds appear via call() or func_begin(). */
    278   u32* sym_to_func;
    279   u32 sym_to_func_cap;
    280   WFunc* funcs;
    281   u32 nfuncs;
    282   u32 funcs_cap;
    283 
    284   /* Per-function state. Reset on func_begin. */
    285   const CGFuncDesc* cur_fn_desc;
    286   /* Most recent SrcLoc the frontend told us about via wasm_set_loc. Used by
    287    * cur_loc so diagnostics attribute to the actual failing statement
    288    * rather than the function-definition location. Zeroed at func_begin;
    289    * a zero line falls back to cur_fn_desc->loc. */
    290   SrcLoc cur_stmt_loc;
    291   u32 cur_func_idx;
    292   struct WasmFunc* cur_func;
    293 
    294   /* SSA Reg -> Wasm local index (0..nparams=params, then locals). 0xffffffffu
    295    * means "not assigned yet". */
    296   u32* reg_to_local;
    297   KitCgTypeId* reg_type;
    298   u8* reg_cls;
    299   u32 reg_cap; /* presumably the shared capacity of the three reg_* arrays above */
    300 
    301   /* WIR record list for the current function. */
    302   WIR* wir;
    303   u32 nwir;
    304   u32 wir_cap;
    305 
    306   /* Labels minted by label_new. */
    307   WLabel* labels;
    308   u32 nlabels;
    309   u32 labels_cap;
    310 
    311   /* CG scope stack. */
    312   WScope scopes[32]; /* fixed storage: room for 32 entries */
    313   u32 nscopes; /* presumably the live stack depth (capacity is 32) */
    314   u32 next_scope_id;
    315 
    316   /* Per-function frame slots. Scalar slots stay as Wasm locals; addressable
    317    * slots are assigned offsets in a downward-growing linear-memory frame. */
    318   WSlot* slots;
    319   u32 nslots;
    320   u32 slots_cap;
    321   u32 frame_size;
    322   u32 frame_align;
    323   u32 frame_base_local;
    324   u32 frame_saved_sp_local;
    325   u8 has_stack_frame;
    326   u8 has_memory;
    327   u8 has_stack_pointer;
    328   u8 cur_has_sret;
    329   u32 stack_pointer_global;
    330   u32 stack_size;
    331   u32 data_end;
    332 
    333   /* Compact section -> linear memory base. Populated lazily in
    334    * wasm_materialize_data; 0xFFFFFFFFu means "not assigned yet". The
    335    * compact layout reserves a small null guard at low memory and walks
    336    * SF_ALLOC sections in id order, giving each section an aligned base. */
    337   u32* section_base;
    338   u32 section_base_cap;
    339   /* Common-symbol (SK_COMMON) -> linear memory base, indexed by ObjSymId.
    340    * Common symbols have section_id == OBJ_SEC_NONE in ObjBuilder; for a
    341    * single-TU final-module emit we lay them out BSS-style after sections.
    342    * 0xFFFFFFFFu = no common base assigned. */
    343   u32* common_base;
    344   u32 common_base_cap;
    345 
    346   /* Deferred symbol-address fixups; see WSymFixup. */
    347   WSymFixup* sym_fixups;
    348   u32 sym_fixups_count;
    349   u32 sym_fixups_cap;
    350 
    351   /* Function-pointer table state. ObjSymIds of every function whose address
    352    * has been taken, in insertion order. The implied table index for entry
    353    * func_table[i] is (i + 1); slot 0 stays unpopulated as a null/trap guard.
    354    * Filled lazily by emit_addr_operand and patched into placeholder
    355    * `i32.const 0` insns at finalize. */
    356   ObjSymId* func_table;
    357   u32 func_table_count;
    358   u32 func_table_cap;
    359   WFuncTableFixup* func_table_fixups;
    360   u32 func_table_fixups_count;
    361   u32 func_table_fixups_cap;
    362   /* Patched into the linear-memory image at apply_data_relocs time for
    363    * R_ABS32 relocations whose target symbol is a function (so a static
    364    * `static fn_t v = &foo;` initializer ends up holding the table index). */
    365   u8 has_func_table;
    366 
    367   /* Per-function aggregate-lowering state. Populated by func_begin and used by
    368    * wasm_param/ret/call to translate sret + byval through the wasm32 BasicCABI
    369    * shape (sret pointer as hidden i32 leading param, byval args as i32
    370    * pointers, callee copy-in into a stack-backed local buffer). */
    371   u32 sret_param_local; /* wasm-local idx of the sret pointer; 0xffffffffu when
    372                            none */
    373   u32 va_ptr_param_local; /* wasm-local idx of the hidden i32 va_ptr trailing
    374                            * param on variadic functions; 0xffffffffu when none.
    375                            * Read by wasm_va_start to seed the va_list. */
    376   u32* param_local_idx;   /* per CG-param idx -> wasm-local idx (0xffffffffu for
    377                              IGNORE) */
    378   u32 param_local_idx_cap;
    379   u32 nparams_cg;     /* count of entries in param_local_idx */
    380   u8 cur_is_variadic; /* current function declared variadic (abi->variadic) */
    381   /* Per-function scratch locals for variadic-call packing. Lazily allocated
    382    * on the first variadic call site; reused across all variadic calls in the
    383    * same function (each call's save/restore window is self-contained, so a
    384    * single pair of locals is safe). 0xffffffffu when not yet allocated. */
    385   u32 varcall_saved_sp_local;
    386   u32 varcall_buf_local;
    387   /* Scratch i32 local used by va_arg to remember the va_list address across
    388    * the load-current-slot / advance-pointer sequence. Lazily allocated on
    389    * first va_arg in the function. 0xffffffffu when not allocated. */
    390   u32 va_arg_tmp_addr_local;
    391   WByvalCopy* byval_copies; /* queued byval copy-ins; see WByvalCopy */
    392   u32 nbyval_copies;
    393   u32 byval_copies_cap;
    394 
    395   /* Body has been terminated unconditionally — skip further insns until the
    396    * next label_place. Lets us emit dead code that the parser may produce
    397    * without breaking wasm validation. */
    398   u8 dead;
    399 } WTarget;
    400 
    401 CgTarget* wasm_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* mc); /* presumably creates a Wasm target and returns its CgTarget view; ownership not visible here */
    402 WTarget* wasm_emit_target_new(Compiler* c, ObjBuilder* o, MCEmitter* mc); /* same inputs, but returns the concrete WTarget */
    403 void wasm_emit_ir_module(WTarget* t, const CgIrModule* module); /* `module` is read-only to the callee */
    404 void wasm_finalize(CGTarget*); /* NOTE(review): presumably the finalize-time flush described in the file header — confirm */
    405 void wasm_destroy(CGTarget*); /* presumably the teardown matching the *_new constructors — confirm */
    406 
    407 /* CFG structurer (src/arch/wasm/structure.c). Rewrites the recorded WIR
    408  * list so every free WIR_LABEL becomes the break/continue of a synthetic
    409  * SCOPE_BLOCK / SCOPE_LOOP. Called from emit.c's linearize() before the
    410  * WIR walk; after this returns, br_to_label resolves every jump through
    411  * the existing scope-bound machinery. Labels referenced by WIR_SWITCH
    412  * are left untouched and handled by try_linearize_switch_island. */
    413 void wasm_structurize(WTarget* t); /* presumably operates on t->wir / t->nwir, the current function's record list */
    414 
    415 #endif