kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 55817243963568fc08d1668e2d8752ffdae1eb5c
parent 164bf2ad58cd1d8878ae1c792f9a9b139de524de
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 28 May 2026 12:02:19 -0700

wasm: ir_emit lowerings for bitfields, aggregates, TLS, and conds

Four independent backend lowerings that all live in src/arch/wasm/ir_emit.c
(plus a small obj_secnames hook):

- CG_IR_BITFIELD_{LOAD,STORE} lower to load + shift/mask + store over
  the storage unit, all in i64 arithmetic regardless of storage width
  (i64.load{8,16,32}_u zero-extends in, i64.store{8,16,32} truncates
  out, uniform 64-bit shift count keeps extraction width-agnostic).
  Unblocks the 5 bitfield test/parse cases.

- CG_IR_LOAD/CG_IR_COPY of an aggregate type route through a shared
  wasm_ir_emit_agg_move helper that materializes the two endpoint
  addresses with addr_of and emits memory.copy between them, instead
  of falling through to a scalar load. Unblocks the
  call_indirect_*_struct_* and call_large_const_global_struct_byval
  cases.

- cmp_branch, switch selector, and if-condition operands now go through
  wasm_ir_source_op, so an address-taken local (e.g. the `expected`
  out-param of __atomic_compare_exchange) is loaded from memory rather
  than read as an undefined wasm local. Unblocks
  rv64_atomic_widths_orders.

- Wasm has no thread-local storage model (one linear memory per
  instance), so a thread-local is just an ordinary data object:
  obj_secname_tdata/tbss name them .tdata/.tbss for CFREE_OBJ_WASM,
  and CG_IR_TLS_ADDR_OF lowers to a plain symbol address. Unblocks
  6_7_1_03_thread_local_basic and gnu_thread_storage_01.

Diffstat:
M src/arch/wasm/ir_emit.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
M src/obj/obj_secnames.c | 10 ++++++++++
2 files changed, 179 insertions(+), 34 deletions(-)

diff --git a/src/arch/wasm/ir_emit.c b/src/arch/wasm/ir_emit.c @@ -254,6 +254,36 @@ static Operand wasm_ir_addr_op(WasmIrEmitter* e, CgSemOperand in, SrcLoc loc) { return out; } +/* Lower an aggregate move to a memory.copy between two linear-memory homes. + * Both `dst` and `src` are lvalue operands (frame slots, indirect `[ptr]` + * addressing, or global symbols); copy_bytes wants each endpoint as a + * pointer-valued register, so materialize the effective address of each with + * addr_of first. `ty` names the aggregate being moved. */ +static void wasm_ir_emit_agg_move(WasmIrEmitter* e, CgSemOperand dst, + CgSemOperand src, CfreeCgTypeId ty, + SrcLoc loc) { + CGTarget* t = (CGTarget*)&e->target->base; + AggregateAccess agg; + CfreeCgTypeId pty = cg_type_ptr_to(e->target->c, ty); + Operand adst = wasm_ir_addr_op(e, dst, loc); + Operand asrc = wasm_ir_addr_op(e, src, loc); + Operand dreg, sreg; + memset(&dreg, 0, sizeof dreg); + memset(&sreg, 0, sizeof sreg); + dreg.kind = sreg.kind = OPK_REG; + dreg.type = sreg.type = pty; + dreg.cls = sreg.cls = (u8)RC_INT; + dreg.v.reg = wasm_ir_temp_reg(e); + sreg.v.reg = wasm_ir_temp_reg(e); + wasm_addr_of(t, dreg, adst); + wasm_addr_of(t, sreg, asrc); + memset(&agg, 0, sizeof agg); + agg.type = ty; + agg.size = (u32)abi_cg_sizeof(e->target->c->abi, ty); + agg.align = (u32)abi_cg_alignof(e->target->c->abi, ty); + wasm_copy_bytes(t, dreg, sreg, agg); +} + static CGScope wasm_ir_scope_lookup(WasmIrEmitter* e, CGScope recorded, SrcLoc loc) { if ((u32)recorded >= e->scope_map_n || !e->scope_map[recorded]) @@ -359,7 +389,7 @@ static void wasm_ir_emit_switch(WasmIrEmitter* e, const CgIrInst* in) { const CgIrSwitchAux* aux = (const CgIrSwitchAux*)in->extra.aux; CGSwitchDesc d; memset(&d, 0, sizeof d); - d.selector = wasm_ir_value_op(e, in->opnds[0]); + d.selector = wasm_ir_source_op(e, in->opnds[0], in->loc); d.selector_type = aux->selector_type; d.default_label = aux->default_label; d.cases = aux->cases; @@ -369,6 +399,98 @@ static 
void wasm_ir_emit_switch(WasmIrEmitter* e, const CgIrInst* in) { wasm_switch((CGTarget*)&e->target->base, &d); } +/* Bitfields have no native wasm insert/extract, so lower to load + shift/mask + * + store over the storage unit. All arithmetic runs in i64 regardless of + * storage width: the load zero-extends into i64 (i64.load{8,16,32}_u), the + * store truncates back (i64.store{8,16,32}), and a uniform 64-bit shift count + * keeps the field-extraction math width-agnostic. storage_offset is always 0 + * here — the frontend folds it into record_addr. */ +#define WASM_BF_REG_BITS 64u + +static Operand wasm_ir_temp_i64(WasmIrEmitter* e) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_REG; + o.type = builtin_id(CFREE_CG_BUILTIN_I64); + o.cls = (u8)RC_INT; + o.v.reg = wasm_ir_temp_reg(e); + return o; +} + +static Operand wasm_ir_imm_i64(i64 v) { + Operand o; + memset(&o, 0, sizeof o); + o.kind = OPK_IMM; + o.type = builtin_id(CFREE_CG_BUILTIN_I64); + o.cls = (u8)RC_INT; + o.v.imm = v; + return o; +} + +/* Storage-unit access: i64 value, exactly storage_size bytes wide. */ +static MemAccess wasm_ir_bf_storage_mem(const BitFieldAccess* bf) { + MemAccess mem = bf->storage; + mem.type = builtin_id(CFREE_CG_BUILTIN_I64); + mem.size = bf->storage.size ? bf->storage.size : 4u; + return mem; +} + +static void wasm_ir_emit_bitfield_load(WasmIrEmitter* e, const CgIrInst* in) { + CGTarget* t = (CGTarget*)&e->target->base; + const CgIrBitFieldAux* aux = (const CgIrBitFieldAux*)in->extra.aux; + const BitFieldAccess* bf = &aux->access; + u32 width = bf->bit_width ? 
bf->bit_width : 1u; + u32 lsb = bf->bit_offset; + u32 left = WASM_BF_REG_BITS - lsb - width; /* bits above the field */ + u32 right = WASM_BF_REG_BITS - width; /* slide field back to bit 0 */ + Operand addr = wasm_ir_addr_op(e, in->opnds[1], in->loc); + Operand val = wasm_ir_temp_i64(e); + WasmIrDest d; + Operand dst; + + /* Load the storage unit, slide the field to the top of the i64, then back + * down — arithmetic shift sign-extends a signed field, logical zero-extends + * an unsigned one. */ + wasm_load(t, val, addr, wasm_ir_bf_storage_mem(bf)); + if (left) wasm_binop(t, BO_SHL, val, val, wasm_ir_imm_i64((i64)left)); + if (right) + wasm_binop(t, bf->signed_ ? BO_SHR_S : BO_SHR_U, val, val, + wasm_ir_imm_i64((i64)right)); + dst = wasm_ir_dest_op(e, in->opnds[0], &d); + /* Narrow to the field's wasm value type; a no-op copy when dst is i64. */ + wasm_convert(t, CV_TRUNC, dst, val); + wasm_ir_dest_finish(e, &d); +} + +static void wasm_ir_emit_bitfield_store(WasmIrEmitter* e, const CgIrInst* in) { + CGTarget* t = (CGTarget*)&e->target->base; + const CgIrBitFieldAux* aux = (const CgIrBitFieldAux*)in->extra.aux; + const BitFieldAccess* bf = &aux->access; + u32 width = bf->bit_width ? bf->bit_width : 1u; + u32 lsb = bf->bit_offset; + u64 ones = (width >= WASM_BF_REG_BITS) ? ~(u64)0 : (((u64)1 << width) - 1u); + u64 mask = ones << lsb; + MemAccess mem = wasm_ir_bf_storage_mem(bf); + Operand addr = wasm_ir_addr_op(e, in->opnds[0], in->loc); + Operand cur = wasm_ir_temp_i64(e); + + /* Read-modify-write: clear the field bits, OR in the masked/shifted value. 
*/ + wasm_load(t, cur, addr, mem); + wasm_binop(t, BO_AND, cur, cur, wasm_ir_imm_i64((i64)~mask)); + if (in->opnds[1].kind == OPK_IMM) { + u64 v = ((u64)in->opnds[1].v.imm & ones) << lsb; + wasm_binop(t, BO_OR, cur, cur, wasm_ir_imm_i64((i64)v)); + } else { + Operand src = wasm_ir_source_op(e, in->opnds[1], in->loc); + Operand staged = wasm_ir_temp_i64(e); + wasm_convert(t, CV_ZEXT, staged, src); /* widen field value to i64 */ + wasm_binop(t, BO_AND, staged, staged, wasm_ir_imm_i64((i64)ones)); + if (lsb) wasm_binop(t, BO_SHL, staged, staged, wasm_ir_imm_i64((i64)lsb)); + wasm_binop(t, BO_OR, cur, cur, staged); + } + wasm_store(t, addr, cur, mem); +} + static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f, const CgIrInst* in) { CGTarget* t = (CGTarget*)&e->target->base; @@ -397,28 +519,8 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f, WasmIrDest d; Operand src, dst; if (wasm_ir_is_aggregate(e->target, in->opnds[0].type)) { - /* Aggregate value copy: lower to memory.copy between the two homes. - * copy_bytes wants both endpoints as pointer-valued registers, so - * materialize each slot address with addr_of first. 
*/ - AggregateAccess agg; - CfreeCgTypeId pty = cg_type_ptr_to(e->target->c, in->opnds[0].type); - Operand adst = wasm_ir_addr_op(e, in->opnds[0], in->loc); - Operand asrc = wasm_ir_addr_op(e, in->opnds[1], in->loc); - Operand dreg, sreg; - memset(&dreg, 0, sizeof dreg); - memset(&sreg, 0, sizeof sreg); - dreg.kind = sreg.kind = OPK_REG; - dreg.type = sreg.type = pty; - dreg.cls = sreg.cls = (u8)RC_INT; - dreg.v.reg = wasm_ir_temp_reg(e); - sreg.v.reg = wasm_ir_temp_reg(e); - wasm_addr_of(t, dreg, adst); - wasm_addr_of(t, sreg, asrc); - memset(&agg, 0, sizeof agg); - agg.type = in->opnds[0].type; - agg.size = (u32)abi_cg_sizeof(e->target->c->abi, in->opnds[0].type); - agg.align = (u32)abi_cg_alignof(e->target->c->abi, in->opnds[0].type); - wasm_copy_bytes(t, dreg, sreg, agg); + wasm_ir_emit_agg_move(e, in->opnds[0], in->opnds[1], in->opnds[0].type, + in->loc); return; } src = wasm_ir_source_op(e, in->opnds[1], in->loc); @@ -429,8 +531,18 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f, } case CG_IR_LOAD: { WasmIrDest d; - Operand addr = wasm_ir_addr_op(e, in->opnds[1], in->loc); - Operand dst = wasm_ir_dest_op(e, in->opnds[0], &d); + Operand addr, dst; + if (wasm_ir_is_aggregate(e->target, in->opnds[0].type)) { + /* Aggregate load: the source operand is the address of the aggregate + * (an indirect `[ptr]` or a global symbol), so its effective address + * is the source home. Lower to memory.copy into the destination's + * home rather than a scalar wasm load. 
*/ + wasm_ir_emit_agg_move(e, in->opnds[0], in->opnds[1], in->opnds[0].type, + in->loc); + return; + } + addr = wasm_ir_addr_op(e, in->opnds[1], in->loc); + dst = wasm_ir_dest_op(e, in->opnds[0], &d); wasm_load(t, dst, addr, in->extra.mem); wasm_ir_dest_finish(e, &d); return; @@ -449,9 +561,25 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f, wasm_ir_dest_finish(e, &d); return; } - case CG_IR_TLS_ADDR_OF: - wasm_ir_fail(e, in->loc, "wasm target: tls_addr_of not yet implemented"); + case CG_IR_TLS_ADDR_OF: { + /* Wasm has no thread-local storage: a module instance owns one linear + * memory, so a thread-local resolves to a fixed data address. Lower to + * the symbol's (addend-adjusted) linear-memory address, exactly like a + * non-TLS addr_of of a global. */ + const CgIrTlsAux* aux = (const CgIrTlsAux*)in->extra.aux; + WasmIrDest d; + Operand src; + Operand dst; + memset(&src, 0, sizeof src); + src.kind = OPK_GLOBAL; + src.type = in->opnds[0].type; + src.v.global.sym = aux->sym; + src.v.global.addend = aux->addend; + dst = wasm_ir_dest_op(e, in->opnds[0], &d); + wasm_addr_of(t, dst, src); + wasm_ir_dest_finish(e, &d); return; + } case CG_IR_AGG_COPY: { const CgIrAggAux* aux = (const CgIrAggAux*)in->extra.aux; Operand dst = wasm_ir_source_op(e, in->opnds[0], in->loc); @@ -467,12 +595,10 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f, return; } case CG_IR_BITFIELD_LOAD: - wasm_ir_fail(e, in->loc, - "wasm target: bitfield_load not yet implemented"); + wasm_ir_emit_bitfield_load(e, in); return; case CG_IR_BITFIELD_STORE: - wasm_ir_fail(e, in->loc, - "wasm target: bitfield_store not yet implemented"); + wasm_ir_emit_bitfield_store(e, in); return; case CG_IR_BINOP: { WasmIrDest d; @@ -519,8 +645,12 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f, return; case CG_IR_CMP_BRANCH: { const CgIrCmpBranchAux* aux = (const CgIrCmpBranchAux*)in->extra.aux; - wasm_cmp_branch(t, aux->op, wasm_ir_value_op(e, 
in->opnds[0]), - wasm_ir_value_op(e, in->opnds[1]), aux->target); + /* Use source_op, not value_op: a compared operand may be an + * address-taken local that lives in linear memory (e.g. the `expected` + * out-param of __atomic_compare_exchange), which must be loaded rather + * than read as a bare wasm local. */ + wasm_cmp_branch(t, aux->op, wasm_ir_source_op(e, in->opnds[0], in->loc), + wasm_ir_source_op(e, in->opnds[1], in->loc), aux->target); return; } case CG_IR_SWITCH: @@ -552,7 +682,12 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f, d.break_label = aux->desc.break_label; d.continue_label = aux->desc.continue_label; d.result_type = aux->desc.result_type; - d.cond = wasm_ir_value_op(e, aux->desc.cond); + /* Only SCOPE_IF consumes cond; source_op (not value_op) so an + * address-taken local condition is loaded from memory, not read as a + * bare wasm local. */ + d.cond = aux->desc.kind == SCOPE_IF + ? wasm_ir_source_op(e, aux->desc.cond, in->loc) + : wasm_ir_value_op(e, aux->desc.cond); wasm_ir_bind_scope(e, aux->scope, wasm_scope_begin(t, &d), in->loc); return; } diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c @@ -113,6 +113,12 @@ Sym obj_secname_tdata(Compiler* c) { /* MSVC `.tls$` convention; linker concatenates `.tls$*` sorted * by suffix. See doc/WINDOWS.md §1.6. */ return pool_intern_slice(c->global, SLICE_LIT(".tls$")); + case CFREE_OBJ_WASM: + /* Wasm has no thread-local storage model: a module instance owns a + * single linear memory, so a thread-local is just an ordinary + * data object. Keep the `.tdata` name (laid out like `.data`) and + * lower tls_addr_of to a plain symbol address. */ + return pool_intern_slice(c->global, SLICE_LIT(".tdata")); default: return secname_panic_unimpl(c, ".tdata"); } @@ -128,6 +134,10 @@ Sym obj_secname_tbss(Compiler* c) { /* sorted-alphabetically-last so it falls at the tail of the TLS * image's zero-fill region. See doc/WINDOWS.md §1.6. 
*/ return pool_intern_slice(c->global, SLICE_LIT(".tls$ZZZ")); + case CFREE_OBJ_WASM: + /* See obj_secname_tdata: wasm thread-locals are ordinary + * (zero-filled) data. */ + return pool_intern_slice(c->global, SLICE_LIT(".tbss")); default: return secname_panic_unimpl(c, ".tbss"); }