commit 55817243963568fc08d1668e2d8752ffdae1eb5c
parent 164bf2ad58cd1d8878ae1c792f9a9b139de524de
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 28 May 2026 12:02:19 -0700
wasm: ir_emit lowerings for bitfields, aggregates, TLS, and conds
Four independent backend lowerings that all live in src/arch/wasm/ir_emit.c
(plus a small obj_secnames hook):
- CG_IR_BITFIELD_{LOAD,STORE} lower to load + shift/mask + store over
the storage unit, all in i64 arithmetic regardless of storage width
(i64.load{8,16,32}_u zero-extends in, i64.store{8,16,32} truncates
out, uniform 64-bit shift count keeps extraction width-agnostic).
Unblocks the 5 bitfield test/parse cases.
- CG_IR_LOAD/CG_IR_COPY of an aggregate type route through a shared
wasm_ir_emit_agg_move helper that materializes the two endpoint
addresses with addr_of and emits memory.copy between them, instead
of falling through to a scalar load. Unblocks the
call_indirect_*_struct_* and call_large_const_global_struct_byval
cases.
- cmp_branch, switch selector, and if-condition operands now go through
wasm_ir_source_op, so an address-taken local (e.g. the `expected`
out-param of __atomic_compare_exchange) is loaded from memory rather
than read as an undefined wasm local. Unblocks
rv64_atomic_widths_orders.
- Wasm has no thread-local storage model (one linear memory per
instance), so a thread-local is just an ordinary data object:
obj_secname_tdata/tbss name them .tdata/.tbss for CFREE_OBJ_WASM,
and CG_IR_TLS_ADDR_OF lowers to a plain symbol address. Unblocks
6_7_1_03_thread_local_basic and gnu_thread_storage_01.
Diffstat:
2 files changed, 179 insertions(+), 34 deletions(-)
diff --git a/src/arch/wasm/ir_emit.c b/src/arch/wasm/ir_emit.c
@@ -254,6 +254,36 @@ static Operand wasm_ir_addr_op(WasmIrEmitter* e, CgSemOperand in, SrcLoc loc) {
return out;
}
+/* Move one aggregate of type `ty` from the home of `src` to the home of `dst`
+ * as a memory.copy between two linear-memory locations.
+ *
+ * `dst` and `src` are lvalue operands. Each is first turned into an address
+ * operand with wasm_ir_addr_op; because wasm_copy_bytes takes its endpoints as
+ * pointer-valued registers, each address is then materialized with
+ * wasm_addr_of into its own pointer-typed integer temp. The destination is
+ * always handled before the source, which fixes the order in which temps are
+ * requested and code is emitted. Size and alignment of the copy come from the
+ * ABI's sizeof/alignof for `ty`. */
+static void wasm_ir_emit_agg_move(WasmIrEmitter* e, CgSemOperand dst,
+                                  CgSemOperand src, CfreeCgTypeId ty,
+                                  SrcLoc loc) {
+  CGTarget* target = (CGTarget*)&e->target->base;
+  CfreeCgTypeId ptr_ty = cg_type_ptr_to(e->target->c, ty);
+  Operand dst_addr = wasm_ir_addr_op(e, dst, loc);
+  Operand src_addr = wasm_ir_addr_op(e, src, loc);
+  Operand dst_ptr;
+  Operand src_ptr;
+  AggregateAccess access;
+
+  /* Pointer temp that will hold the destination address. */
+  memset(&dst_ptr, 0, sizeof dst_ptr);
+  dst_ptr.kind = OPK_REG;
+  dst_ptr.type = ptr_ty;
+  dst_ptr.cls = (u8)RC_INT;
+  dst_ptr.v.reg = wasm_ir_temp_reg(e);
+
+  /* Pointer temp that will hold the source address. */
+  memset(&src_ptr, 0, sizeof src_ptr);
+  src_ptr.kind = OPK_REG;
+  src_ptr.type = ptr_ty;
+  src_ptr.cls = (u8)RC_INT;
+  src_ptr.v.reg = wasm_ir_temp_reg(e);
+
+  wasm_addr_of(target, dst_ptr, dst_addr);
+  wasm_addr_of(target, src_ptr, src_addr);
+
+  /* Describe the aggregate being copied, then emit the byte copy. */
+  memset(&access, 0, sizeof access);
+  access.type = ty;
+  access.size = (u32)abi_cg_sizeof(e->target->c->abi, ty);
+  access.align = (u32)abi_cg_alignof(e->target->c->abi, ty);
+  wasm_copy_bytes(target, dst_ptr, src_ptr, access);
+}
+
static CGScope wasm_ir_scope_lookup(WasmIrEmitter* e, CGScope recorded,
SrcLoc loc) {
if ((u32)recorded >= e->scope_map_n || !e->scope_map[recorded])
@@ -359,7 +389,7 @@ static void wasm_ir_emit_switch(WasmIrEmitter* e, const CgIrInst* in) {
const CgIrSwitchAux* aux = (const CgIrSwitchAux*)in->extra.aux;
CGSwitchDesc d;
memset(&d, 0, sizeof d);
- d.selector = wasm_ir_value_op(e, in->opnds[0]);
+ d.selector = wasm_ir_source_op(e, in->opnds[0], in->loc);
d.selector_type = aux->selector_type;
d.default_label = aux->default_label;
d.cases = aux->cases;
@@ -369,6 +399,98 @@ static void wasm_ir_emit_switch(WasmIrEmitter* e, const CgIrInst* in) {
wasm_switch((CGTarget*)&e->target->base, &d);
}
+/* Bitfields have no native wasm insert/extract, so lower to load + shift/mask
+ * + store over the storage unit. All arithmetic runs in i64 regardless of
+ * storage width: the load zero-extends into i64 (i64.load{8,16,32}_u), the
+ * store truncates back (i64.store{8,16,32}), and a uniform 64-bit shift count
+ * keeps the field-extraction math width-agnostic. storage_offset is always 0
+ * here — the frontend folds it into record_addr. */
+#define WASM_BF_REG_BITS 64u
+
+/* Build a register operand of builtin type I64 in the integer register class,
+ * backed by a temp register obtained from wasm_ir_temp_reg. */
+static Operand wasm_ir_temp_i64(WasmIrEmitter* e) {
+  Operand tmp;
+
+  memset(&tmp, 0, sizeof tmp);
+  tmp.cls = (u8)RC_INT;
+  tmp.kind = OPK_REG;
+  tmp.type = builtin_id(CFREE_CG_BUILTIN_I64);
+  tmp.v.reg = wasm_ir_temp_reg(e);
+  return tmp;
+}
+
+/* Build an immediate operand of builtin type I64 (integer class) carrying the
+ * constant `v`. */
+static Operand wasm_ir_imm_i64(i64 v) {
+  Operand imm;
+
+  memset(&imm, 0, sizeof imm);
+  imm.cls = (u8)RC_INT;
+  imm.kind = OPK_IMM;
+  imm.type = builtin_id(CFREE_CG_BUILTIN_I64);
+  imm.v.imm = v;
+  return imm;
+}
+
+/* Describe the memory access for a bitfield's storage unit. The result is a
+ * copy of bf->storage with the value type forced to I64 and the byte width set
+ * to the unit's recorded size, falling back to 4 bytes when that size is 0. */
+static MemAccess wasm_ir_bf_storage_mem(const BitFieldAccess* bf) {
+  MemAccess unit = bf->storage;
+
+  unit.type = builtin_id(CFREE_CG_BUILTIN_I64);
+  if (!unit.size) unit.size = 4u;
+  return unit;
+}
+
+/* Lower CG_IR_BITFIELD_LOAD: read a storage unit and extract one field.
+ *
+ * opnds[1] is the storage-unit lvalue and opnds[0] the destination; the field
+ * geometry comes from the CgIrBitFieldAux in extra.aux (a bit_width of 0 is
+ * treated as 1). The unit is loaded into an i64 temp, shifted left so the
+ * field's top bit sits at bit 63, then shifted right by 64 - width, which
+ * brings the field back down to bit 0. The right shift is arithmetic for a
+ * signed field (sign-extends) and logical for an unsigned one (zero-extends).
+ * The result is finally narrowed into the destination with CV_TRUNC.
+ *
+ * A field that does not fit in the 64-bit working register is reported with
+ * wasm_ir_fail instead of being lowered: the u32 shift-count arithmetic below
+ * would otherwise wrap around and the emitted shifts would silently extract
+ * the wrong bits. */
+static void wasm_ir_emit_bitfield_load(WasmIrEmitter* e, const CgIrInst* in) {
+  CGTarget* t = (CGTarget*)&e->target->base;
+  const CgIrBitFieldAux* aux = (const CgIrBitFieldAux*)in->extra.aux;
+  const BitFieldAccess* bf = &aux->access;
+  u32 width = bf->bit_width ? bf->bit_width : 1u;
+  u32 lsb = bf->bit_offset;
+  u32 left;
+  u32 right;
+  Operand addr;
+  Operand val;
+  WasmIrDest d;
+  Operand dst;
+
+  /* Require lsb + width <= 64, phrased so the check itself cannot overflow. */
+  if (lsb >= WASM_BF_REG_BITS || width > WASM_BF_REG_BITS - lsb) {
+    wasm_ir_fail(e, in->loc,
+                 "wasm target: bitfield does not fit in a 64-bit storage unit");
+    return;
+  }
+  left = WASM_BF_REG_BITS - lsb - width; /* bits above the field */
+  right = WASM_BF_REG_BITS - width;      /* slide field back to bit 0 */
+
+  addr = wasm_ir_addr_op(e, in->opnds[1], in->loc);
+  val = wasm_ir_temp_i64(e);
+
+  /* Load the storage unit, slide the field to the top of the i64, then back
+   * down — arithmetic shift sign-extends a signed field, logical zero-extends
+   * an unsigned one. Zero-count shifts are skipped. */
+  wasm_load(t, val, addr, wasm_ir_bf_storage_mem(bf));
+  if (left) wasm_binop(t, BO_SHL, val, val, wasm_ir_imm_i64((i64)left));
+  if (right)
+    wasm_binop(t, bf->signed_ ? BO_SHR_S : BO_SHR_U, val, val,
+               wasm_ir_imm_i64((i64)right));
+  dst = wasm_ir_dest_op(e, in->opnds[0], &d);
+  /* Narrow to the field's wasm value type; a no-op copy when dst is i64. */
+  wasm_convert(t, CV_TRUNC, dst, val);
+  wasm_ir_dest_finish(e, &d);
+}
+
+/* Lower CG_IR_BITFIELD_STORE: read-modify-write one field of a storage unit.
+ *
+ * opnds[0] is the storage-unit lvalue and opnds[1] the value to store; the
+ * field geometry comes from the CgIrBitFieldAux in extra.aux (a bit_width of 0
+ * is treated as 1). The unit is loaded into an i64 temp, the field's bits are
+ * cleared with ~mask, the new value (masked to `width` bits and shifted to the
+ * field's position) is OR-ed in, and the unit is stored back through the same
+ * MemAccess. An immediate value is folded on the host; any other value is
+ * widened to i64 with CV_ZEXT and masked/shifted in emitted code.
+ *
+ * A field that does not fit in the 64-bit working register is reported with
+ * wasm_ir_fail instead of being lowered: `ones << lsb` would otherwise be a
+ * host-side shift by an out-of-range count (undefined behaviour in C) or would
+ * silently drop the field's upper bits. */
+static void wasm_ir_emit_bitfield_store(WasmIrEmitter* e, const CgIrInst* in) {
+  CGTarget* t = (CGTarget*)&e->target->base;
+  const CgIrBitFieldAux* aux = (const CgIrBitFieldAux*)in->extra.aux;
+  const BitFieldAccess* bf = &aux->access;
+  u32 width = bf->bit_width ? bf->bit_width : 1u;
+  u32 lsb = bf->bit_offset;
+  u64 ones;
+  u64 mask;
+  MemAccess mem;
+  Operand addr;
+  Operand cur;
+
+  /* Require lsb + width <= 64, phrased so the check itself cannot overflow. */
+  if (lsb >= WASM_BF_REG_BITS || width > WASM_BF_REG_BITS - lsb) {
+    wasm_ir_fail(e, in->loc,
+                 "wasm target: bitfield does not fit in a 64-bit storage unit");
+    return;
+  }
+  /* `width` low bits set; a full-width field avoids the 1 << 64 shift. */
+  ones = (width >= WASM_BF_REG_BITS) ? ~(u64)0 : (((u64)1 << width) - 1u);
+  mask = ones << lsb;
+
+  mem = wasm_ir_bf_storage_mem(bf);
+  addr = wasm_ir_addr_op(e, in->opnds[0], in->loc);
+  cur = wasm_ir_temp_i64(e);
+
+  /* Read-modify-write: clear the field bits, OR in the masked/shifted value. */
+  wasm_load(t, cur, addr, mem);
+  wasm_binop(t, BO_AND, cur, cur, wasm_ir_imm_i64((i64)~mask));
+  if (in->opnds[1].kind == OPK_IMM) {
+    /* Constant value: mask and position it at compile time. */
+    u64 v = ((u64)in->opnds[1].v.imm & ones) << lsb;
+    wasm_binop(t, BO_OR, cur, cur, wasm_ir_imm_i64((i64)v));
+  } else {
+    Operand src = wasm_ir_source_op(e, in->opnds[1], in->loc);
+    Operand staged = wasm_ir_temp_i64(e);
+    wasm_convert(t, CV_ZEXT, staged, src); /* widen field value to i64 */
+    wasm_binop(t, BO_AND, staged, staged, wasm_ir_imm_i64((i64)ones));
+    if (lsb) wasm_binop(t, BO_SHL, staged, staged, wasm_ir_imm_i64((i64)lsb));
+    wasm_binop(t, BO_OR, cur, cur, staged);
+  }
+  wasm_store(t, addr, cur, mem);
+}
+
static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f,
const CgIrInst* in) {
CGTarget* t = (CGTarget*)&e->target->base;
@@ -397,28 +519,8 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f,
WasmIrDest d;
Operand src, dst;
if (wasm_ir_is_aggregate(e->target, in->opnds[0].type)) {
- /* Aggregate value copy: lower to memory.copy between the two homes.
- * copy_bytes wants both endpoints as pointer-valued registers, so
- * materialize each slot address with addr_of first. */
- AggregateAccess agg;
- CfreeCgTypeId pty = cg_type_ptr_to(e->target->c, in->opnds[0].type);
- Operand adst = wasm_ir_addr_op(e, in->opnds[0], in->loc);
- Operand asrc = wasm_ir_addr_op(e, in->opnds[1], in->loc);
- Operand dreg, sreg;
- memset(&dreg, 0, sizeof dreg);
- memset(&sreg, 0, sizeof sreg);
- dreg.kind = sreg.kind = OPK_REG;
- dreg.type = sreg.type = pty;
- dreg.cls = sreg.cls = (u8)RC_INT;
- dreg.v.reg = wasm_ir_temp_reg(e);
- sreg.v.reg = wasm_ir_temp_reg(e);
- wasm_addr_of(t, dreg, adst);
- wasm_addr_of(t, sreg, asrc);
- memset(&agg, 0, sizeof agg);
- agg.type = in->opnds[0].type;
- agg.size = (u32)abi_cg_sizeof(e->target->c->abi, in->opnds[0].type);
- agg.align = (u32)abi_cg_alignof(e->target->c->abi, in->opnds[0].type);
- wasm_copy_bytes(t, dreg, sreg, agg);
+ wasm_ir_emit_agg_move(e, in->opnds[0], in->opnds[1], in->opnds[0].type,
+ in->loc);
return;
}
src = wasm_ir_source_op(e, in->opnds[1], in->loc);
@@ -429,8 +531,18 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f,
}
case CG_IR_LOAD: {
WasmIrDest d;
- Operand addr = wasm_ir_addr_op(e, in->opnds[1], in->loc);
- Operand dst = wasm_ir_dest_op(e, in->opnds[0], &d);
+ Operand addr, dst;
+ if (wasm_ir_is_aggregate(e->target, in->opnds[0].type)) {
+ /* Aggregate load: the source operand is the address of the aggregate
+ * (an indirect `[ptr]` or a global symbol), so its effective address
+ * is the source home. Lower to memory.copy into the destination's
+ * home rather than a scalar wasm load. */
+ wasm_ir_emit_agg_move(e, in->opnds[0], in->opnds[1], in->opnds[0].type,
+ in->loc);
+ return;
+ }
+ addr = wasm_ir_addr_op(e, in->opnds[1], in->loc);
+ dst = wasm_ir_dest_op(e, in->opnds[0], &d);
wasm_load(t, dst, addr, in->extra.mem);
wasm_ir_dest_finish(e, &d);
return;
@@ -449,9 +561,25 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f,
wasm_ir_dest_finish(e, &d);
return;
}
- case CG_IR_TLS_ADDR_OF:
- wasm_ir_fail(e, in->loc, "wasm target: tls_addr_of not yet implemented");
+ case CG_IR_TLS_ADDR_OF: {
+ /* Wasm has no thread-local storage: a module instance owns one linear
+ * memory, so a thread-local resolves to a fixed data address. Lower to
+ * the symbol's (addend-adjusted) linear-memory address, exactly like a
+ * non-TLS addr_of of a global. */
+ const CgIrTlsAux* aux = (const CgIrTlsAux*)in->extra.aux;
+ WasmIrDest d;
+ Operand src;
+ Operand dst;
+ memset(&src, 0, sizeof src);
+ src.kind = OPK_GLOBAL;
+ src.type = in->opnds[0].type;
+ src.v.global.sym = aux->sym;
+ src.v.global.addend = aux->addend;
+ dst = wasm_ir_dest_op(e, in->opnds[0], &d);
+ wasm_addr_of(t, dst, src);
+ wasm_ir_dest_finish(e, &d);
return;
+ }
case CG_IR_AGG_COPY: {
const CgIrAggAux* aux = (const CgIrAggAux*)in->extra.aux;
Operand dst = wasm_ir_source_op(e, in->opnds[0], in->loc);
@@ -467,12 +595,10 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f,
return;
}
case CG_IR_BITFIELD_LOAD:
- wasm_ir_fail(e, in->loc,
- "wasm target: bitfield_load not yet implemented");
+ wasm_ir_emit_bitfield_load(e, in);
return;
case CG_IR_BITFIELD_STORE:
- wasm_ir_fail(e, in->loc,
- "wasm target: bitfield_store not yet implemented");
+ wasm_ir_emit_bitfield_store(e, in);
return;
case CG_IR_BINOP: {
WasmIrDest d;
@@ -519,8 +645,12 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f,
return;
case CG_IR_CMP_BRANCH: {
const CgIrCmpBranchAux* aux = (const CgIrCmpBranchAux*)in->extra.aux;
- wasm_cmp_branch(t, aux->op, wasm_ir_value_op(e, in->opnds[0]),
- wasm_ir_value_op(e, in->opnds[1]), aux->target);
+ /* Use source_op, not value_op: a compared operand may be an
+ * address-taken local that lives in linear memory (e.g. the `expected`
+ * out-param of __atomic_compare_exchange), which must be loaded rather
+ * than read as a bare wasm local.
+ *
+ * Materialize the operands in two separate statements, left first:
+ * source_op can emit a load, and C leaves the evaluation order of
+ * function-call arguments unspecified, so nesting both calls in the
+ * wasm_cmp_branch argument list would let the emitted instruction order
+ * depend on the host compiler. */
+ Operand lhs = wasm_ir_source_op(e, in->opnds[0], in->loc);
+ Operand rhs = wasm_ir_source_op(e, in->opnds[1], in->loc);
+ wasm_cmp_branch(t, aux->op, lhs, rhs, aux->target);
return;
}
case CG_IR_SWITCH:
@@ -552,7 +682,12 @@ static void wasm_ir_emit_inst(WasmIrEmitter* e, const CgIrFunc* f,
d.break_label = aux->desc.break_label;
d.continue_label = aux->desc.continue_label;
d.result_type = aux->desc.result_type;
- d.cond = wasm_ir_value_op(e, aux->desc.cond);
+ /* Only SCOPE_IF consumes cond; source_op (not value_op) so an
+ * address-taken local condition is loaded from memory, not read as a
+ * bare wasm local. */
+ d.cond = aux->desc.kind == SCOPE_IF
+ ? wasm_ir_source_op(e, aux->desc.cond, in->loc)
+ : wasm_ir_value_op(e, aux->desc.cond);
wasm_ir_bind_scope(e, aux->scope, wasm_scope_begin(t, &d), in->loc);
return;
}
diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c
@@ -113,6 +113,12 @@ Sym obj_secname_tdata(Compiler* c) {
/* MSVC `.tls$` convention; linker concatenates `.tls$*` sorted
* by suffix. See doc/WINDOWS.md §1.6. */
return pool_intern_slice(c->global, SLICE_LIT(".tls$"));
+ case CFREE_OBJ_WASM:
+ /* Wasm has no thread-local storage model: a module instance owns a
+ * single linear memory, so a thread-local is just an ordinary
+ * data object. Keep the `.tdata` name (laid out like `.data`) and
+ * lower tls_addr_of to a plain symbol address. */
+ return pool_intern_slice(c->global, SLICE_LIT(".tdata"));
default:
return secname_panic_unimpl(c, ".tdata");
}
@@ -128,6 +134,10 @@ Sym obj_secname_tbss(Compiler* c) {
/* sorted-alphabetically-last so it falls at the tail of the TLS
* image's zero-fill region. See doc/WINDOWS.md §1.6. */
return pool_intern_slice(c->global, SLICE_LIT(".tls$ZZZ"));
+ case CFREE_OBJ_WASM:
+ /* See obj_secname_tdata: wasm thread-locals are ordinary
+ * (zero-filled) data. */
+ return pool_intern_slice(c->global, SLICE_LIT(".tbss"));
default:
return secname_panic_unimpl(c, ".tbss");
}