commit ed3b81da82f373155f1790929e3c7a7d96cf2b62
parent e25cbf0f256f656ae48652f76b1e757ad194564b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 14:23:42 -0700
test/cg: register Groups D, E, F (compare/branch, conv, memory)
Cases compile and link against the same CGTarget surface the parser
will drive; oracles are exit codes, fixed before the AArch64 backend
implements cmp/cmp_branch/scope_*, convert (all ConvKinds), and the
memory ops (copy_bytes/set_bytes/bitfield_*).
Diffstat:
| M | test/cg/CORPUS.md | | | 75 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
| M | test/cg/harness/cases.c | | | 891 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 963 insertions(+), 3 deletions(-)
diff --git a/test/cg/CORPUS.md b/test/cg/CORPUS.md
@@ -88,13 +88,82 @@ materialization, slot allocation, and call lowering use the live
| `c11_shr_signed` | ★ | `-16 >>(s) 2` | 252 (= -4 & 0xff) |
| `c12_imul_i64` | ★ | i64 `7 * 6` | 42 |
+## Group D — compare and branch
+
+Covers both compare forms — `cmp` (materializes 0/1 in a GPR) and
+`cmp_branch` (fused test+branch) — plus `jump` and the structured-CFG
+ops `scope_*`. `cmp` cases return the materialized 0/1 directly;
+`cmp_branch` cases return distinct sentinels from the taken vs.
+fallthrough paths so the oracle can tell them apart.
+
+| Case | Status | Body | Expected |
+|---|---|---|---|
+| `d01_cmp_eq_true` | · | `cmp(EQ, 5, 5)` materialize → 1 | 1 |
+| `d02_cmp_eq_false` | · | `cmp(EQ, 5, 6)` → 0 | 0 |
+| `d03_cmp_ne` | · | `cmp(NE, 5, 6)` → 1 | 1 |
+| `d04_cmp_lt_signed` | · | `cmp(LT_S, -1, 1)` → 1 | 1 |
+| `d05_cmp_lt_unsigned` | · | `cmp(LT_U, 0xFFFFFFFFu, 1u)` → 0 (signedness in op, not Type) | 0 |
+| `d06_cmp_ge_signed` | · | `cmp(GE_S, 5, 5)` → 1 (boundary on inclusive ops) | 1 |
+| `d07_cmp_branch_taken` | · | `cmp_branch(EQ, 7, 7) → L`; landing pad returns 42 | 42 |
+| `d08_cmp_branch_not_taken` | · | `cmp_branch(EQ, 5, 6) → L`; fallthrough returns 33 | 33 |
+| `d09_cmp_branch_lt_signed` | · | `cmp_branch(LT_S, -3, 0) → L`; landing pad returns 9 | 9 |
+| `d10_jump` | · | unconditional `jump L`; early ret is dead | 5 |
+| `d11_scope_if_true` | · | `int x=99; if(1) x=33; return x;` | 33 |
+| `d12_scope_if_false` | · | `int x=99; if(0) x=33; return x;` | 99 |
+| `d13_scope_if_else` | · | `if(0) x=10; else x=7; return x;` (exercises `scope_else`) | 7 |
+
+## Group E — conversions
+
+One `ConvKind` per case, plus the boundary widths the AArch64 backend
+selects between (`UXTB`/`SXTB` vs `UXTH`/`SXTH` vs `UBFX`/`SBFX`,
+32→64 sign- vs zero-extend). FP cases end in `CV_FTOI_S` (`CV_FTOI_U`
+for e12) so the runner sees an int exit code.
+
+| Case | Status | Body | Expected |
+|---|---|---|---|
+| `e01_sext_i8_i32` | · | `sext (i8)-1 → i32` = 0xFFFFFFFF; low 8 = 255 | 255 |
+| `e02_zext_u8_i32` | · | `zext (u8)0xFF → i32` = 0x000000FF; low 8 = 255 | 255 |
+| `e03_sext_i16_i32` | · | `sext (i16)-1000 → i32` = 0xFFFFFC18; low 8 = 0x18 = 24 | 24 |
+| `e04_zext_u16_i32` | · | `zext (u16)0xABCD → i32` = 0x0000ABCD; low 8 = 0xCD = 205 | 205 |
+| `e05_zext_u32_i64` | · | `zext (u32)0xFFFFFFFF → i64`; low 32 = 0xFFFFFFFF; low 8 = 255 | 255 |
+| `e06_sext_i32_i64` | · | `sext (i32)-1 → i64` = -1; low 32 = 0xFFFFFFFF; low 8 = 255 | 255 |
+| `e07_trunc_i64_i32` | · | `trunc 0x100000080 → i32` = 0x80 | 128 |
+| `e08_trunc_i32_i8` | · | `trunc 0x1FF → i8` = 0xFF; returned as u8 | 255 |
+| `e09_itof_s_i32_f32` | · | `(i32)7 → f32 7.0 → ftoi_s` round-trip | 7 |
+| `e10_itof_u_u32_f64` | · | `(u32)100 → f64 100.0 → ftoi_s` cross-width | 100 |
+| `e11_ftoi_s_neg` | · | `ftoi_s(-1.5f) = -1` (truncate toward zero); low 8 = 255 | 255 |
+| `e12_ftoi_u_pos` | · | `ftoi_u(200.7f) = 200u` | 200 |
+| `e13_fext_f32_f64` | · | `fext 3.5f → 3.5 → ftoi_s` → 3 | 3 |
+| `e14_ftrunc_f64_f32` | · | `ftrunc 7.875 → 7.875f → ftoi_s` → 7 | 7 |
+| `e15_bitcast_i32_f32` | · | `bitcast 0x40A00000 → f32 5.0f → ftoi_s` (same-size cross-class) | 5 |
+
+## Group F — memory (loads/stores beyond locals)
+
+Group B already exercises basic load/store of an i32 local. Group F
+pushes the surface: every scalar width, FP load/store, indirect
+non-zero offsets, store-from-IMM vs store-from-REG, `copy_bytes`,
+`set_bytes`, volatile, and the bitfield methods.
+
+| Case | Status | Body | Expected |
+|---|---|---|---|
+| `f01_load_store_i8` | · | local u8; store IMM 200; load; return | 200 |
+| `f02_load_store_i16` | · | local i16; store 0x1234; load; low 8 = 0x34 | 52 |
+| `f03_load_store_i64` | · | local i64; store 0x1_0000_0042; runner reads w0 = 0x42 | 66 |
+| `f04_load_store_f32` | · | local f32; store FP reg = 7.5f; load; ftoi_s | 7 |
+| `f05_load_store_f64` | · | local f64; store FP reg = 3.25; load; ftoi_s | 3 |
+| `f06_indirect_nonzero_offset` | · | i64 local addr-taken; store i32 at +0 (sentinel) and +4 | 42 |
+| `f07_store_reg` | · | store from REG (vs IMM in b04) into a local i32; reload | 17 |
+| `f08_copy_bytes` | · | `copy_bytes(dst, src, Pt {10,32})`; sum dst.a+dst.b | 42 |
+| `f09_set_bytes_zero` | · | `set_bytes(0)` over an i32 buffer; load → 0 | 0 |
+| `f10_set_bytes_ff` | · | `set_bytes(0xFF)` over an i32 buffer; load = 0xFFFFFFFF; low 8 | 255 |
+| `f11_volatile_rw` | · | b04 body with `MF_VOLATILE` on both store and load | 42 |
+| `f12_bitfield_unsigned` | · | `{u: 5}` at bit_offset=3; store 21; load (zero-extend) | 21 |
+| `f13_bitfield_signed` | · | `{s: 5}` at bit_offset=0; store -1; load sign-extends; low 8 | 255 |
+
## Deferred groups
| Group | Theme |
|---|---|
-| D | compare and branch |
-| E | conversions |
-| F | memory (loads/stores beyond locals) |
| G | calls (beyond the direct-call path Group B exercises) |
| H | control flow |
| I | alloca |
diff --git a/test/cg/harness/cases.c b/test/cg/harness/cases.c
@@ -710,6 +710,850 @@ static void build_c12_imul_i64(CgTestCtx* ctx)
}
/* ============================================================
+ * Group D: compare and branch
+ * ============================================================ */
+
+/* d01_cmp_eq_true — cmp materializes 0/1; (5 == 5) → 1. */
+static void build_d01_cmp_eq_true(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg a = T->alloc_reg(T, RC_INT, I32);
+ Reg b = T->alloc_reg(T, RC_INT, I32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(a, I32), 5);
+ T->load_imm(T, REG_op(b, I32), 5);
+ T->cmp(T, CMP_EQ, REG_op(d, I32), REG_op(a, I32), REG_op(b, I32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* d02_cmp_eq_false — (5 == 6) → 0. */
+static void build_d02_cmp_eq_false(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg a = T->alloc_reg(T, RC_INT, I32);
+ Reg b = T->alloc_reg(T, RC_INT, I32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(a, I32), 5);
+ T->load_imm(T, REG_op(b, I32), 6);
+ T->cmp(T, CMP_EQ, REG_op(d, I32), REG_op(a, I32), REG_op(b, I32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* d03_cmp_ne — (5 != 6) → 1. */
+static void build_d03_cmp_ne(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg a = T->alloc_reg(T, RC_INT, I32);
+ Reg b = T->alloc_reg(T, RC_INT, I32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(a, I32), 5);
+ T->load_imm(T, REG_op(b, I32), 6);
+ T->cmp(T, CMP_NE, REG_op(d, I32), REG_op(a, I32), REG_op(b, I32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* d04_cmp_lt_signed — (-1 < 1) signed → 1. */
+static void build_d04_cmp_lt_signed(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg a = T->alloc_reg(T, RC_INT, I32);
+ Reg b = T->alloc_reg(T, RC_INT, I32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(a, I32), -1);
+ T->load_imm(T, REG_op(b, I32), 1);
+ T->cmp(T, CMP_LT_S, REG_op(d, I32), REG_op(a, I32), REG_op(b, I32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* d05_cmp_lt_unsigned — same bit patterns as d04 but unsigned: 0xFFFFFFFF
+ * is huge, so (0xFFFFFFFF < 1) → 0. Signedness lives in CmpOp, not Type. */
+static void build_d05_cmp_lt_unsigned(CgTestCtx* ctx)
+{
+ const Type* U32 = T_u32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, U32);
+ CGTarget* T = ctx->target;
+ Reg a = T->alloc_reg(T, RC_INT, U32);
+ Reg b = T->alloc_reg(T, RC_INT, U32);
+ Reg d = T->alloc_reg(T, RC_INT, U32);
+ T->load_imm(T, REG_op(a, U32), -1);
+ T->load_imm(T, REG_op(b, U32), 1);
+ T->cmp(T, CMP_LT_U, REG_op(d, U32), REG_op(a, U32), REG_op(b, U32));
+ cgtest_ret_reg(tf, d, U32);
+ cgtest_end(tf);
+}
+
+/* d06_cmp_ge_signed — boundary: (5 >= 5) → 1 (LE/GE families include eq). */
+static void build_d06_cmp_ge_signed(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg a = T->alloc_reg(T, RC_INT, I32);
+ Reg b = T->alloc_reg(T, RC_INT, I32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(a, I32), 5);
+ T->load_imm(T, REG_op(b, I32), 5);
+ T->cmp(T, CMP_GE_S, REG_op(d, I32), REG_op(a, I32), REG_op(b, I32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* d07_cmp_branch_taken — fused cmp_branch with the branch taken; landing
+ * pad past the label returns 42. */
+static void build_d07_cmp_branch_taken(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(r, I32), 7);
+ Label L = T->label_new(T);
+ T->cmp_branch(T, CMP_EQ, REG_op(r, I32), IMM_op(7, I32), L);
+ cgtest_ret_imm(tf, 0, I32); /* dead */
+ T->label_place(T, L);
+ cgtest_ret_imm(tf, 42, I32);
+ cgtest_end(tf);
+}
+
+/* d08_cmp_branch_not_taken — branch not taken; fallthrough returns 33. */
+static void build_d08_cmp_branch_not_taken(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(r, I32), 5);
+ Label L = T->label_new(T);
+ T->cmp_branch(T, CMP_EQ, REG_op(r, I32), IMM_op(6, I32), L);
+ cgtest_ret_imm(tf, 33, I32);
+ T->label_place(T, L);
+ cgtest_ret_imm(tf, 0, I32); /* dead */
+ cgtest_end(tf);
+}
+
+/* d09_cmp_branch_lt_signed — signed compare-and-branch with negative LHS;
+ * (-3 < 0) is true. */
+static void build_d09_cmp_branch_lt_signed(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(r, I32), -3);
+ Label L = T->label_new(T);
+ T->cmp_branch(T, CMP_LT_S, REG_op(r, I32), IMM_op(0, I32), L);
+ cgtest_ret_imm(tf, 0, I32); /* dead */
+ T->label_place(T, L);
+ cgtest_ret_imm(tf, 9, I32);
+ cgtest_end(tf);
+}
+
+/* d10_jump — unconditional jump; the early ret is skipped. */
+static void build_d10_jump(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Label L = T->label_new(T);
+ T->jump(T, L);
+ cgtest_ret_imm(tf, 0, I32); /* dead */
+ T->label_place(T, L);
+ cgtest_ret_imm(tf, 5, I32);
+ cgtest_end(tf);
+}
+
+/* d11_scope_if_true — `int x = 99; if (1) x = 33; return x;`
+ * SCOPE_IF consumes the cond at scope_begin; then-branch updates the
+ * local; scope_end closes the join. */
+static void build_d11_scope_if_true(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot x = cgtest_local(tf, I32, FSF_NONE);
+ cgtest_store_local(tf, x, IMM_op(99, I32), I32);
+
+ Reg c = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(c, I32), 1);
+ CGScopeDesc desc = { .kind = SCOPE_IF, .cond = REG_op(c, I32) };
+ CGScope s = T->scope_begin(T, &desc);
+ cgtest_store_local(tf, x, IMM_op(33, I32), I32);
+ T->scope_end(T, s);
+
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ cgtest_load_local(tf, REG_op(r, I32), x, I32);
+ cgtest_ret_reg(tf, r, I32);
+ cgtest_end(tf);
+}
+
+/* d12_scope_if_false — `int x = 99; if (0) x = 33; return x;`
+ * Then-branch is dead; the local keeps its initial value. */
+static void build_d12_scope_if_false(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot x = cgtest_local(tf, I32, FSF_NONE);
+ cgtest_store_local(tf, x, IMM_op(99, I32), I32);
+
+ Reg c = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(c, I32), 0);
+ CGScopeDesc desc = { .kind = SCOPE_IF, .cond = REG_op(c, I32) };
+ CGScope s = T->scope_begin(T, &desc);
+ cgtest_store_local(tf, x, IMM_op(33, I32), I32);
+ T->scope_end(T, s);
+
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ cgtest_load_local(tf, REG_op(r, I32), x, I32);
+ cgtest_ret_reg(tf, r, I32);
+ cgtest_end(tf);
+}
+
+/* d13_scope_if_else — `int x; if (0) x = 10; else x = 7; return x;`
+ * Exercises scope_else: cond is 0, so the else body wins. */
+static void build_d13_scope_if_else(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot x = cgtest_local(tf, I32, FSF_NONE);
+
+ Reg c = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(c, I32), 0);
+ CGScopeDesc desc = { .kind = SCOPE_IF, .cond = REG_op(c, I32) };
+ CGScope s = T->scope_begin(T, &desc);
+ cgtest_store_local(tf, x, IMM_op(10, I32), I32);
+ T->scope_else(T, s);
+ cgtest_store_local(tf, x, IMM_op(7, I32), I32);
+ T->scope_end(T, s);
+
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ cgtest_load_local(tf, REG_op(r, I32), x, I32);
+ cgtest_ret_reg(tf, r, I32);
+ cgtest_end(tf);
+}
+
+/* ============================================================
+ * Group E: conversions
+ *
+ * One ConvKind per case, plus the boundary widths the AArch64 backend
+ * actually selects between (UXTB/SXTB vs UXTH/SXTH vs UBFX/SBFX, 32→64
+ * sign- vs zero-extend). FP cases end in ftoi_s (ftoi_u for e12) so the
+ * runner sees an int exit code.
+ * ============================================================ */
+
+/* e01_sext_i8_i32 — sext (i8)-1 → i32 = 0xFFFFFFFF; low 8 = 0xFF = 255. */
+static void build_e01_sext_i8_i32(CgTestCtx* ctx)
+{
+ const Type* I8 = T_i8(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg s = T->alloc_reg(T, RC_INT, I8);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(s, I8), -1);
+ T->convert(T, CV_SEXT, REG_op(d, I32), REG_op(s, I8));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e02_zext_u8_i32 — zext (u8)0xFF → i32 = 0xFF; low 8 = 255. The high
+ * bits are zeroed (unlike e01); the 8-bit exit code cannot show that. */
+static void build_e02_zext_u8_i32(CgTestCtx* ctx)
+{
+ const Type* U8 = T_u8(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg s = T->alloc_reg(T, RC_INT, U8);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(s, U8), 0xFF);
+ T->convert(T, CV_ZEXT, REG_op(d, I32), REG_op(s, U8));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e03_sext_i16_i32 — sext (i16)-1000 → 0xFFFFFC18; low 8 = 0x18 = 24. */
+static void build_e03_sext_i16_i32(CgTestCtx* ctx)
+{
+ const Type* I16 = T_i16(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg s = T->alloc_reg(T, RC_INT, I16);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(s, I16), -1000);
+ T->convert(T, CV_SEXT, REG_op(d, I32), REG_op(s, I16));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e04_zext_u16_i32 — zext (u16)0xABCD → 0x0000ABCD; low 8 = 0xCD = 205. */
+static void build_e04_zext_u16_i32(CgTestCtx* ctx)
+{
+ const Type* U16 = T_u16(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg s = T->alloc_reg(T, RC_INT, U16);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(s, U16), 0xABCD);
+ T->convert(T, CV_ZEXT, REG_op(d, I32), REG_op(s, U16));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e05_zext_u32_i64 — zext (u32)0xFFFFFFFF → i64 = 0x00000000FFFFFFFF;
+ * runner reads w0 = 0xFFFFFFFF; low 8 = 255. Distinct from e06: high
+ * 32 bits are zero. */
+static void build_e05_zext_u32_i64(CgTestCtx* ctx)
+{
+ const Type* U32 = T_u32(ctx);
+ const Type* I64 = T_i64(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I64);
+ CGTarget* T = ctx->target;
+ Reg s = T->alloc_reg(T, RC_INT, U32);
+ Reg d = T->alloc_reg(T, RC_INT, I64);
+ T->load_imm(T, REG_op(s, U32), 0xFFFFFFFFll);
+ T->convert(T, CV_ZEXT, REG_op(d, I64), REG_op(s, U32));
+ cgtest_ret_reg(tf, d, I64);
+ cgtest_end(tf);
+}
+
+/* e06_sext_i32_i64 — sext (i32)-1 → i64 = -1; low 8 = 255. Same low-byte
+ * exit as e05 but the high bits differ — exercises SXTW vs UXTW. */
+static void build_e06_sext_i32_i64(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* I64 = T_i64(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I64);
+ CGTarget* T = ctx->target;
+ Reg s = T->alloc_reg(T, RC_INT, I32);
+ Reg d = T->alloc_reg(T, RC_INT, I64);
+ T->load_imm(T, REG_op(s, I32), -1);
+ T->convert(T, CV_SEXT, REG_op(d, I64), REG_op(s, I32));
+ cgtest_ret_reg(tf, d, I64);
+ cgtest_end(tf);
+}
+
+/* e07_trunc_i64_i32 — trunc 0x100000080 → low 32 = 0x80 = 128. */
+static void build_e07_trunc_i64_i32(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* I64 = T_i64(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg s = T->alloc_reg(T, RC_INT, I64);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(s, I64), 0x100000080ll);
+ T->convert(T, CV_TRUNC, REG_op(d, I32), REG_op(s, I64));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e08_trunc_i32_i8 — trunc 0x1FF → low 8 = 0xFF; returned as u8 = 255. */
+static void build_e08_trunc_i32_i8(CgTestCtx* ctx)
+{
+ const Type* U8 = T_u8(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, U8);
+ CGTarget* T = ctx->target;
+ Reg s = T->alloc_reg(T, RC_INT, I32);
+ Reg d = T->alloc_reg(T, RC_INT, U8);
+ T->load_imm(T, REG_op(s, I32), 0x1FF);
+ T->convert(T, CV_TRUNC, REG_op(d, U8), REG_op(s, I32));
+ cgtest_ret_reg(tf, d, U8);
+ cgtest_end(tf);
+}
+
+/* e09_itof_s_i32_f32 — i32(7) → f32(7.0) → ftoi_s i32 → 7. Exact
+ * round-trip; verifies SCVTF + FCVTZS form a valid pair. */
+static void build_e09_itof_s_i32_f32(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* F32 = T_f32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg si = T->alloc_reg(T, RC_INT, I32);
+ Reg f = T->alloc_reg(T, RC_FP, F32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(si, I32), 7);
+ T->convert(T, CV_ITOF_S, REG_op(f, F32), REG_op(si, I32));
+ T->convert(T, CV_FTOI_S, REG_op(d, I32), REG_op(f, F32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e10_itof_u_u32_f64 — u32(100) → f64(100.0) → ftoi_s i32 → 100.
+ * Crosses width on the way up (UCVTF Dn,Wn) and back down. */
+static void build_e10_itof_u_u32_f64(CgTestCtx* ctx)
+{
+ const Type* U32 = T_u32(ctx);
+ const Type* I32 = T_i32(ctx);
+ const Type* F64 = T_f64(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg si = T->alloc_reg(T, RC_INT, U32);
+ Reg f = T->alloc_reg(T, RC_FP, F64);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(si, U32), 100);
+ T->convert(T, CV_ITOF_U, REG_op(f, F64), REG_op(si, U32));
+ T->convert(T, CV_FTOI_S, REG_op(d, I32), REG_op(f, F64));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e11_ftoi_s_neg — ftoi_s(-1.5f) = -1; low 8 = 255. C99 truncation
+ * rounds toward zero. */
+static void build_e11_ftoi_s_neg(CgTestCtx* ctx)
+{
+ const Type* F32 = T_f32(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ static const u8 BYTES_NEG_1_5[4] = { 0x00, 0x00, 0xC0, 0xBF }; /* -1.5f LE */
+ Reg f = T->alloc_reg(T, RC_FP, F32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ ConstBytes cb = { .type = F32, .bytes = BYTES_NEG_1_5, .size = 4, .align = 4 };
+ T->load_const(T, REG_op(f, F32), cb);
+ T->convert(T, CV_FTOI_S, REG_op(d, I32), REG_op(f, F32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e12_ftoi_u_pos — ftoi_u(200.7f) = 200u. Truncation toward zero,
+ * matching C's (unsigned)x. */
+static void build_e12_ftoi_u_pos(CgTestCtx* ctx)
+{
+ const Type* F32 = T_f32(ctx);
+ const Type* U32 = T_u32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, U32);
+ CGTarget* T = ctx->target;
+ static const u8 BYTES_200_7[4] = { 0x33, 0xB3, 0x48, 0x43 }; /* 200.7f LE */
+ Reg f = T->alloc_reg(T, RC_FP, F32);
+ Reg d = T->alloc_reg(T, RC_INT, U32);
+ ConstBytes cb = { .type = F32, .bytes = BYTES_200_7, .size = 4, .align = 4 };
+ T->load_const(T, REG_op(f, F32), cb);
+ T->convert(T, CV_FTOI_U, REG_op(d, U32), REG_op(f, F32));
+ cgtest_ret_reg(tf, d, U32);
+ cgtest_end(tf);
+}
+
+/* e13_fext_f32_f64 — float→double promotion preserves an exactly
+ * representable value (3.5f = 3.5). ftoi_s then yields 3. */
+static void build_e13_fext_f32_f64(CgTestCtx* ctx)
+{
+ const Type* F32 = T_f32(ctx);
+ const Type* F64 = T_f64(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ static const u8 BYTES_3_5F[4] = { 0x00, 0x00, 0x60, 0x40 }; /* 3.5f LE */
+ Reg f32r = T->alloc_reg(T, RC_FP, F32);
+ Reg f64r = T->alloc_reg(T, RC_FP, F64);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ ConstBytes cb = { .type = F32, .bytes = BYTES_3_5F, .size = 4, .align = 4 };
+ T->load_const(T, REG_op(f32r, F32), cb);
+ T->convert(T, CV_FEXT, REG_op(f64r, F64), REG_op(f32r, F32));
+ T->convert(T, CV_FTOI_S, REG_op(d, I32), REG_op(f64r, F64));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e14_ftrunc_f64_f32 — double→float demotion of 7.875 (exact in both);
+ * ftoi_s yields 7. */
+static void build_e14_ftrunc_f64_f32(CgTestCtx* ctx)
+{
+ const Type* F32 = T_f32(ctx);
+ const Type* F64 = T_f64(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ static const u8 BYTES_7_875[8] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x1F, 0x40, /* 7.875 LE double */
+ };
+ Reg f64r = T->alloc_reg(T, RC_FP, F64);
+ Reg f32r = T->alloc_reg(T, RC_FP, F32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ ConstBytes cb = { .type = F64, .bytes = BYTES_7_875, .size = 8, .align = 8 };
+ T->load_const(T, REG_op(f64r, F64), cb);
+ T->convert(T, CV_FTRUNC, REG_op(f32r, F32), REG_op(f64r, F64));
+ T->convert(T, CV_FTOI_S, REG_op(d, I32), REG_op(f32r, F32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* e15_bitcast_i32_f32 — same-size cross-class reinterpret. 0x40A00000
+ * is the IEEE-754 single bit pattern for 5.0f. ftoi_s yields 5,
+ * confirming the bits travelled to the FP register intact. */
+static void build_e15_bitcast_i32_f32(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* F32 = T_f32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ Reg si = T->alloc_reg(T, RC_INT, I32);
+ Reg f = T->alloc_reg(T, RC_FP, F32);
+ Reg d = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(si, I32), 0x40A00000); /* 5.0f bit pattern */
+ T->convert(T, CV_BITCAST, REG_op(f, F32), REG_op(si, I32));
+ T->convert(T, CV_FTOI_S, REG_op(d, I32), REG_op(f, F32));
+ cgtest_ret_reg(tf, d, I32);
+ cgtest_end(tf);
+}
+
+/* ============================================================
+ * Group F: memory (loads/stores beyond locals)
+ *
+ * Group B already exercises the basic load/store-of-local path. Group F
+ * pushes the surface: every scalar width, FP load/store, indirect
+ * non-zero offsets, store-from-IMM vs store-from-REG, copy_bytes,
+ * set_bytes, volatile, and the bitfield methods.
+ * ============================================================ */
+
+/* f01_load_store_i8 — local u8; store IMM 200; load; return. */
+static void build_f01_load_store_i8(CgTestCtx* ctx)
+{
+ const Type* U8 = T_u8(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, U8);
+ CGTarget* T = ctx->target;
+ FrameSlot s = cgtest_local(tf, U8, FSF_NONE);
+ cgtest_store_local(tf, s, IMM_op(200, U8), U8);
+ Reg r = T->alloc_reg(T, RC_INT, U8);
+ cgtest_load_local(tf, REG_op(r, U8), s, U8);
+ cgtest_ret_reg(tf, r, U8);
+ cgtest_end(tf);
+}
+
+/* f02_load_store_i16 — local i16; store 0x1234; load; low 8 = 0x34 = 52. */
+static void build_f02_load_store_i16(CgTestCtx* ctx)
+{
+ const Type* I16 = T_i16(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I16);
+ CGTarget* T = ctx->target;
+ FrameSlot s = cgtest_local(tf, I16, FSF_NONE);
+ cgtest_store_local(tf, s, IMM_op(0x1234, I16), I16);
+ Reg r = T->alloc_reg(T, RC_INT, I16);
+ cgtest_load_local(tf, REG_op(r, I16), s, I16);
+ cgtest_ret_reg(tf, r, I16);
+ cgtest_end(tf);
+}
+
+/* f03_load_store_i64 — local i64; store 0x1_0000_0042; load; runner
+ * reads w0 = low 32 = 0x42 = 66. */
+static void build_f03_load_store_i64(CgTestCtx* ctx)
+{
+ const Type* I64 = T_i64(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I64);
+ CGTarget* T = ctx->target;
+ FrameSlot s = cgtest_local(tf, I64, FSF_NONE);
+ cgtest_store_local(tf, s, IMM_op(0x100000042ll, I64), I64);
+ Reg r = T->alloc_reg(T, RC_INT, I64);
+ cgtest_load_local(tf, REG_op(r, I64), s, I64);
+ cgtest_ret_reg(tf, r, I64);
+ cgtest_end(tf);
+}
+
+/* f04_load_store_f32 — local f32 home; store FP reg holding 7.5f; load
+ * back; ftoi_s → 7. Exercises STR Sn / LDR Sn forms. */
+static void build_f04_load_store_f32(CgTestCtx* ctx)
+{
+ const Type* F32 = T_f32(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ static const u8 BYTES_75F[4] = { 0x00, 0x00, 0xF0, 0x40 }; /* 7.5f LE */
+
+ FrameSlot s = cgtest_local(tf, F32, FSF_NONE);
+ Reg src = T->alloc_reg(T, RC_FP, F32);
+ ConstBytes cb = { .type = F32, .bytes = BYTES_75F, .size = 4, .align = 4 };
+ T->load_const(T, REG_op(src, F32), cb);
+ cgtest_store_local(tf, s, REG_op(src, F32), F32);
+
+ Reg dst = T->alloc_reg(T, RC_FP, F32);
+ cgtest_load_local(tf, REG_op(dst, F32), s, F32);
+ Reg ri = T->alloc_reg(T, RC_INT, I32);
+ T->convert(T, CV_FTOI_S, REG_op(ri, I32), REG_op(dst, F32));
+ cgtest_ret_reg(tf, ri, I32);
+ cgtest_end(tf);
+}
+
+/* f05_load_store_f64 — local f64 home; store FP reg holding 3.25; load
+ * back; ftoi_s → 3. STR Dn / LDR Dn. */
+static void build_f05_load_store_f64(CgTestCtx* ctx)
+{
+ const Type* F64 = T_f64(ctx);
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+ static const u8 BYTES_3_25[8] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x40, /* 3.25 LE double */
+ };
+
+ FrameSlot s = cgtest_local(tf, F64, FSF_NONE);
+ Reg src = T->alloc_reg(T, RC_FP, F64);
+ ConstBytes cb = { .type = F64, .bytes = BYTES_3_25, .size = 8, .align = 8 };
+ T->load_const(T, REG_op(src, F64), cb);
+ cgtest_store_local(tf, s, REG_op(src, F64), F64);
+
+ Reg dst = T->alloc_reg(T, RC_FP, F64);
+ cgtest_load_local(tf, REG_op(dst, F64), s, F64);
+ Reg ri = T->alloc_reg(T, RC_INT, I32);
+ T->convert(T, CV_FTOI_S, REG_op(ri, I32), REG_op(dst, F64));
+ cgtest_ret_reg(tf, ri, I32);
+ cgtest_end(tf);
+}
+
+/* f06_indirect_nonzero_offset — addr_of an i64 local, then store/load
+ * an i32 at +4. The +4 value is written first and the +0 sentinel last,
+ * so a backend that drops the displacement reads back 99, not 42. */
+static void build_f06_indirect_nonzero_offset(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* I64 = T_i64(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot s = cgtest_local(tf, I64, FSF_ADDR_TAKEN);
+ Reg base = T->alloc_reg(T, RC_INT, T_ptr(ctx, I64));
+ T->addr_of(T, REG_op(base, T_ptr(ctx, I64)), LOCAL_op(s, I64));
+
+ MemAccess ma = { .type = I32, .size = 4, .align = 4,
+ .alias.kind = ALIAS_LOCAL };
+ T->store(T, IND_op(base, 4, I32), IMM_op(42, I32), ma);
+ T->store(T, IND_op(base, 0, I32), IMM_op(99, I32), ma);
+
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ T->load(T, REG_op(r, I32), IND_op(base, 4, I32), ma);
+ cgtest_ret_reg(tf, r, I32);
+ cgtest_end(tf);
+}
+
+/* f07_store_reg — store from REG (not IMM) into a local slot. b04 stored
+ * an immediate; this distinguishes the REG-source store path. */
+static void build_f07_store_reg(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot s = cgtest_local(tf, I32, FSF_NONE);
+ Reg src = T->alloc_reg(T, RC_INT, I32);
+ T->load_imm(T, REG_op(src, I32), 17);
+ cgtest_store_local(tf, s, REG_op(src, I32), I32);
+
+ Reg dst = T->alloc_reg(T, RC_INT, I32);
+ cgtest_load_local(tf, REG_op(dst, I32), s, I32);
+ cgtest_ret_reg(tf, dst, I32);
+ cgtest_end(tf);
+}
+
+/* f08_copy_bytes — copy_bytes(dst, src, Pt {10,32}); read back dst.a +
+ * dst.b → 42. The aggregate move is the operation under test; the per-
+ * field load/store after it just reads the result. */
+static void build_f08_copy_bytes(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* PT = build_b06_pt_type(ctx);
+
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot src = cgtest_local(tf, PT, FSF_ADDR_TAKEN);
+ FrameSlot dst = cgtest_local(tf, PT, FSF_ADDR_TAKEN);
+
+ /* Initialize src to {10, 32}. */
+ Reg src_addr = T->alloc_reg(T, RC_INT, T_ptr(ctx, PT));
+ T->addr_of(T, REG_op(src_addr, T_ptr(ctx, PT)), LOCAL_op(src, PT));
+ MemAccess ma_i32 = { .type = I32, .size = 4, .align = 4,
+ .alias.kind = ALIAS_LOCAL };
+ T->store(T, IND_op(src_addr, 0, I32), IMM_op(10, I32), ma_i32);
+ T->store(T, IND_op(src_addr, 4, I32), IMM_op(32, I32), ma_i32);
+
+ Reg dst_addr = T->alloc_reg(T, RC_INT, T_ptr(ctx, PT));
+ T->addr_of(T, REG_op(dst_addr, T_ptr(ctx, PT)), LOCAL_op(dst, PT));
+
+ AggregateAccess agg = {
+ .type = PT, .size = 8, .align = 4,
+ .mem = { .type = PT, .size = 8, .align = 4,
+ .alias.kind = ALIAS_LOCAL },
+ };
+ T->copy_bytes(T,
+ REG_op(dst_addr, T_ptr(ctx, PT)),
+ REG_op(src_addr, T_ptr(ctx, PT)),
+ agg);
+
+ Reg ra = T->alloc_reg(T, RC_INT, I32);
+ Reg rb = T->alloc_reg(T, RC_INT, I32);
+ Reg sum = T->alloc_reg(T, RC_INT, I32);
+ T->load(T, REG_op(ra, I32), IND_op(dst_addr, 0, I32), ma_i32);
+ T->load(T, REG_op(rb, I32), IND_op(dst_addr, 4, I32), ma_i32);
+ T->binop(T, BO_IADD, REG_op(sum, I32), REG_op(ra, I32), REG_op(rb, I32));
+ cgtest_ret_reg(tf, sum, I32);
+ cgtest_end(tf);
+}
+
+/* f09_set_bytes_zero — set_bytes(0) on an i32-sized buffer; load the
+ * word back → 0. Exercises the "memset to zero" path which backends
+ * often special-case (zero-register store, e.g. STR WZR for 4 bytes). */
+static void build_f09_set_bytes_zero(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* U8 = T_u8(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot s = cgtest_local(tf, I32, FSF_ADDR_TAKEN);
+ Reg base = T->alloc_reg(T, RC_INT, T_ptr(ctx, I32));
+ T->addr_of(T, REG_op(base, T_ptr(ctx, I32)), LOCAL_op(s, I32));
+
+ AggregateAccess agg = {
+ .type = I32, .size = 4, .align = 4,
+ .mem = { .type = I32, .size = 4, .align = 4,
+ .alias.kind = ALIAS_LOCAL },
+ };
+ T->set_bytes(T, REG_op(base, T_ptr(ctx, I32)), IMM_op(0, U8), agg);
+
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ T->load(T, REG_op(r, I32), IND_op(base, 0, I32), agg.mem);
+ cgtest_ret_reg(tf, r, I32);
+ cgtest_end(tf);
+}
+
+/* f10_set_bytes_ff — set_bytes(0xFF) on an i32-sized buffer; load the
+ * word → 0xFFFFFFFF; low 8 = 255. Exercises the byte-broadcast path. */
+static void build_f10_set_bytes_ff(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* U8 = T_u8(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot s = cgtest_local(tf, I32, FSF_ADDR_TAKEN);
+ Reg base = T->alloc_reg(T, RC_INT, T_ptr(ctx, I32));
+ T->addr_of(T, REG_op(base, T_ptr(ctx, I32)), LOCAL_op(s, I32));
+
+ AggregateAccess agg = {
+ .type = I32, .size = 4, .align = 4,
+ .mem = { .type = I32, .size = 4, .align = 4,
+ .alias.kind = ALIAS_LOCAL },
+ };
+ T->set_bytes(T, REG_op(base, T_ptr(ctx, I32)), IMM_op(0xFF, U8), agg);
+
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ T->load(T, REG_op(r, I32), IND_op(base, 0, I32), agg.mem);
+ cgtest_ret_reg(tf, r, I32);
+ cgtest_end(tf);
+}
+
+/* f11_volatile_rw — same body as b04 but with MF_VOLATILE on both the
+ * store and the load. The expected exit value is identical; the
+ * difference is in the emitted code (no DSE/DCE, no fold-through-store). */
+static void build_f11_volatile_rw(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot s = cgtest_local(tf, I32, FSF_NONE);
+ MemAccess ma = { .type = I32, .size = 4, .align = 4,
+ .flags = MF_VOLATILE,
+ .alias.kind = ALIAS_LOCAL };
+ T->store(T, LOCAL_op(s, I32), IMM_op(42, I32), ma);
+
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ T->load(T, REG_op(r, I32), LOCAL_op(s, I32), ma);
+ cgtest_ret_reg(tf, r, I32);
+ cgtest_end(tf);
+}
+
+/* f12_bitfield_unsigned — { unsigned x : 5; } at bit_offset=3 inside a
+ * zeroed i32 storage word; store 21; load → 21 (zero-extended). The
+ * non-zero bit_offset forces the backend's mask+shift logic. */
+static void build_f12_bitfield_unsigned(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ const Type* U32 = T_u32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, U32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot s = cgtest_local(tf, I32, FSF_ADDR_TAKEN);
+ Reg base = T->alloc_reg(T, RC_INT, T_ptr(ctx, I32));
+ T->addr_of(T, REG_op(base, T_ptr(ctx, I32)), LOCAL_op(s, I32));
+
+ /* Zero the storage word so neighboring bits don't perturb the read. */
+ MemAccess ma = { .type = I32, .size = 4, .align = 4,
+ .alias.kind = ALIAS_LOCAL };
+ T->store(T, IND_op(base, 0, I32), IMM_op(0, I32), ma);
+
+ BitFieldAccess bf = {
+ .field_type = U32,
+ .storage = ma,
+ .storage_offset = 0,
+ .bit_offset = 3,
+ .bit_width = 5,
+ .signed_ = 0,
+ };
+ T->bitfield_store(T, REG_op(base, T_ptr(ctx, I32)),
+ IMM_op(21, U32), bf);
+
+ Reg r = T->alloc_reg(T, RC_INT, U32);
+ T->bitfield_load(T, REG_op(r, U32),
+ REG_op(base, T_ptr(ctx, I32)), bf);
+ cgtest_ret_reg(tf, r, U32);
+ cgtest_end(tf);
+}
+
+/* f13_bitfield_signed — { signed x : 5; } at bit_offset=0; store -1
+ * (5-bit all-ones); load sign-extends to -1; low 8 = 255. */
+static void build_f13_bitfield_signed(CgTestCtx* ctx)
+{
+ const Type* I32 = T_i32(ctx);
+ CgTestFn* tf = cgtest_begin_main(ctx, I32);
+ CGTarget* T = ctx->target;
+
+ FrameSlot s = cgtest_local(tf, I32, FSF_ADDR_TAKEN);
+ Reg base = T->alloc_reg(T, RC_INT, T_ptr(ctx, I32));
+ T->addr_of(T, REG_op(base, T_ptr(ctx, I32)), LOCAL_op(s, I32));
+
+ MemAccess ma = { .type = I32, .size = 4, .align = 4,
+ .alias.kind = ALIAS_LOCAL };
+ T->store(T, IND_op(base, 0, I32), IMM_op(0, I32), ma);
+
+ BitFieldAccess bf = {
+ .field_type = I32,
+ .storage = ma,
+ .storage_offset = 0,
+ .bit_offset = 0,
+ .bit_width = 5,
+ .signed_ = 1,
+ };
+ T->bitfield_store(T, REG_op(base, T_ptr(ctx, I32)),
+ IMM_op(-1, I32), bf);
+
+ Reg r = T->alloc_reg(T, RC_INT, I32);
+ T->bitfield_load(T, REG_op(r, I32),
+ REG_op(base, T_ptr(ctx, I32)), bf);
+ cgtest_ret_reg(tf, r, I32);
+ cgtest_end(tf);
+}
+
+/* ============================================================
* Registry
* ============================================================ */
@@ -752,6 +1596,53 @@ const CgCase cg_cases[] = {
{ "c10_logical_not", build_c10_logical_not, 1, CG_CASE_DEFAULT },
{ "c11_shr_signed", build_c11_shr_signed, 252, CG_CASE_DEFAULT },
{ "c12_imul_i64", build_c12_imul_i64, 42, CG_CASE_DEFAULT },
+
+ /* Group D — compare and branch */
+ { "d01_cmp_eq_true", build_d01_cmp_eq_true, 1, CG_CASE_DEFAULT },
+ { "d02_cmp_eq_false", build_d02_cmp_eq_false, 0, CG_CASE_DEFAULT },
+ { "d03_cmp_ne", build_d03_cmp_ne, 1, CG_CASE_DEFAULT },
+ { "d04_cmp_lt_signed", build_d04_cmp_lt_signed, 1, CG_CASE_DEFAULT },
+ { "d05_cmp_lt_unsigned", build_d05_cmp_lt_unsigned, 0, CG_CASE_DEFAULT },
+ { "d06_cmp_ge_signed", build_d06_cmp_ge_signed, 1, CG_CASE_DEFAULT },
+ { "d07_cmp_branch_taken", build_d07_cmp_branch_taken, 42, CG_CASE_DEFAULT },
+ { "d08_cmp_branch_not_taken", build_d08_cmp_branch_not_taken, 33, CG_CASE_DEFAULT },
+ { "d09_cmp_branch_lt_signed", build_d09_cmp_branch_lt_signed, 9, CG_CASE_DEFAULT },
+ { "d10_jump", build_d10_jump, 5, CG_CASE_DEFAULT },
+ { "d11_scope_if_true", build_d11_scope_if_true, 33, CG_CASE_DEFAULT },
+ { "d12_scope_if_false", build_d12_scope_if_false, 99, CG_CASE_DEFAULT },
+ { "d13_scope_if_else", build_d13_scope_if_else, 7, CG_CASE_DEFAULT },
+
+ /* Group E — conversions */
+ { "e01_sext_i8_i32", build_e01_sext_i8_i32, 255, CG_CASE_DEFAULT },
+ { "e02_zext_u8_i32", build_e02_zext_u8_i32, 255, CG_CASE_DEFAULT },
+ { "e03_sext_i16_i32", build_e03_sext_i16_i32, 24, CG_CASE_DEFAULT },
+ { "e04_zext_u16_i32", build_e04_zext_u16_i32, 205, CG_CASE_DEFAULT },
+ { "e05_zext_u32_i64", build_e05_zext_u32_i64, 255, CG_CASE_DEFAULT },
+ { "e06_sext_i32_i64", build_e06_sext_i32_i64, 255, CG_CASE_DEFAULT },
+ { "e07_trunc_i64_i32", build_e07_trunc_i64_i32, 128, CG_CASE_DEFAULT },
+ { "e08_trunc_i32_i8", build_e08_trunc_i32_i8, 255, CG_CASE_DEFAULT },
+ { "e09_itof_s_i32_f32", build_e09_itof_s_i32_f32, 7, CG_CASE_DEFAULT },
+ { "e10_itof_u_u32_f64", build_e10_itof_u_u32_f64, 100, CG_CASE_DEFAULT },
+ { "e11_ftoi_s_neg", build_e11_ftoi_s_neg, 255, CG_CASE_DEFAULT },
+ { "e12_ftoi_u_pos", build_e12_ftoi_u_pos, 200, CG_CASE_DEFAULT },
+ { "e13_fext_f32_f64", build_e13_fext_f32_f64, 3, CG_CASE_DEFAULT },
+ { "e14_ftrunc_f64_f32", build_e14_ftrunc_f64_f32, 7, CG_CASE_DEFAULT },
+ { "e15_bitcast_i32_f32", build_e15_bitcast_i32_f32, 5, CG_CASE_DEFAULT },
+
+ /* Group F — memory (loads/stores beyond locals) */
+ { "f01_load_store_i8", build_f01_load_store_i8, 200, CG_CASE_DEFAULT },
+ { "f02_load_store_i16", build_f02_load_store_i16, 52, CG_CASE_DEFAULT },
+ { "f03_load_store_i64", build_f03_load_store_i64, 66, CG_CASE_DEFAULT },
+ { "f04_load_store_f32", build_f04_load_store_f32, 7, CG_CASE_DEFAULT },
+ { "f05_load_store_f64", build_f05_load_store_f64, 3, CG_CASE_DEFAULT },
+ { "f06_indirect_nonzero_offset", build_f06_indirect_nonzero_offset,42, CG_CASE_DEFAULT },
+ { "f07_store_reg", build_f07_store_reg, 17, CG_CASE_DEFAULT },
+ { "f08_copy_bytes", build_f08_copy_bytes, 42, CG_CASE_DEFAULT },
+ { "f09_set_bytes_zero", build_f09_set_bytes_zero, 0, CG_CASE_DEFAULT },
+ { "f10_set_bytes_ff", build_f10_set_bytes_ff, 255, CG_CASE_DEFAULT },
+ { "f11_volatile_rw", build_f11_volatile_rw, 42, CG_CASE_DEFAULT },
+ { "f12_bitfield_unsigned", build_f12_bitfield_unsigned, 21, CG_CASE_DEFAULT },
+ { "f13_bitfield_signed", build_f13_bitfield_signed, 255, CG_CASE_DEFAULT },
};
/* Number of entries in cg_cases, computed from the array's own size. */
const unsigned cg_cases_count = sizeof cg_cases / sizeof *cg_cases;