test/cg: register Groups D, E, F (compare/branch, conv, memory) - kit

commit ed3b81da82f373155f1790929e3c7a7d96cf2b62
parent e25cbf0f256f656ae48652f76b1e757ad194564b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 14:23:42 -0700

test/cg: register Groups D, E, F (compare/branch, conv, memory)

Cases compile and link against the same CGTarget surface the parser
will drive; oracles are exit codes, fixed before the AArch64 backend
implements cmp/cmp_branch/scope_*, convert (all ConvKinds), and the
memory ops (copy_bytes/set_bytes/bitfield_*).

Diffstat:
M test/cg/CORPUS.md  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M test/cg/harness/cases.c  | 891 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 963 insertions(+), 3 deletions(-)
diff --git a/test/cg/CORPUS.md b/test/cg/CORPUS.md
@@ -88,13 +88,82 @@ materialization, slot allocation, and call lowering use the live
 | `c11_shr_signed`     | ★ | `-16 >>(s) 2`                        | 252 (= -4 & 0xff) |
 | `c12_imul_i64`       | ★ | i64 `7 * 6`                          |  42 |
 
+## Group D — compare and branch
+
+Covers both compare forms, `cmp` (materializes 0/1 in a GPR) and
+`cmp_branch` (fused test+branch), plus unconditional `jump` and the
+structured-CFG ops `scope_*`. `cmp` cases return the materialized 0/1
+directly; `cmp_branch` cases return distinct sentinels from the taken
+and fallthrough paths so the oracle can tell them apart.
+
+| Case | Status | Body | Expected |
+|---|---|---|---|
+| `d01_cmp_eq_true`           | · | `cmp(EQ, 5, 5)` materialize → 1                                |   1 |
+| `d02_cmp_eq_false`          | · | `cmp(EQ, 5, 6)` → 0                                            |   0 |
+| `d03_cmp_ne`                | · | `cmp(NE, 5, 6)` → 1                                            |   1 |
+| `d04_cmp_lt_signed`         | · | `cmp(LT_S, -1, 1)` → 1                                         |   1 |
+| `d05_cmp_lt_unsigned`       | · | `cmp(LT_U, 0xFFFFFFFFu, 1u)` → 0 (signedness in op, not Type)  |   0 |
+| `d06_cmp_ge_signed`         | · | `cmp(GE_S, 5, 5)` → 1 (boundary on inclusive ops)              |   1 |
+| `d07_cmp_branch_taken`      | · | `cmp_branch(EQ, 7, 7) → L`; landing pad returns 42              |  42 |
+| `d08_cmp_branch_not_taken`  | · | `cmp_branch(EQ, 5, 6) → L`; fallthrough returns 33              |  33 |
+| `d09_cmp_branch_lt_signed`  | · | `cmp_branch(LT_S, -3, 0) → L`; landing pad returns 9            |   9 |
+| `d10_jump`                  | · | unconditional `jump L`; early ret is dead                      |   5 |
+| `d11_scope_if_true`         | · | `int x=99; if(1) x=33; return x;`                              |  33 |
+| `d12_scope_if_false`        | · | `int x=99; if(0) x=33; return x;`                              |  99 |
+| `d13_scope_if_else`         | · | `if(0) x=10; else x=7; return x;` (exercises `scope_else`)     |   7 |
+
+## Group E — conversions
+
+One `ConvKind` per case, plus the boundary widths the AArch64 backend
+selects between (`UXTB`/`SXTB` vs `UXTH`/`SXTH` vs `UBFX`/`SBFX`,
+32→64 sign- vs zero-extend). FP cases end in an FP→int conversion
+(`CV_FTOI_S`, or `CV_FTOI_U` in e12) so the runner sees an int exit code.
+
+| Case | Status | Body | Expected |
+|---|---|---|---|
+| `e01_sext_i8_i32`     | · | `sext (i8)-1 → i32` = 0xFFFFFFFF; low 8 = 255                     | 255 |
+| `e02_zext_u8_i32`     | · | `zext (u8)0xFF → i32` = 0x000000FF; low 8 = 255                   | 255 |
+| `e03_sext_i16_i32`    | · | `sext (i16)-1000 → i32` = 0xFFFFFC18; low 8 = 0x18 = 24           |  24 |
+| `e04_zext_u16_i32`    | · | `zext (u16)0xABCD → i32` = 0x0000ABCD; low 8 = 0xCD = 205         | 205 |
+| `e05_zext_u32_i64`    | · | `zext (u32)0xFFFFFFFF → i64`; low 32 = 0xFFFFFFFF; low 8 = 255    | 255 |
+| `e06_sext_i32_i64`    | · | `sext (i32)-1 → i64` = -1; low 32 = 0xFFFFFFFF; low 8 = 255       | 255 |
+| `e07_trunc_i64_i32`   | · | `trunc 0x100000080 → i32` = 0x80                                  | 128 |
+| `e08_trunc_i32_i8`    | · | `trunc 0x1FF → i8` = 0xFF; returned as u8                         | 255 |
+| `e09_itof_s_i32_f32`  | · | `(i32)7 → f32 7.0 → ftoi_s` round-trip                            |   7 |
+| `e10_itof_u_u32_f64`  | · | `(u32)100 → f64 100.0 → ftoi_s` cross-width                       | 100 |
+| `e11_ftoi_s_neg`      | · | `ftoi_s(-1.5f) = -1` (truncate toward zero); low 8 = 255          | 255 |
+| `e12_ftoi_u_pos`      | · | `ftoi_u(200.7f) = 200u`                                           | 200 |
+| `e13_fext_f32_f64`    | · | `fext 3.5f → 3.5 → ftoi_s` → 3                                    |   3 |
+| `e14_ftrunc_f64_f32`  | · | `ftrunc 7.875 → 7.875f → ftoi_s` → 7                              |   7 |
+| `e15_bitcast_i32_f32` | · | `bitcast 0x40A00000 → f32 5.0f → ftoi_s` (same-size cross-class)  |   5 |
+
+## Group F — memory (loads/stores beyond locals)
+
+Group B already exercises basic load/store of an i32 local. Group F
+pushes the surface: every scalar width, FP load/store, indirect
+non-zero offsets, store-from-IMM vs store-from-REG, `copy_bytes`,
+`set_bytes`, volatile, and the bitfield methods.
+
+| Case | Status | Body | Expected |
+|---|---|---|---|
+| `f01_load_store_i8`              | · | local u8; store IMM 200; load; return                           | 200 |
+| `f02_load_store_i16`             | · | local i16; store 0x1234; load; low 8 = 0x34                     |  52 |
+| `f03_load_store_i64`             | · | local i64; store 0x1_0000_0042; runner reads w0 = 0x42          |  66 |
+| `f04_load_store_f32`             | · | local f32; store FP reg = 7.5f; load; ftoi_s                    |   7 |
+| `f05_load_store_f64`             | · | local f64; store FP reg = 3.25; load; ftoi_s                    |   3 |
+| `f06_indirect_nonzero_offset`    | · | i64 local addr-taken; store i32 at +0 (sentinel) and +4         |  42 |
+| `f07_store_reg`                  | · | store from REG (vs IMM in b04) into a local i32; reload         |  17 |
+| `f08_copy_bytes`                 | · | `copy_bytes(dst, src, Pt {10,32})`; sum dst.a+dst.b             |  42 |
+| `f09_set_bytes_zero`             | · | `set_bytes(0)` over an i32 buffer; load → 0                     |   0 |
+| `f10_set_bytes_ff`               | · | `set_bytes(0xFF)` over an i32 buffer; load = 0xFFFFFFFF; low 8  | 255 |
+| `f11_volatile_rw`                | · | b04 body with `MF_VOLATILE` on both store and load              |  42 |
+| `f12_bitfield_unsigned`          | · | `{u: 5}` at bit_offset=3; store 21; load (zero-extend)          |  21 |
+| `f13_bitfield_signed`            | · | `{s: 5}` at bit_offset=0; store -1; load sign-extends; low 8    | 255 |
+
 ## Deferred groups
 
 | Group | Theme |
 |---|---|
-| D | compare and branch |
-| E | conversions |
-| F | memory (loads/stores beyond locals) |
 | G | calls (beyond the direct-call path Group B exercises) |
 | H | control flow |
 | I | alloca |
diff --git a/test/cg/harness/cases.c b/test/cg/harness/cases.c
@@ -710,6 +710,850 @@ static void build_c12_imul_i64(CgTestCtx* ctx)
 }
 
 /* ============================================================
+ * Group D: compare and branch
+ * ============================================================ */
+
+/* d01_cmp_eq_true — equal operands: cmp(EQ, 5, 5) must materialize 1. */
+static void build_d01_cmp_eq_true(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg lhs = T->alloc_reg(T, RC_INT, I32);
+    Reg rhs = T->alloc_reg(T, RC_INT, I32);
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(lhs, I32), 5);
+    T->load_imm(T, REG_op(rhs, I32), 5);
+    T->cmp(T, CMP_EQ, REG_op(res, I32), REG_op(lhs, I32), REG_op(rhs, I32));
+    cgtest_ret_reg(tf, res, I32);       /* exit code = materialized flag */
+    cgtest_end(tf);
+}
+
+/* d02_cmp_eq_false — unequal operands: cmp(EQ, 5, 6) must materialize 0. */
+static void build_d02_cmp_eq_false(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg lhs = T->alloc_reg(T, RC_INT, I32);
+    Reg rhs = T->alloc_reg(T, RC_INT, I32);
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(lhs, I32), 5);
+    T->load_imm(T, REG_op(rhs, I32), 6);
+    T->cmp(T, CMP_EQ, REG_op(res, I32), REG_op(lhs, I32), REG_op(rhs, I32));
+    cgtest_ret_reg(tf, res, I32);       /* exit code = materialized flag */
+    cgtest_end(tf);
+}
+
+/* d03_cmp_ne — unequal operands under NE: cmp(NE, 5, 6) yields 1. */
+static void build_d03_cmp_ne(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg lhs = T->alloc_reg(T, RC_INT, I32);
+    Reg rhs = T->alloc_reg(T, RC_INT, I32);
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(lhs, I32), 5);
+    T->load_imm(T, REG_op(rhs, I32), 6);
+    T->cmp(T, CMP_NE, REG_op(res, I32), REG_op(lhs, I32), REG_op(rhs, I32));
+    cgtest_ret_reg(tf, res, I32);       /* exit code = materialized flag */
+    cgtest_end(tf);
+}
+
+/* d04_cmp_lt_signed — signed less-than: cmp(LT_S, -1, 1) yields 1. */
+static void build_d04_cmp_lt_signed(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg lhs = T->alloc_reg(T, RC_INT, I32);
+    Reg rhs = T->alloc_reg(T, RC_INT, I32);
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(lhs, I32), -1);
+    T->load_imm(T, REG_op(rhs, I32), 1);
+    T->cmp(T, CMP_LT_S, REG_op(res, I32), REG_op(lhs, I32), REG_op(rhs, I32));
+    cgtest_ret_reg(tf, res, I32);       /* exit code = materialized flag */
+    cgtest_end(tf);
+}
+
+/* d05_cmp_lt_unsigned — d04's operands reinterpreted as unsigned: -1 is
+ * 0xFFFFFFFF, so cmp(LT_U, 0xFFFFFFFF, 1) yields 0. CmpOp picks signedness. */
+static void build_d05_cmp_lt_unsigned(CgTestCtx* ctx)
+{
+    const Type* U32 = T_u32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, U32);
+    CGTarget* T = ctx->target;
+    Reg lhs = T->alloc_reg(T, RC_INT, U32);
+    Reg rhs = T->alloc_reg(T, RC_INT, U32);
+    Reg res = T->alloc_reg(T, RC_INT, U32);
+    T->load_imm(T, REG_op(lhs, U32), -1);
+    T->load_imm(T, REG_op(rhs, U32), 1);
+    T->cmp(T, CMP_LT_U, REG_op(res, U32), REG_op(lhs, U32), REG_op(rhs, U32));
+    cgtest_ret_reg(tf, res, U32);       /* exit code = materialized flag */
+    cgtest_end(tf);
+}
+
+/* d06_cmp_ge_signed — inclusive boundary: cmp(GE_S, 5, 5) yields 1. */
+static void build_d06_cmp_ge_signed(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg lhs = T->alloc_reg(T, RC_INT, I32);
+    Reg rhs = T->alloc_reg(T, RC_INT, I32);
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(lhs, I32), 5);
+    T->load_imm(T, REG_op(rhs, I32), 5);
+    T->cmp(T, CMP_GE_S, REG_op(res, I32), REG_op(lhs, I32), REG_op(rhs, I32));
+    cgtest_ret_reg(tf, res, I32);       /* exit code = materialized flag */
+    cgtest_end(tf);
+}
+
+/* d07_cmp_branch_taken — cmp_branch(EQ, 7, 7) jumps to the label; the
+ * code placed at the label returns 42 and the fallthrough ret is dead. */
+static void build_d07_cmp_branch_taken(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg val = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(val, I32), 7);
+    Label taken = T->label_new(T);
+    T->cmp_branch(T, CMP_EQ, REG_op(val, I32), IMM_op(7, I32), taken);
+    cgtest_ret_imm(tf, 0, I32);         /* unreachable: branch is taken */
+    T->label_place(T, taken);
+    cgtest_ret_imm(tf, 42, I32);        /* landing pad */
+    cgtest_end(tf);
+}
+
+/* d08_cmp_branch_not_taken — cmp_branch(EQ, 5, 6) falls through to ret 33. */
+static void build_d08_cmp_branch_not_taken(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg val = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(val, I32), 5);
+    Label skipped = T->label_new(T);
+    T->cmp_branch(T, CMP_EQ, REG_op(val, I32), IMM_op(6, I32), skipped);
+    cgtest_ret_imm(tf, 33, I32);        /* fallthrough path */
+    T->label_place(T, skipped);
+    cgtest_ret_imm(tf, 0, I32);         /* unreachable: branch not taken */
+    cgtest_end(tf);
+}
+
+/* d09_cmp_branch_lt_signed — signed cmp_branch with a negative LHS:
+ * (-3 < 0) holds, so control reaches the label and returns 9. */
+static void build_d09_cmp_branch_lt_signed(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg val = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(val, I32), -3);
+    Label taken = T->label_new(T);
+    T->cmp_branch(T, CMP_LT_S, REG_op(val, I32), IMM_op(0, I32), taken);
+    cgtest_ret_imm(tf, 0, I32);         /* unreachable: branch is taken */
+    T->label_place(T, taken);
+    cgtest_ret_imm(tf, 9, I32);         /* landing pad */
+    cgtest_end(tf);
+}
+
+/* d10_jump — jump skips the first ret; the ret at the label returns 5. */
+static void build_d10_jump(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Label target = T->label_new(T);
+    T->jump(T, target);
+    cgtest_ret_imm(tf, 0, I32);         /* unreachable: jumped over */
+    T->label_place(T, target);
+    cgtest_ret_imm(tf, 5, I32);         /* jump destination */
+    cgtest_end(tf);
+}
+
+/* d11_scope_if_true — models `int x = 99; if (1) x = 33; return x;`.
+ * scope_begin takes the condition via the SCOPE_IF descriptor, the body
+ * overwrites x, and scope_end closes the scope; expected exit is 33. */
+static void build_d11_scope_if_true(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+
+    FrameSlot x = cgtest_local(tf, I32, FSF_NONE);
+    cgtest_store_local(tf, x, IMM_op(99, I32), I32);    /* x = 99 */
+
+    Reg cond = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(cond, I32), 1);
+    CGScopeDesc if_desc = { .kind = SCOPE_IF, .cond = REG_op(cond, I32) };
+    CGScope if_scope = T->scope_begin(T, &if_desc);
+    cgtest_store_local(tf, x, IMM_op(33, I32), I32);    /* then: x = 33 */
+    T->scope_end(T, if_scope);
+
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    cgtest_load_local(tf, REG_op(res, I32), x, I32);
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* d12_scope_if_false — models `int x = 99; if (0) x = 33; return x;`.
+ * The condition is 0, so x keeps its initial 99. */
+static void build_d12_scope_if_false(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+
+    FrameSlot x = cgtest_local(tf, I32, FSF_NONE);
+    cgtest_store_local(tf, x, IMM_op(99, I32), I32);    /* x = 99 */
+
+    Reg cond = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(cond, I32), 0);
+    CGScopeDesc if_desc = { .kind = SCOPE_IF, .cond = REG_op(cond, I32) };
+    CGScope if_scope = T->scope_begin(T, &if_desc);
+    cgtest_store_local(tf, x, IMM_op(33, I32), I32);    /* then: skipped */
+    T->scope_end(T, if_scope);
+
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    cgtest_load_local(tf, REG_op(res, I32), x, I32);
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* d13_scope_if_else — models `int x; if (0) x = 10; else x = 7; return x;`.
+ * scope_else splits the two bodies; with cond 0 the else store (7) wins. */
+static void build_d13_scope_if_else(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+
+    FrameSlot x = cgtest_local(tf, I32, FSF_NONE);      /* no initial store */
+
+    Reg cond = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(cond, I32), 0);
+    CGScopeDesc if_desc = { .kind = SCOPE_IF, .cond = REG_op(cond, I32) };
+    CGScope if_scope = T->scope_begin(T, &if_desc);
+    cgtest_store_local(tf, x, IMM_op(10, I32), I32);    /* then: x = 10 */
+    T->scope_else(T, if_scope);
+    cgtest_store_local(tf, x, IMM_op(7, I32), I32);     /* else: x = 7 */
+    T->scope_end(T, if_scope);
+
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    cgtest_load_local(tf, REG_op(res, I32), x, I32);
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* ============================================================
+ * Group E: conversions
+ *
+ * One ConvKind per case, plus the boundary widths the AArch64 backend
+ * actually selects between (UXTB/SXTB vs UXTH/SXTH vs UBFX/SBFX, 32→64
+ * sign- vs zero-extend). FP conversions end in ftoi_s (ftoi_u in e12)
+ * so the runner sees an int exit code.
+ * ============================================================ */
+
+/* e01_sext_i8_i32 — CV_SEXT of (i8)-1 to i32 gives 0xFFFFFFFF; exit 255. */
+static void build_e01_sext_i8_i32(CgTestCtx* ctx)
+{
+    const Type* I8  = T_i8(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg narrow = T->alloc_reg(T, RC_INT, I8);
+    Reg wide   = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(narrow, I8), -1);
+    T->convert(T, CV_SEXT, REG_op(wide, I32), REG_op(narrow, I8));
+    cgtest_ret_reg(tf, wide, I32);
+    cgtest_end(tf);
+}
+
+/* e02_zext_u8_i32 — CV_ZEXT of (u8)0xFF to i32 gives 0x000000FF; exit
+ * 255. Unlike e01 the upper bits of the result are zero. */
+static void build_e02_zext_u8_i32(CgTestCtx* ctx)
+{
+    const Type* U8  = T_u8(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg narrow = T->alloc_reg(T, RC_INT, U8);
+    Reg wide   = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(narrow, U8), 0xFF);
+    T->convert(T, CV_ZEXT, REG_op(wide, I32), REG_op(narrow, U8));
+    cgtest_ret_reg(tf, wide, I32);
+    cgtest_end(tf);
+}
+
+/* e03_sext_i16_i32 — CV_SEXT of (i16)-1000 gives 0xFFFFFC18; exit 0x18 = 24. */
+static void build_e03_sext_i16_i32(CgTestCtx* ctx)
+{
+    const Type* I16 = T_i16(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg narrow = T->alloc_reg(T, RC_INT, I16);
+    Reg wide   = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(narrow, I16), -1000);
+    T->convert(T, CV_SEXT, REG_op(wide, I32), REG_op(narrow, I16));
+    cgtest_ret_reg(tf, wide, I32);
+    cgtest_end(tf);
+}
+
+/* e04_zext_u16_i32 — CV_ZEXT of (u16)0xABCD gives 0x0000ABCD; exit 0xCD = 205. */
+static void build_e04_zext_u16_i32(CgTestCtx* ctx)
+{
+    const Type* U16 = T_u16(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg narrow = T->alloc_reg(T, RC_INT, U16);
+    Reg wide   = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(narrow, U16), 0xABCD);
+    T->convert(T, CV_ZEXT, REG_op(wide, I32), REG_op(narrow, U16));
+    cgtest_ret_reg(tf, wide, I32);
+    cgtest_end(tf);
+}
+
+/* e05_zext_u32_i64 — CV_ZEXT of (u32)0xFFFFFFFF to i64 gives
+ * 0x00000000FFFFFFFF; the runner reads w0 = 0xFFFFFFFF, exit 255. The
+ * upper 32 bits are zero, which is what separates this from e06. */
+static void build_e05_zext_u32_i64(CgTestCtx* ctx)
+{
+    const Type* U32 = T_u32(ctx);
+    const Type* I64 = T_i64(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I64);
+    CGTarget* T = ctx->target;
+    Reg narrow = T->alloc_reg(T, RC_INT, U32);
+    Reg wide   = T->alloc_reg(T, RC_INT, I64);
+    T->load_imm(T, REG_op(narrow, U32), 0xFFFFFFFFll);
+    T->convert(T, CV_ZEXT, REG_op(wide, I64), REG_op(narrow, U32));
+    cgtest_ret_reg(tf, wide, I64);
+    cgtest_end(tf);
+}
+
+/* e06_sext_i32_i64 — CV_SEXT of (i32)-1 to i64 gives -1; exit 255. Low
+ * byte matches e05 but the upper bits differ (SXTW vs UXTW). */
+static void build_e06_sext_i32_i64(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    const Type* I64 = T_i64(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I64);
+    CGTarget* T = ctx->target;
+    Reg narrow = T->alloc_reg(T, RC_INT, I32);
+    Reg wide   = T->alloc_reg(T, RC_INT, I64);
+    T->load_imm(T, REG_op(narrow, I32), -1);
+    T->convert(T, CV_SEXT, REG_op(wide, I64), REG_op(narrow, I32));
+    cgtest_ret_reg(tf, wide, I64);
+    cgtest_end(tf);
+}
+
+/* e07_trunc_i64_i32 — CV_TRUNC of 0x100000080 keeps the low 32: 0x80 = 128. */
+static void build_e07_trunc_i64_i32(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    const Type* I64 = T_i64(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg wide   = T->alloc_reg(T, RC_INT, I64);
+    Reg narrow = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(wide, I64), 0x100000080ll);
+    T->convert(T, CV_TRUNC, REG_op(narrow, I32), REG_op(wide, I64));
+    cgtest_ret_reg(tf, narrow, I32);
+    cgtest_end(tf);
+}
+
+/* e08_trunc_i32_i8 — CV_TRUNC of 0x1FF keeps the low 8: 0xFF, as u8 = 255. */
+static void build_e08_trunc_i32_i8(CgTestCtx* ctx)
+{
+    const Type* U8  = T_u8(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, U8);
+    CGTarget* T = ctx->target;
+    Reg wide   = T->alloc_reg(T, RC_INT, I32);
+    Reg narrow = T->alloc_reg(T, RC_INT, U8);
+    T->load_imm(T, REG_op(wide, I32), 0x1FF);
+    T->convert(T, CV_TRUNC, REG_op(narrow, U8), REG_op(wide, I32));
+    cgtest_ret_reg(tf, narrow, U8);
+    cgtest_end(tf);
+}
+
+/* e09_itof_s_i32_f32 — (i32)7 → CV_ITOF_S → f32 7.0 → CV_FTOI_S → 7. An
+ * exact round-trip; checks that SCVTF and FCVTZS pair up correctly. */
+static void build_e09_itof_s_i32_f32(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    const Type* F32 = T_f32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg ival = T->alloc_reg(T, RC_INT, I32);
+    Reg fval = T->alloc_reg(T, RC_FP,  F32);
+    Reg res  = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(ival, I32), 7);
+    T->convert(T, CV_ITOF_S, REG_op(fval, F32), REG_op(ival, I32));
+    T->convert(T, CV_FTOI_S, REG_op(res, I32), REG_op(fval, F32));
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* e10_itof_u_u32_f64 — (u32)100 → CV_ITOF_U → f64 100.0 → CV_FTOI_S → 100.
+ * Width changes going up (UCVTF Dn,Wn) and again coming back to i32. */
+static void build_e10_itof_u_u32_f64(CgTestCtx* ctx)
+{
+    const Type* U32 = T_u32(ctx);
+    const Type* I32 = T_i32(ctx);
+    const Type* F64 = T_f64(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg uval = T->alloc_reg(T, RC_INT, U32);
+    Reg fval = T->alloc_reg(T, RC_FP,  F64);
+    Reg res  = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(uval, U32), 100);
+    T->convert(T, CV_ITOF_U, REG_op(fval, F64), REG_op(uval, U32));
+    T->convert(T, CV_FTOI_S, REG_op(res, I32), REG_op(fval, F64));
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* e11_ftoi_s_neg — CV_FTOI_S of -1.5f truncates toward zero (as in C99),
+ * giving -1; exit 255. */
+static void build_e11_ftoi_s_neg(CgTestCtx* ctx)
+{
+    const Type* F32 = T_f32(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    static const u8 NEG_1_5_LE[4] = { 0x00, 0x00, 0xC0, 0xBF }; /* 0xBFC00000 */
+    Reg fval = T->alloc_reg(T, RC_FP,  F32);
+    Reg res  = T->alloc_reg(T, RC_INT, I32);
+    ConstBytes lit = { .type = F32, .bytes = NEG_1_5_LE, .size = 4, .align = 4 };
+    T->load_const(T, REG_op(fval, F32), lit);
+    T->convert(T, CV_FTOI_S, REG_op(res, I32), REG_op(fval, F32));
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* e12_ftoi_u_pos — CV_FTOI_U of 200.7f truncates toward zero, like C's
+ * (unsigned)x, giving 200u. */
+static void build_e12_ftoi_u_pos(CgTestCtx* ctx)
+{
+    const Type* F32 = T_f32(ctx);
+    const Type* U32 = T_u32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, U32);
+    CGTarget* T = ctx->target;
+    static const u8 F200_7_LE[4] = { 0x33, 0xB3, 0x48, 0x43 }; /* 0x4348B333 */
+    Reg fval = T->alloc_reg(T, RC_FP,  F32);
+    Reg res  = T->alloc_reg(T, RC_INT, U32);
+    ConstBytes lit = { .type = F32, .bytes = F200_7_LE, .size = 4, .align = 4 };
+    T->load_const(T, REG_op(fval, F32), lit);
+    T->convert(T, CV_FTOI_U, REG_op(res, U32), REG_op(fval, F32));
+    cgtest_ret_reg(tf, res, U32);
+    cgtest_end(tf);
+}
+
+/* e13_fext_f32_f64 — CV_FEXT of 3.5f, exactly representable in both
+ * widths, gives 3.5; CV_FTOI_S then yields 3. */
+static void build_e13_fext_f32_f64(CgTestCtx* ctx)
+{
+    const Type* F32 = T_f32(ctx);
+    const Type* F64 = T_f64(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    static const u8 F3_5_LE[4] = { 0x00, 0x00, 0x60, 0x40 }; /* 0x40600000 */
+    Reg single = T->alloc_reg(T, RC_FP, F32);
+    Reg dbl    = T->alloc_reg(T, RC_FP, F64);
+    Reg res    = T->alloc_reg(T, RC_INT, I32);
+    ConstBytes lit = { .type = F32, .bytes = F3_5_LE, .size = 4, .align = 4 };
+    T->load_const(T, REG_op(single, F32), lit);
+    T->convert(T, CV_FEXT, REG_op(dbl, F64), REG_op(single, F32));
+    T->convert(T, CV_FTOI_S, REG_op(res, I32), REG_op(dbl, F64));
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* e14_ftrunc_f64_f32 — CV_FTRUNC of 7.875 (exact in both widths) gives
+ * 7.875f; CV_FTOI_S then yields 7. */
+static void build_e14_ftrunc_f64_f32(CgTestCtx* ctx)
+{
+    const Type* F32 = T_f32(ctx);
+    const Type* F64 = T_f64(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    static const u8 D7_875_LE[8] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x1F, 0x40, /* 0x401F800000000000 */
+    };
+    Reg dbl    = T->alloc_reg(T, RC_FP, F64);
+    Reg single = T->alloc_reg(T, RC_FP, F32);
+    Reg res    = T->alloc_reg(T, RC_INT, I32);
+    ConstBytes lit = { .type = F64, .bytes = D7_875_LE, .size = 8, .align = 8 };
+    T->load_const(T, REG_op(dbl, F64), lit);
+    T->convert(T, CV_FTRUNC, REG_op(single, F32), REG_op(dbl, F64));
+    T->convert(T, CV_FTOI_S, REG_op(res, I32), REG_op(single, F32));
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* e15_bitcast_i32_f32 — CV_BITCAST across register classes at the same
+ * size. 0x40A00000 is the IEEE-754 single encoding of 5.0f, so the
+ * CV_FTOI_S yields 5 only if the bits reach the FP register intact. */
+static void build_e15_bitcast_i32_f32(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    const Type* F32 = T_f32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    Reg bits = T->alloc_reg(T, RC_INT, I32);
+    Reg fval = T->alloc_reg(T, RC_FP,  F32);
+    Reg res  = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(bits, I32), 0x40A00000); /* encodes 5.0f */
+    T->convert(T, CV_BITCAST, REG_op(fval, F32), REG_op(bits, I32));
+    T->convert(T, CV_FTOI_S, REG_op(res, I32), REG_op(fval, F32));
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* ============================================================
+ * Group F: memory (loads/stores beyond locals)
+ *
+ * Group B already exercises the basic load/store-of-local path. Group F
+ * pushes the surface: every scalar width, FP load/store, indirect
+ * non-zero offsets, store-from-IMM vs store-from-REG, copy_bytes,
+ * set_bytes, volatile, and the bitfield methods.
+ * ============================================================ */
+
+/* f01_load_store_i8 — u8 local: store immediate 200, reload, return it. */
+static void build_f01_load_store_i8(CgTestCtx* ctx)
+{
+    const Type* U8 = T_u8(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, U8);
+    CGTarget* T = ctx->target;
+    FrameSlot slot = cgtest_local(tf, U8, FSF_NONE);
+    cgtest_store_local(tf, slot, IMM_op(200, U8), U8);
+    Reg res = T->alloc_reg(T, RC_INT, U8);
+    cgtest_load_local(tf, REG_op(res, U8), slot, U8);
+    cgtest_ret_reg(tf, res, U8);
+    cgtest_end(tf);
+}
+
+/* f02_load_store_i16 — i16 local: store 0x1234, reload; exit 0x34 = 52. */
+static void build_f02_load_store_i16(CgTestCtx* ctx)
+{
+    const Type* I16 = T_i16(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I16);
+    CGTarget* T = ctx->target;
+    FrameSlot slot = cgtest_local(tf, I16, FSF_NONE);
+    cgtest_store_local(tf, slot, IMM_op(0x1234, I16), I16);
+    Reg res = T->alloc_reg(T, RC_INT, I16);
+    cgtest_load_local(tf, REG_op(res, I16), slot, I16);
+    cgtest_ret_reg(tf, res, I16);
+    cgtest_end(tf);
+}
+
+/* f03_load_store_i64 — i64 local: store 0x1_0000_0042, reload; the runner
+ * reads w0, i.e. the low 32 bits, 0x42 = 66. */
+static void build_f03_load_store_i64(CgTestCtx* ctx)
+{
+    const Type* I64 = T_i64(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I64);
+    CGTarget* T = ctx->target;
+    FrameSlot slot = cgtest_local(tf, I64, FSF_NONE);
+    cgtest_store_local(tf, slot, IMM_op(0x100000042ll, I64), I64);
+    Reg res = T->alloc_reg(T, RC_INT, I64);
+    cgtest_load_local(tf, REG_op(res, I64), slot, I64);
+    cgtest_ret_reg(tf, res, I64);
+    cgtest_end(tf);
+}
+
+/* f04_load_store_f32 — f32 local: store an FP register holding 7.5f, load
+ * it into a second FP register, CV_FTOI_S → 7. Covers STR Sn / LDR Sn. */
+static void build_f04_load_store_f32(CgTestCtx* ctx)
+{
+    const Type* F32 = T_f32(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    static const u8 F7_5_LE[4] = { 0x00, 0x00, 0xF0, 0x40 }; /* 0x40F00000 */
+
+    FrameSlot slot = cgtest_local(tf, F32, FSF_NONE);
+    Reg fsrc = T->alloc_reg(T, RC_FP, F32);
+    ConstBytes lit = { .type = F32, .bytes = F7_5_LE, .size = 4, .align = 4 };
+    T->load_const(T, REG_op(fsrc, F32), lit);
+    cgtest_store_local(tf, slot, REG_op(fsrc, F32), F32);
+
+    Reg fdst = T->alloc_reg(T, RC_FP, F32);
+    cgtest_load_local(tf, REG_op(fdst, F32), slot, F32);
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    T->convert(T, CV_FTOI_S, REG_op(res, I32), REG_op(fdst, F32));
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* f05_load_store_f64 — f64 local: store an FP register holding 3.25, load
+ * it into a second FP register, CV_FTOI_S → 3. Covers STR Dn / LDR Dn. */
+static void build_f05_load_store_f64(CgTestCtx* ctx)
+{
+    const Type* F64 = T_f64(ctx);
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+    static const u8 D3_25_LE[8] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x40, /* 0x400A000000000000 */
+    };
+
+    FrameSlot slot = cgtest_local(tf, F64, FSF_NONE);
+    Reg fsrc = T->alloc_reg(T, RC_FP, F64);
+    ConstBytes lit = { .type = F64, .bytes = D3_25_LE, .size = 8, .align = 8 };
+    T->load_const(T, REG_op(fsrc, F64), lit);
+    cgtest_store_local(tf, slot, REG_op(fsrc, F64), F64);
+
+    Reg fdst = T->alloc_reg(T, RC_FP, F64);
+    cgtest_load_local(tf, REG_op(fdst, F64), slot, F64);
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    T->convert(T, CV_FTOI_S, REG_op(res, I32), REG_op(fdst, F64));
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* f06_indirect_nonzero_offset — addr_of an i64 local, store i32 42 at +4,
+ * then a sentinel 99 at +0, and load +4 back (expect 42). Storing the
+ * sentinel last makes a dropped offset or an over-wide +0 store visible. */
+static void build_f06_indirect_nonzero_offset(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    const Type* I64 = T_i64(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+
+    FrameSlot s = cgtest_local(tf, I64, FSF_ADDR_TAKEN);
+    Reg base = T->alloc_reg(T, RC_INT, T_ptr(ctx, I64));
+    T->addr_of(T, REG_op(base, T_ptr(ctx, I64)), LOCAL_op(s, I64));
+
+    MemAccess ma = { .type = I32, .size = 4, .align = 4,
+                     .alias.kind = ALIAS_LOCAL };
+    T->store(T, IND_op(base, 4, I32), IMM_op(42, I32), ma); /* value under test */
+    T->store(T, IND_op(base, 0, I32), IMM_op(99, I32), ma); /* must not touch +4 */
+
+    Reg r = T->alloc_reg(T, RC_INT, I32);
+    T->load(T, REG_op(r, I32), IND_op(base, 4, I32), ma);
+    cgtest_ret_reg(tf, r, I32);
+    cgtest_end(tf);
+}
+
+/* f07_store_reg — the store source is a REG operand rather than an IMM
+ * (b04 stored an immediate), covering the register-source store path. */
+static void build_f07_store_reg(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+
+    FrameSlot slot = cgtest_local(tf, I32, FSF_NONE);
+    Reg val = T->alloc_reg(T, RC_INT, I32);
+    T->load_imm(T, REG_op(val, I32), 17);
+    cgtest_store_local(tf, slot, REG_op(val, I32), I32);
+
+    Reg res = T->alloc_reg(T, RC_INT, I32);
+    cgtest_load_local(tf, REG_op(res, I32), slot, I32);
+    cgtest_ret_reg(tf, res, I32);
+    cgtest_end(tf);
+}
+
+/* f08_copy_bytes — fill a Pt source with {10, 32}, copy_bytes it into a
+ * second Pt, then return dst.a + dst.b = 42. Only the aggregate copy is
+ * under test; the field loads and the add merely read out the result. */
+static void build_f08_copy_bytes(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    const Type* PT  = build_b06_pt_type(ctx);
+
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+
+    FrameSlot src = cgtest_local(tf, PT, FSF_ADDR_TAKEN);
+    FrameSlot dst = cgtest_local(tf, PT, FSF_ADDR_TAKEN);
+
+    /* src = {10, 32}, written field by field through its address. */
+    Reg src_ptr = T->alloc_reg(T, RC_INT, T_ptr(ctx, PT));
+    T->addr_of(T, REG_op(src_ptr, T_ptr(ctx, PT)), LOCAL_op(src, PT));
+    MemAccess field_ma = { .type = I32, .size = 4, .align = 4,
+                           .alias.kind = ALIAS_LOCAL };
+    T->store(T, IND_op(src_ptr, 0, I32), IMM_op(10, I32), field_ma);
+    T->store(T, IND_op(src_ptr, 4, I32), IMM_op(32, I32), field_ma);
+
+    Reg dst_ptr = T->alloc_reg(T, RC_INT, T_ptr(ctx, PT));
+    T->addr_of(T, REG_op(dst_ptr, T_ptr(ctx, PT)), LOCAL_op(dst, PT));
+
+    AggregateAccess whole = {
+        .type = PT, .size = 8, .align = 4,
+        .mem  = { .type = PT, .size = 8, .align = 4,
+                  .alias.kind = ALIAS_LOCAL },
+    };
+    T->copy_bytes(T,
+                  REG_op(dst_ptr, T_ptr(ctx, PT)),
+                  REG_op(src_ptr, T_ptr(ctx, PT)),
+                  whole);
+
+    Reg fa    = T->alloc_reg(T, RC_INT, I32);
+    Reg fb    = T->alloc_reg(T, RC_INT, I32);
+    Reg total = T->alloc_reg(T, RC_INT, I32);
+    T->load(T, REG_op(fa, I32), IND_op(dst_ptr, 0, I32), field_ma);
+    T->load(T, REG_op(fb, I32), IND_op(dst_ptr, 4, I32), field_ma);
+    T->binop(T, BO_IADD, REG_op(total, I32), REG_op(fa, I32), REG_op(fb, I32));
+    cgtest_ret_reg(tf, total, I32);
+    cgtest_end(tf);
+}
+
+/* f09_set_bytes_zero — set_bytes(0) on an i32-sized buffer; load the
+ * word back → 0. Exercises the "memset to zero" path which backends
+ * often special-case (STR XZR). The buffer is pre-filled with -1 (every
+ * byte non-zero) so a no-op or partial set_bytes cannot pass just
+ * because the fresh stack word happened to be zero. */
+static void build_f09_set_bytes_zero(CgTestCtx* ctx)
+{
+    const Type* I32 = T_i32(ctx);
+    const Type* U8  = T_u8(ctx);
+    CgTestFn* tf = cgtest_begin_main(ctx, I32);
+    CGTarget* T = ctx->target;
+
+    FrameSlot s = cgtest_local(tf, I32, FSF_ADDR_TAKEN);
+    Reg base = T->alloc_reg(T, RC_INT, T_ptr(ctx, I32));
+    T->addr_of(T, REG_op(base, T_ptr(ctx, I32)), LOCAL_op(s, I32));
+
+    AggregateAccess agg = {
+        .type = I32, .size = 4, .align = 4,
+        .mem  = { .type = I32, .size = 4, .align = 4,
+                  .alias.kind = ALIAS_LOCAL },
+    };
+
+    /* Poison all four bytes so the zeroing below is observable. */
+    T->store(T, IND_op(base, 0, I32), IMM_op(-1, I32), agg.mem);
+
+    T->set_bytes(T, REG_op(base, T_ptr(ctx, I32)), IMM_op(0, U8), agg);
+
+    /* Reload the whole word; any surviving 0xFF in the low byte changes
+     * the exit code. */
+    Reg r = T->alloc_reg(T, RC_INT, I32);
+    T->load(T, REG_op(r, I32), IND_op(base, 0, I32), agg.mem);
+    cgtest_ret_reg(tf, r, I32);
+    cgtest_end(tf);
+}
+
+/* f10_set_bytes_ff — fill an i32-sized buffer with the byte 0xFF via
+ * set_bytes, then load the word (0xFFFFFFFF) and return it; the exit
+ * code keeps the low 8 bits, 255. Covers the byte-broadcast path. */
+static void build_f10_set_bytes_ff(CgTestCtx* ctx)
+{
+    const Type* i32 = T_i32(ctx);
+    const Type* u8  = T_u8(ctx);
+    CgTestFn* fn = cgtest_begin_main(ctx, i32);
+    CGTarget* tgt = ctx->target;
+
+    FrameSlot buf = cgtest_local(fn, i32, FSF_ADDR_TAKEN);
+    Reg ptr = tgt->alloc_reg(tgt, RC_INT, T_ptr(ctx, i32));
+    tgt->addr_of(tgt, REG_op(ptr, T_ptr(ctx, i32)), LOCAL_op(buf, i32));
+
+    /* Describe the 4-byte region; .mem doubles as the load's MemAccess. */
+    AggregateAccess region = {
+        .type  = i32,
+        .size  = 4,
+        .align = 4,
+        .mem   = {
+            .type       = i32,
+            .size       = 4,
+            .align      = 4,
+            .alias.kind = ALIAS_LOCAL,
+        },
+    };
+    tgt->set_bytes(tgt, REG_op(ptr, T_ptr(ctx, i32)),
+                   IMM_op(0xFF, u8), region);
+
+    Reg word = tgt->alloc_reg(tgt, RC_INT, i32);
+    tgt->load(tgt, REG_op(word, i32), IND_op(ptr, 0, i32), region.mem);
+    cgtest_ret_reg(fn, word, i32);
+    cgtest_end(fn);
+}
+
+/* f11_volatile_rw — store 42 to a local and load it back, with
+ * MF_VOLATILE set on the MemAccess used for both operations. Same body
+ * and same exit value as b04; what differs is the emitted code (no
+ * DSE/DCE, no fold-through-store). */
+static void build_f11_volatile_rw(CgTestCtx* ctx)
+{
+    const Type* i32 = T_i32(ctx);
+    CgTestFn* fn = cgtest_begin_main(ctx, i32);
+    CGTarget* tgt = ctx->target;
+
+    FrameSlot slot = cgtest_local(fn, i32, FSF_NONE);
+
+    /* One volatile access descriptor shared by the store and the load. */
+    MemAccess vol = {
+        .type       = i32,
+        .size       = 4,
+        .align      = 4,
+        .flags      = MF_VOLATILE,
+        .alias.kind = ALIAS_LOCAL,
+    };
+    tgt->store(tgt, LOCAL_op(slot, i32), IMM_op(42, i32), vol);
+
+    Reg out = tgt->alloc_reg(tgt, RC_INT, i32);
+    tgt->load(tgt, REG_op(out, i32), LOCAL_op(slot, i32), vol);
+    cgtest_ret_reg(fn, out, i32);
+    cgtest_end(fn);
+}
+
+/* f12_bitfield_unsigned — models { unsigned x : 5; } placed at
+ * bit_offset 3 of an i32 storage word that is zeroed first. Stores 21
+ * into the field and loads it back; expected 21 (zero-extended). The
+ * non-zero bit_offset is what forces the mask+shift logic. */
+static void build_f12_bitfield_unsigned(CgTestCtx* ctx)
+{
+    const Type* i32 = T_i32(ctx);
+    const Type* u32 = T_u32(ctx);
+    CgTestFn* fn = cgtest_begin_main(ctx, u32);
+    CGTarget* tgt = ctx->target;
+
+    FrameSlot word = cgtest_local(fn, i32, FSF_ADDR_TAKEN);
+    Reg ptr = tgt->alloc_reg(tgt, RC_INT, T_ptr(ctx, i32));
+    tgt->addr_of(tgt, REG_op(ptr, T_ptr(ctx, i32)), LOCAL_op(word, i32));
+
+    /* Clear the containing word so bits outside the field start at 0. */
+    MemAccess word_ma = {
+        .type       = i32,
+        .size       = 4,
+        .align      = 4,
+        .alias.kind = ALIAS_LOCAL,
+    };
+    tgt->store(tgt, IND_op(ptr, 0, i32), IMM_op(0, i32), word_ma);
+
+    /* 5-bit unsigned field occupying bits [3, 8) of the storage word. */
+    BitFieldAccess field = {
+        .field_type     = u32,
+        .storage        = word_ma,
+        .storage_offset = 0,
+        .bit_offset     = 3,
+        .bit_width      = 5,
+        .signed_        = 0,
+    };
+    tgt->bitfield_store(tgt, REG_op(ptr, T_ptr(ctx, i32)),
+                        IMM_op(21, u32), field);
+
+    Reg out = tgt->alloc_reg(tgt, RC_INT, u32);
+    tgt->bitfield_load(tgt, REG_op(out, u32),
+                       REG_op(ptr, T_ptr(ctx, i32)), field);
+    cgtest_ret_reg(fn, out, u32);
+    cgtest_end(fn);
+}
+
+/* f13_bitfield_signed — models { signed x : 5; } at bit_offset 0 of a
+ * zeroed i32 word. Stores -1 (all five bits set) and loads it back; the
+ * load must sign-extend to -1, so the exit code's low 8 bits are 255. */
+static void build_f13_bitfield_signed(CgTestCtx* ctx)
+{
+    const Type* i32 = T_i32(ctx);
+    CgTestFn* fn = cgtest_begin_main(ctx, i32);
+    CGTarget* tgt = ctx->target;
+
+    FrameSlot word = cgtest_local(fn, i32, FSF_ADDR_TAKEN);
+    Reg ptr = tgt->alloc_reg(tgt, RC_INT, T_ptr(ctx, i32));
+    tgt->addr_of(tgt, REG_op(ptr, T_ptr(ctx, i32)), LOCAL_op(word, i32));
+
+    /* Start from an all-zero storage word. */
+    MemAccess word_ma = {
+        .type       = i32,
+        .size       = 4,
+        .align      = 4,
+        .alias.kind = ALIAS_LOCAL,
+    };
+    tgt->store(tgt, IND_op(ptr, 0, i32), IMM_op(0, i32), word_ma);
+
+    /* 5-bit signed field in the lowest bits of the word. */
+    BitFieldAccess field = {
+        .field_type     = i32,
+        .storage        = word_ma,
+        .storage_offset = 0,
+        .bit_offset     = 0,
+        .bit_width      = 5,
+        .signed_        = 1,
+    };
+    tgt->bitfield_store(tgt, REG_op(ptr, T_ptr(ctx, i32)),
+                        IMM_op(-1, i32), field);
+
+    Reg out = tgt->alloc_reg(tgt, RC_INT, i32);
+    tgt->bitfield_load(tgt, REG_op(out, i32),
+                       REG_op(ptr, T_ptr(ctx, i32)), field);
+    cgtest_ret_reg(fn, out, i32);
+    cgtest_end(fn);
+}
+
+/* ============================================================
  * Registry
  * ============================================================ */
 
@@ -752,6 +1596,53 @@ const CgCase cg_cases[] = {
     { "c10_logical_not",         build_c10_logical_not,           1, CG_CASE_DEFAULT },
     { "c11_shr_signed",          build_c11_shr_signed,          252, CG_CASE_DEFAULT },
     { "c12_imul_i64",            build_c12_imul_i64,             42, CG_CASE_DEFAULT },
+
+    /* Group D — compare and branch */
+    { "d01_cmp_eq_true",             build_d01_cmp_eq_true,             1, CG_CASE_DEFAULT },
+    { "d02_cmp_eq_false",            build_d02_cmp_eq_false,            0, CG_CASE_DEFAULT },
+    { "d03_cmp_ne",                  build_d03_cmp_ne,                  1, CG_CASE_DEFAULT },
+    { "d04_cmp_lt_signed",           build_d04_cmp_lt_signed,           1, CG_CASE_DEFAULT },
+    { "d05_cmp_lt_unsigned",         build_d05_cmp_lt_unsigned,         0, CG_CASE_DEFAULT },
+    { "d06_cmp_ge_signed",           build_d06_cmp_ge_signed,           1, CG_CASE_DEFAULT },
+    { "d07_cmp_branch_taken",        build_d07_cmp_branch_taken,       42, CG_CASE_DEFAULT },
+    { "d08_cmp_branch_not_taken",    build_d08_cmp_branch_not_taken,   33, CG_CASE_DEFAULT },
+    { "d09_cmp_branch_lt_signed",    build_d09_cmp_branch_lt_signed,    9, CG_CASE_DEFAULT },
+    { "d10_jump",                    build_d10_jump,                    5, CG_CASE_DEFAULT },
+    { "d11_scope_if_true",           build_d11_scope_if_true,          33, CG_CASE_DEFAULT },
+    { "d12_scope_if_false",          build_d12_scope_if_false,         99, CG_CASE_DEFAULT },
+    { "d13_scope_if_else",           build_d13_scope_if_else,           7, CG_CASE_DEFAULT },
+
+    /* Group E — conversions */
+    { "e01_sext_i8_i32",             build_e01_sext_i8_i32,           255, CG_CASE_DEFAULT },
+    { "e02_zext_u8_i32",             build_e02_zext_u8_i32,           255, CG_CASE_DEFAULT },
+    { "e03_sext_i16_i32",            build_e03_sext_i16_i32,           24, CG_CASE_DEFAULT },
+    { "e04_zext_u16_i32",            build_e04_zext_u16_i32,          205, CG_CASE_DEFAULT },
+    { "e05_zext_u32_i64",            build_e05_zext_u32_i64,          255, CG_CASE_DEFAULT },
+    { "e06_sext_i32_i64",            build_e06_sext_i32_i64,          255, CG_CASE_DEFAULT },
+    { "e07_trunc_i64_i32",           build_e07_trunc_i64_i32,         128, CG_CASE_DEFAULT },
+    { "e08_trunc_i32_i8",            build_e08_trunc_i32_i8,          255, CG_CASE_DEFAULT },
+    { "e09_itof_s_i32_f32",          build_e09_itof_s_i32_f32,          7, CG_CASE_DEFAULT },
+    { "e10_itof_u_u32_f64",          build_e10_itof_u_u32_f64,        100, CG_CASE_DEFAULT },
+    { "e11_ftoi_s_neg",              build_e11_ftoi_s_neg,            255, CG_CASE_DEFAULT },
+    { "e12_ftoi_u_pos",              build_e12_ftoi_u_pos,            200, CG_CASE_DEFAULT },
+    { "e13_fext_f32_f64",            build_e13_fext_f32_f64,            3, CG_CASE_DEFAULT },
+    { "e14_ftrunc_f64_f32",          build_e14_ftrunc_f64_f32,          7, CG_CASE_DEFAULT },
+    { "e15_bitcast_i32_f32",         build_e15_bitcast_i32_f32,         5, CG_CASE_DEFAULT },
+
+    /* Group F — memory (loads/stores beyond locals) */
+    { "f01_load_store_i8",           build_f01_load_store_i8,         200, CG_CASE_DEFAULT },
+    { "f02_load_store_i16",          build_f02_load_store_i16,         52, CG_CASE_DEFAULT },
+    { "f03_load_store_i64",          build_f03_load_store_i64,         66, CG_CASE_DEFAULT },
+    { "f04_load_store_f32",          build_f04_load_store_f32,          7, CG_CASE_DEFAULT },
+    { "f05_load_store_f64",          build_f05_load_store_f64,          3, CG_CASE_DEFAULT },
+    { "f06_indirect_nonzero_offset", build_f06_indirect_nonzero_offset,42, CG_CASE_DEFAULT },
+    { "f07_store_reg",               build_f07_store_reg,              17, CG_CASE_DEFAULT },
+    { "f08_copy_bytes",              build_f08_copy_bytes,             42, CG_CASE_DEFAULT },
+    { "f09_set_bytes_zero",          build_f09_set_bytes_zero,          0, CG_CASE_DEFAULT },
+    { "f10_set_bytes_ff",            build_f10_set_bytes_ff,          255, CG_CASE_DEFAULT },
+    { "f11_volatile_rw",             build_f11_volatile_rw,            42, CG_CASE_DEFAULT },
+    { "f12_bitfield_unsigned",       build_f12_bitfield_unsigned,      21, CG_CASE_DEFAULT },
+    { "f13_bitfield_signed",         build_f13_bitfield_signed,       255, CG_CASE_DEFAULT },
 };
 
 const unsigned cg_cases_count = sizeof(cg_cases) / sizeof(cg_cases[0]);

	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README

M	test/cg/CORPUS.md	\|	75	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M	test/cg/harness/cases.c	\|	891	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++