commit df0362afce63cd628b21bd4bad1a7ea2f09fe35e
parent fabd386acadaae35207001867c0fefd28ab1aaf3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 17:36:16 -0700
Implement O2 dead store elimination
Diffstat:
4 files changed, 537 insertions(+), 4 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -800,7 +800,7 @@ Implement in order:
8. [x] CG identity expansion, shared `opt_simplify_local`, and O2
`opt_simplify` as described in Section 4.3
9. [x] extended memory tracker for joins/path state
-10. [ ] `opt_dse`
+10. [x] `opt_dse`
11. [ ] `opt_licm`
12. [ ] `opt_pressure_relief`
13. [x] `opt_make_conventional_ssa`
@@ -898,6 +898,21 @@ Current memory GVN slice:
non-escaping address-taken locals, while globals/params/heap/TLS, escaped
locals, and unknown roots remain conservatively clobbered.
+Current DSE slice:
+
+- [x] `opt_dse` runs in the O2 SSA schedule after GVN, copy propagation, and
+ SSA DCE, before conventional-SSA lowering.
+- [x] Backward memory liveness removes exact precise-root stores that are
+ overwritten before any overlapping read, and removes unread stores to
+ non-escaped locals at function exit.
+- [x] Calls, volatile/atomic accesses, aggregate memory ops, bitfield memory
+ ops, varargs memory ops, fences, inline asm, and intrinsics remain
+ conservative barriers. Escaped locals and externally observable roots stay
+ live across calls/exits.
+- [x] Pass-local tests cover same-block and all-path overwritten local stores,
+ unread non-escaped locals, escaped-local call preservation, and volatile
+ preservation.
+
Scalar/address identity simplification now runs before memory GVN. Red-green
coverage includes integer neutral/annihilator identities, same-register
integer identities, same-value integer compares, exact no-op conversions,
@@ -923,8 +938,8 @@ Remaining Phase D exit criteria:
- [x] Identity simplification has focused pass-local tests, a toy end-to-end
fixture, and an AArch64 disassembly sanity check showing identity arithmetic
removed from the generated code.
-- [ ] DSE, LICM, pressure relief, SSA combine, and full O2 jump optimization
- each have focused red-green tests before they enter the default O2 schedule.
+- [ ] LICM, pressure relief, SSA combine, and full O2 jump optimization each
+ have focused red-green tests before they enter the default O2 schedule.
### Phase E - Inlining and Cleanup
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1439,6 +1439,10 @@ static void opt_run_o2_pipeline(OptImpl* o) {
opt_verify(o->f, "o2-copy-prop-ssa");
opt_ssa_dce(o->f);
opt_verify(o->f, "o2-copy-dce");
+ opt_dse(o->f);
+ opt_verify(o->f, "o2-dse-ssa");
+ opt_ssa_dce(o->f);
+ opt_verify(o->f, "o2-dse-dce");
opt_make_conventional_ssa(o->f);
opt_verify(o->f, "o2-conventional-ssa");
opt_undo_ssa(o->f);
diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c
@@ -9,6 +9,7 @@
#define OPT_CLONE_MAX_PREDS 4u
#define OPT_CLONE_ABS_GROWTH_LIMIT 32u
#define GVN_ENTRY_NONE 0xffffffffu
+#define DSE_KEY_NONE 0xffffffffu
typedef struct CloneValMap {
Val old_val;
@@ -117,6 +118,43 @@ typedef struct GvnCtx {
int cfg_changed;
} GvnCtx;
+typedef struct DseKey {
+ u8 root_kind;
+ u8 pad0;
+ u16 addr_space;
+ u16 flags;
+ u16 pad1;
+ CfreeCgTypeId mem_type;
+ u32 size;
+ u32 align;
+ i64 root_id;
+ i64 offset;
+} DseKey;
+
+typedef struct DseBitset {
+ u64* words;
+} DseBitset;
+
+typedef struct DseBlockState {
+ DseBitset in;
+ DseBitset out;
+} DseBlockState;
+
+typedef struct DseCtx {
+ Func* f;
+ GvnCtx gvn;
+ DseKey* keys;
+ u32 nkeys;
+ u32 cap_keys;
+ u32** store_key_by_inst;
+ DseBlockState* block;
+ DseBitset scratch_in;
+ DseBitset scratch_out;
+ u8* local_escaped;
+ u32 words;
+ int changed;
+} DseCtx;
+
static u32 opt_inst_count(Func* f) {
u32 n = 0;
for (u32 b = 0; b < f->nblocks; ++b) n += f->blocks[b].ninsts;
@@ -1781,8 +1819,364 @@ void opt_gvn(Func* f) {
opt_rebuild_def_use(f);
}
+static int dse_key_equal(const DseKey* a, const DseKey* b) {
+ return a->root_kind == b->root_kind && a->addr_space == b->addr_space &&
+ a->flags == b->flags && a->mem_type == b->mem_type &&
+ a->size == b->size && a->align == b->align &&
+ a->root_id == b->root_id && a->offset == b->offset;
+}
+
+static int dse_key_same_root(const DseKey* a, const DseKey* b) {
+ return a->root_kind == b->root_kind && a->addr_space == b->addr_space &&
+ a->root_id == b->root_id;
+}
+
+static int dse_ranges_overlap(const DseKey* a, const DseKey* b) {
+ i64 ae = a->offset + (i64)a->size;
+ i64 be = b->offset + (i64)b->size;
+ return a->offset < be && b->offset < ae;
+}
+
+static int dse_range_covers(const DseKey* cover, const DseKey* covered) {
+ i64 ce = cover->offset + (i64)cover->size;
+ i64 de = covered->offset + (i64)covered->size;
+ return cover->offset <= covered->offset && ce >= de;
+}
+
+static int dse_root_call_clobbered(DseCtx* ctx, const DseKey* key) {
+ if (key->root_kind == ALIAS_LOCAL && key->root_id > 0 &&
+ (u32)key->root_id <= ctx->f->nframe_slots)
+ return ctx->local_escaped && ctx->local_escaped[key->root_id];
+ return key->root_kind != ALIAS_STRING;
+}
+
+static int dse_root_observable_at_exit(DseCtx* ctx, const DseKey* key) {
+ if (key->root_kind == ALIAS_LOCAL && key->root_id > 0 &&
+ (u32)key->root_id <= ctx->f->nframe_slots)
+ return ctx->local_escaped && ctx->local_escaped[key->root_id];
+ return key->root_kind != ALIAS_UNKNOWN;
+}
+
+static int dse_make_key(DseCtx* ctx, const Inst* in, u32 addr_idx,
+ DseKey* out) {
+ u8 root_kind;
+ i64 root_id;
+ i64 offset;
+ int singleton;
+
+ if (!ctx || !in || addr_idx >= in->nopnds ||
+ opt_mem_observable(&in->extra.mem))
+ return 0;
+ if (in->extra.mem.size == 0) return 0;
+ if (!gvn_mem_root_from_access(&ctx->gvn, &in->opnds[addr_idx],
+ &in->extra.mem, &root_kind, &root_id, &offset,
+ &singleton))
+ return 0;
+ if (!singleton || root_kind == ALIAS_UNKNOWN) return 0;
+
+ memset(out, 0, sizeof *out);
+ out->root_kind = root_kind;
+ out->addr_space = in->extra.mem.addr_space;
+ out->flags = in->extra.mem.flags;
+ out->mem_type = in->extra.mem.type;
+ out->size = in->extra.mem.size;
+ out->align = in->extra.mem.align;
+ out->root_id = root_id;
+ out->offset = offset;
+ return 1;
+}
+
+static u32 dse_key_intern(DseCtx* ctx, const DseKey* key) {
+ for (u32 i = 0; i < ctx->nkeys; ++i)
+ if (dse_key_equal(&ctx->keys[i], key)) return i;
+ if (ctx->nkeys == ctx->cap_keys) {
+ u32 ncap = ctx->cap_keys ? ctx->cap_keys * 2u : 16u;
+ DseKey* keys = arena_array(ctx->f->arena, DseKey, ncap);
+ if (ctx->keys) memcpy(keys, ctx->keys, sizeof(keys[0]) * ctx->nkeys);
+ ctx->keys = keys;
+ ctx->cap_keys = ncap;
+ }
+ ctx->keys[ctx->nkeys] = *key;
+ return ctx->nkeys++;
+}
+
+static void dse_bitset_clear(DseCtx* ctx, DseBitset* bs) {
+ memset(bs->words, 0, sizeof(bs->words[0]) * ctx->words);
+}
+
+static int dse_bitset_copy(DseCtx* ctx, DseBitset* dst, const DseBitset* src) {
+ int changed =
+ memcmp(dst->words, src->words, sizeof(dst->words[0]) * ctx->words) != 0;
+ if (changed)
+ memcpy(dst->words, src->words, sizeof(dst->words[0]) * ctx->words);
+ return changed;
+}
+
+static void dse_bitset_or(DseCtx* ctx, DseBitset* dst, const DseBitset* src) {
+ for (u32 w = 0; w < ctx->words; ++w) dst->words[w] |= src->words[w];
+}
+
+static int dse_bitset_test(const DseBitset* bs, u32 bit) {
+ return (bs->words[bit / 64u] & (1ull << (bit % 64u))) != 0;
+}
+
+static void dse_bitset_set(DseBitset* bs, u32 bit) {
+ bs->words[bit / 64u] |= 1ull << (bit % 64u);
+}
+
+static void dse_bitset_reset(DseBitset* bs, u32 bit) {
+ bs->words[bit / 64u] &= ~(1ull << (bit % 64u));
+}
+
+static int dse_live_overlaps(DseCtx* ctx, const DseBitset* live,
+ const DseKey* key) {
+ for (u32 i = 0; i < ctx->nkeys; ++i) {
+ if (!dse_bitset_test(live, i)) continue;
+ if (dse_key_same_root(&ctx->keys[i], key) &&
+ dse_ranges_overlap(&ctx->keys[i], key))
+ return 1;
+ }
+ return 0;
+}
+
+static void dse_live_mark_overlaps(DseCtx* ctx, DseBitset* live,
+ const DseKey* key) {
+ for (u32 i = 0; i < ctx->nkeys; ++i) {
+ if (dse_key_same_root(&ctx->keys[i], key) &&
+ dse_ranges_overlap(&ctx->keys[i], key))
+ dse_bitset_set(live, i);
+ }
+}
+
+static void dse_live_clear_covered(DseCtx* ctx, DseBitset* live,
+ const DseKey* key) {
+ for (u32 i = 0; i < ctx->nkeys; ++i) {
+ if (dse_key_same_root(&ctx->keys[i], key) &&
+ dse_range_covers(key, &ctx->keys[i]))
+ dse_bitset_reset(live, i);
+ }
+}
+
+static void dse_live_mark_all(DseCtx* ctx, DseBitset* live) {
+ for (u32 i = 0; i < ctx->nkeys; ++i) dse_bitset_set(live, i);
+}
+
+static void dse_live_mark_call_clobbered(DseCtx* ctx, DseBitset* live) {
+ for (u32 i = 0; i < ctx->nkeys; ++i)
+ if (dse_root_call_clobbered(ctx, &ctx->keys[i])) dse_bitset_set(live, i);
+}
+
+static void dse_live_mark_exit_observable(DseCtx* ctx, DseBitset* live) {
+ for (u32 i = 0; i < ctx->nkeys; ++i)
+ if (dse_root_observable_at_exit(ctx, &ctx->keys[i]))
+ dse_bitset_set(live, i);
+}
+
+static int dse_store_site_key(DseCtx* ctx, u32 block, u32 inst, u32* out) {
+ if (!ctx->store_key_by_inst || block >= ctx->f->nblocks ||
+ inst >= ctx->f->blocks[block].ninsts || !ctx->store_key_by_inst[block])
+ return 0;
+ *out = ctx->store_key_by_inst[block][inst];
+ if (*out != DSE_KEY_NONE) return 1;
+ return 0;
+}
+
+static int dse_inst_full_barrier(const Inst* in) {
+ return gvn_inst_memory_barrier(in);
+}
+
+static void dse_transfer_inst(DseCtx* ctx, DseBitset* live, u32 block, u32 inst,
+ int mark_dead) {
+ Inst* in = &ctx->f->blocks[block].insts[inst];
+ DseKey key;
+ u32 key_id;
+
+ switch ((IROp)in->op) {
+ case IR_LOAD:
+ if (opt_mem_observable(&in->extra.mem)) {
+ dse_live_mark_all(ctx, live);
+ } else if (dse_make_key(ctx, in, 1u, &key)) {
+ dse_live_mark_overlaps(ctx, live, &key);
+ } else {
+ dse_live_mark_all(ctx, live);
+ }
+ break;
+ case IR_STORE:
+ if (!dse_store_site_key(ctx, block, inst, &key_id)) {
+ dse_live_mark_all(ctx, live);
+ break;
+ }
+ key = ctx->keys[key_id];
+ if (mark_dead && !dse_live_overlaps(ctx, live, &key)) {
+ in->op = IR_NOP;
+ in->nopnds = 0;
+ in->opnds = NULL;
+ ctx->changed = 1;
+ }
+ dse_live_clear_covered(ctx, live, &key);
+ break;
+ case IR_CALL:
+ dse_live_mark_call_clobbered(ctx, live);
+ break;
+ default:
+ if (dse_inst_full_barrier(in)) dse_live_mark_all(ctx, live);
+ break;
+ }
+}
+
+static void dse_collect_constants(GvnCtx* ctx) {
+ for (u32 b = 0; b < ctx->f->nblocks; ++b) {
+ Block* bl = &ctx->f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) gvn_note_const(ctx, &bl->insts[i]);
+ }
+}
+
+static void dse_collect_stores(DseCtx* ctx) {
+ Func* f = ctx->f;
+ ctx->store_key_by_inst =
+ arena_zarray(f->arena, u32*, f->nblocks ? f->nblocks : 1u);
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ ctx->store_key_by_inst[b] =
+ arena_array(f->arena, u32, bl->ninsts ? bl->ninsts : 1u);
+ for (u32 i = 0; i < bl->ninsts; ++i)
+ ctx->store_key_by_inst[b][i] = DSE_KEY_NONE;
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ DseKey key;
+ if ((IROp)in->op != IR_STORE) continue;
+ if (!dse_make_key(ctx, in, 0u, &key)) continue;
+ ctx->store_key_by_inst[b][i] = dse_key_intern(ctx, &key);
+ }
+ }
+}
+
+static DseBitset dse_bitset_new(DseCtx* ctx) {
+ DseBitset bs;
+ bs.words = arena_zarray(ctx->f->arena, u64, ctx->words ? ctx->words : 1u);
+ return bs;
+}
+
+static void dse_init_block_sets(DseCtx* ctx) {
+ Func* f = ctx->f;
+ ctx->words = (ctx->nkeys + 63u) / 64u;
+ if (!ctx->words) ctx->words = 1u;
+ ctx->block =
+ arena_zarray(f->arena, DseBlockState, f->nblocks ? f->nblocks : 1u);
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ ctx->block[b].in = dse_bitset_new(ctx);
+ ctx->block[b].out = dse_bitset_new(ctx);
+ }
+ ctx->scratch_in = dse_bitset_new(ctx);
+ ctx->scratch_out = dse_bitset_new(ctx);
+}
+
+static void dse_compute_block_out(DseCtx* ctx, u32 b, DseBitset* out) {
+ Func* f = ctx->f;
+ Block* bl = &f->blocks[b];
+ dse_bitset_clear(ctx, out);
+ if (!bl->nsucc) {
+ dse_live_mark_exit_observable(ctx, out);
+ return;
+ }
+ for (u32 s = 0; s < bl->nsucc; ++s) {
+ u32 succ = bl->succ[s];
+ if (succ < f->nblocks) dse_bitset_or(ctx, out, &ctx->block[succ].in);
+ }
+}
+
+static void dse_transfer_block(DseCtx* ctx, u32 b, const DseBitset* out,
+ DseBitset* in, int mark_dead) {
+ Block* bl = &ctx->f->blocks[b];
+ dse_bitset_copy(ctx, in, out);
+ for (u32 ri = bl->ninsts; ri > 0; --ri)
+ dse_transfer_inst(ctx, in, b, ri - 1u, mark_dead);
+}
+
+static void dse_refresh_def_locations(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if (in->def != VAL_NONE && in->def < f->nvals) {
+ f->val_def_block[in->def] = b;
+ f->val_def_inst[in->def] = i;
+ }
+ for (u32 d = 0; d < in->ndefs; ++d) {
+ Val v = in->defs[d];
+ if (v != VAL_NONE && v < f->nvals) {
+ f->val_def_block[v] = b;
+ f->val_def_inst[v] = i;
+ }
+ }
+ }
+ }
+}
+
+static void dse_compact_nops(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ u32 w = 0;
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ if ((IROp)bl->insts[i].op == IR_NOP) continue;
+ bl->insts[w++] = bl->insts[i];
+ }
+ bl->ninsts = w;
+ }
+ dse_refresh_def_locations(f);
+}
+
void opt_dse(Func* f) {
- if (f) opt_rebuild_def_use(f);
+ if (!f || f->opt_rewritten) return;
+ if (!f->opt_reg_ssa && f->npregs > 1) {
+ opt_rebuild_def_use(f);
+ return;
+ }
+
+ opt_rebuild_def_use(f);
+ DseCtx ctx;
+ memset(&ctx, 0, sizeof ctx);
+ ctx.f = f;
+ ctx.local_escaped =
+ arena_zarray(f->arena, u8, f->nframe_slots ? f->nframe_slots + 1u : 1u);
+
+ memset(&ctx.gvn, 0, sizeof ctx.gvn);
+ ctx.gvn.f = f;
+ ctx.gvn.parent = arena_array(f->arena, Val, f->nvals ? f->nvals : 1u);
+ ctx.gvn.constants =
+ arena_zarray(f->arena, GvnConst, f->nvals ? f->nvals : 1u);
+ ctx.gvn.local_escaped = ctx.local_escaped;
+ for (Val v = 0; v < f->nvals; ++v) ctx.gvn.parent[v] = v;
+ gvn_collect_local_escapes(&ctx.gvn);
+ dse_collect_constants(&ctx.gvn);
+ dse_collect_stores(&ctx);
+ if (!ctx.nkeys) {
+ opt_rebuild_def_use(f);
+ return;
+ }
+
+ dse_init_block_sets(&ctx);
+ int changed = 1;
+ while (changed) {
+ changed = 0;
+ for (u32 rb = f->nblocks; rb > 0; --rb) {
+ u32 b = rb - 1u;
+ dse_compute_block_out(&ctx, b, &ctx.scratch_out);
+ dse_transfer_block(&ctx, b, &ctx.scratch_out, &ctx.scratch_in, 0);
+ changed |= dse_bitset_copy(&ctx, &ctx.block[b].out, &ctx.scratch_out);
+ changed |= dse_bitset_copy(&ctx, &ctx.block[b].in, &ctx.scratch_in);
+ }
+ }
+
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ dse_transfer_block(&ctx, b, &ctx.block[b].out, &ctx.scratch_in, 1);
+ }
+
+ if (ctx.changed) {
+ dse_compact_nops(f);
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ }
+ opt_rebuild_def_use(f);
}
void opt_cleanup(Func* f) {
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -3205,6 +3205,121 @@ static void opt_gvn_preserves_observable_memory_loads(void) {
tc_fini(&tc);
}
+static void opt_dse_removes_overwritten_local_store(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ Val first = add_val(f, tc.i32);
+ Val second = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, first, tc.i32);
+ emit_scalar_input(f, f->entry, second, tc.i32);
+ emit_store_local(f, f->entry, fs, first, tc.i32, 0);
+ emit_store_local(f, f->entry, fs, second, tc.i32, 0);
+ emit_load_local(f, f->entry, out, fs, tc.i32, 0);
+ emit_ret_val(f, f->entry, out, tc.i32);
+
+ opt_build_cfg(f);
+ opt_dse(f);
+ opt_verify(f, "test-dse-overwritten-local-store");
+ EXPECT(count_op(f, IR_STORE) == 1,
+ "DSE should remove an exactly overwritten local store");
+ tc_fini(&tc);
+}
+
+static void opt_dse_removes_store_overwritten_on_all_paths(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ u32 t = ir_block_new(f), e = ir_block_new(f), j = ir_block_new(f);
+ ir_note_emit(f, t);
+ ir_note_emit(f, e);
+ ir_note_emit(f, j);
+ Val cond = add_val(f, tc.i32);
+ Val init = add_val(f, tc.i32);
+ Val tv = add_val(f, tc.i32);
+ Val ev = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, cond, tc.i32);
+ emit_scalar_input(f, f->entry, init, tc.i32);
+ emit_scalar_input(f, f->entry, tv, tc.i32);
+ emit_scalar_input(f, f->entry, ev, tc.i32);
+ emit_store_local(f, f->entry, fs, init, tc.i32, 0);
+ emit_cond_branch(f, f->entry, cond, t, e, tc.i32);
+ emit_store_local(f, t, fs, tv, tc.i32, 0);
+ emit_br_to(f, t, j);
+ emit_store_local(f, e, fs, ev, tc.i32, 0);
+ emit_br_to(f, e, j);
+ emit_load_local(f, j, out, fs, tc.i32, 0);
+ emit_ret_val(f, j, out, tc.i32);
+
+ opt_build_cfg(f);
+ opt_dse(f);
+ opt_verify(f, "test-dse-overwritten-all-paths");
+ EXPECT(count_op(f, IR_STORE) == 2,
+ "DSE should remove stores overwritten on every successor path");
+ tc_fini(&tc);
+}
+
+static void opt_dse_removes_unread_nonescaped_local_store(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ Val src = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, src, tc.i32);
+ emit_store_local(f, f->entry, fs, src, tc.i32, 0);
+ emit_ret_val(f, f->entry, src, tc.i32);
+
+ opt_build_cfg(f);
+ opt_dse(f);
+ opt_verify(f, "test-dse-unread-nonescaped-local-store");
+ EXPECT(count_op(f, IR_STORE) == 0,
+ "DSE should remove unread stores to non-escaped locals");
+ tc_fini(&tc);
+}
+
+static void opt_dse_preserves_escaped_local_across_call(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0);
+ Val src = add_val(f, tc.i32);
+ Val addr = add_val(f, ptr_ty);
+ emit_scalar_input(f, f->entry, src, tc.i32);
+ emit_store_local(f, f->entry, fs, src, tc.i32, 0);
+ emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32);
+ emit_call_one_arg(&tc, f, f->entry, op_reg_(addr, ptr_ty), ptr_ty);
+ emit_ret_val(f, f->entry, src, tc.i32);
+
+ opt_build_cfg(f);
+ opt_dse(f);
+ opt_verify(f, "test-dse-escaped-local-call");
+ EXPECT(count_op(f, IR_STORE) == 1,
+ "DSE should preserve stores visible to a call through escaped locals");
+ tc_fini(&tc);
+}
+
+static void opt_dse_preserves_volatile_store(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ Val src = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, src, tc.i32);
+ emit_store_local(f, f->entry, fs, src, tc.i32, MF_VOLATILE);
+ emit_ret_val(f, f->entry, src, tc.i32);
+
+ opt_build_cfg(f);
+ opt_dse(f);
+ opt_verify(f, "test-dse-volatile-store");
+ EXPECT(count_op(f, IR_STORE) == 1, "DSE should preserve volatile stores");
+ tc_fini(&tc);
+}
+
static void opt_jump_cleanup_forwards_branch_targets(void) {
TestCtx tc;
tc_init(&tc);
@@ -5581,6 +5696,11 @@ int main(void) {
opt_gvn_preserves_nonescaped_local_across_call();
opt_gvn_clobbers_escaped_local_across_call();
opt_gvn_preserves_observable_memory_loads();
+ opt_dse_removes_overwritten_local_store();
+ opt_dse_removes_store_overwritten_on_all_paths();
+ opt_dse_removes_unread_nonescaped_local_store();
+ opt_dse_preserves_escaped_local_across_call();
+ opt_dse_preserves_volatile_store();
opt_jump_cleanup_forwards_branch_targets();
opt_jump_cleanup_inverts_to_remove_jump_block();
opt_jump_cleanup_keeps_conditional_fallthrough_block();