commit c9c0ce1781de35f3fe8b8c62610ca60e2f7eb097
parent 7e162375ecff7191eac6b17cecfed4ce7a56bcc5
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 17:25:12 -0700
opt: add path-aware memory GVN
Diffstat:
4 files changed, 464 insertions(+), 32 deletions(-)
diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c
@@ -83,13 +83,27 @@ typedef struct GvnMemVersion {
u32 version;
} GvnMemVersion;
+typedef struct GvnMemAvail {
+ GvnKey key;
+ Val val;
+} GvnMemAvail;
+
typedef struct GvnMemState {
GvnMemVersion* roots;
u32 nroots;
u32 cap;
+ GvnMemAvail* avail;
+ u32 navail;
+ u32 cap_avail;
u32 unknown_version;
} GvnMemState;
+typedef struct GvnBlockMemState {
+ GvnMemState in;
+ GvnMemState out;
+ u8 out_valid;
+} GvnBlockMemState;
+
typedef struct GvnCtx {
Func* f;
OptAnalysis* analysis;
@@ -97,6 +111,8 @@ typedef struct GvnCtx {
GvnConst* constants;
GvnTable table;
GvnMemState mem;
+ GvnBlockMemState* block_mem;
+ u8* local_escaped;
int changed;
int cfg_changed;
} GvnCtx;
@@ -1076,8 +1092,104 @@ static GvnMemVersion* gvn_mem_version(GvnCtx* ctx, u8 kind, u16 addr_space,
return r;
}
+static void gvn_mem_state_copy(GvnCtx* ctx, GvnMemState* dst,
+ const GvnMemState* src) {
+ memset(dst, 0, sizeof *dst);
+ dst->unknown_version = src->unknown_version;
+ if (src->nroots) {
+ dst->roots = arena_array(ctx->f->arena, GvnMemVersion, src->nroots);
+ memcpy(dst->roots, src->roots, sizeof(dst->roots[0]) * src->nroots);
+ dst->nroots = src->nroots;
+ dst->cap = src->nroots;
+ }
+ if (src->navail) {
+ dst->avail = arena_array(ctx->f->arena, GvnMemAvail, src->navail);
+ memcpy(dst->avail, src->avail, sizeof(dst->avail[0]) * src->navail);
+ dst->navail = src->navail;
+ dst->cap_avail = src->navail;
+ }
+}
+
+static int gvn_mem_key_tracks_unknown(GvnCtx* ctx, u8 kind, i64 root_id) {
+ if (kind == ALIAS_LOCAL && root_id > 0 && (u32)root_id <= ctx->f->nframe_slots)
+ return ctx->local_escaped && ctx->local_escaped[root_id];
+ return kind != ALIAS_STRING;
+}
+
+static int gvn_mem_call_clobbers_root(GvnCtx* ctx, u8 kind, i64 root_id) {
+ if (kind == ALIAS_LOCAL && root_id > 0 && (u32)root_id <= ctx->f->nframe_slots)
+ return !ctx->local_escaped || ctx->local_escaped[root_id];
+ return kind != ALIAS_STRING;
+}
+
+static void gvn_mem_avail_remove_at(GvnMemState* s, u32 i) {
+ if (i + 1u < s->navail)
+ memmove(&s->avail[i], &s->avail[i + 1u],
+ sizeof(s->avail[0]) * (s->navail - i - 1u));
+ --s->navail;
+}
+
+static void gvn_mem_avail_add(GvnCtx* ctx, const GvnKey* key, Val val) {
+ GvnMemState* s = &ctx->mem;
+ for (u32 i = 0; i < s->navail; ++i) {
+ if (gvn_key_equal(&s->avail[i].key, key)) {
+ s->avail[i].val = val;
+ return;
+ }
+ }
+ if (s->navail == s->cap_avail) {
+ u32 ncap = s->cap_avail ? s->cap_avail * 2u : 16u;
+ GvnMemAvail* avail = arena_array(ctx->f->arena, GvnMemAvail, ncap);
+ if (s->avail) memcpy(avail, s->avail, sizeof(avail[0]) * s->navail);
+ s->avail = avail;
+ s->cap_avail = ncap;
+ }
+ s->avail[s->navail].key = *key;
+ s->avail[s->navail].val = val;
+ ++s->navail;
+}
+
+static int gvn_mem_avail_find(GvnCtx* ctx, const GvnKey* key, u32 b, u32 i,
+ Val* out) {
+ for (u32 a = 0; a < ctx->mem.navail; ++a) {
+ Val v = ctx->mem.avail[a].val;
+ if (!gvn_key_equal(&ctx->mem.avail[a].key, key)) continue;
+ if (v == VAL_NONE || v >= ctx->f->nvals) continue;
+ if (!gvn_leader_dominates_site(ctx->analysis, ctx->f->val_def_block[v],
+ ctx->f->val_def_inst[v], b, i))
+ continue;
+ *out = v;
+ return 1;
+ }
+ return 0;
+}
+
+static void gvn_mem_avail_remove_call_clobbered(GvnCtx* ctx) {
+ GvnMemState* s = &ctx->mem;
+ for (u32 i = 0; i < s->navail;) {
+ GvnMemKey* m = &s->avail[i].key.mem;
+ if (!m->valid || m->root_kind == ALIAS_UNKNOWN ||
+ gvn_mem_call_clobbers_root(ctx, m->root_kind, m->root_id))
+ gvn_mem_avail_remove_at(s, i);
+ else
+ ++i;
+ }
+}
+
static void gvn_mem_barrier(GvnCtx* ctx) {
++ctx->mem.unknown_version;
+ for (u32 i = 0; i < ctx->mem.nroots; ++i) ++ctx->mem.roots[i].version;
+ ctx->mem.navail = 0;
+}
+
+static void gvn_mem_call_barrier(GvnCtx* ctx) {
+ ++ctx->mem.unknown_version;
+ for (u32 i = 0; i < ctx->mem.nroots; ++i) {
+ GvnMemVersion* r = &ctx->mem.roots[i];
+ if (gvn_mem_call_clobbers_root(ctx, r->root_kind, r->root_id))
+ ++r->version;
+ }
+ gvn_mem_avail_remove_call_clobbered(ctx);
}
static void gvn_mem_bump_store(GvnCtx* ctx, u8 kind, u16 addr_space,
@@ -1119,7 +1231,8 @@ static int gvn_make_memory_key(GvnCtx* ctx, const Inst* in, u32 addr_idx,
key->mem.root_kind = root_kind;
key->mem.root_id = root_id;
key->mem.offset = offset;
- key->mem.unknown_version = ctx->mem.unknown_version;
+ if (gvn_mem_key_tracks_unknown(ctx, root_kind, root_id))
+ key->mem.unknown_version = ctx->mem.unknown_version;
if (root_kind != ALIAS_UNKNOWN) {
key->mem.root_version =
gvn_mem_version(ctx, root_kind, mem->addr_space, root_id)->version;
@@ -1346,14 +1459,14 @@ static int gvn_visit_memory_inst(GvnCtx* ctx, u32 b, u32 i, Inst* in) {
ctx->f->val_cls[in->def], &key))
return 1;
Val leader = VAL_NONE;
- if (gvn_table_find(ctx, &key, b, i, &leader)) {
+ if (gvn_mem_avail_find(ctx, &key, b, i, &leader)) {
if (gvn_replace_dominated_uses(ctx, in->def, leader,
ctx->f->val_def_block[leader],
ctx->f->val_def_inst[leader]))
ctx->changed = 1;
return 1;
}
- gvn_table_add(&ctx->table, &key, in->def, b, i);
+ gvn_mem_avail_add(ctx, &key, in->def);
return 1;
}
@@ -1383,7 +1496,12 @@ static int gvn_visit_memory_inst(GvnCtx* ctx, u32 b, u32 i, Inst* in) {
if (!gvn_make_memory_key(ctx, in, 0u, ctx->f->val_type[src],
ctx->f->val_cls[src], &key))
return 1;
- gvn_table_add(&ctx->table, &key, src, b, i);
+ gvn_mem_avail_add(ctx, &key, src);
+ return 1;
+ }
+
+ if ((IROp)in->op == IR_CALL) {
+ gvn_mem_call_barrier(ctx);
return 1;
}
@@ -1421,6 +1539,169 @@ static void gvn_visit_inst(GvnCtx* ctx, u32 b, u32 i) {
gvn_table_add(&ctx->table, &key, in->def, b, i);
}
+static int gvn_raw_const_zero(Func* f, Val v) {
+ Inst* def = NULL;
+ return val_def_inst(f, v, &def) && def && (IROp)def->op == IR_LOAD_IMM &&
+ def->extra.imm == 0;
+}
+
+static int gvn_raw_operand_zero(Func* f, const Operand* op) {
+ if (!op) return 0;
+ if (op->kind == OPK_IMM) return op->v.imm == 0;
+ if (op->kind == OPK_REG) return gvn_raw_const_zero(f, (Val)op->v.reg);
+ return 0;
+}
+
+static int gvn_raw_local_addr_root(Func* f, Val v, u32 depth,
+ FrameSlot* out) {
+ Inst* def = NULL;
+ if (!f || depth > 4u || v == VAL_NONE) return 0;
+ if (!val_def_inst(f, v, &def) || !def) return 0;
+ if ((IROp)def->op == IR_ADDR_OF && def->nopnds >= 2 &&
+ def->opnds[1].kind == OPK_LOCAL) {
+ *out = def->opnds[1].v.frame_slot;
+ return 1;
+ }
+ if ((IROp)def->op == IR_COPY && def->nopnds >= 2 &&
+ def->opnds[1].kind == OPK_REG)
+ return gvn_raw_local_addr_root(f, (Val)def->opnds[1].v.reg, depth + 1u,
+ out);
+ if ((IROp)def->op == IR_BINOP && def->nopnds >= 3) {
+ if ((BinOp)def->extra.imm == BO_IADD) {
+ if (def->opnds[1].kind == OPK_REG &&
+ gvn_raw_operand_zero(f, &def->opnds[2]))
+ return gvn_raw_local_addr_root(f, (Val)def->opnds[1].v.reg,
+ depth + 1u, out);
+ if (def->opnds[2].kind == OPK_REG &&
+ gvn_raw_operand_zero(f, &def->opnds[1]))
+ return gvn_raw_local_addr_root(f, (Val)def->opnds[2].v.reg,
+ depth + 1u, out);
+ }
+ if ((BinOp)def->extra.imm == BO_ISUB && def->opnds[1].kind == OPK_REG &&
+ gvn_raw_operand_zero(f, &def->opnds[2]))
+ return gvn_raw_local_addr_root(f, (Val)def->opnds[1].v.reg,
+ depth + 1u, out);
+ }
+ return 0;
+}
+
+static void gvn_mark_escaped_operand(GvnCtx* ctx, const Operand* op) {
+ FrameSlot fs = FRAME_SLOT_NONE;
+ if (!op) return;
+ if (op->kind == OPK_REG) {
+ if (gvn_raw_local_addr_root(ctx->f, (Val)op->v.reg, 0, &fs) && fs > 0 &&
+ (u32)fs <= ctx->f->nframe_slots)
+ ctx->local_escaped[fs] = 1;
+ } else if (op->kind == OPK_INDIRECT) {
+ if (gvn_raw_local_addr_root(ctx->f, (Val)op->v.ind.base, 0, &fs) &&
+ fs > 0 && (u32)fs <= ctx->f->nframe_slots)
+ ctx->local_escaped[fs] = 1;
+ }
+}
+
+static void gvn_mark_escaped_abivalue(GvnCtx* ctx, const CGABIValue* v) {
+ if (!v) return;
+ gvn_mark_escaped_operand(ctx, &v->storage);
+ for (u32 i = 0; i < v->nparts; ++i)
+ gvn_mark_escaped_operand(ctx, &v->parts[i].op);
+}
+
+static void gvn_collect_local_escapes(GvnCtx* ctx) {
+ Func* f = ctx->f;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ switch ((IROp)in->op) {
+ case IR_LOAD:
+ case IR_ADDR_OF:
+ case IR_COPY:
+ break;
+ case IR_STORE:
+ if (in->nopnds >= 2) gvn_mark_escaped_operand(ctx, &in->opnds[1]);
+ break;
+ case IR_BINOP:
+ if (in->nopnds >= 3 && (BinOp)in->extra.imm == BO_IADD &&
+ (gvn_raw_operand_zero(f, &in->opnds[1]) ||
+ gvn_raw_operand_zero(f, &in->opnds[2])))
+ break;
+ if (in->nopnds >= 3 && (BinOp)in->extra.imm == BO_ISUB &&
+ gvn_raw_operand_zero(f, &in->opnds[2]))
+ break;
+ for (u32 o = 1; o < in->nopnds; ++o)
+ gvn_mark_escaped_operand(ctx, &in->opnds[o]);
+ break;
+ case IR_CALL: {
+ IRCallAux* aux = (IRCallAux*)in->extra.aux;
+ if (!aux) break;
+ if (aux->use_plan_replay) {
+ gvn_mark_escaped_operand(ctx, &aux->plan.callee);
+ for (u32 a = 0; a < aux->plan.nargs; ++a)
+ gvn_mark_escaped_operand(ctx, &aux->plan.args[a].src);
+ } else {
+ gvn_mark_escaped_operand(ctx, &aux->desc.callee);
+ for (u32 a = 0; a < aux->desc.nargs; ++a)
+ gvn_mark_escaped_abivalue(ctx, &aux->desc.args[a]);
+ }
+ break;
+ }
+ case IR_RET: {
+ IRRetAux* aux = (IRRetAux*)in->extra.aux;
+ if (aux && aux->present) gvn_mark_escaped_abivalue(ctx, &aux->val);
+ break;
+ }
+ default:
+ if (gvn_inst_memory_barrier(in))
+ for (u32 o = 0; o < in->nopnds; ++o)
+ gvn_mark_escaped_operand(ctx, &in->opnds[o]);
+ break;
+ }
+ }
+ }
+}
+
+static int gvn_mem_state_has_avail(const GvnMemState* s, const GvnKey* key,
+ Val val) {
+ for (u32 i = 0; i < s->navail; ++i)
+ if (s->avail[i].val == val && gvn_key_equal(&s->avail[i].key, key))
+ return 1;
+ return 0;
+}
+
+static void gvn_mem_merge_block_in(GvnCtx* ctx, u32 b) {
+ Func* f = ctx->f;
+ Block* bl = &f->blocks[b];
+ GvnBlockMemState* bs = &ctx->block_mem[b];
+ memset(&bs->in, 0, sizeof bs->in);
+ if (!bl->npreds) return;
+ const GvnMemState* first = NULL;
+ for (u32 p = 0; p < bl->npreds; ++p) {
+ u32 pred = bl->preds[p];
+ if (pred >= f->nblocks || !ctx->analysis->reachable[pred]) continue;
+ if (!ctx->block_mem[pred].out_valid) return;
+ if (!first) first = &ctx->block_mem[pred].out;
+ }
+ if (!first) return;
+ gvn_mem_state_copy(ctx, &bs->in, first);
+ for (u32 i = 0; i < bs->in.navail;) {
+ GvnMemAvail* a = &bs->in.avail[i];
+ int keep = 1;
+ for (u32 p = 0; p < bl->npreds; ++p) {
+ u32 pred = bl->preds[p];
+ if (pred >= f->nblocks || !ctx->analysis->reachable[pred]) continue;
+ if (!gvn_mem_state_has_avail(&ctx->block_mem[pred].out, &a->key,
+ a->val)) {
+ keep = 0;
+ break;
+ }
+ }
+ if (keep)
+ ++i;
+ else
+ gvn_mem_avail_remove_at(&bs->in, i);
+ }
+}
+
static void gvn_realign_phi_preds(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
@@ -1471,13 +1752,22 @@ void opt_gvn(Func* f) {
ctx.analysis = &a;
ctx.parent = arena_array(f->arena, Val, f->nvals ? f->nvals : 1u);
ctx.constants = arena_zarray(f->arena, GvnConst, f->nvals ? f->nvals : 1u);
+ ctx.block_mem =
+ arena_zarray(f->arena, GvnBlockMemState, f->nblocks ? f->nblocks : 1u);
+ ctx.local_escaped =
+ arena_zarray(f->arena, u8, f->nframe_slots ? f->nframe_slots + 1u : 1u);
for (Val v = 0; v < f->nvals; ++v) ctx.parent[v] = v;
gvn_table_init(&ctx.table, f);
+ gvn_collect_local_escapes(&ctx);
for (u32 ri = 0; ri < a.nrpo; ++ri) {
u32 b = a.rpo[ri];
if (b >= f->nblocks || !a.reachable[b]) continue;
+ gvn_mem_merge_block_in(&ctx, b);
+ gvn_mem_state_copy(&ctx, &ctx.mem, &ctx.block_mem[b].in);
for (u32 i = 0; i < f->blocks[b].ninsts; ++i) gvn_visit_inst(&ctx, b, i);
+ gvn_mem_state_copy(&ctx, &ctx.block_mem[b].out, &ctx.mem);
+ ctx.block_mem[b].out_valid = 1;
}
if (ctx.cfg_changed) {
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -477,6 +477,19 @@ static void emit_test_branch(Func* f, u32 b, u32 taken, u32 fallthrough,
f->blocks[b].nsucc = 2;
}
+static void emit_cond_branch(Func* f, u32 b, Val cond, u32 taken,
+ u32 fallthrough, CfreeCgTypeId ty) {
+ Inst* in = ir_emit(f, b, IR_CMP_BRANCH);
+ in->opnds = arena_array(f->arena, Operand, 2);
+ in->opnds[0] = op_reg_(cond, ty);
+ in->opnds[1] = op_imm_(0, ty);
+ in->nopnds = 2;
+ in->extra.imm = CMP_NE;
+ f->blocks[b].succ[0] = taken;
+ f->blocks[b].succ[1] = fallthrough;
+ f->blocks[b].nsucc = 2;
+}
+
static int bytes_contains(const unsigned char* data, size_t len,
const char* needle) {
size_t nlen = strlen(needle);
@@ -1123,6 +1136,21 @@ static CGCallDesc make_call_desc(TestCtx* tc, CfreeCgTypeId fn_ty,
return d;
}
+static Inst* emit_call_one_arg(TestCtx* tc, Func* f, u32 b, Operand arg,
+ CfreeCgTypeId arg_ty) {
+ Inst* in = ir_emit(f, b, IR_CALL);
+ IRCallAux* aux = arena_znew(f->arena, IRCallAux);
+ CfreeCgTypeId params[1] = {arg_ty};
+ CfreeCgTypeId fn_ty = make_func_type(tc, CFREE_CG_TYPE_NONE, params, 1, 0);
+ const ABIFuncInfo* abi = abi_cg_func_info(tc->c->abi, fn_ty);
+ CGABIValue* args = arena_zarray(f->arena, CGABIValue, 1);
+ args[0] = call_arg_value(arg_ty, abi ? &abi->params[0] : NULL, arg);
+ aux->desc = make_call_desc(tc, fn_ty, args, 1, (CGABIValue){0});
+ aux->desc.callee = op_global_(OBJ_SYM_NONE, 0, fn_ty);
+ in->extra.aux = aux;
+ return in;
+}
+
static void expect_plan_arg(const CGCallPlan* p, u32 i, u8 dst_kind, u8 cls,
Reg reg, u32 stack_offset, const char* ctx) {
EXPECT(i < p->nargs, "%s missing arg %u", ctx, (unsigned)i);
@@ -2793,6 +2821,92 @@ static void opt_gvn_reuses_store_to_addr_of_zero_index_load(void) {
tc_fini(&tc);
}
+static void opt_gvn_reuses_joined_same_value_store(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ u32 t = ir_block_new(f), e = ir_block_new(f), j = ir_block_new(f);
+ ir_note_emit(f, t);
+ ir_note_emit(f, e);
+ ir_note_emit(f, j);
+ Val c = add_val(f, tc.i32), src = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, c, tc.i32);
+ emit_scalar_input(f, f->entry, src, tc.i32);
+ emit_cond_branch(f, f->entry, c, t, e, tc.i32);
+ emit_store_local(f, t, fs, src, tc.i32, 0);
+ emit_br_to(f, t, j);
+ emit_store_local(f, e, fs, src, tc.i32, 0);
+ emit_br_to(f, e, j);
+ emit_load_local(f, j, out, fs, tc.i32, 0);
+ emit_ret_val(f, j, out, tc.i32);
+ opt_build_cfg(f);
+ opt_gvn(f);
+ opt_verify(f, "test-gvn-memory-join-same-store");
+ EXPECT(ret_val(f, j) == src,
+ "memory GVN should reuse joined stores only when all preds agree");
+ tc_fini(&tc);
+}
+
+static void opt_gvn_preserves_joined_different_or_missing_store(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ u32 t = ir_block_new(f), e = ir_block_new(f), j = ir_block_new(f);
+ ir_note_emit(f, t);
+ ir_note_emit(f, e);
+ ir_note_emit(f, j);
+ Val c = add_val(f, tc.i32), a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32), out = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, c, tc.i32);
+ emit_scalar_input(f, f->entry, a, tc.i32);
+ emit_scalar_input(f, f->entry, b, tc.i32);
+ emit_cond_branch(f, f->entry, c, t, e, tc.i32);
+ emit_store_local(f, t, fs, a, tc.i32, 0);
+ emit_br_to(f, t, j);
+ emit_store_local(f, e, fs, b, tc.i32, 0);
+ emit_br_to(f, e, j);
+ emit_load_local(f, j, out, fs, tc.i32, 0);
+ emit_ret_val(f, j, out, tc.i32);
+ opt_build_cfg(f);
+ opt_gvn(f);
+ opt_verify(f, "test-gvn-memory-join-different-store");
+ EXPECT(ret_val(f, j) == out,
+ "memory GVN should preserve joined load when preds disagree");
+ tc_fini(&tc);
+}
+
+static void opt_gvn_preserves_loop_header_load(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ u32 h = ir_block_new(f), body = ir_block_new(f), ex = ir_block_new(f);
+ ir_note_emit(f, h);
+ ir_note_emit(f, body);
+ ir_note_emit(f, ex);
+ Val c = add_val(f, tc.i32), init = add_val(f, tc.i32);
+ Val next = add_val(f, tc.i32), cur = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, c, tc.i32);
+ emit_scalar_input(f, f->entry, init, tc.i32);
+ emit_scalar_input(f, f->entry, next, tc.i32);
+ emit_store_local(f, f->entry, fs, init, tc.i32, 0);
+ emit_br_to(f, f->entry, h);
+ emit_load_local(f, h, cur, fs, tc.i32, 0);
+ emit_cond_branch(f, h, c, body, ex, tc.i32);
+ emit_store_local(f, body, fs, next, tc.i32, 0);
+ emit_br_to(f, body, h);
+ emit_ret_val(f, ex, cur, tc.i32);
+ opt_build_cfg(f);
+ opt_gvn(f);
+ opt_verify(f, "test-gvn-memory-loop-header");
+ EXPECT(ret_val(f, ex) == cur,
+ "memory GVN should not reuse preheader availability at loop headers");
+ tc_fini(&tc);
+}
+
static void opt_gvn_preserves_load_across_unknown_store(void) {
TestCtx tc;
tc_init(&tc);
@@ -2831,33 +2945,47 @@ static void opt_gvn_preserves_load_across_unknown_store(void) {
tc_fini(&tc);
}
-static void opt_gvn_preserves_load_across_call(void) {
+static void opt_gvn_preserves_nonescaped_local_across_call(void) {
TestCtx tc;
tc_init(&tc);
Func* f = new_func(&tc);
FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
- Val first = add_val(f, tc.i32);
- Val second = add_val(f, tc.i32);
- emit_load_local(f, f->entry, first, fs, tc.i32, 0);
+ Val src = add_val(f, tc.i32);
+ Val loaded = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, src, tc.i32);
+ emit_store_local(f, f->entry, fs, src, tc.i32, 0);
emit_call_void(f, f->entry);
- emit_load_local(f, f->entry, second, fs, tc.i32, 0);
- emit_ret_val(f, f->entry, second, tc.i32);
+ emit_load_local(f, f->entry, loaded, fs, tc.i32, 0);
+ emit_ret_val(f, f->entry, loaded, tc.i32);
opt_build_cfg(f);
opt_gvn(f);
- opt_verify(f, "test-gvn-memory-call-barrier");
- EXPECT(ret_val(f, f->entry) == second,
- "memory GVN should not rewrite across calls");
- expect_ir_dump_eq(f,
- "ir blocks=1 vals=3\n"
- "block 0 preds=[] succs=[] insts=4\n"
- " 0 load def=v1 opnds=[v1,local#1] mem=size4 align4 "
- "flags=0x0 alias=local#1\n"
- " 1 call\n"
- " 2 load def=v2 opnds=[v2,local#1] mem=size4 align4 "
- "flags=0x0 alias=local#1\n"
- " 3 ret ret=v2\n",
- "memory GVN call barrier");
+ opt_verify(f, "test-gvn-memory-call-preserves-nonescaped-local");
+ EXPECT(ret_val(f, f->entry) == src,
+ "memory GVN should preserve non-escaped locals across calls");
+ tc_fini(&tc);
+}
+
+static void opt_gvn_clobbers_escaped_local_across_call(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0);
+ Val src = add_val(f, tc.i32);
+ Val addr = add_val(f, ptr_ty);
+ Val loaded = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, src, tc.i32);
+ emit_store_local(f, f->entry, fs, src, tc.i32, 0);
+ emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32);
+ emit_call_one_arg(&tc, f, f->entry, op_reg_(addr, ptr_ty), ptr_ty);
+ emit_load_local(f, f->entry, loaded, fs, tc.i32, 0);
+ emit_ret_val(f, f->entry, loaded, tc.i32);
+ opt_build_cfg(f);
+ opt_gvn(f);
+ opt_verify(f, "test-gvn-memory-call-clobbers-escaped-local");
+ EXPECT(ret_val(f, f->entry) == loaded,
+ "memory GVN should clobber locals whose address reaches a call");
tc_fini(&tc);
}
@@ -5270,8 +5398,12 @@ int main(void) {
opt_gvn_rewrites_redundant_local_load();
opt_gvn_reuses_store_to_local_load();
opt_gvn_reuses_store_to_addr_of_zero_index_load();
+ opt_gvn_reuses_joined_same_value_store();
+ opt_gvn_preserves_joined_different_or_missing_store();
+ opt_gvn_preserves_loop_header_load();
opt_gvn_preserves_load_across_unknown_store();
- opt_gvn_preserves_load_across_call();
+ opt_gvn_preserves_nonescaped_local_across_call();
+ opt_gvn_clobbers_escaped_local_across_call();
opt_gvn_preserves_observable_memory_loads();
opt_jump_cleanup_forwards_branch_targets();
opt_jump_cleanup_inverts_to_remove_jump_block();
diff --git a/test/toy/cases/128_o2_branch_join_addr_mem.expected b/test/toy/cases/128_o2_branch_join_addr_mem.expected
@@ -1 +1 @@
-52
+0
diff --git a/test/toy/cases/128_o2_branch_join_addr_mem.toy b/test/toy/cases/128_o2_branch_join_addr_mem.toy
@@ -1,16 +1,26 @@
-fn choose(flag: i64): i64 {
- var x: i64 = 5;
+fn same(flag: i64, v: i64): i64 {
+ var x: i64 = 0;
if flag != 0 {
- x = x + 10;
+ x = v;
} else {
- x = x + 20;
+ x = v;
}
- let p: *i64 = &x;
- return p[0] + 6;
+ return (&x).* + 1;
+}
+
+fn different(flag: i64, a: i64, b: i64): i64 {
+ var x: i64 = 0;
+ if flag != 0 {
+ x = a;
+ } else {
+ x = b;
+ }
+ return (&x).*;
}
fn __user_main(): i64 {
- return choose(1) + choose(0);
+ return (same(1, 41) - 42) + (same(0, 41) - 42) +
+ (different(1, 17, 23) - 17) + (different(0, 17, 23) - 23);
}
fn main(): i32 { return __user_main() as i32; }