kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit c9c0ce1781de35f3fe8b8c62610ca60e2f7eb097
parent 7e162375ecff7191eac6b17cecfed4ce7a56bcc5
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 17:25:12 -0700

opt: add path-aware memory GVN

Diffstat:
Msrc/opt/pass_o2.c | 298+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mtest/opt/opt_test.c | 172+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Mtest/toy/cases/128_o2_branch_join_addr_mem.expected | 2+-
Mtest/toy/cases/128_o2_branch_join_addr_mem.toy | 24+++++++++++++++++-------
4 files changed, 464 insertions(+), 32 deletions(-)

diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c @@ -83,13 +83,27 @@ typedef struct GvnMemVersion { u32 version; } GvnMemVersion; +typedef struct GvnMemAvail { + GvnKey key; + Val val; +} GvnMemAvail; + typedef struct GvnMemState { GvnMemVersion* roots; u32 nroots; u32 cap; + GvnMemAvail* avail; + u32 navail; + u32 cap_avail; u32 unknown_version; } GvnMemState; +typedef struct GvnBlockMemState { + GvnMemState in; + GvnMemState out; + u8 out_valid; +} GvnBlockMemState; + typedef struct GvnCtx { Func* f; OptAnalysis* analysis; @@ -97,6 +111,8 @@ typedef struct GvnCtx { GvnConst* constants; GvnTable table; GvnMemState mem; + GvnBlockMemState* block_mem; + u8* local_escaped; int changed; int cfg_changed; } GvnCtx; @@ -1076,8 +1092,104 @@ static GvnMemVersion* gvn_mem_version(GvnCtx* ctx, u8 kind, u16 addr_space, return r; } +static void gvn_mem_state_copy(GvnCtx* ctx, GvnMemState* dst, + const GvnMemState* src) { + memset(dst, 0, sizeof *dst); + dst->unknown_version = src->unknown_version; + if (src->nroots) { + dst->roots = arena_array(ctx->f->arena, GvnMemVersion, src->nroots); + memcpy(dst->roots, src->roots, sizeof(dst->roots[0]) * src->nroots); + dst->nroots = src->nroots; + dst->cap = src->nroots; + } + if (src->navail) { + dst->avail = arena_array(ctx->f->arena, GvnMemAvail, src->navail); + memcpy(dst->avail, src->avail, sizeof(dst->avail[0]) * src->navail); + dst->navail = src->navail; + dst->cap_avail = src->navail; + } +} + +static int gvn_mem_key_tracks_unknown(GvnCtx* ctx, u8 kind, i64 root_id) { + if (kind == ALIAS_LOCAL && root_id > 0 && (u32)root_id <= ctx->f->nframe_slots) + return ctx->local_escaped && ctx->local_escaped[root_id]; + return kind != ALIAS_STRING; +} + +static int gvn_mem_call_clobbers_root(GvnCtx* ctx, u8 kind, i64 root_id) { + if (kind == ALIAS_LOCAL && root_id > 0 && (u32)root_id <= ctx->f->nframe_slots) + return !ctx->local_escaped || ctx->local_escaped[root_id]; + return kind != ALIAS_STRING; +} + +static void gvn_mem_avail_remove_at(GvnMemState* s, u32 i) { + if (i + 1u < s->navail) + memmove(&s->avail[i], &s->avail[i + 1u], + sizeof(s->avail[0]) * (s->navail - i - 1u)); + --s->navail; +} + +static void gvn_mem_avail_add(GvnCtx* ctx, const GvnKey* key, Val val) { + GvnMemState* s = &ctx->mem; + for (u32 i = 0; i < s->navail; ++i) { + if (gvn_key_equal(&s->avail[i].key, key)) { + s->avail[i].val = val; + return; + } + } + if (s->navail == s->cap_avail) { + u32 ncap = s->cap_avail ? s->cap_avail * 2u : 16u; + GvnMemAvail* avail = arena_array(ctx->f->arena, GvnMemAvail, ncap); + if (s->avail) memcpy(avail, s->avail, sizeof(avail[0]) * s->navail); + s->avail = avail; + s->cap_avail = ncap; + } + s->avail[s->navail].key = *key; + s->avail[s->navail].val = val; + ++s->navail; +} + +static int gvn_mem_avail_find(GvnCtx* ctx, const GvnKey* key, u32 b, u32 i, + Val* out) { + for (u32 a = 0; a < ctx->mem.navail; ++a) { + Val v = ctx->mem.avail[a].val; + if (!gvn_key_equal(&ctx->mem.avail[a].key, key)) continue; + if (v == VAL_NONE || v >= ctx->f->nvals) continue; + if (!gvn_leader_dominates_site(ctx->analysis, ctx->f->val_def_block[v], + ctx->f->val_def_inst[v], b, i)) + continue; + *out = v; + return 1; + } + return 0; +} + +static void gvn_mem_avail_remove_call_clobbered(GvnCtx* ctx) { + GvnMemState* s = &ctx->mem; + for (u32 i = 0; i < s->navail;) { + GvnMemKey* m = &s->avail[i].key.mem; + if (!m->valid || m->root_kind == ALIAS_UNKNOWN || + gvn_mem_call_clobbers_root(ctx, m->root_kind, m->root_id)) + gvn_mem_avail_remove_at(s, i); + else + ++i; + } +} + static void gvn_mem_barrier(GvnCtx* ctx) { ++ctx->mem.unknown_version; + for (u32 i = 0; i < ctx->mem.nroots; ++i) ++ctx->mem.roots[i].version; + ctx->mem.navail = 0; +} + +static void gvn_mem_call_barrier(GvnCtx* ctx) { + ++ctx->mem.unknown_version; + for (u32 i = 0; i < ctx->mem.nroots; ++i) { + GvnMemVersion* r = &ctx->mem.roots[i]; + if (gvn_mem_call_clobbers_root(ctx, r->root_kind, r->root_id)) + ++r->version; + } + gvn_mem_avail_remove_call_clobbered(ctx); } static void gvn_mem_bump_store(GvnCtx* ctx, u8 kind, u16 addr_space, @@ -1119,7 +1231,8 @@ static int gvn_make_memory_key(GvnCtx* ctx, const Inst* in, u32 addr_idx, key->mem.root_kind = root_kind; key->mem.root_id = root_id; key->mem.offset = offset; - key->mem.unknown_version = ctx->mem.unknown_version; + if (gvn_mem_key_tracks_unknown(ctx, root_kind, root_id)) + key->mem.unknown_version = ctx->mem.unknown_version; if (root_kind != ALIAS_UNKNOWN) { key->mem.root_version = gvn_mem_version(ctx, root_kind, mem->addr_space, root_id)->version; @@ -1346,14 +1459,14 @@ static int gvn_visit_memory_inst(GvnCtx* ctx, u32 b, u32 i, Inst* in) { ctx->f->val_cls[in->def], &key)) return 1; Val leader = VAL_NONE; - if (gvn_table_find(ctx, &key, b, i, &leader)) { + if (gvn_mem_avail_find(ctx, &key, b, i, &leader)) { if (gvn_replace_dominated_uses(ctx, in->def, leader, ctx->f->val_def_block[leader], ctx->f->val_def_inst[leader])) ctx->changed = 1; return 1; } - gvn_table_add(&ctx->table, &key, in->def, b, i); + gvn_mem_avail_add(ctx, &key, in->def); return 1; } @@ -1383,7 +1496,12 @@ static int gvn_visit_memory_inst(GvnCtx* ctx, u32 b, u32 i, Inst* in) { if (!gvn_make_memory_key(ctx, in, 0u, ctx->f->val_type[src], ctx->f->val_cls[src], &key)) return 1; - gvn_table_add(&ctx->table, &key, src, b, i); + gvn_mem_avail_add(ctx, &key, src); + return 1; + } + + if ((IROp)in->op == IR_CALL) { + gvn_mem_call_barrier(ctx); return 1; } @@ -1421,6 +1539,169 @@ static void gvn_visit_inst(GvnCtx* ctx, u32 b, u32 i) { gvn_table_add(&ctx->table, &key, in->def, b, i); } +static int gvn_raw_const_zero(Func* f, Val v) { + Inst* def = NULL; + return val_def_inst(f, v, &def) && def && (IROp)def->op == IR_LOAD_IMM && + def->extra.imm == 0; +} + +static int gvn_raw_operand_zero(Func* f, const Operand* op) { + if (!op) return 0; + if (op->kind == OPK_IMM) return op->v.imm == 0; + if (op->kind == OPK_REG) return gvn_raw_const_zero(f, (Val)op->v.reg); + return 0; +} + +static int gvn_raw_local_addr_root(Func* f, Val v, u32 depth, + FrameSlot* out) { + Inst* def = NULL; + if (!f || depth > 4u || v == VAL_NONE) return 0; + if (!val_def_inst(f, v, &def) || !def) return 0; + if ((IROp)def->op == IR_ADDR_OF && def->nopnds >= 2 && + def->opnds[1].kind == OPK_LOCAL) { + *out = def->opnds[1].v.frame_slot; + return 1; + } + if ((IROp)def->op == IR_COPY && def->nopnds >= 2 && + def->opnds[1].kind == OPK_REG) + return gvn_raw_local_addr_root(f, (Val)def->opnds[1].v.reg, depth + 1u, + out); + if ((IROp)def->op == IR_BINOP && def->nopnds >= 3) { + if ((BinOp)def->extra.imm == BO_IADD) { + if (def->opnds[1].kind == OPK_REG && + gvn_raw_operand_zero(f, &def->opnds[2])) + return gvn_raw_local_addr_root(f, (Val)def->opnds[1].v.reg, + depth + 1u, out); + if (def->opnds[2].kind == OPK_REG && + gvn_raw_operand_zero(f, &def->opnds[1])) + return gvn_raw_local_addr_root(f, (Val)def->opnds[2].v.reg, + depth + 1u, out); + } + if ((BinOp)def->extra.imm == BO_ISUB && def->opnds[1].kind == OPK_REG && + gvn_raw_operand_zero(f, &def->opnds[2])) + return gvn_raw_local_addr_root(f, (Val)def->opnds[1].v.reg, + depth + 1u, out); + } + return 0; +} + +static void gvn_mark_escaped_operand(GvnCtx* ctx, const Operand* op) { + FrameSlot fs = FRAME_SLOT_NONE; + if (!op) return; + if (op->kind == OPK_REG) { + if (gvn_raw_local_addr_root(ctx->f, (Val)op->v.reg, 0, &fs) && fs > 0 && + (u32)fs <= ctx->f->nframe_slots) + ctx->local_escaped[fs] = 1; + } else if (op->kind == OPK_INDIRECT) { + if (gvn_raw_local_addr_root(ctx->f, (Val)op->v.ind.base, 0, &fs) && + fs > 0 && (u32)fs <= ctx->f->nframe_slots) + ctx->local_escaped[fs] = 1; + } +} + +static void gvn_mark_escaped_abivalue(GvnCtx* ctx, const CGABIValue* v) { + if (!v) return; + gvn_mark_escaped_operand(ctx, &v->storage); + for (u32 i = 0; i < v->nparts; ++i) + gvn_mark_escaped_operand(ctx, &v->parts[i].op); +} + +static void gvn_collect_local_escapes(GvnCtx* ctx) { + Func* f = ctx->f; + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + switch ((IROp)in->op) { + case IR_LOAD: + case IR_ADDR_OF: + case IR_COPY: + break; + case IR_STORE: + if (in->nopnds >= 2) gvn_mark_escaped_operand(ctx, &in->opnds[1]); + break; + case IR_BINOP: + if (in->nopnds >= 3 && (BinOp)in->extra.imm == BO_IADD && + (gvn_raw_operand_zero(f, &in->opnds[1]) || + gvn_raw_operand_zero(f, &in->opnds[2]))) + break; + if (in->nopnds >= 3 && (BinOp)in->extra.imm == BO_ISUB && + gvn_raw_operand_zero(f, &in->opnds[2])) + break; + for (u32 o = 1; o < in->nopnds; ++o) + gvn_mark_escaped_operand(ctx, &in->opnds[o]); + break; + case IR_CALL: { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux) break; + if (aux->use_plan_replay) { + gvn_mark_escaped_operand(ctx, &aux->plan.callee); + for (u32 a = 0; a < aux->plan.nargs; ++a) + gvn_mark_escaped_operand(ctx, &aux->plan.args[a].src); + } else { + gvn_mark_escaped_operand(ctx, &aux->desc.callee); + for (u32 a = 0; a < aux->desc.nargs; ++a) + gvn_mark_escaped_abivalue(ctx, &aux->desc.args[a]); + } + break; + } + case IR_RET: { + IRRetAux* aux = (IRRetAux*)in->extra.aux; + if (aux && aux->present) gvn_mark_escaped_abivalue(ctx, &aux->val); + break; + } + default: + if (gvn_inst_memory_barrier(in)) + for (u32 o = 0; o < in->nopnds; ++o) + gvn_mark_escaped_operand(ctx, &in->opnds[o]); + break; + } + } + } +} + +static int gvn_mem_state_has_avail(const GvnMemState* s, const GvnKey* key, + Val val) { + for (u32 i = 0; i < s->navail; ++i) + if (s->avail[i].val == val && gvn_key_equal(&s->avail[i].key, key)) + return 1; + return 0; +} + +static void gvn_mem_merge_block_in(GvnCtx* ctx, u32 b) { + Func* f = ctx->f; + Block* bl = &f->blocks[b]; + GvnBlockMemState* bs = &ctx->block_mem[b]; + memset(&bs->in, 0, sizeof bs->in); + if (!bl->npreds) return; + const GvnMemState* first = NULL; + for (u32 p = 0; p < bl->npreds; ++p) { + u32 pred = bl->preds[p]; + if (pred >= f->nblocks || !ctx->analysis->reachable[pred]) continue; + if (!ctx->block_mem[pred].out_valid) return; + if (!first) first = &ctx->block_mem[pred].out; + } + if (!first) return; + gvn_mem_state_copy(ctx, &bs->in, first); + for (u32 i = 0; i < bs->in.navail;) { + GvnMemAvail* a = &bs->in.avail[i]; + int keep = 1; + for (u32 p = 0; p < bl->npreds; ++p) { + u32 pred = bl->preds[p]; + if (pred >= f->nblocks || !ctx->analysis->reachable[pred]) continue; + if (!gvn_mem_state_has_avail(&ctx->block_mem[pred].out, &a->key, + a->val)) { + keep = 0; + break; + } + } + if (keep) + ++i; + else + gvn_mem_avail_remove_at(&bs->in, i); + } +} + static void gvn_realign_phi_preds(Func* f) { for (u32 b = 0; b < f->nblocks; ++b) { Block* bl = &f->blocks[b]; @@ -1471,13 +1752,22 @@ void opt_gvn(Func* f) { ctx.analysis = &a; ctx.parent = arena_array(f->arena, Val, f->nvals ? f->nvals : 1u); ctx.constants = arena_zarray(f->arena, GvnConst, f->nvals ? f->nvals : 1u); + ctx.block_mem = + arena_zarray(f->arena, GvnBlockMemState, f->nblocks ? f->nblocks : 1u); + ctx.local_escaped = + arena_zarray(f->arena, u8, f->nframe_slots ? f->nframe_slots + 1u : 1u); for (Val v = 0; v < f->nvals; ++v) ctx.parent[v] = v; gvn_table_init(&ctx.table, f); + gvn_collect_local_escapes(&ctx); for (u32 ri = 0; ri < a.nrpo; ++ri) { u32 b = a.rpo[ri]; if (b >= f->nblocks || !a.reachable[b]) continue; + gvn_mem_merge_block_in(&ctx, b); + gvn_mem_state_copy(&ctx, &ctx.mem, &ctx.block_mem[b].in); for (u32 i = 0; i < f->blocks[b].ninsts; ++i) gvn_visit_inst(&ctx, b, i); + gvn_mem_state_copy(&ctx, &ctx.block_mem[b].out, &ctx.mem); + ctx.block_mem[b].out_valid = 1; } if (ctx.cfg_changed) { diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -477,6 +477,19 @@ static void emit_test_branch(Func* f, u32 b, u32 taken, u32 fallthrough, f->blocks[b].nsucc = 2; } +static void emit_cond_branch(Func* f, u32 b, Val cond, u32 taken, + u32 fallthrough, CfreeCgTypeId ty) { + Inst* in = ir_emit(f, b, IR_CMP_BRANCH); + in->opnds = arena_array(f->arena, Operand, 2); + in->opnds[0] = op_reg_(cond, ty); + in->opnds[1] = op_imm_(0, ty); + in->nopnds = 2; + in->extra.imm = CMP_NE; + f->blocks[b].succ[0] = taken; + f->blocks[b].succ[1] = fallthrough; + f->blocks[b].nsucc = 2; +} + static int bytes_contains(const unsigned char* data, size_t len, const char* needle) { size_t nlen = strlen(needle); @@ -1123,6 +1136,21 @@ static CGCallDesc make_call_desc(TestCtx* tc, CfreeCgTypeId fn_ty, return d; } +static Inst* emit_call_one_arg(TestCtx* tc, Func* f, u32 b, Operand arg, + CfreeCgTypeId arg_ty) { + Inst* in = ir_emit(f, b, IR_CALL); + IRCallAux* aux = arena_znew(f->arena, IRCallAux); + CfreeCgTypeId params[1] = {arg_ty}; + CfreeCgTypeId fn_ty = make_func_type(tc, CFREE_CG_TYPE_NONE, params, 1, 0); + const ABIFuncInfo* abi = abi_cg_func_info(tc->c->abi, fn_ty); + CGABIValue* args = arena_zarray(f->arena, CGABIValue, 1); + args[0] = call_arg_value(arg_ty, abi ? &abi->params[0] : NULL, arg); + aux->desc = make_call_desc(tc, fn_ty, args, 1, (CGABIValue){0}); + aux->desc.callee = op_global_(OBJ_SYM_NONE, 0, fn_ty); + in->extra.aux = aux; + return in; +} + static void expect_plan_arg(const CGCallPlan* p, u32 i, u8 dst_kind, u8 cls, Reg reg, u32 stack_offset, const char* ctx) { EXPECT(i < p->nargs, "%s missing arg %u", ctx, (unsigned)i); @@ -2793,6 +2821,92 @@ static void opt_gvn_reuses_store_to_addr_of_zero_index_load(void) { tc_fini(&tc); } +static void opt_gvn_reuses_joined_same_value_store(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); + u32 t = ir_block_new(f), e = ir_block_new(f), j = ir_block_new(f); + ir_note_emit(f, t); + ir_note_emit(f, e); + ir_note_emit(f, j); + Val c = add_val(f, tc.i32), src = add_val(f, tc.i32); + Val out = add_val(f, tc.i32); + emit_scalar_input(f, f->entry, c, tc.i32); + emit_scalar_input(f, f->entry, src, tc.i32); + emit_cond_branch(f, f->entry, c, t, e, tc.i32); + emit_store_local(f, t, fs, src, tc.i32, 0); + emit_br_to(f, t, j); + emit_store_local(f, e, fs, src, tc.i32, 0); + emit_br_to(f, e, j); + emit_load_local(f, j, out, fs, tc.i32, 0); + emit_ret_val(f, j, out, tc.i32); + opt_build_cfg(f); + opt_gvn(f); + opt_verify(f, "test-gvn-memory-join-same-store"); + EXPECT(ret_val(f, j) == src, + "memory GVN should reuse joined stores only when all preds agree"); + tc_fini(&tc); +} + +static void opt_gvn_preserves_joined_different_or_missing_store(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); + u32 t = ir_block_new(f), e = ir_block_new(f), j = ir_block_new(f); + ir_note_emit(f, t); + ir_note_emit(f, e); + ir_note_emit(f, j); + Val c = add_val(f, tc.i32), a = add_val(f, tc.i32); + Val b = add_val(f, tc.i32), out = add_val(f, tc.i32); + emit_scalar_input(f, f->entry, c, tc.i32); + emit_scalar_input(f, f->entry, a, tc.i32); + emit_scalar_input(f, f->entry, b, tc.i32); + emit_cond_branch(f, f->entry, c, t, e, tc.i32); + emit_store_local(f, t, fs, a, tc.i32, 0); + emit_br_to(f, t, j); + emit_store_local(f, e, fs, b, tc.i32, 0); + emit_br_to(f, e, j); + emit_load_local(f, j, out, fs, tc.i32, 0); + emit_ret_val(f, j, out, tc.i32); + opt_build_cfg(f); + opt_gvn(f); + opt_verify(f, "test-gvn-memory-join-different-store"); + EXPECT(ret_val(f, j) == out, + "memory GVN should preserve joined load when preds disagree"); + tc_fini(&tc); +} + +static void opt_gvn_preserves_loop_header_load(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); + u32 h = ir_block_new(f), body = ir_block_new(f), ex = ir_block_new(f); + ir_note_emit(f, h); + ir_note_emit(f, body); + ir_note_emit(f, ex); + Val c = add_val(f, tc.i32), init = add_val(f, tc.i32); + Val next = add_val(f, tc.i32), cur = add_val(f, tc.i32); + emit_scalar_input(f, f->entry, c, tc.i32); + emit_scalar_input(f, f->entry, init, tc.i32); + emit_scalar_input(f, f->entry, next, tc.i32); + emit_store_local(f, f->entry, fs, init, tc.i32, 0); + emit_br_to(f, f->entry, h); + emit_load_local(f, h, cur, fs, tc.i32, 0); + emit_cond_branch(f, h, c, body, ex, tc.i32); + emit_store_local(f, body, fs, next, tc.i32, 0); + emit_br_to(f, body, h); + emit_ret_val(f, ex, cur, tc.i32); + opt_build_cfg(f); + opt_gvn(f); + opt_verify(f, "test-gvn-memory-loop-header"); + EXPECT(ret_val(f, ex) == cur, + "memory GVN should not reuse preheader availability at loop headers"); + tc_fini(&tc); +} + static void opt_gvn_preserves_load_across_unknown_store(void) { TestCtx tc; tc_init(&tc); @@ -2831,33 +2945,47 @@ static void opt_gvn_preserves_load_across_unknown_store(void) { tc_fini(&tc); } -static void opt_gvn_preserves_load_across_call(void) { +static void opt_gvn_preserves_nonescaped_local_across_call(void) { TestCtx tc; tc_init(&tc); Func* f = new_func(&tc); FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val first = add_val(f, tc.i32); - Val second = add_val(f, tc.i32); - emit_load_local(f, f->entry, first, fs, tc.i32, 0); + Val src = add_val(f, tc.i32); + Val loaded = add_val(f, tc.i32); + emit_scalar_input(f, f->entry, src, tc.i32); + emit_store_local(f, f->entry, fs, src, tc.i32, 0); emit_call_void(f, f->entry); - emit_load_local(f, f->entry, second, fs, tc.i32, 0); - emit_ret_val(f, f->entry, second, tc.i32); + emit_load_local(f, f->entry, loaded, fs, tc.i32, 0); + emit_ret_val(f, f->entry, loaded, tc.i32); opt_build_cfg(f); opt_gvn(f); - opt_verify(f, "test-gvn-memory-call-barrier"); - EXPECT(ret_val(f, f->entry) == second, - "memory GVN should not rewrite across calls"); - expect_ir_dump_eq(f, - "ir blocks=1 vals=3\n" - "block 0 preds=[] succs=[] insts=4\n" - " 0 load def=v1 opnds=[v1,local#1] mem=size4 align4 " - "flags=0x0 alias=local#1\n" - " 1 call\n" - " 2 load def=v2 opnds=[v2,local#1] mem=size4 align4 " - "flags=0x0 alias=local#1\n" - " 3 ret ret=v2\n", - "memory GVN call barrier"); + opt_verify(f, "test-gvn-memory-call-preserves-nonescaped-local"); + EXPECT(ret_val(f, f->entry) == src, + "memory GVN should preserve non-escaped locals across calls"); + tc_fini(&tc); +} + +static void opt_gvn_clobbers_escaped_local_across_call(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); + CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); + Val src = add_val(f, tc.i32); + Val addr = add_val(f, ptr_ty); + Val loaded = add_val(f, tc.i32); + emit_scalar_input(f, f->entry, src, tc.i32); + emit_store_local(f, f->entry, fs, src, tc.i32, 0); + emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32); + emit_call_one_arg(&tc, f, f->entry, op_reg_(addr, ptr_ty), ptr_ty); + emit_load_local(f, f->entry, loaded, fs, tc.i32, 0); + emit_ret_val(f, f->entry, loaded, tc.i32); + opt_build_cfg(f); + opt_gvn(f); + opt_verify(f, "test-gvn-memory-call-clobbers-escaped-local"); + EXPECT(ret_val(f, f->entry) == loaded, + "memory GVN should clobber locals whose address reaches a call"); tc_fini(&tc); } @@ -5270,8 +5398,12 @@ int main(void) { opt_gvn_rewrites_redundant_local_load(); opt_gvn_reuses_store_to_local_load(); opt_gvn_reuses_store_to_addr_of_zero_index_load(); + opt_gvn_reuses_joined_same_value_store(); + opt_gvn_preserves_joined_different_or_missing_store(); + opt_gvn_preserves_loop_header_load(); opt_gvn_preserves_load_across_unknown_store(); - opt_gvn_preserves_load_across_call(); + opt_gvn_preserves_nonescaped_local_across_call(); + opt_gvn_clobbers_escaped_local_across_call(); opt_gvn_preserves_observable_memory_loads(); opt_jump_cleanup_forwards_branch_targets(); opt_jump_cleanup_inverts_to_remove_jump_block(); diff --git a/test/toy/cases/128_o2_branch_join_addr_mem.expected b/test/toy/cases/128_o2_branch_join_addr_mem.expected @@ -1 +1 @@ -52 +0 diff --git a/test/toy/cases/128_o2_branch_join_addr_mem.toy b/test/toy/cases/128_o2_branch_join_addr_mem.toy @@ -1,16 +1,26 @@ -fn choose(flag: i64): i64 { - var x: i64 = 5; +fn same(flag: i64, v: i64): i64 { + var x: i64 = 0; if flag != 0 { - x = x + 10; + x = v; } else { - x = x + 20; + x = v; } - let p: *i64 = &x; - return p[0] + 6; + return (&x).* + 1; +} + +fn different(flag: i64, a: i64, b: i64): i64 { + var x: i64 = 0; + if flag != 0 { + x = a; + } else { + x = b; + } + return (&x).*; } fn __user_main(): i64 { - return choose(1) + choose(0); + return (same(1, 41) - 42) + (same(0, 41) - 42) + + (different(1, 17, 23) - 17) + (different(0, 17, 23) - 23); } fn main(): i32 { return __user_main() as i32; }