kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 2c25f54093c2c023dde69b0bfdd00813d324972d
parent e928b11c173e6c3466f548b35f45c5ff4265c6fb
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 27 May 2026 15:35:58 -0700

rm old opt tests

Diffstat:
Dtest/opt/o2_many_values.c | 12------------
Dtest/opt/opt_test.c | 7094-------------------------------------------------------------------------------
Dtest/opt/ret_aggregate_base_clobber.c | 39---------------------------------------
Dtest/opt/run.sh | 55-------------------------------------------------------
4 files changed, 0 insertions(+), 7200 deletions(-)

diff --git a/test/opt/o2_many_values.c b/test/opt/o2_many_values.c @@ -1,12 +0,0 @@ -int f(int a) { - int s = a; - s = s + 0; - s = s + 1; - s = s + 2; - s = s + 3; - s = s + 4; - s = s + 5; - s = s + 6; - s = s + 7; - return s; -} diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -1,7094 +0,0 @@ -#include "opt/opt.h" - -#include <cfree/cg.h> -#include <cfree/core.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "abi/abi.h" -#include "arch/mc.h" -#include "arch/rv64/isa.h" -#include "arch/x64/isa.h" -#include "core/core.h" -#include "core/heap.h" -#include "core/pool.h" -#include "opt/ir.h" -#include "opt/opt_internal.h" - -static void* h_alloc(CfreeHeap* h, size_t n, size_t a) { - (void)h; - (void)a; - return n ? malloc(n) : NULL; -} -static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) { - (void)h; - (void)o; - (void)a; - return realloc(p, n); -} -static void h_free(CfreeHeap* h, void* p, size_t n) { - (void)h; - (void)n; - free(p); -} -static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL}; -static int g_suppress_expected_panic_diag; - -static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, - const char* fmt, va_list ap) { - static const char* names[] = {"note", "warning", "error", "fatal"}; - (void)s; - (void)loc; - if (g_suppress_expected_panic_diag && k == CFREE_DIAG_FATAL) return; - fprintf(stderr, "%s: ", names[k]); - vfprintf(stderr, fmt, ap); - fputc('\n', stderr); -} -static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; - -static int g_fails; -static int g_checks; - -#define EXPECT(cond, ...) \ - do { \ - ++g_checks; \ - if (!(cond)) { \ - ++g_fails; \ - fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ - fprintf(stderr, __VA_ARGS__); \ - fputc('\n', stderr); \ - } \ - } while (0) - -typedef struct TestCtx { - Compiler* c; - CfreeContext ctx; - CfreeCgTypeId i8; - CfreeCgTypeId i16; - CfreeCgTypeId i32; - CfreeCgTypeId i64; - CfreeCgTypeId f64; - CfreeCgTypeId f128; -} TestCtx; - -static void tc_init_target(TestCtx* tc, CfreeArchKind arch, CfreeOSKind os) { - memset(tc, 0, sizeof *tc); - tc->ctx.heap = &g_heap; - tc->ctx.diag = &g_diag; - tc->ctx.now = -1; - - CfreeTarget tgt; - memset(&tgt, 0, sizeof tgt); - tgt.arch = arch; - tgt.os = os; - tgt.obj = CFREE_OBJ_ELF; - tgt.ptr_size = 8; - tgt.ptr_align = 8; - - if (cfree_compiler_new(tgt, &tc->ctx, (CfreeCompiler**)&tc->c) != CFREE_OK || - !tc->c) { - fprintf(stderr, "fatal: cfree_compiler_new failed\n"); - abort(); - } - { - CfreeCgBuiltinTypes b = cfree_cg_builtin_types(tc->c); - tc->i8 = b.id[CFREE_CG_BUILTIN_I8]; - tc->i16 = b.id[CFREE_CG_BUILTIN_I16]; - tc->i32 = b.id[CFREE_CG_BUILTIN_I32]; - tc->i64 = b.id[CFREE_CG_BUILTIN_I64]; - tc->f64 = b.id[CFREE_CG_BUILTIN_F64]; - tc->f128 = b.id[CFREE_CG_BUILTIN_F128]; - } -} - -static void tc_init(TestCtx* tc) { - tc_init_target(tc, CFREE_ARCH_ARM_64, CFREE_OS_LINUX); -} - -static void tc_fini(TestCtx* tc) { - cfree_compiler_free(tc->c); - tc->c = NULL; -} - -static Operand op_reg_(Reg r, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_REG; - o.cls = RC_INT; - o.type = ty; - o.v.reg = r; - return o; -} - -static Operand op_reg_cls_(Reg r, CfreeCgTypeId ty, RegClass cls) { - Operand o = op_reg_(r, ty); - o.cls = cls; - return o; -} - -static Operand op_imm_(i64 v, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_IMM; - o.cls = RC_INT; - o.type = ty; - o.v.imm = v; - return o; -} - -static Operand op_local_(FrameSlot fs, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_LOCAL; - o.cls = RC_INT; - o.type = ty; - o.v.frame_slot = fs; - return o; -} - -static Operand op_indirect_(Reg base, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_INDIRECT; - o.cls = RC_INT; - o.type = ty; - o.v.ind.base = base; - o.v.ind.index = REG_NONE; - o.v.ind.ofs = 0; - return o; -} - -static Operand op_indexed_indirect_(Reg base, Reg index, u8 log2_scale, i32 ofs, - CfreeCgTypeId ty) { - Operand o = op_indirect_(base, ty); - o.v.ind.index = index; - o.v.ind.log2_scale = log2_scale; - o.v.ind.ofs = ofs; - return o; -} - -static Operand op_global_(ObjSymId sym, i64 addend, CfreeCgTypeId ty) { - Operand o; - memset(&o, 0, sizeof o); - o.kind = OPK_GLOBAL; - o.cls = RC_INT; - o.type = ty; - o.v.global.sym = sym; - o.v.global.addend = addend; - return o; -} - -static MemAccess mem_local_(FrameSlot fs, CfreeCgTypeId ty, u32 size, - u16 flags) { - MemAccess m; - memset(&m, 0, sizeof m); - m.type = ty; - m.size = size; - m.align = size >= 8 ? 8 : size; - m.flags = flags; - m.alias.kind = ALIAS_LOCAL; - m.alias.v.local_id = (i32)fs; - return m; -} - -static MemAccess mem_unknown_(CfreeCgTypeId ty, u32 size) { - MemAccess m; - memset(&m, 0, sizeof m); - m.type = ty; - m.size = size; - m.align = size >= 8 ? 8 : size; - m.alias.kind = ALIAS_UNKNOWN; - return m; -} - -static Func* new_func(TestCtx* tc) { - CGFuncDesc fd; - CfreeCgFuncSig sig; - memset(&fd, 0, sizeof fd); - memset(&sig, 0, sizeof sig); - sig.ret = tc->i32; - sig.call_conv = CFREE_CG_CC_TARGET_C; - fd.fn_type = cfree_cg_type_func(tc->c, sig); - Func* f = ir_func_new(tc->c, &fd); - f->entry = ir_block_new(f); - ir_note_emit(f, f->entry); - return f; -} - -static Val add_val(Func* f, CfreeCgTypeId ty) { - return ir_alloc_val(f, ty, RC_INT); -} - -static Val add_val_cls(Func* f, CfreeCgTypeId ty, RegClass cls) { - return ir_alloc_val(f, ty, cls); -} - -static FrameSlot add_frame_slot(Func* f, CfreeCgTypeId ty, FrameSlotKind kind, - u32 size, u16 flags) { - FrameSlotDesc d; - memset(&d, 0, sizeof d); - d.type = ty; - d.size = size; - d.align = size >= 8 ? 8 : size; - d.kind = kind; - d.flags = flags; - return ir_frame_slot_new(f, &d); -} - -static Inst* emit_load_imm(Func* f, u32 b, Val dst, CfreeCgTypeId ty, i64 imm) { - Inst* in = ir_emit(f, b, IR_LOAD_IMM); - in->opnds = arena_array(f->arena, Operand, 1); - in->opnds[0] = op_reg_(dst, ty); - in->nopnds = 1; - in->def = dst; - in->type = ty; - in->extra.imm = imm; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_scalar_input(Func* f, u32 b, Val dst, CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_PARAM_DECL); - in->def = dst; - in->type = ty; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_copy(Func* f, u32 b, Val dst, Val src, CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_COPY); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_reg_(src, ty); - in->nopnds = 2; - in->def = dst; - in->type = ty; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_binop(Func* f, u32 b, Val dst, Val a, Val c, - CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_BINOP); - in->opnds = arena_array(f->arena, Operand, 3); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_reg_(a, ty); - in->opnds[2] = op_reg_(c, ty); - in->nopnds = 3; - in->def = dst; - in->type = ty; - in->extra.imm = BO_IADD; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_unop(Func* f, u32 b, Val dst, Val src, CfreeCgTypeId ty, - UnOp op) { - Inst* in = ir_emit(f, b, IR_UNOP); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_reg_(src, ty); - in->nopnds = 2; - in->def = dst; - in->type = ty; - in->extra.imm = op; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_cmp(Func* f, u32 b, Val dst, Val a, Val c, CfreeCgTypeId ty, - CmpOp op) { - Inst* in = ir_emit(f, b, IR_CMP); - in->opnds = arena_array(f->arena, Operand, 3); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_reg_(a, ty); - in->opnds[2] = op_reg_(c, ty); - in->nopnds = 3; - in->def = dst; - in->type = ty; - in->extra.imm = op; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_phys_copy(Func* f, u32 b, Reg dst, Reg src, - CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_COPY); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_reg_(src, ty); - in->nopnds = 2; - return in; -} - -static Inst* emit_phys_binop(Func* f, u32 b, Reg dst, Reg a, Reg c, - CfreeCgTypeId ty, BinOp op) { - Inst* in = ir_emit(f, b, IR_BINOP); - in->opnds = arena_array(f->arena, Operand, 3); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_reg_(a, ty); - in->opnds[2] = op_reg_(c, ty); - in->nopnds = 3; - in->extra.imm = op; - return in; -} - -static Inst* emit_convert_typed(Func* f, u32 b, Val dst, Val src, - CfreeCgTypeId dst_ty, CfreeCgTypeId src_ty, - ConvKind k) { - Inst* in = ir_emit(f, b, IR_CONVERT); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, dst_ty); - in->opnds[1] = op_reg_(src, src_ty); - in->nopnds = 2; - in->def = dst; - in->type = dst_ty; - in->extra.imm = k; - if (dst < f->nvals) { - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - } - return in; -} - -static Inst* emit_convert(Func* f, u32 b, Val dst, Val src, CfreeCgTypeId ty, - ConvKind k) { - return emit_convert_typed(f, b, dst, src, ty, ty, k); -} - -static Inst* emit_load_local(Func* f, u32 b, Val dst, FrameSlot fs, - CfreeCgTypeId ty, u16 flags) { - Inst* in = ir_emit(f, b, IR_LOAD); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_local_(fs, ty); - in->nopnds = 2; - in->def = dst; - in->type = ty; - in->extra.mem = mem_local_(fs, ty, 4, flags); - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_store_local(Func* f, u32 b, FrameSlot fs, Val src, - CfreeCgTypeId ty, u16 flags) { - Inst* in = ir_emit(f, b, IR_STORE); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_local_(fs, ty); - in->opnds[1] = op_reg_(src, ty); - in->nopnds = 2; - in->extra.mem = mem_local_(fs, ty, 4, flags); - return in; -} - -static Inst* emit_store_indirect(Func* f, u32 b, Val base, Val src, - CfreeCgTypeId ty, u16 flags) { - Inst* in = ir_emit(f, b, IR_STORE); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_indirect_((Reg)base, ty); - in->opnds[1] = op_reg_(src, ty); - in->nopnds = 2; - in->extra.mem = mem_unknown_(ty, 4); - in->extra.mem.flags = flags; - return in; -} - -static Inst* emit_addr_of_local(Func* f, u32 b, Val dst, FrameSlot fs, - CfreeCgTypeId ptr_ty, CfreeCgTypeId local_ty) { - Inst* in = ir_emit(f, b, IR_ADDR_OF); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ptr_ty); - in->opnds[1] = op_local_(fs, local_ty); - in->nopnds = 2; - in->def = dst; - in->type = ptr_ty; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_atomic_load_local(Func* f, u32 b, Val dst, FrameSlot fs, - CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_ATOMIC_LOAD); - IRAtomicAux* aux = arena_znew(f->arena, IRAtomicAux); - aux->mem = mem_local_(fs, ty, 4, MF_ATOMIC); - aux->mo = MO_SEQ_CST; - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_local_(fs, ty); - in->nopnds = 2; - in->def = dst; - in->type = ty; - in->extra.aux = aux; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_load_indirect(Func* f, u32 b, Val dst, Val base, - CfreeCgTypeId ty, u16 flags) { - Inst* in = ir_emit(f, b, IR_LOAD); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_indirect_((Reg)base, ty); - in->nopnds = 2; - in->def = dst; - in->type = ty; - in->extra.mem = mem_unknown_(ty, 4); - in->extra.mem.flags = flags; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_load_indexed_indirect(Func* f, u32 b, Val dst, Val base, - Val index, u8 log2_scale, i32 ofs, - CfreeCgTypeId ty, u16 flags) { - Inst* in = ir_emit(f, b, IR_LOAD); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = - op_indexed_indirect_((Reg)base, (Reg)index, log2_scale, ofs, ty); - in->nopnds = 2; - in->def = dst; - in->type = ty; - in->extra.mem = mem_unknown_(ty, 4); - in->extra.mem.flags = flags; - f->val_def_block[dst] = b; - f->val_def_inst[dst] = f->blocks[b].ninsts - 1u; - return in; -} - -static Inst* emit_call_void(Func* f, u32 b) { - Inst* in = ir_emit(f, b, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - in->extra.aux = aux; - return in; -} - -static void emit_ret_val(Func* f, u32 b, Val v, CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_RET); - IRRetAux* aux = arena_znew(f->arena, IRRetAux); - aux->present = 1; - aux->val.type = ty; - aux->val.storage = op_reg_(v, ty); - in->extra.aux = aux; -} - -static void emit_br_to(Func* f, u32 b, u32 target) { - Inst* in = ir_emit(f, b, IR_BR); - (void)in; - f->blocks[b].succ[0] = target; - f->blocks[b].nsucc = 1; -} - -static void emit_test_branch(Func* f, u32 b, u32 taken, u32 fallthrough, - CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_CMP_BRANCH); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_imm_(1, ty); - in->opnds[1] = op_imm_(0, ty); - in->nopnds = 2; - in->extra.imm = CMP_NE; - f->blocks[b].succ[0] = taken; - f->blocks[b].succ[1] = fallthrough; - f->blocks[b].nsucc = 2; -} - -static void emit_cond_branch(Func* f, u32 b, Val cond, u32 taken, - u32 fallthrough, CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_CMP_BRANCH); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(cond, ty); - in->opnds[1] = op_imm_(0, ty); - in->nopnds = 2; - in->extra.imm = CMP_NE; - f->blocks[b].succ[0] = taken; - f->blocks[b].succ[1] = fallthrough; - f->blocks[b].nsucc = 2; -} - -static void emit_raw_condbr(Func* f, u32 b, Val cond, u32 taken, - u32 fallthrough, CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_CONDBR); - in->opnds = arena_array(f->arena, Operand, 1); - in->opnds[0] = op_reg_(cond, ty); - in->nopnds = 1; - f->blocks[b].succ[0] = taken; - f->blocks[b].succ[1] = fallthrough; - f->blocks[b].nsucc = 2; -} - -static void emit_switch2(Func* f, u32 b, Val sel, u64 v0, u32 case0, u64 v1, - u32 case1, u32 default_block, CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_SWITCH); - IRSwitchAux* aux = arena_znew(f->arena, IRSwitchAux); - in->opnds = arena_array(f->arena, Operand, 1); - in->opnds[0] = op_reg_(sel, ty); - in->nopnds = 1; - aux->selector_type = ty; - aux->ncases = 2; - aux->has_default = 1; - aux->default_block = default_block; - aux->cases = arena_array(f->arena, IRSwitchAuxCase, 2); - aux->cases[0].value = v0; - aux->cases[0].block = case0; - aux->cases[1].value = v1; - aux->cases[1].block = case1; - in->extra.aux = aux; - ir_block_set_nsucc(f, b, 3); - f->blocks[b].succ[0] = case0; - f->blocks[b].succ[1] = case1; - f->blocks[b].succ[2] = default_block; -} - -static int bytes_contains(const unsigned char* data, size_t len, - const char* needle) { - size_t nlen = strlen(needle); - if (!data || nlen > len) return 0; - for (size_t i = 0; i + nlen <= len; ++i) - if (memcmp(data + i, needle, nlen) == 0) return 1; - return 0; -} - -static void expect_ir_dump_eq(Func* f, const char* expected, - const char* label) { - CfreeWriter* w = NULL; - (void)cfree_writer_mem(&g_heap, &w); - opt_ir_dump(f, w); - size_t len = 0; - const unsigned char* bytes = cfree_writer_mem_bytes(w, &len); - size_t expected_len = strlen(expected); - int ok = bytes && len == expected_len && - memcmp(bytes, expected, expected_len) == 0; - EXPECT(ok, "%s dump should match golden", label); - if (!ok) { - fprintf(stderr, "expected:\n%sactual:\n%.*s\n", expected, (int)len, - bytes ? (const char*)bytes : ""); - } - cfree_writer_close(w); -} - -static void ensure_test_preg_info(Func* f) { - if (f->preg_info) return; - u32 nregs = opt_reg_count(f); - f->preg_info = arena_zarray(f->arena, OptPRegInfo, nregs ? nregs : 1u); - for (PReg r = 0; r < nregs; ++r) { - f->preg_info[r].tied_hard_reg = -1; - f->preg_info[r].hard_reg = REG_NONE; - f->preg_info[r].spill_slot = FRAME_SLOT_NONE; - f->preg_info[r].cls = opt_reg_cls(f, r); - } -} - -static int ranges_overlap(const OptLiveRangeSet* ranges, Val a, Val b) { - for (u32 ar = ranges->first_range_by_preg[a]; ar != OPT_RANGE_NONE; - ar = ranges->ranges[ar].next) { - const OptLiveRange* ra = &ranges->ranges[ar]; - for (u32 br = ranges->first_range_by_preg[b]; br != OPT_RANGE_NONE; - br = ranges->ranges[br].next) { - const OptLiveRange* rb = &ranges->ranges[br]; - if (ra->start < rb->end && rb->start < ra->end) return 1; - } - } - return 0; -} - -static int expect_panic(Compiler* c, void (*fn)(void*), void* arg) { - PanicSave saved; - int panicked = 0; - compiler_panic_save(c, &saved); - g_suppress_expected_panic_diag++; - if (setjmp(c->panic)) { - panicked = 1; - } else { - fn(arg); - } - g_suppress_expected_panic_diag--; - compiler_panic_restore(c, &saved); - return panicked; -} - -static int count_op(Func* f, IROp op) { - int n = 0; - for (u32 b = 0; b < f->nblocks; ++b) - for (u32 i = 0; i < f->blocks[b].ninsts; ++i) - if ((IROp)f->blocks[b].insts[i].op == op) ++n; - return n; -} - -static Inst* def_inst(Func* f, Val v) { - if (!f || v == VAL_NONE || v >= f->nvals) return NULL; - u32 b = f->val_def_block[v]; - u32 i = f->val_def_inst[v]; - if (b >= f->nblocks || i >= f->blocks[b].ninsts) return NULL; - return &f->blocks[b].insts[i]; -} - -static Val ret_val(Func* f, u32 b) { - if (!f || b >= f->nblocks || !f->blocks[b].ninsts) return VAL_NONE; - Inst* in = &f->blocks[b].insts[f->blocks[b].ninsts - 1u]; - if ((IROp)in->op != IR_RET) return VAL_NONE; - IRRetAux* aux = (IRRetAux*)in->extra.aux; - if (!aux || !aux->present || aux->val.storage.kind != OPK_REG) - return VAL_NONE; - return (Val)aux->val.storage.v.reg; -} - -static int val_is_load_imm(Func* f, Val v, i64 imm) { - Inst* in = def_inst(f, v); - return in && (IROp)in->op == IR_LOAD_IMM && in->extra.imm == imm; -} - -static int any_ret_load_imm(Func* f, i64 imm) { - for (u32 b = 0; b < f->nblocks; ++b) { - Val v = ret_val(f, b); - if (v != VAL_NONE && val_is_load_imm(f, v, imm)) return 1; - } - return 0; -} - -static u32 count_uses_of(Func* f, Val v) { - opt_rebuild_def_use(f); - u32 n = 0; - for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE; - u = f->opt_uses[u].next_for_val) - ++n; - return n; -} - -static u32 live_range_count_for(const OptLiveRangeSet* ranges, Val v) { - u32 n = 0; - for (u32 r = ranges->first_range_by_preg[v]; r != OPT_RANGE_NONE; - r = ranges->ranges[r].next) - ++n; - return n; -} - -/* ============================================================ - * MockCGTarget — provides register coordination so opt_machinize - * and opt_emit can query backend policy without hard-coding arch - * knowledge in the tests. - * ============================================================ */ - -typedef struct MockCGTarget { - CGTarget base; - const Reg* pool[OPT_REG_CLASSES]; - u32 pool_n[OPT_REG_CLASSES]; - const CGPhysRegInfo* phys[OPT_REG_CLASSES]; - u32 phys_n[OPT_REG_CLASSES]; - const Reg* scratch[OPT_REG_CLASSES]; - u32 scratch_n[OPT_REG_CLASSES]; - u32 caller_saved_mask[OPT_REG_CLASSES]; - u32 callee_saved_mask[OPT_REG_CLASSES]; - u32 call_clobber_mask[OPT_REG_CLASSES]; - int plan_call_count; - int plan_calls[OPT_REG_CLASSES]; - int plan_regs[OPT_REG_CLASSES]; - int func_begin_plan_calls; - int known_frame_calls; - CGKnownFrameDesc last_known_frame; - int reserve_calls[OPT_REG_CLASSES]; - int load_imm_calls; - Reg last_load_imm_dst; - int load_const_calls; - u8 last_const_bytes[16]; - u32 last_const_size; - int copy_calls; - Operand copy_dst[16]; - Operand copy_src[16]; - int load_calls; - int store_calls; - int store_call_arg_calls; - CGCallPlanMove last_stack_arg; - int load_call_arg_calls; - CGCallPlanMove last_load_arg; - int store_call_ret_calls; - CGCallPlanRet last_store_ret; - int addr_of_calls; - int cmp_branch_calls; - int call_calls; - int emit_call_plan_calls; - char events[32]; - int event_count; - Operand last_plan_callee; - Reg planned_arg_regs[8]; - u32 planned_nargs; - Reg planned_ret_regs[4]; - u32 planned_nrets; - int planned_stack_arg; - int param_calls; - CGLocalStorage last_param_storage; - int alloca_calls; -} MockCGTarget; - -static void mock_func_begin(CGTarget* t, const CGFuncDesc* d) { - MockCGTarget* m = (MockCGTarget*)t; - int n = 0; - for (u32 i = 0; i < OPT_REG_CLASSES; ++i) n += m->plan_calls[i]; - m->func_begin_plan_calls = n; - (void)d; -} -static void mock_func_end(CGTarget* t) { (void)t; } - -static void mock_func_begin_known_frame(CGTarget* t, const CGFuncDesc* d, - const CGKnownFrameDesc* frame, - FrameSlot* out_slots) { - MockCGTarget* m = (MockCGTarget*)t; - mock_func_begin(t, d); - ++m->known_frame_calls; - if (frame) m->last_known_frame = *frame; - for (u32 i = 0; frame && i < frame->nslots; ++i) { - if (out_slots) out_slots[i] = (FrameSlot)(i + 1u); - } -} - -static u32 mock_call_stack_size(CGTarget* t, const CGCallDesc* d) { - MockCGTarget* m = (MockCGTarget*)t; - (void)d; - return m->planned_stack_arg ? 8u : 0u; -} - -static void mock_get_allocable_regs(CGTarget* t, RegClass cls, const Reg** out, - u32* nregs) { - MockCGTarget* m = (MockCGTarget*)t; - *out = m->pool[cls]; - *nregs = m->pool_n[cls]; -} - -static void mock_get_phys_regs(CGTarget* t, RegClass cls, - const CGPhysRegInfo** out, u32* nregs) { - MockCGTarget* m = (MockCGTarget*)t; - *out = m->phys[cls]; - *nregs = m->phys_n[cls]; -} - -static void mock_get_scratch_regs(CGTarget* t, RegClass cls, const Reg** out, - u32* nregs) { - MockCGTarget* m = (MockCGTarget*)t; - *out = m->scratch[cls]; - *nregs = m->scratch_n[cls]; -} - -static int mock_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) { - MockCGTarget* m = (MockCGTarget*)t; - if (cls >= OPT_REG_CLASSES || reg >= 32) return 0; - return (m->caller_saved_mask[cls] & (1u << reg)) != 0; -} - -static u32 mock_call_clobber_mask(CGTarget* t, const CGCallDesc* d, - RegClass cls) { - MockCGTarget* m = (MockCGTarget*)t; - (void)d; - return cls < OPT_REG_CLASSES ? m->call_clobber_mask[cls] : 0; -} - -static u32 mock_callee_save_mask(CGTarget* t, RegClass cls) { - MockCGTarget* m = (MockCGTarget*)t; - return cls < OPT_REG_CLASSES ? m->callee_saved_mask[cls] : 0; -} - -static u32 mock_return_reg_mask(CGTarget* t, const ABIFuncInfo* abi, - RegClass cls) { - (void)t; - (void)abi; - (void)cls; - return 0; -} - -static void mock_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) { - MockCGTarget* m = (MockCGTarget*)t; - memset(out, 0, sizeof *out); - out->callee = d->callee; - out->flags = d->flags; - for (u32 c = 0; c < OPT_REG_CLASSES; ++c) - out->clobber_mask[c] = m->call_clobber_mask[c]; - u32 nargs = m->planned_nargs ? m->planned_nargs : d->nargs; - if (nargs) out->args = arena_zarray(t->c->tu, CGCallPlanMove, nargs); - for (u32 i = 0; i < nargs && i < d->nargs; ++i) { - CGCallPlanMove* pm = &out->args[out->nargs++]; - pm->src = d->args[i].storage; - pm->dst_kind = m->planned_stack_arg ? CG_CALL_PLAN_STACK : CG_CALL_PLAN_REG; - pm->cls = RC_INT; - pm->dst_reg = - m->planned_arg_regs[i] ? m->planned_arg_regs[i] : (Reg)(i + 1u); - pm->stack_offset = i * 8u; - pm->mem.type = d->args[i].type; - pm->mem.size = 8; - pm->mem.align = 8; - } - if (m->planned_nrets) { - out->rets = arena_zarray(t->c->tu, CGCallPlanRet, m->planned_nrets); - for (u32 i = 0; i < m->planned_nrets; ++i) { - CGCallPlanRet* pr = &out->rets[out->nrets++]; - pr->dst = d->ret.storage; - pr->cls = RC_INT; - pr->src_reg = m->planned_ret_regs[i]; - pr->mem.type = d->ret.type; - pr->mem.size = 8; - pr->mem.align = 8; - } - } - ++m->plan_call_count; -} - -static void mock_call(CGTarget* t, const CGCallDesc* d) { - MockCGTarget* m = (MockCGTarget*)t; - (void)d; - ++m->call_calls; -} - -static void mock_emit_call_plan(CGTarget* t, const CGCallPlan* p) { - MockCGTarget* m = (MockCGTarget*)t; - ++m->emit_call_plan_calls; - if (m->event_count < (int)sizeof m->events) m->events[m->event_count++] = 'p'; - m->last_plan_callee = p->callee; -} - -static void mock_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, - u32 n) { - MockCGTarget* m = (MockCGTarget*)t; - if (cls < OPT_REG_CLASSES) m->reserve_calls[cls] += (int)n; - (void)regs; -} - -static void mock_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, - u32 n) { - MockCGTarget* m = (MockCGTarget*)t; - if (cls < OPT_REG_CLASSES) { - m->plan_calls[cls]++; - m->plan_regs[cls] += (int)n; - } - (void)regs; -} - -static int mock_resolve_reg_name(CGTarget* t, Sym name, Reg* out, - RegClass* cls_out) { - Slice sl = pool_slice(t->c->global, name); - const char* s = sl.s; - size_t len = sl.len; - if (!s || len < 2) return 1; - if ((s[0] != 'r' && s[0] != 'x' && s[0] != 'v') || s[1] < '0' || s[1] > '9') - return 1; - u32 n = 0; - for (size_t i = 1; i < len; ++i) { - if (s[i] < '0' || s[i] > '9') return 1; - n = n * 10u + (u32)(s[i] - '0'); - } - if (n >= 32u) return 1; - if (out) *out = (Reg)n; - if (cls_out) *cls_out = (s[0] == 'v') ? RC_FP : RC_INT; - return 0; -} - -static void mock_load_imm(CGTarget* t, Operand dst, i64 imm) { - (void)imm; - MockCGTarget* m = (MockCGTarget*)t; - ++m->load_imm_calls; - m->last_load_imm_dst = dst.v.reg; -} - -static void mock_load_const(CGTarget* t, Operand dst, ConstBytes cb) { - MockCGTarget* m = (MockCGTarget*)t; - (void)dst; - ++m->load_const_calls; - m->last_const_size = cb.size; - if (cb.size <= sizeof m->last_const_bytes && cb.bytes) - memcpy(m->last_const_bytes, cb.bytes, cb.size); -} - -static void mock_copy(CGTarget* t, Operand dst, Operand src) { - MockCGTarget* m = (MockCGTarget*)t; - if (m->event_count < (int)sizeof m->events) m->events[m->event_count++] = 'c'; - if (m->copy_calls < (int)(sizeof m->copy_dst / sizeof m->copy_dst[0])) { - m->copy_dst[m->copy_calls] = dst; - m->copy_src[m->copy_calls] = src; - } - ++m->copy_calls; -} - -static void mock_load(CGTarget* t, Operand dst, Operand addr, MemAccess macc) { - MockCGTarget* m = (MockCGTarget*)t; - (void)dst; - (void)addr; - (void)macc; - ++m->load_calls; -} - -static void mock_store(CGTarget* t, Operand addr, Operand src, MemAccess macc) { - MockCGTarget* m = (MockCGTarget*)t; - (void)addr; - (void)src; - (void)macc; - ++m->store_calls; -} - -static void mock_store_call_arg(CGTarget* t, const CGCallPlanMove* move) { - MockCGTarget* m = (MockCGTarget*)t; - if (m->event_count < (int)sizeof m->events) m->events[m->event_count++] = 's'; - ++m->store_call_arg_calls; - m->last_stack_arg = *move; -} - -static void mock_load_call_arg(CGTarget* t, Operand dst, - const CGCallPlanMove* move) { - MockCGTarget* m = (MockCGTarget*)t; - (void)dst; - ++m->load_call_arg_calls; - m->last_load_arg = *move; - if (move->src_kind == CG_CALL_PLAN_SRC_ADDR) - ++m->addr_of_calls; - else - ++m->load_calls; -} - -static void mock_store_call_ret(CGTarget* t, const CGCallPlanRet* ret, - Operand src) { - MockCGTarget* m = (MockCGTarget*)t; - (void)src; - ++m->store_call_ret_calls; - m->last_store_ret = *ret; -} - -static FrameSlot mock_frame_slot(CGTarget* t, const FrameSlotDesc* d); - -static CGLocalStorage mock_param(CGTarget* t, const CGParamDesc* p) { - MockCGTarget* m = (MockCGTarget*)t; - CGLocalStorage st = p->storage; - ++m->param_calls; - if (st.kind == CG_LOCAL_STORAGE_FRAME && st.v.frame_slot == FRAME_SLOT_NONE) { - FrameSlotDesc fsd = {0}; - fsd.type = p->type; - fsd.name = p->name; - fsd.loc = p->loc; - fsd.size = p->size; - fsd.align = p->align; - fsd.kind = FS_PARAM; - if (p->flags & CG_LOCAL_ADDR_TAKEN) fsd.flags |= FSF_ADDR_TAKEN; - st.v.frame_slot = mock_frame_slot(t, &fsd); - } - m->last_param_storage = st; - return st; -} - -static void mock_addr_of(CGTarget* t, Operand dst, Operand lv) { - MockCGTarget* m = (MockCGTarget*)t; - (void)dst; - (void)lv; - ++m->addr_of_calls; -} - -static void mock_ret(CGTarget* t, const CGABIValue* v) { - (void)t; - (void)v; -} - -static Label mock_label_new(CGTarget* t) { - (void)t; - static Label next = 1; - return next++; -} -static void mock_label_place(CGTarget* t, Label l) { - (void)t; - (void)l; -} -static void mock_jump(CGTarget* t, Label l) { - (void)t; - (void)l; -} - -static void mock_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, - Label l) { - MockCGTarget* m = (MockCGTarget*)t; - (void)op; - (void)a; - (void)b; - (void)l; - ++m->cmp_branch_calls; -} - -static FrameSlot mock_frame_slot(CGTarget* t, const FrameSlotDesc* d) { - (void)t; - (void)d; - static FrameSlot next = 1; - return next++; -} - -static void mock_set_loc(CGTarget* t, SrcLoc loc) { - (void)t; - (void)loc; -} - -static void mock_alloca(CGTarget* t, Operand dst, Operand size, u32 align) { - MockCGTarget* m = (MockCGTarget*)t; - (void)dst; - (void)size; - (void)align; - ++m->alloca_calls; -} - -static void mock_init(MockCGTarget* m, Compiler* c) { - memset(m, 0, sizeof *m); - m->base.c = c; - m->base.func_begin = mock_func_begin; - m->base.func_begin_known_frame = mock_func_begin_known_frame; - m->base.func_end = mock_func_end; - m->base.frame_slot = mock_frame_slot; - m->base.label_new = mock_label_new; - m->base.label_place = mock_label_place; - m->base.jump = mock_jump; - m->base.cmp_branch = mock_cmp_branch; - m->base.load_imm = mock_load_imm; - m->base.load_const = mock_load_const; - m->base.copy = mock_copy; - m->base.load = mock_load; - m->base.store = mock_store; - m->base.load_call_arg = mock_load_call_arg; - m->base.store_call_arg = mock_store_call_arg; - m->base.store_call_ret = mock_store_call_ret; - m->base.param = mock_param; - m->base.addr_of = mock_addr_of; - m->base.ret = mock_ret; - m->base.alloca_ = mock_alloca; - m->base.set_loc = mock_set_loc; - m->base.get_allocable_regs = mock_get_allocable_regs; - m->base.get_phys_regs = mock_get_phys_regs; - m->base.get_scratch_regs = mock_get_scratch_regs; - m->base.is_caller_saved = mock_is_caller_saved; - m->base.call_clobber_mask = mock_call_clobber_mask; - m->base.call_stack_size = mock_call_stack_size; - m->base.return_reg_mask = mock_return_reg_mask; - m->base.callee_save_mask = mock_callee_save_mask; - m->base.call = mock_call; - m->base.plan_call = mock_plan_call; - m->base.emit_call_plan = mock_emit_call_plan; - m->base.plan_hard_regs = mock_plan_hard_regs; - m->base.reserve_hard_regs = mock_reserve_hard_regs; - m->base.resolve_reg_name = mock_resolve_reg_name; -} - -static void mock_set_pool(MockCGTarget* m, RegClass cls, const Reg* pool, - u32 npool, const Reg* scratch_, u32 nscratch, - u32 caller_mask) { - m->pool[cls] = pool; - m->pool_n[cls] = npool; - m->scratch[cls] = scratch_; - m->scratch_n[cls] = nscratch; - m->caller_saved_mask[cls] = caller_mask; - m->call_clobber_mask[cls] = caller_mask; -} - -static void mock_set_phys(MockCGTarget* m, RegClass cls, - const CGPhysRegInfo* phys, u32 nphys) { - m->phys[cls] = phys; - m->phys_n[cls] = nphys; - m->caller_saved_mask[cls] = 0; - m->callee_saved_mask[cls] = 0; - for (u32 i = 0; i < nphys; ++i) { - if (phys[i].reg >= 32u) continue; - if (phys[i].flags & CG_REG_CALLER_SAVED) - m->caller_saved_mask[cls] |= 1u << phys[i].reg; - if (phys[i].flags & CG_REG_CALLEE_SAVED) - m->callee_saved_mask[cls] |= 1u << phys[i].reg; - } -} - -typedef struct RealArchExpect { - const char* name; - CfreeArchKind arch; - Reg int_arg[8]; - Reg fp_arg[8]; - Reg int_ret[2]; - Reg fp_ret[2]; - Reg sret_reg; - u32 n_int_arg_regs; - int variadic_fp_counted; - int variadic_fp_uses_int_reg; -} RealArchExpect; - -static const RealArchExpect g_real_arch[] = { - {"x64", - CFREE_ARCH_X86_64, - {X64_RDI, X64_RSI, X64_RDX, X64_RCX, X64_R8, X64_R9, 0, 0}, - {X64_XMM0, X64_XMM1, X64_XMM2, X64_XMM3, X64_XMM4, X64_XMM5, X64_XMM6, - X64_XMM7}, - {X64_RAX, X64_RDX}, - {X64_XMM0, X64_XMM1}, - X64_RDI, - 6, - 1, - 0}, - {"aa64", - CFREE_ARCH_ARM_64, - {0, 1, 2, 3, 4, 5, 6, 7}, - {0, 1, 2, 3, 4, 5, 6, 7}, - {0, 1}, - {0, 1}, - 8, - 8, - 0, - 0}, - {"rv64", - CFREE_ARCH_RV64, - {RV_A0, RV_A1, RV_A2, RV_A3, RV_A4, RV_A5, RV_A6, RV_A7}, - {10, 11, 12, 13, 14, 15, 16, 17}, - {RV_A0, RV_A1}, - {10, 11}, - RV_A0, - 8, - 0, - 1}, -}; - -static CfreeCgTypeId record_of_i64s(TestCtx* tc, const char* name, u64 count) { - CfreeCgTypeId arr = cfree_cg_type_array(tc->c, tc->i64, count); - CfreeCgField field; - memset(&field, 0, sizeof field); - field.name = cfree_sym_intern(tc->c, CFREE_SLICE_LIT("a")); - field.type = arr; - return cfree_cg_type_record( - tc->c, cfree_sym_intern(tc->c, cfree_slice_cstr(name)), &field, 1); -} - -static CfreeCgTypeId make_func_type(TestCtx* tc, CfreeCgTypeId ret, - const CfreeCgTypeId* params, u32 nparams, - int variadic) { - CfreeCgFuncParam ps[12]; - CfreeCgFuncSig sig; - memset(ps, 0, sizeof ps); - memset(&sig, 0, sizeof sig); - for (u32 i = 0; i < nparams; ++i) ps[i].type = params[i]; - sig.ret = ret; - sig.params = ps; - sig.nparams = nparams; - sig.call_conv = CFREE_CG_CC_TARGET_C; - sig.abi_variadic = variadic; - return cfree_cg_type_func(tc->c, sig); -} - -static CGABIValue call_arg_value(CfreeCgTypeId ty, const ABIArgInfo* abi, - Operand storage) { - CGABIValue v; - memset(&v, 0, sizeof v); - v.type = ty; - v.abi = abi; - v.storage = storage; - v.size = 0; - return v; -} - -static CGCallDesc make_call_desc(TestCtx* tc, CfreeCgTypeId fn_ty, - const CGABIValue* args, u32 nargs, - CGABIValue ret) { - CGCallDesc d; - memset(&d, 0, sizeof d); - d.fn_type = fn_ty; - d.abi = abi_cg_func_info(tc->c->abi, fn_ty); - d.callee = op_reg_(30, tc->i64); - d.args = args; - d.nargs = nargs; - d.ret = ret; - return d; -} - -static Inst* emit_call_one_arg(TestCtx* tc, Func* f, u32 b, Operand arg, - CfreeCgTypeId arg_ty) { - Inst* in = ir_emit(f, b, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - CfreeCgTypeId params[1] = {arg_ty}; - CfreeCgTypeId fn_ty = make_func_type(tc, CFREE_CG_TYPE_NONE, params, 1, 0); - const ABIFuncInfo* abi = abi_cg_func_info(tc->c->abi, fn_ty); - CGABIValue* args = arena_zarray(f->arena, CGABIValue, 1); - args[0] = call_arg_value(arg_ty, abi ? &abi->params[0] : NULL, arg); - aux->desc = make_call_desc(tc, fn_ty, args, 1, (CGABIValue){0}); - aux->desc.callee = op_global_(OBJ_SYM_NONE, 0, fn_ty); - in->extra.aux = aux; - return in; -} - -static Func* new_named_func(TestCtx* tc, ObjSymId sym, CfreeCgTypeId ret, - const CfreeCgTypeId* params, u32 nparams, - int variadic) { - CGFuncDesc fd; - memset(&fd, 0, sizeof fd); - fd.sym = sym; - fd.fn_type = make_func_type(tc, ret, params, nparams, variadic); - fd.abi = abi_cg_func_info(tc->c->abi, fd.fn_type); - Func* f = ir_func_new(tc->c, &fd); - f->entry = ir_block_new(f); - ir_note_emit(f, f->entry); - return f; -} - -static PReg add_preg(Func* f, CfreeCgTypeId ty) { - return ir_alloc_preg(f, ty, RC_INT); -} - -static void add_reg_param(Func* f, PReg r, CfreeCgTypeId ty) { - CGParamDesc d; - memset(&d, 0, sizeof d); - d.index = f->nparams; - d.type = ty; - d.size = 4; - d.align = 4; - d.storage.kind = CG_LOCAL_STORAGE_REG; - d.storage.v.reg = r; - ir_param_add(f, &d); - - Inst* in = ir_emit(f, f->entry, IR_PARAM_DECL); - in->def = r; - in->type = ty; -} - -static FrameSlot add_frame_param(Func* f, CfreeCgTypeId ty) { - FrameSlot fs = add_frame_slot(f, ty, FS_PARAM, 4, 0); - CGParamDesc d; - memset(&d, 0, sizeof d); - d.index = f->nparams; - d.type = ty; - d.size = 4; - d.align = 4; - d.storage.kind = CG_LOCAL_STORAGE_FRAME; - d.storage.v.frame_slot = fs; - ir_param_add(f, &d); - return fs; -} - -static Inst* emit_preg_load_imm(Func* f, u32 b, PReg dst, CfreeCgTypeId ty, - i64 imm) { - Inst* in = ir_emit(f, b, IR_LOAD_IMM); - in->opnds = arena_array(f->arena, Operand, 1); - in->opnds[0] = op_reg_(dst, ty); - in->nopnds = 1; - in->def = dst; - in->type = ty; - in->extra.imm = imm; - return in; -} - -static Inst* emit_preg_load_local(Func* f, u32 b, PReg dst, FrameSlot fs, - CfreeCgTypeId ty, u16 flags) { - Inst* in = ir_emit(f, b, IR_LOAD); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_local_(fs, ty); - in->nopnds = 2; - in->def = dst; - in->type = ty; - in->extra.mem = mem_local_(fs, ty, 4, flags); - return in; -} - -static Inst* emit_preg_binop(Func* f, u32 b, PReg dst, PReg a, PReg c, - CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_BINOP); - in->opnds = arena_array(f->arena, Operand, 3); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = op_reg_(a, ty); - in->opnds[2] = op_reg_(c, ty); - in->nopnds = 3; - in->def = dst; - in->type = ty; - in->extra.imm = BO_IADD; - return in; -} - -static void emit_preg_ret(Func* f, u32 b, PReg v, CfreeCgTypeId ty) { - emit_ret_val(f, b, v, ty); -} - -static Inst* emit_direct_call(TestCtx* tc, Func* f, u32 b, ObjSymId callee_sym, - CfreeCgTypeId fn_ty, const Operand* args, - u32 nargs, Operand ret) { - Inst* in = ir_emit(f, b, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - CGABIValue* av = NULL; - if (nargs) { - av = arena_zarray(f->arena, CGABIValue, nargs); - const ABIFuncInfo* abi = abi_cg_func_info(tc->c->abi, fn_ty); - for (u32 i = 0; i < nargs; ++i) - av[i] = - call_arg_value(args[i].type, abi ? &abi->params[i] : NULL, args[i]); - } - CGABIValue rv; - memset(&rv, 0, sizeof rv); - if (ret.kind) rv = call_arg_value(ret.type, NULL, ret); - aux->desc = make_call_desc(tc, fn_ty, av, nargs, rv); - aux->desc.callee = op_global_(callee_sym, 0, fn_ty); - in->extra.aux = aux; - return in; -} - -static void expect_plan_arg(const CGCallPlan* p, u32 i, u8 dst_kind, u8 cls, - Reg reg, u32 stack_offset, const char* ctx) { - EXPECT(i < p->nargs, "%s missing arg %u", ctx, (unsigned)i); - if (i >= p->nargs) return; - EXPECT(p->args[i].dst_kind == dst_kind, "%s arg %u dst kind got %u want %u", - ctx, (unsigned)i, (unsigned)p->args[i].dst_kind, (unsigned)dst_kind); - EXPECT(p->args[i].cls == cls, "%s arg %u class got %u want %u", ctx, - (unsigned)i, (unsigned)p->args[i].cls, (unsigned)cls); - if (dst_kind == CG_CALL_PLAN_REG) { - EXPECT(p->args[i].dst_reg == reg, "%s arg %u reg got %u want %u", ctx, - (unsigned)i, (unsigned)p->args[i].dst_reg, (unsigned)reg); - } else if (dst_kind == CG_CALL_PLAN_STACK) { - EXPECT(p->args[i].stack_offset == stack_offset, - "%s arg %u stack offset got %u want %u", ctx, (unsigned)i, - (unsigned)p->args[i].stack_offset, (unsigned)stack_offset); - } -} - -static void expect_plan_ret(const CGCallPlan* p, u32 i, u8 cls, Reg reg, - const char* ctx) { - EXPECT(i < p->nrets, "%s missing ret %u", ctx, (unsigned)i); - if (i >= p->nrets) return; - EXPECT(p->rets[i].cls == cls, "%s ret %u class got %u want %u", ctx, - (unsigned)i, (unsigned)p->rets[i].cls, (unsigned)cls); - EXPECT(p->rets[i].src_reg == reg, "%s ret %u reg got %u want %u", ctx, - (unsigned)i, (unsigned)p->rets[i].src_reg, (unsigned)reg); -} - -/* ============================================================ - * Pass-shape tests — build IR via the public IR API, run one - * pass at a time, assert on IR structure. Backend policy is - * injected through MockCGTarget + opt_machinize. - * ============================================================ */ - -static void opt_machinize_uses_phys_reg_metadata(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg legacy_pool[] = {7}; - static const Reg scratch[] = {9, 10}; - static const CGPhysRegInfo phys[] = { - {13, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_ARG, 0, - 1}, - {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED | CG_REG_RET, - 50, 4}, - {29, RC_INT, 0xff, CG_REG_RESERVED, 0, 0}, - }; - mock_set_pool(&mock, RC_INT, legacy_pool, 1, scratch, 2, 0); - mock_set_phys(&mock, RC_INT, phys, sizeof phys / sizeof phys[0]); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - EXPECT(f->opt_hard_reg_count[RC_INT] == 2, - "phys metadata should replace legacy allocable pool"); - EXPECT(f->opt_hard_regs[RC_INT][0] == 13 && f->opt_hard_regs[RC_INT][1] == 19, - "phys allocable order should be preserved"); - EXPECT((f->opt_caller_saved[RC_INT] & (1u << 13)) != 0, - "caller-saved phys flag should be recorded"); - EXPECT((f->opt_callee_saved[RC_INT] & (1u << 19)) != 0, - "callee-saved phys flag should be recorded"); - EXPECT((f->opt_reserved_regs[RC_INT] & (1u << 29)) != 0, - "reserved phys flag should be recorded"); - EXPECT((f->opt_arg_regs[RC_INT] & (1u << 13)) != 0, - "arg phys flag should be recorded"); - EXPECT((f->opt_ret_regs[RC_INT] & (1u << 19)) != 0, - "ret phys flag should be recorded"); - tc_fini(&tc); -} - -static void opt_machinize_keeps_abi_regs_without_legacy_call_fallback(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg scratch[] = {9, 10}; - static const CGPhysRegInfo phys[] = { - {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_ARG, 0, 0}, - {3, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_RET, 0, 0}, - {12, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, - {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, - }; - mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0); - mock_set_phys(&mock, RC_INT, phys, sizeof phys / sizeof phys[0]); - mock.planned_stack_arg = 1; - - Func* f = new_func(&tc); - Val arg = add_val(f, tc.i64); - emit_load_imm(f, f->entry, arg, tc.i64, 7); - Inst* in = emit_call_void(f, f->entry); - IRCallAux* aux = (IRCallAux*)in->extra.aux; - CGABIValue* args = arena_zarray(f->arena, CGABIValue, 1); - args[0].type = tc.i64; - args[0].storage = op_reg_(arg, tc.i64); - aux->desc.callee = op_reg_(arg, tc.i64); - aux->desc.args = args; - aux->desc.nargs = 1; - - opt_machinize(f, &mock.base); - - EXPECT(mock.plan_call_count == 1, "call should be planned before filtering"); - EXPECT(aux->use_plan_replay, "stack-arg call should use planned replay"); - EXPECT(f->opt_hard_reg_count[RC_INT] == 4, - "planned stack calls should keep ABI arg/ret regs allocable"); - EXPECT(f->opt_hard_regs[RC_INT][0] == 2 && f->opt_hard_regs[RC_INT][1] == 3 && - f->opt_hard_regs[RC_INT][2] == 12 && - f->opt_hard_regs[RC_INT][3] == 19, - "all non-reserved regs should remain allocable under planned calls"); - EXPECT((f->opt_arg_regs[RC_INT] & (1u << 2)) != 0, - "arg metadata should still be recorded"); - EXPECT((f->opt_ret_regs[RC_INT] & (1u << 3)) != 0, - "ret metadata should still be recorded"); - - tc_fini(&tc); -} - -static void opt_machinize_keeps_abi_regs_for_incoming_params(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg scratch[] = {9, 10}; - static const CGPhysRegInfo phys[] = { - {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_ARG, 0, 0}, - {3, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_RET, 0, 0}, - {12, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, - {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, - }; - mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0); - mock_set_phys(&mock, RC_INT, phys, sizeof phys / sizeof phys[0]); - - Func* f = new_func(&tc); - f->nparams = 1; - opt_machinize(f, &mock.base); - EXPECT(f->opt_hard_reg_count[RC_INT] == 4, - "incoming params should not suppress ABI arg/ret regs"); - EXPECT(f->opt_hard_regs[RC_INT][0] == 2 && f->opt_hard_regs[RC_INT][1] == 3 && - f->opt_hard_regs[RC_INT][2] == 12 && - f->opt_hard_regs[RC_INT][3] == 19, - "param functions should keep all non-reserved regs allocable"); - tc_fini(&tc); -} - -static void real_arch_call_plan_layout_one(const RealArchExpect* ex) { - TestCtx tc; - tc_init_target(&tc, ex->arch, CFREE_OS_LINUX); - CGTarget* target = cgtarget_new(tc.c, NULL, NULL); - CfreeCgTypeId large = record_of_i64s(&tc, "Large", 3); - - { - CfreeCgTypeId params[2] = {tc.i64, tc.i64}; - CfreeCgTypeId fn = make_func_type(&tc, tc.i64, params, 2, 0); - const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn); - CGABIValue args[2]; - args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(1, tc.i64)); - args[1] = call_arg_value(tc.i64, &abi->params[1], op_reg_(2, tc.i64)); - CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(3, tc.i64)); - CGCallDesc d = make_call_desc(&tc, fn, args, 2, ret); - CGCallPlan p; - target->plan_call(target, &d, &p); - EXPECT(p.nargs == 2, "%s scalar nargs got %u", ex->name, (unsigned)p.nargs); - expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[0], 0, - ex->name); - expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0, - ex->name); - expect_plan_ret(&p, 0, RC_INT, ex->int_ret[0], ex->name); - EXPECT((p.return_mask[RC_INT] & (1u << ex->int_ret[0])) != 0, - "%s scalar return mask should include first int return reg", - ex->name); - } - - { - CfreeCgTypeId params[2] = {tc.f64, tc.f64}; - CfreeCgTypeId fn = make_func_type(&tc, tc.f64, params, 2, 0); - const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn); - CGABIValue args[2]; - args[0] = - call_arg_value(tc.f64, &abi->params[0], op_reg_cls_(4, tc.f64, RC_FP)); - args[1] = - call_arg_value(tc.f64, &abi->params[1], op_reg_cls_(5, tc.f64, RC_FP)); - CGABIValue ret = - call_arg_value(tc.f64, &abi->ret, op_reg_cls_(6, tc.f64, RC_FP)); - CGCallDesc d = make_call_desc(&tc, fn, args, 2, ret); - CGCallPlan p; - target->plan_call(target, &d, &p); - EXPECT(p.nargs == 2, "%s fp nargs got %u", ex->name, (unsigned)p.nargs); - expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0, ex->name); - expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[1], 0, ex->name); - expect_plan_ret(&p, 0, RC_FP, ex->fp_ret[0], ex->name); - } - - { - CfreeCgTypeId params[4] = {tc.i64, tc.f64, tc.i64, tc.f64}; - CfreeCgTypeId fn = make_func_type(&tc, tc.i64, params, 4, 0); - const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn); - CGABIValue args[4]; - args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(7, tc.i64)); - args[1] = - call_arg_value(tc.f64, &abi->params[1], op_reg_cls_(8, tc.f64, RC_FP)); - args[2] = call_arg_value(tc.i64, &abi->params[2], op_reg_(9, tc.i64)); - args[3] = - call_arg_value(tc.f64, &abi->params[3], op_reg_cls_(10, tc.f64, RC_FP)); - CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(11, tc.i64)); - CGCallDesc d = make_call_desc(&tc, fn, args, 4, ret); - CGCallPlan p; - target->plan_call(target, &d, &p); - expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[0], 0, - ex->name); - expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0, ex->name); - expect_plan_arg(&p, 2, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0, - ex->name); - expect_plan_arg(&p, 3, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[1], 0, ex->name); - } - - { - CfreeCgTypeId params[1] = {tc.i64}; - CfreeCgTypeId fn = make_func_type(&tc, large, params, 1, 0); - const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn); - CGABIValue args[1]; - args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(12, tc.i64)); - CGABIValue ret = call_arg_value(large, &abi->ret, op_local_(1, large)); - CGCallDesc d = make_call_desc(&tc, fn, args, 1, ret); - CGCallPlan p; - target->plan_call(target, &d, &p); - EXPECT(p.has_sret, "%s sret plan should be marked", ex->name); - expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->sret_reg, 0, ex->name); - EXPECT(p.args[0].src_kind == CG_CALL_PLAN_SRC_ADDR, - "%s sret hidden pointer should materialize an address", ex->name); - expect_plan_arg( - &p, 1, CG_CALL_PLAN_REG, RC_INT, - ex->arch == CFREE_ARCH_ARM_64 ? ex->int_arg[0] : ex->int_arg[1], 0, - ex->name); - } - - { - CfreeCgTypeId fixed[1] = {tc.i64}; - CfreeCgTypeId fn = make_func_type(&tc, tc.i64, fixed, 1, 1); - const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn); - CGABIValue args[3]; - args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(13, tc.i64)); - args[1] = call_arg_value(tc.f64, NULL, op_reg_cls_(14, tc.f64, RC_FP)); - args[2] = call_arg_value(tc.i64, NULL, op_reg_(15, tc.i64)); - CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(16, tc.i64)); - CGCallDesc d = make_call_desc(&tc, fn, args, 3, ret); - CGCallPlan p; - target->plan_call(target, &d, &p); - EXPECT(p.is_variadic, "%s variadic plan should be marked", ex->name); - expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[0], 0, - ex->name); - if (ex->variadic_fp_uses_int_reg) { - expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0, - ex->name); - expect_plan_arg(&p, 2, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[2], 0, - ex->name); - } else { - expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0, - ex->name); - expect_plan_arg(&p, 2, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0, - ex->name); - } - if (ex->variadic_fp_counted) - EXPECT(p.variadic_fp_count == 1, "%s variadic FP count got %u want 1", - ex->name, (unsigned)p.variadic_fp_count); - } - - { - CfreeCgTypeId params[9]; - CGABIValue args[9]; - for (u32 i = 0; i < 9; ++i) params[i] = tc.i64; - CfreeCgTypeId fn = make_func_type(&tc, tc.i64, params, 9, 0); - const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn); - for (u32 i = 0; i < 9; ++i) - args[i] = call_arg_value(tc.i64, &abi->params[i], - op_reg_((Reg)(20 + i), tc.i64)); - CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(29, tc.i64)); - CGCallDesc d = make_call_desc(&tc, fn, args, 9, ret); - CGCallPlan p; - target->plan_call(target, &d, &p); - EXPECT(p.stack_arg_size != 0, "%s stack-arg size should be nonzero", - ex->name); - expect_plan_arg(&p, ex->n_int_arg_regs, CG_CALL_PLAN_STACK, RC_INT, 0, 0, - ex->name); - } - - if (ex->arch == CFREE_ARCH_ARM_64) { - CfreeCgTypeId params[10]; - CGABIValue args[10]; - for (u32 i = 0; i < 10; ++i) params[i] = tc.f128; - CfreeCgTypeId fn = make_func_type(&tc, tc.f128, params, 10, 0); - const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn); - for (u32 i = 0; i < 10; ++i) - args[i] = call_arg_value(tc.f128, &abi->params[i], - op_local_((FrameSlot)(20 + i), tc.f128)); - CGABIValue ret = call_arg_value(tc.f128, &abi->ret, op_local_(30, tc.f128)); - CGCallDesc d = make_call_desc(&tc, fn, args, 10, ret); - CGCallPlan p; - target->plan_call(target, &d, &p); - EXPECT(p.stack_arg_size == 32, "aa64 f128 stack-arg size got %u want 32", - (unsigned)p.stack_arg_size); - expect_plan_arg(&p, 8, CG_CALL_PLAN_STACK, RC_FP, 0, 0, "aa64 f128"); - expect_plan_arg(&p, 9, CG_CALL_PLAN_STACK, RC_FP, 0, 16, "aa64 f128"); - } - - tc_fini(&tc); -} - -static void real_arch_call_plan_layouts(void) { - for (u32 i = 0; i < sizeof g_real_arch / sizeof g_real_arch[0]; ++i) - real_arch_call_plan_layout_one(&g_real_arch[i]); -} - -static void opt_regalloc_prefers_caller_saved_for_non_call_value(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg scratch[] = {9, 10}; - static const CGPhysRegInfo phys[] = { - {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_ARG, 0, 0}, - {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, - }; - mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0); - mock_set_phys(&mock, RC_INT, phys, sizeof phys / sizeof phys[0]); - - Func* f = new_func(&tc); - Val v = add_val(f, tc.i32); - emit_load_imm(f, f->entry, v, tc.i32, 42); - emit_ret_val(f, f->entry, v, tc.i32); - opt_machinize(f, &mock.base); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - - EXPECT(f->preg_info[v].alloc_kind == OPT_ALLOC_HARD, - "single live value should allocate a hard register"); - EXPECT(f->preg_info[v].hard_reg == 2, - "non-call-crossing value should prefer caller-saved r2, got r%u", - (unsigned)f->preg_info[v].hard_reg); - tc_fini(&tc); -} - -static void opt_call_plan_drives_call_specific_preservation(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {13}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 1u << 13); - mock.call_clobber_mask[RC_INT] = 0; - - Func* f = new_func(&tc); - Val live = add_val(f, tc.i32); - emit_load_imm(f, f->entry, live, tc.i32, 11); - emit_call_void(f, f->entry); - emit_ret_val(f, f->entry, live, tc.i32); - opt_machinize(f, &mock.base); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - - EXPECT(mock.plan_call_count == 1, "opt_machinize should request call plan"); - Block* b = &f->blocks[f->entry]; - int saw_call_save_restore = 0; - for (u32 i = 1; i + 1 < b->ninsts; ++i) { - if ((IROp)b->insts[i].op == IR_CALL && - (IROp)b->insts[i - 1u].op == IR_STORE && - (IROp)b->insts[i + 1u].op == IR_LOAD) { - saw_call_save_restore = 1; - } - } - EXPECT(!saw_call_save_restore, - "call-specific non-clobbering plan should suppress save/restore"); - tc_fini(&tc); -} - -static void opt_liveness_branch(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 b1 = ir_block_new(f); - u32 b2 = ir_block_new(f); - ir_note_emit(f, b1); - ir_note_emit(f, b2); - Val v = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - Val t = add_val(f, tc.i32); - Val u = add_val(f, tc.i32); - emit_load_imm(f, b0, v, tc.i32, 7); - emit_load_imm(f, b0, c, tc.i32, 1); - Inst* br = ir_emit(f, b0, IR_CMP_BRANCH); - br->opnds = arena_array(f->arena, Operand, 2); - br->opnds[0] = op_reg_(c, tc.i32); - br->opnds[1] = op_imm_(0, tc.i32); - br->nopnds = 2; - br->extra.imm = CMP_NE; - f->blocks[b0].succ[0] = b1; - f->blocks[b0].succ[1] = b2; - f->blocks[b0].nsucc = 2; - emit_copy(f, b1, t, v, tc.i32); - ir_emit(f, b1, IR_RET); - emit_copy(f, b2, u, v, tc.i32); - ir_emit(f, b2, IR_RET); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - opt_live_blocks(f, &live); - - EXPECT(opt_bitset_has(&live.blocks[b0].live_out, v), - "v%u live_out of branch block", v); - EXPECT(opt_bitset_has(&live.blocks[b1].live_in, v), "v%u live_in true block", - v); - EXPECT(opt_bitset_has(&live.blocks[b2].live_in, v), "v%u live_in false block", - v); - EXPECT(!opt_bitset_has(&live.blocks[b1].live_out, v), - "v%u dies in true block", v); - tc_fini(&tc); -} - -static void opt_block_liveness_phase1(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - Val across_branch = add_val(f, tc.i32); - Val across_call = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - - emit_load_imm(f, entry, across_branch, tc.i32, 7); - emit_load_imm(f, entry, across_call, tc.i32, 11); - emit_br_to(f, entry, header); - emit_test_branch(f, header, body, exit, tc.i32); - emit_call_void(f, body); - emit_binop(f, body, out, across_branch, across_call, tc.i32); - emit_br_to(f, body, header); - emit_ret_val(f, exit, across_call, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - - OptLiveInfo live; - opt_live_blocks(f, &live); - EXPECT(opt_bitset_has(&live.blocks[entry].live_out, across_branch), - "branch value should be live-out from entry"); - EXPECT(opt_bitset_has(&live.blocks[header].live_in, across_branch), - "branch value should be live-in to loop header"); - EXPECT(opt_bitset_has(&live.blocks[body].live_in, across_branch), - "branch value should be live-in to loop body"); - EXPECT(opt_bitset_has(&live.blocks[body].live_in, across_call), - "call-crossing value should be live-in to call block"); - EXPECT(opt_bitset_has(&live.blocks[body].live_out, across_call), - "call-crossing value should remain live-out on loop backedge"); - EXPECT(!opt_bitset_has(&live.blocks[exit].live_out, across_call), - "returned value should die at function exit"); - EXPECT(live.block_bytes != 0, "block-liveness byte metric should be set"); - EXPECT(live.set_bit_scans != 0, - "block-liveness set-bit scan metric should be set"); - - CfreeWriter* w = NULL; - (void)cfree_writer_mem(&g_heap, &w); - opt_live_dump_blocks(f, &live, w); - size_t len = 0; - const unsigned char* bytes = cfree_writer_mem_bytes(w, &len); - EXPECT(bytes_contains(bytes, len, "block 0\n"), - "block liveness dump should include block header"); - EXPECT(bytes_contains(bytes, len, " in:"), - "block liveness dump should include live-in set"); - cfree_writer_close(w); - - Func* g = new_func(&tc); - Val a = add_val(g, tc.i32); - Val dead = add_val(g, tc.i32); - emit_load_imm(g, g->entry, a, tc.i32, 1); - emit_copy(g, g->entry, dead, a, tc.i32); - emit_ret_val(g, g->entry, a, tc.i32); - opt_build_cfg(g); - opt_build_loop_tree(g); - OptLiveInfo g_live; - opt_live_blocks(g, &g_live); - opt_dead_def_elim_with_live(g, &g_live); - EXPECT(count_op(g, IR_COPY) == 0, - "DDE should consume pass-local block liveness"); - tc_fini(&tc); -} - -static void opt_liveness_grows_high_preg_bitsets(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 use_block = ir_block_new(f); - ir_note_emit(f, use_block); - - Val high = 193; - ir_ensure_val(f, high, tc.i32, RC_INT); - emit_load_imm(f, entry, high, tc.i32, 9); - emit_br_to(f, entry, use_block); - emit_ret_val(f, use_block, high, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - opt_live_blocks(f, &live); - - EXPECT(opt_bitset_has(&live.blocks[entry].live_out, high), - "high preg should be live-out from defining block"); - EXPECT(opt_bitset_has(&live.blocks[use_block].live_in, high), - "high preg should be live-in to use block"); - EXPECT(live.blocks[entry].live_out.active_words >= high / 64u + 1u, - "live-out bitset should grow to high preg"); - EXPECT(live.blocks[use_block].live_out.active_words == 0, - "live-out bitset should trim after high preg dies"); - tc_fini(&tc); -} - -static void opt_live_ranges_phase2(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - Val loop_v = add_val(f, tc.i32); - Val call_live = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - - emit_load_imm(f, entry, loop_v, tc.i32, 1); - emit_load_imm(f, entry, call_live, tc.i32, 2); - emit_br_to(f, entry, header); - emit_test_branch(f, header, body, exit, tc.i32); - emit_call_void(f, body); - emit_binop(f, body, out, loop_v, call_live, tc.i32); - emit_br_to(f, body, header); - emit_ret_val(f, exit, call_live, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - opt_live_blocks(f, &live); - OptLiveRangeSet ranges; - opt_live_ranges_build(f, &live, &ranges); - - EXPECT(ranges.first_range_by_preg[loop_v] != OPT_RANGE_NONE, - "loop value should have a live range"); - EXPECT(ranges.first_range_by_preg[call_live] != OPT_RANGE_NONE, - "call-live value should have a live range"); - EXPECT(live_range_count_for(&ranges, loop_v) >= 2, - "loop-carried value should have ranges in multiple blocks"); - EXPECT(ranges.live_length_by_preg[loop_v] != 0, - "loop value should record live length"); - EXPECT(ranges.spill_cost_by_preg[loop_v] != 0, - "loop value should record spill cost"); - EXPECT(ranges.live_across_call_freq_by_preg[call_live] != 0, - "call-live value should record live-across-call frequency"); - EXPECT(ranges.point_count != 0, "range builder should compress points"); - EXPECT(ranges.point_count <= ranges.raw_point_count, - "compressed point count should not exceed raw point count"); - EXPECT(ranges.whole_block_spans != 0, - "range builder should record whole-block spans"); - EXPECT(ranges.max_ranges_per_preg >= live_range_count_for(&ranges, loop_v), - "range metrics should record max ranges per value"); - - CfreeWriter* w = NULL; - (void)cfree_writer_mem(&g_heap, &w); - opt_live_dump_ranges(f, &ranges, w); - size_t len = 0; - const unsigned char* bytes = cfree_writer_mem_bytes(w, &len); - EXPECT(bytes_contains(bytes, len, "ranges total="), - "range dump should include summary"); - EXPECT(bytes_contains(bytes, len, "r"), - "range dump should include preg rows"); - cfree_writer_close(w); - tc_fini(&tc); -} - -static void opt_range_liveness_linear(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b = f->entry; - Val a = add_val(f, tc.i32); - Val bv = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - - emit_load_imm(f, b, a, tc.i32, 1); - u32 ia = f->blocks[b].ninsts - 1u; - emit_load_imm(f, b, bv, tc.i32, 2); - u32 ib = f->blocks[b].ninsts - 1u; - emit_binop(f, b, c, a, bv, tc.i32); - u32 ic = f->blocks[b].ninsts - 1u; - emit_ret_val(f, b, c, tc.i32); - u32 ir = f->blocks[b].ninsts - 1u; - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - OptLiveRangeSet ranges; - opt_live_blocks(f, &live); - opt_live_ranges_build(f, &live, &ranges); - - (void)ia; - (void)ib; - (void)ic; - (void)ir; - EXPECT(ranges.first_range_by_preg[a] != OPT_RANGE_NONE, - "a should get a live range"); - EXPECT(ranges.first_range_by_preg[bv] != OPT_RANGE_NONE, - "b should get a live range"); - EXPECT(ranges.first_range_by_preg[c] != OPT_RANGE_NONE, - "c should get a live range"); - EXPECT(ranges_overlap(&ranges, a, bv), "a and b ranges should overlap"); - EXPECT(ranges_overlap(&ranges, a, c), - "same-instruction def/use should overlap a and c"); - EXPECT(ranges_overlap(&ranges, bv, c), - "same-instruction def/use should overlap b and c"); - tc_fini(&tc); -} - -static void opt_interference_branch_disjoint(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - u32 entry = f->entry; - u32 then_b = ir_block_new(f); - u32 else_b = ir_block_new(f); - ir_note_emit(f, then_b); - ir_note_emit(f, else_b); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - - emit_test_branch(f, entry, then_b, else_b, tc.i32); - emit_load_imm(f, then_b, a, tc.i32, 11); - emit_ret_val(f, then_b, a, tc.i32); - emit_load_imm(f, else_b, b, tc.i32, 22); - emit_ret_val(f, else_b, b, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - OptLiveRangeSet ranges; - opt_live_blocks(f, &live); - opt_live_ranges_build(f, &live, &ranges); - - EXPECT(!ranges_overlap(&ranges, a, b), - "branch-local values v%u and v%u should not overlap", a, b); - - opt_regalloc(f, 0); - EXPECT(f->preg_info[a].alloc_kind == OPT_ALLOC_HARD, - "then value should get the one hard register"); - EXPECT(f->preg_info[b].alloc_kind == OPT_ALLOC_HARD, - "else value should share the one hard register"); - EXPECT(f->preg_info[a].hard_reg == f->preg_info[b].hard_reg, - "disjoint branch values should share a hard register"); - tc_fini(&tc); -} - -static void opt_range_overlap_def_live_out(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b = f->entry; - Val a = add_val(f, tc.i32); - Val bv = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - - emit_load_imm(f, b, a, tc.i32, 1); - emit_load_imm(f, b, bv, tc.i32, 2); - emit_binop(f, b, c, a, bv, tc.i32); - emit_ret_val(f, b, c, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - OptLiveRangeSet ranges; - opt_live_blocks(f, &live); - opt_live_ranges_build(f, &live, &ranges); - - EXPECT(ranges_overlap(&ranges, a, bv), "a and b should overlap"); - EXPECT(ranges_overlap(&ranges, bv, a), "range overlap should be symmetric"); - EXPECT(ranges_overlap(&ranges, a, c), - "a should overlap c for same-instruction def/use lowering"); - EXPECT(ranges_overlap(&ranges, bv, c), - "b should overlap c for same-instruction def/use lowering"); - tc_fini(&tc); -} - -static void opt_loop_frequency_weights_ranges(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - Val loop_v = add_val(f, tc.i32); - Val exit_v = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - - emit_load_imm(f, entry, loop_v, tc.i32, 1); - emit_br_to(f, entry, header); - emit_test_branch(f, header, body, exit, tc.i32); - emit_binop(f, body, out, loop_v, loop_v, tc.i32); - emit_br_to(f, body, header); - emit_load_imm(f, exit, exit_v, tc.i32, 2); - emit_ret_val(f, exit, exit_v, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - OptLiveRangeSet ranges; - opt_live_blocks(f, &live); - opt_live_ranges_build(f, &live, &ranges); - - EXPECT(ranges.use_freq_by_preg[loop_v] > ranges.use_freq_by_preg[exit_v], - "loop-used value should have higher weighted use frequency"); - EXPECT(ranges.spill_cost_by_preg[loop_v] > ranges.spill_cost_by_preg[exit_v], - "loop-used value should have higher spill cost"); - tc_fini(&tc); -} - -static void opt_live_across_call_frequency(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b = f->entry; - Val live_val = add_val(f, tc.i32); - Val dead = add_val(f, tc.i32); - - emit_load_imm(f, b, live_val, tc.i32, 11); - emit_load_imm(f, b, dead, tc.i32, 12); - emit_call_void(f, b); - emit_ret_val(f, b, live_val, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live_info; - OptLiveRangeSet ranges; - opt_live_blocks(f, &live_info); - opt_live_ranges_build(f, &live_info, &ranges); - - EXPECT(ranges.live_across_call_freq_by_preg[live_val] > 0, - "live value should be marked live across call"); - EXPECT(ranges.live_across_call_freq_by_preg[dead] == 0, - "dead value should not be marked live across call"); - tc_fini(&tc); -} - -static void opt_range_overlap_class(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b = f->entry; - Val i0 = add_val(f, tc.i32); - Val i1 = add_val(f, tc.i32); - Val fp = add_val_cls(f, tc.i64, RC_FP); - - emit_load_imm(f, b, i0, tc.i32, 1); - emit_load_imm(f, b, i1, tc.i32, 2); - emit_load_imm(f, b, fp, tc.i64, 3); - emit_binop(f, b, i1, i0, i1, tc.i32); - emit_ret_val(f, b, i1, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - OptLiveRangeSet ranges; - opt_live_blocks(f, &live); - opt_live_ranges_build(f, &live, &ranges); - - EXPECT(ranges_overlap(&ranges, i0, i1), "int values should overlap"); - EXPECT(ranges_overlap(&ranges, i1, i0), "overlap should be symmetric"); - EXPECT(f->val_cls[i0] != f->val_cls[fp], - "range allocator keeps register classes out of overlap policy"); - tc_fini(&tc); -} - -static void opt_cfg_prunes_unreachable(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 b1 = ir_block_new(f); - u32 dead = ir_block_new(f); - ir_note_emit(f, b1); - ir_note_emit(f, dead); - - Val live_val = add_val(f, tc.i32); - Val dead_val = add_val(f, tc.i32); - Inst* br = ir_emit(f, b0, IR_BR); - (void)br; - f->blocks[b0].succ[0] = b1; - f->blocks[b0].nsucc = 1; - emit_load_imm(f, b1, live_val, tc.i32, 7); - emit_ret_val(f, b1, live_val, tc.i32); - emit_load_imm(f, dead, dead_val, tc.i32, 99); - emit_ret_val(f, dead, dead_val, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - OptLiveRangeSet ranges; - opt_live_blocks(f, &live); - opt_live_ranges_build(f, &live, &ranges); - - EXPECT(f->blocks[dead].ninsts == 0, - "unreachable block should have no instructions after cfg cleanup"); - EXPECT(f->blocks[dead].nsucc == 0, - "unreachable block should have no successors after cfg cleanup"); - EXPECT(f->emit_order_n == 2, - "emit_order should skip unreachable block, got %u", - (unsigned)f->emit_order_n); - EXPECT(ranges.first_range_by_preg[dead_val] == OPT_RANGE_NONE, - "unreachable value should not get a live range"); - tc_fini(&tc); -} - -static void opt_cfg_preserves_scope_edges(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 b_then = ir_block_new(f); - u32 b_else = ir_block_new(f); - u32 b_join = ir_block_new(f); - ir_note_emit(f, b_then); - ir_note_emit(f, b_else); - ir_note_emit(f, b_join); - - Val v = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - Val tv = add_val(f, tc.i32); - Val ev = add_val(f, tc.i32); - emit_load_imm(f, b0, v, tc.i32, 7); - emit_load_imm(f, b0, c, tc.i32, 1); - - Inst* begin = ir_emit(f, b0, IR_SCOPE_BEGIN); - IRScopeAux* begin_aux = arena_znew(f->arena, IRScopeAux); - begin_aux->desc.kind = SCOPE_IF; - begin_aux->desc.cond = op_reg_(c, tc.i32); - begin_aux->scope_id = 1; - begin_aux->if_then_block = b_then; - begin_aux->if_else_block = b_else; - begin_aux->if_end_block = b_join; - begin->extra.aux = begin_aux; - f->blocks[b0].succ[0] = b_then; - f->blocks[b0].succ[1] = b_else; - f->blocks[b0].nsucc = 2; - - emit_copy(f, b_then, tv, v, tc.i32); - Inst* scope_else = ir_emit(f, b_then, IR_SCOPE_ELSE); - scope_else->extra.imm = 1; - f->blocks[b_then].succ[0] = b_join; - f->blocks[b_then].nsucc = 1; - - emit_copy(f, b_else, ev, v, tc.i32); - Inst* scope_end = ir_emit(f, b_else, IR_SCOPE_END); - scope_end->extra.imm = 1; - f->blocks[b_else].succ[0] = b_join; - f->blocks[b_else].nsucc = 1; - emit_ret_val(f, b_join, v, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - OptLiveInfo live; - opt_live_blocks(f, &live); - - EXPECT(f->blocks[b0].nsucc == 2, - "scope_begin IF should preserve two CFG successors, got %u", - (unsigned)f->blocks[b0].nsucc); - EXPECT(f->blocks[b_join].npreds == 2, - "scope join should have then/else predecessors, got %u", - (unsigned)f->blocks[b_join].npreds); - EXPECT(opt_bitset_has(&live.blocks[b_then].live_in, v), - "scope then block should remain reachable and see v%u live-in", v); - EXPECT(opt_bitset_has(&live.blocks[b_else].live_in, v), - "scope else block should remain reachable and see v%u live-in", v); - tc_fini(&tc); -} - -static int block_list_has(const OptBlockList* list, u32 block) { - for (u32 i = 0; i < list->n; ++i) - if (list->items[i] == block) return 1; - return 0; -} - -static void opt_analysis_dominators_and_frontier(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 then_b = ir_block_new(f); - u32 else_b = ir_block_new(f); - u32 join = ir_block_new(f); - u32 dead = ir_block_new(f); - ir_note_emit(f, then_b); - ir_note_emit(f, else_b); - ir_note_emit(f, join); - ir_note_emit(f, dead); - - Val v = add_val(f, tc.i32); - emit_test_branch(f, entry, then_b, else_b, tc.i32); - emit_br_to(f, then_b, join); - emit_br_to(f, else_b, join); - emit_load_imm(f, join, v, tc.i32, 17); - emit_ret_val(f, join, v, tc.i32); - emit_ret_val(f, dead, v, tc.i32); - - opt_build_cfg(f); - EXPECT(opt_analysis_has(f, OPT_ANALYSIS_CFG), - "build_cfg should mark CFG analysis valid"); - opt_verify(f, "analysis-test"); - OptAnalysis a; - memset(&a, 0, sizeof a); - opt_analysis_build_dom_frontier(f, &a); - EXPECT(opt_analysis_has(f, OPT_ANALYSIS_DOM), - "dominator build should mark dominators valid"); - - EXPECT(a.npo == 4, "analysis should visit four reachable blocks, got %u", - (unsigned)a.npo); - EXPECT(a.reachable[entry] && a.reachable[then_b] && a.reachable[else_b] && - a.reachable[join], - "analysis should mark diamond reachable"); - EXPECT(!a.reachable[dead], "analysis should leave pruned block unreachable"); - EXPECT(a.idom[entry] == entry, "entry should dominate itself"); - EXPECT(a.idom[then_b] == entry, "then idom should be entry"); - EXPECT(a.idom[else_b] == entry, "else idom should be entry"); - EXPECT(a.idom[join] == entry, "join idom should be entry"); - EXPECT(opt_analysis_dominates(&a, entry, join), "entry should dominate join"); - EXPECT(!opt_analysis_dominates(&a, then_b, else_b), - "then should not dominate else"); - EXPECT(block_list_has(&a.dom_frontier[then_b], join), - "then dominance frontier should contain join"); - EXPECT(block_list_has(&a.dom_frontier[else_b], join), - "else dominance frontier should contain join"); - tc_fini(&tc); -} - -static void opt_ssa_diamond_mem2reg_phi(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, 0); - u32 entry = f->entry; - u32 then_b = ir_block_new(f); - u32 else_b = ir_block_new(f); - u32 join = ir_block_new(f); - ir_note_emit(f, then_b); - ir_note_emit(f, else_b); - ir_note_emit(f, join); - - emit_test_branch(f, entry, then_b, else_b, tc.i32); - Val tv = add_val(f, tc.i32); - Val ev = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_load_imm(f, then_b, tv, tc.i32, 11); - emit_store_local(f, then_b, fs, tv, tc.i32, 0); - emit_br_to(f, then_b, join); - emit_load_imm(f, else_b, ev, tc.i32, 22); - emit_store_local(f, else_b, fs, ev, tc.i32, 0); - emit_br_to(f, else_b, join); - emit_load_local(f, join, out, fs, tc.i32, 0); - emit_ret_val(f, join, out, tc.i32); - - opt_build_cfg(f); - opt_build_ssa(f); - opt_verify(f, "test-ssa-diamond"); - EXPECT(count_op(f, IR_PHI) == 1, "diamond should insert one phi"); - EXPECT(count_op(f, IR_STORE) == 0, "promoted stores should be removed"); - EXPECT(count_op(f, IR_LOAD) == 0, "promoted load should be removed"); - Inst* phi = &f->blocks[join].insts[0]; - IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; - EXPECT((IROp)phi->op == IR_PHI && phi->def != VAL_NONE, - "phi should have a real def"); - EXPECT(aux && aux->slot_id == (u32)fs, "phi should carry slot id in aux"); - EXPECT(aux && aux->npreds == f->blocks[join].npreds, - "phi predecessor count should match block preds"); - EXPECT(aux && aux->pred_vals[0] == tv && aux->pred_vals[1] == ev, - "phi inputs should be renamed to branch-local defs"); - Inst* ret = &f->blocks[join].insts[f->blocks[join].ninsts - 1u]; - IRRetAux* ret_aux = (IRRetAux*)ret->extra.aux; - EXPECT(ret_aux && ret_aux->val.storage.v.reg == (Reg)phi->def, - "promoted load use should be rewritten to phi def"); - EXPECT(count_uses_of(f, phi->def) >= 1, - "def-use chains should include phi def users"); - - CfreeWriter* w = NULL; - (void)cfree_writer_mem(&g_heap, &w); - opt_ssa_dump(f, w); - size_t len = 0; - const unsigned char* bytes = cfree_writer_mem_bytes(w, &len); - EXPECT(bytes_contains(bytes, len, " phi slot="), - "SSA dump should include phi records"); - EXPECT(bytes_contains(bytes, len, "preds=b"), - "SSA dump should include phi predecessor inputs"); - EXPECT(bytes_contains(bytes, len, "uses=v"), - "SSA dump should include rewritten operand uses"); - cfree_writer_close(w); - - opt_make_conventional_ssa(f); - opt_verify(f, "test-ssa-diamond-conventional"); - opt_undo_ssa(f); - opt_verify(f, "test-ssa-diamond-undo"); - EXPECT(count_op(f, IR_PHI) == 0, "undo_ssa should remove phis"); - EXPECT(count_op(f, IR_COPY) >= 2, "phi lowering should insert edge copies"); - tc_fini(&tc); -} - -static void opt_ssa_loop_carried_phi(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, 0); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - - Val zero = add_val(f, tc.i32); - Val cur = add_val(f, tc.i32); - Val one = add_val(f, tc.i32); - Val next = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_load_imm(f, entry, zero, tc.i32, 0); - emit_store_local(f, entry, fs, zero, tc.i32, 0); - emit_br_to(f, entry, header); - emit_load_local(f, header, cur, fs, tc.i32, 0); - emit_test_branch(f, header, body, exit, tc.i32); - emit_load_imm(f, body, one, tc.i32, 1); - emit_binop(f, body, next, cur, one, tc.i32); - emit_store_local(f, body, fs, next, tc.i32, 0); - emit_br_to(f, body, header); - emit_load_local(f, exit, out, fs, tc.i32, 0); - emit_ret_val(f, exit, out, tc.i32); - - opt_build_cfg(f); - opt_build_ssa(f); - opt_verify(f, "test-ssa-loop"); - EXPECT(count_op(f, IR_PHI) == 1, "loop should insert one carried phi"); - EXPECT((IROp)f->blocks[header].insts[0].op == IR_PHI, - "loop-carried phi should be in the header"); - Inst* phi = &f->blocks[header].insts[0]; - IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; - EXPECT(aux && aux->npreds == 2 && aux->pred_vals[0] == zero && - aux->pred_vals[1] == next, - "loop phi should carry entry and backedge defs"); - EXPECT(count_op(f, IR_STORE) == 0, "loop promoted stores should be removed"); - EXPECT(count_op(f, IR_LOAD) == 0, "loop promoted loads should be removed"); - opt_make_conventional_ssa(f); - opt_undo_ssa(f); - opt_verify(f, "test-ssa-loop-undo"); - tc_fini(&tc); -} - -static void opt_ssa_non_promotable_slots_stay_memory(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val v = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_load_imm(f, f->entry, v, tc.i32, 7); - emit_store_local(f, f->entry, fs, v, tc.i32, 0); - emit_load_local(f, f->entry, out, fs, tc.i32, 0); - emit_ret_val(f, f->entry, out, tc.i32); - opt_build_cfg(f); - opt_build_ssa(f); - opt_verify(f, "test-ssa-addr-taken"); - EXPECT(count_op(f, IR_STORE) == 1 && count_op(f, IR_LOAD) == 1, - "address-taken slot should remain in memory"); - - Func* g = new_func(&tc); - fs = add_frame_slot(g, tc.i32, FS_LOCAL, 4, FSF_VOLATILE); - v = add_val(g, tc.i32); - out = add_val(g, tc.i32); - emit_load_imm(g, g->entry, v, tc.i32, 9); - emit_store_local(g, g->entry, fs, v, tc.i32, MF_VOLATILE); - emit_load_local(g, g->entry, out, fs, tc.i32, MF_VOLATILE); - emit_ret_val(g, g->entry, out, tc.i32); - opt_build_cfg(g); - opt_build_ssa(g); - opt_verify(g, "test-ssa-volatile"); - EXPECT(count_op(g, IR_STORE) == 1 && count_op(g, IR_LOAD) == 1, - "volatile slot should remain in memory"); - tc_fini(&tc); -} - -static void opt_ssa_conventional_splits_critical_edge(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, 0); - u32 entry = f->entry; - u32 other = ir_block_new(f); - u32 join = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, other); - ir_note_emit(f, join); - ir_note_emit(f, exit); - - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_load_imm(f, entry, a, tc.i32, 1); - emit_store_local(f, entry, fs, a, tc.i32, 0); - emit_test_branch(f, entry, join, other, tc.i32); - emit_load_imm(f, other, b, tc.i32, 2); - emit_store_local(f, other, fs, b, tc.i32, 0); - emit_br_to(f, other, join); - emit_load_local(f, join, out, fs, tc.i32, 0); - emit_ret_val(f, join, out, tc.i32); - ir_emit(f, exit, IR_RET); - - opt_build_cfg(f); - u32 before_blocks = f->nblocks; - opt_build_ssa(f); - EXPECT(count_op(f, IR_PHI) == 1, "critical-edge test should build a phi"); - opt_make_conventional_ssa(f); - opt_verify(f, "test-ssa-critical-edge"); - EXPECT(f->nblocks > before_blocks, - "conventional SSA should split the critical edge"); - opt_undo_ssa(f); - opt_verify(f, "test-ssa-critical-edge-undo"); - EXPECT(count_op(f, IR_PHI) == 0, "critical-edge undo should remove phi"); - tc_fini(&tc); -} - -#ifndef NDEBUG -static void stale_verify_arg(void* arg) { - opt_verify((Func*)arg, "stale-def-use-test"); -} -#endif - -static void opt_verify_catches_stale_def_use(void) { -#ifdef NDEBUG - /* opt_verify is a no-op under NDEBUG, so it cannot catch this. */ - return; -#else - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_load_imm(f, f->entry, b, tc.i32, 2); - emit_ret_val(f, f->entry, a, tc.i32); - opt_build_cfg(f); - opt_rebuild_def_use(f); - Inst* ret = &f->blocks[f->entry].insts[f->blocks[f->entry].ninsts - 1u]; - IRRetAux* aux = (IRRetAux*)ret->extra.aux; - aux->val.storage.v.reg = (Reg)b; - EXPECT(expect_panic(tc.c, stale_verify_arg, f), - "verifier should catch stale cached def-use after mutation"); - tc_fini(&tc); -#endif -} - -static void opt_ssa_dce_removes_dead_defs_and_phi(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, 0); - u32 entry = f->entry; - u32 then_b = ir_block_new(f); - u32 else_b = ir_block_new(f); - u32 join = ir_block_new(f); - ir_note_emit(f, then_b); - ir_note_emit(f, else_b); - ir_note_emit(f, join); - - Val keep = add_val(f, tc.i32); - Val dead = add_val(f, tc.i32); - Val tv = add_val(f, tc.i32); - Val ev = add_val(f, tc.i32); - emit_load_imm(f, entry, keep, tc.i32, 42); - emit_load_imm(f, entry, dead, tc.i32, 99); - emit_test_branch(f, entry, then_b, else_b, tc.i32); - emit_load_imm(f, then_b, tv, tc.i32, 11); - emit_store_local(f, then_b, fs, tv, tc.i32, 0); - emit_br_to(f, then_b, join); - emit_load_imm(f, else_b, ev, tc.i32, 22); - emit_store_local(f, else_b, fs, ev, tc.i32, 0); - emit_br_to(f, else_b, join); - emit_ret_val(f, join, keep, tc.i32); - - opt_build_cfg(f); - opt_build_ssa(f); - EXPECT(count_op(f, IR_PHI) == 1, "test should build an unused phi"); - opt_ssa_dce(f); - opt_verify(f, "test-ssa-dce"); - EXPECT(count_op(f, IR_PHI) == 0, "SSA DCE should remove unused phi"); - EXPECT(count_uses_of(f, dead) == 0, "dead value should have no users"); - EXPECT(count_op(f, IR_LOAD_IMM) == 1, - "SSA DCE should remove dead pure load_imm defs"); - tc_fini(&tc); -} - -static void opt_copy_cleanup_rewrites_users(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 42); - emit_copy(f, f->entry, b, a, tc.i32); - emit_copy(f, f->entry, c, b, tc.i32); - emit_ret_val(f, f->entry, c, tc.i32); - opt_build_cfg(f); - opt_rebuild_def_use(f); - opt_copy_cleanup(f); - opt_verify(f, "test-copy-cleanup"); - Inst* ret = &f->blocks[f->entry].insts[f->blocks[f->entry].ninsts - 1u]; - IRRetAux* aux = (IRRetAux*)ret->extra.aux; - EXPECT(count_op(f, IR_COPY) == 0, "copy cleanup should remove copy chain"); - EXPECT(aux && aux->val.storage.v.reg == (Reg)a, - "copy cleanup should rewrite final user to original value"); - tc_fini(&tc); -} - -static void opt_copy_prop_rewrites_ssa_copy_chain(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 42); - emit_copy(f, f->entry, b, a, tc.i32); - emit_copy(f, f->entry, c, b, tc.i32); - emit_ret_val(f, f->entry, c, tc.i32); - - opt_build_cfg(f); - opt_copy_prop(f); - opt_verify(f, "test-copy-prop-chain"); - EXPECT(count_op(f, IR_COPY) == 0, - "copy propagation should remove SSA copy chains"); - EXPECT(ret_val(f, f->entry) == a, - "copy propagation should rewrite users to the original value"); - tc_fini(&tc); -} - -static void opt_copy_prop_collapses_redundant_extension_chain(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val x = add_val(f, tc.i8); - Val w16 = add_val(f, tc.i16); - Val w32 = add_val(f, tc.i32); - Val w64 = add_val(f, tc.i64); - emit_scalar_input(f, f->entry, x, tc.i8); - emit_convert_typed(f, f->entry, w16, x, tc.i16, tc.i8, CV_ZEXT); - emit_convert_typed(f, f->entry, w32, w16, tc.i32, tc.i16, CV_ZEXT); - emit_convert_typed(f, f->entry, w64, w32, tc.i64, tc.i32, CV_ZEXT); - emit_ret_val(f, f->entry, w64, tc.i64); - - opt_build_cfg(f); - opt_copy_prop(f); - opt_verify(f, "test-copy-prop-ext-chain"); - - Inst* wide = def_inst(f, w64); - EXPECT(wide && (IROp)wide->op == IR_CONVERT && wide->nopnds == 2, - "extension cleanup should preserve the final widening conversion"); - EXPECT( - wide && wide->opnds[1].kind == OPK_REG && wide->opnds[1].v.reg == (Reg)x, - "extension cleanup should bypass redundant intermediate extensions"); - expect_ir_dump_eq(f, - "ir blocks=1 vals=5\n" - "block 0 preds=[] succs=[] insts=5\n" - " 0 param_decl def=v1\n" - " 1 convert def=v2 opnds=[v2,v1]\n" - " 2 convert def=v3 opnds=[v3,v1]\n" - " 3 convert def=v4 opnds=[v4,v1]\n" - " 4 ret ret=v4\n", - "copy prop extension chain"); - tc_fini(&tc); -} - -static void opt_block_cloning_clones_small_join_blocks(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 then_b = ir_block_new(f); - u32 else_b = ir_block_new(f); - u32 join = ir_block_new(f); - ir_note_emit(f, then_b); - ir_note_emit(f, else_b); - ir_note_emit(f, join); - - Val out = add_val(f, tc.i32); - emit_test_branch(f, entry, then_b, else_b, tc.i32); - emit_br_to(f, then_b, join); - emit_br_to(f, else_b, join); - emit_load_imm(f, join, out, tc.i32, 7); - emit_ret_val(f, join, out, tc.i32); - - opt_build_cfg(f); - u32 before_blocks = f->nblocks; - opt_block_cloning(f); - opt_verify(f, "test-block-clone-join"); - - EXPECT(f->nblocks > before_blocks, - "block cloning should create per-predecessor join clones"); - EXPECT(f->blocks[join].ninsts == 0, - "original fully cloned join should become unreachable"); - EXPECT(count_op(f, IR_RET) == 2, - "cloned join should leave two reachable return blocks"); - expect_ir_dump_eq(f, - "ir blocks=6 vals=4\n" - "block 0 preds=[] succs=[b1,b2] insts=1\n" - " 0 cmp_branch opnds=[imm:1,imm:0]\n" - "block 1 preds=[b0] succs=[b4] insts=1\n" - " 0 br\n" - "block 2 preds=[b0] succs=[b5] insts=1\n" - " 0 br\n" - "block 3 preds=[] succs=[] insts=0\n" - "block 4 preds=[b1] succs=[] insts=2\n" - " 0 load_imm def=v2 opnds=[v2] imm=7\n" - " 1 ret ret=v2\n" - "block 5 preds=[b2] succs=[] insts=2\n" - " 0 load_imm def=v3 opnds=[v3] imm=7\n" - " 1 ret ret=v3\n", - "block clone"); - tc_fini(&tc); -} - -static void opt_block_cloning_skips_loop_backedges(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - - Val out = add_val(f, tc.i32); - emit_br_to(f, entry, header); - emit_test_branch(f, header, body, exit, tc.i32); - emit_br_to(f, body, header); - emit_load_imm(f, exit, out, tc.i32, 3); - emit_ret_val(f, exit, out, tc.i32); - - opt_build_cfg(f); - u32 before_blocks = f->nblocks; - opt_block_cloning(f); - opt_verify(f, "test-block-clone-loop-skip"); - EXPECT(f->nblocks == before_blocks, - "block cloning should skip loop headers/backedges"); - tc_fini(&tc); -} - -static void opt_addr_xform_folds_local_addr_into_memory_operand(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - Val addr = add_val(f, ptr_ty); - Val out = add_val(f, tc.i32); - emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32); - emit_load_indirect(f, f->entry, out, addr, tc.i32, 0); - emit_ret_val(f, f->entry, out, tc.i32); - - opt_build_cfg(f); - opt_build_ssa(f); - opt_addr_xform(f); - opt_verify(f, "test-addr-xform-local"); - Inst* load = &f->blocks[f->entry].insts[1]; - EXPECT((IROp)load->op == IR_LOAD && load->opnds[1].kind == OPK_LOCAL, - "address transform should fold local addr_of into load operand"); - expect_ir_dump_eq(f, - "ir blocks=1 vals=3\n" - "block 0 preds=[] succs=[] insts=3\n" - " 0 nop\n" - " 1 load def=v2 opnds=[v2,local#1] mem=size4 align4 " - "flags=0x0 alias=unknown\n" - " 2 ret ret=v2\n", - "address transform"); - opt_ssa_dce(f); - opt_copy_cleanup(f); - opt_verify(f, "test-addr-xform-local-cleanup"); - EXPECT(count_op(f, IR_ADDR_OF) == 0, - "cleanup after addr_xform should remove dead addr_of pseudo"); - tc_fini(&tc); -} - -static void opt_addr_xform_preserves_volatile_and_globals(void) { - TestCtx tc; - tc_init(&tc); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val addr = add_val(f, ptr_ty); - Val out = add_val(f, tc.i32); - emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32); - emit_load_indirect(f, f->entry, out, addr, tc.i32, MF_VOLATILE); - emit_ret_val(f, f->entry, out, tc.i32); - opt_build_cfg(f); - opt_build_ssa(f); - opt_addr_xform(f); - opt_verify(f, "test-addr-xform-volatile"); - EXPECT(count_op(f, IR_ADDR_OF) == 1, - "address transform should preserve volatile memory addresses"); - - Func* g = new_func(&tc); - ObjSymId sym = 1; - Val gaddr = add_val(g, ptr_ty); - Val gout = add_val(g, tc.i32); - Inst* a = ir_emit(g, g->entry, IR_ADDR_OF); - a->opnds = arena_array(g->arena, Operand, 2); - a->opnds[0] = op_reg_(gaddr, ptr_ty); - a->opnds[1] = op_global_(sym, 0, tc.i32); - a->nopnds = 2; - a->def = gaddr; - a->type = ptr_ty; - g->val_def_block[gaddr] = g->entry; - g->val_def_inst[gaddr] = g->blocks[g->entry].ninsts - 1u; - emit_load_indirect(g, g->entry, gout, gaddr, tc.i32, 0); - emit_ret_val(g, g->entry, gout, tc.i32); - opt_build_cfg(g); - opt_build_ssa(g); - opt_addr_xform(g); - opt_verify(g, "test-addr-xform-global"); - EXPECT(count_op(g, IR_ADDR_OF) == 1, - "address transform should leave global address materialization alone"); - tc_fini(&tc); -} - -static void opt_ssa_combine_fuses_cmp_condbr(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 taken = ir_block_new(f); - u32 fallthrough = ir_block_new(f); - ir_note_emit(f, taken); - ir_note_emit(f, fallthrough); - - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val cmp = add_val(f, tc.i32); - emit_scalar_input(f, entry, a, tc.i32); - emit_scalar_input(f, entry, b, tc.i32); - emit_cmp(f, entry, cmp, a, b, tc.i32, CMP_LT_S); - emit_raw_condbr(f, entry, cmp, taken, fallthrough, tc.i32); - emit_ret_val(f, taken, a, tc.i32); - emit_ret_val(f, fallthrough, b, tc.i32); - - opt_build_cfg(f); - opt_ssa_combine(f); - opt_verify(f, "test-ssa-combine-cmp-branch"); - - Inst* br = &f->blocks[entry].insts[f->blocks[entry].ninsts - 1u]; - EXPECT((IROp)br->op == IR_CMP_BRANCH && br->extra.imm == CMP_LT_S, - "ssa combine should fuse cmp-fed condbr into cmp_branch"); - EXPECT(br->nopnds == 2 && br->opnds[0].v.reg == (Reg)a && - br->opnds[1].v.reg == (Reg)b, - "fused cmp_branch should use the original cmp operands"); - tc_fini(&tc); -} - -static void opt_ssa_combine_folds_partial_local_addr_uses(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - Val addr = add_val(f, ptr_ty); - Val out = add_val(f, tc.i32); - Val src = add_val(f, tc.i32); - emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32); - emit_load_indirect(f, f->entry, out, addr, tc.i32, 0); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_store_indirect(f, f->entry, addr, src, tc.i32, MF_VOLATILE); - emit_ret_val(f, f->entry, out, tc.i32); - - opt_build_cfg(f); - opt_ssa_combine(f); - opt_verify(f, "test-ssa-combine-addr"); - - Inst* load = &f->blocks[f->entry].insts[1]; - Inst* store = &f->blocks[f->entry].insts[3]; - EXPECT((IROp)load->op == IR_LOAD && load->opnds[1].kind == OPK_LOCAL, - "ssa combine should fold foldable local address load uses"); - EXPECT((IROp)store->op == IR_STORE && store->opnds[0].kind == OPK_INDIRECT, - "ssa combine should preserve non-foldable address uses"); - tc_fini(&tc); -} - -static void opt_simplify_local_rewrites_integer_identities(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val x = add_val(f, tc.i32); - Val mul = add_val(f, tc.i32); - Val rem = add_val(f, tc.i32); - Val self_xor = add_val(f, tc.i32); - Val self_or = add_val(f, tc.i32); - Val cmp = add_val(f, tc.i32); - Val cv = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, x, tc.i32); - Inst* in = emit_binop(f, f->entry, mul, x, x, tc.i32); - in->extra.imm = BO_IMUL; - in->opnds[2] = op_imm_(1, tc.i32); - in = emit_binop(f, f->entry, rem, x, x, tc.i32); - in->extra.imm = BO_SREM; - in->opnds[2] = op_imm_(1, tc.i32); - in = emit_binop(f, f->entry, self_xor, x, x, tc.i32); - in->extra.imm = BO_XOR; - in = emit_binop(f, f->entry, self_or, x, x, tc.i32); - in->extra.imm = BO_OR; - emit_cmp(f, f->entry, cmp, x, x, tc.i32, CMP_EQ); - emit_convert(f, f->entry, cv, x, tc.i32, CV_ZEXT); - emit_ret_val(f, f->entry, self_or, tc.i32); - - opt_build_cfg(f); - opt_simplify_local(f); - opt_verify(f, "test-simplify-local"); - EXPECT((IROp)def_inst(f, mul)->op == IR_COPY, "x * 1 should become a copy"); - EXPECT(val_is_load_imm(f, rem, 0), "x %% 1 should become zero"); - EXPECT(val_is_load_imm(f, self_xor, 0), "x ^ x should become zero"); - EXPECT((IROp)def_inst(f, self_or)->op == IR_COPY, - "x | x should become a copy"); - EXPECT(val_is_load_imm(f, cmp, 1), "x == x should become true"); - EXPECT((IROp)def_inst(f, cv)->op == IR_COPY, - "exact no-op convert should become a copy"); - tc_fini(&tc); -} - -static void opt_simplify_local_preserves_unsafe_cases(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val x = add_val(f, tc.i32); - Val flagged = add_val(f, tc.i32); - Val div_self = add_val(f, tc.i32); - Val fp = add_val_cls(f, tc.f64, RC_FP); - Val fp_add = add_val_cls(f, tc.f64, RC_FP); - emit_scalar_input(f, f->entry, x, tc.i32); - emit_scalar_input(f, f->entry, fp, tc.f64); - Inst* in = emit_binop(f, f->entry, flagged, x, x, tc.i32); - in->extra.imm = BO_IMUL; - in->opnds[2] = op_imm_(1, tc.i32); - in->flags = 1; - in = emit_binop(f, f->entry, div_self, x, x, tc.i32); - in->extra.imm = BO_SDIV; - in = emit_binop(f, f->entry, fp_add, fp, fp, tc.f64); - in->extra.imm = BO_FADD; - in->opnds[2].kind = OPK_IMM; - in->opnds[2].cls = RC_FP; - in->opnds[2].type = tc.f64; - in->opnds[2].v.imm = 0; - emit_ret_val(f, f->entry, div_self, tc.i32); - - opt_build_cfg(f); - opt_simplify_local(f); - opt_verify(f, "test-simplify-local-unsafe"); - EXPECT((IROp)def_inst(f, flagged)->op == IR_BINOP, - "flagged binop should not be simplified"); - EXPECT((IROp)def_inst(f, div_self)->op == IR_BINOP, - "x / x should not be simplified"); - EXPECT((IROp)def_inst(f, fp_add)->op == IR_BINOP, - "FP identity should not be simplified"); - tc_fini(&tc); -} - -static void opt_simplify_rewrites_ssa_nested_identities(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - f->opt_reg_ssa = 1; - Val x = add_val(f, tc.i32); - Val n = add_val(f, tc.i32); - Val nn = add_val(f, tc.i32); - Val z = add_val(f, tc.i32); - Val add = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, x, tc.i32); - emit_unop(f, f->entry, n, x, tc.i32, UO_BNOT); - emit_unop(f, f->entry, nn, n, tc.i32, UO_BNOT); - emit_load_imm(f, f->entry, z, tc.i32, 0); - Inst* in = emit_binop(f, f->entry, add, nn, z, tc.i32); - in->extra.imm = BO_IADD; - emit_ret_val(f, f->entry, add, tc.i32); - - opt_build_cfg(f); - opt_simplify(f); - opt_verify(f, "test-simplify-ssa-nested"); - EXPECT((IROp)def_inst(f, nn)->op == IR_COPY && - def_inst(f, nn)->opnds[1].v.reg == (Reg)x, - "double bitwise-not should become copy of original source"); - EXPECT((IROp)def_inst(f, add)->op == IR_COPY && - def_inst(f, add)->opnds[1].v.reg == (Reg)nn, - "SSA add-zero value should become a copy"); - tc_fini(&tc); -} - -static void opt_simplify_canonicalizes_add_zero_address_chain(void) { - TestCtx tc; - tc_init(&tc); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - Func* f = new_func(&tc); - f->opt_reg_ssa = 1; - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val addr = add_val(f, ptr_ty); - Val zero = add_val(f, ptr_ty); - Val addr2 = add_val(f, ptr_ty); - Val out = add_val(f, tc.i32); - emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32); - emit_load_imm(f, f->entry, zero, ptr_ty, 0); - Inst* in = emit_binop(f, f->entry, addr2, addr, zero, ptr_ty); - in->extra.imm = BO_IADD; - emit_load_indirect(f, f->entry, out, addr2, tc.i32, 0); - emit_ret_val(f, f->entry, out, tc.i32); - - opt_build_cfg(f); - opt_simplify(f); - opt_copy_cleanup(f); - opt_verify(f, "test-simplify-addr-zero"); - EXPECT(f->blocks[f->entry].insts[2].op != IR_BINOP, - "add-zero address chain should be canonicalized away"); - EXPECT(f->blocks[f->entry].insts[2].op == IR_LOAD && - f->blocks[f->entry].insts[2].opnds[1].v.ind.base == (Reg)addr, - "load should use the canonical address value"); - tc_fini(&tc); -} - -static void opt_simplify_feeds_gvn_with_canonical_shape(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - f->opt_reg_ssa = 1; - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, 0); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val zero = add_val(f, tc.i32); - Val a0 = add_val(f, tc.i32); - Val first = add_val(f, tc.i32); - Val second = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, a, tc.i32); - emit_scalar_input(f, f->entry, b, tc.i32); - emit_load_imm(f, f->entry, zero, tc.i32, 0); - Inst* in = emit_binop(f, f->entry, a0, a, zero, tc.i32); - in->extra.imm = BO_IADD; - emit_binop(f, f->entry, first, a, b, tc.i32); - emit_store_local(f, f->entry, fs, first, tc.i32, 0); - emit_binop(f, f->entry, second, a0, b, tc.i32); - emit_ret_val(f, f->entry, second, tc.i32); - - opt_build_cfg(f); - opt_simplify(f); - opt_ssa_dce(f); - opt_copy_cleanup(f); - opt_gvn(f); - opt_verify(f, "test-simplify-feeds-gvn"); - EXPECT(ret_val(f, f->entry) == first, - "simplify should expose duplicate scalar shape to GVN"); - tc_fini(&tc); -} - -static void opt_gvn_rewrites_same_block_scalar_duplicate(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val first = add_val(f, tc.i32); - Val second = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, a, tc.i32); - emit_scalar_input(f, f->entry, b, tc.i32); - emit_binop(f, f->entry, first, a, b, tc.i32); - emit_binop(f, f->entry, second, a, b, tc.i32); - emit_ret_val(f, f->entry, second, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-ssa-same-block"); - EXPECT(ret_val(f, f->entry) == first, - "GVN should rewrite duplicate same-block scalar users"); - EXPECT(count_uses_of(f, second) == 0, - "GVN should leave duplicate scalar def unused after replacement"); - tc_fini(&tc); -} - -static void opt_gvn_rewrites_dominated_scalar_duplicate(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 child = ir_block_new(f); - ir_note_emit(f, child); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val first = add_val(f, tc.i32); - Val second = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, a, tc.i32); - emit_scalar_input(f, f->entry, b, tc.i32); - emit_binop(f, f->entry, first, a, b, tc.i32); - emit_br_to(f, f->entry, child); - emit_binop(f, child, second, a, b, tc.i32); - emit_ret_val(f, child, second, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-ssa-dominated"); - EXPECT(ret_val(f, child) == first, - "GVN should rewrite scalar duplicate dominated by first def"); - tc_fini(&tc); -} - -static void opt_gvn_preserves_nondominated_scalar_duplicates(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 then_b = ir_block_new(f); - u32 else_b = ir_block_new(f); - ir_note_emit(f, then_b); - ir_note_emit(f, else_b); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val then_v = add_val(f, tc.i32); - Val else_v = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, a, tc.i32); - emit_scalar_input(f, f->entry, b, tc.i32); - emit_test_branch(f, f->entry, then_b, else_b, tc.i32); - emit_binop(f, then_b, then_v, a, b, tc.i32); - emit_ret_val(f, then_b, then_v, tc.i32); - emit_binop(f, else_b, else_v, a, b, tc.i32); - emit_ret_val(f, else_b, else_v, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-ssa-nondominated"); - EXPECT(ret_val(f, then_b) == then_v && ret_val(f, else_b) == else_v, - "GVN should not rewrite through a non-dominating sibling def"); - tc_fini(&tc); -} - -static void opt_gvn_canonicalizes_commutative_scalar_operands(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val first = add_val(f, tc.i32); - Val second = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, a, tc.i32); - emit_scalar_input(f, f->entry, b, tc.i32); - emit_binop(f, f->entry, first, a, b, tc.i32); - emit_binop(f, f->entry, second, b, a, tc.i32); - emit_ret_val(f, f->entry, second, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-ssa-commutative"); - EXPECT(ret_val(f, f->entry) == first, - "GVN should canonicalize commutative scalar operands"); - tc_fini(&tc); -} - -static void opt_gvn_folds_safe_scalar_constants(void) { - TestCtx tc; - tc_init(&tc); - - Func* add = new_func(&tc); - Val two = add_val(add, tc.i32); - Val three = add_val(add, tc.i32); - Val sum = add_val(add, tc.i32); - emit_load_imm(add, add->entry, two, tc.i32, 2); - emit_load_imm(add, add->entry, three, tc.i32, 3); - emit_binop(add, add->entry, sum, two, three, tc.i32); - emit_ret_val(add, add->entry, sum, tc.i32); - opt_build_cfg(add); - opt_gvn(add); - opt_verify(add, "test-gvn-ssa-fold-add"); - EXPECT(val_is_load_imm(add, ret_val(add, add->entry), 5), - "GVN should fold safe integer binop constants"); - - Func* logical = new_func(&tc); - Val zero = add_val(logical, tc.i32); - Val not_zero = add_val(logical, tc.i32); - emit_load_imm(logical, logical->entry, zero, tc.i32, 0); - emit_unop(logical, logical->entry, not_zero, zero, tc.i32, UO_NOT); - emit_ret_val(logical, logical->entry, not_zero, tc.i32); - opt_build_cfg(logical); - opt_gvn(logical); - opt_verify(logical, "test-gvn-ssa-fold-unop"); - EXPECT(val_is_load_imm(logical, ret_val(logical, logical->entry), 1), - "GVN should fold safe integer unop constants"); - - Func* cmp = new_func(&tc); - Val lhs = add_val(cmp, tc.i32); - Val rhs = add_val(cmp, tc.i32); - Val lt = add_val(cmp, tc.i32); - emit_load_imm(cmp, cmp->entry, lhs, tc.i32, 2); - emit_load_imm(cmp, cmp->entry, rhs, tc.i32, 3); - emit_cmp(cmp, cmp->entry, lt, lhs, rhs, tc.i32, CMP_LT_S); - emit_ret_val(cmp, cmp->entry, lt, tc.i32); - opt_build_cfg(cmp); - opt_gvn(cmp); - opt_verify(cmp, "test-gvn-ssa-fold-cmp"); - EXPECT(val_is_load_imm(cmp, ret_val(cmp, cmp->entry), 1), - "GVN should fold safe integer cmp constants"); - - Func* trunc = new_func(&tc); - Val wide = add_val(trunc, tc.i64); - Val narrow = add_val(trunc, tc.i32); - emit_load_imm(trunc, trunc->entry, wide, tc.i64, 5); - emit_convert_typed(trunc, trunc->entry, narrow, wide, tc.i32, tc.i64, - CV_TRUNC); - emit_ret_val(trunc, trunc->entry, narrow, tc.i32); - opt_build_cfg(trunc); - opt_gvn(trunc); - opt_verify(trunc, "test-gvn-ssa-fold-convert"); - EXPECT(val_is_load_imm(trunc, ret_val(trunc, trunc->entry), 5), - "GVN should fold safe integer conversion constants"); - - tc_fini(&tc); -} - -static void opt_gvn_rewrites_redundant_local_load(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val first = add_val(f, tc.i32); - Val second = add_val(f, tc.i32); - emit_load_local(f, f->entry, first, fs, tc.i32, 0); - emit_load_local(f, f->entry, second, fs, tc.i32, 0); - emit_ret_val(f, f->entry, second, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-redundant-local-load"); - EXPECT(ret_val(f, f->entry) == first, - "memory GVN should rewrite repeated local loads"); - expect_ir_dump_eq(f, - "ir blocks=1 vals=3\n" - "block 0 preds=[] succs=[] insts=3\n" - " 0 load def=v1 opnds=[v1,local#1] mem=size4 align4 " - "flags=0x0 alias=local#1\n" - " 1 load def=v2 opnds=[v2,local#1] mem=size4 align4 " - "flags=0x0 alias=local#1\n" - " 2 ret ret=v1\n", - "memory GVN repeated local load"); - tc_fini(&tc); -} - -static void opt_gvn_reuses_store_to_local_load(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val src = add_val(f, tc.i32); - Val loaded = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_store_local(f, f->entry, fs, src, tc.i32, 0); - emit_load_local(f, f->entry, loaded, fs, tc.i32, 0); - emit_ret_val(f, f->entry, loaded, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-store-load-local"); - EXPECT(ret_val(f, f->entry) == src, - "memory GVN should reuse a dominating store to the same local"); - expect_ir_dump_eq(f, - "ir blocks=1 vals=3\n" - "block 0 preds=[] succs=[] insts=4\n" - " 0 param_decl def=v1\n" - " 1 store opnds=[local#1,v1] mem=size4 align4 " - "flags=0x0 alias=local#1\n" - " 2 load def=v2 opnds=[v2,local#1] mem=size4 align4 " - "flags=0x0 alias=local#1\n" - " 3 ret ret=v1\n", - "memory GVN store load local"); - tc_fini(&tc); -} - -static void opt_gvn_reuses_store_to_addr_of_zero_index_load(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - Val src = add_val(f, tc.i32); - Val addr_store = add_val(f, ptr_ty); - Val zero_store = add_val(f, ptr_ty); - Val indexed_store = add_val(f, ptr_ty); - Val addr_load = add_val(f, ptr_ty); - Val zero_load = add_val(f, ptr_ty); - Val indexed_load = add_val(f, ptr_ty); - Val loaded = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_addr_of_local(f, f->entry, addr_store, fs, ptr_ty, tc.i32); - emit_load_imm(f, f->entry, zero_store, ptr_ty, 0); - emit_binop(f, f->entry, indexed_store, addr_store, zero_store, ptr_ty); - emit_store_indirect(f, f->entry, indexed_store, src, tc.i32, 0); - emit_addr_of_local(f, f->entry, addr_load, fs, ptr_ty, tc.i32); - emit_load_imm(f, f->entry, zero_load, ptr_ty, 0); - emit_binop(f, f->entry, indexed_load, addr_load, zero_load, ptr_ty); - emit_load_indirect(f, f->entry, loaded, indexed_load, tc.i32, 0); - emit_ret_val(f, f->entry, loaded, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-addr-of-zero-index-load"); - EXPECT(ret_val(f, f->entry) == src, - "memory GVN should see through addr_of plus zero index"); - tc_fini(&tc); -} - -static void opt_gvn_preserves_distinct_indexed_local_loads(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i64, FS_LOCAL, 32, FSF_ADDR_TAKEN); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i64, 0); - Val base = add_val(f, ptr_ty); - Val i0 = add_val(f, ptr_ty); - Val i1 = add_val(f, ptr_ty); - Val first = add_val(f, tc.i64); - Val second = add_val(f, tc.i64); - - emit_addr_of_local(f, f->entry, base, fs, ptr_ty, tc.i64); - emit_load_imm(f, f->entry, i0, ptr_ty, 0); - emit_load_imm(f, f->entry, i1, ptr_ty, 1); - emit_load_indexed_indirect(f, f->entry, first, base, i0, 3, 0, tc.i64, 0); - emit_load_indexed_indirect(f, f->entry, second, base, i1, 3, 0, tc.i64, 0); - emit_ret_val(f, f->entry, second, tc.i64); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-distinct-indexed-local-loads"); - EXPECT(ret_val(f, f->entry) == second, - "memory GVN should not merge loads with distinct index operands"); - tc_fini(&tc); -} - -static void opt_gvn_reuses_joined_same_value_store(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - u32 t = ir_block_new(f), e = ir_block_new(f), j = ir_block_new(f); - ir_note_emit(f, t); - ir_note_emit(f, e); - ir_note_emit(f, j); - Val c = add_val(f, tc.i32), src = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, c, tc.i32); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_cond_branch(f, f->entry, c, t, e, tc.i32); - emit_store_local(f, t, fs, src, tc.i32, 0); - emit_br_to(f, t, j); - emit_store_local(f, e, fs, src, tc.i32, 0); - emit_br_to(f, e, j); - emit_load_local(f, j, out, fs, tc.i32, 0); - emit_ret_val(f, j, out, tc.i32); - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-join-same-store"); - EXPECT(ret_val(f, j) == src, - "memory GVN should reuse joined stores only when all preds agree"); - tc_fini(&tc); -} - -static void opt_gvn_preserves_joined_different_or_missing_store(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - u32 t = ir_block_new(f), e = ir_block_new(f), j = ir_block_new(f); - ir_note_emit(f, t); - ir_note_emit(f, e); - ir_note_emit(f, j); - Val c = add_val(f, tc.i32), a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32), out = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, c, tc.i32); - emit_scalar_input(f, f->entry, a, tc.i32); - emit_scalar_input(f, f->entry, b, tc.i32); - emit_cond_branch(f, f->entry, c, t, e, tc.i32); - emit_store_local(f, t, fs, a, tc.i32, 0); - emit_br_to(f, t, j); - emit_store_local(f, e, fs, b, tc.i32, 0); - emit_br_to(f, e, j); - emit_load_local(f, j, out, fs, tc.i32, 0); - emit_ret_val(f, j, out, tc.i32); - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-join-different-store"); - EXPECT(ret_val(f, j) == out, - "memory GVN should preserve joined load when preds disagree"); - tc_fini(&tc); -} - -static void opt_gvn_preserves_loop_header_load(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - u32 h = ir_block_new(f), body = ir_block_new(f), ex = ir_block_new(f); - ir_note_emit(f, h); - ir_note_emit(f, body); - ir_note_emit(f, ex); - Val c = add_val(f, tc.i32), init = add_val(f, tc.i32); - Val next = add_val(f, tc.i32), cur = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, c, tc.i32); - emit_scalar_input(f, f->entry, init, tc.i32); - emit_scalar_input(f, f->entry, next, tc.i32); - emit_store_local(f, f->entry, fs, init, tc.i32, 0); - emit_br_to(f, f->entry, h); - emit_load_local(f, h, cur, fs, tc.i32, 0); - emit_cond_branch(f, h, c, body, ex, tc.i32); - emit_store_local(f, body, fs, next, tc.i32, 0); - emit_br_to(f, body, h); - emit_ret_val(f, ex, cur, tc.i32); - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-loop-header"); - EXPECT(ret_val(f, ex) == cur, - "memory GVN should not reuse preheader availability at loop headers"); - tc_fini(&tc); -} - -static void opt_gvn_preserves_load_across_unknown_store(void) { - TestCtx tc; - tc_init(&tc); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val ptr = add_val(f, ptr_ty); - Val src = add_val(f, tc.i32); - Val first = add_val(f, tc.i32); - Val second = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, ptr, ptr_ty); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_load_local(f, f->entry, first, fs, tc.i32, 0); - emit_store_indirect(f, f->entry, ptr, src, tc.i32, 0); - emit_load_local(f, f->entry, second, fs, tc.i32, 0); - emit_ret_val(f, f->entry, second, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-unknown-store-barrier"); - EXPECT(ret_val(f, f->entry) == second, - "memory GVN should not rewrite across an unknown store"); - expect_ir_dump_eq(f, - "ir blocks=1 vals=5\n" - "block 0 preds=[] succs=[] insts=6\n" - " 0 param_decl def=v1\n" - " 1 param_decl def=v2\n" - " 2 load def=v3 opnds=[v3,local#1] mem=size4 align4 " - "flags=0x0 alias=local#1\n" - " 3 store opnds=[[v1+0],v2] mem=size4 align4 flags=0x0 " - "alias=unknown\n" - " 4 load def=v4 opnds=[v4,local#1] mem=size4 align4 " - "flags=0x0 alias=local#1\n" - " 5 ret ret=v4\n", - "memory GVN unknown store barrier"); - tc_fini(&tc); -} - -static void opt_gvn_preserves_nonescaped_local_across_call(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val src = add_val(f, tc.i32); - Val loaded = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_store_local(f, f->entry, fs, src, tc.i32, 0); - emit_call_void(f, f->entry); - emit_load_local(f, f->entry, loaded, fs, tc.i32, 0); - emit_ret_val(f, f->entry, loaded, tc.i32); - - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-call-preserves-nonescaped-local"); - EXPECT(ret_val(f, f->entry) == src, - "memory GVN should preserve non-escaped locals across calls"); - tc_fini(&tc); -} - -static void opt_gvn_clobbers_escaped_local_across_call(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - Val src = add_val(f, tc.i32); - Val addr = add_val(f, ptr_ty); - Val loaded = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_store_local(f, f->entry, fs, src, tc.i32, 0); - emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32); - emit_call_one_arg(&tc, f, f->entry, op_reg_(addr, ptr_ty), ptr_ty); - emit_load_local(f, f->entry, loaded, fs, tc.i32, 0); - emit_ret_val(f, f->entry, loaded, tc.i32); - opt_build_cfg(f); - opt_gvn(f); - opt_verify(f, "test-gvn-memory-call-clobbers-escaped-local"); - EXPECT(ret_val(f, f->entry) == loaded, - "memory GVN should clobber locals whose address reaches a call"); - tc_fini(&tc); -} - -static void opt_gvn_preserves_observable_memory_loads(void) { - TestCtx tc; - tc_init(&tc); - - Func* volatile_f = new_func(&tc); - FrameSlot vfs = - add_frame_slot(volatile_f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val vfirst = add_val(volatile_f, tc.i32); - Val vsecond = add_val(volatile_f, tc.i32); - emit_load_local(volatile_f, volatile_f->entry, vfirst, vfs, tc.i32, - MF_VOLATILE); - emit_load_local(volatile_f, volatile_f->entry, vsecond, vfs, tc.i32, - MF_VOLATILE); - emit_ret_val(volatile_f, volatile_f->entry, vsecond, tc.i32); - opt_build_cfg(volatile_f); - opt_gvn(volatile_f); - opt_verify(volatile_f, "test-gvn-memory-volatile-loads"); - EXPECT(ret_val(volatile_f, volatile_f->entry) == vsecond, - "memory GVN should not rewrite volatile loads"); - expect_ir_dump_eq(volatile_f, - "ir blocks=1 vals=3\n" - "block 0 preds=[] succs=[] insts=3\n" - " 0 load def=v1 opnds=[v1,local#1] mem=size4 align4 " - "flags=0x1 alias=local#1\n" - " 1 load def=v2 opnds=[v2,local#1] mem=size4 align4 " - "flags=0x1 alias=local#1\n" - " 2 ret ret=v2\n", - "memory GVN volatile loads"); - - Func* atomic_f = new_func(&tc); - FrameSlot afs = add_frame_slot(atomic_f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val afirst = add_val(atomic_f, tc.i32); - Val asecond = add_val(atomic_f, tc.i32); - emit_atomic_load_local(atomic_f, atomic_f->entry, afirst, afs, tc.i32); - emit_atomic_load_local(atomic_f, atomic_f->entry, asecond, afs, tc.i32); - emit_ret_val(atomic_f, atomic_f->entry, asecond, tc.i32); - opt_build_cfg(atomic_f); - opt_gvn(atomic_f); - opt_verify(atomic_f, "test-gvn-memory-atomic-loads"); - EXPECT(ret_val(atomic_f, atomic_f->entry) == asecond, - "memory GVN should not rewrite atomic loads"); - - tc_fini(&tc); -} - -static void opt_dse_removes_overwritten_local_store(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val first = add_val(f, tc.i32); - Val second = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, first, tc.i32); - emit_scalar_input(f, f->entry, second, tc.i32); - emit_store_local(f, f->entry, fs, first, tc.i32, 0); - emit_store_local(f, f->entry, fs, second, tc.i32, 0); - emit_load_local(f, f->entry, out, fs, tc.i32, 0); - emit_ret_val(f, f->entry, out, tc.i32); - - opt_build_cfg(f); - opt_dse(f); - opt_verify(f, "test-dse-overwritten-local-store"); - EXPECT(count_op(f, IR_STORE) == 1, - "DSE should remove an exactly overwritten local store"); - tc_fini(&tc); -} - -static void opt_dse_removes_store_overwritten_on_all_paths(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - u32 t = ir_block_new(f), e = ir_block_new(f), j = ir_block_new(f); - ir_note_emit(f, t); - ir_note_emit(f, e); - ir_note_emit(f, j); - Val cond = add_val(f, tc.i32); - Val init = add_val(f, tc.i32); - Val tv = add_val(f, tc.i32); - Val ev = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, cond, tc.i32); - emit_scalar_input(f, f->entry, init, tc.i32); - emit_scalar_input(f, f->entry, tv, tc.i32); - emit_scalar_input(f, f->entry, ev, tc.i32); - emit_store_local(f, f->entry, fs, init, tc.i32, 0); - emit_cond_branch(f, f->entry, cond, t, e, tc.i32); - emit_store_local(f, t, fs, tv, tc.i32, 0); - emit_br_to(f, t, j); - emit_store_local(f, e, fs, ev, tc.i32, 0); - emit_br_to(f, e, j); - emit_load_local(f, j, out, fs, tc.i32, 0); - emit_ret_val(f, j, out, tc.i32); - - opt_build_cfg(f); - opt_dse(f); - opt_verify(f, "test-dse-overwritten-all-paths"); - EXPECT(count_op(f, IR_STORE) == 2, - "DSE should remove stores overwritten on every successor path"); - tc_fini(&tc); -} - -static void opt_dse_removes_unread_nonescaped_local_store(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val src = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_store_local(f, f->entry, fs, src, tc.i32, 0); - emit_ret_val(f, f->entry, src, tc.i32); - - opt_build_cfg(f); - opt_dse(f); - opt_verify(f, "test-dse-unread-nonescaped-local-store"); - EXPECT(count_op(f, IR_STORE) == 0, - "DSE should remove unread stores to non-escaped locals"); - tc_fini(&tc); -} - -static void opt_dse_preserves_escaped_local_across_call(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - Val src = add_val(f, tc.i32); - Val addr = add_val(f, ptr_ty); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_store_local(f, f->entry, fs, src, tc.i32, 0); - emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32); - emit_call_one_arg(&tc, f, f->entry, op_reg_(addr, ptr_ty), ptr_ty); - emit_ret_val(f, f->entry, src, tc.i32); - - opt_build_cfg(f); - opt_dse(f); - opt_verify(f, "test-dse-escaped-local-call"); - EXPECT(count_op(f, IR_STORE) == 1, - "DSE should preserve stores visible to a call through escaped locals"); - tc_fini(&tc); -} - -static void opt_dse_preserves_volatile_store(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val src = add_val(f, tc.i32); - emit_scalar_input(f, f->entry, src, tc.i32); - emit_store_local(f, f->entry, fs, src, tc.i32, MF_VOLATILE); - emit_ret_val(f, f->entry, src, tc.i32); - - opt_build_cfg(f); - opt_dse(f); - opt_verify(f, "test-dse-volatile-store"); - EXPECT(count_op(f, IR_STORE) == 1, "DSE should preserve volatile stores"); - tc_fini(&tc); -} - -static void opt_jump_cleanup_forwards_branch_targets(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 hop = ir_block_new(f); - u32 ret = ir_block_new(f); - ir_note_emit(f, hop); - ir_note_emit(f, ret); - - Val v = add_val(f, tc.i32); - emit_br_to(f, b0, hop); - emit_br_to(f, hop, ret); - emit_load_imm(f, ret, v, tc.i32, 7); - emit_ret_val(f, ret, v, tc.i32); - - opt_build_cfg(f); - opt_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG); - opt_build_cfg(f); - - EXPECT(f->blocks[b0].nsucc == 1 && f->blocks[b0].succ[0] == ret, - "jump cleanup should forward branch target to final block"); - EXPECT(f->blocks[hop].ninsts == 0, - "forwarded branch-only block should be pruned as unreachable"); - EXPECT(f->emit_order_n == 2, - "emit_order should drop pruned branch-only block, got %u", - (unsigned)f->emit_order_n); - tc_fini(&tc); -} - -static void opt_jump_cleanup_inverts_to_remove_jump_block(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 jump_ft = ir_block_new(f); - u32 taken = ir_block_new(f); - u32 join = ir_block_new(f); - ir_note_emit(f, jump_ft); - ir_note_emit(f, taken); - ir_note_emit(f, join); - - Val v = add_val(f, tc.i32); - emit_test_branch(f, b0, taken, jump_ft, tc.i32); - emit_br_to(f, jump_ft, join); - emit_load_imm(f, taken, v, tc.i32, 7); - emit_br_to(f, taken, join); - emit_ret_val(f, join, v, tc.i32); - - opt_build_cfg(f); - opt_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG); - opt_build_cfg(f); - - Inst* br = &f->blocks[b0].insts[f->blocks[b0].ninsts - 1u]; - EXPECT((IROp)br->op == IR_CMP_BRANCH && br->extra.imm == CMP_EQ, - "jump cleanup should invert CMP_NE to CMP_EQ"); - EXPECT(f->blocks[b0].succ[0] == join && f->blocks[b0].succ[1] == taken, - "inverted branch should target join and fall through to taken"); - EXPECT(f->blocks[jump_ft].ninsts == 0, - "removed fallthrough jump block should be pruned"); - EXPECT(f->emit_order_n >= 2 && f->emit_order[1] == taken, - "taken block should become physical fallthrough after pruning"); - tc_fini(&tc); -} - -static void opt_jump_cleanup_keeps_conditional_fallthrough_block(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 jump_ft = ir_block_new(f); - u32 other = ir_block_new(f); - u32 taken = ir_block_new(f); - u32 join = ir_block_new(f); - ir_note_emit(f, jump_ft); - ir_note_emit(f, other); - ir_note_emit(f, taken); - ir_note_emit(f, join); - - Val v = add_val(f, tc.i32); - Val w = add_val(f, tc.i32); - emit_test_branch(f, b0, taken, jump_ft, tc.i32); - emit_br_to(f, jump_ft, join); - emit_load_imm(f, taken, v, tc.i32, 7); - emit_br_to(f, taken, other); - emit_load_imm(f, other, w, tc.i32, 1); - emit_br_to(f, other, join); - emit_ret_val(f, join, w, tc.i32); - - opt_build_cfg(f); - opt_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG); - opt_build_cfg(f); - - EXPECT(f->blocks[b0].succ[1] == jump_ft, - "conditional fallthrough edge should not be forwarded without " - "layout-safe inversion"); - EXPECT(f->blocks[jump_ft].ninsts == 1, - "fallthrough jump block should remain reachable"); - EXPECT(f->emit_order_n >= 2 && f->emit_order[1] == jump_ft, - "fallthrough jump block should remain physical next block"); - tc_fini(&tc); -} - -static void opt_jump_cleanup_layout_deletes_fallthrough_branch(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 b1 = ir_block_new(f); - ir_note_emit(f, b1); - - Val v = add_val(f, tc.i32); - emit_br_to(f, b0, b1); - emit_load_imm(f, b1, v, tc.i32, 7); - emit_ret_val(f, b1, v, tc.i32); - - opt_build_cfg(f); - opt_jump_cleanup(f, OPT_JUMP_CLEANUP_LAYOUT); - - EXPECT(count_op(f, IR_BR) == 0, - "layout cleanup should delete branch to physical fallthrough"); - EXPECT(f->blocks[b0].nsucc == 1 && f->blocks[b0].succ[0] == b1, - "layout cleanup should leave successor metadata for final emit"); - tc_fini(&tc); -} - -static void opt_jump_opt_forwards_switch_targets(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 case_hop = ir_block_new(f); - u32 case_ret = ir_block_new(f); - u32 other_ret = ir_block_new(f); - u32 default_hop = ir_block_new(f); - u32 default_ret = ir_block_new(f); - ir_note_emit(f, case_hop); - ir_note_emit(f, case_ret); - ir_note_emit(f, other_ret); - ir_note_emit(f, default_hop); - ir_note_emit(f, default_ret); - - Val sel = add_val(f, tc.i32); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - emit_scalar_input(f, b0, sel, tc.i32); - emit_switch2(f, b0, sel, 1, case_hop, 2, other_ret, default_hop, tc.i32); - emit_br_to(f, case_hop, case_ret); - emit_load_imm(f, case_ret, a, tc.i32, 11); - emit_ret_val(f, case_ret, a, tc.i32); - emit_load_imm(f, other_ret, b, tc.i32, 22); - emit_ret_val(f, other_ret, b, tc.i32); - emit_br_to(f, default_hop, default_ret); - emit_load_imm(f, default_ret, c, tc.i32, 33); - emit_ret_val(f, default_ret, c, tc.i32); - - opt_jump_opt(f); - opt_verify(f, "test-jump-opt-switch"); - - Inst* sw = &f->blocks[b0].insts[f->blocks[b0].ninsts - 1u]; - IRSwitchAux* aux = (IRSwitchAux*)sw->extra.aux; - EXPECT(aux->cases[0].block == case_ret, - "jump opt should forward switch case trampoline"); - EXPECT(aux->default_block == default_ret, - "jump opt should forward switch default trampoline"); - EXPECT(f->blocks[case_hop].ninsts == 0 && f->blocks[default_hop].ninsts == 0, - "forwarded switch trampolines should be pruned"); - tc_fini(&tc); -} - -static void opt_jump_opt_forwards_empty_fallthrough_chain(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 empty0 = ir_block_new(f); - u32 empty1 = ir_block_new(f); - u32 fallthrough = ir_block_new(f); - u32 taken = ir_block_new(f); - ir_note_emit(f, empty0); - ir_note_emit(f, empty1); - ir_note_emit(f, fallthrough); - ir_note_emit(f, taken); - - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - emit_test_branch(f, b0, taken, empty0, tc.i32); - emit_load_imm(f, fallthrough, a, tc.i32, 1); - emit_ret_val(f, fallthrough, a, tc.i32); - emit_load_imm(f, taken, b, tc.i32, 2); - emit_ret_val(f, taken, b, tc.i32); - - opt_jump_opt(f); - opt_verify(f, "test-jump-opt-empty-fallthrough"); - - EXPECT(f->blocks[b0].succ[1] == fallthrough, - "jump opt should forward empty physical fallthrough chain"); - EXPECT(f->blocks[empty0].ninsts == 0 && f->blocks[empty0].nsucc == 0, - "first empty fallthrough block should be pruned"); - EXPECT(f->blocks[empty1].ninsts == 0 && f->blocks[empty1].nsucc == 0, - "second empty fallthrough block should be pruned"); - EXPECT(f->emit_order_n >= 2 && f->emit_order[1] == fallthrough, - "forwarded fallthrough should become physical next block"); - tc_fini(&tc); -} - -static void opt_jump_opt_repeatedly_forwards_branch_chain(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 hop0 = ir_block_new(f); - u32 hop1 = ir_block_new(f); - u32 ret = ir_block_new(f); - u32 other = ir_block_new(f); - ir_note_emit(f, hop0); - ir_note_emit(f, hop1); - ir_note_emit(f, ret); - ir_note_emit(f, other); - - Val cond = add_val(f, tc.i32); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - emit_scalar_input(f, b0, cond, tc.i32); - emit_cond_branch(f, b0, cond, hop0, other, tc.i32); - emit_br_to(f, hop0, hop1); - emit_br_to(f, hop1, ret); - emit_load_imm(f, ret, a, tc.i32, 7); - emit_ret_val(f, ret, a, tc.i32); - emit_load_imm(f, other, b, tc.i32, 9); - emit_ret_val(f, other, b, tc.i32); - - opt_jump_opt(f); - opt_verify(f, "test-jump-opt-repeated-branch-forward"); - - EXPECT(f->blocks[b0].succ[0] == ret, - "jump opt should forward repeated branch-only chain"); - EXPECT(f->blocks[hop0].ninsts == 0 && f->blocks[hop1].ninsts == 0, - "repeated branch trampolines should be pruned"); - tc_fini(&tc); -} - -static void opt_jump_opt_collapses_same_target_cond_branch(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 b0 = f->entry; - u32 ret = ir_block_new(f); - ir_note_emit(f, ret); - - Val cond = add_val(f, tc.i32); - Val v = add_val(f, tc.i32); - emit_scalar_input(f, b0, cond, tc.i32); - emit_cond_branch(f, b0, cond, ret, ret, tc.i32); - emit_load_imm(f, ret, v, tc.i32, 7); - emit_ret_val(f, ret, v, tc.i32); - - opt_jump_opt(f); - opt_verify(f, "test-jump-opt-same-target"); - - Inst* br = &f->blocks[b0].insts[f->blocks[b0].ninsts - 1u]; - EXPECT((IROp)br->op == IR_BR && f->blocks[b0].nsucc == 1 && - f->blocks[b0].succ[0] == ret, - "jump opt should collapse same-target conditional branch"); - tc_fini(&tc); -} - -static void opt_loop_tree_excludes_side_exit(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 exit = ir_block_new(f); /* Numerically between header and latch. */ - u32 body = ir_block_new(f); - u32 latch = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, exit); - ir_note_emit(f, body); - ir_note_emit(f, latch); - - emit_br_to(f, entry, header); - emit_test_branch(f, header, body, exit, tc.i32); - emit_br_to(f, body, latch); - emit_br_to(f, latch, header); - ir_emit(f, exit, IR_RET); - - opt_build_cfg(f); - opt_build_loop_tree(f); - - EXPECT(f->blocks[entry].loop_depth == 0, - "entry should not be in loop, got depth %u", - (unsigned)f->blocks[entry].loop_depth); - EXPECT(f->blocks[header].loop_depth == 1, - "header should have loop depth 1, got %u", - (unsigned)f->blocks[header].loop_depth); - EXPECT(f->blocks[body].loop_depth == 1, - "body should have loop depth 1, got %u", - (unsigned)f->blocks[body].loop_depth); - EXPECT(f->blocks[latch].loop_depth == 1, - "latch should have loop depth 1, got %u", - (unsigned)f->blocks[latch].loop_depth); - EXPECT(f->blocks[exit].loop_depth == 0, - "side exit should not be in loop, got depth %u", - (unsigned)f->blocks[exit].loop_depth); - EXPECT(f->blocks[exit].frequency == 1, - "side exit should keep frequency 1, got %u", - (unsigned)f->blocks[exit].frequency); - EXPECT(f->blocks[header].frequency > f->blocks[exit].frequency, - "loop header should be hotter than side exit"); - tc_fini(&tc); -} - -static void opt_loop_tree_nested_depths(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 outer_header = ir_block_new(f); - u32 inner_header = ir_block_new(f); - u32 inner_body = ir_block_new(f); - u32 outer_latch = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, outer_header); - ir_note_emit(f, inner_header); - ir_note_emit(f, inner_body); - ir_note_emit(f, outer_latch); - ir_note_emit(f, exit); - - emit_br_to(f, entry, outer_header); - emit_test_branch(f, outer_header, inner_header, exit, tc.i32); - emit_test_branch(f, inner_header, inner_body, outer_latch, tc.i32); - emit_br_to(f, inner_body, inner_header); - emit_br_to(f, outer_latch, outer_header); - ir_emit(f, exit, IR_RET); - - opt_build_cfg(f); - opt_build_loop_tree(f); - - EXPECT(f->blocks[entry].loop_depth == 0, - "entry should not be in nested loops, got depth %u", - (unsigned)f->blocks[entry].loop_depth); - EXPECT(f->blocks[outer_header].loop_depth == 1, - "outer header should have depth 1, got %u", - (unsigned)f->blocks[outer_header].loop_depth); - EXPECT(f->blocks[outer_latch].loop_depth == 1, - "outer latch should have depth 1, got %u", - (unsigned)f->blocks[outer_latch].loop_depth); - EXPECT(f->blocks[inner_header].loop_depth == 2, - "inner header should have depth 2, got %u", - (unsigned)f->blocks[inner_header].loop_depth); - EXPECT(f->blocks[inner_body].loop_depth == 2, - "inner body should have depth 2, got %u", - (unsigned)f->blocks[inner_body].loop_depth); - EXPECT(f->blocks[exit].loop_depth == 0, - "exit should not be in nested loops, got depth %u", - (unsigned)f->blocks[exit].loop_depth); - EXPECT(f->blocks[inner_header].frequency > f->blocks[outer_header].frequency, - "inner loop should be hotter than outer-only blocks"); - tc_fini(&tc); -} - -static void opt_loop_tree_does_not_mutate_cfg(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - - emit_br_to(f, entry, header); - emit_test_branch(f, header, body, exit, tc.i32); - emit_br_to(f, body, header); - ir_emit(f, exit, IR_RET); - - opt_build_cfg(f); - - u32 nblocks = f->nblocks; - u8 nsucc[8]; - u32 succ0[8]; - u32 succ1[8]; - u32 npreds[8]; - EXPECT(nblocks <= 8, "test CFG grew past fixed snapshot size"); - for (u32 b = 0; b < nblocks && b < 8; ++b) { - nsucc[b] = f->blocks[b].nsucc; - succ0[b] = f->blocks[b].succ[0]; - succ1[b] = f->blocks[b].succ[1]; - npreds[b] = f->blocks[b].npreds; - } - - opt_build_loop_tree(f); - - EXPECT(f->nblocks == nblocks, "loop tree must not change block count"); - for (u32 b = 0; b < nblocks && b < 8; ++b) { - EXPECT(f->blocks[b].nsucc == nsucc[b], - "loop tree changed b%u nsucc from %u to %u", (unsigned)b, - (unsigned)nsucc[b], (unsigned)f->blocks[b].nsucc); - EXPECT(f->blocks[b].succ[0] == succ0[b], - "loop tree changed b%u succ0 from %u to %u", (unsigned)b, - (unsigned)succ0[b], (unsigned)f->blocks[b].succ[0]); - EXPECT(f->blocks[b].succ[1] == succ1[b], - "loop tree changed b%u succ1 from %u to %u", (unsigned)b, - (unsigned)succ1[b], (unsigned)f->blocks[b].succ[1]); - EXPECT(f->blocks[b].npreds == npreds[b], - "loop tree changed b%u npreds from %u to %u", (unsigned)b, - (unsigned)npreds[b], (unsigned)f->blocks[b].npreds); - } - tc_fini(&tc); -} - -static void opt_licm_hoists_safe_invariant_to_preheader(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - - Val cond = add_val(f, tc.i32); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val inv = add_val(f, tc.i32); - Val loaded = add_val(f, tc.i32); - Val use = add_val(f, tc.i32); - emit_scalar_input(f, entry, cond, tc.i32); - emit_scalar_input(f, entry, a, tc.i32); - emit_scalar_input(f, entry, b, tc.i32); - emit_br_to(f, entry, header); - emit_cond_branch(f, header, cond, body, exit, tc.i32); - Inst* mul = emit_binop(f, body, inv, a, b, tc.i32); - mul->extra.imm = BO_IMUL; - emit_load_local(f, body, loaded, fs, tc.i32, 0); - emit_binop(f, body, use, inv, loaded, tc.i32); - emit_br_to(f, body, header); - emit_ret_val(f, exit, a, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_licm(f); - opt_verify(f, "test-licm-hoist"); - - EXPECT(f->val_def_block[inv] == entry, - "LICM should hoist invariant multiply to preheader"); - EXPECT(f->val_def_inst[inv] + 1u < f->blocks[entry].ninsts, - "hoisted multiply should be inserted before preheader branch"); - EXPECT(f->val_def_block[use] == body, - "LICM should leave loop-variant dependent use in loop body"); - tc_fini(&tc); -} - -static void opt_licm_preserves_trapping_and_memory_ops(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - - Val cond = add_val(f, tc.i32); - Val num = add_val(f, tc.i32); - Val den = add_val(f, tc.i32); - Val loaded = add_val(f, tc.i32); - Val div = add_val(f, tc.i32); - emit_scalar_input(f, entry, cond, tc.i32); - emit_scalar_input(f, entry, num, tc.i32); - emit_scalar_input(f, entry, den, tc.i32); - emit_br_to(f, entry, header); - emit_cond_branch(f, header, cond, body, exit, tc.i32); - emit_load_local(f, body, loaded, fs, tc.i32, 0); - Inst* div_inst = emit_binop(f, body, div, num, den, tc.i32); - div_inst->extra.imm = BO_SDIV; - emit_br_to(f, body, header); - emit_ret_val(f, exit, num, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_licm(f); - opt_verify(f, "test-licm-preserve-unsafe"); - - EXPECT(f->val_def_block[loaded] == body, "LICM must not hoist memory loads"); - EXPECT(f->val_def_block[div] == body, - "LICM must not hoist potentially trapping division"); - tc_fini(&tc); -} - -static void opt_pressure_relief_sinks_single_use_load_imm(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val x = add_val(f, tc.i32); - Val y = add_val(f, tc.i32); - Val imm = add_val(f, tc.i32); - Val last = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - - emit_scalar_input(f, f->entry, x, tc.i32); - emit_scalar_input(f, f->entry, y, tc.i32); - emit_load_imm(f, f->entry, imm, tc.i32, 42); - for (u32 i = 0; i < 12; ++i) { - last = add_val(f, tc.i32); - emit_binop(f, f->entry, last, x, y, tc.i32); - } - emit_binop(f, f->entry, out, imm, last, tc.i32); - emit_ret_val(f, f->entry, out, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_pressure_relief(f); - opt_verify(f, "test-pressure-sink"); - - EXPECT(f->val_def_block[imm] == f->entry, - "pressure relief should keep same-block load_imm in entry"); - EXPECT(f->val_def_inst[imm] + 1u == f->val_def_inst[out], - "pressure relief should sink load_imm immediately before its use"); - tc_fini(&tc); -} - -static void opt_pressure_relief_does_not_sink_into_loop(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - u32 entry = f->entry; - u32 header = ir_block_new(f); - u32 body = ir_block_new(f); - u32 exit = ir_block_new(f); - ir_note_emit(f, header); - ir_note_emit(f, body); - ir_note_emit(f, exit); - - Val cond = add_val(f, tc.i32); - Val imm = add_val(f, tc.i32); - Val x = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_scalar_input(f, entry, cond, tc.i32); - emit_load_imm(f, entry, imm, tc.i32, 7); - emit_scalar_input(f, entry, x, tc.i32); - emit_br_to(f, entry, header); - emit_cond_branch(f, header, cond, body, exit, tc.i32); - emit_binop(f, body, out, imm, x, tc.i32); - emit_br_to(f, body, header); - emit_ret_val(f, exit, x, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_pressure_relief(f); - opt_verify(f, "test-pressure-no-loop-sink"); - - EXPECT(f->val_def_block[imm] == entry, - "pressure relief must not sink an outer def into a loop block"); - tc_fini(&tc); -} - -static void opt_pressure_relief_preserves_multi_use_constant(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val imm = add_val(f, tc.i32); - Val x = add_val(f, tc.i32); - Val y = add_val(f, tc.i32); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - emit_load_imm(f, f->entry, imm, tc.i32, 9); - u32 original_inst = f->val_def_inst[imm]; - emit_scalar_input(f, f->entry, x, tc.i32); - emit_scalar_input(f, f->entry, y, tc.i32); - emit_binop(f, f->entry, a, imm, x, tc.i32); - emit_binop(f, f->entry, b, imm, y, tc.i32); - emit_ret_val(f, f->entry, b, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_pressure_relief(f); - opt_verify(f, "test-pressure-multi-use"); - - EXPECT(f->val_def_inst[imm] == original_inst, - "pressure relief must leave multi-use constants in place"); - EXPECT(count_uses_of(f, imm) == 2, - "multi-use constant should still have both uses"); - tc_fini(&tc); -} - -static void opt_pressure_relief_does_not_cross_memory_ops(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN); - Val imm = add_val(f, tc.i32); - Val loaded = add_val(f, tc.i32); - Val x = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_load_imm(f, f->entry, imm, tc.i32, 11); - u32 original_inst = f->val_def_inst[imm]; - emit_load_local(f, f->entry, loaded, fs, tc.i32, 0); - emit_scalar_input(f, f->entry, x, tc.i32); - emit_binop(f, f->entry, out, imm, x, tc.i32); - emit_ret_val(f, f->entry, out, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_pressure_relief(f); - opt_verify(f, "test-pressure-memory-barrier"); - - EXPECT(f->val_def_inst[imm] == original_inst, - "pressure relief must not sink constants across memory operations"); - EXPECT(f->val_def_inst[loaded] == original_inst + 1u, - "memory operation should stay after the original constant"); - tc_fini(&tc); -} - -static void opt_pressure_relief_sinks_many_immediates_in_one_pass(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - Val x = add_val(f, tc.i32); - Val y = add_val(f, tc.i32); - Val imm[32]; - Val out[32]; - Val final; - - emit_scalar_input(f, f->entry, x, tc.i32); - emit_scalar_input(f, f->entry, y, tc.i32); - for (u32 i = 0; i < 32; ++i) { - imm[i] = add_val(f, tc.i32); - emit_load_imm(f, f->entry, imm[i], tc.i32, (i64)i); - } - for (u32 i = 0; i < 32; ++i) { - out[i] = add_val(f, tc.i32); - emit_binop(f, f->entry, out[i], imm[i], x, tc.i32); - } - final = add_val(f, tc.i32); - emit_binop(f, f->entry, final, x, out[31], tc.i32); - emit_ret_val(f, f->entry, final, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_pressure_relief(f); - opt_verify(f, "test-pressure-many-sink"); - - for (u32 i = 0; i < 32; ++i) { - EXPECT(f->val_def_inst[imm[i]] + 1u == f->val_def_inst[out[i]], - "pressure relief should sink each immediate before its use"); - } - tc_fini(&tc); -} - -static void opt_regalloc_priority(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val pinned = add_val(f, tc.i32); - Val hot = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_load_imm(f, f->entry, pinned, tc.i32, 1); - emit_load_imm(f, f->entry, hot, tc.i32, 2); - emit_binop(f, f->entry, out, pinned, hot, tc.i32); - emit_ret_val(f, f->entry, out, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - ensure_test_preg_info(f); - f->preg_info[pinned].tied_hard_reg = (i32)f->opt_hard_regs[RC_INT][0]; - f->preg_info[hot].frequency += 1000; - opt_regalloc(f, 0); - Reg expected_hard = f->opt_hard_regs[RC_INT][0]; - EXPECT(f->preg_info[pinned].alloc_kind == OPT_ALLOC_HARD, - "tied value should get a hard register"); - EXPECT(f->preg_info[pinned].hard_reg == expected_hard, - "tied value should get hard r%u, got r%u", (unsigned)expected_hard, - (unsigned)f->preg_info[pinned].hard_reg); - EXPECT(f->preg_info[hot].alloc_kind == OPT_ALLOC_SPILL, - "overlapping untied value should spill under one-reg pressure"); - tc_fini(&tc); -} - -static void opt_o2_coalesces_nonconflicting_copy(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_copy(f, f->entry, b, a, tc.i32); - emit_ret_val(f, f->entry, b, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 1); - - EXPECT(f->opt_coalesce_candidates == 1, - "copy should be a coalesce candidate"); - EXPECT(f->opt_coalesce_merges == 1, "non-overlapping copy should coalesce"); - EXPECT(f->preg_info[a].alloc_kind == OPT_ALLOC_HARD && - f->preg_info[b].alloc_kind == OPT_ALLOC_HARD, - "coalesced values should allocate hard under one-reg pressure"); - EXPECT(f->preg_info[a].hard_reg == f->preg_info[b].hard_reg, - "coalesced values should share a hard register"); - opt_combine(f); - opt_dce(f); - EXPECT(count_op(f, IR_COPY) == 0, - "coalesced hard-reg self copy should be removed"); - tc_fini(&tc); -} - -static void opt_o1_skips_coalesce(void) { - /* O1 matches MIR's pipeline (mir-gen.c:9431): coalescing runs only at - * optimize_level >= 2. At O1 the point-bitmap allocator emits the copy - * through the normal path without merging operands. */ - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_copy(f, f->entry, b, a, tc.i32); - emit_ret_val(f, f->entry, b, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - - EXPECT(f->opt_coalesce_candidates == 0 && f->opt_coalesce_merges == 0, - "O1 regalloc should not run coalesce"); - tc_fini(&tc); -} - -static void opt_o2_refuses_overlapping_copy_coalesce(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19, 20}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 2, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_copy(f, f->entry, b, a, tc.i32); - emit_binop(f, f->entry, out, a, b, tc.i32); - emit_ret_val(f, f->entry, out, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 1); - - EXPECT(f->opt_coalesce_candidates == 1, - "overlapping copy should still be collected as a candidate"); - EXPECT(f->opt_coalesce_conflicts >= 1, - "overlapping copy values should be recorded as conflicting"); - EXPECT(f->opt_coalesce_merges == 0, "overlapping copy should not coalesce"); - tc_fini(&tc); -} - -static void opt_o2_refuses_incompatible_copy_coalesce(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19, 20}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 2, scratch, 2, 0x4007FFFFu); - - Func* diff_type = new_func(&tc); - opt_machinize(diff_type, &mock.base); - Val a = add_val(diff_type, tc.i32); - Val b = add_val(diff_type, tc.i64); - emit_load_imm(diff_type, diff_type->entry, a, tc.i32, 1); - emit_copy(diff_type, diff_type->entry, b, a, tc.i64); - emit_ret_val(diff_type, diff_type->entry, b, tc.i64); - opt_build_cfg(diff_type); - opt_build_loop_tree(diff_type); - opt_regalloc(diff_type, 1); - EXPECT(diff_type->opt_coalesce_candidates == 0, - "different value types should not become coalesce candidates"); - - Func* tied = new_func(&tc); - opt_machinize(tied, &mock.base); - a = add_val(tied, tc.i32); - b = add_val(tied, tc.i32); - emit_load_imm(tied, tied->entry, a, tc.i32, 1); - emit_copy(tied, tied->entry, b, a, tc.i32); - emit_ret_val(tied, tied->entry, b, tc.i32); - opt_build_cfg(tied); - opt_build_loop_tree(tied); - ensure_test_preg_info(tied); - tied->preg_info[a].tied_hard_reg = 19; - tied->preg_info[b].tied_hard_reg = 20; - opt_regalloc(tied, 1); - EXPECT(tied->opt_coalesce_candidates == 1, - "same-shape tied copy should be collected"); - EXPECT(tied->opt_coalesce_merges == 0, - "different fixed hard regs should block coalescing"); - tc_fini(&tc); -} - -static void opt_o2_spills_singleton_when_whole_alloc_fails(void) { - /* Live-range splitting is deferred per doc/OPT_PERF.md plan. With one hard - * reg pinned and another value live across the pinned use, the allocator - * spills the unpinned value whole instead of producing OPT_ALLOC_SPLIT. */ - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val pinned = add_val(f, tc.i32); - Val v = add_val(f, tc.i32); - Val tmp = add_val(f, tc.i32); - emit_load_imm(f, f->entry, pinned, tc.i32, 1); - emit_load_imm(f, f->entry, v, tc.i32, 2); - emit_binop(f, f->entry, tmp, pinned, v, tc.i32); - emit_load_imm(f, f->entry, v, tc.i32, 3); - emit_ret_val(f, f->entry, v, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - ensure_test_preg_info(f); - f->preg_info[pinned].tied_hard_reg = 19; - opt_regalloc(f, 1); - - EXPECT(f->preg_info[pinned].alloc_kind == OPT_ALLOC_HARD, - "pinned value should keep the only hard register"); - EXPECT(f->preg_info[v].alloc_kind == OPT_ALLOC_SPILL, - "without splitting, the conflicting value should whole-spill"); - EXPECT(f->preg_info[v].spill_slot != FRAME_SLOT_NONE, - "spilled value should have a stack slot"); - tc_fini(&tc); -} - -static void opt_o2_does_not_split_critical_edge(void) { - /* Live-range splitting (and the associated critical-edge materialization) - * is deferred. The unpinned value whole-spills; no edge blocks are added. */ - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - u32 entry = f->entry; - u32 other = ir_block_new(f); - u32 join = ir_block_new(f); - ir_note_emit(f, other); - ir_note_emit(f, join); - Val pinned = add_val(f, tc.i32); - Val v = add_val(f, tc.i32); - Val tmp = add_val(f, tc.i32); - emit_load_imm(f, entry, pinned, tc.i32, 1); - emit_load_imm(f, entry, v, tc.i32, 2); - emit_binop(f, entry, tmp, pinned, v, tc.i32); - emit_test_branch(f, entry, join, other, tc.i32); - emit_br_to(f, other, join); - emit_ret_val(f, join, v, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - ensure_test_preg_info(f); - f->preg_info[pinned].tied_hard_reg = 19; - u32 original_blocks = f->nblocks; - opt_regalloc(f, 1); - - EXPECT(f->preg_info[v].alloc_kind != OPT_ALLOC_SPLIT, - "splitting is deferred; v%u should not be OPT_ALLOC_SPLIT", - (unsigned)v); - EXPECT(f->nblocks == original_blocks, - "no edge materialization expected without splitting"); - tc_fini(&tc); -} - -static void opt_o1_does_not_split_spill_edges(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - u32 entry = f->entry; - u32 other = ir_block_new(f); - u32 join = ir_block_new(f); - ir_note_emit(f, other); - ir_note_emit(f, join); - Val pinned = add_val(f, tc.i32); - Val v = add_val(f, tc.i32); - Val tmp = add_val(f, tc.i32); - emit_load_imm(f, entry, pinned, tc.i32, 1); - emit_load_imm(f, entry, v, tc.i32, 2); - emit_binop(f, entry, tmp, pinned, v, tc.i32); - emit_test_branch(f, entry, join, other, tc.i32); - emit_br_to(f, other, join); - emit_ret_val(f, join, v, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - ensure_test_preg_info(f); - f->preg_info[pinned].tied_hard_reg = 19; - u32 original_blocks = f->nblocks; - opt_regalloc(f, 0); - - EXPECT(f->nblocks == original_blocks, - "O1 regalloc should not split CFG spill edges"); - EXPECT(f->blocks[entry].succ[0] == join, - "O1 should leave the critical edge unsplit"); - tc_fini(&tc); -} - -static void opt_range_regalloc_no_conflicts_and_stack_reuse(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_load_imm(f, f->entry, b, tc.i32, 2); - emit_ret_val(f, f->entry, b, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - - opt_regalloc(f, 0); - - EXPECT(f->opt_used_loc_words != 0, - "range allocator should record used-location storage"); - EXPECT(f->opt_alloc_hard_loc_words != 0, - "range allocator should record hard occupancy words"); - EXPECT(f->opt_alloc_stack_slots != 0, - "range allocator should record allocated stack slots"); - EXPECT(f->opt_alloc_stack_loc_words <= f->opt_used_loc_words, - "stack occupancy should be reported separately"); - EXPECT(f->preg_info[a].alloc_kind == OPT_ALLOC_SPILL, - "no hard regs should force v%u to stack", (unsigned)a); - EXPECT(f->preg_info[b].alloc_kind == OPT_ALLOC_SPILL, - "no hard regs should force v%u to stack", (unsigned)b); - EXPECT(f->preg_info[a].spill_slot == f->preg_info[b].spill_slot, - "disjoint stack ranges should reuse a spill slot"); - tc_fini(&tc); -} - -static void opt_stack_spill_assignment_avoids_quadratic_probe(void) { - enum { NVALS = 64 }; - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val vals[NVALS]; - for (u32 i = 0; i < NVALS; ++i) { - vals[i] = add_val(f, tc.i32); - emit_load_imm(f, f->entry, vals[i], tc.i32, (i64)i + 1); - } - Val acc = vals[0]; - for (u32 i = 1; i < NVALS; ++i) { - Val next = add_val(f, tc.i32); - emit_binop(f, f->entry, next, acc, vals[i], tc.i32); - acc = next; - } - emit_ret_val(f, f->entry, acc, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - - EXPECT(f->opt_alloc_stack_slots >= NVALS - 1u, - "overlapping pressure should allocate many stack slots, got %u", - (unsigned)f->opt_alloc_stack_slots); - EXPECT(f->opt_alloc_stack_point_visits <= f->opt_alloc_stack_mark_points, - "stack assignment should not probe existing slots quadratically: " - "visits=%llu marks=%llu", - (unsigned long long)f->opt_alloc_stack_point_visits, - (unsigned long long)f->opt_alloc_stack_mark_points); - tc_fini(&tc); -} - -static void opt_rewrite_spill_use_def(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_load_imm(f, f->entry, b, tc.i32, 2); - emit_binop(f, f->entry, c, a, b, tc.i32); - emit_ret_val(f, f->entry, c, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - EXPECT(f->opt_rewritten, "regalloc should rewrite pseudos"); - EXPECT(count_op(f, IR_STORE) >= 1, "spill def should insert store"); - EXPECT(count_op(f, IR_LOAD) >= 1, "spill use should insert reload"); - int saw_spill_slot = 0; - for (u32 i = 0; i < f->nframe_slots; ++i) - if (f->frame_slots[i].kind == FS_SPILL) saw_spill_slot = 1; - EXPECT(saw_spill_slot, "rewrite should allocate FS_SPILL slot"); - - CfreeWriter* w = NULL; - (void)cfree_writer_mem(&g_heap, &w); - opt_rewrite_dump(f, w); - size_t len = 0; - const unsigned char* bytes = cfree_writer_mem_bytes(w, &len); - EXPECT(bytes_contains(bytes, len, "rewrite blocks="), - "rewrite dump should include summary"); - EXPECT(bytes_contains(bytes, len, "op="), - "rewrite dump should include rewritten instructions"); - cfree_writer_close(w); - tc_fini(&tc); -} - -static void opt_call_clobber_preservation(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; /* callee-saved */ - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val live = add_val(f, tc.i32); - emit_load_imm(f, f->entry, live, tc.i32, 11); - emit_call_void(f, f->entry); - emit_ret_val(f, f->entry, live, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - - Block* b = &f->blocks[f->entry]; - int saw_call_save_restore = 0; - for (u32 i = 1; i + 1 < b->ninsts; ++i) { - if ((IROp)b->insts[i].op == IR_CALL && - (IROp)b->insts[i - 1u].op == IR_STORE && - (IROp)b->insts[i + 1u].op == IR_LOAD) { - saw_call_save_restore = 1; - } - } - EXPECT(!saw_call_save_restore, - "live callee-saved hard reg across call should NOT be stored/loaded"); - tc_fini(&tc); -} - -static void opt_call_clobber_caller_saved(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {13}; /* caller-saved */ - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val live = add_val(f, tc.i32); - emit_load_imm(f, f->entry, live, tc.i32, 11); - emit_call_void(f, f->entry); - emit_ret_val(f, f->entry, live, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - - Block* b = &f->blocks[f->entry]; - int saw_call_save_restore = 0; - for (u32 i = 1; i + 1 < b->ninsts; ++i) { - if ((IROp)b->insts[i].op == IR_CALL && - (IROp)b->insts[i - 1u].op == IR_STORE && - (IROp)b->insts[i + 1u].op == IR_LOAD) { - saw_call_save_restore = 1; - } - } - EXPECT(saw_call_save_restore, - "live caller-saved hard reg across call should be stored before and " - "loaded after"); - tc_fini(&tc); -} - -static void opt_spill_pressure(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - Val d = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_load_imm(f, f->entry, b, tc.i32, 2); - emit_load_imm(f, f->entry, c, tc.i32, 3); - emit_binop(f, f->entry, d, a, b, tc.i32); - emit_binop(f, f->entry, d, d, c, tc.i32); - emit_ret_val(f, f->entry, d, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - - int spills = 0; - for (Val v = 1; v < f->nvals; ++v) - if (f->preg_info[v].alloc_kind == OPT_ALLOC_SPILL) ++spills; - EXPECT(spills >= 2, "one hard reg should force multiple spills, got %d", - spills); - EXPECT(count_op(f, IR_LOAD) >= 2, "spilled uses should reload"); - EXPECT(count_op(f, IR_STORE) >= 2, "spilled defs should store"); - tc_fini(&tc); -} - -static void opt_inline_asm_tied_fixed_regs(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val tied = add_val(f, tc.i32); - emit_load_imm(f, f->entry, tied, tc.i32, 5); - - Inst* in = ir_emit(f, f->entry, IR_ASM_BLOCK); - IRAsmAux* aux = arena_znew(f->arena, IRAsmAux); - aux->tmpl = "add %0, %0, #1"; - aux->nout = 1; - aux->nin = 1; - aux->outs = arena_array(f->arena, AsmConstraint, 1); - aux->ins = arena_array(f->arena, AsmConstraint, 1); - aux->out_ops = arena_array(f->arena, Operand, 1); - aux->in_ops = arena_array(f->arena, Operand, 1); - memset(aux->outs, 0, sizeof(AsmConstraint)); - memset(aux->ins, 0, sizeof(AsmConstraint)); - aux->outs[0].str = "+r"; - aux->outs[0].dir = ASM_INOUT; - aux->outs[0].type = tc.i32; - aux->ins[0].str = "0"; - aux->ins[0].dir = ASM_IN; - aux->ins[0].type = tc.i32; - aux->out_ops[0] = op_reg_(tied, tc.i32); - aux->in_ops[0] = op_reg_(tied, tc.i32); - in->extra.aux = aux; - emit_ret_val(f, f->entry, tied, tc.i32); - - opt_build_cfg(f); - opt_build_loop_tree(f); - ensure_test_preg_info(f); - f->preg_info[tied].tied_hard_reg = (i32)f->opt_hard_regs[RC_INT][0]; - opt_regalloc(f, 0); - aux = (IRAsmAux*)in->extra.aux; - Reg expected = f->opt_hard_regs[RC_INT][0]; - EXPECT(f->preg_info[tied].alloc_kind == OPT_ALLOC_HARD, - "tied asm val should allocate hard"); - EXPECT(f->preg_info[tied].hard_reg == expected, "tied asm val should get r%u", - (unsigned)expected); - EXPECT(aux->out_ops[0].v.reg == expected && aux->in_ops[0].v.reg == expected, - "asm tied operands should rewrite to the fixed hard reg"); - tc_fini(&tc); -} - -static void opt_inline_asm_constraints_and_clobbers(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19, 20}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 2, scratch, 2, 0); - - Func* f = new_func(&tc); - Val fixed = add_val(f, tc.i32); - Val live = add_val(f, tc.i32); - Val out = add_val(f, tc.i32); - emit_load_imm(f, f->entry, fixed, tc.i32, 5); - emit_load_imm(f, f->entry, live, tc.i32, 7); - - Inst* in = ir_emit(f, f->entry, IR_ASM_BLOCK); - IRAsmAux* aux = arena_znew(f->arena, IRAsmAux); - aux->tmpl = "add %0, %1, #1"; - aux->nout = 1; - aux->nin = 1; - aux->nclob = 1; - aux->outs = arena_array(f->arena, AsmConstraint, 1); - aux->ins = arena_array(f->arena, AsmConstraint, 1); - aux->out_ops = arena_array(f->arena, Operand, 1); - aux->in_ops = arena_array(f->arena, Operand, 1); - aux->clobbers = arena_array(f->arena, Sym, 1); - memset(aux->outs, 0, sizeof(AsmConstraint)); - memset(aux->ins, 0, sizeof(AsmConstraint)); - aux->outs[0].str = "=r"; - aux->outs[0].dir = ASM_OUT; - aux->outs[0].type = tc.i32; - aux->ins[0].str = "{x19}"; - aux->ins[0].dir = ASM_IN; - aux->ins[0].type = tc.i32; - aux->out_ops[0] = op_reg_(out, tc.i32); - aux->in_ops[0] = op_reg_(fixed, tc.i32); - aux->clobbers[0] = pool_intern_slice(tc.c->global, SLICE_LIT("x20")); - in->extra.aux = aux; - in->ndefs = 1; - in->defs = arena_array(f->arena, Val, 1); - in->defs[0] = out; - in->def = out; - f->val_def_block[out] = f->entry; - f->val_def_inst[out] = f->blocks[f->entry].ninsts - 1u; - - emit_binop(f, f->entry, out, out, live, tc.i32); - emit_ret_val(f, f->entry, out, tc.i32); - - opt_machinize(f, &mock.base); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - aux = (IRAsmAux*)in->extra.aux; - - EXPECT(f->preg_info[fixed].alloc_kind == OPT_ALLOC_HARD && - f->preg_info[fixed].hard_reg == 19, - "fixed asm input should allocate x19"); - EXPECT(f->preg_info[live].alloc_kind != OPT_ALLOC_HARD || - f->preg_info[live].hard_reg != 20, - "value live across asm clobber should not allocate clobbered x20"); - EXPECT(aux->in_ops[0].kind == OPK_REG && aux->in_ops[0].v.reg == 19, - "fixed asm input should rewrite to x19"); - tc_fini(&tc); -} - -static void opt_post_rewrite_dce(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_copy(f, f->entry, a, a, tc.i32); - ir_emit(f, f->entry, IR_NOP); - emit_ret_val(f, f->entry, a, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_regalloc(f, 0); - opt_combine(f); - opt_dce(f); - EXPECT(count_op(f, IR_COPY) == 0, "noop physical copy should be removed"); - EXPECT(count_op(f, IR_NOP) == 0, "post-rewrite DCE should remove nops"); - tc_fini(&tc); -} - -static void opt_dce_call_clobbers_hard_regs(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {10}; - static const Reg scratch[] = {9}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 1, 1u << 10); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - f->opt_rewritten = 1; - Inst* li = ir_emit(f, f->entry, IR_LOAD_IMM); - li->opnds = arena_array(f->arena, Operand, 1); - li->opnds[0] = op_reg_(10, tc.i32); - li->nopnds = 1; - li->extra.imm = 7; - emit_call_void(f, f->entry); - Inst* add = ir_emit(f, f->entry, IR_BINOP); - add->opnds = arena_array(f->arena, Operand, 3); - add->opnds[0] = op_reg_(19, tc.i32); - add->opnds[1] = op_reg_(10, tc.i32); - add->opnds[2] = op_imm_(1, tc.i32); - add->nopnds = 3; - add->extra.imm = BO_IADD; - emit_ret_val(f, f->entry, 19, tc.i32); - - opt_dce(f); - EXPECT(count_op(f, IR_LOAD_IMM) == 0, - "pre-call caller-saved def should not satisfy a post-call use"); - - Func* g = new_func(&tc); - opt_machinize(g, &mock.base); - g->opt_rewritten = 1; - li = ir_emit(g, g->entry, IR_LOAD_IMM); - li->opnds = arena_array(g->arena, Operand, 1); - li->opnds[0] = op_reg_(10, tc.i32); - li->nopnds = 1; - li->extra.imm = 9; - Inst* call = emit_call_void(g, g->entry); - IRCallAux* caux = (IRCallAux*)call->extra.aux; - CGABIValue* args = arena_zarray(g->arena, CGABIValue, 1); - args[0].storage = op_reg_(10, tc.i32); - caux->desc.nargs = 1; - caux->desc.args = args; - emit_ret_val(g, g->entry, 10, tc.i32); - - opt_dce(g); - EXPECT(count_op(g, IR_LOAD_IMM) == 1, - "hard-reg direct call argument should keep its reaching def"); - - Func* h = new_func(&tc); - opt_machinize(h, &mock.base); - h->opt_rewritten = 1; - li = ir_emit(h, h->entry, IR_LOAD_IMM); - li->opnds = arena_array(h->arena, Operand, 1); - li->opnds[0] = op_reg_(10, tc.i32); - li->nopnds = 1; - li->extra.imm = 11; - call = emit_call_void(h, h->entry); - caux = (IRCallAux*)call->extra.aux; - args = arena_zarray(h->arena, CGABIValue, 1); - CGABIPart* parts = arena_zarray(h->arena, CGABIPart, 1); - parts[0].op = op_reg_(10, tc.i32); - args[0].nparts = 1; - args[0].parts = parts; - caux->desc.nargs = 1; - caux->desc.args = args; - ir_emit(h, h->entry, IR_RET); - - opt_dce(h); - EXPECT(count_op(h, IR_LOAD_IMM) == 1, - "hard-reg ABI argument part should keep its reaching def"); - - Func* callee_f = new_func(&tc); - opt_machinize(callee_f, &mock.base); - callee_f->opt_rewritten = 1; - li = ir_emit(callee_f, callee_f->entry, IR_LOAD_IMM); - li->opnds = arena_array(callee_f->arena, Operand, 1); - li->opnds[0] = op_reg_(10, tc.i32); - li->nopnds = 1; - li->extra.imm = 13; - call = emit_call_void(callee_f, callee_f->entry); - caux = (IRCallAux*)call->extra.aux; - caux->desc.callee = op_indirect_(10, tc.i32); - ir_emit(callee_f, callee_f->entry, IR_RET); - - opt_dce(callee_f); - EXPECT(count_op(callee_f, IR_LOAD_IMM) == 1, - "indirect call callee base should keep its reaching def"); - - MockCGTarget ret_mock; - mock_init(&ret_mock, tc.c); - mock_set_pool(&ret_mock, RC_INT, pool, 1, scratch, 1, 0); - Func* ret_f = new_func(&tc); - opt_machinize(ret_f, &ret_mock.base); - ret_f->opt_rewritten = 1; - li = ir_emit(ret_f, ret_f->entry, IR_LOAD_IMM); - li->opnds = arena_array(ret_f->arena, Operand, 1); - li->opnds[0] = op_reg_(10, tc.i32); - li->nopnds = 1; - li->extra.imm = 15; - call = emit_call_void(ret_f, ret_f->entry); - caux = (IRCallAux*)call->extra.aux; - caux->desc.ret.storage = op_reg_(10, tc.i32); - emit_ret_val(ret_f, ret_f->entry, 10, tc.i32); - - opt_dce(ret_f); - EXPECT(count_op(ret_f, IR_LOAD_IMM) == 0 && count_op(ret_f, IR_CALL) == 1, - "call return hard-reg def should feed ret and kill pre-call def"); - tc_fini(&tc); -} - -typedef struct NoScratchCtx { - Func* f; -} NoScratchCtx; - -static void run_no_scratch_regalloc(void* arg) { - NoScratchCtx* ctx = (NoScratchCtx*)arg; - opt_regalloc(ctx->f, 0); -} - -static void opt_regalloc_spill_requires_scratch(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - mock_set_pool(&mock, RC_INT, pool, 1, NULL, 0, 0); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - Val c = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_load_imm(f, f->entry, b, tc.i32, 2); - emit_binop(f, f->entry, c, a, b, tc.i32); - emit_ret_val(f, f->entry, c, tc.i32); - opt_build_cfg(f); - opt_build_loop_tree(f); - - NoScratchCtx nctx = {f}; - EXPECT(expect_panic(tc.c, run_no_scratch_regalloc, &nctx), - "spilling without a scratch register should panic"); - tc_fini(&tc); -} - -static void opt_combine_spill_peeps(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - f->opt_rewritten = 1; - FrameSlot fs = add_frame_slot(f, tc.i32, FS_SPILL, 4, 0); - - Inst* st = ir_emit(f, f->entry, IR_STORE); - st->opnds = arena_array(f->arena, Operand, 2); - st->opnds[0] = op_local_(fs, tc.i32); - st->opnds[1] = op_reg_(9, tc.i32); - st->nopnds = 2; - st->extra.mem = mem_local_(fs, tc.i32, 4, 0); - - Inst* ld = ir_emit(f, f->entry, IR_LOAD); - ld->opnds = arena_array(f->arena, Operand, 2); - ld->opnds[0] = op_reg_(9, tc.i32); - ld->opnds[1] = op_local_(fs, tc.i32); - ld->nopnds = 2; - ld->extra.mem = mem_local_(fs, tc.i32, 4, 0); - - opt_combine(f); - EXPECT(f->blocks[f->entry].ninsts == 1, - "store followed by same-reg spill reload should combine to one inst"); - EXPECT((IROp)f->blocks[f->entry].insts[0].op == IR_STORE, - "remaining inst should be the spill store"); - - Func* g = new_func(&tc); - g->opt_rewritten = 1; - fs = add_frame_slot(g, tc.i32, FS_SPILL, 4, 0); - - ld = ir_emit(g, g->entry, IR_LOAD); - ld->opnds = arena_array(g->arena, Operand, 2); - ld->opnds[0] = op_reg_(9, tc.i32); - ld->opnds[1] = op_local_(fs, tc.i32); - ld->nopnds = 2; - ld->extra.mem = mem_local_(fs, tc.i32, 4, 0); - - st = ir_emit(g, g->entry, IR_STORE); - st->opnds = arena_array(g->arena, Operand, 2); - st->opnds[0] = op_local_(fs, tc.i32); - st->opnds[1] = op_reg_(9, tc.i32); - st->nopnds = 2; - st->extra.mem = mem_local_(fs, tc.i32, 4, 0); - - opt_combine(g); - EXPECT( - g->blocks[g->entry].ninsts == 1, - "spill reload followed by same-reg writeback should combine to one inst"); - EXPECT((IROp)g->blocks[g->entry].insts[0].op == IR_LOAD, - "remaining inst should be the spill reload"); - tc_fini(&tc); -} - -static void opt_combine_single_use_copy_and_imm(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - f->opt_rewritten = 1; - - Inst* li = ir_emit(f, f->entry, IR_LOAD_IMM); - li->opnds = arena_array(f->arena, Operand, 1); - li->opnds[0] = op_reg_(9, tc.i32); - li->nopnds = 1; - li->extra.imm = 7; - - Inst* cp = ir_emit(f, f->entry, IR_COPY); - cp->opnds = arena_array(f->arena, Operand, 2); - cp->opnds[0] = op_reg_(10, tc.i32); - cp->opnds[1] = op_reg_(11, tc.i32); - cp->nopnds = 2; - - Inst* add = ir_emit(f, f->entry, IR_BINOP); - add->opnds = arena_array(f->arena, Operand, 3); - add->opnds[0] = op_reg_(19, tc.i32); - add->opnds[1] = op_reg_(9, tc.i32); - add->opnds[2] = op_reg_(10, tc.i32); - add->nopnds = 3; - add->extra.imm = BO_IADD; - emit_ret_val(f, f->entry, 19, tc.i32); - - opt_combine(f); - add = &f->blocks[f->entry].insts[2]; - EXPECT(add->opnds[1].kind == OPK_IMM && add->opnds[1].v.imm == 7, - "one-use load_imm should fold into binop immediate operand"); - EXPECT(add->opnds[2].kind == OPK_REG && add->opnds[2].v.reg == 11, - "one-use physical copy should fold into binop register operand"); - - opt_dce(f); - EXPECT(count_op(f, IR_LOAD_IMM) == 0, - "folded load_imm should be removed by post-rewrite DCE"); - EXPECT(count_op(f, IR_COPY) == 0, - "folded physical copy should be removed by post-rewrite DCE"); - - Func* g = new_func(&tc); - g->opt_rewritten = 1; - li = ir_emit(g, g->entry, IR_LOAD_IMM); - li->opnds = arena_array(g->arena, Operand, 1); - li->opnds[0] = op_reg_(9, tc.i32); - li->nopnds = 1; - li->extra.imm = 0; - - Inst* cmp = ir_emit(g, g->entry, IR_CMP); - cmp->opnds = arena_array(g->arena, Operand, 3); - cmp->opnds[0] = op_reg_(19, tc.i32); - cmp->opnds[1] = op_reg_(10, tc.i32); - cmp->opnds[2] = op_reg_(9, tc.i32); - cmp->nopnds = 3; - cmp->extra.imm = CMP_EQ; - emit_ret_val(g, g->entry, 19, tc.i32); - - opt_combine(g); - cmp = &g->blocks[g->entry].insts[1]; - EXPECT(cmp->opnds[2].kind == OPK_IMM && cmp->opnds[2].v.imm == 0, - "one-use load_imm should fold into cmp immediate operand"); - - Func* h = new_func(&tc); - h->opt_rewritten = 1; - u32 taken = ir_block_new(h); - u32 fallthrough = ir_block_new(h); - li = ir_emit(h, h->entry, IR_LOAD_IMM); - li->opnds = arena_array(h->arena, Operand, 1); - li->opnds[0] = op_reg_(9, tc.i32); - li->nopnds = 1; - li->extra.imm = 1; - - Inst* br = ir_emit(h, h->entry, IR_CMP_BRANCH); - br->opnds = arena_array(h->arena, Operand, 2); - br->opnds[0] = op_reg_(9, tc.i32); - br->opnds[1] = op_reg_(10, tc.i32); - br->nopnds = 2; - br->extra.imm = CMP_NE; - h->blocks[h->entry].succ[0] = taken; - h->blocks[h->entry].succ[1] = fallthrough; - h->blocks[h->entry].nsucc = 2; - - opt_combine(h); - br = &h->blocks[h->entry].insts[1]; - EXPECT(br->opnds[0].kind == OPK_IMM && br->opnds[0].v.imm == 1, - "one-use load_imm should fold into cmp_branch immediate operand"); - tc_fini(&tc); -} - -static void opt_combine_sinks_or_preserves_producer_copy_after_rewrite(void) { - TestCtx tc; - tc_init(&tc); - - /* Base case: producer dies after the copy. Sink fires — producer - * retargets to the copy's destination and the copy is removed. */ - Func* f = new_func(&tc); - f->opt_rewritten = 1; - emit_phys_binop(f, f->entry, 21, 20, 19, tc.i32, BO_IADD); - emit_phys_copy(f, f->entry, 22, 21, tc.i32); - emit_ret_val(f, f->entry, 22, tc.i32); - - opt_combine(f); - EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 0, - "single-use producer should sink into copy dst (copy removed)"); - Inst* add = &f->blocks[f->entry].insts[0]; - EXPECT(add->opnds[0].v.reg == 22, - "sunk producer's destination should become the copy's destination"); - - /* Lhs overlap: producer's lhs source operand equals the copy dst. - * retarget_producer_legal allows this without swap; sink fires and the - * binop becomes `add r20, r20, r19` (dst==lhs). */ - Func* lhs = new_func(&tc); - lhs->opt_rewritten = 1; - emit_phys_binop(lhs, lhs->entry, 21, 20, 19, tc.i32, BO_IADD); - emit_phys_copy(lhs, lhs->entry, 20, 21, tc.i32); - emit_ret_val(lhs, lhs->entry, 20, tc.i32); - - opt_combine(lhs); - add = &lhs->blocks[lhs->entry].insts[0]; - EXPECT(count_op(lhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 && - add->opnds[1].v.reg == 20 && add->opnds[2].v.reg == 19, - "lhs-overlap sink should produce add r20, r20, r19 without swap"); - - /* Rhs overlap on a commutative op: sink commutes the binop so the new - * destination lands on the lhs. */ - Func* rhs = new_func(&tc); - rhs->opt_rewritten = 1; - emit_phys_binop(rhs, rhs->entry, 21, 19, 20, tc.i32, BO_IADD); - emit_phys_copy(rhs, rhs->entry, 20, 21, tc.i32); - emit_ret_val(rhs, rhs->entry, 20, tc.i32); - - opt_combine(rhs); - add = &rhs->blocks[rhs->entry].insts[0]; - EXPECT(count_op(rhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 && - add->opnds[1].v.reg == 20 && add->opnds[2].v.reg == 19, - "rhs-overlap sink should swap commutative binop to land on lhs"); - - Func* retreg = new_func(&tc); - retreg->opt_rewritten = 1; - retreg->opt_ret_regs[RC_INT] = 1u << 20; - emit_phys_binop(retreg, retreg->entry, 21, 19, 20, tc.i32, BO_IADD); - emit_ret_val(retreg, retreg->entry, 21, tc.i32); - - opt_combine(retreg); - add = &retreg->blocks[retreg->entry].insts[0]; - IRRetAux* raux = (IRRetAux*)retreg->blocks[retreg->entry].insts[1].extra.aux; - EXPECT(add->opnds[0].v.reg == 20 && add->opnds[1].v.reg == 20 && - add->opnds[2].v.reg == 19 && raux->val.storage.v.reg == 20, - "adjacent scalar return producer should retarget to ABI return reg"); - - Func* sub = new_func(&tc); - sub->opt_rewritten = 1; - emit_phys_binop(sub, sub->entry, 21, 19, 20, tc.i32, BO_ISUB); - emit_phys_copy(sub, sub->entry, 20, 21, tc.i32); - emit_ret_val(sub, sub->entry, 20, tc.i32); - - opt_combine(sub); - EXPECT(count_op(sub, IR_COPY) == 1, - "noncommutative rhs overlap should not retarget producer"); - - Func* multi = new_func(&tc); - multi->opt_rewritten = 1; - emit_phys_binop(multi, multi->entry, 21, 20, 19, tc.i32, BO_IADD); - emit_phys_copy(multi, multi->entry, 22, 21, tc.i32); - emit_phys_binop(multi, multi->entry, 23, 21, 18, tc.i32, BO_IADD); - emit_ret_val(multi, multi->entry, 23, tc.i32); - - opt_combine(multi); - EXPECT(count_op(multi, IR_COPY) == 1, - "producer with two same-block uses should not retarget"); - - Func* call_f = new_func(&tc); - call_f->opt_rewritten = 1; - emit_phys_binop(call_f, call_f->entry, 21, 20, 19, tc.i32, BO_IADD); - emit_call_void(call_f, call_f->entry); - emit_phys_copy(call_f, call_f->entry, 22, 21, tc.i32); - emit_ret_val(call_f, call_f->entry, 22, tc.i32); - - opt_combine(call_f); - EXPECT(count_op(call_f, IR_COPY) == 1, - "producer-copy retargeting should not cross a call"); - - Func* call_live = new_func(&tc); - call_live->opt_rewritten = 1; - emit_phys_binop(call_live, call_live->entry, 19, 20, 21, tc.i64, BO_IADD); - emit_phys_copy(call_live, call_live->entry, 0, 19, tc.i64); - emit_call_void(call_live, call_live->entry); - emit_phys_binop(call_live, call_live->entry, 22, 19, 18, tc.i64, BO_IADD); - emit_ret_val(call_live, call_live->entry, 22, tc.i64); - - opt_combine(call_live); - add = &call_live->blocks[call_live->entry].insts[0]; - EXPECT(count_op(call_live, IR_COPY) == 1 && add->opnds[0].v.reg == 19, - "producer used after a call barrier should not sink into call arg"); - - Func* clobber = new_func(&tc); - clobber->opt_rewritten = 1; - emit_phys_binop(clobber, clobber->entry, 21, 20, 19, tc.i32, BO_IADD); - Inst* li = ir_emit(clobber, clobber->entry, IR_LOAD_IMM); - li->opnds = arena_array(clobber->arena, Operand, 1); - li->opnds[0] = op_reg_(20, tc.i32); - li->nopnds = 1; - li->extra.imm = 7; - emit_phys_copy(clobber, clobber->entry, 20, 21, tc.i32); - emit_ret_val(clobber, clobber->entry, 20, tc.i32); - - opt_combine(clobber); - EXPECT(count_op(clobber, IR_COPY) == 1, - "producer retargeting should not cross a destination clobber"); - - Func* dst_live = new_func(&tc); - dst_live->opt_rewritten = 1; - li = ir_emit(dst_live, dst_live->entry, IR_LOAD_IMM); - li->opnds = arena_array(dst_live->arena, Operand, 1); - li->opnds[0] = op_reg_(24, tc.i32); - li->nopnds = 1; - li->extra.imm = 0; - emit_phys_copy(dst_live, dst_live->entry, 21, 20, tc.i32); - emit_phys_copy(dst_live, dst_live->entry, 20, 24, tc.i32); - emit_ret_val(dst_live, dst_live->entry, 20, tc.i32); - - opt_combine(dst_live); - EXPECT(dst_live->blocks[dst_live->entry].insts[0].opnds[0].v.reg == 24 && - count_op(dst_live, IR_COPY) == 2, - "producer retargeting should not clobber an intervening dst use"); - - Func* liveout = new_func(&tc); - liveout->opt_rewritten = 1; - u32 succ = ir_block_new(liveout); - emit_phys_binop(liveout, liveout->entry, 21, 20, 19, tc.i32, BO_IADD); - emit_phys_copy(liveout, liveout->entry, 22, 21, tc.i32); - emit_br_to(liveout, liveout->entry, succ); - emit_ret_val(liveout, succ, 21, tc.i32); - - opt_combine(liveout); - EXPECT(count_op(liveout, IR_COPY) == 1, - "producer live out of the block should not retarget to copy dst"); - - tc_fini(&tc); -} - -static void opt_combine_keeps_unsafe_and_multiuse_defs(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - f->opt_rewritten = 1; - - Inst* cp = ir_emit(f, f->entry, IR_COPY); - cp->opnds = arena_array(f->arena, Operand, 2); - cp->opnds[0] = op_reg_(10, tc.i32); - cp->opnds[1] = op_reg_(11, tc.i32); - cp->nopnds = 2; - - Inst* clobber = ir_emit(f, f->entry, IR_LOAD_IMM); - clobber->opnds = arena_array(f->arena, Operand, 1); - clobber->opnds[0] = op_reg_(11, tc.i32); - clobber->nopnds = 1; - clobber->extra.imm = 4; - - Inst* add = ir_emit(f, f->entry, IR_BINOP); - add->opnds = arena_array(f->arena, Operand, 3); - add->opnds[0] = op_reg_(19, tc.i32); - add->opnds[1] = op_reg_(10, tc.i32); - add->opnds[2] = op_reg_(12, tc.i32); - add->nopnds = 3; - add->extra.imm = BO_IADD; - emit_ret_val(f, f->entry, 19, tc.i32); - - opt_combine(f); - add = &f->blocks[f->entry].insts[2]; - EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 10, - "copy fold should not cross a clobber of the source register"); - - Func* call_f = new_func(&tc); - call_f->opt_rewritten = 1; - cp = ir_emit(call_f, call_f->entry, IR_COPY); - cp->opnds = arena_array(call_f->arena, Operand, 2); - cp->opnds[0] = op_reg_(10, tc.i32); - cp->opnds[1] = op_reg_(11, tc.i32); - cp->nopnds = 2; - emit_call_void(call_f, call_f->entry); - - add = ir_emit(call_f, call_f->entry, IR_BINOP); - add->opnds = arena_array(call_f->arena, Operand, 3); - add->opnds[0] = op_reg_(19, tc.i32); - add->opnds[1] = op_reg_(10, tc.i32); - add->opnds[2] = op_reg_(12, tc.i32); - add->nopnds = 3; - add->extra.imm = BO_IADD; - emit_ret_val(call_f, call_f->entry, 19, tc.i32); - - opt_combine(call_f); - add = &call_f->blocks[call_f->entry].insts[2]; - EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 10, - "copy fold should not cross a call clobber barrier"); - - Func* g = new_func(&tc); - g->opt_rewritten = 1; - Inst* li = ir_emit(g, g->entry, IR_LOAD_IMM); - li->opnds = arena_array(g->arena, Operand, 1); - li->opnds[0] = op_reg_(9, tc.i32); - li->nopnds = 1; - li->extra.imm = 7; - - add = ir_emit(g, g->entry, IR_BINOP); - add->opnds = arena_array(g->arena, Operand, 3); - add->opnds[0] = op_reg_(19, tc.i32); - add->opnds[1] = op_reg_(9, tc.i32); - add->opnds[2] = op_reg_(12, tc.i32); - add->nopnds = 3; - add->extra.imm = BO_IADD; - - Inst* cmp = ir_emit(g, g->entry, IR_CMP); - cmp->opnds = arena_array(g->arena, Operand, 3); - cmp->opnds[0] = op_reg_(20, tc.i32); - cmp->opnds[1] = op_reg_(9, tc.i32); - cmp->opnds[2] = op_reg_(13, tc.i32); - cmp->nopnds = 3; - cmp->extra.imm = CMP_EQ; - emit_ret_val(g, g->entry, 19, tc.i32); - - opt_combine(g); - add = &g->blocks[g->entry].insts[1]; - EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 9, - "multi-use load_imm should not fold into the first user"); - tc_fini(&tc); -} - -static void opt_combine_copy_chains_and_convert_pairs(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - f->opt_rewritten = 1; - - Inst* cp = ir_emit(f, f->entry, IR_COPY); - cp->opnds = arena_array(f->arena, Operand, 2); - cp->opnds[0] = op_reg_(10, tc.i32); - cp->opnds[1] = op_reg_(11, tc.i32); - cp->nopnds = 2; - - cp = ir_emit(f, f->entry, IR_COPY); - cp->opnds = arena_array(f->arena, Operand, 2); - cp->opnds[0] = op_reg_(12, tc.i32); - cp->opnds[1] = op_reg_(10, tc.i32); - cp->nopnds = 2; - - Inst* add = ir_emit(f, f->entry, IR_BINOP); - add->opnds = arena_array(f->arena, Operand, 3); - add->opnds[0] = op_reg_(19, tc.i32); - add->opnds[1] = op_reg_(12, tc.i32); - add->opnds[2] = op_reg_(13, tc.i32); - add->nopnds = 3; - add->extra.imm = BO_IADD; - emit_ret_val(f, f->entry, 19, tc.i32); - - opt_combine(f); - add = &f->blocks[f->entry].insts[2]; - EXPECT(add->opnds[1].kind == OPK_REG && add->opnds[1].v.reg == 11, - "single-use physical copy chains should collapse before DCE"); - opt_dce(f); - EXPECT(count_op(f, IR_COPY) == 0, - "collapsed copy chain should be removed by post-rewrite DCE"); - - Func* g = new_func(&tc); - g->opt_rewritten = 1; - emit_convert(g, g->entry, 10, 11, tc.i32, CV_BITCAST); - emit_convert(g, g->entry, 12, 10, tc.i32, CV_BITCAST); - emit_ret_val(g, g->entry, 12, tc.i32); - - opt_combine(g); - Inst* cv = &g->blocks[g->entry].insts[1]; - EXPECT(cv->opnds[1].kind == OPK_REG && cv->opnds[1].v.reg == 11, - "identical one-use convert pairs should fold to the original source"); - opt_dce(g); - EXPECT(count_op(g, IR_CONVERT) == 1, - "folded convert producer should be removed by post-rewrite DCE"); - tc_fini(&tc); -} - -/* Helper: emit IR_LOAD with arbitrary indirect operand. */ -static Inst* tt_emit_load_phys_indirect(Func* f, u32 b, Reg dst, Operand addr, - CfreeCgTypeId ty) { - Inst* in = ir_emit(f, b, IR_LOAD); - in->opnds = arena_array(f->arena, Operand, 2); - in->opnds[0] = op_reg_(dst, ty); - in->opnds[1] = addr; - in->nopnds = 2; - in->extra.mem = mem_unknown_(ty, 4); - return in; -} - -static void opt_combine_substitutes_into_indirect_base_and_index(void) { - TestCtx tc; - tc_init(&tc); - - /* Base substitution: `copy r4, r2 ; load r1, [r4 + 8]` → load uses r2. */ - Func* f = new_func(&tc); - f->opt_rewritten = 1; - emit_phys_copy(f, f->entry, 4, 2, tc.i32); - Operand addr = op_indirect_(4, tc.i32); - addr.v.ind.ofs = 8; - tt_emit_load_phys_indirect(f, f->entry, 1, addr, tc.i32); - emit_ret_val(f, f->entry, 1, tc.i32); - - opt_combine(f); - opt_dce(f); - Inst* load = NULL; - for (u32 i = 0; i < f->blocks[f->entry].ninsts; ++i) - if ((IROp)f->blocks[f->entry].insts[i].op == IR_LOAD) - load = &f->blocks[f->entry].insts[i]; - EXPECT(load && load->opnds[1].kind == OPK_INDIRECT && - load->opnds[1].v.ind.base == 2 && load->opnds[1].v.ind.ofs == 8 && - load->opnds[1].v.ind.index == (Reg)REG_NONE, - "single-use copy should substitute into indirect base"); - EXPECT(count_op(f, IR_COPY) == 0, - "copy whose only use is an indirect base should be dead-DCE'd"); - - /* Index substitution: `copy r4, r2 ; load r1, [r0 + r4 * 4]` */ - Func* g = new_func(&tc); - g->opt_rewritten = 1; - emit_phys_copy(g, g->entry, 4, 2, tc.i32); - Operand iaddr = op_indexed_indirect_(0, 4, 2, 0, tc.i32); - tt_emit_load_phys_indirect(g, g->entry, 1, iaddr, tc.i32); - emit_ret_val(g, g->entry, 1, tc.i32); - - opt_combine(g); - opt_dce(g); - load = NULL; - for (u32 i = 0; i < g->blocks[g->entry].ninsts; ++i) - if ((IROp)g->blocks[g->entry].insts[i].op == IR_LOAD) - load = &g->blocks[g->entry].insts[i]; - EXPECT(load && load->opnds[1].kind == OPK_INDIRECT && - load->opnds[1].v.ind.base == 0 && - load->opnds[1].v.ind.index == 2 && - load->opnds[1].v.ind.log2_scale == 2, - "single-use copy should substitute into indirect index"); - EXPECT(count_op(g, IR_COPY) == 0, - "copy whose only use is an indirect index should be dead-DCE'd"); - - Func* store_f = new_func(&tc); - store_f->opt_rewritten = 1; - emit_phys_copy(store_f, store_f->entry, 4, 2, tc.i32); - Inst* st = emit_store_indirect(store_f, store_f->entry, 4, 1, tc.i32, 0); - st->opnds[0].v.ind.ofs = 8; - ir_emit(store_f, store_f->entry, IR_RET); - - opt_combine(store_f); - st = NULL; - for (u32 i = 0; i < store_f->blocks[store_f->entry].ninsts; ++i) - if ((IROp)store_f->blocks[store_f->entry].insts[i].op == IR_STORE) - st = &store_f->blocks[store_f->entry].insts[i]; - EXPECT( - st && st->opnds[0].kind == OPK_INDIRECT && st->opnds[0].v.ind.base == 4, - "copy substitution should not rewrite store address bases"); - tc_fini(&tc); -} - -static void opt_combine_copy_chain_source_reuse_blocks_indirect_subst(void) { - TestCtx tc; - tc_init(&tc); - - Func* f = new_func(&tc); - f->opt_rewritten = 1; - emit_phys_copy(f, f->entry, 7, 8, tc.i64); - emit_phys_copy(f, f->entry, 14, 7, tc.i64); - Operand off_addr = op_indexed_indirect_(13, 12, 0, 0, tc.i32); - tt_emit_load_phys_indirect(f, f->entry, 8, off_addr, tc.i32); - emit_phys_binop(f, f->entry, 7, 21, 8, tc.i32, BO_IADD); - emit_phys_copy(f, f->entry, 8, 7, tc.i32); - emit_phys_copy(f, f->entry, 13, 8, tc.i32); - Operand field_addr = op_indirect_(14, tc.i64); - field_addr.v.ind.ofs = 8; - tt_emit_load_phys_indirect(f, f->entry, 12, field_addr, tc.i64); - emit_ret_val(f, f->entry, 12, tc.i64); - - opt_combine(f); - Inst* load = NULL; - for (u32 i = 0; i < f->blocks[f->entry].ninsts; ++i) - if ((IROp)f->blocks[f->entry].insts[i].op == IR_LOAD && - f->blocks[f->entry].insts[i].opnds[0].v.reg == 12) - load = &f->blocks[f->entry].insts[i]; - - EXPECT(load && load->opnds[1].kind == OPK_INDIRECT && - load->opnds[1].v.ind.base == 14, - "copy-chain indirect substitution must not cross reuse of source reg"); - - tc_fini(&tc); -} - -static void opt_combine_synthesizes_address_modes(void) { - TestCtx tc; - tc_init(&tc); - - /* reg + reg: `add r4, r2, r3 ; load r1, [r4]` → `load r1, [r2 + r3*1]` */ - Func* f = new_func(&tc); - f->opt_rewritten = 1; - emit_phys_binop(f, f->entry, 4, 2, 3, tc.i32, BO_IADD); - Operand addr = op_indirect_(4, tc.i32); - tt_emit_load_phys_indirect(f, f->entry, 1, addr, tc.i32); - emit_ret_val(f, f->entry, 1, tc.i32); - - opt_combine(f); - opt_dce(f); - Inst* load = NULL; - for (u32 i = 0; i < f->blocks[f->entry].ninsts; ++i) - if ((IROp)f->blocks[f->entry].insts[i].op == IR_LOAD) - load = &f->blocks[f->entry].insts[i]; - EXPECT(load && load->opnds[1].kind == OPK_INDIRECT && - load->opnds[1].v.ind.base == 2 && - load->opnds[1].v.ind.index == 3 && - load->opnds[1].v.ind.log2_scale == 0, - "add reg+reg producer should synthesize base+index addr mode"); - EXPECT(count_op(f, IR_BINOP) == 0, - "synthesized address mode should kill the producing add"); - - /* reg + imm: `add r4, r2, 16 ; load r1, [r4 + 4]` → `load r1, [r2 + 20]` */ - Func* g = new_func(&tc); - g->opt_rewritten = 1; - Inst* add = ir_emit(g, g->entry, IR_BINOP); - add->opnds = arena_array(g->arena, Operand, 3); - add->opnds[0] = op_reg_(4, tc.i32); - add->opnds[1] = op_reg_(2, tc.i32); - add->opnds[2] = op_imm_(16, tc.i32); - add->nopnds = 3; - add->extra.imm = BO_IADD; - Operand a2 = op_indirect_(4, tc.i32); - a2.v.ind.ofs = 4; - tt_emit_load_phys_indirect(g, g->entry, 1, a2, tc.i32); - emit_ret_val(g, g->entry, 1, tc.i32); - - opt_combine(g); - opt_dce(g); - load = NULL; - for (u32 i = 0; i < g->blocks[g->entry].ninsts; ++i) - if ((IROp)g->blocks[g->entry].insts[i].op == IR_LOAD) - load = &g->blocks[g->entry].insts[i]; - EXPECT(load && load->opnds[1].kind == OPK_INDIRECT && - load->opnds[1].v.ind.base == 2 && load->opnds[1].v.ind.ofs == 20 && - load->opnds[1].v.ind.index == (Reg)REG_NONE, - "add reg+imm producer should fold immediate into indirect offset"); - EXPECT(count_op(g, IR_BINOP) == 0, - "folded immediate offset should kill the producing add"); - - /* scale-from-shl chained with reg+reg add: produces scaled index. */ - Func* h = new_func(&tc); - h->opt_rewritten = 1; - Inst* shl = ir_emit(h, h->entry, IR_BINOP); - shl->opnds = arena_array(h->arena, Operand, 3); - shl->opnds[0] = op_reg_(4, tc.i32); - shl->opnds[1] = op_reg_(3, tc.i32); - shl->opnds[2] = op_imm_(2, tc.i32); - shl->nopnds = 3; - shl->extra.imm = BO_SHL; - emit_phys_binop(h, h->entry, 5, 2, 4, tc.i32, BO_IADD); - Operand a3 = op_indirect_(5, tc.i32); - tt_emit_load_phys_indirect(h, h->entry, 1, a3, tc.i32); - emit_ret_val(h, h->entry, 1, tc.i32); - - opt_combine(h); - opt_dce(h); - load = NULL; - for (u32 i = 0; i < h->blocks[h->entry].ninsts; ++i) - if ((IROp)h->blocks[h->entry].insts[i].op == IR_LOAD) - load = &h->blocks[h->entry].insts[i]; - EXPECT(load && load->opnds[1].kind == OPK_INDIRECT && - load->opnds[1].v.ind.base == 2 && - load->opnds[1].v.ind.index == 3 && - load->opnds[1].v.ind.log2_scale == 2, - "shl+add chain should synthesize base + scaled index across fixpoint"); - EXPECT(count_op(h, IR_BINOP) == 0, - "chained add+shl producers should both die after synthesis"); - - /* Blocked when consumer already has an index: cannot stack two indices. */ - Func* blk = new_func(&tc); - blk->opt_rewritten = 1; - emit_phys_binop(blk, blk->entry, 5, 2, 3, tc.i32, BO_IADD); - Operand a4 = op_indexed_indirect_(5, 6, 0, 0, tc.i32); - tt_emit_load_phys_indirect(blk, blk->entry, 1, a4, tc.i32); - emit_ret_val(blk, blk->entry, 1, tc.i32); - - opt_combine(blk); - load = NULL; - for (u32 i = 0; i < blk->blocks[blk->entry].ninsts; ++i) - if ((IROp)blk->blocks[blk->entry].insts[i].op == IR_LOAD) - load = &blk->blocks[blk->entry].insts[i]; - EXPECT(load && load->opnds[1].kind == OPK_INDIRECT && - load->opnds[1].v.ind.base == 5 && load->opnds[1].v.ind.index == 6, - "addr-mode synthesis should refuse to stack two indices"); - EXPECT(count_op(blk, IR_BINOP) == 1, - "blocked synthesis should leave the producing add in place"); - - /* base IADD reg+imm with a pre-existing index: imm-fold sub-rule (c) - * mutates only the offset and is safe even when index != REG_NONE. - * `add r5, r2, 12 ; load r1, [r5 + r6*2 + 4]` → `[r2 + r6*2 + 16]`. */ - Func* m = new_func(&tc); - m->opt_rewritten = 1; - Inst* madd = ir_emit(m, m->entry, IR_BINOP); - madd->opnds = arena_array(m->arena, Operand, 3); - madd->opnds[0] = op_reg_(5, tc.i32); - madd->opnds[1] = op_reg_(2, tc.i32); - madd->opnds[2] = op_imm_(12, tc.i32); - madd->nopnds = 3; - madd->extra.imm = BO_IADD; - Operand a5 = op_indexed_indirect_(5, 6, 1, 4, tc.i32); - tt_emit_load_phys_indirect(m, m->entry, 1, a5, tc.i32); - emit_ret_val(m, m->entry, 1, tc.i32); - - opt_combine(m); - opt_dce(m); - load = NULL; - for (u32 i = 0; i < m->blocks[m->entry].ninsts; ++i) - if ((IROp)m->blocks[m->entry].insts[i].op == IR_LOAD) - load = &m->blocks[m->entry].insts[i]; - EXPECT(load && load->opnds[1].kind == OPK_INDIRECT && - load->opnds[1].v.ind.base == 2 && - load->opnds[1].v.ind.index == 6 && - load->opnds[1].v.ind.log2_scale == 1 && - load->opnds[1].v.ind.ofs == 16, - "reg+imm base producer should imm-fold even when an index is set"); - EXPECT( - count_op(m, IR_BINOP) == 0, - "imm-fold synthesis with existing index should kill the producing add"); - - tc_fini(&tc); -} - -static void opt_combine_full_ext_of_ext_rules(void) { - TestCtx tc; - tc_init(&tc); - - /* sext-of-sext: outer 8 bytes, inner-source 1 byte. Outer signed, inner - * signed. Pattern B (w2 < w with outer signed) fires; outer chains - * directly to the inner source. */ - Func* a = new_func(&tc); - a->opt_rewritten = 1; - emit_convert_typed(a, a->entry, 10, 11, tc.i32, tc.i8, CV_SEXT); - emit_convert_typed(a, a->entry, 12, 10, tc.i64, tc.i32, CV_SEXT); - emit_ret_val(a, a->entry, 12, tc.i64); - - opt_combine(a); - Inst* cv = &a->blocks[a->entry].insts[1]; - EXPECT(cv->opnds[1].v.reg == 11, - "sext(sext) chain should chain to the innermost source"); - - /* uext-of-sext (the unsafe combination): outer unsigned, inner signed, - * outer wider than inner source — must be left alone (would lose - * sign info). */ - Func* b = new_func(&tc); - b->opt_rewritten = 1; - emit_convert_typed(b, b->entry, 10, 11, tc.i32, tc.i8, CV_SEXT); - emit_convert_typed(b, b->entry, 12, 10, tc.i64, tc.i32, CV_ZEXT); - emit_ret_val(b, b->entry, 12, tc.i64); - - opt_combine(b); - cv = &b->blocks[b->entry].insts[1]; - EXPECT(cv->opnds[1].v.reg == 10, - "uext(sext) chain must NOT collapse (would lose sign info)"); - - /* sext-of-uext (mixed but safe): outer signed picks up outer signedness; - * collapses to inner source. */ - Func* c = new_func(&tc); - c->opt_rewritten = 1; - emit_convert_typed(c, c->entry, 10, 11, tc.i32, tc.i8, CV_ZEXT); - emit_convert_typed(c, c->entry, 12, 10, tc.i64, tc.i32, CV_SEXT); - emit_ret_val(c, c->entry, 12, tc.i64); - - opt_combine(c); - cv = &c->blocks[c->entry].insts[1]; - EXPECT(cv->opnds[1].v.reg == 11, - "sext(uext) chain should chain to the innermost source"); - - /* Pattern A: outer width <= inner-source width. Outer can chain past the - * inner ext regardless of signedness combination. */ - Func* d = new_func(&tc); - d->opt_rewritten = 1; - emit_convert_typed(d, d->entry, 10, 11, tc.i64, tc.i32, CV_SEXT); - emit_convert_typed(d, d->entry, 12, 10, tc.i32, tc.i32, CV_ZEXT); - emit_ret_val(d, d->entry, 12, tc.i32); - - opt_combine(d); - cv = &d->blocks[d->entry].insts[1]; - EXPECT(cv->opnds[1].v.reg == 11, - "outer width <= inner-source width should chain past inner ext"); - - tc_fini(&tc); -} - -static void opt_dce_physical_dead_defs(void) { - TestCtx tc; - tc_init(&tc); - Func* f = new_func(&tc); - f->opt_rewritten = 1; - Inst* in = ir_emit(f, f->entry, IR_LOAD_IMM); - in->opnds = arena_array(f->arena, Operand, 1); - in->opnds[0] = op_reg_(19, tc.i32); - in->nopnds = 1; - in->extra.imm = 1; - ir_emit(f, f->entry, IR_RET); - - opt_dce(f); - EXPECT(count_op(f, IR_LOAD_IMM) == 0, - "post-rewrite DCE should remove dead physical register defs"); - - Func* g = new_func(&tc); - g->opt_rewritten = 1; - in = ir_emit(g, g->entry, IR_LOAD_IMM); - in->opnds = arena_array(g->arena, Operand, 1); - in->opnds[0] = op_reg_(19, tc.i32); - in->nopnds = 1; - in->extra.imm = 1; - emit_ret_val(g, g->entry, 19, tc.i32); - - opt_dce(g); - EXPECT(count_op(g, IR_LOAD_IMM) == 1, - "post-rewrite DCE should keep physical defs used by ret"); - tc_fini(&tc); -} - -static void opt_dead_def_keeps_observable_loads(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_VOLATILE); - Val dead = add_val(f, tc.i32); - emit_load_local(f, f->entry, dead, fs, tc.i32, MF_VOLATILE); - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_dead_def_elim(f); - EXPECT(count_op(f, IR_LOAD) == 1, - "dead_def_elim should keep volatile loads even when the result dies"); - - Func* g = new_func(&tc); - opt_machinize(g, &mock.base); - fs = add_frame_slot(g, tc.i32, FS_LOCAL, 4, 0); - dead = add_val(g, tc.i32); - emit_load_local(g, g->entry, dead, fs, tc.i32, MF_ATOMIC); - opt_build_cfg(g); - opt_build_loop_tree(g); - opt_dead_def_elim(g); - EXPECT(count_op(g, IR_LOAD) == 1, - "dead_def_elim should keep atomic loads even when the result dies"); - tc_fini(&tc); -} - -static void opt_dead_def_elim_test(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - Func* f = new_func(&tc); - opt_machinize(f, &mock.base); - Val a = add_val(f, tc.i32); - Val b = add_val(f, tc.i32); - emit_load_imm(f, f->entry, a, tc.i32, 1); - emit_copy(f, f->entry, b, a, tc.i32); /* b is dead after this */ - emit_ret_val(f, f->entry, a, tc.i32); /* a stays live */ - opt_build_cfg(f); - opt_build_loop_tree(f); - opt_dead_def_elim(f); - EXPECT(count_op(f, IR_COPY) == 0, - "dead copy should be eliminated by dead_def_elim"); - EXPECT(count_op(f, IR_LOAD_IMM) == 1, - "live load_imm should survive dead_def_elim"); - tc_fini(&tc); -} - -static void opt_planned_call_replay_resolves_arg_cycle(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - Func* f = new_func(&tc); - f->opt_scratch_regs[RC_INT][0] = 9; - f->opt_scratch_reg_count[RC_INT] = 1; - - Inst* in = ir_emit(f, f->entry, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - in->extra.aux = aux; - aux->plan_valid = 1; - aux->use_plan_replay = 1; - aux->plan.callee = op_reg_(8, tc.i64); - aux->plan.args = arena_zarray(f->arena, CGCallPlanMove, 2); - aux->plan.nargs = 2; - aux->plan.args[0].src = op_reg_(1, tc.i64); - aux->plan.args[0].dst_kind = CG_CALL_PLAN_REG; - aux->plan.args[0].cls = RC_INT; - aux->plan.args[0].dst_reg = 2; - aux->plan.args[0].mem = mem_unknown_(tc.i64, 8); - aux->plan.args[1].src = op_reg_(2, tc.i64); - aux->plan.args[1].dst_kind = CG_CALL_PLAN_REG; - aux->plan.args[1].cls = RC_INT; - aux->plan.args[1].dst_reg = 1; - aux->plan.args[1].mem = mem_unknown_(tc.i64, 8); - - opt_emit(tc.c, f, &mock.base); - - EXPECT(mock.emit_call_plan_calls == 1, - "planned call should use emit_call_plan"); - EXPECT(mock.copy_calls == 3, - "two-register cycle should need three copies, got %d", - mock.copy_calls); - EXPECT(mock.copy_dst[0].v.reg == 9 && mock.copy_src[0].v.reg == 1, - "cycle should save first source to scratch"); - EXPECT(mock.copy_dst[1].v.reg == 1 && mock.copy_src[1].v.reg == 2, - "cycle should rotate second arg into first ABI reg"); - EXPECT(mock.copy_dst[2].v.reg == 2 && mock.copy_src[2].v.reg == 9, - "cycle should restore scratch into second ABI reg"); - tc_fini(&tc); -} - -static void opt_planned_call_replay_preserves_indirect_callee_arg_reg(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - Func* f = new_func(&tc); - f->opt_scratch_regs[RC_INT][0] = 9; - f->opt_scratch_reg_count[RC_INT] = 1; - - Inst* in = ir_emit(f, f->entry, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - in->extra.aux = aux; - aux->plan_valid = 1; - aux->use_plan_replay = 1; - aux->plan.callee = op_reg_(1, tc.i64); - aux->plan.args = arena_zarray(f->arena, CGCallPlanMove, 1); - aux->plan.nargs = 1; - aux->plan.args[0].src = op_reg_(2, tc.i64); - aux->plan.args[0].dst_kind = CG_CALL_PLAN_REG; - aux->plan.args[0].cls = RC_INT; - aux->plan.args[0].dst_reg = 1; - aux->plan.args[0].mem = mem_unknown_(tc.i64, 8); - - opt_emit(tc.c, f, &mock.base); - - EXPECT(mock.emit_call_plan_calls == 1, - "planned indirect call should use emit_call_plan"); - EXPECT(mock.copy_calls >= 2, - "callee-in-arg-register hazard should copy callee plus arg"); - EXPECT(mock.copy_dst[0].v.reg == 9 && mock.copy_src[0].v.reg == 1, - "callee should be saved before arg setup overwrites its register"); - EXPECT( - mock.last_plan_callee.kind == OPK_REG && mock.last_plan_callee.v.reg == 9, - "emit_call_plan should receive scratch callee register"); - tc_fini(&tc); -} - -static void opt_planned_call_replay_stores_stack_args(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - Func* f = new_func(&tc); - Inst* in = ir_emit(f, f->entry, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - in->extra.aux = aux; - aux->plan_valid = 1; - aux->use_plan_replay = 1; - aux->plan.callee = op_reg_(8, tc.i64); - aux->plan.args = arena_zarray(f->arena, CGCallPlanMove, 1); - aux->plan.nargs = 1; - aux->plan.args[0].src = op_reg_(1, tc.i64); - aux->plan.args[0].dst_kind = CG_CALL_PLAN_STACK; - aux->plan.args[0].cls = RC_INT; - aux->plan.args[0].mem = mem_unknown_(tc.i64, 8); - - opt_emit(tc.c, f, &mock.base); - - EXPECT(mock.emit_call_plan_calls == 1, - "stack-arg plans should use emit_call_plan"); - EXPECT(mock.call_calls == 0, "legacy call fallback should not be used"); - EXPECT(mock.store_call_arg_calls == 1, - "planned stack arg should be materialized by opt replay"); - EXPECT(mock.last_stack_arg.stack_offset == 0, - "planned stack arg offset should be preserved"); - tc_fini(&tc); -} - -static void opt_planned_call_replay_materializes_address_args(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - Func* f = new_func(&tc); - FrameSlot fs = add_frame_slot(f, tc.i64, FS_LOCAL, 8, 0); - Inst* in = ir_emit(f, f->entry, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - in->extra.aux = aux; - aux->plan_valid = 1; - aux->use_plan_replay = 1; - aux->plan.has_sret = 1; - aux->plan.callee = op_reg_(8, tc.i64); - aux->plan.args = arena_zarray(f->arena, CGCallPlanMove, 2); - aux->plan.nargs = 2; - aux->plan.args[0].src = op_local_(fs, tc.i64); - aux->plan.args[0].src_kind = CG_CALL_PLAN_SRC_ADDR; - aux->plan.args[0].dst_kind = CG_CALL_PLAN_REG; - aux->plan.args[0].cls = RC_INT; - aux->plan.args[0].dst_reg = 1; - aux->plan.args[0].mem = mem_unknown_(tc.i64, 8); - aux->plan.args[1].src = op_local_(fs, tc.i64); - aux->plan.args[1].src_kind = CG_CALL_PLAN_SRC_ADDR; - aux->plan.args[1].dst_kind = CG_CALL_PLAN_STACK; - aux->plan.args[1].cls = RC_INT; - aux->plan.args[1].stack_offset = 8; - aux->plan.args[1].mem = mem_unknown_(tc.i64, 8); - - opt_emit(tc.c, f, &mock.base); - - EXPECT(mock.emit_call_plan_calls == 1, - "sret-shaped plans should use planned replay"); - EXPECT(mock.addr_of_calls == 1, - "register address arg should materialize with addr_of"); - EXPECT(mock.load_calls == 0, "address args should not be loaded as values"); - EXPECT(mock.store_call_arg_calls == 1 && - mock.last_stack_arg.src_kind == CG_CALL_PLAN_SRC_ADDR, - "stack address arg should stay marked as an address"); - tc_fini(&tc); -} - -static void opt_planned_call_replay_resolves_return_reg_collision(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - Func* f = new_func(&tc); - f->opt_scratch_regs[RC_INT][0] = 9; - f->opt_scratch_reg_count[RC_INT] = 1; - - Inst* in = ir_emit(f, f->entry, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - in->extra.aux = aux; - aux->plan_valid = 1; - aux->use_plan_replay = 1; - aux->plan.callee = op_reg_(8, tc.i64); - aux->plan.rets = arena_zarray(f->arena, CGCallPlanRet, 2); - aux->plan.nrets = 2; - aux->plan.rets[0].dst = op_reg_(2, tc.i64); - aux->plan.rets[0].cls = RC_INT; - aux->plan.rets[0].src_reg = 1; - aux->plan.rets[0].mem = mem_unknown_(tc.i64, 8); - aux->plan.rets[1].dst = op_reg_(1, tc.i64); - aux->plan.rets[1].cls = RC_INT; - aux->plan.rets[1].src_reg = 2; - aux->plan.rets[1].mem = mem_unknown_(tc.i64, 8); - - opt_emit(tc.c, f, &mock.base); - - EXPECT(mock.emit_call_plan_calls == 1, - "return collision call should use emit_call_plan"); - EXPECT(mock.copy_calls == 3, - "two-register return collision should need three copies, got %d", - mock.copy_calls); - EXPECT( - mock.event_count >= 4 && mock.events[0] == 'p' && mock.events[1] == 'c', - "return copies should occur after the planned call branch"); - EXPECT(mock.copy_dst[0].v.reg == 9 && mock.copy_src[0].v.reg == 1, - "return cycle should save first return register to scratch"); - EXPECT(mock.copy_dst[1].v.reg == 1 && mock.copy_src[1].v.reg == 2, - "return cycle should rotate second return into first destination"); - EXPECT(mock.copy_dst[2].v.reg == 2 && mock.copy_src[2].v.reg == 9, - "return cycle should restore scratch into second destination"); - tc_fini(&tc); -} - -static void opt_planned_call_replay_stores_stack_sources_before_clobber(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - Func* f = new_func(&tc); - Inst* in = ir_emit(f, f->entry, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - in->extra.aux = aux; - aux->plan_valid = 1; - aux->use_plan_replay = 1; - aux->plan.callee = op_reg_(8, tc.i64); - aux->plan.args = arena_zarray(f->arena, CGCallPlanMove, 2); - aux->plan.nargs = 2; - aux->plan.args[0].src = op_reg_(2, tc.i64); - aux->plan.args[0].dst_kind = CG_CALL_PLAN_REG; - aux->plan.args[0].cls = RC_INT; - aux->plan.args[0].dst_reg = 1; - aux->plan.args[0].mem = mem_unknown_(tc.i64, 8); - aux->plan.args[1].src = op_reg_(1, tc.i64); - aux->plan.args[1].dst_kind = CG_CALL_PLAN_STACK; - aux->plan.args[1].cls = RC_INT; - aux->plan.args[1].stack_offset = 0; - aux->plan.args[1].mem = mem_unknown_(tc.i64, 8); - - opt_emit(tc.c, f, &mock.base); - - EXPECT(mock.emit_call_plan_calls == 1, - "stack-source hazard call should use emit_call_plan"); - EXPECT(mock.store_call_arg_calls == 1 && mock.copy_calls == 1, - "expected one stack store and one register arg copy"); - EXPECT( - mock.event_count >= 3 && mock.events[0] == 's' && mock.events[1] == 'c' && - mock.events[2] == 'p', - "stack arg source should be stored before its source reg is clobbered"); - EXPECT(mock.last_stack_arg.src.kind == OPK_REG && - mock.last_stack_arg.src.v.reg == 1, - "stack arg should use the original source register"); - tc_fini(&tc); -} - -static void opt_planned_tail_call_uses_replay_without_return_moves(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - Func* f = new_func(&tc); - Inst* in = ir_emit(f, f->entry, IR_CALL); - IRCallAux* aux = arena_znew(f->arena, IRCallAux); - in->extra.aux = aux; - aux->desc.flags = CG_CALL_TAIL; - aux->plan_valid = 1; - aux->use_plan_replay = 1; - aux->plan.flags = CG_CALL_TAIL; - aux->plan.callee = op_reg_(8, tc.i64); - aux->plan.args = arena_zarray(f->arena, CGCallPlanMove, 1); - aux->plan.nargs = 1; - aux->plan.args[0].src = op_reg_(2, tc.i64); - aux->plan.args[0].dst_kind = CG_CALL_PLAN_REG; - aux->plan.args[0].cls = RC_INT; - aux->plan.args[0].dst_reg = 1; - aux->plan.args[0].mem = mem_unknown_(tc.i64, 8); - aux->plan.rets = arena_zarray(f->arena, CGCallPlanRet, 1); - aux->plan.nrets = 1; - aux->plan.rets[0].dst = op_reg_(3, tc.i64); - aux->plan.rets[0].cls = RC_INT; - aux->plan.rets[0].src_reg = 1; - aux->plan.rets[0].mem = mem_unknown_(tc.i64, 8); - - opt_emit(tc.c, f, &mock.base); - - EXPECT(mock.emit_call_plan_calls == 1, - "tail call should use planned emit path"); - EXPECT(mock.call_calls == 0, "tail call should not use legacy call fallback"); - EXPECT(mock.copy_calls == 1, - "tail call should materialize args but skip return extraction"); - tc_fini(&tc); -} - -static void opt_emit_preserves_physical_reg_zero(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - Func* f = new_func(&tc); - f->opt_rewritten = 1; - Inst* in = ir_emit(f, f->entry, IR_LOAD_IMM); - in->opnds = arena_array(f->arena, Operand, 1); - in->opnds[0] = op_reg_(0, tc.i32); - in->nopnds = 1; - in->extra.imm = 42; - emit_ret_val(f, f->entry, 0, tc.i32); - - opt_emit(tc.c, f, &mock.base); - - EXPECT(mock.load_imm_calls == 1, "physical r0 load should be emitted"); - EXPECT(mock.last_load_imm_dst == 0, - "identity replay should preserve physical r0, got r%u", - (unsigned)mock.last_load_imm_dst); - tc_fini(&tc); -} - -/* ============================================================ - * End-to-end test — drive the opt-wrapped CGTarget through the - * public CGTarget interface, let func_end run the full pipeline, - * and assert on what the mock backend received. - * ============================================================ */ - -static void opt_emit_no_virtual_alloc(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - CGFuncDesc fd; - CfreeCgFuncSig sig; - memset(&fd, 0, sizeof fd); - memset(&sig, 0, sizeof sig); - sig.ret = tc.i32; - sig.call_conv = CFREE_CG_CC_TARGET_C; - fd.fn_type = cfree_cg_type_func(tc.c, sig); - opt->func_begin(opt, &fd); - - Reg a = 1; - opt->load_imm(opt, op_reg_(a, tc.i32), 42); - CGABIValue retv = {0}; - retv.type = tc.i32; - retv.storage = op_reg_(a, tc.i32); - opt->ret(opt, &retv); - opt->func_end(opt); - - EXPECT(mock.func_begin_plan_calls == (int)OPT_REG_CLASSES, - "opt_emit should plan hard regs before backend func_begin"); - EXPECT(mock.plan_regs[RC_INT] == 1, - "opt_emit should plan only the replayed hard reg, got %d", - mock.plan_regs[RC_INT]); - EXPECT(mock.reserve_calls[RC_INT] == 1, - "opt_emit should reserve only the replayed hard reg, got %d", - mock.reserve_calls[RC_INT]); - EXPECT(mock.load_imm_calls == 1, "expected one emitted load_imm"); - EXPECT(mock.last_load_imm_dst == 19, - "emitted hard dst should be r19, got r%u", - (unsigned)mock.last_load_imm_dst); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void opt_records_const_bytes_by_value(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg fp_pool[] = {19}; - static const Reg fp_scratch[] = {9, 10}; - mock_set_pool(&mock, RC_FP, fp_pool, 1, fp_scratch, 2, 0x4007FFFFu); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - CGFuncDesc fd; - CfreeCgFuncSig sig; - memset(&fd, 0, sizeof fd); - memset(&sig, 0, sizeof sig); - sig.ret = tc.f64; - sig.call_conv = CFREE_CG_CC_TARGET_C; - fd.fn_type = cfree_cg_type_func(tc.c, sig); - opt->func_begin(opt, &fd); - - u8 bytes[8] = {0, 0, 0, 0, 0, 0, 0x45, 0x40}; /* 42.0 */ - ConstBytes cb; - memset(&cb, 0, sizeof cb); - cb.type = tc.f64; - cb.bytes = bytes; - cb.size = sizeof bytes; - cb.align = 8; - Operand dst = op_reg_cls_(1, tc.f64, RC_FP); - opt->load_const(opt, dst, cb); - memset(bytes, 0xa5, sizeof bytes); - - CGABIValue retv = {0}; - retv.type = tc.f64; - retv.storage = dst; - opt->ret(opt, &retv); - opt->func_end(opt); - - EXPECT(mock.load_const_calls == 1, "expected one emitted load_const"); - EXPECT(mock.last_const_size == 8, "expected 8-byte FP constant"); - EXPECT(mock.last_const_bytes[0] == 0 && mock.last_const_bytes[6] == 0x45 && - mock.last_const_bytes[7] == 0x40, - "opt replay should preserve original const bytes"); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void opt_cmp_branch_keeps_fallthrough_after_block_growth(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - CGFuncDesc fd; - CfreeCgFuncSig sig; - memset(&fd, 0, sizeof fd); - memset(&sig, 0, sizeof sig); - sig.ret = tc.i32; - sig.call_conv = CFREE_CG_CC_TARGET_C; - fd.fn_type = cfree_cg_type_func(tc.c, sig); - opt->func_begin(opt, &fd); - - Label labels[7]; - for (u32 i = 0; i < 7; ++i) labels[i] = opt->label_new(opt); - for (u32 i = 0; i < 7; ++i) opt->label_place(opt, labels[i]); - - opt->cmp_branch(opt, CMP_EQ, op_imm_(0, tc.i32), op_imm_(1, tc.i32), - labels[0]); - opt->load_imm(opt, op_reg_(1, tc.i32), 42); - CGABIValue retv = {0}; - retv.type = tc.i32; - retv.storage = op_reg_(1, tc.i32); - opt->ret(opt, &retv); - opt->func_end(opt); - - EXPECT(mock.cmp_branch_calls == 1, "expected one emitted cmp_branch"); - EXPECT(mock.load_imm_calls == 1, - "cmp_branch fallthrough block should remain reachable"); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void begin_mock_opt_func(TestCtx* tc, CGTarget* opt, - CfreeCgTypeId ret_ty); -static CGLocalDesc local_desc_(CfreeCgTypeId ty, u32 size, u32 align, - u32 flags); - -static void opt_known_frame_marks_empty_leaf_omittable(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {2}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - begin_mock_opt_func(&tc, opt, tc.i32); - - opt->load_imm(opt, op_reg_(1, tc.i32), 42); - CGABIValue retv = {0}; - retv.type = tc.i32; - retv.storage = op_reg_(1, tc.i32); - opt->ret(opt, &retv); - opt->func_end(opt); - - EXPECT(mock.known_frame_calls == 1, "O1 should use known-frame replay"); - EXPECT(mock.last_known_frame.may_omit_frame, - "empty leaf known frame should be marked omittable"); - EXPECT(mock.last_known_frame.nslots == 0, "empty leaf should have no slots"); - EXPECT(!mock.last_known_frame.has_call, "empty leaf should not report calls"); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void opt_known_frame_keeps_frame_for_slot_call_and_alloca(void) { - TestCtx tc; - tc_init(&tc); - static const Reg pool[] = {2, 3, 4}; - static const Reg scratch[] = {9, 10}; - - MockCGTarget slot_mock; - mock_init(&slot_mock, tc.c); - mock_set_pool(&slot_mock, RC_INT, pool, 3, scratch, 2, 0x4007FFFFu); - CGTarget* slot_opt = opt_cgtarget_new(tc.c, &slot_mock.base, 1); - begin_mock_opt_func(&tc, slot_opt, tc.i32); - CGLocalDesc ld = - local_desc_(tc.i32, 4, 4, CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED); - CGLocalStorage lst = slot_opt->local(slot_opt, &ld); - Operand addr = op_reg_(1, cfree_cg_type_ptr(tc.c, tc.i32, 0)); - slot_opt->local_addr(slot_opt, addr, &ld, lst); - slot_opt->load_imm(slot_opt, op_reg_(2, tc.i32), 7); - CGABIValue sret = {0}; - sret.type = tc.i32; - sret.storage = op_reg_(2, tc.i32); - slot_opt->ret(slot_opt, &sret); - slot_opt->func_end(slot_opt); - EXPECT(!slot_mock.last_known_frame.may_omit_frame, - "frame slot should block frame omission"); - EXPECT(slot_mock.last_known_frame.nslots != 0, - "frame-backed local should be in known-frame slots"); - slot_opt->destroy(slot_opt); - - MockCGTarget call_mock; - mock_init(&call_mock, tc.c); - mock_set_pool(&call_mock, RC_INT, pool, 3, scratch, 2, 0x4007FFFFu); - CGTarget* call_opt = opt_cgtarget_new(tc.c, &call_mock.base, 1); - begin_mock_opt_func(&tc, call_opt, tc.i32); - CGCallDesc cd; - memset(&cd, 0, sizeof cd); - cd.fn_type = cfree_cg_type_func(tc.c, (CfreeCgFuncSig){.ret = tc.i32}); - cd.callee = op_global_(OBJ_SYM_NONE, 0, cd.fn_type); - call_opt->call(call_opt, &cd); - call_opt->load_imm(call_opt, op_reg_(1, tc.i32), 3); - CGABIValue cret = {0}; - cret.type = tc.i32; - cret.storage = op_reg_(1, tc.i32); - call_opt->ret(call_opt, &cret); - call_opt->func_end(call_opt); - EXPECT(call_mock.last_known_frame.has_call, - "known-frame summary should report calls"); - EXPECT(!call_mock.last_known_frame.may_omit_frame, - "non-leaf function should block frame omission"); - call_opt->destroy(call_opt); - - MockCGTarget alloca_mock; - mock_init(&alloca_mock, tc.c); - mock_set_pool(&alloca_mock, RC_INT, pool, 3, scratch, 2, 0x4007FFFFu); - CGTarget* alloca_opt = opt_cgtarget_new(tc.c, &alloca_mock.base, 1); - begin_mock_opt_func(&tc, alloca_opt, tc.i32); - alloca_opt->alloca_(alloca_opt, - op_reg_(1, cfree_cg_type_ptr(tc.c, tc.i32, 0)), - op_imm_(16, tc.i32), 16); - alloca_opt->load_imm(alloca_opt, op_reg_(2, tc.i32), 5); - CGABIValue aret = {0}; - aret.type = tc.i32; - aret.storage = op_reg_(2, tc.i32); - alloca_opt->ret(alloca_opt, &aret); - alloca_opt->func_end(alloca_opt); - EXPECT(alloca_mock.last_known_frame.has_alloca, - "known-frame summary should report alloca"); - EXPECT(!alloca_mock.last_known_frame.may_omit_frame, - "alloca should block frame omission"); - alloca_opt->destroy(alloca_opt); - - tc_fini(&tc); -} - -static void begin_mock_opt_func(TestCtx* tc, CGTarget* opt, - CfreeCgTypeId ret_ty) { - CGFuncDesc fd; - CfreeCgFuncSig sig; - memset(&fd, 0, sizeof fd); - memset(&sig, 0, sizeof sig); - sig.ret = ret_ty; - sig.call_conv = CFREE_CG_CC_TARGET_C; - fd.fn_type = cfree_cg_type_func(tc->c, sig); - opt->func_begin(opt, &fd); -} - -static CGLocalDesc local_desc_(CfreeCgTypeId ty, u32 size, u32 align, - u32 flags) { - CGLocalDesc d; - memset(&d, 0, sizeof d); - d.type = ty; - d.size = size; - d.align = align; - d.flags = flags; - return d; -} - -static CGParamDesc param_desc_(CfreeCgTypeId ty, u32 size, u32 align, - u32 flags) { - CGParamDesc d; - memset(&d, 0, sizeof d); - d.type = ty; - d.size = size; - d.align = align; - d.flags = flags; - return d; -} - -static void opt_local_hook_chooses_register_for_scalar(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - begin_mock_opt_func(&tc, opt, tc.i32); - - CGLocalDesc d = local_desc_(tc.i32, 4, 4, 0); - CGLocalStorage st = opt->local(opt, &d); - EXPECT(st.kind == CG_LOCAL_STORAGE_REG, - "non-address-taken scalar local should be register-backed"); - EXPECT(st.v.reg != (Reg)REG_NONE, "register-backed local needs a vreg"); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void opt_param_hook_chooses_register_for_scalar(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - begin_mock_opt_func(&tc, opt, tc.i32); - - CGParamDesc d = param_desc_(tc.i32, 4, 4, 0); - CGLocalStorage st = opt->param(opt, &d); - EXPECT(st.kind == CG_LOCAL_STORAGE_REG, - "non-address-taken scalar param should be register-backed"); - EXPECT(st.v.reg != (Reg)REG_NONE, "register-backed param needs a vreg"); - - CGABIValue retv = {0}; - retv.type = tc.i32; - retv.storage = op_reg_(st.v.reg, tc.i32); - opt->ret(opt, &retv); - opt->func_end(opt); - - EXPECT(mock.param_calls == 1, "param should replay to wrapped backend"); - EXPECT(mock.last_param_storage.kind == CG_LOCAL_STORAGE_REG, - "replayed scalar param should remain register-backed"); - EXPECT(mock.last_param_storage.v.reg == 19, - "replayed param storage should be allocated hard reg r19, got r%u", - (unsigned)mock.last_param_storage.v.reg); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void opt_param_memory_required_uses_frame(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - begin_mock_opt_func(&tc, opt, tc.i32); - - CGParamDesc d = - param_desc_(tc.i32, 4, 4, CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED); - CGLocalStorage st = opt->param(opt, &d); - EXPECT(st.kind == CG_LOCAL_STORAGE_FRAME, - "memory-required param should be frame-backed"); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void opt_local_addr_taken_uses_frame_and_replays_addr_of(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 1, scratch, 2, 0x4007FFFFu); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - begin_mock_opt_func(&tc, opt, ptr_ty); - - CGLocalDesc d = - local_desc_(tc.i32, 4, 4, CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED); - CGLocalStorage st = opt->local(opt, &d); - EXPECT(st.kind == CG_LOCAL_STORAGE_FRAME, - "address-taken local should be frame-backed"); - - Operand addr = op_reg_(1, ptr_ty); - opt->local_addr(opt, addr, &d, st); - CGABIValue retv = {0}; - retv.type = ptr_ty; - retv.storage = addr; - opt->ret(opt, &retv); - opt->func_end(opt); - - EXPECT(mock.addr_of_calls == 1, - "frame-backed local address should replay as addr_of"); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void opt_register_local_addr_frame_homes(void) { - TestCtx tc; - tc_init(&tc); - MockCGTarget mock; - mock_init(&mock, tc.c); - static const Reg pool[] = {19, 20}; - static const Reg scratch[] = {9, 10}; - mock_set_pool(&mock, RC_INT, pool, 2, scratch, 2, 0x4007FFFFu); - - CGTarget* opt = opt_cgtarget_new(tc.c, &mock.base, 1); - CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0); - begin_mock_opt_func(&tc, opt, ptr_ty); - - CGLocalDesc desc = local_desc_(tc.i32, 4, 4, 0); - CGLocalStorage storage = opt->local(opt, &desc); - EXPECT(storage.kind == CG_LOCAL_STORAGE_REG, - "frame-home test needs register-backed local"); - - Operand local = op_reg_(storage.v.reg, tc.i32); - opt->load_imm(opt, local, 42); - Operand addr = op_reg_(2, ptr_ty); - opt->local_addr(opt, addr, &desc, storage); - opt->store(opt, op_indirect_(addr.v.reg, tc.i32), local, - mem_unknown_(tc.i32, 4)); - - CGABIValue retv = {0}; - retv.type = ptr_ty; - retv.storage = addr; - opt->ret(opt, &retv); - opt->func_end(opt); - - EXPECT(mock.addr_of_calls == 1, - "register-backed local addrof should replay as addr_of"); - EXPECT(mock.store_calls >= 1, - "register-backed local addrof should frame-home prior stores"); - EXPECT(mock.load_calls >= 1, - "register-backed local use after addrof should reload from home"); - - opt->destroy(opt); - tc_fini(&tc); -} - -static void opt_inline_direct_wrapper(void) { - TestCtx tc; - tc_init(&tc); - CfreeCgTypeId ps[1] = {tc.i32}; - Func* callee = new_named_func(&tc, (ObjSymId)2, tc.i32, ps, 1, 0); - PReg x = add_preg(callee, tc.i32); - add_reg_param(callee, x, tc.i32); - PReg one = add_preg(callee, tc.i32); - PReg sum = add_preg(callee, tc.i32); - emit_preg_load_imm(callee, callee->entry, one, tc.i32, 1); - emit_preg_binop(callee, callee->entry, sum, x, one, tc.i32); - emit_preg_ret(callee, callee->entry, sum, tc.i32); - - Func* caller = new_named_func(&tc, (ObjSymId)1, tc.i32, NULL, 0, 0); - PReg arg = add_preg(caller, tc.i32); - PReg ret = add_preg(caller, tc.i32); - emit_preg_load_imm(caller, caller->entry, arg, tc.i32, 41); - Operand arg_op = op_reg_(arg, tc.i32); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)2, callee->type, - &arg_op, 1, op_reg_(ret, tc.i32)); - emit_preg_ret(caller, caller->entry, ret, tc.i32); - - Func* funcs[2] = {caller, callee}; - FuncSet fs = {tc.c, tc.c->tu, funcs, 2, 2}; - opt_inline(&fs, 1); - EXPECT(count_op(caller, IR_CALL) == 0, "direct wrapper call should inline"); - EXPECT(count_op(caller, IR_BINOP) == 1, - "inlined wrapper should clone callee arithmetic"); - tc_fini(&tc); -} - -static void opt_inline_policy_controls_decisions(void) { - TestCtx tc; - tc_init(&tc); - - Func* noinline_callee = new_named_func(&tc, (ObjSymId)31, tc.i32, NULL, 0, 0); - noinline_callee->desc.inline_policy = CFREE_CG_INLINE_NEVER; - PReg nr = add_preg(noinline_callee, tc.i32); - emit_preg_load_imm(noinline_callee, noinline_callee->entry, nr, tc.i32, 7); - emit_preg_ret(noinline_callee, noinline_callee->entry, nr, tc.i32); - - Func* always_callee = new_named_func(&tc, (ObjSymId)32, tc.i32, NULL, 0, 0); - always_callee->desc.inline_policy = CFREE_CG_INLINE_ALWAYS; - PReg ar = PREG_NONE; - for (u32 i = 0; i < 30; ++i) { - ar = add_preg(always_callee, tc.i32); - emit_preg_load_imm(always_callee, always_callee->entry, ar, tc.i32, i); - } - emit_preg_ret(always_callee, always_callee->entry, ar, tc.i32); - - Func* call_never_callee = - new_named_func(&tc, (ObjSymId)33, tc.i32, NULL, 0, 0); - PReg cr = add_preg(call_never_callee, tc.i32); - emit_preg_load_imm(call_never_callee, call_never_callee->entry, cr, tc.i32, - 9); - emit_preg_ret(call_never_callee, call_never_callee->entry, cr, tc.i32); - - Func* hint_callee = new_named_func(&tc, (ObjSymId)34, tc.i32, NULL, 0, 0); - PReg hr = PREG_NONE; - for (u32 i = 0; i < 30; ++i) { - hr = add_preg(hint_callee, tc.i32); - emit_preg_load_imm(hint_callee, hint_callee->entry, hr, tc.i32, i); - } - emit_preg_ret(hint_callee, hint_callee->entry, hr, tc.i32); - - Func* caller = new_named_func(&tc, (ObjSymId)35, tc.i32, NULL, 0, 0); - PReg r1 = add_preg(caller, tc.i32); - PReg r2 = add_preg(caller, tc.i32); - PReg r3 = add_preg(caller, tc.i32); - PReg r4 = add_preg(caller, tc.i32); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)31, - noinline_callee->type, NULL, 0, op_reg_(r1, tc.i32)); - Inst* call_hint = - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)34, - hint_callee->type, NULL, 0, op_reg_(r4, tc.i32)); - ((IRCallAux*)call_hint->extra.aux)->desc.inline_policy = CFREE_CG_INLINE_HINT; - Inst* call_never = - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)33, - call_never_callee->type, NULL, 0, op_reg_(r3, tc.i32)); - ((IRCallAux*)call_never->extra.aux)->desc.inline_policy = - CFREE_CG_INLINE_NEVER; - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)32, - always_callee->type, NULL, 0, op_reg_(r2, tc.i32)); - emit_preg_ret(caller, caller->entry, r2, tc.i32); - - Func* funcs[5] = {caller, noinline_callee, always_callee, call_never_callee, - hint_callee}; - FuncSet fs = {tc.c, tc.c->tu, funcs, 5, 5}; - opt_inline(&fs, 1); - EXPECT(count_op(caller, IR_CALL) == 2, - "noinline function and noinline call site should remain calls"); - EXPECT(count_op(caller, IR_LOAD_IMM) >= 60, - "always and hint policies should inline over normal cost budget"); - tc_fini(&tc); -} - -static void opt_inline_two_return_scalar(void) { - TestCtx tc; - tc_init(&tc); - CfreeCgTypeId ps[1] = {tc.i32}; - Func* callee = new_named_func(&tc, (ObjSymId)3, tc.i32, ps, 1, 0); - PReg x = add_preg(callee, tc.i32); - add_reg_param(callee, x, tc.i32); - u32 then_b = ir_block_new(callee); - u32 else_b = ir_block_new(callee); - ir_note_emit(callee, then_b); - ir_note_emit(callee, else_b); - emit_cond_branch(callee, callee->entry, x, then_b, else_b, tc.i32); - emit_preg_ret(callee, then_b, x, tc.i32); - PReg zero = add_preg(callee, tc.i32); - emit_preg_load_imm(callee, else_b, zero, tc.i32, 0); - emit_preg_ret(callee, else_b, zero, tc.i32); - - Func* caller = new_named_func(&tc, (ObjSymId)4, tc.i32, NULL, 0, 0); - PReg arg = add_preg(caller, tc.i32); - PReg ret = add_preg(caller, tc.i32); - emit_preg_load_imm(caller, caller->entry, arg, tc.i32, 1); - Operand a = op_reg_(arg, tc.i32); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)3, callee->type, &a, 1, - op_reg_(ret, tc.i32)); - emit_preg_ret(caller, caller->entry, ret, tc.i32); - - Func* funcs[2] = {caller, callee}; - FuncSet fs = {tc.c, tc.c->tu, funcs, 2, 2}; - opt_inline(&fs, 1); - EXPECT(count_op(caller, IR_CALL) == 0, - "two-return scalar callee should inline"); - EXPECT(count_op(caller, IR_COPY) >= 2, - "each cloned return should materialize the scalar result"); - tc_fini(&tc); -} - -static void opt_inline_bottom_up_chain_single_iter(void) { - TestCtx tc; - tc_init(&tc); - CfreeCgTypeId ps[1] = {tc.i32}; - Func* g = new_named_func(&tc, (ObjSymId)5, tc.i32, ps, 1, 0); - PReg gx = add_preg(g, tc.i32); - add_reg_param(g, gx, tc.i32); - PReg one = add_preg(g, tc.i32); - PReg gout = add_preg(g, tc.i32); - emit_preg_load_imm(g, g->entry, one, tc.i32, 1); - emit_preg_binop(g, g->entry, gout, gx, one, tc.i32); - emit_preg_ret(g, g->entry, gout, tc.i32); - - Func* f = new_named_func(&tc, (ObjSymId)6, tc.i32, ps, 1, 0); - PReg fx = add_preg(f, tc.i32); - PReg fout = add_preg(f, tc.i32); - add_reg_param(f, fx, tc.i32); - Operand fa = op_reg_(fx, tc.i32); - emit_direct_call(&tc, f, f->entry, (ObjSymId)5, g->type, &fa, 1, - op_reg_(fout, tc.i32)); - emit_preg_ret(f, f->entry, fout, tc.i32); - - Func* main_f = new_named_func(&tc, (ObjSymId)7, tc.i32, NULL, 0, 0); - PReg arg = add_preg(main_f, tc.i32); - PReg ret = add_preg(main_f, tc.i32); - emit_preg_load_imm(main_f, main_f->entry, arg, tc.i32, 41); - Operand ma = op_reg_(arg, tc.i32); - emit_direct_call(&tc, main_f, main_f->entry, (ObjSymId)6, f->type, &ma, 1, - op_reg_(ret, tc.i32)); - emit_preg_ret(main_f, main_f->entry, ret, tc.i32); - - Func* funcs[3] = {main_f, f, g}; - FuncSet fs = {tc.c, tc.c->tu, funcs, 3, 3}; - opt_inline(&fs, 1); - EXPECT(count_op(f, IR_CALL) == 0, - "bottom-up order should inline leaf into wrapper first"); - EXPECT(count_op(main_f, IR_CALL) == 0, - "bottom-up order should inline cleaned wrapper into caller"); - tc_fini(&tc); -} - -static void opt_inline_refuses_recursive_and_unsupported(void) { - TestCtx tc; - tc_init(&tc); - Func* rec = new_named_func(&tc, (ObjSymId)8, tc.i32, NULL, 0, 0); - PReg rr = add_preg(rec, tc.i32); - emit_direct_call(&tc, rec, rec->entry, (ObjSymId)8, rec->type, NULL, 0, - op_reg_(rr, tc.i32)); - emit_preg_ret(rec, rec->entry, rr, tc.i32); - - Func* varg = new_named_func(&tc, (ObjSymId)9, tc.i32, NULL, 0, 1); - PReg vr = add_preg(varg, tc.i32); - emit_preg_load_imm(varg, varg->entry, vr, tc.i32, 3); - emit_preg_ret(varg, varg->entry, vr, tc.i32); - - Func* alloca_f = new_named_func(&tc, (ObjSymId)13, tc.i32, NULL, 0, 0); - ir_emit(alloca_f, alloca_f->entry, IR_ALLOCA); - PReg ar = add_preg(alloca_f, tc.i32); - emit_preg_load_imm(alloca_f, alloca_f->entry, ar, tc.i32, 4); - emit_preg_ret(alloca_f, alloca_f->entry, ar, tc.i32); - - Func* asm_f = new_named_func(&tc, (ObjSymId)14, tc.i32, NULL, 0, 0); - ir_emit(asm_f, asm_f->entry, IR_ASM_BLOCK); - PReg sr = add_preg(asm_f, tc.i32); - emit_preg_load_imm(asm_f, asm_f->entry, sr, tc.i32, 5); - emit_preg_ret(asm_f, asm_f->entry, sr, tc.i32); - - Func* setjmp_f = new_named_func(&tc, (ObjSymId)15, tc.i32, NULL, 0, 0); - Inst* sj = ir_emit(setjmp_f, setjmp_f->entry, IR_INTRINSIC); - IRIntrinAux* sj_aux = arena_znew(setjmp_f->arena, IRIntrinAux); - sj_aux->kind = INTRIN_SETJMP; - sj->extra.aux = sj_aux; - PReg jr = add_preg(setjmp_f, tc.i32); - emit_preg_load_imm(setjmp_f, setjmp_f->entry, jr, tc.i32, 6); - emit_preg_ret(setjmp_f, setjmp_f->entry, jr, tc.i32); - - Func* caller = new_named_func(&tc, (ObjSymId)10, tc.i32, NULL, 0, 0); - PReg a = add_preg(caller, tc.i32); - PReg b = add_preg(caller, tc.i32); - PReg c = add_preg(caller, tc.i32); - PReg d = add_preg(caller, tc.i32); - PReg e = add_preg(caller, tc.i32); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)8, rec->type, NULL, 0, - op_reg_(a, tc.i32)); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)9, varg->type, NULL, 0, - op_reg_(b, tc.i32)); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)13, alloca_f->type, - NULL, 0, op_reg_(c, tc.i32)); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)14, asm_f->type, NULL, - 0, op_reg_(d, tc.i32)); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)15, setjmp_f->type, - NULL, 0, op_reg_(e, tc.i32)); - emit_preg_ret(caller, caller->entry, e, tc.i32); - - Func* funcs[6] = {caller, rec, varg, alloca_f, asm_f, setjmp_f}; - FuncSet fs = {tc.c, tc.c->tu, funcs, 6, 6}; - opt_inline(&fs, 1); - EXPECT(count_op(rec, IR_CALL) == 1, "self recursion should not inline"); - EXPECT(count_op(caller, IR_CALL) == 5, - "recursive, variadic, alloca, asm, and setjmp callees remain calls"); - tc_fini(&tc); -} - -static void opt_inline_caller_growth_cap(void) { - TestCtx tc; - tc_init(&tc); - Func* callee = new_named_func(&tc, (ObjSymId)11, tc.i32, NULL, 0, 0); - PReg last = PREG_NONE; - for (u32 i = 0; i < 20; ++i) { - last = add_preg(callee, tc.i32); - emit_preg_load_imm(callee, callee->entry, last, tc.i32, (i64)i); - } - emit_preg_ret(callee, callee->entry, last, tc.i32); - - Func* caller = new_named_func(&tc, (ObjSymId)12, tc.i32, NULL, 0, 0); - PReg ret = PREG_NONE; - for (u32 i = 0; i < 8; ++i) { - ret = add_preg(caller, tc.i32); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)11, callee->type, - NULL, 0, op_reg_(ret, tc.i32)); - } - emit_preg_ret(caller, caller->entry, ret, tc.i32); - - Func* funcs[2] = {caller, callee}; - FuncSet fs = {tc.c, tc.c->tu, funcs, 2, 2}; - opt_inline(&fs, 1); - EXPECT(count_op(caller, IR_CALL) > 0, - "caller growth cap should leave some large calls uninlined"); - EXPECT(count_op(caller, IR_CALL) < 8, - "growth cap should still permit the first affordable inline"); - tc_fini(&tc); -} - -static void opt_inline_cleanup_promotes_cloned_param_frame(void) { - TestCtx tc; - tc_init(&tc); - CfreeCgTypeId ps[1] = {tc.i32}; - Func* callee = new_named_func(&tc, (ObjSymId)16, tc.i32, ps, 1, 0); - FrameSlot param = add_frame_param(callee, tc.i32); - PReg x = add_preg(callee, tc.i32); - PReg one = add_preg(callee, tc.i32); - PReg sum = add_preg(callee, tc.i32); - emit_preg_load_local(callee, callee->entry, x, param, tc.i32, 0); - emit_preg_load_imm(callee, callee->entry, one, tc.i32, 1); - emit_preg_binop(callee, callee->entry, sum, x, one, tc.i32); - emit_preg_ret(callee, callee->entry, sum, tc.i32); - - Func* caller = new_named_func(&tc, (ObjSymId)17, tc.i32, NULL, 0, 0); - PReg arg = add_preg(caller, tc.i32); - PReg ret = add_preg(caller, tc.i32); - emit_preg_load_imm(caller, caller->entry, arg, tc.i32, 41); - Operand arg_op = op_reg_(arg, tc.i32); - emit_direct_call(&tc, caller, caller->entry, (ObjSymId)16, callee->type, - &arg_op, 1, op_reg_(ret, tc.i32)); - emit_preg_ret(caller, caller->entry, ret, tc.i32); - - Func* funcs[2] = {caller, callee}; - FuncSet fs = {tc.c, tc.c->tu, funcs, 2, 2}; - opt_inline(&fs, 1); - EXPECT(count_op(caller, IR_CALL) == 0, "wrapper call should inline"); - opt_cleanup(caller); - opt_verify(caller, "test-inline-cleanup-param-frame"); - - EXPECT(count_op(caller, IR_LOAD) == 0 && count_op(caller, IR_STORE) == 0, - "cleanup should promote cloned parameter frame traffic"); - EXPECT(any_ret_load_imm(caller, 42), - "cleanup should fold inlined wrapper result to constant 42"); - tc_fini(&tc); -} - -int main(void) { - opt_machinize_uses_phys_reg_metadata(); - opt_machinize_keeps_abi_regs_without_legacy_call_fallback(); - opt_machinize_keeps_abi_regs_for_incoming_params(); - real_arch_call_plan_layouts(); - opt_regalloc_prefers_caller_saved_for_non_call_value(); - opt_call_plan_drives_call_specific_preservation(); - opt_cfg_prunes_unreachable(); - opt_cfg_preserves_scope_edges(); - opt_analysis_dominators_and_frontier(); - opt_ssa_diamond_mem2reg_phi(); - opt_ssa_loop_carried_phi(); - opt_ssa_non_promotable_slots_stay_memory(); - opt_ssa_conventional_splits_critical_edge(); - opt_verify_catches_stale_def_use(); - opt_ssa_dce_removes_dead_defs_and_phi(); - opt_copy_cleanup_rewrites_users(); - opt_copy_prop_rewrites_ssa_copy_chain(); - opt_copy_prop_collapses_redundant_extension_chain(); - opt_block_cloning_clones_small_join_blocks(); - opt_block_cloning_skips_loop_backedges(); - opt_addr_xform_folds_local_addr_into_memory_operand(); - opt_addr_xform_preserves_volatile_and_globals(); - opt_ssa_combine_fuses_cmp_condbr(); - opt_ssa_combine_folds_partial_local_addr_uses(); - opt_simplify_local_rewrites_integer_identities(); - opt_simplify_local_preserves_unsafe_cases(); - opt_simplify_rewrites_ssa_nested_identities(); - opt_simplify_canonicalizes_add_zero_address_chain(); - opt_simplify_feeds_gvn_with_canonical_shape(); - opt_gvn_rewrites_same_block_scalar_duplicate(); - opt_gvn_rewrites_dominated_scalar_duplicate(); - opt_gvn_preserves_nondominated_scalar_duplicates(); - opt_gvn_canonicalizes_commutative_scalar_operands(); - opt_gvn_folds_safe_scalar_constants(); - opt_gvn_rewrites_redundant_local_load(); - opt_gvn_reuses_store_to_local_load(); - opt_gvn_reuses_store_to_addr_of_zero_index_load(); - opt_gvn_preserves_distinct_indexed_local_loads(); - opt_gvn_reuses_joined_same_value_store(); - opt_gvn_preserves_joined_different_or_missing_store(); - opt_gvn_preserves_loop_header_load(); - opt_gvn_preserves_load_across_unknown_store(); - opt_gvn_preserves_nonescaped_local_across_call(); - opt_gvn_clobbers_escaped_local_across_call(); - opt_gvn_preserves_observable_memory_loads(); - opt_dse_removes_overwritten_local_store(); - opt_dse_removes_store_overwritten_on_all_paths(); - opt_dse_removes_unread_nonescaped_local_store(); - opt_dse_preserves_escaped_local_across_call(); - opt_dse_preserves_volatile_store(); - opt_jump_cleanup_forwards_branch_targets(); - opt_jump_cleanup_inverts_to_remove_jump_block(); - opt_jump_cleanup_keeps_conditional_fallthrough_block(); - opt_jump_cleanup_layout_deletes_fallthrough_branch(); - opt_jump_opt_forwards_switch_targets(); - opt_jump_opt_forwards_empty_fallthrough_chain(); - opt_jump_opt_repeatedly_forwards_branch_chain(); - opt_jump_opt_collapses_same_target_cond_branch(); - opt_loop_tree_excludes_side_exit(); - opt_loop_tree_nested_depths(); - opt_loop_tree_does_not_mutate_cfg(); - opt_licm_hoists_safe_invariant_to_preheader(); - opt_licm_preserves_trapping_and_memory_ops(); - opt_pressure_relief_sinks_single_use_load_imm(); - opt_pressure_relief_does_not_sink_into_loop(); - opt_pressure_relief_preserves_multi_use_constant(); - opt_pressure_relief_does_not_cross_memory_ops(); - opt_pressure_relief_sinks_many_immediates_in_one_pass(); - opt_liveness_branch(); - opt_block_liveness_phase1(); - opt_liveness_grows_high_preg_bitsets(); - opt_live_ranges_phase2(); - opt_range_liveness_linear(); - opt_interference_branch_disjoint(); - opt_range_overlap_def_live_out(); - opt_loop_frequency_weights_ranges(); - opt_live_across_call_frequency(); - opt_range_overlap_class(); - opt_regalloc_priority(); - opt_o2_coalesces_nonconflicting_copy(); - opt_o1_skips_coalesce(); - opt_o2_refuses_overlapping_copy_coalesce(); - opt_o2_refuses_incompatible_copy_coalesce(); - opt_o2_spills_singleton_when_whole_alloc_fails(); - opt_o2_does_not_split_critical_edge(); - opt_o1_does_not_split_spill_edges(); - opt_range_regalloc_no_conflicts_and_stack_reuse(); - opt_stack_spill_assignment_avoids_quadratic_probe(); - opt_rewrite_spill_use_def(); - opt_call_clobber_preservation(); - opt_call_clobber_caller_saved(); - opt_spill_pressure(); - opt_inline_asm_tied_fixed_regs(); - opt_inline_asm_constraints_and_clobbers(); - opt_post_rewrite_dce(); - opt_dce_call_clobbers_hard_regs(); - opt_regalloc_spill_requires_scratch(); - opt_combine_spill_peeps(); - opt_combine_single_use_copy_and_imm(); - opt_combine_sinks_or_preserves_producer_copy_after_rewrite(); - opt_combine_keeps_unsafe_and_multiuse_defs(); - opt_combine_copy_chains_and_convert_pairs(); - opt_combine_substitutes_into_indirect_base_and_index(); - opt_combine_copy_chain_source_reuse_blocks_indirect_subst(); - opt_combine_synthesizes_address_modes(); - opt_combine_full_ext_of_ext_rules(); - opt_dce_physical_dead_defs(); - opt_dead_def_keeps_observable_loads(); - opt_dead_def_elim_test(); - opt_planned_call_replay_resolves_arg_cycle(); - opt_planned_call_replay_preserves_indirect_callee_arg_reg(); - opt_planned_call_replay_stores_stack_args(); - opt_planned_call_replay_materializes_address_args(); - opt_planned_call_replay_resolves_return_reg_collision(); - opt_planned_call_replay_stores_stack_sources_before_clobber(); - opt_planned_tail_call_uses_replay_without_return_moves(); - opt_emit_preserves_physical_reg_zero(); - opt_emit_no_virtual_alloc(); - opt_records_const_bytes_by_value(); - opt_cmp_branch_keeps_fallthrough_after_block_growth(); - opt_known_frame_marks_empty_leaf_omittable(); - opt_known_frame_keeps_frame_for_slot_call_and_alloca(); - opt_local_hook_chooses_register_for_scalar(); - opt_param_hook_chooses_register_for_scalar(); - opt_param_memory_required_uses_frame(); - opt_local_addr_taken_uses_frame_and_replays_addr_of(); - opt_register_local_addr_frame_homes(); - opt_inline_direct_wrapper(); - opt_inline_policy_controls_decisions(); - opt_inline_two_return_scalar(); - opt_inline_bottom_up_chain_single_iter(); - opt_inline_refuses_recursive_and_unsupported(); - opt_inline_caller_growth_cap(); - opt_inline_cleanup_promotes_cloned_param_frame(); - if (g_fails) { - fprintf(stderr, "opt tests: %d failed (%d checks)\n", g_fails, g_checks); - return 1; - } - printf("opt tests: PASS (%d checks)\n", g_checks); - return 0; -} diff --git a/test/opt/ret_aggregate_base_clobber.c b/test/opt/ret_aggregate_base_clobber.c @@ -1,39 +0,0 @@ -/* Regression: returning a small aggregate loaded through a pointer chain must - * not clobber the base address with the first field load before later fields - * are read. The aa64 ret path loaded part i into xN=i from [base+off]; when the - * struct address sat in x0 (a return reg), `ldur x0,[x0]; ldur w1,[x0,#8]` - * read the second field off the just-loaded data. Pre-fix this segfaulted at - * -O1/-O2; -O0 was fine. Driven via `cfree run` (entry returns 343 & 0xff). */ -typedef struct { - unsigned size; - unsigned align; - unsigned char a, b, c, d; -} Info; /* 12 bytes -> returned in x0 + w1 */ - -struct VT { - long x; - Info info; -}; -struct A { - long y; - struct VT* vt; -}; - -__attribute__((noinline)) Info ret_info(struct A* a) { return a->vt->info; } - -int entry(void) { - struct VT vt; - vt.x = 0; - vt.info.size = 111; - vt.info.align = 222; - vt.info.a = 1; - vt.info.b = 2; - vt.info.c = 3; - vt.info.d = 4; - struct A a; - a.y = 0; - a.vt = &vt; - Info r = ret_info(&a); - return (int)((r.size + r.align + r.a + r.b + r.c + r.d) & - 0xff); /* 343&255=87 */ -} diff --git a/test/opt/run.sh b/test/opt/run.sh @@ -1,55 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -ROOT="$(cd "$(dirname "$0")/../.." && pwd)" -"$ROOT/build/test/opt_test" - -mkdir -p "$ROOT/build/test/opt" -"$ROOT/build/cfree" cc -target aarch64-linux-gnu \ - -O2 -c "$ROOT/test/opt/o2_many_values.c" \ - -o "$ROOT/build/test/opt/o2_many_values.o" - -INLINE_WORK="$ROOT/build/test/opt/inline_cleanup_quality" -mkdir -p "$INLINE_WORK" -"$ROOT/build/cfree" cc -target aarch64-linux-gnu -O2 -c \ - "$ROOT/test/toy/cases/135_inline_cleanup_quality.toy" \ - -o "$INLINE_WORK/inline_cleanup_quality.o" \ - > "$INLINE_WORK/cc.out" 2> "$INLINE_WORK/cc.err" -"$ROOT/build/cfree" objdump -d "$INLINE_WORK/inline_cleanup_quality.o" \ - > "$INLINE_WORK/objdump.out" 2> "$INLINE_WORK/objdump.err" -awk ' - /^000000000000[0-9a-f]+ <main>:/ { in_main = 1; print; next } - /^000000000000[0-9a-f]+ </ { in_main = 0 } - in_main { print } -' "$INLINE_WORK/objdump.out" > "$INLINE_WORK/main.dis" -if grep -Eq '\bbl\b|stur|ldur|stp|ldp|sub sp|add sp' \ - "$INLINE_WORK/main.dis"; then - printf 'inline cleanup quality check failed; unexpected call/frame traffic:\n' >&2 - sed 's/^/ | /' "$INLINE_WORK/main.dis" >&2 - exit 1 -fi -if ! grep -Eq 'movz[[:space:]]+w0, 0x2a|mov[[:space:]]+w0, #42|li[[:space:]]+a0, 42' \ - "$INLINE_WORK/main.dis"; then - printf 'inline cleanup quality check failed; main is not a constant 42 return:\n' >&2 - sed 's/^/ | /' "$INLINE_WORK/main.dis" >&2 - exit 1 -fi - -# Regression: small-aggregate return must not clobber its base address with the -# first field load (aa64 ret part-load base aliasing). Pre-fix this segfaulted -# at -O1/-O2. Driven through the in-process JIT so it runs on the host arch; -# entry() returns 343 & 0xff == 87. -RET_CLOBBER_SRC="$ROOT/test/opt/ret_aggregate_base_clobber.c" -for opt in -O0 -O1 -O2; do - if "$ROOT/build/cfree" run "$opt" -e entry "$RET_CLOBBER_SRC" \ - > /dev/null 2>&1; then - rc=0 - else - rc=$? - fi - if [ "$rc" -ne 87 ]; then - printf 'ret aggregate base-clobber check failed at %s: exit %d (want 87)\n' \ - "$opt" "$rc" >&2 - exit 1 - fi -done