commit f60a16d14658662018ec245649772ee7990d67ba
parent a691bcbf26887ba8ddcb59d1ebbf17408d2a3fca
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 26 May 2026 17:53:43 -0700
opt: rewrite pipeline to consume CgIrFunc input; replace pass_emit with NativeTarget
Fundamental rearchitecture of the optimizer pipeline:
- cg_ir_lower.c (new): converts a completed CgIrFunc semantic recording into
the optimizer's pseudo-register/frame-slot Func representation. This is the
new pipeline intake point, replacing the old CGTarget-intercepting recorder.
- pass_addr_fold.c (new): address-folding optimizations extracted from pass_o2
so they run at all opt levels (>= O1), not just O2.
- pass_native_emit.c (new): replaces the deleted pass_emit.c (1384 lines).
Drives a NativeTarget instead of replaying through a CGTarget, handling
local-static-data sequences, frame-slot mapping, and label placement.
- pass_emit.c (deleted): old CGTarget replay loop, superseded above.
- opt.c: rewritten from ~1700 lines to ~680. Sheds CGTarget-wrapping recorder
logic; entry point now accepts CgIrFunc and runs:
cg_ir_lower → addr_fold → [O2] → lower → machinize → native_emit.
- pass_o2.c: addr-folding logic removed (now in pass_addr_fold).
- pass_machinize.c, pass_lower.c: updated for NativeTarget interface.
- pass_analysis.c, pass_cfg.c, pass_dce.c, ir_print.c: handle new
local-static-data IR ops.
Includes cg_ir_lower_test.c: unit test for the CgIrFunc → Func conversion.
Diffstat:
13 files changed, 3951 insertions(+), 3843 deletions(-)
diff --git a/src/opt/cg_ir_lower.c b/src/opt/cg_ir_lower.c
@@ -0,0 +1,1082 @@
+#include <string.h>
+
+#include "cg/ir.h"
+#include "cg/type.h"
+#include "opt/opt_internal.h"
+
+#undef Operand
+#undef CGParamDesc
+#undef CGCallDesc
+#undef CGFuncDesc
+#undef CGLocalStorage
+#undef FrameSlotDesc
+
+/* Per-local lowering state, one entry per semantic CGLocal. Entries are
+ * stored at index (id - 1); see local_map(). */
+typedef struct OptLocalMap {
+ OptCGLocalStorage storage; /* register or frame storage picked by lower_locals() */
+ NativeFrameSlot home_slot; /* backing frame slot, or FRAME_SLOT_NONE */
+ CfreeCgTypeId type; /* copied from the source local's desc.type */
+ u32 size; /* copied from desc.size */
+ u32 align; /* copied from desc.align */
+ u8 cls; /* RC_FP or RC_INT, from local_reg_class() */
+ u8 address_taken; /* nonzero: operands referring to this local use the frame slot */
+ u8 pad[2];
+} OptLocalMap;
+
+/* Working state for one CgIrFunc -> Func lowering run. */
+typedef struct CgIrLower {
+ Compiler* c; /* used for panics and type queries */
+ const CgIrFunc* src; /* semantic recording being lowered (read-only) */
+ Func* f; /* optimizer function being built */
+ OptLocalMap* locals; /* indexed by CGLocal id - 1 */
+ u32 nlocals; /* copied from src->nlocals */
+ u32* label_block; /* label id -> block id; UINT32_MAX when unset */
+ u32 nlabels; /* largest label id found in src (label_id_max) */
+ u32* inst_block; /* source instruction index -> block id */
+ u8* leader; /* leader[i] != 0: source instruction i starts a block */
+} CgIrLower;
+
+/* Report a fatal lowering error at `loc`. The message is prefixed with
+ * "opt cg-ir lower: " and control never returns to the caller. */
+static _Noreturn void lower_panic(CgIrLower* l, SrcLoc loc, const char* msg) {
+  Compiler* owner = l->c;
+  compiler_panic(owner, loc, "opt cg-ir lower: %s", msg);
+}
+
+/* Register class for a value of type `ty`: RC_FP when cg_type_is_float()
+ * accepts the type, RC_INT otherwise. */
+static u8 local_reg_class(Compiler* c, CfreeCgTypeId ty) {
+  if (cg_type_is_float(c, ty)) return RC_FP;
+  return RC_INT;
+}
+
+/* Translate a CG function descriptor into an OptCGFuncDesc.
+ *
+ * Scalar fields are copied verbatim and the parameter descriptors are
+ * re-created in arena `a`. A NULL `in` yields an all-zero descriptor. */
+static OptCGFuncDesc lower_func_desc(Arena* a, const struct CGFuncDesc* in) {
+  OptCGFuncDesc out;
+  memset(&out, 0, sizeof out);
+  if (in == NULL) return out;
+
+  out.sym = in->sym;
+  out.text_section_id = in->text_section_id;
+  out.group_id = in->group_id;
+  out.fn_type = in->fn_type;
+  out.result_types = in->result_types;
+  out.nresults = in->nresults;
+  out.nparams = in->nparams;
+  out.loc = in->loc;
+  out.flags = in->flags;
+  out.inline_policy = in->inline_policy;
+  out.atomize = in->atomize;
+
+  /* out.params stays NULL when the source has no parameter array, even
+   * though nparams was copied above. */
+  if (in->nparams == 0 || in->params == NULL) return out;
+
+  OptCGParamDesc* copy = arena_zarray(a, OptCGParamDesc, in->nparams);
+  for (u32 k = 0; k < in->nparams; ++k) {
+    OptCGParamDesc* to = &copy[k];
+    to->index = in->params[k].index;
+    to->name = in->params[k].name;
+    to->type = in->params[k].type;
+    to->size = in->params[k].size;
+    to->align = in->params[k].align;
+    to->flags = in->params[k].flags;
+    to->loc = in->params[k].loc;
+  }
+  out.params = copy;
+  return out;
+}
+
+/* Build the frame-slot descriptor for semantic local `in`.
+ *
+ * `kind` is stored as the slot kind. FSF_ADDR_TAKEN is set when either the
+ * recorder's address_taken bit or the CG_LOCAL_ADDR_TAKEN flag is present;
+ * FSF_MEMORY_REQUIRED mirrors CG_LOCAL_MEMORY_REQUIRED. */
+static NativeFrameSlotDesc local_slot_desc(const CgIrLocal* in, u8 kind) {
+  NativeFrameSlotDesc desc;
+  int addr_taken =
+      in->address_taken || (in->desc.flags & CG_LOCAL_ADDR_TAKEN);
+  int needs_memory = (in->desc.flags & CG_LOCAL_MEMORY_REQUIRED) != 0;
+
+  memset(&desc, 0, sizeof desc);
+  desc.kind = kind;
+  desc.type = in->desc.type;
+  desc.name = in->desc.name;
+  desc.loc = in->desc.loc;
+  desc.size = in->desc.size;
+  desc.align = in->desc.align;
+  if (addr_taken) desc.flags |= FSF_ADDR_TAKEN;
+  if (needs_memory) desc.flags |= FSF_MEMORY_REQUIRED;
+  return desc;
+}
+
+/* Look up the lowering entry for semantic local `id` (1-based).
+ * Panics with "bad semantic local" when `id` is CG_LOCAL_NONE or exceeds
+ * the number of locals. */
+static OptLocalMap* local_map(CgIrLower* l, CGLocal id, SrcLoc loc) {
+  int in_range = id != CG_LOCAL_NONE && id <= l->nlocals;
+  if (!in_range) lower_panic(l, loc, "bad semantic local");
+  return l->locals + (id - 1u);
+}
+
+/* Returns 1 when the recorder already marked the local as needing memory:
+ * its address_taken bit is set, or its descriptor carries
+ * CG_LOCAL_ADDR_TAKEN or CG_LOCAL_MEMORY_REQUIRED. Returns 0 otherwise. */
+static int local_needs_home(const CgIrLocal* in) {
+  if (in->address_taken) return 1;
+  return (in->desc.flags &
+          (CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED)) != 0;
+}
+
+/* Returns 1 when `op` is an OPK_LOCAL operand naming `local`, 0 otherwise
+ * (including when `op` is NULL). */
+static int operand_uses_local_addr(const Operand* op, CGLocal local) {
+  return op != NULL && op->kind == OPK_LOCAL && op->v.local == local;
+}
+
+/* Scan the whole recording for an instruction that uses `local` in an
+ * address position. Returns 1 on the first hit, 0 if none is found.
+ *
+ * Address positions per opcode:
+ *   opnds[1]          LOAD, BITFIELD_LOAD, ADDR_OF
+ *   opnds[0]          STORE, AGG_SET, BITFIELD_STORE, VA_START, VA_END
+ *   opnds[0] and [1]  AGG_COPY, VA_COPY
+ * An operand is only inspected when nopnds says it exists. */
+static int local_address_used_in_cg_ir(const CgIrFunc* f, CGLocal local) {
+  for (u32 idx = 0; idx < f->ninsts; ++idx) {
+    const CgIrInst* inst = &f->insts[idx];
+    int check0 = 0;
+    int check1 = 0;
+
+    switch ((CgIrOp)inst->op) {
+      case CG_IR_LOAD:
+      case CG_IR_BITFIELD_LOAD:
+      case CG_IR_ADDR_OF:
+        check1 = 1;
+        break;
+      case CG_IR_STORE:
+      case CG_IR_AGG_SET:
+      case CG_IR_BITFIELD_STORE:
+      case CG_IR_VA_START:
+      case CG_IR_VA_END:
+        check0 = 1;
+        break;
+      case CG_IR_AGG_COPY:
+      case CG_IR_VA_COPY:
+        check0 = 1;
+        check1 = 1;
+        break;
+      default:
+        break;
+    }
+
+    if (check0 && inst->nopnds > 0u &&
+        operand_uses_local_addr(&inst->opnds[0], local))
+      return 1;
+    if (check1 && inst->nopnds > 1u &&
+        operand_uses_local_addr(&inst->opnds[1], local))
+      return 1;
+  }
+  return 0;
+}
+
+/* Create the OptLocalMap table and register every semantic local with the
+ * optimizer function.
+ *
+ * A local gets frame storage when local_needs_home() says so or when the
+ * recording uses it in an address position; otherwise it lives in a
+ * pseudo-register. Panics if a local id is CG_LOCAL_NONE or larger than the
+ * local count. */
+static void lower_locals(CgIrLower* l) {
+  const CgIrFunc* src = l->src;
+  Func* f = l->f;
+
+  l->nlocals = src->nlocals;
+  l->locals =
+      arena_zarray(f->arena, OptLocalMap, l->nlocals ? l->nlocals : 1u);
+
+  for (u32 k = 0; k < src->nlocals; ++k) {
+    const CgIrLocal* local = &src->locals[k];
+    if (local->id == CG_LOCAL_NONE || local->id > src->nlocals)
+      lower_panic(l, local->desc.loc, "non-dense semantic local table");
+
+    OptLocalMap* m = &l->locals[local->id - 1u];
+    m->type = local->desc.type;
+    m->size = local->desc.size;
+    m->align = local->desc.align;
+    m->cls = local_reg_class(l->c, local->desc.type);
+    m->address_taken = local_needs_home(local) ||
+                       local_address_used_in_cg_ir(src, local->id);
+
+    /* A pseudo-register is allocated for every local, including the ones
+     * that end up in a frame slot and never use it. */
+    PReg r = ir_alloc_preg(f, local->desc.type, m->cls);
+    if (m->address_taken) {
+      NativeFrameSlotDesc fsd =
+          local_slot_desc(local, local->is_param ? FS_PARAM : FS_LOCAL);
+      m->storage.kind = CG_LOCAL_STORAGE_FRAME;
+      m->home_slot = ir_frame_slot_new(f, &fsd);
+      m->storage.v.frame_slot = m->home_slot;
+    } else {
+      m->storage.kind = CG_LOCAL_STORAGE_REG;
+      m->storage.v.reg = (Reg)r;
+      m->home_slot = FRAME_SLOT_NONE;
+    }
+
+    /* The entry just added is the last one in f->locals. */
+    (void)ir_local_add(f, &local->desc, m->storage);
+    f->locals[f->nlocals - 1u].address_taken = m->address_taken;
+    f->locals[f->nlocals - 1u].home_slot = m->home_slot;
+  }
+}
+
+/* Linear search for the parameter record bound to `local`. Returns the
+ * first match, or NULL when no parameter refers to that local. */
+static const CgIrParam* find_param(const CgIrFunc* f, CGLocal local) {
+  u32 k = 0;
+  while (k < f->nparams) {
+    if (f->params[k].local == local) return &f->params[k];
+    ++k;
+  }
+  return NULL;
+}
+
+/* Register every parameter local with the optimizer function, in the order
+ * the locals appear in the source table.
+ *
+ * The descriptor is taken from the matching CgIrParam when one exists;
+ * otherwise it is rebuilt from the local's own descriptor and param_index.
+ * The storage always comes from the OptLocalMap entry. */
+static void lower_params(CgIrLower* l) {
+  const CgIrFunc* src = l->src;
+  for (u32 k = 0; k < src->nlocals; ++k) {
+    const CgIrLocal* local = &src->locals[k];
+    if (local->is_param) {
+      const CgIrParam* p = find_param(src, local->id);
+      OptLocalMap* m = local_map(l, local->id, local->desc.loc);
+      OptCGParamDesc d;
+      memset(&d, 0, sizeof d);
+      if (p == NULL) {
+        d.index = local->param_index;
+        d.name = local->desc.name;
+        d.type = local->desc.type;
+        d.size = local->desc.size;
+        d.align = local->desc.align;
+        d.flags = local->desc.flags;
+        d.loc = local->desc.loc;
+      } else {
+        d.index = p->desc.index;
+        d.name = p->desc.name;
+        d.type = p->desc.type;
+        d.size = p->desc.size;
+        d.align = p->desc.align;
+        d.flags = p->desc.flags;
+        d.loc = p->desc.loc;
+      }
+      d.storage = m->storage;
+      ir_param_add(l->f, &d);
+    }
+  }
+}
+
+/* Returns 1 when `in` ends a basic block: any branch, return, switch,
+ * break/continue, or an intrinsic of kind LONGJMP, TRAP or UNREACHABLE.
+ * Returns 0 for everything else, including a NULL instruction and an
+ * intrinsic without aux data. */
+static int cg_inst_terminates(const CgIrInst* in) {
+  const CgIrIntrinsicAux* aux;
+  if (in == NULL) return 0;
+
+  switch ((CgIrOp)in->op) {
+    case CG_IR_BR:
+    case CG_IR_RET:
+    case CG_IR_CMP_BRANCH:
+    case CG_IR_SWITCH:
+    case CG_IR_INDIRECT_BRANCH:
+    case CG_IR_BREAK_TO:
+    case CG_IR_CONTINUE_TO:
+      return 1;
+    case CG_IR_INTRINSIC:
+      break;
+    default:
+      return 0;
+  }
+
+  aux = (const CgIrIntrinsicAux*)in->extra.aux;
+  if (aux == NULL) return 0;
+  return aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP ||
+         aux->kind == INTRIN_UNREACHABLE;
+}
+
+/* Largest label id declared in `f`, or 0 when there are no labels. */
+static u32 label_id_max(const CgIrFunc* f) {
+  u32 best = 0;
+  for (u32 k = 0; k < f->nlabels; ++k) {
+    if (f->labels[k].id > best) best = f->labels[k].id;
+  }
+  return best;
+}
+
+/* Flag the instruction where `label` is placed as a block leader.
+ * Does nothing for LABEL_NONE, out-of-range ids, or labels that were never
+ * placed (place[label] == UINT32_MAX). */
+static void mark_label_leader(CgIrLower* l, Label label, const u32* place) {
+  if (label != LABEL_NONE && label <= l->nlabels) {
+    u32 at = place[label];
+    if (at != UINT32_MAX) l->leader[at] = 1;
+  }
+}
+
+/* Compute block leaders for the source instruction stream.
+ *
+ * Pass 1 marks every LABEL instruction and records the first placement of
+ * each label in `label_place`. Pass 2 marks the instruction after each
+ * terminator, every branch target, and the boundaries around scope
+ * markers. l->leader must have room for ninsts + 1 entries. */
+static void mark_leaders(CgIrLower* l, u32* label_place) {
+  const CgIrFunc* f = l->src;
+  const u32 n = f->ninsts;
+
+  memset(l->leader, 0, ((size_t)n + 1u) * sizeof *l->leader);
+  if (n) l->leader[0] = 1;
+
+  for (u32 i = 0; i < n; ++i) {
+    const CgIrInst* in = &f->insts[i];
+    if ((CgIrOp)in->op != CG_IR_LABEL) continue;
+    Label label = (Label)in->extra.imm;
+    l->leader[i] = 1;
+    /* Only the first placement of a label is remembered. */
+    if (label && label <= l->nlabels && label_place[label] == UINT32_MAX)
+      label_place[label] = i;
+  }
+
+  for (u32 i = 0; i < n; ++i) {
+    const CgIrInst* in = &f->insts[i];
+    const int has_next = i + 1u < n;
+
+    /* Covers CMP_BRANCH and SWITCH too, so their cases below only need to
+     * mark targets. */
+    if (has_next && cg_inst_terminates(in)) l->leader[i + 1u] = 1;
+
+    switch ((CgIrOp)in->op) {
+      case CG_IR_BR:
+      case CG_IR_LOAD_LABEL_ADDR:
+        mark_label_leader(l, (Label)in->extra.imm, label_place);
+        break;
+      case CG_IR_CMP_BRANCH: {
+        CgIrCmpBranchAux* aux = (CgIrCmpBranchAux*)in->extra.aux;
+        if (aux) mark_label_leader(l, aux->target, label_place);
+        break;
+      }
+      case CG_IR_SWITCH: {
+        CgIrSwitchAux* aux = (CgIrSwitchAux*)in->extra.aux;
+        if (aux) {
+          mark_label_leader(l, aux->default_label, label_place);
+          for (u32 c = 0; c < aux->ncases; ++c)
+            mark_label_leader(l, aux->cases[c].label, label_place);
+        }
+        break;
+      }
+      case CG_IR_INDIRECT_BRANCH: {
+        CgIrIndirectAux* aux = (CgIrIndirectAux*)in->extra.aux;
+        if (aux) {
+          for (u32 t = 0; t < aux->ntargets; ++t)
+            mark_label_leader(l, aux->targets[t], label_place);
+        }
+        break;
+      }
+      case CG_IR_SCOPE_BEGIN:
+        if (has_next) l->leader[i + 1u] = 1;
+        break;
+      case CG_IR_SCOPE_ELSE:
+      case CG_IR_SCOPE_END:
+        /* Scope markers sit in a block of their own. */
+        l->leader[i] = 1;
+        if (has_next) l->leader[i + 1u] = 1;
+        break;
+      default:
+        break;
+    }
+  }
+}
+
+/* Create the optimizer blocks and the instruction/label -> block tables.
+ *
+ * Steps, in order:
+ *   1. Walk the instructions, opening a new block at every leader; the
+ *      first block created becomes the entry.
+ *   2. Map each placed label to the block of the instruction after the
+ *      label (or the label's own block when it is the last instruction).
+ *      Labels that were never placed get a fresh block.
+ *   3. Create an entry block if nothing exists yet.
+ *   4. Rebuild the emit order from the per-instruction block ids.
+ *
+ * NOTE(review): when there are no instructions but an unplaced label
+ * creates a block in step 2, step 3 is skipped and f->entry keeps whatever
+ * value it had on entry -- confirm this cannot happen in practice. */
+static void make_blocks(CgIrLower* l, const u32* label_place) {
+  const CgIrFunc* src = l->src;
+  Func* f = l->f;
+  const u32 n = src->ninsts;
+  u32 open = UINT32_MAX;
+
+  l->inst_block = arena_zarray(f->arena, u32, n ? n : 1u);
+  for (u32 i = 0; i < n; ++i) {
+    int start_new = l->leader[i] || open == UINT32_MAX;
+    if (start_new) {
+      open = ir_block_new(f);
+      ir_note_emit(f, open);
+      if (f->nblocks == 1u) f->entry = open;
+    }
+    l->inst_block[i] = open;
+  }
+
+  l->label_block =
+      arena_zarray(f->arena, u32, l->nlabels ? l->nlabels + 1u : 1u);
+  for (u32 i = 0; i <= l->nlabels; ++i) l->label_block[i] = UINT32_MAX;
+  for (u32 label = 1; label <= l->nlabels; ++label) {
+    u32 at = label_place[label];
+    if (at == UINT32_MAX) {
+      l->label_block[label] = ir_block_new(f);
+      continue;
+    }
+    /* LABEL instructions emit nothing, so prefer the following block. */
+    u32 body = (at + 1u < n) ? at + 1u : at;
+    l->label_block[label] = l->inst_block[body];
+  }
+
+  if (f->nblocks == 0) {
+    f->entry = ir_block_new(f);
+    ir_note_emit(f, f->entry);
+  }
+
+  f->emit_order_n = 0;
+  for (u32 i = 0; i < n; ++i) ir_note_emit(f, l->inst_block[i]);
+  if (n == 0) ir_note_emit(f, f->entry);
+}
+
+/* Emit one IR_PARAM_DECL into the entry block for each registered
+ * parameter. The instruction defines the parameter's register when the
+ * parameter has register storage, and its aux carries a copy of the
+ * parameter descriptor. Does nothing when there are no parameters or the
+ * entry block id is out of range. */
+static void emit_param_decls(CgIrLower* l) {
+  Func* f = l->f;
+  if (f->nparams == 0 || f->entry >= f->nblocks) return;
+
+  for (u32 k = 0; k < f->nparams; ++k) {
+    IRParam* p = &f->params[k];
+    Inst* decl = ir_emit(f, f->entry, IR_PARAM_DECL);
+    IRParamDeclAux* aux = arena_znew(f->arena, IRParamDeclAux);
+
+    memset(aux, 0, sizeof *aux);
+    aux->desc.index = p->index;
+    aux->desc.name = p->name;
+    aux->desc.type = p->type;
+    aux->desc.size = p->size;
+    aux->desc.align = p->align;
+    aux->desc.flags = p->flags;
+    aux->desc.loc = p->loc;
+    aux->desc.storage = p->storage;
+    aux->desc.abi = p->abi;
+
+    decl->loc = p->loc;
+    decl->type = p->type;
+    if (p->storage.kind == CG_LOCAL_STORAGE_REG)
+      decl->def = p->storage.v.reg;
+    decl->extra.aux = aux;
+  }
+}
+
+/* Block id assigned to `label`. Panics with "bad label" for LABEL_NONE,
+ * out-of-range ids, and labels without a block. */
+static u32 block_for_label(CgIrLower* l, Label label, SrcLoc loc) {
+  if (label != LABEL_NONE && label <= l->nlabels) {
+    u32 block = l->label_block[label];
+    if (block != UINT32_MAX) return block;
+  }
+  lower_panic(l, loc, "bad label");
+}
+
+/* Block of the instruction following `inst_index`, or UINT32_MAX when
+ * `inst_index` is the last instruction. */
+static u32 fallthrough_block(CgIrLower* l, u32 inst_index) {
+  u32 next = inst_index + 1u;
+  return next < l->src->ninsts ? l->inst_block[next] : UINT32_MAX;
+}
+
+/* Give `block` exactly one successor, or none when `succ` is UINT32_MAX. */
+static void set_succ1(CgIrLower* l, u32 block, u32 succ) {
+  Block* b = &l->f->blocks[block];
+  if (succ != UINT32_MAX) {
+    b->succ[0] = succ;
+    b->nsucc = 1;
+  } else {
+    b->nsucc = 0;
+  }
+}
+
+/* Copy `n` operands into the function arena. Returns NULL when n == 0. */
+static OptOperand* dup_opt_ops(CgIrLower* l, const OptOperand* ops, u32 n) {
+  OptOperand* copy;
+  if (n == 0) return NULL;
+  copy = arena_array(l->f->arena, OptOperand, n);
+  memcpy(copy, ops, n * sizeof *copy);
+  return copy;
+}
+
+/* Register operand for a local: OPK_REG with the local's class, type and
+ * the register stored in m->storage. */
+static OptOperand opt_reg_operand(OptLocalMap* m) {
+  OptOperand op;
+  memset(&op, 0, sizeof op);
+  op.v.reg = m->storage.v.reg;
+  op.type = m->type;
+  op.cls = m->cls;
+  op.kind = OPK_REG;
+  return op;
+}
+
+/* Frame operand for a local: OPK_LOCAL naming the local's home slot. The
+ * class is always RC_INT; the type is the local's type. */
+static OptOperand opt_frame_operand(OptLocalMap* m) {
+  OptOperand op;
+  memset(&op, 0, sizeof op);
+  op.v.frame_slot = m->home_slot;
+  op.type = m->type;
+  op.cls = RC_INT;
+  op.kind = OPK_LOCAL;
+  return op;
+}
+
+static OptOperand lower_operand_value(CgIrLower* l, const Operand* in,
+                                      SrcLoc loc);
+
+/* Make sure local `id` has a frame slot, creating one on demand.
+ *
+ * The source local and the optimizer local are both looked up at index
+ * id - 1. The OptLocalMap storage descriptor is left untouched; only
+ * home_slot and address_taken change. */
+static void ensure_home_slot(CgIrLower* l, OptLocalMap* m, CGLocal id) {
+  if (m->home_slot != FRAME_SLOT_NONE) return;
+
+  const CgIrLocal* src = &l->src->locals[id - 1u];
+  NativeFrameSlotDesc fsd =
+      local_slot_desc(src, src->is_param ? FS_PARAM : FS_LOCAL);
+  m->home_slot = ir_frame_slot_new(l->f, &fsd);
+  m->address_taken = 1;
+  if (id - 1u < l->f->nlocals) {
+    l->f->locals[id - 1u].address_taken = 1;
+    l->f->locals[id - 1u].home_slot = m->home_slot;
+  }
+}
+
+/* Lower an operand that is used as a memory address.
+ *
+ *   OPK_LOCAL     -> frame operand for the local's home slot
+ *   OPK_GLOBAL    -> symbol + addend, copied through
+ *   OPK_INDIRECT  -> base/index locals replaced by their registers
+ * Any other kind panics with "operand is not addressable". A NULL operand
+ * yields an all-zero operand.
+ *
+ * NOTE(review): the indirect case reads storage.v.reg of the base and index
+ * locals without checking that they have register storage -- confirm that
+ * indirect bases are never address-taken locals. */
+static OptOperand lower_operand_addr(CgIrLower* l, const Operand* in,
+                                     SrcLoc loc) {
+  OptOperand out;
+  memset(&out, 0, sizeof out);
+  if (in == NULL) return out;
+  out.type = in->type;
+
+  OpKind kind = (OpKind)in->kind;
+  if (kind == OPK_LOCAL) {
+    OptLocalMap* m = local_map(l, in->v.local, loc);
+    ensure_home_slot(l, m, in->v.local);
+    return opt_frame_operand(m);
+  }
+  if (kind == OPK_GLOBAL) {
+    out.kind = OPK_GLOBAL;
+    out.cls = RC_INT;
+    out.v.global.sym = in->v.global.sym;
+    out.v.global.addend = in->v.global.addend;
+    return out;
+  }
+  if (kind == OPK_INDIRECT) {
+    OptLocalMap* base = local_map(l, in->v.ind.base, loc);
+    out.kind = OPK_INDIRECT;
+    out.cls = RC_INT;
+    out.v.ind.base = base->storage.v.reg;
+    out.v.ind.index = REG_NONE;
+    if (in->v.ind.index != CG_LOCAL_NONE) {
+      OptLocalMap* idx = local_map(l, in->v.ind.index, loc);
+      out.v.ind.index = idx->storage.v.reg;
+    }
+    out.v.ind.log2_scale = in->v.ind.log2_scale;
+    out.v.ind.ofs = in->v.ind.ofs;
+    return out;
+  }
+  lower_panic(l, loc, "operand is not addressable");
+}
+
+/* Lower an operand that is used as a value.
+ *
+ * Immediates and globals are copied through with class RC_INT. A local
+ * becomes a frame operand when it is address-taken and a register operand
+ * otherwise. Indirect operands are delegated to lower_operand_addr(). Any
+ * other kind panics with "bad operand kind"; a NULL operand yields an
+ * all-zero operand. */
+static OptOperand lower_operand_value(CgIrLower* l, const Operand* in,
+                                      SrcLoc loc) {
+  OptOperand out;
+  memset(&out, 0, sizeof out);
+  if (in == NULL) return out;
+  out.type = in->type;
+
+  switch ((OpKind)in->kind) {
+    case OPK_INDIRECT:
+      return lower_operand_addr(l, in, loc);
+    case OPK_LOCAL: {
+      OptLocalMap* m = local_map(l, in->v.local, loc);
+      if (m->address_taken) return opt_frame_operand(m);
+      return opt_reg_operand(m);
+    }
+    case OPK_IMM:
+      out.kind = OPK_IMM;
+      out.v.imm = in->v.imm;
+      break;
+    case OPK_GLOBAL:
+      out.kind = OPK_GLOBAL;
+      out.v.global.sym = in->v.global.sym;
+      out.v.global.addend = in->v.global.addend;
+      break;
+    default:
+      lower_panic(l, loc, "bad operand kind");
+  }
+  out.cls = RC_INT;
+  return out;
+}
+
+/* Record `op` as the value defined by `out`, but only when it is a
+ * register operand; other operand kinds leave `out` unchanged. */
+static void set_inst_def(Inst* out, const OptOperand* op) {
+  if (op == NULL || op->kind != OPK_REG) return;
+  out->type = op->type;
+  out->def = (Val)op->v.reg;
+}
+
+/* Lower the first `n` operands of `in` as values and attach them to `out`.
+ *
+ * With `defs_first` set, opnds[0] is treated as the destination and becomes
+ * the instruction's def. With it clear, every operand is a plain use. The
+ * branch terminators (CMP_BRANCH, SWITCH, INDIRECT_BRANCH) only read their
+ * first operand, so they must pass defs_first=0; otherwise dead-def
+ * elimination sees the branch as redefining the tested value and deletes
+ * the instruction that really produced it.
+ *
+ * Panics with "too many operands" when n exceeds the scratch capacity. */
+static void lower_value_ops_ex(CgIrLower* l, Inst* out, const CgIrInst* in,
+                               u32 n, int defs_first) {
+  OptOperand scratch[5];
+  if (n > 5u) lower_panic(l, in->loc, "too many operands");
+
+  for (u32 k = 0; k != n; ++k)
+    scratch[k] = lower_operand_value(l, in->opnds + k, in->loc);
+
+  out->nopnds = n;
+  out->opnds = dup_opt_ops(l, scratch, n);
+  if (defs_first && n != 0) set_inst_def(out, &out->opnds[0]);
+}
+
+/* Lower `n` value operands where opnds[0] is the destination. */
+static void lower_value_ops(CgIrLower* l, Inst* out, const CgIrInst* in,
+                            u32 n) {
+  const int first_is_def = 1;
+  lower_value_ops_ex(l, out, in, n, first_is_def);
+}
+
+/* Lower `n` value operands that are all uses; no def is recorded. */
+static void lower_use_ops(CgIrLower* l, Inst* out, const CgIrInst* in, u32 n) {
+  const int first_is_def = 0;
+  lower_value_ops_ex(l, out, in, n, first_is_def);
+}
+
+/* Lower the leading `naddr` operands of `in` as addresses and the next
+ * `nvalue` operands as values, then attach all of them to `out`. No def is
+ * recorded.
+ *
+ * Panics with "too many operands" when the combined count exceeds the
+ * scratch capacity. */
+static void lower_addr_value_ops(CgIrLower* l, Inst* out, const CgIrInst* in,
+                                 u32 naddr, u32 nvalue) {
+  OptOperand tmp[5];
+  const u32 cap = (u32)(sizeof tmp / sizeof tmp[0]);
+
+  /* Check the two counts separately instead of testing their sum: callers
+   * pass `in->nopnds - 1u` for nvalue, which wraps to UINT32_MAX for an
+   * instruction without operands. The u32 sum would then wrap back into
+   * range and the loops below would run far past tmp[]. */
+  if (naddr > cap || nvalue > cap - naddr)
+    lower_panic(l, in->loc, "too many operands");
+
+  const u32 n = naddr + nvalue;
+  for (u32 i = 0; i < naddr; ++i)
+    tmp[i] = lower_operand_addr(l, &in->opnds[i], in->loc);
+  for (u32 i = 0; i < nvalue; ++i)
+    tmp[naddr + i] = lower_operand_value(l, &in->opnds[naddr + i], in->loc);
+  out->opnds = dup_opt_ops(l, tmp, n);
+  out->nopnds = n;
+}
+
+/* ABI value describing semantic local `local`: its type plus a frame
+ * operand when the local is address-taken, or a register operand
+ * otherwise. Panics (via local_map) on a bad local id. */
+static OptCGABIValue abi_value_for_local(CgIrLower* l, CGLocal local,
+                                         SrcLoc loc) {
+  OptLocalMap* m = local_map(l, local, loc);
+  OptCGABIValue v;
+  memset(&v, 0, sizeof v);
+  v.type = m->type;
+  if (m->address_taken)
+    v.storage = opt_frame_operand(m);
+  else
+    v.storage = opt_reg_operand(m);
+  return v;
+}
+
+/* Fill in the IRCallAux for a CALL instruction.
+ *
+ * The callee is lowered as a value operand and every argument local is
+ * turned into an ABI value. Only results[0] is lowered; when it is a
+ * register it becomes the instruction's def. out->type is always set to
+ * the call's function type, after the def has been recorded. With no
+ * source aux, an all-zero IRCallAux is attached and nothing else changes. */
+static void lower_call(CgIrLower* l, Inst* out, const CgIrInst* in) {
+  const CgIrCallAux* call = (const CgIrCallAux*)in->extra.aux;
+  IRCallAux* aux = arena_znew(l->f->arena, IRCallAux);
+  memset(aux, 0, sizeof *aux);
+  out->extra.aux = aux;
+  if (call == NULL) return;
+
+  aux->desc.fn_type = call->desc.fn_type;
+  aux->desc.callee = lower_operand_value(l, &call->desc.callee, in->loc);
+  aux->desc.nargs = call->desc.nargs;
+  aux->desc.flags = call->desc.flags;
+  aux->desc.tail_policy = call->desc.tail_policy;
+  aux->desc.inline_policy = call->desc.inline_policy;
+
+  if (call->desc.nargs != 0) {
+    aux->desc.args =
+        arena_zarray(l->f->arena, OptCGABIValue, call->desc.nargs);
+    for (u32 k = 0; k < call->desc.nargs; ++k)
+      aux->desc.args[k] = abi_value_for_local(l, call->desc.args[k], in->loc);
+  }
+
+  if (call->desc.nresults != 0) {
+    aux->desc.ret = abi_value_for_local(l, call->desc.results[0], in->loc);
+    set_inst_def(out, &aux->desc.ret.storage);
+  }
+
+  /* Overrides the type set_inst_def() may have stored above. */
+  out->type = call->desc.fn_type;
+}
+
+/* Attach an IRRetAux to a RET instruction. When the source returns at
+ * least one value, values[0] is lowered and `present` is set; otherwise
+ * the aux is left as arena_znew() produced it. */
+static void lower_ret(CgIrLower* l, Inst* out, const CgIrInst* in) {
+  IRRetAux* aux = arena_znew(l->f->arena, IRRetAux);
+  const CgIrRetAux* src = (const CgIrRetAux*)in->extra.aux;
+
+  out->extra.aux = aux;
+  if (src == NULL || src->nvalues == 0) return;
+  aux->present = 1;
+  aux->val = abi_value_for_local(l, src->values[0], in->loc);
+}
+
+/* Attach an IRIntrinAux to an INTRINSIC instruction.
+ *
+ * Destinations and arguments are lowered as value operands. Each
+ * destination contributes one entry to out->defs: its register when it is
+ * a register operand, 0 otherwise. out->def and out->type come from the
+ * first destination. */
+static void lower_intrinsic(CgIrLower* l, Inst* out, const CgIrInst* in) {
+  const CgIrIntrinsicAux* src = (const CgIrIntrinsicAux*)in->extra.aux;
+  IRIntrinAux* aux = arena_znew(l->f->arena, IRIntrinAux);
+
+  out->extra.aux = aux;
+  if (src == NULL) return;
+
+  aux->kind = src->kind;
+  aux->ndst = src->ndst;
+  aux->narg = src->narg;
+  if (src->ndst)
+    aux->dsts = arena_array(l->f->arena, OptOperand, src->ndst);
+  else
+    aux->dsts = NULL;
+  if (src->narg)
+    aux->args = arena_array(l->f->arena, OptOperand, src->narg);
+  else
+    aux->args = NULL;
+
+  for (u32 k = 0; k < src->ndst; ++k)
+    aux->dsts[k] = lower_operand_value(l, &src->dsts[k], in->loc);
+  for (u32 k = 0; k < src->narg; ++k)
+    aux->args[k] = lower_operand_value(l, &src->args[k], in->loc);
+
+  if (src->ndst == 0) return;
+  out->ndefs = src->ndst;
+  out->defs = arena_array(l->f->arena, Val, src->ndst);
+  for (u32 k = 0; k < src->ndst; ++k) {
+    const OptOperand* dst = &aux->dsts[k];
+    out->defs[k] = dst->kind == OPK_REG ? dst->v.reg : 0;
+  }
+  out->def = out->defs[0];
+  out->type = aux->dsts[0].type;
+}
+
+/* Attach an IRAsmAux to an ASM_BLOCK instruction.
+ *
+ * The template, constraint arrays and clobbers are shared with the source
+ * aux by pointer. Output and input operands are lowered as values. Each
+ * output contributes one entry to out->defs: its register when it is a
+ * register operand, 0 otherwise. out->def and out->type come from the
+ * first output. */
+static void lower_asm(CgIrLower* l, Inst* out, const CgIrInst* in) {
+  const CgIrAsmAux* src = (const CgIrAsmAux*)in->extra.aux;
+  IRAsmAux* aux = arena_znew(l->f->arena, IRAsmAux);
+
+  out->extra.aux = aux;
+  if (src == NULL) return;
+
+  aux->tmpl = src->tmpl;
+  aux->outs = src->outs;
+  aux->ins = src->ins;
+  aux->clobbers = src->clobbers;
+  aux->nout = src->nout;
+  aux->nin = src->nin;
+  aux->nclob = src->nclob;
+  if (src->nout)
+    aux->out_ops = arena_array(l->f->arena, OptOperand, src->nout);
+  else
+    aux->out_ops = NULL;
+  if (src->nin)
+    aux->in_ops = arena_array(l->f->arena, OptOperand, src->nin);
+  else
+    aux->in_ops = NULL;
+
+  for (u32 k = 0; k < src->nout; ++k)
+    aux->out_ops[k] = lower_operand_value(l, &src->out_ops[k], in->loc);
+  for (u32 k = 0; k < src->nin; ++k)
+    aux->in_ops[k] = lower_operand_value(l, &src->in_ops[k], in->loc);
+
+  if (src->nout == 0) return;
+  out->ndefs = src->nout;
+  out->defs = arena_array(l->f->arena, Val, src->nout);
+  for (u32 k = 0; k < src->nout; ++k) {
+    const OptOperand* dst = &aux->out_ops[k];
+    out->defs[k] = dst->kind == OPK_REG ? dst->v.reg : 0;
+  }
+  out->def = out->defs[0];
+  out->type = aux->out_ops[0].type;
+}
+
+/* Map a semantic CG IR opcode to the optimizer opcode. Opcodes without a
+ * case here (including CG_IR_LABEL) map to IR_NOP. */
+static IROp cg_op_to_ir_op(CgIrOp op) {
+  switch (op) {
+    case CG_IR_LOAD_IMM: return IR_LOAD_IMM;
+    case CG_IR_LOAD_CONST: return IR_LOAD_CONST;
+    case CG_IR_COPY: return IR_COPY;
+    case CG_IR_LOAD: return IR_LOAD;
+    case CG_IR_STORE: return IR_STORE;
+    case CG_IR_ADDR_OF: return IR_ADDR_OF;
+    case CG_IR_TLS_ADDR_OF: return IR_TLS_ADDR_OF;
+    case CG_IR_AGG_COPY: return IR_AGG_COPY;
+    case CG_IR_AGG_SET: return IR_AGG_SET;
+    case CG_IR_BITFIELD_LOAD: return IR_BITFIELD_LOAD;
+    case CG_IR_BITFIELD_STORE: return IR_BITFIELD_STORE;
+    case CG_IR_BINOP: return IR_BINOP;
+    case CG_IR_UNOP: return IR_UNOP;
+    case CG_IR_CMP: return IR_CMP;
+    case CG_IR_CONVERT: return IR_CONVERT;
+    case CG_IR_CALL: return IR_CALL;
+    case CG_IR_RET: return IR_RET;
+    case CG_IR_BR: return IR_BR;
+    case CG_IR_CMP_BRANCH: return IR_CMP_BRANCH;
+    case CG_IR_SWITCH: return IR_SWITCH;
+    case CG_IR_INDIRECT_BRANCH: return IR_INDIRECT_BRANCH;
+    case CG_IR_LOAD_LABEL_ADDR: return IR_LOAD_LABEL_ADDR;
+    case CG_IR_LOCAL_STATIC_DATA_BEGIN: return IR_LOCAL_STATIC_DATA_BEGIN;
+    case CG_IR_LOCAL_STATIC_DATA_WRITE: return IR_LOCAL_STATIC_DATA_WRITE;
+    case CG_IR_LOCAL_STATIC_DATA_LABEL_ADDR:
+      return IR_LOCAL_STATIC_DATA_LABEL_ADDR;
+    case CG_IR_LOCAL_STATIC_DATA_END: return IR_LOCAL_STATIC_DATA_END;
+    case CG_IR_SCOPE_BEGIN: return IR_SCOPE_BEGIN;
+    case CG_IR_SCOPE_ELSE: return IR_SCOPE_ELSE;
+    case CG_IR_SCOPE_END: return IR_SCOPE_END;
+    case CG_IR_BREAK_TO: return IR_BREAK_TO;
+    case CG_IR_CONTINUE_TO: return IR_CONTINUE_TO;
+    case CG_IR_ALLOCA: return IR_ALLOCA;
+    case CG_IR_VA_START: return IR_VA_START;
+    case CG_IR_VA_ARG: return IR_VA_ARG;
+    case CG_IR_VA_END: return IR_VA_END;
+    case CG_IR_VA_COPY: return IR_VA_COPY;
+    case CG_IR_ATOMIC_LOAD: return IR_ATOMIC_LOAD;
+    case CG_IR_ATOMIC_STORE: return IR_ATOMIC_STORE;
+    case CG_IR_ATOMIC_RMW: return IR_ATOMIC_RMW;
+    case CG_IR_ATOMIC_CAS: return IR_ATOMIC_CAS;
+    case CG_IR_FENCE: return IR_FENCE;
+    case CG_IR_INTRINSIC: return IR_INTRINSIC;
+    case CG_IR_ASM_BLOCK: return IR_ASM_BLOCK;
+    default: return IR_NOP;
+  }
+}
+
+/* Lower source instruction `idx` into its block.
+ *
+ * LABEL instructions emit nothing; block boundaries already account for
+ * them. Every other instruction emits exactly one optimizer instruction
+ * whose operands, def(s), aux payload and (for terminators) block
+ * successors are filled in according to its opcode. Unknown opcodes become
+ * IR_NOP carrying the source aux pointer. */
+static void lower_one_inst(CgIrLower* l, u32 idx) {
+  const CgIrInst* in = &l->src->insts[idx];
+  u32 block = l->inst_block[idx];
+  Inst* out;
+
+  if ((CgIrOp)in->op == CG_IR_LABEL) return;
+
+  out = ir_emit(l->f, block, cg_op_to_ir_op((CgIrOp)in->op));
+  out->loc = in->loc;
+  switch ((CgIrOp)in->op) {
+    case CG_IR_LOAD_IMM:
+      lower_value_ops(l, out, in, 1);
+      out->extra.imm = in->extra.imm;
+      break;
+    case CG_IR_LOAD_CONST:
+      lower_value_ops(l, out, in, 1);
+      out->extra.cbytes = in->extra.cbytes;
+      break;
+    case CG_IR_COPY:
+    case CG_IR_BINOP:
+    case CG_IR_UNOP:
+    case CG_IR_CMP:
+    case CG_IR_CONVERT:
+    case CG_IR_ALLOCA:
+    case CG_IR_VA_ARG:
+      lower_value_ops(l, out, in, in->nopnds);
+      out->extra.imm = in->extra.imm;
+      break;
+    case CG_IR_LOAD:
+    case CG_IR_BITFIELD_LOAD: {
+      /* opnds[0] is the destination value, opnds[1] the address. */
+      OptOperand ops[2];
+      ops[0] = lower_operand_value(l, &in->opnds[0], in->loc);
+      ops[1] = lower_operand_addr(l, &in->opnds[1], in->loc);
+      out->opnds = dup_opt_ops(l, ops, 2);
+      out->nopnds = 2;
+      set_inst_def(out, &out->opnds[0]);
+      if ((CgIrOp)in->op == CG_IR_LOAD)
+        out->extra.mem = in->extra.mem;
+      else
+        out->extra.aux = in->extra.aux;
+      break;
+    }
+    case CG_IR_ATOMIC_LOAD: {
+      /* Both operands are lowered as values, unlike a plain LOAD. */
+      OptOperand ops[2];
+      ops[0] = lower_operand_value(l, &in->opnds[0], in->loc);
+      ops[1] = lower_operand_value(l, &in->opnds[1], in->loc);
+      out->opnds = dup_opt_ops(l, ops, 2);
+      out->nopnds = 2;
+      set_inst_def(out, &out->opnds[0]);
+      out->extra.aux = in->extra.aux;
+      break;
+    }
+    case CG_IR_STORE:
+    case CG_IR_AGG_COPY:
+    case CG_IR_AGG_SET:
+    case CG_IR_BITFIELD_STORE:
+      /* opnds[0] is the address; the remaining operands are values. */
+      lower_addr_value_ops(l, out, in, 1, in->nopnds - 1u);
+      if ((CgIrOp)in->op == CG_IR_STORE)
+        out->extra.mem = in->extra.mem;
+      else
+        out->extra.aux = in->extra.aux;
+      break;
+    case CG_IR_ATOMIC_STORE: {
+      OptOperand ops[2];
+      ops[0] = lower_operand_value(l, &in->opnds[0], in->loc);
+      ops[1] = lower_operand_value(l, &in->opnds[1], in->loc);
+      out->opnds = dup_opt_ops(l, ops, 2);
+      out->nopnds = 2;
+      out->extra.aux = in->extra.aux;
+      break;
+    }
+    case CG_IR_ADDR_OF: {
+      OptOperand ops[2];
+      ops[0] = lower_operand_value(l, &in->opnds[0], in->loc);
+      ops[1] = lower_operand_addr(l, &in->opnds[1], in->loc);
+      out->opnds = dup_opt_ops(l, ops, 2);
+      out->nopnds = 2;
+      set_inst_def(out, &out->opnds[0]);
+      break;
+    }
+    case CG_IR_TLS_ADDR_OF:
+      lower_value_ops(l, out, in, 1);
+      out->extra.aux = in->extra.aux;
+      break;
+    case CG_IR_CALL:
+      lower_call(l, out, in);
+      break;
+    case CG_IR_RET:
+      lower_ret(l, out, in);
+      l->f->blocks[block].nsucc = 0;
+      break;
+    case CG_IR_BR:
+      out->extra.imm = block_for_label(l, (Label)in->extra.imm, in->loc);
+      set_succ1(l, block, (u32)out->extra.imm);
+      break;
+    case CG_IR_CMP_BRANCH: {
+      /* succ[0] is the taken target, succ[1] the fallthrough. */
+      CgIrCmpBranchAux* aux = (CgIrCmpBranchAux*)in->extra.aux;
+      lower_use_ops(l, out, in, 2);
+      out->extra.imm = aux ? aux->op : CMP_NE;
+      ir_block_set_nsucc(l->f, block, 2);
+      l->f->blocks[block].succ[0] =
+          aux ? block_for_label(l, aux->target, in->loc) : UINT32_MAX;
+      l->f->blocks[block].succ[1] = fallthrough_block(l, idx);
+      break;
+    }
+    case CG_IR_SWITCH: {
+      CgIrSwitchAux* src = (CgIrSwitchAux*)in->extra.aux;
+      IRSwitchAux* aux = arena_znew(l->f->arena, IRSwitchAux);
+      lower_use_ops(l, out, in, 1);
+      if (src) {
+        aux->selector_type = src->selector_type;
+        aux->ncases = src->ncases;
+        aux->hint = src->hint;
+        aux->has_default = src->default_label != LABEL_NONE;
+        /* Without a default label the switch falls through. */
+        aux->default_block =
+            aux->has_default ? block_for_label(l, src->default_label, in->loc)
+                             : fallthrough_block(l, idx);
+        if (src->ncases) {
+          aux->cases = arena_array(l->f->arena, IRSwitchAuxCase, src->ncases);
+          for (u32 i = 0; i < src->ncases; ++i) {
+            aux->cases[i].value = src->cases[i].value;
+            aux->cases[i].block =
+                block_for_label(l, src->cases[i].label, in->loc);
+          }
+        }
+        /* Successors: one per case, then the default block last. */
+        ir_block_set_nsucc(l->f, block, src->ncases + 1u);
+        for (u32 i = 0; i < src->ncases; ++i)
+          l->f->blocks[block].succ[i] = aux->cases[i].block;
+        l->f->blocks[block].succ[src->ncases] = aux->default_block;
+      }
+      out->extra.aux = aux;
+      break;
+    }
+    case CG_IR_INDIRECT_BRANCH: {
+      CgIrIndirectAux* src = (CgIrIndirectAux*)in->extra.aux;
+      IRIndirectAux* aux = arena_znew(l->f->arena, IRIndirectAux);
+      lower_use_ops(l, out, in, 1);
+      if (src && src->ntargets) {
+        aux->ntargets = src->ntargets;
+        aux->targets = arena_array(l->f->arena, u32, src->ntargets);
+        ir_block_set_nsucc(l->f, block, src->ntargets);
+        for (u32 i = 0; i < src->ntargets; ++i) {
+          aux->targets[i] = block_for_label(l, src->targets[i], in->loc);
+          l->f->blocks[block].succ[i] = aux->targets[i];
+        }
+      }
+      out->extra.aux = aux;
+      break;
+    }
+    case CG_IR_LOAD_LABEL_ADDR:
+      lower_value_ops(l, out, in, 1);
+      out->extra.imm = block_for_label(l, (Label)in->extra.imm, in->loc);
+      break;
+    case CG_IR_LOCAL_STATIC_DATA_BEGIN:
+      out->extra.aux = in->extra.aux;
+      break;
+    case CG_IR_LOCAL_STATIC_DATA_WRITE:
+      out->extra.aux = in->extra.aux;
+      break;
+    case CG_IR_LOCAL_STATIC_DATA_LABEL_ADDR: {
+      /* The aux is copied so its target can be rewritten from a label id
+       * to the block id that label resolves to. */
+      CgIrLocalStaticLabelAux* src = (CgIrLocalStaticLabelAux*)in->extra.aux;
+      CgIrLocalStaticLabelAux* aux =
+          arena_znew(l->f->arena, CgIrLocalStaticLabelAux);
+      if (src) {
+        *aux = *src;
+        aux->target = (Label)block_for_label(l, src->target, in->loc);
+      }
+      out->extra.aux = aux;
+      break;
+    }
+    case CG_IR_LOCAL_STATIC_DATA_END:
+      break;
+    case CG_IR_SCOPE_BEGIN: {
+      CgIrScopeAux* src = (CgIrScopeAux*)in->extra.aux;
+      IRScopeAux* aux = arena_znew(l->f->arena, IRScopeAux);
+      if (src) {
+        aux->scope_id = src->scope;
+        aux->desc.kind = src->desc.kind;
+        aux->desc.break_label = src->desc.break_label;
+        aux->desc.continue_label = src->desc.continue_label;
+        aux->desc.result_type = src->desc.result_type;
+        aux->desc.cond = lower_operand_value(l, &src->desc.cond, in->loc);
+      }
+      out->extra.aux = aux;
+      break;
+    }
+    case CG_IR_SCOPE_ELSE:
+    case CG_IR_SCOPE_END:
+    case CG_IR_BREAK_TO:
+    case CG_IR_CONTINUE_TO:
+      out->extra.imm = in->extra.imm;
+      break;
+    case CG_IR_VA_START:
+    case CG_IR_VA_END:
+      lower_addr_value_ops(l, out, in, 1, 0);
+      break;
+    case CG_IR_VA_COPY:
+      lower_addr_value_ops(l, out, in, 2, 0);
+      break;
+    case CG_IR_ATOMIC_RMW:
+      lower_value_ops(l, out, in, 3);
+      out->extra.aux = in->extra.aux;
+      break;
+    case CG_IR_ATOMIC_CAS: {
+      const CgIrAtomicAux* src = (const CgIrAtomicAux*)in->extra.aux;
+      IRCasAux* aux;
+      lower_value_ops(l, out, in, 5);
+      /* opnds[0] and opnds[1] are the two results. Only a register
+       * operand names a def; an address-taken local is lowered as a frame
+       * operand whose v.reg is not a register, so it records 0, the same
+       * convention lower_intrinsic() and lower_asm() use. */
+      out->ndefs = 2;
+      out->defs = arena_array(l->f->arena, Val, 2);
+      for (u32 i = 0; i < 2u; ++i)
+        out->defs[i] =
+            out->opnds[i].kind == OPK_REG ? out->opnds[i].v.reg : 0;
+      out->def = out->defs[0];
+      out->type = out->opnds[0].type;
+      aux = arena_znew(l->f->arena, IRCasAux);
+      if (src) {
+        aux->mem = src->mem;
+        aux->success = src->order;
+        aux->failure = src->failure;
+      }
+      out->extra.aux = aux;
+      break;
+    }
+    case CG_IR_FENCE:
+      out->extra.imm = in->extra.imm;
+      break;
+    case CG_IR_INTRINSIC:
+      lower_intrinsic(l, out, in);
+      break;
+    case CG_IR_ASM_BLOCK:
+      lower_asm(l, out, in);
+      break;
+    default:
+      out->extra.aux = in->extra.aux;
+      break;
+  }
+}
+
+/* Give every block that has no successors and does not end in a
+ * terminator a single fallthrough edge to the block that follows it in
+ * emit order.
+ *
+ * Blocks ending in a branch, switch, return, break/continue, or a
+ * LONGJMP/TRAP/UNREACHABLE intrinsic are left alone, as are blocks that do
+ * not appear in the emit order or appear only as its last entry.
+ *
+ * The "next block in emit order" relation is computed once up front so the
+ * pass is linear in blocks + emit order, instead of rescanning the emit
+ * order for every unterminated block. */
+static void add_fallthrough_succs(CgIrLower* l) {
+  Func* f = l->f;
+  u32* next = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
+
+  for (u32 b = 0; b < f->nblocks; ++b) next[b] = UINT32_MAX;
+  /* Walk backwards so that, if a block is listed more than once, its
+   * earliest position decides the successor. */
+  for (u32 i = f->emit_order_n; i > 1u; --i) {
+    u32 from = f->emit_order[i - 2u];
+    if (from < f->nblocks) next[from] = f->emit_order[i - 1u];
+  }
+
+  for (u32 b = 0; b < f->nblocks; ++b) {
+    Block* bl = &f->blocks[b];
+    if (bl->nsucc) continue;
+    if (bl->ninsts) {
+      Inst* last = &bl->insts[bl->ninsts - 1u];
+      switch ((IROp)last->op) {
+        case IR_BR:
+        case IR_CONDBR:
+        case IR_CMP_BRANCH:
+        case IR_SWITCH:
+        case IR_INDIRECT_BRANCH:
+        case IR_RET:
+        case IR_BREAK_TO:
+        case IR_CONTINUE_TO:
+          continue;
+        case IR_INTRINSIC: {
+          IRIntrinAux* aux = (IRIntrinAux*)last->extra.aux;
+          if (aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP ||
+                      aux->kind == INTRIN_UNREACHABLE))
+            continue;
+          break;
+        }
+        default:
+          break;
+      }
+    }
+    if (next[b] != UINT32_MAX) set_succ1(l, b, next[b]);
+  }
+}
+
+/* Convert a completed CgIrFunc recording into an optimizer Func.
+ *
+ * Returns NULL when either argument is NULL. Otherwise the function
+ * descriptor is copied (parameter descriptors are allocated from c->tu), a
+ * new Func is created, and the lowering runs in this order: locals,
+ * params, leader marking, block creation, parameter declarations,
+ * per-instruction lowering, fallthrough edges, CFG construction. */
+Func* opt_func_from_cg_ir(Compiler* c, const CgIrFunc* src) {
+  if (c == NULL || src == NULL) return NULL;
+
+  OptCGFuncDesc desc = lower_func_desc(c->tu, &src->desc);
+  Func* f = ir_func_new(c, &desc);
+
+  CgIrLower ctx;
+  memset(&ctx, 0, sizeof ctx);
+  ctx.c = c;
+  ctx.src = src;
+  ctx.f = f;
+  ctx.nlabels = label_id_max(src);
+
+  /* label_place[id] is the index of the instruction placing label `id`;
+   * UINT32_MAX means "not placed". Slot 0 is unused. */
+  u32* label_place =
+      arena_array(f->arena, u32, ctx.nlabels ? ctx.nlabels + 1u : 1u);
+  for (u32 k = 0; k <= ctx.nlabels; ++k) label_place[k] = UINT32_MAX;
+  ctx.leader = arena_zarray(f->arena, u8, src->ninsts + 1u);
+
+  lower_locals(&ctx);
+  lower_params(&ctx);
+  mark_leaders(&ctx, label_place);
+  make_blocks(&ctx, label_place);
+  emit_param_decls(&ctx);
+  for (u32 k = 0; k < src->ninsts; ++k) lower_one_inst(&ctx, k);
+  add_fallthrough_succs(&ctx);
+  opt_build_cfg(f);
+  return f;
+}
diff --git a/src/opt/ir_print.c b/src/opt/ir_print.c
@@ -78,6 +78,14 @@ static const char* op_name(IROp op) {
return "indirect_branch";
case IR_LOAD_LABEL_ADDR:
return "load_label_addr";
+ case IR_LOCAL_STATIC_DATA_BEGIN:
+ return "local_static_data_begin";
+ case IR_LOCAL_STATIC_DATA_WRITE:
+ return "local_static_data_write";
+ case IR_LOCAL_STATIC_DATA_LABEL_ADDR:
+ return "local_static_data_label_addr";
+ case IR_LOCAL_STATIC_DATA_END:
+ return "local_static_data_end";
case IR_RET:
return "ret";
case IR_SCOPE_BEGIN:
@@ -177,7 +185,7 @@ static void dump_operand(Writer* w, const Operand* op) {
return;
}
strbuf_init(&sb, buf, sizeof buf);
- switch ((OpKind)op->kind) {
+ switch ((OptOperandKind)op->kind) {
case OPK_IMM:
strbuf_puts(&sb, "imm:");
strbuf_put_i64(&sb, (i64)op->v.imm);
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1,1698 +1,678 @@
-/* opt.c — CGTarget wrapper that records each function as IR (doc/OPT.md
- * §1). Each CGTarget call lands as exactly one Inst in the current
- * Func's current block. CG virtual registers are recorded as mutable
- * pseudo-register ids (PReg); O2 turns them into Val ids with
- * opt_build_reg_ssa. Labels, frame slots, and scopes keep their direct IR id
- * mappings (label ↔ block id, vslot ↔ IR FrameSlot, vscope ↔ scope_aux index).
- *
- * OPT1: level 1 records the CGTarget stream, runs the minimal backend
- * lowering schedule, rewrites virtual regs to hard regs/spill slots,
- * and emits the rewritten IR into the wrapped target.
- *
- * Methods the wrapper rejects under unbounded virtuals:
- * - spill_reg / reload_reg are CG -O0 register-pressure
- * mechanics. CG never invokes them on real backends in v1, and
- * they're meaningless for opt's vreg space — calling them is a
- * wiring bug, so we panic loudly. */
-
#include <string.h>
+#include "abi/abi.h"
+#include "cg/ir.h"
+#include "cg/ir_recorder.h"
+#include "cg/native_direct_target.h"
+#include "cg/type.h"
#include "core/arena.h"
#include "core/core.h"
#include "core/metrics.h"
-#include "core/slice.h"
-#include "opt/ir.h"
+#include "core/strbuf.h"
#include "opt/opt_internal.h"
-/* ---- wrapper state ---- */
+#undef Operand
+#undef CGCallDesc
+#undef CGFuncDesc
+#undef CGParamDesc
+#undef CGScopeDesc
typedef struct OptImpl {
- CGTarget base;
- CGTarget* target;
- int level;
Compiler* c;
-
- /* Current function being recorded. NULL between functions. */
- Func* f;
- u32 cur; /* current block id */
- SrcLoc pending_loc; /* most recent set_loc; stamped on each Inst */
- FuncSet funcs;
-
+ CgTarget* target; /* target whose callbacks the replay_* helpers invoke */
+ NativeTarget* native; /* NOTE(review): not referenced in the visible part of this hunk; presumably the native emission target */
+ int level; /* presumably the optimization level; confirm against the entry point */
Writer* dump_writer;
} OptImpl;
-static OptImpl* impl_of(CGTarget* t) { return (OptImpl*)t; }
-
-static _Noreturn void panic_unsupported(OptImpl* o, const char* what) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(o->c, loc,
- "opt_cgtarget: %.*s called under unbounded virtuals",
- SLICE_ARG(slice_from_cstr(what)));
-}
-
-/* ---- recording helpers ---- */
-
-static Inst* rec(OptImpl* o, IROp op) {
- Inst* in = ir_emit(o->f, o->cur, op);
- in->loc = o->pending_loc;
- return in;
-}
-
-static void set_preg_def(Func* f, Inst* in, u32 block, PReg r,
- CfreeCgTypeId t) {
- (void)f;
- (void)block;
- in->def = (Val)r;
- in->type = t;
-}
-
-static int intrinsic_terminates(IntrinKind kind) {
- return kind == INTRIN_LONGJMP || kind == INTRIN_TRAP ||
- kind == INTRIN_UNREACHABLE;
-}
-
-static void ensure_operand(Func* f, const Operand* op) {
- if (!op) return;
- if (op->kind == OPK_REG) {
- ir_ensure_preg(f, (PReg)op->v.reg, op->type, op->cls);
- } else if (op->kind == OPK_INDIRECT) {
- ir_ensure_preg(f, (PReg)op->v.ind.base, 0, RC_INT);
- if (op->v.ind.index != (Reg)REG_NONE)
- ir_ensure_preg(f, (PReg)op->v.ind.index, 0, RC_INT);
- }
-}
-
-static void ensure_abivalue(Func* f, const CGABIValue* v) {
- if (!v) return;
- ensure_operand(f, &v->storage);
- for (u32 i = 0; i < v->nparts; ++i) ensure_operand(f, &v->parts[i].op);
-}
-
-static Operand* dup_opnds(Func* f, const Operand* src, u32 n) {
- if (!n) return NULL;
- for (u32 i = 0; i < n; ++i) ensure_operand(f, &src[i]);
- Operand* dst = arena_array(f->arena, Operand, n);
- memcpy(dst, src, sizeof(Operand) * n);
- return dst;
-}
-
-static int cur_terminated(OptImpl* o) {
- Block* b = &o->f->blocks[o->cur];
- if (b->nsucc > 0) return 1;
- if (b->ninsts == 0) return 0;
- Inst* last = &b->insts[b->ninsts - 1];
- if ((IROp)last->op == IR_RET) return 1;
- if ((IROp)last->op == IR_INTRINSIC) {
- IRIntrinAux* aux = (IRIntrinAux*)last->extra.aux;
- return aux && intrinsic_terminates(aux->kind);
+typedef struct OptReplay { /* id-translation state shared by the replay_* helpers */
+ OptImpl* o; /* owning optimizer state; gives access to o->c and o->target */
+ CGLocal* local_map; /* indexed by recorded local id; replay_local rejects a zero entry */
+ u32 nlocals; /* largest local id replay_local accepts */
+ Label* label_map; /* indexed by recorded label id; replay_label rejects a zero entry */
+ u32 nlabels; /* largest label id replay_label accepts */
+ CGScope* scope_map; /* indexed by recorded scope id; replay_scope rejects a zero entry */
+ u32 nscopes; /* largest scope id replay_scope accepts */
+} OptReplay;
+
+static int opt_type_large_or_aggregate(Compiler* c, CfreeCgTypeId ty) { /* Nonzero when ty is an aggregate or abi_cg_sizeof reports a size above 8. */
+ if (!ty) return 0; /* type id 0 never qualifies */
+ return cg_type_is_aggregate(c, ty) || abi_cg_sizeof(c->abi, ty) > 8u;
+}
+
+static int opt_func_needs_direct_replay(OptImpl* o, const CgIrFunc* f) { /* Returns 1 as soon as any check below matches, otherwise 0. */
+ for (u32 i = 0; i < f->desc.nresults; ++i) /* check 1: a large or aggregate result type */
+ if (opt_type_large_or_aggregate(o->c, f->desc.result_types[i])) return 1;
+ for (u32 i = 0; i < f->desc.nparams; ++i) /* check 2: a large or aggregate parameter type */
+ if (opt_type_large_or_aggregate(o->c, f->desc.params[i].type)) return 1;
+ for (u32 i = 0; i < f->ninsts; ++i) { /* check 3: scan every recorded instruction */
+ const CgIrInst* in = &f->insts[i];
+ switch ((CgIrOp)in->op) {
+ case CG_IR_ASM_BLOCK: /* the mere presence of any op in this group returns 1 */
+ case CG_IR_ALLOCA:
+ case CG_IR_INTRINSIC:
+ case CG_IR_VA_START:
+ case CG_IR_VA_ARG:
+ case CG_IR_VA_END:
+ case CG_IR_VA_COPY:
+ return 1;
+ case CG_IR_CALL: { /* calls match only when an argument or result local is large or aggregate */
+ const CgIrCallAux* aux = (const CgIrCallAux*)in->extra.aux;
+ if (!aux) break; /* a call without aux data is ignored */
+ for (u32 a = 0; a < aux->desc.nargs; ++a) {
+ CGLocal local = aux->desc.args[a];
+ if (local && local <= f->nlocals && /* ids are 1-based here: id N is f->locals[N - 1]; 0 and out-of-range ids are skipped */
+ opt_type_large_or_aggregate(o->c,
+ f->locals[local - 1u].desc.type))
+ return 1;
+ }
+ for (u32 r = 0; r < aux->desc.nresults; ++r) { /* same test for the call's result locals */
+ CGLocal local = aux->desc.results[r];
+ if (local && local <= f->nlocals &&
+ opt_type_large_or_aggregate(o->c,
+ f->locals[local - 1u].desc.type))
+ return 1;
+ }
+ break;
+ }
+ default:
+ break;
+ }
}
return 0;
}
-static void set_cur(OptImpl* o, u32 b) {
- o->cur = b;
- ir_note_emit(o->f, b);
-}
-
-/* After emitting a terminator, allocate a fresh block for any
- * subsequent (likely unreachable) recording. */
-static void after_terminator(OptImpl* o) { set_cur(o, ir_block_new(o->f)); }
-
-/* ---- function lifecycle ---- */
-
-static void w_func_begin(CGTarget* t, const CGFuncDesc* fd) {
- OptImpl* o = impl_of(t);
- o->f = ir_func_new(o->c, fd);
- u32 entry = ir_block_new(o->f);
- o->f->entry = entry;
- set_cur(o, entry);
- o->pending_loc = (SrcLoc){0, 0, 0};
-}
-
-static void w_func_end(CGTarget* t);
-static void w_addr_of(CGTarget* t, Operand dst, Operand lv);
-
-/* ---- registers and frame slots ---- */
-
-static FrameSlot w_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
- OptImpl* o = impl_of(t);
- return ir_frame_slot_new(o->f, d);
-}
-
-static FrameSlot opt_local_frame_slot(Func* f, const CGLocalDesc* d,
- int force_addr_taken) {
- FrameSlotDesc fsd;
- memset(&fsd, 0, sizeof fsd);
- fsd.type = d->type;
- fsd.name = d->name;
- fsd.loc = d->loc;
- fsd.size = d->size;
- fsd.align = d->align;
- fsd.kind = FS_LOCAL;
- if (force_addr_taken || (d->flags & CG_LOCAL_ADDR_TAKEN))
- fsd.flags |= FSF_ADDR_TAKEN;
- return ir_frame_slot_new(f, &fsd);
+static Label replay_label(OptReplay* r, Label label, SrcLoc loc) { /* Translate a recorded label id through r->label_map. */
+ if (label == LABEL_NONE) return LABEL_NONE; /* LABEL_NONE passes through without a lookup */
+ if (label > r->nlabels || !r->label_map[label]) /* id out of range, or its map entry is zero */
+ compiler_panic(r->o->c, loc, "opt direct replay: bad label"); /* presumably does not return; confirm */
+ return r->label_map[label];
}
-static FrameSlot opt_param_frame_slot(Func* f, const CGParamDesc* d) {
- FrameSlotDesc fsd;
- memset(&fsd, 0, sizeof fsd);
- fsd.type = d->type;
- fsd.name = d->name;
- fsd.loc = d->loc;
- fsd.size = d->size;
- fsd.align = d->align;
- fsd.kind = FS_PARAM;
- if (d->flags & CG_LOCAL_ADDR_TAKEN) fsd.flags |= FSF_ADDR_TAKEN;
- return ir_frame_slot_new(f, &fsd);
+static CGLocal replay_local(OptReplay* r, CGLocal local, SrcLoc loc) { /* Translate a recorded local id through r->local_map. */
+ if (local == CG_LOCAL_NONE) return CG_LOCAL_NONE; /* CG_LOCAL_NONE passes through without a lookup */
+ if (local > r->nlocals || !r->local_map[local]) /* id out of range, or its map entry is zero */
+ compiler_panic(r->o->c, loc, "opt direct replay: bad local"); /* presumably does not return; confirm */
+ return r->local_map[local];
}
-static u8 opt_local_reg_class_for(Compiler* c, CfreeCgTypeId ty) {
- CfreeCgTypeKind kind = cfree_cg_type_kind((CfreeCompiler*)c, ty);
- return kind == CFREE_CG_TYPE_FLOAT ? RC_FP : RC_INT;
+static CGScope replay_scope(OptReplay* r, CGScope scope, SrcLoc loc) { /* Translate a recorded scope id through r->scope_map. */
+ if (scope == CG_SCOPE_NONE) return CG_SCOPE_NONE; /* CG_SCOPE_NONE passes through without a lookup */
+ if (scope > r->nscopes || !r->scope_map[scope]) /* id out of range, or its map entry is zero */
+ compiler_panic(r->o->c, loc, "opt direct replay: bad scope"); /* presumably does not return; confirm */
+ return r->scope_map[scope];
}
-static u8 opt_local_reg_class(OptImpl* o, CfreeCgTypeId ty) {
- return opt_local_reg_class_for(o->c, ty);
-}
-
-static CGLocalStorage w_local(CGTarget* t, const CGLocalDesc* d) {
- OptImpl* o = impl_of(t);
- CGLocalStorage st;
- memset(&st, 0, sizeof st);
- if (o->level < 2 &&
- (d->flags & (CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED)) == 0) {
- PReg v = ir_alloc_preg(o->f, d->type, opt_local_reg_class(o, d->type));
- st.kind = CG_LOCAL_STORAGE_REG;
- st.v.reg = (Reg)v;
- } else {
- st.kind = CG_LOCAL_STORAGE_FRAME;
- st.v.frame_slot = opt_local_frame_slot(o->f, d, 0);
+static Operand replay_operand(OptReplay* r, Operand in, SrcLoc loc) { /* Return a copy of in with its local ids translated by replay_local. */
+ if (in.kind == OPK_LOCAL) {
+ in.v.local = replay_local(r, in.v.local, loc);
+ } else if (in.kind == OPK_INDIRECT) { /* indirect operands carry two local ids: base and index */
+ in.v.ind.base = replay_local(r, in.v.ind.base, loc);
+ in.v.ind.index = replay_local(r, in.v.ind.index, loc); /* a CG_LOCAL_NONE index is returned unchanged by replay_local */
}
- ir_local_add(o->f, d, st);
- return st;
-}
-
-static IRLocal* opt_find_local_by_reg(Func* f, Reg reg) {
- for (u32 i = 0; i < f->nlocals; ++i) {
- IRLocal* l = &f->locals[i];
- if (l->storage.kind == CG_LOCAL_STORAGE_REG && l->storage.v.reg == reg)
- return l;
- }
- return NULL;
+ return in; /* operands of any other kind are returned unmodified */
}
-static void w_local_addr(CGTarget* t, Operand dst, const CGLocalDesc* d,
- CGLocalStorage st) {
- OptImpl* o = impl_of(t);
- IRLocal* local = NULL;
- FrameSlot frame_slot = FRAME_SLOT_NONE;
- const CGLocalDesc* desc = d;
- if (st.kind == CG_LOCAL_STORAGE_REG) {
- local = opt_find_local_by_reg(o->f, st.v.reg);
- if (!local) {
- compiler_panic(o->c, d ? d->loc : o->pending_loc,
- "opt_cgtarget: unknown register-backed local address");
+static void replay_operands(OptReplay* r, Operand* dst, const Operand* src,
+ u32 n, SrcLoc loc) { /* Translate the first n operands of src into dst via replay_operand. */
+ for (u32 i = 0; i < n; ++i) dst[i] = replay_operand(r, src[i], loc); /* caller must provide at least n entries in both arrays */
+}
+
+static CGCallDesc replay_call_desc(OptReplay* r, const CGCallDesc* src,
+ SrcLoc loc) { /* Return a copy of *src with callee, args and results translated. */
+ CGCallDesc out = *src; /* every field not overwritten below is copied unchanged */
+ out.callee = replay_operand(r, src->callee, loc);
+ if (src->nargs) { /* with nargs == 0, out.args keeps the pointer copied from *src */
+ CGLocal* args = arena_array(r->o->c->tu, CGLocal, src->nargs); /* fresh array; src->args itself is not modified */
+ for (u32 i = 0; i < src->nargs; ++i)
+ args[i] = replay_local(r, src->args[i], loc);
+ out.args = args;
+ }
+ if (src->nresults) { /* same treatment for the result locals */
+ CGLocal* results = arena_array(r->o->c->tu, CGLocal, src->nresults);
+ for (u32 i = 0; i < src->nresults; ++i)
+ results[i] = replay_local(r, src->results[i], loc);
+ out.results = results;
+ }
+ return out;
+}
+
+static void replay_switch(OptReplay* r, const CgIrInst* in) { /* Rebuild a CGSwitchDesc from a recorded switch and pass it to the target's switch_ callback. */
+ const CgIrSwitchAux* src = (const CgIrSwitchAux*)in->extra.aux; /* NOTE(review): dereferenced below without a NULL check */
+ CGSwitchDesc d;
+ memset(&d, 0, sizeof d); /* fields not assigned below stay zeroed */
+ d.selector = replay_operand(r, in->opnds[0], in->loc); /* the selector is the instruction's first operand */
+ d.selector_type = src->selector_type;
+ d.default_label = replay_label(r, src->default_label, in->loc); /* a LABEL_NONE default passes through replay_label unchanged */
+ d.ncases = src->ncases;
+ d.hint = src->hint;
+ d.opt_level = src->opt_level;
+ if (src->ncases) { /* with no cases, d.cases stays zeroed by the memset above */
+ CGSwitchCase* cases = arena_array(r->o->c->tu, CGSwitchCase, src->ncases);
+ for (u32 i = 0; i < src->ncases; ++i) {
+ cases[i] = src->cases[i]; /* copy the whole case, then overwrite only its label */
+ cases[i].label = replay_label(r, src->cases[i].label, in->loc);
}
- if (local->home_slot == FRAME_SLOT_NONE)
- local->home_slot = opt_local_frame_slot(o->f, &local->desc, 1);
- local->address_taken = 1;
- local->desc.flags |= CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED;
- frame_slot = local->home_slot;
- desc = &local->desc;
- } else {
- frame_slot = st.v.frame_slot;
- }
- Operand lv;
- memset(&lv, 0, sizeof lv);
- lv.kind = OPK_LOCAL;
- lv.cls = RC_INT;
- lv.type = desc ? desc->type : dst.type;
- lv.v.frame_slot = frame_slot;
- w_addr_of(t, dst, lv);
-}
-
-static Operand opt_local_addr_operand(IRLocal* l) {
- Operand o;
- memset(&o, 0, sizeof o);
- o.kind = OPK_LOCAL;
- o.cls = RC_INT;
- o.type = l->desc.type;
- o.v.frame_slot = l->home_slot;
- return o;
-}
-
-static MemAccess opt_local_mem(IRLocal* l) {
- MemAccess m;
- memset(&m, 0, sizeof m);
- m.type = l->desc.type;
- m.size = l->desc.size;
- m.align = l->desc.align;
- m.alias.kind = ALIAS_LOCAL;
- m.alias.v.local_id = (i32)l->home_slot;
- return m;
-}
-
-static int inst_defines_val(const Inst* in, Val v) {
- if (!in || v == VAL_NONE) return 0;
- if (in->def == v) return 1;
- for (u32 i = 0; i < in->ndefs; ++i)
- if (in->defs[i] == v) return 1;
- return 0;
-}
-
-static int op_uses_reg(const Operand* op, Reg reg) {
- if (!op) return 0;
- if (op->kind == OPK_REG && op->v.reg == reg) return 1;
- if (op->kind == OPK_INDIRECT &&
- (op->v.ind.base == reg ||
- (op->v.ind.index != (Reg)REG_NONE && op->v.ind.index == reg)))
- return 1;
- return 0;
-}
-
-static int abivalue_uses_reg(const CGABIValue* v, Reg reg) {
- if (!v) return 0;
- if (op_uses_reg(&v->storage, reg)) return 1;
- for (u32 i = 0; i < v->nparts; ++i)
- if (op_uses_reg(&v->parts[i].op, reg)) return 1;
- return 0;
-}
-
-static int inst_uses_local_reg(const Inst* in, Reg reg) {
- if (!in) return 0;
- for (u32 i = 0; i < in->nopnds; ++i) {
- int is_def = i == 0 && in->opnds[i].kind == OPK_REG &&
- inst_defines_val(in, (Val)in->opnds[i].v.reg);
- if (!is_def && op_uses_reg(&in->opnds[i], reg)) return 1;
- }
- switch ((IROp)in->op) {
- case IR_CALL: {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) return 0;
- if (op_uses_reg(&aux->desc.callee, reg)) return 1;
- for (u32 i = 0; i < aux->desc.nargs; ++i)
- if (abivalue_uses_reg(&aux->desc.args[i], reg)) return 1;
- return 0;
+ d.cases = cases;
+ }
+ r->o->target->switch_(r->o->target, &d); /* d lives on this stack frame; the callback must not keep the pointer (assumption, confirm) */
+}
+
+static void replay_inst(OptReplay* r, const CgIrInst* in) {
+ CgTarget* t = r->o->target;
+ Operand ops[5];
+ if (t->set_loc) t->set_loc(t, in->loc);
+ switch ((CgIrOp)in->op) {
+ case CG_IR_NOP:
+ return;
+ case CG_IR_LABEL:
+ t->label_place(t, replay_label(r, (Label)in->extra.imm, in->loc));
+ return;
+ case CG_IR_LOAD_IMM:
+ ops[0] = replay_operand(r, in->opnds[0], in->loc);
+ t->load_imm(t, ops[0], in->extra.imm);
+ return;
+ case CG_IR_LOAD_CONST:
+ ops[0] = replay_operand(r, in->opnds[0], in->loc);
+ t->load_const(t, ops[0], in->extra.cbytes);
+ return;
+ case CG_IR_COPY:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->copy(t, ops[0], ops[1]);
+ return;
+ case CG_IR_LOAD:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->load(t, ops[0], ops[1], in->extra.mem);
+ return;
+ case CG_IR_STORE:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->store(t, ops[0], ops[1], in->extra.mem);
+ return;
+ case CG_IR_ADDR_OF:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->addr_of(t, ops[0], ops[1]);
+ return;
+ case CG_IR_TLS_ADDR_OF: {
+ const CgIrTlsAux* aux = (const CgIrTlsAux*)in->extra.aux;
+ ops[0] = replay_operand(r, in->opnds[0], in->loc);
+ t->tls_addr_of(t, ops[0], aux->sym, aux->addend);
+ return;
}
- case IR_RET: {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- return aux && aux->present && abivalue_uses_reg(&aux->val, reg);
+ case CG_IR_AGG_COPY: {
+ const CgIrAggAux* aux = (const CgIrAggAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->copy_bytes(t, ops[0], ops[1], aux->access);
+ return;
}
- case IR_SCOPE_BEGIN: {
- IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
- return aux && op_uses_reg(&aux->desc.cond, reg);
+ case CG_IR_AGG_SET: {
+ const CgIrAggAux* aux = (const CgIrAggAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->set_bytes(t, ops[0], ops[1], aux->access);
+ return;
}
- case IR_ASM_BLOCK: {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) return 0;
- for (u32 i = 0; i < aux->nin; ++i)
- if (op_uses_reg(&aux->in_ops[i], reg)) return 1;
- return 0;
+ case CG_IR_BITFIELD_LOAD: {
+ const CgIrBitFieldAux* aux = (const CgIrBitFieldAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->bitfield_load(t, ops[0], ops[1], aux->access);
+ return;
}
- case IR_INTRINSIC: {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- if (!aux) return 0;
- for (u32 i = 0; i < aux->narg; ++i)
- if (op_uses_reg(&aux->args[i], reg)) return 1;
- return 0;
+ case CG_IR_BITFIELD_STORE: {
+ const CgIrBitFieldAux* aux = (const CgIrBitFieldAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->bitfield_store(t, ops[0], ops[1], aux->access);
+ return;
}
- default:
- return 0;
- }
-}
-
-static void opt_make_local_load(Func* f, Inst* out, IRLocal* l, SrcLoc loc) {
- memset(out, 0, sizeof *out);
- out->op = IR_LOAD;
- ir_assign_inst_id(f, out);
- out->loc = loc;
- out->type = l->desc.type;
- out->def = (Val)l->storage.v.reg;
- out->opnds = arena_array(f->arena, Operand, 2);
- out->opnds[0].kind = OPK_REG;
- out->opnds[0].cls = opt_local_reg_class_for(f->c, l->desc.type);
- out->opnds[0].type = l->desc.type;
- out->opnds[0].v.reg = l->storage.v.reg;
- out->opnds[1] = opt_local_addr_operand(l);
- out->nopnds = 2;
- out->extra.mem = opt_local_mem(l);
-}
-
-static void opt_make_local_store(Func* f, Inst* out, IRLocal* l, SrcLoc loc) {
- memset(out, 0, sizeof *out);
- out->op = IR_STORE;
- ir_assign_inst_id(f, out);
- out->loc = loc;
- out->opnds = arena_array(f->arena, Operand, 2);
- out->opnds[0] = opt_local_addr_operand(l);
- out->opnds[1].kind = OPK_REG;
- out->opnds[1].cls = opt_local_reg_class_for(f->c, l->desc.type);
- out->opnds[1].type = l->desc.type;
- out->opnds[1].v.reg = l->storage.v.reg;
- out->nopnds = 2;
- out->extra.mem = opt_local_mem(l);
-}
-
-static IRLocal* opt_addr_taken_reg_local_defined_by(Func* f, const Inst* in) {
- if (!in) return NULL;
- for (u32 i = 0; i < f->nlocals; ++i) {
- IRLocal* l = &f->locals[i];
- if (!l->address_taken || l->home_slot == FRAME_SLOT_NONE) continue;
- if (l->storage.kind == CG_LOCAL_STORAGE_REG &&
- inst_defines_val(in, (Val)l->storage.v.reg))
- return l;
- }
- return NULL;
-}
-
-static void opt_frame_home_addr_taken_locals(Func* f) {
- int any = 0;
- for (u32 i = 0; i < f->nlocals; ++i) {
- IRLocal* l = &f->locals[i];
- if (l->address_taken && l->storage.kind == CG_LOCAL_STORAGE_REG &&
- l->home_slot != FRAME_SLOT_NONE) {
- any = 1;
- break;
+ case CG_IR_BINOP:
+ replay_operands(r, ops, in->opnds, 3, in->loc);
+ t->binop(t, (BinOp)in->extra.imm, ops[0], ops[1], ops[2]);
+ return;
+ case CG_IR_UNOP:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->unop(t, (UnOp)in->extra.imm, ops[0], ops[1]);
+ return;
+ case CG_IR_CMP:
+ replay_operands(r, ops, in->opnds, 3, in->loc);
+ t->cmp(t, (CmpOp)in->extra.imm, ops[0], ops[1], ops[2]);
+ return;
+ case CG_IR_CONVERT:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->convert(t, (ConvKind)in->extra.imm, ops[0], ops[1]);
+ return;
+ case CG_IR_CALL: {
+ const CgIrCallAux* aux = (const CgIrCallAux*)in->extra.aux;
+ CGCallDesc d = replay_call_desc(r, &aux->desc, in->loc);
+ t->call(t, &d);
+ return;
}
- }
- if (!any) return;
-
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- if (!bl->ninsts) continue;
- u32 out_cap = bl->ninsts * (f->nlocals + 2u);
- Inst* out = arena_zarray(f->arena, Inst, out_cap ? out_cap : 1u);
- u32 nout = 0;
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst in = bl->insts[i];
- for (u32 j = 0; j < f->nlocals; ++j) {
- IRLocal* used = &f->locals[j];
- if (!used->address_taken || used->home_slot == FRAME_SLOT_NONE)
- continue;
- if (used->storage.kind != CG_LOCAL_STORAGE_REG) continue;
- if (inst_uses_local_reg(&in, used->storage.v.reg))
- opt_make_local_load(f, &out[nout++], used, in.loc);
+ case CG_IR_RET: {
+ const CgIrRetAux* aux = (const CgIrRetAux*)in->extra.aux;
+ CGLocal* values = NULL;
+ if (aux && aux->nvalues) {
+ values = arena_array(r->o->c->tu, CGLocal, aux->nvalues);
+ for (u32 i = 0; i < aux->nvalues; ++i)
+ values[i] = replay_local(r, aux->values[i], in->loc);
}
- out[nout++] = in;
- IRLocal* defined = opt_addr_taken_reg_local_defined_by(f, &in);
- if (defined) opt_make_local_store(f, &out[nout++], defined, in.loc);
+ t->ret(t, values, aux ? aux->nvalues : 0u);
+ return;
}
- bl->insts = out;
- bl->ninsts = nout;
- bl->cap = bl->ninsts;
- }
-}
-
-static CGLocalStorage w_param(CGTarget* t, const CGParamDesc* d) {
- OptImpl* o = impl_of(t);
- CGLocalStorage st = d->storage;
- CGLocalDesc local_desc;
- memset(&local_desc, 0, sizeof local_desc);
- local_desc.type = d->type;
- local_desc.name = d->name;
- local_desc.loc = d->loc;
- local_desc.size = d->size;
- local_desc.align = d->align;
- local_desc.flags = d->flags;
- if (st.kind == CG_LOCAL_STORAGE_FRAME && st.v.frame_slot == FRAME_SLOT_NONE) {
- if (o->level < 2 &&
- (d->flags & (CG_LOCAL_ADDR_TAKEN | CG_LOCAL_MEMORY_REQUIRED)) == 0) {
- PReg v = ir_alloc_preg(o->f, d->type, opt_local_reg_class(o, d->type));
- st.kind = CG_LOCAL_STORAGE_REG;
- st.v.reg = (Reg)v;
- } else {
- st.kind = CG_LOCAL_STORAGE_FRAME;
- st.v.frame_slot = opt_param_frame_slot(o->f, d);
+ case CG_IR_BR:
+ t->jump(t, replay_label(r, (Label)in->extra.imm, in->loc));
+ return;
+ case CG_IR_CMP_BRANCH: {
+ const CgIrCmpBranchAux* aux = (const CgIrCmpBranchAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->cmp_branch(t, aux->op, ops[0], ops[1],
+ replay_label(r, aux->target, in->loc));
+ return;
}
- }
- /* Deep-copy parts so caller-stack memory isn't relied on. */
- CGParamDesc copy = *d;
- copy.storage = st;
- if (d->nincoming) {
- CGABIPart* parts = arena_array(o->f->arena, CGABIPart, d->nincoming);
- memcpy(parts, d->incoming, sizeof(CGABIPart) * d->nincoming);
- copy.incoming = parts;
- }
- ir_param_add(o->f, &copy);
- ir_local_add(o->f, &local_desc, st);
- if (st.kind == CG_LOCAL_STORAGE_REG) {
- ir_ensure_preg(o->f, (PReg)st.v.reg, d->type,
- opt_local_reg_class(o, d->type));
- Inst* in = rec(o, IR_PARAM_DECL);
- in->def = (Val)st.v.reg;
- in->type = d->type;
- }
- return st;
-}
-
-static void w_spill_reg(CGTarget* t, Operand src, FrameSlot s, MemAccess m) {
- (void)src;
- (void)s;
- (void)m;
- panic_unsupported(impl_of(t), "spill_reg");
-}
-static void w_reload_reg(CGTarget* t, Operand dst, FrameSlot s, MemAccess m) {
- (void)dst;
- (void)s;
- (void)m;
- panic_unsupported(impl_of(t), "reload_reg");
-}
-
-static void w_get_allocable_regs(CGTarget* t, RegClass cls, const Reg** out,
- u32* nregs) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->get_allocable_regs)
- wr->get_allocable_regs(wr, cls, out, nregs);
- else {
- *out = NULL;
- *nregs = 0;
- }
-}
-
-static void w_get_phys_regs(CGTarget* t, RegClass cls,
- const CGPhysRegInfo** out, u32* nregs) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->get_phys_regs)
- wr->get_phys_regs(wr, cls, out, nregs);
- else {
- *out = NULL;
- *nregs = 0;
- }
-}
-
-static void w_get_scratch_regs(CGTarget* t, RegClass cls, const Reg** out,
- u32* nregs) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->get_scratch_regs)
- wr->get_scratch_regs(wr, cls, out, nregs);
- else {
- *out = NULL;
- *nregs = 0;
- }
-}
-
-static int w_is_caller_saved(CGTarget* t, RegClass cls, Reg r) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->is_caller_saved) return wr->is_caller_saved(wr, cls, r);
- return 0;
-}
-
-static u32 w_call_clobber_mask(CGTarget* t, const CGCallDesc* d, RegClass cls) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->call_clobber_mask) return wr->call_clobber_mask(wr, d, cls);
- return 0;
-}
-
-static u32 w_return_reg_mask(CGTarget* t, const ABIFuncInfo* abi,
- RegClass cls) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->return_reg_mask) return wr->return_reg_mask(wr, abi, cls);
- return 0;
-}
-
-static u32 w_callee_save_mask(CGTarget* t, RegClass cls) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->callee_save_mask) return wr->callee_save_mask(wr, cls);
- return 0;
-}
-
-static void w_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
- u32 n) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->plan_hard_regs) wr->plan_hard_regs(wr, cls, regs, n);
-}
-
-static void w_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
- u32 n) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->reserve_hard_regs) wr->reserve_hard_regs(wr, cls, regs, n);
-}
-
-static int w_resolve_reg_name(CGTarget* t, Sym name, Reg* out,
- RegClass* cls_out) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->resolve_reg_name) return wr->resolve_reg_name(wr, name, out, cls_out);
- return 1;
-}
-
-static void w_file_scope_asm(CGTarget* t, const char* src, size_t len) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->file_scope_asm) wr->file_scope_asm(wr, src, len);
-}
-
-/* ---- labels and control flow ---- */
-
-static Label w_label_new(CGTarget* t) {
- OptImpl* o = impl_of(t);
- u32 block = ir_block_new(o->f);
- /* Pre-allocate an MCLabel id so frontend code that needs a stable
- * MCLabel before pass_emit replays (cfree_cg_data_label_addr in
- * particular) has one. pass_emit places it through the wrapped
- * target's label_place during replay. */
- if (o->target && o->target->mc) {
- o->f->blocks[block].mc_label = o->target->mc->label_new(o->target->mc);
- }
- return (Label)block;
-}
-
-static MCLabel w_cg_label_to_mc_label(CGTarget* t, Label l) {
- OptImpl* o = impl_of(t);
- u32 block = (u32)l;
- if (block >= o->f->nblocks) return MC_LABEL_NONE;
- return o->f->blocks[block].mc_label;
-}
-
-static void w_label_place(CGTarget* t, Label l) {
- OptImpl* o = impl_of(t);
- u32 target_blk = (u32)l;
- if (target_blk >= o->f->nblocks) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(o->c, loc, "opt: label_place(%u) out of range", (unsigned)l);
- }
- if (!cur_terminated(o)) {
- Block* cb = &o->f->blocks[o->cur];
- rec(o, IR_BR);
- cb->succ[0] = target_blk;
- cb->nsucc = 1;
- }
- set_cur(o, target_blk);
-}
-
-static void w_jump(CGTarget* t, Label l) {
- OptImpl* o = impl_of(t);
- u32 target_blk = (u32)l;
- Block* cb = &o->f->blocks[o->cur];
- rec(o, IR_BR);
- cb->succ[0] = target_blk;
- cb->nsucc = 1;
- after_terminator(o);
-}
-
-static void w_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l) {
- OptImpl* o = impl_of(t);
- u32 taken = (u32)l;
- Inst* in = rec(o, IR_CMP_BRANCH);
- Operand ops[2] = {a, b};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- in->extra.imm = (i64)op;
- u32 cur = o->cur;
- u32 ft = ir_block_new(o->f);
- Block* cb = &o->f->blocks[cur];
- cb->succ[0] = taken;
- cb->succ[1] = ft;
- cb->nsucc = 2;
- set_cur(o, ft);
-}
-
-static void w_switch_(CGTarget* t, const CGSwitchDesc* d) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_SWITCH);
- IRSwitchAux* aux = arena_znew(o->f->arena, IRSwitchAux);
- Operand sel = d->selector;
- in->opnds = dup_opnds(o->f, &sel, 1);
- in->nopnds = 1;
- aux->selector_type = d->selector_type;
- aux->ncases = d->ncases;
- aux->hint = d->hint;
- aux->cases = NULL;
- if (d->ncases) {
- aux->cases = arena_array(o->f->arena, IRSwitchAuxCase, d->ncases);
- for (u32 i = 0; i < d->ncases; ++i) {
- aux->cases[i].value = d->cases[i].value;
- aux->cases[i].block = (u32)d->cases[i].label;
+ case CG_IR_SWITCH:
+ replay_switch(r, in);
+ return;
+ case CG_IR_INDIRECT_BRANCH: {
+ const CgIrIndirectAux* aux = (const CgIrIndirectAux*)in->extra.aux;
+ Label* targets =
+ arena_array(r->o->c->tu, Label, aux->ntargets ? aux->ntargets : 1u);
+ for (u32 i = 0; i < aux->ntargets; ++i)
+ targets[i] = replay_label(r, aux->targets[i], in->loc);
+ ops[0] = replay_operand(r, in->opnds[0], in->loc);
+ t->indirect_branch(t, ops[0], targets, aux->ntargets);
+ return;
}
- }
- u32 cur = o->cur;
- /* Default label may be LABEL_NONE meaning "fall through past the
- * switch." Materialize a fresh post-switch block to land on in that
- * case so the CFG still has a single block as default successor. */
- u32 default_blk;
- if (d->default_label != LABEL_NONE) {
- aux->has_default = 1;
- default_blk = (u32)d->default_label;
- } else {
- aux->has_default = 0;
- default_blk = ir_block_new(o->f);
- }
- aux->default_block = default_blk;
- in->extra.aux = aux;
-
- ir_block_set_nsucc(o->f, cur, d->ncases + 1u);
- Block* cb = &o->f->blocks[cur];
- for (u32 i = 0; i < d->ncases; ++i) cb->succ[i] = (u32)d->cases[i].label;
- cb->succ[d->ncases] = default_blk;
- /* No-default fall-through: emit a fresh post-switch block as the
- * continuation point. With an explicit default the next recorded
- * instruction is unreachable until a label_place re-anchors cur. */
- if (!aux->has_default) {
- set_cur(o, default_blk);
- } else {
- after_terminator(o);
- }
-}
-
-static void w_indirect_branch(CGTarget* t, Operand addr, const Label* targets,
- u32 ntargets) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_INDIRECT_BRANCH);
- IRIndirectAux* aux = arena_znew(o->f->arena, IRIndirectAux);
- Operand a = addr;
- in->opnds = dup_opnds(o->f, &a, 1);
- in->nopnds = 1;
- aux->ntargets = ntargets;
- aux->targets = NULL;
- if (ntargets) {
- aux->targets = arena_array(o->f->arena, u32, ntargets);
- for (u32 i = 0; i < ntargets; ++i) aux->targets[i] = (u32)targets[i];
- }
- in->extra.aux = aux;
- u32 cur = o->cur;
- ir_block_set_nsucc(o->f, cur, ntargets);
- Block* cb = &o->f->blocks[cur];
- for (u32 i = 0; i < ntargets; ++i) cb->succ[i] = (u32)targets[i];
- after_terminator(o);
-}
-
-static void w_load_label_addr(CGTarget* t, Operand dst, Label l) {
- OptImpl* o = impl_of(t);
- ensure_operand(o->f, &dst);
- Inst* in = rec(o, IR_LOAD_LABEL_ADDR);
- in->opnds = dup_opnds(o->f, &dst, 1);
- in->nopnds = 1;
- in->extra.imm = (i64)(u32)l;
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-/* ---- structured scopes ---- */
-
-static u32 scope_register(Func* f, Inst* in) {
- if (f->nscopes == f->scopes_cap) {
- u32 ncap = f->scopes_cap ? f->scopes_cap * 2u : 4u;
- Inst** nb = arena_zarray(f->arena, Inst*, ncap);
- if (f->scope_aux_inst)
- memcpy(nb, f->scope_aux_inst, sizeof(Inst*) * f->nscopes);
- f->scope_aux_inst = nb;
- f->scopes_cap = ncap;
- }
- f->scope_aux_inst[f->nscopes++] = in;
- return f->nscopes;
-}
-
-static IRScopeAux* scope_lookup(OptImpl* o, CGScope s) {
- if (s == CG_SCOPE_NONE || s > o->f->nscopes) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(o->c, loc, "opt: bad scope id %u", (unsigned)s);
- }
- return (IRScopeAux*)o->f->scope_aux_inst[s - 1]->extra.aux;
-}
-
-static CGScope w_scope_begin(CGTarget* t, const CGScopeDesc* d) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_SCOPE_BEGIN);
- IRScopeAux* aux = arena_znew(o->f->arena, IRScopeAux);
- aux->desc = *d;
- in->extra.aux = aux;
- u32 sid = scope_register(o->f, in);
- aux->scope_id = sid;
-
- if (d->kind == SCOPE_IF) {
- aux->if_then_block = ir_block_new(o->f);
- aux->if_else_block = ir_block_new(o->f);
- aux->if_end_block = ir_block_new(o->f);
- Block* cb = &o->f->blocks[o->cur];
- cb->succ[0] = aux->if_then_block;
- cb->succ[1] = aux->if_else_block;
- cb->nsucc = 2;
- set_cur(o, aux->if_then_block);
- } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) {
- aux->loop_break_block =
- d->break_label != LABEL_NONE ? (u32)d->break_label : 0;
- aux->loop_continue_block =
- d->continue_label != LABEL_NONE ? (u32)d->continue_label : 0;
- }
- return (CGScope)sid;
-}
-
-static void w_scope_else(CGTarget* t, CGScope s) {
- OptImpl* o = impl_of(t);
- IRScopeAux* aux = scope_lookup(o, s);
- if (aux->desc.kind != SCOPE_IF) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(o->c, loc, "opt: scope_else on non-IF scope %u",
- (unsigned)s);
- }
- Inst* in = rec(o, IR_SCOPE_ELSE);
- in->extra.imm = (i64)s;
- if (!cur_terminated(o)) {
- Block* cb = &o->f->blocks[o->cur];
- cb->succ[0] = aux->if_end_block;
- cb->nsucc = 1;
- }
- aux->if_has_else = 1;
- set_cur(o, aux->if_else_block);
-}
-
-static void w_scope_end(CGTarget* t, CGScope s) {
- OptImpl* o = impl_of(t);
- IRScopeAux* aux = scope_lookup(o, s);
- Inst* in = rec(o, IR_SCOPE_END);
- in->extra.imm = (i64)s;
- if (aux->desc.kind == SCOPE_IF) {
- if (!cur_terminated(o)) {
- Block* cb = &o->f->blocks[o->cur];
- cb->succ[0] = aux->if_end_block;
- cb->nsucc = 1;
+ case CG_IR_LOAD_LABEL_ADDR:
+ ops[0] = replay_operand(r, in->opnds[0], in->loc);
+ t->load_label_addr(t, ops[0],
+ replay_label(r, (Label)in->extra.imm, in->loc));
+ return;
+ case CG_IR_SCOPE_BEGIN: {
+ const CgIrScopeAux* aux = (const CgIrScopeAux*)in->extra.aux;
+ CGScopeDesc d = aux->desc;
+ d.break_label = replay_label(r, d.break_label, in->loc);
+ d.continue_label = replay_label(r, d.continue_label, in->loc);
+ d.cond = replay_operand(r, d.cond, in->loc);
+ r->scope_map[aux->scope] = t->scope_begin(t, &d);
+ return;
}
- if (!aux->if_has_else) {
- Block* eb = &o->f->blocks[aux->if_else_block];
- if (eb->nsucc == 0) {
- eb->succ[0] = aux->if_end_block;
- eb->nsucc = 1;
- }
- /* Else block was never visited as cur, but it has code (the
- * fall-through from scope_begin) — record it before end so emit
- * order has it. */
- ir_note_emit(o->f, aux->if_else_block);
+ case CG_IR_SCOPE_ELSE:
+ t->scope_else(t, replay_scope(r, (CGScope)in->extra.imm, in->loc));
+ return;
+ case CG_IR_SCOPE_END:
+ t->scope_end(t, replay_scope(r, (CGScope)in->extra.imm, in->loc));
+ return;
+ case CG_IR_BREAK_TO:
+ t->break_to(t, replay_scope(r, (CGScope)in->extra.imm, in->loc));
+ return;
+ case CG_IR_CONTINUE_TO:
+ t->continue_to(t, replay_scope(r, (CGScope)in->extra.imm, in->loc));
+ return;
+ case CG_IR_ALLOCA:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->alloca_(t, ops[0], ops[1], (u32)in->extra.imm);
+ return;
+ case CG_IR_VA_START:
+ ops[0] = replay_operand(r, in->opnds[0], in->loc);
+ t->va_start_(t, ops[0]);
+ return;
+ case CG_IR_VA_ARG:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->va_arg_(t, ops[0], ops[1], (CfreeCgTypeId)in->extra.imm);
+ return;
+ case CG_IR_VA_END:
+ ops[0] = replay_operand(r, in->opnds[0], in->loc);
+ t->va_end_(t, ops[0]);
+ return;
+ case CG_IR_VA_COPY:
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->va_copy_(t, ops[0], ops[1]);
+ return;
+ case CG_IR_ATOMIC_LOAD: {
+ const CgIrAtomicAux* aux = (const CgIrAtomicAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->atomic_load(t, ops[0], ops[1], aux->mem, aux->order);
+ return;
}
- set_cur(o, aux->if_end_block);
- }
-}
-
-static void w_break_to(CGTarget* t, CGScope s) {
- OptImpl* o = impl_of(t);
- IRScopeAux* aux = scope_lookup(o, s);
- Inst* in = rec(o, IR_BREAK_TO);
- in->extra.imm = (i64)s;
- Block* cb = &o->f->blocks[o->cur];
- cb->succ[0] = aux->loop_break_block;
- cb->nsucc = 1;
- after_terminator(o);
-}
-
-static void w_continue_to(CGTarget* t, CGScope s) {
- OptImpl* o = impl_of(t);
- IRScopeAux* aux = scope_lookup(o, s);
- Inst* in = rec(o, IR_CONTINUE_TO);
- in->extra.imm = (i64)s;
- Block* cb = &o->f->blocks[o->cur];
- cb->succ[0] = aux->loop_continue_block;
- cb->nsucc = 1;
- after_terminator(o);
-}
-
-/* ---- data movement ---- */
-
-static void w_load_imm(CGTarget* t, Operand dst, i64 imm) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_LOAD_IMM);
- Operand ops[1] = {dst};
- in->opnds = dup_opnds(o->f, ops, 1);
- in->nopnds = 1;
- in->extra.imm = imm;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_LOAD_CONST);
- Operand ops[1] = {dst};
- in->opnds = dup_opnds(o->f, ops, 1);
- in->nopnds = 1;
- in->extra.cbytes = cb;
- if (cb.size) {
- u8* bytes = arena_array(o->f->arena, u8, cb.size);
- memcpy(bytes, cb.bytes, cb.size);
- in->extra.cbytes.bytes = bytes;
- }
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_copy(CGTarget* t, Operand dst, Operand src) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_COPY);
- Operand ops[2] = {dst, src};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_load(CGTarget* t, Operand dst, Operand addr, MemAccess m) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_LOAD);
- Operand ops[2] = {dst, addr};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- in->extra.mem = m;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_store(CGTarget* t, Operand addr, Operand src, MemAccess m) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_STORE);
- Operand ops[2] = {addr, src};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- in->extra.mem = m;
-}
-
-static void w_addr_of(CGTarget* t, Operand dst, Operand lv) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_ADDR_OF);
- Operand ops[2] = {dst, lv};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_TLS_ADDR_OF);
- Operand ops[1] = {dst};
- in->opnds = dup_opnds(o->f, ops, 1);
- in->nopnds = 1;
- IRTlsAux* aux = arena_znew(o->f->arena, IRTlsAux);
- aux->sym = sym;
- aux->addend = addend;
- in->extra.aux = aux;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_copy_bytes(CGTarget* t, Operand dst, Operand src,
- AggregateAccess agg) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_AGG_COPY);
- Operand ops[2] = {dst, src};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- IRAggAux* aux = arena_znew(o->f->arena, IRAggAux);
- aux->access = agg;
- in->extra.aux = aux;
-}
-
-static void w_set_bytes(CGTarget* t, Operand dst, Operand byte,
- AggregateAccess agg) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_AGG_SET);
- Operand ops[2] = {dst, byte};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- IRAggAux* aux = arena_znew(o->f->arena, IRAggAux);
- aux->access = agg;
- in->extra.aux = aux;
-}
-
-static void w_bitfield_load(CGTarget* t, Operand dst, Operand record,
- BitFieldAccess bf) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_BITFIELD_LOAD);
- Operand ops[2] = {dst, record};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- IRBitFieldAux* aux = arena_znew(o->f->arena, IRBitFieldAux);
- aux->access = bf;
- in->extra.aux = aux;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_bitfield_store(CGTarget* t, Operand record, Operand src,
- BitFieldAccess bf) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_BITFIELD_STORE);
- Operand ops[2] = {record, src};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- IRBitFieldAux* aux = arena_znew(o->f->arena, IRBitFieldAux);
- aux->access = bf;
- in->extra.aux = aux;
-}
-
-/* ---- arithmetic / cmp / convert ---- */
-
-static void w_binop(CGTarget* t, BinOp op, Operand dst, Operand a, Operand b) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_BINOP);
- Operand ops[3] = {dst, a, b};
- in->opnds = dup_opnds(o->f, ops, 3);
- in->nopnds = 3;
- in->extra.imm = (i64)op;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_unop(CGTarget* t, UnOp op, Operand dst, Operand a) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_UNOP);
- Operand ops[2] = {dst, a};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- in->extra.imm = (i64)op;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_CMP);
- Operand ops[3] = {dst, a, b};
- in->opnds = dup_opnds(o->f, ops, 3);
- in->nopnds = 3;
- in->extra.imm = (i64)op;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_CONVERT);
- Operand ops[2] = {dst, src};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- in->extra.imm = (i64)k;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-/* ---- calls / return ---- */
-
-static CGABIPart* dup_parts(Arena* a, const CGABIPart* src, u32 n) {
- if (!n) return NULL;
- CGABIPart* dst = arena_array(a, CGABIPart, n);
- memcpy(dst, src, sizeof(CGABIPart) * n);
- return dst;
-}
-
-static void w_call(CGTarget* t, const CGCallDesc* d) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_CALL);
- IRCallAux* aux = arena_znew(o->f->arena, IRCallAux);
- ensure_operand(o->f, &d->callee);
- aux->desc = *d;
- if (d->nargs) {
- CGABIValue* args = arena_array(o->f->arena, CGABIValue, d->nargs);
- for (u32 i = 0; i < d->nargs; ++i) {
- ensure_abivalue(o->f, &d->args[i]);
- args[i] = d->args[i];
- args[i].parts =
- dup_parts(o->f->arena, d->args[i].parts, d->args[i].nparts);
+ case CG_IR_ATOMIC_STORE: {
+ const CgIrAtomicAux* aux = (const CgIrAtomicAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 2, in->loc);
+ t->atomic_store(t, ops[0], ops[1], aux->mem, aux->order);
+ return;
}
- aux->desc.args = args;
- }
- ensure_abivalue(o->f, &d->ret);
- aux->desc.ret = d->ret;
- aux->desc.ret.parts = dup_parts(o->f->arena, d->ret.parts, d->ret.nparts);
- in->extra.aux = aux;
- in->type = d->fn_type;
- if (d->ret.storage.kind == OPK_REG) {
- set_preg_def(o->f, in, o->cur, (PReg)d->ret.storage.v.reg, d->ret.type);
- }
-}
-
-static const char* w_tail_call_unrealizable_reason(CGTarget* t,
- const CGCallDesc* d) {
- (void)t;
- (void)d;
- /* The recorder accepts every tail call. Realizability depends on the laid-
- * out frame, known only when the call is emitted onto the real backend
- * during replay (pass_emit). There the real target's hook is consulted and
- * the call is emitted as a tail, falls back to an ordinary call+return
- * (ALLOWED), or diagnosed (MUST). */
- return NULL;
-}
-
-static void w_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->plan_call)
- wr->plan_call(wr, d, out);
- else
- memset(out, 0, sizeof *out);
-}
-
-static void w_emit_call_plan(CGTarget* t, const CGCallPlan* p) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->emit_call_plan) wr->emit_call_plan(wr, p);
-}
-
-static void w_load_call_arg(CGTarget* t, Operand dst, const CGCallPlanMove* m) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->load_call_arg) wr->load_call_arg(wr, dst, m);
-}
-
-static void w_store_call_arg(CGTarget* t, const CGCallPlanMove* m) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->store_call_arg) wr->store_call_arg(wr, m);
-}
-
-static void w_store_call_ret(CGTarget* t, const CGCallPlanRet* ret,
- Operand src) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->store_call_ret) wr->store_call_ret(wr, ret, src);
-}
-
-static void w_ret(CGTarget* t, const CGABIValue* v) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_RET);
- IRRetAux* aux = arena_znew(o->f->arena, IRRetAux);
- if (v) {
- ensure_abivalue(o->f, v);
- aux->present = 1;
- aux->val = *v;
- aux->val.parts = dup_parts(o->f->arena, v->parts, v->nparts);
- }
- in->extra.aux = aux;
- Block* cb = &o->f->blocks[o->cur];
- cb->nsucc = 0;
- after_terminator(o);
-}
-
-/* ---- alloca / variadics / atomics / fence / intrinsic ---- */
-
-static void w_alloca_(CGTarget* t, Operand dst, Operand size, u32 align) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_ALLOCA);
- Operand ops[2] = {dst, size};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- in->extra.imm = (i64)align;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_va_start_(CGTarget* t, Operand ap) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_VA_START);
- Operand ops[1] = {ap};
- in->opnds = dup_opnds(o->f, ops, 1);
- in->nopnds = 1;
-}
-
-static void w_va_arg_(CGTarget* t, Operand dst, Operand ap, CfreeCgTypeId ty) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_VA_ARG);
- Operand ops[2] = {dst, ap};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- in->extra.aux = (void*)(uintptr_t)ty;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_va_end_(CGTarget* t, Operand ap) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_VA_END);
- Operand ops[1] = {ap};
- in->opnds = dup_opnds(o->f, ops, 1);
- in->nopnds = 1;
-}
-
-static void w_va_copy_(CGTarget* t, Operand dst, Operand src) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_VA_COPY);
- Operand ops[2] = {dst, src};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
-}
-
-static void w_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess m,
- MemOrder mo) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_ATOMIC_LOAD);
- Operand ops[2] = {dst, addr};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- IRAtomicAux* aux = arena_znew(o->f->arena, IRAtomicAux);
- aux->mem = m;
- aux->mo = mo;
- in->extra.aux = aux;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_atomic_store(CGTarget* t, Operand addr, Operand src, MemAccess m,
- MemOrder mo) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_ATOMIC_STORE);
- Operand ops[2] = {addr, src};
- in->opnds = dup_opnds(o->f, ops, 2);
- in->nopnds = 2;
- IRAtomicAux* aux = arena_znew(o->f->arena, IRAtomicAux);
- aux->mem = m;
- aux->mo = mo;
- in->extra.aux = aux;
-}
-
-static void w_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
- Operand val, MemAccess m, MemOrder mo) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_ATOMIC_RMW);
- Operand ops[3] = {dst, addr, val};
- in->opnds = dup_opnds(o->f, ops, 3);
- in->nopnds = 3;
- IRAtomicAux* aux = arena_znew(o->f->arena, IRAtomicAux);
- aux->mem = m;
- aux->mo = mo;
- aux->op = (u8)op;
- in->extra.aux = aux;
- if (dst.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)dst.v.reg, dst.type);
-}
-
-static void w_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
- Operand expected, Operand desired, MemAccess m,
- MemOrder s, MemOrder f) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_ATOMIC_CAS);
- Operand ops[5] = {prior, ok, addr, expected, desired};
- in->opnds = dup_opnds(o->f, ops, 5);
- in->nopnds = 5;
- IRCasAux* aux = arena_znew(o->f->arena, IRCasAux);
- aux->mem = m;
- aux->success = s;
- aux->failure = f;
- in->extra.aux = aux;
- if (prior.kind == OPK_REG)
- set_preg_def(o->f, in, o->cur, (PReg)prior.v.reg, prior.type);
- if (ok.kind == OPK_REG) {
- in->ndefs = 2;
- in->defs = arena_array(o->f->arena, Val, 2);
- in->defs[0] = (prior.kind == OPK_REG) ? (Val)prior.v.reg : VAL_NONE;
- in->defs[1] = (Val)ok.v.reg;
- }
-}
-
-static void w_fence(CGTarget* t, MemOrder mo) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_FENCE);
- in->extra.imm = (i64)mo;
-}
-
-static void w_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
- const Operand* args, u32 na) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_INTRINSIC);
- IRIntrinAux* aux = arena_znew(o->f->arena, IRIntrinAux);
- aux->kind = kind;
- aux->ndst = nd;
- aux->narg = na;
- aux->dsts = nd ? arena_array(o->f->arena, Operand, nd) : NULL;
- aux->args = na ? arena_array(o->f->arena, Operand, na) : NULL;
- if (nd) {
- memcpy(aux->dsts, dsts, sizeof(Operand) * nd);
- for (u32 i = 0; i < nd; ++i) ensure_operand(o->f, &aux->dsts[i]);
- }
- if (na) {
- memcpy(aux->args, args, sizeof(Operand) * na);
- for (u32 i = 0; i < na; ++i) ensure_operand(o->f, &aux->args[i]);
- }
- in->extra.aux = aux;
- if (nd == 1 && dsts[0].kind == OPK_REG) {
- set_preg_def(o->f, in, o->cur, (PReg)dsts[0].v.reg, dsts[0].type);
- } else if (nd > 1) {
- in->ndefs = nd;
- in->defs = arena_array(o->f->arena, Val, nd);
- for (u32 i = 0; i < nd; ++i) {
- in->defs[i] = (dsts[i].kind == OPK_REG) ? (Val)dsts[i].v.reg : VAL_NONE;
+ case CG_IR_ATOMIC_RMW: {
+ const CgIrAtomicAux* aux = (const CgIrAtomicAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 3, in->loc);
+ t->atomic_rmw(t, aux->op, ops[0], ops[1], ops[2], aux->mem, aux->order);
+ return;
}
- in->def = in->defs[0];
- in->type = dsts[0].type;
- }
- if (intrinsic_terminates(kind)) {
- Block* cb = &o->f->blocks[o->cur];
- cb->nsucc = 0;
- after_terminator(o);
- }
-}
-
-static void w_asm_block(CGTarget* t, const char* tmpl,
- const AsmConstraint* outs, u32 nout, Operand* out_ops,
- const AsmConstraint* ins, u32 nin,
- const Operand* in_ops, const Sym* clobbers, u32 nclob) {
- OptImpl* o = impl_of(t);
- Inst* in = rec(o, IR_ASM_BLOCK);
- IRAsmAux* aux = arena_znew(o->f->arena, IRAsmAux);
- /* Template strings reach us via the parser's interned string pool, which
- * outlives the CG/Opt arenas. Storing the pointer is safe; copy
- * defensively into the IR arena anyway so the IR is self-contained. */
- if (tmpl) {
- size_t tl = 0;
- while (tmpl[tl]) ++tl;
- aux->tmpl = arena_strdup(o->f->arena, tmpl, tl);
- } else {
- aux->tmpl = NULL;
- }
- aux->nout = nout;
- aux->nin = nin;
- aux->nclob = nclob;
- if (nout) {
- aux->outs = arena_array(o->f->arena, AsmConstraint, nout);
- memcpy(aux->outs, outs, nout * sizeof *outs);
- aux->out_ops = arena_array(o->f->arena, Operand, nout);
- memcpy(aux->out_ops, out_ops, nout * sizeof *out_ops);
- for (u32 i = 0; i < nout; ++i) ensure_operand(o->f, &aux->out_ops[i]);
- }
- if (nin) {
- aux->ins = arena_array(o->f->arena, AsmConstraint, nin);
- memcpy(aux->ins, ins, nin * sizeof *ins);
- aux->in_ops = arena_array(o->f->arena, Operand, nin);
- memcpy(aux->in_ops, in_ops, nin * sizeof *in_ops);
- for (u32 i = 0; i < nin; ++i) ensure_operand(o->f, &aux->in_ops[i]);
- }
- if (nclob) {
- aux->clobbers = arena_array(o->f->arena, Sym, nclob);
- memcpy(aux->clobbers, clobbers, nclob * sizeof *clobbers);
- }
- in->extra.aux = aux;
- if (nout) {
- in->ndefs = nout;
- in->defs = arena_array(o->f->arena, Val, nout);
- for (u32 i = 0; i < nout; ++i) {
- in->defs[i] =
- (out_ops[i].kind == OPK_REG) ? (Val)out_ops[i].v.reg : VAL_NONE;
+ case CG_IR_ATOMIC_CAS: {
+ const CgIrAtomicAux* aux = (const CgIrAtomicAux*)in->extra.aux;
+ replay_operands(r, ops, in->opnds, 5, in->loc);
+ t->atomic_cas(t, ops[0], ops[1], ops[2], ops[3], ops[4], aux->mem,
+ aux->order, aux->failure);
+ return;
}
- in->def = in->defs[0];
- in->type = out_ops[0].type;
- }
-}
-
-static void w_set_loc(CGTarget* t, SrcLoc loc) {
- OptImpl* o = impl_of(t);
- o->pending_loc = loc;
-}
-
-static u64 func_inst_count(Func* f) {
- u64 n = 0;
- if (!f) return 0;
- for (u32 b = 0; b < f->nblocks; ++b) n += f->blocks[b].ninsts;
- return n;
-}
-
-static int inst_spill_local(Func* f, const Inst* in, u32 op_idx) {
- FrameSlot fs;
- if (!f || !in || op_idx >= in->nopnds) return 0;
- if (in->opnds[op_idx].kind != OPK_LOCAL) return 0;
- fs = in->opnds[op_idx].v.frame_slot;
- return fs != FRAME_SLOT_NONE && fs <= f->nframe_slots &&
- f->frame_slots[fs - 1u].kind == FS_SPILL;
-}
-
-static u64 func_spill_alloc_count(Func* f) {
- u64 n = 0;
- if (!f || (!f->preg_locs && !f->preg_info)) return 0;
- for (PReg r = 1; r < opt_reg_count(f); ++r)
- if (opt_preg_alloc_kind(f, r) == OPT_ALLOC_SPILL) ++n;
- return n;
-}
-
-static u64 blocks_spill_load_count(Func* f, Block* blocks, u32 nblocks) {
- u64 n = 0;
- if (!f || !blocks) return 0;
- for (u32 b = 0; b < nblocks; ++b) {
- Block* bl = &blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op == IR_LOAD && inst_spill_local(f, in, 1)) ++n;
+ case CG_IR_FENCE:
+ t->fence(t, (MemOrder)in->extra.imm);
+ return;
+ case CG_IR_INTRINSIC: {
+ const CgIrIntrinsicAux* aux = (const CgIrIntrinsicAux*)in->extra.aux;
+ Operand* dsts =
+ arena_array(r->o->c->tu, Operand, aux->ndst ? aux->ndst : 1u);
+ Operand* args =
+ arena_array(r->o->c->tu, Operand, aux->narg ? aux->narg : 1u);
+ replay_operands(r, dsts, aux->dsts, aux->ndst, in->loc);
+ replay_operands(r, args, aux->args, aux->narg, in->loc);
+ t->intrinsic(t, aux->kind, dsts, aux->ndst, args, aux->narg);
+ return;
}
- }
- return n;
-}
-
-static u64 func_spill_load_count(Func* f) {
- if (!f) return 0;
- if (f->mir)
- return blocks_spill_load_count(f, f->mir->blocks, f->mir->nblocks);
- return blocks_spill_load_count(f, f->blocks, f->nblocks);
-}
-
-static u64 blocks_spill_store_count(Func* f, Block* blocks, u32 nblocks) {
- u64 n = 0;
- if (!f || !blocks) return 0;
- for (u32 b = 0; b < nblocks; ++b) {
- Block* bl = &blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op == IR_STORE && inst_spill_local(f, in, 0)) ++n;
+ case CG_IR_ASM_BLOCK: {
+ const CgIrAsmAux* aux = (const CgIrAsmAux*)in->extra.aux;
+ Operand* out_ops =
+ arena_array(r->o->c->tu, Operand, aux->nout ? aux->nout : 1u);
+ Operand* in_ops =
+ arena_array(r->o->c->tu, Operand, aux->nin ? aux->nin : 1u);
+ replay_operands(r, out_ops, aux->out_ops, aux->nout, in->loc);
+ replay_operands(r, in_ops, aux->in_ops, aux->nin, in->loc);
+ t->asm_block(t, aux->tmpl, aux->outs, aux->nout, out_ops, aux->ins,
+ aux->nin, in_ops, aux->clobbers, aux->nclob);
+ return;
}
+ case CG_IR_LOCAL_STATIC_DATA_BEGIN: {
+ const CgIrLocalStaticBeginAux* aux =
+ (const CgIrLocalStaticBeginAux*)in->extra.aux;
+ if (!t->local_static_data_begin ||
+ !t->local_static_data_begin(t, &aux->desc))
+ compiler_panic(r->o->c, in->loc,
+ "opt direct replay: local static data unsupported");
+ return;
+ }
+ case CG_IR_LOCAL_STATIC_DATA_WRITE: {
+ const CgIrLocalStaticWriteAux* aux =
+ (const CgIrLocalStaticWriteAux*)in->extra.aux;
+ t->local_static_data_write(t, aux->has_data ? aux->data : NULL, aux->len);
+ return;
+ }
+ case CG_IR_LOCAL_STATIC_DATA_LABEL_ADDR: {
+ const CgIrLocalStaticLabelAux* aux =
+ (const CgIrLocalStaticLabelAux*)in->extra.aux;
+ t->local_static_data_label_addr(t, replay_label(r, aux->target, in->loc),
+ aux->addend, aux->width,
+ aux->address_space);
+ return;
+ }
+ case CG_IR_LOCAL_STATIC_DATA_END:
+ t->local_static_data_end(t);
+ return;
+ }
+}
+
+static void opt_replay_cg_ir_direct(OptImpl* o, const CgIrFunc* f) {
+ OptReplay r;
+ memset(&r, 0, sizeof r);
+ r.o = o;
+ r.nlocals = f->nlocals;
+ r.local_map =
+ arena_zarray(o->c->tu, CGLocal, f->nlocals ? f->nlocals + 1u : 1u);
+ for (u32 i = 0; i < f->nlabels; ++i)
+ if (f->labels[i].id > r.nlabels) r.nlabels = f->labels[i].id;
+ r.label_map = arena_zarray(o->c->tu, Label, r.nlabels ? r.nlabels + 1u : 1u);
+ r.nscopes = f->nscopes;
+ r.scope_map =
+ arena_zarray(o->c->tu, CGScope, f->nscopes ? f->nscopes + 1u : 1u);
+
+ o->target->func_begin(o->target, &f->desc);
+ for (u32 i = 0; i < f->nlabels; ++i)
+ r.label_map[f->labels[i].id] = o->target->label_new(o->target);
+ for (u32 i = 0; i < f->nparams; ++i) {
+ const CgIrParam* p = &f->params[i];
+ r.local_map[p->local] = o->target->param(o->target, &p->desc);
+ }
+ for (u32 i = 0; i < f->nlocals; ++i) {
+ const CgIrLocal* l = &f->locals[i];
+ if (!r.local_map[l->id])
+ r.local_map[l->id] = o->target->local(o->target, &l->desc);
}
- return n;
+ for (u32 i = 0; i < f->ninsts; ++i) replay_inst(&r, &f->insts[i]);
+ o->target->func_end(o->target);
}
-static u64 func_spill_store_count(Func* f) {
- if (!f) return 0;
- if (f->mir)
- return blocks_spill_store_count(f, f->mir->blocks, f->mir->nblocks);
- return blocks_spill_store_count(f, f->blocks, f->nblocks);
+static void opt_dbg_dump(OptImpl* o, Func* f, const char* tag) {
+ extern char* getenv(const char*);
+ const char* s = getenv("CFREE_DUMP");
+ CfreeWriter* w = NULL;
+ size_t len = 0;
+ const uint8_t* bytes;
+ if (!s) return;
+ cfree_writer_mem(o->c->ctx->heap, &w);
+ opt_ir_dump(f, w);
+ bytes = cfree_writer_mem_bytes(w, &len);
+ compiler_panic(o->c, f->desc.loc, "DUMP %s:\n%.*s", tag, (int)len,
+ (const char*)bytes);
}
-static void opt_run_lowering_pipeline(OptImpl* o, const char* total_scope,
- int allow_live_range_split) {
- (void)allow_live_range_split;
- metrics_scope_begin(o->c, total_scope);
+static void opt_run_o1_native(OptImpl* o, Func* f) {
+ OptLiveInfo live;
+ OptLiveInfo regalloc_live;
+ if (!o->native)
+ compiler_panic(o->c, f ? f->desc.loc : (SrcLoc){0, 0, 0},
+ "O1 optimizer requires a native target");
+ opt_dbg_dump(o, f, "entry");
+
+ metrics_scope_begin(o->c, "opt.o1.total");
metrics_count(o->c, "opt.funcs", 1);
- metrics_count(o->c, "opt.blocks", o->f->nblocks);
- metrics_count(o->c, "opt.insts", func_inst_count(o->f));
- metrics_count(o->c, "opt.pregs", o->f->npregs);
+ metrics_count(o->c, "opt.blocks", f->nblocks);
+ metrics_count(o->c, "opt.pregs", f->npregs);
+
metrics_scope_begin(o->c, "opt.cfg.build_1");
- opt_build_cfg(o->f);
+ opt_build_cfg(f);
metrics_scope_end(o->c, "opt.cfg.build_1");
metrics_scope_begin(o->c, "opt.cfg.jump_cleanup_cfg");
- opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
+ opt_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG);
metrics_scope_end(o->c, "opt.cfg.jump_cleanup_cfg");
metrics_scope_begin(o->c, "opt.cfg.build_2");
- opt_build_cfg(o->f);
+ opt_build_cfg(f);
metrics_scope_end(o->c, "opt.cfg.build_2");
metrics_scope_begin(o->c, "opt.cfg.simplify_local");
- opt_simplify_local(o->f);
+ opt_simplify_local(f);
metrics_scope_end(o->c, "opt.cfg.simplify_local");
metrics_scope_begin(o->c, "opt.cfg.verify");
- opt_verify(o->f, "lowering-cfg");
+ opt_verify(f, "lowering-cfg");
metrics_scope_end(o->c, "opt.cfg.verify");
+
metrics_scope_begin(o->c, "opt.machinize");
- opt_machinize(o->f, o->target);
+ opt_machinize_native(f, o->native);
metrics_scope_end(o->c, "opt.machinize");
metrics_scope_begin(o->c, "opt.machinize.verify");
- opt_verify(o->f, "lowering-machinize");
+ opt_verify(f, "lowering-machinize");
metrics_scope_end(o->c, "opt.machinize.verify");
+
metrics_scope_begin(o->c, "opt.o1.addr_xform_pregs");
- opt_addr_xform_pregs(o->f);
+ opt_addr_xform_pregs(f);
metrics_scope_end(o->c, "opt.o1.addr_xform_pregs");
metrics_scope_begin(o->c, "opt.o1.addr_xform.verify");
- opt_verify(o->f, "o1-addr-xform");
+ opt_verify(f, "o1-addr-xform");
metrics_scope_end(o->c, "opt.o1.addr_xform.verify");
metrics_scope_begin(o->c, "opt.o1.promote_scalar_locals");
- opt_promote_scalar_locals(o->f);
+ opt_promote_scalar_locals(f);
metrics_scope_end(o->c, "opt.o1.promote_scalar_locals");
metrics_scope_begin(o->c, "opt.o1.promote_scalar.verify");
- opt_verify(o->f, "o1-promote-scalar");
+ opt_verify(f, "o1-promote-scalar");
metrics_scope_end(o->c, "opt.o1.promote_scalar.verify");
metrics_scope_begin(o->c, "opt.o1.addr_of_global_cse");
- opt_addr_of_global_cse(o->f);
+ opt_addr_of_global_cse(f);
metrics_scope_end(o->c, "opt.o1.addr_of_global_cse");
metrics_scope_begin(o->c, "opt.o1.addr_of_global.verify");
- opt_verify(o->f, "o1-addr-global-cse");
+ opt_verify(f, "o1-addr-global-cse");
metrics_scope_end(o->c, "opt.o1.addr_of_global.verify");
+
metrics_scope_begin(o->c, "opt.build_loop_tree");
- opt_build_loop_tree(o->f);
+ opt_build_loop_tree(f);
metrics_scope_end(o->c, "opt.build_loop_tree");
+
metrics_scope_begin(o->c, "opt.live_blocks.pre_dde");
- OptLiveInfo live;
- opt_live_blocks(o->f, &live);
- metrics_count(o->c, "opt.live_words", o->f->opt_live_words);
- metrics_count(o->c, "opt.live.blocks", o->f->nblocks);
- metrics_count(o->c, "opt.live.active_words", live.active_words);
- metrics_count(o->c, "opt.live.block_bytes", live.block_bytes);
- metrics_count(o->c, "opt.live.set_bit_scans", live.set_bit_scans);
- metrics_count(o->c, "opt.live.bitset_words_touched",
- live.bitset_words_touched);
- metrics_count(o->c, "opt.live.dataflow_iterations", live.dataflow_iterations);
- metrics_count(o->c, "opt.live.dataflow_block_visits",
- live.dataflow_block_visits);
- metrics_count(o->c, "opt.conflict_bytes", 0);
+ memset(&live, 0, sizeof live);
+ opt_live_blocks(f, &live);
+ metrics_count(o->c, "opt.live_words", f->opt_live_words);
metrics_scope_end(o->c, "opt.live_blocks.pre_dde");
metrics_scope_begin(o->c, "opt.dead_def_elim");
- opt_dead_def_elim_with_live(o->f, &live);
- metrics_count(o->c, "opt.dde.live_words_touched",
- o->f->opt_dde_live_words_touched);
+ opt_dead_def_elim_with_live(f, &live);
metrics_scope_end(o->c, "opt.dead_def_elim");
+
metrics_scope_begin(o->c, "opt.regalloc");
- OptLiveInfo regalloc_live;
- opt_regalloc_locations(o->f, 0, &regalloc_live);
- metrics_count(o->c, "opt.alloc.used_loc_words", o->f->opt_used_loc_words);
- metrics_count(o->c, "opt.alloc.hard_loc_words",
- o->f->opt_alloc_hard_loc_words);
- metrics_count(o->c, "opt.alloc.stack_loc_words",
- o->f->opt_alloc_stack_loc_words);
- metrics_count(o->c, "opt.alloc.stack_slots", o->f->opt_alloc_stack_slots);
- metrics_count(o->c, "opt.alloc.hard_point_visits",
- o->f->opt_alloc_hard_point_visits);
- metrics_count(o->c, "opt.alloc.stack_point_visits",
- o->f->opt_alloc_stack_point_visits);
- metrics_count(o->c, "opt.alloc.hard_word_ors", o->f->opt_alloc_hard_word_ors);
- metrics_count(o->c, "opt.alloc.stack_word_ors",
- o->f->opt_alloc_stack_word_ors);
- metrics_count(o->c, "opt.alloc.hard_mark_points",
- o->f->opt_alloc_hard_mark_points);
- metrics_count(o->c, "opt.alloc.stack_mark_points",
- o->f->opt_alloc_stack_mark_points);
- metrics_count(o->c, "opt.alloc.spills", func_spill_alloc_count(o->f));
+ memset(&regalloc_live, 0, sizeof regalloc_live);
+ opt_regalloc_locations(f, 0, &regalloc_live);
metrics_scope_end(o->c, "opt.regalloc");
metrics_scope_begin(o->c, "opt.regalloc.verify");
- opt_analysis_invalidate(o->f, OPT_ANALYSIS_DEF_USE);
- opt_verify(o->f, "post-regalloc");
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ opt_verify(f, "post-regalloc");
metrics_scope_end(o->c, "opt.regalloc.verify");
+
metrics_scope_begin(o->c, "opt.lower_mir");
- opt_lower_to_mir(o->f, &regalloc_live);
- metrics_count(o->c, "opt.rewrite.reloads", func_spill_load_count(o->f));
- metrics_count(o->c, "opt.rewrite.stores", func_spill_store_count(o->f));
- metrics_count(o->c, "opt.rewrite.inserted_insts",
- o->f->opt_rewrite_inserted_insts);
- metrics_count(o->c, "opt.rewrite.live_words_touched",
- o->f->opt_rewrite_live_words_touched);
+ opt_lower_to_mir(f, &regalloc_live);
metrics_scope_end(o->c, "opt.lower_mir");
metrics_scope_begin(o->c, "opt.lower_mir.verify");
- opt_mir_verify(o->f, "lower-mir");
+ opt_mir_verify(f, "lower-mir");
metrics_scope_end(o->c, "opt.lower_mir.verify");
metrics_scope_begin(o->c, "opt.combine");
- opt_mir_combine(o->f);
+ opt_mir_combine(f);
metrics_scope_end(o->c, "opt.combine");
metrics_scope_begin(o->c, "opt.combine.verify");
- opt_mir_verify(o->f, "post-mir-combine");
+ opt_mir_verify(f, "post-mir-combine");
metrics_scope_end(o->c, "opt.combine.verify");
metrics_scope_begin(o->c, "opt.dce");
- opt_mir_dce(o->f);
+ opt_mir_dce(f);
metrics_scope_end(o->c, "opt.dce");
metrics_scope_begin(o->c, "opt.dce.verify");
- opt_mir_verify(o->f, "post-mir-dce");
+ opt_mir_verify(f, "post-mir-dce");
metrics_scope_end(o->c, "opt.dce.verify");
metrics_scope_begin(o->c, "opt.post_ra.jump_cleanup_cfg");
- opt_mir_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
+ opt_mir_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG);
metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_cfg");
metrics_scope_begin(o->c, "opt.post_ra.build_cfg");
- opt_mir_build_cfg(o->f);
+ opt_mir_build_cfg(f);
metrics_scope_end(o->c, "opt.post_ra.build_cfg");
metrics_scope_begin(o->c, "opt.post_ra.verify");
- opt_mir_verify(o->f, "post-mir-jump-cfg");
+ opt_mir_verify(f, "post-mir-jump-cfg");
metrics_scope_end(o->c, "opt.post_ra.verify");
metrics_scope_begin(o->c, "opt.post_ra.jump_cleanup_layout");
- opt_mir_jump_cleanup(o->f, OPT_JUMP_CLEANUP_LAYOUT);
+ opt_mir_jump_cleanup(f, OPT_JUMP_CLEANUP_LAYOUT);
metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_layout");
+
metrics_scope_begin(o->c, "opt.emit");
- opt_emit(o->c, o->f, o->target);
+ opt_emit_native(o->c, f, o->native);
metrics_scope_end(o->c, "opt.emit");
- metrics_scope_end(o->c, total_scope);
-}
-
-static void opt_run_o1_pipeline(OptImpl* o) {
- opt_run_lowering_pipeline(o, "opt.o1.total", 0);
-}
-
-static void opt_run_o2_pipeline(OptImpl* o) {
- metrics_scope_begin(o->c, "opt.o2.ssa");
- opt_cleanup(o->f);
- metrics_scope_end(o->c, "opt.o2.ssa");
- opt_run_lowering_pipeline(o, "opt.o2.total", 1);
-}
-
-static void opt_funcset_add(OptImpl* o, Func* f) {
- FuncSet* fs = &o->funcs;
- if (fs->nfuncs == fs->cap) {
- u32 ncap = fs->cap ? fs->cap * 2u : 8u;
- Func** nf = arena_array(o->c->tu, Func*, ncap);
- if (fs->funcs) memcpy(nf, fs->funcs, sizeof(Func*) * fs->nfuncs);
- fs->funcs = nf;
- fs->cap = ncap;
- }
- fs->funcs[fs->nfuncs++] = f;
-}
-
-static int func_requires_non_ssa_o2(Func* f) {
- if (!f) return 0;
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- switch ((IROp)bl->insts[i].op) {
- case IR_ASM_BLOCK:
- case IR_LOAD_LABEL_ADDR:
- case IR_INDIRECT_BRANCH:
- return 1;
- default:
- break;
+ metrics_scope_end(o->c, "opt.o1.total");
+}
+
+static void opt_dbg_dump_cg(OptImpl* o, const CgIrFunc* f) {
+ extern char* getenv(const char*);
+ StrBuf sb;
+ char buf[8192];
+ if (!getenv("CFREE_DUMPCG")) return;
+ strbuf_init(&sb, buf, sizeof buf);
+ for (u32 i = 0; i < f->ninsts; ++i) {
+ const CgIrInst* in = &f->insts[i];
+ strbuf_put_u64(&sb, in->op);
+ if (in->op == CG_IR_LOAD_IMM) {
+ strbuf_put_slice(&sb, SLICE_LIT("(imm="));
+ strbuf_put_u64(&sb, (u64)in->extra.imm);
+ strbuf_put_slice(&sb, SLICE_LIT(")"));
+ }
+ strbuf_put_slice(&sb, SLICE_LIT(" ["));
+ for (u32 j = 0; j < in->nopnds; ++j) {
+ const Operand* op = &in->opnds[j];
+ strbuf_put_slice(&sb, SLICE_LIT(" k"));
+ strbuf_put_u64(&sb, op->kind);
+ if (op->kind == OPK_LOCAL) {
+ strbuf_put_slice(&sb, SLICE_LIT(":L"));
+ strbuf_put_u64(&sb, op->v.local);
+ strbuf_put_slice(&sb, cg_type_is_ptr(o->c, op->type)
+ ? SLICE_LIT("(ptr)")
+ : SLICE_LIT("(val)"));
+ } else if (op->kind == OPK_INDIRECT) {
+ strbuf_put_slice(&sb, SLICE_LIT(":ind(b"));
+ strbuf_put_u64(&sb, op->v.ind.base);
+ strbuf_put_slice(&sb, SLICE_LIT(",i"));
+ strbuf_put_u64(&sb, op->v.ind.index);
+ strbuf_put_slice(&sb, SLICE_LIT(")"));
+ } else if (op->kind == OPK_IMM) {
+ strbuf_put_slice(&sb, SLICE_LIT(":i"));
+ strbuf_put_u64(&sb, (u64)op->v.imm);
}
}
+ strbuf_put_slice(&sb, SLICE_LIT(" ]\n"));
}
- return 0;
+ compiler_panic(o->c, f->desc.loc, "CGIR:\n%s", strbuf_cstr(&sb));
}
-/* ---- func_end: optionally run dry-run passes; replay; reset ---- */
-
-static void w_func_end(CGTarget* t) {
- OptImpl* o = impl_of(t);
- if (!o->f) return;
- opt_frame_home_addr_taken_locals(o->f);
-
- if (o->level >= 2) {
- opt_funcset_add(o, o->f);
- } else if (o->level >= 1) {
- opt_run_o1_pipeline(o);
- } else {
- opt_replay(o->c, o->f, o->target);
+static void opt_on_func(void* user, CgIrFunc* cg_func) {
+ OptImpl* o = (OptImpl*)user;
+ Func* f;
+ opt_dbg_dump_cg(o, cg_func);
+ if (opt_func_needs_direct_replay(o, cg_func)) {
+ opt_replay_cg_ir_direct(o, cg_func);
+ return;
}
- o->f = NULL;
- o->cur = 0;
+ metrics_scope_begin(o->c, "opt.o1.cg_ir_lower");
+ f = opt_func_from_cg_ir(o->c, cg_func);
+ metrics_scope_end(o->c, "opt.o1.cg_ir_lower");
+ if (o->dump_writer && f) opt_ir_dump(f, o->dump_writer);
+ opt_run_o1_native(o, f);
}
-/* ---- finalize / destroy ---- */
-
-static void w_finalize(CGTarget* t) {
- OptImpl* o = impl_of(t);
- CGTarget* wr = o->target;
- if (o->level >= 2 && o->funcs.nfuncs) {
- opt_inline(&o->funcs, 1);
- for (u32 i = 0; i < o->funcs.nfuncs; ++i) {
- o->f = o->funcs.funcs[i];
- if (!o->f) continue;
- if (!func_requires_non_ssa_o2(o->f))
- opt_run_o2_pipeline(o);
- else
- opt_run_o1_pipeline(o);
- }
- o->f = NULL;
- }
- if (wr->finalize) wr->finalize(wr);
+/* Finalize callback installed on the recorder config: forwards to the native
+ * target's own finalize hook when both the target and the hook are present.
+ * `module` is not consulted. */
+static void opt_on_finalize(void* user, const CgIrModule* module) {
+  OptImpl* impl = (OptImpl*)user;
+  (void)module;
+  if (!impl->native) return;
+  if (!impl->native->finalize) return;
+  impl->native->finalize(impl->native);
 }
-static void w_destroy(CGTarget* t) {
- CGTarget* wr = impl_of(t)->target;
- if (wr->destroy) wr->destroy(wr);
+/* Destroy callback installed on the recorder config: forwards to the native
+ * target's destroy hook when both the target and the hook are present. */
+static void opt_on_destroy(void* user) {
+  OptImpl* impl = (OptImpl*)user;
+  if (!impl->native || !impl->native->destroy) return;
+  impl->native->destroy(impl->native);
 }
-/* ---- public dump-writer API ---- */
-
-void opt_set_dump_writer(CGTarget* t, Writer* w) {
- if (!t || t->func_begin != w_func_begin) return;
- impl_of(t)->dump_writer = w;
+/* Returns 1 only when the wrapped target provides all four local-static-data
+ * hooks (begin, write, label_addr, end); returns 0 otherwise, including when
+ * `user` or its target is NULL. `desc` is not inspected. */
+static int opt_on_local_static_data_begin(void* user,
+                                          const CGLocalStaticDataDesc* desc) {
+  OptImpl* impl = (OptImpl*)user;
+  (void)desc;
+  if (!impl || !impl->target) return 0;
+  if (!impl->target->local_static_data_begin) return 0;
+  if (!impl->target->local_static_data_write) return 0;
+  if (!impl->target->local_static_data_label_addr) return 0;
+  return impl->target->local_static_data_end ? 1 : 0;
 }
-/* ---- construction ---- */
-
-CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) {
- if (!target) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(c, loc, "opt_cgtarget_new: target is NULL");
- }
- if (level < 1 || level > 2) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(c, loc, "opt_cgtarget_new: level %d out of range [1, 2]",
- level);
- }
+/* Tail-call veto callback for the recorder config. It never objects: all
+ * three arguments are ignored and NULL is returned unconditionally. */
+static const char* opt_on_tail_call_unrealizable_reason(
+    void* user, const struct CGFuncDesc* caller, const CGCallDesc* call) {
+  (void)call;
+  (void)caller;
+  (void)user;
+  return NULL;
+}
- OptImpl* o = arena_new(c->tu, OptImpl);
- memset(o, 0, sizeof *o);
+CgTarget* opt_cgtarget_new(Compiler* c, CgTarget* target, int level) {
+ if (!target)
+ compiler_panic(c, (SrcLoc){0, 0, 0}, "opt_cgtarget_new: target is NULL");
+ if (level < 1)
+ compiler_panic(c, (SrcLoc){0, 0, 0},
+ "opt_cgtarget_new: level %d out of range", level);
+ OptImpl* o = arena_znew(c->tu, OptImpl);
o->c = c;
o->target = target;
- o->level = level;
- o->funcs.c = c;
- o->funcs.arena = c->tu;
-
- CGTarget* t = &o->base;
- t->c = c;
- t->obj = target->obj;
- t->mc = target->mc;
- t->debug = target->debug;
- t->virtual_regs = 1;
-
- t->func_begin = w_func_begin;
- t->func_end = w_func_end;
-
- t->frame_slot = w_frame_slot;
- t->local = w_local;
- t->local_addr = w_local_addr;
- t->param = w_param;
- t->spill_reg = w_spill_reg;
- t->reload_reg = w_reload_reg;
-
- t->get_allocable_regs = w_get_allocable_regs;
- t->get_phys_regs = w_get_phys_regs;
- t->get_scratch_regs = w_get_scratch_regs;
- t->is_caller_saved = w_is_caller_saved;
- t->call_clobber_mask = w_call_clobber_mask;
- t->return_reg_mask = w_return_reg_mask;
- t->callee_save_mask = w_callee_save_mask;
- t->plan_hard_regs = w_plan_hard_regs;
- t->reserve_hard_regs = w_reserve_hard_regs;
-
- t->label_new = w_label_new;
- t->label_place = w_label_place;
- t->cg_label_to_mc_label = w_cg_label_to_mc_label;
- t->jump = w_jump;
- t->cmp_branch = w_cmp_branch;
- t->switch_ = w_switch_;
- t->indirect_branch = w_indirect_branch;
- t->load_label_addr = w_load_label_addr;
-
- t->scope_begin = w_scope_begin;
- t->scope_else = w_scope_else;
- t->scope_end = w_scope_end;
- t->break_to = w_break_to;
- t->continue_to = w_continue_to;
-
- t->load_imm = w_load_imm;
- t->load_const = w_load_const;
- t->copy = w_copy;
- t->load = w_load;
- t->store = w_store;
- t->addr_of = w_addr_of;
- t->tls_addr_of = w_tls_addr_of;
- t->copy_bytes = w_copy_bytes;
- t->set_bytes = w_set_bytes;
- t->bitfield_load = w_bitfield_load;
- t->bitfield_store = w_bitfield_store;
-
- t->binop = w_binop;
- t->unop = w_unop;
- t->cmp = w_cmp;
- t->convert = w_convert;
-
- t->call = w_call;
- t->tail_call_unrealizable_reason = w_tail_call_unrealizable_reason;
- t->plan_call = w_plan_call;
- t->load_call_arg = w_load_call_arg;
- t->store_call_arg = w_store_call_arg;
- t->store_call_ret = w_store_call_ret;
- t->emit_call_plan = w_emit_call_plan;
- t->ret = w_ret;
-
- t->alloca_ = w_alloca_;
- t->va_start_ = w_va_start_;
- t->va_arg_ = w_va_arg_;
- t->va_end_ = w_va_end_;
- t->va_copy_ = w_va_copy_;
-
- t->atomic_load = w_atomic_load;
- t->atomic_store = w_atomic_store;
- t->atomic_rmw = w_atomic_rmw;
- t->atomic_cas = w_atomic_cas;
- t->fence = w_fence;
-
- t->intrinsic = w_intrinsic;
- t->asm_block = w_asm_block;
- t->resolve_reg_name = w_resolve_reg_name;
- /* Only expose file_scope_asm if the wrapped target overrides it. Native
- * targets leave it NULL so cfree_cg_file_scope_asm falls through to the
- * MC-based asm_parse path; setting the wrapper unconditionally would
- * swallow that NULL signal and silently drop file-scope asm. */
- if (target->file_scope_asm) t->file_scope_asm = w_file_scope_asm;
-
- t->set_loc = w_set_loc;
- t->finalize = w_finalize;
- t->destroy = w_destroy;
-
- return t;
+ o->native = native_direct_target_native(target);
+ o->level = 1;
+
+ CgIrRecorderConfig cfg;
+ memset(&cfg, 0, sizeof cfg);
+ cfg.func_recorded = opt_on_func;
+ cfg.finalize = opt_on_finalize;
+ cfg.destroy = opt_on_destroy;
+ cfg.local_static_data_begin = opt_on_local_static_data_begin;
+ cfg.tail_call_unrealizable_reason = opt_on_tail_call_unrealizable_reason;
+ cfg.user = o;
+ return cg_ir_recorder_new(c, target->obj, &cfg);
+}
+
+/* Public dump-writer setter. In this revision it is a no-op: the recorder for
+ * `t` is looked up, and then both it and `w` are discarded.
+ * NOTE(review): opt_on_func still reads o->dump_writer, but nothing visible
+ * here assigns it -- confirm whether IR dumping is intentionally disabled. */
+void opt_set_dump_writer(CgTarget* t, Writer* w) {
+ CgIrRecorder* rec = cg_ir_recorder_from_target(t);
+ (void)rec;
+ (void)w;
 }
diff --git a/src/opt/pass_addr_fold.c b/src/opt/pass_addr_fold.c
@@ -0,0 +1,760 @@
+/* O1 HIR address-folding passes.
+ *
+ * Split out of pass_o2.c so the always-on O1 lowering pipeline does not depend
+ * on the (currently disabled) O2 pass translation unit. These three passes run
+ * at every opt_level >= 1:
+ *
+ * - opt_addr_xform_pregs: PReg-namespace addr-of-local folding
+ * - opt_promote_scalar_locals: promote non-escaped scalar locals to PRegs
+ * - opt_addr_of_global_cse: hoist/CSE duplicate ADDR_OF(global)
+ */
+#include <cfree/cg.h>
+#include <string.h>
+
+#include "opt/opt_internal.h"
+
+/* Turns `in` into an IR_NOP in place: the opcode is reset and every def and
+ * operand reference is dropped. pass_o2.c keeps its own copy of this helper
+ * for opt_addr_xform; both are file-local statics, so they never clash at
+ * link time. */
+static void addr_inst_remove(Inst* in) {
+  /* Operands first, then defs, then the opcode itself. */
+  in->opnds = NULL;
+  in->nopnds = 0;
+  in->defs = NULL;
+  in->ndefs = 0;
+  in->def = VAL_NONE;
+  in->op = IR_NOP;
+}
+
+/* PReg-namespace variant of opt_addr_xform for the O1 pipeline (no SSA, no
+ * Val-keyed def-use chains). Scans the whole function once per candidate
+ * IR_ADDR_OF def to classify uses of its PReg result.
+ *
+ * Use classifications (see addr_xform_pregs_classify_use):
+ *
+ *   OPF_ESCAPE      The use is something other than a non-observable
+ *                   IR_LOAD/IR_STORE base operand. The IR_ADDR_OF cannot
+ *                   be folded; the local's address truly escapes.
+ *   OPF_FOLD_LOCAL  Zero-EA use: `OPK_INDIRECT(base=p, ofs=0, index=NONE)`
+ *                   in load/store base position. Foldable to OPK_LOCAL.
+ *   OPF_FOLD_EA     EA-shaped use: same load/store base position, but with
+ *                   nonzero `ofs` or `index != REG_NONE`. The EA must stay
+ *                   on the load/store (the operand layout for OPK_LOCAL
+ *                   cannot carry the EA today), so the operand is left
+ *                   alone and the IR_ADDR_OF def must stay alive to feed
+ *                   the OPK_INDIRECT base. The use is still recognized as
+ *                   "non-escape" for downstream analysis (e.g. scalar
+ *                   promotion's non-escape check).
+ *
+ * After classification: if any use is OPF_ESCAPE, or if the PReg has no use
+ * at all, no rewrite happens. If every use is OPF_FOLD_LOCAL, fold all uses
+ * to OPK_LOCAL and NOP the IR_ADDR_OF. If a mix of OPF_FOLD_LOCAL and
+ * OPF_FOLD_EA, fold the zero-EA uses but keep the IR_ADDR_OF alive for the
+ * EA-shaped uses. */
+
+typedef enum AddrXformUseClass {
+ OPF_ESCAPE = 0,
+ OPF_FOLD_LOCAL = 1,
+ OPF_FOLD_EA = 2,
+} AddrXformUseClass;
+
+/* Returns 1 when operand `op_idx` of `in` is the address operand of a
+ * non-observable IR_LOAD (operand 1) or IR_STORE (operand 0); 0 otherwise. */
+static int addr_xform_pregs_main_op_position_ok(Inst* in, u32 op_idx) {
+  IROp opcode = (IROp)in->op;
+  u32 addr_idx;
+  if (opcode == IR_LOAD)
+    addr_idx = 1u;
+  else if (opcode == IR_STORE)
+    addr_idx = 0u;
+  else
+    return 0;
+  if (opt_mem_observable(&in->extra.mem)) return 0;
+  return op_idx == addr_idx;
+}
+
+/* Classifies one operand of `in` that references the candidate PReg. Anything
+ * that is not an OPK_INDIRECT in load/store address position escapes; a bare
+ * base (zero offset, no index) folds to the local; the rest is EA-shaped. */
+static AddrXformUseClass addr_xform_pregs_classify_use(Inst* in, Operand* op,
+                                                       u32 op_idx) {
+  int bare_base;
+  if (op->kind != OPK_INDIRECT ||
+      !addr_xform_pregs_main_op_position_ok(in, op_idx))
+    return OPF_ESCAPE;
+  bare_base = op->v.ind.ofs == 0 && op->v.ind.index == (Reg)REG_NONE;
+  return bare_base ? OPF_FOLD_LOCAL : OPF_FOLD_EA;
+}
+
+/* Returns 1 when `op` names PReg `p` directly (OPK_REG) or as the base or
+ * index of an OPK_INDIRECT; 0 for NULL and for every other operand kind. */
+static int addr_xform_pregs_op_uses(const Operand* op, PReg p) {
+  if (!op) return 0;
+  switch (op->kind) {
+    case OPK_REG:
+      return (PReg)op->v.reg == p;
+    case OPK_INDIRECT:
+      return (PReg)op->v.ind.base == p ||
+             (op->v.ind.index != (Reg)REG_NONE && (PReg)op->v.ind.index == p);
+    default:
+      return 0;
+  }
+}
+
+/* Returns 1 when the ABI value's storage operand or any of its part operands
+ * references PReg `p`; 0 otherwise (including for a NULL value). */
+static int addr_xform_pregs_abivalue_uses(const CGABIValue* v, PReg p) {
+  u32 part = 0;
+  if (!v) return 0;
+  if (addr_xform_pregs_op_uses(&v->storage, p)) return 1;
+  while (part < v->nparts) {
+    const Operand* part_op = (const Operand*)&v->parts[part].op;
+    if (addr_xform_pregs_op_uses(part_op, p)) return 1;
+    ++part;
+  }
+  return 0;
+}
+
+/* Reports whether PReg `p` is referenced by an operand stored in the aux
+ * payload of `in`: call descriptors/plans, return values, scope conditions,
+ * asm operands and intrinsic operands. Other opcodes, and a NULL payload,
+ * yield 0. */
+static int addr_xform_pregs_aux_uses(Inst* in, PReg p) {
+  u32 k;
+  switch ((IROp)in->op) {
+    case IR_CALL: {
+      IRCallAux* call = (IRCallAux*)in->extra.aux;
+      if (!call) return 0;
+      if (!call->use_plan_replay) {
+        /* Descriptor form: callee, ABI argument values, ABI return value. */
+        if (addr_xform_pregs_op_uses(&call->desc.callee, p)) return 1;
+        for (k = 0; k < call->desc.nargs; ++k) {
+          const CGABIValue* arg = (const CGABIValue*)&call->desc.args[k];
+          if (addr_xform_pregs_abivalue_uses(arg, p)) return 1;
+        }
+        if (addr_xform_pregs_abivalue_uses(&call->desc.ret, p)) return 1;
+        return 0;
+      }
+      /* Plan form: callee, argument sources, return destinations. */
+      if (addr_xform_pregs_op_uses(&call->plan.callee, p)) return 1;
+      for (k = 0; k < call->plan.nargs; ++k)
+        if (addr_xform_pregs_op_uses(&call->plan.args[k].src, p)) return 1;
+      for (k = 0; k < call->plan.nrets; ++k)
+        if (addr_xform_pregs_op_uses(&call->plan.rets[k].dst, p)) return 1;
+      return 0;
+    }
+    case IR_RET: {
+      IRRetAux* ret = (IRRetAux*)in->extra.aux;
+      if (!ret || !ret->present) return 0;
+      return addr_xform_pregs_abivalue_uses(&ret->val, p);
+    }
+    case IR_SCOPE_BEGIN: {
+      IRScopeAux* scope = (IRScopeAux*)in->extra.aux;
+      if (!scope) return 0;
+      return addr_xform_pregs_op_uses(&scope->desc.cond, p);
+    }
+    case IR_ASM_BLOCK: {
+      IRAsmAux* as = (IRAsmAux*)in->extra.aux;
+      if (!as) return 0;
+      for (k = 0; k < as->nin; ++k)
+        if (addr_xform_pregs_op_uses(&as->in_ops[k], p)) return 1;
+      for (k = 0; k < as->nout; ++k)
+        if (addr_xform_pregs_op_uses(&as->out_ops[k], p)) return 1;
+      return 0;
+    }
+    case IR_INTRINSIC: {
+      IRIntrinAux* intr = (IRIntrinAux*)in->extra.aux;
+      if (!intr) return 0;
+      for (k = 0; k < intr->narg; ++k)
+        if (addr_xform_pregs_op_uses(&intr->args[k], p)) return 1;
+      for (k = 0; k < intr->ndst; ++k)
+        if (addr_xform_pregs_op_uses(&intr->dsts[k], p)) return 1;
+      return 0;
+    }
+    default:
+      return 0;
+  }
+}
+
+/* Scans every instruction of `f` except `def_inst` for uses of `p`.
+ * Returns 0 as soon as an escaping operand use or any aux-payload use is
+ * found, and also when `p` has no use at all. Otherwise returns 1 and, if
+ * `out_has_ea` is non-NULL, stores whether at least one use was EA-shaped
+ * (in which case the IR_ADDR_OF must stay alive to feed the OPK_INDIRECT
+ * base). `*out_has_ea` is not written on the early-exit paths. */
+static int addr_xform_pregs_classify(Func* f, PReg p, Inst* def_inst,
+                                     int* out_has_ea) {
+  int saw_use = 0;
+  int saw_ea = 0;
+  for (u32 bi = 0; bi < f->nblocks; ++bi) {
+    Block* blk = &f->blocks[bi];
+    for (u32 ii = 0; ii < blk->ninsts; ++ii) {
+      Inst* user = &blk->insts[ii];
+      if (user == def_inst) continue;
+      for (u32 oi = 0; oi < user->nopnds; ++oi) {
+        Operand* opnd = &user->opnds[oi];
+        if (!addr_xform_pregs_op_uses(opnd, p)) continue;
+        switch (addr_xform_pregs_classify_use(user, opnd, oi)) {
+          case OPF_ESCAPE:
+            return 0;
+          case OPF_FOLD_EA:
+            saw_ea = 1;
+            saw_use = 1;
+            break;
+          default:
+            saw_use = 1;
+            break;
+        }
+      }
+      if (addr_xform_pregs_aux_uses(user, p)) return 0;
+    }
+  }
+  if (out_has_ea) *out_has_ea = saw_ea;
+  return saw_use;
+}
+
+/* O1 address-of-local folding over PRegs.
+ *
+ * Does nothing when `f` is NULL or when f->opt_reg_ssa / f->opt_rewritten is
+ * set. Otherwise, for every `IR_ADDR_OF dst(OPK_REG), OPK_LOCAL` whose dst
+ * PReg is valid and whose uses all classify as non-escaping, each zero-EA
+ * `OPK_INDIRECT(base=dst)` use is rewritten to the OPK_LOCAL operand. The
+ * IR_ADDR_OF itself is NOPed unless an EA-shaped use still needs it. When
+ * anything was processed, FSF_ADDR_TAKEN is recomputed from the remaining
+ * IR_ADDR_OF defs and the def-use, dominator and loop analyses are
+ * invalidated. */
+void opt_addr_xform_pregs(Func* f) {
+ if (!f || f->opt_reg_ssa || f->opt_rewritten) return;
+ int changed = 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if ((IROp)in->op != IR_ADDR_OF) continue;
+ if (in->nopnds < 2) continue;
+ if (in->opnds[0].kind != OPK_REG) continue;
+ if (in->opnds[1].kind != OPK_LOCAL) continue;
+ PReg p = (PReg)in->opnds[0].v.reg;
+ if (!opt_reg_valid(f, p)) continue;
+ int has_ea = 0;
+ if (!addr_xform_pregs_classify(f, p, in, &has_ea)) continue;
+ Operand local = in->opnds[1];
+ /* Fold every zero-EA use of p to OPK_LOCAL. EA-shaped uses are left
+ * as OPK_INDIRECT(base=p, ofs, index, log2_scale) so the EA stays on
+ * the load/store; the IR_ADDR_OF def must survive to feed them. */
+ for (u32 bb = 0; bb < f->nblocks; ++bb) {
+ Block* rb = &f->blocks[bb];
+ for (u32 ii = 0; ii < rb->ninsts; ++ii) {
+ Inst* use = &rb->insts[ii];
+ if (use == in) continue;
+ for (u32 o = 0; o < use->nopnds; ++o) {
+ Operand* op = &use->opnds[o];
+ if (op->kind != OPK_INDIRECT) continue;
+ if ((PReg)op->v.ind.base != p) continue;
+ if (op->v.ind.ofs != 0 || op->v.ind.index != (Reg)REG_NONE)
+ continue; /* EA-shaped; leave alone */
+ /* The folded operand takes the access type of the load/store when one
+ * is recorded, else the type carried by the IR_ADDR_OF source. */
+ Operand folded = local;
+ folded.type =
+ use->extra.mem.type ? use->extra.mem.type : local.type;
+ *op = folded;
+ }
+ }
+ }
+ /* `changed` is set even when only EA-shaped uses exist and no operand
+ * was rewritten; the ADDR_TAKEN rescan below is still correct then. */
+ if (!has_ea) addr_inst_remove(in);
+ changed = 1;
+ }
+ }
+ /* After folding, rescan for IR_ADDR_OF(OPK_LOCAL) defs that are still
+ * present and clear FSF_ADDR_TAKEN on every frame slot that has none.
+ * The frontend-set ADDR_TAKEN flag is conservative; if we proved the
+ * address no longer escapes, downstream passes (opt_promote_scalar_locals)
+ * can take advantage of the actual non-escape state. */
+ if (changed) {
+ /* One byte per slot, indexed by slot id - 1; at least one element is
+ * requested so a function with no slots still gets a valid array. */
+ u8* still_taken =
+ arena_zarray(f->arena, u8, f->nframe_slots ? f->nframe_slots : 1u);
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if ((IROp)in->op != IR_ADDR_OF) continue;
+ if (in->nopnds < 2 || in->opnds[1].kind != OPK_LOCAL) continue;
+ FrameSlot slot = in->opnds[1].v.frame_slot;
+ if (slot && slot <= f->nframe_slots) still_taken[slot - 1u] = 1;
+ }
+ }
+ for (u32 s = 0; s < f->nframe_slots; ++s) {
+ if (!still_taken[s]) f->frame_slots[s].flags &= (u16)~FSF_ADDR_TAKEN;
+ }
+ }
+ if (changed)
+ opt_analysis_invalidate(
+ f, OPT_ANALYSIS_DEF_USE | OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
+}
+
+/* Scalar local promotion for the O1 pipeline. Runs after
+ * `opt_addr_xform_pregs` has folded zero-EA `OPK_INDIRECT(p)` uses to
+ * `OPK_LOCAL(slot)` and retired non-escaping `IR_ADDR_OF` defs. For each
+ * frame slot that is now only referenced as the base of matching-type,
+ * non-observable `IR_LOAD`/`IR_STORE`, the slot is replaced by a fresh
+ * mutable PReg: each store becomes `IR_COPY P_slot, src` (or `IR_LOAD_IMM`
+ * for an immediate source), each load becomes `IR_COPY dst, P_slot`. The
+ * slot becomes unreferenced and the backend drops it from the frame.
+ *
+ * A mutable PReg in `-O1` IR has the same data-flow semantics as a named
+ * memory cell that does not escape (multiple defs, multiple uses, value at
+ * a use comes from whichever def reaches it via CFG edges). No phis are
+ * required because the IR model has no phis; PReg flow becomes hard-reg
+ * flow after regalloc, and regalloc already handles it.
+ *
+ * Conditions for promotion (per slot):
+ *
+ * 1. Slot kind is FS_LOCAL (real locals, not spills, sret, alloca).
+ * 2. Slot has neither the FSF_ADDR_TAKEN nor the FSF_VOLATILE flag set
+ * (checked after `opt_addr_xform_pregs` has cleared the conservative
+ * ADDR_TAKEN flag for slots whose IR_ADDR_OF defs were all retired).
+ * 3. Slot's declared type is scalar (int, float, bool, ptr, enum).
+ * 4. Every appearance of `OPK_LOCAL(slot)` in any instruction operand is
+ * either:
+ * - `IR_LOAD.opnds[1]` whose access type is unset or equals the slot
+ * type, no observable mem flags, dst is OPK_REG;
+ * - `IR_STORE.opnds[0]` whose access type is unset or equals the slot
+ * type, no observable mem flags, src is OPK_REG or OPK_IMM.
+ * 5. Slot does not appear in any aux operand position (calls, asm, etc.)
+ * or as an OPK_LOCAL anywhere else (e.g., a surviving IR_ADDR_OF).
+ *
+ * Param-slot case: FS_PARAM slots are excluded. The backend prologue is
+ * responsible for moving the ABI-incoming hard reg into the slot, and that
+ * move is not visible in the IR (there is no `IR_STORE OPK_LOCAL(slot)` to
+ * rewrite). At O1 the wrapper already places scalar params in REG storage
+ * when the frontend does not force a memory home, so the param's value
+ * arrives in a PReg without needing this pass. If a future scheme records
+ * the entry-move as a synthetic IR_STORE OPK_LOCAL(slot), this pass would
+ * promote it the same way it promotes any other store-to-slot. */
+
+/* Returns 1 when `ty` is a non-zero type id whose kind is bool, int, float,
+ * pointer or enum; 0 for every other kind and for a zero type id. */
+static int promote_local_type_is_scalar(Func* f, CfreeCgTypeId ty) {
+  CfreeCgTypeKind kind;
+  if (!ty) return 0;
+  kind = cfree_cg_type_kind((CfreeCompiler*)f->c, ty);
+  return kind == CFREE_CG_TYPE_BOOL || kind == CFREE_CG_TYPE_INT ||
+         kind == CFREE_CG_TYPE_FLOAT || kind == CFREE_CG_TYPE_PTR ||
+         kind == CFREE_CG_TYPE_ENUM;
+}
+
+/* Returns 1 when `op` is a non-NULL OPK_LOCAL operand naming `slot`. */
+static int promote_op_uses_slot(const Operand* op, FrameSlot slot) {
+  if (!op || op->kind != OPK_LOCAL) return 0;
+  return op->v.frame_slot == slot;
+}
+
+/* Returns 1 when the ABI value's storage operand or any of its part operands
+ * is OPK_LOCAL(slot); 0 otherwise (including for a NULL value). */
+static int promote_abivalue_uses_slot(const CGABIValue* v, FrameSlot slot) {
+  u32 part = 0;
+  if (!v) return 0;
+  if (promote_op_uses_slot(&v->storage, slot)) return 1;
+  while (part < v->nparts) {
+    const Operand* part_op = (const Operand*)&v->parts[part].op;
+    if (promote_op_uses_slot(part_op, slot)) return 1;
+    ++part;
+  }
+  return 0;
+}
+
+/* Reports whether OPK_LOCAL(slot) appears in an operand stored in the aux
+ * payload of `in`: call descriptors/plans, return values, scope conditions,
+ * asm operands and intrinsic operands. Other opcodes, and a NULL payload,
+ * yield 0. */
+static int promote_aux_uses_slot(const Inst* in, FrameSlot slot) {
+  u32 k;
+  switch ((IROp)in->op) {
+    case IR_CALL: {
+      IRCallAux* call = (IRCallAux*)in->extra.aux;
+      if (!call) return 0;
+      if (!call->use_plan_replay) {
+        /* Descriptor form: callee, ABI argument values, ABI return value. */
+        if (promote_op_uses_slot(&call->desc.callee, slot)) return 1;
+        for (k = 0; k < call->desc.nargs; ++k) {
+          const CGABIValue* arg = (const CGABIValue*)&call->desc.args[k];
+          if (promote_abivalue_uses_slot(arg, slot)) return 1;
+        }
+        if (promote_abivalue_uses_slot(&call->desc.ret, slot)) return 1;
+        return 0;
+      }
+      /* Plan form: callee, argument sources, return destinations. */
+      if (promote_op_uses_slot(&call->plan.callee, slot)) return 1;
+      for (k = 0; k < call->plan.nargs; ++k)
+        if (promote_op_uses_slot(&call->plan.args[k].src, slot)) return 1;
+      for (k = 0; k < call->plan.nrets; ++k)
+        if (promote_op_uses_slot(&call->plan.rets[k].dst, slot)) return 1;
+      return 0;
+    }
+    case IR_RET: {
+      IRRetAux* ret = (IRRetAux*)in->extra.aux;
+      if (!ret || !ret->present) return 0;
+      return promote_abivalue_uses_slot(&ret->val, slot);
+    }
+    case IR_SCOPE_BEGIN: {
+      IRScopeAux* scope = (IRScopeAux*)in->extra.aux;
+      if (!scope) return 0;
+      return promote_op_uses_slot(&scope->desc.cond, slot);
+    }
+    case IR_ASM_BLOCK: {
+      IRAsmAux* as = (IRAsmAux*)in->extra.aux;
+      if (!as) return 0;
+      for (k = 0; k < as->nin; ++k)
+        if (promote_op_uses_slot(&as->in_ops[k], slot)) return 1;
+      for (k = 0; k < as->nout; ++k)
+        if (promote_op_uses_slot(&as->out_ops[k], slot)) return 1;
+      return 0;
+    }
+    case IR_INTRINSIC: {
+      IRIntrinAux* intr = (IRIntrinAux*)in->extra.aux;
+      if (!intr) return 0;
+      for (k = 0; k < intr->narg; ++k)
+        if (promote_op_uses_slot(&intr->args[k], slot)) return 1;
+      for (k = 0; k < intr->ndst; ++k)
+        if (promote_op_uses_slot(&intr->dsts[k], slot)) return 1;
+      return 0;
+    }
+    default:
+      return 0;
+  }
+}
+
+/* Per-inst check of how `in` relates to frame slot `slot`. Returns:
+ * 1 = "instruction touches slot in a promotable position" (load/store base).
+ * 0 = "instruction does not touch slot at all".
+ * -1 = "instruction touches slot in a non-promotable way" (e.g., wrong
+ * operand position, type mismatch, observable flags, aux use).
+ * A recorded access type must equal `slot_ty`; an unset (zero) access type
+ * is accepted. The aux-payload check runs last, so a -1 from it overrides
+ * an otherwise promotable load/store. */
+static int promote_inst_classify(const Inst* in, FrameSlot slot,
+ CfreeCgTypeId slot_ty) {
+ int touched = 0;
+ /* IR_LOAD: opnds[0]=dst REG, opnds[1]=addr (allowed: OPK_LOCAL slot). */
+ if ((IROp)in->op == IR_LOAD) {
+ if (in->nopnds >= 2 && promote_op_uses_slot(&in->opnds[1], slot)) {
+ if (opt_mem_observable(&in->extra.mem)) return -1;
+ if (in->opnds[0].kind != OPK_REG) return -1;
+ CfreeCgTypeId at = in->extra.mem.type;
+ if (at && at != slot_ty) return -1;
+ touched = 1;
+ }
+ /* opnds[0] is the dst REG — never OPK_LOCAL by construction. */
+ if (in->nopnds >= 1 && promote_op_uses_slot(&in->opnds[0], slot)) return -1;
+ } else if ((IROp)in->op == IR_STORE) {
+ /* IR_STORE: opnds[0]=addr (allowed: OPK_LOCAL slot), opnds[1]=src value,
+ * which must be OPK_REG or OPK_IMM. */
+ if (in->nopnds >= 1 && promote_op_uses_slot(&in->opnds[0], slot)) {
+ if (opt_mem_observable(&in->extra.mem)) return -1;
+ if (in->nopnds < 2) return -1;
+ Operand* src = &in->opnds[1];
+ if (src->kind != OPK_REG && src->kind != OPK_IMM) return -1;
+ CfreeCgTypeId at = in->extra.mem.type;
+ if (at && at != slot_ty) return -1;
+ touched = 1;
+ }
+ /* opnds[1] is the src value — should never be OPK_LOCAL for a scalar. */
+ if (in->nopnds >= 2 && promote_op_uses_slot(&in->opnds[1], slot)) return -1;
+ } else {
+ /* Any other instruction with an OPK_LOCAL(slot) operand blocks promotion.
+ */
+ for (u32 o = 0; o < in->nopnds; ++o)
+ if (promote_op_uses_slot(&in->opnds[o], slot)) return -1;
+ }
+ if (promote_aux_uses_slot(in, slot)) return -1;
+ return touched;
+}
+
+/* Rewrite an `IR_STORE OPK_LOCAL(slot), src` into a def of `preg`.
+ *
+ * An OPK_IMM source becomes `IR_LOAD_IMM preg` with the immediate stored in
+ * `in->extra.imm`; any other source becomes `IR_COPY preg, src`. The new
+ * operands carry type `ty` and register class `cls`, and `in->def` is set to
+ * `preg`.
+ *
+ * `in->extra` is cleared on both paths before it is repopulated, so the old
+ * IR_STORE memory descriptor can never leak into the rewritten instruction.
+ * This matches promote_rewrite_load and the IR_COPY path. */
+static void promote_rewrite_store(Func* f, Inst* in, PReg preg,
+                                  CfreeCgTypeId ty, u8 cls) {
+  /* Copy the source by value before the operand array is replaced. */
+  Operand src = in->opnds[1];
+  Operand* opnds = arena_array(f->arena, Operand, 2);
+  memset(opnds, 0, sizeof(Operand) * 2);
+  opnds[0].kind = OPK_REG;
+  opnds[0].type = ty;
+  opnds[0].cls = cls;
+  opnds[0].v.reg = (Reg)preg;
+  in->type = ty;
+  in->def = (Val)preg;
+  in->opnds = opnds;
+  memset(&in->extra, 0, sizeof in->extra);
+  if (src.kind == OPK_IMM) {
+    in->op = IR_LOAD_IMM;
+    in->nopnds = 1;
+    in->extra.imm = src.v.imm;
+  } else {
+    opnds[1] = src;
+    opnds[1].type = ty;
+    opnds[1].cls = cls;
+    in->op = IR_COPY;
+    in->nopnds = 2;
+  }
+}
+
+/* Turns `IR_LOAD dst, OPK_LOCAL(slot)` into `IR_COPY dst, preg`. The original
+ * dst operand is kept but retagged with `ty`/`cls`; the source operand is
+ * built from scratch, and the load's `extra` payload is zeroed. */
+static void promote_rewrite_load(Func* f, Inst* in, PReg preg, CfreeCgTypeId ty,
+                                 u8 cls) {
+  Operand kept_dst = in->opnds[0];
+  Operand* pair = arena_array(f->arena, Operand, 2);
+  Operand* dst = &pair[0];
+  Operand* src = &pair[1];
+
+  *dst = kept_dst;
+  dst->type = ty;
+  dst->cls = cls;
+
+  memset(src, 0, sizeof *src);
+  src->kind = OPK_REG;
+  src->type = ty;
+  src->cls = cls;
+  src->v.reg = (Reg)preg;
+
+  memset(&in->extra, 0, sizeof in->extra);
+  in->opnds = pair;
+  in->nopnds = 2;
+  in->type = ty;
+  in->op = IR_COPY;
+}
+
+/* Promotes non-escaping scalar FS_LOCAL frame slots to fresh PRegs.
+ *
+ * Does nothing when `f` is NULL, when f->opt_reg_ssa / f->opt_rewritten is
+ * set, or when the function has no frame slots. For each candidate slot the
+ * whole function is classified with promote_inst_classify. If no instruction
+ * rejects the slot and at least one load/store touches it, a PReg is
+ * allocated and every such load/store is rewritten into a register def/use.
+ * When any slot was promoted, the def-use, dominator and loop analyses are
+ * invalidated. */
+void opt_promote_scalar_locals(Func* f) {
+ if (!f || f->opt_reg_ssa || f->opt_rewritten) return;
+ if (!f->nframe_slots) return;
+ int changed = 0;
+ for (u32 sidx = 0; sidx < f->nframe_slots; ++sidx) {
+ IRFrameSlot* slot = &f->frame_slots[sidx];
+ FrameSlot id = slot->id;
+ /* FS_PARAM slots are owned by the backend prologue (which copies the
+ * ABI-incoming hard reg into the slot before any user IR runs); there
+ * is no IR-level store to rewrite. At O1, the wrapper already places
+ * scalar params in REG storage when the frontend does not force a
+ * memory home, so the FS_PARAM promotion path is normally a no-op.
+ * Only promote FS_LOCAL slots. */
+ if (slot->kind != FS_LOCAL) continue;
+ if (slot->flags & (FSF_ADDR_TAKEN | FSF_VOLATILE)) continue;
+ if (!promote_local_type_is_scalar(f, slot->type)) continue;
+ /* Classification sweep: stop at the first instruction that rejects the
+ * slot; otherwise count the promotable loads/stores that touch it. */
+ int touched_count = 0;
+ int rejected = 0;
+ for (u32 b = 0; b < f->nblocks && !rejected; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ int r = promote_inst_classify(in, id, slot->type);
+ if (r < 0) {
+ rejected = 1;
+ break;
+ }
+ touched_count += r;
+ }
+ }
+ /* A slot that no load/store touches is skipped; no PReg is allocated. */
+ if (rejected || !touched_count) continue;
+ /* Float-kind slots get an RC_FP register; every other scalar kind gets
+ * RC_INT. */
+ u8 cls = (cfree_cg_type_kind((CfreeCompiler*)f->c, slot->type) ==
+ CFREE_CG_TYPE_FLOAT)
+ ? RC_FP
+ : RC_INT;
+ PReg preg = ir_alloc_preg(f, slot->type, cls);
+ /* Rewrite sweep: the operand positions tested here are the same ones the
+ * classification sweep accepted. */
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if ((IROp)in->op == IR_LOAD && in->nopnds >= 2 &&
+ promote_op_uses_slot(&in->opnds[1], id)) {
+ promote_rewrite_load(f, in, preg, slot->type, cls);
+ } else if ((IROp)in->op == IR_STORE && in->nopnds >= 2 &&
+ promote_op_uses_slot(&in->opnds[0], id)) {
+ promote_rewrite_store(f, in, preg, slot->type, cls);
+ }
+ }
+ }
+ /* The frame slot is now unreferenced. Leave the slot table entry in
+ * place (compaction would require remapping every other slot id);
+ * the backend's frame layout pass simply omits unreferenced slots. */
+ changed = 1;
+ }
+ if (changed)
+ opt_analysis_invalidate(
+ f, OPT_ANALYSIS_DEF_USE | OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
+}
+
+/* CSE-style hoist of `IR_ADDR_OF(OPK_GLOBAL{sym, addend})` defs that appear
+ * more than once in the same function. The address is a link-time constant
+ * (TLS and IFUNC live on separate IROps), so all occurrences compute the
+ * same value; consolidating to a single entry-block def shrinks each loop
+ * body by the per-iter `adrp`/`add` pair the backend would otherwise re-emit.
+ *
+ * Implementation:
+ * - Walk all insts, group ADDR_OF defs by (sym, addend).
+ * - For each key with >= 2 defs: allocate a fresh PReg, materialize one
+ * IR_ADDR_OF in block 0 (after any IR_PARAM_DECL prologue), build a
+ * preg-remap from each original def-PReg to the new PReg, and NOP each
+ * original def.
+ * - One IR walk applies the remap to every operand `v.reg` /
+ * `v.ind.base`.
+ *
+ * Runs after opt_addr_xform_pregs so local addr-of has already been folded
+ * out; the remaining IR_ADDR_OF defs are global. */
+
+/* One table entry per distinct (sym, addend) pair found among the
+ * IR_ADDR_OF(OPK_GLOBAL) defs of a function. */
+typedef struct AddrCseEntry {
+ ObjSymId sym; /* key: global symbol whose address is taken */
+ i64 addend; /* key: addend applied to the symbol */
+ PReg canonical; /* freshly allocated PReg, def in block 0 */
+ CfreeCgTypeId addr_type; /* operand[0].type from the first def */
+ u8 cls; /* operand[0].cls from the first def */
+ u32 count; /* number of original ADDR_OF defs seen */
+} AddrCseEntry;
+
+/* Returns the index of the entry keyed by (sym, addend), appending a new
+ * entry when none exists. The table grows geometrically (16 entries first,
+ * then doubling) from `arena`; a new entry starts zeroed with
+ * `canonical == PREG_NONE` and `count == 0`. `*entries`, `*n` and `*cap` are
+ * updated in place. */
+static u32 addr_cse_find_or_add(AddrCseEntry** entries, u32* n, u32* cap,
+                                Arena* arena, ObjSymId sym, i64 addend) {
+  AddrCseEntry* table = *entries;
+  u32 used = *n;
+  for (u32 k = 0; k < used; ++k)
+    if (table[k].sym == sym && table[k].addend == addend) return k;
+  if (used == *cap) {
+    u32 grown = *cap ? *cap * 2u : 16u;
+    AddrCseEntry* bigger = arena_array(arena, AddrCseEntry, grown);
+    if (table) memcpy(bigger, table, sizeof(AddrCseEntry) * used);
+    table = bigger;
+    *entries = bigger;
+    *cap = grown;
+  }
+  *n = used + 1u;
+  AddrCseEntry* fresh = &table[used];
+  memset(fresh, 0, sizeof *fresh);
+  fresh->sym = sym;
+  fresh->addend = addend;
+  fresh->canonical = PREG_NONE;
+  fresh->count = 0;
+  return used;
+}
+
+/* Applies the PReg remap table to one operand. `remap[p] == 0` means "leave p
+ * alone"; PReg 0 and PREG_NONE are never looked up. OPK_REG operands have
+ * their register remapped; OPK_INDIRECT operands have base and (when present)
+ * index remapped independently. Other operand kinds and NULL are ignored. */
+static void addr_cse_apply_to_operand(Operand* op, const PReg* remap) {
+  PReg r;
+  if (!op) return;
+  switch (op->kind) {
+    case OPK_REG:
+      r = (PReg)op->v.reg;
+      if (r == PREG_NONE || r == 0 || remap[r] == 0) return;
+      op->v.reg = remap[r];
+      return;
+    case OPK_INDIRECT:
+      r = (PReg)op->v.ind.base;
+      if (r != PREG_NONE && r != 0 && remap[r] != 0) op->v.ind.base = remap[r];
+      if (op->v.ind.index == (Reg)REG_NONE) return;
+      r = (PReg)op->v.ind.index;
+      if (r != PREG_NONE && r != 0 && remap[r] != 0)
+        op->v.ind.index = remap[r];
+      return;
+    default:
+      return;
+  }
+}
+
+/* Applies the PReg remap table to every operand of `in`: the inline operand
+ * array first, then the operands held in the aux payload of IR_CALL (both
+ * replay variants), IR_RET, IR_SCOPE_BEGIN, IR_ASM_BLOCK and IR_INTRINSIC.
+ *
+ * IR_SCOPE_BEGIN is handled here because the use scanners in this file
+ * (addr_xform_pregs_aux_uses / promote_aux_uses_slot) treat its condition as
+ * an operand location. Skipping it would leave the condition naming a PReg
+ * whose defining IR_ADDR_OF the CSE pass is documented to NOP. */
+static void addr_cse_apply_to_inst(Inst* in, const PReg* remap) {
+  u32 k;
+  for (k = 0; k < in->nopnds; ++k)
+    addr_cse_apply_to_operand(&in->opnds[k], remap);
+  switch ((IROp)in->op) {
+    case IR_CALL: {
+      IRCallAux* call = (IRCallAux*)in->extra.aux;
+      if (!call) break;
+      if (call->use_plan_replay) {
+        /* Plan form: callee, argument sources, return destinations. */
+        addr_cse_apply_to_operand(&call->plan.callee, remap);
+        for (k = 0; k < call->plan.nargs; ++k)
+          addr_cse_apply_to_operand(&call->plan.args[k].src, remap);
+        for (k = 0; k < call->plan.nrets; ++k)
+          addr_cse_apply_to_operand(&call->plan.rets[k].dst, remap);
+        break;
+      }
+      /* Descriptor form: callee, each ABI argument, the ABI return value. */
+      addr_cse_apply_to_operand(&call->desc.callee, remap);
+      for (k = 0; k < call->desc.nargs; ++k) {
+        CGABIValue* arg = (CGABIValue*)&call->desc.args[k];
+        addr_cse_apply_to_operand(&arg->storage, remap);
+        for (u32 part = 0; part < arg->nparts; ++part)
+          addr_cse_apply_to_operand((Operand*)&arg->parts[part].op, remap);
+      }
+      addr_cse_apply_to_operand(&call->desc.ret.storage, remap);
+      for (k = 0; k < call->desc.ret.nparts; ++k)
+        addr_cse_apply_to_operand((Operand*)&call->desc.ret.parts[k].op,
+                                  remap);
+      break;
+    }
+    case IR_RET: {
+      IRRetAux* ret = (IRRetAux*)in->extra.aux;
+      if (!ret || !ret->present) break;
+      addr_cse_apply_to_operand(&ret->val.storage, remap);
+      for (k = 0; k < ret->val.nparts; ++k)
+        addr_cse_apply_to_operand((Operand*)&ret->val.parts[k].op, remap);
+      break;
+    }
+    case IR_SCOPE_BEGIN: {
+      IRScopeAux* scope = (IRScopeAux*)in->extra.aux;
+      if (!scope) break;
+      addr_cse_apply_to_operand((Operand*)&scope->desc.cond, remap);
+      break;
+    }
+    case IR_ASM_BLOCK: {
+      IRAsmAux* as = (IRAsmAux*)in->extra.aux;
+      if (!as) break;
+      for (k = 0; k < as->nin; ++k)
+        addr_cse_apply_to_operand(&as->in_ops[k], remap);
+      for (k = 0; k < as->nout; ++k)
+        addr_cse_apply_to_operand(&as->out_ops[k], remap);
+      break;
+    }
+    case IR_INTRINSIC: {
+      IRIntrinAux* intr = (IRIntrinAux*)in->extra.aux;
+      if (!intr) break;
+      for (k = 0; k < intr->narg; ++k)
+        addr_cse_apply_to_operand(&intr->args[k], remap);
+      for (k = 0; k < intr->ndst; ++k)
+        addr_cse_apply_to_operand(&intr->dsts[k], remap);
+      break;
+    }
+    default:
+      break;
+  }
+}
+
+/* Opens a gap of `k` zero-filled instructions in `bl` and returns a pointer to
+ * the first new slot.
+ *
+ * f:  owner of the arena used when the block has to grow.
+ * bl: block to modify; ninsts grows by `k`.
+ * at: insertion index, clamped to the current end of the block.
+ * k:  number of slots to open.
+ *
+ * Instructions at or after the insertion index move up by `k`. When the
+ * current capacity is too small a larger array is taken from f->arena and
+ * becomes the block's storage, so Inst pointers obtained before the call
+ * should not be reused. */
+static Inst* block_insert_at(Func* f, Block* bl, u32 at, u32 k) {
+  const u32 pos = at > bl->ninsts ? bl->ninsts : at;
+  const u32 tail = bl->ninsts - pos; /* instructions that have to shift up */
+  const u32 need = bl->ninsts + k;
+
+  if (need > bl->cap) {
+    /* Grow by doubling, starting from 8 for a block with no storage yet. */
+    u32 grown = bl->cap ? bl->cap : 8u;
+    while (grown < need) grown *= 2u;
+    Inst* fresh = arena_zarray(f->arena, Inst, grown);
+    if (bl->insts && pos) {
+      memcpy(fresh, bl->insts, sizeof(Inst) * pos);
+    }
+    if (bl->insts && tail) {
+      memcpy(fresh + pos + k, bl->insts + pos, sizeof(Inst) * tail);
+    }
+    bl->insts = fresh;
+    bl->cap = grown;
+  } else if (tail) {
+    /* Enough room: slide the tail in place; the ranges may overlap. */
+    memmove(bl->insts + pos + k, bl->insts + pos, sizeof(Inst) * tail);
+  }
+
+  /* The in-place path leaves old instructions in the gap; clear them. */
+  if (k) memset(&bl->insts[pos], 0, sizeof(Inst) * k);
+  bl->ninsts = need;
+  return &bl->insts[pos];
+}
+
+/* True when `in` has the shape `reg = ADDR_OF(global)`, the only form the
+ * global-address CSE indexes and rewrites. */
+static int addr_cse_defines_global_addr(const Inst* in) {
+  return (IROp)in->op == IR_ADDR_OF && in->nopnds >= 2 &&
+         in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_GLOBAL;
+}
+
+/* Function-wide CSE of `reg = ADDR_OF(global)`.
+ *
+ * Every (sym, addend) key that is materialised two or more times gets one
+ * canonical pseudo-register. The original defs for that key are turned into
+ * IR_NOP, a single ADDR_OF defining the canonical register is inserted in the
+ * entry block after any leading IR_PARAM_DECL, and every operand that named
+ * one of the old destination registers is rewritten to the canonical one.
+ *
+ * Does nothing when `f` is NULL, when f->opt_reg_ssa or f->opt_rewritten is
+ * set, when the function has no blocks or no valid entry block, or when no
+ * key occurs twice. After a rewrite the DEF_USE, DOM and LOOP analyses are
+ * invalidated.
+ *
+ * NOTE(review): the remap is per register, not per def. This presumably
+ * relies on each ADDR_OF destination register having no other definition in
+ * the function -- confirm against what cg_ir_lower produces. */
+void opt_addr_of_global_cse(Func* f) {
+  if (!f || f->opt_reg_ssa || f->opt_rewritten) return;
+  /* The hoisted defs go into the entry block, so it has to exist. This is
+   * checked before anything is mutated: bailing out after the duplicate defs
+   * have been NOPed would leave their uses without a definition. */
+  if (f->nblocks == 0 || f->entry >= f->nblocks) return;
+
+  /* Pass 1: index ADDR_OF(global) defs by (sym, addend). */
+  AddrCseEntry* entries = NULL;
+  u32 n_entries = 0;
+  u32 cap_entries = 0;
+  for (u32 b = 0; b < f->nblocks; ++b) {
+    Block* bl = &f->blocks[b];
+    for (u32 i = 0; i < bl->ninsts; ++i) {
+      Inst* in = &bl->insts[i];
+      if (!addr_cse_defines_global_addr(in)) continue;
+      u32 idx = addr_cse_find_or_add(&entries, &n_entries, &cap_entries,
+                                     f->arena, in->opnds[1].v.global.sym,
+                                     in->opnds[1].v.global.addend);
+      AddrCseEntry* e = &entries[idx];
+      if (e->count == 0) {
+        /* The first occurrence decides the canonical register's type. */
+        e->addr_type = in->opnds[0].type;
+        e->cls = in->opnds[0].cls;
+      }
+      ++e->count;
+    }
+  }
+  if (!n_entries) return;
+
+  /* Pass 2: for each duplicate key, allocate a canonical PReg. */
+  u32 dup_count = 0;
+  for (u32 i = 0; i < n_entries; ++i) {
+    if (entries[i].count >= 2) {
+      entries[i].canonical =
+          ir_alloc_preg(f, entries[i].addr_type, entries[i].cls);
+      ++dup_count;
+    }
+  }
+  if (!dup_count) return;
+
+  /* Pass 3: walk again, build per-old-PReg remap and NOP duplicate defs.
+   * The table is sized after Pass 2 so it also covers the new registers. */
+  PReg* remap = arena_zarray(f->arena, PReg, opt_reg_count(f));
+  for (u32 b = 0; b < f->nblocks; ++b) {
+    Block* bl = &f->blocks[b];
+    for (u32 i = 0; i < bl->ninsts; ++i) {
+      Inst* in = &bl->insts[i];
+      if (!addr_cse_defines_global_addr(in)) continue;
+      u32 idx = addr_cse_find_or_add(&entries, &n_entries, &cap_entries,
+                                     f->arena, in->opnds[1].v.global.sym,
+                                     in->opnds[1].v.global.addend);
+      if (entries[idx].canonical == PREG_NONE) continue; /* singleton */
+      PReg old = (PReg)in->opnds[0].v.reg;
+      /* Without a remap entry the uses cannot be redirected, so such a def
+       * is left in place instead of being deleted. */
+      if (!opt_reg_valid(f, old)) continue;
+      remap[old] = entries[idx].canonical;
+      /* NOP the original def. */
+      in->op = IR_NOP;
+      in->def = VAL_NONE;
+      in->ndefs = 0;
+      in->defs = NULL;
+      in->nopnds = 0;
+      in->opnds = NULL;
+    }
+  }
+
+  /* Pass 4: hoist a single ADDR_OF for each duplicated key to the entry
+   * block, inserted after any leading IR_PARAM_DECL instructions. */
+  Block* entry = &f->blocks[f->entry];
+  u32 insert_at = 0;
+  while (insert_at < entry->ninsts &&
+         (IROp)entry->insts[insert_at].op == IR_PARAM_DECL)
+    ++insert_at;
+  /* block_insert_at hands back zeroed slots; only non-zero fields are set. */
+  Inst* slot = block_insert_at(f, entry, insert_at, dup_count);
+  u32 w = 0;
+  for (u32 i = 0; i < n_entries; ++i) {
+    if (entries[i].canonical == PREG_NONE) continue;
+    Inst* in = &slot[w++];
+    in->op = (u16)IR_ADDR_OF;
+    in->def = (Val)entries[i].canonical;
+    in->type = entries[i].addr_type;
+    in->nopnds = 2;
+    in->opnds = arena_array(f->arena, Operand, 2);
+    /* Operand 0: the canonical destination register. */
+    memset(&in->opnds[0], 0, sizeof(Operand));
+    in->opnds[0].kind = OPK_REG;
+    in->opnds[0].cls = entries[i].cls;
+    in->opnds[0].type = entries[i].addr_type;
+    in->opnds[0].v.reg = entries[i].canonical;
+    /* Operand 1: the global whose address is taken. */
+    memset(&in->opnds[1], 0, sizeof(Operand));
+    in->opnds[1].kind = OPK_GLOBAL;
+    in->opnds[1].cls = entries[i].cls;
+    in->opnds[1].type = entries[i].addr_type;
+    in->opnds[1].v.global.sym = entries[i].sym;
+    in->opnds[1].v.global.addend = entries[i].addend;
+    ir_assign_inst_id(f, in);
+  }
+
+  /* Pass 5: apply remap to all operand uses in the function. */
+  for (u32 b = 0; b < f->nblocks; ++b) {
+    Block* bl = &f->blocks[b];
+    for (u32 i = 0; i < bl->ninsts; ++i) {
+      addr_cse_apply_to_inst(&bl->insts[i], remap);
+    }
+  }
+
+  opt_analysis_invalidate(
+      f, OPT_ANALYSIS_DEF_USE | OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
+}
diff --git a/src/opt/pass_analysis.c b/src/opt/pass_analysis.c
@@ -75,7 +75,7 @@ static void verify_storage(Func* f, const char* stage, CGLocalStorage st,
static void verify_operand_shape(Func* f, const char* stage, const Operand* op,
int physical_regs) {
if (!op) return;
- switch ((OpKind)op->kind) {
+ switch ((OptOperandKind)op->kind) {
case OPK_IMM:
case OPK_GLOBAL:
break;
@@ -578,9 +578,11 @@ static void verify_values(Func* f, const char* stage) {
opt_fail(f, stage, "phi input type mismatch", b, p);
}
} else if ((IROp)in->op == IR_PARAM_DECL) {
+ IRParamDeclAux* aux = (IRParamDeclAux*)in->extra.aux;
if (in->nopnds || in->opnds)
opt_fail(f, stage, "param_decl should not carry operands", b, i);
- if (in->def == VAL_NONE)
+ if ((!aux || aux->desc.storage.kind == CG_LOCAL_STORAGE_REG) &&
+ in->def == VAL_NONE)
opt_fail(f, stage, "param_decl missing def", b, i);
}
}
@@ -693,9 +695,11 @@ static void verify_rewritten(Func* f, const char* stage) {
if ((IROp)in->op == IR_PHI)
opt_fail(f, stage, "phi survived rewrite", b, i);
if ((IROp)in->op == IR_PARAM_DECL) {
+ IRParamDeclAux* aux = (IRParamDeclAux*)in->extra.aux;
if (in->nopnds || in->opnds)
opt_fail(f, stage, "param_decl carries operands after rewrite", b, i);
- if (in->def == VAL_NONE || in->def >= opt_reg_count(f))
+ if ((!aux || aux->desc.storage.kind == CG_LOCAL_STORAGE_REG) &&
+ (in->def == VAL_NONE || in->def >= opt_reg_count(f)))
opt_fail(f, stage, "bad param_decl def after rewrite", b, i);
continue;
}
@@ -804,8 +808,6 @@ void opt_verify(Func* f, const char* stage) {
if (bl->id != b) opt_fail(f, stage, "block id mismatch", bl->id, b);
if (!a.reachable[b] && (bl->ninsts || bl->nsucc || bl->npreds))
opt_fail(f, stage, "unreachable block still connected", b, bl->ninsts);
- if (bl->ninsts == 0 && bl->nsucc != 0)
- opt_fail(f, stage, "empty block has successors", b, bl->nsucc);
if (bl->ninsts) {
u32 expected = 0;
if (fixed_terminator_succ_count(&bl->insts[bl->ninsts - 1], &expected) &&
diff --git a/src/opt/pass_cfg.c b/src/opt/pass_cfg.c
@@ -220,7 +220,10 @@ void opt_build_cfg(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
if (bl->ninsts == 0) {
- bl->nsucc = 0;
+ /* Empty blocks are valid label-only blocks. Their fallthrough successor
+ * is assigned by the lowering/layout pass and must survive CFG rebuilds
+ * so branches to labels placed immediately before another block remain
+ * connected. */
continue;
}
const Inst* last = &bl->insts[bl->ninsts - 1];
@@ -230,7 +233,6 @@ void opt_build_cfg(Func* f) {
bl->nsucc = nsucc;
continue;
}
- bl->nsucc = 0;
continue;
}
switch ((IROp)last->op) {
diff --git a/src/opt/pass_dce.c b/src/opt/pass_dce.c
@@ -1,8 +1,34 @@
#include "core/arena.h"
#include "opt/opt_internal.h"
+/* Reports whether `in` stores its result straight into a frame-homed local.
+ *
+ * cg_ir_lower does not emit a separate IR_STORE for an address-taken local; it
+ * emits the value op itself (IR_LOAD_IMM, IR_COPY, ...) with an OPK_LOCAL
+ * destination in operand 0. That makes the op a memory write even though the
+ * same op is otherwise pure, and dead-def elimination must keep it or stores
+ * to escaped locals are dropped.
+ *
+ * Returns 1 when `in` is one of the value-producing ops listed below and its
+ * operand 0 is OPK_LOCAL, otherwise 0. */
+static int opt_inst_writes_frame_local(const Inst* in) {
+  const IROp op = (IROp)in->op;
+  /* Value-producing ops covered by this check. */
+  const int is_value_op =
+      op == IR_LOAD_IMM || op == IR_LOAD_CONST || op == IR_LOAD_LABEL_ADDR ||
+      op == IR_COPY || op == IR_LOAD || op == IR_ADDR_OF ||
+      op == IR_TLS_ADDR_OF || op == IR_BINOP || op == IR_UNOP ||
+      op == IR_CMP || op == IR_CONVERT;
+
+  if (!is_value_op) return 0;
+  /* Operand 0 is only inspected once it is known to exist. */
+  if (in->nopnds == 0) return 0;
+  return in->opnds[0].kind == OPK_LOCAL;
+}
+
int opt_inst_has_side_effect(Func* f, const Inst* in) {
(void)f;
+ if (opt_inst_writes_frame_local(in)) return 1;
switch ((IROp)in->op) {
case IR_LOAD:
return opt_mem_observable(&in->extra.mem);
@@ -22,6 +48,10 @@ int opt_inst_has_side_effect(Func* f, const Inst* in) {
case IR_CMP_BRANCH:
case IR_SWITCH:
case IR_INDIRECT_BRANCH:
+ case IR_LOCAL_STATIC_DATA_BEGIN:
+ case IR_LOCAL_STATIC_DATA_WRITE:
+ case IR_LOCAL_STATIC_DATA_LABEL_ADDR:
+ case IR_LOCAL_STATIC_DATA_END:
case IR_RET:
case IR_SCOPE_BEGIN:
case IR_SCOPE_ELSE:
diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c
@@ -1,1384 +0,0 @@
-#include <string.h>
-
-#include "arch/regalloc.h"
-#include "core/arena.h"
-#include "core/core.h"
-#include "core/metrics.h"
-#include "core/slice.h"
-#include "opt/ir.h"
-#include "opt/opt_internal.h"
-
-typedef struct ReplayCtx {
- Compiler* c;
- Func* f;
- CGTarget* tgt;
- Reg* val_to_reg;
- FrameSlot* slot_map;
- Label* label_map;
- CGScope* scope_map;
- u8* val_alloced;
- u8* block_label_placed;
- u8 identity_regs;
- CGSimpleRegAlloc regalloc;
- /* Cached hard-reg collection: filled once when identity_regs is set and
- * reused by plan_hard_regs and reserve_hard_regs callbacks. */
- Reg used_hard_regs[OPT_REG_CLASSES][OPT_MAX_HARD_REGS];
- u32 nused_hard_regs[OPT_REG_CLASSES];
- u8 used_hard_regs_valid;
- /* Last source location pushed to the target — used to skip redundant
- * set_loc calls when consecutive insts share a loc (the common case). */
- SrcLoc last_loc;
- u8 last_loc_valid;
- /* When debug info isn't being emitted, set_loc only affects the panic
- * loc — we set it once per function in func_begin and skip per-inst
- * updates entirely. */
- u8 wants_loc;
-} ReplayCtx;
-
-static inline int srcloc_eq(SrcLoc a, SrcLoc b) {
- return a.file_id == b.file_id && a.line == b.line && a.col == b.col;
-}
-
-static Reg val_to_target_reg(ReplayCtx* r, Val v) {
- Func* f = r->f;
- if (v == VAL_NONE) return REG_NONE;
- if (r->identity_regs) return (Reg)v;
- if (v >= f->nvals) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(r->c, loc, "opt replay: Val %u out of range", v);
- }
- if (!r->val_alloced[v]) {
- r->val_to_reg[v] =
- cg_simple_regalloc_alloc(&r->regalloc, (RegClass)f->val_cls[v]);
- if (r->val_to_reg[v] == (Reg)REG_NONE) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(r->c, loc, "opt replay: hard reg pool exhausted");
- }
- r->val_alloced[v] = 1;
- }
- return r->val_to_reg[v];
-}
-
-static FrameSlot slot_to_target(ReplayCtx* r, FrameSlot vs) {
- if (vs == FRAME_SLOT_NONE) return FRAME_SLOT_NONE;
- if (vs >= r->f->nframe_slots + 1u) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(r->c, loc, "opt replay: vslot %u out of range",
- (unsigned)vs);
- }
- return r->slot_map[vs];
-}
-
-static CGLocalStorage xlat_storage(ReplayCtx* r, CGLocalStorage st,
- CfreeCgTypeId ty) {
- (void)ty;
- if (st.kind == CG_LOCAL_STORAGE_REG) {
- PReg pr = (PReg)st.v.reg;
- if (r->identity_regs && r->f->opt_rewritten && opt_reg_valid(r->f, pr)) {
- u8 alloc_kind = opt_preg_alloc_kind(r->f, pr);
- if (alloc_kind == OPT_ALLOC_HARD) {
- st.v.reg = opt_preg_hard_reg(r->f, pr);
- } else if (alloc_kind == OPT_ALLOC_SPILL) {
- st.kind = CG_LOCAL_STORAGE_FRAME;
- st.v.frame_slot = slot_to_target(r, opt_preg_spill_slot(r->f, pr));
- } else {
- st.v.reg = val_to_target_reg(r, (Val)pr);
- }
- } else {
- st.v.reg = val_to_target_reg(r, (Val)pr);
- }
- } else {
- st.v.frame_slot = slot_to_target(r, st.v.frame_slot);
- }
- return st;
-}
-
-static int replay_reg_storage_unused(ReplayCtx* r, CGLocalStorage st) {
- if (!r || st.kind != CG_LOCAL_STORAGE_REG) return 0;
- if (!(r->identity_regs && r->f->opt_rewritten)) return 0;
- PReg pr = (PReg)st.v.reg;
- if (pr == 0 || pr >= opt_reg_count(r->f)) return 0;
- if (opt_preg_alloc_kind(r->f, pr) == OPT_ALLOC_NONE) return 1;
- return r->f->preg_info && r->f->preg_info[pr].use_freq == 0;
-}
-
-static Operand xlat_op(ReplayCtx* r, Operand op) {
- switch ((OpKind)op.kind) {
- case OPK_IMM:
- case OPK_GLOBAL:
- return op;
- case OPK_REG:
- if (r->identity_regs && r->f->opt_rewritten) return op;
- op.v.reg = val_to_target_reg(r, (Val)op.v.reg);
- return op;
- case OPK_LOCAL:
- op.v.frame_slot = slot_to_target(r, op.v.frame_slot);
- return op;
- case OPK_INDIRECT:
- if (!(r->identity_regs && r->f->opt_rewritten)) {
- op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base);
- if (op.v.ind.index != (Reg)REG_NONE)
- op.v.ind.index = val_to_target_reg(r, (Val)op.v.ind.index);
- }
- return op;
- }
- return op;
-}
-
-static CGABIValue xlat_abivalue(ReplayCtx* r, const CGABIValue* in,
- CGABIPart* parts_out) {
- CGABIValue out = *in;
- out.storage = xlat_op(r, in->storage);
- if (in->nparts && parts_out) {
- for (u32 i = 0; i < in->nparts; ++i) {
- parts_out[i] = in->parts[i];
- parts_out[i].op = xlat_op(r, in->parts[i].op);
- }
- out.parts = parts_out;
- } else {
- out.parts = NULL;
- }
- return out;
-}
-
-typedef struct ReplayParallelMove {
- Operand dst;
- Operand src;
- MemAccess mem;
- const CGCallPlanRet* ret;
- u32 src_offset;
- u32 dst_offset;
- u32 stack_offset;
- u8 dst_kind;
- u8 src_kind;
- u8 is_ret;
- u8 done;
-} ReplayParallelMove;
-
-static Operand phys_reg_operand(Reg r, RegClass cls, CfreeCgTypeId ty) {
- Operand op;
- memset(&op, 0, sizeof op);
- op.kind = OPK_REG;
- op.cls = (u8)cls;
- op.type = ty;
- op.v.reg = r;
- return op;
-}
-
-static int operand_reg_eq(const Operand* a, const Operand* b) {
- return a && b && a->kind == OPK_REG && b->kind == OPK_REG &&
- a->cls == b->cls && a->v.reg == b->v.reg;
-}
-
-static int operand_uses_reg_for_replay(const Operand* op, const Operand* r) {
- if (!op || !r || r->kind != OPK_REG) return 0;
- if (op->kind == OPK_REG) return operand_reg_eq(op, r);
- if (op->kind == OPK_INDIRECT)
- return r->cls == RC_INT &&
- (op->v.ind.base == r->v.reg ||
- (op->v.ind.index != (Reg)REG_NONE && op->v.ind.index == r->v.reg));
- return 0;
-}
-
-static int replay_move_src_ready(const ReplayParallelMove* moves, u32 n,
- u32 idx) {
- const Operand* dst = &moves[idx].dst;
- for (u32 i = 0; i < n; ++i) {
- if (i == idx || moves[i].done) continue;
- if (operand_uses_reg_for_replay(&moves[i].src, dst)) return 0;
- }
- return 1;
-}
-
-static int replay_find_move_dst(const ReplayParallelMove* moves, u32 n,
- const Operand* dst) {
- for (u32 i = 0; i < n; ++i) {
- if (!moves[i].done && operand_reg_eq(&moves[i].dst, dst)) return (int)i;
- }
- return -1;
-}
-
-static Reg replay_scratch_reg(ReplayCtx* r, RegClass cls, Reg avoid) {
- if ((u32)cls >= OPT_REG_CLASSES) return REG_NONE;
- for (u32 i = 0; i < r->f->opt_scratch_reg_count[cls]; ++i) {
- Reg sr = r->f->opt_scratch_regs[cls][i];
- if (sr != avoid) return sr;
- }
- return REG_NONE;
-}
-
-static void replay_emit_move(CGTarget* w, const ReplayParallelMove* move) {
- Operand dst = move->dst;
- Operand src = move->src;
- MemAccess mem = move->mem;
- if (move->dst_kind == CG_CALL_PLAN_STACK ||
- move->dst_kind == CG_CALL_PLAN_TAIL_STACK) {
- CGCallPlanMove m;
- memset(&m, 0, sizeof m);
- m.src = src;
- m.src_kind = move->src_kind;
- m.dst_kind = move->dst_kind;
- m.cls = dst.cls;
- m.src_offset = move->src_offset;
- m.stack_offset = move->stack_offset;
- m.mem = mem;
- w->store_call_arg(w, &m);
- } else if (dst.kind == OPK_REG) {
- if (move->src_kind == CG_CALL_PLAN_SRC_ADDR || move->src_offset) {
- CGCallPlanMove m;
- memset(&m, 0, sizeof m);
- m.src = src;
- m.src_kind = move->src_kind;
- m.dst_kind = CG_CALL_PLAN_REG;
- m.cls = dst.cls;
- m.dst_reg = dst.v.reg;
- m.src_offset = move->src_offset;
- m.mem = mem;
- w->load_call_arg(w, dst, &m);
- return;
- }
- if (src.kind == OPK_REG) {
- if (!operand_reg_eq(&dst, &src)) w->copy(w, dst, src);
- } else if (src.kind == OPK_IMM) {
- w->load_imm(w, dst, src.v.imm);
- } else if (src.kind == OPK_LOCAL || src.kind == OPK_INDIRECT) {
- w->load(w, dst, src, mem);
- } else if (src.kind == OPK_GLOBAL) {
- w->addr_of(w, dst, src);
- }
- } else if (dst.kind == OPK_LOCAL || dst.kind == OPK_INDIRECT) {
- if (move->is_ret && move->dst_offset) {
- CGCallPlanRet ret = move->ret ? *move->ret : (CGCallPlanRet){0};
- ret.dst = dst;
- ret.dst_offset = move->dst_offset;
- ret.mem = mem;
- w->store_call_ret(w, &ret, src);
- return;
- }
- w->store(w, dst, src, mem);
- }
-}
-
-/* `avoid` names a physical register the caller has reserved across these
- * moves (e.g. the scratch holding an indirect call's target). Cycle-breaking
- * temporaries must steer clear of it, or they would clobber the live value. */
-static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
- u32 n, Reg avoid) {
- CGTarget* w = r->tgt;
- u32 remaining = 0;
- for (u32 i = 0; i < n; ++i) {
- if (operand_reg_eq(&moves[i].dst, &moves[i].src)) {
- moves[i].done = 1;
- } else {
- ++remaining;
- }
- }
-
- while (remaining) {
- int progressed = 0;
- for (u32 i = 0; i < n; ++i) {
- if (moves[i].done || !replay_move_src_ready(moves, n, i)) continue;
- replay_emit_move(w, &moves[i]);
- moves[i].done = 1;
- --remaining;
- progressed = 1;
- }
- if (progressed) continue;
-
- for (u32 i = 0; i < n; ++i) {
- if (moves[i].done || moves[i].src.kind == OPK_REG) continue;
- Reg sr = replay_scratch_reg(r, (RegClass)moves[i].dst.cls, avoid);
- if (sr == (Reg)REG_NONE) continue;
- Operand tmp =
- phys_reg_operand(sr, (RegClass)moves[i].dst.cls, moves[i].dst.type);
- ReplayParallelMove tmp_move = moves[i];
- tmp_move.dst = tmp;
- tmp_move.dst_kind = CG_CALL_PLAN_REG;
- replay_emit_move(w, &tmp_move);
- moves[i].src = tmp;
- moves[i].src_kind = CG_CALL_PLAN_SRC_VALUE;
- moves[i].src_offset = 0;
- progressed = 1;
- break;
- }
- if (progressed) continue;
-
- u32 first = 0;
- while (first < n && moves[first].done) ++first;
- if (first == n) break;
- Operand save = moves[first].src;
- Reg sr = replay_scratch_reg(r, (RegClass)save.cls, avoid);
- if (sr == (Reg)REG_NONE) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(r->c, loc,
- "opt replay: no scratch register for parallel call move");
- }
- Operand tmp = phys_reg_operand(sr, (RegClass)save.cls, save.type);
- w->copy(w, tmp, save);
-
- Operand hole = save;
- for (;;) {
- int idx = replay_find_move_dst(moves, n, &hole);
- if (idx < 0 || (u32)idx == first) break;
- replay_emit_move(w, &moves[idx]);
- hole = moves[idx].src;
- moves[idx].done = 1;
- --remaining;
- }
- moves[first].src = tmp;
- moves[first].src_kind = CG_CALL_PLAN_SRC_VALUE;
- moves[first].src_offset = 0;
- replay_emit_move(w, &moves[first]);
- moves[first].done = 1;
- --remaining;
- }
-}
-
-static int replay_plan_supported(CGTarget* w, const CGCallPlan* p,
- const char** reason) {
- if (reason) *reason = NULL;
- if (!p) {
- if (reason) *reason = "missing plan";
- return 0;
- }
- for (u32 i = 0; i < p->nargs; ++i) {
- if ((p->args[i].dst_kind == CG_CALL_PLAN_STACK ||
- p->args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) &&
- !w->store_call_arg) {
- if (reason) *reason = "stack arg without store_call_arg";
- return 0;
- }
- if (p->args[i].dst_kind == CG_CALL_PLAN_REG &&
- (p->args[i].src_kind == CG_CALL_PLAN_SRC_ADDR ||
- p->args[i].src_offset) &&
- !w->load_call_arg) {
- if (reason) *reason = "reg arg without load_call_arg";
- return 0;
- }
- }
- for (u32 i = 0; i < p->nrets; ++i)
- if (p->rets[i].dst.kind != OPK_REG && p->rets[i].dst.kind != OPK_LOCAL &&
- p->rets[i].dst.kind != OPK_INDIRECT) {
- if (reason) *reason = "unsupported ret destination";
- return 0;
- }
- for (u32 i = 0; i < p->nrets; ++i)
- if (p->rets[i].dst_offset &&
- (p->rets[i].dst.kind == OPK_LOCAL ||
- p->rets[i].dst.kind == OPK_INDIRECT) &&
- !w->store_call_ret) {
- if (reason) *reason = "ret offset without store_call_ret";
- return 0;
- }
- return 1;
-}
-
-/* Materialize args and emit the call instruction for a planned call. Does not
- * emit return-value moves. */
-static void emit_call_and_args(ReplayCtx* r, const CGCallPlan* src_plan) {
- CGCallPlan plan = *src_plan;
- plan.callee = xlat_op(r, src_plan->callee);
- plan.args = src_plan->nargs
- ? arena_array(r->f->arena, CGCallPlanMove, src_plan->nargs)
- : NULL;
-
- ReplayParallelMove* arg_moves =
- src_plan->nargs
- ? arena_zarray(r->f->arena, ReplayParallelMove, src_plan->nargs)
- : NULL;
- u32 nargs = 0;
- for (u32 i = 0; i < src_plan->nargs; ++i) {
- plan.args[i] = src_plan->args[i];
- plan.args[i].src = xlat_op(r, src_plan->args[i].src);
- if ((src_plan->flags & CG_CALL_TAIL) &&
- plan.args[i].dst_kind == CG_CALL_PLAN_STACK) {
- plan.args[i].dst_kind = CG_CALL_PLAN_TAIL_STACK;
- }
- Operand dst;
- if (plan.args[i].dst_kind == CG_CALL_PLAN_REG) {
- dst = phys_reg_operand(plan.args[i].dst_reg, (RegClass)plan.args[i].cls,
- plan.args[i].mem.type);
- } else {
- memset(&dst, 0, sizeof dst);
- dst.kind = OPK_LOCAL;
- dst.cls = plan.args[i].cls;
- dst.type = plan.args[i].mem.type;
- }
- arg_moves[nargs].dst = dst;
- arg_moves[nargs].src = plan.args[i].src;
- arg_moves[nargs].mem = plan.args[i].mem;
- arg_moves[nargs].src_offset = plan.args[i].src_offset;
- arg_moves[nargs].stack_offset = plan.args[i].stack_offset;
- arg_moves[nargs].dst_kind = plan.args[i].dst_kind;
- arg_moves[nargs].src_kind = plan.args[i].src_kind;
- ++nargs;
- }
-
- Reg callee_scratch = REG_NONE;
- if (plan.callee.kind == OPK_REG) {
- for (u32 i = 0; i < nargs; ++i) {
- if (arg_moves[i].dst_kind != CG_CALL_PLAN_REG ||
- !operand_reg_eq(&arg_moves[i].dst, &plan.callee))
- continue;
- callee_scratch = replay_scratch_reg(r, RC_INT, REG_NONE);
- if (callee_scratch == (Reg)REG_NONE) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(r->c, loc,
- "opt replay: no scratch register for indirect call");
- }
- Operand tmp = phys_reg_operand(callee_scratch, RC_INT, plan.callee.type);
- r->tgt->copy(r->tgt, tmp, plan.callee);
- plan.callee = tmp;
- break;
- }
- }
-
- replay_parallel_moves(r, arg_moves, nargs, callee_scratch);
- r->tgt->emit_call_plan(r->tgt, &plan);
-}
-
-/* An ALLOWED tail call the backend could not realize: emit it as an ordinary
- * call and return its result. The callee's return registers are the function's
- * (the return shapes match by CG's precondition) and the epilogue preserves
- * them, so a plain call followed by a bare return forwards the value. The
- * frame already reserves outgoing space for this call (the known-frame sizing
- * counts tail calls' stack args), so the ordinary call's args fit. */
-static void replay_tail_fallback(ReplayCtx* r, const IRCallAux* aux) {
- /* Reuse the recorded plan (its arg sources are the post-regalloc operands)
- * but clear CG_CALL_TAIL so emit_call_and_args emits an ordinary BL and
- * keeps stack args in the outgoing area rather than the tail slots. */
- CGCallPlan plan = aux->plan;
- plan.flags &= (u16)~CG_CALL_TAIL;
- emit_call_and_args(r, &plan);
- r->tgt->ret(r->tgt, NULL);
-}
-
-static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux,
- SrcLoc loc) {
- const CGCallPlan* src_plan = &aux->plan;
-
- /* The opt recorder accepted this tail call unconditionally; resolve it now
- * against the real backend, which has a laid-out frame. NULL => realizable,
- * emit the sibling call. Otherwise MUST diagnoses and ALLOWED falls back. */
- if (src_plan->flags & CG_CALL_TAIL) {
- const char* reason =
- r->tgt->tail_call_unrealizable_reason
- ? r->tgt->tail_call_unrealizable_reason(r->tgt, &aux->desc)
- : "target does not support tail calls";
- if (reason) {
- if (aux->desc.tail_policy == CFREE_CG_TAIL_MUST)
- compiler_panic(r->c, loc, "musttail call not realizable: %s", reason);
- replay_tail_fallback(r, aux);
- return;
- }
- }
-
- emit_call_and_args(r, src_plan);
- if (src_plan->flags & CG_CALL_TAIL) return;
-
- CGCallPlanRet* rets =
- src_plan->nrets ? arena_array(r->f->arena, CGCallPlanRet, src_plan->nrets)
- : NULL;
- ReplayParallelMove* ret_moves =
- src_plan->nrets
- ? arena_zarray(r->f->arena, ReplayParallelMove, src_plan->nrets)
- : NULL;
- u32 nrets = 0;
- for (u32 i = 0; i < src_plan->nrets; ++i) {
- rets[i] = src_plan->rets[i];
- rets[i].dst = xlat_op(r, src_plan->rets[i].dst);
- Operand src = phys_reg_operand(rets[i].src_reg, (RegClass)rets[i].cls,
- rets[i].mem.type);
- ret_moves[nrets].dst = rets[i].dst;
- ret_moves[nrets].src = src;
- ret_moves[nrets].mem = rets[i].mem;
- ret_moves[nrets].ret = &rets[i];
- ret_moves[nrets].dst_offset = rets[i].dst_offset;
- ret_moves[nrets].dst_kind = CG_CALL_PLAN_REG;
- ret_moves[nrets].src_kind = CG_CALL_PLAN_SRC_VALUE;
- ret_moves[nrets].is_ret = 1;
- ++nrets;
- }
- replay_parallel_moves(r, ret_moves, nrets, REG_NONE);
-}
-
-static Label ensure_label(ReplayCtx* r, u32 b) {
- if (b >= r->f->nblocks) return LABEL_NONE;
- if (r->label_map[b] == LABEL_NONE) {
- /* If w_label_new pre-allocated an MCLabel during recording (so
- * cfree_cg_data_label_addr could queue a deferred fixup against
- * it), reuse it here so the place we emit lines up with the
- * existing pending fixup list. */
- Block* bl = &r->f->blocks[b];
- if (bl->mc_label != MC_LABEL_NONE) {
- r->label_map[b] = (Label)bl->mc_label;
- } else {
- r->label_map[b] = r->tgt->label_new(r->tgt);
- }
- }
- return r->label_map[b];
-}
-
-static void ensure_label_placed(ReplayCtx* r, u32 b) {
- if (r->block_label_placed[b]) return;
- r->block_label_placed[b] = 1;
- if (b == r->f->entry) return;
- Label l = ensure_label(r, b);
- r->tgt->label_place(r->tgt, l);
-}
-
-static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
- CGTarget* w = r->tgt;
- /* set_loc serves two purposes (see arch/mc.c and the per-arch emit code):
- * 1. error reporting via compiler_panic - needs some recent loc
- * 2. DWARF line-info rows via debug_emit_row, gated on mc->debug
- * When debug info isn't being emitted we still set the loc once (the
- * first inst's check catches that via last_loc_valid=0), so panic
- * messages still point at a real source location, but subsequent updates
- * are skipped. When debug info IS being emitted we update on every
- * change so line rows stay accurate. */
- if (r->wants_loc) {
- if (!r->last_loc_valid || !srcloc_eq(r->last_loc, in->loc)) {
- w->set_loc(w, in->loc);
- r->last_loc = in->loc;
- r->last_loc_valid = 1;
- }
- } else if (!r->last_loc_valid) {
- w->set_loc(w, in->loc);
- r->last_loc = in->loc;
- r->last_loc_valid = 1;
- }
-
- switch ((IROp)in->op) {
- case IR_NOP:
- case IR_CONST_I:
- case IR_CONST_BYTES:
- case IR_PARAM_DECL:
- case IR_PHI:
- case IR_CONDBR:
- break;
- case IR_ASM_BLOCK: {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- Operand* in_ops_ = NULL;
- Operand* out_ops_ = NULL;
- if (aux->nin) {
- in_ops_ = arena_array(r->f->arena, Operand, aux->nin);
- for (u32 k = 0; k < aux->nin; ++k) {
- in_ops_[k] = xlat_op(r, aux->in_ops[k]);
- }
- }
- if (aux->nout) {
- out_ops_ = arena_array(r->f->arena, Operand, aux->nout);
- for (u32 k = 0; k < aux->nout; ++k) {
- out_ops_[k] = xlat_op(r, aux->out_ops[k]);
- }
- }
- w->asm_block(w, aux->tmpl, aux->outs, aux->nout, out_ops_, aux->ins,
- aux->nin, in_ops_, aux->clobbers, aux->nclob);
- break;
- }
- case IR_LOAD_IMM: {
- Operand dst = xlat_op(r, in->opnds[0]);
- w->load_imm(w, dst, in->extra.imm);
- break;
- }
- case IR_LOAD_CONST: {
- Operand dst = xlat_op(r, in->opnds[0]);
- w->load_const(w, dst, in->extra.cbytes);
- break;
- }
- case IR_COPY: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand src = xlat_op(r, in->opnds[1]);
- w->copy(w, dst, src);
- break;
- }
- case IR_LOAD: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand addr = xlat_op(r, in->opnds[1]);
- w->load(w, dst, addr, in->extra.mem);
- break;
- }
- case IR_STORE: {
- Operand addr = xlat_op(r, in->opnds[0]);
- Operand src = xlat_op(r, in->opnds[1]);
- w->store(w, addr, src, in->extra.mem);
- break;
- }
- case IR_ADDR_OF: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand lv = xlat_op(r, in->opnds[1]);
- w->addr_of(w, dst, lv);
- break;
- }
- case IR_TLS_ADDR_OF: {
- Operand dst = xlat_op(r, in->opnds[0]);
- IRTlsAux* aux = (IRTlsAux*)in->extra.aux;
- w->tls_addr_of(w, dst, aux->sym, aux->addend);
- break;
- }
- case IR_AGG_COPY: {
- Operand a = xlat_op(r, in->opnds[0]);
- Operand bo = xlat_op(r, in->opnds[1]);
- IRAggAux* aux = (IRAggAux*)in->extra.aux;
- w->copy_bytes(w, a, bo, aux->access);
- break;
- }
- case IR_AGG_SET: {
- Operand a = xlat_op(r, in->opnds[0]);
- Operand bo = xlat_op(r, in->opnds[1]);
- IRAggAux* aux = (IRAggAux*)in->extra.aux;
- w->set_bytes(w, a, bo, aux->access);
- break;
- }
- case IR_BITFIELD_LOAD: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand rec_ = xlat_op(r, in->opnds[1]);
- IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux;
- w->bitfield_load(w, dst, rec_, aux->access);
- break;
- }
- case IR_BITFIELD_STORE: {
- Operand rec_ = xlat_op(r, in->opnds[0]);
- Operand src = xlat_op(r, in->opnds[1]);
- IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux;
- w->bitfield_store(w, rec_, src, aux->access);
- break;
- }
- case IR_BINOP: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand a = xlat_op(r, in->opnds[1]);
- Operand bo = xlat_op(r, in->opnds[2]);
- w->binop(w, (BinOp)in->extra.imm, dst, a, bo);
- break;
- }
- case IR_UNOP: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand a = xlat_op(r, in->opnds[1]);
- w->unop(w, (UnOp)in->extra.imm, dst, a);
- break;
- }
- case IR_CMP: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand a = xlat_op(r, in->opnds[1]);
- Operand bo = xlat_op(r, in->opnds[2]);
- w->cmp(w, (CmpOp)in->extra.imm, dst, a, bo);
- break;
- }
- case IR_CONVERT: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand src = xlat_op(r, in->opnds[1]);
- w->convert(w, (ConvKind)in->extra.imm, dst, src);
- break;
- }
- case IR_CALL: {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- const char* plan_reason = NULL;
- if (aux && aux->use_plan_replay && w->emit_call_plan &&
- replay_plan_supported(w, &aux->plan, &plan_reason)) {
- replay_planned_call(r, aux, in->loc);
- break;
- }
- compiler_panic(
- r->c, in->loc, "opt replay: call has no supported call plan%.*s%.*s",
- SLICE_ARG(plan_reason ? SLICE_LIT(": ") : SLICE_NULL),
- SLICE_ARG(plan_reason ? slice_from_cstr(plan_reason) : SLICE_NULL));
- break;
- }
- case IR_BR: {
- Block* bl = &r->f->blocks[b];
- if (bl->nsucc < 1) break;
- Label l = ensure_label(r, bl->succ[0]);
- w->jump(w, l);
- break;
- }
- case IR_CMP_BRANCH: {
- Operand a = xlat_op(r, in->opnds[0]);
- Operand bo = xlat_op(r, in->opnds[1]);
- Block* bl = &r->f->blocks[b];
- Label taken = ensure_label(r, bl->succ[0]);
- w->cmp_branch(w, (CmpOp)in->extra.imm, a, bo, taken);
- break;
- }
- case IR_SWITCH: {
- IRSwitchAux* aux = (IRSwitchAux*)in->extra.aux;
- Operand sel = xlat_op(r, in->opnds[0]);
- CGSwitchDesc d;
- CGSwitchCase* cases = NULL;
- memset(&d, 0, sizeof d);
- d.selector = sel;
- d.selector_type = aux->selector_type;
- /* default_block is always a real successor block in the IR (the
- * recorder synthesizes one for no-default switches). Replay must
- * emit an explicit jump to it so fall-through layout assumptions
- * don't depend on block placement. */
- d.default_label = ensure_label(r, aux->default_block);
- d.ncases = aux->ncases;
- d.hint = aux->hint;
- /* opt only invokes pass_emit at level >= 1; cfree_cg_switch
- * already routed dense/forced-table switches through
- * cg_emit_switch_table, so anything that survives in IR_SWITCH
- * is chain by construction. Set the field anyway to keep the
- * desc fully populated. */
- d.opt_level = 1u;
- if (aux->ncases) {
- cases = arena_array(r->f->arena, CGSwitchCase, aux->ncases);
- for (u32 i = 0; i < aux->ncases; ++i) {
- cases[i].value = aux->cases[i].value;
- cases[i].label = ensure_label(r, aux->cases[i].block);
- }
- d.cases = cases;
- }
- if (w->switch_) {
- w->switch_(w, &d);
- } else {
- cg_lower_switch_default(w, &d);
- }
- break;
- }
- case IR_INDIRECT_BRANCH: {
- IRIndirectAux* aux = (IRIndirectAux*)in->extra.aux;
- Operand addr = xlat_op(r, in->opnds[0]);
- Label* labels = NULL;
- if (aux->ntargets) {
- labels = arena_array(r->f->arena, Label, aux->ntargets);
- for (u32 i = 0; i < aux->ntargets; ++i)
- labels[i] = ensure_label(r, aux->targets[i]);
- }
- w->indirect_branch(w, addr, labels, aux->ntargets);
- break;
- }
- case IR_LOAD_LABEL_ADDR: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Label l = ensure_label(r, (u32)in->extra.imm);
- w->load_label_addr(w, dst, l);
- break;
- }
- case IR_RET: {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- if (!aux || !aux->present) {
- w->ret(w, NULL);
- } else {
- CGABIPart* parts = aux->val.nparts ? arena_array(r->f->arena, CGABIPart,
- aux->val.nparts)
- : NULL;
- CGABIValue v = xlat_abivalue(r, &aux->val, parts);
- w->ret(w, &v);
- }
- break;
- }
- case IR_SCOPE_BEGIN: {
- IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
- CGScopeDesc d = aux->desc;
- d.cond = xlat_op(r, d.cond);
- if (aux->desc.kind == SCOPE_LOOP || aux->desc.kind == SCOPE_BLOCK) {
- d.break_label = aux->loop_break_block
- ? ensure_label(r, aux->loop_break_block)
- : LABEL_NONE;
- d.continue_label = aux->loop_continue_block
- ? ensure_label(r, aux->loop_continue_block)
- : LABEL_NONE;
- }
- CGScope cs = w->scope_begin(w, &d);
- r->scope_map[aux->scope_id] = cs;
- break;
- }
- case IR_SCOPE_ELSE:
- w->scope_else(w, r->scope_map[(u32)in->extra.imm]);
- break;
- case IR_SCOPE_END:
- w->scope_end(w, r->scope_map[(u32)in->extra.imm]);
- break;
- case IR_BREAK_TO:
- w->break_to(w, r->scope_map[(u32)in->extra.imm]);
- break;
- case IR_CONTINUE_TO:
- w->continue_to(w, r->scope_map[(u32)in->extra.imm]);
- break;
- case IR_ALLOCA: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand size = xlat_op(r, in->opnds[1]);
- w->alloca_(w, dst, size, (u32)in->extra.imm);
- break;
- }
- case IR_VA_START: {
- Operand ap = xlat_op(r, in->opnds[0]);
- w->va_start_(w, ap);
- break;
- }
- case IR_VA_ARG: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand ap = xlat_op(r, in->opnds[1]);
- CfreeCgTypeId ty = (CfreeCgTypeId)(uintptr_t)in->extra.aux;
- w->va_arg_(w, dst, ap, ty);
- break;
- }
- case IR_VA_END: {
- Operand ap = xlat_op(r, in->opnds[0]);
- w->va_end_(w, ap);
- break;
- }
- case IR_VA_COPY: {
- Operand a = xlat_op(r, in->opnds[0]);
- Operand src = xlat_op(r, in->opnds[1]);
- w->va_copy_(w, a, src);
- break;
- }
- case IR_ATOMIC_LOAD: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand addr = xlat_op(r, in->opnds[1]);
- IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux;
- w->atomic_load(w, dst, addr, aux->mem, aux->mo);
- break;
- }
- case IR_ATOMIC_STORE: {
- Operand addr = xlat_op(r, in->opnds[0]);
- Operand src = xlat_op(r, in->opnds[1]);
- IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux;
- w->atomic_store(w, addr, src, aux->mem, aux->mo);
- break;
- }
- case IR_ATOMIC_RMW: {
- Operand dst = xlat_op(r, in->opnds[0]);
- Operand addr = xlat_op(r, in->opnds[1]);
- Operand val = xlat_op(r, in->opnds[2]);
- IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux;
- w->atomic_rmw(w, (AtomicOp)aux->op, dst, addr, val, aux->mem, aux->mo);
- break;
- }
- case IR_ATOMIC_CAS: {
- Operand prior = xlat_op(r, in->opnds[0]);
- Operand ok = xlat_op(r, in->opnds[1]);
- Operand addr = xlat_op(r, in->opnds[2]);
- Operand expected = xlat_op(r, in->opnds[3]);
- Operand desired = xlat_op(r, in->opnds[4]);
- IRCasAux* aux = (IRCasAux*)in->extra.aux;
- w->atomic_cas(w, prior, ok, addr, expected, desired, aux->mem,
- aux->success, aux->failure);
- break;
- }
- case IR_FENCE:
- w->fence(w, (MemOrder)in->extra.imm);
- break;
- case IR_INTRINSIC: {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- Operand* dsts =
- aux->ndst ? arena_array(r->f->arena, Operand, aux->ndst) : NULL;
- Operand* args =
- aux->narg ? arena_array(r->f->arena, Operand, aux->narg) : NULL;
- for (u32 k = 0; k < aux->ndst; ++k) dsts[k] = xlat_op(r, aux->dsts[k]);
- for (u32 k = 0; k < aux->narg; ++k) args[k] = xlat_op(r, aux->args[k]);
- w->intrinsic(w, aux->kind, dsts, aux->ndst, args, aux->narg);
- break;
- }
- }
-}
-
-static void replay_block(ReplayCtx* r, u32 b) {
- Func* f = r->f;
- if (b >= f->nblocks) return;
- ensure_label_placed(r, b);
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- replay_inst(r, b, &bl->insts[i]);
- }
-}
-
-static void add_unique_reg(Reg* used, u32* nused, u32 cap, Reg r) {
- for (u32 i = 0; i < *nused; ++i) {
- if (used[i] == r) return;
- }
- if (*nused < cap) used[(*nused)++] = r;
-}
-
-static void collect_replayed_reg(Func* f, Reg raw, RegClass cls, Reg* used,
- u32* nused, u32 cap) {
- if (raw == (Reg)REG_NONE) return;
- if (f && f->opt_rewritten && opt_reg_valid(f, (PReg)raw)) {
- PReg pr = (PReg)raw;
- if (opt_preg_alloc_kind(f, pr) == OPT_ALLOC_HARD &&
- opt_preg_loc_cls(f, pr) == (u8)cls)
- add_unique_reg(used, nused, cap, opt_preg_hard_reg(f, pr));
- }
- add_unique_reg(used, nused, cap, raw);
-}
-
-static void collect_replayed_operand_reg(Func* f, const Operand* op,
- RegClass cls, Reg* used, u32* nused,
- u32 cap) {
- if (!op) return;
- if (op->kind == OPK_REG) {
- if (op->cls == cls)
- collect_replayed_reg(f, op->v.reg, cls, used, nused, cap);
- } else if (op->kind == OPK_INDIRECT) {
- if (cls == RC_INT) {
- collect_replayed_reg(f, op->v.ind.base, cls, used, nused, cap);
- if (op->v.ind.index != (Reg)REG_NONE)
- collect_replayed_reg(f, op->v.ind.index, cls, used, nused, cap);
- }
- }
-}
-
-static void collect_replayed_abivalue_regs(Func* f, const CGABIValue* v,
- RegClass cls, Reg* used, u32* nused,
- u32 cap) {
- if (!v) return;
- collect_replayed_operand_reg(f, &v->storage, cls, used, nused, cap);
- for (u32 i = 0; i < v->nparts; ++i)
- collect_replayed_operand_reg(f, &v->parts[i].op, cls, used, nused, cap);
-}
-
-static void collect_replayed_param_regs(Func* f, RegClass cls, Reg* used,
- u32* nused, u32 cap) {
- if (!f->opt_rewritten) return;
- for (u32 i = 0; i < f->nparams; ++i) {
- IRParam* p = &f->params[i];
- if (p->storage.kind != CG_LOCAL_STORAGE_REG) continue;
- PReg pr = (PReg)p->storage.v.reg;
- if (pr == 0 || pr >= opt_reg_count(f)) continue;
- if (opt_preg_alloc_kind(f, pr) != OPT_ALLOC_HARD ||
- opt_preg_loc_cls(f, pr) != (u8)cls)
- continue;
- add_unique_reg(used, nused, cap, opt_preg_hard_reg(f, pr));
- }
-}
-
-static u32 collect_replayed_hard_regs(Func* f, CGTarget* w, RegClass cls,
- Reg* used, u32 cap) {
- u32 nused = 0;
- collect_replayed_param_regs(f, cls, used, &nused, cap);
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op == IR_PARAM_DECL) continue;
- for (u32 j = 0; j < in->nopnds; ++j)
- collect_replayed_operand_reg(f, &in->opnds[j], cls, used, &nused, cap);
-
- switch ((IROp)in->op) {
- case IR_CALL: {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) break;
- if (aux->use_plan_replay) {
- collect_replayed_operand_reg(f, &aux->plan.callee, cls, used,
- &nused, cap);
- for (u32 j = 0; j < aux->plan.nargs; ++j) {
- collect_replayed_operand_reg(f, &aux->plan.args[j].src, cls, used,
- &nused, cap);
- if (aux->plan.args[j].dst_kind == CG_CALL_PLAN_REG &&
- aux->plan.args[j].cls == (u8)cls)
- add_unique_reg(used, &nused, cap, aux->plan.args[j].dst_reg);
- }
- for (u32 j = 0; j < aux->plan.nrets; ++j) {
- collect_replayed_operand_reg(f, &aux->plan.rets[j].dst, cls, used,
- &nused, cap);
- if (aux->plan.rets[j].cls == (u8)cls)
- add_unique_reg(used, &nused, cap, aux->plan.rets[j].src_reg);
- }
- } else {
- collect_replayed_operand_reg(f, &aux->desc.callee, cls, used,
- &nused, cap);
- for (u32 j = 0; j < aux->desc.nargs; ++j)
- collect_replayed_abivalue_regs(f, &aux->desc.args[j], cls, used,
- &nused, cap);
- collect_replayed_abivalue_regs(f, &aux->desc.ret, cls, used, &nused,
- cap);
- }
- break;
- }
- case IR_RET: {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- if (aux && aux->present)
- collect_replayed_abivalue_regs(f, &aux->val, cls, used, &nused,
- cap);
- break;
- }
- case IR_SCOPE_BEGIN: {
- IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
- if (aux)
- collect_replayed_operand_reg(f, &aux->desc.cond, cls, used, &nused,
- cap);
- break;
- }
- case IR_ASM_BLOCK: {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) break;
- for (u32 j = 0; j < aux->nin; ++j)
- collect_replayed_operand_reg(f, &aux->in_ops[j], cls, used, &nused,
- cap);
- for (u32 j = 0; j < aux->nout; ++j)
- collect_replayed_operand_reg(f, &aux->out_ops[j], cls, used, &nused,
- cap);
- break;
- }
- case IR_INTRINSIC: {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- if (!aux) break;
- for (u32 j = 0; j < aux->narg; ++j)
- collect_replayed_operand_reg(f, &aux->args[j], cls, used, &nused,
- cap);
- for (u32 j = 0; j < aux->ndst; ++j)
- collect_replayed_operand_reg(f, &aux->dsts[j], cls, used, &nused,
- cap);
- break;
- }
- default:
- break;
- }
- }
- }
- if (w->resolve_reg_name) {
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op != IR_ASM_BLOCK) continue;
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) continue;
- for (u32 j = 0; j < aux->nclob; ++j) {
- Reg r;
- RegClass rcls;
- if (w->resolve_reg_name(w, aux->clobbers[j], &r, &rcls) != 0)
- continue;
- if (rcls == cls) add_unique_reg(used, &nused, cap, r);
- }
- }
- }
- }
- return nused;
-}
-
-static int replay_operand_uses_frame_slot(const Operand* op) {
- return op && op->kind == OPK_LOCAL && op->v.frame_slot != FRAME_SLOT_NONE;
-}
-
-static int replay_storage_uses_frame_slot(CGLocalStorage st) {
- return st.kind == CG_LOCAL_STORAGE_FRAME &&
- st.v.frame_slot != FRAME_SLOT_NONE;
-}
-
-static int replay_param_storage_uses_frame_slot(Func* f, CGLocalStorage st) {
- if (replay_storage_uses_frame_slot(st)) return 1;
- if (st.kind != CG_LOCAL_STORAGE_REG || !f || !f->opt_rewritten) return 0;
- PReg pr = (PReg)st.v.reg;
- return opt_reg_valid(f, pr) &&
- opt_preg_alloc_kind(f, pr) == OPT_ALLOC_SPILL &&
- opt_preg_spill_slot(f, pr) != FRAME_SLOT_NONE;
-}
-
-static int replay_abivalue_uses_frame_slot(const CGABIValue* v) {
- if (!v) return 0;
- if (replay_operand_uses_frame_slot(&v->storage)) return 1;
- for (u32 i = 0; i < v->nparts; ++i)
- if (replay_operand_uses_frame_slot(&v->parts[i].op)) return 1;
- return 0;
-}
-
-static int replay_inst_uses_frame_slot(const Inst* in) {
- for (u32 i = 0; i < in->nopnds; ++i)
- if (replay_operand_uses_frame_slot(&in->opnds[i])) return 1;
- switch ((IROp)in->op) {
- case IR_CALL: {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) return 0;
- if (aux->use_plan_replay) {
- if (replay_operand_uses_frame_slot(&aux->plan.callee)) return 1;
- for (u32 i = 0; i < aux->plan.nargs; ++i)
- if (replay_operand_uses_frame_slot(&aux->plan.args[i].src)) return 1;
- for (u32 i = 0; i < aux->plan.nrets; ++i)
- if (replay_operand_uses_frame_slot(&aux->plan.rets[i].dst)) return 1;
- } else {
- if (replay_operand_uses_frame_slot(&aux->desc.callee)) return 1;
- for (u32 i = 0; i < aux->desc.nargs; ++i)
- if (replay_abivalue_uses_frame_slot(&aux->desc.args[i])) return 1;
- if (replay_abivalue_uses_frame_slot(&aux->desc.ret)) return 1;
- }
- return 0;
- }
- case IR_RET: {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- return aux && aux->present && replay_abivalue_uses_frame_slot(&aux->val);
- }
- case IR_SCOPE_BEGIN: {
- IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
- return aux && replay_operand_uses_frame_slot(&aux->desc.cond);
- }
- case IR_ASM_BLOCK: {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) return 0;
- for (u32 i = 0; i < aux->nin; ++i)
- if (replay_operand_uses_frame_slot(&aux->in_ops[i])) return 1;
- for (u32 i = 0; i < aux->nout; ++i)
- if (replay_operand_uses_frame_slot(&aux->out_ops[i])) return 1;
- return 0;
- }
- case IR_INTRINSIC: {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- if (!aux) return 0;
- for (u32 i = 0; i < aux->narg; ++i)
- if (replay_operand_uses_frame_slot(&aux->args[i])) return 1;
- for (u32 i = 0; i < aux->ndst; ++i)
- if (replay_operand_uses_frame_slot(&aux->dsts[i])) return 1;
- return 0;
- }
- default:
- return 0;
- }
-}
-
-static int replay_func_uses_frame_slot(Func* f) {
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i)
- if (replay_inst_uses_frame_slot(&bl->insts[i])) return 1;
- }
- for (u32 i = 0; i < f->nparams; ++i)
- if (replay_param_storage_uses_frame_slot(f, f->params[i].storage)) return 1;
- for (u32 i = 0; i < f->nframe_slots; ++i)
- if (f->frame_slots[i].flags & (FSF_ADDR_TAKEN | FSF_VOLATILE)) return 1;
- return 0;
-}
-
-static void collect_known_frame(Func* f, CGTarget* w, CGKnownFrameDesc* out) {
- memset(out, 0, sizeof(*out));
- FrameSlotDesc* slots = NULL;
- int uses_frame_slot = replay_func_uses_frame_slot(f);
- if (uses_frame_slot && f->nframe_slots) {
- slots = arena_zarray(f->arena, FrameSlotDesc, f->nframe_slots);
- for (u32 i = 0; i < f->nframe_slots; ++i) {
- IRFrameSlot* s = &f->frame_slots[i];
- slots[i].type = s->type;
- slots[i].name = s->name;
- slots[i].loc = s->loc;
- slots[i].size = s->size;
- slots[i].align = s->align;
- slots[i].kind = s->kind;
- slots[i].flags = s->flags;
- }
- }
- out->slots = slots;
- out->nslots = uses_frame_slot ? f->nframe_slots : 0;
-
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op == IR_ALLOCA) {
- out->has_alloca = 1;
- } else if ((IROp)in->op == IR_CALL) {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) {
- out->has_call = 1;
- continue;
- }
- /* A non-tail call, or an ALLOWED tail that may fall back to an
- * ordinary call (clobbering the link register), needs a frame. A MUST
- * tail never falls back, so it alone does not force one. */
- if ((aux->desc.flags & CG_CALL_TAIL) == 0 ||
- aux->desc.tail_policy != CFREE_CG_TAIL_MUST)
- out->has_call = 1;
- u32 need = 0;
- if (aux->use_plan_replay) {
- need = aux->plan.stack_arg_size;
- for (u32 j = 0; j < aux->plan.nargs; ++j) {
- CGCallPlanMove* m = &aux->plan.args[j];
- if (m->dst_kind == CG_CALL_PLAN_STACK ||
- m->dst_kind == CG_CALL_PLAN_TAIL_STACK) {
- u32 end = m->stack_offset + (m->mem.size > 8u ? m->mem.size : 8u);
- if (end > need) need = end;
- }
- }
- need = (need + 15u) & ~15u;
- } else if (w->call_stack_size) {
- need = w->call_stack_size(w, &aux->desc);
- }
- if (need > out->max_outgoing) out->max_outgoing = need;
- }
- }
- }
- out->may_omit_frame = (!out->has_call && !out->has_alloca &&
- out->nslots == 0 && out->max_outgoing == 0)
- ? 1u
- : 0u;
-}
-
-static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
- ReplayCtx r;
- metrics_scope_begin(c, "opt.emit.setup");
- r.c = c;
- r.f = f;
- r.tgt = w;
- r.identity_regs = identity ? 1u : 0u;
- cg_simple_regalloc_init(&r.regalloc);
- u32 nv = f->nvals ? f->nvals : 1u;
- r.val_to_reg = arena_zarray(f->arena, Reg, nv);
- for (u32 i = 0; i < nv; ++i) r.val_to_reg[i] = REG_NONE;
- r.val_alloced = arena_zarray(f->arena, u8, nv);
- r.slot_map = arena_zarray(f->arena, FrameSlot, f->nframe_slots + 1u);
- for (u32 i = 0; i <= f->nframe_slots; ++i) r.slot_map[i] = FRAME_SLOT_NONE;
- u32 nb = f->nblocks ? f->nblocks : 1u;
- r.label_map = arena_zarray(f->arena, Label, nb);
- for (u32 i = 0; i < f->nblocks; ++i) r.label_map[i] = LABEL_NONE;
- r.scope_map = arena_zarray(f->arena, CGScope, f->nscopes + 1u);
- for (u32 i = 0; i <= f->nscopes; ++i) r.scope_map[i] = CG_SCOPE_NONE;
- r.block_label_placed = arena_zarray(f->arena, u8, nb);
- r.used_hard_regs_valid = 0;
- r.last_loc_valid = 0;
- /* If the target isn't emitting debug info, we only need to keep the
- * panic loc accurate at function granularity. Set once at func entry
- * (handled by the first replay_inst's dedup check) and skip the rest. */
- r.wants_loc = w->debug != NULL;
- metrics_scope_end(c, "opt.emit.setup");
-
- metrics_scope_begin(c, "opt.emit.plan_hard_regs");
- if (identity && (w->plan_hard_regs || w->reserve_hard_regs)) {
- /* Collect once; reuse for both plan_hard_regs (here) and
- * reserve_hard_regs (after the body). The IR doesn't change between
- * these two callbacks, so a second scan would compute the same data. */
- for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) {
- r.nused_hard_regs[cidx] = collect_replayed_hard_regs(
- f, w, (RegClass)cidx, r.used_hard_regs[cidx], OPT_MAX_HARD_REGS);
- }
- r.used_hard_regs_valid = 1;
- if (w->plan_hard_regs) {
- for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) {
- w->plan_hard_regs(w, (RegClass)cidx, r.used_hard_regs[cidx],
- r.nused_hard_regs[cidx]);
- }
- }
- }
- metrics_scope_end(c, "opt.emit.plan_hard_regs");
-
- metrics_scope_begin(c, "opt.emit.func_begin");
- int known_frame = w->func_begin_known_frame != NULL;
- if (known_frame) {
- CGKnownFrameDesc frame;
- FrameSlot* target_slots =
- f->nframe_slots ? arena_zarray(f->arena, FrameSlot, f->nframe_slots)
- : NULL;
- collect_known_frame(f, w, &frame);
- w->func_begin_known_frame(w, &f->desc, &frame, target_slots);
- for (u32 i = 0; i < f->nframe_slots; ++i)
- r.slot_map[f->frame_slots[i].id] = target_slots[i];
- }
- if (!known_frame) {
- /* func_begin with the recorded descriptor. Parameter storage is replayed
- * through target->param below after frame slots are mapped. */
- w->func_begin(w, &f->desc);
- }
-
- if (!r.identity_regs) {
- for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) {
- const Reg* regs = NULL;
- u32 nregs = 0;
- if (w->get_allocable_regs)
- w->get_allocable_regs(w, (RegClass)cidx, &regs, &nregs);
- if (regs && nregs)
- cg_simple_regalloc_set_ordered(&r.regalloc, (RegClass)cidx, regs,
- nregs);
- }
- }
-
- if (!known_frame) {
- for (u32 i = 0; i < f->nframe_slots; ++i) {
- IRFrameSlot* s = &f->frame_slots[i];
- FrameSlotDesc d = {0};
- d.type = s->type;
- d.name = s->name;
- d.loc = s->loc;
- d.size = s->size;
- d.align = s->align;
- d.kind = s->kind;
- d.flags = s->flags;
- r.slot_map[s->id] = w->frame_slot(w, &d);
- }
- }
-
- for (u32 i = 0; i < f->nparams; ++i) {
- IRParam* p = &f->params[i];
- CGParamDesc d = {0};
- d.index = p->index;
- d.name = p->name;
- d.type = p->type;
- d.size = p->size;
- d.align = p->align;
- d.flags = p->flags;
- if (replay_reg_storage_unused(&r, p->storage)) {
- d.storage = p->storage;
- d.storage.v.reg = REG_NONE;
- } else {
- d.storage = xlat_storage(&r, p->storage, p->type);
- }
- if (known_frame && d.storage.kind == CG_LOCAL_STORAGE_FRAME &&
- d.storage.v.frame_slot == FRAME_SLOT_NONE) {
- SrcLoc loc = p->loc;
- compiler_panic(c, loc,
- "opt replay: frame-backed param %u missing known-frame "
- "slot mapping",
- (unsigned)i);
- }
- d.abi = p->abi;
- d.loc = p->loc;
- (void)w->param(w, &d);
- }
- metrics_scope_end(c, "opt.emit.func_begin");
-
- metrics_scope_begin(c, "opt.emit.body");
- /* Body in emit order — the order CG's emit cursor visited each
- * block. Block-creation order can differ when label_new precedes a
- * cmp_branch whose fallthrough block must physically follow. */
- for (u32 i = 0; i < f->emit_order_n; ++i) {
- replay_block(&r, f->emit_order[i]);
- }
- metrics_scope_end(c, "opt.emit.body");
-
- metrics_scope_begin(c, "opt.emit.reserve_hard_regs");
- /* At -O1, opt managed allocation and emitted hard regs directly,
- * bypassing backend-local allocation. Tell the backend which hard
- * regs are still visible in replay so it can save the right callee-saved
- * subset in prologue/epilogue. Reuses the cached collection from the
- * plan_hard_regs pass — the IR hasn't changed since.
- *
- * The backend records only callee-saved members of this set for
- * prologue/epilogue preservation. */
- if (r.identity_regs && w->reserve_hard_regs && r.used_hard_regs_valid) {
- for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
- if (r.nused_hard_regs[c])
- w->reserve_hard_regs(w, (RegClass)c, r.used_hard_regs[c],
- r.nused_hard_regs[c]);
- }
- } else if (!r.identity_regs && w->reserve_hard_regs) {
- for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
- Reg used[CG_SIMPLE_REGALLOC_MAX_REGS];
- u32 nused = cg_simple_regalloc_used_regs(&r.regalloc, (RegClass)c, used,
- CG_SIMPLE_REGALLOC_MAX_REGS);
- if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused);
- }
- }
-
- metrics_scope_end(c, "opt.emit.reserve_hard_regs");
- metrics_scope_begin(c, "opt.emit.func_end");
- w->func_end(w);
- metrics_scope_end(c, "opt.emit.func_end");
-}
-
-void opt_replay(Compiler* c, Func* f, CGTarget* target) {
- replay_func_to(c, f, target, 0);
-}
-
-void opt_emit(Compiler* c, Func* f, CGTarget* target) {
- if (f && f->mir) {
- Func view = *f;
- view.blocks = f->mir->blocks;
- view.nblocks = f->mir->nblocks;
- view.entry = f->mir->entry;
- view.emit_order = f->mir->emit_order;
- view.emit_order_n = f->mir->emit_order_n;
- view.emit_order_cap = f->mir->emit_order_cap;
- view.opt_rewritten = 1;
- view.mir = NULL;
- replay_func_to(c, &view, target, 1);
- return;
- }
- replay_func_to(c, f, target, 1);
-}
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -474,7 +474,7 @@ static u32 alloc_alloc_stack_slot(Func* f, OptAllocator* a, FrameSlot fs) {
static u32 hard_reg_alloc_score(Func* f, const OptAllocator* a,
const OptPRegInfo* vi, Reg hr) {
const CGPhysRegInfo* pi = phys_info_for(f, vi->cls, hr);
- u32 score = pi ? pi->use_cost : 0;
+ u32 score = pi ? pi->spill_cost : 0;
if (vi->live_across_call_freq) {
if (is_caller_saved(f, vi->cls, hr))
score += 1000u + vi->live_across_call_freq;
@@ -484,7 +484,7 @@ static u32 hard_reg_alloc_score(Func* f, const OptAllocator* a,
u32 bit = hard_loc_bit(vi->cls, hr);
int already_open =
a->hard_open && bit < a->hard_loc_bits && a->hard_open[bit];
- if (!already_open) score += pi ? pi->save_cost : 50u;
+ if (!already_open) score += pi ? pi->copy_cost : 50u;
}
return score;
}
@@ -1158,6 +1158,35 @@ static void rewrite_call_arg_operand(Func* f, Operand* op) {
}
}
+static void rewrite_store_value_operand(Func* f, Inst* owner, Operand* op,
+ RewriteCtx* ctx) {
+ PReg v;
+ u8 alloc_kind;
+ const OptAllocSegment* seg;
+ if (!op || op->kind != OPK_REG) return;
+ v = (PReg)op->v.reg;
+ if (v == PREG_NONE || v == 0 || v >= opt_reg_count(f)) return;
+ alloc_kind = opt_preg_alloc_kind(f, v);
+ if (alloc_kind == OPT_ALLOC_HARD) {
+ op->v.reg = opt_preg_hard_reg(f, v);
+ return;
+ }
+ if (alloc_kind == OPT_ALLOC_SPLIT) {
+ seg = split_segment_at(f, v, ctx->raw_point);
+ if (seg && seg->loc_kind == OPT_LOC_HARD) {
+ op->v.reg = seg->hard_reg;
+ return;
+ }
+ *op = spill_addr(f, v);
+ return;
+ }
+ if (alloc_kind == OPT_ALLOC_SPILL) {
+ *op = spill_addr(f, v);
+ return;
+ }
+ rewrite_one_operand(f, owner, op, 0, ctx);
+}
+
static void rewrite_call_arg_indirect_base(Func* f, Inst* owner, Operand* op,
RewriteCtx* ctx) {
if (!op || op->kind != OPK_INDIRECT) return;
@@ -1492,6 +1521,9 @@ static void rewrite_func(Func* f, const OptLiveInfo* live_info) {
&ctx);
}
}
+ } else if ((IROp)in.op == IR_STORE && in.nopnds >= 2) {
+ opt_walk_operand(f, &in, &in.opnds[0], 0, rewrite_one_operand, &ctx);
+ rewrite_store_value_operand(f, &in, &in.opnds[1], &ctx);
} else {
opt_walk_inst_operands(f, &in, rewrite_one_operand, &ctx);
}
@@ -1520,34 +1552,10 @@ static void rewrite_func(Func* f, const OptLiveInfo* live_info) {
f->opt_rewritten = 1;
}
-static Block* lower_copy_blocks(Func* f) {
- Block* blocks = arena_zarray(f->arena, Block, f->nblocks ? f->nblocks : 1u);
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* dst = &blocks[b];
- Block* src = &f->blocks[b];
- *dst = *src;
- if (src->ninsts) {
- dst->insts = arena_array(f->arena, Inst, src->ninsts);
- memcpy(dst->insts, src->insts, sizeof(Inst) * src->ninsts);
- dst->cap = src->ninsts;
- }
- if (src->nsucc) {
- dst->succ = arena_array(f->arena, u32, src->nsucc);
- memcpy(dst->succ, src->succ, sizeof(u32) * src->nsucc);
- dst->succ_cap = src->nsucc;
- }
- if (src->npreds) {
- dst->preds = arena_array(f->arena, u32, src->npreds);
- memcpy(dst->preds, src->preds, sizeof(u32) * src->npreds);
- }
- }
- return blocks;
-}
-
void opt_lower_to_mir(Func* f, const OptLiveInfo* live_info) {
if (!f) return;
Func phys = *f;
- phys.blocks = lower_copy_blocks(f);
+ phys.blocks = f->blocks;
phys.opt_rewritten = 0;
phys.mir = NULL;
@@ -1565,6 +1573,9 @@ void opt_lower_to_mir(Func* f, const OptLiveInfo* live_info) {
memcpy(m->emit_order, phys.emit_order, sizeof(u32) * phys.emit_order_n);
f->mir = m;
+ f->blocks = phys.blocks;
+ f->nblocks = phys.nblocks;
+ f->blocks_cap = phys.blocks_cap;
f->frame_slots = phys.frame_slots;
f->nframe_slots = phys.nframe_slots;
f->frame_slots_cap = phys.frame_slots_cap;
diff --git a/src/opt/pass_machinize.c b/src/opt/pass_machinize.c
@@ -1,7 +1,5 @@
#include <string.h>
-#include "core/arena.h"
-#include "core/core.h"
#include "core/pool.h"
#include "core/slice.h"
#include "opt/opt_internal.h"
@@ -13,24 +11,32 @@ static const char* asm_constraint_body(const char* s) {
return s;
}
-static int asm_resolve_fixed_constraint(Func* f, CGTarget* target,
+static int native_resolve_reg(NativeTarget* target, Sym name, Reg* out,
+ RegClass* cls_out) {
+ NativeAllocClass cls;
+ if (!target || !target->regs || !target->regs->resolve_name) return 1;
+ if (target->regs->resolve_name(target->regs, name, out, &cls) != 0) return 1;
+ if (cls_out) *cls_out = (RegClass)cls;
+ return 0;
+}
+
+static int asm_resolve_fixed_constraint(Func* f, NativeTarget* target,
const char* constraint, Reg* reg_out,
RegClass* cls_out) {
const char* body = asm_constraint_body(constraint);
- if (!target->resolve_reg_name) return 0;
if (body[0] != '{') return 0;
const char* end = body + 1;
while (*end && *end != '}') ++end;
if (*end != '}' || end == body + 1) return 0;
Sym name = pool_intern_slice(
f->c->global, (Slice){.s = body + 1, .len = (size_t)(end - body - 1)});
- return target->resolve_reg_name(target, name, reg_out, cls_out) == 0;
+ return native_resolve_reg(target, name, reg_out, cls_out) == 0;
}
-static void asm_prepare_constraints(Func* f, CGTarget* target, IRAsmAux* aux) {
+static void asm_prepare_constraints(Func* f, NativeTarget* target,
+ IRAsmAux* aux) {
if (!aux) return;
for (u32 c = 0; c < OPT_REG_CLASSES; ++c) aux->clobber_mask[c] = 0;
-
if (aux->nout && !aux->out_fixed_regs) {
aux->out_fixed_regs = arena_array(f->arena, i32, aux->nout);
aux->out_fixed_cls = arena_zarray(f->arena, u8, aux->nout);
@@ -41,18 +47,12 @@ static void asm_prepare_constraints(Func* f, CGTarget* target, IRAsmAux* aux) {
aux->in_fixed_cls = arena_zarray(f->arena, u8, aux->nin);
for (u32 i = 0; i < aux->nin; ++i) aux->in_fixed_regs[i] = -1;
}
-
- if (target->resolve_reg_name) {
- for (u32 i = 0; i < aux->nclob; ++i) {
- Reg r;
- RegClass cls;
- if (target->resolve_reg_name(target, aux->clobbers[i], &r, &cls) != 0)
- continue;
- if ((u32)cls < OPT_REG_CLASSES && r < 32)
- aux->clobber_mask[cls] |= 1u << r;
- }
+ for (u32 i = 0; i < aux->nclob; ++i) {
+ Reg r;
+ RegClass cls;
+ if (native_resolve_reg(target, aux->clobbers[i], &r, &cls) != 0) continue;
+ if ((u32)cls < OPT_REG_CLASSES && r < 32) aux->clobber_mask[cls] |= 1u << r;
}
-
for (u32 i = 0; i < aux->nout; ++i) {
Reg r;
RegClass cls;
@@ -71,10 +71,7 @@ static void asm_prepare_constraints(Func* f, CGTarget* target, IRAsmAux* aux) {
}
}
-static int call_plan_replay_supported(const IRCallAux* aux,
- const CGTarget* target);
-
-static void machinize_reset(Func* f, CGTarget* target) {
+static void machinize_reset(Func* f, NativeTarget* target) {
f->opt_target = target->c->target;
f->opt_has_target = 1;
for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
@@ -89,80 +86,46 @@ static void machinize_reset(Func* f, CGTarget* target) {
}
}
-static void machinize_prepare_insts(Func* f, CGTarget* target) {
+static void machinize_prepare_insts(Func* f, NativeTarget* target) {
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
for (u32 i = 0; i < bl->ninsts; ++i) {
Inst* in = &bl->insts[i];
- if ((IROp)in->op == IR_ASM_BLOCK) {
+ if ((IROp)in->op == IR_ASM_BLOCK)
asm_prepare_constraints(f, target, (IRAsmAux*)in->extra.aux);
- } else if ((IROp)in->op == IR_CALL && target->plan_call) {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (aux) {
- target->plan_call(target, &aux->desc, &aux->plan);
- aux->plan_valid = 1;
- aux->use_plan_replay = call_plan_replay_supported(aux, target);
- }
- }
}
}
}
-static void machinize_collect_regs(Func* f, CGTarget* target) {
- for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
- const CGPhysRegInfo* phys = NULL;
- u32 nphys = 0;
- if (target->get_phys_regs)
- target->get_phys_regs(target, (RegClass)c, &phys, &nphys);
- if (phys) {
- u32 phys_limit = nphys < OPT_MAX_HARD_REGS ? nphys : OPT_MAX_HARD_REGS;
- for (u32 i = 0; i < phys_limit; ++i) {
- Reg hr = phys[i].reg;
- u16 flags = phys[i].flags;
- if (hr < 32u) {
- if (flags & CG_REG_CALLER_SAVED) f->opt_caller_saved[c] |= 1u << hr;
- if (flags & CG_REG_CALLEE_SAVED) f->opt_callee_saved[c] |= 1u << hr;
- if (flags & CG_REG_RESERVED) f->opt_reserved_regs[c] |= 1u << hr;
- if (flags & CG_REG_ARG) f->opt_arg_regs[c] |= 1u << hr;
- if (flags & CG_REG_RET) f->opt_ret_regs[c] |= 1u << hr;
- }
- f->opt_phys_regs[c][f->opt_phys_reg_count[c]++] = phys[i];
- if ((flags & CG_REG_ALLOCABLE) && !(flags & CG_REG_RESERVED)) {
- f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hr;
- }
- }
- } else {
- const Reg* hard = NULL;
- u32 nhard = 0;
- if (target->get_allocable_regs)
- target->get_allocable_regs(target, (RegClass)c, &hard, &nhard);
- u32 hard_limit = nhard < OPT_MAX_HARD_REGS ? nhard : OPT_MAX_HARD_REGS;
- for (u32 i = 0; i < hard_limit; ++i)
- f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hard[i];
- }
-
- const Reg* scratch = NULL;
- u32 nscratch = 0;
- if (target->get_scratch_regs)
- target->get_scratch_regs(target, (RegClass)c, &scratch, &nscratch);
- u32 scratch_limit =
- nscratch < OPT_MAX_SCRATCH_REGS ? nscratch : OPT_MAX_SCRATCH_REGS;
- for (u32 i = 0; i < scratch_limit; ++i)
- f->opt_scratch_regs[c][f->opt_scratch_reg_count[c]++] = scratch[i];
-
- if (!phys && target->is_caller_saved) {
- for (u32 i = 0; i < f->opt_hard_reg_count[c]; ++i) {
- Reg hr = f->opt_hard_regs[c][i];
- if (target->is_caller_saved(target, (RegClass)c, hr))
- f->opt_caller_saved[c] |= (1u << hr);
- }
- }
- u32* callee_saved = &f->opt_callee_saved[c];
- if (target->callee_save_mask) {
- u32 mask = target->callee_save_mask(target, (RegClass)c);
- *callee_saved |= mask;
- }
+static void collect_class(Func* f, const NativeAllocClassInfo* ci) {
+ u32 cls = ci->cls;
+ if (cls >= OPT_REG_CLASSES) return;
+ f->opt_caller_saved[cls] = ci->caller_saved_mask;
+ f->opt_callee_saved[cls] = ci->callee_saved_mask;
+ f->opt_reserved_regs[cls] = ci->reserved_mask;
+ f->opt_arg_regs[cls] = ci->arg_mask;
+ f->opt_ret_regs[cls] = ci->ret_mask;
+ for (u32 i = 0;
+ i < ci->nphys && f->opt_phys_reg_count[cls] < OPT_MAX_HARD_REGS; ++i) {
+ const NativePhysRegInfo* src = &ci->phys[i];
+ CGPhysRegInfo* dst = &f->opt_phys_regs[cls][f->opt_phys_reg_count[cls]++];
+ memset(dst, 0, sizeof *dst);
+ dst->reg = src->reg;
+ dst->cls = src->cls;
+ dst->abi_index = src->abi_index;
+ dst->flags = src->flags;
+ if ((src->flags & CG_REG_ALLOCABLE) && !(src->flags & CG_REG_RESERVED) &&
+ f->opt_hard_reg_count[cls] < OPT_MAX_HARD_REGS)
+ f->opt_hard_regs[cls][f->opt_hard_reg_count[cls]++] = src->reg;
}
+ for (u32 i = 0; i < ci->nscratch && i < OPT_MAX_SCRATCH_REGS; ++i)
+ f->opt_scratch_regs[cls][f->opt_scratch_reg_count[cls]++] = ci->scratch[i];
+}
+
+static void machinize_collect_regs(Func* f, NativeTarget* target) {
+ if (!target || !target->regs) return;
+ for (u32 i = 0; i < target->regs->nclasses; ++i)
+ collect_class(f, &target->regs->classes[i]);
}
static void machinize_check_overlap(Func* f) {
@@ -171,8 +134,7 @@ static void machinize_check_overlap(Func* f) {
Reg hr = f->opt_hard_regs[c][i];
for (u32 s = 0; s < f->opt_scratch_reg_count[c]; ++s) {
if (f->opt_scratch_regs[c][s] == hr) {
- SrcLoc loc = {0, 0, 0};
- compiler_panic(f->c, loc,
+ compiler_panic(f->c, (SrcLoc){0, 0, 0},
"opt_machinize: hard reg %u overlaps scratch reg "
"in class %u",
(unsigned)hr, (unsigned)c);
@@ -182,37 +144,9 @@ static void machinize_check_overlap(Func* f) {
}
}
-void opt_machinize(Func* f, CGTarget* target) {
+void opt_machinize_native(Func* f, NativeTarget* target) {
machinize_reset(f, target);
machinize_prepare_insts(f, target);
machinize_collect_regs(f, target);
machinize_check_overlap(f);
}
-
-static int call_plan_replay_supported(const IRCallAux* aux,
- const CGTarget* target) {
- if (!aux || !aux->plan_valid || !target || !target->emit_call_plan) return 0;
- for (u32 i = 0; i < aux->plan.nargs; ++i) {
- if ((aux->plan.args[i].dst_kind == CG_CALL_PLAN_STACK ||
- aux->plan.args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) &&
- !target->store_call_arg)
- return 0;
- if (aux->plan.args[i].dst_kind == CG_CALL_PLAN_REG &&
- (aux->plan.args[i].src_kind == CG_CALL_PLAN_SRC_ADDR ||
- aux->plan.args[i].src_offset) &&
- !target->load_call_arg)
- return 0;
- }
- for (u32 i = 0; i < aux->plan.nrets; ++i)
- if (aux->plan.rets[i].dst.kind != OPK_REG &&
- aux->plan.rets[i].dst.kind != OPK_LOCAL &&
- aux->plan.rets[i].dst.kind != OPK_INDIRECT)
- return 0;
- for (u32 i = 0; i < aux->plan.nrets; ++i)
- if (aux->plan.rets[i].dst_offset &&
- (aux->plan.rets[i].dst.kind == OPK_LOCAL ||
- aux->plan.rets[i].dst.kind == OPK_INDIRECT) &&
- !target->store_call_ret)
- return 0;
- return 1;
-}
diff --git a/src/opt/pass_native_emit.c b/src/opt/pass_native_emit.c
@@ -0,0 +1,1219 @@
+#include <string.h>
+
+#include "cg/type.h"
+#include "core/metrics.h"
+#include "core/pool.h"
+#include "opt/opt_internal.h"
+
+#undef Operand
+#undef CGParamDesc
+#undef CGCallDesc
+#undef CGFuncDesc
+#undef CGLocalStorage
+#undef CGABIValue
+#undef CGABIPart
+#undef CGCallPlan
+#undef CGCallPlanMove
+#undef CGCallPlanRet
+#undef CGScopeDesc
+
+/* Per-function state shared by the native emission helpers in this file. */
+typedef struct NativeEmitCtx {
+ /* Compiler handle; used for panics and type size/alignment queries. */
+ Compiler* c;
+ /* Optimizer function being emitted. */
+ Func* f;
+ /* Target whose callbacks receive the emitted operations. */
+ NativeTarget* target;
+ /* Indexed by optimizer frame slot; holds the target frame slot. A zero
+ * entry is treated as "unmapped" by map_slot. */
+ NativeFrameSlot* slot_map;
+ /* Indexed by pseudo-register; home frame slot recorded by bind_params for
+ * register-stored parameters. */
+ NativeFrameSlot* param_home_by_preg;
+ /* Indexed by block; MC_LABEL_NONE until ensure_label creates the label. */
+ MCLabel* labels;
+ /* Presumably a per-block "label already placed" flag; not referenced in
+ * this part of the file -- TODO confirm. */
+ u8* label_placed;
+ /* Largest stack_arg_size reported by any call plan so far. */
+ u32 max_outgoing;
+ /* State of the local static data sequence currently being written: its
+ * section, the symbol to define, the aligned start offset, the number of
+ * bytes written so far, and whether a sequence is open. */
+ ObjSecId local_static_sec;
+ ObjSymId local_static_sym;
+ u32 local_static_base;
+ u32 local_static_size;
+ u8 local_static_active;
+} NativeEmitCtx;
+
+/* Report msg through compiler_panic with this pass's prefix. Declared
+ * _Noreturn, so callers rely on it never coming back. */
+static _Noreturn void emit_panic(NativeEmitCtx* e, SrcLoc loc,
+                                 const char* msg) {
+  Compiler* compiler = e->c;
+  compiler_panic(compiler, loc, "opt native emit: %s", msg);
+}
+
+/* Open a local static data sequence described by desc.
+ *
+ * Picks the section (the explicit one from desc, else ".rodata" or ".data"
+ * depending on CFREE_CG_DATADEF_READONLY), aligns its write position and
+ * records the section, base offset and symbol in e. Panics when desc is
+ * missing or when another sequence is still open. */
+static void emit_local_static_begin(NativeEmitCtx* e,
+                                    const CGLocalStaticDataDesc* desc,
+                                    SrcLoc loc) {
+  int readonly;
+  Sym sec_name;
+  SecKind sec_kind;
+  u16 sec_flags;
+  u32 sec_align;
+  if (!desc) emit_panic(e, loc, "missing local static data descriptor");
+  if (e->local_static_active) emit_panic(e, loc, "nested local static data");
+  /* Kind and flags depend only on the read-only attribute. */
+  readonly = (desc->attrs.flags & CFREE_CG_DATADEF_READONLY) != 0;
+  sec_kind = readonly ? SEC_RODATA : SEC_DATA;
+  sec_flags = readonly ? SF_ALLOC : (SF_ALLOC | SF_WRITE);
+  if (desc->attrs.section)
+    sec_name = (Sym)desc->attrs.section;
+  else if (readonly)
+    sec_name = pool_intern_slice(e->c->global, SLICE_LIT(".rodata"));
+  else
+    sec_name = pool_intern_slice(e->c->global, SLICE_LIT(".data"));
+  sec_align = desc->align;
+  if (!sec_align) sec_align = 1u;
+  e->local_static_sec =
+      obj_section(e->target->obj, sec_name, sec_kind, sec_flags, sec_align);
+  e->local_static_base =
+      obj_align_to(e->target->obj, e->local_static_sec, sec_align);
+  e->local_static_sym = desc->sym;
+  e->local_static_size = 0;
+  e->local_static_active = 1;
+}
+
+/* Append len bytes to the open local static data sequence.
+ *
+ * A NULL data pointer means "len zero bytes", written in chunks from a small
+ * stack buffer. Panics when no sequence is open, or when the write would
+ * push the 32-bit running size past its range (previously the size was
+ * silently truncated, which would define the symbol with a wrong size). */
+static void emit_local_static_write(NativeEmitCtx* e, const u8* data, u64 len,
+                                    SrcLoc loc) {
+  u8 zero[64];
+  u64 remaining = len;
+  if (!e->local_static_active) emit_panic(e, loc, "local static data inactive");
+  if (!len) return;
+  if (len > (u64)0xffffffffu - e->local_static_size)
+    emit_panic(e, loc, "local static data too large");
+  if (data) {
+    obj_write(e->target->obj, e->local_static_sec, data, (size_t)len);
+  } else {
+    memset(zero, 0, sizeof zero);
+    while (remaining >= sizeof zero) {
+      obj_write(e->target->obj, e->local_static_sec, zero, sizeof zero);
+      remaining -= sizeof zero;
+    }
+    if (remaining)
+      obj_write(e->target->obj, e->local_static_sec, zero, (size_t)remaining);
+  }
+  e->local_static_size += (u32)len;
+}
+
+/* Append the address of a code label (plus addend) to the open local static
+ * data sequence: zero bytes are written as a placeholder and an R_ABS64
+ * label relocation is attached at that offset. Only 8-byte entries are
+ * accepted; anything else, or no open sequence, panics. */
+static void emit_local_static_label_addr(NativeEmitCtx* e, MCLabel target,
+                                         i64 addend, u32 width, SrcLoc loc) {
+  u8 placeholder[8] = {0};
+  u32 reloc_off;
+  if (!e->local_static_active) emit_panic(e, loc, "local static data inactive");
+  if (width != 8u) emit_panic(e, loc, "unsupported local static label width");
+  reloc_off = e->local_static_base + e->local_static_size;
+  obj_write(e->target->obj, e->local_static_sec, placeholder, width);
+  e->target->mc->emit_label_data_reloc(e->target->mc, e->local_static_sec,
+                                       reloc_off, target, R_ABS64, width,
+                                       addend);
+  e->local_static_size = e->local_static_size + width;
+}
+
+/* Close the open local static data sequence: define its symbol over the
+ * bytes written since emit_local_static_begin, then reset the sequence state
+ * in e. Panics when no sequence is open. */
+static void emit_local_static_end(NativeEmitCtx* e, SrcLoc loc) {
+  if (!e->local_static_active) emit_panic(e, loc, "local static data inactive");
+  obj_symbol_define(e->target->obj, e->local_static_sym, e->local_static_sec,
+                    e->local_static_base, e->local_static_size);
+  e->local_static_size = 0;
+  e->local_static_base = 0;
+  e->local_static_sym = OBJ_SYM_NONE;
+  e->local_static_sec = OBJ_SEC_NONE;
+  e->local_static_active = 0;
+}
+
+/* Size of type as a u32, or fallback when type is zero, the reported size is
+ * zero, or it does not fit in 32 bits. */
+static u32 type_size_or(Compiler* c, CfreeCgTypeId type, u32 fallback) {
+  u64 size;
+  if (!type) return fallback;
+  size = cg_type_size(c, type);
+  return (size != 0 && size <= 0xffffffffull) ? (u32)size : fallback;
+}
+
+/* Alignment of type as a u32, or fallback when type is zero, the reported
+ * alignment is zero, or it does not fit in 32 bits. */
+static u32 type_align_or(Compiler* c, CfreeCgTypeId type, u32 fallback) {
+  u64 align;
+  if (!type) return fallback;
+  align = cg_type_align(c, type);
+  return (align != 0 && align <= 0xffffffffull) ? (u32)align : fallback;
+}
+
+/* Build a zeroed MemAccess for a whole value of type. Size falls back to 8
+ * when the type's size is unavailable; alignment falls back to the size,
+ * capped at 8. */
+static MemAccess mem_for_type(Compiler* c, CfreeCgTypeId type) {
+  MemAccess access;
+  u32 align_fallback;
+  memset(&access, 0, sizeof access);
+  access.type = type;
+  access.size = type_size_or(c, type, 8u);
+  /* Read the stored size back so the fallback matches the field's value. */
+  align_fallback = access.size >= 8u ? 8u : access.size;
+  access.align = type_align_or(c, type, align_fallback);
+  return access;
+}
+
+/* Register class for values of type: the target's class_for_type hook when
+ * it provides one, otherwise FP for float types and INT for everything else. */
+static NativeAllocClass class_for_type(NativeEmitCtx* e, CfreeCgTypeId type) {
+  if (!e->target->class_for_type)
+    return cg_type_is_float(e->c, type) ? NATIVE_REG_FP : NATIVE_REG_INT;
+  return e->target->class_for_type(e->target, type);
+}
+
+/* An all-zero NativeLoc; callers compare its kind against NATIVE_LOC_NONE. */
+static NativeLoc loc_none(void) {
+  NativeLoc empty;
+  memset(&empty, 0, sizeof empty);
+  return empty;
+}
+
+/* Location naming register reg of class cls, holding a value of type. */
+static NativeLoc loc_reg(CfreeCgTypeId type, NativeAllocClass cls, Reg reg) {
+  NativeLoc out = loc_none();
+  out.v.reg = reg;
+  out.type = type;
+  out.cls = (u8)cls;
+  out.kind = NATIVE_LOC_REG;
+  return out;
+}
+
+/* Location naming target frame slot `slot`, holding a value of type whose
+ * register class is cls. */
+static NativeLoc loc_frame(CfreeCgTypeId type, NativeAllocClass cls,
+                           NativeFrameSlot slot) {
+  NativeLoc out = loc_none();
+  out.v.frame = slot;
+  out.type = type;
+  out.cls = (u8)cls;
+  out.kind = NATIVE_LOC_FRAME;
+  return out;
+}
+
+/* Immediate location carrying imm; always tagged with the integer class. */
+static NativeLoc loc_imm(CfreeCgTypeId type, i64 imm) {
+  NativeLoc out = loc_none();
+  out.v.imm = imm;
+  out.type = type;
+  out.cls = NATIVE_REG_INT;
+  out.kind = NATIVE_LOC_IMM;
+  return out;
+}
+
+/* Location naming symbol sym plus addend; always tagged with the integer
+ * class. */
+static NativeLoc loc_global(CfreeCgTypeId type, ObjSymId sym, i64 addend) {
+  NativeLoc out = loc_none();
+  out.v.global.addend = addend;
+  out.v.global.sym = sym;
+  out.type = type;
+  out.cls = NATIVE_REG_INT;
+  out.kind = NATIVE_LOC_GLOBAL;
+  return out;
+}
+
+/* Nonzero when both locations are frame slots and name the same slot. */
+static int loc_same_frame(NativeLoc a, NativeLoc b) {
+  if (a.kind != NATIVE_LOC_FRAME || b.kind != NATIVE_LOC_FRAME) return 0;
+  return a.v.frame == b.v.frame;
+}
+
+/* First scratch register of class cls that is neither a nor b. Panics when
+ * the class is out of range or every scratch register is excluded. */
+static Reg scratch_reg(NativeEmitCtx* e, NativeAllocClass cls, Reg a, Reg b,
+                       SrcLoc loc) {
+  u32 c = (u32)cls;
+  u32 count = c < OPT_REG_CLASSES ? e->f->opt_scratch_reg_count[c] : 0u;
+  for (u32 i = 0; i < count; ++i) {
+    Reg candidate = e->f->opt_scratch_regs[c][i];
+    if (candidate == a || candidate == b) continue;
+    return candidate;
+  }
+  emit_panic(e, loc, "no scratch register for native emission");
+}
+
+/* Non-panicking twin of scratch_reg: 1 when class cls has a scratch register
+ * other than a and b, 0 otherwise. */
+static int scratch_available(NativeEmitCtx* e, NativeAllocClass cls, Reg a,
+                             Reg b) {
+  u32 c = (u32)cls;
+  u32 count = c < OPT_REG_CLASSES ? e->f->opt_scratch_reg_count[c] : 0u;
+  for (u32 i = 0; i < count; ++i) {
+    Reg candidate = e->f->opt_scratch_regs[c][i];
+    if (candidate == a || candidate == b) continue;
+    return 1;
+  }
+  return 0;
+}
+
+/* Register location built on scratch_reg; panics under the same conditions. */
+static NativeLoc scratch_loc(NativeEmitCtx* e, CfreeCgTypeId type,
+                             NativeAllocClass cls, Reg a, Reg b, SrcLoc loc) {
+  Reg picked = scratch_reg(e, cls, a, b, loc);
+  return loc_reg(type, cls, picked);
+}
+
+/* Translate an optimizer frame slot into the target's frame slot through
+ * e->slot_map. NATIVE_FRAME_SLOT_NONE maps to itself; an out-of-range or
+ * unmapped slot panics.
+ * NOTE(review): slot == nframe_slots is accepted, which presumes slot ids are
+ * 1-based and slot_map has nframe_slots + 1 entries -- confirm at the
+ * allocation site. */
+static NativeFrameSlot map_slot(NativeEmitCtx* e, NativeFrameSlot slot,
+                                SrcLoc loc) {
+  NativeFrameSlot mapped;
+  if (slot == NATIVE_FRAME_SLOT_NONE) return NATIVE_FRAME_SLOT_NONE;
+  if (slot > e->f->nframe_slots) emit_panic(e, loc, "bad frame slot");
+  mapped = e->slot_map[slot];
+  if (!mapped) emit_panic(e, loc, "unmapped frame slot");
+  return mapped;
+}
+
+/* Label for block, created through the target on first request and cached in
+ * e->labels. Panics when block is not a valid block index. */
+static MCLabel ensure_label(NativeEmitCtx* e, u32 block, SrcLoc loc) {
+  MCLabel* cached;
+  if (block >= e->f->nblocks) emit_panic(e, loc, "bad block label");
+  cached = &e->labels[block];
+  if (*cached == MC_LABEL_NONE) *cached = e->target->label_new(e->target);
+  return *cached;
+}
+
+/* View a location as a memory address.
+ *
+ * Frame and stack locations become frame-based addresses (stack locations
+ * keep their offset), globals become symbol-based addresses, and a register
+ * location is used as a base register. A location that already wraps an
+ * address is returned as stored. Any other kind panics. */
+static NativeAddr addr_from_loc(NativeEmitCtx* e, NativeLoc loc,
+                                SrcLoc src_loc) {
+  NativeAddr out;
+  memset(&out, 0, sizeof out);
+  out.base_type = loc.type;
+  switch ((NativeLocKind)loc.kind) {
+    case NATIVE_LOC_ADDR:
+      /* Returned untouched: base_type stays whatever the address carries. */
+      return loc.v.addr;
+    case NATIVE_LOC_REG:
+      out.base_kind = NATIVE_ADDR_BASE_REG;
+      out.cls = loc.cls;
+      out.base.reg = loc.v.reg;
+      break;
+    case NATIVE_LOC_GLOBAL:
+      out.base_kind = NATIVE_ADDR_BASE_GLOBAL;
+      out.base.global.sym = loc.v.global.sym;
+      out.base.global.addend = loc.v.global.addend;
+      break;
+    case NATIVE_LOC_STACK:
+      out.base_kind = NATIVE_ADDR_BASE_FRAME;
+      out.base.frame = loc.v.stack.slot;
+      out.offset = loc.v.stack.offset;
+      break;
+    case NATIVE_LOC_FRAME:
+      out.base_kind = NATIVE_ADDR_BASE_FRAME;
+      out.base.frame = loc.v.frame;
+      break;
+    default:
+      emit_panic(e, src_loc, "location is not addressable");
+  }
+  return out;
+}
+
+/* Build the address an optimizer operand denotes.
+ *
+ * Locals address their mapped frame slot, globals their symbol, indirect
+ * operands carry base/index/scale/offset across unchanged, and a register
+ * operand is used as a base register. A missing operand or any other kind
+ * panics. */
+static NativeAddr addr_from_operand(NativeEmitCtx* e, const OptOperand* op,
+                                    SrcLoc loc) {
+  NativeAddr out;
+  memset(&out, 0, sizeof out);
+  if (!op) emit_panic(e, loc, "missing address operand");
+  out.base_type = op->type;
+  switch ((OptOperandKind)op->kind) {
+    case OPT_OPK_REG:
+      out.base_kind = NATIVE_ADDR_BASE_REG;
+      out.cls = op->cls;
+      out.base.reg = op->v.reg;
+      break;
+    case OPT_OPK_INDIRECT:
+      out.base_kind = NATIVE_ADDR_BASE_REG;
+      out.cls = NATIVE_REG_INT;
+      out.base.reg = op->v.ind.base;
+      if (op->v.ind.index == (Reg)REG_NONE)
+        out.index_kind = NATIVE_ADDR_INDEX_NONE;
+      else
+        out.index_kind = NATIVE_ADDR_INDEX_REG;
+      out.index_cls = NATIVE_REG_INT;
+      out.index.reg = op->v.ind.index;
+      out.log2_scale = op->v.ind.log2_scale;
+      out.offset = op->v.ind.ofs;
+      break;
+    case OPT_OPK_GLOBAL:
+      out.base_kind = NATIVE_ADDR_BASE_GLOBAL;
+      out.base.global.sym = op->v.global.sym;
+      out.base.global.addend = op->v.global.addend;
+      break;
+    case OPT_OPK_LOCAL:
+      out.base_kind = NATIVE_ADDR_BASE_FRAME;
+      out.base.frame = map_slot(e, op->v.frame_slot, loc);
+      break;
+    default:
+      emit_panic(e, loc, "operand is not addressable");
+  }
+  return out;
+}
+
+/* Build the address an operand's *value* points at.
+ *
+ * Unlike addr_from_operand, a local operand is loaded from its frame slot
+ * into a scratch register (avoiding avoid_a and avoid_b), and that register
+ * becomes the base. Globals, indirect operands and registers are handled as
+ * in addr_from_operand. A missing operand or any other kind panics. */
+static NativeAddr pointer_addr_from_operand(NativeEmitCtx* e,
+                                            const OptOperand* op, SrcLoc loc,
+                                            Reg avoid_a, Reg avoid_b) {
+  NativeAddr out;
+  memset(&out, 0, sizeof out);
+  if (!op) emit_panic(e, loc, "missing pointer operand");
+  out.base_type = op->type;
+  switch ((OptOperandKind)op->kind) {
+    case OPT_OPK_REG:
+      out.base_kind = NATIVE_ADDR_BASE_REG;
+      out.cls = op->cls;
+      out.base.reg = op->v.reg;
+      break;
+    case OPT_OPK_INDIRECT:
+      return addr_from_operand(e, op, loc);
+    case OPT_OPK_GLOBAL:
+      out.base_kind = NATIVE_ADDR_BASE_GLOBAL;
+      out.base.global.sym = op->v.global.sym;
+      out.base.global.addend = op->v.global.addend;
+      break;
+    case OPT_OPK_LOCAL: {
+      NativeAddr home;
+      NativeLoc ptr;
+      NativeAllocClass ptr_cls = class_for_type(e, op->type);
+      Reg ptr_reg = scratch_reg(e, ptr_cls, avoid_a, avoid_b, loc);
+      memset(&home, 0, sizeof home);
+      home.base_kind = NATIVE_ADDR_BASE_FRAME;
+      home.base.frame = map_slot(e, op->v.frame_slot, loc);
+      home.base_type = op->type;
+      ptr = loc_reg(op->type, ptr_cls, ptr_reg);
+      /* Fetch the pointer stored in the local, then address through it. */
+      e->target->load(e->target, ptr, home, mem_for_type(e->c, op->type));
+      out.base_kind = NATIVE_ADDR_BASE_REG;
+      out.cls = (u8)ptr_cls;
+      out.base.reg = ptr_reg;
+      break;
+    }
+    default:
+      emit_panic(e, loc, "operand is not a pointer address");
+  }
+  return out;
+}
+
+/* Base register of addr, or REG_NONE when addr is NULL or not
+ * register-based. */
+static Reg addr_base_reg(const NativeAddr* addr) {
+  if (!addr || addr->base_kind != NATIVE_ADDR_BASE_REG) return REG_NONE;
+  return addr->base.reg;
+}
+
+/* Index register of addr, or REG_NONE when addr is NULL or has no register
+ * index. */
+static Reg addr_index_reg(const NativeAddr* addr) {
+  if (!addr || addr->index_kind != NATIVE_ADDR_INDEX_REG) return REG_NONE;
+  return addr->index.reg;
+}
+
+/* Reduce *addr to a plain [register] address.
+ *
+ * The full address is computed with load_addr into the existing base
+ * register when there is one (overwriting it), otherwise into an integer
+ * scratch register; *addr is then rewritten to use only that register. */
+static void collapse_addr_to_reg(NativeEmitCtx* e, NativeAddr* addr,
+                                 SrcLoc loc) {
+  NativeLoc holder;
+  Reg base = addr_base_reg(addr);
+  if (base == (Reg)REG_NONE)
+    base = scratch_reg(e, NATIVE_REG_INT, REG_NONE, REG_NONE, loc);
+  holder = loc_reg(addr->base_type, NATIVE_REG_INT, base);
+  e->target->load_addr(e->target, holder, *addr);
+  memset(addr, 0, sizeof *addr);
+  addr->base_type = holder.type;
+  addr->base.reg = base;
+  addr->cls = NATIVE_REG_INT;
+  addr->base_kind = NATIVE_ADDR_BASE_REG;
+}
+
+/* Make addr encodable for the access described by mem. When the target's
+ * addr_legal hook rejects the address (e.g. an index scale aarch64 cannot
+ * fold into a load/store), it is collapsed into a single base register via
+ * load_addr. Targets without the hook accept every address. Mirrors
+ * NativeDirectTarget's nd_addr_materialize so the O1 emit path legalizes the
+ * same address shapes as direct -O0 emission. */
+static void legalize_addr(NativeEmitCtx* e, NativeAddr* addr, MemAccess mem,
+                          SrcLoc loc) {
+  if (!e->target->addr_legal) return;
+  if (e->target->addr_legal(e->target, addr, mem)) return;
+  collapse_addr_to_reg(e, addr, loc);
+}
+
+/* Translate an optimizer operand into a NativeLoc.
+ *
+ * A NULL operand yields the empty location. Locals are mapped to their
+ * target frame slot, and indirect operands become address locations. An
+ * operand kind outside the handled set panics. */
+static NativeLoc loc_from_operand(NativeEmitCtx* e, const OptOperand* op,
+                                  SrcLoc loc) {
+  NativeLoc out;
+  if (!op) return loc_none();
+  switch ((OptOperandKind)op->kind) {
+    case OPT_OPK_INDIRECT:
+      out = loc_none();
+      out.kind = NATIVE_LOC_ADDR;
+      out.cls = op->cls;
+      out.type = op->type;
+      out.v.addr = addr_from_operand(e, op, loc);
+      return out;
+    case OPT_OPK_LOCAL:
+      return loc_frame(op->type, class_for_type(e, op->type),
+                       map_slot(e, op->v.frame_slot, loc));
+    case OPT_OPK_GLOBAL:
+      return loc_global(op->type, op->v.global.sym, op->v.global.addend);
+    case OPT_OPK_IMM:
+      return loc_imm(op->type, op->v.imm);
+    case OPT_OPK_REG:
+      return loc_reg(op->type, (NativeAllocClass)op->cls, op->v.reg);
+  }
+  emit_panic(e, loc, "bad operand kind");
+}
+
+/* Bring src into a register and return that register location.
+ *
+ * A register source is returned unchanged. Otherwise a scratch register of
+ * class cls (avoiding avoid_a and avoid_b) is filled: immediates via
+ * load_imm, globals via load_addr (the symbol's address), and frame, stack
+ * and address locations via a load of the value. The scratch register is
+ * typed with `type`, or with src's type when `type` is zero. Any other
+ * source kind panics. */
+static NativeLoc materialize(NativeEmitCtx* e, NativeLoc src,
+                             NativeAllocClass cls, CfreeCgTypeId type,
+                             Reg avoid_a, Reg avoid_b, SrcLoc loc) {
+  NativeLoc reg;
+  if (src.kind == NATIVE_LOC_REG) return src;
+  reg = scratch_loc(e, type ? type : src.type, cls, avoid_a, avoid_b, loc);
+  switch ((NativeLocKind)src.kind) {
+    case NATIVE_LOC_FRAME:
+    case NATIVE_LOC_STACK:
+    case NATIVE_LOC_ADDR: {
+      NativeAddr from = addr_from_loc(e, src, loc);
+      MemAccess access = mem_for_type(e->c, reg.type);
+      e->target->load(e->target, reg, from, access);
+      break;
+    }
+    case NATIVE_LOC_GLOBAL: {
+      NativeAddr sym_addr = addr_from_loc(e, src, loc);
+      e->target->load_addr(e->target, reg, sym_addr);
+      break;
+    }
+    case NATIVE_LOC_IMM:
+      e->target->load_imm(e->target, reg, src.v.imm);
+      break;
+    default:
+      emit_panic(e, loc, "cannot materialize location");
+  }
+  return reg;
+}
+
+/* Copy the value at src into dst.
+ *
+ * Nothing is emitted for an empty destination or when both sides are the
+ * same frame slot. A register destination receives a move (skipped when
+ * source and destination are the same register of the same class), with a
+ * non-register source first materialized into a scratch register other than
+ * dst. Any other destination is treated as memory and stored to using mem,
+ * again materializing a non-register source first. */
+static void write_loc(NativeEmitCtx* e, NativeLoc dst, NativeLoc src,
+                      MemAccess mem, SrcLoc loc) {
+  NativeAllocClass dst_cls = (NativeAllocClass)dst.cls;
+  if (dst.kind == NATIVE_LOC_NONE || loc_same_frame(dst, src)) return;
+  if (dst.kind != NATIVE_LOC_REG) {
+    NativeAddr where = addr_from_loc(e, dst, loc);
+    if (src.kind != NATIVE_LOC_REG)
+      src = materialize(e, src, dst_cls, dst.type, REG_NONE, REG_NONE, loc);
+    e->target->store(e->target, where, src, mem);
+    return;
+  }
+  /* Register destination: materialize returns register sources unchanged. */
+  if (src.kind != NATIVE_LOC_REG)
+    src = materialize(e, src, dst_cls, dst.type, dst.v.reg, REG_NONE, loc);
+  if (src.v.reg != dst.v.reg || src.cls != dst.cls)
+    e->target->move(e->target, dst, src);
+}
+
+/* Convert the optimizer's function descriptor into a CGFuncDesc. Scalar
+ * fields are copied as-is; the parameter array, when present, is re-created
+ * field by field in the function's arena. */
+static CGFuncDesc semantic_func_desc(NativeEmitCtx* e) {
+  OptCGFuncDesc* src = &e->f->desc;
+  CGFuncDesc out;
+  memset(&out, 0, sizeof out);
+  if (src->nparams && src->params) {
+    CGParamDesc* copy = arena_zarray(e->f->arena, CGParamDesc, src->nparams);
+    for (u32 i = 0; i < src->nparams; ++i) {
+      CGParamDesc* to = &copy[i];
+      to->index = src->params[i].index;
+      to->name = src->params[i].name;
+      to->type = src->params[i].type;
+      to->size = src->params[i].size;
+      to->align = src->params[i].align;
+      to->flags = src->params[i].flags;
+      to->loc = src->params[i].loc;
+    }
+    out.params = copy;
+  }
+  out.sym = src->sym;
+  out.text_section_id = src->text_section_id;
+  out.group_id = src->group_id;
+  out.fn_type = src->fn_type;
+  out.result_types = src->result_types;
+  out.nresults = src->nresults;
+  out.nparams = src->nparams;
+  out.loc = src->loc;
+  out.flags = src->flags;
+  out.inline_policy = src->inline_policy;
+  out.atomize = src->atomize;
+  return out;
+}
+
+/* Convert an optimizer parameter record into a CGParamDesc, copying index,
+ * name, type, size, alignment, flags and source location. */
+static CGParamDesc semantic_param_desc(const IRParam* p) {
+  CGParamDesc desc;
+  memset(&desc, 0, sizeof desc);
+  desc.loc = p->loc;
+  desc.flags = p->flags;
+  desc.align = p->align;
+  desc.size = p->size;
+  desc.type = p->type;
+  desc.name = p->name;
+  desc.index = p->index;
+  return desc;
+}
+
+/* Home slot of the first register-stored local bound to preg that has one,
+ * or NATIVE_FRAME_SLOT_NONE when there is no such local. */
+static NativeFrameSlot local_home_for_preg(Func* f, PReg preg) {
+  for (u32 i = 0; i < f->nlocals; ++i) {
+    IRLocal* local = &f->locals[i];
+    if (local->storage.kind != CG_LOCAL_STORAGE_REG) continue;
+    if ((PReg)local->storage.v.reg != preg) continue;
+    if (!local->home_slot) continue;
+    return local->home_slot;
+  }
+  return NATIVE_FRAME_SLOT_NONE;
+}
+
+/* Pick the target frame slot that serves as parameter p's home.
+ *
+ * A register-stored parameter whose pseudo-register belongs to a local with
+ * a home slot reuses that (mapped) slot. Otherwise a new PARAM-kind slot is
+ * requested from the target, sized and aligned from p with type-derived
+ * fallbacks, carrying p's address-taken / memory-required flags. */
+static NativeFrameSlot allocate_param_home(NativeEmitCtx* e, const IRParam* p) {
+  NativeFrameSlotDesc req;
+  NativeFrameSlot existing = NATIVE_FRAME_SLOT_NONE;
+  if (p->storage.kind == CG_LOCAL_STORAGE_REG)
+    existing = local_home_for_preg(e->f, (PReg)p->storage.v.reg);
+  if (existing) return map_slot(e, existing, p->loc);
+  memset(&req, 0, sizeof req);
+  req.kind = NATIVE_FRAME_SLOT_PARAM;
+  req.type = p->type;
+  req.name = p->name;
+  req.loc = p->loc;
+  if (p->size)
+    req.size = p->size;
+  else
+    req.size = type_size_or(e->c, p->type, 8u);
+  if (p->align)
+    req.align = p->align;
+  else
+    req.align = type_align_or(e->c, p->type, 8u);
+  if (p->flags & CG_LOCAL_ADDR_TAKEN) req.flags |= NATIVE_FRAME_SLOT_ADDR_TAKEN;
+  if (p->flags & CG_LOCAL_MEMORY_REQUIRED)
+    req.flags |= NATIVE_FRAME_SLOT_MEMORY_REQUIRED;
+  return e->target->frame_slot(e->target, &req);
+}
+
+/* Where the allocator placed pseudo-register preg: its hard register, its
+ * (mapped) spill slot, or the empty location for any other allocation kind. */
+static NativeLoc loc_for_preg(NativeEmitCtx* e, PReg preg, CfreeCgTypeId type,
+                              SrcLoc loc) {
+  u8 kind = opt_preg_alloc_kind(e->f, preg);
+  if (kind == OPT_ALLOC_HARD) {
+    NativeAllocClass cls = (NativeAllocClass)opt_preg_loc_cls(e->f, preg);
+    return loc_reg(type, cls, opt_preg_hard_reg(e->f, preg));
+  }
+  if (kind == OPT_ALLOC_SPILL) {
+    NativeFrameSlot spill = map_slot(e, opt_preg_spill_slot(e->f, preg), loc);
+    return loc_frame(type, class_for_type(e, type), spill);
+  }
+  return loc_none();
+}
+
+/* Give every parameter a home slot and announce it to the target.
+ *
+ * Allocates e->param_home_by_preg (one entry per pseudo-register), records
+ * the home of each register-stored parameter there, and calls the target's
+ * bind_param hook (when present) with the parameter descriptor and its home.
+ * Frame-stored parameters are bound to their own mapped frame slot instead.
+ * NOTE(review): for frame-stored parameters the slot obtained from
+ * allocate_param_home is discarded after being requested from the target;
+ * whether that allocation is still needed is not visible here. */
+static void bind_params(NativeEmitCtx* e) {
+  u32 npregs = opt_reg_count(e->f);
+  e->param_home_by_preg =
+      arena_zarray(e->f->arena, NativeFrameSlot, npregs ? npregs : 1u);
+  for (u32 idx = 0; idx < e->f->nparams; ++idx) {
+    IRParam* param = &e->f->params[idx];
+    CGParamDesc desc = semantic_param_desc(param);
+    NativeFrameSlot home = allocate_param_home(e, param);
+    if (param->storage.kind == CG_LOCAL_STORAGE_REG) {
+      if (param->storage.v.reg < npregs)
+        e->param_home_by_preg[param->storage.v.reg] = home;
+    }
+    if (param->storage.kind == CG_LOCAL_STORAGE_FRAME)
+      home = map_slot(e, param->storage.v.frame_slot, param->loc);
+    if (!e->target->bind_param) continue;
+    e->target->bind_param(e->target, &desc, home);
+  }
+}
+
+/* Emit an IR_PARAM_DECL: copy a register-stored parameter from its home
+ * frame slot to wherever its pseudo-register was allocated. Declarations
+ * without aux data, with non-register storage, with a zero or out-of-range
+ * pseudo-register, or without a recorded home emit nothing. */
+static void emit_param_decl(NativeEmitCtx* e, Inst* in) {
+  IRParamDeclAux* decl = (IRParamDeclAux*)in->extra.aux;
+  NativeFrameSlot home = 0u;
+  NativeLoc from, to;
+  PReg preg;
+  if (!decl) return;
+  if (decl->desc.storage.kind != CG_LOCAL_STORAGE_REG) return;
+  preg = (PReg)decl->desc.storage.v.reg;
+  if (!preg || preg >= opt_reg_count(e->f)) return;
+  if (e->param_home_by_preg) home = e->param_home_by_preg[preg];
+  if (!home) return;
+  from = loc_frame(decl->desc.type, class_for_type(e, decl->desc.type), home);
+  to = loc_for_preg(e, preg, decl->desc.type, in->loc);
+  write_loc(e, to, from, mem_for_type(e->c, decl->desc.type), in->loc);
+}
+
+/* Request a fresh target frame slot of the given kind for one value of type.
+ * Size falls back to 8 when unavailable; alignment falls back to the size,
+ * capped at 8. */
+static NativeFrameSlot temp_slot(NativeEmitCtx* e, CfreeCgTypeId type,
+                                 SrcLoc loc, NativeFrameSlotKind kind) {
+  NativeFrameSlotDesc req;
+  u32 align_fallback;
+  memset(&req, 0, sizeof req);
+  req.kind = kind;
+  req.loc = loc;
+  req.type = type;
+  req.size = type_size_or(e->c, type, 8u);
+  /* Read the stored size back so the fallback matches the field's value. */
+  align_fallback = req.size >= 8u ? 8u : req.size;
+  req.align = type_align_or(e->c, type, align_fallback);
+  return e->target->frame_slot(e->target, &req);
+}
+
+/* Location of an ABI value's storage operand; empty for a NULL value. */
+static NativeLoc abi_storage_loc(NativeEmitCtx* e, const OptCGABIValue* v,
+                                 SrcLoc loc) {
+  return v ? loc_from_operand(e, &v->storage, loc) : loc_none();
+}
+
+/* Emit an IR_CALL through the target's call-planning interface.
+ *
+ * Steps, in order: translate argument and result storage into NativeLocs,
+ * ask the target for a call plan, perform the plan's argument moves, make
+ * sure the callee is a register or a global, emit the call, perform the
+ * plan's return moves, and finally copy the result from a temporary frame
+ * slot into its real destination. Instructions without aux data emit
+ * nothing. */
+static void emit_call(NativeEmitCtx* e, Inst* in) {
+ IRCallAux* aux = (IRCallAux*)in->extra.aux;
+ NativeCallDesc d;
+ NativeCallPlan plan;
+ NativeLoc* args = NULL;
+ NativeLoc* results = NULL;
+ NativeLoc final_result = loc_none();
+ NativeFrameSlot result_slot = NATIVE_FRAME_SLOT_NONE;
+ MemAccess result_mem;
+ if (!aux) return;
+ memset(&d, 0, sizeof d);
+ memset(&plan, 0, sizeof plan);
+ if (aux->desc.nargs)
+ args = arena_zarray(e->f->arena, NativeLoc, aux->desc.nargs);
+ for (u32 i = 0; i < aux->desc.nargs; ++i)
+ args[i] = abi_storage_loc(e, &aux->desc.args[i], in->loc);
+ /* A nonzero return storage kind means the call produces a value. The plan
+ * is pointed at a fresh spill-kind temporary slot; final_result remembers
+ * the real destination, which is filled in after the call. */
+ if (aux->desc.ret.storage.kind) {
+ results = arena_zarray(e->f->arena, NativeLoc, 1);
+ final_result = abi_storage_loc(e, &aux->desc.ret, in->loc);
+ result_slot =
+ temp_slot(e, aux->desc.ret.type, in->loc, NATIVE_FRAME_SLOT_SPILL);
+ results[0] = loc_frame(aux->desc.ret.type,
+ class_for_type(e, aux->desc.ret.type), result_slot);
+ }
+ d.fn_type = aux->desc.fn_type;
+ d.callee = loc_from_operand(e, &aux->desc.callee, in->loc);
+ d.args = args;
+ d.results = results;
+ d.nargs = aux->desc.nargs;
+ d.nresults = results ? 1u : 0u;
+ d.flags = aux->desc.flags;
+ d.tail_policy = aux->desc.tail_policy;
+ d.inline_policy = aux->desc.inline_policy;
+ e->target->plan_call(e->target, &d, &plan);
+ /* Track the largest outgoing stack-argument area over all calls. */
+ if (plan.stack_arg_size > e->max_outgoing)
+ e->max_outgoing = plan.stack_arg_size;
+ /* Argument moves, in the order the plan lists them. */
+ for (u32 i = 0; i < plan.nargs; ++i)
+ write_loc(e, plan.args[i].dst, plan.args[i].src, plan.args[i].mem, in->loc);
+ /* Any callee that is neither a register nor a global is first brought into
+ * an integer scratch register. */
+ if (plan.callee.kind != NATIVE_LOC_REG &&
+ plan.callee.kind != NATIVE_LOC_GLOBAL)
+ plan.callee = materialize(e, plan.callee, NATIVE_REG_INT, plan.callee.type,
+ REG_NONE, REG_NONE, in->loc);
+ e->target->emit_call(e->target, &plan);
+ /* Return moves, in the order the plan lists them. */
+ for (u32 i = 0; i < plan.nrets; ++i)
+ write_loc(e, plan.rets[i].dst, plan.rets[i].src, plan.rets[i].mem, in->loc);
+ /* Copy the staged result from the temporary slot to its destination. */
+ if (result_slot && final_result.kind != NATIVE_LOC_NONE) {
+ NativeLoc tmp = loc_frame(
+ aux->desc.ret.type, class_for_type(e, aux->desc.ret.type), result_slot);
+ result_mem = mem_for_type(e->c, aux->desc.ret.type);
+ write_loc(e, final_result, tmp, result_mem, in->loc);
+ }
+}
+
+/* Emit an IR_RET.
+ *
+ * When a return value is present it is first copied into a fresh spill-kind
+ * temporary slot, and that slot is what the target's plan_ret sees. The
+ * moves plan_ret reports are then performed in order, followed by the
+ * target's ret. */
+static void emit_ret(NativeEmitCtx* e, Inst* in, const CGFuncDesc* fd) {
+  IRRetAux* aux = (IRRetAux*)in->extra.aux;
+  NativeCallPlanRet* moves = NULL;
+  NativeLoc staged = loc_none();
+  u32 nmoves = 0;
+  u32 nvalues = 0;
+  if (aux && aux->present) {
+    NativeLoc from = abi_storage_loc(e, &aux->val, in->loc);
+    NativeFrameSlot slot =
+        temp_slot(e, aux->val.type, in->loc, NATIVE_FRAME_SLOT_SPILL);
+    staged = loc_frame(aux->val.type, class_for_type(e, aux->val.type), slot);
+    write_loc(e, staged, from, mem_for_type(e->c, aux->val.type), in->loc);
+    nvalues = 1u;
+  }
+  e->target->plan_ret(e->target, fd, nvalues ? &staged : NULL, nvalues, &moves,
+                      &nmoves);
+  for (u32 i = 0; i < nmoves; ++i)
+    write_loc(e, moves[i].dst, moves[i].src, moves[i].mem, in->loc);
+  e->target->ret(e->target);
+}
+
+static void emit_inst(NativeEmitCtx* e, u32 block, u32 order_index, Inst* in,
+ const CGFuncDesc* fd) {
+ NativeLoc dst, a, b, src, tmp;
+ NativeAddr addr, addr2;
+ Reg dst_reg;
+ (void)block;
+ if (e->target->set_loc) e->target->set_loc(e->target, in->loc);
+ switch ((IROp)in->op) {
+ case IR_NOP:
+ case IR_CONST_I:
+ case IR_CONST_BYTES:
+ case IR_PHI:
+ case IR_SCOPE_BEGIN:
+ case IR_SCOPE_ELSE:
+ case IR_SCOPE_END:
+ return;
+ case IR_PARAM_DECL:
+ emit_param_decl(e, in);
+ return;
+ case IR_LOAD_IMM:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ write_loc(e, dst, loc_imm(in->opnds[0].type, in->extra.imm),
+ mem_for_type(e->c, in->opnds[0].type), in->loc);
+ return;
+ case IR_LOAD_CONST:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = materialize(e, dst, class_for_type(e, in->opnds[0].type),
+ in->opnds[0].type, REG_NONE, REG_NONE, in->loc);
+ e->target->load_const(e->target, dst, in->extra.cbytes);
+ return;
+ case IR_COPY:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ src = loc_from_operand(e, &in->opnds[1], in->loc);
+ write_loc(e, dst, src, mem_for_type(e->c, in->opnds[0].type), in->loc);
+ return;
+ case IR_LOAD:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ addr = addr_from_operand(e, &in->opnds[1], in->loc);
+ legalize_addr(e, &addr, in->extra.mem, in->loc);
+ if (dst.kind == NATIVE_LOC_REG) {
+ e->target->load(e->target, dst, addr, in->extra.mem);
+ } else {
+ if (!scratch_available(e, class_for_type(e, in->opnds[0].type),
+ addr_base_reg(&addr), addr_index_reg(&addr)))
+ collapse_addr_to_reg(e, &addr, in->loc);
+ tmp = scratch_loc(e, in->opnds[0].type,
+ class_for_type(e, in->opnds[0].type),
+ addr_base_reg(&addr), addr_index_reg(&addr), in->loc);
+ e->target->load(e->target, tmp, addr, in->extra.mem);
+ write_loc(e, dst, tmp, in->extra.mem, in->loc);
+ }
+ return;
+ case IR_STORE:
+ addr = addr_from_operand(e, &in->opnds[0], in->loc);
+ legalize_addr(e, &addr, in->extra.mem, in->loc);
+ src = loc_from_operand(e, &in->opnds[1], in->loc);
+ if (src.kind == NATIVE_LOC_REG && (src.v.reg == addr_base_reg(&addr) ||
+ src.v.reg == addr_index_reg(&addr))) {
+ NativeFrameSlot slot =
+ temp_slot(e, in->opnds[1].type, in->loc, NATIVE_FRAME_SLOT_SPILL);
+ NativeLoc frame = loc_frame(in->opnds[1].type,
+ class_for_type(e, in->opnds[1].type), slot);
+ write_loc(e, frame, src, mem_for_type(e->c, in->opnds[1].type),
+ in->loc);
+ collapse_addr_to_reg(e, &addr, in->loc);
+ src = materialize(e, frame, class_for_type(e, in->opnds[1].type),
+ in->opnds[1].type, addr_base_reg(&addr), REG_NONE,
+ in->loc);
+ }
+ if (src.kind != NATIVE_LOC_REG) {
+ if (!scratch_available(e, class_for_type(e, in->opnds[1].type),
+ addr_base_reg(&addr), addr_index_reg(&addr)))
+ collapse_addr_to_reg(e, &addr, in->loc);
+ src = materialize(e, src, class_for_type(e, in->opnds[1].type),
+ in->opnds[1].type, addr_base_reg(&addr),
+ addr_index_reg(&addr), in->loc);
+ }
+ e->target->store(e->target, addr, src, in->extra.mem);
+ return;
+ case IR_ADDR_OF:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ addr = addr_from_operand(e, &in->opnds[1], in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = materialize(e, dst, class_for_type(e, in->opnds[0].type),
+ in->opnds[0].type, REG_NONE, REG_NONE, in->loc);
+ e->target->load_addr(e->target, dst, addr);
+ return;
+ case IR_TLS_ADDR_OF: {
+ IRTlsAux* aux = (IRTlsAux*)in->extra.aux;
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = materialize(e, dst, NATIVE_REG_INT, in->opnds[0].type, REG_NONE,
+ REG_NONE, in->loc);
+ e->target->tls_addr_of(e->target, dst, aux->sym, aux->addend);
+ return;
+ }
+ case IR_AGG_COPY: {
+ IRAggAux* aux = (IRAggAux*)in->extra.aux;
+ addr = pointer_addr_from_operand(e, &in->opnds[0], in->loc, REG_NONE,
+ REG_NONE);
+ addr2 = pointer_addr_from_operand(
+ e, &in->opnds[1], in->loc,
+ addr.base_kind == NATIVE_ADDR_BASE_REG ? addr.base.reg : REG_NONE,
+ REG_NONE);
+ e->target->copy_bytes(e->target, addr, addr2, aux->access);
+ return;
+ }
+ case IR_AGG_SET: {
+ IRAggAux* aux = (IRAggAux*)in->extra.aux;
+ addr = pointer_addr_from_operand(e, &in->opnds[0], in->loc, REG_NONE,
+ REG_NONE);
+ src = loc_from_operand(e, &in->opnds[1], in->loc);
+ if (src.kind != NATIVE_LOC_REG) {
+ if (!scratch_available(e, NATIVE_REG_INT, addr_base_reg(&addr),
+ addr_index_reg(&addr)))
+ collapse_addr_to_reg(e, &addr, in->loc);
+ src = materialize(e, src, NATIVE_REG_INT, in->opnds[1].type,
+ addr_base_reg(&addr), addr_index_reg(&addr), in->loc);
+ }
+ e->target->set_bytes(e->target, addr, src, aux->access);
+ return;
+ }
+ case IR_BITFIELD_LOAD: {
+ IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux;
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ addr = addr_from_operand(e, &in->opnds[1], in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = materialize(e, dst, class_for_type(e, in->opnds[0].type),
+ in->opnds[0].type, REG_NONE, REG_NONE, in->loc);
+ e->target->bitfield_load(e->target, dst, addr, aux->access);
+ return;
+ }
+ case IR_BITFIELD_STORE: {
+ IRBitFieldAux* aux = (IRBitFieldAux*)in->extra.aux;
+ addr = addr_from_operand(e, &in->opnds[0], in->loc);
+ src = loc_from_operand(e, &in->opnds[1], in->loc);
+ if (src.kind != NATIVE_LOC_REG)
+ src = materialize(e, src, class_for_type(e, in->opnds[1].type),
+ in->opnds[1].type, REG_NONE, REG_NONE, in->loc);
+ e->target->bitfield_store(e->target, addr, src, aux->access);
+ return;
+ }
+ case IR_BINOP:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ dst_reg = dst.kind == NATIVE_LOC_REG ? dst.v.reg : REG_NONE;
+ a = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc),
+ class_for_type(e, in->opnds[1].type), in->opnds[1].type,
+ dst_reg, REG_NONE, in->loc);
+ b = materialize(e, loc_from_operand(e, &in->opnds[2], in->loc),
+ class_for_type(e, in->opnds[2].type), in->opnds[2].type,
+ a.v.reg, dst_reg, in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = scratch_loc(e, in->opnds[0].type,
+ class_for_type(e, in->opnds[0].type), a.v.reg,
+ b.v.reg, in->loc);
+ e->target->binop(e->target, (BinOp)in->extra.imm, dst, a, b);
+ if (in->opnds[0].kind != OPK_REG)
+ write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst,
+ mem_for_type(e->c, in->opnds[0].type), in->loc);
+ return;
+ case IR_UNOP:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ dst_reg = dst.kind == NATIVE_LOC_REG ? dst.v.reg : REG_NONE;
+ a = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc),
+ class_for_type(e, in->opnds[1].type), in->opnds[1].type,
+ dst_reg, REG_NONE, in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = scratch_loc(e, in->opnds[0].type,
+ class_for_type(e, in->opnds[0].type), a.v.reg,
+ REG_NONE, in->loc);
+ e->target->unop(e->target, (UnOp)in->extra.imm, dst, a);
+ if (in->opnds[0].kind != OPK_REG)
+ write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst,
+ mem_for_type(e->c, in->opnds[0].type), in->loc);
+ return;
+ case IR_CMP:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ dst_reg = dst.kind == NATIVE_LOC_REG ? dst.v.reg : REG_NONE;
+ a = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc),
+ class_for_type(e, in->opnds[1].type), in->opnds[1].type,
+ dst_reg, REG_NONE, in->loc);
+ b = materialize(e, loc_from_operand(e, &in->opnds[2], in->loc),
+ class_for_type(e, in->opnds[2].type), in->opnds[2].type,
+ a.v.reg, dst_reg, in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = scratch_loc(e, in->opnds[0].type,
+ class_for_type(e, in->opnds[0].type), a.v.reg,
+ b.v.reg, in->loc);
+ e->target->cmp(e->target, (CmpOp)in->extra.imm, dst, a, b);
+ if (in->opnds[0].kind != OPK_REG)
+ write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst,
+ mem_for_type(e->c, in->opnds[0].type), in->loc);
+ return;
+ case IR_CONVERT:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ dst_reg = dst.kind == NATIVE_LOC_REG ? dst.v.reg : REG_NONE;
+ src = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc),
+ class_for_type(e, in->opnds[1].type), in->opnds[1].type,
+ dst_reg, REG_NONE, in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = scratch_loc(e, in->opnds[0].type,
+ class_for_type(e, in->opnds[0].type), src.v.reg,
+ REG_NONE, in->loc);
+ e->target->convert(e->target, (ConvKind)in->extra.imm, dst, src);
+ if (in->opnds[0].kind != OPK_REG)
+ write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst,
+ mem_for_type(e->c, in->opnds[0].type), in->loc);
+ return;
+ case IR_CALL:
+ emit_call(e, in);
+ return;
+ case IR_BR:
+ e->target->jump(e->target,
+ ensure_label(e, e->f->blocks[block].succ[0], in->loc));
+ return;
+ case IR_CMP_BRANCH: {
+ u32 next = order_index + 1u < e->f->emit_order_n
+ ? e->f->emit_order[order_index + 1u]
+ : UINT32_MAX;
+ a = materialize(e, loc_from_operand(e, &in->opnds[0], in->loc),
+ class_for_type(e, in->opnds[0].type), in->opnds[0].type,
+ REG_NONE, REG_NONE, in->loc);
+ b = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc),
+ class_for_type(e, in->opnds[1].type), in->opnds[1].type,
+ a.v.reg, REG_NONE, in->loc);
+ e->target->cmp_branch(
+ e->target, (CmpOp)in->extra.imm, a, b,
+ ensure_label(e, e->f->blocks[block].succ[0], in->loc));
+ if (e->f->blocks[block].nsucc > 1u && e->f->blocks[block].succ[1] != next)
+ e->target->jump(e->target,
+ ensure_label(e, e->f->blocks[block].succ[1], in->loc));
+ return;
+ }
+ case IR_SWITCH: {
+ IRSwitchAux* aux = (IRSwitchAux*)in->extra.aux;
+ NativeLoc sel =
+ materialize(e, loc_from_operand(e, &in->opnds[0], in->loc),
+ class_for_type(e, in->opnds[0].type), in->opnds[0].type,
+ REG_NONE, REG_NONE, in->loc);
+ NativeLoc imm =
+ scratch_loc(e, in->opnds[0].type, (NativeAllocClass)sel.cls,
+ sel.v.reg, REG_NONE, in->loc);
+ for (u32 i = 0; aux && i < aux->ncases; ++i) {
+ e->target->load_imm(e->target, imm, (i64)aux->cases[i].value);
+ e->target->cmp_branch(e->target, CMP_EQ, sel, imm,
+ ensure_label(e, aux->cases[i].block, in->loc));
+ }
+ if (aux)
+ e->target->jump(e->target,
+ ensure_label(e, aux->default_block, in->loc));
+ return;
+ }
+ case IR_INDIRECT_BRANCH: {
+ IRIndirectAux* aux = (IRIndirectAux*)in->extra.aux;
+ MCLabel* labels = aux && aux->ntargets
+ ? arena_array(e->f->arena, MCLabel, aux->ntargets)
+ : NULL;
+ for (u32 i = 0; aux && i < aux->ntargets; ++i)
+ labels[i] = ensure_label(e, aux->targets[i], in->loc);
+ src = materialize(e, loc_from_operand(e, &in->opnds[0], in->loc),
+ NATIVE_REG_INT, in->opnds[0].type, REG_NONE, REG_NONE,
+ in->loc);
+ e->target->indirect_branch(e->target, src, labels,
+ aux ? aux->ntargets : 0u);
+ return;
+ }
+ case IR_LOAD_LABEL_ADDR:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = materialize(e, dst, NATIVE_REG_INT, in->opnds[0].type, REG_NONE,
+ REG_NONE, in->loc);
+ e->target->load_label_addr(e->target, dst,
+ ensure_label(e, (u32)in->extra.imm, in->loc));
+ return;
+ case IR_LOCAL_STATIC_DATA_BEGIN: {
+ CgIrLocalStaticBeginAux* aux = (CgIrLocalStaticBeginAux*)in->extra.aux;
+ emit_local_static_begin(e, aux ? &aux->desc : NULL, in->loc);
+ return;
+ }
+ case IR_LOCAL_STATIC_DATA_WRITE: {
+ CgIrLocalStaticWriteAux* aux = (CgIrLocalStaticWriteAux*)in->extra.aux;
+ if (!aux) emit_panic(e, in->loc, "missing local static data write");
+ emit_local_static_write(e, aux->has_data ? aux->data : NULL, aux->len,
+ in->loc);
+ return;
+ }
+ case IR_LOCAL_STATIC_DATA_LABEL_ADDR: {
+ CgIrLocalStaticLabelAux* aux = (CgIrLocalStaticLabelAux*)in->extra.aux;
+ if (!aux) emit_panic(e, in->loc, "missing local static label data");
+ (void)aux->address_space;
+ emit_local_static_label_addr(e,
+ ensure_label(e, (u32)aux->target, in->loc),
+ aux->addend, aux->width, in->loc);
+ return;
+ }
+ case IR_LOCAL_STATIC_DATA_END:
+ emit_local_static_end(e, in->loc);
+ return;
+ case IR_RET:
+ emit_ret(e, in, fd);
+ return;
+ case IR_ALLOCA:
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ src = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc),
+ NATIVE_REG_INT, in->opnds[1].type, REG_NONE, REG_NONE,
+ in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = scratch_loc(e, in->opnds[0].type, NATIVE_REG_INT, src.v.reg,
+ REG_NONE, in->loc);
+ e->target->alloca_(e->target, dst, src, (u32)in->extra.imm);
+ return;
+ case IR_ATOMIC_LOAD: {
+ IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux;
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ addr = pointer_addr_from_operand(e, &in->opnds[1], in->loc, REG_NONE,
+ REG_NONE);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = scratch_loc(e, in->opnds[0].type,
+ class_for_type(e, in->opnds[0].type), REG_NONE,
+ REG_NONE, in->loc);
+ e->target->atomic_load(e->target, dst, addr, aux->mem, aux->mo);
+ if (in->opnds[0].kind != OPK_REG)
+ write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, aux->mem,
+ in->loc);
+ return;
+ }
+ case IR_ATOMIC_STORE: {
+ IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux;
+ addr = pointer_addr_from_operand(e, &in->opnds[0], in->loc, REG_NONE,
+ REG_NONE);
+ src = materialize(e, loc_from_operand(e, &in->opnds[1], in->loc),
+ class_for_type(e, in->opnds[1].type), in->opnds[1].type,
+ REG_NONE, REG_NONE, in->loc);
+ e->target->atomic_store(e->target, addr, src, aux->mem, aux->mo);
+ return;
+ }
+ case IR_ATOMIC_RMW: {
+ IRAtomicAux* aux = (IRAtomicAux*)in->extra.aux;
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ addr = pointer_addr_from_operand(e, &in->opnds[1], in->loc, REG_NONE,
+ REG_NONE);
+ src = materialize(e, loc_from_operand(e, &in->opnds[2], in->loc),
+ class_for_type(e, in->opnds[2].type), in->opnds[2].type,
+ REG_NONE, REG_NONE, in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = scratch_loc(e, in->opnds[0].type,
+ class_for_type(e, in->opnds[0].type), src.v.reg,
+ REG_NONE, in->loc);
+ e->target->atomic_rmw(e->target, (AtomicOp)aux->op, dst, addr, src,
+ aux->mem, aux->mo);
+ if (in->opnds[0].kind != OPK_REG)
+ write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, aux->mem,
+ in->loc);
+ return;
+ }
+ case IR_ATOMIC_CAS: {
+ IRCasAux* aux = (IRCasAux*)in->extra.aux;
+ NativeLoc ok;
+ NativeLoc expected;
+ NativeLoc desired;
+ dst = loc_from_operand(e, &in->opnds[0], in->loc);
+ ok = loc_from_operand(e, &in->opnds[1], in->loc);
+ addr = pointer_addr_from_operand(e, &in->opnds[2], in->loc, REG_NONE,
+ REG_NONE);
+ expected = materialize(e, loc_from_operand(e, &in->opnds[3], in->loc),
+ class_for_type(e, in->opnds[3].type),
+ in->opnds[3].type, REG_NONE, REG_NONE, in->loc);
+ desired =
+ materialize(e, loc_from_operand(e, &in->opnds[4], in->loc),
+ class_for_type(e, in->opnds[4].type), in->opnds[4].type,
+ expected.v.reg, REG_NONE, in->loc);
+ if (dst.kind != NATIVE_LOC_REG)
+ dst = scratch_loc(e, in->opnds[0].type,
+ class_for_type(e, in->opnds[0].type), expected.v.reg,
+ desired.v.reg, in->loc);
+ if (ok.kind != NATIVE_LOC_REG)
+ ok = scratch_loc(e, in->opnds[1].type,
+ class_for_type(e, in->opnds[1].type), dst.v.reg,
+ expected.v.reg, in->loc);
+ e->target->atomic_cas(e->target, dst, ok, addr, expected, desired,
+ aux->mem, aux->success, aux->failure);
+ if (in->opnds[0].kind != OPK_REG)
+ write_loc(e, loc_from_operand(e, &in->opnds[0], in->loc), dst, aux->mem,
+ in->loc);
+ if (in->opnds[1].kind != OPK_REG)
+ write_loc(e, loc_from_operand(e, &in->opnds[1], in->loc), ok,
+ mem_for_type(e->c, in->opnds[1].type), in->loc);
+ return;
+ }
+ case IR_VA_START:
+ case IR_VA_ARG:
+ case IR_VA_END:
+ case IR_VA_COPY:
+ case IR_BREAK_TO:
+ case IR_CONTINUE_TO:
+ case IR_ASM_BLOCK:
+ emit_panic(e, in->loc, "operation is not wired to NativeTarget yet");
+ case IR_FENCE:
+ e->target->fence(e->target, (MemOrder)in->extra.imm);
+ return;
+ case IR_INTRINSIC: {
+ IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
+ NativeLoc* dsts = aux && aux->ndst
+ ? arena_array(e->f->arena, NativeLoc, aux->ndst)
+ : NULL;
+ NativeLoc* args = aux && aux->narg
+ ? arena_array(e->f->arena, NativeLoc, aux->narg)
+ : NULL;
+ for (u32 i = 0; aux && i < aux->ndst; ++i)
+ dsts[i] = loc_from_operand(e, &aux->dsts[i], in->loc);
+ for (u32 i = 0; aux && i < aux->narg; ++i) {
+ if (aux->args[i].kind == OPK_IMM) {
+ args[i] = loc_from_operand(e, &aux->args[i], in->loc);
+ } else {
+ args[i] = materialize(e, loc_from_operand(e, &aux->args[i], in->loc),
+ class_for_type(e, aux->args[i].type),
+ aux->args[i].type, REG_NONE, REG_NONE, in->loc);
+ }
+ }
+ e->target->intrinsic(e->target, aux->kind, dsts, aux->ndst, args,
+ aux->narg);
+ return;
+ }
+ default:
+ emit_panic(e, in->loc, "unknown IR op");
+ }
+}
+
+/* Returns 1 when `in` is an instruction after which emit_block must not
+ * synthesize a fallthrough jump, and 0 otherwise (including a NULL `in`).
+ *
+ * The branch/return ops listed in the switch always count. An IR_INTRINSIC
+ * counts only when its aux record is present and its kind is
+ * INTRIN_LONGJMP, INTRIN_TRAP or INTRIN_UNREACHABLE. */
+static int native_emit_terminates(const Inst* in) {
+  const IRIntrinAux* intrin;
+
+  if (in == NULL) return 0;
+
+  switch ((IROp)in->op) {
+    case IR_INTRINSIC:
+      intrin = (const IRIntrinAux*)in->extra.aux;
+      if (intrin == NULL) return 0;
+      if (intrin->kind == INTRIN_LONGJMP) return 1;
+      if (intrin->kind == INTRIN_TRAP) return 1;
+      return intrin->kind == INTRIN_UNREACHABLE;
+    case IR_RET:
+    case IR_BR:
+    case IR_CONDBR:
+    case IR_CMP_BRANCH:
+    case IR_SWITCH:
+    case IR_INDIRECT_BRANCH:
+    case IR_BREAK_TO:
+    case IR_CONTINUE_TO:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Emits the block with id `block`, which sits at position `order_index` of
+ * the function's emit order.
+ *
+ *  1. The first time a block is seen its label is placed; the entry block is
+ *     marked as seen but never gets a label placed.
+ *  2. Every instruction of the block is handed to emit_inst.
+ *  3. If the block has exactly one successor and does not end in a
+ *     terminating instruction (see native_emit_terminates), a jump to that
+ *     successor is emitted unless the successor is the next block in emit
+ *     order, in which case control simply falls through.
+ *
+ * Block ids outside [0, nblocks) are silently ignored.
+ *
+ * NOTE(review): because the entry block's label is never placed, a branch
+ * that targets the entry block would reference an unplaced label. Confirm
+ * that the IR never makes the entry block a branch target. */
+static void emit_block(NativeEmitCtx* e, u32 block, u32 order_index,
+                       const CGFuncDesc* fd) {
+  SrcLoc no_loc = {0, 0, 0};
+
+  if (block >= e->f->nblocks) return;
+
+  if (!e->label_placed[block]) {
+    e->label_placed[block] = 1u;
+    if (block != e->f->entry)
+      e->target->label_place(e->target, ensure_label(e, block, no_loc));
+  }
+
+  Block* cur = &e->f->blocks[block];
+  for (u32 k = 0; k < cur->ninsts; ++k)
+    emit_inst(e, block, order_index, &cur->insts[k], fd);
+
+  /* Only single-successor blocks without their own terminator need help. */
+  if (cur->nsucc != 1u) return;
+  if (cur->ninsts != 0 &&
+      native_emit_terminates(&cur->insts[cur->ninsts - 1u]))
+    return;
+
+  /* UINT32_MAX stands for "no following block", so the jump is always
+   * emitted for the last block in emit order. */
+  u32 following = UINT32_MAX;
+  if (order_index + 1u < e->f->emit_order_n)
+    following = e->f->emit_order[order_index + 1u];
+
+  if (cur->succ[0] != following)
+    e->target->jump(e->target, ensure_label(e, cur->succ[0], no_loc));
+}
+
+/* Requests a native frame slot from the target for every IR frame slot of
+ * the function and records the result in e->slot_map, indexed by the IR
+ * slot's id.
+ *
+ * slot_map is zero-initialized and holds nframe_slots + 1 entries, so ids in
+ * [0, nframe_slots] are addressable. An id above that range would write past
+ * the end of the table, so it is rejected through emit_panic before the
+ * target is asked for a slot (emit_panic is relied on not to return, as it
+ * is elsewhere in this file). */
+static void map_frame_slots(NativeEmitCtx* e) {
+  u32 nslots = e->f->nframe_slots;
+
+  e->slot_map = arena_zarray(e->f->arena, NativeFrameSlot, nslots + 1u);
+  for (u32 i = 0; i < nslots; ++i) {
+    IRFrameSlot* s = &e->f->frame_slots[i];
+    NativeFrameSlotDesc d;
+
+    /* slot_map is indexed by id, not by table position: validate the id. */
+    if (s->id > nslots)
+      emit_panic(e, (SrcLoc){0, 0, 0}, "frame slot id out of range");
+
+    /* Start from an all-zero descriptor so fields not copied below are 0. */
+    memset(&d, 0, sizeof d);
+    d.type = s->type;
+    d.name = s->name;
+    d.loc = s->loc;
+    d.size = s->size;
+    d.align = s->align;
+    d.kind = s->kind;
+    d.flags = s->flags;
+    e->slot_map[s->id] = e->target->frame_slot(e->target, &d);
+  }
+}
+
+/* Drives `target` to emit native code for `f`. Does nothing when `f` or
+ * `target` is NULL.
+ *
+ * When f->mir is set, emission works on a stack-local copy of `f` whose
+ * blocks, entry, and emit order are taken from f->mir (with opt_rewritten
+ * set and mir cleared); otherwise `f` is used directly.
+ *
+ * Phases, each wrapped in its own metrics scope unless noted:
+ *   setup      - allocate the per-block label table (all MC_LABEL_NONE) and
+ *                the zeroed label_placed flags; build the function desc.
+ *   func_begin - target->func_begin, then map frame slots and bind params.
+ *   body       - emit_block for every entry of the emit order, in order.
+ *   (no scope) - report max_outgoing via note_frame_state and run
+ *                patch_apply, each only if the target provides the hook.
+ *   func_end   - target->func_end. */
+void opt_emit_native(Compiler* c, Func* f, NativeTarget* target) {
+  NativeEmitCtx e;
+  NativeFramePatchState state;
+  CGFuncDesc fd;
+  Func view;
+  u32 nlabels;
+
+  if (f == NULL || target == NULL) return;
+
+  memset(&e, 0, sizeof e);
+  e.c = c;
+  e.target = target;
+  e.f = f;
+  if (f->mir != NULL) {
+    /* Present the machine-level blocks through an ordinary Func view. */
+    view = *f;
+    view.blocks = f->mir->blocks;
+    view.nblocks = f->mir->nblocks;
+    view.entry = f->mir->entry;
+    view.emit_order = f->mir->emit_order;
+    view.emit_order_n = f->mir->emit_order_n;
+    view.emit_order_cap = f->mir->emit_order_cap;
+    view.opt_rewritten = 1;
+    view.mir = NULL;
+    e.f = &view;
+  }
+
+  metrics_scope_begin(c, "opt.native_emit.setup");
+  /* Allocate at least one element even for a function with no blocks. */
+  nlabels = e.f->nblocks ? e.f->nblocks : 1u;
+  e.labels = arena_array(e.f->arena, MCLabel, nlabels);
+  e.label_placed = arena_zarray(e.f->arena, u8, nlabels);
+  for (u32 b = 0; b < e.f->nblocks; ++b) e.labels[b] = MC_LABEL_NONE;
+  fd = semantic_func_desc(&e);
+  metrics_scope_end(c, "opt.native_emit.setup");
+
+  metrics_scope_begin(c, "opt.native_emit.func_begin");
+  target->func_begin(target, &fd);
+  map_frame_slots(&e);
+  bind_params(&e);
+  metrics_scope_end(c, "opt.native_emit.func_begin");
+
+  metrics_scope_begin(c, "opt.native_emit.body");
+  for (u32 pos = 0; pos < e.f->emit_order_n; ++pos)
+    emit_block(&e, e.f->emit_order[pos], pos, &fd);
+  metrics_scope_end(c, "opt.native_emit.body");
+
+  /* Both hooks are optional on the target. */
+  memset(&state, 0, sizeof state);
+  state.max_outgoing = e.max_outgoing;
+  if (target->note_frame_state) target->note_frame_state(target, &state);
+  if (target->patch_apply) target->patch_apply(target);
+
+  metrics_scope_begin(c, "opt.native_emit.func_end");
+  target->func_end(target);
+  metrics_scope_end(c, "opt.native_emit.func_end");
+}
diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c
@@ -619,741 +619,6 @@ void opt_addr_xform(Func* f) {
opt_rebuild_def_use(f);
}
-/* PReg-namespace variant of opt_addr_xform for the O1 pipeline (no SSA, no
- * Val-keyed def-use chains). Scans the whole function once per candidate
- * IR_ADDR_OF def to classify uses of its PReg result.
- *
- * Use classifications (see addr_xform_pregs_classify_use):
- *
- * OPF_ESCAPE The use is something other than a non-observable
- * IR_LOAD/IR_STORE base operand. The IR_ADDR_OF cannot
- * be folded; the local's address truly escapes.
- * OPF_FOLD_LOCAL Zero-EA use: `OPK_INDIRECT(base=p, ofs=0, index=NONE)`
- * in load/store base position. Foldable to OPK_LOCAL.
- * OPF_FOLD_EA EA-shaped use: same load/store base position, but with
- * nonzero `ofs` or `index != REG_NONE`. The EA must stay
- * on the load/store (the operand layout for OPK_LOCAL
- * cannot carry the EA today), so the operand is left
- * alone and the IR_ADDR_OF def must stay alive to feed
- * the OPK_INDIRECT base. The use is still recognized as
- * "non-escape" for downstream analysis (e.g. scalar
- * promotion's non-escape check).
- *
- * After classification: if any use is OPF_ESCAPE, no rewrite happens. If
- * every use is OPF_FOLD_LOCAL, fold all uses to OPK_LOCAL and NOP the
- * IR_ADDR_OF. If a mix of OPF_FOLD_LOCAL and OPF_FOLD_EA, fold the
- * zero-EA uses but keep the IR_ADDR_OF alive for the EA-shaped uses. */
-
-typedef enum AddrXformUseClass {
- OPF_ESCAPE = 0,
- OPF_FOLD_LOCAL = 1,
- OPF_FOLD_EA = 2,
-} AddrXformUseClass;
-
-static int addr_xform_pregs_main_op_position_ok(Inst* in, u32 op_idx) {
- if ((IROp)in->op != IR_LOAD && (IROp)in->op != IR_STORE) return 0;
- if (opt_mem_observable(&in->extra.mem)) return 0;
- if ((IROp)in->op == IR_LOAD && op_idx != 1u) return 0;
- if ((IROp)in->op == IR_STORE && op_idx != 0u) return 0;
- return 1;
-}
-
-static AddrXformUseClass addr_xform_pregs_classify_use(Inst* in, Operand* op,
- u32 op_idx) {
- if (op->kind != OPK_INDIRECT) return OPF_ESCAPE;
- if (!addr_xform_pregs_main_op_position_ok(in, op_idx)) return OPF_ESCAPE;
- if (op->v.ind.ofs == 0 && op->v.ind.index == (Reg)REG_NONE)
- return OPF_FOLD_LOCAL;
- return OPF_FOLD_EA;
-}
-
-static int addr_xform_pregs_op_uses(const Operand* op, PReg p) {
- if (!op) return 0;
- if (op->kind == OPK_REG && (PReg)op->v.reg == p) return 1;
- if (op->kind == OPK_INDIRECT) {
- if ((PReg)op->v.ind.base == p) return 1;
- if (op->v.ind.index != (Reg)REG_NONE && (PReg)op->v.ind.index == p)
- return 1;
- }
- return 0;
-}
-
-static int addr_xform_pregs_abivalue_uses(const CGABIValue* v, PReg p) {
- if (!v) return 0;
- if (addr_xform_pregs_op_uses(&v->storage, p)) return 1;
- for (u32 i = 0; i < v->nparts; ++i)
- if (addr_xform_pregs_op_uses((const Operand*)&v->parts[i].op, p)) return 1;
- return 0;
-}
-
-static int addr_xform_pregs_aux_uses(Inst* in, PReg p) {
- switch ((IROp)in->op) {
- case IR_CALL: {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) return 0;
- if (aux->use_plan_replay) {
- if (addr_xform_pregs_op_uses(&aux->plan.callee, p)) return 1;
- for (u32 i = 0; i < aux->plan.nargs; ++i)
- if (addr_xform_pregs_op_uses(&aux->plan.args[i].src, p)) return 1;
- for (u32 i = 0; i < aux->plan.nrets; ++i)
- if (addr_xform_pregs_op_uses(&aux->plan.rets[i].dst, p)) return 1;
- } else {
- if (addr_xform_pregs_op_uses(&aux->desc.callee, p)) return 1;
- for (u32 i = 0; i < aux->desc.nargs; ++i)
- if (addr_xform_pregs_abivalue_uses(
- (const CGABIValue*)&aux->desc.args[i], p))
- return 1;
- if (addr_xform_pregs_abivalue_uses(&aux->desc.ret, p)) return 1;
- }
- return 0;
- }
- case IR_RET: {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- if (!aux || !aux->present) return 0;
- return addr_xform_pregs_abivalue_uses(&aux->val, p);
- }
- case IR_SCOPE_BEGIN: {
- IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
- if (!aux) return 0;
- return addr_xform_pregs_op_uses(&aux->desc.cond, p);
- }
- case IR_ASM_BLOCK: {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) return 0;
- for (u32 i = 0; i < aux->nin; ++i)
- if (addr_xform_pregs_op_uses(&aux->in_ops[i], p)) return 1;
- for (u32 i = 0; i < aux->nout; ++i)
- if (addr_xform_pregs_op_uses(&aux->out_ops[i], p)) return 1;
- return 0;
- }
- case IR_INTRINSIC: {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- if (!aux) return 0;
- for (u32 i = 0; i < aux->narg; ++i)
- if (addr_xform_pregs_op_uses(&aux->args[i], p)) return 1;
- for (u32 i = 0; i < aux->ndst; ++i)
- if (addr_xform_pregs_op_uses(&aux->dsts[i], p)) return 1;
- return 0;
- }
- default:
- return 0;
- }
-}
-
-/* Returns nonzero if every use of `p` is foldable (OPF_FOLD_LOCAL or
- * OPF_FOLD_EA) and at least one use exists. *out_has_ea is set to 1 if any
- * use was OPF_FOLD_EA; in that case the rewrite must keep the IR_ADDR_OF
- * alive (the EA-shaped use still names p as the OPK_INDIRECT base). */
-static int addr_xform_pregs_classify(Func* f, PReg p, Inst* def_inst,
- int* out_has_ea) {
- int has_foldable_use = 0;
- int has_ea = 0;
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if (in == def_inst) continue;
- for (u32 o = 0; o < in->nopnds; ++o) {
- Operand* op = &in->opnds[o];
- if (!addr_xform_pregs_op_uses(op, p)) continue;
- AddrXformUseClass uc = addr_xform_pregs_classify_use(in, op, o);
- if (uc == OPF_ESCAPE) return 0;
- has_foldable_use = 1;
- if (uc == OPF_FOLD_EA) has_ea = 1;
- }
- if (addr_xform_pregs_aux_uses(in, p)) return 0;
- }
- }
- if (out_has_ea) *out_has_ea = has_ea;
- return has_foldable_use;
-}
-
-void opt_addr_xform_pregs(Func* f) {
- if (!f || f->opt_reg_ssa || f->opt_rewritten) return;
- int changed = 0;
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op != IR_ADDR_OF) continue;
- if (in->nopnds < 2) continue;
- if (in->opnds[0].kind != OPK_REG) continue;
- if (in->opnds[1].kind != OPK_LOCAL) continue;
- PReg p = (PReg)in->opnds[0].v.reg;
- if (!opt_reg_valid(f, p)) continue;
- int has_ea = 0;
- if (!addr_xform_pregs_classify(f, p, in, &has_ea)) continue;
- Operand local = in->opnds[1];
- /* Fold every zero-EA use of p to OPK_LOCAL. EA-shaped uses are left
- * as OPK_INDIRECT(base=p, ofs, index, log2_scale) so the EA stays on
- * the load/store; the IR_ADDR_OF def must survive to feed them. */
- for (u32 bb = 0; bb < f->nblocks; ++bb) {
- Block* rb = &f->blocks[bb];
- for (u32 ii = 0; ii < rb->ninsts; ++ii) {
- Inst* use = &rb->insts[ii];
- if (use == in) continue;
- for (u32 o = 0; o < use->nopnds; ++o) {
- Operand* op = &use->opnds[o];
- if (op->kind != OPK_INDIRECT) continue;
- if ((PReg)op->v.ind.base != p) continue;
- if (op->v.ind.ofs != 0 || op->v.ind.index != (Reg)REG_NONE)
- continue; /* EA-shaped; leave alone */
- Operand folded = local;
- folded.type =
- use->extra.mem.type ? use->extra.mem.type : local.type;
- *op = folded;
- }
- }
- }
- if (!has_ea) addr_inst_remove(in);
- changed = 1;
- }
- }
- /* After folding, walk all frame slots and clear FSF_ADDR_TAKEN on any
- * slot whose surviving IR_ADDR_OF defs (if any) have all been retired.
- * The frontend-set ADDR_TAKEN flag is conservative; if we proved the
- * address no longer escapes, downstream passes (opt_promote_scalar_locals)
- * can take advantage of the actual non-escape state. */
- if (changed) {
- u8* still_taken =
- arena_zarray(f->arena, u8, f->nframe_slots ? f->nframe_slots : 1u);
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op != IR_ADDR_OF) continue;
- if (in->nopnds < 2 || in->opnds[1].kind != OPK_LOCAL) continue;
- FrameSlot slot = in->opnds[1].v.frame_slot;
- if (slot && slot <= f->nframe_slots) still_taken[slot - 1u] = 1;
- }
- }
- for (u32 s = 0; s < f->nframe_slots; ++s) {
- if (!still_taken[s]) f->frame_slots[s].flags &= (u16)~FSF_ADDR_TAKEN;
- }
- }
- if (changed)
- opt_analysis_invalidate(
- f, OPT_ANALYSIS_DEF_USE | OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
-}
-
-/* Scalar local promotion for the O1 pipeline. Runs after
- * `opt_addr_xform_pregs` has folded zero-EA `OPK_INDIRECT(p)` uses to
- * `OPK_LOCAL(slot)` and retired non-escaping `IR_ADDR_OF` defs. For each
- * frame slot that is now only referenced as the base of matching-type,
- * non-observable `IR_LOAD`/`IR_STORE`, the slot is replaced by a fresh
- * mutable PReg: each store becomes `IR_COPY P_slot, src` (or `IR_LOAD_IMM`
- * for an immediate source), each load becomes `IR_COPY dst, P_slot`. The
- * slot becomes unreferenced and the backend drops it from the frame.
- *
- * A mutable PReg in `-O1` IR has the same data-flow semantics as a named
- * memory cell that does not escape (multiple defs, multiple uses, value at
- * a use comes from whichever def reaches it via CFG edges). No phis are
- * required because the IR model has no phis; PReg flow becomes hard-reg
- * flow after regalloc, and regalloc already handles it.
- *
- * Conditions for promotion (per slot):
- *
- * 1. Slot kind is FS_LOCAL (real locals, not spills, sret, alloca).
- * 2. Slot has no FSF_ADDR_TAKEN, FSF_VOLATILE flag (after
- * `opt_addr_xform_pregs` has cleared the conservative ADDR_TAKEN
- * flag for slots whose IR_ADDR_OF defs were all retired).
- * 3. Slot's declared type is scalar (int, float, bool, ptr, enum).
- * 4. Every appearance of `OPK_LOCAL(slot)` in any instruction operand is
- * either:
- * - `IR_LOAD.opnds[1]` with matching `access.type == slot.type`,
- * no observable mem flags, dst is OPK_REG;
- * - `IR_STORE.opnds[0]` with matching `access.type == slot.type`,
- * no observable mem flags, src is OPK_REG or OPK_IMM.
- * 5. Slot does not appear in any aux operand position (calls, asm, etc.)
- * or as an OPK_LOCAL anywhere else (e.g., a surviving IR_ADDR_OF).
- *
- * Param-slot case: FS_PARAM slots are excluded. The backend prologue is
- * responsible for moving the ABI-incoming hard reg into the slot, and that
- * move is not visible in the IR (there is no `IR_STORE OPK_LOCAL(slot)` to
- * rewrite). At O1 the wrapper already places scalar params in REG storage
- * when the frontend does not force a memory home, so the param's value
- * arrives in a PReg without needing this pass. If a future scheme records
- * the entry-move as a synthetic IR_STORE OPK_LOCAL(slot), this pass would
- * promote it the same way it promotes any other store-to-slot. */
-
-static int promote_local_type_is_scalar(Func* f, CfreeCgTypeId ty) {
- if (!ty) return 0;
- CfreeCgTypeKind kind = cfree_cg_type_kind((CfreeCompiler*)f->c, ty);
- switch (kind) {
- case CFREE_CG_TYPE_BOOL:
- case CFREE_CG_TYPE_INT:
- case CFREE_CG_TYPE_FLOAT:
- case CFREE_CG_TYPE_PTR:
- case CFREE_CG_TYPE_ENUM:
- return 1;
- default:
- return 0;
- }
-}
-
-static int promote_op_uses_slot(const Operand* op, FrameSlot slot) {
- return op && op->kind == OPK_LOCAL && op->v.frame_slot == slot;
-}
-
-static int promote_abivalue_uses_slot(const CGABIValue* v, FrameSlot slot) {
- if (!v) return 0;
- if (promote_op_uses_slot(&v->storage, slot)) return 1;
- for (u32 i = 0; i < v->nparts; ++i)
- if (promote_op_uses_slot((const Operand*)&v->parts[i].op, slot)) return 1;
- return 0;
-}
-
-static int promote_aux_uses_slot(const Inst* in, FrameSlot slot) {
- switch ((IROp)in->op) {
- case IR_CALL: {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) return 0;
- if (aux->use_plan_replay) {
- if (promote_op_uses_slot(&aux->plan.callee, slot)) return 1;
- for (u32 i = 0; i < aux->plan.nargs; ++i)
- if (promote_op_uses_slot(&aux->plan.args[i].src, slot)) return 1;
- for (u32 i = 0; i < aux->plan.nrets; ++i)
- if (promote_op_uses_slot(&aux->plan.rets[i].dst, slot)) return 1;
- } else {
- if (promote_op_uses_slot(&aux->desc.callee, slot)) return 1;
- for (u32 i = 0; i < aux->desc.nargs; ++i)
- if (promote_abivalue_uses_slot((const CGABIValue*)&aux->desc.args[i],
- slot))
- return 1;
- if (promote_abivalue_uses_slot(&aux->desc.ret, slot)) return 1;
- }
- return 0;
- }
- case IR_RET: {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- if (!aux || !aux->present) return 0;
- return promote_abivalue_uses_slot(&aux->val, slot);
- }
- case IR_SCOPE_BEGIN: {
- IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
- if (!aux) return 0;
- return promote_op_uses_slot(&aux->desc.cond, slot);
- }
- case IR_ASM_BLOCK: {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) return 0;
- for (u32 i = 0; i < aux->nin; ++i)
- if (promote_op_uses_slot(&aux->in_ops[i], slot)) return 1;
- for (u32 i = 0; i < aux->nout; ++i)
- if (promote_op_uses_slot(&aux->out_ops[i], slot)) return 1;
- return 0;
- }
- case IR_INTRINSIC: {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- if (!aux) return 0;
- for (u32 i = 0; i < aux->narg; ++i)
- if (promote_op_uses_slot(&aux->args[i], slot)) return 1;
- for (u32 i = 0; i < aux->ndst; ++i)
- if (promote_op_uses_slot(&aux->dsts[i], slot)) return 1;
- return 0;
- }
- default:
- return 0;
- }
-}
-
-/* Per-inst check. Returns:
- * 1 = "instruction touches slot in a promotable position" (load/store base).
- * 0 = "instruction does not touch slot at all".
- * -1 = "instruction touches slot in a non-promotable way" (e.g., wrong
- * operand position, type mismatch, observable flags, aux use). */
-static int promote_inst_classify(const Inst* in, FrameSlot slot,
- CfreeCgTypeId slot_ty) {
- int touched = 0;
- /* IR_LOAD: opnds[0]=dst REG, opnds[1]=addr (allowed: OPK_LOCAL slot). */
- if ((IROp)in->op == IR_LOAD) {
- if (in->nopnds >= 2 && promote_op_uses_slot(&in->opnds[1], slot)) {
- if (opt_mem_observable(&in->extra.mem)) return -1;
- if (in->opnds[0].kind != OPK_REG) return -1;
- CfreeCgTypeId at = in->extra.mem.type;
- if (at && at != slot_ty) return -1;
- touched = 1;
- }
- /* opnds[0] is the dst REG — never OPK_LOCAL by construction. */
- if (in->nopnds >= 1 && promote_op_uses_slot(&in->opnds[0], slot)) return -1;
- } else if ((IROp)in->op == IR_STORE) {
- if (in->nopnds >= 1 && promote_op_uses_slot(&in->opnds[0], slot)) {
- if (opt_mem_observable(&in->extra.mem)) return -1;
- if (in->nopnds < 2) return -1;
- Operand* src = &in->opnds[1];
- if (src->kind != OPK_REG && src->kind != OPK_IMM) return -1;
- CfreeCgTypeId at = in->extra.mem.type;
- if (at && at != slot_ty) return -1;
- touched = 1;
- }
- /* opnds[1] is the src value — should never be OPK_LOCAL for a scalar. */
- if (in->nopnds >= 2 && promote_op_uses_slot(&in->opnds[1], slot)) return -1;
- } else {
- /* Any other instruction with an OPK_LOCAL(slot) operand blocks promotion.
- */
- for (u32 o = 0; o < in->nopnds; ++o)
- if (promote_op_uses_slot(&in->opnds[o], slot)) return -1;
- }
- if (promote_aux_uses_slot(in, slot)) return -1;
- return touched;
-}
-
-/* Rewrite an `IR_STORE OPK_LOCAL(slot), src` into a PReg def. If src is
- * OPK_IMM, emit IR_LOAD_IMM into preg; otherwise emit IR_COPY. */
-static void promote_rewrite_store(Func* f, Inst* in, PReg preg,
- CfreeCgTypeId ty, u8 cls) {
- Operand src = in->opnds[1];
- Operand* opnds = arena_array(f->arena, Operand, 2);
- memset(&opnds[0], 0, sizeof opnds[0]);
- opnds[0].kind = OPK_REG;
- opnds[0].type = ty;
- opnds[0].cls = cls;
- opnds[0].v.reg = (Reg)preg;
- in->type = ty;
- in->def = (Val)preg;
- if (src.kind == OPK_IMM) {
- in->op = IR_LOAD_IMM;
- in->nopnds = 1;
- in->opnds = opnds;
- in->extra.imm = src.v.imm;
- } else {
- opnds[1] = src;
- opnds[1].type = ty;
- opnds[1].cls = cls;
- in->op = IR_COPY;
- in->nopnds = 2;
- in->opnds = opnds;
- memset(&in->extra, 0, sizeof in->extra);
- }
-}
-
-/* Rewrite an `IR_LOAD dst, OPK_LOCAL(slot)` into `IR_COPY dst, preg`. */
-static void promote_rewrite_load(Func* f, Inst* in, PReg preg, CfreeCgTypeId ty,
- u8 cls) {
- Operand dst = in->opnds[0];
- Operand* opnds = arena_array(f->arena, Operand, 2);
- opnds[0] = dst;
- opnds[0].type = ty;
- opnds[0].cls = cls;
- memset(&opnds[1], 0, sizeof opnds[1]);
- opnds[1].kind = OPK_REG;
- opnds[1].type = ty;
- opnds[1].cls = cls;
- opnds[1].v.reg = (Reg)preg;
- in->op = IR_COPY;
- in->type = ty;
- in->nopnds = 2;
- in->opnds = opnds;
- memset(&in->extra, 0, sizeof in->extra);
-}
-
-void opt_promote_scalar_locals(Func* f) {
- if (!f || f->opt_reg_ssa || f->opt_rewritten) return;
- if (!f->nframe_slots) return;
- int changed = 0;
- for (u32 sidx = 0; sidx < f->nframe_slots; ++sidx) {
- IRFrameSlot* slot = &f->frame_slots[sidx];
- FrameSlot id = slot->id;
- /* FS_PARAM slots are owned by the backend prologue (which copies the
- * ABI-incoming hard reg into the slot before any user IR runs); there
- * is no IR-level store to rewrite. At O1, the wrapper already places
- * scalar params in REG storage when the frontend does not force a
- * memory home, so the FS_PARAM promotion path is normally a no-op.
- * Only promote FS_LOCAL slots. */
- if (slot->kind != FS_LOCAL) continue;
- if (slot->flags & (FSF_ADDR_TAKEN | FSF_VOLATILE)) continue;
- if (!promote_local_type_is_scalar(f, slot->type)) continue;
- int touched_count = 0;
- int rejected = 0;
- for (u32 b = 0; b < f->nblocks && !rejected; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- int r = promote_inst_classify(in, id, slot->type);
- if (r < 0) {
- rejected = 1;
- break;
- }
- touched_count += r;
- }
- }
- if (rejected || !touched_count) continue;
- u8 cls = (cfree_cg_type_kind((CfreeCompiler*)f->c, slot->type) ==
- CFREE_CG_TYPE_FLOAT)
- ? RC_FP
- : RC_INT;
- PReg preg = ir_alloc_preg(f, slot->type, cls);
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op == IR_LOAD && in->nopnds >= 2 &&
- promote_op_uses_slot(&in->opnds[1], id)) {
- promote_rewrite_load(f, in, preg, slot->type, cls);
- } else if ((IROp)in->op == IR_STORE && in->nopnds >= 2 &&
- promote_op_uses_slot(&in->opnds[0], id)) {
- promote_rewrite_store(f, in, preg, slot->type, cls);
- }
- }
- }
- /* The frame slot is now unreferenced. Leave the slot table entry in
- * place (compaction would require remapping every other slot id);
- * the backend's frame layout pass simply omits unreferenced slots. */
- changed = 1;
- }
- if (changed)
- opt_analysis_invalidate(
- f, OPT_ANALYSIS_DEF_USE | OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
-}
-
-/* CSE-style hoist of `IR_ADDR_OF(OPK_GLOBAL{sym, addend})` defs that appear
- * more than once in the same function. The address is a link-time constant
- * (TLS and IFUNC live on separate IROps), so all occurrences compute the
- * same value; consolidating to a single entry-block def shrinks each loop
- * body by the per-iter `adrp`/`add` pair the backend would otherwise re-emit.
- *
- * Implementation:
- * - Walk all insts, group ADDR_OF defs by (sym, addend).
- * - For each key with >= 2 defs: allocate a fresh PReg, materialize one
- * IR_ADDR_OF in block 0 (after any IR_PARAM_DECL prologue), build a
- * preg-remap from each original def-PReg to the new PReg, and NOP each
- * original def.
- * - One IR walk applies the remap to every operand `v.reg` /
- * `v.ind.base`.
- *
- * Runs after opt_addr_xform_pregs so local addr-of has already been folded
- * out; the remaining IR_ADDR_OF defs are global. */
-
-typedef struct AddrCseEntry {
- ObjSymId sym;
- i64 addend;
- PReg canonical; /* freshly allocated PReg, def in block 0 */
- CfreeCgTypeId addr_type; /* operand[0].type from the first def */
- u8 cls; /* operand[0].cls from the first def */
- u32 count; /* number of original ADDR_OF defs seen */
-} AddrCseEntry;
-
-static u32 addr_cse_find_or_add(AddrCseEntry** entries, u32* n, u32* cap,
- Arena* arena, ObjSymId sym, i64 addend) {
- for (u32 i = 0; i < *n; ++i) {
- if ((*entries)[i].sym == sym && (*entries)[i].addend == addend) return i;
- }
- if (*n == *cap) {
- u32 ncap = *cap ? *cap * 2u : 16u;
- AddrCseEntry* nv = arena_array(arena, AddrCseEntry, ncap);
- if (*entries) memcpy(nv, *entries, sizeof(AddrCseEntry) * (*n));
- *entries = nv;
- *cap = ncap;
- }
- u32 idx = (*n)++;
- AddrCseEntry* e = &(*entries)[idx];
- memset(e, 0, sizeof *e);
- e->sym = sym;
- e->addend = addend;
- e->canonical = PREG_NONE;
- e->count = 0;
- return idx;
-}
-
-static void addr_cse_apply_to_operand(Operand* op, const PReg* remap) {
- /* remap is zero-initialized; 0 means "no remap" (preg 0 is reserved as
- * unused). PREG_NONE = 0xffffffff and would be a valid remap target but
- * we never produce that. */
- if (!op) return;
- if (op->kind == OPK_REG) {
- PReg p = (PReg)op->v.reg;
- if (p != PREG_NONE && p != 0 && remap[p] != 0) op->v.reg = remap[p];
- } else if (op->kind == OPK_INDIRECT) {
- PReg p = (PReg)op->v.ind.base;
- if (p != PREG_NONE && p != 0 && remap[p] != 0) op->v.ind.base = remap[p];
- if (op->v.ind.index != (Reg)REG_NONE) {
- PReg pi = (PReg)op->v.ind.index;
- if (pi != PREG_NONE && pi != 0 && remap[pi] != 0)
- op->v.ind.index = remap[pi];
- }
- }
-}
-
-static void addr_cse_apply_to_inst(Inst* in, const PReg* remap) {
- for (u32 o = 0; o < in->nopnds; ++o)
- addr_cse_apply_to_operand(&in->opnds[o], remap);
- /* IR_CALL aux carries operands too; rewrite both replay variants. */
- if ((IROp)in->op == IR_CALL) {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) return;
- if (aux->use_plan_replay) {
- addr_cse_apply_to_operand(&aux->plan.callee, remap);
- for (u32 i = 0; i < aux->plan.nargs; ++i)
- addr_cse_apply_to_operand(&aux->plan.args[i].src, remap);
- for (u32 i = 0; i < aux->plan.nrets; ++i)
- addr_cse_apply_to_operand(&aux->plan.rets[i].dst, remap);
- } else {
- addr_cse_apply_to_operand(&aux->desc.callee, remap);
- for (u32 i = 0; i < aux->desc.nargs; ++i) {
- CGABIValue* v = (CGABIValue*)&aux->desc.args[i];
- addr_cse_apply_to_operand(&v->storage, remap);
- for (u32 k = 0; k < v->nparts; ++k)
- addr_cse_apply_to_operand((Operand*)&v->parts[k].op, remap);
- }
- addr_cse_apply_to_operand(&aux->desc.ret.storage, remap);
- for (u32 k = 0; k < aux->desc.ret.nparts; ++k)
- addr_cse_apply_to_operand((Operand*)&aux->desc.ret.parts[k].op, remap);
- }
- } else if ((IROp)in->op == IR_RET) {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- if (aux && aux->present) {
- addr_cse_apply_to_operand(&aux->val.storage, remap);
- for (u32 k = 0; k < aux->val.nparts; ++k)
- addr_cse_apply_to_operand((Operand*)&aux->val.parts[k].op, remap);
- }
- } else if ((IROp)in->op == IR_ASM_BLOCK) {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) return;
- for (u32 i = 0; i < aux->nin; ++i)
- addr_cse_apply_to_operand(&aux->in_ops[i], remap);
- for (u32 i = 0; i < aux->nout; ++i)
- addr_cse_apply_to_operand(&aux->out_ops[i], remap);
- } else if ((IROp)in->op == IR_INTRINSIC) {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- if (!aux) return;
- for (u32 i = 0; i < aux->narg; ++i)
- addr_cse_apply_to_operand(&aux->args[i], remap);
- for (u32 i = 0; i < aux->ndst; ++i)
- addr_cse_apply_to_operand(&aux->dsts[i], remap);
- }
-}
-
-static Inst* block_insert_at(Func* f, Block* bl, u32 at, u32 k) {
- if (at > bl->ninsts) at = bl->ninsts;
- if (bl->ninsts + k > bl->cap) {
- u32 ncap = bl->cap ? bl->cap : 8u;
- while (ncap < bl->ninsts + k) ncap *= 2u;
- Inst* nb = arena_zarray(f->arena, Inst, ncap);
- if (bl->insts && at) memcpy(nb, bl->insts, sizeof(Inst) * at);
- if (bl->insts && bl->ninsts > at)
- memcpy(nb + at + k, bl->insts + at, sizeof(Inst) * (bl->ninsts - at));
- bl->insts = nb;
- bl->cap = ncap;
- } else {
- if (bl->ninsts > at)
- memmove(bl->insts + at + k, bl->insts + at,
- sizeof(Inst) * (bl->ninsts - at));
- }
- for (u32 i = 0; i < k; ++i) memset(&bl->insts[at + i], 0, sizeof(Inst));
- bl->ninsts += k;
- return &bl->insts[at];
-}
-
-void opt_addr_of_global_cse(Func* f) {
- if (!f || f->opt_reg_ssa || f->opt_rewritten) return;
- if (f->nblocks == 0) return;
-
- /* Pass 1: index ADDR_OF(global) defs by (sym, addend). */
- AddrCseEntry* entries = NULL;
- u32 n_entries = 0;
- u32 cap_entries = 0;
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op != IR_ADDR_OF) continue;
- if (in->nopnds < 2) continue;
- if (in->opnds[0].kind != OPK_REG) continue;
- if (in->opnds[1].kind != OPK_GLOBAL) continue;
- u32 idx = addr_cse_find_or_add(&entries, &n_entries, &cap_entries,
- f->arena, in->opnds[1].v.global.sym,
- in->opnds[1].v.global.addend);
- AddrCseEntry* e = &entries[idx];
- if (e->count == 0) {
- e->addr_type = in->opnds[0].type;
- e->cls = in->opnds[0].cls;
- }
- ++e->count;
- }
- }
- if (!n_entries) return;
-
- /* Pass 2: for each duplicate key, allocate a canonical PReg. */
- u32 dup_count = 0;
- for (u32 i = 0; i < n_entries; ++i) {
- if (entries[i].count >= 2) {
- entries[i].canonical =
- ir_alloc_preg(f, entries[i].addr_type, entries[i].cls);
- ++dup_count;
- }
- }
- if (!dup_count) return;
-
- /* Pass 3: walk again, build per-old-PReg remap and NOP duplicate defs. */
- PReg* remap = arena_zarray(f->arena, PReg, opt_reg_count(f));
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op != IR_ADDR_OF) continue;
- if (in->nopnds < 2) continue;
- if (in->opnds[0].kind != OPK_REG) continue;
- if (in->opnds[1].kind != OPK_GLOBAL) continue;
- u32 idx = addr_cse_find_or_add(&entries, &n_entries, &cap_entries,
- f->arena, in->opnds[1].v.global.sym,
- in->opnds[1].v.global.addend);
- if (entries[idx].canonical == PREG_NONE) continue; /* singleton */
- PReg old = (PReg)in->opnds[0].v.reg;
- if (opt_reg_valid(f, old)) remap[old] = entries[idx].canonical;
- /* NOP the original def. */
- in->op = IR_NOP;
- in->def = VAL_NONE;
- in->ndefs = 0;
- in->defs = NULL;
- in->nopnds = 0;
- in->opnds = NULL;
- }
- }
-
- /* Pass 4: hoist a single ADDR_OF for each duplicated key to the entry
- * block, inserted after any leading IR_PARAM_DECL instructions. */
- if (f->entry >= f->nblocks) return;
- Block* entry = &f->blocks[f->entry];
- u32 insert_at = 0;
- while (insert_at < entry->ninsts &&
- (IROp)entry->insts[insert_at].op == IR_PARAM_DECL)
- ++insert_at;
- Inst* slot = block_insert_at(f, entry, insert_at, dup_count);
- u32 w = 0;
- for (u32 i = 0; i < n_entries; ++i) {
- if (entries[i].canonical == PREG_NONE) continue;
- Inst* in = &slot[w++];
- in->op = (u16)IR_ADDR_OF;
- in->def = (Val)entries[i].canonical;
- in->type = entries[i].addr_type;
- in->nopnds = 2;
- in->opnds = arena_array(f->arena, Operand, 2);
- memset(&in->opnds[0], 0, sizeof(Operand));
- in->opnds[0].kind = OPK_REG;
- in->opnds[0].cls = entries[i].cls;
- in->opnds[0].type = entries[i].addr_type;
- in->opnds[0].v.reg = entries[i].canonical;
- memset(&in->opnds[1], 0, sizeof(Operand));
- in->opnds[1].kind = OPK_GLOBAL;
- in->opnds[1].cls = entries[i].cls;
- in->opnds[1].type = entries[i].addr_type;
- in->opnds[1].v.global.sym = entries[i].sym;
- in->opnds[1].v.global.addend = entries[i].addend;
- ir_assign_inst_id(f, in);
- }
-
- /* Pass 5: apply remap to all operand uses in the function. */
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- addr_cse_apply_to_inst(&bl->insts[i], remap);
- }
- }
-
- opt_analysis_invalidate(
- f, OPT_ANALYSIS_DEF_USE | OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
-}
-
static u64 gvn_width_mask(u32 width) {
if (width >= 64u) return ~0ull;
return (1ull << width) - 1ull;
diff --git a/test/opt/cg_ir_lower_test.c b/test/opt/cg_ir_lower_test.c
@@ -0,0 +1,245 @@
+#include <cfree/core.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cg/ir.h"
+#include "opt/opt.h"
+
+#undef Operand
+#undef CGFuncDesc
+#undef CGParamDesc
+#undef CGCallDesc
+#undef CGLocalStorage
+
+/* Unit test for opt_func_from_cg_ir: hand-builds a CgIrFunc recording and
+ * checks the shape of the Func the converter returns. */
+
+/* CfreeHeap allocation hook backed by malloc. The heap handle and the
+ * alignment argument are ignored; a zero-byte request returns NULL. */
+static void* h_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)h;
+  (void)a;
+  return n ? malloc(n) : NULL;
+}
+
+/* CfreeHeap reallocation hook backed by realloc. The heap handle, the old
+ * size and the alignment argument are ignored. */
+static void* h_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  (void)h;
+  (void)o;
+  (void)a;
+  return realloc(p, n);
+}
+
+/* CfreeHeap release hook backed by free; the size argument is ignored. */
+static void h_free(CfreeHeap* h, void* p, size_t n) {
+  (void)h;
+  (void)n;
+  free(p);
+}
+
+static CfreeHeap g_heap = {h_alloc, h_realloc, h_free, NULL};
+static int g_fails;  /* EXPECT conditions that evaluated false */
+static int g_checks; /* EXPECT conditions evaluated so far */
+
+/* Diagnostic sink callback: prints "<kind>: <formatted message>" plus a
+ * newline to stderr. The sink pointer and source location are ignored. */
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* const names[] = {"note", "warning", "error", "fatal"};
+  const size_t nnames = sizeof names / sizeof names[0];
+  (void)s;
+  (void)loc;
+  /* A kind outside the table must not index past its end. */
+  fprintf(stderr, "%s: ", (size_t)k < nnames ? names[k] : "diag");
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+/* Counts one check in g_checks. When cond is false, also bumps g_fails and
+ * prints "FAIL file:line: " followed by the printf-style message. */
+#define EXPECT(cond, ...)                                  \
+  do {                                                     \
+    ++g_checks;                                            \
+    if (!(cond)) {                                         \
+      ++g_fails;                                           \
+      fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+      fprintf(stderr, __VA_ARGS__);                        \
+      fputc('\n', stderr);                                 \
+    }                                                      \
+  } while (0)
+
+/* Per-test fixture. */
+typedef struct TestCtx {
+  CfreeContext ctx;  /* heap and diagnostic hooks given to the compiler */
+  Compiler* c;       /* created by tc_init, released by tc_fini */
+  CfreeCgTypeId i32; /* builtin id looked up via CFREE_CG_BUILTIN_I32 */
+} TestCtx;
+
+/* Zeroes the fixture, wires in the malloc-backed heap and the stderr
+ * diagnostic sink, creates a compiler for an ARM64 / macOS / Mach-O target
+ * with 8-byte pointers, and caches the builtin i32 type id. Aborts the
+ * process if the compiler cannot be created. */
+static void tc_init(TestCtx* tc) {
+  CfreeTarget target;
+  CfreeCgBuiltinTypes b;
+  memset(tc, 0, sizeof *tc);
+  tc->ctx.heap = &g_heap;
+  tc->ctx.diag = &g_diag;
+  tc->ctx.now = -1;
+  memset(&target, 0, sizeof target);
+  target.arch = CFREE_ARCH_ARM_64;
+  target.os = CFREE_OS_MACOS;
+  target.obj = CFREE_OBJ_MACHO;
+  target.ptr_size = 8;
+  target.ptr_align = 8;
+  if (cfree_compiler_new(target, &tc->ctx, (CfreeCompiler**)&tc->c) !=
+          CFREE_OK ||
+      !tc->c) {
+    fprintf(stderr, "fatal: compiler allocation failed\n");
+    abort();
+  }
+  b = cfree_cg_builtin_types(tc->c);
+  tc->i32 = b.id[CFREE_CG_BUILTIN_I32];
+}
+
+/* Calls cfree_compiler_free on the fixture's compiler and clears the handle. */
+static void tc_fini(TestCtx* tc) {
+  cfree_compiler_free(tc->c);
+  tc->c = NULL;
+}
+
+/* Returns a zeroed operand of kind OPK_LOCAL naming local with type. */
+static Operand local_op(CGLocal local, CfreeCgTypeId type) {
+  Operand o;
+  memset(&o, 0, sizeof o);
+  o.kind = OPK_LOCAL;
+  o.type = type;
+  o.v.local = local;
+  return o;
+}
+
+/* Returns a zeroed operand of kind OPK_IMM holding value with type. */
+static Operand imm_op(i64 value, CfreeCgTypeId type) {
+  Operand o;
+  memset(&o, 0, sizeof o);
+  o.kind = OPK_IMM;
+  o.type = type;
+  o.v.imm = value;
+  return o;
+}
+
+/* Adds a local of the given type to f with size and alignment fixed at 4.
+ * name only labels the call site; it is not passed on to the IR. */
+static CGLocal add_local(CgIrFunc* f, CfreeCgTypeId type, const char* name) {
+  CGLocalDesc d;
+  (void)name;
+  memset(&d, 0, sizeof d);
+  d.type = type;
+  d.size = 4;
+  d.align = 4;
+  return cg_ir_func_add_local(f, &d, 0, 0);
+}
+
+/* Emits op at a zero source location and attaches the operand array that
+ * cg_ir_dup_operands returns for the n entries of ops. Returns the new inst. */
+static CgIrInst* emit_ops(CgIrFunc* f, CgIrOp op, const Operand* ops, u32 n) {
+  CgIrInst* in = cg_ir_emit(f, op, (SrcLoc){0, 0, 0});
+  in->opnds = cg_ir_dup_operands(f->arena, ops, n);
+  in->nopnds = n;
+  return in;
+}
+
+/* Records, in order: LOAD_IMM into local a (imm 1); CMP_BRANCH comparing a
+ * with immediate 0 under CMP_NE, targeting label done; LOAD_IMM into local b
+ * (imm 2); LABEL done; LOAD_IMM into b (imm 3); RET of b. The recording is
+ * then lowered with opt_func_from_cg_ir and the resulting Func is checked
+ * for local and PReg counts, block count, and the shape of its entry block. */
+static void converter_builds_cfg_and_pregs(void) {
+  TestCtx tc;
+  tc_init(&tc);
+
+  CGFuncDesc fd;
+  memset(&fd, 0, sizeof fd);
+  CfreeCgTypeId result_types[1];
+  result_types[0] = tc.i32;
+  fd.fn_type = tc.i32;
+  fd.result_types = result_types;
+  fd.nresults = 1;
+  CgIrFunc* cg = cg_ir_func_new(tc.c, &fd);
+  CGLocal a = add_local(cg, tc.i32, "a");
+  CGLocal b = add_local(cg, tc.i32, "b");
+  Label done = cg_ir_func_add_label(cg);
+
+  Operand one[] = {local_op(a, tc.i32)};
+  CgIrInst* li = emit_ops(cg, CG_IR_LOAD_IMM, one, 1);
+  li->extra.imm = 1;
+
+  Operand cmp[] = {local_op(a, tc.i32), imm_op(0, tc.i32)};
+  CgIrInst* br = emit_ops(cg, CG_IR_CMP_BRANCH, cmp, 2);
+  CgIrCmpBranchAux* br_aux = arena_znew(cg->arena, CgIrCmpBranchAux);
+  br_aux->op = CMP_NE;
+  br_aux->target = done;
+  br->extra.aux = br_aux;
+
+  Operand two[] = {local_op(b, tc.i32)};
+  CgIrInst* li2 = emit_ops(cg, CG_IR_LOAD_IMM, two, 1);
+  li2->extra.imm = 2;
+
+  CgIrInst* label = cg_ir_emit(cg, CG_IR_LABEL, (SrcLoc){0, 0, 0});
+  label->extra.imm = (i64)done;
+  cg_ir_func_note_label_place(cg, done, (SrcLoc){0, 0, 0});
+
+  CgIrInst* li3 = emit_ops(cg, CG_IR_LOAD_IMM, two, 1);
+  li3->extra.imm = 3;
+
+  CgIrRetAux* ret_aux = arena_znew(cg->arena, CgIrRetAux);
+  CGLocal retv = b;
+  ret_aux->values = cg_ir_dup_locals(cg->arena, &retv, 1);
+  ret_aux->nvalues = 1;
+  CgIrInst* ret = cg_ir_emit(cg, CG_IR_RET, (SrcLoc){0, 0, 0});
+  ret->extra.aux = ret_aux;
+
+  Func* f = opt_func_from_cg_ir(tc.c, cg);
+  EXPECT(f != NULL, "converter returned NULL");
+  if (!f) {
+    /* Every remaining check reads through f. */
+    tc_fini(&tc);
+    return;
+  }
+  EXPECT(f->nlocals == 2, "expected 2 locals, got %u", f->nlocals);
+  EXPECT(f->npregs == 3, "expected two PRegs plus sentinel, got %u", f->npregs);
+  EXPECT(f->nblocks >= 3, "expected at least 3 blocks, got %u", f->nblocks);
+
+  /* Only index blocks/insts/opnds that exist; a short-circuited condition
+   * still counts as a failed check. */
+  const int has_entry = f->entry < f->nblocks;
+  const int has_inst = has_entry && f->blocks[f->entry].ninsts > 0;
+  EXPECT(has_entry && f->blocks[f->entry].nsucc == 2,
+         "entry should branch two ways");
+  EXPECT(has_entry && f->blocks[f->entry].ninsts == 2,
+         "entry should contain load+branch");
+  EXPECT(has_inst && f->blocks[f->entry].insts[0].op == IR_LOAD_IMM,
+         "first inst should be IR_LOAD_IMM");
+  EXPECT(has_inst && f->blocks[f->entry].insts[0].nopnds > 0 &&
+             f->blocks[f->entry].insts[0].opnds[0].kind == OPK_REG,
+         "local value should lower to PReg operand");
+
+  tc_fini(&tc);
+}
+
+/* Runs the test; returns 1 with a summary on stderr if any check failed,
+ * otherwise prints the check count to stdout and returns 0. */
+int main(void) {
+  converter_builds_cfg_and_pregs();
+  if (g_fails) {
+    fprintf(stderr, "cg-ir-lower: %d/%d failed\n", g_fails, g_checks);
+    return 1;
+  }
+  printf("cg-ir-lower: %d checks, 0 failures\n", g_checks);
+  return 0;
+}