commit cc3abed6052eb7fdcc58fc6ccba94bbd8fc8a5ff
parent 9747c24c5a4aa056f0f2c3d89498be8edb971c47
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 05:23:34 -0700
opt: record-replay wrapper + tape printer + constfold (phases 0-2)
Implements doc/OPT.md phases 0-2. opt_cgtarget_new now returns a
CGTarget that records each function as a tape of CGTarget calls,
hands out wrapper-local virtual ids for alloc_reg/frame_slot/
label_new/scope_begin, and replays the tape onto the wrapped target
at func_end (with vid -> target-id translation). A simple integer
constfold peephole rewrites LOAD_IMM+LOAD_IMM+BINOP{IADD,ISUB,IMUL}
to a single LOAD_IMM, chaining transitively. opt_set_dump_writer
exposes a textual tape dump for debugging, wired through cg-runner
--opt-level N --dump-tape NAME. The cg corpus now runs at every
level in CFREE_OPT_LEVELS (default "0 1") so D/R/E/J equivalence is
checked end-to-end; W stays at level 0.
Diffstat:
5 files changed, 2164 insertions(+), 158 deletions(-)
diff --git a/src/api/stubs.c b/src/api/stubs.c
@@ -54,15 +54,7 @@ void parse_asm(Compiler* c, Lexer* l, MCEmitter* m) {
* cgtarget_new / cgtarget_finalize / cgtarget_free live in src/arch/<target>.c
* (dispatched through src/arch/arch.c). */
-/* ============================================================
- * Optimizer
- * ============================================================ */
-
-CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* t, int level) {
- (void)t;
- (void)level;
- unimplemented(c, "opt");
-}
+/* Optimizer (opt_cgtarget_new) lives in src/opt/opt.c. */
/* Debug info producer lives in src/debug/. */
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -0,0 +1,1875 @@
+/* opt — CGTarget wrapper that records each function as a tape of
+ * CGTarget calls, then replays them onto the wrapped target on
+ * func_end. See doc/OPT.md for the phased plan.
+ *
+ * Phase 1 (current): record every emit-side call into a per-function
+ * tape; alloc_reg / frame_slot / label_new / scope_begin hand out
+ * wrapper-local virtual ids. On func_end the tape is replayed
+ * linearly: each entry produces exactly one wrapped target call,
+ * with virtual ids translated to target-side ids on the fly. This
+ * preserves doc/DESIGN.md §8's "function-at-a-time" streaming
+ * guarantee at -O1.
+ *
+ * Phase 2 (current): a small, safe peephole pass runs over the tape
+ * between recording and replay. See try_peephole_constfold.
+ *
+ * Phase 3+ (deferred): build CFG and SSA from the tape, run
+ * intra-procedural passes, lower through machinize → regalloc →
+ * emit. Until that lands, level 2 is functionally identical to
+ * level 1 (per-function record + replay).
+ *
+ * Methods the wrapper does not record:
+ * - clobbers / spill_reg / reload_reg are CG -O0 register-pressure
+ * mechanics, meaningless for opt's unbounded vreg space and never
+ * invoked by CG on real backends in v1; calling them is a wiring
+ * bug, so we panic loudly. asm_block panics too (not supported yet).
+ * - free_reg is documented as a hint and is silently ignored. */
+
+#include "opt/opt.h"
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/core.h"
+
+/* ---- tape op tags ---- */
+
+typedef enum {
+ TOP_FUNC_BEGIN, /* carries a copied CGFuncDesc plus its params array */
+ TOP_FUNC_END,
+ TOP_ALLOC_REG, /* allocator: entry stores the vreg handed to the caller */
+ TOP_FRAME_SLOT, /* allocator: entry stores the vslot handed to the caller */
+ TOP_PARAM,
+ TOP_LABEL_NEW, /* allocator: entry stores the vlabel handed to the caller */
+ TOP_LABEL_PLACE,
+ TOP_JUMP,
+ TOP_CMP_BRANCH,
+ TOP_SCOPE_BEGIN, /* allocator: entry stores the vscope handed to the caller */
+ TOP_SCOPE_ELSE,
+ TOP_SCOPE_END,
+ TOP_BREAK_TO,
+ TOP_CONTINUE_TO,
+ TOP_LOAD_IMM,
+ TOP_LOAD_CONST,
+ TOP_COPY, /* u.copy layout is shared with TOP_ADDR_OF and TOP_VA_COPY */
+ TOP_LOAD,
+ TOP_STORE,
+ TOP_ADDR_OF,
+ TOP_TLS_ADDR_OF,
+ TOP_COPY_BYTES, /* u.agg layout is shared with TOP_SET_BYTES */
+ TOP_SET_BYTES,
+ TOP_BITFIELD_LOAD,
+ TOP_BITFIELD_STORE,
+ TOP_BINOP,
+ TOP_UNOP,
+ TOP_CMP,
+ TOP_CONVERT,
+ TOP_CALL,
+ TOP_RET,
+ TOP_ALLOCA,
+ TOP_VA_START, /* u.va_se layout is shared with TOP_VA_END */
+ TOP_VA_ARG,
+ TOP_VA_END,
+ TOP_VA_COPY,
+ TOP_SETJMP,
+ TOP_LONGJMP,
+ TOP_ATOMIC_LOAD,
+ TOP_ATOMIC_STORE,
+ TOP_ATOMIC_RMW,
+ TOP_ATOMIC_CAS,
+ TOP_FENCE,
+ TOP_INTRINSIC,
+ TOP_SET_LOC,
+} TapeOpKind; /* tape_append narrows this to the u8 TapeEntry.op field */
+
+/* TapeEntry: one recorded CGTarget call; `op` selects the live union
+ * member. The tagged union is wide; we pay arena bytes for clarity. */
+typedef struct TapeEntry {
+ u8 op; /* TapeOpKind */
+ u8 dead; /* set by peepholes; replay skips dead entries */
+ u16 padding;
+ SrcLoc loc; /* pending_loc at the time the entry was appended */
+ union {
+ /* TOP_FUNC_BEGIN: deep-copied descriptor. The caller's CGFuncDesc
+ * may be stack-allocated, so we copy by value into our arena.
+ * params[] is also copied; field shapes inside (Type*, ABIArgInfo*,
+ * incoming pointer) are TU-lifetime and shared. */
+ struct {
+ CGFuncDesc desc;
+ CGParamDesc* params; /* arena copy of fd.params */
+ } func_begin;
+
+ /* TOP_ALLOC_REG: returns a vreg, indexed into reg_map at replay. */
+ struct {
+ RegClass cls;
+ const Type* ty;
+ Reg vreg;
+ } alloc_reg;
+
+ /* TOP_FRAME_SLOT: vslot indexes slot_map at replay. */
+ struct {
+ FrameSlotDesc desc;
+ FrameSlot vslot;
+ } frame_slot;
+
+ /* TOP_PARAM */
+ struct {
+ CGParamDesc desc;
+ } param;
+
+ /* TOP_LABEL_NEW: vlabel indexes label_map at replay. */
+ struct {
+ Label vlabel;
+ } label_new;
+
+ /* TOP_LABEL_PLACE / TOP_JUMP */
+ struct {
+ Label vlabel;
+ } label_op;
+
+ /* TOP_CMP_BRANCH */
+ struct {
+ CmpOp op;
+ Operand a, b;
+ Label vlabel;
+ } cmp_branch;
+
+ /* TOP_SCOPE_BEGIN: vscope indexes scope_map at replay. */
+ struct {
+ CGScopeDesc desc;
+ CGScope vscope;
+ } scope_begin;
+
+ /* TOP_SCOPE_ELSE / TOP_SCOPE_END / TOP_BREAK_TO / TOP_CONTINUE_TO */
+ struct {
+ CGScope vscope;
+ } scope_op;
+
+ /* TOP_LOAD_IMM */
+ struct {
+ Operand dst;
+ i64 imm;
+ } load_imm;
+
+ /* TOP_LOAD_CONST */
+ struct {
+ Operand dst;
+ ConstBytes cb;
+ } load_const;
+
+ /* TOP_COPY / TOP_ADDR_OF / TOP_VA_COPY */
+ struct {
+ Operand dst;
+ Operand src;
+ } copy;
+
+ /* TOP_LOAD */
+ struct {
+ Operand dst;
+ Operand addr;
+ MemAccess mem;
+ } load;
+
+ /* TOP_STORE */
+ struct {
+ Operand addr;
+ Operand src;
+ MemAccess mem;
+ } store;
+
+ /* TOP_TLS_ADDR_OF */
+ struct {
+ Operand dst;
+ ObjSymId sym;
+ i64 addend;
+ } tls_addr_of;
+
+ /* TOP_COPY_BYTES (a = dst, b = src) / TOP_SET_BYTES (a = dst, b = byte) */
+ struct {
+ Operand a;
+ Operand b;
+ AggregateAccess agg;
+ } agg;
+
+ /* TOP_BITFIELD_LOAD */
+ struct {
+ Operand dst;
+ Operand record;
+ BitFieldAccess bf;
+ } bitfield_load;
+
+ /* TOP_BITFIELD_STORE */
+ struct {
+ Operand record;
+ Operand src;
+ BitFieldAccess bf;
+ } bitfield_store;
+
+ /* TOP_BINOP */
+ struct {
+ BinOp op;
+ Operand dst, a, b;
+ } binop;
+
+ /* TOP_UNOP */
+ struct {
+ UnOp op;
+ Operand dst, a;
+ } unop;
+
+ /* TOP_CMP */
+ struct {
+ CmpOp op;
+ Operand dst, a, b;
+ } cmp;
+
+ /* TOP_CONVERT */
+ struct {
+ ConvKind kind;
+ Operand dst, src;
+ } convert;
+
+ /* TOP_CALL: deep-copied descriptor and inner arrays. */
+ struct {
+ CGCallDesc desc;
+ CGABIValue* args; /* len = desc.nargs */
+ CGABIPart* ret_parts; /* len = desc.ret.nparts; NULL if 0 */
+ CGABIPart** arg_parts; /* per-arg parts arrays; entry is NULL if 0 */
+ } call;
+
+ /* TOP_RET: present == 1 means there is a CGABIValue; otherwise a
+ * void return. parts is deep-copied. */
+ struct {
+ u8 present;
+ CGABIValue val;
+ CGABIPart* parts; /* len = val.nparts */
+ } ret;
+
+ /* TOP_ALLOCA */
+ struct {
+ Operand dst;
+ Operand size;
+ u32 align;
+ } alloca_;
+
+ /* TOP_VA_START / TOP_VA_END */
+ struct {
+ Operand ap;
+ } va_se;
+
+ /* TOP_VA_ARG */
+ struct {
+ Operand dst;
+ Operand ap;
+ const Type* ty;
+ } va_arg_;
+
+ /* TOP_SETJMP */
+ struct {
+ Operand dst;
+ Operand buf;
+ } setjmp_;
+
+ /* TOP_LONGJMP */
+ struct {
+ Operand buf;
+ Operand val;
+ } longjmp_;
+
+ /* TOP_ATOMIC_LOAD */
+ struct {
+ Operand dst;
+ Operand addr;
+ MemAccess mem;
+ MemOrder mo;
+ } atomic_load;
+
+ /* TOP_ATOMIC_STORE */
+ struct {
+ Operand addr;
+ Operand src;
+ MemAccess mem;
+ MemOrder mo;
+ } atomic_store;
+
+ /* TOP_ATOMIC_RMW */
+ struct {
+ AtomicOp op;
+ Operand dst;
+ Operand addr;
+ Operand val;
+ MemAccess mem;
+ MemOrder mo;
+ } atomic_rmw;
+
+ /* TOP_ATOMIC_CAS */
+ struct {
+ Operand prior;
+ Operand ok;
+ Operand addr;
+ Operand expected;
+ Operand desired;
+ MemAccess mem;
+ MemOrder success;
+ MemOrder failure;
+ } atomic_cas;
+
+ /* TOP_FENCE */
+ struct {
+ MemOrder mo;
+ } fence;
+
+ /* TOP_INTRINSIC */
+ struct {
+ IntrinKind kind;
+ Operand* dsts; /* deep-copied */
+ u32 ndst;
+ Operand* args; /* deep-copied */
+ u32 narg;
+ } intrinsic;
+
+ /* TOP_SET_LOC */
+ struct {
+ SrcLoc loc;
+ } set_loc;
+ } u;
+} TapeEntry;
+
+/* ---- wrapper state ---- */
+
+typedef struct OptImpl {
+ CGTarget base; /* must stay first: impl_of() casts CGTarget* to OptImpl* */
+ CGTarget* target; /* wrapped */
+ int level; /* NOTE(review): presumably opt_cgtarget_new's level — confirm */
+ Compiler* c; /* supplies the c->tu arena and the panic context */
+
+ /* Tape: per-function, reset on func_begin. Allocated from c->tu so
+ * the buffer survives panic via compiler_defer cleanups. */
+ TapeEntry* tape;
+ u32 ntape, tape_cap; /* entries in use / entries allocated */
+
+ /* Wrapper-local virtual id counters. 1-based; 0 reserved as NONE.
+ * Reset on each func_begin. */
+ Reg next_vreg;
+ FrameSlot next_vslot;
+ Label next_vlabel;
+ CGScope next_vscope;
+
+ /* Replay-time translation tables. Index by virtual id; entry 0 is
+ * the NONE sentinel and never referenced. Allocated lazily on first
+ * replay so peak size matches the largest function. */
+ Reg* reg_map;
+ u32 reg_map_cap;
+ FrameSlot* slot_map;
+ u32 slot_map_cap;
+ Label* label_map;
+ u32 label_map_cap;
+ CGScope* scope_map;
+ u32 scope_map_cap;
+
+ SrcLoc pending_loc; /* most recent set_loc; stamped onto each entry */
+
+ /* If non-NULL, dump the tape to this writer on each func_end (before
+ * replay). Used by cg-runner --dump-tape and ad-hoc debugging. */
+ Writer* dump_writer;
+} OptImpl;
+
+static OptImpl* impl_of(CGTarget* t) { return (OptImpl*)t; } /* t is &OptImpl.base (first member) */
+
+static _Noreturn void panic_unsupported(OptImpl* o, const char* what) {
+  const SrcLoc pos = {0, 0, 0}; /* no source position is attached */
+  compiler_panic(o->c, pos,
+                 "opt_cgtarget: %s called under unbounded virtuals", what);
+}
+
+/* ---- tape append ---- */
+
+static TapeEntry* tape_append(OptImpl* o, TapeOpKind op) {
+  TapeEntry* slot;
+  if (o->ntape == o->tape_cap) { /* full or not yet allocated: double it */
+    u32 grown = o->tape_cap ? o->tape_cap * 2u : 64u;
+    TapeEntry* fresh = arena_array(o->c->tu, TapeEntry, grown);
+    if (o->tape) memcpy(fresh, o->tape, sizeof(TapeEntry) * o->ntape);
+    o->tape = fresh;
+    o->tape_cap = grown;
+  }
+  slot = &o->tape[o->ntape++];
+  memset(slot, 0, sizeof *slot); /* callers rely on unset fields being 0 */
+  slot->op = (u8)op;
+  slot->loc = o->pending_loc;
+  return slot;
+}
+
+/* ---- deep-copy helpers ---- */
+
+static CGParamDesc* copy_params(Compiler* c, const CGParamDesc* src, u32 n) {
+  /* Element-wise (shallow) copy into c->tu; an empty array becomes NULL. */
+  CGParamDesc* dup = NULL;
+  if (n == 0) return dup;
+  dup = arena_array(c->tu, CGParamDesc, n);
+  return memcpy(dup, src, sizeof *dup * n);
+}
+
+static CGABIPart* copy_parts(Compiler* c, const CGABIPart* src, u32 n) {
+  /* Element-wise (shallow) copy into c->tu; an empty array becomes NULL. */
+  CGABIPart* dup = NULL;
+  if (n == 0) return dup;
+  dup = arena_array(c->tu, CGABIPart, n);
+  return memcpy(dup, src, sizeof *dup * n);
+}
+
+static Operand* copy_operands(Compiler* c, const Operand* src, u32 n) {
+  /* Element-wise (shallow) copy into c->tu; an empty array becomes NULL. */
+  Operand* dup = NULL;
+  if (n == 0) return dup;
+  dup = arena_array(c->tu, Operand, n);
+  return memcpy(dup, src, sizeof *dup * n);
+}
+
+/* ---- map helpers (replay-time) ----
+ * The maps are direct-indexed by the 1-based virtual id; entry 0 is
+ * the NONE sentinel. */
+
+static void map_reg_grow(OptImpl* o, u32 needed) {
+  /* Ensure reg_map holds at least `needed` entries, doubling from 16. */
+  const u32 have = o->reg_map_cap;
+  u32 want = have ? have : 16u;
+  Reg* grown;
+  if (needed <= have) return;
+  while (want < needed) want *= 2u;
+  grown = arena_array(o->c->tu, Reg, want);
+  if (o->reg_map) memcpy(grown, o->reg_map, sizeof(Reg) * have);
+  for (u32 k = have; k < want; ++k) grown[k] = REG_NONE; /* tail: unmapped */
+  o->reg_map = grown;
+  o->reg_map_cap = want;
+}
+
+static void map_slot_grow(OptImpl* o, u32 needed) {
+  const u32 have = o->slot_map_cap;
+  u32 want = have ? have : 16u; /* doubling starts from 16 entries */
+  FrameSlot* grown;
+  if (needed <= have) return; /* already large enough */
+  while (want < needed) want *= 2u;
+  grown = arena_array(o->c->tu, FrameSlot, want);
+  if (o->slot_map) memcpy(grown, o->slot_map, sizeof(FrameSlot) * have);
+  for (u32 k = have; k < want; ++k) grown[k] = FRAME_SLOT_NONE;
+  o->slot_map = grown;
+  o->slot_map_cap = want;
+}
+
+static void map_label_grow(OptImpl* o, u32 needed) {
+  const u32 have = o->label_map_cap;
+  u32 want = have ? have : 16u; /* doubling starts from 16 entries */
+  Label* grown;
+  if (needed <= have) return; /* already large enough */
+  while (want < needed) want *= 2u;
+  grown = arena_array(o->c->tu, Label, want);
+  if (o->label_map) memcpy(grown, o->label_map, sizeof(Label) * have);
+  for (u32 k = have; k < want; ++k) grown[k] = LABEL_NONE;
+  o->label_map = grown;
+  o->label_map_cap = want;
+}
+
+static void map_scope_grow(OptImpl* o, u32 needed) {
+  const u32 have = o->scope_map_cap;
+  u32 want = have ? have : 8u; /* doubling starts from 8 entries */
+  CGScope* grown;
+  if (needed <= have) return; /* already large enough */
+  while (want < needed) want *= 2u;
+  grown = arena_array(o->c->tu, CGScope, want);
+  if (o->scope_map) memcpy(grown, o->scope_map, sizeof(CGScope) * have);
+  for (u32 k = have; k < want; ++k) grown[k] = CG_SCOPE_NONE;
+  o->scope_map = grown;
+  o->scope_map_cap = want;
+}
+
+/* ---- recording: every emit-side method records a tape entry.
+ *
+ * Allocator methods (alloc_reg, frame_slot, label_new, scope_begin)
+ * additionally hand back a wrapper-local virtual id; the underlying
+ * target is not consulted until replay. */
+
+static void w_func_begin(CGTarget* t, const CGFuncDesc* fd) {
+  OptImpl* o = impl_of(t);
+  TapeEntry* rec;
+
+  /* Drop the previous tape (its arena buffer is abandoned, not reused —
+   * NOTE(review): confirm that is intended) and restart the id counters. */
+  o->tape = NULL;
+  o->ntape = o->tape_cap = 0;
+  o->next_vreg = 1;
+  o->next_vslot = 1;
+  o->next_vlabel = 1;
+  o->next_vscope = 1;
+  o->pending_loc = (SrcLoc){0, 0, 0};
+  /* Every map entry goes back to "unmapped"; the buffers are retained. */
+  for (u32 k = 0; k < o->reg_map_cap; ++k) o->reg_map[k] = REG_NONE;
+  for (u32 k = 0; k < o->slot_map_cap; ++k) o->slot_map[k] = FRAME_SLOT_NONE;
+  for (u32 k = 0; k < o->label_map_cap; ++k) o->label_map[k] = LABEL_NONE;
+  for (u32 k = 0; k < o->scope_map_cap; ++k) o->scope_map[k] = CG_SCOPE_NONE;
+
+  /* Copy the descriptor by value and duplicate params[] into the arena,
+   * so the entry never aliases caller storage (the harness is noted to
+   * write pds[i].slot after func_begin returns). The stored descriptor
+   * is repointed at the duplicate. Replay later passes each recorded
+   * params[k].slot through xlat_slot. */
+  rec = tape_append(o, TOP_FUNC_BEGIN);
+  rec->u.func_begin.params = copy_params(o->c, fd->params, fd->nparams);
+  rec->u.func_begin.desc = *fd;
+  rec->u.func_begin.desc.params = rec->u.func_begin.params;
+}
+
+static void w_func_end(CGTarget* t);
+
+static Reg w_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
+  /* Hand out the next virtual register; the target is only asked at replay. */
+  OptImpl* self = impl_of(t);
+  TapeEntry* rec = tape_append(self, TOP_ALLOC_REG);
+  rec->u.alloc_reg.cls = cls;
+  rec->u.alloc_reg.ty = ty;
+  rec->u.alloc_reg.vreg = self->next_vreg;
+  return self->next_vreg++;
+}
+
+static void w_free_reg(CGTarget* t, Reg r) {
+  /* Deliberate no-op: virtual registers are unbounded, so there is no
+   * pool to give `r` back to and nothing is recorded on the tape. */
+  (void)r;
+  (void)t;
+}
+
+static FrameSlot w_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+  /* Record the descriptor by value and hand out the next virtual slot. */
+  OptImpl* self = impl_of(t);
+  TapeEntry* rec = tape_append(self, TOP_FRAME_SLOT);
+  rec->u.frame_slot.desc = *d;
+  rec->u.frame_slot.vslot = self->next_vslot;
+  return self->next_vslot++;
+}
+
+static void w_param(CGTarget* t, const CGParamDesc* d) {
+  /* Record the descriptor by value; replay translates its slot. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_PARAM);
+  rec->u.param.desc = *d;
+}
+
+static const Reg* w_clobbers(CGTarget* t, RegClass cls, u32* nregs) {
+  OptImpl* self = impl_of(t);
+  (void)cls, (void)nregs; /* *nregs is never written: the call panics */
+  panic_unsupported(self, "clobbers");
+}
+static void w_spill_reg(CGTarget* t, Operand src, FrameSlot s, MemAccess m) {
+  /* -O0 register-pressure hook; rejected for the wrapper's vreg space. */
+  OptImpl* self = impl_of(t);
+  (void)src, (void)s, (void)m;
+  panic_unsupported(self, "spill_reg");
+}
+static void w_reload_reg(CGTarget* t, Operand dst, FrameSlot s, MemAccess m) {
+  /* -O0 register-pressure hook; rejected for the wrapper's vreg space. */
+  OptImpl* self = impl_of(t);
+  (void)dst, (void)s, (void)m;
+  panic_unsupported(self, "reload_reg");
+}
+
+static Label w_label_new(CGTarget* t) {
+  /* Hand out the next virtual label; replay asks the target for a real one. */
+  OptImpl* self = impl_of(t);
+  TapeEntry* rec = tape_append(self, TOP_LABEL_NEW);
+  rec->u.label_new.vlabel = self->next_vlabel;
+  return self->next_vlabel++;
+}
+
+static void w_label_place(CGTarget* t, Label l) {
+  /* Record the placement of virtual label `l`. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_LABEL_PLACE);
+  rec->u.label_op.vlabel = l;
+}
+static void w_jump(CGTarget* t, Label l) {
+  /* Record an unconditional jump to virtual label `l`. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_JUMP);
+  rec->u.label_op.vlabel = l;
+}
+static void w_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l) {
+  /* Record a compare-and-branch whose target is virtual label `l`. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_CMP_BRANCH);
+  rec->u.cmp_branch.vlabel = l;
+  rec->u.cmp_branch.op = op;
+  rec->u.cmp_branch.a = a;
+  rec->u.cmp_branch.b = b;
+}
+
+static CGScope w_scope_begin(CGTarget* t, const CGScopeDesc* d) {
+  /* Record the descriptor by value and hand out the next virtual scope. */
+  OptImpl* self = impl_of(t);
+  TapeEntry* rec = tape_append(self, TOP_SCOPE_BEGIN);
+  rec->u.scope_begin.desc = *d;
+  rec->u.scope_begin.vscope = self->next_vscope;
+  return self->next_vscope++;
+}
+static void w_scope_else(CGTarget* t, CGScope s) {
+  /* Record scope_else for virtual scope `s`. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_SCOPE_ELSE);
+  rec->u.scope_op.vscope = s;
+}
+static void w_scope_end(CGTarget* t, CGScope s) {
+  /* Record scope_end for virtual scope `s`. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_SCOPE_END);
+  rec->u.scope_op.vscope = s;
+}
+static void w_break_to(CGTarget* t, CGScope s) {
+  /* Record break_to for virtual scope `s`. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_BREAK_TO);
+  rec->u.scope_op.vscope = s;
+}
+static void w_continue_to(CGTarget* t, CGScope s) {
+  /* Record continue_to for virtual scope `s`. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_CONTINUE_TO);
+  rec->u.scope_op.vscope = s;
+}
+
+static void w_load_imm(CGTarget* t, Operand dst, i64 imm) {
+  /* Record the call; replay translates dst and forwards imm unchanged. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_LOAD_IMM);
+  rec->u.load_imm.imm = imm;
+  rec->u.load_imm.dst = dst;
+}
+static void w_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
+  /* Record the call; cb is stored by value, exactly as passed in. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_LOAD_CONST);
+  rec->u.load_const.cb = cb;
+  rec->u.load_const.dst = dst;
+}
+static void w_copy(CGTarget* t, Operand dst, Operand src) {
+  /* Record a copy; both operands are translated at replay. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_COPY);
+  rec->u.copy.src = src;
+  rec->u.copy.dst = dst;
+}
+static void w_load(CGTarget* t, Operand dst, Operand addr, MemAccess m) {
+  /* Record a load; m is stored by value and replayed unchanged. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_LOAD);
+  rec->u.load.mem = m;
+  rec->u.load.addr = addr;
+  rec->u.load.dst = dst;
+}
+static void w_store(CGTarget* t, Operand addr, Operand src, MemAccess m) {
+  /* Record a store; m is stored by value and replayed unchanged. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_STORE);
+  rec->u.store.mem = m;
+  rec->u.store.src = src;
+  rec->u.store.addr = addr;
+}
+static void w_addr_of(CGTarget* t, Operand dst, Operand lv) {
+  /* Recorded in the shared u.copy layout, with the lvalue held in .src. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_ADDR_OF);
+  rec->u.copy.src = lv;
+  rec->u.copy.dst = dst;
+}
+static void w_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
+  /* Record the call; replay translates dst only (sym/addend pass through). */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_TLS_ADDR_OF);
+  rec->u.tls_addr_of.addend = addend;
+  rec->u.tls_addr_of.sym = sym;
+  rec->u.tls_addr_of.dst = dst;
+}
+static void w_copy_bytes(CGTarget* t, Operand dst, Operand src,
+                         AggregateAccess agg) {
+  /* Recorded in the shared u.agg layout: a = dst, b = src. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_COPY_BYTES);
+  rec->u.agg.agg = agg;
+  rec->u.agg.b = src;
+  rec->u.agg.a = dst;
+}
+static void w_set_bytes(CGTarget* t, Operand dst, Operand byte,
+                        AggregateAccess agg) {
+  /* Recorded in the shared u.agg layout: a = dst, b = fill byte. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_SET_BYTES);
+  rec->u.agg.agg = agg;
+  rec->u.agg.b = byte;
+  rec->u.agg.a = dst;
+}
+static void w_bitfield_load(CGTarget* t, Operand dst, Operand record,
+                            BitFieldAccess bf) {
+  /* Record a bit-field load; bf is stored by value. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_BITFIELD_LOAD);
+  rec->u.bitfield_load.bf = bf;
+  rec->u.bitfield_load.record = record;
+  rec->u.bitfield_load.dst = dst;
+}
+static void w_bitfield_store(CGTarget* t, Operand record, Operand src,
+                             BitFieldAccess bf) {
+  /* Record a bit-field store; bf is stored by value. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_BITFIELD_STORE);
+  rec->u.bitfield_store.bf = bf;
+  rec->u.bitfield_store.src = src;
+  rec->u.bitfield_store.record = record;
+}
+
+static void w_binop(CGTarget* t, BinOp op, Operand dst, Operand a, Operand b) {
+  /* Record a binary operation: dst = a <op> b. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_BINOP);
+  rec->u.binop.dst = dst;
+  rec->u.binop.op = op;
+  rec->u.binop.a = a;
+  rec->u.binop.b = b;
+}
+static void w_unop(CGTarget* t, UnOp op, Operand dst, Operand a) {
+  /* Record a unary operation: dst = <op> a. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_UNOP);
+  rec->u.unop.dst = dst;
+  rec->u.unop.op = op;
+  rec->u.unop.a = a;
+}
+static void w_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
+  /* Record a comparison of a and b whose result goes to dst. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_CMP);
+  rec->u.cmp.dst = dst;
+  rec->u.cmp.op = op;
+  rec->u.cmp.a = a;
+  rec->u.cmp.b = b;
+}
+static void w_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
+  /* Record a conversion of kind `k` from src into dst. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_CONVERT);
+  rec->u.convert.dst = dst;
+  rec->u.convert.src = src;
+  rec->u.convert.kind = k;
+}
+
+static void w_call(CGTarget* t, const CGCallDesc* d) {
+  OptImpl* self = impl_of(t);
+  TapeEntry* rec = tape_append(self, TOP_CALL);
+  CGABIValue* argv = NULL;
+  CGABIPart** argv_parts = NULL;
+
+  /* The caller's descriptor, its args[] and each parts[] array may all be
+   * stack-allocated, so every level is duplicated into the arena. With
+   * no arguments both arrays stay NULL. */
+  if (d->nargs) {
+    argv = arena_array(self->c->tu, CGABIValue, d->nargs);
+    argv_parts = arena_array(self->c->tu, CGABIPart*, d->nargs);
+  }
+  for (u32 k = 0; k < d->nargs; ++k) {
+    const CGABIValue* from = &d->args[k];
+    argv[k] = *from;
+    argv_parts[k] = copy_parts(self->c, from->parts, from->nparts);
+    argv[k].parts = argv_parts[k];
+  }
+
+  /* Store the descriptor by value, then repoint its inner pointers at the
+   * arena copies; the same pointers are kept in the dedicated fields. */
+  rec->u.call.desc = *d;
+  rec->u.call.desc.args = argv;
+  rec->u.call.args = argv;
+  rec->u.call.arg_parts = argv_parts;
+  rec->u.call.ret_parts = copy_parts(self->c, d->ret.parts, d->ret.nparts);
+  rec->u.call.desc.ret.parts = rec->u.call.ret_parts;
+}
+
+static void w_ret(CGTarget* t, const CGABIValue* v) {
+  OptImpl* self = impl_of(t);
+  TapeEntry* rec = tape_append(self, TOP_RET);
+  /* A NULL value is a void return; tape_append already zeroed the rest. */
+  rec->u.ret.present = (v != NULL);
+  if (v != NULL) {
+    /* Copy the value, then repoint it at an arena copy of its parts. */
+    rec->u.ret.val = *v;
+    rec->u.ret.parts = copy_parts(self->c, v->parts, v->nparts);
+    rec->u.ret.val.parts = rec->u.ret.parts;
+  }
+}
+
+static void w_alloca_(CGTarget* t, Operand dst, Operand size, u32 align) {
+  /* Record a dynamic stack allocation; align is replayed unchanged. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_ALLOCA);
+  rec->u.alloca_.align = align;
+  rec->u.alloca_.size = size;
+  rec->u.alloca_.dst = dst;
+}
+
+static void w_va_start_(CGTarget* t, Operand ap) {
+  /* Record va_start on `ap` (u.va_se layout, shared with va_end). */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_VA_START);
+  rec->u.va_se.ap = ap;
+}
+static void w_va_arg_(CGTarget* t, Operand dst, Operand ap, const Type* ty) {
+  /* Record va_arg; the Type pointer is stored as-is, not copied. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_VA_ARG);
+  rec->u.va_arg_.ty = ty;
+  rec->u.va_arg_.ap = ap;
+  rec->u.va_arg_.dst = dst;
+}
+static void w_va_end_(CGTarget* t, Operand ap) {
+  /* Record va_end on `ap` (u.va_se layout, shared with va_start). */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_VA_END);
+  rec->u.va_se.ap = ap;
+}
+static void w_va_copy_(CGTarget* t, Operand dst, Operand src) {
+  /* Recorded in the shared u.copy layout under its own tag. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_VA_COPY);
+  rec->u.copy.src = src;
+  rec->u.copy.dst = dst;
+}
+
+static void w_setjmp_(CGTarget* t, Operand dst, Operand buf) {
+  /* Record a setjmp call with its result operand and jump buffer. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_SETJMP);
+  rec->u.setjmp_.buf = buf;
+  rec->u.setjmp_.dst = dst;
+}
+static void w_longjmp_(CGTarget* t, Operand buf, Operand val) {
+  /* Record a longjmp call with its jump buffer and value operand. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_LONGJMP);
+  rec->u.longjmp_.val = val;
+  rec->u.longjmp_.buf = buf;
+}
+
+static void w_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess m,
+                          MemOrder mo) {
+  /* Record an atomic load together with its access and ordering. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_ATOMIC_LOAD);
+  rec->u.atomic_load.mo = mo;
+  rec->u.atomic_load.mem = m;
+  rec->u.atomic_load.addr = addr;
+  rec->u.atomic_load.dst = dst;
+}
+static void w_atomic_store(CGTarget* t, Operand addr, Operand src, MemAccess m,
+                           MemOrder mo) {
+  /* Record an atomic store together with its access and ordering. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_ATOMIC_STORE);
+  rec->u.atomic_store.mo = mo;
+  rec->u.atomic_store.mem = m;
+  rec->u.atomic_store.src = src;
+  rec->u.atomic_store.addr = addr;
+}
+static void w_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
+                         Operand val, MemAccess m, MemOrder mo) {
+  /* Record an atomic read-modify-write with its access and ordering. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_ATOMIC_RMW);
+  rec->u.atomic_rmw.mo = mo;
+  rec->u.atomic_rmw.mem = m;
+  rec->u.atomic_rmw.val = val;
+  rec->u.atomic_rmw.addr = addr;
+  rec->u.atomic_rmw.dst = dst;
+  rec->u.atomic_rmw.op = op;
+}
+static void w_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
+                         Operand expected, Operand desired, MemAccess m,
+                         MemOrder s, MemOrder f) {
+  /* Record a compare-and-swap; s and f are the success/failure orders. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_ATOMIC_CAS);
+  rec->u.atomic_cas.failure = f;
+  rec->u.atomic_cas.success = s;
+  rec->u.atomic_cas.mem = m;
+  rec->u.atomic_cas.desired = desired;
+  rec->u.atomic_cas.expected = expected;
+  rec->u.atomic_cas.addr = addr;
+  rec->u.atomic_cas.ok = ok;
+  rec->u.atomic_cas.prior = prior;
+}
+static void w_fence(CGTarget* t, MemOrder mo) {
+  /* Record a fence with memory order `mo`. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_FENCE);
+  rec->u.fence.mo = mo;
+}
+
+static void w_intrinsic(CGTarget* t, IntrinKind k, Operand* dsts, u32 nd,
+                        const Operand* args, u32 na) {
+  /* Both operand arrays are duplicated so the tape holds its own copies. */
+  TapeEntry* rec = tape_append(impl_of(t), TOP_INTRINSIC);
+  rec->u.intrinsic.kind = k;
+  rec->u.intrinsic.dsts = copy_operands(impl_of(t)->c, dsts, nd);
+  rec->u.intrinsic.ndst = nd;
+  rec->u.intrinsic.args = copy_operands(impl_of(t)->c, args, na);
+  rec->u.intrinsic.narg = na;
+}
+
+static void w_asm_block(CGTarget* t, const char* tmpl,
+                        const AsmConstraint* outs, u32 nout, Operand* out_ops,
+                        const AsmConstraint* ins, u32 nin,
+                        const Operand* in_ops, const Sym* clobbers, u32 nclob) {
+  /* Inline asm (corpus group M) is deferred, so the wrapper cannot record
+   * it yet: every argument is ignored and the call panics. The message
+   * is panic_unsupported's generic "called under unbounded virtuals"
+   * text. */
+  OptImpl* self = impl_of(t);
+  (void)tmpl;
+  (void)outs, (void)nout;
+  (void)out_ops;
+  (void)ins, (void)nin;
+  (void)in_ops;
+  (void)clobbers, (void)nclob;
+  panic_unsupported(self, "asm_block");
+}
+
+static void w_set_loc(CGTarget* t, SrcLoc loc) {
+  OptImpl* self = impl_of(t);
+  /* Update pending_loc before appending: tape_append stamps each entry
+   * with pending_loc, so the SET_LOC entry itself carries the new loc. */
+  self->pending_loc = loc;
+  tape_append(self, TOP_SET_LOC)->u.set_loc.loc = loc;
+}
+
+/* ---- replay-time translation ---- */
+
+static Reg xlat_reg(OptImpl* o, Reg vreg) {
+  if (vreg == 0u || vreg == REG_NONE) return vreg; /* sentinels pass through */
+  if (!(vreg < o->reg_map_cap && o->reg_map[vreg] != REG_NONE)) {
+    const SrcLoc pos = {0, 0, 0};
+    compiler_panic(o->c, pos, "opt replay: unmapped vreg %u", (unsigned)vreg);
+  }
+  return o->reg_map[vreg];
+}
+
+static FrameSlot xlat_slot(OptImpl* o, FrameSlot vs) {
+  if (vs == FRAME_SLOT_NONE) return vs; /* the sentinel passes through */
+  if (!(vs < o->slot_map_cap && o->slot_map[vs] != FRAME_SLOT_NONE)) {
+    const SrcLoc pos = {0, 0, 0};
+    compiler_panic(o->c, pos, "opt replay: unmapped vslot %u", (unsigned)vs);
+  }
+  return o->slot_map[vs];
+}
+
+static Label xlat_label(OptImpl* o, Label vl) {
+  if (vl == LABEL_NONE) return vl; /* the sentinel passes through */
+  if (!(vl < o->label_map_cap && o->label_map[vl] != LABEL_NONE)) {
+    const SrcLoc pos = {0, 0, 0};
+    compiler_panic(o->c, pos, "opt replay: unmapped vlabel %u", (unsigned)vl);
+  }
+  return o->label_map[vl];
+}
+
+static CGScope xlat_scope(OptImpl* o, CGScope vs) {
+  if (vs == CG_SCOPE_NONE) return vs; /* the sentinel passes through */
+  if (!(vs < o->scope_map_cap && o->scope_map[vs] != CG_SCOPE_NONE)) {
+    const SrcLoc pos = {0, 0, 0};
+    compiler_panic(o->c, pos, "opt replay: unmapped vscope %u", (unsigned)vs);
+  }
+  return o->scope_map[vs];
+}
+
+static Operand xlat_op(OptImpl* o, Operand op) {
+  /* Rewrite the virtual id an operand carries, if it carries one.
+   * OPK_IMM and OPK_GLOBAL carry none; they, and any kind not listed
+   * below, are returned unchanged. The operand is taken and returned
+   * by value, so the tape entry itself is not modified. */
+  const OpKind kind = (OpKind)op.kind;
+  if (kind == OPK_REG) {
+    /* register operand: vreg -> target register */
+    op.v.reg = xlat_reg(o, op.v.reg);
+  } else if (kind == OPK_LOCAL) {
+    /* frame-local operand: vslot -> target slot */
+    op.v.frame_slot = xlat_slot(o, op.v.frame_slot);
+  } else if (kind == OPK_INDIRECT) {
+    /* indirect operand: only the base register is translated */
+    op.v.ind.base = xlat_reg(o, op.v.ind.base);
+  }
+  return op;
+}
+
+static CGABIValue xlat_abivalue(OptImpl* o, const CGABIValue* in,
+                                CGABIPart* parts_out) {
+  /* Copy of *in with storage translated; parts go to parts_out if given. */
+  CGABIValue xl = *in;
+  xl.storage = xlat_op(o, in->storage);
+  if (!in->nparts || !parts_out) return xl;
+  for (u32 k = 0; k < in->nparts; ++k) {
+    parts_out[k] = in->parts[k];
+    parts_out[k].op = xlat_op(o, parts_out[k].op);
+  }
+  xl.parts = parts_out;
+  return xl;
+}
+
+/* ---- replay ---- */
+
+static void replay(OptImpl* o) {
+ CGTarget* w = o->target;
+
+ /* Pre-size the maps to the high-water mark for this function. */
+ if (o->next_vreg > 1) map_reg_grow(o, o->next_vreg);
+ if (o->next_vslot > 1) map_slot_grow(o, o->next_vslot);
+ if (o->next_vlabel > 1) map_label_grow(o, o->next_vlabel);
+ if (o->next_vscope > 1) map_scope_grow(o, o->next_vscope);
+
+ for (u32 i = 0; i < o->ntape; ++i) {
+ TapeEntry* e = &o->tape[i];
+ if (e->dead) continue;
+ switch ((TapeOpKind)e->op) {
+ case TOP_FUNC_BEGIN: {
+ /* Build a fresh CGFuncDesc with translated param slots. */
+ CGFuncDesc fd = e->u.func_begin.desc;
+ if (fd.nparams) {
+ CGParamDesc* params = arena_array(o->c->tu, CGParamDesc, fd.nparams);
+ for (u32 k = 0; k < fd.nparams; ++k) {
+ params[k] = e->u.func_begin.params[k];
+ params[k].slot = xlat_slot(o, e->u.func_begin.params[k].slot);
+ }
+ fd.params = params;
+ }
+ w->func_begin(w, &fd);
+ break;
+ }
+ case TOP_FUNC_END:
+ w->func_end(w);
+ break;
+ case TOP_ALLOC_REG: {
+ Reg r =
+ w->alloc_reg(w, e->u.alloc_reg.cls, e->u.alloc_reg.ty);
+ Reg v = e->u.alloc_reg.vreg;
+ if (v >= o->reg_map_cap) map_reg_grow(o, v + 1);
+ o->reg_map[v] = r;
+ break;
+ }
+ case TOP_FRAME_SLOT: {
+ FrameSlot s = w->frame_slot(w, &e->u.frame_slot.desc);
+ FrameSlot v = e->u.frame_slot.vslot;
+ if (v >= o->slot_map_cap) map_slot_grow(o, v + 1);
+ o->slot_map[v] = s;
+ break;
+ }
+ case TOP_PARAM: {
+ CGParamDesc d = e->u.param.desc;
+ d.slot = xlat_slot(o, d.slot);
+ w->param(w, &d);
+ break;
+ }
+ case TOP_LABEL_NEW: {
+ Label l = w->label_new(w);
+ Label v = e->u.label_new.vlabel;
+ if (v >= o->label_map_cap) map_label_grow(o, v + 1);
+ o->label_map[v] = l;
+ break;
+ }
+ case TOP_LABEL_PLACE:
+ w->label_place(w, xlat_label(o, e->u.label_op.vlabel));
+ break;
+ case TOP_JUMP:
+ w->jump(w, xlat_label(o, e->u.label_op.vlabel));
+ break;
+ case TOP_CMP_BRANCH:
+ w->cmp_branch(w, e->u.cmp_branch.op, xlat_op(o, e->u.cmp_branch.a),
+ xlat_op(o, e->u.cmp_branch.b),
+ xlat_label(o, e->u.cmp_branch.vlabel));
+ break;
+ case TOP_SCOPE_BEGIN: {
+ CGScopeDesc d = e->u.scope_begin.desc;
+ d.cond = xlat_op(o, d.cond);
+ d.break_label = xlat_label(o, d.break_label);
+ d.continue_label = xlat_label(o, d.continue_label);
+ CGScope s = w->scope_begin(w, &d);
+ CGScope v = e->u.scope_begin.vscope;
+ if (v >= o->scope_map_cap) map_scope_grow(o, v + 1);
+ o->scope_map[v] = s;
+ break;
+ }
+ case TOP_SCOPE_ELSE:
+ w->scope_else(w, xlat_scope(o, e->u.scope_op.vscope));
+ break;
+ case TOP_SCOPE_END:
+ w->scope_end(w, xlat_scope(o, e->u.scope_op.vscope));
+ break;
+ case TOP_BREAK_TO:
+ w->break_to(w, xlat_scope(o, e->u.scope_op.vscope));
+ break;
+ case TOP_CONTINUE_TO:
+ w->continue_to(w, xlat_scope(o, e->u.scope_op.vscope));
+ break;
+ case TOP_LOAD_IMM:
+ w->load_imm(w, xlat_op(o, e->u.load_imm.dst), e->u.load_imm.imm);
+ break;
+ case TOP_LOAD_CONST:
+ w->load_const(w, xlat_op(o, e->u.load_const.dst), e->u.load_const.cb);
+ break;
+ case TOP_COPY:
+ w->copy(w, xlat_op(o, e->u.copy.dst), xlat_op(o, e->u.copy.src));
+ break;
+ case TOP_LOAD:
+ w->load(w, xlat_op(o, e->u.load.dst), xlat_op(o, e->u.load.addr),
+ e->u.load.mem);
+ break;
+ case TOP_STORE:
+ w->store(w, xlat_op(o, e->u.store.addr), xlat_op(o, e->u.store.src),
+ e->u.store.mem);
+ break;
+ case TOP_ADDR_OF:
+ w->addr_of(w, xlat_op(o, e->u.copy.dst), xlat_op(o, e->u.copy.src));
+ break;
+ case TOP_TLS_ADDR_OF:
+ w->tls_addr_of(w, xlat_op(o, e->u.tls_addr_of.dst),
+ e->u.tls_addr_of.sym, e->u.tls_addr_of.addend);
+ break;
+ case TOP_COPY_BYTES:
+ w->copy_bytes(w, xlat_op(o, e->u.agg.a), xlat_op(o, e->u.agg.b),
+ e->u.agg.agg);
+ break;
+ case TOP_SET_BYTES:
+ w->set_bytes(w, xlat_op(o, e->u.agg.a), xlat_op(o, e->u.agg.b),
+ e->u.agg.agg);
+ break;
+ case TOP_BITFIELD_LOAD:
+ w->bitfield_load(w, xlat_op(o, e->u.bitfield_load.dst),
+ xlat_op(o, e->u.bitfield_load.record),
+ e->u.bitfield_load.bf);
+ break;
+ case TOP_BITFIELD_STORE:
+ w->bitfield_store(w, xlat_op(o, e->u.bitfield_store.record),
+ xlat_op(o, e->u.bitfield_store.src),
+ e->u.bitfield_store.bf);
+ break;
+ case TOP_BINOP:
+ w->binop(w, e->u.binop.op, xlat_op(o, e->u.binop.dst),
+ xlat_op(o, e->u.binop.a), xlat_op(o, e->u.binop.b));
+ break;
+ case TOP_UNOP:
+ w->unop(w, e->u.unop.op, xlat_op(o, e->u.unop.dst),
+ xlat_op(o, e->u.unop.a));
+ break;
+ case TOP_CMP:
+ w->cmp(w, e->u.cmp.op, xlat_op(o, e->u.cmp.dst),
+ xlat_op(o, e->u.cmp.a), xlat_op(o, e->u.cmp.b));
+ break;
+ case TOP_CONVERT:
+ w->convert(w, e->u.convert.kind, xlat_op(o, e->u.convert.dst),
+ xlat_op(o, e->u.convert.src));
+ break;
+ case TOP_CALL: {
+ CGCallDesc cd = e->u.call.desc;
+ cd.callee = xlat_op(o, cd.callee);
+ CGABIValue* args = NULL;
+ if (cd.nargs) {
+ args = arena_array(o->c->tu, CGABIValue, cd.nargs);
+ for (u32 k = 0; k < cd.nargs; ++k) {
+ CGABIPart* parts =
+ e->u.call.args[k].nparts
+ ? arena_array(o->c->tu, CGABIPart,
+ e->u.call.args[k].nparts)
+ : NULL;
+ args[k] = xlat_abivalue(o, &e->u.call.args[k], parts);
+ }
+ cd.args = args;
+ } else {
+ cd.args = NULL;
+ }
+ CGABIPart* ret_parts =
+ cd.ret.nparts
+ ? arena_array(o->c->tu, CGABIPart, cd.ret.nparts)
+ : NULL;
+ cd.ret = xlat_abivalue(o, &e->u.call.desc.ret, ret_parts);
+ w->call(w, &cd);
+ break;
+ }
+ case TOP_RET: {
+ if (!e->u.ret.present) {
+ w->ret(w, NULL);
+ break;
+ }
+ CGABIPart* parts =
+ e->u.ret.val.nparts
+ ? arena_array(o->c->tu, CGABIPart, e->u.ret.val.nparts)
+ : NULL;
+ CGABIValue v = xlat_abivalue(o, &e->u.ret.val, parts);
+ w->ret(w, &v);
+ break;
+ }
+ case TOP_ALLOCA:
+ w->alloca_(w, xlat_op(o, e->u.alloca_.dst),
+ xlat_op(o, e->u.alloca_.size), e->u.alloca_.align);
+ break;
+ case TOP_VA_START:
+ w->va_start_(w, xlat_op(o, e->u.va_se.ap));
+ break;
+ case TOP_VA_ARG:
+ w->va_arg_(w, xlat_op(o, e->u.va_arg_.dst),
+ xlat_op(o, e->u.va_arg_.ap), e->u.va_arg_.ty);
+ break;
+ case TOP_VA_END:
+ w->va_end_(w, xlat_op(o, e->u.va_se.ap));
+ break;
+ case TOP_VA_COPY:
+ w->va_copy_(w, xlat_op(o, e->u.copy.dst), xlat_op(o, e->u.copy.src));
+ break;
+ case TOP_SETJMP:
+ w->setjmp_(w, xlat_op(o, e->u.setjmp_.dst),
+ xlat_op(o, e->u.setjmp_.buf));
+ break;
+ case TOP_LONGJMP:
+ w->longjmp_(w, xlat_op(o, e->u.longjmp_.buf),
+ xlat_op(o, e->u.longjmp_.val));
+ break;
+ case TOP_ATOMIC_LOAD:
+ w->atomic_load(w, xlat_op(o, e->u.atomic_load.dst),
+ xlat_op(o, e->u.atomic_load.addr),
+ e->u.atomic_load.mem, e->u.atomic_load.mo);
+ break;
+ case TOP_ATOMIC_STORE:
+ w->atomic_store(w, xlat_op(o, e->u.atomic_store.addr),
+ xlat_op(o, e->u.atomic_store.src),
+ e->u.atomic_store.mem, e->u.atomic_store.mo);
+ break;
+ case TOP_ATOMIC_RMW:
+ w->atomic_rmw(w, e->u.atomic_rmw.op, xlat_op(o, e->u.atomic_rmw.dst),
+ xlat_op(o, e->u.atomic_rmw.addr),
+ xlat_op(o, e->u.atomic_rmw.val), e->u.atomic_rmw.mem,
+ e->u.atomic_rmw.mo);
+ break;
+ case TOP_ATOMIC_CAS:
+ w->atomic_cas(w, xlat_op(o, e->u.atomic_cas.prior),
+ xlat_op(o, e->u.atomic_cas.ok),
+ xlat_op(o, e->u.atomic_cas.addr),
+ xlat_op(o, e->u.atomic_cas.expected),
+ xlat_op(o, e->u.atomic_cas.desired),
+ e->u.atomic_cas.mem, e->u.atomic_cas.success,
+ e->u.atomic_cas.failure);
+ break;
+ case TOP_FENCE:
+ w->fence(w, e->u.fence.mo);
+ break;
+ case TOP_INTRINSIC: {
+ Operand* dsts = NULL;
+ Operand* args = NULL;
+ if (e->u.intrinsic.ndst) {
+ dsts = arena_array(o->c->tu, Operand, e->u.intrinsic.ndst);
+ for (u32 k = 0; k < e->u.intrinsic.ndst; ++k) {
+ dsts[k] = xlat_op(o, e->u.intrinsic.dsts[k]);
+ }
+ }
+ if (e->u.intrinsic.narg) {
+ args = arena_array(o->c->tu, Operand, e->u.intrinsic.narg);
+ for (u32 k = 0; k < e->u.intrinsic.narg; ++k) {
+ args[k] = xlat_op(o, e->u.intrinsic.args[k]);
+ }
+ }
+ w->intrinsic(w, e->u.intrinsic.kind, dsts, e->u.intrinsic.ndst, args,
+ e->u.intrinsic.narg);
+ break;
+ }
+ case TOP_SET_LOC:
+ w->set_loc(w, e->u.set_loc.loc);
+ break;
+ }
+ }
+}
+
+/* ---- printer ---- */
+
+/* Write the NUL-terminated string `s` to `w`. The length is measured by
+ * hand; an empty string results in no write call at all. */
+static void wstr(Writer* w, const char* s) {
+  const char* end = s;
+  while (*end != '\0') end++;
+  if (end == s) return;
+  w->write(w, s, (size_t)(end - s));
+}
+
+/* Render `v` in decimal into `out` as a NUL-terminated string, with a
+ * leading '-' for negative values. The longest result (INT64_MIN) is 20
+ * characters plus the terminator; the caller in this file passes a
+ * 32-byte buffer. Nothing is returned. */
+static void fmt_i64(i64 v, char* out) {
+  char digits[32]; /* least-significant digit first */
+  u32 len = 0;
+  int negative = v < 0;
+  /* Magnitude as u64. For negatives, negate (v + 1) and add the 1 back in
+   * unsigned arithmetic so INT64_MIN never overflows a signed type. */
+  u64 mag = negative ? (u64)(-(v + 1)) + 1u : (u64)v;
+
+  do {
+    digits[len++] = (char)('0' + (mag % 10u));
+    mag /= 10u;
+  } while (mag != 0);
+  if (negative) digits[len++] = '-';
+
+  /* Copy out in reverse so the most-significant character comes first. */
+  char* p = out;
+  while (len > 0) *p++ = digits[--len];
+  *p = 0;
+}
+
+/* Write `v` to `w` as decimal text (formatted by fmt_i64, emitted by wstr). */
+static void wint(Writer* w, i64 v) {
+  char text[32];
+
+  fmt_i64(v, text);
+  wstr(w, text);
+}
+
+/* Lowercase mnemonic used for a BinOp in the tape dump. Any value not
+ * listed below yields "?binop". */
+static const char* binop_name(BinOp op) {
+  const char* name = "?binop";
+  switch (op) {
+    case BO_IADD: name = "iadd"; break;
+    case BO_ISUB: name = "isub"; break;
+    case BO_IMUL: name = "imul"; break;
+    case BO_SDIV: name = "sdiv"; break;
+    case BO_UDIV: name = "udiv"; break;
+    case BO_SREM: name = "srem"; break;
+    case BO_UREM: name = "urem"; break;
+    case BO_FADD: name = "fadd"; break;
+    case BO_FSUB: name = "fsub"; break;
+    case BO_FMUL: name = "fmul"; break;
+    case BO_FDIV: name = "fdiv"; break;
+    case BO_AND: name = "and"; break;
+    case BO_OR: name = "or"; break;
+    case BO_XOR: name = "xor"; break;
+    case BO_SHL: name = "shl"; break;
+    case BO_SHR_S: name = "shr_s"; break;
+    case BO_SHR_U: name = "shr_u"; break;
+  }
+  return name;
+}
+
+/* Lowercase mnemonic used for a UnOp in the tape dump. Any value not
+ * listed below yields "?unop". */
+static const char* unop_name(UnOp op) {
+  const char* name = "?unop";
+  switch (op) {
+    case UO_NEG: name = "neg"; break;
+    case UO_NOT: name = "not"; break;
+    case UO_BNOT: name = "bnot"; break;
+  }
+  return name;
+}
+
+/* Lowercase mnemonic used for a CmpOp in the tape dump. Any value not
+ * listed below yields "?cmp". */
+static const char* cmp_name(CmpOp op) {
+  const char* name = "?cmp";
+  switch (op) {
+    case CMP_EQ: name = "eq"; break;
+    case CMP_NE: name = "ne"; break;
+    case CMP_LT_S: name = "lt_s"; break;
+    case CMP_LE_S: name = "le_s"; break;
+    case CMP_GT_S: name = "gt_s"; break;
+    case CMP_GE_S: name = "ge_s"; break;
+    case CMP_LT_U: name = "lt_u"; break;
+    case CMP_LE_U: name = "le_u"; break;
+    case CMP_GT_U: name = "gt_u"; break;
+    case CMP_GE_U: name = "ge_u"; break;
+    case CMP_LT_F: name = "lt_f"; break;
+    case CMP_LE_F: name = "le_f"; break;
+    case CMP_GT_F: name = "gt_f"; break;
+    case CMP_GE_F: name = "ge_f"; break;
+  }
+  return name;
+}
+
+/* Write a compact textual form of `op` to `w`:
+ *
+ *   OPK_IMM       imm:<value>
+ *   OPK_REG       v<reg>
+ *   OPK_LOCAL     fs<frame_slot>
+ *   OPK_GLOBAL    sym<sym>       followed by +N / -N when the addend is non-zero
+ *   OPK_INDIRECT  [v<base>]      with +N / -N inside the brackets when the
+ *                                offset is non-zero
+ *
+ * Any other kind prints "?op". Ids are the wrapper-side values stored in
+ * the operand; no translation is applied here. */
+static void print_operand(Writer* w, const Operand* op) {
+  switch ((OpKind)op->kind) {
+    case OPK_IMM:
+      wstr(w, "imm:");
+      wint(w, op->v.imm);
+      return;
+    case OPK_REG:
+      wstr(w, "v");
+      wint(w, (i64)op->v.reg);
+      return;
+    case OPK_LOCAL:
+      wstr(w, "fs");
+      wint(w, (i64)op->v.frame_slot);
+      return;
+    case OPK_GLOBAL:
+      wstr(w, "sym");
+      wint(w, (i64)op->v.global.sym);
+      if (op->v.global.addend) {
+        /* wint already emits '-' for negative values; only a positive
+         * addend needs an explicit sign, otherwise we would print "+-N". */
+        if (op->v.global.addend > 0) wstr(w, "+");
+        wint(w, op->v.global.addend);
+      }
+      return;
+    case OPK_INDIRECT:
+      wstr(w, "[v");
+      wint(w, (i64)op->v.ind.base);
+      if (op->v.ind.ofs) {
+        /* Same sign handling as the global addend above. */
+        if (op->v.ind.ofs > 0) wstr(w, "+");
+        wint(w, op->v.ind.ofs);
+      }
+      wstr(w, "]");
+      return;
+  }
+  wstr(w, "?op");
+}
+
+/* Write `label` followed by `v` in decimal. */
+static void tape_put_num(Writer* w, const char* label, i64 v) {
+  wstr(w, label);
+  wint(w, v);
+}
+
+/* Write `head`, then operands `x` and `y` separated by ", ". */
+static void tape_put_ops2(Writer* w, const char* head, const Operand* x,
+                          const Operand* y) {
+  wstr(w, head);
+  print_operand(w, x);
+  wstr(w, ", ");
+  print_operand(w, y);
+}
+
+/* Dump the current function's tape to `w`, one line per tape entry, in
+ * tape order. Entries flagged `dead` are printed as a " ; dead"
+ * placeholder so the line count still matches the entry count. All ids
+ * shown (v<N>, fs<N>, L<N>, S<N>) are the wrapper-side values stored in
+ * the tape. Only a summary of each entry is printed: memory descriptors,
+ * call arguments and intrinsic operands are not expanded.
+ *
+ * tls_addr_of prints its addend (when non-zero) because replay forwards
+ * it to the target; without it, entries that differ only in the addend
+ * would be indistinguishable in the dump. */
+static void print_tape(OptImpl* o, Writer* w) {
+  for (u32 i = 0; i < o->ntape; ++i) {
+    TapeEntry* e = &o->tape[i];
+    if (e->dead) {
+      wstr(w, " ; dead\n");
+      continue;
+    }
+    wstr(w, " ");
+    switch ((TapeOpKind)e->op) {
+      case TOP_FUNC_BEGIN:
+        tape_put_num(w, "func_begin sym=", (i64)e->u.func_begin.desc.sym);
+        tape_put_num(w, " nparams=", (i64)e->u.func_begin.desc.nparams);
+        break;
+      case TOP_FUNC_END:
+        wstr(w, "func_end");
+        break;
+      case TOP_ALLOC_REG:
+        tape_put_num(w, "alloc_reg v", (i64)e->u.alloc_reg.vreg);
+        tape_put_num(w, " cls=", (i64)e->u.alloc_reg.cls);
+        break;
+      case TOP_FRAME_SLOT:
+        tape_put_num(w, "frame_slot fs", (i64)e->u.frame_slot.vslot);
+        tape_put_num(w, " size=", (i64)e->u.frame_slot.desc.size);
+        tape_put_num(w, " kind=", (i64)e->u.frame_slot.desc.kind);
+        break;
+      case TOP_PARAM:
+        tape_put_num(w, "param idx=", (i64)e->u.param.desc.index);
+        tape_put_num(w, " fs=", (i64)e->u.param.desc.slot);
+        break;
+      case TOP_LABEL_NEW:
+        tape_put_num(w, "label_new L", (i64)e->u.label_new.vlabel);
+        break;
+      case TOP_LABEL_PLACE:
+        tape_put_num(w, "label_place L", (i64)e->u.label_op.vlabel);
+        break;
+      case TOP_JUMP:
+        tape_put_num(w, "jump L", (i64)e->u.label_op.vlabel);
+        break;
+      case TOP_CMP_BRANCH:
+        wstr(w, "cmp_branch ");
+        wstr(w, cmp_name(e->u.cmp_branch.op));
+        tape_put_ops2(w, " ", &e->u.cmp_branch.a, &e->u.cmp_branch.b);
+        tape_put_num(w, " -> L", (i64)e->u.cmp_branch.vlabel);
+        break;
+      case TOP_SCOPE_BEGIN:
+        tape_put_num(w, "scope_begin S", (i64)e->u.scope_begin.vscope);
+        tape_put_num(w, " kind=", (i64)e->u.scope_begin.desc.kind);
+        break;
+      case TOP_SCOPE_ELSE:
+        tape_put_num(w, "scope_else S", (i64)e->u.scope_op.vscope);
+        break;
+      case TOP_SCOPE_END:
+        tape_put_num(w, "scope_end S", (i64)e->u.scope_op.vscope);
+        break;
+      case TOP_BREAK_TO:
+        tape_put_num(w, "break_to S", (i64)e->u.scope_op.vscope);
+        break;
+      case TOP_CONTINUE_TO:
+        tape_put_num(w, "continue_to S", (i64)e->u.scope_op.vscope);
+        break;
+      case TOP_LOAD_IMM:
+        wstr(w, "load_imm ");
+        print_operand(w, &e->u.load_imm.dst);
+        tape_put_num(w, ", ", e->u.load_imm.imm);
+        break;
+      case TOP_LOAD_CONST:
+        wstr(w, "load_const ");
+        print_operand(w, &e->u.load_const.dst);
+        tape_put_num(w, ", <bytes:", (i64)e->u.load_const.cb.size);
+        wstr(w, ">");
+        break;
+      case TOP_COPY:
+        tape_put_ops2(w, "copy ", &e->u.copy.dst, &e->u.copy.src);
+        break;
+      case TOP_LOAD:
+        tape_put_ops2(w, "load ", &e->u.load.dst, &e->u.load.addr);
+        break;
+      case TOP_STORE:
+        tape_put_ops2(w, "store ", &e->u.store.addr, &e->u.store.src);
+        break;
+      case TOP_ADDR_OF:
+        /* addr_of is recorded in the `copy` payload (replay reads it the
+         * same way). */
+        tape_put_ops2(w, "addr_of ", &e->u.copy.dst, &e->u.copy.src);
+        break;
+      case TOP_TLS_ADDR_OF:
+        wstr(w, "tls_addr_of ");
+        print_operand(w, &e->u.tls_addr_of.dst);
+        tape_put_num(w, ", sym", (i64)e->u.tls_addr_of.sym);
+        if (e->u.tls_addr_of.addend) {
+          /* wint supplies '-' itself; only positives need a sign. */
+          if (e->u.tls_addr_of.addend > 0) wstr(w, "+");
+          wint(w, (i64)e->u.tls_addr_of.addend);
+        }
+        break;
+      case TOP_COPY_BYTES:
+        tape_put_ops2(w, "copy_bytes ", &e->u.agg.a, &e->u.agg.b);
+        tape_put_num(w, " size=", (i64)e->u.agg.agg.size);
+        break;
+      case TOP_SET_BYTES:
+        tape_put_ops2(w, "set_bytes ", &e->u.agg.a, &e->u.agg.b);
+        tape_put_num(w, " size=", (i64)e->u.agg.agg.size);
+        break;
+      case TOP_BITFIELD_LOAD:
+        tape_put_ops2(w, "bitfield_load ", &e->u.bitfield_load.dst,
+                      &e->u.bitfield_load.record);
+        break;
+      case TOP_BITFIELD_STORE:
+        tape_put_ops2(w, "bitfield_store ", &e->u.bitfield_store.record,
+                      &e->u.bitfield_store.src);
+        break;
+      case TOP_BINOP:
+        wstr(w, binop_name(e->u.binop.op));
+        tape_put_ops2(w, " ", &e->u.binop.dst, &e->u.binop.a);
+        wstr(w, ", ");
+        print_operand(w, &e->u.binop.b);
+        break;
+      case TOP_UNOP:
+        wstr(w, unop_name(e->u.unop.op));
+        tape_put_ops2(w, " ", &e->u.unop.dst, &e->u.unop.a);
+        break;
+      case TOP_CMP:
+        wstr(w, "cmp.");
+        wstr(w, cmp_name(e->u.cmp.op));
+        tape_put_ops2(w, " ", &e->u.cmp.dst, &e->u.cmp.a);
+        wstr(w, ", ");
+        print_operand(w, &e->u.cmp.b);
+        break;
+      case TOP_CONVERT:
+        tape_put_ops2(w, "convert ", &e->u.convert.dst, &e->u.convert.src);
+        tape_put_num(w, " kind=", (i64)e->u.convert.kind);
+        break;
+      case TOP_CALL:
+        wstr(w, "call ");
+        print_operand(w, &e->u.call.desc.callee);
+        tape_put_num(w, " nargs=", (i64)e->u.call.desc.nargs);
+        break;
+      case TOP_RET:
+        wstr(w, "ret");
+        if (e->u.ret.present) {
+          wstr(w, " ");
+          print_operand(w, &e->u.ret.val.storage);
+        }
+        break;
+      case TOP_ALLOCA:
+        tape_put_ops2(w, "alloca ", &e->u.alloca_.dst, &e->u.alloca_.size);
+        break;
+      case TOP_VA_START:
+        wstr(w, "va_start ");
+        print_operand(w, &e->u.va_se.ap);
+        break;
+      case TOP_VA_ARG:
+        tape_put_ops2(w, "va_arg ", &e->u.va_arg_.dst, &e->u.va_arg_.ap);
+        break;
+      case TOP_VA_END:
+        wstr(w, "va_end ");
+        print_operand(w, &e->u.va_se.ap);
+        break;
+      case TOP_VA_COPY:
+        /* va_copy also lives in the `copy` payload. */
+        tape_put_ops2(w, "va_copy ", &e->u.copy.dst, &e->u.copy.src);
+        break;
+      case TOP_SETJMP:
+        tape_put_ops2(w, "setjmp ", &e->u.setjmp_.dst, &e->u.setjmp_.buf);
+        break;
+      case TOP_LONGJMP:
+        tape_put_ops2(w, "longjmp ", &e->u.longjmp_.buf, &e->u.longjmp_.val);
+        break;
+      case TOP_ATOMIC_LOAD:
+        tape_put_ops2(w, "atomic_load ", &e->u.atomic_load.dst,
+                      &e->u.atomic_load.addr);
+        break;
+      case TOP_ATOMIC_STORE:
+        tape_put_ops2(w, "atomic_store ", &e->u.atomic_store.addr,
+                      &e->u.atomic_store.src);
+        break;
+      case TOP_ATOMIC_RMW:
+        tape_put_num(w, "atomic_rmw op=", (i64)e->u.atomic_rmw.op);
+        tape_put_ops2(w, " ", &e->u.atomic_rmw.dst, &e->u.atomic_rmw.addr);
+        wstr(w, ", ");
+        print_operand(w, &e->u.atomic_rmw.val);
+        break;
+      case TOP_ATOMIC_CAS:
+        wstr(w, "atomic_cas prior=");
+        print_operand(w, &e->u.atomic_cas.prior);
+        wstr(w, " ok=");
+        print_operand(w, &e->u.atomic_cas.ok);
+        wstr(w, " addr=");
+        print_operand(w, &e->u.atomic_cas.addr);
+        break;
+      case TOP_FENCE:
+        tape_put_num(w, "fence mo=", (i64)e->u.fence.mo);
+        break;
+      case TOP_INTRINSIC:
+        tape_put_num(w, "intrinsic kind=", (i64)e->u.intrinsic.kind);
+        tape_put_num(w, " ndst=", (i64)e->u.intrinsic.ndst);
+        tape_put_num(w, " narg=", (i64)e->u.intrinsic.narg);
+        break;
+      case TOP_SET_LOC:
+        tape_put_num(w, "set_loc ", (i64)e->u.set_loc.loc.line);
+        tape_put_num(w, ":", (i64)e->u.set_loc.loc.col);
+        break;
+    }
+    wstr(w, "\n");
+  }
+}
+
+/* ---- Phase 2 peephole: integer constant folding ----
+ *
+ * Pattern: LOAD_IMM(V_a, k_a); LOAD_IMM(V_b, k_b); BINOP(op, V_d, V_a, V_b)
+ *          with op ∈ {IADD, ISUB, IMUL}.
+ * After:   the BINOP is rewritten in place to LOAD_IMM(V_d, k_a OP k_b).
+ *
+ * Both operands must be OPK_REG wrapper vregs whose value at the BINOP is
+ * known to be the constant written by a LOAD_IMM (or by an earlier fold,
+ * which is how chains fold transitively). The intermediate LOAD_IMMs are
+ * left in place — they may have other uses, and DCE is a Phase 3 concern.
+ *
+ * Soundness. Constants are tracked by a single forward walk over the
+ * tape, so the knowledge is only valid along straight-line code. To keep
+ * it valid:
+ *   - an entry that visibly writes one destination operand (COPY, LOAD,
+ *     ADDR_OF, UNOP, CMP, CONVERT, and a BINOP that is not folded)
+ *     forgets that destination if it is a vreg;
+ *   - entries that write no vreg and are not a control-flow merge
+ *     (FUNC_BEGIN/END, ALLOC_REG, FRAME_SLOT, LABEL_NEW, SET_LOC, JUMP,
+ *     CMP_BRANCH) leave the knowledge untouched;
+ *   - every other entry — label placement, scope boundaries, calls,
+ *     intrinsics, atomics, setjmp, ... — forgets everything.
+ * Without this, a vreg that is redefined, or that receives different
+ * immediates on two branches that later merge, would be folded with
+ * whichever constant happened to be recorded last on the tape.
+ * NOTE(review): this assumes control flow merges only at entries that
+ * fall into the "forget everything" class — confirm against the CGTarget
+ * contract.
+ *
+ * Folding is done on 64-bit values with unsigned wraparound; the target's
+ * load_imm truncates based on the destination type. For add/sub/mul the
+ * low bits of the result do not depend on the discarded high bits. The
+ * wraparound semantics follow cfree's no-UB stance (doc/DESIGN.md §9). */
+
+typedef struct ImmInfo {
+  i64 val; /* constant held by the vreg; meaningful only when gen is live */
+  u32 gen; /* generation in which val was recorded; 0 = unknown */
+} ImmInfo;
+
+/* Drop whatever is known about `dst` if it names a tracked vreg. */
+static void constfold_forget(ImmInfo* imm, u32 cap, const Operand* dst) {
+  if (dst->kind == OPK_REG && dst->v.reg < cap) imm[dst->v.reg].gen = 0;
+}
+
+/* Run the constant-folding peephole over the current function's tape,
+ * rewriting foldable BINOP entries to LOAD_IMM in place. Dead entries are
+ * skipped. Does nothing when at most one vreg id has been handed out. */
+static void peephole_constfold(OptImpl* o) {
+  ImmInfo* imm;
+  u32 cap;
+  u32 gen = 1; /* imm[r] is known iff imm[r].gen == gen */
+
+  if (o->next_vreg <= 1) return;
+  cap = o->next_vreg;
+  /* Zeroed allocation: every vreg starts with gen == 0, i.e. unknown. */
+  imm = arena_zarray(o->c->tu, ImmInfo, cap);
+
+  for (u32 i = 0; i < o->ntape; ++i) {
+    TapeEntry* e = &o->tape[i];
+    if (e->dead) continue;
+    switch ((TapeOpKind)e->op) {
+      /* No vreg is written and no other path joins here. */
+      case TOP_FUNC_BEGIN:
+      case TOP_FUNC_END:
+      case TOP_ALLOC_REG:
+      case TOP_FRAME_SLOT:
+      case TOP_LABEL_NEW:
+      case TOP_SET_LOC:
+      case TOP_JUMP:
+      case TOP_CMP_BRANCH:
+        break;
+
+      case TOP_LOAD_IMM:
+        if (e->u.load_imm.dst.kind == OPK_REG) {
+          Reg r = e->u.load_imm.dst.v.reg;
+          if (r < cap) {
+            imm[r].val = e->u.load_imm.imm;
+            imm[r].gen = gen;
+          }
+        }
+        break;
+
+      case TOP_BINOP: {
+        Operand a = e->u.binop.a;
+        Operand b = e->u.binop.b;
+        Operand dst = e->u.binop.dst;
+        BinOp op = e->u.binop.op;
+        int foldable = (op == BO_IADD || op == BO_ISUB || op == BO_IMUL) &&
+                       a.kind == OPK_REG && b.kind == OPK_REG &&
+                       a.v.reg < cap && b.v.reg < cap &&
+                       imm[a.v.reg].gen == gen && imm[b.v.reg].gen == gen;
+        if (!foldable) {
+          /* The BINOP still overwrites dst with a value we cannot name. */
+          constfold_forget(imm, cap, &dst);
+          break;
+        }
+
+        /* Compute in u64 so wraparound is well defined, then cast back. */
+        u64 av = (u64)imm[a.v.reg].val;
+        u64 bv = (u64)imm[b.v.reg].val;
+        u64 folded;
+        if (op == BO_IADD) {
+          folded = av + bv;
+        } else if (op == BO_ISUB) {
+          folded = av - bv;
+        } else {
+          folded = av * bv;
+        }
+
+        memset(&e->u, 0, sizeof e->u);
+        e->op = (u8)TOP_LOAD_IMM;
+        e->u.load_imm.dst = dst;
+        e->u.load_imm.imm = (i64)folded;
+        /* Record the result so a later BINOP can fold against it. */
+        if (dst.kind == OPK_REG && dst.v.reg < cap) {
+          imm[dst.v.reg].val = (i64)folded;
+          imm[dst.v.reg].gen = gen;
+        }
+        break;
+      }
+
+      /* Single visible destination: only that vreg becomes unknown.
+       * ADDR_OF is recorded in the `copy` payload. */
+      case TOP_COPY:
+      case TOP_ADDR_OF:
+        constfold_forget(imm, cap, &e->u.copy.dst);
+        break;
+      case TOP_LOAD:
+        constfold_forget(imm, cap, &e->u.load.dst);
+        break;
+      case TOP_UNOP:
+        constfold_forget(imm, cap, &e->u.unop.dst);
+        break;
+      case TOP_CMP:
+        constfold_forget(imm, cap, &e->u.cmp.dst);
+        break;
+      case TOP_CONVERT:
+        constfold_forget(imm, cap, &e->u.convert.dst);
+        break;
+
+      default:
+        /* Possible merge point or a writer we do not model: forget
+         * everything by moving to a fresh generation. If the counter
+         * ever wraps, clear the table so stale entries cannot match. */
+        if (++gen == 0) {
+          memset(imm, 0, (size_t)cap * sizeof *imm);
+          gen = 1;
+        }
+        break;
+    }
+  }
+}
+
+/* ---- func_end: append TOP_FUNC_END, run peepholes, replay ---- */
+
+/* CGTarget.func_end for the wrapper. Appends the closing TOP_FUNC_END
+ * entry, runs the constant-folding peephole, dumps the resulting tape if
+ * a dump writer is installed, and finally replays the tape. */
+static void w_func_end(CGTarget* t) {
+  OptImpl* impl = impl_of(t);
+  Writer* dump;
+
+  tape_append(impl, TOP_FUNC_END);
+  peephole_constfold(impl);
+
+  dump = impl->dump_writer;
+  if (dump != NULL) print_tape(impl, dump);
+
+  replay(impl);
+}
+
+/* ---- public API: dump writer ---- */
+
+/* Install `w` as the tape-dump writer of `t`. A target counts as one of
+ * ours only when its func_begin slot is w_func_begin; a NULL `t` or any
+ * other CGTarget makes the call a silent no-op. */
+void opt_set_dump_writer(CGTarget* t, Writer* w) {
+  int ours = t != NULL && t->func_begin == w_func_begin;
+  if (ours) impl_of(t)->dump_writer = w;
+}
+
+/* ---- end-of-TU and destruction ---- */
+
+/* CGTarget.finalize for the wrapper: forward to the wrapped target's
+ * finalize hook when it has one. */
+static void w_finalize(CGTarget* t) {
+  OptImpl* impl = impl_of(t);
+  if (impl->target->finalize == NULL) return;
+  impl->target->finalize(impl->target);
+}
+
+/* CGTarget.destroy for the wrapper: forward to the wrapped target's
+ * destroy hook when it has one. Nothing of the wrapper itself is freed
+ * here. */
+static void w_destroy(CGTarget* t) {
+  OptImpl* impl = impl_of(t);
+  if (impl->target->destroy == NULL) return;
+  impl->target->destroy(impl->target);
+}
+
+/* ---- construction ---- */
+
+/* Build an optimizing wrapper around `target` and return it as a
+ * CGTarget.
+ *
+ * Panics through compiler_panic when `target` is NULL or `level` is
+ * outside [1, 2]. The wrapper is allocated from c->tu and zeroed; it
+ * copies the wrapped target's obj / mc / debug pointers and routes every
+ * hook listed below to the matching w_* recorder. setjmp_ / longjmp_ are
+ * exposed only when the wrapped target provides them. */
+CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) {
+  const SrcLoc nowhere = {0, 0, 0};
+  OptImpl* o;
+
+  if (target == NULL) {
+    compiler_panic(c, nowhere, "opt_cgtarget_new: target is NULL");
+  }
+  if (level < 1 || level > 2) {
+    compiler_panic(c, nowhere,
+                   "opt_cgtarget_new: level %d out of range [1, 2]", level);
+  }
+
+  o = arena_new(c->tu, OptImpl);
+  memset(o, 0, sizeof *o);
+  o->c = c;
+  o->target = target;
+  o->level = level;
+
+  /* Members not named here stay zero, as they were after the memset. */
+  o->base = (CGTarget){
+      .c = c,
+      .obj = target->obj,
+      .mc = target->mc,
+      .debug = target->debug,
+
+      /* function bracket */
+      .func_begin = w_func_begin,
+      .func_end = w_func_end,
+
+      /* registers, frame slots, parameters */
+      .alloc_reg = w_alloc_reg,
+      .free_reg = w_free_reg,
+      .frame_slot = w_frame_slot,
+      .param = w_param,
+      .clobbers = w_clobbers,
+      .spill_reg = w_spill_reg,
+      .reload_reg = w_reload_reg,
+
+      /* labels and branches */
+      .label_new = w_label_new,
+      .label_place = w_label_place,
+      .jump = w_jump,
+      .cmp_branch = w_cmp_branch,
+
+      /* structured scopes */
+      .scope_begin = w_scope_begin,
+      .scope_else = w_scope_else,
+      .scope_end = w_scope_end,
+      .break_to = w_break_to,
+      .continue_to = w_continue_to,
+
+      /* data movement */
+      .load_imm = w_load_imm,
+      .load_const = w_load_const,
+      .copy = w_copy,
+      .load = w_load,
+      .store = w_store,
+      .addr_of = w_addr_of,
+      .tls_addr_of = w_tls_addr_of,
+      .copy_bytes = w_copy_bytes,
+      .set_bytes = w_set_bytes,
+      .bitfield_load = w_bitfield_load,
+      .bitfield_store = w_bitfield_store,
+
+      /* arithmetic */
+      .binop = w_binop,
+      .unop = w_unop,
+      .cmp = w_cmp,
+      .convert = w_convert,
+
+      /* calls */
+      .call = w_call,
+      .ret = w_ret,
+
+      /* alloca and varargs */
+      .alloca_ = w_alloca_,
+      .va_start_ = w_va_start_,
+      .va_arg_ = w_va_arg_,
+      .va_end_ = w_va_end_,
+      .va_copy_ = w_va_copy_,
+
+      /* non-local jumps: mirror whether the wrapped target has them */
+      .setjmp_ = target->setjmp_ ? w_setjmp_ : NULL,
+      .longjmp_ = target->longjmp_ ? w_longjmp_ : NULL,
+
+      /* atomics */
+      .atomic_load = w_atomic_load,
+      .atomic_store = w_atomic_store,
+      .atomic_rmw = w_atomic_rmw,
+      .atomic_cas = w_atomic_cas,
+      .fence = w_fence,
+
+      /* intrinsics and inline asm */
+      .intrinsic = w_intrinsic,
+      .asm_block = w_asm_block,
+
+      /* source locations and lifecycle */
+      .set_loc = w_set_loc,
+      .finalize = w_finalize,
+      .destroy = w_destroy,
+  };
+
+  return &o->base;
+}
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -77,4 +77,10 @@ void opt_dce(Func*); /* post-RA DCE */
* needs. Stamps each emitted insn's SrcLoc onto target via CGTarget.set_loc. */
void opt_emit(Compiler*, Func*, CGTarget* target);
+/* When set, the wrapper dumps each function's tape as text, as rewritten by
+ * the peepholes, to `w` on func_end, immediately before replay. Pass NULL to
+ * disable. The line-oriented format is stable enough for golden-file diffs
+ * but otherwise unspecified. No-op if `t` is not an opt_cgtarget. */
+void opt_set_dump_writer(CGTarget* t, Writer* w);
+
#endif
diff --git a/test/cg/harness/cg_runner.c b/test/cg/harness/cg_runner.c
@@ -31,8 +31,15 @@
#include "debug/debug.h"
#include "link/link.h"
#include "obj/obj.h"
+#include "opt/opt.h"
#include "type/type.h"
+/* --opt-level N: wrap the constructed CGTarget with opt_cgtarget_new(level)
+ * before each case runs. 0 (default) drives the backend directly; 1 / 2
+ * exercise the opt pipeline. The corpus is the equivalence oracle — every
+ * case's exit code at level 0 must match levels 1 / 2. */
+static int g_opt_level = 0;
+
/* ---- env ---- */
static void* h_alloc(CfreeHeap* h, size_t n, size_t a) {
@@ -265,6 +272,9 @@ static int build_case(BuildState* st, const CgCase* cc) {
if (cc->kind != CG_CASE_MC_ONLY) {
st->target = cgtarget_new(c, st->ob, st->mc);
+ if (g_opt_level > 0) {
+ st->target = opt_cgtarget_new(c, st->target, g_opt_level);
+ }
} else {
st->target = NULL;
}
@@ -328,6 +338,96 @@ static int mode_expected(const char* name) {
return 0;
}
+/* CfreeWriter that wraps stdout; used by --dump-tape. Only the write
+ * hook does any work: seek and close do nothing, tell always reports 0,
+ * and error always reports 0. */
+typedef struct StdoutWriter {
+  CfreeWriter base;
+} StdoutWriter;
+
+/* Forward `n` bytes from `data` to stdout; the fwrite result is not
+ * inspected. */
+static void sw_write(CfreeWriter* w, const void* data, size_t n) {
+  FILE* out = stdout;
+  (void)w;
+  fwrite(data, 1, n, out);
+}
+
+/* Seeking is ignored. */
+static void sw_seek(CfreeWriter* w, uint64_t off) {
+  (void)off;
+  (void)w;
+}
+
+/* Position is not tracked; always 0. */
+static uint64_t sw_tell(CfreeWriter* w) {
+  (void)w;
+  return 0;
+}
+
+/* Always 0. */
+static int sw_error(CfreeWriter* w) {
+  (void)w;
+  return 0;
+}
+
+/* Nothing to release. */
+static void sw_close(CfreeWriter* w) {
+  (void)w;
+}
+
+static StdoutWriter g_stdout_writer = {
+    {sw_write, sw_seek, sw_tell, sw_error, sw_close}};
+
+/* --dump-tape NAME — build case NAME through the opt wrapper at the
+ * current --opt-level (which must be >= 1) with a stdout dump writer
+ * installed, so each function's tape is printed at func_end.
+ *
+ * Returns 0 on success, 1 if the compiler panicked during the build, and
+ * 2 for an unknown case, an opt level below 1, or a failed compiler
+ * construction. */
+static int mode_dump_tape(const char* name) {
+  const CgCase* cc = find_case(name);
+  if (cc == NULL) {
+    fprintf(stderr, "cg-runner: unknown case '%s'\n", name);
+    return 2;
+  }
+  if (g_opt_level <= 0) {
+    fprintf(stderr, "cg-runner: --dump-tape requires --opt-level >= 1\n");
+    return 2;
+  }
+
+  CfreeTarget target;
+  target_aarch64_linux(&target);
+
+  CfreeEnv env;
+  memset(&env, 0, sizeof env);
+  env.now = -1;
+  env.execmem = &g_execmem;
+  env.diag = &g_diag;
+  env.heap = &g_heap;
+
+  CfreeCompiler* handle = cfree_compiler_new(target, &env);
+  if (handle == NULL) return 2;
+  Compiler* c = (Compiler*)handle;
+
+  /* Panic landing pad: run cleanups, free the compiler, report failure. */
+  if (setjmp(c->panic) != 0) {
+    compiler_run_cleanups(c);
+    cfree_compiler_free(handle);
+    return 1;
+  }
+
+  /* The target is assembled here rather than via build_case so the dump
+   * writer is in place before the case drives func_begin/func_end. */
+  BuildState st;
+  memset(&st, 0, sizeof st);
+  st.c = c;
+  st.ob = obj_new(c);
+  st.mc = mc_new(c, st.ob);
+  st.target =
+      opt_cgtarget_new(c, cgtarget_new(c, st.ob, st.mc), g_opt_level);
+  opt_set_dump_writer(st.target, &g_stdout_writer.base);
+
+  Sym text_name = pool_intern_cstr(c->global, ".text");
+  ObjSecId text_sec =
+      obj_section(st.ob, text_name, SEC_TEXT, SF_ALLOC | SF_EXEC, 4);
+
+  st.ctx.debug = NULL;
+  st.ctx.pool = c->global;
+  st.ctx.text_sec = text_sec;
+  st.ctx.target = st.target;
+  st.ctx.mc = st.mc;
+  st.ctx.ob = st.ob;
+  st.ctx.c = c;
+
+  st.mc->set_section(st.mc, text_sec);
+  cc->build(&st.ctx);
+  cgtarget_finalize(st.target);
+
+  cfree_compiler_free(handle);
+  return 0;
+}
+
/* --dwarf-checks NAME — print the W-path directive blob registered for
* NAME, or nothing if the case has no DWARF checks. The shell harness
* pipes this into cg_check_dwarf <obj>. */
@@ -502,11 +602,12 @@ static int mode_jit(const char* name) {
static int usage(void) {
fprintf(stderr,
- "usage: cg-runner --list\n"
- " cg-runner --expected NAME\n"
- " cg-runner --dwarf-checks NAME\n"
- " cg-runner --emit NAME OUT.o\n"
- " cg-runner --jit NAME\n");
+ "usage: cg-runner [--opt-level N] --list\n"
+ " cg-runner [--opt-level N] --expected NAME\n"
+ " cg-runner [--opt-level N] --dwarf-checks NAME\n"
+ " cg-runner [--opt-level N] --emit NAME OUT.o\n"
+ " cg-runner [--opt-level N] --jit NAME\n"
+ " cg-runner --opt-level N --dump-tape NAME\n");
return 2;
}
@@ -515,6 +616,12 @@ int main(int argc, char** argv) {
long ps = sysconf(_SC_PAGESIZE);
if (ps > 0) g_execmem.page_size = (size_t)ps;
}
+ /* Optional leading --opt-level N flag. */
+ if (argc >= 3 && !strcmp(argv[1], "--opt-level")) {
+ g_opt_level = atoi(argv[2]);
+ argc -= 2;
+ argv += 2;
+ }
if (argc < 2) return usage();
if (!strcmp(argv[1], "--list"))
return mode_list();
@@ -526,5 +633,7 @@ int main(int argc, char** argv) {
return mode_emit(argv[2], argv[3]);
else if (!strcmp(argv[1], "--jit") && argc == 3)
return mode_jit(argv[2]);
+ else if (!strcmp(argv[1], "--dump-tape") && argc == 3)
+ return mode_dump_tape(argv[2]);
return usage();
}
diff --git a/test/cg/run.sh b/test/cg/run.sh
@@ -51,8 +51,15 @@ ALLOW_SKIP="${CFREE_TEST_ALLOW_SKIP:-0}"
# Filters (env vars or positional args; args win):
# $1 / CFREE_TEST_FILTER — substring match against case name
# $2 / CFREE_TEST_PATHS — subset of "DREJ" (default "DREJ")
+# CFREE_OPT_LEVELS — space-separated opt levels to exercise. Default "0 1"
+# so every case is built twice: directly against the
+# backend (level 0) and through the opt_cgtarget
+# wrapper (level 1). Path W (DWARF) only runs at
+# level 0 — opt-level DWARF equivalence is a later
+# phase concern.
FILTER="${1:-${CFREE_TEST_FILTER:-}}"
PATHS="${2:-${CFREE_TEST_PATHS:-DREJW}}"
+OPT_LEVELS="${CFREE_OPT_LEVELS:-0 1}"
case "$PATHS" in *D*) RUN_D=1;; *) RUN_D=0;; esac
case "$PATHS" in *R*) RUN_R=1;; *) RUN_R=0;; esac
case "$PATHS" in *E*) RUN_E=1;; *) RUN_E=0;; esac
@@ -221,178 +228,195 @@ if [ $have_clang_cross -eq 1 ]; then
fi
fi
-printf 'Running cases...\n'
+CASES="$($CG_RUNNER --list)"
+
+# Level 0 invokes cg-runner with no flag and drives the AArch64 backend
+# directly; level >0 passes --opt-level N, which inserts opt_cgtarget. Case
+# names are tagged /L<N> in the output when level >0 to localize failures.
+for OPT_LEVEL in $OPT_LEVELS; do
+ if [ "$OPT_LEVEL" = "0" ]; then
+ CG_RUN=("$CG_RUNNER")
+ TAG=""
+ WORK_SUB="cg"
+ else
+ CG_RUN=("$CG_RUNNER" "--opt-level" "$OPT_LEVEL")
+ TAG="/L${OPT_LEVEL}"
+ WORK_SUB="cg-L${OPT_LEVEL}"
+ fi
-# ---- per-case loop ---------------------------------------------------------
+ printf 'Running cases (opt-level %s)...\n' "$OPT_LEVEL"
-CASES="$($CG_RUNNER --list)"
+ # Path E result bookkeeping (per level — flushed at end of this iteration).
+ E_NAMES=()
+ E_WORK=()
+ E_LINK_MS=()
+ E_EXPECTED=()
+
+ for name in $CASES; do
+ [ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]] && continue
+ work="$BUILD_DIR/$WORK_SUB/$name"
+ mkdir -p "$work"
-# Path E result bookkeeping. We queue exes during the main loop and verify
-# after a single batched podman flush.
-E_NAMES=()
-E_WORK=()
-E_LINK_MS=()
-E_EXPECTED=()
-
-for name in $CASES; do
- [ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]] && continue
- work="$BUILD_DIR/cg/$name"
- mkdir -p "$work"
-
- expected="$($CG_RUNNER --expected "$name" 2>/dev/null)"
- expected="${expected:-0}"
- # Exit codes are mod 256 on POSIX; mask the expected the same way so
- # negative-return cases compare correctly.
- expected_byte=$(( expected & 0xff ))
-
- # ---- Path D: in-process JIT (only on aarch64) ------------------------
- if [ $RUN_D -eq 1 ]; then
- if [ $is_aarch64 -eq 1 ]; then
- t0=$(now_ms)
- "$CG_RUNNER" --jit "$name" >"$work/d.out" 2>"$work/d.err"
- d_rc=$?
- dt=$(( $(now_ms) - t0 )); T_D=$(( T_D + dt ))
- if [ "$d_rc" -eq "$expected_byte" ]; then
- note_pass "$name/D (${dt}ms)"
+ expected="$("${CG_RUN[@]}" --expected "$name" 2>/dev/null)"
+ expected="${expected:-0}"
+ # Exit codes are mod 256 on POSIX; mask the expected the same way so
+ # negative-return cases compare correctly.
+ expected_byte=$(( expected & 0xff ))
+
+ # ---- Path D: in-process JIT (only on aarch64) ------------------------
+ if [ $RUN_D -eq 1 ]; then
+ if [ $is_aarch64 -eq 1 ]; then
+ t0=$(now_ms)
+ "${CG_RUN[@]}" --jit "$name" >"$work/d.out" 2>"$work/d.err"
+ d_rc=$?
+ dt=$(( $(now_ms) - t0 )); T_D=$(( T_D + dt ))
+ if [ "$d_rc" -eq "$expected_byte" ]; then
+ note_pass "$name/D${TAG} (${dt}ms)"
+ else
+ note_fail "$name/D${TAG} (expected $expected_byte got $d_rc, ${dt}ms)"
+ fi
else
- note_fail "$name/D (expected $expected_byte got $d_rc, ${dt}ms)"
+ note_skip "$name/D${TAG}" "not on aarch64 host"
fi
- else
- note_skip "$name/D" "not on aarch64 host"
fi
- fi
-
- # ---- emit (needed by R/E/J/W) -----------------------------------------
- obj="$work/$name.o"
- if [ $RUN_R -eq 1 ] || [ $RUN_E -eq 1 ] || [ $RUN_J -eq 1 ] \
- || [ $RUN_W -eq 1 ]; then
- if ! "$CG_RUNNER" --emit "$name" "$obj" 2>"$work/emit.err"; then
- note_fail "$name/emit (cg-runner --emit failed; see $work/emit.err)"
- continue
- fi
- fi
- # ---- Path R: ELF roundtrip --------------------------------------------
- if [ $RUN_R -eq 1 ]; then
- if [ $have_roundtrip -eq 1 ] && [ $have_readelf -eq 1 ] && [ $have_python3 -eq 1 ]; then
- t0=$(now_ms)
- rt="$work/$name.rt.o"
- r_ok=1; r_msg=""
- if ! "$ROUNDTRIP_BIN" "$obj" "$rt" 2>"$work/rt.err"; then
- r_ok=0; r_msg=" (roundtrip failed)"
- else
- "$READELF_BIN" -aW "$obj" | python3 "$NORMALIZE" >"$work/golden.norm" 2>/dev/null
- "$READELF_BIN" -aW "$rt" | python3 "$NORMALIZE" >"$work/rt.norm" 2>/dev/null
- diff -u "$work/golden.norm" "$work/rt.norm" >"$work/r.diff" 2>&1 || r_ok=0
+ # ---- emit (needed by R/E/J/W) -----------------------------------------
+ obj="$work/$name.o"
+ if [ $RUN_R -eq 1 ] || [ $RUN_E -eq 1 ] || [ $RUN_J -eq 1 ] \
+ || [ $RUN_W -eq 1 ]; then
+ if ! "${CG_RUN[@]}" --emit "$name" "$obj" 2>"$work/emit.err"; then
+ note_fail "$name/emit${TAG} (cg-runner --emit failed; see $work/emit.err)"
+ continue
fi
- dt=$(( $(now_ms) - t0 )); T_R=$(( T_R + dt ))
- if [ $r_ok -eq 1 ]; then note_pass "$name/R (${dt}ms)"
- else note_fail "$name/R${r_msg} (${dt}ms)"; fi
- else
- note_skip "$name/R" "missing roundtrip/readelf/python3"
fi
- fi
- # ---- Path E: link + (batched) qemu/podman ------------------------------
- # Link now (per case); the run is queued for the post-loop flush.
- if [ $RUN_E -eq 1 ]; then
- if [ $have_exe_runner -eq 1 ] && [ $have_clang_cross -eq 1 ] \
- && [ $have_start_obj -eq 1 ]; then
- t0=$(now_ms)
- exe="$work/linked.exe"
- if ! "$LINK_EXE_RUNNER" -o "$exe" "$obj" "$START_OBJ" \
- >"$work/exec_link.out" 2>"$work/exec_link.err"; then
- dt=$(( $(now_ms) - t0 )); T_E=$(( T_E + dt ))
- note_fail "$name/E (link failed, ${dt}ms)"
- elif [ $have_runner -eq 1 ]; then
- link_dt=$(( $(now_ms) - t0 )); T_E=$(( T_E + link_dt ))
- E_NAMES+=("$name")
- E_WORK+=("$work")
- E_LINK_MS+=("$link_dt")
- E_EXPECTED+=("$expected_byte")
- exec_aarch64_queue "$name" "$exe" \
- "$work/exec.out" "$work/exec.err" "$work/exec.rc"
+ # ---- Path R: ELF roundtrip --------------------------------------------
+ if [ $RUN_R -eq 1 ]; then
+ if [ $have_roundtrip -eq 1 ] && [ $have_readelf -eq 1 ] && [ $have_python3 -eq 1 ]; then
+ t0=$(now_ms)
+ rt="$work/$name.rt.o"
+ r_ok=1; r_msg=""
+ if ! "$ROUNDTRIP_BIN" "$obj" "$rt" 2>"$work/rt.err"; then
+ r_ok=0; r_msg=" (roundtrip failed)"
+ else
+ "$READELF_BIN" -aW "$obj" | python3 "$NORMALIZE" >"$work/golden.norm" 2>/dev/null
+ "$READELF_BIN" -aW "$rt" | python3 "$NORMALIZE" >"$work/rt.norm" 2>/dev/null
+ diff -u "$work/golden.norm" "$work/rt.norm" >"$work/r.diff" 2>&1 || r_ok=0
+ fi
+ dt=$(( $(now_ms) - t0 )); T_R=$(( T_R + dt ))
+ if [ $r_ok -eq 1 ]; then note_pass "$name/R${TAG} (${dt}ms)"
+ else note_fail "$name/R${TAG}${r_msg} (${dt}ms)"; fi
else
- note_skip "$name/E" "no qemu/podman"
+ note_skip "$name/R${TAG}" "missing roundtrip/readelf/python3"
fi
- else
- note_skip "$name/E" "no link-exe-runner, aarch64 clang, or start.o"
fi
- fi
- # ---- Path J: jit-via-file ---------------------------------------------
- if [ $RUN_J -eq 1 ]; then
- if [ $have_jit_runner -eq 1 ]; then
- t0=$(now_ms)
- "$JIT_RUNNER" "$obj" >"$work/jit.out" 2>"$work/jit.err"
- j_rc=$?
- dt=$(( $(now_ms) - t0 )); T_J=$(( T_J + dt ))
- if [ "$j_rc" -eq "$expected_byte" ]; then
- note_pass "$name/J (${dt}ms)"
+ # ---- Path E: link + (batched) qemu/podman ------------------------------
+ # Link now (per case); the run is queued for the post-loop flush.
+ if [ $RUN_E -eq 1 ]; then
+ if [ $have_exe_runner -eq 1 ] && [ $have_clang_cross -eq 1 ] \
+ && [ $have_start_obj -eq 1 ]; then
+ t0=$(now_ms)
+ exe="$work/linked.exe"
+ if ! "$LINK_EXE_RUNNER" -o "$exe" "$obj" "$START_OBJ" \
+ >"$work/exec_link.out" 2>"$work/exec_link.err"; then
+ dt=$(( $(now_ms) - t0 )); T_E=$(( T_E + dt ))
+ note_fail "$name/E${TAG} (link failed, ${dt}ms)"
+ elif [ $have_runner -eq 1 ]; then
+ link_dt=$(( $(now_ms) - t0 )); T_E=$(( T_E + link_dt ))
+ E_NAMES+=("$name")
+ E_WORK+=("$work")
+ E_LINK_MS+=("$link_dt")
+ E_EXPECTED+=("$expected_byte")
+ # Queue with a level-tagged key so cases at different
+ # opt levels don't collide in the batched runner.
+ exec_aarch64_queue "L${OPT_LEVEL}_${name}" "$exe" \
+ "$work/exec.out" "$work/exec.err" "$work/exec.rc"
+ else
+ note_skip "$name/E${TAG}" "no qemu/podman"
+ fi
else
- note_fail "$name/J (expected $expected_byte got $j_rc, ${dt}ms)"
+ note_skip "$name/E${TAG}" "no link-exe-runner, aarch64 clang, or start.o"
fi
- else
- note_skip "$name/J" "no jit-runner (not aarch64 host)"
fi
- fi
- # ---- Path W: DWARF check ----------------------------------------------
- # Cases that don't register directives produce empty stdout from
- # --dwarf-checks; we silently skip those (no SKIP entry, since W is
- # opt-in per case rather than per host).
- if [ $RUN_W -eq 1 ]; then
- "$CG_RUNNER" --dwarf-checks "$name" >"$work/w.directives" \
- 2>"$work/w.dc.err"
- if [ -s "$work/w.directives" ]; then
- if [ $have_dwarf_check -eq 1 ]; then
+ # ---- Path J: jit-via-file ---------------------------------------------
+ if [ $RUN_J -eq 1 ]; then
+ if [ $have_jit_runner -eq 1 ]; then
t0=$(now_ms)
- "$DWARF_CHECK" "$obj" <"$work/w.directives" \
- >"$work/w.out" 2>"$work/w.err"
- w_rc=$?
- dt=$(( $(now_ms) - t0 )); T_W=$(( T_W + dt ))
- if [ "$w_rc" -eq 0 ]; then
- note_pass "$name/W (${dt}ms)"
+ "$JIT_RUNNER" "$obj" >"$work/jit.out" 2>"$work/jit.err"
+ j_rc=$?
+ dt=$(( $(now_ms) - t0 )); T_J=$(( T_J + dt ))
+ if [ "$j_rc" -eq "$expected_byte" ]; then
+ note_pass "$name/J${TAG} (${dt}ms)"
else
- note_fail "$name/W (see $work/w.out, $work/w.err; ${dt}ms)"
+ note_fail "$name/J${TAG} (expected $expected_byte got $j_rc, ${dt}ms)"
fi
else
- note_skip "$name/W" "no cg-check-dwarf"
+ note_skip "$name/J${TAG}" "no jit-runner (not aarch64 host)"
fi
fi
- fi
-done
-# ---- batched path-E flush + verification -----------------------------------
-# Run every queued case in a single podman invocation, then iterate the
-# queue to read each exit code and emit PASS/FAIL.
-
-T_E_BATCH=0
-if [ "$(exec_aarch64_queue_size)" -gt 0 ]; then
- printf 'Running path E (%d cases batched)...\n' "$(exec_aarch64_queue_size)"
- t0=$(now_ms)
- exec_aarch64_flush
- T_E_BATCH=$(( $(now_ms) - t0 )); T_E=$(( T_E + T_E_BATCH ))
-
- i=0
- while [ $i -lt ${#E_NAMES[@]} ]; do
- name="${E_NAMES[$i]}"
- work="${E_WORK[$i]}"
- link_dt="${E_LINK_MS[$i]}"
- expected_byte="${E_EXPECTED[$i]}"
- if [ ! -f "$work/exec.rc" ]; then
- note_fail "$name/E (no rc; podman batch did not produce results)"
- else
- RUN_RC="$(cat "$work/exec.rc")"
- if [ "$RUN_RC" -eq "$expected_byte" ]; then
- note_pass "$name/E (link ${link_dt}ms)"
- else
- note_fail "$name/E (expected $expected_byte got $RUN_RC, link ${link_dt}ms)"
+ # ---- Path W: DWARF check ----------------------------------------------
+ # Cases that don't register directives produce empty stdout from
+ # --dwarf-checks; we silently skip those (no SKIP entry, since W is
+ # opt-in per case rather than per host). DWARF / opt-level
+ # equivalence is a Phase 5+ concern, so skip W when level > 0.
+ if [ $RUN_W -eq 1 ] && [ "$OPT_LEVEL" = "0" ]; then
+ "${CG_RUN[@]}" --dwarf-checks "$name" >"$work/w.directives" \
+ 2>"$work/w.dc.err"
+ if [ -s "$work/w.directives" ]; then
+ if [ $have_dwarf_check -eq 1 ]; then
+ t0=$(now_ms)
+ "$DWARF_CHECK" "$obj" <"$work/w.directives" \
+ >"$work/w.out" 2>"$work/w.err"
+ w_rc=$?
+ dt=$(( $(now_ms) - t0 )); T_W=$(( T_W + dt ))
+ if [ "$w_rc" -eq 0 ]; then
+ note_pass "$name/W (${dt}ms)"
+ else
+ note_fail "$name/W (see $work/w.out, $work/w.err; ${dt}ms)"
+ fi
+ else
+ note_skip "$name/W" "no cg-check-dwarf"
+ fi
fi
fi
- i=$((i+1))
done
-fi
+
+ # ---- batched path-E flush + verification (per level) -------------------
+ # Run every queued case in a single podman invocation, then iterate the
+ # queue to read each exit code and emit PASS/FAIL.
+ if [ "$(exec_aarch64_queue_size)" -gt 0 ]; then
+ printf 'Running path E%s (%d cases batched)...\n' \
+ "$TAG" "$(exec_aarch64_queue_size)"
+ t0=$(now_ms)
+ exec_aarch64_flush
+ DELTA=$(( $(now_ms) - t0 ))
+ T_E_BATCH=$(( ${T_E_BATCH:-0} + DELTA )); T_E=$(( T_E + DELTA ))
+
+ i=0
+ while [ $i -lt ${#E_NAMES[@]} ]; do
+ name="${E_NAMES[$i]}"
+ work="${E_WORK[$i]}"
+ link_dt="${E_LINK_MS[$i]}"
+ expected_byte="${E_EXPECTED[$i]}"
+ if [ ! -f "$work/exec.rc" ]; then
+ note_fail "$name/E${TAG} (no rc; podman batch did not produce results)"
+ else
+ RUN_RC="$(cat "$work/exec.rc")"
+ if [ "$RUN_RC" -eq "$expected_byte" ]; then
+ note_pass "$name/E${TAG} (link ${link_dt}ms)"
+ else
+ note_fail "$name/E${TAG} (expected $expected_byte got $RUN_RC, link ${link_dt}ms)"
+ fi
+ fi
+ i=$((i+1))
+ done
+ fi
+done
+
+T_E_BATCH=${T_E_BATCH:-0}
# ---- summary ---------------------------------------------------------------