commit ed078ac80320bb5d7b8e4aa8c4e780d0aec8fdb8
parent a3da4df2317690b3bff662fffd9fd1f3455e580f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 28 May 2026 18:20:10 -0700
opt: fold const convert-of-load_imm and thread branch targets through pass-through blocks
Diffstat:
6 files changed, 323 insertions(+), 15 deletions(-)
diff --git a/doc/PERCALL.md b/doc/PERCALL.md
@@ -63,8 +63,7 @@ cfree puts the frame record above the callee-saves (`add x29, sp, #16`), so the
sp-decrement and the fp/lr save cannot fold into a single pre-indexed `stp`.
Moving the frame record to the bottom of the frame (fp-at-bottom, `mov x29, sp`)
unlocks the pre-indexed entry `stp …, [sp, #-N]!` and post-indexed exit
-`ldp …, [sp], #N`. This is [OPT_O1_PERF_TODO.md](OPT_O1_PERF_TODO.md) item 2,
-the largest open per-call win.
+`ldp …, [sp], #N`.
(An aside: omitting the frame pointer entirely — packing a callee-save with lr,
e.g. `stp x19, x30, [sp, #-N]!`, no `x29` at all — would save one more insn, but
@@ -75,14 +74,22 @@ planned.)
Two small constant adders beyond the prologue, both call-shaped:
-1. **Zero through a temp** (item 3). `BottomUpTree`'s leaf `NewTreeNode(NULL,
- NULL)` materializes each null arg via a scratch:
+1. **Zero through a temp** (item 3) — **FIXED**. `BottomUpTree`'s leaf
+ `NewTreeNode(NULL, NULL)` used to materialize each null arg via a scratch:
```
movz x8, 0x0 ; mov x0, x8 ; movz x8, 0x0 ; mov x1, x8 ; 4 insns
```
- Optimal is `mov x0, #0 ; mov x1, #0` (2 insns). **−2** on that path.
- `IR_LOAD_IMM` sources don't participate in the call-arg register-hint
- propagation in `pass_lower.c`.
+ It now emits the optimal `movz x0, 0x0 ; movz x1, 0x0` (2 insns), **−2** on
+ that path. The trigger was a *pointer-typed* null (`(void*)0` / `NULL` from
+ headers, not an integer `0`): the cast inserts an int→ptr `IR_CONVERT`
+ between the `load_imm` and the call arg. `apply_abi_aliasing_hints` hints the
+ arg PReg to x0/x1 and `propagate_hint_through_copies` carries it backward —
+ but only across `IR_COPY`, so the convert broke the chain and its `load_imm`
+ source landed in the x8 scratch. Fixed by `try_fold_const_convert` in
+ `pass_combine.c`, which constant-folds `load_imm rT,k ; convert rD,rT` into a
+ single `load_imm rD,k'` (the now-dead `load_imm` is removed by DCE); the
+ folded `load_imm` inherits the convert's hinted dst directly. Covered by
+ `test/opt/zero_arg.sh`.
2. **Redundant branch chain** (item 4). `DeleteTree`'s if/else merge emits
`b A; A: b B` — a conditional branch to a label that just unconditionally
@@ -96,12 +103,13 @@ Two small constant adders beyond the prologue, both call-shaped:
| --- | ---: | ---: | --- |
| NewTreeNode | 14 | 16 | +2 prologue |
| ItemCheck | 20 | 21 | +2 prologue |
-| BottomUpTree | 25 | 24 | +2 prologue, +2 zero-temp |
+| BottomUpTree | 23 | 24 | +2 prologue (zero-temp fixed) |
| DeleteTree | 20 | 18 | +2 prologue, +1 branch |
cfree already beats gcc -O0 on the malloc-heavy `NewTreeNode` (args stay in
-registers vs gcc's 8 stack shuffles) and ties `ItemCheck`. It loses
-`BottomUpTree`/`DeleteTree` purely on the fixed overhead above.
+registers vs gcc's 8 stack shuffles), ties `ItemCheck`, and — with the
+zero-temp fix — now edges ahead on `BottomUpTree`. `DeleteTree` remains the only
+loss, on the fixed prologue overhead plus the item-4 branch chain.
## Optimal target
diff --git a/src/opt/pass_combine.c b/src/opt/pass_combine.c
@@ -1,6 +1,8 @@
#include <stdint.h>
#include <string.h>
+#include <cfree/cg.h>
+
#include "core/arena.h"
#include "opt/opt_internal.h"
@@ -594,6 +596,56 @@ static int ext_params(const Inst* in, u32* src_bytes_out, u32* dst_bytes_out,
return 1;
}
+/* Width in bytes of a scalar integer or pointer type (1..8), else 0. Mirrors
+ * pass_simplify's simplify_width: builtin ints decode without the compiler,
+ * pointers fall back to the type-size query.
+ *
+ * A 0 result means "no usable width": a builtin int wider than 8 bytes, a type
+ * that is neither a builtin int nor a pointer, a pointer whose reported size
+ * is 0 or above 8, or a Func with no compiler handle (f->c == NULL). */
+static u32 combine_scalar_width_bytes(Func* f, CfreeCgTypeId t) {
+ u32 b = builtin_int_bytes(t);
+ /* Nonzero means the builtin decoder recognized t; widths above 8 bytes are
+ * reported as unusable rather than clamped. */
+ if (b) return b > 8u ? 0u : b;
+ if (f->c && cfree_cg_type_kind((CfreeCompiler*)f->c, t) == CFREE_CG_TYPE_PTR) {
+ u64 sz = cfree_cg_type_size((CfreeCompiler*)f->c, t);
+ if (sz && sz <= 8u) return (u32)sz;
+ }
+ return 0;
+}
+
+/* All-ones mask covering the low `bytes` bytes of a u64. Widths of 8 or more
+ * saturate to UINT64_MAX, which also keeps the shift count strictly below 64
+ * (shifting a 64-bit value by its full width is undefined in C). bytes == 0
+ * yields an empty (zero) mask. */
+static u64 combine_width_mask(u32 bytes) {
+ return bytes >= 8u ? UINT64_MAX : ((1ull << (bytes * 8u)) - 1ull);
+}
+
+/* Compute the constant produced by applying an integer/pointer convert `k`
+ * (src width `sb`, dst width `db`, both bytes) to immediate `imm`. Returns 0
+ * for kinds that aren't a bit-preserving integer/pointer move (the float
+ * conversions reinterpret the value and must not be folded this way). The
+ * materialized-constant model: a load_imm puts (u64)imm into a register, so a
+ * widening move/zext keeps the low `sb` bits and a trunc/narrowing keeps the
+ * low `db` bits.
+ *
+ * On success returns 1 and stores the folded constant in `*out`. On failure
+ * (unsupported kind, or either width is 0) returns 0 and leaves `*out`
+ * untouched. */
+static int const_convert_value(ConvKind k, i64 imm, u32 sb, u32 db, i64* out) {
+ /* A zero width is the "unknown" result of combine_scalar_width_bytes. */
+ if (!sb || !db) return 0;
+ u64 src_mask = combine_width_mask(sb);
+ u64 dst_mask = combine_width_mask(db);
+ u64 v = (u64)imm;
+ switch (k) {
+ case CV_BITCAST:
+ case CV_ZEXT:
+ /* Only the low `sb` bits are meaningful; the rest become zero. */
+ *out = (i64)((v & src_mask) & dst_mask);
+ return 1;
+ case CV_TRUNC:
+ *out = (i64)(v & dst_mask);
+ return 1;
+ case CV_SEXT: {
+ u64 low = v & src_mask;
+ u32 sbits = sb * 8u;
+ /* sbits < 64 guards the 8-byte source, where there are no high bits to
+ * fill and ~src_mask is 0 anyway. */
+ if (sbits < 64u && (low & (1ull << (sbits - 1u))))
+ low |= ~src_mask; /* replicate the sign bit into the high bits */
+ *out = (i64)(low & dst_mask);
+ return 1;
+ }
+ default:
+ return 0; /* CV_ITOF_*, CV_FTOI_*, CV_FEXT, CV_FTRUNC */
+ }
+}
+
/* ---- producer-retarget legality (for sink rewrite) ---- */
static int binop_is_commutative(BinOp op) {
@@ -1230,6 +1282,65 @@ static int try_ret_retarget(Func* f, Block* bl, i32 i) {
return 1;
}
+/* ---- Rewrite 5: constant-fold a convert of a load_imm into a load_imm ----
+ *
+ * `load_imm rT,k ; convert rD,rT` where the convert is a bit-preserving
+ * integer/pointer move collapses to `load_imm rD,k'` (k' = convert applied to
+ * k). The original load_imm, now dead, is removed by post-combine DCE.
+ *
+ * This is the convert-shaped sibling of the load_imm-into-copy fold (see
+ * try_substitute / combine_subst_slot). It matters for pointer-typed constant
+ * call args (e.g. NewTreeNode((void*)0, ...)): the arg-register hint reaches
+ * the convert's def but not its load_imm source, so without this the source
+ * lands in a scratch and the convert emits a `mov rD, scratch`. Folding lets
+ * the load_imm inherit the convert's (hinted) dst directly — `movz x0, 0`.
+ *
+ * `in` is the instruction at index `i` of ctx->bl. Returns 1 when `in` was
+ * rewritten in place (and sets ctx->block_change_p), 0 when it was left
+ * untouched. */
+static int try_fold_const_convert(CombineCtx* ctx, Inst* in, i32 i) {
+ if ((IROp)in->op != IR_CONVERT || in->nopnds < 2) return 0;
+ if (in->opnds[0].kind != OPK_REG || in->opnds[1].kind != OPK_REG) return 0;
+ /* Integer/pointer domain only: a load_imm produces an int-class value, and
+ * the bit-preserving convert kinds we fold (BITCAST/ZEXT/SEXT/TRUNC) keep it
+ * int-class. A class change (RC_INT->RC_FP, e.g. an int->float bitcast) is
+ * not bit-preserving and must not collapse to a load_imm. */
+ if (in->opnds[0].cls != RC_INT || in->opnds[1].cls != RC_INT) return 0;
+
+ /* Reject when no producer is recorded for the source register (negative
+ * index) or the recorded producer is not strictly before this convert. */
+ i32 prod_idx = ctx_producer_of(ctx, in->opnds[1].cls, in->opnds[1].v.reg);
+ if (prod_idx < 0 || prod_idx >= i) return 0;
+ Inst* prod = &ctx->bl->insts[prod_idx];
+ if ((IROp)prod->op != IR_LOAD_IMM || prod->nopnds < 1 ||
+ prod->opnds[0].kind != OPK_REG ||
+ !same_phys_reg(&prod->opnds[0], &in->opnds[1]))
+ return 0;
+
+ /* Widths come from the convert's own operand types: opnds[1] is the source,
+ * opnds[0] the destination. A 0 width (unknown type) makes
+ * const_convert_value refuse the fold. For a convert, extra.imm holds the
+ * ConvKind; for the producing load_imm it holds the constant. */
+ u32 sb = combine_scalar_width_bytes(ctx->f, in->opnds[1].type);
+ u32 db = combine_scalar_width_bytes(ctx->f, in->opnds[0].type);
+ i64 value = 0;
+ if (!const_convert_value((ConvKind)in->extra.imm, prod->extra.imm, sb, db,
+ &value))
+ return 0;
+
+ /* Single-use + not-live-out gate on the load_imm dst, mirroring the SK_IMM
+ * gate in try_substitute_for_reg: only fold when this convert is the sole
+ * use, so we replace rather than duplicate the materialization. */
+ Operand src_def = prod->opnds[0];
+ int killed = 0;
+ if (count_uses_in_live_range(ctx->f, ctx->bl, prod_idx, &src_def, &killed) !=
+ 1)
+ return 0;
+ if (!killed && opt_block_live_out_has_phys_reg(ctx->f, ctx->hard_live,
+ ctx->bl->id, &src_def))
+ return 0;
+
+ /* Rewrite the convert in place into a load_imm of the converted constant.
+ * The dst operand (opnds[0]) is the hinted arg/return reg and is kept; the
+ * source operand is dropped. The dead load_imm at prod_idx is left for DCE.
+ * Note that extra.imm changes meaning here: ConvKind -> folded constant. */
+ in->op = (u16)IR_LOAD_IMM;
+ in->type = in->opnds[0].type;
+ in->nopnds = 1;
+ in->extra.imm = value;
+ ctx->block_change_p = 1;
+ return 1;
+}
+
/* ---- per-BB driver ---- */
static int opt_combine_fold_block(Func* f, Block* bl,
@@ -1273,6 +1384,7 @@ static int opt_combine_fold_block(Func* f, Block* bl,
}
if (enable_o1_combine_rewrites) {
+ try_fold_const_convert(&ctx, in, i);
try_combine_exts(&ctx, in, i);
try_substitute(&ctx, in, i);
try_addr_synth(&ctx, in, i);
diff --git a/src/opt/pass_jump.c b/src/opt/pass_jump.c
@@ -9,6 +9,7 @@ typedef struct JumpCleanupCtx {
u32* emit_index_by_block;
u32* forwarded_target_by_block;
u32* forward_path;
+ u8* has_label_addr_ref;
} JumpCleanupCtx;
static u32 emit_order_index(const JumpCleanupCtx* c, u32 block) {
@@ -22,6 +23,35 @@ static u32 next_emit_block(const JumpCleanupCtx* c, u32 block) {
return c->f->emit_order[idx + 1u];
}
+/* Flag every block whose address is taken by an instruction in `f`.
+ *
+ * Scans all instructions of all blocks. An IR_LOAD_LABEL_ADDR names its target
+ * block in extra.imm; an IR_LOCAL_STATIC_DATA_LABEL_ADDR names it in
+ * aux->target. For each in-range target, refs[target] is set to 1; entries
+ * are never cleared here, so the caller supplies a zeroed array.
+ *
+ * `refs` is indexed by block id and must hold at least f->nblocks entries.
+ * A NULL `f` or `refs` makes this a no-op. */
+static void mark_label_addr_refs(Func* f, u8* refs) {
+ if (!f || !refs) return;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ switch ((IROp)in->op) {
+ case IR_LOAD_LABEL_ADDR:
+ /* Out-of-range targets are ignored rather than written. */
+ if ((u32)in->extra.imm < f->nblocks) refs[(u32)in->extra.imm] = 1;
+ break;
+ case IR_LOCAL_STATIC_DATA_LABEL_ADDR: {
+ CgIrLocalStaticLabelAux* aux =
+ (CgIrLocalStaticLabelAux*)in->extra.aux;
+ if (aux && (u32)aux->target < f->nblocks)
+ refs[(u32)aux->target] = 1;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+}
+
+/* Nonzero when `block` was flagged in c->has_label_addr_ref, i.e. some
+ * instruction takes its address. Returns 0 for a NULL context, an
+ * out-of-range block id, or a context without a flag table. */
+static int block_has_label_addr_ref(const JumpCleanupCtx* c, u32 block) {
+ if (!c || block >= c->f->nblocks) return 0;
+ return c->has_label_addr_ref && c->has_label_addr_ref[block];
+}
+
static JumpCleanupCtx jump_cleanup_ctx(Func* f) {
JumpCleanupCtx c;
c.f = f;
@@ -30,6 +60,8 @@ static JumpCleanupCtx jump_cleanup_ctx(Func* f) {
c.forwarded_target_by_block =
arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
c.forward_path = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
+ c.has_label_addr_ref =
+ arena_zarray(f->arena, u8, f->nblocks ? f->nblocks : 1u);
for (u32 b = 0; b < f->nblocks; ++b) c.emit_index_by_block[b] = BLOCK_NONE;
for (u32 b = 0; b < f->nblocks; ++b)
c.forwarded_target_by_block[b] = BLOCK_NONE;
@@ -37,6 +69,7 @@ static JumpCleanupCtx jump_cleanup_ctx(Func* f) {
u32 b = f->emit_order[i];
if (b < f->nblocks) c.emit_index_by_block[b] = i;
}
+ mark_label_addr_refs(f, c.has_label_addr_ref);
return c;
}
@@ -45,7 +78,7 @@ static int single_jump_block(const JumpCleanupCtx* c, u32 block,
Func* f = c->f;
if (block >= f->nblocks) return 0;
Block* bl = &f->blocks[block];
- if (bl->mc_label != MC_LABEL_NONE) return 0;
+ if (block_has_label_addr_ref(c, block)) return 0;
if (bl->ninsts != 1 || bl->nsucc != 1) return 0;
if ((IROp)bl->insts[0].op != IR_BR) return 0;
if (target_out) *target_out = bl->succ[0];
@@ -57,7 +90,7 @@ static int empty_fallthrough_block(const JumpCleanupCtx* c, u32 block,
Func* f = c->f;
if (block >= f->nblocks || block == f->entry) return 0;
Block* bl = &f->blocks[block];
- if (bl->mc_label != MC_LABEL_NONE) return 0;
+ if (block_has_label_addr_ref(c, block)) return 0;
if (bl->ninsts != 0 || bl->nsucc != 0) return 0;
u32 idx = emit_order_index(c, block);
if (idx == BLOCK_NONE || idx + 1u >= f->emit_order_n) return 0;
@@ -67,6 +100,30 @@ static int empty_fallthrough_block(const JumpCleanupCtx* c, u32 block,
return 1;
}
+/* Recognize a "pass-through" block: one that does nothing but hand control to
+ * its single successor, so a branch aimed at it can be retargeted past it.
+ *
+ * Accepts `block` only if it is in range, is not the function entry, is not
+ * address-taken, has exactly one successor that is not itself, and its body
+ * is either empty or a single IR_NOP / IR_SCOPE_BEGIN / IR_SCOPE_ELSE /
+ * IR_SCOPE_END instruction.
+ *
+ * Returns 1 and stores the successor in `*target_out` (when non-NULL) on a
+ * match; returns 0 otherwise without touching `*target_out`. */
+static int passthrough_succ_block(const JumpCleanupCtx* c, u32 block,
+ u32* target_out) {
+ Func* f = c->f;
+ if (block >= f->nblocks || block == f->entry) return 0;
+ Block* bl = &f->blocks[block];
+ if (block_has_label_addr_ref(c, block)) return 0;
+ if (bl->nsucc != 1) return 0;
+ /* A self-loop has no target to forward to. */
+ if (bl->succ[0] == block) return 0;
+ if (bl->ninsts != 0) {
+ if (bl->ninsts != 1) return 0;
+ switch ((IROp)bl->insts[0].op) {
+ case IR_NOP:
+ case IR_SCOPE_BEGIN:
+ case IR_SCOPE_ELSE:
+ case IR_SCOPE_END:
+ break;
+ default:
+ return 0;
+ }
+ }
+ if (target_out) *target_out = bl->succ[0];
+ return 1;
+}
+
static u32 forward_jump_target_ex(JumpCleanupCtx* c, u32 target,
int allow_empty_fallthrough) {
u32 cur = target;
@@ -84,7 +141,9 @@ static u32 forward_jump_target_ex(JumpCleanupCtx* c, u32 target,
break;
}
if (!single_jump_block(c, cur, &next) &&
- (!allow_empty_fallthrough || !empty_fallthrough_block(c, cur, &next))) {
+ (!allow_empty_fallthrough ||
+ (!passthrough_succ_block(c, cur, &next) &&
+ !empty_fallthrough_block(c, cur, &next)))) {
result = cur;
c->forwarded_target_by_block[cur] = result;
break;
@@ -169,7 +228,7 @@ static void cleanup_branch_targets(JumpCleanupCtx* c) {
}
for (u32 s = 0; s < nsucc; ++s) {
u32 target = bl->succ[s];
- u32 forwarded = forward_jump_target(c, target);
+ u32 forwarded = forward_jump_target_ex(c, target, 1);
if (forwarded < f->nblocks) bl->succ[s] = forwarded;
}
}
diff --git a/test/opt/cg_ir_lower_test.c b/test/opt/cg_ir_lower_test.c
@@ -188,8 +188,71 @@ static void converter_builds_cfg_and_pregs(void) {
tc_fini(&tc);
}
+/* Jump cleanup must thread a branch through a chain of pass-through blocks.
+ *
+ * CFG built here (emit order: entry, then, scope, empty, merge):
+ *
+ * entry : cmp_branch succ[0] -> scope, succ[1] -> then
+ * then : load_imm ; br -> merge
+ * scope : scope_end (no branch) -> empty
+ * empty : (no instructions) -> merge
+ * merge : ret
+ *
+ * After opt_jump_cleanup(OPT_JUMP_CLEANUP_CFG) and a CFG rebuild, entry's
+ * succ[0] is expected to point straight at merge, and both scope and empty
+ * are expected to have no predecessors and no successors. */
+static void jump_cleanup_threads_empty_fallthrough_target(void) {
+ TestCtx tc;
+ tc_init(&tc);
+
+ Func f;
+ memset(&f, 0, sizeof f);
+ f.c = tc.c;
+ f.arena = tc.c->tu;
+ f.entry = ir_block_new(&f);
+ u32 then_block = ir_block_new(&f);
+ u32 scope_block = ir_block_new(&f);
+ u32 empty_block = ir_block_new(&f);
+ u32 merge_block = ir_block_new(&f);
+ ir_note_emit(&f, f.entry);
+ ir_note_emit(&f, then_block);
+ ir_note_emit(&f, scope_block);
+ ir_note_emit(&f, empty_block);
+ ir_note_emit(&f, merge_block);
+
+ /* entry: two-way conditional branch; succ[0] is the edge under test. */
+ Inst* br = ir_emit(&f, f.entry, IR_CMP_BRANCH);
+ br->extra.imm = CMP_EQ;
+ f.blocks[f.entry].succ[0] = scope_block;
+ f.blocks[f.entry].succ[1] = then_block;
+ f.blocks[f.entry].nsucc = 2;
+
+ /* then: has real work, so it must not be threaded through. */
+ Inst* then_body = ir_emit(&f, then_block, IR_LOAD_IMM);
+ (void)then_body;
+ Inst* then_br = ir_emit(&f, then_block, IR_BR);
+ (void)then_br;
+ f.blocks[then_block].succ[0] = merge_block;
+ f.blocks[then_block].nsucc = 1;
+
+ /* scope: a lone scope marker with one successor. */
+ Inst* scope_end = ir_emit(&f, scope_block, IR_SCOPE_END);
+ (void)scope_end;
+ f.blocks[scope_block].succ[0] = empty_block;
+ f.blocks[scope_block].nsucc = 1;
+
+ /* empty: no instructions at all, one successor. */
+ f.blocks[empty_block].succ[0] = merge_block;
+ f.blocks[empty_block].nsucc = 1;
+
+ Inst* ret = ir_emit(&f, merge_block, IR_RET);
+ (void)ret;
+
+ opt_build_cfg(&f);
+ EXPECT(f.blocks[f.entry].succ[0] == scope_block,
+ "precondition: branch should target scope block");
+ EXPECT(f.blocks[scope_block].npreds == 1,
+ "precondition: scope block should have one predecessor");
+
+ /* Rebuild the CFG after cleanup so the pred/succ counts below reflect the
+ * retargeted edges. */
+ opt_jump_cleanup(&f, OPT_JUMP_CLEANUP_CFG);
+ opt_build_cfg(&f);
+
+ EXPECT(f.blocks[f.entry].succ[0] == merge_block,
+ "branch target should forward through empty fallthrough block");
+ EXPECT(f.blocks[scope_block].npreds == 0 && f.blocks[scope_block].nsucc == 0,
+ "scope block should become unreachable after forwarding");
+ EXPECT(f.blocks[empty_block].npreds == 0 && f.blocks[empty_block].nsucc == 0,
+ "empty block should become unreachable after forwarding");
+
+ tc_fini(&tc);
+}
+
int main(void) {
converter_builds_cfg_and_pregs();
+ jump_cleanup_threads_empty_fallthrough_target();
if (g_fails) {
fprintf(stderr, "cg-ir-lower: %d/%d failed\n", g_fails, g_checks);
return 1;
diff --git a/test/opt/zero_arg.sh b/test/opt/zero_arg.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+# Behavioral check for the O1 "zero through a temp" call-arg wart (PERCALL.md
+# item 3). A pointer-typed null argument (`(void*)0`) introduces an int->ptr
+# IR_CONVERT between the constant's `load_imm` and the call arg. At -O1 the
+# arg-register hint reaches the convert's def but not its load_imm source, so
+# the source lands in a scratch (x8) and the convert emits `mov x0, x8` --
+# routing each zero through a temp:
+#
+# movz x8, 0x0 ; mov x0, x8 ; movz x8, 0x0 ; mov x1, x8 (4 insns)
+#
+# Optimal is `movz x0, 0x0 ; movz x1, 0x0` (2 insns, no temp). This test
+# asserts the args are materialized directly into the arg registers.
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+CFREE="${CFREE:-$ROOT/build/cfree}"
+WORK="$ROOT/build/test/opt/zero_arg"
+mkdir -p "$WORK"
+
+SRC="$WORK/leaf.c"
+cat > "$SRC" <<'EOF'
+#define NULL ((void*)0)
+typedef struct tn { struct tn* l; struct tn* r; } treeNode;
+treeNode* NewTreeNode(treeNode* left, treeNode* right);
+treeNode* leaf(void) { return NewTreeNode(NULL, NULL); }
+EOF
+
+# Replay captured tool output on failure; bare `set -e` would exit silently.
+"$CFREE" cc -target aarch64-linux-gnu -O1 -std=c99 -c "$SRC" \
+  -o "$WORK/leaf.o" > "$WORK/cc.out" 2>&1 || { cat "$WORK/cc.out" >&2; exit 1; }
+"$CFREE" objdump -d "$WORK/leaf.o" > "$WORK/dis.out" 2>&1 ||
+  { cat "$WORK/dis.out" >&2; exit 1; }
+
+# Slice out the leaf() function body.
+awk '
+  /^[0-9a-f]+ <leaf>:/ { in_fn = 1; next }
+  /^[0-9a-f]+ </ { in_fn = 0 }
+  in_fn { print }
+' "$WORK/dis.out" > "$WORK/leaf.dis"
+
+fail() {
+  printf 'zero-arg check FAILED: %s\n' "$1" >&2
+  sed 's/^/ | /' "$WORK/leaf.dis" >&2
+  exit 1
+}
+
+# The wart: a register-to-register move into an arg register (the zero routed
+# through a scratch temp). Optimal codegen materializes the constant straight
+# into the arg reg, so there must be no `mov x0, x<n>` / `mov x1, x<n>`.
+# (`movz x0, ...` is a separate mnemonic and is not matched by `\bmov x`.)
+if grep -Eq '\bmov[[:space:]]+x[01],[[:space:]]*x[0-9]' "$WORK/leaf.dis"; then
+  fail 'arg zero routed through a scratch temp (mov x0/x1, x<n>)'
+fi
+
+# Both args must be loaded directly with movz into x0 and x1.
+for reg in x0 x1; do
+  grep -Eq "\\bmovz[[:space:]]+$reg," "$WORK/leaf.dis" ||
+    fail "arg $reg not materialized directly (movz $reg, ...)"
+done
+
+printf 'zero-arg: ok\n'
diff --git a/test/test.mk b/test/test.mk
@@ -509,7 +509,7 @@ test-macho: lib $(TEST_RT_DEP) $(ROUNDTRIP_BIN_MACHO) $(LINK_EXE_RUNNER) $(JIT_R
OPT_TEST_BIN = build/test/cg_ir_lower_test
TINY_INLINE_TEST_BIN = build/test/tiny_inline_test
-test-opt: bin $(OPT_TEST_BIN) test-opt-tiny-inline test-opt-inline
+test-opt: bin $(OPT_TEST_BIN) test-opt-tiny-inline test-opt-inline test-opt-zero-arg
$(OPT_TEST_BIN)
$(OPT_TEST_BIN): test/opt/cg_ir_lower_test.c $(LIB_OBJS)
@@ -527,6 +527,11 @@ $(TINY_INLINE_TEST_BIN): test/opt/tiny_inline_test.c $(LIB_OBJS)
test-opt-inline: bin
@CFREE=$(abspath $(BIN)) bash test/opt/run.sh
+# Behavioral disasm check: a pointer-typed null call arg is not routed through
+# a scratch temp at -O1 (PERCALL.md item 3, "zero through a temp"). CFREE is
+# passed explicitly so the script runs the binary named by $(BIN) rather than
+# its own build/cfree default.
+test-opt-zero-arg: bin
+ @CFREE=$(abspath $(BIN)) bash test/opt/zero_arg.sh
+
+
test-parse: test-parse-ok test-parse-err
test-parse-ok: lib $(TEST_RT_DEP) $(PARSE_RUNNER) $(ROUNDTRIP_BIN) $(LINK_EXE_RUNNER) $(JIT_RUNNER)