commit 0c5222c1d9651c72759c16bd0e19caa8806a50c0
parent 704ea4520ffd3e01d0d9f37a02f12119f6e38edc
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 20 May 2026 12:24:26 -0700
c_target: fix several path-C bugs in test-parse
src/arch/c_target/emit.c:
- c_ret: cast operand to the function's return type for scalars; CG
reuses Reg ids across types so the C local backing the return
value may not match. Spell the defensive ret_void epilogue on a
non-void function as __builtin_unreachable() instead of `return;`.
- c_emit_addr_deref: when an OPK_LOCAL access type differs from the
slot's declared type, emit `*(T*)&slot_N` instead of plain slot_N.
- data emission: run a second pass for constructor fixups after all
data-symbol storage definitions so __cfree_init_<name> can refer
to any other data symbol regardless of emission order.
- data extern decls: drop the `referenced` gate (unreliable when the
C target writes names directly into source) and recognize the
SK_OBJ + section=NONE shape used by `extern T x __attribute__((weak));`.
On Mach-O spell weak undef as `weak_import` rather than `weak`.
- OPK_GLOBAL with aggregate type: emit the deref `*(T*)((char*)sym+ofs)`
instead of an illegal struct-typed cast.
- __builtin_*_overflow intrinsics: wrap unsigned variants in a block
with a scratch result variable of the correct signedness so gcc
actually checks unsigned overflow.
test/parse/run.sh:
- Probe host __LDBL_MANT_DIG__ and skip the ldbl128_* path-C cases
when the host long double isn't 128-bit.
- Skip attr_p2_08_weak_undef path-C on Mach-O (ld64 rejects
unresolved weak undefs without a backing dylib).
- When path C is the only enabled path, restrict to opt=0 so opt!=0
workers don't write empty event files (which the replay loop
flagged as "missing worker result").
test/toy/run.sh:
- Guard the empty err_cases loop under `set -u` (bash before 4.4,
including 3.x, trips on `${arr[@]}` when arr is empty).
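test/wasm/run.sh:
- Rewrite the path-C wrapper for the wasm frontend's instance ABI:
calloc an instance block and linear memory, point the instance's
memory prefixes at that memory, then call __cfree_wasm_init(instance)
and test_main(instance) instead of test_main(void).
`make test-cbackend` now passes across all three frontends:
parse 417/0/31, toy 124/0/3, wasm 32/0/0.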
Diffstat:
4 files changed, 267 insertions(+), 37 deletions(-)
diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c
@@ -450,24 +450,47 @@ void c_emit_operand(CTarget* t, Operand op) {
return;
}
case OPK_GLOBAL: {
- /* Address expression for `&sym + addend`. The data symbol is declared
- * elsewhere (either as a function forward, or as `uint8_t name[N]`
- * data emitted at finalize); here we just form the address with the
- * right pointer type. */
+ /* OPK_GLOBAL carries `&sym + addend`. How we spell it depends on
+ * op.type:
+ * - pointer/scalar/void: the value IS the address, so cast through
+ * `((T)((char*)sym + addend))`.
+ * - aggregate (RECORD/ARRAY): the symbol's storage is an aggregate
+ * value; emit `(*(T*)((char*)sym + addend))` so the deref reads
+ * the aggregate value (used by call args that pass struct
+ * by-value via a global initialized buffer). */
+ obj_sym_mark_referenced(t->obj, op.v.global.sym);
const char* nm = c_sym_name(t, op.v.global.sym);
- cbuf_puts(&t->body, "((");
- if (op.type != CFREE_CG_TYPE_NONE) {
+ const CgType* gty = (op.type != CFREE_CG_TYPE_NONE)
+ ? cg_type_get(t->c,
+ api_unalias_type(t->c, op.type))
+ : NULL;
+ int is_aggregate = gty && (gty->kind == CFREE_CG_TYPE_RECORD ||
+ gty->kind == CFREE_CG_TYPE_ARRAY);
+ if (is_aggregate) {
+ cbuf_puts(&t->body, "(*(");
c_emit_type(t, &t->body, op.type);
+ cbuf_puts(&t->body, "*)((char*)");
+ cbuf_puts(&t->body, nm);
+ if (op.v.global.addend != 0) {
+ cbuf_puts(&t->body, " + ");
+ cbuf_put_i64(&t->body, op.v.global.addend);
+ }
+ cbuf_puts(&t->body, "))");
} else {
- cbuf_puts(&t->body, "void*");
- }
- cbuf_puts(&t->body, ")((char*)");
- cbuf_puts(&t->body, nm);
- if (op.v.global.addend != 0) {
- cbuf_puts(&t->body, " + ");
- cbuf_put_i64(&t->body, op.v.global.addend);
+ cbuf_puts(&t->body, "((");
+ if (op.type != CFREE_CG_TYPE_NONE) {
+ c_emit_type(t, &t->body, op.type);
+ } else {
+ cbuf_puts(&t->body, "void*");
+ }
+ cbuf_puts(&t->body, ")((char*)");
+ cbuf_puts(&t->body, nm);
+ if (op.v.global.addend != 0) {
+ cbuf_puts(&t->body, " + ");
+ cbuf_put_i64(&t->body, op.v.global.addend);
+ }
+ cbuf_puts(&t->body, "))");
}
- cbuf_puts(&t->body, "))");
return;
}
default: {
@@ -608,14 +631,31 @@ static void c_emit_addr_deref(CTarget* t, Operand addr, CfreeCgTypeId access_typ
char buf[24];
SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
switch (addr.kind) {
- case OPK_LOCAL:
- /* slot_N is already a typed C variable matching its declared type.
- * For access_type that differs (rare; e.g. bit-casting a slot), we'd
- * need to memcpy; defer until a fixture forces it. */
+ case OPK_LOCAL: {
+ /* slot_N is a typed C variable. If access_type matches the slot's
+ * declared type, emit `slot_N` directly. Otherwise, treat the slot as
+ * raw storage and deref through `*(access_ty*)&slot_N` so the access
+ * sees the requested type (CG can store/load a slot with a wider or
+ * differently typed view than the declared one). */
c_slot_name(addr.v.frame_slot, buf, sizeof buf);
- cbuf_puts(&t->body, buf);
+ u32 idx = (u32)addr.v.frame_slot - 1u;
+ CfreeCgTypeId slot_ty =
+ (idx < t->nslots) ? t->slot_type[idx] : (CfreeCgTypeId)0;
+ if (access_type == 0 || slot_ty == 0 ||
+ api_unalias_type(t->c, access_type) ==
+ api_unalias_type(t->c, slot_ty)) {
+ cbuf_puts(&t->body, buf);
+ } else {
+ cbuf_puts(&t->body, "(*(");
+ c_emit_type(t, &t->body, access_type);
+ cbuf_puts(&t->body, "*)&");
+ cbuf_puts(&t->body, buf);
+ cbuf_puts(&t->body, ")");
+ }
return;
+ }
case OPK_GLOBAL: {
+ obj_sym_mark_referenced(t->obj, addr.v.global.sym);
const char* nm = c_sym_name(t, addr.v.global.sym);
cbuf_puts(&t->body, "(*(");
c_emit_type(t, &t->body, access_type);
@@ -677,6 +717,7 @@ static void c_emit_lvalue_addr(CTarget* t, Operand lv, CfreeCgTypeId dst_type) {
cbuf_puts(&t->body, ")");
return;
case OPK_GLOBAL: {
+ obj_sym_mark_referenced(t->obj, lv.v.global.sym);
const char* nm = c_sym_name(t, lv.v.global.sym);
cbuf_puts(&t->body, "((");
c_emit_type(t, &t->body, dst_type);
@@ -1580,10 +1621,39 @@ void c_addr_of(CGTarget* T, Operand dst, Operand lv) {
void c_ret(CGTarget* T, const CGABIValue* val) {
CTarget* t = (CTarget*)T;
+ /* CG emits a defensive ret_void epilogue at the end of every function. For
+ * a non-void function that's unreachable; emitting a bare `return;` would
+ * trip -Wreturn-type. Spell it as `__builtin_unreachable()` so the host C
+ * compiler sees the path is dead without us inventing a fake value. */
+ if (!val && t->cur_fn) {
+ CfreeCgTypeId rt = cg_type_func_ret_id(t->c, t->cur_fn->fn_type);
+ if (rt && !cg_type_is_void(t->c, rt)) {
+ cbuf_puts(&t->body, " __builtin_unreachable();\n");
+ return;
+ }
+ }
cbuf_puts(&t->body, " return");
if (val) {
cbuf_puts(&t->body, " ");
- c_emit_operand(t, val->storage);
+ /* The function's declared C return type and the operand's underlying C
+ * type may differ: CG reuses Reg ids across types, so the C local backing
+ * `val->storage` was declared at first sighting and may not match. For
+ * scalar return types, bridge through (ret_type)(uintptr_t)(op) so
+ * -Wint-conversion doesn't trip; for aggregates we trust c_emit_operand
+ * (it already deref-casts via the slot type). */
+ CfreeCgTypeId ret_type = t->cur_fn
+ ? cg_type_func_ret_id(t->c, t->cur_fn->fn_type)
+ : (CfreeCgTypeId)0;
+ const CgType* rty = ret_type ? cg_type_get(t->c,
+ api_unalias_type(t->c, ret_type))
+ : NULL;
+ int is_aggregate = rty && (rty->kind == CFREE_CG_TYPE_RECORD ||
+ rty->kind == CFREE_CG_TYPE_ARRAY);
+ if (ret_type && !is_aggregate) {
+ c_emit_operand_as(t, val->storage, ret_type);
+ } else {
+ c_emit_operand(t, val->storage);
+ }
}
cbuf_puts(&t->body, ";\n");
}
@@ -1816,32 +1886,57 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst,
case INTRIN_USUB_OVERFLOW:
case INTRIN_SMUL_OVERFLOW:
case INTRIN_UMUL_OVERFLOW: {
- /* dsts[0] = value reg, dsts[1] = i1 overflow flag */
+ /* dsts[0] = value reg, dsts[1] = i1 overflow flag.
+ *
+ * Signedness comes from the intrinsic kind, but cfree's CG int type
+ * is width-only and the C target declares every result as a signed
+ * fixed-width (int{8,16,32,64}_t). __builtin_*_overflow keys its
+ * overflow check on the result type, so passing the signed reg
+ * directly makes a UADD test as if it were signed and miss true
+ * unsigned overflow. Wrap the call in a block with a scratch result
+ * of the right signedness and copy it back through the int/uint
+ * bridge. */
if (ndst != 2 || narg != 2) {
compiler_panic(t->c, loc,
"C target: overflow-intrin: bad shape");
}
+ int is_unsigned = (k == INTRIN_UADD_OVERFLOW ||
+ k == INTRIN_USUB_OVERFLOW ||
+ k == INTRIN_UMUL_OVERFLOW);
const char* fn = c_overflow_builtin(k);
c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls);
c_ensure_reg(t, dsts[1].v.reg, dsts[1].type, (RegClass)dsts[1].cls);
char vbuf[24], obuf[24];
c_reg_name(dsts[0].v.reg, vbuf, sizeof vbuf);
c_reg_name(dsts[1].v.reg, obuf, sizeof obuf);
- /* gcc/clang's __builtin_*_overflow writes the result through a pointer
- * and returns the overflow as int. */
- cbuf_puts(&t->body, " ");
+ u32 w = c_int_width_for_signedness(t, dsts[0].type);
+ const char* sty = c_int_type_name_for_width(w, !is_unsigned);
+ if (!sty) {
+ compiler_panic(t->c, loc,
+ "C target: overflow-intrin: unsupported width %u",
+ (unsigned)w);
+ }
+ cbuf_puts(&t->body, " { ");
+ cbuf_puts(&t->body, sty);
+ cbuf_puts(&t->body, " __ovsc; ");
cbuf_puts(&t->body, obuf);
cbuf_puts(&t->body, " = (");
c_emit_type(t, &t->body, dsts[1].type);
cbuf_puts(&t->body, ")");
cbuf_puts(&t->body, fn);
- cbuf_puts(&t->body, "(");
+ cbuf_puts(&t->body, "((");
+ cbuf_puts(&t->body, sty);
+ cbuf_puts(&t->body, ")");
c_emit_operand(t, args[0]);
- cbuf_puts(&t->body, ", ");
+ cbuf_puts(&t->body, ", (");
+ cbuf_puts(&t->body, sty);
+ cbuf_puts(&t->body, ")");
c_emit_operand(t, args[1]);
- cbuf_puts(&t->body, ", &");
+ cbuf_puts(&t->body, ", &__ovsc); ");
cbuf_puts(&t->body, vbuf);
- cbuf_puts(&t->body, ");\n");
+ cbuf_puts(&t->body, " = (");
+ c_emit_type(t, &t->body, dsts[0].type);
+ cbuf_puts(&t->body, ")__ovsc; }\n");
return;
}
case INTRIN_SETJMP: {
@@ -2619,9 +2714,33 @@ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) {
* obj symbol's `referenced` bit is exactly the right signal. */
/* SK_TLS with no defining section = extern TLS — falls through to the
* undef branch below with the `_Thread_local` qualifier. */
- if (os->kind == SK_UNDEF ||
- (is_tls && os->section_id == OBJ_SEC_NONE)) {
- if (!os->referenced) return;
+ /* Extern: SK_UNDEF, or any other defined-kind sym that the producer
+ * marked as having no defining section (the C frontend uses SK_OBJ +
+ * section=NONE for `extern T x __attribute__((weak));`). */
+ int is_extern = (os->kind == SK_UNDEF) ||
+ (os->kind != SK_COMMON && os->section_id == OBJ_SEC_NONE);
+ if (is_extern) {
+ /* Always declare extern data syms in C source: the host cc tolerates
+ * unused externs, and the ObjSym::referenced bit isn't reliably set on
+ * syms the C target only addresses by writing the name into the source
+ * (no relocation gets emitted against them).
+ *
+ * Weak externs need different attributes per object format: on Mach-O
+ * the `weak` attribute requires a definition; the right spelling for an
+ * undefined weak ref is `__attribute__((weak_import))`. On ELF/PE the
+ * existing `weak` attribute works as expected. */
+ if (os->bind == SB_WEAK) {
+ if (t->c->target.obj == CFREE_OBJ_MACHO) {
+ cbuf_puts(b, "__attribute__((weak_import)) ");
+ } else {
+ cbuf_puts(b, "__attribute__((weak)) ");
+ }
+ }
+ if (os->vis == SV_HIDDEN) {
+ cbuf_puts(b, "__attribute__((visibility(\"hidden\"))) ");
+ } else if (os->vis == SV_PROTECTED) {
+ cbuf_puts(b, "__attribute__((visibility(\"protected\"))) ");
+ }
cbuf_puts(b, "extern ");
if (is_tls) cbuf_puts(b, "_Thread_local ");
cbuf_puts(b, "uint8_t ");
@@ -2692,7 +2811,27 @@ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) {
h->free(h, bytes, size);
cbuf_puts(b, ";\n");
}
- if (has_relocs) c_emit_sym_relocs_fixup(t, nm, os->section_id, base, size);
+ (void)nm;
+ (void)has_relocs;
+}
+
+/* Emit only the constructor-time fixups for a single data symbol, if any.
+ * Storage must already be defined (emitted by c_emit_data_symbol). Run as a
+ * second pass so that __cfree_init_<name> sees forward-defined names for
+ * any relocation target later in the symbol order. */
+static void c_emit_data_symbol_fixups(CTarget* t, ObjSymId id,
+ const ObjSym* os) {
+ if (os->kind == SK_FUNC || os->kind == SK_IFUNC) return;
+ if (os->kind == SK_SECTION || os->kind == SK_FILE) return;
+ if (os->kind == SK_UNDEF || os->kind == SK_COMMON) return;
+ if (os->section_id == OBJ_SEC_NONE) return;
+ const Section* sec = obj_section_get(t->obj, os->section_id);
+ if (!c_is_data_section(sec)) return;
+ u32 base = (u32)os->value;
+ u32 size = (u32)os->size;
+ if (!c_sym_has_relocs(t, os->section_id, base, size)) return;
+ const char* nm = c_sym_name(t, id);
+ c_emit_sym_relocs_fixup(t, nm, os->section_id, base, size);
}
static void c_emit_data(CTarget* t) {
@@ -2704,6 +2843,17 @@ static void c_emit_data(CTarget* t) {
c_emit_data_symbol(t, e.id, e.sym);
}
obj_symiter_free(it);
+
+ /* Second pass: emit constructor fixups after all data storage has been
+ * declared. This lets a __cfree_init_<name> body reference any other data
+ * symbol regardless of emission order. */
+ it = obj_symiter_new(t->obj);
+ if (!it) return;
+ while (obj_symiter_next(it, &e)) {
+ if (!e.sym) continue;
+ c_emit_data_symbol_fixups(t, e.id, e.sym);
+ }
+ obj_symiter_free(it);
}
/* === finalize / destroy === */
diff --git a/test/parse/run.sh b/test/parse/run.sh
@@ -364,6 +364,24 @@ else
printf ' %s c-wrapper (host CC failed)\n' "$(color_yel warn)" >&2
fi
+# Probe whether the host C compiler treats `long double` as 128-bit
+# (IEEE binary128). The cfree target the C path uses (host arch +
+# host OS) matches the host compiler, and the ldbl128_* fixtures
+# early-return 0 unless __LDBL_MANT_DIG__ == 113. On a host without
+# 128-bit long double the fixture never reaches its 128-bit code path
+# and returns 0, which then fails the non-zero expected. Skip path C
+# for these tests when the host doesn't provide 128-bit ldbl.
+HOST_LDBL128=0
+LDBL_PROBE_SRC="$BUILD_DIR/parse_ldbl_probe.c"
+LDBL_PROBE_BIN="$BUILD_DIR/parse_ldbl_probe"
+cat > "$LDBL_PROBE_SRC" <<'EOF'
+int main(void) { return __LDBL_MANT_DIG__ == 113 ? 0 : 1; }
+EOF
+if $CC -std=c11 "$LDBL_PROBE_SRC" -o "$LDBL_PROBE_BIN" 2>/dev/null \
+ && "$LDBL_PROBE_BIN" 2>/dev/null; then
+ HOST_LDBL128=1
+fi
+
printf 'Running cases (%s jobs, opt levels: %s)...\n' "$TEST_JOBS" "$OPT_LEVELS"
# ---- per-case loop ---------------------------------------------------------
@@ -381,10 +399,19 @@ E_LINK_MS=()
E_EXPECTED=()
FILTERED_CASES=()
+# Path C forces opt_level=0 internally — when it's the only enabled path,
+# higher opt levels duplicate identical work and (because no other path
+# emits anything) leave a zero-byte worker event file that the replay
+# loop flags as "missing worker result". Restrict to opt=0 in that case.
+case_opt_levels="$OPT_LEVELS"
+if [ $RUN_D -eq 0 ] && [ $RUN_R -eq 0 ] && [ $RUN_E -eq 0 ] && \
+ [ $RUN_J -eq 0 ] && [ $RUN_C -eq 1 ]; then
+ case_opt_levels="0"
+fi
for src in "${CASES[@]}"; do
name="$(basename "$src" .c)"
[ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]] && continue
- for opt in $OPT_LEVELS; do
+ for opt in $case_opt_levels; do
FILTERED_CASES+=("$opt:$src")
done
done
@@ -530,6 +557,25 @@ run_parse_case() {
emit_event "$event" SKIP "$name/C" "$reason"
run_c=0
fi
+ # ldbl128_* tests assert 128-bit long-double semantics. Skip them on
+ # path C when the host C compiler doesn't provide 128-bit ldbl (the
+ # test's `if (__LDBL_MANT_DIG__ != 113) return 0;` early-out can't be
+ # reconciled with a non-zero expected).
+ if [ $run_c -eq 1 ] && [ $HOST_LDBL128 -eq 0 ] && \
+ [[ "$base_name" == ldbl128_* ]] && \
+ [[ "$base_name" != ldbl128_01_* ]]; then
+ emit_event "$event" SKIP "$name/C" "host long double is not 128-bit"
+ run_c=0
+ fi
+ # Mach-O's static linker rejects unresolved weak undef refs that
+ # aren't backed by a dylib. ELF lets them resolve to 0 at link time,
+ # which is what the test expects.
+ if [ $run_c -eq 1 ] && [ "$HOST_OBJ_FMT" = "macho" ] && \
+ [[ "$base_name" == attr_p2_08_weak_undef ]]; then
+ emit_event "$event" SKIP "$name/C" \
+ "Mach-O static link rejects weak undef ref without dylib"
+ run_c=0
+ fi
if [ $run_c -eq 1 ]; then
if [ $have_c_wrapper -eq 1 ] && [ $is_native_target -eq 1 ]; then
t0=$(now_ms)
diff --git a/test/toy/run.sh b/test/toy/run.sh
@@ -412,7 +412,9 @@ if [ $RUN_R -eq 1 ] || [ $RUN_L -eq 1 ] || [ $RUN_X -eq 1 ]; then
else
err_cases=()
fi
-for src in "${err_cases[@]}"; do
+# Bash before 4.4 trips on `${arr[@]}` when arr is empty under `set -u`; `:-` yields one empty word instead, skipped below.
+for src in "${err_cases[@]:-}"; do
+ [ -n "$src" ] || continue
name="$(basename "$src" .toy)"
if [ -n "$FILTER" ] && [[ "$name" != *"$FILTER"* ]]; then
continue
diff --git a/test/wasm/run.sh b/test/wasm/run.sh
@@ -131,10 +131,42 @@ C_WRAPPER_OBJ="$BUILD_DIR/wasm_c_wrapper.o"
have_c_wrapper=0
if [ "$RUN_C" -eq 1 ]; then
cat > "$C_WRAPPER_SRC" <<'EOF'
-/* Generated by test/wasm/run.sh — bridges main() to test_main(). */
+/* Generated by test/wasm/run.sh — bridges main() to test_main().
+ *
+ * Wasm frontend exports use the internal instance ABI:
+ * __cfree_wasm_init(instance)
+ * test_main(instance)
+ *
+ * Mirrors test/wasm/harness/start_wasm.c but runs hosted (uses calloc),
+ * since the C target is compiled with the host toolchain. */
#include <stdint.h>
-extern int32_t test_main(void);
-int main(void) { return (int)test_main(); }
+#include <stdlib.h>
+
+extern void __cfree_wasm_init(void *);
+extern int32_t test_main(void *);
+
+typedef struct {
+ unsigned char *data;
+ unsigned long long pages;
+ unsigned long long max_pages;
+ unsigned int flags;
+} WasmStartMemoryPrefix;
+
+#define WASM_START_MEMORY_PREFIX_COUNT 8u
+#define WASM_START_INSTANCE_SIZE (64u * 1024u)
+#define WASM_START_MEMORY_SIZE (16u * 1024u * 1024u)
+
+int main(void) {
+ void *instance = calloc(1, WASM_START_INSTANCE_SIZE);
+ unsigned char *memory = calloc(1, WASM_START_MEMORY_SIZE);
+ if (!instance || !memory) return 1;
+ for (unsigned int i = 0; i < WASM_START_MEMORY_PREFIX_COUNT; ++i) {
+ ((WasmStartMemoryPrefix *)instance)[i].data =
+ memory + i * (WASM_START_MEMORY_SIZE / WASM_START_MEMORY_PREFIX_COUNT);
+ }
+ __cfree_wasm_init(instance);
+ return (int)test_main(instance);
+}
EOF
if $HOST_CC -std=gnu99 -c "$C_WRAPPER_SRC" -o "$C_WRAPPER_OBJ" \
2>"$BUILD_DIR/wasm_c_wrapper.err"; then