commit da199356cebfa4d53f2874216ea8b1dc84d0dc5b
parent 8fb0cbbf0d0cd9cd03e25e9de96c8edfa3576a56
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 20 May 2026 14:41:24 -0700
c_target: cut bridge-cast noise; support Mach-O TLS
Readability — the emitted C now skips the (T)(uintptr_t) bridge when the
RHS type already matches the declared type, drops trailing unreachable
`return 0;` epilogues, uses `= 0` instead of `= {0}` for scalar locals,
and elides the redundant `((int32_t)((int32_t)23))` double cast on
signed-typed operands. `c_emit_reg_assign_open` now takes an RHS type
hint; call sites pass it where the result type is known and pass 0 (keep
the bridge) where reg-id reuse could legitimately mix types.
TLS — Mach-O TLS data definitions previously panicked-as-skip because
obj_tls.c lowers the user-visible sym to a 24-byte TLV descriptor with
the initial bytes living in a synthesized `<name>$tlv$init` aux sym. The
C target now resolves the descriptor's R_ABS64 reloc at offset +16 to
find the init sym, emits one `_Thread_local _Alignas(A) uint8_t
name[N] = {bytes};` with the descriptor's user-facing name and the init
sym's contents, and skips the aux sym. The host C compiler reconstructs
whatever TLV plumbing it needs. The `__tlv_bootstrap` extern that
obj_tls.c synthesizes is filtered out of the emitted source. ELF TLS is
unchanged.
432/432 parse cases (-Wall -Wextra -Werror under both gcc-14 and clang)
plus the two _Thread_local cases that were SKIPs now pass end-to-end.
Diffstat:
2 files changed, 293 insertions(+), 45 deletions(-)
diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c
@@ -30,6 +30,9 @@ static void c_ensure_typedef(CTarget* t, CfreeCgTypeId tid);
static const char* c_typedef_name(CTarget* t, CfreeCgTypeId tid);
static const char* c_typename(CTarget* t, CfreeCgTypeId type);
static int c_operand_is_ptr_typed(CTarget* t, Operand op);
+/* Private accessor on ObjBuilder (defined in obj/obj.c, not in obj.h).
+ * Same forward-decl trick as obj_tls.c uses. This file compares its result
+ * against SK_UNDEF symbol ids on Mach-O to recognize the synthesized
+ * `__tlv_bootstrap` extern and keep it out of the emitted source. */
+ObjSymId obj_tlv_bootstrap_get(const ObjBuilder*);
/* === Writer helpers === */
@@ -370,6 +373,19 @@ static void c_grow_slot_table(CTarget* t, u32 needed) {
t->slot_cap = newcap;
}
+/* Finish a local declaration of type `ty` by appending the
+ * ` __attribute__((unused)) = INIT;` tail to the decls buffer.
+ *
+ * Record and array types (after alias resolution) get `= {0}`, the only
+ * form that compiles for aggregates. Everything else -- including ty == 0
+ * and ids that cg_type_get cannot resolve -- gets the more readable `= 0`. */
+static void c_emit_zero_init(CTarget* t, CfreeCgTypeId ty) {
+  const char* tail = " __attribute__((unused)) = 0;\n";
+  if (ty) {
+    const CgType* resolved = cg_type_get(t->c, api_unalias_type(t->c, ty));
+    if (resolved && (resolved->kind == CFREE_CG_TYPE_RECORD ||
+                     resolved->kind == CFREE_CG_TYPE_ARRAY)) {
+      tail = " __attribute__((unused)) = {0};\n";
+    }
+  }
+  cbuf_puts(&t->decls, tail);
+}
+
void c_ensure_reg(CTarget* t, Reg r, CfreeCgTypeId type, RegClass cls) {
(void)cls;
if (r == (Reg)REG_NONE) {
@@ -386,10 +402,10 @@ void c_ensure_reg(CTarget* t, Reg r, CfreeCgTypeId type, RegClass cls) {
char buf[24];
c_reg_name(r, buf, sizeof buf);
cbuf_puts(&t->decls, buf);
- /* `= {0}` zeroes scalars and aggregates uniformly. Kills
- * -Wsometimes-uninitialized for control flow clang can't reason through;
- * the host C compiler DSEs the init when a real assignment dominates. */
- cbuf_puts(&t->decls, " __attribute__((unused)) = {0};\n");
+ /* Zero-init kills -Wsometimes-uninitialized for control flow clang can't
+ * reason through; the host C compiler DSEs the init when a real
+ * assignment dominates. Scalars get `= 0`, aggregates `= {0}`. */
+ c_emit_zero_init(t, type);
}
/* Emit a signed-int64 literal. INT64_MIN can't be written directly: clang
@@ -531,9 +547,46 @@ static int c_type_is_float(CTarget* t, CfreeCgTypeId type) {
return ty && ty->kind == CFREE_CG_TYPE_FLOAT;
}
+/* Alias-insensitive type equality: nonzero iff both ids are nonzero and
+ * resolve to the same CG type through api_unalias_type. A zero id (type
+ * unknown) never compares equal, not even to another zero. */
+static int c_types_equiv(CTarget* t, CfreeCgTypeId a, CfreeCgTypeId b) {
+  int both_known = (a != 0) && (b != 0);
+  if (!both_known) {
+    return 0;
+  }
+  return api_unalias_type(t->c, a) == api_unalias_type(t->c, b);
+}
+
+/* Report the CG type of the C expression that c_emit_operand will print
+ * for `op`.
+ *
+ *  - Declared register: the type recorded in reg_type[] for that reg.
+ *    Reg-id reuse can lock the first-sighting decl type to something other
+ *    than op.type; the emitted `vN` then has the declared type, and a
+ *    downstream cast back to op.type is still needed (the uintptr_t bridge
+ *    handles that).
+ *  - In-range frame slot: the slot's own type when op.type is 0 or
+ *    alias-equivalent to it (c_emit_operand prints plain `slot_N`);
+ *    otherwise op.type, because a deref-cast to op.type is printed.
+ *  - Anything else: op.type. */
+static CfreeCgTypeId c_operand_emit_type(CTarget* t, Operand op) {
+  switch (op.kind) {
+    case OPK_REG:
+      if ((u32)op.v.reg < t->reg_cap && t->reg_declared[op.v.reg]) {
+        return t->reg_type[op.v.reg];
+      }
+      break;
+    case OPK_LOCAL: {
+      /* The `- 1u` maps the slot id to a table index; an id of 0 wraps to
+       * the largest u32 and is rejected by the range check. */
+      u32 slot = (u32)op.v.frame_slot - 1u;
+      if (slot >= t->nslots) {
+        break;
+      }
+      if (op.type == 0 ||
+          api_unalias_type(t->c, op.type) ==
+              api_unalias_type(t->c, t->slot_type[slot])) {
+        return t->slot_type[slot];
+      }
+      break;
+    }
+    default:
+      break;
+  }
+  return op.type;
+}
+
/* Emit " vN = (DECL_T)(uintptr_t)(" or, for float regs, " vN = (DECL_T)(".
- * Caller must then emit the RHS expression and call c_emit_reg_assign_close. */
-static void c_emit_reg_assign_open(CTarget* t, Reg r) {
+ * Caller must then emit the RHS expression and call c_emit_reg_assign_close.
+ *
+ * `rhs_ty` is the CG type the RHS expression will produce (or 0 if unknown).
+ * When it matches the reg's declared type, the cast wrappers are elided —
+ * `(DECL_T)(uintptr_t)(...)` is a defensive bridge for reg-id reuse across
+ * types, and emitting it when types match buries the actual code in noise.
+ * The outer `(...)` parens are kept so the closer's `);` stays balanced. */
+static void c_emit_reg_assign_open(CTarget* t, Reg r, CfreeCgTypeId rhs_ty) {
if ((u32)r >= t->reg_cap || !t->reg_declared[r]) {
compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0},
"C target: assign to undeclared reg v%u", (unsigned)r);
@@ -543,11 +596,14 @@ static void c_emit_reg_assign_open(CTarget* t, Reg r) {
c_reg_name(r, buf, sizeof buf);
cbuf_puts(&t->body, " ");
cbuf_puts(&t->body, buf);
- cbuf_puts(&t->body, " = (");
- c_emit_type(t, &t->body, decl);
- cbuf_puts(&t->body, ")");
- if (!c_type_is_float(t, decl)) {
- cbuf_puts(&t->body, "(uintptr_t)");
+ cbuf_puts(&t->body, " = ");
+ if (!c_types_equiv(t, rhs_ty, decl)) {
+ cbuf_putc(&t->body, '(');
+ c_emit_type(t, &t->body, decl);
+ cbuf_putc(&t->body, ')');
+ if (!c_type_is_float(t, decl)) {
+ cbuf_puts(&t->body, "(uintptr_t)");
+ }
}
cbuf_puts(&t->body, "(");
}
@@ -572,6 +628,18 @@ void c_emit_operand_signed(CTarget* t, Operand op, int signed_) {
* types), bridge through uintptr_t so the narrow int cast doesn't trip
* -Wvoid-pointer-to-int-cast. */
int via_uptr = c_operand_is_ptr_typed(t, op);
+ /* CG ints are width-only; the C target declares every int reg/slot/IMM
+ * as the signed `int{W}_t` of its width. So when `signed_` is true and
+ * the operand's emit-width matches `w`, the explicit cast is redundant
+ * with what c_emit_operand already produces. Skipping it cuts the
+ * ubiquitous `((int32_t)((int32_t)23))` double-cast down to one. */
+ if (!via_uptr && signed_) {
+ CfreeCgTypeId et = c_operand_emit_type(t, op);
+ if (c_int_width_for_signedness(t, et) == w) {
+ c_emit_operand(t, op);
+ return;
+ }
+ }
cbuf_puts(&t->body, "((");
cbuf_puts(&t->body, tn);
cbuf_puts(&t->body, ")");
@@ -610,8 +678,16 @@ static int c_operand_is_ptr_typed(CTarget* t, Operand op) {
/* Emit `(target_ty)(uintptr_t)(op)` (or `(target_ty)(op)` for float
* target_ty). Used when the caller knows the C expression type they want and
* the source operand may have been declared with a different type (CG reuses
- * reg ids across types). Without the bridge, gcc trips -Wint-conversion. */
+ * reg ids across types). Without the bridge, gcc trips -Wint-conversion.
+ *
+ * When the operand's emit-type already matches target_ty (the common case
+ * when reg-id reuse hasn't happened), drop the wrappers — they add noise
+ * for no semantic gain. */
static void c_emit_operand_as(CTarget* t, Operand op, CfreeCgTypeId target_ty) {
+ if (c_types_equiv(t, c_operand_emit_type(t, op), target_ty)) {
+ c_emit_operand(t, op);
+ return;
+ }
cbuf_puts(&t->body, "(");
c_emit_type(t, &t->body, target_ty);
cbuf_puts(&t->body, ")");
@@ -897,6 +973,7 @@ void c_func_begin(CGTarget* T, const CGFuncDesc* fd) {
t->next_label = 0;
t->next_tmp = 0;
t->nscopes = 0;
+ t->last_was_terminator = 0;
const char* name = c_sym_name(t, fd->sym);
@@ -975,8 +1052,8 @@ FrameSlot c_frame_slot(CGTarget* T, const FrameSlotDesc* fsd) {
char buf[24];
c_slot_name(id, buf, sizeof buf);
cbuf_puts(&t->decls, buf);
- /* See c_ensure_reg — same `= {0}` reasoning. */
- cbuf_puts(&t->decls, " __attribute__((unused)) = {0};\n");
+ /* See c_ensure_reg — same zero-init reasoning. */
+ c_emit_zero_init(t, fsd->type);
return id;
}
@@ -1018,7 +1095,26 @@ void c_load_imm(CGTarget* T, Operand dst, i64 imm) {
compiler_panic(t->c, loc, "C target: load_imm dst must be REG");
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* The literal is emitted bare; its C type is `long long`. We can drop
+ * the bridge cast iff the bare assignment compiles cleanly:
+ * - integer dst: imm must fit in dst's signed range (else
+ * -Wconstant-conversion). 64-bit dst always fits.
+ * - pointer dst: only `0` (null pointer constant) is safe; any other
+ * literal trips -Wint-conversion.
+ * Otherwise keep the bridge. */
+ u32 w = c_int_width_for_signedness(t, dst.type);
+ int can_drop_bridge;
+ if (w > 0) {
+ can_drop_bridge = (w >= 64) ||
+ (imm >= -((i64)1 << (w - 1)) &&
+ imm <= (((i64)1 << (w - 1)) - 1));
+ } else if (c_type_is_ptr(t, dst.type)) {
+ can_drop_bridge = (imm == 0);
+ } else {
+ can_drop_bridge = 0;
+ }
+ c_emit_reg_assign_open(t, dst.v.reg,
+ can_drop_bridge ? dst.type : (CfreeCgTypeId)0);
c_emit_imm_literal(t, imm);
c_emit_reg_assign_close(t);
}
@@ -1030,7 +1126,7 @@ void c_copy(CGTarget* T, Operand dst, Operand src) {
compiler_panic(t->c, loc, "C target: copy dst must be REG");
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ c_emit_reg_assign_open(t, dst.v.reg, c_operand_emit_type(t, src));
c_emit_operand(t, src);
c_emit_reg_assign_close(t);
}
@@ -1104,7 +1200,14 @@ void c_binop(CGTarget* T, BinOp op, Operand dst, Operand a, Operand b) {
compiler_panic(t->c, loc, "C target: binop dst must be REG");
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* Pointer operands get cast to uintptr_t inside c_emit_operand_arith,
+ * so the binop's C result type is `uintptr_t`, not the original pointer
+ * type. Keep the bridge when dst or either operand is pointer-typed so
+ * the assignment back to a pointer dst doesn't trip -Wint-conversion. */
+ int has_ptr = c_operand_is_ptr_typed(t, dst) ||
+ c_operand_is_ptr_typed(t, a) || c_operand_is_ptr_typed(t, b);
+ c_emit_reg_assign_open(t, dst.v.reg,
+ has_ptr ? (CfreeCgTypeId)0 : dst.type);
int lhs_signed = 1;
BinSignCast bsc = binop_sign_kind(op, &lhs_signed);
switch (bsc) {
@@ -1164,7 +1267,7 @@ void c_unop(CGTarget* T, UnOp op, Operand dst, Operand a) {
default:
compiler_panic(t->c, loc, "C target: unknown unop %d", (int)op);
}
- c_emit_reg_assign_open(t, dst.v.reg);
+ c_emit_reg_assign_open(t, dst.v.reg, dst.type);
cbuf_puts(&t->body, sym);
c_emit_operand(t, a);
c_emit_reg_assign_close(t);
@@ -1240,7 +1343,9 @@ void c_cmp(CGTarget* T, CmpOp op, Operand dst, Operand a, Operand b) {
compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op);
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* Compare result is C `int` (0/1); assigning to integer dst.type narrows
+ * implicitly without -Wall complaint. */
+ c_emit_reg_assign_open(t, dst.v.reg, dst.type);
c_emit_cmp_operands(t, op, a, b);
c_emit_reg_assign_close(t);
}
@@ -1282,19 +1387,23 @@ void c_label_place(CGTarget* T, Label l) {
cbuf_puts(&t->body, " ");
cbuf_puts(&t->body, buf);
cbuf_puts(&t->body, ": __attribute__((unused));\n");
+ t->last_was_terminator = 0;
}
void c_jump(CGTarget* T, Label l) {
CTarget* t = (CTarget*)T;
+ if (t->last_was_terminator) return; /* unreachable after a terminator: drop the goto */
char buf[24];
c_label_name(l, buf, sizeof buf);
cbuf_puts(&t->body, " goto ");
cbuf_puts(&t->body, buf);
cbuf_puts(&t->body, ";\n");
+ t->last_was_terminator = 1; /* a goto is itself an unconditional terminator */
}
void c_cmp_branch(CGTarget* T, CmpOp op, Operand a, Operand b, Label l) {
CTarget* t = (CTarget*)T;
+ if (t->last_was_terminator) return;
SrcLoc loc = t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0};
if (!cmp_to_c(op)) {
compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op);
@@ -1424,7 +1533,11 @@ void c_local_addr(CGTarget* T, Operand dst, const CGLocalDesc* d,
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
char buf[24];
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* RHS is `&slot_N` — a pointer to the slot's storage type (e.g.
+ * `RECORD_TYPE*`), which generally does not match dst.type (typically
+ * void*, or a differently-typed pointer). Pass 0 as the RHS type hint so
+ * the bridge cast is always kept here. */
+ c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0);
cbuf_puts(&t->body, "&");
c_slot_name(s.v.frame_slot, buf, sizeof buf);
cbuf_puts(&t->body, buf);
@@ -1479,7 +1592,8 @@ void c_convert(CGTarget* T, ConvKind k, Operand dst, Operand src) {
break;
}
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* The cast `(dst.type)(src)` produces a value of dst.type. */
+ c_emit_reg_assign_open(t, dst.v.reg, dst.type);
cbuf_puts(&t->body, "(");
c_emit_type(t, &t->body, dst.type);
cbuf_puts(&t->body, ")");
@@ -1546,7 +1660,8 @@ void c_call(CGTarget* T, const CGCallDesc* d) {
Operand rs = d->ret.storage;
if (rs.kind == OPK_REG) {
c_ensure_reg(t, rs.v.reg, ret_type, (RegClass)rs.cls);
- c_emit_reg_assign_open(t, rs.v.reg);
+ /* Callee returns ret_type; the assignment is direct. */
+ c_emit_reg_assign_open(t, rs.v.reg, ret_type);
} else if (rs.kind == OPK_LOCAL) {
char buf[24];
c_slot_name(rs.v.frame_slot, buf, sizeof buf);
@@ -1610,6 +1725,7 @@ void c_call(CGTarget* T, const CGCallDesc* d) {
cbuf_puts(&t->body, " return;\n");
}
}
+ if (is_tail) t->last_was_terminator = 1;
}
/* === load / store === */
@@ -1622,7 +1738,8 @@ void c_load(CGTarget* T, Operand dst, Operand addr, MemAccess m) {
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
CfreeCgTypeId access_ty = m.type ? m.type : dst.type;
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* The deref `*(access_ty*)addr` produces a value of access_ty. */
+ c_emit_reg_assign_open(t, dst.v.reg, access_ty);
c_emit_addr_deref(t, addr, access_ty);
c_emit_reg_assign_close(t);
}
@@ -1647,13 +1764,17 @@ void c_addr_of(CGTarget* T, Operand dst, Operand lv) {
compiler_panic(t->c, loc, "C target: addr_of dst must be REG");
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* `c_emit_lvalue_addr` casts its output to dst.type already. */
+ c_emit_reg_assign_open(t, dst.v.reg, dst.type);
c_emit_lvalue_addr(t, lv, dst.type);
c_emit_reg_assign_close(t);
}
void c_ret(CGTarget* T, const CGABIValue* val) {
CTarget* t = (CTarget*)T;
+ /* Already-terminated block: this ret is unreachable (the frontend's
+ * defensive `return 0;` epilogue lands here right after a user return). */
+ if (t->last_was_terminator) return;
/* CG emits a defensive ret_void epilogue at the end of every function. For
* a non-void function that's unreachable; emitting a bare `return;` would
* trip -Wreturn-type. Spell it as `__builtin_unreachable()` so the host C
@@ -1662,6 +1783,7 @@ void c_ret(CGTarget* T, const CGABIValue* val) {
CfreeCgTypeId rt = cg_type_func_ret_id(t->c, t->cur_fn->fn_type);
if (rt && !cg_type_is_void(t->c, rt)) {
cbuf_puts(&t->body, " __builtin_unreachable();\n");
+ t->last_was_terminator = 1;
return;
}
}
@@ -1689,6 +1811,7 @@ void c_ret(CGTarget* T, const CGABIValue* val) {
}
}
cbuf_puts(&t->body, ";\n");
+ t->last_was_terminator = 1;
}
/* === alias ===
@@ -1844,7 +1967,8 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst,
(unsigned)ndst);
}
c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls);
- c_emit_reg_assign_open(t, dsts[0].v.reg);
+ /* Returns void*; bridge to dst pointer type. */
+ c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0);
cbuf_puts(&t->body, "__builtin_assume_aligned(");
for (u32 i = 0; i < narg; ++i) {
if (i > 0) cbuf_puts(&t->body, ", ");
@@ -1862,7 +1986,8 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst,
(unsigned)ndst, (unsigned)narg);
}
c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls);
- c_emit_reg_assign_open(t, dsts[0].v.reg);
+ /* Returns `long`; dst.type may be a narrower int — keep the bridge. */
+ c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0);
cbuf_puts(&t->body, "__builtin_expect((long)");
c_emit_operand(t, args[0]);
cbuf_puts(&t->body, ", (long)");
@@ -1889,7 +2014,9 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst,
(unsigned)w);
}
c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls);
- c_emit_reg_assign_open(t, dsts[0].v.reg);
+ /* __builtin_popcount/ctz/clz return `int`; bswap returns its input
+ * type. Narrow to dst.type via the bridge. */
+ c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0);
cbuf_puts(&t->body, fn);
cbuf_puts(&t->body, "(");
c_emit_operand(t, args[0]);
@@ -1978,7 +2105,8 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst,
compiler_panic(t->c, loc, "C target: setjmp: bad shape");
}
c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls);
- c_emit_reg_assign_open(t, dsts[0].v.reg);
+ /* setjmp returns `int`; bridge to dst.type. */
+ c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0);
cbuf_puts(&t->body, "setjmp(*(jmp_buf*)(");
c_emit_operand(t, args[0]);
cbuf_puts(&t->body, "))");
@@ -2010,7 +2138,8 @@ void c_alloca(CGTarget* T, Operand dst, Operand size, u32 align) {
compiler_panic(t->c, loc, "C target: alloca dst must be REG");
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* __builtin_alloca returns `void*`; dst.type is typically void* too. */
+ c_emit_reg_assign_open(t, dst.v.reg, dst.type);
if (align > 1) {
/* gcc has __builtin_alloca_with_align taking bits, not bytes. */
cbuf_puts(&t->body, "__builtin_alloca_with_align(");
@@ -2077,7 +2206,8 @@ void c_va_arg(CGTarget* T, Operand dst, Operand ap_addr, CfreeCgTypeId ty) {
compiler_panic(t->c, loc, "C target: va_arg dst must be REG");
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* __builtin_va_arg yields a value of `ty`. */
+ c_emit_reg_assign_open(t, dst.v.reg, ty);
cbuf_puts(&t->body, "__builtin_va_arg(*(va_list*)(");
c_emit_operand(t, ap_addr);
cbuf_puts(&t->body, "), ");
@@ -2128,7 +2258,9 @@ void c_tls_addr_of(CGTarget* T, Operand dst, ObjSymId sym, i64 addend) {
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
const char* nm = c_sym_name(t, sym);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* RHS spells `(char*)&sym + addend` — pointer type that may not match
+ * dst.type; keep the bridge to cast through cleanly. */
+ c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0);
cbuf_puts(&t->body, "((char*)&");
cbuf_puts(&t->body, nm);
if (addend != 0) {
@@ -2187,7 +2319,8 @@ void c_bitfield_load(CGTarget* T, Operand dst, Operand addr,
/* Zero-width — layout barrier only; nothing to load. Emit a no-op
* assignment so the dst reg still gets a defined value. */
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* RHS is the literal 0 (int); narrowing to dst.type is fine. */
+ c_emit_reg_assign_open(t, dst.v.reg, dst.type);
cbuf_puts(&t->body, "0");
c_emit_reg_assign_close(t);
return;
@@ -2198,7 +2331,9 @@ void c_bitfield_load(CGTarget* T, Operand dst, Operand addr,
(unsigned)bf.storage.size);
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* RHS is the storage-width int from the mask/shift expression; bridge
+ * to dst.type so any signedness/width adjustment is explicit. */
+ c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0);
/* For signed bitfields, sign-extend via the standard shift-up / arith-shift-
* down trick on a signed integer of the storage width. For unsigned, mask
* the extracted bits.
@@ -2473,7 +2608,8 @@ void c_atomic_load(CGTarget* T, Operand dst, Operand addr, MemAccess m,
CTarget* t = (CTarget*)T;
(void)m;
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* __atomic_load_n returns a value of the pointed-to type (dst.type). */
+ c_emit_reg_assign_open(t, dst.v.reg, dst.type);
cbuf_puts(&t->body, "__atomic_load_n((");
c_emit_type(t, &t->body, dst.type);
cbuf_puts(&t->body, "*)");
@@ -2522,7 +2658,8 @@ void c_atomic_rmw(CGTarget* T, AtomicOp op, Operand dst, Operand addr,
compiler_panic(t->c, loc, "C target: unknown atomic op %d", (int)op);
}
c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls);
- c_emit_reg_assign_open(t, dst.v.reg);
+ /* __atomic_fetch_* returns the prior value of the pointed-to type. */
+ c_emit_reg_assign_open(t, dst.v.reg, val.type);
cbuf_puts(&t->body, fn);
cbuf_puts(&t->body, "((");
c_emit_type(t, &t->body, val.type);
@@ -2660,6 +2797,37 @@ static void c_emit_data_bytes(CBuf* b, const u8* bytes, size_t n) {
cbuf_puts(b, "\n }");
}
+/* Mach-O TLS support: the user-visible SK_TLS symbol is a 24-byte TLV
+ * descriptor in __DATA,__thread_vars, and the actual initial bytes live in
+ * a synthesized `<name>$tlv$init` sym in __thread_data (or __thread_bss).
+ * The descriptor carries an R_ABS64 reloc at offset +16 pointing at that
+ * init sym. For C-source emission we don't care about the descriptor at all
+ * — we just emit `_Thread_local` with the init sym's bytes and let the host
+ * C compiler synthesize whatever TLV plumbing it needs. */
+
+/* Find the data init sym referenced by a Mach-O TLS descriptor that starts
+ * at `desc_base` in section `desc_sec`.
+ *
+ * Scans every relocation in the object and returns the target sym of the
+ * first one that sits in `desc_sec` at offset `desc_base + 16` (where the
+ * descriptor is expected to carry its R_ABS64 reference to the init sym).
+ * The reloc kind itself is not inspected. Returns OBJ_SYM_NONE if no reloc
+ * matches. */
+static ObjSymId c_macho_tls_find_init(CTarget* t, ObjSecId desc_sec,
+                                      u32 desc_base) {
+  const u32 init_slot_off = desc_base + 16u;
+  const u32 nrelocs = obj_reloc_total(t->obj);
+  for (u32 idx = 0; idx < nrelocs; ++idx) {
+    const Reloc* rel = obj_reloc_at(t->obj, idx);
+    if (rel->section_id == desc_sec && rel->offset == init_slot_off) {
+      return rel->sym;
+    }
+  }
+  return OBJ_SYM_NONE;
+}
+
+/* Nonzero iff `sec` is the Mach-O TLV descriptor section,
+ * `__DATA,__thread_vars`. The check compares interned Sym ids rather than
+ * string contents; a NULL section yields 0. */
+static int c_sec_name_is_macho_tvars(CTarget* t, const Section* sec) {
+  if (sec) {
+    return sec->name == pool_intern_cstr(t->c->global, "__DATA,__thread_vars");
+  }
+  return 0;
+}
+
/* Returns 1 if any relocation falls into the half-open range [base, base+size)
* of section `sec_id` (i.e. patches the bytes of this symbol). */
static int c_sym_has_relocs(CTarget* t, ObjSecId sec_id, u32 base, u32 size) {
@@ -2731,6 +2899,14 @@ static void c_emit_sym_relocs_fixup(CTarget* t, const char* nm,
static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) {
if (os->kind == SK_FUNC || os->kind == SK_IFUNC) return;
if (os->kind == SK_SECTION || os->kind == SK_FILE) return;
+ /* On Mach-O, obj_tls.c synthesizes `__tlv_bootstrap` as an SK_UNDEF
+ * extern for the TLV descriptor's first slot. The C target delegates all
+ * TLS lowering to the host compiler via `_Thread_local`, so this
+ * descriptor-time-only symbol has no place in the emitted source. */
+ if (os->kind == SK_UNDEF && t->c->target.obj == CFREE_OBJ_MACHO) {
+ const ObjBuilder* ob = t->obj;
+ if (id == obj_tlv_bootstrap_get(ob)) return;
+ }
const char* nm = c_sym_name(t, id);
CBuf* b = &t->data_defs;
/* SK_TLS user-visible syms need a _Thread_local prefix. On ELF the sym
@@ -2782,17 +2958,75 @@ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) {
return;
}
if (is_tls && t->c->target.obj == CFREE_OBJ_MACHO) {
- /* Mach-O TLV: the user sym is the descriptor (24 bytes, with relocs to
- * __tlv_bootstrap and the synthesized __init.<name> sym). We can't
- * faithfully express the descriptor in portable C — `_Thread_local`
- * needs the underlying initial value, which lives in a different
- * (synthesized) sym. Phase 4 leaves this for a future pass. */
- compiler_panic(t->c, (SrcLoc){0, 0, 0},
- "C target: Mach-O TLS data definition not yet supported");
+ /* Mach-O splits TLS across two object-file symbols (see obj_tls.c): the
+ * user-visible sym is a 24-byte TLV descriptor in
+ * __DATA,__thread_vars; the actual initial bytes live in a synthesized
+ * `<name>$tlv$init` sym in __DATA,__thread_data (or __thread_bss). For
+ * C source emission we don't need either of those — `_Thread_local`
+ * delegates to the host C compiler, which builds its own descriptor.
+ *
+ * We use the descriptor sym as the carrier (its name is what user code
+ * references) and pull the initial bytes/size/alignment from the init
+ * sym, found via the R_ABS64 reloc at descriptor offset +16. The init
+ * sym is skipped in its own iteration. */
+ const Section* desc_sec = obj_section_get(t->obj, os->section_id);
+ if (c_sec_name_is_macho_tvars(t, desc_sec)) {
+ ObjSymId init_id = c_macho_tls_find_init(t, os->section_id,
+ (u32)os->value);
+ if (init_id == OBJ_SYM_NONE) {
+ compiler_panic(t->c, (SrcLoc){0, 0, 0},
+ "C target: Mach-O TLS descriptor missing init reloc");
+ }
+ const ObjSym* init_os = obj_symbol_get(t->obj, init_id);
+ if (!init_os || init_os->section_id == OBJ_SEC_NONE) {
+ compiler_panic(t->c, (SrcLoc){0, 0, 0},
+ "C target: Mach-O TLS init sym not defined");
+ }
+ const Section* init_sec = obj_section_get(t->obj, init_os->section_id);
+ u32 init_base = (u32)init_os->value;
+ u32 init_size = (u32)init_os->size;
+ /* TLS data with relocations would need the constructor-fixup path
+ * (and we'd have to rewrite the reloc target's section/offset to
+ * the descriptor's name in the emitted C). No test currently
+ * exercises this; surface it as a clear panic-as-skip if we hit it. */
+ if (c_sym_has_relocs(t, init_os->section_id, init_base, init_size)) {
+ compiler_panic(t->c, (SrcLoc){0, 0, 0},
+ "C target: Mach-O TLS with pointer init not yet "
+ "supported");
+ }
+ if (os->bind == SB_LOCAL) cbuf_puts(b, "static ");
+ cbuf_puts(b, "_Thread_local ");
+ c_emit_link_attrs(b, os);
+ cbuf_puts(b, "__attribute__((unused)) ");
+ cbuf_puts(b, "_Alignas(");
+ cbuf_put_u64(b, init_sec->align ? init_sec->align : 1);
+ cbuf_puts(b, ") uint8_t ");
+ cbuf_puts(b, nm);
+ cbuf_puts(b, "[");
+ cbuf_put_u64(b, init_size ? init_size : 1);
+ cbuf_puts(b, "]");
+ if (init_sec->kind == SEC_BSS || init_sec->sem == SSEM_NOBITS ||
+ init_size == 0) {
+ cbuf_puts(b, ";\n");
+ } else {
+ Heap* h = t->c->ctx->heap;
+ u8* bytes = (u8*)h->alloc(h, init_size, 1);
+ if (!bytes) {
+ compiler_panic(t->c, (SrcLoc){0, 0, 0},
+ "C target: oom on TLS init bytes");
+ }
+ c_read_section_bytes(init_sec, init_base, bytes, init_size);
+ c_emit_data_bytes(b, bytes, init_size);
+ h->free(h, bytes, init_size);
+ cbuf_puts(b, ";\n");
+ }
+ return;
+ }
+ /* Not the descriptor: this is the synthesized `<name>$tlv$init` data
+ * sym (or a __thread_ptrs entry). Its contents are emitted by the
+ * descriptor case above when the descriptor sym itself is visited, so
+ * there is nothing to emit for this sym. */
+ return;
}
- /* Skip the synthesized `__init.<name>` aux sym on Mach-O — even if we got
- * here, it shouldn't be emitted as user-visible data. (Currently
- * unreachable because the descriptor path panicked above.) */
if (os->kind == SK_COMMON) {
/* Common — uninitialized, with explicit alignment. Emit as
* tentative-definition (`uint8_t name[size];` at file scope), which C
@@ -2862,6 +3096,12 @@ static void c_emit_data_symbol_fixups(CTarget* t, ObjSymId id,
if (os->kind == SK_SECTION || os->kind == SK_FILE) return;
if (os->kind == SK_UNDEF || os->kind == SK_COMMON) return;
if (os->section_id == OBJ_SEC_NONE) return;
+ /* Mach-O TLS: descriptor bytes are not user-visible data (they hold the
+ * TLV pointer and an offset), and the init sym has been folded into the
+ * descriptor's `_Thread_local` emission. Either way, the C-side
+ * `_Thread_local` declaration carries its own initializer; there is no
+ * separate storage to fix up. */
+ if (os->kind == SK_TLS && t->c->target.obj == CFREE_OBJ_MACHO) return;
const Section* sec = obj_section_get(t->obj, os->section_id);
if (!c_is_data_section(sec)) return;
u32 base = (u32)os->value;
diff --git a/src/arch/c_target/internal.h b/src/arch/c_target/internal.h
@@ -108,6 +108,14 @@ typedef struct CTarget {
/* Monotone counter for synthesizing unique temporary names within a
* function (e.g. bitcast scratch). Reset on func_begin. */
u32 next_tmp;
+
+ /* Tracks whether the last emitted body statement was an unconditional
+ * terminator (`return`, `goto`, etc.) with no intervening label. When
+ * set, subsequent ret/jump/cmp_branch emissions are dropped — they're
+ * unreachable, and emitting them produces dead C that distracts from
+ * the live code. Reset on func_begin and label_place. */
+ u8 last_was_terminator;
+ u8 pad3[3];
} CTarget;
typedef struct CScopeInfo {