kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit da199356cebfa4d53f2874216ea8b1dc84d0dc5b
parent 8fb0cbbf0d0cd9cd03e25e9de96c8edfa3576a56
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 20 May 2026 14:41:24 -0700

c_target: cut bridge-cast noise; support Mach-O TLS

Readability — the emitted C now skips the (T)(uintptr_t) bridge when the
RHS type already matches the declared type, drops trailing unreachable
`return 0;` epilogues, uses `= 0` instead of `= {0}` for scalar locals,
and elides the redundant `((int32_t)((int32_t)23))` double cast on
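signed-typed operands. `c_emit_reg_assign_open` now takes an RHS type
hint; call sites pass it where the result type is known and pass 0 (keep
the bridge) where the RHS expression's C type may differ from the
destination type (pointer arithmetic, `&slot_N`, builtins returning
`int`/`long`/`void*`). Reg-id reuse across types is still covered: the
hint is compared against the reg's declared type, so a mismatch keeps
the bridge.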

TLS — Mach-O TLS data definitions previously panicked-as-skip because
obj_tls.c lowers the user-visible sym to a 24-byte TLV descriptor with
the initial bytes living in a synthesized `<name>$tlv$init` aux sym. The
C target now resolves the descriptor's R_ABS64 reloc at offset +16 to
find the init sym, emits one `_Thread_local _Alignas(A) uint8_t
name[N] = {bytes};` with the descriptor's user-facing name and the init
sym's contents, and skips the aux sym. The host C compiler reconstructs
whatever TLV plumbing it needs. The `__tlv_bootstrap` extern that
obj_tls.c synthesizes is filtered out of the emitted source. ELF TLS is
unchanged.

432/432 parse cases (-Wall -Wextra -Werror under both gcc-14 and clang)
plus the two _Thread_local cases that were SKIPs now pass end-to-end.

Diffstat:
Msrc/arch/c_target/emit.c | 330++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
Msrc/arch/c_target/internal.h | 8++++++++
2 files changed, 293 insertions(+), 45 deletions(-)

diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c @@ -30,6 +30,9 @@ static void c_ensure_typedef(CTarget* t, CfreeCgTypeId tid); static const char* c_typedef_name(CTarget* t, CfreeCgTypeId tid); static const char* c_typename(CTarget* t, CfreeCgTypeId type); static int c_operand_is_ptr_typed(CTarget* t, Operand op); +/* Private accessor on ObjBuilder (defined in obj/obj.c, not in obj.h). + * Same forward-decl trick as obj_tls.c uses. */ +ObjSymId obj_tlv_bootstrap_get(const ObjBuilder*); /* === Writer helpers === */ @@ -370,6 +373,19 @@ static void c_grow_slot_table(CTarget* t, u32 needed) { t->slot_cap = newcap; } +/* Emit the trailing `__attribute__((unused)) = INIT;` for a local decl of + * type `ty`. Scalars get `= 0` (readable); aggregates get `= {0}` (which is + * the only form that compiles for record/array). */ +static void c_emit_zero_init(CTarget* t, CfreeCgTypeId ty) { + const CgType* cgt = ty ? cg_type_get(t->c, api_unalias_type(t->c, ty)) + : NULL; + int is_aggregate = cgt && (cgt->kind == CFREE_CG_TYPE_RECORD || + cgt->kind == CFREE_CG_TYPE_ARRAY); + cbuf_puts(&t->decls, is_aggregate + ? " __attribute__((unused)) = {0};\n" + : " __attribute__((unused)) = 0;\n"); +} + void c_ensure_reg(CTarget* t, Reg r, CfreeCgTypeId type, RegClass cls) { (void)cls; if (r == (Reg)REG_NONE) { @@ -386,10 +402,10 @@ void c_ensure_reg(CTarget* t, Reg r, CfreeCgTypeId type, RegClass cls) { char buf[24]; c_reg_name(r, buf, sizeof buf); cbuf_puts(&t->decls, buf); - /* `= {0}` zeroes scalars and aggregates uniformly. Kills - * -Wsometimes-uninitialized for control flow clang can't reason through; - * the host C compiler DSEs the init when a real assignment dominates. */ - cbuf_puts(&t->decls, " __attribute__((unused)) = {0};\n"); + /* Zero-init kills -Wsometimes-uninitialized for control flow clang can't + * reason through; the host C compiler DSEs the init when a real + * assignment dominates. Scalars get `= 0`, aggregates `= {0}`. 
*/ + c_emit_zero_init(t, type); } /* Emit a signed-int64 literal. INT64_MIN can't be written directly: clang @@ -531,9 +547,46 @@ static int c_type_is_float(CTarget* t, CfreeCgTypeId type) { return ty && ty->kind == CFREE_CG_TYPE_FLOAT; } +/* True iff a and b name the same CG type after alias resolution. */ +static int c_types_equiv(CTarget* t, CfreeCgTypeId a, CfreeCgTypeId b) { + if (a == 0 || b == 0) return 0; + return api_unalias_type(t->c, a) == api_unalias_type(t->c, b); +} + +/* CG type that the C expression for `op` will produce when emitted via + * c_emit_operand. Differs from op.type only when reg-id reuse caused the + * reg's first-sighting decl type to lock to something else: in that case + * the emitted `vN` has the declared type, and a downstream cast back to + * op.type is still needed (the uintptr_t bridge handles that). */ +static CfreeCgTypeId c_operand_emit_type(CTarget* t, Operand op) { + if (op.kind == OPK_REG && (u32)op.v.reg < t->reg_cap && + t->reg_declared[op.v.reg]) { + return t->reg_type[op.v.reg]; + } + if (op.kind == OPK_LOCAL) { + u32 idx = (u32)op.v.frame_slot - 1u; + if (idx < t->nslots) { + /* c_emit_operand emits `slot_N` (slot_type) when op.type matches, + * otherwise a deref-cast to op.type. */ + if (op.type == 0 || + api_unalias_type(t->c, op.type) == + api_unalias_type(t->c, t->slot_type[idx])) { + return t->slot_type[idx]; + } + } + } + return op.type; +} + /* Emit " vN = (DECL_T)(uintptr_t)(" or, for float regs, " vN = (DECL_T)(". - * Caller must then emit the RHS expression and call c_emit_reg_assign_close. */ -static void c_emit_reg_assign_open(CTarget* t, Reg r) { + * Caller must then emit the RHS expression and call c_emit_reg_assign_close. + * + * `rhs_ty` is the CG type the RHS expression will produce (or 0 if unknown). 
+ * When it matches the reg's declared type, the cast wrappers are elided — + * `(DECL_T)(uintptr_t)(...)` is a defensive bridge for reg-id reuse across + * types, and emitting it when types match buries the actual code in noise. + * The outer `(...)` parens are kept so the closer's `);` stays balanced. */ +static void c_emit_reg_assign_open(CTarget* t, Reg r, CfreeCgTypeId rhs_ty) { if ((u32)r >= t->reg_cap || !t->reg_declared[r]) { compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, "C target: assign to undeclared reg v%u", (unsigned)r); @@ -543,11 +596,14 @@ static void c_emit_reg_assign_open(CTarget* t, Reg r) { c_reg_name(r, buf, sizeof buf); cbuf_puts(&t->body, " "); cbuf_puts(&t->body, buf); - cbuf_puts(&t->body, " = ("); - c_emit_type(t, &t->body, decl); - cbuf_puts(&t->body, ")"); - if (!c_type_is_float(t, decl)) { - cbuf_puts(&t->body, "(uintptr_t)"); + cbuf_puts(&t->body, " = "); + if (!c_types_equiv(t, rhs_ty, decl)) { + cbuf_putc(&t->body, '('); + c_emit_type(t, &t->body, decl); + cbuf_putc(&t->body, ')'); + if (!c_type_is_float(t, decl)) { + cbuf_puts(&t->body, "(uintptr_t)"); + } } cbuf_puts(&t->body, "("); } @@ -572,6 +628,18 @@ void c_emit_operand_signed(CTarget* t, Operand op, int signed_) { * types), bridge through uintptr_t so the narrow int cast doesn't trip * -Wvoid-pointer-to-int-cast. */ int via_uptr = c_operand_is_ptr_typed(t, op); + /* CG ints are width-only; the C target declares every int reg/slot/IMM + * as the signed `int{W}_t` of its width. So when `signed_` is true and + * the operand's emit-width matches `w`, the explicit cast is redundant + * with what c_emit_operand already produces. Skipping it cuts the + * ubiquitous `((int32_t)((int32_t)23))` double-cast down to one. 
*/ + if (!via_uptr && signed_) { + CfreeCgTypeId et = c_operand_emit_type(t, op); + if (c_int_width_for_signedness(t, et) == w) { + c_emit_operand(t, op); + return; + } + } cbuf_puts(&t->body, "(("); cbuf_puts(&t->body, tn); cbuf_puts(&t->body, ")"); @@ -610,8 +678,16 @@ static int c_operand_is_ptr_typed(CTarget* t, Operand op) { /* Emit `(target_ty)(uintptr_t)(op)` (or `(target_ty)(op)` for float * target_ty). Used when the caller knows the C expression type they want and * the source operand may have been declared with a different type (CG reuses - * reg ids across types). Without the bridge, gcc trips -Wint-conversion. */ + * reg ids across types). Without the bridge, gcc trips -Wint-conversion. + * + * When the operand's emit-type already matches target_ty (the common case + * when reg-id reuse hasn't happened), drop the wrappers — they add noise + * for no semantic gain. */ static void c_emit_operand_as(CTarget* t, Operand op, CfreeCgTypeId target_ty) { + if (c_types_equiv(t, c_operand_emit_type(t, op), target_ty)) { + c_emit_operand(t, op); + return; + } cbuf_puts(&t->body, "("); c_emit_type(t, &t->body, target_ty); cbuf_puts(&t->body, ")"); @@ -897,6 +973,7 @@ void c_func_begin(CGTarget* T, const CGFuncDesc* fd) { t->next_label = 0; t->next_tmp = 0; t->nscopes = 0; + t->last_was_terminator = 0; const char* name = c_sym_name(t, fd->sym); @@ -975,8 +1052,8 @@ FrameSlot c_frame_slot(CGTarget* T, const FrameSlotDesc* fsd) { char buf[24]; c_slot_name(id, buf, sizeof buf); cbuf_puts(&t->decls, buf); - /* See c_ensure_reg — same `= {0}` reasoning. */ - cbuf_puts(&t->decls, " __attribute__((unused)) = {0};\n"); + /* See c_ensure_reg — same zero-init reasoning. 
*/ + c_emit_zero_init(t, fsd->type); return id; } @@ -1018,7 +1095,26 @@ void c_load_imm(CGTarget* T, Operand dst, i64 imm) { compiler_panic(t->c, loc, "C target: load_imm dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* The literal is emitted bare; its C type is `long long`. We can drop + * the bridge cast iff the bare assignment compiles cleanly: + * - integer dst: imm must fit in dst's signed range (else + * -Wconstant-conversion). 64-bit dst always fits. + * - pointer dst: only `0` (null pointer constant) is safe; any other + * literal trips -Wint-conversion. + * Otherwise keep the bridge. */ + u32 w = c_int_width_for_signedness(t, dst.type); + int can_drop_bridge; + if (w > 0) { + can_drop_bridge = (w >= 64) || + (imm >= -((i64)1 << (w - 1)) && + imm <= (((i64)1 << (w - 1)) - 1)); + } else if (c_type_is_ptr(t, dst.type)) { + can_drop_bridge = (imm == 0); + } else { + can_drop_bridge = 0; + } + c_emit_reg_assign_open(t, dst.v.reg, + can_drop_bridge ? dst.type : (CfreeCgTypeId)0); c_emit_imm_literal(t, imm); c_emit_reg_assign_close(t); } @@ -1030,7 +1126,7 @@ void c_copy(CGTarget* T, Operand dst, Operand src) { compiler_panic(t->c, loc, "C target: copy dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + c_emit_reg_assign_open(t, dst.v.reg, c_operand_emit_type(t, src)); c_emit_operand(t, src); c_emit_reg_assign_close(t); } @@ -1104,7 +1200,14 @@ void c_binop(CGTarget* T, BinOp op, Operand dst, Operand a, Operand b) { compiler_panic(t->c, loc, "C target: binop dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* Pointer operands get cast to uintptr_t inside c_emit_operand_arith, + * so the binop's C result type is `uintptr_t`, not the original pointer + * type. 
Keep the bridge when dst or either operand is pointer-typed so + * the assignment back to a pointer dst doesn't trip -Wint-conversion. */ + int has_ptr = c_operand_is_ptr_typed(t, dst) || + c_operand_is_ptr_typed(t, a) || c_operand_is_ptr_typed(t, b); + c_emit_reg_assign_open(t, dst.v.reg, + has_ptr ? (CfreeCgTypeId)0 : dst.type); int lhs_signed = 1; BinSignCast bsc = binop_sign_kind(op, &lhs_signed); switch (bsc) { @@ -1164,7 +1267,7 @@ void c_unop(CGTarget* T, UnOp op, Operand dst, Operand a) { default: compiler_panic(t->c, loc, "C target: unknown unop %d", (int)op); } - c_emit_reg_assign_open(t, dst.v.reg); + c_emit_reg_assign_open(t, dst.v.reg, dst.type); cbuf_puts(&t->body, sym); c_emit_operand(t, a); c_emit_reg_assign_close(t); @@ -1240,7 +1343,9 @@ void c_cmp(CGTarget* T, CmpOp op, Operand dst, Operand a, Operand b) { compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* Compare result is C `int` (0/1); assigning to integer dst.type narrows + * implicitly without -Wall complaint. */ + c_emit_reg_assign_open(t, dst.v.reg, dst.type); c_emit_cmp_operands(t, op, a, b); c_emit_reg_assign_close(t); } @@ -1282,19 +1387,23 @@ void c_label_place(CGTarget* T, Label l) { cbuf_puts(&t->body, " "); cbuf_puts(&t->body, buf); cbuf_puts(&t->body, ": __attribute__((unused));\n"); + t->last_was_terminator = 0; } void c_jump(CGTarget* T, Label l) { CTarget* t = (CTarget*)T; + if (t->last_was_terminator) return; char buf[24]; c_label_name(l, buf, sizeof buf); cbuf_puts(&t->body, " goto "); cbuf_puts(&t->body, buf); cbuf_puts(&t->body, ";\n"); + t->last_was_terminator = 1; } void c_cmp_branch(CGTarget* T, CmpOp op, Operand a, Operand b, Label l) { CTarget* t = (CTarget*)T; + if (t->last_was_terminator) return; SrcLoc loc = t->cur_fn ? 
t->cur_fn->loc : (SrcLoc){0, 0, 0}; if (!cmp_to_c(op)) { compiler_panic(t->c, loc, "C target: unknown cmp %d", (int)op); @@ -1424,7 +1533,11 @@ void c_local_addr(CGTarget* T, Operand dst, const CGLocalDesc* d, } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); char buf[24]; - c_emit_reg_assign_open(t, dst.v.reg); + /* RHS is `&slot_N` — a pointer to the slot's storage type. dst.type is + * declared as void* / a typed pointer; leave the bridge to the caller's + * hint logic. We pass 0 so the bridge stays — `&slot_N` would otherwise + * be `RECORD_TYPE*` and not match dst.type (typically void*). */ + c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0); cbuf_puts(&t->body, "&"); c_slot_name(s.v.frame_slot, buf, sizeof buf); cbuf_puts(&t->body, buf); @@ -1479,7 +1592,8 @@ void c_convert(CGTarget* T, ConvKind k, Operand dst, Operand src) { break; } - c_emit_reg_assign_open(t, dst.v.reg); + /* The cast `(dst.type)(src)` produces a value of dst.type. */ + c_emit_reg_assign_open(t, dst.v.reg, dst.type); cbuf_puts(&t->body, "("); c_emit_type(t, &t->body, dst.type); cbuf_puts(&t->body, ")"); @@ -1546,7 +1660,8 @@ void c_call(CGTarget* T, const CGCallDesc* d) { Operand rs = d->ret.storage; if (rs.kind == OPK_REG) { c_ensure_reg(t, rs.v.reg, ret_type, (RegClass)rs.cls); - c_emit_reg_assign_open(t, rs.v.reg); + /* Callee returns ret_type; the assignment is direct. */ + c_emit_reg_assign_open(t, rs.v.reg, ret_type); } else if (rs.kind == OPK_LOCAL) { char buf[24]; c_slot_name(rs.v.frame_slot, buf, sizeof buf); @@ -1610,6 +1725,7 @@ void c_call(CGTarget* T, const CGCallDesc* d) { cbuf_puts(&t->body, " return;\n"); } } + if (is_tail) t->last_was_terminator = 1; } /* === load / store === */ @@ -1622,7 +1738,8 @@ void c_load(CGTarget* T, Operand dst, Operand addr, MemAccess m) { } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); CfreeCgTypeId access_ty = m.type ? 
m.type : dst.type; - c_emit_reg_assign_open(t, dst.v.reg); + /* The deref `*(access_ty*)addr` produces a value of access_ty. */ + c_emit_reg_assign_open(t, dst.v.reg, access_ty); c_emit_addr_deref(t, addr, access_ty); c_emit_reg_assign_close(t); } @@ -1647,13 +1764,17 @@ void c_addr_of(CGTarget* T, Operand dst, Operand lv) { compiler_panic(t->c, loc, "C target: addr_of dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* `c_emit_lvalue_addr` casts its output to dst.type already. */ + c_emit_reg_assign_open(t, dst.v.reg, dst.type); c_emit_lvalue_addr(t, lv, dst.type); c_emit_reg_assign_close(t); } void c_ret(CGTarget* T, const CGABIValue* val) { CTarget* t = (CTarget*)T; + /* Already-terminated block: this ret is unreachable (the frontend's + * defensive `return 0;` epilogue lands here right after a user return). */ + if (t->last_was_terminator) return; /* CG emits a defensive ret_void epilogue at the end of every function. For * a non-void function that's unreachable; emitting a bare `return;` would * trip -Wreturn-type. Spell it as `__builtin_unreachable()` so the host C @@ -1662,6 +1783,7 @@ void c_ret(CGTarget* T, const CGABIValue* val) { CfreeCgTypeId rt = cg_type_func_ret_id(t->c, t->cur_fn->fn_type); if (rt && !cg_type_is_void(t->c, rt)) { cbuf_puts(&t->body, " __builtin_unreachable();\n"); + t->last_was_terminator = 1; return; } } @@ -1689,6 +1811,7 @@ void c_ret(CGTarget* T, const CGABIValue* val) { } } cbuf_puts(&t->body, ";\n"); + t->last_was_terminator = 1; } /* === alias === @@ -1844,7 +1967,8 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, (unsigned)ndst); } c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); - c_emit_reg_assign_open(t, dsts[0].v.reg); + /* Returns void*; bridge to dst pointer type. 
*/ + c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0); cbuf_puts(&t->body, "__builtin_assume_aligned("); for (u32 i = 0; i < narg; ++i) { if (i > 0) cbuf_puts(&t->body, ", "); @@ -1862,7 +1986,8 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, (unsigned)ndst, (unsigned)narg); } c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); - c_emit_reg_assign_open(t, dsts[0].v.reg); + /* Returns `long`; dst.type may be a narrower int — keep the bridge. */ + c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0); cbuf_puts(&t->body, "__builtin_expect((long)"); c_emit_operand(t, args[0]); cbuf_puts(&t->body, ", (long)"); @@ -1889,7 +2014,9 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, (unsigned)w); } c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); - c_emit_reg_assign_open(t, dsts[0].v.reg); + /* __builtin_popcount/ctz/clz return `int`; bswap returns its input + * type. Narrow to dst.type via the bridge. */ + c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0); cbuf_puts(&t->body, fn); cbuf_puts(&t->body, "("); c_emit_operand(t, args[0]); @@ -1978,7 +2105,8 @@ void c_intrinsic(CGTarget* T, IntrinKind k, Operand* dsts, u32 ndst, compiler_panic(t->c, loc, "C target: setjmp: bad shape"); } c_ensure_reg(t, dsts[0].v.reg, dsts[0].type, (RegClass)dsts[0].cls); - c_emit_reg_assign_open(t, dsts[0].v.reg); + /* setjmp returns `int`; bridge to dst.type. */ + c_emit_reg_assign_open(t, dsts[0].v.reg, (CfreeCgTypeId)0); cbuf_puts(&t->body, "setjmp(*(jmp_buf*)("); c_emit_operand(t, args[0]); cbuf_puts(&t->body, "))"); @@ -2010,7 +2138,8 @@ void c_alloca(CGTarget* T, Operand dst, Operand size, u32 align) { compiler_panic(t->c, loc, "C target: alloca dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* __builtin_alloca returns `void*`; dst.type is typically void* too. 
*/ + c_emit_reg_assign_open(t, dst.v.reg, dst.type); if (align > 1) { /* gcc has __builtin_alloca_with_align taking bits, not bytes. */ cbuf_puts(&t->body, "__builtin_alloca_with_align("); @@ -2077,7 +2206,8 @@ void c_va_arg(CGTarget* T, Operand dst, Operand ap_addr, CfreeCgTypeId ty) { compiler_panic(t->c, loc, "C target: va_arg dst must be REG"); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* __builtin_va_arg yields a value of `ty`. */ + c_emit_reg_assign_open(t, dst.v.reg, ty); cbuf_puts(&t->body, "__builtin_va_arg(*(va_list*)("); c_emit_operand(t, ap_addr); cbuf_puts(&t->body, "), "); @@ -2128,7 +2258,9 @@ void c_tls_addr_of(CGTarget* T, Operand dst, ObjSymId sym, i64 addend) { } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); const char* nm = c_sym_name(t, sym); - c_emit_reg_assign_open(t, dst.v.reg); + /* RHS spells `(char*)&sym + addend` — pointer type that may not match + * dst.type; keep the bridge to cast through cleanly. */ + c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0); cbuf_puts(&t->body, "((char*)&"); cbuf_puts(&t->body, nm); if (addend != 0) { @@ -2187,7 +2319,8 @@ void c_bitfield_load(CGTarget* T, Operand dst, Operand addr, /* Zero-width — layout barrier only; nothing to load. Emit a no-op * assignment so the dst reg still gets a defined value. */ c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* RHS is the literal 0 (int); narrowing to dst.type is fine. */ + c_emit_reg_assign_open(t, dst.v.reg, dst.type); cbuf_puts(&t->body, "0"); c_emit_reg_assign_close(t); return; @@ -2198,7 +2331,9 @@ void c_bitfield_load(CGTarget* T, Operand dst, Operand addr, (unsigned)bf.storage.size); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* RHS is the storage-width int from the mask/shift expression; bridge + * to dst.type so any signedness/width adjustment is explicit. 
*/ + c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0); /* For signed bitfields, sign-extend via the standard shift-up / arith-shift- * down trick on a signed integer of the storage width. For unsigned, mask * the extracted bits. @@ -2473,7 +2608,8 @@ void c_atomic_load(CGTarget* T, Operand dst, Operand addr, MemAccess m, CTarget* t = (CTarget*)T; (void)m; c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* __atomic_load_n returns a value of the pointed-to type (dst.type). */ + c_emit_reg_assign_open(t, dst.v.reg, dst.type); cbuf_puts(&t->body, "__atomic_load_n(("); c_emit_type(t, &t->body, dst.type); cbuf_puts(&t->body, "*)"); @@ -2522,7 +2658,8 @@ void c_atomic_rmw(CGTarget* T, AtomicOp op, Operand dst, Operand addr, compiler_panic(t->c, loc, "C target: unknown atomic op %d", (int)op); } c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); - c_emit_reg_assign_open(t, dst.v.reg); + /* __atomic_fetch_* returns the prior value of the pointed-to type. */ + c_emit_reg_assign_open(t, dst.v.reg, val.type); cbuf_puts(&t->body, fn); cbuf_puts(&t->body, "(("); c_emit_type(t, &t->body, val.type); @@ -2660,6 +2797,37 @@ static void c_emit_data_bytes(CBuf* b, const u8* bytes, size_t n) { cbuf_puts(b, "\n }"); } +/* Mach-O TLS support: the user-visible SK_TLS symbol is a 24-byte TLV + * descriptor in __DATA,__thread_vars, and the actual initial bytes live in + * a synthesized `<name>$tlv$init` sym in __thread_data (or __thread_bss). + * The descriptor carries an R_ABS64 reloc at offset +16 pointing at that + * init sym. For C-source emission we don't care about the descriptor at all + * — we just emit `_Thread_local` with the init sym's bytes and let the host + * C compiler synthesize whatever TLV plumbing it needs. */ + +/* Find the data init sym referenced by a Mach-O TLS descriptor at + * `desc_base` in section `desc_sec`. Looks for an R_ABS64 reloc at + * `desc_base + 16`. Returns OBJ_SYM_NONE if not found. 
*/ +static ObjSymId c_macho_tls_find_init(CTarget* t, ObjSecId desc_sec, + u32 desc_base) { + u32 total = obj_reloc_total(t->obj); + for (u32 i = 0; i < total; ++i) { + const Reloc* r = obj_reloc_at(t->obj, i); + if (r->section_id != desc_sec) continue; + if (r->offset != desc_base + 16u) continue; + return r->sym; + } + return OBJ_SYM_NONE; +} + +/* Returns 1 if the section is __DATA,__thread_vars (the descriptor section + * on Mach-O). Compared by interned Sym id. */ +static int c_sec_name_is_macho_tvars(CTarget* t, const Section* sec) { + if (!sec) return 0; + Sym tvars = pool_intern_cstr(t->c->global, "__DATA,__thread_vars"); + return sec->name == tvars; +} + /* Returns 1 if any relocation falls into the half-open range [base, base+size) * of section `sec_id` (i.e. patches the bytes of this symbol). */ static int c_sym_has_relocs(CTarget* t, ObjSecId sec_id, u32 base, u32 size) { @@ -2731,6 +2899,14 @@ static void c_emit_sym_relocs_fixup(CTarget* t, const char* nm, static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) { if (os->kind == SK_FUNC || os->kind == SK_IFUNC) return; if (os->kind == SK_SECTION || os->kind == SK_FILE) return; + /* On Mach-O, obj_tls.c synthesizes `__tlv_bootstrap` as an SK_UNDEF + * extern for the TLV descriptor's first slot. The C target delegates all + * TLS lowering to the host compiler via `_Thread_local`, so this + * descriptor-time-only symbol has no place in the emitted source. */ + if (os->kind == SK_UNDEF && t->c->target.obj == CFREE_OBJ_MACHO) { + const ObjBuilder* ob = t->obj; + if (id == obj_tlv_bootstrap_get(ob)) return; + } const char* nm = c_sym_name(t, id); CBuf* b = &t->data_defs; /* SK_TLS user-visible syms need a _Thread_local prefix. 
On ELF the sym @@ -2782,17 +2958,75 @@ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) { return; } if (is_tls && t->c->target.obj == CFREE_OBJ_MACHO) { - /* Mach-O TLV: the user sym is the descriptor (24 bytes, with relocs to - * __tlv_bootstrap and the synthesized __init.<name> sym). We can't - * faithfully express the descriptor in portable C — `_Thread_local` - * needs the underlying initial value, which lives in a different - * (synthesized) sym. Phase 4 leaves this for a future pass. */ - compiler_panic(t->c, (SrcLoc){0, 0, 0}, - "C target: Mach-O TLS data definition not yet supported"); + /* Mach-O splits TLS across two object-file symbols (see obj_tls.c): the + * user-visible sym is a 24-byte TLV descriptor in + * __DATA,__thread_vars; the actual initial bytes live in a synthesized + * `<name>$tlv$init` sym in __DATA,__thread_data (or __thread_bss). For + * C source emission we don't need either of those — `_Thread_local` + * delegates to the host C compiler, which builds its own descriptor. + * + * We use the descriptor sym as the carrier (its name is what user code + * references) and pull the initial bytes/size/alignment from the init + * sym, found via the R_ABS64 reloc at descriptor offset +16. The init + * sym is skipped in its own iteration. 
*/ + const Section* desc_sec = obj_section_get(t->obj, os->section_id); + if (c_sec_name_is_macho_tvars(t, desc_sec)) { + ObjSymId init_id = c_macho_tls_find_init(t, os->section_id, + (u32)os->value); + if (init_id == OBJ_SYM_NONE) { + compiler_panic(t->c, (SrcLoc){0, 0, 0}, + "C target: Mach-O TLS descriptor missing init reloc"); + } + const ObjSym* init_os = obj_symbol_get(t->obj, init_id); + if (!init_os || init_os->section_id == OBJ_SEC_NONE) { + compiler_panic(t->c, (SrcLoc){0, 0, 0}, + "C target: Mach-O TLS init sym not defined"); + } + const Section* init_sec = obj_section_get(t->obj, init_os->section_id); + u32 init_base = (u32)init_os->value; + u32 init_size = (u32)init_os->size; + /* TLS data with relocations would need the constructor-fixup path + * (and we'd have to rewrite the reloc target's section/offset to + * the descriptor's name in the emitted C). No test currently + * exercises this; surface it as a clear panic-as-skip if we hit it. */ + if (c_sym_has_relocs(t, init_os->section_id, init_base, init_size)) { + compiler_panic(t->c, (SrcLoc){0, 0, 0}, + "C target: Mach-O TLS with pointer init not yet " + "supported"); + } + if (os->bind == SB_LOCAL) cbuf_puts(b, "static "); + cbuf_puts(b, "_Thread_local "); + c_emit_link_attrs(b, os); + cbuf_puts(b, "__attribute__((unused)) "); + cbuf_puts(b, "_Alignas("); + cbuf_put_u64(b, init_sec->align ? init_sec->align : 1); + cbuf_puts(b, ") uint8_t "); + cbuf_puts(b, nm); + cbuf_puts(b, "["); + cbuf_put_u64(b, init_size ? 
init_size : 1); + cbuf_puts(b, "]"); + if (init_sec->kind == SEC_BSS || init_sec->sem == SSEM_NOBITS || + init_size == 0) { + cbuf_puts(b, ";\n"); + } else { + Heap* h = t->c->ctx->heap; + u8* bytes = (u8*)h->alloc(h, init_size, 1); + if (!bytes) { + compiler_panic(t->c, (SrcLoc){0, 0, 0}, + "C target: oom on TLS init bytes"); + } + c_read_section_bytes(init_sec, init_base, bytes, init_size); + c_emit_data_bytes(b, bytes, init_size); + h->free(h, bytes, init_size); + cbuf_puts(b, ";\n"); + } + return; + } + /* Not the descriptor: this is the synthesized `<name>$tlv$init` data + * sym (or a __thread_ptrs entry). The descriptor case above already + * emitted the user-facing _Thread_local; nothing more to do. */ + return; } - /* Skip the synthesized `__init.<name>` aux sym on Mach-O — even if we got - * here, it shouldn't be emitted as user-visible data. (Currently - * unreachable because the descriptor path panicked above.) */ if (os->kind == SK_COMMON) { /* Common — uninitialized, with explicit alignment. Emit as * tentative-definition (`uint8_t name[size];` at file scope), which C @@ -2862,6 +3096,12 @@ static void c_emit_data_symbol_fixups(CTarget* t, ObjSymId id, if (os->kind == SK_SECTION || os->kind == SK_FILE) return; if (os->kind == SK_UNDEF || os->kind == SK_COMMON) return; if (os->section_id == OBJ_SEC_NONE) return; + /* Mach-O TLS: descriptor bytes are not user-visible data (they hold the + * TLV pointer and an offset), and the init sym has been folded into the + * descriptor's `_Thread_local` emission. Either way, the C-side + * `_Thread_local` declaration carries its own initializer; there is no + * separate storage to fix up. 
*/ + if (os->kind == SK_TLS && t->c->target.obj == CFREE_OBJ_MACHO) return; const Section* sec = obj_section_get(t->obj, os->section_id); if (!c_is_data_section(sec)) return; u32 base = (u32)os->value; diff --git a/src/arch/c_target/internal.h b/src/arch/c_target/internal.h @@ -108,6 +108,14 @@ typedef struct CTarget { /* Monotone counter for synthesizing unique temporary names within a * function (e.g. bitcast scratch). Reset on func_begin. */ u32 next_tmp; + + /* Tracks whether the last emitted body statement was an unconditional + * terminator (`return`, `goto`, etc.) with no intervening label. When + * set, subsequent ret/jump/cmp_branch emissions are dropped — they're + * unreachable, and emitting them produces dead C that distracts from + * the live code. Reset on func_begin and label_place. */ + u8 last_was_terminator; + u8 pad3[3]; } CTarget; typedef struct CScopeInfo {