commit e98db5738ea91cf5fe5953d9f5a9937a56377a59
parent 4b4f64638f1b09154ac2edb68f1e6bf987db2ab1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 1 Jun 2026 17:15:14 -0700
arch/c_target: re-emit file-scope asm verbatim
The CG layer hands the backend de-escaped file-scope __asm__ text;
c_emit_file_scope_asm re-quotes it as a single C string literal into data_defs
(flushed at file scope before any function body) so the host C compiler
assembles it. Skip the asm_02_file_scope case of the parse corpus C lane on
Mach-O, where the verbatim bare symbol name (global_x) cannot resolve against
the _-prefixed C reference -- a mangling mismatch the C backend cannot bridge
without parsing the asm. ELF has no prefix, so the emitted C links and runs
there.
Diffstat:
4 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/src/arch/c_target/c_emit.c b/src/arch/c_target/c_emit.c
@@ -3885,6 +3885,39 @@ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) {
if (nrelocs) h->free(h, (void*)rs, nrelocs * sizeof(const Reloc*));
}
+/* Re-emit a file-scope `__asm__("...")` block at TU scope. The CG layer hands
+ * us the de-escaped assembly text (real newlines); re-quote the `len` bytes at
+ * `src` as a single C string literal so the host C compiler assembles it.
+ * Lands in data_defs, which finalize flushes at file scope before any function
+ * body. Bytes >= 0x80 are copied through unchanged. */
+void c_emit_file_scope_asm(CTarget* t, const char* src, size_t len) {
+  CBuf* b = &t->data_defs;
+  cbuf_puts(b, "__asm__(\"");
+  for (size_t i = 0; i < len; ++i) {
+    /* Classify as unsigned so bytes >= 0x80 never compare below 0x20. */
+    unsigned char ch = (unsigned char)src[i];
+    if (ch == '\\' || ch == '"') {
+      cbuf_putc(b, '\\');
+      cbuf_putc(b, src[i]);
+    } else if (ch == '\n') {
+      cbuf_puts(b, "\\n");
+    } else if (ch == '\t') {
+      cbuf_puts(b, "\\t");
+    } else if (ch == '\r') {
+      cbuf_puts(b, "\\r");
+    } else if (ch < 0x20 || ch == 0x7f) {
+      /* Other control bytes (incl. NUL): fixed-width three-digit octal. */
+      cbuf_putc(b, '\\');
+      cbuf_putc(b, (char)('0' + (ch >> 6)));
+      cbuf_putc(b, (char)('0' + ((ch >> 3) & 7)));
+      cbuf_putc(b, (char)('0' + (ch & 7)));
+    } else {
+      cbuf_putc(b, src[i]);
+    }
+  }
+  cbuf_puts(b, "\");\n");
+}
+
static void c_emit_data(CTarget* t) {
ObjSymIter* it = obj_symiter_new(t->obj);
if (!it) return;
diff --git a/src/arch/c_target/c_emit.h b/src/arch/c_target/c_emit.h
@@ -191,6 +191,8 @@ void c_writer_puts(CTarget* t, const char* s);
void c_emit_func_begin(CTarget*, const CGFuncDesc*);
void c_emit_func_end(CTarget*);
void c_emit_alias(CTarget*, ObjSymId, ObjSymId, CfreeCgTypeId);
+/* Re-emit file-scope asm text (`len` bytes at `src`) verbatim at TU scope. */
+void c_emit_file_scope_asm(CTarget*, const char* src, size_t len);
void c_emit_ret(CTarget*, const CGLocal*, u32);
void c_emit_load_imm(CTarget*, Operand, i64);
void c_emit_load_const(CTarget*, Operand, ConstBytes);
diff --git a/src/arch/c_target/ir_emit.c b/src/arch/c_target/ir_emit.c
@@ -279,6 +279,10 @@ void c_emit_ir_module(CTarget* t, const CgIrModule* module) {
const CgIrAlias* a = &module->aliases[i];
c_emit_alias(t, a->alias_sym, a->target_sym, a->type);
}
+ for (u32 i = 0; i < module->nfile_scope_asms; ++i) {
+ const CgIrFileScopeAsm* a = &module->file_scope_asms[i];
+ c_emit_file_scope_asm(t, a->src, a->len);
+ }
for (u32 i = 0; i < module->nfuncs; ++i) {
ir_emit_func(t, module->funcs[i]);
}
diff --git a/test/parse/run.sh b/test/parse/run.sh
@@ -422,6 +422,15 @@ cf_lane_C() {
cf_skip "$CF_NAME/C" "Mach-O static link rejects weak undef ref without dylib"
return
fi
+ # File-scope asm that defines C-visible symbols re-emits verbatim, so it
+ # defines the bare name (global_x). On Mach-O the C reference picks up the
+ # leading-underscore (_global_x), so the link can't resolve — a name-mangling
+ # mismatch the C backend can't bridge without parsing the opaque asm. ELF has
+ # no such prefix, so the emitted C links and runs there.
+ if [ "$HOST_OBJ_FMT" = "macho" ] && [[ "$CF_BASE" == asm_02_file_scope ]]; then
+ cf_skip "$CF_NAME/C" "Mach-O underscores C symbol refs; verbatim file-scope asm defines the bare name"
+ return
+ fi
if [ $have_c_wrapper -eq 0 ]; then
cf_skip "$CF_NAME/C" "no c-wrapper (host CC failed)"
return