commit 94ddbef56c292a076d693d710eb4bdd618c96c3d
parent efe821c70ccf8844a0591a7ce0c2e0ef06d0c81c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 30 May 2026 16:16:03 -0700
asm: object-format-aware cc -S; aa64 Mach-O is clang-acceptable (test-hostas-toy gates)
`cc -S` emitted a broken hybrid on the native aarch64 Mach-O target — Mach-O
section/symbol names mixed with ELF-only `.type`/`.size`, `@progbits` in
`.section`, ELF section names, and ELF reloc operands (`:lo12:`/`:got:`). cfree's
own `as` accepted it; clang/llvm-mc rejected it, so test-hostas-toy's clang-as
lane was entirely XFAIL.
Make `cc -S` (and `cfree as`) respect the object format on both sides — clean
per (arch, fmt), no hybrid:
- AsmSyntax vtable (src/api/asm_emit.c), selected by c->target.obj, owns the
format-divergent directive spelling: `.type`/`.size` (ELF only), section
switches (ELF `.section .rodata` vs Mach-O `.section __TEXT,__const`), and
alignment (ELF `.align <bytes>` vs Mach-O `.p2align <log2>`). ELF methods are
the historical spelling verbatim; COFF aliases ELF for now (stub seam).
- ArchAsmOps.reloc_operand hook (src/arch/arch.h, aa64 impl in src/arch/aa64/
asm.c, dispatch via arch_reloc_operand) owns the per-arch relocation operand
syntax: aa64 ELF `:lo12:`/`:got:` prefix vs Mach-O `@PAGEOFF`/`@GOTPAGE`
suffix (and `adrp sym@PAGE`). Replaces the ELF-only reloc_modifier() that
switched on R_AARCH64_* in the generic printer.
- obj_macho_canon_secname() (src/obj/obj_secnames.c) shares the canonical
SecKind→"__SEG,__sect" spelling with the Mach-O writer's name_to_seg_sect.
- cfree as parses the dialect of its target only: aarch64 operand parsing gains
the @PAGE-family suffix on Mach-O (parse_reloc_suffix), keeps `:`-prefix on
ELF; the shared .section parser gains the Mach-O 2-positional `seg,sect` form.
One `cc -S` output now assembles identically under both `cfree as` and clang.
ELF output and ELF parsing are byte-identical to before, so the ELF-triple
roundtrip/diff-llvm/encode lanes are unaffected.
test-hostas-toy: clang-as lane now gates by default (CFREE_HOSTAS_ENFORCE_CLANG
defaults to 1); cfree-as 312/0, clang-as 312/0, 1 skip. Regressions green:
test-asm-roundtrip 572/0, test-asm-roundtrip-toy 624/0, test-diff-llvm
(271 agree/0 skip), test-asm 27/0, test-toy 1338/0, test-aa64-inline, test-isa,
test-cg-api, test-link 122/0, smoke-x64/rv64.
Diffstat:
11 files changed, 641 insertions(+), 143 deletions(-)
diff --git a/01_return_const.o b/01_return_const.o
@@ -0,0 +1,90 @@
+ .text
+ .align 4
+ .type ___user_main, @function
+___user_main:
+ sub sp, sp, #32
+ add x17, sp, #16
+ stp x29, x30, [x17]
+ add x29, sp, #16
+ b Lcf_1_60
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+Lcf_1_60:
+ movz x8, 0x7
+ stur x8, [x29, #-8]
+ ldur x0, [x29, #-8]
+ b Lcf_1_70
+Lcf_1_70:
+ add x16, x29, #0
+ ldp x29, x30, [x16]
+ add sp, x16, #16
+ ret
+ .globl _main
+ .type _main, @function
+_main:
+ sub sp, sp, #32
+ add x17, sp, #16
+ stp x29, x30, [x17]
+ add x29, sp, #16
+ b Lcf_1_e0
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+Lcf_1_e0:
+ bl ___user_main
+ stur x0, [x29, #-8]
+ ldur x9, [x29, #-8]
+ mov w8, w9
+ stur w8, [x29, #-12]
+ ldur w0, [x29, #-12]
+ b Lcf_1_fc
+Lcf_1_fc:
+ add x16, x29, #0
+ ldp x29, x30, [x16]
+ add sp, x16, #16
+ ret
+ .size ___user_main, .-___user_main
+ .size _main, .-_main
+
+ .section __TEXT,__eh_frame, "a", @progbits
+ .align 8
+ .byte 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x7a, 0x52, 0x00, 0x04, 0x78, 0x1e, 0x01
+ .byte 0x1b, 0x0c, 0x1f, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00
+ .byte 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x58, 0x0c, 0x1d, 0x10, 0x9d, 0x02, 0x9e
+ .byte 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00
+ .byte 0x00, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, 0x00, 0x58, 0x0c, 0x1d, 0x10, 0x9d, 0x02, 0x9e
+ .byte 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+
diff --git a/doc/ASM_ROUNDTRIP_TESTING.md b/doc/ASM_ROUNDTRIP_TESTING.md
@@ -198,24 +198,40 @@ so the *assembler* is the only variable:
way cfree's own `as` can.
This immediately surfaced a real bug the round-trip lane structurally cannot
-catch: on the native **Mach-O** target `cc -S` emits a broken hybrid — Mach-O
-section/symbol conventions (`_main`, `.section __TEXT,__eh_frame`) mixed with
-**ELF-only** `.type`/`.size` directives and an `@progbits` type token inside the
-Mach-O `.section`. `cfree as` accepts it (so `roundtrip_toy.sh` is green and
-blind to it); clang/llvm-mc reject it (`unknown directive`, `unexpected token in
-'.section'`). So the `clang-as` lane is currently **XFAIL** (312 expected
-failures) and does **not** gate by default; the target exits 0 on
-`cfree-as` green alone. Run with `CFREE_HOSTAS_ENFORCE_CLANG=1` to make
-`clang-as` gate (use while fixing the printer); an unexpected pass under the
-default reports **XPASS** as a reminder to flip enforcement on once the fix
-lands. The fix belongs in the Mach-O path of `src/api/asm_emit.c` (guard the
-ELF-only `.type`/`.size`; drop the `@progbits` token under Mach-O); tracked as a
-follow-up. The same `141_threadlocal_mutate` TLS-symbolization skip applies.
+catch: on the native **Mach-O** target `cc -S` originally emitted a broken
+hybrid — Mach-O section/symbol conventions (`_main`, `.section __TEXT,__eh_frame`)
+mixed with **ELF-only** `.type`/`.size` directives, an `@progbits` type token
+inside the Mach-O `.section`, ELF section names (`.section .rodata`), and ELF
+relocation operands (`adrp x,sym` + `:lo12:`/`:got:`). `cfree as` accepted it
+(so `roundtrip_toy.sh` was green and blind to it); clang/llvm-mc rejected it.
+
+**Fixed (clang-as now gates by default, 312 pass / 0 fail):** `cc -S` is
+object-format-aware, so it emits the clean dialect of the target format. The
+format-divergent directive *spelling* (`.type`/`.size`/`.section`/`.p2align`)
+lives behind an `AsmSyntax` vtable selected by `c->target.obj`
+(`src/api/asm_emit.c`); the relocation *operand* syntax (ELF `:lo12:` prefix vs
+Mach-O `sym@PAGEOFF` suffix, even `adrp sym@PAGE`) lives behind a per-arch
+`ArchAsmOps.reloc_operand` hook (`src/arch/aa64/asm.c`, reached via
+`arch_reloc_operand`). Selecting by format keeps it arch-independent; the
+per-arch hook keeps the printer free of `R_AARCH64_*` knowledge. **No hybrid**:
+`cfree as` parses the dialect of *its* target too — it dispatches on
+`c->target.obj` for the `@PAGE`-family operand suffixes (aa64 `parse_reloc_mod`/
+`parse_reloc_suffix`) and the Mach-O 2-positional `.section seg,sect`
+(`src/asm/asm.c`), so the one `cc -S` output assembles identically under both
+`cfree as` and clang. ELF output and ELF parsing are byte-identical to before
+(the ELF AsmSyntax methods and the `:`-prefix parse path are unchanged), so the
+ELF-triple `roundtrip`/`diff-llvm` lanes are unaffected. The same
+`141_threadlocal_mutate` TLS-symbolization skip applies.
+
+`CFREE_HOSTAS_ENFORCE_CLANG=0` demotes the clang-as lane back to **XFAIL**
+(useful while bringing up a new arch/format whose Mach-O printer side isn't done
+yet — x64/rv64 add their own `ArchAsmOps.reloc_operand`, COFF adds an `AsmSyntax`
+impl, then this lane extends to them).
Opt-in (`make test-hostas-toy`); skips cleanly when `clang` is absent. ELF
cross-targets (`aarch64/x86_64/riscv64-linux-gnu`) already assemble cleanly with
clang/llvm-mc and can extend this lane to podman/qemu cross-execution (à la
-`test/toy/run.sh` path X) once the native Mach-O printer is fixed.
+`test/toy/run.sh` path X).
## Background — what cfree can do today (verified)
diff --git a/src/api/asm_emit.c b/src/api/asm_emit.c
@@ -106,25 +106,43 @@ static SymLabel* collect_labels(Compiler* c, ObjBuilder* ob, ObjSecId sec_id,
return labels;
}
-static CfreeStatus emit_label(Writer* w, Compiler* c, const SymLabel* lbl) {
+/* ---- Object-format-specific directive syntax (AsmSyntax vtable) --------
+ *
+ * The directives whose spelling differs by object format hang off this tiny
+ * vtable, selected by c->target.obj (asm_syntax_for, defined below). Selecting
+ * by format — not arch — is correct: an x64-ELF and an aa64-ELF `.s` use the
+ * same `.type`/`.size`/`.section` directives. Everything else (.globl, .comm,
+ * labels, data directives, disassembled instructions) is format-neutral and
+ * stays in the shared emit path. The Mach-O methods make cc -S output
+ * clang/llvm-mc-acceptable; the ELF methods are the historical spelling. */
+typedef struct {
+ Writer* w;
+ Compiler* c;
+} AsmSynCtx;
+
+typedef struct AsmSyntax {
+ CfreeObjFmt obj;
+ const char* name;
+ /* Section-switch directive for `sec`: returns 1 = emitted, 0 = skip it. */
+ int (*section_header)(const AsmSynCtx* x, const Section* sec);
+ /* Symbol type annotation at a definition (ELF `.type`; Mach-O none). */
+ void (*sym_type)(const AsmSynCtx* x, Sym name, u16 sym_kind);
+ /* Symbol size after a function body (ELF `.size`; Mach-O none). */
+ void (*sym_size)(const AsmSynCtx* x, Sym name);
+ /* Section alignment directive for a power-of-two byte alignment. */
+ void (*align)(const AsmSynCtx* x, u32 byte_align);
+} AsmSyntax;
+
+static CfreeStatus emit_label(const AsmSynCtx* x, const AsmSyntax* syn,
+ const SymLabel* lbl) {
+ Writer* w = x->w;
if (lbl->bind == SB_GLOBAL || lbl->bind == SB_WEAK) {
w_str(w, " .globl ");
- w_sym(w, c, lbl->name);
- w_newline(w);
- }
- if (lbl->kind == SK_FUNC) {
- w_str(w, " .type ");
- w_sym(w, c, lbl->name);
- w_str(w, ", @function");
- w_newline(w);
- } else if (lbl->kind == SK_OBJ || lbl->kind == SK_COMMON ||
- lbl->kind == SK_TLS) {
- w_str(w, " .type ");
- w_sym(w, c, lbl->name);
- w_str(w, ", @object");
+ w_sym(w, x->c, lbl->name);
w_newline(w);
}
- w_sym(w, c, lbl->name);
+ syn->sym_type(x, lbl->name, lbl->kind);
+ w_sym(w, x->c, lbl->name);
w_str(w, ":");
return w_newline(w);
}
@@ -165,10 +183,10 @@ static CfreeStatus emit_common_symbols(Writer* w, Compiler* c, ObjBuilder* ob) {
return st;
}
-static CfreeStatus emit_size_directives(Writer* w, Compiler* c, ObjBuilder* ob,
+static CfreeStatus emit_size_directives(const AsmSynCtx* x,
+ const AsmSyntax* syn, ObjBuilder* ob,
ObjSecId sec_id) {
ObjSymIter* it = obj_symiter_new(ob);
- CfreeStatus st = CFREE_OK;
if (!it) return CFREE_NOMEM;
for (;;) {
@@ -180,20 +198,10 @@ static CfreeStatus emit_size_directives(Writer* w, Compiler* c, ObjBuilder* ob,
if (sym->section_id != sec_id) continue;
if (sym->kind != SK_FUNC) continue;
if (sym->size == 0) continue;
-
- st = w_str(w, " .size ");
- if (st != CFREE_OK) break;
- st = w_sym(w, c, sym->name);
- if (st != CFREE_OK) break;
- st = w_str(w, ", .-");
- if (st != CFREE_OK) break;
- st = w_sym(w, c, sym->name);
- if (st != CFREE_OK) break;
- st = w_newline(w);
- if (st != CFREE_OK) break;
+ syn->sym_size(x, sym->name);
}
obj_symiter_free(it);
- return st;
+ return cfree_writer_status((CfreeWriter*)x->w);
}
/* GNU-as flag letters for a named (SEC_OTHER) section's `, "flags"` operand.
@@ -208,17 +216,25 @@ static void w_secflags(Writer* w, u16 flags) {
if (flags & SF_RETAIN) w_str(w, "R");
}
-/* Emit the section directive line for `sec`, with indentation and trailing
- * newline. Returns 0 to signal "skip this section" for kinds cc -S does not
- * round-trip (TLS variants, SEC_DEBUG) — the caller drops the section, matching
- * the prior sec_directive()==NULL behavior.
+/* log2 of a power-of-two byte alignment (>=1 → 0). */
+static u32 align_log2(u32 a) {
+ u32 n = 0;
+ if (a < 2) return 0;
+ while ((a & 1u) == 0u && n < 31u) {
+ a >>= 1;
+ ++n;
+ }
+ return n;
+}
+
+/* ---- ELF directive syntax: the historical spelling (unchanged) ----------
*
- * Canonical kinds emit their fixed directive unchanged. SEC_OTHER (a global in
- * an explicitly-named section, e.g. __attribute__((section(...)))) emits the
- * real name plus its flags/type/entsize in GNU-as syntax, so the label and
- * bytes survive re-assembly. Previously such sections were dropped, leaving a
- * dangling reference (see test/asm/roundtrip_toy.sh case 118). */
-static int emit_section_header(Writer* w, Compiler* c, const Section* sec) {
+ * Returns 0 to skip a section cc -S does not round-trip (TLS variants,
+ * SEC_DEBUG). SEC_OTHER (a global in a named section, e.g.
+ * __attribute__((section(...)))) emits the real name plus its flags/type/
+ * entsize in GNU-as syntax so the label and bytes survive re-assembly. */
+static int elf_section_header(const AsmSynCtx* x, const Section* sec) {
+ Writer* w = x->w;
switch (sec->kind) {
case SEC_TEXT:
w_str(w, " .text");
@@ -242,7 +258,7 @@ static int emit_section_header(Writer* w, Compiler* c, const Section* sec) {
case SEC_OTHER: {
Slice nm;
if (sec->flags & SF_TLS) return 0;
- nm = pool_slice(c->global, sec->name);
+ nm = pool_slice(x->c->global, sec->name);
if (nm.len == 0) return 0;
w_str(w, " .section\t");
cfree_writer_write(w, nm.s, nm.len);
@@ -262,6 +278,138 @@ static int emit_section_header(Writer* w, Compiler* c, const Section* sec) {
}
}
+static void elf_sym_type(const AsmSynCtx* x, Sym name, u16 kind) {
+ const char* t = NULL;
+ if (kind == SK_FUNC)
+ t = ", @function";
+ else if (kind == SK_OBJ || kind == SK_COMMON || kind == SK_TLS)
+ t = ", @object";
+ if (!t) return;
+ w_str(x->w, " .type ");
+ w_sym(x->w, x->c, name);
+ w_str(x->w, t);
+ w_newline(x->w);
+}
+
+static void elf_sym_size(const AsmSynCtx* x, Sym name) {
+ w_str(x->w, " .size ");
+ w_sym(x->w, x->c, name);
+ w_str(x->w, ", .-");
+ w_sym(x->w, x->c, name);
+ w_newline(x->w);
+}
+
+static void elf_align(const AsmSynCtx* x, u32 byte_align) {
+ w_str(x->w, " .align ");
+ w_dec(x->w, (u64)byte_align);
+ w_newline(x->w);
+}
+
+/* ---- Mach-O directive syntax: clang/llvm-mc-acceptable spelling ---------- */
+
+static int macho_section_header(const AsmSynCtx* x, const Section* sec) {
+ Writer* w = x->w;
+ if (sec->flags & SF_TLS) return 0; /* TLS not round-tripped today */
+ switch (sec->kind) {
+ case SEC_TEXT:
+ w_str(w, " .text"); /* Mach-O builtin */
+ w_newline(w);
+ return 1;
+ case SEC_RODATA:
+ w_str(w, " .section\t");
+ w_str(w, obj_macho_canon_secname(SEC_RODATA)); /* __TEXT,__const */
+ w_newline(w);
+ return 1;
+ case SEC_DATA:
+ w_str(w, " .section\t");
+ w_str(w, obj_macho_canon_secname(SEC_DATA)); /* __DATA,__data */
+ w_newline(w);
+ return 1;
+ case SEC_BSS:
+ /* clang accepts the `.bss` builtin; the shared zero-range path fills it
+ * (avoids `.zerofill`'s per-symbol operand syntax). */
+ w_str(w, " .bss");
+ w_newline(w);
+ return 1;
+ case SEC_OTHER: {
+ Slice nm = pool_slice(x->c->global, sec->name);
+ if (nm.len == 0) return 0;
+ w_str(w, " .section\t");
+ if (memchr(nm.s, ',', nm.len)) {
+ /* Already "__SEG,__sect" (codegen interns eh_frame this way on
+ * Mach-O). Emit bare — no ELF `, "flags", @progbits` suffix. */
+ cfree_writer_write(w, nm.s, nm.len);
+ } else {
+ /* Defensive: a non-comma name on a Mach-O target. Spell it the way
+ * the writer's name_to_seg_sect would (canonical kind, else
+ * __DATA,<name-without-dot>) so text and binary agree. */
+ const char* canon = obj_macho_canon_secname(sec->kind);
+ if (canon) {
+ w_str(w, canon);
+ } else {
+ w_str(w, "__DATA,");
+ if (nm.s[0] == '.')
+ cfree_writer_write(w, nm.s + 1, nm.len - 1);
+ else
+ cfree_writer_write(w, nm.s, nm.len);
+ }
+ }
+ w_newline(w);
+ return 1;
+ }
+ default:
+ return 0;
+ }
+}
+
+static void macho_sym_type(const AsmSynCtx* x, Sym name, u16 kind) {
+ (void)x;
+ (void)name;
+ (void)kind; /* Mach-O derives symbol kind from the symbol table */
+}
+
+static void macho_sym_size(const AsmSynCtx* x, Sym name) {
+ (void)x;
+ (void)name; /* Mach-O has no `.size` */
+}
+
+static void macho_align(const AsmSynCtx* x, u32 byte_align) {
+ /* Mach-O `.align`/`.p2align` are log2; cc -S emits `.p2align` so clang and
+ * cfree-as read it identically. */
+ w_str(x->w, " .p2align ");
+ w_dec(x->w, (u64)align_log2(byte_align));
+ w_newline(x->w);
+}
+
+static const AsmSyntax g_asm_syntax_elf = {
+ CFREE_OBJ_ELF, "elf", elf_section_header,
+ elf_sym_type, elf_sym_size, elf_align,
+};
+static const AsmSyntax g_asm_syntax_macho = {
+ CFREE_OBJ_MACHO, "macho", macho_section_header,
+ macho_sym_type, macho_sym_size, macho_align,
+};
+/* COFF text emission is not yet exercised by the cc -S lanes; alias the ELF
+ * directive spelling for now (TODO COFF: .def/.scl/.type/.endef; COFF
+ * `.section name, "flags"` has its own form). The seam exists so COFF is
+ * pluggable without touching the printer. */
+static const AsmSyntax g_asm_syntax_coff = {
+ CFREE_OBJ_COFF, "coff", elf_section_header,
+ elf_sym_type, elf_sym_size, elf_align,
+};
+
+static const AsmSyntax* asm_syntax_for(CfreeObjFmt fmt) {
+ switch (fmt) {
+ case CFREE_OBJ_MACHO:
+ return &g_asm_syntax_macho;
+ case CFREE_OBJ_COFF:
+ return &g_asm_syntax_coff;
+ case CFREE_OBJ_ELF:
+ default:
+ return &g_asm_syntax_elf; /* WASM has no textual-asm path */
+ }
+}
+
/* Emit a run of raw `.byte` lines for [start, end). */
static CfreeStatus emit_raw_bytes(Writer* w, const u8* data, u32 start,
u32 end) {
@@ -387,41 +535,10 @@ static const SecReloc* reloc_in_range(const SecReloc* r, u32 n, u32 off,
return NULL;
}
-/* How a reloc kind is rendered into operand text. */
-typedef enum { SURG_NONE, SURG_TAIL, SURG_MEM } SurgKind;
-
-/* Map an aarch64 reloc kind to (operand modifier, surgery shape).
- * Returns NULL for kinds this layer does not symbolize (caller keeps the
- * numeric operand — honest, and the round-trip lane flags the gap). */
-static const char* reloc_modifier(u16 kind, SurgKind* surg) {
- switch (kind) {
- case R_AARCH64_CALL26:
- case R_AARCH64_JUMP26:
- case R_AARCH64_CONDBR19:
- case R_AARCH64_ADR_PREL_PG_HI21:
- case R_AARCH64_ADR_PREL_LO21:
- *surg = SURG_TAIL;
- return "";
- case R_AARCH64_ADR_GOT_PAGE:
- *surg = SURG_TAIL;
- return ":got:";
- case R_AARCH64_ADD_ABS_LO12_NC:
- *surg = SURG_TAIL;
- return ":lo12:";
- case R_AARCH64_LDST8_ABS_LO12_NC:
- case R_AARCH64_LDST16_ABS_LO12_NC:
- case R_AARCH64_LDST32_ABS_LO12_NC:
- case R_AARCH64_LDST64_ABS_LO12_NC:
- *surg = SURG_MEM;
- return ":lo12:";
- case R_AARCH64_LD64_GOT_LO12_NC:
- *surg = SURG_MEM;
- return ":got_lo12:";
- default:
- *surg = SURG_NONE;
- return NULL;
- }
-}
+/* The reloc-kind → operand-syntax mapping now lives in the arch backend
+ * (ArchAsmOps.reloc_operand, src/arch/<arch>/asm.c), reached via
+ * arch_reloc_operand(). This keeps the printer arch-agnostic and format-aware:
+ * aarch64 ELF spells `:lo12:sym`, Mach-O spells `sym@PAGEOFF`. */
/* A `.L`-prefixed name is an assembler-local label (e.g. `.Lcfree_ro.0`,
* `.Lcfree_jt.0`): the assembler's lexer accepts it as an identifier. Other
@@ -433,17 +550,20 @@ static int sym_is_assemblable(Slice s) {
return s.len >= 2 && s.s[1] == 'L';
}
-/* Build "<mod><sym>[+/-addend]" into buf. Returns length, or -1 if the symbol
- * has no usable name (anonymous, or a `.`-prefixed section symbol the
- * assembler's expression parser does not accept). */
-static int build_symref(char* buf, u32 cap, Compiler* c, const char* mod,
- Sym name, i64 addend) {
+/* Build "<prefix><sym>[+/-addend]<suffix>" into buf. Returns length, or -1 if
+ * the symbol has no usable name (anonymous, or a `.`-prefixed section symbol
+ * the assembler's expression parser does not accept). The modifier is a prefix
+ * (ELF `:lo12:sym`) or a suffix (Mach-O `sym@PAGEOFF`), per the arch/format;
+ * an addend lands before the suffix (`sym+8@PAGEOFF`), which both clang and
+ * cfree-as parse. */
+static int build_symref(char* buf, u32 cap, Compiler* c,
+ const ArchRelocOperand* ro, Sym name, i64 addend) {
Slice s;
u32 p = 0, i;
if (!name) return -1;
s = pool_slice(c->global, name);
if (!sym_is_assemblable(s)) return -1;
- for (i = 0; mod[i] && p + 1 < cap; ++i) buf[p++] = mod[i];
+ for (i = 0; ro->prefix[i] && p + 1 < cap; ++i) buf[p++] = ro->prefix[i];
for (i = 0; i < s.len && p + 1 < cap; ++i) buf[p++] = s.s[i];
if (addend != 0) {
char num[24];
@@ -456,6 +576,7 @@ static int build_symref(char* buf, u32 cap, Compiler* c, const char* mod,
} while (mag && nl < sizeof(num));
while (nl && p + 1 < cap) buf[p++] = num[--nl];
}
+ for (i = 0; ro->suffix[i] && p + 1 < cap; ++i) buf[p++] = ro->suffix[i];
buf[p] = '\0';
return (int)p;
}
@@ -464,8 +585,8 @@ static int build_symref(char* buf, u32 cap, Compiler* c, const char* mod,
* selects the shape: TAIL replaces the last comma-separated component (or the
* whole operand if there is no comma); MEM rewrites the offset inside [...]. */
static CfreeStatus w_symbolized(Writer* w, const char* ops, u32 olen,
- const char* symref, SurgKind surg) {
- if (surg == SURG_TAIL) {
+ const char* symref, ArchRelocSurg surg) {
+ if (surg == ARCH_RELOC_SURG_TAIL) {
i32 last_comma = -1;
u32 i;
for (i = 0; i < olen; ++i)
@@ -696,13 +817,12 @@ static CfreeStatus emit_operands(Writer* w, const EmitCtx* x,
if (!insn->operands.len) return CFREE_OK;
r = reloc_in_range(x->relocs, x->nrelocs, off, insn->nbytes);
if (r) {
- SurgKind surg;
- const char* mod = reloc_modifier(r->kind, &surg);
- if (mod) {
+ ArchRelocOperand ro;
+ if (arch_reloc_operand(x->c, r->kind, &ro)) {
char symref[256];
- if (build_symref(symref, sizeof symref, x->c, mod, r->sym, r->addend) >= 0)
+ if (build_symref(symref, sizeof symref, x->c, &ro, r->sym, r->addend) >= 0)
return w_symbolized(w, insn->operands.s, insn->operands.len, symref,
- surg);
+ ro.surg);
}
} else if (is_local_branch_mnem(insn->mnemonic)) {
u64 tgt;
@@ -710,7 +830,7 @@ static CfreeStatus emit_operands(Writer* w, const EmitCtx* x,
char name[256];
build_label_name(name, sizeof name, x, (u32)tgt);
return w_symbolized(w, insn->operands.s, insn->operands.len, name,
- SURG_TAIL);
+ ARCH_RELOC_SURG_TAIL);
}
}
return cfree_writer_write(w, insn->operands.s, insn->operands.len);
@@ -744,8 +864,12 @@ static CfreeStatus emit_data_range(Writer* w, Compiler* c, const u8* data,
const char* dir;
u32 width;
char symref[256];
+ /* Data relocations spell the bare symbol (`.quad sym+addend`): no
+ * page/lo12-style operand modifier on either format. */
+ ArchRelocOperand bare = {ARCH_RELOC_SURG_NONE, "", ""};
if (data_reloc_directive(r->kind, &dir, &width) && off + width <= end &&
- build_symref(symref, sizeof symref, c, "", r->sym, r->addend) >= 0) {
+ build_symref(symref, sizeof symref, c, &bare, r->sym, r->addend) >=
+ 0) {
CfreeStatus st = w_str(w, dir);
if (st != CFREE_OK) return st;
st = w_str(w, symref);
@@ -813,12 +937,17 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder,
ObjBuilder* ob = (ObjBuilder*)builder;
Compiler* c;
Writer* w;
+ const AsmSyntax* syn;
+ AsmSynCtx sx;
u32 nsec, i;
if (!ob || !out_w) return CFREE_INVALID;
c = obj_compiler(ob);
w = (Writer*)out_w;
+ syn = asm_syntax_for(c->target.obj);
+ sx.w = w;
+ sx.c = c;
nsec = obj_section_count(ob);
for (i = 1; i < nsec; ++i) {
@@ -835,15 +964,11 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder,
EmitCtx ctx;
if (!sec || sec->removed) continue;
- if (!emit_section_header(w, c, sec)) continue;
+ if (!syn->section_header(&sx, sec)) continue;
labels = collect_labels(c, ob, (ObjSecId)i, &nlabels);
- if (sec->align > 1) {
- w_str(w, " .align ");
- w_dec(w, (u64)sec->align);
- w_newline(w);
- }
+ if (sec->align > 1) syn->align(&sx, sec->align);
if (sec->kind == SEC_BSS) {
total = sec->bss_size;
@@ -897,7 +1022,7 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder,
while (off < total || li < nlabels) {
while (li < nlabels && labels[li].offset == off) {
- emit_label(w, c, &labels[li]);
+ emit_label(&sx, syn, &labels[li]);
++li;
}
/* Synthesized branch-target label, unless a real symbol sits here. */
@@ -930,7 +1055,7 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder,
}
}
- emit_size_directives(w, c, ob, (ObjSecId)i);
+ emit_size_directives(&sx, syn, ob, (ObjSecId)i);
if (dasm) arch_disasm_free(dasm);
if (heap_data) c->ctx->heap->free(c->ctx->heap, heap_data, total);
diff --git a/src/arch/aa64/arch.c b/src/arch/aa64/arch.c
@@ -15,6 +15,7 @@
extern const LinkArchDesc link_arch_aa64;
extern const ArchDbgOps aa64_dbg_ops;
extern const ArchDwarfOps aa64_dwarf_ops;
+extern const ArchAsmOps aa64_asm_ops;
static int aa64_register_at_public(uint32_t idx, CfreeArchReg* out) {
const char* nm = NULL;
@@ -165,6 +166,7 @@ const ArchImpl arch_impl_aa64 = {
.link = &link_arch_aa64,
.dwarf = &aa64_dwarf_ops,
.dbg = &aa64_dbg_ops,
+ .asm_ops = &aa64_asm_ops,
.predefined_macros = aa64_predefined_macros,
.npredefined_macros =
(u32)(sizeof aa64_predefined_macros / sizeof aa64_predefined_macros[0]),
diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c
@@ -356,11 +356,20 @@ static void parse_imm_sym(AsmDriver* d, ObjSymId* sym_out, i64* val_out) {
* `:got_lo12:`). AA64_RELMOD_NONE means no modifier was present. */
typedef enum AA64RelMod {
AA64_RELMOD_NONE = 0,
+ AA64_RELMOD_PAGE, /* explicit adrp page reloc (Mach-O `@PAGE`); == bare adrp */
AA64_RELMOD_LO12,
AA64_RELMOD_GOT,
AA64_RELMOD_GOT_LO12,
} AA64RelMod;
+/* True when the assembler's target object format is Mach-O, which spells
+ * operand relocations as `@PAGE`/`@PAGEOFF` suffixes; ELF/COFF spell them as
+ * `:lo12:`/`:got:` prefixes. cfree as parses the dialect of its target only
+ * (no hybrid), mirroring what `cc -S` emits for that format. */
+static int target_is_macho(AsmDriver* d) {
+ return asm_driver_compiler(d)->target.obj == CFREE_OBJ_MACHO;
+}
+
/* If the next token is ':', consume a `:name:` relocation modifier prefix and
* return its kind. A leading ':' is unambiguous at an operand position (a
* label's ':' only appears at end-of-mnemonic). Returns AA64_RELMOD_NONE and
@@ -385,6 +394,26 @@ static AA64RelMod parse_reloc_mod(AsmDriver* d) {
return mod;
}
+/* Mach-O operand relocation suffix: after a symbol(+addend), an optional
+ * `@PAGE` / `@PAGEOFF` / `@GOTPAGE` / `@GOTPAGEOFF`. Maps to the same
+ * AA64RelMod the ELF `:mod:` prefix produces, so downstream encoding/reloc
+ * emission is shared. `@PAGE` is the explicit spelling of an adrp page reloc
+ * (a bare adrp on ELF). Returns AA64_RELMOD_NONE, stream untouched, when the
+ * next token is not '@'. */
+static AA64RelMod parse_reloc_suffix(AsmDriver* d) {
+ if (!tok_punct(asm_driver_peek(d), '@')) return AA64_RELMOD_NONE;
+ (void)asm_driver_next(d); /* eat '@' */
+ AsmTok name = asm_driver_next(d);
+ if (name.kind != ASM_TOK_IDENT)
+ asm_driver_panic(d, "asm: expected relocation suffix name after '@'");
+ Slice s = pool_slice(asm_driver_pool(d), name.v.ident);
+ if (icase_eq(s.s, s.len, "PAGE")) return AA64_RELMOD_PAGE;
+ if (icase_eq(s.s, s.len, "PAGEOFF")) return AA64_RELMOD_LO12;
+ if (icase_eq(s.s, s.len, "GOTPAGE")) return AA64_RELMOD_GOT;
+ if (icase_eq(s.s, s.len, "GOTPAGEOFF")) return AA64_RELMOD_GOT_LO12;
+ asm_driver_panic(d, "asm: unsupported relocation suffix");
+}
+
/* The R_AARCH64_LDST{8,16,32,64}_ABS_LO12_NC reloc for an access log2-size. */
static RelocKind aa64_ldst_lo12_reloc(AsmDriver* d, u32 size) {
switch (size) {
@@ -401,6 +430,57 @@ static RelocKind aa64_ldst_lo12_reloc(AsmDriver* d, u32 size) {
}
}
+/* Printer-side inverse of the operand reloc-modifier parsers above: how a
+ * relocated aarch64 operand is spelled in `cc -S` text for the target object
+ * format. ELF uses a `:mod:` prefix; Mach-O uses an `@MOD` suffix — and even
+ * a bare adrp page reloc needs an explicit `@PAGE` there. Kept adjacent to
+ * the `.s` parser (parse_reloc_mod / parse_reloc_suffix and their call sites)
+ * so the emit and parse spellings stay in lockstep. See ArchAsmOps. */
+static int aa64_reloc_operand(u16 kind, CfreeObjFmt fmt, ArchRelocOperand* out) {
+ ArchRelocSurg surg;
+ const char* elf; /* `:mod:` prefix */
+ const char* macho; /* `@MOD` suffix */
+ switch (kind) {
+ case R_AARCH64_CALL26:
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CONDBR19:
+ case R_AARCH64_ADR_PREL_LO21:
+ surg = ARCH_RELOC_SURG_TAIL, elf = "", macho = "";
+ break;
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ surg = ARCH_RELOC_SURG_TAIL, elf = "", macho = "@PAGE";
+ break;
+ case R_AARCH64_ADR_GOT_PAGE:
+ surg = ARCH_RELOC_SURG_TAIL, elf = ":got:", macho = "@GOTPAGE";
+ break;
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ surg = ARCH_RELOC_SURG_TAIL, elf = ":lo12:", macho = "@PAGEOFF";
+ break;
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ surg = ARCH_RELOC_SURG_MEM, elf = ":lo12:", macho = "@PAGEOFF";
+ break;
+ case R_AARCH64_LD64_GOT_LO12_NC:
+ surg = ARCH_RELOC_SURG_MEM, elf = ":got_lo12:", macho = "@GOTPAGEOFF";
+ break;
+ default:
+ return 0; /* TLV and anything else: keep the numeric operand */
+ }
+ out->surg = surg;
+ if (fmt == CFREE_OBJ_MACHO) {
+ out->prefix = "";
+ out->suffix = macho;
+ } else {
+ out->prefix = elf;
+ out->suffix = "";
+ }
+ return 1;
+}
+
+const ArchAsmOps aa64_asm_ops = {.reloc_operand = aa64_reloc_operand};
+
static void emit32(AsmDriver* d, u32 word) {
MCEmitter* mc = asm_driver_mc(d);
(void)asm_driver_cur_section(d);
@@ -717,17 +797,39 @@ static void p_addsub(AsmDriver* d, int is_sub, int set_flags) {
AA64Reg rn = parse_reg(d);
expect_comma(d, "add/sub");
AsmTok t = asm_driver_peek(d);
- if (!is_sub && !set_flags && tok_punct(t, ':')) {
- /* `add Rd, Rn, :lo12:sym` — ADD (immediate) with a zero imm12 plus an
- * R_AARCH64_ADD_ABS_LO12_NC relocation (the low-12 PIC/abs sequence). */
- AA64RelMod mod = parse_reloc_mod(d);
- if (mod != AA64_RELMOD_LO12)
- asm_driver_panic(d, "asm: add: only :lo12: is valid here");
- if (rd.is64 != rn.is64)
- asm_driver_panic(d, "asm: add :lo12:: width mismatch");
+ /* `add Rd, Rn, <sym lo12>` — ADD (immediate), zero imm12, plus an
+ * R_AARCH64_ADD_ABS_LO12_NC relocation (the low-12 PIC/abs sequence). ELF
+ * spells the modifier as a `:lo12:` prefix (leading ':'); Mach-O spells it
+ * as a `sym@PAGEOFF` suffix, so the trigger there is a non-register IDENT
+ * third operand (probe with parse_reg_from_ident so `add x0,x1,x2` stays the
+ * register path). */
+ int symbolic = 0;
+ if (!is_sub && !set_flags) {
+ if (target_is_macho(d)) {
+ AA64Reg probe;
+ memset(&probe, 0, sizeof probe);
+ symbolic = (t.kind == ASM_TOK_IDENT &&
+ !parse_reg_from_ident(d, t.v.ident, &probe));
+ } else {
+ symbolic = tok_punct(t, ':');
+ }
+ }
+ if (symbolic) {
+ AA64RelMod mod;
ObjSymId sym = OBJ_SYM_NONE;
i64 off = 0;
- parse_imm_sym(d, &sym, &off);
+ if (target_is_macho(d)) {
+ parse_imm_sym(d, &sym, &off);
+ mod = parse_reloc_suffix(d);
+ } else {
+ mod = parse_reloc_mod(d);
+ parse_imm_sym(d, &sym, &off);
+ }
+ if (mod != AA64_RELMOD_LO12)
+ asm_driver_panic(d, "asm: add: only :lo12: (ELF) / @PAGEOFF (Mach-O) is "
+ "valid here");
+ if (rd.is64 != rn.is64)
+ asm_driver_panic(d, "asm: add lo12: width mismatch");
u32 word = aa64_addsubimm_pack((AA64AddSubImm){.sf = rd.is64,
.op = 0,
.S = 0,
@@ -1151,12 +1253,13 @@ static AA64Mem parse_mem(AsmDriver* d) {
asm_driver_panic(d, "asm: ldr/str: base register must be 64-bit");
require_sp_spelling(d, m.base, "ldr/str base");
if (asm_driver_eat_comma(d)) {
- /* `:lo12:sym` / `:got_lo12:sym` relocation offset, a register index, or a
- * plain `#imm`/expression. */
+ /* A relocation offset (ELF `:lo12:sym`/`:got_lo12:sym` prefix, or Mach-O
+ * `sym@PAGEOFF`/`sym@GOTPAGEOFF` suffix), a register index, or a plain
+ * `#imm`/expression. */
AsmTok t = asm_driver_peek(d);
AA64Reg idx;
memset(&idx, 0, sizeof idx);
- if (tok_punct(t, ':')) {
+ if (!target_is_macho(d) && tok_punct(t, ':')) {
m.reloc_mod = parse_reloc_mod(d);
parse_imm_sym(d, &m.reloc_sym, &m.reloc_off);
m.has_offset = 1; /* imm field stays 0; reloc supplies the low bits */
@@ -1168,6 +1271,15 @@ static AA64Mem parse_mem(AsmDriver* d) {
m.has_index = 1;
m.option = idx.is64 ? AA64_LDST_OPTION_LSL : AA64_LDST_OPTION_UXTW;
if (asm_driver_eat_comma(d)) parse_mem_extend(d, &m);
+ } else if (target_is_macho(d) && t.kind == ASM_TOK_IDENT) {
+ /* Mach-O: `[Xn, sym@PAGEOFF]` / `[Xn, sym@GOTPAGEOFF]`. */
+ parse_imm_sym(d, &m.reloc_sym, &m.reloc_off);
+ m.reloc_mod = parse_reloc_suffix(d);
+ if (m.reloc_mod != AA64_RELMOD_LO12 &&
+ m.reloc_mod != AA64_RELMOD_GOT_LO12)
+ asm_driver_panic(
+ d, "asm: ldr/str: symbol offset needs @PAGEOFF/@GOTPAGEOFF");
+ m.has_offset = 1;
} else {
m.imm = parse_imm_const(d);
m.has_offset = 1;
@@ -1371,14 +1483,27 @@ static void p_ldp_stp(AsmDriver* d, int is_load) {
static void p_adr(AsmDriver* d, int is_adrp) {
AA64Reg rd = parse_reg(d);
expect_comma(d, "adr");
- /* `adrp Rd, :got:sym` selects the GOT-page relocation; a bare symbol uses
- * the PC-relative page reloc. `:got:` is only meaningful for adrp. */
- AA64RelMod mod = parse_reloc_mod(d);
- if (mod != AA64_RELMOD_NONE && (!is_adrp || mod != AA64_RELMOD_GOT))
- asm_driver_panic(d, "asm: adr/adrp: only :got: (with adrp) is valid here");
+ /* adrp page reloc on a symbol: ELF spells a bare symbol (`:got:` selects the
+ * GOT page); Mach-O spells `sym@PAGE` / `sym@GOTPAGE`. adr takes a bare
+ * symbol on both. cc -S emits the form matching the target format. */
+ AA64RelMod mod = AA64_RELMOD_NONE;
ObjSymId sym = OBJ_SYM_NONE;
i64 off = 0;
- parse_imm_sym(d, &sym, &off);
+ if (target_is_macho(d)) {
+ parse_imm_sym(d, &sym, &off);
+ mod = parse_reloc_suffix(d);
+ } else {
+ mod = parse_reloc_mod(d);
+ parse_imm_sym(d, &sym, &off);
+ }
+ if (!is_adrp) {
+ if (mod != AA64_RELMOD_NONE)
+ asm_driver_panic(d, "asm: adr: no relocation modifier valid here");
+ } else if (mod != AA64_RELMOD_NONE && mod != AA64_RELMOD_PAGE &&
+ mod != AA64_RELMOD_GOT) {
+ asm_driver_panic(d, "asm: adrp: only @PAGE/@GOTPAGE (Mach-O) or :got: "
+ "(ELF) valid here");
+ }
if (sym == OBJ_SYM_NONE)
asm_driver_panic(d, "asm: adr/adrp: symbol required");
AA64PCRelAdr f = {.op = is_adrp ? AA64_ADR_OP_ADRP : AA64_ADR_OP_ADR,
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -175,6 +175,34 @@ typedef struct ArchDbgOps {
u64* target_out);
} ArchDbgOps;
+/* ---- textual-assembly operand syntax (printer <-> parser) ----------------
+ *
+ * How a relocated operand is spelled in `cc -S` output. The shape selects
+ * which part of the disassembled operand text the symbolizer rewrites; the
+ * prefix/suffix are the relocation-modifier spelling for the target object
+ * format (e.g. aarch64 ELF `:lo12:sym` is a prefix; Mach-O `sym@PAGEOFF` is a
+ * suffix). At most one of prefix/suffix is non-empty for a given (kind, fmt).
+ * This is the inverse of the arch assembler's operand reloc-modifier parser. */
+typedef enum ArchRelocSurg {
+ ARCH_RELOC_SURG_NONE = 0, /* not symbolizable here; keep numeric operand */
+ ARCH_RELOC_SURG_TAIL, /* replace last comma component (or whole operand) */
+ ARCH_RELOC_SURG_MEM, /* rewrite the offset inside [...] */
+} ArchRelocSurg;
+
+typedef struct ArchRelocOperand {
+ ArchRelocSurg surg;
+ const char* prefix; /* e.g. ":lo12:" (ELF); "" if none */
+ const char* suffix; /* e.g. "@PAGEOFF" (Mach-O); "" if none */
+} ArchRelocOperand;
+
+typedef struct ArchAsmOps {
+ /* Map (reloc kind, target object format) to the operand syntax the cc -S
+ * symbolizer must emit (and that this arch's .s parser accepts back).
+ * Returns 1 and fills *out when the kind is symbolizable for fmt; 0
+ * otherwise (printer keeps the numeric operand). */
+ int (*reloc_operand)(u16 reloc_kind, CfreeObjFmt fmt, ArchRelocOperand* out);
+} ArchAsmOps;
+
typedef struct ArchImpl {
/* First field, so `(const CGBackend*)&arch_impl_x` is the arch's backend
* view. Every machine-code arch is a CGBackend by composition; c_target
@@ -197,6 +225,7 @@ typedef struct ArchImpl {
const LinkArchDesc* link;
const ArchDwarfOps* dwarf;
const ArchDbgOps* dbg;
+ const ArchAsmOps* asm_ops; /* textual-asm operand syntax; NULL = keep numeric */
const CfreePredefinedMacro* predefined_macros;
u32 npredefined_macros;
@@ -220,6 +249,12 @@ typedef struct ArchImpl {
const ArchImpl* arch_lookup(CfreeArchKind);
const ArchImpl* arch_for_compiler(const Compiler*);
+/* Spelling for a relocated operand in `cc -S` text, for the compiler's target
+ * arch+format. Returns 1 and fills *out when symbolizable, 0 to keep numeric
+ * (also when the arch provides no asm_ops). Thin dispatch over ArchAsmOps. */
+int arch_reloc_operand(const Compiler* c, u16 reloc_kind,
+ ArchRelocOperand* out);
+
ArchDisasm* arch_disasm_new(Compiler*);
u32 arch_disasm_decode(ArchDisasm*, const u8* bytes, size_t len, u64 vaddr,
CfreeInsn* out);
diff --git a/src/arch/registry.c b/src/arch/registry.c
@@ -88,6 +88,13 @@ const ArchImpl* arch_for_compiler(const Compiler* c) {
return arch_lookup(c->target.arch);
}
+int arch_reloc_operand(const Compiler* c, u16 reloc_kind,
+ ArchRelocOperand* out) {
+ const ArchImpl* a = arch_for_compiler(c);
+ if (!a || !a->asm_ops || !a->asm_ops->reloc_operand) return 0;
+ return a->asm_ops->reloc_operand(reloc_kind, c->target.obj, out);
+}
+
const CGBackend* cg_backend_for_session(const Compiler* c,
const CfreeCodeOptions* opts) {
if (opts && opts->check_only) {
diff --git a/src/asm/asm.c b/src/asm/asm.c
@@ -695,11 +695,55 @@ static void do_directive(AsmDriver* d, Sym name) {
u16 flags = 0;
u32 entsize = 0;
int have_flags = 0;
+ int macho_2pos = 0;
+
+ /* Mach-O `.section segname,sectname[,type[,attrs]]`: the token after the
+ * first comma is a bare sectname IDENT (vs GNU's "flags" STRING). cfree as
+ * parses the dialect of its target only (no hybrid), so this branch is
+ * gated on the Mach-O object format. Rebuild the comma-joined "seg,sect"
+ * name that the Mach-O writer's name_to_seg_sect splits back. */
+ if (d->c->target.obj == CFREE_OBJ_MACHO && tok_is_punct(d_peek(d), ',')) {
+ (void)d_next(d); /* eat ',' */
+ AsmTok sect = d_next(d);
+ size_t sgn = 0, scn = 0;
+ const char* sgp;
+ const char* scp;
+ char buf[128];
+ if (sect.kind != ASM_TOK_IDENT)
+ d_panicf(d, "asm: .section: expected Mach-O sectname after ','");
+ sgp = asm_str(d, sname, &sgn);
+ scp = asm_str(d, sect.v.ident, &scn);
+ if (sgn + 1 + scn >= sizeof buf)
+ d_panicf(d, "asm: .section: name too long");
+ memcpy(buf, sgp, sgn);
+ buf[sgn] = ',';
+ memcpy(buf + sgn + 1, scp, scn);
+ sname =
+ pool_intern_slice(d->pool, (Slice){.s = buf, .len = sgn + 1 + scn});
+ macho_2pos = 1;
+ /* Optional trailing Mach-O type/attribute fields (regular,
+ * cstring_literals, …): accept and consume; map the few affecting
+ * flags. */
+ while (asm_driver_eat_comma(d)) {
+ AsmTok ty = d_peek(d);
+ if (ty.kind == ASM_TOK_IDENT) {
+ size_t tn = 0;
+ const char* tp = asm_str(d, ty.v.ident, &tn);
+ if (tn == 16 && memcmp(tp, "cstring_literals", 16) == 0)
+ flags |= SF_STRINGS;
+ (void)d_next(d);
+ } else if (ty.kind == ASM_TOK_NUM) {
+ (void)d_next(d);
+ } else {
+ break;
+ }
+ }
+ }
/* Optional GNU-as operands: , "flags" [, @type [, entsize]]. The emitter
* (src/api/asm_emit.c) writes these for SEC_OTHER named sections; parse
* them back so a global's section flags/entsize round-trip faithfully. */
- if (asm_driver_eat_comma(d)) {
+ if (!macho_2pos && asm_driver_eat_comma(d)) {
AsmTok ft = d_peek(d);
if (ft.kind == ASM_TOK_STR) {
size_t fn = 0;
@@ -741,7 +785,33 @@ static void do_directive(AsmDriver* d, Sym name) {
{
size_t nn = 0;
const char* p = asm_str(d, sname, &nn);
- if (have_flags) {
+ if (macho_2pos) {
+ /* Canonical Apple seg,sect → SecKind (inverse of name_to_seg_sect),
+ * else SEC_OTHER (e.g. __TEXT,__eh_frame). Every Mach-O section is
+ * allocated; the comma name is preserved so the writer round-trips
+ * seg/sect verbatim. */
+ if (nn == 13 && memcmp(p, "__TEXT,__text", 13) == 0) {
+ kind = SEC_TEXT;
+ flags |= (u16)(SF_ALLOC | SF_EXEC);
+ } else if (nn == 14 && memcmp(p, "__TEXT,__const", 14) == 0) {
+ kind = SEC_RODATA;
+ flags |= (u16)SF_ALLOC;
+ } else if (nn == 16 && memcmp(p, "__TEXT,__cstring", 16) == 0) {
+ kind = SEC_RODATA;
+ flags |= (u16)(SF_ALLOC | SF_STRINGS);
+ } else if (nn == 13 && memcmp(p, "__DATA,__data", 13) == 0) {
+ kind = SEC_DATA;
+ flags |= (u16)(SF_ALLOC | SF_WRITE);
+ } else if (nn == 12 && memcmp(p, "__DATA,__bss", 12) == 0) {
+ kind = SEC_BSS;
+ flags |= (u16)(SF_ALLOC | SF_WRITE);
+ } else if (nn >= 7 && memcmp(p, "__DWARF", 7) == 0) {
+ kind = SEC_DEBUG;
+ } else {
+ kind = SEC_OTHER;
+ flags |= (u16)SF_ALLOC;
+ }
+ } else if (have_flags) {
/* Explicit flags: a canonical name keeps its kind; any other name is a
* SEC_OTHER named section (matching codegen for section(...) globals). */
if (p && nn == 5 && memcmp(p, ".text", 5) == 0) kind = SEC_TEXT;
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -620,6 +620,16 @@ Sym obj_secname_tbss(Compiler*);
* agree on the truncated spelling. */
int obj_macho_debug_sectname(const char* name, size_t len, char out[17]);
+/* Canonical Mach-O "segname,sectname" spelling for a SecKind, as a
+ * NUL-terminated literal. The single source of truth shared by the Mach-O
+ * object writer (name_to_seg_sect) and the `cc -S` printer (asm_emit.c), so
+ * the textual `.section` directive and the binary section header never drift:
+ * SEC_RODATA -> "__TEXT,__const", SEC_DATA -> "__DATA,__data",
+ * SEC_BSS -> "__DATA,__bss", SEC_TEXT -> "__TEXT,__text".
+ * Returns NULL for kinds with no fixed canonical Mach-O home (SEC_OTHER /
+ * SEC_DEBUG), which callers spell from the section's own name. */
+const char* obj_macho_canon_secname(SecKind kind);
+
/* ---- thread-local storage emission ---------------------------------
*
* The frontend collects a `_Thread_local` definition's bytes (or marks
diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c
@@ -44,6 +44,24 @@ int obj_macho_debug_sectname(const char* name, size_t len, char out[17]) {
return 1;
}
+const char* obj_macho_canon_secname(SecKind kind) {
+ /* Mirrors the SecKind cases of name_to_seg_sect (src/obj/macho/emit.c):
+ * keep the two in lockstep so a section's text spelling and its binary
+ * header land in the same Mach-O (segment,section). */
+ switch (kind) {
+ case SEC_TEXT:
+ return "__TEXT,__text";
+ case SEC_RODATA:
+ return "__TEXT,__const";
+ case SEC_DATA:
+ return "__DATA,__data";
+ case SEC_BSS:
+ return "__DATA,__bss";
+ default: /* SEC_OTHER / SEC_DEBUG: spelled from the section's own name. */
+ return NULL;
+ }
+}
+
static Sym secname_panic_unimpl(Compiler* c, const char* which) {
SrcLoc l = {0, 0, 0};
compiler_panic(c, l,
diff --git a/test/asm/hostas_toy.sh b/test/asm/hostas_toy.sh
@@ -17,14 +17,14 @@
# (exit code), not by matching bytes (cfree and clang produce different code, so
# a byte/text match would be meaningless).
#
-# KNOWN-RED today: on the native Mach-O target cfree's `cc -S` emits ELF-only
-# `.type`/`.size` directives and an `@progbits` type token inside Mach-O
-# `.section`s, which clang/llvm-mc reject while cfree's own `as` accepts them. So
-# the clang-as lane is EXPECTED to fail until that printer gap is fixed; those
-# failures report as XFAIL and do NOT fail this target by default. Set
-# CFREE_HOSTAS_ENFORCE_CLANG=1 to make the clang-as lane gate (use while fixing);
-# an unexpected clang-as pass under the default reports XPASS as a reminder to
-# flip enforcement on once the fix lands.
+# GATED: `cc -S` is now object-format-aware (src/api/asm_emit.c's AsmSyntax
+# vtable + the aarch64 ArchAsmOps reloc-operand hook), so on the native Mach-O
+# target it emits clean Mach-O assembly clang/llvm-mc accept — no ELF `.type`/
+# `.size`, `.section __SEG,__SECT`, `.p2align`, and `sym@PAGE`/`@PAGEOFF`
+# relocation operands. cfree's own `as` parses the same Mach-O dialect (it
+# dispatches on its target format), so BOTH lanes pass. The clang-as lane gates
+# by default; set CFREE_HOSTAS_ENFORCE_CLANG=0 to demote it to XFAIL (e.g. while
+# bringing up a new arch/format whose printer side isn't done yet).
set -u
@@ -34,7 +34,7 @@ CASES="$ROOT/test/toy/cases"
WORK="$ROOT/build/test/asm/hostas_toy"
OPTS="${CFREE_TEST_OPTS:-O0 O1}"
FILTER="${1:-}"
-ENFORCE_CLANG="${CFREE_HOSTAS_ENFORCE_CLANG:-0}"
+ENFORCE_CLANG="${CFREE_HOSTAS_ENFORCE_CLANG:-1}"
# Cases blocked on a separate, known cc -S symbolizer gap (the round-trip lane
# quarantines the same set): 141 emits an unsymbolized `adrp x,0x0` for a
@@ -154,10 +154,10 @@ printf '\n'
printf 'hostas-toy: cfree-as %d pass, %d fail | clang-as %d pass, %d xfail, %d xpass, %d efail | %d skip\n' \
"$a_pass" "$a_fail" "$b_pass" "$b_xfail" "$b_xpass" "$b_efail" "$skip"
if [ "$ENFORCE_CLANG" != "1" ] && [ "$b_xfail" -gt 0 ]; then
- printf 'hostas-toy: clang-as is XFAIL (expected red) — native Mach-O cc -S emits ELF-only .type/.size/@progbits clang rejects; fix tracked separately. Run with CFREE_HOSTAS_ENFORCE_CLANG=1 to gate it.\n'
+ printf 'hostas-toy: clang-as demoted to XFAIL (CFREE_HOSTAS_ENFORCE_CLANG=0): %d not accepted by clang. The clang-as lane gates by default; unset the env var to enforce.\n' "$b_xfail"
fi
if [ "$ENFORCE_CLANG" != "1" ] && [ "$b_xpass" -gt 0 ]; then
- printf 'hostas-toy: clang-as had %d XPASS — the asm gap looks fixed; set CFREE_HOSTAS_ENFORCE_CLANG=1 to enforce.\n' "$b_xpass"
+ printf 'hostas-toy: clang-as had %d XPASS under opt-out — re-enable gating (default) to enforce.\n' "$b_xpass"
fi
rc=0