commit 2d12f3145b87e3617dd72fc0ddac2be6e357010a
parent d391d1a13fc19116a5ed2473022ffa5d5344916b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 10:29:18 -0700
c_target: implement native data relocations via packed structs
Diffstat:
| M | doc/CBACKEND.md | | | 662 | +++++++++---------------------------------------------------------------------- |
| M | src/arch/c_target/emit.c | | | 274 | +++++++++++++++++++++++++++++++++++++++++++++---------------------------------- |
2 files changed, 228 insertions(+), 708 deletions(-)
diff --git a/doc/CBACKEND.md b/doc/CBACKEND.md
@@ -2,103 +2,42 @@
## Motivation
-cfree's no-deps posture rules out linking against LLVM or GCC's optimizer.
-The practical path to "industrial-strength" optimization for cfree users is
-to emit C from CG and hand the result to gcc/clang, which exist on every
-build host we care about. The output is `.c` source, not `.o` bytes; the
-host C compiler does ABI lowering, instruction selection, and register
-allocation. cfree's job is to produce *legal* and *complete* C, not fast C.
+cfree's no-deps posture rules out linking against LLVM or GCC's optimizer. The practical path to "industrial-strength" optimization for cfree users is to emit C from CG and hand the result to `gcc` or `clang`, which exist on every build host. The output is `.c` source, not `.o` bytes; the host C compiler handles ABI lowering, instruction selection, register allocation, and advanced optimizations like SROA. cfree's job is to produce *legal* and *complete* C, not necessarily human-readable C.
GCC/clang-extension C covers everything cfree CG can express. Concretely:
-
-- inline asm — `CfreeCgInlineAsm` is already GCC's
- `asm(tmpl : outs : ins : clobbers)` shape; emit verbatim.
-- overflow/trap — `__builtin_{add,sub,mul}_overflow`, `__builtin_trap`,
- `__builtin_unreachable`.
-- atomics — `_Atomic` + `<stdatomic.h>` with explicit `memory_order_*`.
+- inline asm — emitted verbatim as GCC `__asm__`.
+- overflow/trap — `__builtin_{add,sub,mul}_overflow`, `__builtin_trap`.
+- atomics — `__atomic_*` builtins with explicit memory orders.
- TLS — `_Thread_local`.
-- `setjmp`/`longjmp` — `<setjmp.h>`.
-- computed goto / label-as-value — GCC `&&label` extension.
-- `__int128`, `long double` — host C compiler types.
-- bitfields — emit as bit-extract/insert on the storage unit (cfree CG
- already carries `BitFieldAccess.storage_offset + bit_offset + bit_width`,
- not the original C field declaration).
-
-## Scope: target-locked, not portable
-
-The emitted C is **target-locked**: it must be compiled for the same triple
-that `cfree --target=` selected. Compile it for a different triple and it
-may silently misbehave.
-
-Cause: CG flattens semantic lvalue chains to `(base_reg, byte_offset)`
-before any backend sees them. `cfree_cg_field(g, field_index)` becomes
-`OPK_INDIRECT(reg, ofs=12)` at the vtable; the field identity is gone. The
-offset `12` was computed using the cfree-selected target's
-`abi_cg_record_layout`. If the downstream C compiler assumes a different
-layout, the access is wrong. Same story for array indexing, struct sizes,
-and pointer arithmetic.
-
-This is the same trade LLVM IR makes (datalayout-locked). It does *not*
-limit usefulness for the stated goal — "industrial-strength optimization
-via the host toolchain" — because the user already controls the triple at
-cfree invocation. Concretely supported:
-
-- `cfree --target=x86_64-linux --emit=c foo.c | {gcc,clang,tcc} -O3 -c` ✓
-- moving that `.c` to a different-arch host and recompiling ✗
-
-Producing genuinely portable C source would require a separate emission
-path in the C frontend (`lang/c/`), above CG, where field/element identity
-is still alive. That is a different project from this one. If "portable C
-as a deliverable" ever becomes a goal, expect a new doc, not an extension
-of this plan.
-
-## Where the C backend plugs in
-
-A C backend is *not* a new arch in the sense `arch_impl_x64` is. The eventual
-machine code still runs on the host triple — x86_64, aarch64, rv64. What
-changes is the *form of CG output*: text instead of object bytes. So the
-seam is not `ArchImpl`; it is `CGTarget`.
-
-The two relevant abstractions in `src/arch/arch.h`:
-
-- `MCEmitter` writes bytes to an `ObjBuilder` section. Per-arch concrete
- backends call into it from each `binop`, `load`, `store`, etc.
-- `CGTarget` is the vtable CG calls — ~50 methods covering function
- lifecycle, frame slots, data movement, arithmetic, calls/returns,
- intrinsics, atomics, inline asm, varargs, scopes, source locations.
-
-A C backend is a new `CGTarget` implementation that:
+- `setjmp`/`longjmp` — standard C.
+- computed goto — GCC `&&label` and `goto *ptr` extension.
+- bitfields — emitted as bit-extract/insert on the underlying storage unit; cfree does not emit C bitfield declarations.
-1. Ignores `MCEmitter` and writes C source to a `CfreeWriter` instead.
-2. Inherits the host's `ABIVtable` only for `sizeof`/`alignof`/`record_layout`.
- It does **not** consult ABI classification for arg routing — gcc will
- re-do that on the emitted C.
-3. Sets `virtual_regs = 1` so CG hands out fresh, unbounded `Reg` ids; each
- id becomes a unique C local variable.
+## Scope: Target-Locked, Not Portable
-The arch identity (`CFREE_ARCH_X86_64` etc.) is preserved end-to-end so type
-sizes and struct layouts match the downstream gcc invocation.
+The emitted C is **target-locked**: it must be compiled for the same triple that `cfree --target=` selected. Compile it for a different triple and it may silently misbehave.
-### Selection
+Cause: CG flattens semantic lvalue chains to `(base_reg, byte_offset)` before any backend sees them. `cfree_cg_field(g, field_index)` becomes `OPK_INDIRECT(reg, ofs=12)` at the vtable; the field identity is gone. The offset `12` was computed using the cfree-selected target's `abi_cg_record_layout`. If the downstream C compiler assumes a different layout, the access is wrong.
-`CfreeCodeOptions` gains two fields:
+This is the same trade LLVM IR makes (datalayout-locked). It does *not* limit usefulness for the stated goal — "industrial-strength optimization via the host toolchain" — because the user already controls the triple at cfree invocation.
-- `bool emit_c_source` — opt into the C target.
-- `CfreeWriter* c_source_writer` — destination for emitted source.
+## Where the C Backend Plugs In
-When `emit_c_source` is set, `cfree_cg_new` (in `src/cg/session.c`)
-constructs `c_cgtarget_new(c, ob, writer)` instead of dispatching through
-`cgtarget_new`. `MCEmitter` and `Debug` are skipped entirely in this mode
-(no machine bytes, no DWARF). `cgtarget_new` itself is unchanged: it
-doesn't see `CodeOptions`, and the branch is cleaner one level up.
+A C backend is *not* a new arch in the sense `arch_impl_x64` is. The eventual machine code still runs on the host triple. What changes is the *form of CG output*: text instead of object bytes. The seam is `CGTarget`: the C backend is a `CGTarget` implementation that:
-The downstream driver workflow: `cfree --emit=c foo.c -o foo.c.cfree.c`,
-then the user runs `cc -O2 foo.c.cfree.c`. No object format coupling at
-the cfree boundary.
+1. Ignores `MCEmitter` and writes C source to a `CfreeWriter` instead.
+2. Inherits the host's `ABIVtable` only for `sizeof`/`alignof`/`record_layout`. It does **not** consult ABI classification for arg routing — the host C compiler will re-do that on the emitted C.
+3. Sets `virtual_regs = 1` so CG hands out fresh, unbounded `Reg` ids; each id becomes a unique C local variable.
-## Architecture sketch
+### Selection
-```
+When `emit_c_source` is set in `CodeOptions`, `cfree_cg_new` constructs the target via `c_cgtarget_new` instead of dispatching through standard machine-code generation. `MCEmitter` and `Debug` are skipped entirely in this mode.
+
+The downstream driver workflow: `cfree cc --emit=c foo.c -o foo.cfree.c`, then the user runs `cc -O2 foo.cfree.c`.
+
+## Architecture Sketch
+
+```text
+---------------------+
| frontend (lang/c/) | source AST → CG calls
+----------+----------+
@@ -109,522 +48,65 @@ the cfree boundary.
+----------+----------+
| CGTarget vtable
v
-+---------------------+ +---------------------+
-| arch CGTarget | OR | C-source CGTarget |
-| (aa64/x64/rv64) | | (new, this plan) |
-+----------+----------+ +----------+----------+
- | |
- v v
- MCEmitter→bytes CfreeWriter→text
- ↓ObjBuilder ↓.c file
++---------------------+
+| C-source CGTarget |
+| (src/arch/c_target) |
++----------+----------+
+ |
+ v
+ CfreeWriter→text
+ ↓.c file
```
-The C target shares the entire frontend pipeline and the CG layer. Only the
-backend differs.
+### Substrate: `virtual_regs`
-### Substrate: virtual_regs
+`CGTarget.virtual_regs = 1` is used. Each minted `Reg` id maps to one declared C local: `uintN_t v17;` or `double v23;`. Registers are reused across different types by the CG value stack, so all writes cast back to the target type, typically bridging through `uintptr_t` to avoid `-Wint-conversion` warnings.
-`CGTarget.virtual_regs = 1` already exists and is used by `opt_cgtarget`.
-Effect in CG (`src/cg/value.c:313`, `:342`):
+### Aggregate-by-Address
-- `api_regalloc_begin` initializes the regalloc in virtual mode.
-- `api_alloc_reg` mints fresh ids 1, 2, 3, … and never panics.
-- `api_free_reg` is a no-op; spill paths are unreachable.
+When CG packs a call arg whose type is an aggregate, it materializes an address operand (`OPK_LOCAL` / `OPK_GLOBAL` / `OPK_INDIRECT`) referring to a memory image of the struct. The C target emits a direct pointer dereference (e.g. `(*(T*)((char*)vN + K))`).
-The C target sets this and is otherwise free of register pool concerns.
-Each minted `Reg` id maps to one declared C local: `uintN_t v17;` or
-`double v23;` keyed on the Operand's `cls` and the source `CfreeCgTypeId`
-carried alongside.
+For returns of aggregates, `api_alloc_call_ret_storage` allocates a fresh frame slot. The C target emits a direct assignment `slot_R = f(args);` or lifts the call into an indirect write `(*(T*)addr) = f(args);`. No `sret` shim is added — the host C compiler handles the ABI details.
-The C target still must implement `get_allocable_regs`,
-`get_phys_regs`, etc. as empty stubs (the CG checks `virtual_regs` and skips
-them); same for `spill_reg`/`reload_reg` (unreachable in virtual mode but
-required by the vtable's non-null-callable contract).
+## Sequencing with Opt
-### What about the aggregate-by-address issue?
+`opt_cgtarget` runs SSA/DCE/combine/loop passes. For the C backend this is undesirable: the *whole point* is to defer optimization to the host C compiler (gcc/clang).
-When CG packs a call arg whose type is an aggregate
-(`src/cg/call.c:30`), it materializes an address operand
-(`OPK_LOCAL`/`OPK_GLOBAL`/`OPK_INDIRECT`) referring to a memory image of the
-struct. The C target's `call` method sees `desc.args[i].storage.kind ==
-OPK_LOCAL` (or similar) and the source type `desc.args[i].type` is the
-aggregate.
+Decision: when `emit_c_source` is set, `opt_level = 0` is forced regardless of what the caller asked for. The C target sits directly under CG with `virtual_regs = 1`.
-The emission rule for an aggregate operand:
+## Type Emission
-```
-desc.args[i].type = struct S
-desc.args[i].storage.kind = OPK_LOCAL, frame_slot = 7
- → emit `slot_7` (where slot_7 was declared `struct S slot_7;`)
+The C target maintains a type-emission worklist. C source needs each composite type declared before first use.
+1. As CG calls into emission methods, the target records every `CfreeCgTypeId` it sees.
+2. At `finalize`, walk the recorded types, topologically order by dependency, and emit:
+ - scalars → use `<stdint.h>` / `float` / `double`.
+ - pointers → `void*`.
+ - records / arrays → `typedef struct { _Alignas(A) uint8_t raw[N]; } __ty_N;`
+ - function types → `typedef R (*__ty_N)(...);`
-desc.args[i].storage.kind = OPK_INDIRECT, base = vN, ofs = K
- → emit `(*(struct S*)((char*)vN + K))`
-```
+**Composite types are opaque.** Records and arrays are emitted as `typedef struct { _Alignas(A) uint8_t raw[N]; } __ty_N;` — the same shape regardless of field layout. CG already speaks in `(base, byte_offset)` for field/element access, so the indirect path `(*(T*)((char*)addr + ofs))` does all the work and the host C compiler never needs to see the original field declarations. Modern optimizing compilers such as clang see through these byte-offset accesses, so SROA and other optimizations still apply.
-Both are valid C. The first is what the common case looks like (caller
-spilled the struct to a named frame slot). No CG change needed.
-
-For *returns* of aggregates, `api_alloc_call_ret_storage` (`src/cg/call.c:44`)
-allocates a fresh frame slot via `api_arg_storage_must_be_addr`. The C target
-sees a frame-slot Operand for `desc.ret.storage`; after the call it can either:
-
-- emit `slot_R = f(args);` directly (preferred — gcc handles the aggregate
- return on its end), or
-- emit `f_into(&slot_R, args);` if the backend chooses to lift aggregate
- returns out (not required).
-
-Either way no `sret` shim is in the emitted C — gcc figures that out.
-
-## CGTarget surface, mapped
-
-Methods in `arch.h:592–850`, grouped by emission strategy:
-
-| Method | C-source emission |
-| --------------------- | --------------------------------------------------------- |
-| `func_begin`/`_end` | `static? T name(P0 p0, P1 p1, …) {` … `}` |
-| `frame_slot` | declare `T slot_N;` at function entry |
-| `local` | declare `T loc_N;` at function entry |
-| `local_addr` | `vDST = &loc_N;` |
-| `param` | already a function parameter — track name mapping |
-| `spill_reg`/`reload_` | unreachable in virtual_regs mode (no-op stub) |
-| `label_new`/`_place` | `Lk:` placement; minted ids → unique label names |
-| `jump` | `goto Lk;` |
-| `cmp_branch` | `if ((vA OP vB)) goto Lk;` |
-| `scope_begin/end/...` | C `if`/`{ }` block or `for(;;){ … L_break: ;}` |
-| `load_imm` | `vDST = K;` |
-| `load_const` | static const decl at top; `vDST = rodata_N;` |
-| `copy` | `vDST = vSRC;` |
-| `load` | `vDST = *(T*)addr;` (or `__atomic_load` for atomic) |
-| `store` | `*(T*)addr = src;` |
-| `addr_of` | `vDST = &lv;` |
-| `tls_addr_of` | `vDST = &tls_sym;` — gcc handles model selection |
-| `copy_bytes` | `__builtin_memcpy(dst, src, N)` |
-| `set_bytes` | `__builtin_memset(dst, byte, N)` |
-| `bitfield_load` | `vDST = (T)((load(storage) >> bit_offset) & mask);` |
-| `bitfield_store` | `store(storage, (load(storage) & ~mask) │ ((src&mask)<<lo))` |
-| `binop`/`unop`/`cmp` | direct C operator with appropriate cast for signedness |
-| `convert` | C cast, except CV_BITCAST → `__builtin_bit_cast` or memcpy |
-| `call` | `[vRET = ] fname(args…);` (see aggregate rule above) |
-| `ret` | `return vR;` or `return;` |
-| `alloca_` | `vDST = __builtin_alloca(size);` |
-| `va_start_`/`arg`/etc | `__builtin_va_start(ap, last)` and friends |
-| `atomic_load`/etc | `<stdatomic.h>` primitives with explicit memory order |
-| `intrinsic` | per-kind: `__builtin_{popcount,ctz,clz,bswap,trap,…}` |
-| `asm_block` | re-serialize as GCC `__asm__(tmpl : outs : ins : clob);` |
-| `set_loc` | `# line N "file"` directive |
-| `finalize` | flush data definitions, close writer |
-| `destroy` | arena-backed, no-op |
-
-CGCallPlan methods (`plan_call`/`load_call_arg`/etc.) are used by
-`opt_cgtarget`'s lowering pass. The C target advertises `virtual_regs = 1`
-which causes opt to be enabled — but the C target should refuse opt levels
-above 0 (or be wrapped so opt is bypassed). See "Sequencing with opt" below.
-
-## Sequencing with opt
-
-`src/cg/session.c:36-39`:
-
-```c
-if (opt_level > 0) {
- target = opt_cgtarget_new((Compiler*)c, target, opt_level);
-}
-```
+## Symbol and Data Emission
+
+The C target maps cfree CG definitions to GNU-extension C:
+- Data Definition → Emitted as an initialized packed struct (`struct __attribute__((packed)) __cfree_data_<name> { ... }`).
+- Data Relocations (cross-symbol references) → Synthesized directly into the data definition struct by interleaving raw byte array chunks with typed pointer fields (e.g. `void* ptr_0`). The initializer assigns a standard C address-of expression (`&<sym>`) to each pointer field, so the host toolchain's linker resolves cross-symbol references natively, with no runtime constructor overhead.
+- TLS objects → the synthesized struct is declared `_Thread_local`.
+
+## Source Locations
+
+`cfree_cg_set_loc` emits `#line N "path"` directives. With `-g` set on the downstream host gcc/clang invocation, the resulting object code carries source-mapped debug info back to the original cfree input. cfree `Debug` DWARF generation is unused in this mode.
+
+## Testing
+
+The C backend is tested via the `CFREE_TEST_PATHS=C` path in both `test/toy/run.sh` and `test/parse/run.sh`.
+For each case, the test harness:
+1. Emits C source via `cfree cc --emit=c <src> -o <name>.cfree.c`.
+2. Compiles the emitted source with the host C compiler (`cc -Werror -O0`).
+3. Runs the compiled executable and asserts the output matches the expected behavior.
+
+## Known Limitations
-`opt_cgtarget` records IR, runs SSA/DCE/combine/loop passes, then lowers
-through the wrapped target's CGCallPlan + value-emission methods. For the
-C backend this is undesirable: the *whole point* is to defer optimization
-to gcc. opt would just churn.
-
-Decision: when `emit_c_source` is set, force `opt_level = 0` regardless of
-what the caller asked for, with a diagnostic note. The C target then sits
-directly under CG with `virtual_regs = 1`, and CG mints virtual reg ids
-without any IR layer between them.
-
-## Type emission
-
-C source needs each composite type declared (typedef'd) before first use.
-The C target maintains a type-emission worklist:
-
-1. As CG calls into `func_begin`, `frame_slot`, `local`, `param`, `load`,
- `store`, etc., it carries `CfreeCgTypeId`. The target records every
- `CfreeCgTypeId` it sees.
-2. At `finalize`, walk the recorded types, topologically order by
- dependency (pointee, element, field, return, param), and emit:
- - scalars → use `<stdint.h>` / `_Bool` / `__int128` / `float` / `double` /
- `long double`.
- - pointers → `T*`.
- - arrays → `T (*) [N]` for parameters or `T name[N]` for declarations.
- - records → `struct ___s_N { … };` with explicit `_Alignas(K)` only if
- `align_override` requires it.
- - enums → emit underlying integer type; do not emit C `enum` (cfree CG
- does not preserve the C-level enum identity at this layer).
- - aliases → `typedef base_T name;`.
-3. Functions: emit signature with proper calling convention attribute
- (`__attribute__((sysv_abi))` etc.) only when CG requested non-default;
- the common case is the host's default ABI, which is exactly what we want.
-
-Bitfield-in-record handling: `cfree_cg_field` already returns byte offsets
-and bit-offset/width separately. We do **not** emit the field as a C
-bitfield. Instead, the record's emitted layout uses opaque storage bytes
-(e.g. one `unsigned char raw[N]` member or just a `uint64_t storage`), and
-the frontend's `bitfield_load`/`_store` calls into CG produce explicit
-mask/shift expressions referencing the storage member. This sidesteps the
-ABI ambiguity of C bitfields entirely.
-
-## Symbol and data emission
-
-`cfree_cg_decl`, `cfree_cg_data_*`, `cfree_cg_const_data` define symbols
-and data. The C target maps these to:
-
-- function decl → `T name(args);` forward declaration at TU top.
-- function defn → emitted by `func_begin/end`.
-- object decl (no body) → `extern T name;`.
-- object defn (data_begin/data_end) → `T name = { … };` constructed from
- the `data_int`/`data_float`/`data_bytes`/`data_zero`/`data_addr` stream
- buffered during the data definition. Easiest representation: write the
- bytes verbatim as `static const unsigned char __sym[N] = { 0xAA, … };`
- and cast to the typed pointer at uses. Loses readability, gains
- correctness with arbitrary aggregate initializers and inter-object
- relocations.
-- `data_addr(target, addend, …)` inside an initializer → cannot be expressed
- as a static const initializer if the target is in another TU; in that
- case lift to a runtime initializer (`__attribute__((constructor))`) or
- fail with a diagnostic. v0 may diagnose; v1 lifts.
-- `data_pcrel`, `data_symdiff` → diagnose; these are link-time concepts
- with no C-source equivalent. Frontends that need them are not viable C
- targets.
-- TLS objects → `_Thread_local T name;`.
-- COMDAT, weak, visibility — emit `__attribute__((weak))`,
- `__attribute__((visibility("hidden")))`, `__attribute__((selectany))`.
-- Symbol bind/visibility/flags from `CfreeCgSymbolAttrs` map to gcc
- attributes; no behavioral change.
-
-## Source locations and debug
-
-`cfree_cg_set_loc` triggers `T->set_loc`. The C target emits
-`#line N "path"` immediately before the next significant emission. With
-`-g` set on the downstream gcc invocation, the resulting `.o` carries
-source-mapped debug info back to the original cfree input. The cfree
-`Debug` producer is not used in this mode (no DWARF emission).
-
-## Touchpoints in the existing tree
-
-New files (additions only):
-
-- `src/arch/c_target/` — the C `CGTarget` implementation.
- - `target.c` — vtable construction; `c_cgtarget_new()` entry point;
- panic stubs for unimplemented methods.
- - `emit.c` — per-method emission bodies (includes name mapping; no
- separate `names.c`).
- - `cbuf.c` — heap-backed growable byte buffer for per-function
- declarations and TU-wide body/forwards text.
- - `internal.h` — `CTarget` struct and local prototypes.
-
-Type emission, data emission, and name mapping all live in `emit.c` for
-now — the `types.c`/`data.c`/`names.c` split anticipated in the original
-plan turned out to be overkill at Phase 1 size; revisit if a file grows
-unwieldy.
-
-Existing files touched (minimal, additive):
-
-- `include/cfree/core.h` — `CodeOptions` gains `emit_c_source` and
- `c_source_writer`.
-- `src/cg/session.c` — `cfree_cg_new` branches on `emit_c_source`: skips
- `mc_new`/`debug_new`, calls `c_cgtarget_new`, forces `opt_level = 0`.
-- `src/cg/asm.c` — `cfree_cg_file_scope_asm` panics with a "C target: …
- not yet supported" message when `g->mc == NULL`, so file-scope asm
- surfaces as a phased-rollout SKIP in the harness rather than a
- segfault.
-- `src/api/compile.c` — both `cfree_compile_c_obj_emit` and
- `cfree_compile_source_obj_emit` skip `emit_object_bytes` when
- `emit_c_source` is set (the C target already wrote source to the
- writer). The skip in `source_obj_emit` is what lets non-C frontends
- (toy, wasm) drive the C target too.
-- `src/arch/arch.h` / `src/cg/session.c` — new optional `CGTarget.alias`
- hook. Native backends leave it NULL; the C target implements it to
- emit `__attribute__((alias))` (ELF/PE) or a thunk (Mach-O), because
- the alias relationship doesn't survive serialization to text the way
- it does in an obj-level `(section, value)` pair.
-- `driver/cc.c` — accepts `--emit=c`, sets `compile_only`, routes the
- output writer to `copts.code.c_source_writer`.
-
-Nothing in `src/abi/*`, `src/arch/{aa64,x64,rv64}/`,
-`src/arch/regalloc.c`, `src/arch/mc.c`, or `src/arch/cgtarget.c` needs to
-change. The C backend is strictly additive at the CGTarget seam.
-
-## Things to **not** do
-
-- Do not invent a new `ABIStorageShape` enum or new `OpKind` for value-shaped
- aggregates. Aggregates stay address-shaped at the CG-Operand boundary; the
- C target emits `*(T*)addr` and gcc rebuilds value semantics.
-- Do not write a "trivial C ABI vtable". The C target inherits the host
- arch's ABI vtable for sizeof/alignof and ignores classification at the
- call site.
-- Do not register `arch_impl_c` in `src/arch/registry.c`. There is no "C
- arch"; the host arch is unchanged.
-- Do not try to make `opt_cgtarget` lower into the C target. Bypass opt.
-
-## Test surface
-
-Instead of a standalone `test/cbackend/`, we added a new path `C` to the
-existing `test/parse/` and `test/toy/` and `test/wasm/` runners. The frontends
-together prove the CGTarget seam is frontend-agnostic. The aggregate `make
-test-cbackend` invokes both runners with `CFREE_TEST_PATHS=C`.
-
-Per case, path `C` runs:
-1. `cfree cc --emit=c <src> -o <work>/<name>.cfree.c` (parse uses the
- `parse-runner --emit-c` harness with the cross-target overridden to
- the host's obj format; toy uses the driver directly).
-2. Host `cc` compiles the emitted source (`-std=gnu99
- -Wno-main-return-type` for toy because `fn main(): i64` emits as
- `int64_t main(void)`; parse uses `-Werror -std=c11` because its
- wrapper provides `int main()`).
-3. Native exec; exit code compared against `<name>.expected`.
-
-Phased-rollout handling: stderr matching `"C target: … not
-implemented|not yet supported"` is reported as SKIP rather than FAIL, so
-the harness signal reflects the implemented surface. Cases that need a
-later phase for non-panic reasons (e.g. require multi-TU LTO) can opt out
-of path `C` only via a `<name>.cbackend.skip` sidecar without affecting
-the other paths.
-
-Path `C` requires `is_native_target == 1` (the emitted C is target-locked
-and must be compiled by a host cc with a matching triple).
-
-Future tiers (still TODO):
-
-- **Self-hosting smoke** — compile libcfree through libcfree-via-C and
- check that the bootstrapped artifact still passes its test suite.
- Separate effort; flagged here only to note the long-term shape.
-
-## Phasing
-
-### Phase 0 — scaffolding ✅ landed
-
-- `CodeOptions.{emit_c_source, c_source_writer}` added.
-- `cfree_cg_new` branches on the flag; `opt_level` forced to 0.
-- `c_cgtarget_new` returns a vtable where unimplemented methods
- `compiler_panic("C target: method <name> not implemented")`.
-- `--emit=c` wired through `driver/cc.c`.
-- Acceptance met: an empty C source emits a clean prologue;
- unimplemented methods surface their name in the panic message.
-
-### Phase 1 — minimal viable: scalar arithmetic and calls ✅ landed
-
-Implemented: `func_begin`/`func_end`, `param`, `frame_slot`, `ret`,
-`binop`, `load_imm`, `copy`, `load`, `store`, `call`, `set_loc`,
-`finalize`, `destroy`, plus minimal int/void/pointer type emission.
-
-Other implementation choices that landed in Phase 1:
-
-- **Params land in frame slots, not regs.** `c_param` allocates a fresh
- frame slot via `c_frame_slot`, declares `T slot_N;`, and emits
- `slot_N = pN;` at function entry; CG then references the param as
- `OPK_LOCAL`. Simpler than coordinating fresh reg ids with the regalloc
- from outside CG.
-- **Per-TU forwards buffer.** `c_func_begin` and `c_call` both register
- forward declarations via `c_ensure_forward_decl` (dedup'd by
- `ObjSymId`). This handles out-of-order callees and references to
- external symbols.
-- **`alias` hook on CGTarget.** `c_alias` emits
- `__attribute__((alias("target")))` on ELF/PE and a wrapper thunk on
- Mach-O (clang on Darwin rejects the attribute outright).
-- **Mach-O linker-symbol underscore.** `c_sym_name` strips the leading
- `_` on Mach-O so the emitted C uses the source-level name (the host
- cc re-adds the underscore at link time).
-
-Acceptance met: `int add(int,int){return a+b;} int main(){return add(2,3);}`
-round-trips through the C target and exits 5.
-
-### Phase 2 — control flow and memory ✅ landed
-
-Implemented: `label_new`/`label_place`/`jump`/`cmp_branch`, `cmp`, `unop`,
-`scope_begin`/`scope_else`/`scope_end`/`break_to`/`continue_to`, `local`,
-`local_addr`, `addr_of`, `convert` (including `CV_BITCAST` via
-`__builtin_memcpy`), `load`/`store` for `OPK_INDIRECT`.
-
-Other choices that landed in Phase 2:
-
-- **Reg ids reused across types.** CG's value stack mints fresh virtual
- reg ids but reuses them across statements when the dst reg's previous
- value has been consumed. The first-sighting C declaration locks in one
- type, so every write to a `vN` emits `vN = (DECL_T)(uintptr_t)(RHS);`
- (or `(DECL_T)(RHS)` for FP dst). The `(uintptr_t)` bridge suppresses
- `-Wint-conversion` when the RHS happens to be an integer expression and
- `DECL_T` is a pointer.
-- **Locals always go to frame slots.** `c_local` allocates a slot via
- `c_frame_slot` and returns `CG_LOCAL_STORAGE_FRAME`; non-addr-taken
- locals could in principle be REGs but the uniform FRAME mapping is
- simpler and the C compiler dissolves the distinction.
-- **Scope methods are mostly inert.** The public `CfreeCg` API places
- break/continue labels via `label_place` itself, so `scope_begin` and
- `scope_end` only record kind/labels in a per-target stack. `break_to`
- and `continue_to` shim onto `jump` for completeness, and `SCOPE_IF`
- emits `if (!cond) goto break_label;` for the internal scope path.
-- **`OPK_GLOBAL` data access still panics → SKIP.** Phase 2 unlocked many
- toy fixtures that lower to global data references; those now surface as
- Phase 4 SKIPs ("OPK_GLOBAL data reference not yet supported") rather
- than masquerading as Phase-2 method gaps.
-- **Toy harness now runs path C at opt level 0 only.** `--emit=c` forces
- `opt_level=0` internally, so running it for every level in
- `CFREE_TOY_OPT_LEVELS` was duplicating identical work; the toy err
- cases no longer run when only path C is enabled (they're cc-failure
- tests with no dependency on the emit target).
-
-Acceptance met: 49 toy cases under path C pass (loops, conditionals,
-locals, pointer chasing, recursion); the remaining 78 SKIPs all blame
-Phase 3/4 surface (aggregates, intrinsics, atomics, data emission).
-
-### Phase 3 — aggregates, varargs, intrinsics ✅ landed
-
-Implemented: type emission for records / arrays / function types / enums /
-floats / `va_list`; aggregate call args and aggregate returns (address-operand
-emission); indirect call via function-pointer typedef; tail calls (lowered to
-`return f(args)`); `va_start_`/`va_arg_`/`va_end_`/`va_copy_`; `intrinsic` for
-the full `IntrinKind` set; `copy_bytes`/`set_bytes`; `alloca_`.
-
-Other choices that landed in Phase 3:
-
-- **Composite types as opaque byte storage.** Records and arrays are
- emitted as `typedef struct { _Alignas(A) uint8_t raw[N]; } __ty_N;` —
- same shape regardless of field layout. CG already speaks in
- `(base, byte_offset)` for field/element access, so the indirect path
- `(*(T*)((char*)addr + ofs))` does all the work and the host C compiler
- never needs to see the original field declarations. Sidesteps C bitfield
- ABI ambiguity entirely (see §"Type emission" in this doc).
-- **Pointer arithmetic via `uintptr_t`.** Any binop/cmp whose operand type
- is a pointer is wrapped `((uintptr_t)op)` before the operator so the
- emitted source compiles cleanly under `-Wpedantic -Werror` (strict C
- forbids `void* + void*`). cfree IR already carries byte deltas, so
- arithmetic is integer-shaped semantically; this just makes that syntactic.
-- **Type-emission worklist.** `c_ensure_typedef` lazily emits a typedef
- the first time `c_typename` encounters a RECORD/ARRAY/FUNC, recursing
- on field/element/param types so dependencies emit first. Output lands
- in a TU-wide `typedefs` CBuf flushed between the prologue and forward
- declarations.
-- **Variadic signature.** Function signatures emit `, ...)` when
- `func.abi_variadic` is set so the host cc accepts `__builtin_va_start`.
-- **Tail calls become `return f(args);`.** For tail-flagged calls CG
- doesn't follow with a `ret`, so the C target emits the return itself;
- gcc handles the actual TCO. Void tail calls add a trailing `return;`.
-- **Aggregate ret storage.** `OPK_LOCAL`/`OPK_INDIRECT` ret storage is
- spelled as a direct assignment (`slot_R = f(args);` /
- `(*(T*)addr) = f(args);`). No sret shim — the host cc rebuilds that.
-- **Untyped IMM byte values.** `cfree_cg_memset` passes
- `OPK_IMM(byte, type=NONE)`; `c_emit_operand` recognizes this and emits
- the literal raw instead of attempting to cast to `(NONE)`.
-
-Acceptance met: 87 toy cases under path `C` pass; the remaining 40 SKIPs
-all blame Phase 4 surface (data emission, atomics, inline asm).
-
-### Phase 4 — atomics, asm, TLS, exotic features ✅ landed
-
-Implemented: `atomic_load`/`atomic_store`/`atomic_rmw`/`atomic_cas`/`fence`
-via `__atomic_*` builtins; `asm_block` re-serialized as GCC extended asm;
-data-symbol emission with `static`/`__attribute__((weak))`/visibility/
-`_Alignas`; `bitfield_load`/`_store` via mask+shift on opaque storage units;
-`load_const` via static-const byte array + `__builtin_memcpy`;
-`tls_addr_of` + `_Thread_local` extern TLS data; OPK_GLOBAL operand support
-in all use sites; identifier sanitization (cfree's `.L...` assembler-style
-local syms become C-safe).
-
-Other choices that landed in Phase 4:
-
-- **Data is opaque bytes.** Every data symbol emits as `_Alignas(A) uint8_t
- name[N] = { 0x.., ... };` regardless of source type. Field/element access
- goes through the existing `(*(T*)((char*)addr + ofs))` path, sidestepping
- C's static-initializer constraints.
-- **Relocations run at startup.** Cross-symbol references in data
- initializers become `__attribute__((constructor)) static void
- __cfree_init_<name>(void) { *(uintN_t*)(...) = (uintN_t)(uintptr_t)(...);
- }` thunks. Same path covers same-TU and cross-TU targets uniformly.
- Cost: rodata-with-relocs loses its `const` qualifier (the constructor
- writes through). Reasonable trade for the simplification.
-- **pcrel + symdiff are skipped.** Both are link-time concepts with no
- faithful in-language C-source equivalent (a constructor fixup can write
- the absolute value but not preserve the "section-relative" semantics that
- pcrel users actually want — they want shared `.rodata` pages, not dirty
- per-process tables). v1 lift would emit format-gated top-level
- `__asm__(".long sym - .\n")`; deferred.
-- **Atomics via `__atomic_*` builtins.** Memory order maps 1-1 to
- `__ATOMIC_*`. `compare_exchange_n` needs a real lvalue for `expected`,
- but CG reuses reg ids across types so `&prior_reg` may have the wrong C
- type — we materialize a scratch local typed by `prior.type` and copy out.
-- **Atomic value-arg cast bridging.** Same reg-id-reuse hazard exists for
- store/rmw value operands; `c_emit_operand_as(t, op, target_ty)` bridges
- the cast through `(uintptr_t)` (or skips for float dst) so `-Wint-
- conversion` doesn't trip.
-- **asm_block input filter.** cfree synthesizes a matching `"N"` input for
- every ASM_INOUT output to model the read in IR; gcc rejects that as
- redundant when the output is `+r`-tied, so we drop synthesized matches.
-- **Bitfields don't use C bitfields.** Loads/stores spell explicit shift +
- mask on a `uint{8,16,32,64}_t` storage view. Signed loads use the
- shift-left-then-arith-shift-right trick on a signed cast. The host C
- compiler never sees a `int x : 3;`-style declaration, so its ABI choices
- don't enter the picture.
-- **Mach-O TLS definitions panic-as-skip.** On Mach-O the user-visible TLS
- sym is the 24-byte TLV descriptor; reconstructing the original initial
- value from the synthesized `__init.<name>` aux sym is non-trivial. ELF
- TLS data definitions (where `.tdata`/`.tbss` carry the user's bytes
- directly) work — emit `_Thread_local _Alignas(A) uint8_t name[N] =
- {bytes};`. Mach-O TLS externs work too (they're SK_UNDEF, no descriptor).
-- **Identifier sanitization.** cfree mints assembler-style names like
- `.Ltoy_static_0` for static locals; `c_sym_name` rewrites illegal C
- identifier chars to `_`. Safe because such names are SB_LOCAL and have
- no cross-TU contract.
-
-Acceptance: 124 toy cases under path `C` pass. The 3 remaining SKIPs are
-pcrel/symdiff data relocs as documented.
-
-### Phase 5 — quality
-
-- Better identifier names (use `Sym name` where available instead of `vN`).
-- Optional: emit C struct field accesses instead of `*(T*)((char*)p + ofs)`
- when the offset corresponds to a known record field.
-- `#line` directives.
-- C-target-specific diagnostics for unsupported combinations
- (`data_pcrel`, `data_symdiff`, computed goto across functions, etc.).
-
-## Estimated size
-
-Order-of-magnitude:
-
-- Phase 0: ~150 LOC.
-- Phase 1: ~400 LOC (target.c skeleton + emit.c primitives + names.c +
- minimal type emission).
-- Phase 2: ~600 LOC.
-- Phase 3: ~800 LOC (type worklist is most of it).
-- Phase 4: ~600 LOC.
-- Phase 5: ongoing.
-
-Plus test fixtures and a `test-cbackend` make target — roughly 1k LOC of
-test infrastructure to start.
-
-Total to a credibly useful C backend: ~3k LOC, isolated to
-`src/arch/c_target/` plus the four small touchpoints listed earlier. No
-modifications to CG, ABI, regalloc, or existing arch backends.
-
-## Open questions
-
-- **Output format flag location**: resolved — `CodeOptions.emit_c_source`
- (bool) plus `c_source_writer` (CfreeWriter*). A `CFREE_OBJ_C_SOURCE`
- enum extension would have forced every `obj_fmt` switch to handle
- it; the boolean keeps the change additive.
-- **Aggregate returns in C source**: still pending Phase 3. Plan: emit
- `slot_R = f(args)` and let gcc handle the ABI. Revisit if codegen is
- poor.
-- **Multi-TU emission**: one `.c` source out per `.c` source in. Cross-TU
- LTO is gcc's job downstream.
-- **Floating-point reproducibility**: cfree's FP-flag enum
- (`CfreeCgFpFlag.REASSOC`/`APPROX`/…) maps to gcc's `-ffast-math`-style
- behavior, but per-operation. C doesn't have a per-op syntax. Options:
- ignore the flags (correct but pessimistic), wrap in `#pragma STDC
- FP_CONTRACT off`, or emit `__attribute__((optimize(...)))` on the
- enclosing function. Probably ignore in v0/v1, document the gap.
-- **i128 division/modulo and f128 ops** today get lowered to `__divti3` /
- `__multf3` / `__addtf3` calls. The C target could prefer native
- `__int128` / `long double` so gcc inlines them. Defer to Phase 3 along
- with the rest of wide-scalar work.
-- **Mach-O aliases lose `&alias == &target` identity** because we emit a
- thunk. No fixture depends on this; document the gap and revisit only
- if a real consumer hits it.
+- **PCREL and SYMDIFF relocations**: These are link-time concepts with no faithful in-language C-source equivalent. A frontend that attempts to emit them triggers a compiler panic (reported as a skip).
+- **128-bit Types**: `__int128` and `_Float128` usage may be limited or skipped depending on the host C compiler's capabilities (e.g. the host's `long double` might not be 128-bit).
+- **Floating-point reproducibility**: cfree's FP-flag enum (`REASSOC`, `APPROX`) applies per operation. C has no per-operation fast-math syntax, so these flags are currently ignored; the resulting FP evaluation is strict — correct, but potentially pessimistic.
diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c
@@ -502,7 +502,7 @@ void c_emit_operand(CTarget* t, Operand op) {
if (is_aggregate) {
cbuf_puts(&t->body, "(*(");
c_emit_type(t, &t->body, op.type);
- cbuf_puts(&t->body, "*)((char*)");
+ cbuf_puts(&t->body, "*)((char*)&");
cbuf_puts(&t->body, nm);
if (op.v.global.addend != 0) {
cbuf_puts(&t->body, " + ");
@@ -516,7 +516,7 @@ void c_emit_operand(CTarget* t, Operand op) {
} else {
cbuf_puts(&t->body, "void*");
}
- cbuf_puts(&t->body, ")((char*)");
+ cbuf_puts(&t->body, ")((char*)&");
cbuf_puts(&t->body, nm);
if (op.v.global.addend != 0) {
cbuf_puts(&t->body, " + ");
@@ -767,7 +767,7 @@ static void c_emit_addr_deref(CTarget* t, Operand addr, CfreeCgTypeId access_typ
const char* nm = c_sym_name(t, addr.v.global.sym);
cbuf_puts(&t->body, "(*(");
c_emit_type(t, &t->body, access_type);
- cbuf_puts(&t->body, "*)((char*)");
+ cbuf_puts(&t->body, "*)((char*)&");
cbuf_puts(&t->body, nm);
if (addr.v.global.addend != 0) {
cbuf_puts(&t->body, " + ");
@@ -829,7 +829,7 @@ static void c_emit_lvalue_addr(CTarget* t, Operand lv, CfreeCgTypeId dst_type) {
const char* nm = c_sym_name(t, lv.v.global.sym);
cbuf_puts(&t->body, "((");
c_emit_type(t, &t->body, dst_type);
- cbuf_puts(&t->body, ")((char*)");
+ cbuf_puts(&t->body, ")((char*)&");
cbuf_puts(&t->body, nm);
if (lv.v.global.addend != 0) {
cbuf_puts(&t->body, " + ");
@@ -1013,8 +1013,23 @@ void c_ensure_forward_decl(CTarget* t, ObjSymId sym, CfreeCgTypeId fn_type) {
if (t->sym_forwarded[sym]) return;
t->sym_forwarded[sym] = 1;
const char* name = c_sym_name(t, sym);
- c_emit_func_signature(t, &t->forwards, name, fn_type);
- cbuf_puts(&t->forwards, ";\n");
+ const ObjSym* os = obj_symbol_get(t->obj, sym);
+ if ((os && (os->kind == SK_FUNC || os->kind == SK_IFUNC)) || fn_type != 0) {
+ c_emit_func_signature(t, &t->forwards, name, fn_type);
+ cbuf_puts(&t->forwards, ";\n");
+ } else {
+ if (os && os->bind == SB_LOCAL) cbuf_puts(&t->forwards, "static ");
+ else cbuf_puts(&t->forwards, "extern ");
+ if (os && os->section_id != OBJ_SEC_NONE) {
+ const Section* sec = obj_section_get(t->obj, os->section_id);
+ if (sec->kind == SEC_RODATA) cbuf_puts(&t->forwards, "const ");
+ }
+ cbuf_puts(&t->forwards, "struct __cfree_data_");
+ cbuf_puts(&t->forwards, name);
+ cbuf_puts(&t->forwards, " ");
+ cbuf_puts(&t->forwards, name);
+ cbuf_puts(&t->forwards, ";\n");
+ }
}
void c_func_end(CGTarget* T) {
@@ -3257,58 +3272,6 @@ static int c_sym_has_relocs(CTarget* t, ObjSecId sec_id, u32 base, u32 size) {
return 0;
}
-static void c_emit_sym_relocs_fixup(CTarget* t, const char* nm,
- ObjSecId sec_id, u32 base, u32 size) {
- u32 total = obj_reloc_total(t->obj);
- int first = 1;
- for (u32 i = 0; i < total; ++i) {
- const Reloc* r = obj_reloc_at(t->obj, i);
- if (r->section_id != sec_id) continue;
- if (r->offset < base || r->offset >= base + size) continue;
- if (first) {
- cbuf_puts(&t->data_defs,
- "__attribute__((constructor)) static void __cfree_init_");
- cbuf_puts(&t->data_defs, nm);
- cbuf_puts(&t->data_defs, "(void) {\n");
- first = 0;
- }
- /* api_data_reloc_kind only emits R_ABS{32,64} and R_PC{32,64}; pcrel
- * has no constructor-time spelling (see doc/CBACKEND.md §pcrel). */
- u32 width = 0;
- switch ((int)r->kind) {
- case R_ABS32: width = 4; break;
- case R_ABS64: width = 8; break;
- case R_PC32:
- case R_PC64:
- compiler_panic(t->c, (SrcLoc){0, 0, 0},
- "C target: pcrel data reloc not yet supported");
- default:
- compiler_panic(t->c, (SrcLoc){0, 0, 0},
- "C target: data reloc kind %d not yet supported",
- (int)r->kind);
- }
- const char* tgt = c_sym_name(t, r->sym);
- const char* ity = (width == 8) ? "uint64_t" :
- (width == 4) ? "uint32_t" :
- (width == 2) ? "uint16_t" : "uint8_t";
- cbuf_puts(&t->data_defs, " *(");
- cbuf_puts(&t->data_defs, ity);
- cbuf_puts(&t->data_defs, "*)((char*)");
- cbuf_puts(&t->data_defs, nm);
- cbuf_puts(&t->data_defs, " + ");
- cbuf_put_u64(&t->data_defs, (u64)(r->offset - base));
- cbuf_puts(&t->data_defs, ") = (");
- cbuf_puts(&t->data_defs, ity);
- cbuf_puts(&t->data_defs, ")(uintptr_t)((char*)");
- cbuf_puts(&t->data_defs, tgt);
- if (r->addend != 0) {
- cbuf_puts(&t->data_defs, " + ");
- cbuf_put_i64(&t->data_defs, r->addend);
- }
- cbuf_puts(&t->data_defs, ");\n");
- }
- if (!first) cbuf_puts(&t->data_defs, "}\n");
-}
/* Emit one data symbol: extern declaration if undef, otherwise the full
* definition with bytes. Function symbols are skipped — those go through the
@@ -3463,71 +3426,157 @@ static void c_emit_data_symbol(CTarget* t, ObjSymId id, const ObjSym* os) {
if (!c_is_data_section(sec)) return;
u32 base = (u32)os->value;
u32 size = (u32)os->size;
- /* Storage class: SB_LOCAL → `static`, SB_GLOBAL/WEAK → file scope. */
+ u32 nrelocs = 0;
+ u32 total_relocs = obj_reloc_total(t->obj);
+ for (u32 i = 0; i < total_relocs; ++i) {
+ const Reloc* r = obj_reloc_at(t->obj, i);
+ if (r->section_id == os->section_id && r->offset >= base && r->offset < base + size) {
+ nrelocs++;
+ }
+ }
+
+ Heap* h = t->c->ctx->heap;
+ const Reloc** rs = NULL;
+ if (nrelocs) {
+ rs = (const Reloc**)h->alloc(h, nrelocs * sizeof(const Reloc*), 1);
+ u32 j = 0;
+ for (u32 i = 0; i < total_relocs; ++i) {
+ const Reloc* r = obj_reloc_at(t->obj, i);
+ if (r->section_id == os->section_id && r->offset >= base && r->offset < base + size) {
+ rs[j++] = r;
+ }
+ }
+ for (u32 i = 1; i < nrelocs; ++i) {
+ const Reloc* tmp = rs[i];
+ u32 k = i;
+ while (k > 0 && rs[k-1]->offset > tmp->offset) {
+ rs[k] = rs[k-1];
+ k--;
+ }
+ rs[k] = tmp;
+ }
+ }
+
+ cbuf_puts(b, "struct ");
+ if (nrelocs > 0) cbuf_puts(b, "__attribute__((packed)) ");
+ cbuf_puts(b, "__cfree_data_");
+ cbuf_puts(b, nm);
+ cbuf_puts(b, " {\n");
+
+ if (nrelocs == 0) {
+ cbuf_puts(b, " uint8_t raw[");
+ cbuf_put_u64(b, size ? size : 1);
+ cbuf_puts(b, "];\n");
+ } else {
+ u32 cur = base;
+ for (u32 i = 0; i < nrelocs; ++i) {
+ const Reloc* r = rs[i];
+ if (r->offset > cur) {
+ cbuf_puts(b, " uint8_t chunk_");
+ cbuf_put_u64(b, i);
+ cbuf_puts(b, "[");
+ cbuf_put_u64(b, r->offset - cur);
+ cbuf_puts(b, "];\n");
+ }
+ u32 width = (r->kind == R_ABS32) ? 4 : 8;
+ const char* ty = (width == 4) ? "uint32_t" : "void*";
+ cbuf_puts(b, " ");
+ cbuf_puts(b, ty);
+ cbuf_puts(b, " ptr_");
+ cbuf_put_u64(b, i);
+ cbuf_puts(b, ";\n");
+ cur = r->offset + width;
+ }
+ if (cur < base + size) {
+ cbuf_puts(b, " uint8_t chunk_");
+ cbuf_put_u64(b, nrelocs);
+ cbuf_puts(b, "[");
+ cbuf_put_u64(b, base + size - cur);
+ cbuf_puts(b, "];\n");
+ }
+ }
+ cbuf_puts(b, "};\n");
+
if (os->bind == SB_LOCAL) cbuf_puts(b, "static ");
if (is_tls) cbuf_puts(b, "_Thread_local ");
c_emit_link_attrs(b, os);
- /* `__attribute__((unused))` so -Wunused-const-variable doesn't fire when
- * the upstream use got folded away (CG can emit the data sym before the
- * frontend decides to inline its load). */
cbuf_puts(b, "__attribute__((unused)) ");
- /* `const` only when the section is RODATA and no relocs patch it (we
- * still drop const if there are relocs because the constructor will
- * write through this storage). */
- int has_relocs = c_sym_has_relocs(t, os->section_id, base, size);
+
int is_ro = (sec->kind == SEC_RODATA);
- if (is_ro && !has_relocs) cbuf_puts(b, "const ");
+ if (is_ro) cbuf_puts(b, "const ");
+
cbuf_puts(b, "_Alignas(");
cbuf_put_u64(b, sec->align ? sec->align : 1);
- cbuf_puts(b, ") uint8_t ");
+ cbuf_puts(b, ") struct __cfree_data_");
cbuf_puts(b, nm);
- cbuf_puts(b, "[");
- cbuf_put_u64(b, size ? size : 1);
- cbuf_puts(b, "]");
+ cbuf_puts(b, " ");
+ cbuf_puts(b, nm);
+
if (sec->kind == SEC_BSS || sec->sem == SSEM_NOBITS) {
- /* BSS — no initializer (defaults to zero in C). */
cbuf_puts(b, ";\n");
} else if (size == 0) {
- cbuf_puts(b, ";\n");
+ cbuf_puts(b, " = {{0}};\n");
} else {
- Heap* h = t->c->ctx->heap;
+ cbuf_puts(b, " = {\n");
u8* bytes = (u8*)h->alloc(h, size, 1);
- if (!bytes) {
- compiler_panic(t->c, (SrcLoc){0, 0, 0}, "C target: oom on data bytes");
- }
c_read_section_bytes(sec, base, bytes, size);
- c_emit_data_bytes(b, bytes, size);
+
+ if (nrelocs == 0) {
+ cbuf_puts(b, " .raw = {");
+ for (u32 i = 0; i < size; ++i) {
+ if (i > 0) cbuf_puts(b, ", ");
+ cbuf_put_u64(b, bytes[i]);
+ }
+ cbuf_puts(b, "}\n");
+ } else {
+ u32 cur = base;
+ for (u32 i = 0; i < nrelocs; ++i) {
+ const Reloc* r = rs[i];
+ if (r->offset > cur) {
+ cbuf_puts(b, " .chunk_");
+ cbuf_put_u64(b, i);
+ cbuf_puts(b, " = {");
+ for (u32 k = 0; k < r->offset - cur; ++k) {
+ if (k > 0) cbuf_puts(b, ", ");
+ cbuf_put_u64(b, bytes[cur - base + k]);
+ }
+ cbuf_puts(b, "},\n");
+ }
+
+ u32 width = (r->kind == R_ABS32) ? 4 : 8;
+ c_ensure_forward_decl(t, r->sym, 0);
+ const char* tgt = c_sym_name(t, r->sym);
+ const char* cast = (width == 4) ? "(uint32_t)(uintptr_t)" : "(void*)";
+
+ cbuf_puts(b, " .ptr_");
+ cbuf_put_u64(b, i);
+ cbuf_puts(b, " = ");
+ cbuf_puts(b, cast);
+ cbuf_puts(b, "((char*)&");
+ cbuf_puts(b, tgt);
+ if (r->addend != 0) {
+ cbuf_puts(b, " + ");
+ cbuf_put_i64(b, r->addend);
+ }
+ cbuf_puts(b, "),\n");
+ cur = r->offset + width;
+ }
+ if (cur < base + size) {
+ cbuf_puts(b, " .chunk_");
+ cbuf_put_u64(b, nrelocs);
+ cbuf_puts(b, " = {");
+ for (u32 k = 0; k < base + size - cur; ++k) {
+ if (k > 0) cbuf_puts(b, ", ");
+ cbuf_put_u64(b, bytes[cur - base + k]);
+ }
+ cbuf_puts(b, "}\n");
+ }
+ }
h->free(h, bytes, size);
- cbuf_puts(b, ";\n");
+ cbuf_puts(b, "};\n");
}
- (void)nm;
- (void)has_relocs;
-}
-/* Emit only the constructor-time fixups for a single data symbol, if any.
- * Storage must already be defined (emitted by c_emit_data_symbol). Run as a
- * second pass so that __cfree_init_<name> sees forward-defined names for
- * any relocation target later in the symbol order. */
-static void c_emit_data_symbol_fixups(CTarget* t, ObjSymId id,
- const ObjSym* os) {
- if (c_is_local_static_sym(t, id)) return;
- if (os->kind == SK_FUNC || os->kind == SK_IFUNC) return;
- if (os->kind == SK_SECTION || os->kind == SK_FILE) return;
- if (os->kind == SK_UNDEF || os->kind == SK_COMMON) return;
- if (os->section_id == OBJ_SEC_NONE) return;
- /* Mach-O TLS: descriptor bytes are not user-visible data (they hold the
- * TLV pointer and an offset), and the init sym has been folded into the
- * descriptor's `_Thread_local` emission. Either way, the C-side
- * `_Thread_local` declaration carries its own initializer; there is no
- * separate storage to fix up. */
- if (os->kind == SK_TLS && t->c->target.obj == CFREE_OBJ_MACHO) return;
- const Section* sec = obj_section_get(t->obj, os->section_id);
- if (!c_is_data_section(sec)) return;
- u32 base = (u32)os->value;
- u32 size = (u32)os->size;
- if (!c_sym_has_relocs(t, os->section_id, base, size)) return;
- const char* nm = c_sym_name(t, id);
- c_emit_sym_relocs_fixup(t, nm, os->section_id, base, size);
+ if (nrelocs) h->free(h, (void*)rs, nrelocs * sizeof(const Reloc*));
}
static void c_emit_data(CTarget* t) {
@@ -3539,17 +3588,6 @@ static void c_emit_data(CTarget* t) {
c_emit_data_symbol(t, e.id, e.sym);
}
obj_symiter_free(it);
-
- /* Second pass: emit constructor fixups after all data storage has been
- * declared. This lets a __cfree_init_<name> body reference any other data
- * symbol regardless of emission order. */
- it = obj_symiter_new(t->obj);
- if (!it) return;
- while (obj_symiter_next(it, &e)) {
- if (!e.sym) continue;
- c_emit_data_symbol_fixups(t, e.id, e.sym);
- }
- obj_symiter_free(it);
}
/* === finalize / destroy === */
@@ -3566,11 +3604,11 @@ void c_finalize(CGTarget* T) {
c_writer_write(t, t->typedefs.data, t->typedefs.len);
c_writer_puts(t, "\n");
}
+ c_emit_data(t);
if (t->forwards.len) {
c_writer_write(t, t->forwards.data, t->forwards.len);
c_writer_puts(t, "\n");
}
- c_emit_data(t);
if (t->data_defs.len) {
c_writer_write(t, t->data_defs.data, t->data_defs.len);
c_writer_puts(t, "\n");