commit 3eb1bba007c88ed0e45f9234967602b1f2d56b8c
parent 9a3a508a459a3d8d8d75530d66b988b68da42c43
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 22 May 2026 12:34:13 -0700
Add Windows PE/COFF support
Diffstat:
88 files changed, 12910 insertions(+), 330 deletions(-)
diff --git a/README.md b/README.md
@@ -12,6 +12,9 @@ It features:
- A cross-compiling backend, supporting 32-bit and 64-bit RISC-V, x86, ARM, and
WASM.
- Support for object files and executables (PE/COFF, ELF, Mach-O)
+- Primary tested targets: x86_64-linux, aarch64-linux, x86_64-macos,
+ aarch64-macos, x86_64-windows, aarch64-windows, plus freestanding
+ variants of the same architectures.
- An archiver
- An assembler, standalone and inline
- Basic linker script support
diff --git a/doc/CTOOLCHAIN.md b/doc/CTOOLCHAIN.md
@@ -248,6 +248,39 @@ gaps that exist equally in `cc -E`:
- **C++ demangling.** `-C`, `--demangle` — N/A for C; can land as a
silent no-op once it's needed.
+## Windows (PE/COFF) target
+
+Cross-compilation to Windows requires a mingw-w64 sysroot for system
+libraries and CRT bits. Set `CFREE_MINGW_SYSROOT` to the
+`<toolchain>/x86_64-w64-mingw32` directory, or pass `-isysroot` /
+`--sysroot` (a command-line sysroot takes precedence over the
+environment variable), so the `cc` driver appends `$SYSROOT/lib` to the
+library search path after any user-supplied `-L` directories. Both
+`cc -lFOO` and `ld -lFOO` resolve Windows libraries by trying the
+candidate file names `libFOO.dll.a` → `libFOO.a` → `FOO.lib` →
+`FOO.dll.a` in that order (mingw-canonical first, MSVC-style fallback).
+
+Example invocations:
+
+```sh
+export CFREE_MINGW_SYSROOT=/opt/homebrew/opt/mingw-w64/toolchain-x86_64/x86_64-w64-mingw32
+
+# Compile-only: produces hello.obj (note .obj suffix on Windows targets).
+cfree cc -target x86_64-windows -c hello.c
+
+# Inspect a PE32+ image. -p prints the optional header, data
+# directories, and per-DLL import lists.
+cfree objdump -p hello.exe
+
+# Link via MSVC-style flag surface (opt-in via --ms-link-driver):
+cfree ld --ms-link-driver /OUT:hello.exe /SUBSYSTEM:CONSOLE \
+ /DEFAULTLIB:kernel32 hello.obj
+```
+
+Windows predefined macros emitted by `cc -target x86_64-windows`:
+`_WIN32`, `_WIN64`, `WIN32`, `__MINGW32__`, `__MINGW64__`, `_M_X64`,
+`_M_AMD64`. `aarch64-windows` substitutes `_M_ARM64` for the
+x64-specific names. `_MSC_VER` is deliberately not set — cfree targets
+the mingw flavor on Windows (DWARF debug info, mingwex CRT), not MSVC.
+
## Recommended next moves
1. **Add to `cc` first**: `-rdynamic`, `-print-search-dirs`,
diff --git a/doc/WINDOWS.md b/doc/WINDOWS.md
@@ -0,0 +1,408 @@
+# Windows / PE-COFF support
+
+This document describes the Windows target support in `cfree` as it
+exists now. It is no longer a bring-up plan: x64 and aarch64 PE/COFF
+object emission, PE executable linking, mingw import library ingestion,
+Windows ABI selection, and llvm-mingw UCRT hosted links are implemented.
+
+## Scope
+
+Supported targets:
+
+- `x86_64-windows`
+- `aarch64-windows`
+
+The Windows path is 64-bit only. i386 Win32 is out of scope because
+`cfree` has no 32-bit x86 backend.
+
+The intended hosted profile is mingw/llvm-mingw UCRT, not MSVC. cfree
+links against llvm-mingw's CRT and import archives and emits PE32+
+executables that import UCRT API-set DLLs and system DLLs such as
+`KERNEL32.dll`.
+
+Non-goals for the current Windows path:
+
+- SEH unwind metadata and C++ exception interop through cfree frames
+- `.pdata` / `.xdata` emission for cfree-generated functions
+- PDB, CodeView, windbg integration, and MSVC object/debug parity
+- ARM64EC ABI support
+- legacy MSVCRT as a separately selectable hosted profile
+
+## Current Status
+
+The implemented path can:
+
+- compile C to relocatable PE/COFF objects for x64 and aarch64
+- read and write COFF objects, including COMDAT, weak externals,
+ common symbols, section aux records, and per-arch relocations
+- link PE32+ executables directly with `cfree ld` / `cfree cc`
+- ingest mingw import archives and synthesize PE import tables
+- link llvm-mingw UCRT startup objects, CRT archives, and system import
+ libraries
+- run trivial x64 and aarch64 Windows executables under Wine through
+ Debian podman containers
+- select the Win64 x64 and Windows AArch64 ABI through the normal
+ `(arch, os)` ABI dispatch
+- emit Windows driver defaults such as `.obj`, `.exe`, Windows
+ predefined macros, subsystem selection, and sysroot library search
+
+Validated smoke coverage includes:
+
+- COFF round-trip: 22 hand-built ObjBuilder cases, byte-stable
+- PE import unit smoke: synthetic short import to linked `.exe`
+- PE import mingw smoke: real `libkernel32.a` to linked `.exe`
+- llvm-mingw UCRT hosted x64 and aarch64 console executables
+- x64 and aarch64 `windows.h` console and `-mwindows` GUI links
+- Debian podman + Wine execution for x64 and aarch64 return-code
+ propagation
+- x64 and aarch64 `Sleep` smoke execution through `KERNEL32.dll`
+- x64 and aarch64 `windows.h` coverage for handles, callback typedefs,
+ wide APIs, `winbase`, `processthreadsapi`, `synchapi`, `fileapi`,
+ `errhandlingapi`, `winuser`, inline helpers, and macro-heavy declarations
+- x64 and aarch64 Wine runtime coverage for `argc` / `argv` / `envp`,
+ stdout/stderr handles, heap allocation, file I/O, error codes, and
+ callback execution through `qsort`
+- x64 and aarch64 Wine runtime coverage for UCRT stdio entry points and
+ imported data reads through `__dcrt_initial_narrow_environment`
+- cfree-emitted TLS variables on x64 and aarch64, including PE TLS
+ directory presence and Wine runtime execution when matching Wine
+ containers are available
+- system-DLL coverage for `user32` + `gdi32` GUI links, `gdi32` drawing
+ via memory DC + stock objects, `advapi32` registry open/query,
+ `ws2_32` Winsock startup/socket/closesocket/cleanup, `ole32`
+ CoInitializeEx / CoUninitialize, `shell32` `CommandLineToArgvW`,
+ `comctl32` `InitCommonControls(Ex)`, and a mixed-member `libucrt.a`
+ case that pulls in both an `api-ms-win-crt-*` short-import and a
+ real `lib64_libmingwex_a-*.o` stdio helper — x64 and aarch64,
+ link-level imports verified via `cfree objdump -p` and exit code
+ checked under Debian podman Wine when the matching container is
+ available
+- ABI classifier tests for x64/aa64 Windows alongside Linux/macOS
+
+The remaining work is coverage and polish, not first-link bring-up. See
+the checklist at the end of this file.
+
+## Compile, Link, And Run Under Wine
+
+Use llvm-mingw UCRT for run-on-Wine validation on both architectures.
+The Homebrew `mingw-w64` x64 sysroot is still useful for object and
+import-library tests, but its legacy MSVCRT profile can import CRT entry
+points that Debian bookworm Wine does not implement
+(`msvcrt.dll.__acrt_iob_func`). Prefer llvm-mingw UCRT for executable
+runtime checks.
+
+On this host, `podman --arch ...` is the reliable way to select the
+container architecture. `--platform linux/amd64` has not consistently
+selected amd64 under the qemu-backed podman setup. The minimal Debian
+Wine package exposes the launcher as `/usr/lib/wine/wine64`.
+
+Build one trivial return-code executable per Windows target:
+
+```sh
+UCRT_ROOT=/private/tmp/llvm-mingw/llvm-mingw-20260519-ucrt-macos-universal
+
+cat >/tmp/ret7.c <<'SRC'
+int main(void) { return 7; }
+SRC
+
+build/cfree cc -target x86_64-windows \
+ --sysroot "$UCRT_ROOT/x86_64-w64-mingw32" \
+ /tmp/ret7.c -o build/test/ret7-x64-ucrt-windows.exe
+
+build/cfree cc -target aarch64-windows \
+ --sysroot "$UCRT_ROOT/aarch64-w64-mingw32" \
+ /tmp/ret7.c -o build/test/ret7-arm64-windows.exe
+```
+
+Build the Debian Wine containers once:
+
+```sh
+podman build --arch amd64 -t localhost/cfree-wine-amd64 - <<'EOF'
+FROM docker.io/library/debian:bookworm
+RUN apt-get update \
+ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends wine64 \
+ && rm -rf /var/lib/apt/lists/*
+EOF
+
+podman build --arch arm64 -t localhost/cfree-wine-arm64 - <<'EOF'
+FROM docker.io/library/debian:bookworm
+RUN apt-get update \
+ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends wine64 \
+ && rm -rf /var/lib/apt/lists/*
+EOF
+```
+
+Run the executables through Wine and assert the process exit code:
+
+```sh
+podman run --rm --arch amd64 -v "$PWD:/work:ro" \
+ localhost/cfree-wine-amd64 \
+ bash -lc '
+ export WINEDEBUG=-all WINEPREFIX=/tmp/wineprefix
+ /usr/lib/wine/wine64 /work/build/test/ret7-x64-ucrt-windows.exe
+ rc=$?
+ echo "x64 exit=$rc"
+ test "$rc" -eq 7
+ '
+
+podman run --rm --arch arm64 -v "$PWD:/work:ro" \
+ localhost/cfree-wine-arm64 \
+ bash -lc '
+ export WINEDEBUG=-all WINEPREFIX=/tmp/wineprefix
+ /usr/lib/wine/wine64 /work/build/test/ret7-arm64-windows.exe
+ rc=$?
+ echo "arm64 exit=$rc"
+ test "$rc" -eq 7
+ '
+```
+
+## Design
+
+### Target And Driver
+
+`driver/target.c` recognizes `x86_64-windows` and
+`aarch64-windows`, sets `CFREE_OS_WINDOWS`, and selects `CFREE_OBJ_COFF`.
+Windows targets use `.obj` for relocatable output and `.exe` for linked
+programs.
+
+The driver defines the mingw-style Windows macros:
+
+- `_WIN32`
+- `_WIN64`
+- `__MINGW32__`
+- `__MINGW64__`
+- `_M_X64` / `_M_AMD64` for x64
+- `_M_ARM64` for aarch64
+
+For `cfree cc --sysroot <mingw-target-sysroot>`, the driver adds the
+target sysroot library directory and links the mingw CRT startup and
+runtime archives around user objects. The hosted UCRT profile uses:
+
+- `crt2.o`
+- `crtbegin.o`
+- `libmingw32.a`
+- `libmoldname.a`
+- `libmingwex.a`
+- `libmsvcrt.a`
+- system import libraries such as `libkernel32.a`
+- `crtend.o`
+
+In llvm-mingw UCRT, `libmsvcrt.a` is a compatibility/import archive.
+The final PE should import `api-ms-win-crt-*.dll` API-set DLLs, not
+literal `msvcrt.dll` or `ucrt.dll`.
+
+The linker supports console and GUI subsystem selection through
+`ld --subsystem=windows`, `ld --ms-link-driver /SUBSYSTEM:WINDOWS`,
+`cc -mwindows`, and `cc -Wl,/SUBSYSTEM:WINDOWS`. Console is the default.
+
+### ABI And Code Generation
+
+The ABI vtable is selected by `(arch, os)`.
+
+x64 Windows uses the Win64 calling convention:
+
+- integer/pointer args in `RCX`, `RDX`, `R8`, `R9`
+- floating args in `XMM0`-`XMM3`
+- 32-byte caller shadow space
+- `RBX`, `RBP`, `RDI`, `RSI`, `RSP`, and `R12`-`R15` callee-saved
+- `XMM6`-`XMM15` callee-saved when used
+- varargs duplicate floating-point arguments into the paired integer
+ argument registers
+- `va_list` is pointer-shaped
+
+Large Win64 stack frames emit `__chkstk` probes. The prologue loads the
+frame size into `EAX`, calls `__chkstk`, then subtracts the probed size
+from `RSP`. mingw's `libmingwex.a` supplies the hosted symbol.
+
+AArch64 Windows mostly follows AAPCS64, with the Windows `va_list`
+layout handled through the target ABI path. Variadic functions use a
+pointer-shaped `va_list`; floating-point arguments to variadic functions
+are carried in integer argument slots, matching llvm-mingw/Clang and the
+UCRT stdio wrappers. cfree also accepts ARM64EC COFF machine values as
+AArch64 where the object encoding is identical; ARM64EC ABI interop is
+still out of scope.
+
+`long double` is 64-bit `double` on Windows. `__int128` follows the
+mingw/GCC split into two GPR slots on Win64 rather than MSVC's
+pass-by-reference rule.
+
+### PE/COFF Objects
+
+`emit_coff` and `read_coff` implement relocatable COFF object support
+through the normal `ObjBuilder` API.
+
+The object path handles:
+
+- file, section, symbol, auxiliary symbol, relocation, and string-table
+ wire records
+- AMD64 and ARM64 machine types
+- COMDAT section groups and SELECTANY-style deduplication
+- weak externals and mingw alias fallback
+- common symbols
+- COFF section characteristics and alignment
+- section-relative and architecture-specific relocations
+- long section names through the COFF string table
+- short import records and long-form mingw import members
+
+The reader preserves enough COFF-specific metadata in object extension
+fields for round-trip stability while normalizing the information the
+linker needs.
+
+### PE Linker
+
+`link_emit_image_writer` dispatches COFF targets to the PE writer. The
+PE writer emits:
+
+- DOS stub and PE/COFF headers
+- PE32+ optional header
+- loadable sections with Windows alignment
+- `.idata` import descriptors, ILT/IAT blocks, hint/name tables, and
+ per-DLL grouping
+- per-architecture import call stubs
+- `.reloc` base relocation blocks for absolute addresses
+- TLS directory records and relocations for the directory fields
+- subsystem and entry-point selection
+- image identifiers through the shared `link_image_id_compute` path
+
+The default console entry point resolves to `mainCRTStartup`. GUI links
+default to `WinMainCRTStartup` when the subsystem is Windows GUI.
+
+### Imports, DLLs, And IAT
+
+COFF has no ELF-style GOT/PLT model. cfree emits direct references in
+the object and the linker resolves imported functions through IAT
+slots and import stubs.
+
+The import reader handles both:
+
+- short import records (`Sig1=0`, `Sig2=0xffff`)
+- long-form mingw import archive members such as those in
+ `libkernel32.a`
+
+Archive ingestion classifies import members as DSO shims, preserves
+per-member DLL names, and skips head/trailer members. The PE linker then
+builds one import descriptor per DLL.
+
+Imported data aliases such as `__imp_<name>` are object-like IAT data
+slots, not callable function imports. The PE hint/name table strips the
+`__imp_` prefix only for the exported symbol name.
+
+`read_coff_dso` can walk raw PE DLL export directories for named
+exports. Forwarder ENT entries (EAT RVA inside the export directory's
+own range, contents `OTHERDLL.OtherSym`) are surfaced as defined
+symbols so the linker can satisfy imports against them; cfree does
+not chase the chain at link time — the OS loader follows it at
+runtime, which is how `api-ms-win-crt-*.dll` resolves to
+`ucrtbase.dll`. The contract is pinned by
+`test/coff/pe-dso-forwarder.c`. Ordinal-only exports (entries present
+in the EAT but absent from the ENT) and ordinal-only short imports
+(`NameType=IMPORT_OBJECT_ORDINAL` in a short-import archive member)
+are not yet implemented: the latter fails with a clean diagnostic
+naming the offending archive member and ordinal rather than an
+internal panic. No mingw / llvm-mingw sysroot archive on the
+supported targets uses either shape.
+
+Mixed-member archives — where one `.a` file contains both short-import
+members and full long-form COFF object members — are ingested in a
+single pass: each member is classified independently, short-import
+records route through `read_coff_short_import` and become DSO inputs
+keyed by the embedded DLL name, while long-form members fall through
+to `read_coff` as regular objects. `libucrt.a` uses exactly this shape
+(`api-ms-win-crt-*.dll` short imports alongside
+`lib64_libucrt_extra_a-*.o` helpers). The composition is pinned by
+`test/coff/pe-mixed-archive.c`.
+
+### TLS
+
+COFF TLS data is materialized into `.tls$` sections. Code generation
+uses Windows TLS access:
+
+- x64: `gs:[0x58] + _tls_index * 8 + SECREL(sym)`
+- aarch64: `x18` (TEB), then the Windows TLS slot at `+0x58`, then
+ `_tls_index * 8 + SECREL(sym)`
+
+The PE writer emits a TLS directory in `.rdata` and base relocations for
+the directory's absolute fields. The optional-header TLS data directory
+and the mingw-visible `_tls_used` symbol both name that same record. The
+hosted UCRT smoke compiles and runs cfree-emitted TLS variables on both
+x64 and aarch64 under Wine when the matching podman images are
+installed.
+
+### Tooling
+
+Windows support is wired into the existing tools:
+
+- `objdump -p` prints PE image headers, data directories, and imports
+- `objdump -h` decodes raw `IMAGE_SECTION_HEADER.Characteristics` into
+ GNU-objdump-style tags (`LINK_ONCE`, `DISCARDABLE`, `LINK_REMOVE`,
+ `SHARED`, `GPREL`, ...) for both COFF .obj inputs and PE images
+- `objdump -f` summarizes architecture, format, section/symbol counts,
+ and (for PE images) image base / entry point / subsystem
+- `objdump -h` also prints COMDAT group membership immediately after
+ the section table
+- `objcopy` and `strip` accept COFF inputs
+- object detection distinguishes COFF objects from PE images
+- `ld --ms-link-driver` accepts common MS-link spellings such as
+ `/OUT`, `/ENTRY`, `/LIBPATH`, `/DEFAULTLIB`, and `/SUBSYSTEM`
+
+## Test Expectations
+
+Current test layers:
+
+- **COFF unit**: hand-built ObjBuilder to emit/read round-trip,
+ byte-stable
+- **mingw fixtures**: mingw-built `.obj` inputs read and re-emitted
+- **cfree codegen**: `cfree cc -target <arch>-windows -c` objects
+  (`x86_64-windows` or `aarch64-windows`) linked by external mingw tools
+  where useful
+- **cfree linker**: `cfree cc` / `cfree ld` emits PE executables
+- **Wine execution**: produced `.exe` files run under Debian podman
+ Wine containers for amd64 and arm64
+- **bad inputs**: malformed PE/COFF inputs should diagnose cleanly
+- **header ingestion**: `cfree cc` against llvm-mingw headers
+- **DLL/import reader**: raw PE DLL and import-library absorption
+
+The harness should skip Windows-target tests with a clear
+`SKIP: no mingw` message when the required toolchain is unavailable.
+Set `CFREE_TEST_HAS_MINGW=1` to require mingw/llvm-mingw test inputs.
+Wine execution should remain gated so normal local test runs do not
+require podman or Wine.
+
+## Remaining Checklist
+
+- [x] Broaden `windows.h` coverage beyond the current trivial smoke.
+ Add focused cases for handles, structs, callback typedefs, wide
+ APIs, selected `winbase`, `processthreadsapi`, `synchapi`,
+ `fileapi`, `errhandlingapi`, `winuser`, inline helpers, and
+ macro-heavy declarations.
+- [x] Broaden runtime execution under Wine on both x64 and aarch64.
+ Covered `argc` / `argv` / `envp`, stdout/stderr handles, heap
+ allocation, file I/O, error codes, callbacks, GUI `WinMain` links,
+ and cfree-emitted TLS variables.
+- [x] Continue broadening runtime execution under Wine for UCRT stdio
+ on aarch64 and imported data reads.
+- [x] Fix aarch64 Windows variadic UCRT stdio calls such as
+ `printf("x:%d\n", 42)` and floating-point printf arguments.
+ cfree now matches llvm-mingw/Clang argument lowering; Debian Wine
+ 8.0's aarch64 UCRT still crashes on formatted `printf` arguments
+ for clang-built binaries too, so Wine runtime coverage remains on
+ non-formatted UCRT stdio calls there.
+- [x] Add runtime tests for cfree-emitted TLS variables on both
+ architectures.
+- [x] Finish `_tls_used` symbol synthesis for the PE TLS contract.
+- [x] Expand DLL/import-library coverage for forwarded exports,
+ ordinal-only exports, mixed import-library members, and larger
+ system libraries. Forwarders pinned by
+ `test/coff/pe-dso-forwarder.c`; ordinal-only short imports
+ currently diagnose cleanly rather than panic; mixed-member
+ archives covered by `test/coff/pe-mixed-archive.c`; broader
+ system-DLL link + Wine coverage in
+ `test/coff/windows-system-dlls-smoke.sh` (user32/gdi32,
+ advapi32, ws2_32, ole32, shell32, comctl32, mixed UCRT).
+- [ ] Implement ordinal-only short imports if a real consumer surfaces
+ (today the path is a clean diagnostic, not a panic).
+- [ ] Optionally walk forwarder chains at link time (today the OS
+ loader chases them at runtime).
+- [x] Expand x64 hosted `windows.h` execution coverage to match the
+ aarch64 `Sleep` and GUI subsystem smokes.
+- [x] Optionally expose richer COFF section characteristics in
+ `objdump` output.
diff --git a/driver/cc.c b/driver/cc.c
@@ -125,6 +125,7 @@ typedef struct CcOptions {
size_t owned_output_path_size;
const char* entry; /* -e */
const char* linker_script; /* -T path */
+ uint16_t pe_subsystem; /* CfreePeSubsystem */
const char* sysroot; /* --sysroot / -isysroot */
const char* support_dir; /* --support-dir */
int probe_kind; /* CcProbeKind */
@@ -163,9 +164,14 @@ typedef struct CcOptions {
uint32_t narchives;
CcDsoInput* dsos;
uint32_t ndsos;
- /* -L search paths (argv-borrowed). */
+ /* -L search paths (argv-borrowed; last slot may be owned, see
+ * owned_sysroot_lib_dir). */
const char** lib_search_paths;
uint32_t nlib_search_paths;
+ /* Owned `<sysroot>/lib` slot appended for Windows targets when a
+ * sysroot is in effect (cmdline or CFREE_MINGW_SYSROOT). */
+ char* owned_sysroot_lib_dir;
+ size_t owned_sysroot_lib_dir_size;
/* Pending -l names (resolved at end-of-parse). */
CcPendingLib* pending_libs;
uint32_t npending_libs;
@@ -298,6 +304,9 @@ static void cc_options_release(CcOptions* o) {
driver_free(o->env, o->build_id_bytes, o->build_id_len);
if (o->owned_output_path)
driver_free(o->env, o->owned_output_path, o->owned_output_path_size);
+ if (o->owned_sysroot_lib_dir)
+ driver_free(o->env, o->owned_sysroot_lib_dir,
+ o->owned_sysroot_lib_dir_size);
driver_hosted_plan_fini(o->env, &o->hosted);
driver_cflags_fini(&o->cf, o->env);
driver_free(o->env, o->source_files, bound * sizeof(*o->source_files));
@@ -330,19 +339,51 @@ static char* cc_dup_span(DriverEnv* env, const char* s, size_t n) {
static int cc_record_build_id(CcOptions* o, const char* val);
static int cc_apply_hosted_profile(CcOptions* o);
+/* ASCII case-insensitive comparison of the first n bytes of val against
+ * the NUL-terminated keyword want. Returns 1 when val spells out all of
+ * want and then either ends (at n) or continues with ','; returns 0
+ * otherwise. */
+static int cc_subsystem_value_eq(const char* val, size_t n,
+                                 const char* want) {
+  size_t pos = 0;
+  while (want[pos] != '\0') {
+    char lhs;
+    char rhs;
+    if (pos >= n) return 0;
+    lhs = val[pos];
+    rhs = want[pos];
+    /* Fold ASCII lowercase letters to uppercase on both sides. */
+    if (lhs >= 'a' && lhs <= 'z') lhs = (char)(lhs - 'a' + 'A');
+    if (rhs >= 'a' && rhs <= 'z') rhs = (char)(rhs - 'a' + 'A');
+    if (lhs != rhs) return 0;
+    ++pos;
+  }
+  /* Whole keyword matched: accept an exact-length value, or a value
+   * whose next byte (still inside val[0..n)) is a comma. */
+  if (pos == n) return 1;
+  return val[pos] == ',';
+}
+
+/* Translate a subsystem spelling (val[0..n)) into o->pe_subsystem.
+ * CONSOLE / CUI select CFREE_PE_SUBSYSTEM_WINDOWS_CUI and WINDOWS / GUI
+ * select CFREE_PE_SUBSYSTEM_WINDOWS_GUI; matching is delegated to
+ * cc_subsystem_value_eq. Returns 0 on success; for any other spelling
+ * it reports "unsupported subsystem" and returns 1. */
+static int cc_record_pe_subsystem(CcOptions* o, const char* val, size_t n) {
+  int is_console = cc_subsystem_value_eq(val, n, "CONSOLE") ||
+                   cc_subsystem_value_eq(val, n, "CUI");
+  int is_gui;
+  if (is_console) {
+    o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_CUI;
+    return 0;
+  }
+  is_gui = cc_subsystem_value_eq(val, n, "WINDOWS") ||
+           cc_subsystem_value_eq(val, n, "GUI");
+  if (!is_gui) {
+    driver_errf(CC_TOOL, "unsupported subsystem: %.*s", (int)n, val);
+    return 1;
+  }
+  o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_GUI;
+  return 0;
+}
+
/* Parse a single GCC-style -Wl,X[,Y...] pass-through argument. */
static int cc_record_wl(CcOptions* o, const char* arg) {
const char* p = arg;
int expect_rpath = 0;
int expect_soname = 0;
int expect_interp = 0;
+ int expect_subsystem = 0;
while (*p) {
const char* tok = p;
size_t n = 0;
while (p[n] && p[n] != ',') ++n;
p = tok + n + (tok[n] == ',' ? 1 : 0);
- if (expect_rpath || expect_soname || expect_interp) {
+ if (expect_rpath || expect_soname || expect_interp || expect_subsystem) {
char* buf = cc_dup_span(o->env, tok, n);
if (!buf) {
driver_errf(CC_TOOL, "out of memory");
@@ -351,7 +392,12 @@ static int cc_record_wl(CcOptions* o, const char* arg) {
if (expect_rpath) o->rpaths[o->nrpaths++] = buf;
if (expect_soname) o->soname = buf;
if (expect_interp) o->interp_path = buf;
- expect_rpath = expect_soname = expect_interp = 0;
+ if (expect_subsystem) {
+ int rc = cc_record_pe_subsystem(o, buf, driver_strlen(buf));
+ driver_free(o->env, buf, n + 1u);
+ if (rc != 0) return 1;
+ }
+ expect_rpath = expect_soname = expect_interp = expect_subsystem = 0;
continue;
}
@@ -433,11 +479,23 @@ static int cc_record_wl(CcOptions* o, const char* arg) {
o->build_id_mode = CFREE_BUILDID_SHA256;
continue;
}
+ if (n >= 12 && driver_strneq(tok, "--subsystem=", 12)) {
+ if (cc_record_pe_subsystem(o, tok + 12, n - 12) != 0) return 1;
+ continue;
+ }
+ if (n == 11 && driver_strneq(tok, "--subsystem", 11)) {
+ expect_subsystem = 1;
+ continue;
+ }
+ if (n >= 11 && driver_strneq(tok, "/SUBSYSTEM:", 11)) {
+ if (cc_record_pe_subsystem(o, tok + 11, n - 11) != 0) return 1;
+ continue;
+ }
driver_errf(CC_TOOL, "unsupported -Wl, token: %.*s", (int)n, tok);
return 1;
}
- if (expect_rpath || expect_soname || expect_interp) {
+ if (expect_rpath || expect_soname || expect_interp || expect_subsystem) {
driver_errf(CC_TOOL, "-Wl option requires another comma argument");
return 1;
}
@@ -779,8 +837,12 @@ static int cc_resolve_pending_libs(CcOptions* o) {
LibResolveMode mode = (o->static_link || pl->link_mode == CFREE_LM_STATIC)
? LIB_RESOLVE_STATIC_ONLY
: LIB_RESOLVE_DYNAMIC_PREFER;
- if (driver_lib_resolve(o->env, pl->name, mode, o->lib_search_paths,
- o->nlib_search_paths, &p, &sz, &kind) != 0) {
+ LibResolveOS resolve_os = (o->target.os == CFREE_OS_WINDOWS)
+ ? LIB_RESOLVE_OS_WINDOWS
+ : LIB_RESOLVE_OS_POSIX;
+ if (driver_lib_resolve_for_os(o->env, pl->name, mode, resolve_os,
+ o->lib_search_paths, o->nlib_search_paths,
+ &p, &sz, &kind) != 0) {
driver_errf(CC_TOOL, "library not found: -l%s", pl->name);
return 1;
}
@@ -865,6 +927,75 @@ static int cc_apply_env(CcOptions* o) {
return 0;
}
+/* Append a default `<sysroot>/lib` to the library search path for
+ * Windows targets. The llvm-mingw UCRT sysroot ships import archives
+ * such as libkernel32.a, libmsvcrt.a, and the UCRT API-set archives
+ * under <sysroot>/lib; the user-supplied -L list is searched first,
+ * then this appended default. In this profile libmsvcrt.a is the
+ * UCRT-flavoured mingw compatibility archive, not a request to import
+ * literal msvcrt.dll. Sysroot resolution order:
+ *   1. -isysroot / --sysroot on the command line (already in
+ *      o->sysroot at this point);
+ *   2. CFREE_MINGW_SYSROOT env var (e.g. .../x86_64-w64-mingw32).
+ *
+ * No-op for non-Windows targets and for Windows when neither source
+ * provides a sysroot — keeps existing tests untouched.
+ *
+ * Ownership: the appended entry is a freshly allocated "<sysroot>/lib"
+ * string recorded in o->owned_sysroot_lib_dir (freed by
+ * cc_options_release); it does not alias the sysroot string. When the
+ * sysroot comes from the environment, o->sysroot is additionally set to
+ * that env-borrowed pointer so later stages see the effective sysroot.
+ *
+ * Returns 0 on success or no-op, 1 after reporting out-of-memory. */
+static int cc_append_windows_lib_dirs(CcOptions* o) {
+  const char* sysroot = o->sysroot;
+  char* joined = NULL;
+  size_t srlen;
+  size_t need_slash;
+  size_t bytes;
+  size_t off = 0;
+  uint32_t i;
+  int replaced = 0;
+  if (o->target.os != CFREE_OS_WINDOWS) return 0;
+  if (!sysroot || !sysroot[0]) {
+    sysroot = driver_getenv("CFREE_MINGW_SYSROOT");
+    if (!sysroot || !sysroot[0]) return 0;
+    o->sysroot = sysroot;
+  }
+  srlen = driver_strlen(sysroot);
+  /* Only insert a separator when the sysroot lacks a trailing '/'. */
+  need_slash = (srlen > 0 && sysroot[srlen - 1] != '/') ? 1u : 0u;
+  /* "<sysroot>" + "/"? + "lib" + NUL */
+  bytes = srlen + need_slash + 3u + 1u;
+  joined = driver_alloc(o->env, bytes);
+  if (!joined) {
+    driver_errf(CC_TOOL, "out of memory");
+    return 1;
+  }
+  driver_memcpy(joined + off, sysroot, srlen);
+  off += srlen;
+  if (need_slash) joined[off++] = '/';
+  driver_memcpy(joined + off, "lib", 3);
+  off += 3;
+  joined[off] = '\0';
+  if (o->owned_sysroot_lib_dir) {
+    /* Re-entry: the previous owned string is still referenced from
+     * lib_search_paths. Swap that slot in place before freeing so the
+     * list never holds a dangling pointer or a duplicate entry. */
+    for (i = 0; i < o->nlib_search_paths; ++i) {
+      if (o->lib_search_paths[i] == o->owned_sysroot_lib_dir) {
+        o->lib_search_paths[i] = joined;
+        replaced = 1;
+        break;
+      }
+    }
+    driver_free(o->env, o->owned_sysroot_lib_dir,
+                o->owned_sysroot_lib_dir_size);
+  }
+  o->owned_sysroot_lib_dir = joined;
+  o->owned_sysroot_lib_dir_size = bytes;
+  /* NOTE(review): assumes lib_search_paths was allocated with a spare
+   * slot beyond the parsed -L entries — confirm against its allocation
+   * in cc_parse. */
+  if (!replaced) o->lib_search_paths[o->nlib_search_paths++] = joined;
+  return 0;
+}
+
+/* Returns 1 unless one of the non-linking modes is selected:
+ * compile_only, preprocess_only, dump_tokens, or a dep_mode of
+ * CC_DEP_M / CC_DEP_MM; returns 0 in those cases. */
+static int cc_has_link_action(const CcOptions* o) {
+  if (o->compile_only || o->preprocess_only || o->dump_tokens) return 0;
+  if (o->dep_mode == CC_DEP_M || o->dep_mode == CC_DEP_MM) return 0;
+  return 1;
+}
+
+/* Sets o->wants_hosted_libc for Windows COFF targets when all of the
+ * following hold: neither no_stdlib nor no_defaultlibs is set, the flag
+ * is not already set, a non-empty sysroot is recorded, and the
+ * invocation either has a link action or has at least one source input
+ * (file or in-memory). Leaves the options untouched otherwise. */
+static void cc_apply_default_hosted_profile(CcOptions* o) {
+  int is_windows_coff = o->target.os == CFREE_OS_WINDOWS &&
+                        o->target.obj == CFREE_OBJ_COFF;
+  int opted_out = o->no_stdlib || o->no_defaultlibs;
+  int has_sysroot = o->sysroot && o->sysroot[0];
+  if (!is_windows_coff) return;
+  if (opted_out || o->wants_hosted_libc) return;
+  if (!has_sysroot) return;
+  if (cc_has_link_action(o) || o->nsource_files + o->nsource_memory != 0)
+    o->wants_hosted_libc = 1;
+}
+
static char* cc_dep_default_target(DriverEnv* env, const CcOptions* o,
size_t* out_size);
@@ -1131,6 +1262,14 @@ static int cc_parse(int argc, char** argv, CcOptions* o) {
if (o->target.pic == CFREE_PIC_NONE) o->target.pic = CFREE_PIC_PIC;
continue;
}
+ if (driver_streq(a, "-mwindows")) {
+ o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_GUI;
+ continue;
+ }
+ if (driver_streq(a, "-mconsole")) {
+ o->pe_subsystem = CFREE_PE_SUBSYSTEM_WINDOWS_CUI;
+ continue;
+ }
if (driver_strneq(a, "-Wl,", 4)) {
if (cc_record_wl(o, a + 4) != 0) return 1;
continue;
@@ -1339,6 +1478,7 @@ static int cc_parse(int argc, char** argv, CcOptions* o) {
if (o->probe_kind != CC_PROBE_NONE) return 0;
if (cc_apply_env(o) != 0) return 1;
+ if (cc_append_windows_lib_dirs(o) != 0) return 1;
if (cc_resolve_pending_libs(o) != 0) return 1;
{
@@ -1432,11 +1572,13 @@ static int cc_parse(int argc, char** argv, CcOptions* o) {
} else if (o->preprocess_only) {
/* stdout */
} else {
- o->output_path = "a.out";
+ o->output_path =
+ (o->target.os == CFREE_OS_WINDOWS) ? "a.exe" : "a.out";
}
}
}
}
+ cc_apply_default_hosted_profile(o);
if (cc_apply_hosted_profile(o) != 0) return 1;
return 0;
}
@@ -1661,22 +1803,32 @@ static char* cc_dep_default_target(DriverEnv* env, const CcOptions* o,
}
}
{
+ int win = (o && o->target.os == CFREE_OS_WINDOWS);
+ size_t ext_len = win ? 4u : 2u;
+ const char* ext = win ? ".obj" : ".o";
size_t name_len = dot - slash;
- size_t bufsz = name_len + 3;
+ size_t bufsz = name_len + ext_len + 1u;
buf = driver_alloc(env, bufsz);
if (!buf) return NULL;
driver_memcpy(buf, src + slash, name_len);
- buf[name_len] = '.';
- buf[name_len + 1] = 'o';
- buf[name_len + 2] = '\0';
+ driver_memcpy(buf + name_len, ext, ext_len);
+ buf[name_len + ext_len] = '\0';
*out_size = bufsz;
return buf;
}
}
}
-static char* cc_default_obj_path_for_name(DriverEnv* env, const char* src,
+static char* cc_default_obj_path_for_name(DriverEnv* env,
+ const CcOptions* o, const char* src,
size_t* out_size) {
+ /* Windows targets default to a `.obj` suffix; everyone else `.o`.
+ * Drivers accept both spellings as inputs (driver/inputs.c), but
+ * tooling that scrapes default outputs expects the canonical
+ * platform extension. */
+ int win = (o && o->target.os == CFREE_OS_WINDOWS);
+ size_t ext_len = win ? 4u : 2u; /* ".obj" or ".o" */
+ const char* ext = win ? ".obj" : ".o";
size_t srclen = driver_strlen(src);
size_t dot = srclen;
size_t slash = 0;
@@ -1697,13 +1849,12 @@ static char* cc_default_obj_path_for_name(DriverEnv* env, const char* src,
}
{
size_t name_len = dot - slash;
- size_t bufsz = name_len + 3;
+ size_t bufsz = name_len + ext_len + 1u;
buf = driver_alloc(env, bufsz);
if (!buf) return NULL;
driver_memcpy(buf, src + slash, name_len);
- buf[name_len] = '.';
- buf[name_len + 1] = 'o';
- buf[name_len + 2] = '\0';
+ driver_memcpy(buf + name_len, ext, ext_len);
+ buf[name_len + ext_len] = '\0';
*out_size = bufsz;
return buf;
}
@@ -2031,7 +2182,7 @@ static int cc_run_compile_objs(DriverEnv* env, const CcOptions* o,
for (i = 0; i < o->nsource_files; ++i) {
size_t out_size = 0;
char* out =
- cc_default_obj_path_for_name(env, o->source_files[i], &out_size);
+ cc_default_obj_path_for_name(env, o, o->source_files[i], &out_size);
int rc;
if (!out) {
driver_errf(CC_TOOL, "out of memory");
@@ -2204,6 +2355,7 @@ static int cc_run_link_exe(DriverEnv* env, const CcOptions* o,
lopts.build_id_len = o->build_id_len;
lopts.gc_sections = o->gc_sections;
lopts.pie = o->pie;
+ lopts.pe_subsystem = o->pe_subsystem;
lopts.interp_path = o->interp_path;
lopts.soname = o->soname;
if (o->new_dtags) {
diff --git a/driver/hosted.c b/driver/hosted.c
@@ -371,6 +371,73 @@ static int hosted_resolve_linux(const DriverHostedRequest* req,
return 1;
}
+/* Hosted-profile resolver for Windows targets backed by a mingw-w64
+ * sysroot. Adds `<sysroot>/include` to the plan and, when the request has
+ * link inputs, queues the objects and archives listed below from
+ * `<sysroot>/lib`. Returns 0 on success, 1 on failure. */
+static int hosted_resolve_windows_mingw(const DriverHostedRequest* req,
+                                        DriverHostedPlan* plan) {
+  /* Objects for the plan's `before` list. */
+  static const char* const before_objs[] = {
+      "lib/crt2.o",
+      "lib/crtbegin.o",
+  };
+  /* Archives for the plan's `after` list, queued in exactly this order.
+   * Several names appear twice and are queued twice (presumably so one
+   * left-to-right archive scan can satisfy circular references between
+   * them; confirm against the linker's archive semantics). */
+  static const char* const after_libs[] = {
+      "lib/libmingw32.a",  "lib/libmoldname.a", "lib/libmingwex.a",
+      "lib/libmsvcrt.a",   "lib/libadvapi32.a", "lib/libshell32.a",
+      "lib/libuser32.a",   "lib/libkernel32.a", "lib/libmingw32.a",
+      "lib/libmoldname.a", "lib/libmingwex.a",  "lib/libmsvcrt.a",
+      "lib/libkernel32.a",
+  };
+  size_t k;
+
+  if (!req->sysroot || !req->sysroot[0]) {
+    driver_errf(req->tool, "Windows hosted profile requires --sysroot");
+    return 1;
+  }
+  plan->profile_name = "windows-mingw";
+  if (hosted_add_existing_include(plan, req->env, req->sysroot, "include") !=
+      0) {
+    driver_errf(req->tool, "out of memory");
+    return 1;
+  }
+  /* Nothing to queue when the request carries no link inputs. */
+  if (!req->link_inputs) return 0;
+
+  for (k = 0; k < sizeof before_objs / sizeof before_objs[0]; ++k) {
+    if (hosted_add_required(plan->before, &plan->nbefore,
+                            DRIVER_HOSTED_MAX_BEFORE, req, req->sysroot,
+                            before_objs[k], DRIVER_HOSTED_INPUT_OBJECT) != 0)
+      return 1;
+  }
+  for (k = 0; k < sizeof after_libs / sizeof after_libs[0]; ++k) {
+    if (hosted_add_required(plan->after, &plan->nafter,
+                            DRIVER_HOSTED_MAX_AFTER, req, req->sysroot,
+                            after_libs[k], DRIVER_HOSTED_INPUT_ARCHIVE) != 0)
+      return 1;
+  }
+  if (hosted_add_required(plan->final, &plan->nfinal, DRIVER_HOSTED_MAX_FINAL,
+                          req, req->sysroot, "lib/crtend.o",
+                          DRIVER_HOSTED_INPUT_OBJECT) != 0)
+    return 1;
+  return 0;
+}
+
int driver_hosted_resolve(const DriverHostedRequest* req,
DriverHostedPlan* out) {
DriverHostedPlan zero = {0};
@@ -382,6 +449,9 @@ int driver_hosted_resolve(const DriverHostedRequest* req,
} else if (req->target.os == CFREE_OS_LINUX &&
req->target.obj == CFREE_OBJ_ELF) {
rc = hosted_resolve_linux(req, out);
+ } else if (req->target.os == CFREE_OS_WINDOWS &&
+ req->target.obj == CFREE_OBJ_COFF) {
+ rc = hosted_resolve_windows_mingw(req, out);
} else {
driver_errf(req->tool, "no hosted libc profile for target");
rc = 1;
diff --git a/driver/hosted.h b/driver/hosted.h
@@ -22,7 +22,7 @@ typedef struct DriverHostedInput {
} DriverHostedInput;
#define DRIVER_HOSTED_MAX_BEFORE 4
-#define DRIVER_HOSTED_MAX_AFTER 6
+#define DRIVER_HOSTED_MAX_AFTER 16
#define DRIVER_HOSTED_MAX_FINAL 2
#define DRIVER_HOSTED_MAX_INCLUDES 4
#define DRIVER_HOSTED_MAX_DEFINES 20
diff --git a/driver/ld.c b/driver/ld.c
@@ -74,6 +74,7 @@ typedef struct LdOptions {
int output_seen;
const char* entry; /* -e */
const char* script_path; /* -T */
+ uint16_t pe_subsystem; /* CfreePeSubsystem */
/* PT_INTERP path. NULL means "let libcfree pick the target default
* (e.g. /lib/ld-musl-aarch64.so.1)". Set by -dynamic-linker /
* --dynamic-linker. */
@@ -121,6 +122,10 @@ typedef struct LdOptions {
uint8_t cur_link_mode; /* CfreeLinkMode for following inputs */
uint8_t cur_group_id; /* 0 outside any --start-group */
uint8_t next_group_id; /* increments on --start-group */
+
+ /* Opt-in: treat `/...` arguments as MSVC link.exe flags. Off by
+ * default so legacy paths like `/usr/lib/foo.o` remain inputs. */
+ int ms_link_driver;
} LdOptions;
static void ld_usage(void) {
@@ -154,6 +159,7 @@ void driver_help_ld(void) {
"\n"
"ENTRY / SCRIPT\n"
" -e SYMBOL Entry symbol\n"
+ " --subsystem NAME PE subsystem: console or windows\n"
" -T SCRIPT.ld Use a linker script (parsed, not raw)\n"
"\n"
"TARGET\n"
@@ -365,15 +371,171 @@ static const char* arg_eq_value(const char* arg, const char* prefix) {
return arg + n + 1;
}
+/* MSVC-style `KEY:value` matcher. `arg` must begin with `/` or `-`,
+ * followed by `key` compared without regard to ASCII case, followed by
+ * `:`. Yields a pointer to the text after the colon, or NULL when any of
+ * those parts is missing (so `/KEY:val`, `/key:val`, `-KEY:val` match). */
+static const char* ms_flag_value(const char* arg, const char* key) {
+  const char* p = arg + 1;
+  const char* q = key;
+  if (!(arg[0] == '/' || arg[0] == '-')) return NULL;
+  while (*q) {
+    char lhs = *p++;
+    char rhs = *q++;
+    if (lhs >= 'a' && lhs <= 'z') lhs = (char)(lhs - 'a' + 'A');
+    if (rhs >= 'a' && rhs <= 'z') rhs = (char)(rhs - 'a' + 'A');
+    if (lhs != rhs) return NULL;
+  }
+  /* Key fully matched; a value flag needs the ':' separator next. */
+  return (*p == ':') ? p + 1 : NULL;
+}
+
+/* Bare-flag counterpart of ms_flag_value: whole-argument match, no `:value`. */
+static int ms_flag_match(const char* arg, const char* key) {
+  const char* p = arg + 1;
+  const char* q = key;
+  if (!(arg[0] == '/' || arg[0] == '-')) return 0;
+  for (; *q; ++p, ++q) {
+    char lhs = *p;
+    char rhs = *q;
+    if (lhs >= 'a' && lhs <= 'z') lhs = (char)(lhs - 'a' + 'A');
+    if (rhs >= 'a' && rhs <= 'z') rhs = (char)(rhs - 'a' + 'A');
+    if (lhs != rhs) return 0;
+  }
+  return *p == '\0';
+}
+
+static int ld_subsystem_value_eq(const char* val, const char* want) {
+  /* ASCII case-insensitive; a trailing ",..." on `val` is tolerated. */
+  for (; *want; ++val, ++want) {
+    char lhs = *val;
+    char rhs = *want;
+    if (lhs >= 'a' && lhs <= 'z') lhs = (char)(lhs - 'a' + 'A');
+    if (rhs >= 'a' && rhs <= 'z') rhs = (char)(rhs - 'a' + 'A');
+    if (lhs != rhs) return 0;
+  }
+  return *val == '\0' || *val == ',';
+}
+
+static int ld_parse_pe_subsystem(LdOptions* o, const char* val) {
+  /* CONSOLE/CUI select the CUI subsystem, WINDOWS/GUI the GUI one. */
+  int is_cui = ld_subsystem_value_eq(val, "CONSOLE") ||
+               ld_subsystem_value_eq(val, "CUI");
+  int is_gui = !is_cui && (ld_subsystem_value_eq(val, "WINDOWS") ||
+                           ld_subsystem_value_eq(val, "GUI"));
+  if (!is_cui && !is_gui) {
+    driver_errf(LD_TOOL, "unsupported subsystem: %s", val);
+    return 1;
+  }
+  o->pe_subsystem = is_cui ? CFREE_PE_SUBSYSTEM_WINDOWS_CUI
+                           : CFREE_PE_SUBSYSTEM_WINDOWS_GUI;
+  return 0;
+}
+
+/* Parse one MSVC link.exe-style argument; a no-op unless --ms-link-driver
+ * was given. Keys are ASCII case-insensitive and may be introduced by
+ * either `/` or `-`. Recognized subset:
+ *   /OUT:path         → o->output_path (error if an output is already set)
+ *   /ENTRY:sym        → o->entry
+ *   /SUBSYSTEM:name   → o->pe_subsystem, via ld_parse_pe_subsystem
+ *   /DEFAULTLIB:name  → resolved immediately against the library
+ *                       directories collected so far (Windows suffix
+ *                       list) and pushed as an input, so search
+ *                       directories must appear before it
+ *   /LIBPATH:dir      → appended to o->lib_dirs, like -L dir
+ *   /NOLOGO /VERBOSE /INCREMENTAL /DEBUG /DYNAMICBASE /NXCOMPAT
+ *                     → accepted and ignored
+ *
+ * Any other `/...` argument is reported and skipped. An unrecognized
+ * `-...` argument is left for the caller, so GNU-style options (-o, -L,
+ * -l, ...) can be mixed with MS flags on one command line.
+ *
+ * Returns 1 if consumed, 0 if the caller should handle the argument,
+ * -1 on hard error. */
+static int ld_try_ms_flag(LdOptions* o, const char* a) {
+  const char* val;
+  if (!o->ms_link_driver) return 0;
+  if (a[0] != '/' && a[0] != '-') return 0;
+
+  if ((val = ms_flag_value(a, "OUT")) != NULL) {
+    if (o->output_seen) {
+      driver_errf(LD_TOOL, "/OUT specified after -o");
+      return -1;
+    }
+    o->output_path = val;
+    o->output_seen = 1;
+    return 1;
+  }
+  if ((val = ms_flag_value(a, "ENTRY")) != NULL) {
+    o->entry = val;
+    return 1;
+  }
+  if ((val = ms_flag_value(a, "LIBPATH")) != NULL) {
+    o->lib_dirs[o->nlib_dirs++] = val;
+    return 1;
+  }
+  if ((val = ms_flag_value(a, "DEFAULTLIB")) != NULL) {
+    /* Resolve eagerly like -l does, using whatever current link-mode
+     * state is in effect. Windows mode triggers the .lib/.dll.a/.a
+     * suffix list. */
+    char* resolved;
+    size_t resolved_size;
+    LibResolveKind kind;
+    LibResolveMode mode = (o->cur_link_mode == CFREE_LM_STATIC)
+                              ? LIB_RESOLVE_STATIC_ONLY
+                              : LIB_RESOLVE_DYNAMIC_PREFER;
+    if (driver_lib_resolve_for_os(o->env, val, mode, LIB_RESOLVE_OS_WINDOWS,
+                                  o->lib_dirs, o->nlib_dirs, &resolved,
+                                  &resolved_size, &kind) != 0) {
+      driver_errf(LD_TOOL, "/DEFAULTLIB: cannot find %s", val);
+      return -1;
+    }
+    if (kind == LIB_RESOLVE_KIND_SHARED || kind == LIB_RESOLVE_KIND_TBD) {
+      ld_push_dso(o, resolved, 1, resolved_size);
+    } else {
+      ld_push_archive(o, resolved, 1, resolved_size);
+    }
+    return 1;
+  }
+  if ((val = ms_flag_value(a, "SUBSYSTEM")) != NULL) {
+    if (ld_parse_pe_subsystem(o, val) != 0) return -1;
+    return 1;
+  }
+  if (ms_flag_match(a, "NOLOGO") || ms_flag_match(a, "VERBOSE") ||
+      ms_flag_match(a, "INCREMENTAL") || ms_flag_match(a, "DEBUG") ||
+      ms_flag_match(a, "DYNAMICBASE") || ms_flag_match(a, "NXCOMPAT")) {
+    /* Common flags every Windows build script sets; silently accept. */
+    return 1;
+  }
+
+  /* Not an MS flag we know. A leading '-' is far more likely to be a
+   * GNU-style option (-o, -L, -l, -T, ...) than an MS one, so hand it
+   * back to the regular parser. Consuming it here would drop the option
+   * and turn its operand into a positional input. */
+  if (a[0] == '-') return 0;
+
+  /* Any other `/key[:val]` shape under --ms-link-driver: warn + skip.
+   * We treat the entire arg as consumed so it doesn't fall through to
+   * the positional path and try to open a file. */
+  driver_errf(LD_TOOL, "ignoring unsupported MS-style flag: %s", a);
+  return 1;
+}
+
static int ld_parse(int argc, char** argv, LdOptions* o) {
int i;
if (ld_alloc_arrays(o, argc) != 0) return 1;
o->target = driver_host_target();
+ /* First pass: detect --ms-link-driver up front so the option can
+ * appear anywhere on the command line and still affect earlier
+ * `/...` tokens. (Matches how `link.exe` treats option order as
+ * non-positional.) */
+ for (i = 1; i < argc; ++i) {
+ if (driver_streq(argv[i], "--ms-link-driver")) {
+ o->ms_link_driver = 1;
+ break;
+ }
+ }
+
for (i = 1; i < argc; ++i) {
const char* a = argv[i];
const char* val;
+ int ms_rc;
+
+ if (driver_streq(a, "--ms-link-driver")) {
+ o->ms_link_driver = 1;
+ continue;
+ }
+ ms_rc = ld_try_ms_flag(o, a);
+ if (ms_rc < 0) return 1;
+ if (ms_rc > 0) continue;
if (driver_streq(a, "-o")) {
if (++i >= argc) {
@@ -430,6 +592,18 @@ static int ld_parse(int argc, char** argv, LdOptions* o) {
o->entry = argv[i];
continue;
}
+ if ((val = arg_eq_value(a, "--subsystem")) != NULL) {
+ if (ld_parse_pe_subsystem(o, val) != 0) return 1;
+ continue;
+ }
+ if (driver_streq(a, "--subsystem")) {
+ if (++i >= argc) {
+ driver_errf(LD_TOOL, "--subsystem requires an argument");
+ return 1;
+ }
+ if (ld_parse_pe_subsystem(o, argv[i]) != 0) return 1;
+ continue;
+ }
if (driver_streq(a, "-T")) {
if (++i >= argc) {
driver_errf(LD_TOOL, "-T requires an argument");
@@ -479,6 +653,7 @@ static int ld_parse(int argc, char** argv, LdOptions* o) {
size_t resolved_size;
LibResolveKind kind;
LibResolveMode mode;
+ LibResolveOS resolve_os;
if (!name) {
driver_errf(LD_TOOL, "-l requires an argument");
return 1;
@@ -487,8 +662,11 @@ static int ld_parse(int argc, char** argv, LdOptions* o) {
* -Bdynamic, --as-needed) prefers .so but falls back to .a. */
mode = (o->cur_link_mode == CFREE_LM_STATIC) ? LIB_RESOLVE_STATIC_ONLY
: LIB_RESOLVE_DYNAMIC_PREFER;
- if (driver_lib_resolve(o->env, name, mode, o->lib_dirs, o->nlib_dirs,
- &resolved, &resolved_size, &kind) != 0) {
+ resolve_os = (o->target.os == CFREE_OS_WINDOWS) ? LIB_RESOLVE_OS_WINDOWS
+ : LIB_RESOLVE_OS_POSIX;
+ if (driver_lib_resolve_for_os(o->env, name, mode, resolve_os, o->lib_dirs,
+ o->nlib_dirs, &resolved, &resolved_size,
+ &kind) != 0) {
driver_errf(LD_TOOL, "cannot find -l%s", name);
return 1;
}
@@ -506,8 +684,12 @@ static int ld_parse(int argc, char** argv, LdOptions* o) {
LibResolveMode mode = (o->cur_link_mode == CFREE_LM_STATIC)
? LIB_RESOLVE_STATIC_ONLY
: LIB_RESOLVE_DYNAMIC_PREFER;
- if (driver_lib_resolve(o->env, val, mode, o->lib_dirs, o->nlib_dirs,
- &resolved, &resolved_size, &kind) != 0) {
+ LibResolveOS resolve_os = (o->target.os == CFREE_OS_WINDOWS)
+ ? LIB_RESOLVE_OS_WINDOWS
+ : LIB_RESOLVE_OS_POSIX;
+ if (driver_lib_resolve_for_os(o->env, val, mode, resolve_os, o->lib_dirs,
+ o->nlib_dirs, &resolved, &resolved_size,
+ &kind) != 0) {
driver_errf(LD_TOOL, "cannot find -l%s", val);
return 1;
}
@@ -523,14 +705,18 @@ static int ld_parse(int argc, char** argv, LdOptions* o) {
size_t resolved_size;
LibResolveKind kind;
LibResolveMode mode;
+ LibResolveOS resolve_os;
if (++i >= argc) {
driver_errf(LD_TOOL, "--library requires an argument");
return 1;
}
mode = (o->cur_link_mode == CFREE_LM_STATIC) ? LIB_RESOLVE_STATIC_ONLY
: LIB_RESOLVE_DYNAMIC_PREFER;
- if (driver_lib_resolve(o->env, argv[i], mode, o->lib_dirs, o->nlib_dirs,
- &resolved, &resolved_size, &kind) != 0) {
+ resolve_os = (o->target.os == CFREE_OS_WINDOWS) ? LIB_RESOLVE_OS_WINDOWS
+ : LIB_RESOLVE_OS_POSIX;
+ if (driver_lib_resolve_for_os(o->env, argv[i], mode, resolve_os,
+ o->lib_dirs, o->nlib_dirs, &resolved,
+ &resolved_size, &kind) != 0) {
driver_errf(LD_TOOL, "cannot find -l%s", argv[i]);
return 1;
}
@@ -982,6 +1168,7 @@ static int ld_run_link(LdOptions* o) {
lopts.build_id_len = o->build_id_len;
lopts.gc_sections = o->gc_sections;
lopts.pie = o->pie;
+ lopts.pe_subsystem = o->pe_subsystem;
lopts.interp_path = o->interp_path;
lopts.soname = o->soname;
/* Per --enable-new-dtags / --disable-new-dtags: when new_dtags is
diff --git a/driver/lib_resolve.c b/driver/lib_resolve.c
@@ -3,19 +3,22 @@
#include <stddef.h>
#include <stdint.h>
-/* Compose `<dir>/lib<name><suffix>` into a fresh heap buffer. Inserts
- * a separating '/' iff `dir` does not already end in one. Empty `dir`
- * is treated as the current directory: the path becomes
- * `lib<name><suffix>`. `suffix` is e.g. ".a" or ".so" — caller-owned,
- * NUL-terminated. */
-static char* compose_path(DriverEnv* env, const char* dir, const char* name,
- const char* suffix, size_t* out_size) {
+/* Compose `<dir>/<prefix><name><suffix>` into a fresh heap buffer.
+ * Inserts a separating '/' iff `dir` does not already end in one.
+ * Empty `dir` is treated as the current directory: the path becomes
+ * `<prefix><name><suffix>`. `prefix` is "lib" or "" (Windows MSVC-
+ * style libs ship without the prefix); `suffix` is e.g. ".a" or ".so"
+ * — both caller-owned, NUL-terminated. */
+static char* compose_path(DriverEnv* env, const char* dir, const char* prefix,
+ const char* name, const char* suffix,
+ size_t* out_size) {
size_t dlen = driver_strlen(dir);
+ size_t plen = driver_strlen(prefix);
size_t nlen = driver_strlen(name);
size_t slen = driver_strlen(suffix);
size_t need_slash = (dlen > 0 && dir[dlen - 1] != '/') ? 1 : 0;
- /* "<dir>" + "/"? + "lib" + "<name>" + "<suffix>" + NUL */
- size_t bytes = dlen + need_slash + 3 + nlen + slen + 1;
+ /* "<dir>" + "/"? + "<prefix>" + "<name>" + "<suffix>" + NUL */
+ size_t bytes = dlen + need_slash + plen + nlen + slen + 1;
char* buf = driver_alloc(env, bytes);
size_t off = 0;
if (!buf) return NULL;
@@ -26,8 +29,10 @@ static char* compose_path(DriverEnv* env, const char* dir, const char* name,
if (need_slash) {
buf[off++] = '/';
}
- driver_memcpy(buf + off, "lib", 3);
- off += 3;
+ if (plen) {
+ driver_memcpy(buf + off, prefix, plen);
+ off += plen;
+ }
if (nlen) {
driver_memcpy(buf + off, name, nlen);
off += nlen;
@@ -41,16 +46,18 @@ static char* compose_path(DriverEnv* env, const char* dir, const char* name,
return buf;
}
-/* Try one (suffix, kind) pair across every search dir; return 0 on
+/* Try one (prefix, suffix) pair across every search dir; return 0 on
* the first hit. Allocations for non-matching candidates are freed
* before the next attempt. */
-static int try_suffix(DriverEnv* env, const char* name, const char* suffix,
- const char* const* search_dirs, uint32_t nsearch_dirs,
- char** out_path, size_t* out_size) {
+static int try_variant(DriverEnv* env, const char* prefix, const char* name,
+ const char* suffix, const char* const* search_dirs,
+ uint32_t nsearch_dirs, char** out_path,
+ size_t* out_size) {
uint32_t i;
for (i = 0; i < nsearch_dirs; ++i) {
size_t bytes;
- char* cand = compose_path(env, search_dirs[i], name, suffix, &bytes);
+ char* cand =
+ compose_path(env, search_dirs[i], prefix, name, suffix, &bytes);
if (!cand) return 1;
if (driver_path_exists(cand)) {
*out_path = cand;
@@ -62,12 +69,18 @@ static int try_suffix(DriverEnv* env, const char* name, const char* suffix,
return 1;
}
-int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode,
- const char* const* search_dirs, uint32_t nsearch_dirs,
- char** out_path, size_t* out_size,
- LibResolveKind* out_kind) {
- if (!env || !name) return 1;
+/* POSIX naming shorthand: probes `lib<name><suffix>` in each search dir
+ * by delegating to try_variant with the fixed "lib" prefix. */
+static int try_suffix(DriverEnv* env, const char* name, const char* suffix,
+                      const char* const* search_dirs, uint32_t nsearch_dirs,
+                      char** out_path, size_t* out_size) {
+  const char* const posix_prefix = "lib";
+  int rc = try_variant(env, posix_prefix, name, suffix, search_dirs,
+                       nsearch_dirs, out_path, out_size);
+  return rc;
+}
+static int resolve_posix(DriverEnv* env, const char* name, LibResolveMode mode,
+ const char* const* search_dirs, uint32_t nsearch_dirs,
+ char** out_path, size_t* out_size,
+ LibResolveKind* out_kind) {
/* GNU-ld order: under dynamic mode prefer .so over .a within the
* same search dir. In practice that means we still iterate dirs in
* order, but for each dir try .so first when applicable. To keep
@@ -103,3 +116,63 @@ int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode,
}
return 1;
}
+
+static int resolve_windows(DriverEnv* env, const char* name,
+                           LibResolveMode mode,
+                           const char* const* search_dirs,
+                           uint32_t nsearch_dirs, char** out_path,
+                           size_t* out_size, LibResolveKind* out_kind) {
+  /* Windows / mingw layout. Candidates are tried in table order, each
+   * across every search dir before moving on: the mingw-canonical
+   * names first (lib<n>.dll.a, lib<n>.a), then the MSVC-style `<n>.lib`
+   * and `<n>.dll.a`. Every hit is reported as a static archive.
+   * Short-form import libraries (lib<n>.dll.a) are AR archives whose
+   * members are COFF .obj files plus IDATA stubs, so the existing
+   * archive ingestion path handles them. Long-form import libraries
+   * are tracked separately (a parallel Windows task; not yet wired
+   * here). `mode` is deliberately ignored. */
+  static const struct {
+    const char* prefix;
+    const char* suffix;
+  } candidates[] = {
+      {"lib", ".dll.a"},
+      {"lib", ".a"},
+      {"", ".lib"},
+      {"", ".dll.a"},
+  };
+  size_t k;
+  (void)mode;
+  for (k = 0; k < sizeof candidates / sizeof candidates[0]; ++k) {
+    if (try_variant(env, candidates[k].prefix, name, candidates[k].suffix,
+                    search_dirs, nsearch_dirs, out_path, out_size) != 0)
+      continue;
+    if (out_kind) *out_kind = LIB_RESOLVE_KIND_ARCHIVE;
+    return 0;
+  }
+  return 1;
+}
+
+/* Resolve `name` using the naming scheme selected by `os`; any value
+ * other than LIB_RESOLVE_OS_WINDOWS takes the POSIX path. Fails (returns
+ * 1) up front when `env` or `name` is NULL. */
+int driver_lib_resolve_for_os(DriverEnv* env, const char* name,
+                              LibResolveMode mode, LibResolveOS os,
+                              const char* const* search_dirs,
+                              uint32_t nsearch_dirs, char** out_path,
+                              size_t* out_size, LibResolveKind* out_kind) {
+  if (!env || !name) return 1;
+  switch (os) {
+    case LIB_RESOLVE_OS_WINDOWS:
+      return resolve_windows(env, name, mode, search_dirs, nsearch_dirs,
+                             out_path, out_size, out_kind);
+    default:
+      return resolve_posix(env, name, mode, search_dirs, nsearch_dirs,
+                           out_path, out_size, out_kind);
+  }
+}
+
+/* Legacy entry point: same as driver_lib_resolve_for_os with the POSIX
+ * naming scheme. */
+int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode,
+                       const char* const* search_dirs, uint32_t nsearch_dirs,
+                       char** out_path, size_t* out_size,
+                       LibResolveKind* out_kind) {
+  const LibResolveOS naming = LIB_RESOLVE_OS_POSIX;
+  return driver_lib_resolve_for_os(env, name, mode, naming, search_dirs,
+                                   nsearch_dirs, out_path, out_size, out_kind);
+}
diff --git a/driver/lib_resolve.h b/driver/lib_resolve.h
@@ -27,6 +27,14 @@ typedef enum LibResolveKind {
LIB_RESOLVE_KIND_TBD = 2,
} LibResolveKind;
+/* Target-OS hint for the suffix list. Windows uses the mingw / MSVC
+ * naming variants (`lib<name>.dll.a`, `lib<name>.a`, `<name>.lib`,
+ * `<name>.dll.a`); everything else uses the POSIX `lib<name>.*` set. */
+typedef enum LibResolveOS {
+ LIB_RESOLVE_OS_POSIX = 0,
+ LIB_RESOLVE_OS_WINDOWS = 1,
+} LibResolveOS;
+
/* Resolve `-l<name>` against a list of `-L`-style search directories.
*
* On success, returns 0 and writes a heap-allocated, NUL-terminated
@@ -36,6 +44,12 @@ typedef enum LibResolveKind {
* matched file is a `.so` (LIB_RESOLVE_KIND_SHARED) or a `.a`
* (LIB_RESOLVE_KIND_ARCHIVE).
*
+ * The legacy entry point `driver_lib_resolve` defaults to POSIX
+ * naming. `driver_lib_resolve_for_os` is the same function with an
+ * explicit target-OS hint so the caller can switch the suffix list
+ * for cross-compilation (Windows targets need .lib / .dll.a in
+ * addition to .a). The OS hint is independent of the host OS.
+ *
* On failure, returns nonzero with `*out_path` unchanged. Failure
* cases:
* - no candidate exists in any of the search directories
@@ -45,4 +59,10 @@ int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode,
char** out_path, size_t* out_size,
LibResolveKind* out_kind);
+int driver_lib_resolve_for_os(DriverEnv* env, const char* name,
+ LibResolveMode mode, LibResolveOS os,
+ const char* const* search_dirs,
+ uint32_t nsearch_dirs, char** out_path,
+ size_t* out_size, LibResolveKind* out_kind);
+
#endif
diff --git a/driver/objdump.c b/driver/objdump.c
@@ -24,6 +24,7 @@ typedef struct ObjdumpOpts {
int D; /* -D: disasm all sections */
int r; /* -r: relocations */
int s; /* -s: hex section contents */
+ int p; /* -p / --private-headers: PE optional header + data dirs */
const char* j[MAX_J_FILTERS];
int nj;
} ObjdumpOpts;
@@ -53,15 +54,25 @@ void driver_help_objdump(void) {
" symbol table), matching GNU objdump's default-ish behaviour.\n"
"\n"
"OPERATIONS (any combination)\n"
- " -f Print the file header\n"
+ " -f Print the file header: architecture, format,\n"
+ " section / symbol counts, HAS_RELOC / HAS_SYMS\n"
+ " flags, and (for PE images) image base,\n"
+ " entry point, and subsystem.\n"
" -h Print section headers (idx, name, size, align,\n"
- " flags). NOTE: this is the GNU objdump meaning of\n"
- " -h — it does NOT print this help; use --help.\n"
+ " flags). For COFF inputs the raw\n"
+ " IMAGE_SCN_* Characteristics value is appended\n"
+ " on a continuation line and COMDAT groups are\n"
+ " printed after the section table. NOTE: this is\n"
+ " the GNU objdump meaning of -h — it does NOT\n"
+ " print this help; use --help.\n"
" -t Print the symbol table\n"
" -d Disassemble executable sections\n"
" -D Disassemble all sections\n"
" -r Print relocation records\n"
" -s Print section contents as a hex+ASCII dump\n"
+ " -p, --private-headers\n"
+ " Print PE optional header, data directories,\n"
+ " and per-DLL import lists (PE images only)\n"
" -x Aggregate: -f -h -r -t\n"
"\n"
"FILTERS\n"
@@ -86,6 +97,463 @@ void driver_help_objdump(void) {
"usage\n");
}
+/* ---- PE/COFF private-header walker (used by `-p`) ----
+ *
+ * The objdump driver currently relies on the high-level CfreeObjFile
+ * interface for section/symbol/disasm output. For PE images that hides
+ * a lot of useful structure: the optional header, data directories,
+ * and per-DLL import lists. The walker below operates on the raw input
+ * bytes so we can print this view without piping the data through
+ * libcfree. It does the strict minimum needed for a `-p` style dump
+ * and bails out on malformed offsets — diagnostic, not security-grade.
+ *
+ * RVA-to-file resolution: each section header records VirtualAddress
+ * (RVA) and PointerToRawData (file offset). A target RVA lands inside
+ * a section iff RVA in [VA, VA + VirtualSize). The file offset of the
+ * RVA inside the section's raw bytes is PointerToRawData + (RVA - VA).
+ * `pe_rva_to_file` returns -1 when no section covers the RVA. */
+#define PE_DOS_E_LFANEW_OFFSET 60u
+#define PE_FILE_HEADER_SIZE 20u
+#define PE_OPT_HDR64_MAGIC 0x020Bu
+#define PE_NUM_DATA_DIRS 16u
+#define PE_DATA_DIRECTORY_SIZE 8u
+#define PE_SECTION_HEADER_SIZE 40u
+#define PE_DIR_EXPORT 0u
+#define PE_DIR_IMPORT 1u
+#define PE_DIR_RESOURCE 2u
+#define PE_DIR_EXCEPTION 3u
+#define PE_DIR_BASERELOC 5u
+#define PE_DIR_DEBUG 6u
+#define PE_DIR_TLS 9u
+#define PE_DIR_IAT 12u
+#define PE_IMPORT_DESCRIPTOR_SIZE 20u
+#define PE_THUNK_SIZE 8u
+#define PE_ORDINAL_FLAG64 0x8000000000000000ull
+
+/* COFF-specific Characteristics bits we surface as tags. Kept in sync
+ * with src/obj/coff.h's IMAGE_SCN_* values; objdump only needs the
+ * diagnostic-visible subset. */
+#define OBJDUMP_IMAGE_SCN_LNK_INFO 0x00000200u
+#define OBJDUMP_IMAGE_SCN_LNK_REMOVE 0x00000800u
+#define OBJDUMP_IMAGE_SCN_LNK_COMDAT 0x00001000u
+#define OBJDUMP_IMAGE_SCN_GPREL 0x00008000u
+#define OBJDUMP_IMAGE_SCN_MEM_DISCARDABLE 0x02000000u
+#define OBJDUMP_IMAGE_SCN_MEM_SHARED 0x10000000u
+
+static int j_match(const ObjdumpOpts* o, const char* name);
+
+static uint16_t pe_rd_u16(const uint8_t* p) {
+  /* Little-endian: p[0] is the low byte. */
+  uint32_t hi = (uint32_t)p[1] << 8;
+  return (uint16_t)(hi | p[0]);
+}
+static uint32_t pe_rd_u32(const uint8_t* p) {
+  /* Little-endian: assemble the low and high halves, then combine. */
+  uint32_t lo = (uint32_t)p[0] | ((uint32_t)p[1] << 8);
+  uint32_t hi = ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
+  return lo | hi;
+}
+static uint64_t pe_rd_u64(const uint8_t* p) {
+  /* Two little-endian 32-bit halves; the second word is the high half. */
+  uint64_t lo = pe_rd_u32(p);
+  uint64_t hi = pe_rd_u32(p + 4);
+  return (hi << 32) | lo;
+}
+
+/* Short name for data-directory index `i`; the table position is the
+ * IMAGE_DIRECTORY_ENTRY_* index. Indices past the table yield "?". Keep
+ * aligned with the order in coff.h to avoid drift. */
+static const char* pe_dir_name(uint32_t i) {
+  static const char* const names[] = {
+      "EXPORT",         /* 0 */
+      "IMPORT",         /* 1 */
+      "RESOURCE",       /* 2 */
+      "EXCEPTION",      /* 3 */
+      "SECURITY",       /* 4 */
+      "BASERELOC",      /* 5 */
+      "DEBUG",          /* 6 */
+      "ARCHITECTURE",   /* 7 */
+      "GLOBALPTR",      /* 8 */
+      "TLS",            /* 9 */
+      "LOAD_CONFIG",    /* 10 */
+      "BOUND_IMPORT",   /* 11 */
+      "IAT",            /* 12 */
+      "DELAY_IMPORT",   /* 13 */
+      "COM_DESCRIPTOR", /* 14 */
+      "RESERVED",       /* 15 */
+  };
+  if (i >= sizeof names / sizeof names[0]) return "?";
+  return names[i];
+}
+
+/* Printable name for an optional-header Subsystem value; "UNKNOWN" for
+ * any value without an entry in the table. */
+static const char* pe_subsystem_name(uint16_t s) {
+  static const struct {
+    uint16_t id;
+    const char* name;
+  } known[] = {
+      {1, "NATIVE"},
+      {2, "WINDOWS_GUI"},
+      {3, "WINDOWS_CUI"},
+      {5, "OS2_CUI"},
+      {7, "POSIX_CUI"},
+      {9, "WINDOWS_CE_GUI"},
+      {10, "EFI_APPLICATION"},
+      {11, "EFI_BOOT_SERVICE_DRIVER"},
+      {12, "EFI_RUNTIME_DRIVER"},
+      {13, "EFI_ROM"},
+      {14, "XBOX"},
+      {16, "WINDOWS_BOOT_APPLICATION"},
+  };
+  size_t k;
+  for (k = 0; k < sizeof known / sizeof known[0]; ++k) {
+    if (known[k].id == s) return known[k].name;
+  }
+  return "UNKNOWN";
+}
+
+/* Translate `rva` to a file offset by scanning the `nsec` section
+ * headers that start at `sec_off`. Returns -1 when:
+ *   - no section's [VA, VA + VirtualSize) range covers the RVA,
+ *   - the RVA falls in a section's virtual tail beyond its raw data,
+ *   - a section header would extend past the end of `buf`, or
+ *   - the resulting offset does not fit in a non-negative `long`.
+ * The returned offset is NOT checked against `buf_len`; callers bound
+ * their own reads. */
+static long pe_rva_to_file(const uint8_t* buf, size_t buf_len, size_t sec_off,
+                           uint16_t nsec, uint32_t rva) {
+  /* Largest value a `long` can hold, without needing <limits.h>. */
+  const uint64_t long_max = (uint64_t)(((unsigned long)-1) >> 1);
+  uint16_t i;
+  for (i = 0; i < nsec; ++i) {
+    size_t sh = sec_off + (size_t)i * PE_SECTION_HEADER_SIZE;
+    uint32_t va;
+    uint32_t vsize;
+    uint32_t raw_off;
+    uint32_t raw_size;
+    uint32_t delta;
+    uint64_t file_off;
+    if (sh + PE_SECTION_HEADER_SIZE > buf_len) return -1;
+    vsize = pe_rd_u32(buf + sh + 8);
+    va = pe_rd_u32(buf + sh + 12);
+    raw_size = pe_rd_u32(buf + sh + 16);
+    raw_off = pe_rd_u32(buf + sh + 20);
+    /* VirtualSize is sometimes 0 in object files; fall back to raw size. */
+    if (vsize == 0) vsize = raw_size;
+    if (rva < va) continue;
+    /* Test the distance into the section rather than `va + vsize`,
+     * which wraps in 32 bits for sections near the top of the RVA
+     * space and would make them unreachable. */
+    delta = rva - va;
+    if (delta >= vsize) continue;
+    if (delta >= raw_size) return -1;
+    /* Widen before adding: `raw_off + delta` can wrap in 32 bits and
+     * would then name an unrelated, in-bounds file offset. */
+    file_off = (uint64_t)raw_off + delta;
+    if (file_off > long_max) return -1;
+    return (long)file_off;
+  }
+  return -1;
+}
+
+/* Copy the NUL-terminated string at `buf + off` into `dst`, stopping at
+ * the first NUL, at the end of `buf`, or after `dstcap - 1` bytes,
+ * whichever comes first. `dst` is always NUL-terminated when
+ * `dstcap > 0`. Returns 0 on success (including a truncated copy), 1 if
+ * `off` is out of bounds or `dstcap` is 0 (nothing can be stored). */
+static int pe_read_cstr(const uint8_t* buf, size_t buf_len, size_t off,
+                        char* dst, size_t dstcap) {
+  size_t i = 0;
+  /* Without room for the terminator any store would overrun `dst`. */
+  if (dstcap == 0) return 1;
+  dst[0] = '\0';
+  if (off >= buf_len) return 1;
+  for (; i + 1 < dstcap && off + i < buf_len && buf[off + i]; ++i) {
+    dst[i] = (char)buf[off + i];
+  }
+  dst[i] = '\0';
+  return 0;
+}
+
+/* Print the import tables of a PE image: for each import descriptor the
+ * DLL name and ILT/IAT RVAs, then one line per imported symbol (by name
+ * or by ordinal). `import_rva` is the RVA of the first descriptor;
+ * `sec_off`/`nsec` describe the section table used for RVA translation.
+ * Thunks are read as 8-byte entries with the 64-bit ordinal flag, i.e.
+ * the PE32+ layout. `import_size` is currently unused: the walk ends at
+ * the terminating descriptor or at the end of `buf`. */
+static void pe_dump_imports(const uint8_t* buf, size_t buf_len, size_t sec_off,
+ uint16_t nsec, uint32_t import_rva,
+ uint32_t import_size) {
+ long desc_off;
+ uint32_t consumed;
+ (void)import_size;
+ desc_off = pe_rva_to_file(buf, buf_len, sec_off, nsec, import_rva);
+ if (desc_off < 0) {
+ driver_printf(" (import directory RVA not covered by any section)\n");
+ return;
+ }
+ driver_printf("\nThe Import Tables:\n");
+ /* One iteration per import descriptor (one per DLL). */
+ for (consumed = 0;; consumed += PE_IMPORT_DESCRIPTOR_SIZE) {
+ size_t off = (size_t)desc_off + consumed;
+ uint32_t ilt_rva;
+ uint32_t name_rva;
+ uint32_t iat_rva;
+ char dll[256];
+ long thunk_off;
+ uint32_t i;
+ /* Stop rather than read a descriptor that runs off the buffer. */
+ if (off + PE_IMPORT_DESCRIPTOR_SIZE > buf_len) break;
+ ilt_rva = pe_rd_u32(buf + off + 0);
+ name_rva = pe_rd_u32(buf + off + 12);
+ iat_rva = pe_rd_u32(buf + off + 16);
+ /* A descriptor whose ILT, name and IAT RVAs are all zero terminates
+ * the chain. */
+ if (ilt_rva == 0 && name_rva == 0 && iat_rva == 0) break;
+ {
+ long name_off = pe_rva_to_file(buf, buf_len, sec_off, nsec, name_rva);
+ if (name_off < 0 ||
+ pe_read_cstr(buf, buf_len, (size_t)name_off, dll, sizeof dll) != 0) {
+ dll[0] = '\0'; /* printed as "(unreadable)" below */
+ }
+ }
+ driver_printf(" DLL Name: %s\n", dll[0] ? dll : "(unreadable)");
+ driver_printf(" ILT RVA: 0x%x IAT RVA: 0x%x\n", ilt_rva, iat_rva);
+ /* Prefer walking the original first thunk (ILT) for names. Some
+ * mingw-emitted images zero the ILT and only ship the IAT; fall
+ * back to the IAT in that case. */
+ {
+ uint32_t walk_rva = ilt_rva ? ilt_rva : iat_rva;
+ thunk_off =
+ walk_rva ? pe_rva_to_file(buf, buf_len, sec_off, nsec, walk_rva) : -1;
+ }
+ /* No resolvable thunk array: keep the DLL header, skip its symbols. */
+ if (thunk_off < 0) continue;
+ for (i = 0;; ++i) {
+ size_t toff = (size_t)thunk_off + (size_t)i * PE_THUNK_SIZE;
+ uint64_t t;
+ if (toff + PE_THUNK_SIZE > buf_len) break;
+ t = pe_rd_u64(buf + toff);
+ if (t == 0) break; /* zero thunk ends this DLL's list */
+ if (t & PE_ORDINAL_FLAG64) {
+ driver_printf(" Ordinal: %u\n", (unsigned)(t & 0xFFFFu));
+ } else {
+ /* Otherwise the low 32 bits are used as the RVA of a hint/name
+ * entry; the name is read 2 bytes in, past the hint. */
+ long hint_off =
+ pe_rva_to_file(buf, buf_len, sec_off, nsec, (uint32_t)t);
+ char name[256];
+ if (hint_off < 0 ||
+ pe_read_cstr(buf, buf_len, (size_t)hint_off + 2u, name,
+ sizeof name) != 0) {
+ continue; /* unreadable entry: skipped without output */
+ }
+ driver_printf(" Name: %s\n", name);
+ }
+ }
+ }
+ driver_printf("\n");
+}
+
+/* Parsed view of a PE32+ image's headers. Populated by pe_parse_image;
+ * callers check `valid` before reading other fields. Avoids the prior
+ * pattern of every PE-walker re-validating the DOS/PE/optional header
+ * triplet from scratch.
+ *
+ * Offsets in the field notes are relative to the COFF file header
+ * ("file hdr") or to the optional header ("opt hdr"), as read by
+ * pe_parse_image. Fields marked PE32+ are only filled in when
+ * opt_magic == PE_OPT_HDR64_MAGIC. */
+typedef struct PeImage {
+ int valid; /* 1 once the header chain parsed; 0 otherwise */
+ uint16_t machine; /* file hdr +0 */
+ uint16_t file_chars; /* file hdr +18 */
+ uint16_t nsec; /* file hdr +2: number of section headers */
+ uint16_t opt_magic; /* opt hdr +0 */
+ uint16_t subsystem; /* opt hdr +68 (PE32+) */
+ uint16_t dllchars; /* opt hdr +70 (PE32+) */
+ uint64_t image_base; /* opt hdr +24 (PE32+) */
+ uint32_t entry_rva; /* opt hdr +16 (PE32+) */
+ size_t sec_off; /* file offset just past the optional header (PE32+) */
+ size_t dir_off; /* file offset of opt hdr +112 (PE32+) */
+} PeImage;
+
+/* Locate and decode the headers of the PE image in `buf`:
+ * DOS header → "PE\0\0" signature → COFF file header → optional header.
+ * Returns 1 with out->valid set when they are present and in bounds, 0
+ * otherwise. When the optional-header magic is not PE32+, only machine,
+ * nsec, file_chars and opt_magic are filled in; every other field is
+ * zero. A PE32+ optional header shorter than the 112 bytes read here is
+ * rejected. */
+static int pe_parse_image(const uint8_t* buf, size_t buf_len, PeImage* out) {
+  static const PeImage zero = {0};
+  uint32_t e_lfanew;
+  size_t coff_off;
+  size_t opt_off;
+  uint16_t opt_size;
+
+  /* Start from a known state so no field is ever left indeterminate. */
+  *out = zero;
+  if (buf_len < PE_DOS_E_LFANEW_OFFSET + 4u) return 0;
+  if (pe_rd_u16(buf) != 0x5A4Du) return 0; /* "MZ" */
+  e_lfanew = pe_rd_u32(buf + PE_DOS_E_LFANEW_OFFSET);
+  /* Subtraction form: `e_lfanew + 24` could wrap where size_t is 32-bit
+   * and let a bogus offset pass the bounds check. */
+  if (e_lfanew > buf_len || buf_len - e_lfanew < 4u + PE_FILE_HEADER_SIZE)
+    return 0;
+  if (pe_rd_u32(buf + e_lfanew) != 0x00004550u) return 0; /* "PE\0\0" */
+  coff_off = (size_t)e_lfanew + 4u;
+  out->machine = pe_rd_u16(buf + coff_off + 0);
+  out->nsec = pe_rd_u16(buf + coff_off + 2);
+  out->file_chars = pe_rd_u16(buf + coff_off + 18);
+  opt_size = pe_rd_u16(buf + coff_off + 16);
+  opt_off = coff_off + PE_FILE_HEADER_SIZE;
+  /* The magic alone is 2 bytes; opt_off <= buf_len was established above,
+   * so the subtraction cannot underflow. */
+  if (opt_size < 2u || opt_size > buf_len - opt_off) return 0;
+  out->opt_magic = pe_rd_u16(buf + opt_off);
+  if (out->opt_magic != PE_OPT_HDR64_MAGIC) {
+    /* PE32 (0x10B) is out of scope but the caller may still want to
+     * report what it found, so we return a "valid header, unsupported
+     * subset" view rather than failing. */
+    out->valid = 1;
+    return 1;
+  }
+  /* Every fixed field read below lies within the first 112 bytes of the
+   * optional header (the data directories start at +112). Only
+   * `opt_size` bytes are known to be in bounds, so a shorter header
+   * must be rejected before reading. */
+  if (opt_size < 112u) return 0;
+  out->entry_rva = pe_rd_u32(buf + opt_off + 16);
+  out->image_base = pe_rd_u64(buf + opt_off + 24);
+  out->subsystem = pe_rd_u16(buf + opt_off + 68);
+  out->dllchars = pe_rd_u16(buf + opt_off + 70);
+  out->sec_off = opt_off + opt_size;
+  out->dir_off = opt_off + 112u;
+  out->valid = 1;
+  return 1;
+}
+
+/* IMAGE_FILE_HEADER.Machine code -> display name; "unknown" if unlisted. */
+static const char* pe_machine_name(uint16_t m) {
+  static const struct { uint16_t code; const char* name; } kMachines[] = {
+      {0x8664u, "x86_64 (AMD64)"},    {0xAA64u, "aarch64 (ARM64)"},
+      {0xA641u, "aarch64 (ARM64EC)"}, {0x014Cu, "i386"},
+      {0x01C0u, "arm"},               {0x01C4u, "armnt"},
+      {0x0200u, "ia64"},              {0x5064u, "riscv64"},
+  };
+  size_t k;
+  for (k = 0; k < sizeof kMachines / sizeof kMachines[0]; ++k)
+    if (kMachines[k].code == m) return kMachines[k].name;
+  return "unknown";
+}
+
+/* `-f` for PE images (executables / DLLs, which cfree_obj_open can't parse
+ * yet): prints the format banner, machine and COFF flags, then either the
+ * PE32+ start address / image base / subsystem or a PE32 notice. */
+static void dump_pe_file_header(const char* label, const PeImage* pe) {
+  const char* arch = pe_machine_name(pe->machine);
+  driver_printf("%s:\tfile format pei-%s\n\n", label, arch);
+  driver_printf("architecture: %s, flags 0x%04x\n", arch,
+                (unsigned)pe->file_chars);
+  if (pe->opt_magic != PE_OPT_HDR64_MAGIC) {
+    /* The PE32+ fields of *pe are not meaningful here; leave them alone. */
+    driver_printf("PE32 (magic 0x%x) — only PE32+ inspection is implemented\n\n",
+                  (unsigned)pe->opt_magic);
+    return;
+  }
+  driver_printf("start address 0x%016llx\n",
+                (unsigned long long)(pe->image_base + pe->entry_rva));
+  driver_printf("image base: 0x%llx, entry rva: 0x%x, subsystem: %u (%s)\n\n",
+                (unsigned long long)pe->image_base, pe->entry_rva,
+                (unsigned)pe->subsystem, pe_subsystem_name(pe->subsystem));
+}
+
+/* Decode IMAGE_SECTION_HEADER.Characteristics into a comma-separated tag
+ * list (GNU objdump style) stored NUL-terminated in buf[0..cap). Output
+ * that does not fit is truncated; with cap == 0 buf is left untouched. */
+static void render_pe_sec_flags(uint32_t ch, char* buf, size_t cap) {
+  /* Tags are emitted in table order, straight from the raw flag bits. */
+  static const struct { uint32_t mask; const char* tag; } kTags[] = {
+      {0x00000020u, "CODE"},
+      {0x00000040u, "DATA"},
+      {0x00000080u, "BSS"},
+      {OBJDUMP_IMAGE_SCN_LNK_INFO, "LINK_INFO"},
+      {OBJDUMP_IMAGE_SCN_LNK_REMOVE, "LINK_REMOVE"},
+      {OBJDUMP_IMAGE_SCN_LNK_COMDAT, "LINK_ONCE"},
+      {OBJDUMP_IMAGE_SCN_GPREL, "GPREL"},
+      {OBJDUMP_IMAGE_SCN_MEM_DISCARDABLE, "DISCARDABLE"},
+      {OBJDUMP_IMAGE_SCN_MEM_SHARED, "SHARED"},
+      {0x20000000u, "EXEC"},
+      {0x40000000u, "READ"},
+      {0x80000000u, "WRITE"},
+  };
+  size_t n = 0;
+  size_t k;
+  if (cap == 0) return; /* no room even for the terminator */
+  for (k = 0; k < sizeof kTags / sizeof kTags[0] && n + 1 < cap; ++k) {
+    const char* t = kTags[k].tag;
+    if (!(ch & kTags[k].mask)) continue;
+    if (n > 0) buf[n++] = ','; /* separator only between tags */
+    while (*t && n + 1 < cap) buf[n++] = *t++;
+  }
+  buf[n] = '\0';
+}
+
+/* `-h` for PE images: reads each IMAGE_SECTION_HEADER straight out of the
+ * raw file bytes (cfree_obj_open doesn't yet parse PE executables) and
+ * prints one row per section accepted by j_match, in the same general
+ * shape as dump_sections. Stops at the first header that is truncated. */
+static void dump_pe_sections(const char* label, const PeImage* pe,
+                             const uint8_t* buf, size_t buf_len,
+                             const ObjdumpOpts* opts) {
+  char tags[160];
+  char secname[9];
+  uint16_t idx;
+  driver_printf("%s:\tSections (PE image):\n", label);
+  driver_printf("Idx Name VMA Size "
+                "FileOff Align Flags\n");
+  for (idx = 0; idx < pe->nsec; ++idx) {
+    size_t hdr_off = pe->sec_off + (size_t)idx * PE_SECTION_HEADER_SIZE;
+    const uint8_t* hdr;
+    uint32_t virt_size, rva, raw_size;
+    uint32_t raw_off, chars, align_bits;
+    unsigned align_log2;
+    int c;
+    if (hdr_off + PE_SECTION_HEADER_SIZE > buf_len) break;
+    hdr = buf + hdr_off;
+    /* Copy the 8 name bytes and terminate them ourselves. */
+    for (c = 0; c < 8; ++c) secname[c] = (char)hdr[c];
+    secname[8] = '\0';
+    if (!j_match(opts, secname)) continue;
+    virt_size = pe_rd_u32(hdr + 8);
+    rva = pe_rd_u32(hdr + 12);
+    raw_size = pe_rd_u32(hdr + 16);
+    raw_off = pe_rd_u32(hdr + 20);
+    chars = pe_rd_u32(hdr + 36);
+    /* Bits 20..23 are decoded as log2(alignment) + 1; 0 prints as 2**0. */
+    align_bits = (chars >> 20) & 0xFu;
+    align_log2 = align_bits ? (unsigned)(align_bits - 1u) : 0u;
+    render_pe_sec_flags(chars, tags, sizeof(tags));
+    driver_printf("%3u %-16s %016llx %08x %08x 2**%-2u %s\n", (unsigned)idx,
+                  secname, (unsigned long long)(pe->image_base + rva),
+                  virt_size ? virt_size : raw_size, raw_off, align_log2, tags);
+    driver_printf(" Characteristics: 0x%08x\n",
+                  chars);
+  }
+  driver_printf("\n");
+}
+
+/* `-p`: print the PE32+ header highlights GNU objdump shows (parsed via
+ * pe_parse_image), then the non-empty data directories and the import
+ * table. Returns silently if the headers don't validate; a PE32 image
+ * gets a one-line "skipping" notice instead. */
+static void dump_pe_private(const char* label, const uint8_t* buf,
+                            size_t buf_len) {
+  PeImage pe;
+  uint32_t import_rva = 0, import_size = 0;
+  uint32_t ndirs, i;
+  if (!pe_parse_image(buf, buf_len, &pe) || !pe.valid) return;
+  if (pe.opt_magic != PE_OPT_HDR64_MAGIC) {
+    driver_printf("%s:\tPE optional header magic 0x%x (PE32) — skipping\n",
+                  label, (unsigned)pe.opt_magic);
+    return;
+  }
+
+  driver_printf("\n%s:\tPE32+ private headers\n", label);
+  driver_printf(" Magic: 0x%x (PE32+)\n", (unsigned)pe.opt_magic);
+  driver_printf(" Machine: 0x%04x (%s)\n", (unsigned)pe.machine,
+                pe_machine_name(pe.machine));
+  driver_printf(" Characteristics: 0x%04x\n", (unsigned)pe.file_chars);
+  driver_printf(" ImageBase: 0x%llx\n", (unsigned long long)pe.image_base);
+  driver_printf(" AddressOfEntryPoint: 0x%x\n", pe.entry_rva);
+  driver_printf(" Subsystem: %u (%s)\n", (unsigned)pe.subsystem,
+                pe_subsystem_name(pe.subsystem));
+  driver_printf(" DllCharacteristics: 0x%04x\n", (unsigned)pe.dllchars);
+  driver_printf(" NumberOfSections: %u\n", (unsigned)pe.nsec);
+
+  /* NumberOfRvaAndSizes is the u32 just before the directory array; honor
+   * it so bytes past a short array aren't decoded as directories. */
+  if (pe.dir_off < 4u || pe.dir_off > buf_len) return;
+  ndirs = pe_rd_u32(buf + pe.dir_off - 4u);
+  if (ndirs > PE_NUM_DATA_DIRS) ndirs = PE_NUM_DATA_DIRS;
+  if (pe.dir_off + (size_t)ndirs * PE_DATA_DIRECTORY_SIZE > buf_len) return;
+  driver_printf("\nData Directories:\n");
+  driver_printf(" Idx Name RVA Size\n");
+  for (i = 0; i < ndirs; ++i) {
+    size_t ent = pe.dir_off + (size_t)i * PE_DATA_DIRECTORY_SIZE;
+    uint32_t rva = pe_rd_u32(buf + ent);
+    uint32_t sz = pe_rd_u32(buf + ent + 4);
+    if (rva == 0 && sz == 0) continue; /* unused slot */
+    driver_printf(" %2u %-14s 0x%08x 0x%08x\n", i, pe_dir_name(i), rva, sz);
+    if (i == PE_DIR_IMPORT) {
+      import_rva = rva;
+      import_size = sz;
+    }
+  }
+  if (import_rva && import_size)
+    pe_dump_imports(buf, buf_len, pe.sec_off, pe.nsec, import_rva,
+                    import_size);
+}
+
static const char* fmt_str(CfreeObjFmt fmt, uint8_t ptr_size) {
switch (fmt) {
case CFREE_OBJ_ELF:
@@ -167,11 +635,13 @@ static int j_match(const ObjdumpOpts* o, const char* name) {
return 0;
}
-/* Compose the comma-separated flag tag list GNU objdump prints in -h. */
-static void render_sec_flags(const CfreeObjSecInfo* sec, char* buf,
- size_t cap) {
+/* Compose the comma-separated flag tag list GNU objdump prints in -h.
+ * For COFF inputs, `coff_chars` is the raw IMAGE_SECTION_HEADER.Characteristics
+ * value; for other formats it should be 0. */
+static void render_sec_flags(const CfreeObjSecInfo* sec, CfreeObjFmt fmt,
+ uint32_t coff_chars, char* buf, size_t cap) {
size_t n = 0;
- const char* tags[12];
+ const char* tags[16];
int nt = 0;
int i;
int is_bss = (sec->kind == CFREE_SEC_BSS);
@@ -190,6 +660,16 @@ static void render_sec_flags(const CfreeObjSecInfo* sec, char* buf,
if (sec->flags & CFREE_SF_STRINGS) tags[nt++] = "STRINGS";
if (sec->kind == CFREE_SEC_DEBUG) tags[nt++] = "DEBUGGING";
+ if (fmt == CFREE_OBJ_COFF) {
+ if (coff_chars & OBJDUMP_IMAGE_SCN_LNK_COMDAT) tags[nt++] = "LINK_ONCE";
+ if (coff_chars & OBJDUMP_IMAGE_SCN_LNK_INFO) tags[nt++] = "LINK_INFO";
+ if (coff_chars & OBJDUMP_IMAGE_SCN_LNK_REMOVE) tags[nt++] = "LINK_REMOVE";
+ if (coff_chars & OBJDUMP_IMAGE_SCN_MEM_DISCARDABLE)
+ tags[nt++] = "DISCARDABLE";
+ if (coff_chars & OBJDUMP_IMAGE_SCN_MEM_SHARED) tags[nt++] = "SHARED";
+ if (coff_chars & OBJDUMP_IMAGE_SCN_GPREL) tags[nt++] = "GPREL";
+ }
+
for (i = 0; i < nt && n + 1 < cap; ++i) {
const char* t = tags[i];
if (i > 0 && n + 1 < cap) buf[n++] = ',';
@@ -208,25 +688,71 @@ static void render_sec_flags(const CfreeObjSecInfo* sec, char* buf,
static void dump_sections(CfreeObjFile* f, const ObjdumpOpts* opts) {
uint32_t nsec = cfree_obj_nsections(f);
+ CfreeObjFmt fmt = cfree_obj_fmt(f);
uint32_t i;
- char flagbuf[128];
+ char flagbuf[160];
driver_printf("Sections:\n");
driver_printf("Idx Name Size Align Flags\n");
for (i = 0; i < nsec; ++i) {
CfreeObjSecInfo sec;
+ uint32_t raw_type = 0;
if (cfree_obj_section(f, i, &sec) != CFREE_OK) continue;
if (!j_match(opts, sec.name)) continue;
- render_sec_flags(&sec, flagbuf, sizeof(flagbuf));
+ cfree_obj_section_format_flags(f, i, &raw_type, NULL);
+ render_sec_flags(&sec, fmt, raw_type, flagbuf, sizeof(flagbuf));
driver_printf(
"%3u %-20s %08llx 2**%-2u %s\n", i, sec.name[0] ? sec.name : "(anon)",
(unsigned long long)sec.size,
sec.align ? (unsigned)__builtin_ctz(sec.align ? sec.align : 1) : 0,
flagbuf);
+ /* Show the raw IMAGE_SCN_* value on a continuation line for COFF
+ * inputs — useful when diagnosing why a section ended up with the
+ * tags it did. The hex is much shorter than printing every set bit
+ * by name, and the tag list above already covers the bits that
+ * change behaviour at link time. */
+ if (fmt == CFREE_OBJ_COFF && raw_type) {
+ driver_printf(" Characteristics: 0x%08x\n",
+ raw_type);
+ }
}
driver_printf("\n");
}
+/* Print COMDAT group membership, which GNU objdump shows right after the
+ * section header table. Groups are pulled from the reader's group
+ * iterator, so the same loop serves every format that reports groups.
+ * Nothing at all is printed when the object has no groups. */
+static void dump_groups(CfreeObjFile* f, const ObjdumpOpts* opts) {
+  CfreeObjGroupIter* iter = NULL;
+  CfreeObjGroupInfo grp;
+  int any = 0;
+  (void)opts;
+
+  if (cfree_obj_groupiter_new(f, &iter) != CFREE_OK) return;
+  while (cfree_obj_groupiter_next(iter, &grp) == CFREE_ITER_ITEM) {
+    const char* gname = (grp.name && grp.name[0]) ? grp.name : "(anon)";
+    uint32_t m;
+    /* The heading is deferred until the first group actually shows up. */
+    if (!any) driver_printf("COMDAT groups:\n");
+    any = 1;
+    driver_printf(" group %s (signature sym #%u, %u section%s)\n", gname,
+                  (unsigned)grp.signature, (unsigned)grp.nsections,
+                  grp.nsections == 1 ? "" : "s");
+    for (m = 0; m < grp.nsections; ++m) {
+      CfreeObjSection sid = grp.sections[m];
+      CfreeObjSecInfo info;
+      if (sid == CFREE_SECTION_NONE ||
+          cfree_obj_section(f, sid, &info) != CFREE_OK) {
+        continue;
+      }
+      driver_printf(" [%3u] %s\n", (unsigned)sid,
+                    (info.name && info.name[0]) ? info.name : "(anon)");
+    }
+  }
+  cfree_obj_groupiter_free(iter);
+  if (any) driver_printf("\n");
+}
+
static void dump_symbols(CfreeObjFile* f, const ObjdumpOpts* opts) {
CfreeObjSymIter* it = NULL;
CfreeObjSymInfo sym;
@@ -412,6 +938,48 @@ static void dump_disasm(const CfreeDisasmContext* dctx, CfreeObjFile* f,
}
}
+/* `-f`: GNU objdump-style file header summary for inputs opened through
+ * cfree_obj_open. The start address is always printed as 0; PE images the
+ * reader cannot open take the dump_pe_file_header path instead. The flags
+ * line reports whether the input has relocations and symbols, so it is
+ * clear at a glance whether -r / -t will show anything. `label` is unused. */
+static void dump_file_header(CfreeObjFile* f, const char* label) {
+  CfreeTarget target = cfree_obj_target(f);
+  CfreeObjFmt fmt = cfree_obj_fmt(f);
+  CfreeObjSymIter* sit = NULL;
+  CfreeObjRelocIter* rit = NULL;
+  uint32_t nsec = cfree_obj_nsections(f);
+  uint32_t nsym = 0;
+  int has_relocs = 0;
+  unsigned flags = 0;
+
+  /* Count every symbol: the total is printed on the last line. */
+  if (cfree_obj_symiter_new(f, &sit) == CFREE_OK) {
+    CfreeObjSymInfo s;
+    while (cfree_obj_symiter_next(sit, &s) == CFREE_ITER_ITEM) nsym++;
+    cfree_obj_symiter_free(sit);
+  }
+  /* Only existence matters for relocations, so stop at the first one. */
+  if (cfree_obj_reliter_new(f, &rit) == CFREE_OK) {
+    CfreeObjReloc r;
+    if (cfree_obj_reliter_next(rit, &r) == CFREE_ITER_ITEM) has_relocs = 1;
+    cfree_obj_reliter_free(rit);
+  }
+  /* GNU objdump (BFD) flag bits used here: HAS_RELOC 0x01, HAS_SYMS 0x10.
+   * No other BFD flag is ever reported by this function. */
+  if (has_relocs) flags |= 0x0001u;
+  if (nsym) flags |= 0x0010u;
+
+  driver_printf("architecture: %s, flags 0x%08x:\n", arch_str(target.arch),
+                flags);
+  /* Emit ", " only between two names so a lone HAS_RELOC is not followed
+   * by a dangling separator. */
+  if (has_relocs) driver_printf("HAS_RELOC");
+  if (has_relocs && nsym) driver_printf(", ");
+  if (nsym) driver_printf("HAS_SYMS");
+  if (has_relocs || nsym) driver_printf("\n");
+  driver_printf("start address 0x%016llx\n", 0ull);
+  driver_printf("format: %s, sections: %u, symbols: %u\n\n",
+                fmt_str(fmt, target.ptr_size), nsec, nsym);
+  (void)label;
+}
+
static void dump_obj(const CfreeDisasmContext* dctx, const char* label,
CfreeObjFile* f, const ObjdumpOpts* opts) {
CfreeTarget target = cfree_obj_target(f);
@@ -420,7 +988,9 @@ static void dump_obj(const CfreeDisasmContext* dctx, const char* label,
driver_printf("%s:\tfile format %s-%s\n\n", label,
fmt_str(fmt, target.ptr_size), arch_str(target.arch));
+ if (opts->f) dump_file_header(f, label);
if (opts->h) dump_sections(f, opts);
+ if (opts->h) dump_groups(f, opts);
if (opts->t) dump_symbols(f, opts);
if (opts->s) dump_hex(f, opts);
if (opts->d || opts->D) dump_disasm(dctx, f, opts);
@@ -500,6 +1070,9 @@ static int parse_short_flags(const char* arg, ObjdumpOpts* o) {
case 's':
o->s = 1;
break;
+ case 'p':
+ o->p = 1;
+ break;
case 'x':
o->f = 1;
o->h = 1;
@@ -546,6 +1119,10 @@ static int parse_long_flag(const char* arg, ObjdumpOpts* o) {
o->t = 1;
return 1;
}
+ if (driver_streq(arg, "--private-headers")) {
+ o->p = 1;
+ return 1;
+ }
return 0;
}
@@ -596,7 +1173,8 @@ int driver_objdump(int argc, char** argv) {
}
}
- saw_op = opts.f || opts.h || opts.t || opts.d || opts.D || opts.r || opts.s;
+ saw_op = opts.f || opts.h || opts.t || opts.d || opts.D || opts.r || opts.s ||
+ opts.p;
if (!saw_op) { /* Default = -h -t (matches the prior behavior). */
opts.h = 1;
opts.t = 1;
@@ -644,11 +1222,44 @@ int driver_objdump(int argc, char** argv) {
case CFREE_BIN_MACHO:
case CFREE_BIN_WASM: {
CfreeObjFile* f = NULL;
+ /* PE executables aren't yet readable via cfree_obj_open (the
+ * obj reader is .obj-shaped only). For PE inputs we serve -f /
+ * -h / -p by walking the raw image bytes; -t / -d / -r / -s
+ * still need an ObjFile and are skipped with a soft error so
+ * the other ops don't get swallowed. */
if (cfree_obj_open(&ctx, &input, &f) != CFREE_OK) {
- driver_errf(OBJDUMP_TOOL, "failed to parse: %s", a);
- rc = 1;
+ if (bin == CFREE_BIN_PE) {
+ PeImage pe;
+ int parsed = pe_parse_image(input.data, input.len, &pe) && pe.valid;
+ int handled = 0;
+ if (parsed && opts.f) {
+ dump_pe_file_header(a, &pe);
+ handled = 1;
+ }
+ if (parsed && opts.h && pe.opt_magic == PE_OPT_HDR64_MAGIC) {
+ dump_pe_sections(a, &pe, input.data, input.len, &opts);
+ handled = 1;
+ }
+ if (opts.p) {
+ dump_pe_private(a, input.data, input.len);
+ handled = 1;
+ }
+ if (!handled) {
+ driver_errf(OBJDUMP_TOOL,
+ "%s: PE images support only -f / -h / -p; "
+ "use -p for image details",
+ a);
+ rc = 1;
+ }
+ } else {
+ driver_errf(OBJDUMP_TOOL, "failed to parse: %s", a);
+ rc = 1;
+ }
} else {
dump_obj(dctx_p, a, f, &opts);
+ if (opts.p && bin == CFREE_BIN_PE) {
+ dump_pe_private(a, input.data, input.len);
+ }
cfree_obj_free(f);
}
break;
diff --git a/driver/runtime.c b/driver/runtime.c
@@ -35,6 +35,19 @@ static const char* const kRtSrcX64[] = {
"coro/coro.c",
};
+static const char* const kRtSrcX64Windows[] = { /* source list for the "x86_64-pc-windows" kRtVariants entry */
+ "int/int.c",
+ "int/si_div.c",
+ "fp/fp.c",
+ "atomic/atomic_freestanding.c",
+ "cache/clear_cache.c",
+ "cfree/ifunc_init.c",
+ "int64/int64.c",
+ "stack/chkstk_x86_64_win.c", /* only the x86_64 Windows list carries a stack/ entry */
+ "coro/x86_64_win.c",
+ "coro/coro.c",
+};
+
static const char* const kRtSrcAarch64Linux[] = {
"int/int.c", "fp/fp.c",
"mem/mem.c", "atomic/atomic_freestanding.c",
@@ -56,6 +69,18 @@ static const char* const kRtSrcAarch64Darwin[] = {
"coro/aarch64_macho.s",
};
+static const char* const kRtSrcAarch64Windows[] = { /* source list for the "aarch64-windows" kRtVariants entry */
+ "int/int.c",
+ "int/si_div.c",
+ "fp/fp.c",
+ "atomic/atomic_freestanding.c",
+ "cache/clear_cache.c",
+ "cfree/ifunc_init.c",
+ "int64/int64.c",
+ "coro/aarch64.c", /* NOTE(review): no Windows-specific coro or stack/ source here, unlike the x86_64 list — confirm intended */
+ "coro/coro.c",
+};
+
static const char* const kRtSrcRv64Linux[] = {
/* fp_tf and fp_ti are bundled with LDBL128 in the host rt
* Makefile; mirror that here. long double = double on rv64 per
@@ -80,12 +105,19 @@ static const RuntimeVariant kRtVariants[] = {
{"x86_64-apple-darwin", CFREE_ARCH_X86_64, CFREE_OS_MACOS, CFREE_OBJ_MACHO,
8, 8, "lib/include/lp64_le", 1, 0, kRtSrcX64,
(uint32_t)(sizeof(kRtSrcX64) / sizeof(kRtSrcX64[0]))},
+ {"x86_64-pc-windows", CFREE_ARCH_X86_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF,
+ 8, 8, "lib/include/llp64_le", 1, 0, kRtSrcX64Windows,
+ (uint32_t)(sizeof(kRtSrcX64Windows) / sizeof(kRtSrcX64Windows[0]))},
{"aarch64-linux", CFREE_ARCH_ARM_64, CFREE_OS_LINUX, CFREE_OBJ_ELF, 8, 8,
"lib/include/lp64_le", 1, 1, kRtSrcAarch64Linux,
(uint32_t)(sizeof(kRtSrcAarch64Linux) / sizeof(kRtSrcAarch64Linux[0]))},
{"aarch64-apple-darwin", CFREE_ARCH_ARM_64, CFREE_OS_MACOS, CFREE_OBJ_MACHO,
8, 8, "lib/include/lp64_le", 1, 0, kRtSrcAarch64Darwin,
(uint32_t)(sizeof(kRtSrcAarch64Darwin) / sizeof(kRtSrcAarch64Darwin[0]))},
+ {"aarch64-windows", CFREE_ARCH_ARM_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF, 8,
+ 8, "lib/include/llp64_le", 1, 0, kRtSrcAarch64Windows,
+ (uint32_t)(sizeof(kRtSrcAarch64Windows) /
+ sizeof(kRtSrcAarch64Windows[0]))},
/* rv64 long double = double per the locked decision (matches RV64
* musl/glibc default and avoids the binary128 soft-float tail). */
{"riscv64-linux", CFREE_ARCH_RV64, CFREE_OS_LINUX, CFREE_OBJ_ELF, 8, 8,
diff --git a/include/cfree/link.h b/include/cfree/link.h
@@ -151,10 +151,17 @@ typedef enum CfreeLinkOutputKind {
CFREE_LINK_OUTPUT_JIT,
} CfreeLinkOutputKind;
+typedef enum CfreePeSubsystem { /* value carried in CfreeLinkSessionOptions.pe_subsystem */
+ CFREE_PE_SUBSYSTEM_DEFAULT = 0, /* 0 => use the target default */
+ CFREE_PE_SUBSYSTEM_WINDOWS_GUI = 2, /* same number as the PE optional-header Subsystem value for Windows GUI */
+ CFREE_PE_SUBSYSTEM_WINDOWS_CUI = 3, /* same number as the PE Subsystem value for Windows console (CUI) */
+} CfreePeSubsystem;
+
typedef struct CfreeLinkSessionOptions {
uint8_t output_kind; /* CfreeLinkOutputKind */
bool gc_sections;
bool pie;
+ uint16_t pe_subsystem; /* CfreePeSubsystem; 0 => target default */
const char* interp_path;
const char* entry;
const CfreeLinkScript* linker_script;
diff --git a/include/cfree/object.h b/include/cfree/object.h
@@ -252,6 +252,24 @@ CfreeStatus cfree_obj_section_data(const CfreeObjFile *, CfreeObjSection idx,
CfreeStatus cfree_obj_section_by_name(const CfreeObjFile *, const char *name,
CfreeObjSection *out);
+/* Format-specific raw section attributes preserved by the reader.
+ *
+ * COFF : *raw_type_out = IMAGE_SECTION_HEADER.Characteristics
+ * *raw_flags_out = 0
+ * ELF : *raw_type_out = sh_type override (when the canonical SecSem
+ * mapping is lossy, e.g. SHT_LLVM_ADDRSIG); 0 otherwise
+ * *raw_flags_out = sh_flags bits not represented in SecFlag
+ * (e.g. SHF_EXCLUDE)
+ * Mach-O/Wasm : both zero (reader does not preserve a raw view here).
+ *
+ * Use when canonical SecFlag/SecKind isn't enough — e.g. objdump
+ * decoding `IMAGE_SCN_LNK_COMDAT` / `_MEM_DISCARDABLE` for diagnostic
+ * display. NULL output pointers are ignored. */
+CfreeStatus cfree_obj_section_format_flags(const CfreeObjFile *,
+ CfreeObjSection idx,
+ uint32_t *raw_type_out,
+ uint32_t *raw_flags_out);
+
CfreeStatus cfree_obj_symbol_by_name(const CfreeObjFile *, const char *name,
CfreeObjSymInfo *out);
diff --git a/lang/c/parse/parse.c b/lang/c/parse/parse.c
@@ -1217,8 +1217,7 @@ static void parse_external_decl(Parser* p) {
if (!parse_decl_specs(p, &specs)) {
perr(p, "expected declaration");
}
- if (specs.storage == DS_REGISTER ||
- (specs.storage == DS_AUTO && specs.storage_explicit)) {
+ if (specs.storage == DS_AUTO && specs.storage_explicit) {
perr(p, "invalid storage-class specifier at file scope");
}
@@ -1275,6 +1274,9 @@ static void parse_external_decl(Parser* p) {
attr_list_append(&fent->attrs, dattrs);
if (is_punct(&p->cur, '{')) {
+ int suppress_body_codegen =
+ specs.storage == DS_EXTERN &&
+ ((specs.flags | fn_decl_flags) & DF_INLINE);
if (fent->defined) perr(p, "redefinition of function");
fent->defined = 1;
fent->decl_state = DSTATE_FUNC_DEFINED;
@@ -1282,8 +1284,10 @@ static void parse_external_decl(Parser* p) {
const Type* saved_func_ret = p->cur_func_ret;
p->cur_func_name = name;
p->cur_func_ret = fn_ty->fn.ret;
+ if (suppress_body_codegen) pcg_codegen_suppress_push(p);
parse_function_body(p, fent->v.sym, fn_ty, abi, infos, nparams, loc,
fn_section_id, fn_decl_flags);
+ if (suppress_body_codegen) pcg_codegen_suppress_pop(p);
p->cur_func_name = saved_func_name;
p->cur_func_ret = saved_func_ret;
return;
@@ -1320,7 +1324,9 @@ static void parse_external_decl(Parser* p) {
/* Global object declaration. */
for (;;) {
int has_init = is_punct(&p->cur, '=');
- int is_pure_extern = (specs.storage == DS_EXTERN) && !has_init;
+ int is_pure_extern =
+ (specs.storage == DS_EXTERN || specs.storage == DS_REGISTER) &&
+ !has_init;
SymEntry* existing = scope_lookup_current(p, name);
ObjSymId sym = OBJ_SYM_NONE;
ObjSecId section_id = OBJ_SEC_NONE;
@@ -1457,6 +1463,9 @@ static void parse_translation_unit(Parser* p) {
parse_file_scope_asm(p);
continue;
}
+ if (accept_punct(p, ';')) {
+ continue;
+ }
parse_external_decl(p);
}
}
diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c
@@ -48,6 +48,11 @@ static const Type* ty_char16(Parser* p) {
static const Type* ty_char32(Parser* p) { return type_prim(p->pool, TY_UINT); }
+static const Type* ty_wchar(Parser* p) { /* wide-literal element type */
+  if (cfree_compiler_target(p->c).os == CFREE_OS_WINDOWS) return ty_char16(p);
+  return ty_int(p);
+}
+
static int pointer_pointees_compatible(Parser* p, const Type* lhs,
const Type* rhs) {
const Type* lp;
@@ -338,7 +343,7 @@ static CLitStringEnc literal_string_encoding(const Tok* t) {
}
const Type* string_literal_elem_type(Parser* p, const Tok* t) {
- if (t->flags & TF_STR_WIDE) return ty_int(p);
+ if (t->flags & TF_STR_WIDE) return ty_wchar(p);
if (t->flags & TF_STR_U16) return ty_char16(p);
if (t->flags & TF_STR_U32) return ty_char32(p);
return type_prim(p->pool, TY_CHAR);
@@ -1262,6 +1267,40 @@ static MemOrder parse_atomic_mem_order(Parser* p) {
* Builtin call handling
* ============================================================ */
+/* Resolve member `mname` of struct/union `rec_ty` for __builtin_offsetof.
+ * Named fields of the record itself are searched first; failing that, each
+ * anonymous struct/union member is searched recursively and its own offset
+ * is added. On success stores the member's type and its offset relative to
+ * rec_ty and returns 1; returns 0 if rec_ty is not a record, has no layout,
+ * or holds no such member. */
+static int offsetof_find_member(Parser* p, const Type* rec_ty, Sym mname,
+                                const Type** out_ty, u32* out_off) {
+  const Type* rec = type_unqual(p->pool, rec_ty);
+  const ABIRecordLayout* layout;
+  u16 idx;
+
+  if (!rec) return 0;
+  if (rec->kind != TY_STRUCT && rec->kind != TY_UNION) return 0;
+  layout = c_abi_record_layout(p->abi, p->pool, rec);
+  if (!layout) return 0;
+
+  /* Pass 1: a direct member wins over anything nested. */
+  if (mname != 0) {
+    for (idx = 0; idx < rec->rec.nfields; ++idx) {
+      if (rec->rec.fields[idx].name != mname) continue;
+      *out_ty = rec->rec.fields[idx].type;
+      *out_off = layout->fields[idx].offset;
+      return 1;
+    }
+  }
+
+  /* Pass 2: descend into anonymous struct/union members in field order. */
+  for (idx = 0; idx < rec->rec.nfields; ++idx) {
+    const Field* fld = &rec->rec.fields[idx];
+    const Type* inner = type_unqual(p->pool, fld->type);
+    const Type* hit_ty = NULL;
+    u32 hit_off = 0;
+    if (!(fld->flags & FIELD_ANON)) continue;
+    if (inner->kind != TY_STRUCT && inner->kind != TY_UNION) continue;
+    if (!offsetof_find_member(p, inner, mname, &hit_ty, &hit_off)) continue;
+    *out_ty = hit_ty;
+    *out_off = layout->fields[idx].offset + hit_off;
+    return 1;
+  }
+  return 0;
+}
+
static const Type* offsetof_designator(Parser* p, const Type* base, u32* off) {
const Type* cur = base;
if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
@@ -1272,24 +1311,8 @@ static const Type* offsetof_designator(Parser* p, const Type* base, u32* off) {
Sym mname = p->cur.v.ident;
const Type* mty = NULL;
u32 moff = 0;
- const Field* mf = NULL;
- /* find_field is static in parse_type.c; we need it here.
- * We call c_abi_record_layout directly inline. */
- const ABIRecordLayout* L = c_abi_record_layout(p->abi, p->pool, cur);
- if (!L) perr(p, "no such member in __builtin_offsetof");
- int found = 0;
- for (u16 i = 0; i < cur->rec.nfields; ++i) {
- const Field* f = &cur->rec.fields[i];
- if (f->name == mname && mname != 0) {
- mty = f->type;
- moff = L->fields[i].offset;
- mf = f;
- found = 1;
- break;
- }
- }
- (void)mf;
- if (!found) perr(p, "no such member in __builtin_offsetof");
+ if (!offsetof_find_member(p, cur, mname, &mty, &moff))
+ perr(p, "no such member in __builtin_offsetof");
advance(p);
*off += moff;
cur = mty;
@@ -1550,6 +1573,44 @@ static int parse_builtin_fabs_call(Parser* p, Sym name, SrcLoc loc) {
return 1;
}
+/* Handle __builtin_abs / __builtin_labs / __builtin_llabs. If `name` is one
+ * of them, parses the parenthesised argument, coerces it to int / long /
+ * long long respectively, emits a call to the matching library function
+ * (abs / labs / llabs) and returns 1. Returns 0, consuming nothing, for
+ * any other name. */
+static int parse_builtin_abs_call(Parser* p, Sym name, SrcLoc loc) {
+  static const struct {
+    const char* builtin;
+    const char* libname;
+    int prim; /* primitive kind handed to type_prim */
+  } kAbs[] = {
+      {"__builtin_abs", "abs", TY_INT},
+      {"__builtin_labs", "labs", TY_LONG},
+      {"__builtin_llabs", "llabs", TY_LLONG},
+  };
+  size_t nlen = 0;
+  const char* nm = pool_str(p->pool, name, &nlen);
+  const char* libname = NULL;
+  const Type* arg_ty = NULL;
+  const Type* params[1];
+  const Type* fn_ty;
+  CfreeCgSym callee;
+  size_t k;
+
+  /* Exact match on both length and bytes, as pool strings carry a length. */
+  for (k = 0; nm && k < sizeof kAbs / sizeof kAbs[0]; ++k) {
+    if (nlen != strlen(kAbs[k].builtin)) continue;
+    if (memcmp(nm, kAbs[k].builtin, nlen) != 0) continue;
+    libname = kAbs[k].libname;
+    arg_ty = type_prim(p->pool, kAbs[k].prim);
+    break;
+  }
+  if (!libname) return 0;
+
+  advance(p); /* IDENT */
+  expect_punct(p, '(', "'(' after abs builtin");
+  parse_assign_expr(p);
+  to_rvalue(p);
+  coerce_top_to_type(p, arg_ty);
+  expect_punct(p, ')', "')' after abs builtin");
+
+  /* The callee has the shape T f(T) with T being the coerced argument type. */
+  params[0] = arg_ty;
+  fn_ty = type_func(p->pool, arg_ty, params, 1, 0);
+  callee = CFREE_CG_SYM_NONE;
+  if (pcg_emit_enabled(p)) callee = builtin_libcall_sym(p, libname, fn_ty);
+  cg_set_loc(p->cg, loc);
+  pcg_call_symbol(p, callee, 1, fn_ty);
+  return 1;
+}
+
static int try_parse_builtin_call(Parser* p) {
Sym name = p->cur.v.ident;
SrcLoc loc = p->cur.loc;
@@ -1563,6 +1624,7 @@ static int try_parse_builtin_call(Parser* p) {
if (parse_builtin_isnan_call(p, name, loc)) return 1;
if (parse_builtin_inf_call(p, name, loc)) return 1;
if (parse_builtin_fabs_call(p, name, loc)) return 1;
+ if (parse_builtin_abs_call(p, name, loc)) return 1;
if (parse_builtin_clear_cache_call(p, name, loc)) return 1;
if (name != p->sym_b_alloca && name != p->sym_b_ctz &&
diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h
@@ -242,6 +242,13 @@ typedef struct Parser {
Sym sym_b_expect;
Sym sym_b_offsetof;
Sym sym_b_va_list;
+ /* Cached singleton for __builtin_va_list — built lazily on first
+ * mention so every occurrence resolves to the same Type* (and the
+ * same TagId where applicable). Without the cache, c_abi_va_list_type
+ * mints a fresh struct type per occurrence and headers that pass
+ * locally-declared __builtin_va_list values to functions taking
+ * va_list (e.g. mingw's sec_api/stdio_s.h) fail type-equality. */
+ const Type* type_b_va_list;
Sym sym_b_va_start;
Sym sym_b_va_arg;
Sym sym_b_va_end;
diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c
@@ -447,7 +447,7 @@ void validate_decl_type_constraints(Parser* p, const DeclSpecs* specs,
if (specs->flags & DF_THREAD)
perr(p, "_Thread_local is invalid for struct member");
}
- if (u->kind == TY_VOID && !is_function) {
+ if (u->kind == TY_VOID && !is_function && specs->storage != DS_TYPEDEF) {
perr(p, "object may not have void type");
}
if ((specs->flags & DF_INLINE) && !is_function) {
@@ -771,7 +771,9 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) {
} else if (!acc.saw_explicit_type && !tagged_ty && t.kind == TOK_IDENT &&
ident_kw(p, t.v.ident) == KW_NONE) {
if (t.v.ident == p->sym_b_va_list) {
- tagged_ty = c_abi_va_list_type(p->abi, p->pool);
+ if (!p->type_b_va_list)
+ p->type_b_va_list = c_abi_va_list_type(p->abi, p->pool);
+ tagged_ty = p->type_b_va_list;
acc.saw_explicit_type = 1;
advance(p);
seen = 1;
diff --git a/lang/c/pp/pp.c b/lang/c/pp/pp.c
@@ -315,6 +315,10 @@ static void compute_date_time(Pp* pp) {
}
static void pp_register_static_predefined(Pp* pp) {
+ pp_define(pp, "__cfree__", "1");
+ pp_define(pp, "__cfree_major__", "0");
+ pp_define(pp, "__cfree_minor__", "0");
+ pp_define(pp, "__cfree_patchlevel__", "0");
pp_define(pp, "__STDC__", "1");
pp_define(pp, "__STDC_HOSTED__", "0");
pp_define(pp, "__STDC_VERSION__", "201112L");
@@ -332,15 +336,17 @@ static void pp_register_static_predefined(Pp* pp) {
/* Target-dependent predefined macros consumed by rt/include/stddef.h and
* rt/include/stdint.h. The set mirrors the subset of GCC/Clang's __*_TYPE__
* / __*_MAX__ namespace that those headers reference. We split only on
- * pointer width: ptr_size == 8 picks the LP64 model (every 64-bit target
- * cfree supports), ptr_size == 4 picks ILP32. LLP64 (Windows x86-64) is
- * not yet a supported target, so `long` always tracks pointer width here. */
+ * pointer width plus the target data model: LP64 for Unix-like 64-bit targets,
+ * LLP64 for 64-bit Windows, and ILP32 for 32-bit targets. */
static void pp_register_target_predefined(Pp* pp) {
CfreeTarget target = cfree_compiler_target(pp->c);
const CfreePredefinedMacro* arch_defs = NULL;
uint32_t narch_defs = cfree_compiler_arch_predefines(pp->c, &arch_defs);
uint32_t i;
- int lp64 = (target.ptr_size == 8);
+ int ptr64 = (target.ptr_size == 8);
+ int win = (target.os == CFREE_OS_WINDOWS);
+ int lp64 = ptr64 && !win;
+ int wchar16 = win;
for (i = 0; i < narch_defs; ++i) {
pp_define(pp, arch_defs[i].name, arch_defs[i].body);
@@ -360,19 +366,140 @@ static void pp_register_target_predefined(Pp* pp) {
pp_define(pp, "__SIZEOF_INT__", "4");
pp_define(pp, "__SIZEOF_LONG__", lp64 ? "8" : "4");
pp_define(pp, "__SIZEOF_LONG_LONG__", "8");
- pp_define(pp, "__SIZEOF_POINTER__", lp64 ? "8" : "4");
- pp_define(pp, "__SIZEOF_SIZE_T__", lp64 ? "8" : "4");
- pp_define(pp, "__SIZEOF_PTRDIFF_T__", lp64 ? "8" : "4");
- pp_define(pp, "__SIZEOF_WCHAR_T__", "4");
+ pp_define(pp, "__SIZEOF_POINTER__", ptr64 ? "8" : "4");
+ pp_define(pp, "__SIZEOF_SIZE_T__", ptr64 ? "8" : "4");
+ pp_define(pp, "__SIZEOF_PTRDIFF_T__", ptr64 ? "8" : "4");
+ pp_define(pp, "__SIZEOF_WCHAR_T__", wchar16 ? "2" : "4");
pp_define(pp, "__SIZEOF_WINT_T__", "4");
pp_define(pp, "__SIZEOF_FLOAT__", "4");
pp_define(pp, "__SIZEOF_DOUBLE__", "8");
pp_define(pp, "__SIZEOF_LONG_DOUBLE__", "8");
+ /* Windows / mingw predefined macros. cfree targets the mingw
+ * flavor (DWARF debug info, mingwex CRT) rather than MSVC, so we
+ * advertise __MINGW{32,64}__ and friends but never set _MSC_VER.
+ * Both _WIN32 and the legacy unprefixed WIN32 are defined; _WIN64
+ * is set on 64-bit targets only. The MSVC-compat machine macros
+ * (_M_X64 / _M_AMD64 / _M_ARM64) are useful for headers that gate
+ * on them but harmless to set everywhere — mingw's own headers
+ * tolerate them. */
+ if (target.os == CFREE_OS_WINDOWS) {
+ pp_define(pp, "_WIN32", "1");
+ pp_define(pp, "WIN32", "1");
+ pp_define(pp, "__MINGW32__", "1");
+ if (target.ptr_size == 8) {
+ pp_define(pp, "_WIN64", "1");
+ pp_define(pp, "__MINGW64__", "1");
+ }
+ if (target.arch == CFREE_ARCH_X86_64) {
+ pp_define(pp, "_M_X64", "100");
+ pp_define(pp, "_M_AMD64", "100");
+ } else if (target.arch == CFREE_ARCH_ARM_64) {
+ pp_define(pp, "_M_ARM64", "1");
+ }
+ /* mingw's <vadefs.h> / many CRT headers gate __builtin_va_list /
+ * __gnuc_va_list on __GNUC__. cfree implements the va_* builtins
+ * and __builtin_va_list with the GCC contract, so impersonating a
+ * conservative GCC vintage lets the mingw header tree compile.
+ * We pick 4.0 — old enough that no header expects GCC-specific
+ * extensions cfree doesn't implement (e.g. transactional memory,
+ * GIMPLE plugins), but new enough to clear every __GNUC__ >= N
+ * gate we've seen in practice. */
+ pp_define(pp, "__GNUC__", "4");
+ pp_define(pp, "__GNUC_MINOR__", "0");
+ pp_define(pp, "__GNUC_PATCHLEVEL__", "0");
+ /* __has_builtin / __has_feature / __has_attribute: clang/GCC
+ * preprocessor extensions. mingw's _mingw.h gates inline-asm
+ * intrinsic definitions on whether the compiler claims to have
+ * them as builtins (e.g. __debugbreak, __fastfail, __prefetch).
+ * cfree doesn't model individual lookups: __has_builtin answers
+ * "yes" uniformly so mingw skips its inline-asm fallbacks (intel/
+ * {$}-form asm cfree can't parse); the other two answer "no". */
+ pp_define(pp, "__has_builtin(x)", "1");
+ pp_define(pp, "__has_feature(x)", "0");
+ pp_define(pp, "__has_attribute(x)", "0");
+ /* MSVC fixed-width integer types. mingw's corecrt.h uses these
+ * directly (e.g. `typedef unsigned __int64 size_t;`). Map to the
+ * C standard equivalents. */
+ pp_define(pp, "__int8", "char");
+ pp_define(pp, "__int16", "short");
+ pp_define(pp, "__int32", "int");
+ pp_define(pp, "__int64", "long long");
+ /* mingw's psdk_inc/intrin-impl.h emits an inline implementation
+ * for every MSVC intrinsic (_lrotl, _BitScanForward, ...) and
+ * gates them with __INTRINSIC_PROLOG, which uses ## to paste the
+ * intrinsic's name into a `defined(__INTRINSIC_DEFINED_<name>)`
+ * test. Once an intrinsic gets defined, a later re-invocation of
+ * the same gate macro hits a cfree pp bug where a *defined*
+ * symbol referenced inside `defined()` gets expanded before the
+ * `defined` operator captures it. Predefining
+ * __INTRINSIC_ONLYSPECIAL flips the gate's second clause so
+ * none of the inline intrinsics are emitted (mingw expects this
+ * idiom for non-special builds; the linker pulls them from
+ * libmingwex/libmsvcrt instead). This sidesteps the pp bug
+ * entirely. */
+ pp_define(pp, "__INTRINSIC_ONLYSPECIAL", "1");
+ /* __declspec(...) is the MSVC syntax for attributes. mingw uses
+ * it in headers for dllimport/dllexport, alignment, noreturn,
+ * etc. cfree's COFF linker routes externs through the IAT
+ * regardless of the dllimport hint and doesn't yet model
+ * dllexport via this attribute — so we erase it as a no-op
+ * macro. (Note: this is at the preprocessor layer; the parser
+ * still needs to handle the syntax if/when the macro is removed.)
+ */
+ pp_define(pp, "__declspec(x)", "");
+ /* GNU `__extension__` is a pedantic-quiet wrapper around
+ * non-standard constructs (statement exprs, anonymous structs).
+ * cfree's parser is permissive about those already; the keyword
+ * has no effect on parsing, so we erase it. */
+ pp_define(pp, "__extension__", "");
+ /* __restrict / __restrict__: GCC-flavored alternates to the C99
+ * `restrict` keyword. cfree parses `restrict` already; map the
+ * GCC spellings onto it. */
+ pp_define(pp, "__restrict", "restrict");
+ pp_define(pp, "__restrict__", "restrict");
+ pp_define(pp, "__volatile__", "volatile");
+ pp_define(pp, "__const__", "const");
+ pp_define(pp, "__signed__", "signed");
+ /* MSVC calling-convention attributes. On x86_64 they're no-ops
+ * (every function uses the Win64 ABI) and on ARM64 likewise; on
+ * i386 they actually mean something but cfree doesn't target it.
+ * Defining them as empty macros lets mingw headers that say
+ * `void __cdecl foo(void)` parse correctly.
+ *
+ * These are plain predefines: nothing here issues an `#undef`, so
+ * a header that wants a different spelling has to `#undef` the
+ * name itself before redefining it. NOTE(review): mingw's
+ * _mingw.h is believed to redefine some of these (e.g. `#define
+ * __cdecl __attribute__((__cdecl__))`) — confirm that cfree's pp
+ * accepts such a redefinition instead of diagnosing a different
+ * replacement list. cdecl/stdcall/fastcall are covered in both
+ * their double- and single-underscore spellings. */
+ pp_define(pp, "__cdecl", "");
+ pp_define(pp, "__stdcall", "");
+ pp_define(pp, "__fastcall", "");
+ pp_define(pp, "__thiscall", "");
+ pp_define(pp, "__vectorcall", "");
+ pp_define(pp, "_cdecl", "");
+ pp_define(pp, "_stdcall", "");
+ pp_define(pp, "_fastcall", "");
+ /* __forceinline / __inline / __w64: mingw's _mingw.h redefines
+ * them itself when __GNUC__ is set, so we leave them alone here
+ * to avoid a redefinition-with-different-replacement error. */
+ }
+
/* stddef.h base aliases */
- pp_define(pp, "__SIZE_TYPE__", lp64 ? "unsigned long" : "unsigned int");
- pp_define(pp, "__PTRDIFF_TYPE__", lp64 ? "long" : "int");
- pp_define(pp, "__WCHAR_TYPE__", "int");
+ if (lp64) {
+ pp_define(pp, "__SIZE_TYPE__", "unsigned long");
+ pp_define(pp, "__PTRDIFF_TYPE__", "long");
+ } else if (ptr64) {
+ pp_define(pp, "__SIZE_TYPE__", "unsigned long long");
+ pp_define(pp, "__PTRDIFF_TYPE__", "long long");
+ } else {
+ pp_define(pp, "__SIZE_TYPE__", "unsigned int");
+ pp_define(pp, "__PTRDIFF_TYPE__", "int");
+ }
+ pp_define(pp, "__WCHAR_TYPE__", wchar16 ? "unsigned short" : "int");
pp_define(pp, "__CHAR16_TYPE__", "unsigned short");
pp_define(pp, "__CHAR32_TYPE__", "unsigned int");
@@ -404,19 +531,28 @@ static void pp_register_target_predefined(Pp* pp) {
pp_define(pp, "__UINT_FAST8_TYPE__", "unsigned char");
pp_define(pp, "__INT_FAST8_MAX__", "127");
pp_define(pp, "__UINT_FAST8_MAX__", "255");
- if (lp64) {
- pp_define(pp, "__INT_FAST16_TYPE__", "long");
- pp_define(pp, "__INT_FAST32_TYPE__", "long");
- pp_define(pp, "__INT_FAST64_TYPE__", "long");
- pp_define(pp, "__UINT_FAST16_TYPE__", "unsigned long");
- pp_define(pp, "__UINT_FAST32_TYPE__", "unsigned long");
- pp_define(pp, "__UINT_FAST64_TYPE__", "unsigned long");
- pp_define(pp, "__INT_FAST16_MAX__", "9223372036854775807L");
- pp_define(pp, "__INT_FAST32_MAX__", "9223372036854775807L");
- pp_define(pp, "__INT_FAST64_MAX__", "9223372036854775807L");
- pp_define(pp, "__UINT_FAST16_MAX__", "18446744073709551615UL");
- pp_define(pp, "__UINT_FAST32_MAX__", "18446744073709551615UL");
- pp_define(pp, "__UINT_FAST64_MAX__", "18446744073709551615UL");
+ if (ptr64) {
+ pp_define(pp, "__INT_FAST16_TYPE__", lp64 ? "long" : "long long");
+ pp_define(pp, "__INT_FAST32_TYPE__", lp64 ? "long" : "long long");
+ pp_define(pp, "__INT_FAST64_TYPE__", lp64 ? "long" : "long long");
+ pp_define(pp, "__UINT_FAST16_TYPE__",
+ lp64 ? "unsigned long" : "unsigned long long");
+ pp_define(pp, "__UINT_FAST32_TYPE__",
+ lp64 ? "unsigned long" : "unsigned long long");
+ pp_define(pp, "__UINT_FAST64_TYPE__",
+ lp64 ? "unsigned long" : "unsigned long long");
+ pp_define(pp, "__INT_FAST16_MAX__",
+ lp64 ? "9223372036854775807L" : "9223372036854775807LL");
+ pp_define(pp, "__INT_FAST32_MAX__",
+ lp64 ? "9223372036854775807L" : "9223372036854775807LL");
+ pp_define(pp, "__INT_FAST64_MAX__",
+ lp64 ? "9223372036854775807L" : "9223372036854775807LL");
+ pp_define(pp, "__UINT_FAST16_MAX__",
+ lp64 ? "18446744073709551615UL" : "18446744073709551615ULL");
+ pp_define(pp, "__UINT_FAST32_MAX__",
+ lp64 ? "18446744073709551615UL" : "18446744073709551615ULL");
+ pp_define(pp, "__UINT_FAST64_MAX__",
+ lp64 ? "18446744073709551615UL" : "18446744073709551615ULL");
} else {
pp_define(pp, "__INT_FAST16_TYPE__", "int");
pp_define(pp, "__INT_FAST32_TYPE__", "int");
@@ -441,6 +577,14 @@ static void pp_register_target_predefined(Pp* pp) {
pp_define(pp, "__UINTPTR_MAX__", "18446744073709551615UL");
pp_define(pp, "__PTRDIFF_MAX__", "9223372036854775807L");
pp_define(pp, "__SIZE_MAX__", "18446744073709551615UL");
+ } else if (ptr64) {
+ pp_define(pp, "__LONG_MAX__", "2147483647L");
+ pp_define(pp, "__INTPTR_TYPE__", "long long");
+ pp_define(pp, "__UINTPTR_TYPE__", "unsigned long long");
+ pp_define(pp, "__INTPTR_MAX__", "9223372036854775807LL");
+ pp_define(pp, "__UINTPTR_MAX__", "18446744073709551615ULL");
+ pp_define(pp, "__PTRDIFF_MAX__", "9223372036854775807LL");
+ pp_define(pp, "__SIZE_MAX__", "18446744073709551615ULL");
} else {
pp_define(pp, "__LONG_MAX__", "2147483647L");
pp_define(pp, "__INTPTR_TYPE__", "int");
@@ -472,9 +616,8 @@ static void pp_register_target_predefined(Pp* pp) {
pp_define(pp, "__UINTMAX_C(c)", "c ## ULL");
}
- /* wchar_t / wint_t / sig_atomic_t are all `int` in cfree's model */
- pp_define(pp, "__WCHAR_MAX__", "2147483647");
- pp_define(pp, "__WCHAR_MIN__", "(-__WCHAR_MAX__ - 1)");
+ pp_define(pp, "__WCHAR_MAX__", wchar16 ? "65535" : "2147483647");
+ pp_define(pp, "__WCHAR_MIN__", wchar16 ? "0" : "(-__WCHAR_MAX__ - 1)");
pp_define(pp, "__WINT_MAX__", "2147483647");
pp_define(pp, "__WINT_MIN__", "(-__WINT_MAX__ - 1)");
pp_define(pp, "__SIG_ATOMIC_MAX__", "2147483647");
diff --git a/lang/c/pp/pp_directive.c b/lang/c/pp/pp_directive.c
@@ -130,13 +130,27 @@ static void prepass_defined(Pp* pp, const Tok* in, u32 nin, TokVec* out) {
}
}
-/* Macro-expand a sequence of pre-#if tokens to completion. */
+/* Macro-expand a sequence of pre-#if tokens to completion.
+ *
+ * Sets pp->in_if_expansion for the duration so pp_next_raw can keep
+ * `defined`-operator operands raw even when they ride out of a macro
+ * body via the ## operator. Without this flag a macro body like
+ * #define G(x) (!defined(__G_DEFINED_ ## x))
+ * would have the pasted operand expanded if it happens to name an
+ * already-defined macro, leaving the second prepass to choke on
+ * `defined()`. */
static void expand_for_if(Pp* pp, const Tok* in, u32 nin, TokVec* out) {
Tok* slice;
+ u8 saved; /* in_if_expansion value on entry, put back before returning */
if (nin == 0) return; /* nothing to expand; the flags are left untouched */
slice = arena_array(pp->arena, Tok, nin);
memcpy(slice, in, sizeof(Tok) * nin); /* expansion runs over an arena copy of the input tokens */
+ saved = pp->in_if_expansion;
+ pp->in_if_expansion = 1;
+ pp->defined_skip = 0; /* begin with no `defined` operand pending */
expand_arg_to_eof(pp, slice, NULL, nin, out);
+ pp->in_if_expansion = saved;
+ pp->defined_skip = 0; /* reset unconditionally; unlike in_if_expansion it is not saved */
}
/* Replace remaining identifiers with `0` per §6.10.1 ¶4, after `defined`
diff --git a/lang/c/pp/pp_expand.c b/lang/c/pp/pp_expand.c
@@ -933,6 +933,40 @@ Tok pp_next_raw(Pp* pp) {
* iteration picks up. */
continue;
}
+ /* While expanding an #if condition, suppress macro expansion of
+ * `defined`-operator operands so a `defined(X)` produced by a
+ * macro body whose argument was pasted via ## doesn't accidentally
+ * expand an already-defined X to its body (typically empty). See
+ * the `defined_skip` field comment in pp_priv.h. */
+ if (pp->in_if_expansion) {
+ if (pp->defined_skip == 1 && t.kind == TOK_IDENT) {
+ t.flags |= TF_NO_EXPAND;
+ pp->defined_skip = 0;
+ } else if (pp->defined_skip == 2) {
+ if (t.kind == TOK_PUNCT && t.v.punct == '(') {
+ pp->defined_skip = 3;
+ } else if (t.kind == TOK_IDENT) {
+ /* `defined IDENT` (no parens) — same as the skip==1 case. */
+ t.flags |= TF_NO_EXPAND;
+ pp->defined_skip = 0;
+ } else {
+ pp->defined_skip = 0;
+ }
+ } else if (pp->defined_skip == 3) {
+ if (t.kind == TOK_IDENT) {
+ t.flags |= TF_NO_EXPAND;
+ pp->defined_skip = 4;
+ } else if (t.kind == TOK_PUNCT && t.v.punct == ')') {
+ pp->defined_skip = 0;
+ }
+ } else if (pp->defined_skip == 4) {
+ if (t.kind == TOK_PUNCT && t.v.punct == ')') {
+ pp->defined_skip = 0;
+ }
+ } else if (t.kind == TOK_IDENT && t.v.ident == pp->sym_defined) {
+ pp->defined_skip = 2;
+ }
+ }
if (t.kind == TOK_IDENT && (t.flags & TF_NO_EXPAND) == 0) {
Sym id = t.v.ident;
diff --git a/lang/c/pp/pp_priv.h b/lang/c/pp/pp_priv.h
@@ -162,6 +162,29 @@ struct Pp {
* time(NULL) if unset). */
Sym val_date_str;
Sym val_time_str;
+
+ /* Defined-operator handling during #if expansion.
+ *
+ * The first prepass in eval_if_expr replaces `defined X` / `defined
+ * (X)` literally found in the directive line, but `defined()` can
+ * also come from macro bodies (mingw's intrin-impl.h uses
+ * `defined(__INTRINSIC_DEFINED_ ## name)` inside a #define). When
+ * the expander processes such a body, the identifier inside
+ * `defined(...)` must NOT be macro-expanded — otherwise an empty
+ * macro X would turn `defined(X)` into `defined()` and the
+ * post-expansion prepass would reject it.
+ *
+ * This pair of fields tracks the state across `pp_next_raw` calls
+ * within `expand_for_if`:
+ * in_if_expansion: 1 inside an #if's expand_arg_to_eof call
+ * defined_skip: 0 idle; 1 mark the next IDENT, then go idle;
+ * 2 just saw `defined` (expect `(` or the operand
+ * IDENT); 3 saw `defined (` (expect the operand
+ * IDENT); 4 operand marked (expect closing `)`).
+ * The expander uses these to mark the operand IDENT TF_NO_EXPAND
+ * before the macro-expansion check at the head of pp_next_raw. */
+ u8 in_if_expansion;
+ u8 defined_skip;
};
/* ============================================================
diff --git a/lang/c/type/type.c b/lang/c/type/type.c
@@ -502,6 +502,8 @@ static CfreeCgTypeId type_cg_builtin(CfreeCompiler* c, TypeKind kind) {
return b.id[CFREE_CG_BUILTIN_I32];
case TY_LONG:
case TY_ULONG:
+ if (target.os == CFREE_OS_WINDOWS) return b.id[CFREE_CG_BUILTIN_I32];
+ return b.id[CFREE_CG_BUILTIN_I64];
case TY_LLONG:
case TY_ULLONG:
return b.id[CFREE_CG_BUILTIN_I64];
diff --git a/rt/Makefile b/rt/Makefile
@@ -17,6 +17,7 @@ RT_VARIANTS = \
riscv64-linux \
riscv64-elf \
riscv64-elf-save-restore \
+ aarch64-windows \
x86_64-pc-windows \
i386-linux \
wasm32 \
@@ -71,6 +72,12 @@ RT_aarch64-apple-darwin_INT128 = 1
RT_aarch64-apple-darwin_CORO = aarch64
RT_EXTRA_SRCS_aarch64-apple-darwin = rt/lib/coro/aarch64_macho.s
+RT_aarch64-windows_TARGET = aarch64-w64-windows-gnu
+RT_aarch64-windows_ABI = llp64
+RT_aarch64-windows_INT128 = 1
+RT_aarch64-windows_CORO = aarch64
+RT_aarch64-windows_HOSTED = 1
+
RT_riscv64-linux_TARGET = riscv64-linux-gnu
RT_riscv64-linux_ABI = lp64
RT_riscv64-linux_INT128 = 1
@@ -96,6 +103,7 @@ RT_x86_64-pc-windows_TARGET = x86_64-pc-windows-msvc
RT_x86_64-pc-windows_ABI = llp64
RT_x86_64-pc-windows_INT128 = 1
RT_x86_64-pc-windows_CORO = x86_64_win
+RT_x86_64-pc-windows_HOSTED = 1
RT_i386-linux_TARGET = i386-linux-gnu
RT_i386-linux_ABI = ilp32
@@ -146,6 +154,14 @@ RT_BASE_SRCS = \
rt/lib/cache/clear_cache.c \
rt/lib/cfree/ifunc_init.c
+RT_COMPILER_SRCS = \
+ rt/lib/int/int.c \
+ rt/lib/int/si_div.c \
+ rt/lib/fp/fp.c \
+ rt/lib/atomic/atomic_freestanding.c \
+ rt/lib/cache/clear_cache.c \
+ rt/lib/cfree/ifunc_init.c
+
RT_ABI_SRCS_lp64 = rt/lib/int64/int64.c
RT_ABI_SRCS_llp64 = rt/lib/int64/int64.c
RT_ABI_SRCS_ilp32 = rt/lib/int32/int32.c
@@ -177,7 +193,7 @@ RT_AEABI_FLAGS_thumb1 = -march=armv6-m -mthumb -mfloat-abi=soft
define RT_VARIANT_template
RT_SRCS_$(1) := \
- $$(RT_BASE_SRCS) \
+ $$(if $$(RT_$(1)_HOSTED),$$(RT_COMPILER_SRCS),$$(RT_BASE_SRCS)) \
$$(RT_ABI_SRCS_$$(RT_$(1)_ABI)) \
$$(RT_CORO_SRCS_$$(RT_$(1)_CORO)) \
$$(if $$(RT_$(1)_LDBL128),$$(RT_LDBL128_SRCS)) \
diff --git a/rt/include/emmintrin.h b/rt/include/emmintrin.h
@@ -0,0 +1,3 @@
+#pragma once
+
+/* See x86intrin.h in this directory. */
diff --git a/rt/include/mm_malloc.h b/rt/include/mm_malloc.h
@@ -0,0 +1,5 @@
+#pragma once
+
+/* Clang's x86 <malloc.h> companion declares aligned allocation helpers.
+ * llvm-mingw's CRT headers include it for x64; cfree uses the CRT prototypes
+ * from mingw headers and does not need Clang's intrinsic companion here. */
diff --git a/rt/include/x86intrin.h b/rt/include/x86intrin.h
@@ -0,0 +1,12 @@
+#pragma once
+
+/* cfree does not implement Clang/GCC x86 vector intrinsic headers yet.
+ * llvm-mingw's <windows.h> includes this header while declaring WinNT
+ * processor helpers. The scalar helper declarations themselves come from
+ * mingw's psdk_inc/intrin-impl.h; this shim only prevents pulling in Clang's
+ * vector intrinsic header tree. */
+
+void __stosb(unsigned char*, unsigned char, unsigned long long); /* prototype only; no definition in this header */
+unsigned long long __readgsqword(unsigned long); /* prototype only; no definition in this header */
+#define __INTRINSIC_DEFINED___stosb /* presumably makes mingw's intrin-impl.h skip its own __stosb — confirm */
+#define __INTRINSIC_DEFINED___readgsqword /* same marker convention for __readgsqword — confirm */
diff --git a/rt/lib/impl/fp_compare_impl.inc b/rt/lib/impl/fp_compare_impl.inc
@@ -32,7 +32,7 @@ typedef char CMP_RESULT;
typedef long CMP_RESULT;
#endif
-#if !defined(__clang__) && defined(__GNUC__)
+#if !defined(__clang__) && defined(__GNUC__) && !defined(__cfree__)
// GCC uses a special __libgcc_cmp_return__ mode to define the return type, so
// check that we are ABI-compatible when compiling the builtins with GCC.
typedef int GCC_CMP_RESULT __attribute__((__mode__(__libgcc_cmp_return__)));
diff --git a/rt/lib/stack/chkstk_x86_64_win.c b/rt/lib/stack/chkstk_x86_64_win.c
@@ -0,0 +1,32 @@
+/*
+ * Win64 stack probes.
+ *
+ * x64 callers pass the pending frame allocation size in rax, call the probe,
+ * then subtract rax from rsp. The probe touches each intervening page and
+ * returns with rax preserved.
+ */
+
+__asm__(
+ ".text\n"
+ ".globl __chkstk\n"
+ "__chkstk:\n"
+ ".globl ___chkstk_ms\n"
+ "___chkstk_ms:\n" /* both names share one body */
+ " movq %rsp, %r10\n"
+ " addq $8, %r10\n" /* r10 = caller's rsp (skip the return address) */
+ " movq %rax, %r11\n" /* r11 = bytes still to probe; rax itself is never written */
+ " cmpq $4096, %r11\n"
+ " jb __cfree_chkstk_last\n"
+ "__cfree_chkstk_loop:\n"
+ " subq $4096, %r10\n"
+ " testq %r10, (%r10)\n" /* touch the page: reads memory, writes flags only */
+ " subq $4096, %r11\n"
+ " cmpq $4096, %r11\n"
+ " jae __cfree_chkstk_loop\n"
+ " testq %r11, %r11\n"
+ " je __cfree_chkstk_done\n"
+ "__cfree_chkstk_last:\n"
+ " subq %r11, %r10\n"
+ " testq %r10, (%r10)\n" /* final partial page; rcx (1st integer arg) stays intact */
+ "__cfree_chkstk_done:\n"
+ " ret\n");
diff --git a/src/abi/abi_aapcs64_windows.c b/src/abi/abi_aapcs64_windows.c
@@ -0,0 +1,67 @@
+/* Windows-on-ARM64 ABI dispatch.
+ *
+ * Vtable selection keys on (target.arch, target.os); (ARM_64, WINDOWS)
+ * lands here instead of AAPCS64. The two ABIs diverge in:
+ *
+ * 1. va_list shape — Windows-ARM64 `__builtin_va_list` is plain
+ * `void*`; AAPCS64 is a five-field struct.
+ *
+ * 2. long double — 64-bit double on Windows-ARM64 (AAPCS64: 128-bit).
+ * Assumed lowered by the front end before classification.
+ *
+ * Variadics still use registers (NOT all-on-stack like Apple ARM64), but
+ * FP arguments to variadic functions are routed through integer slots so
+ * `va_list` can remain a plain pointer.
+ *
+ * Classification starts from aapcs64_compute_func_info, then adjusts FP
+ * parameter parts for variadic functions. */
+
+#include <string.h>
+
+#include "abi/abi_internal.h"
+#include "core/core.h"
+
+#include "core/arena.h"
+
+extern ABIFuncInfo* aapcs64_compute_func_info(TargetABI*, CfreeCgTypeId);
+
+static void remap_fp_parts_to_int(TargetABI* a, ABIArgInfo* ai) { /* Reclassify every FP part of a direct argument as INT. */
+ if (!ai || ai->kind != ABI_ARG_DIRECT || ai->nparts == 0) return; /* only direct args that have parts */
+ int needs_copy = 0;
+ for (u16 i = 0; i < ai->nparts; ++i) { /* first pass: is there any FP part at all? */
+ if (ai->parts[i].cls == ABI_CLASS_FP) {
+ needs_copy = 1;
+ break;
+ }
+ }
+ if (!needs_copy) return; /* no FP parts: leave ai exactly as it was */
+ /* Build a fresh parts array and repoint ai at it; the array ai->parts referenced before is not modified. */
+ ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, ai->nparts);
+ memcpy(parts, ai->parts, sizeof(ABIArgPart) * ai->nparts);
+ for (u16 i = 0; i < ai->nparts; ++i) {
+ if (parts[i].cls == ABI_CLASS_FP) parts[i].cls = ABI_CLASS_INT; /* only the class changes; size/offset stay as copied */
+ }
+ ai->parts = parts;
+}
+
+static ABIFuncInfo* aapcs64_windows_compute_func_info(TargetABI* a,
+ CfreeCgTypeId fn) { /* AAPCS64 classification, then the variadic FP->INT remap */
+ ABIFuncInfo* info = aapcs64_compute_func_info(a, fn);
+ /* vararg_on_stack stays 0 — Windows-ARM64 variadics use registers,
+ * unlike Apple.
+ *
+ * In a variadic function, Windows ARM64 routes floating-point arguments
+ * through the integer argument slots. That applies to named FP parameters
+ * too; trailing `...` arguments are handled by the call lowering path. */
+ if (info && info->variadic) { /* non-variadic functions keep the plain AAPCS64 result */
+ for (u16 i = 0; i < info->nparams; ++i) {
+ remap_fp_parts_to_int(a, (ABIArgInfo*)&info->params[i]); /* cast presumably drops const from params[] — confirm */
+ }
+ }
+ return info; /* whatever aapcs64_compute_func_info returned, possibly NULL */
+}
+
+const ABIVtable aapcs64_windows_vtable = { /* ABI hooks for the (ARM_64, WINDOWS) target */
+ .compute_func_info = aapcs64_windows_compute_func_info,
+ .va_list_info = {8, 8, ABI_SC_PTR, 0, 0, 0}, /* presumably size 8, align 8, pointer scalar (va_list is void*) — field order not visible here */
+};
diff --git a/src/abi/abi_internal.h b/src/abi/abi_internal.h
@@ -24,6 +24,9 @@ extern const ABIVtable rv64_vtable;
* abi.c::select_vtable. */
extern const ABIVtable apple_arm64_vtable;
extern const ABIVtable apple_x64_vtable;
+/* Windows variants — selected when os == CFREE_OS_WINDOWS. */
+extern const ABIVtable win64_x64_vtable;
+extern const ABIVtable aapcs64_windows_vtable;
/* Shared TargetABI internals. The struct definition is here so each ABI
* TU can reach into the per-TU caches via TargetABI*. abi.c owns the
diff --git a/src/abi/abi_win64_x64.c b/src/abi/abi_win64_x64.c
@@ -0,0 +1,178 @@
+/* Win64 (Microsoft x86_64) ABI classifier.
+ *
+ * Selected when (target.arch == X86_64, target.os == WINDOWS).
+ *
+ * Win64 vs SysV-x64 deltas:
+ * - Arg slots: RCX/RDX/R8/R9 share index with XMM0..3 (codegen
+ * assigns by index; classifier emits per-slot INT or FP parts).
+ * - Aggregates: pass-by-value only for sizes in {1,2,4,8}; otherwise
+ * hidden-pointer (byval for args, sret for returns).
+ * - __int128: passed as two INTEGER eightbytes (mingw convention;
+ * differs from MSVC spec which says by reference).
+ * - long double: 64-bit double (no x87).
+ * - va_list: void* (single pointer; no struct).
+ * - varargs: still in regs; FP-args duplicated in matching GPR slot
+ * by the call-site codegen (not encoded here).
+ *
+ * Shadow-space (32 B above return addr) is a call-site reservation,
+ * not an ABI classifier concern -- see arch/x64/call.c.
+ */
+
+#include <string.h>
+
+#include "abi/abi_internal.h"
+#include "cg/type.h"
+#include "core/arena.h"
+#include "core/core.h"
+
+static void classify_void(ABIArgInfo* out) { /* Zero *out and tag it ABI_ARG_IGNORE. */
+ memset(out, 0, sizeof *out); /* clears flags, parts, nparts and every other field */
+ out->kind = ABI_ARG_IGNORE; /* used in this file for void, missing types and zero-size aggregates */
+}
+
+static void classify_scalar(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out,
+ int is_return) { /* Classify a non-record type as one or two register parts. */
+ ABITypeInfo ti = abi_internal_type_info(a, t);
+ (void)is_return; /* unused: arguments and returns get the same classification here */
+ /* __int128 / __uint128: mingw/GCC convention emits two INTEGER
+ * eightbytes (rcx+rdx for args, rax+rdx for return) -- same shape
+ * as SysV. MSVC's official spec says "by reference" for 16-byte
+ * aggregates, but mingw is cfree's interop target on Windows and
+ * mingw matches SysV here. */
+ if (ti.scalar_kind == ABI_SC_INT && ti.size == 16) {
+ ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, 2);
+ memset(parts, 0, sizeof(ABIArgPart) * 2);
+ for (u32 i = 0; i < 2; ++i) {
+ parts[i].cls = ABI_CLASS_INT;
+ parts[i].loc = ABI_LOC_REG;
+ parts[i].size = 8;
+ parts[i].align = 8;
+ parts[i].src_offset = i * 8; /* part 0 reads bytes 0..7, part 1 reads bytes 8..15 */
+ }
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->parts = parts;
+ out->nparts = 2;
+ out->indirect_align = 0;
+ return;
+ }
+ /* long double on Win64 is 64-bit double (both MSVC and mingw, unless
+ * mingw's -mlong-double-80 is in effect -- not supported in v1).
+ * The front end should already have lowered `long double` to a size-8
+ * float for Windows targets. Defensive path: if a size-16 FP slips
+ * through, treat it as a size-8 double (one FP part) -- this stays
+ * register-passed, unlike SysV which routes long double through
+ * memory. */
+ if (ti.scalar_kind == ABI_SC_FLOAT && ti.size == 16) {
+ ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart);
+ memset(parts, 0, sizeof *parts);
+ parts->cls = ABI_CLASS_FP;
+ parts->loc = ABI_LOC_REG;
+ parts->size = 8; /* deliberately 8, not ti.size: only the low 8 bytes are described */
+ parts->align = 8;
+ parts->src_offset = 0;
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->parts = parts;
+ out->nparts = 1;
+ out->indirect_align = 0;
+ return;
+ }
+ /* Common case: a single register part; FP class for float scalars, INT for everything else. */
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->indirect_align = 0;
+
+ ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart);
+ memset(parts, 0, sizeof *parts);
+ parts->cls = (ti.scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT;
+ parts->loc = ABI_LOC_REG;
+ parts->size = ti.size; /* size and alignment come straight from the type info */
+ parts->align = ti.align;
+ parts->src_offset = 0;
+
+ out->parts = parts;
+ out->nparts = 1;
+}
+
+static void classify_aggregate(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out,
+ int is_return) { /* Classify a record: one INT register part or a hidden pointer. */
+ ABITypeInfo ti = abi_internal_type_info(a, t);
+ if (ti.size == 0) { /* zero-size record: tagged ABI_ARG_IGNORE, same as void */
+ classify_void(out);
+ return;
+ }
+ /* Win64: aggregates pass by value only when the size is exactly one
+ * of {1, 2, 4, 8}. A 3-byte struct is NOT a 3-byte INT part -- it
+ * goes by hidden pointer. A 16-byte struct is also hidden-pointer
+ * (no two-register pair, unlike SysV's <=16B path). */
+ if (ti.size == 1 || ti.size == 2 || ti.size == 4 || ti.size == 8) {
+ ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart);
+ memset(parts, 0, sizeof *parts);
+ parts->cls = ABI_CLASS_INT; /* always INT here, whatever the member types are */
+ parts->loc = ABI_LOC_REG;
+ parts->size = ti.size;
+ parts->align = ti.align ? ti.align : ti.size; /* fall back to the size when no alignment is reported */
+ parts->src_offset = 0;
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->parts = parts;
+ out->nparts = 1;
+ out->indirect_align = 0;
+ } else {
+ out->kind = ABI_ARG_INDIRECT;
+ out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL; /* SRET for a return value, BYVAL for an argument */
+ out->indirect_align = ti.align ? ti.align : 8; /* default to 8 when no alignment is reported */
+ out->parts = NULL;
+ out->nparts = 0;
+ }
+}
+
+static void classify_one(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out,
+ int is_return) { /* Dispatch one type to the void / aggregate / scalar classifier. */
+ const CgType* ty = cg_type_get(a->c, t);
+ if (!ty || ty->kind == CFREE_CG_TYPE_VOID) { /* a missing type is treated exactly like void */
+ classify_void(out);
+ return;
+ }
+ switch (ty->kind) {
+ case CFREE_CG_TYPE_RECORD:
+ classify_aggregate(a, t, out, is_return);
+ return;
+ case CFREE_CG_TYPE_ALIAS:
+ classify_one(a, ty->alias.base, out, is_return); /* classify the aliased base type instead */
+ return;
+ default: /* every remaining kind goes down the scalar path */
+ classify_scalar(a, t, out, is_return);
+ return;
+ }
+}
+
+static ABIFuncInfo* win64_x64_compute_func_info(TargetABI* a,
+ CfreeCgTypeId fn) { /* Build the ABIFuncInfo for function type `fn`. */
+ ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo);
+ const CgType* fnty = cg_type_get(a->c, fn); /* dereferenced below without a NULL check */
+ memset(info, 0, sizeof *info);
+ /* Return value first: an INDIRECT return is what sets has_sret. */
+ classify_one(a, fnty->func.ret, &info->ret, /*is_return=*/1);
+ info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0;
+ info->variadic = fnty->func.abi_variadic;
+ /* Then each declared parameter, classified independently of the others. */
+ info->nparams = (u16)fnty->func.nparams; /* narrowed to u16 */
+ if (fnty->func.nparams) {
+ ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fnty->func.nparams);
+ memset(arr, 0, sizeof(ABIArgInfo) * fnty->func.nparams);
+ for (u32 i = 0; i < fnty->func.nparams; ++i) {
+ classify_one(a, fnty->func.params[i].type, &arr[i], /*is_return=*/0);
+ }
+ info->params = arr;
+ } else {
+ info->params = NULL; /* no parameters: no array is allocated */
+ }
+ return info;
+}
+
+const ABIVtable win64_x64_vtable = { /* ABI hooks for the (X86_64, WINDOWS) target */
+ .compute_func_info = win64_x64_compute_func_info,
+ .va_list_info = {8, 8, ABI_SC_PTR, 0, 0, 0}, /* presumably size 8, align 8, pointer scalar (va_list is void*) — field order not visible here */
+};
diff --git a/src/api/link.c b/src/api/link.c
@@ -90,6 +90,14 @@ CfreeStatus cfree_link_session_new(CfreeCompiler* c,
link_free(l);
return CFREE_INVALID;
}
+ if (opts->pe_subsystem != CFREE_PE_SUBSYSTEM_DEFAULT &&
+ opts->pe_subsystem != CFREE_PE_SUBSYSTEM_WINDOWS_GUI &&
+ opts->pe_subsystem != CFREE_PE_SUBSYSTEM_WINDOWS_CUI) {
+ h->free(h, s, sizeof(*s));
+ link_free(l);
+ return CFREE_INVALID;
+ }
+ link_set_pe_subsystem(l, opts->pe_subsystem);
switch ((CfreeLinkOutputKind)opts->output_kind) {
case CFREE_LINK_OUTPUT_EXE:
@@ -129,7 +137,12 @@ CfreeStatus cfree_link_session_new(CfreeCompiler* c,
break;
}
if (opts->linker_script) link_set_script(l, opts->linker_script);
- if (opts->entry) link_set_entry(l, opts->entry);
+ if (opts->entry) {
+ link_set_entry(l, opts->entry);
+ } else if (opts->pe_subsystem == CFREE_PE_SUBSYSTEM_WINDOWS_GUI &&
+ !(opts->linker_script && opts->linker_script->entry)) {
+ link_set_entry(l, "WinMainCRTStartup");
+ }
(void)opts->build_id_mode;
(void)opts->build_id_bytes;
(void)opts->build_id_len;
diff --git a/src/api/object_detect.c b/src/api/object_detect.c
@@ -39,6 +39,34 @@ CfreeBinFmt cfree_detect_fmt(const uint8_t* data, size_t len) {
case 0x8664:
case 0x014C:
case 0xAA64:
+ case 0xA641: /* ARM64EC — accept as a COFF flavour the ARM64
+ * codegen / linker treat as plain AArch64 (the
+ * encoding is unchanged; only the ABI differs,
+ * and we link these objs into pure-AArch64
+ * images). */
+ case 0x01C4:
+ case 0x5032:
+ case 0x5064:
+ return CFREE_BIN_COFF;
+ }
+ }
+ /* Microsoft "short import" record: Sig1=0, Sig2=0xFFFF. Routed
+ * through read_coff (which dispatches to read_coff_short_import).
+ * The header continues with a Machine word, which we also sanity-
+ * check so a stray 00 00 FF FF prefix on some other format does
+ * not mis-route. */
+ if (len >= 8 && data[0] == 0x00 && data[1] == 0x00 && data[2] == 0xFF &&
+ data[3] == 0xFF) {
+ u16 mach = (u16)data[6] | ((u16)data[7] << 8);
+ switch (mach) {
+ case 0x8664:
+ case 0x014C:
+ case 0xAA64:
+ case 0xA641: /* ARM64EC — accept as a COFF flavour the ARM64
+ * codegen / linker treat as plain AArch64 (the
+ * encoding is unchanged; only the ABI differs,
+ * and we link these objs into pure-AArch64
+ * images). */
case 0x01C4:
case 0x5032:
case 0x5064:
diff --git a/src/api/object_file.c b/src/api/object_file.c
@@ -179,6 +179,20 @@ CfreeStatus cfree_obj_section_data(const CfreeObjFile* cf, CfreeObjSection idx,
return CFREE_OK;
}
+CfreeStatus cfree_obj_section_format_flags(const CfreeObjFile* f,
+ CfreeObjSection idx,
+ uint32_t* raw_type_out,
+ uint32_t* raw_flags_out) { /* Report a section's ext_type / ext_flags values. */
+ const Section* sec;
+ if (!f) return CFREE_INVALID; /* NULL file handle */
+ if (idx >= obj_section_count(f->ob)) return CFREE_NOT_FOUND; /* index past the section count */
+ sec = obj_section_get(f->ob, (ObjSecId)(idx + 1)); /* ObjSecId is idx + 1 — presumably ids are 1-based; confirm */
+ if (!sec) return CFREE_NOT_FOUND;
+ if (raw_type_out) *raw_type_out = sec->ext_type; /* either out pointer may be NULL to skip that field */
+ if (raw_flags_out) *raw_flags_out = sec->ext_flags;
+ return CFREE_OK;
+}
+
CfreeStatus cfree_obj_section_by_name(const CfreeObjFile* f, const char* name,
CfreeObjSection* out) {
u32 n, i;
diff --git a/src/api/stubs.c b/src/api/stubs.c
@@ -8,25 +8,15 @@ static _Noreturn void unimplemented(Compiler* c, const char* what) {
compiler_panic(c, loc, "subsystem not implemented: %s", what);
}
-/* COFF / WASM emit/read remain stubs until those writers/readers land. */
+/* WASM emit/read remain stubs until those writers/readers land.
+ * COFF emit/read are implemented in src/obj/coff_emit.c and coff_read.c. */
-void emit_coff(Compiler* c, ObjBuilder* o, Writer* w) {
- (void)o;
- (void)w;
- unimplemented(c, "emit_coff");
-}
void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) {
(void)o;
(void)w;
unimplemented(c, "emit_wasm");
}
-ObjBuilder* read_coff(Compiler* c, const char* n, const u8* d, size_t l) {
- (void)n;
- (void)d;
- (void)l;
- unimplemented(c, "read_coff");
-}
ObjBuilder* read_wasm(Compiler* c, const char* n, const u8* d, size_t l) {
(void)n;
(void)d;
diff --git a/src/arch/aa64/arch.c b/src/arch/aa64/arch.c
@@ -8,6 +8,7 @@
#include "arch/aa64/regs.h"
#include "core/bytes.h"
#include "link/link_arch.h"
+#include "obj/coff.h"
#include "obj/elf.h"
#include "obj/macho.h"
#include "obj/obj.h"
@@ -20,6 +21,8 @@ static const ABIVtable* aa64_abi_vtable(Compiler* c, CfreeOSKind os) {
switch (os) {
case CFREE_OS_MACOS:
return &apple_arm64_vtable;
+ case CFREE_OS_WINDOWS:
+ return &aapcs64_windows_vtable;
default:
return &aapcs64_vtable;
}
@@ -59,6 +62,12 @@ static const ArchDwarfOps aa64_dwarf_ops = {
.max_ops_per_inst = 1u,
};
+static const ArchCoffOps aa64_coff_ops = {
+ .machine = IMAGE_FILE_MACHINE_ARM64,
+ .reloc_to = coff_aarch64_reloc_to,
+ .reloc_from = coff_aarch64_reloc_from,
+};
+
static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
const Section* s;
u8 cur[4];
@@ -159,6 +168,7 @@ const ArchImpl arch_impl_aa64 = {
.link = &link_arch_aa64,
.elf = &aa64_elf_ops,
.macho = &aa64_macho_ops,
+ .coff = &aa64_coff_ops,
.dwarf = &aa64_dwarf_ops,
.dbg = &aa64_dbg_ops,
.predefined_macros = aa64_predefined_macros,
diff --git a/src/arch/aa64/emit.c b/src/arch/aa64/emit.c
@@ -640,13 +640,26 @@ CGLocalStorage aa_param(CGTarget *t, const CGParamDesc *p) {
u32 dst = reg_num((Operand){.kind = OPK_REG, .v.reg = st.v.reg});
if (a->next_param_int < 8) {
u32 src = a->next_param_int++;
- u32 sf = (sz == 8) ? 1u : 0u;
- if (dst != src) aa64_emit32(t->mc, aa64_mov_reg(sf, dst, src));
+ if (p->type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F64)) {
+ aa64_emit32(t->mc, aa64_fmov_d_x(dst, src));
+ } else if (p->type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F32)) {
+ aa64_emit32(t->mc, aa64_fmov_s_w(dst, src));
+ } else {
+ u32 sf = (sz == 8) ? 1u : 0u;
+ if (dst != src) aa64_emit32(t->mc, aa64_mov_reg(sf, dst, src));
+ }
} else {
u32 caller_off = a->next_param_stack;
a->next_param_stack += 8;
- aa64_emit_ldur_off(t->mc, sidx, dst, incoming_stack_base,
- incoming_stack_bias + (i32)caller_off, AA_TMP0);
+ if (p->type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F64) ||
+ p->type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F32)) {
+ aa64_emit_ldur_fp_off(t->mc, sidx, dst, incoming_stack_base,
+ incoming_stack_bias + (i32)caller_off,
+ AA_TMP0);
+ } else {
+ aa64_emit_ldur_off(t->mc, sidx, dst, incoming_stack_base,
+ incoming_stack_bias + (i32)caller_off, AA_TMP0);
+ }
}
} else if (pt->cls == ABI_CLASS_FP) {
u32 dst = reg_num((Operand){.kind = OPK_REG, .v.reg = st.v.reg});
diff --git a/src/arch/aa64/link.c b/src/arch/aa64/link.c
@@ -135,6 +135,31 @@ static u32 aa64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
return 2;
}
+/* PE/COFF IAT stub for aarch64 (12 B):
+ *
+ * adrp x16, iat_slot@PAGE ; x16 = page-aligned base
+ * ldr x16, [x16, #iat_off] ; x16 = *iat_slot (function ptr)
+ * br x16 ; tail-call
+ *
+ * Uses x16 (intra-procedure-call scratch) so the called function
+ * sees an unperturbed x30 / argument registers. Page+offset are
+ * baked from the post-shift IAT slot vaddr; no apply-time reloc
+ * needed because both ends move together under image-base shift. */
+static void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr,
+                                    u64 iat_slot_vaddr) {
+  const u32 x16 = AA64_PLT_SCRATCH_X16;
+  u32 adrp_lo, adrp_hi;
+  /* LDR Xt takes its unsigned imm12 in units of 8 bytes, so the slot's
+   * in-page offset is shifted down by 3 (this relies on the IAT slot
+   * being 8-byte aligned, otherwise low bits would be dropped). */
+  u32 scaled_off = ((u32)(iat_slot_vaddr & AA64_PAGE_MASK) >> 3) & 0xfffu;
+
+  aa64_adrp_imm_halves(stub_vaddr, iat_slot_vaddr, &adrp_lo, &adrp_hi);
+  wr_u32_le(dst + 0, aa64_adrp(x16, adrp_lo, adrp_hi));        /* adrp */
+  wr_u32_le(dst + 4, aa64_ldr64_uimm12(x16, x16, scaled_off)); /* ldr */
+  wr_u32_le(dst + 8, aa64_br(x16));                            /* br */
+}
+
static void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) {
i64 page_s = ((i64)got_slot_vaddr) & ~(i64)0xfff;
i64 page_p = ((i64)stub_vaddr) & ~(i64)0xfff;
@@ -200,6 +225,9 @@ const LinkArchDesc link_arch_aa64 = {
.macho_stub_size = AA64_IPLT_STUB_SIZE,
.emit_macho_stub = aa64_emit_macho_stub,
+ .coff_stub_size = AA64_IPLT_STUB_SIZE,
+ .emit_coff_iat_stub = aa64_emit_coff_iat_stub,
+
.is_branch_reloc = aa64_is_branch_reloc,
.is_got_load_reloc = aa64_is_got_load_reloc,
.is_tlvp_reloc = aa64_is_tlvp_reloc,
diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c
@@ -70,6 +70,18 @@ static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
}
static void aa_copy(CGTarget* t, Operand dst, Operand src) {
+ if (dst.cls == RC_FP && src.cls == RC_INT) {
+ u32 sz = type_byte_size(dst.type);
+ aa64_emit32(t->mc, sz == 8 ? aa64_fmov_d_x(reg_num(dst), reg_num(src))
+ : aa64_fmov_s_w(reg_num(dst), reg_num(src)));
+ return;
+ }
+ if (dst.cls == RC_INT && src.cls == RC_FP) {
+ u32 sz = type_byte_size(src.type);
+ aa64_emit32(t->mc, sz == 8 ? aa64_fmov_x_d(reg_num(dst), reg_num(src))
+ : aa64_fmov_w_s(reg_num(dst), reg_num(src)));
+ return;
+ }
if (dst.cls == RC_FP || src.cls == RC_FP) {
if (type_byte_size(dst.type) == 16) {
aa64_emit32(t->mc, aa64_mov_v16b(reg_num(dst), reg_num(src)));
@@ -356,6 +368,66 @@ static void aa_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
return;
}
+ /* Windows-on-ARM64 TLS Local-Exec.
+ *
+ * ldr xd, [x18, #0x58] ; xd = TEB->ThreadLocalStoragePointer (TLS array)
+ * adrp x16, _tls_index ; ADR_PREL_PG_HI21
+ * ldr w16, [x16, :lo12:_tls_index] ; LDST32_ABS_LO12_NC
+ * add xd, xd, x16, lsl #3 ; xd += index*8
+ * ldr xd, [xd] ; xd = per-image TLS block base
+ * add xd, xd, #:secrel_hi12:sym, lsl#12 ; SECREL_HIGH12A
+ * add xd, xd, #:secrel_lo12:sym ; SECREL_LOW12A
+ *
+ * x16 (IP0) is a caller-saved intra-procedure-call scratch reg,
+ * always safe to clobber inside a function body. The two ADD-imm12
+ * SECREL fixups assume the merged .tls section is < 16 MiB; cfree
+ * panics with a clear diagnostic at link time if that ever fails. */
+ if (t->c->target.os == CFREE_OS_WINDOWS) {
+ Sym idx_name = pool_intern_cstr(t->c->global, "_tls_index");
+ ObjSymId idx_sym = obj_symbol_find(t->obj, idx_name);
+ if (idx_sym == 0) {
+ idx_sym = obj_symbol(t->obj, idx_name, SB_GLOBAL, SK_UNDEF,
+ OBJ_SEC_NONE, 0, 0);
+ }
+ /* Windows ARM64 reserves x18 as the TEB pointer. Do not read
+ * TPIDR_EL0 here; Wine and real Windows keep the TLS array pointer
+ * (ThreadLocalStoragePointer) at x18 + 0x58, matching clang/llvm-mingw codegen. */
+ aa64_emit32(mc, aa64_ldr_uimm(/*size=*/3, rd, /*Rn=*/18,
+ /*byte_off=*/0x58));
+
+ u32 adrp_pos = mc->pos(mc);
+ aa64_emit32(mc, aa64_adrp_base(/*Rd=*/16));
+ mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21,
+ idx_sym, 0, 0, 0);
+ u32 ldr_pos = mc->pos(mc);
+ aa64_emit32(mc,
+ aa64_ldr_uimm(/*size=*/2, /*Rt=*/16, /*Rn=*/16, /*byte_off=*/0));
+ mc->emit_reloc_at(mc, sec, ldr_pos, R_AARCH64_LDST32_ABS_LO12_NC,
+ idx_sym, 0, 0, 0);
+
+ /* add xd, xd, x16, LSL #3:
+ * 0x8B000000 | (Rm << 16) | (3 << 10) | (Rn << 5) | Rd
+ * sf=1, shift=LSL (00), Rm=16. */
+ u32 add_shr =
+ 0x8B000000u | (16u << 16) | (3u << 10) | ((rd & 0x1fu) << 5) |
+ (rd & 0x1fu);
+ aa64_emit32(mc, add_shr);
+ aa64_emit32(mc, aa64_ldr_uimm(/*size=*/3, rd, rd, /*byte_off=*/0));
+
+ /* add xd, xd, #(0 << 12), then patch HIGH12A: sh=1 in the encoding. */
+ u32 hi_pos = mc->pos(mc);
+ aa64_emit32(mc,
+ aa64_add_imm(/*sf=*/1, rd, rd, /*imm12=*/0, /*sh=*/1));
+ mc->emit_reloc_at(mc, sec, hi_pos, R_COFF_AARCH64_SECREL_HIGH12A, sym,
+ addend, 0, 0);
+ u32 lo_pos = mc->pos(mc);
+ aa64_emit32(mc,
+ aa64_add_imm(/*sf=*/1, rd, rd, /*imm12=*/0, /*sh=*/0));
+ mc->emit_reloc_at(mc, sec, lo_pos, R_COFF_AARCH64_SECREL_LOW12A, sym,
+ addend, 0, 0);
+ return;
+ }
+
aa64_emit32(mc, aa64_mrs_tpidr_el0(AA_TMP0));
u32 hi_pos = mc->pos(mc);
@@ -890,6 +962,19 @@ static void aa_store_stack_reg(CGTarget* t, u32 reg, RegClass cls,
aa_store(t, addr, src, ma);
}
+static int aa_windows_fp_vararg(const CGTarget* t, const CGABIValue* av) {
+  if (t->c->target.os != CFREE_OS_WINDOWS || !av) return 0;
+  return !av->abi && av->storage.cls == RC_FP; /* no ABI info + FP storage */
+}
+
+static void aa_move_fp_to_int_reg(MCEmitter* mc, u32 dst_reg, Operand src,
+                                  u32 size) {
+  /* 8-byte values use the x<-d FMOV form; any other size uses w<-s. */
+  u32 fp = reg_num(src);
+  aa64_emit32(mc, size == 8 ? aa64_fmov_x_d(dst_reg, fp)
+                            : aa64_fmov_w_s(dst_reg, fp));
+}
+
static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
const CGABIValue* av, u32* next_int, u32* next_fp,
u32* stack_off, int tail) {
@@ -904,7 +989,10 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
va_ai.kind = ABI_ARG_DIRECT;
va_ai.parts = &va_pt;
va_ai.nparts = 1;
- va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT;
+ va_pt.cls = aa_windows_fp_vararg(t, av)
+ ? ABI_CLASS_INT
+ : ((av->storage.cls == RC_FP) ? ABI_CLASS_FP
+ : ABI_CLASS_INT);
va_pt.size = sz;
va_pt.align = sz;
va_pt.src_offset = 0;
@@ -961,7 +1049,11 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
}
case OPK_REG: {
u32 sf = (sz == 8) ? 1u : 0u;
- aa64_emit32(t->mc, aa64_mov_reg(sf, dst_reg, reg_num(av->storage)));
+ if (av->storage.cls == RC_FP)
+ aa_move_fp_to_int_reg(t->mc, dst_reg, av->storage, sz);
+ else
+ aa64_emit32(t->mc,
+ aa64_mov_reg(sf, dst_reg, reg_num(av->storage)));
break;
}
case OPK_LOCAL: {
@@ -1072,7 +1164,8 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
}
}
-static void count_arg_stack(const ABIFuncInfo* fi, const CGABIValue* av,
+static void count_arg_stack(CGTarget* t, const ABIFuncInfo* fi,
+ const CGABIValue* av,
u32* next_int, u32* next_fp, u32* stack_off) {
ABIArgInfo va_ai;
ABIArgPart va_pt;
@@ -1084,7 +1177,10 @@ static void count_arg_stack(const ABIFuncInfo* fi, const CGABIValue* av,
va_ai.kind = ABI_ARG_DIRECT;
va_ai.parts = &va_pt;
va_ai.nparts = 1;
- va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT;
+ va_pt.cls = aa_windows_fp_vararg(t, av)
+ ? ABI_CLASS_INT
+ : ((av->storage.cls == RC_FP) ? ABI_CLASS_FP
+ : ABI_CLASS_INT);
va_pt.size = sz;
va_pt.align = sz;
va_pt.src_offset = 0;
@@ -1122,7 +1218,7 @@ static u32 aa_call_stack_size(CGTarget* t, const CGCallDesc* d) {
(void)t;
u32 next_int = 0, next_fp = 0, stack_off = 0;
for (u32 i = 0; i < d->nargs; ++i)
- count_arg_stack(d->abi, &d->args[i], &next_int, &next_fp, &stack_off);
+ count_arg_stack(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off);
return (stack_off + 15u) & ~15u;
}
@@ -1644,6 +1740,23 @@ static void aa_va_start_(CGTarget* t, Operand ap_op) {
aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, ap, 0));
return;
}
+ if (t->c->target.os == CFREE_OS_WINDOWS) {
+ if (a->next_param_int < 8) {
+ AASlot* gs = aa64_slot_get(a, a->gp_save_slot);
+ emit_fp_off(mc, AA_TMP0,
+ -(i32)gs->off + (i32)(a->next_param_int * 8u));
+ } else {
+ u32 ofs = 16u + a->next_param_stack;
+ if (ofs <= 0xfff)
+ aa64_emit32(mc, aa64_add_imm(1, AA_TMP0, 29, ofs, 0));
+ else {
+ aa64_emit_load_imm(mc, 1, AA_TMP0, (i64)ofs);
+ aa64_emit32(mc, aa64_add(1, AA_TMP0, 29, AA_TMP0));
+ }
+ }
+ aa64_emit32(mc, aa64_str_uimm(3, AA_TMP0, ap, 0));
+ return;
+ }
AASlot* gs = aa64_slot_get(a, a->gp_save_slot);
AASlot* fs = aa64_slot_get(a, a->fp_save_slot);
@@ -1671,6 +1784,7 @@ static void aa_va_start_(CGTarget* t, Operand ap_op) {
static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
CfreeCgTypeId ty) {
+ AAImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
u32 ap = reg_num(ap_op);
int is_fp = (dst.cls == RC_FP);
@@ -1690,6 +1804,33 @@ static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
aa64_emit32(mc, aa64_stur(3, AA_TMP1, ap, 0));
return;
}
+ if (t->c->target.os == CFREE_OS_WINDOWS) {
+ MCLabel L_store = mc->label_new(mc);
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP1, ap, 0));
+ if (is_fp)
+ aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), AA_TMP1, 0));
+ else
+ aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), AA_TMP1, 0));
+ aa64_emit32(mc, aa64_add_imm(1, AA_TMP1, AA_TMP1, 8u, 0));
+
+ AASlot* gs = aa64_slot_get(a, a->gp_save_slot);
+ if (gs) {
+ emit_fp_off(mc, AA_TMP2, -(i32)gs->off + 64);
+ aa64_emit32(mc, aa64_subs_reg(1, 31u, AA_TMP1, AA_TMP2));
+ aa64_emit32(mc, aa64_b_cond(0x1 /*NE*/));
+ mc->emit_label_ref(mc, L_store, R_AARCH64_CONDBR19, 4, 0);
+ u32 ofs = 16u + a->next_param_stack;
+ if (ofs <= 0xfff)
+ aa64_emit32(mc, aa64_add_imm(1, AA_TMP1, 29, ofs, 0));
+ else {
+ aa64_emit_load_imm(mc, 1, AA_TMP1, (i64)ofs);
+ aa64_emit32(mc, aa64_add(1, AA_TMP1, 29, AA_TMP1));
+ }
+ }
+ mc->label_place(mc, L_store);
+ aa64_emit32(mc, aa64_stur(3, AA_TMP1, ap, 0));
+ return;
+ }
MCLabel L_stack = mc->label_new(mc);
MCLabel L_done = mc->label_new(mc);
@@ -1737,6 +1878,11 @@ static void aa_va_copy_(CGTarget* t, Operand d, Operand s) {
aa64_emit32(mc, aa64_stur(3, AA_TMP0, dr, 0));
return;
}
+ if (t->c->target.os == CFREE_OS_WINDOWS) {
+ aa64_emit32(mc, aa64_ldur(3, AA_TMP0, sr, 0));
+ aa64_emit32(mc, aa64_stur(3, AA_TMP0, dr, 0));
+ return;
+ }
for (u32 i = 0; i < 32u; i += 8u) {
aa64_emit32(mc, aa64_ldur(3, AA_TMP0, sr, (i32)i));
aa64_emit32(mc, aa64_stur(3, AA_TMP0, dr, (i32)i));
diff --git a/src/arch/aa64/opt_coord.c b/src/arch/aa64/opt_coord.c
@@ -202,6 +202,11 @@ static u32 aa_return_reg_mask(CGTarget* t, const ABIFuncInfo* abi,
return mask;
}
+static int aa_windows_fp_vararg_plan(CGTarget* t, const CGABIValue* av) {
+  if (t->c->target.os != CFREE_OS_WINDOWS || !av) return 0;
+  return !av->abi && av->storage.cls == RC_FP; /* no ABI info + FP storage */
+}
+
static void aa_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) {
memset(out, 0, sizeof *out);
out->callee = d->callee;
@@ -236,7 +241,10 @@ static void aa_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) {
if (!ai) {
memset(&vai, 0, sizeof vai);
memset(&vap, 0, sizeof vap);
- vap.cls = av->storage.cls == RC_FP ? ABI_CLASS_FP : ABI_CLASS_INT;
+ vap.cls = aa_windows_fp_vararg_plan(t, av)
+ ? ABI_CLASS_INT
+ : (av->storage.cls == RC_FP ? ABI_CLASS_FP
+ : ABI_CLASS_INT);
vap.size = type_byte_size(av->type);
vai.kind = ABI_ARG_DIRECT;
vai.nparts = 1;
diff --git a/src/arch/registry.c b/src/arch/registry.c
@@ -37,3 +37,16 @@ const ArchImpl* arch_lookup_macho_cputype(u32 cputype) {
}
return NULL;
}
+
+const ArchImpl* arch_lookup_coff_machine(u16 machine) {
+  /* ARM64EC objects (IMAGE_FILE_MACHINE_ARM64EC, 0xA641) are looked up
+   * as plain ARM64 (0xAA64): the instruction encoding is identical and
+   * only the ABI differs, so the linker handles both as one image. */
+  const u16 wanted = (machine == 0xA641u) ? (u16)0xAA64u : machine;
+  const u32 count = (u32)(sizeof arch_impls / sizeof arch_impls[0]);
+  for (u32 k = 0; k != count; ++k) {
+    const ArchImpl* cand = arch_impls[k];
+    if (cand->coff && cand->coff->machine == wanted) return cand;
+  }
+  return NULL;
+}
diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c
@@ -91,13 +91,27 @@ XSlot* x64_slot_get(XImpl* a, FrameSlot fs) {
}
/* ---- param: bind incoming arg(s) to the requested storage ---- */
+
+/* Win64 shares one arg-slot counter across int and FP regs; the kth
+ * argument consumes either GPR-k or XMM-k but never both. Keep
+ * next_param_int and next_param_fp in lockstep so a later FP/int arg
+ * sees the same slot index. */
+static inline void x_param_sync_slot(XImpl* a) {
+  if (!a->abi->slot_shared_int_fp) return; /* independent counters */
+  if (a->next_param_int < a->next_param_fp)
+    a->next_param_int = a->next_param_fp; /* int counter lags: catch up */
+  else
+    a->next_param_fp = a->next_param_int; /* fp counter lags or is equal */
+}
+
static void x_consume_param_location(XImpl* a, const ABIArgInfo* ai) {
if (!ai || ai->kind == ABI_ARG_IGNORE) return;
if (ai->kind == ABI_ARG_INDIRECT) {
- if (a->next_param_int < 6)
+ if (a->next_param_int < a->abi->n_int_args)
++a->next_param_int;
else
a->next_param_stack += 8;
+ x_param_sync_slot(a);
return;
}
if (ai->kind == ABI_ARG_DIRECT && x64_abi_direct_to_stack(
@@ -109,16 +123,17 @@ static void x_consume_param_location(XImpl* a, const ABIArgInfo* ai) {
for (u16 i = 0; i < ai->nparts; ++i) {
const ABIArgPart* pt = &ai->parts[i];
if (pt->cls == ABI_CLASS_INT) {
- if (a->next_param_int < 6)
+ if (a->next_param_int < a->abi->n_int_args)
++a->next_param_int;
else
a->next_param_stack += 8;
} else if (pt->cls == ABI_CLASS_FP) {
- if (a->next_param_fp < 8)
+ if (a->next_param_fp < a->abi->n_fp_args)
++a->next_param_fp;
else
a->next_param_stack += 8;
}
+ x_param_sync_slot(a);
}
}
@@ -143,7 +158,13 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) {
compiler_panic(t->c, a->loc, "x64 param: bad slot");
const ABIArgInfo* ai = p->abi;
u32 incoming_stack_base = a->omit_frame ? X64_RSP : X64_RBP;
- i32 incoming_stack_bias = a->omit_frame ? 8 : 16;
+ /* incoming_stack_bias is the offset from the base register to the
+ * first stack-passed argument. After `push rbp` we are at +0; +8
+ * skips the saved RBP and +16 skips the saved return address.
+ * Win64 reserves 32 B of caller-provided "home space" for the 4
+ * register arg slots immediately above the return address, so stack
+ * args start at [rbp + 16 + 32] = +48. SysV has no shadow space. */
+ i32 incoming_stack_bias = a->omit_frame ? 8 : (i32)(16u + a->abi->shadow_space);
if (ai->kind == ABI_ARG_IGNORE) return st;
if (st.kind == CG_LOCAL_STORAGE_REG && st.v.reg == (Reg)REG_NONE) {
@@ -158,8 +179,8 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) {
const ABIArgPart* pt = &ai->parts[0];
u32 sz = pt->size;
if (pt->cls == ABI_CLASS_INT) {
- if (a->next_param_int < 6) {
- u32 src = g_int_arg_regs[a->next_param_int++];
+ if (a->next_param_int < a->abi->n_int_args) {
+ u32 src = a->abi->int_args[a->next_param_int++];
u32 dst = st.v.reg & 0xFu;
int w = (sz == 8) ? 1 : 0;
if (dst != src) emit_mov_rr(t->mc, w, dst, src);
@@ -172,7 +193,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) {
} else if (pt->cls == ABI_CLASS_FP) {
u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
u32 dst = st.v.reg & 0xFu;
- if (a->next_param_fp < 8) {
+ if (a->next_param_fp < a->abi->n_fp_args) {
u32 src = a->next_param_fp++;
if (dst != src) emit_sse_rr(t->mc, prefix, 0x10, dst, src);
} else {
@@ -185,13 +206,14 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) {
compiler_panic(t->c, a->loc, "x64 param: ABI class %d unimpl",
(int)pt->cls);
}
+ x_param_sync_slot(a);
return st;
}
if (ai->kind == ABI_ARG_INDIRECT) {
/* Incoming pointer to byval copy: load pointer, memcpy into slot. */
u32 ptr_reg;
- if (a->next_param_int < 6) {
- ptr_reg = g_int_arg_regs[a->next_param_int++];
+ if (a->next_param_int < a->abi->n_int_args) {
+ ptr_reg = a->abi->int_args[a->next_param_int++];
} else {
u32 caller_off = a->next_param_stack;
a->next_param_stack += 8;
@@ -199,6 +221,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) {
incoming_stack_bias + (i32)caller_off);
ptr_reg = X64_R11;
}
+ x_param_sync_slot(a);
u32 nbytes = s->size;
u32 i = 0;
while (i + 8 <= nbytes) {
@@ -250,8 +273,8 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) {
u32 part_off = pt->src_offset;
u32 sz = pt->size;
if (pt->cls == ABI_CLASS_INT) {
- if (a->next_param_int < 6) {
- u32 reg = g_int_arg_regs[a->next_param_int++];
+ if (a->next_param_int < a->abi->n_int_args) {
+ u32 reg = a->abi->int_args[a->next_param_int++];
emit_mov_store(t->mc, sz, reg, X64_RBP, -(i32)s->off + (i32)part_off);
} else {
u32 caller_off = a->next_param_stack;
@@ -262,7 +285,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) {
-(i32)s->off + (i32)part_off);
}
} else if (pt->cls == ABI_CLASS_FP) {
- if (a->next_param_fp < 8) {
+ if (a->next_param_fp < a->abi->n_fp_args) {
u32 xmm = a->next_param_fp++;
u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
emit_sse_store(t->mc, prefix, 0x11, xmm, X64_RBP,
@@ -280,6 +303,7 @@ CGLocalStorage x_param(CGTarget* t, const CGParamDesc* p) {
compiler_panic(t->c, a->loc, "x64 param: ABI class %d unimpl",
(int)pt->cls);
}
+ x_param_sync_slot(a);
}
return st;
}
diff --git a/src/arch/x64/arch.c b/src/arch/x64/arch.c
@@ -7,6 +7,7 @@
#include "arch/x64/x64.h"
#include "core/bytes.h"
#include "link/link_arch.h"
+#include "obj/coff.h"
#include "obj/elf.h"
#include "obj/macho.h"
#include "obj/obj.h"
@@ -19,6 +20,8 @@ static const ABIVtable* x64_abi_vtable(Compiler* c, CfreeOSKind os) {
switch (os) {
case CFREE_OS_MACOS:
return &apple_x64_vtable;
+ case CFREE_OS_WINDOWS:
+ return &win64_x64_vtable;
default:
return &sysv_x64_vtable;
}
@@ -45,6 +48,12 @@ static const ArchDwarfOps x64_dwarf_ops = {
.max_ops_per_inst = 1u,
};
+static const ArchCoffOps x64_coff_ops = {
+ .machine = IMAGE_FILE_MACHINE_AMD64,
+ .reloc_to = coff_x86_64_reloc_to,
+ .reloc_from = coff_x86_64_reloc_from,
+};
+
static int x64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
(void)c;
if (!fx || fx->kind != R_PC32 || fx->width != 4) return 1;
@@ -84,6 +93,7 @@ const ArchImpl arch_impl_x64 = {
.link = &link_arch_x64,
.elf = &x64_elf_ops,
.macho = &x64_macho_ops,
+ .coff = &x64_coff_ops,
.dwarf = &x64_dwarf_ops,
.dbg = &x64_dbg_ops,
.predefined_macros = x64_predefined_macros,
diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c
@@ -37,7 +37,9 @@ typedef struct X64AsmOperand {
u8 reg;
u8 base;
u8 high8;
- u8 pad[3];
+ u8 seg;
+ u8 no_base;
+ u8 pad[1];
i64 imm;
i32 disp;
} X64AsmOperand;
@@ -101,6 +103,21 @@ static int x64_xmm_from_name(AsmDriver* d, Sym s, u32* reg_out) {
return 1;
}
+static int x64_segment_prefix_from_name(AsmDriver* d, Sym s, u8* prefix_out) {
+  /* "fs" -> 0x64, "gs" -> 0x65; returns 1 on a match, 0 otherwise. */
+  size_t len = 0;
+  const char* name = pool_str(asm_driver_pool(d), s, &len);
+  u8 pfx;
+  if (!name || len != 2 || name[1] != 's') return 0;
+  switch (name[0]) {
+    case 'f': pfx = 0x64; break;
+    case 'g': pfx = 0x65; break;
+    default: return 0;
+  }
+  if (prefix_out) *prefix_out = pfx;
+  return 1;
+}
+
static u32 parse_reg(AsmDriver* d, u32* width_out, u32* high8_out) {
AsmTok t;
u32 reg;
@@ -136,6 +153,19 @@ static X64AsmOperand parse_operand(AsmDriver* d) {
(void)asm_driver_next(d);
ident = asm_driver_next(d);
if (ident.kind != ASM_TOK_IDENT) asm_driver_panic(d, "x64 asm: bad register");
+ if (x64_segment_prefix_from_name(d, ident.v.ident, &op.seg)) {
+ asm_driver_expect_punct(d, ':', "':' after x64 segment register");
+ op.kind = X64_ASM_OP_MEM;
+ if (!asm_driver_tok_is_punct(asm_driver_peek(d), '('))
+ op.disp = (i32)asm_driver_parse_const(d);
+ if (asm_driver_eat_punct(d, '(')) {
+ op.base = (u8)parse_reg(d, NULL, NULL);
+ asm_driver_expect_punct(d, ')', "')' in x64 memory operand");
+ } else {
+ op.no_base = 1;
+ }
+ return op;
+ }
if (x64_xmm_from_name(d, ident.v.ident, &width)) {
op.kind = X64_ASM_OP_XMM;
op.reg = (u8)width;
@@ -164,6 +194,48 @@ static X64AsmOperand parse_operand(AsmDriver* d) {
return op;
}
+static u32 x64_pack_rex_mem_operand(u8* out, int w, u32 reg,
+                                    X64AsmOperand mem) {
+  u32 rex_base = mem.no_base ? 0u : mem.base; /* disp-only: no base reg */
+  return x64_pack_rex(out, w, reg, 0, rex_base);
+}
+
+static u32 x64_pack_mem_operand(u8* out, u32 reg, X64AsmOperand mem) {
+  if (!mem.no_base) return x64_pack_mem(out, reg, mem.base, mem.disp);
+  /* Base-less operand: ModRM + SIB bytes followed by a 32-bit disp. */
+  out[0] = x64_modrm(0u, reg, 4u);
+  out[1] = x64_sib(0u, 4u, 5u);
+  return 2u + x64_put_u32le(out + 2, (u32)mem.disp);
+}
+
+static void emit_mov_load_operand(MCEmitter* mc, u32 size, u32 dst,
+                                  X64AsmOperand src) {
+  u8 buf[16]; /* mov mem->reg: [opsize] [seg] [REX] opcode ModRM(+SIB/disp) */
+  u32 n = 0;
+  if (size == 2u) buf[n++] = X64_OPSIZE_PFX; /* 16-bit operand size */
+  if (src.seg) buf[n++] = src.seg;           /* fs:/gs: override, if any */
+  n += x64_pack_rex_mem_operand(buf + n, size == 8u, dst, src);
+  buf[n++] = size == 1u ? 0x8Au : X64_OPC_MOV_R_RM; /* 8A = MOV r8, r/m8 */
+  n += x64_pack_mem_operand(buf + n, dst, src);
+  mc->emit_bytes(mc, buf, n);
+}
+
+static void emit_mov_store_operand(MCEmitter* mc, u32 size, u32 src,
+                                   X64AsmOperand dst, int force_rex) {
+  /* mov reg->mem: [opsize] [seg] REX opcode ModRM(+SIB/disp). */
+  const int rex_w = (size == 8u);
+  u8 enc[16];
+  u8* p = enc;
+  if (size == 2u) *p++ = X64_OPSIZE_PFX;
+  if (dst.seg) *p++ = dst.seg;
+  p += force_rex ? x64_pack_rex_force(p, rex_w, src, 0,
+                                      dst.no_base ? 0u : dst.base)
+                 : x64_pack_rex_mem_operand(p, rex_w, src, dst);
+  *p++ = (size == 1u) ? X64_OPC_MOV_RM_R8 : X64_OPC_MOV_RM_R;
+  p += x64_pack_mem_operand(p, src, dst);
+  mc->emit_bytes(mc, enc, (u32)(p - enc));
+}
+
static void expect_comma(AsmDriver* d) {
if (!asm_driver_eat_comma(d)) asm_driver_panic(d, "x64 asm: expected ','");
}
@@ -213,12 +285,14 @@ static __attribute__((unused)) void emit_movb_store_operand(
AsmDriver* d, MCEmitter* mc, X64AsmOperand src, X64AsmOperand dst) {
if (src.high8) {
u8 ob = 0x88;
- if (dst.base >= 8u) asm_driver_panic(d, "x64 asm: high-byte register cannot use REX");
+ if (dst.no_base || dst.base >= 8u)
+ asm_driver_panic(d, "x64 asm: high-byte register cannot use REX");
+ if (dst.seg) mc->emit_bytes(mc, &dst.seg, 1);
mc->emit_bytes(mc, &ob, 1);
emit_mem_operand(mc, src.reg, dst.base, dst.disp);
return;
}
- emit_mov_store(mc, 1, src.reg, dst.base, dst.disp);
+ emit_mov_store_operand(mc, 1, src.reg, dst, 1);
}
static __attribute__((unused)) void emit_rm_imm(AsmDriver* d, MCEmitter* mc,
@@ -607,12 +681,12 @@ static void parse_alu_rr(X64ParseCtx* p) {
if (p->width == 1u)
emit_movb_store_operand(p->d, p->mc, src, dst);
else
- emit_mov_store(p->mc, p->width, src.reg, dst.base, dst.disp);
+ emit_mov_store_operand(p->mc, p->width, src.reg, dst, 0);
return;
}
if (p->desc->opc[0] == 0x89u &&
src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) {
- emit_mov_load(p->mc, p->width, 0, dst.reg, src.base, src.disp);
+ emit_mov_load_operand(p->mc, p->width, dst.reg, src);
return;
}
asm_driver_panic(p->d, "x64 asm: unsupported alu_rr form");
@@ -647,15 +721,9 @@ static void parse_mov_rm_load(X64ParseCtx* p) {
}
if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) {
if (p->width == 2u) {
- u8 buf[16];
- u32 n = x64_mov_rm_load_pack(
- (X64MovRMLoad){.w = 0, .opc0 = X64_OPC_MOV_R_RM, .dst = dst.reg,
- .base = src.base, .disp = src.disp},
- buf + 1);
- buf[0] = X64_OPSIZE_PFX;
- emit_packed(p->mc, buf, n + 1u);
+ emit_mov_load_operand(p->mc, p->width, dst.reg, src);
} else {
- emit_mov_load(p->mc, p->width, 0, dst.reg, src.base, src.disp);
+ emit_mov_load_operand(p->mc, p->width, dst.reg, src);
}
return;
}
@@ -946,6 +1014,13 @@ static void parse_sse_rr(X64ParseCtx* p) {
dst.disp);
return;
}
+ if (dst.kind == X64_ASM_OP_MEM && src.kind == X64_ASM_OP_XMM &&
+ p->desc->opc[1] == 0x28u &&
+ !strcmp(p->desc->mnemonic, "movaps")) {
+ emit_sse_store(p->mc, p->desc->leg_pfx, 0x29, src.reg, dst.base,
+ dst.disp);
+ return;
+ }
if (dst.kind != X64_ASM_OP_XMM) asm_driver_panic(p->d, "x64 asm: sse dst xmm");
if (src.kind == X64_ASM_OP_XMM)
emit_sse_rr(p->mc, p->desc->leg_pfx, p->desc->opc[1], dst.reg, src.reg);
@@ -1172,22 +1247,11 @@ static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) {
if (w == 1u)
emit_movb_store_operand(d, mc, src, dst);
else
- emit_mov_store(mc, w, src.reg, dst.base, dst.disp);
+ emit_mov_store_operand(mc, w, src.reg, dst, 0);
return;
}
if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) {
- if (w == 2u) {
- u8 buf[16];
- u32 nn = x64_mov_rm_load_pack(
- (X64MovRMLoad){.w = 0, .opc0 = X64_OPC_MOV_R_RM,
- .dst = dst.reg, .base = src.base,
- .disp = src.disp},
- buf + 1);
- buf[0] = X64_OPSIZE_PFX;
- emit_packed(mc, buf, nn + 1u);
- } else {
- emit_mov_load(mc, w, 0, dst.reg, src.base, src.disp);
- }
+ emit_mov_load_operand(mc, w, dst.reg, src);
return;
}
asm_driver_panic(d, "x64 asm: mov form");
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -2,7 +2,7 @@
*
* Covers: REX, ModR/M, SIB, all emit_* primitives, x_func_begin,
* x_func_end, and the shared constant tables (g_int_order, g_fp_order,
- * g_int_arg_regs). */
+ * per-ABI int_args tables exposed via X64ABIRegs). */
#include <string.h>
@@ -30,8 +30,44 @@ const Reg g_fp_order[10] = {
X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, X64_XMM0 + 14, X64_XMM15,
};
-const u32 g_int_arg_regs[6] = {X64_RDI, X64_RSI, X64_RDX,
- X64_RCX, X64_R8, X64_R9};
+static const u32 g_int_arg_regs_sysv[6] = {X64_RDI, X64_RSI, X64_RDX,
+ X64_RCX, X64_R8, X64_R9};
+static const u32 g_int_arg_regs_win64[4] = {X64_RCX, X64_RDX, X64_R8, X64_R9};
+
+static const X64ABIRegs g_x64_abi_sysv = {
+ .int_args = g_int_arg_regs_sysv,
+ .n_int_args = 6,
+ .n_fp_args = 8,
+ .slot_shared_int_fp = 0,
+ .shadow_space = 0,
+ .emit_sysv_vararg_save = 1,
+ .vararg_fp_dup_to_gpr = 0,
+ .cs_int_mask = (1ull << X64_RBX) | (1ull << X64_RBP) | (1ull << X64_R12) |
+ (1ull << X64_R13) | (1ull << X64_R14) | (1ull << X64_R15),
+ .cs_fp_mask = 0,
+};
+
+static const X64ABIRegs g_x64_abi_win64 = {
+ .int_args = g_int_arg_regs_win64,
+ .n_int_args = 4,
+ .n_fp_args = 4,
+ .slot_shared_int_fp = 1,
+ .shadow_space = X64_WIN64_SHADOW_SPACE,
+ .emit_sysv_vararg_save = 0,
+ .vararg_fp_dup_to_gpr = 1,
+ .cs_int_mask = (1ull << X64_RBX) | (1ull << X64_RBP) | (1ull << X64_R12) |
+ (1ull << X64_R13) | (1ull << X64_R14) | (1ull << X64_R15) |
+ (1ull << X64_RDI) | (1ull << X64_RSI),
+ .cs_fp_mask = (1ull << X64_XMM6) | (1ull << X64_XMM7) | (1ull << X64_XMM8) |
+ (1ull << (X64_XMM0 + 9)) | (1ull << (X64_XMM0 + 10)) |
+ (1ull << (X64_XMM0 + 11)) | (1ull << (X64_XMM0 + 12)) |
+ (1ull << (X64_XMM0 + 13)) | (1ull << (X64_XMM0 + 14)) |
+ (1ull << X64_XMM15),
+};
+
+const X64ABIRegs* x64_abi_for_os(CfreeOSKind os) {
+  return (os != CFREE_OS_WINDOWS) ? &g_x64_abi_sysv : &g_x64_abi_win64;
+}
/* ============================================================
* Byte-level emit helpers.
@@ -480,11 +516,28 @@ void emit_sse_rr_w(MCEmitter *mc, u8 prefix, u8 opcode, int w, u32 dst,
/* ============================================================
* Function lifecycle */
-static u32 count_x64_cs_int(u32 mask) {
+/* Number of registers set in `mask` that are callee-saved GPRs under the
+ * ABI (cs_int_mask), not counting RBP: the prologue head already saves
+ * RBP with `push rbp` rather than through the per-register save loop. */
+static u32 count_x64_cs_int(u32 mask, u64 cs_int_mask) {
+  const u64 rbp_bit = 1ull << X64_RBP;
+  u64 bits = ((u64)mask & cs_int_mask) & ~rbp_bit;
+  u32 count;
+  /* Each iteration clears the lowest set bit, so it runs once per bit. */
+  for (count = 0; bits != 0; ++count) {
+    bits &= bits - 1;
+  }
+  return count;
+}
+
+/* Count callee-saved XMM bits the ABI claims (Win64 only — SysV's
+ * cs_fp_mask is empty). */
+static u32 count_x64_cs_fp(u32 mask, u64 cs_fp_mask) {
u32 n = 0;
- for (u32 i = 0; i < 5u; ++i) {
- Reg r = g_int_order[i];
- if (mask & (1u << r)) ++n;
+ u64 eligible = (u64)mask & cs_fp_mask;
+ while (eligible) {
+ eligible &= (eligible - 1);
+ ++n;
}
return n;
}
@@ -492,7 +545,14 @@ static u32 count_x64_cs_int(u32 mask) {
static u32 x64_planned_prologue_bytes(const XImpl *a) {
u32 n = X64_PROLOGUE_BASE_BYTES;
if (a->has_sret) n += X64_PROLOGUE_SRET_BYTES;
- n += count_x64_cs_int(a->planned_cs_int_mask) * X64_PROLOGUE_SAVE_BYTES;
+ n += count_x64_cs_int(a->planned_cs_int_mask, a->abi->cs_int_mask) *
+ X64_PROLOGUE_SAVE_BYTES;
+ n += count_x64_cs_fp(a->planned_cs_fp_mask, a->abi->cs_fp_mask) *
+ X64_PROLOGUE_XMM_SAVE_BYTES;
+ /* We don't know the final frame size at planning time; reserve the
+ * chkstk delta whenever the ABI requires it so the placeholder is
+ * large enough if the body grows past 4 KiB. */
+ if (a->abi->shadow_space) n += X64_PROLOGUE_CHKSTK_DELTA;
return n ? n : 1u;
}
@@ -504,6 +564,7 @@ static void x_func_begin_init(CGTarget *t, const CGFuncDesc *fd) {
mc->emit_align(mc, 16, 0x90);
a->fd = fd;
+ a->abi = x64_abi_for_os(t->c->target.os);
a->func_start = mc->pos(mc);
mc_begin_function(mc, fd->sym, fd->text_section_id, a->func_start);
a->next_param_int = 0;
@@ -518,8 +579,11 @@ static void x_func_begin_init(CGTarget *t, const CGFuncDesc *fd) {
a->max_outgoing = 0;
a->used_cs_int_mask = a->has_planned_regs ? a->planned_cs_int_mask : 0;
a->used_cs_fp_mask = a->has_planned_regs ? a->planned_cs_fp_mask : 0;
- a->prologue_nbytes = a->has_planned_regs ? x64_planned_prologue_bytes(a)
- : X64_PROLOGUE_BYTES;
+ a->prologue_nbytes =
+ a->has_planned_regs
+ ? x64_planned_prologue_bytes(a)
+ : (a->abi->shadow_space ? X64_PROLOGUE_BYTES_WIN64
+ : X64_PROLOGUE_BYTES);
a->planned_cs_int_mask = 0;
a->planned_cs_fp_mask = 0;
a->has_planned_regs = 0;
@@ -536,8 +600,9 @@ static void x_func_begin_init(CGTarget *t, const CGFuncDesc *fd) {
static void x_add_entry_frame_slots(CGTarget *t) {
XImpl *a = impl_of(t);
- /* sret: rdi at entry holds the destination pointer. Spill it to a
- * hidden slot so the body can use rdi freely. */
+ /* sret: the first int arg reg at entry holds the destination pointer
+ * (RDI on SysV, RCX on Win64). Spill it to a hidden slot so the body
+ * can use that register freely. */
if (a->has_sret) {
FrameSlotDesc fsd = {
.type = CFREE_CG_TYPE_NONE,
@@ -549,15 +614,16 @@ static void x_add_entry_frame_slots(CGTarget *t) {
.flags = 0,
};
a->sret_ptr_slot = x_frame_slot(t, &fsd);
- /* Subsequent int args start at rsi (next_param_int = 1). */
+ /* Subsequent int args start at the next slot. */
a->next_param_int = 1;
}
- /* Variadic: reserve the SysV reg-save area (rdi..r9 at +0..+40, then
- * xmm0..xmm7 at +48..+160 with 16-byte stride) and emit the saves
- * directly after the prologue placeholder so the original register
- * args are preserved before x_param() spills the named ones. */
- if (a->is_variadic) {
+ /* Variadic SysV: reserve the 176 B reg-save area (rdi..r9 at +0..+40,
+ * then xmm0..xmm7 at +48..+160 with 16-byte stride) and emit the
+ * saves after the prologue placeholder. Win64 variadic uses the
+ * caller-provided 32 B home space at [rbp+16..+47] instead — no
+ * callee-allocated reg-save slot. */
+ if (a->is_variadic && a->abi->emit_sysv_vararg_save) {
FrameSlotDesc rsd = {
.type = CFREE_CG_TYPE_NONE,
.name = 0,
@@ -576,69 +642,177 @@ static void x_emit_variadic_reg_saves(CGTarget *t) {
MCEmitter *mc = t->mc;
if (!a->is_variadic) return;
- XSlot *rs = x64_slot_get(a, a->reg_save_slot);
- static const u32 gprs[6] = {X64_RDI, X64_RSI, X64_RDX,
- X64_RCX, X64_R8, X64_R9};
- for (u32 i = 0; i < 6; ++i) {
- emit_mov_store(mc, 8, gprs[i], X64_RBP, -(i32)rs->off + (i32)(i * 8u));
- }
- /* movsd writes the low 8 bytes of each xmm; va_arg reads 8 bytes per
- * FP slot, so the upper half of the 16-byte stride stays unused. */
- for (u32 i = 0; i < 8; ++i) {
- emit_sse_store(mc, 0xF2, 0x11, (u32)(X64_XMM0 + i), X64_RBP,
- -(i32)rs->off + (i32)(48u + i * 16u));
+ if (a->abi->emit_sysv_vararg_save) {
+ XSlot *rs = x64_slot_get(a, a->reg_save_slot);
+ static const u32 gprs[6] = {X64_RDI, X64_RSI, X64_RDX,
+ X64_RCX, X64_R8, X64_R9};
+ for (u32 i = 0; i < 6; ++i) {
+ emit_mov_store(mc, 8, gprs[i], X64_RBP, -(i32)rs->off + (i32)(i * 8u));
+ }
+ /* movsd writes the low 8 bytes of each xmm; va_arg reads 8 bytes per
+ * FP slot, so the upper half of the 16-byte stride stays unused. */
+ for (u32 i = 0; i < 8; ++i) {
+ emit_sse_store(mc, 0xF2, 0x11, (u32)(X64_XMM0 + i), X64_RBP,
+ -(i32)rs->off + (i32)(48u + i * 16u));
+ }
+ return;
}
+ /* Win64 variadic: spill RCX, RDX, R8, R9 into the caller's 32 B home
+ * space at [rbp+16..+47]. va_start ends up pointing at
+ * [rbp+16 + named_int_slots*8] (a contiguous arg array). FP variadic
+ * args are duplicated into the matching GPR at the call site (see
+ * vararg_fp_dup_to_gpr), so by the time the callee accesses them
+ * they're already in the GPR home slot. */
+ emit_mov_store(mc, 8, X64_RCX, X64_RBP, 16);
+ emit_mov_store(mc, 8, X64_RDX, X64_RBP, 24);
+ emit_mov_store(mc, 8, X64_R8, X64_RBP, 32);
+ emit_mov_store(mc, 8, X64_R9, X64_RBP, 40);
}
static u32 align_up_u32(u32 v, u32 a) { return (v + (a - 1u)) & ~(a - 1u); }
+/* Spill order for the per-ABI callee-saved set. SysV: RBX, R12..R15 (the
+ * leading entries of g_int_order). Win64 adds RDI then RSI at the tail
+ * (mingw/MSVC pick a stable order; the saved slot is offsets-only for
+ * cfree's purposes). RBP is excluded — handled by the prologue head. */
+static const Reg g_cs_int_order_all[X64_MAX_CS_INT_REGS] = {
+ X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, X64_RDI, X64_RSI,
+};
+
+/* Spill order for Win64 callee-saved XMMs (XMM6..XMM15). */
+#define X64_MAX_CS_FP_REGS 10u
+static const Reg g_cs_fp_order_all[X64_MAX_CS_FP_REGS] = {
+ X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9,
+ X64_XMM0 + 10, X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13,
+ X64_XMM0 + 14, X64_XMM15,
+};
+
static u32 x_collect_cs_regs(const XImpl *a, Reg *cs_regs) {
u32 cs_used = 0;
- for (u32 i = 0; i < 5u; ++i) {
- Reg r = g_int_order[i];
- if (a->used_cs_int_mask & (1u << r))
- cs_regs[cs_used++] = r;
+ u64 mask = (u64)a->used_cs_int_mask & a->abi->cs_int_mask;
+ mask &= ~(1ull << X64_RBP);
+ for (u32 i = 0; i < X64_MAX_CS_INT_REGS; ++i) {
+ Reg r = g_cs_int_order_all[i];
+ if (mask & (1ull << r)) cs_regs[cs_used++] = r;
}
return cs_used;
}
-static u32 x_compute_frame_size(const XImpl *a, u32 cs_used) {
+static u32 x_collect_cs_fp_regs(const XImpl *a, Reg *cs_fp_regs) {
+ u32 n = 0;
+ u64 mask = (u64)a->used_cs_fp_mask & a->abi->cs_fp_mask;
+ for (u32 i = 0; i < X64_MAX_CS_FP_REGS; ++i) {
+ Reg r = g_cs_fp_order_all[i];
+ if (mask & (1ull << r)) cs_fp_regs[n++] = r;
+ }
+ return n;
+}
+
+/* Frame layout (rbp-relative, high → low; rows give region low ends):
+ * [rbp] : saved rbp (push rbp)
+ * [rbp - cum_off] : locals + spills (cum_off bytes)
+ * [rbp - xmm_base - xmm_size] : XMM saves, 16 B each (16-aligned)
+ * [rbp - xmm_base - xmm_size - cs_size] : GPR callee-saves, 8 B each
+ * [rsp] : outgoing args (max_outgoing, 16-aligned)
+ * xmm_base = align_up(cum_off, 16) when any XMM saved, else == cum_off.
+ * Frame size includes the alignment pad so rsp lands at 0 mod 16. */
+static u32 x_xmm_base(const XImpl *a, u32 cs_fp_used) {
+ if (cs_fp_used == 0) return a->cum_off;
+ return align_up_u32(a->cum_off, 16u);
+}
+
+static u32 x_compute_frame_size(const XImpl *a, u32 cs_used, u32 cs_fp_used) {
+ u32 xmm_base = x_xmm_base(a, cs_fp_used);
u32 cs_size = cs_used * 8u;
- u32 raw = a->max_outgoing + cs_size + a->cum_off;
+ u32 xmm_size = cs_fp_used * 16u;
+ u32 raw = a->max_outgoing + cs_size + xmm_size + xmm_base;
u32 frame_size = align_up_u32(raw, 16u);
return frame_size ? frame_size : 16u;
}
+/* Cached lookup/creation of __chkstk as a SK_UNDEF symbol. The Win64
+ * stack-probe helper is provided by mingw's libmingwex / MSVC's CRT;
+ * cfree references it on demand from the prologue and lets the linker
+ * resolve it. */
+static ObjSymId x_chkstk_sym(CGTarget *t) {
+ Sym name = pool_intern_cstr(t->c->global, "__chkstk");
+ ObjSymId s = obj_symbol_find(t->obj, name);
+ if (s != 0) return s;
+ return obj_symbol(t->obj, name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+}
+
+/* Build the prologue byte sequence. Returns the number of bytes
+ * written. If `chkstk_disp_pos_out` is non-NULL and the chkstk path was
+ * taken, stores the byte offset of the `call __chkstk` disp32 within
+ * `buf` so the caller can emit the matching R_X64_PLT32 reloc. Sets
+ * it to UINT32_MAX otherwise. */
static u32 x_build_prologue(CGTarget *t, u8 *buf, u32 cap, u32 frame_size,
- const Reg *cs_regs, u32 cs_used) {
+ const Reg *cs_regs, u32 cs_used,
+ const Reg *cs_fp_regs, u32 cs_fp_used,
+ u32 *chkstk_disp_pos_out) {
XImpl *a = impl_of(t);
u32 wi = 0;
+ if (chkstk_disp_pos_out) *chkstk_disp_pos_out = (u32)-1;
- if (wi + 11 > cap) goto overflow;
+ if (wi + 4 > cap) goto overflow;
/* push rbp (1 byte). */
buf[wi++] = 0x55;
/* mov rbp, rsp: REX.W 89 E5. */
buf[wi++] = X64_REX_BASE | X64_REX_W;
buf[wi++] = 0x89;
buf[wi++] = modrm(3u, X64_RSP, X64_RBP);
- /* sub rsp, frame_size: REX.W 81 /5 imm32 = 7 bytes. */
- buf[wi++] = X64_REX_BASE | X64_REX_W;
- buf[wi++] = 0x81;
- buf[wi++] = modrm(3u, 5u, X64_RSP);
- buf[wi++] = (u8)frame_size;
- buf[wi++] = (u8)(frame_size >> 8);
- buf[wi++] = (u8)(frame_size >> 16);
- buf[wi++] = (u8)(frame_size >> 24);
-
- /* sret: mov [rbp + disp32], rdi. */
+
+ int need_chkstk =
+ a->abi->shadow_space && frame_size > X64_WIN64_CHKSTK_THRESHOLD;
+ if (need_chkstk) {
+ /* Win64 large-frame probe sequence (matches what GCC/clang emit on
+ * x86_64-windows):
+ * mov eax, frame_size ; B8 imm32 (5 bytes)
+ * call __chkstk ; E8 disp32 (5 bytes)
+ * sub rsp, rax ; REX.W 29 C4 (3 bytes)
+ * __chkstk probes one page at a time over the requested allocation
+ * but does NOT adjust rsp itself; the explicit `sub rsp, rax`
+ * after the call does that. */
+ if (wi + 13 > cap) goto overflow;
+ buf[wi++] = 0xB8;
+ buf[wi++] = (u8)frame_size;
+ buf[wi++] = (u8)(frame_size >> 8);
+ buf[wi++] = (u8)(frame_size >> 16);
+ buf[wi++] = (u8)(frame_size >> 24);
+ buf[wi++] = 0xE8;
+ if (chkstk_disp_pos_out) *chkstk_disp_pos_out = wi;
+ buf[wi++] = 0;
+ buf[wi++] = 0;
+ buf[wi++] = 0;
+ buf[wi++] = 0;
+ buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] = 0x29;
+ buf[wi++] = modrm(3u, X64_RAX, X64_RSP);
+ } else {
+ /* sub rsp, frame_size: REX.W 81 /5 imm32 = 7 bytes. */
+ if (wi + 7 > cap) goto overflow;
+ buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] = 0x81;
+ buf[wi++] = modrm(3u, 5u, X64_RSP);
+ buf[wi++] = (u8)frame_size;
+ buf[wi++] = (u8)(frame_size >> 8);
+ buf[wi++] = (u8)(frame_size >> 16);
+ buf[wi++] = (u8)(frame_size >> 24);
+ }
+
+ /* sret: spill the first int arg reg (which holds the destination
+ * pointer at entry) to the hidden slot. SysV uses RDI; Win64 uses
+ * RCX. */
if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
XSlot *s = x64_slot_get(a, a->sret_ptr_slot);
if (s) {
i32 off = -(i32)s->off;
+ u32 sret_reg = a->abi->int_args[0];
if (wi + 7 > cap) goto overflow;
- buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] =
+ (u8)(X64_REX_BASE | X64_REX_W | ((sret_reg & 8) ? X64_REX_R : 0));
buf[wi++] = 0x89;
- buf[wi++] = modrm(2u, X64_RDI, X64_RBP);
+ buf[wi++] = modrm(2u, (sret_reg & 7u), X64_RBP);
buf[wi++] = (u8)off;
buf[wi++] = (u8)(off >> 8);
buf[wi++] = (u8)(off >> 16);
@@ -646,10 +820,12 @@ static u32 x_build_prologue(CGTarget *t, u8 *buf, u32 cap, u32 frame_size,
}
}
+ u32 xmm_base = x_xmm_base(a, cs_fp_used);
+
/* Spill callee-saves. */
for (u32 i = 0; i < cs_used; ++i) {
u32 reg = cs_regs[i];
- i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
+ i32 off = -(i32)xmm_base - (i32)(cs_fp_used) * 16 - (i32)(i + 1) * 8;
if (wi + 7 > cap) goto overflow;
buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0));
buf[wi++] = 0x89;
@@ -659,6 +835,26 @@ static u32 x_build_prologue(CGTarget *t, u8 *buf, u32 cap, u32 frame_size,
buf[wi++] = (u8)(off >> 16);
buf[wi++] = (u8)(off >> 24);
}
+
+ /* Spill callee-saved XMMs (Win64 only). movaps [rbp+disp32], xmm_n.
+ * Layout: xmm[0] at -(xmm_base+16), xmm[1] at -(xmm_base+32), ...
+ * Each slot is 16-aligned because rbp is 16-aligned once the
+ * `push rbp; mov rbp, rsp` head has run and xmm_base is rounded up to 16. */
+ for (u32 i = 0; i < cs_fp_used; ++i) {
+ u32 xmm = cs_fp_regs[i];
+ i32 off = -(i32)xmm_base - (i32)(i + 1) * 16;
+ u8 rex = (u8)((xmm & 8) ? (X64_REX_BASE | X64_REX_R) : 0);
+ u32 n = rex ? 8u : 7u;
+ if (wi + n > cap) goto overflow;
+ if (rex) buf[wi++] = rex;
+ buf[wi++] = 0x0F;
+ buf[wi++] = 0x29; /* MOVAPS r/m128, xmm */
+ buf[wi++] = modrm(2u, (xmm & 7u), X64_RBP);
+ buf[wi++] = (u8)off;
+ buf[wi++] = (u8)(off >> 8);
+ buf[wi++] = (u8)(off >> 16);
+ buf[wi++] = (u8)(off >> 24);
+ }
return wi;
overflow:
@@ -686,8 +882,9 @@ void x_func_begin_known_frame(CGTarget *t, const CGFuncDesc *fd,
const CGKnownFrameDesc *frame,
FrameSlot *out_slots) {
XImpl *a = impl_of(t);
- Reg cs_regs[5];
- u8 buf[X64_PROLOGUE_BYTES];
+ Reg cs_regs[X64_MAX_CS_INT_REGS];
+ Reg cs_fp_regs[X64_MAX_CS_FP_REGS];
+ u8 buf[X64_PROLOGUE_BYTES_WIN64];
x_func_begin_init(t, fd);
a->known_frame = 1;
@@ -702,17 +899,26 @@ void x_func_begin_known_frame(CGTarget *t, const CGFuncDesc *fd,
}
u32 cs_used = x_collect_cs_regs(a, cs_regs);
+ u32 cs_fp_used = x_collect_cs_fp_regs(a, cs_fp_regs);
if (frame && frame->may_omit_frame && frame->nslots == 0 &&
frame->max_outgoing == 0 && !frame->has_alloca && !frame->has_call &&
- !a->has_sret && !a->is_variadic && cs_used == 0) {
+ !a->has_sret && !a->is_variadic && cs_used == 0 && cs_fp_used == 0) {
a->omit_frame = 1;
return;
}
- u32 frame_size = x_compute_frame_size(a, cs_used);
+ u32 frame_size = x_compute_frame_size(a, cs_used, cs_fp_used);
a->prologue_pos = t->mc->pos(t->mc);
- u32 nbytes = x_build_prologue(t, buf, X64_PROLOGUE_BYTES, frame_size,
- cs_regs, cs_used);
+ u32 chkstk_disp_pos = (u32)-1;
+ u32 nbytes = x_build_prologue(t, buf, sizeof buf, frame_size,
+ cs_regs, cs_used, cs_fp_regs, cs_fp_used,
+ &chkstk_disp_pos);
t->mc->emit_bytes(t->mc, buf, nbytes);
+ if (chkstk_disp_pos != (u32)-1) {
+ ObjSymId chk = x_chkstk_sym(t);
+ t->mc->emit_reloc_at(t->mc, t->mc->section_id,
+ a->prologue_pos + chkstk_disp_pos, R_X64_PLT32,
+ chk, -4, 1, 0);
+ }
x_emit_variadic_reg_saves(t);
}
@@ -720,24 +926,36 @@ void x_func_end(CGTarget *t) {
XImpl *a = impl_of(t);
MCEmitter *mc = t->mc;
- Reg cs_regs[5];
+ Reg cs_regs[X64_MAX_CS_INT_REGS];
+ Reg cs_fp_regs[X64_MAX_CS_FP_REGS];
u32 cs_used = x_collect_cs_regs(a, cs_regs);
+ u32 cs_fp_used = x_collect_cs_fp_regs(a, cs_fp_regs);
/* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call,
* which means rsp ≡ 8 mod 16 inside the function (after the return
* address is pushed). On entry, rsp ≡ 8 mod 16; after `push rbp` it
* is 0 mod 16; after `sub rsp, frame_size` we need it back to 0
* mod 16, so frame_size must be a multiple of 16. */
- u32 frame_size = x_compute_frame_size(a, cs_used);
+ u32 frame_size = x_compute_frame_size(a, cs_used, cs_fp_used);
if (a->omit_frame) goto finish;
mc->label_place(mc, a->epilogue_label);
- /* Restore callee-saves. Each at rbp - (cum_off + (i+1)*8). */
+ u32 xmm_base = x_xmm_base(a, cs_fp_used);
+
+ /* Restore callee-saved XMMs (Win64). movaps xmm_n, [rbp+disp32]. */
+ for (i32 i = (i32)cs_fp_used - 1; i >= 0; --i) {
+ u32 xmm = cs_fp_regs[i];
+ i32 off = -(i32)xmm_base - (i32)(i + 1) * 16;
+ /* prefix=0 selects MOVAPS (0F 28 /r) when used through emit_sse_load. */
+ emit_sse_load(mc, /*prefix=*/0, /*opcode=*/0x28, xmm, X64_RBP, off);
+ }
+
+ /* Restore callee-saved GPRs. */
for (i32 i = (i32)cs_used - 1; i >= 0; --i) {
u32 reg = cs_regs[i];
- i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
+ i32 off = -(i32)xmm_base - (i32)(cs_fp_used) * 16 - (i32)(i + 1) * 8;
emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off);
}
@@ -747,15 +965,23 @@ void x_func_end(CGTarget *t) {
if (!a->known_frame) {
/* Patch prologue placeholder. */
- u8 buf[X64_PROLOGUE_BYTES];
+ u8 buf[X64_PROLOGUE_BYTES_WIN64];
u32 prologue_nbytes = a->prologue_nbytes ? a->prologue_nbytes
: X64_PROLOGUE_BYTES;
for (u32 i = 0; i < prologue_nbytes; ++i)
buf[i] = 0x90;
+ u32 chkstk_disp_pos = (u32)-1;
(void)x_build_prologue(t, buf, prologue_nbytes, frame_size, cs_regs,
- cs_used);
+ cs_used, cs_fp_regs, cs_fp_used,
+ &chkstk_disp_pos);
obj_patch(t->obj, a->fd->text_section_id, a->prologue_pos, buf,
prologue_nbytes);
+ if (chkstk_disp_pos != (u32)-1) {
+ ObjSymId chk = x_chkstk_sym(t);
+ mc->emit_reloc_at(mc, a->fd->text_section_id,
+ a->prologue_pos + chkstk_disp_pos, R_X64_PLT32,
+ chk, -4, 1, 0);
+ }
}
/* Patch each alloca's `lea dst, [rsp + 0]` disp32 with the final
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -19,10 +19,61 @@
#include "core/pool.h"
#include "obj/obj.h"
+/* Prologue placeholder budget for the unplanned-regs path (the C
+ * frontend's default; the opt pipeline pre-plans registers and hits
+ * x64_planned_prologue_bytes for tight sizing).
+ *
+ * SysV worst case: 11 base + 7 sret + 5*7 GPR saves = 53.
+ * Win64 worst case adds XMM6-15 (10 * 8 = 80) plus chkstk delta (+6)
+ * plus the 2 extra GPR slots for RDI/RSI (2*7 = 14), so 153 — round
+ * up to 192. x_func_begin_init picks the budget per ABI: SysV (never
+ * past byte 53) keeps X64_PROLOGUE_BYTES, Win64 takes the _WIN64 one.
+ * We rely on dead-strip / link-time coalescing if size becomes a concern. */
#define X64_PROLOGUE_BYTES 96u
+#define X64_PROLOGUE_BYTES_WIN64 192u
#define X64_PROLOGUE_BASE_BYTES 11u
#define X64_PROLOGUE_SRET_BYTES 7u
#define X64_PROLOGUE_SAVE_BYTES 7u
+/* XMM save: movaps [rbp + disp32], xmm_n.
+ * XMM0-7 : 0F 29 modrm disp32 = 7 B
+ * XMM8-15 : 44 0F 29 modrm disp32 (REX.R) = 8 B
+ * We size with the high-reg worst case so the placeholder always fits. */
+#define X64_PROLOGUE_XMM_SAVE_BYTES 8u
+/* chkstk replaces a 7B sub-rsp-imm32 with 13B (mov eax,imm32 +
+ * call disp32 + sub rsp,rax). Net +6 over the plain sub. */
+#define X64_PROLOGUE_CHKSTK_DELTA 6u
+
+/* Win64-specific constants. */
+#define X64_WIN64_SHADOW_SPACE 32u /* 4 home slots, 8 B each. */
+#define X64_WIN64_CHKSTK_THRESHOLD 4096u
+
+/* Maximum callee-saved GPRs across all supported ABIs. SysV saves up to
+ * 5 (RBX, R12..R15; RBP is handled separately by the prologue head),
+ * Win64 adds RDI + RSI for 7. */
+#define X64_MAX_CS_INT_REGS 7u
+
+/* ============================================================
+ * Per-OS ABI register layout.
+ *
+ * Selected once at x_func_begin_init from t->c->target.os and
+ * consulted by the call-site and param-consumer paths so they stop
+ * hard-coding SysV reg orders and slot counts. */
+typedef struct X64ABIRegs {
+ const u32* int_args; /* size = n_int_args; SysV: RDI..R9;
+ Win64: RCX..R9 */
+ u32 n_int_args; /* 6 (SysV) or 4 (Win64) */
+ u32 n_fp_args; /* 8 (SysV) or 4 (Win64) */
+ int slot_shared_int_fp; /* 1 (Win64): arg slot index shared between
+ int_args[i] and XMMi; 0 (SysV) */
+ u32 shadow_space; /* 0 (SysV) or 32 (Win64) */
+ int emit_sysv_vararg_save; /* 1 (SysV): emit the 176 B reg-save area */
+ int vararg_fp_dup_to_gpr; /* 1 (Win64): call-site duplicates each
+ variadic FP arg into the matching GPR */
+ u64 cs_int_mask; /* callee-saved GPRs (eligible set) */
+ u64 cs_fp_mask; /* callee-saved XMMs (eligible set) */
+} X64ABIRegs;
+
+const X64ABIRegs* x64_abi_for_os(CfreeOSKind os);
/* ============================================================
* XImpl and friends. */
@@ -80,13 +131,15 @@ typedef struct XImpl {
FrameSlot sret_ptr_slot;
FrameSlot reg_save_slot; /* variadic: 176-byte __va_list_tag reg save area */
- u32 used_cs_int_mask; /* SysV callee-saved GPRs used by this function */
- u32 used_cs_fp_mask; /* reserved for ABIs with callee-saved FP regs */
+ u32 used_cs_int_mask; /* callee-saved GPRs used by this function */
+ u32 used_cs_fp_mask; /* callee-saved XMMs used by this function */
u32 planned_cs_int_mask;
u32 planned_cs_fp_mask;
u8 has_planned_regs;
u8 pad1[3];
+ const X64ABIRegs* abi; /* selected from t->c->target.os at func_begin */
+
XScope* scopes;
u32 nscopes;
u32 scopes_cap;
@@ -140,7 +193,6 @@ static inline _Noreturn void x_panic(CGTarget* t, const char* what) {
extern const Reg g_int_order[6];
extern const Reg g_fp_order[10];
-extern const u32 g_int_arg_regs[6];
static inline void x64_abi_direct_reg_need(const ABIArgInfo* ai,
u32* need_int, u32* need_fp) {
diff --git a/src/arch/x64/isa.c b/src/arch/x64/isa.c
@@ -256,6 +256,11 @@ const X64InsnDesc x64_insn_table[] = {
X64_FMT_SSE_RR, 0),
ROW("movss", X64_PFX_F3, 2, 0x0F, 0x11, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY,
X64_FMT_SSE_RR, X64_ASMFL_ALIAS),
+ /* MOVAPS */
+ ROW("movaps", X64_PFX_NONE, 2, 0x0F, 0x28, 0, 0xFF, NO_MODRM,
+ X64_W_REQ_ANY, X64_FMT_SSE_RR, 0),
+ ROW("movaps", X64_PFX_NONE, 2, 0x0F, 0x29, 0, 0xFF, NO_MODRM,
+ X64_W_REQ_ANY, X64_FMT_SSE_RR, X64_ASMFL_ALIAS),
/* ADD/SUB/MUL/DIV — opcodes 58/5C/59/5E (same byte for ss and sd;
* prefix picks). */
ROW("addsd", X64_PFX_F2, 2, 0x0F, 0x58, 0, 0xFF, NO_MODRM, X64_W_REQ_ANY,
diff --git a/src/arch/x64/link.c b/src/arch/x64/link.c
@@ -68,6 +68,22 @@ static int x64_is_got_load_reloc(RelocKind kind) {
kind == R_X64_REX_GOTPCRELX;
}
+/* PE/COFF IAT stub for x86_64 (6 B):
+ *
+ * ff 25 disp32 ; jmpq *[rip + disp_to_iat_slot]
+ *
+ * disp32 is the signed offset from the END of the JMP (stub_vaddr + 6)
+ * to the IAT slot in .idata, truncated to 32 bits with no range check.
+ * Identical layout to the ELF PLT entry head, minus the trailing NOP
+ * pad — Win64 calls don't need a stub aligned to a fixed entry stride
+ * because there's no PLT0 to share the address space with. */
+static void x64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr,
+ u64 iat_slot_vaddr) {
+ i64 disp = (i64)iat_slot_vaddr - (i64)(stub_vaddr + X64_JMP_RIPREL_SIZE);
+ i32 disp32 = (i32)(u32)((u64)disp & 0xffffffffu);
+ x64_write_jmp_riprel(dst, disp32);
+}
+
const LinkArchDesc link_arch_x64 = {
.e_machine = EM_X86_64,
.default_musl_interp = "/lib/ld-musl-x86_64.so.1",
@@ -87,4 +103,7 @@ const LinkArchDesc link_arch_x64 = {
.is_branch_reloc = x64_is_branch_reloc,
.is_got_load_reloc = x64_is_got_load_reloc,
.needs_jit_call_stub = x64_is_branch_reloc,
+
+ .coff_stub_size = X64_JMP_RIPREL_SIZE,
+ .emit_coff_iat_stub = x64_emit_coff_iat_stub,
};
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -83,6 +83,20 @@ static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
}
static void x_copy(CGTarget* t, Operand dst, Operand src) {
+ if (dst.cls == RC_FP && src.cls == RC_INT) {
+ u32 sz = type_byte_size(dst.type);
+ int w = sz == 8 ? 1 : 0;
+ emit_sse_rr_w(t->mc, 0x66, 0x6E, w, dst.v.reg & 0xFu,
+ src.v.reg & 0xFu);
+ return;
+ }
+ if (dst.cls == RC_INT && src.cls == RC_FP) {
+ u32 sz = type_byte_size(src.type);
+ int w = sz == 8 ? 1 : 0;
+ emit_sse_rr_w(t->mc, 0x66, 0x7E, w, src.v.reg & 0xFu,
+ dst.v.reg & 0xFu);
+ return;
+ }
if (dst.cls == RC_FP || src.cls == RC_FP) {
u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
emit_sse_rr(t->mc, prefix2, 0x10, dst.v.reg & 0xFu, src.v.reg & 0xFu);
@@ -369,6 +383,107 @@ static void x_addr_of(CGTarget* t, Operand dst, Operand lv) {
x_panic(t, "addr_of: kind unsupported");
}
+/* Win64 TLS Local-Exec materialization (PE-COFF).
+ *
+ * Sequence (4 instructions, 27-28 bytes depending on register encoding):
+ * mov rd, gs:[0x58] ; TEB.ThreadLocalStoragePointer
+ * mov r11d,[rip + _tls_index] ; per-image TLS slot index
+ * mov rd, [rd + r11*8] ; TLS block base for this image
+ * lea rd, [rd + sym@SECREL] ; rd = &sym
+ *
+ * `_tls_index` is a u32 the CRT defines for each image; the linker
+ * resolves the RIP-relative load. The LEA's disp32 carries
+ * IMAGE_REL_AMD64_SECREL (via R_COFF_SECREL) against the TLS data
+ * symbol — the linker fills in the symbol's offset from the start of
+ * the merged .tls section, which matches what gs:[0x58]+index lookup
+ * lands on at runtime. R11 is caller-saved under Win64; we use it
+ * unconditionally as scratch so we don't have to special-case
+ * rd == rcx. NOTE(review): assumes rd != r11 (step 2 clobbers r11). */
+static void x_tls_addr_of_win64(CGTarget* t, Operand dst, ObjSymId sym,
+ i64 addend) {
+ MCEmitter* mc = t->mc;
+ u32 sec = mc->section_id;
+ u32 rd = dst.v.reg & 0xFu;
+
+ /* (1) mov rd, gs:[0x58]: 65 [REX.W|R?] 8B mod=00/reg=rd/rm=100 sib disp32. */
+ u8 gs_prefix = 0x65;
+ mc->emit_bytes(mc, &gs_prefix, 1);
+ emit_rex(mc, 1, rd, 0, 0);
+ u8 op_mov_load = 0x8B;
+ mc->emit_bytes(mc, &op_mov_load, 1);
+ u8 mr1 = modrm(0u, rd & 7u, 4u);
+ mc->emit_bytes(mc, &mr1, 1);
+ u8 s1 = sib(0u, 4u, 5u);
+ mc->emit_bytes(mc, &s1, 1);
+ emit_u32le(mc, 0x58u);
+
+ /* (2) mov r11d, [rip + _tls_index]: 44 8B 1D disp32. */
+ Sym idx_name = pool_intern_cstr(t->c->global, "_tls_index");
+ ObjSymId idx_sym = obj_symbol_find(t->obj, idx_name);
+ if (idx_sym == 0) {
+ idx_sym = obj_symbol(t->obj, idx_name, SB_GLOBAL, SK_UNDEF,
+ OBJ_SEC_NONE, 0, 0);
+ }
+ u8 rex_r_only = X64_REX_BASE | X64_REX_R; /* R11 in ModRM.reg. */
+ mc->emit_bytes(mc, &rex_r_only, 1);
+ u8 op_mov_load_32 = 0x8B;
+ mc->emit_bytes(mc, &op_mov_load_32, 1);
+ u8 mr2 = modrm(0u, 3u /* r11 & 7 */, 5u /* RIP-rel */);
+ mc->emit_bytes(mc, &mr2, 1);
+ u32 idx_disp_pos = mc->pos(mc);
+ emit_u32le(mc, 0);
+ mc->emit_reloc_at(mc, sec, idx_disp_pos, R_PC32, idx_sym, -4, 1, 0);
+
+ /* (3) mov rd, [rd + r11*8]: REX.W + (REX.X for r11) + (REX.R/.B for rd>=8) +
+ * 8B modrm(mod, reg=rd&7, rm=4=SIB) sib(scale=3, index=3=r11&7, base=rd&7).
+ * When base&7 == 5 (rbp/r13) mod=0 means "disp32 only"; force mod=01
+ * with disp8=0 to actually mean [reg+r11*8 + 0]. */
+ u8 rex3 = X64_REX_BASE | X64_REX_W | X64_REX_X;
+ if (rd & 8) rex3 |= X64_REX_R; /* reg = rd */
+ if (rd & 8) rex3 |= X64_REX_B; /* base = rd */
+ mc->emit_bytes(mc, &rex3, 1);
+ u8 op_mov_load2 = 0x8B;
+ mc->emit_bytes(mc, &op_mov_load2, 1);
+ if ((rd & 7u) == 5u) {
+ u8 mr3 = modrm(1u, rd & 7u, 4u);
+ mc->emit_bytes(mc, &mr3, 1);
+ u8 s3 = sib(3u, 3u, rd & 7u);
+ mc->emit_bytes(mc, &s3, 1);
+ u8 zero = 0;
+ mc->emit_bytes(mc, &zero, 1);
+ } else {
+ u8 mr3 = modrm(0u, rd & 7u, 4u);
+ mc->emit_bytes(mc, &mr3, 1);
+ u8 s3 = sib(3u, 3u, rd & 7u);
+ mc->emit_bytes(mc, &s3, 1);
+ }
+
+ /* (4) lea rd, [rd + disp32@SECREL]: REX.W + (.R/.B for rd) + 8D modrm + disp32.
+ * rsp/r12 (rd&7==4) needs a SIB; rbp/r13 (rd&7==5) already takes
+ * disp32 form natively at mod=10. */
+ u8 rex4 = X64_REX_BASE | X64_REX_W;
+ if (rd & 8) rex4 |= X64_REX_R; /* reg = rd */
+ if (rd & 8) rex4 |= X64_REX_B; /* base = rd */
+ mc->emit_bytes(mc, &rex4, 1);
+ u8 op_lea = 0x8D;
+ mc->emit_bytes(mc, &op_lea, 1);
+ u32 lea_disp_pos;
+ if ((rd & 7u) == 4u) {
+ u8 mr4 = modrm(2u, rd & 7u, 4u);
+ mc->emit_bytes(mc, &mr4, 1);
+ u8 s4 = sib(0u, 4u, rd & 7u);
+ mc->emit_bytes(mc, &s4, 1);
+ lea_disp_pos = mc->pos(mc);
+ emit_u32le(mc, 0);
+ } else {
+ u8 mr4 = modrm(2u, rd & 7u, rd & 7u);
+ mc->emit_bytes(mc, &mr4, 1);
+ lea_disp_pos = mc->pos(mc);
+ emit_u32le(mc, 0);
+ }
+ mc->emit_reloc_at(mc, sec, lea_disp_pos, R_COFF_SECREL, sym, addend, 1, 0);
+}
+
/* x86_64 TLS Local-Exec materialization.
* mov rd, fs:0 ; read thread pointer (FS base + 0)
* lea rd, [rd + sym@tpoff] ; add TP-relative offset
@@ -380,6 +495,11 @@ static void x_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
u32 sec = mc->section_id;
u32 rd = dst.v.reg & 0xFu;
+ if (t->c->target.os == CFREE_OS_WINDOWS) {
+ x_tls_addr_of_win64(t, dst, sym, addend);
+ return;
+ }
+
/* mov rd, qword ptr fs:[0]
* 64 [REX.W|REX.R] 8B mod=00/reg=rd/rm=100 sib(0,4,5) disp32=0 */
u8 fs_prefix = 0x64;
@@ -1008,6 +1128,9 @@ static u32 x_call_plan_stack_raw_size(const CGCallPlan* p) {
return size;
}
+static inline void x_call_sync_slot(const X64ABIRegs* abi, u32* next_int,
+ u32* next_fp);
+
static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
u32* next_fp, u32* stack_off, int tail) {
XImpl* a = impl_of(t);
@@ -1031,13 +1154,11 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
if (ai->kind == ABI_ARG_IGNORE) return;
if (ai->kind == ABI_ARG_INDIRECT) {
/* Pass &av->storage_local in the next int arg reg. */
- u32 dst_reg = (*next_int < 6) ? g_int_arg_regs[(*next_int)++] : X64_RAX;
- int to_stack = (*next_int > 6) || (dst_reg == X64_RAX && *next_int == 6);
- /* Above is awkward — recompute clearly: */
- if (*next_int >= 6 + (a->has_sret ? 0 : 0)) {
- /* (next_int was already bumped past 6) — stack route */
- }
- to_stack = (dst_reg == X64_RAX);
+ u32 nargs_reg = a->abi->n_int_args;
+ u32 dst_reg = (*next_int < nargs_reg) ? a->abi->int_args[(*next_int)++]
+ : X64_RAX;
+ int to_stack = (dst_reg == X64_RAX);
+ x_call_sync_slot(a->abi, next_int, next_fp);
if (av->storage.kind == OPK_LOCAL) {
XSlot* s = x64_slot_get(a, av->storage.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "x64 call: bad byval slot");
@@ -1135,8 +1256,9 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
const ABIArgPart* pt = &ai->parts[i];
u32 sz = pt->size;
if (pt->cls == ABI_CLASS_INT) {
- int to_stack = (*next_int >= 6);
- u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++];
+ int to_stack = (*next_int >= a->abi->n_int_args);
+ u32 dst_reg = to_stack ? X64_RAX : a->abi->int_args[(*next_int)++];
+ if (!to_stack) x_call_sync_slot(a->abi, next_int, next_fp);
switch (av->storage.kind) {
case OPK_IMM: {
int w = (sz == 8) ? 1 : 0;
@@ -1176,10 +1298,16 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
*stack_off += 8;
}
} else if (pt->cls == ABI_CLASS_FP) {
- int to_stack = (*next_fp >= 8);
+ int to_stack = (*next_fp >= a->abi->n_fp_args);
u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3;
if (!to_stack) {
u32 dst_x = (*next_fp)++;
+ /* Win64: variadic FP args must be duplicated into the matching
+ * GPR so a callee that doesn't know the argument type finds the
+ * bits in either register. `av->abi == NULL` is cfree's marker
+ * that this is a variadic (un-prototyped) arg. */
+ int dup_to_gpr = a->abi->vararg_fp_dup_to_gpr && (av->abi == NULL) &&
+ (dst_x < a->abi->n_int_args);
if (av->storage.kind == OPK_REG) {
u32 sx = av->storage.v.reg & 0xFu;
if (sx != dst_x) emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx);
@@ -1197,6 +1325,15 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
"x64 call: FP arg storage kind %d unsupported",
(int)av->storage.kind);
}
+ if (dup_to_gpr) {
+ /* movq r64, xmm: 66 REX.W 0F 7E /r (xmm as ModRM:reg,
+ * r64 as ModRM:r/m). emit_sse_rr_w(prefix=0x66, opcode=0x7E,
+ * w=1, dst=xmm, src=gpr) emits that encoding. */
+ u32 gpr = a->abi->int_args[dst_x];
+ emit_sse_rr_w(t->mc, 0x66, 0x7E, /*w=*/1, dst_x, gpr);
+ }
+ /* Keep int/fp slot indices in lockstep on Win64. */
+ x_call_sync_slot(a->abi, next_int, next_fp);
} else {
if (av->storage.kind == OPK_REG) {
Operand addr = x_call_stack_arg_addr(t, *stack_off, tail);
@@ -1233,8 +1370,16 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
}
}
-static void count_arg_stack(const CGABIValue* av, u32* next_int, u32* next_fp,
- u32* stack_off) {
+static inline void x_call_sync_slot(const X64ABIRegs* abi, u32* next_int,
+ u32* next_fp) {
+ if (!abi->slot_shared_int_fp) return;
+ u32 m = *next_int > *next_fp ? *next_int : *next_fp;
+ *next_int = m;
+ *next_fp = m;
+}
+
+static void count_arg_stack(const X64ABIRegs* abi, const CGABIValue* av,
+ u32* next_int, u32* next_fp, u32* stack_off) {
ABIArgInfo va_ai;
ABIArgPart va_pt;
const ABIArgInfo* ai = av->abi;
@@ -1253,10 +1398,11 @@ static void count_arg_stack(const CGABIValue* av, u32* next_int, u32* next_fp,
}
if (ai->kind == ABI_ARG_IGNORE) return;
if (ai->kind == ABI_ARG_INDIRECT) {
- if (*next_int < 6)
+ if (*next_int < abi->n_int_args)
++*next_int;
else
*stack_off += 8;
+ x_call_sync_slot(abi, next_int, next_fp);
return;
}
if (ai->kind == ABI_ARG_DIRECT &&
@@ -1267,47 +1413,87 @@ static void count_arg_stack(const CGABIValue* av, u32* next_int, u32* next_fp,
for (u16 i = 0; i < ai->nparts; ++i) {
const ABIArgPart* pt = &ai->parts[i];
if (pt->cls == ABI_CLASS_INT) {
- if (*next_int < 6)
+ if (*next_int < abi->n_int_args)
++*next_int;
else
*stack_off += 8;
} else if (pt->cls == ABI_CLASS_FP) {
- if (*next_fp < 8)
+ if (*next_fp < abi->n_fp_args)
++*next_fp;
else
*stack_off += 8;
}
+ x_call_sync_slot(abi, next_int, next_fp);
}
}
static u32 x_call_stack_size(CGTarget* t, const CGCallDesc* d) {
- (void)t;
+ const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os);
u32 next_int = (d->abi && d->abi->has_sret) ? 1u : 0u;
- u32 next_fp = 0, stack_off = 0;
+ u32 next_fp = 0;
+ /* Win64 reserves a 32 B shadow space at [rsp+0..31] which is part of
+ * the caller's outgoing area; stack args land above it. SysV has no
+ * shadow space. */
+ u32 stack_off = abi->shadow_space;
+ x_call_sync_slot(abi, &next_int, &next_fp);
for (u32 i = 0; i < d->nargs; ++i)
- count_arg_stack(&d->args[i], &next_int, &next_fp, &stack_off);
+ count_arg_stack(abi, &d->args[i], &next_int, &next_fp, &stack_off);
return (stack_off + 15u) & ~15u;
}
+static const Reg g_tail_cs_int_order_all[X64_MAX_CS_INT_REGS] = {
+ X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, X64_RDI, X64_RSI,
+};
+
+#define X64_TAIL_MAX_CS_FP_REGS 10u
+static const Reg g_tail_cs_fp_order_all[X64_TAIL_MAX_CS_FP_REGS] = {
+ X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9,
+ X64_XMM0 + 10, X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13,
+ X64_XMM0 + 14, X64_XMM15,
+};
+
static u32 x_tail_collect_cs_regs(const XImpl* a, Reg* cs_regs) {
u32 cs_used = 0;
- for (u32 i = 0; i < 5u; ++i) {
- Reg r = g_int_order[i];
- if (a->used_cs_int_mask & (1u << r)) cs_regs[cs_used++] = r;
+ u64 mask = (u64)a->used_cs_int_mask & a->abi->cs_int_mask;
+ mask &= ~(1ull << X64_RBP);
+ for (u32 i = 0; i < X64_MAX_CS_INT_REGS; ++i) {
+ Reg r = g_tail_cs_int_order_all[i];
+ if (mask & (1ull << r)) cs_regs[cs_used++] = r;
}
return cs_used;
}
+static u32 x_tail_collect_cs_fp_regs(const XImpl* a, Reg* cs_fp_regs) {
+ u32 n = 0;
+ u64 mask = (u64)a->used_cs_fp_mask & a->abi->cs_fp_mask;
+ for (u32 i = 0; i < X64_TAIL_MAX_CS_FP_REGS; ++i) {
+ Reg r = g_tail_cs_fp_order_all[i];
+ if (mask & (1ull << r)) cs_fp_regs[n++] = r;
+ }
+ return n;
+}
+
static void x_tail_restore_frame(CGTarget* t) {
XImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
- Reg cs_regs[5];
+ Reg cs_regs[X64_MAX_CS_INT_REGS];
+ Reg cs_fp_regs[X64_TAIL_MAX_CS_FP_REGS];
u32 cs_used = x_tail_collect_cs_regs(a, cs_regs);
+ u32 cs_fp_used = x_tail_collect_cs_fp_regs(a, cs_fp_regs);
if (a->omit_frame) return;
+ /* Mirror the func_end frame layout: xmm_base is cum_off rounded up to
+ * 16 when any XMM is saved, else == cum_off. */
+ u32 xmm_base = a->cum_off;
+ if (cs_fp_used) xmm_base = (xmm_base + 15u) & ~15u;
+ for (i32 i = (i32)cs_fp_used - 1; i >= 0; --i) {
+ u32 xmm = cs_fp_regs[i];
+ i32 off = -(i32)xmm_base - (i32)(i + 1) * 16;
+ emit_sse_load(mc, /*prefix=*/0, /*opcode=*/0x28, xmm, X64_RBP, off);
+ }
for (i32 i = (i32)cs_used - 1; i >= 0; --i) {
u32 reg = cs_regs[i];
- i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
+ i32 off = -(i32)xmm_base - (i32)(cs_fp_used) * 16 - (i32)(i + 1) * 8;
emit_mov_load(mc, 8, 0, reg, X64_RBP, off);
}
{
@@ -1344,7 +1530,11 @@ static void x_call(CGTarget* t, const CGCallDesc* d) {
XImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
- u32 next_int = 0, next_fp = 0, stack_off = 0;
+ u32 next_int = 0, next_fp = 0;
+ /* Win64 reserves a 32 B shadow space at [rsp+0..31] which is part of
+ * the caller's outgoing-arg area; the first stack-passed arg lands
+ * at [rsp+32]. SysV starts at [rsp+0]. */
+ u32 stack_off = a->abi->shadow_space;
int requested_tail = (d->flags & CG_CALL_TAIL) != 0;
int tail_ok = 1;
if (requested_tail) {
@@ -1352,15 +1542,17 @@ static void x_call(CGTarget* t, const CGCallDesc* d) {
tail_ok = tail_stack <= a->next_param_stack;
}
- /* sret: caller puts destination pointer in rdi. */
+ /* sret: caller puts the destination pointer in the first int arg reg
+ * (RDI on SysV, RCX on Win64). */
if (d->abi && d->abi->has_sret) {
if (d->ret.storage.kind != OPK_LOCAL) {
compiler_panic(t->c, a->loc, "x64 call: sret destination must be LOCAL");
}
XSlot* s = x64_slot_get(a, d->ret.storage.v.frame_slot);
if (!s) compiler_panic(t->c, a->loc, "x64 call: bad sret slot");
- emit_lea(mc, X64_RDI, X64_RBP, -(i32)s->off);
+ emit_lea(mc, a->abi->int_args[0], X64_RBP, -(i32)s->off);
next_int = 1;
+ x_call_sync_slot(a->abi, &next_int, &next_fp);
}
for (u32 i = 0; i < d->nargs; ++i) {
emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off,
@@ -1811,6 +2003,22 @@ static void x_va_start_(CGTarget* t, Operand ap_op) {
if (!a->is_variadic)
compiler_panic(t->c, a->loc, "x64 va_start: function not variadic");
u32 ap = ap_op.v.reg & 0xFu;
+ if (a->abi->shadow_space) {
+ /* Win64 va_list is a single pointer to the next variadic stack
+ * slot. The 32 B caller-allocated home space at [rbp + 16] holds
+ * the first four named integer args (RCX/RDX/R8/R9, spilled by
+ * the prologue's variadic save). Variadic args start immediately
+ * after the named args at:
+ * [rbp + 16 + named_int_count * 8 + named_stack_bytes]
+ * x_emit_variadic_reg_saves already spilled the four arg regs to
+ * the home space; va_arg consumes from there onward at 8-byte
+ * stride (the call-site duplicates FP varargs into the matching
+ * GPR, so all FP varargs are reachable through the integer arm). */
+ u32 first_var_off = 16u + a->next_param_int * 8u + a->next_param_stack;
+ emit_lea(mc, X64_RAX, X64_RBP, (i32)first_var_off);
+ emit_mov_store(mc, 8, X64_RAX, ap, 0);
+ return;
+ }
XSlot* rs = x64_slot_get(a, a->reg_save_slot);
if (!rs) compiler_panic(t->c, a->loc, "x64 va_start: no reg_save_slot");
@@ -1830,14 +2038,46 @@ static void x_va_start_(CGTarget* t, Operand ap_op) {
static void x_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
CfreeCgTypeId ty) {
+ XImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
u32 ap = ap_op.v.reg & 0xFu;
u32 sz = type_byte_size(ty);
int is_fp = (dst.cls == RC_FP);
+ u32 dr = dst.v.reg & 0xFu;
+ if (a->abi->shadow_space) {
+ /* Win64: va_list is a plain pointer to the next slot. Every
+ * variadic arg occupies exactly 8 bytes (or 16-byte aggregates
+ * passed by hidden ptr — cfree's caller side already handles
+ * that). FP varargs are duplicated into the matching GPR slot
+ * at the call site (vararg_fp_dup_to_gpr), so we always load
+ * from the integer slot at *ap.
+ * r11 = *ap ; current slot address
+ * dst = [r11] ; load
+ * r11 += 8 ; advance
+ * *ap = r11 ; write back */
+ emit_mov_load(mc, 8, 0, X64_R11, ap, 0);
+ if (is_fp) {
+ u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
+ emit_sse_load(mc, prefix, 0x10, dr, X64_R11, 0);
+ } else {
+ int sx = type_is_signed(ty);
+ emit_mov_load(mc, sz, sx, dr, X64_R11, 0);
+ }
+ /* add r11, 8 : REX.WB 0x83 /0 imm8. */
+ {
+ u32 ofs = obj_pos(mc->obj, mc->section_id);
+ u8 rex = (u8)(X64_REX_BASE | X64_REX_W | X64_REX_B);
+ mc->emit_bytes(mc, &rex, 1);
+ u8 buf[3] = {0x83, modrm(3u, 0u, X64_R11 & 7u), 8};
+ mc->emit_bytes(mc, buf, 3);
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+ }
+ emit_mov_store(mc, 8, X64_R11, ap, 0);
+ return;
+ }
u32 offs_field = is_fp ? 4u : 0u;
u32 max_offs = is_fp ? 176u : 48u;
u32 stride = is_fp ? 16u : 8u;
- u32 dr = dst.v.reg & 0xFu;
MCLabel L_stack = mc->label_new(mc);
MCLabel L_done = mc->label_new(mc);
@@ -1914,10 +2154,17 @@ static void x_va_end_(CGTarget* t, Operand a) {
}
static void x_va_copy_(CGTarget* t, Operand d, Operand s) {
+ XImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
u32 dr = d.v.reg & 0xFu;
u32 sr = s.v.reg & 0xFu;
- /* va_list is 24 bytes; three 8B loads + stores via rax. */
+ if (a->abi->shadow_space) {
+ /* Win64 va_list is a single 8-byte pointer. */
+ emit_mov_load(mc, 8, 0, X64_RAX, sr, 0);
+ emit_mov_store(mc, 8, X64_RAX, dr, 0);
+ return;
+ }
+ /* SysV va_list is 24 bytes; three 8B loads + stores via rax. */
for (u32 i = 0; i < 24u; i += 8u) {
emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i);
emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c
@@ -119,16 +119,19 @@ static void x_get_phys_regs(CGTarget* t, RegClass cls,
}
static int x_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
- (void)t;
+ const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os);
switch (cls) {
case RC_INT:
- /* SysV AMD64 caller-saved: RAX,RCX,RDX,RSI,RDI,R8-R11 */
- return reg == X64_RAX || reg == X64_RCX || reg == X64_RDX ||
- reg == X64_RSI || reg == X64_RDI ||
- (reg >= X64_R8 && reg <= X64_R11);
+ /* Everything that isn't callee-saved (and isn't RSP/RBP) is
+ * caller-saved. Inverting the ABI's cs_int_mask handles both
+ * SysV and Win64 in one line. */
+ if (reg == X64_RSP || reg == X64_RBP) return 0;
+ return (abi->cs_int_mask & (1ull << reg)) == 0;
case RC_FP:
- /* SysV AMD64: all XMM regs are caller-saved */
- return reg >= X64_XMM0 && reg <= X64_XMM0 + 15;
+ /* SysV: all XMMs caller-saved. Win64: XMM0..XMM5 caller-saved,
+ * XMM6..XMM15 callee-saved. */
+ if (reg < X64_XMM0 || reg > X64_XMM0 + 15) return 0;
+ return (abi->cs_fp_mask & (1ull << reg)) == 0;
default:
return 0;
}
@@ -136,26 +139,41 @@ static int x_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
static u32 x_call_clobber_mask(CGTarget* t, const CGCallDesc* d,
RegClass cls) {
- (void)t;
(void)d;
+ const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os);
switch (cls) {
- case RC_INT:
- return (1u << X64_RAX) | (1u << X64_RCX) | (1u << X64_RDX) |
- (1u << X64_RSI) | (1u << X64_RDI) | (1u << X64_R8) |
- (1u << X64_R9) | (1u << X64_R10) | (1u << X64_R11);
- case RC_FP:
- return 0xFFFFu;
+ case RC_INT: {
+ /* All GPRs except callee-saved (and RSP/RBP) are clobbered by a
+ * call. */
+ u32 mask = 0;
+ for (u32 r = 0; r < 16u; ++r) {
+ if (r == X64_RSP || r == X64_RBP) continue;
+ if ((abi->cs_int_mask & (1ull << r)) == 0) mask |= (1u << r);
+ }
+ return mask;
+ }
+ case RC_FP: {
+ /* All XMMs except callee-saved are clobbered by a call. */
+ u32 mask = 0;
+ for (u32 r = 0; r < 16u; ++r) {
+ if ((abi->cs_fp_mask & (1ull << r)) == 0) mask |= (1u << r);
+ }
+ return mask;
+ }
default:
return 0;
}
}
static u32 x_callee_save_mask(CGTarget* t, RegClass cls) {
- (void)t;
- return cls == RC_INT ? ((1u << X64_RBX) | (1u << X64_R12) |
- (1u << X64_R13) | (1u << X64_R14) |
- (1u << X64_R15))
- : 0;
+ const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os);
+ if (cls == RC_INT) {
+ /* RBP is saved by the prologue head, not exposed for general
+ * callee-save spill bookkeeping. */
+ return (u32)(abi->cs_int_mask & ~(1ull << X64_RBP));
+ }
+ if (cls == RC_FP) return (u32)abi->cs_fp_mask;
+ return 0;
}
static u32 x_return_reg_mask(CGTarget* t, const ABIFuncInfo* abi,
@@ -194,19 +212,26 @@ static void x_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) {
u32 cap = d->nargs * 2u + 2u;
out->args = arena_zarray(t->c->tu, CGCallPlanMove, cap ? cap : 1u);
out->rets = arena_zarray(t->c->tu, CGCallPlanRet, 4);
- u32 next_int = d->abi && d->abi->has_sret ? 1u : 0u, next_fp = 0, stack = 0;
- static const u32 iregs[6] = {X64_RDI, X64_RSI, X64_RDX, X64_RCX, X64_R8, X64_R9};
+ const X64ABIRegs* abi = x64_abi_for_os(t->c->target.os);
+ u32 next_int = d->abi && d->abi->has_sret ? 1u : 0u, next_fp = 0;
+ /* Win64 reserves a 32 B shadow space above the return address that
+ * the caller owns; the first stack-passed arg lands above it. SysV
+ * starts at offset 0. */
+ u32 stack = abi->shadow_space;
if (d->abi && d->abi->has_sret) {
CGCallPlanMove* m = &out->args[out->nargs++];
m->src = d->ret.storage;
m->src_kind = CG_CALL_PLAN_SRC_ADDR;
m->dst_kind = CG_CALL_PLAN_REG;
m->cls = RC_INT;
- m->dst_reg = X64_RDI;
+ m->dst_reg = abi->int_args[0];
m->mem.type = d->ret.type;
m->mem.size = 8;
m->mem.align = 8;
}
+ /* On Win64, advance the FP slot counter in lockstep with the int
+ * slot counter (shared slot). */
+ if (abi->slot_shared_int_fp) next_fp = next_int;
for (u32 a = 0; a < d->nargs; ++a) {
const CGABIValue* av = &d->args[a];
const ABIArgInfo* ai = av->abi;
@@ -228,14 +253,15 @@ static void x_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) {
m->src = av->storage;
m->src_kind = CG_CALL_PLAN_SRC_ADDR;
m->cls = RC_INT;
- if (next_int < 6) {
+ if (next_int < abi->n_int_args) {
m->dst_kind = CG_CALL_PLAN_REG;
- m->dst_reg = iregs[next_int++];
+ m->dst_reg = abi->int_args[next_int++];
} else {
m->dst_kind = CG_CALL_PLAN_STACK;
m->stack_offset = stack;
stack += 8;
}
+ if (abi->slot_shared_int_fp) next_fp = next_int;
m->mem.type = av->type;
m->mem.size = 8;
m->mem.align = 8;
@@ -271,24 +297,34 @@ static void x_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) {
m->mem.align = p->align ? p->align : p->size;
if (p->cls == ABI_CLASS_FP) {
m->cls = RC_FP;
- if (next_fp < 8) {
+ if (next_fp < abi->n_fp_args) {
+ u32 dst_x = next_fp;
m->dst_kind = CG_CALL_PLAN_REG;
m->dst_reg = X64_XMM0 + next_fp++;
+ if (abi->vararg_fp_dup_to_gpr && av->abi == NULL &&
+ dst_x < abi->n_int_args) {
+ CGCallPlanMove* dup = &out->args[out->nargs++];
+ *dup = *m;
+ dup->cls = RC_INT;
+ dup->dst_reg = abi->int_args[dst_x];
+ }
} else {
m->dst_kind = CG_CALL_PLAN_STACK;
m->stack_offset = stack;
stack += 8;
}
+ if (abi->slot_shared_int_fp) next_int = next_fp;
} else {
m->cls = RC_INT;
- if (next_int < 6) {
+ if (next_int < abi->n_int_args) {
m->dst_kind = CG_CALL_PLAN_REG;
- m->dst_reg = iregs[next_int++];
+ m->dst_reg = abi->int_args[next_int++];
} else {
m->dst_kind = CG_CALL_PLAN_STACK;
m->stack_offset = stack;
stack += 8;
}
+ if (abi->slot_shared_int_fp) next_fp = next_int;
}
}
}
diff --git a/src/link/link.c b/src/link/link.c
@@ -139,6 +139,10 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data,
ob = read_macho(l->c, name, data, len);
reader_name = "read_macho";
break;
+ case CFREE_BIN_COFF:
+ ob = read_coff(l->c, name, data, len);
+ reader_name = "read_coff";
+ break;
default:
compiler_panic(l->c, no_loc(),
"link_add_obj_bytes: unsupported object format "
@@ -150,10 +154,23 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data,
"link_add_obj_bytes: %s returned NULL for '%s'",
reader_name, name ? name : "(unnamed)");
in = inputs_push(l, &id);
- in->kind = LINK_INPUT_OBJ_BYTES;
in->order = l->next_input_order++;
in->obj = ob; /* re-uses the ObjBuilder slot for ownership */
in->name = name ? pool_intern_cstr(l->c->global, name) : 0;
+ /* PE/COFF short-import: read_coff_short_import stashes the providing
+ * DLL name on the builder. Reclassify the input as a DSO so the
+ * resolver treats its symbols as exports (matching the .lib archive
+ * member path in include_archive_member). */
+ {
+ Sym coff_dll = 0;
+ if (fmt == CFREE_BIN_COFF && obj_get_coff_import_dll(ob, &coff_dll) &&
+ coff_dll) {
+ in->kind = LINK_INPUT_DSO_BYTES;
+ in->soname = coff_dll;
+ } else {
+ in->kind = LINK_INPUT_OBJ_BYTES;
+ }
+ }
return id;
}
@@ -186,6 +203,16 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data,
ob = read_macho_dso(l->c, name, data, len, &soname);
reader_name = "read_macho_dso";
break;
+ case CFREE_BIN_COFF:
+ case CFREE_BIN_PE:
+ /* Both spellings route through read_coff_dso: CFREE_BIN_PE is
+ * the MZ/PE-signed form (a real .dll), CFREE_BIN_COFF can land
+ * here when the caller hands us a single short-import record
+ * directly (rare; .lib archives are the usual conveyance and
+ * are handled by link_add_archive_bytes). */
+ ob = read_coff_dso(l->c, name, data, len, &soname);
+ reader_name = "read_coff_dso";
+ break;
default:
compiler_panic(l->c, no_loc(),
"link_add_dso_bytes: unsupported DSO format "
@@ -219,6 +246,305 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data,
return id;
}
+/* ---- COFF long-form import-archive support ----
+ *
+ * mingw `.a` archives (e.g. libkernel32.a) don't use the Microsoft
+ * short-import record format (Sig1=0/Sig2=0xFFFF — handled in
+ * read_coff_short_import). Instead every archive member is a regular
+ * long-form COFF .o file containing `.idata$N` sections. Three flavors
+ * appear:
+ *
+ * - Head member (e.g. libkernel32h.o): defines `_head_lib64_<lib>_a`,
+ * has `.idata$2` with one IMAGE_IMPORT_DESCRIPTOR template plus
+ * sentinel `.idata$4` / `.idata$5` slots.
+ * - Trailer member (e.g. libkernel32t.o): defines
+ * `__lib64_lib<lib>_a_iname` (the DLL-name string), with tiny
+ * `.idata$4` / `.idata$5` / `.idata$7` terminators.
+ * - Per-function stub (e.g. libkernel32s00001.o for ExitProcess):
+ * defines `__imp_<name>` (the IAT slot in `.idata$5`) and the bare
+ * `<name>` in `.text` (a 6-byte `ff 25 disp32` indirect jump
+ * against `__imp_<name>`). Carries `.idata$4` (ILT), `.idata$5`
+ * (IAT), `.idata$6` (hint+name), `.idata$7` (DLL-name back-ptr).
+ *
+ * cfree's link-emit path synthesizes the entire .idata from
+ * LinkSymbol.imported entries (link_emit_coff). The long-form members'
+ * `.idata$N` byte content is therefore redundant — only the symbol
+ * naming the export matters. We absorb the per-function stubs at
+ * archive-ingest time by rewriting them into short-import-shaped
+ * DSO shims (matching what read_coff_short_import produces), and we
+ * drop the head/trailer members entirely.
+ *
+ * DLL-name source: mingw/llvm-mingw long import members are named after
+ * the providing DLL (`KERNEL32.dll`, `api-ms-win-crt-runtime-l1-1-0.dll`,
+ * ...). Fall back to the archive filename for older import archives.
+ *
+ * The conversion is gated on Compiler.target.obj == CFREE_OBJ_COFF (and on
+ * a derivable archive DLL name) so non-Windows targets are unaffected. */
+
+typedef enum CoffArMemberClass {
+ COFF_AR_KEEP = 0, /* not an import member — goes through read_coff */
+ COFF_AR_SHIM = 1, /* defines __imp_<name>, has .idata$N — DSO shim */
+ COFF_AR_SKIP = 2, /* head/trailer (_head_* / *_iname) — dropped */
+} CoffArMemberClass;
+
+static const char kCoffImpPrefix_[] = "__imp_";
+static const u32 kCoffImpPrefixLen_ = (u32)(sizeof kCoffImpPrefix_ - 1u);
+static const char kCoffHeadPrefix_[] = "_head_";
+static const u32 kCoffHeadPrefixLen_ = (u32)(sizeof kCoffHeadPrefix_ - 1u);
+static const char kCoffInameSuffix_[] = "_iname";
+static const u32 kCoffInameSuffixLen_ = (u32)(sizeof kCoffInameSuffix_ - 1u);
+
+/* Derive a DLL name from the archive path: take the basename, strip one
+ * trailing ".dll.a" / ".a" / ".lib" and a leading "lib", append ".dll":
+ * path/to/libkernel32.a -> "kernel32.dll"
+ * path/to/libkernel32.dll.a -> "kernel32.dll"
+ * path/to/kernel32.lib -> "kernel32.dll"
+ * path/to/libfoo -> "foo.dll"
+ * Returns 0 for a NULL/empty path or when no stem is left after stripping. */
+static Sym derive_dll_name_from_archive_path(Compiler* c, const char* path) {
+ const char* base;
+ const char* p;
+ size_t n;
+ size_t out_len;
+ char* out;
+ Sym sym;
+ if (!path || !*path) return 0;
+ base = path;
+ for (p = path; *p; ++p)
+ if (*p == '/' || *p == '\\') base = p + 1;
+ n = strlen(base);
+ /* Strip one trailing ".dll.a" / ".a" / ".lib" (case-sensitive — mingw
+ * uses lowercase, MSVC uses .lib). */
+ if (n >= 6 && memcmp(base + n - 6, ".dll.a", 6) == 0) n -= 6;
+ else if (n >= 2 && memcmp(base + n - 2, ".a", 2) == 0) n -= 2;
+ else if (n >= 4 && memcmp(base + n - 4, ".lib", 4) == 0) n -= 4;
+ /* Strip leading "lib" prefix. */
+ if (n >= 3 && memcmp(base, "lib", 3) == 0) {
+ base += 3;
+ n -= 3;
+ }
+ if (n == 0) return 0;
+ /* Append ".dll"; the scratch copy is interned by length, no NUL added. */
+ out_len = n + 4u;
+ out = (char*)arena_array(c->scratch, char, out_len);
+ memcpy(out, base, n);
+ memcpy(out + n, ".dll", 4);
+ sym = pool_intern(c->global, out, (u32)out_len);
+ return sym;
+}
+
+static Sym derive_dll_name_from_archive_member(Compiler* c,
+ const char* member_name,
+ Sym fallback) { /* fallback: returned when no *.dll name is found */
+ const char* base;
+ const char* p;
+ size_t n;
+ if (!member_name || !*member_name) return fallback;
+ base = member_name;
+ for (p = member_name; *p; ++p)
+ if (*p == '/' || *p == '\\') base = p + 1; /* keep the basename only */
+ n = strlen(base);
+ if (n >= 4 && memcmp(base + n - 4, ".dll", 4) == 0)
+ return pool_intern(c->global, base, (u32)n); /* basename, suffix kept */
+ if (n >= 4 && memcmp(base + n - 4, ".DLL", 4) == 0)
+ return pool_intern(c->global, base, (u32)n);
+ return fallback; /* NOTE(review): mixed case (".Dll") also lands here */
+}
+
+/* Resolve a COFF symbol-record's name to (ptr, len) without copying.
+ * Mirrors the resolve_sym_name helper in coff_read.c: 8-byte short
+ * name in-record, or (Zeroes==0, Offset) into the string table. */
+static void coff_resolve_sym_name_(const u8* rec, const u8* strtab,
+ u32 strtab_size, const char** name_out,
+ u32* len_out) {
+ u32 z = (u32)rec[0] | ((u32)rec[1] << 8) | ((u32)rec[2] << 16) |
+ ((u32)rec[3] << 24);
+ if (z == 0) {
+ u32 off = (u32)rec[4] | ((u32)rec[5] << 8) | ((u32)rec[6] << 16) |
+ ((u32)rec[7] << 24);
+ if (off >= strtab_size) { /* offset outside the table: empty name */
+ *name_out = "";
+ *len_out = 0;
+ return;
+ }
+ const char* s = (const char*)(strtab + off);
+ u32 max = strtab_size - off; /* never scan past the table end */
+ u32 n = 0;
+ while (n < max && s[n] != '\0') ++n;
+ *name_out = s;
+ *len_out = n;
+ return;
+ }
+ u32 n = 0;
+ while (n < 8 && rec[n] != '\0') ++n;
+ *name_out = (const char*)rec; /* not NUL-terminated if all 8 bytes used */
+ *len_out = n;
+}
+
+/* Byte-level classifier that walks a long-form COFF member's symbol
+ * table directly, without running read_coff. We use this instead of
+ * the post-read_coff approach because mingw's archives contain reloc
+ * types read_coff doesn't grok (e.g. IMAGE_REL_AMD64_SECTION/SECREL
+ * in `.idata$N` sections), and we want to drop those members entirely
+ * rather than fail at read time.
+ *
+ * Returns SHIM / SKIP / KEEP; input too short or without symbols is
+ * KEEP. On SHIM, *out_name is the interned bare `<name>` (the export's
+ * real symbol, decoded from the first defined `__imp_<name>`); else 0.
+ * Layout relied on: 20-byte file header, section headers read right
+ * after it (SizeOfOptionalHeader is not consulted), symbol table at
+ * PointerToSymbolTable with COFF_SYM_REC_SIZE_ (18) byte records (aux
+ * records included), then the string table: u32 size header + bytes. */
+#define COFF_SYM_REC_SIZE_ 18u
+#define COFF_FILE_HDR_SIZE_ 20u
+#define COFF_SYM_CLASS_EXTERNAL_ 2u
+
+static CoffArMemberClass classify_coff_archive_member_bytes(
+ Compiler* c, const u8* data, size_t len, Sym* out_name) {
+ u32 ptr_to_symtab;
+ u32 nsymbols;
+ u16 nsections;
+ const u8* sym_base;
+ const u8* strtab;
+ u32 strtab_size;
+ int has_imp = 0;
+ int has_idata = 0;
+ int has_head_def = 0;
+ int has_iname_def = 0;
+ Sym imp_bare_name = 0;
+ u32 i;
+ *out_name = 0;
+ if (len < COFF_FILE_HDR_SIZE_) return COFF_AR_KEEP;
+ nsections = (u16)((u32)data[2] | ((u32)data[3] << 8));
+ ptr_to_symtab = (u32)data[8] | ((u32)data[9] << 8) |
+ ((u32)data[10] << 16) | ((u32)data[11] << 24);
+ nsymbols = (u32)data[12] | ((u32)data[13] << 8) |
+ ((u32)data[14] << 16) | ((u32)data[15] << 24);
+ if (nsymbols == 0 || ptr_to_symtab == 0) return COFF_AR_KEEP;
+ if ((u64)COFF_FILE_HDR_SIZE_ + (u64)nsections * 40u <= (u64)len) {
+ u32 si;
+ for (si = 0; si < (u32)nsections; ++si) {
+ const u8* sh = data + COFF_FILE_HDR_SIZE_ + (u64)si * 40u;
+ if (memcmp(sh, ".idata$", 7) == 0) {
+ has_idata = 1;
+ break;
+ }
+ }
+ }
+ if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_ > (u64)len)
+ return COFF_AR_KEEP;
+ sym_base = data + ptr_to_symtab;
+ /* String table follows symtab. Leading u32 = total size (incl. self).
+ * Treated as absent when missing, < 4, or running past the member. */
+ {
+ u64 symtab_end =
+ (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYM_REC_SIZE_;
+ if (symtab_end + 4u <= (u64)len) {
+ u32 declared = (u32)data[symtab_end] |
+ ((u32)data[symtab_end + 1] << 8) |
+ ((u32)data[symtab_end + 2] << 16) |
+ ((u32)data[symtab_end + 3] << 24);
+ if (declared < 4u || symtab_end + (u64)declared > (u64)len) {
+ strtab = NULL;
+ strtab_size = 0;
+ } else {
+ strtab = data + symtab_end;
+ strtab_size = declared;
+ }
+ } else {
+ strtab = NULL;
+ strtab_size = 0;
+ }
+ }
+ i = 0;
+ while (i < nsymbols) {
+ const u8* p = sym_base + (u64)i * COFF_SYM_REC_SIZE_;
+ u16 sec_num = (u16)((u32)p[12] | ((u32)p[13] << 8));
+ u8 sclass = p[16];
+ u8 naux = p[17];
+ const char* nm = NULL;
+ u32 nlen = 0;
+ /* Only consider defined external symbols. UNDEF (sec_num==0)
+ * gives no information about what this object *provides*. */
+ if (sclass == COFF_SYM_CLASS_EXTERNAL_ && sec_num != 0) {
+ coff_resolve_sym_name_(p, strtab, strtab_size, &nm, &nlen);
+ if (nlen > kCoffImpPrefixLen_ &&
+ memcmp(nm, kCoffImpPrefix_, kCoffImpPrefixLen_) == 0) {
+ has_imp = 1;
+ if (imp_bare_name == 0) {
+ const char* tail = nm + kCoffImpPrefixLen_;
+ u32 tail_len = nlen - kCoffImpPrefixLen_;
+ imp_bare_name = pool_intern(c->global, tail, tail_len);
+ }
+ } else if (nlen > kCoffHeadPrefixLen_ &&
+ memcmp(nm, kCoffHeadPrefix_, kCoffHeadPrefixLen_) == 0) {
+ has_head_def = 1;
+ } else if (nlen > kCoffInameSuffixLen_ &&
+ memcmp(nm + nlen - kCoffInameSuffixLen_, kCoffInameSuffix_,
+ kCoffInameSuffixLen_) == 0) {
+ has_iname_def = 1;
+ }
+ }
+ /* Skip primary + aux records. */
+ i += 1u + (u32)naux;
+ }
+ if (has_imp && has_idata) {
+ *out_name = imp_bare_name;
+ return COFF_AR_SHIM;
+ }
+ if (has_head_def || has_iname_def) return COFF_AR_SKIP;
+ return COFF_AR_KEEP;
+}
+
+/* Build a fresh ObjBuilder containing just `<name>` (SK_FUNC) and
+ * `__imp_<name>` (SK_OBJ) as defined-at-OBJ_SEC_NONE globals (the shape
+ * read_coff_dso / read_coff_short_import produce for a DLL export), tagged
+ * with the DLL name. Returns NULL on a zero Sym, empty name, or failed obj_new. */
+static ObjBuilder* build_coff_long_import_shim(Compiler* c, Sym bare_name,
+ Sym dll_name) {
+ ObjBuilder* ob;
+ const char* bare;
+ size_t bare_len = 0;
+ u32 imp_len;
+ char* imp_buf;
+ Sym imp_sn;
+ ObjSymId id;
+ ObjSymId imp_id;
+ if (bare_name == 0 || dll_name == 0) return NULL;
+ bare = pool_str(c->global, bare_name, &bare_len);
+ if (!bare || bare_len == 0) return NULL;
+ ob = obj_new(c);
+ if (!ob) return NULL;
+ id = obj_symbol_ex(ob, bare_name, SB_GLOBAL, SV_DEFAULT, SK_FUNC,
+ OBJ_SEC_NONE, 0, 0, 0);
+ obj_sym_mark_referenced(ob, id);
+ imp_len = kCoffImpPrefixLen_ + (u32)bare_len;
+ imp_buf = (char*)arena_array(c->scratch, char, imp_len); /* no NUL slot */
+ memcpy(imp_buf, kCoffImpPrefix_, kCoffImpPrefixLen_);
+ memcpy(imp_buf + kCoffImpPrefixLen_, bare, bare_len);
+ imp_sn = pool_intern(c->global, imp_buf, imp_len); /* interned by length */
+ imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ,
+ OBJ_SEC_NONE, 0, 0, 0);
+ obj_sym_mark_referenced(ob, imp_id);
+ obj_set_coff_import_dll(ob, dll_name);
+ obj_finalize(ob);
+ return ob;
+}
+
+static int coff_skip_long_import_shim_bare(Compiler* c, Sym bare_name) {
+ const char* s;
+ size_t n = 0;
+ if (!bare_name) return 0;
+ s = pool_str(c->global, bare_name, &n);
+ if (!s) return 0;
+ /* Nonzero only for `__getmainargs` / `__p___initenv`: llvm-mingw's UCRT
+ * libmsvcrt.a intentionally provides these legacy CRT entry helpers as
+ * regular archive members later in the same archive. Prefer those
+ * wrappers over the older direct msvcrt.dll import shims. */
+ return (n == 13 && memcmp(s, "__getmainargs", 13) == 0) ||
+ (n == 13 && memcmp(s, "__p___initenv", 13) == 0);
+}
+
LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
size_t len, u8 whole_archive, u8 link_mode,
u8 group_id) {
@@ -227,8 +553,12 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
CfreeArMember mem;
LinkArchive* ar;
u32 n;
+ Sym archive_dll_name = 0;
+ int is_coff_target = (l && l->c->target.obj == CFREE_OBJ_COFF);
if (!l || !data || !len) return LINK_INPUT_NONE;
+ if (is_coff_target)
+ archive_dll_name = derive_dll_name_from_archive_path(l->c, name);
in_arc.name = name;
in_arc.data = data;
@@ -277,6 +607,35 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
while (cfree_ar_iter_next(it, &mem) == CFREE_ITER_ITEM && n < ar->nmembers) {
ObjBuilder* ob = NULL;
CfreeBinFmt mfmt = cfree_detect_fmt(mem.data, mem.size);
+ /* COFF long-form import-archive absorption (mingw `.a`). Classify
+ * the member from raw bytes *before* read_coff so we can drop
+ * members carrying `.idata$N` reloc types read_coff doesn't model
+ * (e.g. IMAGE_REL_AMD64_SECTION) without ever invoking the reader
+ * on them. KEEP members fall through to the standard read path. */
+ if (mfmt == CFREE_BIN_COFF && is_coff_target && archive_dll_name != 0) {
+ Sym bare = 0;
+ CoffArMemberClass cls = classify_coff_archive_member_bytes(
+ l->c, mem.data, mem.size, &bare);
+ if (cls == COFF_AR_SHIM) {
+ if (coff_skip_long_import_shim_bare(l->c, bare)) {
+ ob = NULL;
+ } else {
+ Sym member_dll =
+ derive_dll_name_from_archive_member(l->c, mem.name,
+ archive_dll_name);
+ ob = build_coff_long_import_shim(l->c, bare, member_dll);
+ }
+ } else if (cls == COFF_AR_SKIP) {
+ ob = NULL;
+ }
+ if (cls != COFF_AR_KEEP) {
+ ar->members[n].name =
+ mem.name ? pool_intern_cstr(l->c->global, mem.name) : 0;
+ ar->members[n].obj = ob;
+ ++n;
+ continue;
+ }
+ }
switch (mfmt) {
case CFREE_BIN_ELF:
ob = read_elf(l->c, mem.name, mem.data, mem.size);
@@ -284,6 +643,9 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
case CFREE_BIN_MACHO:
ob = read_macho(l->c, mem.name, mem.data, mem.size);
break;
+ case CFREE_BIN_COFF:
+ ob = read_coff(l->c, mem.name, mem.data, mem.size);
+ break;
default:
compiler_panic(l->c, no_loc(),
"link_add_archive_bytes: unsupported member "
@@ -360,6 +722,11 @@ void link_set_pie(Linker* l, int enable) {
l->emit_pie = enable ? 1 : 0;
}
+void link_set_pe_subsystem(Linker* l, u16 subsystem) {
+ if (!l) return; /* NULL linker: silently ignored */
+ l->pe_subsystem = subsystem; /* stored as given; no validation here */
+}
+
void link_set_jit_host(Linker* l, const CfreeJitHost* host) {
if (!l) return;
l->jit_host = host;
@@ -504,6 +871,9 @@ static void link_image_release(LinkImage* img) {
if (m->section)
img->heap->free(img->heap, m->section,
sizeof(*m->section) * m->nsection);
+ if (m->comdat_discarded)
+ img->heap->free(img->heap, m->comdat_discarded,
+ m->nsection ? m->nsection : 1u);
}
img->heap->free(img->heap, img->input_maps,
sizeof(*img->input_maps) * img->ninput_maps);
@@ -591,9 +961,8 @@ void link_emit_image_writer(LinkImage* img, Writer* w) {
link_emit_macho(img, w);
return;
case CFREE_OBJ_COFF:
- compiler_panic(img->c, no_loc(),
- "link_emit_image_writer: COFF/PE linker emit not yet "
- "implemented");
+ link_emit_coff(img, w);
+ return;
case CFREE_OBJ_WASM:
compiler_panic(img->c, no_loc(),
"link_emit_image_writer: Wasm linker emit not yet "
diff --git a/src/link/link.h b/src/link/link.h
@@ -199,6 +199,7 @@ void link_set_jit_mode(Linker*, int enable);
* emit_static_exe; both may be set in the same link (the IFUNC ctor
* still wants to run on the exe path regardless of PIE). */
void link_set_pie(Linker*, int enable);
+void link_set_pe_subsystem(Linker*, u16 subsystem);
/* Runtime loader path written into PT_INTERP / .interp. NULL leaves the
* default ("/lib/ld-musl-aarch64.so.1" for aarch64-linux). Only
diff --git a/src/link/link_arch.h b/src/link/link_arch.h
@@ -101,6 +101,20 @@ typedef struct LinkArchDesc {
u32 macho_stub_size;
void (*emit_macho_stub)(u8* dst, u64 stub_vaddr, u64 got_slot_vaddr);
+ /* PE/COFF IAT stub. Used when target.obj == CFREE_OBJ_COFF and a
+ * relocation targets an imported function. The stub performs an
+ * indirect jump through the IAT slot:
+ * x64: ff 25 disp32 ; jmp [rip + disp_to_iat_slot] (6 B)
+ * aa64: adrp/ldr/br x16 ; load IAT slot, branch to it (12 B)
+ *
+ * The 32-bit displacement on x64 and the page-relative pair on
+ * aa64 are baked into the stub bytes directly (no apply-time
+ * relocations needed), so callers do not enqueue extra
+ * LinkRelocApply records — see how emit_iplt_stub returns 0 for
+ * arches that can encode the displacement inline. */
+ u32 coff_stub_size;
+ void (*emit_coff_iat_stub)(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr);
+
/* Relocation classification used by format-specific linker passes. */
int (*is_branch_reloc)(RelocKind);
int (*is_got_load_reloc)(RelocKind);
diff --git a/src/link/link_coff.c b/src/link/link_coff.c
@@ -0,0 +1,1741 @@
+/* link_emit_coff: write a PE32+ MH_EXECUTABLE-style image to the
+ * caller-provided Writer.
+ *
+ * Began as the Phase 3.1 deliverable per doc/WINDOWS.md (skeleton +
+ * base-reloc handling for the standard PE sections). Import-table
+ * synthesis (.idata / IAT, Phase 3.2), per-arch IAT stubs (3.3) and the
+ * TLS directory (3.5) are implemented further down in this file; the
+ * debug directory is Phase 3.6. Unsupported paths panic loudly.
+ *
+ * File layout (in write order):
+ *
+ * [DOS stub IMAGE_DOS_HEADER] -- 64 bytes; e_lfanew=0x40
+ * [PE signature "PE\0\0"] -- 4 bytes
+ * [IMAGE_FILE_HEADER] -- 20 bytes
+ * [IMAGE_OPTIONAL_HEADER64] -- 240 bytes (PE32+)
+ * [IMAGE_SECTION_HEADER * nsec] -- 40 bytes each
+ * [pad to FileAlignment]
+ * [.text bytes, padded to FileAlignment]
+ * [.rdata bytes, padded to FileAlignment]
+ * [.data bytes, padded to FileAlignment]
+ * [.reloc bytes, padded to FileAlignment]
+ *
+ * .bss is uninitialized — it has a section header (with VirtualSize)
+ * but no file bytes and PointerToRawData=0.
+ *
+ * RVAs follow SectionAlignment (0x1000); FileAlignment is 0x200; the
+ * first section starts at RVA 0x1000 (right after the headers map).
+ * ImageBase is the Win64 convention 0x140000000.
+ *
+ * Reloc strategy. The link layout pass has already placed every kept
+ * input section into img->sections / img->segments under the ELF/Mach-O
+ * coordinate system (image-relative vaddrs, often packed by permission
+ * bucket). COFF wants a different packing — the four standard
+ * sections at SectionAlignment-aligned RVAs — so this writer re-derives
+ * per-input-section vaddrs from scratch and shifts each LinkSection /
+ * symbol / LinkRelocApply by its section's per-section delta before
+ * applying relocations. link_emit_macho takes the same tack for its
+ * __DATA_CONST splits; the ELF writer leaves vaddrs alone because the
+ * link layout already matches ELF's PT_LOAD shape. */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "arch/arch.h"
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link.h"
+#include "link/link_arch.h"
+#include "link/link_internal.h"
+#include "obj/coff.h"
+
+/* ---- .idata layout constants ----
+ *
+ * Per doc/WINDOWS.md §3.2: the .idata content is a concatenation of an
+ * IMAGE_IMPORT_DESCRIPTOR table (NULL-terminated), one ILT per DLL
+ * (each NULL-terminated u64 array), one IAT per DLL (same shape),
+ * a hint/name table, and a DLL-name string pool. Each block is
+ * pointer-sized aligned within the section. AArch64 import thunks use
+ * PAGEOFFSET_12L for 64-bit ILT/IAT slots, so those sub-blocks must be
+ * 8-byte aligned. */
+#define PE_IDATA_BLOCK_ALIGN 8u
+/* Hint field on IMAGE_IMPORT_BY_NAME records. cfree never has a real
+ * hint (the OS loader doesn't need one to do the bsearch on the DLL's
+ * export name table), so 0 is the canonical "no hint" value. */
+#define PE_IMPORT_HINT_NONE 0u
+
+/* Zeroed SrcLoc, handed to compiler_panic throughout this file. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- PE/Win64 layout constants ----
+ *
+ * Centralised here so the wire-format numbers in this TU stay named
+ * (and the magic-numbers rule in CLAUDE.md is honoured). Values match
+ * the PE/COFF spec + Win64 conventions; mingw-w64's ld defaults agree. */
+#define PE_IMAGE_BASE LINK_PE_IMAGE_BASE
+#define PE_SECTION_ALIGNMENT 0x1000u
+#define PE_FILE_ALIGNMENT 0x200u
+#define PE_FIRST_SECTION_RVA 0x1000u
+#define PE_DOS_E_LFANEW 0x40u
+#define PE_NUM_DATA_DIRS COFF_NUM_DATA_DIRECTORIES
+#define PE_OPT_HDR_SIZE COFF_OPT_HDR64_SIZE
+#define PE_LINKER_MAJOR 0u
+#define PE_LINKER_MINOR 1u
+#define PE_OS_MAJOR 6u /* Windows Vista+ — mingw default */
+#define PE_OS_MINOR 0u
+#define PE_SUBSYS_MAJOR 6u
+#define PE_SUBSYS_MINOR 0u
+#define PE_STACK_RESERVE 0x100000ULL
+#define PE_STACK_COMMIT 0x1000ULL
+#define PE_HEAP_RESERVE 0x100000ULL
+#define PE_HEAP_COMMIT 0x1000ULL
+#define PE_DLL_CHARS \
+ (IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | \
+ IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | \
+ IMAGE_DLLCHARACTERISTICS_NX_COMPAT | \
+ IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE)
+
+/* PE32+ DOS-stub-to-PE-signature offsets (manual, since we marshal
+ * field-by-field rather than memcpy'ing the packed struct). */
+#define PE_DOS_HDR_SIZE COFF_DOS_HEADER_SIZE
+#define PE_SIG_SIZE 4u
+#define PE_FILE_HDR_SIZE COFF_FILE_HEADER_SIZE
+#define PE_SECTION_HDR_SIZE COFF_SECTION_HEADER_SIZE
+
+/* Standard PE output buckets, plus .idata (import directory) and
+ * .reloc — both synthesised here rather than copied from input
+ * sections. Order matters: it's the on-image RVA order. */
+typedef enum CoffBucket {
+ COFF_BUCKET_TEXT = 0,
+ COFF_BUCKET_RDATA = 1,
+ COFF_BUCKET_IDATA = 2,
+ COFF_BUCKET_DATA = 3,
+ COFF_BUCKET_TLS = 4,
+ COFF_BUCKET_BSS = 5,
+ COFF_BUCKET_RELOC = 6,
+ COFF_NBUCKETS = 7,
+} CoffBucket;
+
+/* IMAGE_TLS_DIRECTORY64 wire size: u64*4 + u32*2 = 40 bytes. */
+#define COFF_TLS_DIRECTORY64_SIZE 40u
+/* Byte offsets of the four u64 VA fields within IMAGE_TLS_DIRECTORY64
+ * — they need base relocations so ASLR can fix them up. */
+#define COFF_TLSDIR_OFF_START_ADDR 0u
+#define COFF_TLSDIR_OFF_END_ADDR 8u
+#define COFF_TLSDIR_OFF_INDEX_ADDR 16u
+#define COFF_TLSDIR_OFF_CALLBACKS 24u
+
+typedef struct CoffSection {
+ const char* name; /* short ASCII; <= 8 bytes including NUL pad */
+ u32 characteristics;
+ u8* bytes; /* NULL for .bss / .reloc-before-build */
+ u32 size; /* VirtualSize (real bytes; for .bss, mem size) */
+ u32 size_raw; /* SizeOfRawData (file size, FileAlignment-padded) */
+ u32 rva; /* VirtualAddress in image */
+ u32 file_offset; /* PointerToRawData; 0 for .bss */
+ u8 in_image; /* 1 if this bucket is emitted as a section */
+ u8 has_file_bytes; /* 0 for .bss */
+ u8 pad[2];
+} CoffSection;
+
+/* ---- byte writer helpers ---- */
+
+static void coff_write_zeroes(Writer* w, u64 n) {
+  static const u8 pad_chunk[256] = {0}; /* zero source, reused per call */
+  while (n != 0) {
+    size_t take = n < sizeof(pad_chunk) ? (size_t)n : sizeof(pad_chunk);
+    cfree_writer_write(w, pad_chunk, take);
+    n -= take;
+  }
+}
+
+/* Map a kept LinkSection to its output bucket. The checks run in a fixed
+ * priority order: SF_EXEC first (TEXT), then SF_TLS (TLS), then
+ * SSEM_NOBITS (BSS); whatever is left is split on SF_WRITE into DATA and
+ * RDATA. Because SF_TLS is tested before SSEM_NOBITS, a no-bits TLS
+ * section still lands in the TLS bucket. */
+static CoffBucket coff_bucket_for(const LinkSection* ls) {
+  const int is_exec = (ls->flags & SF_EXEC) != 0;
+  const int is_tls = (ls->flags & SF_TLS) != 0;
+  if (is_exec || is_tls) return is_exec ? COFF_BUCKET_TEXT : COFF_BUCKET_TLS;
+  if (ls->sem == SSEM_NOBITS) return COFF_BUCKET_BSS;
+  return (ls->flags & SF_WRITE) ? COFF_BUCKET_DATA : COFF_BUCKET_RDATA;
+}
+
+/* Decide whether a relocation kind must be listed in .reloc so the
+ * loader can rebase the site. Only the absolute kinds R_ABS64 and
+ * R_ABS32 qualify; PC-relative displacements are load-invariant. */
+static int coff_reloc_needs_base_reloc(RelocKind k) {
+  if (k == R_ABS64) return 1;
+  return k == R_ABS32;
+}
+
+/* Linear scan for the first LinkSection whose inclusive range
+ * [vaddr, vaddr + size] contains the image-relative address `v`; NULL
+ * when nothing matches. The end address counts as inside, so a symbol
+ * sitting exactly at a section's end is attributed to that section. */
+static const LinkSection* coff_section_at(const LinkImage* img, u64 v) {
+  u32 idx = 0;
+  while (idx < img->nsections) {
+    const LinkSection* cand = &img->sections[idx++];
+    if (cand->vaddr <= v && v <= cand->vaddr + cand->size) return cand;
+  }
+  return NULL;
+}
+
+/* Per-input-section delta map. Indexed by `LinkSection.id - 1`.
+ * Populated by coff_build_buckets. Consumed by every subsequent pass
+ * that needs to translate input-coordinate offsets (the world that
+ * img->sections / img->relocs live in) into PE-coordinate ones (where
+ * the writer plants bytes). delta is stored explicitly so callers
+ * avoid recomputing (new_rva + bucket.rva - old_vaddr) for every
+ * LinkRelocApply whose link_section_id points at the section. */
+typedef struct CoffSecMap {
+ u32 new_rva; /* NOTE(review): coff_symbol_final_va adds out[bucket].rva to this, so it reads as bucket-local — confirm */
+ u32 new_file_off; /* file offset of the patched byte */
+ i64 delta; /* NOTE(review): header comment says new_rva + bucket.rva - old_vaddr — confirm which form is stored */
+ u8 bucket; /* CoffBucket value; used to index the out[] bucket array */
+ u8 pad[3];
+} CoffSecMap;
+
+/* TLS directory placement state. Populated when at least one SF_TLS
+ * section survives dead-strip; consumed by the optional-header writer,
+ * the .reloc builder (base-relocs for the four absolute VA fields),
+ * and the .rdata emit pass that writes the final 40-byte record. */
+typedef struct CoffTlsLayout {
+ int present; /* 1 iff the merged .tls bucket is non-empty */
+ u32 dir_rdata_off; /* byte offset of the IMAGE_TLS_DIRECTORY64 within .rdata */
+ u32 tls_size; /* size of the merged .tls bucket */
+ LinkSymId tls_index_sym; /* resolved _tls_index LinkSymbol */
+ LinkSymId callbacks_sym; /* __xl_a if found, else __xl_c, else LINK_SYM_NONE */
+ u64 callbacks_addend; /* 8 with __xl_a (skips the leading NULL sentinel), else 0 */
+} CoffTlsLayout;
+
+static LinkSymId coff_find_sym(LinkImage* img, const char* name) {
+  /* Intern once, then compare interned handles against every symbol. */
+  const Sym want = pool_intern_cstr(img->c->global, name);
+  const u32 total = LinkSyms_count(&img->syms);
+  u32 idx = 0;
+  for (; idx < total; ++idx) {
+    if (LinkSyms_at(&img->syms, idx)->name == want) break;
+  }
+  return idx < total ? (LinkSymId)(idx + 1) : LINK_SYM_NONE;
+}
+
+/* Resolve `_tls_index` by name in the link's symbol table. Returns
+ * LINK_SYM_NONE when no symbol of that name exists (mingw's CRT normally
+ * supplies it); turning that into an error is left to the caller. */
+static LinkSymId coff_find_tls_index_sym(LinkImage* img) {
+  static const char kTlsIndexName[] = "_tls_index";
+  return coff_find_sym(img, kTlsIndexName);
+}
+
+static const LinkSection* coff_symbol_section(const LinkImage* img,
+                                              const LinkSymbol* s) {
+  /* mingw CRT boundary symbols are pinned, by name, to the matching
+   * .CRT$X?? input section; every such symbol name is six bytes long. */
+  static const char* const kCrtPairs[6][2] = {
+      {"__xd_a", ".CRT$XDA"}, {"__xd_z", ".CRT$XDZ"},
+      {"__xl_a", ".CRT$XLA"}, {"__xl_c", ".CRT$XLC"},
+      {"__xl_d", ".CRT$XLD"}, {"__xl_z", ".CRT$XLZ"},
+  };
+  const char* want = NULL;
+  if (s->name) {
+    size_t len = 0, k;
+    const char* text = pool_str(img->c->global, s->name, &len);
+    for (k = 0; text && len == 6 && !want && k < 6; ++k) {
+      if (memcmp(text, kCrtPairs[k][0], 6) == 0) want = kCrtPairs[k][1];
+    }
+  }
+  if (want) {
+    const size_t want_len = strlen(want);
+    u32 idx;
+    for (idx = 0; idx < img->nsections; ++idx) {
+      const LinkSection* cand = &img->sections[idx];
+      size_t cand_len = 0;
+      const char* cand_name =
+          cand->name ? pool_str(img->c->global, cand->name, &cand_len) : NULL;
+      if (cand_name && cand_len == want_len &&
+          memcmp(cand_name, want, want_len) == 0)
+        return cand;
+    }
+  }
+  /* No pinned section found: use the recorded id, else search by vaddr. */
+  if (s->section_id != LINK_SEC_NONE && s->section_id <= img->nsections)
+    return &img->sections[s->section_id - 1];
+  return coff_section_at(img, s->vaddr);
+}
+
+static u64 coff_symbol_final_va(const LinkImage* img,
+                                const CoffSection out[COFF_NBUCKETS],
+                                const CoffSecMap* map,
+                                LinkSymId id,
+                                const char* what) {
+  /* Final VA = image base + bucket RVA + section slot + symbol offset. */
+  const LinkSymbol* sym = LinkSyms_at(&img->syms, id - 1);
+  if (sym->kind == SK_ABS || !sym->defined) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: `%s` is not a defined section-bound "
+                   "symbol", what);
+  }
+  const LinkSection* home = coff_symbol_section(img, sym);
+  if (home == NULL) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: `%s` has no containing section", what);
+  }
+  const CoffSecMap* slot = &map[home->id - 1];
+  return PE_IMAGE_BASE + (u64)out[slot->bucket].rva + (u64)slot->new_rva +
+         (sym->vaddr - home->vaddr);
+}
+
+/* Reserve 40 bytes at the tail of the .rdata bucket for the
+ * IMAGE_TLS_DIRECTORY64 record and remember their offset. Leaves `tls`
+ * zeroed when the .tls bucket is empty. Panics when `_tls_index` is
+ * missing or .rdata cannot grow; coff_emit_tls_dir fills the bytes in. */
+static void coff_plan_tls_layout(LinkImage* img,
+                                 CoffSection out[COFF_NBUCKETS],
+                                 u32* rdata_cap, CoffTlsLayout* tls) {
+  memset(tls, 0, sizeof(*tls));
+  if (out[COFF_BUCKET_TLS].size == 0) return;
+  tls->present = 1;
+  tls->tls_size = out[COFF_BUCKET_TLS].size;
+  tls->tls_index_sym = coff_find_tls_index_sym(img);
+  if (tls->tls_index_sym == LINK_SYM_NONE) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: .tls section requires `_tls_index` "
+                   "(provided by mingw libmingwex / tlsmcrt.o) — none of "
+                   "the linked inputs define it");
+  }
+  /* Callback array: prefer `__xl_a`, stepping 8 bytes past its leading
+   * NULL sentinel; otherwise fall back to `__xl_c` with no addend. */
+  tls->callbacks_sym = coff_find_sym(img, "__xl_a");
+  if (tls->callbacks_sym != LINK_SYM_NONE) {
+    tls->callbacks_addend = 8;
+  } else {
+    tls->callbacks_sym = coff_find_sym(img, "__xl_c");
+    tls->callbacks_addend = 0;
+  }
+  /* The directory's u64 fields need the .rdata tail 8-byte aligned. */
+  u32 rdata_size = (u32)ALIGN_UP((u64)out[COFF_BUCKET_RDATA].size, 8ull);
+  u32 need = rdata_size + COFF_TLS_DIRECTORY64_SIZE;
+  if (need > *rdata_cap &&
+      VEC_GROW(img->heap, out[COFF_BUCKET_RDATA].bytes, *rdata_cap, need))
+    compiler_panic(img->c, no_loc(), "link_emit_coff: oom on .rdata grow");
+  /* Zero the alignment padding and the directory slot itself. */
+  if (rdata_size > out[COFF_BUCKET_RDATA].size) {
+    memset(out[COFF_BUCKET_RDATA].bytes + out[COFF_BUCKET_RDATA].size, 0,
+           rdata_size - out[COFF_BUCKET_RDATA].size);
+  }
+  memset(out[COFF_BUCKET_RDATA].bytes + rdata_size, 0,
+         COFF_TLS_DIRECTORY64_SIZE);
+  tls->dir_rdata_off = rdata_size;
+  out[COFF_BUCKET_RDATA].size = need;
+}
+
+/* Fill in the reserved IMAGE_TLS_DIRECTORY64 record now that bucket RVAs
+ * are final. The four VA fields are stored as PE_IMAGE_BASE + RVA; the
+ * two trailing u32 fields are written as zero. No-op without TLS. */
+static void coff_emit_tls_dir(const LinkImage* img,
+                              const CoffSection out[COFF_NBUCKETS],
+                              const CoffSecMap* map,
+                              const CoffTlsLayout* tls) {
+  u8* rec;
+  u64 raw_start, raw_end, index_va, cb_va = 0;
+  if (!tls->present) return;
+  raw_start = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TLS].rva;
+  raw_end = raw_start + (u64)tls->tls_size;
+  index_va =
+      coff_symbol_final_va(img, out, map, tls->tls_index_sym, "_tls_index");
+  if (tls->callbacks_sym) {
+    /* A non-zero addend means the callbacks symbol is `__xl_a`. */
+    const char* cb_name = tls->callbacks_addend ? "__xl_a" : "__xl_c";
+    cb_va = coff_symbol_final_va(img, out, map, tls->callbacks_sym, cb_name) +
+            tls->callbacks_addend;
+  }
+
+  rec = out[COFF_BUCKET_RDATA].bytes + tls->dir_rdata_off;
+  wr_u64_le(rec + COFF_TLSDIR_OFF_START_ADDR, raw_start);
+  wr_u64_le(rec + COFF_TLSDIR_OFF_END_ADDR, raw_end);
+  wr_u64_le(rec + COFF_TLSDIR_OFF_INDEX_ADDR, index_va);
+  wr_u64_le(rec + COFF_TLSDIR_OFF_CALLBACKS, cb_va);
+  wr_u32_le(rec + 32, 0); /* SizeOfZeroFill */
+  wr_u32_le(rec + 36, 0); /* Characteristics */
+}
+
+static void coff_define_tls_used(LinkImage* img,
+                                 const CoffSection out[COFF_NBUCKETS],
+                                 const CoffTlsLayout* tls) {
+  /* `_tls_used` marks the TLS directory's absolute address in .rdata. */
+  const u64 dir_va = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_RDATA].rva +
+                     (u64)tls->dir_rdata_off;
+  if (tls->present && img->linker)
+    link_emit_boundary_sym(img->linker, img, "_tls_used", dir_va);
+}
+
+/* ---- import-table synthesis (Phase 3.2) ---------------------------
+ *
+ * Per doc/WINDOWS.md §3.2: every LinkSymbol with `imported = 1` gets
+ * routed through an IAT slot synthesized in `.idata`. Function
+ * imports additionally receive a small per-arch stub in `.text`
+ * (`ff 25 disp32` on x64 / `adrp;ldr;br` on aa64) so a direct CALL26
+ * or PC32 against the symbol lands on a stub that indirects through
+ * the IAT. Data imports skip the stub — the symbol's final vaddr is
+ * just the IAT slot vaddr, and code-gen emits a `mov rax, [slot]`
+ * sequence the same way it would for any other GOT-style load.
+ *
+ * cfree's COFF code-gen uses direct symbol references; there is no
+ * separate `__imp_<name>` LinkSymbol consulted at link time. The
+ * IAT-slot rewrite happens entirely by overriding the imported
+ * symbol's vaddr in apply_all_relocs. */
+
+typedef struct CoffImport {
+ LinkSymId sym; /* canonical LinkSymId from img->syms */
+ u32 dll_idx; /* index into CoffImportTable.dlls */
+ u32 stub_off; /* offset in .text bucket (functions only) */
+ u32 iat_off; /* offset in .idata IAT block */
+ u32 ilt_off; /* offset in .idata ILT block */
+ u32 hint_off; /* offset in .idata hint/name table */
+ u8 is_func;
+ u8 pad[3];
+} CoffImport;
+
+typedef struct CoffImportDll {
+ Sym soname;
+ u32 first; /* index of first import in CoffImportTable.imports */
+ u32 count;
+ u32 ilt_off; /* offset of this DLL's ILT block in .idata */
+ u32 iat_off; /* offset of this DLL's IAT block in .idata */
+ u32 name_off; /* offset of DLL name string in .idata */
+} CoffImportDll;
+
+typedef struct CoffImportTable {
+ CoffImport* imports;
+ u32 nimports;
+ u32 imports_cap; /* heap-allocation size for cleanup */
+ u32 nfunc_imports; /* subset of nimports that needs a .text stub */
+ CoffImportDll* dlls;
+ u32 ndlls;
+ u32 dlls_cap; /* heap-allocation size for cleanup */
+ /* Offsets within .idata of the five sub-blocks. Filled in by
+ * coff_plan_idata_layout once nimports / ndlls is known. */
+ u32 desc_off; /* always 0 — descriptors come first */
+ u32 desc_size;
+ u32 ilt_base;
+ u32 ilt_total;
+ u32 iat_base;
+ u32 iat_total;
+ u32 hint_base;
+ u32 hint_total;
+ u32 name_base;
+ u32 name_total;
+ u32 idata_size;
+ /* Stub region in .text bucket. Stubs are appended after every
+ * input .text section has been bucketed. stub_text_off is the
+ * bucket-local offset of the first stub; per-import stub offsets
+ * are stored in CoffImport.stub_off. */
+ u32 stub_text_off;
+ u32 stub_total;
+} CoffImportTable;
+
+/* qsort comparator over CoffImport records. Primary key: DLL slot, so
+ * each DLL's imports end up adjacent. Tie-break: LinkSymId, which makes
+ * the final order reproducible regardless of qsort's stability. */
+static int coff_import_cmp(const void* a, const void* b) {
+  const CoffImport* lhs = (const CoffImport*)a;
+  const CoffImport* rhs = (const CoffImport*)b;
+  if (lhs->dll_idx != rhs->dll_idx)
+    return lhs->dll_idx < rhs->dll_idx ? -1 : 1;
+  if (lhs->sym != rhs->sym)
+    return lhs->sym < rhs->sym ? -1 : 1;
+  return 0;
+}
+
+static const char* coff_import_lookup_name(Compiler* c, const LinkSymbol* s,
+                                           size_t* nlen_out) {
+  /* Pooled symbol name, minus one leading "__imp_" when more text
+   * follows the prefix. Nameless symbols yield NULL with length 0.
+   * `nlen_out` is optional and receives the returned name's length. */
+  static const char kImpPrefix[] = "__imp_";
+  const size_t skip = sizeof(kImpPrefix) - 1u;
+  size_t len = 0;
+  const char* text = s->name ? pool_str(c->global, s->name, &len) : NULL;
+  const int has_prefix =
+      text != NULL && len > skip && memcmp(text, kImpPrefix, skip) == 0;
+  if (nlen_out != NULL) *nlen_out = has_prefix ? len - skip : len;
+  return has_prefix ? text + skip : text;
+}
+
+/* Classify an import as function-like (1) or data-like (0).
+ *   - a name carrying the "__imp_" prefix (with something after it) is
+ *     always data;
+ *   - otherwise SK_OBJ is data, and every other kind — SK_FUNC, SK_IFUNC
+ *     or an unclassified one — counts as a function, the common case for
+ *     direct calls. */
+static int coff_import_is_func(Compiler* c, const LinkSymbol* s) {
+  static const char kImp[] = "__imp_";
+  const size_t imp_len = sizeof(kImp) - 1u;
+  size_t len = 0;
+  const char* text = s->name ? pool_str(c->global, s->name, &len) : NULL;
+  if (text != NULL && len > imp_len && memcmp(text, kImp, imp_len) == 0)
+    return 0;
+  /* Function kinds and unknown kinds share the same answer, so only
+   * SK_OBJ needs to be singled out. */
+  return s->kind != SK_OBJ;
+}
+
+/* Walk LinkSyms, collect imports, group by DLL soname. Returns 1 if
+ * any imports were collected; 0 if none were, or if img->linker is
+ * NULL (caller skips the entire .idata path). */
+static int coff_collect_imports(LinkImage* img, CoffImportTable* it) {
+ Heap* heap = img->heap;
+ Compiler* c = img->c;
+ Linker* l = img->linker;
+ u32 nsyms = LinkSyms_count(&img->syms);
+ u32 imp_cap = 0;
+ u32 dll_cap = 0;
+ u32 i;
+
+ memset(it, 0, sizeof(*it));
+ if (!l) return 0;
+ for (i = 0; i < nsyms; ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, i);
+ LinkInput* in;
+ u32 dll_idx = (u32)-1;
+ u32 d;
+ if (!s->imported) continue;
+ if (s->name == 0) continue;
+ if (s->dso_input_id == LINK_INPUT_NONE) {
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: imported symbol has no providing DSO");
+ }
+ /* img->globals only carries defined globals/weaks; imported undefs
+ * never land there. Dedup by name: skip if any earlier slot
+ * already collected this name (linear scan over collected imports). */
+ {
+ int dup = 0;
+ for (u32 k = 0; k < it->nimports; ++k) {
+ LinkSymbol* prev = LinkSyms_at(&img->syms, it->imports[k].sym - 1);
+ if (prev->name == s->name) { dup = 1; break; }
+ }
+ if (dup) continue;
+ }
+ if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) {
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: import dso_input_id out of range");
+ }
+ in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u);
+ if (in->soname == 0) {
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: providing DSO has no soname; cannot "
+ "emit IMAGE_IMPORT_DESCRIPTOR.Name");
+ }
+ /* Find-or-add the DLL slot, keyed on the interned soname. */
+ for (d = 0; d < it->ndlls; ++d) {
+ if (it->dlls[d].soname == in->soname) { dll_idx = d; break; }
+ }
+ if (dll_idx == (u32)-1) {
+ if (VEC_GROW(heap, it->dlls, dll_cap, it->ndlls + 1u))
+ compiler_panic(c, no_loc(), "link_emit_coff: oom on import dlls");
+ dll_idx = it->ndlls++;
+ memset(&it->dlls[dll_idx], 0, sizeof(it->dlls[dll_idx]));
+ it->dlls[dll_idx].soname = in->soname;
+ }
+ if (VEC_GROW(heap, it->imports, imp_cap, it->nimports + 1u))
+ compiler_panic(c, no_loc(), "link_emit_coff: oom on imports");
+ memset(&it->imports[it->nimports], 0,
+ sizeof(it->imports[it->nimports]));
+ it->imports[it->nimports].sym = s->id;
+ it->imports[it->nimports].dll_idx = dll_idx;
+ it->imports[it->nimports].is_func = (u8)coff_import_is_func(c, s);
+ if (it->imports[it->nimports].is_func) ++it->nfunc_imports;
+ ++it->nimports;
+ it->dlls[dll_idx].count++;
+ }
+ if (it->nimports == 0) return 0;
+ /* Re-bucket the imports array so each DLL's run is contiguous. */
+ qsort(it->imports, it->nimports, sizeof(*it->imports), coff_import_cmp);
+ /* Sorted runs follow ascending dll_idx, so `first` is a running sum. */
+ {
+ u32 cur = 0;
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ it->dlls[d].first = cur;
+ cur += it->dlls[d].count;
+ }
+ }
+ it->imports_cap = imp_cap;
+ it->dlls_cap = dll_cap;
+ return 1;
+}
+
+static void coff_imports_free(LinkImage* img, CoffImportTable* it) {
+  /* Hands both arrays back to the image heap, sized by the capacities
+   * that coff_collect_imports stored. NULL arrays are skipped and the
+   * table's fields are not reset. */
+  Heap* h = img->heap;
+  const size_t imports_bytes = (size_t)it->imports_cap * sizeof(*it->imports);
+  const size_t dlls_bytes = (size_t)it->dlls_cap * sizeof(*it->dlls);
+
+  if (it->imports != NULL) h->free(h, it->imports, imports_bytes);
+  if (it->dlls != NULL) h->free(h, it->dlls, dlls_bytes);
+}
+
+/* Compute every per-block / per-import offset inside .idata and the
+ * total .idata size in bytes (all offsets are section-local; the
+ * bucket's final RVA is added in coff_emit_idata). Panics if an import
+ * or a providing DSO has an empty name. */
+static void coff_plan_idata_layout(LinkImage* img, CoffImportTable* it) {
+ Compiler* c = img->c;
+ u32 off;
+
+ /* Block 1: import descriptors (one per DLL + zero terminator). */
+ it->desc_off = 0;
+ it->desc_size = (it->ndlls + 1u) * COFF_IMPORT_DESCRIPTOR_SIZE;
+ off = (u32)ALIGN_UP((u64)it->desc_size, (u64)PE_IDATA_BLOCK_ALIGN);
+
+ /* Block 2: ILTs. Per DLL: count entries + 1 (terminator), 8 B each. */
+ it->ilt_base = off;
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ it->dlls[d].ilt_off = off;
+ /* Per-import: assign ilt_off within this DLL's block. */
+ for (u32 k = 0; k < it->dlls[d].count; ++k) {
+ it->imports[it->dlls[d].first + k].ilt_off =
+ off + k * (u32)COFF_THUNK_DATA64_SIZE;
+ }
+ off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE;
+ }
+ it->ilt_total = off - it->ilt_base;
+ off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
+
+ /* Block 3: IATs (same shape as ILTs). */
+ it->iat_base = off;
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ it->dlls[d].iat_off = off;
+ for (u32 k = 0; k < it->dlls[d].count; ++k) {
+ it->imports[it->dlls[d].first + k].iat_off =
+ off + k * (u32)COFF_THUNK_DATA64_SIZE;
+ }
+ off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE;
+ }
+ it->iat_total = off - it->iat_base;
+ off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
+
+ /* Block 4: hint/name records. Each: u16 hint + NUL-term name +
+ * 1-byte pad if the resulting size is odd (PE/COFF spec). */
+ it->hint_base = off;
+ for (u32 i = 0; i < it->nimports; ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1);
+ size_t nlen = 0;
+ const char* nm = coff_import_lookup_name(c, s, &nlen);
+ if (!nm || nlen == 0)
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: imported symbol has empty name");
+ it->imports[i].hint_off = off;
+ /* hint (2 B) + name (nlen + 1) + optional pad to even. */
+ u32 rec = 2u + (u32)nlen + 1u;
+ if (rec & 1u) ++rec;
+ off += rec;
+ }
+ it->hint_total = off - it->hint_base;
+ off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
+
+ /* Block 5: DLL name strings (NUL-terminated); the end is not re-aligned. */
+ it->name_base = off;
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ size_t nlen = 0;
+ const char* nm = pool_str(c->global, it->dlls[d].soname, &nlen);
+ if (!nm || nlen == 0)
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: providing DSO has empty soname");
+ it->dlls[d].name_off = off;
+ off += (u32)nlen + 1u;
+ }
+ it->name_total = off - it->name_base;
+ it->idata_size = off;
+}
+
+/* Append the function-import stubs to the .text bucket. Each stub is
+ * `coff_stub_size` bytes (arch-specific). Records each stub's bucket-
+ * local offset on the matching CoffImport so the per-symbol stub vaddr
+ * can be computed later. Panics on a missing emitter or failed growth. */
+static void coff_append_stubs(LinkImage* img, CoffImportTable* it,
+                              CoffSection* text_bucket,
+                              u32* text_bucket_cap) {
+  Heap* heap = img->heap;
+  Compiler* c = img->c;
+  const LinkArchDesc* arch = link_arch_desc_for(c);
+  u32 stub_size;
+  u32 stub_align;
+  u64 cur;
+  if (!arch || arch->coff_stub_size == 0 || !arch->emit_coff_iat_stub) {
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: arch has no COFF IAT stub emitter");
+  }
+  stub_size = arch->coff_stub_size;
+  /* Stubs are pure code, so instruction alignment would suffice; the stub
+   * size is used as a convenient upper bound. NOTE(review): 6 B / 12 B
+   * stubs are not powers of two — confirm ALIGN_UP accepts such values. */
+  stub_align = stub_size;
+  cur = (u64)text_bucket->size;
+  cur = ALIGN_UP(cur, (u64)stub_align);
+  it->stub_text_off = (u32)cur;
+  for (u32 i = 0; i < it->nimports; ++i) {
+    if (!it->imports[i].is_func) continue;
+    it->imports[i].stub_off = (u32)cur;
+    cur += stub_size;
+  }
+  it->stub_total = (u32)cur - it->stub_text_off;
+  if (it->stub_total == 0) return;
+  /* Grow the .text bucket buffer; a non-zero VEC_GROW result means oom. */
+  u32 need = (u32)cur;
+  if (need > *text_bucket_cap &&
+      VEC_GROW(heap, text_bucket->bytes, *text_bucket_cap, need))
+    compiler_panic(c, no_loc(), "link_emit_coff: oom on import stubs");
+  /* Zero the alignment pad and the stub region; the stub bytes are
+   * written later by coff_emit_stubs once vaddrs are known. */
+  if ((u32)cur > text_bucket->size) {
+    memset(text_bucket->bytes + text_bucket->size, 0,
+           (size_t)((u32)cur - text_bucket->size));
+  }
+  text_bucket->size = (u32)cur;
+}
+
+/* Write the bytes of every function import's IAT stub into the .text
+ * bucket. Both the .text and .idata bucket RVAs must already be final,
+ * because each stub is generated from its own VA and the VA of the IAT
+ * slot it jumps through. Data imports have no stub and are skipped. */
+static void coff_emit_stubs(LinkImage* img, const CoffImportTable* it,
+                            const CoffSection out[COFF_NBUCKETS]) {
+  const LinkArchDesc* desc = link_arch_desc_for(img->c);
+  const u64 text_va = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TEXT].rva;
+  const u64 idata_va = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_IDATA].rva;
+  u8* const text_bytes = out[COFF_BUCKET_TEXT].bytes;
+  const CoffImport* imp = it->imports;
+  u32 left;
+  if (desc == NULL || desc->emit_coff_iat_stub == NULL) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: arch has no COFF IAT stub emitter");
+  }
+  /* Stub VA and slot VA are both absolute (PE_IMAGE_BASE + RVA + off). */
+  for (left = it->nimports; left != 0; --left, ++imp) {
+    if (imp->is_func) {
+      desc->emit_coff_iat_stub(text_bytes + imp->stub_off,
+                               text_va + (u64)imp->stub_off,
+                               idata_va + (u64)imp->iat_off);
+    }
+  }
+}
+
+/* Emit .idata content into a freshly allocated bucket buffer sized by
+ * coff_plan_idata_layout. Reads idata->rva to turn offsets into RVAs. */
+static void coff_emit_idata(LinkImage* img, const CoffImportTable* it,
+ CoffSection out[COFF_NBUCKETS],
+ u32* idata_bucket_cap) {
+ Heap* heap = img->heap;
+ Compiler* c = img->c;
+ CoffSection* idata = &out[COFF_BUCKET_IDATA];
+ u32 idata_rva = idata->rva;
+ u8* buf;
+ /* Allocate the zero-filled bucket buffer, aligned for u64 stores. */
+ buf = (u8*)heap->alloc(heap, it->idata_size, _Alignof(u64));
+ if (!buf)
+ compiler_panic(c, no_loc(), "link_emit_coff: oom on .idata buffer");
+ memset(buf, 0, it->idata_size);
+ idata->bytes = buf;
+ idata->size = it->idata_size;
+ *idata_bucket_cap = it->idata_size;
+
+ /* Block 1: IMAGE_IMPORT_DESCRIPTOR table. */
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ u8* p = buf + d * (u32)COFF_IMPORT_DESCRIPTOR_SIZE;
+ u32 ilt_rva = idata_rva + it->dlls[d].ilt_off;
+ u32 iat_rva = idata_rva + it->dlls[d].iat_off;
+ u32 name_rva = idata_rva + it->dlls[d].name_off;
+ wr_u32_le(p + 0, ilt_rva); /* OriginalFirstThunk */
+ wr_u32_le(p + 4, 0u); /* TimeDateStamp */
+ wr_u32_le(p + 8, 0u); /* ForwarderChain */
+ wr_u32_le(p + 12, name_rva); /* Name */
+ wr_u32_le(p + 16, iat_rva); /* FirstThunk */
+ }
+ /* Trailing zero descriptor already zero-filled by memset. */
+
+ /* Blocks 2+3: ILT + IAT. Both initially point at the same hint/name
+ * record for each import; the OS loader rewrites IAT entries at
+ * load time. */
+ for (u32 i = 0; i < it->nimports; ++i) {
+ u64 hint_rva = (u64)idata_rva + (u64)it->imports[i].hint_off;
+ wr_u64_le(buf + it->imports[i].ilt_off, hint_rva);
+ wr_u64_le(buf + it->imports[i].iat_off, hint_rva);
+ }
+ /* Per-DLL ILT/IAT terminators are u64 0, already zero-filled. */
+
+ /* Block 4: hint/name records (name has any "__imp_" prefix stripped). */
+ for (u32 i = 0; i < it->nimports; ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1);
+ size_t nlen = 0;
+ const char* nm = coff_import_lookup_name(c, s, &nlen);
+ u8* p = buf + it->imports[i].hint_off;
+ wr_u16_le(p, PE_IMPORT_HINT_NONE);
+ memcpy(p + 2, nm, nlen);
+ /* NUL terminator + optional pad already zero. */
+ }
+
+ /* Block 5: DLL name strings. */
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ size_t nlen = 0;
+ const char* nm = pool_str(c->global, it->dlls[d].soname, &nlen);
+ memcpy(buf + it->dlls[d].name_off, nm, nlen);
+ /* NUL already zero. */
+ }
+}
+
+/* Per-LinkSymId vaddr override table for imports. Indexed by
+ * LinkSymId-1; 0 means "not an import". Built once after the .idata
+ * bucket RVA is final. Consumed by coff_apply_all_relocs in lieu of
+ * the symbol's own vaddr field (which is 0 for imports). */
+typedef struct CoffImportVaddr {
+ u64* by_sym; /* nsyms + 1 entries are allocated; a 0 entry means "not imported" */
+ u32 nsyms; /* LinkSyms_count(&img->syms) captured when the table is built */
+} CoffImportVaddr;
+
+static void coff_import_vaddr_build(LinkImage* img, const CoffImportTable* it,
+                                    const CoffSection out[COFF_NBUCKETS],
+                                    CoffImportVaddr* iv) {
+  /* Fills iv->by_sym with the VA each import resolves to: the .text stub
+   * for function imports, the .idata IAT slot for data imports. Panics
+   * if the table cannot be allocated. */
+  const u64 text_va = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TEXT].rva;
+  const u64 idata_va = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_IDATA].rva;
+  size_t table_bytes;
+  u32 n, j;
+
+  iv->nsyms = LinkSyms_count(&img->syms);
+  /* One spare entry is allocated beyond nsyms. */
+  table_bytes = sizeof(u64) * (size_t)(iv->nsyms + 1u);
+  iv->by_sym =
+      (u64*)img->heap->alloc(img->heap, table_bytes, _Alignof(u64));
+  if (iv->by_sym == NULL)
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: oom on import vaddr table");
+  memset(iv->by_sym, 0, table_bytes);
+
+  for (n = 0; n < it->nimports; ++n) {
+    const CoffImport* imp = &it->imports[n];
+    const LinkSymbol* owner = LinkSyms_at(&img->syms, imp->sym - 1u);
+    const u64 target = imp->is_func ? text_va + (u64)imp->stub_off
+                                    : idata_va + (u64)imp->iat_off;
+    iv->by_sym[imp->sym - 1u] = target;
+    /* Mirror the VA onto every imported symbol sharing the name, so
+     * per-input shadow ids resolve to the same stub / slot. */
+    for (j = 0; j < iv->nsyms; ++j) {
+      const LinkSymbol* other = LinkSyms_at(&img->syms, j);
+      if (other->imported && other->name == owner->name)
+        iv->by_sym[other->id - 1u] = target;
+    }
+  }
+}
+
+/* Release the table allocated by coff_import_vaddr_build. A table
+ * whose by_sym is NULL (never built) is left untouched. */
+static void coff_import_vaddr_free(LinkImage* img, CoffImportVaddr* iv) {
+  Heap* heap = img->heap;
+  size_t table_bytes;
+  if (!iv->by_sym) return;
+  /* Must match the size used at allocation time: nsyms + 1 slots. */
+  table_bytes = sizeof(u64) * (size_t)(iv->nsyms + 1u);
+  heap->free(heap, iv->by_sym, table_bytes);
+}
+
+/* Look up the IMAGE_FILE_MACHINE_* value for the compiler's target
+ * through the per-arch COFF descriptor. Panics when the arch has no
+ * COFF descriptor, or when the machine is anything other than AMD64
+ * or ARM64 (the only two this emitter accepts). */
+static u16 coff_machine_or_panic(Compiler* c) {
+  const ArchImpl* arch = arch_for_compiler(c);
+  u16 machine;
+  int supported;
+  if (arch == NULL || arch->coff == NULL)
+    compiler_panic(c, no_loc(), "link_emit_coff: no COFF arch descriptor");
+  machine = arch->coff->machine;
+  supported = (machine == IMAGE_FILE_MACHINE_AMD64) ||
+              (machine == IMAGE_FILE_MACHINE_ARM64);
+  if (!supported)
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: unsupported machine 0x%x",
+                   (unsigned)machine);
+  return machine;
+}
+
+/* Return nonzero iff the section has a name and that name begins with
+ * the NUL-terminated `prefix`. */
+static int coff_section_name_starts(Compiler* c, const LinkSection* ls,
+                                    const char* prefix) {
+  size_t name_len = 0;
+  size_t prefix_len = strlen(prefix);
+  const char* name;
+  if (!ls->name) return 0;
+  name = pool_str(c->global, ls->name, &name_len);
+  if (!name) return 0;
+  if (name_len < prefix_len) return 0;
+  return memcmp(name, prefix, prefix_len) == 0;
+}
+
+/* Total order over sections: bytewise by name (a shorter name that is
+ * a prefix of the other sorts first; an unnamed section compares as
+ * the empty string), with the section id as the final tie-breaker. */
+static int coff_section_name_cmp(Compiler* c, const LinkSection* a,
+                                 const LinkSection* b) {
+  size_t a_len = 0, b_len = 0;
+  const char* a_str = "";
+  const char* b_str = "";
+  size_t common;
+  if (a->name) a_str = pool_str(c->global, a->name, &a_len);
+  if (b->name) b_str = pool_str(c->global, b->name, &b_len);
+  common = a_len < b_len ? a_len : b_len;
+  if (common) {
+    int byte_cmp = memcmp(a_str, b_str, common);
+    if (byte_cmp) return byte_cmp;
+  }
+  if (a_len != b_len) return a_len < b_len ? -1 : 1;
+  if (a->id != b->id) return a->id < b->id ? -1 : 1;
+  return 0;
+}
+
+/* Append one input section to the output bucket chosen by
+ * coff_bucket_for().
+ *
+ *   out         per-bucket output state; .bytes is grown as needed and
+ *               .size is advanced to the new end of the bucket.
+ *   map         map[ls->id - 1] receives the bucket index and the
+ *               section's bucket-local offset (stored in new_rva; the
+ *               absolute RVA is bucket RVA + this offset, known only
+ *               after layout).
+ *   bucket_cur  running end-of-data cursor per bucket.
+ *   bucket_cap  current allocation capacity of out[].bytes per bucket.
+ *
+ * Alignment padding is materialised (and zeroed) even when the section
+ * itself is empty, so out[].size never exceeds the initialised part of
+ * the buffer. Panics if a bucket would outgrow the 32-bit range PE
+ * uses for section sizes and RVAs. */
+static void coff_place_section(LinkImage* img, CoffSection out[COFF_NBUCKETS],
+                               CoffSecMap* map, u64 bucket_cur[COFF_NBUCKETS],
+                               u32 bucket_cap[COFF_NBUCKETS],
+                               const LinkSection* ls) {
+  Heap* heap = img->heap;
+  CoffBucket b2 = coff_bucket_for(ls);
+  u32 align = ls->align ? ls->align : 1u;
+  u64 prev_end = bucket_cur[b2];
+  u64 cur = ALIGN_UP(prev_end, (u64)align);
+  u64 end = cur + ls->size;
+
+  /* Offsets and sizes below are narrowed to u32; refuse instead of
+   * silently truncating. */
+  if (end > 0xffffffffull)
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: output section exceeds 4 GiB");
+
+  map[ls->id - 1].bucket = (u8)b2;
+  /* Record the bucket-local offset; the absolute RVA / file offset
+   * are filled in after bucket placement (RVAs need
+   * SectionAlignment, file offsets need FileAlignment). */
+  map[ls->id - 1].new_rva = (u32)cur;
+
+  /* .bss carries no file bytes. For every other bucket, back the range
+   * [prev_end, end) with real storage whenever it is non-empty -- that
+   * includes the padding-only case of an empty but aligned section. */
+  if (b2 != COFF_BUCKET_BSS && end > prev_end) {
+    u32 need = (u32)end;
+    if (need > bucket_cap[b2]) {
+      (void)VEC_GROW(heap, out[b2].bytes, bucket_cap[b2], need);
+    }
+    /* Zero the alignment gap in front of the section. */
+    memset(out[b2].bytes + prev_end, 0, (size_t)(cur - prev_end));
+    if (ls->size) {
+      if (ls->sem != SSEM_NOBITS) {
+        /* Copy bytes from the source segment buffer into the bucket. */
+        const LinkSegment* seg = &img->segments[ls->segment_id - 1];
+        const u8* src = img->segment_bytes[seg->id - 1] +
+                        (size_t)(ls->file_offset - seg->file_offset);
+        memcpy(out[b2].bytes + cur, src, (size_t)ls->size);
+      } else {
+        /* NOBITS input routed to a file-backed bucket: emit zeroes. */
+        memset(out[b2].bytes + cur, 0, (size_t)ls->size);
+      }
+    }
+  }
+
+  bucket_cur[b2] = end;
+  out[b2].size = (u32)end;
+}
+
+/* Insert `ls` into the array `a` (currently *n entries, already
+ * ordered by coff_section_name_cmp) at its sorted position and bump
+ * *n. The caller guarantees room for one more element. An entry that
+ * compares equal is placed after the existing ones. */
+static void coff_insert_sorted_section(Compiler* c, const LinkSection** a,
+                                       u32* n, const LinkSection* ls) {
+  u32 slot;
+  for (slot = *n; slot > 0; --slot) {
+    if (coff_section_name_cmp(c, ls, a[slot - 1u]) >= 0) break;
+    a[slot] = a[slot - 1u]; /* shift larger entry up by one */
+  }
+  a[slot] = ls;
+  *n = *n + 1u;
+}
+
+/* ---- pass 1: bucket input sections, assemble bytes, assign deltas ----
+ * CoffSecMap is defined above (alongside CoffTlsLayout) because the
+ * TLS planning helpers need to consume one. */
+
+/* Initialise every output bucket descriptor (.text, .rdata, .idata,
+ * .data, .tls, .bss, .reloc) and place each SF_ALLOC input section into
+ * its bucket.
+ *
+ * Placement order: ordinary sections in input order first, then all
+ * SF_TLS sections sorted by name, then all `.CRT$*` sections sorted by
+ * name (ties broken by section id).
+ *
+ * `map[secid-1]` is filled in by coff_place_section for every placed
+ * section. Bucket buffers are heap-allocated; on return each bucket's
+ * size_raw temporarily holds its allocation capacity so the caller can
+ * later free with the right size (layout overwrites that field). */
+static void coff_build_buckets(LinkImage* img, CoffSection out[COFF_NBUCKETS],
+                               CoffSecMap* map) {
+  /* Static description of each output bucket.
+   *
+   * .tls note: the Windows loader uses .tls as a *template*: the bytes
+   * on disk seed each thread's per-TLS copy at thread creation, and
+   * threads write to their copies, not the template. The PE section is
+   * still marked writable because that's what mingw and link.exe emit;
+   * the loader special-cases it via the TLS directory. */
+  const struct {
+    CoffBucket bucket;
+    const char* name;
+    u32 characteristics;
+    int has_file_bytes;
+  } init[] = {
+      {COFF_BUCKET_TEXT, ".text",
+       IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ, 1},
+      {COFF_BUCKET_RDATA, ".rdata",
+       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ, 1},
+      {COFF_BUCKET_IDATA, ".idata",
+       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ, 1},
+      {COFF_BUCKET_DATA, ".data",
+       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
+           IMAGE_SCN_MEM_WRITE,
+       1},
+      {COFF_BUCKET_TLS, ".tls",
+       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
+           IMAGE_SCN_MEM_WRITE,
+       1},
+      {COFF_BUCKET_BSS, ".bss",
+       IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ |
+           IMAGE_SCN_MEM_WRITE,
+       0},
+      {COFF_BUCKET_RELOC, ".reloc",
+       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
+           IMAGE_SCN_MEM_DISCARDABLE,
+       1},
+  };
+  Heap* heap = img->heap;
+  Compiler* c = img->c;
+  const size_t sorted_bytes = sizeof(const LinkSection*) * img->nsections;
+  const LinkSection** tls_sorted = NULL;
+  const LinkSection** crt_sorted = NULL;
+  u32 ntls_sorted = 0;
+  u32 ncrt_sorted = 0;
+  /* Per-bucket write cursor and buffer capacity; buffers grow lazily
+   * inside coff_place_section. */
+  u64 bucket_cur[COFF_NBUCKETS];
+  u32 bucket_cap[COFF_NBUCKETS];
+  u32 idx;
+
+  for (idx = 0; idx < COFF_NBUCKETS; ++idx) {
+    memset(&out[idx], 0, sizeof(out[idx]));
+    bucket_cur[idx] = 0;
+    bucket_cap[idx] = 0;
+  }
+  for (idx = 0; idx < (u32)(sizeof(init) / sizeof(init[0])); ++idx) {
+    CoffSection* dst = &out[init[idx].bucket];
+    dst->name = init[idx].name;
+    dst->characteristics = init[idx].characteristics;
+    dst->has_file_bytes = init[idx].has_file_bytes;
+  }
+
+  /* Scratch arrays for the deferred (sorted) sections; each can hold
+   * every input section in the worst case. */
+  if (img->nsections) {
+    tls_sorted = (const LinkSection**)heap->alloc(
+        heap, sorted_bytes, _Alignof(const LinkSection*));
+    crt_sorted = (const LinkSection**)heap->alloc(
+        heap, sorted_bytes, _Alignof(const LinkSection*));
+    if (!tls_sorted || !crt_sorted)
+      compiler_panic(c, no_loc(), "link_emit_coff: oom sorting sections");
+  }
+
+  for (idx = 0; idx < img->nsections; ++idx) {
+    const LinkSection* ls = &img->sections[idx];
+    if (!(ls->flags & SF_ALLOC)) continue;
+    if (ls->flags & SF_TLS) {
+      coff_insert_sorted_section(c, tls_sorted, &ntls_sorted, ls);
+    } else if (coff_section_name_starts(c, ls, ".CRT$")) {
+      coff_insert_sorted_section(c, crt_sorted, &ncrt_sorted, ls);
+    } else {
+      coff_place_section(img, out, map, bucket_cur, bucket_cap, ls);
+    }
+  }
+  for (idx = 0; idx < ntls_sorted; ++idx)
+    coff_place_section(img, out, map, bucket_cur, bucket_cap,
+                       tls_sorted[idx]);
+  for (idx = 0; idx < ncrt_sorted; ++idx)
+    coff_place_section(img, out, map, bucket_cur, bucket_cap,
+                       crt_sorted[idx]);
+
+  /* heap->free needs the original allocation size, so park each
+   * bucket's capacity in size_raw for the caller to pick up before
+   * layout replaces it with the real PE value. */
+  for (idx = 0; idx < COFF_NBUCKETS; ++idx)
+    out[idx].size_raw = bucket_cap[idx];
+
+  if (tls_sorted) heap->free(heap, tls_sorted, sorted_bytes);
+  if (crt_sorted) heap->free(heap, crt_sorted, sorted_bytes);
+}
+
+/* Walk the buckets in index order and give each non-empty one an RVA
+ * (rounded up to PE_SECTION_ALIGNMENT) and, when it has file bytes, a
+ * file offset plus a PE_FILE_ALIGNMENT-padded raw size. Empty buckets
+ * are marked as not in the image and have their layout fields zeroed.
+ *
+ * Returns the file offset just past the last file-backed bucket, i.e.
+ * the total file size. */
+static u64 coff_assign_layout(CoffSection out[COFF_NBUCKETS],
+                              u32 headers_file_size,
+                              u32 first_section_rva) {
+  u64 next_file = ALIGN_UP((u64)headers_file_size, (u64)PE_FILE_ALIGNMENT);
+  u32 next_rva = first_section_rva;
+  u32 idx;
+  for (idx = 0; idx < COFF_NBUCKETS; ++idx) {
+    CoffSection* sec = &out[idx];
+
+    /* Start from the "absent" state; present buckets override below. */
+    sec->in_image = 0;
+    sec->rva = 0;
+    sec->file_offset = 0;
+    sec->size_raw = 0;
+    if (sec->size == 0) continue;
+
+    sec->in_image = 1;
+    sec->rva = (u32)ALIGN_UP((u64)next_rva, (u64)PE_SECTION_ALIGNMENT);
+    if (sec->has_file_bytes) {
+      sec->file_offset = (u32)next_file;
+      sec->size_raw =
+          (u32)ALIGN_UP((u64)sec->size, (u64)PE_FILE_ALIGNMENT);
+      next_file += sec->size_raw;
+    }
+    /* The next bucket starts after this one's virtual extent. */
+    next_rva = sec->rva + sec->size;
+  }
+  return next_file;
+}
+
+/* Build the .reloc bytes by grouping absolute relocs by 4-KiB page.
+ * The map[] array maps LinkSectionId-1 to the section's bucket and its
+ * bucket-local offset (new_rva), so each reloc's site RVA is computed as
+ * site_rva = bucket_rva + map[].new_rva + (orig write_vaddr - orig
+ * section_vaddr).
+ *
+ * Layout per page:
+ * u32 page_rva
+ * u32 size_of_block (8 + n_entries*2, padded to a multiple of 4)
+ * u16 entries[]: (type << 12) | (offset & 0xfff)
+ * optional trailing u16 = 0 (IMAGE_REL_BASED_ABSOLUTE) for u32 alignment */
+/* One pending base-relocation site, collected before page grouping. */
+typedef struct CoffRelocEntry {
+ u32 site_rva; /* image RVA of the patched field; also the sort key */
+ u16 type; /* IMAGE_REL_BASED_* value; emitted in the top 4 bits */
+ u16 pad; /* always written as 0; not emitted into the blob */
+} CoffRelocEntry;
+
+/* qsort comparator: ascending by site_rva. Yields -1, 0 or 1. */
+static int coff_reloc_entry_cmp(const void* a, const void* b) {
+  u32 lhs = ((const CoffRelocEntry*)a)->site_rva;
+  u32 rhs = ((const CoffRelocEntry*)b)->site_rva;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Given entries sorted by site_rva, return the index one past the last
+ * entry that lies on the same 4-KiB page as entries[start]. */
+static u32 coff_reloc_page_run_end(const CoffRelocEntry* entries,
+                                   u32 nentries, u32 start) {
+  u32 page = entries[start].site_rva & ~0xfffu;
+  u32 end = start;
+  while (end < nentries && (entries[end].site_rva & ~0xfffu) == page) {
+    ++end;
+  }
+  return end;
+}
+
+/* Produce the .reloc blob for a PIE image.
+ *
+ *   out       bucket descriptors with RVAs already assigned.
+ *   map       per-section bucket + bucket-local offset.
+ *   reloc     receives bytes / size; size_raw is set to the allocation
+ *             size so the caller can free the blob later.
+ *   extras    additional sites (already final image RVAs) appended to
+ *             the ones derived from img->relocs; n_extras may be 0.
+ *
+ * For a non-PIE image, or when no site qualifies, reloc->bytes is NULL
+ * and reloc->size is 0. Panics if the blob cannot be allocated. */
+static void coff_build_reloc_section(LinkImage* img,
+                                     const CoffSection out[COFF_NBUCKETS],
+                                     const CoffSecMap* map,
+                                     CoffSection* reloc,
+                                     const CoffRelocEntry* extras,
+                                     u32 n_extras) {
+  Heap* heap = img->heap;
+  Compiler* c = img->c;
+  const u32 nrel = LinkRelocs_count(&img->relocs);
+  CoffRelocEntry* entries = NULL;
+  u32 nentries = 0;
+  u32 cap = 0;
+  u32 total = 0;
+  u32 pos = 0;
+  u32 first;
+  u32 i;
+
+  if (!img->pie) {
+    reloc->bytes = NULL;
+    reloc->size = 0;
+    return;
+  }
+
+  /* Collect one entry per relocation that needs a base-reloc. */
+  for (i = 0; i < nrel; ++i) {
+    const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+    const LinkSection* ls;
+    CoffRelocEntry e;
+    u8 sb;
+    if (!coff_reloc_needs_base_reloc(r->kind)) continue;
+    if (r->link_section_id == LINK_SEC_NONE) continue;
+    ls = &img->sections[r->link_section_id - 1];
+    sb = map[ls->id - 1].bucket;
+    /* r->write_vaddr shares ls->vaddr's pre-relayout coordinate system,
+     * so their difference is the stable in-section offset; add the
+     * bucket-local offset and the bucket's final RVA. */
+    e.site_rva = out[sb].rva + map[ls->id - 1].new_rva +
+                 (u32)(r->write_vaddr - ls->vaddr);
+    e.type = (r->kind == R_ABS64) ? (u16)IMAGE_REL_BASED_DIR64
+                                  : (u16)IMAGE_REL_BASED_HIGHLOW;
+    e.pad = 0;
+    if (nentries == cap) {
+      (void)VEC_GROW(heap, entries, cap, nentries + 1u);
+    }
+    entries[nentries++] = e;
+  }
+
+  /* Caller-supplied sites (TLS directory absolute-VA fields, etc.),
+   * already expressed as final image RVAs. */
+  for (i = 0; i < n_extras; ++i) {
+    if (nentries == cap) {
+      (void)VEC_GROW(heap, entries, cap, nentries + 1u);
+    }
+    entries[nentries++] = extras[i];
+  }
+
+  if (nentries == 0) {
+    reloc->bytes = NULL;
+    reloc->size = 0;
+    if (entries) heap->free(heap, entries, cap * sizeof(*entries));
+    return;
+  }
+
+  /* Sorting by RVA makes entries of one page contiguous. */
+  qsort(entries, nentries, sizeof(*entries), coff_reloc_entry_cmp);
+
+  /* Sizing pass: one block per page, each padded to 4 bytes. */
+  for (first = 0; first < nentries;) {
+    u32 last = coff_reloc_page_run_end(entries, nentries, first);
+    u32 raw = COFF_BASE_RELOCATION_SIZE + (last - first) * 2u;
+    total += (u32)ALIGN_UP((u64)raw, 4ull);
+    first = last;
+  }
+
+  reloc->bytes = (u8*)heap->alloc(heap, total, 4);
+  if (!reloc->bytes && total)
+    compiler_panic(c, no_loc(), "link_emit_coff: oom on .reloc blob");
+  memset(reloc->bytes, 0, total);
+  reloc->size = total;
+  /* Remember the allocation size for the free path. */
+  reloc->size_raw = total;
+
+  /* Emission pass: header (page RVA, block size) then the u16 entries. */
+  for (first = 0; first < nentries;) {
+    u32 last = coff_reloc_page_run_end(entries, nentries, first);
+    u32 count = last - first;
+    u32 raw = COFF_BASE_RELOCATION_SIZE + count * 2u;
+    u32 padded = (u32)ALIGN_UP((u64)raw, 4ull);
+    u8* blk = reloc->bytes + pos;
+    u32 k;
+    wr_u32_le(blk, entries[first].site_rva & ~0xfffu);
+    wr_u32_le(blk + 4, padded);
+    for (k = 0; k < count; ++k) {
+      const CoffRelocEntry* src = &entries[first + k];
+      u16 packed = (u16)(((u16)src->type << 12) | (src->site_rva & 0xfffu));
+      wr_u16_le(blk + 8 + k * 2u, packed);
+    }
+    /* Odd entry count: fill the gap with one IMAGE_REL_BASED_ABSOLUTE
+     * (0) entry so the block stays u32-aligned. */
+    if (padded > raw) {
+      wr_u16_le(blk + 8 + count * 2u, 0);
+    }
+    pos += padded;
+    first = last;
+  }
+
+  heap->free(heap, entries, cap * sizeof(*entries));
+}
+
+/* Patch each LinkRelocApply against the PE-relayout coordinates and
+ * apply it into the owning bucket's byte buffer.
+ *
+ *   out  bucket descriptors: .bytes is the writable buffer, .rva the
+ *        bucket's final RVA.
+ *   map  per-section bucket + bucket-local offset; turns the old
+ *        in-section offsets into bucket-local offsets.
+ *   iv   import override table, or NULL when the image has no imports.
+ *
+ * Imported targets (LinkSymbol.imported == 1) have no vaddr of their
+ * own -- instead the .idata pass populated `iv->by_sym[id-1]` with the
+ * function stub's vaddr (for callable imports) or the IAT slot's
+ * vaddr (for data imports). This is the spot where that table is
+ * consulted in lieu of the symbol's own zero vaddr.
+ *
+ * Panics on: an imported target without a table entry, a defined
+ * symbol with no containing section, an unresolved non-imported
+ * symbol, and a SECREL/SECTION relocation whose target is not bound to
+ * a section. */
+static void coff_apply_all_relocs(LinkImage* img,
+                                  const CoffSection out[COFF_NBUCKETS],
+                                  const CoffSecMap* map,
+                                  const CoffImportVaddr* iv) {
+  Compiler* c = img->c;
+  u32 i;
+  u64 img_base = PE_IMAGE_BASE;
+  u32 nrel = LinkRelocs_count(&img->relocs);
+  for (i = 0; i < nrel; ++i) {
+    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+    const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
+    const LinkSection* sec;
+    /* Stays NULL unless the target went through the "defined" branch
+     * below; the SECREL/SECTION path checks it before dereferencing. */
+    const LinkSection* tgt_sec = NULL;
+    u64 S, P;
+    u8* P_bytes;
+    u8 bucket;
+    u32 site_off_in_sec;
+    u32 site_bucket_off;
+    if (r->link_section_id == LINK_SEC_NONE) continue;
+    sec = &img->sections[r->link_section_id - 1];
+    bucket = map[sec->id - 1].bucket;
+    if (!out[bucket].has_file_bytes || !out[bucket].bytes) {
+      /* Shouldn't happen -- .bss has no relocations. */
+      continue;
+    }
+    site_off_in_sec = (u32)(r->write_vaddr - sec->vaddr);
+    site_bucket_off = map[sec->id - 1].new_rva + site_off_in_sec;
+    P_bytes = out[bucket].bytes + site_bucket_off;
+    /* P = ImageBase + bucket_rva + map[].new_rva + site_off_in_sec
+     * -- i.e. the final runtime address of the patch site. */
+    P = img_base + (u64)out[bucket].rva +
+        (u64)map[sec->id - 1].new_rva + site_off_in_sec;
+
+    /* Resolve S: target symbol's new image-relative address. Look up
+     * the LinkSection that contains the symbol's original vaddr, then
+     * apply that section's delta. */
+    if (tgt->imported) {
+      /* IAT-routed: stub vaddr (functions) / slot vaddr (data). */
+      if (!iv || iv->by_sym[r->target - 1u] == 0)
+        compiler_panic(c, no_loc(),
+                       "link_emit_coff: imported target lacks IAT slot");
+      S = iv->by_sym[r->target - 1u];
+    } else if (tgt->kind == SK_ABS) {
+      S = tgt->vaddr;
+    } else if (tgt->defined) {
+      tgt_sec = coff_symbol_section(img, tgt);
+      if (!tgt_sec) {
+        compiler_panic(c, no_loc(),
+                       "link_emit_coff: symbol vaddr 0x%llx has no "
+                       "containing section",
+                       (unsigned long long)tgt->vaddr);
+      }
+      u8 tb = map[tgt_sec->id - 1].bucket;
+      u64 sym_off = tgt->vaddr - tgt_sec->vaddr;
+      S = img_base + (u64)out[tb].rva +
+          (u64)map[tgt_sec->id - 1].new_rva + sym_off;
+    } else {
+      /* Undef and not imported -- shouldn't survive resolve_undefs. */
+      compiler_panic(c, no_loc(),
+                     "link_emit_coff: unresolved non-imported symbol");
+    }
+    /* COFF-only section-relative kinds: the SECREL value is the
+     * symbol's offset from the start of its containing output section
+     * (PE bucket), and SECTION is the 1-based PE section index.
+     * link_reloc_apply only sees S and P, so we patch these inline
+     * before delegating common kinds. */
+    if (r->kind == R_COFF_SECREL || r->kind == R_COFF_SECTION ||
+        r->kind == R_COFF_AARCH64_SECREL_LOW12A ||
+        r->kind == R_COFF_AARCH64_SECREL_HIGH12A) {
+      /* tgt_sec is only resolved for defined, non-absolute,
+       * non-imported targets; anything else has no section to be
+       * relative to. */
+      if (!tgt->defined || tgt->kind == SK_ABS || !tgt_sec) {
+        compiler_panic(c, no_loc(),
+                       "link_emit_coff: COFF SECREL/SECTION requires a "
+                       "defined section-bound target symbol");
+      }
+      u8 tb = map[tgt_sec->id - 1].bucket;
+      u64 sym_off_in_bucket =
+          (u64)map[tgt_sec->id - 1].new_rva + (tgt->vaddr - tgt_sec->vaddr);
+      if (r->kind == R_COFF_SECREL) {
+        u64 v = sym_off_in_bucket + (u64)r->addend;
+        wr_u32_le(P_bytes, (u32)(v & 0xffffffffu));
+      } else if (r->kind == R_COFF_SECTION) {
+        /* PE section indices are 1-based; buckets are 0-based, so add 1. */
+        wr_u16_le(P_bytes, (u16)((tb + 1u) & 0xffffu));
+      } else {
+        /* AArch64 SECREL_{LOW,HIGH}12A: patch the imm12 field of an
+         * existing ADD-imm12 instruction. LOW12A = bits [11:0] of the
+         * SECREL; HIGH12A = bits [23:12]. The instruction's sh bit was
+         * already set by the codegen (0 for LOW, 1 for HIGH). */
+        u64 v = sym_off_in_bucket + (u64)r->addend;
+        u32 imm12 = (r->kind == R_COFF_AARCH64_SECREL_HIGH12A)
+                        ? (u32)((v >> 12) & 0xfffu)
+                        : (u32)(v & 0xfffu);
+        u32 instr = rd_u32_le(P_bytes);
+        instr = (instr & ~(0xfffu << 10)) | (imm12 << 10);
+        wr_u32_le(P_bytes, instr);
+      }
+      continue;
+    }
+    link_reloc_apply(c, r->kind, P_bytes, S, r->addend, P);
+  }
+}
+
+/* ---- header marshalling ----
+ *
+ * Each helper streams its on-disk shape to the writer field-by-field;
+ * we avoid sizeof(struct) on the packed PE wire types since they carry
+ * implicit-padding hazards on hosts that disagree with #pragma pack(1)
+ * defaults. */
+
+/* Emit the PE_DOS_HDR_SIZE-byte DOS header: the "MZ" magic at offset
+ * 0, e_lfanew (file offset of the PE signature) at offset 0x3c, and
+ * zeroes everywhere else. */
+static void coff_write_dos_stub(Writer* w) {
+  u8 hdr[PE_DOS_HDR_SIZE];
+  memset(hdr, 0, sizeof(hdr));
+  wr_u16_le(hdr, (u16)IMAGE_DOS_SIGNATURE); /* e_magic, little-endian */
+  wr_u32_le(hdr + 0x3c, PE_DOS_E_LFANEW);   /* e_lfanew */
+  cfree_writer_write(w, hdr, sizeof(hdr));
+}
+
+/* Emit the COFF file header field by field. The timestamp and the
+ * symbol-table pointer/count are always written as zero. */
+static void coff_write_file_header(Writer* w, u16 machine, u16 nsec,
+                                   u16 characteristics) {
+  const u32 time_date_stamp = 0u;
+  const u32 symtab_offset = 0u;
+  const u32 symtab_count = 0u;
+  const u16 opt_hdr_size = (u16)PE_OPT_HDR_SIZE;
+
+  coff_wr_u16(w, machine);          /* Machine */
+  coff_wr_u16(w, nsec);             /* NumberOfSections */
+  coff_wr_u32(w, time_date_stamp);  /* TimeDateStamp */
+  coff_wr_u32(w, symtab_offset);    /* PointerToSymbolTable */
+  coff_wr_u32(w, symtab_count);     /* NumberOfSymbols */
+  coff_wr_u16(w, opt_hdr_size);     /* SizeOfOptionalHeader */
+  coff_wr_u16(w, characteristics);  /* Characteristics */
+}
+
+/* Per-section meta used by both the data-directory fill and the
+ * IMAGE_SECTION_HEADER emit. Compactly captures everything the writer
+ * needs to know about the four-or-five output sections.
+ *
+ * NOTE(review): no use of this type is visible in this part of the
+ * file (the writers below take CoffSection / scalar arguments) --
+ * confirm it is still referenced before relying on it. */
+typedef struct CoffOutHdr {
+ const char* name; /* section name; presumably as passed to coff_write_section_header */
+ u32 vsize; /* presumably the in-memory (virtual) size -- confirm */
+ u32 rva; /* presumably the section's RVA -- confirm */
+ u32 size_raw; /* presumably the file-aligned on-disk size -- confirm */
+ u32 file_offset; /* presumably the file offset of the raw data -- confirm */
+ u32 characteristics; /* presumably IMAGE_SCN_* flags -- confirm */
+} CoffOutHdr;
+
+/* Emit the PE32+ optional header, including the data directories.
+ *
+ *   entry_rva            AddressOfEntryPoint.
+ *   out                  laid-out buckets (RVAs, sizes, in_image).
+ *   headers_size_padded  SizeOfHeaders.
+ *   image_size           SizeOfImage.
+ *   pie                  nonzero when base relocations may be emitted.
+ *   subsystem            Subsystem; 0 selects IMAGE_SUBSYSTEM_WINDOWS_CUI.
+ *   it                   import table, or NULL when there are none.
+ *   tls                  TLS layout, or NULL.
+ *
+ * SizeOfCode / SizeOfInitializedData / SizeOfUninitializedData are the
+ * sums over every in-image section carrying the matching
+ * IMAGE_SCN_CNT_* flag, so .idata, .tls and .reloc count as
+ * initialized data alongside .rdata and .data. */
+static void coff_write_optional_header(Writer* w, u32 entry_rva,
+                                       const CoffSection out[COFF_NBUCKETS],
+                                       u32 headers_size_padded,
+                                       u32 image_size,
+                                       int pie,
+                                       u16 subsystem,
+                                       const CoffImportTable* it,
+                                       const CoffTlsLayout* tls) {
+  u32 size_code = 0;
+  u32 size_init = 0;
+  u32 size_uninit = 0;
+  u32 b;
+  u32 i;
+  int has_idata = it && it->nimports > 0 &&
+                  out[COFF_BUCKET_IDATA].in_image;
+
+  /* Classify each emitted section by its content flag. Sections
+   * without file bytes have size_raw == 0, so uninitialized data is
+   * accounted by virtual size instead. */
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (!out[b].in_image) continue;
+    if (out[b].characteristics & IMAGE_SCN_CNT_CODE) {
+      size_code += out[b].size_raw;
+    } else if (out[b].characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) {
+      size_init += out[b].size_raw;
+    } else if (out[b].characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) {
+      size_uninit += out[b].size;
+    }
+  }
+
+  /* Standard fields. */
+  coff_wr_u16(w, IMAGE_NT_OPTIONAL_HDR64_MAGIC);
+  coff_wr_u8(w, PE_LINKER_MAJOR);
+  coff_wr_u8(w, PE_LINKER_MINOR);
+  coff_wr_u32(w, size_code);   /* SizeOfCode */
+  coff_wr_u32(w, size_init);   /* SizeOfInitializedData */
+  coff_wr_u32(w, size_uninit); /* SizeOfUninitializedData */
+  coff_wr_u32(w, entry_rva);   /* AddressOfEntryPoint */
+  coff_wr_u32(w, out[COFF_BUCKET_TEXT].in_image
+                     ? out[COFF_BUCKET_TEXT].rva : 0); /* BaseOfCode */
+  /* Windows-specific fields. */
+  coff_wr_u64(w, PE_IMAGE_BASE);
+  coff_wr_u32(w, PE_SECTION_ALIGNMENT);
+  coff_wr_u32(w, PE_FILE_ALIGNMENT);
+  coff_wr_u16(w, PE_OS_MAJOR);
+  coff_wr_u16(w, PE_OS_MINOR);
+  coff_wr_u16(w, 0u); /* MajorImageVersion */
+  coff_wr_u16(w, 0u); /* MinorImageVersion */
+  coff_wr_u16(w, PE_SUBSYS_MAJOR);
+  coff_wr_u16(w, PE_SUBSYS_MINOR);
+  coff_wr_u32(w, 0u); /* Win32VersionValue */
+  coff_wr_u32(w, image_size);
+  coff_wr_u32(w, headers_size_padded);
+  coff_wr_u32(w, 0u); /* CheckSum */
+  coff_wr_u16(w, subsystem ? subsystem : IMAGE_SUBSYSTEM_WINDOWS_CUI);
+  coff_wr_u16(w, PE_DLL_CHARS);
+  coff_wr_u64(w, PE_STACK_RESERVE);
+  coff_wr_u64(w, PE_STACK_COMMIT);
+  coff_wr_u64(w, PE_HEAP_RESERVE);
+  coff_wr_u64(w, PE_HEAP_COMMIT);
+  coff_wr_u32(w, 0u); /* LoaderFlags */
+  coff_wr_u32(w, (u32)PE_NUM_DATA_DIRS);
+  /* DataDirectory[]. Populated entries:
+   *   IMPORT    -- descriptor table RVA + total descriptor bytes
+   *   BASERELOC -- when PIE and .reloc is in the image
+   *   TLS       -- TLS directory record inside .rdata, when present
+   *   IAT       -- first IAT block RVA + sum of per-DLL IAT sizes
+   * Everything else stays zero. */
+  for (i = 0; i < PE_NUM_DATA_DIRS; ++i) {
+    if (i == IMAGE_DIRECTORY_ENTRY_IMPORT && has_idata) {
+      coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->desc_off);
+      coff_wr_u32(w, it->desc_size);
+    } else if (i == IMAGE_DIRECTORY_ENTRY_IAT && has_idata) {
+      coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->iat_base);
+      coff_wr_u32(w, it->iat_total);
+    } else if (i == IMAGE_DIRECTORY_ENTRY_BASERELOC && pie &&
+               out[COFF_BUCKET_RELOC].in_image) {
+      coff_wr_u32(w, out[COFF_BUCKET_RELOC].rva);
+      coff_wr_u32(w, out[COFF_BUCKET_RELOC].size);
+    } else if (i == IMAGE_DIRECTORY_ENTRY_TLS && tls && tls->present) {
+      coff_wr_u32(w, out[COFF_BUCKET_RDATA].rva + tls->dir_rdata_off);
+      coff_wr_u32(w, COFF_TLS_DIRECTORY64_SIZE);
+    } else {
+      coff_wr_u32(w, 0u);
+      coff_wr_u32(w, 0u);
+    }
+  }
+}
+
+/* Emit one IMAGE_SECTION_HEADER. The name field is exactly 8 bytes:
+ * longer names are truncated, shorter ones are NUL-padded. Relocation
+ * and line-number pointers/counts are always written as zero. */
+static void coff_write_section_header(Writer* w, const char* name,
+                                      u32 vsize, u32 rva, u32 size_raw,
+                                      u32 file_offset,
+                                      u32 characteristics) {
+  u8 field[8];
+  size_t k;
+  for (k = 0; k < sizeof(field); ++k) field[k] = 0;
+  for (k = 0; k < sizeof(field) && name[k] != '\0'; ++k) {
+    field[k] = (u8)name[k];
+  }
+  cfree_writer_write(w, field, 8);
+  coff_wr_u32(w, vsize);           /* VirtualSize */
+  coff_wr_u32(w, rva);             /* VirtualAddress */
+  coff_wr_u32(w, size_raw);        /* SizeOfRawData */
+  coff_wr_u32(w, file_offset);     /* PointerToRawData */
+  coff_wr_u32(w, 0u);              /* PointerToRelocations */
+  coff_wr_u32(w, 0u);              /* PointerToLinenumbers */
+  coff_wr_u16(w, 0u);              /* NumberOfRelocations */
+  coff_wr_u16(w, 0u);              /* NumberOfLinenumbers */
+  coff_wr_u32(w, characteristics); /* Characteristics */
+}
+
+/* ---- main entry ---- */
+
+/* Serialise a linked image as a PE32+ executable to `w`.
+ *
+ * Passes, in order: bucket the input sections; plan imports (.idata +
+ * .text stubs) and the TLS directory; decide whether .reloc exists and
+ * lay out RVAs / file offsets; emit .idata, stubs and the TLS
+ * directory; build .reloc and re-run layout; resolve the entry RVA;
+ * apply relocations; write headers, section table and section bodies;
+ * free all scratch allocations.
+ *
+ * Panics on an unsupported target machine, a missing or non-section
+ * entry symbol, and allocation failure. */
+void link_emit_coff(LinkImage* img, Writer* w) {
+  Heap* heap = img->heap;
+  Compiler* c = img->c;
+  u16 machine = coff_machine_or_panic(c);
+  if (img->entry_sym == LINK_SYM_NONE)
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: no resolved entry symbol");
+
+  /* ---- pass 1: build buckets + per-section delta map ---- */
+  CoffSection out[COFF_NBUCKETS];
+  CoffSecMap* map = (CoffSecMap*)heap->alloc(
+      heap, sizeof(CoffSecMap) * (img->nsections + 1u),
+      _Alignof(CoffSecMap));
+  /* The request is nsections + 1 entries, so it is never zero-sized: a
+   * NULL result is always an allocation failure, including when the
+   * image has no sections. */
+  if (!map)
+    compiler_panic(c, no_loc(), "link_emit_coff: oom on section map");
+  memset(map, 0, sizeof(CoffSecMap) * (img->nsections + 1u));
+
+  coff_build_buckets(img, out, map);
+  /* coff_build_buckets stashes per-bucket allocation caps in size_raw
+   * (the only bucket field we own for the duration of layout); read
+   * them out before coff_assign_layout overwrites the field. .reloc
+   * and .idata aren't touched by coff_build_buckets -- their caps are
+   * filled in below once coff_build_reloc_section / coff_emit_idata
+   * run. */
+  u32 bucket_caps[COFF_NBUCKETS];
+  u32 b;
+  for (b = 0; b < COFF_NBUCKETS; ++b) bucket_caps[b] = out[b].size_raw;
+
+  /* ---- pass 1b: collect imports and reserve .idata + .text stubs ----
+   *
+   * Builds the per-DLL / per-import layout and appends one IAT-routing
+   * stub per imported function to the .text bucket. The .idata bucket
+   * size is set here (so it counts in nsec); the stub vaddrs and
+   * IAT-slot vaddrs are finalised after coff_assign_layout. */
+  CoffImportTable imports;
+  int have_imports = coff_collect_imports(img, &imports);
+  if (have_imports) {
+    coff_plan_idata_layout(img, &imports);
+    coff_append_stubs(img, &imports, &out[COFF_BUCKET_TEXT],
+                      &bucket_caps[COFF_BUCKET_TEXT]);
+    /* Reserve the .idata bucket size so coff_assign_layout / nsec
+     * accounting sees it. Actual bytes are written by coff_emit_idata
+     * once the bucket RVA is known. */
+    out[COFF_BUCKET_IDATA].size = imports.idata_size;
+  }
+
+  /* ---- pass 1c: plan the TLS directory record ----
+   *
+   * If any SF_TLS sections survived, reserve 40 bytes at the tail of
+   * .rdata for the IMAGE_TLS_DIRECTORY64. Bytes are zeroed now and
+   * filled in by coff_emit_tls_dir once the bucket RVAs are final. */
+  CoffTlsLayout tls;
+  coff_plan_tls_layout(img, out, &bucket_caps[COFF_BUCKET_RDATA], &tls);
+
+  /* ---- pass 2: decide whether .reloc will be in the image ----
+   *
+   * The headers' file size (and therefore every section's file
+   * offset) depends on the section-table entry count, so we need to
+   * commit to "is .reloc emitted?" before laying out file offsets.
+   * .reloc lights up iff PIE and at least one absolute reloc points
+   * into a kept section, OR a TLS directory is emitted (its four u64
+   * VA fields all need base-relocs). */
+  int emit_reloc = 0;
+  if (img->pie) {
+    u32 i;
+    u32 nrel = LinkRelocs_count(&img->relocs);
+    for (i = 0; i < nrel; ++i) {
+      const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+      if (!coff_reloc_needs_base_reloc(r->kind)) continue;
+      if (r->link_section_id == LINK_SEC_NONE) continue;
+      emit_reloc = 1;
+      break;
+    }
+    if (!emit_reloc && tls.present) emit_reloc = 1;
+  }
+
+  u32 nsec = 0;
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (b == COFF_BUCKET_RELOC) {
+      if (emit_reloc) ++nsec; /* tentative; size set below */
+      continue;
+    }
+    if (out[b].size) ++nsec;
+  }
+  u32 headers_size_unpadded =
+      PE_DOS_HDR_SIZE + PE_SIG_SIZE + PE_FILE_HDR_SIZE + PE_OPT_HDR_SIZE +
+      nsec * PE_SECTION_HDR_SIZE;
+  u32 headers_size_padded =
+      (u32)ALIGN_UP((u64)headers_size_unpadded, (u64)PE_FILE_ALIGNMENT);
+
+  /* First layout pass: fixes RVAs / file offsets for buckets that
+   * already have a finalised size (.text, .rdata, .idata, .data, .bss).
+   * .reloc's RVA is provisional -- it depends on .reloc's own size,
+   * which is still 0 at this point. */
+  (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA);
+
+  /* ---- pass 2b: emit .idata bytes + per-arch IAT stubs ----
+   *
+   * The .idata bucket's RVA is final after the first assign_layout;
+   * stubs need it (the indirect-jump displacement targets an IAT slot)
+   * and .idata's own descriptor / ILT / IAT records all carry RVAs.
+   * coff_import_vaddr_build builds the per-LinkSymId override table
+   * that apply_all_relocs consults in place of the (zero) symbol
+   * vaddr for imported targets. */
+  CoffImportVaddr import_vaddr;
+  memset(&import_vaddr, 0, sizeof(import_vaddr));
+  if (have_imports) {
+    coff_emit_idata(img, &imports, out, &bucket_caps[COFF_BUCKET_IDATA]);
+    coff_emit_stubs(img, &imports, out);
+    coff_import_vaddr_build(img, &imports, out, &import_vaddr);
+  }
+
+  /* Write the TLS directory bytes now that bucket RVAs are final. */
+  coff_emit_tls_dir(img, out, map, &tls);
+
+  /* ---- pass 3: build .reloc using the now-final bucket RVAs ----
+   *
+   * coff_build_reloc_section reads out[bucket].rva indirectly via
+   * map[].new_rva + (write_vaddr - sec->vaddr) -> site offset within
+   * the bucket; the absolute site_rva is bucket.rva + that offset.
+   * Patch site RVAs are page-quantised in the emitted blob, so this
+   * is the spot where the bucket RVAs need to be already final.
+   *
+   * TLS directory's four absolute-VA fields ride into the entries via
+   * the `extras` array -- they aren't ordinary symbol relocations, so
+   * they don't show up in img->relocs. */
+  if (emit_reloc) {
+    CoffRelocEntry tls_extras[4];
+    u32 n_tls_extras = 0;
+    if (tls.present) {
+      u32 dir_rva = out[COFF_BUCKET_RDATA].rva + tls.dir_rdata_off;
+      static const u32 field_offs[4] = {
+          COFF_TLSDIR_OFF_START_ADDR, COFF_TLSDIR_OFF_END_ADDR,
+          COFF_TLSDIR_OFF_INDEX_ADDR, COFF_TLSDIR_OFF_CALLBACKS,
+      };
+      u32 k;
+      for (k = 0; k < 4; ++k) {
+        /* The callbacks field is only relocated when a callbacks
+         * symbol exists. */
+        if (field_offs[k] == COFF_TLSDIR_OFF_CALLBACKS &&
+            !tls.callbacks_sym)
+          continue;
+        tls_extras[n_tls_extras].site_rva = dir_rva + field_offs[k];
+        tls_extras[n_tls_extras].type = (u16)IMAGE_REL_BASED_DIR64;
+        tls_extras[n_tls_extras].pad = 0;
+        ++n_tls_extras;
+      }
+    }
+    coff_build_reloc_section(img, out, map, &out[COFF_BUCKET_RELOC],
+                             tls_extras, n_tls_extras);
+    bucket_caps[COFF_BUCKET_RELOC] = out[COFF_BUCKET_RELOC].size_raw;
+    /* size_raw was stashed by build; assign_layout below recomputes it
+     * as the FileAlignment-padded length. */
+    (void)coff_assign_layout(out, headers_size_padded,
+                             PE_FIRST_SECTION_RVA);
+  }
+
+  /* `_tls_used` is the public mingw/PE name for the TLS directory
+   * record. Keep it in lockstep with the optional-header TLS data
+   * directory, rather than leaving references bound to mingw's tlssup.o
+   * placeholder record. */
+  coff_define_tls_used(img, out, &tls);
+
+  /* ---- pass 4: resolve entry symbol's PE RVA ----
+   *
+   * Done before apply so the optional-header field has its final
+   * value. */
+  const LinkSymbol* entry_sym =
+      LinkSyms_at(&img->syms, img->entry_sym - 1);
+  if (!entry_sym->defined || entry_sym->kind == SK_ABS)
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: entry symbol is not a defined "
+                   "image-relative function");
+  const LinkSection* entry_sec = coff_section_at(img, entry_sym->vaddr);
+  if (!entry_sec)
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: entry symbol has no containing "
+                   "section");
+  u8 entry_bucket = map[entry_sec->id - 1].bucket;
+  u32 entry_rva = out[entry_bucket].rva +
+                  map[entry_sec->id - 1].new_rva +
+                  (u32)(entry_sym->vaddr - entry_sec->vaddr);
+
+  /* ---- pass 5: apply all relocations into bucket bytes ---- */
+  coff_apply_all_relocs(img, out, map,
+                        have_imports ? &import_vaddr : NULL);
+
+  /* ---- pass 6: compute SizeOfImage (in-memory size) ---- */
+  u32 image_size = 0;
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (!out[b].in_image) continue;
+    u32 end = out[b].rva + out[b].size;
+    if (end > image_size) image_size = end;
+  }
+  image_size = (u32)ALIGN_UP((u64)image_size, (u64)PE_SECTION_ALIGNMENT);
+
+  /* ---- pass 7: write everything ---- */
+  u16 file_chars = IMAGE_FILE_EXECUTABLE_IMAGE |
+                   IMAGE_FILE_LARGE_ADDRESS_AWARE;
+  if (!img->pie || !out[COFF_BUCKET_RELOC].in_image) {
+    file_chars |= IMAGE_FILE_RELOCS_STRIPPED;
+  }
+
+  coff_write_dos_stub(w);
+  /* PE signature. */
+  coff_wr_u32(w, IMAGE_NT_SIGNATURE);
+  coff_write_file_header(w, machine, (u16)nsec, file_chars);
+  u16 subsystem = img->linker ? img->linker->pe_subsystem : 0;
+  coff_write_optional_header(w, entry_rva, out, headers_size_padded,
+                             image_size, img->pie, subsystem,
+                             have_imports ? &imports : NULL, &tls);
+
+  /* Section table. */
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (!out[b].in_image) continue;
+    coff_write_section_header(w, out[b].name, out[b].size, out[b].rva,
+                              out[b].size_raw, out[b].file_offset,
+                              out[b].characteristics);
+  }
+
+  /* Pad to first section's file offset. */
+  u64 cur = (u64)headers_size_unpadded;
+  u64 first_file_off = headers_size_padded;
+  if (cur < first_file_off) {
+    coff_write_zeroes(w, first_file_off - cur);
+    cur = first_file_off;
+  }
+
+  /* Section bodies: payload bytes followed by zero fill up to the
+   * FileAlignment-padded raw size. */
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (!out[b].in_image) continue;
+    if (!out[b].has_file_bytes) continue;
+    if (cur < out[b].file_offset) {
+      coff_write_zeroes(w, out[b].file_offset - cur);
+      cur = out[b].file_offset;
+    }
+    cfree_writer_write(w, out[b].bytes, out[b].size);
+    cur += out[b].size;
+    if (out[b].size_raw > out[b].size) {
+      coff_write_zeroes(w, out[b].size_raw - out[b].size);
+      cur += out[b].size_raw - out[b].size;
+    }
+  }
+
+  /* ---- cleanup ---- */
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (out[b].bytes) heap->free(heap, out[b].bytes, bucket_caps[b]);
+  }
+  heap->free(heap, map, sizeof(CoffSecMap) * (img->nsections + 1u));
+  if (have_imports) {
+    coff_import_vaddr_free(img, &import_vaddr);
+    coff_imports_free(img, &imports);
+  }
+}
diff --git a/src/link/link_internal.h b/src/link/link_internal.h
@@ -23,6 +23,11 @@ typedef struct InputMap {
u32 nsym;
LinkSectionId* section; /* size = ObjBuilder.nsections */
u32 nsection;
+ /* COMDAT discard mask, size = nsection. Set by link_resolve_symbols
+ * for COFF/PE SELECTANY: when an input's COMDAT section conflicts
+ * with an earlier definition, the duplicate section is marked here
+ * so link_gc_compute and link_layout_sections skip it. */
+ u8* comdat_discarded;
} InputMap;
/* Open-addressed name -> LinkSymId hash for global / weak definitions
@@ -111,6 +116,7 @@ struct Linker {
* DSO input is present. Triggers layout_dyn (Phase 4) and the
* dynamic ELF emit path (Phase 6). */
int emit_pie;
+ u16 pe_subsystem;
/* Caller-supplied PT_INTERP. layout_dyn falls back to a target-
* derived default when this is 0. */
Sym interp_path;
@@ -182,6 +188,10 @@ u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec);
/* ---- Public entries (link_resolve.c) --------------------------------------
*/
void link_ingest_archives(struct Linker*);
+/* PE/COFF only: synthesize a tiny ObjBuilder providing the mingw CRT
+ * `__CTOR_LIST__` / `__CTOR_END__` / `__DTOR_LIST__` / `__DTOR_END__`
+ * boundary symbols. See link_resolve.c for the contract. */
+void link_synth_coff_ctor_dtor_list(struct Linker*);
void link_resolve_symbols(struct Linker*, LinkImage*);
void link_resolve_undefs(struct Linker*, LinkImage*);
void link_gc_compute(struct Linker*, LinkImage*, GcLive*);
@@ -218,6 +228,12 @@ void link_resolve_entry(struct Linker*, LinkImage*);
* (the caller still owns). DSO / TBD inputs are skipped. */
void link_capture_debug_inputs(struct Linker*, LinkImage*);
+/* Default PE/COFF ImageBase for executables. Mirrored in link_coff.c
+ * (the emitter writes this into the optional header). Exposed here so
+ * link_layout can synthesize the `__ImageBase` symbol at the same
+ * vaddr, before resolve_undefs runs. */
+#define LINK_PE_IMAGE_BASE 0x140000000ULL
+
/* Defined in link_dyn.c. Phase 4: synthesize .interp/.dynsym/.dynstr/
* .gnu.hash/.rela.dyn/.rela.plt/.plt/.got.plt/.dynamic when the link
* is producing a PIE / ET_DYN exe. No-op when there are zero imports
@@ -445,6 +461,7 @@ void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P);
* reloc application remains keyed by RelocKind. COFF arrives later. */
void link_emit_elf(LinkImage*, Writer*);
void link_emit_macho(LinkImage*, Writer*);
+void link_emit_coff(LinkImage*, Writer*);
/* Format-agnostic 16-byte image identity, derived from per-segment
* post-shift bytes + vaddrs/sizes. ELF wraps it in a
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -510,10 +510,16 @@ void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
Sym sym = boundary_name(l, name);
LinkSymId id = symhash_get(&img->globals, sym);
LinkSymbol rec;
+ u8 kind = SK_OBJ;
u32 i, n;
+ if (l->c->target.obj == CFREE_OBJ_COFF &&
+ (strcmp(name, "__ImageBase") == 0 ||
+ strcmp(name, "_tls_used") == 0)) {
+ kind = SK_ABS;
+ }
memset(&rec, 0, sizeof(rec));
rec.name = sym;
- rec.kind = SK_OBJ;
+ rec.kind = kind;
rec.defined = 1;
rec.vaddr = vaddr;
rec.bind = SB_GLOBAL;
@@ -533,7 +539,7 @@ void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
s->section_id = LINK_SEC_NONE;
s->value = 0;
s->vaddr = vaddr;
- s->kind = SK_OBJ;
+ s->kind = kind;
s->defined = 1;
s->imported = 0;
}
@@ -947,6 +953,7 @@ LinkImage* link_resolve(Linker* l) {
Heap* h;
metrics_scope_begin(l->c, "link.resolve.total");
+ link_synth_coff_ctor_dtor_list(l);
metrics_scope_begin(l->c, "link.ingest_archives");
link_ingest_archives(l);
metrics_scope_end(l->c, "link.ingest_archives");
@@ -997,6 +1004,14 @@ LinkImage* link_resolve(Linker* l) {
link_emit_boundary_sym(l, img, "__dso_handle", 0);
link_emit_boundary_sym(l, img, "_DYNAMIC", 0);
link_emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0);
+ /* PE/COFF: mingw CRT references `__ImageBase` for ASLR-relative
+ * addressing and base-relocation bookkeeping. The PE emitter
+ * writes LINK_PE_IMAGE_BASE into the optional header; expose the
+ * same value as a linker-defined symbol so input objects resolve. */
+ if (l->c->target.obj == CFREE_OBJ_COFF) {
+ link_emit_boundary_sym(l, img, "__ImageBase", LINK_PE_IMAGE_BASE);
+ if (img->tls_memsz) link_emit_boundary_sym(l, img, "_tls_used", 0);
+ }
{
const LinkArchDesc* arch = link_arch_desc_for(l->c);
u32 si;
@@ -1041,8 +1056,12 @@ LinkImage* link_resolve(Linker* l) {
if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size);
if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size);
}
+ /* layout_dyn synthesizes ELF-specific .interp / .dynsym / .dynstr /
+ * .rela.dyn / .plt / .got.plt / .dynamic sections. Mach-O has its
+ * own equivalent path; COFF binds imports through .idata + IAT
+ * (Phase 3.2). Skip for non-ELF formats. */
metrics_scope_begin(l->c, "link.layout_dyn");
- layout_dyn(l, img);
+ if (l->c->target.obj == CFREE_OBJ_ELF) layout_dyn(l, img);
metrics_scope_end(l->c, "link.layout_dyn");
metrics_scope_begin(l->c, "link.resolve_entry");
link_resolve_entry(l, img);
diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c
@@ -330,6 +330,13 @@ static u8 reloc_width(RelocKind k) {
case R_RV_ADD64:
case R_RV_SUB64:
return 8;
+ case R_COFF_SECREL:
+ return 4;
+ case R_COFF_SECTION:
+ return 2;
+ case R_COFF_AARCH64_SECREL_LOW12A:
+ case R_COFF_AARCH64_SECREL_HIGH12A:
+ return 4;
default:
return 0;
}
diff --git a/src/link/link_resolve.c b/src/link/link_resolve.c
@@ -41,6 +41,10 @@ static void map_alloc(LinkImage* img, InputMap* m, u32 nsym, u32 nsection) {
if (!m->section)
compiler_panic(img->c, no_loc(), "link: oom on input section map");
memset(m->section, 0, sizeof(*m->section) * nsection);
+ m->comdat_discarded = (u8*)h->alloc(h, nsection ? nsection : 1u, 1);
+ if (!m->comdat_discarded)
+ compiler_panic(img->c, no_loc(), "link: oom on input comdat map");
+ memset(m->comdat_discarded, 0, nsection ? nsection : 1u);
}
/* ---- pass 1: collect symbols ---- */
@@ -64,6 +68,17 @@ static int obj_sym_is_logical_undef(const ObjSym* s) {
s->kind != SK_COMMON;
}
+/* Returns nonzero when `s` is defined in a COMDAT (SF_GROUP-tagged)
+ * section of `ob`; 0 for a NULL symbol, a symbol with no owning section,
+ * or a failed section lookup. link_resolve_symbols uses this for COFF/PE
+ * SELECTANY: a duplicate strong global is tolerated only if both defs pass. */
+static int obj_sym_defined_in_comdat(ObjBuilder* ob, const ObjSym* s) {
+  const Section* owner = NULL;
+  if (s && s->section_id != OBJ_SEC_NONE)
+    owner = obj_section_get(ob, s->section_id);
+  return owner != NULL && (owner->flags & SF_GROUP) != 0;
+}
+
void link_resolve_symbols(Linker* l, LinkImage* img) {
u32 ii;
for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
@@ -146,12 +161,29 @@ void link_resolve_symbols(Linker* l, LinkImage* img) {
m->sym[e.id] = existing;
} else if (new_strength == old_strength &&
new_strength == bind_strength(SB_GLOBAL)) {
- size_t namelen;
- const char* nm = pool_str(l->c->global, s->name, &namelen);
- compiler_panic(l->c, no_loc(),
- "link: duplicate definition of "
- "global symbol '%.*s'",
- (int)namelen, nm);
+ /* COFF SELECTANY: if both defs are in COMDAT sections,
+ * keep the earlier one and discard the new section. */
+ ObjBuilder* prev_ob = (prev->input_id != LINK_INPUT_NONE)
+ ? LinkInputs_at(&l->inputs,
+ prev->input_id - 1)->obj
+ : NULL;
+ const ObjSym* prev_os = prev_ob
+ ? obj_symbol_get(prev_ob, prev->obj_sym)
+ : NULL;
+ if (prev_ob && prev_os &&
+ obj_sym_defined_in_comdat(prev_ob, prev_os) &&
+ obj_sym_defined_in_comdat(ob, s)) {
+ m->sym[e.id] = existing;
+ if (s->section_id < m->nsection)
+ m->comdat_discarded[s->section_id] = 1;
+ } else {
+ size_t namelen;
+ const char* nm = pool_str(l->c->global, s->name, &namelen);
+ compiler_panic(l->c, no_loc(),
+ "link: duplicate definition of "
+ "global symbol '%.*s'",
+ (int)namelen, nm);
+ }
} else {
m->sym[e.id] = existing;
}
@@ -227,6 +259,71 @@ void link_resolve_undefs(Linker* l, LinkImage* img) {
continue;
}
}
+ /* COFF WEAK_EXTERNAL alias fallback: cfree drops the aux TagIndex
+ * at read time (see coff_read.c step "WEAK_EXTERNAL primary"), so
+ * the alias relationship is recovered here via the mingw single-
+ * underscore naming convention. e.g. `__set_app_type` aliases to
+ * `_set_app_type`; `__imp___set_app_type` aliases to
+ * `__imp__set_app_type`. Try the de-underscored variant first,
+ * then the re-underscored one. Look in both image globals (for
+ * defined-in-input aliases) and DSO exports (for DLL imports).
+ *
+ * Applied to both WEAK (alias-declarator members) and GLOBAL
+ * undefs (user references like crt2.o's call to `__set_app_type`)
+ * because the alias relationship is purely a naming convention in
+ * the mingw CRT — losing the aux TagIndex means we can't tell
+ * which side is the alias declarator. */
+ if (l->c->target.obj == CFREE_OBJ_COFF && s->name != 0) {
+ size_t nlen;
+ const char* nm = pool_str(l->c->global, s->name, &nlen);
+ Sym candidates[2] = {0, 0};
+ u32 ncand = 0;
+ if (nm && nlen >= 2 && nm[0] == '_') {
+ candidates[ncand++] =
+ pool_intern(l->c->global, nm + 1, (u32)(nlen - 1u));
+ }
+ if (nm && nlen > 0) {
+ char* buf = (char*)arena_array(l->c->scratch, char, nlen + 1u);
+ buf[0] = '_';
+ memcpy(buf + 1, nm, nlen);
+ candidates[ncand++] = pool_intern(l->c->global, buf, (u32)(nlen + 1u));
+ }
+ int resolved = 0;
+ for (u32 ci = 0; !resolved && ci < ncand; ++ci) {
+ Sym alias = candidates[ci];
+ if (alias == 0) continue;
+ LinkSymId hit = symhash_get(&img->globals, alias);
+ if (hit != LINK_SYM_NONE) {
+ LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1);
+ if (def->defined || def->imported) {
+ s->name = def->name;
+ s->section_id = def->section_id;
+ s->value = def->value;
+ s->vaddr = def->vaddr;
+ s->kind = def->kind;
+ s->defined = def->defined;
+ s->imported = def->imported;
+ s->dso_input_id = def->dso_input_id;
+ if (!s->defined && !s->imported) {
+ s->kind = SK_ABS;
+ s->vaddr = 0;
+ s->defined = 1;
+ }
+ resolved = 1;
+ break;
+ }
+ }
+ LinkInputId dso = find_dso_export(l, alias);
+ if (dso != LINK_INPUT_NONE) {
+ s->name = alias;
+ s->imported = 1;
+ s->dso_input_id = dso;
+ resolved = 1;
+ break;
+ }
+ }
+ if (resolved) continue;
+ }
if (s->bind == SB_WEAK) {
s->kind = SK_ABS;
s->vaddr = 0;
@@ -404,10 +501,12 @@ void link_gc_compute(Linker* l, LinkImage* img, GcLive* g) {
if (!l->gc_sections) {
for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+ InputMap* m = &img->input_maps[ii];
u32 nsec = obj_section_count(ob);
for (j = 1; j < nsec; ++j) {
const Section* s = obj_section_get(ob, j);
- if (s && link_section_kept(s)) g->marks[ii][j] = 1;
+ if (s && link_section_kept(s) && !m->comdat_discarded[j])
+ g->marks[ii][j] = 1;
}
}
return;
@@ -417,11 +516,13 @@ void link_gc_compute(Linker* l, LinkImage* img, GcLive* g) {
for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+ InputMap* m = &img->input_maps[ii];
u32 nsec = obj_section_count(ob);
for (j = 1; j < nsec; ++j) {
const Section* s = obj_section_get(ob, j);
int root;
if (!s || !link_section_kept(s)) continue;
+ if (m->comdat_discarded[j]) continue;
root = (s->flags & SF_RETAIN) || s->sem == SSEM_INIT_ARRAY ||
s->sem == SSEM_FINI_ARRAY || s->sem == SSEM_PREINIT_ARRAY;
if (root) gc_mark(g, &q, h, ii, j);
@@ -503,13 +604,23 @@ static void include_archive_member(Linker* l, const LinkArchive* ar,
LinkInput* in;
LinkInputId id;
u32 idx;
+ Sym coff_dll = 0;
if (mem->included) return;
in = LinkInputs_push(&l->inputs, &idx);
if (!in)
compiler_panic(l->c, no_loc(), "link: oom growing inputs (archive member)");
id = (LinkInputId)(idx + 1u);
in->id = id;
- in->kind = LINK_INPUT_OBJ_BYTES;
+ /* PE/COFF short-import shim: read_coff_short_import stashes the
+ * providing DLL name on the ObjBuilder. Such members behave like
+ * DSO inputs — symbols are exports, not local definitions — so route
+ * through LINK_INPUT_DSO_BYTES with the DLL name as the soname. */
+ if (mem->obj && obj_get_coff_import_dll(mem->obj, &coff_dll) && coff_dll) {
+ in->kind = LINK_INPUT_DSO_BYTES;
+ in->soname = coff_dll;
+ } else {
+ in->kind = LINK_INPUT_OBJ_BYTES;
+ }
in->order = ar->order;
in->obj = mem->obj;
in->name = mem->name;
@@ -531,10 +642,21 @@ static void scan_presence_before(Linker* l, u32 max_order, SymHash* defined,
const ObjSym* s = e.sym;
if (s->name == 0) continue;
if (s->bind == SB_LOCAL) continue;
- if (obj_sym_is_logical_undef(s))
+ if (obj_sym_is_logical_undef(s)) {
+ /* Match the spurious-UNDEF prune in link_resolve (line 109) and
+ * obj_sweep_dead at .o emit (obj.c:513): an unreferenced
+ * global/weak extern declaration is a header artifact, not a
+ * real demand to pull from an archive. Without this check the
+ * C frontend's per-extern undef synthesis (e.g. every prototype
+ * in <math.h>) drags in matching archive members even when the
+ * user's source never references them. */
+ if (!s->referenced &&
+ (s->bind == SB_GLOBAL || s->bind == SB_WEAK))
+ continue;
symhash_set(undefs, s->name, 1u);
- else
+ } else {
symhash_set(defined, s->name, 1u);
+ }
}
obj_symiter_free(it);
}
@@ -562,7 +684,7 @@ static int inputs_have_defined_ifunc_before(Linker* l, u32 max_order) {
}
static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined,
- const SymHash* wanted) {
+ const SymHash* wanted, int coff_target) {
ObjSymIter* it;
ObjSymEntry e;
int hit = 0;
@@ -570,7 +692,16 @@ static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined,
while (obj_symiter_next(it, &e)) {
const ObjSym* s = e.sym;
if (s->name == 0) continue;
- if (s->kind == SK_UNDEF) continue;
+ /* In COFF archives, WEAK_EXTERNAL alias declarations are read as
+ * SB_WEAK + SK_UNDEF (cfree has no native alias model — see
+ * coff_read.c step "WEAK_EXTERNAL primary"). The archive's symbol
+ * map still lists the member as the canonical provider of that
+ * name, so treat such weak undefs as defining for the archive-pull
+ * decision. The actual alias-to-target resolution happens later in
+ * link_resolve_undefs. */
+ if (s->kind == SK_UNDEF) {
+ if (!(coff_target && s->bind == SB_WEAK)) continue;
+ }
if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue;
if (symhash_get(wanted, s->name) == LINK_SYM_NONE) continue;
if (symhash_get(defined, s->name) != LINK_SYM_NONE) continue;
@@ -581,6 +712,78 @@ static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined,
return hit;
}
+/* Synthesize an ObjBuilder providing the mingw CRT ctor/dtor list
+ * boundary symbols (`__CTOR_LIST__`, `__CTOR_END__`, `__DTOR_LIST__`,
+ * `__DTOR_END__`), all at offset 0 of one 16-byte zero blob, and push it
+ * as an extra object input. On AArch64 targets the same object also
+ * defines `__chkstk`. No-op unless the target object format is COFF.
+ * mingw's gccmain.o references the list symbols and walks them at
+ * startup; lld/binutils generate them via the linker script's `.ctors` /
+ * `.dtors` rules. cfree has no script for PE, so we inject them here.
+ *
+ * Zero contents are intentional for the empty-list case:
+ * - __do_global_ctors loads `*(u32*)__CTOR_LIST__`; sees 0; cbz
+ * short-circuit returns without iterating.
+ * - __do_global_dtors loads `*(u64*)__DTOR_LIST__`; sees 0; cbz
+ * short-circuit returns.
+ * Programs with real ctor/dtor sections would need this to coordinate
+ * with .ctors/.dtors layout; v1 covers the empty case (hello-world). */
+void link_synth_coff_ctor_dtor_list(Linker* l) {
+ ObjBuilder* ob;
+ ObjSecId sid;
+ static const u8 kZeros[16] = {0};
+ /* AArch64 __chkstk: probes `x15 * 16` bytes of stack one page at a
+ * time, then returns. Mirrors the LLVM compiler-rt implementation
+ * (chkstk.S in builtins/aarch64). 28 bytes. */
+ static const u8 kAa64Chkstk[28] = {
+ 0xf0, 0xed, 0x7c, 0xd3, /* lsl x16, x15, #4 */
+ 0xf1, 0x03, 0x00, 0x91, /* mov x17, sp */
+ 0x31, 0x06, 0x40, 0xd1, /* sub x17, x17, #0x1, lsl #12 */
+ 0x10, 0x06, 0x40, 0xf1, /* subs x16, x16, #0x1, lsl #12 */
+ 0x3f, 0x02, 0x40, 0xf9, /* ldr xzr, [x17] */
+ 0xac, 0xff, 0xff, 0x54, /* b.gt #-0xc (back to the sub x17) */
+ 0xc0, 0x03, 0x5f, 0xd6, /* ret */
+ };
+ LinkInput* in;
+ u32 idx;
+ if (!l || l->c->target.obj != CFREE_OBJ_COFF) return;
+ ob = obj_new(l->c);
+ if (!ob) return;
+ sid = obj_section_ex(ob, pool_intern_cstr(l->c->global, ".rdata$ctors"),
+ SEC_RODATA, SSEM_PROGBITS, SF_ALLOC | SF_RETAIN, 16,
+ 0u, 0u, 0u);
+ obj_section_replace_bytes(ob, sid, kZeros, sizeof(kZeros));
+ obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__CTOR_LIST__"),
+ SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0);
+ obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__CTOR_END__"),
+ SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0);
+ obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__DTOR_LIST__"),
+ SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0);
+ obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__DTOR_END__"),
+ SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0);
+ /* __chkstk: only the aa64 variant is synthesized here; x64 codegen
+ * already emits inline probes (or links libmingwex's __chkstk
+ * which is a plain object, not an ARM64EC alias). */
+ if (l->c->target.arch == CFREE_ARCH_ARM_64) {
+ ObjSecId tsid =
+ obj_section_ex(ob, pool_intern_cstr(l->c->global, ".text$chkstk"),
+ SEC_TEXT, SSEM_PROGBITS,
+ SF_ALLOC | SF_EXEC | SF_RETAIN, 4, 0u, 0u, 0u);
+ obj_section_replace_bytes(ob, tsid, kAa64Chkstk, sizeof(kAa64Chkstk));
+ obj_symbol_ex(ob, pool_intern_cstr(l->c->global, "__chkstk"), SB_GLOBAL,
+ SV_DEFAULT, SK_FUNC, tsid, 0, sizeof(kAa64Chkstk), 0);
+ }
+ obj_finalize(ob);
+ in = LinkInputs_push(&l->inputs, &idx);
+ if (!in) compiler_panic(l->c, no_loc(), "link: oom growing inputs (synth)");
+ in->id = (LinkInputId)(idx + 1u);
+ in->kind = LINK_INPUT_OBJ_BYTES;
+ in->order = l->next_input_order++;
+ in->obj = ob;
+ in->name = pool_intern_cstr(l->c->global, "<cfree-synth-coff-runtime>");
+ in->soname = 0;
+}
+
void link_ingest_archives(Linker* l) {
u32 a, m;
if (LinkArchives_count(&l->archives) == 0) return;
@@ -588,8 +791,12 @@ void link_ingest_archives(Linker* l) {
for (a = 0; a < LinkArchives_count(&l->archives); ++a) {
LinkArchive* ar = LinkArchives_at(&l->archives, a);
if (!ar->whole_archive) continue;
- for (m = 0; m < ar->nmembers; ++m)
+ for (m = 0; m < ar->nmembers; ++m) {
+ /* obj==NULL is the long-form COFF head/trailer skip path
+ * (set by link_add_archive_bytes). Drop them silently. */
+ if (!ar->members[m].obj) continue;
include_archive_member(l, ar, &ar->members[m]);
+ }
}
for (a = 0; a < LinkArchives_count(&l->archives); ++a) {
@@ -608,10 +815,12 @@ void link_ingest_archives(Linker* l) {
symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE)
symhash_set(&undefs, want_ifunc_init, 1u);
+ int coff_target = (l->c->target.obj == CFREE_OBJ_COFF);
for (m = 0; m < ar->nmembers; ++m) {
LinkArchiveMember* mem = &ar->members[m];
if (mem->included) continue;
- if (!member_satisfies(mem, &defined, &undefs)) continue;
+ if (!mem->obj) continue; /* long-form skip (head/trailer) */
+ if (!member_satisfies(mem, &defined, &undefs, coff_target)) continue;
include_archive_member(l, ar, mem);
changed = 1;
}
diff --git a/src/obj/coff.h b/src/obj/coff.h
@@ -0,0 +1,598 @@
+/* PE/COFF wire-format constants, structs, and per-arch reloc translators
+ * shared between the object emitter (obj/coff_emit.c), the object reader
+ * (obj/coff_read.c), and the image linker (link/link_coff.c).
+ *
+ * Private to src/. The public ObjBuilder/Linker surface is format-neutral
+ * (obj/obj.h, link/link.h); the PE/COFF spelling of those abstractions
+ * only exists inside libcfree.
+ *
+ * Scope: 64-bit little-endian only — IMAGE_FILE_MACHINE_AMD64 (x86_64)
+ * and IMAGE_FILE_MACHINE_ARM64 (aarch64). 32-bit (i386 win32) and
+ * big-endian variants are out of scope. The per-arch reloc mapping is
+ * split across coff_reloc_<arch>.c, mirroring the ELF arrangement;
+ * emit_coff and the linker dispatch to the right translator by
+ * Compiler.target.arch. */
+
+#ifndef CFREE_OBJ_COFF_H
+#define CFREE_OBJ_COFF_H
+
+#include <cfree/core.h>
+
+#include "core/bytes.h"
+#include "core/core.h"
+#include "obj/obj.h"
+
+/* ---- file header (IMAGE_FILE_HEADER) ----
+ * On-disk: 20 bytes, little-endian, no padding. */
+#define COFF_FILE_HEADER_SIZE 20u
+
+typedef struct ImageFileHeader {
+ u16 Machine; /* IMAGE_FILE_MACHINE_* */
+ u16 NumberOfSections;
+ u32 TimeDateStamp; /* zero for reproducible builds */
+ u32 PointerToSymbolTable; /* file offset, or 0 if no symtab */
+ u32 NumberOfSymbols; /* counts aux records too */
+ u16 SizeOfOptionalHeader; /* 0 for .obj, 240 for PE32+ image */
+ u16 Characteristics; /* IMAGE_FILE_* */
+} ImageFileHeader;
+
+/* Machine types. Only AMD64 and ARM64 are emitted/read by cfree; the
+ * rest are listed for completeness so readers can give a useful
+ * "unsupported machine" diagnostic instead of "unknown machine". */
+#define IMAGE_FILE_MACHINE_UNKNOWN 0x0000u
+#define IMAGE_FILE_MACHINE_AMD64 0x8664u /* x86_64, cfree supports */
+#define IMAGE_FILE_MACHINE_ARM64 0xAA64u /* aarch64, cfree supports */
+#define IMAGE_FILE_MACHINE_ARM64EC 0xA641u /* ARM64EC — readers alias
+ * to AArch64 (encoding is
+ * identical, only ABI
+ * differs). llvm-mingw's
+ * compiler-rt builtins ship
+ * as ARM64EC objects. */
+/* Not supported by cfree (here for diagnostic recognition only): */
+#define IMAGE_FILE_MACHINE_I386 0x014Cu
+#define IMAGE_FILE_MACHINE_ARM 0x01C0u
+#define IMAGE_FILE_MACHINE_ARMNT 0x01C4u
+#define IMAGE_FILE_MACHINE_IA64 0x0200u
+#define IMAGE_FILE_MACHINE_RISCV64 0x5064u
+
+/* Characteristics flags (subset cfree handles). */
+#define IMAGE_FILE_RELOCS_STRIPPED 0x0001u
+#define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002u
+#define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020u
+#define IMAGE_FILE_DEBUG_STRIPPED 0x0200u
+#define IMAGE_FILE_DLL 0x2000u
+
+/* ---- DOS header + PE signature ----
+ * cfree only emits a minimal DOS stub for executable images: 'MZ'
+ * magic and the e_lfanew offset pointing at "PE\0\0". The remaining
+ * legacy fields are zeroed but kept named for clarity. The DOS stub
+ * program (typically prints "This program cannot be run in DOS mode")
+ * is emitted as a separate byte blob after this header. */
+#define COFF_DOS_HEADER_SIZE 64u
+#define IMAGE_DOS_SIGNATURE 0x5A4Du /* 'MZ' little-endian */
+#define IMAGE_NT_SIGNATURE 0x00004550u /* "PE\0\0" little-endian */
+
+typedef struct ImageDosHeader {
+ u16 e_magic; /* IMAGE_DOS_SIGNATURE */
+ u16 e_cblp;
+ u16 e_cp;
+ u16 e_crlc;
+ u16 e_cparhdr;
+ u16 e_minalloc;
+ u16 e_maxalloc;
+ u16 e_ss;
+ u16 e_sp;
+ u16 e_csum;
+ u16 e_ip;
+ u16 e_cs;
+ u16 e_lfarlc;
+ u16 e_ovno;
+ u16 e_res[4];
+ u16 e_oemid;
+ u16 e_oeminfo;
+ u16 e_res2[10];
+ u32 e_lfanew; /* file offset of "PE\0\0" */
+} ImageDosHeader;
+
+/* ---- optional header (PE32+, IMAGE_OPTIONAL_HEADER64) ----
+ * On-disk size for PE32+ with 16 DataDirectory entries = 240 bytes:
+ * 24 (standard) + 88 (windows-specific) + 16*8 (data directories). */
+#define COFF_OPT_HDR64_SIZE 240u
+#define COFF_DATA_DIRECTORY_SIZE 8u
+#define COFF_NUM_DATA_DIRECTORIES 16u
+
+#define IMAGE_NT_OPTIONAL_HDR64_MAGIC 0x020Bu
+
+/* Subsystem (Subsystem field). */
+#define IMAGE_SUBSYSTEM_UNKNOWN 0u
+#define IMAGE_SUBSYSTEM_NATIVE 1u
+#define IMAGE_SUBSYSTEM_WINDOWS_GUI 2u
+#define IMAGE_SUBSYSTEM_WINDOWS_CUI 3u /* console */
+
+/* DllCharacteristics. */
+#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA 0x0020u
+#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE 0x0040u
+#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT 0x0100u
+#define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400u
+#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000u
+
+/* DataDirectory indices into ImageOptionalHeader64.DataDirectory[]. */
+#define IMAGE_DIRECTORY_ENTRY_EXPORT 0
+#define IMAGE_DIRECTORY_ENTRY_IMPORT 1
+#define IMAGE_DIRECTORY_ENTRY_RESOURCE 2
+#define IMAGE_DIRECTORY_ENTRY_EXCEPTION 3
+#define IMAGE_DIRECTORY_ENTRY_SECURITY 4
+#define IMAGE_DIRECTORY_ENTRY_BASERELOC 5
+#define IMAGE_DIRECTORY_ENTRY_DEBUG 6
+#define IMAGE_DIRECTORY_ENTRY_ARCHITECTURE 7
+#define IMAGE_DIRECTORY_ENTRY_GLOBALPTR 8
+#define IMAGE_DIRECTORY_ENTRY_TLS 9
+#define IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG 10
+#define IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT 11
+#define IMAGE_DIRECTORY_ENTRY_IAT 12
+#define IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT 13
+#define IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR 14
+/* index 15 is reserved (must be zero) */
+
+typedef struct ImageDataDirectory {
+ u32 VirtualAddress; /* RVA */
+ u32 Size;
+} ImageDataDirectory;
+
+typedef struct ImageOptionalHeader64 {
+ /* Standard fields (24 bytes for PE32+; PE32's BaseOfData is absent). */
+ u16 Magic; /* IMAGE_NT_OPTIONAL_HDR64_MAGIC */
+ u8 MajorLinkerVersion;
+ u8 MinorLinkerVersion;
+ u32 SizeOfCode;
+ u32 SizeOfInitializedData;
+ u32 SizeOfUninitializedData;
+ u32 AddressOfEntryPoint; /* RVA of the image entry point */
+ u32 BaseOfCode;
+ /* Windows-specific (88 bytes for PE32+). */
+ u64 ImageBase; /* preferred load address */
+ u32 SectionAlignment; /* in-memory alignment, >= page */
+ u32 FileAlignment; /* on-disk alignment */
+ u16 MajorOperatingSystemVersion;
+ u16 MinorOperatingSystemVersion;
+ u16 MajorImageVersion;
+ u16 MinorImageVersion;
+ u16 MajorSubsystemVersion;
+ u16 MinorSubsystemVersion;
+ u32 Win32VersionValue; /* reserved, zero */
+ u32 SizeOfImage; /* in-memory size, SectionAlignment-padded */
+ u32 SizeOfHeaders; /* file offset of first section's raw data */
+ u32 CheckSum;
+ u16 Subsystem; /* IMAGE_SUBSYSTEM_* */
+ u16 DllCharacteristics; /* IMAGE_DLLCHARACTERISTICS_* */
+ u64 SizeOfStackReserve;
+ u64 SizeOfStackCommit;
+ u64 SizeOfHeapReserve;
+ u64 SizeOfHeapCommit;
+ u32 LoaderFlags; /* reserved, zero */
+ u32 NumberOfRvaAndSizes; /* COFF_NUM_DATA_DIRECTORIES */
+ /* Data directories (128 bytes = 16 * 8). */
+ ImageDataDirectory DataDirectory[COFF_NUM_DATA_DIRECTORIES];
+} ImageOptionalHeader64;
+
+/* ---- section header (IMAGE_SECTION_HEADER) ----
+ * On-disk: 40 bytes, no padding. */
+#define COFF_SECTION_HEADER_SIZE 40u
+
+/* Name field convention: 8 raw bytes. If the section name is <= 8 chars
+ * the bytes are the name, zero-padded (not necessarily NUL-terminated
+ * if exactly 8). For longer names (only legal in object files, not
+ * images) the form is "/<decimal-offset>" where <offset> is the
+ * ASCII decimal offset into the string table. Emit/read paths
+ * must marshal this convention explicitly. */
+typedef struct ImageSectionHeader {
+ char Name[8];
+ u32 VirtualSize; /* size in image; for .obj usually 0 */
+ u32 VirtualAddress; /* RVA in image; for .obj usually 0 */
+ u32 SizeOfRawData;
+ u32 PointerToRawData; /* file offset */
+ u32 PointerToRelocations; /* file offset of reloc array */
+ u32 PointerToLinenumbers; /* file offset of COFF linenumbers (legacy) */
+ u16 NumberOfRelocations;
+ u16 NumberOfLinenumbers;
+ u32 Characteristics; /* IMAGE_SCN_* */
+} ImageSectionHeader;
+
+/* Section characteristics flags. */
+#define IMAGE_SCN_CNT_CODE 0x00000020u
+#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040u
+#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080u
+#define IMAGE_SCN_LNK_INFO 0x00000200u
+#define IMAGE_SCN_LNK_REMOVE 0x00000800u
+#define IMAGE_SCN_LNK_COMDAT 0x00001000u
+#define IMAGE_SCN_GPREL 0x00008000u
+#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000u
+#define IMAGE_SCN_MEM_SHARED 0x10000000u
+#define IMAGE_SCN_MEM_EXECUTE 0x20000000u
+#define IMAGE_SCN_MEM_READ 0x40000000u
+#define IMAGE_SCN_MEM_WRITE 0x80000000u
+
+/* Alignment lives in bits 20..23 of Characteristics. Encoding is
+ * (log2(align) + 1) << 20: ALIGN_1BYTES = 1<<20, ALIGN_2BYTES = 2<<20,
+ * ..., ALIGN_8192BYTES = 14<<20. Zero in the field means "default"
+ * (16-byte for code). */
+#define IMAGE_SCN_ALIGN_1BYTES 0x00100000u
+#define IMAGE_SCN_ALIGN_2BYTES 0x00200000u
+#define IMAGE_SCN_ALIGN_4BYTES 0x00300000u
+#define IMAGE_SCN_ALIGN_8BYTES 0x00400000u
+#define IMAGE_SCN_ALIGN_16BYTES 0x00500000u
+#define IMAGE_SCN_ALIGN_32BYTES 0x00600000u
+#define IMAGE_SCN_ALIGN_64BYTES 0x00700000u
+#define IMAGE_SCN_ALIGN_128BYTES 0x00800000u
+#define IMAGE_SCN_ALIGN_256BYTES 0x00900000u
+#define IMAGE_SCN_ALIGN_512BYTES 0x00A00000u
+#define IMAGE_SCN_ALIGN_1024BYTES 0x00B00000u
+#define IMAGE_SCN_ALIGN_2048BYTES 0x00C00000u
+#define IMAGE_SCN_ALIGN_4096BYTES 0x00D00000u
+#define IMAGE_SCN_ALIGN_8192BYTES 0x00E00000u
+#define IMAGE_SCN_ALIGN_MASK 0x00F00000u
+
+/* Encode an alignment given as n = log2(bytes): n=0 (2^0=1B) -> 1<<20,
+ * n=13 (2^13=8192B) -> 14<<20. */
+#define IMAGE_SCN_ALIGN_FROM_LOG2(n) (((u32)((n) + 1u)) << 20)
+
+/* ---- symbol record (IMAGE_SYMBOL) ----
+ * On-disk: 18 bytes per record, packed to 2-byte alignment (pragma
+ * pack(2) in the official headers). The host C struct below would
+ * have sizeof >= 20 due to padding; emit/read MUST marshal field by
+ * field — never write sizeof(ImageSymbol). The COFF_SYMBOL_SIZE
+ * constant is the source of truth. */
+#define COFF_SYMBOL_SIZE 18u
+
+typedef struct ImageSymbol {
+ union {
+ char ShortName[8]; /* in-place if name <= 8 bytes */
+ struct {
+ u32 Zeroes; /* 0 signals strtab lookup */
+ u32 Offset; /* string-table offset (>= 4) */
+ } LongName;
+ } Name;
+ u32 Value;
+ i16 SectionNumber; /* 1-based; specials below */
+ u16 Type; /* low4=base, high12=derived */
+ u8 StorageClass; /* IMAGE_SYM_CLASS_* */
+ u8 NumberOfAuxSymbols;
+} ImageSymbol;
+
+/* Section number specials (i16-valued sentinel values). */
+#define IMAGE_SYM_UNDEFINED 0
+#define IMAGE_SYM_ABSOLUTE (-1)
+#define IMAGE_SYM_DEBUG (-2)
+
+/* Type encoding. Low 4 bits = base type; the derived (complex) type sits
+ * directly above it, starting at bit 4. The only derived type cfree
+ * distinguishes is FUNCTION: ((Type >> 4) & 3) == DTYPE_FUNCTION, Type 0x20. */
+#define IMAGE_SYM_TYPE_NULL 0u
+#define IMAGE_SYM_DTYPE_NULL 0u
+#define IMAGE_SYM_DTYPE_FUNCTION 2u
+#define COFF_SYM_TYPE_FUNCTION (IMAGE_SYM_DTYPE_FUNCTION << 4) /* 0x20 */
+
+/* Storage classes. The subset cfree emits is EXTERNAL, STATIC, FILE,
+ * SECTION, WEAK_EXTERNAL; readers must additionally skip LABEL and
+ * FUNCTION (.bf/.ef debug pairs). END_OF_FUNCTION is signed -1 (the
+ * field is u8 so the wire value is 0xFF). */
+#define IMAGE_SYM_CLASS_END_OF_FUNCTION 0xFFu
+#define IMAGE_SYM_CLASS_NULL 0u
+#define IMAGE_SYM_CLASS_AUTOMATIC 1u
+#define IMAGE_SYM_CLASS_EXTERNAL 2u
+#define IMAGE_SYM_CLASS_STATIC 3u
+#define IMAGE_SYM_CLASS_REGISTER 4u
+#define IMAGE_SYM_CLASS_EXTERNAL_DEF 5u
+#define IMAGE_SYM_CLASS_LABEL 6u
+#define IMAGE_SYM_CLASS_UNDEFINED_LABEL 7u
+#define IMAGE_SYM_CLASS_FUNCTION 101u /* .bf / .ef markers */
+#define IMAGE_SYM_CLASS_FILE 103u /* aux records hold filename */
+#define IMAGE_SYM_CLASS_SECTION 104u
+#define IMAGE_SYM_CLASS_WEAK_EXTERNAL 105u
+#define IMAGE_SYM_CLASS_CLR_TOKEN 107u
+
+/* ---- aux records ----
+ * Each aux record is exactly COFF_SYMBOL_SIZE (18 bytes) on disk; the
+ * structs below are wire-shaped but again must be marshalled field by
+ * field rather than via sizeof. */
+#define COFF_AUX_SECTION_SIZE 18u
+#define COFF_AUX_WEAKEXTERN_SIZE 18u
+#define COFF_AUX_FILE_SIZE 18u
+#define COFF_AUX_FUNCTION_SIZE 18u
+
+/* Follows a STATIC symbol whose Value is 0 and SectionNumber is the
+ * section's 1-based index. Encodes per-section metadata + COMDAT
+ * grouping. */
+typedef struct ImageAuxSymbolSection {
+ u32 Length; /* section's SizeOfRawData */
+ u16 NumberOfRelocations;
+ u16 NumberOfLinenumbers;
+ u32 CheckSum; /* COMDAT checksum, 0 otherwise */
+ u16 Number; /* associated section idx for COMDAT */
+ u8 Selection; /* IMAGE_COMDAT_SELECT_* */
+ u8 Unused[3];
+} ImageAuxSymbolSection;
+
+/* Follows a WEAK_EXTERNAL symbol. TagIndex is the symbol-table index
+ * of the fall-back symbol used when the weak ref is unresolved. */
+typedef struct ImageAuxSymbolWeakExternal {
+ u32 TagIndex;
+ u32 Characteristics; /* IMAGE_WEAK_EXTERN_SEARCH_* */
+ u8 Unused[10];
+} ImageAuxSymbolWeakExternal;
+
+/* Follows a FILE symbol. For source paths longer than 18 bytes the
+ * NumberOfAuxSymbols on the parent FILE record is >1 and the name
+ * spans multiple aux records concatenated. */
+typedef struct ImageAuxSymbolFile {
+ char FileName[18];
+} ImageAuxSymbolFile;
+
+/* Function-definition aux layout (.bf/.ef use a different one). cfree does
+ * not emit it but the reader must skip it when walking the symbol table. */
+typedef struct ImageAuxSymbolFunction {
+ u32 TagIndex;
+ u32 TotalSize;
+ u32 PointerToLinenumber;
+ u32 PointerToNextFunction;
+ u8 Unused[2];
+} ImageAuxSymbolFunction;
+
+/* COMDAT selection (ImageAuxSymbolSection.Selection). The field is a
+ * single byte on disk, so these values are narrowed to u8 when written.
+ * Per-value meanings below follow the Microsoft PE format
+ * specification. */
+#define IMAGE_COMDAT_SELECT_NODUPLICATES 1u /* duplicate definition is a link error */
+#define IMAGE_COMDAT_SELECT_ANY 2u /* keep any one copy, discard the rest */
+#define IMAGE_COMDAT_SELECT_SAME_SIZE 3u /* any copy; all copies must have equal size */
+#define IMAGE_COMDAT_SELECT_EXACT_MATCH 4u /* any copy; all copies must be identical */
+#define IMAGE_COMDAT_SELECT_ASSOCIATIVE 5u /* kept iff the section named by Number is kept */
+#define IMAGE_COMDAT_SELECT_LARGEST 6u /* keep the largest copy */
+#define IMAGE_COMDAT_SELECT_NEWEST 7u /* NOTE(review): defined in winnt.h, not described in the PE spec */
+
+/* Weak-external resolution policy (ImageAuxSymbolWeakExternal
+ * .Characteristics). */
+#define IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY 1u /* do not search libraries for the weak symbol */
+#define IMAGE_WEAK_EXTERN_SEARCH_LIBRARY 2u /* search libraries for the weak symbol */
+#define IMAGE_WEAK_EXTERN_SEARCH_ALIAS 3u /* weak symbol is an alias for the TagIndex symbol */
+
+/* ---- relocation entry (IMAGE_RELOCATION) ----
+ * On-disk: 10 bytes per record, packed to 2-byte alignment. Same
+ * sizeof caveat as ImageSymbol — never write sizeof(ImageRelocation),
+ * use COFF_RELOC_SIZE. */
+/* Byte size of one on-disk relocation record (4 + 4 + 2). */
+#define COFF_RELOC_SIZE 10u
+
+/* In-memory mirror of one relocation record. The "+N" notes are the
+ * byte offsets of each field inside the 10-byte on-disk record. */
+typedef struct ImageRelocation {
+ u32 VirtualAddress; /* +0: offset within the section being patched */
+ u32 SymbolTableIndex; /* +4: index into the COFF symbol table (aux slots count) */
+ u16 Type; /* +8: IMAGE_REL_<machine>_* */
+} ImageRelocation;
+
+/* ---- string table layout ----
+ * Immediately follows the symbol table on disk:
+ * [0..3] u32 total size in bytes, INCLUSIVE of these 4 bytes.
+ * [4..] concatenated NUL-terminated UTF-8 strings.
+ * Therefore the smallest legal offset for a name reference is 4, and
+ * Offset == 0 in the LongName form is reserved (means "no string").
+ * Empty string tables write the size field as 4 (i.e. no payload). */
+#define COFF_STRTAB_SIZE_FIELD_BYTES 4u
+
+/* ---- base relocation block (IMAGE_BASE_RELOCATION) ----
+ * Used in the .reloc directory of PE images. Each block describes
+ * fixups for one 4 KiB page: VirtualAddress is the page base RVA, the
+ * payload is (SizeOfBlock - 8) bytes of u16 entries packed as
+ * (type:4, offset:12) where offset is relative to VirtualAddress. */
+/* Header of one base-relocation block; the u16 fixup entries follow it
+ * directly on disk. */
+typedef struct ImageBaseRelocation {
+ u32 VirtualAddress; /* page base RVA the entries are relative to */
+ u32 SizeOfBlock; /* header (8) + entries; padded to u32 */
+} ImageBaseRelocation;
+/* On-disk size of the block header above (two u32 fields). */
+#define COFF_BASE_RELOCATION_SIZE 8u
+
+/* Fixup kinds stored in the 4-bit type part of each u16 entry.
+ * Per-value meanings follow the Microsoft PE format specification. */
+#define IMAGE_REL_BASED_ABSOLUTE 0u /* skip entry, used for padding */
+#define IMAGE_REL_BASED_HIGH 1u /* add high 16 bits of the delta to a u16 */
+#define IMAGE_REL_BASED_LOW 2u /* add low 16 bits of the delta to a u16 */
+#define IMAGE_REL_BASED_HIGHLOW 3u /* add the full 32-bit delta to a u32 */
+#define IMAGE_REL_BASED_HIGHADJ 4u /* high 16 bits; low half lives in the next entry */
+#define IMAGE_REL_BASED_ARM_MOV32 5u /* 32-bit address split over a MOVW/MOVT pair */
+#define IMAGE_REL_BASED_DIR64 10u /* the one used on x64 / arm64 */
+
+/* ---- export directory (IMAGE_EXPORT_DIRECTORY) ----
+ * One record, pointed at by IMAGE_DIRECTORY_ENTRY_EXPORT in the
+ * optional header. AddressOfFunctions is the EAT (u32 RVAs); the ENT
+ * (u32 RVAs at AddressOfNames) is parallel to the ordinal table
+ * (u16 ordinals at AddressOfNameOrdinals) and indexes into the EAT.
+ * An EAT entry whose RVA falls inside the export directory's own
+ * [VA, VA+Size) range is a *forwarder*: the bytes at that RVA are a
+ * "OTHERMODULE.OtherName" NUL-terminated string and the OS loader
+ * follows the chain at load time. */
+/* On-disk size of the export directory record: 2*u32 + 2*u16 + 7*u32. */
+#define COFF_EXPORT_DIR_SIZE 40u
+
+/* In-memory mirror of the export directory record. All Address* / Name
+ * fields are RVAs into the image, not file offsets. */
+typedef struct ImageExportDirectory {
+ u32 Characteristics;
+ u32 TimeDateStamp;
+ u16 MajorVersion;
+ u16 MinorVersion;
+ u32 Name; /* RVA of DLL name */
+ u32 Base; /* first ordinal */
+ u32 NumberOfFunctions; /* entry count of the EAT */
+ u32 NumberOfNames; /* entry count of both the ENT and the ordinal table */
+ u32 AddressOfFunctions; /* EAT: RVA[NumberOfFunctions] */
+ u32 AddressOfNames; /* ENT: RVA[NumberOfNames] */
+ u32 AddressOfNameOrdinals; /* parallel ordinals: u16[NumberOfNames] */
+} ImageExportDirectory;
+
+/* ---- import directory (IMAGE_IMPORT_DESCRIPTOR) ----
+ * Array of these, terminated by an all-zero entry, lives at the RVA
+ * named by IMAGE_DIRECTORY_ENTRY_IMPORT in the optional header.
+ * OriginalFirstThunk -> the import lookup table (read-only); FirstThunk
+ * -> the IAT (overwritten by the loader with resolved addresses). */
+/* On-disk size of one import descriptor: five u32 fields. */
+#define COFF_IMPORT_DESCRIPTOR_SIZE 20u
+
+/* One entry of the import directory array (one per imported DLL). The
+ * terminating entry of the array has every field zero. */
+typedef struct ImageImportDescriptor {
+ u32 OriginalFirstThunk; /* RVA -> IMAGE_THUNK_DATA64[] (ILT) */
+ u32 TimeDateStamp;
+ u32 ForwarderChain;
+ u32 Name; /* RVA -> NUL-terminated DLL name */
+ u32 FirstThunk; /* RVA -> IMAGE_THUNK_DATA64[] (IAT) */
+} ImageImportDescriptor;
+
+/* Thunk entries are u64 on PE32+. If the high bit (IMAGE_ORDINAL_FLAG64)
+ * is set, the low 16 bits hold an ordinal. Otherwise the value is an
+ * RVA to an IMAGE_IMPORT_BY_NAME record. */
+/* Bit 63 of a PE32+ thunk: set means "import by ordinal". */
+#define IMAGE_ORDINAL_FLAG64 0x8000000000000000ull
+/* On-disk size of one PE32+ thunk entry (a single u64). */
+#define COFF_THUNK_DATA64_SIZE 8u
+
+/* Fixed-size head of a hint/name record. Only the u16 hint is modelled
+ * here; the variable-length name follows it directly on disk, so the
+ * struct size must not be used to size the record. */
+typedef struct ImageImportByName {
+ u16 Hint; /* index hint into the DLL's export table */
+ /* char Name[]; NUL-terminated, followed by optional pad to even. */
+} ImageImportByName;
+
+/* ---- TLS directory (IMAGE_TLS_DIRECTORY64) ----
+ * Pointed at by IMAGE_DIRECTORY_ENTRY_TLS in the optional header. The
+ * loader walks the callbacks array (NULL-terminated) before main runs. */
+/* On-disk size of the record: four u64 fields plus two u32 fields. */
+#define COFF_TLS_DIRECTORY64_SIZE 40u
+
+/* In-memory mirror of the 64-bit TLS directory. The four u64 fields are
+ * virtual addresses (image base included), not RVAs. */
+typedef struct ImageTlsDirectory64 {
+ u64 StartAddressOfRawData; /* VA of the first byte of the TLS template */
+ u64 EndAddressOfRawData; /* VA one past the last byte of the TLS template */
+ u64 AddressOfIndex; /* VA of u32 _tls_index */
+ u64 AddressOfCallBacks; /* VA of NULL-terminated callback array */
+ u32 SizeOfZeroFill;
+ u32 Characteristics; /* alignment encoded as IMAGE_SCN_ALIGN_* */
+} ImageTlsDirectory64;
+
+/* ---- short import record (Microsoft .lib member) ----
+ * Inside an archive whose member-data starts with Sig1==0, Sig2==0xFFFF
+ * the rest of the member is this "short import" descriptor: a fixed
+ * 20-byte header followed by SizeOfData bytes containing two
+ * NUL-terminated strings — symbol name then DLL name. */
+/* On-disk size of the fixed short-import header: 4*u16 + 2*u32 + 2*u16. */
+#define COFF_IMPORT_OBJECT_HEADER_SIZE 20u
+/* Signature pair that distinguishes a short-import member from a
+ * regular COFF object. */
+#define IMPORT_OBJECT_HDR_SIG1 0x0000u
+#define IMPORT_OBJECT_HDR_SIG2 0xFFFFu
+
+/* In-memory mirror of the short-import header. The two NUL-terminated
+ * strings (symbol name, then DLL name) follow it on disk and together
+ * occupy SizeOfData bytes. */
+typedef struct ImportObjectHeader {
+ u16 Sig1; /* IMPORT_OBJECT_HDR_SIG1 (0) */
+ u16 Sig2; /* IMPORT_OBJECT_HDR_SIG2 (0xFFFF) */
+ u16 Version;
+ u16 Machine; /* IMAGE_FILE_MACHINE_* */
+ u32 TimeDateStamp;
+ u32 SizeOfData; /* bytes after this header */
+ u16 OrdinalOrHint;
+ /* Bitfield encoded as a single u16 on the wire:
+ * Type:2, NameType:3, Reserved:11 (low-to-high). */
+ u16 TypeFlags;
+} ImportObjectHeader;
+
+/* Values of the 2-bit Type part of TypeFlags (bits 0..1). */
+#define IMPORT_OBJECT_CODE 0u
+#define IMPORT_OBJECT_DATA 1u
+#define IMPORT_OBJECT_CONST 2u
+
+/* Values of the 3-bit NameType part of TypeFlags (bits 2..4). */
+#define IMPORT_OBJECT_ORDINAL 0u
+#define IMPORT_OBJECT_NAME 1u
+#define IMPORT_OBJECT_NAME_NOPREFIX 2u
+#define IMPORT_OBJECT_NAME_UNDECORATE 3u
+
+/* ---- debug directory (IMAGE_DEBUG_DIRECTORY) ----
+ * Pointed at by IMAGE_DIRECTORY_ENTRY_DEBUG. cfree emits a single
+ * IMAGE_DEBUG_TYPE_REPRO entry to mark the image as deterministic. */
+/* On-disk size of one debug directory entry: 2*u32 + 2*u16 + 4*u32. */
+#define COFF_DEBUG_DIRECTORY_SIZE 28u
+
+/* In-memory mirror of one debug directory entry. */
+typedef struct ImageDebugDirectory {
+ u32 Characteristics; /* reserved, zero */
+ u32 TimeDateStamp;
+ u16 MajorVersion;
+ u16 MinorVersion;
+ u32 Type; /* IMAGE_DEBUG_TYPE_* */
+ u32 SizeOfData; /* byte size of the payload the two fields below locate */
+ u32 AddressOfRawData; /* RVA in image */
+ u32 PointerToRawData; /* file offset */
+} ImageDebugDirectory;
+
+/* Values for ImageDebugDirectory.Type. */
+#define IMAGE_DEBUG_TYPE_UNKNOWN 0u
+#define IMAGE_DEBUG_TYPE_COFF 1u
+#define IMAGE_DEBUG_TYPE_CODEVIEW 2u
+#define IMAGE_DEBUG_TYPE_MISC 4u
+#define IMAGE_DEBUG_TYPE_REPRO 16u /* deterministic-build marker */
+
+/* ---- AMD64 (x86_64) PE/COFF wire reloc types ----
+ * The REL32_N variants encode the PC base N bytes after the relocation
+ * field (so REL32_1 maps to a -1 implicit addend in cfree's S + A - P
+ * model). Plain REL32 is relative to the byte after the 4-byte field.
+ * Descriptions of the remaining types follow the Microsoft PE format
+ * specification. */
+#define IMAGE_REL_AMD64_ABSOLUTE 0u /* no-op; also the "unsupported" sentinel of the _to translators */
+#define IMAGE_REL_AMD64_ADDR64 1u /* 64-bit VA */
+#define IMAGE_REL_AMD64_ADDR32 2u /* 32-bit VA */
+#define IMAGE_REL_AMD64_ADDR32NB 3u /* 32-bit RVA (image-relative) */
+#define IMAGE_REL_AMD64_REL32 4u /* 32-bit relative to next inst */
+#define IMAGE_REL_AMD64_REL32_1 5u /* REL32 with the PC base 1 byte further on */
+#define IMAGE_REL_AMD64_REL32_2 6u /* ... 2 bytes further on */
+#define IMAGE_REL_AMD64_REL32_3 7u /* ... 3 bytes further on */
+#define IMAGE_REL_AMD64_REL32_4 8u /* ... 4 bytes further on */
+#define IMAGE_REL_AMD64_REL32_5 9u /* ... 5 bytes further on */
+#define IMAGE_REL_AMD64_SECTION 10u /* 16-bit section index */
+#define IMAGE_REL_AMD64_SECREL 11u /* 32-bit section-relative */
+#define IMAGE_REL_AMD64_SECREL7 12u /* 7-bit unsigned section-relative offset */
+#define IMAGE_REL_AMD64_TOKEN 13u /* CLR token */
+#define IMAGE_REL_AMD64_SREL32 14u /* 32-bit signed span-dependent value, emitted into the object */
+#define IMAGE_REL_AMD64_PAIR 15u /* must immediately follow every span-dependent value */
+#define IMAGE_REL_AMD64_SSPAN32 16u /* 32-bit signed span-dependent value, applied at link time */
+
+/* ---- ARM64 PE/COFF wire reloc types ----
+ * Per-type descriptions follow the Microsoft PE format specification. */
+#define IMAGE_REL_ARM64_ABSOLUTE 0u /* no-op; also the "unsupported" sentinel of the _to translators */
+#define IMAGE_REL_ARM64_ADDR32 1u /* 32-bit VA */
+#define IMAGE_REL_ARM64_ADDR32NB 2u /* 32-bit RVA (image-relative) */
+#define IMAGE_REL_ARM64_BRANCH26 3u /* 26-bit displacement of B / BL */
+#define IMAGE_REL_ARM64_PAGEBASE_REL21 4u /* page base of the target, for ADRP */
+#define IMAGE_REL_ARM64_REL21 5u /* 21-bit PC-relative offset, for ADR */
+#define IMAGE_REL_ARM64_PAGEOFFSET_12A 6u /* low 12 bits of the target, ADD/ADDS immediate */
+#define IMAGE_REL_ARM64_PAGEOFFSET_12L 7u /* low 12 bits of the target, LDR/STR scaled immediate */
+#define IMAGE_REL_ARM64_SECREL 8u /* 32-bit section-relative */
+#define IMAGE_REL_ARM64_SECREL_LOW12A 9u /* bits 0..11 of the section offset, ADD/ADDS */
+#define IMAGE_REL_ARM64_SECREL_HIGH12A 10u /* bits 12..23 of the section offset, ADD/ADDS */
+#define IMAGE_REL_ARM64_SECREL_LOW12L 11u /* bits 0..11 of the section offset, LDR/STR */
+#define IMAGE_REL_ARM64_TOKEN 12u /* CLR token */
+#define IMAGE_REL_ARM64_SECTION 13u /* 16-bit section index */
+#define IMAGE_REL_ARM64_ADDR64 14u /* 64-bit VA */
+#define IMAGE_REL_ARM64_BRANCH19 15u /* 19-bit displacement of conditional B */
+#define IMAGE_REL_ARM64_BRANCH14 16u /* 14-bit displacement of TBZ / TBNZ */
+#define IMAGE_REL_ARM64_REL32 17u /* 32-bit relative to the byte after the field */
+
+/* ---- per-arch reloc translators ----
+ * Map cfree-canonical RelocKind <-> PE/COFF wire type. Contract
+ * matches elf_<arch>_reloc_{to,from}:
+ * _to: returns the wire type, or IMAGE_REL_*_ABSOLUTE (== 0) for
+ * unsupported input. Callers treat that as a panic trigger.
+ * _from: returns the canonical RelocKind, or (u32)-1 on unknown
+ * input. Callers diagnose the unknown wire value. */
+/* x86_64: canonical RelocKind -> IMAGE_REL_AMD64_* (0 when unsupported). */
+u32 coff_x86_64_reloc_to(u32 kind /* RelocKind */);
+/* x86_64: IMAGE_REL_AMD64_* -> canonical RelocKind ((u32)-1 when unknown). */
+u32 coff_x86_64_reloc_from(u32 wire_type);
+/* aarch64: canonical RelocKind -> IMAGE_REL_ARM64_* (0 when unsupported). */
+u32 coff_aarch64_reloc_to(u32 kind /* RelocKind */);
+/* aarch64: IMAGE_REL_ARM64_* -> canonical RelocKind ((u32)-1 when unknown). */
+u32 coff_aarch64_reloc_from(u32 wire_type);
+
+/* ---- little-endian byte writers/readers (Writer-based) ----
+ * Reads use rd_u*_le from core/bytes.h directly; only writes need the
+ * Writer-bridging wrappers. */
+
+/* Emit the low 8 bits of `v` to `w` as a single byte. The parameter is
+ * u32 so callers can pass wider expressions without a cast. */
+static inline void coff_wr_u8(Writer* w, u32 v) {
+  u8 octet = (u8)v;
+  cfree_writer_write(w, &octet, sizeof octet);
+}
+
+/* Emit the low 16 bits of `v` to `w` in little-endian byte order. */
+static inline void coff_wr_u16(Writer* w, u32 v) {
+  u8 le[2];
+  wr_u16_le(le, (u16)v);
+  cfree_writer_write(w, le, sizeof le);
+}
+
+/* Emit `v` to `w` as four little-endian bytes. */
+static inline void coff_wr_u32(Writer* w, u32 v) {
+  u8 le[4];
+  wr_u32_le(le, v);
+  cfree_writer_write(w, le, sizeof le);
+}
+
+/* Emit `v` to `w` as eight little-endian bytes. */
+static inline void coff_wr_u64(Writer* w, u64 v) {
+  u8 le[8];
+  wr_u64_le(le, v);
+  cfree_writer_write(w, le, sizeof le);
+}
+
+/* Little-endian readers: thin COFF-named aliases for the rd_u*_le
+ * helpers, so reader code reads symmetrically with the coff_wr_*
+ * writers above. */
+static inline u16 coff_rd_u16(const u8* p) {
+  u16 v = rd_u16_le(p);
+  return v;
+}
+
+static inline u32 coff_rd_u32(const u8* p) {
+  u32 v = rd_u32_le(p);
+  return v;
+}
+
+static inline u64 coff_rd_u64(const u8* p) {
+  u64 v = rd_u64_le(p);
+  return v;
+}
+
+#endif /* CFREE_OBJ_COFF_H */
diff --git a/src/obj/coff_emit.c b/src/obj/coff_emit.c
@@ -0,0 +1,731 @@
+/* PE/COFF relocatable .obj writer. Walks a finalized ObjBuilder and
+ * emits a 64-bit little-endian relocatable object via the supplied
+ * Writer. Counterpart to emit_elf / emit_macho.
+ *
+ * Layout strategy:
+ * 1. plan COFF sections (one per kept obj section), assigning
+ * Characteristics, alignment, raw size, and per-section reloc
+ * counts;
+ * 2. build the symbol table (synthesized per-section static symbols
+ * with section-definition aux records, plus file symbols and
+ * every ObjSym kept after sweep);
+ * 3. build per-section relocation records via the per-arch
+ * translator (arch_for_compiler(c)->coff->reloc_to);
+ * 4. assign file offsets:
+ * file header | section headers | (bytes + relocs)* | symtab | strtab
+ * 5. write the file in that order.
+ *
+ * 64-bit little-endian only — IMAGE_FILE_MACHINE_AMD64 (x86_64) and
+ * IMAGE_FILE_MACHINE_ARM64 (aarch64). Big-endian / ptr_size != 8 panic
+ * at entry.
+ *
+ * Section name mapping policy: we pass the cfree Section.name through
+ * verbatim to the COFF Name field. Callers / readers are expected to
+ * have stored COFF-shaped names (".text", ".rdata", ".tls$", etc.) at
+ * the obj layer; emit_coff does not rewrite ELF-style spellings like
+ * ".rodata" -> ".rdata". Names longer than 8 bytes spill into the
+ * string table with the "/<decimal-offset>" encoding.
+ *
+ * Addend handling: COFF stores the addend inline in the patched bytes
+ * (there is no addend field in IMAGE_RELOCATION). The ObjBuilder
+ * caller is responsible for having written the addend into the section
+ * bytes already — matching how MSVC / mingw emit. A nonzero
+ * Reloc::addend with has_explicit_addend set is rejected here as a
+ * known v1 limitation. */
+
+#include <string.h>
+
+#include "arch/arch.h"
+#include "core/arena.h"
+#include "core/buf.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/util.h"
+#include "obj/coff.h"
+
+/* Placeholder source location (all fields zero) for diagnostics raised
+ * by the emitter, which has no source position to blame. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* Returns 1 when an explicit addend of -4 on this relocation can be
+ * dropped instead of rejected: only on x86_64, only for the 32-bit
+ * PC-relative / PLT / GOTPCREL kinds listed below. Returns 0 for every
+ * other arch, addend or kind. */
+static int coff_rel32_absorbs_minus4(CfreeArchKind arch, RelocKind kind,
+                                     i64 addend) {
+  const int candidate = (arch == CFREE_ARCH_X86_64) && (addend == -4);
+  if (!candidate) return 0;
+
+  return kind == R_PC32 || kind == R_REL32 || kind == R_PLT32 ||
+         kind == R_X64_PLT32 || kind == R_X64_GOTPCREL ||
+         kind == R_X64_GOTPCRELX || kind == R_X64_REX_GOTPCRELX;
+}
+
+/* ---- per-COFF-section plan record ---- */
+
+/* Emitter-side plan for one output COFF section: the header fields that
+ * will be written verbatim plus the bookkeeping needed to locate the
+ * section's bytes and relocations in the source ObjBuilder. Instances
+ * are zero-initialised by emit_coff and filled in across its passes. */
+typedef struct CSec {
+ /* IMAGE_SECTION_HEADER fields (little-endian-encoded at write time). */
+ char name8[8]; /* Name field bytes; "/N" form if long name */
+ u32 virtual_size; /* nonzero for NOBITS (bss size) */
+ u32 size_of_raw_data; /* zero for NOBITS */
+ u32 pointer_to_raw_data; /* file offset of the bytes; 0 when nothing is stored */
+ u32 pointer_to_relocations; /* file offset of the reloc array; 0 when there are none */
+ u16 number_of_relocations;
+ u32 characteristics; /* IMAGE_SCN_* | ALIGN nibble */
+
+ /* Planning state. */
+ u32 align; /* in bytes, power of two */
+ u32 obj_sec; /* originating ObjSecId */
+ int is_nobits; /* 1 when the section has no on-disk bytes */
+ const Buf* obj_bytes; /* NULL when nobits */
+ u8* reloc_bytes; /* arena-allocated, nreloc * 10 bytes */
+ ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a group */
+} CSec;
+
+/* ---- emit ---- */
+
+/* Smallest r such that (1 << r) >= a, i.e. ceil(log2(a)); returns 0 for
+ * a == 0 or a == 1. The result saturates at 31: without that bound an
+ * input above 2^31 would drive the shift count to 32, which is
+ * undefined behaviour for a 32-bit operand (and in practice an endless
+ * loop). */
+static u32 log2_align(u32 a) {
+  u32 r = 0;
+  while (r < 31u && (1u << r) < a) ++r;
+  return r;
+}
+
+/* Map cfree section flags/sem to IMAGE_SCN_* Characteristics, leaving
+ * the alignment nibble for the caller to OR in. */
+/* Derive the IMAGE_SCN_* content and memory bits for section `s`.
+ * `in_group` non-zero adds IMAGE_SCN_LNK_COMDAT. The alignment nibble
+ * is never set here; the caller ORs it in afterwards. */
+static u32 sec_characteristics(const Section* s, int in_group) {
+  const int nobits = (s->kind == SEC_BSS) || (s->sem == SSEM_NOBITS);
+  u32 bits = 0;
+
+  /* Content class: code wins over bss, bss over initialised data.
+   * Writable and read-only allocated (.rdata) sections both count as
+   * initialised data. */
+  if (s->flags & SF_EXEC) {
+    bits |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE;
+  } else if (nobits) {
+    bits |= IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+  } else if (s->flags & (SF_WRITE | SF_ALLOC)) {
+    bits |= IMAGE_SCN_CNT_INITIALIZED_DATA;
+  }
+
+  /* Memory permissions and COMDAT membership. */
+  if (s->flags & SF_ALLOC) bits |= IMAGE_SCN_MEM_READ;
+  if (s->flags & SF_WRITE) bits |= IMAGE_SCN_MEM_WRITE;
+  if (in_group) bits |= IMAGE_SCN_LNK_COMDAT;
+
+  if (s->ext_kind != OBJ_EXT_COFF) return bits;
+
+  /* COFF-origin section: a non-zero ext_type is the raw Characteristics
+   * word a reader stashed and replaces the canonical mapping outright
+   * (minus the alignment nibble), which keeps round trips byte-stable
+   * for CNT_INFO / LNK_REMOVE / MEM_DISCARDABLE and friends. ext_flags
+   * is OR-ed in on top either way. */
+  if (s->ext_type) bits = s->ext_type & ~IMAGE_SCN_ALIGN_MASK;
+  bits |= s->ext_flags;
+  return bits;
+}
+
+/* Append `len` bytes of `s` followed by a single NUL to `b`, returning
+ * the offset at which `s` was placed. Dedupe linearly — strtabs are
+ * small enough that this is fine without a hash table, and the
+ * dedupe matches what binutils / llvm-objcopy emit. Mirror of the
+ * helper in elf_emit. */
+/* Intern the `len`-byte string `s` in string table `b` and return its
+ * offset. An empty string is never stored and yields 0. Before
+ * appending, the existing table is flattened and scanned linearly for
+ * an occurrence of `s` that is immediately followed by a NUL (this also
+ * matches the tail of a longer entry); if one is found its offset is
+ * reused. When the scratch copy cannot be allocated the scan is skipped
+ * and the string is simply appended. */
+static u32 strtab_add(Buf* b, const char* s, u32 len) {
+  if (len == 0) return 0;
+
+  const u32 end = buf_pos(b);
+  if (end > len) {
+    u8 inline_copy[256];
+    const int on_heap = end > sizeof inline_copy;
+    u8* flat =
+        on_heap ? (u8*)b->heap->alloc(b->heap, end, 1) : inline_copy;
+    if (flat) {
+      /* 0 doubles as "no match": real offsets start past the 4-byte
+       * size prefix. */
+      u32 hit = 0;
+      buf_flatten(b, flat);
+      if (end > COFF_STRTAB_SIZE_FIELD_BYTES + len) {
+        u32 pos = COFF_STRTAB_SIZE_FIELD_BYTES;
+        while (hit == 0 && pos + len < end) {
+          if (flat[pos + len] == 0 && memcmp(flat + pos, s, len) == 0) {
+            hit = pos;
+          } else {
+            ++pos;
+          }
+        }
+      }
+      if (on_heap) b->heap->free(b->heap, flat, end);
+      if (hit != 0) return hit;
+    }
+  }
+
+  /* Not present yet: append the bytes plus the terminator. */
+  u8 nul = 0;
+  buf_write(b, s, len);
+  buf_write(b, &nul, 1);
+  return end;
+}
+
+/* Encode an 8-byte Name field. If the name fits in 8 bytes, copy
+ * verbatim and zero-pad. Otherwise allocate the name in `strtab` and
+ * write "/<decimal-offset>" (NUL-padded to 8 bytes). */
+/* Encode an 8-byte section Name field into `out`.
+ *   - nlen <= 8: the name is copied verbatim and NUL-padded.
+ *   - otherwise the name is interned in `strtab` and the field holds a
+ *     reference to it: "/<decimal-offset>" (NUL-padded) while the
+ *     offset fits in 7 decimal digits, and the "//" + 6 base64 digits
+ *     form used by LLVM and binutils for larger offsets.
+ * The base64 fallback replaces the previous behaviour of cutting the
+ * decimal text off after 7 characters, which silently produced a
+ * reference to the wrong string. */
+static void encode_name8(char out[8], const char* name, u32 nlen, Buf* strtab) {
+  memset(out, 0, 8);
+  if (nlen <= 8) {
+    if (nlen) memcpy(out, name, nlen);
+    return;
+  }
+
+  u32 off = strtab_add(strtab, name, nlen);
+
+  if (off > 9999999u) {
+    /* Too wide for "/ddddddd": big-endian base64, 6 digits cover 36
+     * bits and therefore every u32 offset. */
+    static const char b64[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        "abcdefghijklmnopqrstuvwxyz"
+        "0123456789+/";
+    u32 v = off;
+    out[0] = '/';
+    out[1] = '/';
+    for (int i = 7; i >= 2; --i) {
+      out[i] = b64[v & 63u];
+      v >>= 6;
+    }
+    return;
+  }
+
+  /* Collect the decimal digits least-significant first, then lay them
+   * out after the slash in reading order. At most 7 digits reach here,
+   * so the field cannot overflow. */
+  char dig[7];
+  int d = 0;
+  u32 v = off;
+  do {
+    dig[d++] = (char)('0' + (v % 10u));
+    v /= 10u;
+  } while (v);
+
+  int n = 0;
+  out[n++] = '/';
+  while (d > 0) out[n++] = dig[--d];
+}
+
+/* Write one 18-byte IMAGE_SYMBOL record into `dst`. */
+/* Serialise one 18-byte IMAGE_SYMBOL record at `dst`.
+ * Name selection: Zeroes == 0 together with a non-zero Offset picks the
+ * long-name form (4 zero bytes + LE string-table offset); every other
+ * combination copies the 8 bytes of ShortName verbatim. The remaining
+ * fields land at byte offsets 8 (Value), 12 (SectionNumber), 14 (Type),
+ * 16 (StorageClass) and 17 (NumberOfAuxSymbols). */
+static void wr_sym(u8* dst, const char ShortName[8], u32 Zeroes, u32 Offset,
+                   u32 Value, i16 SectionNumber, u16 Type, u8 StorageClass,
+                   u8 NumberOfAuxSymbols) {
+  const int long_name = (Zeroes == 0) && (Offset != 0);
+
+  if (!long_name) {
+    memcpy(dst, ShortName, 8);
+  } else {
+    memset(dst, 0, 4);
+    wr_u32_le(dst + 4, Offset);
+  }
+
+  wr_u32_le(dst + 8, Value);
+  wr_u16_le(dst + 12, (u16)SectionNumber);
+  wr_u16_le(dst + 14, Type);
+  dst[16] = StorageClass;
+  dst[17] = NumberOfAuxSymbols;
+}
+
+/* Write a section-definition aux record (18 bytes). */
+/* Serialise one 18-byte section-definition aux record at `dst`: the six
+ * fields in wire order, followed by three zero padding bytes. */
+static void wr_aux_secdef(u8* dst, u32 Length, u16 NumberOfRelocations,
+                          u16 NumberOfLinenumbers, u32 CheckSum, u16 Number,
+                          u8 Selection) {
+  u8* p = dst;
+  wr_u32_le(p, Length);
+  p += 4;
+  wr_u16_le(p, NumberOfRelocations);
+  p += 2;
+  wr_u16_le(p, NumberOfLinenumbers);
+  p += 2;
+  wr_u32_le(p, CheckSum);
+  p += 4;
+  wr_u16_le(p, Number);
+  p += 2;
+  *p++ = Selection;
+  /* Unused[3]: bytes 15..17. */
+  memset(p, 0, 3);
+}
+
+/* Write a weak-externals aux record (18 bytes). */
+/* Serialise one 18-byte weak-external aux record at `dst`: TagIndex and
+ * Characteristics as LE u32s, then ten zero padding bytes. */
+static void wr_aux_weak(u8* dst, u32 TagIndex, u32 Characteristics) {
+  wr_u32_le(dst, TagIndex);
+  wr_u32_le(dst + 4, Characteristics);
+  for (u32 i = 8; i < 18; ++i) dst[i] = 0;
+}
+
+/* Look up the pool-interned string for a Sym. */
+/* Resolve interned name `n` through the compiler's global pool.
+ * Returns the string and stores its length (narrowed to u32) in
+ * *len_out. When the pool has no entry, returns "" with length 0, so
+ * the result is never NULL. */
+static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) {
+  size_t pooled_len;
+  const char* text = pool_str(c->global, n, &pooled_len);
+  if (text) {
+    *len_out = (u32)pooled_len;
+    return text;
+  }
+  *len_out = 0;
+  return "";
+}
+
+/* Number of 18-byte aux records needed to carry a FILE symbol's path of
+ * `file_len` bytes. An empty path still occupies one all-zero record. */
+static u32 coff_file_naux(u32 file_len) {
+  if (file_len == 0) return 1u;
+  return file_len / COFF_AUX_FILE_SIZE +
+         (file_len % COFF_AUX_FILE_SIZE ? 1u : 0u);
+}
+
+/* Choose the Name encoding of a symbol record: names of up to 8 bytes
+ * go inline (NUL-padded) with *zeroes = 1; longer names are interned in
+ * `strtab` and referenced through *offset with *zeroes = 0. The outputs
+ * are shaped for wr_sym. */
+static void coff_sym_name(char short_name[8], u32* zeroes, u32* offset,
+                          const char* nm, u32 nlen, Buf* strtab) {
+  memset(short_name, 0, 8);
+  *zeroes = 1;
+  *offset = 0;
+  if (nlen <= 8) {
+    if (nlen) memcpy(short_name, nm, nlen);
+  } else {
+    *zeroes = 0;
+    *offset = strtab_add(strtab, nm, nlen);
+  }
+}
+
+/* Write `ob` to `w` as a 64-bit little-endian PE/COFF relocatable
+ * object.
+ *
+ *   c   compiler context: target description, scratch arena, heap and
+ *       the name pool.
+ *   ob  object builder to serialise; dead entries are swept first.
+ *   w   destination writer; the emitter seeks to absolute offsets.
+ *
+ * Panics (compiler_panic) on: a target without a COFF backend, a
+ * big-endian or non-64-bit target, more than 0xFEFF sections, more than
+ * 65535 relocations in one section, an SK_IFUNC symbol, a FILE symbol
+ * whose path needs more than 255 aux records, a relocation without a
+ * symbol, an unsupported relocation kind, an explicit non-zero addend
+ * other than the absorbed x86_64 -4 case, an object larger than 4 GiB,
+ * or failure to allocate the buffer used to flatten section bytes. */
+void emit_coff(Compiler* c, ObjBuilder* ob, Writer* w) {
+  Heap* h = (Heap*)c->ctx->heap;
+
+  /* Tombstone sweep — see obj_sweep_dead. */
+  obj_sweep_dead(ob);
+
+  /* ---- target validation ----------------------------------------- */
+  const ArchImpl* arch = arch_for_compiler(c);
+  const ArchCoffOps* coff = arch ? arch->coff : NULL;
+  if (!coff || !coff->reloc_to) {
+    compiler_panic(c, no_loc(), "emit_coff: unsupported target arch %u",
+                   (u32)c->target.arch);
+  }
+  u16 machine = coff->machine;
+  u32 (*reloc_to)(u32) = coff->reloc_to;
+  if (c->target.big_endian) {
+    compiler_panic(c, no_loc(), "emit_coff: big-endian COFF not supported");
+  }
+  if (c->target.ptr_size != 8) {
+    compiler_panic(c, no_loc(), "emit_coff: ptr_size %u (expected 8)",
+                   (u32)c->target.ptr_size);
+  }
+
+  /* ---- pass 1: plan sections ------------------------------------- */
+  u32 nobjsec = obj_section_count(ob);
+  CSec* secs = arena_zarray(c->scratch, CSec, nobjsec ? nobjsec : 1);
+  u32* obj_to_coff = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1);
+  u32 nsecs = 0;
+
+  /* String table — leading 4-byte size placeholder. Real strings start
+   * at offset 4. */
+  Buf strtab;
+  buf_init(&strtab, h);
+  {
+    u8 zero4[COFF_STRTAB_SIZE_FIELD_BYTES] = {0, 0, 0, 0};
+    buf_write(&strtab, zero4, COFF_STRTAB_SIZE_FIELD_BYTES);
+  }
+
+  for (u32 i = 1; i < nobjsec; ++i) {
+    const Section* s = obj_section_get(ob, i);
+    if (s->removed) continue;
+    /* Skip ELF-style synthetic sections (a reader from another format
+     * may have surfaced them) — COFF stores symtab/strtab/relocs
+     * out-of-band, not as named sections. */
+    if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB ||
+        s->sem == SSEM_RELA || s->sem == SSEM_REL || s->sem == SSEM_GROUP) {
+      continue;
+    }
+
+    CSec* cs = &secs[nsecs];
+    u32 nlen;
+    const char* nm = sym_to_str(c, s->name, &nlen);
+    encode_name8(cs->name8, nm, nlen, &strtab);
+
+    cs->obj_sec = i;
+    cs->group_id = s->group_id;
+    cs->align = s->align ? s->align : 1;
+
+    int in_group = (s->group_id != OBJ_GROUP_NONE);
+    u32 ch = sec_characteristics(s, in_group);
+    /* Alignment lives in bits 20..23. Cap at log2(8192)=13 -> nibble
+     * value 14 (IMAGE_SCN_ALIGN_8192BYTES). */
+    u32 lg = log2_align(cs->align);
+    if (lg > 13) lg = 13;
+    ch &= ~IMAGE_SCN_ALIGN_MASK;
+    ch |= IMAGE_SCN_ALIGN_FROM_LOG2(lg);
+    cs->characteristics = ch;
+
+    if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
+      cs->is_nobits = 1;
+      cs->virtual_size = s->bss_size;
+      cs->size_of_raw_data = 0;
+      cs->obj_bytes = NULL;
+    } else {
+      cs->is_nobits = 0;
+      cs->virtual_size = 0;
+      cs->size_of_raw_data = s->bytes.total;
+      cs->obj_bytes = &s->bytes;
+    }
+
+    obj_to_coff[i] = nsecs + 1; /* 1-based; matches SectionNumber. */
+    nsecs++;
+  }
+
+  /* NumberOfSections and SectionNumber are 16-bit fields, and the top
+   * values are taken by the ABSOLUTE / DEBUG pseudo-sections. Refuse to
+   * wrap around silently. */
+  if (nsecs > 0xFEFFu) {
+    compiler_panic(c, no_loc(), "emit_coff: %u sections (max 65279)", nsecs);
+  }
+
+  /* ---- pass 2: count and assign per-section reloc counts --------- */
+  /* COFF stores NumberOfRelocations as u16; sections with > 65535
+   * relocs use the IMAGE_SCN_LNK_NRELOC_OVFL extension which we don't
+   * implement in v1. Panic if any single section exceeds the limit. */
+  u32 total_relocs = obj_reloc_total(ob);
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    u32 nr = obj_reloc_count(ob, cs->obj_sec);
+    if (nr > 0xFFFFu) {
+      compiler_panic(c, no_loc(),
+                     "emit_coff: section %u has %u relocs (max 65535)",
+                     (u32)cs->obj_sec, nr);
+    }
+    cs->number_of_relocations = (u16)nr;
+  }
+
+  /* ---- pass 3: build the symbol table ---------------------------- */
+  /* Count ObjSyms (incl. tombstoned — we'll skip those when emitting)
+   * and, in the same walk, the symbol-table records they can occupy:
+   *   - FILE symbol: primary + one aux per 18 bytes of path;
+   *   - anything else: primary + at most one weak aux.
+   * A flat "2 per ObjSym" budget is not enough: a single long source
+   * path needs more aux records than that and would overrun the
+   * buffer. */
+  u32 nobjsym = 0;
+  size_t sym_records = 0;
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      ++nobjsym;
+      if (e.sym->removed) continue;
+      if (e.sym->kind == SK_FILE) {
+        u32 flen;
+        (void)sym_to_str(c, e.sym->name, &flen);
+        u32 naux = coff_file_naux(flen);
+        /* NumberOfAuxSymbols is a single byte. */
+        if (naux > 0xFFu) {
+          compiler_panic(c, no_loc(),
+                         "emit_coff: file name of %u bytes needs %u aux "
+                         "records (max 255)",
+                         flen, naux);
+        }
+        sym_records += 1u + (size_t)naux;
+      } else {
+        sym_records += 2u;
+      }
+    }
+    obj_symiter_free(it);
+  }
+
+  /* Upper bound on symbol-table records (including aux slots):
+   *   - 2 records per section symbol (primary + 1 aux secdef)
+   *   - the per-ObjSym budget computed above
+   *   - +4 spare for safety
+   * We trim by tracking nrecords as we emit. */
+  size_t max_records = 2u * (size_t)nsecs + sym_records + 4u;
+  u8* symtab = (u8*)arena_zarray(c->scratch, u8,
+                                 (size_t)COFF_SYMBOL_SIZE * max_records);
+  u32 nrecords = 0;
+
+  /* obj_id -> COFF symbol index (including aux slots). Index 0 is
+   * reserved as "none" in our internal map (a real COFF symbol may
+   * legitimately live at index 0, but no ObjSym ever maps there since
+   * we never put OBJ_SYM_NONE through). */
+  u32* sym_to_coff = arena_zarray(c->scratch, u32, nobjsym + 2);
+
+  /* Section symbols first — one STATIC per kept obj section, each
+   * followed by a SECTION DEFINITION aux. Reloc-against-section in
+   * other tools' output uses these; emitting them unconditionally
+   * matches what clang / mingw emit and gives readers a stable target. */
+  u32* secsym_index = arena_zarray(c->scratch, u32, nsecs + 1);
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    /* The section symbol carries the section's real name. It must be
+     * encoded as a symbol name, not copied from name8: for names longer
+     * than 8 bytes name8 holds the "/<offset>" section-header spelling,
+     * which in a symbol record would be read as a literal name. Long
+     * names therefore use the string-table form; strtab_add finds the
+     * copy interned in pass 1, so the table does not grow. */
+    const Section* sec = obj_section_get(ob, cs->obj_sec);
+    u32 sec_nlen;
+    const char* sec_nm = sym_to_str(c, sec->name, &sec_nlen);
+    char short_name[8];
+    u32 zeroes, offset;
+    coff_sym_name(short_name, &zeroes, &offset, sec_nm, sec_nlen, &strtab);
+
+    u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+    wr_sym(slot, short_name, zeroes, offset,
+           /*Value*/ 0,
+           /*SectionNumber*/ (i16)(ci + 1),
+           /*Type*/ IMAGE_SYM_TYPE_NULL,
+           /*StorageClass*/ IMAGE_SYM_CLASS_STATIC,
+           /*NumberOfAuxSymbols*/ 1);
+    secsym_index[ci] = nrecords;
+    nrecords++;
+
+    /* Section-definition aux. Members of a live group get SELECT_ANY;
+     * sections outside any group get Selection 0. The associated-
+     * section Number is left at 0 (cfree does not produce
+     * associative-COMDAT chains today). */
+    u8 selection = 0;
+    if (cs->group_id != OBJ_GROUP_NONE) {
+      const ObjGroup* g = obj_group_get(ob, cs->group_id);
+      if (g && !g->removed) selection = (u8)IMAGE_COMDAT_SELECT_ANY;
+    }
+    u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+    wr_aux_secdef(aux, /*Length*/ cs->size_of_raw_data,
+                  /*NumberOfRelocations*/ cs->number_of_relocations,
+                  /*NumberOfLinenumbers*/ 0,
+                  /*CheckSum*/ 0,
+                  /*Number*/ 0,
+                  /*Selection*/ selection);
+    nrecords++;
+  }
+
+  /* File / regular symbols. */
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      const ObjSym* s = e.sym;
+      if (s->removed) continue;
+      if (s->kind == SK_IFUNC) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: SK_IFUNC has no PE/COFF representation");
+      }
+      /* Don't re-emit SK_SECTION symbols — section symbols are
+       * synthesized above. Map any input-side SK_SECTION onto the
+       * already-emitted one. */
+      if (s->kind == SK_SECTION) {
+        if (s->section_id && s->section_id < nobjsec) {
+          u32 ci = obj_to_coff[s->section_id];
+          if (ci) sym_to_coff[e.id] = secsym_index[ci - 1];
+        }
+        continue;
+      }
+
+      u32 nlen;
+      const char* nm = sym_to_str(c, s->name, &nlen);
+
+      if (s->kind == SK_FILE) {
+        /* File symbol: name ".file" (short), section IMAGE_SYM_DEBUG,
+         * storage class FILE, followed by aux records carrying the
+         * NUL-padded file path (18 bytes per aux). The aux count was
+         * range-checked against 255 while sizing the table. */
+        u32 file_len = nlen;
+        u32 naux = coff_file_naux(file_len);
+        char file_name[8] = {'.', 'f', 'i', 'l', 'e', 0, 0, 0};
+        u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+        wr_sym(slot, file_name, 1, 0, /*Value*/ 0,
+               /*SectionNumber*/ (i16)IMAGE_SYM_DEBUG,
+               /*Type*/ IMAGE_SYM_TYPE_NULL,
+               /*StorageClass*/ IMAGE_SYM_CLASS_FILE,
+               /*NumberOfAuxSymbols*/ (u8)naux);
+        sym_to_coff[e.id] = nrecords;
+        nrecords++;
+        for (u32 a = 0; a < naux; ++a) {
+          u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+          memset(aux, 0, COFF_AUX_FILE_SIZE);
+          u32 off = a * COFF_AUX_FILE_SIZE;
+          u32 copy = file_len > off ? file_len - off : 0;
+          if (copy > COFF_AUX_FILE_SIZE) copy = COFF_AUX_FILE_SIZE;
+          if (copy) memcpy(aux, nm + off, copy);
+          nrecords++;
+        }
+        continue;
+      }
+
+      /* Regular symbol. */
+      char short_name[8];
+      u32 zeroes, offset;
+      coff_sym_name(short_name, &zeroes, &offset, nm, nlen, &strtab);
+
+      i16 section_number = 0;
+      u32 value = 0;
+      u8 storage_class = IMAGE_SYM_CLASS_NULL;
+      u16 type = IMAGE_SYM_TYPE_NULL;
+      u8 naux = 0;
+      int emit_weak_aux = 0;
+
+      switch (s->kind) {
+        case SK_ABS:
+          section_number = (i16)IMAGE_SYM_ABSOLUTE;
+          value = (u32)s->value;
+          break;
+        case SK_COMMON:
+          /* COFF lacks a per-common alignment field; encode size in
+           * Value with SectionNumber=UNDEFINED and rely on the linker
+           * to pick a default alignment. (cfree's frontend uses
+           * COMMON only via __attribute__((common)) which is rare on
+           * PE/COFF targets.) */
+          section_number = (i16)IMAGE_SYM_UNDEFINED;
+          value = (u32)s->size;
+          break;
+        default:
+          if (s->section_id == OBJ_SEC_NONE) {
+            section_number = (i16)IMAGE_SYM_UNDEFINED;
+            value = 0;
+          } else if (s->section_id < nobjsec && obj_to_coff[s->section_id]) {
+            section_number = (i16)obj_to_coff[s->section_id];
+            value = (u32)s->value;
+          } else {
+            /* Defined in a section that was not emitted. */
+            section_number = (i16)IMAGE_SYM_UNDEFINED;
+            value = 0;
+          }
+          break;
+      }
+
+      if (s->kind == SK_FUNC) type = (u16)COFF_SYM_TYPE_FUNCTION;
+
+      switch (s->bind) {
+        case SB_LOCAL:
+          storage_class = IMAGE_SYM_CLASS_STATIC;
+          break;
+        case SB_GLOBAL:
+          storage_class = IMAGE_SYM_CLASS_EXTERNAL;
+          break;
+        case SB_WEAK:
+          /* Weak symbols are emitted as WEAK_EXTERNAL followed by a
+           * weak-external aux. cfree's obj layer doesn't carry a
+           * separate fallback symbol today, so TagIndex is written
+           * as 0.
+           * NOTE(review): TagIndex is a symbol-table index, so 0
+           * designates the first record of this table (normally the
+           * first section symbol), not the weak symbol itself —
+           * confirm the consuming linker treats that as "weak, no
+           * fallback". */
+          storage_class = IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+          emit_weak_aux = 1;
+          naux = 1;
+          break;
+        default:
+          storage_class = IMAGE_SYM_CLASS_STATIC;
+          break;
+      }
+
+      u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+      wr_sym(slot, short_name, zeroes, offset, value, section_number, type,
+             storage_class, naux);
+      sym_to_coff[e.id] = nrecords;
+      nrecords++;
+      if (emit_weak_aux) {
+        u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+        wr_aux_weak(aux, /*TagIndex*/ 0,
+                    /*Characteristics*/ IMAGE_WEAK_EXTERN_SEARCH_LIBRARY);
+        nrecords++;
+      }
+    }
+    obj_symiter_free(it);
+  }
+
+  /* ---- pass 4: build per-section relocation tables --------------- */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    u32 nr = cs->number_of_relocations;
+    if (!nr) continue;
+    u8* buf =
+        (u8*)arena_alloc(c->scratch, (size_t)COFF_RELOC_SIZE * nr, _Alignof(u32));
+    u32 j = 0;
+    for (u32 ri = 0; ri < total_relocs; ++ri) {
+      const Reloc* r = obj_reloc_at(ob, ri);
+      if (r->removed) continue;
+      if (r->section_id != cs->obj_sec) continue;
+      if (r->sym == OBJ_SYM_NONE) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: reloc without symbol not supported "
+                       "(sec=%u offset=%u kind=%u)",
+                       (u32)r->section_id, (u32)r->offset, (u32)r->kind);
+      }
+      if (r->has_explicit_addend && r->addend != 0 &&
+          !coff_rel32_absorbs_minus4(c->target.arch, (RelocKind)r->kind,
+                                     r->addend)) {
+        /* v1 limitation: COFF carries the addend in the patched bytes,
+         * and we don't currently mutate the obj's section bytes to
+         * encode a separate explicit addend. cfree's MCEmitter writes
+         * the addend inline for COFF targets, so this branch only
+         * fires for inputs synthesized by external tools. */
+        compiler_panic(c, no_loc(),
+                       "emit_coff: explicit nonzero addend not supported "
+                       "(sec=%u offset=%u kind=%u addend=%lld)",
+                       (u32)r->section_id, (u32)r->offset, (u32)r->kind,
+                       (long long)r->addend);
+      }
+      u32 wire = reloc_to(r->kind);
+      /* Both arch translators use 0 (IMAGE_REL_*_ABSOLUTE) as the
+       * unsupported-input sentinel; treat that as a panic unless the
+       * input really is R_NONE. */
+      if (wire == 0 && r->kind != R_NONE) {
+        compiler_panic(
+            c, no_loc(),
+            "emit_coff: unsupported relocation kind %u for arch %u",
+            (u32)r->kind, (u32)c->target.arch);
+      }
+      u32 sym_idx = sym_to_coff[r->sym];
+      u8* slot = buf + (size_t)j * COFF_RELOC_SIZE;
+      wr_u32_le(slot + 0, r->offset);
+      wr_u32_le(slot + 4, sym_idx);
+      wr_u16_le(slot + 8, (u16)wire);
+      ++j;
+    }
+    cs->reloc_bytes = buf;
+    /* If a tombstoned reloc was skipped between count and emit, j may
+     * be less than nr; trust the emitted count for the wire field, and
+     * patch the copy already written into this section's secdef aux
+     * record (NumberOfRelocations sits at byte 4 of the record that
+     * follows the section symbol) so the two stay in agreement. */
+    if (j != nr) {
+      cs->number_of_relocations = (u16)j;
+      wr_u16_le(
+          symtab + ((size_t)secsym_index[ci] + 1u) * COFF_SYMBOL_SIZE + 4,
+          (u16)j);
+    }
+  }
+
+  /* ---- pass 5: assign file offsets ------------------------------- */
+  /* Layout:
+   *   [file header] [section headers] [per-section: bytes, relocs]*
+   *   [symbol table] [string table] */
+  u64 cur = (u64)COFF_FILE_HEADER_SIZE +
+            (u64)COFF_SECTION_HEADER_SIZE * (u64)nsecs;
+
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    /* Raw data offset. NOBITS contributes nothing on disk. */
+    if (cs->is_nobits || cs->size_of_raw_data == 0) {
+      cs->pointer_to_raw_data = 0;
+    } else {
+      cur = ALIGN_UP(cur, (u64)cs->align);
+      cs->pointer_to_raw_data = (u32)cur;
+      cur += cs->size_of_raw_data;
+    }
+    /* Reloc table. COFF doesn't mandate alignment for the reloc array,
+     * but llvm and binutils emit them naturally byte-packed; we 4-align
+     * for tidiness. */
+    if (cs->number_of_relocations) {
+      cur = ALIGN_UP(cur, (u64)4);
+      cs->pointer_to_relocations = (u32)cur;
+      cur += (u64)cs->number_of_relocations * COFF_RELOC_SIZE;
+    } else {
+      cs->pointer_to_relocations = 0;
+    }
+  }
+
+  cur = ALIGN_UP(cur, (u64)4);
+  u64 symtab_off = cur;
+  cur += (u64)nrecords * COFF_SYMBOL_SIZE;
+
+  /* String table starts immediately after the symtab. Its 4-byte size
+   * prefix (inclusive of itself) is patched into the flattened copy in
+   * pass 6, because Buf doesn't expose an in-place patch. */
+  u32 strtab_size = buf_pos(&strtab);
+  u64 strtab_off = cur;
+  cur += strtab_size;
+
+  /* Every file pointer in the format is 32 bits wide; the (u32) casts
+   * above would have wrapped if the layout grew past that. `cur` only
+   * ever increases, so checking the final value covers all of them. */
+  if (cur > 0xFFFFFFFFull) {
+    compiler_panic(c, no_loc(),
+                   "emit_coff: object size %llu exceeds the 4 GiB COFF limit",
+                   (unsigned long long)cur);
+  }
+
+  /* ---- pass 6: write the file ------------------------------------ */
+  cfree_writer_seek(w, 0);
+
+  /* IMAGE_FILE_HEADER */
+  coff_wr_u16(w, machine);
+  coff_wr_u16(w, (u16)nsecs);
+  coff_wr_u32(w, 0); /* TimeDateStamp: reproducible */
+  coff_wr_u32(w, (u32)symtab_off);
+  coff_wr_u32(w, nrecords);
+  coff_wr_u16(w, 0); /* SizeOfOptionalHeader: 0 for .obj */
+  coff_wr_u16(w, IMAGE_FILE_LARGE_ADDRESS_AWARE);
+
+  /* Section headers — one 40-byte block immediately after the file
+   * header. */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    const CSec* cs = &secs[ci];
+    cfree_writer_write(w, cs->name8, 8);
+    coff_wr_u32(w, cs->virtual_size);
+    coff_wr_u32(w, 0); /* VirtualAddress: 0 for .obj */
+    coff_wr_u32(w, cs->size_of_raw_data);
+    coff_wr_u32(w, cs->pointer_to_raw_data);
+    coff_wr_u32(w, cs->pointer_to_relocations);
+    coff_wr_u32(w, 0); /* PointerToLinenumbers: 0 */
+    coff_wr_u16(w, cs->number_of_relocations);
+    coff_wr_u16(w, 0); /* NumberOfLinenumbers: 0 */
+    coff_wr_u32(w, cs->characteristics);
+  }
+
+  /* Section bytes + relocs (interleaved). */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    const CSec* cs = &secs[ci];
+    if (!cs->is_nobits && cs->size_of_raw_data && cs->obj_bytes) {
+      cfree_writer_seek(w, cs->pointer_to_raw_data);
+      u32 sz = cs->obj_bytes->total;
+      u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
+      if (!tmp) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: out of memory flattening section %u "
+                       "(%u bytes)",
+                       (u32)cs->obj_sec, sz);
+      }
+      if (sz) buf_flatten(cs->obj_bytes, tmp);
+      cfree_writer_write(w, tmp, sz);
+      h->free(h, tmp, sz ? sz : 1);
+    }
+    if (cs->number_of_relocations && cs->reloc_bytes) {
+      cfree_writer_seek(w, cs->pointer_to_relocations);
+      cfree_writer_write(w, cs->reloc_bytes,
+                         (size_t)cs->number_of_relocations * COFF_RELOC_SIZE);
+    }
+  }
+
+  /* Symbol table. */
+  cfree_writer_seek(w, symtab_off);
+  cfree_writer_write(w, symtab, (size_t)nrecords * COFF_SYMBOL_SIZE);
+
+  /* String table: 4-byte total size (inclusive) followed by the body.
+   * `strtab` was initialized with 4 placeholder zero bytes; rewrite
+   * them with the real size before flushing. */
+  {
+    u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1);
+    if (strtab_size) buf_flatten(&strtab, flat);
+    /* Patch the 4-byte size prefix in place. */
+    if (strtab_size >= COFF_STRTAB_SIZE_FIELD_BYTES) {
+      wr_u32_le(flat, strtab_size);
+    }
+    cfree_writer_seek(w, strtab_off);
+    cfree_writer_write(w, flat, strtab_size);
+  }
+  buf_fini(&strtab);
+}
diff --git a/src/obj/coff_read.c b/src/obj/coff_read.c
@@ -0,0 +1,714 @@
+/* PE/COFF .obj (IMAGE_FILE_HEADER + sections) reader. Parses a 64-bit
+ * little-endian relocatable object back into a fresh ObjBuilder. Peer
+ * of read_elf / read_macho; the post-finalize ObjBuilder shape is the
+ * canonical superset doc/DESIGN.md §5.5 promises: read_coff of an
+ * emit_coff output produces an ObjBuilder shape-equivalent to the
+ * writer's input, modulo synthesized SECTION symbols and the COMDAT
+ * section-definition aux records.
+ *
+ * Scope: IMAGE_FILE_MACHINE_AMD64, IMAGE_FILE_MACHINE_ARM64 and, for
+ * long-form objects only, IMAGE_FILE_MACHINE_ARM64EC. PE
+ * executables (with a non-zero SizeOfOptionalHeader) are rejected — a
+ * future read_coff_pe would handle those. Microsoft "short import"
+ * records (Sig1=0, Sig2=0xFFFF) found inside .lib archive members are
+ * detected at entry and dispatched to read_coff_short_import, which
+ * synthesizes a DSO-shaped ObjBuilder annotated with the providing
+ * DLL name via obj_set_coff_import_dll. */
+
+#include <string.h>
+
+#include "arch/arch.h"
+#include "core/arena.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "obj/coff.h"
+
+/* All-zero SrcLoc, passed to compiler_panic for errors in binary input
+ * that have no source position to point at. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- section-header scratch ----
+ * One CSecRec per section header: the header fields read_coff consumes
+ * (decoded by parse_shdr) plus the ObjBuilder section created for it. */
+
+typedef struct CSecRec {
+  char raw_name[8]; /* Name field (header bytes 0..7), copied verbatim; may be "/<decimal>" */
+  u32 virtual_size; /* header offset 8 */
+  u32 size_of_raw_data; /* header offset 16 */
+  u32 pointer_to_raw_data; /* header offset 20; used as a file offset to the section bytes */
+  u32 pointer_to_relocations; /* header offset 24; used as a file offset to the reloc table */
+  u16 number_of_relocations; /* header offset 32 */
+  u32 characteristics; /* header offset 36; IMAGE_SCN_* bits */
+  ObjSecId obj_sec; /* OBJ_SEC_NONE if skipped; parse_shdr resets it, read_coff assigns it */
+} CSecRec;
+
+/* Decode the section-header fields read_coff needs from the raw header at
+ * `p` into `*out`. Header offsets 12, 28 and 34 (VirtualAddress and the
+ * line-number fields) are not read. obj_sec is reset to OBJ_SEC_NONE. */
+static void parse_shdr(const u8* p, CSecRec* out) {
+  CSecRec rec;
+  memcpy(rec.raw_name, p, 8);
+  rec.virtual_size = coff_rd_u32(p + 8);
+  rec.size_of_raw_data = coff_rd_u32(p + 16);
+  rec.pointer_to_raw_data = coff_rd_u32(p + 20);
+  rec.pointer_to_relocations = coff_rd_u32(p + 24);
+  rec.number_of_relocations = coff_rd_u16(p + 32);
+  rec.characteristics = coff_rd_u32(p + 36);
+  rec.obj_sec = OBJ_SEC_NONE;
+  *out = rec;
+}
+
+/* ---- string-table lookup (4-byte size prefix, NUL-terminated entries) ---- */
+
+/* Return the string that starts `off` bytes into the string table `tab`
+ * (`tab_size` bytes, size prefix included) and store its length in
+ * *len_out. The scan stops at the first NUL or at the end of the table,
+ * so the result is only guaranteed to be *len_out bytes long. An offset
+ * at or past the end of the table yields "" with length 0. */
+static const char* strtab_lookup(const u8* tab, u32 tab_size, u32 off,
+                                 u32* len_out) {
+  const char* str = "";
+  u32 count = 0;
+  if (off < tab_size) {
+    const u32 avail = tab_size - off;
+    str = (const char*)(tab + off);
+    for (; count < avail; ++count) {
+      if (str[count] == '\0') break;
+    }
+  }
+  *len_out = count;
+  return str;
+}
+
+/* Turn the 8-byte Name field of a section header into (ptr, len).
+ * A leading '/' selects the long-name form: the decimal digits that
+ * follow (at most 7) are a string-table offset. Anything else is an
+ * inline name of up to 8 bytes, NUL-padded but not necessarily
+ * NUL-terminated. Symbol names use a different long form (Zeroes==0,
+ * Offset) and are handled by resolve_sym_name, not here. */
+static void resolve_section_name(const char raw[8], const u8* strtab,
+                                 u32 strtab_size, const char** name_out,
+                                 u32* len_out) {
+  if (raw[0] != '/') {
+    u32 len = 0;
+    for (; len < 8; ++len) {
+      if (raw[len] == '\0') break;
+    }
+    *name_out = raw;
+    *len_out = len;
+    return;
+  }
+
+  /* Accumulate the decimal offset; stop at the first non-digit. */
+  u32 strtab_off = 0;
+  u32 pos = 1;
+  while (pos < 8) {
+    const char ch = raw[pos];
+    if (ch < '0' || ch > '9') break;
+    strtab_off = strtab_off * 10u + (u32)(ch - '0');
+    ++pos;
+  }
+  *name_out = strtab_lookup(strtab, strtab_size, strtab_off, len_out);
+}
+
+/* ---- characteristics -> SecKind / SecFlag / SecSem ---- */
+
+/* Map a section's name and Characteristics to a SecKind. Checks run in
+ * priority order: uninitialized data -> SEC_BSS; code or executable ->
+ * SEC_TEXT; a ".debug_" (ELF-style) or ".debug$" (MS-style) name prefix
+ * -> SEC_DEBUG; initialized data -> SEC_DATA when writable, otherwise
+ * SEC_RODATA; everything else -> SEC_OTHER. */
+static u16 coff_sec_kind(const char* name, u32 nlen, u32 ch) {
+  const u32 text_bits = IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE;
+
+  if (ch & IMAGE_SCN_CNT_UNINITIALIZED_DATA) return SEC_BSS;
+  if (ch & text_bits) return SEC_TEXT;
+  if (nlen >= 7 && (memcmp(name, ".debug_", 7) == 0 ||
+                    memcmp(name, ".debug$", 7) == 0))
+    return SEC_DEBUG;
+  if (!(ch & IMAGE_SCN_CNT_INITIALIZED_DATA)) return SEC_OTHER;
+  return (ch & IMAGE_SCN_MEM_WRITE) ? SEC_DATA : SEC_RODATA;
+}
+
+/* Translate Characteristics memory/COMDAT bits into SF_* flags. PE has
+ * no characteristics bit for TLS, so SF_TLS is derived from the name:
+ * exactly ".tls", or anything starting with ".tls$". */
+static u16 coff_sec_flags(const char* name, u32 nlen, u32 ch) {
+  const int tls_exact = (nlen == 4 && memcmp(name, ".tls", 4) == 0);
+  const int tls_prefixed = (nlen >= 5 && memcmp(name, ".tls$", 5) == 0);
+
+  u16 flags = 0;
+  if (tls_exact || tls_prefixed) flags |= SF_TLS;
+  if (ch & IMAGE_SCN_LNK_COMDAT) flags |= SF_GROUP;
+  if (ch & IMAGE_SCN_MEM_WRITE) flags |= SF_WRITE;
+  if (ch & IMAGE_SCN_MEM_EXECUTE) flags |= SF_EXEC;
+  if (ch & IMAGE_SCN_MEM_READ) flags |= SF_ALLOC;
+  return flags;
+}
+
+/* Decode the alignment nibble (Characteristics bits 20..23), which stores
+ * log2(align) + 1. A zero nibble means "default" and is reported as
+ * align = 1 for round-trip purposes. */
+static u32 coff_sec_align(u32 ch) {
+  const u32 field = (ch & IMAGE_SCN_ALIGN_MASK) >> 20;
+  return field ? (1u << (field - 1u)) : 1u;
+}
+
+/* ---- symbol-name resolution ---- */
+
+/* Resolve the 8-byte name field at the start of symbol record `rec` into
+ * (ptr, len). When the first four bytes are zero the next four are a
+ * string-table offset (long form); otherwise the name is stored inline,
+ * up to 8 bytes and NUL-padded. */
+static void resolve_sym_name(const u8* rec, const u8* strtab, u32 strtab_size,
+                             const char** name_out, u32* len_out) {
+  if (coff_rd_u32(rec + 0) != 0) {
+    u32 len = 0;
+    for (; len < 8; ++len) {
+      if (rec[len] == '\0') break;
+    }
+    *name_out = (const char*)rec;
+    *len_out = len;
+    return;
+  }
+  *name_out =
+      strtab_lookup(strtab, strtab_size, coff_rd_u32(rec + 4), len_out);
+}
+
+/* ---- short-import record handler ----
+ * Microsoft "short import" format: a 20-byte ImportObjectHeader
+ * followed by SizeOfData bytes containing two NUL-terminated strings —
+ * the imported symbol name then the DLL name. These live as members
+ * of .lib archives (mingw's libkernel32.dll.a etc.) and stand in for
+ * a full long-form COFF import object.
+ *
+ * cfree-side model: synthesize a DSO-shaped ObjBuilder with the
+ * imported symbol defined at section_id = OBJ_SEC_NONE (the same
+ * shape read_coff_dso / read_elf_dso produce for an exported name),
+ * and stash the providing DLL name on the builder via
+ * obj_set_coff_import_dll so the archive-ingestion layer can route
+ * the resulting LinkInput as a DSO with this name as the soname.
+ *
+ * We also synthesize the `__imp_<name>` alias mingw codegen uses to
+ * spell explicit IAT-slot access; both names ultimately resolve to
+ * the same DLL export at link time. */
+static ObjBuilder* read_coff_short_import(Compiler* c, const char* name,
+                                          const u8* data, size_t len) {
+  static const char kImpPrefix[] = "__imp_";
+  const u32 imp_prefix_len = (u32)(sizeof kImpPrefix - 1u);
+
+  if (len < COFF_IMPORT_OBJECT_HEADER_SIZE)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import record shorter than header");
+
+  /* Header fields. Sig1/Sig2 (offsets 0 and 2) were matched by the
+   * caller; Version (offset 4) and TimeDateStamp (offset 8) are unused. */
+  const u16 machine = coff_rd_u16(data + 6);
+  const u32 payload_len = coff_rd_u32(data + 12); /* SizeOfData */
+  const u16 ordinal_or_hint = coff_rd_u16(data + 16);
+  const u16 type_flags = coff_rd_u16(data + 18);
+
+  if ((u64)COFF_IMPORT_OBJECT_HEADER_SIZE + (u64)payload_len > (u64)len)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import SizeOfData=%u extends past input "
+                   "(len=%zu)",
+                   payload_len, len);
+
+  const int machine_ok = (machine == IMAGE_FILE_MACHINE_AMD64 ||
+                          machine == IMAGE_FILE_MACHINE_ARM64);
+  if (!machine_ok)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import unsupported machine %#x",
+                   (u32)machine);
+
+  /* TypeFlags is a bitfield: Type:2, NameType:3, Reserved:11. */
+  const u32 import_type = (u32)(type_flags & 0x3u);
+  const u32 name_type = (u32)((type_flags >> 2) & 0x7u);
+
+  /* Imports by ordinal are refused with a user-facing diagnostic: cfree
+   * links imports by name only, and supporting ordinals would mean
+   * threading the ordinal through link_resolve into the PE import
+   * directory hint/name tables. */
+  if (name_type == IMPORT_OBJECT_ORDINAL)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import by ordinal not implemented "
+                   "(archive member \"%s\", ordinal %u). cfree links "
+                   "imports by name only; rebuild the consumer to import "
+                   "by name, or omit this archive from the link.",
+                   name ? name : "<unnamed>",
+                   (unsigned)ordinal_or_hint);
+
+  /* The payload holds two NUL-terminated strings back to back: the
+   * imported symbol name, then the DLL name. */
+  const u8* payload = data + COFF_IMPORT_OBJECT_HEADER_SIZE;
+  const u8* sym_nul = (const u8*)memchr(payload, '\0', payload_len);
+  if (!sym_nul)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import symbol name not NUL-terminated");
+  const u32 sym_len = (u32)(sym_nul - payload);
+
+  const u32 dll_off = sym_len + 1u;
+  if (dll_off >= payload_len)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import missing DLL name");
+  const u8* dll_start = payload + dll_off;
+  const u8* dll_nul =
+      (const u8*)memchr(dll_start, '\0', payload_len - dll_off);
+  if (!dll_nul)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import DLL name not NUL-terminated");
+  const u32 dll_len = (u32)(dll_nul - dll_start);
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed");
+
+  /* The imported name itself: a function for CODE imports, an object for
+   * everything else, defined at OBJ_SEC_NONE with value 0 and size 0. */
+  SymKind primary_kind = SK_OBJ;
+  if (import_type == IMPORT_OBJECT_CODE) primary_kind = SK_FUNC;
+
+  Sym primary_name = pool_intern(c->global, (const char*)payload, sym_len);
+  ObjSymId primary_id = obj_symbol_ex(ob, primary_name, SB_GLOBAL, SV_DEFAULT,
+                                      primary_kind, OBJ_SEC_NONE, 0, 0, 0);
+  obj_sym_mark_referenced(ob, primary_id);
+
+  /* The `__imp_<name>` alias names the IAT slot, so it is always an
+   * object symbol, even when the import itself is code. */
+  const u32 alias_len = imp_prefix_len + sym_len;
+  char* alias = arena_array(c->scratch, char, alias_len);
+  memcpy(alias, kImpPrefix, imp_prefix_len);
+  memcpy(alias + imp_prefix_len, payload, sym_len);
+  Sym alias_name = pool_intern(c->global, alias, alias_len);
+  ObjSymId alias_id = obj_symbol_ex(ob, alias_name, SB_GLOBAL, SV_DEFAULT,
+                                    SK_OBJ, OBJ_SEC_NONE, 0, 0, 0);
+  obj_sym_mark_referenced(ob, alias_id);
+
+  /* Record the providing DLL on the builder for the archive-ingestion
+   * layer. */
+  Sym dll_name = pool_intern(c->global, (const char*)dll_start, dll_len);
+  obj_set_coff_import_dll(ob, dll_name);
+
+  obj_finalize(ob);
+  return ob;
+}
+
+/* Parse a relocatable PE/COFF object, or a Microsoft short-import record,
+ * into a freshly created and finalized ObjBuilder.
+ *
+ *   c     compiler context; supplies the scratch arena, the global intern
+ *         pool, and the panic handler.
+ *   name  input / archive-member name; only forwarded to
+ *         read_coff_short_import for its diagnostics.
+ *   data  input bytes.
+ *   len   number of bytes at `data`.
+ *
+ * Returns the finalized builder. Every malformed-input condition is
+ * reported through compiler_panic (the code relies on it not returning).
+ *
+ * Steps: header validation -> string-table location -> sections ->
+ * symbols (aux-record aware) -> COMDAT groups -> relocations -> finalize.
+ *
+ * NOTE(review): extended relocation counts (IMAGE_SCN_LNK_NRELOC_OVFL) are
+ * not decoded here; NumberOfRelocations is taken at face value. */
+ObjBuilder* read_coff(Compiler* c, const char* name, const u8* data,
+                      size_t len) {
+  (void)name;
+
+  /* ---- Step 0: header validation ---- */
+  if (len < COFF_FILE_HEADER_SIZE)
+    compiler_panic(c, no_loc(), "read_coff: input shorter than COFF header");
+
+  /* Microsoft short-import record? (Sig1=0, Sig2=0xFFFF.)  These live
+   * as members of .lib archives and stand in for a long-form import
+   * object.  Detect at entry; the rest of read_coff assumes the
+   * input is a real IMAGE_FILE_HEADER. */
+  if (len >= 4 &&
+      coff_rd_u16(data + 0) == IMPORT_OBJECT_HDR_SIG1 &&
+      coff_rd_u16(data + 2) == IMPORT_OBJECT_HDR_SIG2) {
+    return read_coff_short_import(c, name, data, len);
+  }
+
+  u16 machine = coff_rd_u16(data + 0);
+  u16 nsections = coff_rd_u16(data + 2);
+  /* data + 4: TimeDateStamp (4 bytes, ignored). */
+  u32 ptr_to_symtab = coff_rd_u32(data + 8);
+  u32 nsymbols = coff_rd_u32(data + 12);
+  u16 size_opt_hdr = coff_rd_u16(data + 16);
+  /* data + 18: Characteristics (2 bytes, currently ignored). */
+
+  if (size_opt_hdr != 0)
+    compiler_panic(c, no_loc(),
+                   "read_coff: input has optional header (size=%u); "
+                   "use read_coff_pe for executables",
+                   (u32)size_opt_hdr);
+
+  if (machine != IMAGE_FILE_MACHINE_AMD64 &&
+      machine != IMAGE_FILE_MACHINE_ARM64 &&
+      machine != IMAGE_FILE_MACHINE_ARM64EC)
+    compiler_panic(c, no_loc(), "read_coff: unsupported machine %#x",
+                   (u32)machine);
+
+  const ArchImpl* arch = arch_lookup_coff_machine(machine);
+  if (!arch || !arch->coff || !arch->coff->reloc_from)
+    compiler_panic(c, no_loc(), "read_coff: no arch impl for machine %#x",
+                   (u32)machine);
+  u32 (*reloc_from)(u32) = arch->coff->reloc_from;
+
+  if ((u64)COFF_FILE_HEADER_SIZE +
+          (u64)nsections * (u64)COFF_SECTION_HEADER_SIZE >
+      (u64)len)
+    compiler_panic(c, no_loc(),
+                   "read_coff: section header table out of range");
+
+  /* ---- Step 1: bootstrap, locate strtab ---- */
+  /* Strtab is at PointerToSymbolTable + NumberOfSymbols * 18. When the
+   * file has no symbol table (ptr=0, n=0) we treat strtab as empty. */
+  const u8* strtab = NULL;
+  u32 strtab_size = 0;
+  if (ptr_to_symtab && nsymbols) {
+    u64 symtab_end =
+        (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE;
+    if (symtab_end + COFF_STRTAB_SIZE_FIELD_BYTES > (u64)len)
+      compiler_panic(c, no_loc(),
+                     "read_coff: symbol table / strtab header out of range");
+    u32 declared = coff_rd_u32(data + symtab_end);
+    /* The size field is inclusive of the 4-byte prefix; treat <4 as
+     * "empty" (some tools write 0). */
+    if (declared < COFF_STRTAB_SIZE_FIELD_BYTES) declared = 0;
+    if (declared) {
+      if (symtab_end + (u64)declared > (u64)len)
+        compiler_panic(c, no_loc(), "read_coff: strtab body out of range");
+      strtab = data + symtab_end;
+      strtab_size = declared;
+    } else {
+      strtab = data + symtab_end;
+      strtab_size = COFF_STRTAB_SIZE_FIELD_BYTES;
+    }
+  }
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed");
+
+  /* ---- Step 2: ingest sections ---- */
+  CSecRec* secs = arena_array(c->scratch, CSecRec, nsections ? nsections : 1);
+  const u8* shdr_base = data + COFF_FILE_HEADER_SIZE;
+  for (u32 i = 0; i < nsections; ++i) {
+    CSecRec* s = &secs[i];
+    parse_shdr(shdr_base + (u64)i * COFF_SECTION_HEADER_SIZE, s);
+
+    const char* nm;
+    u32 nlen;
+    resolve_section_name(s->raw_name, strtab, strtab_size, &nm, &nlen);
+    Sym sn = pool_intern(c->global, nm, nlen);
+
+    u16 kind = coff_sec_kind(nm, nlen, s->characteristics);
+    u16 flags = coff_sec_flags(nm, nlen, s->characteristics);
+    u32 align = coff_sec_align(s->characteristics);
+
+    int is_bss = (s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0;
+    u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS;
+
+    ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags,
+                                 align, 0u, 0u, 0u);
+    if (id == OBJ_SEC_NONE)
+      compiler_panic(c, no_loc(),
+                     "read_coff: obj_section_ex failed for section %u", i);
+    s->obj_sec = id;
+
+    /* Preserve raw Characteristics so emit_coff can write back any bits
+     * the canonical SecFlag/SecSem mapping doesn't model (LNK_INFO,
+     * LNK_REMOVE, MEM_DISCARDABLE, MEM_SHARED, GPREL, alignment nibble). */
+    obj_section_set_ext(ob, id, OBJ_EXT_COFF, s->characteristics, 0);
+
+    if (is_bss) {
+      /* No file bytes are read for BSS, so PointerToRawData is not
+       * validated against `len` on this path. */
+      u32 bss_size = s->virtual_size ? s->virtual_size : s->size_of_raw_data;
+      obj_reserve_bss(ob, id, bss_size, align);
+    } else if (s->size_of_raw_data) {
+      u64 end = (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data;
+      if (end > (u64)len)
+        compiler_panic(c, no_loc(),
+                       "read_coff: section %u bytes out of range", i);
+      u8* dst = obj_reserve(ob, id, s->size_of_raw_data);
+      memcpy(dst, data + s->pointer_to_raw_data, s->size_of_raw_data);
+    }
+  }
+
+  /* ---- Step 3: ingest symbols (with aux-record awareness) ----
+   * sym_to_obj is indexed by RAW symbol-table index (including aux
+   * slots), so reloc.SymbolTableIndex resolves directly without
+   * adjusting for skipped aux records. Aux slots map to OBJ_SYM_NONE. */
+  ObjSymId* sym_to_obj =
+      arena_zarray(c->scratch, ObjSymId, nsymbols ? nsymbols : 1);
+
+  /* Track section-symbol primary symtab index per section, stored as
+   * (raw_index + 1) so 0 can mean "not seen yet" without colliding
+   * with the (legitimate) first symbol-table slot — emit_coff always
+   * lays the first section's section-symbol at index 0. */
+  u32* sec_sym_primary = arena_zarray(c->scratch, u32, nsections + 1u);
+
+  const u8* sym_base = data + ptr_to_symtab;
+  if (nsymbols) {
+    if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE > (u64)len)
+      compiler_panic(c, no_loc(),
+                     "read_coff: symbol table body out of range");
+  }
+
+  for (u32 i = 0; i < nsymbols; ) {
+    const u8* p = sym_base + (u64)i * COFF_SYMBOL_SIZE;
+    const char* nm;
+    u32 nlen;
+    resolve_sym_name(p, strtab, strtab_size, &nm, &nlen);
+
+    u32 value = coff_rd_u32(p + 8);
+    i16 sec_num = (i16)coff_rd_u16(p + 12);
+    u16 type = coff_rd_u16(p + 14);
+    u8 sclass = p[16];
+    u8 naux = p[17];
+
+    /* FILE storage class: concatenate aux records' raw bytes (each
+     * 18 bytes, NUL-padded) for the source-file name. */
+    if (sclass == IMAGE_SYM_CLASS_FILE) {
+      /* Build name from aux records (up to naux*18 bytes); fall back
+       * to the primary record's name if naux==0. */
+      const char* fnm = nm;
+      u32 fnlen = nlen;
+      if (naux) {
+        /* Each aux record's 18 bytes are interpreted as raw file-name
+         * bytes; concatenate then trim trailing NULs. */
+        u32 total = (u32)naux * COFF_SYMBOL_SIZE;
+        if ((u64)i + 1u + (u64)naux > (u64)nsymbols)
+          compiler_panic(c, no_loc(),
+                         "read_coff: FILE aux records extend past symbol "
+                         "table");
+        const u8* aux = p + COFF_SYMBOL_SIZE;
+        u32 n = 0;
+        while (n < total && aux[n] != '\0') ++n;
+        fnm = (const char*)aux;
+        fnlen = n;
+      }
+      Sym fsn = fnlen ? pool_intern(c->global, fnm, fnlen) : 0;
+      ObjSymId id = obj_symbol_ex(ob, fsn, SB_LOCAL, SV_DEFAULT, SK_FILE,
+                                  OBJ_SEC_NONE, 0, 0, 0);
+      obj_sym_mark_referenced(ob, id);
+      sym_to_obj[i] = id;
+      i += 1u + naux;
+      continue;
+    }
+
+    /* Every other symbol kind also claims `naux` trailing records, and
+     * Step 4 dereferences the first aux record of section symbols.
+     * Reject records whose aux slots would fall outside the symbol
+     * table so those reads stay inside the validated range. */
+    if ((u64)i + 1u + (u64)naux > (u64)nsymbols)
+      compiler_panic(c, no_loc(),
+                     "read_coff: aux records of symbol %u extend past "
+                     "symbol table",
+                     i);
+
+    /* Skip .bf/.ef debug pair primaries (FUNCTION storage class) and
+     * the END_OF_FUNCTION marker: they carry no symbol cfree models. */
+    if (sclass == IMAGE_SYM_CLASS_FUNCTION ||
+        sclass == IMAGE_SYM_CLASS_END_OF_FUNCTION) {
+      sym_to_obj[i] = OBJ_SYM_NONE;
+      i += 1u + naux;
+      continue;
+    }
+
+    /* Resolve (bind, vis, kind, section_id, value, size, cmnalign). */
+    SymBind bind = SB_LOCAL;
+    SymVis vis = SV_DEFAULT;
+    SymKind kind = SK_NOTYPE;
+    ObjSecId target_sec = OBJ_SEC_NONE;
+    u64 sym_value = 0;
+    u64 sym_size = 0;
+    u64 cmnalign = 0;
+
+    if (sec_num == IMAGE_SYM_UNDEFINED) {
+      /* Undef or common. EXTERNAL with Value > 0 is a common. */
+      if (sclass == IMAGE_SYM_CLASS_EXTERNAL && value > 0) {
+        bind = SB_GLOBAL;
+        kind = SK_COMMON;
+        sym_size = value;
+        cmnalign = 1; /* COFF doesn't carry per-common alignment */
+      } else {
+        bind = (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) ? SB_WEAK
+               : (sclass == IMAGE_SYM_CLASS_EXTERNAL)    ? SB_GLOBAL
+                                                         : SB_LOCAL;
+        kind = SK_UNDEF;
+      }
+    } else if (sec_num == IMAGE_SYM_ABSOLUTE) {
+      kind = SK_ABS;
+      sym_value = value;
+      bind = (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL : SB_LOCAL;
+    } else if (sec_num == IMAGE_SYM_DEBUG) {
+      /* Defined-in-debug — cfree has no model for it. Skip with an
+       * OBJ_SYM_NONE entry; relocations against this slot will resolve
+       * to OBJ_SYM_NONE, which obj_reloc_ex tolerates. */
+      sym_to_obj[i] = OBJ_SYM_NONE;
+      i += 1u + naux;
+      continue;
+    } else if (sec_num >= 1 && (u32)sec_num <= nsections) {
+      target_sec = secs[sec_num - 1].obj_sec;
+      sym_value = value;
+      switch (sclass) {
+        case IMAGE_SYM_CLASS_EXTERNAL:
+          bind = SB_GLOBAL;
+          break;
+        case IMAGE_SYM_CLASS_WEAK_EXTERNAL:
+          bind = SB_WEAK;
+          break;
+        case IMAGE_SYM_CLASS_STATIC:
+        case IMAGE_SYM_CLASS_LABEL:
+        default:
+          bind = SB_LOCAL;
+          break;
+      }
+
+      /* Detect SECTION symbols: STATIC, Value==0, name matches the
+       * section's own name, and the section has at least one aux
+       * record (the section-definition aux). Mark as SK_SECTION so
+       * emit_coff regenerates the synthetic entry. */
+      int is_section_sym = 0;
+      if (sclass == IMAGE_SYM_CLASS_STATIC && value == 0 && naux >= 1) {
+        const CSecRec* cs = &secs[sec_num - 1];
+        u32 raw_nlen = 0;
+        while (raw_nlen < 8 && cs->raw_name[raw_nlen] != '\0') ++raw_nlen;
+        if (raw_nlen == nlen && memcmp(cs->raw_name, nm, nlen) == 0) {
+          is_section_sym = 1;
+        } else if (cs->raw_name[0] == '/') {
+          /* Long-named section: compare the resolved name. */
+          const char* rn;
+          u32 rnlen;
+          resolve_section_name(cs->raw_name, strtab, strtab_size, &rn, &rnlen);
+          if (rnlen == nlen && memcmp(rn, nm, nlen) == 0) is_section_sym = 1;
+        }
+      }
+
+      if (is_section_sym) {
+        kind = SK_SECTION;
+        sec_sym_primary[sec_num] = i + 1u;
+      } else if (sclass == IMAGE_SYM_CLASS_SECTION) {
+        kind = SK_SECTION;
+      } else if (sclass == IMAGE_SYM_CLASS_LABEL) {
+        kind = SK_NOTYPE;
+      } else if ((type >> 8) == IMAGE_SYM_DTYPE_FUNCTION) {
+        kind = SK_FUNC;
+      } else if (type == IMAGE_SYM_TYPE_NULL) {
+        kind = (bind == SB_LOCAL) ? SK_NOTYPE : SK_OBJ;
+      } else {
+        kind = SK_OBJ;
+      }
+    } else {
+      compiler_panic(c, no_loc(),
+                     "read_coff: symbol section number %d out of range",
+                     (int)sec_num);
+    }
+
+    /* WEAK_EXTERNAL primary: aux record carries TagIndex + Characteristics.
+     * cfree's model has SB_WEAK; the fallback symbol is link-time
+     * resolution by name and we drop the explicit index. */
+    if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) bind = SB_WEAK;
+
+    Sym sn = nlen ? pool_intern(c->global, nm, nlen) : 0;
+    ObjSymId id = obj_symbol_ex(ob, sn, bind, vis, kind, target_sec, sym_value,
+                                sym_size, cmnalign);
+    obj_sym_mark_referenced(ob, id);
+    sym_to_obj[i] = id;
+    i += 1u + naux;
+  }
+
+  /* ---- Step 4: stitch COMDAT groups from section-definition aux ----
+   * Each COMDAT section has a STATIC primary symbol (the section
+   * symbol) followed by one section-definition aux record. Selection
+   * != 0 marks the section as a COMDAT member; the signature symbol
+   * is the section symbol itself (Number field's selection variant
+   * controls dedup policy at link time). The aux reads below are in
+   * range because Step 3 rejected symbols whose aux records overrun
+   * the symbol table. */
+  for (u32 s = 1; s <= nsections; ++s) {
+    u32 prim_plus1 = sec_sym_primary[s];
+    if (!prim_plus1) continue;
+    u32 prim = prim_plus1 - 1u;
+    const CSecRec* cs = &secs[s - 1];
+    if (!(cs->characteristics & IMAGE_SCN_LNK_COMDAT)) continue;
+    const u8* p = sym_base + (u64)prim * COFF_SYMBOL_SIZE;
+    u8 naux = p[17];
+    if (!naux) continue;
+    const u8* aux = p + COFF_SYMBOL_SIZE;
+    /* Aux layout: Length(4), NumberOfRelocations(2), NumberOfLinenumbers(2),
+     * CheckSum(4), Number(2), Selection(1), Unused(3). */
+    u16 assoc_number = coff_rd_u16(aux + 12);
+    u8 selection = aux[14];
+    if (selection == 0) continue;
+
+    ObjSymId sig = sym_to_obj[prim];
+    const ObjSym* sigsym = obj_symbol_get(ob, sig);
+    Sym gname = sigsym ? sigsym->name : 0;
+    ObjGroupId gid = obj_group(ob, gname, sig, (u32)selection);
+    obj_group_add_section(ob, gid, cs->obj_sec);
+    obj_section_set_group(ob, cs->obj_sec, gid);
+
+    /* ASSOCIATIVE: the COMDAT member is associated with another
+     * section's group. Add this section to that group's list too so
+     * dead-strip keeps them paired. */
+    if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && assoc_number >= 1 &&
+        (u32)assoc_number <= nsections) {
+      u32 other_prim_plus1 = sec_sym_primary[assoc_number];
+      if (other_prim_plus1) {
+        u32 other_prim = other_prim_plus1 - 1u;
+        const u8* op = sym_base + (u64)other_prim * COFF_SYMBOL_SIZE;
+        if (op[17]) {
+          const u8* oaux = op + COFF_SYMBOL_SIZE;
+          u8 osel = oaux[14];
+          if (osel != 0) {
+            ObjSymId osig = sym_to_obj[other_prim];
+            const ObjSym* osigsym = obj_symbol_get(ob, osig);
+            Sym ogname = osigsym ? osigsym->name : 0;
+            ObjGroupId ogid = obj_group(ob, ogname, osig, (u32)osel);
+            obj_group_add_section(ob, ogid, cs->obj_sec);
+          }
+        }
+      }
+    }
+  }
+
+  /* ---- Step 5: per-section relocations ---- */
+  for (u32 i = 0; i < nsections; ++i) {
+    const CSecRec* s = &secs[i];
+    if (!s->number_of_relocations) continue;
+    u64 reloc_end = (u64)s->pointer_to_relocations +
+                    (u64)s->number_of_relocations * (u64)COFF_RELOC_SIZE;
+    if (reloc_end > (u64)len)
+      compiler_panic(c, no_loc(),
+                     "read_coff: relocation table for section %u out of range",
+                     i);
+    const u8* rbase = data + s->pointer_to_relocations;
+    for (u32 j = 0; j < s->number_of_relocations; ++j) {
+      const u8* rp = rbase + (u64)j * COFF_RELOC_SIZE;
+      u32 r_va = coff_rd_u32(rp + 0);
+      u32 r_sym = coff_rd_u32(rp + 4);
+      u16 r_type = coff_rd_u16(rp + 8);
+
+      u32 kind = reloc_from(r_type);
+      if (kind == (u32)-1)
+        compiler_panic(c, no_loc(),
+                       "read_coff: unsupported reloc type %u for machine %#x",
+                       (u32)r_type, (u32)machine);
+
+      /* Out-of-range symbol indices resolve to OBJ_SYM_NONE. */
+      ObjSymId target = OBJ_SYM_NONE;
+      if (r_sym < nsymbols) target = sym_to_obj[r_sym];
+
+      /* ARM64 PAGEOFFSET_12L is one wire code for LDST{8,16,32,64,128}.
+       * The per-arch translator returns R_AARCH64_LDST64_ABS_LO12_NC by
+       * default; recover the actual access width from the patched LDR/
+       * STR instruction's size field at bits [31:30] (and a SIMD/FP
+       * extension via bit 26 + opc[23]) so the linker applies the right
+       * scale.  Mismatch panics at apply-time with "misaligned
+       * address" otherwise — see link_reloc.c.
+       *
+       * The instruction is only read from sections whose raw bytes
+       * Step 2 bounds-checked: uninitialized-data sections are excluded
+       * because their PointerToRawData was never validated. */
+      if ((machine == IMAGE_FILE_MACHINE_ARM64 ||
+           machine == IMAGE_FILE_MACHINE_ARM64EC) &&
+          r_type == IMAGE_REL_ARM64_PAGEOFFSET_12L &&
+          !(s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) &&
+          s->size_of_raw_data && (u64)r_va + 4u <= (u64)s->size_of_raw_data) {
+        const u8* ibytes = data + s->pointer_to_raw_data + r_va;
+        u32 instr = (u32)ibytes[0] | ((u32)ibytes[1] << 8) |
+                    ((u32)ibytes[2] << 16) | ((u32)ibytes[3] << 24);
+        u32 sz = (instr >> 30) & 0x3u;
+        int is_simd = (instr >> 26) & 0x1u;
+        if (is_simd && ((instr >> 23) & 0x1u)) {
+          kind = R_AARCH64_LDST128_ABS_LO12_NC;
+        } else {
+          switch (sz) {
+            case 0: kind = R_AARCH64_LDST8_ABS_LO12_NC; break;
+            case 1: kind = R_AARCH64_LDST16_ABS_LO12_NC; break;
+            case 2: kind = R_AARCH64_LDST32_ABS_LO12_NC; break;
+            default: kind = R_AARCH64_LDST64_ABS_LO12_NC; break;
+          }
+        }
+      }
+
+      /* AMD64 REL32 encodings are relative to a PC after the relocated
+       * field, while cfree's R_PC32-style apply formula subtracts the
+       * relocation field address P.  Plain REL32 is relative to the end
+       * of the 4-byte field (P+4); REL32_N is relative to N further
+       * bytes past that (P+4+N), covering instructions with trailing
+       * immediate bytes.  Record that convention as an implicit negative
+       * addend so link_reloc_apply can stay format neutral. */
+      i64 addend = 0;
+      int has_explicit = 0;
+      if (machine == IMAGE_FILE_MACHINE_AMD64) {
+        switch (r_type) {
+          case IMAGE_REL_AMD64_REL32:
+            addend = -4; has_explicit = 1; break;
+          case IMAGE_REL_AMD64_REL32_1:
+            addend = -5; has_explicit = 1; break;
+          case IMAGE_REL_AMD64_REL32_2:
+            addend = -6; has_explicit = 1; break;
+          case IMAGE_REL_AMD64_REL32_3:
+            addend = -7; has_explicit = 1; break;
+          case IMAGE_REL_AMD64_REL32_4:
+            addend = -8; has_explicit = 1; break;
+          case IMAGE_REL_AMD64_REL32_5:
+            addend = -9; has_explicit = 1; break;
+          default:
+            break;
+        }
+      }
+
+      obj_reloc_ex(ob, s->obj_sec, r_va, (RelocKind)kind, target, addend,
+                   has_explicit, 0);
+    }
+  }
+
+  /* ---- Step 6: finalize and return ---- */
+  obj_finalize(ob);
+  return ob;
+}
diff --git a/src/obj/coff_read_dso.c b/src/obj/coff_read_dso.c
@@ -0,0 +1,235 @@
+/* PE32+ DLL reader. Peer of read_elf_dso / read_macho_dso: walks the
+ * IMAGE_DIRECTORY_ENTRY_EXPORT data directory of a Windows .dll and
+ * produces an ObjBuilder of defined OBJ_SEC_NONE symbols — one per
+ * name in the Export Name Table. The DLL's own Name string (the
+ * analogue of DT_SONAME / LC_ID_DYLIB) is returned via *soname_out.
+ *
+ * The produced ObjBuilder carries no sections, relocations, or groups
+ * — DSO inputs contribute no bytes to the link. The consumer's
+ * resolve_undefs pass sees the exports as defined globals and marks
+ * matching consumer-side undefs as `imported`; the import-table
+ * emitter (Phase 3 / 4.4) later groups them by providing DLL.
+ *
+ * Scope: PE32+ only (IMAGE_NT_OPTIONAL_HDR64_MAGIC), AMD64 or ARM64,
+ * with IMAGE_FILE_DLL set. Ordinal-only exports (entries present in
+ * the EAT but absent from the ENT) are not synthesized in v1 — almost
+ * all real-world imports are by name. Forwarder entries (EAT RVA
+ * falls within the export directory's own range) are still emitted as
+ * symbols so the linker can satisfy imports against them; the OS
+ * loader follows the forwarder chain at runtime. This contract is
+ * pinned by test/coff/pe-dso-forwarder.c. */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "obj/coff.h"
+
+/* All-zero SrcLoc, passed to compiler_panic for errors in binary input
+ * that have no source position to point at. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- RVA -> file offset ----
+ * Scans the section table front to back for the first section whose
+ * [VirtualAddress, VirtualAddress + span) range contains `rva`, where
+ * span is VirtualSize, or SizeOfRawData when VirtualSize is 0.  On a hit
+ * the file offset is stored in *off_out and 1 is returned.  Returns 0
+ * when no section contains the RVA, when the RVA lies past the matching
+ * section's on-disk bytes, or when the offset is at or beyond `len`. */
+static int rva_to_offset(const u8* shdrs, u16 nsec, u32 rva, size_t len,
+                         u64* off_out) {
+  const u8* hdr = shdrs;
+  for (u16 left = nsec; left != 0; --left, hdr += COFF_SECTION_HEADER_SIZE) {
+    const u32 virt_size = coff_rd_u32(hdr + 8);
+    const u32 virt_addr = coff_rd_u32(hdr + 12);
+    const u32 disk_size = coff_rd_u32(hdr + 16);
+    const u32 disk_ptr = coff_rd_u32(hdr + 20);
+
+    /* VirtualSize may be 0; fall back to the on-disk size in that case. */
+    const u32 span = virt_size ? virt_size : disk_size;
+    if (rva < virt_addr || rva >= virt_addr + span) continue;
+
+    /* First containing section decides the outcome, success or not. */
+    const u64 delta = (u64)(rva - virt_addr);
+    if (delta >= disk_size) return 0; /* RVA past on-disk data */
+    const u64 file_off = (u64)disk_ptr + delta;
+    if (file_off >= len) return 0;
+    *off_out = file_off;
+    return 1;
+  }
+  return 0;
+}
+
+/* Locate the NUL-terminated string at file offset `off` without reading
+ * past `len`.  On success *out points at the string and its length
+ * (excluding the NUL) is returned.  When `off` is out of range or no NUL
+ * occurs before the end of the file, *out is set to "" and 0 is
+ * returned. */
+static u32 read_cstr(const u8* data, size_t len, u64 off, const char** out) {
+  *out = "";
+  if (off >= len) return 0;
+
+  const u8* start = data + off;
+  const u8* nul = (const u8*)memchr(start, '\0', (size_t)((u64)len - off));
+  if (!nul) return 0; /* unterminated */
+
+  *out = (const char*)start;
+  return (u32)(nul - start);
+}
+
+/* Read the export table of a PE32+ DLL into a finalized ObjBuilder.
+ *
+ *   c           compiler context (intern pool, panic handler).
+ *   name        unused.
+ *   data, len   the DLL image bytes.
+ *   soname_out  optional; receives the interned DLL name from the export
+ *               directory, or 0 when there is none.
+ *
+ * Each named export becomes a global SK_FUNC symbol at OBJ_SEC_NONE.  A
+ * DLL without an export directory yields an empty builder.  Structural
+ * errors are reported through compiler_panic; individual export entries
+ * that are malformed are skipped. */
+ObjBuilder* read_coff_dso(Compiler* c, const char* name, const u8* data,
+                          size_t len, Sym* soname_out) {
+  (void)name;
+  if (soname_out) *soname_out = 0;
+
+  /* ---- DOS header, then the PE signature it points at ---- */
+  if (len < COFF_DOS_HEADER_SIZE)
+    compiler_panic(c, no_loc(), "read_coff_dso: input shorter than DOS header");
+  const u16 dos_magic = coff_rd_u16(data + 0);
+  if (dos_magic != IMAGE_DOS_SIGNATURE)
+    compiler_panic(c, no_loc(), "read_coff_dso: bad DOS magic 0x%x", dos_magic);
+
+  /* e_lfanew (DOS header offset 60) locates the 4-byte PE signature; the
+   * file header follows it, and the optional header follows that. */
+  const u32 pe_off = coff_rd_u32(data + 60);
+  const u64 file_hdr_off = (u64)pe_off + 4u;
+  const u64 opt_hdr_off = file_hdr_off + COFF_FILE_HEADER_SIZE;
+  if (opt_hdr_off + COFF_OPT_HDR64_SIZE > len)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: PE headers extend past end of file");
+
+  const u32 pe_magic = coff_rd_u32(data + pe_off);
+  if (pe_magic != IMAGE_NT_SIGNATURE)
+    compiler_panic(c, no_loc(), "read_coff_dso: bad PE signature 0x%x",
+                   pe_magic);
+
+  /* ---- IMAGE_FILE_HEADER ---- */
+  const u8* file_hdr = data + file_hdr_off;
+  const u16 machine = coff_rd_u16(file_hdr + 0);
+  const u16 nsec = coff_rd_u16(file_hdr + 2);
+  const u16 size_of_opt = coff_rd_u16(file_hdr + 16);
+  const u16 chars = coff_rd_u16(file_hdr + 18);
+
+  const int machine_ok = (machine == IMAGE_FILE_MACHINE_AMD64 ||
+                          machine == IMAGE_FILE_MACHINE_ARM64);
+  if (!machine_ok)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: unsupported machine 0x%x", machine);
+  if ((chars & IMAGE_FILE_DLL) == 0)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: not a DLL (Characteristics=0x%x)", chars);
+  if (size_of_opt < COFF_OPT_HDR64_SIZE)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: SizeOfOptionalHeader %u too small for PE32+",
+                   size_of_opt);
+
+  /* ---- IMAGE_OPTIONAL_HEADER64 ---- */
+  const u8* opt_hdr = data + opt_hdr_off;
+  const u16 opt_magic = coff_rd_u16(opt_hdr + 0);
+  if (opt_magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: not PE32+ (optional header Magic=0x%x)",
+                   opt_magic);
+
+  /* The data directories occupy the tail of the PE32+ optional header;
+   * pick out the export entry (RVA, then Size). */
+  const u8* dirs = opt_hdr + COFF_OPT_HDR64_SIZE
+                   - COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE;
+  const u8* export_entry =
+      dirs + IMAGE_DIRECTORY_ENTRY_EXPORT * COFF_DATA_DIRECTORY_SIZE;
+  const u32 export_rva = coff_rd_u32(export_entry);
+  const u32 export_size = coff_rd_u32(export_entry + 4u);
+
+  /* ---- section table ---- */
+  const u64 shdrs_off = opt_hdr_off + size_of_opt;
+  const u64 shdrs_end = shdrs_off + (u64)nsec * COFF_SECTION_HEADER_SIZE;
+  if (shdrs_end > len)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: section table extends past end of file");
+  const u8* shdrs = data + shdrs_off;
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_coff_dso: obj_new failed");
+
+  /* No export directory => empty DSO (legal for stub DLLs). */
+  if (export_rva == 0 || export_size == 0) {
+    obj_finalize(ob);
+    return ob;
+  }
+
+  /* ---- export directory ---- */
+  u64 dir_off;
+  if (!rva_to_offset(shdrs, nsec, export_rva, len, &dir_off))
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: export directory RVA 0x%x out of range",
+                   export_rva);
+  if (dir_off + COFF_EXPORT_DIR_SIZE > len)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: export directory truncated");
+
+  /* The ordinal Base field (offset 16) is not read: imports are matched
+   * by name. */
+  const u8* dir = data + dir_off;
+  const u32 dll_name_rva = coff_rd_u32(dir + 12);
+  const u32 num_funcs = coff_rd_u32(dir + 20);
+  const u32 num_names = coff_rd_u32(dir + 24);
+  const u32 eat_rva = coff_rd_u32(dir + 28);
+  const u32 ent_rva = coff_rd_u32(dir + 32);
+  const u32 ord_rva = coff_rd_u32(dir + 36);
+
+  /* ---- DLL name (soname) ---- */
+  if (dll_name_rva != 0) {
+    u64 dll_name_off;
+    if (!rva_to_offset(shdrs, nsec, dll_name_rva, len, &dll_name_off))
+      compiler_panic(c, no_loc(),
+                     "read_coff_dso: DLL name RVA 0x%x out of range",
+                     dll_name_rva);
+    const char* dll_name;
+    const u32 dll_name_len = read_cstr(data, len, dll_name_off, &dll_name);
+    if (dll_name_len && soname_out)
+      *soname_out = pool_intern(c->global, dll_name, dll_name_len);
+  }
+
+  /* Nothing exported by name: the tables are not consulted at all. */
+  if (num_names == 0) {
+    obj_finalize(ob);
+    return ob;
+  }
+
+  /* ---- resolve EAT / ENT / ordinal table once ---- */
+  u64 eat_off = 0;
+  u64 ent_off = 0;
+  u64 ord_off = 0;
+  if (!rva_to_offset(shdrs, nsec, eat_rva, len, &eat_off))
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: EAT RVA 0x%x out of range", eat_rva);
+  if (!rva_to_offset(shdrs, nsec, ent_rva, len, &ent_off))
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: ENT RVA 0x%x out of range", ent_rva);
+  if (!rva_to_offset(shdrs, nsec, ord_rva, len, &ord_off))
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: ordinal table RVA 0x%x out of range",
+                   ord_rva);
+
+  const int names_overrun = ent_off + (u64)num_names * 4u > len;
+  const int ords_overrun = ord_off + (u64)num_names * 2u > len;
+  if (names_overrun || ords_overrun)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: ENT/ordinal table extends past file");
+  if (eat_off + (u64)num_funcs * 4u > len)
+    compiler_panic(c, no_loc(),
+                   "read_coff_dso: EAT extends past file");
+
+  /* ---- walk the ENT ----
+   * Forwarder entries are not told apart from ordinary exports: both
+   * produce a symbol, so an import against either can be satisfied. */
+  const u8* ent = data + ent_off;
+  const u8* ords = data + ord_off;
+  for (u32 idx = 0; idx < num_names; ++idx) {
+    const u32 sym_rva = coff_rd_u32(ent + (u64)idx * 4u);
+    const u16 ordinal = coff_rd_u16(ords + (u64)idx * 2u);
+    if (ordinal >= num_funcs) continue; /* malformed; skip rather than panic */
+
+    /* The EAT slot is read but its value is deliberately unused: DSO
+     * symbols carry no address. */
+    const u32 func_rva = coff_rd_u32(data + eat_off + (u64)ordinal * 4u);
+    (void)func_rva;
+
+    u64 sym_name_off;
+    if (!rva_to_offset(shdrs, nsec, sym_rva, len, &sym_name_off)) continue;
+    const char* sym_name;
+    const u32 sym_name_len = read_cstr(data, len, sym_name_off, &sym_name);
+    if (sym_name_len == 0) continue;
+
+    Sym interned = pool_intern(c->global, sym_name, sym_name_len);
+    ObjSymId sym_id = obj_symbol(ob, interned, SB_GLOBAL, SK_FUNC,
+                                 OBJ_SEC_NONE, 0, 0);
+    obj_sym_mark_referenced(ob, sym_id);
+  }
+
+  obj_finalize(ob);
+  return ob;
+}
diff --git a/src/obj/coff_reloc_aarch64.c b/src/obj/coff_reloc_aarch64.c
@@ -0,0 +1,96 @@
+/* RelocKind <-> AArch64 PE/COFF reloc-type mapping. Mirror of
+ * elf_reloc_aarch64.c for PE/COFF.
+ *
+ * The ARM64 PE/COFF reloc set covers the common AArch64 patch sites:
+ * ADRP page-base / page-offset pairs, BRANCH26/19/14, ADDR32/64, plus
+ * the section-relative SECREL, SECTION and SECREL_{LOW,HIGH}12A forms.
+ * PAGEOFFSET_12L collapses all LDST*_ABS_LO12_NC widths into one wire
+ * code; the width is recoverable from the patched LDR/STR instruction
+ * encoding, so the reader picks the LDST64 form and the consumer can
+ * disambiguate later if it cares. ADDR32NB is image-relative; v1
+ * collapses it to R_ABS32 and lets layout subtract the image base. */
+
+#include "obj/coff.h"
+
+u32 coff_aarch64_reloc_to(u32 kind /* RelocKind */) {
+ switch (kind) { /* cfree RelocKind -> IMAGE_REL_ARM64_* wire code */
+ case R_NONE:
+ return IMAGE_REL_ARM64_ABSOLUTE;
+ case R_ABS64:
+ return IMAGE_REL_ARM64_ADDR64;
+ case R_ABS32:
+ return IMAGE_REL_ARM64_ADDR32;
+ case R_AARCH64_CALL26:
+ case R_AARCH64_JUMP26: /* CALL26 and JUMP26 share one wire code */
+ return IMAGE_REL_ARM64_BRANCH26;
+ case R_AARCH64_CONDBR19:
+ return IMAGE_REL_ARM64_BRANCH19;
+ case R_AARCH64_TSTBR14:
+ return IMAGE_REL_ARM64_BRANCH14;
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ case R_AARCH64_ADR_PREL_PG_HI21_NC: /* both PG_HI21 kinds share a code */
+ return IMAGE_REL_ARM64_PAGEBASE_REL21;
+ case R_AARCH64_ADR_PREL_LO21:
+ return IMAGE_REL_ARM64_REL21;
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ return IMAGE_REL_ARM64_PAGEOFFSET_12A;
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ case R_AARCH64_LDST128_ABS_LO12_NC: /* every LDST width -> one wire code */
+ return IMAGE_REL_ARM64_PAGEOFFSET_12L;
+ case R_PC32:
+ case R_REL32:
+ return IMAGE_REL_ARM64_REL32;
+ case R_COFF_SECREL:
+ return IMAGE_REL_ARM64_SECREL;
+ case R_COFF_SECTION:
+ return IMAGE_REL_ARM64_SECTION;
+ case R_COFF_AARCH64_SECREL_LOW12A:
+ return IMAGE_REL_ARM64_SECREL_LOW12A;
+ case R_COFF_AARCH64_SECREL_HIGH12A:
+ return IMAGE_REL_ARM64_SECREL_HIGH12A;
+ default: /* unmapped kind: yields the same code as R_NONE */
+ return IMAGE_REL_ARM64_ABSOLUTE;
+ }
+}
+
+u32 coff_aarch64_reloc_from(u32 wire_type) {
+ switch (wire_type) { /* IMAGE_REL_ARM64_* wire code -> cfree RelocKind */
+ case IMAGE_REL_ARM64_ABSOLUTE:
+ return R_NONE;
+ case IMAGE_REL_ARM64_ADDR64:
+ return R_ABS64;
+ case IMAGE_REL_ARM64_ADDR32:
+ return R_ABS32;
+ case IMAGE_REL_ARM64_ADDR32NB: /* image-relative; read as plain R_ABS32 */
+ return R_ABS32;
+ case IMAGE_REL_ARM64_BRANCH26:
+ return R_AARCH64_CALL26; /* JUMP26 is never produced on the read side */
+ case IMAGE_REL_ARM64_BRANCH19:
+ return R_AARCH64_CONDBR19;
+ case IMAGE_REL_ARM64_BRANCH14:
+ return R_AARCH64_TSTBR14;
+ case IMAGE_REL_ARM64_PAGEBASE_REL21:
+ return R_AARCH64_ADR_PREL_PG_HI21; /* the _NC kind is never produced */
+ case IMAGE_REL_ARM64_REL21:
+ return R_AARCH64_ADR_PREL_LO21;
+ case IMAGE_REL_ARM64_PAGEOFFSET_12A:
+ return R_AARCH64_ADD_ABS_LO12_NC;
+ case IMAGE_REL_ARM64_PAGEOFFSET_12L:
+ return R_AARCH64_LDST64_ABS_LO12_NC; /* wire code has no width; pick 64 */
+ case IMAGE_REL_ARM64_REL32:
+ return R_PC32;
+ case IMAGE_REL_ARM64_SECREL:
+ return R_COFF_SECREL;
+ case IMAGE_REL_ARM64_SECTION:
+ return R_COFF_SECTION;
+ case IMAGE_REL_ARM64_SECREL_LOW12A:
+ return R_COFF_AARCH64_SECREL_LOW12A;
+ case IMAGE_REL_ARM64_SECREL_HIGH12A:
+ return R_COFF_AARCH64_SECREL_HIGH12A;
+ default:
+ return (u32)-1; /* sentinel: wire type has no case in this switch */
+ }
+}
diff --git a/src/obj/coff_reloc_x86_64.c b/src/obj/coff_reloc_x86_64.c
@@ -0,0 +1,76 @@
+/* RelocKind <-> x86_64 PE/COFF reloc-type mapping. Mirror of
+ * elf_reloc_x86_64.c for PE/COFF.
+ *
+ * PE/COFF's AMD64 reloc set is much narrower than ELF's: only ABSOLUTE,
+ * ADDR64, ADDR32, ADDR32NB, REL32 (with REL32_1..5 implicit-addend
+ * variants), plus SECREL / SECTION (SECREL7 is read as a no-op R_NONE).
+ * We emit plain REL32 (4) for every PC-relative kind and let the
+ * explicit Reloc.addend ride in the patched bytes; on the read side
+ * REL32_1..5 collapse to R_PC32 (the reader applies the implicit
+ * addend itself). IMAGE_REL_AMD64_ABSOLUTE (== 0) doubles as the
+ * "unsupported" sentinel on the _to side, matching the ELF contract. */
+
+#include "obj/coff.h"
+
+u32 coff_x86_64_reloc_to(u32 kind /* RelocKind */) {
+ switch (kind) { /* cfree RelocKind -> IMAGE_REL_AMD64_* wire code */
+ case R_NONE:
+ return IMAGE_REL_AMD64_ABSOLUTE;
+ case R_ABS64:
+ return IMAGE_REL_AMD64_ADDR64;
+ case R_ABS32:
+ return IMAGE_REL_AMD64_ADDR32;
+ case R_X64_32S: /* paired with ADDR32NB in both directions */
+ return IMAGE_REL_AMD64_ADDR32NB;
+ case R_PC32:
+ case R_REL32:
+ case R_PLT32:
+ case R_X64_PLT32:
+ case R_X64_GOTPCREL:
+ case R_X64_GOTPCRELX:
+ case R_X64_REX_GOTPCRELX: /* every PC-relative kind collapses to REL32 */
+ return IMAGE_REL_AMD64_REL32;
+ case R_COFF_SECREL:
+ return IMAGE_REL_AMD64_SECREL;
+ case R_COFF_SECTION:
+ return IMAGE_REL_AMD64_SECTION;
+ default: /* unsupported kind: ABSOLUTE doubles as the sentinel */
+ return IMAGE_REL_AMD64_ABSOLUTE;
+ }
+}
+
+u32 coff_x86_64_reloc_from(u32 wire_type) {
+ switch (wire_type) { /* IMAGE_REL_AMD64_* wire code -> cfree RelocKind */
+ case IMAGE_REL_AMD64_ABSOLUTE:
+ return R_NONE;
+ case IMAGE_REL_AMD64_ADDR64:
+ return R_ABS64;
+ case IMAGE_REL_AMD64_ADDR32:
+ return R_ABS32;
+ case IMAGE_REL_AMD64_ADDR32NB: /* inverse of the R_X64_32S write mapping */
+ return R_X64_32S;
+ case IMAGE_REL_AMD64_REL32:
+ case IMAGE_REL_AMD64_REL32_1:
+ case IMAGE_REL_AMD64_REL32_2:
+ case IMAGE_REL_AMD64_REL32_3:
+ case IMAGE_REL_AMD64_REL32_4:
+ case IMAGE_REL_AMD64_REL32_5: /* REL32_1..5 all read back as R_PC32 */
+ return R_PC32;
+ case IMAGE_REL_AMD64_SECREL:
+ return R_COFF_SECREL;
+ case IMAGE_REL_AMD64_SECTION:
+ return R_COFF_SECTION;
+ /* SECREL7 (7-bit section-relative) appears in mingw-emitted archive
+ * members (intrinsic helpers, exception tables, DWARF). cfree
+ * doesn't currently apply or emit these, but panicking at read
+ * time would block ingesting any mingw archive whose non-import
+ * members carry .debug_info / .pdata. Map to R_NONE so the
+ * relocation slot is preserved structurally but treated as a
+ * no-op by the relocator; the member can still be dead-stripped
+ * when nothing references it. */
+ case IMAGE_REL_AMD64_SECREL7:
+ return R_NONE;
+ default:
+ return (u32)-1; /* sentinel: wire type has no case in this switch */
+ }
+}
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -33,6 +33,11 @@ struct CfreeObjBuilder {
* derives a sensible default by arch. */
u32 elf_e_flags;
u8 elf_e_flags_set;
+ /* COFF short-import annotation. Carries the providing DLL name when
+ * the builder was synthesized by read_coff from a Microsoft short
+ * import record; zero / unset otherwise. See obj_set_coff_import_dll. */
+ Sym coff_import_dll;
+ u8 coff_import_dll_set;
/* Cached undef extern `__tlv_bootstrap` (Mach-O on-disk name) used by
* obj_define_tls when emitting `_Thread_local` storage on Mach-O.
* Lazily materialized on the first TLV emission; OBJ_SYM_NONE otherwise. */
@@ -113,6 +118,18 @@ int obj_get_elf_e_flags(const ObjBuilder* ob, u32* out) {
return 1;
}
+void obj_set_coff_import_dll(ObjBuilder* ob, Sym dll_name) {
+  if (!ob) return; /* no builder: nothing to annotate */
+  ob->coff_import_dll_set = 1; /* presence bit: marks the name as valid */
+  ob->coff_import_dll = dll_name;
+}
+
+int obj_get_coff_import_dll(const ObjBuilder* ob, Sym* out) {
+  int present = ob && ob->coff_import_dll_set; /* 0 when never annotated */
+  if (present && out) *out = ob->coff_import_dll; /* `out` may be NULL */
+  return present;
+}
+
/* ---- write side ---- */
ObjSecId obj_section(ObjBuilder* ob, Sym name, SecKind kind, u16 flags,
@@ -766,6 +783,10 @@ const char* reloc_kind_name(RelocKind k) {
_CASE(R_WASM_TABLEIDX);
_CASE(R_WASM_MEMOFS);
_CASE(R_WASM_TYPEIDX);
+ _CASE(R_COFF_SECREL);
+ _CASE(R_COFF_SECTION);
+ _CASE(R_COFF_AARCH64_SECREL_LOW12A);
+ _CASE(R_COFF_AARCH64_SECREL_HIGH12A);
#undef _CASE
}
return "UNKNOWN";
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -236,6 +236,24 @@ typedef enum RelocKind {
R_WASM_TABLEIDX,
R_WASM_MEMOFS,
R_WASM_TYPEIDX,
+ /* COFF/PE-only reloc kinds — section-relative fixups used by Windows
+ * TLS Local-Exec lowering and debug info. SECREL = 32-bit offset
+ * from the start of the containing section. SECTION = 16-bit section
+ * index (1-based). Both arch-independent on the cfree side; the
+ * per-arch translators map to IMAGE_REL_{AMD64,ARM64}_SECREL/SECTION. */
+ R_COFF_SECREL,
+ R_COFF_SECTION,
+ /* AArch64 Windows TLS access uses an ADD-imm12-pair to materialize a
+ * 24-bit SECREL value into a register:
+ * add xd, xd, #:secrel_hi12:sym, lsl #12 ; HIGH12A bits [23:12]
+ * add xd, xd, #:secrel_lo12:sym ; LOW12A bits [11:0]
+ * The instruction at the patch site already has sh=1 (HIGH) or sh=0
+ * (LOW) preset by the codegen; the linker only patches the imm12
+ * field at bits [21:10]. Neither form is overflow-checked (the analogue
+ * of an ELF "_NC" reloc): SECREL bits 24 and above are silently
+ * discarded — fine for any .tls section under 16 MiB. */
+ R_COFF_AARCH64_SECREL_LOW12A,
+ R_COFF_AARCH64_SECREL_HIGH12A,
} RelocKind;
typedef struct Section {
@@ -466,6 +484,18 @@ void obj_sweep_dead(ObjBuilder*);
void obj_set_elf_e_flags(ObjBuilder*, u32 e_flags);
int obj_get_elf_e_flags(const ObjBuilder*, u32* out);
+/* COFF short-import shim annotation. Set by read_coff when the input
+ * is a Microsoft "short import" record (Sig1=0, Sig2=0xFFFF) found
+ * inside a .lib archive member: the ObjBuilder synthesizes the
+ * imported symbol(s) the long-form import object would have provided,
+ * and stores the providing DLL name here so the archive-ingestion
+ * layer (Phase 4.3) can reclassify the resulting LinkInput as a
+ * DSO with this name as the soname. Unset (returns 0 from the
+ * getter) on every other input. The setter records a presence bit
+ * the same way obj_set_elf_e_flags does. */
+void obj_set_coff_import_dll(ObjBuilder*, Sym dll_name);
+int obj_get_coff_import_dll(const ObjBuilder*, Sym* out);
+
/* Per-symbol format-specific flag bits. ObjSym.flags is otherwise
* unused; readers stash format-specific attribute bits there so the
* matching emitter can re-apply them. Today this is Mach-O n_desc
@@ -645,6 +675,18 @@ ObjBuilder* read_elf(Compiler*, const char* name, const u8* data, size_t len);
ObjBuilder* read_elf_dso(Compiler*, const char* name, const u8* data,
size_t len, Sym* soname_out);
ObjBuilder* read_coff(Compiler*, const char* name, const u8* data, size_t len);
+/* PE32+ DLL reader. Walks the IMAGE_DIRECTORY_ENTRY_EXPORT data
+ * directory and produces an ObjBuilder containing one defined symbol
+ * (OBJ_SEC_NONE, SB_GLOBAL, SK_FUNC) per name in the Export Name
+ * Table — the peer of read_elf_dso / read_macho_dso. The DLL's
+ * own Name string (the analogue of DT_SONAME / LC_ID_DYLIB) is
+ * interned and returned via *soname_out, or 0 if missing.
+ *
+ * Scope: PE32+ images with IMAGE_FILE_DLL set, machine AMD64 or
+ * ARM64. Ordinal-only exports (in the EAT but not the ENT) are not
+ * synthesized in v1 — almost all real-world imports are by name. */
+ObjBuilder* read_coff_dso(Compiler*, const char* name, const u8* data,
+ size_t len, Sym* soname_out);
ObjBuilder* read_macho(Compiler*, const char* name, const u8* data, size_t len);
/* Mach-O MH_DYLIB reader. Produces an ObjBuilder containing only the
* dylib's exported symbols (as defined OBJ_SEC_NONE entries — the
diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c
@@ -40,6 +40,10 @@ Sym obj_secname_init_array(Compiler* c) {
return pool_intern_cstr(c->global, ".init_array");
case CFREE_OBJ_MACHO:
return pool_intern_cstr(c->global, "__DATA,__mod_init_func");
+ case CFREE_OBJ_COFF:
+ /* CRT runtime scans `.CRT$X[A-Z]` for ctor/dtor tables; XCU is
+ * the user-constructor bucket. See doc/WINDOWS.md §1.6. */
+ return pool_intern_cstr(c->global, ".CRT$XCU");
default:
return secname_panic_unimpl(c, ".init_array");
}
@@ -51,6 +55,10 @@ Sym obj_secname_fini_array(Compiler* c) {
return pool_intern_cstr(c->global, ".fini_array");
case CFREE_OBJ_MACHO:
return pool_intern_cstr(c->global, "__DATA,__mod_term_func");
+ case CFREE_OBJ_COFF:
+ /* `.CRT$XPA`/`XPZ` are markers; XPU is the user-destructor
+ * bucket. See doc/WINDOWS.md §1.6. */
+ return pool_intern_cstr(c->global, ".CRT$XPU");
default:
return secname_panic_unimpl(c, ".fini_array");
}
@@ -67,6 +75,10 @@ Sym obj_secname_preinit_array(Compiler* c) {
* caller hitting this on a MACHO target is doing something the
* platform can't represent. */
return secname_panic_unimpl(c, ".preinit_array");
+ case CFREE_OBJ_COFF:
+ /* CRT's own setup runs in `.CRT$XI*`; user pre-init lives at
+ * XIA just after the CRT. See doc/WINDOWS.md §1.6. */
+ return pool_intern_cstr(c->global, ".CRT$XIA");
default:
return secname_panic_unimpl(c, ".preinit_array");
}
@@ -78,6 +90,10 @@ Sym obj_secname_tdata(Compiler* c) {
return pool_intern_cstr(c->global, ".tdata");
case CFREE_OBJ_MACHO:
return pool_intern_cstr(c->global, "__DATA,__thread_data");
+ case CFREE_OBJ_COFF:
+ /* MSVC `.tls$` convention; linker concatenates `.tls$*` sorted
+ * by suffix. See doc/WINDOWS.md §1.6. */
+ return pool_intern_cstr(c->global, ".tls$");
default:
return secname_panic_unimpl(c, ".tdata");
}
@@ -89,6 +105,10 @@ Sym obj_secname_tbss(Compiler* c) {
return pool_intern_cstr(c->global, ".tbss");
case CFREE_OBJ_MACHO:
return pool_intern_cstr(c->global, "__DATA,__thread_bss");
+ case CFREE_OBJ_COFF:
+ /* sorted-alphabetically-last so it falls at the tail of the TLS
+ * image's zero-fill region. See doc/WINDOWS.md §1.6. */
+ return pool_intern_cstr(c->global, ".tls$ZZZ");
default:
return secname_panic_unimpl(c, ".tbss");
}
@@ -169,5 +189,9 @@ void obj_format_demangle_c(const Compiler* c, const char** name,
* literal; caller interns. */
const char* obj_format_default_entry_name(const Compiler* c) {
if (c && c->target.obj == CFREE_OBJ_MACHO) return "_main";
+ /* COFF: PE/Windows CRT entry sets up argc/argv and calls main.
+ * Resolved against the user-supplied CRT archive (mingw's
+ * libmingwex.a). See doc/WINDOWS.md §1.6. */
+ if (c && c->target.obj == CFREE_OBJ_COFF) return "mainCRTStartup";
return "_start";
}
diff --git a/test/api/abi_classify_test.c b/test/api/abi_classify_test.c
@@ -59,6 +59,9 @@ static int g_fail;
/* Storage outlives every Compiler; cfree_compiler_new just stores `ctx`. */
static CfreeContext g_ctx;
+static void expect_direct_1x_int(const char* tag, const ABIArgInfo* ai,
+ u32 want_size);
+
static CfreeCompiler* new_compiler(CfreeArchKind arch, CfreeOSKind os,
CfreeObjFmt obj) {
CfreeTarget t;
@@ -98,7 +101,7 @@ static const ABIFuncInfo* classify_fn(CfreeCompiler* c, CfreeCgTypeId ret_ty,
static const char* arch_name(CfreeArchKind a) {
switch (a) {
- case CFREE_ARCH_X86_64: return "sysv-x64";
+ case CFREE_ARCH_X86_64: return "x64";
case CFREE_ARCH_ARM_64: return "aarch64";
case CFREE_ARCH_RV64: return "rv64";
default: return "?";
@@ -106,9 +109,10 @@ static const char* arch_name(CfreeArchKind a) {
}
static const char* os_name(CfreeOSKind o) {
switch (o) {
- case CFREE_OS_LINUX: return "linux";
- case CFREE_OS_MACOS: return "macos";
- default: return "?";
+ case CFREE_OS_LINUX: return "linux";
+ case CFREE_OS_MACOS: return "macos";
+ case CFREE_OS_WINDOWS: return "windows";
+ default: return "?";
}
}
@@ -216,7 +220,17 @@ static void check_target(CfreeArchKind arch, CfreeOSKind os, CfreeObjFmt obj) {
{
const ABIFuncInfo* fi = classify_fn(c, f128_ty, f128_ty);
snprintf(tag, sizeof tag, "%s/%s f128 arg", arch_name(arch), os_name(os));
- if (arch == CFREE_ARCH_X86_64) {
+ if (arch == CFREE_ARCH_X86_64 && os == CFREE_OS_WINDOWS) {
+ /* Win64: long double is 64-bit double. Front end normally lowers
+ * f128 before classification; defensive path treats size-16 FP as
+ * a size-8 double — DIRECT/1 FP part of 8B for both arg and ret. */
+ expect_direct_1x_fp(tag, &fi->params[0], 8);
+ snprintf(tag, sizeof tag, "%s/%s f128 ret", arch_name(arch),
+ os_name(os));
+ expect_direct_1x_fp(tag, &fi->ret, 8);
+ EXPECT(fi->has_sret == 0, "%s/%s: f128 should not set has_sret",
+ arch_name(arch), os_name(os));
+ } else if (arch == CFREE_ARCH_X86_64) {
/* SysV-x64: long double is x87 (80-bit padded to 16B). cfree lacks
* x87 support; classify as INDIRECT (memory) so it routes through
* a stack image consistent with the wide16 CG-layer shortcut. */
@@ -257,41 +271,321 @@ static void check_target(CfreeArchKind arch, CfreeOSKind os, CfreeObjFmt obj) {
const ABIFuncInfo* fi = classify_fn(c, f64_i64, f64_i64);
snprintf(tag, sizeof tag, "%s/%s {double,long} arg",
arch_name(arch), os_name(os));
- expect_direct_2(tag, &fi->params[0], ABI_CLASS_FP, ABI_CLASS_INT, 8, 8);
+ if (os == CFREE_OS_WINDOWS) {
+ expect_indirect(tag, &fi->params[0], /*is_return=*/0);
+ } else {
+ expect_direct_2(tag, &fi->params[0], ABI_CLASS_FP, ABI_CLASS_INT, 8,
+ 8);
+ }
snprintf(tag, sizeof tag, "%s/%s {double,long} ret",
arch_name(arch), os_name(os));
- expect_direct_2(tag, &fi->ret, ABI_CLASS_FP, ABI_CLASS_INT, 8, 8);
- EXPECT(fi->has_sret == 0, "%s/%s: mixed record should not use sret",
- arch_name(arch), os_name(os));
+ if (os == CFREE_OS_WINDOWS) {
+ expect_indirect(tag, &fi->ret, /*is_return=*/1);
+ EXPECT(fi->has_sret == 1, "%s/%s: mixed record should use sret",
+ arch_name(arch), os_name(os));
+ } else {
+ expect_direct_2(tag, &fi->ret, ABI_CLASS_FP, ABI_CLASS_INT, 8, 8);
+ EXPECT(fi->has_sret == 0, "%s/%s: mixed record should not use sret",
+ arch_name(arch), os_name(os));
+ }
}
{
const ABIFuncInfo* fi = classify_fn(c, i64_f64, i64_f64);
snprintf(tag, sizeof tag, "%s/%s {long,double} arg",
arch_name(arch), os_name(os));
- expect_direct_2(tag, &fi->params[0], ABI_CLASS_INT, ABI_CLASS_FP, 8, 8);
+ if (os == CFREE_OS_WINDOWS) {
+ expect_indirect(tag, &fi->params[0], /*is_return=*/0);
+ } else {
+ expect_direct_2(tag, &fi->params[0], ABI_CLASS_INT, ABI_CLASS_FP, 8,
+ 8);
+ }
snprintf(tag, sizeof tag, "%s/%s {long,double} ret",
arch_name(arch), os_name(os));
- expect_direct_2(tag, &fi->ret, ABI_CLASS_INT, ABI_CLASS_FP, 8, 8);
+ if (os == CFREE_OS_WINDOWS) {
+ expect_indirect(tag, &fi->ret, /*is_return=*/1);
+ } else {
+ expect_direct_2(tag, &fi->ret, ABI_CLASS_INT, ABI_CLASS_FP, 8, 8);
+ }
}
{
const ABIFuncInfo* fi = classify_fn(c, f32x2, f32x2);
snprintf(tag, sizeof tag, "%s/%s {float,float} arg",
arch_name(arch), os_name(os));
- expect_direct_1x_fp(tag, &fi->params[0], 8);
+ if (os == CFREE_OS_WINDOWS)
+ expect_direct_1x_int(tag, &fi->params[0], 8);
+ else
+ expect_direct_1x_fp(tag, &fi->params[0], 8);
snprintf(tag, sizeof tag, "%s/%s {float,float} ret",
arch_name(arch), os_name(os));
- expect_direct_1x_fp(tag, &fi->ret, 8);
+ if (os == CFREE_OS_WINDOWS)
+ expect_direct_1x_int(tag, &fi->ret, 8);
+ else
+ expect_direct_1x_fp(tag, &fi->ret, 8);
}
}
cfree_compiler_free(c);
}
+/* Make a record of `nfields` i8 members named f0..ff (16 at most), i.e. an
+ * aggregate whose size is `nfields` bytes and whose alignment is 1. */
+static CfreeCgTypeId make_i8_record(CfreeCompiler* c, const char* tag_name,
+                                    u32 nfields) {
+  static const char* const kMemberNames[16] = {
+      "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
+      "f8", "f9", "fa", "fb", "fc", "fd", "fe", "ff"};
+  CfreeCgBuiltinTypes builtins = cfree_cg_builtin_types(c);
+  CfreeCgField members[16];
+  if (nfields > 16) exit(2); /* harness misuse: abort the test binary */
+  memset(members, 0, sizeof members);
+  for (u32 idx = 0; idx != nfields; ++idx) {
+    members[idx].type = builtins.id[CFREE_CG_BUILTIN_I8];
+    members[idx].name = cfree_sym_intern(c, kMemberNames[idx]);
+  }
+  return cfree_cg_type_record(c, cfree_sym_intern(c, tag_name), members,
+                              nfields);
+}
+
+/* Make the record { i64 a; i64 b; } (size 16, align 8) tagged `tag_n`. */
+static CfreeCgTypeId make_two_i64_record(CfreeCompiler* c, const char* tag_n) {
+  static const char* const kNames[2] = {"a", "b"};
+  CfreeCgBuiltinTypes builtins = cfree_cg_builtin_types(c);
+  CfreeCgField pair[2];
+  memset(pair, 0, sizeof pair);
+  for (u32 k = 0; k != 2; ++k) {
+    pair[k].name = cfree_sym_intern(c, kNames[k]);
+    pair[k].type = builtins.id[CFREE_CG_BUILTIN_I64];
+  }
+  return cfree_cg_type_record(c, cfree_sym_intern(c, tag_n), pair, 2);
+}
+
+/* Build the function type `ret_ty fn(arg_types[0..nargs-1])` (8 params at
+ * most, variadic when `variadic` is nonzero) and return its ABI info. */
+static const ABIFuncInfo* classify_fn_n(CfreeCompiler* c, CfreeCgTypeId ret_ty,
+                                        const CfreeCgTypeId* arg_types,
+                                        u32 nargs, int variadic) {
+  CfreeCgFuncParam slots[8];
+  CfreeCgFuncSig proto;
+  if (nargs > 8) exit(2); /* harness misuse: abort the test binary */
+  memset(slots, 0, sizeof slots);
+  for (u32 k = 0; k != nargs; ++k) slots[k].type = arg_types[k];
+  memset(&proto, 0, sizeof proto);
+  proto.abi_variadic = (variadic != 0);
+  proto.nparams = nargs;
+  proto.params = slots;
+  proto.ret = ret_ty;
+  CfreeCgTypeId fn_ty = cfree_cg_type_func(c, proto);
+  return abi_cg_func_info(((Compiler*)c)->abi, fn_ty);
+}
+
+/* Expect INDIRECT (memory image) with zero parts, `want_align` as the
+ * indirect alignment, and SRET (is_return) or BYVAL (otherwise) set. Win64
+ * keeps the type's natural alignment — 1, not 8, for a 3-byte i8 record. */
+static void expect_indirect_align(const char* tag, const ABIArgInfo* ai,
+ int is_return, u32 want_align) {
+ EXPECT(ai->kind == ABI_ARG_INDIRECT, "%s: kind=%d want INDIRECT", tag,
+ (int)ai->kind);
+ EXPECT(ai->nparts == 0, "%s: nparts=%u want 0", tag, (unsigned)ai->nparts);
+ EXPECT(ai->indirect_align == want_align,
+ "%s: indirect_align=%u want %u", tag, (unsigned)ai->indirect_align,
+ want_align);
+ u32 expected_flag = is_return ? ABI_AF_SRET : ABI_AF_BYVAL; /* ret vs arg */
+ EXPECT((ai->flags & expected_flag) != 0,
+ "%s: flags=0x%x missing %s", tag, (unsigned)ai->flags,
+ is_return ? "SRET" : "BYVAL");
+}
+
+/* Expect DIRECT with exactly one part: class INT, size `want_size`. */
+static void expect_direct_1x_int(const char* tag, const ABIArgInfo* ai,
+ u32 want_size) {
+ EXPECT(ai->kind == ABI_ARG_DIRECT, "%s: kind=%d want DIRECT", tag,
+ (int)ai->kind);
+ EXPECT(ai->nparts == 1, "%s: nparts=%u want 1", tag, (unsigned)ai->nparts);
+ if (ai->nparts != 1 || !ai->parts) return; /* don't index parts[] blindly */
+ EXPECT(ai->parts[0].cls == ABI_CLASS_INT,
+ "%s: parts[0].cls=%d want INT", tag, (int)ai->parts[0].cls);
+ EXPECT(ai->parts[0].size == want_size,
+ "%s: parts[0].size=%u want %u", tag, (unsigned)ai->parts[0].size,
+ want_size);
+}
+
+/* Win64-specific ABI shape assertions: aggregate rules ({1,2,4,8} by value
+ * else hidden pointer), va_list = void*, variadic flag wiring, and that
+ * each scalar arg gets one part of the right class (reg-vs-stack
+ * placement is codegen, not classifier output). */
+static void test_win64_specifics(void) {
+ CfreeCompiler* c =
+ new_compiler(CFREE_ARCH_X86_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF);
+ CfreeCgBuiltinTypes bi = cfree_cg_builtin_types(c);
+ CfreeCgTypeId i32 = bi.id[CFREE_CG_BUILTIN_I32];
+ CfreeCgTypeId f64 = bi.id[CFREE_CG_BUILTIN_F64];
+ CfreeCgTypeId voidp = cfree_cg_type_ptr(c, bi.id[CFREE_CG_BUILTIN_VOID], 0);
+ CfreeCgTypeId rec1 = make_i8_record(c, "S1", 1); /* 1 x i8 */
+ CfreeCgTypeId rec3 = make_i8_record(c, "S3", 3); /* 3 x i8 */
+ CfreeCgTypeId rec16 = make_two_i64_record(c, "S16"); /* { i64; i64 } */
+
+ /* Case 1: int main(void) — DIRECT/1 INT/4 ret, no params. */
+ {
+ const ABIFuncInfo* fi = classify_fn_n(c, i32, NULL, 0, 0);
+ expect_direct_1x_int("win64 main ret", &fi->ret, 4);
+ EXPECT(fi->nparams == 0, "win64 main: nparams=%u want 0",
+ (unsigned)fi->nparams);
+ EXPECT(fi->has_sret == 0, "win64 main: has_sret set");
+ EXPECT(fi->variadic == 0, "win64 main: variadic set");
+ }
+
+ /* Case 2: void f(int,int,int,int,int) — 5 ints, each DIRECT/1 INT/4.
+ * Reg vs stack placement (4 reg slots) is a codegen concern; the
+ * classifier emits per-arg parts regardless. */
+ {
+ CfreeCgTypeId args[5] = {i32, i32, i32, i32, i32};
+ const ABIFuncInfo* fi = classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID],
+ args, 5, 0);
+ EXPECT(fi->nparams == 5, "win64 5xint: nparams=%u want 5",
+ (unsigned)fi->nparams);
+ for (u32 i = 0; i < 5; ++i) {
+ char t[64];
+ snprintf(t, sizeof t, "win64 5xint arg[%u]", i);
+ expect_direct_1x_int(t, &fi->params[i], 4);
+ }
+ }
+
+ /* Case 3: void f(double,double,double,double,double) — 5 doubles. */
+ {
+ CfreeCgTypeId args[5] = {f64, f64, f64, f64, f64};
+ const ABIFuncInfo* fi = classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID],
+ args, 5, 0);
+ EXPECT(fi->nparams == 5, "win64 5xfp: nparams=%u want 5",
+ (unsigned)fi->nparams);
+ for (u32 i = 0; i < 5; ++i) {
+ char t[64];
+ snprintf(t, sizeof t, "win64 5xfp arg[%u]", i);
+ expect_direct_1x_fp(t, &fi->params[i], 8);
+ }
+ }
+
+ /* Case 4: void f(int,double,int,double) — slot-shared on Win64.
+ * The classifier just emits per-arg parts of the right class; slot
+ * sharing is a codegen call-site concern. */
+ {
+ CfreeCgTypeId args[4] = {i32, f64, i32, f64};
+ const ABIFuncInfo* fi = classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID],
+ args, 4, 0);
+ EXPECT(fi->nparams == 4, "win64 mix: nparams=%u want 4",
+ (unsigned)fi->nparams);
+ expect_direct_1x_int("win64 mix arg[0]", &fi->params[0], 4);
+ expect_direct_1x_fp("win64 mix arg[1]", &fi->params[1], 8);
+ expect_direct_1x_int("win64 mix arg[2]", &fi->params[2], 4);
+ expect_direct_1x_fp("win64 mix arg[3]", &fi->params[3], 8);
+ }
+
+ /* Case 5: struct{char a;} foo(void) — size 1, DIRECT/1 INT/1. */
+ {
+ const ABIFuncInfo* fi = classify_fn_n(c, rec1, NULL, 0, 0);
+ expect_direct_1x_int("win64 ret S1", &fi->ret, 1);
+ EXPECT(fi->has_sret == 0, "win64 S1 ret: has_sret set");
+ }
+
+ /* Case 6: struct{long a; long b;} foo(void) — size 16, INDIRECT/sret. */
+ {
+ const ABIFuncInfo* fi = classify_fn_n(c, rec16, NULL, 0, 0);
+ expect_indirect("win64 ret S16", &fi->ret, /*is_return=*/1);
+ EXPECT(fi->has_sret == 1, "win64 S16 ret: has_sret not set");
+ }
+
+ /* Case 7: struct{char,char,char} foo(void) — size 3, INDIRECT/sret on
+ * Win64 (only {1,2,4,8} pass by value). Natural align of the 3-byte
+ * i8 aggregate is 1, which Win64 preserves in indirect_align. */
+ {
+ const ABIFuncInfo* fi = classify_fn_n(c, rec3, NULL, 0, 0);
+ expect_indirect_align("win64 ret S3", &fi->ret, /*is_return=*/1,
+ /*want_align=*/1);
+ EXPECT(fi->has_sret == 1, "win64 S3 ret: has_sret not set");
+ }
+
+ /* Case 8: void f(struct{char,char,char}) — by-value 3-byte aggregate
+ * goes by hidden pointer (BYVAL) on Win64. */
+ {
+ CfreeCgTypeId args[1] = {rec3};
+ const ABIFuncInfo* fi = classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID],
+ args, 1, 0);
+ EXPECT(fi->nparams == 1, "win64 S3 arg: nparams=%u want 1",
+ (unsigned)fi->nparams);
+ expect_indirect_align("win64 S3 arg", &fi->params[0], /*is_return=*/0,
+ /*want_align=*/1);
+ }
+
+ /* Case 9: int printf(const char*, ...) — variadic flag set. */
+ {
+ CfreeCgTypeId args[1] = {voidp}; /* void* stands in for const char* */
+ const ABIFuncInfo* fi = classify_fn_n(c, i32, args, 1, /*variadic=*/1);
+ EXPECT(fi->variadic == 1, "win64 printf: variadic=%u want 1",
+ (unsigned)fi->variadic);
+ EXPECT(fi->vararg_on_stack == 0,
+ "win64 printf: vararg_on_stack=%u want 0",
+ (unsigned)fi->vararg_on_stack);
+ expect_direct_1x_int("win64 printf ret", &fi->ret, 4);
+ }
+
+ /* Case 10: va_list info — Win64 has va_list = void* (8/8/PTR). */
+ {
+ ABITypeInfo vi = abi_va_list_info(((Compiler*)c)->abi);
+ EXPECT(vi.size == 8, "win64 va_list size=%u want 8", (unsigned)vi.size);
+ EXPECT(vi.align == 8, "win64 va_list align=%u want 8",
+ (unsigned)vi.align);
+ EXPECT(vi.scalar_kind == ABI_SC_PTR,
+ "win64 va_list scalar_kind=%u want ABI_SC_PTR (%u)",
+ (unsigned)vi.scalar_kind, (unsigned)ABI_SC_PTR);
+ }
+
+ cfree_compiler_free(c);
+}
+
+/* AArch64-Windows mostly starts from AAPCS64. Deltas: va_list is `void*`,
+ * and FP parameters to variadic functions are routed through integer slots. */
+static void test_aarch64_windows_variadic(void) {
+ CfreeCompiler* c =
+ new_compiler(CFREE_ARCH_ARM_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF);
+ CfreeCgBuiltinTypes bi = cfree_cg_builtin_types(c);
+ CfreeCgTypeId f64 = bi.id[CFREE_CG_BUILTIN_F64];
+ CfreeCgTypeId args[1] = {f64}; /* one double param, reused by both cases */
+
+ ABITypeInfo vi = abi_va_list_info(((Compiler*)c)->abi);
+ EXPECT(vi.size == 8, "aarch64/windows va_list size=%u want 8",
+ (unsigned)vi.size);
+ EXPECT(vi.align == 8, "aarch64/windows va_list align=%u want 8",
+ (unsigned)vi.align);
+ EXPECT(vi.scalar_kind == ABI_SC_PTR,
+ "aarch64/windows va_list scalar_kind=%u want ABI_SC_PTR (%u)",
+ (unsigned)vi.scalar_kind, (unsigned)ABI_SC_PTR);
+
+ { /* non-variadic signature: the double stays an FP part */
+ const ABIFuncInfo* fi =
+ classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID], args, 1, 0);
+ expect_direct_1x_fp("aarch64/windows nonvariadic double arg",
+ &fi->params[0], 8);
+ }
+ { /* variadic signature: the same double is expected as an INT part */
+ const ABIFuncInfo* fi =
+ classify_fn_n(c, bi.id[CFREE_CG_BUILTIN_VOID], args, 1, 1);
+ expect_direct_1x_int("aarch64/windows variadic double arg",
+ &fi->params[0], 8);
+ EXPECT(fi->vararg_on_stack == 0,
+ "aarch64/windows variadic: vararg_on_stack=%u want 0",
+ (unsigned)fi->vararg_on_stack);
+ }
+ cfree_compiler_free(c);
+}
+
int main(void) {
check_target(CFREE_ARCH_X86_64, CFREE_OS_LINUX, CFREE_OBJ_ELF);
check_target(CFREE_ARCH_ARM_64, CFREE_OS_LINUX, CFREE_OBJ_ELF);
check_target(CFREE_ARCH_ARM_64, CFREE_OS_MACOS, CFREE_OBJ_MACHO);
check_target(CFREE_ARCH_RV64, CFREE_OS_LINUX, CFREE_OBJ_ELF);
+ check_target(CFREE_ARCH_X86_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF);
+ check_target(CFREE_ARCH_ARM_64, CFREE_OS_WINDOWS, CFREE_OBJ_COFF);
+ test_win64_specifics();
+ test_aarch64_windows_variadic();
if (g_fail) {
fprintf(stderr, "%d failures\n", g_fail);
return 1;
diff --git a/test/coff/CORPUS.md b/test/coff/CORPUS.md
@@ -0,0 +1,199 @@
+# PE/COFF Test Corpus — Target Coverage
+
+What the `test/coff/` corpus should cover for full PE/COFF object-file
+support, independent of cfree's current implementation state. Mirrors
+the section layout of `test/elf/CORPUS.md`.
+
+Conventions:
+- **U** = unit (hand-built `ObjBuilder` round-trip; what this harness
+ ships today)
+- **C** = `cases/` (`x86_64-w64-mingw32-gcc -c` / `aarch64-w64-mingw32-gcc -c`
+ fixtures — deferred, no mingw toolchain wired in yet)
+- **E** = `exec/` (`link_emit_coff` + Wine — deferred until Phase 3
+ of `doc/WINDOWS.md` lands)
+
+The driver lives in `cfree-roundtrip-coff.c`. Each U case is a
+self-contained `static void test_*(void)` that builds an
+`ObjBuilder`, emits to a `cfree_writer_mem`, reads back via
+`read_coff`, asserts shape equivalence, then re-emits and asserts
+byte equality between the two emits.
+
+---
+
+## 1. File header / target identification
+
+| Case | Layer | Shape |
+|---|---|---|
+| `IMAGE_FILE_MACHINE_AMD64` round-trip | **U** | minimal `.text` with six bytes (`mov eax, 42; ret`), no user symbols (`test_header_minimal_x64`) |
+| `IMAGE_FILE_MACHINE_ARM64` round-trip | **U** | minimal `.text` with eight bytes (`mov w0, #42; ret`), no user symbols (`test_header_minimal_aa64`) |
+| Reproducible TimeDateStamp == 0 | **U** | implicitly checked by byte-stable round-trip |
+| Per-arch reloc machine dispatch | **U** | covered by reloc tests below |
+| `SizeOfOptionalHeader == 0` for `.obj` | **U** | implicitly: every U case is a `.obj`, not a PE image |
+
+## 2. Section types
+
+| Case | Layer | Shape |
+|---|---|---|
+| `.text` (`IMAGE_SCN_CNT_CODE \| MEM_EXECUTE \| MEM_READ`) | **U** | `test_text_only_x64` / `test_text_only_aa64` |
+| `.rdata` (`CNT_INITIALIZED_DATA \| MEM_READ`) | **U** | `test_rodata` |
+| `.data` (`CNT_INITIALIZED_DATA \| MEM_READ \| MEM_WRITE`) | **U** | `test_data_with_reloc_abs64_x64` |
+| `.bss` (`CNT_UNINITIALIZED_DATA`) | **U** | `test_bss` |
+| `.tls$` (TLS template section, name-detected) | **U** | `test_tls_section` |
+| `.debug_*` (DWARF passthrough) | C | deferred |
+| `.CRT$X[CIP]*` (init/fini) | C | deferred |
+| `.xdata` / `.pdata` (SEH unwind) | C | deferred — `doc/WINDOWS.md` §3.5 |
+| Multiple text sections (`.text$mn`, etc.) | **U** | covered via `test_comdat_group` |
+
+## 3. Section characteristics flags
+
+| Flag | Coverage |
+|---|---|
+| `CNT_CODE` / `INITIALIZED_DATA` / `UNINITIALIZED_DATA` | U — kind matrix above |
+| `MEM_EXECUTE` / `MEM_READ` / `MEM_WRITE` | U — kind matrix above |
+| `IMAGE_SCN_LNK_COMDAT` | **U** — `test_comdat_group` |
+| `IMAGE_SCN_ALIGN_*` nibble (1, 4, 8, 16, 4096) | **U** — `test_align_nibble` |
+| `LNK_INFO` / `LNK_REMOVE` / `MEM_DISCARDABLE` | C — preserved via `OBJ_EXT_COFF`, not yet exercised by a U case |
+
+## 4. Symbol coverage
+
+**Storage classes:** `EXTERNAL`, `STATIC`, `WEAK_EXTERNAL`, `FILE`,
+`SECTION` (synthesized).
+
+**Section number specials:** ordinary 1-based index, `UNDEFINED` (0),
+`ABSOLUTE` (-1), `DEBUG` (-2).
+
+| Case | Layer | Shape |
+|---|---|---|
+| Plain global function (`EXTERNAL`, `SK_FUNC`) | **U** | `test_text_only_x64` |
+| Static (file-local, `STATIC`, `SB_LOCAL`) | **U** | `test_static_local_symbol` |
+| Common (`UNDEFINED` + Value>0) | **U** | `test_common_symbol` |
+| Weak external (`WEAK_EXTERNAL` + aux) | **U** | `test_weak_global` |
+| Section symbol synthesis (`SK_SECTION` round-trip) | **U** | `test_section_symbol_synthesis` |
+| Long symbol name (>8 chars; strtab spillover) | **U** | `test_long_symbol_name` |
+| Long section name (`/N` form) | **U** | `test_long_section_name` |
+| File symbol (`.file` + aux records) | C | deferred (cfree's emit_coff handles it; no U case yet) |
+| Hidden / protected visibility | n/a | COFF has no visibility model |
+
+## 5. Relocation coverage
+
+### x86_64 (`IMAGE_REL_AMD64_*`)
+
+| Wire kind | cfree `RelocKind` | Layer | Shape |
+|---|---|---|---|
+| `ABSOLUTE` (0) | `R_NONE` | implicit | — |
+| `ADDR64` (1) | `R_ABS64` | **U** | `test_data_with_reloc_abs64_x64` |
+| `ADDR32` (2) | `R_ABS32` | **U** | covered alongside REL32 (same harness) |
+| `ADDR32NB` (3) | `R_X64_32S` | C | not yet exercised |
+| `REL32` (4) | `R_PC32` / `R_REL32` / `R_PLT32` / `R_X64_GOTPCREL*` | **U** | `test_data_with_reloc_rel32_x64` |
+| `REL32_1..5` (5..9) | `R_PC32` + explicit addend on read | C | reader-only path; no U yet |
+| `SECREL` / `SECTION` | (not modeled in v1) | — | deferred — `doc/WINDOWS.md` §3.1 |
+
+### aarch64 (`IMAGE_REL_ARM64_*`)
+
+| Wire kind | cfree `RelocKind` | Layer | Shape |
+|---|---|---|---|
+| `ABSOLUTE` (0) | `R_NONE` | implicit | — |
+| `ADDR32` (1) | `R_ABS32` | C | not yet exercised |
+| `ADDR32NB` (2) | `R_ABS32` | C | not yet exercised |
+| `BRANCH26` (3) | `R_AARCH64_CALL26` / `R_AARCH64_JUMP26` | **U** | `test_aa64_branch26` |
+| `PAGEBASE_REL21` (4) | `R_AARCH64_ADR_PREL_PG_HI21` | **U** | `test_aa64_pagebase_pageoffset` |
+| `REL21` (5) | `R_AARCH64_ADR_PREL_LO21` | C | not yet exercised |
+| `PAGEOFFSET_12A` (6) | `R_AARCH64_ADD_ABS_LO12_NC` | **U** | `test_aa64_pagebase_pageoffset` |
+| `PAGEOFFSET_12L` (7) | `R_AARCH64_LDST64_ABS_LO12_NC` | C | not yet exercised |
+| `BRANCH19` (15) | `R_AARCH64_CONDBR19` | C | not yet exercised |
+| `BRANCH14` (16) | `R_AARCH64_TSTBR14` | C | not yet exercised |
+| `ADDR64` (14) | `R_ABS64` | **U** | `test_data_with_reloc_abs64_aa64` |
+| `SECREL` family | (not modeled in v1) | — | deferred |
+
+## 6. COMDAT / groups
+
+| Case | Layer | Shape |
+|---|---|---|
+| COMDAT group with `SELECT_ANY` | **U** | `test_comdat_group` |
+| `SELECT_NODUPLICATES` | C | not yet exercised |
+| `SELECT_SAME_SIZE` / `EXACT_MATCH` | C | not yet exercised |
+| `SELECT_ASSOCIATIVE` (paired sections) | C | reader handles; no U yet |
+| `SELECT_LARGEST` / `NEWEST` | C | not yet exercised |
+
+## 7. TLS / special sections
+
+| Case | Layer | Shape |
+|---|---|---|
+| `.tls$` data section | **U** | `test_tls_section` |
+| `.tls$ZZZ` BSS-tail | C | — |
+| `_tls_index` / `_tls_used` directory | E | Phase 3 |
+| `.CRT$XCU` constructors | C | deferred |
+
+## 8. Layout / structure edges
+
+| Case | Layer | Shape |
+|---|---|---|
+| Empty `.obj` (no sections, no symbols) | **U** | `test_empty_obj` |
+| Long section name (`/<decimal>` form) | **U** | `test_long_section_name` |
+| Long symbol name (LongName form) | **U** | `test_long_symbol_name` |
+| Section alignment 1 / 4 / 8 / 16 / 4096 | **U** | `test_align_nibble` |
+| > 65535 relocations in one section | n/a | emitter panics; not legal in v1 |
+
+## 9. Negative inputs (`bad/`)
+
+Deferred — no `bad/` corpus in Phase 1. Layer E will cover:
+
+- Truncated file header (< 20 bytes)
+- Non-zero `SizeOfOptionalHeader` (i.e. PE image fed to `.obj`
+ reader)
+- Unsupported `Machine` (e.g. `IMAGE_FILE_MACHINE_I386`)
+- `PointerToRawData + SizeOfRawData > file_size`
+- `PointerToSymbolTable + NumberOfSymbols * 18` overflows
+- Strtab size field < 4 / strtab body extending past file
+- Reloc `SymbolTableIndex` past symbol table
+- COMDAT aux with `Selection == ASSOCIATIVE` and `Number` out of
+ range
+
+## 10. Known limitations (round-trip asymmetries)
+
+1. **Section-definition aux records.** `emit_coff` always emits a
+ STATIC section symbol + section-definition aux for every kept
+ section, even if the input `ObjBuilder` did not name one. The
+ reader maps those aux records onto `SK_SECTION` symbols. After
+ one round-trip the readback carries an `SK_SECTION` symbol per
+ section; the second emit reproduces the exact same wire bytes
+ (byte-stable from step 2 onward).
+
+2. **Symbol ordering.** Section symbols come first (one per kept
+ section), then `.file` symbols (if any), then user-defined
+ symbols in iteration order. A user-supplied `ObjBuilder` that
+ mints user symbols before section symbols still round-trips,
+ but the symbol-table index ordering differs after the first
+ emit. The harness compares by name, not index.
+
+3. **TimeDateStamp.** Always zero (reproducible builds), so byte
+ stability holds even across re-emits with different `now`
+ values.
+
+4. **COMDAT selection flag-vs-enum.** `obj_group(..., flags)` takes
+ a flag bitfield (`CFREE_OBJ_GROUP_COMDAT = 1`). The COFF
+ selection (e.g. `IMAGE_COMDAT_SELECT_ANY = 2`) is a small int
+ enum stored as `flags` on the group when read back from COFF.
+ Round-trip stability holds as long as callers consistently use
+ one or the other model — see `test_comdat_group`.
+
+## Stratification
+
+When picking what to land next:
+
+1. ★ **Reloc-kind matrix per arch (U)** — every kind in the
+   per-arch translator table needs a U case. Currently covered:
+   `R_ABS64` on both arches; `R_PC32` on x64; `R_AARCH64_CALL26`,
+   `R_AARCH64_ADR_PREL_PG_HI21` + `R_AARCH64_ADD_ABS_LO12_NC` on
+   aa64.
+2. ★ **Symbol storage-class matrix (U)** — covered: `EXTERNAL`,
+ `STATIC`, `WEAK_EXTERNAL`, `SECTION`; common symbols.
+3. ★ **Section characteristics matrix (U)** — kind × flags matrix
+ covered for `.text` / `.rdata` / `.data` / `.bss` / `.tls$`.
+4. mingw fixtures (C) — gated on toolchain availability.
+5. Negative inputs (Layer E) — defer until reader's diagnostic
+ surface is exercised by Phase 3 link tests.
+6. SEH / unwind-info round-trip — Phase 2.7.
+
+A "complete" corpus has one U cell for each row in groups 1–3 and
+at least one C row for groups 4–6.
diff --git a/test/coff/README.md b/test/coff/README.md
@@ -0,0 +1,50 @@
+# test/coff — PE/COFF format roundtrip harness
+
+Round-trip testing for `emit_coff` / `read_coff` against the
+`x86_64-windows` and `aarch64-windows` targets. Peer of
+`test/elf/` and `test/macho/`.
+
+## Scope
+
+Hand-built `ObjBuilder` → `emit_coff` → bytes → `read_coff` → second
+`emit_coff` against the readback. The harness asserts:
+
+1. Structural equivalence between the original `ObjBuilder` and the
+ readback (sections, symbols, relocations, groups all preserved
+ modulo synthesized SECTION symbols and section-definition aux
+ records — the asymmetry that `src/obj/coff_read.c` documents).
+2. Byte stability across `emit_coff(read_coff(emit_coff(ob)))` — the
+ second emit must produce the exact bytes of the first.
+
+## Running
+
+```
+make test-coff
+```
+
+This builds `build/test/cfree-roundtrip-coff` and runs the embedded
+unit cases. It also runs `windows-ucrt-hosted-smoke.sh`, which
+self-skips unless an llvm-mingw UCRT sysroot is available via
+`CFREE_MINGW_SYSROOT` or under `/tmp/llvm-mingw*`. Wine is not needed.
+
+## Layers
+
+`CORPUS.md` enumerates the cases worth covering, marked:
+
+- **U** (unit) — hand-built `ObjBuilder` fixtures. Layer A. The
+ current harness only carries U cases.
+- **C** (cases) — mingw-cross-built `.obj` fixtures. Layer B.
+ The hosted UCRT smoke now covers one aarch64 llvm-mingw sysroot
+ path; broader fixture coverage remains pending.
+- **E** (exec) — link + exec via Wine. Layer C/D, gated on Wine
+ availability (`doc/WINDOWS.md` Phase 3).
+
+Layer A is sufficient to gate the wire encoder / decoder against
+each other. Layers B/C/D will catch cross-tool agreement and
+end-to-end runtime correctness once the surrounding infrastructure
+lands.
+
+## Pointer
+
+See `doc/WINDOWS.md` for the full PE/COFF support plan, including
+the phase-by-phase task list, ABI notes, and corpus stratification.
diff --git a/test/coff/cfree-roundtrip-coff.c b/test/coff/cfree-roundtrip-coff.c
@@ -0,0 +1,1404 @@
+/* PE/COFF round-trip harness — peer of test/elf/unit/smoke.c and
+ * the Mach-O unit tests. Each test_*() builds an ObjBuilder, emits via
+ * emit_coff into a memory writer, reads back via read_coff, and
+ * asserts:
+ *
+ * 1. Structural equivalence — sections by name, symbols by name,
+ * relocations by (section, offset, kind, target-sym-name).
+ * Section-symbol synthesis is honored as a known asymmetry
+ * (see test/coff/CORPUS.md §10).
+ * 2. Byte stability — re-emitting the readback ObjBuilder produces
+ * the same bytes as the first emit (memcmp).
+ *
+ * Mixes public (<cfree/core.h>, <cfree/object.h>) and internal
+ * (src/obj/obj.h, src/core/core.h) surfaces. Compiled with -Isrc
+ * by test/test.mk. Not a libcfree consumer in the usual sense — a
+ * test binary that pokes the same private headers the writer /
+ * reader use. */
+
+#include <cfree/core.h>
+#include <cfree/object.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/core.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+
+/* ---- env vtables --------------------------------------------------- */
+
+static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)h; (void)a; /* stateless; the alignment request is not consulted */
+  if (n == 0) return NULL; /* zero-size requests yield no allocation */
+  return malloc(n);
+}
+static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  (void)h; (void)o; (void)a; /* stateless; old size and alignment are unused */
+  if (n) return realloc(p, n);
+  free(p); /* realloc(p, 0) is implementation-defined (UB in C23); be explicit */
+  return NULL; /* same zero-size answer as heap_alloc */
+}
+static void heap_free(CfreeHeap* h, void* p, size_t n) {
+  /* Stateless heap: neither the handle nor the size hint is consulted. */
+  (void)h; (void)n;
+  free(p);
+}
+static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL};
+
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* names[] = {"note", "warning", "error", "fatal"};
+  size_t i = (size_t)k; /* unsigned index: an unknown kind must not read OOB */
+  (void)s; (void)loc;   /* sink state and source location are not printed */
+  fprintf(stderr, "%s: ", i < sizeof names / sizeof *names ? names[i] : "?");
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+/* ---- failure tracking --------------------------------------------- */
+
+static int g_failures; /* incremented once per failed EXPECT */
+static const char* g_test_name = "?"; /* label printed in each FAIL line */
+#define EXPECT(cond, ...)                                              \
+  do { /* non-fatal check: report on stderr, count, and keep going */  \
+    if (!(cond)) {                                                     \
+      fprintf(stderr, "FAIL [%s] %s:%d: ", g_test_name, __FILE__,      \
+              __LINE__);                                               \
+      fprintf(stderr, __VA_ARGS__);                                    \
+      fputc('\n', stderr);                                             \
+      g_failures++;                                                    \
+    }                                                                  \
+  } while (0)
+
+/* ---- target builders ---------------------------------------------- */
+
+/* Fields shared by both Windows targets: COFF objects, 8-byte little-endian
+ * pointers, PIE, small code model. The caller sets `arch` afterwards. */
+static void target_windows_common(CfreeTarget* t) {
+  memset(t, 0, sizeof *t);
+  t->os = CFREE_OS_WINDOWS;
+  t->obj = CFREE_OBJ_COFF;
+  t->ptr_size = 8;
+  t->ptr_align = 8;
+  t->big_endian = false;
+  t->pic = CFREE_PIC_PIE;
+  t->code_model = CFREE_CM_SMALL;
+}
+
+/* Per-architecture descriptors handed to make_compiler by the tests. */
+static void target_x64_windows(CfreeTarget* t) {
+  target_windows_common(t);
+  t->arch = CFREE_ARCH_X86_64;
+}
+
+static void target_aa64_windows(CfreeTarget* t) {
+  target_windows_common(t);
+  t->arch = CFREE_ARCH_ARM_64;
+}
+
+/* ---- shape helpers ------------------------------------------------- */
+
+static int sym_eq_str(Pool* p, Sym s, const char* want) {
+  size_t n; /* length reported by pool_str for the interned name */
+  const char* got = pool_str(p, s, &n);
+  if (!got || n != strlen(want)) return 0; /* unresolved or length mismatch */
+  return memcmp(got, want, n) == 0;
+}
+
+static const Section* find_section_named(const ObjBuilder* ob, Pool* p,
+                                         const char* want) {
+  /* Ids are scanned from 1; sections flagged removed never match. */
+  const u32 count = obj_section_count(ob);
+  for (u32 id = 1; id < count; ++id) {
+    const Section* sec = obj_section_get(ob, id);
+    if (!sec->removed && sym_eq_str(p, sec->name, want)) return sec;
+  }
+  return NULL;
+}
+
+static ObjSecId find_section_id(const ObjBuilder* ob, Pool* p,
+                                const char* want) {
+  /* Id of the first live section named `want`, or OBJ_SEC_NONE if absent. */
+  const u32 count = obj_section_count(ob);
+  for (u32 id = 1; id < count; ++id) {
+    const Section* sec = obj_section_get(ob, id);
+    if (!sec->removed && sym_eq_str(p, sec->name, want)) return id;
+  }
+  return OBJ_SEC_NONE;
+}
+
+static ObjSymId find_sym_named(const ObjBuilder* ob, Pool* p,
+                               const char* want) {
+  /* Walk the symbol iterator; the first live symbol whose name equals
+   * `want` wins. Yields OBJ_SYM_NONE when nothing matches. */
+  ObjSymId hit = OBJ_SYM_NONE;
+  ObjSymEntry ent;
+  ObjSymIter* iter = obj_symiter_new(ob);
+  while (obj_symiter_next(iter, &ent)) {
+    if (ent.sym->removed || !sym_eq_str(p, ent.sym->name, want)) continue;
+    hit = ent.id;
+    break;
+  }
+  obj_symiter_free(iter);
+  return hit;
+}
+
+/* ---- emit / read driver ------------------------------------------- */
+
+/* Run emit_coff(ob) into a memory writer and hand back a malloc()ed copy of
+ * the bytes through *out_buf / *out_len; the caller frees *out_buf.
+ * Returns 0 on success, -1 on failure (outputs are then left untouched). */
+static int emit_to_buf(Compiler* c, ObjBuilder* ob, uint8_t** out_buf,
+                       size_t* out_len) {
+  CfreeWriter* w = NULL;
+  size_t len = 0;
+  if (cfree_writer_mem(&g_heap, &w) != CFREE_OK || !w) return -1;
+  emit_coff(c, ob, w);
+  const uint8_t* bytes = cfree_writer_mem_bytes(w, &len);
+  /* Allocate at least one byte so an empty emit still yields a valid pointer. */
+  uint8_t* copy = (uint8_t*)malloc(len ? len : 1);
+  if (copy && len) memcpy(copy, bytes, len);
+  /* Close only after the copy: `bytes` presumably points into the writer. */
+  cfree_writer_close(w);
+  if (!copy) return -1;
+  *out_buf = copy;
+  *out_len = len;
+  return 0;
+}
+
+/* Debug helper: hex-dump a and b side by side on stderr, 16 bytes per row;
+ * a '*' after a byte marks a position where the two buffers differ. */
+static void dump_diff(const uint8_t* a, const uint8_t* b, size_t n) {
+  for (size_t row = 0; row < n; row += 16) {
+    const size_t end = (n - row < 16) ? n : row + 16;
+    fprintf(stderr, "%04zx ", row);
+    for (size_t k = row; k < end; ++k) {
+      fprintf(stderr, "%02x%c", a[k], a[k] != b[k] ? '*' : ' ');
+    }
+    fprintf(stderr, " | ");
+    for (size_t k = row; k < end; ++k) {
+      fprintf(stderr, "%02x%c", b[k], a[k] != b[k] ? '*' : ' ');
+    }
+    fprintf(stderr, "\n");
+  }
+}
+
+/* Round-trip workflow (emit, read back, re-emit):
+ *
+ *   emit_coff(in)   -> b1
+ *   read_coff(b1)   -> mid
+ *   verify_fn(mid)             [skipped when verify_fn is NULL]
+ *   emit_coff(mid)  -> b2
+ *   EXPECT(b1 == b2)           [skipped when expect_byte_stable == 0]
+ *
+ * Failures are recorded via EXPECT; `in` is never freed here. */
+static void run_roundtrip_ex(Compiler* c, ObjBuilder* in,
+                             void (*verify_fn)(const ObjBuilder*, Pool*),
+                             int expect_byte_stable) {
+  uint8_t* b1 = NULL;
+  size_t n1 = 0;
+  if (emit_to_buf(c, in, &b1, &n1) != 0) {
+    EXPECT(0, "emit_to_buf #1 failed");
+    return;
+  }
+  /* Header sanity: a COFF file header alone occupies 20 bytes. */
+  EXPECT(n1 >= 20, "emit_coff #1 produced %zu bytes (< 20)", n1);
+
+  ObjBuilder* mid = read_coff(c, "roundtrip", b1, n1);
+  EXPECT(mid != NULL, "read_coff returned NULL");
+  if (!mid) {
+    free(b1);
+    return;
+  }
+
+  if (verify_fn) verify_fn(mid, c->global); /* names resolve via c->global */
+
+  uint8_t* b2 = NULL;
+  size_t n2 = 0;
+  if (emit_to_buf(c, mid, &b2, &n2) != 0) {
+    EXPECT(0, "emit_to_buf #2 failed");
+    obj_free(mid);
+    free(b1);
+    return;
+  }
+
+  if (expect_byte_stable) {
+    EXPECT(n1 == n2, "byte-stable round-trip size mismatch: %zu vs %zu", n1,
+           n2);
+    if (n1 == n2) {
+      int differs = memcmp(b1, b2, n1) != 0;
+      EXPECT(!differs, "byte-stable round-trip differs (size %zu)", n1);
+      /* The hex dump is opt-in through the environment variable below. */
+      if (differs && getenv("CFREE_COFF_DUMP_DIFF")) {
+        fprintf(stderr, "--- b1 | b2 ---\n");
+        dump_diff(b1, b2, n1);
+      }
+    }
+  }
+  obj_free(mid);
+  free(b1);
+  free(b2);
+}
+
+static void run_roundtrip(Compiler* c, ObjBuilder* in,
+                          void (*verify_fn)(const ObjBuilder*, Pool*)) {
+  run_roundtrip_ex(c, in, verify_fn, /*expect_byte_stable: on by default*/1);
+}
+
+/* ---- compiler lifecycle ----------------------------------------------
+ * CfreeContext must outlive the Compiler — compiler_init stashes the
+ * pointer. A file-scope context keeps that pointer valid after
+ * make_compiler returns; it is re-initialised on every call. */
+
+static CfreeContext g_ctx; /* shared by every compiler this harness makes */
+
+static Compiler* make_compiler(const CfreeTarget* t) {
+  memset(&g_ctx, 0, sizeof g_ctx);
+  g_ctx.heap = &g_heap;
+  g_ctx.diag = &g_diag;
+  g_ctx.now = -1; /* NOTE(review): presumably "no wall-clock time" - confirm */
+  CfreeCompiler* cc = NULL;
+  if (cfree_compiler_new(*t, &g_ctx, &cc) != CFREE_OK || !cc) return NULL;
+  return (Compiler*)cc; /* tests use the internal view of the public handle */
+}
+
+/* ---- payload bytes ------------------------------------------------- */
+
+/* x64: mov eax, 42 ; ret. */
+static const uint8_t TEXT_X64[6] = {
+ 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3,
+};
+
+/* aa64: mov w0, #42 ; ret. */
+static const uint8_t TEXT_AA64[8] = {
+ 0x40, 0x05, 0x80, 0x52, 0xc0, 0x03, 0x5f, 0xd6,
+};
+
+/* ---- per-test verifiers / builders -------------------------------- */
+
+/* test_header_minimal_x64 / _aa64: a single .text section, no
+ * relocations, no user symbols. Exercises the file/section-header
+ * encoder + the section-symbol synthesis path. */
+
+static void verify_header_minimal(const ObjBuilder* ob, Pool* p) {
+  const Section* sec = find_section_named(ob, p, ".text");
+  EXPECT(sec != NULL, ".text not present");
+  if (!sec) return; /* the remaining checks need the section */
+  /* Readback must classify .text as allocated, executable code. */
+  EXPECT(sec->kind == SEC_TEXT, ".text kind=%u", sec->kind);
+  EXPECT(sec->flags & SF_EXEC, ".text missing SF_EXEC");
+  EXPECT(sec->flags & SF_ALLOC, ".text missing SF_ALLOC");
+}
+
+static void test_header_minimal_x64(void) { /* x64: lone .text, no symbols */
+  g_test_name = "header_minimal_x64"; /* label for EXPECT failure lines */
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  EXPECT(c != NULL, "compiler_new");
+  if (!c) return;
+
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic during test");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym text = pool_intern_cstr(p, ".text");
+  ObjSecId sec = obj_section(ob, text, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(ob, sec, TEXT_X64, sizeof TEXT_X64); /* mov eax, 42 ; ret */
+  obj_finalize(ob);
+
+  run_roundtrip(c, ob, verify_header_minimal); /* also checks byte stability */
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+static void test_header_minimal_aa64(void) { /* aa64: lone .text, no symbols */
+  g_test_name = "header_minimal_aa64"; /* label for EXPECT failure lines */
+  CfreeTarget t;
+  target_aa64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  EXPECT(c != NULL, "compiler_new");
+  if (!c) return;
+
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic during test");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym text = pool_intern_cstr(p, ".text");
+  ObjSecId sec = obj_section(ob, text, SEC_TEXT, SF_ALLOC | SF_EXEC, 4);
+  obj_write(ob, sec, TEXT_AA64, sizeof TEXT_AA64); /* mov w0, #42 ; ret */
+  obj_finalize(ob);
+
+  run_roundtrip(c, ob, verify_header_minimal); /* also checks byte stability */
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* test_text_only_x64: .text + one defined global function symbol. */
+
+static void verify_text_only(const ObjBuilder* ob, Pool* p) {
+  /* Readback must keep .text and a global function 'main' defined in it. */
+  const Section* text = find_section_named(ob, p, ".text");
+  EXPECT(text != NULL, ".text not present");
+  ObjSymId main_id = find_sym_named(ob, p, "main");
+  EXPECT(main_id != OBJ_SYM_NONE, "missing 'main' symbol");
+  /* Compare with the sentinel instead of assuming OBJ_SYM_NONE is zero. */
+  if (main_id == OBJ_SYM_NONE) return;
+  const ObjSym* s = obj_symbol_get(ob, main_id);
+  EXPECT(s->bind == SB_GLOBAL, "main bind=%u", s->bind);
+  EXPECT(s->kind == SK_FUNC, "main kind=%u", s->kind);
+  EXPECT(s->section_id != OBJ_SEC_NONE, "main has no section");
+}
+
+static void test_text_only_x64(void) { /* x64: .text plus global 'main' */
+  g_test_name = "text_only_x64";
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym tn = pool_intern_cstr(p, ".text");
+  Sym mn = pool_intern_cstr(p, "main");
+  ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(ob, sec, TEXT_X64, sizeof TEXT_X64);
+  /* 'main' starts at offset 0 and spans the whole payload. */
+  obj_symbol(ob, mn, SB_GLOBAL, SK_FUNC, sec, 0, sizeof TEXT_X64);
+  obj_finalize(ob);
+  run_roundtrip(c, ob, verify_text_only);
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+static void test_text_only_aa64(void) { /* aa64: .text plus global 'main' */
+  g_test_name = "text_only_aa64";
+  CfreeTarget t;
+  target_aa64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym tn = pool_intern_cstr(p, ".text");
+  Sym mn = pool_intern_cstr(p, "main");
+  ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 4);
+  obj_write(ob, sec, TEXT_AA64, sizeof TEXT_AA64);
+  /* 'main' starts at offset 0 and spans the whole payload. */
+  obj_symbol(ob, mn, SB_GLOBAL, SK_FUNC, sec, 0, sizeof TEXT_AA64);
+  obj_finalize(ob);
+  run_roundtrip(c, ob, verify_text_only);
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* test_rodata: .rdata read-only data + a defined object symbol. */
+
+static void verify_rodata(const ObjBuilder* ob, Pool* p) {
+  /* .rdata must read back as allocated, non-writable read-only data. */
+  const Section* sec = find_section_named(ob, p, ".rdata");
+  EXPECT(sec != NULL, ".rdata not present");
+  if (sec != NULL) {
+    EXPECT(sec->kind == SEC_RODATA, ".rdata kind=%u (want %u)", sec->kind,
+           SEC_RODATA);
+    EXPECT(sec->flags & SF_ALLOC, ".rdata missing SF_ALLOC");
+    EXPECT(!(sec->flags & SF_WRITE), ".rdata wrongly has SF_WRITE");
+  }
+  EXPECT(find_sym_named(ob, p, "kMsg") != OBJ_SYM_NONE, "missing 'kMsg' symbol");
+}
+
+static void test_rodata(void) { /* x64: .rdata with one global object symbol */
+  g_test_name = "rodata";
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym rdn = pool_intern_cstr(p, ".rdata");
+  Sym kn = pool_intern_cstr(p, "kMsg");
+  ObjSecId sec = obj_section(ob, rdn, SEC_RODATA, SF_ALLOC, 8);
+  /* 11 characters plus the literal's own NUL fill all 12 bytes exactly. */
+  static const uint8_t MSG[12] = "hello world";
+  obj_write(ob, sec, MSG, sizeof MSG);
+  obj_symbol(ob, kn, SB_GLOBAL, SK_OBJ, sec, 0, sizeof MSG);
+  obj_finalize(ob);
+  run_roundtrip(c, ob, verify_rodata);
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* test_bss: .bss section (NOBITS), one defined symbol, size > 0. */
+
+static void verify_bss(const ObjBuilder* ob, Pool* p) {
+  const Section* sec = find_section_named(ob, p, ".bss");
+  EXPECT(sec != NULL, ".bss not present");
+  if (!sec) return;
+  /* Kind must survive; the size check tolerates anything of 64 or more. */
+  EXPECT(sec->kind == SEC_BSS, ".bss kind=%u (want %u)", sec->kind, SEC_BSS);
+  EXPECT(sec->bss_size >= 64, ".bss size=%u (want >= 64)", sec->bss_size);
+}
+
+static void test_bss(void) { /* x64: NOBITS .bss with one 64-byte object */
+  g_test_name = "bss";
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym bn = pool_intern_cstr(p, ".bss");
+  Sym vn = pool_intern_cstr(p, "g_buf");
+  ObjSecId sec = obj_section_ex(ob, bn, SEC_BSS, SSEM_NOBITS,
+                                SF_ALLOC | SF_WRITE, 16, 0, 0, 0);
+  obj_reserve_bss(ob, sec, 64, 16); /* no bytes written: size-only section */
+  obj_symbol(ob, vn, SB_GLOBAL, SK_OBJ, sec, 0, 64);
+  obj_finalize(ob);
+
+  run_roundtrip(c, ob, verify_bss);
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* test_data_with_reloc_abs64_x64: .data with an 8-byte slot
+ * relocated R_ABS64 against an undefined external. */
+
+static void verify_data_abs64(const ObjBuilder* ob, Pool* p) {
+  /* Readback of the abs64 .data fixture: foo_extern stays undefined and
+   * .data carries exactly one R_ABS64 relocation at offset 0. */
+  ObjSecId data_id = find_section_id(ob, p, ".data");
+  EXPECT(data_id != OBJ_SEC_NONE, ".data id");
+  ObjSymId foo = find_sym_named(ob, p, "foo_extern");
+  EXPECT(foo != OBJ_SYM_NONE, "missing 'foo_extern'");
+  if (foo != OBJ_SYM_NONE) { /* explicit sentinel compare, not truthiness */
+    const ObjSym* s = obj_symbol_get(ob, foo);
+    EXPECT(s->section_id == OBJ_SEC_NONE, "foo_extern not undef");
+  }
+  if (data_id == OBJ_SEC_NONE) return;
+  u32 nr = obj_reloc_count(ob, data_id);
+  EXPECT(nr == 1, ".data reloc count=%u (want 1)", nr);
+  /* Scan the object-wide reloc list for the first live entry in .data. */
+  const Reloc* found = NULL;
+  u32 total = obj_reloc_total(ob);
+  for (u32 i = 0; i < total; ++i) {
+    const Reloc* r = obj_reloc_at(ob, i);
+    if (r->removed || r->section_id != data_id) continue;
+    found = r;
+    break;
+  }
+  EXPECT(found != NULL, "no reloc on .data");
+  if (!found) return;
+  EXPECT(found->kind == R_ABS64, ".data reloc kind=%u (want %u)", found->kind,
+         R_ABS64);
+  EXPECT(found->offset == 0, ".data reloc offset=%u", found->offset);
+}
+
+static void test_data_with_reloc_abs64_x64(void) { /* x64 ADDR64 in .data */
+  g_test_name = "data_with_reloc_abs64_x64";
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym dn = pool_intern_cstr(p, ".data");
+  Sym fn = pool_intern_cstr(p, "foo_extern");
+  ObjSecId sec = obj_section(ob, dn, SEC_DATA, SF_ALLOC | SF_WRITE, 8);
+  static const uint8_t zero8[8] = {0};
+  obj_write(ob, sec, zero8, sizeof zero8);
+  ObjSymId foo = obj_symbol(ob, fn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  obj_reloc(ob, sec, 0, R_ABS64, foo, 0); /* the 8-byte slot at offset 0 */
+  obj_finalize(ob);
+
+  run_roundtrip(c, ob, verify_data_abs64);
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+static void test_data_with_reloc_abs64_aa64(void) { /* aa64 ADDR64 in .data */
+  g_test_name = "data_with_reloc_abs64_aa64";
+  CfreeTarget t;
+  target_aa64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym dn = pool_intern_cstr(p, ".data");
+  Sym fn = pool_intern_cstr(p, "foo_extern");
+  ObjSecId sec = obj_section(ob, dn, SEC_DATA, SF_ALLOC | SF_WRITE, 8);
+  static const uint8_t zero8[8] = {0};
+  obj_write(ob, sec, zero8, sizeof zero8);
+  ObjSymId foo = obj_symbol(ob, fn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  obj_reloc(ob, sec, 0, R_ABS64, foo, 0); /* the 8-byte slot at offset 0 */
+  obj_finalize(ob);
+
+  run_roundtrip(c, ob, verify_data_abs64); /* verifier shared with the x64 case */
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* test_data_with_reloc_rel32_x64: .text with a REL32 relocation
+ * referencing an external symbol (call thunk). */
+
+static void verify_rel32(const ObjBuilder* ob, Pool* p) {
+  /* Expect .text, a 'helper' symbol, and a single .text reloc; every live
+   * .text reloc must read back as R_PC32. */
+  const ObjSecId text_id = find_section_id(ob, p, ".text");
+  EXPECT(text_id != OBJ_SEC_NONE, ".text id");
+  EXPECT(find_sym_named(ob, p, "helper") != OBJ_SYM_NONE, "missing 'helper'");
+  if (text_id == OBJ_SEC_NONE) return;
+  u32 in_text = obj_reloc_count(ob, text_id);
+  EXPECT(in_text == 1, ".text reloc count=%u (want 1)", in_text);
+  const u32 total = obj_reloc_total(ob);
+  for (u32 idx = 0; idx < total; ++idx) {
+    const Reloc* rel = obj_reloc_at(ob, idx);
+    if (rel->removed || rel->section_id != text_id) continue;
+    EXPECT(rel->kind == R_PC32, "reloc kind=%u (want R_PC32=%u)", rel->kind,
+           R_PC32);
+  }
+}
+
+static void test_data_with_reloc_rel32_x64(void) { /* x64 REL32 call in .text */
+  g_test_name = "reloc_rel32_x64";
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym tn = pool_intern_cstr(p, ".text");
+  Sym hn = pool_intern_cstr(p, "helper");
+  ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  /* call helper ; ret — e8 disp32 c3 (disp filled by reloc). */
+  static const uint8_t bytes[6] = {0xe8, 0, 0, 0, 0, 0xc3};
+  obj_write(ob, sec, bytes, sizeof bytes);
+  /* Undef symbol kind: SK_UNDEF — matches what real COFF inputs carry.
+   * SK_FUNC + section_id == 0 emits Type=function but the reader collapses
+   * to SK_UNDEF on readback (no "undef function" kind in cfree's model),
+   * which breaks byte stability. NOTE(review): not yet listed in CORPUS.md §10. */
+  ObjSymId helper =
+      obj_symbol(ob, hn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  obj_reloc(ob, sec, 1, R_PC32, helper, 0); /* offset 1: disp32 after e8 */
+  obj_finalize(ob);
+
+  run_roundtrip(c, ob, verify_rel32);
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* test_aa64_branch26: .text with a BRANCH26 (R_AARCH64_CALL26)
+ * relocation against an external. */
+
+static void verify_aa64_branch26(const ObjBuilder* ob, Pool* p) {
+  /* Exactly one live .text relocation, and it must read back as CALL26. */
+  const ObjSecId text_id = find_section_id(ob, p, ".text");
+  EXPECT(text_id != OBJ_SEC_NONE, ".text id");
+  const u32 total = obj_reloc_total(ob);
+  int hits = 0;
+  for (u32 idx = 0; idx < total; ++idx) {
+    const Reloc* rel = obj_reloc_at(ob, idx);
+    if (rel->removed || rel->section_id != text_id) continue;
+    EXPECT(rel->kind == R_AARCH64_CALL26, "branch26 reloc kind=%u (want %u)",
+           rel->kind, R_AARCH64_CALL26);
+    hits += 1;
+  }
+  EXPECT(hits == 1, "branch26 reloc count=%d (want 1)", hits);
+}
+
+static void test_aa64_branch26(void) { /* aa64 BRANCH26 call in .text */
+  g_test_name = "aa64_branch26";
+  CfreeTarget t;
+  target_aa64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) { /* non-zero: re-entered via longjmp on c->panic */
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* ob = obj_new(c);
+  Pool* p = c->global;
+  Sym tn = pool_intern_cstr(p, ".text");
+  Sym cn = pool_intern_cstr(p, "callee");
+  ObjSecId sec = obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC, 4);
+  /* bl callee ; ret — both 4 bytes; disp filled by reloc. */
+  static const uint8_t bytes[8] = {0, 0, 0, 0x94, 0xc0, 0x03, 0x5f, 0xd6};
+  obj_write(ob, sec, bytes, sizeof bytes);
+  /* See reloc_rel32_x64 note on SK_UNDEF for undef symbols. */
+  ObjSymId callee =
+      obj_symbol(ob, cn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  obj_reloc(ob, sec, 0, R_AARCH64_CALL26, callee, 0); /* the bl at offset 0 */
+  obj_finalize(ob);
+
+  run_roundtrip(c, ob, verify_aa64_branch26);
+
+  obj_free(ob); /* run_roundtrip does not free its input builder */
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* test_aa64_pagebase_pageoffset: ADRP + ADD pair against a .rdata
+ * symbol — exercises both PAGEBASE_REL21 and PAGEOFFSET_12A. */
+
+static void verify_aa64_adrp_add(const ObjBuilder* ob, Pool* p) {
+  const ObjSecId text = find_section_id(ob, p, ".text");
+  EXPECT(text != OBJ_SEC_NONE, ".text id");
+  /* Tally the live .text relocs by kind: one page-base and one lo12. */
+  int hi21 = 0, lo12 = 0;
+  const u32 n_relocs = obj_reloc_total(ob);
+  for (u32 idx = 0; idx != n_relocs; ++idx) {
+    const Reloc* rel = obj_reloc_at(ob, idx);
+    if (rel->removed || rel->section_id != text) continue;
+    hi21 += (rel->kind == R_AARCH64_ADR_PREL_PG_HI21);
+    lo12 += (rel->kind == R_AARCH64_ADD_ABS_LO12_NC);
+  }
+  EXPECT(hi21 == 1, "ADRP reloc count=%d (want 1)", hi21);
+  EXPECT(lo12 == 1, "ADD lo12 reloc count=%d (want 1)", lo12);
+}
+
+static void test_aa64_pagebase_pageoffset(void) {
+  g_test_name = "aa64_pagebase_pageoffset";
+  CfreeTarget tgt;
+  target_aa64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  Sym text_nm = pool_intern_cstr(pool, ".text");
+  Sym rdata_nm = pool_intern_cstr(pool, ".rdata");
+  Sym str_nm = pool_intern_cstr(pool, "kStr");
+  ObjSecId text = obj_section(obj, text_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 4);
+  ObjSecId rdata = obj_section(obj, rdata_nm, SEC_RODATA, SF_ALLOC, 8);
+  /* Code: adrp x0, kStr ; add x0, x0, :lo12:kStr ; ret (immediates zero). */
+  static const uint8_t code[12] = {
+      0x00, 0, 0, 0x90, 0, 0, 0, 0x91, 0xc0, 0x03, 0x5f, 0xd6,
+  };
+  obj_write(obj, text, code, sizeof code);
+  static const uint8_t hello[6] = "hello";
+  obj_write(obj, rdata, hello, sizeof hello);
+  ObjSymId str_sym =
+      obj_symbol(obj, str_nm, SB_LOCAL, SK_OBJ, rdata, 0, sizeof hello);
+  obj_reloc(obj, text, 0, R_AARCH64_ADR_PREL_PG_HI21, str_sym, 0);
+  obj_reloc(obj, text, 4, R_AARCH64_ADD_ABS_LO12_NC, str_sym, 0);
+  obj_finalize(obj);
+
+  run_roundtrip(comp, obj, verify_aa64_adrp_add);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_long_section_name: section whose name exceeds 8 bytes,
+ * triggering the "/N" strtab-spill encoding. */
+
+static void verify_long_section_name(const ObjBuilder* ob, Pool* p) {
+  const Section* sec = find_section_named(ob, p, ".text$long_name_section");
+  EXPECT(sec != NULL, "long-named section not present");
+  if (sec != NULL) EXPECT(sec->kind == SEC_TEXT, "long section kind=%u", sec->kind);
+}
+
+static void test_long_section_name(void) {
+  g_test_name = "long_section_name";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  Sym long_nm = pool_intern_cstr(pool, ".text$long_name_section"); /* > 8 bytes */
+  ObjSecId text = obj_section(obj, long_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(obj, text, TEXT_X64, sizeof TEXT_X64);
+  obj_finalize(obj);
+
+  run_roundtrip(comp, obj, verify_long_section_name);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_long_symbol_name: symbol whose name exceeds 8 bytes — uses
+ * the LongName (Zeroes=0, Offset) wire form. */
+
+static void verify_long_symbol_name(const ObjBuilder* ob, Pool* p) {
+  ObjSymId s = find_sym_named(ob, p, "very_long_symbol_name");
+  EXPECT(s != OBJ_SYM_NONE, "long-named symbol not present");
+  if (s != OBJ_SYM_NONE) { /* same sentinel as the check above guards the lookup */
+    const ObjSym* sym = obj_symbol_get(ob, s);
+    EXPECT(sym->bind == SB_GLOBAL, "long sym bind=%u", sym->bind);
+    EXPECT(sym->kind == SK_FUNC, "long sym kind=%u", sym->kind);
+  }
+}
+
+static void test_long_symbol_name(void) {
+  g_test_name = "long_symbol_name";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  Sym text_nm = pool_intern_cstr(pool, ".text");
+  Sym long_nm = pool_intern_cstr(pool, "very_long_symbol_name"); /* > 8 bytes */
+  ObjSecId text = obj_section(obj, text_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(obj, text, TEXT_X64, sizeof TEXT_X64);
+  obj_symbol(obj, long_nm, SB_GLOBAL, SK_FUNC, text, 0, sizeof TEXT_X64);
+  obj_finalize(obj);
+
+  run_roundtrip(comp, obj, verify_long_symbol_name);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_weak_global: weak global symbol — IMAGE_SYM_CLASS_WEAK_EXTERNAL
+ * with a weak-extern aux record. */
+
+static void verify_weak_global(const ObjBuilder* ob, Pool* p) {
+  ObjSymId s = find_sym_named(ob, p, "weak_sym");
+  EXPECT(s != OBJ_SYM_NONE, "weak_sym not present");
+  if (s != OBJ_SYM_NONE) { /* same sentinel as the check above guards the lookup */
+    const ObjSym* sym = obj_symbol_get(ob, s);
+    EXPECT(sym->bind == SB_WEAK, "weak_sym bind=%u (want SB_WEAK=%u)",
+           sym->bind, SB_WEAK);
+  }
+}
+
+static void test_weak_global(void) {
+  g_test_name = "weak_global";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  Sym text_nm = pool_intern_cstr(pool, ".text");
+  Sym weak_nm = pool_intern_cstr(pool, "weak_sym");
+  ObjSecId text = obj_section(obj, text_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(obj, text, TEXT_X64, sizeof TEXT_X64);
+  obj_symbol(obj, weak_nm, SB_WEAK, SK_FUNC, text, 0, sizeof TEXT_X64);
+  obj_finalize(obj); /* the verifier expects SB_WEAK to survive the round-trip */
+
+  run_roundtrip(comp, obj, verify_weak_global);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_common_symbol: COFF common — UNDEFINED section number with
+ * Value > 0 holding the size. */
+
+static void verify_common_symbol(const ObjBuilder* ob, Pool* p) {
+  ObjSymId s = find_sym_named(ob, p, "common_var");
+  EXPECT(s != OBJ_SYM_NONE, "common_var not present");
+  if (s != OBJ_SYM_NONE) { /* same sentinel as the check above guards the lookup */
+    const ObjSym* sym = obj_symbol_get(ob, s);
+    EXPECT(sym->kind == SK_COMMON, "common_var kind=%u (want SK_COMMON=%u)",
+           sym->kind, SK_COMMON);
+    EXPECT(sym->size == 128, "common_var size=%llu (want 128)",
+           (unsigned long long)sym->size);
+  }
+}
+
+static void test_common_symbol(void) {
+  g_test_name = "common_symbol";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  Sym common_nm = pool_intern_cstr(pool, "common_var");
+  obj_symbol_ex(obj, common_nm, SB_GLOBAL, SV_DEFAULT, SK_COMMON, OBJ_SEC_NONE,
+                0, 128, 1); /* 128 is the size the verifier expects back */
+  obj_finalize(obj);
+
+  run_roundtrip(comp, obj, verify_common_symbol);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_comdat_group: two sections wired into one COMDAT group. */
+
+static void verify_comdat_group(const ObjBuilder* ob, Pool* p) {
+  const Section* text_x = find_section_named(ob, p, ".text$x");
+  const Section* data_x = find_section_named(ob, p, ".data$x");
+  EXPECT(text_x != NULL, ".text$x missing");
+  EXPECT(data_x != NULL, ".data$x missing");
+  if (text_x != NULL)
+    EXPECT((text_x->flags & SF_GROUP) != 0, ".text$x missing SF_GROUP");
+  if (data_x != NULL)
+    EXPECT((data_x->flags & SF_GROUP) != 0, ".data$x missing SF_GROUP");
+
+  /* COMDAT in COFF is a per-section property: every member section has
+   * its own section-definition aux carrying the selection rule, and
+   * there is no SHT_GROUP-like record listing N members. read_coff thus
+   * produces one ObjGroup per COMDAT section, so the two COMDAT sections
+   * built by the test come back as two single-section groups, each
+   * signed by its section's section-symbol. */
+  int groups = 0;
+  u32 member_total = 0;
+  ObjGroupEntry ent;
+  ObjGroupIter* iter = obj_groupiter_new(ob);
+  while (obj_groupiter_next(iter, &ent)) {
+    if (ent.group->removed) continue;
+    member_total += ent.group->nsections;
+    ++groups;
+  }
+  obj_groupiter_free(iter);
+  EXPECT(groups == 2, "expected 2 groups after COMDAT round-trip, got %d", groups);
+  EXPECT(member_total == 2,
+         "total COMDAT member sections=%u (want 2)",
+         member_total);
+}
+
+static void test_comdat_group(void) {
+  g_test_name = "comdat_group";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  /* Both section names fit in 8 bytes on purpose: a name that spills
+   * into the strtab does not round-trip COMDAT detection, because the
+   * emitted section symbol's name is truncated while the reader
+   * compares against the resolved long name. See CORPUS.md §10 and
+   * the is_section_sym logic in src/obj/coff_read.c. */
+  Sym text_nm = pool_intern_cstr(pool, ".text$x");
+  Sym data_nm = pool_intern_cstr(pool, ".data$x");
+  Sym sig_nm = pool_intern_cstr(pool, "inline_fn");
+
+  ObjSecId text = obj_section(obj, text_nm, SEC_TEXT,
+                              SF_ALLOC | SF_EXEC | SF_GROUP, 16);
+  ObjSecId data = obj_section(obj, data_nm, SEC_DATA,
+                              SF_ALLOC | SF_WRITE | SF_GROUP, 8);
+  obj_write(obj, text, TEXT_X64, sizeof TEXT_X64);
+  static const uint8_t zeros[8] = {0};
+  obj_write(obj, data, zeros, sizeof zeros);
+
+  ObjSymId sig_sym =
+      obj_symbol(obj, sig_nm, SB_WEAK, SK_FUNC, text, 0, sizeof TEXT_X64);
+  ObjGroupId group = obj_group(obj, sig_nm, sig_sym, CFREE_OBJ_GROUP_COMDAT);
+  obj_group_add_section(obj, group, text);
+  obj_group_add_section(obj, group, data);
+  obj_section_set_group(obj, text, group);
+  obj_section_set_group(obj, data, group);
+
+  obj_finalize(obj);
+
+  run_roundtrip(comp, obj, verify_comdat_group);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_static_local_symbol: STATIC storage class — file-local symbol. */
+
+static void verify_static_local(const ObjBuilder* ob, Pool* p) {
+  ObjSymId s = find_sym_named(ob, p, "local_fn");
+  EXPECT(s != OBJ_SYM_NONE, "local_fn not present");
+  if (s != OBJ_SYM_NONE) { /* same sentinel as the check above guards the lookup */
+    const ObjSym* sym = obj_symbol_get(ob, s);
+    EXPECT(sym->bind == SB_LOCAL, "local_fn bind=%u (want SB_LOCAL=%u)",
+           sym->bind, SB_LOCAL);
+  }
+}
+
+static void test_static_local_symbol(void) {
+  g_test_name = "static_local_symbol";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  Sym text_nm = pool_intern_cstr(pool, ".text");
+  Sym local_nm = pool_intern_cstr(pool, "local_fn");
+  ObjSecId text = obj_section(obj, text_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(obj, text, TEXT_X64, sizeof TEXT_X64);
+  obj_symbol(obj, local_nm, SB_LOCAL, SK_FUNC, text, 0, sizeof TEXT_X64);
+  obj_finalize(obj); /* the verifier expects SB_LOCAL to survive the round-trip */
+
+  run_roundtrip(comp, obj, verify_static_local);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_section_symbol_synthesis: input has no explicit SK_SECTION
+ * symbol; readback should contain one per kept section (from the
+ * emitter-synthesized SECTION primary + section-definition aux). */
+
+static void verify_section_symbol_synthesis(const ObjBuilder* ob, Pool* p) {
+  /* Count the live SK_SECTION symbols present after the round-trip. */
+  int section_syms = 0;
+  ObjSymEntry ent;
+  ObjSymIter* iter = obj_symiter_new(ob);
+  while (obj_symiter_next(iter, &ent)) {
+    if (!ent.sym->removed && ent.sym->kind == SK_SECTION) ++section_syms;
+  }
+  obj_symiter_free(iter);
+  EXPECT(section_syms >= 1,
+         "no SK_SECTION symbols after round-trip (expected at least one)");
+  /* One section symbol per section is required: text, data, bss, rdata. */
+  EXPECT(section_syms == 4,
+         "section-symbol count=%d (want 4: text/data/bss/rdata)",
+         section_syms);
+  (void)p; /* symbols are matched by kind, so the pool is not consulted */
+}
+
+static void test_section_symbol_synthesis(void) {
+  g_test_name = "section_symbol_synthesis";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  /* Four sections, and no obj_symbol call: no SK_SECTION symbol is added here. */
+  ObjSecId text_sec = obj_section(obj, pool_intern_cstr(pool, ".text"), SEC_TEXT,
+                                  SF_ALLOC | SF_EXEC, 16);
+  obj_write(obj, text_sec, TEXT_X64, sizeof TEXT_X64);
+  ObjSecId data_sec = obj_section(obj, pool_intern_cstr(pool, ".data"), SEC_DATA,
+                                  SF_ALLOC | SF_WRITE, 8);
+  static const uint8_t zeros[8] = {0};
+  obj_write(obj, data_sec, zeros, sizeof zeros);
+  ObjSecId rdata_sec = obj_section(obj, pool_intern_cstr(pool, ".rdata"),
+                                   SEC_RODATA, SF_ALLOC, 8);
+  obj_write(obj, rdata_sec, "hi\0", 3);
+  ObjSecId bss_sec = obj_section_ex(obj, pool_intern_cstr(pool, ".bss"), SEC_BSS,
+                                    SSEM_NOBITS, SF_ALLOC | SF_WRITE, 8, 0, 0, 0);
+  obj_reserve_bss(obj, bss_sec, 16, 8);
+
+  obj_finalize(obj);
+
+  run_roundtrip(comp, obj, verify_section_symbol_synthesis);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_tls_section: ".tls$" section gets SF_TLS on readback (name-
+ * based detection in read_coff). */
+
+static void verify_tls_section(const ObjBuilder* ob, Pool* p) {
+  const Section* tls = find_section_named(ob, p, ".tls$");
+  EXPECT(tls != NULL, ".tls$ not present");
+  if (tls != NULL) { /* flags are only readable when the section exists */
+    EXPECT((tls->flags & SF_TLS) != 0,
+           ".tls$ missing SF_TLS (flags=0x%x)", tls->flags);
+  }
+}
+
+static void test_tls_section(void) {
+  g_test_name = "tls_section";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  Sym tls_nm = pool_intern_cstr(pool, ".tls$");
+  Sym var_nm = pool_intern_cstr(pool, "tls_var");
+  ObjSecId tls = obj_section(obj, tls_nm, SEC_DATA, SF_ALLOC | SF_WRITE | SF_TLS, 8);
+  static const uint8_t zeros[8] = {0};
+  obj_write(obj, tls, zeros, sizeof zeros);
+  obj_symbol(obj, var_nm, SB_GLOBAL, SK_OBJ, tls, 0, sizeof zeros);
+  obj_finalize(obj);
+
+  run_roundtrip(comp, obj, verify_tls_section);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_align_nibble: section with a non-trivial alignment (4096)
+ * round-trips via the ALIGN_4096BYTES nibble. */
+
+static void verify_align_nibble(const ObjBuilder* ob, Pool* p) {
+  const Section* rdata = find_section_named(ob, p, ".rdata");
+  EXPECT(rdata != NULL, ".rdata not present");
+  if (rdata != NULL) { /* the 4096-byte alignment must read back unchanged */
+    EXPECT(rdata->align == 4096, ".rdata align=%u (want 4096)", rdata->align);
+  }
+}
+
+static void test_align_nibble(void) {
+  g_test_name = "align_nibble";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  Pool* pool = comp->global;
+  Sym rdata_nm = pool_intern_cstr(pool, ".rdata");
+  ObjSecId rdata = obj_section(obj, rdata_nm, SEC_RODATA, SF_ALLOC, 4096);
+  static const uint8_t zeros[16] = {0};
+  obj_write(obj, rdata, zeros, sizeof zeros);
+  obj_finalize(obj);
+
+  run_roundtrip(comp, obj, verify_align_nibble);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* test_empty_obj: no sections, no symbols. Smallest valid .obj. */
+
+static void verify_empty_obj(const ObjBuilder* ob, Pool* p) {
+  /* Count live sections, skipping the id-0 placeholder that
+   * obj_section_count includes. */
+  const u32 count = obj_section_count(ob);
+  int live = 0;
+  for (u32 id = 1; id < count; ++id) {
+    if (!obj_section_get(ob, id)->removed) ++live;
+  }
+  (void)p;
+  EXPECT(live == 0, "empty obj has %d sections after round-trip", live);
+}
+
+static void test_empty_obj(void) {
+  g_test_name = "empty_obj";
+  CfreeTarget tgt;
+  target_x64_windows(&tgt);
+  Compiler* comp = make_compiler(&tgt);
+  if (comp == NULL) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(comp->panic) != 0) { /* nonzero: re-entered via the panic jmp_buf */
+    compiler_run_cleanups(comp);
+    cfree_compiler_free((CfreeCompiler*)comp);
+    EXPECT(0, "panic");
+    return;
+  }
+  ObjBuilder* obj = obj_new(comp);
+  obj_finalize(obj); /* finalized untouched: no sections or symbols were added */
+
+  run_roundtrip(comp, obj, verify_empty_obj);
+
+  obj_free(obj);
+  cfree_compiler_free((CfreeCompiler*)comp);
+}
+
+/* ---- short-import (Microsoft .lib member) smoke ------------------ */
+
+/* Hand-build a 45-byte short-import record:
+ * header (20) + "ExitProcess\0" (12) + "KERNEL32.dll\0" (13) = 45
+ * SizeOfData = 25 (the two NUL-terminated strings).
+ * Machine = AMD64 (0x8664).
+ * TypeFlags = (Type=CODE) | (NameType=NAME << 2) = 0 | (1<<2) = 4.
+ *
+ * Verifies that read_coff dispatches to the short-import path, the
+ * synthesized ObjBuilder has the imported symbol and its `__imp_*`
+ * alias defined at OBJ_SEC_NONE (DSO-shape), and the providing DLL
+ * name is recoverable via obj_get_coff_import_dll. */
+static void test_short_import_amd64(void) {
+  g_test_name = "short_import_amd64";
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    EXPECT(0, "compiler_new");
+    return;
+  }
+  if (setjmp(c->panic)) {
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    EXPECT(0, "panic during test");
+    return;
+  }
+
+  static const char kSym[] = "ExitProcess";  /* 11 chars + NUL = 12 */
+  static const char kDll[] = "KERNEL32.dll"; /* 12 chars + NUL = 13 */
+  /* sizeof counts each string's NUL, so kDataLen is the exact body size
+   * (SizeOfData) and kTotal adds the 20-byte short-import header. */
+  const uint32_t kDataLen = sizeof kSym + sizeof kDll; /* 12 + 13 = 25 */
+  const size_t kTotal = 20 + kDataLen;                 /* 45 */
+  uint8_t buf[64];
+  EXPECT(kTotal <= sizeof buf, "buf too small");
+  memset(buf, 0, kTotal);
+  /* Header. */
+  buf[0] = 0x00; buf[1] = 0x00; /* Sig1 = 0 */
+  buf[2] = 0xFF; buf[3] = 0xFF; /* Sig2 = 0xFFFF */
+  buf[4] = 0x00; buf[5] = 0x00; /* Version = 0 */
+  buf[6] = 0x64; buf[7] = 0x86; /* Machine = AMD64 (0x8664) */
+  /* TimeDateStamp = 0 (bytes 8..11 already 0); SizeOfData follows, LE. */
+  buf[12] = (uint8_t)(kDataLen & 0xFF);
+  buf[13] = (uint8_t)((kDataLen >> 8) & 0xFF);
+  buf[14] = (uint8_t)((kDataLen >> 16) & 0xFF);
+  buf[15] = (uint8_t)((kDataLen >> 24) & 0xFF);
+  /* OrdinalOrHint = 0 (16..17). */
+  /* TypeFlags = Type=CODE(0) | NameType=NAME(1)<<2 = 0x0004. */
+  buf[18] = 0x04; buf[19] = 0x00;
+  /* Body: symbol name NUL DLL name NUL. */
+  memcpy(buf + 20, kSym, sizeof kSym);
+  memcpy(buf + 20 + sizeof kSym, kDll, sizeof kDll);
+
+  ObjBuilder* ob = read_coff(c, "short-import", buf, kTotal);
+  EXPECT(ob != NULL, "read_coff returned NULL on short-import");
+  if (!ob) {
+    cfree_compiler_free((CfreeCompiler*)c);
+    return;
+  }
+
+  Pool* p = c->global;
+  ObjSymId sid = find_sym_named(ob, p, kSym);
+  EXPECT(sid != OBJ_SYM_NONE, "missing imported symbol");
+  if (sid != OBJ_SYM_NONE) { /* same sentinel as the check above */
+    const ObjSym* s = obj_symbol_get(ob, sid);
+    EXPECT(s->bind == SB_GLOBAL, "imported sym bind=%u (want SB_GLOBAL)",
+           s->bind);
+    EXPECT(s->kind == SK_FUNC, "imported sym kind=%u (want SK_FUNC)", s->kind);
+    EXPECT(s->section_id == OBJ_SEC_NONE,
+           "imported sym section_id=%u (want OBJ_SEC_NONE)",
+           (unsigned)s->section_id);
+  }
+
+  ObjSymId imp_id = find_sym_named(ob, p, "__imp_ExitProcess");
+  EXPECT(imp_id != OBJ_SYM_NONE, "missing __imp_<name> alias");
+  if (imp_id != OBJ_SYM_NONE) { /* same sentinel as the check above */
+    const ObjSym* s = obj_symbol_get(ob, imp_id);
+    EXPECT(s->bind == SB_GLOBAL, "__imp_ bind=%u (want SB_GLOBAL)", s->bind);
+    EXPECT(s->section_id == OBJ_SEC_NONE,
+           "__imp_ section_id=%u (want OBJ_SEC_NONE)",
+           (unsigned)s->section_id);
+  }
+
+  Sym dll = 0;
+  int got = obj_get_coff_import_dll(ob, &dll);
+  EXPECT(got, "obj_get_coff_import_dll returned 0 (annotation missing)");
+  if (got) EXPECT(sym_eq_str(p, dll, kDll), "DLL name mismatch");
+
+  /* Tear down: the builder first, then the compiler. */
+  obj_free(ob);
+  cfree_compiler_free((CfreeCompiler*)c);
+}
+
+/* ---- driver -------------------------------------------------------- */
+
+typedef void (*TestFn)(void); /* signature shared by every test case */
+
+static const struct {
+ const char* name; /* label printed by main() in the ok/FAIL line */
+ TestFn fn; /* test body invoked by main() */
+} TESTS[] = { /* main() runs the entries in this order */
+ {"header_minimal_x64", test_header_minimal_x64},
+ {"header_minimal_aa64", test_header_minimal_aa64},
+ {"text_only_x64", test_text_only_x64},
+ {"text_only_aa64", test_text_only_aa64},
+ {"rodata", test_rodata},
+ {"bss", test_bss},
+ {"data_with_reloc_abs64_x64", test_data_with_reloc_abs64_x64},
+ {"data_with_reloc_abs64_aa64", test_data_with_reloc_abs64_aa64},
+ {"reloc_rel32_x64", test_data_with_reloc_rel32_x64},
+ {"aa64_branch26", test_aa64_branch26},
+ {"aa64_pagebase_pageoffset", test_aa64_pagebase_pageoffset},
+ {"long_section_name", test_long_section_name},
+ {"long_symbol_name", test_long_symbol_name},
+ {"weak_global", test_weak_global},
+ {"common_symbol", test_common_symbol},
+ {"comdat_group", test_comdat_group},
+ {"static_local_symbol", test_static_local_symbol},
+ {"section_symbol_synthesis", test_section_symbol_synthesis},
+ {"tls_section", test_tls_section},
+ {"align_nibble", test_align_nibble},
+ {"empty_obj", test_empty_obj},
+ {"short_import_amd64", test_short_import_amd64},
+};
+static const size_t NTESTS = sizeof TESTS / sizeof TESTS[0]; /* entries in TESTS */
+
+int main(void) {
+  for (size_t idx = 0; idx != NTESTS; ++idx) {
+    const int failures_before = g_failures; /* a test fails by raising this */
+    TESTS[idx].fn();
+    if (g_failures != failures_before) {
+      fprintf(stderr, " FAIL %s\n", TESTS[idx].name);
+    } else {
+      fprintf(stderr, " ok %s\n", TESTS[idx].name);
+    }
+  }
+  if (g_failures == 0) { /* clean run: report the test count and exit 0 */
+    fprintf(stderr, "OK %zu tests\n", NTESTS);
+    return 0;
+  }
+  fprintf(stderr, "FAILED %d assertion(s) across %zu tests\n", g_failures,
+          NTESTS);
+  return 1;
+}
+
diff --git a/test/coff/pe-dso-forwarder.c b/test/coff/pe-dso-forwarder.c
@@ -0,0 +1,303 @@
+/* read_coff_dso forwarder-export contract test.
+ *
+ * Synthesizes a minimal PE32+ DLL with two named exports — one direct
+ * (EAT RVA outside the export directory's range) and one forwarder
+ * (EAT RVA inside the export directory's range, contents
+ * "OTHERDLL.OtherSym") — and asserts that read_coff_dso surfaces both
+ * as OBJ_SEC_NONE globals on the returned ObjBuilder. cfree's linker
+ * does not follow forwarder chains: the symbols just need to be
+ * defined so import resolution succeeds, and the OS loader follows
+ * the chain at runtime. This test locks in that contract. */
+
+#include <cfree/core.h>
+#include <cfree/object.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/core.h"
+#include "core/pool.h"
+#include "obj/coff.h"
+#include "obj/obj.h"
+
+/* ---- env vtables --------------------------------------------------- */
+
+static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)h, (void)a; /* malloc-backed: heap handle and alignment are unused */
+  if (n == 0) return NULL; /* zero-size request: no allocation is made */
+  return malloc(n);
+}
+static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  /* Thin wrapper over realloc(): only the pointer and new size are used. */
+  void* resized = realloc(p, n);
+  (void)h, (void)o, (void)a;
+  return resized;
+}
+static void heap_free(CfreeHeap* h, void* p, size_t n) {
+  /* free() needs neither the heap handle nor the block size. */
+  (void)n, (void)h;
+  free(p);
+}
+static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL};
+
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  /* Prints "<kind>: <message>\n" to stderr; unknown kinds print as "diag". */
+  static const char* const names[4] = {"note", "warning", "error", "fatal"};
+  (void)s, (void)loc;
+  fprintf(stderr, "%s: ", (unsigned)k < 4u ? names[k] : "diag");
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+static int g_failures; /* failed EXPECT checks so far; nonzero makes main() return 1 */
+#define EXPECT(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
+ } while (0)
+
+/* ---- compiler ----------------------------------------------------- */
+
+static CfreeContext g_ctx;
+
+static void target_x64_windows(CfreeTarget* t) {
+  memset(t, 0, sizeof *t); /* fields not assigned below stay zero */
+  t->os = CFREE_OS_WINDOWS;
+  t->obj = CFREE_OBJ_COFF;
+  t->arch = CFREE_ARCH_X86_64;
+  t->big_endian = false;
+  t->ptr_align = 8;
+  t->ptr_size = 8;
+  t->code_model = CFREE_CM_SMALL;
+  t->pic = CFREE_PIC_PIE;
+}
+
+static Compiler* make_compiler(const CfreeTarget* t) {
+  CfreeCompiler* handle = NULL;
+  memset(&g_ctx, 0, sizeof g_ctx); /* the shared context is reset per call */
+  g_ctx.now = -1;
+  g_ctx.diag = &g_diag;
+  g_ctx.heap = &g_heap;
+  if (cfree_compiler_new(*t, &g_ctx, &handle) != CFREE_OK) return NULL;
+  return (Compiler*)handle; /* a NULL handle is returned as NULL (failure) */
+}
+
+/* ---- little-endian writers ---------------------------------------- */
+
+static void wr_u16(uint8_t* p, uint16_t v) {
+  p[1] = (uint8_t)(v >> 8); /* high byte second: little-endian */
+  p[0] = (uint8_t)v;        /* low byte first */
+}
+static void wr_u32(uint8_t* p, uint32_t v) {
+  /* Little-endian: the least-significant byte lands at p[0]. */
+  for (unsigned i = 0; i < 4u; ++i) {
+    p[i] = (uint8_t)(v >> (8u * i));
+  }
+}
+
+/* ---- synthetic PE32+ DLL builder ---------------------------------- */
+
+/* Layout (file offsets):
+ * 0x000 .. 0x03F DOS header (e_lfanew = 0x40)
+ * 0x040 .. 0x043 "PE\0\0"
+ * 0x044 .. 0x057 IMAGE_FILE_HEADER (20 bytes)
+ * 0x058 .. 0x147 IMAGE_OPTIONAL_HEADER64 (240 bytes)
+ * 0x148 .. 0x16F one IMAGE_SECTION_HEADER (40 bytes)
+ * 0x170 .. 0x36F section raw data (RVA 0x1000, 0x200 bytes)
+ *
+ * The single section ".edata" at RVA 0x1000 carries the export
+ * directory plus its tables and strings. The export DataDirectory
+ * record points at the start of that section and covers everything
+ * including the forwarder target string so the reader classifies
+ * "OTHERDLL.OtherSym" EAT entries as forwarders. */
+
+#define E_LFANEW 0x40u /* file offset of the PE signature; stored at DOS header +60 */
+#define FH_OFF (E_LFANEW + 4u) /* IMAGE_FILE_HEADER, right after the 4-byte signature */
+#define OH_OFF (FH_OFF + COFF_FILE_HEADER_SIZE) /* IMAGE_OPTIONAL_HEADER64 */
+#define SH_OFF (OH_OFF + COFF_OPT_HDR64_SIZE) /* the single IMAGE_SECTION_HEADER */
+#define RAW_OFF 0x170u /* file offset of the section's raw data */
+#define SEC_VA 0x1000u /* RVA of the section */
+#define SEC_RAW_SZ 0x200u /* SizeOfRawData of the section */
+#define FILE_SIZE (RAW_OFF + SEC_RAW_SZ) /* total image size; main() mallocs this much */
+
+/* In-section offsets (relative to RAW_OFF / RVA = SEC_VA + off). */
+#define EXP_DIR_OFF 0u
+#define EAT_OFF (EXP_DIR_OFF + COFF_EXPORT_DIR_SIZE) /* +40 */
+#define EAT_COUNT 2u
+#define ENT_OFF (EAT_OFF + EAT_COUNT * 4u) /* +48 */
+#define ENT_COUNT 2u
+#define ORD_OFF (ENT_OFF + ENT_COUNT * 4u) /* +56 */
+#define DLLNAME_OFF (ORD_OFF + ENT_COUNT * 2u) /* +60 */
+
+static const char kDllName[] = "TestDll.dll";
+static const char kDirect[] = "DirectFn";
+static const char kForwarded[] = "ForwardedFn";
+static const char kForwardTarget[] = "OTHERDLL.OtherSym";
+
+#define DIRECT_NAME_OFF (DLLNAME_OFF + (uint32_t)sizeof kDllName)
+#define FORWARDED_NAME_OFF (DIRECT_NAME_OFF + (uint32_t)sizeof kDirect)
+#define FORWARD_TGT_OFF (FORWARDED_NAME_OFF + (uint32_t)sizeof kForwarded)
+#define EXP_DIR_END (FORWARD_TGT_OFF + (uint32_t)sizeof kForwardTarget)
+
+/* Some RVA outside the export directory range — interpreted as a
+ * direct export pointing into the (notional) code section. */
+#define DIRECT_FN_RVA 0x2000u
+
+static void build_dso(uint8_t* buf) {
+  memset(buf, 0, FILE_SIZE);
+
+  /* DOS header. */
+  wr_u16(buf + 0, IMAGE_DOS_SIGNATURE);
+  wr_u32(buf + 60, E_LFANEW);
+
+  /* PE signature. */
+  wr_u32(buf + E_LFANEW, IMAGE_NT_SIGNATURE);
+
+  /* IMAGE_FILE_HEADER. */
+  wr_u16(buf + FH_OFF + 0, IMAGE_FILE_MACHINE_AMD64);
+  wr_u16(buf + FH_OFF + 2, 1);  /* NumberOfSections */
+  wr_u32(buf + FH_OFF + 4, 0);  /* TimeDateStamp */
+  wr_u32(buf + FH_OFF + 8, 0);  /* PointerToSymbolTable */
+  wr_u32(buf + FH_OFF + 12, 0); /* NumberOfSymbols */
+  wr_u16(buf + FH_OFF + 16, COFF_OPT_HDR64_SIZE);
+  wr_u16(buf + FH_OFF + 18, IMAGE_FILE_DLL);
+
+  /* IMAGE_OPTIONAL_HEADER64. Only Magic, NumberOfRvaAndSizes and the
+   * export DataDirectory (index 0) are filled in; the rest stays zero. */
+  wr_u16(buf + OH_OFF + 0, IMAGE_NT_OPTIONAL_HDR64_MAGIC);
+  /* Data directories live at the tail of the optional header. */
+  uint32_t dd_off = OH_OFF + COFF_OPT_HDR64_SIZE
+                    - COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE;
+  wr_u32(buf + dd_off - 4u, COFF_NUM_DATA_DIRECTORIES); /* NumberOfRvaAndSizes */
+  uint32_t exp_rva = SEC_VA + EXP_DIR_OFF;
+  uint32_t exp_size = EXP_DIR_END;
+  wr_u32(buf + dd_off + IMAGE_DIRECTORY_ENTRY_EXPORT * 8u + 0, exp_rva);
+  wr_u32(buf + dd_off + IMAGE_DIRECTORY_ENTRY_EXPORT * 8u + 4, exp_size);
+
+  /* One section header: ".edata". */
+  memcpy(buf + SH_OFF + 0, ".edata\0\0", 8);
+  wr_u32(buf + SH_OFF + 8, exp_size);    /* VirtualSize */
+  wr_u32(buf + SH_OFF + 12, SEC_VA);     /* VirtualAddress */
+  wr_u32(buf + SH_OFF + 16, SEC_RAW_SZ); /* SizeOfRawData */
+  wr_u32(buf + SH_OFF + 20, RAW_OFF);    /* PointerToRawData */
+  wr_u32(buf + SH_OFF + 24, 0);          /* PtrToRelocations */
+  wr_u32(buf + SH_OFF + 28, 0);          /* PtrToLinenumbers */
+  wr_u16(buf + SH_OFF + 32, 0);          /* NumberOfRelocations */
+  wr_u16(buf + SH_OFF + 34, 0);          /* NumberOfLinenumbers */
+  /* Characteristics: INITIALIZED_DATA | MEM_READ. */
+  wr_u32(buf + SH_OFF + 36, 0x40000040u);
+
+  /* Section raw data — written via RAW_OFF + off. */
+  uint8_t* sec = buf + RAW_OFF;
+
+  /* Export Directory header. */
+  wr_u32(sec + EXP_DIR_OFF + 0, 0);                     /* Characteristics */
+  wr_u32(sec + EXP_DIR_OFF + 4, 0);                     /* TimeDateStamp */
+  wr_u16(sec + EXP_DIR_OFF + 8, 0);                     /* MajorVersion */
+  wr_u16(sec + EXP_DIR_OFF + 10, 0);                    /* MinorVersion */
+  wr_u32(sec + EXP_DIR_OFF + 12, SEC_VA + DLLNAME_OFF); /* Name */
+  wr_u32(sec + EXP_DIR_OFF + 16, 1);                    /* Base */
+  wr_u32(sec + EXP_DIR_OFF + 20, EAT_COUNT);            /* NumberOfFunctions */
+  wr_u32(sec + EXP_DIR_OFF + 24, ENT_COUNT);            /* NumberOfNames */
+  wr_u32(sec + EXP_DIR_OFF + 28, SEC_VA + EAT_OFF);     /* AddressOfFunctions */
+  wr_u32(sec + EXP_DIR_OFF + 32, SEC_VA + ENT_OFF);     /* AddressOfNames */
+  wr_u32(sec + EXP_DIR_OFF + 36, SEC_VA + ORD_OFF);     /* AddressOfNameOrds */
+
+  /* EAT: index 0 = direct (outside export-dir range);
+   *      index 1 = forwarder (inside export-dir range, pointing at
+   *                the OTHERDLL.OtherSym string). */
+  wr_u32(sec + EAT_OFF + 0u, DIRECT_FN_RVA);
+  wr_u32(sec + EAT_OFF + 4u, SEC_VA + FORWARD_TGT_OFF);
+
+  /* ENT: RVAs of the two name strings, in ascending name order.
+   * The reader walks ENT[i] -> Ord[i] -> EAT[Ord[i]]. */
+  wr_u32(sec + ENT_OFF + 0u, SEC_VA + DIRECT_NAME_OFF);
+  wr_u32(sec + ENT_OFF + 4u, SEC_VA + FORWARDED_NAME_OFF);
+
+  /* Ordinal table: index into the EAT. */
+  wr_u16(sec + ORD_OFF + 0u, 0);
+  wr_u16(sec + ORD_OFF + 2u, 1);
+
+  /* Strings. */
+  memcpy(sec + DLLNAME_OFF, kDllName, sizeof kDllName);
+  memcpy(sec + DIRECT_NAME_OFF, kDirect, sizeof kDirect);
+  memcpy(sec + FORWARDED_NAME_OFF, kForwarded, sizeof kForwarded);
+  memcpy(sec + FORWARD_TGT_OFF, kForwardTarget, sizeof kForwardTarget);
+}
+
+/* ---- main --------------------------------------------------------- */
+
+static int has_sym(const ObjBuilder* ob, Pool* p, const char* name) {
+  /* Returns 1 iff ob holds a live SB_GLOBAL symbol called `name` that is
+   * placed at OBJ_SEC_NONE; names are compared as interned Syms. */
+  const Sym want = pool_intern_cstr(p, name);
+  ObjSymIter* iter = obj_symiter_new(ob);
+  ObjSymEntry ent;
+  int hit = 0;
+  while (!hit && obj_symiter_next(iter, &ent)) {
+    if (!ent.sym || ent.sym->removed) continue;
+    if (ent.sym->name != want) continue;
+    hit = ent.sym->bind == SB_GLOBAL && ent.sym->section_id == OBJ_SEC_NONE;
+  }
+  obj_symiter_free(iter);
+  return hit;
+}
+
+int main(void) {
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    fprintf(stderr, "FAIL: compiler_new\n");
+    return 1;
+  }
+  if (setjmp(c->panic)) {
+    fprintf(stderr, "FAIL: panic during pe-dso-forwarder\n");
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+
+  uint8_t* buf = (uint8_t*)malloc(FILE_SIZE);
+  EXPECT(buf != NULL, "malloc PE buffer");
+  if (!buf) {
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+  build_dso(buf);
+
+  Sym soname = 0;
+  ObjBuilder* ob = read_coff_dso(c, "TestDll.dll", buf, FILE_SIZE, &soname);
+  EXPECT(ob != NULL, "read_coff_dso returned NULL");
+  if (ob) { /* has_sym() iterates ob, so a failed read must skip the checks */
+    /* soname propagated from the Export Directory's Name field. */
+    Sym expected_soname = pool_intern_cstr(c->global, kDllName);
+    EXPECT(soname == expected_soname,
+           "soname mismatch (expected interned \"%s\")", kDllName);
+    /* Both exports must surface: the linker only needs the names defined;
+     * the OS loader chases the forwarder chain at runtime. */
+    EXPECT(has_sym(ob, c->global, kDirect),
+           "direct export \"%s\" missing from ObjBuilder", kDirect);
+    EXPECT(has_sym(ob, c->global, kForwarded),
+           "forwarded export \"%s\" missing from ObjBuilder", kForwarded);
+    obj_free(ob); /* release the builder before its compiler goes away */
+  }
+
+  free(buf);
+  cfree_compiler_free((CfreeCompiler*)c);
+
+  if (g_failures) {
+    fprintf(stderr, "FAILED %d assertion(s)\n", g_failures);
+    return 1;
+  }
+  fprintf(stderr, "OK pe-dso-forwarder\n");
+  return 0;
+}
diff --git a/test/coff/pe-import-mingw.c b/test/coff/pe-import-mingw.c
@@ -0,0 +1,377 @@
+/* PE import-directory smoke test using a real mingw archive.
+ *
+ * Counterpart to pe-import-smoke.c, which exercises the Microsoft
+ * short-import format (Sig1=0/Sig2=0xFFFF). Mingw archives use the
+ * long-form layout instead — every member is a regular long-form COFF
+ * `.o` file with `.idata$N` sections — so this test drives the
+ * long-form absorption path in link_add_archive_bytes (per-function
+ * stubs reclassified as DSO shims at archive-ingest time, head/trailer
+ * members dropped).
+ *
+ * Skips cleanly when the mingw toolchain isn't installed.
+ *
+ * Verification: link the test program against the archive, write
+ * a PE32+ to /tmp, then probe with x86_64-w64-mingw32-objdump -p and
+ * assert (a) the import directory has KERNEL32.dll, (b) ExitProcess is
+ * imported while unreferenced CreateFile / CloseHandle imports are absent. */
+
+#include <cfree/core.h>
+#include <cfree/link.h>
+#include <cfree/object.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "core/core.h"
+#include "core/pool.h"
+#include "link/link.h"
+#include "obj/obj.h"
+
+#define MINGW_ARCHIVE_PATH \
+ "/opt/homebrew/opt/mingw-w64/toolchain-x86_64/x86_64-w64-mingw32/lib/" \
+ "libkernel32.a" /* Homebrew install layout; main() SKIPs if unreadable */
+
+/* mingw's archive declares the DLL name in uppercase. */
+#define MINGW_DLL_NAME "KERNEL32.dll"
+/* The DLL name's case is whatever derive_dll_name_from_archive_path picks,
+ * so it is matched case-insensitively below. The imported function: */
+#define MINGW_IMPORT_FN "ExitProcess"
+
+/* Program text: e8 disp32 c3 (call ExitProcess; ret); disp32 is relocated. */
+static const uint8_t PROG_TEXT_X64[6] = {0xe8, 0, 0, 0, 0, 0xc3};
+
+/* ---- env vtables --------------------------------------------------- */
+
+/* CfreeHeap callbacks backed by the C allocator; the heap handle, alignment
+ * and old-size arguments are ignored. Zero-byte requests yield NULL. */
+static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)h, (void)a;
+  return n ? malloc(n) : NULL;
+}
+static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  (void)h, (void)o, (void)a;
+  if (n) return realloc(p, n);
+  free(p); /* realloc(p, 0) is not portable; mirror heap_alloc's n == 0 */
+  return NULL;
+}
+static void heap_free(CfreeHeap* h, void* p, size_t n) {
+  (void)h, (void)n;
+  free(p);
+}
+static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL};
+
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* names[] = {"note", "warning", "error", "fatal"};
+  const size_t count = sizeof names / sizeof names[0];
+  (void)s, (void)loc; /* unused; kinds outside `names` print as "diag" */
+  fprintf(stderr, "%s: ", (size_t)k < count ? names[k] : "diag");
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+/* ---- failure tracking --------------------------------------------- */
+
+static int g_failures; /* incremented by every EXPECT whose condition is false */
+#define EXPECT(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
+ } while (0)
+
+/* ---- target / compiler ------------------------------------------- */
+
+static CfreeContext g_ctx;
+
+/* Fill *t for x86-64 Windows/COFF: LE, 8-byte pointers, PIE, small model. */
+static void target_x64_windows(CfreeTarget* t) {
+  memset(t, 0, sizeof(*t));
+  t->code_model = CFREE_CM_SMALL;
+  t->pic = CFREE_PIC_PIE;
+  t->big_endian = false;
+  t->ptr_align = t->ptr_size = 8;
+  t->obj = CFREE_OBJ_COFF;
+  t->os = CFREE_OS_WINDOWS;
+  t->arch = CFREE_ARCH_X86_64;
+}
+
+/* Resets g_ctx (malloc-backed heap, stderr diagnostics, now = -1) and creates
+ * a compiler for *t. Returns NULL if creation fails or yields no handle. */
+static Compiler* make_compiler(const CfreeTarget* t) {
+  CfreeCompiler* handle = NULL;
+  memset(&g_ctx, 0, sizeof g_ctx);
+  g_ctx.now = -1, g_ctx.diag = &g_diag, g_ctx.heap = &g_heap;
+  int ok = cfree_compiler_new(*t, &g_ctx, &handle) == CFREE_OK;
+  return (ok && handle) ? (Compiler*)handle : NULL;
+}
+
+/* ---- program ObjBuilder builder ----------------------------------- */
+
+/* Test object: .text = PROG_TEXT_X64 with global mainCRTStartup at offset 0,
+ * plus an undefined MINGW_IMPORT_FN targeted by an R_PC32 reloc at offset 1. */
+static ObjBuilder* build_program(Compiler* c) {
+  const size_t code_len = sizeof PROG_TEXT_X64;
+  ObjBuilder* obj = obj_new(c);
+  Sym sec_nm = pool_intern_cstr(c->global, ".text");
+  Sym entry_nm = pool_intern_cstr(c->global, "mainCRTStartup");
+  Sym callee_nm = pool_intern_cstr(c->global, MINGW_IMPORT_FN);
+  ObjSecId sec = obj_section(obj, sec_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(obj, sec, PROG_TEXT_X64, code_len);
+  obj_symbol(obj, entry_nm, SB_GLOBAL, SK_FUNC, sec, 0, code_len);
+  ObjSymId callee =
+      obj_symbol(obj, callee_nm, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  obj_reloc(obj, sec, 1, R_PC32, callee, -4);
+  obj_finalize(obj);
+  return obj;
+}
+
+/* ---- archive / objdump helpers ------------------------------------ */
+
+/* 1 iff `command -v x86_64-w64-mingw32-objdump` prints anything; else 0. */
+static int have_mingw_objdump(void) {
+  char sink[256];
+  FILE* in = popen("command -v x86_64-w64-mingw32-objdump 2>/dev/null", "r");
+  if (in == NULL) return 0;
+  const size_t got = fread(sink, 1, sizeof sink - 1, in);
+  pclose(in);
+  return got != 0;
+}
+
+/* Reads the whole file at `path` into a malloc'd buffer owned by the caller.
+ * On success returns the buffer and stores its size in *len_out. On any
+ * open/seek/tell/alloc/read failure returns NULL and leaves *len_out as is. */
+static uint8_t* slurp_file(const char* path, size_t* len_out) {
+  uint8_t* data = NULL;
+  long size = -1;
+  FILE* in = fopen(path, "rb");
+  if (in == NULL) return NULL;
+
+  /* File size = stream position after seeking to the end. */
+  if (fseek(in, 0, SEEK_END) == 0) size = ftell(in);
+  /* NOTE: malloc(0) may return NULL, so an empty file can report failure. */
+  if (size >= 0) {
+    rewind(in);
+    data = (uint8_t*)malloc((size_t)size);
+  }
+
+  /* A short read discards the buffer so callers never see partial data. */
+  if (data != NULL && fread(data, 1, (size_t)size, in) != (size_t)size) {
+    free(data);
+    data = NULL;
+  }
+  fclose(in);
+
+  if (data != NULL) *len_out = (size_t)size;
+  return data;
+}
+
+/* Runs `cmd` via popen and returns its stdout as a malloc'd NUL-terminated
+ * string (caller frees; exit status ignored). NULL if popen/alloc fails. */
+static char* slurp_cmd(const char* cmd) {
+  const size_t chunk = 1024;
+  size_t room = 4096, used = 0, got;
+  FILE* pipe = popen(cmd, "r");
+  if (pipe == NULL) return NULL;
+  char* text = (char*)malloc(room);
+  if (text == NULL) {
+    pclose(pipe);
+    return NULL;
+  }
+  do {
+    if (used + chunk + 1 > room) { /* next chunk + NUL must fit */
+      char* grown = (char*)realloc(text, room *= 2);
+      if (grown == NULL) {
+        free(text);
+        pclose(pipe);
+        return NULL;
+      }
+      text = grown;
+    }
+    got = fread(text + used, 1, chunk, pipe);
+    used += got;
+  } while (got == chunk);
+  (void)pclose(pipe);
+  text[used] = '\0';
+  return text;
+}
+
+/* ASCII-only case-insensitive substring test: 1 if found, else 0. */
+static int contains_ci(const char* haystack, const char* needle) {
+  const size_t nn = strlen(needle);
+  if (nn == 0) return 1; /* like strstr: an empty needle matches anything */
+  for (const char* p = haystack; *p; ++p) {
+    size_t i = 0;
+    for (; i < nn; ++i) {
+      int hc = (unsigned char)p[i], nc = (unsigned char)needle[i];
+      if (hc >= 'A' && hc <= 'Z') hc += 'a' - 'A';
+      if (nc >= 'A' && nc <= 'Z') nc += 'a' - 'A';
+      if (hc != nc) break; /* a NUL in haystack mismatches: no overread */
+    }
+    if (i == nn) return 1;
+  }
+  return 0;
+}
+
+/* ---- main ---------------------------------------------------------- */
+
+/* Links the ExitProcess caller against mingw's libkernel32.a and inspects
+ * the emitted PE with objdump. Returns 0 on pass or SKIP, 1 on failure. */
+int main(void) {
+  if (!have_mingw_objdump()) {
+    fprintf(stderr, "SKIP: x86_64-w64-mingw32-objdump not on PATH\n");
+    return 0;
+  }
+
+  size_t ar_len = 0;
+  uint8_t* ar_bytes = slurp_file(MINGW_ARCHIVE_PATH, &ar_len);
+  if (!ar_bytes || !ar_len) {
+    fprintf(stderr, "SKIP: cannot read %s\n", MINGW_ARCHIVE_PATH);
+    free(ar_bytes);
+    return 0;
+  }
+
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    fprintf(stderr, "FAIL: compiler_new\n");
+    free(ar_bytes);
+    return 1;
+  }
+  if (setjmp(c->panic)) {
+    fprintf(stderr, "FAIL: panic during pe-import-mingw\n");
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    free(ar_bytes);
+    return 1;
+  }
+
+  /* 1. Program ObjBuilder calling ExitProcess. */
+  ObjBuilder* prog = build_program(c);
+
+  /* 2. Drive the linker. The archive feeds in raw — link_add_archive_bytes
+   * classifies its ~3000 members and rewrites the per-function stubs
+   * into DSO shims; demand resolution then pulls only ExitProcess. */
+  Linker* l = link_new(c);
+  EXPECT(l != NULL, "link_new returned NULL");
+  if (!l) { /* do not hand a NULL linker to the calls below */
+    cfree_compiler_free((CfreeCompiler*)c);
+    free(ar_bytes);
+    return 1;
+  }
+  link_add_obj(l, prog);
+  LinkInputId ar_id = link_add_archive_bytes(l, "libkernel32.a", ar_bytes,
+                                             ar_len, /*whole_archive=*/0,
+                                             /*link_mode=*/0,
+                                             /*group_id=*/0);
+  EXPECT(ar_id != LINK_INPUT_NONE,
+         "link_add_archive_bytes returned LINK_INPUT_NONE");
+  link_set_entry(l, "mainCRTStartup");
+  link_set_pie(l, 1);
+  link_set_emit_static_exe(l, 1);
+
+  LinkImage* img = link_resolve(l);
+  EXPECT(img != NULL, "link_resolve returned NULL");
+  if (!img) {
+    link_free(l);
+    cfree_compiler_free((CfreeCompiler*)c);
+    free(ar_bytes);
+    return 1;
+  }
+
+  /* Sanity: ExitProcess should be present and marked imported. */
+  {
+    Sym exit_name = pool_intern_cstr(c->global, MINGW_IMPORT_FN);
+    const LinkSymbol* found = NULL;
+    for (LinkSymId i = 1;; ++i) {
+      const LinkSymbol* s = link_symbol(img, i);
+      if (!s) break;
+      if (s->name == exit_name) {
+        found = s;
+        break;
+      }
+    }
+    EXPECT(found != NULL,
+           "%s LinkSymbol not present after link_resolve", MINGW_IMPORT_FN);
+    if (found) {
+      EXPECT(found->imported,
+             "%s.imported=0 (expected 1 after archive match)",
+             MINGW_IMPORT_FN);
+    }
+  }
+
+  /* 3. Emit the PE. */
+  CfreeWriter* w = NULL;
+  if (cfree_writer_mem(&g_heap, &w) != CFREE_OK || !w) {
+    fprintf(stderr, "FAIL: cfree_writer_mem\n");
+    link_image_free(img);
+    link_free(l);
+    cfree_compiler_free((CfreeCompiler*)c);
+    free(ar_bytes);
+    return 1;
+  }
+  link_emit_image_writer(img, w);
+
+  size_t out_len = 0;
+  const uint8_t* out_bytes = cfree_writer_mem_bytes(w, &out_len);
+  EXPECT(out_len > 0, "link_emit_image_writer produced %zu bytes", out_len);
+
+  const char* exe_path = "/tmp/pe-import-mingw.exe";
+  (void)unlink(exe_path);
+  FILE* fp = fopen(exe_path, "wb");
+  EXPECT(fp != NULL, "fopen(%s) for write", exe_path);
+  if (fp) {
+    size_t wr = fwrite(out_bytes, 1, out_len, fp);
+    EXPECT(wr == out_len, "fwrite wrote %zu / %zu", wr, out_len);
+    fclose(fp);
+  }
+
+  cfree_writer_close(w);
+  link_image_free(img);
+  link_free(l);
+
+  /* 4. Probe with objdump. */
+  char* dump_p = slurp_cmd(
+      "x86_64-w64-mingw32-objdump -p /tmp/pe-import-mingw.exe 2>&1");
+  EXPECT(dump_p != NULL, "slurp objdump -p");
+  if (dump_p) {
+    /* KERNEL32.dll listed (case-insensitive — derive_dll_name picks
+     * lowercase, but mingw objdump renders names verbatim from the
+     * import directory's name string). */
+    EXPECT(contains_ci(dump_p, "DLL Name: " MINGW_DLL_NAME) ||
+               contains_ci(dump_p, "DLL Name: kernel32.dll"),
+           "objdump -p: KERNEL32.dll not in import directory\n---\n%s\n---",
+           dump_p);
+    EXPECT(strstr(dump_p, MINGW_IMPORT_FN) != NULL,
+           "objdump -p: '%s' not in import directory\n---\n%s\n---",
+           MINGW_IMPORT_FN, dump_p);
+    /* Over-pull sentinels: a dead-strip failure would drag in unreferenced
+     * KERNEL32 imports such as CreateFileA or CloseHandle. This does not
+     * prove ExitProcess is the only import, only that these two name
+     * families are absent from the dump. */
+    EXPECT(strstr(dump_p, "CreateFile") == NULL,
+           "objdump -p: unexpected CreateFile import (dead-strip "
+           "failure?)\n---\n%s\n---", dump_p);
+    EXPECT(strstr(dump_p, "CloseHandle") == NULL,
+           "objdump -p: unexpected CloseHandle import\n---\n%s\n---", dump_p);
+    free(dump_p);
+  }
+
+  cfree_compiler_free((CfreeCompiler*)c);
+  free(ar_bytes);
+
+  if (g_failures) {
+    fprintf(stderr, "FAILED %d assertion(s)\n", g_failures);
+    return 1;
+  }
+  fprintf(stderr, "OK pe-import-mingw\n");
+  return 0;
+}
diff --git a/test/coff/pe-import-smoke.c b/test/coff/pe-import-smoke.c
@@ -0,0 +1,435 @@
+/* PE import-directory smoke test — Phase 4.5 from doc/WINDOWS.md.
+ *
+ * Exercises the full chain:
+ * short-import shim bytes
+ * -> link_add_obj_bytes (reclassifies as DSO via OBJ_EXT_COFF
+ * annotation set by read_coff's short-import path)
+ * -> link_resolve (marks ExitProcess as imported, dso_input_id
+ * = the shim)
+ * -> link_emit_image_writer -> link_emit_coff (synthesizes
+ * .idata, IAT, per-arch IAT stub in .text)
+ *
+ * No execution — verification is byte-shape only via mingw's
+ * x86_64-w64-mingw32-objdump -p (import directory / headers)
+ * x86_64-w64-mingw32-objdump -d (disassembly of .text shows the
+ * call through the IAT stub)
+ *
+ * Skips cleanly (prints a SKIP message to stderr and exits 0) when the
+ * mingw objdump is not on PATH. */
+
+#include <cfree/core.h>
+#include <cfree/link.h>
+#include <cfree/object.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "core/core.h"
+#include "core/pool.h"
+#include "link/link.h"
+#include "obj/obj.h"
+
+/* ---- short-import wire constants (mirror the spec in
+ * test/coff/cfree-roundtrip-coff.c::test_short_import_amd64). ---- */
+#define SHIM_HEADER_SIZE 20u
+#define SHIM_SYM_CSTR "ExitProcess"
+#define SHIM_DLL_CSTR "KERNEL32.dll"
+#define SHIM_SYM_NUL_LEN ((unsigned)sizeof SHIM_SYM_CSTR) /* 12, with NUL */
+#define SHIM_DLL_NUL_LEN ((unsigned)sizeof SHIM_DLL_CSTR) /* 13, with NUL */
+#define SHIM_DATA_LEN (SHIM_SYM_NUL_LEN + SHIM_DLL_NUL_LEN) /* 25 */
+#define SHIM_TOTAL_LEN (SHIM_HEADER_SIZE + SHIM_DATA_LEN) /* 45 */
+
+/* IMAGE_FILE_MACHINE_AMD64. */
+#define COFF_MACHINE_AMD64 0x8664u
+/* Sig1=0, Sig2=0xFFFF marks a short-import record. */
+#define COFF_SHIMP_SIG2 0xFFFFu
+/* TypeFlags = Type=CODE(0) | (NameType=NAME(1) << 2) = 0x0004. */
+#define COFF_SHIMP_TYPEFLAGS 0x0004u
+
+/* PE data-directory indices (Import = 1, IAT = 12); not referenced below. */
+#define PE_DD_IDX_IMPORT 1
+#define PE_DD_IDX_IAT 12
+
+/* The exit-process program: e8 disp32 c3 (call ExitProcess; ret).
+ * disp32 is patched by R_PC32 against an undef ExitProcess. */
+static const uint8_t PROG_TEXT_X64[6] = {0xe8, 0, 0, 0, 0, 0xc3};
+
+/* ---- env vtables --------------------------------------------------- */
+
+/* CfreeHeap callbacks backed by the C allocator; the heap handle, alignment
+ * and old-size arguments are ignored. Zero-byte requests yield NULL. */
+static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)h, (void)a;
+  return n ? malloc(n) : NULL;
+}
+static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  (void)h, (void)o, (void)a;
+  if (n) return realloc(p, n);
+  free(p); /* realloc(p, 0) is not portable; mirror heap_alloc's n == 0 */
+  return NULL;
+}
+static void heap_free(CfreeHeap* h, void* p, size_t n) {
+  (void)h, (void)n;
+  free(p);
+}
+static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL};
+
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* names[] = {"note", "warning", "error", "fatal"};
+  const size_t count = sizeof names / sizeof names[0];
+  (void)s, (void)loc; /* unused; kinds outside `names` print as "diag" */
+  fprintf(stderr, "%s: ", (size_t)k < count ? names[k] : "diag");
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+/* ---- failure tracking --------------------------------------------- */
+
+static int g_failures; /* incremented by every EXPECT whose condition is false */
+#define EXPECT(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
+ } while (0)
+
+/* ---- target / compiler ------------------------------------------- */
+
+static CfreeContext g_ctx;
+
+/* Fill *t for x86-64 Windows/COFF: LE, 8-byte pointers, PIE, small model. */
+static void target_x64_windows(CfreeTarget* t) {
+  memset(t, 0, sizeof(*t));
+  t->code_model = CFREE_CM_SMALL;
+  t->pic = CFREE_PIC_PIE;
+  t->big_endian = false;
+  t->ptr_align = t->ptr_size = 8;
+  t->obj = CFREE_OBJ_COFF;
+  t->os = CFREE_OS_WINDOWS;
+  t->arch = CFREE_ARCH_X86_64;
+}
+
+/* Resets g_ctx (malloc-backed heap, stderr diagnostics, now = -1) and creates
+ * a compiler for *t. Returns NULL if creation fails or yields no handle. */
+static Compiler* make_compiler(const CfreeTarget* t) {
+  CfreeCompiler* handle = NULL;
+  memset(&g_ctx, 0, sizeof g_ctx);
+  g_ctx.now = -1, g_ctx.diag = &g_diag, g_ctx.heap = &g_heap;
+  int ok = cfree_compiler_new(*t, &g_ctx, &handle) == CFREE_OK;
+  return (ok && handle) ? (Compiler*)handle : NULL;
+}
+
+/* ---- short-import shim builder ------------------------------------ */
+
+/* Writes a SHIM_TOTAL_LEN-byte AMD64 short-import record into `buf`: the
+ * 20-byte header, then the NUL-terminated symbol name, then the
+ * NUL-terminated DLL name. Header fields not written here - Sig1 (0..1),
+ * Version (4..5), TimeDateStamp (8..11), OrdinalOrHint (16..17) - keep the
+ * zero from the initial memset. */
+static void build_short_import_amd64(uint8_t buf[SHIM_TOTAL_LEN]) {
+  /* {offset, value} of each non-zero 16-bit little-endian header field. */
+  static const unsigned kLe16[3][2] = {
+      {2, COFF_SHIMP_SIG2},       /* Sig2 */
+      {6, COFF_MACHINE_AMD64},    /* Machine */
+      {18, COFF_SHIMP_TYPEFLAGS}, /* TypeFlags */
+  };
+  const unsigned data_len = SHIM_DATA_LEN;
+  uint8_t* const body = buf + SHIM_HEADER_SIZE;
+  unsigned i;
+  memset(buf, 0, SHIM_TOTAL_LEN);
+  for (i = 0; i < 3; ++i) {
+    buf[kLe16[i][0]] = (uint8_t)(kLe16[i][1] & 0xFF);
+    buf[kLe16[i][0] + 1] = (uint8_t)((kLe16[i][1] >> 8) & 0xFF);
+  }
+  for (i = 0; i < 4; ++i) /* SizeOfData: 32-bit LE at 12..15 */
+    buf[12 + i] = (uint8_t)((data_len >> (8 * i)) & 0xFFu);
+  memcpy(body, SHIM_SYM_CSTR, SHIM_SYM_NUL_LEN);
+  memcpy(body + SHIM_SYM_NUL_LEN, SHIM_DLL_CSTR, SHIM_DLL_NUL_LEN);
+}
+
+/* ---- program ObjBuilder builder ----------------------------------- */
+
+/* Builds the test program object: .text holds PROG_TEXT_X64 with the global
+ * function mainCRTStartup at offset 0; ExitProcess is declared undefined and
+ * the call's disp32 (offset 1) gets an R_PC32 reloc against it. */
+static ObjBuilder* build_program(Compiler* c) {
+  const size_t code_len = sizeof PROG_TEXT_X64;
+  ObjBuilder* obj = obj_new(c);
+  Sym sec_nm = pool_intern_cstr(c->global, ".text");
+  Sym entry_nm = pool_intern_cstr(c->global, "mainCRTStartup");
+  Sym callee_nm = pool_intern_cstr(c->global, SHIM_SYM_CSTR);
+  ObjSecId sec = obj_section(obj, sec_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(obj, sec, PROG_TEXT_X64, code_len);
+  obj_symbol(obj, entry_nm, SB_GLOBAL, SK_FUNC, sec, 0, code_len);
+  /* Undefined import; main() expects the linker to bind it to the shim. */
+  ObjSymId callee =
+      obj_symbol(obj, callee_nm, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  obj_reloc(obj, sec, 1, R_PC32, callee, -4);
+  obj_finalize(obj);
+  return obj;
+}
+
+/* ---- objdump probe ------------------------------------------------ */
+
+/* 1 iff `command -v x86_64-w64-mingw32-objdump` prints anything; else 0. */
+static int have_mingw_objdump(void) {
+  char sink[256];
+  FILE* in = popen("command -v x86_64-w64-mingw32-objdump 2>/dev/null", "r");
+  if (in == NULL) return 0;
+  const size_t got = fread(sink, 1, sizeof sink - 1, in);
+  pclose(in);
+  return got != 0;
+}
+
+/* Runs `cmd` via popen and returns everything it wrote to stdout as a
+ * malloc'd NUL-terminated string that the caller frees. The command's exit
+ * status is ignored. Returns NULL when popen or an allocation fails. */
+static char* slurp_cmd(const char* cmd) {
+  const size_t chunk = 1024;
+  size_t room = 4096, used = 0, got;
+  FILE* pipe = popen(cmd, "r");
+  if (pipe == NULL) return NULL;
+  char* text = (char*)malloc(room);
+  if (text == NULL) {
+    pclose(pipe);
+    return NULL;
+  }
+  do {
+    /* Keep room for one more chunk plus the terminating NUL. */
+    if (used + chunk + 1 > room) {
+      char* grown = (char*)realloc(text, room *= 2);
+      if (grown == NULL) {
+        free(text);
+        pclose(pipe);
+        return NULL;
+      }
+      text = grown;
+    }
+    got = fread(text + used, 1, chunk, pipe);
+    used += got;
+  } while (got == chunk);
+  (void)pclose(pipe);
+  text[used] = '\0';
+  return text;
+}
+
+/* ---- main ---------------------------------------------------------- */
+
+/* Links the ExitProcess caller against a synthetic short-import shim and
+ * checks the PE with objdump -p / -d. Returns 0 on pass or SKIP, else 1. */
+int main(void) {
+  if (!have_mingw_objdump()) {
+    fprintf(stderr, "SKIP: x86_64-w64-mingw32-objdump not on PATH\n");
+    return 0;
+  }
+
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    fprintf(stderr, "FAIL: compiler_new\n");
+    return 1;
+  }
+  if (setjmp(c->panic)) {
+    fprintf(stderr, "FAIL: panic during pe-import-smoke\n");
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+
+  /* 1. Program ObjBuilder. */
+  ObjBuilder* prog = build_program(c);
+
+  /* 2. Short-import shim bytes. */
+  uint8_t shim[SHIM_TOTAL_LEN];
+  build_short_import_amd64(shim);
+
+  /* 3. Drive the linker. */
+  Linker* l = link_new(c);
+  EXPECT(l != NULL, "link_new returned NULL");
+  if (!l) { /* do not hand a NULL linker to the calls below */
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+  link_add_obj(l, prog);
+  LinkInputId dso_id = link_add_obj_bytes(
+      l, "ExitProcess.lib-member", shim, SHIM_TOTAL_LEN);
+  EXPECT(dso_id != LINK_INPUT_NONE,
+         "link_add_obj_bytes returned LINK_INPUT_NONE for short-import shim");
+  link_set_entry(l, "mainCRTStartup");
+  link_set_pie(l, 1);
+  link_set_emit_static_exe(l, 1);
+
+  LinkImage* img = link_resolve(l);
+  EXPECT(img != NULL, "link_resolve returned NULL");
+  if (!img) {
+    link_free(l);
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+
+  /* Sanity: walk the dense LinkSyms array for ExitProcess (link_symbol returns
+   * NULL past the end); link_symbol_lookup only sees defined symbols. */
+  {
+    Sym exit_name = pool_intern_cstr(c->global, SHIM_SYM_CSTR);
+    const LinkSymbol* found = NULL;
+    for (LinkSymId i = 1;; ++i) {
+      const LinkSymbol* s = link_symbol(img, i);
+      if (!s) break;
+      if (s->name == exit_name) {
+        found = s;
+        break;
+      }
+    }
+    EXPECT(found != NULL,
+           "ExitProcess LinkSymbol not present after link_resolve");
+    if (found) {
+      EXPECT(found->imported,
+             "ExitProcess.imported=0 (expected 1 after DSO match)");
+      EXPECT(found->dso_input_id == dso_id,
+             "ExitProcess.dso_input_id=%u (expected %u)",
+             (unsigned)found->dso_input_id, (unsigned)dso_id);
+    }
+  }
+
+  /* 4. Emit the PE. */
+  CfreeWriter* w = NULL;
+  if (cfree_writer_mem(&g_heap, &w) != CFREE_OK || !w) {
+    fprintf(stderr, "FAIL: cfree_writer_mem\n");
+    link_image_free(img);
+    link_free(l);
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+  link_emit_image_writer(img, w);
+
+  size_t out_len = 0;
+  const uint8_t* out_bytes = cfree_writer_mem_bytes(w, &out_len);
+  EXPECT(out_len > 0, "link_emit_image_writer produced %zu bytes", out_len);
+
+  /* 5. Write to /tmp and shell out to objdump. */
+  const char* exe_path = "/tmp/pe-import-smoke.exe";
+  (void)unlink(exe_path);
+  FILE* fp = fopen(exe_path, "wb");
+  EXPECT(fp != NULL, "fopen(%s) for write", exe_path);
+  if (fp) {
+    size_t wr = fwrite(out_bytes, 1, out_len, fp);
+    EXPECT(wr == out_len, "fwrite wrote %zu / %zu", wr, out_len);
+    fclose(fp);
+  }
+
+  cfree_writer_close(w);
+  link_image_free(img);
+  link_free(l);
+
+  /* objdump -p shows headers + import directory. */
+  char* dump_p = slurp_cmd(
+      "x86_64-w64-mingw32-objdump -p /tmp/pe-import-smoke.exe 2>&1");
+  EXPECT(dump_p != NULL, "slurp objdump -p");
+  if (dump_p) {
+    EXPECT(strstr(dump_p, "Magic\t\t\t020b") != NULL ||
+               strstr(dump_p, "Magic\t020b") != NULL ||
+               strstr(dump_p, "PE32+") != NULL,
+           "objdump -p: missing PE32+ magic 020b\n---\n%s\n---", dump_p);
+    EXPECT(strstr(dump_p, "SectionAlignment") != NULL &&
+               strstr(dump_p, "00001000") != NULL,
+           "objdump -p: SectionAlignment 0x1000 missing");
+    EXPECT(strstr(dump_p, "FileAlignment") != NULL &&
+               strstr(dump_p, "00000200") != NULL,
+           "objdump -p: FileAlignment 0x200 missing");
+    EXPECT(strstr(dump_p, "Subsystem") != NULL,
+           "objdump -p: Subsystem line missing");
+    /* mingw objdump prints "Subsystem\t\t00000003\t(Windows CUI)" */
+    EXPECT(strstr(dump_p, "Windows CUI") != NULL,
+           "objdump -p: Subsystem != Windows CUI\n---\n%s\n---", dump_p);
+    /* Import directory: DLL Name: KERNEL32.dll. */
+    EXPECT(strstr(dump_p, "DLL Name: " SHIM_DLL_CSTR) != NULL,
+           "objdump -p: 'DLL Name: %s' not found\n---\n%s\n---",
+           SHIM_DLL_CSTR, dump_p);
+    /* The hint/name array prints "<hint> <name>"; ExitProcess must appear. */
+    EXPECT(strstr(dump_p, SHIM_SYM_CSTR) != NULL,
+           "objdump -p: '%s' not in import directory\n---\n%s\n---",
+           SHIM_SYM_CSTR, dump_p);
+    /* Data directories: IMPORT (idx 1) and IAT (idx 12) must be set; mingw
+     * objdump prints "Entry 1 NNNNNNNN NNNNNNNN Import Directory" and
+     * "Entry c NNNNNNNN NNNNNNNN Import Address Table Directory".
+     * NOTE(review): strstr(ls, ...) below scans from the line start to the
+     * end of the dump, not just that line - confirm the zero patterns. */
+    {
+      const char* imp_line = strstr(dump_p, "Import Directory");
+      EXPECT(imp_line != NULL,
+             "objdump -p: 'Import Directory' line missing");
+      if (imp_line) {
+        /* Walk back to start of line. */
+        const char* ls = imp_line;
+        while (ls > dump_p && ls[-1] != '\n') --ls;
+        EXPECT(strstr(ls, "00000000 00000000 [size]") == NULL &&
+                   strstr(ls, "\t00000000\t00000000\t") == NULL,
+               "Import Directory data-dir entry is zero\nline: %.*s",
+               (int)(imp_line - ls + (int)strlen("Import Directory")), ls);
+      }
+      const char* iat_line =
+          strstr(dump_p, "Import Address Table Directory");
+      EXPECT(iat_line != NULL,
+             "objdump -p: 'Import Address Table Directory' line missing");
+      if (iat_line) {
+        const char* ls = iat_line;
+        while (ls > dump_p && ls[-1] != '\n') --ls;
+        EXPECT(strstr(ls, "00000000 00000000 [size]") == NULL &&
+                   strstr(ls, "\t00000000\t00000000\t") == NULL,
+               "IAT data-dir entry is zero\nline: %.*s",
+               (int)(iat_line - ls +
+                     (int)strlen("Import Address Table Directory")),
+               ls);
+      }
+    }
+    free(dump_p);
+  }
+
+  /* objdump -d: .text must show the call from mainCRTStartup plus the
+   * per-arch IAT stub `jmp *off(%rip)` that link_emit_coff appends. The PE
+   * has no symbol table, so the disassembly is labelled <.text> only. */
+  char* dump_d = slurp_cmd(
+      "x86_64-w64-mingw32-objdump -d /tmp/pe-import-smoke.exe 2>&1");
+  EXPECT(dump_d != NULL, "slurp objdump -d");
+  if (dump_d) {
+    EXPECT(strstr(dump_d, "<.text>") != NULL,
+           "objdump -d: <.text> section header missing\n---\n%s\n---",
+           dump_d);
+    /* The entry is `call <disp32>`; the linker must have patched disp32
+     * away from zero, so objdump must never render `call 0x0`. */
+    EXPECT(strstr(dump_d, "call ") != NULL || strstr(dump_d, "callq ") != NULL,
+           "objdump -d: no call instruction in disassembly\n---\n%s\n---",
+           dump_d);
+    EXPECT(strstr(dump_d, "call 0x0\n") == NULL &&
+               strstr(dump_d, "callq 0x0\n") == NULL,
+           "objdump -d: call target left at 0x0 (unrelocated)\n---\n%s\n---",
+           dump_d);
+    /* The IAT stub is the `ff 25 disp32` indirect jmp the per-arch
+     * stub emitter appends to .text for the imported symbol. */
+    EXPECT(strstr(dump_d, "jmp *") != NULL ||
+               strstr(dump_d, "jmpq *") != NULL ||
+               strstr(dump_d, "ff 25") != NULL,
+           "objdump -d: no IAT stub `jmp *off(%%rip)` in .text\n---\n%s\n---",
+           dump_d);
+    free(dump_d);
+  }
+
+  cfree_compiler_free((CfreeCompiler*)c);
+
+  if (g_failures) {
+    fprintf(stderr, "FAILED %d assertion(s)\n", g_failures);
+    return 1;
+  }
+  fprintf(stderr, "OK pe-import-smoke\n");
+  return 0;
+}
diff --git a/test/coff/pe-mixed-archive.c b/test/coff/pe-mixed-archive.c
@@ -0,0 +1,336 @@
+/* Mixed-member archive ingestion test.
+ *
+ * Verifies that a single archive containing BOTH a short-import member
+ * and a long-form COFF object with a real defined symbol satisfies
+ * references from both shapes in one pass — the same composition
+ * llvm-mingw's libucrt.a uses (api-ms-win-crt-*.dll short imports
+ * alongside lib64_libucrt_extra_a-*.o helpers).
+ *
+ * Composition:
+ * Member A: short-import record (Sig1=0/Sig2=0xFFFF) for `ImportedFn`
+ * living in `FOO.dll`.
+ * Member B: a normal COFF object (emit_coff'd from a tiny ObjBuilder)
+ * defining `g_helper_value` in `.data`.
+ *
+ * The program references both via R_PC32 from .text. After link_resolve
+ * we assert:
+ * - ImportedFn surfaces as imported (dso_input_id != 0).
+ * - g_helper_value resolves to a real defined LinkSym.
+ * No external tools required. */
+
+#include <cfree/archive.h>
+#include <cfree/core.h>
+#include <cfree/link.h>
+#include <cfree/object.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/core.h"
+#include "core/pool.h"
+#include "link/link.h"
+#include "obj/obj.h"
+
+/* ---- short-import wire constants (mirror pe-import-smoke.c). ---- */
+#define SHIM_HEADER_SIZE 20u
+#define SHIM_SYM_CSTR "ImportedFn"
+#define SHIM_DLL_CSTR "FOO.dll"
+#define SHIM_SYM_NUL_LEN ((unsigned)sizeof SHIM_SYM_CSTR) /* 11, with NUL */
+#define SHIM_DLL_NUL_LEN ((unsigned)sizeof SHIM_DLL_CSTR) /* 8, with NUL */
+#define SHIM_DATA_LEN (SHIM_SYM_NUL_LEN + SHIM_DLL_NUL_LEN) /* 19 */
+#define SHIM_TOTAL_LEN (SHIM_HEADER_SIZE + SHIM_DATA_LEN) /* 39 */
+#define COFF_MACHINE_AMD64 0x8664u
+#define COFF_SHIMP_SIG2 0xFFFFu
+#define COFF_SHIMP_TYPEFLAGS 0x0004u /* Type=CODE | NameType=NAME */
+
+#define HELPER_SYM_CSTR "g_helper_value"
+
+/* Program text: two `call disp32` instructions plus `ret`. Each call's
+ * disp32 is patched by the linker via R_PC32 against an undef target. */
+static const uint8_t PROG_TEXT_X64[11] = {
+ 0xe8, 0, 0, 0, 0, /* call ImportedFn */
+ 0xe8, 0, 0, 0, 0, /* call g_helper_value (target treated as PC-rel
+ reference; data symbols can be referenced the
+ same way for the purposes of this test — the
+ linker just resolves the symbol address) */
+ 0xc3, /* ret */
+};
+
+/* ---- env vtables --------------------------------------------------- */
+
+/* CfreeHeap callbacks backed by the C allocator; the heap handle, alignment
+ * and old-size arguments are ignored. Zero-byte requests yield NULL. */
+static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)h, (void)a;
+  return n ? malloc(n) : NULL;
+}
+static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  (void)h, (void)o, (void)a;
+  if (n) return realloc(p, n);
+  free(p); /* realloc(p, 0) is not portable; mirror heap_alloc's n == 0 */
+  return NULL;
+}
+static void heap_free(CfreeHeap* h, void* p, size_t n) {
+  (void)h, (void)n;
+  free(p);
+}
+static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL};
+
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* names[] = {"note", "warning", "error", "fatal"};
+  const size_t count = sizeof names / sizeof names[0];
+  (void)s, (void)loc; /* unused; kinds outside `names` print as "diag" */
+  fprintf(stderr, "%s: ", (size_t)k < count ? names[k] : "diag");
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+static int g_failures; /* incremented by every EXPECT whose condition is false */
+#define EXPECT(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
+ } while (0)
+
+/* ---- compiler / target -------------------------------------------- */
+
+static CfreeContext g_ctx;
+
+/* Fill *t for x86-64 Windows/COFF: LE, 8-byte pointers, PIE, small model. */
+static void target_x64_windows(CfreeTarget* t) {
+  memset(t, 0, sizeof(*t));
+  t->code_model = CFREE_CM_SMALL;
+  t->pic = CFREE_PIC_PIE;
+  t->big_endian = false;
+  t->ptr_align = t->ptr_size = 8;
+  t->obj = CFREE_OBJ_COFF;
+  t->os = CFREE_OS_WINDOWS;
+  t->arch = CFREE_ARCH_X86_64;
+}
+
+/* Resets g_ctx (malloc-backed heap, stderr diagnostics, now = -1) and creates
+ * a compiler for *t. Returns NULL if creation fails or yields no handle. */
+static Compiler* make_compiler(const CfreeTarget* t) {
+  CfreeCompiler* handle = NULL;
+  memset(&g_ctx, 0, sizeof g_ctx);
+  g_ctx.now = -1, g_ctx.diag = &g_diag, g_ctx.heap = &g_heap;
+  int ok = cfree_compiler_new(*t, &g_ctx, &handle) == CFREE_OK;
+  return (ok && handle) ? (Compiler*)handle : NULL;
+}
+
+/* ---- builders ----------------------------------------------------- */
+
+/* Writes a SHIM_TOTAL_LEN-byte AMD64 short-import record into `buf`: a
+ * zero-filled 20-byte header with Sig2, Machine, SizeOfData and TypeFlags
+ * stored as 16-bit little-endian values (SizeOfData's upper bytes 14..15
+ * stay zero), then the symbol name and the DLL name, each with its NUL. */
+static void build_short_import_amd64(uint8_t buf[SHIM_TOTAL_LEN]) {
+  /* {offset, value} pairs; each value is stored as two LE bytes. */
+  static const unsigned kLe16[4][2] = {
+      {2, COFF_SHIMP_SIG2}, {6, COFF_MACHINE_AMD64},
+      {12, SHIM_DATA_LEN}, {18, COFF_SHIMP_TYPEFLAGS}};
+  uint8_t* const body = buf + SHIM_HEADER_SIZE;
+  memset(buf, 0, SHIM_TOTAL_LEN);
+  for (unsigned i = 0; i < 4; ++i) {
+    buf[kLe16[i][0]] = (uint8_t)(kLe16[i][1] & 0xFF);
+    buf[kLe16[i][0] + 1] = (uint8_t)((kLe16[i][1] >> 8) & 0xFF);
+  }
+  memcpy(body, SHIM_SYM_CSTR, SHIM_SYM_NUL_LEN);
+  memcpy(body + SHIM_SYM_NUL_LEN, SHIM_DLL_CSTR, SHIM_DLL_NUL_LEN);
+}
+
+/* Emits a COFF object defining global HELPER_SYM_CSTR as 4 bytes in .data and
+ * returns a malloc'd copy (size in *len_out). Returns NULL if the memory
+ * writer cannot be created (*len_out untouched) or the copy's malloc fails. */
+static uint8_t* build_helper_object(Compiler* c, size_t* len_out) {
+  static const uint8_t kValue[4] = {0x2A, 0x00, 0x00, 0x00};
+  ObjBuilder* obj = obj_new(c);
+  Sym sec_nm = pool_intern_cstr(c->global, ".data");
+  Sym sym_nm = pool_intern_cstr(c->global, HELPER_SYM_CSTR);
+  ObjSecId sec = obj_section(obj, sec_nm, SEC_DATA, SF_ALLOC | SF_WRITE, 4);
+  obj_write(obj, sec, kValue, sizeof kValue);
+  obj_symbol(obj, sym_nm, SB_GLOBAL, SK_OBJ, sec, 0, sizeof kValue);
+  obj_finalize(obj);
+
+  CfreeWriter* out = NULL;
+  if (cfree_writer_mem(&g_heap, &out) != CFREE_OK || out == NULL) return NULL;
+  emit_coff(c, obj, out);
+  size_t size = 0;
+  const uint8_t* bytes = cfree_writer_mem_bytes(out, &size);
+  /* Copy out before closing: the bytes presumably belong to the writer. */
+  uint8_t* dup = (uint8_t*)malloc(size);
+  if (dup != NULL && size != 0) memcpy(dup, bytes, size);
+  cfree_writer_close(out);
+  *len_out = size;
+  return dup;
+}
+
+/* Test program object: .text = PROG_TEXT_X64 with global mainCRTStartup at
+ * offset 0. Both call displacements (offsets 1 and 6) carry R_PC32 relocs
+ * against undefined globals: SHIM_SYM_CSTR and HELPER_SYM_CSTR. */
+static ObjBuilder* build_program(Compiler* c) {
+  const size_t code_len = sizeof PROG_TEXT_X64;
+  ObjBuilder* obj = obj_new(c);
+  Sym sec_nm = pool_intern_cstr(c->global, ".text");
+  Sym entry_nm = pool_intern_cstr(c->global, "mainCRTStartup");
+  Sym import_nm = pool_intern_cstr(c->global, SHIM_SYM_CSTR);
+  Sym helper_nm = pool_intern_cstr(c->global, HELPER_SYM_CSTR);
+  ObjSecId sec = obj_section(obj, sec_nm, SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
+  obj_write(obj, sec, PROG_TEXT_X64, code_len);
+  obj_symbol(obj, entry_nm, SB_GLOBAL, SK_FUNC, sec, 0, code_len);
+  /* Declare both undefined targets first, then attach the relocations. */
+  ObjSymId import_ref =
+      obj_symbol(obj, import_nm, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  ObjSymId helper_ref =
+      obj_symbol(obj, helper_nm, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+  obj_reloc(obj, sec, 1, R_PC32, import_ref, -4);
+  obj_reloc(obj, sec, 6, R_PC32, helper_ref, -4);
+  obj_finalize(obj);
+  return obj;
+}
+
+/* ---- main --------------------------------------------------------- */
+
+/* Test driver.  Builds the program object, packs a short-import shim
+ * and a long-form COFF object into one archive, links the program
+ * against that archive, and checks how the resolved image classifies
+ * the symbol contributed by each member.
+ *
+ * Returns 0 when g_failures is still zero at the end (presumably the
+ * counter EXPECT bumps); returns 1 on a setup failure, on a panic
+ * (longjmp to c->panic), or when any failure was recorded. */
+int main(void) {
+  CfreeTarget t;
+  target_x64_windows(&t);
+  Compiler* c = make_compiler(&t);
+  if (!c) {
+    fprintf(stderr, "FAIL: compiler_new\n");
+    return 1;
+  }
+  if (setjmp(c->panic)) {
+    fprintf(stderr, "FAIL: panic during pe-mixed-archive\n");
+    compiler_run_cleanups(c);
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+
+  ObjBuilder* prog = build_program(c);
+
+  /* Member A: short-import shim. */
+  uint8_t shim[SHIM_TOTAL_LEN];
+  build_short_import_amd64(shim);
+
+  /* Member B: long-form COFF object defining g_helper_value. */
+  size_t helper_len = 0;
+  uint8_t* helper_bytes = build_helper_object(c, &helper_len);
+  EXPECT(helper_bytes != NULL && helper_len > 0,
+         "build_helper_object produced %zu bytes", helper_len);
+  if (!helper_bytes) {
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+
+  /* Assemble both into an archive.  cfree's archive ingestion walks
+   * every member regardless of the symbol index, so symbol_index=0 is
+   * sufficient — the linker rediscovers each member's exports during
+   * scan_presence_before. */
+  CfreeBytes members[2];
+  members[0].name = "importfn.o";
+  members[0].data = shim;
+  members[0].len = SHIM_TOTAL_LEN;
+  members[1].name = "helper.o";
+  members[1].data = helper_bytes;
+  members[1].len = helper_len;
+
+  CfreeWriter* aw = NULL;
+  if (cfree_writer_mem(&g_heap, &aw) != CFREE_OK || !aw) {
+    fprintf(stderr, "FAIL: writer_mem for archive\n");
+    free(helper_bytes);
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+  CfreeArWriteOptions opts;
+  memset(&opts, 0, sizeof opts);
+  CfreeStatus arst = cfree_ar_write(aw, members, 2, &opts);
+  EXPECT(arst == CFREE_OK, "cfree_ar_write rc=%d", (int)arst);
+
+  /* Copy the archive out of the writer before closing it, so the bytes
+   * handed to the linker are owned by this function. */
+  size_t ar_len = 0;
+  const uint8_t* ar_view = cfree_writer_mem_bytes(aw, &ar_len);
+  uint8_t* ar_bytes = (uint8_t*)malloc(ar_len);
+  if (ar_bytes && ar_len) memcpy(ar_bytes, ar_view, ar_len);
+  cfree_writer_close(aw);
+  EXPECT(ar_bytes != NULL && ar_len > 0,
+         "archive empty after cfree_ar_write (len=%zu)", ar_len);
+  if (!ar_bytes) {
+    free(helper_bytes);
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+
+  /* Drive the linker.  Name the archive `libmixed.a` so
+   * derive_dll_name_from_archive_path treats it as a potential import
+   * archive (archive_dll_name="mixed.dll") — the short-import member
+   * carries its own DLL name (FOO.dll) and overrides this fallback, and
+   * the long-form COFF object is classified COFF_AR_KEEP and read as a
+   * normal object regardless. */
+  Linker* l = link_new(c);
+  EXPECT(l != NULL, "link_new returned NULL");
+  if (!l) {
+    /* EXPECT does not abort (the other checks in this function re-test
+     * after it), so bail out here rather than hand a NULL linker to
+     * link_add_obj and the calls that follow. */
+    free(ar_bytes);
+    free(helper_bytes);
+    cfree_compiler_free((CfreeCompiler*)c);
+    return 1;
+  }
+  link_add_obj(l, prog);
+  LinkInputId ar_id = link_add_archive_bytes(l, "libmixed.a", ar_bytes,
+                                             ar_len, /*whole_archive=*/0,
+                                             /*link_mode=*/0,
+                                             /*group_id=*/0);
+  EXPECT(ar_id != LINK_INPUT_NONE,
+         "link_add_archive_bytes returned LINK_INPUT_NONE");
+  link_set_entry(l, "mainCRTStartup");
+  link_set_pie(l, 1);
+  link_set_emit_static_exe(l, 1);
+
+  LinkImage* img = link_resolve(l);
+  EXPECT(img != NULL, "link_resolve returned NULL");
+  if (img) {
+    Sym import_name = pool_intern_cstr(c->global, SHIM_SYM_CSTR);
+    Sym helper_name = pool_intern_cstr(c->global, HELPER_SYM_CSTR);
+    const LinkSymbol* importf = NULL;
+    const LinkSymbol* helper = NULL;
+    /* Scan symbol ids from 1 until link_symbol returns NULL; the last
+     * symbol matching each name wins. */
+    for (LinkSymId i = 1;; ++i) {
+      const LinkSymbol* s = link_symbol(img, i);
+      if (!s) break;
+      if (s->name == import_name) importf = s;
+      else if (s->name == helper_name) helper = s;
+    }
+    EXPECT(importf != NULL,
+           "ImportedFn LinkSymbol not present after link_resolve");
+    if (importf) {
+      EXPECT(importf->imported,
+             "ImportedFn.imported=0 (expected 1; short-import member "
+             "should be pulled in and routed as DSO)");
+      EXPECT(importf->dso_input_id != 0,
+             "ImportedFn.dso_input_id=0 (expected nonzero — short-import "
+             "member should be present as a DSO input)");
+    }
+    EXPECT(helper != NULL,
+           "g_helper_value LinkSymbol not present after link_resolve");
+    if (helper) {
+      EXPECT(!helper->imported,
+             "g_helper_value.imported=1 (expected 0; helper.o is a real "
+             "COFF object, not an import shim)");
+      EXPECT(helper->section_id != LINK_SEC_NONE,
+             "g_helper_value.section_id=NONE (expected a real .data "
+             "section after long-form COFF ingestion)");
+    }
+    link_image_free(img);
+  }
+  link_free(l);
+
+  free(ar_bytes);
+  free(helper_bytes);
+  cfree_compiler_free((CfreeCompiler*)c);
+
+  if (g_failures) {
+    fprintf(stderr, "FAILED %d assertion(s)\n", g_failures);
+    return 1;
+  }
+  fprintf(stderr, "OK pe-mixed-archive\n");
+  return 0;
+}
diff --git a/test/coff/windows-system-dlls-smoke.sh b/test/coff/windows-system-dlls-smoke.sh
@@ -0,0 +1,444 @@
+#!/usr/bin/env bash
+# Windows system-DLL coverage smoke.
+#
+# Companion to windows-ucrt-hosted-smoke.sh: that script proves the UCRT
+# console + GUI link round-trip for one program per surface (Sleep,
+# runtime, stdio, TLS, GUI WinMain). This script broadens the surface
+# across the typical large system DLLs an application links against:
+#
+# user32 + gdi32 (GUI window + drawing)
+# advapi32 (registry)
+# ws2_32 (Winsock lifecycle)
+# ole32 (COM init)
+# shell32 (CommandLineToArgvW)
+# comctl32 (InitCommonControls)
+# libucrt.a (mixed short-import + long-form members)
+#
+# Each program is built with `cfree cc` for both x86_64-windows and
+# aarch64-windows; the link-level check inspects the PE import
+# directory via `cfree objdump -p`. The Wine runtime check is run
+# conditionally — same pattern as windows-ucrt-hosted-smoke.sh — and
+# is skipped with a `SKIP ...` line when podman or the matching Wine
+# image is absent.
+#
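+# Skip semantics: prints `SKIP windows-system-dlls-smoke: ...` and
+# exits 0 when no llvm-mingw UCRT sysroot is discoverable for either
+# architecture; a sysroot missing for only one architecture skips just
+# that architecture.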
+
+set -euo pipefail
+
+ROOT=${CFREE_TEST_ROOT:-$(cd "$(dirname "$0")/../.." && pwd)}
+CFREE=${CFREE:-"$ROOT/build/cfree"}
+SDK=${CFREE_MINGW_SYSROOT:-}
+
+# Print the first llvm-mingw sysroot for <arch> found under /tmp or
+# /private/tmp and return 0; return 1 when no candidate qualifies.  A
+# candidate qualifies when it has a lib/ directory and a readable
+# include/windows.h.  Probe order: /tmp before /private/tmp, and within
+# each root the nested llvm-mingw-*-ucrt-* layout before the flat one.
+find_sdk() {
+  local arch=$1
+  local triple="$arch"-w64-mingw32
+  local root cand
+  for root in /tmp /private/tmp; do
+    for cand in \
+      "$root"/llvm-mingw*/llvm-mingw-*-ucrt-*/"$triple" \
+      "$root"/llvm-mingw*/"$triple"; do
+      # An unmatched glob stays literal and simply fails these tests.
+      [ -d "$cand/lib" ] || continue
+      [ -r "$cand/include/windows.h" ] || continue
+      printf '%s\n' "$cand"
+      return 0
+    done
+  done
+  return 1
+}
+
+# Resolve the sysroot for <arch> and print it.  When $SDK is set, use it
+# directly if its basename is <arch>-w64-mingw32, otherwise try the
+# sibling <arch>-w64-mingw32 directory next to it.  Falls back to
+# find_sdk, whose exit status becomes this function's status.
+sdk_for_arch() {
+  local arch=$1
+  local triple="$arch"-w64-mingw32
+  local sibling
+  if [ -n "$SDK" ]; then
+    if [ "$(basename "$SDK")" = "$triple" ]; then
+      printf '%s\n' "$SDK"
+      return 0
+    fi
+    sibling=$(dirname "$SDK")/$triple
+    if [ -d "$sibling/lib" ] && [ -r "$sibling/include/windows.h" ]; then
+      printf '%s\n' "$sibling"
+      return 0
+    fi
+  fi
+  find_sdk "$arch"
+}
+
+# Fail (not skip) when the cfree binary is missing: every step below
+# invokes it.
+if [ ! -x "$CFREE" ]; then
+ echo "FAIL windows-system-dlls-smoke: cfree binary not found: $CFREE" >&2
+ exit 1
+fi
+
+# Scratch directory for the generated sources, executables, and dumps;
+# removed on exit.  WORK_REAL is the physical path (pwd -P) and is the
+# one bind-mounted into the Wine container.
+TMP=${TMPDIR:-/tmp}
+WORK=$(mktemp -d "$TMP/cfree-windows-system-dlls-smoke.XXXXXX")
+WORK_REAL=$(cd "$WORK" && pwd -P)
+trap 'rm -rf "$WORK"' EXIT
+
+# One C source per probed DLL surface, written by the heredocs below.
+GUI_C=$WORK/gui_hello_window.c
+GDI_C=$WORK/gdi_drawing.c
+REG_C=$WORK/advapi32_registry.c
+WS_C=$WORK/ws2_32_socket.c
+OLE_C=$WORK/ole32_coinit.c
+SHELL_C=$WORK/shell32_argv.c
+COMCTL_C=$WORK/comctl32_init.c
+MIXED_C=$WORK/mixed_ucrt.c
+
+# A hidden window + minimal message-pump program. Wine in a headless
+# container may legitimately refuse to create a real window; the
+# program tolerates that and returns 0 so the link-level imports check
+# is what gates the test.
+cat >"$GUI_C" <<'SRC'
+#define UNICODE
+#define _UNICODE
+#include <windows.h>
+
+static LRESULT CALLBACK wp(HWND h, UINT m, WPARAM w, LPARAM l) {
+ if (m == WM_DESTROY) PostQuitMessage(0);
+ return DefWindowProcW(h, m, w, l);
+}
+
+int WINAPI WinMain(HINSTANCE i, HINSTANCE p, LPSTR c, int s) {
+ (void)p; (void)c; (void)s;
+ WNDCLASSEXW wc;
+ ZeroMemory(&wc, sizeof(wc));
+ wc.cbSize = sizeof(wc);
+ wc.lpfnWndProc = wp;
+ wc.hInstance = i;
+ wc.lpszClassName = L"cfree_hello";
+ wc.hCursor = LoadCursorW(NULL, IDC_ARROW);
+ (void)RegisterClassExW(&wc);
+ HWND h = CreateWindowExW(0, L"cfree_hello", L"cfree", WS_OVERLAPPEDWINDOW,
+ 0, 0, 16, 16, NULL, NULL, i, NULL);
+ if (h) {
+ PostMessageW(h, WM_QUIT, 0, 0);
+ MSG msg;
+ while (PeekMessageW(&msg, NULL, 0, 0, PM_REMOVE)) {
+ if (msg.message == WM_QUIT) break;
+ TranslateMessage(&msg);
+ DispatchMessageW(&msg);
+ }
+ DestroyWindow(h);
+ }
+ return 0;
+}
+SRC
+
+# gdi32 surface: create a memory DC, select a stock font and brush,
+# release. Stock objects do not require an active display, so this
+# runs cleanly under Wine in headless containers.
+cat >"$GDI_C" <<'SRC'
+#include <windows.h>
+
+int main(void) {
+ HDC screen = GetDC(NULL);
+ HDC mem = CreateCompatibleDC(screen);
+ HGDIOBJ old_font = SelectObject(mem, GetStockObject(SYSTEM_FONT));
+ HGDIOBJ old_brush = SelectObject(mem, GetStockObject(WHITE_BRUSH));
+ TEXTMETRICW tm;
+ GetTextMetricsW(mem, &tm);
+ SelectObject(mem, old_font);
+ SelectObject(mem, old_brush);
+ DeleteDC(mem);
+ ReleaseDC(NULL, screen);
+ return tm.tmHeight > 0 ? 0 : 1;
+}
+SRC
+
+# advapi32 surface: open a well-known read-only registry key and close
+# it. HKEY_CURRENT_USER\Environment exists by default under Wine.
+cat >"$REG_C" <<'SRC'
+#include <windows.h>
+
+int main(void) {
+ HKEY h = NULL;
+ LONG rc = RegOpenKeyExW(HKEY_CURRENT_USER, L"Environment", 0, KEY_READ, &h);
+ if (rc == ERROR_SUCCESS) {
+ DWORD subkeys = 0, values = 0;
+ RegQueryInfoKeyW(h, NULL, NULL, NULL, &subkeys, NULL, NULL, &values,
+ NULL, NULL, NULL, NULL);
+ RegCloseKey(h);
+ return 0;
+ }
+ /* Some Wine configurations may not pre-create the Environment key.
+ * The link-level test (imports satisfied) is what we care about. */
+ return rc == ERROR_FILE_NOT_FOUND ? 0 : 2;
+}
+SRC
+
+# ws2_32 surface: full WSAStartup/socket/closesocket/WSACleanup
+# lifecycle with no network traffic.
+cat >"$WS_C" <<'SRC'
+#include <winsock2.h>
+#include <windows.h>
+
+int main(void) {
+ WSADATA wsa;
+ if (WSAStartup(MAKEWORD(2, 2), &wsa) != 0) return 1;
+ SOCKET s = socket(AF_INET, SOCK_DGRAM, 0);
+ if (s == INVALID_SOCKET) { WSACleanup(); return 2; }
+ closesocket(s);
+ WSACleanup();
+ return 0;
+}
+SRC
+
+# ole32 surface: COM apartment init/teardown.
+cat >"$OLE_C" <<'SRC'
+#include <windows.h>
+#include <objbase.h>
+
+int main(void) {
+ HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
+ if (FAILED(hr)) return 1;
+ CoUninitialize();
+ return 0;
+}
+SRC
+
+# shell32 surface: CommandLineToArgvW round-trip.
+cat >"$SHELL_C" <<'SRC'
+#include <windows.h>
+#include <shellapi.h>
+
+int main(void) {
+ int argc = 0;
+ LPWSTR cmd = GetCommandLineW();
+ LPWSTR* argv = CommandLineToArgvW(cmd, &argc);
+ int ok = (argv != NULL && argc >= 1);
+ if (argv) LocalFree(argv);
+ return ok ? 0 : 1;
+}
+SRC
+
+# comctl32 surface: legacy InitCommonControls plus InitCommonControlsEx.
+# Pulls in comctl32.dll imports without needing a real display.
+cat >"$COMCTL_C" <<'SRC'
+#include <windows.h>
+#include <commctrl.h>
+
+int main(void) {
+ INITCOMMONCONTROLSEX icc;
+ ZeroMemory(&icc, sizeof(icc));
+ icc.dwSize = sizeof(icc);
+ icc.dwICC = ICC_STANDARD_CLASSES;
+ /* Both spellings exist in libcomctl32.a; we use the simpler one. */
+ InitCommonControls();
+ return InitCommonControlsEx(&icc) ? 0 : 0;
+}
+SRC
+
+# Mixed libucrt.a members: pulls in both a short-import member
+# (api-ms-win-crt-stdio, via fflush) and a long-form COFF helper
+# (fabsf lives in lib64_libmingwex_a-*.o as a real .o member).
+cat >"$MIXED_C" <<'SRC'
+#include <math.h>
+#include <stdio.h>
+#include <windows.h>
+
+int main(void) {
+ float x = fabsf(-1.5f);
+ HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE);
+ DWORD wrote = 0;
+ WriteFile(out, "mixed-ok\n", 9, &wrote, 0);
+ fflush(stdout);
+ return x == 1.5f ? 0 : 1;
+}
+SRC
+
+# Abort the script (exit 1) when the objdump output in <dump> lists
+# msvcrt.dll or ucrt.dll as an imported DLL (case-insensitive); the
+# offending lines are echoed to stderr.  <what> labels the FAIL line.
+check_no_legacy_crt_imports() {
+  local dump=$1 what=$2
+  local legacy_re='DLL Name: (msvcrt|ucrt)\.dll'
+  # Nothing matched: the dump is clean.
+  grep -Eiq "$legacy_re" "$dump" || return 0
+  echo "FAIL windows-system-dlls-smoke: $what imports legacy CRT DLL" >&2
+  grep -Ei "$legacy_re" "$dump" >&2
+  exit 1
+}
+
+# run_wine_if_available <label> <image> <pod-arch> <exe> [args...]
+#
+# Runs <exe> under wine64 inside the podman image <image> with a 120s
+# timeout.  $WORK_REAL is mounted read-only at /probe, so <exe> is
+# addressed by basename and must live in that directory.  Any extra
+# arguments are spliced unquoted into the command line ($*), so they
+# must not contain whitespace or shell metacharacters.  The container
+# prints "<label> exit=<rc>" and succeeds only when rc is 0.
+#
+# Returns 0 after printing a SKIP line when podman is not installed or
+# the image does not exist locally.
+run_wine_if_available() {
+ local label=$1
+ local image=$2
+ local pod_arch=$3
+ local exe=$4
+ shift 4
+
+ if ! command -v podman >/dev/null 2>&1; then
+ echo "SKIP windows-system-dlls-smoke: podman unavailable for $label Wine run"
+ return 0
+ fi
+ if ! podman image exists "$image" >/dev/null 2>&1; then
+ echo "SKIP windows-system-dlls-smoke: $image unavailable for $label Wine run"
+ return 0
+ fi
+
+ # The double-quoted script is expanded by the host shell first; only
+ # the backslash-escaped \$ references are evaluated in the container.
+ podman run --rm --arch "$pod_arch" -v "$WORK_REAL:/probe:ro" "$image" \
+ bash -lc "
+ export WINEDEBUG=-all WINEPREFIX=/tmp/wineprefix
+ timeout 120s /usr/lib/wine/wine64 /probe/$(basename "$exe") $*
+ rc=\$?
+ echo \"$label exit=\$rc\"
+ test \"\$rc\" -eq 0
+ "
+}
+
+# build_and_check <label> <c-source> <exe> <dump> <link-mode> <libs>
+#   <expected-dll-1> [<expected-dll-2> ...] -- [<expected-sym-1> ...]
+#
+# Compiles <c-source> into <exe> with `cfree cc` for $TARGET against
+# $ARCH_SDK (both set by the caller), writes `cfree objdump -p <exe>`
+# to <dump>, and exits 1 with a FAIL line when the dump imports a
+# legacy CRT DLL, lacks an expected `DLL Name: <dll>` or `Name: <sym>`
+# string, or (link-mode "windows") does not report the WINDOWS_GUI
+# subsystem.
+#
+# link-mode is "console" or "windows" (drives -mconsole vs -mwindows).
+# libs is a space-separated list of library names, each passed as
+# `-l<name>` (e.g. "gdi32 ws2_32"), beyond the driver-auto-linked set
+# (kernel32/user32/advapi32/shell32/msvcrt/mingwex/mingw32/moldname).
+build_and_check() {
+  local label=$1
+  local csrc=$2
+  local exe=$3
+  local dump=$4
+  local mode=$5
+  local libs=$6
+  shift 6
+
+  # Everything before `--` is an expected DLL, everything after it an
+  # expected import symbol.
+  local dlls=()
+  local syms=()
+  local in_syms=0
+  while [ $# -gt 0 ]; do
+    if [ "$1" = "--" ]; then in_syms=1; shift; continue; fi
+    if [ "$in_syms" -eq 0 ]; then dlls+=("$1"); else syms+=("$1"); fi
+    shift
+  done
+
+  local mode_flag=-mconsole
+  if [ "$mode" = "windows" ]; then mode_flag=-mwindows; fi
+
+  # $libs is deliberately unquoted: it is split on whitespace.
+  local extra_lflags=()
+  local lib
+  for lib in $libs; do extra_lflags+=("-l$lib"); done
+
+  # Array expansions below use ${arr[@]+"${arr[@]}"}: under `set -u`,
+  # bash releases before 4.4 (e.g. the 3.2 that macOS ships) treat
+  # "${arr[@]}" of an empty array as an unbound variable, which would
+  # abort every call that passes libs="".
+  "$CFREE" cc -target "$TARGET" --sysroot "$ARCH_SDK" "$mode_flag" \
+    "$csrc" ${extra_lflags[@]+"${extra_lflags[@]}"} -o "$exe"
+  "$CFREE" objdump -p "$exe" >"$dump"
+  check_no_legacy_crt_imports "$dump" "$label PE"
+
+  local d
+  for d in ${dlls[@]+"${dlls[@]}"}; do
+    if ! grep -Fq "DLL Name: $d" "$dump"; then
+      echo "FAIL windows-system-dlls-smoke: $label: expected import of $d" >&2
+      grep -F 'DLL Name:' "$dump" >&2 || true
+      exit 1
+    fi
+  done
+
+  local s
+  for s in ${syms[@]+"${syms[@]}"}; do
+    if ! grep -Fq "Name: $s" "$dump"; then
+      echo "FAIL windows-system-dlls-smoke: $label: expected import symbol $s" >&2
+      exit 1
+    fi
+  done
+
+  if [ "$mode" = "windows" ] &&
+     ! grep -Fq "Subsystem: 2 (WINDOWS_GUI)" "$dump"; then
+    echo "FAIL windows-system-dlls-smoke: $label: subsystem != WINDOWS_GUI" >&2
+    exit 1
+  fi
+}
+
+# Driver: for each architecture, resolve a sysroot, then build and
+# inspect every probe program and run it under Wine when the matching
+# image is available.  `ran` records whether at least one architecture
+# had a usable sysroot.  With `set -e` in effect, a failing Wine run
+# aborts the script with that command's status.
+ran=0
+for arch in x86_64 aarch64; do
+ case "$arch" in
+ x86_64)
+ TARGET=x86_64-windows
+ LABEL=x64
+ IMAGE=localhost/cfree-wine-amd64
+ POD_ARCH=amd64
+ ;;
+ aarch64)
+ TARGET=aarch64-windows
+ LABEL=aarch64
+ IMAGE=localhost/cfree-wine-arm64
+ POD_ARCH=arm64
+ ;;
+ esac
+
+ # A missing or incomplete sysroot skips this architecture only.
+ if ! ARCH_SDK=$(sdk_for_arch "$arch"); then
+ echo "SKIP windows-system-dlls-smoke: no $arch llvm-mingw UCRT sysroot"
+ continue
+ fi
+ if [ ! -r "$ARCH_SDK/include/windows.h" ] ||
+ [ ! -r "$ARCH_SDK/lib/libucrt.a" ]; then
+ echo "SKIP windows-system-dlls-smoke: invalid UCRT llvm-mingw sysroot: $ARCH_SDK"
+ continue
+ fi
+
+ ran=1
+
+ # ---- GUI hello window (user32 + gdi32 + kernel32) ----
+ GUI_EXE=$WORK/gui_hello_window-$LABEL.exe
+ GUI_DUMP=$WORK/gui_hello_window-$LABEL.dump
+ build_and_check "$LABEL gui_hello_window" "$GUI_C" "$GUI_EXE" "$GUI_DUMP" \
+ windows "" USER32.dll KERNEL32.dll -- RegisterClassExW CreateWindowExW \
+ DefWindowProcW PeekMessageW DispatchMessageW
+ run_wine_if_available "$LABEL gui_hello_window" "$IMAGE" "$POD_ARCH" \
+ "$GUI_EXE"
+
+ # ---- gdi32 surface ----
+ GDI_EXE=$WORK/gdi_drawing-$LABEL.exe
+ GDI_DUMP=$WORK/gdi_drawing-$LABEL.dump
+ build_and_check "$LABEL gdi_drawing" "$GDI_C" "$GDI_EXE" "$GDI_DUMP" \
+ console gdi32 GDI32.dll USER32.dll -- CreateCompatibleDC GetStockObject \
+ SelectObject DeleteDC
+ run_wine_if_available "$LABEL gdi_drawing" "$IMAGE" "$POD_ARCH" "$GDI_EXE"
+
+ # ---- advapi32 surface ----
+ REG_EXE=$WORK/advapi32_registry-$LABEL.exe
+ REG_DUMP=$WORK/advapi32_registry-$LABEL.dump
+ build_and_check "$LABEL advapi32_registry" "$REG_C" "$REG_EXE" "$REG_DUMP" \
+ console "" ADVAPI32.dll KERNEL32.dll -- RegOpenKeyExW RegCloseKey \
+ RegQueryInfoKeyW
+ run_wine_if_available "$LABEL advapi32_registry" "$IMAGE" "$POD_ARCH" \
+ "$REG_EXE"
+
+ # ---- ws2_32 surface ----
+ WS_EXE=$WORK/ws2_32_socket-$LABEL.exe
+ WS_DUMP=$WORK/ws2_32_socket-$LABEL.dump
+ build_and_check "$LABEL ws2_32_socket" "$WS_C" "$WS_EXE" "$WS_DUMP" \
+ console ws2_32 WS2_32.dll KERNEL32.dll -- WSAStartup WSACleanup socket \
+ closesocket
+ run_wine_if_available "$LABEL ws2_32_socket" "$IMAGE" "$POD_ARCH" "$WS_EXE"
+
+ # ---- ole32 surface ----
+ OLE_EXE=$WORK/ole32_coinit-$LABEL.exe
+ OLE_DUMP=$WORK/ole32_coinit-$LABEL.dump
+ build_and_check "$LABEL ole32_coinit" "$OLE_C" "$OLE_EXE" "$OLE_DUMP" \
+ console ole32 ole32.dll KERNEL32.dll -- CoInitializeEx CoUninitialize
+ run_wine_if_available "$LABEL ole32_coinit" "$IMAGE" "$POD_ARCH" "$OLE_EXE"
+
+ # ---- shell32 surface ----
+ SHELL_EXE=$WORK/shell32_argv-$LABEL.exe
+ SHELL_DUMP=$WORK/shell32_argv-$LABEL.dump
+ build_and_check "$LABEL shell32_argv" "$SHELL_C" "$SHELL_EXE" "$SHELL_DUMP" \
+ console "" SHELL32.dll KERNEL32.dll -- CommandLineToArgvW
+ run_wine_if_available "$LABEL shell32_argv" "$IMAGE" "$POD_ARCH" \
+ "$SHELL_EXE"
+
+ # ---- comctl32 surface ----
+ COMCTL_EXE=$WORK/comctl32_init-$LABEL.exe
+ COMCTL_DUMP=$WORK/comctl32_init-$LABEL.dump
+ build_and_check "$LABEL comctl32_init" "$COMCTL_C" "$COMCTL_EXE" \
+ "$COMCTL_DUMP" console comctl32 COMCTL32.dll KERNEL32.dll -- \
+ InitCommonControls InitCommonControlsEx
+ run_wine_if_available "$LABEL comctl32_init" "$IMAGE" "$POD_ARCH" \
+ "$COMCTL_EXE"
+
+ # ---- mixed libucrt.a (short-import + long-form helper) ----
+ MIXED_EXE=$WORK/mixed_ucrt-$LABEL.exe
+ MIXED_DUMP=$WORK/mixed_ucrt-$LABEL.dump
+ build_and_check "$LABEL mixed_ucrt" "$MIXED_C" "$MIXED_EXE" "$MIXED_DUMP" \
+ console "" KERNEL32.dll api-ms-win-crt-stdio-l1-1-0.dll -- fflush
+ run_wine_if_available "$LABEL mixed_ucrt" "$IMAGE" "$POD_ARCH" "$MIXED_EXE"
+done
+
+# No architecture had a usable sysroot: report a skip, not a failure.
+if [ "$ran" -eq 0 ]; then
+ echo "SKIP windows-system-dlls-smoke: set CFREE_MINGW_SYSROOT or install llvm-mingw UCRT under /tmp/llvm-mingw*"
+ exit 0
+fi
+
+echo "PASS windows-system-dlls-smoke: user32/gdi32, advapi32, ws2_32, ole32, shell32, comctl32, mixed UCRT for x64/aarch64"
diff --git a/test/coff/windows-ucrt-hosted-smoke.sh b/test/coff/windows-ucrt-hosted-smoke.sh
@@ -0,0 +1,450 @@
+#!/usr/bin/env bash
+# Windows UCRT hosted smoke: builds small console, runtime, stdio,
+# imported-data, TLS, and GUI programs with `cfree cc` for
+# x86_64-windows and aarch64-windows, checks each PE via
+# `cfree objdump -p`, and runs most of them under Wine when a matching
+# podman image exists.
+#
+# Inputs (environment):
+#   CFREE_TEST_ROOT      repository root (default: two levels above this script)
+#   CFREE                cfree binary (default: $ROOT/build/cfree)
+#   CFREE_MINGW_SYSROOT  llvm-mingw <arch>-w64-mingw32 sysroot (optional)
+set -euo pipefail
+
+ROOT=${CFREE_TEST_ROOT:-$(cd "$(dirname "$0")/../.." && pwd)}
+CFREE=${CFREE:-"$ROOT/build/cfree"}
+SDK=${CFREE_MINGW_SYSROOT:-}
+
+# Print the first llvm-mingw sysroot for <arch> found under /tmp or
+# /private/tmp and return 0; return 1 when no candidate qualifies.  A
+# candidate qualifies when it has a lib/ directory and a readable
+# include/windows.h.  Probe order: /tmp before /private/tmp, and within
+# each root the nested llvm-mingw-*-ucrt-* layout before the flat one.
+find_sdk() {
+  local arch=$1
+  local triple="$arch"-w64-mingw32
+  local root cand
+  for root in /tmp /private/tmp; do
+    for cand in \
+      "$root"/llvm-mingw*/llvm-mingw-*-ucrt-*/"$triple" \
+      "$root"/llvm-mingw*/"$triple"; do
+      # An unmatched glob stays literal and simply fails these tests.
+      [ -d "$cand/lib" ] || continue
+      [ -r "$cand/include/windows.h" ] || continue
+      printf '%s\n' "$cand"
+      return 0
+    done
+  done
+  return 1
+}
+
+# Resolve the sysroot for <arch> and print it.  When $SDK is set, use it
+# directly if its basename is <arch>-w64-mingw32, otherwise try the
+# sibling <arch>-w64-mingw32 directory next to it.  Falls back to
+# find_sdk, whose exit status becomes this function's status.
+sdk_for_arch() {
+  local arch=$1
+  local triple="$arch"-w64-mingw32
+  local sibling
+  if [ -n "$SDK" ]; then
+    if [ "$(basename "$SDK")" = "$triple" ]; then
+      printf '%s\n' "$SDK"
+      return 0
+    fi
+    sibling=$(dirname "$SDK")/$triple
+    if [ -d "$sibling/lib" ] && [ -r "$sibling/include/windows.h" ]; then
+      printf '%s\n' "$sibling"
+      return 0
+    fi
+  fi
+  find_sdk "$arch"
+}
+
+# Fail (not skip) when the cfree binary is missing: every step below
+# invokes it.
+if [ ! -x "$CFREE" ]; then
+ echo "FAIL windows-ucrt-hosted-smoke: cfree binary not found: $CFREE" >&2
+ exit 1
+fi
+
+# Scratch directory for the generated sources, executables, and dumps;
+# removed on exit.  WORK_REAL is the physical path (pwd -P) and is the
+# one bind-mounted into the Wine container.
+TMP=${TMPDIR:-/tmp}
+WORK=$(mktemp -d "$TMP/cfree-windows-ucrt-smoke.XXXXXX")
+WORK_REAL=$(cd "$WORK" && pwd -P)
+trap 'rm -rf "$WORK"' EXIT
+
+# One C source per probe program, written by the heredocs below.
+CONSOLE_C=$WORK/windows-h.c
+HEADER_C=$WORK/windows-h-coverage.c
+RUNTIME_C=$WORK/runtime.c
+STDIO_C=$WORK/stdio.c
+IMPORTDATA_C=$WORK/import-data.c
+GUI_C=$WORK/gui.c
+TLS_C=$WORK/tls.c
+
+cat >"$CONSOLE_C" <<'SRC'
+#include <windows.h>
+int main(void) { Sleep(1); return 0; }
+SRC
+
+cat >"$HEADER_C" <<'SRC'
+#include <windows.h>
+#include <fileapi.h>
+#include <processthreadsapi.h>
+#include <synchapi.h>
+#include <errhandlingapi.h>
+#include <winuser.h>
+
+_Static_assert(sizeof(long) == 4, "windows long is LLP64");
+_Static_assert(sizeof(WCHAR) == 2, "WCHAR is UTF-16");
+_Static_assert(sizeof(void*) == 8, "PE32+ pointer size");
+
+static DWORD WINAPI thread_proc(LPVOID ctx) {
+ return (DWORD)(ULONG_PTR)ctx;
+}
+
+static BOOL CALLBACK enum_windows_proc(HWND hwnd, LPARAM lparam) {
+ RECT r;
+ POINT p;
+ WINDOWPLACEMENT wp;
+ ZeroMemory(&wp, sizeof(wp));
+ wp.length = sizeof(wp);
+ GetClientRect(hwnd, &r);
+ p.x = r.left;
+ p.y = r.top;
+ ClientToScreen(hwnd, &p);
+ SetLastError((DWORD)lparam);
+ return TRUE;
+}
+
+int main(void) {
+ HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE);
+ DWORD wrote = 0;
+ const char msg[] = "windows coverage\n";
+ WriteFile(out, msg, sizeof(msg) - 1, &wrote, NULL);
+
+ SECURITY_ATTRIBUTES sa;
+ ZeroMemory(&sa, sizeof(sa));
+ sa.nLength = sizeof(sa);
+ sa.bInheritHandle = FALSE;
+
+ WCHAR tmp_path[MAX_PATH];
+ WCHAR file_path[MAX_PATH];
+ GetTempPathW(MAX_PATH, tmp_path);
+ GetTempFileNameW(tmp_path, L"cfr", 0, file_path);
+ HANDLE h = CreateFileW(file_path, GENERIC_READ | GENERIC_WRITE, 0, &sa,
+ CREATE_ALWAYS, FILE_ATTRIBUTE_TEMPORARY, NULL);
+ if (h != INVALID_HANDLE_VALUE) {
+ LARGE_INTEGER pos;
+ pos.QuadPart = 0;
+ SetFilePointerEx(h, pos, NULL, FILE_BEGIN);
+ CloseHandle(h);
+ DeleteFileW(file_path);
+ }
+
+ CRITICAL_SECTION cs;
+ InitializeCriticalSection(&cs);
+ EnterCriticalSection(&cs);
+ LeaveCriticalSection(&cs);
+ DeleteCriticalSection(&cs);
+
+ DWORD tid = 0;
+ HANDLE th = CreateThread(NULL, 0, thread_proc, (LPVOID)(ULONG_PTR)3, 0, &tid);
+ if (th) {
+ WaitForSingleObject(th, INFINITE);
+ CloseHandle(th);
+ }
+
+ EnumWindows(enum_windows_proc, 0);
+ MessageBoxW(NULL, L"", L"", MB_OK | MB_SETFOREGROUND);
+ return 0;
+}
+SRC
+
+cat >"$RUNTIME_C" <<'SRC'
+#include <windows.h>
+#include <stdlib.h>
+#include <string.h>
+
+static int cmp_ints(const void *a, const void *b) {
+ int ia = *(const int *)a;
+ int ib = *(const int *)b;
+ return (ia > ib) - (ia < ib);
+}
+
+static int has_env(char **envp, const char *prefix) {
+ size_t n = strlen(prefix);
+ if (!envp) return 0;
+ for (; *envp; ++envp) {
+ if (strncmp(*envp, prefix, n) == 0) return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char **argv, char **envp) {
+ if (argc < 3) return 10;
+ if (strcmp(argv[1], "alpha") != 0 || strcmp(argv[2], "beta") != 0) return 11;
+ if (!has_env(envp, "CFREE_WIN_PROBE=present")) return 12;
+
+ HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE);
+ HANDLE err = GetStdHandle(STD_ERROR_HANDLE);
+ DWORD wrote = 0;
+ WriteFile(out, "stdout-ok\n", 10, &wrote, 0);
+ WriteFile(err, "stderr-ok\n", 10, &wrote, 0);
+
+ HANDLE heap = GetProcessHeap();
+ char *mem = (char *)HeapAlloc(heap, 0, 32);
+ if (!mem) return 13;
+ strcpy(mem, "heap-ok");
+ if (strcmp(mem, "heap-ok") != 0) return 14;
+ HeapFree(heap, 0, mem);
+
+ int vals[4] = {4, 1, 3, 2};
+ qsort(vals, 4, sizeof(vals[0]), cmp_ints);
+ if (vals[0] != 1 || vals[3] != 4) return 15;
+
+ SetLastError(1234);
+ if (GetLastError() != 1234) return 16;
+
+ char dir[MAX_PATH];
+ char path[MAX_PATH];
+ if (!GetTempPathA(MAX_PATH, dir)) return 17;
+ wsprintfA(path, "%scfree-runtime-%lu.tmp", dir,
+ (unsigned long)GetCurrentProcessId());
+ HANDLE f = CreateFileA(path, GENERIC_READ | GENERIC_WRITE, 0, 0,
+ CREATE_ALWAYS, FILE_ATTRIBUTE_TEMPORARY, 0);
+ if (f == INVALID_HANDLE_VALUE) return 18;
+ if (!WriteFile(f, "file-ok", 7, &wrote, 0) || wrote != 7) return 19;
+ SetFilePointer(f, 0, 0, FILE_BEGIN);
+ char buf[8];
+ DWORD got = 0;
+ memset(buf, 0, sizeof(buf));
+ if (!ReadFile(f, buf, 7, &got, 0)) return 20;
+ CloseHandle(f);
+ DeleteFileA(path);
+ if (got != 7 || strcmp(buf, "file-ok") != 0) return 21;
+ return 0;
+}
+SRC
+
+cat >"$STDIO_C" <<'SRC'
+#define _INC_STDIO_S
+#include <stdio.h>
+
+int main(void) {
+ puts("puts-ok");
+ fputs("fputs-ok\n", stdout);
+ printf("printf-ok\n");
+ fflush(stdout);
+ return 0;
+}
+SRC
+
+cat >"$IMPORTDATA_C" <<'SRC'
+#include <windows.h>
+
+extern char **__dcrt_initial_narrow_environment;
+
+int main(void) {
+ HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE);
+ DWORD wrote = 0;
+ WriteFile(out, "importdata\n", 11, &wrote, 0);
+ if (&__dcrt_initial_narrow_environment == 0) return 10;
+ if (__dcrt_initial_narrow_environment == 0) return 11;
+ return 0;
+}
+SRC
+
+cat >"$GUI_C" <<'SRC'
+#include <windows.h>
+int WINAPI WinMain(HINSTANCE hinst, HINSTANCE prev, LPSTR cmd, int show) {
+ (void)hinst;
+ (void)prev;
+ (void)cmd;
+ (void)show;
+ return 0;
+}
+SRC
+
+cat >"$TLS_C" <<'SRC'
+struct tlsdir {
+ unsigned long long start;
+ unsigned long long end;
+ unsigned long long index;
+ unsigned long long callbacks;
+ unsigned int zero_fill;
+ unsigned int characteristics;
+};
+
+extern unsigned char __ImageBase;
+extern const struct tlsdir _tls_used;
+
+_Thread_local int tls_init = 7;
+_Thread_local int tls_zero;
+
+static unsigned int rd32(const unsigned char *p) {
+ return (unsigned int)p[0] |
+ ((unsigned int)p[1] << 8) |
+ ((unsigned int)p[2] << 16) |
+ ((unsigned int)p[3] << 24);
+}
+
+static int check_tls_directory(void) {
+ const unsigned char *image = &__ImageBase;
+ unsigned int pe = rd32(image + 0x3c);
+ const unsigned char *opt = image + pe + 4 + 20;
+ const unsigned char *dir = opt + 112 + 9 * 8;
+ unsigned int tls_rva = rd32(dir);
+ unsigned int tls_size = rd32(dir + 4);
+ if (tls_size != 40) return 34;
+ if ((const unsigned char *)&_tls_used != image + tls_rva) return 35;
+ if (_tls_used.start == 0 || _tls_used.end <= _tls_used.start) return 36;
+ if (_tls_used.index == 0) return 37;
+ if (_tls_used.zero_fill != 0 || _tls_used.characteristics != 0) return 38;
+ return 0;
+}
+
+static int bump(void) {
+ tls_zero += 3;
+ tls_init += tls_zero;
+ return tls_init;
+}
+
+int main(void) {
+ int dir = check_tls_directory();
+ if (dir) return dir;
+ int a = bump();
+ int b = bump();
+ if (a != 10) return 31;
+ if (b != 16) return 32;
+ if (tls_zero != 6) return 33;
+ return 0;
+}
+SRC
+
+# Abort the script (exit 1) when the objdump output in <dump> lists
+# msvcrt.dll or ucrt.dll as an imported DLL (case-insensitive); the
+# offending lines are echoed to stderr.  <what> labels the FAIL line.
+check_no_legacy_crt_imports() {
+  local dump=$1 what=$2
+  local legacy_re='DLL Name: (msvcrt|ucrt)\.dll'
+  # Nothing matched: the dump is clean.
+  grep -Eiq "$legacy_re" "$dump" || return 0
+  echo "FAIL windows-ucrt-hosted-smoke: $what imports legacy CRT DLL directly" >&2
+  grep -Ei "$legacy_re" "$dump" >&2
+  exit 1
+}
+
+# run_wine_if_available <label> <image> <pod-arch> <exe> [args...]
+#
+# Runs <exe> under wine64 inside the podman image <image> with a 120s
+# timeout and CFREE_WIN_PROBE=present exported in the container.
+# $WORK_REAL is mounted read-only at /probe, so <exe> is addressed by
+# basename and must live in that directory.  Any extra arguments are
+# spliced unquoted into the command line ($*), so they must not contain
+# whitespace or shell metacharacters.  The container prints
+# "<label> exit=<rc>" and succeeds only when rc is 0.
+#
+# Returns 0 after printing a SKIP line when podman is not installed or
+# the image does not exist locally.
+run_wine_if_available() {
+ local label=$1
+ local image=$2
+ local pod_arch=$3
+ local exe=$4
+ shift 4
+
+ if ! command -v podman >/dev/null 2>&1; then
+ echo "SKIP windows-ucrt-hosted-smoke: podman unavailable for $label Wine run"
+ return 0
+ fi
+ if ! podman image exists "$image" >/dev/null 2>&1; then
+ echo "SKIP windows-ucrt-hosted-smoke: $image unavailable for $label Wine run"
+ return 0
+ fi
+
+ # The double-quoted script is expanded by the host shell first; only
+ # the backslash-escaped \$ references are evaluated in the container.
+ podman run --rm --arch "$pod_arch" -v "$WORK_REAL:/probe:ro" "$image" \
+ bash -lc "
+ export WINEDEBUG=-all WINEPREFIX=/tmp/wineprefix CFREE_WIN_PROBE=present
+ timeout 120s /usr/lib/wine/wine64 /probe/$(basename "$exe") $*
+ rc=\$?
+ echo \"$label exit=\$rc\"
+ test \"\$rc\" -eq 0
+ "
+}
+
+# Driver: for each architecture, resolve a sysroot, then build and
+# inspect every probe program.  `ran` records whether at least one
+# architecture had a usable sysroot.
+#
+# The bare `grep -Fq` / `grep -Eq` lines below act as assertions: under
+# `set -e` a miss aborts the script with grep's status and no FAIL
+# message.
+ran=0
+for arch in x86_64 aarch64; do
+ case "$arch" in
+ x86_64)
+ target=x86_64-windows
+ label=x64
+ image=localhost/cfree-wine-amd64
+ pod_arch=amd64
+ ;;
+ aarch64)
+ target=aarch64-windows
+ label=aarch64
+ image=localhost/cfree-wine-arm64
+ pod_arch=arm64
+ ;;
+ esac
+
+ # No sysroot for this architecture is a skip; a sysroot that was found
+ # but lacks windows.h or libmsvcrt.a is a hard failure.
+ if ! ARCH_SDK=$(sdk_for_arch "$arch"); then
+ echo "SKIP windows-ucrt-hosted-smoke: no $arch llvm-mingw UCRT sysroot"
+ continue
+ fi
+ if [ ! -r "$ARCH_SDK/include/windows.h" ] ||
+ [ ! -r "$ARCH_SDK/lib/libmsvcrt.a" ]; then
+ echo "FAIL windows-ucrt-hosted-smoke: invalid UCRT llvm-mingw sysroot: $ARCH_SDK" >&2
+ exit 1
+ fi
+
+ ran=1
+ CONSOLE_EXE=$WORK/windows-h-$arch.exe
+ CONSOLE_DUMP=$WORK/windows-h-$arch.dump
+ HEADER_EXE=$WORK/windows-h-coverage-$arch.exe
+ HEADER_DUMP=$WORK/windows-h-coverage-$arch.dump
+ RUNTIME_EXE=$WORK/runtime-$arch.exe
+ RUNTIME_DUMP=$WORK/runtime-$arch.dump
+ STDIO_EXE=$WORK/stdio-$arch.exe
+ STDIO_DUMP=$WORK/stdio-$arch.dump
+ IMPORTDATA_EXE=$WORK/import-data-$arch.exe
+ IMPORTDATA_DUMP=$WORK/import-data-$arch.dump
+ TLS_EXE=$WORK/tls-$arch.exe
+ TLS_DUMP=$WORK/tls-$arch.dump
+ GUI_EXE=$WORK/gui-$arch.exe
+ GUI_DUMP=$WORK/gui-$arch.dump
+
+ # Console Sleep program: expects KERNEL32 Sleep and the UCRT runtime
+ # import _set_app_type, and rejects an import named __set_app_type.
+ "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" -mconsole \
+ "$CONSOLE_C" -o "$CONSOLE_EXE"
+ "$CFREE" objdump -p "$CONSOLE_EXE" >"$CONSOLE_DUMP"
+ check_no_legacy_crt_imports "$CONSOLE_DUMP" "$label console PE"
+ if grep -Fq "Name: __set_app_type" "$CONSOLE_DUMP"; then
+ echo "FAIL windows-ucrt-hosted-smoke: weak alias leaked as __set_app_type import" >&2
+ exit 1
+ fi
+ grep -Fq "DLL Name: KERNEL32.dll" "$CONSOLE_DUMP"
+ grep -Fq "Name: Sleep" "$CONSOLE_DUMP"
+ grep -Fq "DLL Name: api-ms-win-crt-runtime-l1-1-0.dll" "$CONSOLE_DUMP"
+ grep -Fq "Name: _set_app_type" "$CONSOLE_DUMP"
+ run_wine_if_available "$label Sleep" "$image" "$pod_arch" "$CONSOLE_EXE"
+
+ # windows.h coverage program: link-level checks only; this executable
+ # is not run under Wine (note that it calls MessageBoxW).
+ "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" -mconsole \
+ "$HEADER_C" -o "$HEADER_EXE"
+ "$CFREE" objdump -p "$HEADER_EXE" >"$HEADER_DUMP"
+ check_no_legacy_crt_imports "$HEADER_DUMP" "$label windows.h coverage PE"
+ grep -Fq "Name: CreateFileW" "$HEADER_DUMP"
+ grep -Fq "Name: CreateThread" "$HEADER_DUMP"
+ grep -Fq "Name: WaitForSingleObject" "$HEADER_DUMP"
+ grep -Fq "Name: MessageBoxW" "$HEADER_DUMP"
+
+ # Runtime program: run with the argv (alpha beta) that its main()
+ # checks; CFREE_WIN_PROBE is exported by run_wine_if_available.
+ "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" \
+ "$RUNTIME_C" -o "$RUNTIME_EXE"
+ "$CFREE" objdump -p "$RUNTIME_EXE" >"$RUNTIME_DUMP"
+ check_no_legacy_crt_imports "$RUNTIME_DUMP" "$label runtime PE"
+ grep -Fq "Name: HeapAlloc" "$RUNTIME_DUMP"
+ grep -Fq "Name: CreateFileA" "$RUNTIME_DUMP"
+ grep -Fq "Name: qsort" "$RUNTIME_DUMP"
+ run_wine_if_available "$label runtime" "$image" "$pod_arch" "$RUNTIME_EXE" \
+ alpha beta
+
+ # UCRT stdio program.
+ "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" \
+ "$STDIO_C" -o "$STDIO_EXE"
+ "$CFREE" objdump -p "$STDIO_EXE" >"$STDIO_DUMP"
+ check_no_legacy_crt_imports "$STDIO_DUMP" "$label UCRT stdio PE"
+ grep -Fq "DLL Name: api-ms-win-crt-stdio-l1-1-0.dll" "$STDIO_DUMP"
+ grep -Fq "Name: fflush" "$STDIO_DUMP"
+ run_wine_if_available "$label UCRT stdio" "$image" "$pod_arch" "$STDIO_EXE"
+
+ # Imported-data program: a data symbol imported from the UCRT.
+ "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" \
+ "$IMPORTDATA_C" -o "$IMPORTDATA_EXE"
+ "$CFREE" objdump -p "$IMPORTDATA_EXE" >"$IMPORTDATA_DUMP"
+ check_no_legacy_crt_imports "$IMPORTDATA_DUMP" "$label imported-data PE"
+ grep -Fq "DLL Name: api-ms-win-crt-private-l1-1-0.dll" "$IMPORTDATA_DUMP"
+ grep -Fq "Name: __dcrt_initial_narrow_environment" "$IMPORTDATA_DUMP"
+ run_wine_if_available "$label imported-data" "$image" "$pod_arch" \
+ "$IMPORTDATA_EXE"
+
+ # TLS program: data-directory entry 9 (TLS) must report size
+ # 0x00000028, the same 40 bytes the program itself checks.
+ "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" \
+ "$TLS_C" -o "$TLS_EXE"
+ "$CFREE" objdump -p "$TLS_EXE" >"$TLS_DUMP"
+ check_no_legacy_crt_imports "$TLS_DUMP" "$label TLS PE"
+ grep -Eq '^[[:space:]]*9[[:space:]]+TLS[[:space:]]+0x[0-9a-fA-F]+[[:space:]]+0x00000028' \
+ "$TLS_DUMP"
+
+ # GUI program: link-level subsystem check only; not run under Wine.
+ "$CFREE" cc -target "$target" --sysroot "$ARCH_SDK" -mwindows \
+ "$GUI_C" -o "$GUI_EXE"
+ "$CFREE" objdump -p "$GUI_EXE" >"$GUI_DUMP"
+ grep -Fq "Subsystem: 2 (WINDOWS_GUI)" "$GUI_DUMP"
+ check_no_legacy_crt_imports "$GUI_DUMP" "$label GUI PE"
+
+ # Wine run for the TLS executable built above.
+ run_wine_if_available "$label TLS" "$image" "$pod_arch" "$TLS_EXE"
+done
+
+# No architecture had a usable sysroot: report a skip, not a failure.
+if [ "$ran" -eq 0 ]; then
+ echo "SKIP windows-ucrt-hosted-smoke: set CFREE_MINGW_SYSROOT or install llvm-mingw UCRT under /tmp/llvm-mingw*"
+ exit 0
+fi
+
+echo "PASS windows-ucrt-hosted-smoke: Sleep, windows.h, runtime, UCRT stdio/imports/imported-data, GUI, and TLS for x64/aarch64"
diff --git a/test/objdump/aarch64-windows/cases/01-coff-file-header.expected b/test/objdump/aarch64-windows/cases/01-coff-file-header.expected
@@ -0,0 +1,5 @@
+t.obj: file format coff64-arm64
+architecture: arm64, flags 0x00000011:
+HAS_RELOC, HAS_SYMS
+start address 0x0000000000000000
+format: coff64
diff --git a/test/objdump/aarch64-windows/cases/01-coff-file-header.sh b/test/objdump/aarch64-windows/cases/01-coff-file-header.sh
@@ -0,0 +1,14 @@
+# Golden: `objdump -f` file-header summary for an aarch64 Windows .obj.
+# Counterpart of the x86_64-windows file-header case, keeping both the
+# arch dispatch in -f and the COFF code path covered.
+#
+# awk passes the summary lines through verbatim; any line starting with
+# "format: coff64" is reduced to exactly that prefix.
+
+printf '%s\n' 'int main(void) { return 0; }' > t.c
+"$CFREE" cc -target aarch64-windows -c t.c -o t.obj
+"$CFREE" objdump -f t.obj | awk '
+/file format/ || /^architecture:/ || /^HAS_/ || /^start address/ {
+    print; next
+}
+/^format: coff64/ { print "format: coff64" }'
diff --git a/test/objdump/x86_64-windows/cases/01-coff-characteristics.expected b/test/objdump/x86_64-windows/cases/01-coff-characteristics.expected
@@ -0,0 +1,4 @@
+t.obj: file format coff64-x86_64
+Idx Name Size Align Flags
+section: .text CONTENTS,ALLOC,LOAD,READONLY,CODE
+raw: 0x60300020
diff --git a/test/objdump/x86_64-windows/cases/01-coff-characteristics.sh b/test/objdump/x86_64-windows/cases/01-coff-characteristics.sh
@@ -0,0 +1,13 @@
+# Golden: `objdump -h` on an x86_64 Windows .obj, checking how COFF
+# section characteristics are decoded. The filter keeps the format
+# banner, the table header, fields 2 and 5 (name and flag tags) of the
+# .text row, and the first raw Characteristics value, then stops.
+
+printf '%s\n' 'int main(void) { return 0; }' > t.c
+"$CFREE" cc -target x86_64-windows -c t.c -o t.obj
+"$CFREE" objdump -h t.obj | awk '
+/file format/ || /^Idx Name/ { print; next }
+/^ *[0-9]+ \.text/ { print "section:", $2, $5 }
+/Characteristics:.*0x[0-9a-f]+/ {
+    print "raw:", $NF; exit
+}'
diff --git a/test/objdump/x86_64-windows/cases/02-coff-file-header.expected b/test/objdump/x86_64-windows/cases/02-coff-file-header.expected
@@ -0,0 +1,5 @@
+t.obj: file format coff64-x86_64
+architecture: x86_64, flags 0x00000011:
+HAS_RELOC, HAS_SYMS
+start address 0x0000000000000000
+format: coff64
diff --git a/test/objdump/x86_64-windows/cases/02-coff-file-header.sh b/test/objdump/x86_64-windows/cases/02-coff-file-header.sh
@@ -0,0 +1,14 @@
+# Golden: `objdump -f` file-header summary for an x86_64 Windows .obj.
+# Pins the architecture line and the HAS_RELOC/HAS_SYMS flag line now
+# that -f prints a real summary rather than being a silent no-op.
+#
+# The first awk rule passes the summary lines through unchanged; the
+# second prints a fixed "format: coff64" for lines with that prefix.
+
+printf '%s\n' 'int main(void) { return 0; }' > t.c
+"$CFREE" cc -target x86_64-windows -c t.c -o t.obj
+"$CFREE" objdump -f t.obj | awk '
+/file format|^architecture:|^HAS_|^start address/ {
+    print; next
+}
+/^format: coff64/ { print "format: coff64" }'
diff --git a/test/objdump/x86_64-windows/cases/03-pe-private-headers.expected b/test/objdump/x86_64-windows/cases/03-pe-private-headers.expected
@@ -0,0 +1,8 @@
+found: PE32+ header line
+Magic: 0x20b
+subsystem: WINDOWS_CUI
+DllCharacteristics:
+Data Directories:
+ Idx Name RVA Size
+The Import Tables:
+import: KERNEL32.dll
diff --git a/test/objdump/x86_64-windows/cases/03-pe-private-headers.sh b/test/objdump/x86_64-windows/cases/03-pe-private-headers.sh
@@ -0,0 +1,38 @@
+# Golden: objdump -p on a linked PE32+ executable. Asserts the
+# private-header walker prints the DOS/PE signature, optional-header
+# highlights, and at least one DLL import section. Gated on llvm-mingw
+# UCRT availability — most CI hosts won't have it.
+
+# Print the first candidate sysroot with lib/ and a readable windows.h; 1 if none.
+find_sdk() {
+    local root
+    for root in \
+        /tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/x86_64-w64-mingw32 \
+        /tmp/llvm-mingw*/x86_64-w64-mingw32 \
+        /private/tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/x86_64-w64-mingw32 \
+        /private/tmp/llvm-mingw*/x86_64-w64-mingw32; do
+        [ -d "$root/lib" ] && [ -r "$root/include/windows.h" ] || continue
+        printf '%s\n' "$root"
+        return 0
+    done
+    return 1
+}
+
+if ! SDK=$(find_sdk); then
+    echo "SKIP: no llvm-mingw UCRT sysroot"
+    exit 0
+fi
+
+# Link a trivial program against the sysroot, then reduce `objdump -p`
+# to one marker line per private-header feature the golden expects.
+printf '%s\n' 'int main(void) { return 0; }' > t.c
+"$CFREE" cc -target x86_64-windows --sysroot "$SDK" t.c -o t.exe
+"$CFREE" objdump -p t.exe | awk '
+/PE32\+ private headers/ { print "found: PE32+ header line"; next }
+/^ Magic:/ { print $1, $2; next }
+/^ Subsystem:.*WINDOWS_CUI/ { print "subsystem: WINDOWS_CUI"; next }
+/^ DllCharacteristics:/ { print $1; next }
+/^Data Directories:/ || /^ Idx Name/ || /^The Import Tables:/ {
+    print; next
+}
+/^ DLL Name: KERNEL32\.dll/ { print "import: KERNEL32.dll"; next }'
diff --git a/test/objdump/x86_64-windows/cases/04-pe-sections.expected b/test/objdump/x86_64-windows/cases/04-pe-sections.expected
@@ -0,0 +1,4 @@
+found: PE sections
+found: PE header row
+section: .text
+section: .idata
diff --git a/test/objdump/x86_64-windows/cases/04-pe-sections.sh b/test/objdump/x86_64-windows/cases/04-pe-sections.sh
@@ -0,0 +1,34 @@
+# Golden: objdump -h on a linked PE32+ executable. Asserts the PE
+# section walker fires (since cfree_obj_open does not parse PE
+# images) and produces a section table with the canonical headers.
+# Gated on llvm-mingw UCRT availability.
+
+# Print the first candidate sysroot with lib/ and a readable windows.h; 1 if none.
+find_sdk() {
+    local root
+    for root in \
+        /tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/x86_64-w64-mingw32 \
+        /tmp/llvm-mingw*/x86_64-w64-mingw32 \
+        /private/tmp/llvm-mingw*/llvm-mingw-*-ucrt-*/x86_64-w64-mingw32 \
+        /private/tmp/llvm-mingw*/x86_64-w64-mingw32; do
+        [ -d "$root/lib" ] && [ -r "$root/include/windows.h" ] || continue
+        printf '%s\n' "$root"
+        return 0
+    done
+    return 1
+}
+
+if ! SDK=$(find_sdk); then
+    echo "SKIP: no llvm-mingw UCRT sysroot"
+    exit 0
+fi
+
+# Link a trivial program against the sysroot, then reduce `objdump -h`
+# to the section-table markers the golden expects.
+printf '%s\n' 'int main(void) { return 0; }' > t.c
+"$CFREE" cc -target x86_64-windows --sysroot "$SDK" t.c -o t.exe
+"$CFREE" objdump -h t.exe | awk '
+/Sections \(PE image\)/ { print "found: PE sections"; next }
+/^Idx Name.*VMA.*Size/ { print "found: PE header row"; next }
+/^ *[0-9]+ \.text/ { print "section: .text"; next }
+/^ *[0-9]+ \.idata/ { print "section: .idata"; next }'
diff --git a/test/test.mk b/test/test.mk
@@ -27,9 +27,9 @@
# asm_parse / cfree_disasm_iter_* are still stubs; the harness builds
# and runs end-to-end so the wiring stays exercised. See doc/ASM.md.
-.PHONY: test test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-strip-driver test-objcopy-driver test-objdump-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-rv64-inline test-rv64-jit test-emu test-x64-inline test-x64-dbg test-rt-headers test-rt-runtime test-musl test-musl-rv64 test-glibc test-glibc-rv64 test-lib-deps test-smoke-x64 test-smoke-rv64 test-cbackend rv64-doctor
+.PHONY: test test-driver test-lex test-pp test-pp-err test-elf test-coff test-coff-mingw-import test-coff-windows-ucrt test-ar test-ar-driver test-strip-driver test-objcopy-driver test-objdump-driver test-link test-cg-api test-toy test-opt test-dwarf test-debug test-parse test-parse-err test-asm test-wasm-front test-isa test-aa64-inline test-rv64-inline test-rv64-jit test-emu test-x64-inline test-x64-dbg test-rt-headers test-rt-runtime test-musl test-musl-rv64 test-glibc test-glibc-rv64 test-lib-deps test-smoke-x64 test-smoke-rv64 test-cbackend rv64-doctor
-test: test-driver test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-strip-driver test-objcopy-driver test-objdump-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-rv64-inline test-rv64-jit test-emu test-x64-inline test-x64-dbg test-rt-headers test-lib-deps
+test: test-driver test-lex test-pp test-pp-err test-elf test-coff test-ar test-ar-driver test-strip-driver test-objcopy-driver test-objdump-driver test-link test-toy test-dwarf test-debug test-parse test-parse-err test-asm test-isa test-aa64-inline test-rv64-inline test-rv64-jit test-emu test-x64-inline test-x64-dbg test-rt-headers test-lib-deps
# `test-cbackend` is intentionally not in the default `test` target: the
# Phase 1 C backend skips most fixtures pending later phases, which would
# add noise to the default summary. Run it explicitly to gate progress.
@@ -260,8 +260,13 @@ test-rt-runtime: bin rt $(LINK_EXE_RUNNER)
# void* to a function pointer, which pedantic rejects under C11.
HARNESS_CFLAGS = -std=c11 -Wall -Wextra -Werror -isysroot $(SYSROOT) -Iinclude -Itest
-ROUNDTRIP_BIN = build/test/cfree-roundtrip
-ROUNDTRIP_BIN_MACHO = build/test/cfree-roundtrip-macho
+ROUNDTRIP_BIN = build/test/cfree-roundtrip
+ROUNDTRIP_BIN_MACHO = build/test/cfree-roundtrip-macho
+ROUNDTRIP_BIN_COFF = build/test/cfree-roundtrip-coff
+COFF_IMPORT_SMOKE_BIN = build/test/pe-import-smoke
+COFF_IMPORT_MINGW_BIN = build/test/pe-import-mingw
+COFF_DSO_FORWARDER_BIN = build/test/pe-dso-forwarder
+COFF_MIXED_ARCHIVE_BIN = build/test/pe-mixed-archive
LINK_EXE_RUNNER = build/test/link-exe-runner
JIT_RUNNER = build/test/jit-runner
PARSE_RUNNER = build/test/parse-runner
@@ -278,6 +283,45 @@ $(ROUNDTRIP_BIN_MACHO): test/macho/cfree-roundtrip-macho.c $(LIB_AR)
@mkdir -p $(dir $@)
$(CC) $(HARNESS_CFLAGS) -Isrc test/macho/cfree-roundtrip-macho.c $(LIB_AR) -o $@
+# PE/COFF round-trip harness (test/coff/): a single binary that builds
+# hand-crafted ObjBuilders and asserts emit_coff/read_coff round-trip
+# stability for x86_64-windows and aarch64-windows alike.
+$(ROUNDTRIP_BIN_COFF): test/coff/cfree-roundtrip-coff.c $(LIB_AR)
+	@mkdir -p $(@D)
+	$(CC) $(HARNESS_CFLAGS) -Isrc $< $(LIB_AR) -o $@
+
+# Smoke test for the PE import directory (test/coff/pe-import-smoke.c).
+# Drives short-import shim bytes through link_add_obj_bytes (where they
+# are reclassified as a DSO), link_resolve and link_emit_coff, then checks
+# the PE32+ with x86_64-w64-mingw32-objdump, skipping cleanly if absent.
+$(COFF_IMPORT_SMOKE_BIN): test/coff/pe-import-smoke.c $(LIB_AR)
+	@mkdir -p $(@D)
+	$(CC) $(HARNESS_CFLAGS) -Isrc $< $(LIB_AR) -o $@
+
+# Import test using a real mingw archive (test/coff/pe-import-mingw.c).
+# Covers absorption of long-form import archives, i.e. the
+# link_add_archive_bytes -> classify_coff_archive_member path. The test
+# skips cleanly on hosts without the mingw toolchain.
+$(COFF_IMPORT_MINGW_BIN): test/coff/pe-import-mingw.c $(LIB_AR)
+	@mkdir -p $(@D)
+	$(CC) $(HARNESS_CFLAGS) -Isrc $< $(LIB_AR) -o $@
+
+# Forwarder-export contract of read_coff_dso (test/coff/pe-dso-forwarder.c).
+# The harness synthesizes a tiny PE32+ DLL with one direct and one forwarder
+# export and asserts both surface as OBJ_SEC_NONE globals on the ObjBuilder.
+$(COFF_DSO_FORWARDER_BIN): test/coff/pe-dso-forwarder.c $(LIB_AR)
+	@mkdir -p $(@D)
+	$(CC) $(HARNESS_CFLAGS) -Isrc $< $(LIB_AR) -o $@
+
+# Mixed-member archive test (test/coff/pe-mixed-archive.c). A single
+# archive holds a short-import member plus a long-form COFF object that
+# defines a data symbol; the test verifies references are satisfied
+# through both shapes. libucrt.a has the same composition (API-set
+# imports alongside lib64_libucrt_extra_a-*.o helpers).
+$(COFF_MIXED_ARCHIVE_BIN): test/coff/pe-mixed-archive.c $(LIB_AR)
+	@mkdir -p $(@D)
+	$(CC) $(HARNESS_CFLAGS) -Isrc $< $(LIB_AR) -o $@
+
$(LINK_EXE_RUNNER): test/link/harness/link_exe_runner.c $(LIB_AR)
@mkdir -p $(dir $@)
$(CC) $(HARNESS_CFLAGS) test/link/harness/link_exe_runner.c $(LIB_AR) -o $@
@@ -297,6 +341,25 @@ $(WASM_TOOL): test/wasm/harness/wasm_tool.c $(LIB_AR)
test-elf: lib bin-soft $(ROUNDTRIP_BIN)
bash test/elf/run.sh
+# PE/COFF suite: the four harness binaries, then the hosted UCRT smoke
+# (self-skips when llvm-mingw is not installed) and the system-DLL smoke.
+test-coff: lib bin rt-aarch64-windows $(ROUNDTRIP_BIN_COFF) $(COFF_IMPORT_SMOKE_BIN) $(COFF_DSO_FORWARDER_BIN) $(COFF_MIXED_ARCHIVE_BIN)
+ $(ROUNDTRIP_BIN_COFF)
+ $(COFF_IMPORT_SMOKE_BIN)
+ $(COFF_DSO_FORWARDER_BIN)
+ $(COFF_MIXED_ARCHIVE_BIN)
+ bash test/coff/windows-ucrt-hosted-smoke.sh
+ bash test/coff/windows-system-dlls-smoke.sh
+
+# Kept out of test-coff and `test`: built and run only when requested
+# explicitly. The harness itself self-skips on missing mingw tooling.
+test-coff-mingw-import: lib $(COFF_IMPORT_MINGW_BIN)
+ $(COFF_IMPORT_MINGW_BIN)
+
+# Runs only the hosted UCRT smoke script (the same one test-coff invokes).
+test-coff-windows-ucrt: bin rt-aarch64-windows
+ bash test/coff/windows-ucrt-hosted-smoke.sh
+
test-link: lib $(ROUNDTRIP_BIN) $(ROUNDTRIP_BIN_MACHO) $(LINK_EXE_RUNNER) $(JIT_RUNNER)
bash test/link/run.sh